Index: projects/runtime-coverage-v2/lib/atf/libatf-c/Makefile =================================================================== --- projects/runtime-coverage-v2/lib/atf/libatf-c/Makefile (revision 347075) +++ projects/runtime-coverage-v2/lib/atf/libatf-c/Makefile (revision 347076) @@ -1,104 +1,163 @@ #- # Copyright (c) 2011 Google, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ .include .include # Store the toolchain executable in ATF_BUILD_{CC,CPP,CXX} to ensure other # values -- like -target, -B ..., etc -- don't get leaked into the tests. # # Be sure to omit ${CCACHE_BIN} (if specified) from the variable as it gets # automatically appended to the variables in bsd.compiler.mk when # ${MK_CCACHE_BUILD} != no. ATF_BUILD_CC:= ${CC:N${CCACHE_BIN}:[1]} ATF_BUILD_CPP:= ${CPP:N${CCACHE_BIN}:[1]} ATF_BUILD_CXX:= ${CXX:N${CCACHE_BIN}:[1]} # Only capture defines, includes, linker flags, optimization levels, warnings # and preprocessor flags when building ATF_BUILD_{C,CPP,CXX}FLAGS. ATF_BUILD_CFLAGS:= ${CFLAGS:M-[DILOWf]*} ATF_BUILD_CPPFLAGS:= ${CPPFLAGS:M-[DILOWf]*} ATF_BUILD_CXXFLAGS:= ${CXXFLAGS:M-[DILOWf]*} LIB= atf-c PRIVATELIB= true SHLIB_MAJOR= 1 ATF= ${SRCTOP}/contrib/atf .PATH: ${ATF} .PATH: ${ATF}/atf-c .PATH: ${ATF}/atf-c/detail CFLAGS+= -DATF_BUILD_CC='"${ATF_BUILD_CC}"' CFLAGS+= -DATF_BUILD_CFLAGS='"${ATF_BUILD_CFLAGS}"' CFLAGS+= -DATF_BUILD_CPP='"${ATF_BUILD_CPP}"' CFLAGS+= -DATF_BUILD_CPPFLAGS='"${ATF_BUILD_CPPFLAGS}"' CFLAGS+= -DATF_BUILD_CXX='"${ATF_BUILD_CXX}"' CFLAGS+= -DATF_BUILD_CXXFLAGS='"${ATF_BUILD_CXXFLAGS}"' CFLAGS+= -I${ATF} CFLAGS+= -I${.CURDIR} CFLAGS+= -I. 
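The -DATF_BUILD_CC='"${ATF_BUILD_CC}"' lines above depend on nested quoting: the single quotes survive make's shell invocation, so the double quotes reach the compiler and each macro expands to a C string literal. A minimal sketch of how a consumer might pick up such a define follows; the fallback value and the program itself are illustrative only, not atf's actual code:

#include <stdio.h>

#ifndef ATF_BUILD_CC			/* normally injected via CFLAGS above */
#define ATF_BUILD_CC "cc"		/* illustrative fallback only */
#endif

int
main(void)
{
	/* ATF_BUILD_CC arrives as a string literal, e.g. "cc" */
	printf("default test compiler: %s\n", ATF_BUILD_CC);
	return (0);
}

Building this with cc -DATF_BUILD_CC='"/usr/bin/cc"' demo.c demonstrates the same quoting trick from the command line.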
SRCS= build.c \ check.c \ dynstr.c \ env.c \ error.c \ fs.c \ list.c \ map.c \ process.c \ sanity.c \ text.c \ user.c \ utils.c \ tc.c \ tp.c \ tp_main.c INCS= build.h \ check.h \ defs.h \ error.h \ error_fwd.h \ macros.h \ tc.h \ tp.h \ utils.h INCSDIR= ${INCLUDEDIR}/atf-c INCS+= atf-c.h INCSDIR_atf-c.h= ${INCLUDEDIR} MAN= atf-c.3 +MLINKS+= atf-c.3 ATF_CHECK.3 \ + atf-c.3 ATF_CHECK_MSG.3 \ + atf-c.3 ATF_CHECK_EQ.3 \ + atf-c.3 ATF_CHECK_EQ_MSG.3 \ + atf-c.3 ATF_CHECK_MATCH.3 \ + atf-c.3 ATF_CHECK_MATCH_MSG.3 \ + atf-c.3 ATF_CHECK_STREQ.3 \ + atf-c.3 ATF_CHECK_STREQ_MSG.3 \ + atf-c.3 ATF_CHECK_ERRNO.3 \ + atf-c.3 ATF_REQUIRE.3 \ + atf-c.3 ATF_REQUIRE_MSG.3 \ + atf-c.3 ATF_REQUIRE_EQ.3 \ + atf-c.3 ATF_REQUIRE_EQ_MSG.3 \ + atf-c.3 ATF_REQUIRE_MATCH.3 \ + atf-c.3 ATF_REQUIRE_MATCH_MSG.3 \ + atf-c.3 ATF_REQUIRE_STREQ.3 \ + atf-c.3 ATF_REQUIRE_STREQ_MSG.3 \ + atf-c.3 ATF_REQUIRE_ERRNO.3 \ + atf-c.3 ATF_TC.3 \ + atf-c.3 ATF_TC_BODY.3 \ + atf-c.3 ATF_TC_BODY_NAME.3 \ + atf-c.3 ATF_TC_CLEANUP.3 \ + atf-c.3 ATF_TC_CLEANUP_NAME.3 \ + atf-c.3 ATF_TC_HEAD.3 \ + atf-c.3 ATF_TC_HEAD_NAME.3 \ + atf-c.3 ATF_TC_NAME.3 \ + atf-c.3 ATF_TC_WITH_CLEANUP.3 \ + atf-c.3 ATF_TC_WITHOUT_HEAD.3 \ + atf-c.3 ATF_TP_ADD_TC.3 \ + atf-c.3 ATF_TP_ADD_TCS.3 \ + atf-c.3 atf_tc_get_config_var.3 \ + atf-c.3 atf_tc_get_config_var_wd.3 \ + atf-c.3 atf_tc_get_config_var_as_bool.3 \ + atf-c.3 atf_tc_get_config_var_as_bool_wd.3 \ + atf-c.3 atf_tc_get_config_var_as_long.3 \ + atf-c.3 atf_tc_get_config_var_as_long_wd.3 \ + atf-c.3 atf_no_error.3 \ + atf-c.3 atf_tc_expect_death.3 \ + atf-c.3 atf_tc_expect_exit.3 \ + atf-c.3 atf_tc_expect_fail.3 \ + atf-c.3 atf_tc_expect_pass.3 \ + atf-c.3 atf_tc_expect_signal.3 \ + atf-c.3 atf_tc_expect_timeout.3 \ + atf-c.3 atf_tc_fail.3 \ + atf-c.3 atf_tc_fail_nonfatal.3 \ + atf-c.3 atf_tc_pass.3 \ + atf-c.3 atf_tc_skip.3 \ + atf-c.3 atf_utils_cat_file.3 \ + atf-c.3 atf_utils_compare_file.3 \ + atf-c.3 atf_utils_copy_file.3 \ + atf-c.3 atf_utils_create_file.3 \ + atf-c.3 atf_utils_file_exists.3 \ + atf-c.3 atf_utils_fork.3 \ + atf-c.3 atf_utils_free_charpp.3 \ + atf-c.3 atf_utils_grep_file.3 \ + atf-c.3 atf_utils_grep_string.3 \ + atf-c.3 atf_utils_readline.3 \ + atf-c.3 atf_utils_redirect.3 \ + atf-c.3 atf_utils_wait.3 MLINKS+= atf-c.3 atf-c-api.3 # Backwards compatibility. HAS_TESTS= SUBDIR.${MK_TESTS}+= tests .include "../common.mk" .include Index: projects/runtime-coverage-v2/lib/libbe/be.c =================================================================== --- projects/runtime-coverage-v2/lib/libbe/be.c (revision 347075) +++ projects/runtime-coverage-v2/lib/libbe/be.c (revision 347076) @@ -1,1097 +1,1097 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017 Kyle J. Kneitinger * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include "be.h" #include "be_impl.h" struct be_destroy_data { libbe_handle_t *lbh; char *snapname; }; #if SOON static int be_create_child_noent(libbe_handle_t *lbh, const char *active, const char *child_path); static int be_create_child_cloned(libbe_handle_t *lbh, const char *active); #endif /* Arbitrary... should tune */ #define BE_SNAP_SERIAL_MAX 1024 /* * Iterator function for locating the rootfs amongst the children of the * zfs_be_root set by loader(8). data is expected to be a libbe_handle_t *. */ static int be_locate_rootfs(libbe_handle_t *lbh) { struct statfs sfs; struct extmnttab entry; zfs_handle_t *zfs; /* * Check first if root is ZFS; if not, we'll bail on rootfs capture. * Unfortunately needed because zfs_path_to_zhandle will emit to * stderr if / isn't actually a ZFS filesystem, which we'd like * to avoid. */ if (statfs("/", &sfs) == 0) { statfs2mnttab(&sfs, &entry); if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) return (1); } else return (1); zfs = zfs_path_to_zhandle(lbh->lzh, "/", ZFS_TYPE_FILESYSTEM); if (zfs == NULL) return (1); strlcpy(lbh->rootfs, zfs_get_name(zfs), sizeof(lbh->rootfs)); zfs_close(zfs); return (0); } /* * Initializes the libbe context to operate in the root boot environment * dataset, for example, zroot/ROOT. */ libbe_handle_t * libbe_init(const char *root) { char altroot[MAXPATHLEN]; libbe_handle_t *lbh; char *poolname, *pos; int pnamelen; lbh = NULL; poolname = pos = NULL; if ((lbh = calloc(1, sizeof(libbe_handle_t))) == NULL) goto err; if ((lbh->lzh = libzfs_init()) == NULL) goto err; /* * Grab rootfs, we'll work backwards from there if an optional BE root * has not been passed in. 
*/ if (be_locate_rootfs(lbh) != 0) { if (root == NULL) goto err; *lbh->rootfs = '\0'; } if (root == NULL) { /* Strip off the final slash from rootfs to get the be root */ strlcpy(lbh->root, lbh->rootfs, sizeof(lbh->root)); pos = strrchr(lbh->root, '/'); if (pos == NULL) goto err; *pos = '\0'; } else strlcpy(lbh->root, root, sizeof(lbh->root)); if ((pos = strchr(lbh->root, '/')) == NULL) goto err; pnamelen = pos - lbh->root; poolname = malloc(pnamelen + 1); if (poolname == NULL) goto err; strlcpy(poolname, lbh->root, pnamelen + 1); if ((lbh->active_phandle = zpool_open(lbh->lzh, poolname)) == NULL) goto err; free(poolname); poolname = NULL; if (zpool_get_prop(lbh->active_phandle, ZPOOL_PROP_BOOTFS, lbh->bootfs, sizeof(lbh->bootfs), NULL, true) != 0) goto err; if (zpool_get_prop(lbh->active_phandle, ZPOOL_PROP_ALTROOT, altroot, sizeof(altroot), NULL, true) == 0 && strcmp(altroot, "-") != 0) lbh->altroot_len = strlen(altroot); return (lbh); err: if (lbh != NULL) { if (lbh->active_phandle != NULL) zpool_close(lbh->active_phandle); if (lbh->lzh != NULL) libzfs_fini(lbh->lzh); free(lbh); } free(poolname); return (NULL); } /* * Free memory allocated by libbe_init() */ void libbe_close(libbe_handle_t *lbh) { if (lbh->active_phandle != NULL) zpool_close(lbh->active_phandle); libzfs_fini(lbh->lzh); free(lbh); } /* * Proxy through to libzfs for the moment. */ void be_nicenum(uint64_t num, char *buf, size_t buflen) { zfs_nicenum(num, buf, buflen); } static int be_destroy_cb(zfs_handle_t *zfs_hdl, void *data) { char path[BE_MAXPATHLEN]; struct be_destroy_data *bdd; zfs_handle_t *snap; int err; bdd = (struct be_destroy_data *)data; if (bdd->snapname == NULL) { err = zfs_iter_children(zfs_hdl, be_destroy_cb, data); if (err != 0) return (err); return (zfs_destroy(zfs_hdl, false)); } /* If we're dealing with snapshots instead, delete that one alone */ err = zfs_iter_filesystems(zfs_hdl, be_destroy_cb, data); if (err != 0) return (err); /* * This part is intentionally glossing over any potential errors, * because there's a lot less potential for errors when we're cleaning * up snapshots rather than a full deep BE. The primary error case * here being if the snapshot doesn't exist in the first place, which * the caller will likely deem insignificant as long as it doesn't * exist after the call. Thus, such a missing snapshot shouldn't jam * up the destruction. */ snprintf(path, sizeof(path), "%s@%s", zfs_get_name(zfs_hdl), bdd->snapname); if (!zfs_dataset_exists(bdd->lbh->lzh, path, ZFS_TYPE_SNAPSHOT)) return (0); snap = zfs_open(bdd->lbh->lzh, path, ZFS_TYPE_SNAPSHOT); if (snap != NULL) zfs_destroy(snap, false); return (0); } /* * Destroy the boot environment or snapshot specified by the name * parameter. 
Options are or'd together with the possible values: * BE_DESTROY_FORCE : forces operation on mounted datasets * BE_DESTROY_ORIGIN: destroy the origin snapshot as well */ int be_destroy(libbe_handle_t *lbh, const char *name, int options) { struct be_destroy_data bdd; char origin[BE_MAXPATHLEN], path[BE_MAXPATHLEN]; zfs_handle_t *fs; char *snapdelim; int err, force, mounted; size_t rootlen; bdd.lbh = lbh; bdd.snapname = NULL; force = options & BE_DESTROY_FORCE; *origin = '\0'; be_root_concat(lbh, name, path); if ((snapdelim = strchr(path, '@')) == NULL) { if (!zfs_dataset_exists(lbh->lzh, path, ZFS_TYPE_FILESYSTEM)) return (set_error(lbh, BE_ERR_NOENT)); if (strcmp(path, lbh->rootfs) == 0 || strcmp(path, lbh->bootfs) == 0) return (set_error(lbh, BE_ERR_DESTROYACT)); fs = zfs_open(lbh->lzh, path, ZFS_TYPE_FILESYSTEM); if (fs == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); if ((options & BE_DESTROY_ORIGIN) != 0 && zfs_prop_get(fs, ZFS_PROP_ORIGIN, origin, sizeof(origin), NULL, NULL, 0, 1) != 0) return (set_error(lbh, BE_ERR_NOORIGIN)); /* Don't destroy a mounted dataset unless force is specified */ if ((mounted = zfs_is_mounted(fs, NULL)) != 0) { if (force) { zfs_unmount(fs, NULL, 0); } else { free(bdd.snapname); return (set_error(lbh, BE_ERR_DESTROYMNT)); } } } else { if (!zfs_dataset_exists(lbh->lzh, path, ZFS_TYPE_SNAPSHOT)) return (set_error(lbh, BE_ERR_NOENT)); bdd.snapname = strdup(snapdelim + 1); if (bdd.snapname == NULL) return (set_error(lbh, BE_ERR_NOMEM)); *snapdelim = '\0'; fs = zfs_open(lbh->lzh, path, ZFS_TYPE_DATASET); if (fs == NULL) { free(bdd.snapname); return (set_error(lbh, BE_ERR_ZFSOPEN)); } } err = be_destroy_cb(fs, &bdd); zfs_close(fs); free(bdd.snapname); if (err != 0) { /* Children are still present or the mount is referenced */ if (err == EBUSY) return (set_error(lbh, BE_ERR_DESTROYMNT)); return (set_error(lbh, BE_ERR_UNKNOWN)); } if ((options & BE_DESTROY_ORIGIN) == 0) return (0); /* The origin can't possibly be shorter than the BE root */ rootlen = strlen(lbh->root); if (*origin == '\0' || strlen(origin) <= rootlen + 1) return (set_error(lbh, BE_ERR_INVORIGIN)); /* * We'll be chopping off the BE root and running this back through * be_destroy, so that we properly handle the origin snapshot whether * it be that of a deep BE or not. */ if (strncmp(origin, lbh->root, rootlen) != 0 || origin[rootlen] != '/') return (0); return (be_destroy(lbh, origin + rootlen + 1, options & ~BE_DESTROY_ORIGIN)); } static void be_setup_snapshot_name(libbe_handle_t *lbh, char *buf, size_t buflen) { time_t rawtime; int len, serial; time(&rawtime); len = strlen(buf); len += strftime(buf + len, buflen - len, "@%F-%T", localtime(&rawtime)); /* No room for serial... 
caller will do its best */ if (buflen - len < 2) return; for (serial = 0; serial < BE_SNAP_SERIAL_MAX; ++serial) { snprintf(buf + len, buflen - len, "-%d", serial); if (!zfs_dataset_exists(lbh->lzh, buf, ZFS_TYPE_SNAPSHOT)) return; } } int be_snapshot(libbe_handle_t *lbh, const char *source, const char *snap_name, bool recursive, char *result) { char buf[BE_MAXPATHLEN]; int err; be_root_concat(lbh, source, buf); if ((err = be_exists(lbh, buf)) != 0) return (set_error(lbh, err)); if (snap_name != NULL) { if (strlcat(buf, "@", sizeof(buf)) >= sizeof(buf)) return (set_error(lbh, BE_ERR_INVALIDNAME)); if (strlcat(buf, snap_name, sizeof(buf)) >= sizeof(buf)) return (set_error(lbh, BE_ERR_INVALIDNAME)); if (result != NULL) snprintf(result, BE_MAXPATHLEN, "%s@%s", source, snap_name); } else { be_setup_snapshot_name(lbh, buf, sizeof(buf)); if (result != NULL && strlcpy(result, strrchr(buf, '/') + 1, sizeof(buf)) >= sizeof(buf)) return (set_error(lbh, BE_ERR_INVALIDNAME)); } if ((err = zfs_snapshot(lbh->lzh, buf, recursive, NULL)) != 0) { switch (err) { case EZFS_INVALIDNAME: return (set_error(lbh, BE_ERR_INVALIDNAME)); default: /* * The other errors that zfs_ioc_snapshot might return * shouldn't happen if we've set things up properly, so * we'll gloss over them and call it UNKNOWN as it will * require further triage. */ if (errno == ENOTSUP) return (set_error(lbh, BE_ERR_NOPOOL)); return (set_error(lbh, BE_ERR_UNKNOWN)); } } return (BE_ERR_SUCCESS); } /* * Create the boot environment specified by the name parameter */ int be_create(libbe_handle_t *lbh, const char *name) { int err; err = be_create_from_existing(lbh, name, be_active_path(lbh)); return (set_error(lbh, err)); } static int be_deep_clone_prop(int prop, void *cb) { int err; struct libbe_dccb *dccb; zprop_source_t src; char pval[BE_MAXPATHLEN]; char source[BE_MAXPATHLEN]; char *val; dccb = cb; /* Skip some properties we don't want to touch */ if (prop == ZFS_PROP_CANMOUNT) return (ZPROP_CONT); /* Don't copy readonly properties */ if (zfs_prop_readonly(prop)) return (ZPROP_CONT); if ((err = zfs_prop_get(dccb->zhp, prop, (char *)&pval, sizeof(pval), &src, (char *)&source, sizeof(source), false))) /* Just continue if we fail to read a property */ return (ZPROP_CONT); /* * Only copy locally defined or received properties. This continues * to avoid temporary/default/local properties intentionally without * breaking received datasets. */ if (src != ZPROP_SRC_LOCAL && src != ZPROP_SRC_RECEIVED) return (ZPROP_CONT); /* Augment mountpoint with altroot, if needed */ val = pval; if (prop == ZFS_PROP_MOUNTPOINT) val = be_mountpoint_augmented(dccb->lbh, val); nvlist_add_string(dccb->props, zfs_prop_to_name(prop), val); return (ZPROP_CONT); } /* * Return the corresponding boot environment path for a given * dataset path, the constructed path is placed in 'result'. * * example: say our new boot environment name is 'bootenv' and * the dataset path is 'zroot/ROOT/default/data/set'. * * result should produce: 'zroot/ROOT/bootenv/data/set' */ static int be_get_path(struct libbe_deep_clone *ldc, const char *dspath, char *result, int result_size) { char *pos; char *child_dataset; /* match the root path for the boot environments */ pos = strstr(dspath, ldc->lbh->root); /* no match, different pools? 
*/ if (pos == NULL) return (BE_ERR_BADPATH); /* root path of the new boot environment */ snprintf(result, result_size, "%s/%s", ldc->lbh->root, ldc->bename); /* gets us to the parent dataset, the +1 consumes a trailing slash */ pos += strlen(ldc->lbh->root) + 1; /* skip the parent dataset */ if ((child_dataset = strchr(pos, '/')) != NULL) strlcat(result, child_dataset, result_size); return (BE_ERR_SUCCESS); } static int be_clone_cb(zfs_handle_t *ds, void *data) { int err; char be_path[BE_MAXPATHLEN]; char snap_path[BE_MAXPATHLEN]; const char *dspath; zfs_handle_t *snap_hdl; nvlist_t *props; struct libbe_deep_clone *ldc; struct libbe_dccb dccb; ldc = (struct libbe_deep_clone *)data; dspath = zfs_get_name(ds); snprintf(snap_path, sizeof(snap_path), "%s@%s", dspath, ldc->snapname); /* construct the boot environment path from the dataset we're cloning */ if (be_get_path(ldc, dspath, be_path, sizeof(be_path)) != BE_ERR_SUCCESS) return (set_error(ldc->lbh, BE_ERR_UNKNOWN)); /* the dataset to be created (i.e. the boot environment) already exists */ if (zfs_dataset_exists(ldc->lbh->lzh, be_path, ZFS_TYPE_DATASET)) return (set_error(ldc->lbh, BE_ERR_EXISTS)); /* no snapshot found for this dataset, silently skip it */ if (!zfs_dataset_exists(ldc->lbh->lzh, snap_path, ZFS_TYPE_SNAPSHOT)) return (0); if ((snap_hdl = zfs_open(ldc->lbh->lzh, snap_path, ZFS_TYPE_SNAPSHOT)) == NULL) return (set_error(ldc->lbh, BE_ERR_ZFSOPEN)); nvlist_alloc(&props, NV_UNIQUE_NAME, KM_SLEEP); nvlist_add_string(props, "canmount", "noauto"); dccb.lbh = ldc->lbh; dccb.zhp = ds; dccb.props = props; if (zprop_iter(be_deep_clone_prop, &dccb, B_FALSE, B_FALSE, ZFS_TYPE_FILESYSTEM) == ZPROP_INVAL) return (-1); if ((err = zfs_clone(snap_hdl, be_path, props)) != 0) return (set_error(ldc->lbh, BE_ERR_ZFSCLONE)); nvlist_free(props); zfs_close(snap_hdl); if (ldc->depth_limit == -1 || ldc->depth < ldc->depth_limit) { ldc->depth++; err = zfs_iter_filesystems(ds, be_clone_cb, ldc); ldc->depth--; } return (set_error(ldc->lbh, err)); } /* * Create a boot environment with a given name from a given snapshot. * Snapshots can be in the format 'zroot/ROOT/default@snapshot' or * 'default@snapshot'. In the latter case, 'default@snapshot' will be prepended * with the root path that libbe was initialized with. */ static int be_clone(libbe_handle_t *lbh, const char *bename, const char *snapshot, int depth) { int err; char snap_path[BE_MAXPATHLEN]; char *parentname, *snapname; zfs_handle_t *parent_hdl; struct libbe_deep_clone ldc; /* ensure the boot environment name is valid */ if ((err = be_validate_name(lbh, bename)) != 0) return (set_error(lbh, err)); /* * prepend the boot environment root path if we're * given a partial snapshot name.
*/ if ((err = be_root_concat(lbh, snapshot, snap_path)) != 0) return (set_error(lbh, err)); /* ensure the snapshot exists */ if ((err = be_validate_snap(lbh, snap_path)) != 0) return (set_error(lbh, err)); /* get a copy of the snapshot path so we can dissect it */ if ((parentname = strdup(snap_path)) == NULL) return (set_error(lbh, BE_ERR_UNKNOWN)); /* split dataset name from snapshot name */ snapname = strchr(parentname, '@'); if (snapname == NULL) { free(parentname); return (set_error(lbh, BE_ERR_UNKNOWN)); } *snapname = '\0'; snapname++; /* set up the boot environment */ ldc.lbh = lbh; ldc.bename = bename; ldc.snapname = snapname; ldc.depth = 0; ldc.depth_limit = depth; /* the boot environment will be cloned from this dataset */ parent_hdl = zfs_open(lbh->lzh, parentname, ZFS_TYPE_DATASET); /* create the boot environment */ err = be_clone_cb(parent_hdl, &ldc); free(parentname); return (set_error(lbh, err)); } /* * Create a boot environment from pre-existing snapshot, specifying a depth. */ int be_create_depth(libbe_handle_t *lbh, const char *bename, const char *snap, int depth) { return (be_clone(lbh, bename, snap, depth)); } /* * Create the boot environment from pre-existing snapshot */ int be_create_from_existing_snap(libbe_handle_t *lbh, const char *bename, const char *snap) { return (be_clone(lbh, bename, snap, -1)); } /* * Create a boot environment from an existing boot environment */ int be_create_from_existing(libbe_handle_t *lbh, const char *bename, const char *old) { int err; char snap[BE_MAXPATHLEN]; if ((err = be_snapshot(lbh, old, NULL, true, snap)) != 0) return (set_error(lbh, err)); err = be_clone(lbh, bename, snap, -1); return (set_error(lbh, err)); } /* * Verifies that a snapshot has a valid name, exists, and has a mountpoint of * '/'. Returns BE_ERR_SUCCESS (0) upon success, or the relevant BE_ERR_* upon * failure. Does not set the internal library error state. */ int be_validate_snap(libbe_handle_t *lbh, const char *snap_name) { if (strlen(snap_name) >= BE_MAXPATHLEN) return (BE_ERR_PATHLEN); if (!zfs_name_valid(snap_name, ZFS_TYPE_SNAPSHOT)) return (BE_ERR_INVALIDNAME); if (!zfs_dataset_exists(lbh->lzh, snap_name, ZFS_TYPE_SNAPSHOT)) return (BE_ERR_NOENT); return (BE_ERR_SUCCESS); } /* * Idempotently appends the name argument to the root boot environment path * and copies the resulting string into the result buffer (which is assumed * to be at least BE_MAXPATHLEN characters long). Returns BE_ERR_SUCCESS upon * success, BE_ERR_PATHLEN if the resulting path is longer than BE_MAXPATHLEN, * or BE_ERR_INVALIDNAME if the name is a path that does not begin with * zfs_be_root. Does not set internal library error state. */ int be_root_concat(libbe_handle_t *lbh, const char *name, char *result) { size_t name_len, root_len; name_len = strlen(name); root_len = strlen(lbh->root); /* Act idempotently; return be name if it is already a full path */ if (strrchr(name, '/') != NULL) { if (strstr(name, lbh->root) != name) return (BE_ERR_INVALIDNAME); if (name_len >= BE_MAXPATHLEN) return (BE_ERR_PATHLEN); strlcpy(result, name, BE_MAXPATHLEN); return (BE_ERR_SUCCESS); } else if (name_len + root_len + 1 < BE_MAXPATHLEN) { snprintf(result, BE_MAXPATHLEN, "%s/%s", lbh->root, name); return (BE_ERR_SUCCESS); } return (BE_ERR_PATHLEN); } /* * Verifies the validity of a boot environment name (A-Za-z0-9-_.). Returns * BE_ERR_SUCCESS (0) if name is valid, otherwise returns BE_ERR_INVALIDNAME * or BE_ERR_PATHLEN. * Does not set internal library error state.
*/ int be_validate_name(libbe_handle_t *lbh, const char *name) { /* * Impose the additional restriction that the entire dataset name must * not exceed the maximum length of a dataset, i.e. MAXNAMELEN. */ if (strlen(lbh->root) + 1 + strlen(name) > MAXNAMELEN) return (BE_ERR_PATHLEN); if (!zfs_name_valid(name, ZFS_TYPE_DATASET)) return (BE_ERR_INVALIDNAME); return (BE_ERR_SUCCESS); } /* * usage */ int be_rename(libbe_handle_t *lbh, const char *old, const char *new) { char full_old[BE_MAXPATHLEN]; char full_new[BE_MAXPATHLEN]; zfs_handle_t *zfs_hdl; int err; /* * be_validate_name is documented not to set error state, so we should * do so here. */ if ((err = be_validate_name(lbh, new)) != 0) return (set_error(lbh, err)); if ((err = be_root_concat(lbh, old, full_old)) != 0) return (set_error(lbh, err)); if ((err = be_root_concat(lbh, new, full_new)) != 0) return (set_error(lbh, err)); if (!zfs_dataset_exists(lbh->lzh, full_old, ZFS_TYPE_DATASET)) return (set_error(lbh, BE_ERR_NOENT)); if (zfs_dataset_exists(lbh->lzh, full_new, ZFS_TYPE_DATASET)) return (set_error(lbh, BE_ERR_EXISTS)); if ((zfs_hdl = zfs_open(lbh->lzh, full_old, ZFS_TYPE_FILESYSTEM)) == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); /* recurse, nounmount, forceunmount */ struct renameflags flags = { .nounmount = 1, }; err = zfs_rename(zfs_hdl, NULL, full_new, flags); zfs_close(zfs_hdl); if (err != 0) return (set_error(lbh, BE_ERR_UNKNOWN)); return (0); } int be_export(libbe_handle_t *lbh, const char *bootenv, int fd) { char snap_name[BE_MAXPATHLEN]; char buf[BE_MAXPATHLEN]; zfs_handle_t *zfs; int err; if ((err = be_snapshot(lbh, bootenv, NULL, true, snap_name)) != 0) /* Use the error set by be_snapshot */ return (err); be_root_concat(lbh, snap_name, buf); if ((zfs = zfs_open(lbh->lzh, buf, ZFS_TYPE_DATASET)) == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); err = zfs_send_one(zfs, NULL, fd, 0); zfs_close(zfs); return (err); } int be_import(libbe_handle_t *lbh, const char *bootenv, int fd) { char buf[BE_MAXPATHLEN]; nvlist_t *props; zfs_handle_t *zfs; recvflags_t flags = { .nomount = 1 }; int err; be_root_concat(lbh, bootenv, buf); if ((err = zfs_receive(lbh->lzh, buf, NULL, &flags, fd, NULL)) != 0) { switch (err) { case EINVAL: return (set_error(lbh, BE_ERR_NOORIGIN)); case ENOENT: return (set_error(lbh, BE_ERR_NOENT)); case EIO: return (set_error(lbh, BE_ERR_IO)); default: return (set_error(lbh, BE_ERR_UNKNOWN)); } } if ((zfs = zfs_open(lbh->lzh, buf, ZFS_TYPE_FILESYSTEM)) == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); nvlist_alloc(&props, NV_UNIQUE_NAME, KM_SLEEP); nvlist_add_string(props, "canmount", "noauto"); - nvlist_add_string(props, "mountpoint", "/"); + nvlist_add_string(props, "mountpoint", "none"); err = zfs_prop_set_list(zfs, props); nvlist_free(props); zfs_close(zfs); if (err != 0) return (set_error(lbh, BE_ERR_UNKNOWN)); return (0); } #if SOON static int be_create_child_noent(libbe_handle_t *lbh, const char *active, const char *child_path) { nvlist_t *props; zfs_handle_t *zfs; int err; nvlist_alloc(&props, NV_UNIQUE_NAME, KM_SLEEP); nvlist_add_string(props, "canmount", "noauto"); nvlist_add_string(props, "mountpoint", child_path); /* Create */ if ((err = zfs_create(lbh->lzh, active, ZFS_TYPE_DATASET, props)) != 0) { switch (err) { case EZFS_EXISTS: return (set_error(lbh, BE_ERR_EXISTS)); case EZFS_NOENT: return (set_error(lbh, BE_ERR_NOENT)); case EZFS_BADTYPE: case EZFS_BADVERSION: return (set_error(lbh, BE_ERR_NOPOOL)); case EZFS_BADPROP: default: /* We set something up wrong, probably... 
return (set_error(lbh, BE_ERR_UNKNOWN)); } } nvlist_free(props); if ((zfs = zfs_open(lbh->lzh, active, ZFS_TYPE_DATASET)) == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); /* Set props */ if ((err = zfs_prop_set(zfs, "canmount", "noauto")) != 0) { zfs_close(zfs); /* * Similar to other cases, this shouldn't fail unless we've * done something wrong. This is a new dataset that shouldn't * have been mounted anywhere between creation and now. */ if (err == EZFS_NOMEM) return (set_error(lbh, BE_ERR_NOMEM)); return (set_error(lbh, BE_ERR_UNKNOWN)); } zfs_close(zfs); return (BE_ERR_SUCCESS); } static int be_create_child_cloned(libbe_handle_t *lbh, const char *active) { char buf[BE_MAXPATHLEN], tmp[BE_MAXPATHLEN]; zfs_handle_t *zfs; int err; /* XXX TODO ? */ /* * Establish if the existing path is a zfs dataset or just * the subdirectory of one */ strlcpy(tmp, "tmp/be_snap.XXXXX", sizeof(tmp)); if (mktemp(tmp) == NULL) return (set_error(lbh, BE_ERR_UNKNOWN)); be_root_concat(lbh, tmp, buf); printf("Here %s?\n", buf); if ((err = zfs_snapshot(lbh->lzh, buf, false, NULL)) != 0) { switch (err) { case EZFS_INVALIDNAME: return (set_error(lbh, BE_ERR_INVALIDNAME)); default: /* * The other errors that zfs_ioc_snapshot might return * shouldn't happen if we've set things up properly, so * we'll gloss over them and call it UNKNOWN as it will * require further triage. */ if (errno == ENOTSUP) return (set_error(lbh, BE_ERR_NOPOOL)); return (set_error(lbh, BE_ERR_UNKNOWN)); } } /* Clone */ if ((zfs = zfs_open(lbh->lzh, buf, ZFS_TYPE_SNAPSHOT)) == NULL) return (BE_ERR_ZFSOPEN); if ((err = zfs_clone(zfs, active, NULL)) != 0) /* XXX TODO correct error */ return (set_error(lbh, BE_ERR_UNKNOWN)); /* set props */ zfs_close(zfs); return (BE_ERR_SUCCESS); } int be_add_child(libbe_handle_t *lbh, const char *child_path, bool cp_if_exists) { struct stat sb; char active[BE_MAXPATHLEN], buf[BE_MAXPATHLEN]; nvlist_t *props; const char *s; /* Require absolute paths */ if (*child_path != '/') return (set_error(lbh, BE_ERR_BADPATH)); strlcpy(active, be_active_path(lbh), BE_MAXPATHLEN); strcpy(buf, active); /* Create non-mountable parent dataset(s) */ s = child_path; for (char *p; (p = strchr(s+1, '/')) != NULL; s = p) { size_t len = p - s; strncat(buf, s, len); nvlist_alloc(&props, NV_UNIQUE_NAME, KM_SLEEP); nvlist_add_string(props, "canmount", "off"); nvlist_add_string(props, "mountpoint", "none"); zfs_create(lbh->lzh, buf, ZFS_TYPE_DATASET, props); nvlist_free(props); } /* Path does not exist as a descendent of / yet */ if (strlcat(active, child_path, BE_MAXPATHLEN) >= BE_MAXPATHLEN) return (set_error(lbh, BE_ERR_PATHLEN)); if (stat(child_path, &sb) != 0) { /* Verify that error is ENOENT */ if (errno != ENOENT) return (set_error(lbh, BE_ERR_UNKNOWN)); return (be_create_child_noent(lbh, active, child_path)); } else if (cp_if_exists) /* Path is already a descendent of / and should be copied */ return (be_create_child_cloned(lbh, active)); return (set_error(lbh, BE_ERR_EXISTS)); } #endif /* SOON */ static int be_set_nextboot(libbe_handle_t *lbh, nvlist_t *config, uint64_t pool_guid, const char *zfsdev) { nvlist_t **child; uint64_t vdev_guid; int c, children; if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; ++c) if (be_set_nextboot(lbh, child[c], pool_guid, zfsdev) != 0) return (1); return (0); } if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) != 0) { return (1); } if (zpool_nextboot(lbh->lzh, pool_guid, vdev_guid, zfsdev) != 0) {
perror("ZFS_IOC_NEXTBOOT failed"); return (1); } return (0); } /* * Deactivate old BE dataset; currently just sets canmount=noauto */ static int be_deactivate(libbe_handle_t *lbh, const char *ds) { zfs_handle_t *zfs; if ((zfs = zfs_open(lbh->lzh, ds, ZFS_TYPE_DATASET)) == NULL) return (1); if (zfs_prop_set(zfs, "canmount", "noauto") != 0) return (1); zfs_close(zfs); return (0); } int be_activate(libbe_handle_t *lbh, const char *bootenv, bool temporary) { char be_path[BE_MAXPATHLEN]; char buf[BE_MAXPATHLEN]; nvlist_t *config, *dsprops, *vdevs; char *origin; uint64_t pool_guid; zfs_handle_t *zhp; int err; be_root_concat(lbh, bootenv, be_path); /* Note: be_exists fails if mountpoint is not / */ if ((err = be_exists(lbh, be_path)) != 0) return (set_error(lbh, err)); if (temporary) { config = zpool_get_config(lbh->active_phandle, NULL); if (config == NULL) /* config should be fetchable... */ return (set_error(lbh, BE_ERR_UNKNOWN)); if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) /* Similarly, it shouldn't be possible */ return (set_error(lbh, BE_ERR_UNKNOWN)); /* Expected format according to zfsbootcfg(8) man */ snprintf(buf, sizeof(buf), "zfs:%s:", be_path); /* We have no config tree */ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &vdevs) != 0) return (set_error(lbh, BE_ERR_NOPOOL)); return (be_set_nextboot(lbh, vdevs, pool_guid, buf)); } else { if (be_deactivate(lbh, lbh->bootfs) != 0) return (-1); /* Obtain bootenv zpool */ err = zpool_set_prop(lbh->active_phandle, "bootfs", be_path); if (err) return (-1); zhp = zfs_open(lbh->lzh, be_path, ZFS_TYPE_FILESYSTEM); if (zhp == NULL) return (-1); if (be_prop_list_alloc(&dsprops) != 0) return (-1); if (be_get_dataset_props(lbh, be_path, dsprops) != 0) { nvlist_free(dsprops); return (-1); } if (nvlist_lookup_string(dsprops, "origin", &origin) == 0) err = zfs_promote(zhp); nvlist_free(dsprops); zfs_close(zhp); if (err) return (-1); } return (BE_ERR_SUCCESS); } Index: projects/runtime-coverage-v2/lib/libbe/be_access.c =================================================================== --- projects/runtime-coverage-v2/lib/libbe/be_access.c (revision 347075) +++ projects/runtime-coverage-v2/lib/libbe/be_access.c (revision 347076) @@ -1,316 +1,335 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017 Kyle J. Kneitinger * Copyright (c) 2018 Kyle Evans * Copyright (c) 2019 Wes Maag * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "be.h" #include "be_impl.h" struct be_mountcheck_info { const char *path; char *name; }; struct be_mount_info { libbe_handle_t *lbh; const char *be; const char *mountpoint; int mntflags; int deepmount; + int depth; }; static int be_mountcheck_cb(zfs_handle_t *zfs_hdl, void *data) { struct be_mountcheck_info *info; char *mountpoint; if (data == NULL) return (1); info = (struct be_mountcheck_info *)data; if (!zfs_is_mounted(zfs_hdl, &mountpoint)) return (0); if (strcmp(mountpoint, info->path) == 0) { info->name = strdup(zfs_get_name(zfs_hdl)); free(mountpoint); return (1); } free(mountpoint); return (0); } /* * Called from be_mount, uses the given zfs_handle and attempts to * mount it at the passed mountpoint. If the deepmount flag is set, continue * calling the function for each child dataset. */ static int be_mount_iter(zfs_handle_t *zfs_hdl, void *data) { int err; char *mountpoint; char tmp[BE_MAXPATHLEN], zfs_mnt[BE_MAXPATHLEN]; struct be_mount_info *info; + char opt; info = (struct be_mount_info *)data; if (zfs_is_mounted(zfs_hdl, &mountpoint)) { free(mountpoint); return (0); } if (zfs_prop_get_int(zfs_hdl, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF) return (0); if (zfs_prop_get(zfs_hdl, ZFS_PROP_MOUNTPOINT, zfs_mnt, BE_MAXPATHLEN, NULL, NULL, 0, 1)) return (1); - if (strcmp("none", zfs_mnt) != 0) { - char opt = '\0'; + if (strcmp("none", zfs_mnt) == 0) { + /* + * mountpoint=none; we'll mount it at info->mountpoint assuming + * we're at the root. If we're not at the root... that's less + * than stellar and not entirely sure what to do with that. + * For now, we won't treat it as an error condition -- we just + * won't mount it, and we'll continue on. 
+ */ + if (info->depth > 0) + return (0); + snprintf(tmp, BE_MAXPATHLEN, "%s", info->mountpoint); + } else { mountpoint = be_mountpoint_augmented(info->lbh, zfs_mnt); - snprintf(tmp, BE_MAXPATHLEN, "%s%s", info->mountpoint, mountpoint); + } - if ((err = zmount(zfs_get_name(zfs_hdl), tmp, info->mntflags, - __DECONST(char *, MNTTYPE_ZFS), NULL, 0, &opt, 1)) != 0) { - switch (errno) { - case ENAMETOOLONG: - return (set_error(info->lbh, BE_ERR_PATHLEN)); - case ELOOP: - case ENOENT: - case ENOTDIR: - return (set_error(info->lbh, BE_ERR_BADPATH)); - case EPERM: - return (set_error(info->lbh, BE_ERR_PERMS)); - case EBUSY: - return (set_error(info->lbh, BE_ERR_PATHBUSY)); - default: - return (set_error(info->lbh, BE_ERR_UNKNOWN)); - } + opt = '\0'; + if ((err = zmount(zfs_get_name(zfs_hdl), tmp, info->mntflags, + __DECONST(char *, MNTTYPE_ZFS), NULL, 0, &opt, 1)) != 0) { + switch (errno) { + case ENAMETOOLONG: + return (set_error(info->lbh, BE_ERR_PATHLEN)); + case ELOOP: + case ENOENT: + case ENOTDIR: + return (set_error(info->lbh, BE_ERR_BADPATH)); + case EPERM: + return (set_error(info->lbh, BE_ERR_PERMS)); + case EBUSY: + return (set_error(info->lbh, BE_ERR_PATHBUSY)); + default: + return (set_error(info->lbh, BE_ERR_UNKNOWN)); } } if (!info->deepmount) return (0); - return (zfs_iter_filesystems(zfs_hdl, be_mount_iter, info)); + ++info->depth; + err = zfs_iter_filesystems(zfs_hdl, be_mount_iter, info); + --info->depth; + return (err); } static int be_umount_iter(zfs_handle_t *zfs_hdl, void *data) { int err; char *mountpoint; struct be_mount_info *info; info = (struct be_mount_info *)data; + ++info->depth; if((err = zfs_iter_filesystems(zfs_hdl, be_umount_iter, info)) != 0) { return (err); } + --info->depth; if (!zfs_is_mounted(zfs_hdl, &mountpoint)) { return (0); } free(mountpoint); if (zfs_unmount(zfs_hdl, NULL, info->mntflags) != 0) { switch (errno) { case ENAMETOOLONG: return (set_error(info->lbh, BE_ERR_PATHLEN)); case ELOOP: case ENOENT: case ENOTDIR: return (set_error(info->lbh, BE_ERR_BADPATH)); case EPERM: return (set_error(info->lbh, BE_ERR_PERMS)); case EBUSY: return (set_error(info->lbh, BE_ERR_PATHBUSY)); default: return (set_error(info->lbh, BE_ERR_UNKNOWN)); } } return (0); } /* * usage */ int be_mounted_at(libbe_handle_t *lbh, const char *path, nvlist_t *details) { char be[BE_MAXPATHLEN]; zfs_handle_t *root_hdl; struct be_mountcheck_info info; prop_data_t propinfo; bzero(&be, BE_MAXPATHLEN); if ((root_hdl = zfs_open(lbh->lzh, lbh->root, ZFS_TYPE_FILESYSTEM)) == NULL) return (BE_ERR_ZFSOPEN); info.path = path; info.name = NULL; zfs_iter_filesystems(root_hdl, be_mountcheck_cb, &info); zfs_close(root_hdl); if (info.name != NULL) { if (details != NULL) { if ((root_hdl = zfs_open(lbh->lzh, lbh->root, ZFS_TYPE_FILESYSTEM)) == NULL) { free(info.name); return (BE_ERR_ZFSOPEN); } propinfo.lbh = lbh; propinfo.list = details; propinfo.single_object = false; prop_list_builder_cb(root_hdl, &propinfo); zfs_close(root_hdl); } free(info.name); return (0); } return (1); } /* * usage */ int be_mount(libbe_handle_t *lbh, char *bootenv, char *mountpoint, int flags, char *result_loc) { char be[BE_MAXPATHLEN]; char mnt_temp[BE_MAXPATHLEN]; int mntflags, mntdeep; int err; struct be_mount_info info; zfs_handle_t *zhdl; if ((err = be_root_concat(lbh, bootenv, be)) != 0) return (set_error(lbh, err)); if ((err = be_exists(lbh, bootenv)) != 0) return (set_error(lbh, err)); if (is_mounted(lbh->lzh, be, NULL)) return (set_error(lbh, BE_ERR_MOUNTED)); mntdeep = (flags & BE_MNT_DEEP) ? 
1 : 0; mntflags = (flags & BE_MNT_FORCE) ? MNT_FORCE : 0; /* Create mountpoint if it is not specified */ if (mountpoint == NULL) { strlcpy(mnt_temp, "/tmp/be_mount.XXXX", sizeof(mnt_temp)); if (mkdtemp(mnt_temp) == NULL) return (set_error(lbh, BE_ERR_IO)); } if ((zhdl = zfs_open(lbh->lzh, be, ZFS_TYPE_FILESYSTEM)) == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); info.lbh = lbh; info.be = be; info.mountpoint = (mountpoint == NULL) ? mnt_temp : mountpoint; info.mntflags = mntflags; info.deepmount = mntdeep; + info.depth = 0; if ((err = be_mount_iter(zhdl, &info)) != 0) { zfs_close(zhdl); return (err); } zfs_close(zhdl); if (result_loc != NULL) strlcpy(result_loc, mountpoint == NULL ? mnt_temp : mountpoint, BE_MAXPATHLEN); return (BE_ERR_SUCCESS); } /* * usage */ int be_unmount(libbe_handle_t *lbh, char *bootenv, int flags) { int err; char be[BE_MAXPATHLEN]; zfs_handle_t *root_hdl; struct be_mount_info info; if ((err = be_root_concat(lbh, bootenv, be)) != 0) return (set_error(lbh, err)); if ((root_hdl = zfs_open(lbh->lzh, be, ZFS_TYPE_FILESYSTEM)) == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); info.lbh = lbh; info.be = be; info.mountpoint = NULL; info.mntflags = (flags & BE_MNT_FORCE) ? MS_FORCE : 0; + info.depth = 0; if ((err = be_umount_iter(root_hdl, &info)) != 0) { zfs_close(root_hdl); return (err); } zfs_close(root_hdl); return (BE_ERR_SUCCESS); } /* * This function will blow away the input buffer as needed if we're discovered * to be looking at a root-mount. If the mountpoint is naturally beyond the * root, however, the buffer may be left intact and a pointer to the section * past altroot will be returned instead for the caller's perusal. */ char * be_mountpoint_augmented(libbe_handle_t *lbh, char *mountpoint) { if (lbh->altroot_len == 0) return (mountpoint); if (mountpoint == NULL || *mountpoint == '\0') return (mountpoint); if (mountpoint[lbh->altroot_len] == '\0') { *(mountpoint + 1) = '\0'; return (mountpoint); } else return (mountpoint + lbh->altroot_len); } Index: projects/runtime-coverage-v2/lib/libc/gen/directory.3 =================================================================== --- projects/runtime-coverage-v2/lib/libc/gen/directory.3 (revision 347075) +++ projects/runtime-coverage-v2/lib/libc/gen/directory.3 (revision 347076) @@ -1,309 +1,333 @@ .\" Copyright (c) 1983, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)directory.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd May 22, 2017 +.Dd April 30, 2019 .Dt DIRECTORY 3 .Os .Sh NAME .Nm opendir , .Nm fdopendir , .Nm readdir , .Nm readdir_r , .Nm telldir , .Nm seekdir , .Nm rewinddir , .Nm closedir , .Nm fdclosedir , .Nm dirfd .Nd directory operations .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In dirent.h .Ft DIR * .Fn opendir "const char *filename" .Ft DIR * .Fn fdopendir "int fd" .Ft struct dirent * .Fn readdir "DIR *dirp" .Ft int .Fn readdir_r "DIR *dirp" "struct dirent *entry" "struct dirent **result" .Ft long .Fn telldir "DIR *dirp" .Ft void .Fn seekdir "DIR *dirp" "long loc" .Ft void .Fn rewinddir "DIR *dirp" .Ft int .Fn closedir "DIR *dirp" .Ft int .Fn fdclosedir "DIR *dirp" .Ft int .Fn dirfd "DIR *dirp" .Sh DESCRIPTION .Bf -symbolic The .Fn readdir_r interface is deprecated because it cannot be used correctly unless .Brq Va NAME_MAX is a fixed value. .Ef .Pp The .Fn opendir function opens the directory named by .Fa filename , associates a .Em directory stream with it and returns a pointer to be used to identify the .Em directory stream in subsequent operations. The pointer .Dv NULL is returned if .Fa filename cannot be accessed, or if it cannot .Xr malloc 3 enough memory to hold the whole thing. .Pp The .Fn fdopendir function is equivalent to the .Fn opendir function except that the directory is specified by a file descriptor .Fa fd rather than by a name. The file offset associated with the file descriptor at the time of the call determines which entries are returned. .Pp Upon successful return from .Fn fdopendir , the file descriptor is under the control of the system, and if any attempt is made to close the file descriptor, or to modify the state of the associated description other than by means of .Fn closedir , .Fn readdir , .Fn readdir_r , or .Fn rewinddir , the behavior is undefined. Upon calling .Fn closedir the file descriptor is closed. The .Dv FD_CLOEXEC flag is set on the file descriptor by a successful call to .Fn fdopendir . .Pp The .Fn readdir function returns a pointer to the next directory entry. The directory entry remains valid until the next call to .Fn readdir or .Fn closedir on the same .Em directory stream . The function returns .Dv NULL upon reaching the end of the directory or on error. In the event of an error, .Va errno may be set to any of the values documented for the .Xr getdirentries 2 system call. .Pp The .Fn readdir_r function provides the same functionality as .Fn readdir , but the caller must provide a directory .Fa entry buffer to store the results in. The buffer must be large enough for a .Vt struct dirent with a .Va d_name array with .Brq Va NAME_MAX + 1 elements. If the read succeeds, .Fa result is pointed at the .Fa entry ; upon reaching the end of the directory .Fa result is set to .Dv NULL . The .Fn readdir_r function returns 0 on success or an error number to indicate failure. 
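.Pp
One conventional way to size such a buffer is sketched below; it is
illustrative only, and leans on
.Brq Va NAME_MAX
being a fixed, usable constant, which is precisely the assumption that
makes
.Fn readdir_r
deprecated:
.Bd -literal -offset indent
len = offsetof(struct dirent, d_name) + NAME_MAX + 1;
if ((entry = malloc(len)) == NULL)
	return (ERROR);
if (readdir_r(dirp, entry, &result) == 0 && result != NULL)
	use(entry);	/* entry holds the next directory entry */
.Ed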
.Pp The .Fn telldir function returns a token representing the current location associated with the named .Em directory stream . Values returned by .Fn telldir are good only for the lifetime of the .Dv DIR pointer, .Fa dirp , from which they are derived. If the directory is closed and then reopened, prior values returned by .Fn telldir will no longer be valid. Values returned by .Fn telldir are also invalidated by a call to .Fn rewinddir . .Pp The .Fn seekdir function sets the position of the next .Fn readdir operation on the .Em directory stream . The new position reverts to the one associated with the .Em directory stream when the .Fn telldir operation was performed. .Pp The .Fn rewinddir function resets the position of the named .Em directory stream to the beginning of the directory. .Pp The .Fn closedir function closes the named .Em directory stream and frees the structure associated with the .Fa dirp pointer, returning 0 on success. On failure, \-1 is returned and the global variable .Va errno is set to indicate the error. .Pp The .Fn fdclosedir function is equivalent to the .Fn closedir function except that this function returns the directory file descriptor instead of closing it. .Pp The .Fn dirfd function returns the integer file descriptor associated with the named .Em directory stream , see .Xr open 2 . .Pp Sample code which searches a directory for entry ``name'' is: .Bd -literal -offset indent dirp = opendir("."); if (dirp == NULL) return (ERROR); len = strlen(name); while ((dp = readdir(dirp)) != NULL) { if (dp->d_namlen == len && strcmp(dp->d_name, name) == 0) { (void)closedir(dirp); return (FOUND); } } (void)closedir(dirp); return (NOT_FOUND); .Ed .Sh SEE ALSO .Xr close 2 , .Xr lseek 2 , .Xr open 2 , .Xr read 2 , .Xr dir 5 +.Sh STANDARDS +The +.Fn closedir , +.Fn dirfd , +.Fn fdopendir , +.Fn opendir , +.Fn readdir , +.Fn readdir_r , +.Fn rewinddir , +.Fn seekdir +and +.Fn telldir +functions are expected to conform to +.St -p1003.1-2008 . +The +.Fn fdclosedir +function and the +.Fa d_off , +.Fa d_reclen +and +.Fa d_type +fields of +.Vt struct dirent +are non-standard, and should not be used in portable programs. .Sh HISTORY The .Fn opendir , .Fn readdir , .Fn telldir , .Fn seekdir , .Fn rewinddir , .Fn closedir , and .Fn dirfd functions appeared in .Bx 4.2 . The .Fn fdopendir function appeared in .Fx 8.0 . The .Fn fdclosedir function appeared in .Fx 10.0 . .Sh BUGS The behaviour of .Fn telldir and .Fn seekdir is likely to be wrong if there are parallel unlinks happening and the directory is larger than one page. There is code to ensure that a .Fn seekdir to the location given by a .Fn telldir immediately before the last .Fn readdir will always set the correct location to return the same value as that last .Fn readdir performed. This is enough for some applications which want to "push back the last entry read", e.g., Samba. Seeks back to any other location, other than the beginning of the directory, may result in unexpected behaviour if deletes are present. It is hoped that this situation will be resolved with changes to .Fn getdirentries and the VFS.
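To make the guarantee described in BUGS concrete, here is a small self-contained sketch (illustrative only, not part of directory.3 or this diff) of the one seek pattern it says is reliable: a telldir(3) token taken immediately before a readdir(3) can be used to push that entry back and read it again.

#include <dirent.h>
#include <stdio.h>

int
main(void)
{
	DIR *dirp;
	struct dirent *dp;
	long pos;

	if ((dirp = opendir(".")) == NULL)
		return (1);
	pos = telldir(dirp);		/* token taken right before readdir */
	if ((dp = readdir(dirp)) != NULL) {
		printf("read:   %s\n", dp->d_name);
		seekdir(dirp, pos);	/* "push back" the entry just read */
		dp = readdir(dirp);	/* re-reads the same entry */
		if (dp != NULL)
			printf("reread: %s\n", dp->d_name);
	}
	(void)closedir(dirp);
	return (0);
}

Seeking to any other saved position while entries are being unlinked in parallel falls outside this guarantee, as the BUGS text notes.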
Index: projects/runtime-coverage-v2/lib/libfetch/common.c =================================================================== --- projects/runtime-coverage-v2/lib/libfetch/common.c (revision 347075) +++ projects/runtime-coverage-v2/lib/libfetch/common.c (revision 347076) @@ -1,1518 +1,1519 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1998-2016 Dag-Erling Smørgrav * Copyright (c) 2013 Michael Gmelin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef WITH_SSL #include #endif #include "fetch.h" #include "common.h" /*** Local data **************************************************************/ /* * Error messages for resolver errors */ static struct fetcherr netdb_errlist[] = { #ifdef EAI_NODATA { EAI_NODATA, FETCH_RESOLV, "Host not found" }, #endif { EAI_AGAIN, FETCH_TEMP, "Transient resolver failure" }, { EAI_FAIL, FETCH_RESOLV, "Non-recoverable resolver failure" }, { EAI_NONAME, FETCH_RESOLV, "No address record" }, { -1, FETCH_UNKNOWN, "Unknown resolver error" } }; /* End-of-Line */ static const char ENDL[2] = "\r\n"; /*** Error-reporting functions ***********************************************/ /* * Map error code to string */ static struct fetcherr * fetch_finderr(struct fetcherr *p, int e) { while (p->num != -1 && p->num != e) p++; return (p); } /* * Set error code */ void fetch_seterr(struct fetcherr *p, int e) { p = fetch_finderr(p, e); fetchLastErrCode = p->cat; snprintf(fetchLastErrString, MAXERRSTRING, "%s", p->string); } /* * Set error code according to errno */ void fetch_syserr(void) { switch (errno) { case 0: fetchLastErrCode = FETCH_OK; break; case EPERM: case EACCES: case EROFS: case EAUTH: case ENEEDAUTH: fetchLastErrCode = FETCH_AUTH; break; case ENOENT: case EISDIR: /* XXX */ fetchLastErrCode = FETCH_UNAVAIL; break; case ENOMEM: fetchLastErrCode = FETCH_MEMORY; break; case EBUSY: case EAGAIN: fetchLastErrCode = FETCH_TEMP; break; case EEXIST: fetchLastErrCode = FETCH_EXISTS; break; case ENOSPC: fetchLastErrCode = FETCH_FULL; break; case EADDRINUSE: case EADDRNOTAVAIL: case ENETDOWN: case ENETUNREACH: case ENETRESET: case EHOSTUNREACH: fetchLastErrCode = FETCH_NETWORK; break; case ECONNABORTED: case ECONNRESET: fetchLastErrCode = FETCH_ABORT; break; case ETIMEDOUT: fetchLastErrCode = FETCH_TIMEOUT; break; case ECONNREFUSED: case EHOSTDOWN: fetchLastErrCode = FETCH_DOWN; break; default: fetchLastErrCode = FETCH_UNKNOWN; } snprintf(fetchLastErrString, MAXERRSTRING, "%s", strerror(errno)); } /* * Emit status message */ void fetch_info(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fputc('\n', stderr); } /*** Network-related utility functions ***************************************/ /* * Return the default port for a scheme */ int fetch_default_port(const char *scheme) { struct servent *se; if ((se = getservbyname(scheme, "tcp")) != NULL) return (ntohs(se->s_port)); if (strcmp(scheme, SCHEME_FTP) == 0) return (FTP_DEFAULT_PORT); if (strcmp(scheme, SCHEME_HTTP) == 0) return (HTTP_DEFAULT_PORT); return (0); } /* * Return the default proxy port for a scheme */ int fetch_default_proxy_port(const char *scheme) { if (strcmp(scheme, SCHEME_FTP) == 0) return (FTP_DEFAULT_PROXY_PORT); if (strcmp(scheme, SCHEME_HTTP) == 0) return (HTTP_DEFAULT_PROXY_PORT); return (0); } /* * Create a connection for an existing descriptor. */ conn_t * fetch_reopen(int sd) { conn_t *conn; int opt = 1; /* allocate and fill connection structure */ if ((conn = calloc(1, sizeof(*conn))) == NULL) return (NULL); fcntl(sd, F_SETFD, FD_CLOEXEC); setsockopt(sd, SOL_SOCKET, SO_NOSIGPIPE, &opt, sizeof opt); conn->sd = sd; ++conn->ref; return (conn); } /* * Bump a connection's reference count. 
*/ conn_t * fetch_ref(conn_t *conn) { ++conn->ref; return (conn); } /* * Resolve an address */ struct addrinfo * fetch_resolve(const char *addr, int port, int af) { char hbuf[256], sbuf[8]; struct addrinfo hints, *res; const char *hb, *he, *sep; const char *host, *service; int err, len; /* first, check for a bracketed IPv6 address */ if (*addr == '[') { hb = addr + 1; if ((sep = strchr(hb, ']')) == NULL) { errno = EINVAL; goto syserr; } he = sep++; } else { hb = addr; sep = strchrnul(hb, ':'); he = sep; } /* see if we need to copy the host name */ if (*he != '\0') { len = snprintf(hbuf, sizeof(hbuf), "%.*s", (int)(he - hb), hb); if (len < 0) goto syserr; if (len >= (int)sizeof(hbuf)) { errno = ENAMETOOLONG; goto syserr; } host = hbuf; } else { host = hb; } /* was it followed by a service name? */ if (*sep == '\0' && port != 0) { if (port < 1 || port > 65535) { errno = EINVAL; goto syserr; } if (snprintf(sbuf, sizeof(sbuf), "%d", port) < 0) goto syserr; service = sbuf; } else if (*sep != '\0') { service = sep + 1; } else { service = NULL; } /* resolve */ memset(&hints, 0, sizeof(hints)); hints.ai_family = af; hints.ai_socktype = SOCK_STREAM; hints.ai_flags = AI_ADDRCONFIG; if ((err = getaddrinfo(host, service, &hints, &res)) != 0) { netdb_seterr(err); return (NULL); } return (res); syserr: fetch_syserr(); return (NULL); } /* * Bind a socket to a specific local address */ int fetch_bind(int sd, int af, const char *addr) { struct addrinfo *cliai, *ai; int err; if ((cliai = fetch_resolve(addr, 0, af)) == NULL) return (-1); for (ai = cliai; ai != NULL; ai = ai->ai_next) if ((err = bind(sd, ai->ai_addr, ai->ai_addrlen)) == 0) break; if (err != 0) fetch_syserr(); freeaddrinfo(cliai); return (err == 0 ? 0 : -1); } /* * Establish a TCP connection to the specified port on the specified host. 
*/ conn_t * fetch_connect(const char *host, int port, int af, int verbose) { struct addrinfo *cais = NULL, *sais = NULL, *cai, *sai; const char *bindaddr; conn_t *conn = NULL; int err = 0, sd = -1; DEBUGF("---> %s:%d\n", host, port); /* resolve server address */ if (verbose) fetch_info("resolving server address: %s:%d", host, port); if ((sais = fetch_resolve(host, port, af)) == NULL) goto fail; /* resolve client address */ bindaddr = getenv("FETCH_BIND_ADDRESS"); if (bindaddr != NULL && *bindaddr != '\0') { if (verbose) fetch_info("resolving client address: %s", bindaddr); if ((cais = fetch_resolve(bindaddr, 0, af)) == NULL) goto fail; } /* try each server address in turn */ for (err = 0, sai = sais; sai != NULL; sai = sai->ai_next) { /* open socket */ if ((sd = socket(sai->ai_family, SOCK_STREAM, 0)) < 0) goto syserr; /* attempt to bind to client address */ for (err = 0, cai = cais; cai != NULL; cai = cai->ai_next) { if (cai->ai_family != sai->ai_family) continue; if ((err = bind(sd, cai->ai_addr, cai->ai_addrlen)) == 0) break; } if (err != 0) { if (verbose) fetch_info("failed to bind to %s", bindaddr); goto syserr; } /* attempt to connect to server address */ if ((err = connect(sd, sai->ai_addr, sai->ai_addrlen)) == 0) break; /* clean up before next attempt */ close(sd); sd = -1; } if (err != 0) { if (verbose) fetch_info("failed to connect to %s:%d", host, port); goto syserr; } if ((conn = fetch_reopen(sd)) == NULL) goto syserr; if (cais != NULL) freeaddrinfo(cais); if (sais != NULL) freeaddrinfo(sais); return (conn); syserr: fetch_syserr(); goto fail; fail: if (sd >= 0) close(sd); if (cais != NULL) freeaddrinfo(cais); if (sais != NULL) freeaddrinfo(sais); return (NULL); } #ifdef WITH_SSL /* * Convert characters A-Z to lowercase (intentionally avoid any locale * specific conversions). */ static char fetch_ssl_tolower(char in) { if (in >= 'A' && in <= 'Z') return (in + 32); else return (in); } /* * isalpha implementation that intentionally avoids any locale specific * conversions. */ static int fetch_ssl_isalpha(char in) { return ((in >= 'A' && in <= 'Z') || (in >= 'a' && in <= 'z')); } /* * Check if passed hostnames a and b are equal. */ static int fetch_ssl_hname_equal(const char *a, size_t alen, const char *b, size_t blen) { size_t i; if (alen != blen) return (0); for (i = 0; i < alen; ++i) { if (fetch_ssl_tolower(a[i]) != fetch_ssl_tolower(b[i])) return (0); } return (1); } /* * Check if domain label is traditional, meaning that only A-Z, a-z, 0-9 * and '-' (hyphen) are allowed. Hyphens have to be surrounded by alpha- * numeric characters. Double hyphens (like they're found in IDN a-labels * 'xn--') are not allowed. Empty labels are invalid. */ static int fetch_ssl_is_trad_domain_label(const char *l, size_t len, int wcok) { size_t i; if (!len || l[0] == '-' || l[len-1] == '-') return (0); for (i = 0; i < len; ++i) { if (!isdigit(l[i]) && !fetch_ssl_isalpha(l[i]) && !(l[i] == '*' && wcok) && !(l[i] == '-' && l[i - 1] != '-')) return (0); } return (1); } /* * Check if host name consists only of numbers. This might indicate an IP * address, which is not a good idea for CN wildcard comparison. */ static int fetch_ssl_hname_is_only_numbers(const char *hostname, size_t len) { size_t i; for (i = 0; i < len; ++i) { if (!((hostname[i] >= '0' && hostname[i] <= '9') || hostname[i] == '.')) return (0); } return (1); } /* * Check if the host name h passed matches the pattern passed in m which * is usually part of subjectAltName or CN of a certificate presented to * the client. 
This includes wildcard matching. The algorithm is based on * RFC6125, sections 6.4.3 and 7.2, which clarifies RFC2818 and RFC3280. */ static int fetch_ssl_hname_match(const char *h, size_t hlen, const char *m, size_t mlen) { int delta, hdotidx, mdot1idx, wcidx; const char *hdot, *mdot1, *mdot2; const char *wc; /* wildcard */ if (!(h && *h && m && *m)) return (0); if ((wc = strnstr(m, "*", mlen)) == NULL) return (fetch_ssl_hname_equal(h, hlen, m, mlen)); wcidx = wc - m; /* hostname should not be just dots and numbers */ if (fetch_ssl_hname_is_only_numbers(h, hlen)) return (0); /* only one wildcard allowed in pattern */ if (strnstr(wc + 1, "*", mlen - wcidx - 1) != NULL) return (0); /* * there must be at least two more domain labels and * wildcard has to be in the leftmost label (RFC6125) */ mdot1 = strnstr(m, ".", mlen); if (mdot1 == NULL || mdot1 < wc || (mlen - (mdot1 - m)) < 4) return (0); mdot1idx = mdot1 - m; mdot2 = strnstr(mdot1 + 1, ".", mlen - mdot1idx - 1); if (mdot2 == NULL || (mlen - (mdot2 - m)) < 2) return (0); /* hostname must contain a dot and not be the 1st char */ hdot = strnstr(h, ".", hlen); if (hdot == NULL || hdot == h) return (0); hdotidx = hdot - h; /* * host part of hostname must be at least as long as * pattern it's supposed to match */ if (hdotidx < mdot1idx) return (0); /* * don't allow wildcards in non-traditional domain names * (IDN, A-label, U-label...) */ if (!fetch_ssl_is_trad_domain_label(h, hdotidx, 0) || !fetch_ssl_is_trad_domain_label(m, mdot1idx, 1)) return (0); /* match domain part (part after first dot) */ if (!fetch_ssl_hname_equal(hdot, hlen - hdotidx, mdot1, mlen - mdot1idx)) return (0); /* match part left of wildcard */ if (!fetch_ssl_hname_equal(h, wcidx, m, wcidx)) return (0); /* match part right of wildcard */ delta = mdot1idx - wcidx - 1; if (!fetch_ssl_hname_equal(hdot - delta, delta, mdot1 - delta, delta)) return (0); /* all tests succeeded, it's a match */ return (1); } /* * Get numeric host address info - returns NULL if host was not an IP * address. The caller is responsible for deallocation using * freeaddrinfo(3). */ static struct addrinfo * fetch_ssl_get_numeric_addrinfo(const char *hostname, size_t len) { struct addrinfo hints, *res; char *host; host = (char *)malloc(len + 1); memcpy(host, hostname, len); host[len] = '\0'; memset(&hints, 0, sizeof(hints)); hints.ai_family = PF_UNSPEC; hints.ai_socktype = SOCK_STREAM; hints.ai_protocol = 0; hints.ai_flags = AI_NUMERICHOST; /* port is not relevant for this purpose */ if (getaddrinfo(host, "443", &hints, &res) != 0) res = NULL; free(host); return res; } /* * Compare ip address in addrinfo with address passes. */ static int fetch_ssl_ipaddr_match_bin(const struct addrinfo *lhost, const char *rhost, size_t rhostlen) { const void *left; if (lhost->ai_family == AF_INET && rhostlen == 4) { left = (void *)&((struct sockaddr_in*)(void *) lhost->ai_addr)->sin_addr.s_addr; #ifdef INET6 } else if (lhost->ai_family == AF_INET6 && rhostlen == 16) { left = (void *)&((struct sockaddr_in6 *)(void *) lhost->ai_addr)->sin6_addr; #endif } else return (0); return (!memcmp(left, (const void *)rhost, rhostlen) ? 1 : 0); } /* * Compare ip address in addrinfo with host passed. If host is not an IP * address, comparison will fail. 
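 *
 * For example (illustrative): an addrinfo obtained from "192.0.2.10"
 * matches the string "192.0.2.10", but comparing it against
 * "www.example.com" fails, because the latter is rejected by
 * fetch_ssl_get_numeric_addrinfo(), which resolves with AI_NUMERICHOST.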
*/ static int fetch_ssl_ipaddr_match(const struct addrinfo *laddr, const char *r, size_t rlen) { struct addrinfo *raddr; int ret; char *rip; ret = 0; if ((raddr = fetch_ssl_get_numeric_addrinfo(r, rlen)) == NULL) return 0; /* not a numeric host */ if (laddr->ai_family == raddr->ai_family) { if (laddr->ai_family == AF_INET) { rip = (char *)&((struct sockaddr_in *)(void *) raddr->ai_addr)->sin_addr.s_addr; ret = fetch_ssl_ipaddr_match_bin(laddr, rip, 4); #ifdef INET6 } else if (laddr->ai_family == AF_INET6) { rip = (char *)&((struct sockaddr_in6 *)(void *) raddr->ai_addr)->sin6_addr; ret = fetch_ssl_ipaddr_match_bin(laddr, rip, 16); #endif } } freeaddrinfo(raddr); return (ret); } /* * Verify server certificate by subjectAltName. */ static int fetch_ssl_verify_altname(STACK_OF(GENERAL_NAME) *altnames, const char *host, struct addrinfo *ip) { const GENERAL_NAME *name; size_t nslen; int i; const char *ns; for (i = 0; i < sk_GENERAL_NAME_num(altnames); ++i) { #if OPENSSL_VERSION_NUMBER < 0x10000000L /* * This is a workaround, since the following line causes * alignment issues in clang: * name = sk_GENERAL_NAME_value(altnames, i); * OpenSSL explicitly warns not to use those macros * directly, but there isn't much choice (and there * shouldn't be any ill side effects) */ name = (GENERAL_NAME *)SKM_sk_value(void, altnames, i); #else name = sk_GENERAL_NAME_value(altnames, i); #endif #if OPENSSL_VERSION_NUMBER < 0x10100000L ns = (const char *)ASN1_STRING_data(name->d.ia5); #else ns = (const char *)ASN1_STRING_get0_data(name->d.ia5); #endif nslen = (size_t)ASN1_STRING_length(name->d.ia5); if (name->type == GEN_DNS && ip == NULL && fetch_ssl_hname_match(host, strlen(host), ns, nslen)) return (1); else if (name->type == GEN_IPADD && ip != NULL && fetch_ssl_ipaddr_match_bin(ip, ns, nslen)) return (1); } return (0); } /* * Verify server certificate by CN. */ static int fetch_ssl_verify_cn(X509_NAME *subject, const char *host, struct addrinfo *ip) { ASN1_STRING *namedata; X509_NAME_ENTRY *nameentry; int cnlen, lastpos, loc, ret; unsigned char *cn; ret = 0; lastpos = -1; loc = -1; cn = NULL; /* get most specific CN (last entry in list) and compare */ while ((lastpos = X509_NAME_get_index_by_NID(subject, NID_commonName, lastpos)) != -1) loc = lastpos; if (loc > -1) { nameentry = X509_NAME_get_entry(subject, loc); namedata = X509_NAME_ENTRY_get_data(nameentry); cnlen = ASN1_STRING_to_UTF8(&cn, namedata); if (ip == NULL && fetch_ssl_hname_match(host, strlen(host), cn, cnlen)) ret = 1; else if (ip != NULL && fetch_ssl_ipaddr_match(ip, cn, cnlen)) ret = 1; OPENSSL_free(cn); } return (ret); } /* * Verify that server certificate subjectAltName/CN matches * hostname. First check, if there are alternative subject names. If yes, * those have to match. Only if those don't exist it falls back to * checking the subject's CN. */ static int fetch_ssl_verify_hname(X509 *cert, const char *host) { struct addrinfo *ip; STACK_OF(GENERAL_NAME) *altnames; X509_NAME *subject; int ret; ret = 0; ip = fetch_ssl_get_numeric_addrinfo(host, strlen(host)); altnames = X509_get_ext_d2i(cert, NID_subject_alt_name, NULL, NULL); if (altnames != NULL) { ret = fetch_ssl_verify_altname(altnames, host, ip); } else { subject = X509_get_subject_name(cert); if (subject != NULL) ret = fetch_ssl_verify_cn(subject, host, ip); } if (ip != NULL) freeaddrinfo(ip); if (altnames != NULL) GENERAL_NAMES_free(altnames); return (ret); } /* * Configure transport security layer based on environment. 
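 *
 * For example, running with SSL_NO_TLS1_2 set in the environment adds
 * SSL_OP_NO_TLSv1_2 to the context options; SSLv3 stays disabled unless
 * SSL_ALLOW_SSL3 is set, and SSLv2 and session tickets are always
 * disabled.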
*/ static void fetch_ssl_setup_transport_layer(SSL_CTX *ctx, int verbose) { long ssl_ctx_options; ssl_ctx_options = SSL_OP_ALL | SSL_OP_NO_SSLv2 | SSL_OP_NO_TICKET; if (getenv("SSL_ALLOW_SSL3") == NULL) ssl_ctx_options |= SSL_OP_NO_SSLv3; if (getenv("SSL_NO_TLS1") != NULL) ssl_ctx_options |= SSL_OP_NO_TLSv1; if (getenv("SSL_NO_TLS1_1") != NULL) ssl_ctx_options |= SSL_OP_NO_TLSv1_1; if (getenv("SSL_NO_TLS1_2") != NULL) ssl_ctx_options |= SSL_OP_NO_TLSv1_2; if (verbose) fetch_info("SSL options: %lx", ssl_ctx_options); SSL_CTX_set_options(ctx, ssl_ctx_options); } /* * Configure peer verification based on environment. */ #define LOCAL_CERT_FILE "/usr/local/etc/ssl/cert.pem" #define BASE_CERT_FILE "/etc/ssl/cert.pem" static int fetch_ssl_setup_peer_verification(SSL_CTX *ctx, int verbose) { X509_LOOKUP *crl_lookup; X509_STORE *crl_store; const char *ca_cert_file, *ca_cert_path, *crl_file; if (getenv("SSL_NO_VERIFY_PEER") == NULL) { ca_cert_file = getenv("SSL_CA_CERT_FILE"); if (ca_cert_file == NULL && access(LOCAL_CERT_FILE, R_OK) == 0) ca_cert_file = LOCAL_CERT_FILE; if (ca_cert_file == NULL && access(BASE_CERT_FILE, R_OK) == 0) ca_cert_file = BASE_CERT_FILE; ca_cert_path = getenv("SSL_CA_CERT_PATH"); if (verbose) { fetch_info("Peer verification enabled"); if (ca_cert_file != NULL) fetch_info("Using CA cert file: %s", ca_cert_file); if (ca_cert_path != NULL) fetch_info("Using CA cert path: %s", ca_cert_path); if (ca_cert_file == NULL && ca_cert_path == NULL) fetch_info("Using OpenSSL default " "CA cert file and path"); } SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, fetch_ssl_cb_verify_crt); if (ca_cert_file != NULL || ca_cert_path != NULL) SSL_CTX_load_verify_locations(ctx, ca_cert_file, ca_cert_path); else SSL_CTX_set_default_verify_paths(ctx); if ((crl_file = getenv("SSL_CRL_FILE")) != NULL) { if (verbose) fetch_info("Using CRL file: %s", crl_file); crl_store = SSL_CTX_get_cert_store(ctx); crl_lookup = X509_STORE_add_lookup(crl_store, X509_LOOKUP_file()); if (crl_lookup == NULL || !X509_load_crl_file(crl_lookup, crl_file, X509_FILETYPE_PEM)) { fprintf(stderr, "Could not load CRL file %s\n", crl_file); return (0); } X509_STORE_set_flags(crl_store, X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL); } } return (1); } /* * Configure client certificate based on environment. */ static int fetch_ssl_setup_client_certificate(SSL_CTX *ctx, int verbose) { const char *client_cert_file, *client_key_file; if ((client_cert_file = getenv("SSL_CLIENT_CERT_FILE")) != NULL) { client_key_file = getenv("SSL_CLIENT_KEY_FILE") != NULL ? getenv("SSL_CLIENT_KEY_FILE") : client_cert_file; if (verbose) { fetch_info("Using client cert file: %s", client_cert_file); fetch_info("Using client key file: %s", client_key_file); } if (SSL_CTX_use_certificate_chain_file(ctx, client_cert_file) != 1) { fprintf(stderr, "Could not load client certificate %s\n", client_cert_file); return (0); } if (SSL_CTX_use_PrivateKey_file(ctx, client_key_file, SSL_FILETYPE_PEM) != 1) { fprintf(stderr, "Could not load client key %s\n", client_key_file); return (0); } } return (1); } /* * Callback for SSL certificate verification, this is called on server * cert verification. It takes no decision, but informs the user in case * verification failed. 
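 *
 * The callback returns its "verified" argument unchanged, so the
 * verdict of OpenSSL's built-in chain verification is preserved.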
*/ int fetch_ssl_cb_verify_crt(int verified, X509_STORE_CTX *ctx) { X509 *crt; X509_NAME *name; char *str; str = NULL; if (!verified) { if ((crt = X509_STORE_CTX_get_current_cert(ctx)) != NULL && (name = X509_get_subject_name(crt)) != NULL) str = X509_NAME_oneline(name, 0, 0); fprintf(stderr, "Certificate verification failed for %s\n", str != NULL ? str : "no relevant certificate"); OPENSSL_free(str); } return (verified); } #endif /* * Enable SSL on a connection. */ int fetch_ssl(conn_t *conn, const struct url *URL, int verbose) { #ifdef WITH_SSL int ret, ssl_err; X509_NAME *name; char *str; /* Init the SSL library and context */ if (!SSL_library_init()){ fprintf(stderr, "SSL library init failed\n"); return (-1); } SSL_load_error_strings(); conn->ssl_meth = SSLv23_client_method(); conn->ssl_ctx = SSL_CTX_new(conn->ssl_meth); SSL_CTX_set_mode(conn->ssl_ctx, SSL_MODE_AUTO_RETRY); fetch_ssl_setup_transport_layer(conn->ssl_ctx, verbose); if (!fetch_ssl_setup_peer_verification(conn->ssl_ctx, verbose)) return (-1); if (!fetch_ssl_setup_client_certificate(conn->ssl_ctx, verbose)) return (-1); conn->ssl = SSL_new(conn->ssl_ctx); if (conn->ssl == NULL) { fprintf(stderr, "SSL context creation failed\n"); return (-1); } SSL_set_fd(conn->ssl, conn->sd); #if OPENSSL_VERSION_NUMBER >= 0x0090806fL && !defined(OPENSSL_NO_TLSEXT) if (!SSL_set_tlsext_host_name(conn->ssl, __DECONST(struct url *, URL)->host)) { fprintf(stderr, "TLS server name indication extension failed for host %s\n", URL->host); return (-1); } #endif while ((ret = SSL_connect(conn->ssl)) == -1) { ssl_err = SSL_get_error(conn->ssl, ret); if (ssl_err != SSL_ERROR_WANT_READ && ssl_err != SSL_ERROR_WANT_WRITE) { ERR_print_errors_fp(stderr); return (-1); } } conn->ssl_cert = SSL_get_peer_certificate(conn->ssl); if (conn->ssl_cert == NULL) { fprintf(stderr, "No server SSL certificate\n"); return (-1); } if (getenv("SSL_NO_VERIFY_HOSTNAME") == NULL) { if (verbose) fetch_info("Verify hostname"); if (!fetch_ssl_verify_hname(conn->ssl_cert, URL->host)) { fprintf(stderr, "SSL certificate subject doesn't match host %s\n", URL->host); return (-1); } } if (verbose) { fetch_info("%s connection established using %s", SSL_get_version(conn->ssl), SSL_get_cipher(conn->ssl)); name = X509_get_subject_name(conn->ssl_cert); str = X509_NAME_oneline(name, 0, 0); fetch_info("Certificate subject: %s", str); OPENSSL_free(str); name = X509_get_issuer_name(conn->ssl_cert); str = X509_NAME_oneline(name, 0, 0); fetch_info("Certificate issuer: %s", str); OPENSSL_free(str); } return (0); #else (void)conn; (void)verbose; + (void)URL; fprintf(stderr, "SSL support disabled\n"); return (-1); #endif } #define FETCH_READ_WAIT -2 #define FETCH_READ_ERROR -1 #define FETCH_READ_DONE 0 #ifdef WITH_SSL static ssize_t fetch_ssl_read(SSL *ssl, char *buf, size_t len) { ssize_t rlen; int ssl_err; rlen = SSL_read(ssl, buf, len); if (rlen < 0) { ssl_err = SSL_get_error(ssl, rlen); if (ssl_err == SSL_ERROR_WANT_READ || ssl_err == SSL_ERROR_WANT_WRITE) { return (FETCH_READ_WAIT); } else { ERR_print_errors_fp(stderr); return (FETCH_READ_ERROR); } } return (rlen); } #endif static ssize_t fetch_socket_read(int sd, char *buf, size_t len) { ssize_t rlen; rlen = read(sd, buf, len); if (rlen < 0) { if (errno == EAGAIN || (errno == EINTR && fetchRestartCalls)) return (FETCH_READ_WAIT); else return (FETCH_READ_ERROR); } return (rlen); } /* * Read a character from a connection w/ timeout */ ssize_t fetch_read(conn_t *conn, char *buf, size_t len) { struct timeval now, timeout, delta; struct pollfd pfd; 
ssize_t rlen; int deltams; if (fetchTimeout > 0) { gettimeofday(&timeout, NULL); timeout.tv_sec += fetchTimeout; } deltams = INFTIM; memset(&pfd, 0, sizeof pfd); pfd.fd = conn->sd; pfd.events = POLLIN | POLLERR; for (;;) { /* * The socket is non-blocking. Instead of the canonical * poll() -> read(), we do the following: * * 1) call read() or SSL_read(). * 2) if we received some data, return it. * 3) if an error occurred, return -1. * 4) if read() or SSL_read() signaled EOF, return. * 5) if we did not receive any data but we're not at EOF, * call poll(). * * In the SSL case, this is necessary because if we * receive a close notification, we have to call * SSL_read() one additional time after we've read * everything we received. * * In the non-SSL case, it may improve performance (very * slightly) when reading small amounts of data. */ #ifdef WITH_SSL if (conn->ssl != NULL) rlen = fetch_ssl_read(conn->ssl, buf, len); else #endif rlen = fetch_socket_read(conn->sd, buf, len); if (rlen >= 0) { break; } else if (rlen == FETCH_READ_ERROR) { fetch_syserr(); return (-1); } // assert(rlen == FETCH_READ_WAIT); if (fetchTimeout > 0) { gettimeofday(&now, NULL); if (!timercmp(&timeout, &now, >)) { errno = ETIMEDOUT; fetch_syserr(); return (-1); } timersub(&timeout, &now, &delta); deltams = delta.tv_sec * 1000 + delta.tv_usec / 1000; } errno = 0; pfd.revents = 0; if (poll(&pfd, 1, deltams) < 0) { if (errno == EINTR && fetchRestartCalls) continue; fetch_syserr(); return (-1); } } return (rlen); } /* * Read a line of text from a connection w/ timeout */ #define MIN_BUF_SIZE 1024 int fetch_getln(conn_t *conn) { char *tmp; size_t tmpsize; ssize_t len; char c; if (conn->buf == NULL) { if ((conn->buf = malloc(MIN_BUF_SIZE)) == NULL) { errno = ENOMEM; return (-1); } conn->bufsize = MIN_BUF_SIZE; } conn->buf[0] = '\0'; conn->buflen = 0; do { len = fetch_read(conn, &c, 1); if (len == -1) return (-1); if (len == 0) break; conn->buf[conn->buflen++] = c; if (conn->buflen == conn->bufsize) { tmp = conn->buf; tmpsize = conn->bufsize * 2 + 1; if ((tmp = realloc(tmp, tmpsize)) == NULL) { errno = ENOMEM; return (-1); } conn->buf = tmp; conn->bufsize = tmpsize; } } while (c != '\n'); conn->buf[conn->buflen] = '\0'; DEBUGF("<<< %s", conn->buf); return (0); } /* * Write to a connection w/ timeout */ ssize_t fetch_write(conn_t *conn, const char *buf, size_t len) { struct iovec iov; iov.iov_base = __DECONST(char *, buf); iov.iov_len = len; return fetch_writev(conn, &iov, 1); } /* * Write a vector to a connection w/ timeout * Note: can modify the iovec.
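 *
 * On a short write the iovec is advanced in place: fully written
 * elements are skipped, and the first partially written element has its
 * base and length adjusted, so the loop resumes exactly where writev(2)
 * or SSL_write(3) stopped.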
*/ ssize_t fetch_writev(conn_t *conn, struct iovec *iov, int iovcnt) { struct timeval now, timeout, delta; struct pollfd pfd; ssize_t wlen, total; int deltams; memset(&pfd, 0, sizeof pfd); if (fetchTimeout) { pfd.fd = conn->sd; pfd.events = POLLOUT | POLLERR; gettimeofday(&timeout, NULL); timeout.tv_sec += fetchTimeout; } total = 0; while (iovcnt > 0) { while (fetchTimeout && pfd.revents == 0) { gettimeofday(&now, NULL); if (!timercmp(&timeout, &now, >)) { errno = ETIMEDOUT; fetch_syserr(); return (-1); } timersub(&timeout, &now, &delta); deltams = delta.tv_sec * 1000 + delta.tv_usec / 1000; errno = 0; pfd.revents = 0; if (poll(&pfd, 1, deltams) < 0) { /* POSIX compliance */ if (errno == EAGAIN) continue; if (errno == EINTR && fetchRestartCalls) continue; return (-1); } } errno = 0; #ifdef WITH_SSL if (conn->ssl != NULL) wlen = SSL_write(conn->ssl, iov->iov_base, iov->iov_len); else #endif wlen = writev(conn->sd, iov, iovcnt); if (wlen == 0) { /* we consider a short write a failure */ /* XXX perhaps we shouldn't in the SSL case */ errno = EPIPE; fetch_syserr(); return (-1); } if (wlen < 0) { if (errno == EINTR && fetchRestartCalls) continue; return (-1); } total += wlen; while (iovcnt > 0 && wlen >= (ssize_t)iov->iov_len) { wlen -= iov->iov_len; iov++; iovcnt--; } if (iovcnt > 0) { iov->iov_len -= wlen; iov->iov_base = __DECONST(char *, iov->iov_base) + wlen; } } return (total); } /* * Write a line of text to a connection w/ timeout */ int fetch_putln(conn_t *conn, const char *str, size_t len) { struct iovec iov[2]; int ret; DEBUGF(">>> %s\n", str); iov[0].iov_base = __DECONST(char *, str); iov[0].iov_len = len; iov[1].iov_base = __DECONST(char *, ENDL); iov[1].iov_len = sizeof(ENDL); if (len == 0) ret = fetch_writev(conn, &iov[1], 1); else ret = fetch_writev(conn, iov, 2); if (ret == -1) return (-1); return (0); } /* * Close connection */ int fetch_close(conn_t *conn) { int ret; if (--conn->ref > 0) return (0); #ifdef WITH_SSL if (conn->ssl) { SSL_shutdown(conn->ssl); SSL_set_connect_state(conn->ssl); SSL_free(conn->ssl); conn->ssl = NULL; } if (conn->ssl_ctx) { SSL_CTX_free(conn->ssl_ctx); conn->ssl_ctx = NULL; } if (conn->ssl_cert) { X509_free(conn->ssl_cert); conn->ssl_cert = NULL; } #endif ret = close(conn->sd); free(conn->buf); free(conn); return (ret); } /*** Directory-related utility functions *************************************/ int fetch_add_entry(struct url_ent **p, int *size, int *len, const char *name, struct url_stat *us) { struct url_ent *tmp; if (*p == NULL) { *size = 0; *len = 0; } if (*len >= *size - 1) { tmp = reallocarray(*p, *size * 2 + 1, sizeof(**p)); if (tmp == NULL) { errno = ENOMEM; fetch_syserr(); return (-1); } *size = (*size * 2 + 1); *p = tmp; } tmp = *p + *len; snprintf(tmp->name, PATH_MAX, "%s", name); memcpy(&tmp->stat, us, sizeof(*us)); (*len)++; (++tmp)->name[0] = 0; return (0); } /*** Authentication-related utility functions ********************************/ static const char * fetch_read_word(FILE *f) { static char word[1024]; if (fscanf(f, " %1023s ", word) != 1) return (NULL); return (word); } static int fetch_netrc_open(void) { struct passwd *pwd; char fn[PATH_MAX]; const char *p; int fd, serrno; if ((p = getenv("NETRC")) != NULL) { DEBUGF("NETRC=%s\n", p); if (snprintf(fn, sizeof(fn), "%s", p) >= (int)sizeof(fn)) { fetch_info("$NETRC specifies a file name " "longer than PATH_MAX"); return (-1); } } else { if ((p = getenv("HOME")) == NULL) { if ((pwd = getpwuid(getuid())) == NULL || (p = pwd->pw_dir) == NULL) return (-1); } if (snprintf(fn, 
sizeof(fn), "%s/.netrc", p) >= (int)sizeof(fn)) return (-1); } if ((fd = open(fn, O_RDONLY)) < 0) { serrno = errno; DEBUGF("%s: %s\n", fn, strerror(serrno)); errno = serrno; } return (fd); } /* * Get authentication data for a URL from .netrc */ int fetch_netrc_auth(struct url *url) { const char *word; int serrno; FILE *f; if (url->netrcfd < 0) url->netrcfd = fetch_netrc_open(); if (url->netrcfd < 0) return (-1); if ((f = fdopen(url->netrcfd, "r")) == NULL) { serrno = errno; DEBUGF("fdopen(netrcfd): %s", strerror(errno)); close(url->netrcfd); url->netrcfd = -1; errno = serrno; return (-1); } rewind(f); DEBUGF("searching netrc for %s\n", url->host); while ((word = fetch_read_word(f)) != NULL) { if (strcmp(word, "default") == 0) { DEBUGF("using default netrc settings\n"); break; } if (strcmp(word, "machine") == 0 && (word = fetch_read_word(f)) != NULL && strcasecmp(word, url->host) == 0) { DEBUGF("using netrc settings for %s\n", word); break; } } if (word == NULL) goto ferr; while ((word = fetch_read_word(f)) != NULL) { if (strcmp(word, "login") == 0) { if ((word = fetch_read_word(f)) == NULL) goto ferr; if (snprintf(url->user, sizeof(url->user), "%s", word) > (int)sizeof(url->user)) { fetch_info("login name in .netrc is too long"); url->user[0] = '\0'; } } else if (strcmp(word, "password") == 0) { if ((word = fetch_read_word(f)) == NULL) goto ferr; if (snprintf(url->pwd, sizeof(url->pwd), "%s", word) > (int)sizeof(url->pwd)) { fetch_info("password in .netrc is too long"); url->pwd[0] = '\0'; } } else if (strcmp(word, "account") == 0) { if ((word = fetch_read_word(f)) == NULL) goto ferr; /* XXX not supported! */ } else { break; } } fclose(f); url->netrcfd = -1; return (0); ferr: serrno = errno; fclose(f); url->netrcfd = -1; errno = serrno; return (-1); } /* * The no_proxy environment variable specifies a set of domains for * which the proxy should not be consulted; the contents is a comma-, * or space-separated list of domain names. A single asterisk will * override all proxy variables and no transactions will be proxied * (for compatibility with lynx and curl, see the discussion at * ). */ int fetch_no_proxy_match(const char *host) { const char *no_proxy, *p, *q; size_t h_len, d_len; if ((no_proxy = getenv("NO_PROXY")) == NULL && (no_proxy = getenv("no_proxy")) == NULL) return (0); /* asterisk matches any hostname */ if (strcmp(no_proxy, "*") == 0) return (1); h_len = strlen(host); p = no_proxy; do { /* position p at the beginning of a domain suffix */ while (*p == ',' || isspace((unsigned char)*p)) p++; /* position q at the first separator character */ for (q = p; *q; ++q) if (*q == ',' || isspace((unsigned char)*q)) break; d_len = q - p; if (d_len > 0 && h_len >= d_len && strncasecmp(host + h_len - d_len, p, d_len) == 0) { /* domain name matches */ return (1); } p = q + 1; } while (*q); return (0); } Index: projects/runtime-coverage-v2/lib/libvgl/bitmap.c =================================================================== --- projects/runtime-coverage-v2/lib/libvgl/bitmap.c (revision 347075) +++ projects/runtime-coverage-v2/lib/libvgl/bitmap.c (revision 347076) @@ -1,320 +1,317 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1991-1997 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include <sys/types.h> #include <stdlib.h> #include <sys/fbio.h> #include "vgl.h" #define min(x, y) (((x) < (y)) ? (x) : (y)) static byte mask[8] = {0xff, 0x7f, 0x3f, 0x1f, 0x0f, 0x07, 0x03, 0x01}; static int color2bit[16] = {0x00000000, 0x00000001, 0x00000100, 0x00000101, 0x00010000, 0x00010001, 0x00010100, 0x00010101, 0x01000000, 0x01000001, 0x01000100, 0x01000101, 0x01010000, 0x01010001, 0x01010100, 0x01010101}; static void WriteVerticalLine(VGLBitmap *dst, int x, int y, int width, byte *line) { - int i, pos, last, planepos, start_offset, end_offset, offset; + int bwidth, i, pos, last, planepos, start_offset, end_offset, offset; int len; unsigned int word = 0; byte *address; byte *VGLPlane[4]; switch (dst->Type) { case VIDBUF4: case VIDBUF4S: start_offset = (x & 0x07); end_offset = (x + width) & 0x07; - i = (width + start_offset) / 8; + bwidth = (width + start_offset) / 8; if (end_offset) - i++; + bwidth++; VGLPlane[0] = VGLBuf; - VGLPlane[1] = VGLPlane[0] + i; - VGLPlane[2] = VGLPlane[1] + i; - VGLPlane[3] = VGLPlane[2] + i; + VGLPlane[1] = VGLPlane[0] + bwidth; + VGLPlane[2] = VGLPlane[1] + bwidth; + VGLPlane[3] = VGLPlane[2] + bwidth; pos = 0; planepos = 0; last = 8 - start_offset; while (pos < width) { word = 0; while (pos < last && pos < width) word = (word<<1) | color2bit[line[pos++]&0x0f]; VGLPlane[0][planepos] = word; VGLPlane[1][planepos] = word>>8; VGLPlane[2][planepos] = word>>16; VGLPlane[3][planepos] = word>>24; planepos++; last += 8; } planepos--; if (end_offset) { word <<= (8 - end_offset); VGLPlane[0][planepos] = word; VGLPlane[1][planepos] = word>>8; VGLPlane[2][planepos] = word>>16; VGLPlane[3][planepos] = word>>24; } - if (start_offset || end_offset) - width+=8; - width /= 8; outb(0x3ce, 0x01); outb(0x3cf, 0x00); /* set/reset enable */ outb(0x3ce, 0x08); outb(0x3cf, 0xff); /* bit mask */ for (i=0; i<4; i++) { outb(0x3c4, 0x02); outb(0x3c5, 0x01<<i); pos = VGLAdpInfo.va_line_width*y + x/8; if (dst->Type == VIDBUF4) { if (end_offset) VGLPlane[i][planepos] |= dst->Bitmap[pos+planepos] & mask[end_offset]; if (start_offset) VGLPlane[i][0] |= dst->Bitmap[pos] & ~mask[start_offset]; - bcopy(&VGLPlane[i][0], dst->Bitmap + pos, width); + bcopy(&VGLPlane[i][0], dst->Bitmap + pos, bwidth); } else { /* VIDBUF4S */ if (end_offset) { offset = VGLSetSegment(pos + planepos); VGLPlane[i][planepos] |=
dst->Bitmap[offset] & mask[end_offset]; } offset = VGLSetSegment(pos); if (start_offset) VGLPlane[i][0] |= dst->Bitmap[offset] & ~mask[start_offset]; - for (last = width; ; ) { + for (last = bwidth; ; ) { len = min(VGLAdpInfo.va_window_size - offset, last); - bcopy(&VGLPlane[i][width - last], dst->Bitmap + offset, len); + bcopy(&VGLPlane[i][bwidth - last], dst->Bitmap + offset, len); pos += len; last -= len; if (last <= 0) break; offset = VGLSetSegment(pos); } } } break; case VIDBUF8X: address = dst->Bitmap + VGLAdpInfo.va_line_width * y + x/4; for (i=0; i<4; i++) { outb(0x3c4, 0x02); outb(0x3c5, 0x01 << ((x + i)%4)); for (planepos=0, pos=i; pos<width; planepos++, pos+=4) address[planepos] = line[pos]; if ((x + i)%4 == 3) address++; } break; case VIDBUF8S: case VIDBUF16S: case VIDBUF24S: case VIDBUF32S: width = width * dst->PixelBytes; pos = (dst->VXsize * y + x) * dst->PixelBytes; while (width > 0) { offset = VGLSetSegment(pos); i = min(VGLAdpInfo.va_window_size - offset, width); bcopy(line, dst->Bitmap + offset, i); line += i; pos += i; width -= i; } break; case MEMBUF: case VIDBUF8: case VIDBUF16: case VIDBUF24: case VIDBUF32: address = dst->Bitmap + (dst->VXsize * y + x) * dst->PixelBytes; bcopy(line, address, width * dst->PixelBytes); break; default: ; } } int __VGLBitmapCopy(VGLBitmap *src, int srcx, int srcy, VGLBitmap *dst, int dstx, int dsty, int width, int hight) { byte *buffer, *p; int mousemerge, srcline, dstline, yend, yextra, ystep; mousemerge = 0; if (hight < 0) { hight = -hight; mousemerge = (dst == VGLDisplay && VGLMouseOverlap(dstx, dsty, width, hight)); if (mousemerge) buffer = alloca(width*src->PixelBytes); } if (srcx>src->VXsize || srcy>src->VYsize || dstx>dst->VXsize || dsty>dst->VYsize) return -1; if (srcx < 0) { width=width+srcx; dstx-=srcx; srcx=0; } if (srcy < 0) { hight=hight+srcy; dsty-=srcy; srcy=0; } if (dstx < 0) { width=width+dstx; srcx-=dstx; dstx=0; } if (dsty < 0) { hight=hight+dsty; srcy-=dsty; dsty=0; } if (srcx+width > src->VXsize) width=src->VXsize-srcx; if (srcy+hight > src->VYsize) hight=src->VYsize-srcy; if (dstx+width > dst->VXsize) width=dst->VXsize-dstx; if (dsty+hight > dst->VYsize) hight=dst->VYsize-dsty; if (width < 0 || hight < 0) return -1; yend = srcy + hight; yextra = 0; ystep = 1; if (src->Bitmap == dst->Bitmap && srcy < dsty) { yend = srcy - 1; yextra = hight - 1; ystep = -1; } for (srcline = srcy + yextra, dstline = dsty + yextra; srcline != yend; srcline += ystep, dstline += ystep) { p = src->Bitmap+(srcline*src->VXsize+srcx)*dst->PixelBytes; if (mousemerge && VGLMouseOverlap(dstx, dstline, width, 1)) { bcopy(p, buffer, width*src->PixelBytes); p = buffer; VGLMouseMerge(dstx, dstline, width, p); } WriteVerticalLine(dst, dstx, dstline, width, p); } return 0; } int VGLBitmapCopy(VGLBitmap *src, int srcx, int srcy, VGLBitmap *dst, int dstx, int dsty, int width, int hight) { int error; if (hight < 0) return -1; if (src == VGLDisplay) src = &VGLVDisplay; if (src->Type != MEMBUF) return -1; /* invalid */ if (dst == VGLDisplay) { VGLMouseFreeze(); error = __VGLBitmapCopy(src, srcx, srcy, &VGLVDisplay, dstx, dsty, width, hight); if (error != 0) return error; src = &VGLVDisplay; srcx = dstx; srcy = dsty; } else if (dst->Type != MEMBUF) return -1; /* invalid */ error = __VGLBitmapCopy(src, srcx, srcy, dst, dstx, dsty, width, -hight); if (dst == VGLDisplay) VGLMouseUnFreeze(); return error; } VGLBitmap *VGLBitmapCreate(int type, int xsize, int ysize, byte *bits) { VGLBitmap *object; if (type != MEMBUF) return NULL; if (xsize < 0 || ysize < 0) return NULL; object = (VGLBitmap *)malloc(sizeof(*object)); if (object == NULL) return NULL; object->Type =
type; object->Xsize = xsize; object->Ysize = ysize; object->VXsize = xsize; object->VYsize = ysize; object->Xorigin = 0; object->Yorigin = 0; object->Bitmap = bits; object->PixelBytes = VGLDisplay->PixelBytes; return object; } void VGLBitmapDestroy(VGLBitmap *object) { if (object->Bitmap) free(object->Bitmap); free(object); } int VGLBitmapAllocateBits(VGLBitmap *object) { object->Bitmap = malloc(object->VXsize*object->VYsize*object->PixelBytes); if (object->Bitmap == NULL) return -1; return 0; } void VGLBitmapCvt(VGLBitmap *src, VGLBitmap *dst) { u_long color; int dstpos, i, pb, size, srcpb, srcpos; size = src->VXsize * src->VYsize; srcpb = src->PixelBytes; if (srcpb <= 0) srcpb = 1; pb = dst->PixelBytes; if (pb == srcpb) { bcopy(src->Bitmap, dst->Bitmap, size * pb); return; } if (srcpb != 1) return; /* not supported */ for (srcpos = dstpos = 0; srcpos < size; srcpos++) { color = VGLrgb332ToNative(src->Bitmap[srcpos]); for (i = 0; i < pb; i++, color >>= 8) dst->Bitmap[dstpos++] = color; } } Index: projects/runtime-coverage-v2/libexec/rtld-elf/rtld_malloc.c =================================================================== --- projects/runtime-coverage-v2/libexec/rtld-elf/rtld_malloc.c (revision 347075) +++ projects/runtime-coverage-v2/libexec/rtld-elf/rtld_malloc.c (revision 347076) @@ -1,494 +1,383 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1983 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) /*static char *sccsid = "from: @(#)malloc.c 5.11 (Berkeley) 2/23/91";*/ static char *rcsid = "$FreeBSD$"; #endif /* LIBC_SCCS and not lint */ /* * malloc.c (Caltech) 2/21/82 * Chris Kingsley, kingsley@cit-20. * * This is a very fast storage allocator. It allocates blocks of a small * number of different sizes, and keeps free lists of each size. Blocks that * don't exactly fit are passed up to the next larger size. In this * implementation, the available sizes are 2^n-4 (or 2^n-10) bytes long. * This is designed for use in a virtual memory environment. 
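 *
 * Worked example (illustrative, assuming an 8-byte union overhead on
 * LP64): a 100-byte request needs 108 bytes and is therefore served
 * from the 128-byte bucket (index 4, since 1 << (4 + 3) == 128),
 * leaving 120 bytes usable by the caller.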
*/ -#include +#include #include +#include #include #include #include -#include -#include #include #include -#include -#include #include "rtld.h" #include "rtld_printf.h" #include "paths.h" /* * Pre-allocate mmap'ed pages */ #define NPOOLPAGES (128*1024/pagesz) static caddr_t pagepool_start, pagepool_end; /* * The overhead on a block is at least 4 bytes. When free, this space * contains a pointer to the next free block, and the bottom two bits must * be zero. When in use, the first byte is set to MAGIC, and the second * byte is the size index. The remaining bytes are for alignment. * If range checking is enabled then a second word holds the size of the * requested block, less 1, rounded up to a multiple of sizeof(RMAGIC). * The order of elements is critical: ov_magic must overlay the low order * bits of ov_next, and ov_magic can not be a valid ov_next bit pattern. */ union overhead { union overhead *ov_next; /* when free */ struct { u_char ovu_magic; /* magic number */ u_char ovu_index; /* bucket # */ -#ifdef RCHECK - u_short ovu_rmagic; /* range magic number */ - u_int ovu_size; /* actual block size */ -#endif } ovu; #define ov_magic ovu.ovu_magic #define ov_index ovu.ovu_index #define ov_rmagic ovu.ovu_rmagic #define ov_size ovu.ovu_size }; static void morecore(int bucket); static int morepages(int n); static int findbucket(union overhead *freep, int srchlen); #define MAGIC 0xef /* magic # on accounting info */ #define RMAGIC 0x5555 /* magic # on range info */ -#ifdef RCHECK -#define RSLOP sizeof (u_short) -#else -#define RSLOP 0 -#endif - /* * nextf[i] is the pointer to the next free block of size 2^(i+3). The * smallest allocatable block is 8 bytes. The overhead information * precedes the data area returned to the user. */ #define NBUCKETS 30 static union overhead *nextf[NBUCKETS]; static int pagesz; /* page size */ static int pagebucket; /* page size bucket */ -#ifdef MSTATS /* - * nmalloc[i] is the difference between the number of mallocs and frees - * for a given block size. - */ -static u_int nmalloc[NBUCKETS]; -#include -#endif - -#if defined(MALLOC_DEBUG) || defined(RCHECK) -#define ASSERT(p) if (!(p)) botch("p") -#include -static void -botch(s) - char *s; -{ - fprintf(stderr, "\r\nassertion botched: %s\r\n", s); - (void) fflush(stderr); /* just in case user buffered it */ - abort(); -} -#else -#define ASSERT(p) -#endif - -/* Debugging stuff */ -#define TRACE() rtld_printf("TRACE %s:%d\n", __FILE__, __LINE__) - -/* * The array of supported page sizes is provided by the user, i.e., the * program that calls this storage allocator. That program must initialize * the array before making its first call to allocate storage. The array * must contain at least one page size. The page sizes must be stored in * increasing order. */ void * __crt_malloc(size_t nbytes) { union overhead *op; int bucket; ssize_t n; size_t amt; /* * First time malloc is called, setup page size and * align break pointer so all data will be page aligned. */ if (pagesz == 0) { pagesz = n = pagesizes[0]; if (morepages(NPOOLPAGES) == 0) return NULL; op = (union overhead *)(pagepool_start); n = n - sizeof (*op) - ((long)op & (n - 1)); if (n < 0) n += pagesz; if (n) { pagepool_start += n; } bucket = 0; amt = 8; while ((unsigned)pagesz > amt) { amt <<= 1; bucket++; } pagebucket = bucket; } /* * Convert amount of memory requested into closest block size * stored in hash buckets which satisfies request. * Account for space used per block for accounting. 
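 * With the RCHECK range-checking code removed, that per-block cost is
 * simply sizeof(union overhead).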
*/ - if (nbytes <= (unsigned long)(n = pagesz - sizeof (*op) - RSLOP)) { -#ifndef RCHECK + if (nbytes <= (unsigned long)(n = pagesz - sizeof(*op))) { amt = 8; /* size of first bucket */ bucket = 0; -#else - amt = 16; /* size of first bucket */ - bucket = 1; -#endif - n = -(sizeof (*op) + RSLOP); + n = -sizeof(*op); } else { amt = pagesz; bucket = pagebucket; } while (nbytes > amt + n) { amt <<= 1; if (amt == 0) return (NULL); bucket++; } /* * If nothing in hash bucket right now, * request more memory from the system. */ if ((op = nextf[bucket]) == NULL) { morecore(bucket); if ((op = nextf[bucket]) == NULL) return (NULL); } /* remove from linked list */ nextf[bucket] = op->ov_next; op->ov_magic = MAGIC; op->ov_index = bucket; -#ifdef MSTATS - nmalloc[bucket]++; -#endif -#ifdef RCHECK - /* - * Record allocated size of block and - * bound space with magic numbers. - */ - op->ov_size = roundup2(nbytes, RSLOP); - op->ov_rmagic = RMAGIC; - *(u_short *)((caddr_t)(op + 1) + op->ov_size) = RMAGIC; -#endif return ((char *)(op + 1)); } void * __crt_calloc(size_t num, size_t size) { void *ret; if (size != 0 && (num * size) / size != num) { /* size_t overflow. */ return (NULL); } if ((ret = __crt_malloc(num * size)) != NULL) memset(ret, 0, num * size); return (ret); } /* * Allocate more memory to the indicated bucket. */ static void morecore(int bucket) { union overhead *op; int sz; /* size of desired block */ int amt; /* amount to allocate */ int nblks; /* how many blocks we get */ /* * sbrk_size <= 0 only for big, FLUFFY, requests (about * 2^30 bytes on a VAX, I think) or for a negative arg. */ - sz = 1 << (bucket + 3); -#ifdef MALLOC_DEBUG - ASSERT(sz > 0); -#else - if (sz <= 0) + if ((unsigned)bucket >= NBBY * sizeof(int) - 4) return; -#endif + sz = 1 << (bucket + 3); if (sz < pagesz) { amt = pagesz; nblks = amt / sz; } else { amt = sz + pagesz; nblks = 1; } if (amt > pagepool_end - pagepool_start) if (morepages(amt/pagesz + NPOOLPAGES) == 0) return; op = (union overhead *)pagepool_start; pagepool_start += amt; /* * Add new memory allocated to that on * free list for this hash bucket. */ nextf[bucket] = op; while (--nblks > 0) { op->ov_next = (union overhead *)((caddr_t)op + sz); op = (union overhead *)((caddr_t)op + sz); } } void __crt_free(void *cp) { int size; union overhead *op; if (cp == NULL) return; op = (union overhead *)((caddr_t)cp - sizeof (union overhead)); -#ifdef MALLOC_DEBUG - ASSERT(op->ov_magic == MAGIC); /* make sure it was in use */ -#else if (op->ov_magic != MAGIC) return; /* sanity */ -#endif -#ifdef RCHECK - ASSERT(op->ov_rmagic == RMAGIC); - ASSERT(*(u_short *)((caddr_t)(op + 1) + op->ov_size) == RMAGIC); -#endif size = op->ov_index; - ASSERT(size < NBUCKETS); op->ov_next = nextf[size]; /* also clobbers ov_magic */ nextf[size] = op; -#ifdef MSTATS - nmalloc[size]--; -#endif } /* * When a program attempts "storage compaction" as mentioned in the * old malloc man page, it realloc's an already freed block. Usually * this is the last block it freed; occasionally it might be farther * back. We have to search all the free lists for the block in order * to determine its bucket: 1st we make one pass through the lists * checking only the first block in each; if that fails we search * ``realloc_srchlen'' blocks in each list for a match (the variable * is extern so the caller can modify it). If that fails we just copy * however many bytes was given to realloc() and hope it's not huge. 
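 *
 * In short, the lookup order is: the block's own magic number (block
 * still allocated), the first element of every free list, then up to
 * realloc_srchlen elements of every list; only when all of these fail
 * is the block assumed to be of the largest possible size.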
*/ static int realloc_srchlen = 4; /* 4 should be plenty, -1 =>'s whole list */ void * __crt_realloc(void *cp, size_t nbytes) { u_int onb; int i; union overhead *op; char *res; int was_alloced = 0; if (cp == NULL) return (__crt_malloc(nbytes)); op = (union overhead *)((caddr_t)cp - sizeof (union overhead)); if (op->ov_magic == MAGIC) { was_alloced++; i = op->ov_index; } else { /* * Already free, doing "compaction". * * Search for the old block of memory on the * free list. First, check the most common * case (last element free'd), then (this failing) * the last ``realloc_srchlen'' items free'd. * If all lookups fail, then assume the size of * the memory block being realloc'd is the * largest possible (so that all "nbytes" of new * memory are copied into). Note that this could cause * a memory fault if the old area was tiny, and the moon * is gibbous. However, that is very unlikely. */ if ((i = findbucket(op, 1)) < 0 && (i = findbucket(op, realloc_srchlen)) < 0) i = NBUCKETS; } onb = 1 << (i + 3); if (onb < (u_int)pagesz) - onb -= sizeof (*op) + RSLOP; + onb -= sizeof(*op); else - onb += pagesz - sizeof (*op) - RSLOP; + onb += pagesz - sizeof(*op); /* avoid the copy if same size block */ if (was_alloced) { if (i) { i = 1 << (i + 2); if (i < pagesz) - i -= sizeof (*op) + RSLOP; + i -= sizeof(*op); else - i += pagesz - sizeof (*op) - RSLOP; + i += pagesz - sizeof(*op); } - if (nbytes <= onb && nbytes > (size_t)i) { -#ifdef RCHECK - op->ov_size = roundup2(nbytes, RSLOP); - *(u_short *)((caddr_t)(op + 1) + op->ov_size) = RMAGIC; -#endif - return(cp); - } else - __crt_free(cp); + if (nbytes <= onb && nbytes > (size_t)i) + return (cp); + __crt_free(cp); } if ((res = __crt_malloc(nbytes)) == NULL) return (NULL); if (cp != res) /* common optimization if "compacting" */ bcopy(cp, res, (nbytes < onb) ? nbytes : onb); return (res); } /* * Search ``srchlen'' elements of each free list for a block whose * header starts at ``freep''. If srchlen is -1 search the whole list. * Return bucket number, or -1 if not found. */ static int findbucket(union overhead *freep, int srchlen) { union overhead *p; int i, j; for (i = 0; i < NBUCKETS; i++) { j = 0; for (p = nextf[i]; p && j != srchlen; p = p->ov_next) { if (p == freep) return (i); j++; } } return (-1); } - -#ifdef MSTATS -/* - * mstats - print out statistics about malloc - * - * Prints two lines of numbers, one showing the length of the free list - * for each size category, the second showing the number of mallocs - - * frees for each size category. 
- */ -mstats(char * s) -{ - int i, j; - union overhead *p; - int totfree = 0, - totused = 0; - - fprintf(stderr, "Memory allocation statistics %s\nfree:\t", s); - for (i = 0; i < NBUCKETS; i++) { - for (j = 0, p = nextf[i]; p; p = p->ov_next, j++) - ; - fprintf(stderr, " %d", j); - totfree += j * (1 << (i + 3)); - } - fprintf(stderr, "\nused:\t"); - for (i = 0; i < NBUCKETS; i++) { - fprintf(stderr, " %d", nmalloc[i]); - totused += nmalloc[i] * (1 << (i + 3)); - } - fprintf(stderr, "\n\tTotal in use: %d, total free: %d\n", - totused, totfree); -} -#endif - static int morepages(int n) { int fd = -1; int offset; if (pagepool_end - pagepool_start > pagesz) { caddr_t addr = (caddr_t) (((long)pagepool_start + pagesz - 1) & ~(pagesz - 1)); if (munmap(addr, pagepool_end - addr) != 0) { #ifdef IN_RTLD rtld_fdprintf(STDERR_FILENO, _BASENAME_RTLD ": " "morepages: cannot munmap %p: %s\n", addr, rtld_strerror(errno)); #endif } } offset = (long)pagepool_start - ((long)pagepool_start & ~(pagesz - 1)); if ((pagepool_start = mmap(0, n * pagesz, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, fd, 0)) == (caddr_t)-1) { #ifdef IN_RTLD rtld_fdprintf(STDERR_FILENO, _BASENAME_RTLD ": morepages: " "cannot mmap anonymous memory: %s\n", rtld_strerror(errno)); #endif return 0; } pagepool_end = pagepool_start + n * pagesz; pagepool_start += offset; return n; } Index: projects/runtime-coverage-v2/release/Makefile.vm =================================================================== --- projects/runtime-coverage-v2/release/Makefile.vm (revision 347075) +++ projects/runtime-coverage-v2/release/Makefile.vm (revision 347076) @@ -1,170 +1,170 @@ # # $FreeBSD$ # # # Makefile for building virtual machine and cloud provider disk images. # VMTARGETS= vm-image VMFORMATS?= vhd vmdk qcow2 raw -VMSIZE?= 30G +VMSIZE?= 3072M SWAPSIZE?= 1G VMBASE?= vm VHD_DESC= Azure, VirtualPC, Hyper-V, Xen disk image VMDK_DESC= VMWare, VirtualBox disk image QCOW2_DESC= Qemu, KVM disk image RAW_DESC= Unformatted raw disk image CLOUDWARE?= EC2 \ GCE \ VAGRANT-VIRTUALBOX \ VAGRANT-VMWARE AZURE_FORMAT= vhdf AZURE_DESC= Microsoft Azure platform image AZURE_DISK= ${OSRELEASE}.${AZURE_FORMAT} EC2_FORMAT= raw EC2_DESC= Amazon EC2 image EC2_DISK= ${OSRELEASE}.${EC2_FORMAT} GCE_FORMAT= raw GCE_DESC= Google Compute Engine image GCE_DISK= disk.${GCE_FORMAT} OPENSTACK_FORMAT=qcow2 OPENSTACK_DESC= OpenStack platform image OPENSTACK_DISK= ${OSRELEASE}.${OPENSTACK_FORMAT} VAGRANT-VIRTUALBOX_FORMAT= vmdk VAGRANT-VIRTUALBOX_DESC= Vagrant Image for VirtualBox VAGRANT-VIRTUALBOX_DISK= ${OSRELEASE}.vbox.${VAGRANT_FORMAT} VAGRANT-VMWARE_FORMAT= vmdk VAGRANT-VMWARE_DESC= Vagrant Image for VMWare VAGRANT-VMWARE_DISK= ${OSRELEASE}.vmware.${VAGRANT_FORMAT} emulator-portinstall: .if ${TARGET_ARCH} != ${MACHINE_ARCH} .if ( ${TARGET_ARCH} != "i386" ) || ( ${MACHINE_ARCH} != "amd64" ) .if !exists(/usr/local/bin/qemu-${TARGET_ARCH}-static) .if exists(${PORTSDIR}/emulators/qemu-user-static/Makefile) env - PATH=$$PATH make -C ${PORTSDIR}/emulators/qemu-user-static BATCH=1 all install clean .else .if !exists(/usr/local/sbin/pkg-static) env ASSUME_ALWAYS_YES=yes pkg bootstrap -y .endif env ASSUME_ALWAYS_YES=yes pkg install -y emulators/qemu-user-static .endif .endif QEMUSTATIC=/usr/local/bin/qemu-${TARGET_ARCH}-static .endif .endif .if defined(WITH_CLOUDWARE) && !empty(WITH_CLOUDWARE) && !empty(CLOUDWARE) . 
for _CW in ${CLOUDWARE} CLOUDTARGETS+= cw-${_CW:tl} CLEANDIRS+= cw-${_CW:tl} CLEANFILES+= ${_CW:tl}.img \ ${_CW:tl}.${${_CW:tu}_FORMAT} \ ${_CW:tl}.${${_CW:tu}_FORMAT}.raw \ cw${_CW:tl}-package CLOUDPACKAGE+= cw${_CW:tl}-package ${_CW:tu}IMAGE= ${_CW:tl}.${${_CW:tu}_FORMAT} . if exists(${.CURDIR}/tools/${_CW:tl}.conf) && !defined(${_CW:tu}CONF) ${_CW:tu}CONF?= ${.CURDIR}/tools/${_CW:tl}.conf . endif cw-${_CW:tl}: emulator-portinstall mkdir -p ${.OBJDIR}/${.TARGET} env TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} SWAPSIZE=${SWAPSIZE} \ QEMUSTATIC=${QEMUSTATIC} \ ${.CURDIR}/scripts/mk-vmimage.sh \ -C ${.CURDIR}/tools/vmimage.subr -d ${.OBJDIR}/${.TARGET} \ -i ${.OBJDIR}/${_CW:tl}.img -s ${VMSIZE} -f ${${_CW:tu}_FORMAT} \ -S ${WORLDDIR} -o ${.OBJDIR}/${${_CW:tu}IMAGE} -c ${${_CW:tu}CONF} touch ${.TARGET} cw${_CW:tl}-package: @# Special target to handle packaging cloud images in the formats @# specific to each hosting provider. .if exists(${.CURDIR}/tools/${_CW:tl}-package.sh) env TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ ${.CURDIR}/tools/${_CW:tl}-package.sh \ -D ${DESTDIR} -I ${${_CW}_DISK} -S ${WORLDDIR} .endif touch ${.TARGET} . endfor .endif .if defined(WITH_VMIMAGES) && !empty(WITH_VMIMAGES) CLEANDIRS+= ${VMTARGETS} . for FORMAT in ${VMFORMATS} CLEANFILES+= ${FORMAT}.img CLEANFILES+= ${VMBASE}.${FORMAT} . endfor .endif vm-base: vm-image vm-image: .if defined(WITH_VMIMAGES) && !empty(WITH_VMIMAGES) . for FORMAT in ${VMFORMATS} mkdir -p ${.OBJDIR}/${.TARGET} env TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} SWAPSIZE=${SWAPSIZE} \ ${.CURDIR}/scripts/mk-vmimage.sh \ -C ${.CURDIR}/tools/vmimage.subr -d ${.OBJDIR}/${.TARGET} \ -i ${.OBJDIR}/${FORMAT}.img -s ${VMSIZE} -f ${FORMAT} \ -S ${WORLDDIR} -o ${.OBJDIR}/${VMBASE}.${FORMAT} . endfor .endif touch ${.TARGET} vm-cloudware: ${CLOUDTARGETS} list-vmtargets: list-cloudware @${ECHO} @${ECHO} "Supported virtual machine disk image formats:" .for FORMAT in ${VMFORMATS:tu} @${ECHO} " ${FORMAT:tl}: ${${FORMAT}_DESC}" .endfor list-cloudware: .if !empty(CLOUDWARE) @${ECHO} @${ECHO} "Supported cloud hosting provider images:" . for _CW in ${CLOUDWARE} @${ECHO} " ${_CW:tu}: ${${_CW:tu}_DESC}" . endfor .endif vm-install: .if defined(WITH_VMIMAGES) && !empty(WITH_VMIMAGES) mkdir -p ${DESTDIR}/vmimages . for FORMAT in ${VMFORMATS} cp -p ${VMBASE}.${FORMAT} \ ${DESTDIR}/vmimages/${OSRELEASE}.${FORMAT} . endfor . if defined(WITH_COMPRESSED_VMIMAGES) && !empty(WITH_COMPRESSED_VMIMAGES) . for FORMAT in ${VMFORMATS} # Don't keep the originals. There is a copy in ${.OBJDIR} if needed. ${XZ_CMD} ${DESTDIR}/vmimages/${OSRELEASE}.${FORMAT} . endfor . 
endif cd ${DESTDIR}/vmimages && sha512 ${OSRELEASE}* > \ ${DESTDIR}/vmimages/CHECKSUM.SHA512 cd ${DESTDIR}/vmimages && sha256 ${OSRELEASE}* > \ ${DESTDIR}/vmimages/CHECKSUM.SHA256 .endif vm-release: .if defined(WITH_VMIMAGES) && !empty(WITH_VMIMAGES) ${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} ${VMTARGETS} .endif cloudware-release: .if defined(WITH_CLOUDWARE) && !empty(WITH_CLOUDWARE) && !empty(CLOUDWARE) ${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} ${CLOUDTARGETS} .endif .include "${.CURDIR}/Makefile.ec2" .include "${.CURDIR}/Makefile.azure" .include "${.CURDIR}/Makefile.gce" .include "${.CURDIR}/Makefile.vagrant" Index: projects/runtime-coverage-v2/release/tools/gce.conf =================================================================== --- projects/runtime-coverage-v2/release/tools/gce.conf (revision 347075) +++ projects/runtime-coverage-v2/release/tools/gce.conf (revision 347076) @@ -1,126 +1,126 @@ #!/bin/sh # # $FreeBSD$ # -# Reduce VMSIZE to be below the free quota limit. -export VMSIZE=27G +# The default of 3GB is too small for GCE, so override the size here. +export VMSIZE=20G # Set to a list of packages to install. export VM_EXTRA_PACKAGES="firstboot-freebsd-update firstboot-pkgs \ google-cloud-sdk panicmail sudo sysutils/py-google-compute-engine \ lang/python lang/python2 lang/python3" # Set to a list of third-party software to enable in rc.conf(5). export VM_RC_LIST="ntpd sshd growfs \ firstboot_pkgs firstboot_freebsd_update google_startup \ google_accounts_daemon google_clock_skew_daemon \ google_instance_setup google_network_daemon" vm_extra_install_base() { echo 'search google.internal' > ${DESTDIR}/etc/resolv.conf echo 'nameserver 169.254.169.254' >> ${DESTDIR}/etc/resolv.conf echo 'nameserver 8.8.8.8' >> ${DESTDIR}/etc/resolv.conf } vm_extra_pre_umount() { # Enable growfs on every boot, not only the first, as an instance's disk can # be enlarged post-creation sed -i -e '/KEYWORD: firstboot/d' /etc/rc.d/growfs cat << EOF >> ${DESTDIR}/etc/rc.conf dumpdev="AUTO" ifconfig_DEFAULT="SYNCDHCP mtu 1460" ntpd_sync_on_start="YES" # need to fill in something here #firstboot_pkgs_list="" panicmail_autosubmit="YES" EOF cat << EOF >> ${DESTDIR}/boot/loader.conf autoboot_delay="-1" beastie_disable="YES" loader_logo="none" hw.memtest.tests="0" console="comconsole,vidconsole" hw.vtnet.mq_disable=1 kern.timecounter.hardware=ACPI-safe aesni_load="YES" nvme_load="YES" EOF echo '169.254.169.254 metadata.google.internal metadata' >> \ ${DESTDIR}/etc/hosts # overwrite ntp.conf cat << EOF > ${DESTDIR}/etc/ntp.conf server metadata.google.internal iburst restrict default kod nomodify notrap nopeer noquery restrict -6 default kod nomodify notrap nopeer noquery restrict 127.0.0.1 restrict -6 ::1 restrict 127.127.1.0 EOF cat << EOF >> ${DESTDIR}/etc/syslog.conf *.err;kern.warning;auth.notice;mail.crit /dev/console EOF cat << EOF >> ${DESTDIR}/etc/ssh/sshd_config ChallengeResponseAuthentication no X11Forwarding no AcceptEnv LANG AllowAgentForwarding no ClientAliveInterval 420 EOF cat << EOF >> ${DESTDIR}/etc/crontab 0 3 * * * root /usr/sbin/freebsd-update cron EOF cat << EOF >> ${DESTDIR}/etc/sysctl.conf net.inet.icmp.drop_redirect=1 net.inet.ip.redirect=0 net.inet.tcp.blackhole=2 net.inet.udp.blackhole=1 kern.ipc.somaxconn=1024 debug.trace_on_panic=1 debug.debugger_on_panic=0 EOF # To meet GCE marketplace requirements, extract the src.txz and # ports.txz distributions to the target virtual machine disk image # and fetch the sources for the third-party software installed on # the image. if [ !
-c "${DESTDIR}/dev/null" ]; then mkdir -p ${DESTDIR}/dev mount -t devfs devfs ${DESTDIR}/dev fi if [ -e "${DESTDIR}/../ftp/src.txz" ]; then tar fxJ ${DESTDIR}/../ftp/src.txz -C ${DESTDIR} fi if [ -e "${DESTDIR}/../ftp/ports.txz" ]; then tar fxJ ${DESTDIR}/../ftp/ports.txz -C ${DESTDIR} _INSTALLED_PACKAGES=$(chroot ${DESTDIR} pkg info -o -q -a) for PACKAGE in ${_INSTALLED_PACKAGES}; do chroot ${DESTDIR} \ make -C /usr/ports/${PACKAGE} fetch done fi if [ -c "${DESTDIR}/dev/null" ]; then umount_loop ${DESTDIR}/dev fi ## XXX: Verify this is needed. I do not see this requirement ## in the docs, and it impairs the ability to boot-test a copy ## of the image prior to packaging for upload to GCE. #sed -E -i '' 's/^([^#].*[[:space:]])on/\1off/' ${DESTDIR}/etc/ttys touch ${DESTDIR}/firstboot rm -f ${DESTDIR}/etc/resolv.conf return 0 } Index: projects/runtime-coverage-v2/release/tools/vmimage.subr =================================================================== --- projects/runtime-coverage-v2/release/tools/vmimage.subr (revision 347075) +++ projects/runtime-coverage-v2/release/tools/vmimage.subr (revision 347076) @@ -1,265 +1,268 @@ #!/bin/sh # # $FreeBSD$ # # # Common functions for virtual machine image build scripts. # scriptdir=$(dirname $(realpath $0)) . ${scriptdir}/../../tools/boot/install-boot.sh export PATH="/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin" trap "cleanup" INT QUIT TRAP ABRT TERM write_partition_layout() { if [ -z "${NOSWAP}" ]; then SWAPOPT="-p freebsd-swap/swapfs::${SWAPSIZE}" fi BOOTFILES="$(env TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ WITH_UNIFIED_OBJDIR=yes \ make -C ${WORLDDIR}/stand -V .OBJDIR)" BOOTFILES="$(realpath ${BOOTFILES})" case "${TARGET}:${TARGET_ARCH}" in amd64:amd64 | i386:i386) mkimg -s gpt -f ${VMFORMAT} \ -b ${BOOTFILES}/i386/pmbr/pmbr \ -p freebsd-boot/bootfs:=${BOOTFILES}/i386/gptboot/gptboot \ ${SWAPOPT} \ -p freebsd-ufs/rootfs:=${VMBASE} \ -o ${VMIMAGE} ;; arm64:aarch64) # Create an ESP espfilename=$(mktemp /tmp/efiboot.XXXXXX) make_esp_file ${espfilename} ${fat32min} ${BOOTFILES}/efi/loader_lua/loader_lua.efi mkimg -s mbr -f ${VMFORMAT} \ -p efi:=${espfilename} \ -p freebsd:=${VMBASE} \ -o ${VMIMAGE} rm ${espfilename} ;; powerpc:powerpc*) mkimg -s apm -f ${VMFORMAT} \ -p apple-boot/bootfs:=${BOOTFILES}/powerpc/boot1.chrp/boot1.hfs \ ${SWAPOPT} \ -p freebsd-ufs/rootfs:=${VMBASE} \ -o ${VMIMAGE} ;; *) # ENOTSUPP return 1 ;; esac return 0 } err() { printf "${@}\n" cleanup return 1 } cleanup() { if [ -c "${DESTDIR}/dev/null" ]; then umount_loop ${DESTDIR}/dev 2>/dev/null fi umount_loop ${DESTDIR} if [ ! -z "${mddev}" ]; then mdconfig -d -u ${mddev} fi return 0 } vm_create_base() { # Creates the UFS root filesystem for the virtual machine disk, # written to the formatted disk image with mkimg(1). mkdir -p ${DESTDIR} truncate -s ${VMSIZE} ${VMBASE} mddev=$(mdconfig -f ${VMBASE}) newfs -L rootfs /dev/${mddev} mount /dev/${mddev} ${DESTDIR} return 0 } vm_copy_base() { # Creates a new UFS root filesystem and copies the contents of the # current root filesystem into it. This produces a "clean" disk # image without any remnants of files which were created temporarily # during image-creation and have since been deleted (e.g., downloaded # package archives). mkdir -p ${DESTDIR}/old mdold=$(mdconfig -f ${VMBASE}) mount /dev/${mdold} ${DESTDIR}/old truncate -s ${VMSIZE} ${VMBASE}.tmp mkdir -p ${DESTDIR}/new mdnew=$(mdconfig -f ${VMBASE}.tmp) newfs -L rootfs /dev/${mdnew} mount /dev/${mdnew} ${DESTDIR}/new tar -cf- -C ${DESTDIR}/old . 
| tar -xUf- -C ${DESTDIR}/new umount_loop /dev/${mdold} rmdir ${DESTDIR}/old mdconfig -d -u ${mdold} umount_loop /dev/${mdnew} rmdir ${DESTDIR}/new tunefs -n enable /dev/${mdnew} mdconfig -d -u ${mdnew} mv ${VMBASE}.tmp ${VMBASE} } vm_install_base() { # Installs the FreeBSD userland/kernel to the virtual machine disk. cd ${WORLDDIR} && \ make DESTDIR=${DESTDIR} \ installworld installkernel distribution || \ err "\n\nCannot install the base system to ${DESTDIR}." # Bootstrap etcupdate(8) and mergemaster(8) databases. mkdir -p ${DESTDIR}/var/db/etcupdate etcupdate extract -B \ -M "TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH}" \ -s ${WORLDDIR} -d ${DESTDIR}/var/db/etcupdate sh ${WORLDDIR}/release/scripts/mm-mtree.sh -m ${WORLDDIR} \ -F "TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH}" \ -D ${DESTDIR} echo '# Custom /etc/fstab for FreeBSD VM images' \ > ${DESTDIR}/etc/fstab echo "/dev/${ROOTLABEL}/rootfs / ufs rw 1 1" \ >> ${DESTDIR}/etc/fstab if [ -z "${NOSWAP}" ]; then echo '/dev/gpt/swapfs none swap sw 0 0' \ >> ${DESTDIR}/etc/fstab fi local hostname hostname="$(echo $(uname -o) | tr '[:upper:]' '[:lower:]')" echo "hostname=\"${hostname}\"" >> ${DESTDIR}/etc/rc.conf if ! [ -z "${QEMUSTATIC}" ]; then export EMULATOR=/qemu cp ${QEMUSTATIC} ${DESTDIR}/${EMULATOR} fi mkdir -p ${DESTDIR}/dev mount -t devfs devfs ${DESTDIR}/dev chroot ${DESTDIR} ${EMULATOR} /usr/bin/newaliases chroot ${DESTDIR} ${EMULATOR} /bin/sh /etc/rc.d/ldconfig forcestart umount_loop ${DESTDIR}/dev cp /etc/resolv.conf ${DESTDIR}/etc/resolv.conf return 0 } vm_extra_install_base() { # Prototype. When overridden, runs extra post-installworld commands # as needed, based on the target virtual machine image or cloud # provider image target. return 0 } vm_extra_enable_services() { if [ ! -z "${VM_RC_LIST}" ]; then for _rcvar in ${VM_RC_LIST}; do echo ${_rcvar}_enable="YES" >> ${DESTDIR}/etc/rc.conf done fi if [ -z "${VMCONFIG}" -o -c "${VMCONFIG}" ]; then echo 'ifconfig_DEFAULT="DHCP inet6 accept_rtadv"' >> \ ${DESTDIR}/etc/rc.conf + # Expand the filesystem to fill the disk. + echo 'growfs_enable="YES"' >> ${DESTDIR}/etc/rc.conf + touch ${DESTDIR}/firstboot fi return 0 } vm_extra_install_packages() { if [ -z "${VM_EXTRA_PACKAGES}" ]; then return 0 fi mkdir -p ${DESTDIR}/dev mount -t devfs devfs ${DESTDIR}/dev chroot ${DESTDIR} ${EMULATOR} env ASSUME_ALWAYS_YES=yes \ /usr/sbin/pkg bootstrap -y chroot ${DESTDIR} ${EMULATOR} env ASSUME_ALWAYS_YES=yes \ /usr/sbin/pkg install -y ${VM_EXTRA_PACKAGES} umount_loop ${DESTDIR}/dev return 0 } vm_extra_install_ports() { # Prototype. When overridden, installs additional ports within the # virtual machine environment. return 0 } vm_extra_pre_umount() { # Prototype. When overridden, performs additional tasks within the # virtual machine environment prior to unmounting the filesystem. # Note: When overriding this function, removing resolv.conf in the # disk image must be included. if ! [ -z "${QEMUSTATIC}" ]; then rm -f ${DESTDIR}/${EMULATOR} fi rm -f ${DESTDIR}/etc/resolv.conf return 0 } vm_extra_pkg_rmcache() { if [ -e ${DESTDIR}/usr/local/sbin/pkg ]; then chroot ${DESTDIR} ${EMULATOR} env ASSUME_ALWAYS_YES=yes \ /usr/local/sbin/pkg clean -y -a fi return 0 } umount_loop() { DIR=$1 i=0 sync while ! umount ${DIR}; do i=$(( $i + 1 )) if [ $i -ge 10 ]; then # This should never happen. But, it has happened. echo "Cannot umount(8) ${DIR}" echo "Something has gone horribly wrong." return 1 fi sleep 1 done return 0 } vm_create_disk() { echo "Creating image... Please wait." 
echo write_partition_layout || return 1 return 0 } vm_extra_create_disk() { return 0 } Index: projects/runtime-coverage-v2/sbin/fsck_ffs/dir.c =================================================================== --- projects/runtime-coverage-v2/sbin/fsck_ffs/dir.c (revision 347075) +++ projects/runtime-coverage-v2/sbin/fsck_ffs/dir.c (revision 347076) @@ -1,713 +1,788 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char sccsid[] = "@(#)dir.c 8.8 (Berkeley) 4/28/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include "fsck.h" static struct dirtemplate emptydir = { 0, DIRBLKSIZ, DT_UNKNOWN, 0, "", 0, 0, DT_UNKNOWN, 0, "" }; static struct dirtemplate dirhead = { 0, 12, DT_DIR, 1, ".", 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." }; static int chgino(struct inodesc *); static int dircheck(struct inodesc *, struct direct *); static int expanddir(union dinode *dp, char *name); static void freedir(ino_t ino, ino_t parent); static struct direct *fsck_readdir(struct inodesc *); static struct bufarea *getdirblk(ufs2_daddr_t blkno, long size); static int lftempname(char *bufp, ino_t ino); static int mkentry(struct inodesc *); /* * Propagate connected state through the tree. */ void propagate(void) { struct inoinfo **inpp, *inp; struct inoinfo **inpend; long change; inpend = &inpsort[inplast]; do { change = 0; for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_parent == 0) continue; if (inoinfo(inp->i_parent)->ino_state == DFOUND && INO_IS_DUNFOUND(inp->i_number)) { inoinfo(inp->i_number)->ino_state = DFOUND; change++; } } } while (change > 0); } /* * Scan each entry in a directory block. 
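 * Each entry is copied into a private buffer and handed to the id_func
 * callback in the inodesc; if the callback reports ALTERED, the
 * (possibly modified) copy is written back into the directory block and
 * the buffer is marked dirty.  The scan stops early when the callback
 * returns STOP.  A typical caller fills in an inodesc and walks the
 * directory's data blocks, as linkup() does below, e.g.:
 *
 *	memset(&idesc, 0, sizeof(struct inodesc));
 *	idesc.id_type = DATA;
 *	idesc.id_func = findino;
 *	idesc.id_name = strdup(name);
 *	idesc.id_number = inumber;
 *	(void)ckinode(ginode(inumber), &idesc);
 *
 * and on success (FOUND set in the return value) the matching inode
 * number is left in idesc.id_parent.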
*/ int dirscan(struct inodesc *idesc) { struct direct *dp; struct bufarea *bp; u_int dsize, n; long blksiz; char dbuf[DIRBLKSIZ]; if (idesc->id_type != DATA) errx(EEXIT, "wrong type to dirscan %d", idesc->id_type); if (idesc->id_entryno == 0 && (idesc->id_filesize & (DIRBLKSIZ - 1)) != 0) idesc->id_filesize = roundup(idesc->id_filesize, DIRBLKSIZ); blksiz = idesc->id_numfrags * sblock.fs_fsize; if (chkrange(idesc->id_blkno, idesc->id_numfrags)) { idesc->id_filesize -= blksiz; return (SKIP); } idesc->id_loc = 0; for (dp = fsck_readdir(idesc); dp != NULL; dp = fsck_readdir(idesc)) { dsize = dp->d_reclen; if (dsize > sizeof(dbuf)) dsize = sizeof(dbuf); memmove(dbuf, dp, (size_t)dsize); idesc->id_dirp = (struct direct *)dbuf; if ((n = (*idesc->id_func)(idesc)) & ALTERED) { bp = getdirblk(idesc->id_blkno, blksiz); memmove(bp->b_un.b_buf + idesc->id_loc - dsize, dbuf, (size_t)dsize); dirty(bp); sbdirty(); rerun = 1; } if (n & STOP) return (n); } return (idesc->id_filesize > 0 ? KEEPON : STOP); } /* * get next entry in a directory. */ static struct direct * fsck_readdir(struct inodesc *idesc) { struct direct *dp, *ndp; struct bufarea *bp; long size, blksiz, fix, dploc; + int dc; blksiz = idesc->id_numfrags * sblock.fs_fsize; bp = getdirblk(idesc->id_blkno, blksiz); if (idesc->id_loc % DIRBLKSIZ == 0 && idesc->id_filesize > 0 && idesc->id_loc < blksiz) { dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); - if (dircheck(idesc, dp)) + if ((dc = dircheck(idesc, dp)) > 0) { + if (dc == 2) { + /* + * dircheck() cleared unused directory space. + * Mark the buffer as dirty to write it out. + */ + dirty(bp); + } goto dpok; + } if (idesc->id_fix == IGNORE) return (0); fix = dofix(idesc, "DIRECTORY CORRUPTED"); bp = getdirblk(idesc->id_blkno, blksiz); dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); dp->d_reclen = DIRBLKSIZ; dp->d_ino = 0; dp->d_type = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; if (fix) dirty(bp); idesc->id_loc += DIRBLKSIZ; idesc->id_filesize -= DIRBLKSIZ; return (dp); } dpok: if (idesc->id_filesize <= 0 || idesc->id_loc >= blksiz) return NULL; dploc = idesc->id_loc; dp = (struct direct *)(bp->b_un.b_buf + dploc); idesc->id_loc += dp->d_reclen; idesc->id_filesize -= dp->d_reclen; if ((idesc->id_loc % DIRBLKSIZ) == 0) return (dp); ndp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); - if (idesc->id_loc < blksiz && idesc->id_filesize > 0 && - dircheck(idesc, ndp) == 0) { - size = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); - idesc->id_loc += size; - idesc->id_filesize -= size; - if (idesc->id_fix == IGNORE) - return (0); - fix = dofix(idesc, "DIRECTORY CORRUPTED"); - bp = getdirblk(idesc->id_blkno, blksiz); - dp = (struct direct *)(bp->b_un.b_buf + dploc); - dp->d_reclen += size; - if (fix) + if (idesc->id_loc < blksiz && idesc->id_filesize > 0) { + if ((dc = dircheck(idesc, ndp)) == 0) { + size = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); + idesc->id_loc += size; + idesc->id_filesize -= size; + if (idesc->id_fix == IGNORE) + return (0); + fix = dofix(idesc, "DIRECTORY CORRUPTED"); + bp = getdirblk(idesc->id_blkno, blksiz); + dp = (struct direct *)(bp->b_un.b_buf + dploc); + dp->d_reclen += size; + if (fix) + dirty(bp); + } else if (dc == 2) { + /* + * dircheck() cleared unused directory space. + * Mark the buffer as dirty to write it out. + */ dirty(bp); + } } return (dp); } /* * Verify that a directory entry is valid. * This is a superset of the checks made in the kernel. + * Also optionally clears padding and unused directory space. 
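 * For example, with DIR_ROUNDUP = 4 a 5-character name occupies
 * namlen + 1 = 6 bytes including the trailing NUL, so
 * roundup2(6, 4) - 6 = 2 pad bytes follow the string; any slack
 * between DIRSIZ(0, dp) and d_reclen at the end of the record is
 * cleared as well.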
+ * + * Returns 0 if the entry is bad, 1 if the entry is good and no changes + * were made, and 2 if the entry is good but modified to clear out padding + * and unused space and needs to be written back to disk. */ static int dircheck(struct inodesc *idesc, struct direct *dp) { size_t size; char *cp; u_char type; u_int8_t namlen; - int spaceleft; + int spaceleft, modified, unused; + modified = 0; spaceleft = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); if (dp->d_reclen == 0 || dp->d_reclen > spaceleft || - (dp->d_reclen & 0x3) != 0) + (dp->d_reclen & (DIR_ROUNDUP - 1)) != 0) goto bad; - if (dp->d_ino == 0) - return (1); + if (dp->d_ino == 0) { + /* + * Special case of an unused directory entry. Normally + * the kernel would coalesce unused space with the previous + * entry by extending its d_reclen, but there are situations + * (e.g. fsck) where that doesn't occur. + * If we're clearing out directory cruft (-z flag), then make + * sure this entry gets fully cleared as well. + */ + if (zflag && fswritefd >= 0) { + if (dp->d_type != 0) { + dp->d_type = 0; + modified = 1; + } + if (dp->d_namlen != 0) { + dp->d_namlen = 0; + modified = 1; + } + if (dp->d_name[0] != '\0') { + dp->d_name[0] = '\0'; + modified = 1; + } + } + goto good; + } size = DIRSIZ(0, dp); namlen = dp->d_namlen; type = dp->d_type; if (dp->d_reclen < size || idesc->id_filesize < size || namlen == 0 || type > 15) goto bad; for (cp = dp->d_name, size = 0; size < namlen; size++) if (*cp == '\0' || (*cp++ == '/')) goto bad; if (*cp != '\0') goto bad; + +good: + if (zflag && fswritefd >= 0) { + /* + * Clear unused directory entry space, including the d_name + * padding. + */ + /* First figure the number of pad bytes. */ + unused = roundup2(namlen + 1, DIR_ROUNDUP) - (namlen + 1); + + /* Add in the free space to the end of the record. */ + unused += dp->d_reclen - DIRSIZ(0, dp); + + /* + * Now clear out the unused space, keeping track if we actually + * changed anything. + */ + for (cp = &dp->d_name[namlen + 1]; unused > 0; unused--, cp++) { + if (*cp != '\0') { + *cp = '\0'; + modified = 1; + } + } + + if (modified) { + return 2; + } + } + return (1); + bad: if (debug) printf("Bad dir: ino %d reclen %d namlen %d type %d name %s\n", dp->d_ino, dp->d_reclen, dp->d_namlen, dp->d_type, dp->d_name); return (0); } void direrror(ino_t ino, const char *errmesg) { fileerror(ino, ino, errmesg); } void fileerror(ino_t cwd, ino_t ino, const char *errmesg) { union dinode *dp; char pathbuf[MAXPATHLEN + 1]; pwarn("%s ", errmesg); if (ino < UFS_ROOTINO || ino > maxino) { pfatal("out-of-range inode number %ju", (uintmax_t)ino); return; } dp = ginode(ino); prtinode(ino, dp); printf("\n"); getpathname(pathbuf, cwd, ino); if (ftypeok(dp)) pfatal("%s=%s\n", (DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE", pathbuf); else pfatal("NAME=%s\n", pathbuf); } void adjust(struct inodesc *idesc, int lcnt) { union dinode *dp; int saveresolved; dp = ginode(idesc->id_number); if (DIP(dp, di_nlink) == lcnt) { /* * If we have not hit any unresolved problems, are running * in preen mode, and are on a file system using soft updates, * then just toss any partially allocated files. */ if (resolved && (preen || bkgrdflag) && usedsoftdep) { clri(idesc, "UNREF", 1); return; } else { /* * The file system can be marked clean even if * a file is not linked up, but is cleared. * Hence, resolved should not be cleared when * linkup is answered no, but clri is answered yes. 
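 * (When fsck runs in background mode, bkgrdflag is set and the
 * correction below is requested from the kernel through the
 * adjrefcnt sysctl rather than by writing the inode directly.)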
*/ saveresolved = resolved; if (linkup(idesc->id_number, (ino_t)0, NULL) == 0) { resolved = saveresolved; clri(idesc, "UNREF", 0); return; } /* * Account for the new reference created by linkup(). */ dp = ginode(idesc->id_number); lcnt--; } } if (lcnt != 0) { pwarn("LINK COUNT %s", (lfdir == idesc->id_number) ? lfname : ((DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE")); prtinode(idesc->id_number, dp); printf(" COUNT %d SHOULD BE %d", DIP(dp, di_nlink), DIP(dp, di_nlink) - lcnt); if (preen || usedsoftdep) { if (lcnt < 0) { printf("\n"); pfatal("LINK COUNT INCREASING"); } if (preen) printf(" (ADJUSTED)\n"); } if (preen || reply("ADJUST") == 1) { if (bkgrdflag == 0) { DIP_SET(dp, di_nlink, DIP(dp, di_nlink) - lcnt); inodirty(dp); } else { cmd.value = idesc->id_number; cmd.size = -lcnt; if (debug) printf("adjrefcnt ino %ld amt %lld\n", (long)cmd.value, (long long)cmd.size); if (sysctl(adjrefcnt, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST INODE", cmd.value); } } } } static int mkentry(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; struct direct newent; int newlen, oldlen; newent.d_namlen = strlen(idesc->id_name); newlen = DIRSIZ(0, &newent); if (dirp->d_ino != 0) oldlen = DIRSIZ(0, dirp); else oldlen = 0; if (dirp->d_reclen - oldlen < newlen) return (KEEPON); newent.d_reclen = dirp->d_reclen - oldlen; dirp->d_reclen = oldlen; dirp = (struct direct *)(((char *)dirp) + oldlen); dirp->d_ino = idesc->id_parent; /* ino to be entered is in id_parent */ dirp->d_reclen = newent.d_reclen; dirp->d_type = inoinfo(idesc->id_parent)->ino_type; dirp->d_namlen = newent.d_namlen; memmove(dirp->d_name, idesc->id_name, (size_t)newent.d_namlen + 1); return (ALTERED|STOP); } static int chgino(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (memcmp(dirp->d_name, idesc->id_name, (int)dirp->d_namlen + 1)) return (KEEPON); dirp->d_ino = idesc->id_parent; dirp->d_type = inoinfo(idesc->id_parent)->ino_type; return (ALTERED|STOP); } int linkup(ino_t orphan, ino_t parentdir, char *name) { union dinode *dp; int lostdir; ino_t oldlfdir; struct inodesc idesc; char tempname[BUFSIZ]; memset(&idesc, 0, sizeof(struct inodesc)); dp = ginode(orphan); lostdir = (DIP(dp, di_mode) & IFMT) == IFDIR; pwarn("UNREF %s ", lostdir ? "DIR" : "FILE"); prtinode(orphan, dp); printf("\n"); if (preen && DIP(dp, di_size) == 0) return (0); if (cursnapshot != 0) { pfatal("FILE LINKUP IN SNAPSHOT"); return (0); } if (preen) printf(" (RECONNECTED)\n"); else if (reply("RECONNECT") == 0) return (0); if (lfdir == 0) { dp = ginode(UFS_ROOTINO); idesc.id_name = strdup(lfname); idesc.id_type = DATA; idesc.id_func = findino; idesc.id_number = UFS_ROOTINO; if ((ckinode(dp, &idesc) & FOUND) != 0) { lfdir = idesc.id_parent; } else { pwarn("NO lost+found DIRECTORY"); if (preen || reply("CREATE")) { lfdir = allocdir(UFS_ROOTINO, (ino_t)0, lfmode); if (lfdir != 0) { if (makeentry(UFS_ROOTINO, lfdir, lfname) != 0) { numdirs++; if (preen) printf(" (CREATED)\n"); } else { freedir(lfdir, UFS_ROOTINO); lfdir = 0; if (preen) printf("\n"); } } } } if (lfdir == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY"); printf("\n\n"); return (0); } } dp = ginode(lfdir); if ((DIP(dp, di_mode) & IFMT) != IFDIR) { pfatal("lost+found IS NOT A DIRECTORY"); if (reply("REALLOCATE") == 0) return (0); oldlfdir = lfdir; if ((lfdir = allocdir(UFS_ROOTINO, (ino_t)0, lfmode)) == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } if ((changeino(UFS_ROOTINO, lfname, lfdir) & ALTERED) == 0) { pfatal("SORRY. 
CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } inodirty(dp); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = oldlfdir; adjust(&idesc, inoinfo(oldlfdir)->ino_linkcnt + 1); inoinfo(oldlfdir)->ino_linkcnt = 0; dp = ginode(lfdir); } if (inoinfo(lfdir)->ino_state != DFOUND) { pfatal("SORRY. NO lost+found DIRECTORY\n\n"); return (0); } (void)lftempname(tempname, orphan); if (makeentry(lfdir, orphan, (name ? name : tempname)) == 0) { pfatal("SORRY. NO SPACE IN lost+found DIRECTORY"); printf("\n\n"); return (0); } inoinfo(orphan)->ino_linkcnt--; if (lostdir) { if ((changeino(orphan, "..", lfdir) & ALTERED) == 0 && parentdir != (ino_t)-1) (void)makeentry(orphan, lfdir, ".."); dp = ginode(lfdir); DIP_SET(dp, di_nlink, DIP(dp, di_nlink) + 1); inodirty(dp); inoinfo(lfdir)->ino_linkcnt++; pwarn("DIR I=%lu CONNECTED. ", (u_long)orphan); if (parentdir != (ino_t)-1) { printf("PARENT WAS I=%lu\n", (u_long)parentdir); /* * The parent directory, because of the ordering * guarantees, has had the link count incremented * for the child, but no entry was made. This * fixes the parent link count so that fsck does * not need to be rerun. */ inoinfo(parentdir)->ino_linkcnt++; } if (preen == 0) printf("\n"); } return (1); } /* * fix an entry in a directory. */ int changeino(ino_t dir, const char *name, ino_t newnum) { struct inodesc idesc; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_func = chgino; idesc.id_number = dir; idesc.id_fix = DONTKNOW; idesc.id_name = strdup(name); idesc.id_parent = newnum; /* new value for name */ return (ckinode(ginode(dir), &idesc)); } /* * make an entry in a directory */ int makeentry(ino_t parent, ino_t ino, const char *name) { union dinode *dp; struct inodesc idesc; char pathbuf[MAXPATHLEN + 1]; if (parent < UFS_ROOTINO || parent >= maxino || ino < UFS_ROOTINO || ino >= maxino) return (0); memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_func = mkentry; idesc.id_number = parent; idesc.id_parent = ino; /* this is the inode to enter */ idesc.id_fix = DONTKNOW; idesc.id_name = strdup(name); dp = ginode(parent); if (DIP(dp, di_size) % DIRBLKSIZ) { DIP_SET(dp, di_size, roundup(DIP(dp, di_size), DIRBLKSIZ)); inodirty(dp); } if ((ckinode(dp, &idesc) & ALTERED) != 0) return (1); getpathname(pathbuf, parent, parent); dp = ginode(parent); if (expanddir(dp, pathbuf) == 0) return (0); return (ckinode(dp, &idesc) & ALTERED); } /* * Attempt to expand the size of a directory */ static int expanddir(union dinode *dp, char *name) { ufs2_daddr_t lastbn, newblk; struct bufarea *bp; char *cp, firstblk[DIRBLKSIZ]; lastbn = lblkno(&sblock, DIP(dp, di_size)); if (lastbn >= UFS_NDADDR - 1 || DIP(dp, di_db[lastbn]) == 0 || DIP(dp, di_size) == 0) return (0); if ((newblk = allocblk(sblock.fs_frag)) == 0) return (0); DIP_SET(dp, di_db[lastbn + 1], DIP(dp, di_db[lastbn])); DIP_SET(dp, di_db[lastbn], newblk); DIP_SET(dp, di_size, DIP(dp, di_size) + sblock.fs_bsize); DIP_SET(dp, di_blocks, DIP(dp, di_blocks) + btodb(sblock.fs_bsize)); bp = getdirblk(DIP(dp, di_db[lastbn + 1]), sblksize(&sblock, DIP(dp, di_size), lastbn + 1)); if (bp->b_errs) goto bad; memmove(firstblk, bp->b_un.b_buf, DIRBLKSIZ); bp = getdirblk(newblk, sblock.fs_bsize); if (bp->b_errs) goto bad; memmove(bp->b_un.b_buf, firstblk, DIRBLKSIZ); for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; cp < &bp->b_un.b_buf[sblock.fs_bsize]; cp += DIRBLKSIZ) memmove(cp, &emptydir, sizeof emptydir); dirty(bp); bp = getdirblk(DIP(dp, di_db[lastbn + 1]), sblksize(&sblock, DIP(dp, di_size), 
lastbn + 1)); if (bp->b_errs) goto bad; memmove(bp->b_un.b_buf, &emptydir, sizeof emptydir); pwarn("NO SPACE LEFT IN %s", name); if (preen) printf(" (EXPANDED)\n"); else if (reply("EXPAND") == 0) goto bad; dirty(bp); inodirty(dp); return (1); bad: DIP_SET(dp, di_db[lastbn], DIP(dp, di_db[lastbn + 1])); DIP_SET(dp, di_db[lastbn + 1], 0); DIP_SET(dp, di_size, DIP(dp, di_size) - sblock.fs_bsize); DIP_SET(dp, di_blocks, DIP(dp, di_blocks) - btodb(sblock.fs_bsize)); freeblk(newblk, sblock.fs_frag); return (0); } /* * allocate a new directory */ ino_t allocdir(ino_t parent, ino_t request, int mode) { ino_t ino; char *cp; union dinode *dp; struct bufarea *bp; struct inoinfo *inp; struct dirtemplate *dirp; ino = allocino(request, IFDIR|mode); dirp = &dirhead; dirp->dot_ino = ino; dirp->dotdot_ino = parent; dp = ginode(ino); bp = getdirblk(DIP(dp, di_db[0]), sblock.fs_fsize); if (bp->b_errs) { freeino(ino); return (0); } memmove(bp->b_un.b_buf, dirp, sizeof(struct dirtemplate)); for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; cp < &bp->b_un.b_buf[sblock.fs_fsize]; cp += DIRBLKSIZ) memmove(cp, &emptydir, sizeof emptydir); dirty(bp); DIP_SET(dp, di_nlink, 2); inodirty(dp); if (ino == UFS_ROOTINO) { inoinfo(ino)->ino_linkcnt = DIP(dp, di_nlink); cacheino(dp, ino); return(ino); } if (!INO_IS_DVALID(parent)) { freeino(ino); return (0); } cacheino(dp, ino); inp = getinoinfo(ino); inp->i_parent = parent; inp->i_dotdot = parent; inoinfo(ino)->ino_state = inoinfo(parent)->ino_state; if (inoinfo(ino)->ino_state == DSTATE) { inoinfo(ino)->ino_linkcnt = DIP(dp, di_nlink); inoinfo(parent)->ino_linkcnt++; } dp = ginode(parent); DIP_SET(dp, di_nlink, DIP(dp, di_nlink) + 1); inodirty(dp); return (ino); } /* * free a directory inode */ static void freedir(ino_t ino, ino_t parent) { union dinode *dp; if (ino != parent) { dp = ginode(parent); DIP_SET(dp, di_nlink, DIP(dp, di_nlink) - 1); inodirty(dp); } freeino(ino); } /* * generate a temporary name for the lost+found directory. */ static int lftempname(char *bufp, ino_t ino) { ino_t in; char *cp; int namlen; cp = bufp + 2; for (in = maxino; in > 0; in /= 10) cp++; *--cp = 0; namlen = cp - bufp; in = ino; while (cp > bufp) { *--cp = (in % 10) + '0'; in /= 10; } *cp = '#'; return (namlen); } /* * Get a directory block. * Insure that it is held until another is requested. */ static struct bufarea * getdirblk(ufs2_daddr_t blkno, long size) { if (pdirbp != NULL) pdirbp->b_flags &= ~B_INUSE; pdirbp = getdatablk(blkno, size, BT_DIRDATA); return (pdirbp); } Index: projects/runtime-coverage-v2/sbin/fsck_ffs/fsck.h =================================================================== --- projects/runtime-coverage-v2/sbin/fsck_ffs/fsck.h (revision 347075) +++ projects/runtime-coverage-v2/sbin/fsck_ffs/fsck.h (revision 347076) @@ -1,484 +1,485 @@ /*- * SPDX-License-Identifier: BSD-3-Clause and BSD-2-Clause-FreeBSD * * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and Network Associates Laboratories, the Security * Research Division of Network Associates, Inc. under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS * research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fsck.h 8.4 (Berkeley) 5/9/95 * $FreeBSD$ */ #ifndef _FSCK_H_ #define _FSCK_H_ #include #include #include #include #define MAXDUP 10 /* limit on dup blks (per inode) */ #define MAXBAD 10 /* limit on bad blks (per inode) */ #define MINBUFS 10 /* minimum number of buffers required */ #define MAXBUFS 40 /* maximum space to allocate to buffers */ #define INOBUFSIZE 64*1024 /* size of buffer to read inodes in pass1 */ #define ZEROBUFSIZE (dev_bsize * 128) /* size of zero buffer used by -Z */ union dinode { struct ufs1_dinode dp1; struct ufs2_dinode dp2; }; #define DIP(dp, field) \ ((sblock.fs_magic == FS_UFS1_MAGIC) ? \ (dp)->dp1.field : (dp)->dp2.field) #define DIP_SET(dp, field, val) do { \ if (sblock.fs_magic == FS_UFS1_MAGIC) \ (dp)->dp1.field = (val); \ else \ (dp)->dp2.field = (val); \ } while (0) /* * Each inode on the file system is described by the following structure. * The linkcnt is initially set to the value in the inode. 
Each time it * is found during the descent in passes 2, 3, and 4 the count is * decremented. Any inode whose count is non-zero after pass 4 needs to * have its link count adjusted by the value remaining in ino_linkcnt. */ struct inostat { char ino_state; /* state of inode, see below */ char ino_type; /* type of inode */ short ino_linkcnt; /* number of links not found */ }; /* * Inode states. */ #define USTATE 0x1 /* inode not allocated */ #define FSTATE 0x2 /* inode is file */ #define FZLINK 0x3 /* inode is file with a link count of zero */ #define DSTATE 0x4 /* inode is directory */ #define DZLINK 0x5 /* inode is directory with a zero link count */ #define DFOUND 0x6 /* directory found during descent */ /* 0x7 UNUSED - see S_IS_DVALID() definition */ #define DCLEAR 0x8 /* directory is to be cleared */ #define FCLEAR 0x9 /* file is to be cleared */ /* DUNFOUND === (state == DSTATE || state == DZLINK) */ #define S_IS_DUNFOUND(state) (((state) & ~0x1) == DSTATE) /* DVALID === (state == DSTATE || state == DZLINK || state == DFOUND) */ #define S_IS_DVALID(state) (((state) & ~0x3) == DSTATE) #define INO_IS_DUNFOUND(ino) S_IS_DUNFOUND(inoinfo(ino)->ino_state) #define INO_IS_DVALID(ino) S_IS_DVALID(inoinfo(ino)->ino_state) /* * Inode state information is contained on per cylinder group lists * which are described by the following structure. */ struct inostatlist { long il_numalloced; /* number of inodes allocated in this cg */ struct inostat *il_stat;/* inostat info for this cylinder group */ } *inostathead; /* * buffer cache structure. */ struct bufarea { TAILQ_ENTRY(bufarea) b_list; /* buffer list */ ufs2_daddr_t b_bno; int b_size; int b_errs; int b_flags; int b_type; union { char *b_buf; /* buffer space */ ufs1_daddr_t *b_indir1; /* UFS1 indirect block */ ufs2_daddr_t *b_indir2; /* UFS2 indirect block */ struct fs *b_fs; /* super block */ struct cg *b_cg; /* cylinder group */ struct ufs1_dinode *b_dinode1; /* UFS1 inode block */ struct ufs2_dinode *b_dinode2; /* UFS2 inode block */ } b_un; char b_dirty; }; #define IBLK(bp, i) \ ((sblock.fs_magic == FS_UFS1_MAGIC) ?
\ (bp)->b_un.b_indir1[i] : (bp)->b_un.b_indir2[i]) #define IBLK_SET(bp, i, val) do { \ if (sblock.fs_magic == FS_UFS1_MAGIC) \ (bp)->b_un.b_indir1[i] = (val); \ else \ (bp)->b_un.b_indir2[i] = (val); \ } while (0) /* * Buffer flags */ #define B_INUSE 0x00000001 /* Buffer is in use */ /* * Type of data in buffer */ #define BT_UNKNOWN 0 /* Buffer type is unknown */ #define BT_SUPERBLK 1 /* Buffer holds a superblock */ #define BT_CYLGRP 2 /* Buffer holds a cylinder group map */ #define BT_LEVEL1 3 /* Buffer holds single level indirect */ #define BT_LEVEL2 4 /* Buffer holds double level indirect */ #define BT_LEVEL3 5 /* Buffer holds triple level indirect */ #define BT_EXTATTR 6 /* Buffer holds external attribute data */ #define BT_INODES 7 /* Buffer holds inode blocks */ #define BT_DIRDATA 8 /* Buffer holds directory data */ #define BT_DATA 9 /* Buffer holds user data */ #define BT_NUMBUFTYPES 10 #define BT_NAMES { \ "unknown", \ "Superblock", \ "Cylinder Group", \ "Single Level Indirect", \ "Double Level Indirect", \ "Triple Level Indirect", \ "External Attribute", \ "Inode Block", \ "Directory Contents", \ "User Data" } extern long readcnt[BT_NUMBUFTYPES]; extern long totalreadcnt[BT_NUMBUFTYPES]; extern struct timespec readtime[BT_NUMBUFTYPES]; extern struct timespec totalreadtime[BT_NUMBUFTYPES]; extern struct timespec startprog; extern struct bufarea sblk; /* file system superblock */ extern struct bufarea *pdirbp; /* current directory contents */ extern struct bufarea *pbp; /* current inode block */ #define dirty(bp) do { \ if (fswritefd < 0) \ pfatal("SETTING DIRTY FLAG IN READ_ONLY MODE\n"); \ else \ (bp)->b_dirty = 1; \ } while (0) #define initbarea(bp, type) do { \ (bp)->b_dirty = 0; \ (bp)->b_bno = (ufs2_daddr_t)-1; \ (bp)->b_flags = 0; \ (bp)->b_type = type; \ } while (0) #define sbdirty() dirty(&sblk) #define sblock (*sblk.b_un.b_fs) enum fixstate {DONTKNOW, NOFIX, FIX, IGNORE}; extern ino_t cursnapshot; struct inodesc { enum fixstate id_fix; /* policy on fixing errors */ int (*id_func)(struct inodesc *); /* function to be applied to blocks of inode */ ino_t id_number; /* inode number described */ ino_t id_parent; /* for DATA nodes, their parent */ ufs_lbn_t id_lbn; /* logical block number of current block */ ufs2_daddr_t id_blkno; /* current block number being examined */ int id_level; /* level of indirection of this block */ int id_numfrags; /* number of frags contained in block */ ufs_lbn_t id_lballoc; /* pass1: last LBN that is allocated */ off_t id_filesize; /* for DATA nodes, the size of the directory */ ufs2_daddr_t id_entryno;/* for DATA nodes, current entry number */ int id_loc; /* for DATA nodes, current location in dir */ struct direct *id_dirp; /* for DATA nodes, ptr to current entry */ char *id_name; /* for DATA nodes, name to find or enter */ char id_type; /* type of descriptor, DATA or ADDR */ }; /* file types */ #define DATA 1 /* a directory */ #define SNAP 2 /* a snapshot */ #define ADDR 3 /* anything but a directory or a snapshot */ /* * Linked list of duplicate blocks. * * The list is composed of two parts. The first part of the * list (from duplist through the node pointed to by muldup) * contains a single copy of each duplicate block that has been * found. The second part of the list (from muldup to the end) * contains duplicate blocks that have been found more than once. * To check if a block has been found as a duplicate it is only * necessary to search from duplist through muldup.
To find the * total number of times that a block has been found as a duplicate * the entire list must be searched for occurrences of the block * in question. The following diagram shows a sample list where * w (found twice), x (found once), y (found three times), and z * (found once) are duplicate block numbers: * * w -> y -> x -> z -> y -> w -> y * ^ ^ * | | * duplist muldup */ struct dups { struct dups *next; ufs2_daddr_t dup; }; struct dups *duplist; /* head of dup list */ struct dups *muldup; /* end of unique duplicate dup block numbers */ /* * Inode cache data structures. */ struct inoinfo { struct inoinfo *i_nexthash; /* next entry in hash chain */ ino_t i_number; /* inode number of this entry */ ino_t i_parent; /* inode number of parent */ ino_t i_dotdot; /* inode number of `..' */ size_t i_isize; /* size of inode */ u_int i_numblks; /* size of block array in bytes */ ufs2_daddr_t i_blks[1]; /* actually longer */ } **inphead, **inpsort; extern long dirhash, inplast; extern unsigned long numdirs, listmax; extern long countdirs; /* number of directories we actually found */ #define MIBSIZE 3 /* size of fsck sysctl MIBs */ extern int adjrefcnt[MIBSIZE]; /* MIB command to adjust inode reference cnt */ extern int adjblkcnt[MIBSIZE]; /* MIB command to adjust inode block count */ extern int setsize[MIBSIZE]; /* MIB command to set inode size */ extern int adjndir[MIBSIZE]; /* MIB command to adjust number of directories */ extern int adjnbfree[MIBSIZE]; /* MIB command to adjust number of free blocks */ extern int adjnifree[MIBSIZE]; /* MIB command to adjust number of free inodes */ extern int adjnffree[MIBSIZE]; /* MIB command to adjust number of free frags */ extern int adjnumclusters[MIBSIZE]; /* MIB command to adjust number of free clusters */ extern int freefiles[MIBSIZE]; /* MIB command to free a set of files */ extern int freedirs[MIBSIZE]; /* MIB command to free a set of directories */ extern int freeblks[MIBSIZE]; /* MIB command to free a set of data blocks */ extern struct fsck_cmd cmd; /* sysctl file system update commands */ extern char snapname[BUFSIZ]; /* when doing snapshots, the name of the file */ extern char *cdevname; /* name of device being checked */ extern long dev_bsize; /* computed value of DEV_BSIZE */ extern long secsize; /* actual disk sector size */ extern u_int real_dev_bsize; /* actual disk sector size, not overridden */ extern char nflag; /* assume a no response */ extern char yflag; /* assume a yes response */ extern int bkgrdflag; /* use a snapshot to run on an active system */ extern off_t bflag; /* location of alternate super block */ extern int debug; /* output debugging info */ extern int Eflag; /* delete empty data blocks */ extern int Zflag; /* zero empty data blocks */ +extern int zflag; /* zero unused directory space */ extern int inoopt; /* trim out unused inodes */ extern char ckclean; /* only do work if not cleanly unmounted */ extern int cvtlevel; /* convert to newer file system format */ extern int ckhashadd; /* check hashes to be added */ extern int bkgrdcheck; /* determine if background check is possible */ extern int bkgrdsumadj; /* whether the kernel have ability to adjust superblock summary */ extern char usedsoftdep; /* just fix soft dependency inconsistencies */ extern char preen; /* just fix normal inconsistencies */ extern char rerun; /* rerun fsck. 
Only used in non-preen mode */ extern int returntosingle; /* 1 => return to single user mode on exit */ extern char resolved; /* cleared if unresolved changes => not clean */ extern char havesb; /* superblock has been read */ extern char skipclean; /* skip clean file systems if preening */ extern int fsmodified; /* 1 => write done to file system */ extern int fsreadfd; /* file descriptor for reading file system */ extern int fswritefd; /* file descriptor for writing file system */ extern struct uufsd disk; /* libufs user-ufs disk structure */ extern int surrender; /* Give up if reads fail */ extern int wantrestart; /* Restart fsck on early termination */ extern ufs2_daddr_t maxfsblock; /* number of blocks in the file system */ extern char *blockmap; /* ptr to primary blk allocation map */ extern ino_t maxino; /* number of inodes in file system */ extern ino_t lfdir; /* lost & found directory inode number */ extern const char *lfname; /* lost & found directory name */ extern int lfmode; /* lost & found directory creation mode */ extern ufs2_daddr_t n_blks; /* number of blocks in use */ extern ino_t n_files; /* number of files in use */ extern volatile sig_atomic_t got_siginfo; /* received a SIGINFO */ extern volatile sig_atomic_t got_sigalarm; /* received a SIGALRM */ #define clearinode(dp) \ if (sblock.fs_magic == FS_UFS1_MAGIC) { \ (dp)->dp1 = ufs1_zino; \ } else { \ (dp)->dp2 = ufs2_zino; \ } extern struct ufs1_dinode ufs1_zino; extern struct ufs2_dinode ufs2_zino; #define setbmap(blkno) setbit(blockmap, blkno) #define testbmap(blkno) isset(blockmap, blkno) #define clrbmap(blkno) clrbit(blockmap, blkno) #define STOP 0x01 #define SKIP 0x02 #define KEEPON 0x04 #define ALTERED 0x08 #define FOUND 0x10 #define EEXIT 8 /* Standard error exit. */ #define ERERUN 16 /* fsck needs to be re-run. */ #define ERESTART -1 int flushentry(void); /* * Wrapper for malloc() that flushes the cylinder group cache to try * to get space. */ static inline void* Malloc(size_t size) { void *retval; while ((retval = malloc(size)) == NULL) if (flushentry() == 0) break; return (retval); } /* * Wrapper for calloc() that flushes the cylinder group cache to try * to get space. 
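 * As with Malloc() above, the loop keeps retrying the allocation as
 * long as flushentry() can release another cached cylinder group
 * buffer, and gives up (returning NULL) once nothing is left to flush.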
*/ static inline void* Calloc(size_t cnt, size_t size) { void *retval; while ((retval = calloc(cnt, size)) == NULL) if (flushentry() == 0) break; return (retval); } struct fstab; void adjust(struct inodesc *, int lcnt); ufs2_daddr_t allocblk(long frags); ino_t allocdir(ino_t parent, ino_t request, int mode); ino_t allocino(ino_t request, int type); void blkerror(ino_t ino, const char *type, ufs2_daddr_t blk); char *blockcheck(char *name); int blread(int fd, char *buf, ufs2_daddr_t blk, long size); void bufinit(void); void blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size); void blerase(int fd, ufs2_daddr_t blk, long size); void blzero(int fd, ufs2_daddr_t blk, long size); void cacheino(union dinode *dp, ino_t inumber); void catch(int); void catchquit(int); void cgdirty(struct bufarea *); int changeino(ino_t dir, const char *name, ino_t newnum); int check_cgmagic(int cg, struct bufarea *cgbp); int chkrange(ufs2_daddr_t blk, int cnt); void ckfini(int markclean); int ckinode(union dinode *dp, struct inodesc *); void clri(struct inodesc *, const char *type, int flag); int clearentry(struct inodesc *); void direrror(ino_t ino, const char *errmesg); int dirscan(struct inodesc *); int dofix(struct inodesc *, const char *msg); int eascan(struct inodesc *, struct ufs2_dinode *dp); void fileerror(ino_t cwd, ino_t ino, const char *errmesg); void finalIOstats(void); int findino(struct inodesc *); int findname(struct inodesc *); void flush(int fd, struct bufarea *bp); void freeblk(ufs2_daddr_t blkno, long frags); void freeino(ino_t ino); void freeinodebuf(void); void fsutilinit(void); int ftypeok(union dinode *dp); void getblk(struct bufarea *bp, ufs2_daddr_t blk, long size); struct bufarea *cglookup(int cg); struct bufarea *getdatablk(ufs2_daddr_t blkno, long size, int type); struct inoinfo *getinoinfo(ino_t inumber); union dinode *getnextinode(ino_t inumber, int rebuildcg); void getpathname(char *namebuf, ino_t curdir, ino_t ino); union dinode *ginode(ino_t inumber); void infohandler(int sig); void alarmhandler(int sig); void inocleanup(void); void inodirty(union dinode *); struct inostat *inoinfo(ino_t inum); void IOstats(char *what); int linkup(ino_t orphan, ino_t parentdir, char *name); int makeentry(ino_t parent, ino_t ino, const char *name); void panic(const char *fmt, ...) __printflike(1, 2); void pass1(void); void pass1b(void); int pass1check(struct inodesc *); void pass2(void); void pass3(void); void pass4(void); int pass4check(struct inodesc *); void pass5(void); void pfatal(const char *fmt, ...) __printflike(1, 2); void propagate(void); void prtinode(ino_t ino, union dinode *dp); void pwarn(const char *fmt, ...) __printflike(1, 2); int readsb(int listerr); int reply(const char *question); void rwerror(const char *mesg, ufs2_daddr_t blk); void sblock_init(void); void setinodebuf(ino_t); int setup(char *dev); void gjournal_check(const char *filesys); int suj_check(const char *filesys); void update_maps(struct cg *, struct cg*, int); void fsckinit(void); #endif /* !_FSCK_H_ */ Index: projects/runtime-coverage-v2/sbin/fsck_ffs/fsck_ffs.8 =================================================================== --- projects/runtime-coverage-v2/sbin/fsck_ffs/fsck_ffs.8 (revision 347075) +++ projects/runtime-coverage-v2/sbin/fsck_ffs/fsck_ffs.8 (revision 347076) @@ -1,440 +1,443 @@ .\" .\" Copyright (c) 1980, 1989, 1991, 1993 .\" The Regents of the University of California. All rights reserved. 
.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)fsck.8 8.4 (Berkeley) 5/9/95 .\" $FreeBSD$ .\" -.Dd January 13, 2018 +.Dd May 3, 2019 .Dt FSCK_FFS 8 .Os .Sh NAME .Nm fsck_ffs , .Nm fsck_ufs .Nd file system consistency check and interactive repair .Sh SYNOPSIS .Nm -.Op Fl BCdEFfnpRrSyZ +.Op Fl BCdEFfnpRrSyZz .Op Fl b Ar block .Op Fl c Ar level .Op Fl m Ar mode .Ar filesystem .Ar ... .Sh DESCRIPTION The specified disk partitions and/or file systems are checked. In "preen" or "check clean" mode the clean flag of each file system's superblock is examined and only those file systems that are not marked clean are checked. File systems are marked clean when they are unmounted, when they have been mounted read-only, or when .Nm runs on them successfully. If the .Fl f option is specified, the file systems will be checked regardless of the state of their clean flag. .Pp The kernel takes care that only a restricted class of innocuous file system inconsistencies can happen unless hardware or software failures intervene. These are limited to the following: .Pp .Bl -item -compact -offset indent .It Unreferenced inodes .It Link counts in inodes too large .It Missing blocks in the free map .It Blocks in the free map also in files .It Counts in the super-block wrong .El .Pp These are the only inconsistencies that .Nm with the .Fl p option will correct; if it encounters other inconsistencies, it exits with an abnormal return status and an automatic reboot will then fail. For each corrected inconsistency one or more lines will be printed identifying the file system on which the correction will take place, and the nature of the correction. After successfully correcting a file system, .Nm will print the number of files on that file system, the number of used and free blocks, and the percentage of fragmentation. .Pp If sent a .Dv QUIT signal, .Nm will finish the file system checks, then exit with an abnormal return status that causes an automatic reboot to fail. 
This is useful when you want to finish the file system checks during an automatic reboot, but do not want the machine to come up multiuser after the checks complete. .Pp If .Nm receives a .Dv SIGINFO (see the .Dq status argument for .Xr stty 1 ) signal, a line will be written to the standard output indicating the name of the device currently being checked, the current phase number and phase-specific progress information. .Pp Without the .Fl p option, .Nm audits and interactively repairs inconsistent conditions for file systems. If the file system is inconsistent the operator is prompted for concurrence before each correction is attempted. It should be noted that some of the corrective actions which are not correctable under the .Fl p option will result in some loss of data. The amount and severity of data lost may be determined from the diagnostic output. The default action for each consistency correction is to wait for the operator to respond .Li yes or .Li no . If the operator does not have write permission on the file system .Nm will default to a .Fl n action. .Pp The following flags are interpreted by .Nm : .Bl -tag -width indent .It Fl B A check is done on the specified and possibly active file system. The set of corrections that can be done is limited to those done when running in preen mode (see the .Fl p flag). If unexpected errors are found, the file system is marked as needing a foreground check and .Nm exits without attempting any further cleaning. .It Fl b Use the block specified immediately after the flag as the super block for the file system. An alternate super block is usually located at block 32 for UFS1, and block 160 for UFS2. .Pp See the .Fl N flag of .Xr newfs 8 . .It Fl C Check if file system was dismounted cleanly. If so, skip file system checks (like "preen"). However, if the file system was not cleanly dismounted, do full checks, as if .Nm was invoked without .Fl C . .It Fl c Convert the file system to the specified level. Note that the level of a file system can only be raised. There are currently four levels defined: .Bl -tag -width indent .It 0 The file system is in the old (static table) format. .It 1 The file system is in the new (dynamic table) format. .It 2 The file system supports 32-bit uid's and gid's, short symbolic links are stored in the inode, and directories have an added field showing the file type. .It 3 If maxcontig is greater than one, build the free segment maps to aid in finding contiguous sets of blocks. If maxcontig is equal to one, delete any existing segment maps. .El .Pp In interactive mode, .Nm will list the conversion to be made and ask whether the conversion should be done. If a negative answer is given, no further operations are done on the file system. In preen mode, the conversion is listed and done if possible without user interaction. Conversion in preen mode is best used when all the file systems are being converted at once. The format of a file system can be determined from the first line of output from .Xr dumpfs 8 . .Pp This option implies the .Fl f flag. .It Fl d Enable debugging messages. .It Fl E Clear unallocated blocks, notifying the underlying device that they are not used and that their contents may be discarded. This is useful for filesystems which have been mounted on systems without TRIM support, or with TRIM support disabled, as well as filesystems which have been copied from one device to another. .Pp See the .Fl E and .Fl t flags of .Xr newfs 8 , and the .Fl t flag of .Xr tunefs 8 . 
.It Fl F Determine whether the file system needs to be cleaned immediately in foreground, or if its cleaning can be deferred to background. To be eligible for background cleaning it must have been running with soft updates, not have been marked as needing a foreground check, and be mounted and writable when the background check is to be done. If these conditions are met, then .Nm exits with a zero exit status. Otherwise it exits with a non-zero exit status. If the file system is clean, it will exit with a non-zero exit status so that the clean status of the file system can be verified and reported during the foreground checks. Note that when invoked with the .Fl F flag, no cleanups are done. The only thing that .Nm does is to determine whether a foreground or background check is needed and exit with an appropriate status code. .It Fl f Force .Nm to check .Sq clean file systems when preening. .It Fl m Use the mode specified in octal immediately after the flag as the permission bits to use when creating the .Pa lost+found directory rather than the default 1777. In particular, systems that do not wish to have lost files accessible by all users on the system should use a more restrictive set of permissions such as 700. .It Fl n Assume a no response to all questions asked by .Nm except for .Ql CONTINUE? , which is assumed to be affirmative; do not open the file system for writing. .It Fl p Preen file systems (see above). .It Fl R Instruct fsck_ffs to restart itself if it encounters certain errors that warrant another run. It will limit itself to a maximum of 10 restarts in a given run in order to avoid an endless loop with extremely corrupted filesystems. .It Fl r Free up excess unused inodes. Decreasing the number of preallocated inodes reduces the running time of future runs of .Nm and frees up space that can be allocated to files. The .Fl r option is ignored when running in preen mode. .It Fl S Surrender on error. With this flag enabled, a hard error returned on disk i/o will cause .Nm to abort instead of continuing on and possibly tripping over more i/o errors. .It Fl y Assume a yes response to all questions asked by .Nm ; this should be used with great caution as this is a free license to continue after essentially unlimited trouble has been encountered. .It Fl Z Similar to .Fl E , but overwrites unused blocks with zeroes. If both .Fl E and .Fl Z are specified, blocks are first zeroed and then erased. +.It Fl z +Clear unused directory space. +The cleared space includes deleted file names and name padding. .El .Pp Inconsistencies checked are as follows: .Pp .Bl -enum -compact .It Blocks claimed by more than one inode or the free map. .It Blocks claimed by an inode outside the range of the file system. .It Incorrect link counts. .It Size checks: .Bl -item -offset indent -compact .It Directory size not a multiple of DIRBLKSIZ. .It Partially truncated file. .El .It Bad inode format. .It Blocks not accounted for anywhere. .It Directory checks: .Bl -item -offset indent -compact .It File pointing to unallocated inode. .It Inode number out of range. .It Directories with unallocated blocks (holes). .It Dot or dot-dot not the first two entries of a directory or having the wrong inode number. .El .It Super Block checks: .Bl -item -offset indent -compact .It More blocks for inodes than there are in the file system. .It Bad free block map format. .It Total free block and/or free inode count incorrect.
.El .El .Pp Orphaned files and directories (allocated but unreferenced) are, with the operator's concurrence, reconnected by placing them in the .Pa lost+found directory. The name assigned is the inode number. If the .Pa lost+found directory does not exist, it is created. If there is insufficient space its size is increased. .Pp The full foreground .Nm checks for many more problems that may occur after an unrecoverable disk write error. Thus, it is recommended that you perform foreground .Nm on your systems periodically and whenever you encounter unrecoverable disk write errors or file-system\-related panics. .Sh FILES .Bl -tag -width /etc/fstab -compact .It Pa /etc/fstab contains default list of file systems to check. .El .Sh EXIT STATUS .Ex -std .Pp Specific non-zero exit status values used are: .Bl -tag -width indent .It 1 Usage error (missing or invalid command arguments). .It 2 The .Fl p option was used and a .Dv SIGQUIT was received, indicating that the system should be returned to single user mode after the file system check. .It 3 The file system superblock cannot be read. This could indicate that the file system device does not exist or is not yet ready. .It 4 A mounted file system was modified; the system should be rebooted. .It 5 The .Fl B option was used and soft updates are not enabled on the file system. .It 6 The .Fl B option was used and the kernel lacks needed support. .It 7 The .Fl F option was used and the file system is clean. .It 8 General error exit. .It 16 The file system could not be completely repaired. The file system may be able to be repaired by running .Nm on the file system again. .El .Sh DIAGNOSTICS The diagnostics produced by .Nm are fully enumerated and explained in Appendix A of .Rs .%T "Fsck \- The UNIX File System Check Program" .Re .Sh SEE ALSO .Xr fs 5 , .Xr fstab 5 , .Xr fsck 8 , .Xr fsdb 8 , .Xr newfs 8 , .Xr reboot 8 .Sh HISTORY A .Nm fsck utility appeared in .Bx 4.0 . It became .Nm in .Fx 5.0 with the introduction of the filesystem independent wrapper as .Nm fsck . Index: projects/runtime-coverage-v2/sbin/fsck_ffs/globs.c =================================================================== --- projects/runtime-coverage-v2/sbin/fsck_ffs/globs.c (revision 347075) +++ projects/runtime-coverage-v2/sbin/fsck_ffs/globs.c (revision 347076) @@ -1,171 +1,172 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1986, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include "fsck.h" long readcnt[BT_NUMBUFTYPES]; long totalreadcnt[BT_NUMBUFTYPES]; struct timespec readtime[BT_NUMBUFTYPES]; struct timespec totalreadtime[BT_NUMBUFTYPES]; struct timespec startprog; struct bufarea sblk; /* file system superblock */ struct bufarea *pdirbp; /* current directory contents */ struct bufarea *pbp; /* current inode block */ ino_t cursnapshot; long dirhash, inplast; unsigned long numdirs, listmax; long countdirs; /* number of directories we actually found */ int adjrefcnt[MIBSIZE]; /* MIB command to adjust inode reference cnt */ int adjblkcnt[MIBSIZE]; /* MIB command to adjust inode block count */ int setsize[MIBSIZE]; /* MIB command to set inode size */ int adjndir[MIBSIZE]; /* MIB command to adjust number of directories */ int adjnbfree[MIBSIZE]; /* MIB command to adjust number of free blocks */ int adjnifree[MIBSIZE]; /* MIB command to adjust number of free inodes */ int adjnffree[MIBSIZE]; /* MIB command to adjust number of free frags */ int adjnumclusters[MIBSIZE]; /* MIB command to adjust number of free clusters */ int freefiles[MIBSIZE]; /* MIB command to free a set of files */ int freedirs[MIBSIZE]; /* MIB command to free a set of directories */ int freeblks[MIBSIZE]; /* MIB command to free a set of data blocks */ struct fsck_cmd cmd; /* sysctl file system update commands */ char snapname[BUFSIZ]; /* when doing snapshots, the name of the file */ char *cdevname; /* name of device being checked */ long dev_bsize; /* computed value of DEV_BSIZE */ long secsize; /* actual disk sector size */ u_int real_dev_bsize; /* actual disk sector size, not overridden */ char nflag; /* assume a no response */ char yflag; /* assume a yes response */ int bkgrdflag; /* use a snapshot to run on an active system */ off_t bflag; /* location of alternate super block */ int debug; /* output debugging info */ int Eflag; /* delete empty data blocks */ int Zflag; /* zero empty data blocks */ +int zflag; /* zero unused directory space */ int inoopt; /* trim out unused inodes */ char ckclean; /* only do work if not cleanly unmounted */ int cvtlevel; /* convert to newer file system format */ int ckhashadd; /* check hashes to be added */ int bkgrdcheck; /* determine if background check is possible */ int bkgrdsumadj; /* whether the kernel have ability to adjust superblock summary */ char usedsoftdep; /* just fix soft dependency inconsistencies */ char preen; /* just fix normal inconsistencies */ char rerun; /* rerun fsck. 
Only used in non-preen mode */ int returntosingle; /* 1 => return to single user mode on exit */ char resolved; /* cleared if unresolved changes => not clean */ char havesb; /* superblock has been read */ char skipclean; /* skip clean file systems if preening */ int fsmodified; /* 1 => write done to file system */ int fsreadfd; /* file descriptor for reading file system */ int fswritefd; /* file descriptor for writing file system */ int surrender; /* Give up if reads fail */ int wantrestart; /* Restart fsck on early termination */ ufs2_daddr_t maxfsblock; /* number of blocks in the file system */ char *blockmap; /* ptr to primary blk allocation map */ ino_t maxino; /* number of inodes in file system */ ino_t lfdir; /* lost & found directory inode number */ const char *lfname; /* lost & found directory name */ int lfmode; /* lost & found directory creation mode */ ufs2_daddr_t n_blks; /* number of blocks in use */ ino_t n_files; /* number of files in use */ volatile sig_atomic_t got_siginfo; /* received a SIGINFO */ volatile sig_atomic_t got_sigalarm; /* received a SIGALRM */ struct ufs1_dinode ufs1_zino; struct ufs2_dinode ufs2_zino; void fsckinit(void) { bzero(readcnt, sizeof(long) * BT_NUMBUFTYPES); bzero(totalreadcnt, sizeof(long) * BT_NUMBUFTYPES); bzero(readtime, sizeof(struct timespec) * BT_NUMBUFTYPES); bzero(totalreadtime, sizeof(struct timespec) * BT_NUMBUFTYPES); bzero(&startprog, sizeof(struct timespec)); bzero(&sblk, sizeof(struct bufarea)); pdirbp = NULL; pbp = NULL; cursnapshot = 0; listmax = numdirs = dirhash = inplast = 0; countdirs = 0; bzero(adjrefcnt, sizeof(int) * MIBSIZE); bzero(adjblkcnt, sizeof(int) * MIBSIZE); bzero(setsize, sizeof(int) * MIBSIZE); bzero(adjndir, sizeof(int) * MIBSIZE); bzero(adjnbfree, sizeof(int) * MIBSIZE); bzero(adjnifree, sizeof(int) * MIBSIZE); bzero(adjnffree, sizeof(int) * MIBSIZE); bzero(adjnumclusters, sizeof(int) * MIBSIZE); bzero(freefiles, sizeof(int) * MIBSIZE); bzero(freedirs, sizeof(int) * MIBSIZE); bzero(freeblks, sizeof(int) * MIBSIZE); bzero(&cmd, sizeof(struct fsck_cmd)); bzero(snapname, sizeof(char) * BUFSIZ); cdevname = NULL; dev_bsize = 0; secsize = 0; real_dev_bsize = 0; bkgrdsumadj = 0; usedsoftdep = 0; rerun = 0; returntosingle = 0; resolved = 0; havesb = 0; fsmodified = 0; fsreadfd = 0; fswritefd = 0; maxfsblock = 0; blockmap = NULL; maxino = 0; lfdir = 0; lfname = "lost+found"; lfmode = 0700; n_blks = 0; n_files = 0; got_siginfo = 0; got_sigalarm = 0; bzero(&ufs1_zino, sizeof(struct ufs1_dinode)); bzero(&ufs2_zino, sizeof(struct ufs2_dinode)); } Index: projects/runtime-coverage-v2/sbin/fsck_ffs/main.c =================================================================== --- projects/runtime-coverage-v2/sbin/fsck_ffs/main.c (revision 347075) +++ projects/runtime-coverage-v2/sbin/fsck_ffs/main.c (revision 347076) @@ -1,750 +1,754 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1986, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #define IN_RTLD /* So we pickup the P_OSREL defines */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" int restarts; static void usage(void) __dead2; static intmax_t argtoimax(int flag, const char *req, const char *str, int base); static int checkfilesys(char *filesys); static int chkdoreload(struct statfs *mntp); static struct statfs *getmntpt(const char *); int main(int argc, char *argv[]) { int ch; struct rlimit rlimit; struct itimerval itimerval; int fsret; int ret = 0; sync(); skipclean = 1; inoopt = 0; - while ((ch = getopt(argc, argv, "b:Bc:CdEfFm:npRrSyZ")) != -1) { + while ((ch = getopt(argc, argv, "b:Bc:CdEfFm:npRrSyZz")) != -1) { switch (ch) { case 'b': skipclean = 0; bflag = argtoimax('b', "number", optarg, 10); printf("Alternate super block location: %jd\n", bflag); break; case 'B': bkgrdflag = 1; break; case 'c': skipclean = 0; cvtlevel = argtoimax('c', "conversion level", optarg, 10); if (cvtlevel < 3) errx(EEXIT, "cannot do level %d conversion", cvtlevel); break; case 'd': debug++; break; case 'E': Eflag++; break; case 'f': skipclean = 0; break; case 'F': bkgrdcheck = 1; break; case 'm': lfmode = argtoimax('m', "mode", optarg, 8); if (lfmode &~ 07777) errx(EEXIT, "bad mode to -m: %o", lfmode); printf("** lost+found creation mode %o\n", lfmode); break; case 'n': nflag++; yflag = 0; break; case 'p': preen++; /*FALLTHROUGH*/ case 'C': ckclean++; break; case 'R': wantrestart = 1; break; case 'r': inoopt++; break; case 'S': surrender = 1; break; case 'y': yflag++; nflag = 0; break; case 'Z': Zflag++; + break; + + case 'z': + zflag++; break; default: usage(); } } argc -= optind; argv += optind; if (!argc) usage(); if (signal(SIGINT, SIG_IGN) != SIG_IGN) (void)signal(SIGINT, catch); if (ckclean) (void)signal(SIGQUIT, catchquit); signal(SIGINFO, infohandler); if (bkgrdflag) { signal(SIGALRM, alarmhandler); itimerval.it_interval.tv_sec = 5; itimerval.it_interval.tv_usec = 0; itimerval.it_value.tv_sec = 5; itimerval.it_value.tv_usec = 0; setitimer(ITIMER_REAL, &itimerval, NULL); } /* * Push up our allowed memory limit so we can cope * with huge file systems. 
*/ if (getrlimit(RLIMIT_DATA, &rlimit) == 0) { rlimit.rlim_cur = rlimit.rlim_max; (void)setrlimit(RLIMIT_DATA, &rlimit); } while (argc > 0) { if ((fsret = checkfilesys(*argv)) == ERESTART) continue; ret |= fsret; argc--; argv++; } if (returntosingle) ret = 2; exit(ret); } static intmax_t argtoimax(int flag, const char *req, const char *str, int base) { char *cp; intmax_t ret; ret = strtoimax(str, &cp, base); if (cp == str || *cp) errx(EEXIT, "-%c flag requires a %s", flag, req); return (ret); } /* * Check the specified file system. */ /* ARGSUSED */ static int checkfilesys(char *filesys) { ufs2_daddr_t n_ffree, n_bfree; struct dups *dp; struct statfs *mntp; struct stat snapdir; struct group *grp; struct iovec *iov; char errmsg[255]; int ofsmodified; int iovlen; int cylno; intmax_t blks, files; size_t size; iov = NULL; iovlen = 0; errmsg[0] = '\0'; fsutilinit(); fsckinit(); cdevname = filesys; if (debug && ckclean) pwarn("starting\n"); /* * Make best effort to get the disk name. Check first to see * if it is listed among the mounted file systems. Failing that * check to see if it is listed in /etc/fstab. */ mntp = getmntpt(filesys); if (mntp != NULL) filesys = mntp->f_mntfromname; else filesys = blockcheck(filesys); /* * If -F flag specified, check to see whether a background check * is possible and needed. If possible and needed, exit with * status zero. Otherwise exit with status non-zero. A non-zero * exit status will cause a foreground check to be run. */ sblock_init(); if (bkgrdcheck) { if ((fsreadfd = open(filesys, O_RDONLY)) < 0 || readsb(0) == 0) exit(3); /* Cannot read superblock */ close(fsreadfd); /* Earlier background failed or journaled */ if (sblock.fs_flags & (FS_NEEDSFSCK | FS_SUJ)) exit(4); if ((sblock.fs_flags & FS_DOSOFTDEP) == 0) exit(5); /* Not running soft updates */ size = MIBSIZE; if (sysctlnametomib("vfs.ffs.adjrefcnt", adjrefcnt, &size) < 0) exit(6); /* Lacks kernel support */ if ((mntp == NULL && sblock.fs_clean == 1) || (mntp != NULL && (sblock.fs_flags & FS_UNCLEAN) == 0)) exit(7); /* Filesystem clean, report it now */ exit(0); } if (ckclean && skipclean) { /* * If file system is gjournaled, check it here. 
*/ if ((fsreadfd = open(filesys, O_RDONLY)) < 0 || readsb(0) == 0) exit(3); /* Cannot read superblock */ close(fsreadfd); if ((sblock.fs_flags & FS_GJOURNAL) != 0) { //printf("GJournaled file system detected on %s.\n", // filesys); if (sblock.fs_clean == 1) { pwarn("FILE SYSTEM CLEAN; SKIPPING CHECKS\n"); exit(0); } if ((sblock.fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) { gjournal_check(filesys); if (chkdoreload(mntp) == 0) exit(0); exit(4); } else { pfatal( "UNEXPECTED INCONSISTENCY, CANNOT RUN FAST FSCK\n"); } } } /* * If we are to do a background check: * Get the mount point information of the file system * create snapshot file * return created snapshot file * if not found, clear bkgrdflag and proceed with normal fsck */ if (bkgrdflag) { if (mntp == NULL) { bkgrdflag = 0; pfatal("NOT MOUNTED, CANNOT RUN IN BACKGROUND\n"); } else if ((mntp->f_flags & MNT_SOFTDEP) == 0) { bkgrdflag = 0; pfatal( "NOT USING SOFT UPDATES, CANNOT RUN IN BACKGROUND\n"); } else if ((mntp->f_flags & MNT_RDONLY) != 0) { bkgrdflag = 0; pfatal("MOUNTED READ-ONLY, CANNOT RUN IN BACKGROUND\n"); } else if ((fsreadfd = open(filesys, O_RDONLY)) >= 0) { if (readsb(0) != 0) { if (sblock.fs_flags & (FS_NEEDSFSCK | FS_SUJ)) { bkgrdflag = 0; pfatal( "UNEXPECTED INCONSISTENCY, CANNOT RUN IN BACKGROUND\n"); } if ((sblock.fs_flags & FS_UNCLEAN) == 0 && skipclean && ckclean) { /* * file system is clean; * skip snapshot and report it clean */ pwarn( "FILE SYSTEM CLEAN; SKIPPING CHECKS\n"); goto clean; } } close(fsreadfd); } if (bkgrdflag) { snprintf(snapname, sizeof snapname, "%s/.snap", mntp->f_mntonname); if (stat(snapname, &snapdir) < 0) { if (errno != ENOENT) { bkgrdflag = 0; pfatal( "CANNOT FIND SNAPSHOT DIRECTORY %s: %s, CANNOT RUN IN BACKGROUND\n", snapname, strerror(errno)); } else if ((grp = getgrnam("operator")) == NULL || mkdir(snapname, 0770) < 0 || chown(snapname, -1, grp->gr_gid) < 0 || chmod(snapname, 0770) < 0) { bkgrdflag = 0; pfatal( "CANNOT CREATE SNAPSHOT DIRECTORY %s: %s, CANNOT RUN IN BACKGROUND\n", snapname, strerror(errno)); } } else if (!S_ISDIR(snapdir.st_mode)) { bkgrdflag = 0; pfatal( "%s IS NOT A DIRECTORY, CANNOT RUN IN BACKGROUND\n", snapname); } } if (bkgrdflag) { snprintf(snapname, sizeof snapname, "%s/.snap/fsck_snapshot", mntp->f_mntonname); build_iovec(&iov, &iovlen, "fstype", "ffs", 4); build_iovec(&iov, &iovlen, "from", snapname, (size_t)-1); build_iovec(&iov, &iovlen, "fspath", mntp->f_mntonname, (size_t)-1); build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg)); build_iovec(&iov, &iovlen, "update", NULL, 0); build_iovec(&iov, &iovlen, "snapshot", NULL, 0); while (nmount(iov, iovlen, mntp->f_flags) < 0) { if (errno == EEXIST && unlink(snapname) == 0) continue; bkgrdflag = 0; pfatal("CANNOT CREATE SNAPSHOT %s: %s %s\n", snapname, strerror(errno), errmsg); break; } if (bkgrdflag != 0) filesys = snapname; } } switch (setup(filesys)) { case 0: if (preen) pfatal("CAN'T CHECK FILE SYSTEM."); return (0); case -1: clean: pwarn("clean, %ld free ", (long)(sblock.fs_cstotal.cs_nffree + sblock.fs_frag * sblock.fs_cstotal.cs_nbfree)); printf("(%jd frags, %jd blocks, %.1f%% fragmentation)\n", (intmax_t)sblock.fs_cstotal.cs_nffree, (intmax_t)sblock.fs_cstotal.cs_nbfree, sblock.fs_cstotal.cs_nffree * 100.0 / sblock.fs_dsize); return (0); } /* * Determine if we can and should do journal recovery. 
*/ if ((sblock.fs_flags & FS_SUJ) == FS_SUJ) { if ((sblock.fs_flags & FS_NEEDSFSCK) != FS_NEEDSFSCK && skipclean) { if (preen || reply("USE JOURNAL")) { if (suj_check(filesys) == 0) { printf("\n***** FILE SYSTEM MARKED CLEAN *****\n"); if (chkdoreload(mntp) == 0) exit(0); exit(4); } } printf("** Skipping journal, falling through to full fsck\n\n"); } /* * Write the superblock so we don't try to recover the * journal on another pass. If this is the only change * to the filesystem, we do not want it to be called * out as modified. */ sblock.fs_mtime = time(NULL); sbdirty(); ofsmodified = fsmodified; flush(fswritefd, &sblk); fsmodified = ofsmodified; } /* * If the filesystem was run on an old kernel that did not * support check hashes, clear the check-hash flags so that * we do not try to verify them. */ if ((sblock.fs_flags & FS_METACKHASH) == 0) sblock.fs_metackhash = 0; /* * If we are running on a kernel that can provide check hashes * that are not yet enabled for the filesystem and we are * running manually without the -y flag, offer to add any * supported check hashes that are not already enabled. */ ckhashadd = 0; if (preen == 0 && yflag == 0 && sblock.fs_magic != FS_UFS1_MAGIC && fswritefd != -1 && getosreldate() >= P_OSREL_CK_CYLGRP) { if ((sblock.fs_metackhash & CK_CYLGRP) == 0 && reply("ADD CYLINDER GROUP CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_CYLGRP; sblock.fs_metackhash |= CK_CYLGRP; } if ((sblock.fs_metackhash & CK_SUPERBLOCK) == 0 && getosreldate() >= P_OSREL_CK_SUPERBLOCK && reply("ADD SUPERBLOCK CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_SUPERBLOCK; sblock.fs_metackhash |= CK_SUPERBLOCK; } if ((sblock.fs_metackhash & CK_INODE) == 0 && getosreldate() >= P_OSREL_CK_INODE && reply("ADD INODE CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_INODE; sblock.fs_metackhash |= CK_INODE; } #ifdef notyet if ((sblock.fs_metackhash & CK_INDIR) == 0 && getosreldate() >= P_OSREL_CK_INDIR && reply("ADD INDIRECT BLOCK CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_INDIR; sblock.fs_metackhash |= CK_INDIR; } if ((sblock.fs_metackhash & CK_DIR) == 0 && getosreldate() >= P_OSREL_CK_DIR && reply("ADD DIRECTORY CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_DIR; sblock.fs_metackhash |= CK_DIR; } #endif /* notyet */ if (ckhashadd != 0) { sblock.fs_flags |= FS_METACKHASH; sbdirty(); } } /* * Cleared if any questions answered no. Used to decide if * the superblock should be marked clean. */ resolved = 1; /* * 1: scan inodes tallying blocks used */ if (preen == 0) { printf("** Last Mounted on %s\n", sblock.fs_fsmnt); if (mntp != NULL && mntp->f_flags & MNT_ROOTFS) printf("** Root file system\n"); printf("** Phase 1 - Check Blocks and Sizes\n"); } clock_gettime(CLOCK_REALTIME_PRECISE, &startprog); pass1(); IOstats("Pass1"); /* * 1b: locate first references to duplicates, if any */ if (duplist) { if (preen || usedsoftdep) pfatal("INTERNAL ERROR: dups with %s%s%s", preen ? "-p" : "", (preen && usedsoftdep) ? " and " : "", usedsoftdep ? 
"softupdates" : ""); printf("** Phase 1b - Rescan For More DUPS\n"); pass1b(); IOstats("Pass1b"); } /* * 2: traverse directories from root to mark all connected directories */ if (preen == 0) printf("** Phase 2 - Check Pathnames\n"); pass2(); IOstats("Pass2"); /* * 3: scan inodes looking for disconnected directories */ if (preen == 0) printf("** Phase 3 - Check Connectivity\n"); pass3(); IOstats("Pass3"); /* * 4: scan inodes looking for disconnected files; check reference counts */ if (preen == 0) printf("** Phase 4 - Check Reference Counts\n"); pass4(); IOstats("Pass4"); /* * 5: check and repair resource counts in cylinder groups */ if (preen == 0) printf("** Phase 5 - Check Cyl groups\n"); pass5(); IOstats("Pass5"); /* * print out summary statistics */ n_ffree = sblock.fs_cstotal.cs_nffree; n_bfree = sblock.fs_cstotal.cs_nbfree; files = maxino - UFS_ROOTINO - sblock.fs_cstotal.cs_nifree - n_files; blks = n_blks + sblock.fs_ncg * (cgdmin(&sblock, 0) - cgsblock(&sblock, 0)); blks += cgsblock(&sblock, 0) - cgbase(&sblock, 0); blks += howmany(sblock.fs_cssize, sblock.fs_fsize); blks = maxfsblock - (n_ffree + sblock.fs_frag * n_bfree) - blks; if (bkgrdflag && (files > 0 || blks > 0)) { countdirs = sblock.fs_cstotal.cs_ndir - countdirs; pwarn("Reclaimed: %ld directories, %jd files, %jd fragments\n", countdirs, files - countdirs, blks); } pwarn("%ld files, %jd used, %ju free ", (long)n_files, (intmax_t)n_blks, (uintmax_t)n_ffree + sblock.fs_frag * n_bfree); printf("(%ju frags, %ju blocks, %.1f%% fragmentation)\n", (uintmax_t)n_ffree, (uintmax_t)n_bfree, n_ffree * 100.0 / sblock.fs_dsize); if (debug) { if (files < 0) printf("%jd inodes missing\n", -files); if (blks < 0) printf("%jd blocks missing\n", -blks); if (duplist != NULL) { printf("The following duplicate blocks remain:"); for (dp = duplist; dp; dp = dp->next) printf(" %jd,", (intmax_t)dp->dup); printf("\n"); } } duplist = (struct dups *)0; muldup = (struct dups *)0; inocleanup(); if (fsmodified) { sblock.fs_time = time(NULL); sbdirty(); } if (cvtlevel && sblk.b_dirty) { /* * Write out the duplicate super blocks */ for (cylno = 0; cylno < sblock.fs_ncg; cylno++) blwrite(fswritefd, (char *)&sblock, fsbtodb(&sblock, cgsblock(&sblock, cylno)), SBLOCKSIZE); } if (rerun) resolved = 0; finalIOstats(); /* * Check to see if the file system is mounted read-write. */ if (bkgrdflag == 0 && mntp != NULL && (mntp->f_flags & MNT_RDONLY) == 0) resolved = 0; ckfini(resolved); for (cylno = 0; cylno < sblock.fs_ncg; cylno++) if (inostathead[cylno].il_stat != NULL) free((char *)inostathead[cylno].il_stat); free((char *)inostathead); inostathead = NULL; if (fsmodified && !preen) printf("\n***** FILE SYSTEM WAS MODIFIED *****\n"); if (rerun) { if (wantrestart && (restarts++ < 10) && (preen || reply("RESTART"))) return (ERESTART); printf("\n***** PLEASE RERUN FSCK *****\n"); } if (chkdoreload(mntp) != 0) { if (!fsmodified) return (0); if (!preen) printf("\n***** REBOOT NOW *****\n"); sync(); return (4); } return (rerun ? ERERUN : 0); } static int chkdoreload(struct statfs *mntp) { struct iovec *iov; int iovlen; char errmsg[255]; if (mntp == NULL) return (0); iov = NULL; iovlen = 0; errmsg[0] = '\0'; /* * We modified a mounted file system. Do a mount update on * it unless it is read-write, so we can continue using it * as safely as possible. 
if (mntp->f_flags & MNT_RDONLY) { build_iovec(&iov, &iovlen, "fstype", "ffs", 4); build_iovec(&iov, &iovlen, "from", mntp->f_mntfromname, (size_t)-1); build_iovec(&iov, &iovlen, "fspath", mntp->f_mntonname, (size_t)-1); build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg)); build_iovec(&iov, &iovlen, "update", NULL, 0); build_iovec(&iov, &iovlen, "reload", NULL, 0); /* * XXX: We need the following line until we clean up * nmount parsing of root mounts and NFS root mounts. */ build_iovec(&iov, &iovlen, "ro", NULL, 0); if (nmount(iov, iovlen, mntp->f_flags) == 0) { return (0); } pwarn("mount reload of '%s' failed: %s %s\n\n", mntp->f_mntonname, strerror(errno), errmsg); return (1); } return (0); } /* * Get the mount point information for name. */ static struct statfs * getmntpt(const char *name) { struct stat devstat, mntdevstat; char device[sizeof(_PATH_DEV) - 1 + MNAMELEN]; char *ddevname; struct statfs *mntbuf, *statfsp; int i, mntsize, isdev; if (stat(name, &devstat) != 0) return (NULL); if (S_ISCHR(devstat.st_mode) || S_ISBLK(devstat.st_mode)) isdev = 1; else isdev = 0; mntsize = getmntinfo(&mntbuf, MNT_NOWAIT); for (i = 0; i < mntsize; i++) { statfsp = &mntbuf[i]; ddevname = statfsp->f_mntfromname; if (*ddevname != '/') { if (strlen(_PATH_DEV) + strlen(ddevname) + 1 > sizeof(statfsp->f_mntfromname)) continue; strcpy(device, _PATH_DEV); strcat(device, ddevname); strcpy(statfsp->f_mntfromname, device); } if (isdev == 0) { if (strcmp(name, statfsp->f_mntonname)) continue; return (statfsp); } if (stat(ddevname, &mntdevstat) == 0 && mntdevstat.st_rdev == devstat.st_rdev) return (statfsp); } statfsp = NULL; return (statfsp); } static void usage(void) { (void) fprintf(stderr, "usage: %s [-BCdEFfnpRrSyZz] [-b block] [-c level] [-m mode] filesystem ...\n", getprogname()); exit(1); } void infohandler(int sig __unused) { got_siginfo = 1; } void alarmhandler(int sig __unused) { got_sigalarm = 1; } Index: projects/runtime-coverage-v2/sbin/ifconfig/ifconfig.8 =================================================================== --- projects/runtime-coverage-v2/sbin/ifconfig/ifconfig.8 (revision 347075) +++ projects/runtime-coverage-v2/sbin/ifconfig/ifconfig.8 (revision 347076) @@ -1,3028 +1,3044 @@ .\" Copyright (c) 1983, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94 .\" $FreeBSD$ .\" -.Dd June 27, 2018 +.Dd May 3, 2019 .Dt IFCONFIG 8 .Os .Sh NAME .Nm ifconfig .Nd configure network interface parameters .Sh SYNOPSIS .Nm .Op Fl f Ar type:format Ns Op Ar ,type:format .Op Fl L .Op Fl k .Op Fl m .Op Fl n .Ar interface .Op Cm create .Ar address_family .Oo .Ar address .Op Ar dest_address .Oc .Op Ar parameters .Nm .Ar interface .Cm destroy .Nm .Fl a .Op Fl L .Op Fl d .Op Fl m .Op Fl u .Op Fl v .Op Ar address_family .Nm .Fl l .Op Fl d .Op Fl u .Op Ar address_family .Nm .Op Fl L .Op Fl d .Op Fl k .Op Fl m .Op Fl u .Op Fl v .Op Fl C .Nm .Op Fl g Ar groupname .Sh DESCRIPTION The .Nm utility is used to assign an address to a network interface and/or configure network interface parameters. The .Nm utility must be used at boot time to define the network address of each interface present on a machine; it may also be used at a later time to redefine an interface's address or other operating parameters. .Pp The following options are available: .Bl -tag -width indent .It Ar address For the .Tn DARPA Ns -Internet family, the address is either a host name present in the host name data base, .Xr hosts 5 , or a .Tn DARPA Internet address expressed in the Internet standard .Dq dot notation . .Pp It is also possible to use the CIDR notation (also known as the slash notation) to include the netmask. That is, one can specify an address like .Li 192.168.0.1/16 . .Pp For the .Dq inet6 family, it is also possible to specify the prefix length using the slash notation, like .Li ::1/128 . See the .Cm prefixlen parameter below for more information. .\" For the Xerox Network Systems(tm) family, .\" addresses are .\" .Ar net:a.b.c.d.e.f , .\" where .\" .Ar net .\" is the assigned network number (in decimal), .\" and each of the six bytes of the host number, .\" .Ar a .\" through .\" .Ar f , .\" are specified in hexadecimal. .\" The host number may be omitted on IEEE 802 protocol .\" (Ethernet, FDDI, and Token Ring) interfaces, .\" which use the hardware physical address, .\" and on interfaces other than the first. .\" For the .\" .Tn ISO .\" family, addresses are specified as a long hexadecimal string, .\" as in the Xerox family. .\" However, two consecutive dots imply a zero .\" byte, and the dots are optional, if the user wishes to (carefully) .\" count out long strings of digits in network byte order. .Pp The link-level .Pq Dq link address is specified as a series of colon-separated hex digits. This can be used to, for example, set a new MAC address on an Ethernet interface, though the mechanism used is not Ethernet specific. Use the .Pq Dq random keyword to set a randomly generated MAC address. A randomly-generated MAC address might be the same as one already in use in the network. Such duplications are extremely unlikely. 
If the interface is already up when this option is used, it will be briefly brought down and then brought back up again in order to ensure that the receive filter in the underlying Ethernet hardware is properly reprogrammed. .It Ar address_family Specify the address family which affects interpretation of the remaining parameters. Since an interface can receive transmissions in differing protocols with different naming schemes, specifying the address family is recommended. The address or protocol families currently supported are .Dq inet , .Dq inet6 , and .Dq link . The default if available is .Dq inet or otherwise .Dq link . .Dq ether and .Dq lladdr are synonyms for .Dq link . When using the .Fl l flag, the .Dq ether address family has special meaning and is no longer synonymous with .Dq link or .Dq lladdr . Specifying .Fl l Dq ether will list only Ethernet interfaces, excluding all other interface types, including the loopback interface. .It Ar dest_address Specify the address of the correspondent on the other end of a point to point link. .It Ar interface This parameter is a string of the form .Dq name unit , for example, .Dq Li ed0 . .It Ar groupname List the interfaces in the given group. .El .Pp The output format of .Nm can be controlled using the .Fl f flag or the .Ev IFCONFIG_FORMAT environment variable. The format is specified as a comma separated list of .Sy type:format pairs. See the .Sx EXAMPLES section for more information. The .Sy types and their associated .Sy format strings are: .Bl -tag -width ether .It Sy addr Adjust the display of inet and inet6 addresses .Bl -tag -width default .It Sy default Display inet and inet6 addresses in the default format, .Sy numeric .It Sy fqdn Display inet and inet6 addresses as fully qualified domain names .Pq FQDN .It Sy host Display inet and inet6 addresses as unqualified hostnames .It Sy numeric Display inet and inet6 addresses in numeric format .El .It Sy ether Adjust the display of link-level ethernet (MAC) addresses .Bl -tag -width default .It Sy colon Separate address segments with a colon .It Sy dash Separate address segments with a dash .It Sy default Display ethernet addresses in the default format, .Sy colon .El .It Sy inet Adjust the display of inet address subnet masks: .Bl -tag -width default .It Sy cidr Display subnet masks in CIDR notation, for example: .br 10.0.0.0/8 or 203.0.113.224/26 .It Sy default Display subnet masks in the default format, .Sy hex .It Sy dotted Display subnet masks in dotted quad notation, for example: .br 255.255.0.0 or 255.255.255.192 .It Sy hex Display subnet masks in hexadecimal, for example: .br 0xffff0000 or 0xffffffc0 .El .It Sy inet6 Adjust the display of inet6 address prefixes (subnet masks): .Bl -tag -width default .It Sy cidr Display subnet prefix in CIDR notation, for example: .br ::1/128 or fe80::1%lo0/64 .It Sy default Display subnet prefix in the default format .Sy numeric .It Sy numeric Display subnet prefix in integer format, for example: .br prefixlen 64 .El .El .Pp The following parameters may be set with .Nm : .Bl -tag -width indent .It Cm add Another name for the .Cm alias parameter. Introduced for compatibility with .Bsx . .It Cm alias Establish an additional network address for this interface. This is sometimes useful when changing network numbers, and one wishes to accept packets addressed to the old interface. If the address is on the same subnet as the first network address for this interface, a non-conflicting netmask must be given. Usually .Li 0xffffffff is most appropriate. 
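An alias is simply an additional address attached to the same interface, so it shows up as an extra entry in the interface's address list. Below is a minimal C sketch of that view using getifaddrs(3); the interface name "em0" and the inet-only filter are illustrative assumptions, not anything mandated by this manual.

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <ifaddrs.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct ifaddrs *ifap, *ifa;
	char buf[INET_ADDRSTRLEN];

	if (getifaddrs(&ifap) != 0)
		return (1);
	for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
		/* Keep only inet addresses on the example interface. */
		if (strcmp(ifa->ifa_name, "em0") != 0 ||
		    ifa->ifa_addr == NULL ||
		    ifa->ifa_addr->sa_family != AF_INET)
			continue;
		inet_ntop(AF_INET,
		    &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr,
		    buf, sizeof(buf));
		printf("%s: %s\n", ifa->ifa_name, buf);
	}
	freeifaddrs(ifap);
	return (0);
}

Run after adding an alias, this prints both the primary address and the alias.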
.It Fl alias Remove the network address specified. This would be used if you incorrectly specified an alias, or it was no longer needed. If you have incorrectly set an NS address having the side effect of specifying the host portion, removing all NS addresses will allow you to respecify the host portion. .It Cm anycast (Inet6 only.) Specify that the address configured is an anycast address. Based on the current specification, only routers may configure anycast addresses. An anycast address will not be used as the source address of any outgoing IPv6 packets. .It Cm arp Enable the use of the Address Resolution Protocol .Pq Xr arp 4 in mapping between network level addresses and link level addresses (default). This is currently implemented for mapping between .Tn DARPA Internet addresses and .Tn IEEE 802 48-bit MAC addresses (Ethernet, FDDI, and Token Ring addresses). .It Fl arp Disable the use of the Address Resolution Protocol .Pq Xr arp 4 . .It Cm staticarp If the Address Resolution Protocol is enabled, the host will only reply to requests for its addresses, and will never send any requests. .It Fl staticarp If the Address Resolution Protocol is enabled, the host will perform normally, sending out requests and listening for replies. .It Cm broadcast (Inet only.) Specify the address to use to represent broadcasts to the network. The default broadcast address is the address with a host part of all 1's. .It Cm debug Enable driver dependent debugging code; usually, this turns on extra console error logging. .It Fl debug Disable driver dependent debugging code. .It Cm promisc Put interface into permanently promiscuous mode. .It Fl promisc Disable permanently promiscuous mode. .It Cm delete Another name for the .Fl alias parameter. .It Cm description Ar value , Cm descr Ar value Specify a description of the interface. This can be used to label interfaces in situations where they may otherwise be difficult to distinguish. .It Cm -description , Cm -descr Clear the interface description. .It Cm down Mark an interface .Dq down . When an interface is marked .Dq down , the system will not attempt to transmit messages through that interface. If possible, the interface will be reset to disable reception as well. This action does not automatically disable routes using the interface. .It Cm group Ar group-name Assign the interface to a .Dq group . Any interface can be in multiple groups. .Pp Cloned interfaces are members of their interface family group by default. For example, a PPP interface such as .Em ppp0 is a member of the PPP interface family group, .Em ppp . .\" The interface(s) the default route(s) point to are members of the .\" .Em egress .\" interface group. .It Cm -group Ar group-name Remove the interface from the given .Dq group . .It Cm eui64 (Inet6 only.) Fill interface index (lowermost 64 bits of an IPv6 address) automatically. .It Cm fib Ar fib_number Specify interface FIB. A FIB .Ar fib_number is assigned to all frames or packets received on that interface. The FIB is not inherited, e.g., vlans or other sub-interfaces will use the default FIB (0) irrespective of the parent interface's FIB. The kernel needs to be tuned to support more than the default FIB using the .Va ROUTETABLES kernel configuration option, or the .Va net.fibs tunable. .It Cm tunnelfib Ar fib_number Specify tunnel FIB. A FIB .Ar fib_number is assigned to all packets encapsulated by a tunnel interface, e.g., .Xr gif 4 and .Xr gre 4 .
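Because the number of FIBs is exposed through the net.fibs tunable mentioned above, a program can check it before asking for a non-default FIB. A small sketch using sysctlbyname(3):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int fibs;
	size_t len = sizeof(fibs);

	/* net.fibs reports how many FIBs the kernel was built or tuned for. */
	if (sysctlbyname("net.fibs", &fibs, &len, NULL, 0) != 0)
		return (1);
	printf("%d FIBs available\n", fibs);
	return (0);
}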
.It Cm maclabel Ar label If Mandatory Access Control support is enabled in the kernel, set the MAC label to .Ar label . .\" (see .\" .Xr maclabel 7 ) . .It Cm media Ar type If the driver supports the media selection system, set the media type of the interface to .Ar type . Some interfaces support the mutually exclusive use of one of several different physical media connectors. For example, a 10Mbit/s Ethernet interface might support the use of either .Tn AUI or twisted pair connectors. Setting the media type to .Cm 10base5/AUI would change the currently active connector to the AUI port. Setting it to .Cm 10baseT/UTP would activate twisted pair. Refer to the interfaces' driver specific documentation or man page for a complete list of the available types. .It Cm mediaopt Ar opts If the driver supports the media selection system, set the specified media options on the interface. The .Ar opts argument is a comma delimited list of options to apply to the interface. Refer to the interfaces' driver specific man page for a complete list of available options. .It Fl mediaopt Ar opts If the driver supports the media selection system, disable the specified media options on the interface. .It Cm mode Ar mode If the driver supports the media selection system, set the specified operating mode on the interface to .Ar mode . For IEEE 802.11 wireless interfaces that support multiple operating modes this directive is used to select between 802.11a .Pq Cm 11a , 802.11b .Pq Cm 11b , and 802.11g .Pq Cm 11g operating modes. .It Cm txrtlmt Set if the driver supports TX rate limiting. .It Cm inst Ar minst , Cm instance Ar minst Set the media instance to .Ar minst . This is useful for devices which have multiple physical layer interfaces .Pq PHYs . .It Cm name Ar name Set the interface name to .Ar name . .It Cm rxcsum , txcsum , rxcsum6 , txcsum6 If the driver supports user-configurable checksum offloading, enable receive (or transmit) checksum offloading on the interface. The feature can be turned on selectively per protocol family. Use .Cm rxcsum6 , txcsum6 for .Xr ip6 4 or .Cm rxcsum , txcsum otherwise. Some drivers may not be able to enable these flags independently of each other, so setting one may also set the other. The driver will offload as much checksum work as it can reliably support; the exact level of offloading varies between drivers. .It Fl rxcsum , txcsum , rxcsum6 , txcsum6 If the driver supports user-configurable checksum offloading, disable receive (or transmit) checksum offloading on the interface. The feature can be turned off selectively per protocol family. Use .Fl rxcsum6 , txcsum6 for .Xr ip6 4 or .Fl rxcsum , txcsum otherwise. These settings may not always be independent of each other. .It Cm tso If the driver supports .Xr tcp 4 segmentation offloading, enable TSO on the interface. Some drivers may not be able to support TSO for .Xr ip 4 and .Xr ip6 4 packets, so they may enable only one of them. .It Fl tso If the driver supports .Xr tcp 4 segmentation offloading, disable TSO on the interface. It will always disable TSO for .Xr ip 4 and .Xr ip6 4 . .It Cm tso6 , tso4 If the driver supports .Xr tcp 4 segmentation offloading for .Xr ip6 4 or .Xr ip 4 use one of these to selectively enable it only for one protocol family. .It Fl tso6 , tso4 If the driver supports .Xr tcp 4 segmentation offloading for .Xr ip6 4 or .Xr ip 4 use one of these to selectively disable it only for one protocol family. .It Cm lro If the driver supports .Xr tcp 4 large receive offloading, enable LRO on the interface.
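The rxcsum/txcsum, tso, and lro requests above toggle per-interface capability bits. A minimal sketch that inspects those bits with the SIOCGIFCAP ioctl; the interface name "em0" is a placeholder, and only a few of the IFCAP_* flags are decoded:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct ifreq ifr;
	int s;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		return (1);
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name)); /* placeholder */
	if (ioctl(s, SIOCGIFCAP, &ifr) < 0) {
		close(s);
		return (1);
	}
	/* ifr_reqcap holds the supported bits, ifr_curcap the enabled ones. */
	printf("rxcsum %s, tso4 %s, lro %s\n",
	    (ifr.ifr_curcap & IFCAP_RXCSUM) ? "on" : "off",
	    (ifr.ifr_curcap & IFCAP_TSO4) ? "on" : "off",
	    (ifr.ifr_curcap & IFCAP_LRO) ? "on" : "off");
	close(s);
	return (0);
}

Toggling a bit would go through SIOCSIFCAP with a modified ifr_reqcap, which is what the flags in this section ultimately do.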
.It Fl lro If the driver supports .Xr tcp 4 large receive offloading, disable LRO on the interface. .It Cm wol , wol_ucast , wol_mcast , wol_magic Enable Wake On LAN (WOL) support, if available. WOL is a facility whereby a machine in a low power state may be woken in response to a received packet. There are three types of packets that may wake a system: ucast (directed solely to the machine's mac address), mcast (directed to a broadcast or multicast address), or magic (unicast or multicast frames with ``magic'' contents). Not all devices support WOL; those that do indicate the mechanisms they support in their capabilities. .Cm wol is a synonym for enabling all available WOL mechanisms. To disable WOL use .Fl wol . .It Cm vlanmtu , vlanhwtag, vlanhwfilter, vlanhwcsum, vlanhwtso If the driver offers user-configurable VLAN support, enable reception of extended frames, tag processing in hardware, frame filtering in hardware, checksum offloading, or TSO on VLAN, respectively. Note that this must be issued on a physical interface associated with .Xr vlan 4 , not on a .Xr vlan 4 interface itself. .It Fl vlanmtu , vlanhwtag, vlanhwfilter, vlanhwtso If the driver offers user-configurable VLAN support, disable reception of extended frames, tag processing in hardware, frame filtering in hardware, or TSO on VLAN, respectively. .It Cm vnet Ar jail Move the interface to the .Xr jail 8 , specified by name or JID. If the jail has a virtual network stack, the interface will disappear from the current environment and become visible to the jail. .It Fl vnet Ar jail Reclaim the interface from the .Xr jail 8 , specified by name or JID. If the jail has a virtual network stack, the interface will disappear from the jail, and become visible to the current network environment. .It Cm polling Turn on the .Xr polling 4 feature and disable interrupts on the interface, if the driver supports this mode. .It Fl polling Turn off the .Xr polling 4 feature and enable interrupt mode on the interface. .It Cm create Create the specified network pseudo-device. If the interface is given without a unit number, try to create a new device with an arbitrary unit number. If creation of an arbitrary device is successful, the new device name is printed to standard output unless the interface is renamed or destroyed in the same .Nm invocation. .It Cm destroy Destroy the specified network pseudo-device. .It Cm plumb Another name for the .Cm create parameter. Included for .Tn Solaris compatibility. .It Cm unplumb Another name for the .Cm destroy parameter. Included for .Tn Solaris compatibility. .It Cm metric Ar n Set the routing metric of the interface to .Ar n , default 0. The routing metric is used by the routing protocol .Pq Xr routed 8 . Higher metrics have the effect of making a route less favorable; metrics are counted as additional hops to the destination network or host. .It Cm mtu Ar n Set the maximum transmission unit of the interface to .Ar n , default is interface specific. The MTU is used to limit the size of packets that are transmitted on an interface. Not all interfaces support setting the MTU, and some interfaces have range restrictions. .It Cm netmask Ar mask .\" (Inet and ISO.) (Inet only.) Specify how much of the address to reserve for subdividing networks into sub-networks. The mask includes the network part of the local address and the subnet part, which is taken from the host field of the address.
The mask can be specified as a single hexadecimal number with a leading .Ql 0x , with a dot-notation Internet address, or with a pseudo-network name listed in the network table .Xr networks 5 . The mask contains 1's for the bit positions in the 32-bit address which are to be used for the network and subnet parts, and 0's for the host part. The mask should contain at least the standard network portion, and the subnet field should be contiguous with the network portion. .Pp The netmask can also be specified in CIDR notation after the address. See the .Ar address option above for more information. .It Cm prefixlen Ar len (Inet6 only.) Specify that .Ar len bits are reserved for subdividing networks into sub-networks. The .Ar len must be an integer, and for syntactical reasons it must be between 0 and 128. It is almost always 64 under the current IPv6 assignment rule. If the parameter is omitted, 64 is used. .Pp The prefix can also be specified using the slash notation after the address. See the .Ar address option above for more information. .It Cm remove Another name for the .Fl alias parameter. Introduced for compatibility with .Bsx . .Sm off .It Cm link Op Cm 0 No - Cm 2 .Sm on Enable special processing of the link level of the interface. These three options are interface specific in actual effect; however, they are in general used to select special modes of operation. An example of this is to enable SLIP compression, or to select the connector type for some Ethernet cards. Refer to the man page for the specific driver for more information. .Sm off .It Fl link Op Cm 0 No - Cm 2 .Sm on Disable special processing at the link level with the specified interface. .It Cm monitor Put the interface in monitor mode. No packets are transmitted, and received packets are discarded after .Xr bpf 4 processing. .It Fl monitor Take the interface out of monitor mode. .It Cm up Mark an interface .Dq up . This may be used to enable an interface after an .Dq Nm Cm down . It happens automatically when setting the first address on an interface. If the interface was reset when previously marked down, the hardware will be re-initialized. .El .Pp The following parameters are for the ICMPv6 Neighbor Discovery Protocol. Note that the address family keyword .Dq Li inet6 is needed for them: .Bl -tag -width indent .It Cm accept_rtadv Set a flag to enable accepting ICMPv6 Router Advertisement messages. The .Xr sysctl 8 variable .Va net.inet6.ip6.accept_rtadv controls whether this flag is set by default or not. .It Cm -accept_rtadv Clear a flag .Cm accept_rtadv . .It Cm no_radr Set a flag to control whether routers from which the system accepts Router Advertisement messages will be added to the Default Router List or not. When the .Cm accept_rtadv flag is disabled, this flag has no effect. The .Xr sysctl 8 variable .Va net.inet6.ip6.no_radr controls whether this flag is set by default or not. .It Cm -no_radr Clear a flag .Cm no_radr . .It Cm auto_linklocal Set a flag to perform automatic link-local address configuration when the interface becomes available. The .Xr sysctl 8 variable .Va net.inet6.ip6.auto_linklocal controls whether this flag is set by default or not. .It Cm -auto_linklocal Clear a flag .Cm auto_linklocal . .It Cm defaultif Set the specified interface as the default route when there is no default router. .It Cm -defaultif Clear a flag .Cm defaultif . .It Cm ifdisabled Set a flag to disable all IPv6 network communications on the specified interface.
Note that if there are already configured IPv6 addresses on that interface, all of them are marked as .Dq tentative and DAD will be performed when this flag is cleared. .It Cm -ifdisabled Clear a flag .Cm ifdisabled . When this flag is cleared and .Cm auto_linklocal flag is enabled, automatic configuration of a link-local address is performed. .It Cm nud Set a flag to enable Neighbor Unreachability Detection. .It Cm -nud Clear a flag .Cm nud . .It Cm no_prefer_iface Set a flag to not honor rule 5 of source address selection in RFC 3484. In practice this means the address on the outgoing interface will not be preferred, effectively yielding the decision to the address selection policy table, configurable with .Xr ip6addrctl 8 . .It Cm -no_prefer_iface Clear a flag .Cm no_prefer_iface . .It Cm no_dad Set a flag to disable Duplicate Address Detection. .It Cm -no_dad Clear a flag .Cm no_dad . .El .Pp The following parameters are specific to IPv6 addresses. Note that the address family keyword .Dq Li inet6 is needed for them: .Bl -tag -width indent .It Cm autoconf Set the IPv6 autoconfigured address bit. .It Fl autoconf Clear the IPv6 autoconfigured address bit. .It Cm deprecated Set the IPv6 deprecated address bit. .It Fl deprecated Clear the IPv6 deprecated address bit. .It Cm pltime Ar n Set preferred lifetime for the address. .It Cm prefer_source Set a flag to prefer the address as a candidate of the source address for outgoing packets. .It Cm -prefer_source Clear a flag .Cm prefer_source . .It Cm vltime Ar n Set valid lifetime for the address. .El .Pp The following parameters are specific to cloning IEEE 802.11 wireless interfaces with the .Cm create request: .Bl -tag -width indent .It Cm wlandev Ar device Use .Ar device as the parent for the cloned device. .It Cm wlanmode Ar mode Specify the operating mode for this cloned device. .Ar mode is one of .Cm sta , .Cm ahdemo (or .Cm adhoc-demo ) , .Cm ibss (or .Cm adhoc ) , .Cm ap (or .Cm hostap ) , .Cm wds , .Cm tdma , .Cm mesh , and .Cm monitor . The operating mode of a cloned interface cannot be changed. The .Cm tdma mode is actually implemented as an .Cm adhoc-demo interface with special properties. .It Cm wlanbssid Ar bssid The 802.11 mac address to use for the bssid. This must be specified at create time for a legacy .Cm wds device. .It Cm wlanaddr Ar address The local mac address. If this is not specified then a mac address will automatically be assigned to the cloned device. Typically this address is the same as the address of the parent device but if the .Cm bssid parameter is specified then the driver will craft a unique address for the device (if supported). .It Cm wdslegacy Mark a .Cm wds device as operating in ``legacy mode''. Legacy .Cm wds devices have a fixed peer relationship and do not, for example, roam if their peer stops communicating. For completeness a Dynamic WDS (DWDS) interface may be marked as .Fl wdslegacy . .It Cm bssid Request a unique local mac address for the cloned device. This is only possible if the device supports multiple mac addresses. To force use of the parent's mac address use .Fl bssid . .It Cm beacons Mark the cloned interface as depending on hardware support to track received beacons. To have beacons tracked in software use .Fl beacons . For .Cm hostap mode .Fl beacons can also be used to indicate no beacons should be transmitted; this can be useful when creating a WDS configuration but .Cm wds interfaces can only be created as companions to an access point.
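The create/destroy requests, including the wlan cloning described above, are built on interface-cloning ioctls. A hedged sketch of the generic path using SIOCIFCREATE2 and SIOCIFDESTROY; the "epair" cloner is only an illustrative choice, and the extra wlan parameters (parent device, mode) that are passed via ifr_data are deliberately omitted here:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct ifreq ifr;
	int s;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		return (1);
	memset(&ifr, 0, sizeof(ifr));
	/* "epair" is just an example cloner; a bare name requests any unit. */
	strlcpy(ifr.ifr_name, "epair", sizeof(ifr.ifr_name));
	if (ioctl(s, SIOCIFCREATE2, &ifr) < 0) {
		close(s);
		return (1);
	}
	/* The kernel writes the assigned name back into ifr_name. */
	printf("created %s\n", ifr.ifr_name);
	(void)ioctl(s, SIOCIFDESTROY, &ifr);
	close(s);
	return (0);
}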
.El .Pp The following parameters are specific to IEEE 802.11 wireless interfaces cloned with a .Cm create operation: .Bl -tag -width indent .It Cm ampdu Enable sending and receiving AMPDU frames when using 802.11n (default). The 802.11n specification states a compliant station must be capable of receiving AMPDU frames but transmission is optional. Use .Fl ampdu to disable all use of AMPDU with 802.11n. For testing and/or to work around interoperability problems one can use .Cm ampdutx and .Cm ampdurx to control use of AMPDU in one direction. .It Cm ampdudensity Ar density Set the AMPDU density parameter used when operating with 802.11n. This parameter controls the inter-packet gap for AMPDU frames. The sending device normally controls this setting but a receiving station may request wider gaps. Legal values for .Ar density are 0, .25, .5, 1, 2, 4, 8, and 16 (microseconds). A value of .Cm - is treated the same as 0. .It Cm ampdulimit Ar limit Set the limit on packet size for receiving AMPDU frames when operating with 802.11n. Legal values for .Ar limit are 8192, 16384, 32768, and 65536 but one can also specify just the unique prefix: 8, 16, 32, 64. Note the sender may limit the size of AMPDU frames to be less than the maximum specified by the receiving station. .It Cm amsdu Enable sending and receiving AMSDU frames when using 802.11n. By default AMSDU is received but not transmitted. Use .Fl amsdu to disable all use of AMSDU with 802.11n. For testing and/or to work around interoperability problems one can use .Cm amsdutx and .Cm amsdurx to control use of AMSDU in one direction. .It Cm amsdulimit Ar limit Set the limit on packet size for sending and receiving AMSDU frames when operating with 802.11n. Legal values for .Ar limit are 7935 and 3839 (bytes). Note the sender may limit the size of AMSDU frames to be less than the maximum specified by the receiving station. Note also that devices are not required to support the 7935 limit, only 3839 is required by the specification and the larger value may require more memory to be dedicated to support functionality that is rarely used. .It Cm apbridge When operating as an access point, pass packets between wireless clients directly (default). To instead let them pass up through the system and be forwarded using some other mechanism, use .Fl apbridge . Disabling the internal bridging is useful when traffic is to be processed with packet filtering. .It Cm authmode Ar mode Set the desired authentication mode in infrastructure mode. Not all adapters support all modes. The set of valid modes is .Cm none , open , shared (shared key), .Cm 8021x (IEEE 802.1x), and .Cm wpa (IEEE WPA/WPA2/802.11i). The .Cm 8021x and .Cm wpa modes are only useful when using an authentication service (a supplicant for client operation or an authenticator when operating as an access point). Modes are case insensitive. .It Cm bgscan Enable background scanning when operating as a station. Background scanning is a technique whereby a station associated to an access point will temporarily leave the channel to scan for neighboring stations. This allows a station to maintain a cache of nearby access points so that roaming between access points can be done without a lengthy scan operation. Background scanning is done only when a station is not busy and any outbound traffic will cancel a scan operation. Background scanning should never cause packets to be lost though there may be some small latency if outbound traffic interrupts a scan operation. 
By default background scanning is enabled if the device is capable. To disable background scanning, use .Fl bgscan . Background scanning is controlled by the .Cm bgscanidle and .Cm bgscanintvl parameters. Background scanning must be enabled for roaming; this is an artifact of the current implementation and may not be required in the future. .It Cm bgscanidle Ar idletime Set the minimum time a station must be idle (not transmitting or receiving frames) before a background scan is initiated. The .Ar idletime parameter is specified in milliseconds. By default a station must be idle at least 250 milliseconds before a background scan is initiated. The idle time may not be set to less than 100 milliseconds. .It Cm bgscanintvl Ar interval Set the interval at which background scanning is attempted. The .Ar interval parameter is specified in seconds. By default a background scan is considered every 300 seconds (5 minutes). The .Ar interval may not be set to less than 15 seconds. .It Cm bintval Ar interval Set the interval at which beacon frames are sent when operating in ad-hoc or ap mode. The .Ar interval parameter is specified in TU's (1024 usecs). By default beacon frames are transmitted every 100 TU's. .It Cm bmissthreshold Ar count Set the number of consecutive missed beacons at which the station will attempt to roam (i.e., search for a new access point). The .Ar count parameter must be in the range 1 to 255, though the upper bound may be reduced according to device capabilities. The default threshold is 7 consecutive missed beacons, but this may be overridden by the device driver. Another name for the .Cm bmissthreshold parameter is .Cm bmiss . .It Cm bssid Ar address Specify the MAC address of the access point to use when operating as a station in a BSS network. This overrides any automatic selection done by the system. To disable a previously selected access point, supply .Cm any , none , or .Cm - for the address. This option is useful when more than one access point uses the same SSID. Another name for the .Cm bssid parameter is .Cm ap . .It Cm burst Enable packet bursting. Packet bursting is a transmission technique whereby the wireless medium is acquired once to send multiple frames and the interframe spacing is reduced. This technique can significantly increase throughput by reducing transmission overhead. Packet bursting is supported by the 802.11e QoS specification, and some devices that do not support QoS may still be capable. By default packet bursting is enabled if a device is capable of doing it. To disable packet bursting, use .Fl burst . .It Cm chanlist Ar channels Set the desired channels to use when scanning for access points, neighbors in an IBSS network, or looking for unoccupied channels when operating as an access point. The set of channels is specified as a comma-separated list with each element in the list representing either a single channel number or a range of the form .Dq Li a-b . Channel numbers must be in the range 1 to 255 and be permissible according to the operating characteristics of the device. .It Cm channel Ar number Set a single desired channel. Channels range from 1 to 255, but the exact selection available depends on the region your adaptor was manufactured for. Setting the channel to .Li any , or .Cm - will clear any desired channel and, if the device is marked up, force a scan for a channel to operate on. Alternatively the frequency, in megahertz, may be specified instead of the channel number.
.Pp When there are several ways to use a channel, the channel number/frequency may be appended with attributes to clarify. For example, if a device is capable of operating on channel 6 with 802.11n and 802.11g then one can request g-only use by specifying ``6:g''. Similarly the channel width can be specified by appending it with ``/''; e.g., ``6/40'' specifies a 40MHz wide channel. These attributes can be combined as in: ``6:ht/40''. The full set of flags specified following a ``:'' is: .Cm a (802.11a), .Cm b (802.11b), .Cm d (Atheros Dynamic Turbo mode), .Cm g (802.11g), .Cm h or .Cm n (802.11n aka HT), .Cm s (Atheros Static Turbo mode), and .Cm t (Atheros Dynamic Turbo mode, or appended to ``st'' and ``dt''). The full set of channel widths following a '/' is: .Cm 5 (5MHz aka quarter-rate channel), .Cm 10 (10MHz aka half-rate channel), .Cm 20 (20MHz mostly for use in specifying ht20), and .Cm 40 (40MHz mostly for use in specifying ht40). In addition, a 40MHz HT channel specification may include the location of the extension channel by appending ``+'' or ``-'' for above and below, respectively; e.g., ``2437:ht/40+'' specifies 40MHz wide HT operation with the center channel at frequency 2437 and the extension channel above. .It Cm country Ar name Set the country code to use in calculating the regulatory constraints for operation. In particular the set of available channels, how the wireless device will operate on the channels, and the maximum transmit power that can be used on a channel are defined by this setting. Country/Region codes are specified as a 2-character abbreviation defined by ISO 3166 or using a longer, but possibly ambiguous, spelling; e.g., "ES" and "Spain". The set of country codes is taken from .Pa /etc/regdomain.xml and can also be viewed with the ``list countries'' request. Note that not all devices support changing the country code from a default setting, typically stored in EEPROM. See also .Cm regdomain , .Cm indoor , .Cm outdoor , and .Cm anywhere . .It Cm dfs Enable Dynamic Frequency Selection (DFS) as specified in 802.11h. DFS embodies several facilities including detection of overlapping radar signals, dynamic transmit power control, and channel selection according to a least-congested criterion. DFS support is mandatory for some 5GHz frequencies in certain locales (e.g., ETSI). By default DFS is enabled according to the regulatory definitions specified in .Pa /etc/regdomain.xml and the current country code, regdomain, and channel. Note the underlying device (and driver) must support radar detection for full DFS support to work. To be fully compliant with the local regulatory agency, frequencies that require DFS should not be used unless it is fully supported. Use .Fl dfs to disable this functionality for testing. .It Cm dotd Enable support for the 802.11d specification (default). When this support is enabled in station mode, beacon frames that advertise a country code different than the currently configured country code will cause an event to be dispatched to user applications. This event can be used by the station to adopt that country code and operate according to the associated regulatory constraints. When operating as an access point with 802.11d enabled the beacon and probe response frames transmitted will advertise the current regulatory domain settings. To disable 802.11d use .Fl dotd . .It Cm doth Enable 802.11h support including spectrum management.
When 802.11h is enabled, beacon and probe response frames will have the SpectrumMgt bit set in the capabilities field, and country and power constraint information elements will be present. 802.11h support also includes handling Channel Switch Announcements (CSA), which are a mechanism to coordinate channel changes by an access point. By default 802.11h is enabled if the device is capable. To disable 802.11h use .Fl doth . .It Cm deftxkey Ar index Set the default key to use for transmission. Typically this is only set when using WEP encryption. Note that you must set a default transmit key for the system to know which key to use in encrypting outbound traffic. The .Cm weptxkey is an alias for this request; it is provided for backwards compatibility. .It Cm dtimperiod Ar period Set the DTIM period for transmitting buffered multicast data frames when operating in ap mode. The .Ar period specifies the number of beacon intervals between DTIM and must be in the range 1 to 15. By default DTIM is 1 (i.e., DTIM occurs at each beacon). .It Cm quiet Enable the use of the quiet IE. Hostap will use this to silence other stations to reduce interference for radar detection when operating on a 5GHz frequency and doth support is enabled. Use .Fl quiet to disable this functionality. .It Cm quiet_period Ar period Set the QUIET .Ar period to the number of beacon intervals between the start of regularly scheduled quiet intervals defined by the Quiet element. .It Cm quiet_count Ar count Set the QUIET .Ar count to the number of TBTTs until the beacon interval during which the next quiet interval shall start. A value of 1 indicates the quiet interval will start during the beacon interval starting at the next TBTT. A value of 0 is reserved. .It Cm quiet_offset Ar offset Set the QUIET .Ar offset to the offset of the start of the quiet interval from the TBTT specified by the Quiet count, expressed in TUs. The value of the .Ar offset shall be less than one beacon interval. .It Cm quiet_duration Ar dur Set the QUIET .Ar dur to the duration of the Quiet interval, expressed in TUs. The value should be less than the beacon interval. .It Cm dturbo Enable the use of Atheros Dynamic Turbo mode when communicating with another Dynamic Turbo-capable station. Dynamic Turbo mode is an Atheros-specific mechanism by which stations switch between normal 802.11 operation and a ``boosted'' mode in which a 40MHz wide channel is used for communication. Stations using Dynamic Turbo mode operate boosted only when the channel is free of non-dturbo stations; when a non-dturbo station is identified on the channel, all stations will automatically drop back to normal operation. By default, Dynamic Turbo mode is not enabled, even if the device is capable. Note that turbo mode (dynamic or static) is only allowed on some channels depending on the regulatory constraints; use the .Cm list chan command to identify the channels where turbo mode may be used. To disable Dynamic Turbo mode use .Fl dturbo . .It Cm dwds Enable Dynamic WDS (DWDS) support. DWDS is a facility by which 4-address traffic can be carried between stations operating in infrastructure mode. A station first associates to an access point and authenticates using normal procedures (e.g., WPA). Then 4-address frames are passed to carry traffic for stations operating on either side of the wireless link. DWDS extends the normal WDS mechanism by leveraging existing security protocols and eliminating static binding.
.Pp When DWDS is enabled on an access point, 4-address frames received from an authorized station will generate a ``DWDS discovery'' event to user applications. This event should be used to create a WDS interface that is bound to the remote station (and usually plumbed into a bridge). Once the WDS interface is up and running, 4-address traffic then logically flows through that interface. .Pp When DWDS is enabled on a station, traffic with a destination address different from the peer station is encapsulated in a 4-address frame and transmitted to the peer. All 4-address traffic uses the security information of the stations (e.g., cryptographic keys). A station associated using 802.11n facilities may transport 4-address traffic using these same mechanisms; this depends on available resources and capabilities of the device. The DWDS implementation guards against layer 2 routing loops of multicast traffic. .It Cm ff Enable the use of Atheros Fast Frames when communicating with another Fast Frames-capable station. Fast Frames are an encapsulation technique by which two 802.3 frames are transmitted in a single 802.11 frame. This can noticeably improve throughput but requires that the receiving station understand how to decapsulate the frame. Fast frame use is negotiated using the Atheros 802.11 vendor-specific protocol extension so enabling use is safe when communicating with non-Atheros devices. By default, use of fast frames is enabled if the device is capable. To explicitly disable fast frames, use .Fl ff . .It Cm fragthreshold Ar length Set the threshold for which transmitted frames are broken into fragments. The .Ar length argument is the frame size in bytes and must be in the range 256 to 2346. Setting .Ar length to .Li 2346 , .Cm any , or .Cm - disables transmit fragmentation. Not all adapters honor the fragmentation threshold. .It Cm hidessid When operating as an access point, do not broadcast the SSID in beacon frames or respond to probe request frames unless they are directed to the ap (i.e., they include the ap's SSID). By default, the SSID is included in beacon frames and undirected probe request frames are answered. To re-enable the broadcast of the SSID etc., use .Fl hidessid . .It Cm ht Enable use of High Throughput (HT) when using 802.11n (default). The 802.11n specification includes mechanisms for operation on 20MHz and 40MHz wide channels using different signalling mechanisms than specified in 802.11b, 802.11g, and 802.11a. Stations negotiate use of these facilities, termed HT20 and HT40, when they associate. To disable all use of 802.11n use .Fl ht . To disable use of HT20 (e.g., to force only HT40 use) use .Fl ht20 . To disable use of HT40 use .Fl ht40 . .Pp HT configuration is used to ``auto promote'' operation when several choices are available. For example, if a station associates to an 11n-capable access point, it controls whether the station uses legacy operation, HT20, or HT40. When an 11n-capable device is set up as an access point and Auto Channel Selection is used to locate a channel to operate on, HT configuration controls whether legacy, HT20, or HT40 operation is set up on the selected channel. If a fixed channel is specified for a station then HT configuration can be given as part of the channel specification; e.g., 6:ht/20 to set up HT20 operation on channel 6. .It Cm htcompat Enable use of compatibility support for pre-802.11n devices (default). The 802.11n protocol specification went through several incompatible iterations.
Some vendors implemented 11n support to older specifications that will not interoperate with a purely 11n-compliant station. In particular the information elements included in management frames for old devices are different. When compatibility support is enabled, both standard and compatible data will be provided. Stations that associate using the compatibility mechanisms are flagged in ``list sta''. To disable compatibility support use .Fl htcompat . .It Cm htprotmode Ar technique For interfaces operating in 802.11n, use the specified .Ar technique for protecting HT frames in a mixed legacy/HT network. The set of valid techniques is .Cm off , and .Cm rts (RTS/CTS, default). Technique names are case insensitive. .It Cm inact Enable inactivity processing for stations associated to an access point (default). When operating as an access point the 802.11 layer monitors the activity of each associated station. When a station is inactive for 5 minutes, the 802.11 layer will send several ``probe frames'' to see if the station is still present. If no response is received then the station is deauthenticated. Applications that prefer to handle this work can disable this facility by using .Fl inact . .It Cm indoor Set the location to use in calculating regulatory constraints. The location is also advertised in beacon and probe response frames when 802.11d is enabled with .Cm dotd . See also .Cm outdoor , .Cm anywhere , .Cm country , and .Cm regdomain . .It Cm list active Display the list of channels available for use, taking into account any restrictions set with the .Cm chanlist directive. See the description of .Cm list chan for more information. .It Cm list caps Display the adaptor's capabilities, including the operating modes supported. .It Cm list chan Display the list of channels available for use. Channels are shown with their IEEE channel number, equivalent frequency, and usage modes. Channels identified as .Ql 11g are also usable in .Ql 11b mode. Channels identified as .Ql 11a Turbo may be used only for Atheros' Static Turbo mode (specified with .Cm mediaopt turbo ) . Channels marked with a .Ql * have a regulatory constraint that they be passively scanned. This means a station is not permitted to transmit on the channel until it identifies the channel is being used for 802.11 communication; typically by hearing a beacon frame from an access point operating on the channel. .Cm list freq is another way of requesting this information. By default a compacted list of channels is displayed; if the .Fl v option is specified then all channels are shown. .It Cm list countries Display the set of country codes and regulatory domains that can be used in regulatory configuration. .It Cm list mac Display the current MAC Access Control List state. Each address is prefixed with a character that indicates the current policy applied to it: .Ql + indicates the address is allowed access, .Ql - indicates the address is denied access, .Ql * indicates the address is present but the current policy is open (so the ACL is not consulted). .It Cm list mesh Display the mesh routing table, used for forwarding packets on a mesh network. .It Cm list regdomain Display the current regulatory settings including the available channels and transmit power caps. .It Cm list roam Display the parameters that govern roaming operation. .It Cm list txparam Display the parameters that govern transmit operation. .It Cm list txpower Display the transmit power caps for each channel.
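.Pp
.\" Illustrative usage added for clarity; the interface name wlan0 is hypothetical.
For example, the available channels and the per-channel transmit power
caps on a hypothetical wlan0 interface might be inspected with:
.Dl # ifconfig wlan0 list chan
.Dl # ifconfig wlan0 list txpower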
.It Cm list scan Display the access points and/or ad-hoc neighbors located in the vicinity. This information may be updated automatically by the adapter with a .Cm scan request or through background scanning. Depending on the capabilities of the stations the following flags can be included in the output: .Bl -tag -width 3n .It Li A Authorized. Indicates that the station is permitted to send/receive data frames. .It Li E Extended Rate Phy (ERP). Indicates that the station is operating in an 802.11g network using extended transmit rates. .It Li H High Throughput (HT). Indicates that the station is using HT transmit rates. If a `+' follows immediately after then the station associated using deprecated mechanisms supported only when .Cm htcompat is enabled. .It Li P Power Save. Indicates that the station is operating in power save mode. .It Li Q Quality of Service (QoS). Indicates that the station is using QoS encapsulation for data frames. QoS encapsulation is enabled only when WME mode is enabled. .It Li S Short Preamble. Indicates that the station is using a short preamble to optionally improve throughput performance with 802.11g and 802.11b. .It Li T Transitional Security Network (TSN). Indicates that the station associated using TSN; see also .Cm tsn below. .It Li W Wi-Fi Protected Setup (WPS). Indicates that the station associated using WPS. .El .Pp By default interesting information elements captured from the neighboring stations are displayed at the end of each row. Possible elements include: .Cm WME (station supports WME), .Cm WPA (station supports WPA), .Cm WPS (station supports WPS), .Cm RSN (station supports 802.11i/RSN), .Cm HTCAP (station supports 802.11n/HT communication), .Cm ATH (station supports Atheros protocol extensions), .Cm VEN (station supports unknown vendor-specific extensions). If the .Fl v flag is used all the information elements and their contents will be shown. Specifying the .Fl v flag also enables display of long SSIDs. The .Cm list ap command is another way of requesting this information. .It Cm list sta When operating as an access point, display the stations that are currently associated. When operating in ad-hoc mode, display stations identified as neighbors in the IBSS. When operating in mesh mode, display stations identified as neighbors in the MBSS. When operating in station mode, display the access point. Capabilities advertised by the stations are described under the .Cm scan request. Depending on the capabilities of the stations the following flags can be included in the output: .Bl -tag -width 3n .It Li A Authorized. Indicates that the station is permitted to send/receive data frames. .It Li E Extended Rate Phy (ERP). Indicates that the station is operating in an 802.11g network using extended transmit rates. .It Li H High Throughput (HT). Indicates that the station is using HT transmit rates. If a `+' follows immediately after then the station associated using deprecated mechanisms supported only when .Cm htcompat is enabled. .It Li P Power Save. Indicates that the station is operating in power save mode. .It Li Q Quality of Service (QoS). Indicates that the station is using QoS encapsulation for data frames. QoS encapsulation is enabled only when WME mode is enabled. .It Li S Short Preamble. Indicates that the station is using a short preamble to optionally improve throughput performance with 802.11g and 802.11b. .It Li T Transitional Security Network (TSN). Indicates that the station associated using TSN; see also .Cm tsn below.
.It Li W Wi-Fi Protected Setup (WPS). Indicates that the station associated using WPS. .El .Pp By default information elements received from associated stations are displayed in a short form; the .Fl v flag causes this information to be displayed symbolically. .It Cm list wme Display the current channel parameters to use when operating in WME mode. If the .Fl v option is specified then both channel and BSS parameters are displayed for each AC (first channel, then BSS). When WME mode is enabled for an adaptor, this information will be displayed with the regular status; this command is mostly useful for examining parameters when WME mode is disabled. See the description of the .Cm wme directive for information on the various parameters. .It Cm maxretry Ar count Set the maximum number of tries to use in sending unicast frames. The default setting is 6 but drivers may override this with a value they choose. .It Cm mcastrate Ar rate Set the rate for transmitting multicast/broadcast frames. Rates are specified as megabits/second in decimal; e.g.,\& 5.5 for 5.5 Mb/s. This rate should be valid for the current operating conditions; if an invalid rate is specified drivers are free to choose an appropriate rate. .It Cm mgtrate Ar rate Set the rate for transmitting management and/or control frames. Rates are specified as megabits/second in decimal; e.g.,\& 5.5 for 5.5 Mb/s. .It Cm outdoor Set the location to use in calculating regulatory constraints. The location is also advertised in beacon and probe response frames when 802.11d is enabled with .Cm dotd . See also .Cm anywhere , .Cm country , .Cm indoor , and .Cm regdomain . .It Cm powersave Enable powersave operation. When operating as a client, the station will conserve power by periodically turning off the radio and listening for messages from the access point telling it there are packets waiting. The station must then retrieve the packets. Not all devices support power save operation as a client. The 802.11 specification requires that all access points support power save but some drivers do not. Use .Fl powersave to disable powersave operation when operating as a client. .It Cm powersavesleep Ar sleep Set the desired max powersave sleep time in TU's (1024 usecs). By default the max powersave sleep time is 100 TU's. .It Cm protmode Ar technique For interfaces operating in 802.11g, use the specified .Ar technique for protecting OFDM frames in a mixed 11b/11g network. The set of valid techniques is .Cm off , cts (CTS to self), and .Cm rtscts (RTS/CTS). Technique names are case insensitive. Not all devices support .Cm cts as a protection technique. .It Cm pureg When operating as an access point in 802.11g mode, allow only 11g-capable stations to associate (11b-only stations are not permitted to associate). To allow both 11g and 11b-only stations to associate, use .Fl pureg . .It Cm puren When operating as an access point in 802.11n mode, allow only HT-capable stations to associate (legacy stations are not permitted to associate). To allow both HT and legacy stations to associate, use .Fl puren . .It Cm regdomain Ar sku Set the regulatory domain to use in calculating the regulatory constraints for operation. In particular the set of available channels, how the wireless device will operate on the channels, and the maximum transmit power that can be used on a channel are defined by this setting. Regdomain codes (SKU's) are taken from .Pa /etc/regdomain.xml and can also be viewed with the ``list countries'' request.
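.Pp
.\" Illustrative example; assumes a device that permits regdomain changes and that the ETSI SKU is present in /etc/regdomain.xml.
For example, on a device that permits it, the ETSI regulatory domain
might be selected with:
.Dl # ifconfig wlan0 regdomain ETSI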
Note that not all devices support changing the regdomain from a default setting, which is typically stored in EEPROM. See also .Cm country , .Cm indoor , .Cm outdoor , and .Cm anywhere . .It Cm rifs Enable use of Reduced InterFrame Spacing (RIFS) when operating in 802.11n on an HT channel. Note that RIFS must be supported by both the station and access point for it to be used. To disable RIFS use .Fl rifs . .It Cm roam:rate Ar rate Set the threshold for controlling roaming when operating in a BSS. The .Ar rate parameter specifies the transmit rate in megabits at which roaming should be considered. If the current transmit rate drops below this setting and background scanning is enabled, then the system will check if a more desirable access point is available and switch over to it. The current scan cache contents are used if they are considered valid according to the .Cm scanvalid parameter; otherwise a background scan operation is triggered before any selection occurs. Each channel type has a separate rate threshold; the default values are: 12 Mb/s (11a), 2 Mb/s (11b), 2 Mb/s (11g), MCS 1 (11na, 11ng). .It Cm roam:rssi Ar rssi Set the threshold for controlling roaming when operating in a BSS. The .Ar rssi parameter specifies the receive signal strength in dBm units at which roaming should be considered. If the current rssi drops below this setting and background scanning is enabled, then the system will check if a more desirable access point is available and switch over to it. The current scan cache contents are used if they are considered valid according to the .Cm scanvalid parameter; otherwise a background scan operation is triggered before any selection occurs. Each channel type has a separate rssi threshold; the default values are all 7 dBm. .It Cm roaming Ar mode When operating as a station, control how the system will behave when communication with the current access point is broken. The .Ar mode argument may be one of .Cm device (leave it to the hardware device to decide), .Cm auto (handle either in the device or the operating system\[em]as appropriate), or .Cm manual (do nothing until explicitly instructed). By default, the device is left to handle this if it is capable; otherwise, the operating system will automatically attempt to reestablish communication. Manual mode is used by applications such as .Xr wpa_supplicant 8 that want to control the selection of an access point. .It Cm rtsthreshold Ar length Set the threshold for which transmitted frames are preceded by transmission of an RTS control frame. The .Ar length argument is the frame size in bytes and must be in the range 1 to 2346. Setting .Ar length to .Li 2346 , .Cm any , or .Cm - disables transmission of RTS frames. Not all adapters support setting the RTS threshold. .It Cm scan Initiate a scan of neighboring stations, wait for it to complete, and display all stations found. Only the super-user can initiate a scan. See .Cm list scan for information on the display. By default a background scan is done if the station is associated; otherwise a foreground scan is done and the station may roam to a different access point. The .Cm list scan request can be used to show recent scan results without initiating a new scan. .It Cm scanvalid Ar threshold Set the maximum time the scan cache contents are considered valid; i.e., will be used without first triggering a scan operation to refresh the data. The .Ar threshold parameter is specified in seconds and defaults to 60 seconds. The minimum setting for .Ar threshold is 10 seconds.
One should take care setting this threshold; if it is set too low then attempts to roam to another access point may trigger unnecessary background scan operations. .It Cm shortgi Enable use of Short Guard Interval when operating in 802.11n on an HT channel. NB: this currently enables Short GI on both HT40 and HT20 channels. To disable Short GI use .Fl shortgi . .It Cm smps Enable use of Static Spatial Multiplexing Power Save (SMPS) when operating in 802.11n. A station operating with Static SMPS maintains only a single receive chain active (this can significantly reduce power consumption). To disable SMPS use .Fl smps . .It Cm smpsdyn Enable use of Dynamic Spatial Multiplexing Power Save (SMPS) when operating in 802.11n. A station operating with Dynamic SMPS maintains only a single receive chain active but switches to multiple receive chains when it receives an RTS frame (this can significantly reduce power consumption). Note that stations cannot distinguish between RTS/CTS intended to enable multiple receive chains and those used for other purposes. To disable SMPS use .Fl smps . .It Cm ssid Ar ssid Set the desired Service Set Identifier (aka network name). The SSID is a string up to 32 characters in length and may be specified as either a normal string or in hexadecimal when preceded by .Ql 0x . Additionally, the SSID may be cleared by setting it to .Ql - . .It Cm tdmaslot Ar slot When operating with TDMA, use the specified .Ar slot configuration. The .Ar slot is a number between 0 and the maximum number of slots in the BSS. Note that a station configured as slot 0 is a master and will broadcast beacon frames advertising the BSS; stations configured to use other slots will always scan to locate a master before they ever transmit. By default .Cm tdmaslot is set to 1. .It Cm tdmaslotcnt Ar cnt When operating with TDMA, set up a BSS with .Ar cnt slots. The slot count may be at most 8. The current implementation is only tested with two stations (i.e., point-to-point applications). This setting is only meaningful when a station is configured as slot 0; other stations adopt this setting from the BSS they join. By default .Cm tdmaslotcnt is set to 2. .It Cm tdmaslotlen Ar len When operating with TDMA, set up a BSS such that each station has a slot .Ar len microseconds long. The slot length must be at least 150 microseconds (1/8 TU) and no more than 65 milliseconds. Note that setting too small a slot length may result in poor channel bandwidth utilization due to factors such as timer granularity and guard time. This setting is only meaningful when a station is configured as slot 0; other stations adopt this setting from the BSS they join. By default .Cm tdmaslotlen is set to 10 milliseconds. .It Cm tdmabintval Ar intval When operating with TDMA, set up a BSS such that beacons are transmitted every .Ar intval superframes to synchronize the TDMA slot timing. A superframe is defined as the number of slots times the slot length; e.g., a BSS with two slots of 10 milliseconds has a 20 millisecond superframe. The beacon interval may not be zero. A lower setting of .Cm tdmabintval causes the timers to be resynchronized more often; this can help if significant timer drift is observed. By default .Cm tdmabintval is set to 5. .It Cm tsn When operating as an access point with WPA/802.11i allow legacy stations to associate using static key WEP and open authentication. To disallow legacy station use of WEP, use .Fl tsn . .It Cm txpower Ar power Set the power used to transmit frames.
The .Ar power argument is specified in 0.5 dBm units. Out of range values are truncated. Typically only a few discrete power settings are available and the driver will use the setting closest to the specified value. Not all adapters support changing the transmit power. .It Cm ucastrate Ar rate Set a fixed rate for transmitting unicast frames. Rates are specified as megabits/second in decimal; e.g.,\& 5.5 for 5.5 Mb/s. This rate should be valid for the current operating conditions; if an invalid rate is specified drivers are free to choose an appropriate rate. .It Cm wepmode Ar mode Set the desired WEP mode. Not all adapters support all modes. The set of valid modes is .Cm off , on , and .Cm mixed . The .Cm mixed mode explicitly tells the adaptor to allow association with access points which allow both encrypted and unencrypted traffic. On these adapters, .Cm on means that the access point must only allow encrypted connections. On other adapters, .Cm on is generally another name for .Cm mixed . Modes are case insensitive. .It Cm weptxkey Ar index Set the WEP key to be used for transmission. This is the same as setting the default transmission key with .Cm deftxkey . .It Cm wepkey Ar key Ns | Ns Ar index : Ns Ar key Set the selected WEP key. If an .Ar index is not given, key 1 is set. A WEP key will be either 5 or 13 characters (40 or 104 bits) depending on the local network and the capabilities of the adaptor. It may be specified either as a plain string or as a string of hexadecimal digits preceded by .Ql 0x . For maximum portability, hex keys are recommended; the mapping of text keys to WEP encryption is usually driver-specific. In particular, the .Tn Windows drivers do this mapping differently to .Fx . A key may be cleared by setting it to .Ql - . If WEP is supported then there are at least four keys. Some adapters support more than four keys. If that is the case, then the first four keys (1-4) will be the standard temporary keys and any others will be adaptor specific keys such as permanent keys stored in NVRAM. .Pp Note that you must set a default transmit key with .Cm deftxkey for the system to know which key to use in encrypting outbound traffic. .It Cm wme Enable Wireless Multimedia Extensions (WME) support, if available, for the specified interface. WME is a subset of the IEEE 802.11e standard to support the efficient communication of realtime and multimedia data. To disable WME support, use .Fl wme . Another name for this parameter is .Cm wmm . .Pp The following parameters are meaningful only when WME support is in use. Parameters are specified per-AC (Access Category) and split into those that are used by a station when acting as an access point and those for client stations in the BSS. The latter are received from the access point and may not be changed (at the station). The following Access Categories are recognized: .Pp .Bl -tag -width ".Cm AC_BK" -compact .It Cm AC_BE (or .Cm BE ) best effort delivery, .It Cm AC_BK (or .Cm BK ) background traffic, .It Cm AC_VI (or .Cm VI ) video traffic, .It Cm AC_VO (or .Cm VO ) voice traffic. .El .Pp AC parameters are case-insensitive. Traffic classification is done in the operating system using the vlan priority associated with data frames or the ToS (Type of Service) indication in IP-encapsulated frames. If neither information is present, traffic is assigned to the Best Effort (BE) category.
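.Pp
.\" Illustrative usage; wlan0 is a hypothetical interface.
For example, WME support might be enabled, and the resulting channel
and BSS parameters examined, with:
.Dl # ifconfig wlan0 wme
.Dl # ifconfig wlan0 list wme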
.Bl -tag -width indent .It Cm ack Ar ac Set the ACK policy for QoS transmissions by the local station; this controls whether or not data frames transmitted by a station require an ACK response from the receiving station. To disable waiting for an ACK use .Fl ack . This parameter is applied only to the local station. .It Cm acm Ar ac Enable the Admission Control Mandatory (ACM) mechanism for transmissions by the local station. To disable the ACM use .Fl acm . On stations in a BSS this parameter is read-only and indicates the setting received from the access point. NB: ACM is not supported right now. .It Cm aifs Ar ac Ar count Set the Arbitration Inter Frame Spacing (AIFS) channel access parameter to use for transmissions by the local station. On stations in a BSS this parameter is read-only and indicates the setting received from the access point. .It Cm cwmin Ar ac Ar count Set the CWmin channel access parameter to use for transmissions by the local station. On stations in a BSS this parameter is read-only and indicates the setting received from the access point. .It Cm cwmax Ar ac Ar count Set the CWmax channel access parameter to use for transmissions by the local station. On stations in a BSS this parameter is read-only and indicates the setting received from the access point. .It Cm txoplimit Ar ac Ar limit Set the Transmission Opportunity Limit channel access parameter to use for transmissions by the local station. This parameter defines an interval of time when a WME station has the right to initiate transmissions onto the wireless medium. On stations in a BSS this parameter is read-only and indicates the setting received from the access point. .It Cm bss:aifs Ar ac Ar count Set the AIFS channel access parameter to send to stations in a BSS. This parameter is meaningful only when operating in ap mode. .It Cm bss:cwmin Ar ac Ar count Set the CWmin channel access parameter to send to stations in a BSS. This parameter is meaningful only when operating in ap mode. .It Cm bss:cwmax Ar ac Ar count Set the CWmax channel access parameter to send to stations in a BSS. This parameter is meaningful only when operating in ap mode. .It Cm bss:txoplimit Ar ac Ar limit Set the TxOpLimit channel access parameter to send to stations in a BSS. This parameter is meaningful only when operating in ap mode. .El .It Cm wps Enable Wi-Fi Protected Setup (WPS) support. Note that WPS support requires a WPS-capable supplicant. To disable this function use .Fl wps . .El .Pp The following parameters support an optional access control list feature available with some adapters when operating in ap mode; see .Xr wlan_acl 4 . This facility allows an access point to accept/deny association requests based on the MAC address of the station. Note that this feature does not significantly enhance security as MAC address spoofing is easy to do. .Bl -tag -width indent .It Cm mac:add Ar address Add the specified MAC address to the database. Depending on the policy setting association requests from the specified station will be allowed or denied. .It Cm mac:allow Set the ACL policy to permit association only by stations registered in the database. .It Cm mac:del Ar address Delete the specified MAC address from the database. .It Cm mac:deny Set the ACL policy to deny association only by stations registered in the database. .It Cm mac:kick Ar address Force the specified station to be deauthenticated. This typically is done to block a station after updating the address database.
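.Pp
.\" Illustrative example; the MAC address shown is hypothetical.
For example, with the policy set to deny, a station might be added to
the database and then forcibly deauthenticated with:
.Dl # ifconfig wlan0 mac:add 00:11:22:33:44:55
.Dl # ifconfig wlan0 mac:kick 00:11:22:33:44:55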
.It Cm mac:open Set the ACL policy to allow all stations to associate. .It Cm mac:flush Delete all entries in the database. .It Cm mac:radius Set the ACL policy to permit association only by stations approved by a RADIUS server. Note that this feature requires the .Xr hostapd 8 program be configured to do the right thing as it handles the RADIUS processing (and marks stations as authorized). .El .Pp The following parameters are related to a wireless interface operating in mesh mode: .Bl -tag -width indent .It Cm meshid Ar meshid Set the desired Mesh Identifier. The Mesh ID is a string up to 32 characters in length. A mesh interface must have a Mesh Identifier specified to reach an operational state. .It Cm meshttl Ar ttl Set the desired ``time to live'' for mesh forwarded packets; this is the number of hops a packet may be forwarded before it is discarded. The default setting for .Cm meshttl is 31. .It Cm meshpeering Enable or disable peering with neighbor mesh stations. Stations must peer before any data packets can be exchanged. By default .Cm meshpeering is enabled. .It Cm meshforward Enable or disable forwarding packets by a mesh interface. By default .Cm meshforward is enabled. .It Cm meshgate This attribute specifies whether or not the mesh STA activates mesh gate announcements. By default .Cm meshgate is disabled. .It Cm meshmetric Ar protocol Set the specified .Ar protocol as the link metric protocol used on a mesh network. The default protocol is called .Ar AIRTIME . The mesh interface will restart after changing this setting. .It Cm meshpath Ar protocol Set the specified .Ar protocol as the path selection protocol used on a mesh network. The only available protocol at the moment is called .Ar HWMP (Hybrid Wireless Mesh Protocol). The mesh interface will restart after changing this setting. .It Cm hwmprootmode Ar mode Stations on a mesh network can operate as ``root nodes.'' Root nodes try to find paths to all mesh nodes and advertise themselves regularly. When there is a root mesh node on a network, other mesh nodes can set up paths between themselves faster because they can use the root node to find the destination. This path may not be the best, but on-demand routing will eventually find the best path. The following modes are recognized: .Pp .Bl -tag -width ".Cm PROACTIVE" -compact .It Cm DISABLED Disable root mode. .It Cm NORMAL Send broadcast path requests every two seconds. Nodes on the mesh without a path to this root mesh station will try to discover a path to us. .It Cm PROACTIVE Send broadcast path requests every two seconds and every node must reply with a path reply even if it already has a path to this root mesh station. .It Cm RANN Send broadcast root announcement (RANN) frames. Nodes on the mesh without a path to this root mesh station will try to discover a path to us. .El By default .Cm hwmprootmode is set to .Ar DISABLED . .It Cm hwmpmaxhops Ar cnt Set the maximum number of hops allowed in an HWMP path to .Ar cnt . The default setting for .Cm hwmpmaxhops is 31. .El .Pp The following parameters are for compatibility with other systems: .Bl -tag -width indent .It Cm nwid Ar ssid Another name for the .Cm ssid parameter. Included for .Nx compatibility. .It Cm stationname Ar name Set the name of this station. The station name is not part of the IEEE 802.11 protocol though some interfaces support it. As such it only seems to be meaningful to identical or virtually identical equipment. Setting the station name is identical in syntax to setting the SSID.
One can also use .Cm station for .Bsx compatibility. .It Cm wep Another way of saying .Cm wepmode on . Included for .Bsx compatibility. .It Fl wep Another way of saying .Cm wepmode off . Included for .Bsx compatibility. .It Cm nwkey Ar key Another way of saying: .Dq Li "wepmode on weptxkey 1 wepkey 1:key wepkey 2:- wepkey 3:- wepkey 4:-" . Included for .Nx compatibility. .It Cm nwkey Xo .Sm off .Ar n : k1 , k2 , k3 , k4 .Sm on .Xc Another way of saying .Dq Li "wepmode on weptxkey n wepkey 1:k1 wepkey 2:k2 wepkey 3:k3 wepkey 4:k4" . Included for .Nx compatibility. .It Fl nwkey Another way of saying .Cm wepmode off . Included for .Nx compatibility. .El .Pp The following parameters are specific to bridge interfaces: .Bl -tag -width indent .It Cm addm Ar interface Add the interface named by .Ar interface as a member of the bridge. The interface is put into promiscuous mode so that it can receive every packet sent on the network. .It Cm deletem Ar interface Remove the interface named by .Ar interface from the bridge. Promiscuous mode is disabled on the interface when it is removed from the bridge. .It Cm maxaddr Ar size Set the size of the bridge address cache to .Ar size . The default is 2000 entries. .It Cm timeout Ar seconds Set the timeout of address cache entries to .Ar seconds seconds. If .Ar seconds is zero, then address cache entries will not be expired. The default is 1200 seconds. .It Cm addr Display the addresses that have been learned by the bridge. .It Cm static Ar interface-name Ar address Add a static entry into the address cache pointing to .Ar interface-name . Static entries are never aged out of the cache or replaced, even if the address is seen on a different interface. .It Cm deladdr Ar address Delete .Ar address from the address cache. .It Cm flush Delete all dynamically-learned addresses from the address cache. .It Cm flushall Delete all addresses, including static addresses, from the address cache. .It Cm discover Ar interface Mark an interface as a .Dq discovering interface. When the bridge has no address cache entry (either dynamic or static) for the destination address of a packet, the bridge will forward the packet to all member interfaces marked as .Dq discovering . This is the default for all interfaces added to a bridge. .It Cm -discover Ar interface Clear the .Dq discovering attribute on a member interface. For member interfaces without the .Dq discovering attribute, the only packets forwarded on the interface are broadcast or multicast packets and packets for which the destination address is known to be on the interface's segment. .It Cm learn Ar interface Mark an interface as a .Dq learning interface. When a packet arrives on such an interface, the source address of the packet is entered into the address cache as being a destination address on the interface's segment. This is the default for all interfaces added to a bridge. .It Cm -learn Ar interface Clear the .Dq learning attribute on a member interface. .It Cm sticky Ar interface Mark an interface as a .Dq sticky interface. Dynamically learned address entries are treated as static once entered into the cache. Sticky entries are never aged out of the cache or replaced, even if the address is seen on a different interface. .It Cm -sticky Ar interface Clear the .Dq sticky attribute on a member interface. .It Cm private Ar interface Mark an interface as a .Dq private interface. A private interface does not forward any traffic to any other port that is also a private interface.
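.Pp
.\" Illustrative example; bridge0, em0 and em1 are hypothetical interfaces.
For example, traffic between two hypothetical private members em0 and
em1 of bridge0 could be blocked with:
.Dl # ifconfig bridge0 private em0 private em1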
.It Cm -private Ar interface Clear the .Dq private attribute on a member interface. .It Cm span Ar interface Add the interface named by .Ar interface as a span port on the bridge. Span ports transmit a copy of every frame received by the bridge. This is most useful for snooping a bridged network passively on another host connected to one of the span ports of the bridge. .It Cm -span Ar interface Delete the interface named by .Ar interface from the list of span ports of the bridge. .It Cm stp Ar interface Enable Spanning Tree protocol on .Ar interface . The .Xr if_bridge 4 driver has support for the IEEE 802.1D Spanning Tree protocol (STP). Spanning Tree is used to detect and remove loops in a network topology. .It Cm -stp Ar interface Disable Spanning Tree protocol on .Ar interface . This is the default for all interfaces added to a bridge. .It Cm edge Ar interface Set .Ar interface as an edge port. An edge port connects directly to end stations and cannot create bridging loops in the network; this allows it to transition straight to forwarding. .It Cm -edge Ar interface Disable edge status on .Ar interface . .It Cm autoedge Ar interface Allow .Ar interface to automatically detect edge status. This is the default for all interfaces added to a bridge. .It Cm -autoedge Ar interface Disable automatic edge status on .Ar interface . .It Cm ptp Ar interface Set the .Ar interface as a point-to-point link. This is required for straight transitions to forwarding and should be enabled on a direct link to another RSTP-capable switch. .It Cm -ptp Ar interface Disable point-to-point link status on .Ar interface . This should be disabled for a half-duplex link and for an interface connected to a shared network segment, like a hub or a wireless network. .It Cm autoptp Ar interface Automatically detect the point-to-point status on .Ar interface by checking the full duplex link status. This is the default for interfaces added to the bridge. .It Cm -autoptp Ar interface Disable automatic point-to-point link detection on .Ar interface . .It Cm maxage Ar seconds Set the time that a Spanning Tree protocol configuration is valid. The default is 20 seconds. The minimum is 6 seconds and the maximum is 40 seconds. .It Cm fwddelay Ar seconds Set the time that must pass before an interface begins forwarding packets when Spanning Tree is enabled. The default is 15 seconds. The minimum is 4 seconds and the maximum is 30 seconds. .It Cm hellotime Ar seconds Set the time between broadcasting of Spanning Tree protocol configuration messages. The hello time may only be changed when operating in legacy stp mode. The default is 2 seconds. The minimum is 1 second and the maximum is 2 seconds. .It Cm priority Ar value Set the bridge priority for Spanning Tree. The default is 32768. The minimum is 0 and the maximum is 61440. .It Cm proto Ar value Set the Spanning Tree protocol. The default is rstp. The available options are stp and rstp. .It Cm holdcnt Ar value Set the transmit hold count for Spanning Tree. This is the number of packets transmitted before being rate limited. The default is 6. The minimum is 1 and the maximum is 10. .It Cm ifpriority Ar interface Ar value Set the Spanning Tree priority of .Ar interface to .Ar value . The default is 128. The minimum is 0 and the maximum is 240. .It Cm ifpathcost Ar interface Ar value Set the Spanning Tree path cost of .Ar interface to .Ar value . The default is calculated from the link speed. To change a previously selected path cost back to automatic, set the cost to 0.
The minimum is 1 and the maximum is 200000000. .It Cm ifmaxaddr Ar interface Ar size Set the maximum number of hosts allowed from an interface. Packets with unknown source addresses are dropped until an existing host cache entry expires or is removed. Set to 0 to disable. .El .Pp The following parameters are specific to lagg interfaces: .Bl -tag -width indent .It Cm laggport Ar interface Add the interface named by .Ar interface as a port of the aggregation interface. .It Cm -laggport Ar interface Remove the interface named by .Ar interface from the aggregation interface. .It Cm laggproto Ar proto Set the aggregation protocol. The default is .Li failover . The available options are .Li failover , .Li lacp , .Li loadbalance , .Li roundrobin , .Li broadcast and .Li none . .It Cm lagghash Ar option Ns Oo , Ns Ar option Oc Set the packet layers to hash for aggregation protocols which load balance. The default is .Dq l2,l3,l4 . The options can be combined using commas. .Pp .Bl -tag -width ".Cm l2" -compact .It Cm l2 src/dst mac address and optional vlan number. .It Cm l3 src/dst address for IPv4 or IPv6. .It Cm l4 src/dst port for TCP/UDP/SCTP. .El .It Cm -use_flowid Enable local hash computation for RSS hash on the interface. The .Li loadbalance and .Li lacp modes will use the RSS hash from the network card if available to avoid computing one; this may give poor traffic distribution if the hash is invalid or uses less of the protocol header information. .Cm -use_flowid disables use of RSS hash from the network card. The default value can be set via the .Va net.link.lagg.default_use_flowid .Xr sysctl 8 variable. .Li 0 means .Dq disabled and .Li 1 means .Dq enabled . .It Cm use_flowid Use the RSS hash from the network card if available. .It Cm flowid_shift Ar number Set a shift parameter for RSS local hash computation. Hash is calculated by using flowid bits in a packet header mbuf which are shifted by the number of this parameter. +.It Cm use_numa +Enable selection of egress ports based on the native +.Xr NUMA 4 +domain for the packets being transmitted. +This is currently only implemented for lacp mode. +This works only on +.Xr NUMA 4 +hardware, running a kernel compiled with the +.Xr NUMA 4 +option, and when interfaces from multiple +.Xr NUMA 4 +domains are ports of the aggregation interface. +.It Cm -use_numa +Disable selection of egress ports based on the native +.Xr NUMA 4 +domain for the packets being transmitted. .It Cm lacp_fast_timeout Enable lacp fast-timeout on the interface. .It Cm -lacp_fast_timeout Disable lacp fast-timeout on the interface. .It Cm lacp_strict Enable lacp strict compliance on the interface. The default value can be set via the .Va net.link.lagg.lacp.default_strict_mode .Xr sysctl 8 variable. .Li 0 means .Dq disabled and .Li 1 means .Dq enabled . .It Cm -lacp_strict Disable lacp strict compliance on the interface. .El .Pp The following parameters apply to IP tunnel interfaces, .Xr gif 4 : .Bl -tag -width indent .It Cm tunnel Ar src_addr dest_addr Configure the physical source and destination address for IP tunnel interfaces. The arguments .Ar src_addr and .Ar dest_addr are interpreted as the outer source/destination for the encapsulating IPv4/IPv6 header. .It Fl tunnel Unconfigure the physical source and destination address for IP tunnel interfaces previously configured with .Cm tunnel . .It Cm deletetunnel Another name for the .Fl tunnel parameter. .It Cm accept_rev_ethip_ver Set a flag to accept both correct EtherIP packets and ones with reversed version field.
Enabled by default. This is for backward compatibility with .Fx 6.1 , 6.2, 6.3, 7.0, and 7.1. .It Cm -accept_rev_ethip_ver Clear a flag .Cm accept_rev_ethip_ver . .It Cm ignore_source Set a flag to accept encapsulated packets destined to this host independently from source address. This may be useful for hosts that receive encapsulated packets from load balancers. .It Cm -ignore_source Clear a flag .Cm ignore_source . .It Cm send_rev_ethip_ver Set a flag to send EtherIP packets with reversed version field intentionally. Disabled by default. This is for backward compatibility with .Fx 6.1 , 6.2, 6.3, 7.0, and 7.1. .It Cm -send_rev_ethip_ver Clear a flag .Cm send_rev_ethip_ver . .El .Pp The following parameters apply to GRE tunnel interfaces, .Xr gre 4 : .Bl -tag -width indent .It Cm tunnel Ar src_addr dest_addr Configure the physical source and destination address for GRE tunnel interfaces. The arguments .Ar src_addr and .Ar dest_addr are interpreted as the outer source/destination for the encapsulating IPv4/IPv6 header. .It Fl tunnel Unconfigure the physical source and destination address for GRE tunnel interfaces previously configured with .Cm tunnel . .It Cm deletetunnel Another name for the .Fl tunnel parameter. .It Cm grekey Ar key Configure the GRE key to be used for outgoing packets. Note that .Xr gre 4 will always accept GRE packets with invalid or absent keys. This command will result in a four byte MTU reduction on the interface. .El .Pp The following parameters are specific to .Xr pfsync 4 interfaces: .Bl -tag -width indent .It Cm syncdev Ar iface Use the specified interface to send and receive pfsync state synchronisation messages. .It Fl syncdev Stop sending pfsync state synchronisation messages over the network. .It Cm syncpeer Ar peer_address Make the pfsync link point-to-point rather than using multicast to broadcast the state synchronisation messages. The .Ar peer_address is the IP address of the other host taking part in the pfsync cluster. .It Fl syncpeer Broadcast the packets using multicast. .It Cm maxupd Ar n Set the maximum number of updates for a single state which can be collapsed into one. This is an 8-bit number; the default value is 128. .It Cm defer Defer transmission of the first packet in a state until a peer has acknowledged that the associated state has been inserted. .It Fl defer Do not defer the first packet in a state. This is the default. .El .Pp The following parameters are specific to .Xr vlan 4 interfaces: .Bl -tag -width indent .It Cm vlan Ar vlan_tag Set the VLAN tag value to .Ar vlan_tag . This value is a 12-bit VLAN Identifier (VID) which is used to create an 802.1Q VLAN header for packets sent from the .Xr vlan 4 interface. Note that .Cm vlan and .Cm vlandev must both be set at the same time. .It Cm vlanpcp Ar priority_code_point Priority code point .Pq Dv PCP is a 3-bit field which refers to the IEEE 802.1p class of service and maps to the frame priority level. .Pp Values in order of priority are: .Cm 1 .Pq Dv Background (lowest) , .Cm 0 .Pq Dv Best effort (default) , .Cm 2 .Pq Dv Excellent effort , .Cm 3 .Pq Dv Critical applications , .Cm 4 .Pq Dv Video, < 100ms latency , .Cm 5 .Pq Dv Video, < 10ms latency , .Cm 6 .Pq Dv Internetwork control , .Cm 7 .Pq Dv Network control (highest) . .It Cm vlandev Ar iface Associate the physical interface .Ar iface with a .Xr vlan 4 interface. Packets transmitted through the .Xr vlan 4 interface will be diverted to the specified physical interface .Ar iface with 802.1Q VLAN encapsulation.
Packets with 802.1Q encapsulation received by the parent interface with the correct VLAN Identifier will be diverted to the associated .Xr vlan 4 pseudo-interface. The .Xr vlan 4 interface is assigned a copy of the parent interface's flags and the parent's Ethernet address. The .Cm vlandev and .Cm vlan must both be set at the same time. If the .Xr vlan 4 interface already has a physical interface associated with it, this command will fail. To change the association to another physical interface, the existing association must be cleared first. .Pp Note: if the hardware tagging capability is set on the parent interface, the .Xr vlan 4 pseudo interface's behavior changes: the .Xr vlan 4 interface recognizes that the parent interface supports insertion and extraction of VLAN tags on its own (usually in firmware) and that it should pass packets to and from the parent unaltered. .It Fl vlandev Op Ar iface If the driver is a .Xr vlan 4 pseudo device, disassociate the parent interface from it. This breaks the link between the .Xr vlan 4 interface and its parent, clears its VLAN Identifier, flags and its link address and shuts the interface down. The .Ar iface argument is useless and hence deprecated. .El .Pp The following parameters are used to configure .Xr vxlan 4 interfaces. .Bl -tag -width indent .It Cm vxlanid Ar identifier This value is a 24-bit VXLAN Network Identifier (VNI) that identifies the virtual network segment membership of the interface. .It Cm vxlanlocal Ar address The source address used in the encapsulating IPv4/IPv6 header. The address should already be assigned to an existing interface. When the interface is configured in unicast mode, the listening socket is bound to this address. .It Cm vxlanremote Ar address The interface can be configured in a unicast, or point-to-point, mode to create a tunnel between two hosts. This is the IP address of the remote end of the tunnel. .It Cm vxlangroup Ar address The interface can be configured in a multicast mode to create a virtual network of hosts. This is the IP multicast group address the interface will join. .It Cm vxlanlocalport Ar port The port number the interface will listen on. The default port number is 4789. .It Cm vxlanremoteport Ar port The destination port number used in the encapsulating IPv4/IPv6 header. The remote host should be listening on this port. The default port number is 4789. Note some other implementations, such as Linux, do not default to the IANA-assigned port, but instead listen on port 8472. .It Cm vxlanportrange Ar low high The range of source ports used in the encapsulating IPv4/IPv6 header. The port selected within the range is based on a hash of the inner frame. A range is useful to provide entropy within the outer IP header for more effective load balancing. The default range is between the .Xr sysctl 8 variables .Va net.inet.ip.portrange.first and .Va net.inet.ip.portrange.last . .It Cm vxlantimeout Ar timeout The maximum time, in seconds, before an entry in the forwarding table is pruned. The default is 1200 seconds (20 minutes). .It Cm vxlanmaxaddr Ar max The maximum number of entries in the forwarding table. The default is 2000. .It Cm vxlandev Ar dev When the interface is configured in multicast mode, the .Cm dev interface is used to transmit IP multicast packets. .It Cm vxlanttl Ar ttl The TTL used in the encapsulating IPv4/IPv6 header. The default is 64.
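.Pp
.\" Illustrative example; the VNI, addresses, and interface names are hypothetical.
For example, a vxlan interface operating in multicast mode might be
created with:
.Dl # ifconfig vxlan0 create vxlanid 108 vxlanlocal 192.0.2.1 vxlangroup 239.0.0.108 vxlandev em0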
.It Cm vxlanlearn The source IP address and inner source Ethernet MAC address of received packets are used to dynamically populate the forwarding table. When in multicast mode, an entry in the forwarding table allows the interface to send the frame directly to the remote host instead of broadcasting the frame to the multicast group. This is the default. .It Fl vxlanlearn The forwarding table is not populated by received packets. .It Cm vxlanflush Delete all dynamically-learned addresses from the forwarding table. .It Cm vxlanflushall Delete all addresses, including static addresses, from the forwarding table. .El .Pp The following parameters are used to configure .Xr carp 4 protocol on an interface: .Bl -tag -width indent .It Cm vhid Ar n Set the virtual host ID. This is a required setting to initiate .Xr carp 4 . If the virtual host ID does not exist yet, it is created and attached to the interface, otherwise configuration of an existing vhid is adjusted. If the .Cm vhid keyword is supplied along with an .Dq inet6 or .Dq inet address, then this address is configured to be run under control of the specified vhid. Whenever the last address that refers to a particular vhid is removed from an interface, the vhid is automatically removed from the interface and destroyed. Any other configuration parameters for the .Xr carp 4 protocol should be supplied along with the .Cm vhid keyword. Acceptable values for vhid are 1 to 255. .It Cm advbase Ar seconds Specifies the base of the advertisement interval in seconds. The acceptable values are 1 to 255. The default value is 1. .It Cm advskew Ar interval Specifies the skew to add to the base advertisement interval to make one host advertise slower than another host. It is specified in 1/256 of seconds. The acceptable values are 1 to 254. The default value is 0. .It Cm pass Ar phrase Set the authentication key to .Ar phrase . .It Cm state Ar MASTER|BACKUP Forcibly change the state of a given vhid. .El .Pp The .Nm utility displays the current configuration for a network interface when no optional parameters are supplied. If a protocol family is specified, .Nm will report only the details specific to that protocol family. .Pp If the .Fl m flag is passed before an interface name, .Nm will display the capability list and all of the supported media for the specified interface. If the .Fl L flag is supplied, the address lifetime is displayed for IPv6 addresses, as a time offset string. .Pp Optionally, the .Fl a flag may be used instead of an interface name. This flag instructs .Nm to display information about all interfaces in the system. The .Fl d flag limits this to interfaces that are down, and .Fl u limits this to interfaces that are up. When no arguments are given, .Fl a is implied. .Pp The .Fl l flag may be used to list all available interfaces on the system, with no additional information. If an .Ar address_family is specified, only interfaces of that type will be listed. .Fl l Dq ether will list only Ethernet adapters, excluding the loopback interface. Use of this flag is mutually exclusive with all other flags and commands, except for .Fl d (only list interfaces that are down) and .Fl u (only list interfaces that are up). .Pp The .Fl v flag may be used to get more verbose status for an interface. .Pp The .Fl C flag may be used to list all of the interface cloners available on the system, with no additional information. Use of this flag is mutually exclusive with all other flags and commands.
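.Pp
.\" Illustrative invocations of the listing flags described above.
For example, the names of all Ethernet interfaces, and the set of
available interface cloners, might be listed with:
.Dl # ifconfig -l ether
.Dl # ifconfig -C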
.Pp The .Fl k flag causes keying information for the interface, if available, to be printed. For example, the values of 802.11 WEP keys and .Xr carp 4 passphrases will be printed, if accessible to the current user. This information is not printed by default, as it may be considered sensitive. .Pp If the network interface driver is not present in the kernel then .Nm will attempt to load it. The .Fl n flag disables this behavior. .Pp Only the super-user may modify the configuration of a network interface. .Sh EXAMPLES Assign the IPv4 address .Li 192.0.2.10 , with a network mask of .Li 255.255.255.0 , to the interface .Li fxp0 : .Dl # ifconfig fxp0 inet 192.0.2.10 netmask 255.255.255.0 .Pp Add the IPv4 address .Li 192.0.2.45 , with the CIDR network prefix .Li /28 , to the interface .Li ed0 , using .Cm add as a synonym for the canonical form of the option .Cm alias : .Dl # ifconfig ed0 inet 192.0.2.45/28 add .Pp Remove the IPv4 address .Li 192.0.2.45 from the interface .Li ed0 : .Dl # ifconfig ed0 inet 192.0.2.45 -alias .Pp Enable IPv6 functionality of the interface: .Dl # ifconfig em0 inet6 -ifdisabled .Pp Add the IPv6 address .Li 2001:DB8:DBDB::123/48 to the interface .Li em0 : .Dl # ifconfig em0 inet6 2001:db8:dbdb::123 prefixlen 48 alias Note that lower case hexadecimal IPv6 addresses are acceptable. .Pp Remove the IPv6 address added in the above example, using the .Li / character as shorthand for the network prefix, and using .Cm delete as a synonym for the canonical form of the option .Fl alias : .Dl # ifconfig em0 inet6 2001:db8:dbdb::123/48 delete .Pp Configure a single CARP redundant address on igb0, and then switch it to be master: .Dl # ifconfig igb0 vhid 1 10.0.0.1/24 pass foobar up .Dl # ifconfig igb0 vhid 1 state master .Pp Configure the interface .Li xl0 to use 100baseTX, full duplex Ethernet media options: .Dl # ifconfig xl0 media 100baseTX mediaopt full-duplex .Pp Label the em0 interface as an uplink: .Dl # ifconfig em0 description \&"Uplink to Gigabit Switch 2\&" .Pp Create the software network interface .Li gif1 : .Dl # ifconfig gif1 create .Pp Destroy the software network interface .Li gif1 : .Dl # ifconfig gif1 destroy .Pp Display available wireless networks using .Li wlan0 : .Dl # ifconfig wlan0 list scan .Pp Display inet and inet6 address subnet masks in CIDR notation: .Dl # ifconfig -f inet:cidr,inet6:cidr .Sh DIAGNOSTICS Messages indicating the specified interface does not exist, the requested address is unknown, or the user is not privileged and tried to alter an interface's configuration. .Sh SEE ALSO .Xr netstat 1 , .Xr carp 4 , .Xr gif 4 , .Xr netintro 4 , .Xr pfsync 4 , .Xr polling 4 , .Xr vlan 4 , .Xr vxlan 4 , .Xr devd.conf 5 , .\" .Xr eon 5 , .Xr devd 8 , .Xr jail 8 , .Xr rc 8 , .Xr routed 8 , .Xr sysctl 8 .Sh HISTORY The .Nm utility appeared in .Bx 4.2 . .Sh BUGS Basic IPv6 node operation requires a link-local address on each interface configured for IPv6. Normally, such an address is automatically configured by the kernel on each interface added to the system or enabled; this behavior may be disabled by setting the per-interface flag .Cm -auto_linklocal . The default value of this flag is 1; it can be changed using the sysctl MIB variable .Va net.inet6.ip6.auto_linklocal . .Pp Do not configure IPv6 addresses with no link-local address by using .Nm . It can result in unexpected kernel behavior.
Index: projects/runtime-coverage-v2/sbin/ifconfig/iflagg.c =================================================================== --- projects/runtime-coverage-v2/sbin/ifconfig/iflagg.c (revision 347075) +++ projects/runtime-coverage-v2/sbin/ifconfig/iflagg.c (revision 347076) @@ -1,331 +1,335 @@ /*- */ #ifndef lint static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifconfig.h" char lacpbuf[120]; /* LACP peer '[(a,a,a),(p,p,p)]' */ static void setlaggport(const char *val, int d, int s, const struct afswtch *afp) { struct lagg_reqport rp; bzero(&rp, sizeof(rp)); strlcpy(rp.rp_ifname, name, sizeof(rp.rp_ifname)); strlcpy(rp.rp_portname, val, sizeof(rp.rp_portname)); /* * Do not exit with an error here. Doing so permits a * failed NIC to take down an entire lagg. * * Don't error at all if the port is already in the lagg. */ if (ioctl(s, SIOCSLAGGPORT, &rp) && errno != EEXIST) { warnx("%s %s: SIOCSLAGGPORT: %s", name, val, strerror(errno)); exit_code = 1; } } static void unsetlaggport(const char *val, int d, int s, const struct afswtch *afp) { struct lagg_reqport rp; bzero(&rp, sizeof(rp)); strlcpy(rp.rp_ifname, name, sizeof(rp.rp_ifname)); strlcpy(rp.rp_portname, val, sizeof(rp.rp_portname)); if (ioctl(s, SIOCSLAGGDELPORT, &rp)) err(1, "SIOCSLAGGDELPORT"); } static void setlaggproto(const char *val, int d, int s, const struct afswtch *afp) { struct lagg_protos lpr[] = LAGG_PROTOS; struct lagg_reqall ra; int i; bzero(&ra, sizeof(ra)); ra.ra_proto = LAGG_PROTO_MAX; for (i = 0; i < nitems(lpr); i++) { if (strcmp(val, lpr[i].lpr_name) == 0) { ra.ra_proto = lpr[i].lpr_proto; break; } } if (ra.ra_proto == LAGG_PROTO_MAX) errx(1, "Invalid aggregation protocol: %s", val); strlcpy(ra.ra_ifname, name, sizeof(ra.ra_ifname)); if (ioctl(s, SIOCSLAGG, &ra) != 0) err(1, "SIOCSLAGG"); } static void setlaggflowidshift(const char *val, int d, int s, const struct afswtch *afp) { struct lagg_reqopts ro; bzero(&ro, sizeof(ro)); ro.ro_opts = LAGG_OPT_FLOWIDSHIFT; strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname)); ro.ro_flowid_shift = (int)strtol(val, NULL, 10); if (ro.ro_flowid_shift & ~LAGG_OPT_FLOWIDSHIFT_MASK) errx(1, "Invalid flowid_shift option: %s", val); if (ioctl(s, SIOCSLAGGOPTS, &ro) != 0) err(1, "SIOCSLAGGOPTS"); } static void setlaggrr_limit(const char *val, int d, int s, const struct afswtch *afp) { struct lagg_reqopts ro; bzero(&ro, sizeof(ro)); strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname)); ro.ro_bkt = (int)strtol(val, NULL, 10); if (ioctl(s, SIOCSLAGGOPTS, &ro) != 0) err(1, "SIOCSLAGG"); } static void setlaggsetopt(const char *val, int d, int s, const struct afswtch *afp) { struct lagg_reqopts ro; bzero(&ro, sizeof(ro)); ro.ro_opts = d; switch (ro.ro_opts) { case LAGG_OPT_USE_FLOWID: case -LAGG_OPT_USE_FLOWID: + case LAGG_OPT_USE_NUMA: + case -LAGG_OPT_USE_NUMA: case LAGG_OPT_LACP_STRICT: case -LAGG_OPT_LACP_STRICT: case LAGG_OPT_LACP_TXTEST: case -LAGG_OPT_LACP_TXTEST: case LAGG_OPT_LACP_RXTEST: case -LAGG_OPT_LACP_RXTEST: case LAGG_OPT_LACP_TIMEOUT: case -LAGG_OPT_LACP_TIMEOUT: break; default: err(1, "Invalid lagg option"); } strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname)); if (ioctl(s, SIOCSLAGGOPTS, &ro) != 0) err(1, "SIOCSLAGGOPTS"); } static void setlagghash(const char *val, int d, int s, const struct afswtch *afp) { struct lagg_reqflags rf; char *str, *tmp, *tok; rf.rf_flags = 0; str = tmp = strdup(val); 
while ((tok = strsep(&tmp, ",")) != NULL) { if (strcmp(tok, "l2") == 0) rf.rf_flags |= LAGG_F_HASHL2; else if (strcmp(tok, "l3") == 0) rf.rf_flags |= LAGG_F_HASHL3; else if (strcmp(tok, "l4") == 0) rf.rf_flags |= LAGG_F_HASHL4; else errx(1, "Invalid lagghash option: %s", tok); } free(str); if (rf.rf_flags == 0) errx(1, "No lagghash options supplied"); strlcpy(rf.rf_ifname, name, sizeof(rf.rf_ifname)); if (ioctl(s, SIOCSLAGGHASH, &rf)) err(1, "SIOCSLAGGHASH"); } static char * lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen) { snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X", (int)mac[0], (int)mac[1], (int)mac[2], (int)mac[3], (int)mac[4], (int)mac[5]); return (buf); } static char * lacp_format_peer(struct lacp_opreq *req, const char *sep) { char macbuf1[20]; char macbuf2[20]; snprintf(lacpbuf, sizeof(lacpbuf), "[(%04X,%s,%04X,%04X,%04X),%s(%04X,%s,%04X,%04X,%04X)]", req->actor_prio, lacp_format_mac(req->actor_mac, macbuf1, sizeof(macbuf1)), req->actor_key, req->actor_portprio, req->actor_portno, sep, req->partner_prio, lacp_format_mac(req->partner_mac, macbuf2, sizeof(macbuf2)), req->partner_key, req->partner_portprio, req->partner_portno); return(lacpbuf); } static void lagg_status(int s) { struct lagg_protos lpr[] = LAGG_PROTOS; struct lagg_reqport rpbuf[LAGG_MAX_PORTS]; struct lagg_reqall ra; struct lagg_reqopts ro; struct lagg_reqflags rf; struct lacp_opreq *lp; const char *proto = ""; int i; bzero(&ra, sizeof(ra)); bzero(&ro, sizeof(ro)); strlcpy(ra.ra_ifname, name, sizeof(ra.ra_ifname)); ra.ra_size = sizeof(rpbuf); ra.ra_port = rpbuf; strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname)); ioctl(s, SIOCGLAGGOPTS, &ro); strlcpy(rf.rf_ifname, name, sizeof(rf.rf_ifname)); if (ioctl(s, SIOCGLAGGFLAGS, &rf) != 0) rf.rf_flags = 0; if (ioctl(s, SIOCGLAGG, &ra) == 0) { lp = (struct lacp_opreq *)&ra.ra_lacpreq; for (i = 0; i < nitems(lpr); i++) { if (ra.ra_proto == lpr[i].lpr_proto) { proto = lpr[i].lpr_name; break; } } printf("\tlaggproto %s", proto); if (rf.rf_flags & LAGG_F_HASHMASK) { const char *sep = ""; printf(" lagghash "); if (rf.rf_flags & LAGG_F_HASHL2) { printf("%sl2", sep); sep = ","; } if (rf.rf_flags & LAGG_F_HASHL3) { printf("%sl3", sep); sep = ","; } if (rf.rf_flags & LAGG_F_HASHL4) { printf("%sl4", sep); sep = ","; } } putchar('\n'); if (verbose) { printf("\tlagg options:\n"); printb("\t\tflags", ro.ro_opts, LAGG_OPT_BITS); putchar('\n'); printf("\t\tflowid_shift: %d\n", ro.ro_flowid_shift); if (ra.ra_proto == LAGG_PROTO_ROUNDROBIN) printf("\t\trr_limit: %d\n", ro.ro_bkt); printf("\tlagg statistics:\n"); printf("\t\tactive ports: %d\n", ro.ro_active); printf("\t\tflapping: %u\n", ro.ro_flapping); if (ra.ra_proto == LAGG_PROTO_LACP) { printf("\tlag id: %s\n", lacp_format_peer(lp, "\n\t\t ")); } } for (i = 0; i < ra.ra_ports; i++) { lp = (struct lacp_opreq *)&rpbuf[i].rp_lacpreq; printf("\tlaggport: %s ", rpbuf[i].rp_portname); printb("flags", rpbuf[i].rp_flags, LAGG_PORT_BITS); if (verbose && ra.ra_proto == LAGG_PROTO_LACP) printb(" state", lp->actor_state, LACP_STATE_BITS); putchar('\n'); if (verbose && ra.ra_proto == LAGG_PROTO_LACP) printf("\t\t%s\n", lacp_format_peer(lp, "\n\t\t ")); } if (0 /* XXX */) { printf("\tsupported aggregation protocols:\n"); for (i = 0; i < nitems(lpr); i++) printf("\t\tlaggproto %s\n", lpr[i].lpr_name); } } } static struct cmd lagg_cmds[] = { DEF_CMD_ARG("laggport", setlaggport), DEF_CMD_ARG("-laggport", unsetlaggport), DEF_CMD_ARG("laggproto", setlaggproto), DEF_CMD_ARG("lagghash", setlagghash), DEF_CMD("use_flowid", 
LAGG_OPT_USE_FLOWID, setlaggsetopt), DEF_CMD("-use_flowid", -LAGG_OPT_USE_FLOWID, setlaggsetopt), + DEF_CMD("use_numa", LAGG_OPT_USE_NUMA, setlaggsetopt), + DEF_CMD("-use_numa", -LAGG_OPT_USE_NUMA, setlaggsetopt), DEF_CMD("lacp_strict", LAGG_OPT_LACP_STRICT, setlaggsetopt), DEF_CMD("-lacp_strict", -LAGG_OPT_LACP_STRICT, setlaggsetopt), DEF_CMD("lacp_txtest", LAGG_OPT_LACP_TXTEST, setlaggsetopt), DEF_CMD("-lacp_txtest", -LAGG_OPT_LACP_TXTEST, setlaggsetopt), DEF_CMD("lacp_rxtest", LAGG_OPT_LACP_RXTEST, setlaggsetopt), DEF_CMD("-lacp_rxtest", -LAGG_OPT_LACP_RXTEST, setlaggsetopt), DEF_CMD("lacp_fast_timeout", LAGG_OPT_LACP_TIMEOUT, setlaggsetopt), DEF_CMD("-lacp_fast_timeout", -LAGG_OPT_LACP_TIMEOUT, setlaggsetopt), DEF_CMD_ARG("flowid_shift", setlaggflowidshift), DEF_CMD_ARG("rr_limit", setlaggrr_limit), }; static struct afswtch af_lagg = { .af_name = "af_lagg", .af_af = AF_UNSPEC, .af_other_status = lagg_status, }; static __constructor void lagg_ctor(void) { int i; for (i = 0; i < nitems(lagg_cmds); i++) cmd_register(&lagg_cmds[i]); af_register(&af_lagg); } Index: projects/runtime-coverage-v2/sbin/reboot/boot_i386.8 =================================================================== --- projects/runtime-coverage-v2/sbin/reboot/boot_i386.8 (revision 347075) +++ projects/runtime-coverage-v2/sbin/reboot/boot_i386.8 (revision 347076) @@ -1,380 +1,380 @@ .\" Copyright (c) 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" This code is derived from software written and contributed .\" to Berkeley by William Jolitz. .\" .\" Almost completely rewritten for FreeBSD 2.1 by Joerg Wunsch. .\" .\" Substantially revised for FreeBSD 3.1 by Robert Nordier. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)boot_i386.8 8.2 (Berkeley) 4/19/94 .\" .\" $FreeBSD$ .\" -.Dd May 15, 2018 +.Dd April 30, 2019 .Dt BOOT 8 i386 .Os .Sh NAME .Nm boot .Nd system bootstrapping procedures .Sh DESCRIPTION .Sy Power fail and crash recovery . Normally, the system will reboot itself at power-up or after crashes. 
An automatic consistency check of the file systems will be performed, and unless this fails, the system will resume multi-user operations. .Pp .Sy Cold starts . Most i386 PCs attempt to boot first from floppy disk drive 0 (sometimes known as drive A:) and, failing that, from hard disk drive 0 (sometimes known as drive C:, or as drive 0x80 to the BIOS). Some BIOSes allow you to change this default sequence, and may also include a CD-ROM drive as a boot device. .Pp Some newer PCs boot using UEFI firmware, not BIOS. That process is described in .Xr uefi 8 . .Pp By default, a three-stage bootstrap is employed, and control is automatically passed from the boot blocks (bootstrap stages one and two) to a separate third-stage bootstrap program, .Xr loader 8 . This third stage provides more sophisticated control over the booting process than is possible to achieve in the boot blocks, which are constrained by occupying limited fixed space on a given disk or slice. .Pp However, it is possible to dispense with the third stage altogether, either by specifying a kernel name in the boot block parameter file, .Pa /boot.config , or, unless option .Fl n is set, by hitting a key during a brief pause (while one of the characters .Sy - , .Sy \e , .Sy \&| , or .Sy / is displayed) before .Xr loader 8 is invoked. Booting will also be attempted at stage two if the third stage cannot be loaded. .Pp The remainder of this subsection deals only with the boot blocks. The .Xr loader 8 program is documented separately. .Pp After the boot blocks have been loaded, you should see a prompt similar to the following: .Bd -literal ->> FreeBSD/i386 BOOT +>> FreeBSD/x86 BOOT Default: 0:ad(0,a)/boot/loader boot: .Ed .Pp The automatic boot will attempt to load .Pa /boot/loader from partition .Ql a of either the floppy or the hard disk. This boot may be aborted by typing any character on the keyboard at the .Ql boot: prompt. At this time, the following input will be accepted: .Bl -tag -width indent .It Ic \&? Give a short listing of the files in the root directory of the default boot device, as a hint about available boot files. (A .Ic ?\& may also be specified as the last segment of a path, in which case the listing will be of the relevant subdirectory.) .It Xo .Sm off .Ar bios_drive : interface ( unit , Oo Ar slice , Oc Ar part ) .Ar filename .Sm on .Op Fl aCcDdghmnPpqrsv .Op Fl S Ns Ar speed .Xc Specify boot file and flags. .Bl -tag -width indent .It Ar bios_drive The drive number as recognized by the BIOS. 0 for the first drive, 1 for the second drive, etc. .It Ar interface The type of controller to boot from. Note that the controller is required to have BIOS support since the BIOS services are used to load the boot file image. .Pp The supported interfaces are: .Pp .Bl -tag -width "adXX" -compact .It ad ST506, IDE, ESDI, RLL disks on a WD100[2367] or lookalike controller .It fd 5 1/4" or 3 1/2" high-density floppies .It da SCSI disk on any supported SCSI controller .\".It cd .\"boot from CDROM .El .It Ar unit The unit number of the drive on the interface being used. 0 for the first drive, 1 for the second drive, etc. .It Oo Ar slice , Oc Ns Ar part The partition letter inside the .Bx portion of the disk. See .Xr bsdlabel 8 . By convention, only partition .Ql a contains a bootable image. If sliced disks are used .Pq Dq fdisk partitions , any .Ar slice (1 for the first slice, 2 for the second slice, etc.\&) can be booted from, with the default (if not specified) being the active slice or, otherwise, the first .Fx slice.
If .Ar slice is specified as 0, the first .Fx slice (also known as .Dq compatibility slice) is booted from. .It Ar filename The pathname of the file to boot (relative to the root directory on the specified partition). Defaults to .Pa /boot/kernel/kernel . Symbolic links are not supported (hard links are). .It Xo Op Fl aCcDdghmnPpqrsv .Op Fl S Ns Ar speed .Xc Boot flags: .Pp .Bl -tag -width "-CXX" -compact .It Fl a during kernel initialization, ask for the device to mount as the root file system. .It Fl C try to mount the root file system from a CD-ROM. .It Fl c this flag is currently a no-op. .It Fl D boot with the dual console configuration. In the single configuration, the console will be either the internal display or the serial port, depending on the state of the .Fl h option below. In the dual console configuration, both the internal display and the serial port will become the console at the same time, regardless of the state of the .Fl h option. .It Fl d enter the DDB kernel debugger (see .Xr ddb 4 ) as early as possible in kernel initialization. .It Fl g use the GDB remote debugging protocol. .It Fl h force the serial console. For instance, if you boot from the internal console, you can use the .Fl h option to force the kernel to use the serial port as its console device. The serial port driver .Xr sio 4 (but not .Xr uart 4 ) has a flag (0x20) to override this option. If that flag is set, the serial port will always be used as the console, regardless of the .Fl h option described here. .It Fl m mute the console to suppress all kernel console input and output during the boot. .It Fl n ignore any key press to interrupt the boot before .Xr loader 8 is invoked. .It Fl P probe the keyboard. If no keyboard is found, the .Fl D and .Fl h options are automatically set. .It Fl p pause after each attached device during the device probing phase. .It Fl q be quiet; do not write anything to the console unless the automatic boot fails or is disabled. This option only affects the second-stage bootstrap; to prevent later stages from writing to the console, use it in combination with the .Fl m option. .It Fl r use the statically configured default for the device containing the root file system (see .Xr config 8 ) . Normally, the root file system is on the device that the kernel was loaded from. .It Fl s boot into single-user mode; if the console is marked as .Dq insecure (see .Xr ttys 5 ) , the root password must be entered. .It Fl S Ns Ar speed set the speed of the serial console to .Ar speed . The default is 9600 unless it has been overridden by setting .Va BOOT_COMCONSOLE_SPEED in .Xr make.conf 5 and recompiling and reinstalling the boot blocks. .It Fl v be verbose during device probing (and later). .El .El .El .Pp Use the .Pa /boot.config file to set the default configuration options for the boot block code. See .Xr boot.config 5 for more information about the .Pa /boot.config file. .Sh FILES .Bl -tag -width /boot/loader -compact .It Pa /boot.config parameters for the boot blocks (optional) .It Pa /boot/boot1 first stage bootstrap file .It Pa /boot/boot2 second stage bootstrap file .It Pa /boot/loader third stage bootstrap .It Pa /boot/kernel/kernel default kernel .It Pa /boot/kernel.old/kernel typical non-default kernel (optional) .El .Sh DIAGNOSTICS When disk-related errors occur, these are reported by the second-stage bootstrap using the same error codes returned by the BIOS, for example .Dq Disk error 0x1 (lba=0x12345678) .
Here is a partial list of these error codes: .Pp .Bl -tag -width "0x80" -compact .It 0x1 Invalid argument .It 0x2 Address mark not found .It 0x4 Sector not found .It 0x8 DMA overrun .It 0x9 DMA attempt across 64K boundary .It 0xc Invalid media .It 0x10 Uncorrectable CRC/ECC error .It 0x20 Controller failure .It 0x40 Seek failed .It 0x80 Timeout .El .Pp .Sy "NOTE" : On older machines, or otherwise where EDD support (disk packet interface support) is not available, all boot-related files and structures (including the kernel) that need to be accessed during the boot phase must reside on the disk at or below cylinder 1023 (as the BIOS understands the geometry). When a .Dq Disk error 0x1 is reported by the second-stage bootstrap, it generally means that this requirement has not been adhered to. .Sh SEE ALSO .Xr ddb 4 , .Xr boot.config 5 , .Xr make.conf 5 , .Xr mount.conf 5 , .Xr ttys 5 , .Xr boot0cfg 8 , .Xr btxld 8 , .Xr config 8 , .Xr gpart 8 , .Xr gptboot 8 , .Xr halt 8 , .Xr loader 8 , .Xr nextboot 8 , .Xr reboot 8 , .Xr shutdown 8 , .Xr uefi 8 .Sh BUGS The bsdlabel format used by this version of .Bx is quite different from that of other architectures. .Pp Due to space constraints, the keyboard probe initiated by the .Fl P option is simply a test that the BIOS has detected an .Dq extended keyboard. If an .Dq XT/AT keyboard (with no F11 and F12 keys, etc.) is attached, the probe will fail. Index: projects/runtime-coverage-v2/share/man/man4/ccr.4 =================================================================== --- projects/runtime-coverage-v2/share/man/man4/ccr.4 (revision 347075) +++ projects/runtime-coverage-v2/share/man/man4/ccr.4 (revision 347076) @@ -1,111 +1,112 @@ .\" Copyright (c) 2017, Chelsio Inc .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" -.Dd March 11, 2019 +.Dd April 29, 2019 .Dt CCR 4 .Os .Sh NAME .Nm ccr .Nd "Chelsio T6 crypto accelerator driver" .Sh SYNOPSIS To compile this driver into the kernel, place the following lines in your kernel configuration file: -.Bd -ragged -offset indeunt +.Bd -ragged -offset indent .Cd "device ccr" .Ed .Pp To load the driver as a module at boot time, place the following line in .Xr loader.conf 5 : .Bd -literal -offset indent ccr_load="YES" .Ed .Sh DESCRIPTION The .Nm driver provides support for the crypto accelerator engine included on PCI Express Ethernet adapters based on the Chelsio Terminator 6 ASIC (T6). -The driver accelerates AES-CBC, AES-CTR, AES-GCM, AES-XTS, SHA1, SHA2-224, -SHA2-256, SHA2-384, SHA2-512, SHA1-HMAC, SHA2-224-HMAC, -SHA2-256-HMAC, SHA2-384-HMAC, and SHA2-512-HMAC operations for +The driver accelerates AES-CBC, AES-CCM, AES-CTR, AES-GCM, AES-XTS, +SHA1, SHA2-224, SHA2-256, SHA2-384, SHA2-512, +SHA1-HMAC, SHA2-224-HMAC, SHA2-256-HMAC, SHA2-384-HMAC, and SHA2-512-HMAC +operations for .Xr crypto 4 and .Xr ipsec 4 . The driver also supports chaining one of AES-CBC, AES-CTR, or AES-XTS with SHA1-HMAC, SHA2-224-HMAC, SHA2-256-HMAC, SHA2-384-HMAC, or SHA2-512-HMAC for encrypt-then-authenticate operations. For further hardware information and questions related to hardware requirements, see .Pa http://www.chelsio.com/ . .Pp The .Nm driver attaches as a child of an existing Chelsio NIC device and thus requires that the .Xr cxgbe 4 driver be active. .Sh HARDWARE The .Nm driver supports the crypto accelerator engine included on adapters based on the T6 ASIC: .Pp .Bl -bullet -compact .It Chelsio T6225-CR .It Chelsio T6225-SO-CR .It Chelsio T62100-LP-CR .It Chelsio T62100-SO-CR .It Chelsio T62100-CR .El .Sh SUPPORT For general information and support, go to the Chelsio support website at: .Pa http://www.chelsio.com/ . .Pp If an issue is identified with this driver with a supported adapter, email all the specific information related to the issue to .Aq Mt support@chelsio.com . .Sh SEE ALSO .Xr crypto 4 , .Xr cxgbe 4 , .Xr ipsec 4 .Sh HISTORY The .Nm device driver first appeared in .Fx 12.0 . .Sh AUTHORS .An -nosplit The .Nm driver was written by .An John Baldwin Aq Mt jhb@FreeBSD.org . Index: projects/runtime-coverage-v2/share/man/man7/development.7 =================================================================== --- projects/runtime-coverage-v2/share/man/man7/development.7 (revision 347075) +++ projects/runtime-coverage-v2/share/man/man7/development.7 (revision 347076) @@ -1,187 +1,187 @@ .\" Copyright (c) 2018 Edward Tomasz Napierala .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd December 6, 2018 .Dt DEVELOPMENT 7 .Os .Sh NAME .Nm development .Nd introduction to .Fx development process .Sh DESCRIPTION .Fx development is split into three major subprojects: doc, ports, and src. Doc is the documentation, such as the .Fx Handbook. To read more, see: .Pp .Lk https://www.FreeBSD.org/doc/en/books/fdp-primer/ .Pp Ports, described further in .Xr ports 7 , are the way to build, package, and install third-party software. To read more, see: .Pp .Lk https://www.FreeBSD.org/doc/en/books/porters-handbook/ .Pp The last one, src, revolves around the source code for the base system, consisting of the kernel, and the libraries and utilities commonly called the world. .Pp The Committer's Guide, describing topics relevant to all committers, can be found at: .Pp .Lk https://www.FreeBSD.org/doc/en/articles/committers-guide/ .Pp .Fx src development takes place in the CURRENT branch in Subversion, located at: .Pp .Lk https://svn.FreeBSD.org/base/head .Pp There is also a read-only GitHub mirror at: .Pp .Lk https://github.com/freebsd/freebsd .Pp Changes are first committed to CURRENT and then usually merged back to STABLE. Every few years the CURRENT branch is renamed to STABLE, and a new CURRENT is branched, with an incremented major version number. Releases are then branched off STABLE and numbered with consecutive minor numbers. .Pp The layout of the source tree is described in .Xr hier 7 . Build instructions can be found in .Xr build 7 and .Xr release 7 . Kernel programming interfaces (KPIs) are documented in section 9 manual pages; use .Ql "apropos -s 9 ''" for a list. The regression test suite is described in .Xr tests 7 . For coding conventions, see .Xr style 9 .
.Pp To ask questions regarding development, use the mailing lists, such as freebsd-arch@ and freebsd-hackers@: .Pp -.Lk https://lists.FreeBSD.org/ +.Lk https://lists.FreeBSD.org .Pp To get your patches integrated into the main .Fx repository use Phabricator; it is a code review tool that allows other developers to review the changes, suggest improvements, and, eventually, allows them to pick up the change and commit it: .Pp -.Lk https://reviews.FreeBSD.org/ +.Lk https://reviews.FreeBSD.org .Sh EXAMPLES Check out the CURRENT branch, build it, and install, overwriting the current system: .Bd -literal -offset indent svnlite co https://svn.FreeBSD.org/base/head src cd src make -sj8 buildworld buildkernel installkernel shutdown -r now .Ed .Pp After reboot: .Bd -literal -offset indent cd src make -j8 installworld reboot .Ed .Pp Rebuild and reinstall a single piece of userspace, in this case .Xr ls 1 : .Bd -literal -offset indent cd src/bin/ls make clean all install .Ed .Pp Quickly rebuild and reinstall the kernel, only recompiling the files changed since last build; note that this will only work if the full kernel build has been completed in the past, not on a fresh source tree: .Bd -literal -offset indent cd src make -sj8 kernel KERNFAST=1 .Ed .Pp To rebuild parts of .Fx for another CPU architecture, first prepare your source tree by building the cross-toolchain: .Bd -literal -offset indent cd src make -sj8 toolchain TARGET_ARCH=armv6 .Ed .Pp Afterwards, to build and install a single piece of userspace, use: .Bd -literal -offset indent cd src/bin/ls make buildenv TARGET_ARCH=armv6 make clean all install DESTDIR=/clients/arm .Ed .Pp Likewise, to quickly rebuild and reinstall the kernel, use: .Bd -literal -offset indent cd src make buildenv TARGET_ARCH=armv6 make -sj8 kernel KERNFAST=1 DESTDIR=/clients/arm .Ed .Sh SEE ALSO .Xr svnlite 1 , .Xr witness 4 , .Xr build 7 , .Xr hier 7 , .Xr release 7 , .Xr locking 9 , .Xr style 9 .Sh HISTORY The .Nm manual page was originally written by .An Matthew Dillon Aq Mt dillon@FreeBSD.org and first appeared in .Fx 5.0 , December 2002. It was since extensively modified by .An Eitan Adler Aq Mt eadler@FreeBSD.org to reflect the repository conversion from .Xr cvs 1 to .Xr svn 1 . It was rewritten from scratch by .An Edward Tomasz Napierala Aq Mt trasz@FreeBSD.org for .Fx 12.0 . Index: projects/runtime-coverage-v2/share/misc/committers-src.dot =================================================================== --- projects/runtime-coverage-v2/share/misc/committers-src.dot (revision 347075) +++ projects/runtime-coverage-v2/share/misc/committers-src.dot (revision 347076) @@ -1,878 +1,881 @@ # $FreeBSD$ # This file is meant to list all FreeBSD src committers and describe the # mentor-mentee relationships between them. # The graphical output can be generated from this file with the following # command: # $ dot -T png -o file.png committers-src.dot # # The dot binary is part of the graphics/graphviz port. digraph src { # Node definitions follow this example: # # foo [label="Foo Bar\nfoo@FreeBSD.org\n????/??/??"] # # ????/??/?? is the date when the commit bit was obtained, usually the one you # can find looking at svn logs for the svnadmin/conf/access file. # Use YYYY/MM/DD format. # # For returned commit bits, the node definition will follow this example: # # foo [label="Foo Bar\nfoo@FreeBSD.org\n????/??/??\n????/??/??"] # # The first date is the same as for an active committer, the second date is # the date when the commit bit has been returned. Again, check svn logs. 
node [color=grey62, style=filled, bgcolor=black]; # Alumni go here.. Try to keep things sorted. alm [label="Andrew Moore\nalm@FreeBSD.org\n1993/06/12\n????/??/??"] anholt [label="Eric Anholt\nanholt@FreeBSD.org\n2002/04/22\n2008/08/07"] archie [label="Archie Cobbs\narchie@FreeBSD.org\n1998/11/06\n2006/06/09"] arr [label="Andrew R. Reiter\narr@FreeBSD.org\n2001/11/02\n2005/05/25"] arun [label="Arun Sharma\narun@FreeBSD.org\n2003/03/06\n2006/12/16"] asmodai [label="Jeroen Ruigrok\nasmodai@FreeBSD.org\n1999/12/16\n2001/11/16"] benjsc [label="Benjamin Close\nbenjsc@FreeBSD.org\n2007/02/09\n2010/09/15"] billf [label="Bill Fumerola\nbillf@FreeBSD.org\n1998/11/11\n2008/11/10"] bmah [label="Bruce A. Mah\nbmah@FreeBSD.org\n2002/01/29\n2009/09/13"] bmilekic [label="Bosko Milekic\nbmilekic@FreeBSD.org\n2000/09/21\n2008/11/10"] bushman [label="Michael Bushkov\nbushman@FreeBSD.org\n2007/03/10\n2010/04/29"] carl [label="Carl Delsey\ncarl@FreeBSD.org\n2013/01/14\n2014/03/06"] ceri [label="Ceri Davies\nceri@FreeBSD.org\n2006/11/07\n2012/03/07"] cjc [label="Crist J. Clark\ncjc@FreeBSD.org\n2001/06/01\n2006/12/29"] davidxu [label="David Xu\ndavidxu@FreeBSD.org\n2002/09/02\n2014/04/14"] dds [label="Diomidis Spinellis\ndds@FreeBSD.org\n2003/06/20\n2010/09/22"] dhartmei [label="Daniel Hartmeier\ndhartmei@FreeBSD.org\n2004/04/06\n2008/12/08"] dmlb [label="Duncan Barclay\ndmlb@FreeBSD.org\n2001/12/14\n2008/11/10"] dougb [label="Doug Barton\ndougb@FreeBSD.org\n2000/10/26\n2012/10/08"] eik [label="Oliver Eikemeier\neik@FreeBSD.org\n2004/05/20\n2008/11/10"] furuta [label="Atsushi Furuta\nfuruta@FreeBSD.org\n2000/06/21\n2003/03/08"] gj [label="Gary L. Jennejohn\ngj@FreeBSD.org\n1994/??/??\n2006/04/28"] groudier [label="Gerard Roudier\ngroudier@FreeBSD.org\n1999/12/30\n2006/04/06"] jake [label="Jake Burkholder\njake@FreeBSD.org\n2000/05/16\n2008/11/10"] jayanth [label="Jayanth Vijayaraghavan\njayanth@FreeBSD.org\n2000/05/08\n2008/11/10"] jb [label="John Birrell\njb@FreeBSD.org\n1997/03/27\n2009/12/15"] jdp [label="John Polstra\njdp@FreeBSD.org\n1995/12/07\n2008/02/26"] jedgar [label="Chris D. Faulhaber\njedgar@FreeBSD.org\n1999/12/15\n2006/04/07"] jkh [label="Jordan K. Hubbard\njkh@FreeBSD.org\n1993/06/12\n2008/06/13"] jlemon [label="Jonathan Lemon\njlemon@FreeBSD.org\n1997/08/14\n2008/11/10"] joe [label="Josef Karthauser\njoe@FreeBSD.org\n1999/10/22\n2008/08/10"] jtc [label="J.T. Conklin\njtc@FreeBSD.org\n1993/06/12\n????/??/??"] kargl [label="Steven G. 
Kargl\nkargl@FreeBSD.org\n2011/01/17\n2015/06/28"] kbyanc [label="Kelly Yancey\nkbyanc@FreeBSD.org\n2000/07/11\n2006/07/25"] keichii [label="Michael Wu\nkeichii@FreeBSD.org\n2001/03/07\n2006/04/28"] linimon [label="Mark Linimon\nlinimon@FreeBSD.org\n2006/09/30\n2008/05/04"] lulf [label="Ulf Lilleengen\nlulf@FreeBSD.org\n2007/10/24\n2012/01/19"] mb [label="Maxim Bolotin\nmb@FreeBSD.org\n2000/04/06\n2003/03/08"] marks [label="Mark Santcroos\nmarks@FreeBSD.org\n2004/03/18\n2008/09/29"] mike [label="Mike Barcroft\nmike@FreeBSD.org\n2001/07/17\n2006/04/28"] msmith [label="Mike Smith\nmsmith@FreeBSD.org\n1996/10/22\n2003/12/15"] murray [label="Murray Stokely\nmurray@FreeBSD.org\n2000/04/05\n2010/07/25"] mux [label="Maxime Henrion\nmux@FreeBSD.org\n2002/03/03\n2011/06/22"] nate [label="Nate Willams\nnate@FreeBSD.org\n1993/06/12\n2003/12/15"] njl [label="Nate Lawson\nnjl@FreeBSD.org\n2002/08/07\n2008/02/16"] non [label="Noriaki Mitsnaga\nnon@FreeBSD.org\n2000/06/19\n2007/03/06"] onoe [label="Atsushi Onoe\nonoe@FreeBSD.org\n2000/07/21\n2008/11/10"] rafan [label="Rong-En Fan\nrafan@FreeBSD.org\n2007/01/31\n2012/07/23"] randi [label="Randi Harper\nrandi@FreeBSD.org\n2010/04/20\n2012/05/10"] rink [label="Rink Springer\nrink@FreeBSD.org\n2006/01/16\n2010/11/04"] robert [label="Robert Drehmel\nrobert@FreeBSD.org\n2001/08/23\n2006/05/13"] sah [label="Sam Hopkins\nsah@FreeBSD.org\n2004/12/15\n2008/11/10"] shafeeq [label="Shafeeq Sinnamohideen\nshafeeq@FreeBSD.org\n2000/06/19\n2006/04/06"] sheldonh [label="Sheldon Hearn\nsheldonh@FreeBSD.org\n1999/06/14\n2006/05/13"] shiba [label="Takeshi Shibagaki\nshiba@FreeBSD.org\n2000/06/19\n2008/11/10"] shin [label="Yoshinobu Inoue\nshin@FreeBSD.org\n1999/07/29\n2003/03/08"] snb [label="Nick Barkas\nsnb@FreeBSD.org\n2009/05/05\n2010/11/04"] tmm [label="Thomas Moestl\ntmm@FreeBSD.org\n2001/03/07\n2006/07/12"] toshi [label="Toshihiko Arai\ntoshi@FreeBSD.org\n2000/07/06\n2003/03/08"] tshiozak [label="Takuya SHIOZAKI\ntshiozak@FreeBSD.org\n2001/04/25\n2003/03/08"] uch [label="UCHIYAMA Yasushi\nuch@FreeBSD.org\n2000/06/21\n2002/04/24"] wilko [label="Wilko Bulte\nwilko@FreeBSD.org\n2000/01/13\n2013/01/17"] yar [label="Yar Tikhiy\nyar@FreeBSD.org\n2001/03/25\n2012/05/23"] zack [label="Zack Kirsch\nzack@FreeBSD.org\n2010/11/05\n2012/09/08"] node [color=lightblue2, style=filled, bgcolor=black]; # Current src committers go here. Try to keep things sorted. ache [label="Andrey Chernov\nache@FreeBSD.org\n1993/10/31"] achim [label="Achim Leubner\nachim@FreeBSD.org\n2013/01/23"] adrian [label="Adrian Chadd\nadrian@FreeBSD.org\n2000/07/03"] ae [label="Andrey V. 
Elsukov\nae@FreeBSD.org\n2010/06/03"] akiyama [label="Shunsuke Akiyama\nakiyama@FreeBSD.org\n2000/06/19"] alc [label="Alan Cox\nalc@FreeBSD.org\n1999/02/23"] allanjude [label="Allan Jude\nallanjude@FreeBSD.org\n2015/07/30"] ambrisko [label="Doug Ambrisko\nambrisko@FreeBSD.org\n2001/12/19"] anchie [label="Ana Kukec\nanchie@FreeBSD.org\n2010/04/14"] andre [label="Andre Oppermann\nandre@FreeBSD.org\n2003/11/12"] andreast [label="Andreas Tobler\nandreast@FreeBSD.org\n2010/09/05"] andrew [label="Andrew Turner\nandrew@FreeBSD.org\n2010/07/19"] antoine [label="Antoine Brodin\nantoine@FreeBSD.org\n2008/02/03"] araujo [label="Marcelo Araujo\naraujo@FreeBSD.org\n2015/08/04"] arichardson [label="Alex Richardson\narichardson@FreeBSD.org\n2017/10/30"] ariff [label="Ariff Abdullah\nariff@FreeBSD.org\n2005/11/14"] art [label="Artem Belevich\nart@FreeBSD.org\n2011/03/29"] arybchik [label="Andrew Rybchenko\narybchik@FreeBSD.org\n2014/10/12"] asomers [label="Alan Somers\nasomers@FreeBSD.org\n2013/04/24"] avg [label="Andriy Gapon\navg@FreeBSD.org\n2009/02/18"] avos [label="Andriy Voskoboinyk\navos@FreeBSD.org\n2015/09/24"] badger [label="Eric Badger\nbadger@FreeBSD.org\n2016/07/01"] bapt [label="Baptiste Daroussin\nbapt@FreeBSD.org\n2011/12/23"] bcran [label="Rebecca Cran\nbcran@FreeBSD.org\n2010/01/29"] bde [label="Bruce Evans\nbde@FreeBSD.org\n1994/08/20"] bdrewery [label="Bryan Drewery\nbdrewery@FreeBSD.org\n2013/12/14"] benl [label="Ben Laurie\nbenl@FreeBSD.org\n2011/05/18"] benno [label="Benno Rice\nbenno@FreeBSD.org\n2000/11/02"] bms [label="Bruce M Simpson\nbms@FreeBSD.org\n2003/08/06"] br [label="Ruslan Bukin\nbr@FreeBSD.org\n2013/09/02"] brian [label="Brian Somers\nbrian@FreeBSD.org\n1996/12/16"] brooks [label="Brooks Davis\nbrooks@FreeBSD.org\n2001/06/21"] brueffer [label="Christian Brueffer\nbrueffer@FreeBSD.org\n2006/02/28"] bruno [label="Bruno Ducrot\nbruno@FreeBSD.org\n2005/07/18"] bryanv [label="Bryan Venteicher\nbryanv@FreeBSD.org\n2012/11/03"] bschmidt [label="Bernhard Schmidt\nbschmidt@FreeBSD.org\n2010/02/06"] bwidawsk [label="Ben Widawsky\nbwidawsk@FreeBSD.org\n2018/07/05"] bz [label="Bjoern A. Zeeb\nbz@FreeBSD.org\n2004/07/27"] cem [label="Conrad Meyer\ncem@FreeBSD.org\n2015/07/05"] chuck [label="Chuck Tuffli\nchuck@FreeBSD.org\n2017/09/06"] cognet [label="Olivier Houchard\ncognet@FreeBSD.org\n2002/10/09"] cokane [label="Coleman Kane\ncokane@FreeBSD.org\n2000/06/19"] cperciva [label="Colin Percival\ncperciva@FreeBSD.org\n2004/01/20"] csjp [label="Christian S.J. 
Peron\ncsjp@FreeBSD.org\n2004/05/04"] cy [label="Cy Schubert\ncy@FreeBSD.org\n2013/04/23"] dab [label="David Bright\ndab@FreeBSD.org\n2016/10/24"] das [label="David Schultz\ndas@FreeBSD.org\n2003/02/21"] davide [label="Davide Italiano\ndavide@FreeBSD.org\n2012/01/27"] dchagin [label="Dmitry Chagin\ndchagin@FreeBSD.org\n2009/02/28"] def [label="Konrad Witaszczyk\ndef@FreeBSD.org\n2016/11/02"] delphij [label="Xin Li\ndelphij@FreeBSD.org\n2004/09/14"] des [label="Dag-Erling Smorgrav\ndes@FreeBSD.org\n1998/04/03"] dexuan [label="Dexuan Cui\ndexuan@FreeBSD.org\n2016/10/24"] dfr [label="Doug Rabson\ndfr@FreeBSD.org\n????/??/??"] dg [label="David Greenman\ndg@FreeBSD.org\n1993/06/14"] dim [label="Dimitry Andric\ndim@FreeBSD.org\n2010/08/30"] +dougm [label="Doug Moore\ndougm@FreeBSD.org\n2019/04/30"] dteske [label="Devin Teske\ndteske@FreeBSD.org\n2012/04/10"] dumbbell [label="Jean-Sebastien Pedron\ndumbbell@FreeBSD.org\n2004/11/29"] dwmalone [label="David Malone\ndwmalone@FreeBSD.org\n2000/07/11"] eadler [label="Eitan Adler\neadler@FreeBSD.org\n2012/01/18"] ed [label="Ed Schouten\ned@FreeBSD.org\n2008/05/22"] edavis [label="Eric Davis\nedavis@FreeBSD.org\n2013/10/09"] edwin [label="Edwin Groothuis\nedwin@FreeBSD.org\n2007/06/25"] eivind [label="Eivind Eklund\neivind@FreeBSD.org\n1997/02/02"] emaste [label="Ed Maste\nemaste@FreeBSD.org\n2005/10/04"] emax [label="Maksim Yevmenkin\nemax@FreeBSD.org\n2003/10/12"] eri [label="Ermal Luci\neri@FreeBSD.org\n2008/06/11"] erj [label="Eric Joyner\nerj@FreeBSD.org\n2014/12/14"] eugen [label="Eugene Grosbein\neugen@FreeBSD.org\n2017/09/19"] fabient [label="Fabien Thomas\nfabient@FreeBSD.org\n2009/03/16"] fanf [label="Tony Finch\nfanf@FreeBSD.org\n2002/05/05"] fjoe [label="Max Khon\nfjoe@FreeBSD.org\n2001/08/06"] flz [label="Florent Thoumie\nflz@FreeBSD.org\n2006/03/30"] fsu [label="Fedor Uporov\nfsu@FreeBSD.org\n2017/08/28"] gabor [label="Gabor Kovesdan\ngabor@FreeBSD.org\n2010/02/02"] gad [label="Garance A. Drosehn\ngad@FreeBSD.org\n2000/10/27"] gallatin [label="Andrew Gallatin\ngallatin@FreeBSD.org\n1999/01/15"] ganbold [label="Ganbold Tsagaankhuu\nganbold@FreeBSD.org\n2013/12/18"] gavin [label="Gavin Atkinson\ngavin@FreeBSD.org\n2009/12/07"] gibbs [label="Justin T. Gibbs\ngibbs@FreeBSD.org\n????/??/??"] gjb [label="Glen Barber\ngjb@FreeBSD.org\n2013/06/04"] gleb [label="Gleb Kurtsou\ngleb@FreeBSD.org\n2011/09/19"] glebius [label="Gleb Smirnoff\nglebius@FreeBSD.org\n2004/07/14"] gnn [label="George V. Neville-Neil\ngnn@FreeBSD.org\n2004/10/11"] gordon [label="Gordon Tetlow\ngordon@FreeBSD.org\n2002/05/17"] grehan [label="Peter Grehan\ngrehan@FreeBSD.org\n2002/08/08"] grog [label="Greg Lehey\ngrog@FreeBSD.org\n1998/08/30"] gshapiro [label="Gregory Shapiro\ngshapiro@FreeBSD.org\n2000/07/12"] harti [label="Hartmut Brandt\nharti@FreeBSD.org\n2003/01/29"] hiren [label="Hiren Panchasara\nhiren@FreeBSD.org\n2013/04/12"] hmp [label="Hiten Pandya\nhmp@FreeBSD.org\n2004/03/23"] hselasky [label="Hans Petter Selasky\nhselasky@FreeBSD.org\n"] ian [label="Ian Lepore\nian@FreeBSD.org\n2013/01/07"] iedowse [label="Ian Dowse\niedowse@FreeBSD.org\n2000/12/01"] imp [label="Warner Losh\nimp@FreeBSD.org\n1996/09/20"] ivoras [label="Ivan Voras\nivoras@FreeBSD.org\n2008/06/10"] jah [label="Jason A. 
Harmening\njah@FreeBSD.org\n2015/03/08"] jamie [label="Jamie Gritton\njamie@FreeBSD.org\n2009/01/28"] jasone [label="Jason Evans\njasone@FreeBSD.org\n1999/03/03"] jceel [label="Jakub Klama\njceel@FreeBSD.org\n2011/09/25"] jch [label="Julien Charbon\njch@FreeBSD.org\n2014/09/24"] jchandra [label="Jayachandran C.\njchandra@FreeBSD.org\n2010/05/19"] jeb [label="Jeb Cramer\njeb@FreeBSD.org\n2018/01/25"] jeff [label="Jeff Roberson\njeff@FreeBSD.org\n2002/02/21"] jh [label="Jaakko Heinonen\njh@FreeBSD.org\n2009/10/02"] jhb [label="John Baldwin\njhb@FreeBSD.org\n1999/08/23"] jhibbits [label="Justin Hibbits\njhibbits@FreeBSD.org\n2011/11/30"] jilles [label="Jilles Tjoelker\njilles@FreeBSD.org\n2009/05/22"] jimharris [label="Jim Harris\njimharris@FreeBSD.org\n2011/12/09"] jinmei [label="JINMEI Tatuya\njinmei@FreeBSD.org\n2007/03/17"] jkim [label="Jung-uk Kim\njkim@FreeBSD.org\n2005/07/06"] jkoshy [label="A. Joseph Koshy\njkoshy@FreeBSD.org\n1998/05/13"] jlh [label="Jeremie Le Hen\njlh@FreeBSD.org\n2012/04/22"] jls [label="Jordan Sissel\njls@FreeBSD.org\n2006/12/06"] jmcneill [label="Jared McNeill\njmcneill@FreeBSD.org\n2016/02/24"] jmg [label="John-Mark Gurney\njmg@FreeBSD.org\n1997/02/13"] jmmv [label="Julio Merino\njmmv@FreeBSD.org\n2013/11/02"] joerg [label="Joerg Wunsch\njoerg@FreeBSD.org\n1993/11/14"] johalun [label="Johannes Lundberg\njohalun@FreeBSD.org\n2019/01/19"] jon [label="Jonathan Chen\njon@FreeBSD.org\n2000/10/17"] jonathan [label="Jonathan Anderson\njonathan@FreeBSD.org\n2010/10/07"] jpaetzel [label="Josh Paetzel\njpaetzel@FreeBSD.org\n2011/01/21"] jtl [label="Jonathan T. Looney\njtl@FreeBSD.org\n2015/10/26"] julian [label="Julian Elischer\njulian@FreeBSD.org\n1993/04/19"] jwd [label="John De Boskey\njwd@FreeBSD.org\n2000/05/19"] kaiw [label="Kai Wang\nkaiw@FreeBSD.org\n2007/09/26"] kan [label="Alexander Kabaev\nkan@FreeBSD.org\n2002/07/21"] karels [label="Mike Karels\nkarels@FreeBSD.org\n2016/06/09"] ken [label="Ken Merry\nken@FreeBSD.org\n1998/09/08"] kensmith [label="Ken Smith\nkensmith@FreeBSD.org\n2004/01/23"] kevans [label="Kyle Evans\nkevans@FreeBSD.org\n2017/06/20"] kevlo [label="Kevin Lo\nkevlo@FreeBSD.org\n2006/07/23"] kib [label="Konstantin Belousov\nkib@FreeBSD.org\n2006/06/03"] kibab [label="Ilya Bakulin\nkibab@FreeBSD.org\n2017/09/02"] kmacy [label="Kip Macy\nkmacy@FreeBSD.org\n2005/06/01"] kp [label="Kristof Provost\nkp@FreeBSD.org\n2015/03/22"] landonf [label="Landon Fuller\nlandonf@FreeBSD.org\n2016/05/31"] le [label="Lukas Ertl\nle@FreeBSD.org\n2004/02/02"] leitao [label="Breno Leitao\nleitao@FreeBSD.org\n2018/05/22"] lidl [label="Kurt Lidl\nlidl@FreeBSD.org\n2015/10/21"] loos [label="Luiz Otavio O Souza\nloos@FreeBSD.org\n2013/07/03"] lstewart [label="Lawrence Stewart\nlstewart@FreeBSD.org\n2008/10/06"] luporl [label="Leandro Lupori\nluporl@FreeBSD.org\n2018/05/21"] lwhsu [label="Li-Wen Hsu\nlwhsu@FreeBSD.org\n2018/08/09"] manu [label="Emmanuel Vadot\nmanu@FreeBSD.org\n2016/04/24"] marcel [label="Marcel Moolenaar\nmarcel@FreeBSD.org\n1999/07/03"] marius [label="Marius Strobl\nmarius@FreeBSD.org\n2004/04/17"] markj [label="Mark Johnston\nmarkj@FreeBSD.org\n2012/12/18"] markm [label="Mark Murray\nmarkm@FreeBSD.org\n1995/04/24"] markus [label="Markus Brueffer\nmarkus@FreeBSD.org\n2006/06/01"] matteo [label="Matteo Riondato\nmatteo@FreeBSD.org\n2006/01/18"] mav [label="Alexander Motin\nmav@FreeBSD.org\n2007/04/12"] maxim [label="Maxim Konovalov\nmaxim@FreeBSD.org\n2002/02/07"] mdf [label="Matthew Fleming\nmdf@FreeBSD.org\n2010/06/04"] mdodd [label="Matthew N. 
Dodd\nmdodd@FreeBSD.org\n1999/07/27"] melifaro [label="Alexander V. Chernikov\nmelifaro@FreeBSD.org\n2011/10/04"] mhorne [label="Mitchell Horne\nmhorne@FreeBSD.org\n2019/03/20"] miwi [label="Martin Wilke\nmiwi@FreeBSD.org\n2011/02/18\n2018/06/14"] mizhka [label="Michael Zhilin\nmizhka@FreeBSD.org\n2016/07/19"] mjacob [label="Matt Jacob\nmjacob@FreeBSD.org\n1997/08/13"] mjg [label="Mateusz Guzik\nmjg@FreeBSD.org\n2012/06/04"] mjoras [label="Matt Joras\nmjoras@FreeBSD.org\n2017/07/12"] mlaier [label="Max Laier\nmlaier@FreeBSD.org\n2004/02/10"] mmel [label="Michal Meloun\nmmel@FreeBSD.org\n2015/11/01"] monthadar [label="Monthadar Al Jaberi\nmonthadar@FreeBSD.org\n2012/04/02"] mp [label="Mark Peek\nmp@FreeBSD.org\n2001/07/27"] mr [label="Michael Reifenberger\nmr@FreeBSD.org\n2001/09/30"] mw [label="Marcin Wojtas\nmw@FreeBSD.org\n2017/07/18"] neel [label="Neel Natu\nneel@FreeBSD.org\n2009/09/20"] netchild [label="Alexander Leidinger\nnetchild@FreeBSD.org\n2005/03/31"] ngie [label="Enji Cooper\nngie@FreeBSD.org\n2014/07/27"] nork [label="Norikatsu Shigemura\nnork@FreeBSD.org\n2009/06/09"] np [label="Navdeep Parhar\nnp@FreeBSD.org\n2009/06/05"] nwhitehorn [label="Nathan Whitehorn\nnwhitehorn@FreeBSD.org\n2008/07/03"] n_hibma [label="Nick Hibma\nn_hibma@FreeBSD.org\n1998/11/26"] obrien [label="David E. O'Brien\nobrien@FreeBSD.org\n1996/10/29"] olli [label="Oliver Fromme\nolli@FreeBSD.org\n2008/02/14"] oshogbo [label="Mariusz Zaborski\noshogbo@FreeBSD.org\n2015/04/15"] peadar [label="Peter Edwards\npeadar@FreeBSD.org\n2004/03/08"] peter [label="Peter Wemm\npeter@FreeBSD.org\n1995/07/04"] peterj [label="Peter Jeremy\npeterj@FreeBSD.org\n2012/09/14"] pfg [label="Pedro Giffuni\npfg@FreeBSD.org\n2011/12/01"] phil [label="Phil Shafer\nphil@FreeBSD.org\n2015/12/30"] philip [label="Philip Paeps\nphilip@FreeBSD.org\n2004/01/21"] phk [label="Poul-Henning Kamp\nphk@FreeBSD.org\n1994/02/21"] pho [label="Peter Holm\npho@FreeBSD.org\n2008/11/16"] pjd [label="Pawel Jakub Dawidek\npjd@FreeBSD.org\n2004/02/02"] pkelsey [label="Patrick Kelsey\npkelsey@FreeBSD.org\n2014/05/29"] pluknet [label="Sergey Kandaurov\npluknet@FreeBSD.org\n2010/10/05"] ps [label="Paul Saab\nps@FreeBSD.org\n2000/02/23"] qingli [label="Qing Li\nqingli@FreeBSD.org\n2005/04/13"] ram [label="Ram Kishore Vegesna\nram@FreeBSD.org\n2018/04/04"] ray [label="Aleksandr Rybalko\nray@FreeBSD.org\n2011/05/25"] rdivacky [label="Roman Divacky\nrdivacky@FreeBSD.org\n2008/03/13"] remko [label="Remko Lodder\nremko@FreeBSD.org\n2007/02/23"] rgrimes [label="Rodney W. Grimes\nrgrimes@FreeBSD.org\n1993/06/12\n2017/03/03"] rik [label="Roman Kurakin\nrik@FreeBSD.org\n2003/12/18"] rlibby [label="Ryan Libby\nrlibby@FreeBSD.org\n2017/06/07"] rmacklem [label="Rick Macklem\nrmacklem@FreeBSD.org\n2009/03/27"] rmh [label="Robert Millan\nrmh@FreeBSD.org\n2011/09/18"] rnoland [label="Robert Noland\nrnoland@FreeBSD.org\n2008/09/15"] roberto [label="Ollivier Robert\nroberto@FreeBSD.org\n1995/02/22"] rodrigc [label="Craig Rodrigues\nrodrigc@FreeBSD.org\n2005/05/14"] royger [label="Roger Pau Monne\nroyger@FreeBSD.org\n2013/11/26"] rpaulo [label="Rui Paulo\nrpaulo@FreeBSD.org\n2007/09/25"] rpokala [label="Ravi Pokala\nrpokala@FreeBSD.org\n2015/11/19"] rrs [label="Randall R Stewart\nrrs@FreeBSD.org\n2007/02/08"] rse [label="Ralf S. Engelschall\nrse@FreeBSD.org\n1997/07/31"] rstone [label="Ryan Stone\nrstone@FreeBSD.org\n2010/04/19"] ru [label="Ruslan Ermilov\nru@FreeBSD.org\n1999/05/27"] rwatson [label="Robert N. M.
Watson\nrwatson@FreeBSD.org\n1999/12/16"] sam [label="Sam Leffler\nsam@FreeBSD.org\n2002/07/02"] sanpei [label="MIHIRA Sanpei Yoshiro\nsanpei@FreeBSD.org\n2000/06/19"] sbruno [label="Sean Bruno\nsbruno@FreeBSD.org\n2008/08/02"] scf [label="Sean C. Farley\nscf@FreeBSD.org\n2007/06/24"] schweikh [label="Jens Schweikhardt\nschweikh@FreeBSD.org\n2001/04/06"] scottl [label="Scott Long\nscottl@FreeBSD.org\n2000/09/28"] se [label="Stefan Esser\nse@FreeBSD.org\n1994/08/26"] sephe [label="Sepherosa Ziehau\nsephe@FreeBSD.org\n2007/03/28"] sepotvin [label="Stephane E. Potvin\nsepotvin@FreeBSD.org\n2007/02/15"] sgalabov [label="Stanislav Galabov\nsgalabov@FreeBSD.org\n2016/02/24"] shurd [label="Stephen Hurd\nshurd@FreeBSD.org\n2017/09/02"] simon [label="Simon L. Nielsen\nsimon@FreeBSD.org\n2006/03/07"] sjg [label="Simon J. Gerraty\nsjg@FreeBSD.org\n2012/10/23"] skra [label="Svatopluk Kraus\nskra@FreeBSD.org\n2015/10/28"] slavash [label="Slava Shwartsman\nslavash@FreeBSD.org\n2018/02/08"] slm [label="Stephen McConnell\nslm@FreeBSD.org\n2014/05/07"] smh [label="Steven Hartland\nsmh@FreeBSD.org\n2012/11/12"] sobomax [label="Maxim Sobolev\nsobomax@FreeBSD.org\n2001/07/25"] sos [label="Soren Schmidt\nsos@FreeBSD.org\n????/??/??"] sson [label="Stacey Son\nsson@FreeBSD.org\n2008/07/08"] stas [label="Stanislav Sedov\nstas@FreeBSD.org\n2008/08/22"] stevek [label="Stephen J. Kiernan\nstevek@FreeBSD.org\n2016/07/18"] suz [label="SUZUKI Shinsuke\nsuz@FreeBSD.org\n2002/03/26"] syrinx [label="Shteryana Shopova\nsyrinx@FreeBSD.org\n2006/10/07"] takawata [label="Takanori Watanabe\ntakawata@FreeBSD.org\n2000/07/06"] theraven [label="David Chisnall\ntheraven@FreeBSD.org\n2011/11/11"] thj [label="Tom Jones\nthj@FreeBSD.org\n2018/04/07"] thompsa [label="Andrew Thompson\nthompsa@FreeBSD.org\n2005/05/25"] ticso [label="Bernd Walter\nticso@FreeBSD.org\n2002/01/31"] tijl [label="Tijl Coosemans\ntijl@FreeBSD.org\n2010/07/16"] tsoome [label="Toomas Soome\ntsoome@FreeBSD.org\n2016/08/10"] trasz [label="Edward Tomasz Napierala\ntrasz@FreeBSD.org\n2008/08/22"] trhodes [label="Tom Rhodes\ntrhodes@FreeBSD.org\n2002/05/28"] trociny [label="Mikolaj Golub\ntrociny@FreeBSD.org\n2011/03/10"] tuexen [label="Michael Tuexen\ntuexen@FreeBSD.org\n2009/06/06"] tychon [label="Tycho Nightingale\ntychon@FreeBSD.org\n2014/01/21"] ume [label="Hajimu UMEMOTO\nume@FreeBSD.org\n2000/02/26"] uqs [label="Ulrich Spoerlein\nuqs@FreeBSD.org\n2010/01/28"] vangyzen [label="Eric van Gyzen\nvangyzen@FreeBSD.org\n2015/03/08"] vanhu [label="Yvan Vanhullebus\nvanhu@FreeBSD.org\n2008/07/21"] versus [label="Konrad Jankowski\nversus@FreeBSD.org\n2008/10/27"] weongyo [label="Weongyo Jeong\nweongyo@FreeBSD.org\n2007/12/21"] wes [label="Wes Peters\nwes@FreeBSD.org\n1998/11/25"] whu [label="Wei Hu\nwhu@FreeBSD.org\n2015/02/11"] will [label="Will Andrews\nwill@FreeBSD.org\n2000/03/20"] wkoszek [label="Wojciech A. 
Koszek\nwkoszek@FreeBSD.org\n2006/02/21"] wma [label="Wojciech Macek\nwma@FreeBSD.org\n2016/01/18"] wollman [label="Garrett Wollman\nwollman@FreeBSD.org\n????/??/??"] wsalamon [label="Wayne Salamon\nwsalamon@FreeBSD.org\n2005/06/25"] wulf [label="Vladimir Kondratyev\nwulf@FreeBSD.org\n2017/04/27"] yongari [label="Pyun YongHyeon\nyongari@FreeBSD.org\n2004/08/01"] yuripv [label="Yuri Pankov\nyuripv@FreeBSD.org\n2018/10/09"] zbb [label="Zbigniew Bodek\nzbb@FreeBSD.org\n2013/09/02"] zec [label="Marko Zec\nzec@FreeBSD.org\n2008/06/22"] zml [label="Zachary Loafman\nzml@FreeBSD.org\n2009/05/27"] zont [label="Andrey Zonov\nzont@FreeBSD.org\n2012/08/21"] # Pseudo target representing rev 1.1 of commit.allow day1 [label="Birth of FreeBSD"] # Here are the mentor/mentee relationships. # Group together all the mentees for a particular mentor. # Keep the list sorted by mentor login. day1 -> jtc day1 -> jkh day1 -> nate day1 -> rgrimes day1 -> alm day1 -> dg adrian -> avos adrian -> jmcneill adrian -> landonf adrian -> lidl adrian -> loos adrian -> mizhka adrian -> monthadar adrian -> ray adrian -> rmh adrian -> sephe adrian -> sgalabov ae -> melifaro allanjude -> tsoome alc -> davide andre -> qingli andrew -> manu anholt -> jkim araujo -> miwi avg -> art avg -> eugen avg -> pluknet avg -> smh bapt -> allanjude bapt -> araujo bapt -> bdrewery bapt -> wulf bde -> rgrimes benno -> grehan billf -> dougb billf -> gad billf -> jedgar billf -> jhb billf -> shafeeq billf -> will bmilekic -> csjp bms -> dhartmei bms -> mlaier bms -> thompsa brian -> joe brooks -> bushman brooks -> jamie brooks -> theraven brooks -> arichardson bz -> anchie bz -> jamie bz -> syrinx cognet -> br cognet -> jceel cognet -> kevlo cognet -> ian cognet -> manu cognet -> mw cognet -> wkoszek cognet -> wma cognet -> zbb cperciva -> eadler cperciva -> flz cperciva -> randi cperciva -> simon csjp -> bushman das -> kargl das -> rodrigc delphij -> gabor delphij -> rafan delphij -> sephe des -> anholt des -> hmp des -> mike des -> olli des -> ru des -> bapt dds -> versus dfr -> gallatin dfr -> zml dg -> peter dim -> theraven dwmalone -> fanf dwmalone -> peadar dwmalone -> snb eadler -> bcran ed -> dim ed -> gavin ed -> jilles ed -> rdivacky ed -> uqs eivind -> des eivind -> rwatson emaste -> achim emaste -> bwidawsk emaste -> dteske emaste -> kevans emaste -> lwhsu emaste -> markj emaste -> ngie emaste -> rstone emax -> markus erj -> jeb fjoe -> versus gallatin -> ticso gavin -> versus gibbs -> mjacob gibbs -> njl gibbs -> royger gibbs -> whu glebius -> mav gnn -> jinmei gnn -> rrs gnn -> ivoras gnn -> vanhu gnn -> lstewart gnn -> np gnn -> davide gnn -> arybchik gnn -> erj gnn -> kp gnn -> jtl gnn -> karels gonzo -> jmcneill gonzo -> wulf grehan -> bryanv grehan -> rgrimes grog -> edwin grog -> le grog -> peterj hselasky -> slavash imp -> akiyama imp -> ambrisko imp -> andrew imp -> bmah imp -> bruno imp -> chuck imp -> dmlb imp -> emax imp -> furuta imp -> joe imp -> johalun imp -> jon imp -> keichii imp -> kibab imp -> mb imp -> mr imp -> neel imp -> non imp -> nork imp -> onoe imp -> remko imp -> rik imp -> rink imp -> sanpei imp -> shiba imp -> takawata imp -> toshi imp -> tsoome imp -> uch jake -> bms jake -> gordon jake -> harti jake -> jeff jake -> kmacy jake -> robert jake -> yongari jb -> sson jdp -> fjoe jfv -> erj jhb -> arr jhb -> avg jhb -> jch jhb -> jeff jhb -> kbyanc jhb -> peterj jhb -> pfg jhb -> rnoland jhb -> rpokala jhb -> arichardson jhibbits -> leitao jhibbits -> luporl jimharris -> carl jkh -> dfr jkh -> gj jkh -> grog 
jkh -> imp jkh -> jlemon jkh -> joerg jkh -> jwd jkh -> msmith jkh -> murray jkh -> phk jkh -> wes jkh -> yar jkoshy -> kaiw jkoshy -> fabient jkoshy -> rstone jlemon -> bmilekic jlemon -> brooks jmallett -> pkelsey jmmv -> ngie joerg -> brian joerg -> eik joerg -> jmg joerg -> le joerg -> netchild joerg -> schweikh jtl -> ngie jtl -> thj julian -> glebius julian -> davidxu julian -> archie julian -> adrian julian -> zec julian -> mp kan -> kib ken -> asomers ken -> chuck ken -> ram ken -> slm ken -> will kib -> ae kib -> badger kib -> dchagin +kib -> dougm kib -> gjb kib -> jah kib -> jlh kib -> jpaetzel kib -> lulf kib -> melifaro kib -> mmel kib -> pho kib -> pluknet kib -> rdivacky kib -> rmacklem kib -> rmh kib -> skra kib -> slavash kib -> stas kib -> tijl kib -> trociny kib -> vangyzen kib -> yuripv kib -> zont kmacy -> lstewart marcel -> allanjude marcel -> art marcel -> arun marcel -> marius marcel -> nwhitehorn marcel -> sjg markj -> cem +markj -> dougm markj -> lwhsu markj -> mhorne markj -> rlibby markm -> jasone markm -> sheldonh mav -> ae mav -> eugen mav -> ram mdf -> gleb mdodd -> jake mike -> das mlaier -> benjsc mlaier -> dhartmei mlaier -> thompsa mlaier -> eri msmith -> cokane msmith -> jasone msmith -> scottl murray -> delphij mux -> cognet mux -> dumbbell netchild -> ariff njl -> marks njl -> philip njl -> rpaulo njl -> sepotvin nwhitehorn -> andreast nwhitehorn -> jhibbits nwhitehorn -> leitao nwhitehorn -> luporl obrien -> benno obrien -> groudier obrien -> gshapiro obrien -> kan obrien -> sam pfg -> pstef pfg -> fsu peter -> asmodai peter -> jayanth peter -> ps philip -> benl philip -> ed philip -> jls philip -> matteo philip -> uqs philip -> kp phk -> jkoshy phk -> mux phk -> rgrimes pjd -> def pjd -> kib pjd -> lulf pjd -> oshogbo pjd -> smh pjd -> trociny rgrimes -> markm rmacklem -> jwd royger -> whu rpaulo -> avg rpaulo -> bschmidt rpaulo -> dim rpaulo -> jmmv rpaulo -> lidl rpaulo -> ngie rrs -> bcran rrs -> jchandra rrs -> tuexen rstone -> markj rstone -> mjoras ru -> ceri ru -> cjc ru -> eik ru -> maxim ru -> sobomax rwatson -> adrian rwatson -> antoine rwatson -> bmah rwatson -> brueffer rwatson -> bz rwatson -> cperciva rwatson -> emaste rwatson -> gnn rwatson -> jh rwatson -> jonathan rwatson -> kensmith rwatson -> kmacy rwatson -> linimon rwatson -> rmacklem rwatson -> shafeeq rwatson -> tmm rwatson -> trasz rwatson -> trhodes rwatson -> wsalamon rodrigc -> araujo sam -> andre sam -> benjsc sam -> sephe sbruno -> hiren sbruno -> jeb sbruno -> jimharris sbruno -> shurd schweikh -> dds scottl -> achim scottl -> jimharris scottl -> pjd scottl -> sah scottl -> sbruno scottl -> slm scottl -> yongari sephe -> dexuan sheldonh -> dwmalone sheldonh -> iedowse shin -> ume simon -> benl sjg -> phil sjg -> stevek sos -> marcel stas -> ganbold theraven -> phil thompsa -> weongyo thompsa -> eri trasz -> jh trasz -> mjg ume -> jinmei ume -> suz ume -> tshiozak vangyzen -> badger vangyzen -> dab wes -> scf wkoszek -> jceel wollman -> gad zml -> mdf zml -> zack } Index: projects/runtime-coverage-v2/stand/common/disk.c =================================================================== --- projects/runtime-coverage-v2/stand/common/disk.c (revision 347075) +++ projects/runtime-coverage-v2/stand/common/disk.c (revision 347076) @@ -1,450 +1,450 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright (c) 2012 Andrey V. Elsukov * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "disk.h" #ifdef DISK_DEBUG # define DPRINTF(fmt, args...) printf("%s: " fmt "\n" , __func__ , ## args) #else # define DPRINTF(fmt, args...) #endif struct open_disk { struct ptable *table; uint64_t mediasize; uint64_t entrysize; u_int sectorsize; }; struct print_args { struct disk_devdesc *dev; const char *prefix; int verbose; }; /* Convert size to a human-readable number. */ static char * display_size(uint64_t size, u_int sectorsize) { static char buf[80]; char unit; size = size * sectorsize / 1024; unit = 'K'; if (size >= 10485760000LL) { size /= 1073741824; unit = 'T'; } else if (size >= 10240000) { size /= 1048576; unit = 'G'; } else if (size >= 10000) { size /= 1024; unit = 'M'; } sprintf(buf, "%4ld%cB", (long)size, unit); return (buf); } int ptblread(void *d, void *buf, size_t blocks, uint64_t offset) { struct disk_devdesc *dev; struct open_disk *od; dev = (struct disk_devdesc *)d; od = (struct open_disk *)dev->dd.d_opendata; /* * The strategy function assumes the offset is in units of 512 byte * sectors. For larger sector sizes, we need to adjust the offset to * match the actual sector size. */ offset *= (od->sectorsize / 512); /* * As the GPT backup partition is located at the end of the disk, * to avoid reading past disk end, flag bcache not to use RA. */ return (dev->dd.d_dev->dv_strategy(dev, F_READ | F_NORA, offset, blocks * od->sectorsize, (char *)buf, NULL)); } static int ptable_print(void *arg, const char *pname, const struct ptable_entry *part) { struct disk_devdesc dev; struct print_args *pa, bsd; struct open_disk *od; struct ptable *table; char line[80]; int res; u_int sectsize; uint64_t partsize; pa = (struct print_args *)arg; od = (struct open_disk *)pa->dev->dd.d_opendata; sectsize = od->sectorsize; partsize = part->end - part->start + 1; sprintf(line, " %s%s: %s\t%s\n", pa->prefix, pname, parttype2str(part->type), pa->verbose ? 
display_size(partsize, sectsize) : ""); if (pager_output(line)) return 1; res = 0; if (part->type == PART_FREEBSD) { /* Open slice with BSD label */ dev.dd.d_dev = pa->dev->dd.d_dev; dev.dd.d_unit = pa->dev->dd.d_unit; dev.d_slice = part->index; dev.d_partition = D_PARTNONE; if (disk_open(&dev, partsize, sectsize) == 0) { table = ptable_open(&dev, partsize, sectsize, ptblread); if (table != NULL) { sprintf(line, " %s%s", pa->prefix, pname); bsd.dev = pa->dev; bsd.prefix = line; bsd.verbose = pa->verbose; res = ptable_iterate(table, &bsd, ptable_print); ptable_close(table); } disk_close(&dev); } } return (res); } int disk_print(struct disk_devdesc *dev, char *prefix, int verbose) { struct open_disk *od; struct print_args pa; /* Disk should be opened */ od = (struct open_disk *)dev->dd.d_opendata; pa.dev = dev; pa.prefix = prefix; pa.verbose = verbose; return (ptable_iterate(od->table, &pa, ptable_print)); } int disk_read(struct disk_devdesc *dev, void *buf, uint64_t offset, u_int blocks) { struct open_disk *od; int ret; od = (struct open_disk *)dev->dd.d_opendata; ret = dev->dd.d_dev->dv_strategy(dev, F_READ, dev->d_offset + offset, blocks * od->sectorsize, buf, NULL); return (ret); } int disk_write(struct disk_devdesc *dev, void *buf, uint64_t offset, u_int blocks) { struct open_disk *od; int ret; od = (struct open_disk *)dev->dd.d_opendata; ret = dev->dd.d_dev->dv_strategy(dev, F_WRITE, dev->d_offset + offset, blocks * od->sectorsize, buf, NULL); return (ret); } int disk_ioctl(struct disk_devdesc *dev, u_long cmd, void *data) { struct open_disk *od = dev->dd.d_opendata; if (od == NULL) return (ENOTTY); switch (cmd) { case DIOCGSECTORSIZE: *(u_int *)data = od->sectorsize; break; case DIOCGMEDIASIZE: if (dev->d_offset == 0) *(uint64_t *)data = od->mediasize; else *(uint64_t *)data = od->entrysize * od->sectorsize; break; default: return (ENOTTY); } return (0); } int disk_open(struct disk_devdesc *dev, uint64_t mediasize, u_int sectorsize) { struct disk_devdesc partdev; struct open_disk *od; struct ptable *table; struct ptable_entry part; int rc, slice, partition; rc = 0; od = (struct open_disk *)malloc(sizeof(struct open_disk)); if (od == NULL) { DPRINTF("no memory"); return (ENOMEM); } dev->dd.d_opendata = od; od->entrysize = 0; od->mediasize = mediasize; od->sectorsize = sectorsize; /* * While we are reading disk metadata, make sure we do it relative * to the start of the disk */ memcpy(&partdev, dev, sizeof(partdev)); partdev.d_offset = 0; partdev.d_slice = D_SLICENONE; partdev.d_partition = D_PARTNONE; dev->d_offset = 0; table = NULL; slice = dev->d_slice; partition = dev->d_partition; DPRINTF("%s unit %d, slice %d, partition %d => %p", disk_fmtdev(dev), dev->dd.d_unit, dev->d_slice, dev->d_partition, od); /* Determine disk layout. 
*/ od->table = ptable_open(&partdev, mediasize / sectorsize, sectorsize, ptblread); if (od->table == NULL) { DPRINTF("Can't read partition table"); rc = ENXIO; goto out; } if (ptable_getsize(od->table, &mediasize) != 0) { rc = ENXIO; goto out; } od->mediasize = mediasize; if (ptable_gettype(od->table) == PTABLE_BSD && partition >= 0) { /* It doesn't matter what value d_slice has */ rc = ptable_getpart(od->table, &part, partition); if (rc == 0) { dev->d_offset = part.start; od->entrysize = part.end - part.start + 1; } } else if (ptable_gettype(od->table) == PTABLE_ISO9660) { dev->d_offset = 0; od->entrysize = mediasize; } else if (slice >= 0) { /* Try to get information about partition */ if (slice == 0) rc = ptable_getbestpart(od->table, &part); else rc = ptable_getpart(od->table, &part, slice); if (rc != 0) /* Partition doesn't exist */ goto out; dev->d_offset = part.start; od->entrysize = part.end - part.start + 1; slice = part.index; if (ptable_gettype(od->table) == PTABLE_GPT) { - partition = 255; + partition = D_PARTISGPT; goto out; /* Nothing more to do */ - } else if (partition == 255) { + } else if (partition == D_PARTISGPT) { /* * When we try to open a GPT partition but the partition * table isn't GPT, reset the d_partition value to -1 * and try to autodetect an appropriate value. */ partition = -1; } /* * If d_partition < 0 and we are looking at a BSD slice, * then try to read the BSD label, otherwise return the * whole MBR slice. */ if (partition == -1 && part.type != PART_FREEBSD) goto out; /* Try to read BSD label */ table = ptable_open(dev, part.end - part.start + 1, od->sectorsize, ptblread); if (table == NULL) { DPRINTF("Can't read BSD label"); rc = ENXIO; goto out; } /* * If the slice contains a BSD label and d_partition < 0, then * assume the 'a' partition. Otherwise just return the * whole MBR slice, because it can contain ZFS. */ if (partition < 0) { if (ptable_gettype(table) != PTABLE_BSD) goto out; partition = 0; } rc = ptable_getpart(table, &part, partition); if (rc != 0) goto out; dev->d_offset += part.start; od->entrysize = part.end - part.start + 1; } out: if (table != NULL) ptable_close(table); if (rc != 0) { if (od->table != NULL) ptable_close(od->table); free(od); DPRINTF("%s could not open", disk_fmtdev(dev)); } else { /* Save the slice and partition number to the dev */ dev->d_slice = slice; dev->d_partition = partition; DPRINTF("%s offset %lld => %p", disk_fmtdev(dev), (long long)dev->d_offset, od); } return (rc); } int disk_close(struct disk_devdesc *dev) { struct open_disk *od; od = (struct open_disk *)dev->dd.d_opendata; DPRINTF("%s closed => %p", disk_fmtdev(dev), od); ptable_close(od->table); free(od); return (0); } char* disk_fmtdev(struct disk_devdesc *dev) { static char buf[128]; char *cp; cp = buf + sprintf(buf, "%s%d", dev->dd.d_dev->dv_name, dev->dd.d_unit); if (dev->d_slice > D_SLICENONE) { #ifdef LOADER_GPT_SUPPORT if (dev->d_partition == D_PARTISGPT) { sprintf(cp, "p%d:", dev->d_slice); return (buf); } else #endif #ifdef LOADER_MBR_SUPPORT cp += sprintf(cp, "s%d", dev->d_slice); #endif } if (dev->d_partition > D_PARTNONE) cp += sprintf(cp, "%c", dev->d_partition + 'a'); strcat(cp, ":"); return (buf); } int disk_parsedev(struct disk_devdesc *dev, const char *devspec, const char **path) { int unit, slice, partition; const char *np; char *cp; np = devspec; unit = -1; /* * If there is path/file info after the device info, then any missing * slice or partition info should be considered a request to search for * an appropriate partition.
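* (For example, a device spec like "disk0:/boot/loader" carries a file path but names no slice or partition, so the code below starts those out as D_SLICEWILD / D_PARTWILD and searches, while a bare "disk0s1:" has no trailing path and opens the raw slice with D_PARTNONE.)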
Otherwise we want to open the raw device * itself and not try to fill in missing info by searching. */ if ((cp = strchr(np, ':')) != NULL && cp[1] != '\0') { slice = D_SLICEWILD; partition = D_PARTWILD; } else { slice = D_SLICENONE; partition = D_PARTNONE; } if (*np != '\0' && *np != ':') { unit = strtol(np, &cp, 10); if (cp == np) return (EUNIT); #ifdef LOADER_GPT_SUPPORT if (*cp == 'p') { np = cp + 1; slice = strtol(np, &cp, 10); if (np == cp) return (ESLICE); /* we don't support nested partitions on GPT */ if (*cp != '\0' && *cp != ':') return (EINVAL); - partition = 255; + partition = D_PARTISGPT; } else #endif #ifdef LOADER_MBR_SUPPORT if (*cp == 's') { np = cp + 1; slice = strtol(np, &cp, 10); if (np == cp) return (ESLICE); } #endif if (*cp != '\0' && *cp != ':') { partition = *cp - 'a'; if (partition < 0) return (EPART); cp++; } } else return (EINVAL); if (*cp != '\0' && *cp != ':') return (EINVAL); dev->dd.d_unit = unit; dev->d_slice = slice; dev->d_partition = partition; if (path != NULL) *path = (*cp == '\0') ? cp: cp + 1; return (0); } Index: projects/runtime-coverage-v2/stand/efi/boot1/Makefile =================================================================== --- projects/runtime-coverage-v2/stand/efi/boot1/Makefile (revision 347075) +++ projects/runtime-coverage-v2/stand/efi/boot1/Makefile (revision 347076) @@ -1,109 +1,110 @@ # $FreeBSD$ .include -PROG= boot1.sym +BOOT1?= boot1 +PROG= ${BOOT1}.sym INTERNALPROG= WARNS?= 6 CFLAGS+= -DEFI_BOOT1 # We implement a slightly non-standard %S in that it always takes a # CHAR16 that's common in UEFI-land instead of a wchar_t. This only # seems to matter on arm64 where wchar_t defaults to an int instead # of a short. There's no good cast to use here so just ignore the # warnings for now. CWARNFLAGS.boot1.c+= -Wno-format # Disable warnings that are currently incompatible with the zfs boot code CWARNFLAGS.zfs_module.c += -Wno-array-bounds CWARNFLAGS.zfs_module.c += -Wno-cast-align CWARNFLAGS.zfs_module.c += -Wno-cast-qual CWARNFLAGS.zfs_module.c += -Wno-missing-prototypes CWARNFLAGS.zfs_module.c += -Wno-sign-compare CWARNFLAGS.zfs_module.c += -Wno-unused-parameter CWARNFLAGS.zfs_module.c += -Wno-unused-function # architecture-specific loader code -SRCS= boot1.c self_reloc.c start.S ufs_module.c +SRCS+= boot1.c self_reloc.c start.S ufs_module.c .if ${MK_LOADER_ZFS} != "no" SRCS+= zfs_module.c CFLAGS.zfs_module.c+= -I${ZFSSRC} CFLAGS.zfs_module.c+= -I${SYSDIR}/cddl/boot/zfs CFLAGS.zfs_module.c+= -I${SYSDIR}/crypto/skein CFLAGS+= -DEFI_ZFS_BOOT .endif .if ${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} > 40201 CWARNFLAGS.self_reloc.c+= -Wno-error=maybe-uninitialized .endif CFLAGS+= -I${EFIINC} CFLAGS+= -I${EFIINCMD} CFLAGS+= -I${SYSDIR}/contrib/dev/acpica/include CFLAGS+= -DEFI_UFS_BOOT .ifdef(EFI_DEBUG) CFLAGS+= -DEFI_DEBUG .endif # Always add MI sources and REGULAR efi loader bits .PATH: ${EFISRC}/loader/arch/${MACHINE} .PATH: ${EFISRC}/loader .PATH: ${LDRSRC} CFLAGS+= -I${LDRSRC} -FILES= boot1.efi boot1.efifat -FILESMODE_boot1.efi= ${BINMODE} +FILES= ${BOOT1}.efi ${BOOT1}.efifat +FILESMODE_${BOOT1}.efi= ${BINMODE} LDSCRIPT= ${EFISRC}/loader/arch/${MACHINE}/ldscript.${MACHINE} LDFLAGS+= -Wl,-T${LDSCRIPT},-Bsymbolic,-znotext -shared .if ${MACHINE_CPUARCH} == "aarch64" CFLAGS+= -mgeneral-regs-only .endif .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" CFLAGS+= -fPIC LDFLAGS+= -Wl,-znocombreloc .endif LIBEFI= ${BOOTOBJ}/efi/libefi/libefi.a # # Add libstand for the runtime functions used by the compiler - for example # 
__aeabi_* (arm) or __divdi3 (i386). # as well as required string and memory functions for all platforms. # DPADD+= ${LIBEFI} ${LIBSA} LDADD+= ${LIBEFI} ${LIBSA} DPADD+= ${LDSCRIPT} -boot1.efi: ${PROG} +${BOOT1}.efi: ${PROG} if ${NM} ${.ALLSRC} | grep ' U '; then \ echo "Undefined symbols in ${.ALLSRC}"; \ exit 1; \ fi SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH} \ ${OBJCOPY} -j .peheader -j .text -j .sdata -j .data \ -j .dynamic -j .dynsym -j .rel.dyn \ -j .rela.dyn -j .reloc -j .eh_frame \ --output-target=${EFI_TARGET} ${.ALLSRC} ${.TARGET} # The following inserts our objects into a template FAT file system # created by generate-fat.sh .include "Makefile.fat" -boot1.efifat: boot1.efi +${BOOT1}.efifat: ${BOOT1}.efi @set -- `ls -l ${.ALLSRC}`; \ x=$$(($$5-${BOOT1_MAXSIZE})); \ if [ $$x -ge 0 ]; then \ echo "boot1 $$x bytes too large; regenerate FAT templates?" >&2 ;\ exit 1; \ fi echo ${.OBJDIR} xz -d -c ${BOOTSRC}/efi/boot1/fat-${MACHINE}.tmpl.xz > ${.TARGET} ${DD} if=${.ALLSRC} of=${.TARGET} seek=${BOOT1_OFFSET} conv=notrunc -CLEANFILES+= boot1.efi boot1.efifat +CLEANFILES+= ${BOOT1}.efi ${BOOT1}.efifat .include Index: projects/runtime-coverage-v2/stand/efi/boot1/boot1.c =================================================================== --- projects/runtime-coverage-v2/stand/efi/boot1/boot1.c (revision 347075) +++ projects/runtime-coverage-v2/stand/efi/boot1/boot1.c (revision 347076) @@ -1,581 +1,581 @@ /*- * Copyright (c) 1998 Robert Nordier * All rights reserved. * Copyright (c) 2001 Robert Drehmel * All rights reserved. * Copyright (c) 2014 Nathan Whitehorn * All rights reserved. * Copyright (c) 2015 Eric McCorkle * All rights reserved. * * Redistribution and use in source and binary forms are freely * permitted provided that the above copyright notice and this * paragraph and the following disclaimer are duplicated in all * such forms. * * This software is provided "AS IS" and without any express or * implied warranties, including, without limitation, the implied * warranties of merchantability and fitness for a particular * purpose. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include "boot_module.h" #include "paths.h" static void efi_panic(EFI_STATUS s, const char *fmt, ...) __dead2 __printflike(2, 3); static const boot_module_t *boot_modules[] = { #ifdef EFI_ZFS_BOOT &zfs_module, #endif #ifdef EFI_UFS_BOOT &ufs_module #endif }; #define NUM_BOOT_MODULES nitems(boot_modules) /* The initial number of handles used to query EFI for partitions. */ #define NUM_HANDLES_INIT 24 static EFI_GUID BlockIoProtocolGUID = BLOCK_IO_PROTOCOL; static EFI_GUID DevicePathGUID = DEVICE_PATH_PROTOCOL; static EFI_GUID LoadedImageGUID = LOADED_IMAGE_PROTOCOL; static EFI_GUID ConsoleControlGUID = EFI_CONSOLE_CONTROL_PROTOCOL_GUID; /* * Provide Malloc / Free backed by EFIs AllocatePool / FreePool which ensures * memory is correctly aligned avoiding EFI_INVALID_PARAMETER returns from * EFI methods. */ void * Malloc(size_t len, const char *file __unused, int line __unused) { void *out; if (BS->AllocatePool(EfiLoaderData, len, &out) == EFI_SUCCESS) return (out); return (NULL); } void Free(void *buf, const char *file __unused, int line __unused) { if (buf != NULL) (void)BS->FreePool(buf); } /* * nodes_match returns TRUE if the imgpath isn't NULL and the nodes match, * FALSE otherwise. 
*/ static BOOLEAN nodes_match(EFI_DEVICE_PATH *imgpath, EFI_DEVICE_PATH *devpath) { size_t len; if (imgpath == NULL || imgpath->Type != devpath->Type || imgpath->SubType != devpath->SubType) return (FALSE); len = DevicePathNodeLength(imgpath); if (len != DevicePathNodeLength(devpath)) return (FALSE); return (memcmp(imgpath, devpath, (size_t)len) == 0); } /* * device_paths_match returns TRUE if the imgpath isn't NULL and all nodes * in imgpath and devpath match up to their respective occurrences of a * media node, FALSE otherwise. */ static BOOLEAN device_paths_match(EFI_DEVICE_PATH *imgpath, EFI_DEVICE_PATH *devpath) { if (imgpath == NULL) return (FALSE); while (!IsDevicePathEnd(imgpath) && !IsDevicePathEnd(devpath)) { if (IsDevicePathType(imgpath, MEDIA_DEVICE_PATH) && IsDevicePathType(devpath, MEDIA_DEVICE_PATH)) return (TRUE); if (!nodes_match(imgpath, devpath)) return (FALSE); imgpath = NextDevicePathNode(imgpath); devpath = NextDevicePathNode(devpath); } return (FALSE); } /* * devpath_last returns the last non-path end node in devpath. */ static EFI_DEVICE_PATH * devpath_last(EFI_DEVICE_PATH *devpath) { while (!IsDevicePathEnd(NextDevicePathNode(devpath))) devpath = NextDevicePathNode(devpath); return (devpath); } /* * load_loader attempts to load the loader image data. * * It tries each module and its respective devices, identified by mod->probe, * in order until a successful load occurs, at which point it returns * EFI_SUCCESS; otherwise it returns EFI_NOT_FOUND. * * Only devices whose preferred flag matches the preferred parameter are tried. */ static EFI_STATUS load_loader(const boot_module_t **modp, dev_info_t **devinfop, void **bufp, size_t *bufsize, BOOLEAN preferred) { UINTN i; dev_info_t *dev; const boot_module_t *mod; for (i = 0; i < NUM_BOOT_MODULES; i++) { mod = boot_modules[i]; for (dev = mod->devices(); dev != NULL; dev = dev->next) { if (dev->preferred != preferred) continue; if (mod->load(PATH_LOADER_EFI, dev, bufp, bufsize) == EFI_SUCCESS) { *devinfop = dev; *modp = mod; return (EFI_SUCCESS); } } } return (EFI_NOT_FOUND); } /* * try_boot only returns if it fails to load the loader. If it succeeds * it simply boots; otherwise it returns the status of the last EFI call. */ static EFI_STATUS try_boot(void) { size_t bufsize, loadersize, cmdsize; void *buf, *loaderbuf; char *cmd; dev_info_t *dev; const boot_module_t *mod; EFI_HANDLE loaderhandle; EFI_LOADED_IMAGE *loaded_image; EFI_STATUS status; status = load_loader(&mod, &dev, &loaderbuf, &loadersize, TRUE); if (status != EFI_SUCCESS) { status = load_loader(&mod, &dev, &loaderbuf, &loadersize, FALSE); if (status != EFI_SUCCESS) { printf("Failed to load '%s'\n", PATH_LOADER_EFI); return (status); } } /* * Read in and parse the command line from /boot.config or /boot/config, * if present. We'll pass it to the next stage via a simple ASCII * string. loader.efi has a hack for ASCII strings, so we'll use that to * keep the size down here. We only try to read the alternate file if * we get EFI_NOT_FOUND because all other errors mean that the boot_module * had trouble with the filesystem. We could return early, but we'll let * loading the actual kernel sort all that out. Since these files are * optional, we don't report errors in trying to read them.
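* (Sketch of the handoff, assuming loader.efi's ASCII LoadOptions hack mentioned above: the file contents are copied into 'cmd' below, NUL-terminated, and handed over via loaded_image->LoadOptions / LoadOptionsSize.)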
*/ cmd = NULL; cmdsize = 0; status = mod->load(PATH_DOTCONFIG, dev, &buf, &bufsize); if (status == EFI_NOT_FOUND) status = mod->load(PATH_CONFIG, dev, &buf, &bufsize); if (status == EFI_SUCCESS) { cmdsize = bufsize + 1; cmd = malloc(cmdsize); if (cmd == NULL) goto errout; memcpy(cmd, buf, bufsize); cmd[bufsize] = '\0'; free(buf); buf = NULL; } if ((status = BS->LoadImage(TRUE, IH, devpath_last(dev->devpath), loaderbuf, loadersize, &loaderhandle)) != EFI_SUCCESS) { printf("Failed to load image provided by %s, size: %zu, (%lu)\n", mod->name, loadersize, EFI_ERROR_CODE(status)); goto errout; } if ((status = BS->HandleProtocol(loaderhandle, &LoadedImageGUID, (VOID**)&loaded_image)) != EFI_SUCCESS) { printf("Failed to query LoadedImage provided by %s (%lu)\n", mod->name, EFI_ERROR_CODE(status)); goto errout; } if (cmd != NULL) printf(" command args: %s\n", cmd); loaded_image->DeviceHandle = dev->devhandle; loaded_image->LoadOptionsSize = cmdsize; loaded_image->LoadOptions = cmd; DPRINTF("Starting '%s' in 5 seconds...", PATH_LOADER_EFI); DSTALL(1000000); DPRINTF("."); DSTALL(1000000); DPRINTF("."); DSTALL(1000000); DPRINTF("."); DSTALL(1000000); DPRINTF("."); DSTALL(1000000); DPRINTF(".\n"); if ((status = BS->StartImage(loaderhandle, NULL, NULL)) != EFI_SUCCESS) { printf("Failed to start image provided by %s (%lu)\n", mod->name, EFI_ERROR_CODE(status)); loaded_image->LoadOptionsSize = 0; loaded_image->LoadOptions = NULL; } errout: if (cmd != NULL) free(cmd); if (buf != NULL) free(buf); if (loaderbuf != NULL) free(loaderbuf); return (status); } /* * probe_handle determines if the passed handle represents a logical partition * if it does it uses each module in order to probe it and if successful it * returns EFI_SUCCESS. */ static EFI_STATUS probe_handle(EFI_HANDLE h, EFI_DEVICE_PATH *imgpath, BOOLEAN *preferred) { dev_info_t *devinfo; EFI_BLOCK_IO *blkio; EFI_DEVICE_PATH *devpath; EFI_STATUS status; UINTN i; /* Figure out if we're dealing with an actual partition. */ status = BS->HandleProtocol(h, &DevicePathGUID, (void **)&devpath); if (status == EFI_UNSUPPORTED) return (status); if (status != EFI_SUCCESS) { DPRINTF("\nFailed to query DevicePath (%lu)\n", EFI_ERROR_CODE(status)); return (status); } #ifdef EFI_DEBUG { CHAR16 *text = efi_devpath_name(devpath); DPRINTF("probing: %S\n", text); efi_free_devpath_name(text); } #endif status = BS->HandleProtocol(h, &BlockIoProtocolGUID, (void **)&blkio); if (status == EFI_UNSUPPORTED) return (status); if (status != EFI_SUCCESS) { DPRINTF("\nFailed to query BlockIoProtocol (%lu)\n", EFI_ERROR_CODE(status)); return (status); } if (!blkio->Media->LogicalPartition) return (EFI_UNSUPPORTED); *preferred = device_paths_match(imgpath, devpath); /* Run through each module, see if it can load this partition */ for (i = 0; i < NUM_BOOT_MODULES; i++) { devinfo = malloc(sizeof(*devinfo)); if (devinfo == NULL) { DPRINTF("\nFailed to allocate devinfo\n"); - continue; + break; } devinfo->dev = blkio; devinfo->devpath = devpath; devinfo->devhandle = h; devinfo->devdata = NULL; devinfo->preferred = *preferred; devinfo->next = NULL; status = boot_modules[i]->probe(devinfo); if (status == EFI_SUCCESS) return (EFI_SUCCESS); free(devinfo); } return (EFI_UNSUPPORTED); } /* * probe_handle_status calls probe_handle and outputs the returned status * of the call. 
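* (Editorial note on probe_handle() above: as of this revision a failed devinfo allocation breaks out of the module loop instead of continuing, since the next iteration's malloc() of the same size is unlikely to fare any better.)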
*/ static void probe_handle_status(EFI_HANDLE h, EFI_DEVICE_PATH *imgpath) { EFI_STATUS status; BOOLEAN preferred; preferred = FALSE; status = probe_handle(h, imgpath, &preferred); DPRINTF("probe: "); switch (status) { case EFI_UNSUPPORTED: printf("."); DPRINTF(" not supported\n"); break; case EFI_SUCCESS: if (preferred) { printf("%c", '*'); DPRINTF(" supported (preferred)\n"); } else { printf("%c", '+'); DPRINTF(" supported\n"); } break; default: printf("x"); DPRINTF(" error (%lu)\n", EFI_ERROR_CODE(status)); break; } DSTALL(500000); } EFI_STATUS efi_main(EFI_HANDLE Ximage, EFI_SYSTEM_TABLE *Xsystab) { EFI_HANDLE *handles; EFI_LOADED_IMAGE *img; EFI_DEVICE_PATH *imgpath; EFI_STATUS status; EFI_CONSOLE_CONTROL_PROTOCOL *ConsoleControl = NULL; SIMPLE_TEXT_OUTPUT_INTERFACE *conout = NULL; UINTN i, hsize, nhandles; CHAR16 *text; UINT16 boot_current; size_t sz; UINT16 boot_order[100]; /* Basic initialization */ ST = Xsystab; IH = Ximage; BS = ST->BootServices; RS = ST->RuntimeServices; /* Set up the console, so printf works. */ status = BS->LocateProtocol(&ConsoleControlGUID, NULL, (VOID **)&ConsoleControl); if (status == EFI_SUCCESS) (void)ConsoleControl->SetMode(ConsoleControl, EfiConsoleControlScreenText); /* * Reset the console and enable the cursor. Later we'll choose a better * console size through GOP/UGA. */ conout = ST->ConOut; conout->Reset(conout, TRUE); /* Explicitly set conout to mode 0, 80x25 */ conout->SetMode(conout, 0); conout->EnableCursor(conout, TRUE); conout->ClearScreen(conout); printf("\n>> FreeBSD EFI boot block\n"); printf(" Loader path: %s\n\n", PATH_LOADER_EFI); printf(" Initializing modules:"); for (i = 0; i < NUM_BOOT_MODULES; i++) { printf(" %s", boot_modules[i]->name); if (boot_modules[i]->init != NULL) boot_modules[i]->init(); } putchar('\n'); /* Determine the devpath of our image so we can prefer it. */ status = BS->HandleProtocol(IH, &LoadedImageGUID, (VOID**)&img); imgpath = NULL; if (status == EFI_SUCCESS) { text = efi_devpath_name(img->FilePath); if (text != NULL) { printf(" Load Path: %S\n", text); efi_setenv_freebsd_wcs("Boot1Path", text); efi_free_devpath_name(text); } status = BS->HandleProtocol(img->DeviceHandle, &DevicePathGUID, (void **)&imgpath); if (status != EFI_SUCCESS) { DPRINTF("Failed to get image DevicePath (%lu)\n", EFI_ERROR_CODE(status)); } else { text = efi_devpath_name(imgpath); if (text != NULL) { printf(" Load Device: %S\n", text); efi_setenv_freebsd_wcs("Boot1Dev", text); efi_free_devpath_name(text); } } } boot_current = 0; sz = sizeof(boot_current); if (efi_global_getenv("BootCurrent", &boot_current, &sz) == EFI_SUCCESS) { printf(" BootCurrent: %04x\n", boot_current); sz = sizeof(boot_order); if (efi_global_getenv("BootOrder", &boot_order, &sz) == EFI_SUCCESS) { printf(" BootOrder:"); for (i = 0; i < sz / sizeof(boot_order[0]); i++) printf(" %04x%s", boot_order[i], boot_order[i] == boot_current ? "[*]" : ""); printf("\n"); } } #ifdef TEST_FAILURE /* * For testing failover scenarios, it's nice to be able to fail fast. * Define TEST_FAILURE to create a boot1.efi that always fails after * reporting the boot manager protocol details.
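* (A hypothetical way to enable this, mirroring the CFLAGS+= style used in the Makefiles above, would be CFLAGS+= -DTEST_FAILURE, after which this boot1.efi always exits with EFI_OUT_OF_RESOURCES.)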
*/ BS->Exit(IH, EFI_OUT_OF_RESOURCES, 0, NULL); #endif /* Get all the device handles */ hsize = (UINTN)NUM_HANDLES_INIT * sizeof(EFI_HANDLE); handles = malloc(hsize); if (handles == NULL) printf("Failed to allocate %d handles\n", NUM_HANDLES_INIT); status = BS->LocateHandle(ByProtocol, &BlockIoProtocolGUID, NULL, &hsize, handles); switch (status) { case EFI_SUCCESS: break; case EFI_BUFFER_TOO_SMALL: free(handles); handles = malloc(hsize); if (handles == NULL) efi_panic(EFI_OUT_OF_RESOURCES, "Failed to allocate %d handles\n", NUM_HANDLES_INIT); status = BS->LocateHandle(ByProtocol, &BlockIoProtocolGUID, NULL, &hsize, handles); if (status != EFI_SUCCESS) efi_panic(status, "Failed to get device handles\n"); break; default: efi_panic(status, "Failed to get device handles\n"); break; } /* Scan all partitions, probing with all modules. */ nhandles = hsize / sizeof(*handles); printf(" Probing %zu block devices...", nhandles); DPRINTF("\n"); for (i = 0; i < nhandles; i++) probe_handle_status(handles[i], imgpath); printf(" done\n"); /* Status summary. */ for (i = 0; i < NUM_BOOT_MODULES; i++) { printf(" "); boot_modules[i]->status(); } try_boot(); /* If we get here, we're out of luck... */ efi_panic(EFI_LOAD_ERROR, "No bootable partitions found!"); } /* * add_device adds a device to the passed devinfo list. */ void add_device(dev_info_t **devinfop, dev_info_t *devinfo) { dev_info_t *dev; if (*devinfop == NULL) { *devinfop = devinfo; return; } for (dev = *devinfop; dev->next != NULL; dev = dev->next) ; dev->next = devinfo; } /* * OK. We totally give up. Exit back to EFI with a sensible status so * it can try the next option on the list. */ static void efi_panic(EFI_STATUS s, const char *fmt, ...) { va_list ap; printf("panic: "); va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("\n"); BS->Exit(IH, s, 0, NULL); } void putchar(int c) { CHAR16 buf[2]; if (c == '\n') { buf[0] = '\r'; buf[1] = 0; ST->ConOut->OutputString(ST->ConOut, buf); } buf[0] = c; buf[1] = 0; ST->ConOut->OutputString(ST->ConOut, buf); } Index: projects/runtime-coverage-v2/stand/efi/boot1/boot_module.h =================================================================== --- projects/runtime-coverage-v2/stand/efi/boot1/boot_module.h (revision 347075) +++ projects/runtime-coverage-v2/stand/efi/boot1/boot_module.h (revision 347076) @@ -1,110 +1,110 @@ /*- * Copyright (c) 2015 Eric McCorkle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _BOOT_MODULE_H_ #define _BOOT_MODULE_H_ #include #include #include #include #ifdef EFI_DEBUG #define DPRINTF(fmt, args...) printf(fmt, ##args) #define DSTALL(d) BS->Stall(d) #else #define DPRINTF(fmt, ...) {} #define DSTALL(d) {} #endif /* EFI device info */ typedef struct dev_info { EFI_BLOCK_IO *dev; EFI_DEVICE_PATH *devpath; - EFI_HANDLE *devhandle; + EFI_HANDLE devhandle; void *devdata; uint64_t partoff; int preferred; struct dev_info *next; } dev_info_t; /* * A boot loader module. * * This is a standard interface for filesystem modules in the EFI system. */ typedef struct boot_module_t { const char *name; /* init is the optional initialiser for the module. */ void (*init)(void); /* * probe checks to see if the module can handle dev. * * Return codes: * EFI_SUCCESS = The module can handle the device. * EFI_NOT_FOUND = The module can not handle the device. * Other = The module encountered an error. */ EFI_STATUS (*probe)(dev_info_t* dev); /* * load should select the best out of a set of devices that probe * indicated were loadable and load the specified file. * * Return codes: * EFI_SUCCESS = The module can handle the device. * EFI_NOT_FOUND = The module can not handle the device. * Other = The module encountered an error. */ EFI_STATUS (*load)(const char *filepath, dev_info_t *devinfo, void **buf, size_t *bufsize); /* status outputs information about the probed devices. */ void (*status)(void); /* valid devices as found by probe. */ dev_info_t *(*devices)(void); } boot_module_t; /* Standard boot modules. */ #ifdef EFI_UFS_BOOT extern const boot_module_t ufs_module; #endif #ifdef EFI_ZFS_BOOT extern const boot_module_t zfs_module; #endif /* Functions available to modules. */ extern void add_device(dev_info_t **devinfop, dev_info_t *devinfo); extern int vsnprintf(char *str, size_t sz, const char *fmt, va_list ap); #endif Index: projects/runtime-coverage-v2/stand/efi/loader/main.c =================================================================== --- projects/runtime-coverage-v2/stand/efi/loader/main.c (revision 347075) +++ projects/runtime-coverage-v2/stand/efi/loader/main.c (revision 347076) @@ -1,1555 +1,1541 @@ /*- * Copyright (c) 2008-2010 Rui Paulo * Copyright (c) 2006 Marcel Moolenaar * All rights reserved. * * Copyright (c) 2016-2019 Netflix, Inc. written by M. Warner Losh * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "efizfs.h" #include "loader_efi.h" struct arch_switch archsw; /* MI/MD interface boundary */ EFI_GUID acpi = ACPI_TABLE_GUID; EFI_GUID acpi20 = ACPI_20_TABLE_GUID; EFI_GUID devid = DEVICE_PATH_PROTOCOL; EFI_GUID imgid = LOADED_IMAGE_PROTOCOL; EFI_GUID mps = MPS_TABLE_GUID; EFI_GUID netid = EFI_SIMPLE_NETWORK_PROTOCOL; EFI_GUID smbios = SMBIOS_TABLE_GUID; EFI_GUID smbios3 = SMBIOS3_TABLE_GUID; EFI_GUID dxe = DXE_SERVICES_TABLE_GUID; EFI_GUID hoblist = HOB_LIST_TABLE_GUID; EFI_GUID lzmadecomp = LZMA_DECOMPRESSION_GUID; EFI_GUID mpcore = ARM_MP_CORE_INFO_TABLE_GUID; EFI_GUID esrt = ESRT_TABLE_GUID; EFI_GUID memtype = MEMORY_TYPE_INFORMATION_TABLE_GUID; EFI_GUID debugimg = DEBUG_IMAGE_INFO_TABLE_GUID; EFI_GUID fdtdtb = FDT_TABLE_GUID; EFI_GUID inputid = SIMPLE_TEXT_INPUT_PROTOCOL; /* * Number of seconds to wait for a keystroke before exiting with failure * in the event no currdev is found. -2 means always break, -1 means * never break, 0 means poll once and then reboot, > 0 means wait for * that many seconds. "fail_timeout" can be set in the environment as * well. */ static int fail_timeout = 5; /* * Current boot variable */ UINT16 boot_current; /* * Image that we booted from. */ EFI_LOADED_IMAGE *boot_img; static bool has_keyboard(void) { EFI_STATUS status; EFI_DEVICE_PATH *path; EFI_HANDLE *hin, *hin_end, *walker; UINTN sz; bool retval = false; /* * Find all the handles that support the SIMPLE_TEXT_INPUT_PROTOCOL and * do the typical dance to get the right sized buffer. */ sz = 0; hin = NULL; status = BS->LocateHandle(ByProtocol, &inputid, 0, &sz, 0); if (status == EFI_BUFFER_TOO_SMALL) { hin = (EFI_HANDLE *)malloc(sz); status = BS->LocateHandle(ByProtocol, &inputid, 0, &sz, hin); if (EFI_ERROR(status)) free(hin); } if (EFI_ERROR(status)) return retval; /* * Look at each of the handles. If it supports the device path protocol, * use it to get the device path for this handle. Then see if that * device path matches either the USB device path for keyboards or the * legacy device path for keyboards. */ hin_end = &hin[sz / sizeof(*hin)]; for (walker = hin; walker < hin_end; walker++) { status = BS->HandleProtocol(*walker, &devid, (VOID **)&path); if (EFI_ERROR(status)) continue; while (!IsDevicePathEnd(path)) { /* * Check for the ACPI keyboard node. All PNP03xx nodes * are keyboards of different flavors. Note: It is * unclear if there's always a keyboard node when * there's a keyboard controller, or if there's only one * when a keyboard is detected at boot.
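* (Worked example: the classic PS/2 keyboard HID is PNP0303, so EISA_ID_TO_NUM() yields 0x0303, (0x0303 & 0xff00) == 0x300 matches, and the PNP_EISA_ID_CONST vendor check below passes; any PNP03xx node is treated the same way.)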
*/ if (DevicePathType(path) == ACPI_DEVICE_PATH && (DevicePathSubType(path) == ACPI_DP || DevicePathSubType(path) == ACPI_EXTENDED_DP)) { ACPI_HID_DEVICE_PATH *acpi; acpi = (ACPI_HID_DEVICE_PATH *)(void *)path; if ((EISA_ID_TO_NUM(acpi->HID) & 0xff00) == 0x300 && (acpi->HID & 0xffff) == PNP_EISA_ID_CONST) { retval = true; goto out; } /* * Check for USB keyboard node, if present. Unlike a * PS/2 keyboard, these definitely only appear when * connected to the system. */ } else if (DevicePathType(path) == MESSAGING_DEVICE_PATH && DevicePathSubType(path) == MSG_USB_CLASS_DP) { USB_CLASS_DEVICE_PATH *usb; usb = (USB_CLASS_DEVICE_PATH *)(void *)path; if (usb->DeviceClass == 3 && /* HID */ usb->DeviceSubClass == 1 && /* Boot devices */ usb->DeviceProtocol == 1) { /* Boot keyboards */ retval = true; goto out; } } path = NextDevicePathNode(path); } } out: free(hin); return retval; } static void set_currdev(const char *devname) { env_setenv("currdev", EV_VOLATILE, devname, efi_setcurrdev, env_nounset); env_setenv("loaddev", EV_VOLATILE, devname, env_noset, env_nounset); } static void set_currdev_devdesc(struct devdesc *currdev) { const char *devname; devname = efi_fmtdev(currdev); printf("Setting currdev to %s\n", devname); set_currdev(devname); } static void set_currdev_devsw(struct devsw *dev, int unit) { struct devdesc currdev; currdev.d_dev = dev; currdev.d_unit = unit; set_currdev_devdesc(&currdev); } static void set_currdev_pdinfo(pdinfo_t *dp) { /* * Disks are special: they have partitions. If the parent * pointer is non-null, we're a partition, not a full disk, * and we need to adjust currdev appropriately. */ if (dp->pd_devsw->dv_type == DEVT_DISK) { struct disk_devdesc currdev; currdev.dd.d_dev = dp->pd_devsw; if (dp->pd_parent == NULL) { currdev.dd.d_unit = dp->pd_unit; currdev.d_slice = D_SLICENONE; currdev.d_partition = D_PARTNONE; } else { currdev.dd.d_unit = dp->pd_parent->pd_unit; currdev.d_slice = dp->pd_unit; currdev.d_partition = D_PARTISGPT; /* XXX Assumes GPT */ } set_currdev_devdesc((struct devdesc *)&currdev); } else { set_currdev_devsw(dp->pd_devsw, dp->pd_unit); } } static bool sanity_check_currdev(void) { struct stat st; return (stat("/boot/defaults/loader.conf", &st) == 0 || stat("/boot/kernel/kernel", &st) == 0); } #ifdef EFI_ZFS_BOOT static bool probe_zfs_currdev(uint64_t guid) { char *devname; struct zfs_devdesc currdev; currdev.dd.d_dev = &zfs_dev; currdev.dd.d_unit = 0; currdev.pool_guid = guid; currdev.root_guid = 0; set_currdev_devdesc((struct devdesc *)&currdev); devname = efi_fmtdev(&currdev); init_zfs_bootenv(devname); return (sanity_check_currdev()); } #endif static bool try_as_currdev(pdinfo_t *hd, pdinfo_t *pp) { uint64_t guid; #ifdef EFI_ZFS_BOOT /* * If there's a zpool on this device, try it as a ZFS * filesystem, which has somewhat different setup than all * other types of fs due to imperfect loader integration. * This all stems from ZFS being both a device (zpool) and * a filesystem, plus the boot env feature. */ if (efizfs_get_guid_by_handle(pp->pd_handle, &guid)) return (probe_zfs_currdev(guid)); #endif /* * All other filesystems just need the pdinfo * initialized in the standard way. */ set_currdev_pdinfo(pp); return (sanity_check_currdev()); } /* * Sometimes we get filenames that are all upper case * and/or have backslashes in them. Filter all this out * if it looks like we need to do so.
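* (For instance, a firmware-supplied "\BOOT\KERNEL\KERNEL" comes out of the pass below as "/boot/kernel/kernel".)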
*/ static void fix_dosisms(char *p) { while (*p) { if (isupper(*p)) *p = tolower(*p); else if (*p == '\\') *p = '/'; p++; } } #define SIZE(dp, edp) (size_t)((intptr_t)(void *)edp - (intptr_t)(void *)dp) enum { BOOT_INFO_OK = 0, BAD_CHOICE = 1, NOT_SPECIFIC = 2 }; static int match_boot_info(char *boot_info, size_t bisz) { uint32_t attr; uint16_t fplen; size_t len; char *walker, *ep; EFI_DEVICE_PATH *dp, *edp, *first_dp, *last_dp; pdinfo_t *pp; CHAR16 *descr; char *kernel = NULL; FILEPATH_DEVICE_PATH *fp; struct stat st; CHAR16 *text; /* * FreeBSD encodes its boot loading path into the boot loader * BootXXXX variable. We look for the last one in the path * and use that to load the kernel. However, if we only find * one DEVICE_PATH, then there's nothing specific and we should * fall back. * * In an ideal world, we'd look at the image handle we were * passed, match it up with the loader we are, and then return the * next one in the path. This would be most flexible and cover * many chain booting scenarios where you need to use this * boot loader to get to the next boot loader. However, that * doesn't work. We rarely have the path to the image booted * (just the device) so we can't count on that. So, we do the * next best thing: we look through the device path(s) passed * in the BootXXXX variable. If there's only one, we return * NOT_SPECIFIC. Otherwise, we look at the last one and try to * load that. If we can, we return BOOT_INFO_OK. Otherwise we * return BAD_CHOICE for the caller to sort out. */ if (bisz < sizeof(attr) + sizeof(fplen) + sizeof(CHAR16)) return NOT_SPECIFIC; walker = boot_info; ep = walker + bisz; memcpy(&attr, walker, sizeof(attr)); walker += sizeof(attr); memcpy(&fplen, walker, sizeof(fplen)); walker += sizeof(fplen); descr = (CHAR16 *)(intptr_t)walker; len = ucs2len(descr); walker += (len + 1) * sizeof(CHAR16); last_dp = first_dp = dp = (EFI_DEVICE_PATH *)walker; edp = (EFI_DEVICE_PATH *)(walker + fplen); if ((char *)edp > ep) return NOT_SPECIFIC; while (dp < edp && SIZE(dp, edp) > sizeof(EFI_DEVICE_PATH)) { text = efi_devpath_name(dp); if (text != NULL) { printf(" BootInfo Path: %S\n", text); efi_free_devpath_name(text); } last_dp = dp; dp = (EFI_DEVICE_PATH *)((char *)dp + efi_devpath_length(dp)); } /* * If there's only one item in the list, then nothing was * specified. Or if the last path doesn't have a media * path in it. Those show up as various VenHw() nodes * which are basically opaque to us. Don't count those * as something specific. */ if (last_dp == first_dp) { printf("Ignoring Boot%04x: Only one DP found\n", boot_current); return NOT_SPECIFIC; } if (efi_devpath_to_media_path(last_dp) == NULL) { printf("Ignoring Boot%04x: No Media Path\n", boot_current); return NOT_SPECIFIC; } /* * OK. At this point we either have a good path or a bad one. * Let's check. */ pp = efiblk_get_pdinfo_by_device_path(last_dp); if (pp == NULL) { printf("Ignoring Boot%04x: Device Path not found\n", boot_current); return BAD_CHOICE; } set_currdev_pdinfo(pp); if (!sanity_check_currdev()) { printf("Ignoring Boot%04x: sanity check failed\n", boot_current); return BAD_CHOICE; } /* * OK. We've found a device that matches, next we need to check the last * component of the path. If it's a file, then we set the default kernel * to that. Otherwise, just use this as the default root. * * Reminder: we're running very early, before we've parsed the defaults * file, so we may need to have a hack override.
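* (Illustrative: if the last node of the chosen Boot%04x entry were, say, File(\boot\kernel.old\kernel), the code below would fix up the DOS-isms and setenv("kernel", "/boot/kernel.old/kernel", 1) before loader.conf is ever consulted.)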
*/ dp = efi_devpath_last_node(last_dp); if (DevicePathType(dp) != MEDIA_DEVICE_PATH || DevicePathSubType(dp) != MEDIA_FILEPATH_DP) { printf("Using Boot%04x for root partition\n", boot_current); return (BOOT_INFO_OK); /* use currdir, default kernel */ } fp = (FILEPATH_DEVICE_PATH *)dp; ucs2_to_utf8(fp->PathName, &kernel); if (kernel == NULL) { printf("Not using Boot%04x: can't decode kernel\n", boot_current); return (BAD_CHOICE); } if (*kernel == '\\' || isupper(*kernel)) fix_dosisms(kernel); if (stat(kernel, &st) != 0) { printf("Not using Boot%04x: can't find %s\n", boot_current, kernel); free(kernel); return (BAD_CHOICE); } setenv("kernel", kernel, 1); text = efi_devpath_name(last_dp); if (text) { printf("Using Boot%04x %S + %s\n", boot_current, text, kernel); efi_free_devpath_name(text); } free(kernel); return (BOOT_INFO_OK); } /* * Look at the passed-in boot_info, if any. If we find it then we need * to see if we can find ourselves in the boot chain. If we can, and * there's another specified thing to boot next, assume that the file * is loaded from / and use that for the root filesystem. If we can't * find the specified thing, we must fail the boot. If we're last on * the list, then we fall back to looking for the first available / * candidate (ZFS, if there's a bootable zpool, otherwise a UFS * partition that has either /boot/defaults/loader.conf on it or * /boot/kernel/kernel (the default kernel) that we can use). * * We always fail if we can't find the right thing. However, as * a concession to buggy UEFI implementations, like u-boot, if * we have determined that the host is violating the UEFI boot * manager protocol, we'll signal the rest of the program that * a drop to the OK boot loader prompt is possible. */ static int find_currdev(bool do_bootmgr, bool is_last, char *boot_info, size_t boot_info_sz) { pdinfo_t *dp, *pp; EFI_DEVICE_PATH *devpath, *copy; EFI_HANDLE h; CHAR16 *text; struct devsw *dev; int unit; uint64_t extra; int rv; char *rootdev; /* * First choice: if rootdev is already set, use that, even if * it's wrong. */ rootdev = getenv("rootdev"); if (rootdev != NULL) { printf(" Setting currdev to configured rootdev %s\n", rootdev); set_currdev(rootdev); return (0); } /* * Second choice: If uefi_rootdev is set, translate that UEFI device * path to the loader's internal name and use that. */ do { rootdev = getenv("uefi_rootdev"); if (rootdev == NULL) break; devpath = efi_name_to_devpath(rootdev); if (devpath == NULL) break; dp = efiblk_get_pdinfo_by_device_path(devpath); efi_devpath_free(devpath); if (dp == NULL) break; printf(" Setting currdev to UEFI path %s\n", rootdev); set_currdev_pdinfo(dp); return (0); } while (0); /* * Third choice: If we can find our image's boot_info, and there's * a follow-on boot image in that boot_info, use that. In this * case root will be the partition specified in that image and * we'll load the kernel specified by the file path. Should there * not be a filepath, we use the default. This filepath overrides * loader.conf. */ if (do_bootmgr) { rv = match_boot_info(boot_info, boot_info_sz); switch (rv) { case BOOT_INFO_OK: /* We found it */ return (0); case BAD_CHOICE: /* specified file not found -> error */ /* XXX do we want to have an escape hatch for last in boot order? */ return (ENOENT); } /* Nothing specified, try normal match */ } #ifdef EFI_ZFS_BOOT /* * Did efi_zfs_probe() detect the boot pool? If so, use the zpool * it found, if it's sane. ZFS is the only thing that looks for * disks and pools to boot.
This may change in the future, however, * if we allow specifying which pool to boot from via UEFI variables * rather than the bootenv stuff that FreeBSD uses today. */ if (pool_guid != 0) { printf("Trying ZFS pool\n"); if (probe_zfs_currdev(pool_guid)) return (0); } #endif /* EFI_ZFS_BOOT */ /* * Try to find the block device by its handle based on the * image we're booting. If we can't find a sane partition, * search all the other partitions of the disk. We do not * search other disks because it's a violation of the UEFI * boot protocol to do so. We fail and let UEFI go on to * the next candidate. */ dp = efiblk_get_pdinfo_by_handle(boot_img->DeviceHandle); if (dp != NULL) { text = efi_devpath_name(dp->pd_devpath); if (text != NULL) { printf("Trying ESP: %S\n", text); efi_free_devpath_name(text); } set_currdev_pdinfo(dp); if (sanity_check_currdev()) return (0); if (dp->pd_parent != NULL) { pdinfo_t *espdp = dp; dp = dp->pd_parent; STAILQ_FOREACH(pp, &dp->pd_part, pd_link) { /* Already tried the ESP */ if (espdp == pp) continue; /* * Roll up the ZFS special case * for those partitions that have * zpools on them. */ text = efi_devpath_name(pp->pd_devpath); if (text != NULL) { printf("Trying: %S\n", text); efi_free_devpath_name(text); } if (try_as_currdev(dp, pp)) return (0); } } } /* * Try the device handle from our loaded image first. If that * fails, use the device path from the loaded image and see if * any of the nodes in that path match one of the enumerated * handles. Currently, this handle list is only for netboot. */ if (efi_handle_lookup(boot_img->DeviceHandle, &dev, &unit, &extra) == 0) { set_currdev_devsw(dev, unit); if (sanity_check_currdev()) return (0); } copy = NULL; devpath = efi_lookup_image_devpath(IH); while (devpath != NULL) { h = efi_devpath_handle(devpath); if (h == NULL) break; free(copy); copy = NULL; if (efi_handle_lookup(h, &dev, &unit, &extra) == 0) { set_currdev_devsw(dev, unit); if (sanity_check_currdev()) return (0); } devpath = efi_lookup_devpath(h); if (devpath != NULL) { copy = efi_devpath_trim(devpath); devpath = copy; } } free(copy); return (ENOENT); } static bool interactive_interrupt(const char *msg) { time_t now, then, last; last = 0; now = then = getsecs(); printf("%s\n", msg); if (fail_timeout == -2) /* Always break to OK */ return (true); if (fail_timeout == -1) /* Never break to OK */ return (false); do { if (last != now) { printf("press any key to interrupt reboot in %d seconds\r", fail_timeout - (int)(now - then)); last = now; } /* XXX no pause or timeout wait for char */ if (ischar()) return (true); now = getsecs(); } while (now - then < fail_timeout); return (false); } static int parse_args(int argc, CHAR16 *argv[]) { int i, j, howto; bool vargood; char var[128]; /* * Parse the args to set the console settings, etc. boot1.efi passes * these in, if it can read /boot.config or /boot/config, or iPXE may be * set up to pass these in. Or the optional argument in the * boot environment was used to pass these arguments in (in which case * neither /boot.config nor /boot/config are consulted). * * Loop through the args, and for each one that contains an '=' that is * not the first character, add it to the environment. This allows * loader and kernel env vars to be passed on the command line. Convert * args from UCS-2 to ASCII (16 to 8 bit) as they are copied (though this * method is flawed for non-ASCII characters).
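* (Example, assuming the usual boot_parse_arg() behaviour: an argv of "-s" "currdev=disk0p2:" would OR RB_SINGLE into howto and push currdev=disk0p2: into the environment.)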
*/ howto = 0; for (i = 1; i < argc; i++) { cpy16to8(argv[i], var, sizeof(var)); howto |= boot_parse_arg(var); } return (howto); } static void setenv_int(const char *key, int val) { char buf[20]; snprintf(buf, sizeof(buf), "%d", val); setenv(key, buf, 1); } /* * Parse ConOut (the list of consoles active) and see if we can find a * serial port and/or a video port. It would be nice to also walk the * ACPI name space to map the UID for the serial port to a port. The * latter is especially hard. */ static int parse_uefi_con_out(void) { int how, rv; int vid_seen = 0, com_seen = 0, seen = 0; size_t sz; char buf[4096], *ep; EFI_DEVICE_PATH *node; ACPI_HID_DEVICE_PATH *acpi; UART_DEVICE_PATH *uart; bool pci_pending; how = 0; sz = sizeof(buf); rv = efi_global_getenv("ConOut", buf, &sz); if (rv != EFI_SUCCESS) goto out; ep = buf + sz; node = (EFI_DEVICE_PATH *)buf; while ((char *)node < ep) { pci_pending = false; if (DevicePathType(node) == ACPI_DEVICE_PATH && DevicePathSubType(node) == ACPI_DP) { /* Check for Serial node */ acpi = (void *)node; if (EISA_ID_TO_NUM(acpi->HID) == 0x501) { setenv_int("efi_8250_uid", acpi->UID); com_seen = ++seen; } } else if (DevicePathType(node) == MESSAGING_DEVICE_PATH && DevicePathSubType(node) == MSG_UART_DP) { uart = (void *)node; setenv_int("efi_com_speed", uart->BaudRate); } else if (DevicePathType(node) == ACPI_DEVICE_PATH && DevicePathSubType(node) == ACPI_ADR_DP) { /* Check for AcpiAdr() Node for video */ vid_seen = ++seen; } else if (DevicePathType(node) == HARDWARE_DEVICE_PATH && DevicePathSubType(node) == HW_PCI_DP) { /* * Note, vmware fusion has a funky console device * PciRoot(0x0)/Pci(0xf,0x0) * which we can only detect at the end since we also * have to cope with: * PciRoot(0x0)/Pci(0x1f,0x0)/Serial(0x1) * so only match it if it's last. */ pci_pending = true; } node = NextDevicePathNode(node); /* Skip the end node */ } if (pci_pending && vid_seen == 0) vid_seen = ++seen; /* * Truth table for RB_MULTIPLE | RB_SERIAL * Value Result * 0 Use only video console * RB_SERIAL Use only serial console * RB_MULTIPLE Use both video and serial console * (but video is primary so gets rc messages) * both Use both video and serial console * (but serial is primary so gets rc messages) * * Try to honor this as best we can. If only one of serial / video * found, then use that. Otherwise, use the first one we found. * This also implies if we found nothing, default to video. 
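* (Worked example of the code below: if ConOut lists the serial port's ACPI node (PNP0501) before the video node, then com_seen == 1 < vid_seen == 2 and how becomes RB_MULTIPLE | RB_SERIAL, with serial primary; listed the other way around we get plain RB_MULTIPLE with video primary.)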
*/ how = 0; if (vid_seen && com_seen) { how |= RB_MULTIPLE; if (com_seen < vid_seen) how |= RB_SERIAL; } else if (com_seen) how |= RB_SERIAL; out: return (how); } void parse_loader_efi_config(EFI_HANDLE h, const char *env_fn) { pdinfo_t *dp; struct stat st; int fd = -1; char *env = NULL; dp = efiblk_get_pdinfo_by_handle(h); if (dp == NULL) return; set_currdev_pdinfo(dp); if (stat(env_fn, &st) != 0) return; fd = open(env_fn, O_RDONLY); if (fd == -1) return; env = malloc(st.st_size + 1); if (env == NULL) goto out; if (read(fd, env, st.st_size) != st.st_size) goto out; env[st.st_size] = '\0'; boot_parse_cmdline(env); out: free(env); close(fd); } static void read_loader_env(const char *name, char *def_fn, bool once) { UINTN len; char *fn, *freeme = NULL; len = 0; fn = def_fn; if (efi_freebsd_getenv(name, NULL, &len) == EFI_BUFFER_TOO_SMALL) { freeme = fn = malloc(len + 1); if (fn != NULL) { if (efi_freebsd_getenv(name, fn, &len) != EFI_SUCCESS) { free(fn); fn = NULL; printf( "Can't fetch FreeBSD::%s we know is there\n", name); } else { /* * if tagged as 'once', delete the env variable so we * only use it once. */ if (once) efi_freebsd_delenv(name); /* * We malloced 1 more than len above, then redid the call, * so now we have room at the end of the string to NUL terminate * it here, even if the typical idiom would have '- 1' here to * not overflow. len should be the same on return both times. */ fn[len] = '\0'; } } else { printf( "Can't allocate %d bytes to fetch FreeBSD::%s env var\n", len, name); } } if (fn) { printf(" Reading loader env vars from %s\n", fn); parse_loader_efi_config(boot_img->DeviceHandle, fn); } } EFI_STATUS main(int argc, CHAR16 *argv[]) { EFI_GUID *guid; int howto, i, uhowto; UINTN k; bool has_kbd, is_last; char *s; EFI_DEVICE_PATH *imgpath; CHAR16 *text; EFI_STATUS rv; size_t sz, bosz = 0, bisz = 0; UINT16 boot_order[100]; char boot_info[4096]; char buf[32]; bool uefi_boot_mgr; archsw.arch_autoload = efi_autoload; archsw.arch_getdev = efi_getdev; archsw.arch_copyin = efi_copyin; archsw.arch_copyout = efi_copyout; archsw.arch_readin = efi_readin; archsw.arch_zfs_probe = efi_zfs_probe; /* Get our loaded image protocol interface structure. */ BS->HandleProtocol(IH, &imgid, (VOID**)&boot_img); /* * Chicken-and-egg problem; we want to have console output early, but * some console attributes may depend on reading from eg. the boot * device, which we can't do yet. We can use printf() etc. once this is * done. So, we set it to the efi console, then call console init. This * gets us printf early, but also primes the pump for all future console * changes to take effect, regardless of where they come from. */ setenv("console", "efi", 1); cons_probe(); /* Init the time source */ efi_time_init(); /* * Initialise the block cache. Set the upper limit. */ bcache_init(32768, 512); /* * Scan the BLOCK IO MEDIA handles then * march through the device switch probing for things. */ i = efipart_inithandles(); if (i != 0 && i != ENOENT) { printf("efipart_inithandles failed with ERRNO %d, expect " "failures\n", i); } for (i = 0; devsw[i] != NULL; i++) if (devsw[i]->dv_init != NULL) (devsw[i]->dv_init)(); /* * Detect console settings two different ways: one via the command * args (eg -h) or via the UEFI ConOut variable. */ has_kbd = has_keyboard(); howto = parse_args(argc, argv); if (!has_kbd && (howto & RB_PROBE)) howto |= RB_SERIAL | RB_MULTIPLE; howto &= ~RB_PROBE; uhowto = parse_uefi_con_out(); /* - * Scan the BLOCK IO MEDIA handles then - * march through the device switch probing for things.
- */ - i = efipart_inithandles(); - if (i != 0 && i != ENOENT) { - printf("efipart_inithandles failed with ERRNO %d, expect " - "failures\n", i); - } - - for (i = 0; devsw[i] != NULL; i++) - if (devsw[i]->dv_init != NULL) - (devsw[i]->dv_init)(); - - /* * Read additional environment variables from the boot device's * "LoaderEnv" file. Any boot loader environment variable may be set * there; these are subtly different from loader.conf variables. Only * the 'simple' ones may be set, so things like foo_load="YES" won't work * for two reasons. First, the parser is simplistic and doesn't grok * quotes. Second, the variables that cause an action to happen * are parsed by the lua, 4th or whatever code that's not yet * loaded. This is relative to the root directory when loader.efi is * loaded off the UFS root drive (when chain booted), or from the ESP * when directly loaded by the BIOS. * * We also read in NextLoaderEnv if it was specified. This allows next boot * functionality to be implemented and to override anything in LoaderEnv. */ read_loader_env("LoaderEnv", "/efi/freebsd/loader.env", false); read_loader_env("NextLoaderEnv", NULL, true); /* * We now have two notions of console. howto should be viewed as * overrides. If console is already set, don't set it again. */ #define VIDEO_ONLY 0 #define SERIAL_ONLY RB_SERIAL #define VID_SER_BOTH RB_MULTIPLE #define SER_VID_BOTH (RB_SERIAL | RB_MULTIPLE) #define CON_MASK (RB_SERIAL | RB_MULTIPLE) if (strcmp(getenv("console"), "efi") == 0) { if ((howto & CON_MASK) == 0) { /* No override, uhowto is controlling and efi cons is perfect */ howto = howto | (uhowto & CON_MASK); } else if ((howto & CON_MASK) == (uhowto & CON_MASK)) { /* override matches what UEFI told us, efi console is perfect */ } else if ((uhowto & (CON_MASK)) != 0) { /* * We detected a serial console on ConOut. All possible * overrides include serial. We can't really override what efi * gives us, so we use it knowing it's the best choice. */ /* Do nothing */ } else { /* * We detected some kind of serial in the override, but ConOut * has no serial, so we have to sort out which case it really is. */ switch (howto & CON_MASK) { case SERIAL_ONLY: setenv("console", "comconsole", 1); break; case VID_SER_BOTH: setenv("console", "efi comconsole", 1); break; case SER_VID_BOTH: setenv("console", "comconsole efi", 1); break; /* case VIDEO_ONLY can't happen -- it's the first if above */ } } } /* * howto is now set to how we want to export the flags to the kernel, so * set the env based on it. */ boot_howto_to_env(howto); if (efi_copy_init()) { printf("failed to allocate staging area\n"); return (EFI_BUFFER_TOO_SMALL); } if ((s = getenv("fail_timeout")) != NULL) fail_timeout = strtol(s, NULL, 10); printf("%s\n", bootprog_info); printf(" Command line arguments:"); for (i = 0; i < argc; i++) printf(" %S", argv[i]); printf("\n"); printf(" EFI version: %d.%02d\n", ST->Hdr.Revision >> 16, ST->Hdr.Revision & 0xffff); printf(" EFI Firmware: %S (rev %d.%02d)\n", ST->FirmwareVendor, ST->FirmwareRevision >> 16, ST->FirmwareRevision & 0xffff); printf(" Console: %s (%#x)\n", getenv("console"), howto); /* Determine the devpath of our image so we can prefer it.
*/ text = efi_devpath_name(boot_img->FilePath); if (text != NULL) { printf(" Load Path: %S\n", text); efi_setenv_freebsd_wcs("LoaderPath", text); efi_free_devpath_name(text); } rv = BS->HandleProtocol(boot_img->DeviceHandle, &devid, (void **)&imgpath); if (rv == EFI_SUCCESS) { text = efi_devpath_name(imgpath); if (text != NULL) { printf(" Load Device: %S\n", text); efi_setenv_freebsd_wcs("LoaderDev", text); efi_free_devpath_name(text); } } if (getenv("uefi_ignore_boot_mgr") != NULL) { printf(" Ignoring UEFI boot manager\n"); uefi_boot_mgr = false; } else { uefi_boot_mgr = true; boot_current = 0; sz = sizeof(boot_current); rv = efi_global_getenv("BootCurrent", &boot_current, &sz); if (rv == EFI_SUCCESS) printf(" BootCurrent: %04x\n", boot_current); else { boot_current = 0xffff; uefi_boot_mgr = false; } sz = sizeof(boot_order); rv = efi_global_getenv("BootOrder", &boot_order, &sz); if (rv == EFI_SUCCESS) { printf(" BootOrder:"); for (i = 0; i < sz / sizeof(boot_order[0]); i++) printf(" %04x%s", boot_order[i], boot_order[i] == boot_current ? "[*]" : ""); printf("\n"); is_last = boot_order[(sz / sizeof(boot_order[0])) - 1] == boot_current; bosz = sz; } else if (uefi_boot_mgr) { /* * u-boot doesn't set BootOrder, but otherwise participates in the * boot manager protocol. So we fake it here and don't consider it * a failure. */ bosz = sizeof(boot_order[0]); boot_order[0] = boot_current; is_last = true; } } /* * Next, find the boot info structure the UEFI boot manager is * supposed to set up. We need this so we can walk through it to * find where we are in the booting process and what to try to * boot next. */ if (uefi_boot_mgr) { snprintf(buf, sizeof(buf), "Boot%04X", boot_current); sz = sizeof(boot_info); rv = efi_global_getenv(buf, &boot_info, &sz); if (rv == EFI_SUCCESS) bisz = sz; else uefi_boot_mgr = false; } /* * Disable the watchdog timer. By default the boot manager sets * the timer to 5 minutes before invoking a boot option. If we * want to return to the boot manager, we have to disable the * watchdog timer, and since we're an interactive program, we don't * want to wait until the user types "quit". The timer may have * fired by then. We don't care if this fails. It does not prevent * normal functioning in any way... */ BS->SetWatchdogTimer(0, 0, 0, NULL); /* * Initialize the trusted/forbidden certificates from UEFI. * They will later be used to verify the manifest(s), * which should contain hashes of verified files. * This needs to be initialized before any configuration files * are loaded. */ #ifdef EFI_SECUREBOOT ve_efi_init(); #endif /* * Try to find a good currdev based on the image that was booted. * It might be desirable here to have a short pause to allow falling * through to the boot loader instead of returning instantly to follow * the boot protocol and also allow an escape hatch for users wishing * to try something different.
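* (Editorial note: as written below, a find_currdev() failure returns EFI_NOT_FOUND only when the UEFI boot manager protocol was honoured and the operator does not interrupt; in every other case we fall through to the interactive loader.)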
*/ if (find_currdev(uefi_boot_mgr, is_last, boot_info, bisz) != 0) if (uefi_boot_mgr && !interactive_interrupt("Failed to find bootable partition")) return (EFI_NOT_FOUND); efi_init_environment(); #if !defined(__arm__) for (k = 0; k < ST->NumberOfTableEntries; k++) { guid = &ST->ConfigurationTable[k].VendorGuid; if (!memcmp(guid, &smbios, sizeof(EFI_GUID))) { char buf[40]; snprintf(buf, sizeof(buf), "%p", ST->ConfigurationTable[k].VendorTable); setenv("hint.smbios.0.mem", buf, 1); smbios_detect(ST->ConfigurationTable[k].VendorTable); break; } } #endif interact(); /* doesn't return */ return (EFI_SUCCESS); /* keep compiler happy */ } COMMAND_SET(poweroff, "poweroff", "power off the system", command_poweroff); static int command_poweroff(int argc __unused, char *argv[] __unused) { int i; for (i = 0; devsw[i] != NULL; ++i) if (devsw[i]->dv_cleanup != NULL) (devsw[i]->dv_cleanup)(); RS->ResetSystem(EfiResetShutdown, EFI_SUCCESS, 0, NULL); /* NOTREACHED */ return (CMD_ERROR); } COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot); static int command_reboot(int argc, char *argv[]) { int i; for (i = 0; devsw[i] != NULL; ++i) if (devsw[i]->dv_cleanup != NULL) (devsw[i]->dv_cleanup)(); RS->ResetSystem(EfiResetCold, EFI_SUCCESS, 0, NULL); /* NOTREACHED */ return (CMD_ERROR); } COMMAND_SET(quit, "quit", "exit the loader", command_quit); static int command_quit(int argc, char *argv[]) { exit(0); return (CMD_OK); } COMMAND_SET(memmap, "memmap", "print memory map", command_memmap); static int command_memmap(int argc __unused, char *argv[] __unused) { UINTN sz; EFI_MEMORY_DESCRIPTOR *map, *p; UINTN key, dsz; UINT32 dver; EFI_STATUS status; int i, ndesc; char line[80]; sz = 0; status = BS->GetMemoryMap(&sz, 0, &key, &dsz, &dver); if (status != EFI_BUFFER_TOO_SMALL) { printf("Can't determine memory map size\n"); return (CMD_ERROR); } map = malloc(sz); status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver); if (EFI_ERROR(status)) { printf("Can't read memory map\n"); return (CMD_ERROR); } ndesc = sz / dsz; snprintf(line, sizeof(line), "%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); pager_open(); if (pager_output(line)) { pager_close(); return (CMD_OK); } for (i = 0, p = map; i < ndesc; i++, p = NextMemoryDescriptor(p, dsz)) { snprintf(line, sizeof(line), "%23s %012jx %012jx %08jx ", efi_memory_type(p->Type), (uintmax_t)p->PhysicalStart, (uintmax_t)p->VirtualStart, (uintmax_t)p->NumberOfPages); if (pager_output(line)) break; if (p->Attribute & EFI_MEMORY_UC) printf("UC "); if (p->Attribute & EFI_MEMORY_WC) printf("WC "); if (p->Attribute & EFI_MEMORY_WT) printf("WT "); if (p->Attribute & EFI_MEMORY_WB) printf("WB "); if (p->Attribute & EFI_MEMORY_UCE) printf("UCE "); if (p->Attribute & EFI_MEMORY_WP) printf("WP "); if (p->Attribute & EFI_MEMORY_RP) printf("RP "); if (p->Attribute & EFI_MEMORY_XP) printf("XP "); if (p->Attribute & EFI_MEMORY_NV) printf("NV "); if (p->Attribute & EFI_MEMORY_MORE_RELIABLE) printf("MR "); if (p->Attribute & EFI_MEMORY_RO) printf("RO "); if (pager_output("\n")) break; } pager_close(); return (CMD_OK); } COMMAND_SET(configuration, "configuration", "print configuration tables", command_configuration); static int command_configuration(int argc, char *argv[]) { UINTN i; char *name; printf("NumberOfTableEntries=%lu\n", (unsigned long)ST->NumberOfTableEntries); for (i = 0; i < ST->NumberOfTableEntries; i++) { EFI_GUID *guid; printf(" "); guid = &ST->ConfigurationTable[i].VendorGuid; if (efi_guid_to_name(guid, &name) == true) { printf(name); 
free(name); } else { printf("Error while translating UUID to name"); } printf(" at %p\n", ST->ConfigurationTable[i].VendorTable); } return (CMD_OK); } COMMAND_SET(mode, "mode", "change or display EFI text modes", command_mode); static int command_mode(int argc, char *argv[]) { UINTN cols, rows; unsigned int mode; int i; char *cp; char rowenv[8]; EFI_STATUS status; SIMPLE_TEXT_OUTPUT_INTERFACE *conout; extern void HO(void); conout = ST->ConOut; if (argc > 1) { mode = strtol(argv[1], &cp, 0); if (cp[0] != '\0') { printf("Invalid mode\n"); return (CMD_ERROR); } status = conout->QueryMode(conout, mode, &cols, &rows); if (EFI_ERROR(status)) { printf("invalid mode %d\n", mode); return (CMD_ERROR); } status = conout->SetMode(conout, mode); if (EFI_ERROR(status)) { printf("couldn't set mode %d\n", mode); return (CMD_ERROR); } sprintf(rowenv, "%u", (unsigned)rows); setenv("LINES", rowenv, 1); HO(); /* set cursor */ return (CMD_OK); } printf("Current mode: %d\n", conout->Mode->Mode); for (i = 0; i <= conout->Mode->MaxMode; i++) { status = conout->QueryMode(conout, i, &cols, &rows); if (EFI_ERROR(status)) continue; printf("Mode %d: %u columns, %u rows\n", i, (unsigned)cols, (unsigned)rows); } if (i != 0) printf("Select a mode with the command \"mode <mode number>\"\n"); return (CMD_OK); } COMMAND_SET(lsefi, "lsefi", "list EFI handles", command_lsefi); static int command_lsefi(int argc __unused, char *argv[] __unused) { char *name; EFI_HANDLE *buffer = NULL; EFI_HANDLE handle; UINTN bufsz = 0, i, j; EFI_STATUS status; int ret = 0; status = BS->LocateHandle(AllHandles, NULL, NULL, &bufsz, buffer); if (status != EFI_BUFFER_TOO_SMALL) { snprintf(command_errbuf, sizeof (command_errbuf), "unexpected error: %lld", (long long)status); return (CMD_ERROR); } if ((buffer = malloc(bufsz)) == NULL) { sprintf(command_errbuf, "out of memory"); return (CMD_ERROR); } status = BS->LocateHandle(AllHandles, NULL, NULL, &bufsz, buffer); if (EFI_ERROR(status)) { free(buffer); snprintf(command_errbuf, sizeof (command_errbuf), "LocateHandle() error: %lld", (long long)status); return (CMD_ERROR); } pager_open(); for (i = 0; i < (bufsz / sizeof (EFI_HANDLE)); i++) { UINTN nproto = 0; EFI_GUID **protocols = NULL; handle = buffer[i]; printf("Handle %p", handle); if (pager_output("\n")) break; /* device path */ status = BS->ProtocolsPerHandle(handle, &protocols, &nproto); if (EFI_ERROR(status)) { snprintf(command_errbuf, sizeof (command_errbuf), "ProtocolsPerHandle() error: %lld", (long long)status); continue; } for (j = 0; j < nproto; j++) { if (efi_guid_to_name(protocols[j], &name) == true) { printf(" %s", name); free(name); } else { printf("Error while translating UUID to name"); } if ((ret = pager_output("\n")) != 0) break; } BS->FreePool(protocols); if (ret != 0) break; } pager_close(); free(buffer); return (CMD_OK); } #ifdef LOADER_FDT_SUPPORT extern int command_fdt_internal(int argc, char *argv[]); /* * Since the proper fdt command handling function is defined in * fdt_loader_cmd.c, and declaring it as extern is in contradiction with * the COMMAND_SET() macro (which uses a static pointer), we're defining * a wrapper function, which calls the proper fdt handling routine. */ static int command_fdt(int argc, char *argv[]) { return (command_fdt_internal(argc, argv)); } COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt); #endif /* * Chain load another efi loader.
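 *
 * Typical usage from the loader prompt (illustrative; the device and
 * path names depend on the system):
 *
 *	chain disk0p1:/EFI/Microsoft/Boot/bootmgfw.efi
 *
 * Any additional arguments are packed into the new image's
 * LoadOptions below.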
*/ static int command_chain(int argc, char *argv[]) { EFI_GUID LoadedImageGUID = LOADED_IMAGE_PROTOCOL; EFI_HANDLE loaderhandle; EFI_LOADED_IMAGE *loaded_image; EFI_STATUS status; struct stat st; struct devdesc *dev; char *name, *path; void *buf; int fd; if (argc < 2) { command_errmsg = "wrong number of arguments"; return (CMD_ERROR); } name = argv[1]; if ((fd = open(name, O_RDONLY)) < 0) { command_errmsg = "no such file"; return (CMD_ERROR); } if (fstat(fd, &st) < 0) { command_errmsg = "stat failed"; close(fd); return (CMD_ERROR); } status = BS->AllocatePool(EfiLoaderCode, (UINTN)st.st_size, &buf); if (status != EFI_SUCCESS) { command_errmsg = "failed to allocate buffer"; close(fd); return (CMD_ERROR); } if (read(fd, buf, st.st_size) != st.st_size) { command_errmsg = "error while reading the file"; (void)BS->FreePool(buf); close(fd); return (CMD_ERROR); } close(fd); status = BS->LoadImage(FALSE, IH, NULL, buf, st.st_size, &loaderhandle); (void)BS->FreePool(buf); if (status != EFI_SUCCESS) { command_errmsg = "LoadImage failed"; return (CMD_ERROR); } status = BS->HandleProtocol(loaderhandle, &LoadedImageGUID, (void **)&loaded_image); if (argc > 2) { int i, len = 0; CHAR16 *argp; for (i = 2; i < argc; i++) len += strlen(argv[i]) + 1; len *= sizeof (*argp); loaded_image->LoadOptions = argp = malloc (len); loaded_image->LoadOptionsSize = len; for (i = 2; i < argc; i++) { char *ptr = argv[i]; while (*ptr) *(argp++) = *(ptr++); *(argp++) = ' '; } *(--argp) = 0; } if (efi_getdev((void **)&dev, name, (const char **)&path) == 0) { #ifdef EFI_ZFS_BOOT struct zfs_devdesc *z_dev; #endif struct disk_devdesc *d_dev; pdinfo_t *hd, *pd; switch (dev->d_dev->dv_type) { #ifdef EFI_ZFS_BOOT case DEVT_ZFS: z_dev = (struct zfs_devdesc *)dev; loaded_image->DeviceHandle = efizfs_get_handle_by_guid(z_dev->pool_guid); break; #endif case DEVT_NET: loaded_image->DeviceHandle = efi_find_handle(dev->d_dev, dev->d_unit); break; default: hd = efiblk_get_pdinfo(dev); if (STAILQ_EMPTY(&hd->pd_part)) { loaded_image->DeviceHandle = hd->pd_handle; break; } d_dev = (struct disk_devdesc *)dev; STAILQ_FOREACH(pd, &hd->pd_part, pd_link) { /* * d_partition should be 255 */ if (pd->pd_unit == (uint32_t)d_dev->d_slice) { loaded_image->DeviceHandle = pd->pd_handle; break; } } break; } } dev_cleanup(); status = BS->StartImage(loaderhandle, NULL, NULL); if (status != EFI_SUCCESS) { command_errmsg = "StartImage failed"; free(loaded_image->LoadOptions); loaded_image->LoadOptions = NULL; status = BS->UnloadImage(loaderhandle); return (CMD_ERROR); } return (CMD_ERROR); /* not reached */ } COMMAND_SET(chain, "chain", "chain load file", command_chain); Index: projects/runtime-coverage-v2/stand/i386/gptboot/gptboot.8 =================================================================== --- projects/runtime-coverage-v2/stand/i386/gptboot/gptboot.8 (revision 347075) +++ projects/runtime-coverage-v2/stand/i386/gptboot/gptboot.8 (revision 347076) @@ -1,245 +1,259 @@ .\" Copyright (c) 2013 Warren Block .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution.
.\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd February 5, 2014 +.Dd April 30, 2019 .Dt GPTBOOT 8 .Os .Sh NAME .Nm gptboot .Nd GPT bootcode for UFS on BIOS-based computers .Sh DESCRIPTION .Nm is used on BIOS-based computers to boot from a UFS partition on a GPT-partitioned disk. .Nm is installed in a .Cm freebsd-boot partition with .Xr gpart 8 . +.Pp +When it starts, +.Nm +first reads the GPT and determines which drive and partition to +boot from, as described under +.Sx BOOTING , +below. +If it does not find an eligible partition, or if the user hits a +key within three seconds, +.Nm +switches from auto-boot to interactive mode. +Interactive mode allows manual selection of the disk, partition, +filename, and boot option flags, as described in +.Xr boot 8 . .Sh IMPLEMENTATION NOTES The GPT standard allows a variable number of partitions, but .Nm only boots from tables with 128 partitions or less. .Sh PARTITION ATTRIBUTES .Nm checks and manages several attributes of GPT UFS partitions. .Bl -tag -width ".Cm bootfailed" .It Cm bootme Attempt to boot from this partition. If more than one partition has the .Cm bootme attribute set, .Nm will attempt to boot each one until successful. .It Cm bootonce Attempt to boot from this partition only one time. Setting this attribute with .Xr gpart 8 automatically also sets the .Cm bootme attribute. Multiple partitions may have the .Cm bootonce and .Cm bootme attributes set. .It Cm bootfailed The .Cm bootfailed attribute marks partitions that had the .Cm bootonce attribute set, but failed to boot. This attribute is managed by the system. See .Sx "BOOTING" and .Sx "POST-BOOT ACTIONS" below for details. .El .Sh USAGE For normal usage, the user does not have to set or manage any of the partition attributes. .Nm will boot from the first UFS partition found. .Pp The .Cm bootonce attribute can be used for testing an upgraded operating system on an already-working computer. The existing system partition is left untouched, and the new version of the operating system to be tested is installed on another partition. The .Cm bootonce attribute is set on that new test partition. The next boot is attempted from the test partition. Success or failure will be shown in the system log files. After a successful boot of the test partition, a user script can check the logs and change the .Cm bootme attributes so the test partition becomes the new system partition. Because the .Cm bootonce attribute is cleared after an attempted boot, a failed boot will not leave the system attempting to boot from a partition that will never succeed. Instead, the system will boot from the older, known-working operating system that has not been modified. 
If the .Cm bootme attribute is set on any partitions, booting will be attempted from them first. If no partitions with .Cm bootme attributes are found, booting will be attempted from the first UFS partition found. .Sh BOOTING .Nm first reads the partition table. All .Cm freebsd-ufs partitions with only the .Cm bootonce attribute set, indicating a failed boot, are set to .Cm bootfailed . .Nm then scans through all of the .Cm freebsd-ufs partitions. Boot behavior depends on the combination of .Cm bootme and .Cm bootonce attributes set on those partitions. .Bl -tag -width ".Cm bootonce + .Cm bootme" .It Cm bootonce + Cm bootme Highest priority: booting is attempted from each of the .Cm freebsd-ufs partitions with both of these attributes. On each partition, the .Cm bootme attribute is removed and the boot attempted. .It Cm bootme Middle priority: booting is attempted from each of the .Cm freebsd-ufs partitions with the .Cm bootme attribute. .El .Pp If neither .Cm bootonce nor .Cm bootme attributes are found on any partitions, booting is attempted from the first .Cm freebsd-ufs partition on the disk. .Sh POST-BOOT ACTIONS The startup script .Pa /etc/rc.d/gptboot checks the attributes of .Cm freebsd-ufs partitions on all GPT disks. Partitions with the .Cm bootfailed attribute generate a .Dq boot from X failed system log message. Partitions with only the .Cm bootonce attribute, indicating a partition that successfully booted, generate a .Dq boot from X succeeded system log message. The .Cm bootfailed attributes are cleared from all the partitions. The .Cm bootonce attribute is cleared from the partition that successfully booted. There is normally only one of these. .Sh FILES .Bl -tag -width /boot/gptboot -compact .It Pa /boot/gptboot bootcode binary .It Pa /boot.config parameters for the boot blocks .Pq optional .El .Sh EXAMPLES .Nm is installed in a .Cm freebsd-boot partition, usually the first partition on the disk. A .Dq protective MBR .Po see .Xr gpart 8 .Pc is typically installed in combination with .Nm . .Pp Install .Nm on the .Pa ada0 drive: .Bd -literal -offset indent gpart bootcode -b /boot/pmbr -p /boot/gptboot -i 1 ada0 .Ed .Pp .Nm can also be installed without the PMBR: .Bd -literal -offset indent gpart bootcode -p /boot/gptboot -i 1 ada0 .Ed .Pp Set the .Cm bootme attribute for partition 2: .Bd -literal -offset indent gpart set -a bootme -i 2 ada0 .Ed .Pp Set the .Cm bootonce attribute for partition 2, automatically also setting the .Cm bootme attribute: .Bd -literal -offset indent gpart set -a bootonce -i 2 ada0 .Ed .Sh SEE ALSO .Xr boot.config 5 , .Xr rc.conf 5 , .Xr boot 8 , .Xr gpart 8 .Sh HISTORY .Nm appeared in FreeBSD 7.1. .Sh AUTHORS This manual page was written by .An Warren Block Aq wblock@FreeBSD.org . Index: projects/runtime-coverage-v2/stand/i386/zfsboot/zfsboot.c =================================================================== --- projects/runtime-coverage-v2/stand/i386/zfsboot/zfsboot.c (revision 347075) +++ projects/runtime-coverage-v2/stand/i386/zfsboot/zfsboot.c (revision 347076) @@ -1,1128 +1,1160 @@ /*- * Copyright (c) 1998 Robert Nordier * All rights reserved. * * Redistribution and use in source and binary forms are freely * permitted provided that the above copyright notice and this * paragraph and the following disclaimer are duplicated in all * such forms. * * This software is provided "AS IS" and without any express or * implied warranties, including, without limitation, the implied * warranties of merchantability and fitness for a particular * purpose.
*/ #include __FBSDID("$FreeBSD$"); #include "stand.h" #include #include #include #ifdef GPT #include #endif #include #include #include #include #include #include #include #include #include #include "lib.h" #include "rbx.h" #include "drv.h" #include "edd.h" #include "cons.h" #include "bootargs.h" #include "paths.h" #include "libzfs.h" #define ARGS 0x900 #define NOPT 14 #define NDEV 3 #define BIOS_NUMDRIVES 0x475 #define DRV_HARD 0x80 #define DRV_MASK 0x7f #define TYPE_AD 0 #define TYPE_DA 1 #define TYPE_MAXHARD TYPE_DA #define TYPE_FD 2 #define DEV_GELIBOOT_BSIZE 4096 extern uint32_t _end; #ifdef GPT static const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS; #endif static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */ static const unsigned char flags[NOPT] = { RBX_DUAL, RBX_SERIAL, RBX_ASKNAME, RBX_CDROM, RBX_CONFIG, RBX_KDB, RBX_GDB, RBX_MUTE, RBX_NOINTR, RBX_PAUSE, RBX_QUIET, RBX_DFLTROOT, RBX_SINGLE, RBX_VERBOSE }; uint32_t opts; static const unsigned char dev_maj[NDEV] = {30, 4, 2}; static char cmd[512]; static char cmddup[512]; static char kname[1024]; static char rootname[256]; static int comspeed = SIOSPD; static struct bootinfo bootinfo; static uint32_t bootdev; static struct zfs_boot_args zfsargs; vm_offset_t high_heap_base; uint32_t bios_basemem, bios_extmem, high_heap_size; static struct bios_smap smap; /* * The minimum amount of memory to reserve in bios_extmem for the heap. */ #define HEAP_MIN (64 * 1024 * 1024) static char *heap_next; static char *heap_end; /* Buffers that must not span a 64k boundary. */ #define READ_BUF_SIZE 8192 struct dmadat { char rdbuf[READ_BUF_SIZE]; /* for reading large things */ char secbuf[READ_BUF_SIZE]; /* for MBR/disklabel */ }; static struct dmadat *dmadat; void exit(int); void reboot(void); static void load(void); static int parse_cmd(void); static void bios_getmem(void); int main(void); #ifdef LOADER_GELI_SUPPORT #include "geliboot.h" static char gelipw[GELI_PW_MAXLEN]; #endif struct zfsdsk { struct dsk dsk; #ifdef LOADER_GELI_SUPPORT struct geli_dev *gdev; #endif }; #include "zfsimpl.c" /* * Read from a dnode (which must be from a ZPL filesystem). */ static int zfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size) { const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus; size_t n; int rc; n = size; if (*offp + n > zp->zp_size) n = zp->zp_size - *offp; rc = dnode_read(spa, dnode, *offp, start, n); if (rc) return (-1); *offp += n; return (n); } /* * Current ZFS pool */ static spa_t *spa; static spa_t *primary_spa; static vdev_t *primary_vdev; /* * A wrapper for dskread that doesn't have to worry about whether the * buffer pointer crosses a 64k boundary. */ static int vdev_read(void *xvdev, void *priv, off_t off, void *buf, size_t bytes) { char *p; daddr_t lba, alignlba; off_t diff; unsigned int nb, alignnb; struct zfsdsk *zdsk = (struct zfsdsk *) priv; if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1))) return -1; p = buf; lba = off / DEV_BSIZE; lba += zdsk->dsk.start; /* * Align reads to 4k else 4k sector GELIs will not decrypt. * Round LBA down to nearest multiple of DEV_GELIBOOT_BSIZE bytes. */ alignlba = rounddown2(off, DEV_GELIBOOT_BSIZE) / DEV_BSIZE; /* * The read must be aligned to DEV_GELIBOOT_BSIZE bytes relative to the * start of the GELI partition, not the start of the actual disk. 
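 *
 * A worked example with illustrative numbers: with DEV_BSIZE 512,
 * DEV_GELIBOOT_BSIZE 4096, dsk.start 100 and a 1024-byte read at
 * off 5632, lba is 111 but alignlba is rounddown2(5632, 4096) / 512
 * + 100 = 108, so diff is 1536.  The loop below then reads alignnb =
 * roundup2(1024 + 1536, 4096) / 512 = 8 sectors at 108, hands
 * geli_read() a 4096-byte-aligned span starting at partition offset
 * 4096, and copies the caller's 1024 bytes out of rdbuf + 1536.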
*/ alignlba += zdsk->dsk.start; diff = (lba - alignlba) * DEV_BSIZE; while (bytes > 0) { nb = bytes / DEV_BSIZE; /* * Ensure that the read size plus the leading offset does not * exceed the size of the read buffer. */ if (nb > (READ_BUF_SIZE - diff) / DEV_BSIZE) nb = (READ_BUF_SIZE - diff) / DEV_BSIZE; /* * Round the number of blocks to read up to the nearest multiple * of DEV_GELIBOOT_BSIZE. */ alignnb = roundup2(nb * DEV_BSIZE + diff, DEV_GELIBOOT_BSIZE) / DEV_BSIZE; if (zdsk->dsk.size > 0 && alignlba + alignnb > zdsk->dsk.size + zdsk->dsk.start) { printf("Shortening read at %lld from %d to %lld\n", alignlba, alignnb, (zdsk->dsk.size + zdsk->dsk.start) - alignlba); alignnb = (zdsk->dsk.size + zdsk->dsk.start) - alignlba; } if (drvread(&zdsk->dsk, dmadat->rdbuf, alignlba, alignnb)) return -1; #ifdef LOADER_GELI_SUPPORT /* decrypt */ if (zdsk->gdev != NULL) { if (geli_read(zdsk->gdev, ((alignlba - zdsk->dsk.start) * DEV_BSIZE), dmadat->rdbuf, alignnb * DEV_BSIZE)) return (-1); } #endif memcpy(p, dmadat->rdbuf + diff, nb * DEV_BSIZE); p += nb * DEV_BSIZE; lba += nb; alignlba += alignnb; bytes -= nb * DEV_BSIZE; /* Don't need the leading offset after the first block. */ diff = 0; } return 0; } /* Match the signature exactly due to signature madness */ static int vdev_read2(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) { return vdev_read(vdev, priv, off, buf, bytes); } static int vdev_write(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) { char *p; daddr_t lba; unsigned int nb; struct zfsdsk *zdsk = (struct zfsdsk *) priv; if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1))) return -1; p = buf; lba = off / DEV_BSIZE; lba += zdsk->dsk.start; while (bytes > 0) { nb = bytes / DEV_BSIZE; if (nb > READ_BUF_SIZE / DEV_BSIZE) nb = READ_BUF_SIZE / DEV_BSIZE; memcpy(dmadat->rdbuf, p, nb * DEV_BSIZE); if (drvwrite(&zdsk->dsk, dmadat->rdbuf, lba, nb)) return -1; p += nb * DEV_BSIZE; lba += nb; bytes -= nb * DEV_BSIZE; } return 0; } static int xfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte) { if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) { printf("Invalid format\n"); return -1; } return 0; } /* * Read Pad2 (formerly "Boot Block Header") area of the first * vdev label of the given vdev. 
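 *
 * In the on-disk vdev_label_t, vl_pad2 is the second VDEV_PAD_SIZE
 * (8 KiB) region, i.e. byte offset 8192 of each 256 KiB label.  On
 * FreeBSD it is zfsbootcfg(8) that normally stores a one-shot command
 * string there for the nextboot handling in main() below.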
*/ static int vdev_read_pad2(vdev_t *vdev, char *buf, size_t size) { blkptr_t bp; char *tmp = zap_scratch; off_t off = offsetof(vdev_label_t, vl_pad2); if (size > VDEV_PAD_SIZE) size = VDEV_PAD_SIZE; BP_ZERO(&bp); BP_SET_LSIZE(&bp, VDEV_PAD_SIZE); BP_SET_PSIZE(&bp, VDEV_PAD_SIZE); BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); DVA_SET_OFFSET(BP_IDENTITY(&bp), off); if (vdev_read_phys(vdev, &bp, tmp, off, 0)) return (EIO); memcpy(buf, tmp, size); return (0); } static int vdev_clear_pad2(vdev_t *vdev) { char *zeroes = zap_scratch; uint64_t *end; off_t off = offsetof(vdev_label_t, vl_pad2); memset(zeroes, 0, VDEV_PAD_SIZE); end = (uint64_t *)(zeroes + VDEV_PAD_SIZE); /* ZIO_CHECKSUM_LABEL magic and pre-calculated checksum for all zeros */ end[-5] = 0x0210da7ab10c7a11; end[-4] = 0x97f48f807f6e2a3f; end[-3] = 0xaf909f1658aacefc; end[-2] = 0xcbd1ea57ff6db48b; end[-1] = 0x6ec692db0d465fab; if (vdev_write(vdev, vdev->v_read_priv, off, zeroes, VDEV_PAD_SIZE)) return (EIO); return (0); } static void bios_getmem(void) { uint64_t size; /* Parse system memory map */ v86.ebx = 0; do { v86.ctl = V86_FLAGS; v86.addr = 0x15; /* int 0x15 function 0xe820*/ v86.eax = 0xe820; v86.ecx = sizeof(struct bios_smap); v86.edx = SMAP_SIG; v86.es = VTOPSEG(&smap); v86.edi = VTOPOFF(&smap); v86int(); if (V86_CY(v86.efl) || (v86.eax != SMAP_SIG)) break; /* look for a low-memory segment that's large enough */ if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) && (smap.length >= (512 * 1024))) bios_basemem = smap.length; /* look for the first segment in 'extended' memory */ if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0x100000)) { bios_extmem = smap.length; } /* * Look for the largest segment in 'extended' memory beyond * 1MB but below 4GB. */ if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) && (smap.base < 0x100000000ull)) { size = smap.length; /* * If this segment crosses the 4GB boundary, truncate it. */ if (smap.base + size > 0x100000000ull) size = 0x100000000ull - smap.base; if (size > high_heap_size) { high_heap_size = size; high_heap_base = smap.base; } } } while (v86.ebx != 0); /* Fall back to the old compatibility function for base memory */ if (bios_basemem == 0) { v86.ctl = 0; v86.addr = 0x12; /* int 0x12 */ v86int(); bios_basemem = (v86.eax & 0xffff) * 1024; } /* Fall back through several compatibility functions for extended memory */ if (bios_extmem == 0) { v86.ctl = V86_FLAGS; v86.addr = 0x15; /* int 0x15 function 0xe801*/ v86.eax = 0xe801; v86int(); if (!V86_CY(v86.efl)) { bios_extmem = ((v86.ecx & 0xffff) + ((v86.edx & 0xffff) * 64)) * 1024; } } if (bios_extmem == 0) { v86.ctl = 0; v86.addr = 0x15; /* int 0x15 function 0x88*/ v86.eax = 0x8800; v86int(); bios_extmem = (v86.eax & 0xffff) * 1024; } /* * If we have extended memory and did not find a suitable heap * region in the SMAP, use the last HEAP_MIN bytes of 'extended' * memory as a high heap candidate.
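 * For example, with hypothetical sizes: if bios_extmem reports
 * 0x7ff00000 bytes and no SMAP segment qualified, the candidate
 * becomes high_heap_base = 0x7ff00000 + 0x100000 - HEAP_MIN =
 * 0x7c000000, with high_heap_size = HEAP_MIN (64MB).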
*/ if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) { high_heap_size = HEAP_MIN; high_heap_base = bios_extmem + 0x100000 - HEAP_MIN; } } /* * Try to detect a device supported by the legacy int13 BIOS */ static int int13probe(int drive) { v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x800; v86.edx = drive; v86int(); if (!V86_CY(v86.efl) && /* carry clear */ ((v86.edx & 0xff) != (drive & DRV_MASK))) { /* unit # OK */ if ((v86.ecx & 0x3f) == 0) { /* absurd sector size */ return(0); /* skip device */ } return (1); } return(0); } /* * We call this when we find a ZFS vdev - ZFS consumes the dsk * structure so we must make a new one. */ static struct zfsdsk * copy_dsk(struct zfsdsk *zdsk) { struct zfsdsk *newdsk; newdsk = malloc(sizeof(struct zfsdsk)); *newdsk = *zdsk; return (newdsk); } /* + * Get disk size from GPT. + */ +static uint64_t +drvsize_gpt(struct dsk *dskp) +{ +#ifdef GPT + struct gpt_hdr hdr; + char *sec; + + sec = dmadat->secbuf; + if (drvread(dskp, sec, 1, 1)) + return (0); + + memcpy(&hdr, sec, sizeof(hdr)); + if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 || + hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 || + hdr.hdr_entsz < sizeof(struct gpt_ent) || + DEV_BSIZE % hdr.hdr_entsz != 0) { + return (0); + } + return (hdr.hdr_lba_alt + 1); +#else + return (0); +#endif +} + +/* * Get disk size from eax=0x800 and 0x4800. We need to probe both * because 0x4800 may not be available and we would like to get more * or less correct disk size - if it is possible at all. * Note we do not really want to touch drv.c because that code is shared * with boot2 and we can not afford to grow that code. */ static uint64_t drvsize_ext(struct zfsdsk *zdsk) { struct dsk *dskp; uint64_t size, tmp; int cyl, hds, sec; dskp = &zdsk->dsk; + + /* Try to read disk size from GPT */ + size = drvsize_gpt(dskp); + if (size != 0) + return (size); v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x800; v86.edx = dskp->drive; v86int(); /* Don't error out if we get bad sector number, try EDD as well */ if (V86_CY(v86.efl) || /* carry set */ (v86.edx & 0xff) <= (unsigned)(dskp->drive & 0x7f)) /* unit # bad */ return (0); cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1; /* Convert max head # -> # of heads */ hds = ((v86.edx & 0xff00) >> 8) + 1; sec = v86.ecx & 0x3f; size = (uint64_t)cyl * hds * sec; /* Determine if we can use EDD with this device. */ v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x4100; v86.edx = dskp->drive; v86.ebx = 0x55aa; v86int(); if (V86_CY(v86.efl) || /* carry set */ (v86.ebx & 0xffff) != 0xaa55 || /* signature */ (v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0) return (size); tmp = drvsize(dskp); if (tmp > size) size = tmp; return (size); } /* * The "layered" ioctl to read disk/partition size. Unfortunately * the zfsboot case is hardest, because we do not have full software * stack available, so we need to do some manual work here. 
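 *
 * As a worked example of the CHS fallback in drvsize_ext() above
 * (illustrative register values): int 13h AH=08h returning ECX 0xffff
 * and DH 0xfe decodes to cyl = ((0xff & 0xc0) << 2) + 0xff + 1 = 1024,
 * hds = 0xfe + 1 = 255 and sec = 0x3f = 63, i.e. 16450560 sectors --
 * the classic ~8 GB CHS ceiling that only EDD (AH=48h) or the GPT
 * header can see past.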
*/ uint64_t ldi_get_size(void *priv) { struct zfsdsk *zdsk = priv; uint64_t size = zdsk->dsk.size; if (zdsk->dsk.start == 0) size = drvsize_ext(zdsk); return (size * DEV_BSIZE); } static void probe_drive(struct zfsdsk *zdsk) { #ifdef GPT struct gpt_hdr hdr; struct gpt_ent *ent; unsigned part, entries_per_sec; daddr_t slba; #endif #if defined(GPT) || defined(LOADER_GELI_SUPPORT) daddr_t elba; #endif struct dos_partition *dp; char *sec; unsigned i; #ifdef LOADER_GELI_SUPPORT /* * Taste the disk; if it is GELI encrypted, decrypt it, then dig out the * partition table and probe each slice/partition in turn for a vdev or * GELI encrypted vdev. */ elba = drvsize_ext(zdsk); if (elba > 0) { elba--; } zdsk->gdev = geli_taste(vdev_read, zdsk, elba, "disk%u:0:"); if ((zdsk->gdev != NULL) && (geli_havekey(zdsk->gdev) == 0)) geli_passphrase(zdsk->gdev, gelipw); #endif /* LOADER_GELI_SUPPORT */ sec = dmadat->secbuf; zdsk->dsk.start = 0; #ifdef GPT /* * First check for GPT. */ if (drvread(&zdsk->dsk, sec, 1, 1)) { return; } memcpy(&hdr, sec, sizeof(hdr)); if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 || hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 || hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) { goto trymbr; } /* * Probe all GPT partitions for the presence of ZFS pools. We * return the spa_t for the first we find (if requested). This * will have the effect of booting from the first pool on the * disk. * * If no vdev is found, try GELI-decrypting the device and probing again. */ entries_per_sec = DEV_BSIZE / hdr.hdr_entsz; slba = hdr.hdr_lba_table; elba = slba + hdr.hdr_entries / entries_per_sec; while (slba < elba) { zdsk->dsk.start = 0; if (drvread(&zdsk->dsk, sec, slba, 1)) return; for (part = 0; part < entries_per_sec; part++) { ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz); if (memcmp(&ent->ent_type, &freebsd_zfs_uuid, sizeof(uuid_t)) == 0) { zdsk->dsk.start = ent->ent_lba_start; zdsk->dsk.size = ent->ent_lba_end - ent->ent_lba_start + 1; zdsk->dsk.slice = part + 1; zdsk->dsk.part = 255; if (vdev_probe(vdev_read2, zdsk, NULL) == 0) { /* * This slice had a vdev. We need a new dsk * structure now since the vdev now owns this one. */ zdsk = copy_dsk(zdsk); } #ifdef LOADER_GELI_SUPPORT else if ((zdsk->gdev = geli_taste(vdev_read, zdsk, ent->ent_lba_end - ent->ent_lba_start, "disk%up%u:", zdsk->dsk.unit, zdsk->dsk.slice)) != NULL) { if (geli_havekey(zdsk->gdev) == 0 || geli_passphrase(zdsk->gdev, gelipw) == 0) { /* * This slice has GELI, check it for ZFS. */ if (vdev_probe(vdev_read2, zdsk, NULL) == 0) { /* * This slice had a vdev. We need a new dsk * structure now since the vdev now owns this one. */ zdsk = copy_dsk(zdsk); } break; } } #endif /* LOADER_GELI_SUPPORT */ } } slba++; } return; trymbr: #endif /* GPT */ if (drvread(&zdsk->dsk, sec, DOSBBSECTOR, 1)) return; dp = (void *)(sec + DOSPARTOFF); for (i = 0; i < NDOSPART; i++) { if (!dp[i].dp_typ) continue; zdsk->dsk.start = dp[i].dp_start; zdsk->dsk.size = dp[i].dp_size; zdsk->dsk.slice = i + 1; if (vdev_probe(vdev_read2, zdsk, NULL) == 0) { zdsk = copy_dsk(zdsk); } #ifdef LOADER_GELI_SUPPORT else if ((zdsk->gdev = geli_taste(vdev_read, zdsk, dp[i].dp_size - dp[i].dp_start, "disk%us%u:")) != NULL) { if (geli_havekey(zdsk->gdev) == 0 || geli_passphrase(zdsk->gdev, gelipw) == 0) { /* * This slice has GELI, check it for ZFS. */ if (vdev_probe(vdev_read2, zdsk, NULL) == 0) { /* * This slice had a vdev. We need a new dsk * structure now since the vdev now owns this one.
*/ zdsk = copy_dsk(zdsk); } break; } } #endif /* LOADER_GELI_SUPPORT */ } } int main(void) { dnode_phys_t dn; off_t off; struct zfsdsk *zdsk; int autoboot, i; int nextboot; int rc; dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base); bios_getmem(); if (high_heap_size > 0) { heap_end = PTOV(high_heap_base + high_heap_size); heap_next = PTOV(high_heap_base); } else { heap_next = (char *)dmadat + sizeof(*dmadat); heap_end = (char *)PTOV(bios_basemem); } setheap(heap_next, heap_end); zdsk = calloc(1, sizeof(struct zfsdsk)); zdsk->dsk.drive = *(uint8_t *)PTOV(ARGS); zdsk->dsk.type = zdsk->dsk.drive & DRV_HARD ? TYPE_AD : TYPE_FD; zdsk->dsk.unit = zdsk->dsk.drive & DRV_MASK; zdsk->dsk.slice = *(uint8_t *)PTOV(ARGS + 1) + 1; zdsk->dsk.part = 0; zdsk->dsk.start = 0; zdsk->dsk.size = drvsize_ext(zdsk); bootinfo.bi_version = BOOTINFO_VERSION; bootinfo.bi_size = sizeof(bootinfo); bootinfo.bi_basemem = bios_basemem / 1024; bootinfo.bi_extmem = bios_extmem / 1024; bootinfo.bi_memsizes_valid++; bootinfo.bi_bios_dev = zdsk->dsk.drive; bootdev = MAKEBOOTDEV(dev_maj[zdsk->dsk.type], zdsk->dsk.slice, zdsk->dsk.unit, zdsk->dsk.part); /* Process configuration file */ autoboot = 1; zfs_init(); /* * Probe the boot drive first - we will try to boot from whatever * pool we find on that drive. */ probe_drive(zdsk); /* * Probe the rest of the drives that the bios knows about. This * will find any other available pools and it may fill in missing * vdevs for the boot pool. */ #ifndef VIRTUALBOX for (i = 0; i < *(unsigned char *)PTOV(BIOS_NUMDRIVES); i++) #else for (i = 0; i < MAXBDDEV; i++) #endif { if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS)) continue; if (!int13probe(i | DRV_HARD)) break; zdsk = calloc(1, sizeof(struct zfsdsk)); zdsk->dsk.drive = i | DRV_HARD; zdsk->dsk.type = zdsk->dsk.drive & TYPE_AD; zdsk->dsk.unit = i; zdsk->dsk.slice = 0; zdsk->dsk.part = 0; zdsk->dsk.start = 0; zdsk->dsk.size = drvsize_ext(zdsk); probe_drive(zdsk); } /* * The first discovered pool, if any, is the pool. */ spa = spa_get_primary(); if (!spa) { printf("%s: No ZFS pools located, can't boot\n", BOOTPROG); for (;;) ; } primary_spa = spa; primary_vdev = spa_get_primary_vdev(spa); nextboot = 0; rc = vdev_read_pad2(primary_vdev, cmd, sizeof(cmd)); if (vdev_clear_pad2(primary_vdev)) printf("failed to clear pad2 area of primary vdev\n"); if (rc == 0) { if (*cmd) { /* * We could find an old-style ZFS Boot Block header here. * Simply ignore it. */ if (*(uint64_t *)cmd != 0x2f5b007b10c) { /* * Note that parse() is destructive to cmd[] and we also want * to honor RBX_QUIET option that could be present in cmd[]. */ nextboot = 1; memcpy(cmddup, cmd, sizeof(cmd)); if (parse_cmd()) { printf("failed to parse pad2 area of primary vdev\n"); reboot(); } if (!OPT_CHECK(RBX_QUIET)) printf("zfs nextboot: %s\n", cmddup); } /* Do not process this command twice */ *cmd = 0; } } else printf("failed to read pad2 area of primary vdev\n"); /* Mount ZFS only if it's not already mounted via nextboot parsing. */ if (zfsmount.spa == NULL && (zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0)) { printf("%s: failed to mount default pool %s\n", BOOTPROG, spa->spa_name); autoboot = 0; } else if (zfs_lookup(&zfsmount, PATH_CONFIG, &dn) == 0 || zfs_lookup(&zfsmount, PATH_DOTCONFIG, &dn) == 0) { off = 0; zfs_read(spa, &dn, &off, cmd, sizeof(cmd)); } if (*cmd) { /* * Note that parse_cmd() is destructive to cmd[] and we also want * to honor RBX_QUIET option that could be present in cmd[]. 
*/ memcpy(cmddup, cmd, sizeof(cmd)); if (parse_cmd()) autoboot = 0; if (!OPT_CHECK(RBX_QUIET)) printf("%s: %s\n", PATH_CONFIG, cmddup); /* Do not process this command twice */ *cmd = 0; } /* Do not risk waiting at the prompt forever. */ if (nextboot && !autoboot) reboot(); /* * Try to exec /boot/loader. If interrupted by a keypress, * or in case of failure, try to load a kernel directly instead. */ if (autoboot && !*kname) { memcpy(kname, PATH_LOADER, sizeof(PATH_LOADER)); if (!keyhit(3)) { load(); memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL)); } } /* Present the user with the boot2 prompt. */ for (;;) { if (!autoboot || !OPT_CHECK(RBX_QUIET)) { printf("\nFreeBSD/x86 boot\n"); if (zfs_rlookup(spa, zfsmount.rootobj, rootname) != 0) printf("Default: %s/<0x%llx>:%s\n" "boot: ", spa->spa_name, zfsmount.rootobj, kname); else if (rootname[0] != '\0') printf("Default: %s/%s:%s\n" "boot: ", spa->spa_name, rootname, kname); else printf("Default: %s:%s\n" "boot: ", spa->spa_name, kname); } if (ioctrl & IO_SERIAL) sio_flush(); if (!autoboot || keyhit(5)) getstr(cmd, sizeof(cmd)); else if (!autoboot || !OPT_CHECK(RBX_QUIET)) putchar('\n'); autoboot = 0; if (parse_cmd()) putchar('\a'); else load(); } } /* XXX - Needed for btxld to link the boot2 binary; do not remove. */ void exit(int x) { __exit(x); } void reboot(void) { __exit(0); } static void load(void) { union { struct exec ex; Elf32_Ehdr eh; } hdr; static Elf32_Phdr ep[2]; static Elf32_Shdr es[2]; caddr_t p; dnode_phys_t dn; off_t off; uint32_t addr, x; int fmt, i, j; if (zfs_lookup(&zfsmount, kname, &dn)) { printf("\nCan't find %s\n", kname); return; } off = 0; if (xfsread(&dn, &off, &hdr, sizeof(hdr))) return; if (N_GETMAGIC(hdr.ex) == ZMAGIC) fmt = 0; else if (IS_ELF(hdr.eh)) fmt = 1; else { printf("Invalid %s\n", "format"); return; } if (fmt == 0) { addr = hdr.ex.a_entry & 0xffffff; p = PTOV(addr); off = PAGE_SIZE; if (xfsread(&dn, &off, p, hdr.ex.a_text)) return; p += roundup2(hdr.ex.a_text, PAGE_SIZE); if (xfsread(&dn, &off, p, hdr.ex.a_data)) return; p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE); bootinfo.bi_symtab = VTOP(p); memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms)); p += sizeof(hdr.ex.a_syms); if (hdr.ex.a_syms) { if (xfsread(&dn, &off, p, hdr.ex.a_syms)) return; p += hdr.ex.a_syms; if (xfsread(&dn, &off, p, sizeof(int))) return; x = *(uint32_t *)p; p += sizeof(int); x -= sizeof(int); if (xfsread(&dn, &off, p, x)) return; p += x; } } else { off = hdr.eh.e_phoff; for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) { if (xfsread(&dn, &off, ep + j, sizeof(ep[0]))) return; if (ep[j].p_type == PT_LOAD) j++; } for (i = 0; i < 2; i++) { p = PTOV(ep[i].p_paddr & 0xffffff); off = ep[i].p_offset; if (xfsread(&dn, &off, p, ep[i].p_filesz)) return; } p += roundup2(ep[1].p_memsz, PAGE_SIZE); bootinfo.bi_symtab = VTOP(p); if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) { off = hdr.eh.e_shoff + sizeof(es[0]) * (hdr.eh.e_shstrndx + 1); if (xfsread(&dn, &off, &es, sizeof(es))) return; for (i = 0; i < 2; i++) { memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size)); p += sizeof(es[i].sh_size); off = es[i].sh_offset; if (xfsread(&dn, &off, p, es[i].sh_size)) return; p += es[i].sh_size; } } addr = hdr.eh.e_entry & 0xffffff; } bootinfo.bi_esymtab = VTOP(p); bootinfo.bi_kernelname = VTOP(kname); zfsargs.size = sizeof(zfsargs); zfsargs.pool = zfsmount.spa->spa_guid; zfsargs.root = zfsmount.rootobj; zfsargs.primary_pool = primary_spa->spa_guid; #ifdef LOADER_GELI_SUPPORT explicit_bzero(gelipw, sizeof(gelipw)); export_geli_boot_data(&zfsargs.gelidata); 
#endif if (primary_vdev != NULL) zfsargs.primary_vdev = primary_vdev->v_guid; else printf("failed to detect primary vdev\n"); /* * Note that the zfsargs struct is passed by value, not by pointer. Code in * btxldr.S copies the values from the entry stack to a fixed location * within loader(8) at startup due to the presence of KARGS_FLAGS_EXTARG. */ __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK), bootdev, KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG, (uint32_t) spa->spa_guid, (uint32_t) (spa->spa_guid >> 32), VTOP(&bootinfo), zfsargs); } static int zfs_mount_ds(char *dsname) { uint64_t newroot; spa_t *newspa; char *q; q = strchr(dsname, '/'); if (q) *q++ = '\0'; newspa = spa_find_by_name(dsname); if (newspa == NULL) { printf("\nCan't find ZFS pool %s\n", dsname); return -1; } if (zfs_spa_init(newspa)) return -1; newroot = 0; if (q) { if (zfs_lookup_dataset(newspa, q, &newroot)) { printf("\nCan't find dataset %s in ZFS pool %s\n", q, newspa->spa_name); return -1; } } if (zfs_mount(newspa, newroot, &zfsmount)) { printf("\nCan't mount ZFS dataset\n"); return -1; } spa = newspa; return (0); } static int parse_cmd(void) { char *arg = cmd; char *ep, *p, *q; const char *cp; int c, i, j; while ((c = *arg++)) { if (c == ' ' || c == '\t' || c == '\n') continue; for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++); ep = p; if (*p) *p++ = 0; if (c == '-') { while ((c = *arg++)) { if (c == 'P') { if (*(uint8_t *)PTOV(0x496) & 0x10) { cp = "yes"; } else { opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL); cp = "no"; } printf("Keyboard: %s\n", cp); continue; } else if (c == 'S') { j = 0; while ((unsigned int)(i = *arg++ - '0') <= 9) j = j * 10 + i; if (j > 0 && i == -'0') { comspeed = j; break; } /* Fall through to error below ('S' not in optstr[]). */ } for (i = 0; c != optstr[i]; i++) if (i == NOPT - 1) return -1; opts ^= OPT_SET(flags[i]); } ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) : OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD; if (ioctrl & IO_SERIAL) { if (sio_init(115200 / comspeed) != 0) ioctrl &= ~IO_SERIAL; } } if (c == '?') { dnode_phys_t dn; if (zfs_lookup(&zfsmount, arg, &dn) == 0) { zap_list(spa, &dn); } return -1; } else { arg--; /* * Report pool status if the command is 'status'. Let's * hope no-one wants to load /status as a kernel. */ if (!strcmp(arg, "status")) { spa_all_status(); return -1; } /* * If there is a "zfs:" prefix, simply ignore it. */ if (strncmp(arg, "zfs:", 4) == 0) arg += 4; /* * If there is a colon, switch pools. */ q = strchr(arg, ':'); if (q) { *q++ = '\0'; if (zfs_mount_ds(arg) != 0) return -1; arg = q; } if ((i = ep - arg)) { if ((size_t)i >= sizeof(kname)) return -1; memcpy(kname, arg, i + 1); } } arg = p; } return 0; } Index: projects/runtime-coverage-v2/sys/amd64/conf/GENERIC =================================================================== --- projects/runtime-coverage-v2/sys/amd64/conf/GENERIC (revision 347075) +++ projects/runtime-coverage-v2/sys/amd64/conf/GENERIC (revision 347076) @@ -1,382 +1,383 @@ # # GENERIC -- Generic kernel configuration file for FreeBSD/amd64 # # For more information on this file, please read the config(5) manual page, # and/or the handbook section on Kernel Configuration Files: # # https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the # latest information.
# # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ cpu HAMMER ident GENERIC makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support options SCHED_ULE # ULE scheduler options NUMA # Non-Uniform Memory Architecture support options PREEMPTION # Enable kernel thread preemption options VIMAGE # Subsystem virtualization, e.g. VNET options INET # InterNETworking options INET6 # IPv6 communications protocols options IPSEC # IP (v4/v6) security options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options TCP_OFFLOAD # TCP offload options TCP_BLACKBOX # Enhanced TCP event logging options TCP_HHOOK # hhook(9) framework for TCP options TCP_RFC7413 # TCP Fast Open options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support options UFS_ACL # Support for access control lists options UFS_DIRHASH # Improve performance on big directories options UFS_GJOURNAL # Enable gjournal-based UFS journaling options QUOTA # Enable disk quotas for UFS options MD_ROOT # MD is a potential root device options NFSCL # Network Filesystem Client options NFSD # Network Filesystem Server options NFSLOCKD # Network Lock Manager options NFS_ROOT # NFS usable as /, requires NFSCL options MSDOSFS # MSDOS Filesystem options CD9660 # ISO 9660 Filesystem options PROCFS # Process filesystem (requires PSEUDOFS) options PSEUDOFS # Pseudo-filesystem framework options GEOM_RAID # Soft RAID functionality. options GEOM_LABEL # Provides labelization options EFIRT # EFI Runtime Services support options COMPAT_FREEBSD32 # Compatible with i386 binaries options COMPAT_FREEBSD4 # Compatible with FreeBSD4 options COMPAT_FREEBSD5 # Compatible with FreeBSD5 options COMPAT_FREEBSD6 # Compatible with FreeBSD6 options COMPAT_FREEBSD7 # Compatible with FreeBSD7 options COMPAT_FREEBSD9 # Compatible with FreeBSD9 options COMPAT_FREEBSD10 # Compatible with FreeBSD10 options COMPAT_FREEBSD11 # Compatible with FreeBSD11 +options COMPAT_FREEBSD12 # Compatible with FreeBSD12 options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI options KTRACE # ktrace(1) support options STACK # stack(9) support options SYSVSHM # SYSV-style shared memory options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options KBD_INSTALL_CDEV # install a CDEV entry in /dev options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_FRAME # Ensure frames are compiled in options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data options INCLUDE_CONFIG_FILE # Include this file in kernel options RACCT # Resource accounting framework options RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default options RCTL # Resource limits # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. 
# For full debugger support use (turn off in stable branch): options BUF_TRACKING # Track buffer history options DDB # Support DDB. options FULL_BUF_TRACKING # Track more buffer history options GDB # Support remote GDB. options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default # Kernel Sanitizers #options COVERAGE # Generic kernel coverage. Used by KCOV #options KCOV # Kernel Coverage Sanitizer # Warning: KUBSAN can result in a kernel too large for loader to load #options KUBSAN # Kernel Undefined Behavior Sanitizer # Kernel dump features. options EKCD # Support for encrypted kernel dumps options GZIO # gzip-compressed kernel and user dumps options ZSTDIO # zstd-compressed kernel and user dumps options NETDUMP # netdump(4) client support # Make an SMP-capable kernel by default options SMP # Symmetric MultiProcessor Kernel options EARLY_AP_STARTUP # CPU frequency control device cpufreq # Bus support. device acpi options ACPI_DMAR device pci options PCI_HP # PCI-Express native HotPlug options PCI_IOV # PCI SR-IOV support # Floppy drives device fdc # ATA controllers device ahci # AHCI-compatible SATA controllers device ata # Legacy ATA/SATA controllers device mvs # Marvell 88SX50XX/88SX60XX/88SX70XX/SoC SATA device siis # SiliconImage SiI3124/SiI3132/SiI3531 SATA # SCSI Controllers device ahc # AHA2940 and onboard AIC7xxx devices device ahd # AHA39320/29320 and onboard AIC79xx devices device esp # AMD Am53C974 (Tekram DC-390(T)) device hptiop # Highpoint RocketRaid 3xxx series device isp # Qlogic family #device ispfw # Firmware for QLogic HBAs- normally a module device mpt # LSI-Logic MPT-Fusion device mps # LSI-Logic MPT-Fusion 2 device mpr # LSI-Logic MPT-Fusion 3 device sym # NCR/Symbios Logic device trm # Tekram DC395U/UW/F DC315U adapters device isci # Intel C600 SAS controller device ocs_fc # Emulex FC adapters # ATA/SCSI peripherals device scbus # SCSI bus (required for ATA/SCSI) device ch # SCSI media changers device da # Direct Access (disks) device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) device ses # Enclosure Services (SES and SAF-TE) #device ctl # CAM Target Layer # RAID controllers interfaced to the SCSI subsystem device amr # AMI MegaRAID device arcmsr # Areca SATA II RAID device ciss # Compaq Smart RAID 5* device hptmv # Highpoint RocketRAID 182x device hptnr # Highpoint DC7280, R750 device hptrr # Highpoint RocketRAID 17xx, 22xx, 23xx, 25xx device hpt27xx # Highpoint RocketRAID 27xx device iir # Intel Integrated RAID device ips # IBM (Adaptec) ServeRAID device mly # Mylex AcceleRAID/eXtremeRAID device twa # 3ware 9000 series PATA/SATA RAID device smartpqi # Microsemi smartpqi driver device tws # LSI 3ware 9750 SATA+SAS 6Gb/s RAID controller # RAID controllers device aac # Adaptec FSA RAID device aacp # SCSI passthrough for aac (requires CAM) device aacraid # Adaptec by PMC RAID device ida # Compaq Smart RAID device mfi # LSI MegaRAID SAS device mlx # Mylex DAC960 family device mrsas # LSI/Avago MegaRAID SAS/SATA, 6Gb/s and 12Gb/s device pmspcv # PMC-Sierra SAS/SATA Controller driver #XXX pointer/int warnings 
#device pst # Promise Supertrak SX6000 device twe # 3ware ATA RAID # NVM Express (NVMe) support device nvme # base NVMe driver device nvd # expose NVMe namespaces as disks, depends on nvme # atkbdc0 controls both the keyboard and the PS/2 mouse device atkbdc # AT keyboard controller device atkbd # AT keyboard device psm # PS/2 mouse device kbdmux # keyboard multiplexer device vga # VGA video card driver options VESA # Add support for VESA BIOS Extensions (VBE) device splash # Splash screen and screen saver support # syscons is the default console driver, resembling an SCO console device sc options SC_PIXEL_MODE # add support for the raster text mode # vt is the new video console driver device vt device vt_vga device vt_efifb device agp # support several AGP chipsets # PCCARD (PCMCIA) support # PCMCIA and cardbus bridge support device cbb # cardbus (yenta) bridge device pccard # PC Card (16-bit) bus device cardbus # CardBus (32-bit) bus # Serial (COM) ports device uart # Generic UART driver # Parallel port device ppc device ppbus # Parallel port bus (required) device lpt # Printer device ppi # Parallel port interface device #device vpo # Requires scbus and da device puc # Multi I/O cards and multi-channel UARTs # PCI/PCI-X/PCIe Ethernet NICs that use iflib infrastructure device iflib device em # Intel PRO/1000 Gigabit Ethernet Family device ix # Intel PRO/10GbE PCIE PF Ethernet device ixv # Intel PRO/10GbE PCIE VF Ethernet device ixl # Intel 700 Series Physical Function device iavf # Intel Adaptive Virtual Function device vmx # VMware VMXNET3 Ethernet # PCI Ethernet NICs. device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE device de # DEC/Intel DC21x4x (``Tulip'') device le # AMD Am7900 LANCE and Am79C9xx PCnet device ti # Alteon Networks Tigon I/II gigabit Ethernet device txp # 3Com 3cR990 (``Typhoon'') device vx # 3Com 3c590, 3c595 (``Vortex'') # PCI Ethernet NICs that use the common MII bus controller code. # NOTE: Be sure to keep the 'device miibus' line in order to use these NICs! 
device miibus # MII bus support device ae # Attansic/Atheros L2 FastEthernet device age # Attansic/Atheros L1 Gigabit Ethernet device alc # Atheros AR8131/AR8132 Ethernet device ale # Atheros AR8121/AR8113/AR8114 Ethernet device bce # Broadcom BCM5706/BCM5708 Gigabit Ethernet device bfe # Broadcom BCM440x 10/100 Ethernet device bge # Broadcom BCM570xx Gigabit Ethernet device cas # Sun Cassini/Cassini+ and NS DP83065 Saturn device dc # DEC/Intel 21143 and various workalikes device et # Agere ET1310 10/100/Gigabit Ethernet device fxp # Intel EtherExpress PRO/100B (82557, 82558) device gem # Sun GEM/Sun ERI/Apple GMAC device hme # Sun HME (Happy Meal Ethernet) device jme # JMicron JMC250 Gigabit/JMC260 Fast Ethernet device lge # Level 1 LXT1001 gigabit Ethernet device msk # Marvell/SysKonnect Yukon II Gigabit Ethernet device nfe # nVidia nForce MCP on-board Ethernet device nge # NatSemi DP83820 gigabit Ethernet device pcn # AMD Am79C97x PCI 10/100 (precedence over 'le') device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 device sf # Adaptec AIC-6915 (``Starfire'') device sge # Silicon Integrated Systems SiS190/191 device sis # Silicon Integrated Systems SiS 900/SiS 7016 device sk # SysKonnect SK-984x & SK-982x gigabit Ethernet device ste # Sundance ST201 (D-Link DFE-550TX) device stge # Sundance/Tamarack TC9021 gigabit Ethernet device tl # Texas Instruments ThunderLAN device tx # SMC EtherPower II (83c170 ``EPIC'') device vge # VIA VT612x gigabit Ethernet device vr # VIA Rhine, Rhine II device wb # Winbond W89C840F device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') # Wireless NIC cards device wlan # 802.11 support options IEEE80211_DEBUG # enable debug msgs options IEEE80211_SUPPORT_MESH # enable 802.11s draft support device wlan_wep # 802.11 WEP support device wlan_ccmp # 802.11 CCMP support device wlan_tkip # 802.11 TKIP support device wlan_amrr # AMRR transmit rate control algorithm device an # Aironet 4500/4800 802.11 wireless NICs. device ath # Atheros NICs device ath_pci # Atheros pci/cardbus glue device ath_hal # pci/cardbus chip support options AH_AR5416_INTERRUPT_MITIGATION # AR5416 interrupt mitigation options ATH_ENABLE_11N # Enable 802.11n support for AR5416 and later device ath_rate_sample # SampleRate tx rate control for ath #device bwi # Broadcom BCM430x/BCM431x wireless NICs. #device bwn # Broadcom BCM43xx wireless NICs. device ipw # Intel 2100 wireless NICs. device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs. device iwn # Intel 4965/1000/5000/6000 wireless NICs. device malo # Marvell Libertas wireless NICs. device mwl # Marvell 88W8363 802.11n wireless NICs. device ral # Ralink Technology RT2500 wireless NICs. device wi # WaveLAN/Intersil/Symbol 802.11 wireless NICs. device wpi # Intel 3945ABG wireless NICs. # Pseudo devices. device crypto # core crypto support device loop # Network loopback device random # Entropy device device padlock_rng # VIA Padlock RNG device rdrand_rng # Intel Bull Mountain RNG device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. 
device bpf # Berkeley packet filter # USB support options USB_DEBUG # enable debug msgs device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface (USB 2.0) device xhci # XHCI PCI->USB interface (USB 3.0) device usb # USB Bus (required) device ukbd # Keyboard device umass # Disks/Mass storage - Requires scbus and da # Sound support device sound # Generic sound driver (required) device snd_cmi # CMedia CMI8338/CMI8738 device snd_csa # Crystal Semiconductor CS461x/428x device snd_emu10kx # Creative SoundBlaster Live! and Audigy device snd_es137x # Ensoniq AudioPCI ES137x device snd_hda # Intel High Definition Audio device snd_ich # Intel, NVidia and other ICH AC'97 Audio device snd_via8233 # VIA VT8233x Audio # MMC/SD device mmc # MMC/SD bus device mmcsd # MMC/SD memory card device sdhci # Generic PCI SD Host Controller # VirtIO support device virtio # Generic VirtIO bus (required) device virtio_pci # VirtIO PCI device device vtnet # VirtIO Ethernet device device virtio_blk # VirtIO Block device device virtio_scsi # VirtIO SCSI device device virtio_balloon # VirtIO Memory Balloon device # HyperV drivers and enhancement support device hyperv # HyperV drivers # Xen HVM Guest Optimizations # NOTE: XENHVM depends on xenpci. They must be added or removed together. options XENHVM # Xen HVM kernel infrastructure device xenpci # Xen HVM Hypervisor services driver # Netmap provides direct access to TX/RX rings on supported NICs device netmap # netmap(4) support # evdev interface options EVDEV_SUPPORT # evdev support in legacy drivers device evdev # input event device support device uinput # install /dev/uinput cdev Index: projects/runtime-coverage-v2/sys/amd64/conf/MINIMAL =================================================================== --- projects/runtime-coverage-v2/sys/amd64/conf/MINIMAL (revision 347075) +++ projects/runtime-coverage-v2/sys/amd64/conf/MINIMAL (revision 347076) @@ -1,154 +1,155 @@ # # MINIMAL -- Mostly Minimal kernel configuration file for FreeBSD/amd64 # # Many definitions of minimal are possible. The one this file follows is # GENERIC, minus all functionality that can be replaced by loading kernel # modules. # # Exceptions: # o While UFS is buildable as a module, the current module lacks # some features (ACL, GJOURNAL) that GENERIC includes. # o acpi as a module has been reported flaky and not well tested, so # is included in the kernel. # o random is included due to uncertainty... # o Many networking things are included # # For now, please run changes to this list past imp@freebsd.org # # For more information on this file, please read the config(5) manual page, # and/or the handbook section on Kernel Configuration Files: # # https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES.
# # $FreeBSD$ cpu HAMMER ident MINIMAL makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support options SCHED_ULE # ULE scheduler options NUMA # Non-Uniform Memory Architecture support options PREEMPTION # Enable kernel thread preemption options INET # InterNETworking options INET6 # IPv6 communications protocols options TCP_OFFLOAD # TCP offload options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support options UFS_ACL # Support for access control lists options UFS_DIRHASH # Improve performance on big directories options UFS_GJOURNAL # Enable gjournal-based UFS journaling options QUOTA # Enable disk quotas for UFS options MD_ROOT # MD is a potential root device options COMPAT_FREEBSD32 # Compatible with i386 binaries options COMPAT_FREEBSD4 # Compatible with FreeBSD4 options COMPAT_FREEBSD5 # Compatible with FreeBSD5 options COMPAT_FREEBSD6 # Compatible with FreeBSD6 options COMPAT_FREEBSD7 # Compatible with FreeBSD7 options COMPAT_FREEBSD9 # Compatible with FreeBSD9 options COMPAT_FREEBSD10 # Compatible with FreeBSD10 options COMPAT_FREEBSD11 # Compatible with FreeBSD11 +options COMPAT_FREEBSD12 # Compatible with FreeBSD12 options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI options KTRACE # ktrace(1) support options STACK # stack(9) support options SYSVSHM # SYSV-style shared memory options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options KBD_INSTALL_CDEV # install a CDEV entry in /dev options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_FRAME # Ensure frames are compiled in options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data options INCLUDE_CONFIG_FILE # Include this file in kernel # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB # Support DDB. options GDB # Support remote GDB. options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default # Make an SMP-capable kernel by default options SMP # Symmetric MultiProcessor Kernel options EARLY_AP_STARTUP # CPU frequency control device cpufreq # Bus support. 
device acpi options ACPI_DMAR device pci # atkbdc0 controls both the keyboard and the PS/2 mouse device atkbdc # AT keyboard controller device atkbd # AT keyboard device psm # PS/2 mouse device kbdmux # keyboard multiplexer device vga # VGA video card driver options VESA # Add support for VESA BIOS Extensions (VBE) device splash # Splash screen and screen saver support # syscons is the default console driver, resembling an SCO console device sc options SC_PIXEL_MODE # add support for the raster text mode # vt is the new video console driver device vt device vt_vga device vt_efifb device agp # support several AGP chipsets # Pseudo devices. device loop # Network loopback device random # Entropy device device padlock_rng # VIA Padlock RNG device rdrand_rng # Intel Bull Mountain RNG device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device gif # IPv6 and IPv4 tunneling # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. device bpf # Berkeley packet filter # Xen HVM Guest Optimizations # NOTE: XENHVM depends on xenpci. They must be added or removed together. options XENHVM # Xen HVM kernel infrastructure device xenpci # Xen HVM Hypervisor services driver # evdev interface options EVDEV_SUPPORT # evdev support in legacy drivers device evdev # input event device support device uinput # install /dev/uinput cdev Index: projects/runtime-coverage-v2/sys/amd64/linux/linux.h =================================================================== --- projects/runtime-coverage-v2/sys/amd64/linux/linux.h (revision 347075) +++ projects/runtime-coverage-v2/sys/amd64/linux/linux.h (revision 347076) @@ -1,471 +1,463 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 Dmitry Chagin * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _AMD64_LINUX_H_ #define _AMD64_LINUX_H_ #include #include #define LINUX_LEGACY_SYSCALLS /* * debugging support */ extern u_char linux_debug_map[]; #define ldebug(name) isclr(linux_debug_map, LINUX_SYS_linux_ ## name) #define ARGS(nm, fmt) "linux(%ld/%ld): "#nm"("fmt")\n", \ (long)td->td_proc->p_pid, (long)td->td_tid #define LMSG(fmt) "linux(%ld/%ld): "fmt"\n", \ (long)td->td_proc->p_pid, (long)td->td_tid #define LINUX_DTRACE linuxulator #define PTRIN(v) (void *)(v) #define PTROUT(v) (uintptr_t)(v) #define CP(src,dst,fld) do { (dst).fld = (src).fld; } while (0) #define CP2(src,dst,sfld,dfld) do { (dst).dfld = (src).sfld; } while (0) #define PTRIN_CP(src,dst,fld) \ do { (dst).fld = PTRIN((src).fld); } while (0) /* * Provide a separate set of types for the Linux types. */ typedef int32_t l_int; typedef int64_t l_long; typedef int16_t l_short; typedef uint32_t l_uint; typedef uint64_t l_ulong; typedef uint16_t l_ushort; typedef l_ulong l_uintptr_t; typedef l_long l_clock_t; typedef l_int l_daddr_t; typedef l_ulong l_dev_t; typedef l_uint l_gid_t; typedef l_ushort l_gid16_t; typedef l_uint l_uid_t; typedef l_ushort l_uid16_t; typedef l_ulong l_ino_t; typedef l_int l_key_t; typedef l_long l_loff_t; typedef l_uint l_mode_t; typedef l_long l_off_t; typedef l_int l_pid_t; typedef l_ulong l_size_t; typedef l_long l_ssize_t; typedef l_long l_suseconds_t; typedef l_long l_time_t; typedef l_int l_timer_t; typedef l_int l_mqd_t; typedef l_size_t l_socklen_t; typedef l_ulong l_fd_mask; typedef struct { l_int val[2]; } l_fsid_t; typedef struct { l_time_t tv_sec; l_suseconds_t tv_usec; } l_timeval; #define l_fd_set fd_set /* * Miscellaneous */ #define LINUX_AT_COUNT 19 /* Count of used aux entry types. */ struct l___sysctl_args { l_uintptr_t name; l_int nlen; l_uintptr_t oldval; l_uintptr_t oldlenp; l_uintptr_t newval; l_size_t newlen; l_ulong __spare[4]; }; /* Resource limits */ #define LINUX_RLIMIT_CPU 0 #define LINUX_RLIMIT_FSIZE 1 #define LINUX_RLIMIT_DATA 2 #define LINUX_RLIMIT_STACK 3 #define LINUX_RLIMIT_CORE 4 #define LINUX_RLIMIT_RSS 5 #define LINUX_RLIMIT_NPROC 6 #define LINUX_RLIMIT_NOFILE 7 #define LINUX_RLIMIT_MEMLOCK 8 #define LINUX_RLIMIT_AS 9 /* Address space limit */ #define LINUX_RLIM_NLIMITS 10 struct l_rlimit { l_ulong rlim_cur; l_ulong rlim_max; }; /* * stat family of syscalls */ struct l_timespec { l_time_t tv_sec; l_long tv_nsec; }; struct l_newstat { l_dev_t st_dev; l_ino_t st_ino; l_ulong st_nlink; l_uint st_mode; l_uid_t st_uid; l_gid_t st_gid; l_uint __st_pad1; l_dev_t st_rdev; l_off_t st_size; l_long st_blksize; l_long st_blocks; struct l_timespec st_atim; struct l_timespec st_mtim; struct l_timespec st_ctim; l_long __unused1; l_long __unused2; l_long __unused3; }; /* sigaction flags */ #define LINUX_SA_NOCLDSTOP 0x00000001 #define LINUX_SA_NOCLDWAIT 0x00000002 #define LINUX_SA_SIGINFO 0x00000004 #define LINUX_SA_RESTORER 0x04000000 #define LINUX_SA_ONSTACK 0x08000000 #define LINUX_SA_RESTART 0x10000000 #define LINUX_SA_INTERRUPT 0x20000000 #define LINUX_SA_NOMASK 0x40000000 #define LINUX_SA_ONESHOT 0x80000000 /* sigprocmask actions */ #define LINUX_SIG_BLOCK 0 #define LINUX_SIG_UNBLOCK 1 #define LINUX_SIG_SETMASK 2 /* sigaltstack */ #define LINUX_MINSIGSTKSZ 2048 typedef void (*l_handler_t)(l_int); typedef struct { l_handler_t lsa_handler; l_ulong lsa_flags; l_uintptr_t lsa_restorer; l_sigset_t lsa_mask; } l_sigaction_t; typedef struct { l_uintptr_t ss_sp; l_int ss_flags; l_size_t ss_size; } l_stack_t; struct l_fpstate { u_int16_t cwd; u_int16_t swd; 
u_int16_t twd; u_int16_t fop; u_int64_t rip; u_int64_t rdp; u_int32_t mxcsr; u_int32_t mxcsr_mask; u_int32_t st_space[32]; u_int32_t xmm_space[64]; u_int32_t reserved2[24]; }; struct l_sigcontext { l_ulong sc_r8; l_ulong sc_r9; l_ulong sc_r10; l_ulong sc_r11; l_ulong sc_r12; l_ulong sc_r13; l_ulong sc_r14; l_ulong sc_r15; l_ulong sc_rdi; l_ulong sc_rsi; l_ulong sc_rbp; l_ulong sc_rbx; l_ulong sc_rdx; l_ulong sc_rax; l_ulong sc_rcx; l_ulong sc_rsp; l_ulong sc_rip; l_ulong sc_rflags; l_ushort sc_cs; l_ushort sc_gs; l_ushort sc_fs; l_ushort sc___pad0; l_ulong sc_err; l_ulong sc_trapno; l_sigset_t sc_mask; l_ulong sc_cr2; struct l_fpstate *sc_fpstate; l_ulong sc_reserved1[8]; }; struct l_ucontext { l_ulong uc_flags; l_uintptr_t uc_link; l_stack_t uc_stack; struct l_sigcontext uc_mcontext; l_sigset_t uc_sigmask; }; #define LINUX_SI_PREAMBLE_SIZE (4 * sizeof(int)) #define LINUX_SI_MAX_SIZE 128 #define LINUX_SI_PAD_SIZE ((LINUX_SI_MAX_SIZE - \ LINUX_SI_PREAMBLE_SIZE) / sizeof(l_int)) typedef union l_sigval { l_int sival_int; l_uintptr_t sival_ptr; } l_sigval_t; typedef struct l_siginfo { l_int lsi_signo; l_int lsi_errno; l_int lsi_code; union { l_int _pad[LINUX_SI_PAD_SIZE]; struct { l_pid_t _pid; l_uid_t _uid; } _kill; struct { l_timer_t _tid; l_int _overrun; char _pad[sizeof(l_uid_t) - sizeof(int)]; union l_sigval _sigval; l_uint _sys_private; } _timer; struct { l_pid_t _pid; /* sender's pid */ l_uid_t _uid; /* sender's uid */ union l_sigval _sigval; } _rt; struct { l_pid_t _pid; /* which child */ l_uid_t _uid; /* sender's uid */ l_int _status; /* exit code */ l_clock_t _utime; l_clock_t _stime; } _sigchld; struct { l_uintptr_t _addr; /* Faulting insn/memory ref. */ } _sigfault; struct { l_long _band; /* POLL_IN,POLL_OUT,POLL_MSG */ l_int _fd; } _sigpoll; } _sifields; } l_siginfo_t; #define lsi_pid _sifields._kill._pid #define lsi_uid _sifields._kill._uid #define lsi_tid _sifields._timer._tid #define lsi_overrun _sifields._timer._overrun #define lsi_sys_private _sifields._timer._sys_private #define lsi_status _sifields._sigchld._status #define lsi_utime _sifields._sigchld._utime #define lsi_stime _sifields._sigchld._stime #define lsi_value _sifields._rt._sigval #define lsi_int _sifields._rt._sigval.sival_int #define lsi_ptr _sifields._rt._sigval.sival_ptr #define lsi_addr _sifields._sigfault._addr #define lsi_band _sifields._sigpoll._band #define lsi_fd _sifields._sigpoll._fd /* * We make the stack look like Linux expects it when calling a signal * handler, but use the BSD way of calling the handler and sigreturn(). * This means that we need to pass the pointer to the handler too. * It is appended to the frame to not interfere with the rest of it. 
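 *
 * Concretely, sf_handler in struct l_rt_sigframe below is the part
 * Linux userland never sees as ABI: the guest-visible frame consists
 * of sf_sc and sf_si, while the trailing handler pointer exists only
 * so the FreeBSD-side signal trampoline can locate the handler.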
*/ struct l_rt_sigframe { struct l_ucontext sf_sc; struct l_siginfo sf_si; l_handler_t sf_handler; }; /* * mount flags */ #define LINUX_MS_RDONLY 0x0001 #define LINUX_MS_NOSUID 0x0002 #define LINUX_MS_NODEV 0x0004 #define LINUX_MS_NOEXEC 0x0008 #define LINUX_MS_REMOUNT 0x0020 /* * SystemV IPC defines */ #define LINUX_IPC_RMID 0 #define LINUX_IPC_SET 1 #define LINUX_IPC_STAT 2 #define LINUX_IPC_INFO 3 #define LINUX_SHM_LOCK 11 #define LINUX_SHM_UNLOCK 12 #define LINUX_SHM_STAT 13 #define LINUX_SHM_INFO 14 #define LINUX_SHM_RDONLY 0x1000 #define LINUX_SHM_RND 0x2000 #define LINUX_SHM_REMAP 0x4000 /* semctl commands */ #define LINUX_GETPID 11 #define LINUX_GETVAL 12 #define LINUX_GETALL 13 #define LINUX_GETNCNT 14 #define LINUX_GETZCNT 15 #define LINUX_SETVAL 16 #define LINUX_SETALL 17 #define LINUX_SEM_STAT 18 #define LINUX_SEM_INFO 19 union l_semun { l_int val; l_uintptr_t buf; l_uintptr_t array; l_uintptr_t __buf; l_uintptr_t __pad; }; -struct l_sockaddr { - l_ushort sa_family; - char sa_data[14]; -}; - struct l_ifmap { l_ulong mem_start; l_ulong mem_end; l_ushort base_addr; u_char irq; u_char dma; u_char port; } __packed; - -#define LINUX_IFHWADDRLEN 6 -#define LINUX_IFNAMSIZ 16 struct l_ifreq { union { char ifrn_name[LINUX_IFNAMSIZ]; } ifr_ifrn; union { struct l_sockaddr ifru_addr; struct l_sockaddr ifru_dstaddr; struct l_sockaddr ifru_broadaddr; struct l_sockaddr ifru_netmask; struct l_sockaddr ifru_hwaddr; l_short ifru_flags[1]; l_int ifru_ivalue; l_int ifru_mtu; struct l_ifmap ifru_map; char ifru_slave[LINUX_IFNAMSIZ]; l_uintptr_t ifru_data; } ifr_ifru; } __packed; #define ifr_name ifr_ifrn.ifrn_name /* Interface name */ #define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */ #define ifr_ifindex ifr_ifru.ifru_ivalue /* Interface index */ struct l_ifconf { int ifc_len; union { l_uintptr_t ifcu_buf; l_uintptr_t ifcu_req; } ifc_ifcu; }; #define ifc_buf ifc_ifcu.ifcu_buf #define ifc_req ifc_ifcu.ifcu_req /* * poll() */ #define LINUX_POLLIN 0x0001 #define LINUX_POLLPRI 0x0002 #define LINUX_POLLOUT 0x0004 #define LINUX_POLLERR 0x0008 #define LINUX_POLLHUP 0x0010 #define LINUX_POLLNVAL 0x0020 #define LINUX_POLLRDNORM 0x0040 #define LINUX_POLLRDBAND 0x0080 #define LINUX_POLLWRNORM 0x0100 #define LINUX_POLLWRBAND 0x0200 #define LINUX_POLLMSG 0x0400 struct l_pollfd { l_int fd; l_short events; l_short revents; }; #define LINUX_ARCH_SET_GS 0x1001 #define LINUX_ARCH_SET_FS 0x1002 #define LINUX_ARCH_GET_FS 0x1003 #define LINUX_ARCH_GET_GS 0x1004 #define linux_copyout_rusage(r, u) copyout(r, u, sizeof(*r)) /* robust futexes */ struct linux_robust_list { l_uintptr_t next; }; struct linux_robust_list_head { struct linux_robust_list list; l_long futex_offset; l_uintptr_t pending_list; }; #endif /* !_AMD64_LINUX_H_ */ Index: projects/runtime-coverage-v2/sys/amd64/linux32/linux.h =================================================================== --- projects/runtime-coverage-v2/sys/amd64/linux32/linux.h (revision 347075) +++ projects/runtime-coverage-v2/sys/amd64/linux32/linux.h (revision 347076) @@ -1,656 +1,648 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2001 Doug Rabson * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. 
* 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _AMD64_LINUX_H_ #define _AMD64_LINUX_H_ #include #include #define LINUX_LEGACY_SYSCALLS /* * debugging support */ extern u_char linux_debug_map[]; #define ldebug(name) isclr(linux_debug_map, LINUX32_SYS_linux_ ## name) #define ARGS(nm, fmt) "linux(%ld/%ld): "#nm"("fmt")\n", \ (long)td->td_proc->p_pid, (long)td->td_tid #define LMSG(fmt) "linux(%ld/%ld): "fmt"\n", \ (long)td->td_proc->p_pid, (long)td->td_tid #define LINUX_DTRACE linuxulator32 #define LINUX32_MAXUSER ((1ul << 32) - PAGE_SIZE) #define LINUX32_SHAREDPAGE (LINUX32_MAXUSER - PAGE_SIZE) #define LINUX32_USRSTACK LINUX32_SHAREDPAGE /* XXX 16 = sizeof(linux32_ps_strings) */ #define LINUX32_PS_STRINGS (LINUX32_USRSTACK - 16) #define LINUX32_MAXDSIZ (512 * 1024 * 1024) /* 512MB */ #define LINUX32_MAXSSIZ (64 * 1024 * 1024) /* 64MB */ #define LINUX32_MAXVMEM 0 /* Unlimited */ #define PTRIN(v) (void *)(uintptr_t)(v) #define PTROUT(v) (l_uintptr_t)(uintptr_t)(v) #define CP(src,dst,fld) do { (dst).fld = (src).fld; } while (0) #define CP2(src,dst,sfld,dfld) do { (dst).dfld = (src).sfld; } while (0) #define PTRIN_CP(src,dst,fld) \ do { (dst).fld = PTRIN((src).fld); } while (0) /* * Provide a separate set of types for the Linux types. */ typedef int l_int; typedef int32_t l_long; typedef int64_t l_longlong; typedef short l_short; typedef unsigned int l_uint; typedef uint32_t l_ulong; typedef uint64_t l_ulonglong; typedef unsigned short l_ushort; typedef l_ulong l_uintptr_t; typedef l_long l_clock_t; typedef l_int l_daddr_t; typedef l_ushort l_dev_t; typedef l_uint l_gid_t; typedef l_ushort l_gid16_t; typedef l_ulong l_ino_t; typedef l_int l_key_t; typedef l_longlong l_loff_t; typedef l_ushort l_mode_t; typedef l_long l_off_t; typedef l_int l_pid_t; typedef l_uint l_size_t; typedef l_long l_suseconds_t; typedef l_long l_time_t; typedef l_uint l_uid_t; typedef l_ushort l_uid16_t; typedef l_int l_timer_t; typedef l_int l_mqd_t; typedef l_ulong l_fd_mask; typedef struct { l_int val[2]; } __packed l_fsid_t; typedef struct { l_time_t tv_sec; l_suseconds_t tv_usec; } l_timeval; #define l_fd_set fd_set /* * Miscellaneous */ #define LINUX_AT_COUNT 20 /* Count of used aux entry types. * Keep this synchronized with * linux_fixup_elf() code. 
*/ struct l___sysctl_args { l_uintptr_t name; l_int nlen; l_uintptr_t oldval; l_uintptr_t oldlenp; l_uintptr_t newval; l_size_t newlen; l_ulong __spare[4]; } __packed; /* Resource limits */ #define LINUX_RLIMIT_CPU 0 #define LINUX_RLIMIT_FSIZE 1 #define LINUX_RLIMIT_DATA 2 #define LINUX_RLIMIT_STACK 3 #define LINUX_RLIMIT_CORE 4 #define LINUX_RLIMIT_RSS 5 #define LINUX_RLIMIT_NPROC 6 #define LINUX_RLIMIT_NOFILE 7 #define LINUX_RLIMIT_MEMLOCK 8 #define LINUX_RLIMIT_AS 9 /* Address space limit */ #define LINUX_RLIM_NLIMITS 10 struct l_rlimit { l_ulong rlim_cur; l_ulong rlim_max; } __packed; struct l_rusage { l_timeval ru_utime; l_timeval ru_stime; l_long ru_maxrss; l_long ru_ixrss; l_long ru_idrss; l_long ru_isrss; l_long ru_minflt; l_long ru_majflt; l_long ru_nswap; l_long ru_inblock; l_long ru_oublock; l_long ru_msgsnd; l_long ru_msgrcv; l_long ru_nsignals; l_long ru_nvcsw; l_long ru_nivcsw; } __packed; struct l_mmap_argv { l_uintptr_t addr; l_size_t len; l_int prot; l_int flags; l_int fd; l_ulong pgoff; }; /* * stat family of syscalls */ struct l_timespec { l_time_t tv_sec; l_long tv_nsec; } __packed; struct l_newstat { l_ushort st_dev; l_ushort __pad1; l_ulong st_ino; l_ushort st_mode; l_ushort st_nlink; l_ushort st_uid; l_ushort st_gid; l_ushort st_rdev; l_ushort __pad2; l_ulong st_size; l_ulong st_blksize; l_ulong st_blocks; struct l_timespec st_atim; struct l_timespec st_mtim; struct l_timespec st_ctim; l_ulong __unused4; l_ulong __unused5; } __packed; struct l_stat { l_ushort st_dev; l_ulong st_ino; l_ushort st_mode; l_ushort st_nlink; l_ushort st_uid; l_ushort st_gid; l_ushort st_rdev; l_long st_size; struct l_timespec st_atim; struct l_timespec st_mtim; struct l_timespec st_ctim; l_long st_blksize; l_long st_blocks; l_ulong st_flags; l_ulong st_gen; }; struct l_stat64 { l_ushort st_dev; u_char __pad0[10]; l_ulong __st_ino; l_uint st_mode; l_uint st_nlink; l_ulong st_uid; l_ulong st_gid; l_ushort st_rdev; u_char __pad3[10]; l_longlong st_size; l_ulong st_blksize; l_ulong st_blocks; l_ulong __pad4; struct l_timespec st_atim; struct l_timespec st_mtim; struct l_timespec st_ctim; l_ulonglong st_ino; } __packed; struct l_statfs64 { l_int f_type; l_int f_bsize; uint64_t f_blocks; uint64_t f_bfree; uint64_t f_bavail; uint64_t f_files; uint64_t f_ffree; l_fsid_t f_fsid; l_int f_namelen; l_int f_frsize; l_int f_flags; l_int f_spare[4]; } __packed; /* sigaction flags */ #define LINUX_SA_NOCLDSTOP 0x00000001 #define LINUX_SA_NOCLDWAIT 0x00000002 #define LINUX_SA_SIGINFO 0x00000004 #define LINUX_SA_RESTORER 0x04000000 #define LINUX_SA_ONSTACK 0x08000000 #define LINUX_SA_RESTART 0x10000000 #define LINUX_SA_INTERRUPT 0x20000000 #define LINUX_SA_NOMASK 0x40000000 #define LINUX_SA_ONESHOT 0x80000000 /* sigprocmask actions */ #define LINUX_SIG_BLOCK 0 #define LINUX_SIG_UNBLOCK 1 #define LINUX_SIG_SETMASK 2 /* sigaltstack */ #define LINUX_MINSIGSTKSZ 2048 typedef l_uintptr_t l_handler_t; typedef l_ulong l_osigset_t; typedef struct { l_handler_t lsa_handler; l_osigset_t lsa_mask; l_ulong lsa_flags; l_uintptr_t lsa_restorer; } __packed l_osigaction_t; typedef struct { l_handler_t lsa_handler; l_ulong lsa_flags; l_uintptr_t lsa_restorer; l_sigset_t lsa_mask; } __packed l_sigaction_t; typedef struct { l_uintptr_t ss_sp; l_int ss_flags; l_size_t ss_size; } __packed l_stack_t; /* The Linux sigcontext, pretty much a standard 386 trapframe. 
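 *
 * The field order below mirrors that layout: segment selectors first,
 * then the general registers in pusha order (%edi at the lowest
 * address), then the trap number, error code and the %eip/%cs/%eflags
 * words the CPU pushes on an exception.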
*/ struct l_sigcontext { l_uint sc_gs; l_uint sc_fs; l_uint sc_es; l_uint sc_ds; l_uint sc_edi; l_uint sc_esi; l_uint sc_ebp; l_uint sc_esp; l_uint sc_ebx; l_uint sc_edx; l_uint sc_ecx; l_uint sc_eax; l_uint sc_trapno; l_uint sc_err; l_uint sc_eip; l_uint sc_cs; l_uint sc_eflags; l_uint sc_esp_at_signal; l_uint sc_ss; l_uint sc_387; l_uint sc_mask; l_uint sc_cr2; } __packed; struct l_ucontext { l_ulong uc_flags; l_uintptr_t uc_link; l_stack_t uc_stack; struct l_sigcontext uc_mcontext; l_sigset_t uc_sigmask; } __packed; #define LINUX_SI_MAX_SIZE 128 #define LINUX_SI_PAD_SIZE ((LINUX_SI_MAX_SIZE/sizeof(l_int)) - 3) typedef union l_sigval { l_int sival_int; l_uintptr_t sival_ptr; } l_sigval_t; typedef struct l_siginfo { l_int lsi_signo; l_int lsi_errno; l_int lsi_code; union { l_int _pad[LINUX_SI_PAD_SIZE]; struct { l_pid_t _pid; l_uid_t _uid; } __packed _kill; struct { l_timer_t _tid; l_int _overrun; char _pad[sizeof(l_uid_t) - sizeof(l_int)]; l_sigval_t _sigval; l_int _sys_private; } __packed _timer; struct { l_pid_t _pid; /* sender's pid */ l_uid_t _uid; /* sender's uid */ l_sigval_t _sigval; } __packed _rt; struct { l_pid_t _pid; /* which child */ l_uid_t _uid; /* sender's uid */ l_int _status; /* exit code */ l_clock_t _utime; l_clock_t _stime; } __packed _sigchld; struct { l_uintptr_t _addr; /* Faulting insn/memory ref. */ } __packed _sigfault; struct { l_long _band; /* POLL_IN,POLL_OUT,POLL_MSG */ l_int _fd; } __packed _sigpoll; } _sifields; } __packed l_siginfo_t; #define lsi_pid _sifields._kill._pid #define lsi_uid _sifields._kill._uid #define lsi_tid _sifields._timer._tid #define lsi_overrun _sifields._timer._overrun #define lsi_sys_private _sifields._timer._sys_private #define lsi_status _sifields._sigchld._status #define lsi_utime _sifields._sigchld._utime #define lsi_stime _sifields._sigchld._stime #define lsi_value _sifields._rt._sigval #define lsi_int _sifields._rt._sigval.sival_int #define lsi_ptr _sifields._rt._sigval.sival_ptr #define lsi_addr _sifields._sigfault._addr #define lsi_band _sifields._sigpoll._band #define lsi_fd _sifields._sigpoll._fd struct l_fpreg { u_int16_t significand[4]; u_int16_t exponent; } __packed; struct l_fpxreg { u_int16_t significand[4]; u_int16_t exponent; u_int16_t padding[3]; } __packed; struct l_xmmreg { u_int32_t element[4]; } __packed; struct l_fpstate { /* Regular FPU environment */ u_int32_t cw; u_int32_t sw; u_int32_t tag; u_int32_t ipoff; u_int32_t cssel; u_int32_t dataoff; u_int32_t datasel; struct l_fpreg _st[8]; u_int16_t status; u_int16_t magic; /* 0xffff = regular FPU data */ /* FXSR FPU environment */ u_int32_t _fxsr_env[6]; /* env is ignored. */ u_int32_t mxcsr; u_int32_t reserved; struct l_fpxreg _fxsr_st[8]; /* reg data is ignored. */ struct l_xmmreg _xmm[8]; u_int32_t padding[56]; } __packed; /* * We make the stack look like Linux expects it when calling a signal * handler, but use the BSD way of calling the handler and sigreturn(). * This means that we need to pass the pointer to the handler too. * It is appended to the frame to not interfere with the rest of it. 
*/ struct l_sigframe { l_int sf_sig; struct l_sigcontext sf_sc; struct l_fpstate sf_fpstate; l_uint sf_extramask[1]; l_handler_t sf_handler; } __packed; struct l_rt_sigframe { l_int sf_sig; l_uintptr_t sf_siginfo; l_uintptr_t sf_ucontext; l_siginfo_t sf_si; struct l_ucontext sf_sc; l_handler_t sf_handler; } __packed; /* * arch specific open/fcntl flags */ #define LINUX_F_GETLK64 12 #define LINUX_F_SETLK64 13 #define LINUX_F_SETLKW64 14 union l_semun { l_int val; l_uintptr_t buf; l_uintptr_t array; l_uintptr_t __buf; l_uintptr_t __pad; } __packed; -struct l_sockaddr { - l_ushort sa_family; - char sa_data[14]; -} __packed; - struct l_ifmap { l_ulong mem_start; l_ulong mem_end; l_ushort base_addr; u_char irq; u_char dma; u_char port; } __packed; - -#define LINUX_IFHWADDRLEN 6 -#define LINUX_IFNAMSIZ 16 struct l_ifreq { union { char ifrn_name[LINUX_IFNAMSIZ]; } ifr_ifrn; union { struct l_sockaddr ifru_addr; struct l_sockaddr ifru_dstaddr; struct l_sockaddr ifru_broadaddr; struct l_sockaddr ifru_netmask; struct l_sockaddr ifru_hwaddr; l_short ifru_flags[1]; l_int ifru_ivalue; l_int ifru_mtu; struct l_ifmap ifru_map; char ifru_slave[LINUX_IFNAMSIZ]; l_uintptr_t ifru_data; } ifr_ifru; } __packed; #define ifr_name ifr_ifrn.ifrn_name /* Interface name */ #define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */ #define ifr_ifindex ifr_ifru.ifru_ivalue /* Interface index */ struct l_ifconf { int ifc_len; union { l_uintptr_t ifcu_buf; l_uintptr_t ifcu_req; } ifc_ifcu; } __packed; #define ifc_buf ifc_ifcu.ifcu_buf #define ifc_req ifc_ifcu.ifcu_req /* * poll() */ #define LINUX_POLLIN 0x0001 #define LINUX_POLLPRI 0x0002 #define LINUX_POLLOUT 0x0004 #define LINUX_POLLERR 0x0008 #define LINUX_POLLHUP 0x0010 #define LINUX_POLLNVAL 0x0020 #define LINUX_POLLRDNORM 0x0040 #define LINUX_POLLRDBAND 0x0080 #define LINUX_POLLWRNORM 0x0100 #define LINUX_POLLWRBAND 0x0200 #define LINUX_POLLMSG 0x0400 struct l_pollfd { l_int fd; l_short events; l_short revents; } __packed; struct l_user_desc { l_uint entry_number; l_uint base_addr; l_uint limit; l_uint seg_32bit:1; l_uint contents:2; l_uint read_exec_only:1; l_uint limit_in_pages:1; l_uint seg_not_present:1; l_uint useable:1; }; #define LINUX_LOWERWORD 0x0000ffff /* * Macros which do the same thing as those in Linux include/asm-um/ldt-i386.h. * These convert a Linux user space descriptor to a machine one. */ #define LINUX_LDT_entry_a(info) \ ((((info)->base_addr & LINUX_LOWERWORD) << 16) | \ ((info)->limit & LINUX_LOWERWORD)) #define LINUX_ENTRY_B_READ_EXEC_ONLY 9 #define LINUX_ENTRY_B_CONTENTS 10 #define LINUX_ENTRY_B_SEG_NOT_PRESENT 15 #define LINUX_ENTRY_B_BASE_ADDR 16 #define LINUX_ENTRY_B_USEABLE 20 #define LINUX_ENTRY_B_SEG32BIT 22 #define LINUX_ENTRY_B_LIMIT 23 #define LINUX_LDT_entry_b(info) \ (((info)->base_addr & 0xff000000) | \ ((info)->limit & 0xf0000) | \ ((info)->contents << LINUX_ENTRY_B_CONTENTS) | \ (((info)->seg_not_present == 0) << LINUX_ENTRY_B_SEG_NOT_PRESENT) | \ (((info)->base_addr & 0x00ff0000) >> LINUX_ENTRY_B_BASE_ADDR) | \ (((info)->read_exec_only == 0) << LINUX_ENTRY_B_READ_EXEC_ONLY) | \ ((info)->seg_32bit << LINUX_ENTRY_B_SEG32BIT) | \ ((info)->useable << LINUX_ENTRY_B_USEABLE) | \ ((info)->limit_in_pages << LINUX_ENTRY_B_LIMIT) | 0x7000) #define LINUX_LDT_empty(info) \ ((info)->base_addr == 0 && \ (info)->limit == 0 && \ (info)->contents == 0 && \ (info)->seg_not_present == 1 && \ (info)->read_exec_only == 1 && \ (info)->seg_32bit == 0 && \ (info)->limit_in_pages == 0 && \ (info)->useable == 0) /* * Macros for converting segments.
* They do the same as those in arch/i386/kernel/process.c in Linux. */ #define LINUX_GET_BASE(desc) \ ((((desc)->a >> 16) & LINUX_LOWERWORD) | \ (((desc)->b << 16) & 0x00ff0000) | \ ((desc)->b & 0xff000000)) #define LINUX_GET_LIMIT(desc) \ (((desc)->a & LINUX_LOWERWORD) | \ ((desc)->b & 0xf0000)) #define LINUX_GET_32BIT(desc) \ (((desc)->b >> LINUX_ENTRY_B_SEG32BIT) & 1) #define LINUX_GET_CONTENTS(desc) \ (((desc)->b >> LINUX_ENTRY_B_CONTENTS) & 3) #define LINUX_GET_WRITABLE(desc) \ (((desc)->b >> LINUX_ENTRY_B_READ_EXEC_ONLY) & 1) #define LINUX_GET_LIMIT_PAGES(desc) \ (((desc)->b >> LINUX_ENTRY_B_LIMIT) & 1) #define LINUX_GET_PRESENT(desc) \ (((desc)->b >> LINUX_ENTRY_B_SEG_NOT_PRESENT) & 1) #define LINUX_GET_USEABLE(desc) \ (((desc)->b >> LINUX_ENTRY_B_USEABLE) & 1) struct iovec; struct uio; struct l_iovec32 { uint32_t iov_base; l_size_t iov_len; }; int linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp, int error); int linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop); int linux_copyout_rusage(struct rusage *ru, void *uaddr); /* robust futexes */ struct linux_robust_list { l_uintptr_t next; }; struct linux_robust_list_head { struct linux_robust_list list; l_long futex_offset; l_uintptr_t pending_list; }; #endif /* !_AMD64_LINUX_H_ */ Index: projects/runtime-coverage-v2/sys/amd64/vmm/vmm_instruction_emul.c =================================================================== --- projects/runtime-coverage-v2/sys/amd64/vmm/vmm_instruction_emul.c (revision 347075) +++ projects/runtime-coverage-v2/sys/amd64/vmm/vmm_instruction_emul.c (revision 347076) @@ -1,2546 +1,2646 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 Sandvine, Inc. * Copyright (c) 2012 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #ifdef _KERNEL #include #include #include #include #include #include #include #include #else /* !_KERNEL */ #include #include #include #include #include #include #define KASSERT(exp,msg) assert((exp)) #endif /* _KERNEL */ #include #include #include /* struct vie_op.op_type */ enum { VIE_OP_TYPE_NONE = 0, VIE_OP_TYPE_MOV, VIE_OP_TYPE_MOVSX, VIE_OP_TYPE_MOVZX, VIE_OP_TYPE_AND, VIE_OP_TYPE_OR, VIE_OP_TYPE_SUB, VIE_OP_TYPE_TWO_BYTE, VIE_OP_TYPE_PUSH, VIE_OP_TYPE_CMP, VIE_OP_TYPE_POP, VIE_OP_TYPE_MOVS, VIE_OP_TYPE_GROUP1, VIE_OP_TYPE_STOS, VIE_OP_TYPE_BITTEST, VIE_OP_TYPE_TWOB_GRP15, + VIE_OP_TYPE_ADD, VIE_OP_TYPE_LAST }; /* struct vie_op.op_flags */ #define VIE_OP_F_IMM (1 << 0) /* 16/32-bit immediate operand */ #define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */ #define VIE_OP_F_MOFFSET (1 << 2) /* 16/32/64-bit immediate moffset */ #define VIE_OP_F_NO_MODRM (1 << 3) #define VIE_OP_F_NO_GLA_VERIFICATION (1 << 4) static const struct vie_op two_byte_opcodes[256] = { [0xAE] = { .op_byte = 0xAE, .op_type = VIE_OP_TYPE_TWOB_GRP15, }, [0xB6] = { .op_byte = 0xB6, .op_type = VIE_OP_TYPE_MOVZX, }, [0xB7] = { .op_byte = 0xB7, .op_type = VIE_OP_TYPE_MOVZX, }, [0xBA] = { .op_byte = 0xBA, .op_type = VIE_OP_TYPE_BITTEST, .op_flags = VIE_OP_F_IMM8, }, [0xBE] = { .op_byte = 0xBE, .op_type = VIE_OP_TYPE_MOVSX, }, }; static const struct vie_op one_byte_opcodes[256] = { + [0x03] = { + .op_byte = 0x03, + .op_type = VIE_OP_TYPE_ADD, + }, [0x0F] = { .op_byte = 0x0F, .op_type = VIE_OP_TYPE_TWO_BYTE }, [0x0B] = { .op_byte = 0x0B, .op_type = VIE_OP_TYPE_OR, }, [0x2B] = { .op_byte = 0x2B, .op_type = VIE_OP_TYPE_SUB, }, [0x39] = { .op_byte = 0x39, .op_type = VIE_OP_TYPE_CMP, }, [0x3B] = { .op_byte = 0x3B, .op_type = VIE_OP_TYPE_CMP, }, [0x88] = { .op_byte = 0x88, .op_type = VIE_OP_TYPE_MOV, }, [0x89] = { .op_byte = 0x89, .op_type = VIE_OP_TYPE_MOV, }, [0x8A] = { .op_byte = 0x8A, .op_type = VIE_OP_TYPE_MOV, }, [0x8B] = { .op_byte = 0x8B, .op_type = VIE_OP_TYPE_MOV, }, [0xA1] = { .op_byte = 0xA1, .op_type = VIE_OP_TYPE_MOV, .op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM, }, [0xA3] = { .op_byte = 0xA3, .op_type = VIE_OP_TYPE_MOV, .op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM, }, [0xA4] = { .op_byte = 0xA4, .op_type = VIE_OP_TYPE_MOVS, .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION }, [0xA5] = { .op_byte = 0xA5, .op_type = VIE_OP_TYPE_MOVS, .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION }, [0xAA] = { .op_byte = 0xAA, .op_type = VIE_OP_TYPE_STOS, .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION }, [0xAB] = { .op_byte = 0xAB, .op_type = VIE_OP_TYPE_STOS, .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION }, [0xC6] = { /* XXX Group 11 extended opcode - not just MOV */ .op_byte = 0xC6, .op_type = VIE_OP_TYPE_MOV, .op_flags = VIE_OP_F_IMM8, }, [0xC7] = { .op_byte = 0xC7, .op_type = VIE_OP_TYPE_MOV, .op_flags = VIE_OP_F_IMM, }, [0x23] = { .op_byte = 0x23, .op_type = VIE_OP_TYPE_AND, }, [0x80] = { /* Group 1 extended opcode */ .op_byte = 0x80, .op_type = VIE_OP_TYPE_GROUP1, .op_flags = VIE_OP_F_IMM8, }, [0x81] = { /* Group 1 extended opcode */ .op_byte = 0x81, .op_type = VIE_OP_TYPE_GROUP1, .op_flags = VIE_OP_F_IMM, }, [0x83] = { /* Group 1 extended opcode */ .op_byte = 0x83, .op_type = VIE_OP_TYPE_GROUP1, .op_flags = VIE_OP_F_IMM8, }, [0x8F] = { /* XXX Group 1A extended opcode - not just POP */ .op_byte = 0x8F, .op_type = VIE_OP_TYPE_POP, }, [0xFF] = { /* XXX Group 5 extended opcode - not just PUSH */ .op_byte = 0xFF, 
.op_type = VIE_OP_TYPE_PUSH, } }; /* struct vie.mod */ #define VIE_MOD_INDIRECT 0 #define VIE_MOD_INDIRECT_DISP8 1 #define VIE_MOD_INDIRECT_DISP32 2 #define VIE_MOD_DIRECT 3 /* struct vie.rm */ #define VIE_RM_SIB 4 #define VIE_RM_DISP32 5 #define GB (1024 * 1024 * 1024) static enum vm_reg_name gpr_map[16] = { VM_REG_GUEST_RAX, VM_REG_GUEST_RCX, VM_REG_GUEST_RDX, VM_REG_GUEST_RBX, VM_REG_GUEST_RSP, VM_REG_GUEST_RBP, VM_REG_GUEST_RSI, VM_REG_GUEST_RDI, VM_REG_GUEST_R8, VM_REG_GUEST_R9, VM_REG_GUEST_R10, VM_REG_GUEST_R11, VM_REG_GUEST_R12, VM_REG_GUEST_R13, VM_REG_GUEST_R14, VM_REG_GUEST_R15 }; static uint64_t size2mask[] = { [1] = 0xff, [2] = 0xffff, [4] = 0xffffffff, [8] = 0xffffffffffffffff, }; static int vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval) { int error; error = vm_get_register(vm, vcpuid, reg, rval); return (error); } static void vie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr) { *lhbr = 0; *reg = gpr_map[vie->reg]; /* * 64-bit mode imposes limitations on accessing legacy high byte * registers (lhbr). * * The legacy high-byte registers cannot be addressed if the REX * prefix is present. In this case the values 4, 5, 6 and 7 of the * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively. * * If the REX prefix is not present then the values 4, 5, 6 and 7 * of the 'ModRM:reg' field address the legacy high-byte registers, * %ah, %ch, %dh and %bh respectively. */ if (!vie->rex_present) { if (vie->reg & 0x4) { *lhbr = 1; *reg = gpr_map[vie->reg & 0x3]; } } } static int vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval) { uint64_t val; int error, lhbr; enum vm_reg_name reg; vie_calc_bytereg(vie, ®, &lhbr); error = vm_get_register(vm, vcpuid, reg, &val); /* * To obtain the value of a legacy high byte register shift the * base register right by 8 bits (%ah = %rax >> 8). */ if (lhbr) *rval = val >> 8; else *rval = val; return (error); } static int vie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte) { uint64_t origval, val, mask; int error, lhbr; enum vm_reg_name reg; vie_calc_bytereg(vie, ®, &lhbr); error = vm_get_register(vm, vcpuid, reg, &origval); if (error == 0) { val = byte; mask = 0xff; if (lhbr) { /* * Shift left by 8 to store 'byte' in a legacy high * byte register. */ val <<= 8; mask <<= 8; } val |= origval & ~mask; error = vm_set_register(vm, vcpuid, reg, val); } return (error); } int vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t val, int size) { int error; uint64_t origval; switch (size) { case 1: case 2: error = vie_read_register(vm, vcpuid, reg, &origval); if (error) return (error); val &= size2mask[size]; val |= origval & ~size2mask[size]; break; case 4: val &= 0xffffffffUL; break; case 8: break; default: return (EINVAL); } error = vm_set_register(vm, vcpuid, reg, val); return (error); } #define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V) /* * Return the status flags that would result from doing (x - y). 
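 *
 * The GETCC() functions below do this by performing the subtraction
 * natively and capturing the host %rflags with pushfq/popq. As a
 * worked example: getcc(4, 5, 5) runs a 32-bit 'sub' of equal
 * operands, so the returned flags have PSL_Z set and PSL_C clear,
 * while getcc(4, 4, 5) borrows and therefore returns PSL_C set.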
*/ #define GETCC(sz) \ static u_long \ getcc##sz(uint##sz##_t x, uint##sz##_t y) \ { \ u_long rflags; \ \ __asm __volatile("sub %2,%1; pushfq; popq %0" : \ "=r" (rflags), "+r" (x) : "m" (y)); \ return (rflags); \ } struct __hack GETCC(8); GETCC(16); GETCC(32); GETCC(64); static u_long getcc(int opsize, uint64_t x, uint64_t y) { KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8, ("getcc: invalid operand size %d", opsize)); if (opsize == 1) return (getcc8(x, y)); else if (opsize == 2) return (getcc16(x, y)); else if (opsize == 4) return (getcc32(x, y)); else return (getcc64(x, y)); } +/* + * Macro creation of functions getaddflags{8,16,32,64} + */ +#define GETADDFLAGS(sz) \ +static u_long \ +getaddflags##sz(uint##sz##_t x, uint##sz##_t y) \ +{ \ + u_long rflags; \ + \ + __asm __volatile("add %2,%1; pushfq; popq %0" : \ + "=r" (rflags), "+r" (x) : "m" (y)); \ + return (rflags); \ +} struct __hack + +GETADDFLAGS(8); +GETADDFLAGS(16); +GETADDFLAGS(32); +GETADDFLAGS(64); + +static u_long +getaddflags(int opsize, uint64_t x, uint64_t y) +{ + KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8, + ("getaddflags: invalid operand size %d", opsize)); + + if (opsize == 1) + return (getaddflags8(x, y)); + else if (opsize == 2) + return (getaddflags16(x, y)); + else if (opsize == 4) + return (getaddflags32(x, y)); + else + return (getaddflags64(x, y)); +} + static int emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { int error, size; enum vm_reg_name reg; uint8_t byte; uint64_t val; size = vie->opsize; error = EINVAL; switch (vie->op.op_byte) { case 0x88: /* * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m) * 88/r: mov r/m8, r8 * REX + 88/r: mov r/m8, r8 (%ah, %ch, %dh, %bh not available) */ size = 1; /* override for byte operation */ error = vie_read_bytereg(vm, vcpuid, vie, &byte); if (error == 0) error = memwrite(vm, vcpuid, gpa, byte, size, arg); break; case 0x89: /* * MOV from reg (ModRM:reg) to mem (ModRM:r/m) * 89/r: mov r/m16, r16 * 89/r: mov r/m32, r32 * REX.W + 89/r mov r/m64, r64 */ reg = gpr_map[vie->reg]; error = vie_read_register(vm, vcpuid, reg, &val); if (error == 0) { val &= size2mask[size]; error = memwrite(vm, vcpuid, gpa, val, size, arg); } break; case 0x8A: /* * MOV byte from mem (ModRM:r/m) to reg (ModRM:reg) * 8A/r: mov r8, r/m8 * REX + 8A/r: mov r8, r/m8 */ size = 1; /* override for byte operation */ error = memread(vm, vcpuid, gpa, &val, size, arg); if (error == 0) error = vie_write_bytereg(vm, vcpuid, vie, val); break; case 0x8B: /* * MOV from mem (ModRM:r/m) to reg (ModRM:reg) * 8B/r: mov r16, r/m16 * 8B/r: mov r32, r/m32 * REX.W 8B/r: mov r64, r/m64 */ error = memread(vm, vcpuid, gpa, &val, size, arg); if (error == 0) { reg = gpr_map[vie->reg]; error = vie_update_register(vm, vcpuid, reg, val, size); } break; case 0xA1: /* * MOV from seg:moffset to AX/EAX/RAX * A1: mov AX, moffs16 * A1: mov EAX, moffs32 * REX.W + A1: mov RAX, moffs64 */ error = memread(vm, vcpuid, gpa, &val, size, arg); if (error == 0) { reg = VM_REG_GUEST_RAX; error = vie_update_register(vm, vcpuid, reg, val, size); } break; case 0xA3: /* * MOV from AX/EAX/RAX to seg:moffset * A3: mov moffs16, AX * A3: mov moffs32, EAX * REX.W + A3: mov moffs64, RAX */ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val); if (error == 0) { val &= size2mask[size]; error = memwrite(vm, vcpuid, gpa, val, size, arg); } break; case 0xC6: /* * MOV from imm8 to mem (ModRM:r/m) * C6/0 mov r/m8, imm8 * REX + C6/0 
mov r/m8, imm8 */ size = 1; /* override for byte operation */ error = memwrite(vm, vcpuid, gpa, vie->immediate, size, arg); break; case 0xC7: /* * MOV from imm16/imm32 to mem (ModRM:r/m) * C7/0 mov r/m16, imm16 * C7/0 mov r/m32, imm32 * REX.W + C7/0 mov r/m64, imm32 (sign-extended to 64-bits) */ val = vie->immediate & size2mask[size]; error = memwrite(vm, vcpuid, gpa, val, size, arg); break; default: break; } return (error); } static int emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { int error, size; enum vm_reg_name reg; uint64_t val; size = vie->opsize; error = EINVAL; switch (vie->op.op_byte) { case 0xB6: /* * MOV and zero extend byte from mem (ModRM:r/m) to * reg (ModRM:reg). * * 0F B6/r movzx r16, r/m8 * 0F B6/r movzx r32, r/m8 * REX.W + 0F B6/r movzx r64, r/m8 */ /* get the first operand */ error = memread(vm, vcpuid, gpa, &val, 1, arg); if (error) break; /* get the second operand */ reg = gpr_map[vie->reg]; /* zero-extend byte */ val = (uint8_t)val; /* write the result */ error = vie_update_register(vm, vcpuid, reg, val, size); break; case 0xB7: /* * MOV and zero extend word from mem (ModRM:r/m) to * reg (ModRM:reg). * * 0F B7/r movzx r32, r/m16 * REX.W + 0F B7/r movzx r64, r/m16 */ error = memread(vm, vcpuid, gpa, &val, 2, arg); if (error) return (error); reg = gpr_map[vie->reg]; /* zero-extend word */ val = (uint16_t)val; error = vie_update_register(vm, vcpuid, reg, val, size); break; case 0xBE: /* * MOV and sign extend byte from mem (ModRM:r/m) to * reg (ModRM:reg). * * 0F BE/r movsx r16, r/m8 * 0F BE/r movsx r32, r/m8 * REX.W + 0F BE/r movsx r64, r/m8 */ /* get the first operand */ error = memread(vm, vcpuid, gpa, &val, 1, arg); if (error) break; /* get the second operand */ reg = gpr_map[vie->reg]; /* sign extend byte */ val = (int8_t)val; /* write the result */ error = vie_update_register(vm, vcpuid, reg, val, size); break; default: break; } return (error); } /* * Helper function to calculate and validate a linear address. 
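 *
 * "Validate" covers three checks, in the order performed below: the
 * segmented-address calculation itself (vie_calculate_gla()), the
 * 64-bit canonical-form check (vie_canonical_check()) and the
 * alignment check (vie_alignment_check()). A failed check injects
 * #SS, #GP or #AC into the guest and is reported through '*fault'
 * rather than as an error return.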
*/ static int get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging, int opsize, int addrsize, int prot, enum vm_reg_name seg, enum vm_reg_name gpr, uint64_t *gla, int *fault) { struct seg_desc desc; uint64_t cr0, val, rflags; int error; error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0); KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error)); error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); error = vm_get_seg_desc(vm, vcpuid, seg, &desc); KASSERT(error == 0, ("%s: error %d getting segment descriptor %d", __func__, error, seg)); error = vie_read_register(vm, vcpuid, gpr, &val); KASSERT(error == 0, ("%s: error %d getting register %d", __func__, error, gpr)); if (vie_calculate_gla(paging->cpu_mode, seg, &desc, val, opsize, addrsize, prot, gla)) { if (seg == VM_REG_GUEST_SS) vm_inject_ss(vm, vcpuid, 0); else vm_inject_gp(vm, vcpuid); goto guest_fault; } if (vie_canonical_check(paging->cpu_mode, *gla)) { if (seg == VM_REG_GUEST_SS) vm_inject_ss(vm, vcpuid, 0); else vm_inject_gp(vm, vcpuid); goto guest_fault; } if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) { vm_inject_ac(vm, vcpuid, 0); goto guest_fault; } *fault = 0; return (0); guest_fault: *fault = 1; return (0); } static int emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { #ifdef _KERNEL struct vm_copyinfo copyinfo[2]; #else struct iovec copyinfo[2]; #endif uint64_t dstaddr, srcaddr, dstgpa, srcgpa, val; uint64_t rcx, rdi, rsi, rflags; int error, fault, opsize, seg, repeat; opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize; val = 0; error = 0; /* * XXX although the MOVS instruction is only supposed to be used with * the "rep" prefix some guests like FreeBSD will use "repnz" instead. * * Empirically the "repnz" prefix has identical behavior to "rep" * and the zero flag does not make a difference. */ repeat = vie->repz_present | vie->repnz_present; if (repeat) { error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx); KASSERT(!error, ("%s: error %d getting rcx", __func__, error)); /* * The count register is %rcx, %ecx or %cx depending on the * address size of the instruction. */ if ((rcx & vie_size2mask(vie->addrsize)) == 0) { error = 0; goto done; } } /* * Source Destination Comments * -------------------------------------------- * (1) memory memory n/a * (2) memory mmio emulated * (3) mmio memory emulated * (4) mmio mmio emulated * * At this point we don't have sufficient information to distinguish * between (2), (3) and (4). We use 'vm_copy_setup()' to tease this * out because it will succeed only when operating on regular memory. * * XXX the emulation doesn't properly handle the case where 'gpa' * is straddling the boundary between the normal memory and MMIO. */ seg = vie->segment_override ? vie->segment_register : VM_REG_GUEST_DS; error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize, PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr, &fault); if (error || fault) goto done; error = vm_copy_setup(vm, vcpuid, paging, srcaddr, opsize, PROT_READ, copyinfo, nitems(copyinfo), &fault); if (error == 0) { if (fault) goto done; /* Resume guest to handle fault */ /* * case (2): read from system memory and write to mmio. 
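 *
 * Concretely: vm_copyin() pulls 'opsize' bytes from the guest source
 * address into 'val', and memwrite() then hands them to the emulated
 * device at 'gpa'.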
*/ vm_copyin(vm, vcpuid, copyinfo, &val, opsize); vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); error = memwrite(vm, vcpuid, gpa, val, opsize, arg); if (error) goto done; } else { /* * 'vm_copy_setup()' is expected to fail for cases (3) and (4) * if 'srcaddr' is in the mmio space. */ error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize, PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr, &fault); if (error || fault) goto done; error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize, PROT_WRITE, copyinfo, nitems(copyinfo), &fault); if (error == 0) { if (fault) goto done; /* Resume guest to handle fault */ /* * case (3): read from MMIO and write to system memory. * * A MMIO read can have side-effects so we * commit to it only after vm_copy_setup() is * successful. If a page-fault needs to be * injected into the guest then it will happen * before the MMIO read is attempted. */ error = memread(vm, vcpuid, gpa, &val, opsize, arg); if (error) goto done; vm_copyout(vm, vcpuid, &val, copyinfo, opsize); vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); } else { /* * Case (4): read from and write to mmio. * * Commit to the MMIO read/write (with potential * side-effects) only after we are sure that the * instruction is not going to be restarted due * to address translation faults. */ error = vm_gla2gpa(vm, vcpuid, paging, srcaddr, PROT_READ, &srcgpa, &fault); if (error || fault) goto done; error = vm_gla2gpa(vm, vcpuid, paging, dstaddr, PROT_WRITE, &dstgpa, &fault); if (error || fault) goto done; error = memread(vm, vcpuid, srcgpa, &val, opsize, arg); if (error) goto done; error = memwrite(vm, vcpuid, dstgpa, val, opsize, arg); if (error) goto done; } } error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi); KASSERT(error == 0, ("%s: error %d getting rsi", __func__, error)); error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi); KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error)); error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); if (rflags & PSL_D) { rsi -= opsize; rdi -= opsize; } else { rsi += opsize; rdi += opsize; } error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSI, rsi, vie->addrsize); KASSERT(error == 0, ("%s: error %d updating rsi", __func__, error)); error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi, vie->addrsize); KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error)); if (repeat) { rcx = rcx - 1; error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX, rcx, vie->addrsize); KASSERT(!error, ("%s: error %d updating rcx", __func__, error)); /* * Repeat the instruction if the count register is not zero. */ if ((rcx & vie_size2mask(vie->addrsize)) != 0) vm_restart_instruction(vm, vcpuid); } done: KASSERT(error == 0 || error == EFAULT, ("%s: unexpected error %d", __func__, error)); return (error); } static int emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { int error, opsize, repeat; uint64_t val; uint64_t rcx, rdi, rflags; opsize = (vie->op.op_byte == 0xAA) ? 1 : vie->opsize; repeat = vie->repz_present | vie->repnz_present; if (repeat) { error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx); KASSERT(!error, ("%s: error %d getting rcx", __func__, error)); /* * The count register is %rcx, %ecx or %cx depending on the * address size of the instruction. 
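 *
 * For example, with a 2-byte address size only %cx is significant:
 * vie_size2mask(2) yields 0xffff, so the masking below makes a guest
 * with %cx == 0 skip the store even when the upper bits of %rcx are
 * non-zero.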
*/ if ((rcx & vie_size2mask(vie->addrsize)) == 0) return (0); } error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val); KASSERT(!error, ("%s: error %d getting rax", __func__, error)); error = memwrite(vm, vcpuid, gpa, val, opsize, arg); if (error) return (error); error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi); KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error)); error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); if (rflags & PSL_D) rdi -= opsize; else rdi += opsize; error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi, vie->addrsize); KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error)); if (repeat) { rcx = rcx - 1; error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX, rcx, vie->addrsize); KASSERT(!error, ("%s: error %d updating rcx", __func__, error)); /* * Repeat the instruction if the count register is not zero. */ if ((rcx & vie_size2mask(vie->addrsize)) != 0) vm_restart_instruction(vm, vcpuid); } return (0); } static int emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { int error, size; enum vm_reg_name reg; uint64_t result, rflags, rflags2, val1, val2; size = vie->opsize; error = EINVAL; switch (vie->op.op_byte) { case 0x23: /* * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the * result in reg. * * 23/r and r16, r/m16 * 23/r and r32, r/m32 * REX.W + 23/r and r64, r/m64 */ /* get the first operand */ reg = gpr_map[vie->reg]; error = vie_read_register(vm, vcpuid, reg, &val1); if (error) break; /* get the second operand */ error = memread(vm, vcpuid, gpa, &val2, size, arg); if (error) break; /* perform the operation and write the result */ result = val1 & val2; error = vie_update_register(vm, vcpuid, reg, result, size); break; case 0x81: case 0x83: /* * AND mem (ModRM:r/m) with immediate and store the * result in mem. * * 81 /4 and r/m16, imm16 * 81 /4 and r/m32, imm32 * REX.W + 81 /4 and r/m64, imm32 sign-extended to 64 * * 83 /4 and r/m16, imm8 sign-extended to 16 * 83 /4 and r/m32, imm8 sign-extended to 32 * REX.W + 83/4 and r/m64, imm8 sign-extended to 64 */ /* get the first operand */ error = memread(vm, vcpuid, gpa, &val1, size, arg); if (error) break; /* * perform the operation with the pre-fetched immediate * operand and write the result */ result = val1 & vie->immediate; error = memwrite(vm, vcpuid, gpa, result, size, arg); break; default: break; } if (error) return (error); error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); /* * OF and CF are cleared; the SF, ZF and PF flags are set according * to the result; AF is undefined. * * The updated status flags are obtained by subtracting 0 from 'result'. */ rflags2 = getcc(size, result, 0); rflags &= ~RFLAGS_STATUS_BITS; rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N); error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); return (error); } static int emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { int error, size; enum vm_reg_name reg; uint64_t result, rflags, rflags2, val1, val2; size = vie->opsize; error = EINVAL; switch (vie->op.op_byte) { case 0x0B: /* * OR reg (ModRM:reg) and mem (ModRM:r/m) and store the * result in reg. 
* * 0b/r or r16, r/m16 * 0b/r or r32, r/m32 * REX.W + 0b/r or r64, r/m64 */ /* get the first operand */ reg = gpr_map[vie->reg]; error = vie_read_register(vm, vcpuid, reg, &val1); if (error) break; /* get the second operand */ error = memread(vm, vcpuid, gpa, &val2, size, arg); if (error) break; /* perform the operation and write the result */ result = val1 | val2; error = vie_update_register(vm, vcpuid, reg, result, size); break; case 0x81: case 0x83: /* * OR mem (ModRM:r/m) with immediate and store the * result in mem. * * 81 /1 or r/m16, imm16 * 81 /1 or r/m32, imm32 * REX.W + 81 /1 or r/m64, imm32 sign-extended to 64 * * 83 /1 or r/m16, imm8 sign-extended to 16 * 83 /1 or r/m32, imm8 sign-extended to 32 * REX.W + 83/1 or r/m64, imm8 sign-extended to 64 */ /* get the first operand */ error = memread(vm, vcpuid, gpa, &val1, size, arg); if (error) break; /* * perform the operation with the pre-fetched immediate * operand and write the result */ result = val1 | vie->immediate; error = memwrite(vm, vcpuid, gpa, result, size, arg); break; default: break; } if (error) return (error); error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); /* * OF and CF are cleared; the SF, ZF and PF flags are set according * to the result; AF is undefined. * * The updated status flags are obtained by subtracting 0 from 'result'. */ rflags2 = getcc(size, result, 0); rflags &= ~RFLAGS_STATUS_BITS; rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N); error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); return (error); } static int emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { int error, size; uint64_t regop, memop, op1, op2, rflags, rflags2; enum vm_reg_name reg; size = vie->opsize; switch (vie->op.op_byte) { case 0x39: case 0x3B: /* * 39/r CMP r/m16, r16 * 39/r CMP r/m32, r32 * REX.W 39/r CMP r/m64, r64 * * 3B/r CMP r16, r/m16 * 3B/r CMP r32, r/m32 * REX.W + 3B/r CMP r64, r/m64 * * Compare the first operand with the second operand and * set status flags in EFLAGS register. The comparison is * performed by subtracting the second operand from the first * operand and then setting the status flags. */ /* Get the register operand */ reg = gpr_map[vie->reg]; error = vie_read_register(vm, vcpuid, reg, ®op); if (error) return (error); /* Get the memory operand */ error = memread(vm, vcpuid, gpa, &memop, size, arg); if (error) return (error); if (vie->op.op_byte == 0x3B) { op1 = regop; op2 = memop; } else { op1 = memop; op2 = regop; } rflags2 = getcc(size, op1, op2); break; case 0x80: case 0x81: case 0x83: /* * 80 /7 cmp r/m8, imm8 * REX + 80 /7 cmp r/m8, imm8 * * 81 /7 cmp r/m16, imm16 * 81 /7 cmp r/m32, imm32 * REX.W + 81 /7 cmp r/m64, imm32 sign-extended to 64 * * 83 /7 cmp r/m16, imm8 sign-extended to 16 * 83 /7 cmp r/m32, imm8 sign-extended to 32 * REX.W + 83 /7 cmp r/m64, imm8 sign-extended to 64 * * Compare mem (ModRM:r/m) with immediate and set * status flags according to the results. The * comparison is performed by subtracting the * immediate from the first operand and then setting * the status flags. 
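 *
 * As with hardware CMP, the subtraction result is discarded: the
 * getcc() call below only derives rflags, and no memwrite() is
 * issued for this opcode group.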
* */ if (vie->op.op_byte == 0x80) size = 1; /* get the first operand */ error = memread(vm, vcpuid, gpa, &op1, size, arg); if (error) return (error); rflags2 = getcc(size, op1, vie->immediate); break; default: return (EINVAL); } error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); rflags &= ~RFLAGS_STATUS_BITS; rflags |= rflags2 & RFLAGS_STATUS_BITS; error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); return (error); } static int +emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite, void *arg) +{ + int error, size; + uint64_t nval, rflags, rflags2, val1, val2; + enum vm_reg_name reg; + + size = vie->opsize; + error = EINVAL; + + switch (vie->op.op_byte) { + case 0x03: + /* + * ADD r/m to r and store the result in r + * + * 03/r ADD r16, r/m16 + * 03/r ADD r32, r/m32 + * REX.W + 03/r ADD r64, r/m64 + */ + + /* get the first operand */ + reg = gpr_map[vie->reg]; + error = vie_read_register(vm, vcpuid, reg, &val1); + if (error) + break; + + /* get the second operand */ + error = memread(vm, vcpuid, gpa, &val2, size, arg); + if (error) + break; + + /* perform the operation and write the result */ + nval = val1 + val2; + error = vie_update_register(vm, vcpuid, reg, nval, size); + break; + default: + break; + } + + if (!error) { + rflags2 = getaddflags(size, val1, val2); + error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, + &rflags); + if (error) + return (error); + + rflags &= ~RFLAGS_STATUS_BITS; + rflags |= rflags2 & RFLAGS_STATUS_BITS; + error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, + rflags, 8); + } + + return (error); +} + +static int emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { int error, size; uint64_t nval, rflags, rflags2, val1, val2; enum vm_reg_name reg; size = vie->opsize; error = EINVAL; switch (vie->op.op_byte) { case 0x2B: /* * SUB r/m from r and store the result in r * * 2B/r SUB r16, r/m16 * 2B/r SUB r32, r/m32 * REX.W + 2B/r SUB r64, r/m64 */ /* get the first operand */ reg = gpr_map[vie->reg]; error = vie_read_register(vm, vcpuid, reg, &val1); if (error) break; /* get the second operand */ error = memread(vm, vcpuid, gpa, &val2, size, arg); if (error) break; /* perform the operation and write the result */ nval = val1 - val2; error = vie_update_register(vm, vcpuid, reg, nval, size); break; default: break; } if (!error) { rflags2 = getcc(size, val1, val2); error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); rflags &= ~RFLAGS_STATUS_BITS; rflags |= rflags2 & RFLAGS_STATUS_BITS; error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); } return (error); } static int emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { #ifdef _KERNEL struct vm_copyinfo copyinfo[2]; #else struct iovec copyinfo[2]; #endif struct seg_desc ss_desc; uint64_t cr0, rflags, rsp, stack_gla, val; int error, fault, size, stackaddrsize, pushop; val = 0; size = vie->opsize; pushop = (vie->op.op_type == VIE_OP_TYPE_PUSH) ? 
1 : 0; /* * From "Address-Size Attributes for Stack Accesses", Intel SDL, Vol 1 */ if (paging->cpu_mode == CPU_MODE_REAL) { stackaddrsize = 2; } else if (paging->cpu_mode == CPU_MODE_64BIT) { /* * "Stack Manipulation Instructions in 64-bit Mode", SDM, Vol 3 * - Stack pointer size is always 64-bits. * - PUSH/POP of 32-bit values is not possible in 64-bit mode. * - 16-bit PUSH/POP is supported by using the operand size * override prefix (66H). */ stackaddrsize = 8; size = vie->opsize_override ? 2 : 8; } else { /* * In protected or compatibility mode the 'B' flag in the * stack-segment descriptor determines the size of the * stack pointer. */ error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_SS, &ss_desc); KASSERT(error == 0, ("%s: error %d getting SS descriptor", __func__, error)); if (SEG_DESC_DEF32(ss_desc.access)) stackaddrsize = 4; else stackaddrsize = 2; } error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0); KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error)); error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp); KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error)); if (pushop) { rsp -= size; } if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc, rsp, size, stackaddrsize, pushop ? PROT_WRITE : PROT_READ, &stack_gla)) { vm_inject_ss(vm, vcpuid, 0); return (0); } if (vie_canonical_check(paging->cpu_mode, stack_gla)) { vm_inject_ss(vm, vcpuid, 0); return (0); } if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) { vm_inject_ac(vm, vcpuid, 0); return (0); } error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size, pushop ? PROT_WRITE : PROT_READ, copyinfo, nitems(copyinfo), &fault); if (error || fault) return (error); if (pushop) { error = memread(vm, vcpuid, mmio_gpa, &val, size, arg); if (error == 0) vm_copyout(vm, vcpuid, &val, copyinfo, size); } else { vm_copyin(vm, vcpuid, copyinfo, &val, size); error = memwrite(vm, vcpuid, mmio_gpa, val, size, arg); rsp += size; } vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); if (error == 0) { error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp, stackaddrsize); KASSERT(error == 0, ("error %d updating rsp", error)); } return (error); } static int emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { int error; /* * Table A-6, "Opcode Extensions", Intel SDM, Vol 2. * * PUSH is part of the group 5 extended opcodes and is identified * by ModRM:reg = b110. */ if ((vie->reg & 7) != 6) return (EINVAL); error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread, memwrite, arg); return (error); } static int emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { int error; /* * Table A-6, "Opcode Extensions", Intel SDM, Vol 2. * * POP is part of the group 1A extended opcodes and is identified * by ModRM:reg = b000. 
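/* For group opcodes the ModRM "reg" field is an opcode extension (the "/digit" in the SDM tables) rather than a register number; that is what the (vie->reg & 7) tests in the PUSH (FF /6) and POP (8F /0) handlers here check. A sketch of pulling the three ModRM fields out of a raw byte (the demo_ name is illustrative, not part of this source): */
#include <stdint.h>

static void
demo_modrm_fields(uint8_t modrm, int *mod, int *reg, int *rm)
{
	*mod = (modrm >> 6) & 0x3;	/* addressing mode */
	*reg = (modrm >> 3) & 0x7;	/* register number or /digit extension */
	*rm  = modrm & 0x7;		/* register/memory operand */
}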
*/ if ((vie->reg & 7) != 0) return (EINVAL); error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread, memwrite, arg); return (error); } static int emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) { int error; switch (vie->reg & 7) { case 0x1: /* OR */ error = emulate_or(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; case 0x4: /* AND */ error = emulate_and(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; case 0x7: /* CMP */ error = emulate_cmp(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; default: error = EINVAL; break; } return (error); } static int emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) { uint64_t val, rflags; int error, bitmask, bitoff; /* * 0F BA is a Group 8 extended opcode. * * Currently we only emulate the 'Bit Test' instruction which is * identified by a ModR/M:reg encoding of 100b. */ if ((vie->reg & 7) != 4) return (EINVAL); error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); error = memread(vm, vcpuid, gpa, &val, vie->opsize, memarg); if (error) return (error); /* * Intel SDM, Vol 2, Table 3-2: * "Range of Bit Positions Specified by Bit Offset Operands" */ bitmask = vie->opsize * 8 - 1; bitoff = vie->immediate & bitmask; /* Copy the bit into the Carry flag in %rflags */ if (val & (1UL << bitoff)) rflags |= PSL_C; else rflags &= ~PSL_C; error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); KASSERT(error == 0, ("%s: error %d updating rflags", __func__, error)); return (0); } static int emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) { int error; uint64_t buf; switch (vie->reg & 7) { case 0x7: /* CLFLUSH, CLFLUSHOPT, and SFENCE */ if (vie->mod == 0x3) { /* * SFENCE. Ignore it, VM exit provides enough * barriers on its own. */ error = 0; } else { /* * CLFLUSH, CLFLUSHOPT. Only check for access * rights. 
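/* The immediate form of BT only looks at the low bits of the offset (offset mod operand-width) and then copies the selected bit into CF, exactly as the emulate_bittest() code above does. A self-contained sketch of that flag update (demo_ names are illustrative, not part of this source): */
#include <stdint.h>

#define DEMO_CF	0x1ULL	/* carry flag bit position in %rflags (PSL_C) */

static uint64_t
demo_bt_imm(uint64_t rflags, uint64_t val, int opsize, uint8_t imm)
{
	int bitoff;

	/* e.g. opsize 4 -> bit offset is taken modulo 32 */
	bitoff = imm & (opsize * 8 - 1);
	if (val & (1ULL << bitoff))
		rflags |= DEMO_CF;
	else
		rflags &= ~DEMO_CF;
	return (rflags);
}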
*/ error = memread(vm, vcpuid, gpa, &buf, 1, memarg); } break; default: error = EINVAL; break; } return (error); } int vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) { int error; if (!vie->decoded) return (EINVAL); switch (vie->op.op_type) { case VIE_OP_TYPE_GROUP1: error = emulate_group1(vm, vcpuid, gpa, vie, paging, memread, memwrite, memarg); break; case VIE_OP_TYPE_POP: error = emulate_pop(vm, vcpuid, gpa, vie, paging, memread, memwrite, memarg); break; case VIE_OP_TYPE_PUSH: error = emulate_push(vm, vcpuid, gpa, vie, paging, memread, memwrite, memarg); break; case VIE_OP_TYPE_CMP: error = emulate_cmp(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_MOV: error = emulate_mov(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_MOVSX: case VIE_OP_TYPE_MOVZX: error = emulate_movx(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_MOVS: error = emulate_movs(vm, vcpuid, gpa, vie, paging, memread, memwrite, memarg); break; case VIE_OP_TYPE_STOS: error = emulate_stos(vm, vcpuid, gpa, vie, paging, memread, memwrite, memarg); break; case VIE_OP_TYPE_AND: error = emulate_and(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_OR: error = emulate_or(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_SUB: error = emulate_sub(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_BITTEST: error = emulate_bittest(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_TWOB_GRP15: error = emulate_twob_group15(vm, vcpuid, gpa, vie, memread, memwrite, memarg); + break; + case VIE_OP_TYPE_ADD: + error = emulate_add(vm, vcpuid, gpa, vie, memread, + memwrite, memarg); break; default: error = EINVAL; break; } return (error); } int vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla) { KASSERT(size == 1 || size == 2 || size == 4 || size == 8, ("%s: invalid size %d", __func__, size)); KASSERT(cpl >= 0 && cpl <= 3, ("%s: invalid cpl %d", __func__, cpl)); if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0) return (0); return ((gla & (size - 1)) ? 1 : 0); } int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla) { uint64_t mask; if (cpu_mode != CPU_MODE_64BIT) return (0); /* * The value of the bit 47 in the 'gla' should be replicated in the * most significant 16 bits. 
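/* A canonical 48-bit virtual address must have bits 63:48 equal to bit 47, which is what the mask comparison below verifies. An equivalent compact form: shift the address so bit 47 lands in the sign position, let an arithmetic right shift replicate it, and check that the round trip is lossless (demo_ name illustrative; relies on arithmetic right shift of signed values, which all supported compilers provide): */
#include <stdbool.h>
#include <stdint.h>

static bool
demo_is_canonical(uint64_t gla)
{
	return ((uint64_t)((int64_t)(gla << 16) >> 16) == gla);
}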
*/ mask = ~((1UL << 48) - 1); if (gla & (1UL << 47)) return ((gla & mask) != mask); else return ((gla & mask) != 0); } uint64_t vie_size2mask(int size) { KASSERT(size == 1 || size == 2 || size == 4 || size == 8, ("vie_size2mask: invalid size %d", size)); return (size2mask[size]); } int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, struct seg_desc *desc, uint64_t offset, int length, int addrsize, int prot, uint64_t *gla) { uint64_t firstoff, low_limit, high_limit, segbase; int glasize, type; KASSERT(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS, ("%s: invalid segment %d", __func__, seg)); KASSERT(length == 1 || length == 2 || length == 4 || length == 8, ("%s: invalid operand size %d", __func__, length)); KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0, ("%s: invalid prot %#x", __func__, prot)); firstoff = offset; if (cpu_mode == CPU_MODE_64BIT) { KASSERT(addrsize == 4 || addrsize == 8, ("%s: invalid address " "size %d for cpu_mode %d", __func__, addrsize, cpu_mode)); glasize = 8; } else { KASSERT(addrsize == 2 || addrsize == 4, ("%s: invalid address " "size %d for cpu mode %d", __func__, addrsize, cpu_mode)); glasize = 4; /* * If the segment selector is loaded with a NULL selector * then the descriptor is unusable and attempting to use * it results in a #GP(0). */ if (SEG_DESC_UNUSABLE(desc->access)) return (-1); /* * The processor generates a #NP exception when a segment * register is loaded with a selector that points to a * descriptor that is not present. If this was the case then * it would have been checked before the VM-exit. */ KASSERT(SEG_DESC_PRESENT(desc->access), ("segment %d not present: %#x", seg, desc->access)); /* * The descriptor type must indicate a code/data segment. */ type = SEG_DESC_TYPE(desc->access); KASSERT(type >= 16 && type <= 31, ("segment %d has invalid " "descriptor type %#x", seg, type)); if (prot & PROT_READ) { /* #GP on a read access to a exec-only code segment */ if ((type & 0xA) == 0x8) return (-1); } if (prot & PROT_WRITE) { /* * #GP on a write access to a code segment or a * read-only data segment. */ if (type & 0x8) /* code segment */ return (-1); if ((type & 0xA) == 0) /* read-only data seg */ return (-1); } /* * 'desc->limit' is fully expanded taking granularity into * account. */ if ((type & 0xC) == 0x4) { /* expand-down data segment */ low_limit = desc->limit + 1; high_limit = SEG_DESC_DEF32(desc->access) ? 0xffffffff : 0xffff; } else { /* code segment or expand-up data segment */ low_limit = 0; high_limit = desc->limit; } while (length > 0) { offset &= vie_size2mask(addrsize); if (offset < low_limit || offset > high_limit) return (-1); offset++; length--; } } /* * In 64-bit mode all segments except %fs and %gs have a segment * base address of 0. */ if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS && seg != VM_REG_GUEST_GS) { segbase = 0; } else { segbase = desc->base; } /* * Truncate 'firstoff' to the effective address size before adding * it to the segment base. 
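/* The final linear-address computation: the offset wraps at the effective address size *before* the segment base is added, and the sum then wraps at the linear-address size (8 bytes in long mode, 4 otherwise). A sketch mirroring the two masking steps below (demo_ names illustrative, not part of this source): */
#include <stdint.h>

static uint64_t
demo_size2mask(int size)
{
	return ((size == 8) ? ~0ULL : (1ULL << (size * 8)) - 1);
}

static uint64_t
demo_calc_gla(uint64_t segbase, uint64_t offset, int addrsize, int glasize)
{
	/* e.g. a 16-bit "addr16" access wraps its offset at 64KB first */
	return ((segbase + (offset & demo_size2mask(addrsize))) &
	    demo_size2mask(glasize));
}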
*/ firstoff &= vie_size2mask(addrsize); *gla = (segbase + firstoff) & vie_size2mask(glasize); return (0); } #ifdef _KERNEL void vie_init(struct vie *vie, const char *inst_bytes, int inst_length) { KASSERT(inst_length >= 0 && inst_length <= VIE_INST_SIZE, ("%s: invalid instruction length (%d)", __func__, inst_length)); bzero(vie, sizeof(struct vie)); vie->base_register = VM_REG_LAST; vie->index_register = VM_REG_LAST; vie->segment_register = VM_REG_LAST; if (inst_length) { bcopy(inst_bytes, vie->inst, inst_length); vie->num_valid = inst_length; } } static int pf_error_code(int usermode, int prot, int rsvd, uint64_t pte) { int error_code = 0; if (pte & PG_V) error_code |= PGEX_P; if (prot & VM_PROT_WRITE) error_code |= PGEX_W; if (usermode) error_code |= PGEX_U; if (rsvd) error_code |= PGEX_RSV; if (prot & VM_PROT_EXECUTE) error_code |= PGEX_I; return (error_code); } static void ptp_release(void **cookie) { if (*cookie != NULL) { vm_gpa_release(*cookie); *cookie = NULL; } } static void * ptp_hold(struct vm *vm, int vcpu, vm_paddr_t ptpphys, size_t len, void **cookie) { void *ptr; ptp_release(cookie); ptr = vm_gpa_hold(vm, vcpu, ptpphys, len, VM_PROT_RW, cookie); return (ptr); } static int _vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *guest_fault, bool check_only) { int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable; u_int retries; uint64_t *ptpbase, ptpphys, pte, pgsize; uint32_t *ptpbase32, pte32; void *cookie; *guest_fault = 0; usermode = (paging->cpl == 3 ? 1 : 0); writable = prot & VM_PROT_WRITE; cookie = NULL; retval = 0; retries = 0; restart: ptpphys = paging->cr3; /* root of the page tables */ ptp_release(&cookie); if (retries++ > 0) maybe_yield(); if (vie_canonical_check(paging->cpu_mode, gla)) { /* * XXX assuming a non-stack reference otherwise a stack fault * should be generated. */ if (!check_only) vm_inject_gp(vm, vcpuid); goto fault; } if (paging->paging_mode == PAGING_MODE_FLAT) { *gpa = gla; goto done; } if (paging->paging_mode == PAGING_MODE_32) { nlevels = 2; while (--nlevels >= 0) { /* Zero out the lower 12 bits. */ ptpphys &= ~0xfff; ptpbase32 = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE, &cookie); if (ptpbase32 == NULL) goto error; ptpshift = PAGE_SHIFT + nlevels * 10; ptpindex = (gla >> ptpshift) & 0x3FF; pgsize = 1UL << ptpshift; pte32 = ptpbase32[ptpindex]; if ((pte32 & PG_V) == 0 || (usermode && (pte32 & PG_U) == 0) || (writable && (pte32 & PG_RW) == 0)) { if (!check_only) { pfcode = pf_error_code(usermode, prot, 0, pte32); vm_inject_pf(vm, vcpuid, pfcode, gla); } goto fault; } /* * Emulate the x86 MMU's management of the accessed * and dirty flags. While the accessed flag is set * at every level of the page table, the dirty flag * is only set at the last level providing the guest * physical address. 
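/* The accessed bit must be set atomically: the guest (or another vCPU) may update the same PTE concurrently, so the walker uses compare-and-swap and restarts the walk on failure instead of a blind read-modify-write -- that is the "goto restart" pattern below. A user-space sketch of one such update with C11 atomics (the kernel uses atomic_cmpset_32/64; demo_ names are illustrative): */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define DEMO_PG_A	0x20u	/* accessed bit */

static bool
demo_set_accessed(_Atomic uint32_t *ptep, uint32_t pte_seen)
{
	/*
	 * Succeeds only if the PTE still holds the value the walk
	 * observed; on failure the caller re-reads the page tables.
	 */
	return (atomic_compare_exchange_strong(ptep, &pte_seen,
	    pte_seen | DEMO_PG_A));
}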
*/ if (!check_only && (pte32 & PG_A) == 0) { if (atomic_cmpset_32(&ptpbase32[ptpindex], pte32, pte32 | PG_A) == 0) { goto restart; } } /* XXX must be ignored if CR4.PSE=0 */ if (nlevels > 0 && (pte32 & PG_PS) != 0) break; ptpphys = pte32; } /* Set the dirty bit in the page table entry if necessary */ if (!check_only && writable && (pte32 & PG_M) == 0) { if (atomic_cmpset_32(&ptpbase32[ptpindex], pte32, pte32 | PG_M) == 0) { goto restart; } } /* Zero out the lower 'ptpshift' bits */ pte32 >>= ptpshift; pte32 <<= ptpshift; *gpa = pte32 | (gla & (pgsize - 1)); goto done; } if (paging->paging_mode == PAGING_MODE_PAE) { /* Zero out the lower 5 bits and the upper 32 bits */ ptpphys &= 0xffffffe0UL; ptpbase = ptp_hold(vm, vcpuid, ptpphys, sizeof(*ptpbase) * 4, &cookie); if (ptpbase == NULL) goto error; ptpindex = (gla >> 30) & 0x3; pte = ptpbase[ptpindex]; if ((pte & PG_V) == 0) { if (!check_only) { pfcode = pf_error_code(usermode, prot, 0, pte); vm_inject_pf(vm, vcpuid, pfcode, gla); } goto fault; } ptpphys = pte; nlevels = 2; } else nlevels = 4; while (--nlevels >= 0) { /* Zero out the lower 12 bits and the upper 12 bits */ ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12; ptpbase = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE, &cookie); if (ptpbase == NULL) goto error; ptpshift = PAGE_SHIFT + nlevels * 9; ptpindex = (gla >> ptpshift) & 0x1FF; pgsize = 1UL << ptpshift; pte = ptpbase[ptpindex]; if ((pte & PG_V) == 0 || (usermode && (pte & PG_U) == 0) || (writable && (pte & PG_RW) == 0)) { if (!check_only) { pfcode = pf_error_code(usermode, prot, 0, pte); vm_inject_pf(vm, vcpuid, pfcode, gla); } goto fault; } /* Set the accessed bit in the page table entry */ if (!check_only && (pte & PG_A) == 0) { if (atomic_cmpset_64(&ptpbase[ptpindex], pte, pte | PG_A) == 0) { goto restart; } } if (nlevels > 0 && (pte & PG_PS) != 0) { if (pgsize > 1 * GB) { if (!check_only) { pfcode = pf_error_code(usermode, prot, 1, pte); vm_inject_pf(vm, vcpuid, pfcode, gla); } goto fault; } break; } ptpphys = pte; } /* Set the dirty bit in the page table entry if necessary */ if (!check_only && writable && (pte & PG_M) == 0) { if (atomic_cmpset_64(&ptpbase[ptpindex], pte, pte | PG_M) == 0) goto restart; } /* Zero out the lower 'ptpshift' bits and the upper 12 bits */ pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12; *gpa = pte | (gla & (pgsize - 1)); done: ptp_release(&cookie); KASSERT(retval == 0 || retval == EFAULT, ("%s: unexpected retval %d", __func__, retval)); return (retval); error: retval = EFAULT; goto done; fault: *guest_fault = 1; goto done; } int vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *guest_fault) { return (_vm_gla2gpa(vm, vcpuid, paging, gla, prot, gpa, guest_fault, false)); } int vm_gla2gpa_nofault(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *guest_fault) { return (_vm_gla2gpa(vm, vcpuid, paging, gla, prot, gpa, guest_fault, true)); } int vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t rip, int inst_length, struct vie *vie, int *faultptr) { struct vm_copyinfo copyinfo[2]; int error, prot; if (inst_length > VIE_INST_SIZE) panic("vmm_fetch_instruction: invalid length %d", inst_length); prot = PROT_READ | PROT_EXEC; error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot, copyinfo, nitems(copyinfo), faultptr); if (error || *faultptr) return (error); vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length); vm_copy_teardown(vm, vcpuid, 
copyinfo, nitems(copyinfo)); vie->num_valid = inst_length; return (0); } static int vie_peek(struct vie *vie, uint8_t *x) { if (vie->num_processed < vie->num_valid) { *x = vie->inst[vie->num_processed]; return (0); } else return (-1); } static void vie_advance(struct vie *vie) { vie->num_processed++; } static bool segment_override(uint8_t x, int *seg) { switch (x) { case 0x2E: *seg = VM_REG_GUEST_CS; break; case 0x36: *seg = VM_REG_GUEST_SS; break; case 0x3E: *seg = VM_REG_GUEST_DS; break; case 0x26: *seg = VM_REG_GUEST_ES; break; case 0x64: *seg = VM_REG_GUEST_FS; break; case 0x65: *seg = VM_REG_GUEST_GS; break; default: return (false); } return (true); } static int decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d) { uint8_t x; while (1) { if (vie_peek(vie, &x)) return (-1); if (x == 0x66) vie->opsize_override = 1; else if (x == 0x67) vie->addrsize_override = 1; else if (x == 0xF3) vie->repz_present = 1; else if (x == 0xF2) vie->repnz_present = 1; else if (segment_override(x, &vie->segment_register)) vie->segment_override = 1; else break; vie_advance(vie); } /* * From section 2.2.1, "REX Prefixes", Intel SDM Vol 2: * - Only one REX prefix is allowed per instruction. * - The REX prefix must immediately precede the opcode byte or the * escape opcode byte. * - If an instruction has a mandatory prefix (0x66, 0xF2 or 0xF3) * the mandatory prefix must come before the REX prefix. */ if (cpu_mode == CPU_MODE_64BIT && x >= 0x40 && x <= 0x4F) { vie->rex_present = 1; vie->rex_w = x & 0x8 ? 1 : 0; vie->rex_r = x & 0x4 ? 1 : 0; vie->rex_x = x & 0x2 ? 1 : 0; vie->rex_b = x & 0x1 ? 1 : 0; vie_advance(vie); } /* * Section "Operand-Size And Address-Size Attributes", Intel SDM, Vol 1 */ if (cpu_mode == CPU_MODE_64BIT) { /* * Default address size is 64-bits and default operand size * is 32-bits. */ vie->addrsize = vie->addrsize_override ? 4 : 8; if (vie->rex_w) vie->opsize = 8; else if (vie->opsize_override) vie->opsize = 2; else vie->opsize = 4; } else if (cs_d) { /* Default address and operand sizes are 32-bits */ vie->addrsize = vie->addrsize_override ? 2 : 4; vie->opsize = vie->opsize_override ? 2 : 4; } else { /* Default address and operand sizes are 16-bits */ vie->addrsize = vie->addrsize_override ? 4 : 2; vie->opsize = vie->opsize_override ? 4 : 2; } return (0); } static int decode_two_byte_opcode(struct vie *vie) { uint8_t x; if (vie_peek(vie, &x)) return (-1); vie->op = two_byte_opcodes[x]; if (vie->op.op_type == VIE_OP_TYPE_NONE) return (-1); vie_advance(vie); return (0); } static int decode_opcode(struct vie *vie) { uint8_t x; if (vie_peek(vie, &x)) return (-1); vie->op = one_byte_opcodes[x]; if (vie->op.op_type == VIE_OP_TYPE_NONE) return (-1); vie_advance(vie); if (vie->op.op_type == VIE_OP_TYPE_TWO_BYTE) return (decode_two_byte_opcode(vie)); return (0); } static int decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode) { uint8_t x; if (vie->op.op_flags & VIE_OP_F_NO_MODRM) return (0); if (cpu_mode == CPU_MODE_REAL) return (-1); if (vie_peek(vie, &x)) return (-1); vie->mod = (x >> 6) & 0x3; vie->rm = (x >> 0) & 0x7; vie->reg = (x >> 3) & 0x7; /* * A direct addressing mode makes no sense in the context of an EPT * fault. There has to be a memory access involved to cause the * EPT fault. 
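/* The 64-bit size rules spelled out in decode_prefixes() above reduce to a small decision table: address size defaults to 8 (67h shrinks it to 4), and for the operand size REX.W wins over the 66h override, which otherwise selects 16 bits. A sketch of just that resolution (demo_ name illustrative, not part of this source): */
static void
demo_sizes_64bit(int rex_w, int opsize_override, int addrsize_override,
    int *opsize, int *addrsize)
{
	/* 64-bit mode: default operand size is 4, default address size 8 */
	*addrsize = addrsize_override ? 4 : 8;
	if (rex_w)
		*opsize = 8;	/* REX.W beats the 66h override */
	else
		*opsize = opsize_override ? 2 : 4;
}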
*/ if (vie->mod == VIE_MOD_DIRECT) return (-1); if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) || (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) { /* * Table 2-5: Special Cases of REX Encodings * * mod=0, r/m=5 is used in the compatibility mode to * indicate a disp32 without a base register. * * mod!=3, r/m=4 is used in the compatibility mode to * indicate that the SIB byte is present. * * The 'b' bit in the REX prefix is don't care in * this case. */ } else { vie->rm |= (vie->rex_b << 3); } vie->reg |= (vie->rex_r << 3); /* SIB */ if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB) goto done; vie->base_register = gpr_map[vie->rm]; switch (vie->mod) { case VIE_MOD_INDIRECT_DISP8: vie->disp_bytes = 1; break; case VIE_MOD_INDIRECT_DISP32: vie->disp_bytes = 4; break; case VIE_MOD_INDIRECT: if (vie->rm == VIE_RM_DISP32) { vie->disp_bytes = 4; /* * Table 2-7. RIP-Relative Addressing * * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32 * whereas in compatibility mode it just implies disp32. */ if (cpu_mode == CPU_MODE_64BIT) vie->base_register = VM_REG_GUEST_RIP; else vie->base_register = VM_REG_LAST; } break; } done: vie_advance(vie); return (0); } static int decode_sib(struct vie *vie) { uint8_t x; /* Proceed only if SIB byte is present */ if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB) return (0); if (vie_peek(vie, &x)) return (-1); /* De-construct the SIB byte */ vie->ss = (x >> 6) & 0x3; vie->index = (x >> 3) & 0x7; vie->base = (x >> 0) & 0x7; /* Apply the REX prefix modifiers */ vie->index |= vie->rex_x << 3; vie->base |= vie->rex_b << 3; switch (vie->mod) { case VIE_MOD_INDIRECT_DISP8: vie->disp_bytes = 1; break; case VIE_MOD_INDIRECT_DISP32: vie->disp_bytes = 4; break; } if (vie->mod == VIE_MOD_INDIRECT && (vie->base == 5 || vie->base == 13)) { /* * Special case when base register is unused if mod = 0 * and base = %rbp or %r13. * * Documented in: * Table 2-3: 32-bit Addressing Forms with the SIB Byte * Table 2-5: Special Cases of REX Encodings */ vie->disp_bytes = 4; } else { vie->base_register = gpr_map[vie->base]; } /* * All encodings of 'index' are valid except for %rsp (4). * * Documented in: * Table 2-3: 32-bit Addressing Forms with the SIB Byte * Table 2-5: Special Cases of REX Encodings */ if (vie->index != 4) vie->index_register = gpr_map[vie->index]; /* 'scale' makes sense only in the context of an index register */ if (vie->index_register < VM_REG_LAST) vie->scale = 1 << vie->ss; vie_advance(vie); return (0); } static int decode_displacement(struct vie *vie) { int n, i; uint8_t x; union { char buf[4]; int8_t signed8; int32_t signed32; } u; if ((n = vie->disp_bytes) == 0) return (0); if (n != 1 && n != 4) panic("decode_displacement: invalid disp_bytes %d", n); for (i = 0; i < n; i++) { if (vie_peek(vie, &x)) return (-1); u.buf[i] = x; vie_advance(vie); } if (n == 1) vie->displacement = u.signed8; /* sign-extended */ else vie->displacement = u.signed32; /* sign-extended */ return (0); } static int decode_immediate(struct vie *vie) { int i, n; uint8_t x; union { char buf[4]; int8_t signed8; int16_t signed16; int32_t signed32; } u; /* Figure out immediate operand size (if any) */ if (vie->op.op_flags & VIE_OP_F_IMM) { /* * Section 2.2.1.5 "Immediates", Intel SDM: * In 64-bit mode the typical size of immediate operands * remains 32-bits. When the operand size if 64-bits, the * processor sign-extends all immediates to 64-bits prior * to their use. 
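/* The union trick used by decode_displacement() and decode_immediate() relies on ordinary C integer conversion: assigning an int8_t/int16_t/int32_t to a wider signed field replicates the sign bit, giving the sign extension the SDM requires. A standalone equivalent (demo_ name illustrative, not part of this source): */
#include <stdint.h>

static int64_t
demo_sext_imm(const uint8_t *buf, int n)	/* n is 1, 2 or 4 */
{
	switch (n) {
	case 1:
		return ((int8_t)buf[0]);
	case 2:
		return ((int16_t)(buf[0] | (uint16_t)buf[1] << 8));
	default:
		return ((int32_t)(buf[0] | (uint32_t)buf[1] << 8 |
		    (uint32_t)buf[2] << 16 | (uint32_t)buf[3] << 24));
	}
}
/* e.g. demo_sext_imm((uint8_t[]){0xF0}, 1) == -16, as in "83 /1 imm8". */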
*/ if (vie->opsize == 4 || vie->opsize == 8) vie->imm_bytes = 4; else vie->imm_bytes = 2; } else if (vie->op.op_flags & VIE_OP_F_IMM8) { vie->imm_bytes = 1; } if ((n = vie->imm_bytes) == 0) return (0); KASSERT(n == 1 || n == 2 || n == 4, ("%s: invalid number of immediate bytes: %d", __func__, n)); for (i = 0; i < n; i++) { if (vie_peek(vie, &x)) return (-1); u.buf[i] = x; vie_advance(vie); } /* sign-extend the immediate value before use */ if (n == 1) vie->immediate = u.signed8; else if (n == 2) vie->immediate = u.signed16; else vie->immediate = u.signed32; return (0); } static int decode_moffset(struct vie *vie) { int i, n; uint8_t x; union { char buf[8]; uint64_t u64; } u; if ((vie->op.op_flags & VIE_OP_F_MOFFSET) == 0) return (0); /* * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM: * The memory offset size follows the address-size of the instruction. */ n = vie->addrsize; KASSERT(n == 2 || n == 4 || n == 8, ("invalid moffset bytes: %d", n)); u.u64 = 0; for (i = 0; i < n; i++) { if (vie_peek(vie, &x)) return (-1); u.buf[i] = x; vie_advance(vie); } vie->displacement = u.u64; return (0); } /* * Verify that the 'guest linear address' provided as collateral of the nested * page table fault matches with our instruction decoding. */ static int verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie, enum vm_cpu_mode cpu_mode) { int error; uint64_t base, segbase, idx, gla2; enum vm_reg_name seg; struct seg_desc desc; /* Skip 'gla' verification */ if (gla == VIE_INVALID_GLA) return (0); base = 0; if (vie->base_register != VM_REG_LAST) { error = vm_get_register(vm, cpuid, vie->base_register, &base); if (error) { printf("verify_gla: error %d getting base reg %d\n", error, vie->base_register); return (-1); } /* * RIP-relative addressing starts from the following * instruction */ if (vie->base_register == VM_REG_GUEST_RIP) base += vie->num_processed; } idx = 0; if (vie->index_register != VM_REG_LAST) { error = vm_get_register(vm, cpuid, vie->index_register, &idx); if (error) { printf("verify_gla: error %d getting index reg %d\n", error, vie->index_register); return (-1); } } /* * From "Specifying a Segment Selector", Intel SDM, Vol 1 * * In 64-bit mode, segmentation is generally (but not * completely) disabled. The exceptions are the FS and GS * segments. * * In legacy IA-32 mode, when the ESP or EBP register is used * as the base, the SS segment is the default segment. For * other data references, except when relative to stack or * string destination the DS segment is the default. These * can be overridden to allow other segments to be accessed. 
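/* The default-segment rule described above, as a tiny pure function: an explicit override wins; otherwise an rSP/rBP-based access defaults to SS and everything else to DS (string destinations, not handled here, would use ES). The demo_ names are illustrative, not part of this source: */
enum demo_seg { DEMO_SEG_DS, DEMO_SEG_SS };

static enum demo_seg
demo_default_seg(int has_override, enum demo_seg override_seg,
    int base_is_rsp_or_rbp)
{
	if (has_override)
		return (override_seg);
	return (base_is_rsp_or_rbp ? DEMO_SEG_SS : DEMO_SEG_DS);
}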
*/ if (vie->segment_override) seg = vie->segment_register; else if (vie->base_register == VM_REG_GUEST_RSP || vie->base_register == VM_REG_GUEST_RBP) seg = VM_REG_GUEST_SS; else seg = VM_REG_GUEST_DS; if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS && seg != VM_REG_GUEST_GS) { segbase = 0; } else { error = vm_get_seg_desc(vm, cpuid, seg, &desc); if (error) { printf("verify_gla: error %d getting segment" " descriptor %d", error, vie->segment_register); return (-1); } segbase = desc.base; } gla2 = segbase + base + vie->scale * idx + vie->displacement; gla2 &= size2mask[vie->addrsize]; if (gla != gla2) { printf("verify_gla mismatch: segbase(0x%0lx)" "base(0x%0lx), scale(%d), index(0x%0lx), " "disp(0x%0lx), gla(0x%0lx), gla2(0x%0lx)\n", segbase, base, vie->scale, idx, vie->displacement, gla, gla2); return (-1); } return (0); } int vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie) { if (decode_prefixes(vie, cpu_mode, cs_d)) return (-1); if (decode_opcode(vie)) return (-1); if (decode_modrm(vie, cpu_mode)) return (-1); if (decode_sib(vie)) return (-1); if (decode_displacement(vie)) return (-1); if (decode_immediate(vie)) return (-1); if (decode_moffset(vie)) return (-1); if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0) { if (verify_gla(vm, cpuid, gla, vie, cpu_mode)) return (-1); } vie->decoded = 1; /* success */ return (0); } #endif /* _KERNEL */ Index: projects/runtime-coverage-v2/sys/arm/conf/std.arm =================================================================== --- projects/runtime-coverage-v2/sys/arm/conf/std.arm (revision 347075) +++ projects/runtime-coverage-v2/sys/arm/conf/std.arm (revision 347076) @@ -1,37 +1,38 @@ # Standard kernel config items for all ARMv4/v5 systems. # # $FreeBSD$ options COMPAT_FREEBSD10 # Compatible with FreeBSD10 options COMPAT_FREEBSD11 # Compatible with FreeBSD11 +options COMPAT_FREEBSD12 # Compatible with FreeBSD12 # Debugging support. Always need this: makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB # Support DDB #options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed #options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones options ALT_BREAK_TO_DEBUGGER # Enter debugger on keyboard escape sequence options USB_DEBUG # Enable usb debug support code options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default # Optional extras, never enabled by default: #options BOOTVERBOSE #options DEBUG # May result in extreme spewage #options KTR #options KTR_COMPILE=KTR_ALL #options KTR_ENTRIES=16384 #options KTR_MASK=(KTR_SPARE2) #options KTR_VERBOSE=0 #options USB_REQ_DEBUG #options USB_VERBOSE Index: projects/runtime-coverage-v2/sys/arm/conf/std.armv6 =================================================================== --- projects/runtime-coverage-v2/sys/arm/conf/std.armv6 (revision 347075) +++ projects/runtime-coverage-v2/sys/arm/conf/std.armv6 (revision 347076) @@ -1,85 +1,86 @@ # Standard kernel config items for all ARMv6 systems. 
# # $FreeBSD$ options HZ=1000 options ARM_L2_PIPT # Only L2 PIPT is supported options INTRNG # All arm systems use INTRNG these days options PREEMPTION # Enable kernel thread preemption options VIMAGE # Subsystem virtualization, e.g. VNET options INET # InterNETworking options INET6 # IPv6 communications protocols options TCP_HHOOK # hhook(9) framework for TCP device crypto # core crypto support options IPSEC # IP (v4/v6) security options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support options UFS_ACL # Support for access control lists options UFS_DIRHASH # Improve performance on big directories options UFS_GJOURNAL # Enable gjournal-based UFS journaling options QUOTA # Enable disk quotas for UFS options NFSCL # Network Filesystem Client options NFSLOCKD # Network Lock Manager options NFS_ROOT # NFS usable as /, requires NFSCL options MSDOSFS # MSDOS Filesystem options CD9660 # ISO 9660 Filesystem options PROCFS # Process filesystem (requires PSEUDOFS) options PSEUDOFS # Pseudo-filesystem framework options TMPFS # Efficient memory filesystem options GEOM_PART_GPT # GUID Partition Tables options GEOM_PART_BSD # BSD partition scheme options GEOM_PART_MBR # MBR partition scheme options GEOM_LABEL # Provides labelization options COMPAT_43 # Compatible with BSD 4.3 [KEEP THIS!] options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI options KTRACE # ktrace(1) support options SYSVSHM # SYSV-style shared memory options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options KBD_INSTALL_CDEV # install a CDEV entry in /dev options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilites options FREEBSD_BOOT_LOADER # Process metadata passed from loader(8) options VFP # Enable floating point hardware support options MAC # Support for Mandatory Access Control (MAC) options COMPAT_FREEBSD10 # Compatible with FreeBSD10 options COMPAT_FREEBSD11 # Compatible with FreeBSD11 +options COMPAT_FREEBSD12 # Compatible with FreeBSD12 # DTrace support options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # all architectures - kernel ELF linker loads CTF data makeoptions WITH_CTF=1 # Debugging support. Always need this: makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. 
# For full debugger support use (turn off in stable branch): options DDB # Support DDB #options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones options ALT_BREAK_TO_DEBUGGER # Enter debugger on keyboard escape sequence options USB_DEBUG # Enable usb debug support code options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default # Optional extras, never enabled by default: #options BOOTVERBOSE #options DEBUG # May result in extreme spewage #options KTR #options KTR_COMPILE=KTR_ALL #options KTR_ENTRIES=16384 #options KTR_MASK=(KTR_SPARE2) #options KTR_VERBOSE=0 #options USB_REQ_DEBUG #options USB_VERBOSE Index: projects/runtime-coverage-v2/sys/arm/conf/std.armv7 =================================================================== --- projects/runtime-coverage-v2/sys/arm/conf/std.armv7 (revision 347075) +++ projects/runtime-coverage-v2/sys/arm/conf/std.armv7 (revision 347076) @@ -1,84 +1,85 @@ # Standard kernel config items for all ARMv7 systems. # # $FreeBSD$ options HZ=1000 options ARM_L2_PIPT # Only L2 PIPT is supported options INTRNG # All arm systems use INTRNG these days options PREEMPTION # Enable kernel thread preemption options VIMAGE # Subsystem virtualization, e.g. VNET options INET # InterNETworking options INET6 # IPv6 communications protocols options TCP_HHOOK # hhook(9) framework for TCP device crypto # core crypto support options IPSEC # IP (v4/v6) security options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support options UFS_ACL # Support for access control lists options UFS_DIRHASH # Improve performance on big directories options UFS_GJOURNAL # Enable gjournal-based UFS journaling options QUOTA # Enable disk quotas for UFS options NFSCL # Network Filesystem Client options NFSLOCKD # Network Lock Manager options NFS_ROOT # NFS usable as /, requires NFSCL options MSDOSFS # MSDOS Filesystem options CD9660 # ISO 9660 Filesystem options PROCFS # Process filesystem (requires PSEUDOFS) options PSEUDOFS # Pseudo-filesystem framework options TMPFS # Efficient memory filesystem options GEOM_PART_GPT # GUID Partition Tables options GEOM_PART_BSD # BSD partition scheme options GEOM_PART_MBR # MBR partition scheme options GEOM_LABEL # Provides labelization options COMPAT_43 # Compatible with BSD 4.3 [KEEP THIS!] options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI options KTRACE # ktrace(1) support options SYSVSHM # SYSV-style shared memory options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. 
options KBD_INSTALL_CDEV # install a CDEV entry in /dev options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilites options FREEBSD_BOOT_LOADER # Process metadata passed from loader(8) options VFP # Enable floating point hardware support options MAC # Support for Mandatory Access Control (MAC) options COMPAT_FREEBSD10 # Compatible with FreeBSD10 options COMPAT_FREEBSD11 # Compatible with FreeBSD11 +options COMPAT_FREEBSD12 # Compatible with FreeBSD12 # DTrace support options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # all architectures - kernel ELF linker loads CTF data makeoptions WITH_CTF=1 # Debugging support. Always need this: makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB # Support DDB #options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones options ALT_BREAK_TO_DEBUGGER # Enter debugger on keyboard escape sequence options USB_DEBUG # Enable usb debug support code options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default # Optional extras, never enabled by default: #options BOOTVERBOSE #options DEBUG # May result in extreme spewage #options KTR #options KTR_COMPILE=KTR_ALL #options KTR_ENTRIES=16384 #options KTR_MASK=(KTR_SPARE2) #options KTR_VERBOSE=0 #options USB_REQ_DEBUG #options USB_VERBOSE Index: projects/runtime-coverage-v2/sys/arm64/arm64/efirt_machdep.c =================================================================== --- projects/runtime-coverage-v2/sys/arm64/arm64/efirt_machdep.c (revision 347075) +++ projects/runtime-coverage-v2/sys/arm64/arm64/efirt_machdep.c (revision 347076) @@ -1,277 +1,287 @@ /*- * Copyright (c) 2004 Marcel Moolenaar * Copyright (c) 2001 Doug Rabson * Copyright (c) 2016 The FreeBSD Foundation * Copyright (c) 2017 Andrew Turner * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237 * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static vm_object_t obj_1t1_pt; static vm_page_t efi_l0_page; static pd_entry_t *efi_l0; static vm_pindex_t efi_1t1_idx; void efi_destroy_1t1_map(void) { vm_page_t m; if (obj_1t1_pt != NULL) { VM_OBJECT_RLOCK(obj_1t1_pt); TAILQ_FOREACH(m, &obj_1t1_pt->memq, listq) m->wire_count = 0; vm_wire_sub(obj_1t1_pt->resident_page_count); VM_OBJECT_RUNLOCK(obj_1t1_pt); vm_object_deallocate(obj_1t1_pt); } obj_1t1_pt = NULL; efi_l0 = NULL; efi_l0_page = NULL; } static vm_page_t efi_1t1_page(void) { return (vm_page_grab(obj_1t1_pt, efi_1t1_idx++, VM_ALLOC_NOBUSY | VM_ALLOC_WIRED | VM_ALLOC_ZERO)); } static pt_entry_t * efi_1t1_l3(vm_offset_t va) { pd_entry_t *l0, *l1, *l2; pt_entry_t *l3; vm_pindex_t l0_idx, l1_idx, l2_idx; vm_page_t m; vm_paddr_t mphys; l0_idx = pmap_l0_index(va); l0 = &efi_l0[l0_idx]; if (*l0 == 0) { m = efi_1t1_page(); mphys = VM_PAGE_TO_PHYS(m); *l0 = mphys | L0_TABLE; } else { mphys = *l0 & ~ATTR_MASK; } l1 = (pd_entry_t *)PHYS_TO_DMAP(mphys); l1_idx = pmap_l1_index(va); l1 += l1_idx; if (*l1 == 0) { m = efi_1t1_page(); mphys = VM_PAGE_TO_PHYS(m); *l1 = mphys | L1_TABLE; } else { mphys = *l1 & ~ATTR_MASK; } l2 = (pd_entry_t *)PHYS_TO_DMAP(mphys); l2_idx = pmap_l2_index(va); l2 += l2_idx; if (*l2 == 0) { m = efi_1t1_page(); mphys = VM_PAGE_TO_PHYS(m); *l2 = mphys | L2_TABLE; } else { mphys = *l2 & ~ATTR_MASK; } l3 = (pt_entry_t *)PHYS_TO_DMAP(mphys); l3 += pmap_l3_index(va); KASSERT(*l3 == 0, ("%s: Already mapped: va %#jx *pt %#jx", __func__, va, *l3)); return (l3); } /* * Map a physical address from EFI runtime space into KVA space. Returns 0 to * indicate a failed mapping so that the caller may handle error. 
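/* The efi_1t1_l3() walk above repeats the same allocate-on-demand step at every level: if the table entry is empty, grab a zeroed page and install it as the next-level table; otherwise reuse the physical address already stored there. A condensed sketch of one level under simplified assumptions (demo_ names, the alloc callback, and the single 12-bit attribute mask are illustrative; the kernel uses vm_page_grab() and the DMAP): */
#include <stdint.h>

#define DEMO_TABLE	0x3ULL		/* "valid table" descriptor bits */
#define DEMO_ATTR_MASK	0xFFFULL	/* low attribute bits to strip */

static uint64_t
demo_next_level(uint64_t *entry, uint64_t (*alloc_zeroed_page)(void))
{
	uint64_t phys;

	if (*entry == 0) {
		phys = alloc_zeroed_page();	/* phys addr of new table */
		*entry = phys | DEMO_TABLE;
	} else {
		phys = *entry & ~DEMO_ATTR_MASK;
	}
	return (phys);	/* caller maps this and indexes the next level */
}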
*/ vm_offset_t efi_phys_to_kva(vm_paddr_t paddr) { if (!PHYS_IN_DMAP(paddr)) return (0); return (PHYS_TO_DMAP(paddr)); } /* * Create the 1:1 virtual to physical map for EFI */ bool efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz) { struct efi_md *p; pt_entry_t *l3, l3_attr; vm_offset_t va; uint64_t idx; int i, mode; obj_1t1_pt = vm_pager_allocate(OBJT_PHYS, NULL, L0_ENTRIES + L0_ENTRIES * Ln_ENTRIES + L0_ENTRIES * Ln_ENTRIES * Ln_ENTRIES + L0_ENTRIES * Ln_ENTRIES * Ln_ENTRIES * Ln_ENTRIES, VM_PROT_ALL, 0, NULL); VM_OBJECT_WLOCK(obj_1t1_pt); efi_1t1_idx = 0; efi_l0_page = efi_1t1_page(); VM_OBJECT_WUNLOCK(obj_1t1_pt); efi_l0 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(efi_l0_page)); bzero(efi_l0, L0_ENTRIES * sizeof(*efi_l0)); for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, descsz)) { if ((p->md_attr & EFI_MD_ATTR_RT) == 0) continue; if (p->md_virt != NULL && (uint64_t)p->md_virt != p->md_phys) { if (bootverbose) printf("EFI Runtime entry %d is mapped\n", i); goto fail; } if ((p->md_phys & EFI_PAGE_MASK) != 0) { if (bootverbose) printf("EFI Runtime entry %d is not aligned\n", i); goto fail; } if (p->md_phys + p->md_pages * EFI_PAGE_SIZE < p->md_phys || p->md_phys + p->md_pages * EFI_PAGE_SIZE >= VM_MAXUSER_ADDRESS) { printf("EFI Runtime entry %d is not in mappable for RT:" "base %#016jx %#jx pages\n", i, (uintmax_t)p->md_phys, (uintmax_t)p->md_pages); goto fail; } if ((p->md_attr & EFI_MD_ATTR_WB) != 0) mode = VM_MEMATTR_WRITE_BACK; else if ((p->md_attr & EFI_MD_ATTR_WT) != 0) mode = VM_MEMATTR_WRITE_THROUGH; else if ((p->md_attr & EFI_MD_ATTR_WC) != 0) mode = VM_MEMATTR_WRITE_COMBINING; else if ((p->md_attr & EFI_MD_ATTR_UC) != 0) mode = VM_MEMATTR_DEVICE; else { if (bootverbose) printf("EFI Runtime entry %d mapping " "attributes unsupported\n", i); mode = VM_MEMATTR_UNCACHEABLE; } printf("MAP %lx mode %x pages %lu\n", p->md_phys, mode, p->md_pages); l3_attr = ATTR_DEFAULT | ATTR_IDX(mode) | ATTR_AP(ATTR_AP_RW) | L3_PAGE; if (mode == VM_MEMATTR_DEVICE) l3_attr |= ATTR_UXN | ATTR_PXN; VM_OBJECT_WLOCK(obj_1t1_pt); for (va = p->md_phys, idx = 0; idx < p->md_pages; idx++, va += PAGE_SIZE) { l3 = efi_1t1_l3(va); *l3 = va | l3_attr; } VM_OBJECT_WUNLOCK(obj_1t1_pt); } return (true); fail: efi_destroy_1t1_map(); return (false); } int efi_arch_enter(void) { __asm __volatile( "msr ttbr0_el1, %0 \n" "dsb ishst \n" "tlbi vmalle1is \n" "dsb ish \n" "isb \n" : : "r"(VM_PAGE_TO_PHYS(efi_l0_page))); return (0); } void efi_arch_leave(void) { struct thread *td; + /* + * Restore the pcpu pointer. Some UEFI implementations trash it and + * we don't store it before calling into them. To fix this we need + * to restore it after returning to the kernel context. As reading + * curthread will access x18 we need to restore it before loading + * the thread pointer. + */ + __asm __volatile( + "mrs x18, tpidr_el1 \n" + ); td = curthread; __asm __volatile( "msr ttbr0_el1, %0 \n" "dsb ishst \n" "tlbi vmalle1is \n" "dsb ish \n" "isb \n" : : "r"(td->td_proc->p_md.md_l0addr)); } int efi_rt_arch_call(struct efirt_callinfo *ec) { panic("not implemented"); } Index: projects/runtime-coverage-v2/sys/arm64/arm64/identcpu.c =================================================================== --- projects/runtime-coverage-v2/sys/arm64/arm64/identcpu.c (revision 347075) +++ projects/runtime-coverage-v2/sys/arm64/arm64/identcpu.c (revision 347076) @@ -1,1393 +1,1401 @@ /*- * Copyright (c) 2014 Andrew Turner * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. 
* * Portions of this software were developed by Semihalf * under sponsorship of the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include static int ident_lock; char machine[] = "arm64"; #ifdef SCTL_MASK32 extern int adaptive_machine_arch; #endif static int sysctl_hw_machine(SYSCTL_HANDLER_ARGS) { #ifdef SCTL_MASK32 static const char machine32[] = "arm"; #endif int error; #ifdef SCTL_MASK32 if ((req->flags & SCTL_MASK32) != 0 && adaptive_machine_arch) error = SYSCTL_OUT(req, machine32, sizeof(machine32)); else #endif error = SYSCTL_OUT(req, machine, sizeof(machine)); return (error); } SYSCTL_PROC(_hw, HW_MACHINE, machine, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_hw_machine, "A", "Machine class"); +static char cpu_model[64]; +SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, + cpu_model, sizeof(cpu_model), "Machine model"); + /* * Per-CPU affinity as provided in MPIDR_EL1 * Indexed by CPU number in logical order selected by the system. * Relevant fields can be extracted using CPU_AFFn macros, * Aff3.Aff2.Aff1.Aff0 construct a unique CPU address in the system. 
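/* MPIDR_EL1 packs the affinity levels as Aff0 = bits 7:0, Aff1 = 15:8, Aff2 = 23:16 and Aff3 = 39:32 (note the gap: bits 31:24 hold flags, not affinity). A sketch of the CPU_AFFn extraction used below (demo_ name illustrative, not part of this source): */
#include <stdint.h>

static uint32_t
demo_mpidr_aff(uint64_t mpidr, int level)	/* level is 0..3 */
{
	static const int shift[4] = { 0, 8, 16, 32 };

	return ((uint32_t)(mpidr >> shift[level]) & 0xFF);
}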
* * Fields used by us: * Aff1 - Cluster number * Aff0 - CPU number in Aff1 cluster */ uint64_t __cpu_affinity[MAXCPU]; static u_int cpu_aff_levels; struct cpu_desc { u_int cpu_impl; u_int cpu_part_num; u_int cpu_variant; u_int cpu_revision; const char *cpu_impl_name; const char *cpu_part_name; uint64_t mpidr; uint64_t id_aa64afr0; uint64_t id_aa64afr1; uint64_t id_aa64dfr0; uint64_t id_aa64dfr1; uint64_t id_aa64isar0; uint64_t id_aa64isar1; uint64_t id_aa64mmfr0; uint64_t id_aa64mmfr1; uint64_t id_aa64mmfr2; uint64_t id_aa64pfr0; uint64_t id_aa64pfr1; }; struct cpu_desc cpu_desc[MAXCPU]; struct cpu_desc user_cpu_desc; static u_int cpu_print_regs; #define PRINT_ID_AA64_AFR0 0x00000001 #define PRINT_ID_AA64_AFR1 0x00000002 #define PRINT_ID_AA64_DFR0 0x00000010 #define PRINT_ID_AA64_DFR1 0x00000020 #define PRINT_ID_AA64_ISAR0 0x00000100 #define PRINT_ID_AA64_ISAR1 0x00000200 #define PRINT_ID_AA64_MMFR0 0x00001000 #define PRINT_ID_AA64_MMFR1 0x00002000 #define PRINT_ID_AA64_MMFR2 0x00004000 #define PRINT_ID_AA64_PFR0 0x00010000 #define PRINT_ID_AA64_PFR1 0x00020000 struct cpu_parts { u_int part_id; const char *part_name; }; #define CPU_PART_NONE { 0, "Unknown Processor" } struct cpu_implementers { u_int impl_id; const char *impl_name; /* * Part number is implementation defined * so each vendor will have its own set of values and names. */ const struct cpu_parts *cpu_parts; }; #define CPU_IMPLEMENTER_NONE { 0, "Unknown Implementer", cpu_parts_none } /* * Per-implementer table of (PartNum, CPU Name) pairs. */ /* ARM Ltd. */ static const struct cpu_parts cpu_parts_arm[] = { { CPU_PART_FOUNDATION, "Foundation-Model" }, { CPU_PART_CORTEX_A35, "Cortex-A35" }, { CPU_PART_CORTEX_A53, "Cortex-A53" }, { CPU_PART_CORTEX_A55, "Cortex-A55" }, { CPU_PART_CORTEX_A57, "Cortex-A57" }, { CPU_PART_CORTEX_A72, "Cortex-A72" }, { CPU_PART_CORTEX_A73, "Cortex-A73" }, { CPU_PART_CORTEX_A75, "Cortex-A75" }, CPU_PART_NONE, }; /* Cavium */ static const struct cpu_parts cpu_parts_cavium[] = { { CPU_PART_THUNDERX, "ThunderX" }, { CPU_PART_THUNDERX2, "ThunderX2" }, CPU_PART_NONE, }; /* Unknown */ static const struct cpu_parts cpu_parts_none[] = { CPU_PART_NONE, }; /* * Implementers table. 
*/ const struct cpu_implementers cpu_implementers[] = { { CPU_IMPL_ARM, "ARM", cpu_parts_arm }, { CPU_IMPL_BROADCOM, "Broadcom", cpu_parts_none }, { CPU_IMPL_CAVIUM, "Cavium", cpu_parts_cavium }, { CPU_IMPL_DEC, "DEC", cpu_parts_none }, { CPU_IMPL_INFINEON, "IFX", cpu_parts_none }, { CPU_IMPL_FREESCALE, "Freescale", cpu_parts_none }, { CPU_IMPL_NVIDIA, "NVIDIA", cpu_parts_none }, { CPU_IMPL_APM, "APM", cpu_parts_none }, { CPU_IMPL_QUALCOMM, "Qualcomm", cpu_parts_none }, { CPU_IMPL_MARVELL, "Marvell", cpu_parts_none }, { CPU_IMPL_INTEL, "Intel", cpu_parts_none }, CPU_IMPLEMENTER_NONE, }; #define MRS_TYPE_MASK 0xf #define MRS_INVALID 0 #define MRS_EXACT 1 #define MRS_EXACT_VAL(x) (MRS_EXACT | ((x) << 4)) #define MRS_EXACT_FIELD(x) ((x) >> 4) #define MRS_LOWER 2 struct mrs_field { bool sign; u_int type; u_int shift; }; #define MRS_FIELD(_sign, _type, _shift) \ { \ .sign = (_sign), \ .type = (_type), \ .shift = (_shift), \ } #define MRS_FIELD_END { .type = MRS_INVALID, } static struct mrs_field id_aa64isar0_fields[] = { MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_DP_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_SM4_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_SM3_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_SHA3_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_RDM_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_ATOMIC_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_CRC32_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_SHA2_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_SHA1_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_AES_SHIFT), MRS_FIELD_END, }; static struct mrs_field id_aa64isar1_fields[] = { MRS_FIELD(false, MRS_EXACT, ID_AA64ISAR1_GPI_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64ISAR1_GPA_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_LRCPC_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_FCMA_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_JSCVT_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64ISAR1_API_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64ISAR1_APA_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_DPB_SHIFT), MRS_FIELD_END, }; static struct mrs_field id_aa64pfr0_fields[] = { MRS_FIELD(false, MRS_EXACT, ID_AA64PFR0_SVE_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64PFR0_RAS_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64PFR0_GIC_SHIFT), MRS_FIELD(true, MRS_LOWER, ID_AA64PFR0_ADV_SIMD_SHIFT), MRS_FIELD(true, MRS_LOWER, ID_AA64PFR0_FP_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64PFR0_EL3_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64PFR0_EL2_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64PFR0_EL1_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64PFR0_EL0_SHIFT), MRS_FIELD_END, }; static struct mrs_field id_aa64dfr0_fields[] = { MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_PMS_VER_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_CTX_CMPS_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_WRPS_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_BRPS_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_PMU_VER_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_TRACE_VER_SHIFT), MRS_FIELD(false, MRS_EXACT_VAL(0x6), ID_AA64DFR0_DEBUG_VER_SHIFT), MRS_FIELD_END, }; struct mrs_user_reg { u_int CRm; u_int Op2; size_t offset; struct mrs_field *fields; }; static struct mrs_user_reg user_regs[] = { { /* id_aa64isar0_el1 */ .CRm = 6, .Op2 = 0, .offset = __offsetof(struct cpu_desc, id_aa64isar0), .fields = id_aa64isar0_fields, }, { /* id_aa64isar1_el1 */ .CRm = 6, .Op2 = 1, .offset = __offsetof(struct cpu_desc, id_aa64isar1), .fields = id_aa64isar1_fields, }, { /* id_aa64pfr0_el1 */ .CRm = 4, .Op2 = 0, .offset = __offsetof(struct 
cpu_desc, id_aa64pfr0), .fields = id_aa64pfr0_fields, }, { /* id_aa64dfr0_el1 */ .CRm = 5, .Op2 = 0, .offset = __offsetof(struct cpu_desc, id_aa64dfr0), .fields = id_aa64dfr0_fields, }, }; #define CPU_DESC_FIELD(desc, idx) \ *(uint64_t *)((char *)&(desc) + user_regs[(idx)].offset) static int user_mrs_handler(vm_offset_t va, uint32_t insn, struct trapframe *frame, uint32_t esr) { uint64_t value; int CRm, Op2, i, reg; if ((insn & MRS_MASK) != MRS_VALUE) return (0); /* * We only emulate Op0 == 3, Op1 == 0, CRn == 0, CRm == {0, 4-7}. * These are in the EL1 CPU identification space. * CRm == 0 holds MIDR_EL1, MPIDR_EL1, and REVID_EL1. * CRm == {4-7} holds the ID_AA64 registers. * * For full details see the ARMv8 ARM (ARM DDI 0487C.a) * Table D9-2 System instruction encodings for non-Debug System * register accesses. */ if (mrs_Op0(insn) != 3 || mrs_Op1(insn) != 0 || mrs_CRn(insn) != 0) return (0); CRm = mrs_CRm(insn); if (CRm > 7 || (CRm < 4 && CRm != 0)) return (0); Op2 = mrs_Op2(insn); value = 0; for (i = 0; i < nitems(user_regs); i++) { if (user_regs[i].CRm == CRm && user_regs[i].Op2 == Op2) { value = CPU_DESC_FIELD(user_cpu_desc, i); break; } } if (CRm == 0) { switch (Op2) { case 0: value = READ_SPECIALREG(midr_el1); break; case 5: value = READ_SPECIALREG(mpidr_el1); break; case 6: value = READ_SPECIALREG(revidr_el1); break; default: return (0); } } /* * We will handle this instruction, move to the next so we * don't trap here again. */ frame->tf_elr += INSN_SIZE; reg = MRS_REGISTER(insn); /* If reg is 31 then write to xzr, i.e. do nothing */ if (reg == 31) return (1); if (reg < nitems(frame->tf_x)) frame->tf_x[reg] = value; else if (reg == 30) frame->tf_lr = value; return (1); } static void update_user_regs(u_int cpu) { struct mrs_field *fields; uint64_t cur, value; int i, j, cur_field, new_field; for (i = 0; i < nitems(user_regs); i++) { value = CPU_DESC_FIELD(cpu_desc[cpu], i); if (cpu == 0) cur = value; else cur = CPU_DESC_FIELD(user_cpu_desc, i); fields = user_regs[i].fields; for (j = 0; fields[j].type != 0; j++) { switch (fields[j].type & MRS_TYPE_MASK) { case MRS_EXACT: cur &= ~(0xfu << fields[j].shift); cur |= (uint64_t)MRS_EXACT_FIELD(fields[j].type) << fields[j].shift; break; case MRS_LOWER: new_field = (value >> fields[j].shift) & 0xf; cur_field = (cur >> fields[j].shift) & 0xf; if ((fields[j].sign && (int)new_field < (int)cur_field) || (!fields[j].sign && (u_int)new_field < (u_int)cur_field)) { cur &= ~(0xfu << fields[j].shift); cur |= new_field << fields[j].shift; } break; default: panic("Invalid field type: %d", fields[j].type); } } CPU_DESC_FIELD(user_cpu_desc, i) = cur; } } static void identify_cpu_sysinit(void *dummy __unused) { int cpu; /* Create a user visible cpu description with safe values */ memset(&user_cpu_desc, 0, sizeof(user_cpu_desc)); /* Safe values for these registers */ user_cpu_desc.id_aa64pfr0 = ID_AA64PFR0_ADV_SIMD_NONE | ID_AA64PFR0_FP_NONE | ID_AA64PFR0_EL1_64 | ID_AA64PFR0_EL0_64; user_cpu_desc.id_aa64dfr0 = ID_AA64DFR0_DEBUG_VER_8; CPU_FOREACH(cpu) { print_cpu_features(cpu); update_user_regs(cpu); } install_undef_handler(true, user_mrs_handler); } SYSINIT(idenrity_cpu, SI_SUB_SMP, SI_ORDER_ANY, identify_cpu_sysinit, NULL); void print_cpu_features(u_int cpu) { struct sbuf *sb; int printed; sb = sbuf_new_auto(); sbuf_printf(sb, "CPU%3d: %s %s r%dp%d", cpu, cpu_desc[cpu].cpu_impl_name, cpu_desc[cpu].cpu_part_name, cpu_desc[cpu].cpu_variant, cpu_desc[cpu].cpu_revision); sbuf_cat(sb, " affinity:"); switch(cpu_aff_levels) { default: case 4: sbuf_printf(sb, " 
%2d", CPU_AFF3(cpu_desc[cpu].mpidr)); /* FALLTHROUGH */ case 3: sbuf_printf(sb, " %2d", CPU_AFF2(cpu_desc[cpu].mpidr)); /* FALLTHROUGH */ case 2: sbuf_printf(sb, " %2d", CPU_AFF1(cpu_desc[cpu].mpidr)); /* FALLTHROUGH */ case 1: case 0: /* On UP this will be zero */ sbuf_printf(sb, " %2d", CPU_AFF0(cpu_desc[cpu].mpidr)); break; } sbuf_finish(sb); printf("%s\n", sbuf_data(sb)); sbuf_clear(sb); /* * There is a hardware errata where, if one CPU is performing a TLB * invalidation while another is performing a store-exclusive the * store-exclusive may return the wrong status. A workaround seems * to be to use an IPI to invalidate on each CPU, however given the * limited number of affected units (pass 1.1 is the evaluation * hardware revision), and the lack of information from Cavium * this has not been implemented. * * At the time of writing this the only information is from: * https://lkml.org/lkml/2016/8/4/722 */ /* * XXX: CPU_MATCH_ERRATA_CAVIUM_THUNDERX_1_1 on its own also * triggers on pass 2.0+. */ if (cpu == 0 && CPU_VAR(PCPU_GET(midr)) == 0 && CPU_MATCH_ERRATA_CAVIUM_THUNDERX_1_1) printf("WARNING: ThunderX Pass 1.1 detected.\nThis has known " "hardware bugs that may cause the incorrect operation of " "atomic operations.\n"); if (cpu != 0 && cpu_print_regs == 0) return; #define SEP_STR ((printed++) == 0) ? "" : "," /* AArch64 Instruction Set Attribute Register 0 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_ISAR0) != 0) { printed = 0; sbuf_printf(sb, " Instruction Set Attributes 0 = <"); switch (ID_AA64ISAR0_DP(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_DP_NONE: break; case ID_AA64ISAR0_DP_IMPL: sbuf_printf(sb, "%sDotProd", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown DP", SEP_STR); break; } switch (ID_AA64ISAR0_SM4(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_SM4_NONE: break; case ID_AA64ISAR0_SM4_IMPL: sbuf_printf(sb, "%sSM4", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown SM4", SEP_STR); break; } switch (ID_AA64ISAR0_SM3(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_SM3_NONE: break; case ID_AA64ISAR0_SM3_IMPL: sbuf_printf(sb, "%sSM3", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown SM3", SEP_STR); break; } switch (ID_AA64ISAR0_SHA3(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_SHA3_NONE: break; case ID_AA64ISAR0_SHA3_IMPL: sbuf_printf(sb, "%sSHA3", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown SHA3", SEP_STR); break; } switch (ID_AA64ISAR0_RDM(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_RDM_NONE: break; case ID_AA64ISAR0_RDM_IMPL: sbuf_printf(sb, "%sRDM", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown RDM", SEP_STR); } switch (ID_AA64ISAR0_ATOMIC(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_ATOMIC_NONE: break; case ID_AA64ISAR0_ATOMIC_IMPL: sbuf_printf(sb, "%sAtomic", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Atomic", SEP_STR); } switch (ID_AA64ISAR0_CRC32(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_CRC32_NONE: break; case ID_AA64ISAR0_CRC32_BASE: sbuf_printf(sb, "%sCRC32", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown CRC32", SEP_STR); break; } switch (ID_AA64ISAR0_SHA2(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_SHA2_NONE: break; case ID_AA64ISAR0_SHA2_BASE: sbuf_printf(sb, "%sSHA2", SEP_STR); break; case ID_AA64ISAR0_SHA2_512: sbuf_printf(sb, "%sSHA2+SHA512", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown SHA2", SEP_STR); break; } switch (ID_AA64ISAR0_SHA1(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_SHA1_NONE: break; case ID_AA64ISAR0_SHA1_BASE: sbuf_printf(sb, "%sSHA1", SEP_STR); 
break; default: sbuf_printf(sb, "%sUnknown SHA1", SEP_STR); break; } switch (ID_AA64ISAR0_AES(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_AES_NONE: break; case ID_AA64ISAR0_AES_BASE: sbuf_printf(sb, "%sAES", SEP_STR); break; case ID_AA64ISAR0_AES_PMULL: sbuf_printf(sb, "%sAES+PMULL", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown AES", SEP_STR); break; } if ((cpu_desc[cpu].id_aa64isar0 & ~ID_AA64ISAR0_MASK) != 0) sbuf_printf(sb, "%s%#lx", SEP_STR, cpu_desc[cpu].id_aa64isar0 & ~ID_AA64ISAR0_MASK); sbuf_finish(sb); printf("%s>\n", sbuf_data(sb)); sbuf_clear(sb); } /* AArch64 Instruction Set Attribute Register 1 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_ISAR1) != 0) { printed = 0; sbuf_printf(sb, " Instruction Set Attributes 1 = <"); switch (ID_AA64ISAR1_GPI(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_GPI_NONE: break; case ID_AA64ISAR1_GPI_IMPL: sbuf_printf(sb, "%sImpl GenericAuth", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown GenericAuth", SEP_STR); break; } switch (ID_AA64ISAR1_GPA(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_GPA_NONE: break; case ID_AA64ISAR1_GPA_IMPL: sbuf_printf(sb, "%sPrince GenericAuth", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown GenericAuth", SEP_STR); break; } switch (ID_AA64ISAR1_LRCPC(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_LRCPC_NONE: break; case ID_AA64ISAR1_LRCPC_IMPL: sbuf_printf(sb, "%sRCpc", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown RCpc", SEP_STR); break; } switch (ID_AA64ISAR1_FCMA(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_FCMA_NONE: break; case ID_AA64ISAR1_FCMA_IMPL: sbuf_printf(sb, "%sFCMA", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown FCMA", SEP_STR); break; } switch (ID_AA64ISAR1_JSCVT(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_JSCVT_NONE: break; case ID_AA64ISAR1_JSCVT_IMPL: sbuf_printf(sb, "%sJS Conv", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown JS Conv", SEP_STR); break; } switch (ID_AA64ISAR1_API(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_API_NONE: break; case ID_AA64ISAR1_API_IMPL: sbuf_printf(sb, "%sImpl AddrAuth", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Impl AddrAuth", SEP_STR); break; } switch (ID_AA64ISAR1_APA(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_APA_NONE: break; case ID_AA64ISAR1_APA_IMPL: sbuf_printf(sb, "%sPrince AddrAuth", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Prince AddrAuth", SEP_STR); break; } switch (ID_AA64ISAR1_DPB(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_DPB_NONE: break; case ID_AA64ISAR1_DPB_IMPL: sbuf_printf(sb, "%sDC CVAP", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown DC CVAP", SEP_STR); break; } if ((cpu_desc[cpu].id_aa64isar1 & ~ID_AA64ISAR1_MASK) != 0) sbuf_printf(sb, "%s%#lx", SEP_STR, cpu_desc[cpu].id_aa64isar1 & ~ID_AA64ISAR1_MASK); sbuf_finish(sb); printf("%s>\n", sbuf_data(sb)); sbuf_clear(sb); } /* AArch64 Processor Feature Register 0 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_PFR0) != 0) { printed = 0; sbuf_printf(sb, " Processor Features 0 = <"); switch (ID_AA64PFR0_SVE(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_SVE_NONE: break; case ID_AA64PFR0_SVE_IMPL: sbuf_printf(sb, "%sSVE", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown SVE", SEP_STR); break; } switch (ID_AA64PFR0_RAS(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_RAS_NONE: break; case ID_AA64PFR0_RAS_V1: sbuf_printf(sb, "%sRASv1", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown RAS", SEP_STR); break; } switch (ID_AA64PFR0_GIC(cpu_desc[cpu].id_aa64pfr0)) { case 
ID_AA64PFR0_GIC_CPUIF_NONE: break; case ID_AA64PFR0_GIC_CPUIF_EN: sbuf_printf(sb, "%sGIC", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown GIC interface", SEP_STR); break; } switch (ID_AA64PFR0_ADV_SIMD(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_ADV_SIMD_NONE: break; case ID_AA64PFR0_ADV_SIMD_IMPL: sbuf_printf(sb, "%sAdvSIMD", SEP_STR); break; case ID_AA64PFR0_ADV_SIMD_HP: sbuf_printf(sb, "%sAdvSIMD+HP", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown AdvSIMD", SEP_STR); break; } switch (ID_AA64PFR0_FP(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_FP_NONE: break; case ID_AA64PFR0_FP_IMPL: sbuf_printf(sb, "%sFloat", SEP_STR); break; case ID_AA64PFR0_FP_HP: sbuf_printf(sb, "%sFloat+HP", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Float", SEP_STR); break; } switch (ID_AA64PFR0_EL3(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_EL3_NONE: sbuf_printf(sb, "%sNo EL3", SEP_STR); break; case ID_AA64PFR0_EL3_64: sbuf_printf(sb, "%sEL3", SEP_STR); break; case ID_AA64PFR0_EL3_64_32: sbuf_printf(sb, "%sEL3 32", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown EL3", SEP_STR); break; } switch (ID_AA64PFR0_EL2(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_EL2_NONE: sbuf_printf(sb, "%sNo EL2", SEP_STR); break; case ID_AA64PFR0_EL2_64: sbuf_printf(sb, "%sEL2", SEP_STR); break; case ID_AA64PFR0_EL2_64_32: sbuf_printf(sb, "%sEL2 32", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown EL2", SEP_STR); break; } switch (ID_AA64PFR0_EL1(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_EL1_64: sbuf_printf(sb, "%sEL1", SEP_STR); break; case ID_AA64PFR0_EL1_64_32: sbuf_printf(sb, "%sEL1 32", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown EL1", SEP_STR); break; } switch (ID_AA64PFR0_EL0(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_EL0_64: sbuf_printf(sb, "%sEL0", SEP_STR); break; case ID_AA64PFR0_EL0_64_32: sbuf_printf(sb, "%sEL0 32", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown EL0", SEP_STR); break; } if ((cpu_desc[cpu].id_aa64pfr0 & ~ID_AA64PFR0_MASK) != 0) sbuf_printf(sb, "%s%#lx", SEP_STR, cpu_desc[cpu].id_aa64pfr0 & ~ID_AA64PFR0_MASK); sbuf_finish(sb); printf("%s>\n", sbuf_data(sb)); sbuf_clear(sb); } /* AArch64 Processor Feature Register 1 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_PFR1) != 0) { printf(" Processor Features 1 = <%#lx>\n", cpu_desc[cpu].id_aa64pfr1); } /* AArch64 Memory Model Feature Register 0 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_MMFR0) != 0) { printed = 0; sbuf_printf(sb, " Memory Model Features 0 = <"); switch (ID_AA64MMFR0_TGRAN4(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_TGRAN4_NONE: break; case ID_AA64MMFR0_TGRAN4_IMPL: sbuf_printf(sb, "%s4k Granule", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown 4k Granule", SEP_STR); break; } switch (ID_AA64MMFR0_TGRAN64(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_TGRAN64_NONE: break; case ID_AA64MMFR0_TGRAN64_IMPL: sbuf_printf(sb, "%s64k Granule", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown 64k Granule", SEP_STR); break; } switch (ID_AA64MMFR0_TGRAN16(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_TGRAN16_NONE: break; case ID_AA64MMFR0_TGRAN16_IMPL: sbuf_printf(sb, "%s16k Granule", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown 16k Granule", SEP_STR); break; } switch (ID_AA64MMFR0_BIGEND_EL0(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_BIGEND_EL0_FIXED: break; case ID_AA64MMFR0_BIGEND_EL0_MIXED: sbuf_printf(sb, "%sEL0 MixEndian", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown EL0 Endian switching", SEP_STR); break; } switch 
(ID_AA64MMFR0_S_NS_MEM(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_S_NS_MEM_NONE: break; case ID_AA64MMFR0_S_NS_MEM_DISTINCT: sbuf_printf(sb, "%sS/NS Mem", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown S/NS Mem", SEP_STR); break; } switch (ID_AA64MMFR0_BIGEND(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_BIGEND_FIXED: break; case ID_AA64MMFR0_BIGEND_MIXED: sbuf_printf(sb, "%sMixedEndian", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Endian switching", SEP_STR); break; } switch (ID_AA64MMFR0_ASID_BITS(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_ASID_BITS_8: sbuf_printf(sb, "%s8bit ASID", SEP_STR); break; case ID_AA64MMFR0_ASID_BITS_16: sbuf_printf(sb, "%s16bit ASID", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown ASID", SEP_STR); break; } switch (ID_AA64MMFR0_PA_RANGE(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_PA_RANGE_4G: sbuf_printf(sb, "%s4GB PA", SEP_STR); break; case ID_AA64MMFR0_PA_RANGE_64G: sbuf_printf(sb, "%s64GB PA", SEP_STR); break; case ID_AA64MMFR0_PA_RANGE_1T: sbuf_printf(sb, "%s1TB PA", SEP_STR); break; case ID_AA64MMFR0_PA_RANGE_4T: sbuf_printf(sb, "%s4TB PA", SEP_STR); break; case ID_AA64MMFR0_PA_RANGE_16T: sbuf_printf(sb, "%s16TB PA", SEP_STR); break; case ID_AA64MMFR0_PA_RANGE_256T: sbuf_printf(sb, "%s256TB PA", SEP_STR); break; case ID_AA64MMFR0_PA_RANGE_4P: sbuf_printf(sb, "%s4PB PA", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown PA Range", SEP_STR); break; } if ((cpu_desc[cpu].id_aa64mmfr0 & ~ID_AA64MMFR0_MASK) != 0) sbuf_printf(sb, "%s%#lx", SEP_STR, cpu_desc[cpu].id_aa64mmfr0 & ~ID_AA64MMFR0_MASK); sbuf_finish(sb); printf("%s>\n", sbuf_data(sb)); sbuf_clear(sb); } /* AArch64 Memory Model Feature Register 1 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_MMFR1) != 0) { printed = 0; sbuf_printf(sb, " Memory Model Features 1 = <"); switch (ID_AA64MMFR1_XNX(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_XNX_NONE: break; case ID_AA64MMFR1_XNX_IMPL: sbuf_printf(sb, "%sEL2 XN", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown XNX", SEP_STR); break; } switch (ID_AA64MMFR1_SPEC_SEI(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_SPEC_SEI_NONE: break; case ID_AA64MMFR1_SPEC_SEI_IMPL: sbuf_printf(sb, "%sSpecSEI", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown SpecSEI", SEP_STR); break; } switch (ID_AA64MMFR1_PAN(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_PAN_NONE: break; case ID_AA64MMFR1_PAN_IMPL: sbuf_printf(sb, "%sPAN", SEP_STR); break; case ID_AA64MMFR1_PAN_ATS1E1: sbuf_printf(sb, "%sPAN+AT", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown PAN", SEP_STR); break; } switch (ID_AA64MMFR1_LO(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_LO_NONE: break; case ID_AA64MMFR1_LO_IMPL: sbuf_printf(sb, "%sLO", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown LO", SEP_STR); break; } switch (ID_AA64MMFR1_HPDS(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_HPDS_NONE: break; case ID_AA64MMFR1_HPDS_HPD: sbuf_printf(sb, "%sHPDS", SEP_STR); break; case ID_AA64MMFR1_HPDS_TTPBHA: sbuf_printf(sb, "%sTTPBHA", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown HPDS", SEP_STR); break; } switch (ID_AA64MMFR1_VH(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_VH_NONE: break; case ID_AA64MMFR1_VH_IMPL: sbuf_printf(sb, "%sVHE", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown VHE", SEP_STR); break; } switch (ID_AA64MMFR1_VMIDBITS(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_VMIDBITS_8: break; case ID_AA64MMFR1_VMIDBITS_16: sbuf_printf(sb, "%s16 VMID bits", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown 
VMID bits", SEP_STR); break; } switch (ID_AA64MMFR1_HAFDBS(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_HAFDBS_NONE: break; case ID_AA64MMFR1_HAFDBS_AF: sbuf_printf(sb, "%sAF", SEP_STR); break; case ID_AA64MMFR1_HAFDBS_AF_DBS: sbuf_printf(sb, "%sAF+DBS", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Hardware update AF/DBS", SEP_STR); break; } if ((cpu_desc[cpu].id_aa64mmfr1 & ~ID_AA64MMFR1_MASK) != 0) sbuf_printf(sb, "%s%#lx", SEP_STR, cpu_desc[cpu].id_aa64mmfr1 & ~ID_AA64MMFR1_MASK); sbuf_finish(sb); printf("%s>\n", sbuf_data(sb)); sbuf_clear(sb); } /* AArch64 Memory Model Feature Register 2 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_MMFR2) != 0) { printed = 0; sbuf_printf(sb, " Memory Model Features 2 = <"); switch (ID_AA64MMFR2_NV(cpu_desc[cpu].id_aa64mmfr2)) { case ID_AA64MMFR2_NV_NONE: break; case ID_AA64MMFR2_NV_IMPL: sbuf_printf(sb, "%sNestedVirt", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown NestedVirt", SEP_STR); break; } switch (ID_AA64MMFR2_CCIDX(cpu_desc[cpu].id_aa64mmfr2)) { case ID_AA64MMFR2_CCIDX_32: sbuf_printf(sb, "%s32b CCIDX", SEP_STR); break; case ID_AA64MMFR2_CCIDX_64: sbuf_printf(sb, "%s64b CCIDX", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown CCIDX", SEP_STR); break; } switch (ID_AA64MMFR2_VA_RANGE(cpu_desc[cpu].id_aa64mmfr2)) { case ID_AA64MMFR2_VA_RANGE_48: sbuf_printf(sb, "%s48b VA", SEP_STR); break; case ID_AA64MMFR2_VA_RANGE_52: sbuf_printf(sb, "%s52b VA", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown VA Range", SEP_STR); break; } switch (ID_AA64MMFR2_IESB(cpu_desc[cpu].id_aa64mmfr2)) { case ID_AA64MMFR2_IESB_NONE: break; case ID_AA64MMFR2_IESB_IMPL: sbuf_printf(sb, "%sIESB", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown IESB", SEP_STR); break; } switch (ID_AA64MMFR2_LSM(cpu_desc[cpu].id_aa64mmfr2)) { case ID_AA64MMFR2_LSM_NONE: break; case ID_AA64MMFR2_LSM_IMPL: sbuf_printf(sb, "%sLSM", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown LSM", SEP_STR); break; } switch (ID_AA64MMFR2_UAO(cpu_desc[cpu].id_aa64mmfr2)) { case ID_AA64MMFR2_UAO_NONE: break; case ID_AA64MMFR2_UAO_IMPL: sbuf_printf(sb, "%sUAO", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown UAO", SEP_STR); break; } switch (ID_AA64MMFR2_CNP(cpu_desc[cpu].id_aa64mmfr2)) { case ID_AA64MMFR2_CNP_NONE: break; case ID_AA64MMFR2_CNP_IMPL: sbuf_printf(sb, "%sCnP", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown CnP", SEP_STR); break; } if ((cpu_desc[cpu].id_aa64mmfr2 & ~ID_AA64MMFR2_MASK) != 0) sbuf_printf(sb, "%s%#lx", SEP_STR, cpu_desc[cpu].id_aa64mmfr2 & ~ID_AA64MMFR2_MASK); sbuf_finish(sb); printf("%s>\n", sbuf_data(sb)); sbuf_clear(sb); } /* AArch64 Debug Feature Register 0 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_DFR0) != 0) { printed = 0; sbuf_printf(sb, " Debug Features 0 = <"); switch(ID_AA64DFR0_PMS_VER(cpu_desc[cpu].id_aa64dfr0)) { case ID_AA64DFR0_PMS_VER_NONE: break; case ID_AA64DFR0_PMS_VER_V1: sbuf_printf(sb, "%sSPE v1", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown SPE", SEP_STR); break; } sbuf_printf(sb, "%s%lu CTX Breakpoints", SEP_STR, ID_AA64DFR0_CTX_CMPS(cpu_desc[cpu].id_aa64dfr0)); sbuf_printf(sb, "%s%lu Watchpoints", SEP_STR, ID_AA64DFR0_WRPS(cpu_desc[cpu].id_aa64dfr0)); sbuf_printf(sb, "%s%lu Breakpoints", SEP_STR, ID_AA64DFR0_BRPS(cpu_desc[cpu].id_aa64dfr0)); switch (ID_AA64DFR0_PMU_VER(cpu_desc[cpu].id_aa64dfr0)) { case ID_AA64DFR0_PMU_VER_NONE: break; case ID_AA64DFR0_PMU_VER_3: sbuf_printf(sb, "%sPMUv3", SEP_STR); break; case ID_AA64DFR0_PMU_VER_3_1: sbuf_printf(sb, "%sPMUv3+16 bit evtCount", SEP_STR); 
break; case ID_AA64DFR0_PMU_VER_IMPL: sbuf_printf(sb, "%sImplementation defined PMU", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown PMU", SEP_STR); break; } switch (ID_AA64DFR0_TRACE_VER(cpu_desc[cpu].id_aa64dfr0)) { case ID_AA64DFR0_TRACE_VER_NONE: break; case ID_AA64DFR0_TRACE_VER_IMPL: sbuf_printf(sb, "%sTrace", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Trace", SEP_STR); break; } switch (ID_AA64DFR0_DEBUG_VER(cpu_desc[cpu].id_aa64dfr0)) { case ID_AA64DFR0_DEBUG_VER_8: sbuf_printf(sb, "%sDebug v8", SEP_STR); break; case ID_AA64DFR0_DEBUG_VER_8_VHE: sbuf_printf(sb, "%sDebug v8+VHE", SEP_STR); break; case ID_AA64DFR0_DEBUG_VER_8_2: sbuf_printf(sb, "%sDebug v8.2", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Debug", SEP_STR); break; } if (cpu_desc[cpu].id_aa64dfr0 & ~ID_AA64DFR0_MASK) sbuf_printf(sb, "%s%#lx", SEP_STR, cpu_desc[cpu].id_aa64dfr0 & ~ID_AA64DFR0_MASK); sbuf_finish(sb); printf("%s>\n", sbuf_data(sb)); sbuf_clear(sb); } /* AArch64 Debug Feature Register 1 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_DFR1) != 0) { printf(" Debug Features 1 = <%#lx>\n", cpu_desc[cpu].id_aa64dfr1); } /* AArch64 Auxiliary Feature Register 0 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_AFR0) != 0) { printf(" Auxiliary Features 0 = <%#lx>\n", cpu_desc[cpu].id_aa64afr0); } /* AArch64 Auxiliary Feature Register 1 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_AFR1) != 0) { printf(" Auxiliary Features 1 = <%#lx>\n", cpu_desc[cpu].id_aa64afr1); } sbuf_delete(sb); sb = NULL; #undef SEP_STR } void identify_cpu(void) { u_int midr; u_int impl_id; u_int part_id; u_int cpu; size_t i; const struct cpu_parts *cpu_partsp = NULL; cpu = PCPU_GET(cpuid); midr = get_midr(); /* * Store midr to pcpu to allow fast reading * from EL0, EL1 and assembly code. */ PCPU_SET(midr, midr); impl_id = CPU_IMPL(midr); for (i = 0; i < nitems(cpu_implementers); i++) { if (impl_id == cpu_implementers[i].impl_id || cpu_implementers[i].impl_id == 0) { cpu_desc[cpu].cpu_impl = impl_id; cpu_desc[cpu].cpu_impl_name = cpu_implementers[i].impl_name; cpu_partsp = cpu_implementers[i].cpu_parts; break; } } part_id = CPU_PART(midr); for (i = 0; &cpu_partsp[i] != NULL; i++) { if (part_id == cpu_partsp[i].part_id || cpu_partsp[i].part_id == 0) { cpu_desc[cpu].cpu_part_num = part_id; cpu_desc[cpu].cpu_part_name = cpu_partsp[i].part_name; break; } } cpu_desc[cpu].cpu_revision = CPU_REV(midr); cpu_desc[cpu].cpu_variant = CPU_VAR(midr); + + snprintf(cpu_model, sizeof(cpu_model), "%s %s r%dp%d", + cpu_desc[cpu].cpu_impl_name, cpu_desc[cpu].cpu_part_name, + cpu_desc[cpu].cpu_variant, cpu_desc[cpu].cpu_revision); /* Save affinity for current CPU */ cpu_desc[cpu].mpidr = get_mpidr(); CPU_AFFINITY(cpu) = cpu_desc[cpu].mpidr & CPU_AFF_MASK; cpu_desc[cpu].id_aa64dfr0 = READ_SPECIALREG(ID_AA64DFR0_EL1); cpu_desc[cpu].id_aa64dfr1 = READ_SPECIALREG(ID_AA64DFR1_EL1); cpu_desc[cpu].id_aa64isar0 = READ_SPECIALREG(ID_AA64ISAR0_EL1); cpu_desc[cpu].id_aa64isar1 = READ_SPECIALREG(ID_AA64ISAR1_EL1); cpu_desc[cpu].id_aa64mmfr0 = READ_SPECIALREG(ID_AA64MMFR0_EL1); cpu_desc[cpu].id_aa64mmfr1 = READ_SPECIALREG(ID_AA64MMFR1_EL1); cpu_desc[cpu].id_aa64mmfr2 = READ_SPECIALREG(ID_AA64MMFR2_EL1); cpu_desc[cpu].id_aa64pfr0 = READ_SPECIALREG(ID_AA64PFR0_EL1); cpu_desc[cpu].id_aa64pfr1 = READ_SPECIALREG(ID_AA64PFR1_EL1); if (cpu != 0) { /* * This code must run on one cpu at a time, but we are * not scheduling on the current core, so implement a * simple spinlock. 
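 *
 * The lock pairs acquire/release atomics with the ARMv8 WFE/SEV
 * event mechanism: a CPU that loses the atomic_cmpset_acq_int()
 * race executes WFE to wait for an event, and the unlocking CPU
 * issues SEV after atomic_store_rel_int() so every waiter wakes
 * and retries the cmpset. The release store also publishes the
 * cpu_aff_levels and cpu_print_regs updates made under the lock.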
*/ while (atomic_cmpset_acq_int(&ident_lock, 0, 1) == 0) __asm __volatile("wfe" ::: "memory"); switch (cpu_aff_levels) { case 0: if (CPU_AFF0(cpu_desc[cpu].mpidr) != CPU_AFF0(cpu_desc[0].mpidr)) cpu_aff_levels = 1; /* FALLTHROUGH */ case 1: if (CPU_AFF1(cpu_desc[cpu].mpidr) != CPU_AFF1(cpu_desc[0].mpidr)) cpu_aff_levels = 2; /* FALLTHROUGH */ case 2: if (CPU_AFF2(cpu_desc[cpu].mpidr) != CPU_AFF2(cpu_desc[0].mpidr)) cpu_aff_levels = 3; /* FALLTHROUGH */ case 3: if (CPU_AFF3(cpu_desc[cpu].mpidr) != CPU_AFF3(cpu_desc[0].mpidr)) cpu_aff_levels = 4; break; } if (cpu_desc[cpu].id_aa64afr0 != cpu_desc[0].id_aa64afr0) cpu_print_regs |= PRINT_ID_AA64_AFR0; if (cpu_desc[cpu].id_aa64afr1 != cpu_desc[0].id_aa64afr1) cpu_print_regs |= PRINT_ID_AA64_AFR1; if (cpu_desc[cpu].id_aa64dfr0 != cpu_desc[0].id_aa64dfr0) cpu_print_regs |= PRINT_ID_AA64_DFR0; if (cpu_desc[cpu].id_aa64dfr1 != cpu_desc[0].id_aa64dfr1) cpu_print_regs |= PRINT_ID_AA64_DFR1; if (cpu_desc[cpu].id_aa64isar0 != cpu_desc[0].id_aa64isar0) cpu_print_regs |= PRINT_ID_AA64_ISAR0; if (cpu_desc[cpu].id_aa64isar1 != cpu_desc[0].id_aa64isar1) cpu_print_regs |= PRINT_ID_AA64_ISAR1; if (cpu_desc[cpu].id_aa64mmfr0 != cpu_desc[0].id_aa64mmfr0) cpu_print_regs |= PRINT_ID_AA64_MMFR0; if (cpu_desc[cpu].id_aa64mmfr1 != cpu_desc[0].id_aa64mmfr1) cpu_print_regs |= PRINT_ID_AA64_MMFR1; if (cpu_desc[cpu].id_aa64mmfr2 != cpu_desc[0].id_aa64mmfr2) cpu_print_regs |= PRINT_ID_AA64_MMFR2; if (cpu_desc[cpu].id_aa64pfr0 != cpu_desc[0].id_aa64pfr0) cpu_print_regs |= PRINT_ID_AA64_PFR0; if (cpu_desc[cpu].id_aa64pfr1 != cpu_desc[0].id_aa64pfr1) cpu_print_regs |= PRINT_ID_AA64_PFR1; /* Wake up the other CPUs */ atomic_store_rel_int(&ident_lock, 0); __asm __volatile("sev" ::: "memory"); } } Index: projects/runtime-coverage-v2/sys/arm64/conf/GENERIC =================================================================== --- projects/runtime-coverage-v2/sys/arm64/conf/GENERIC (revision 347075) +++ projects/runtime-coverage-v2/sys/arm64/conf/GENERIC (revision 347076) @@ -1,318 +1,327 @@ # # GENERIC -- Generic kernel configuration file for FreeBSD/arm64 # # For more information on this file, please read the config(5) manual page, # and/or the handbook section on Kernel Configuration Files: # # https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ cpu ARM64 ident GENERIC makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support options SCHED_ULE # ULE scheduler options PREEMPTION # Enable kernel thread preemption options VIMAGE # Subsystem virtualization, e.g. 
VNET options INET # InterNETworking options INET6 # IPv6 communications protocols options IPSEC # IP (v4/v6) security options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options TCP_HHOOK # hhook(9) framework for TCP options TCP_OFFLOAD # TCP offload options TCP_RFC7413 # TCP Fast Open options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support options UFS_ACL # Support for access control lists options UFS_DIRHASH # Improve performance on big directories options UFS_GJOURNAL # Enable gjournal-based UFS journaling options QUOTA # Enable disk quotas for UFS options MD_ROOT # MD is a potential root device options NFSCL # Network Filesystem Client options NFSD # Network Filesystem Server options NFSLOCKD # Network Lock Manager options NFS_ROOT # NFS usable as /, requires NFSCL options MSDOSFS # MSDOS Filesystem options CD9660 # ISO 9660 Filesystem options PROCFS # Process filesystem (requires PSEUDOFS) options PSEUDOFS # Pseudo-filesystem framework options GEOM_RAID # Soft RAID functionality. options GEOM_LABEL # Provides labelization options COMPAT_FREEBSD32 # Compatible with FreeBSD/arm options COMPAT_FREEBSD11 # Compatible with FreeBSD11 +options COMPAT_FREEBSD12 # Compatible with FreeBSD12 options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI options KTRACE # ktrace(1) support options STACK # stack(9) support options SYSVSHM # SYSV-style shared memory options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options KBD_INSTALL_CDEV # install a CDEV entry in /dev options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_FRAME # Ensure frames are compiled in options KDTRACE_HOOKS # Kernel DTrace hooks options VFP # Floating-point support options RACCT # Resource accounting framework options RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default options RCTL # Resource limits options SMP options INTRNG # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB # Support DDB. #options GDB # Support remote GDB. options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones options ALT_BREAK_TO_DEBUGGER # Enter debugger on keyboard escape sequence options USB_DEBUG # enable debug msgs options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default # Kernel Sanitizers #options COVERAGE # Generic kernel coverage. Used by KCOV #options KCOV # Kernel Coverage Sanitizer # Warning: KUBSAN can result in a kernel too large for loader to load #options KUBSAN # Kernel Undefined Behavior Sanitizer # Kernel dump features. 
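# Dump devices are configured at runtime with dumpon(8); the options
# below only compile in support for encrypted, compressed and
# network-transmitted dump images.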
options EKCD # Support for encrypted kernel dumps options GZIO # gzip-compressed kernel and user dumps options ZSTDIO # zstd-compressed kernel and user dumps options NETDUMP # netdump(4) client support # SoC support options SOC_ALLWINNER_A64 options SOC_ALLWINNER_H5 options SOC_CAVM_THUNDERX options SOC_HISI_HI6220 options SOC_BRCM_BCM2837 options SOC_MARVELL_8K options SOC_ROCKCHIP_RK3328 options SOC_ROCKCHIP_RK3399 options SOC_XILINX_ZYNQ # Timer drivers device a10_timer # Annapurna Alpine drivers device al_ccu # Alpine Cache Coherency Unit device al_nb_service # Alpine North Bridge Service device al_iofic # I/O Fabric Interrupt Controller device al_serdes # Serializer/Deserializer device al_udma # Universal DMA # Qualcomm Snapdragon drivers device qcom_gcc # Global Clock Controller # VirtIO support device virtio device virtio_pci device virtio_mmio device virtio_blk device vtnet # CPU frequency control device cpufreq # Bus drivers device pci device al_pci # Annapurna Alpine PCI-E options PCI_HP # PCI-Express native HotPlug options PCI_IOV # PCI SR-IOV support # PCI/PCI-X/PCIe Ethernet NICs that use iflib infrastructure device iflib device em # Intel PRO/1000 Gigabit Ethernet Family device ix # Intel 10Gb Ethernet Family # Ethernet NICs device mdio device mii device miibus # MII bus support device awg # Allwinner EMAC Gigabit Ethernet device axgbe # AMD Opteron A1100 integrated NIC device msk # Marvell/SysKonnect Yukon II Gigabit Ethernet device neta # Marvell Armada 370/38x/XP/3700 NIC device smc # SMSC LAN91C111 device vnic # Cavium ThunderX NIC device al_eth # Annapurna Alpine Ethernet NIC device dwc_rk # Rockchip Designware # Etherswitch devices device etherswitch # Enable etherswitch support device miiproxy # Required for etherswitch device e6000sw # Marvell mv88e6085 based switches # Block devices device ahci device scbus device da # ATA/SCSI peripherals device pass # Passthrough device (direct ATA/SCSI access) # NVM Express (NVMe) support device nvme # base NVMe driver options NVME_USE_NVD=0 # prefer the cam(4) based nda(4) driver device nvd # expose NVMe namespaces as disks, depends on nvme # MMC/SD/SDIO Card slot support device sdhci device sdhci_xenon # Marvell Xenon SD/MMC controller device aw_mmc # Allwinner SD/MMC controller device mmc # mmc/sd bus device mmcsd # mmc/sd flash cards device dwmmc # Serial (COM) ports device uart # Generic UART driver device uart_msm # Qualcomm MSM UART driver device uart_mu # RPI3 aux port device uart_mvebu # Armada 3700 UART driver device uart_ns8250 # ns8250-type UART driver device uart_snps device pl011 # USB support device aw_ehci # Allwinner EHCI USB interface (USB 2.0) device aw_usbphy # Allwinner USB PHY device dwcotg # DWC OTG controller device ohci # OHCI USB interface device ehci # EHCI USB interface (USB 2.0) device ehci_mv # Marvell EHCI USB interface device xhci # XHCI PCI->USB interface (USB 3.0) device usb # USB Bus (required) device ukbd # Keyboard device umass # Disks/Mass storage - Requires scbus and da # USB ethernet support device muge device smcphy device smsc +# Sound support +device sound +device a10_codec + +# DMA controller +device a31_dmac + # GPIO / PINCTRL device aw_gpio # Allwinner GPIO controller device gpio device gpioled device fdt_pinctrl +device gpioregulator device mv_gpio # Marvell GPIO controller device mvebu_pinctrl # Marvell Pinmux Controller device rk_gpio # RockChip GPIO Controller device rk_pinctrl # RockChip Pinmux Controller # I2C device aw_rsb # Allwinner Reduced Serial Bus device bcm2835_bsc # 
Broadcom BCM283x I2C bus device iicbus device iic device twsi # Allwinner I2C controller device rk_i2c # RockChip I2C controller device syr827 # Silergy SYR827 PMIC device sy8106a # SY8106A Buck Regulator # Clock and reset controllers device aw_ccu # Allwinner clock controller # Interrupt controllers device aw_nmi # Allwinner NMI support device mv_cp110_icu # Marvell CP110 ICU device mv_ap806_gicp # Marvell AP806 GICP # Real-time clock support device aw_rtc # Allwinner Real-time Clock device mv_rtc # Marvell Real-time Clock # Watchdog controllers device aw_wdog # Allwinner Watchdog # Power management controllers device axp81x # X-Powers AXP81x PMIC device rk805 # RockChip RK805 PMIC # EFUSE device aw_sid # Allwinner Secure ID EFUSE # Thermal sensors device aw_thermal # Allwinner Thermal Sensor Controller device mv_thermal # Marvell Thermal Sensor Controller # SPI device spibus device bcm2835_spi # Broadcom BCM283x SPI bus # PWM device pwm device aw_pwm # Console device vt device kbdmux device vt_efifb # EVDEV support device evdev # input event device support options EVDEV_SUPPORT # evdev support in legacy drivers device uinput # install /dev/uinput cdev device aw_cir # Pseudo devices. device crypto # core crypto support device loop # Network loopback device random # Entropy device device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module options EFIRT # EFI Runtime Services # EXT_RESOURCES pseudo devices options EXT_RESOURCES device clk device phy device hwreset device nvmem device regulator device syscon device aw_syscon # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. device bpf # Berkeley packet filter # Chip-specific errata options THUNDERX_PASS_1_1_ERRATA options FDT device acpi # DTBs makeoptions MODULES_EXTRA="dtb/allwinner dtb/rockchip dtb/rpi" Index: projects/runtime-coverage-v2/sys/arm64/linux/linux.h =================================================================== --- projects/runtime-coverage-v2/sys/arm64/linux/linux.h (revision 347075) +++ projects/runtime-coverage-v2/sys/arm64/linux/linux.h (revision 347076) @@ -1,321 +1,313 @@ /*- * Copyright (c) 1994-1996 Søren Schmidt * Copyright (c) 2013 Dmitry Chagin * Copyright (c) 2018 Turing Robotic Industries Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * $FreeBSD$ */ #ifndef _ARM64_LINUX_H_ #define _ARM64_LINUX_H_ #include #include /* Debugging support */ #define DEBUG extern u_char linux_debug_map[]; #define ldebug(name) isclr(linux_debug_map, LINUX_SYS_linux_ ## name) #define ARGS(nm, fmt) "linux(%ld/%ld): "#nm"("fmt")\n", \ (long)td->td_proc->p_pid, (long)td->td_tid #define LMSG(fmt) "linux(%ld/%ld): "fmt"\n", \ (long)td->td_proc->p_pid, (long)td->td_tid #define LINUX_DTRACE linuxulator #define PTRIN(v) (void *)(v) #define PTROUT(v) (uintptr_t)(v) #define CP(src,dst,fld) do { (dst).fld = (src).fld; } while (0) #define CP2(src,dst,sfld,dfld) do { (dst).dfld = (src).sfld; } while (0) #define PTRIN_CP(src,dst,fld) \ do { (dst).fld = PTRIN((src).fld); } while (0) /* Provide a separate set of types for the Linux types */ typedef int32_t l_int; typedef int64_t l_long; typedef int16_t l_short; typedef uint32_t l_uint; typedef uint64_t l_ulong; typedef uint16_t l_ushort; typedef l_ulong l_uintptr_t; typedef l_long l_clock_t; typedef l_int l_daddr_t; typedef l_ulong l_dev_t; typedef l_uint l_gid_t; typedef l_ushort l_gid16_t; /* XXX */ typedef l_uint l_uid_t; typedef l_ushort l_uid16_t; /* XXX */ typedef l_ulong l_ino_t; typedef l_int l_key_t; typedef l_long l_loff_t; typedef l_uint l_mode_t; typedef l_long l_off_t; typedef l_int l_pid_t; typedef l_ulong l_size_t; typedef l_long l_suseconds_t; typedef l_long l_time_t; typedef l_int l_timer_t; /* XXX */ typedef l_ulong l_fd_mask; typedef struct { l_int val[2]; } l_fsid_t; typedef struct { l_time_t tv_sec; l_suseconds_t tv_usec; } l_timeval; #define l_fd_set fd_set /* Miscellaneous */ #define LINUX_AT_COUNT 20 struct l___sysctl_args { l_uintptr_t name; l_int nlen; l_uintptr_t oldval; l_uintptr_t oldlenp; l_uintptr_t newval; l_uintptr_t newlen; l_ulong __spare[4]; }; /* Resource limits */ #define LINUX_RLIMIT_CPU 0 #define LINUX_RLIMIT_FSIZE 1 #define LINUX_RLIMIT_DATA 2 #define LINUX_RLIMIT_STACK 3 #define LINUX_RLIMIT_CORE 4 #define LINUX_RLIMIT_RSS 5 #define LINUX_RLIMIT_NPROC 6 #define LINUX_RLIMIT_NOFILE 7 #define LINUX_RLIMIT_MEMLOCK 8 #define LINUX_RLIMIT_AS 9 /* Address space limit */ #define LINUX_RLIM_NLIMITS 10 struct l_rlimit { l_ulong rlim_cur; l_ulong rlim_max; }; /* stat family of syscalls */ struct l_timespec { l_time_t tv_sec; l_long tv_nsec; }; struct l_newstat { l_dev_t st_dev; l_ino_t st_ino; l_uint st_mode; l_uint st_nlink; l_uid_t st_uid; l_gid_t st_gid; l_dev_t st_rdev; l_ulong __st_pad1; l_off_t st_size; l_int st_blksize; l_int __st_pad2; l_long st_blocks; struct l_timespec st_atim; struct l_timespec st_mtim; struct l_timespec st_ctim; l_uint __unused1; l_uint __unused2; }; /* sigaction flags */ #define LINUX_SA_NOCLDSTOP 0x00000001 #define LINUX_SA_NOCLDWAIT 0x00000002 #define LINUX_SA_SIGINFO 0x00000004 #define LINUX_SA_RESTORER 0x04000000 #define LINUX_SA_ONSTACK 0x08000000 #define LINUX_SA_RESTART 0x10000000 #define LINUX_SA_INTERRUPT 0x20000000 /* XXX */ #define LINUX_SA_NOMASK 0x40000000 /* SA_NODEFER */ #define LINUX_SA_ONESHOT 0x80000000 /* 
SA_RESETHAND */ /* sigprocmask actions */ #define LINUX_SIG_BLOCK 0 #define LINUX_SIG_UNBLOCK 1 #define LINUX_SIG_SETMASK 2 /* sigaltstack */ #define LINUX_MINSIGSTKSZ 2048 /* XXX */ typedef void (*l_handler_t)(l_int); typedef struct { l_handler_t lsa_handler; l_sigset_t lsa_mask; l_ulong lsa_flags; l_uintptr_t lsa_restorer; } l_sigaction_t; /* XXX */ typedef struct { l_uintptr_t ss_sp; l_int ss_flags; l_size_t ss_size; } l_stack_t; #define LINUX_SI_PREAMBLE_SIZE (4 * sizeof(int)) #define LINUX_SI_MAX_SIZE 128 #define LINUX_SI_PAD_SIZE ((LINUX_SI_MAX_SIZE - \ LINUX_SI_PREAMBLE_SIZE) / sizeof(l_int)) typedef union l_sigval { l_int sival_int; l_uintptr_t sival_ptr; } l_sigval_t; typedef struct l_siginfo { l_int lsi_signo; l_int lsi_errno; l_int lsi_code; union { l_int _pad[LINUX_SI_PAD_SIZE]; struct { l_pid_t _pid; l_uid_t _uid; } _kill; struct { l_timer_t _tid; l_int _overrun; char _pad[sizeof(l_uid_t) - sizeof(int)]; union l_sigval _sigval; l_uint _sys_private; } _timer; struct { l_pid_t _pid; /* sender's pid */ l_uid_t _uid; /* sender's uid */ union l_sigval _sigval; } _rt; struct { l_pid_t _pid; /* which child */ l_uid_t _uid; /* sender's uid */ l_int _status; /* exit code */ l_clock_t _utime; l_clock_t _stime; } _sigchld; struct { l_uintptr_t _addr; /* Faulting insn/memory ref. */ } _sigfault; struct { l_long _band; /* POLL_IN,POLL_OUT,POLL_MSG */ l_int _fd; } _sigpoll; } _sifields; } l_siginfo_t; #define lsi_pid _sifields._kill._pid #define lsi_uid _sifields._kill._uid #define lsi_tid _sifields._timer._tid #define lsi_overrun _sifields._timer._overrun #define lsi_sys_private _sifields._timer._sys_private #define lsi_status _sifields._sigchld._status #define lsi_utime _sifields._sigchld._utime #define lsi_stime _sifields._sigchld._stime #define lsi_value _sifields._rt._sigval #define lsi_int _sifields._rt._sigval.sival_int #define lsi_ptr _sifields._rt._sigval.sival_ptr #define lsi_addr _sifields._sigfault._addr #define lsi_band _sifields._sigpoll._band #define lsi_fd _sifields._sigpoll._fd union l_semun { l_int val; l_uintptr_t buf; l_uintptr_t array; l_uintptr_t __buf; l_uintptr_t __pad; }; -struct l_sockaddr { - l_ushort sa_family; - char sa_data[14]; -}; - struct l_ifmap { l_ulong mem_start; l_ulong mem_end; l_ushort base_addr; u_char irq; u_char dma; u_char port; } __packed; - -#define LINUX_IFHWADDRLEN 6 -#define LINUX_IFNAMSIZ 16 struct l_ifreq { union { char ifrn_name[LINUX_IFNAMSIZ]; } ifr_ifrn; union { struct l_sockaddr ifru_addr; struct l_sockaddr ifru_dstaddr; struct l_sockaddr ifru_broadaddr; struct l_sockaddr ifru_netmask; struct l_sockaddr ifru_hwaddr; l_short ifru_flags[1]; l_int ifru_ivalue; l_int ifru_mtu; struct l_ifmap ifru_map; char ifru_slave[LINUX_IFNAMSIZ]; l_uintptr_t ifru_data; } ifr_ifru; } __packed; #define ifr_name ifr_ifrn.ifrn_name /* Interface name */ #define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */ #define ifr_ifindex ifr_ifru.ifru_ivalue /* Interface index */ #define linux_copyout_rusage(r, u) copyout(r, u, sizeof(*r)) /* robust futexes */ struct linux_robust_list { l_uintptr_t next; }; struct linux_robust_list_head { struct linux_robust_list list; l_long futex_offset; l_uintptr_t pending_list; }; #endif /* _ARM64_LINUX_H_ */ Index: projects/runtime-coverage-v2/sys/compat/linprocfs/linprocfs.c =================================================================== --- projects/runtime-coverage-v2/sys/compat/linprocfs/linprocfs.c (revision 347075) +++ projects/runtime-coverage-v2/sys/compat/linprocfs/linprocfs.c (revision 347076) @@ -1,1754 +1,1817 
@@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav * Copyright (c) 1999 Pierre Beyssac * Copyright (c) 1993 Jan-Simon Pendry * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)procfs_status.c 8.4 (Berkeley) 6/15/94 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__i386__) || defined(__amd64__) #include #include #endif /* __i386__ || __amd64__ */ #include #include #include #include #include #include /* * Various conversion macros */ #define T2J(x) ((long)(((x) * 100ULL) / (stathz ? stathz : hz))) /* ticks to jiffies */ #define T2CS(x) ((unsigned long)(((x) * 100ULL) / (stathz ? stathz : hz))) /* ticks to centiseconds */ #define T2S(x) ((x) / (stathz ? 
stathz : hz)) /* ticks to seconds */ #define B2K(x) ((x) >> 10) /* bytes to kbytes */ #define B2P(x) ((x) >> PAGE_SHIFT) /* bytes to pages */ #define P2B(x) ((x) << PAGE_SHIFT) /* pages to bytes */ #define P2K(x) ((x) << (PAGE_SHIFT - 10)) /* pages to kbytes */ #define TV2J(x) ((x)->tv_sec * 100UL + (x)->tv_usec / 10000) /** * @brief Mapping of ki_stat in struct kinfo_proc to the linux state * * The linux procfs state field displays one of the characters RSDZTW to * denote running, sleeping in an interruptible wait, waiting in an * uninterruptible disk sleep, a zombie process, process is being traced * or stopped, or process is paging respectively. * * Our struct kinfo_proc contains the variable ki_stat which contains a * value out of SIDL, SRUN, SSLEEP, SSTOP, SZOMB, SWAIT and SLOCK. * * This character array is used with ki_stati-1 as an index and tries to * map our states to suitable linux states. */ static char linux_state[] = "RRSTZDD"; /* * Filler function for proc/meminfo */ static int linprocfs_domeminfo(PFS_FILL_ARGS) { unsigned long memtotal; /* total memory in bytes */ unsigned long memused; /* used memory in bytes */ unsigned long memfree; /* free memory in bytes */ unsigned long buffers, cached; /* buffer / cache memory ??? */ unsigned long long swaptotal; /* total swap space in bytes */ unsigned long long swapused; /* used swap space in bytes */ unsigned long long swapfree; /* free swap space in bytes */ int i, j; memtotal = physmem * PAGE_SIZE; /* * The correct thing here would be: * memfree = vm_free_count() * PAGE_SIZE; memused = memtotal - memfree; * * but it might mislead linux binaries into thinking there * is very little memory left, so we cheat and tell them that * all memory that isn't wired down is free. */ memused = vm_wire_count() * PAGE_SIZE; memfree = memtotal - memused; swap_pager_status(&i, &j); swaptotal = (unsigned long long)i * PAGE_SIZE; swapused = (unsigned long long)j * PAGE_SIZE; swapfree = swaptotal - swapused; /* * We'd love to be able to write: * buffers = bufspace; * * but bufspace is internal to vfs_bio.c and we don't feel * like unstaticizing it just for linprocfs's sake. */ buffers = 0; cached = vm_inactive_count() * PAGE_SIZE; sbuf_printf(sb, "MemTotal: %9lu kB\n" "MemFree: %9lu kB\n" "Buffers: %9lu kB\n" "Cached: %9lu kB\n" "SwapTotal:%9llu kB\n" "SwapFree: %9llu kB\n", B2K(memtotal), B2K(memfree), B2K(buffers), B2K(cached), B2K(swaptotal), B2K(swapfree)); return (0); } #if defined(__i386__) || defined(__amd64__) /* * Filler function for proc/cpuinfo (i386 & amd64 version) */ static int linprocfs_docpuinfo(PFS_FILL_ARGS) { int hw_model[2]; char model[128]; uint64_t freq; size_t size; u_int cache_size[4]; int fqmhz, fqkhz; int i, j; /* * We default the flags to include all non-conflicting flags, * and the Intel versions of conflicting flags. 
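 *
 * Each name table below maps one CPUID feature word to the names
 * Linux prints in /proc/cpuinfo: entry j in a table names bit
 * (1 << j) of the corresponding word, and an empty string marks a
 * bit that is deliberately left unreported. The print loops
 * further down skip the empty entries.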
*/ - static char *flags[] = { - "fpu", "vme", "de", "pse", "tsc", - "msr", "pae", "mce", "cx8", "apic", - "sep", "sep", "mtrr", "pge", "mca", - "cmov", "pat", "pse36", "pn", "b19", - "b20", "b21", "mmxext", "mmx", "fxsr", - "xmm", "sse2", "b27", "b28", "b29", - "3dnowext", "3dnow" + static char *cpu_feature_names[] = { + /* 0 */ "fpu", "vme", "de", "pse", + /* 4 */ "tsc", "msr", "pae", "mce", + /* 8 */ "cx8", "apic", "", "sep", + /* 12 */ "mtrr", "pge", "mca", "cmov", + /* 16 */ "pat", "pse36", "pn", "clflush", + /* 20 */ "", "dts", "acpi", "mmx", + /* 24 */ "fxsr", "sse", "sse2", "ss", + /* 28 */ "ht", "tm", "ia64", "pbe" }; + static char *amd_feature_names[] = { + /* 0 */ "", "", "", "", + /* 4 */ "", "", "", "", + /* 8 */ "", "", "", "syscall", + /* 12 */ "", "", "", "", + /* 16 */ "", "", "", "mp", + /* 20 */ "nx", "", "mmxext", "", + /* 24 */ "", "fxsr_opt", "pdpe1gb", "rdtscp", + /* 28 */ "", "lm", "3dnowext", "3dnow" + }; + + static char *cpu_feature2_names[] = { + /* 0 */ "pni", "pclmulqdq", "dtes3", "monitor", + /* 4 */ "ds_cpl", "vmx", "smx", "est", + /* 8 */ "tm2", "ssse3", "cid", "sdbg", + /* 12 */ "fma", "cx16", "xptr", "pdcm", + /* 16 */ "", "pcid", "dca", "sse4_1", + /* 20 */ "sse4_2", "x2apic", "movbe", "popcnt", + /* 24 */ "tsc_deadline_timer", "aes", "xsave", "", + /* 28 */ "avx", "f16c", "rdrand", "hypervisor" + }; + + static char *amd_feature2_names[] = { + /* 0 */ "lahf_lm", "cmp_legacy", "svm", "extapic", + /* 4 */ "cr8_legacy", "abm", "sse4a", "misalignsse", + /* 8 */ "3dnowprefetch", "osvw", "ibs", "xop", + /* 12 */ "skinit", "wdt", "", "lwp", + /* 16 */ "fma4", "tce", "", "nodeid_msr", + /* 20 */ "", "tbm", "topoext", "perfctr_core", + /* 24 */ "perfctr_nb", "", "bpext", "ptsc", + /* 28 */ "perfctr_llc", "mwaitx", "", "" + }; + + static char *cpu_stdext_feature_names[] = { + /* 0 */ "fsgsbase", "tsc_adjust", "", "bmi1", + /* 4 */ "hle", "avx2", "", "smep", + /* 8 */ "bmi2", "erms", "invpcid", "rtm", + /* 12 */ "cqm", "", "mpx", "rdt_a", + /* 16 */ "avx512f", "avx512dq", "rdseed", "adx", + /* 20 */ "smap", "avx512ifma", "", "clflushopt", + /* 24 */ "clwb", "intel_pt", "avx512pf", "avx512er", + /* 28 */ "avx512cd", "sha_ni", "avx512bw", "avx512vl" + }; + static char *power_flags[] = { "ts", "fid", "vid", "ttp", "tm", "stc", "100mhzsteps", "hwpstate", "", "cpb", "eff_freq_ro", "proc_feedback", "acc_power", }; hw_model[0] = CTL_HW; hw_model[1] = HW_MODEL; model[0] = '\0'; size = sizeof(model); if (kernel_sysctl(td, hw_model, 2, &model, &size, 0, 0, 0, 0) != 0) strcpy(model, "unknown"); #ifdef __i386__ switch (cpu_vendor_id) { case CPU_VENDOR_AMD: if (cpu_class < CPUCLASS_686) - flags[16] = "fcmov"; + cpu_feature_names[16] = "fcmov"; break; case CPU_VENDOR_CYRIX: - flags[24] = "cxmmx"; + cpu_feature_names[24] = "cxmmx"; break; } #endif if (cpu_exthigh >= 0x80000006) do_cpuid(0x80000006, cache_size); else memset(cache_size, 0, sizeof(cache_size)); for (i = 0; i < mp_ncpus; ++i) { fqmhz = 0; fqkhz = 0; freq = atomic_load_acq_64(&tsc_freq); if (freq != 0) { fqmhz = (freq + 4999) / 1000000; fqkhz = ((freq + 4999) / 10000) % 100; } sbuf_printf(sb, "processor\t: %d\n" "vendor_id\t: %.20s\n" "cpu family\t: %u\n" "model\t\t: %u\n" "model name\t: %s\n" "stepping\t: %u\n" "cpu MHz\t\t: %d.%02d\n" "cache size\t: %d KB\n" "physical id\t: %d\n" "siblings\t: %d\n" "core id\t\t: %d\n" "cpu cores\t: %d\n" "apicid\t\t: %d\n" "initial apicid\t: %d\n" "fpu\t\t: %s\n" "fpu_exception\t: %s\n" "cpuid level\t: %d\n" "wp\t\t: %s\n", i, cpu_vendor, CPUID_TO_FAMILY(cpu_id), 
CPUID_TO_MODEL(cpu_id), model, cpu_id & CPUID_STEPPING, fqmhz, fqkhz, (cache_size[2] >> 16), 0, mp_ncpus, i, mp_ncpus, i, i, /*cpu_id & CPUID_LOCAL_APIC_ID ??*/ (cpu_feature & CPUID_FPU) ? "yes" : "no", "yes", CPUID_TO_FAMILY(cpu_id), "yes"); sbuf_cat(sb, "flags\t\t:"); - for (j = 0; j < nitems(flags); j++) - if (cpu_feature & (1 << j)) - sbuf_printf(sb, " %s", flags[j]); + for (j = 0; j < nitems(cpu_feature_names); j++) + if (cpu_feature & (1 << j) && + cpu_feature_names[j][0] != '\0') + sbuf_printf(sb, " %s", cpu_feature_names[j]); + for (j = 0; j < nitems(amd_feature_names); j++) + if (amd_feature & (1 << j) && + amd_feature_names[j][0] != '\0') + sbuf_printf(sb, " %s", amd_feature_names[j]); + for (j = 0; j < nitems(cpu_feature2_names); j++) + if (cpu_feature2 & (1 << j) && + cpu_feature2_names[j][0] != '\0') + sbuf_printf(sb, " %s", cpu_feature2_names[j]); + for (j = 0; j < nitems(amd_feature2_names); j++) + if (amd_feature2 & (1 << j) && + amd_feature2_names[j][0] != '\0') + sbuf_printf(sb, " %s", amd_feature2_names[j]); + for (j = 0; j < nitems(cpu_stdext_feature_names); j++) + if (cpu_stdext_feature & (1 << j) && + cpu_stdext_feature_names[j][0] != '\0') + sbuf_printf(sb, " %s", + cpu_stdext_feature_names[j]); sbuf_cat(sb, "\n"); sbuf_printf(sb, "bugs\t\t: %s\n" "bogomips\t: %d.%02d\n" "clflush size\t: %d\n" "cache_alignment\t: %d\n" "address sizes\t: %d bits physical, %d bits virtual\n", #if defined(I586_CPU) && !defined(NO_F00F_HACK) (has_f00f_bug) ? "Intel F00F" : "", #else "", #endif fqmhz, fqkhz, cpu_clflush_line_size, cpu_clflush_line_size, cpu_maxphyaddr, (cpu_maxphyaddr > 32) ? 48 : 0); sbuf_cat(sb, "power management: "); for (j = 0; j < nitems(power_flags); j++) if (amd_pminfo & (1 << j)) sbuf_printf(sb, " %s", power_flags[j]); sbuf_cat(sb, "\n\n"); /* XXX per-cpu vendor / class / model / id? */ } sbuf_cat(sb, "\n"); return (0); } #else /* ARM64TODO: implement non-stubbed linprocfs_docpuinfo */ static int linprocfs_docpuinfo(PFS_FILL_ARGS) { int i; for (i = 0; i < mp_ncpus; ++i) { sbuf_printf(sb, "processor\t: %d\n" "BogoMIPS\t: %d.%02d\n", i, 0, 0); sbuf_cat(sb, "Features\t: "); sbuf_cat(sb, "\n"); sbuf_printf(sb, "CPU implementer\t: \n" "CPU architecture: \n" "CPU variant\t: 0x%x\n" "CPU part\t: 0x%x\n" "CPU revision\t: %d\n", 0, 0, 0); sbuf_cat(sb, "\n"); } return (0); } #endif /* __i386__ || __amd64__ */ /* * Filler function for proc/mtab * * This file doesn't exist in Linux' procfs, but is included here so * users can symlink /compat/linux/etc/mtab to /proc/mtab */ static int linprocfs_domtab(PFS_FILL_ARGS) { struct nameidata nd; const char *lep; char *dlep, *flep, *mntto, *mntfrom, *fstype; size_t lep_len; int error; struct statfs *buf, *sp; size_t count; /* resolve symlinks etc. 
in the emulation tree prefix */ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, linux_emul_path, td); flep = NULL; error = namei(&nd); lep = linux_emul_path; if (error == 0) { if (vn_fullpath(td, nd.ni_vp, &dlep, &flep) == 0) lep = dlep; vrele(nd.ni_vp); } lep_len = strlen(lep); buf = NULL; error = kern_getfsstat(td, &buf, SIZE_T_MAX, &count, UIO_SYSSPACE, MNT_WAIT); if (error != 0) { free(buf, M_TEMP); free(flep, M_TEMP); return (error); } for (sp = buf; count > 0; sp++, count--) { /* determine device name */ mntfrom = sp->f_mntfromname; /* determine mount point */ mntto = sp->f_mntonname; if (strncmp(mntto, lep, lep_len) == 0 && mntto[lep_len] == '/') mntto += lep_len; /* determine fs type */ fstype = sp->f_fstypename; if (strcmp(fstype, pn->pn_info->pi_name) == 0) mntfrom = fstype = "proc"; else if (strcmp(fstype, "procfs") == 0) continue; if (strcmp(fstype, "linsysfs") == 0) { sbuf_printf(sb, "/sys %s sysfs %s", mntto, sp->f_flags & MNT_RDONLY ? "ro" : "rw"); } else { /* For Linux msdosfs is called vfat */ if (strcmp(fstype, "msdosfs") == 0) fstype = "vfat"; sbuf_printf(sb, "%s %s %s %s", mntfrom, mntto, fstype, sp->f_flags & MNT_RDONLY ? "ro" : "rw"); } #define ADD_OPTION(opt, name) \ if (sp->f_flags & (opt)) sbuf_printf(sb, "," name); ADD_OPTION(MNT_SYNCHRONOUS, "sync"); ADD_OPTION(MNT_NOEXEC, "noexec"); ADD_OPTION(MNT_NOSUID, "nosuid"); ADD_OPTION(MNT_UNION, "union"); ADD_OPTION(MNT_ASYNC, "async"); ADD_OPTION(MNT_SUIDDIR, "suiddir"); ADD_OPTION(MNT_NOSYMFOLLOW, "nosymfollow"); ADD_OPTION(MNT_NOATIME, "noatime"); #undef ADD_OPTION /* a real Linux mtab will also show NFS options */ sbuf_printf(sb, " 0 0\n"); } free(buf, M_TEMP); free(flep, M_TEMP); return (error); } /* * Filler function for proc/partitions */ static int linprocfs_dopartitions(PFS_FILL_ARGS) { struct g_class *cp; struct g_geom *gp; struct g_provider *pp; int major, minor; g_topology_lock(); sbuf_printf(sb, "major minor #blocks name rio rmerge rsect " "ruse wio wmerge wsect wuse running use aveq\n"); LIST_FOREACH(cp, &g_classes, class) { if (strcmp(cp->name, "DISK") == 0 || strcmp(cp->name, "PART") == 0) LIST_FOREACH(gp, &cp->geom, geom) { LIST_FOREACH(pp, &gp->provider, provider) { if (linux_driver_get_major_minor( pp->name, &major, &minor) != 0) { major = 0; minor = 0; } sbuf_printf(sb, "%d %d %lld %s " "%d %d %d %d %d " "%d %d %d %d %d %d\n", major, minor, (long long)pp->mediasize, pp->name, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } } } g_topology_unlock(); return (0); } /* * Filler function for proc/stat * * Output depends on kernel version: * * v2.5.40 <= * user nice system idle * v2.5.41 * user nice system idle iowait * v2.6.11 * user nice system idle iowait irq softirq steal * v2.6.24 * user nice system idle iowait irq softirq steal guest * v2.6.33 >= * user nice system idle iowait irq softirq steal guest guest_nice */ static int linprocfs_dostat(PFS_FILL_ARGS) { struct pcpu *pcpu; long cp_time[CPUSTATES]; long *cp; struct timeval boottime; int i; char *zero_pad; bool has_intr = true; if (linux_kernver(td) >= LINUX_KERNVER(2,6,33)) { zero_pad = " 0 0 0 0\n"; } else if (linux_kernver(td) >= LINUX_KERNVER(2,6,24)) { zero_pad = " 0 0 0\n"; } else if (linux_kernver(td) >= LINUX_KERNVER(2,6,11)) { zero_pad = " 0 0\n"; } else if (linux_kernver(td) >= LINUX_KERNVER(2,5,41)) { has_intr = false; zero_pad = " 0\n"; } else { has_intr = false; zero_pad = "\n"; } read_cpu_time(cp_time); getboottime(&boottime); /* Parameters common to all versions */ sbuf_printf(sb, "cpu %lu %lu %lu %lu", T2J(cp_time[CP_USER]), T2J(cp_time[CP_NICE]), 
T2J(cp_time[CP_SYS]), T2J(cp_time[CP_IDLE])); /* Print interrupt stats if available */ if (has_intr) { sbuf_printf(sb, " 0 %lu", T2J(cp_time[CP_INTR])); } /* Pad out remaining fields depending on version */ sbuf_printf(sb, "%s", zero_pad); CPU_FOREACH(i) { pcpu = pcpu_find(i); cp = pcpu->pc_cp_time; sbuf_printf(sb, "cpu%d %lu %lu %lu %lu", i, T2J(cp[CP_USER]), T2J(cp[CP_NICE]), T2J(cp[CP_SYS]), T2J(cp[CP_IDLE])); if (has_intr) { sbuf_printf(sb, " 0 %lu", T2J(cp[CP_INTR])); } sbuf_printf(sb, "%s", zero_pad); } sbuf_printf(sb, "disk 0 0 0 0\n" "page %ju %ju\n" "swap %ju %ju\n" "intr %ju\n" "ctxt %ju\n" "btime %lld\n", (uintmax_t)VM_CNT_FETCH(v_vnodepgsin), (uintmax_t)VM_CNT_FETCH(v_vnodepgsout), (uintmax_t)VM_CNT_FETCH(v_swappgsin), (uintmax_t)VM_CNT_FETCH(v_swappgsout), (uintmax_t)VM_CNT_FETCH(v_intr), (uintmax_t)VM_CNT_FETCH(v_swtch), (long long)boottime.tv_sec); return (0); } static int linprocfs_doswaps(PFS_FILL_ARGS) { struct xswdev xsw; uintmax_t total, used; int n; char devname[SPECNAMELEN + 1]; sbuf_printf(sb, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n"); for (n = 0; ; n++) { if (swap_dev_info(n, &xsw, devname, sizeof(devname)) != 0) break; total = (uintmax_t)xsw.xsw_nblks * PAGE_SIZE / 1024; used = (uintmax_t)xsw.xsw_used * PAGE_SIZE / 1024; /* * The space (rather than a tab) after the device name is * deliberate; Linux formats the line the same way. */ sbuf_printf(sb, "/dev/%-34s unknown\t\t%jd\t%jd\t-1\n", devname, total, used); } return (0); } /* * Filler function for proc/uptime */ static int linprocfs_douptime(PFS_FILL_ARGS) { long cp_time[CPUSTATES]; struct timeval tv; getmicrouptime(&tv); read_cpu_time(cp_time); sbuf_printf(sb, "%lld.%02ld %ld.%02lu\n", (long long)tv.tv_sec, tv.tv_usec / 10000, T2S(cp_time[CP_IDLE] / mp_ncpus), T2CS(cp_time[CP_IDLE] / mp_ncpus) % 100); return (0); } /* * Get OS build date */ static void linprocfs_osbuild(struct thread *td, struct sbuf *sb) { #if 0 char osbuild[256]; char *cp1, *cp2; strncpy(osbuild, version, 256); osbuild[255] = '\0'; cp1 = strstr(osbuild, "\n"); cp2 = strstr(osbuild, ":"); if (cp1 && cp2) { *cp1 = *cp2 = '\0'; cp1 = strstr(osbuild, "#"); } else cp1 = NULL; if (cp1) sbuf_printf(sb, "%s%s", cp1, cp2 + 1); else #endif sbuf_cat(sb, "#4 Sun Dec 18 04:30:00 CET 1977"); } /* * Get OS builder */ static void linprocfs_osbuilder(struct thread *td, struct sbuf *sb) { #if 0 char builder[256]; char *cp; cp = strstr(version, "\n "); if (cp) { strncpy(builder, cp + 5, 256); builder[255] = '\0'; cp = strstr(builder, ":"); if (cp) *cp = '\0'; } if (cp) sbuf_cat(sb, builder); else #endif sbuf_cat(sb, "des@freebsd.org"); } /* * Filler function for proc/version */ static int linprocfs_doversion(PFS_FILL_ARGS) { char osname[LINUX_MAX_UTSNAME]; char osrelease[LINUX_MAX_UTSNAME]; linux_get_osname(td, osname); linux_get_osrelease(td, osrelease); sbuf_printf(sb, "%s version %s (", osname, osrelease); linprocfs_osbuilder(td, sb); sbuf_cat(sb, ") (gcc version " __VERSION__ ") "); linprocfs_osbuild(td, sb); sbuf_cat(sb, "\n"); return (0); } /* * Filler function for proc/loadavg */ static int linprocfs_doloadavg(PFS_FILL_ARGS) { sbuf_printf(sb, "%d.%02d %d.%02d %d.%02d %d/%d %d\n", (int)(averunnable.ldavg[0] / averunnable.fscale), (int)(averunnable.ldavg[0] * 100 / averunnable.fscale % 100), (int)(averunnable.ldavg[1] / averunnable.fscale), (int)(averunnable.ldavg[1] * 100 / averunnable.fscale % 100), (int)(averunnable.ldavg[2] / averunnable.fscale), (int)(averunnable.ldavg[2] * 100 / averunnable.fscale % 100), 1, /* number of running tasks */ nprocs, /* number of tasks */
lastpid /* the last pid */ ); return (0); } /* * Filler function for proc/pid/stat */ static int linprocfs_doprocstat(PFS_FILL_ARGS) { struct kinfo_proc kp; struct timeval boottime; char state; static int ratelimit = 0; vm_offset_t startcode, startdata; getboottime(&boottime); sx_slock(&proctree_lock); PROC_LOCK(p); fill_kinfo_proc(p, &kp); sx_sunlock(&proctree_lock); if (p->p_vmspace) { startcode = (vm_offset_t)p->p_vmspace->vm_taddr; startdata = (vm_offset_t)p->p_vmspace->vm_daddr; } else { startcode = 0; startdata = 0; } sbuf_printf(sb, "%d", p->p_pid); #define PS_ADD(name, fmt, arg) sbuf_printf(sb, " " fmt, arg) PS_ADD("comm", "(%s)", p->p_comm); if (kp.ki_stat > sizeof(linux_state)) { state = 'R'; if (ratelimit == 0) { printf("linprocfs: don't know how to handle unknown FreeBSD state %d/%zd, mapping to R\n", kp.ki_stat, sizeof(linux_state)); ++ratelimit; } } else state = linux_state[kp.ki_stat - 1]; PS_ADD("state", "%c", state); PS_ADD("ppid", "%d", p->p_pptr ? p->p_pptr->p_pid : 0); PS_ADD("pgrp", "%d", p->p_pgid); PS_ADD("session", "%d", p->p_session->s_sid); PROC_UNLOCK(p); PS_ADD("tty", "%ju", (uintmax_t)kp.ki_tdev); PS_ADD("tpgid", "%d", kp.ki_tpgid); PS_ADD("flags", "%u", 0); /* XXX */ PS_ADD("minflt", "%lu", kp.ki_rusage.ru_minflt); PS_ADD("cminflt", "%lu", kp.ki_rusage_ch.ru_minflt); PS_ADD("majflt", "%lu", kp.ki_rusage.ru_majflt); PS_ADD("cmajflt", "%lu", kp.ki_rusage_ch.ru_majflt); PS_ADD("utime", "%ld", TV2J(&kp.ki_rusage.ru_utime)); PS_ADD("stime", "%ld", TV2J(&kp.ki_rusage.ru_stime)); PS_ADD("cutime", "%ld", TV2J(&kp.ki_rusage_ch.ru_utime)); PS_ADD("cstime", "%ld", TV2J(&kp.ki_rusage_ch.ru_stime)); PS_ADD("priority", "%d", kp.ki_pri.pri_user); PS_ADD("nice", "%d", kp.ki_nice); /* 19 (nicest) to -19 */ PS_ADD("0", "%d", 0); /* removed field */ PS_ADD("itrealvalue", "%d", 0); /* XXX */ PS_ADD("starttime", "%lu", TV2J(&kp.ki_start) - TV2J(&boottime)); PS_ADD("vsize", "%ju", P2K((uintmax_t)kp.ki_size)); PS_ADD("rss", "%ju", (uintmax_t)kp.ki_rssize); PS_ADD("rlim", "%lu", kp.ki_rusage.ru_maxrss); PS_ADD("startcode", "%ju", (uintmax_t)startcode); PS_ADD("endcode", "%ju", (uintmax_t)startdata); PS_ADD("startstack", "%u", 0); /* XXX */ PS_ADD("kstkesp", "%u", 0); /* XXX */ PS_ADD("kstkeip", "%u", 0); /* XXX */ PS_ADD("signal", "%u", 0); /* XXX */ PS_ADD("blocked", "%u", 0); /* XXX */ PS_ADD("sigignore", "%u", 0); /* XXX */ PS_ADD("sigcatch", "%u", 0); /* XXX */ PS_ADD("wchan", "%u", 0); /* XXX */ PS_ADD("nswap", "%lu", kp.ki_rusage.ru_nswap); PS_ADD("cnswap", "%lu", kp.ki_rusage_ch.ru_nswap); PS_ADD("exitsignal", "%d", 0); /* XXX */ PS_ADD("processor", "%u", kp.ki_lastcpu); PS_ADD("rt_priority", "%u", 0); /* XXX */ /* >= 2.5.19 */ PS_ADD("policy", "%u", kp.ki_pri.pri_class); /* >= 2.5.19 */ #undef PS_ADD sbuf_putc(sb, '\n'); return (0); } /* * Filler function for proc/pid/statm */ static int linprocfs_doprocstatm(PFS_FILL_ARGS) { struct kinfo_proc kp; segsz_t lsize; sx_slock(&proctree_lock); PROC_LOCK(p); fill_kinfo_proc(p, &kp); PROC_UNLOCK(p); sx_sunlock(&proctree_lock); /* * See comments in linprocfs_doprocstatus() regarding the * computation of lsize. 
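 * In short, lsize counts everything that is not data, stack, or text * (minus one page) as shared-library pages, the same rough VmLib * approximation described there.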
*/ /* size resident share trs drs lrs dt */ sbuf_printf(sb, "%ju ", B2P((uintmax_t)kp.ki_size)); sbuf_printf(sb, "%ju ", (uintmax_t)kp.ki_rssize); sbuf_printf(sb, "%ju ", (uintmax_t)0); /* XXX */ sbuf_printf(sb, "%ju ", (uintmax_t)kp.ki_tsize); sbuf_printf(sb, "%ju ", (uintmax_t)(kp.ki_dsize + kp.ki_ssize)); lsize = B2P(kp.ki_size) - kp.ki_dsize - kp.ki_ssize - kp.ki_tsize - 1; sbuf_printf(sb, "%ju ", (uintmax_t)lsize); sbuf_printf(sb, "%ju\n", (uintmax_t)0); /* XXX */ return (0); } /* * Filler function for proc/pid/status */ static int linprocfs_doprocstatus(PFS_FILL_ARGS) { struct kinfo_proc kp; char *state; segsz_t lsize; struct thread *td2; struct sigacts *ps; l_sigset_t siglist, sigignore, sigcatch; int i; sx_slock(&proctree_lock); PROC_LOCK(p); td2 = FIRST_THREAD_IN_PROC(p); /* XXXKSE pretend only one thread */ if (P_SHOULDSTOP(p)) { state = "T (stopped)"; } else { switch(p->p_state) { case PRS_NEW: state = "I (idle)"; break; case PRS_NORMAL: if (p->p_flag & P_WEXIT) { state = "X (exiting)"; break; } switch(td2->td_state) { case TDS_INHIBITED: state = "S (sleeping)"; break; case TDS_RUNQ: case TDS_RUNNING: state = "R (running)"; break; default: state = "? (unknown)"; break; } break; case PRS_ZOMBIE: state = "Z (zombie)"; break; default: state = "? (unknown)"; break; } } fill_kinfo_proc(p, &kp); sx_sunlock(&proctree_lock); sbuf_printf(sb, "Name:\t%s\n", p->p_comm); /* XXX escape */ sbuf_printf(sb, "State:\t%s\n", state); /* * Credentials */ sbuf_printf(sb, "Pid:\t%d\n", p->p_pid); sbuf_printf(sb, "PPid:\t%d\n", kp.ki_ppid ); sbuf_printf(sb, "TracerPid:\t%d\n", kp.ki_tracer ); sbuf_printf(sb, "Uid:\t%d %d %d %d\n", p->p_ucred->cr_ruid, p->p_ucred->cr_uid, p->p_ucred->cr_svuid, /* FreeBSD doesn't have fsuid */ p->p_ucred->cr_uid); sbuf_printf(sb, "Gid:\t%d %d %d %d\n", p->p_ucred->cr_rgid, p->p_ucred->cr_gid, p->p_ucred->cr_svgid, /* FreeBSD doesn't have fsgid */ p->p_ucred->cr_gid); sbuf_cat(sb, "Groups:\t"); for (i = 0; i < p->p_ucred->cr_ngroups; i++) sbuf_printf(sb, "%d ", p->p_ucred->cr_groups[i]); PROC_UNLOCK(p); sbuf_putc(sb, '\n'); /* * Memory * * While our approximation of VmLib may not be accurate (I * don't know of a simple way to verify it, and I'm not sure * it has much meaning anyway), I believe it's good enough. * * The same code that could (I think) accurately compute VmLib * could also compute VmLck, but I don't really care enough to * implement it. Submissions are welcome. */ sbuf_printf(sb, "VmSize:\t%8ju kB\n", B2K((uintmax_t)kp.ki_size)); sbuf_printf(sb, "VmLck:\t%8u kB\n", P2K(0)); /* XXX */ sbuf_printf(sb, "VmRSS:\t%8ju kB\n", P2K((uintmax_t)kp.ki_rssize)); sbuf_printf(sb, "VmData:\t%8ju kB\n", P2K((uintmax_t)kp.ki_dsize)); sbuf_printf(sb, "VmStk:\t%8ju kB\n", P2K((uintmax_t)kp.ki_ssize)); sbuf_printf(sb, "VmExe:\t%8ju kB\n", P2K((uintmax_t)kp.ki_tsize)); lsize = B2P(kp.ki_size) - kp.ki_dsize - kp.ki_ssize - kp.ki_tsize - 1; sbuf_printf(sb, "VmLib:\t%8ju kB\n", P2K((uintmax_t)lsize)); /* * Signal masks */ PROC_LOCK(p); bsd_to_linux_sigset(&p->p_siglist, &siglist); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); bsd_to_linux_sigset(&ps->ps_sigignore, &sigignore); bsd_to_linux_sigset(&ps->ps_sigcatch, &sigcatch); mtx_unlock(&ps->ps_mtx); PROC_UNLOCK(p); sbuf_printf(sb, "SigPnd:\t%016jx\n", siglist.__mask); /* * XXX. SigBlk - target thread's signal mask, td_sigmask. * To implement SigBlk pseudofs should support proc/tid dir entries. 
*/ sbuf_printf(sb, "SigBlk:\t%016x\n", 0); sbuf_printf(sb, "SigIgn:\t%016jx\n", sigignore.__mask); sbuf_printf(sb, "SigCgt:\t%016jx\n", sigcatch.__mask); /* * Linux also prints the capability masks, but we don't have * capabilities yet, and when we do get them they're likely to * be meaningless to Linux programs, so we lie. XXX */ sbuf_printf(sb, "CapInh:\t%016x\n", 0); sbuf_printf(sb, "CapPrm:\t%016x\n", 0); sbuf_printf(sb, "CapEff:\t%016x\n", 0); return (0); } /* * Filler function for proc/pid/cwd */ static int linprocfs_doproccwd(PFS_FILL_ARGS) { struct filedesc *fdp; struct vnode *vp; char *fullpath = "unknown"; char *freepath = NULL; fdp = p->p_fd; FILEDESC_SLOCK(fdp); vp = fdp->fd_cdir; if (vp != NULL) VREF(vp); FILEDESC_SUNLOCK(fdp); vn_fullpath(td, vp, &fullpath, &freepath); if (vp != NULL) vrele(vp); sbuf_printf(sb, "%s", fullpath); if (freepath) free(freepath, M_TEMP); return (0); } /* * Filler function for proc/pid/root */ static int linprocfs_doprocroot(PFS_FILL_ARGS) { struct filedesc *fdp; struct vnode *vp; char *fullpath = "unknown"; char *freepath = NULL; fdp = p->p_fd; FILEDESC_SLOCK(fdp); vp = jailed(p->p_ucred) ? fdp->fd_jdir : fdp->fd_rdir; if (vp != NULL) VREF(vp); FILEDESC_SUNLOCK(fdp); vn_fullpath(td, vp, &fullpath, &freepath); if (vp != NULL) vrele(vp); sbuf_printf(sb, "%s", fullpath); if (freepath) free(freepath, M_TEMP); return (0); } /* * Filler function for proc/pid/cmdline */ static int linprocfs_doproccmdline(PFS_FILL_ARGS) { int ret; PROC_LOCK(p); if ((ret = p_cansee(td, p)) != 0) { PROC_UNLOCK(p); return (ret); } /* * Mimic linux behavior and pass only processes with usermode * address space as valid. Return zero silently otherwize. */ if (p->p_vmspace == &vmspace0) { PROC_UNLOCK(p); return (0); } if (p->p_args != NULL) { sbuf_bcpy(sb, p->p_args->ar_args, p->p_args->ar_length); PROC_UNLOCK(p); return (0); } if ((p->p_flag & P_SYSTEM) != 0) { PROC_UNLOCK(p); return (0); } PROC_UNLOCK(p); ret = proc_getargv(td, p, sb); return (ret); } /* * Filler function for proc/pid/environ */ static int linprocfs_doprocenviron(PFS_FILL_ARGS) { /* * Mimic linux behavior and pass only processes with usermode * address space as valid. Return zero silently otherwize. 
*/ if (p->p_vmspace == &vmspace0) return (0); return (proc_getenvv(td, p, sb)); } static char l32_map_str[] = "%08lx-%08lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n"; static char l64_map_str[] = "%016lx-%016lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n"; static char vdso_str[] = " [vdso]"; static char stack_str[] = " [stack]"; /* * Filler function for proc/pid/maps */ static int linprocfs_doprocmaps(PFS_FILL_ARGS) { struct vmspace *vm; vm_map_t map; vm_map_entry_t entry, tmp_entry; vm_object_t obj, tobj, lobj; vm_offset_t e_start, e_end; vm_ooffset_t off = 0; vm_prot_t e_prot; unsigned int last_timestamp; char *name = "", *freename = NULL; const char *l_map_str; ino_t ino; int ref_count, shadow_count, flags; int error; struct vnode *vp; struct vattr vat; PROC_LOCK(p); error = p_candebug(td, p); PROC_UNLOCK(p); if (error) return (error); if (uio->uio_rw != UIO_READ) return (EOPNOTSUPP); error = 0; vm = vmspace_acquire_ref(p); if (vm == NULL) return (ESRCH); if (SV_CURPROC_FLAG(SV_LP64)) l_map_str = l64_map_str; else l_map_str = l32_map_str; map = &vm->vm_map; vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; entry = entry->next) { name = ""; freename = NULL; if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) continue; e_prot = entry->protection; e_start = entry->start; e_end = entry->end; obj = entry->object.vm_object; for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) { VM_OBJECT_RLOCK(tobj); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; } last_timestamp = map->timestamp; vm_map_unlock_read(map); ino = 0; if (lobj) { off = IDX_TO_OFF(lobj->size); vp = vm_object_vnode(lobj); if (vp != NULL) vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); flags = obj->flags; ref_count = obj->ref_count; shadow_count = obj->shadow_count; VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { vn_fullpath(td, vp, &name, &freename); vn_lock(vp, LK_SHARED | LK_RETRY); VOP_GETATTR(vp, &vat, td->td_ucred); ino = vat.va_fileid; vput(vp); } else if (SV_PROC_ABI(p) == SV_ABI_LINUX) { if (e_start == p->p_sysent->sv_shared_page_base) name = vdso_str; if (e_end == p->p_sysent->sv_usrstack) name = stack_str; } } else { flags = 0; ref_count = 0; shadow_count = 0; } /* * format: * start, end, access, offset, major, minor, inode, name. */ error = sbuf_printf(sb, l_map_str, (u_long)e_start, (u_long)e_end, (e_prot & VM_PROT_READ)?"r":"-", (e_prot & VM_PROT_WRITE)?"w":"-", (e_prot & VM_PROT_EXECUTE)?"x":"-", "p", (u_long)off, 0, 0, (u_long)ino, *name ? " " : "", name ); if (freename) free(freename, M_TEMP); vm_map_lock_read(map); if (error == -1) { error = 0; break; } if (last_timestamp != map->timestamp) { /* * Look again for the entry because the map was * modified while it was unlocked. Specifically, * the entry may have been clipped, merged, or deleted. 
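 * (The read lock was dropped above while the vnode path and * attributes were fetched, so the saved timestamp tells us whether * the cached entry pointer can still be trusted.)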
*/ vm_map_lookup_entry(map, e_end - 1, &tmp_entry); entry = tmp_entry; } } vm_map_unlock_read(map); vmspace_free(vm); return (error); } /* * Criteria for interface name translation */ #define IFP_IS_ETH(ifp) (ifp->if_type == IFT_ETHER) static int linux_ifname(struct ifnet *ifp, char *buffer, size_t buflen) { struct ifnet *ifscan; int ethno; IFNET_RLOCK_ASSERT(); /* Short-circuit non ethernet interfaces */ if (!IFP_IS_ETH(ifp)) return (strlcpy(buffer, ifp->if_xname, buflen)); /* Determine the (relative) unit number for ethernet interfaces */ ethno = 0; CK_STAILQ_FOREACH(ifscan, &V_ifnet, if_link) { if (ifscan == ifp) return (snprintf(buffer, buflen, "eth%d", ethno)); if (IFP_IS_ETH(ifscan)) ethno++; } return (0); } /* * Filler function for proc/net/dev */ static int linprocfs_donetdev(PFS_FILL_ARGS) { char ifname[16]; /* XXX LINUX_IFNAMSIZ */ struct ifnet *ifp; sbuf_printf(sb, "%6s|%58s|%s\n" "%6s|%58s|%58s\n", "Inter-", " Receive", " Transmit", " face", "bytes packets errs drop fifo frame compressed multicast", "bytes packets errs drop fifo colls carrier compressed"); CURVNET_SET(TD_TO_VNET(curthread)); IFNET_RLOCK(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { linux_ifname(ifp, ifname, sizeof ifname); sbuf_printf(sb, "%6.6s: ", ifname); sbuf_printf(sb, "%7ju %7ju %4ju %4ju %4lu %5lu %10lu %9ju ", (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_IBYTES), (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS), (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_IERRORS), (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS), /* rx_missed_errors */ 0UL, /* rx_fifo_errors */ 0UL, /* rx_length_errors + * rx_over_errors + * rx_crc_errors + * rx_frame_errors */ 0UL, /* rx_compressed */ (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS)); /* XXX-BZ rx only? */ sbuf_printf(sb, "%8ju %7ju %4ju %4ju %4lu %5ju %7lu %10lu\n", (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_OBYTES), (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS), (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_OERRORS), (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS), 0UL, /* tx_fifo_errors */ (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS), 0UL, /* tx_carrier_errors + * tx_aborted_errors + * tx_window_errors + * tx_heartbeat_errors*/ 0UL); /* tx_compressed */ } IFNET_RUNLOCK(); CURVNET_RESTORE(); return (0); } /* * Filler function for proc/sys/kernel/osrelease */ static int linprocfs_doosrelease(PFS_FILL_ARGS) { char osrelease[LINUX_MAX_UTSNAME]; linux_get_osrelease(td, osrelease); sbuf_printf(sb, "%s\n", osrelease); return (0); } /* * Filler function for proc/sys/kernel/ostype */ static int linprocfs_doostype(PFS_FILL_ARGS) { char osname[LINUX_MAX_UTSNAME]; linux_get_osname(td, osname); sbuf_printf(sb, "%s\n", osname); return (0); } /* * Filler function for proc/sys/kernel/version */ static int linprocfs_doosbuild(PFS_FILL_ARGS) { linprocfs_osbuild(td, sb); sbuf_cat(sb, "\n"); return (0); } /* * Filler function for proc/sys/kernel/msgmni */ static int linprocfs_domsgmni(PFS_FILL_ARGS) { sbuf_printf(sb, "%d\n", msginfo.msgmni); return (0); } /* * Filler function for proc/sys/kernel/pid_max */ static int linprocfs_dopid_max(PFS_FILL_ARGS) { sbuf_printf(sb, "%i\n", PID_MAX); return (0); } /* * Filler function for proc/sys/kernel/sem */ static int linprocfs_dosem(PFS_FILL_ARGS) { sbuf_printf(sb, "%d %d %d %d\n", seminfo.semmsl, seminfo.semmns, seminfo.semopm, seminfo.semmni); return (0); } /* * Filler function for proc/sys/vm/min_free_kbytes * * This mirrors the approach in illumos to return zero for reads. 
Effectively, it * reports that no memory is kept in reserve for "atomic allocations". This class * of allocation can be used at times when a thread cannot be suspended. */ static int linprocfs_dominfree(PFS_FILL_ARGS) { sbuf_printf(sb, "%d\n", 0); return (0); } /* * Filler function for proc/scsi/device_info */ static int linprocfs_doscsidevinfo(PFS_FILL_ARGS) { return (0); } /* * Filler function for proc/scsi/scsi */ static int linprocfs_doscsiscsi(PFS_FILL_ARGS) { return (0); } /* * Filler function for proc/devices */ static int linprocfs_dodevices(PFS_FILL_ARGS) { char *char_devices; sbuf_printf(sb, "Character devices:\n"); char_devices = linux_get_char_devices(); sbuf_printf(sb, "%s", char_devices); linux_free_get_char_devices(char_devices); sbuf_printf(sb, "\nBlock devices:\n"); return (0); } /* * Filler function for proc/cmdline */ static int linprocfs_docmdline(PFS_FILL_ARGS) { sbuf_printf(sb, "BOOT_IMAGE=%s", kernelname); sbuf_printf(sb, " ro root=302\n"); return (0); } /* * Filler function for proc/filesystems */ static int linprocfs_dofilesystems(PFS_FILL_ARGS) { struct vfsconf *vfsp; vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { if (vfsp->vfc_flags & VFCF_SYNTHETIC) sbuf_printf(sb, "nodev"); sbuf_printf(sb, "\t%s\n", vfsp->vfc_name); } vfsconf_sunlock(); return(0); } #if 0 /* * Filler function for proc/modules */ static int linprocfs_domodules(PFS_FILL_ARGS) { struct linker_file *lf; TAILQ_FOREACH(lf, &linker_files, link) { sbuf_printf(sb, "%-20s%8lu%4d\n", lf->filename, (unsigned long)lf->size, lf->refs); } return (0); } #endif /* * Filler function for proc/pid/fd */ static int linprocfs_dofdescfs(PFS_FILL_ARGS) { if (p == curproc) sbuf_printf(sb, "/dev/fd"); else sbuf_printf(sb, "unknown"); return (0); } /* * Filler function for proc/pid/limits */ static const struct linux_rlimit_ident { const char *desc; const char *unit; unsigned int rlim_id; } linux_rlimits_ident[] = { { "Max cpu time", "seconds", RLIMIT_CPU }, { "Max file size", "bytes", RLIMIT_FSIZE }, { "Max data size", "bytes", RLIMIT_DATA }, { "Max stack size", "bytes", RLIMIT_STACK }, { "Max core file size", "bytes", RLIMIT_CORE }, { "Max resident set", "bytes", RLIMIT_RSS }, { "Max processes", "processes", RLIMIT_NPROC }, { "Max open files", "files", RLIMIT_NOFILE }, { "Max locked memory", "bytes", RLIMIT_MEMLOCK }, { "Max address space", "bytes", RLIMIT_AS }, { "Max file locks", "locks", LINUX_RLIMIT_LOCKS }, { "Max pending signals", "signals", LINUX_RLIMIT_SIGPENDING }, { "Max msgqueue size", "bytes", LINUX_RLIMIT_MSGQUEUE }, { "Max nice priority", "", LINUX_RLIMIT_NICE }, { "Max realtime priority", "", LINUX_RLIMIT_RTPRIO }, { "Max realtime timeout", "us", LINUX_RLIMIT_RTTIME }, { 0, 0, 0 } }; static int linprocfs_doproclimits(PFS_FILL_ARGS) { const struct linux_rlimit_ident *li; struct plimit *limp; struct rlimit rl; ssize_t size; int res, error; error = 0; PROC_LOCK(p); limp = lim_hold(p->p_limit); PROC_UNLOCK(p); size = sizeof(res); sbuf_printf(sb, "%-26s%-21s%-21s%-21s\n", "Limit", "Soft Limit", "Hard Limit", "Units"); for (li = linux_rlimits_ident; li->desc != NULL; ++li) { switch (li->rlim_id) { case LINUX_RLIMIT_LOCKS: /* FALLTHROUGH */ case LINUX_RLIMIT_RTTIME: rl.rlim_cur = RLIM_INFINITY; break; case LINUX_RLIMIT_SIGPENDING: error = kernel_sysctlbyname(td, "kern.sigqueue.max_pending_per_proc", &res, &size, 0, 0, 0, 0); if (error != 0) goto out; rl.rlim_cur = res; rl.rlim_max = res; break; case LINUX_RLIMIT_MSGQUEUE: error = kernel_sysctlbyname(td, "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0); if
(error != 0) goto out; rl.rlim_cur = res; rl.rlim_max = res; break; case LINUX_RLIMIT_NICE: /* FALLTHROUGH */ case LINUX_RLIMIT_RTPRIO: rl.rlim_cur = 0; rl.rlim_max = 0; break; default: rl = limp->pl_rlimit[li->rlim_id]; break; } if (rl.rlim_cur == RLIM_INFINITY) sbuf_printf(sb, "%-26s%-21s%-21s%-10s\n", li->desc, "unlimited", "unlimited", li->unit); else sbuf_printf(sb, "%-26s%-21llu%-21llu%-10s\n", li->desc, (unsigned long long)rl.rlim_cur, (unsigned long long)rl.rlim_max, li->unit); } out: lim_free(limp); return (error); } /* * Filler function for proc/sys/kernel/random/uuid */ static int linprocfs_douuid(PFS_FILL_ARGS) { struct uuid uuid; kern_uuidgen(&uuid, 1); sbuf_printf_uuid(sb, &uuid); sbuf_printf(sb, "\n"); return(0); } /* * Filler function for proc/pid/auxv */ static int linprocfs_doauxv(PFS_FILL_ARGS) { struct sbuf *asb; off_t buflen, resid; int error; /* * Mimic Linux behavior and treat only processes with a usermode * address space as valid. Return zero silently otherwise. */ if (p->p_vmspace == &vmspace0) return (0); if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0 || uio->uio_resid < 0) return (EINVAL); asb = sbuf_new_auto(); if (asb == NULL) return (ENOMEM); error = proc_getauxv(td, p, asb); if (error == 0) error = sbuf_finish(asb); resid = sbuf_len(asb) - uio->uio_offset; if (resid > uio->uio_resid) buflen = uio->uio_resid; else buflen = resid; if (buflen > IOSIZE_MAX) return (EINVAL); if (buflen > MAXPHYS) buflen = MAXPHYS; if (resid <= 0) return (0); if (error == 0) error = uiomove(sbuf_data(asb) + uio->uio_offset, buflen, uio); sbuf_delete(asb); return (error); } /* * Constructor */ static int linprocfs_init(PFS_INIT_ARGS) { struct pfs_node *root; struct pfs_node *dir; struct pfs_node *sys; root = pi->pi_root; /* /proc/... */ pfs_create_file(root, "cmdline", &linprocfs_docmdline, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "cpuinfo", &linprocfs_docpuinfo, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "devices", &linprocfs_dodevices, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "filesystems", &linprocfs_dofilesystems, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "loadavg", &linprocfs_doloadavg, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "meminfo", &linprocfs_domeminfo, NULL, NULL, NULL, PFS_RD); #if 0 pfs_create_file(root, "modules", &linprocfs_domodules, NULL, NULL, NULL, PFS_RD); #endif pfs_create_file(root, "mounts", &linprocfs_domtab, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "mtab", &linprocfs_domtab, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "partitions", &linprocfs_dopartitions, NULL, NULL, NULL, PFS_RD); pfs_create_link(root, "self", &procfs_docurproc, NULL, NULL, NULL, 0); pfs_create_file(root, "stat", &linprocfs_dostat, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "swaps", &linprocfs_doswaps, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "uptime", &linprocfs_douptime, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "version", &linprocfs_doversion, NULL, NULL, NULL, PFS_RD); /* /proc/net/... */ dir = pfs_create_dir(root, "net", NULL, NULL, NULL, 0); pfs_create_file(dir, "dev", &linprocfs_donetdev, NULL, NULL, NULL, PFS_RD); /* /proc/<pid>/...
*/ dir = pfs_create_dir(root, "pid", NULL, NULL, NULL, PFS_PROCDEP); pfs_create_file(dir, "cmdline", &linprocfs_doproccmdline, NULL, NULL, NULL, PFS_RD); pfs_create_link(dir, "cwd", &linprocfs_doproccwd, NULL, NULL, NULL, 0); pfs_create_file(dir, "environ", &linprocfs_doprocenviron, NULL, &procfs_candebug, NULL, PFS_RD); pfs_create_link(dir, "exe", &procfs_doprocfile, NULL, &procfs_notsystem, NULL, 0); pfs_create_file(dir, "maps", &linprocfs_doprocmaps, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "mem", &procfs_doprocmem, procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR | PFS_RAW); pfs_create_file(dir, "mounts", &linprocfs_domtab, NULL, NULL, NULL, PFS_RD); pfs_create_link(dir, "root", &linprocfs_doprocroot, NULL, NULL, NULL, 0); pfs_create_file(dir, "stat", &linprocfs_doprocstat, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "statm", &linprocfs_doprocstatm, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "status", &linprocfs_doprocstatus, NULL, NULL, NULL, PFS_RD); pfs_create_link(dir, "fd", &linprocfs_dofdescfs, NULL, NULL, NULL, 0); pfs_create_file(dir, "auxv", &linprocfs_doauxv, NULL, &procfs_candebug, NULL, PFS_RD|PFS_RAWRD); pfs_create_file(dir, "limits", &linprocfs_doproclimits, NULL, NULL, NULL, PFS_RD); /* /proc/scsi/... */ dir = pfs_create_dir(root, "scsi", NULL, NULL, NULL, 0); pfs_create_file(dir, "device_info", &linprocfs_doscsidevinfo, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "scsi", &linprocfs_doscsiscsi, NULL, NULL, NULL, PFS_RD); /* /proc/sys/... */ sys = pfs_create_dir(root, "sys", NULL, NULL, NULL, 0); /* /proc/sys/kernel/... */ dir = pfs_create_dir(sys, "kernel", NULL, NULL, NULL, 0); pfs_create_file(dir, "osrelease", &linprocfs_doosrelease, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "ostype", &linprocfs_doostype, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "version", &linprocfs_doosbuild, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "msgmni", &linprocfs_domsgmni, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "pid_max", &linprocfs_dopid_max, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "sem", &linprocfs_dosem, NULL, NULL, NULL, PFS_RD); /* /proc/sys/kernel/random/... */ dir = pfs_create_dir(dir, "random", NULL, NULL, NULL, 0); pfs_create_file(dir, "uuid", &linprocfs_douuid, NULL, NULL, NULL, PFS_RD); /* /proc/sys/vm/.... */ dir = pfs_create_dir(sys, "vm", NULL, NULL, NULL, 0); pfs_create_file(dir, "min_free_kbytes", &linprocfs_dominfree, NULL, NULL, NULL, PFS_RD); return (0); } /* * Destructor */ static int linprocfs_uninit(PFS_INIT_ARGS) { /* nothing to do, pseudofs will GC */ return (0); } PSEUDOFS(linprocfs, 1, VFCF_JAIL); #if defined(__aarch64__) || defined(__amd64__) MODULE_DEPEND(linprocfs, linux_common, 1, 1, 1); #else MODULE_DEPEND(linprocfs, linux, 1, 1, 1); #endif MODULE_DEPEND(linprocfs, procfs, 1, 1, 1); MODULE_DEPEND(linprocfs, sysvmsg, 1, 1, 1); MODULE_DEPEND(linprocfs, sysvsem, 1, 1, 1); Index: projects/runtime-coverage-v2/sys/compat/linsysfs/linsysfs.c =================================================================== --- projects/runtime-coverage-v2/sys/compat/linsysfs/linsysfs.c (revision 347075) +++ projects/runtime-coverage-v2/sys/compat/linsysfs/linsysfs.c (revision 347076) @@ -1,565 +1,548 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 IronPort Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include -#include -#include -#include -#include -#include #include -#include #include #include -#include -#include -#include #include #include #include -#include #include #include #include #include #include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include #include #include struct scsi_host_queue { TAILQ_ENTRY(scsi_host_queue) scsi_host_next; char *path; char *name; }; TAILQ_HEAD(,scsi_host_queue) scsi_host_q; static int host_number = 0; static int atoi(const char *str) { return (int)strtol(str, (char **)NULL, 10); } /* * Filler function for proc_name */ static int linsysfs_scsiname(PFS_FILL_ARGS) { struct scsi_host_queue *scsi_host; int index; if (strncmp(pn->pn_parent->pn_name, "host", 4) == 0) { index = atoi(&pn->pn_parent->pn_name[4]); } else { sbuf_printf(sb, "unknown\n"); return (0); } TAILQ_FOREACH(scsi_host, &scsi_host_q, scsi_host_next) { if (index-- == 0) { sbuf_printf(sb, "%s\n", scsi_host->name); return (0); } } sbuf_printf(sb, "unknown\n"); return (0); } /* * Filler function for device sym-link */ static int linsysfs_link_scsi_host(PFS_FILL_ARGS) { struct scsi_host_queue *scsi_host; int index; if (strncmp(pn->pn_parent->pn_name, "host", 4) == 0) { index = atoi(&pn->pn_parent->pn_name[4]); } else { sbuf_printf(sb, "unknown\n"); return (0); } TAILQ_FOREACH(scsi_host, &scsi_host_q, scsi_host_next) { if (index-- == 0) { sbuf_printf(sb, "../../../devices%s", scsi_host->path); return(0); } } sbuf_printf(sb, "unknown\n"); return (0); } static int linsysfs_fill_data(PFS_FILL_ARGS) { sbuf_printf(sb, "%s", (char *)pn->pn_data); return (0); } static int linsysfs_fill_vendor(PFS_FILL_ARGS) { sbuf_printf(sb, "0x%04x\n", pci_get_vendor((device_t)pn->pn_data)); return (0); } static int linsysfs_fill_device(PFS_FILL_ARGS) { sbuf_printf(sb, "0x%04x\n", pci_get_device((device_t)pn->pn_data)); return (0); } static int linsysfs_fill_subvendor(PFS_FILL_ARGS) { sbuf_printf(sb, "0x%04x\n", pci_get_subvendor((device_t)pn->pn_data)); return (0); } static int linsysfs_fill_subdevice(PFS_FILL_ARGS) { sbuf_printf(sb, "0x%04x\n", pci_get_subdevice((device_t)pn->pn_data)); return (0); } static int linsysfs_fill_revid(PFS_FILL_ARGS) { sbuf_printf(sb, "0x%x\n", pci_get_revid((device_t)pn->pn_data)); return (0); } static int linsysfs_fill_config(PFS_FILL_ARGS) { uint8_t config[48]; device_t dev; uint32_t reg; dev = (device_t)pn->pn_data; 
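/* * Synthesize only the start of the PCI configuration header from the * device ivars: vendor/device IDs at offsets 0x00/0x02, the revision * ID at 0x08 and the subsystem IDs at 0x2c/0x2e. The rest of the * 48-byte buffer is left zeroed. */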
bzero(config, sizeof(config)); reg = pci_get_vendor(dev); config[0] = reg; config[1] = reg >> 8; reg = pci_get_device(dev); config[2] = reg; config[3] = reg >> 8; reg = pci_get_revid(dev); config[8] = reg; reg = pci_get_subvendor(dev); config[44] = reg; config[45] = reg >> 8; reg = pci_get_subdevice(dev); config[46] = reg; config[47] = reg >> 8; sbuf_bcat(sb, config, sizeof(config)); return (0); } /* * Filler function for PCI uevent file */ static int linsysfs_fill_uevent_pci(PFS_FILL_ARGS) { device_t dev; dev = (device_t)pn->pn_data; sbuf_printf(sb, "DRIVER=%s\nPCI_CLASS=%X\nPCI_ID=%04X:%04X\n" "PCI_SUBSYS_ID=%04X:%04X\nPCI_SLOT_NAME=%04d:%02x:%02x.%x\n", linux_driver_get_name_dev(dev), pci_get_class(dev), pci_get_vendor(dev), pci_get_device(dev), pci_get_subvendor(dev), pci_get_subdevice(dev), pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); return (0); } /* * Filler function for drm uevent file */ static int linsysfs_fill_uevent_drm(PFS_FILL_ARGS) { device_t dev; int unit; dev = (device_t)pn->pn_data; unit = device_get_unit(dev); sbuf_printf(sb, "MAJOR=226\nMINOR=%d\nDEVNAME=dri/card%d\nDEVTYPE=dri_minor\n", unit, unit); return (0); } static char * get_full_pfs_path(struct pfs_node *cur) { char *temp, *path; temp = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); path[0] = '\0'; do { snprintf(temp, MAXPATHLEN, "%s/%s", cur->pn_name, path); strlcpy(path, temp, MAXPATHLEN); cur = cur->pn_parent; } while (cur->pn_parent != NULL); path[strlen(path) - 1] = '\0'; /* remove extra slash */ free(temp, M_TEMP); return (path); } /* * Filler function for symlink from drm char device to PCI device */ static int linsysfs_fill_vgapci(PFS_FILL_ARGS) { char *path; path = get_full_pfs_path((struct pfs_node*)pn->pn_data); sbuf_printf(sb, "../../../%s", path); free(path, M_TEMP); return (0); } #undef PCI_DEV #define PCI_DEV "pci" #define DRMN_DEV "drmn" static int linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi, struct pfs_node *chardev, struct pfs_node *drm, char *path, char *prefix) { struct scsi_host_queue *scsi_host; struct pfs_node *sub_dir, *cur_file; int i, nchildren, error; device_t *children, parent; devclass_t devclass; const char *name = NULL; struct pci_devinfo *dinfo; char *device, *host, *new_path, *devname; new_path = path; devname = malloc(16, M_TEMP, M_WAITOK); parent = device_get_parent(dev); if (parent) { devclass = device_get_devclass(parent); if (devclass != NULL) name = devclass_get_name(devclass); if (name && strcmp(name, PCI_DEV) == 0) { dinfo = device_get_ivars(dev); if (dinfo) { device = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); new_path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); new_path[0] = '\000'; strcpy(new_path, path); host = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); device[0] = '\000'; sprintf(device, "%s:%02x:%02x.%x", prefix, dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func); strcat(new_path, "/"); strcat(new_path, device); dir = pfs_create_dir(dir, device, NULL, NULL, NULL, 0); cur_file = pfs_create_file(dir, "vendor", &linsysfs_fill_vendor, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dev; cur_file = pfs_create_file(dir, "device", &linsysfs_fill_device, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dev; cur_file = pfs_create_file(dir, "subsystem_vendor", &linsysfs_fill_subvendor, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dev; cur_file = pfs_create_file(dir, "subsystem_device", &linsysfs_fill_subdevice, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dev; 
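/* * Each node stashes the device_t in pn_data so that the filler * callbacks query the PCI ivars at read time instead of caching * values here. */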
cur_file = pfs_create_file(dir, "revision", &linsysfs_fill_revid, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dev; cur_file = pfs_create_file(dir, "config", &linsysfs_fill_config, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dev; cur_file = pfs_create_file(dir, "uevent", &linsysfs_fill_uevent_pci, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dev; cur_file = pfs_create_link(dir, "subsystem", &linsysfs_fill_data, NULL, NULL, NULL, 0); /* libdrm just checks that the link ends in "/pci" */ cur_file->pn_data = "/sys/bus/pci"; if (dinfo->cfg.baseclass == PCIC_STORAGE) { /* DJA only make this if needed */ sprintf(host, "host%d", host_number++); strcat(new_path, "/"); strcat(new_path, host); pfs_create_dir(dir, host, NULL, NULL, NULL, 0); scsi_host = malloc(sizeof( struct scsi_host_queue), M_DEVBUF, M_NOWAIT); scsi_host->path = malloc( strlen(new_path) + 1, M_DEVBUF, M_NOWAIT); scsi_host->path[0] = '\000'; bcopy(new_path, scsi_host->path, strlen(new_path) + 1); scsi_host->name = "unknown"; sub_dir = pfs_create_dir(scsi, host, NULL, NULL, NULL, 0); pfs_create_link(sub_dir, "device", &linsysfs_link_scsi_host, NULL, NULL, NULL, 0); pfs_create_file(sub_dir, "proc_name", &linsysfs_scsiname, NULL, NULL, NULL, PFS_RD); scsi_host->name = linux_driver_get_name_dev(dev); TAILQ_INSERT_TAIL(&scsi_host_q, scsi_host, scsi_host_next); } free(device, M_TEMP); free(host, M_TEMP); } } devclass = device_get_devclass(dev); if (devclass != NULL) name = devclass_get_name(devclass); else name = NULL; if (name != NULL && strcmp(name, DRMN_DEV) == 0 && device_get_unit(dev) >= 0) { dinfo = device_get_ivars(parent); if (dinfo != NULL && dinfo->cfg.baseclass == PCIC_DISPLAY) { sprintf(devname, "226:%d", device_get_unit(dev)); sub_dir = pfs_create_dir(chardev, devname, NULL, NULL, NULL, 0); cur_file = pfs_create_link(sub_dir, "device", &linsysfs_fill_vgapci, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dir; cur_file = pfs_create_file(sub_dir, "uevent", &linsysfs_fill_uevent_drm, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dev; sprintf(devname, "card%d", device_get_unit(dev)); sub_dir = pfs_create_dir(drm, devname, NULL, NULL, NULL, 0); cur_file = pfs_create_link(sub_dir, "device", &linsysfs_fill_vgapci, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dir; } } } error = device_get_children(dev, &children, &nchildren); if (error == 0) { for (i = 0; i < nchildren; i++) if (children[i]) linsysfs_run_bus(children[i], dir, scsi, chardev, drm, new_path, prefix); free(children, M_TEMP); } if (new_path != path) free(new_path, M_TEMP); free(devname, M_TEMP); return (1); } /* - * Filler function for sys/devices/system/cpu/online + * Filler function for sys/devices/system/cpu/{online,possible,present} */ static int linsysfs_cpuonline(PFS_FILL_ARGS) { sbuf_printf(sb, "%d-%d\n", CPU_FIRST(), mp_maxid); return (0); } /* * Filler function for sys/devices/system/cpu/cpuX/online */ static int linsysfs_cpuxonline(PFS_FILL_ARGS) { sbuf_printf(sb, "1\n"); return (0); } static void linsysfs_listcpus(struct pfs_node *dir) { struct pfs_node *cpu; char *name; int i, count, len; len = 1; count = mp_maxcpus; while (count > 10) { count /= 10; len++; } len += sizeof("cpu"); name = malloc(len, M_TEMP, M_WAITOK); for (i = 0; i < mp_ncpus; ++i) { /* /sys/devices/system/cpu/cpuX */ sprintf(name, "cpu%d", i); cpu = pfs_create_dir(dir, name, NULL, NULL, NULL, 0); pfs_create_file(cpu, "online", &linsysfs_cpuxonline, NULL, NULL, NULL, PFS_RD); } free(name, M_TEMP); } /* * Constructor */ static int 
linsysfs_init(PFS_INIT_ARGS) { struct pfs_node *root; struct pfs_node *class; struct pfs_node *dir, *sys, *cpu; struct pfs_node *drm; struct pfs_node *pci; struct pfs_node *scsi; struct pfs_node *devdir, *chardev; devclass_t devclass; device_t dev; TAILQ_INIT(&scsi_host_q); root = pi->pi_root; /* /sys/class/... */ class = pfs_create_dir(root, "class", NULL, NULL, NULL, 0); scsi = pfs_create_dir(class, "scsi_host", NULL, NULL, NULL, 0); drm = pfs_create_dir(class, "drm", NULL, NULL, NULL, 0); /* /sys/dev/... */ devdir = pfs_create_dir(root, "dev", NULL, NULL, NULL, 0); chardev = pfs_create_dir(devdir, "char", NULL, NULL, NULL, 0); /* /sys/devices/... */ dir = pfs_create_dir(root, "devices", NULL, NULL, NULL, 0); pci = pfs_create_dir(dir, "pci0000:00", NULL, NULL, NULL, 0); devclass = devclass_find("root"); if (devclass == NULL) { return (0); } dev = devclass_get_device(devclass, 0); linsysfs_run_bus(dev, pci, scsi, chardev, drm, "/pci0000:00", "0000"); /* /sys/devices/system */ sys = pfs_create_dir(dir, "system", NULL, NULL, NULL, 0); /* /sys/devices/system/cpu */ cpu = pfs_create_dir(sys, "cpu", NULL, NULL, NULL, 0); pfs_create_file(cpu, "online", &linsysfs_cpuonline, + NULL, NULL, NULL, PFS_RD); + pfs_create_file(cpu, "possible", &linsysfs_cpuonline, + NULL, NULL, NULL, PFS_RD); + pfs_create_file(cpu, "present", &linsysfs_cpuonline, NULL, NULL, NULL, PFS_RD); linsysfs_listcpus(cpu); return (0); } /* * Destructor */ static int linsysfs_uninit(PFS_INIT_ARGS) { struct scsi_host_queue *scsi_host, *scsi_host_tmp; TAILQ_FOREACH_SAFE(scsi_host, &scsi_host_q, scsi_host_next, scsi_host_tmp) { TAILQ_REMOVE(&scsi_host_q, scsi_host, scsi_host_next); free(scsi_host->path, M_TEMP); free(scsi_host, M_TEMP); } return (0); } PSEUDOFS(linsysfs, 1, VFCF_JAIL); #if defined(__aarch64__) || defined(__amd64__) MODULE_DEPEND(linsysfs, linux_common, 1, 1, 1); #else MODULE_DEPEND(linsysfs, linux, 1, 1, 1); #endif Index: projects/runtime-coverage-v2/sys/compat/linux/linux.c =================================================================== --- projects/runtime-coverage-v2/sys/compat/linux/linux.c (revision 347075) +++ projects/runtime-coverage-v2/sys/compat/linux/linux.c (revision 347076) @@ -1,205 +1,310 @@ /*- * Copyright (c) 2015 Dmitry Chagin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include +#include +#include +#include #include +#include +#include +#include +#include +#include + #include +#include +CTASSERT(LINUX_IFNAMSIZ == IFNAMSIZ); static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = { LINUX_SIGHUP, /* SIGHUP */ LINUX_SIGINT, /* SIGINT */ LINUX_SIGQUIT, /* SIGQUIT */ LINUX_SIGILL, /* SIGILL */ LINUX_SIGTRAP, /* SIGTRAP */ LINUX_SIGABRT, /* SIGABRT */ 0, /* SIGEMT */ LINUX_SIGFPE, /* SIGFPE */ LINUX_SIGKILL, /* SIGKILL */ LINUX_SIGBUS, /* SIGBUS */ LINUX_SIGSEGV, /* SIGSEGV */ LINUX_SIGSYS, /* SIGSYS */ LINUX_SIGPIPE, /* SIGPIPE */ LINUX_SIGALRM, /* SIGALRM */ LINUX_SIGTERM, /* SIGTERM */ LINUX_SIGURG, /* SIGURG */ LINUX_SIGSTOP, /* SIGSTOP */ LINUX_SIGTSTP, /* SIGTSTP */ LINUX_SIGCONT, /* SIGCONT */ LINUX_SIGCHLD, /* SIGCHLD */ LINUX_SIGTTIN, /* SIGTTIN */ LINUX_SIGTTOU, /* SIGTTOU */ LINUX_SIGIO, /* SIGIO */ LINUX_SIGXCPU, /* SIGXCPU */ LINUX_SIGXFSZ, /* SIGXFSZ */ LINUX_SIGVTALRM,/* SIGVTALRM */ LINUX_SIGPROF, /* SIGPROF */ LINUX_SIGWINCH, /* SIGWINCH */ 0, /* SIGINFO */ LINUX_SIGUSR1, /* SIGUSR1 */ LINUX_SIGUSR2 /* SIGUSR2 */ }; static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = { SIGHUP, /* LINUX_SIGHUP */ SIGINT, /* LINUX_SIGINT */ SIGQUIT, /* LINUX_SIGQUIT */ SIGILL, /* LINUX_SIGILL */ SIGTRAP, /* LINUX_SIGTRAP */ SIGABRT, /* LINUX_SIGABRT */ SIGBUS, /* LINUX_SIGBUS */ SIGFPE, /* LINUX_SIGFPE */ SIGKILL, /* LINUX_SIGKILL */ SIGUSR1, /* LINUX_SIGUSR1 */ SIGSEGV, /* LINUX_SIGSEGV */ SIGUSR2, /* LINUX_SIGUSR2 */ SIGPIPE, /* LINUX_SIGPIPE */ SIGALRM, /* LINUX_SIGALRM */ SIGTERM, /* LINUX_SIGTERM */ SIGBUS, /* LINUX_SIGSTKFLT */ SIGCHLD, /* LINUX_SIGCHLD */ SIGCONT, /* LINUX_SIGCONT */ SIGSTOP, /* LINUX_SIGSTOP */ SIGTSTP, /* LINUX_SIGTSTP */ SIGTTIN, /* LINUX_SIGTTIN */ SIGTTOU, /* LINUX_SIGTTOU */ SIGURG, /* LINUX_SIGURG */ SIGXCPU, /* LINUX_SIGXCPU */ SIGXFSZ, /* LINUX_SIGXFSZ */ SIGVTALRM, /* LINUX_SIGVTALRM */ SIGPROF, /* LINUX_SIGPROF */ SIGWINCH, /* LINUX_SIGWINCH */ SIGIO, /* LINUX_SIGIO */ /* * FreeBSD does not have a SIGPWR signal, so map the Linux SIGPWR signal * to the first unused FreeBSD signal number. Since Linux supports * signals from 1 to 64, we are OK here as our SIGRTMIN = 65. */ SIGRTMIN, /* LINUX_SIGPWR */ SIGSYS /* LINUX_SIGSYS */ }; /* * Map Linux RT signals to the FreeBSD RT signals. */ static inline int linux_to_bsd_rt_signal(int sig) { return (SIGRTMIN + 1 + sig - LINUX_SIGRTMIN); } static inline int bsd_to_linux_rt_signal(int sig) { return (sig - SIGRTMIN - 1 + LINUX_SIGRTMIN); } int linux_to_bsd_signal(int sig) { KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("invalid Linux signal %d\n", sig)); if (sig < LINUX_SIGRTMIN) return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]); return (linux_to_bsd_rt_signal(sig)); } int bsd_to_linux_signal(int sig) { if (sig <= LINUX_SIGTBLSZ) return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]); if (sig == SIGRTMIN) return (LINUX_SIGPWR); return (bsd_to_linux_rt_signal(sig)); } int linux_to_bsd_sigaltstack(int lsa) { int bsa = 0; if (lsa & LINUX_SS_DISABLE) bsa |= SS_DISABLE; /* * Linux ignores the SS_ONSTACK flag in the ss * parameter, while FreeBSD prohibits it.
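 * As a result only SS_DISABLE survives the translation here; a Linux * SS_ONSTACK request is silently dropped rather than rejected by the * native sigaltstack(2).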
*/ return (bsa); } int bsd_to_linux_sigaltstack(int bsa) { int lsa = 0; if (bsa & SS_DISABLE) lsa |= LINUX_SS_DISABLE; if (bsa & SS_ONSTACK) lsa |= LINUX_SS_ONSTACK; return (lsa); } void linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss) { int b, l; SIGEMPTYSET(*bss); for (l = 1; l <= LINUX_SIGRTMAX; l++) { if (LINUX_SIGISMEMBER(*lss, l)) { b = linux_to_bsd_signal(l); if (b) SIGADDSET(*bss, b); } } } void bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss) { int b, l; LINUX_SIGEMPTYSET(*lss); for (b = 1; b <= SIGRTMAX; b++) { if (SIGISMEMBER(*bss, b)) { l = bsd_to_linux_signal(b); if (l) LINUX_SIGADDSET(*lss, l); } } +} +/* + * Translate a Linux interface name to a FreeBSD interface name, + * and return the associated ifnet structure. + * bsdname and lxname need to be at least IFNAMSIZ bytes long, but + * can point to the same buffer. + */ +struct ifnet * +ifname_linux_to_bsd(struct thread *td, const char *lxname, char *bsdname) +{ + struct ifnet *ifp; + int len, unit; + char *ep; + int is_eth, is_lo, index; + + for (len = 0; len < LINUX_IFNAMSIZ; ++len) + if (!isalpha(lxname[len]) || lxname[len] == 0) + break; + if (len == 0 || len == LINUX_IFNAMSIZ) + return (NULL); + /* Linux loopback interface name is lo (not lo0) */ + is_lo = (len == 2 && !strncmp(lxname, "lo", len)) ? 1 : 0; + unit = (int)strtoul(lxname + len, &ep, 10); + if ((ep == NULL || ep == lxname + len || ep >= lxname + LINUX_IFNAMSIZ) && + is_lo == 0) + return (NULL); + index = 0; + is_eth = (len == 3 && !strncmp(lxname, "eth", len)) ? 1 : 0; + + CURVNET_SET(TD_TO_VNET(td)); + IFNET_RLOCK(); + CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { + /* + * Allow Linux programs to use FreeBSD names. Don't presume + * we never have an interface named "eth", so don't make + * the test optional based on is_eth. + */ + if (strncmp(ifp->if_xname, lxname, LINUX_IFNAMSIZ) == 0) + break; + if (is_eth && IFP_IS_ETH(ifp) && unit == index++) + break; + if (is_lo && IFP_IS_LOOP(ifp)) + break; + } + IFNET_RUNLOCK(); + CURVNET_RESTORE(); + if (ifp != NULL && bsdname != NULL) + strlcpy(bsdname, ifp->if_xname, IFNAMSIZ); + return (ifp); +} + +void +linux_ifflags(struct ifnet *ifp, short *flags) +{ + + *flags = (ifp->if_flags | ifp->if_drv_flags) & 0xffff; + /* these flags have no Linux equivalent */ + *flags &= ~(IFF_DRV_OACTIVE|IFF_SIMPLEX| + IFF_LINK0|IFF_LINK1|IFF_LINK2); + /* Linux' multicast flag is in a different bit */ + if (*flags & IFF_MULTICAST) { + *flags &= ~IFF_MULTICAST; + *flags |= 0x1000; + } +} + +int +linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa) +{ + struct ifaddr *ifa; + struct sockaddr_dl *sdl; + + if (IFP_IS_LOOP(ifp)) { + bzero(lsa, sizeof(*lsa)); + lsa->sa_family = LINUX_ARPHRD_LOOPBACK; + return (0); + } + + if (!IFP_IS_ETH(ifp)) + return (ENOENT); + + CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + sdl = (struct sockaddr_dl*)ifa->ifa_addr; + if (sdl != NULL && (sdl->sdl_family == AF_LINK) && + (sdl->sdl_type == IFT_ETHER)) { + bzero(lsa, sizeof(*lsa)); + lsa->sa_family = LINUX_ARPHRD_ETHER; + bcopy(LLADDR(sdl), lsa->sa_data, LINUX_IFHWADDRLEN); + return (0); + } + } + + return (ENOENT); } Index: projects/runtime-coverage-v2/sys/compat/linux/linux.h =================================================================== --- projects/runtime-coverage-v2/sys/compat/linux/linux.h (revision 347075) +++ projects/runtime-coverage-v2/sys/compat/linux/linux.h (revision 347076) @@ -1,95 +1,112 @@ /*- * Copyright (c) 2015 Dmitry Chagin * All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LINUX_MI_H_ #define _LINUX_MI_H_ +#define LINUX_IFHWADDRLEN 6 +#define LINUX_IFNAMSIZ 16 + +/* + * Criteria for interface name translation + */ +#define IFP_IS_ETH(ifp) (ifp->if_type == IFT_ETHER) +#define IFP_IS_LOOP(ifp) (ifp->if_type == IFT_LOOP) + +struct l_sockaddr { + unsigned short sa_family; + char sa_data[14]; +}; + +#define LINUX_ARPHRD_ETHER 1 +#define LINUX_ARPHRD_LOOPBACK 772 + /* sigaltstack */ #define LINUX_SS_ONSTACK 1 #define LINUX_SS_DISABLE 2 int linux_to_bsd_sigaltstack(int lsa); int bsd_to_linux_sigaltstack(int bsa); /* sigset */ typedef struct { uint64_t __mask; } l_sigset_t; /* primitives to manipulate sigset_t */ #define LINUX_SIGEMPTYSET(set) (set).__mask = 0 #define LINUX_SIGISMEMBER(set, sig) (1UL & ((set).__mask >> _SIG_IDX(sig))) #define LINUX_SIGADDSET(set, sig) (set).__mask |= 1UL << _SIG_IDX(sig) void linux_to_bsd_sigset(l_sigset_t *, sigset_t *); void bsd_to_linux_sigset(sigset_t *, l_sigset_t *); /* signaling */ #define LINUX_SIGHUP 1 #define LINUX_SIGINT 2 #define LINUX_SIGQUIT 3 #define LINUX_SIGILL 4 #define LINUX_SIGTRAP 5 #define LINUX_SIGABRT 6 #define LINUX_SIGIOT LINUX_SIGABRT #define LINUX_SIGBUS 7 #define LINUX_SIGFPE 8 #define LINUX_SIGKILL 9 #define LINUX_SIGUSR1 10 #define LINUX_SIGSEGV 11 #define LINUX_SIGUSR2 12 #define LINUX_SIGPIPE 13 #define LINUX_SIGALRM 14 #define LINUX_SIGTERM 15 #define LINUX_SIGSTKFLT 16 #define LINUX_SIGCHLD 17 #define LINUX_SIGCONT 18 #define LINUX_SIGSTOP 19 #define LINUX_SIGTSTP 20 #define LINUX_SIGTTIN 21 #define LINUX_SIGTTOU 22 #define LINUX_SIGURG 23 #define LINUX_SIGXCPU 24 #define LINUX_SIGXFSZ 25 #define LINUX_SIGVTALRM 26 #define LINUX_SIGPROF 27 #define LINUX_SIGWINCH 28 #define LINUX_SIGIO 29 #define LINUX_SIGPOLL LINUX_SIGIO #define LINUX_SIGPWR 30 #define LINUX_SIGSYS 31 #define LINUX_SIGTBLSZ 31 #define LINUX_SIGRTMIN 32 #define LINUX_SIGRTMAX 64 #define LINUX_SIG_VALID(sig) ((sig) <= LINUX_SIGRTMAX && (sig) > 0) int linux_to_bsd_signal(int sig); int bsd_to_linux_signal(int sig); #endif /* _LINUX_MI_H_ */ Index: projects/runtime-coverage-v2/sys/compat/linux/linux_common.h =================================================================== --- projects/runtime-coverage-v2/sys/compat/linux/linux_common.h (nonexistent) +++ 
projects/runtime-coverage-v2/sys/compat/linux/linux_common.h (revision 347076) @@ -0,0 +1,38 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2019 Dmitry Chagin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _LINUX_COMMON_H_ +#define _LINUX_COMMON_H_ + +struct ifnet *ifname_linux_to_bsd(struct thread *td, + const char *lxname, char *bsdname); +void linux_ifflags(struct ifnet *ifp, short *flags); +int linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa); + +#endif /* _LINUX_COMMON_H_ */ Property changes on: projects/runtime-coverage-v2/sys/compat/linux/linux_common.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/runtime-coverage-v2/sys/compat/linux/linux_emul.c =================================================================== --- projects/runtime-coverage-v2/sys/compat/linux/linux_emul.c (revision 347075) +++ projects/runtime-coverage-v2/sys/compat/linux/linux_emul.c (revision 347076) @@ -1,343 +1,342 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1996 Søren Schmidt * Copyright (c) 2006 Roman Divacky * Copyright (c) 2013 Dmitry Chagin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if BYTE_ORDER == LITTLE_ENDIAN #define SHELLMAGIC 0x2123 /* #! */ #else #define SHELLMAGIC 0x2321 #endif /* * This returns a reference to the thread emuldata entry (if found). * * Hold PROC_LOCK when referencing emuldata from other threads. */ struct linux_emuldata * em_find(struct thread *td) { struct linux_emuldata *em; em = td->td_emuldata; return (em); } /* * This returns a reference to the proc pemuldata entry (if found). * * Hold PROC_LOCK when referencing proc pemuldata from other threads. * Hold LINUX_PEM_LOCK when referencing pemuldata members. */ struct linux_pemuldata * pem_find(struct proc *p) { struct linux_pemuldata *pem; pem = p->p_emuldata; return (pem); } void linux_proc_init(struct thread *td, struct thread *newtd, int flags) { struct linux_emuldata *em; struct linux_pemuldata *pem; struct epoll_emuldata *emd; struct proc *p; if (newtd != NULL) { p = newtd->td_proc; /* non-exec call */ em = malloc(sizeof(*em), M_TEMP, M_WAITOK | M_ZERO); if (flags & LINUX_CLONE_THREAD) { LINUX_CTR1(proc_init, "thread newtd(%d)", newtd->td_tid); em->em_tid = newtd->td_tid; } else { LINUX_CTR1(proc_init, "fork newtd(%d)", p->p_pid); em->em_tid = p->p_pid; pem = malloc(sizeof(*pem), M_LINUX, M_WAITOK | M_ZERO); sx_init(&pem->pem_sx, "lpemlk"); p->p_emuldata = pem; } newtd->td_emuldata = em; } else { p = td->td_proc; /* exec */ LINUX_CTR1(proc_init, "exec newtd(%d)", p->p_pid); /* lookup the old one */ em = em_find(td); KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n")); em->em_tid = p->p_pid; em->flags = 0; - em->pdeath_signal = 0; em->robust_futexes = NULL; em->child_clear_tid = NULL; em->child_set_tid = NULL; /* epoll should be destroyed in the case of exec. */ pem = pem_find(p); KASSERT(pem != NULL, ("proc_exit: proc emuldata not found.\n")); pem->persona = 0; if (pem->epoll != NULL) { emd = pem->epoll; pem->epoll = NULL; free(emd, M_EPOLL); } } } void linux_proc_exit(void *arg __unused, struct proc *p) { struct linux_pemuldata *pem; struct epoll_emuldata *emd; struct thread *td = curthread; if (__predict_false(SV_CURPROC_ABI() != SV_ABI_LINUX)) return; LINUX_CTR3(proc_exit, "thread(%d) proc(%d) p %p", td->td_tid, p->p_pid, p); pem = pem_find(p); if (pem == NULL) return; (p->p_sysent->sv_thread_detach)(td); p->p_emuldata = NULL; if (pem->epoll != NULL) { emd = pem->epoll; pem->epoll = NULL; free(emd, M_EPOLL); } sx_destroy(&pem->pem_sx); free(pem, M_LINUX); } /* * If a Linux binary is exec'ing something, try this image activator * first. We override standard shell script execution in order to * be able to modify the interpreter path. We only do this if a Linux * binary is doing the exec, so we do not create an EXEC module for it.
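 * For example, a Linux binary exec'ing a script that begins with * "#!/bin/sh" should end up running /compat/linux/bin/sh whenever * that interpreter exists under the emulation prefix.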
*/ int linux_exec_imgact_try(struct image_params *imgp) { const char *head = (const char *)imgp->image_header; char *rpath; int error = -1; /* * The interpreter for shell scripts run from a Linux binary needs * to be located in /compat/linux if possible in order to recursively * maintain Linux path emulation. */ if (((const short *)head)[0] == SHELLMAGIC) { /* * Run our normal shell image activator. If it succeeds attempt * to use the alternate path for the interpreter. If an * alternate path is found, use our stringspace to store it. */ if ((error = exec_shell_imgact(imgp)) == 0) { linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD); if (rpath != NULL) imgp->args->fname_buf = imgp->interpreter_name = rpath; } } return (error); } int linux_common_execve(struct thread *td, struct image_args *eargs) { struct linux_pemuldata *pem; struct epoll_emuldata *emd; struct vmspace *oldvmspace; struct linux_emuldata *em; struct proc *p; int error; p = td->td_proc; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = kern_execve(td, eargs, NULL); post_execve(td, error, oldvmspace); if (error != EJUSTRETURN) return (error); /* * In a case of transition from Linux binary execing to * FreeBSD binary we destroy Linux emuldata thread & proc entries. */ if (SV_CURPROC_ABI() != SV_ABI_LINUX) { PROC_LOCK(p); em = em_find(td); KASSERT(em != NULL, ("proc_exec: thread emuldata not found.\n")); td->td_emuldata = NULL; pem = pem_find(p); KASSERT(pem != NULL, ("proc_exec: proc pemuldata not found.\n")); p->p_emuldata = NULL; PROC_UNLOCK(p); if (pem->epoll != NULL) { emd = pem->epoll; pem->epoll = NULL; free(emd, M_EPOLL); } free(em, M_TEMP); free(pem, M_LINUX); } return (EJUSTRETURN); } void linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp) { struct thread *td = curthread; struct thread *othertd; #if defined(__amd64__) struct linux_pemuldata *pem; #endif /* * In a case of execing from Linux binary properly detach * other threads from the user space. */ if (__predict_false(SV_PROC_ABI(p) == SV_ABI_LINUX)) { FOREACH_THREAD_IN_PROC(p, othertd) { if (td != othertd) (p->p_sysent->sv_thread_detach)(othertd); } } /* * In a case of execing to Linux binary we create Linux * emuldata thread entry. */ if (__predict_false((imgp->sysent->sv_flags & SV_ABI_MASK) == SV_ABI_LINUX)) { if (SV_PROC_ABI(p) == SV_ABI_LINUX) linux_proc_init(td, NULL, 0); else linux_proc_init(td, td, 0); #if defined(__amd64__) /* * An IA32 executable which has executable stack will have the * READ_IMPLIES_EXEC personality flag set automatically. 
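* This mirrors the Linux personality mechanism: legacy 32-bit binaries * built with an executable stack are assumed to expect read permission * to imply execute permission on their other mappings as well.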
*/ if (SV_PROC_FLAG(td->td_proc, SV_ILP32) && imgp->stack_prot & VM_PROT_EXECUTE) { pem = pem_find(p); pem->persona |= LINUX_READ_IMPLIES_EXEC; } #endif } } void linux_thread_dtor(void *arg __unused, struct thread *td) { struct linux_emuldata *em; em = em_find(td); if (em == NULL) return; td->td_emuldata = NULL; LINUX_CTR1(thread_dtor, "thread(%d)", em->em_tid); free(em, M_TEMP); } void linux_schedtail(struct thread *td) { struct linux_emuldata *em; struct proc *p; int error = 0; int *child_set_tid; p = td->td_proc; em = em_find(td); KASSERT(em != NULL, ("linux_schedtail: thread emuldata not found.\n")); child_set_tid = em->child_set_tid; if (child_set_tid != NULL) { error = copyout(&em->em_tid, child_set_tid, sizeof(em->em_tid)); LINUX_CTR4(schedtail, "thread(%d) %p stored %d error %d", td->td_tid, child_set_tid, em->em_tid, error); } else LINUX_CTR1(schedtail, "thread(%d)", em->em_tid); } Index: projects/runtime-coverage-v2/sys/compat/linux/linux_emul.h =================================================================== --- projects/runtime-coverage-v2/sys/compat/linux/linux_emul.h (revision 347075) +++ projects/runtime-coverage-v2/sys/compat/linux/linux_emul.h (revision 347076) @@ -1,83 +1,82 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 Roman Divacky * Copyright (c) 2013 Dmitry Chagin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _LINUX_EMUL_H_ #define _LINUX_EMUL_H_ /* * modeled after similar structure in NetBSD * this will be extended as we need more functionality */ struct linux_emuldata { int *child_set_tid; /* in clone(): Child's TID to set on clone */ int *child_clear_tid;/* in clone(): Child's TID to clear on exit */ - int pdeath_signal; /* parent death signal */ int flags; /* thread emuldata flags */ int em_tid; /* thread id */ struct linux_robust_list_head *robust_futexes; }; struct linux_emuldata *em_find(struct thread *); int linux_exec_imgact_try(struct image_params *); void linux_proc_init(struct thread *, struct thread *, int); void linux_proc_exit(void *, struct proc *); void linux_schedtail(struct thread *); void linux_proc_exec(void *, struct proc *, struct image_params *); void linux_thread_dtor(void *arg __unused, struct thread *); void linux_thread_detach(struct thread *); int linux_common_execve(struct thread *, struct image_args *); /* process emuldata flags */ #define LINUX_XDEPR_REQUEUEOP 0x00000001 /* uses deprecated futex REQUEUE op*/ #define LINUX_XUNSUP_EPOLL 0x00000002 /* unsupported epoll events */ #define LINUX_XUNSUP_FUTEXPIOP 0x00000004 /* uses unsupported pi futex */ struct linux_pemuldata { uint32_t flags; /* process emuldata flags */ struct sx pem_sx; /* lock for this struct */ void *epoll; /* epoll data */ uint32_t persona; /* process execution domain */ }; #define LINUX_PEM_XLOCK(p) sx_xlock(&(p)->pem_sx) #define LINUX_PEM_XUNLOCK(p) sx_xunlock(&(p)->pem_sx) #define LINUX_PEM_SLOCK(p) sx_slock(&(p)->pem_sx) #define LINUX_PEM_SUNLOCK(p) sx_sunlock(&(p)->pem_sx) struct linux_pemuldata *pem_find(struct proc *); extern const int linux_errtbl[]; #endif /* !_LINUX_EMUL_H_ */ Index: projects/runtime-coverage-v2/sys/compat/linux/linux_ioctl.c =================================================================== --- projects/runtime-coverage-v2/sys/compat/linux/linux_ioctl.c (revision 347075) +++ projects/runtime-coverage-v2/sys/compat/linux/linux_ioctl.c (revision 347076) @@ -1,3884 +1,3805 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_compat.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif +#include #include #include #include #include #include #include #include #include #include #include CTASSERT(LINUX_IFNAMSIZ == IFNAMSIZ); static linux_ioctl_function_t linux_ioctl_cdrom; static linux_ioctl_function_t linux_ioctl_vfat; static linux_ioctl_function_t linux_ioctl_console; static linux_ioctl_function_t linux_ioctl_hdio; static linux_ioctl_function_t linux_ioctl_disk; static linux_ioctl_function_t linux_ioctl_socket; static linux_ioctl_function_t linux_ioctl_sound; static linux_ioctl_function_t linux_ioctl_termio; static linux_ioctl_function_t linux_ioctl_private; static linux_ioctl_function_t linux_ioctl_drm; static linux_ioctl_function_t linux_ioctl_sg; static linux_ioctl_function_t linux_ioctl_v4l; static linux_ioctl_function_t linux_ioctl_v4l2; static linux_ioctl_function_t linux_ioctl_special; static linux_ioctl_function_t linux_ioctl_fbsd_usb; static linux_ioctl_function_t linux_ioctl_evdev; static struct linux_ioctl_handler cdrom_handler = { linux_ioctl_cdrom, LINUX_IOCTL_CDROM_MIN, LINUX_IOCTL_CDROM_MAX }; static struct linux_ioctl_handler vfat_handler = { linux_ioctl_vfat, LINUX_IOCTL_VFAT_MIN, LINUX_IOCTL_VFAT_MAX }; static struct linux_ioctl_handler console_handler = { linux_ioctl_console, LINUX_IOCTL_CONSOLE_MIN, LINUX_IOCTL_CONSOLE_MAX }; static struct linux_ioctl_handler hdio_handler = { linux_ioctl_hdio, LINUX_IOCTL_HDIO_MIN, LINUX_IOCTL_HDIO_MAX }; static struct linux_ioctl_handler disk_handler = { linux_ioctl_disk, LINUX_IOCTL_DISK_MIN, LINUX_IOCTL_DISK_MAX }; static struct linux_ioctl_handler socket_handler = { linux_ioctl_socket, LINUX_IOCTL_SOCKET_MIN, LINUX_IOCTL_SOCKET_MAX }; static struct linux_ioctl_handler sound_handler = { linux_ioctl_sound, LINUX_IOCTL_SOUND_MIN, LINUX_IOCTL_SOUND_MAX }; static struct linux_ioctl_handler termio_handler = { linux_ioctl_termio, LINUX_IOCTL_TERMIO_MIN, LINUX_IOCTL_TERMIO_MAX }; static struct linux_ioctl_handler private_handler = { linux_ioctl_private, LINUX_IOCTL_PRIVATE_MIN, LINUX_IOCTL_PRIVATE_MAX }; static struct linux_ioctl_handler drm_handler = { linux_ioctl_drm, LINUX_IOCTL_DRM_MIN, LINUX_IOCTL_DRM_MAX }; static struct linux_ioctl_handler sg_handler = { linux_ioctl_sg, LINUX_IOCTL_SG_MIN, LINUX_IOCTL_SG_MAX }; static struct linux_ioctl_handler video_handler = { linux_ioctl_v4l, LINUX_IOCTL_VIDEO_MIN, LINUX_IOCTL_VIDEO_MAX }; static struct linux_ioctl_handler video2_handler = { linux_ioctl_v4l2, LINUX_IOCTL_VIDEO2_MIN, LINUX_IOCTL_VIDEO2_MAX }; static struct linux_ioctl_handler fbsd_usb = { linux_ioctl_fbsd_usb, FBSD_LUSB_MIN, FBSD_LUSB_MAX }; static struct linux_ioctl_handler evdev_handler = { linux_ioctl_evdev, LINUX_IOCTL_EVDEV_MIN, LINUX_IOCTL_EVDEV_MAX }; DATA_SET(linux_ioctl_handler_set, cdrom_handler); DATA_SET(linux_ioctl_handler_set, vfat_handler); DATA_SET(linux_ioctl_handler_set, console_handler); DATA_SET(linux_ioctl_handler_set, hdio_handler); DATA_SET(linux_ioctl_handler_set, disk_handler); DATA_SET(linux_ioctl_handler_set, socket_handler); DATA_SET(linux_ioctl_handler_set, sound_handler); DATA_SET(linux_ioctl_handler_set, 
termio_handler); DATA_SET(linux_ioctl_handler_set, private_handler); DATA_SET(linux_ioctl_handler_set, drm_handler); DATA_SET(linux_ioctl_handler_set, sg_handler); DATA_SET(linux_ioctl_handler_set, video_handler); DATA_SET(linux_ioctl_handler_set, video2_handler); DATA_SET(linux_ioctl_handler_set, fbsd_usb); DATA_SET(linux_ioctl_handler_set, evdev_handler); #ifdef __i386__ static TAILQ_HEAD(, linux_ioctl_handler_element) linux_ioctl_handlers = TAILQ_HEAD_INITIALIZER(linux_ioctl_handlers); static struct sx linux_ioctl_sx; SX_SYSINIT(linux_ioctl, &linux_ioctl_sx, "Linux ioctl handlers"); #else extern TAILQ_HEAD(, linux_ioctl_handler_element) linux_ioctl_handlers; extern struct sx linux_ioctl_sx; #endif #ifdef COMPAT_LINUX32 static TAILQ_HEAD(, linux_ioctl_handler_element) linux32_ioctl_handlers = TAILQ_HEAD_INITIALIZER(linux32_ioctl_handlers); #endif /* * hdio related ioctls for VMware support */ struct linux_hd_geometry { u_int8_t heads; u_int8_t sectors; u_int16_t cylinders; u_int32_t start; }; struct linux_hd_big_geometry { u_int8_t heads; u_int8_t sectors; u_int32_t cylinders; u_int32_t start; }; static int linux_ioctl_hdio(struct thread *td, struct linux_ioctl_args *args) { struct file *fp; int error; u_int sectorsize, fwcylinders, fwheads, fwsectors; off_t mediasize, bytespercyl; error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); switch (args->cmd & 0xffff) { case LINUX_HDIO_GET_GEO: case LINUX_HDIO_GET_GEO_BIG: error = fo_ioctl(fp, DIOCGMEDIASIZE, (caddr_t)&mediasize, td->td_ucred, td); if (!error) error = fo_ioctl(fp, DIOCGSECTORSIZE, (caddr_t)&sectorsize, td->td_ucred, td); if (!error) error = fo_ioctl(fp, DIOCGFWHEADS, (caddr_t)&fwheads, td->td_ucred, td); if (!error) error = fo_ioctl(fp, DIOCGFWSECTORS, (caddr_t)&fwsectors, td->td_ucred, td); /* * XXX: DIOCGFIRSTOFFSET is not yet implemented, so * pretend that GEOM always says 0. This is NOT VALID * for slices or partitions, only the per-disk raw devices. */ fdrop(fp, td); if (error) return (error); /* * 1. Calculate the number of bytes in a cylinder, * given the firmware's notion of heads and sectors * per cylinder. * 2. Calculate the number of cylinders, given the total * size of the media. * All internal calculations should have 64-bit precision.
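* For example (values assumed purely for illustration): with 512-byte * sectors, 16 heads and 63 sectors per track, bytespercyl = 512 * 16 * 63 * = 516096, so 1073741824 bytes of media yields 1073741824 / 516096 = 2080 * cylinders, the remainder being truncated by the integer division.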
*/ bytespercyl = (off_t) sectorsize * fwheads * fwsectors; fwcylinders = mediasize / bytespercyl; #if defined(DEBUG) linux_msg(td, "HDIO_GET_GEO: mediasize %jd, c/h/s %d/%d/%d, " "bpc %jd", (intmax_t)mediasize, fwcylinders, fwheads, fwsectors, (intmax_t)bytespercyl); #endif if ((args->cmd & 0xffff) == LINUX_HDIO_GET_GEO) { struct linux_hd_geometry hdg; hdg.cylinders = fwcylinders; hdg.heads = fwheads; hdg.sectors = fwsectors; hdg.start = 0; error = copyout(&hdg, (void *)args->arg, sizeof(hdg)); } else if ((args->cmd & 0xffff) == LINUX_HDIO_GET_GEO_BIG) { struct linux_hd_big_geometry hdbg; memset(&hdbg, 0, sizeof(hdbg)); hdbg.cylinders = fwcylinders; hdbg.heads = fwheads; hdbg.sectors = fwsectors; hdbg.start = 0; error = copyout(&hdbg, (void *)args->arg, sizeof(hdbg)); } return (error); break; default: /* XXX */ linux_msg(td, "ioctl fd=%d, cmd=0x%x ('%c',%d) is not implemented", args->fd, (int)(args->cmd & 0xffff), (int)(args->cmd & 0xff00) >> 8, (int)(args->cmd & 0xff)); break; } fdrop(fp, td); return (ENOIOCTL); } static int linux_ioctl_disk(struct thread *td, struct linux_ioctl_args *args) { struct file *fp; int error; u_int sectorsize; off_t mediasize; error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); switch (args->cmd & 0xffff) { case LINUX_BLKGETSIZE: error = fo_ioctl(fp, DIOCGSECTORSIZE, (caddr_t)&sectorsize, td->td_ucred, td); if (!error) error = fo_ioctl(fp, DIOCGMEDIASIZE, (caddr_t)&mediasize, td->td_ucred, td); fdrop(fp, td); if (error) return (error); sectorsize = mediasize / sectorsize; /* * XXX: How do we know we return the right size of integer? */ return (copyout(&sectorsize, (void *)args->arg, sizeof(sectorsize))); break; case LINUX_BLKSSZGET: error = fo_ioctl(fp, DIOCGSECTORSIZE, (caddr_t)&sectorsize, td->td_ucred, td); fdrop(fp, td); if (error) return (error); return (copyout(&sectorsize, (void *)args->arg, sizeof(sectorsize))); break; } fdrop(fp, td); return (ENOIOCTL); } /* * termio related ioctls */ struct linux_termio { unsigned short c_iflag; unsigned short c_oflag; unsigned short c_cflag; unsigned short c_lflag; unsigned char c_line; unsigned char c_cc[LINUX_NCC]; }; struct linux_termios { unsigned int c_iflag; unsigned int c_oflag; unsigned int c_cflag; unsigned int c_lflag; unsigned char c_line; unsigned char c_cc[LINUX_NCCS]; }; struct linux_winsize { unsigned short ws_row, ws_col; unsigned short ws_xpixel, ws_ypixel; }; struct speedtab { int sp_speed; /* Speed. */ int sp_code; /* Code.
*/ }; static struct speedtab sptab[] = { { B0, LINUX_B0 }, { B50, LINUX_B50 }, { B75, LINUX_B75 }, { B110, LINUX_B110 }, { B134, LINUX_B134 }, { B150, LINUX_B150 }, { B200, LINUX_B200 }, { B300, LINUX_B300 }, { B600, LINUX_B600 }, { B1200, LINUX_B1200 }, { B1800, LINUX_B1800 }, { B2400, LINUX_B2400 }, { B4800, LINUX_B4800 }, { B9600, LINUX_B9600 }, { B19200, LINUX_B19200 }, { B38400, LINUX_B38400 }, { B57600, LINUX_B57600 }, { B115200, LINUX_B115200 }, {-1, -1 } }; struct linux_serial_struct { int type; int line; int port; int irq; int flags; int xmit_fifo_size; int custom_divisor; int baud_base; unsigned short close_delay; char reserved_char[2]; int hub6; unsigned short closing_wait; unsigned short closing_wait2; int reserved[4]; }; static int linux_to_bsd_speed(int code, struct speedtab *table) { for ( ; table->sp_code != -1; table++) if (table->sp_code == code) return (table->sp_speed); return (-1); } static int bsd_to_linux_speed(int speed, struct speedtab *table) { for ( ; table->sp_speed != -1; table++) if (table->sp_speed == speed) return (table->sp_code); return (-1); } static void bsd_to_linux_termios(struct termios *bios, struct linux_termios *lios) { int i; #ifdef DEBUG if (ldebug(ioctl)) { printf("LINUX: BSD termios structure (input):\n"); printf("i=%08x o=%08x c=%08x l=%08x ispeed=%d ospeed=%d\n", bios->c_iflag, bios->c_oflag, bios->c_cflag, bios->c_lflag, bios->c_ispeed, bios->c_ospeed); printf("c_cc "); for (i=0; i<NCCS; i++) printf("%02x ", bios->c_cc[i]); printf("\n"); } #endif lios->c_iflag = 0; if (bios->c_iflag & IGNBRK) lios->c_iflag |= LINUX_IGNBRK; if (bios->c_iflag & BRKINT) lios->c_iflag |= LINUX_BRKINT; if (bios->c_iflag & IGNPAR) lios->c_iflag |= LINUX_IGNPAR; if (bios->c_iflag & PARMRK) lios->c_iflag |= LINUX_PARMRK; if (bios->c_iflag & INPCK) lios->c_iflag |= LINUX_INPCK; if (bios->c_iflag & ISTRIP) lios->c_iflag |= LINUX_ISTRIP; if (bios->c_iflag & INLCR) lios->c_iflag |= LINUX_INLCR; if (bios->c_iflag & IGNCR) lios->c_iflag |= LINUX_IGNCR; if (bios->c_iflag & ICRNL) lios->c_iflag |= LINUX_ICRNL; if (bios->c_iflag & IXON) lios->c_iflag |= LINUX_IXON; if (bios->c_iflag & IXANY) lios->c_iflag |= LINUX_IXANY; if (bios->c_iflag & IXOFF) lios->c_iflag |= LINUX_IXOFF; if (bios->c_iflag & IMAXBEL) lios->c_iflag |= LINUX_IMAXBEL; lios->c_oflag = 0; if (bios->c_oflag & OPOST) lios->c_oflag |= LINUX_OPOST; if (bios->c_oflag & ONLCR) lios->c_oflag |= LINUX_ONLCR; if (bios->c_oflag & TAB3) lios->c_oflag |= LINUX_XTABS; lios->c_cflag = bsd_to_linux_speed(bios->c_ispeed, sptab); lios->c_cflag |= (bios->c_cflag & CSIZE) >> 4; if (bios->c_cflag & CSTOPB) lios->c_cflag |= LINUX_CSTOPB; if (bios->c_cflag & CREAD) lios->c_cflag |= LINUX_CREAD; if (bios->c_cflag & PARENB) lios->c_cflag |= LINUX_PARENB; if (bios->c_cflag & PARODD) lios->c_cflag |= LINUX_PARODD; if (bios->c_cflag & HUPCL) lios->c_cflag |= LINUX_HUPCL; if (bios->c_cflag & CLOCAL) lios->c_cflag |= LINUX_CLOCAL; if (bios->c_cflag & CRTSCTS) lios->c_cflag |= LINUX_CRTSCTS; lios->c_lflag = 0; if (bios->c_lflag & ISIG) lios->c_lflag |= LINUX_ISIG; if (bios->c_lflag & ICANON) lios->c_lflag |= LINUX_ICANON; if (bios->c_lflag & ECHO) lios->c_lflag |= LINUX_ECHO; if (bios->c_lflag & ECHOE) lios->c_lflag |= LINUX_ECHOE; if (bios->c_lflag & ECHOK) lios->c_lflag |= LINUX_ECHOK; if (bios->c_lflag & ECHONL) lios->c_lflag |= LINUX_ECHONL; if (bios->c_lflag & NOFLSH) lios->c_lflag |= LINUX_NOFLSH; if (bios->c_lflag & TOSTOP) lios->c_lflag |= LINUX_TOSTOP; if (bios->c_lflag & ECHOCTL) lios->c_lflag |= LINUX_ECHOCTL; if (bios->c_lflag & ECHOPRT) lios->c_lflag |=
LINUX_ECHOPRT; if (bios->c_lflag & ECHOKE) lios->c_lflag |= LINUX_ECHOKE; if (bios->c_lflag & FLUSHO) lios->c_lflag |= LINUX_FLUSHO; if (bios->c_lflag & PENDIN) lios->c_lflag |= LINUX_PENDIN; if (bios->c_lflag & IEXTEN) lios->c_lflag |= LINUX_IEXTEN; for (i=0; i<LINUX_NCCS; i++) lios->c_cc[i] = LINUX_POSIX_VDISABLE; lios->c_cc[LINUX_VINTR] = bios->c_cc[VINTR]; lios->c_cc[LINUX_VQUIT] = bios->c_cc[VQUIT]; lios->c_cc[LINUX_VERASE] = bios->c_cc[VERASE]; lios->c_cc[LINUX_VKILL] = bios->c_cc[VKILL]; lios->c_cc[LINUX_VEOF] = bios->c_cc[VEOF]; lios->c_cc[LINUX_VEOL] = bios->c_cc[VEOL]; lios->c_cc[LINUX_VMIN] = bios->c_cc[VMIN]; lios->c_cc[LINUX_VTIME] = bios->c_cc[VTIME]; lios->c_cc[LINUX_VEOL2] = bios->c_cc[VEOL2]; lios->c_cc[LINUX_VSUSP] = bios->c_cc[VSUSP]; lios->c_cc[LINUX_VSTART] = bios->c_cc[VSTART]; lios->c_cc[LINUX_VSTOP] = bios->c_cc[VSTOP]; lios->c_cc[LINUX_VREPRINT] = bios->c_cc[VREPRINT]; lios->c_cc[LINUX_VDISCARD] = bios->c_cc[VDISCARD]; lios->c_cc[LINUX_VWERASE] = bios->c_cc[VWERASE]; lios->c_cc[LINUX_VLNEXT] = bios->c_cc[VLNEXT]; for (i=0; i<LINUX_NCCS; i++) { if (bios->c_cc[i] == _POSIX_VDISABLE) lios->c_cc[i] = LINUX_POSIX_VDISABLE; } lios->c_line = 0; #ifdef DEBUG if (ldebug(ioctl)) { printf("LINUX: LINUX termios structure (output):\n"); printf("i=%08x o=%08x c=%08x l=%08x line=%d\n", lios->c_iflag, lios->c_oflag, lios->c_cflag, lios->c_lflag, (int)lios->c_line); printf("c_cc "); for (i=0; i<LINUX_NCCS; i++) printf("%02x ", lios->c_cc[i]); printf("\n"); } #endif } static void linux_to_bsd_termios(struct linux_termios *lios, struct termios *bios) { int i; #ifdef DEBUG if (ldebug(ioctl)) { printf("LINUX: LINUX termios structure (input):\n"); printf("i=%08x o=%08x c=%08x l=%08x line=%d\n", lios->c_iflag, lios->c_oflag, lios->c_cflag, lios->c_lflag, (int)lios->c_line); printf("c_cc "); for (i=0; i<LINUX_NCCS; i++) printf("%02x ", lios->c_cc[i]); printf("\n"); } #endif bios->c_iflag = 0; if (lios->c_iflag & LINUX_IGNBRK) bios->c_iflag |= IGNBRK; if (lios->c_iflag & LINUX_BRKINT) bios->c_iflag |= BRKINT; if (lios->c_iflag & LINUX_IGNPAR) bios->c_iflag |= IGNPAR; if (lios->c_iflag & LINUX_PARMRK) bios->c_iflag |= PARMRK; if (lios->c_iflag & LINUX_INPCK) bios->c_iflag |= INPCK; if (lios->c_iflag & LINUX_ISTRIP) bios->c_iflag |= ISTRIP; if (lios->c_iflag & LINUX_INLCR) bios->c_iflag |= INLCR; if (lios->c_iflag & LINUX_IGNCR) bios->c_iflag |= IGNCR; if (lios->c_iflag & LINUX_ICRNL) bios->c_iflag |= ICRNL; if (lios->c_iflag & LINUX_IXON) bios->c_iflag |= IXON; if (lios->c_iflag & LINUX_IXANY) bios->c_iflag |= IXANY; if (lios->c_iflag & LINUX_IXOFF) bios->c_iflag |= IXOFF; if (lios->c_iflag & LINUX_IMAXBEL) bios->c_iflag |= IMAXBEL; bios->c_oflag = 0; if (lios->c_oflag & LINUX_OPOST) bios->c_oflag |= OPOST; if (lios->c_oflag & LINUX_ONLCR) bios->c_oflag |= ONLCR; if (lios->c_oflag & LINUX_XTABS) bios->c_oflag |= TAB3; bios->c_cflag = (lios->c_cflag & LINUX_CSIZE) << 4; if (lios->c_cflag & LINUX_CSTOPB) bios->c_cflag |= CSTOPB; if (lios->c_cflag & LINUX_CREAD) bios->c_cflag |= CREAD; if (lios->c_cflag & LINUX_PARENB) bios->c_cflag |= PARENB; if (lios->c_cflag & LINUX_PARODD) bios->c_cflag |= PARODD; if (lios->c_cflag & LINUX_HUPCL) bios->c_cflag |= HUPCL; if (lios->c_cflag & LINUX_CLOCAL) bios->c_cflag |= CLOCAL; if (lios->c_cflag & LINUX_CRTSCTS) bios->c_cflag |= CRTSCTS; bios->c_lflag = 0; if (lios->c_lflag & LINUX_ISIG) bios->c_lflag |= ISIG; if (lios->c_lflag & LINUX_ICANON) bios->c_lflag |= ICANON; if (lios->c_lflag & LINUX_ECHO) bios->c_lflag |= ECHO; if (lios->c_lflag & LINUX_ECHOE) bios->c_lflag |= ECHOE; if (lios->c_lflag & LINUX_ECHOK) bios->c_lflag |= ECHOK; if (lios->c_lflag & LINUX_ECHONL) bios->c_lflag |=
ECHONL; if (lios->c_lflag & LINUX_NOFLSH) bios->c_lflag |= NOFLSH; if (lios->c_lflag & LINUX_TOSTOP) bios->c_lflag |= TOSTOP; if (lios->c_lflag & LINUX_ECHOCTL) bios->c_lflag |= ECHOCTL; if (lios->c_lflag & LINUX_ECHOPRT) bios->c_lflag |= ECHOPRT; if (lios->c_lflag & LINUX_ECHOKE) bios->c_lflag |= ECHOKE; if (lios->c_lflag & LINUX_FLUSHO) bios->c_lflag |= FLUSHO; if (lios->c_lflag & LINUX_PENDIN) bios->c_lflag |= PENDIN; if (lios->c_lflag & LINUX_IEXTEN) bios->c_lflag |= IEXTEN; for (i=0; i<NCCS; i++) bios->c_cc[i] = _POSIX_VDISABLE; bios->c_cc[VINTR] = lios->c_cc[LINUX_VINTR]; bios->c_cc[VQUIT] = lios->c_cc[LINUX_VQUIT]; bios->c_cc[VERASE] = lios->c_cc[LINUX_VERASE]; bios->c_cc[VKILL] = lios->c_cc[LINUX_VKILL]; bios->c_cc[VEOF] = lios->c_cc[LINUX_VEOF]; bios->c_cc[VEOL] = lios->c_cc[LINUX_VEOL]; bios->c_cc[VMIN] = lios->c_cc[LINUX_VMIN]; bios->c_cc[VTIME] = lios->c_cc[LINUX_VTIME]; bios->c_cc[VEOL2] = lios->c_cc[LINUX_VEOL2]; bios->c_cc[VSUSP] = lios->c_cc[LINUX_VSUSP]; bios->c_cc[VSTART] = lios->c_cc[LINUX_VSTART]; bios->c_cc[VSTOP] = lios->c_cc[LINUX_VSTOP]; bios->c_cc[VREPRINT] = lios->c_cc[LINUX_VREPRINT]; bios->c_cc[VDISCARD] = lios->c_cc[LINUX_VDISCARD]; bios->c_cc[VWERASE] = lios->c_cc[LINUX_VWERASE]; bios->c_cc[VLNEXT] = lios->c_cc[LINUX_VLNEXT]; for (i=0; i<NCCS; i++) { if (lios->c_cc[i] == LINUX_POSIX_VDISABLE) bios->c_cc[i] = _POSIX_VDISABLE; } bios->c_ispeed = bios->c_ospeed = linux_to_bsd_speed(lios->c_cflag & LINUX_CBAUD, sptab); #ifdef DEBUG if (ldebug(ioctl)) { printf("LINUX: BSD termios structure (output):\n"); printf("i=%08x o=%08x c=%08x l=%08x ispeed=%d ospeed=%d\n", bios->c_iflag, bios->c_oflag, bios->c_cflag, bios->c_lflag, bios->c_ispeed, bios->c_ospeed); printf("c_cc "); for (i=0; i<NCCS; i++) printf("%02x ", bios->c_cc[i]); printf("\n"); } #endif } static void bsd_to_linux_termio(struct termios *bios, struct linux_termio *lio) { struct linux_termios lios; memset(lio, 0, sizeof(*lio)); bsd_to_linux_termios(bios, &lios); lio->c_iflag = lios.c_iflag; lio->c_oflag = lios.c_oflag; lio->c_cflag = lios.c_cflag; lio->c_lflag = lios.c_lflag; lio->c_line = lios.c_line; memcpy(lio->c_cc, lios.c_cc, LINUX_NCC); } static void linux_to_bsd_termio(struct linux_termio *lio, struct termios *bios) { struct linux_termios lios; int i; lios.c_iflag = lio->c_iflag; lios.c_oflag = lio->c_oflag; lios.c_cflag = lio->c_cflag; lios.c_lflag = lio->c_lflag; for (i=LINUX_NCC; i<LINUX_NCCS; i++) lios.c_cc[i] = LINUX_POSIX_VDISABLE; memcpy(lios.c_cc, lio->c_cc, LINUX_NCC); linux_to_bsd_termios(&lios, bios); } static int linux_ioctl_termio(struct thread *td, struct linux_ioctl_args *args) { struct termios bios; struct linux_termios lios; struct linux_termio lio; struct file *fp; int error; error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); switch (args->cmd & 0xffff) { case LINUX_TCGETS: error = fo_ioctl(fp, TIOCGETA, (caddr_t)&bios, td->td_ucred, td); if (error) break; bsd_to_linux_termios(&bios, &lios); error = copyout(&lios, (void *)args->arg, sizeof(lios)); break; case LINUX_TCSETS: error = copyin((void *)args->arg, &lios, sizeof(lios)); if (error) break; linux_to_bsd_termios(&lios, &bios); error = (fo_ioctl(fp, TIOCSETA, (caddr_t)&bios, td->td_ucred, td)); break; case LINUX_TCSETSW: error = copyin((void *)args->arg, &lios, sizeof(lios)); if (error) break; linux_to_bsd_termios(&lios, &bios); error = (fo_ioctl(fp, TIOCSETAW, (caddr_t)&bios, td->td_ucred, td)); break; case LINUX_TCSETSF: error = copyin((void *)args->arg, &lios, sizeof(lios)); if (error) break; linux_to_bsd_termios(&lios, &bios); error = (fo_ioctl(fp, TIOCSETAF, (caddr_t)&bios, td->td_ucred, td)); break; case LINUX_TCGETA: error = fo_ioctl(fp,
TIOCGETA, (caddr_t)&bios, td->td_ucred, td); if (error) break; bsd_to_linux_termio(&bios, &lio); error = (copyout(&lio, (void *)args->arg, sizeof(lio))); break; case LINUX_TCSETA: error = copyin((void *)args->arg, &lio, sizeof(lio)); if (error) break; linux_to_bsd_termio(&lio, &bios); error = (fo_ioctl(fp, TIOCSETA, (caddr_t)&bios, td->td_ucred, td)); break; case LINUX_TCSETAW: error = copyin((void *)args->arg, &lio, sizeof(lio)); if (error) break; linux_to_bsd_termio(&lio, &bios); error = (fo_ioctl(fp, TIOCSETAW, (caddr_t)&bios, td->td_ucred, td)); break; case LINUX_TCSETAF: error = copyin((void *)args->arg, &lio, sizeof(lio)); if (error) break; linux_to_bsd_termio(&lio, &bios); error = (fo_ioctl(fp, TIOCSETAF, (caddr_t)&bios, td->td_ucred, td)); break; /* LINUX_TCSBRK */ case LINUX_TCXONC: { switch (args->arg) { case LINUX_TCOOFF: args->cmd = TIOCSTOP; break; case LINUX_TCOON: args->cmd = TIOCSTART; break; case LINUX_TCIOFF: case LINUX_TCION: { int c; struct write_args wr; error = fo_ioctl(fp, TIOCGETA, (caddr_t)&bios, td->td_ucred, td); if (error) break; fdrop(fp, td); c = (args->arg == LINUX_TCIOFF) ? VSTOP : VSTART; c = bios.c_cc[c]; if (c != _POSIX_VDISABLE) { wr.fd = args->fd; wr.buf = &c; wr.nbyte = sizeof(c); return (sys_write(td, &wr)); } else return (0); } default: fdrop(fp, td); return (EINVAL); } args->arg = 0; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; } case LINUX_TCFLSH: { int val; switch (args->arg) { case LINUX_TCIFLUSH: val = FREAD; break; case LINUX_TCOFLUSH: val = FWRITE; break; case LINUX_TCIOFLUSH: val = FREAD | FWRITE; break; default: fdrop(fp, td); return (EINVAL); } error = (fo_ioctl(fp,TIOCFLUSH,(caddr_t)&val,td->td_ucred,td)); break; } case LINUX_TIOCEXCL: args->cmd = TIOCEXCL; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_TIOCNXCL: args->cmd = TIOCNXCL; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_TIOCSCTTY: args->cmd = TIOCSCTTY; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_TIOCGPGRP: args->cmd = TIOCGPGRP; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_TIOCSPGRP: args->cmd = TIOCSPGRP; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; /* LINUX_TIOCOUTQ */ /* LINUX_TIOCSTI */ case LINUX_TIOCGWINSZ: args->cmd = TIOCGWINSZ; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_TIOCSWINSZ: args->cmd = TIOCSWINSZ; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_TIOCMGET: args->cmd = TIOCMGET; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_TIOCMBIS: args->cmd = TIOCMBIS; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_TIOCMBIC: args->cmd = TIOCMBIC; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_TIOCMSET: args->cmd = TIOCMSET; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; /* TIOCGSOFTCAR */ /* TIOCSSOFTCAR */ case LINUX_FIONREAD: /* LINUX_TIOCINQ */ args->cmd = FIONREAD; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; /* LINUX_TIOCLINUX */ case LINUX_TIOCCONS: args->cmd = TIOCCONS; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_TIOCGSERIAL: { struct linux_serial_struct lss; bzero(&lss, sizeof(lss)); lss.type = LINUX_PORT_16550A; lss.flags = 0; lss.close_delay = 0; error = copyout(&lss, (void *)args->arg, sizeof(lss)); break; } case LINUX_TIOCSSERIAL: { struct linux_serial_struct lss; error = copyin((void *)args->arg, &lss, sizeof(lss)); if (error) break; /* XXX - It really helps to have an 
implementation that * does nothing. NOT! */ error = 0; break; } case LINUX_TIOCPKT: args->cmd = TIOCPKT; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_FIONBIO: args->cmd = FIONBIO; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_TIOCNOTTY: args->cmd = TIOCNOTTY; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_TIOCSETD: { int line; switch (args->arg) { case LINUX_N_TTY: line = TTYDISC; break; case LINUX_N_SLIP: line = SLIPDISC; break; case LINUX_N_PPP: line = PPPDISC; break; default: fdrop(fp, td); return (EINVAL); } error = (fo_ioctl(fp, TIOCSETD, (caddr_t)&line, td->td_ucred, td)); break; } case LINUX_TIOCGETD: { int linux_line; int bsd_line = TTYDISC; error = fo_ioctl(fp, TIOCGETD, (caddr_t)&bsd_line, td->td_ucred, td); if (error) break; switch (bsd_line) { case TTYDISC: linux_line = LINUX_N_TTY; break; case SLIPDISC: linux_line = LINUX_N_SLIP; break; case PPPDISC: linux_line = LINUX_N_PPP; break; default: fdrop(fp, td); return (EINVAL); } error = (copyout(&linux_line, (void *)args->arg, sizeof(int))); break; } /* LINUX_TCSBRKP */ /* LINUX_TIOCTTYGSTRUCT */ case LINUX_FIONCLEX: args->cmd = FIONCLEX; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_FIOCLEX: args->cmd = FIOCLEX; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_FIOASYNC: args->cmd = FIOASYNC; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; /* LINUX_TIOCSERCONFIG */ /* LINUX_TIOCSERGWILD */ /* LINUX_TIOCSERSWILD */ /* LINUX_TIOCGLCKTRMIOS */ /* LINUX_TIOCSLCKTRMIOS */ case LINUX_TIOCSBRK: args->cmd = TIOCSBRK; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_TIOCCBRK: args->cmd = TIOCCBRK; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_TIOCGPTN: { int nb; error = fo_ioctl(fp, TIOCGPTN, (caddr_t)&nb, td->td_ucred, td); if (!error) error = copyout(&nb, (void *)args->arg, sizeof(int)); break; } case LINUX_TIOCSPTLCK: /* Our unlockpt() does nothing. 
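* Linux uses TIOCSPTLCK to unlock the slave side of a pseudo-terminal; * FreeBSD ptys are never locked, so reporting success unconditionally * is the faithful translation.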
*/ error = 0; break; default: error = ENOIOCTL; break; } fdrop(fp, td); return (error); } /* * CDROM related ioctls */ struct linux_cdrom_msf { u_char cdmsf_min0; u_char cdmsf_sec0; u_char cdmsf_frame0; u_char cdmsf_min1; u_char cdmsf_sec1; u_char cdmsf_frame1; }; struct linux_cdrom_tochdr { u_char cdth_trk0; u_char cdth_trk1; }; union linux_cdrom_addr { struct { u_char minute; u_char second; u_char frame; } msf; int lba; }; struct linux_cdrom_tocentry { u_char cdte_track; u_char cdte_adr:4; u_char cdte_ctrl:4; u_char cdte_format; union linux_cdrom_addr cdte_addr; u_char cdte_datamode; }; struct linux_cdrom_subchnl { u_char cdsc_format; u_char cdsc_audiostatus; u_char cdsc_adr:4; u_char cdsc_ctrl:4; u_char cdsc_trk; u_char cdsc_ind; union linux_cdrom_addr cdsc_absaddr; union linux_cdrom_addr cdsc_reladdr; }; struct l_cdrom_read_audio { union linux_cdrom_addr addr; u_char addr_format; l_int nframes; u_char *buf; }; struct l_dvd_layer { u_char book_version:4; u_char book_type:4; u_char min_rate:4; u_char disc_size:4; u_char layer_type:4; u_char track_path:1; u_char nlayers:2; u_char track_density:4; u_char linear_density:4; u_char bca:1; u_int32_t start_sector; u_int32_t end_sector; u_int32_t end_sector_l0; }; struct l_dvd_physical { u_char type; u_char layer_num; struct l_dvd_layer layer[4]; }; struct l_dvd_copyright { u_char type; u_char layer_num; u_char cpst; u_char rmi; }; struct l_dvd_disckey { u_char type; l_uint agid:2; u_char value[2048]; }; struct l_dvd_bca { u_char type; l_int len; u_char value[188]; }; struct l_dvd_manufact { u_char type; u_char layer_num; l_int len; u_char value[2048]; }; typedef union { u_char type; struct l_dvd_physical physical; struct l_dvd_copyright copyright; struct l_dvd_disckey disckey; struct l_dvd_bca bca; struct l_dvd_manufact manufact; } l_dvd_struct; typedef u_char l_dvd_key[5]; typedef u_char l_dvd_challenge[10]; struct l_dvd_lu_send_agid { u_char type; l_uint agid:2; }; struct l_dvd_host_send_challenge { u_char type; l_uint agid:2; l_dvd_challenge chal; }; struct l_dvd_send_key { u_char type; l_uint agid:2; l_dvd_key key; }; struct l_dvd_lu_send_challenge { u_char type; l_uint agid:2; l_dvd_challenge chal; }; struct l_dvd_lu_send_title_key { u_char type; l_uint agid:2; l_dvd_key title_key; l_int lba; l_uint cpm:1; l_uint cp_sec:1; l_uint cgms:2; }; struct l_dvd_lu_send_asf { u_char type; l_uint agid:2; l_uint asf:1; }; struct l_dvd_host_send_rpcstate { u_char type; u_char pdrc; }; struct l_dvd_lu_send_rpcstate { u_char type:2; u_char vra:3; u_char ucca:3; u_char region_mask; u_char rpc_scheme; }; typedef union { u_char type; struct l_dvd_lu_send_agid lsa; struct l_dvd_host_send_challenge hsc; struct l_dvd_send_key lsk; struct l_dvd_lu_send_challenge lsc; struct l_dvd_send_key hsk; struct l_dvd_lu_send_title_key lstk; struct l_dvd_lu_send_asf lsasf; struct l_dvd_host_send_rpcstate hrpcs; struct l_dvd_lu_send_rpcstate lrpcs; } l_dvd_authinfo; static void bsd_to_linux_msf_lba(u_char af, union msf_lba *bp, union linux_cdrom_addr *lp) { if (af == CD_LBA_FORMAT) lp->lba = bp->lba; else { lp->msf.minute = bp->msf.minute; lp->msf.second = bp->msf.second; lp->msf.frame = bp->msf.frame; } } static void set_linux_cdrom_addr(union linux_cdrom_addr *addr, int format, int lba) { if (format == LINUX_CDROM_MSF) { addr->msf.frame = lba % 75; lba /= 75; lba += 2; addr->msf.second = lba % 60; addr->msf.minute = lba / 60; } else addr->lba = lba; } static int linux_to_bsd_dvd_struct(l_dvd_struct *lp, struct dvd_struct *bp) { bp->format = lp->type; switch (bp->format) 
{ case DVD_STRUCT_PHYSICAL: if (bp->layer_num >= 4) return (EINVAL); bp->layer_num = lp->physical.layer_num; break; case DVD_STRUCT_COPYRIGHT: bp->layer_num = lp->copyright.layer_num; break; case DVD_STRUCT_DISCKEY: bp->agid = lp->disckey.agid; break; case DVD_STRUCT_BCA: case DVD_STRUCT_MANUFACT: break; default: return (EINVAL); } return (0); } static int bsd_to_linux_dvd_struct(struct dvd_struct *bp, l_dvd_struct *lp) { switch (bp->format) { case DVD_STRUCT_PHYSICAL: { struct dvd_layer *blp = (struct dvd_layer *)bp->data; struct l_dvd_layer *llp = &lp->physical.layer[bp->layer_num]; memset(llp, 0, sizeof(*llp)); llp->book_version = blp->book_version; llp->book_type = blp->book_type; llp->min_rate = blp->max_rate; llp->disc_size = blp->disc_size; llp->layer_type = blp->layer_type; llp->track_path = blp->track_path; llp->nlayers = blp->nlayers; llp->track_density = blp->track_density; llp->linear_density = blp->linear_density; llp->bca = blp->bca; llp->start_sector = blp->start_sector; llp->end_sector = blp->end_sector; llp->end_sector_l0 = blp->end_sector_l0; break; } case DVD_STRUCT_COPYRIGHT: lp->copyright.cpst = bp->cpst; lp->copyright.rmi = bp->rmi; break; case DVD_STRUCT_DISCKEY: memcpy(lp->disckey.value, bp->data, sizeof(lp->disckey.value)); break; case DVD_STRUCT_BCA: lp->bca.len = bp->length; memcpy(lp->bca.value, bp->data, sizeof(lp->bca.value)); break; case DVD_STRUCT_MANUFACT: lp->manufact.len = bp->length; memcpy(lp->manufact.value, bp->data, sizeof(lp->manufact.value)); /* lp->manufact.layer_num is unused in Linux (redhat 7.0). */ break; default: return (EINVAL); } return (0); } static int linux_to_bsd_dvd_authinfo(l_dvd_authinfo *lp, int *bcode, struct dvd_authinfo *bp) { switch (lp->type) { case LINUX_DVD_LU_SEND_AGID: *bcode = DVDIOCREPORTKEY; bp->format = DVD_REPORT_AGID; bp->agid = lp->lsa.agid; break; case LINUX_DVD_HOST_SEND_CHALLENGE: *bcode = DVDIOCSENDKEY; bp->format = DVD_SEND_CHALLENGE; bp->agid = lp->hsc.agid; memcpy(bp->keychal, lp->hsc.chal, 10); break; case LINUX_DVD_LU_SEND_KEY1: *bcode = DVDIOCREPORTKEY; bp->format = DVD_REPORT_KEY1; bp->agid = lp->lsk.agid; break; case LINUX_DVD_LU_SEND_CHALLENGE: *bcode = DVDIOCREPORTKEY; bp->format = DVD_REPORT_CHALLENGE; bp->agid = lp->lsc.agid; break; case LINUX_DVD_HOST_SEND_KEY2: *bcode = DVDIOCSENDKEY; bp->format = DVD_SEND_KEY2; bp->agid = lp->hsk.agid; memcpy(bp->keychal, lp->hsk.key, 5); break; case LINUX_DVD_LU_SEND_TITLE_KEY: *bcode = DVDIOCREPORTKEY; bp->format = DVD_REPORT_TITLE_KEY; bp->agid = lp->lstk.agid; bp->lba = lp->lstk.lba; break; case LINUX_DVD_LU_SEND_ASF: *bcode = DVDIOCREPORTKEY; bp->format = DVD_REPORT_ASF; bp->agid = lp->lsasf.agid; break; case LINUX_DVD_INVALIDATE_AGID: *bcode = DVDIOCREPORTKEY; bp->format = DVD_INVALIDATE_AGID; bp->agid = lp->lsa.agid; break; case LINUX_DVD_LU_SEND_RPC_STATE: *bcode = DVDIOCREPORTKEY; bp->format = DVD_REPORT_RPC; break; case LINUX_DVD_HOST_SEND_RPC_STATE: *bcode = DVDIOCSENDKEY; bp->format = DVD_SEND_RPC; bp->region = lp->hrpcs.pdrc; break; default: return (EINVAL); } return (0); } static int bsd_to_linux_dvd_authinfo(struct dvd_authinfo *bp, l_dvd_authinfo *lp) { switch (lp->type) { case LINUX_DVD_LU_SEND_AGID: lp->lsa.agid = bp->agid; break; case LINUX_DVD_HOST_SEND_CHALLENGE: lp->type = LINUX_DVD_LU_SEND_KEY1; break; case LINUX_DVD_LU_SEND_KEY1: memcpy(lp->lsk.key, bp->keychal, sizeof(lp->lsk.key)); break; case LINUX_DVD_LU_SEND_CHALLENGE: memcpy(lp->lsc.chal, bp->keychal, sizeof(lp->lsc.chal)); break; case LINUX_DVD_HOST_SEND_KEY2: lp->type = 
LINUX_DVD_AUTH_ESTABLISHED; break; case LINUX_DVD_LU_SEND_TITLE_KEY: memcpy(lp->lstk.title_key, bp->keychal, sizeof(lp->lstk.title_key)); lp->lstk.cpm = bp->cpm; lp->lstk.cp_sec = bp->cp_sec; lp->lstk.cgms = bp->cgms; break; case LINUX_DVD_LU_SEND_ASF: lp->lsasf.asf = bp->asf; break; case LINUX_DVD_INVALIDATE_AGID: break; case LINUX_DVD_LU_SEND_RPC_STATE: lp->lrpcs.type = bp->reg_type; lp->lrpcs.vra = bp->vend_rsts; lp->lrpcs.ucca = bp->user_rsts; lp->lrpcs.region_mask = bp->region; lp->lrpcs.rpc_scheme = bp->rpc_scheme; break; case LINUX_DVD_HOST_SEND_RPC_STATE: break; default: return (EINVAL); } return (0); } static int linux_ioctl_cdrom(struct thread *td, struct linux_ioctl_args *args) { struct file *fp; int error; error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); switch (args->cmd & 0xffff) { case LINUX_CDROMPAUSE: args->cmd = CDIOCPAUSE; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_CDROMRESUME: args->cmd = CDIOCRESUME; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_CDROMPLAYMSF: args->cmd = CDIOCPLAYMSF; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_CDROMPLAYTRKIND: args->cmd = CDIOCPLAYTRACKS; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_CDROMREADTOCHDR: { struct ioc_toc_header th; struct linux_cdrom_tochdr lth; error = fo_ioctl(fp, CDIOREADTOCHEADER, (caddr_t)&th, td->td_ucred, td); if (!error) { lth.cdth_trk0 = th.starting_track; lth.cdth_trk1 = th.ending_track; copyout(&lth, (void *)args->arg, sizeof(lth)); } break; } case LINUX_CDROMREADTOCENTRY: { struct linux_cdrom_tocentry lte; struct ioc_read_toc_single_entry irtse; error = copyin((void *)args->arg, &lte, sizeof(lte)); if (error) break; irtse.address_format = lte.cdte_format; irtse.track = lte.cdte_track; error = fo_ioctl(fp, CDIOREADTOCENTRY, (caddr_t)&irtse, td->td_ucred, td); if (!error) { lte.cdte_ctrl = irtse.entry.control; lte.cdte_adr = irtse.entry.addr_type; bsd_to_linux_msf_lba(irtse.address_format, &irtse.entry.addr, &lte.cdte_addr); error = copyout(&lte, (void *)args->arg, sizeof(lte)); } break; } case LINUX_CDROMSTOP: args->cmd = CDIOCSTOP; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_CDROMSTART: args->cmd = CDIOCSTART; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_CDROMEJECT: args->cmd = CDIOCEJECT; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; /* LINUX_CDROMVOLCTRL */ case LINUX_CDROMSUBCHNL: { struct linux_cdrom_subchnl sc; struct ioc_read_subchannel bsdsc; struct cd_sub_channel_info bsdinfo; bsdsc.address_format = CD_LBA_FORMAT; bsdsc.data_format = CD_CURRENT_POSITION; bsdsc.track = 0; bsdsc.data_len = sizeof(bsdinfo); bsdsc.data = &bsdinfo; error = fo_ioctl(fp, CDIOCREADSUBCHANNEL_SYSSPACE, (caddr_t)&bsdsc, td->td_ucred, td); if (error) break; error = copyin((void *)args->arg, &sc, sizeof(sc)); if (error) break; sc.cdsc_audiostatus = bsdinfo.header.audio_status; sc.cdsc_adr = bsdinfo.what.position.addr_type; sc.cdsc_ctrl = bsdinfo.what.position.control; sc.cdsc_trk = bsdinfo.what.position.track_number; sc.cdsc_ind = bsdinfo.what.position.index_number; set_linux_cdrom_addr(&sc.cdsc_absaddr, sc.cdsc_format, bsdinfo.what.position.absaddr.lba); set_linux_cdrom_addr(&sc.cdsc_reladdr, sc.cdsc_format, bsdinfo.what.position.reladdr.lba); error = copyout(&sc, (void *)args->arg, sizeof(sc)); break; } /* LINUX_CDROMREADMODE2 */ /* LINUX_CDROMREADMODE1 */ /* LINUX_CDROMREADAUDIO */ /* LINUX_CDROMEJECT_SW */ /* LINUX_CDROMMULTISESSION */ /*
LINUX_CDROM_GET_UPC */ case LINUX_CDROMRESET: args->cmd = CDIOCRESET; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; /* LINUX_CDROMVOLREAD */ /* LINUX_CDROMREADRAW */ /* LINUX_CDROMREADCOOKED */ /* LINUX_CDROMSEEK */ /* LINUX_CDROMPLAYBLK */ /* LINUX_CDROMREADALL */ /* LINUX_CDROMCLOSETRAY */ /* LINUX_CDROMLOADFROMSLOT */ /* LINUX_CDROMGETSPINDOWN */ /* LINUX_CDROMSETSPINDOWN */ /* LINUX_CDROM_SET_OPTIONS */ /* LINUX_CDROM_CLEAR_OPTIONS */ /* LINUX_CDROM_SELECT_SPEED */ /* LINUX_CDROM_SELECT_DISC */ /* LINUX_CDROM_MEDIA_CHANGED */ /* LINUX_CDROM_DRIVE_STATUS */ /* LINUX_CDROM_DISC_STATUS */ /* LINUX_CDROM_CHANGER_NSLOTS */ /* LINUX_CDROM_LOCKDOOR */ /* LINUX_CDROM_DEBUG */ /* LINUX_CDROM_GET_CAPABILITY */ /* LINUX_CDROMAUDIOBUFSIZ */ case LINUX_DVD_READ_STRUCT: { l_dvd_struct *lds; struct dvd_struct *bds; lds = malloc(sizeof(*lds), M_LINUX, M_WAITOK); bds = malloc(sizeof(*bds), M_LINUX, M_WAITOK); error = copyin((void *)args->arg, lds, sizeof(*lds)); if (error) goto out; error = linux_to_bsd_dvd_struct(lds, bds); if (error) goto out; error = fo_ioctl(fp, DVDIOCREADSTRUCTURE, (caddr_t)bds, td->td_ucred, td); if (error) goto out; error = bsd_to_linux_dvd_struct(bds, lds); if (error) goto out; error = copyout(lds, (void *)args->arg, sizeof(*lds)); out: free(bds, M_LINUX); free(lds, M_LINUX); break; } /* LINUX_DVD_WRITE_STRUCT */ case LINUX_DVD_AUTH: { l_dvd_authinfo lda; struct dvd_authinfo bda; int bcode; error = copyin((void *)args->arg, &lda, sizeof(lda)); if (error) break; error = linux_to_bsd_dvd_authinfo(&lda, &bcode, &bda); if (error) break; error = fo_ioctl(fp, bcode, (caddr_t)&bda, td->td_ucred, td); if (error) { if (lda.type == LINUX_DVD_HOST_SEND_KEY2) { lda.type = LINUX_DVD_AUTH_FAILURE; copyout(&lda, (void *)args->arg, sizeof(lda)); } break; } error = bsd_to_linux_dvd_authinfo(&bda, &lda); if (error) break; error = copyout(&lda, (void *)args->arg, sizeof(lda)); break; } case LINUX_SCSI_GET_BUS_NUMBER: { struct sg_scsi_id id; error = fo_ioctl(fp, SG_GET_SCSI_ID, (caddr_t)&id, td->td_ucred, td); if (error) break; error = copyout(&id.channel, (void *)args->arg, sizeof(int)); break; } case LINUX_SCSI_GET_IDLUN: { struct sg_scsi_id id; struct scsi_idlun idl; error = fo_ioctl(fp, SG_GET_SCSI_ID, (caddr_t)&id, td->td_ucred, td); if (error) break; idl.dev_id = (id.scsi_id & 0xff) + ((id.lun & 0xff) << 8) + ((id.channel & 0xff) << 16) + ((id.host_no & 0xff) << 24); idl.host_unique_id = id.host_no; error = copyout(&idl, (void *)args->arg, sizeof(idl)); break; } /* LINUX_CDROM_SEND_PACKET */ /* LINUX_CDROM_NEXT_WRITABLE */ /* LINUX_CDROM_LAST_WRITTEN */ default: error = ENOIOCTL; break; } fdrop(fp, td); return (error); } static int linux_ioctl_vfat(struct thread *td, struct linux_ioctl_args *args) { return (ENOTTY); } /* * Sound related ioctls */ struct linux_old_mixer_info { char id[16]; char name[32]; }; static u_int32_t dirbits[4] = { IOC_VOID, IOC_IN, IOC_OUT, IOC_INOUT }; #define SETDIR(c) (((c) & ~IOC_DIRMASK) | dirbits[args->cmd >> 30]) static int linux_ioctl_sound(struct thread *td, struct linux_ioctl_args *args) { switch (args->cmd & 0xffff) { case LINUX_SOUND_MIXER_WRITE_VOLUME: args->cmd = SETDIR(SOUND_MIXER_WRITE_VOLUME); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_BASS: args->cmd = SETDIR(SOUND_MIXER_WRITE_BASS); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_TREBLE: args->cmd = SETDIR(SOUND_MIXER_WRITE_TREBLE); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_SYNTH: 
args->cmd = SETDIR(SOUND_MIXER_WRITE_SYNTH); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_PCM: args->cmd = SETDIR(SOUND_MIXER_WRITE_PCM); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_SPEAKER: args->cmd = SETDIR(SOUND_MIXER_WRITE_SPEAKER); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_LINE: args->cmd = SETDIR(SOUND_MIXER_WRITE_LINE); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_MIC: args->cmd = SETDIR(SOUND_MIXER_WRITE_MIC); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_CD: args->cmd = SETDIR(SOUND_MIXER_WRITE_CD); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_IMIX: args->cmd = SETDIR(SOUND_MIXER_WRITE_IMIX); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_ALTPCM: args->cmd = SETDIR(SOUND_MIXER_WRITE_ALTPCM); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_RECLEV: args->cmd = SETDIR(SOUND_MIXER_WRITE_RECLEV); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_IGAIN: args->cmd = SETDIR(SOUND_MIXER_WRITE_IGAIN); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_OGAIN: args->cmd = SETDIR(SOUND_MIXER_WRITE_OGAIN); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_LINE1: args->cmd = SETDIR(SOUND_MIXER_WRITE_LINE1); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_LINE2: args->cmd = SETDIR(SOUND_MIXER_WRITE_LINE2); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_LINE3: args->cmd = SETDIR(SOUND_MIXER_WRITE_LINE3); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_INFO: { /* Key on encoded length */ switch ((args->cmd >> 16) & 0x1fff) { case 0x005c: { /* SOUND_MIXER_INFO */ args->cmd = SOUND_MIXER_INFO; return (sys_ioctl(td, (struct ioctl_args *)args)); } case 0x0030: { /* SOUND_OLD_MIXER_INFO */ struct linux_old_mixer_info info; bzero(&info, sizeof(info)); strncpy(info.id, "OSS", sizeof(info.id) - 1); strncpy(info.name, "FreeBSD OSS Mixer", sizeof(info.name) - 1); copyout(&info, (void *)args->arg, sizeof(info)); return (0); } default: return (ENOIOCTL); } break; } case LINUX_OSS_GETVERSION: { int version = linux_get_oss_version(td); return (copyout(&version, (void *)args->arg, sizeof(int))); } case LINUX_SOUND_MIXER_READ_STEREODEVS: args->cmd = SOUND_MIXER_READ_STEREODEVS; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_READ_CAPS: args->cmd = SOUND_MIXER_READ_CAPS; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_READ_RECMASK: args->cmd = SOUND_MIXER_READ_RECMASK; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_READ_DEVMASK: args->cmd = SOUND_MIXER_READ_DEVMASK; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_MIXER_WRITE_RECSRC: args->cmd = SETDIR(SOUND_MIXER_WRITE_RECSRC); return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_RESET: args->cmd = SNDCTL_DSP_RESET; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_SYNC: args->cmd = SNDCTL_DSP_SYNC; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_SPEED: args->cmd = SNDCTL_DSP_SPEED; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_STEREO: args->cmd = SNDCTL_DSP_STEREO; return (sys_ioctl(td, (struct ioctl_args *)args)); case 
LINUX_SNDCTL_DSP_GETBLKSIZE: /* LINUX_SNDCTL_DSP_SETBLKSIZE */ args->cmd = SNDCTL_DSP_GETBLKSIZE; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_SETFMT: args->cmd = SNDCTL_DSP_SETFMT; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_PCM_WRITE_CHANNELS: args->cmd = SOUND_PCM_WRITE_CHANNELS; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SOUND_PCM_WRITE_FILTER: args->cmd = SOUND_PCM_WRITE_FILTER; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_POST: args->cmd = SNDCTL_DSP_POST; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_SUBDIVIDE: args->cmd = SNDCTL_DSP_SUBDIVIDE; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_SETFRAGMENT: args->cmd = SNDCTL_DSP_SETFRAGMENT; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_GETFMTS: args->cmd = SNDCTL_DSP_GETFMTS; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_GETOSPACE: args->cmd = SNDCTL_DSP_GETOSPACE; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_GETISPACE: args->cmd = SNDCTL_DSP_GETISPACE; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_NONBLOCK: args->cmd = SNDCTL_DSP_NONBLOCK; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_GETCAPS: args->cmd = SNDCTL_DSP_GETCAPS; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_SETTRIGGER: /* LINUX_SNDCTL_GETTRIGGER */ args->cmd = SNDCTL_DSP_SETTRIGGER; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_GETIPTR: args->cmd = SNDCTL_DSP_GETIPTR; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_GETOPTR: args->cmd = SNDCTL_DSP_GETOPTR; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_SETDUPLEX: args->cmd = SNDCTL_DSP_SETDUPLEX; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_DSP_GETODELAY: args->cmd = SNDCTL_DSP_GETODELAY; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_SEQ_RESET: args->cmd = SNDCTL_SEQ_RESET; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_SEQ_SYNC: args->cmd = SNDCTL_SEQ_SYNC; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_SYNTH_INFO: args->cmd = SNDCTL_SYNTH_INFO; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_SEQ_CTRLRATE: args->cmd = SNDCTL_SEQ_CTRLRATE; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_SEQ_GETOUTCOUNT: args->cmd = SNDCTL_SEQ_GETOUTCOUNT; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_SEQ_GETINCOUNT: args->cmd = SNDCTL_SEQ_GETINCOUNT; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_SEQ_PERCMODE: args->cmd = SNDCTL_SEQ_PERCMODE; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_FM_LOAD_INSTR: args->cmd = SNDCTL_FM_LOAD_INSTR; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_SEQ_TESTMIDI: args->cmd = SNDCTL_SEQ_TESTMIDI; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_SEQ_RESETSAMPLES: args->cmd = SNDCTL_SEQ_RESETSAMPLES; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_SEQ_NRSYNTHS: args->cmd = SNDCTL_SEQ_NRSYNTHS; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_SEQ_NRMIDIS: args->cmd = SNDCTL_SEQ_NRMIDIS; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_MIDI_INFO: args->cmd = SNDCTL_MIDI_INFO; return (sys_ioctl(td, (struct ioctl_args *)args)); 
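/* * A minimal user-space sketch (device path and parameter values are * assumptions for illustration) of how a Linux OSS program reaches the * translations above: * * int fd = open("/dev/dsp", O_WRONLY); * int fmt = AFMT_S16_LE, rate = 44100; * ioctl(fd, SNDCTL_DSP_SETFMT, &fmt); * ioctl(fd, SNDCTL_DSP_SPEED, &rate); * * Each request arrives in this handler as the corresponding * LINUX_SNDCTL_DSP_* case and is forwarded to the native ioctl path. */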
case LINUX_SNDCTL_SEQ_TRESHOLD: args->cmd = SNDCTL_SEQ_TRESHOLD; return (sys_ioctl(td, (struct ioctl_args *)args)); case LINUX_SNDCTL_SYNTH_MEMAVL: args->cmd = SNDCTL_SYNTH_MEMAVL; return (sys_ioctl(td, (struct ioctl_args *)args)); } return (ENOIOCTL); } /* * Console related ioctls */ static int linux_ioctl_console(struct thread *td, struct linux_ioctl_args *args) { struct file *fp; int error; error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); switch (args->cmd & 0xffff) { case LINUX_KIOCSOUND: args->cmd = KIOCSOUND; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_KDMKTONE: args->cmd = KDMKTONE; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_KDGETLED: args->cmd = KDGETLED; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_KDSETLED: args->cmd = KDSETLED; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_KDSETMODE: args->cmd = KDSETMODE; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_KDGETMODE: args->cmd = KDGETMODE; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_KDGKBMODE: args->cmd = KDGKBMODE; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_KDSKBMODE: { int kbdmode; switch (args->arg) { case LINUX_KBD_RAW: kbdmode = K_RAW; break; case LINUX_KBD_XLATE: kbdmode = K_XLATE; break; case LINUX_KBD_MEDIUMRAW: kbdmode = K_RAW; break; default: fdrop(fp, td); return (EINVAL); } error = (fo_ioctl(fp, KDSKBMODE, (caddr_t)&kbdmode, td->td_ucred, td)); break; } case LINUX_VT_OPENQRY: args->cmd = VT_OPENQRY; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_VT_GETMODE: args->cmd = VT_GETMODE; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_VT_SETMODE: { struct vt_mode mode; if ((error = copyin((void *)args->arg, &mode, sizeof(mode)))) break; if (LINUX_SIG_VALID(mode.relsig)) mode.relsig = linux_to_bsd_signal(mode.relsig); else mode.relsig = 0; if (LINUX_SIG_VALID(mode.acqsig)) mode.acqsig = linux_to_bsd_signal(mode.acqsig); else mode.acqsig = 0; /* XXX. Linux ignores frsig and set it to 0. */ mode.frsig = 0; if ((error = copyout(&mode, (void *)args->arg, sizeof(mode)))) break; args->cmd = VT_SETMODE; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; } case LINUX_VT_GETSTATE: args->cmd = VT_GETACTIVE; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_VT_RELDISP: args->cmd = VT_RELDISP; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_VT_ACTIVATE: args->cmd = VT_ACTIVATE; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; case LINUX_VT_WAITACTIVE: args->cmd = VT_WAITACTIVE; error = (sys_ioctl(td, (struct ioctl_args *)args)); break; default: error = ENOIOCTL; break; } fdrop(fp, td); return (error); } /* - * Criteria for interface name translation - */ -#define IFP_IS_ETH(ifp) (ifp->if_type == IFT_ETHER) - -/* - * Translate a Linux interface name to a FreeBSD interface name, - * and return the associated ifnet structure - * bsdname and lxname need to be least IFNAMSIZ bytes long, but - * can point to the same buffer. 
- */ - -static struct ifnet * -ifname_linux_to_bsd(struct thread *td, const char *lxname, char *bsdname) -{ - struct ifnet *ifp; - int len, unit; - char *ep; - int is_eth, index; - - for (len = 0; len < LINUX_IFNAMSIZ; ++len) - if (!isalpha(lxname[len])) - break; - if (len == 0 || len == LINUX_IFNAMSIZ) - return (NULL); - unit = (int)strtoul(lxname + len, &ep, 10); - if (ep == NULL || ep == lxname + len || ep >= lxname + LINUX_IFNAMSIZ) - return (NULL); - index = 0; - is_eth = (len == 3 && !strncmp(lxname, "eth", len)) ? 1 : 0; - CURVNET_SET(TD_TO_VNET(td)); - IFNET_RLOCK(); - CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { - /* - * Allow Linux programs to use FreeBSD names. Don't presume - * we never have an interface named "eth", so don't make - * the test optional based on is_eth. - */ - if (strncmp(ifp->if_xname, lxname, LINUX_IFNAMSIZ) == 0) - break; - if (is_eth && IFP_IS_ETH(ifp) && unit == index++) - break; - } - IFNET_RUNLOCK(); - CURVNET_RESTORE(); - if (ifp != NULL) - strlcpy(bsdname, ifp->if_xname, IFNAMSIZ); - return (ifp); -} - -/* * Implement the SIOCGIFNAME ioctl */ static int linux_ioctl_ifname(struct thread *td, struct l_ifreq *uifr) { struct l_ifreq ifr; struct ifnet *ifp; int error, ethno, index; error = copyin(uifr, &ifr, sizeof(ifr)); if (error != 0) return (error); CURVNET_SET(TD_TO_VNET(curthread)); IFNET_RLOCK(); index = 1; /* ifr.ifr_ifindex starts from 1 */ ethno = 0; error = ENODEV; CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (ifr.ifr_ifindex == index) { if (IFP_IS_ETH(ifp)) snprintf(ifr.ifr_name, LINUX_IFNAMSIZ, "eth%d", ethno); else strlcpy(ifr.ifr_name, ifp->if_xname, LINUX_IFNAMSIZ); error = 0; break; } if (IFP_IS_ETH(ifp)) ethno++; index++; } IFNET_RUNLOCK(); if (error == 0) error = copyout(&ifr, uifr, sizeof(ifr)); CURVNET_RESTORE(); return (error); } /* * Implement the SIOCGIFCONF ioctl */ static int linux_ifconf(struct thread *td, struct ifconf *uifc) { #ifdef COMPAT_LINUX32 struct l_ifconf ifc; #else struct ifconf ifc; #endif struct l_ifreq ifr; struct ifnet *ifp; struct ifaddr *ifa; struct sbuf *sb; int error, ethno, full = 0, valid_len, max_len; error = copyin(uifc, &ifc, sizeof(ifc)); if (error != 0) return (error); max_len = MAXPHYS - 1; CURVNET_SET(TD_TO_VNET(td)); /* handle the 'request buffer size' case */ if ((l_uintptr_t)ifc.ifc_buf == PTROUT(NULL)) { ifc.ifc_len = 0; IFNET_RLOCK(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa = ifa->ifa_addr; if (sa->sa_family == AF_INET) ifc.ifc_len += sizeof(ifr); } } IFNET_RUNLOCK(); error = copyout(&ifc, uifc, sizeof(ifc)); CURVNET_RESTORE(); return (error); } if (ifc.ifc_len <= 0) { CURVNET_RESTORE(); return (EINVAL); } again: /* Keep track of eth interfaces */ ethno = 0; if (ifc.ifc_len <= max_len) { max_len = ifc.ifc_len; full = 1; } sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN); max_len = 0; valid_len = 0; /* Return all AF_INET addresses of all interfaces */ IFNET_RLOCK(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { int addrs = 0; bzero(&ifr, sizeof(ifr)); if (IFP_IS_ETH(ifp)) snprintf(ifr.ifr_name, LINUX_IFNAMSIZ, "eth%d", ethno++); else strlcpy(ifr.ifr_name, ifp->if_xname, LINUX_IFNAMSIZ); /* Walk the address list */ CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa = ifa->ifa_addr; if (sa->sa_family == AF_INET) { ifr.ifr_addr.sa_family = LINUX_AF_INET; memcpy(ifr.ifr_addr.sa_data, sa->sa_data, sizeof(ifr.ifr_addr.sa_data)); sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); addrs++; } if 
(sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } if (addrs == 0) { bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr)); sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } } IFNET_RUNLOCK(); if (valid_len != max_len && !full) { sbuf_delete(sb); goto again; } ifc.ifc_len = valid_len; sbuf_finish(sb); error = copyout(sbuf_data(sb), PTRIN(ifc.ifc_buf), ifc.ifc_len); if (error == 0) error = copyout(&ifc, uifc, sizeof(ifc)); sbuf_delete(sb); CURVNET_RESTORE(); return (error); } static int linux_gifflags(struct thread *td, struct ifnet *ifp, struct l_ifreq *ifr) { l_short flags; - flags = (ifp->if_flags | ifp->if_drv_flags) & 0xffff; - /* these flags have no Linux equivalent */ - flags &= ~(IFF_DRV_OACTIVE|IFF_SIMPLEX| - IFF_LINK0|IFF_LINK1|IFF_LINK2); - /* Linux' multicast flag is in a different bit */ - if (flags & IFF_MULTICAST) { - flags &= ~IFF_MULTICAST; - flags |= 0x1000; - } + linux_ifflags(ifp, &flags); return (copyout(&flags, &ifr->ifr_flags, sizeof(flags))); } -#define ARPHRD_ETHER 1 -#define ARPHRD_LOOPBACK 772 - static int linux_gifhwaddr(struct ifnet *ifp, struct l_ifreq *ifr) { - struct ifaddr *ifa; - struct sockaddr_dl *sdl; struct l_sockaddr lsa; - if (ifp->if_type == IFT_LOOP) { - bzero(&lsa, sizeof(lsa)); - lsa.sa_family = ARPHRD_LOOPBACK; - return (copyout(&lsa, &ifr->ifr_hwaddr, sizeof(lsa))); - } - - if (ifp->if_type != IFT_ETHER) + if (linux_ifhwaddr(ifp, &lsa) != 0) return (ENOENT); - CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - sdl = (struct sockaddr_dl*)ifa->ifa_addr; - if (sdl != NULL && (sdl->sdl_family == AF_LINK) && - (sdl->sdl_type == IFT_ETHER)) { - bzero(&lsa, sizeof(lsa)); - lsa.sa_family = ARPHRD_ETHER; - bcopy(LLADDR(sdl), lsa.sa_data, LINUX_IFHWADDRLEN); - return (copyout(&lsa, &ifr->ifr_hwaddr, sizeof(lsa))); - } - } - - return (ENOENT); + return (copyout(&lsa, &ifr->ifr_hwaddr, sizeof(lsa))); } /* * If we fault in bsd_to_linux_ifreq() then we will fault when we call * the native ioctl(). Thus, we don't really need to check the return * value of this function. 
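linux_ifconf() above implements the usual two-call protocol: with a NULL ifc_buf it only reports the space required, and a second call fills the buffer. A sketch of how a Linux program typically consumes it (userland fragment; s is assumed to be an open AF_INET socket and error handling is elided):

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdio.h>
#include <stdlib.h>

	struct ifconf ifc;
	struct ifreq *ifr;
	size_t i;

	ifc.ifc_buf = NULL;			/* first call: size probe */
	ioctl(s, SIOCGIFCONF, &ifc);
	ifc.ifc_buf = malloc(ifc.ifc_len);	/* second call: fetch */
	ioctl(s, SIOCGIFCONF, &ifc);
	ifr = (struct ifreq *)ifc.ifc_buf;
	for (i = 0; i < ifc.ifc_len / sizeof(*ifr); i++)
		puts(ifr[i].ifr_name);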
*/ static int bsd_to_linux_ifreq(struct ifreq *arg) { struct ifreq ifr; size_t ifr_len = sizeof(struct ifreq); int error; if ((error = copyin(arg, &ifr, ifr_len))) return (error); *(u_short *)&ifr.ifr_addr = ifr.ifr_addr.sa_family; error = copyout(&ifr, arg, ifr_len); return (error); } /* * Socket related ioctls */ static int linux_ioctl_socket(struct thread *td, struct linux_ioctl_args *args) { char lifname[LINUX_IFNAMSIZ], ifname[IFNAMSIZ]; struct ifnet *ifp; struct file *fp; int error, type; ifp = NULL; error = 0; error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); type = fp->f_type; fdrop(fp, td); if (type != DTYPE_SOCKET) { /* not a socket - probably a tap / vmnet device */ switch (args->cmd) { case LINUX_SIOCGIFADDR: case LINUX_SIOCSIFADDR: case LINUX_SIOCGIFFLAGS: return (linux_ioctl_special(td, args)); default: return (ENOIOCTL); } } switch (args->cmd & 0xffff) { case LINUX_FIOGETOWN: case LINUX_FIOSETOWN: case LINUX_SIOCADDMULTI: case LINUX_SIOCATMARK: case LINUX_SIOCDELMULTI: case LINUX_SIOCGIFNAME: case LINUX_SIOCGIFCONF: case LINUX_SIOCGPGRP: case LINUX_SIOCSPGRP: case LINUX_SIOCGIFCOUNT: /* these ioctls don't take an interface name */ #ifdef DEBUG printf("%s(): ioctl %d\n", __func__, args->cmd & 0xffff); #endif break; case LINUX_SIOCGIFFLAGS: case LINUX_SIOCGIFADDR: case LINUX_SIOCSIFADDR: case LINUX_SIOCGIFDSTADDR: case LINUX_SIOCGIFBRDADDR: case LINUX_SIOCGIFNETMASK: case LINUX_SIOCSIFNETMASK: case LINUX_SIOCGIFMTU: case LINUX_SIOCSIFMTU: case LINUX_SIOCSIFNAME: case LINUX_SIOCGIFHWADDR: case LINUX_SIOCSIFHWADDR: case LINUX_SIOCDEVPRIVATE: case LINUX_SIOCDEVPRIVATE+1: case LINUX_SIOCGIFINDEX: /* copy in the interface name and translate it. */ error = copyin((void *)args->arg, lifname, LINUX_IFNAMSIZ); if (error != 0) return (error); #ifdef DEBUG printf("%s(): ioctl %d on %.*s\n", __func__, args->cmd & 0xffff, LINUX_IFNAMSIZ, lifname); #endif memset(ifname, 0, sizeof(ifname)); ifp = ifname_linux_to_bsd(td, lifname, ifname); if (ifp == NULL) return (EINVAL); /* * We need to copy it back out in case we pass the * request on to our native ioctl(), which will expect * the ifreq to be in user space and have the correct * interface name. 
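Because the forwarded ioctl() re-reads the ifreq from user space, these name-bearing requests rewrite the user's buffer before forwarding and restore it at the end of the function. The full sequence, condensed (illustrative; "em0" is just an example translation):

copyin(uaddr, lifname, LINUX_IFNAMSIZ);		/* e.g. "eth0" */
ifname_linux_to_bsd(td, lifname, ifname);	/* -> "em0" */
copyout(ifname, uaddr, IFNAMSIZ);		/* native ioctl sees "em0" */
sys_ioctl(td, (struct ioctl_args *)args);
copyout(lifname, uaddr, LINUX_IFNAMSIZ);	/* put "eth0" back */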
*/ error = copyout(ifname, (void *)args->arg, IFNAMSIZ); if (error != 0) return (error); #ifdef DEBUG printf("%s(): %s translated to %s\n", __func__, lifname, ifname); #endif break; default: return (ENOIOCTL); } switch (args->cmd & 0xffff) { case LINUX_FIOSETOWN: args->cmd = FIOSETOWN; error = sys_ioctl(td, (struct ioctl_args *)args); break; case LINUX_SIOCSPGRP: args->cmd = SIOCSPGRP; error = sys_ioctl(td, (struct ioctl_args *)args); break; case LINUX_FIOGETOWN: args->cmd = FIOGETOWN; error = sys_ioctl(td, (struct ioctl_args *)args); break; case LINUX_SIOCGPGRP: args->cmd = SIOCGPGRP; error = sys_ioctl(td, (struct ioctl_args *)args); break; case LINUX_SIOCATMARK: args->cmd = SIOCATMARK; error = sys_ioctl(td, (struct ioctl_args *)args); break; /* LINUX_SIOCGSTAMP */ case LINUX_SIOCGIFNAME: error = linux_ioctl_ifname(td, (struct l_ifreq *)args->arg); break; case LINUX_SIOCGIFCONF: error = linux_ifconf(td, (struct ifconf *)args->arg); break; case LINUX_SIOCGIFFLAGS: args->cmd = SIOCGIFFLAGS; error = linux_gifflags(td, ifp, (struct l_ifreq *)args->arg); break; case LINUX_SIOCGIFADDR: args->cmd = SIOCGIFADDR; error = sys_ioctl(td, (struct ioctl_args *)args); bsd_to_linux_ifreq((struct ifreq *)args->arg); break; case LINUX_SIOCSIFADDR: /* XXX probably doesn't work, included for completeness */ args->cmd = SIOCSIFADDR; error = sys_ioctl(td, (struct ioctl_args *)args); break; case LINUX_SIOCGIFDSTADDR: args->cmd = SIOCGIFDSTADDR; error = sys_ioctl(td, (struct ioctl_args *)args); bsd_to_linux_ifreq((struct ifreq *)args->arg); break; case LINUX_SIOCGIFBRDADDR: args->cmd = SIOCGIFBRDADDR; error = sys_ioctl(td, (struct ioctl_args *)args); bsd_to_linux_ifreq((struct ifreq *)args->arg); break; case LINUX_SIOCGIFNETMASK: args->cmd = SIOCGIFNETMASK; error = sys_ioctl(td, (struct ioctl_args *)args); bsd_to_linux_ifreq((struct ifreq *)args->arg); break; case LINUX_SIOCSIFNETMASK: error = ENOIOCTL; break; case LINUX_SIOCGIFMTU: args->cmd = SIOCGIFMTU; error = sys_ioctl(td, (struct ioctl_args *)args); break; case LINUX_SIOCSIFMTU: args->cmd = SIOCSIFMTU; error = sys_ioctl(td, (struct ioctl_args *)args); break; case LINUX_SIOCSIFNAME: error = ENOIOCTL; break; case LINUX_SIOCGIFHWADDR: error = linux_gifhwaddr(ifp, (struct l_ifreq *)args->arg); break; case LINUX_SIOCSIFHWADDR: error = ENOIOCTL; break; case LINUX_SIOCADDMULTI: args->cmd = SIOCADDMULTI; error = sys_ioctl(td, (struct ioctl_args *)args); break; case LINUX_SIOCDELMULTI: args->cmd = SIOCDELMULTI; error = sys_ioctl(td, (struct ioctl_args *)args); break; case LINUX_SIOCGIFINDEX: args->cmd = SIOCGIFINDEX; error = sys_ioctl(td, (struct ioctl_args *)args); break; case LINUX_SIOCGIFCOUNT: error = 0; break; /* * XXX This is slightly bogus, but these ioctls are currently * XXX only used by the aironet (if_an) network driver. 
*/ case LINUX_SIOCDEVPRIVATE: args->cmd = SIOCGPRIVATE_0; error = sys_ioctl(td, (struct ioctl_args *)args); break; case LINUX_SIOCDEVPRIVATE+1: args->cmd = SIOCGPRIVATE_1; error = sys_ioctl(td, (struct ioctl_args *)args); break; } if (ifp != NULL) /* restore the original interface name */ copyout(lifname, (void *)args->arg, LINUX_IFNAMSIZ); #ifdef DEBUG printf("%s(): returning %d\n", __func__, error); #endif return (error); } /* * Device private ioctl handler */ static int linux_ioctl_private(struct thread *td, struct linux_ioctl_args *args) { struct file *fp; int error, type; error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); type = fp->f_type; fdrop(fp, td); if (type == DTYPE_SOCKET) return (linux_ioctl_socket(td, args)); return (ENOIOCTL); } /* * DRM ioctl handler (sys/dev/drm) */ static int linux_ioctl_drm(struct thread *td, struct linux_ioctl_args *args) { args->cmd = SETDIR(args->cmd); return (sys_ioctl(td, (struct ioctl_args *)args)); } #ifdef COMPAT_LINUX32 #define CP(src,dst,fld) do { (dst).fld = (src).fld; } while (0) #define PTRIN_CP(src,dst,fld) \ do { (dst).fld = PTRIN((src).fld); } while (0) #define PTROUT_CP(src,dst,fld) \ do { (dst).fld = PTROUT((src).fld); } while (0) static int linux_ioctl_sg_io(struct thread *td, struct linux_ioctl_args *args) { struct sg_io_hdr io; struct sg_io_hdr32 io32; struct file *fp; int error; error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) { printf("sg_linux_ioctl: fget returned %d\n", error); return (error); } if ((error = copyin((void *)args->arg, &io32, sizeof(io32))) != 0) goto out; CP(io32, io, interface_id); CP(io32, io, dxfer_direction); CP(io32, io, cmd_len); CP(io32, io, mx_sb_len); CP(io32, io, iovec_count); CP(io32, io, dxfer_len); PTRIN_CP(io32, io, dxferp); PTRIN_CP(io32, io, cmdp); PTRIN_CP(io32, io, sbp); CP(io32, io, timeout); CP(io32, io, flags); CP(io32, io, pack_id); PTRIN_CP(io32, io, usr_ptr); CP(io32, io, status); CP(io32, io, masked_status); CP(io32, io, msg_status); CP(io32, io, sb_len_wr); CP(io32, io, host_status); CP(io32, io, driver_status); CP(io32, io, resid); CP(io32, io, duration); CP(io32, io, info); if ((error = fo_ioctl(fp, SG_IO, (caddr_t)&io, td->td_ucred, td)) != 0) goto out; CP(io, io32, interface_id); CP(io, io32, dxfer_direction); CP(io, io32, cmd_len); CP(io, io32, mx_sb_len); CP(io, io32, iovec_count); CP(io, io32, dxfer_len); PTROUT_CP(io, io32, dxferp); PTROUT_CP(io, io32, cmdp); PTROUT_CP(io, io32, sbp); CP(io, io32, timeout); CP(io, io32, flags); CP(io, io32, pack_id); PTROUT_CP(io, io32, usr_ptr); CP(io, io32, status); CP(io, io32, masked_status); CP(io, io32, msg_status); CP(io, io32, sb_len_wr); CP(io, io32, host_status); CP(io, io32, driver_status); CP(io, io32, resid); CP(io, io32, duration); CP(io, io32, info); error = copyout(&io32, (void *)args->arg, sizeof(io32)); out: fdrop(fp, td); return (error); } #endif static int linux_ioctl_sg(struct thread *td, struct linux_ioctl_args *args) { switch (args->cmd) { case LINUX_SG_GET_VERSION_NUM: args->cmd = SG_GET_VERSION_NUM; break; case LINUX_SG_SET_TIMEOUT: args->cmd = SG_SET_TIMEOUT; break; case LINUX_SG_GET_TIMEOUT: args->cmd = SG_GET_TIMEOUT; break; case LINUX_SG_IO: args->cmd = SG_IO; #ifdef COMPAT_LINUX32 return (linux_ioctl_sg_io(td, args)); #endif break; case LINUX_SG_GET_RESERVED_SIZE: args->cmd = SG_GET_RESERVED_SIZE; break; case LINUX_SG_GET_SCSI_ID: args->cmd = SG_GET_SCSI_ID; break; case LINUX_SG_GET_SG_TABLESIZE: args->cmd = SG_GET_SG_TABLESIZE; break; default: return (ENODEV); } 
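linux_ioctl_sg_io() above is the standard COMPAT_LINUX32 thunk: each field is copied between the 32-bit and native layouts, with PTRIN/PTROUT widening or narrowing the embedded pointers. The pattern reduced to a self-contained sketch (struct foo, struct foo32 and foo_in are hypothetical):

#include <stdint.h>

struct foo32 {			/* layout seen by 32-bit userland */
	uint32_t	buf;	/* a pointer stored in 32 bits */
	int32_t		len;
};

struct foo {			/* native layout */
	void	*buf;
	int	len;
};

static void
foo_in(const struct foo32 *f32, struct foo *f)
{
	f->buf = (void *)(uintptr_t)f32->buf;	/* PTRIN_CP: zero-extend */
	f->len = f32->len;			/* CP: plain field copy */
}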
return (sys_ioctl(td, (struct ioctl_args *)args)); } /* * Video4Linux (V4L) ioctl handler */ static int linux_to_bsd_v4l_tuner(struct l_video_tuner *lvt, struct video_tuner *vt) { vt->tuner = lvt->tuner; strlcpy(vt->name, lvt->name, LINUX_VIDEO_TUNER_NAME_SIZE); vt->rangelow = lvt->rangelow; /* possible long size conversion */ vt->rangehigh = lvt->rangehigh; /* possible long size conversion */ vt->flags = lvt->flags; vt->mode = lvt->mode; vt->signal = lvt->signal; return (0); } static int bsd_to_linux_v4l_tuner(struct video_tuner *vt, struct l_video_tuner *lvt) { lvt->tuner = vt->tuner; strlcpy(lvt->name, vt->name, LINUX_VIDEO_TUNER_NAME_SIZE); lvt->rangelow = vt->rangelow; /* possible long size conversion */ lvt->rangehigh = vt->rangehigh; /* possible long size conversion */ lvt->flags = vt->flags; lvt->mode = vt->mode; lvt->signal = vt->signal; return (0); } #ifdef COMPAT_LINUX_V4L_CLIPLIST static int linux_to_bsd_v4l_clip(struct l_video_clip *lvc, struct video_clip *vc) { vc->x = lvc->x; vc->y = lvc->y; vc->width = lvc->width; vc->height = lvc->height; vc->next = PTRIN(lvc->next); /* possible pointer size conversion */ return (0); } #endif static int linux_to_bsd_v4l_window(struct l_video_window *lvw, struct video_window *vw) { vw->x = lvw->x; vw->y = lvw->y; vw->width = lvw->width; vw->height = lvw->height; vw->chromakey = lvw->chromakey; vw->flags = lvw->flags; vw->clips = PTRIN(lvw->clips); /* possible pointer size conversion */ vw->clipcount = lvw->clipcount; return (0); } static int bsd_to_linux_v4l_window(struct video_window *vw, struct l_video_window *lvw) { memset(lvw, 0, sizeof(*lvw)); lvw->x = vw->x; lvw->y = vw->y; lvw->width = vw->width; lvw->height = vw->height; lvw->chromakey = vw->chromakey; lvw->flags = vw->flags; lvw->clips = PTROUT(vw->clips); /* possible pointer size conversion */ lvw->clipcount = vw->clipcount; return (0); } static int linux_to_bsd_v4l_buffer(struct l_video_buffer *lvb, struct video_buffer *vb) { vb->base = PTRIN(lvb->base); /* possible pointer size conversion */ vb->height = lvb->height; vb->width = lvb->width; vb->depth = lvb->depth; vb->bytesperline = lvb->bytesperline; return (0); } static int bsd_to_linux_v4l_buffer(struct video_buffer *vb, struct l_video_buffer *lvb) { lvb->base = PTROUT(vb->base); /* possible pointer size conversion */ lvb->height = vb->height; lvb->width = vb->width; lvb->depth = vb->depth; lvb->bytesperline = vb->bytesperline; return (0); } static int linux_to_bsd_v4l_code(struct l_video_code *lvc, struct video_code *vc) { strlcpy(vc->loadwhat, lvc->loadwhat, LINUX_VIDEO_CODE_LOADWHAT_SIZE); vc->datasize = lvc->datasize; vc->data = PTRIN(lvc->data); /* possible pointer size conversion */ return (0); } #ifdef COMPAT_LINUX_V4L_CLIPLIST static int linux_v4l_clip_copy(void *lvc, struct video_clip **ppvc) { int error; struct video_clip vclip; struct l_video_clip l_vclip; error = copyin(lvc, &l_vclip, sizeof(l_vclip)); if (error) return (error); linux_to_bsd_v4l_clip(&l_vclip, &vclip); /* XXX: If there can be no concurrency: s/M_NOWAIT/M_WAITOK/ */ if ((*ppvc = malloc(sizeof(**ppvc), M_LINUX, M_NOWAIT)) == NULL) return (ENOMEM); /* XXX: Linux has no ENOMEM here. 
*/ memcpy(*ppvc, &vclip, sizeof(vclip)); (*ppvc)->next = NULL; return (0); } static int linux_v4l_cliplist_free(struct video_window *vw) { struct video_clip **ppvc; struct video_clip **ppvc_next; for (ppvc = &(vw->clips); *ppvc != NULL; ppvc = ppvc_next) { ppvc_next = &((*ppvc)->next); free(*ppvc, M_LINUX); } vw->clips = NULL; return (0); } static int linux_v4l_cliplist_copy(struct l_video_window *lvw, struct video_window *vw) { int error; int clipcount; void *plvc; struct video_clip **ppvc; /* * XXX: The cliplist is used to pass in a list of clipping * rectangles or, if clipcount == VIDEO_CLIP_BITMAP, a * clipping bitmap. Some Linux apps, however, appear to * leave cliplist and clips uninitialized. In any case, * the cliplist is not used by pwc(4), at the time of * writing, FreeBSD's only V4L driver. When a driver * that uses the cliplist is developed, this code may * need re-examination. */ error = 0; clipcount = vw->clipcount; if (clipcount == VIDEO_CLIP_BITMAP) { /* * In this case, the pointer (clips) is overloaded * to be a "void *" to a bitmap, therefore there * is no struct video_clip to copy now. */ } else if (clipcount > 0 && clipcount <= 16384) { /* * Clips points to a list of clip rectangles, so * copy the list. * * XXX: Upper limit of 16384 was used here to try to * avoid cases when clipcount and clips pointer * are uninitialized and therefore have high random * values, as is the case in the Linux Skype * application. The value 16384 was chosen as that * is what is used in the Linux stradis(4) MPEG * decoder driver, the only place we found an * example of cliplist use. */ plvc = PTRIN(lvw->clips); vw->clips = NULL; ppvc = &(vw->clips); while (clipcount-- > 0) { if (plvc == NULL) { error = EFAULT; break; } else { error = linux_v4l_clip_copy(plvc, ppvc); if (error) { linux_v4l_cliplist_free(vw); break; } } ppvc = &((*ppvc)->next); plvc = PTRIN(((struct l_video_clip *) plvc)->next); } } else { /* * clipcount == 0 or negative (but not VIDEO_CLIP_BITMAP) * Force cliplist to null.
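linux_v4l_cliplist_copy() above walks a linked list that lives entirely in user memory, so each node must be copyin()ed and the walk is capped (at 16384) to survive garbage pointers from uninitialized fields. The shape of such a walk, reduced to its essentials (l_node, append_copy and MAX_NODES are hypothetical):

	int n = 0;
	void *unode = PTRIN(ulist_head);

	while (unode != NULL && n++ < MAX_NODES) {
		struct l_node lnode;

		if (copyin(unode, &lnode, sizeof(lnode)) != 0)
			return (EFAULT);	/* bad user pointer: bail */
		append_copy(&lnode);		/* kernel-side copy of node */
		unode = PTRIN(lnode.next);	/* follow user-space link */
	}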
*/ vw->clipcount = 0; vw->clips = NULL; } return (error); } #endif static int linux_ioctl_v4l(struct thread *td, struct linux_ioctl_args *args) { struct file *fp; int error; struct video_tuner vtun; struct video_window vwin; struct video_buffer vbuf; struct video_code vcode; struct l_video_tuner l_vtun; struct l_video_window l_vwin; struct l_video_buffer l_vbuf; struct l_video_code l_vcode; switch (args->cmd & 0xffff) { case LINUX_VIDIOCGCAP: args->cmd = VIDIOCGCAP; break; case LINUX_VIDIOCGCHAN: args->cmd = VIDIOCGCHAN; break; case LINUX_VIDIOCSCHAN: args->cmd = VIDIOCSCHAN; break; case LINUX_VIDIOCGTUNER: error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); error = copyin((void *) args->arg, &l_vtun, sizeof(l_vtun)); if (error) { fdrop(fp, td); return (error); } linux_to_bsd_v4l_tuner(&l_vtun, &vtun); error = fo_ioctl(fp, VIDIOCGTUNER, &vtun, td->td_ucred, td); if (!error) { bsd_to_linux_v4l_tuner(&vtun, &l_vtun); error = copyout(&l_vtun, (void *) args->arg, sizeof(l_vtun)); } fdrop(fp, td); return (error); case LINUX_VIDIOCSTUNER: error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); error = copyin((void *) args->arg, &l_vtun, sizeof(l_vtun)); if (error) { fdrop(fp, td); return (error); } linux_to_bsd_v4l_tuner(&l_vtun, &vtun); error = fo_ioctl(fp, VIDIOCSTUNER, &vtun, td->td_ucred, td); fdrop(fp, td); return (error); case LINUX_VIDIOCGPICT: args->cmd = VIDIOCGPICT; break; case LINUX_VIDIOCSPICT: args->cmd = VIDIOCSPICT; break; case LINUX_VIDIOCCAPTURE: args->cmd = VIDIOCCAPTURE; break; case LINUX_VIDIOCGWIN: error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); error = fo_ioctl(fp, VIDIOCGWIN, &vwin, td->td_ucred, td); if (!error) { bsd_to_linux_v4l_window(&vwin, &l_vwin); error = copyout(&l_vwin, (void *) args->arg, sizeof(l_vwin)); } fdrop(fp, td); return (error); case LINUX_VIDIOCSWIN: error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); error = copyin((void *) args->arg, &l_vwin, sizeof(l_vwin)); if (error) { fdrop(fp, td); return (error); } linux_to_bsd_v4l_window(&l_vwin, &vwin); #ifdef COMPAT_LINUX_V4L_CLIPLIST error = linux_v4l_cliplist_copy(&l_vwin, &vwin); if (error) { fdrop(fp, td); return (error); } #endif error = fo_ioctl(fp, VIDIOCSWIN, &vwin, td->td_ucred, td); fdrop(fp, td); #ifdef COMPAT_LINUX_V4L_CLIPLIST linux_v4l_cliplist_free(&vwin); #endif return (error); case LINUX_VIDIOCGFBUF: error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); error = fo_ioctl(fp, VIDIOCGFBUF, &vbuf, td->td_ucred, td); if (!error) { bsd_to_linux_v4l_buffer(&vbuf, &l_vbuf); error = copyout(&l_vbuf, (void *) args->arg, sizeof(l_vbuf)); } fdrop(fp, td); return (error); case LINUX_VIDIOCSFBUF: error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); error = copyin((void *) args->arg, &l_vbuf, sizeof(l_vbuf)); if (error) { fdrop(fp, td); return (error); } linux_to_bsd_v4l_buffer(&l_vbuf, &vbuf); error = fo_ioctl(fp, VIDIOCSFBUF, &vbuf, td->td_ucred, td); fdrop(fp, td); return (error); case LINUX_VIDIOCKEY: args->cmd = VIDIOCKEY; break; case LINUX_VIDIOCGFREQ: args->cmd = VIDIOCGFREQ; break; case LINUX_VIDIOCSFREQ: args->cmd = VIDIOCSFREQ; break; case LINUX_VIDIOCGAUDIO: args->cmd = VIDIOCGAUDIO; break; case LINUX_VIDIOCSAUDIO: args->cmd = VIDIOCSAUDIO; break; case LINUX_VIDIOCSYNC: args->cmd = VIDIOCSYNC; break; case LINUX_VIDIOCMCAPTURE: args->cmd = VIDIOCMCAPTURE; break; case LINUX_VIDIOCGMBUF: args->cmd = VIDIOCGMBUF; 
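The VIDIOC* requests that cannot simply be renumbered all follow one round-trip shape: copy in the Linux struct, convert, issue fo_ioctl() on the held file, convert back, copy out. Condensed (l_obj, obj and VIDIOC_CMD stand in for the concrete pairs used above):

error = copyin(uaddr, &l_obj, sizeof(l_obj));	/* Linux layout in */
linux_to_bsd_v4l_obj(&l_obj, &obj);		/* translate */
error = fo_ioctl(fp, VIDIOC_CMD, &obj, td->td_ucred, td);
bsd_to_linux_v4l_obj(&obj, &l_obj);		/* translate back */
error = copyout(&l_obj, uaddr, sizeof(l_obj));	/* Linux layout out */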
break; case LINUX_VIDIOCGUNIT: args->cmd = VIDIOCGUNIT; break; case LINUX_VIDIOCGCAPTURE: args->cmd = VIDIOCGCAPTURE; break; case LINUX_VIDIOCSCAPTURE: args->cmd = VIDIOCSCAPTURE; break; case LINUX_VIDIOCSPLAYMODE: args->cmd = VIDIOCSPLAYMODE; break; case LINUX_VIDIOCSWRITEMODE: args->cmd = VIDIOCSWRITEMODE; break; case LINUX_VIDIOCGPLAYINFO: args->cmd = VIDIOCGPLAYINFO; break; case LINUX_VIDIOCSMICROCODE: error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); error = copyin((void *) args->arg, &l_vcode, sizeof(l_vcode)); if (error) { fdrop(fp, td); return (error); } linux_to_bsd_v4l_code(&l_vcode, &vcode); error = fo_ioctl(fp, VIDIOCSMICROCODE, &vcode, td->td_ucred, td); fdrop(fp, td); return (error); case LINUX_VIDIOCGVBIFMT: args->cmd = VIDIOCGVBIFMT; break; case LINUX_VIDIOCSVBIFMT: args->cmd = VIDIOCSVBIFMT; break; default: return (ENOIOCTL); } error = sys_ioctl(td, (struct ioctl_args *)args); return (error); } /* * Special ioctl handler */ static int linux_ioctl_special(struct thread *td, struct linux_ioctl_args *args) { int error; switch (args->cmd) { case LINUX_SIOCGIFADDR: args->cmd = SIOCGIFADDR; error = sys_ioctl(td, (struct ioctl_args *)args); break; case LINUX_SIOCSIFADDR: args->cmd = SIOCSIFADDR; error = sys_ioctl(td, (struct ioctl_args *)args); break; case LINUX_SIOCGIFFLAGS: args->cmd = SIOCGIFFLAGS; error = sys_ioctl(td, (struct ioctl_args *)args); break; default: error = ENOIOCTL; } return (error); } static int linux_to_bsd_v4l2_standard(struct l_v4l2_standard *lvstd, struct v4l2_standard *vstd) { vstd->index = lvstd->index; vstd->id = lvstd->id; CTASSERT(sizeof(vstd->name) == sizeof(lvstd->name)); memcpy(vstd->name, lvstd->name, sizeof(vstd->name)); vstd->frameperiod = lvstd->frameperiod; vstd->framelines = lvstd->framelines; CTASSERT(sizeof(vstd->reserved) == sizeof(lvstd->reserved)); memcpy(vstd->reserved, lvstd->reserved, sizeof(vstd->reserved)); return (0); } static int bsd_to_linux_v4l2_standard(struct v4l2_standard *vstd, struct l_v4l2_standard *lvstd) { lvstd->index = vstd->index; lvstd->id = vstd->id; CTASSERT(sizeof(vstd->name) == sizeof(lvstd->name)); memcpy(lvstd->name, vstd->name, sizeof(lvstd->name)); lvstd->frameperiod = vstd->frameperiod; lvstd->framelines = vstd->framelines; CTASSERT(sizeof(vstd->reserved) == sizeof(lvstd->reserved)); memcpy(lvstd->reserved, vstd->reserved, sizeof(lvstd->reserved)); return (0); } static int linux_to_bsd_v4l2_buffer(struct l_v4l2_buffer *lvb, struct v4l2_buffer *vb) { vb->index = lvb->index; vb->type = lvb->type; vb->bytesused = lvb->bytesused; vb->flags = lvb->flags; vb->field = lvb->field; vb->timestamp.tv_sec = lvb->timestamp.tv_sec; vb->timestamp.tv_usec = lvb->timestamp.tv_usec; memcpy(&vb->timecode, &lvb->timecode, sizeof (lvb->timecode)); vb->sequence = lvb->sequence; vb->memory = lvb->memory; if (lvb->memory == V4L2_MEMORY_USERPTR) /* possible pointer size conversion */ vb->m.userptr = (unsigned long)PTRIN(lvb->m.userptr); else vb->m.offset = lvb->m.offset; vb->length = lvb->length; vb->input = lvb->input; vb->reserved = lvb->reserved; return (0); } static int bsd_to_linux_v4l2_buffer(struct v4l2_buffer *vb, struct l_v4l2_buffer *lvb) { lvb->index = vb->index; lvb->type = vb->type; lvb->bytesused = vb->bytesused; lvb->flags = vb->flags; lvb->field = vb->field; lvb->timestamp.tv_sec = vb->timestamp.tv_sec; lvb->timestamp.tv_usec = vb->timestamp.tv_usec; memcpy(&lvb->timecode, &vb->timecode, sizeof (vb->timecode)); lvb->sequence = vb->sequence; lvb->memory = vb->memory; if (vb->memory == 
V4L2_MEMORY_USERPTR) /* possible pointer size conversion */ lvb->m.userptr = PTROUT(vb->m.userptr); else lvb->m.offset = vb->m.offset; lvb->length = vb->length; lvb->input = vb->input; lvb->reserved = vb->reserved; return (0); } static int linux_to_bsd_v4l2_format(struct l_v4l2_format *lvf, struct v4l2_format *vf) { vf->type = lvf->type; if (lvf->type == V4L2_BUF_TYPE_VIDEO_OVERLAY #ifdef V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY || lvf->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY #endif ) /* * XXX TODO - needs 32 -> 64 bit conversion: * (unused by webcams?) */ return (EINVAL); memcpy(&vf->fmt, &lvf->fmt, sizeof(vf->fmt)); return (0); } static int bsd_to_linux_v4l2_format(struct v4l2_format *vf, struct l_v4l2_format *lvf) { lvf->type = vf->type; if (vf->type == V4L2_BUF_TYPE_VIDEO_OVERLAY #ifdef V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY || vf->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY #endif ) /* * XXX TODO - needs 32 -> 64 bit conversion: * (unused by webcams?) */ return (EINVAL); memcpy(&lvf->fmt, &vf->fmt, sizeof(vf->fmt)); return (0); } static int linux_ioctl_v4l2(struct thread *td, struct linux_ioctl_args *args) { struct file *fp; int error; struct v4l2_format vformat; struct l_v4l2_format l_vformat; struct v4l2_standard vstd; struct l_v4l2_standard l_vstd; struct l_v4l2_buffer l_vbuf; struct v4l2_buffer vbuf; struct v4l2_input vinp; switch (args->cmd & 0xffff) { case LINUX_VIDIOC_RESERVED: case LINUX_VIDIOC_LOG_STATUS: if ((args->cmd & IOC_DIRMASK) != LINUX_IOC_VOID) return (ENOIOCTL); args->cmd = (args->cmd & 0xffff) | IOC_VOID; break; case LINUX_VIDIOC_OVERLAY: case LINUX_VIDIOC_STREAMON: case LINUX_VIDIOC_STREAMOFF: case LINUX_VIDIOC_S_STD: case LINUX_VIDIOC_S_TUNER: case LINUX_VIDIOC_S_AUDIO: case LINUX_VIDIOC_S_AUDOUT: case LINUX_VIDIOC_S_MODULATOR: case LINUX_VIDIOC_S_FREQUENCY: case LINUX_VIDIOC_S_CROP: case LINUX_VIDIOC_S_JPEGCOMP: case LINUX_VIDIOC_S_PRIORITY: case LINUX_VIDIOC_DBG_S_REGISTER: case LINUX_VIDIOC_S_HW_FREQ_SEEK: case LINUX_VIDIOC_SUBSCRIBE_EVENT: case LINUX_VIDIOC_UNSUBSCRIBE_EVENT: args->cmd = (args->cmd & ~IOC_DIRMASK) | IOC_IN; break; case LINUX_VIDIOC_QUERYCAP: case LINUX_VIDIOC_G_STD: case LINUX_VIDIOC_G_AUDIO: case LINUX_VIDIOC_G_INPUT: case LINUX_VIDIOC_G_OUTPUT: case LINUX_VIDIOC_G_AUDOUT: case LINUX_VIDIOC_G_JPEGCOMP: case LINUX_VIDIOC_QUERYSTD: case LINUX_VIDIOC_G_PRIORITY: case LINUX_VIDIOC_QUERY_DV_PRESET: args->cmd = (args->cmd & ~IOC_DIRMASK) | IOC_OUT; break; case LINUX_VIDIOC_ENUM_FMT: case LINUX_VIDIOC_REQBUFS: case LINUX_VIDIOC_G_PARM: case LINUX_VIDIOC_S_PARM: case LINUX_VIDIOC_G_CTRL: case LINUX_VIDIOC_S_CTRL: case LINUX_VIDIOC_G_TUNER: case LINUX_VIDIOC_QUERYCTRL: case LINUX_VIDIOC_QUERYMENU: case LINUX_VIDIOC_S_INPUT: case LINUX_VIDIOC_S_OUTPUT: case LINUX_VIDIOC_ENUMOUTPUT: case LINUX_VIDIOC_G_MODULATOR: case LINUX_VIDIOC_G_FREQUENCY: case LINUX_VIDIOC_CROPCAP: case LINUX_VIDIOC_G_CROP: case LINUX_VIDIOC_ENUMAUDIO: case LINUX_VIDIOC_ENUMAUDOUT: case LINUX_VIDIOC_G_SLICED_VBI_CAP: #ifdef VIDIOC_ENUM_FRAMESIZES case LINUX_VIDIOC_ENUM_FRAMESIZES: case LINUX_VIDIOC_ENUM_FRAMEINTERVALS: case LINUX_VIDIOC_ENCODER_CMD: case LINUX_VIDIOC_TRY_ENCODER_CMD: #endif case LINUX_VIDIOC_DBG_G_REGISTER: case LINUX_VIDIOC_DBG_G_CHIP_IDENT: case LINUX_VIDIOC_ENUM_DV_PRESETS: case LINUX_VIDIOC_S_DV_PRESET: case LINUX_VIDIOC_G_DV_PRESET: case LINUX_VIDIOC_S_DV_TIMINGS: case LINUX_VIDIOC_G_DV_TIMINGS: args->cmd = (args->cmd & ~IOC_DIRMASK) | IOC_INOUT; break; case LINUX_VIDIOC_G_FMT: case LINUX_VIDIOC_S_FMT: case LINUX_VIDIOC_TRY_FMT: error = copyin((void *)args->arg, 
&l_vformat, sizeof(l_vformat)); if (error) return (error); error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error) return (error); if (linux_to_bsd_v4l2_format(&l_vformat, &vformat) != 0) error = EINVAL; else if ((args->cmd & 0xffff) == LINUX_VIDIOC_G_FMT) error = fo_ioctl(fp, VIDIOC_G_FMT, &vformat, td->td_ucred, td); else if ((args->cmd & 0xffff) == LINUX_VIDIOC_S_FMT) error = fo_ioctl(fp, VIDIOC_S_FMT, &vformat, td->td_ucred, td); else error = fo_ioctl(fp, VIDIOC_TRY_FMT, &vformat, td->td_ucred, td); bsd_to_linux_v4l2_format(&vformat, &l_vformat); copyout(&l_vformat, (void *)args->arg, sizeof(l_vformat)); fdrop(fp, td); return (error); case LINUX_VIDIOC_ENUMSTD: error = copyin((void *)args->arg, &l_vstd, sizeof(l_vstd)); if (error) return (error); linux_to_bsd_v4l2_standard(&l_vstd, &vstd); error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error) return (error); error = fo_ioctl(fp, VIDIOC_ENUMSTD, (caddr_t)&vstd, td->td_ucred, td); if (error) { fdrop(fp, td); return (error); } bsd_to_linux_v4l2_standard(&vstd, &l_vstd); error = copyout(&l_vstd, (void *)args->arg, sizeof(l_vstd)); fdrop(fp, td); return (error); case LINUX_VIDIOC_ENUMINPUT: /* * The Linux struct l_v4l2_input differs only in size, * it has no padding at the end. */ error = copyin((void *)args->arg, &vinp, sizeof(struct l_v4l2_input)); if (error != 0) return (error); error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); error = fo_ioctl(fp, VIDIOC_ENUMINPUT, (caddr_t)&vinp, td->td_ucred, td); if (error) { fdrop(fp, td); return (error); } error = copyout(&vinp, (void *)args->arg, sizeof(struct l_v4l2_input)); fdrop(fp, td); return (error); case LINUX_VIDIOC_QUERYBUF: case LINUX_VIDIOC_QBUF: case LINUX_VIDIOC_DQBUF: error = copyin((void *)args->arg, &l_vbuf, sizeof(l_vbuf)); if (error) return (error); error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error) return (error); linux_to_bsd_v4l2_buffer(&l_vbuf, &vbuf); if ((args->cmd & 0xffff) == LINUX_VIDIOC_QUERYBUF) error = fo_ioctl(fp, VIDIOC_QUERYBUF, &vbuf, td->td_ucred, td); else if ((args->cmd & 0xffff) == LINUX_VIDIOC_QBUF) error = fo_ioctl(fp, VIDIOC_QBUF, &vbuf, td->td_ucred, td); else error = fo_ioctl(fp, VIDIOC_DQBUF, &vbuf, td->td_ucred, td); bsd_to_linux_v4l2_buffer(&vbuf, &l_vbuf); copyout(&l_vbuf, (void *)args->arg, sizeof(l_vbuf)); fdrop(fp, td); return (error); /* * XXX TODO - these need 32 -> 64 bit conversion: * (are any of them needed for webcams?) */ case LINUX_VIDIOC_G_FBUF: case LINUX_VIDIOC_S_FBUF: case LINUX_VIDIOC_G_EXT_CTRLS: case LINUX_VIDIOC_S_EXT_CTRLS: case LINUX_VIDIOC_TRY_EXT_CTRLS: case LINUX_VIDIOC_DQEVENT: default: return (ENOIOCTL); } error = sys_ioctl(td, (struct ioctl_args *)args); return (error); } /* * Support for emulators/linux-libusb. This port uses FBSD_LUSB* macros * instead of USB* ones. This lets us to provide correct values for cmd. * 0xffffffe0 -- 0xffffffff range seemed to be the least collision-prone. 
*/ static int linux_ioctl_fbsd_usb(struct thread *td, struct linux_ioctl_args *args) { int error; error = 0; switch (args->cmd) { case FBSD_LUSB_DEVICEENUMERATE: args->cmd = USB_DEVICEENUMERATE; break; case FBSD_LUSB_DEV_QUIRK_ADD: args->cmd = USB_DEV_QUIRK_ADD; break; case FBSD_LUSB_DEV_QUIRK_GET: args->cmd = USB_DEV_QUIRK_GET; break; case FBSD_LUSB_DEV_QUIRK_REMOVE: args->cmd = USB_DEV_QUIRK_REMOVE; break; case FBSD_LUSB_DO_REQUEST: args->cmd = USB_DO_REQUEST; break; case FBSD_LUSB_FS_CLEAR_STALL_SYNC: args->cmd = USB_FS_CLEAR_STALL_SYNC; break; case FBSD_LUSB_FS_CLOSE: args->cmd = USB_FS_CLOSE; break; case FBSD_LUSB_FS_COMPLETE: args->cmd = USB_FS_COMPLETE; break; case FBSD_LUSB_FS_INIT: args->cmd = USB_FS_INIT; break; case FBSD_LUSB_FS_OPEN: args->cmd = USB_FS_OPEN; break; case FBSD_LUSB_FS_START: args->cmd = USB_FS_START; break; case FBSD_LUSB_FS_STOP: args->cmd = USB_FS_STOP; break; case FBSD_LUSB_FS_UNINIT: args->cmd = USB_FS_UNINIT; break; case FBSD_LUSB_GET_CONFIG: args->cmd = USB_GET_CONFIG; break; case FBSD_LUSB_GET_DEVICEINFO: args->cmd = USB_GET_DEVICEINFO; break; case FBSD_LUSB_GET_DEVICE_DESC: args->cmd = USB_GET_DEVICE_DESC; break; case FBSD_LUSB_GET_FULL_DESC: args->cmd = USB_GET_FULL_DESC; break; case FBSD_LUSB_GET_IFACE_DRIVER: args->cmd = USB_GET_IFACE_DRIVER; break; case FBSD_LUSB_GET_PLUGTIME: args->cmd = USB_GET_PLUGTIME; break; case FBSD_LUSB_GET_POWER_MODE: args->cmd = USB_GET_POWER_MODE; break; case FBSD_LUSB_GET_REPORT_DESC: args->cmd = USB_GET_REPORT_DESC; break; case FBSD_LUSB_GET_REPORT_ID: args->cmd = USB_GET_REPORT_ID; break; case FBSD_LUSB_GET_TEMPLATE: args->cmd = USB_GET_TEMPLATE; break; case FBSD_LUSB_IFACE_DRIVER_ACTIVE: args->cmd = USB_IFACE_DRIVER_ACTIVE; break; case FBSD_LUSB_IFACE_DRIVER_DETACH: args->cmd = USB_IFACE_DRIVER_DETACH; break; case FBSD_LUSB_QUIRK_NAME_GET: args->cmd = USB_QUIRK_NAME_GET; break; case FBSD_LUSB_READ_DIR: args->cmd = USB_READ_DIR; break; case FBSD_LUSB_SET_ALTINTERFACE: args->cmd = USB_SET_ALTINTERFACE; break; case FBSD_LUSB_SET_CONFIG: args->cmd = USB_SET_CONFIG; break; case FBSD_LUSB_SET_IMMED: args->cmd = USB_SET_IMMED; break; case FBSD_LUSB_SET_POWER_MODE: args->cmd = USB_SET_POWER_MODE; break; case FBSD_LUSB_SET_TEMPLATE: args->cmd = USB_SET_TEMPLATE; break; case FBSD_LUSB_FS_OPEN_STREAM: args->cmd = USB_FS_OPEN_STREAM; break; case FBSD_LUSB_GET_DEV_PORT_PATH: args->cmd = USB_GET_DEV_PORT_PATH; break; case FBSD_LUSB_GET_POWER_USAGE: args->cmd = USB_GET_POWER_USAGE; break; default: error = ENOIOCTL; } if (error != ENOIOCTL) error = sys_ioctl(td, (struct ioctl_args *)args); return (error); } /* * Some evdev ioctls must be translated. * - EVIOCGMTSLOTS is a IOC_READ ioctl on Linux although it has input data * (must be IOC_INOUT on FreeBSD). * - On Linux, EVIOCGRAB, EVIOCREVOKE and EVIOCRMFF are defined as _IOW with * an int argument. You don't pass an int pointer to the ioctl(), however, * but just the int directly. On FreeBSD, they are defined as _IOWINT for * this to work. 
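The _IOWINT cases matter because a Linux program passes the integer itself as the third ioctl() argument, for example (userland fragment; fd is assumed open on an event device):

#include <sys/ioctl.h>
#include <linux/input.h>

	ioctl(fd, EVIOCGRAB, 1);	/* grab: the int is the argument */
	ioctl(fd, EVIOCGRAB, 0);	/* release */

FreeBSD's _IOWINT encoding makes the kernel treat args->arg as that value rather than as a user pointer, which is why only the direction bits need rewriting below.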
*/ static int linux_ioctl_evdev(struct thread *td, struct linux_ioctl_args *args) { struct file *fp; clockid_t clock; int error; args->cmd = SETDIR(args->cmd); switch (args->cmd) { case (EVIOCGRAB & ~IOC_DIRMASK) | IOC_IN: args->cmd = EVIOCGRAB; break; case (EVIOCREVOKE & ~IOC_DIRMASK) | IOC_IN: args->cmd = EVIOCREVOKE; break; case (EVIOCRMFF & ~IOC_DIRMASK) | IOC_IN: args->cmd = EVIOCRMFF; break; case EVIOCSCLOCKID: { error = copyin(PTRIN(args->arg), &clock, sizeof(clock)); if (error != 0) return (error); if (clock & ~(LINUX_IOCTL_EVDEV_CLK)) return (EINVAL); error = linux_to_native_clockid(&clock, clock); if (error != 0) return (error); error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); error = fo_ioctl(fp, EVIOCSCLOCKID, &clock, td->td_ucred, td); fdrop(fp, td); return (error); } default: break; } if (IOCBASECMD(args->cmd) == ((EVIOCGMTSLOTS(0) & ~IOC_DIRMASK) | IOC_OUT)) args->cmd = (args->cmd & ~IOC_DIRMASK) | IOC_INOUT; return (sys_ioctl(td, (struct ioctl_args *)args)); } /* * main ioctl syscall function */ int linux_ioctl(struct thread *td, struct linux_ioctl_args *args) { struct file *fp; struct linux_ioctl_handler_element *he; int error, cmd; #ifdef DEBUG if (ldebug(ioctl)) printf(ARGS(ioctl, "%d, %04lx, *"), args->fd, (unsigned long)args->cmd); #endif error = fget(td, args->fd, &cap_ioctl_rights, &fp); if (error != 0) return (error); if ((fp->f_flag & (FREAD|FWRITE)) == 0) { fdrop(fp, td); return (EBADF); } /* Iterate over the ioctl handlers */ cmd = args->cmd & 0xffff; sx_slock(&linux_ioctl_sx); mtx_lock(&Giant); #ifdef COMPAT_LINUX32 TAILQ_FOREACH(he, &linux32_ioctl_handlers, list) { if (cmd >= he->low && cmd <= he->high) { error = (*he->func)(td, args); if (error != ENOIOCTL) { mtx_unlock(&Giant); sx_sunlock(&linux_ioctl_sx); fdrop(fp, td); return (error); } } } #endif TAILQ_FOREACH(he, &linux_ioctl_handlers, list) { if (cmd >= he->low && cmd <= he->high) { error = (*he->func)(td, args); if (error != ENOIOCTL) { mtx_unlock(&Giant); sx_sunlock(&linux_ioctl_sx); fdrop(fp, td); return (error); } } } mtx_unlock(&Giant); sx_sunlock(&linux_ioctl_sx); fdrop(fp, td); switch (args->cmd & 0xffff) { case LINUX_BTRFS_IOC_CLONE: return (ENOTSUP); default: linux_msg(td, "ioctl fd=%d, cmd=0x%x ('%c',%d) is not implemented", args->fd, (int)(args->cmd & 0xffff), (int)(args->cmd & 0xff00) >> 8, (int)(args->cmd & 0xff)); break; } return (EINVAL); } int linux_ioctl_register_handler(struct linux_ioctl_handler *h) { struct linux_ioctl_handler_element *he, *cur; if (h == NULL || h->func == NULL) return (EINVAL); /* * Reuse the element if the handler is already on the list, otherwise * create a new element. */ sx_xlock(&linux_ioctl_sx); TAILQ_FOREACH(he, &linux_ioctl_handlers, list) { if (he->func == h->func) break; } if (he == NULL) { he = malloc(sizeof(*he), M_LINUX, M_WAITOK); he->func = h->func; } else TAILQ_REMOVE(&linux_ioctl_handlers, he, list); /* Initialize range information. */ he->low = h->low; he->high = h->high; he->span = h->high - h->low + 1; /* Add the element to the list, sorted on span. 
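Keeping the list sorted by ascending span means the dispatch loop in linux_ioctl() above tries the most specific (narrowest) matching range first when ranges overlap. Registration from a module would look like this (the mydev_* names are hypothetical):

static struct linux_ioctl_handler mydev_handler = {
	.func = mydev_linux_ioctl,	/* int (*)(struct thread *, struct linux_ioctl_args *) */
	.low = MYDEV_LINUX_IOCTL_MIN,
	.high = MYDEV_LINUX_IOCTL_MAX,
};

linux_ioctl_register_handler(&mydev_handler);	/* on load */
linux_ioctl_unregister_handler(&mydev_handler);	/* on unload */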
*/ TAILQ_FOREACH(cur, &linux_ioctl_handlers, list) { if (cur->span > he->span) { TAILQ_INSERT_BEFORE(cur, he, list); sx_xunlock(&linux_ioctl_sx); return (0); } } TAILQ_INSERT_TAIL(&linux_ioctl_handlers, he, list); sx_xunlock(&linux_ioctl_sx); return (0); } int linux_ioctl_unregister_handler(struct linux_ioctl_handler *h) { struct linux_ioctl_handler_element *he; if (h == NULL || h->func == NULL) return (EINVAL); sx_xlock(&linux_ioctl_sx); TAILQ_FOREACH(he, &linux_ioctl_handlers, list) { if (he->func == h->func) { TAILQ_REMOVE(&linux_ioctl_handlers, he, list); sx_xunlock(&linux_ioctl_sx); free(he, M_LINUX); return (0); } } sx_xunlock(&linux_ioctl_sx); return (EINVAL); } #ifdef COMPAT_LINUX32 int linux32_ioctl_register_handler(struct linux_ioctl_handler *h) { struct linux_ioctl_handler_element *he, *cur; if (h == NULL || h->func == NULL) return (EINVAL); /* * Reuse the element if the handler is already on the list, otherwise * create a new element. */ sx_xlock(&linux_ioctl_sx); TAILQ_FOREACH(he, &linux32_ioctl_handlers, list) { if (he->func == h->func) break; } if (he == NULL) { he = malloc(sizeof(*he), M_LINUX, M_WAITOK); he->func = h->func; } else TAILQ_REMOVE(&linux32_ioctl_handlers, he, list); /* Initialize range information. */ he->low = h->low; he->high = h->high; he->span = h->high - h->low + 1; /* Add the element to the list, sorted on span. */ TAILQ_FOREACH(cur, &linux32_ioctl_handlers, list) { if (cur->span > he->span) { TAILQ_INSERT_BEFORE(cur, he, list); sx_xunlock(&linux_ioctl_sx); return (0); } } TAILQ_INSERT_TAIL(&linux32_ioctl_handlers, he, list); sx_xunlock(&linux_ioctl_sx); return (0); } int linux32_ioctl_unregister_handler(struct linux_ioctl_handler *h) { struct linux_ioctl_handler_element *he; if (h == NULL || h->func == NULL) return (EINVAL); sx_xlock(&linux_ioctl_sx); TAILQ_FOREACH(he, &linux32_ioctl_handlers, list) { if (he->func == h->func) { TAILQ_REMOVE(&linux32_ioctl_handlers, he, list); sx_xunlock(&linux_ioctl_sx); free(he, M_LINUX); return (0); } } sx_xunlock(&linux_ioctl_sx); return (EINVAL); } #endif Index: projects/runtime-coverage-v2/sys/compat/linux/linux_misc.c =================================================================== --- projects/runtime-coverage-v2/sys/compat/linux/linux_misc.c (revision 347075) +++ projects/runtime-coverage-v2/sys/compat/linux/linux_misc.c (revision 347076) @@ -1,2592 +1,2593 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Doug Rabson * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #if defined(__i386__) #include #endif #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include #include #include #include #include #include #include /** * Special DTrace provider for the linuxulator. * * In this file we define the provider for the entire linuxulator. All * modules (= files of the linuxulator) use it. * * We define a different name depending on the emulated bitsize, see * ../..//linux{,32}/linux.h, e.g.: * native bitsize = linuxulator * amd64, 32bit emulation = linuxulator32 */ LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE); int stclohz; /* Statistics clock frequency */ static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, RLIMIT_MEMLOCK, RLIMIT_AS }; struct l_sysinfo { l_long uptime; /* Seconds since boot */ l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ #define LINUX_SYSINFO_LOADS_SCALE 65536 l_ulong totalram; /* Total usable main memory size */ l_ulong freeram; /* Available memory size */ l_ulong sharedram; /* Amount of shared memory */ l_ulong bufferram; /* Memory used by buffers */ l_ulong totalswap; /* Total swap space size */ l_ulong freeswap; /* swap space still available */ l_ushort procs; /* Number of current processes */ l_ushort pads; l_ulong totalbig; l_ulong freebig; l_uint mem_unit; char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ }; struct l_pselect6arg { l_uintptr_t ss; l_size_t ss_len; }; static int linux_utimensat_nsec_valid(l_long); int linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) { struct l_sysinfo sysinfo; vm_object_t object; int i, j; struct timespec ts; bzero(&sysinfo, sizeof(sysinfo)); getnanouptime(&ts); if (ts.tv_nsec != 0) ts.tv_sec++; sysinfo.uptime = ts.tv_sec; /* Use the information from the mib to get our load averages */ for (i = 0; i < 3; i++) sysinfo.loads[i] = averunnable.ldavg[i] * LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; sysinfo.totalram = physmem * PAGE_SIZE; sysinfo.freeram = sysinfo.totalram - vm_wire_count() * PAGE_SIZE; sysinfo.sharedram = 0; mtx_lock(&vm_object_list_mtx); TAILQ_FOREACH(object, &vm_object_list, object_list) if (object->shadow_count > 1) sysinfo.sharedram += object->resident_page_count; mtx_unlock(&vm_object_list_mtx); sysinfo.sharedram *= PAGE_SIZE; sysinfo.bufferram = 0; swap_pager_status(&i, &j); sysinfo.totalswap = i * PAGE_SIZE; sysinfo.freeswap = (i - j) * PAGE_SIZE; sysinfo.procs = nprocs; /* The following are only present in newer Linux kernels. 
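The load-average conversion in linux_sysinfo() above rescales FreeBSD's fixed point (denominator fscale) to Linux's 16.16 format (LINUX_SYSINFO_LOADS_SCALE == 65536). A worked example with assumed values:

/* a load of 0.50 with fscale == 2048 is stored as ldavg == 1024; */
sysinfo.loads[0] = 1024UL * 65536 / 2048;	/* == 32768 == 0.50 in 16.16 */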
*/ sysinfo.totalbig = 0; sysinfo.freebig = 0; sysinfo.mem_unit = 1; return (copyout(&sysinfo, args->info, sizeof(sysinfo))); } #ifdef LINUX_LEGACY_SYSCALLS int linux_alarm(struct thread *td, struct linux_alarm_args *args) { struct itimerval it, old_it; u_int secs; int error; #ifdef DEBUG if (ldebug(alarm)) printf(ARGS(alarm, "%u"), args->secs); #endif secs = args->secs; /* * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 * to match kern_setitimer()'s limit to avoid error from it. * * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit * platforms. */ if (secs > INT32_MAX / 2) secs = INT32_MAX / 2; it.it_value.tv_sec = secs; it.it_value.tv_usec = 0; timevalclear(&it.it_interval); error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); KASSERT(error == 0, ("kern_setitimer returns %d", error)); if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || old_it.it_value.tv_usec >= 500000) old_it.it_value.tv_sec++; td->td_retval[0] = old_it.it_value.tv_sec; return (0); } #endif int linux_brk(struct thread *td, struct linux_brk_args *args) { struct vmspace *vm = td->td_proc->p_vmspace; uintptr_t new, old; #ifdef DEBUG if (ldebug(brk)) printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend); #endif old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); new = (uintptr_t)args->dsend; if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) td->td_retval[0] = (register_t)new; else td->td_retval[0] = (register_t)old; return (0); } #if defined(__i386__) /* XXX: what about amd64/linux32? */ int linux_uselib(struct thread *td, struct linux_uselib_args *args) { struct nameidata ni; struct vnode *vp; struct exec *a_out; struct vattr attr; vm_offset_t vmaddr; unsigned long file_offset; unsigned long bss_size; char *library; ssize_t aresid; int error, locked, writecount; LCONVPATHEXIST(td, args->library, &library); #ifdef DEBUG if (ldebug(uselib)) printf(ARGS(uselib, "%s"), library); #endif a_out = NULL; locked = 0; vp = NULL; NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE, library, td); error = namei(&ni); LFREEPATH(library); if (error) goto cleanup; vp = ni.ni_vp; NDFREE(&ni, NDF_ONLY_PNBUF); /* * From here on down, we have a locked vnode that must be unlocked. * XXX: The code below largely duplicates exec_check_permissions(). */ locked = 1; /* Writable? */ error = VOP_GET_WRITECOUNT(vp, &writecount); if (error != 0) goto cleanup; if (writecount != 0) { error = ETXTBSY; goto cleanup; } /* Executable? */ error = VOP_GETATTR(vp, &attr, td->td_ucred); if (error) goto cleanup; if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { /* EACCESS is what exec(2) returns. */ error = ENOEXEC; goto cleanup; } /* Sensible size? */ if (attr.va_size == 0) { error = ENOEXEC; goto cleanup; } /* Can we access it? */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) goto cleanup; /* * XXX: This should use vn_open() so that it is properly authorized, * and to reduce code redundancy all over the place here. * XXX: Not really, it duplicates far more of exec_check_permissions() * than vn_open(). */ #ifdef MAC error = mac_vnode_check_open(td->td_ucred, vp, VREAD); if (error) goto cleanup; #endif error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error) goto cleanup; /* Pull in executable header into exec_map */ error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); if (error) goto cleanup; /* Is it a Linux binary ? 
*/ if (((a_out->a_magic >> 16) & 0xff) != 0x64) { error = ENOEXEC; goto cleanup; } /* * While we are here, we should REALLY do some more checks */ /* Set file/virtual offset based on a.out variant. */ switch ((int)(a_out->a_magic & 0xffff)) { case 0413: /* ZMAGIC */ file_offset = 1024; break; case 0314: /* QMAGIC */ file_offset = 0; break; default: error = ENOEXEC; goto cleanup; } bss_size = round_page(a_out->a_bss); /* Check various fields in header for validity/bounds. */ if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { error = ENOEXEC; goto cleanup; } /* text + data can't exceed file size */ if (a_out->a_data + a_out->a_text > attr.va_size) { error = EFAULT; goto cleanup; } /* * text/data/bss must not exceed limits * XXX - this is not complete. it should check current usage PLUS * the resources needed by this library. */ PROC_LOCK(td->td_proc); if (a_out->a_text > maxtsiz || a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) || racct_set(td->td_proc, RACCT_DATA, a_out->a_data + bss_size) != 0) { PROC_UNLOCK(td->td_proc); error = ENOMEM; goto cleanup; } PROC_UNLOCK(td->td_proc); /* * Prevent more writers. * XXX: Note that if any of the VM operations fail below we don't * clear this flag. */ VOP_SET_TEXT(vp); /* * Lock no longer needed */ locked = 0; VOP_UNLOCK(vp, 0); /* * Check if file_offset page aligned. Currently we cannot handle * misalinged file offsets, and so we read in the entire image * (what a waste). */ if (file_offset & PAGE_MASK) { #ifdef DEBUG printf("uselib: Non page aligned binary %lu\n", file_offset); #endif /* Map text+data read/write/execute */ /* a_entry is the load address and is page aligned */ vmaddr = trunc_page(a_out->a_entry); /* get anon user mapping, read+write+execute */ error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) goto cleanup; error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, td->td_ucred, NOCRED, &aresid, td); if (error != 0) goto cleanup; if (aresid != 0) { error = ENOEXEC; goto cleanup; } } else { #ifdef DEBUG printf("uselib: Page aligned binary %lu\n", file_offset); #endif /* * for QMAGIC, a_entry is 20 bytes beyond the load address * to skip the executable header */ vmaddr = trunc_page(a_out->a_entry); /* * Map it all into the process's space as a single * copy-on-write "data" segment. */ error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr, a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); if (error) goto cleanup; } #ifdef DEBUG printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0], ((long *)vmaddr)[1]); #endif if (bss_size != 0) { /* Calculate BSS start address */ vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; /* allocate some 'anon' space */ error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) goto cleanup; } cleanup: /* Unlock vnode if needed */ if (locked) VOP_UNLOCK(vp, 0); /* Release the temporary mapping. 
*/ if (a_out) kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); return (error); } #endif /* __i386__ */ #ifdef LINUX_LEGACY_SYSCALLS int linux_select(struct thread *td, struct linux_select_args *args) { l_timeval ltv; struct timeval tv0, tv1, utv, *tvp; int error; #ifdef DEBUG if (ldebug(select)) printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds, (void *)args->readfds, (void *)args->writefds, (void *)args->exceptfds, (void *)args->timeout); #endif /* * Store current time for computation of the amount of * time left. */ if (args->timeout) { if ((error = copyin(args->timeout, <v, sizeof(ltv)))) goto select_out; utv.tv_sec = ltv.tv_sec; utv.tv_usec = ltv.tv_usec; #ifdef DEBUG if (ldebug(select)) printf(LMSG("incoming timeout (%jd/%ld)"), (intmax_t)utv.tv_sec, utv.tv_usec); #endif if (itimerfix(&utv)) { /* * The timeval was invalid. Convert it to something * valid that will act as it does under Linux. */ utv.tv_sec += utv.tv_usec / 1000000; utv.tv_usec %= 1000000; if (utv.tv_usec < 0) { utv.tv_sec -= 1; utv.tv_usec += 1000000; } if (utv.tv_sec < 0) timevalclear(&utv); } microtime(&tv0); tvp = &utv; } else tvp = NULL; error = kern_select(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tvp, LINUX_NFDBITS); #ifdef DEBUG if (ldebug(select)) printf(LMSG("real select returns %d"), error); #endif if (error) goto select_out; if (args->timeout) { if (td->td_retval[0]) { /* * Compute how much time was left of the timeout, * by subtracting the current time and the time * before we started the call, and subtracting * that result from the user-supplied value. */ microtime(&tv1); timevalsub(&tv1, &tv0); timevalsub(&utv, &tv1); if (utv.tv_sec < 0) timevalclear(&utv); } else timevalclear(&utv); #ifdef DEBUG if (ldebug(select)) printf(LMSG("outgoing timeout (%jd/%ld)"), (intmax_t)utv.tv_sec, utv.tv_usec); #endif ltv.tv_sec = utv.tv_sec; ltv.tv_usec = utv.tv_usec; if ((error = copyout(<v, args->timeout, sizeof(ltv)))) goto select_out; } select_out: #ifdef DEBUG if (ldebug(select)) printf(LMSG("select_out -> %d"), error); #endif return (error); } #endif int linux_mremap(struct thread *td, struct linux_mremap_args *args) { uintptr_t addr; size_t len; int error = 0; #ifdef DEBUG if (ldebug(mremap)) printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), (void *)(uintptr_t)args->addr, (unsigned long)args->old_len, (unsigned long)args->new_len, (unsigned long)args->flags); #endif if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { td->td_retval[0] = 0; return (EINVAL); } /* * Check for the page alignment. * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. */ if (args->addr & PAGE_MASK) { td->td_retval[0] = 0; return (EINVAL); } args->new_len = round_page(args->new_len); args->old_len = round_page(args->old_len); if (args->new_len > args->old_len) { td->td_retval[0] = 0; return (ENOMEM); } if (args->new_len < args->old_len) { addr = args->addr + args->new_len; len = args->old_len - args->new_len; error = kern_munmap(td, addr, len); } td->td_retval[0] = error ? 
0 : (uintptr_t)args->addr; return (error); } #define LINUX_MS_ASYNC 0x0001 #define LINUX_MS_INVALIDATE 0x0002 #define LINUX_MS_SYNC 0x0004 int linux_msync(struct thread *td, struct linux_msync_args *args) { return (kern_msync(td, args->addr, args->len, args->fl & ~LINUX_MS_SYNC)); } #ifdef LINUX_LEGACY_SYSCALLS int linux_time(struct thread *td, struct linux_time_args *args) { struct timeval tv; l_time_t tm; int error; #ifdef DEBUG if (ldebug(time)) printf(ARGS(time, "*")); #endif microtime(&tv); tm = tv.tv_sec; if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) return (error); td->td_retval[0] = tm; return (0); } #endif struct l_times_argv { l_clock_t tms_utime; l_clock_t tms_stime; l_clock_t tms_cutime; l_clock_t tms_cstime; }; /* * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK * auxiliary vector entry. */ #define CLK_TCK 100 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ CONVNTCK(r) : CONVOTCK(r)) int linux_times(struct thread *td, struct linux_times_args *args) { struct timeval tv, utime, stime, cutime, cstime; struct l_times_argv tms; struct proc *p; int error; #ifdef DEBUG if (ldebug(times)) printf(ARGS(times, "*")); #endif if (args->buf != NULL) { p = td->td_proc; PROC_LOCK(p); PROC_STATLOCK(p); calcru(p, &utime, &stime); PROC_STATUNLOCK(p); calccru(p, &cutime, &cstime); PROC_UNLOCK(p); tms.tms_utime = CONVTCK(utime); tms.tms_stime = CONVTCK(stime); tms.tms_cutime = CONVTCK(cutime); tms.tms_cstime = CONVTCK(cstime); if ((error = copyout(&tms, args->buf, sizeof(tms)))) return (error); } microuptime(&tv); td->td_retval[0] = (int)CONVTCK(tv); return (0); } int linux_newuname(struct thread *td, struct linux_newuname_args *args) { struct l_new_utsname utsname; char osname[LINUX_MAX_UTSNAME]; char osrelease[LINUX_MAX_UTSNAME]; char *p; #ifdef DEBUG if (ldebug(newuname)) printf(ARGS(newuname, "*")); #endif linux_get_osname(td, osname); linux_get_osrelease(td, osrelease); bzero(&utsname, sizeof(utsname)); strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); for (p = utsname.version; *p != '\0'; ++p) if (*p == '\n') { *p = '\0'; break; } strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME); return (copyout(&utsname, args->buf, sizeof(utsname))); } struct l_utimbuf { l_time_t l_actime; l_time_t l_modtime; }; #ifdef LINUX_LEGACY_SYSCALLS int linux_utime(struct thread *td, struct linux_utime_args *args) { struct timeval tv[2], *tvp; struct l_utimbuf lut; char *fname; int error; LCONVPATHEXIST(td, args->fname, &fname); #ifdef DEBUG if (ldebug(utime)) printf(ARGS(utime, "%s, *"), fname); #endif if (args->times) { if ((error = copyin(args->times, &lut, sizeof lut))) { LFREEPATH(fname); return (error); } tv[0].tv_sec = lut.l_actime; tv[0].tv_usec = 0; tv[1].tv_sec = lut.l_modtime; tv[1].tv_usec = 0; tvp = tv; } else tvp = NULL; error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); return (error); } #endif #ifdef LINUX_LEGACY_SYSCALLS int linux_utimes(struct thread *td, struct linux_utimes_args *args) { l_timeval ltv[2]; struct timeval tv[2], 
int linux_newuname(struct thread *td, struct linux_newuname_args *args) { struct l_new_utsname utsname; char osname[LINUX_MAX_UTSNAME]; char osrelease[LINUX_MAX_UTSNAME]; char *p; #ifdef DEBUG if (ldebug(newuname)) printf(ARGS(newuname, "*")); #endif linux_get_osname(td, osname); linux_get_osrelease(td, osrelease); bzero(&utsname, sizeof(utsname)); strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); for (p = utsname.version; *p != '\0'; ++p) if (*p == '\n') { *p = '\0'; break; } strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME); return (copyout(&utsname, args->buf, sizeof(utsname))); } struct l_utimbuf { l_time_t l_actime; l_time_t l_modtime; }; #ifdef LINUX_LEGACY_SYSCALLS int linux_utime(struct thread *td, struct linux_utime_args *args) { struct timeval tv[2], *tvp; struct l_utimbuf lut; char *fname; int error; LCONVPATHEXIST(td, args->fname, &fname); #ifdef DEBUG if (ldebug(utime)) printf(ARGS(utime, "%s, *"), fname); #endif if (args->times) { if ((error = copyin(args->times, &lut, sizeof lut))) { LFREEPATH(fname); return (error); } tv[0].tv_sec = lut.l_actime; tv[0].tv_usec = 0; tv[1].tv_sec = lut.l_modtime; tv[1].tv_usec = 0; tvp = tv; } else tvp = NULL; error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); return (error); } #endif #ifdef LINUX_LEGACY_SYSCALLS int linux_utimes(struct thread *td, struct linux_utimes_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; char *fname; int error; LCONVPATHEXIST(td, args->fname, &fname); #ifdef DEBUG if (ldebug(utimes)) printf(ARGS(utimes, "%s, *"), fname); #endif if (args->tptr != NULL) { if ((error = copyin(args->tptr, ltv, sizeof ltv))) { LFREEPATH(fname); return (error); } tv[0].tv_sec = ltv[0].tv_sec; tv[0].tv_usec = ltv[0].tv_usec; tv[1].tv_sec = ltv[1].tv_sec; tv[1].tv_usec = ltv[1].tv_usec; tvp = tv; } error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); return (error); } #endif static int linux_utimensat_nsec_valid(l_long nsec) { if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW) return (0); if (nsec >= 0 && nsec <= 999999999) return (0); return (1); } int linux_utimensat(struct thread *td, struct linux_utimensat_args *args) { struct l_timespec l_times[2]; struct timespec times[2], *timesp = NULL; char *path = NULL; int error, dfd, flags = 0; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; #ifdef DEBUG if (ldebug(utimensat)) printf(ARGS(utimensat, "%d, *"), dfd); #endif if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW) return (EINVAL); if (args->times != NULL) { error = copyin(args->times, l_times, sizeof(l_times)); if (error != 0) return (error); if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 || linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0) return (EINVAL); times[0].tv_sec = l_times[0].tv_sec; switch (l_times[0].tv_nsec) { case LINUX_UTIME_OMIT: times[0].tv_nsec = UTIME_OMIT; break; case LINUX_UTIME_NOW: times[0].tv_nsec = UTIME_NOW; break; default: times[0].tv_nsec = l_times[0].tv_nsec; } times[1].tv_sec = l_times[1].tv_sec; switch (l_times[1].tv_nsec) { case LINUX_UTIME_OMIT: times[1].tv_nsec = UTIME_OMIT; break; case LINUX_UTIME_NOW: times[1].tv_nsec = UTIME_NOW; break; default: times[1].tv_nsec = l_times[1].tv_nsec; break; } timesp = times; /* This breaks POSIX, but is what the Linux kernel does * _on purpose_ (documented in the man page for utimensat(2)), * so we must follow that behaviour. */ if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) return (0); } if (args->pathname != NULL) LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); else if (args->flags != 0) return (EINVAL); if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW) flags |= AT_SYMLINK_NOFOLLOW; if (path == NULL) error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); else { error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, UIO_SYSSPACE, flags); LFREEPATH(path); } return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_futimesat(struct thread *td, struct linux_futimesat_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; char *fname; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ?
AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); #ifdef DEBUG if (ldebug(futimesat)) printf(ARGS(futimesat, "%s, *"), fname); #endif if (args->utimes != NULL) { if ((error = copyin(args->utimes, ltv, sizeof ltv))) { LFREEPATH(fname); return (error); } tv[0].tv_sec = ltv[0].tv_sec; tv[0].tv_usec = ltv[0].tv_usec; tv[1].tv_sec = ltv[1].tv_sec; tv[1].tv_usec = ltv[1].tv_usec; tvp = tv; } error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); return (error); } #endif int linux_common_wait(struct thread *td, int pid, int *status, int options, struct rusage *ru) { int error, tmpstat; error = kern_wait(td, pid, &tmpstat, options, ru); if (error) return (error); if (status) { tmpstat &= 0xffff; if (WIFSIGNALED(tmpstat)) tmpstat = (tmpstat & 0xffffff80) | bsd_to_linux_signal(WTERMSIG(tmpstat)); else if (WIFSTOPPED(tmpstat)) tmpstat = (tmpstat & 0xffff00ff) | (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); else if (WIFCONTINUED(tmpstat)) tmpstat = 0xffff; error = copyout(&tmpstat, status, sizeof(int)); } return (error); }
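[Editor's aside: linux_common_wait() above keeps the FreeBSD status layout but splices in the translated signal number: for a killed child the signal lives in the low seven bits, for a stopped child in bits 8-15, and a continued child is reported as the fixed pattern 0xffff. A hypothetical userland mirror of that repacking, for illustration only.]

#include <stdio.h>
#include <sys/wait.h>

/*
 * Sketch of the rewrite in linux_common_wait(): 'linux_signo' stands in for
 * bsd_to_linux_signal(WTERMSIG()/WSTOPSIG()) applied to the original status.
 */
static int
repack_wait_status(int st, int linux_signo)
{
	st &= 0xffff;
	if (WIFSIGNALED(st))
		st = (st & 0xffffff80) | linux_signo;
	else if (WIFSTOPPED(st))
		st = (st & 0xffff00ff) | (linux_signo << 8);
	else if (WIFCONTINUED(st))
		st = 0xffff;
	return (st);
}

int
main(void)
{
	/*
	 * A child stopped by FreeBSD SIGSTOP (17) reports status 0x117f;
	 * Linux numbers SIGSTOP 19, so the Linux side must see 0x137f.
	 */
	printf("0x%x\n", repack_wait_status(0x117f, 19));
	return (0);
}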
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_waitpid(struct thread *td, struct linux_waitpid_args *args) { struct linux_wait4_args wait4_args; #ifdef DEBUG if (ldebug(waitpid)) printf(ARGS(waitpid, "%d, %p, %d"), args->pid, (void *)args->status, args->options); #endif wait4_args.pid = args->pid; wait4_args.status = args->status; wait4_args.options = args->options; wait4_args.rusage = NULL; return (linux_wait4(td, &wait4_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_wait4(struct thread *td, struct linux_wait4_args *args) { int error, options; struct rusage ru, *rup; #ifdef DEBUG if (ldebug(wait4)) printf(ARGS(wait4, "%d, %p, %d, %p"), args->pid, (void *)args->status, args->options, (void *)args->rusage); #endif if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) return (EINVAL); options = WEXITED; linux_to_bsd_waitopts(args->options, &options); if (args->rusage != NULL) rup = &ru; else rup = NULL; error = linux_common_wait(td, args->pid, args->status, options, rup); if (error != 0) return (error); if (args->rusage != NULL) error = linux_copyout_rusage(&ru, args->rusage); return (error); } int linux_waitid(struct thread *td, struct linux_waitid_args *args) { int status, options, sig; struct __wrusage wru; siginfo_t siginfo; l_siginfo_t lsi; idtype_t idtype; struct proc *p; int error; options = 0; linux_to_bsd_waitopts(args->options, &options); if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) return (EINVAL); if (!(options & (WEXITED | WUNTRACED | WCONTINUED))) return (EINVAL); switch (args->idtype) { case LINUX_P_ALL: idtype = P_ALL; break; case LINUX_P_PID: if (args->id <= 0) return (EINVAL); idtype = P_PID; break; case LINUX_P_PGID: if (args->id <= 0) return (EINVAL); idtype = P_PGID; break; default: return (EINVAL); } error = kern_wait6(td, idtype, args->id, &status, options, &wru, &siginfo); if (error != 0) return (error); if (args->rusage != NULL) { error = linux_copyout_rusage(&wru.wru_children, args->rusage); if (error != 0) return (error); } if (args->info != NULL) { p = td->td_proc; bzero(&lsi, sizeof(lsi)); if (td->td_retval[0] != 0) { sig = bsd_to_linux_signal(siginfo.si_signo); siginfo_to_lsiginfo(&siginfo, &lsi, sig); } error = copyout(&lsi, args->info, sizeof(lsi)); } td->td_retval[0] = 0; return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_mknod(struct thread *td, struct linux_mknod_args *args) { char *path; int error; LCONVPATHCREAT(td, args->path, &path); #ifdef DEBUG if (ldebug(mknod)) printf(ARGS(mknod, "%s, %d, %ju"), path, args->mode, (uintmax_t)args->dev); #endif switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode); break; case S_IFCHR: case S_IFBLK: error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode, args->dev); break; case S_IFDIR: error = EPERM; break; case 0: args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); break; default: error = EINVAL; break; } LFREEPATH(path); return (error); } #endif int linux_mknodat(struct thread *td, struct linux_mknodat_args *args) { char *path; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHCREAT_AT(td, args->filename, &path, dfd); #ifdef DEBUG if (ldebug(mknodat)) printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev); #endif switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode); break; case S_IFCHR: case S_IFBLK: error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode, args->dev); break; case S_IFDIR: error = EPERM; break; case 0: args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: error = kern_openat(td, dfd, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); break; default: error = EINVAL; break; } LFREEPATH(path); return (error); } /* * UGH! This is just about the dumbest idea I've ever heard!! */ int linux_personality(struct thread *td, struct linux_personality_args *args) { struct linux_pemuldata *pem; struct proc *p = td->td_proc; uint32_t old; #ifdef DEBUG if (ldebug(personality)) printf(ARGS(personality, "%u"), args->per); #endif PROC_LOCK(p); pem = pem_find(p); old = pem->persona; if (args->per != 0xffffffff) pem->persona = args->per; PROC_UNLOCK(p); td->td_retval[0] = old; return (0); } struct l_itimerval { l_timeval it_interval; l_timeval it_value; }; #define B2L_ITIMERVAL(bip, lip) \ (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; int linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) { int error; struct l_itimerval ls; struct itimerval aitv, oitv; #ifdef DEBUG if (ldebug(setitimer)) printf(ARGS(setitimer, "%p, %p"), (void *)uap->itv, (void *)uap->oitv); #endif if (uap->itv == NULL) { uap->itv = uap->oitv; return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); } error = copyin(uap->itv, &ls, sizeof(ls)); if (error != 0) return (error); B2L_ITIMERVAL(&aitv, &ls); #ifdef DEBUG if (ldebug(setitimer)) { printf("setitimer: value: sec: %jd, usec: %ld\n", (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec); printf("setitimer: interval: sec: %jd, usec: %ld\n", (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec); } #endif error = kern_setitimer(td, uap->which, &aitv, &oitv); if (error != 0 || uap->oitv == NULL) return (error); B2L_ITIMERVAL(&ls, &oitv); return (copyout(&ls, uap->oitv, sizeof(ls))); } int linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) { int error; struct l_itimerval ls; struct itimerval aitv; #ifdef DEBUG if (ldebug(getitimer))
printf(ARGS(getitimer, "%p"), (void *)uap->itv); #endif error = kern_getitimer(td, uap->which, &aitv); if (error != 0) return (error); B2L_ITIMERVAL(&ls, &aitv); return (copyout(&ls, uap->itv, sizeof(ls))); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_nice(struct thread *td, struct linux_nice_args *args) { struct setpriority_args bsd_args; bsd_args.which = PRIO_PROCESS; bsd_args.who = 0; /* current process */ bsd_args.prio = args->inc; return (sys_setpriority(td, &bsd_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_setgroups(struct thread *td, struct linux_setgroups_args *args) { struct ucred *newcred, *oldcred; l_gid_t *linux_gidset; gid_t *bsd_gidset; int ngrp, error; struct proc *p; ngrp = args->gidsetsize; if (ngrp < 0 || ngrp >= ngroups_max + 1) return (EINVAL); linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); if (error) goto out; newcred = crget(); crextend(newcred, ngrp + 1); p = td->td_proc; PROC_LOCK(p); oldcred = p->p_ucred; crcopy(newcred, oldcred); /* * cr_groups[0] holds egid. Setting the whole set from * the supplied set will cause egid to be changed too. * Keep cr_groups[0] unchanged to prevent that. */ if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { PROC_UNLOCK(p); crfree(newcred); goto out; } if (ngrp > 0) { newcred->cr_ngroups = ngrp + 1; bsd_gidset = newcred->cr_groups; ngrp--; while (ngrp >= 0) { bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; ngrp--; } } else newcred->cr_ngroups = 1; setsugid(p); proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); error = 0; out: free(linux_gidset, M_LINUX); return (error); } int linux_getgroups(struct thread *td, struct linux_getgroups_args *args) { struct ucred *cred; l_gid_t *linux_gidset; gid_t *bsd_gidset; int bsd_gidsetsz, ngrp, error; cred = td->td_ucred; bsd_gidset = cred->cr_groups; bsd_gidsetsz = cred->cr_ngroups - 1; /* * cr_groups[0] holds egid. Returning the whole set * here will cause a duplicate. Exclude cr_groups[0] * to prevent that. 
*/ if ((ngrp = args->gidsetsize) == 0) { td->td_retval[0] = bsd_gidsetsz; return (0); } if (ngrp < bsd_gidsetsz) return (EINVAL); ngrp = 0; linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), M_LINUX, M_WAITOK); while (ngrp < bsd_gidsetsz) { linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; ngrp++; } error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); free(linux_gidset, M_LINUX); if (error) return (error); td->td_retval[0] = ngrp; return (0); } int linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) { struct rlimit bsd_rlim; struct l_rlimit rlim; u_int which; int error; #ifdef DEBUG if (ldebug(setrlimit)) printf(ARGS(setrlimit, "%d, %p"), args->resource, (void *)args->rlim); #endif if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); error = copyin(args->rlim, &rlim, sizeof(rlim)); if (error) return (error); bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; return (kern_setrlimit(td, which, &bsd_rlim)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) { struct l_rlimit rlim; struct rlimit bsd_rlim; u_int which; #ifdef DEBUG if (ldebug(old_getrlimit)) printf(ARGS(old_getrlimit, "%d, %p"), args->resource, (void *)args->rlim); #endif if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); lim_rlimit(td, which, &bsd_rlim); #ifdef COMPAT_LINUX32 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; if (rlim.rlim_cur == UINT_MAX) rlim.rlim_cur = INT_MAX; rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; if (rlim.rlim_max == UINT_MAX) rlim.rlim_max = INT_MAX; #else rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; if (rlim.rlim_cur == ULONG_MAX) rlim.rlim_cur = LONG_MAX; rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; if (rlim.rlim_max == ULONG_MAX) rlim.rlim_max = LONG_MAX; #endif return (copyout(&rlim, args->rlim, sizeof(rlim))); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) { struct l_rlimit rlim; struct rlimit bsd_rlim; u_int which; #ifdef DEBUG if (ldebug(getrlimit)) printf(ARGS(getrlimit, "%d, %p"), args->resource, (void *)args->rlim); #endif if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); lim_rlimit(td, which, &bsd_rlim); rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; return (copyout(&rlim, args->rlim, sizeof(rlim))); } int linux_sched_setscheduler(struct thread *td, struct linux_sched_setscheduler_args *args) { struct sched_param sched_param; struct thread *tdt; int error, policy; #ifdef DEBUG if (ldebug(sched_setscheduler)) printf(ARGS(sched_setscheduler, "%d, %d, %p"), args->pid, args->policy, (const void *)args->param); #endif switch (args->policy) { case LINUX_SCHED_OTHER: policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: policy = SCHED_FIFO; break; case LINUX_SCHED_RR: policy = SCHED_RR; break; default: return (EINVAL); } error = copyin(args->param, &sched_param, sizeof(sched_param)); if (error) return (error); tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_setscheduler(td, tdt, policy, &sched_param); PROC_UNLOCK(tdt->td_proc); return (error); } int linux_sched_getscheduler(struct 
thread *td, struct linux_sched_getscheduler_args *args) { struct thread *tdt; int error, policy; #ifdef DEBUG if (ldebug(sched_getscheduler)) printf(ARGS(sched_getscheduler, "%d"), args->pid); #endif tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_getscheduler(td, tdt, &policy); PROC_UNLOCK(tdt->td_proc); switch (policy) { case SCHED_OTHER: td->td_retval[0] = LINUX_SCHED_OTHER; break; case SCHED_FIFO: td->td_retval[0] = LINUX_SCHED_FIFO; break; case SCHED_RR: td->td_retval[0] = LINUX_SCHED_RR; break; } return (error); } int linux_sched_get_priority_max(struct thread *td, struct linux_sched_get_priority_max_args *args) { struct sched_get_priority_max_args bsd; #ifdef DEBUG if (ldebug(sched_get_priority_max)) printf(ARGS(sched_get_priority_max, "%d"), args->policy); #endif switch (args->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = SCHED_RR; break; default: return (EINVAL); } return (sys_sched_get_priority_max(td, &bsd)); } int linux_sched_get_priority_min(struct thread *td, struct linux_sched_get_priority_min_args *args) { struct sched_get_priority_min_args bsd; #ifdef DEBUG if (ldebug(sched_get_priority_min)) printf(ARGS(sched_get_priority_min, "%d"), args->policy); #endif switch (args->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = SCHED_RR; break; default: return (EINVAL); } return (sys_sched_get_priority_min(td, &bsd)); } #define REBOOT_CAD_ON 0x89abcdef #define REBOOT_CAD_OFF 0 #define REBOOT_HALT 0xcdef0123 #define REBOOT_RESTART 0x01234567 #define REBOOT_RESTART2 0xA1B2C3D4 #define REBOOT_POWEROFF 0x4321FEDC #define REBOOT_MAGIC1 0xfee1dead #define REBOOT_MAGIC2 0x28121969 #define REBOOT_MAGIC2A 0x05121996 #define REBOOT_MAGIC2B 0x16041998 int linux_reboot(struct thread *td, struct linux_reboot_args *args) { struct reboot_args bsd_args; #ifdef DEBUG if (ldebug(reboot)) printf(ARGS(reboot, "0x%x"), args->cmd); #endif if (args->magic1 != REBOOT_MAGIC1) return (EINVAL); switch (args->magic2) { case REBOOT_MAGIC2: case REBOOT_MAGIC2A: case REBOOT_MAGIC2B: break; default: return (EINVAL); } switch (args->cmd) { case REBOOT_CAD_ON: case REBOOT_CAD_OFF: return (priv_check(td, PRIV_REBOOT)); case REBOOT_HALT: bsd_args.opt = RB_HALT; break; case REBOOT_RESTART: case REBOOT_RESTART2: bsd_args.opt = 0; break; case REBOOT_POWEROFF: bsd_args.opt = RB_POWEROFF; break; default: return (EINVAL); } return (sys_reboot(td, &bsd_args)); }
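[Editor's aside: the magic1/magic2 pair above is Linux's safety interlock for reboot(2): the command word is only acted upon when both magics check out, and linux_reboot() enforces the same gate before mapping the command to an RB_* flag. A tiny standalone check mirroring that validation; hypothetical, illustration only.]

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the magic validation at the top of linux_reboot() above. */
static bool
linux_reboot_magic_ok(unsigned int magic1, unsigned int magic2)
{
	if (magic1 != 0xfee1dead)	/* REBOOT_MAGIC1 */
		return (false);
	switch (magic2) {
	case 0x28121969:		/* REBOOT_MAGIC2 */
	case 0x05121996:		/* REBOOT_MAGIC2A */
	case 0x16041998:		/* REBOOT_MAGIC2B */
		return (true);
	default:
		return (false);
	}
}

int
main(void)
{
	printf("%d\n", linux_reboot_magic_ok(0xfee1dead, 0x28121969)); /* 1 */
	printf("%d\n", linux_reboot_magic_ok(0xfee1dead, 0xdeadbeef)); /* 0 */
	return (0);
}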
/* * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify * td->td_retval[1] when COMPAT_43 is defined. This clobbers registers that * are assumed to be preserved. The following lightweight syscalls fix * this. See also linux_getgid16() and linux_getuid16() in linux_uid16.c * * linux_getpid() - MP SAFE * linux_getgid() - MP SAFE * linux_getuid() - MP SAFE */ int linux_getpid(struct thread *td, struct linux_getpid_args *args) { #ifdef DEBUG if (ldebug(getpid)) printf(ARGS(getpid, "")); #endif td->td_retval[0] = td->td_proc->p_pid; return (0); } int linux_gettid(struct thread *td, struct linux_gettid_args *args) { struct linux_emuldata *em; #ifdef DEBUG if (ldebug(gettid)) printf(ARGS(gettid, "")); #endif em = em_find(td); KASSERT(em != NULL, ("gettid: emuldata not found.\n")); td->td_retval[0] = em->em_tid; return (0); } int linux_getppid(struct thread *td, struct linux_getppid_args *args) { #ifdef DEBUG if (ldebug(getppid)) printf(ARGS(getppid, "")); #endif td->td_retval[0] = kern_getppid(td); return (0); } int linux_getgid(struct thread *td, struct linux_getgid_args *args) { #ifdef DEBUG if (ldebug(getgid)) printf(ARGS(getgid, "")); #endif td->td_retval[0] = td->td_ucred->cr_rgid; return (0); } int linux_getuid(struct thread *td, struct linux_getuid_args *args) { #ifdef DEBUG if (ldebug(getuid)) printf(ARGS(getuid, "")); #endif td->td_retval[0] = td->td_ucred->cr_ruid; return (0); } int linux_getsid(struct thread *td, struct linux_getsid_args *args) { struct getsid_args bsd; #ifdef DEBUG if (ldebug(getsid)) printf(ARGS(getsid, "%i"), args->pid); #endif bsd.pid = args->pid; return (sys_getsid(td, &bsd)); } int linux_nosys(struct thread *td, struct nosys_args *ignore) { return (ENOSYS); } int linux_getpriority(struct thread *td, struct linux_getpriority_args *args) { struct getpriority_args bsd_args; int error; #ifdef DEBUG if (ldebug(getpriority)) printf(ARGS(getpriority, "%i, %i"), args->which, args->who); #endif bsd_args.which = args->which; bsd_args.who = args->who; error = sys_getpriority(td, &bsd_args); td->td_retval[0] = 20 - td->td_retval[0]; return (error); } int linux_sethostname(struct thread *td, struct linux_sethostname_args *args) { int name[2]; #ifdef DEBUG if (ldebug(sethostname)) printf(ARGS(sethostname, "*, %i"), args->len); #endif name[0] = CTL_KERN; name[1] = KERN_HOSTNAME; return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, args->len, 0, 0)); } int linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) { int name[2]; #ifdef DEBUG if (ldebug(setdomainname)) printf(ARGS(setdomainname, "*, %i"), args->len); #endif name[0] = CTL_KERN; name[1] = KERN_NISDOMAINNAME; return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, args->len, 0, 0)); } int linux_exit_group(struct thread *td, struct linux_exit_group_args *args) { #ifdef DEBUG if (ldebug(exit_group)) printf(ARGS(exit_group, "%i"), args->error_code); #endif LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, args->error_code); /* * XXX: we should send a signal to the parent if * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) * as it doesn't occur often.
*/ exit1(td, args->error_code, 0); /* NOTREACHED */ } #define _LINUX_CAPABILITY_VERSION_1 0x19980330 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 struct l_user_cap_header { l_int version; l_int pid; }; struct l_user_cap_data { l_int effective; l_int permitted; l_int inheritable; }; int linux_capget(struct thread *td, struct linux_capget_args *uap) { struct l_user_cap_header luch; struct l_user_cap_data lucd[2]; int error, u32s; if (uap->hdrp == NULL) return (EFAULT); error = copyin(uap->hdrp, &luch, sizeof(luch)); if (error != 0) return (error); switch (luch.version) { case _LINUX_CAPABILITY_VERSION_1: u32s = 1; break; case _LINUX_CAPABILITY_VERSION_2: case _LINUX_CAPABILITY_VERSION_3: u32s = 2; break; default: #ifdef DEBUG if (ldebug(capget)) printf(LMSG("invalid capget capability version 0x%x"), luch.version); #endif luch.version = _LINUX_CAPABILITY_VERSION_1; error = copyout(&luch, uap->hdrp, sizeof(luch)); if (error) return (error); return (EINVAL); } if (luch.pid) return (EPERM); if (uap->datap) { /* * The current implementation doesn't support setting * a capability (it's essentially a stub) so indicate * that no capabilities are currently set or available * to request. */ memset(&lucd, 0, u32s * sizeof(lucd[0])); error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); } return (error); } int linux_capset(struct thread *td, struct linux_capset_args *uap) { struct l_user_cap_header luch; struct l_user_cap_data lucd[2]; int error, i, u32s; if (uap->hdrp == NULL || uap->datap == NULL) return (EFAULT); error = copyin(uap->hdrp, &luch, sizeof(luch)); if (error != 0) return (error); switch (luch.version) { case _LINUX_CAPABILITY_VERSION_1: u32s = 1; break; case _LINUX_CAPABILITY_VERSION_2: case _LINUX_CAPABILITY_VERSION_3: u32s = 2; break; default: #ifdef DEBUG if (ldebug(capset)) printf(LMSG("invalid capset capability version 0x%x"), luch.version); #endif luch.version = _LINUX_CAPABILITY_VERSION_1; error = copyout(&luch, uap->hdrp, sizeof(luch)); if (error) return (error); return (EINVAL); } if (luch.pid) return (EPERM); error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); if (error != 0) return (error); /* We currently don't support setting any capabilities. 
*/ for (i = 0; i < u32s; i++) { if (lucd[i].effective || lucd[i].permitted || lucd[i].inheritable) { linux_msg(td, "capset[%d] effective=0x%x, permitted=0x%x, " "inheritable=0x%x is not implemented", i, (int)lucd[i].effective, (int)lucd[i].permitted, (int)lucd[i].inheritable); return (EPERM); } } return (0); } int linux_prctl(struct thread *td, struct linux_prctl_args *args) { int error = 0, max_size; struct proc *p = td->td_proc; char comm[LINUX_MAX_COMM_LEN]; - struct linux_emuldata *em; int pdeath_signal; #ifdef DEBUG if (ldebug(prctl)) printf(ARGS(prctl, "%d, %ju, %ju, %ju, %ju"), args->option, (uintmax_t)args->arg2, (uintmax_t)args->arg3, (uintmax_t)args->arg4, (uintmax_t)args->arg5); #endif switch (args->option) { case LINUX_PR_SET_PDEATHSIG: if (!LINUX_SIG_VALID(args->arg2)) return (EINVAL); - em = em_find(td); - KASSERT(em != NULL, ("prctl: emuldata not found.\n")); - em->pdeath_signal = args->arg2; - break; + pdeath_signal = linux_to_bsd_signal(args->arg2); + return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, + &pdeath_signal)); case LINUX_PR_GET_PDEATHSIG: - em = em_find(td); - KASSERT(em != NULL, ("prctl: emuldata not found.\n")); - pdeath_signal = em->pdeath_signal; - error = copyout(&pdeath_signal, + error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, + &pdeath_signal); + if (error != 0) + return (error); + pdeath_signal = bsd_to_linux_signal(pdeath_signal); + return (copyout(&pdeath_signal, (void *)(register_t)args->arg2, - sizeof(pdeath_signal)); + sizeof(pdeath_signal))); break; case LINUX_PR_GET_KEEPCAPS: /* * Indicate that we always clear the effective and * permitted capability sets when the user id becomes * non-zero (actually the capability sets are simply * always zero in the current implementation). */ td->td_retval[0] = 0; break; case LINUX_PR_SET_KEEPCAPS: /* * Ignore requests to keep the effective and permitted * capability sets when the user id becomes non-zero. */ break; case LINUX_PR_SET_NAME: /* * To be on the safe side we need to make sure to not * overflow the size a Linux program expects. We already * do this here in the copyin, so that we don't need to * check on copyout. */ max_size = MIN(sizeof(comm), sizeof(p->p_comm)); error = copyinstr((void *)(register_t)args->arg2, comm, max_size, NULL); /* Linux silently truncates the name if it is too long. */ if (error == ENAMETOOLONG) { /* * XXX: copyinstr() isn't documented to populate the * array completely, so do a copyin() to be on the * safe side. This should be changed in case * copyinstr() is changed to guarantee this. 
*/ error = copyin((void *)(register_t)args->arg2, comm, max_size - 1); comm[max_size - 1] = '\0'; } if (error) return (error); PROC_LOCK(p); strlcpy(p->p_comm, comm, sizeof(p->p_comm)); PROC_UNLOCK(p); break; case LINUX_PR_GET_NAME: PROC_LOCK(p); strlcpy(comm, p->p_comm, sizeof(comm)); PROC_UNLOCK(p); error = copyout(comm, (void *)(register_t)args->arg2, strlen(comm) + 1); break; default: error = EINVAL; break; } return (error); } int linux_sched_setparam(struct thread *td, struct linux_sched_setparam_args *uap) { struct sched_param sched_param; struct thread *tdt; int error; #ifdef DEBUG if (ldebug(sched_setparam)) printf(ARGS(sched_setparam, "%d, *"), uap->pid); #endif error = copyin(uap->param, &sched_param, sizeof(sched_param)); if (error) return (error); tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_setparam(td, tdt, &sched_param); PROC_UNLOCK(tdt->td_proc); return (error); } int linux_sched_getparam(struct thread *td, struct linux_sched_getparam_args *uap) { struct sched_param sched_param; struct thread *tdt; int error; #ifdef DEBUG if (ldebug(sched_getparam)) printf(ARGS(sched_getparam, "%d, *"), uap->pid); #endif tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_getparam(td, tdt, &sched_param); PROC_UNLOCK(tdt->td_proc); if (error == 0) error = copyout(&sched_param, uap->param, sizeof(sched_param)); return (error); } /* * Get affinity of a process. */ int linux_sched_getaffinity(struct thread *td, struct linux_sched_getaffinity_args *args) { int error; struct thread *tdt; #ifdef DEBUG if (ldebug(sched_getaffinity)) printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid, args->len); #endif if (args->len < sizeof(cpuset_t)) return (EINVAL); tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); PROC_UNLOCK(tdt->td_proc); error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr); if (error == 0) td->td_retval[0] = sizeof(cpuset_t); return (error); } /* * Set affinity of a process. */ int linux_sched_setaffinity(struct thread *td, struct linux_sched_setaffinity_args *args) { struct thread *tdt; #ifdef DEBUG if (ldebug(sched_setaffinity)) printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid, args->len); #endif if (args->len < sizeof(cpuset_t)) return (EINVAL); tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); PROC_UNLOCK(tdt->td_proc); return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr)); }
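[Editor's aside: both affinity handlers above reject a user mask smaller than the kernel's cpuset_t and, on success, report sizeof(cpuset_t) back as the number of bytes filled in; the raw Linux syscall returns that mask size, while the glibc wrapper maps success to 0. From the Linux side, a call they would service looks like the following Linux-only sketch with hypothetical output.]

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int
main(void)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	/* The emulation requires sizeof(mask) >= sizeof(cpuset_t). */
	if (sched_getaffinity(0, sizeof(mask), &mask) != 0) {
		perror("sched_getaffinity");
		return (1);
	}
	printf("runnable on %d CPUs\n", CPU_COUNT(&mask));
	return (0);
}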
struct linux_rlimit64 { uint64_t rlim_cur; uint64_t rlim_max; }; int linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) { struct rlimit rlim, nrlim; struct linux_rlimit64 lrlim; struct proc *p; u_int which; int flags; int error; #ifdef DEBUG if (ldebug(prlimit64)) printf(ARGS(prlimit64, "%d, %d, %p, %p"), args->pid, args->resource, (void *)args->new, (void *)args->old); #endif if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); if (args->new != NULL) { /* * Note: unlike FreeBSD, where rlim is signed 64-bit, the Linux * rlim is unsigned 64-bit. FreeBSD treats negative limits * as INFINITY, so we do not even need a conversion. */ error = copyin(args->new, &nrlim, sizeof(nrlim)); if (error != 0) return (error); } flags = PGET_HOLD | PGET_NOTWEXIT; if (args->new != NULL) flags |= PGET_CANDEBUG; else flags |= PGET_CANSEE; error = pget(args->pid, flags, &p); if (error != 0) return (error); if (args->old != NULL) { PROC_LOCK(p); lim_rlimit_proc(p, which, &rlim); PROC_UNLOCK(p); if (rlim.rlim_cur == RLIM_INFINITY) lrlim.rlim_cur = LINUX_RLIM_INFINITY; else lrlim.rlim_cur = rlim.rlim_cur; if (rlim.rlim_max == RLIM_INFINITY) lrlim.rlim_max = LINUX_RLIM_INFINITY; else lrlim.rlim_max = rlim.rlim_max; error = copyout(&lrlim, args->old, sizeof(lrlim)); if (error != 0) goto out; } if (args->new != NULL) error = kern_proc_setrlimit(td, p, which, &nrlim); out: PRELE(p); return (error); } int linux_pselect6(struct thread *td, struct linux_pselect6_args *args) { struct timeval utv, tv0, tv1, *tvp; struct l_pselect6arg lpse6; struct l_timespec lts; struct timespec uts; l_sigset_t l_ss; sigset_t *ssp; sigset_t ss; int error; ssp = NULL; if (args->sig != NULL) { error = copyin(args->sig, &lpse6, sizeof(lpse6)); if (error != 0) return (error); if (lpse6.ss_len != sizeof(l_ss)) return (EINVAL); if (lpse6.ss != 0) { error = copyin(PTRIN(lpse6.ss), &l_ss, sizeof(l_ss)); if (error != 0) return (error); linux_to_bsd_sigset(&l_ss, &ss); ssp = &ss; } } /* * Currently glibc changes the nanosecond number to microseconds. * This means losing precision, but for now it is hardly noticeable. */ if (args->tsp != NULL) { error = copyin(args->tsp, &lts, sizeof(lts)); if (error != 0) return (error); error = linux_to_native_timespec(&uts, &lts); if (error != 0) return (error); TIMESPEC_TO_TIMEVAL(&utv, &uts); if (itimerfix(&utv)) return (EINVAL); microtime(&tv0); tvp = &utv; } else tvp = NULL; error = kern_pselect(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tvp, ssp, LINUX_NFDBITS); if (error == 0 && args->tsp != NULL) { if (td->td_retval[0] != 0) { /* * Compute how much time was left of the timeout, * by subtracting the current time and the time * before we started the call, and subtracting * that result from the user-supplied value. */ microtime(&tv1); timevalsub(&tv1, &tv0); timevalsub(&utv, &tv1); if (utv.tv_sec < 0) timevalclear(&utv); } else timevalclear(&utv); TIMEVAL_TO_TIMESPEC(&utv, &uts); error = native_to_linux_timespec(&lts, &uts); if (error == 0) error = copyout(&lts, args->tsp, sizeof(lts)); } return (error); } int linux_ppoll(struct thread *td, struct linux_ppoll_args *args) { struct timespec ts0, ts1; struct l_timespec lts; struct timespec uts, *tsp; l_sigset_t l_ss; sigset_t *ssp; sigset_t ss; int error; if (args->sset != NULL) { if (args->ssize != sizeof(l_ss)) return (EINVAL); error = copyin(args->sset, &l_ss, sizeof(l_ss)); if (error) return (error); linux_to_bsd_sigset(&l_ss, &ss); ssp = &ss; } else ssp = NULL; if (args->tsp != NULL) { error = copyin(args->tsp, &lts, sizeof(lts)); if (error) return (error); error = linux_to_native_timespec(&uts, &lts); if (error != 0) return (error); nanotime(&ts0); tsp = &uts; } else tsp = NULL; error = kern_poll(td, args->fds, args->nfds, tsp, ssp); if (error == 0 && args->tsp != NULL) { if (td->td_retval[0]) { nanotime(&ts1); timespecsub(&ts1, &ts0, &ts1); timespecsub(&uts, &ts1, &uts); if (uts.tv_sec < 0) timespecclear(&uts); } else timespecclear(&uts); error = native_to_linux_timespec(&lts, &uts); if (error == 0) error = copyout(&lts, args->tsp, sizeof(lts)); } return (error); }
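[Editor's aside: like linux_select() earlier, pselect6 and ppoll write the unslept time back to userland: remaining = requested - elapsed, clamped at zero. The timespecsub()-based code above reduces to this self-contained helper; names are hypothetical.]

#include <stdint.h>
#include <stdio.h>
#include <time.h>

static struct timespec
remaining_timeout(struct timespec requested, struct timespec elapsed)
{
	struct timespec r;

	r.tv_sec = requested.tv_sec - elapsed.tv_sec;
	r.tv_nsec = requested.tv_nsec - elapsed.tv_nsec;
	if (r.tv_nsec < 0) {		/* borrow one second */
		r.tv_sec--;
		r.tv_nsec += 1000000000L;
	}
	if (r.tv_sec < 0)		/* slept past the deadline */
		r.tv_sec = r.tv_nsec = 0;
	return (r);
}

int
main(void)
{
	struct timespec req = { 5, 0 }, el = { 1, 250000000L };
	struct timespec rem = remaining_timeout(req, el);

	/* Prints "3.750000000". */
	printf("%jd.%09ld\n", (intmax_t)rem.tv_sec, rem.tv_nsec);
	return (0);
}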
#if defined(DEBUG) || defined(KTR) /* XXX: can be removed when all the ldebug(...) and KTR stuff is removed. */ #ifdef COMPAT_LINUX32 #define L_MAXSYSCALL LINUX32_SYS_MAXSYSCALL #else #define L_MAXSYSCALL LINUX_SYS_MAXSYSCALL #endif u_char linux_debug_map[howmany(L_MAXSYSCALL, sizeof(u_char))]; static int linux_debug(int syscall, int toggle, int global) { if (global) { char c = toggle ? 0 : 0xff; memset(linux_debug_map, c, sizeof(linux_debug_map)); return (0); } if (syscall < 0 || syscall >= L_MAXSYSCALL) return (EINVAL); if (toggle) clrbit(linux_debug_map, syscall); else setbit(linux_debug_map, syscall); return (0); } #undef L_MAXSYSCALL /* * Usage: sysctl linux.debug=<syscall_nr>.<0/1> * * E.g.: sysctl linux.debug=21.0 * * As a special case, syscall "all" will apply to all syscalls globally. */ #define LINUX_MAX_DEBUGSTR 16 int linux_sysctl_debug(SYSCTL_HANDLER_ARGS) { char value[LINUX_MAX_DEBUGSTR], *p; int error, sysc, toggle; int global = 0; value[0] = '\0'; error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req); if (error || req->newptr == NULL) return (error); for (p = value; *p != '\0' && *p != '.'; p++); if (*p == '\0') return (EINVAL); *p++ = '\0'; sysc = strtol(value, NULL, 0); toggle = strtol(p, NULL, 0); if (strcmp(value, "all") == 0) global = 1; error = linux_debug(sysc, toggle, global); return (error); } #endif /* DEBUG || KTR */ int linux_sched_rr_get_interval(struct thread *td, struct linux_sched_rr_get_interval_args *uap) { struct timespec ts; struct l_timespec lts; struct thread *tdt; int error; /* * According to the man page, EINVAL should be returned * when an invalid pid is specified. */ if (uap->pid < 0) return (EINVAL); tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_rr_get_interval_td(td, tdt, &ts); PROC_UNLOCK(tdt->td_proc); if (error != 0) return (error); error = native_to_linux_timespec(&lts, &ts); if (error != 0) return (error); return (copyout(&lts, uap->interval, sizeof(lts))); } /* * When the Linux thread is the initial thread in the thread group, * the thread id is equal to the process id. Glibc depends on this * magic (assert in pthread_getattr_np.c). */ struct thread * linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) { struct linux_emuldata *em; struct thread *tdt; struct proc *p; tdt = NULL; if (tid == 0 || tid == td->td_tid) { tdt = td; PROC_LOCK(tdt->td_proc); } else if (tid > PID_MAX) tdt = tdfind(tid, pid); else { /* * Initial thread, where the tid is equal to the pid. */ p = pfind(tid); if (p != NULL) { if (SV_PROC_ABI(p) != SV_ABI_LINUX) { /* * p is not a Linuxulator process.
*/ PROC_UNLOCK(p); return (NULL); } FOREACH_THREAD_IN_PROC(p, tdt) { em = em_find(tdt); if (tid == em->em_tid) return (tdt); } PROC_UNLOCK(p); } return (NULL); } return (tdt); } void linux_to_bsd_waitopts(int options, int *bsdopts) { if (options & LINUX_WNOHANG) *bsdopts |= WNOHANG; if (options & LINUX_WUNTRACED) *bsdopts |= WUNTRACED; if (options & LINUX_WEXITED) *bsdopts |= WEXITED; if (options & LINUX_WCONTINUED) *bsdopts |= WCONTINUED; if (options & LINUX_WNOWAIT) *bsdopts |= WNOWAIT; if (options & __WCLONE) *bsdopts |= WLINUXCLONE; } int linux_getrandom(struct thread *td, struct linux_getrandom_args *args) { struct uio uio; struct iovec iov; int error; if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) return (EINVAL); if (args->count > INT_MAX) args->count = INT_MAX; iov.iov_base = args->buf; iov.iov_len = args->count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_resid = iov.iov_len; uio.uio_segflg = UIO_USERSPACE; uio.uio_rw = UIO_READ; uio.uio_td = td; error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); if (error == 0) td->td_retval[0] = args->count - uio.uio_resid; return (error); } int linux_mincore(struct thread *td, struct linux_mincore_args *args) { /* Needs to be page-aligned */ if (args->start & PAGE_MASK) return (EINVAL); return (kern_mincore(td, args->start, args->len, args->vec)); } Index: projects/runtime-coverage-v2/sys/compat/linuxkpi/common/include/linux/dmapool.h =================================================================== --- projects/runtime-coverage-v2/sys/compat/linuxkpi/common/include/linux/dmapool.h (revision 347075) +++ projects/runtime-coverage-v2/sys/compat/linuxkpi/common/include/linux/dmapool.h (revision 347076) @@ -1,95 +1,86 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _LINUX_DMAPOOL_H_ #define _LINUX_DMAPOOL_H_ #include #include #include #include #include +struct dma_pool; struct dma_pool *linux_dma_pool_create(char *name, struct device *dev, size_t size, size_t align, size_t boundary); void linux_dma_pool_destroy(struct dma_pool *pool); void *linux_dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle); void linux_dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma_addr); - -struct dma_pool { - struct pci_dev *pool_pdev; - uma_zone_t pool_zone; - struct mtx pool_dma_lock; - bus_dma_tag_t pool_dmat; - size_t pool_entry_size; - struct mtx pool_ptree_lock; - struct pctrie pool_ptree; -}; static inline struct dma_pool * dma_pool_create(char *name, struct device *dev, size_t size, size_t align, size_t boundary) { return (linux_dma_pool_create(name, dev, size, align, boundary)); } static inline void dma_pool_destroy(struct dma_pool *pool) { linux_dma_pool_destroy(pool); } static inline void * dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle) { return (linux_dma_pool_alloc(pool, mem_flags, handle)); } static inline void * dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle) { return (dma_pool_alloc(pool, mem_flags | __GFP_ZERO, handle)); } static inline void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma_addr) { linux_dma_pool_free(pool, vaddr, dma_addr); } #endif /* _LINUX_DMAPOOL_H_ */ Index: projects/runtime-coverage-v2/sys/compat/linuxkpi/common/src/linux_pci.c =================================================================== --- projects/runtime-coverage-v2/sys/compat/linuxkpi/common/src/linux_pci.c (revision 347075) +++ projects/runtime-coverage-v2/sys/compat/linuxkpi/common/src/linux_pci.c (revision 347076) @@ -1,828 +1,824 @@ /*- * Copyright (c) 2015-2016 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static device_probe_t linux_pci_probe; static device_attach_t linux_pci_attach; static device_detach_t linux_pci_detach; static device_suspend_t linux_pci_suspend; static device_resume_t linux_pci_resume; static device_shutdown_t linux_pci_shutdown; static device_method_t pci_methods[] = { DEVMETHOD(device_probe, linux_pci_probe), DEVMETHOD(device_attach, linux_pci_attach), DEVMETHOD(device_detach, linux_pci_detach), DEVMETHOD(device_suspend, linux_pci_suspend), DEVMETHOD(device_resume, linux_pci_resume), DEVMETHOD(device_shutdown, linux_pci_shutdown), DEVMETHOD_END }; struct linux_dma_priv { uint64_t dma_mask; - struct mtx dma_lock; + struct mtx lock; bus_dma_tag_t dmat; - struct mtx ptree_lock; struct pctrie ptree; }; +#define DMA_PRIV_LOCK(priv) mtx_lock(&(priv)->lock) +#define DMA_PRIV_UNLOCK(priv) mtx_unlock(&(priv)->lock) static int linux_pdev_dma_init(struct pci_dev *pdev) { struct linux_dma_priv *priv; priv = malloc(sizeof(*priv), M_DEVBUF, M_WAITOK | M_ZERO); pdev->dev.dma_priv = priv; - mtx_init(&priv->dma_lock, "linux_dma", NULL, MTX_DEF); + mtx_init(&priv->lock, "lkpi-priv-dma", NULL, MTX_DEF); - mtx_init(&priv->ptree_lock, "linux_dma_ptree", NULL, MTX_DEF); pctrie_init(&priv->ptree); return (0); } static int linux_pdev_dma_uninit(struct pci_dev *pdev) { struct linux_dma_priv *priv; priv = pdev->dev.dma_priv; if (priv->dmat) bus_dma_tag_destroy(priv->dmat); - mtx_destroy(&priv->dma_lock); - mtx_destroy(&priv->ptree_lock); + mtx_destroy(&priv->lock); free(priv, M_DEVBUF); pdev->dev.dma_priv = NULL; return (0); } int linux_dma_tag_init(struct device *dev, u64 dma_mask) { struct linux_dma_priv *priv; int error; priv = dev->dma_priv; if (priv->dmat) { if (priv->dma_mask == dma_mask) return (0); bus_dma_tag_destroy(priv->dmat); } priv->dma_mask = dma_mask; error = bus_dma_tag_create(bus_get_dma_tag(dev->bsddev), 1, 0, /* alignment, boundary */ dma_mask, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ BUS_SPACE_MAXSIZE, /* maxsize */ 1, /* nsegments */ BUS_SPACE_MAXSIZE, /* maxsegsz */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &priv->dmat); return (-error); } static struct pci_driver * linux_pci_find(device_t dev, const struct pci_device_id **idp) { const struct pci_device_id *id; struct pci_driver *pdrv; uint16_t vendor; uint16_t device; uint16_t subvendor; uint16_t subdevice; vendor = pci_get_vendor(dev); device = pci_get_device(dev); subvendor = pci_get_subvendor(dev); subdevice = pci_get_subdevice(dev); spin_lock(&pci_lock); list_for_each_entry(pdrv, &pci_drivers, links) { for (id = pdrv->id_table; id->vendor != 0; id++) { if (vendor == id->vendor && (PCI_ANY_ID == id->device || device == id->device) && (PCI_ANY_ID == id->subvendor || subvendor == id->subvendor) && (PCI_ANY_ID == id->subdevice || subdevice == id->subdevice)) { *idp = id; spin_unlock(&pci_lock); return (pdrv); } } } spin_unlock(&pci_lock); return (NULL); } static int linux_pci_probe(device_t dev) { const struct pci_device_id *id; struct pci_driver *pdrv; if ((pdrv = linux_pci_find(dev, &id)) == NULL) return (ENXIO); if (device_get_driver(dev) != &pdrv->bsddriver) return (ENXIO); device_set_desc(dev, pdrv->name); return (0); } static int linux_pci_attach(device_t dev) { struct 
resource_list_entry *rle; struct pci_bus *pbus; struct pci_dev *pdev; struct pci_devinfo *dinfo; struct pci_driver *pdrv; const struct pci_device_id *id; device_t parent; devclass_t devclass; int error; linux_set_current(curthread); pdrv = linux_pci_find(dev, &id); pdev = device_get_softc(dev); parent = device_get_parent(dev); devclass = device_get_devclass(parent); if (pdrv->isdrm) { dinfo = device_get_ivars(parent); device_set_ivars(dev, dinfo); } else { dinfo = device_get_ivars(dev); } pdev->dev.parent = &linux_root_device; pdev->dev.bsddev = dev; INIT_LIST_HEAD(&pdev->dev.irqents); pdev->devfn = PCI_DEVFN(pci_get_slot(dev), pci_get_function(dev)); pdev->device = dinfo->cfg.device; pdev->vendor = dinfo->cfg.vendor; pdev->subsystem_vendor = dinfo->cfg.subvendor; pdev->subsystem_device = dinfo->cfg.subdevice; pdev->class = pci_get_class(dev); pdev->revision = pci_get_revid(dev); pdev->pdrv = pdrv; kobject_init(&pdev->dev.kobj, &linux_dev_ktype); kobject_set_name(&pdev->dev.kobj, device_get_nameunit(dev)); kobject_add(&pdev->dev.kobj, &linux_root_device.kobj, kobject_name(&pdev->dev.kobj)); rle = linux_pci_get_rle(pdev, SYS_RES_IRQ, 0); if (rle != NULL) pdev->dev.irq = rle->start; else pdev->dev.irq = LINUX_IRQ_INVALID; pdev->irq = pdev->dev.irq; error = linux_pdev_dma_init(pdev); if (error) goto out; if (pdev->bus == NULL) { pbus = malloc(sizeof(*pbus), M_DEVBUF, M_WAITOK | M_ZERO); pbus->self = pdev; pbus->number = pci_get_bus(dev); pdev->bus = pbus; } spin_lock(&pci_lock); list_add(&pdev->links, &pci_devices); spin_unlock(&pci_lock); error = pdrv->probe(pdev, id); out: if (error) { spin_lock(&pci_lock); list_del(&pdev->links); spin_unlock(&pci_lock); put_device(&pdev->dev); error = -error; } return (error); } static int linux_pci_detach(device_t dev) { struct pci_dev *pdev; linux_set_current(curthread); pdev = device_get_softc(dev); pdev->pdrv->remove(pdev); linux_pdev_dma_uninit(pdev); spin_lock(&pci_lock); list_del(&pdev->links); spin_unlock(&pci_lock); device_set_desc(dev, NULL); put_device(&pdev->dev); return (0); } static int linux_pci_suspend(device_t dev) { const struct dev_pm_ops *pmops; struct pm_message pm = { }; struct pci_dev *pdev; int error; error = 0; linux_set_current(curthread); pdev = device_get_softc(dev); pmops = pdev->pdrv->driver.pm; if (pdev->pdrv->suspend != NULL) error = -pdev->pdrv->suspend(pdev, pm); else if (pmops != NULL && pmops->suspend != NULL) { error = -pmops->suspend(&pdev->dev); if (error == 0 && pmops->suspend_late != NULL) error = -pmops->suspend_late(&pdev->dev); } return (error); } static int linux_pci_resume(device_t dev) { const struct dev_pm_ops *pmops; struct pci_dev *pdev; int error; error = 0; linux_set_current(curthread); pdev = device_get_softc(dev); pmops = pdev->pdrv->driver.pm; if (pdev->pdrv->resume != NULL) error = -pdev->pdrv->resume(pdev); else if (pmops != NULL && pmops->resume != NULL) { if (pmops->resume_early != NULL) error = -pmops->resume_early(&pdev->dev); if (error == 0 && pmops->resume != NULL) error = -pmops->resume(&pdev->dev); } return (error); } static int linux_pci_shutdown(device_t dev) { struct pci_dev *pdev; linux_set_current(curthread); pdev = device_get_softc(dev); if (pdev->pdrv->shutdown != NULL) pdev->pdrv->shutdown(pdev); return (0); } static int _linux_pci_register_driver(struct pci_driver *pdrv, devclass_t dc) { int error; linux_set_current(curthread); spin_lock(&pci_lock); list_add(&pdrv->links, &pci_drivers); spin_unlock(&pci_lock); pdrv->bsddriver.name = pdrv->name; pdrv->bsddriver.methods = pci_methods; 
pdrv->bsddriver.size = sizeof(struct pci_dev); mtx_lock(&Giant); error = devclass_add_driver(dc, &pdrv->bsddriver, BUS_PASS_DEFAULT, &pdrv->bsdclass); mtx_unlock(&Giant); return (-error); } int linux_pci_register_driver(struct pci_driver *pdrv) { devclass_t dc; dc = devclass_find("pci"); if (dc == NULL) return (-ENXIO); pdrv->isdrm = false; return (_linux_pci_register_driver(pdrv, dc)); } int linux_pci_register_drm_driver(struct pci_driver *pdrv) { devclass_t dc; dc = devclass_create("vgapci"); if (dc == NULL) return (-ENXIO); pdrv->isdrm = true; pdrv->name = "drmn"; return (_linux_pci_register_driver(pdrv, dc)); } void linux_pci_unregister_driver(struct pci_driver *pdrv) { devclass_t bus; bus = devclass_find("pci"); spin_lock(&pci_lock); list_del(&pdrv->links); spin_unlock(&pci_lock); mtx_lock(&Giant); if (bus != NULL) devclass_delete_driver(bus, &pdrv->bsddriver); mtx_unlock(&Giant); } CTASSERT(sizeof(dma_addr_t) <= sizeof(uint64_t)); struct linux_dma_obj { void *vaddr; uint64_t dma_addr; bus_dmamap_t dmamap; }; static uma_zone_t linux_dma_trie_zone; static uma_zone_t linux_dma_obj_zone; static void linux_dma_init(void *arg) { linux_dma_trie_zone = uma_zcreate("linux_dma_pctrie", pctrie_node_size(), NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, 0); linux_dma_obj_zone = uma_zcreate("linux_dma_object", sizeof(struct linux_dma_obj), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } SYSINIT(linux_dma, SI_SUB_DRIVERS, SI_ORDER_THIRD, linux_dma_init, NULL); static void linux_dma_uninit(void *arg) { uma_zdestroy(linux_dma_obj_zone); uma_zdestroy(linux_dma_trie_zone); } SYSUNINIT(linux_dma, SI_SUB_DRIVERS, SI_ORDER_THIRD, linux_dma_uninit, NULL); static void * linux_dma_trie_alloc(struct pctrie *ptree) { return (uma_zalloc(linux_dma_trie_zone, 0)); } static void linux_dma_trie_free(struct pctrie *ptree, void *node) { uma_zfree(linux_dma_trie_zone, node); } PCTRIE_DEFINE(LINUX_DMA, linux_dma_obj, dma_addr, linux_dma_trie_alloc, linux_dma_trie_free); void * linux_dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { struct linux_dma_priv *priv; vm_paddr_t high; size_t align; void *mem; if (dev == NULL || dev->dma_priv == NULL) { *dma_handle = 0; return (NULL); } priv = dev->dma_priv; if (priv->dma_mask) high = priv->dma_mask; else if (flag & GFP_DMA32) high = BUS_SPACE_MAXADDR_32BIT; else high = BUS_SPACE_MAXADDR; align = PAGE_SIZE << get_order(size); mem = (void *)kmem_alloc_contig(size, flag, 0, high, align, 0, VM_MEMATTR_DEFAULT); if (mem) *dma_handle = linux_dma_map_phys(dev, vtophys(mem), size); else *dma_handle = 0; return (mem); } dma_addr_t linux_dma_map_phys(struct device *dev, vm_paddr_t phys, size_t len) { struct linux_dma_priv *priv; struct linux_dma_obj *obj; int error, nseg; bus_dma_segment_t seg; priv = dev->dma_priv; obj = uma_zalloc(linux_dma_obj_zone, 0); + DMA_PRIV_LOCK(priv); if (bus_dmamap_create(priv->dmat, 0, &obj->dmamap) != 0) { + DMA_PRIV_UNLOCK(priv); uma_zfree(linux_dma_obj_zone, obj); return (0); } nseg = -1; - mtx_lock(&priv->dma_lock); if (_bus_dmamap_load_phys(priv->dmat, obj->dmamap, phys, len, BUS_DMA_NOWAIT, &seg, &nseg) != 0) { bus_dmamap_destroy(priv->dmat, obj->dmamap); - mtx_unlock(&priv->dma_lock); + DMA_PRIV_UNLOCK(priv); uma_zfree(linux_dma_obj_zone, obj); return (0); } - mtx_unlock(&priv->dma_lock); KASSERT(++nseg == 1, ("More than one segment (nseg=%d)", nseg)); obj->dma_addr = seg.ds_addr; - mtx_lock(&priv->ptree_lock); error = LINUX_DMA_PCTRIE_INSERT(&priv->ptree, obj); - mtx_unlock(&priv->ptree_lock); if (error != 
0) { - mtx_lock(&priv->dma_lock); bus_dmamap_unload(priv->dmat, obj->dmamap); bus_dmamap_destroy(priv->dmat, obj->dmamap); - mtx_unlock(&priv->dma_lock); + DMA_PRIV_UNLOCK(priv); uma_zfree(linux_dma_obj_zone, obj); return (0); } - + DMA_PRIV_UNLOCK(priv); return (obj->dma_addr); } void linux_dma_unmap(struct device *dev, dma_addr_t dma_addr, size_t len) { struct linux_dma_priv *priv; struct linux_dma_obj *obj; priv = dev->dma_priv; - mtx_lock(&priv->ptree_lock); + DMA_PRIV_LOCK(priv); obj = LINUX_DMA_PCTRIE_LOOKUP(&priv->ptree, dma_addr); if (obj == NULL) { - mtx_unlock(&priv->ptree_lock); + DMA_PRIV_UNLOCK(priv); return; } LINUX_DMA_PCTRIE_REMOVE(&priv->ptree, dma_addr); - mtx_unlock(&priv->ptree_lock); - - mtx_lock(&priv->dma_lock); bus_dmamap_unload(priv->dmat, obj->dmamap); bus_dmamap_destroy(priv->dmat, obj->dmamap); - mtx_unlock(&priv->dma_lock); + DMA_PRIV_UNLOCK(priv); uma_zfree(linux_dma_obj_zone, obj); } int linux_dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, struct dma_attrs *attrs) { struct linux_dma_priv *priv; struct linux_dma_obj *obj; struct scatterlist *dma_sg, *sg; int dma_nents, error, nseg; size_t seg_len; vm_paddr_t seg_phys, prev_phys_end; bus_dma_segment_t seg; priv = dev->dma_priv; obj = uma_zalloc(linux_dma_obj_zone, 0); + DMA_PRIV_LOCK(priv); if (bus_dmamap_create(priv->dmat, 0, &obj->dmamap) != 0) { + DMA_PRIV_UNLOCK(priv); uma_zfree(linux_dma_obj_zone, obj); return (0); } sg = sgl; dma_sg = sg; dma_nents = 0; + while (nents > 0) { seg_phys = sg_phys(sg); seg_len = sg->length; while (--nents > 0) { prev_phys_end = sg_phys(sg) + sg->length; sg = sg_next(sg); if (prev_phys_end != sg_phys(sg)) break; seg_len += sg->length; } nseg = -1; - mtx_lock(&priv->dma_lock); if (_bus_dmamap_load_phys(priv->dmat, obj->dmamap, seg_phys, seg_len, BUS_DMA_NOWAIT, &seg, &nseg) != 0) { bus_dmamap_unload(priv->dmat, obj->dmamap); bus_dmamap_destroy(priv->dmat, obj->dmamap); - mtx_unlock(&priv->dma_lock); + DMA_PRIV_UNLOCK(priv); uma_zfree(linux_dma_obj_zone, obj); return (0); } - mtx_unlock(&priv->dma_lock); KASSERT(++nseg == 1, ("More than one segment (nseg=%d)", nseg)); sg_dma_address(dma_sg) = seg.ds_addr; sg_dma_len(dma_sg) = seg.ds_len; dma_sg = sg_next(dma_sg); dma_nents++; } obj->dma_addr = sg_dma_address(sgl); - mtx_lock(&priv->ptree_lock); error = LINUX_DMA_PCTRIE_INSERT(&priv->ptree, obj); - mtx_unlock(&priv->ptree_lock); if (error != 0) { - mtx_lock(&priv->dma_lock); bus_dmamap_unload(priv->dmat, obj->dmamap); bus_dmamap_destroy(priv->dmat, obj->dmamap); - mtx_unlock(&priv->dma_lock); + DMA_PRIV_UNLOCK(priv); uma_zfree(linux_dma_obj_zone, obj); return (0); } - + DMA_PRIV_UNLOCK(priv); return (dma_nents); } void linux_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, struct dma_attrs *attrs) { struct linux_dma_priv *priv; struct linux_dma_obj *obj; priv = dev->dma_priv; - mtx_lock(&priv->ptree_lock); + DMA_PRIV_LOCK(priv); obj = LINUX_DMA_PCTRIE_LOOKUP(&priv->ptree, sg_dma_address(sgl)); if (obj == NULL) { - mtx_unlock(&priv->ptree_lock); + DMA_PRIV_UNLOCK(priv); return; } LINUX_DMA_PCTRIE_REMOVE(&priv->ptree, sg_dma_address(sgl)); - mtx_unlock(&priv->ptree_lock); - - mtx_lock(&priv->dma_lock); bus_dmamap_unload(priv->dmat, obj->dmamap); bus_dmamap_destroy(priv->dmat, obj->dmamap); - mtx_unlock(&priv->dma_lock); + DMA_PRIV_UNLOCK(priv); uma_zfree(linux_dma_obj_zone, obj); } +struct dma_pool { + struct device *pool_device; + uma_zone_t pool_zone; + struct mtx 
pool_lock; + bus_dma_tag_t pool_dmat; + size_t pool_entry_size; + struct pctrie pool_ptree; +}; + +#define DMA_POOL_LOCK(pool) mtx_lock(&(pool)->pool_lock) +#define DMA_POOL_UNLOCK(pool) mtx_unlock(&(pool)->pool_lock) + static inline int dma_pool_obj_ctor(void *mem, int size, void *arg, int flags) { struct linux_dma_obj *obj = mem; struct dma_pool *pool = arg; int error, nseg; bus_dma_segment_t seg; nseg = -1; - mtx_lock(&pool->pool_dma_lock); + DMA_POOL_LOCK(pool); error = _bus_dmamap_load_phys(pool->pool_dmat, obj->dmamap, vtophys(obj->vaddr), pool->pool_entry_size, BUS_DMA_NOWAIT, &seg, &nseg); - mtx_unlock(&pool->pool_dma_lock); + DMA_POOL_UNLOCK(pool); if (error != 0) { return (error); } KASSERT(++nseg == 1, ("More than one segment (nseg=%d)", nseg)); obj->dma_addr = seg.ds_addr; return (0); } static void dma_pool_obj_dtor(void *mem, int size, void *arg) { struct linux_dma_obj *obj = mem; struct dma_pool *pool = arg; - mtx_lock(&pool->pool_dma_lock); + DMA_POOL_LOCK(pool); bus_dmamap_unload(pool->pool_dmat, obj->dmamap); - mtx_unlock(&pool->pool_dma_lock); + DMA_POOL_UNLOCK(pool); } static int dma_pool_obj_import(void *arg, void **store, int count, int domain __unused, int flags) { struct dma_pool *pool = arg; struct linux_dma_priv *priv; struct linux_dma_obj *obj; int error, i; - priv = pool->pool_pdev->dev.dma_priv; + priv = pool->pool_device->dma_priv; for (i = 0; i < count; i++) { obj = uma_zalloc(linux_dma_obj_zone, flags); if (obj == NULL) break; error = bus_dmamem_alloc(pool->pool_dmat, &obj->vaddr, BUS_DMA_NOWAIT, &obj->dmamap); if (error != 0) { uma_zfree(linux_dma_obj_zone, obj); break; } store[i] = obj; } return (i); } static void dma_pool_obj_release(void *arg, void **store, int count) { struct dma_pool *pool = arg; struct linux_dma_priv *priv; struct linux_dma_obj *obj; int i; - priv = pool->pool_pdev->dev.dma_priv; + priv = pool->pool_device->dma_priv; for (i = 0; i < count; i++) { obj = store[i]; bus_dmamem_free(pool->pool_dmat, obj->vaddr, obj->dmamap); uma_zfree(linux_dma_obj_zone, obj); } } struct dma_pool * linux_dma_pool_create(char *name, struct device *dev, size_t size, size_t align, size_t boundary) { struct linux_dma_priv *priv; struct dma_pool *pool; priv = dev->dma_priv; pool = kzalloc(sizeof(*pool), GFP_KERNEL); - pool->pool_pdev = to_pci_dev(dev); + pool->pool_device = dev; pool->pool_entry_size = size; if (bus_dma_tag_create(bus_get_dma_tag(dev->bsddev), align, boundary, /* alignment, boundary */ priv->dma_mask, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsz */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &pool->pool_dmat)) { kfree(pool); return (NULL); } pool->pool_zone = uma_zcache_create(name, -1, dma_pool_obj_ctor, dma_pool_obj_dtor, NULL, NULL, dma_pool_obj_import, dma_pool_obj_release, pool, 0); - mtx_init(&pool->pool_dma_lock, "linux_dma_pool", NULL, MTX_DEF); - - mtx_init(&pool->pool_ptree_lock, "linux_dma_pool_ptree", NULL, - MTX_DEF); + mtx_init(&pool->pool_lock, "lkpi-dma-pool", NULL, MTX_DEF); pctrie_init(&pool->pool_ptree); return (pool); } void linux_dma_pool_destroy(struct dma_pool *pool) { uma_zdestroy(pool->pool_zone); bus_dma_tag_destroy(pool->pool_dmat); - mtx_destroy(&pool->pool_ptree_lock); - mtx_destroy(&pool->pool_dma_lock); + mtx_destroy(&pool->pool_lock); kfree(pool); }
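[Editor's aside: with struct dma_pool now private to linux_pci.c, LinuxKPI consumers reach it only through the create/alloc/free/destroy entry points declared in dmapool.h above. A sketch of typical driver-side usage, e.g. a pool of small fixed-size DMA descriptors; 'dev' and everything else is hypothetical, and in a real driver this fragment would sit behind #include <linux/dmapool.h> and <linux/errno.h>.]

/* Illustrative LinuxKPI driver fragment, not part of this change. */
static int
example_descriptor_roundtrip(struct device *dev)
{
	struct dma_pool *pool;
	dma_addr_t busaddr;
	void *desc;

	/* 64-byte entries, 64-byte aligned, no boundary restriction. */
	pool = dma_pool_create("example", dev, 64, 64, 0);
	if (pool == NULL)
		return (-ENOMEM);

	/* Zeroed allocation; 'busaddr' receives the device-visible address. */
	desc = dma_pool_zalloc(pool, GFP_KERNEL, &busaddr);
	if (desc == NULL) {
		dma_pool_destroy(pool);
		return (-ENOMEM);
	}

	/* ... program the device with 'busaddr', fill in 'desc' ... */

	dma_pool_free(pool, desc, busaddr);
	dma_pool_destroy(pool);
	return (0);
}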
pool, mem_flags); if (obj == NULL) return (NULL); - mtx_lock(&pool->pool_ptree_lock); + DMA_POOL_LOCK(pool); if (LINUX_DMA_PCTRIE_INSERT(&pool->pool_ptree, obj) != 0) { - mtx_unlock(&pool->pool_ptree_lock); + DMA_POOL_UNLOCK(pool); uma_zfree_arg(pool->pool_zone, obj, pool); return (NULL); } - mtx_unlock(&pool->pool_ptree_lock); + DMA_POOL_UNLOCK(pool); *handle = obj->dma_addr; return (obj->vaddr); } void linux_dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma_addr) { struct linux_dma_obj *obj; - mtx_lock(&pool->pool_ptree_lock); + DMA_POOL_LOCK(pool); obj = LINUX_DMA_PCTRIE_LOOKUP(&pool->pool_ptree, dma_addr); if (obj == NULL) { - mtx_unlock(&pool->pool_ptree_lock); + DMA_POOL_UNLOCK(pool); return; } LINUX_DMA_PCTRIE_REMOVE(&pool->pool_ptree, dma_addr); - mtx_unlock(&pool->pool_ptree_lock); + DMA_POOL_UNLOCK(pool); uma_zfree_arg(pool->pool_zone, obj, pool); } Index: projects/runtime-coverage-v2/sys/conf/NOTES =================================================================== --- projects/runtime-coverage-v2/sys/conf/NOTES (revision 347075) +++ projects/runtime-coverage-v2/sys/conf/NOTES (revision 347076) @@ -1,3014 +1,3017 @@ # $FreeBSD$ # # NOTES -- Lines that can be cut/pasted into kernel and hints configs. # # Lines that begin with 'device', 'options', 'machine', 'ident', 'maxusers', # 'makeoptions', 'hints', etc. go into the kernel configuration that you # run config(8) with. # # Lines that begin with 'hint.' are NOT for config(8), they go into your # hints file. See /boot/device.hints and/or the 'hints' config(8) directive. # # Please use ``make LINT'' to create an old-style LINT file if you want to # do kernel test-builds. # # This file contains machine independent kernel configuration notes. For # machine dependent notes, look in /sys//conf/NOTES. # # # NOTES conventions and style guide: # # Large block comments should begin and end with a line containing only a # comment character. # # To describe a particular object, a block comment (if it exists) should # come first. Next should come device, options, and hints lines in that # order. All device and option lines must be described by a comment that # doesn't just expand the device or option name. Use only a concise # comment on the same line if possible. Very detailed descriptions of # devices and subsystems belong in man pages. # # A space followed by a tab separates 'options' from an option name. Two # spaces followed by a tab separate 'device' from a device name. Comments # after an option or device should use one space after the comment character. # To comment out a negative option that disables code and thus should not be # enabled for LINT builds, precede 'options' with "#!". # # # This is the ``identification'' of the kernel. Usually this should # be the same as the name of your kernel. # ident LINT # # The `maxusers' parameter controls the static sizing of a number of # internal system tables by a formula defined in subr_param.c. # Omitting this parameter or setting it to 0 will cause the system to # auto-size based on physical memory. # maxusers 10 # To statically compile in device wiring instead of /boot/device.hints #hints "LINT.hints" # Default places to look for devices. # Use the following to compile in values accessible to the kernel # through getenv() (or kenv(1) in userland). The format of the file # is 'variable=value', see kenv(1) # #env "LINT.env" # # The `makeoptions' parameter allows variables to be passed to the # generated Makefile in the build area. 
# # CONF_CFLAGS gives some extra compiler flags that are added to ${CFLAGS} # after most other flags. Here we use it to inhibit use of non-optimal # gcc built-in functions (e.g., memcmp). # # DEBUG happens to be magic. # The following is equivalent to 'config -g KERNELNAME' and creates # 'kernel.debug' compiled with -g debugging as well as a normal # 'kernel'. Use 'make install.debug' to install the debug kernel # but that isn't normally necessary as the debug symbols are not loaded # by the kernel and are not useful there anyway. # # KERNEL can be overridden so that you can change the default name of your # kernel. # # MODULES_OVERRIDE can be used to limit modules built to a specific list. # makeoptions CONF_CFLAGS=-fno-builtin #Don't allow use of memcmp, etc. #makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols #makeoptions KERNEL=foo #Build kernel "foo" and install "/foo" # Only build ext2fs module plus those parts of the sound system I need. #makeoptions MODULES_OVERRIDE="ext2fs sound/sound sound/driver/maestro3" makeoptions DESTDIR=/tmp # # FreeBSD processes are subject to certain limits to their consumption # of system resources. See getrlimit(2) for more details. Each # resource limit has two values, a "soft" limit and a "hard" limit. # The soft limits can be modified during normal system operation, but # the hard limits are set at boot time. Their default values are # in sys//include/vmparam.h. There are two ways to change them: # # 1. Set the values at kernel build time. The options below are one # way to allow that limit to grow to 1GB. They can be increased # further by changing the parameters: # # 2. In /boot/loader.conf, set the tunables kern.maxswzone, # kern.maxbcache, kern.maxtsiz, kern.dfldsiz, kern.maxdsiz, # kern.dflssiz, kern.maxssiz and kern.sgrowsiz. # # The options in /boot/loader.conf override anything in the kernel # configuration file. See the function init_param1 in # sys/kern/subr_param.c for more details. # options MAXDSIZ=(1024UL*1024*1024) options MAXSSIZ=(128UL*1024*1024) options DFLDSIZ=(1024UL*1024*1024) # # BLKDEV_IOSIZE sets the default block size used in user block # device I/O. Note that this value will be overridden by the label # when specifying a block device from a label with a non-0 # partition blocksize. The default is PAGE_SIZE. # options BLKDEV_IOSIZE=8192 # # MAXPHYS and DFLTPHYS # # These are the maximal and safe 'raw' I/O block device access sizes. # Reads and writes will be split into MAXPHYS chunks for known good # devices and DFLTPHYS for the rest. Some applications have better # performance with larger raw I/O access sizes. Note that certain VM # parameters are derived from these values and making them too large # can make an unbootable kernel. # # The defaults are 64K and 128K respectively. options DFLTPHYS=(64*1024) options MAXPHYS=(128*1024) # This allows you to actually store this configuration file into # the kernel binary itself. See config(8) for more details. # options INCLUDE_CONFIG_FILE # Include this file in kernel # # Compile-time defaults for various boot parameters # options BOOTVERBOSE=1 options BOOTHOWTO=RB_MULTIPLE # # Compile-time defaults for dmesg boot tagging # # Default boot tag; may use 'kern.boot_tag' loader tunable to override. The # current boot's tag is also exposed via the 'kern.boot_tag' sysctl. options BOOT_TAG=\"\" # Maximum boot tag size the kernel's static buffer should accomodate. Maximum # size for both BOOT_TAG and the assocated tunable. 
options BOOT_TAG_SZ=32 options GEOM_BDE # Disk encryption. options GEOM_BSD # BSD disklabels (obsolete, gone in 12) options GEOM_CACHE # Disk cache. options GEOM_CONCAT # Disk concatenation. options GEOM_ELI # Disk encryption. options GEOM_FOX # Redundant path mitigation (obsolete, gone in 12) options GEOM_GATE # Userland services. options GEOM_JOURNAL # Journaling. options GEOM_LABEL # Providers labelization. options GEOM_LINUX_LVM # Linux LVM2 volumes options GEOM_MAP # Map based partitioning options GEOM_MBR # DOS/MBR partitioning (obsolete, gone in 12) options GEOM_MIRROR # Disk mirroring. options GEOM_MULTIPATH # Disk multipath options GEOM_NOP # Test class. options GEOM_PART_APM # Apple partitioning options GEOM_PART_BSD # BSD disklabel options GEOM_PART_BSD64 # BSD disklabel64 options GEOM_PART_EBR # Extended Boot Records options GEOM_PART_EBR_COMPAT # Backward compatible partition names options GEOM_PART_GPT # GPT partitioning options GEOM_PART_LDM # Logical Disk Manager options GEOM_PART_MBR # MBR partitioning options GEOM_PART_VTOC8 # SMI VTOC8 disk label options GEOM_RAID # Soft RAID functionality. options GEOM_RAID3 # RAID3 functionality. options GEOM_SHSEC # Shared secret. options GEOM_STRIPE # Disk striping. options GEOM_SUNLABEL # Sun/Solaris partitioning (obsolete, gone in 12) options GEOM_UZIP # Read-only compressed disks options GEOM_VINUM # Vinum logical volume manager options GEOM_VIRSTOR # Virtual storage. options GEOM_VOL # Volume names from UFS superblock (obsolete, gone in 12) options GEOM_ZERO # Performance testing helper. # # The root device and filesystem type can be compiled in; # this provides a fallback option if the root device cannot # be correctly guessed by the bootstrap code, or an override if # the RB_DFLTROOT flag (-r) is specified when booting the kernel. # options ROOTDEVNAME=\"ufs:da0s2e\" ##################################################################### # Scheduler options: # # Specifying one of SCHED_4BSD or SCHED_ULE is mandatory. These options # select which scheduler is compiled in. # # SCHED_4BSD is the historical, proven, BSD scheduler. It has a global run # queue and no CPU affinity which makes it suboptimal for SMP. It has very # good interactivity and priority selection. # # SCHED_ULE provides significant performance advantages over 4BSD on many # workloads on SMP machines. It supports cpu-affinity, per-cpu runqueues # and scheduler locks. It also has a stronger notion of interactivity # which leads to better responsiveness even on uniprocessor machines. This # is the default scheduler. # # SCHED_STATS is a debugging option which keeps some stats in the sysctl # tree at 'kern.sched.stats' and is useful for debugging scheduling decisions. # options SCHED_4BSD options SCHED_STATS #options SCHED_ULE ##################################################################### # SMP OPTIONS: # # SMP enables building of a Symmetric MultiProcessor Kernel. # Mandatory: options SMP # Symmetric MultiProcessor Kernel # EARLY_AP_STARTUP releases the Application Processors earlier in the # kernel startup process (before devices are probed) rather than at the # end. This is a temporary option for use during the transition from # late to early AP startup. options EARLY_AP_STARTUP # MAXCPU defines the maximum number of CPUs that can boot in the system. # A default value should be already present, for every architecture. options MAXCPU=32 # NUMA enables use of Non-Uniform Memory Access policies in various kernel # subsystems. 
options NUMA # MAXMEMDOM defines the maximum number of memory domains that can boot in the # system. A default value should already be defined by every architecture. options MAXMEMDOM=2 # ADAPTIVE_MUTEXES changes the behavior of blocking mutexes to spin # if the thread that currently owns the mutex is executing on another # CPU. This behavior is enabled by default, so this option can be used # to disable it. options NO_ADAPTIVE_MUTEXES # ADAPTIVE_RWLOCKS changes the behavior of reader/writer locks to spin # if the thread that currently owns the rwlock is executing on another # CPU. This behavior is enabled by default, so this option can be used # to disable it. options NO_ADAPTIVE_RWLOCKS # ADAPTIVE_SX changes the behavior of sx locks to spin if the thread that # currently owns the sx lock is executing on another CPU. # This behavior is enabled by default, so this option can be used to # disable it. options NO_ADAPTIVE_SX # MUTEX_NOINLINE forces mutex operations to call functions to perform each # operation rather than inlining the simple cases. This can be used to # shrink the size of the kernel text segment. Note that this behavior is # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING, # and WITNESS options. options MUTEX_NOINLINE # RWLOCK_NOINLINE forces rwlock operations to call functions to perform each # operation rather than inlining the simple cases. This can be used to # shrink the size of the kernel text segment. Note that this behavior is # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING, # and WITNESS options. options RWLOCK_NOINLINE # SX_NOINLINE forces sx lock operations to call functions to perform each # operation rather than inlining the simple cases. This can be used to # shrink the size of the kernel text segment. Note that this behavior is # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING, # and WITNESS options. options SX_NOINLINE # SMP Debugging Options: # # CALLOUT_PROFILING enables rudimentary profiling of the callwheel data # structure used as backend in callout(9). # PREEMPTION allows the threads that are in the kernel to be preempted by # higher priority [interrupt] threads. It helps with interactivity # and allows interrupt threads to run sooner rather than waiting. # WARNING! Only tested on amd64 and i386. # FULL_PREEMPTION instructs the kernel to preempt non-realtime kernel # threads. Its sole use is to expose race conditions and other # bugs during development. Enabling this option will reduce # performance and increase the frequency of kernel panics by # design. If you aren't sure that you need it then you don't. # Relies on the PREEMPTION option. DON'T TURN THIS ON. # SLEEPQUEUE_PROFILING enables rudimentary profiling of the hash table # used to hold active sleep queues as well as sleep wait message # frequency. # TURNSTILE_PROFILING enables rudimentary profiling of the hash table # used to hold active lock queues. # UMTX_PROFILING enables rudimentary profiling of the hash table used # to hold active lock queues. # WITNESS enables the witness code which detects deadlocks and cycles # during locking operations. # WITNESS_KDB causes the witness code to drop into the kernel debugger if # a lock hierarchy violation occurs or if locks are held when going to # sleep. # WITNESS_SKIPSPIN disables the witness checks on spin mutexes. options PREEMPTION options FULL_PREEMPTION options WITNESS options WITNESS_KDB options WITNESS_SKIPSPIN # LOCK_PROFILING - Profiling locks. 
See LOCK_PROFILING(9) for details.
 options 	LOCK_PROFILING
 # Set the number of buffers and the hash size.  The hash size MUST be larger
 # than the number of buffers.  Hash size should be prime.
 options 	MPROF_BUFFERS="1536"
 options 	MPROF_HASH_SIZE="1543"
 
 # Profiling for the callout(9) backend.
 options 	CALLOUT_PROFILING
 
 # Profiling for internal hash tables.
 options 	SLEEPQUEUE_PROFILING
 options 	TURNSTILE_PROFILING
 options 	UMTX_PROFILING
 
 #####################################################################
 # COMPATIBILITY OPTIONS
 
 #
 # Implement system calls compatible with 4.3BSD and older versions of
 # FreeBSD.  You probably do NOT want to remove this as much current code
 # still relies on the 4.3 emulation.  Note that some architectures that
 # are supported by FreeBSD do not include support for certain important
 # aspects of this compatibility option, namely those related to the
 # signal delivery mechanism.
 #
 options 	COMPAT_43
 
 # Old tty interface.
 options 	COMPAT_43TTY
 
 # Note that as a general rule, COMPAT_FREEBSD<n> depends on
 # COMPAT_FREEBSD<n+1>, COMPAT_FREEBSD<n+2>, etc.
 
 # Enable FreeBSD4 compatibility syscalls
 options 	COMPAT_FREEBSD4
 
 # Enable FreeBSD5 compatibility syscalls
 options 	COMPAT_FREEBSD5
 
 # Enable FreeBSD6 compatibility syscalls
 options 	COMPAT_FREEBSD6
 
 # Enable FreeBSD7 compatibility syscalls
 options 	COMPAT_FREEBSD7
 
 # Enable FreeBSD9 compatibility syscalls
 options 	COMPAT_FREEBSD9
 
 # Enable FreeBSD10 compatibility syscalls
 options 	COMPAT_FREEBSD10
 
 # Enable FreeBSD11 compatibility syscalls
 options 	COMPAT_FREEBSD11
 
+# Enable FreeBSD12 compatibility syscalls
+options 	COMPAT_FREEBSD12
+
 # Enable Linux Kernel Programming Interface
 options 	COMPAT_LINUXKPI
 
 #
 # These three options provide support for System V Interface
 # Definition-style interprocess communication, in the form of shared
 # memory, semaphores, and message queues, respectively.
 #
 options 	SYSVSHM
 options 	SYSVSEM
 options 	SYSVMSG
 
 #####################################################################
 # DEBUGGING OPTIONS
 
 #
 # Compile with kernel debugger related code.
 #
 options 	KDB
 
 #
 # Print a stack trace of the current thread on the console for a panic.
 #
 options 	KDB_TRACE
 
 #
 # Don't enter the debugger for a panic.  Intended for unattended operation
 # where you may want to enter the debugger from the console, but still want
 # the machine to recover from a panic.
 #
 options 	KDB_UNATTENDED
 
 #
 # Enable the ddb debugger backend.
 #
 options 	DDB
 
 #
 # Print the numerical value of symbols in addition to the symbolic
 # representation.
 #
 options 	DDB_NUMSYM
 
 #
 # Enable the remote gdb debugger backend.
 #
 options 	GDB
 
 #
 # SYSCTL_DEBUG enables a 'sysctl' debug tree that can be used to dump the
 # contents of the registered sysctl nodes on the console.  It is disabled by
 # default because it generates excessively verbose console output that can
 # interfere with serial console operation.
 #
 options 	SYSCTL_DEBUG
 
 #
 # Enable textdump by default; this disables kernel core dumps.
 #
 options 	TEXTDUMP_PREFERRED
 
 #
 # Enable extra debug messages while performing textdumps.
 #
 options 	TEXTDUMP_VERBOSE
 
 #
 # NO_SYSCTL_DESCR omits the sysctl node descriptions to save space in the
 # resulting kernel.
 options 	NO_SYSCTL_DESCR
 
 #
 # MALLOC_DEBUG_MAXZONES enables multiple uma zones for malloc(9)
 # allocations that are smaller than a page.  The purpose is to isolate
 # different malloc types into hash classes, so that any buffer
 # overruns or use-after-free will usually only affect memory from
 # malloc types in that hash class.  This is purely a debugging tool;
 # by varying the hash function and tracking which hash class was
 # corrupted, the intersection of the hash classes from each instance
 # will point to a single malloc type that is being misused.  At this
 # point inspection or memguard(9) can be used to catch the offending
 # code.
 #
 options 	MALLOC_DEBUG_MAXZONES=8
 
 #
 # DEBUG_MEMGUARD builds and enables memguard(9), a replacement allocator
 # for the kernel used to detect modify-after-free scenarios.  See the
 # memguard(9) man page for more information on usage.
 #
 options 	DEBUG_MEMGUARD
 
 #
 # DEBUG_REDZONE enables detection of buffer underflows and overflows for
 # malloc(9).
 #
 options 	DEBUG_REDZONE
 
 #
 # EARLY_PRINTF enables support for calling a special printf (eprintf)
 # very early in the kernel (before cn_init() has been called).  This
 # should only be used for debugging purposes early in boot.  Normally,
 # it is not defined.  It is commented out here because this feature
 # isn't generally available.  And the required eputc() isn't defined.
 #
 #options 	EARLY_PRINTF
 
 #
 # KTRACE enables the system-call tracing facility ktrace(2).  To be more
 # SMP-friendly, KTRACE uses a worker thread to process most trace events
 # asynchronously to the thread generating the event.  This requires a
 # pre-allocated store of objects representing trace events.  The
 # KTRACE_REQUEST_POOL option specifies the initial size of this store.
 # The size of the pool can be adjusted both at boottime and runtime via
 # the kern.ktrace_request_pool tunable and sysctl.
 #
 options 	KTRACE			#kernel tracing
 options 	KTRACE_REQUEST_POOL=101
 
 #
 # KTR is a kernel tracing facility imported from BSD/OS.  It is
 # enabled with the KTR option.  KTR_ENTRIES defines the number of
 # entries in the circular trace buffer; it may be an arbitrary number.
 # KTR_BOOT_ENTRIES defines the number of entries during the early boot,
 # before malloc(9) is functional.
 # KTR_COMPILE defines the mask of events to compile into the kernel as
 # defined by the KTR_* constants in <sys/ktr.h>.  KTR_MASK defines the
 # initial value of the ktr_mask variable which determines at runtime
 # what events to trace.  KTR_CPUMASK determines which CPUs log
 # events, with bit X corresponding to CPU X.  The layout of the string
 # passed as KTR_CPUMASK must match a series of bitmasks, each separated
 # by the "," character (i.e.:
 # KTR_CPUMASK=0xAF,0xFFFFFFFFFFFFFFFF).  KTR_VERBOSE enables
 # dumping of KTR events to the console by default.  This functionality
 # can be toggled via the debug.ktr_verbose sysctl and defaults to off
 # if KTR_VERBOSE is not defined.  See ktr(4) and ktrdump(8) for details.
 #
 options 	KTR
 options 	KTR_BOOT_ENTRIES=1024
 options 	KTR_ENTRIES=(128*1024)
 options 	KTR_COMPILE=(KTR_ALL)
 options 	KTR_MASK=KTR_INTR
 options 	KTR_CPUMASK=0x3
 options 	KTR_VERBOSE
 
 #
 # ALQ(9) is a facility for the asynchronous queuing of records from the kernel
 # to a vnode, and is employed by services such as ktr(4) to produce trace
 # files based on a kernel event stream.  Records are written asynchronously
 # in a worker thread.
 #
 options 	ALQ
 options 	KTR_ALQ
 
 #
 # The INVARIANTS option is used in a number of source files to enable
 # extra sanity checking of internal structures.  This support is not
 # enabled by default because of the extra time it would take to check
 # for these conditions, which can only occur as a result of
 # programming errors.
 #
 options 	INVARIANTS
 
 #
 # The INVARIANT_SUPPORT option makes us compile in support for
 # verifying some of the internal structures.
It is a prerequisite for # 'INVARIANTS', as enabling 'INVARIANTS' will make these functions be # called. The intent is that you can set 'INVARIANTS' for single # source files (by changing the source file or specifying it on the # command line) if you have 'INVARIANT_SUPPORT' enabled. Also, if you # wish to build a kernel module with 'INVARIANTS', then adding # 'INVARIANT_SUPPORT' to your kernel will provide all the necessary # infrastructure without the added overhead. # options INVARIANT_SUPPORT # # The KASSERT_PANIC_OPTIONAL option allows kasserts to fire without # necessarily inducing a panic. Panic is the default behavior, but # runtime options can configure it either entirely off, or off with a # limit. # options KASSERT_PANIC_OPTIONAL # # The DIAGNOSTIC option is used to enable extra debugging information # and invariants checking. The added checks are too expensive or noisy # for an INVARIANTS kernel and thus are disabled by default. It is # expected that a kernel configured with DIAGNOSTIC will also have the # INVARIANTS option enabled. # options DIAGNOSTIC # # REGRESSION causes optional kernel interfaces necessary only for regression # testing to be enabled. These interfaces may constitute security risks # when enabled, as they permit processes to easily modify aspects of the # run-time environment to reproduce unlikely or unusual (possibly normally # impossible) scenarios. # options REGRESSION # # This option lets some drivers co-exist that can't co-exist in a running # system. This is used to be able to compile all kernel code in one go for # quality assurance purposes (like this file, which the option takes it name # from.) # options COMPILING_LINT # # STACK enables the stack(9) facility, allowing the capture of kernel stack # for the purpose of procinfo(1), etc. stack(9) will also be compiled in # automatically if DDB(4) is compiled into the kernel. # options STACK # # The NUM_CORE_FILES option specifies the limit for the number of core # files generated by a particular process, when the core file format # specifier includes the %I pattern. Since we only have 1 character for # the core count in the format string, meaning the range will be 0-9, the # maximum value allowed for this option is 10. # This core file limit can be adjusted at runtime via the debug.ncores # sysctl. # options NUM_CORE_FILES=5 # # The TSLOG option enables timestamped logging of events, especially # function entries/exits, in order to track the time spent by the kernel. # In particular, this is useful when investigating the early boot process, # before it is possible to use more sophisticated tools like DTrace. # The TSLOGSIZE option controls the size of the (preallocated, fixed # length) buffer used for storing these events (default: 262144 records). # # For security reasons the TSLOG option should not be enabled on systems # used in production. # options TSLOG options TSLOGSIZE=262144 ##################################################################### # PERFORMANCE MONITORING OPTIONS # # The hwpmc driver that allows the use of in-CPU performance monitoring # counters for performance monitoring. The base kernel needs to be configured # with the 'options' line, while the hwpmc device can be either compiled # in or loaded as a loadable kernel module. # # Additional configuration options may be required on specific architectures, # please see hwpmc(4). 
device hwpmc # Driver (also a loadable module) options HWPMC_DEBUG options HWPMC_HOOKS # Other necessary kernel hooks ##################################################################### # NETWORKING OPTIONS # # Protocol families # options INET #Internet communications protocols options INET6 #IPv6 communications protocols options RATELIMIT # TX rate limiting support options ROUTETABLES=2 # allocated fibs up to 65536. default is 1. # but that would be a bad idea as they are large. options TCP_OFFLOAD # TCP offload support. options TCPHPTS # In order to enable IPSEC you MUST also add device crypto to # your kernel configuration options IPSEC #IP security (requires device crypto) # Option IPSEC_SUPPORT does not enable IPsec, but makes it possible to # load it as a kernel module. You still MUST add device crypto to your kernel # configuration. options IPSEC_SUPPORT #options IPSEC_DEBUG #debug for IP security # # SMB/CIFS requester # NETSMB enables support for SMB protocol, it requires LIBMCHAIN and LIBICONV # options. options NETSMB #SMB/CIFS requester # mchain library. It can be either loaded as KLD or compiled into kernel options LIBMCHAIN # libalias library, performing NAT options LIBALIAS # # SCTP is a NEW transport protocol defined by # RFC2960 updated by RFC3309 and RFC3758.. and # soon to have a new base RFC and many many more # extensions. This release supports all the extensions # including many drafts (most about to become RFC's). # It is the reference implementation of SCTP # and is quite well tested. # # Note YOU MUST have both INET and INET6 defined. # You don't have to enable V6, but SCTP is # dual stacked and so far we have not torn apart # the V6 and V4.. since an association can span # both a V6 and V4 address at the SAME time :-) # options SCTP # There are bunches of options: # this one turns on all sorts of # nastily printing that you can # do. It's all controlled by a # bit mask (settable by socket opt and # by sysctl). Including will not cause # logging until you set the bits.. but it # can be quite verbose.. so without this # option we don't do any of the tests for # bits and prints.. which makes the code run # faster.. if you are not debugging don't use. options SCTP_DEBUG # # All that options after that turn on specific types of # logging. You can monitor CWND growth, flight size # and all sorts of things. Go look at the code and # see. I have used this to produce interesting # charts and graphs as well :-> # # I have not yet committed the tools to get and print # the logs, I will do that eventually .. before then # if you want them send me an email rrs@freebsd.org # You basically must have ktr(4) enabled for these # and you then set the sysctl to turn on/off various # logging bits. Use ktrdump(8) to pull the log and run # it through a display program.. and graphs and other # things too. # options SCTP_LOCK_LOGGING options SCTP_MBUF_LOGGING options SCTP_MBCNT_LOGGING options SCTP_PACKET_LOGGING options SCTP_LTRACE_CHUNKS options SCTP_LTRACE_ERRORS # altq(9). Enable the base part of the hooks with the ALTQ option. # Individual disciplines must be built into the base system and can not be # loaded as modules at this point. ALTQ requires a stable TSC so if yours is # broken or changes with CPU throttling then you must also have the ALTQ_NOPCC # option. 
options ALTQ options ALTQ_CBQ # Class Based Queueing options ALTQ_RED # Random Early Detection options ALTQ_RIO # RED In/Out options ALTQ_CODEL # CoDel Active Queueing options ALTQ_HFSC # Hierarchical Packet Scheduler options ALTQ_FAIRQ # Fair Packet Scheduler options ALTQ_CDNR # Traffic conditioner options ALTQ_PRIQ # Priority Queueing options ALTQ_NOPCC # Required if the TSC is unusable options ALTQ_DEBUG # netgraph(4). Enable the base netgraph code with the NETGRAPH option. # Individual node types can be enabled with the corresponding option # listed below; however, this is not strictly necessary as netgraph # will automatically load the corresponding KLD module if the node type # is not already compiled into the kernel. Each type below has a # corresponding man page, e.g., ng_async(8). options NETGRAPH # netgraph(4) system options NETGRAPH_DEBUG # enable extra debugging, this # affects netgraph(4) and nodes # Node types options NETGRAPH_ASYNC options NETGRAPH_ATMLLC options NETGRAPH_ATM_ATMPIF options NETGRAPH_BLUETOOTH # ng_bluetooth(4) options NETGRAPH_BLUETOOTH_BT3C # ng_bt3c(4) options NETGRAPH_BLUETOOTH_HCI # ng_hci(4) options NETGRAPH_BLUETOOTH_L2CAP # ng_l2cap(4) options NETGRAPH_BLUETOOTH_SOCKET # ng_btsocket(4) options NETGRAPH_BLUETOOTH_UBT # ng_ubt(4) options NETGRAPH_BLUETOOTH_UBTBCMFW # ubtbcmfw(4) options NETGRAPH_BPF options NETGRAPH_BRIDGE options NETGRAPH_CAR options NETGRAPH_CHECKSUM options NETGRAPH_CISCO options NETGRAPH_DEFLATE options NETGRAPH_DEVICE options NETGRAPH_ECHO options NETGRAPH_EIFACE options NETGRAPH_ETHER options NETGRAPH_FRAME_RELAY options NETGRAPH_GIF options NETGRAPH_GIF_DEMUX options NETGRAPH_HOLE options NETGRAPH_IFACE options NETGRAPH_IP_INPUT options NETGRAPH_IPFW options NETGRAPH_KSOCKET options NETGRAPH_L2TP options NETGRAPH_LMI options NETGRAPH_MPPC_COMPRESSION options NETGRAPH_MPPC_ENCRYPTION options NETGRAPH_NETFLOW options NETGRAPH_NAT options NETGRAPH_ONE2MANY options NETGRAPH_PATCH options NETGRAPH_PIPE options NETGRAPH_PPP options NETGRAPH_PPPOE options NETGRAPH_PPTPGRE options NETGRAPH_PRED1 options NETGRAPH_RFC1490 options NETGRAPH_SOCKET options NETGRAPH_SPLIT options NETGRAPH_SPPP options NETGRAPH_TAG options NETGRAPH_TCPMSS options NETGRAPH_TEE options NETGRAPH_UI options NETGRAPH_VJC options NETGRAPH_VLAN # NgATM - Netgraph ATM options NGATM_ATM options NGATM_ATMBASE options NGATM_SSCOP options NGATM_SSCFU options NGATM_UNI options NGATM_CCATM device mn # Munich32x/Falc54 Nx64kbit/sec cards. # Network stack virtualization. options VIMAGE options VNET_DEBUG # debug for VIMAGE # # Network interfaces: # The `loop' device is MANDATORY when networking is enabled. device loop # The `ether' device provides generic code to handle # Ethernets; it is MANDATORY when an Ethernet device driver is # configured. device ether # The `vlan' device implements the VLAN tagging of Ethernet frames # according to IEEE 802.1Q. device vlan # The `vxlan' device implements the VXLAN encapsulation of Ethernet # frames in UDP packets according to RFC7348. device vxlan # The `wlan' device provides generic code to support 802.11 # drivers, including host AP mode; it is MANDATORY for the wi, # and ath drivers and will eventually be required by all 802.11 drivers. 
device wlan options IEEE80211_DEBUG #enable debugging msgs options IEEE80211_SUPPORT_MESH #enable 802.11s D3.0 support options IEEE80211_SUPPORT_TDMA #enable TDMA support # The `wlan_wep', `wlan_tkip', and `wlan_ccmp' devices provide # support for WEP, TKIP, and AES-CCMP crypto protocols optionally # used with 802.11 devices that depend on the `wlan' module. device wlan_wep device wlan_ccmp device wlan_tkip # The `wlan_xauth' device provides support for external (i.e. user-mode) # authenticators for use with 802.11 drivers that use the `wlan' # module and support 802.1x and/or WPA security protocols. device wlan_xauth # The `wlan_acl' device provides a MAC-based access control mechanism # for use with 802.11 drivers operating in ap mode and using the # `wlan' module. # The 'wlan_amrr' device provides AMRR transmit rate control algorithm device wlan_acl device wlan_amrr # The `sppp' device serves a similar role for certain types # of synchronous PPP links (like `cx', `ar'). device sppp # The `bpf' device enables the Berkeley Packet Filter. Be # aware of the legal and administrative consequences of enabling this # option. DHCP requires bpf. device bpf # The `netmap' device implements memory-mapped access to network # devices from userspace, enabling wire-speed packet capture and # generation even at 10Gbit/s. Requires support in the device # driver. Supported drivers are ixgbe, e1000, re. device netmap # The `disc' device implements a minimal network interface, # which throws away all packets sent and never receives any. It is # included for testing and benchmarking purposes. device disc # The `epair' device implements a virtual back-to-back connected Ethernet # like interface pair. device epair # The `edsc' device implements a minimal Ethernet interface, # which discards all packets sent and receives none. device edsc # The `tap' device is a pty-like virtual Ethernet interface device tap # The `tun' device implements (user-)ppp and nos-tun(8) device tun # The `gif' device implements IPv6 over IP4 tunneling, # IPv4 over IPv6 tunneling, IPv4 over IPv4 tunneling and # IPv6 over IPv6 tunneling. # The `gre' device implements GRE (Generic Routing Encapsulation) tunneling, # as specified in the RFC 2784 and RFC 2890. # The `me' device implements Minimal Encapsulation within IPv4 as # specified in the RFC 2004. # The XBONEHACK option allows the same pair of addresses to be configured on # multiple gif interfaces. device gif device gre device me options XBONEHACK # The `stf' device implements 6to4 encapsulation. device stf # The pf packet filter consists of three devices: # The `pf' device provides /dev/pf and the firewall code itself. # The `pflog' device provides the pflog0 interface which logs packets. # The `pfsync' device provides the pfsync0 interface used for # synchronization of firewall state tables (over the net). device pf device pflog device pfsync # Bridge interface. device if_bridge # Common Address Redundancy Protocol. See carp(4) for more details. device carp # IPsec interface. device enc # Link aggregation interface. device lagg # # Internet family options: # # MROUTING enables the kernel multicast packet forwarder, which works # with mrouted and XORP. # # IPFIREWALL enables support for IP firewall construction, in # conjunction with the `ipfw' program. IPFIREWALL_VERBOSE sends # logged packets to the system logger. IPFIREWALL_VERBOSE_LIMIT # limits the number of times a matching entry can be logged. 
# # WARNING: IPFIREWALL defaults to a policy of "deny ip from any to any" # and if you do not add other rules during startup to allow access, # YOU WILL LOCK YOURSELF OUT. It is suggested that you set firewall_type=open # in /etc/rc.conf when first enabling this feature, then refining the # firewall rules in /etc/rc.firewall after you've tested that the new kernel # feature works properly. # # IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to # allow everything. Use with care, if a cracker can crash your # firewall machine, they can get to your protected machines. However, # if you are using it as an as-needed filter for specific problems as # they arise, then this may be for you. Changing the default to 'allow' # means that you won't get stuck if the kernel and /sbin/ipfw binary get # out of sync. # # IPDIVERT enables the divert IP sockets, used by ``ipfw divert''. It # depends on IPFIREWALL if compiled into the kernel. # # IPFIREWALL_NAT adds support for in kernel nat in ipfw, and it requires # LIBALIAS. # # IPFIREWALL_NAT64 adds support for in kernel NAT64 in ipfw. # # IPFIREWALL_NPTV6 adds support for in kernel NPTv6 in ipfw. # # IPFIREWALL_PMOD adds support for protocols modification module. Currently # it supports only TCP MSS modification. # # IPSTEALTH enables code to support stealth forwarding (i.e., forwarding # packets without touching the TTL). This can be useful to hide firewalls # from traceroute and similar tools. # # PF_DEFAULT_TO_DROP causes the default pf(4) rule to deny everything. # # TCPDEBUG enables code which keeps traces of the TCP state machine # for sockets with the SO_DEBUG option set, which can then be examined # using the trpt(8) utility. # # TCPPCAP enables code which keeps the last n packets sent and received # on a TCP socket. # # TCP_BLACKBOX enables enhanced TCP event logging. # # TCP_HHOOK enables the hhook(9) framework hooks for the TCP stack. # # RADIX_MPATH provides support for equal-cost multi-path routing. # options MROUTING # Multicast routing options IPFIREWALL #firewall options IPFIREWALL_VERBOSE #enable logging to syslogd(8) options IPFIREWALL_VERBOSE_LIMIT=100 #limit verbosity options IPFIREWALL_DEFAULT_TO_ACCEPT #allow everything by default options IPFIREWALL_NAT #ipfw kernel nat support options IPFIREWALL_NAT64 #ipfw kernel NAT64 support options IPFIREWALL_NPTV6 #ipfw kernel IPv6 NPT support options IPDIVERT #divert sockets options IPFILTER #ipfilter support options IPFILTER_LOG #ipfilter logging options IPFILTER_LOOKUP #ipfilter pools options IPFILTER_DEFAULT_BLOCK #block all packets by default options IPSTEALTH #support for stealth forwarding options PF_DEFAULT_TO_DROP #drop everything by default options TCPDEBUG options TCPPCAP options TCP_BLACKBOX options TCP_HHOOK options RADIX_MPATH # The MBUF_STRESS_TEST option enables options which create # various random failures / extreme cases related to mbuf # functions. See mbuf(9) for a list of available test cases. # MBUF_PROFILING enables code to profile the mbuf chains # exiting the system (via participating interfaces) and # return a logarithmic histogram of monitored parameters # (e.g. packet size, wasted space, number of mbufs in chain). options MBUF_STRESS_TEST options MBUF_PROFILING # Statically link in accept filters options ACCEPT_FILTER_DATA options ACCEPT_FILTER_DNS options ACCEPT_FILTER_HTTP # TCP_SIGNATURE adds support for RFC 2385 (TCP-MD5) digests. These are # carried in TCP option 19. This option is commonly used to protect # TCP sessions (e.g. 
BGP) where IPSEC is not available nor desirable. # This is enabled on a per-socket basis using the TCP_MD5SIG socket option. # This requires the use of 'device crypto' and either 'options IPSEC' or # 'options IPSEC_SUPPORT'. options TCP_SIGNATURE #include support for RFC 2385 # DUMMYNET enables the "dummynet" bandwidth limiter. You need IPFIREWALL # as well. See dummynet(4) and ipfw(8) for more info. When you run # DUMMYNET it is advisable to also have at least "options HZ=1000" to achieve # a smooth scheduling of the traffic. options DUMMYNET # The NETDUMP option enables netdump(4) client support in the kernel. # This allows a panicking kernel to transmit a kernel dump to a remote host. options NETDUMP ##################################################################### # FILESYSTEM OPTIONS # # Only the root filesystem needs to be statically compiled or preloaded # as module; everything else will be automatically loaded at mount # time. Some people still prefer to statically compile other # filesystems as well. # # NB: The UNION filesystem was known to be buggy in the past. It is now # being actively maintained, although there are still some issues being # resolved. # # One of these is mandatory: options FFS #Fast filesystem options NFSCL #Network File System client # The rest are optional: options AUTOFS #Automounter filesystem options CD9660 #ISO 9660 filesystem options FDESCFS #File descriptor filesystem options FUSEFS #FUSEFS support module options MSDOSFS #MS DOS File System (FAT, FAT32) options NFSLOCKD #Network Lock Manager options NFSD #Network Filesystem Server options KGSSAPI #Kernel GSSAPI implementation options NULLFS #NULL filesystem options PROCFS #Process filesystem (requires PSEUDOFS) options PSEUDOFS #Pseudo-filesystem framework options PSEUDOFS_TRACE #Debugging support for PSEUDOFS options SMBFS #SMB/CIFS filesystem options TMPFS #Efficient memory filesystem options UDF #Universal Disk Format options UNIONFS #Union filesystem # The xFS_ROOT options REQUIRE the associated ``options xFS'' options NFS_ROOT #NFS usable as root device # Soft updates is a technique for improving filesystem speed and # making abrupt shutdown less risky. # options SOFTUPDATES # Extended attributes allow additional data to be associated with files, # and is used for ACLs, Capabilities, and MAC labels. # See src/sys/ufs/ufs/README.extattr for more information. options UFS_EXTATTR options UFS_EXTATTR_AUTOSTART # Access Control List support for UFS filesystems. The current ACL # implementation requires extended attribute support, UFS_EXTATTR, # for the underlying filesystem. # See src/sys/ufs/ufs/README.acls for more information. options UFS_ACL # Directory hashing improves the speed of operations on very large # directories at the expense of some memory. options UFS_DIRHASH # Gjournal-based UFS journaling support. options UFS_GJOURNAL # Make space in the kernel for a root filesystem on a md device. # Define to the number of kilobytes to reserve for the filesystem. # This is now optional. # If not defined, the root filesystem passed in as the MFS_IMAGE makeoption # will be automatically embedded in the kernel during linking. Its exact size # will be consumed within the kernel. # If defined, the old way of embedding the filesystem in the kernel will be # used. That is to say MD_ROOT_SIZE KB will be allocated in the kernel and # later, the filesystem image passed in as the MFS_IMAGE makeoption will be # dd'd into the reserved space if it fits. 
options MD_ROOT_SIZE=10 # Make the md device a potential root device, either with preloaded # images of type mfs_root or md_root. options MD_ROOT # Write-protect the md root device so that it may not be mounted writeable. options MD_ROOT_READONLY # Allow to read MD image from external memory regions options MD_ROOT_MEM # Disk quotas are supported when this option is enabled. options QUOTA #enable disk quotas # If you are running a machine just as a fileserver for PC and MAC # users, using SAMBA, you may consider setting this option # and keeping all those users' directories on a filesystem that is # mounted with the suiddir option. This gives new files the same # ownership as the directory (similar to group). It's a security hole # if you let these users run programs, so confine it to file-servers # (but it'll save you lots of headaches in those cases). Root owned # directories are exempt and X bits are cleared. The suid bit must be # set on the directory as well; see chmod(1). PC owners can't see/set # ownerships so they keep getting their toes trodden on. This saves # you all the support calls as the filesystem it's used on will act as # they expect: "It's my dir so it must be my file". # options SUIDDIR # NFS options: options NFS_MINATTRTIMO=3 # VREG attrib cache timeout in sec options NFS_MAXATTRTIMO=60 options NFS_MINDIRATTRTIMO=30 # VDIR attrib cache timeout in sec options NFS_MAXDIRATTRTIMO=60 options NFS_DEBUG # Enable NFS Debugging # # Add support for the EXT2FS filesystem of Linux fame. Be a bit # careful with this - the ext2fs code has a tendency to lag behind # changes and not be exercised very much, so mounting read/write could # be dangerous (and even mounting read only could result in panics.) # options EXT2FS # Cryptographically secure random number generator; /dev/random device random # The system memory devices; /dev/mem, /dev/kmem device mem # The kernel symbol table device; /dev/ksyms device ksyms # Optional character code conversion support with LIBICONV. # Each option requires their base file system and LIBICONV. options CD9660_ICONV options MSDOSFS_ICONV options UDF_ICONV ##################################################################### # POSIX P1003.1B # Real time extensions added in the 1993 POSIX # _KPOSIX_PRIORITY_SCHEDULING: Build in _POSIX_PRIORITY_SCHEDULING options _KPOSIX_PRIORITY_SCHEDULING # p1003_1b_semaphores are very experimental, # user should be ready to assist in debugging if problems arise. options P1003_1B_SEMAPHORES # POSIX message queue options P1003_1B_MQUEUE ##################################################################### # SECURITY POLICY PARAMETERS # Support for BSM audit options AUDIT # Support for Mandatory Access Control (MAC): options MAC options MAC_BIBA options MAC_BSDEXTENDED options MAC_IFOFF options MAC_LOMAC options MAC_MLS options MAC_NONE options MAC_NTPD options MAC_PARTITION options MAC_PORTACL options MAC_SEEOTHERUIDS options MAC_STUB options MAC_TEST # Support for Capsicum options CAPABILITIES # fine-grained rights on file descriptors options CAPABILITY_MODE # sandboxes with no global namespace access ##################################################################### # CLOCK OPTIONS # The granularity of operation is controlled by the kernel option HZ whose # default value (1000 on most architectures) means a granularity of 1ms # (1s/HZ). Historically, the default was 100, but finer granularity is # required for DUMMYNET and other systems on modern hardware. 
There are # reasonable arguments that HZ should, in fact, be 100 still; consider, # that reducing the granularity too much might cause excessive overhead in # clock interrupt processing, potentially causing ticks to be missed and thus # actually reducing the accuracy of operation. options HZ=100 # Enable support for the kernel PLL to use an external PPS signal, # under supervision of [x]ntpd(8) # More info in ntpd documentation: http://www.eecis.udel.edu/~ntp options PPS_SYNC # Enable support for generic feed-forward clocks in the kernel. # The feed-forward clock support is an alternative to the feedback oriented # ntpd/system clock approach, and is to be used with a feed-forward # synchronization algorithm such as the RADclock: # More info here: http://www.synclab.org/radclock options FFCLOCK ##################################################################### # SCSI DEVICES # SCSI DEVICE CONFIGURATION # The SCSI subsystem consists of the `base' SCSI code, a number of # high-level SCSI device `type' drivers, and the low-level host-adapter # device drivers. The host adapters are listed in the ISA and PCI # device configuration sections below. # # It is possible to wire down your SCSI devices so that a given bus, # target, and LUN always come on line as the same device unit. In # earlier versions the unit numbers were assigned in the order that # the devices were probed on the SCSI bus. This means that if you # removed a disk drive, you may have had to rewrite your /etc/fstab # file, and also that you had to be careful when adding a new disk # as it may have been probed earlier and moved your device configuration # around. (See also option GEOM_VOL for a different solution to this # problem.) # This old behavior is maintained as the default behavior. The unit # assignment begins with the first non-wired down unit for a device # type. For example, if you wire a disk as "da3" then the first # non-wired disk will be assigned da4. # The syntax for wiring down devices is: hint.scbus.0.at="ahc0" hint.scbus.1.at="ahc1" hint.scbus.1.bus="0" hint.scbus.3.at="ahc2" hint.scbus.3.bus="0" hint.scbus.2.at="ahc2" hint.scbus.2.bus="1" hint.da.0.at="scbus0" hint.da.0.target="0" hint.da.0.unit="0" hint.da.1.at="scbus3" hint.da.1.target="1" hint.da.2.at="scbus2" hint.da.2.target="3" hint.sa.1.at="scbus1" hint.sa.1.target="6" # "units" (SCSI logical unit number) that are not specified are # treated as if specified as LUN 0. # All SCSI devices allocate as many units as are required. # The ch driver drives SCSI Media Changer ("jukebox") devices. # # The da driver drives SCSI Direct Access ("disk") and Optical Media # ("WORM") devices. # # The sa driver drives SCSI Sequential Access ("tape") devices. # # The cd driver drives SCSI Read Only Direct Access ("cd") devices. # # The ses driver drives SCSI Environment Services ("ses") and # SAF-TE ("SCSI Accessible Fault-Tolerant Enclosure") devices. # # The pt driver drives SCSI Processor devices. # # The sg driver provides a passthrough API that is compatible with the # Linux SG driver. It will work in conjunction with the COMPAT_LINUX # option to run linux SG apps. It can also stand on its own and provide # source level API compatibility for porting apps to FreeBSD. # # Target Mode support is provided here but also requires that a SIM # (SCSI Host Adapter Driver) provide support as well. # # The targ driver provides target mode support as a Processor type device. # It exists to give the minimal context necessary to respond to Inquiry # commands. 
There is a sample user application that shows how the rest # of the command support might be done in /usr/share/examples/scsi_target. # # The targbh driver provides target mode support and exists to respond # to incoming commands that do not otherwise have a logical unit assigned # to them. # # The pass driver provides a passthrough API to access the CAM subsystem. device scbus #base SCSI code device ch #SCSI media changers device da #SCSI direct access devices (aka disks) device sa #SCSI tapes device cd #SCSI CD-ROMs device ses #Enclosure Services (SES and SAF-TE) device pt #SCSI processor device targ #SCSI Target Mode Code device targbh #SCSI Target Mode Blackhole Device device pass #CAM passthrough driver device sg #Linux SCSI passthrough device ctl #CAM Target Layer # CAM OPTIONS: # debugging options: # CAMDEBUG Compile in all possible debugging. # CAM_DEBUG_COMPILE Debug levels to compile in. # CAM_DEBUG_FLAGS Debug levels to enable on boot. # CAM_DEBUG_BUS Limit debugging to the given bus. # CAM_DEBUG_TARGET Limit debugging to the given target. # CAM_DEBUG_LUN Limit debugging to the given lun. # CAM_DEBUG_DELAY Delay in us after printing each debug line. # # CAM_MAX_HIGHPOWER: Maximum number of concurrent high power (start unit) cmds # SCSI_NO_SENSE_STRINGS: When defined disables sense descriptions # SCSI_NO_OP_STRINGS: When defined disables opcode descriptions # SCSI_DELAY: The number of MILLISECONDS to freeze the SIM (scsi adapter) # queue after a bus reset, and the number of milliseconds to # freeze the device queue after a bus device reset. This # can be changed at boot and runtime with the # kern.cam.scsi_delay tunable/sysctl. options CAMDEBUG options CAM_DEBUG_COMPILE=-1 options CAM_DEBUG_FLAGS=(CAM_DEBUG_INFO|CAM_DEBUG_PROBE|CAM_DEBUG_PERIPH) options CAM_DEBUG_BUS=-1 options CAM_DEBUG_TARGET=-1 options CAM_DEBUG_LUN=-1 options CAM_DEBUG_DELAY=1 options CAM_MAX_HIGHPOWER=4 options SCSI_NO_SENSE_STRINGS options SCSI_NO_OP_STRINGS options SCSI_DELAY=5000 # Be pessimistic about Joe SCSI device options CAM_IOSCHED_DYNAMIC options CAM_TEST_FAILURE # Options for the CAM CDROM driver: # CHANGER_MIN_BUSY_SECONDS: Guaranteed minimum time quantum for a changer LUN # CHANGER_MAX_BUSY_SECONDS: Maximum time quantum per changer LUN, only # enforced if there is I/O waiting for another LUN # The compiled in defaults for these variables are 2 and 10 seconds, # respectively. # # These can also be changed on the fly with the following sysctl variables: # kern.cam.cd.changer.min_busy_seconds # kern.cam.cd.changer.max_busy_seconds # options CHANGER_MIN_BUSY_SECONDS=2 options CHANGER_MAX_BUSY_SECONDS=10 # Options for the CAM sequential access driver: # SA_IO_TIMEOUT: Timeout for read/write/wfm operations, in minutes # SA_SPACE_TIMEOUT: Timeout for space operations, in minutes # SA_REWIND_TIMEOUT: Timeout for rewind operations, in minutes # SA_ERASE_TIMEOUT: Timeout for erase operations, in minutes # SA_1FM_AT_EOD: Default to model which only has a default one filemark at EOT. options SA_IO_TIMEOUT=4 options SA_SPACE_TIMEOUT=60 options SA_REWIND_TIMEOUT=(2*60) options SA_ERASE_TIMEOUT=(4*60) options SA_1FM_AT_EOD # Optional timeout for the CAM processor target (pt) device # This is specified in seconds. The default is 60 seconds. 
options SCSI_PT_DEFAULT_TIMEOUT=60 # Optional enable of doing SES passthrough on other devices (e.g., disks) # # Normally disabled because a lot of newer SCSI disks report themselves # as having SES capabilities, but this can then clot up attempts to build # a topology with the SES device that's on the box these drives are in.... options SES_ENABLE_PASSTHROUGH ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS device pty #BSD-style compatibility pseudo ttys device nmdm #back-to-back tty devices device md #Memory/malloc disk device snp #Snoop device - to look at pty/vty/etc.. device ccd #Concatenated disk driver device firmware #firmware(9) support # Kernel side iconv library options LIBICONV # Size of the kernel message buffer. Should be N * pagesize. options MSGBUF_SIZE=40960 ##################################################################### # HARDWARE BUS CONFIGURATION # # PCI bus & PCI options: # device pci options PCI_HP # PCI-Express native HotPlug options PCI_IOV # PCI SR-IOV support ##################################################################### # HARDWARE DEVICE CONFIGURATION # For ISA the required hints are listed. # PCI, CardBus, SD/MMC and pccard are self identifying buses, so # no hints are needed. # # Mandatory devices: # # These options are valid for other keyboard drivers as well. options KBD_DISABLE_KEYMAP_LOAD # refuse to load a keymap options KBD_INSTALL_CDEV # install a CDEV entry in /dev device kbdmux # keyboard multiplexer options KBDMUX_DFLT_KEYMAP # specify the built-in keymap makeoptions KBDMUX_DFLT_KEYMAP=it.iso options FB_DEBUG # Frame buffer debugging device splash # Splash screen and screen saver support # Various screen savers. device blank_saver device daemon_saver device dragon_saver device fade_saver device fire_saver device green_saver device logo_saver device rain_saver device snake_saver device star_saver device warp_saver # The syscons console driver (SCO color console compatible). device sc hint.sc.0.at="isa" options MAXCONS=16 # number of virtual consoles options SC_ALT_MOUSE_IMAGE # simplified mouse cursor in text mode options SC_DFLT_FONT # compile font in makeoptions SC_DFLT_FONT=cp850 options SC_DFLT_TERM=\"sc\" # default terminal emulator options SC_DISABLE_KDBKEY # disable `debug' key options SC_DISABLE_REBOOT # disable reboot key sequence options SC_HISTORY_SIZE=200 # number of history buffer lines options SC_MOUSE_CHAR=0x3 # char code for text mode mouse cursor options SC_PIXEL_MODE # add support for the raster text mode # The following options will let you change the default colors of syscons. options SC_NORM_ATTR=(FG_GREEN|BG_BLACK) options SC_NORM_REV_ATTR=(FG_YELLOW|BG_GREEN) options SC_KERNEL_CONS_ATTR=(FG_RED|BG_BLACK) options SC_KERNEL_CONS_ATTRS=\"\x0c\x0d\x0e\x0f\x02\x09\x0a\x0b\" options SC_KERNEL_CONS_REV_ATTR=(FG_BLACK|BG_RED) # The following options will let you change the default behavior of # cut-n-paste feature options SC_CUT_SPACES2TABS # convert leading spaces into tabs options SC_CUT_SEPCHARS=\"x09\" # set of characters that delimit words # (default is single space - \"x20\") # If you have a two button mouse, you may want to add the following option # to use the right button of the mouse to paste text. options SC_TWOBUTTON_MOUSE # You can selectively disable features in syscons. 
options SC_NO_CUTPASTE options SC_NO_FONT_LOADING options SC_NO_HISTORY options SC_NO_MODE_CHANGE options SC_NO_SYSMOUSE options SC_NO_SUSPEND_VTYSWITCH #!options SC_NO_TERM_DUMB #!options SC_NO_TERM_SC #!options SC_NO_TERM_SCTEKEN # `flags' for sc # 0x80 Put the video card in the VESA 800x600 dots, 16 color mode # 0x100 Probe for a keyboard device periodically if one is not present # Enable experimental features of the syscons terminal emulator (teken). options TEKEN_CONS25 # cons25-style terminal emulation options TEKEN_UTF8 # UTF-8 output handling # The vt video console driver. device vt options VT_ALT_TO_ESC_HACK=1 # Prepend ESC sequence to ALT keys options VT_MAXWINDOWS=16 # Number of virtual consoles options VT_TWOBUTTON_MOUSE # Use right mouse button to paste # The following options set the default framebuffer size. options VT_FB_DEFAULT_HEIGHT=480 options VT_FB_DEFAULT_WIDTH=640 # The following options will let you change the default vt terminal colors. options TERMINAL_NORM_ATTR=(FG_GREEN|BG_BLACK) options TERMINAL_KERN_ATTR=(FG_LIGHTRED|BG_BLACK) # # Optional devices: # # # SCSI host adapters: # # ahc: Adaptec 274x/284x/2910/293x/294x/394x/3950x/3960x/398X/4944/ # 19160x/29160x, aic7770/aic78xx # ahd: Adaptec 29320/39320 Controllers. # esp: Emulex ESP, NCR 53C9x and QLogic FAS families based controllers # including the AMD Am53C974 (found on devices such as the Tekram # DC-390(T)) and the Sun ESP and FAS families of controllers # isp: Qlogic ISP 1020, 1040 and 1040B PCI SCSI host adapters, # ISP 1240 Dual Ultra SCSI, ISP 1080 and 1280 (Dual) Ultra2, # ISP 12160 Ultra3 SCSI, # Qlogic ISP 2100 and ISP 2200 1Gb Fibre Channel host adapters. # Qlogic ISP 2300 and ISP 2312 2Gb Fibre Channel host adapters. # Qlogic ISP 2322 and ISP 6322 2Gb Fibre Channel host adapters. # ispfw: Firmware module for Qlogic host adapters # mpr: LSI-Logic MPT/Fusion Gen 3 # mps: LSI-Logic MPT/Fusion Gen 2 # mpt: LSI-Logic MPT/Fusion 53c1020 or 53c1030 Ultra4 # or FC9x9 Fibre Channel host adapters. # sym: Symbios/Logic 53C8XX family of PCI-SCSI I/O processors: # 53C810, 53C810A, 53C815, 53C825, 53C825A, 53C860, 53C875, # 53C876, 53C885, 53C895, 53C895A, 53C896, 53C897, 53C1510D, # 53C1010-33, 53C1010-66. # trm: Tekram DC395U/UW/F DC315U adapters. device ahc device ahd device esp device iscsi_initiator device isp hint.isp.0.disable="1" hint.isp.0.role="3" hint.isp.0.prefer_iomap="1" hint.isp.0.prefer_memmap="1" hint.isp.0.fwload_disable="1" hint.isp.0.ignore_nvram="1" hint.isp.0.fullduplex="1" hint.isp.0.topology="lport" hint.isp.0.topology="nport" hint.isp.0.topology="lport-only" hint.isp.0.topology="nport-only" # we can't get u_int64_t types, nor can we get strings if it's got # a leading 0x, hence this silly dodge. hint.isp.0.portwnn="w50000000aaaa0000" hint.isp.0.nodewnn="w50000000aaaa0001" device ispfw device mpr # LSI-Logic MPT-Fusion 3 device mps # LSI-Logic MPT-Fusion 2 device mpt # LSI-Logic MPT-Fusion device sym device trm # The aic7xxx driver will attempt to use memory mapped I/O for all PCI # controllers that have it configured only if this option is set. Unfortunately, # this doesn't work on some motherboards, which prevents it from being the # default. options AHC_ALLOW_MEMIO # Dump the contents of the ahc controller configuration PROM. options AHC_DUMP_EEPROM # Bitmap of units to enable targetmode operations. options AHC_TMODE_ENABLE # Compile in Aic7xxx Debugging code. options AHC_DEBUG # Aic7xxx driver debugging options. 
# Compile in Aic7xxx Debugging code.
options AHC_DEBUG

# Aic7xxx driver debugging options. See sys/dev/aic7xxx/aic7xxx.h
options AHC_DEBUG_OPTS

# Print register bitfields in debug output. Adds ~128k to driver.
# See ahc(4).
options AHC_REG_PRETTY_PRINT

# Compile in aic79xx debugging code.
options AHD_DEBUG

# Aic79xx driver debugging options. Adds ~215k to driver. See ahd(4).
options AHD_DEBUG_OPTS=0xFFFFFFFF

# Print human-readable register definitions when debugging
options AHD_REG_PRETTY_PRINT

# Bitmap of units to enable targetmode operations.
options AHD_TMODE_ENABLE

# Options used in dev/iscsi (Software iSCSI stack)
#
options ISCSI_INITIATOR_DEBUG=9

# Options used in dev/isp/ (Qlogic SCSI/FC driver).
#
#	ISP_TARGET_MODE - enable target mode operation
#
options ISP_TARGET_MODE=1
#
#	ISP_DEFAULT_ROLES - default role
#		none=0
#		target=1
#		initiator=2
#		both=3 (not supported currently)
#
#	ISP_INTERNAL_TARGET (trivial internal disk target, for testing)
#
options ISP_DEFAULT_ROLES=0

#options SYM_SETUP_SCSI_DIFF # -HVD support for 825a, 875, 885
                             #  disabled:0 (default), enabled:1
#options SYM_SETUP_PCI_PARITY # -PCI parity checking
                              #  disabled:0, enabled:1 (default)
#options SYM_SETUP_MAX_LUN # -Number of LUNs supported
                           #  default:8, range:[1..64]

#
# Compaq "CISS" RAID controllers (SmartRAID 5* series)
# These controllers have a SCSI-like interface, and require the
# CAM infrastructure.
#
device ciss

#
# Intel Integrated RAID controllers.
# This driver was developed and is maintained by Intel. Contacts
# at Intel for this driver are
# "Kannanthanam, Boji T" and
# "Leubner, Achim".
#
device iir

#
# Mylex AcceleRAID and eXtremeRAID controllers with v6 and later
# firmware. These controllers have a SCSI-like interface, and require
# the CAM infrastructure.
#
device mly

#
# Compaq Smart RAID, Mylex DAC960 and AMI MegaRAID controllers. Only
# one entry is needed; the code will find and configure all supported
# controllers.
#
device ida # Compaq Smart RAID
device mlx # Mylex DAC960
device amr # AMI MegaRAID
device amrp # SCSI Passthrough interface (optional, CAM req.)
device mfi # LSI MegaRAID SAS
device mfip # LSI MegaRAID SAS passthrough, requires CAM
options MFI_DEBUG
device mrsas # LSI/Avago MegaRAID SAS/SATA, 6Gb/s and 12Gb/s

#
# 3ware ATA RAID
#
device twe # 3ware ATA RAID

#
# Serial ATA host controllers:
#
# ahci: Advanced Host Controller Interface (AHCI) compatible
# mvs:  Marvell 88SX50XX/88SX60XX/88SX70XX/SoC controllers
# siis: SiliconImage SiI3124/SiI3132/SiI3531 controllers
#
# These drivers are part of the cam(4) subsystem. They supersede the less
# featured ata(4) subsystem drivers, supporting the same hardware.

device ahci
device mvs
device siis

#
# The 'ATA' driver supports all legacy ATA/ATAPI controllers, including
# PC Card devices. You only need one "device ata" for it to find all
# PCI and PC Card ATA/ATAPI devices on modern machines.
# Alternatively, individual bus and chipset drivers may be chosen by using
# the 'atacore' driver then selecting the drivers on a per vendor basis.
# For example to build a system which only supports a VIA chipset,
# omit 'ata' and include the 'atacore', 'atapci' and 'atavia' drivers.
device ata

# Modular ATA
#device atacore # Core ATA functionality
#device atapccard # CARDBUS support
#device ataisa # ISA bus support
#device atapci # PCI bus support; only generic chipset support

# PCI ATA chipsets
#device ataacard # ACARD
#device ataacerlabs # Acer Labs Inc.
(ALI) #device ataamd # American Micro Devices (AMD) #device ataati # ATI #device atacenatek # Cenatek #device atacypress # Cypress #device atacyrix # Cyrix #device atahighpoint # HighPoint #device ataintel # Intel #device ataite # Integrated Technology Inc. (ITE) #device atajmicron # JMicron #device atamarvell # Marvell #device atamicron # Micron #device atanational # National #device atanetcell # NetCell #device atanvidia # nVidia #device atapromise # Promise #device ataserverworks # ServerWorks #device atasiliconimage # Silicon Image Inc. (SiI) (formerly CMD) #device atasis # Silicon Integrated Systems Corp.(SiS) #device atavia # VIA Technologies Inc. # # For older non-PCI, non-PnPBIOS systems, these are the hints lines to add: hint.ata.0.at="isa" hint.ata.0.port="0x1f0" hint.ata.0.irq="14" hint.ata.1.at="isa" hint.ata.1.port="0x170" hint.ata.1.irq="15" # # Standard floppy disk controllers and floppy tapes, supports # the Y-E DATA External FDD (PC Card) # device fdc hint.fdc.0.at="isa" hint.fdc.0.port="0x3F0" hint.fdc.0.irq="6" hint.fdc.0.drq="2" # # FDC_DEBUG enables floppy debugging. Since the debug output is huge, you # gotta turn it actually on by setting the variable fd_debug with DDB, # however. options FDC_DEBUG # # Activate this line if you happen to have an Insight floppy tape. # Probing them proved to be dangerous for people with floppy disks only, # so it's "hidden" behind a flag: #hint.fdc.0.flags="1" # Specify floppy devices hint.fd.0.at="fdc0" hint.fd.0.drive="0" hint.fd.1.at="fdc0" hint.fd.1.drive="1" # # uart: newbusified driver for serial interfaces. It consolidates the sio(4), # sab(4) and zs(4) drivers. # device uart # Options for uart(4) options UART_PPS_ON_CTS # Do time pulse capturing using CTS # instead of DCD. options UART_POLL_FREQ # Set polling rate, used when hw has # no interrupt support (50 Hz default). # The following hint should only be used for pure ISA devices. It is not # needed otherwise. Use of hints is strongly discouraged. hint.uart.0.at="isa" # The following 3 hints are used when the UART is a system device (i.e., a # console or debug port), but only on platforms that don't have any other # means to pass the information to the kernel. The unit number of the hint # is only used to bundle the hints together. There is no relation to the # unit number of the probed UART. hint.uart.0.port="0x3f8" hint.uart.0.flags="0x10" hint.uart.0.baud="115200" # `flags' for serial drivers that support consoles like sio(4) and uart(4): # 0x10 enable console support for this unit. Other console flags # (if applicable) are ignored unless this is set. Enabling # console support does not make the unit the preferred console. # Boot with -h or set boot_serial=YES in the loader. For sio(4) # specifically, the 0x20 flag can also be set (see above). # Currently, at most one unit can have console support; the # first one (in config file order) with this flag set is # preferred. Setting this flag for sio0 gives the old behavior. # 0x80 use this port for serial line gdb support in ddb. Also known # as debug port. # # Options for serial drivers that support consoles: options BREAK_TO_DEBUGGER # A BREAK/DBG on the console goes to # ddb, if available. # Solaris implements a new BREAK which is initiated by a character # sequence CR ~ ^b which is similar to a familiar pattern used on # Sun servers by the Remote Console. There are FreeBSD extensions: # CR ~ ^p requests force panic and CR ~ ^r requests a clean reboot. 
options ALT_BREAK_TO_DEBUGGER

# Serial Communications Controller
# Supports the Siemens SAB 82532 and Zilog Z8530 multi-channel
# communications controllers.
device scc

# PCI Universal Communications driver
# Supports various multi-port PCI I/O cards.
device puc

#
# Network interfaces:
#
# MII bus support is required for many PCI Ethernet NICs,
# namely those which use MII-compliant transceivers or implement
# transceiver control interfaces that operate like an MII. Adding
# "device miibus" to the kernel config pulls in support for the generic
# miibus API, the common support for bit-bang'ing the MII and all
# of the PHY drivers, including a generic one for PHYs that aren't
# specifically handled by an individual driver. Support for specific
# PHYs may be built by adding "device mii", "device mii_bitbang" if
# needed by the NIC driver and then adding the appropriate PHY driver.
# (A short example follows the PHY device list below.)

device mii # Minimal MII support
device mii_bitbang # Common module for bit-bang'ing the MII
device miibus # MII support w/ bit-bang'ing and all PHYs

device acphy # Altima Communications AC101
device amphy # AMD AM79c873 / Davicom DM910{1,2}
device atphy # Attansic/Atheros F1
device axphy # Asix Semiconductor AX88x9x
device bmtphy # Broadcom BCM5201/BCM5202 and 3Com 3c905C
device bnxt # Broadcom NetXtreme-C/NetXtreme-E
device brgphy # Broadcom BCM54xx/57xx 1000baseTX
device ciphy # Cicada/Vitesse CS/VSC8xxx
device e1000phy # Marvell 88E1000 1000/100/10-BT
device gentbi # Generic 10-bit 1000BASE-{LX,SX} fiber ifaces
device icsphy # ICS ICS1889-1893
device ip1000phy # IC Plus IP1000A/IP1001
device jmphy # JMicron JMP211/JMP202
device lxtphy # Level One LXT-970
device mlphy # Micro Linear 6692
device nsgphy # NatSemi DP8361/DP83865/DP83891
device nsphy # NatSemi DP83840A
device nsphyter # NatSemi DP83843/DP83815
device pnaphy # HomePNA
device qsphy # Quality Semiconductor QS6612
device rdcphy # RDC Semiconductor R6040
device rgephy # RealTek 8169S/8110S/8211B/8211C
device rlphy # RealTek 8139
device rlswitch # RealTek 8305
device smcphy # SMSC LAN91C111
device tdkphy # TDK 89Q2120
device tlphy # Texas Instruments ThunderLAN
device truephy # LSI TruePHY
device xmphy # XaQti XMAC II
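# As a hypothetical illustration of the minimal-PHY scheme described
# above: a kernel that only needs a RealTek 8169S-based NIC could drop
# "device miibus" and instead use
#
#	device mii
#	device rgephy # RealTek 8169S/8110S/8211B/8211C
#	device re
#
# adding "device mii_bitbang" only if the NIC driver requires it.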
# an: Aironet 4500/4800 802.11 wireless adapters. Supports the PCMCIA,
#     PCI and ISA varieties.
# ae: Support for gigabit ethernet adapters based on the Attansic/Atheros
#     L2 PCI-Express FastEthernet controllers.
# age: Support for gigabit ethernet adapters based on the Attansic/Atheros
#      L1 PCI express gigabit ethernet controllers.
# alc: Support for Atheros AR8131/AR8132 PCIe ethernet controllers.
# ale: Support for Atheros AR8121/AR8113/AR8114 PCIe ethernet controllers.
# ath: Atheros a/b/g WiFi adapters (requires ath_hal and wlan)
# bce: Broadcom NetXtreme II (BCM5706/BCM5708) PCI/PCIe Gigabit Ethernet
#      adapters.
# bfe: Broadcom BCM4401 Ethernet adapter.
# bge: Support for gigabit ethernet adapters based on the Broadcom
#      BCM570x family of controllers, including the 3Com 3c996-T,
#      the Netgear GA302T, the SysKonnect SK-9D21 and SK-9D41, and
#      the embedded gigE NICs on Dell PowerEdge 2550 servers.
# bnxt: Broadcom NetXtreme-C and NetXtreme-E PCIe 10/25/50G Ethernet adapters.
# bxe: Broadcom NetXtreme II (BCM5771X/BCM578XX) PCIe 10Gb Ethernet
#      adapters.
# bwi: Broadcom BCM430* and BCM431* family of wireless adapters.
# bwn: Broadcom BCM43xx family of wireless adapters.
# cas: Sun Cassini/Cassini+ and National Semiconductor DP83065 Saturn
# cxgb: Chelsio T3 based 1GbE/10GbE PCIe Ethernet adapters.
# cxgbe: Chelsio T4, T5, and T6-based 1/10/25/40/100GbE PCIe Ethernet
#        adapters.
# cxgbev: Chelsio T4, T5, and T6-based PCIe Virtual Functions.
# dc: Support for PCI fast ethernet adapters based on the DEC/Intel 21143
#     and various workalikes including:
#     the ADMtek AL981 Comet and AN985 Centaur, the ASIX Electronics
#     AX88140A and AX88141, the Davicom DM9100 and DM9102, the Lite-On
#     82c168 and 82c169 PNIC, the Lite-On/Macronix LC82C115 PNIC II
#     and the Macronix 98713/98713A/98715/98715A/98725 PMAC. This driver
#     replaces the old al, ax, dm, pn and mx drivers. List of brands:
#     Digital DE500-BA, Kingston KNE100TX, D-Link DFE-570TX, SOHOware SFA110,
#     SVEC PN102-TX, CNet Pro110B, 120A, and 120B, Compex RL100-TX,
#     LinkSys LNE100TX, LNE100TX V2.0, Jaton XpressNet, Alfa Inc GFC2204,
#     KNE110TX.
# de: Digital Equipment DC21040
# em: Intel Pro/1000 Gigabit Ethernet 82542, 82543, 82544 based adapters.
# ep: 3Com 3C509, 3C529, 3C556, 3C562D, 3C563D, 3C572, 3C574X, 3C579, 3C589
#     and PC Card devices using these chipsets.
# ex: Intel EtherExpress Pro/10 and other i82595-based adapters,
#     Olicom Ethernet PC Card devices.
# fe: Fujitsu MB86960A/MB86965A Ethernet
# fxp: Intel EtherExpress Pro/100B
#      (hint of prefer_iomap can be done to prefer I/O instead of Mem mapping)
# gem: Apple GMAC/Sun ERI/Sun GEM
# hme: Sun HME (Happy Meal Ethernet)
# jme: JMicron JMC260 Fast Ethernet/JMC250 Gigabit Ethernet based adapters.
# le: AMD Am7900 LANCE and Am79C9xx PCnet
# lge: Support for PCI gigabit ethernet adapters based on the Level 1
#      LXT1001 NetCellerator chipset. This includes the D-Link DGE-500SX,
#      SMC TigerCard 1000 (SMC9462SX), and some Addtron cards.
# lio: Support for Cavium 23XX Ethernet adapters
# malo: Marvell Libertas wireless NICs.
# mwl: Marvell 88W8363 802.11n wireless NICs.
#      Requires the mwl firmware module
# mwlfw: Marvell 88W8363 firmware
# msk: Support for gigabit ethernet adapters based on the Marvell/SysKonnect
#      Yukon II Gigabit controllers, including 88E8021, 88E8022, 88E8061,
#      88E8062, 88E8035, 88E8036, 88E8038, 88E8050, 88E8052, 88E8053,
#      88E8055, 88E8056 and D-Link 560T/550SX.
# mlx5: Mellanox ConnectX-4 and ConnectX-4 LX IB and Eth shared code module.
# mlx5en: Mellanox ConnectX-4 and ConnectX-4 LX PCIe Ethernet adapters.
# my: Myson Fast Ethernet (MTD80X, MTD89X)
# nge: Support for PCI gigabit ethernet adapters based on the National
#      Semiconductor DP83820 and DP83821 chipset. This includes the
#      SMC EZ Card 1000 (SMC9462TX), D-Link DGE-500T, Asante FriendlyNet
#      GigaNIX 1000TA and 1000TPC, the Addtron AEG320T, the Surecom
#      EP-320G-TX and the Netgear GA622T.
# oce: Emulex 10 Gbit adapters (OneConnect Ethernet)
# pcn: Support for PCI fast ethernet adapters based on the AMD Am79c97x
#      PCnet-FAST, PCnet-FAST+, PCnet-FAST III, PCnet-PRO and PCnet-Home
#      chipsets. These can also be handled by the le(4) driver if the
#      pcn(4) driver is left out of the kernel. The le(4) driver does not
#      support the additional features like the MII bus and burst mode of
#      the PCnet-FAST and greater chipsets though.
# ral: Ralink Technology IEEE 802.11 wireless adapter
# re: RealTek 8139C+/8169/816xS/811xS/8101E PCI/PCIe Ethernet adapter
# rl: Support for PCI fast ethernet adapters based on the RealTek 8129/8139
#     chipset. Note that the RealTek driver defaults to using programmed
#     I/O to do register accesses because memory mapped mode seems to cause
#     severe lockups on SMP hardware.
This driver also supports the # Accton EN1207D `Cheetah' adapter, which uses a chip called # the MPX 5030/5038, which is either a RealTek in disguise or a # RealTek workalike. Note that the D-Link DFE-530TX+ uses the RealTek # chipset and is supported by this driver, not the 'vr' driver. # rtwn: RealTek wireless adapters. # rtwnfw: RealTek wireless firmware. # sf: Support for Adaptec Duralink PCI fast ethernet adapters based on the # Adaptec AIC-6915 "starfire" controller. # This includes dual and quad port cards, as well as one 100baseFX card. # Most of these are 64-bit PCI devices, except for one single port # card which is 32-bit. # sge: Silicon Integrated Systems SiS190/191 Fast/Gigabit Ethernet adapter # sis: Support for NICs based on the Silicon Integrated Systems SiS 900, # SiS 7016 and NS DP83815 PCI fast ethernet controller chips. # sk: Support for the SysKonnect SK-984x series PCI gigabit ethernet NICs. # This includes the SK-9841 and SK-9842 single port cards (single mode # and multimode fiber) and the SK-9843 and SK-9844 dual port cards # (also single mode and multimode). # The driver will autodetect the number of ports on the card and # attach each one as a separate network interface. # sn: Support for ISA and PC Card Ethernet devices using the # SMC91C90/92/94/95 chips. # ste: Sundance Technologies ST201 PCI fast ethernet controller, includes # the D-Link DFE-550TX. # stge: Support for gigabit ethernet adapters based on the Sundance/Tamarack # TC9021 family of controllers, including the Sundance ST2021/ST2023, # the Sundance/Tamarack TC9021, the D-Link DL-4000 and ASUS NX1101. # ti: Support for PCI gigabit ethernet NICs based on the Alteon Networks # Tigon 1 and Tigon 2 chipsets. This includes the Alteon AceNIC, the # 3Com 3c985, the Netgear GA620 and various others. Note that you will # probably want to bump up kern.ipc.nmbclusters a lot to use this driver. # tl: Support for the Texas Instruments TNETE100 series 'ThunderLAN' # cards and integrated ethernet controllers. This includes several # Compaq Netelligent 10/100 cards and the built-in ethernet controllers # in several Compaq Prosignia, Proliant and Deskpro systems. It also # supports several Olicom 10Mbps and 10/100 boards. # tx: SMC 9432 TX, BTX and FTX cards. (SMC EtherPower II series) # txp: Support for 3Com 3cR990 cards with the "Typhoon" chipset # vr: Support for various fast ethernet adapters based on the VIA # Technologies VT3043 `Rhine I' and VT86C100A `Rhine II' chips, # including the D-Link DFE520TX and D-Link DFE530TX (see 'rl' for # DFE530TX+), the Hawking Technologies PN102TX, and the AOpen/Acer ALN-320. # vte: DM&P Vortex86 RDC R6040 Fast Ethernet # vx: 3Com 3C590 and 3C595 # wb: Support for fast ethernet adapters based on the Winbond W89C840F chip. # Note: this is not the same as the Winbond W89C940F, which is a # NE2000 clone. # wi: Lucent WaveLAN/IEEE 802.11 PCMCIA adapters. Note: this supports both # the PCMCIA and ISA cards: the ISA card is really a PCMCIA to ISA # bridge with a PCMCIA adapter plugged into it. # xe: Xircom/Intel EtherExpress Pro100/16 PC Card ethernet controller, # Accton Fast EtherCard-16, Compaq Netelligent 10/100 PC Card, # Toshiba 10/100 Ethernet PC Card, Xircom 16-bit Ethernet + Modem 56 # xl: Support for the 3Com 3c900, 3c905, 3c905B and 3c905C (Fast) # Etherlink XL cards and integrated controllers. 
This includes the # integrated 3c905B-TX chips in certain Dell Optiplex and Dell # Precision desktop machines and the integrated 3c905-TX chips # in Dell Latitude laptop docking stations. # Also supported: 3Com 3c980(C)-TX, 3Com 3cSOHO100-TX, 3Com 3c450-TX # Order for ISA devices is important here device ep device ex device fe hint.fe.0.at="isa" hint.fe.0.port="0x300" device sn hint.sn.0.at="isa" hint.sn.0.port="0x300" hint.sn.0.irq="10" device an device wi device xe # PCI Ethernet NICs that use the common MII bus controller code. device ae # Attansic/Atheros L2 FastEthernet device age # Attansic/Atheros L1 Gigabit Ethernet device alc # Atheros AR8131/AR8132 Ethernet device ale # Atheros AR8121/AR8113/AR8114 Ethernet device bce # Broadcom BCM5706/BCM5708 Gigabit Ethernet device bfe # Broadcom BCM440x 10/100 Ethernet device bge # Broadcom BCM570xx Gigabit Ethernet device cas # Sun Cassini/Cassini+ and NS DP83065 Saturn device dc # DEC/Intel 21143 and various workalikes device et # Agere ET1310 10/100/Gigabit Ethernet device fxp # Intel EtherExpress PRO/100B (82557, 82558) hint.fxp.0.prefer_iomap="0" device gem # Apple GMAC/Sun ERI/Sun GEM device hme # Sun HME (Happy Meal Ethernet) device jme # JMicron JMC250 Gigabit/JMC260 Fast Ethernet device lge # Level 1 LXT1001 gigabit Ethernet device mlx5 # Shared code module between IB and Ethernet device mlx5en # Mellanox ConnectX-4 and ConnectX-4 LX device msk # Marvell/SysKonnect Yukon II Gigabit Ethernet device my # Myson Fast Ethernet (MTD80X, MTD89X) device nge # NatSemi DP83820 gigabit Ethernet device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 device pcn # AMD Am79C97x PCI 10/100 NICs device sf # Adaptec AIC-6915 (``Starfire'') device sge # Silicon Integrated Systems SiS190/191 device sis # Silicon Integrated Systems SiS 900/SiS 7016 device sk # SysKonnect SK-984x & SK-982x gigabit Ethernet device ste # Sundance ST201 (D-Link DFE-550TX) device stge # Sundance/Tamarack TC9021 gigabit Ethernet device tl # Texas Instruments ThunderLAN device tx # SMC EtherPower II (83c170 ``EPIC'') device vr # VIA Rhine, Rhine II device vte # DM&P Vortex86 RDC R6040 Fast Ethernet device wb # Winbond W89C840F device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') # PCI/PCI-X/PCIe Ethernet NICs that use iflib infrastructure device iflib device em # Intel Pro/1000 Gigabit Ethernet device ix # Intel Pro/10Gbe PCIE Ethernet device ixv # Intel Pro/10Gbe PCIE Ethernet VF # PCI Ethernet NICs. device cxgb # Chelsio T3 10 Gigabit Ethernet device cxgb_t3fw # Chelsio T3 10 Gigabit Ethernet firmware device cxgbe # Chelsio T4-T6 1/10/25/40/100 Gigabit Ethernet device cxgbev # Chelsio T4-T6 Virtual Functions device de # DEC/Intel DC21x4x (``Tulip'') device le # AMD Am7900 LANCE and Am79C9xx PCnet device mxge # Myricom Myri-10G 10GbE NIC device oce # Emulex 10 GbE (OneConnect Ethernet) device ti # Alteon Networks Tigon I/II gigabit Ethernet device txp # 3Com 3cR990 (``Typhoon'') device vx # 3Com 3c590, 3c595 (``Vortex'') # PCI IEEE 802.11 Wireless NICs device ath # Atheros pci/cardbus NIC's device ath_hal # pci/cardbus chip support #device ath_ar5210 # AR5210 chips #device ath_ar5211 # AR5211 chips #device ath_ar5212 # AR5212 chips #device ath_rf2413 #device ath_rf2417 #device ath_rf2425 #device ath_rf5111 #device ath_rf5112 #device ath_rf5413 #device ath_ar5416 # AR5416 chips # All of the AR5212 parts have a problem when paired with the AR71xx # CPUS. These parts have a bug that triggers a fatal bus error on the AR71xx # only. 
Details of the exact nature of the bug are sketchy, but some can be
# found at https://forum.openwrt.org/viewtopic.php?pid=70060 on pages 4, 5 and
# 6. This option enables this workaround. There is a performance penalty
# for this workaround, but without it things don't work at all. The DMA
# from the card usually bursts 128 bytes, but on the affected CPUs, only
# 4 are safe.
options AH_RXCFG_SDMAMW_4BYTES

#device ath_ar9160 # AR9160 chips
#device ath_ar9280 # AR9280 chips
#device ath_ar9285 # AR9285 chips
device ath_rate_sample # SampleRate tx rate control for ath
device bwi # Broadcom BCM430* BCM431*
device bwn # Broadcom BCM43xx
device malo # Marvell Libertas wireless NICs.
device mwl # Marvell 88W8363 802.11n wireless NICs.
device mwlfw
device ral # Ralink Technology RT2500 wireless NICs.
device rtwn # Realtek wireless NICs
device rtwnfw

# Use sf_buf(9) interface for jumbo buffers on ti(4) controllers.
#options TI_SF_BUF_JUMBO

# Turn on the header splitting option for the ti(4) driver firmware. This
# only works for Tigon II chips, and has no effect for Tigon I chips.
# This option requires the TI_SF_BUF_JUMBO option above.
#options TI_JUMBO_HDRSPLIT

# These two options allow manipulating the mbuf cluster size and mbuf size,
# respectively. Be very careful with NIC driver modules when changing
# these from their default values, because that can potentially cause a
# mismatch between the mbuf size assumed by the kernel and the mbuf size
# assumed by a module. The only driver that currently has the ability to
# detect a mismatch is ti(4).
options MCLSHIFT=12 # mbuf cluster shift in bits, 12 == 4KB
options MSIZE=512 # mbuf size in bytes

#
# Sound drivers
#
# sound: The generic sound driver.
#
device sound

#
# snd_*: Device-specific drivers.
#
# The flags of the device tell the device a bit more info about the
# device that normally is obtained through the PnP interface.
#	bit  2..0  secondary DMA channel;
#	bit  4     set if the board uses two dma channels;
#	bit 15..8  board type, overrides autodetection; leave it
#	           zero if you don't know what to put in (and you don't,
#	           since this is unsupported at the moment...).
#
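# As a worked example of this encoding: flags of 0x15 (binary 1 0101),
# as used in the hint.sbc.0 entry further below, select secondary DMA
# channel 5 (bits 2..0 = 5) and mark the board as using two DMA channels
# (bit 4 set), while bits 15..8 stay zero to leave board-type
# autodetection alone.
#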
# snd_ad1816:    Analog Devices AD1816 ISA PnP/non-PnP.
# snd_als4000:   Avance Logic ALS4000 PCI.
# snd_atiixp:    ATI IXP 200/300/400 PCI.
# snd_audiocs:   Crystal Semiconductor CS4231 SBus/EBus. Only
#                for sparc64.
# snd_cmi:       CMedia CMI8338/CMI8738 PCI.
# snd_cs4281:    Crystal Semiconductor CS4281 PCI.
# snd_csa:       Crystal Semiconductor CS461x/428x PCI. (except
#                4281)
# snd_ds1:       Yamaha DS-1 PCI.
# snd_emu10k1:   Creative EMU10K1 PCI and EMU10K2 (Audigy) PCI.
# snd_emu10kx:   Creative SoundBlaster Live! and Audigy
# snd_envy24:    VIA Envy24 and compatible, needs snd_spicds.
# snd_envy24ht:  VIA Envy24HT and compatible, needs snd_spicds.
# snd_es137x:    Ensoniq AudioPCI ES137x PCI.
# snd_ess:       Ensoniq ESS ISA PnP/non-PnP, to be used in
#                conjunction with snd_sbc.
# snd_fm801:     Forte Media FM801 PCI.
# snd_gusc:      Gravis UltraSound ISA PnP/non-PnP.
# snd_hda:       Intel High Definition Audio (Controller) and
#                compatible.
# snd_hdspe:     RME HDSPe AIO and RayDAT.
# snd_ich:       Intel ICH AC'97 and some more audio controllers
#                embedded in a chipset, for example nVidia
#                nForce controllers.
# snd_maestro:   ESS Technology Maestro-1/2x PCI.
# snd_maestro3:  ESS Technology Maestro-3/Allegro PCI.
# snd_mss:       Microsoft Sound System ISA PnP/non-PnP.
# snd_neomagic:  Neomagic 256 AV/ZX PCI.
# snd_sb16:      Creative SoundBlaster16, to be used in
#                conjunction with snd_sbc.
# snd_sb8:       Creative SoundBlaster (pre-16), to be used in
#                conjunction with snd_sbc.
# snd_sbc:       Creative SoundBlaster ISA PnP/non-PnP.
#                Supports ESS and Avance ISA chips as well.
# snd_solo:      ESS Solo-1x PCI.
# snd_spicds:    SPI codec driver, needed by Envy24/Envy24HT drivers.
# snd_t4dwave:   Trident 4DWave DX/NX PCI, Sis 7018 PCI and Acer Labs
#                M5451 PCI.
# snd_uaudio:    USB audio.
# snd_via8233:   VIA VT8233x PCI.
# snd_via82c686: VIA VT82C686A PCI.
# snd_vibes:     S3 Sonicvibes PCI.
device snd_ad1816
device snd_als4000
device snd_atiixp
#device snd_audiocs
device snd_cmi
device snd_cs4281
device snd_csa
device snd_ds1
device snd_emu10k1
device snd_emu10kx
device snd_envy24
device snd_envy24ht
device snd_es137x
device snd_ess
device snd_fm801
device snd_gusc
device snd_hda
device snd_hdspe
device snd_ich
device snd_maestro
device snd_maestro3
device snd_mss
device snd_neomagic
device snd_sb16
device snd_sb8
device snd_sbc
device snd_solo
device snd_spicds
device snd_t4dwave
device snd_uaudio
device snd_via8233
device snd_via82c686
device snd_vibes

# For non-PnP sound cards:
hint.pcm.0.at="isa"
hint.pcm.0.irq="10"
hint.pcm.0.drq="1"
hint.pcm.0.flags="0x0"
hint.sbc.0.at="isa"
hint.sbc.0.port="0x220"
hint.sbc.0.irq="5"
hint.sbc.0.drq="1"
hint.sbc.0.flags="0x15"
hint.gusc.0.at="isa"
hint.gusc.0.port="0x220"
hint.gusc.0.irq="5"
hint.gusc.0.drq="1"
hint.gusc.0.flags="0x13"

#
# Following options are intended for debugging/testing purposes:
#
# SND_DEBUG        Enable extra debugging code that includes
#                  sanity checking and possible increase of
#                  verbosity.
#
# SND_DIAGNOSTIC   Similar in spirit to INVARIANTS/DIAGNOSTIC;
#                  zero tolerance for inconsistencies.
#
# SND_FEEDER_MULTIFORMAT  By default, only 16/32 bit feeders are compiled
#                  in. This option enables most feeder converters
#                  except for 8bit. WARNING: May bloat the kernel.
#
# SND_FEEDER_FULL_MULTIFORMAT  Ditto, but includes 8bit feeders as well.
#
# SND_FEEDER_RATE_HP  (feeder_rate) High precision 64bit arithmetic
#                  as much as possible (the default tries to
#                  avoid it). Possible slowdown.
#
# SND_PCM_64       (Only applicable for i386/32bit arch)
#                  Process 32bit samples through 64bit
#                  integer/arithmetic. Slight increase of dynamic
#                  range at a cost of possible slowdown.
#
# SND_OLDSTEREO    Only 2 channels are allowed, effectively
#                  disabling multichannel processing.
#
options SND_DEBUG
options SND_DIAGNOSTIC
options SND_FEEDER_MULTIFORMAT
options SND_FEEDER_FULL_MULTIFORMAT
options SND_FEEDER_RATE_HP
options SND_PCM_64
options SND_OLDSTEREO

#
# Miscellaneous hardware:
#
# bktr: Brooktree bt848/848a/849a/878/879 video capture and TV Tuner board
# cmx: OmniKey CardMan 4040 pccard smartcard reader
device cmx

#
# The 'bktr' device is a PCI video capture device using the Brooktree
# bt848/bt848a/bt849a/bt878/bt879 chipset. When used with a TV Tuner it forms a
# TV card, e.g. Miro PC/TV, Hauppauge WinCast/TV WinTV, VideoLogic Captivator,
# Intel Smart Video III, AverMedia, IMS Turbo, FlyVideo.
#
# options OVERRIDE_CARD=xxx
# options OVERRIDE_TUNER=xxx
# options OVERRIDE_MSP=1
# options OVERRIDE_DBX=1
# These options can be used to override the auto detection
# The current values for xxx are found in src/sys/dev/bktr/bktr_card.h
# Using sysctl(8), run-time overrides on a per-card basis can be made
#
# options BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_PAL
# or
# options BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_NTSC
# Specifies the default video capture mode.
# This is required for Dual Crystal (28&35MHz) boards where PAL is used
# to prevent hangs during initialization, e.g.
VideoLogic Captivator PCI. # # options BKTR_USE_PLL # This is required for PAL or SECAM boards with a 28MHz crystal and no 35MHz # crystal, e.g. some new Bt878 cards. # # options BKTR_GPIO_ACCESS # This enables IOCTLs which give user level access to the GPIO port. # # options BKTR_NO_MSP_RESET # Prevents the MSP34xx reset. Good if you initialize the MSP in another OS first # # options BKTR_430_FX_MODE # Switch Bt878/879 cards into Intel 430FX chipset compatibility mode. # # options BKTR_SIS_VIA_MODE # Switch Bt878/879 cards into SIS/VIA chipset compatibility mode which is # needed for some old SiS and VIA chipset motherboards. # This also allows Bt878/879 chips to work on old OPTi (<1997) chipset # motherboards and motherboards with bad or incomplete PCI 2.1 support. # As a rough guess, old = before 1998 # # options BKTR_NEW_MSP34XX_DRIVER # Use new, more complete initialization scheme for the msp34* soundchip. # Should fix stereo autodetection if the old driver does only output # mono sound. # # options BKTR_USE_FREEBSD_SMBUS # Compile with FreeBSD SMBus implementation # # Brooktree driver has been ported to the new I2C framework. Thus, # you'll need to have the following 3 lines in the kernel config. # device smbus # device iicbus # device iicbb # device iicsmb # The iic and smb devices are only needed if you want to control other # I2C slaves connected to the external connector of some cards. # device bktr # # PC Card/PCMCIA and Cardbus # # cbb: pci/cardbus bridge implementing YENTA interface # pccard: pccard slots # cardbus: cardbus slots device cbb device pccard device cardbus # # MMC/SD # # mmc MMC/SD bus # mmcsd MMC/SD memory card # sdhci Generic PCI SD Host Controller # device mmc device mmcsd device sdhci # # SMB bus # # System Management Bus support is provided by the 'smbus' device. # Access to the SMBus device is via the 'smb' device (/dev/smb*), # which is a child of the 'smbus' device. # # Supported devices: # smb standard I/O through /dev/smb* # # Supported SMB interfaces: # iicsmb I2C to SMB bridge with any iicbus interface # bktr brooktree848 I2C hardware interface # intpm Intel PIIX4 (82371AB, 82443MX) Power Management Unit # alpm Acer Aladdin-IV/V/Pro2 Power Management Unit # ichsmb Intel ICH SMBus controller chips (82801AA, 82801AB, 82801BA) # viapm VIA VT82C586B/596B/686A and VT8233 Power Management Unit # amdpm AMD 756 Power Management Unit # amdsmb AMD 8111 SMBus 2.0 Controller # nfpm NVIDIA nForce Power Management Unit # nfsmb NVIDIA nForce2/3/4 MCP SMBus 2.0 Controller # ismt Intel SMBus 2.0 controller chips (on Atom S1200, C2000) # device smbus # Bus support, required for smb below. device intpm device alpm device ichsmb device viapm device amdpm device amdsmb device nfpm device nfsmb device ismt device smb # SMBus peripheral devices # # jedec_dimm Asset and temperature reporting for DDR3 and DDR4 DIMMs # device jedec_dimm # I2C Bus # # Philips i2c bus support is provided by the `iicbus' device. # # Supported devices: # ic i2c network interface # iic i2c standard io # iicsmb i2c to smb bridge. Allow i2c i/o with smb commands. # iicoc simple polling driver for OpenCores I2C controller # # Supported interfaces: # bktr brooktree848 I2C software interface # # Other: # iicbb generic I2C bit-banging code (needed by lpbb, bktr) # device iicbus # Bus support, required for ic/iic/iicsmb below. 
device iicbb device ic device iic device iicsmb # smb over i2c bridge device iicoc # OpenCores I2C controller support # I2C peripheral devices # device ds1307 # Dallas DS1307 RTC and compatible device ds13rtc # All Dallas/Maxim ds13xx chips device ds1672 # Dallas DS1672 RTC device ds3231 # Dallas DS3231 RTC + temperature device icee # AT24Cxxx and compatible EEPROMs device lm75 # LM75 compatible temperature sensor device nxprtc # NXP RTCs: PCA/PFC212x PCA/PCF85xx device s35390a # Seiko Instruments S-35390A RTC # Parallel-Port Bus # # Parallel port bus support is provided by the `ppbus' device. # Multiple devices may be attached to the parallel port, devices # are automatically probed and attached when found. # # Supported devices: # vpo Iomega Zip Drive # Requires SCSI disk support ('scbus' and 'da'), best # performance is achieved with ports in EPP 1.9 mode. # lpt Parallel Printer # plip Parallel network interface # ppi General-purpose I/O ("Geek Port") + IEEE1284 I/O # pps Pulse per second Timing Interface # lpbb Philips official parallel port I2C bit-banging interface # pcfclock Parallel port clock driver. # # Supported interfaces: # ppc ISA-bus parallel port interfaces. # options PPC_PROBE_CHIPSET # Enable chipset specific detection # (see flags in ppc(4)) options DEBUG_1284 # IEEE1284 signaling protocol debug options PERIPH_1284 # Makes your computer act as an IEEE1284 # compliant peripheral options DONTPROBE_1284 # Avoid boot detection of PnP parallel devices options VP0_DEBUG # ZIP/ZIP+ debug options LPT_DEBUG # Printer driver debug options PPC_DEBUG # Parallel chipset level debug options PLIP_DEBUG # Parallel network IP interface debug options PCFCLOCK_VERBOSE # Verbose pcfclock driver options PCFCLOCK_MAX_RETRIES=5 # Maximum read tries (default 10) device ppc hint.ppc.0.at="isa" hint.ppc.0.irq="7" device ppbus device vpo device lpt device plip device ppi device pps device lpbb device pcfclock # # Etherswitch framework and drivers # # etherswitch The etherswitch(4) framework # miiproxy Proxy device for miibus(4) functionality # # Switch hardware support: # arswitch Atheros switches # ip17x IC+ 17x family switches # rtl8366r Realtek RTL8366 switches # ukswitch Multi-PHY switches # device etherswitch device miiproxy device arswitch device ip17x device rtl8366rb device ukswitch # Kernel BOOTP support options BOOTP # Use BOOTP to obtain IP address/hostname # Requires NFSCL and NFS_ROOT options BOOTP_NFSROOT # NFS mount root filesystem using BOOTP info options BOOTP_NFSV3 # Use NFS v3 to NFS mount root options BOOTP_COMPAT # Workaround for broken bootp daemons. options BOOTP_WIRED_TO=fxp0 # Use interface fxp0 for BOOTP options BOOTP_BLOCKSIZE=8192 # Override NFS block size # # Enable software watchdog routines, even if hardware watchdog is present. # By default, software watchdog timer is enabled only if no hardware watchdog # is present. # options SW_WATCHDOG # # Add the software deadlock resolver thread. # options DEADLKRES # # Disable swapping of stack pages. This option removes all # code which actually performs swapping, so it's not possible to turn # it back on at run-time. # # This is sometimes usable for systems which don't have any swap space # (see also sysctl "vm.disable_swapspace_pageouts") # #options NO_SWAPPING # Set the number of sf_bufs to allocate. sf_bufs are virtual buffers # for sendfile(2) that are used to map file VM pages, and normally # default to a quantity that is roughly 16*MAXUSERS+512. 
You would # typically want about 4 of these for each simultaneous file send. # options NSFBUFS=1024 # # Enable extra debugging code for locks. This stores the filename and # line of whatever acquired the lock in the lock itself, and changes a # number of function calls to pass around the relevant data. This is # not at all useful unless you are debugging lock code. Note that # modules should be recompiled as this option modifies KBI. # options DEBUG_LOCKS ##################################################################### # USB support # UHCI controller device uhci # OHCI controller device ohci # EHCI controller device ehci # XHCI controller device xhci # SL811 Controller #device slhci # General USB code (mandatory for USB) device usb # # USB Double Bulk Pipe devices device udbp # USB Fm Radio device ufm # USB temperature meter device ugold # USB LED device uled # Human Interface Device (anything with buttons and dials) device uhid # USB keyboard device ukbd # USB printer device ulpt # USB mass storage driver (Requires scbus and da) device umass # USB mass storage driver for device-side mode device usfs # USB support for Belkin F5U109 and Magic Control Technology serial adapters device umct # USB modem support device umodem # USB mouse device ums # USB touchpad(s) device atp device wsp # eGalax USB touch screen device uep # Diamond Rio 500 MP3 player device urio # # USB serial support device ucom # USB support for 3G modem cards by Option, Novatel, Huawei and Sierra device u3g # USB support for Technologies ARK3116 based serial adapters device uark # USB support for Belkin F5U103 and compatible serial adapters device ubsa # USB support for serial adapters based on the FT8U100AX and FT8U232AM device uftdi # USB support for some Windows CE based serial communication. device uipaq # USB support for Prolific PL-2303 serial adapters device uplcom # USB support for Silicon Laboratories CP2101/CP2102 based USB serial adapters device uslcom # USB Visor and Palm devices device uvisor # USB serial support for DDI pocket's PHS device uvscom # # USB ethernet support device uether # ADMtek USB ethernet. Supports the LinkSys USB100TX, # the Billionton USB100, the Melco LU-ATX, the D-Link DSB-650TX # and the SMC 2202USB. Also works with the ADMtek AN986 Pegasus # eval board. device aue # ASIX Electronics AX88172 USB 2.0 ethernet driver. Used in the # LinkSys USB200M and various other adapters. device axe # ASIX Electronics AX88178A/AX88179 USB 2.0/3.0 gigabit ethernet driver. device axge # # Devices which communicate using Ethernet over USB, particularly # Communication Device Class (CDC) Ethernet specification. Supports # Sharp Zaurus PDAs, some DOCSIS cable modems and so on. device cdce # # CATC USB-EL1201A USB ethernet. Supports the CATC Netmate # and Netmate II, and the Belkin F5U111. device cue # # Kawasaki LSI ethernet. Supports the LinkSys USB10T, # Entrega USB-NET-E45, Peracom Ethernet Adapter, the # 3Com 3c19250, the ADS Technologies USB-10BT, the ATen UC10T, # the Netgear EA101, the D-Link DSB-650, the SMC 2102USB # and 2104USB, and the Corega USB-T. device kue # # RealTek RTL8150 USB to fast ethernet. Supports the Melco LUA-KTX # and the GREEN HOUSE GH-USB100B. device rue # # Davicom DM9601E USB to fast ethernet. Supports the Corega FEther USB-TXC. device udav # # RealTek RTL8152/RTL8153 USB Ethernet driver device ure # # Moschip MCS7730/MCS7840 USB to fast ethernet. Supports the Sitecom LN030. 
device mos # # HSxPA devices from Option N.V device uhso # Realtek RTL8188SU/RTL8191SU/RTL8192SU wireless driver device rsu # # Ralink Technology RT2501USB/RT2601USB wireless driver device rum # Ralink Technology RT2700U/RT2800U/RT3000U wireless driver device run # # Atheros AR5523 wireless driver device uath # # Conexant/Intersil PrismGT wireless driver device upgt # # Ralink Technology RT2500USB wireless driver device ural # # RNDIS USB ethernet driver device urndis # Realtek RTL8187B/L wireless driver device urtw # # ZyDas ZD1211/ZD1211B wireless driver device zyd # # Sierra USB wireless driver device usie # # debugging options for the USB subsystem # options USB_DEBUG options U3G_DEBUG # options for ukbd: options UKBD_DFLT_KEYMAP # specify the built-in keymap makeoptions UKBD_DFLT_KEYMAP=jp.106 # options for uplcom: options UPLCOM_INTR_INTERVAL=100 # interrupt pipe interval # in milliseconds # options for uvscom: options UVSCOM_DEFAULT_OPKTSIZE=8 # default output packet size options UVSCOM_INTR_INTERVAL=100 # interrupt pipe interval # in milliseconds ##################################################################### # FireWire support device firewire # FireWire bus code device sbp # SCSI over Firewire (Requires scbus and da) device sbp_targ # SBP-2 Target mode (Requires scbus and targ) device fwe # Ethernet over FireWire (non-standard!) device fwip # IP over FireWire (RFC2734 and RFC3146) ##################################################################### # dcons support (Dumb Console Device) device dcons # dumb console driver device dcons_crom # FireWire attachment options DCONS_BUF_SIZE=16384 # buffer size options DCONS_POLL_HZ=100 # polling rate options DCONS_FORCE_CONSOLE=0 # force to be the primary console options DCONS_FORCE_GDB=1 # force to be the gdb device ##################################################################### # crypto subsystem # # This is a port of the OpenBSD crypto framework. Include this when # configuring IPSEC and when you have a h/w crypto device to accelerate # user applications that link to OpenSSL. # # Drivers are ports from OpenBSD with some simple enhancements that have # been fed back to OpenBSD. device crypto # core crypto support # Only install the cryptodev device if you are running tests, or know # specifically why you need it. In most cases, it is not needed and # will make things slower. device cryptodev # /dev/crypto for access to h/w device rndtest # FIPS 140-2 entropy tester device ccr # Chelsio T6 device hifn # Hifn 7951, 7781, etc. options HIFN_DEBUG # enable debugging support: hw.hifn.debug options HIFN_RNDTEST # enable rndtest support device ubsec # Broadcom 5501, 5601, 58xx options UBSEC_DEBUG # enable debugging support: hw.ubsec.debug options UBSEC_RNDTEST # enable rndtest support ##################################################################### # # Embedded system options: # # An embedded system might want to run something other than init. options INIT_PATH=/sbin/init:/rescue/init # Debug options options BUS_DEBUG # enable newbus debugging options DEBUG_VFS_LOCKS # enable VFS lock debugging options SOCKBUF_DEBUG # enable sockbuf last record/mb tail checking options IFMEDIA_DEBUG # enable debugging in net/if_media.c # # Verbose SYSINIT # # Make the SYSINIT process performed by mi_startup() verbose. This is very # useful when porting to a new architecture. If DDB is also enabled, this # will print function names instead of addresses. 
If defined with a value
# of zero, the verbose code is compiled-in but disabled by default, and can
# be enabled with the debug.verbose_sysinit=1 tunable.
options VERBOSE_SYSINIT

#####################################################################
# SYSV IPC KERNEL PARAMETERS
#
# Maximum number of System V semaphores that can be used on the system at
# one time.
options SEMMNI=11

# Total number of semaphores system wide
options SEMMNS=61

# Total number of undo structures in system
options SEMMNU=31

# Maximum number of System V semaphores that can be used by a single process
# at one time.
options SEMMSL=61

# Maximum number of operations that can be outstanding on a single System V
# semaphore at one time.
options SEMOPM=101

# Maximum number of undo operations that can be outstanding on a single
# System V semaphore at one time.
options SEMUME=11

# Maximum number of shared memory pages system wide.
options SHMALL=1025

# Maximum size, in bytes, of a single System V shared memory region.
options SHMMAX=(SHMMAXPGS*PAGE_SIZE+1)
options SHMMAXPGS=1025

# Minimum size, in bytes, of a single System V shared memory region.
options SHMMIN=2

# Maximum number of shared memory regions that can be used on the system
# at one time.
options SHMMNI=33

# Maximum number of System V shared memory regions that can be attached to
# a single process at one time.
options SHMSEG=9
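# As a worked example of the SHMMAX formula above (an illustrative
# calculation, assuming the usual 4096-byte PAGE_SIZE): with
# SHMMAXPGS=1025, SHMMAX = 1025 * 4096 + 1 = 4198401 bytes, i.e. just
# over 4 MB per shared memory region.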
# Set the amount of time (in seconds) the system will wait before
# rebooting automatically when a kernel panic occurs. If set to (-1),
# the system will wait indefinitely until a key is pressed on the
# console.
options PANIC_REBOOT_WAIT_TIME=16

# Attempt to bypass the buffer cache and put data directly into the
# userland buffer for read operation when O_DIRECT flag is set on the
# file. Both offset and length of the read operation must be
# multiples of the physical media sector size.
#
options DIRECTIO

# Specify a lower limit for the number of swap I/O buffers. They are
# (among other things) used when bypassing the buffer cache due to
# DIRECTIO kernel option enabled and O_DIRECT flag set on file.
#
options NSWBUF_MIN=120

#####################################################################
# More undocumented options for linting.
# Note that documenting these is not considered an affront.

options CAM_DEBUG_DELAY

# VFS cluster debugging.
options CLUSTERDEBUG

options DEBUG

# Kernel filelock debugging.
options LOCKF_DEBUG

# System V compatible message queues
# Please note that the values provided here are used to test kernel
# building. The defaults in the sources provide almost the same numbers.
# MSGSSZ must be a power of 2 between 8 and 1024.
options MSGMNB=2049 # Max number of chars in queue
options MSGMNI=41 # Max number of message queue identifiers
options MSGSEG=2049 # Max number of message segments
options MSGSSZ=16 # Size of a message segment
options MSGTQL=41 # Max number of messages in system

options NBUF=512 # Number of buffer headers

options SC_DEBUG_LEVEL=5 # Syscons debug level
options SC_RENDER_DEBUG # syscons rendering debugging

options VFS_BIO_DEBUG # VFS buffer I/O debugging

options KSTACK_MAX_PAGES=32 # Maximum pages to give the kernel stack
options KSTACK_USAGE_PROF

# Adaptec Array Controller driver options
options AAC_DEBUG # Debugging levels:
                  # 0 - quiet, only emit warnings
                  # 1 - noisy, emit major function
                  #     points and things done
                  # 2 - extremely noisy, emit trace
                  #     items in loops, etc.

# Resource Accounting
options RACCT

# Resource Limits
options RCTL

# Yet more undocumented options for linting.
# BKTR_ALLOC_PAGES has no effect except to cause warnings, and
# BROOKTREE_ALLOC_PAGES hasn't actually been superseded by it, since the
# driver still mostly spells this option BROOKTREE_ALLOC_PAGES.
##options BKTR_ALLOC_PAGES=(217*4+1)
options BROOKTREE_ALLOC_PAGES=(217*4+1)

options MAXFILES=999

# Random number generator
# Allow the CSPRNG algorithm to be loaded as a module.
#options RANDOM_LOADABLE
# Select this to allow high-rate but potentially expensive
# harvesting of Slab-Allocator entropy. In very high-rate
# situations the value of doing this is dubious at best.
options RANDOM_ENABLE_UMA # slab allocator

# Select this to allow high-rate but potentially expensive
# harvesting of the m_next pointer in the mbuf. Note that
# the m_next pointer is NULL except when receiving > 4K
# jumbo frames or sustained bursts by way of LRO. Thus in
# the common case it is stirring zero into the entropy
# pool. In cases where it is not NULL it is pointing to one
# of a small (in the thousands to 10s of thousands) number
# of 256 byte aligned mbufs. Hence it is, even in the best
# case, a poor source of entropy. And in the absence of actual
# runtime analysis of entropy collection it may mislead the user
# into believing that substantially more entropy is being collected
# than in fact is - leading to a different class of security
# risk. In high packet rate situations ethernet entropy
# collection is also very expensive, possibly leading to as
# much as a 50% drop in packets received.
# This option is present to maintain backwards compatibility
# if desired, however it cannot be recommended for use in any
# environment.
options RANDOM_ENABLE_ETHER # ether_input

# Module to enable execution of applications via emulators like QEMU
options IMAGACT_BINMISC

# zlib I/O stream support
# This enables support for compressed core dumps.
options GZIO

# zstd I/O stream support
# This enables support for Zstd compressed core dumps.
options ZSTDIO

# BHND(4) drivers
options BHND_LOGLEVEL # Logging threshold level

# evdev interface
device evdev # input event device support
options EVDEV_SUPPORT # evdev support in legacy drivers
options EVDEV_DEBUG # enable event debug msgs
device uinput # install /dev/uinput cdev
options UINPUT_DEBUG # enable uinput debug msgs

# Encrypted kernel crash dumps.
options EKCD

# Serial Peripheral Interface (SPI) support.
device spibus # Bus support.
device at45d # DataFlash driver
device cqspi #
device mx25l # SPIFlash driver
device n25q #
device spigen # Generic access to SPI devices from userland.
# Enable legacy /dev/spigenN name aliases for /dev/spigenX.Y devices.
options SPIGEN_LEGACY_CDEVNAME # legacy device names for spigen

device xz # xz_embedded LZMA de-compression library

Index: projects/runtime-coverage-v2/sys/conf/dtb.build.mk
===================================================================
--- projects/runtime-coverage-v2/sys/conf/dtb.build.mk (revision 347075)
+++ projects/runtime-coverage-v2/sys/conf/dtb.build.mk (revision 347076)
@@ -1,77 +1,86 @@
# $FreeBSD$
.include
# Grab all the options for a kernel build. For backwards compat, we need to
# do this after bsd.own.mk.
.include "kern.opts.mk"
DTC?= dtc
.if !defined(SYSDIR)
.if defined(S)
SYSDIR= ${S}
.else
# Search for kernel source tree in standard places.
.for _dir in ${.CURDIR}/../.. ${.CURDIR}/../../..
/sys /usr/src/sys .if exists(${_dir}/kern/) SYSDIR= ${_dir:tA} .endif .endfor .endif # defined(S) .endif # defined(SYSDIR) .if !defined(SYSDIR) || !exists(${SYSDIR}/kern/) .error "can't find kernel source tree" .endif -DTB=${DTS:T:R:S/$/.dtb/} +.for _dts in ${DTS} +# DTB for aarch64 needs to preserve the immediate parent of the .dts, because +# these DTS are vendored and should be installed into their vendored directory. +.if ${MACHINE_ARCH} == "aarch64" +DTB+= ${_dts:R:S/$/.dtb/} +.else +DTB+= ${_dts:T:R:S/$/.dtb/} +.endif +.endfor + DTBO=${DTSO:T:R:S/$/.dtbo/} .SUFFIXES: .dtb .dts .dtbo .dtso .PATH.dts: ${SYSDIR}/gnu/dts/${MACHINE} ${SYSDIR}/dts/${MACHINE} .PATH.dtso: ${SYSDIR}/dts/${MACHINE}/overlays .export DTC ECHO .dts.dtb: ${OP_META} @${ECHO} Generating ${.TARGET} from ${.IMPSRC} @${SYSDIR}/tools/fdt/make_dtb.sh ${SYSDIR} ${.IMPSRC} ${.OBJDIR} .dtso.dtbo: ${OP_META} @${ECHO} Generating ${.TARGET} from ${.IMPSRC} @${SYSDIR}/tools/fdt/make_dtbo.sh ${SYSDIR} ${.IMPSRC} ${.OBJDIR} # Add dependencies on the source file so that out-of-tree things can be included # without any .PATH additions. .for _dts in ${DTS} ${FDT_DTS_FILE} ${_dts:R:T}.dtb: ${_dts} .endfor .for _dtso in ${DTSO} ${_dtso:R:T}.dtbo: ${_dtso} .endfor _dtbinstall: # Need to create this because installkernel doesn't invoke mtree with BSD.root.mtree # to make sure the tree is setup properly. We don't recreate it to avoid duplicate # entries in the NO_ROOT case. test -d ${DESTDIR}${DTBDIR} || ${INSTALL} -d -o ${DTBOWN} -g ${DTBGRP} ${DESTDIR}${DTBDIR} .for _dtb in ${DTB} .if ${MACHINE_CPUARCH} == "aarch64" # :H:T here to grab the vendor component of the DTB path in a way that # allows out-of-tree DTS builds, too. We make the assumption that # out-of-tree DTS will have a similar directory structure to in-tree, # with .dts files appearing in a vendor/ directory. 
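# As an illustration of the make(1) variable modifiers used in this file
# (a hypothetical in-tree path): for _dts = allwinner/sun50i-a64-pine64.dts,
# ${_dts:R:S/$/.dtb/} keeps the vendor directory and yields
# allwinner/sun50i-a64-pine64.dtb, while ${_dts:T:R:S/$/.dtb/} takes only
# the basename and yields sun50i-a64-pine64.dtb. In the install loop
# below, ${_dtb:H:T} then recovers the vendor component ("allwinner"),
# so the blob lands in ${DTBDIR}/allwinner/.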
test -d ${DESTDIR}${DTBDIR}/${_dtb:H:T} || ${INSTALL} -d -o ${DTBOWN} -g ${DTBGRP} ${DESTDIR}${DTBDIR}/${_dtb:H:T} ${INSTALL} -o ${DTBOWN} -g ${DTBGRP} -m ${DTBMODE} \ ${_INSTALLFLAGS} ${_dtb:T} ${DESTDIR}${DTBDIR}/${_dtb:H:T} .else ${INSTALL} -o ${DTBOWN} -g ${DTBGRP} -m ${DTBMODE} \ ${_INSTALLFLAGS} ${_dtb} ${DESTDIR}${DTBDIR}/ .endif .endfor test -d ${DESTDIR}${DTBODIR} || ${INSTALL} -d -o ${DTBOWN} -g ${DTBGRP} ${DESTDIR}${DTBODIR} .for _dtbo in ${DTBO} ${INSTALL} -o ${DTBOWN} -g ${DTBGRP} -m ${DTBMODE} \ ${_INSTALLFLAGS} ${_dtbo} ${DESTDIR}${DTBODIR}/ .endfor Index: projects/runtime-coverage-v2/sys/conf/files.arm64 =================================================================== --- projects/runtime-coverage-v2/sys/conf/files.arm64 (revision 347075) +++ projects/runtime-coverage-v2/sys/conf/files.arm64 (revision 347076) @@ -1,289 +1,292 @@ # $FreeBSD$ cloudabi32_vdso.o optional compat_cloudabi32 \ dependency "$S/contrib/cloudabi/cloudabi_vdso_armv6_on_64bit.S" \ compile-with "${CC} -x assembler-with-cpp -m32 -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi/cloudabi_vdso.lds $S/contrib/cloudabi/cloudabi_vdso_armv6_on_64bit.S -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "cloudabi32_vdso.o" # cloudabi32_vdso_blob.o optional compat_cloudabi32 \ dependency "cloudabi32_vdso.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf64-littleaarch64 --binary-architecture aarch64 cloudabi32_vdso.o ${.TARGET}" \ no-implicit-rule \ clean "cloudabi32_vdso_blob.o" # cloudabi64_vdso.o optional compat_cloudabi64 \ dependency "$S/contrib/cloudabi/cloudabi_vdso_aarch64.S" \ compile-with "${CC} -x assembler-with-cpp -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi/cloudabi_vdso.lds $S/contrib/cloudabi/cloudabi_vdso_aarch64.S -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "cloudabi64_vdso.o" # cloudabi64_vdso_blob.o optional compat_cloudabi64 \ dependency "cloudabi64_vdso.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf64-littleaarch64 --binary-architecture aarch64 cloudabi64_vdso.o ${.TARGET}" \ no-implicit-rule \ clean "cloudabi64_vdso_blob.o" # # Allwinner common files arm/allwinner/a10_ehci.c optional ehci aw_ehci fdt arm/allwinner/a10_timer.c optional a10_timer fdt +arm/allwinner/a10_codec.c optional sound a10_codec +arm/allwinner/a31_dmac.c optional a31_dmac +arm/allwinner/sunxi_dma_if.m optional a31_dmac arm/allwinner/aw_cir.c optional evdev aw_cir fdt arm/allwinner/aw_gpio.c optional gpio aw_gpio fdt arm/allwinner/aw_mmc.c optional mmc aw_mmc fdt | mmccam aw_mmc fdt arm/allwinner/aw_nmi.c optional aw_nmi fdt \ compile-with "${NORMAL_C} -I$S/gnu/dts/include" arm/allwinner/aw_pwm.c optional aw_pwm fdt arm/allwinner/aw_rsb.c optional aw_rsb fdt arm/allwinner/aw_rtc.c optional aw_rtc fdt arm/allwinner/aw_sid.c optional aw_sid nvmem fdt arm/allwinner/aw_spi.c optional aw_spi fdt arm/allwinner/aw_syscon.c optional aw_syscon ext_resources syscon fdt arm/allwinner/aw_thermal.c optional aw_thermal nvmem fdt arm/allwinner/aw_usbphy.c optional ehci aw_usbphy fdt arm/allwinner/aw_wdog.c optional aw_wdog fdt arm/allwinner/axp81x.c optional axp81x fdt arm/allwinner/if_awg.c optional awg ext_resources syscon aw_sid nvmem fdt # Allwinner clock driver arm/allwinner/clkng/aw_ccung.c optional aw_ccu fdt arm/allwinner/clkng/aw_clk_nkmp.c optional aw_ccu fdt arm/allwinner/clkng/aw_clk_nm.c optional aw_ccu fdt arm/allwinner/clkng/aw_clk_prediv_mux.c optional aw_ccu fdt arm/allwinner/clkng/ccu_a64.c optional soc_allwinner_a64 aw_ccu fdt arm/allwinner/clkng/ccu_h3.c 
optional soc_allwinner_h5 aw_ccu fdt arm/allwinner/clkng/ccu_sun8i_r.c optional aw_ccu fdt arm/allwinner/clkng/ccu_de2.c optional aw_ccu fdt # Allwinner padconf files arm/allwinner/a64/a64_padconf.c optional soc_allwinner_a64 fdt arm/allwinner/a64/a64_r_padconf.c optional soc_allwinner_a64 fdt arm/allwinner/h3/h3_padconf.c optional soc_allwinner_h5 fdt arm/allwinner/h3/h3_r_padconf.c optional soc_allwinner_h5 fdt arm/annapurna/alpine/alpine_ccu.c optional al_ccu fdt arm/annapurna/alpine/alpine_nb_service.c optional al_nb_service fdt arm/annapurna/alpine/alpine_pci.c optional al_pci fdt arm/annapurna/alpine/alpine_pci_msix.c optional al_pci fdt arm/annapurna/alpine/alpine_serdes.c optional al_serdes fdt \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" arm/arm/generic_timer.c standard arm/arm/gic.c standard arm/arm/gic_acpi.c optional acpi arm/arm/gic_fdt.c optional fdt arm/arm/pmu.c standard arm/arm/physmem.c standard arm/broadcom/bcm2835/bcm2835_audio.c optional sound vchiq fdt \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" arm/broadcom/bcm2835/bcm2835_bsc.c optional bcm2835_bsc soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_cpufreq.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_dma.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_fbd.c optional vt soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_ft5406.c optional evdev bcm2835_ft5406 soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_gpio.c optional gpio soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_intr.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_mbox.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_rng.c optional random soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_sdhci.c optional sdhci soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_sdhost.c optional sdhci soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_spi.c optional bcm2835_spi soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_vcio.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_wdog.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2836.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm283x_dwc_fdt.c optional dwcotg fdt soc_brcm_bcm2837 arm/mv/gpio.c optional mv_gpio fdt arm/mv/mvebu_pinctrl.c optional mvebu_pinctrl fdt arm/mv/mv_cp110_icu.c optional mv_cp110_icu fdt arm/mv/mv_ap806_gicp.c optional mv_ap806_gicp fdt arm/mv/mv_ap806_clock.c optional SOC_MARVELL_8K fdt arm/mv/mv_cp110_clock.c optional SOC_MARVELL_8K fdt arm/mv/mv_thermal.c optional SOC_MARVELL_8K mv_thermal fdt arm/mv/armada38x/armada38x_rtc.c optional mv_rtc fdt arm/xilinx/uart_dev_cdnc.c optional uart soc_xilinx_zynq arm64/acpica/acpi_iort.c optional acpi arm64/acpica/acpi_machdep.c optional acpi arm64/acpica/OsdEnvironment.c optional acpi arm64/acpica/acpi_wakeup.c optional acpi arm64/acpica/pci_cfgreg.c optional acpi pci arm64/arm64/autoconf.c standard arm64/arm64/bus_machdep.c standard arm64/arm64/bus_space_asm.S standard arm64/arm64/busdma_bounce.c standard arm64/arm64/busdma_machdep.c standard arm64/arm64/bzero.S standard arm64/arm64/clock.c standard arm64/arm64/copyinout.S standard arm64/arm64/copystr.c standard arm64/arm64/cpu_errata.c standard arm64/arm64/cpufunc_asm.S standard arm64/arm64/db_disasm.c optional ddb arm64/arm64/db_interface.c optional ddb arm64/arm64/db_trace.c optional ddb arm64/arm64/debug_monitor.c optional ddb arm64/arm64/disassem.c optional ddb 
arm64/arm64/dump_machdep.c standard arm64/arm64/efirt_machdep.c optional efirt arm64/arm64/elf32_machdep.c optional compat_freebsd32 arm64/arm64/elf_machdep.c standard arm64/arm64/exception.S standard arm64/arm64/freebsd32_machdep.c optional compat_freebsd32 arm64/arm64/gicv3_its.c optional intrng fdt arm64/arm64/gic_v3.c standard arm64/arm64/gic_v3_acpi.c optional acpi arm64/arm64/gic_v3_fdt.c optional fdt arm64/arm64/identcpu.c standard arm64/arm64/in_cksum.c optional inet | inet6 arm64/arm64/locore.S standard no-obj arm64/arm64/machdep.c standard arm64/arm64/mem.c standard arm64/arm64/memcpy.S standard arm64/arm64/memmove.S standard arm64/arm64/minidump_machdep.c standard arm64/arm64/mp_machdep.c optional smp arm64/arm64/nexus.c standard arm64/arm64/ofw_machdep.c optional fdt arm64/arm64/pmap.c standard arm64/arm64/stack_machdep.c optional ddb | stack arm64/arm64/support.S standard arm64/arm64/swtch.S standard arm64/arm64/sys_machdep.c standard arm64/arm64/trap.c standard arm64/arm64/uio_machdep.c standard arm64/arm64/uma_machdep.c standard arm64/arm64/undefined.c standard arm64/arm64/unwind.c optional ddb | kdtrace_hooks | stack arm64/arm64/vfp.c standard arm64/arm64/vm_machdep.c standard arm64/cavium/thunder_pcie_fdt.c optional soc_cavm_thunderx pci fdt arm64/cavium/thunder_pcie_pem.c optional soc_cavm_thunderx pci arm64/cavium/thunder_pcie_pem_fdt.c optional soc_cavm_thunderx pci fdt arm64/cavium/thunder_pcie_common.c optional soc_cavm_thunderx pci arm64/cloudabi32/cloudabi32_sysvec.c optional compat_cloudabi32 arm64/cloudabi64/cloudabi64_sysvec.c optional compat_cloudabi64 arm64/coresight/coresight.c standard arm64/coresight/coresight_if.m standard arm64/coresight/coresight-cmd.c standard arm64/coresight/coresight-cpu-debug.c standard arm64/coresight/coresight-dynamic-replicator.c standard arm64/coresight/coresight-etm4x.c standard arm64/coresight/coresight-funnel.c standard arm64/coresight/coresight-tmc.c standard arm64/qualcomm/qcom_gcc.c optional qcom_gcc fdt contrib/vchiq/interface/compat/vchi_bsd.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -Wno-unused -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_arm.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -Wno-unused -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_connected.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_core.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_kern_lib.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_kmod.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_shim.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_util.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 
-I$S/contrib/vchiq" crypto/armv8/armv8_crypto.c optional armv8crypto armv8_crypto_wrap.o optional armv8crypto \ dependency "$S/crypto/armv8/armv8_crypto_wrap.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc:N-mgeneral-regs-only} -I$S/crypto/armv8/ ${WERROR} ${NO_WCAST_QUAL} ${PROF} -march=armv8-a+crypto ${.IMPSRC}" \ no-implicit-rule \ clean "armv8_crypto_wrap.o" crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support crypto/des/des_enc.c optional crypto | ipsec | ipsec_support | netsmb dev/acpica/acpi_bus_if.m optional acpi dev/acpica/acpi_if.m optional acpi dev/acpica/acpi_pci_link.c optional acpi pci dev/acpica/acpi_pcib.c optional acpi pci dev/acpica/acpi_pxm.c optional acpi dev/ahci/ahci_generic.c optional ahci dev/axgbe/if_axgbe.c optional axgbe dev/axgbe/xgbe-desc.c optional axgbe dev/axgbe/xgbe-dev.c optional axgbe dev/axgbe/xgbe-drv.c optional axgbe dev/axgbe/xgbe-mdio.c optional axgbe dev/cpufreq/cpufreq_dt.c optional cpufreq fdt dev/iicbus/sy8106a.c optional sy8106a fdt dev/iicbus/twsi/mv_twsi.c optional twsi fdt dev/iicbus/twsi/a10_twsi.c optional twsi fdt dev/iicbus/twsi/twsi.c optional twsi fdt dev/hwpmc/hwpmc_arm64.c optional hwpmc dev/hwpmc/hwpmc_arm64_md.c optional hwpmc dev/mbox/mbox_if.m optional soc_brcm_bcm2837 dev/mmc/host/dwmmc.c optional dwmmc fdt dev/mmc/host/dwmmc_hisi.c optional dwmmc fdt soc_hisi_hi6220 dev/mmc/host/dwmmc_rockchip.c optional dwmmc fdt soc_rockchip_rk3328 dev/neta/if_mvneta_fdt.c optional neta fdt dev/neta/if_mvneta.c optional neta mdio mii dev/ofw/ofw_cpu.c optional fdt dev/ofw/ofwpci.c optional fdt pci dev/pci/pci_host_generic.c optional pci dev/pci/pci_host_generic_acpi.c optional pci acpi dev/pci/pci_host_generic_fdt.c optional pci fdt dev/psci/psci.c standard dev/psci/psci_arm64.S standard dev/psci/smccc.c standard dev/sdhci/sdhci_xenon.c optional sdhci_xenon sdhci fdt dev/uart/uart_cpu_arm64.c optional uart dev/uart/uart_dev_mu.c optional uart uart_mu dev/uart/uart_dev_pl011.c optional uart pl011 dev/usb/controller/dwc_otg_hisi.c optional dwcotg fdt soc_hisi_hi6220 dev/usb/controller/ehci_mv.c optional ehci_mv fdt dev/usb/controller/generic_ehci.c optional ehci acpi dev/usb/controller/generic_ohci.c optional ohci fdt dev/usb/controller/generic_usb_if.m optional ohci fdt dev/usb/controller/usb_nop_xceiv.c optional fdt ext_resources dev/usb/controller/generic_xhci.c optional xhci fdt dev/vnic/mrml_bridge.c optional vnic fdt dev/vnic/nic_main.c optional vnic pci dev/vnic/nicvf_main.c optional vnic pci pci_iov dev/vnic/nicvf_queues.c optional vnic pci pci_iov dev/vnic/thunder_bgx_fdt.c optional vnic fdt dev/vnic/thunder_bgx.c optional vnic pci dev/vnic/thunder_mdio_fdt.c optional vnic fdt dev/vnic/thunder_mdio.c optional vnic dev/vnic/lmac_if.m optional inet | inet6 | vnic kern/kern_clocksource.c standard kern/msi_if.m optional intrng kern/pic_if.m optional intrng kern/subr_devmap.c standard kern/subr_intr.c optional intrng libkern/bcmp.c standard libkern/ffs.c standard libkern/ffsl.c standard libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/memcmp.c standard libkern/memset.c standard libkern/arm64/crc32c_armv8.S standard cddl/contrib/opensolaris/common/atomic/aarch64/opensolaris_atomic.S optional zfs | dtrace compile-with "${CDDL_C}" cddl/dev/dtrace/aarch64/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/aarch64/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/aarch64/fbt_isa.c optional dtrace_fbt | dtraceall compile-with 
"${FBT_C}" # RockChip Drivers arm64/rockchip/rk_i2c.c optional fdt rk_i2c soc_rockchip_rk3328 | fdt rk_i2c soc_rockchip_rk3399 arm64/rockchip/rk805.c optional fdt rk805 soc_rockchip_rk3328 | fdt rk805 soc_rockchip_rk3399 arm64/rockchip/rk_grf.c optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 arm64/rockchip/rk_pinctrl.c optional fdt rk_pinctrl soc_rockchip_rk3328 | fdt rk_pinctrl soc_rockchip_rk3399 arm64/rockchip/rk_gpio.c optional fdt rk_gpio soc_rockchip_rk3328 | fdt rk_gpio soc_rockchip_rk3399 arm64/rockchip/if_dwc_rk.c optional fdt dwc_rk soc_rockchip_rk3328 | fdt dwc_rk soc_rockchip_rk3399 dev/dwc/if_dwc.c optional fdt dwc_rk soc_rockchip_rk3328 | fdt dwc_rk soc_rockchip_rk3399 dev/dwc/if_dwc_if.m optional fdt dwc_rk soc_rockchip_rk3328 | fdt dwc_rk soc_rockchip_rk3399 # RockChip Clock support arm64/rockchip/clk/rk_cru.c optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 arm64/rockchip/clk/rk_clk_armclk.c optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 arm64/rockchip/clk/rk_clk_composite.c optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 arm64/rockchip/clk/rk_clk_gate.c optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 arm64/rockchip/clk/rk_clk_mux.c optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 arm64/rockchip/clk/rk_clk_pll.c optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 arm64/rockchip/clk/rk3328_cru.c optional fdt soc_rockchip_rk3328 arm64/rockchip/clk/rk3399_cru.c optional fdt soc_rockchip_rk3399 arm64/rockchip/clk/rk3399_pmucru.c optional fdt soc_rockchip_rk3399 Index: projects/runtime-coverage-v2/sys/conf/options =================================================================== --- projects/runtime-coverage-v2/sys/conf/options (revision 347075) +++ projects/runtime-coverage-v2/sys/conf/options (revision 347076) @@ -1,1025 +1,1026 @@ # $FreeBSD$ # # On the handling of kernel options # # All kernel options should be listed in NOTES, with suitable # descriptions. Negative options (options that make some code not # compile) should be commented out; LINT (generated from NOTES) should # compile as much code as possible. Try to structure option-using # code so that a single option only switch code on, or only switch # code off, to make it possible to have a full compile-test. If # necessary, you can check for COMPILING_LINT to get maximum code # coverage. # # All new options shall also be listed in either "conf/options" or # "conf/options.". Options that affect a single source-file # .[c|s] should be directed into "opt_.h", while options # that affect multiple files should either go in "opt_global.h" if # this is a kernel-wide option (used just about everywhere), or in # "opt_.h" if it affects only some files. # Note that the effect of listing only an option without a # header-file-name in conf/options (and cousins) is that the last # convention is followed. # # This handling scheme is not yet fully implemented. # # # Format of this file: # Option name filename # # If filename is missing, the default is # opt_.h AAC_DEBUG opt_aac.h AACRAID_DEBUG opt_aacraid.h AHC_ALLOW_MEMIO opt_aic7xxx.h AHC_TMODE_ENABLE opt_aic7xxx.h AHC_DUMP_EEPROM opt_aic7xxx.h AHC_DEBUG opt_aic7xxx.h AHC_DEBUG_OPTS opt_aic7xxx.h AHC_REG_PRETTY_PRINT opt_aic7xxx.h AHD_DEBUG opt_aic79xx.h AHD_DEBUG_OPTS opt_aic79xx.h AHD_TMODE_ENABLE opt_aic79xx.h AHD_REG_PRETTY_PRINT opt_aic79xx.h TWA_DEBUG opt_twa.h # Debugging options. 
ALT_BREAK_TO_DEBUGGER opt_kdb.h BREAK_TO_DEBUGGER opt_kdb.h BUF_TRACKING opt_global.h DDB DDB_BUFR_SIZE opt_ddb.h DDB_CAPTURE_DEFAULTBUFSIZE opt_ddb.h DDB_CAPTURE_MAXBUFSIZE opt_ddb.h DDB_CTF opt_ddb.h DDB_NUMSYM opt_ddb.h FULL_BUF_TRACKING opt_global.h GDB KDB opt_global.h KDB_TRACE opt_kdb.h KDB_UNATTENDED opt_kdb.h KLD_DEBUG opt_kld.h SYSCTL_DEBUG opt_sysctl.h EARLY_PRINTF opt_global.h TEXTDUMP_PREFERRED opt_ddb.h TEXTDUMP_VERBOSE opt_ddb.h NUM_CORE_FILES opt_global.h TSLOG opt_global.h TSLOGSIZE opt_global.h # Miscellaneous options. ALQ ALTERA_SDCARD_FAST_SIM opt_altera_sdcard.h ATSE_CFI_HACK opt_cfi.h AUDIT opt_global.h BOOTHOWTO opt_global.h BOOTVERBOSE opt_global.h CALLOUT_PROFILING CAPABILITIES opt_capsicum.h CAPABILITY_MODE opt_capsicum.h COMPAT_43 opt_global.h COMPAT_43TTY opt_global.h COMPAT_FREEBSD4 opt_global.h COMPAT_FREEBSD5 opt_global.h COMPAT_FREEBSD6 opt_global.h COMPAT_FREEBSD7 opt_global.h COMPAT_FREEBSD9 opt_global.h COMPAT_FREEBSD10 opt_global.h COMPAT_FREEBSD11 opt_global.h +COMPAT_FREEBSD12 opt_global.h COMPAT_CLOUDABI32 opt_dontuse.h COMPAT_CLOUDABI64 opt_dontuse.h COMPAT_LINUXKPI opt_dontuse.h _COMPAT_LINUX32 opt_compat.h # XXX: make sure opt_compat.h exists COMPILING_LINT opt_global.h CY_PCI_FASTINTR DEADLKRES opt_watchdog.h EXPERIMENTAL opt_global.h EXT_RESOURCES opt_global.h DIRECTIO FILEMON opt_dontuse.h FFCLOCK FULL_PREEMPTION opt_sched.h GZIO opt_gzio.h IMAGACT_BINMISC opt_dontuse.h IPI_PREEMPTION opt_sched.h GEOM_BDE opt_geom.h GEOM_BSD opt_geom.h GEOM_CACHE opt_geom.h GEOM_CONCAT opt_geom.h GEOM_ELI opt_geom.h GEOM_FOX opt_geom.h GEOM_GATE opt_geom.h GEOM_JOURNAL opt_geom.h GEOM_LABEL opt_geom.h GEOM_LABEL_GPT opt_geom.h GEOM_LINUX_LVM opt_geom.h GEOM_MAP opt_geom.h GEOM_MBR opt_geom.h GEOM_MIRROR opt_geom.h GEOM_MOUNTVER opt_geom.h GEOM_MULTIPATH opt_geom.h GEOM_NOP opt_geom.h GEOM_PART_APM opt_geom.h GEOM_PART_BSD opt_geom.h GEOM_PART_BSD64 opt_geom.h GEOM_PART_EBR opt_geom.h GEOM_PART_EBR_COMPAT opt_geom.h GEOM_PART_GPT opt_geom.h GEOM_PART_LDM opt_geom.h GEOM_PART_MBR opt_geom.h GEOM_PART_VTOC8 opt_geom.h GEOM_RAID opt_geom.h GEOM_RAID3 opt_geom.h GEOM_SHSEC opt_geom.h GEOM_STRIPE opt_geom.h GEOM_SUNLABEL opt_geom.h GEOM_UZIP opt_geom.h GEOM_UZIP_DEBUG opt_geom.h GEOM_VINUM opt_geom.h GEOM_VIRSTOR opt_geom.h GEOM_VOL opt_geom.h GEOM_ZERO opt_geom.h IFLIB opt_iflib.h KDTRACE_HOOKS opt_global.h KDTRACE_FRAME opt_kdtrace.h KN_HASHSIZE opt_kqueue.h KSTACK_MAX_PAGES KSTACK_PAGES KSTACK_USAGE_PROF KTRACE KTRACE_REQUEST_POOL opt_ktrace.h LIBICONV MAC opt_global.h MAC_BIBA opt_dontuse.h MAC_BSDEXTENDED opt_dontuse.h MAC_IFOFF opt_dontuse.h MAC_LOMAC opt_dontuse.h MAC_MLS opt_dontuse.h MAC_NONE opt_dontuse.h MAC_NTPD opt_dontuse.h MAC_PARTITION opt_dontuse.h MAC_PORTACL opt_dontuse.h MAC_SEEOTHERUIDS opt_dontuse.h MAC_STATIC opt_mac.h MAC_STUB opt_dontuse.h MAC_TEST opt_dontuse.h MAC_VERIEXEC opt_dontuse.h MAC_VERIEXEC_SHA1 opt_dontuse.h MAC_VERIEXEC_SHA256 opt_dontuse.h MAC_VERIEXEC_SHA384 opt_dontuse.h MAC_VERIEXEC_SHA512 opt_dontuse.h MD_ROOT opt_md.h MD_ROOT_FSTYPE opt_md.h MD_ROOT_READONLY opt_md.h MD_ROOT_SIZE opt_md.h MD_ROOT_MEM opt_md.h MFI_DEBUG opt_mfi.h MFI_DECODE_LOG opt_mfi.h MPROF_BUFFERS opt_mprof.h MPROF_HASH_SIZE opt_mprof.h NEW_PCIB opt_global.h NO_ADAPTIVE_MUTEXES opt_adaptive_mutexes.h NO_ADAPTIVE_RWLOCKS NO_ADAPTIVE_SX NO_EVENTTIMERS opt_timer.h NO_OBSOLETE_CODE opt_global.h NO_SYSCTL_DESCR opt_global.h NSWBUF_MIN opt_param.h MBUF_PACKET_ZONE_DISABLE opt_global.h PANIC_REBOOT_WAIT_TIME opt_panic.h PCI_HP opt_pci.h PCI_IOV opt_global.h 
PPC_DEBUG opt_ppc.h PPC_PROBE_CHIPSET opt_ppc.h PPS_SYNC opt_ntp.h PREEMPTION opt_sched.h QUOTA SCHED_4BSD opt_sched.h SCHED_STATS opt_sched.h SCHED_ULE opt_sched.h SLEEPQUEUE_PROFILING SLHCI_DEBUG opt_slhci.h STACK opt_stack.h SUIDDIR MSGMNB opt_sysvipc.h MSGMNI opt_sysvipc.h MSGSEG opt_sysvipc.h MSGSSZ opt_sysvipc.h MSGTQL opt_sysvipc.h SEMMNI opt_sysvipc.h SEMMNS opt_sysvipc.h SEMMNU opt_sysvipc.h SEMMSL opt_sysvipc.h SEMOPM opt_sysvipc.h SEMUME opt_sysvipc.h SHMALL opt_sysvipc.h SHMMAX opt_sysvipc.h SHMMAXPGS opt_sysvipc.h SHMMIN opt_sysvipc.h SHMMNI opt_sysvipc.h SHMSEG opt_sysvipc.h SYSVMSG opt_sysvipc.h SYSVSEM opt_sysvipc.h SYSVSHM opt_sysvipc.h SW_WATCHDOG opt_watchdog.h TCPHPTS opt_inet.h TURNSTILE_PROFILING UMTX_PROFILING UMTX_CHAINS opt_global.h VERBOSE_SYSINIT ZSTDIO opt_zstdio.h # Sanitizers COVERAGE opt_global.h KCOV KUBSAN opt_global.h # POSIX kernel options P1003_1B_MQUEUE opt_posix.h P1003_1B_SEMAPHORES opt_posix.h _KPOSIX_PRIORITY_SCHEDULING opt_posix.h # Do we want the config file compiled into the kernel? INCLUDE_CONFIG_FILE opt_config.h # Options for static filesystems. These should only be used at config # time, since the corresponding lkms cannot work if there are any static # dependencies. Unusability is enforced by hiding the defines for the # options in a never-included header. AUTOFS opt_dontuse.h CD9660 opt_dontuse.h EXT2FS opt_dontuse.h FDESCFS opt_dontuse.h FFS opt_dontuse.h FUSEFS opt_dontuse.h MSDOSFS opt_dontuse.h NANDFS opt_dontuse.h NULLFS opt_dontuse.h PROCFS opt_dontuse.h PSEUDOFS opt_dontuse.h SMBFS opt_dontuse.h TMPFS opt_dontuse.h UDF opt_dontuse.h UNIONFS opt_dontuse.h ZFS opt_dontuse.h # Pseudofs debugging PSEUDOFS_TRACE opt_pseudofs.h # In-kernel GSS-API KGSSAPI opt_kgssapi.h KGSSAPI_DEBUG opt_kgssapi.h # These static filesystems have one slightly bogus static dependency in # sys/i386/i386/autoconf.c. If any of these filesystems are # statically compiled into the kernel, code for mounting them as root # filesystems will be enabled - but look below. # NFSCL - client # NFSD - server NFSCL opt_nfs.h NFSD opt_nfs.h # filesystems and libiconv bridge CD9660_ICONV opt_dontuse.h MSDOSFS_ICONV opt_dontuse.h UDF_ICONV opt_dontuse.h # If you are following the conditions in the copyright, # you can enable soft-updates which will speed up a lot of things # and make the system safer from crashes at the same time. # Otherwise a STUB module will be compiled in. SOFTUPDATES opt_ffs.h # On small, embedded systems, it can be useful to turn off support for # snapshots. It saves about 30-40k for a feature that would be lightly # used, if it is used at all. NO_FFS_SNAPSHOT opt_ffs.h # Enabling this option turns on support for Access Control Lists in UFS, # which can be used to support high security configurations. Depends on # UFS_EXTATTR. UFS_ACL opt_ufs.h # Enabling this option turns on support for extended attributes in UFS-based # filesystems, which can be used to support high security configurations # as well as new filesystem features. UFS_EXTATTR opt_ufs.h UFS_EXTATTR_AUTOSTART opt_ufs.h # Enable fast hash lookups for large directories on UFS-based filesystems. UFS_DIRHASH opt_ufs.h # Enable gjournal-based UFS journal. UFS_GJOURNAL opt_ufs.h # We plan to remove the static dependencies above, with a # <filesystem>_ROOT option to control whether each is usable as root. This list # allows these options to be present in config files already (though # they won't make any difference yet).
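#
# Aside (illustrative, not part of the original file): the opt_*.h
# machinery described at the top of this file means that for an option
# such as SW_WATCHDOG above, config(8) generates opt_watchdog.h defining
# that symbol, and a source file consumes it as in this minimal C sketch:
#
#	#include "opt_watchdog.h"
#
#	#ifdef SW_WATCHDOG
#	/* Software-watchdog support is compiled in. */
#	#endif
#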
NFS_ROOT opt_nfsroot.h # SMB/CIFS requester NETSMB opt_netsmb.h # Enable netdump(4) client support. NETDUMP opt_global.h # Options used only in subr_param.c. HZ opt_param.h MAXFILES opt_param.h NBUF opt_param.h NSFBUFS opt_param.h VM_BCACHE_SIZE_MAX opt_param.h VM_SWZONE_SIZE_MAX opt_param.h MAXUSERS DFLDSIZ opt_param.h MAXDSIZ opt_param.h MAXSSIZ opt_param.h # Generic SCSI options. CAM_MAX_HIGHPOWER opt_cam.h CAMDEBUG opt_cam.h CAM_DEBUG_COMPILE opt_cam.h CAM_DEBUG_DELAY opt_cam.h CAM_DEBUG_BUS opt_cam.h CAM_DEBUG_TARGET opt_cam.h CAM_DEBUG_LUN opt_cam.h CAM_DEBUG_FLAGS opt_cam.h CAM_BOOT_DELAY opt_cam.h CAM_IOSCHED_DYNAMIC opt_cam.h CAM_TEST_FAILURE opt_cam.h SCSI_DELAY opt_scsi.h SCSI_NO_SENSE_STRINGS opt_scsi.h SCSI_NO_OP_STRINGS opt_scsi.h # Options used only in cam/ata/ata_da.c ATA_STATIC_ID opt_ada.h # Options used only in cam/scsi/scsi_cd.c CHANGER_MIN_BUSY_SECONDS opt_cd.h CHANGER_MAX_BUSY_SECONDS opt_cd.h # Options used only in cam/scsi/scsi_da.c DA_TRACK_REFS opt_da.h # Options used only in cam/scsi/scsi_sa.c. SA_IO_TIMEOUT opt_sa.h SA_SPACE_TIMEOUT opt_sa.h SA_REWIND_TIMEOUT opt_sa.h SA_ERASE_TIMEOUT opt_sa.h SA_1FM_AT_EOD opt_sa.h # Options used only in cam/scsi/scsi_pt.c SCSI_PT_DEFAULT_TIMEOUT opt_pt.h # Options used only in cam/scsi/scsi_ses.c SES_ENABLE_PASSTHROUGH opt_ses.h # Options used in dev/sym/ (Symbios SCSI driver). SYM_SETUP_SCSI_DIFF opt_sym.h #-HVD support for 825a, 875, 885 # disabled:0 (default), enabled:1 SYM_SETUP_PCI_PARITY opt_sym.h #-PCI parity checking # disabled:0, enabled:1 (default) SYM_SETUP_MAX_LUN opt_sym.h #-Number of LUNs supported # default:8, range:[1..64] # Options used only in dev/isp/* ISP_TARGET_MODE opt_isp.h ISP_FW_CRASH_DUMP opt_isp.h ISP_DEFAULT_ROLES opt_isp.h ISP_INTERNAL_TARGET opt_isp.h ISP_FCTAPE_OFF opt_isp.h # Options used only in dev/iscsi ISCSI_INITIATOR_DEBUG opt_iscsi_initiator.h # Net stuff. 
ACCEPT_FILTER_DATA ACCEPT_FILTER_DNS ACCEPT_FILTER_HTTP ALTQ opt_global.h ALTQ_CBQ opt_altq.h ALTQ_CDNR opt_altq.h ALTQ_CODEL opt_altq.h ALTQ_DEBUG opt_altq.h ALTQ_HFSC opt_altq.h ALTQ_FAIRQ opt_altq.h ALTQ_NOPCC opt_altq.h ALTQ_PRIQ opt_altq.h ALTQ_RED opt_altq.h ALTQ_RIO opt_altq.h BOOTP opt_bootp.h BOOTP_BLOCKSIZE opt_bootp.h BOOTP_COMPAT opt_bootp.h BOOTP_NFSROOT opt_bootp.h BOOTP_NFSV3 opt_bootp.h BOOTP_WIRED_TO opt_bootp.h DEVICE_POLLING DUMMYNET opt_ipdn.h RATELIMIT opt_ratelimit.h RATELIMIT_DEBUG opt_ratelimit.h INET opt_inet.h INET6 opt_inet6.h IPDIVERT IPFILTER opt_ipfilter.h IPFILTER_DEFAULT_BLOCK opt_ipfilter.h IPFILTER_LOG opt_ipfilter.h IPFILTER_LOOKUP opt_ipfilter.h IPFIREWALL opt_ipfw.h IPFIREWALL_DEFAULT_TO_ACCEPT opt_ipfw.h IPFIREWALL_NAT opt_ipfw.h IPFIREWALL_NAT64 opt_ipfw.h IPFIREWALL_NPTV6 opt_ipfw.h IPFIREWALL_VERBOSE opt_ipfw.h IPFIREWALL_VERBOSE_LIMIT opt_ipfw.h IPFIREWALL_PMOD opt_ipfw.h IPSEC opt_ipsec.h IPSEC_DEBUG opt_ipsec.h IPSEC_SUPPORT opt_ipsec.h IPSTEALTH KRPC LIBALIAS LIBMCHAIN MBUF_PROFILING MBUF_STRESS_TEST MROUTING opt_mrouting.h NFSLOCKD PCBGROUP opt_pcbgroup.h PF_DEFAULT_TO_DROP opt_pf.h RADIX_MPATH opt_mpath.h ROUTETABLES opt_route.h RSS opt_rss.h SLIP_IFF_OPTS opt_slip.h TCPDEBUG TCPPCAP opt_global.h SIFTR TCP_BLACKBOX opt_global.h TCP_HHOOK opt_inet.h TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading TCP_RFC7413 opt_inet.h TCP_RFC7413_MAX_KEYS opt_inet.h TCP_RFC7413_MAX_PSKS opt_inet.h TCP_SIGNATURE opt_ipsec.h VLAN_ARRAY opt_vlan.h XBONEHACK # # SCTP # SCTP opt_sctp.h SCTP_DEBUG opt_sctp.h # Enable debug printfs SCTP_LOCK_LOGGING opt_sctp.h # Log to KTR lock activity SCTP_MBUF_LOGGING opt_sctp.h # Log to KTR general mbuf alloc/free SCTP_MBCNT_LOGGING opt_sctp.h # Log to KTR mbcnt activity SCTP_PACKET_LOGGING opt_sctp.h # Log the last N packets to a packet buffer SCTP_LTRACE_CHUNKS opt_sctp.h # Log to KTR chunks processed SCTP_LTRACE_ERRORS opt_sctp.h # Log to KTR error returns. SCTP_USE_PERCPU_STAT opt_sctp.h # Use per cpu stats. SCTP_MCORE_INPUT opt_sctp.h # Have multiple input threads for input mbufs SCTP_LOCAL_TRACE_BUF opt_sctp.h # Use tracebuffer exported via sysctl SCTP_DETAILED_STR_STATS opt_sctp.h # Use per PR-SCTP policy stream stats # # # # Netgraph(4). Use option NETGRAPH to enable the base netgraph code. # Each netgraph node type can either be compiled into the kernel # or loaded dynamically. To get the former, include the corresponding # option below. Each type has its own man page, e.g. ng_async(4).
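#
# Illustrative example (not part of the original file): to build the
# ng_bpf(4) node type statically, a kernel config would carry both
#	options 	NETGRAPH
#	options 	NETGRAPH_BPF
# while the same node type can instead be loaded at runtime, with no
# kernel option at all, via
#	kldload ng_bpf
#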
NETGRAPH NETGRAPH_DEBUG opt_netgraph.h NETGRAPH_ASYNC opt_netgraph.h NETGRAPH_ATMLLC opt_netgraph.h NETGRAPH_ATM_ATMPIF opt_netgraph.h NETGRAPH_BLUETOOTH opt_netgraph.h NETGRAPH_BLUETOOTH_BT3C opt_netgraph.h NETGRAPH_BLUETOOTH_H4 opt_netgraph.h NETGRAPH_BLUETOOTH_HCI opt_netgraph.h NETGRAPH_BLUETOOTH_L2CAP opt_netgraph.h NETGRAPH_BLUETOOTH_SOCKET opt_netgraph.h NETGRAPH_BLUETOOTH_UBT opt_netgraph.h NETGRAPH_BLUETOOTH_UBTBCMFW opt_netgraph.h NETGRAPH_BPF opt_netgraph.h NETGRAPH_BRIDGE opt_netgraph.h NETGRAPH_CAR opt_netgraph.h NETGRAPH_CHECKSUM opt_netgraph.h NETGRAPH_CISCO opt_netgraph.h NETGRAPH_DEFLATE opt_netgraph.h NETGRAPH_DEVICE opt_netgraph.h NETGRAPH_ECHO opt_netgraph.h NETGRAPH_EIFACE opt_netgraph.h NETGRAPH_ETHER opt_netgraph.h NETGRAPH_ETHER_ECHO opt_netgraph.h NETGRAPH_FEC opt_netgraph.h NETGRAPH_FRAME_RELAY opt_netgraph.h NETGRAPH_GIF opt_netgraph.h NETGRAPH_GIF_DEMUX opt_netgraph.h NETGRAPH_HOLE opt_netgraph.h NETGRAPH_IFACE opt_netgraph.h NETGRAPH_IP_INPUT opt_netgraph.h NETGRAPH_IPFW opt_netgraph.h NETGRAPH_KSOCKET opt_netgraph.h NETGRAPH_L2TP opt_netgraph.h NETGRAPH_LMI opt_netgraph.h NETGRAPH_MPPC_COMPRESSION opt_netgraph.h NETGRAPH_MPPC_ENCRYPTION opt_netgraph.h NETGRAPH_NAT opt_netgraph.h NETGRAPH_NETFLOW opt_netgraph.h NETGRAPH_ONE2MANY opt_netgraph.h NETGRAPH_PATCH opt_netgraph.h NETGRAPH_PIPE opt_netgraph.h NETGRAPH_PPP opt_netgraph.h NETGRAPH_PPPOE opt_netgraph.h NETGRAPH_PPTPGRE opt_netgraph.h NETGRAPH_PRED1 opt_netgraph.h NETGRAPH_RFC1490 opt_netgraph.h NETGRAPH_SOCKET opt_netgraph.h NETGRAPH_SPLIT opt_netgraph.h NETGRAPH_SPPP opt_netgraph.h NETGRAPH_TAG opt_netgraph.h NETGRAPH_TCPMSS opt_netgraph.h NETGRAPH_TEE opt_netgraph.h NETGRAPH_TTY opt_netgraph.h NETGRAPH_UI opt_netgraph.h NETGRAPH_VJC opt_netgraph.h NETGRAPH_VLAN opt_netgraph.h # NgATM options NGATM_ATM opt_netgraph.h NGATM_ATMBASE opt_netgraph.h NGATM_SSCOP opt_netgraph.h NGATM_SSCFU opt_netgraph.h NGATM_UNI opt_netgraph.h NGATM_CCATM opt_netgraph.h # DRM options DRM_DEBUG opt_drm.h TI_SF_BUF_JUMBO opt_ti.h TI_JUMBO_HDRSPLIT opt_ti.h # Misc debug flags. Most of these should probably be replaced with # 'DEBUG', and then let people recompile just the interesting modules # with 'make CC="cc -DDEBUG"'. 
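#
# For example (a sketch; "foo" is a hypothetical module name):
#	cd sys/modules/foo && make clean all CC="cc -DDEBUG"
#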
CLUSTERDEBUG opt_debug_cluster.h DEBUG_1284 opt_ppb_1284.h VP0_DEBUG opt_vpo.h LPT_DEBUG opt_lpt.h PLIP_DEBUG opt_plip.h LOCKF_DEBUG opt_debug_lockf.h SI_DEBUG opt_debug_si.h IFMEDIA_DEBUG opt_ifmedia.h # Fb options FB_DEBUG opt_fb.h FB_INSTALL_CDEV opt_fb.h # ppbus related options PERIPH_1284 opt_ppb_1284.h DONTPROBE_1284 opt_ppb_1284.h # smbus related options ENABLE_ALART opt_intpm.h # These cause changes all over the kernel BLKDEV_IOSIZE opt_global.h BURN_BRIDGES opt_global.h DEBUG opt_global.h DEBUG_LOCKS opt_global.h DEBUG_VFS_LOCKS opt_global.h DFLTPHYS opt_global.h DIAGNOSTIC opt_global.h INVARIANT_SUPPORT opt_global.h INVARIANTS opt_global.h KASSERT_PANIC_OPTIONAL opt_global.h MAXCPU opt_global.h MAXMEMDOM opt_global.h MAXPHYS opt_global.h MCLSHIFT opt_global.h MUTEX_NOINLINE opt_global.h LOCK_PROFILING opt_global.h LOCK_PROFILING_FAST opt_global.h MSIZE opt_global.h REGRESSION opt_global.h RWLOCK_NOINLINE opt_global.h SX_NOINLINE opt_global.h VFS_BIO_DEBUG opt_global.h # These are VM related options VM_KMEM_SIZE opt_vm.h VM_KMEM_SIZE_SCALE opt_vm.h VM_KMEM_SIZE_MAX opt_vm.h VM_NRESERVLEVEL opt_vm.h VM_LEVEL_0_ORDER opt_vm.h NO_SWAPPING opt_vm.h MALLOC_MAKE_FAILURES opt_vm.h MALLOC_PROFILE opt_vm.h MALLOC_DEBUG_MAXZONES opt_vm.h # The MemGuard replacement allocator used for tamper-after-free detection DEBUG_MEMGUARD opt_vm.h # The RedZone malloc(9) protection DEBUG_REDZONE opt_vm.h # Standard SMP options EARLY_AP_STARTUP opt_global.h SMP opt_global.h NUMA opt_global.h # Size of the kernel message buffer MSGBUF_SIZE opt_msgbuf.h # NFS options NFS_MINATTRTIMO opt_nfs.h NFS_MAXATTRTIMO opt_nfs.h NFS_MINDIRATTRTIMO opt_nfs.h NFS_MAXDIRATTRTIMO opt_nfs.h NFS_DEBUG opt_nfs.h # TMPFS options TMPFS_PAGES_MINRESERVED opt_tmpfs.h # For the Bt848/Bt848A/Bt849/Bt878/Bt879 driver OVERRIDE_CARD opt_bktr.h OVERRIDE_TUNER opt_bktr.h OVERRIDE_DBX opt_bktr.h OVERRIDE_MSP opt_bktr.h BROOKTREE_SYSTEM_DEFAULT opt_bktr.h BROOKTREE_ALLOC_PAGES opt_bktr.h BKTR_OVERRIDE_CARD opt_bktr.h BKTR_OVERRIDE_TUNER opt_bktr.h BKTR_OVERRIDE_DBX opt_bktr.h BKTR_OVERRIDE_MSP opt_bktr.h BKTR_SYSTEM_DEFAULT opt_bktr.h BKTR_ALLOC_PAGES opt_bktr.h BKTR_USE_PLL opt_bktr.h BKTR_GPIO_ACCESS opt_bktr.h BKTR_NO_MSP_RESET opt_bktr.h BKTR_430_FX_MODE opt_bktr.h BKTR_SIS_VIA_MODE opt_bktr.h BKTR_USE_FREEBSD_SMBUS opt_bktr.h BKTR_NEW_MSP34XX_DRIVER opt_bktr.h # Options for uart(4) UART_PPS_ON_CTS opt_uart.h UART_POLL_FREQ opt_uart.h UART_DEV_TOLERANCE_PCT opt_uart.h # options for bus/device framework BUS_DEBUG opt_bus.h # options for USB support USB_DEBUG opt_usb.h USB_HOST_ALIGN opt_usb.h USB_REQ_DEBUG opt_usb.h USB_TEMPLATE opt_usb.h USB_VERBOSE opt_usb.h USB_DMA_SINGLE_ALLOC opt_usb.h USB_EHCI_BIG_ENDIAN_DESC opt_usb.h U3G_DEBUG opt_u3g.h UKBD_DFLT_KEYMAP opt_ukbd.h UPLCOM_INTR_INTERVAL opt_uplcom.h UVSCOM_DEFAULT_OPKTSIZE opt_uvscom.h UVSCOM_INTR_INTERVAL opt_uvscom.h # options for the Realtek rtwn driver RTWN_DEBUG opt_rtwn.h RTWN_WITHOUT_UCODE opt_rtwn.h # Embedded system options INIT_PATH ROOTDEVNAME FDC_DEBUG opt_fdc.h PCFCLOCK_VERBOSE opt_pcfclock.h PCFCLOCK_MAX_RETRIES opt_pcfclock.h KTR opt_global.h KTR_ALQ opt_ktr.h KTR_MASK opt_ktr.h KTR_CPUMASK opt_ktr.h KTR_COMPILE opt_global.h KTR_BOOT_ENTRIES opt_global.h KTR_ENTRIES opt_global.h KTR_VERBOSE opt_ktr.h WITNESS opt_global.h WITNESS_KDB opt_witness.h WITNESS_NO_VNODE opt_witness.h WITNESS_SKIPSPIN opt_witness.h WITNESS_COUNT opt_witness.h OPENSOLARIS_WITNESS opt_global.h # options for ACPI support ACPI_DEBUG opt_acpi.h ACPI_MAX_TASKS opt_acpi.h ACPI_MAX_THREADS 
opt_acpi.h ACPI_DMAR opt_acpi.h DEV_ACPI opt_acpi.h # ISA support DEV_ISA opt_isa.h ISAPNP opt_isa.h # various 'device presence' options. DEV_BPF opt_bpf.h DEV_CARP opt_carp.h DEV_NETMAP opt_global.h DEV_PCI opt_pci.h DEV_PF opt_pf.h DEV_PFLOG opt_pf.h DEV_PFSYNC opt_pf.h DEV_RANDOM opt_global.h DEV_SPLASH opt_splash.h DEV_VLAN opt_vlan.h # ed driver ED_HPP opt_ed.h ED_3C503 opt_ed.h ED_SIC opt_ed.h # bce driver BCE_DEBUG opt_bce.h BCE_NVRAM_WRITE_SUPPORT opt_bce.h SOCKBUF_DEBUG opt_global.h # options for ubsec driver UBSEC_DEBUG opt_ubsec.h UBSEC_RNDTEST opt_ubsec.h UBSEC_NO_RNG opt_ubsec.h # options for hifn driver HIFN_DEBUG opt_hifn.h HIFN_RNDTEST opt_hifn.h # options for safenet driver SAFE_DEBUG opt_safe.h SAFE_NO_RNG opt_safe.h SAFE_RNDTEST opt_safe.h # syscons/vt options MAXCONS opt_syscons.h SC_ALT_MOUSE_IMAGE opt_syscons.h SC_CUT_SPACES2TABS opt_syscons.h SC_CUT_SEPCHARS opt_syscons.h SC_DEBUG_LEVEL opt_syscons.h SC_DFLT_FONT opt_syscons.h SC_DFLT_TERM opt_syscons.h SC_DISABLE_KDBKEY opt_syscons.h SC_DISABLE_REBOOT opt_syscons.h SC_HISTORY_SIZE opt_syscons.h SC_KERNEL_CONS_ATTR opt_syscons.h SC_KERNEL_CONS_ATTRS opt_syscons.h SC_KERNEL_CONS_REV_ATTR opt_syscons.h SC_MOUSE_CHAR opt_syscons.h SC_NO_CUTPASTE opt_syscons.h SC_NO_FONT_LOADING opt_syscons.h SC_NO_HISTORY opt_syscons.h SC_NO_MODE_CHANGE opt_syscons.h SC_NO_SUSPEND_VTYSWITCH opt_syscons.h SC_NO_SYSMOUSE opt_syscons.h SC_NO_TERM_DUMB opt_syscons.h SC_NO_TERM_SC opt_syscons.h SC_NO_TERM_TEKEN opt_syscons.h SC_NORM_ATTR opt_syscons.h SC_NORM_REV_ATTR opt_syscons.h SC_PIXEL_MODE opt_syscons.h SC_RENDER_DEBUG opt_syscons.h SC_TWOBUTTON_MOUSE opt_syscons.h VT_ALT_TO_ESC_HACK opt_syscons.h VT_FB_DEFAULT_WIDTH opt_syscons.h VT_FB_DEFAULT_HEIGHT opt_syscons.h VT_MAXWINDOWS opt_syscons.h VT_TWOBUTTON_MOUSE opt_syscons.h DEV_SC opt_syscons.h DEV_VT opt_syscons.h # teken terminal emulator options TEKEN_CONS25 opt_teken.h TEKEN_UTF8 opt_teken.h TERMINAL_KERN_ATTR opt_teken.h TERMINAL_NORM_ATTR opt_teken.h # options for printf PRINTF_BUFR_SIZE opt_printf.h BOOT_TAG opt_printf.h BOOT_TAG_SZ opt_printf.h # kbd options KBD_DISABLE_KEYMAP_LOAD opt_kbd.h KBD_INSTALL_CDEV opt_kbd.h KBD_MAXRETRY opt_kbd.h KBD_MAXWAIT opt_kbd.h KBD_RESETDELAY opt_kbd.h KBDIO_DEBUG opt_kbd.h KBDMUX_DFLT_KEYMAP opt_kbdmux.h # options for the Atheros driver ATH_DEBUG opt_ath.h ATH_TXBUF opt_ath.h ATH_RXBUF opt_ath.h ATH_DIAGAPI opt_ath.h ATH_TX99_DIAG opt_ath.h ATH_ENABLE_11N opt_ath.h ATH_ENABLE_DFS opt_ath.h ATH_EEPROM_FIRMWARE opt_ath.h ATH_ENABLE_RADIOTAP_VENDOR_EXT opt_ath.h ATH_DEBUG_ALQ opt_ath.h ATH_KTR_INTR_DEBUG opt_ath.h # options for the Atheros hal # XXX For now, this breaks non-AR9130 chipsets, so only use it # XXX when actually targeting AR9130. 
AH_SUPPORT_AR9130 opt_ah.h # This is required for AR933x SoC support AH_SUPPORT_AR9330 opt_ah.h AH_SUPPORT_AR9340 opt_ah.h AH_SUPPORT_QCA9530 opt_ah.h AH_SUPPORT_QCA9550 opt_ah.h AH_DEBUG opt_ah.h AH_ASSERT opt_ah.h AH_DEBUG_ALQ opt_ah.h AH_REGOPS_FUNC opt_ah.h AH_WRITE_REGDOMAIN opt_ah.h AH_DEBUG_COUNTRY opt_ah.h AH_WRITE_EEPROM opt_ah.h AH_PRIVATE_DIAG opt_ah.h AH_NEED_DESC_SWAP opt_ah.h AH_USE_INIPDGAIN opt_ah.h AH_MAXCHAN opt_ah.h AH_RXCFG_SDMAMW_4BYTES opt_ah.h AH_INTERRUPT_DEBUGGING opt_ah.h # AR5416 and later interrupt mitigation # XXX do not use this for AR9130 AH_AR5416_INTERRUPT_MITIGATION opt_ah.h # options for the Broadcom BCM43xx driver (bwi) BWI_DEBUG opt_bwi.h BWI_DEBUG_VERBOSE opt_bwi.h # options for the Broadcom BCM43xx driver (bwn) BWN_DEBUG opt_bwn.h BWN_GPL_PHY opt_bwn.h BWN_USE_SIBA opt_bwn.h # Options for the SIBA driver SIBA_DEBUG opt_siba.h # options for the Marvell 8335 wireless driver MALO_DEBUG opt_malo.h MALO_TXBUF opt_malo.h MALO_RXBUF opt_malo.h # options for the Marvell wireless driver MWL_DEBUG opt_mwl.h MWL_TXBUF opt_mwl.h MWL_RXBUF opt_mwl.h MWL_DIAGAPI opt_mwl.h MWL_AGGR_SIZE opt_mwl.h MWL_TX_NODROP opt_mwl.h # Options for the Marvell NETA driver MVNETA_MULTIQUEUE opt_mvneta.h MVNETA_KTR opt_mvneta.h # Options for the Intel 802.11ac wireless driver IWM_DEBUG opt_iwm.h # Options for the Intel 802.11n wireless driver IWN_DEBUG opt_iwn.h # Options for the Intel 3945ABG wireless driver WPI_DEBUG opt_wpi.h # dcons options DCONS_BUF_SIZE opt_dcons.h DCONS_POLL_HZ opt_dcons.h DCONS_FORCE_CONSOLE opt_dcons.h DCONS_FORCE_GDB opt_dcons.h # HWPMC options HWPMC_DEBUG opt_global.h HWPMC_HOOKS HWPMC_MIPS_BACKTRACE opt_hwpmc_hooks.h # 802.11 support layer IEEE80211_DEBUG opt_wlan.h IEEE80211_DEBUG_REFCNT opt_wlan.h IEEE80211_SUPPORT_MESH opt_wlan.h IEEE80211_SUPPORT_SUPERG opt_wlan.h IEEE80211_SUPPORT_TDMA opt_wlan.h IEEE80211_ALQ opt_wlan.h IEEE80211_DFS_DEBUG opt_wlan.h # 802.11 TDMA support TDMA_SLOTLEN_DEFAULT opt_tdma.h TDMA_SLOTCNT_DEFAULT opt_tdma.h TDMA_BINTVAL_DEFAULT opt_tdma.h TDMA_TXRATE_11B_DEFAULT opt_tdma.h TDMA_TXRATE_11G_DEFAULT opt_tdma.h TDMA_TXRATE_11A_DEFAULT opt_tdma.h TDMA_TXRATE_TURBO_DEFAULT opt_tdma.h TDMA_TXRATE_HALF_DEFAULT opt_tdma.h TDMA_TXRATE_QUARTER_DEFAULT opt_tdma.h TDMA_TXRATE_11NA_DEFAULT opt_tdma.h TDMA_TXRATE_11NG_DEFAULT opt_tdma.h # VideoMode PICKMODE_DEBUG opt_videomode.h # Network stack virtualization options VIMAGE opt_global.h VNET_DEBUG opt_global.h # Common Flash Interface (CFI) options CFI_SUPPORT_STRATAFLASH opt_cfi.h CFI_ARMEDANDDANGEROUS opt_cfi.h CFI_HARDWAREBYTESWAP opt_cfi.h # Sound options SND_DEBUG opt_snd.h SND_DIAGNOSTIC opt_snd.h SND_FEEDER_MULTIFORMAT opt_snd.h SND_FEEDER_FULL_MULTIFORMAT opt_snd.h SND_FEEDER_RATE_HP opt_snd.h SND_PCM_64 opt_snd.h SND_OLDSTEREO opt_snd.h X86BIOS # Flattened device tree options FDT opt_platform.h FDT_DTB_STATIC opt_platform.h # OFED Infiniband stack OFED opt_ofed.h OFED_DEBUG_INIT opt_ofed.h SDP opt_ofed.h SDP_DEBUG opt_ofed.h IPOIB opt_ofed.h IPOIB_DEBUG opt_ofed.h IPOIB_CM opt_ofed.h # Resource Accounting RACCT opt_global.h RACCT_DEFAULT_TO_DISABLED opt_global.h # Resource Limits RCTL opt_global.h # Random number generator(s) # With this, no entropy processor is loaded, but the entropy # harvesting infrastructure is present. This means an entropy # processor may be loaded as a module. RANDOM_LOADABLE opt_global.h # This turns on high-rate and potentially expensive harvesting in # the uma slab allocator.
RANDOM_ENABLE_UMA opt_global.h RANDOM_ENABLE_ETHER opt_global.h # This option turns the TPM into an entropy source. TPM_HARVEST opt_tpm.h # BHND(4) driver BHND_LOGLEVEL opt_global.h # GPIO and child devices GPIO_SPI_DEBUG opt_gpio.h # SPI devices SPIGEN_LEGACY_CDEVNAME opt_spi.h # etherswitch(4) driver RTL8366_SOFT_RESET opt_etherswitch.h # evdev protocol support EVDEV_SUPPORT opt_evdev.h EVDEV_DEBUG opt_evdev.h UINPUT_DEBUG opt_evdev.h # Hyper-V network driver HN_DEBUG opt_hn.h # CAM-based MMC stack MMCCAM # Encrypted kernel crash dumps EKCD opt_ekcd.h # NVME options NVME_USE_NVD opt_nvme.h # amdsbwd options AMDSBWD_DEBUG opt_amdsbwd.h # gcov support GCOV opt_global.h LINDEBUGFS Index: projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-nanopi-neo2-opp.dtso =================================================================== --- projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-nanopi-neo2-opp.dtso (nonexistent) +++ projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-nanopi-neo2-opp.dtso (revision 347076) @@ -0,0 +1,32 @@ +/dts-v1/; +/plugin/; + +#include <dt-bindings/gpio/gpio.h> + +/ { + compatible = "allwinner,sun50i-h5"; +}; + +&{/} { + vdd_cpux: gpio-regulator { + compatible = "regulator-gpio"; + pinctrl-names = "default"; + regulator-name = "vdd-cpux"; + regulator-type = "voltage"; + regulator-boot-on; + regulator-always-on; + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1300000>; + regulator-ramp-delay = <50>; /* 4ms */ + gpios = <&r_pio 0 6 GPIO_ACTIVE_HIGH>; /* PL6 */ + gpios-states = <0x1>; + states = <1100000 0x0 + 1300000 0x1>; + }; + +}; + +&{/cpus/cpu@0} { + cpu-supply = <&vdd_cpux>; +}; + Property changes on: projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-nanopi-neo2-opp.dtso ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Index: projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-opp.dtso =================================================================== --- projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-opp.dtso (nonexistent) +++ projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-opp.dtso (revision 347076) @@ -0,0 +1,99 @@ +/dts-v1/; +/plugin/; + +#include <dt-bindings/clock/sun50i-h5-ccu.h> + +/ { + compatible = "allwinner,sun50i-h5"; +}; + +&{/} { + cpu_opp_table: opp_table { + compatible = "operating-points-v2"; + opp-shared; + + opp@408000000 { + opp-hz = /bits/ 64 <408000000>; + opp-microvolt = <1000000 1000000 1300000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + }; + + opp@648000000 { + opp-hz = /bits/ 64 <648000000>; + opp-microvolt = <1040000 1040000 1300000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + }; + + opp@816000000 { + opp-hz = /bits/ 64 <816000000>; + opp-microvolt = <1080000 1080000 1300000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + }; + + opp@912000000 { + opp-hz = /bits/ 64 <912000000>; + opp-microvolt = <1120000 1120000 1300000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + }; + + opp@960000000 { + opp-hz = /bits/ 64 <960000000>; + opp-microvolt = <1160000 1160000 1300000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + }; + + opp@1008000000 { + opp-hz = /bits/ 64 <1008000000>; + opp-microvolt = <1200000 1200000 1300000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + }; + + opp@1056000000 { + opp-hz = /bits/ 64 <1056000000>; + opp-microvolt = <1240000 1240000 1300000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + }; + + opp@1104000000 { + opp-hz = /bits/ 64
<1104000000>; + opp-microvolt = <1260000 1260000 1300000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + }; + + opp@1152000000 { + opp-hz = /bits/ 64 <1152000000>; + opp-microvolt = <1300000 1300000 1300000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + }; + }; + + reg_cpu_fallback: reg_cpu_fallback { + compatible = "regulator-fixed"; + regulator-name = "vdd-cpux-dummy"; + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1100000>; + }; + +}; + +&{/cpus/cpu@0} { + clocks = <&ccu CLK_CPUX>; + clock-names = "cpu"; + clock-latency = <244144>; /* 8 32k periods */ + operating-points-v2 = <&cpu_opp_table>; + cpu-supply = <&reg_cpu_fallback>; + #cooling-cells = <2>; +}; + +&{/cpus/cpu@1} { + operating-points-v2 = <&cpu_opp_table>; +}; + +&{/cpus/cpu@2} { + operating-points-v2 = <&cpu_opp_table>; +}; + +&{/cpus/cpu@3} { + operating-points-v2 = <&cpu_opp_table>; +}; + Property changes on: projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-opp.dtso ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Index: projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-sid.dtso =================================================================== --- projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-sid.dtso (nonexistent) +++ projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-sid.dtso (revision 347076) @@ -0,0 +1,17 @@ +/dts-v1/; +/plugin/; + +/ { + compatible = "allwinner,sun50i-h5"; +}; + +&{/soc} { + sid: eeprom@1c14000 { + compatible = "allwinner,sun50i-h5-sid"; + reg = <0x1c14000 0x400>; + + ths_calib: calib@234 { + reg = <0x234 0x4>; + }; + }; +}; Property changes on: projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-sid.dtso ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Index: projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-ths.dtso =================================================================== --- projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-ths.dtso (nonexistent) +++ projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-ths.dtso (revision 347076) @@ -0,0 +1,26 @@ +/dts-v1/; +/plugin/; + +#include <dt-bindings/clock/sun50i-h5-ccu.h> +#include <dt-bindings/interrupt-controller/arm-gic.h> +#include <dt-bindings/reset/sun50i-h5-ccu.h> + +/ { + compatible = "allwinner,sun50i-h5"; +}; + +&{/soc} { + ths: thermal_sensor@1c25000 { + compatible = "allwinner,sun50i-h5-ths"; + reg = <0x01c25000 0x100>; + interrupts = ; + clocks = <&ccu CLK_BUS_THS>, <&ccu CLK_THS>; + clock-names = "apb", "ths"; + resets = <&ccu RST_BUS_THS>; + reset-names = "apb"; + #thermal-sensor-cells = <1>; + + nvmem-cells = <&ths_calib>; + nvmem-cell-names = "ths-calib"; + }; +}; Property changes on: projects/runtime-coverage-v2/sys/dts/arm64/overlays/sun50i-h5-ths.dtso ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Index: projects/runtime-coverage-v2/sys/i386/conf/GENERIC =================================================================== --- projects/runtime-coverage-v2/sys/i386/conf/GENERIC (revision 347075) +++ projects/runtime-coverage-v2/sys/i386/conf/GENERIC (revision 347076) @@ -1,372 +1,373 @@ # # GENERIC -- Generic kernel configuration file for FreeBSD/i386 # # For more information on this file, please read the config(5) manual page, # and/or the handbook section on Kernel Configuration Files: # #
https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ cpu I486_CPU cpu I586_CPU cpu I686_CPU ident GENERIC makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support options SCHED_ULE # ULE scheduler options PREEMPTION # Enable kernel thread preemption options VIMAGE # Subsystem virtualization, e.g. VNET options INET # InterNETworking options INET6 # IPv6 communications protocols options IPSEC # IP (v4/v6) security options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options TCP_HHOOK # hhook(9) framework for TCP options TCP_OFFLOAD # TCP offload options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support options UFS_ACL # Support for access control lists options UFS_DIRHASH # Improve performance on big directories options UFS_GJOURNAL # Enable gjournal-based UFS journaling options QUOTA # Enable disk quotas for UFS options MD_ROOT # MD is a potential root device options NFSCL # Network Filesystem Client options NFSD # Network Filesystem Server options NFSLOCKD # Network Lock Manager options NFS_ROOT # NFS usable as /, requires NFSCL options MSDOSFS # MSDOS Filesystem options CD9660 # ISO 9660 Filesystem options PROCFS # Process filesystem (requires PSEUDOFS) options PSEUDOFS # Pseudo-filesystem framework options GEOM_RAID # Soft RAID functionality. options GEOM_LABEL # Provides labelization options COMPAT_FREEBSD4 # Compatible with FreeBSD4 options COMPAT_FREEBSD5 # Compatible with FreeBSD5 options COMPAT_FREEBSD6 # Compatible with FreeBSD6 options COMPAT_FREEBSD7 # Compatible with FreeBSD7 options COMPAT_FREEBSD9 # Compatible with FreeBSD9 options COMPAT_FREEBSD10 # Compatible with FreeBSD10 options COMPAT_FREEBSD11 # Compatible with FreeBSD11 +options COMPAT_FREEBSD12 # Compatible with FreeBSD12 options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI options KTRACE # ktrace(1) support options STACK # stack(9) support options SYSVSHM # SYSV-style shared memory options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options KBD_INSTALL_CDEV # install a CDEV entry in /dev options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data options INCLUDE_CONFIG_FILE # Include this file in kernel options RACCT # Resource accounting framework options RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default options RCTL # Resource limits # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. 
# For full debugger support use (turn off in stable branch): options DDB # Support DDB. options GDB # Support remote GDB. options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default # Kernel dump features. options EKCD # Support for encrypted kernel dumps options GZIO # gzip-compressed kernel and user dumps options ZSTDIO # zstd-compressed kernel and user dumps options NETDUMP # netdump(4) client support # To make an SMP kernel, the next two lines are needed options SMP # Symmetric MultiProcessor Kernel device apic # I/O APIC options EARLY_AP_STARTUP # CPU frequency control device cpufreq # Bus support. device acpi device pci options PCI_HP # PCI-Express native HotPlug options PCI_IOV # PCI SR-IOV support # Floppy drives device fdc # ATA controllers device ahci # AHCI-compatible SATA controllers device ata # Legacy ATA/SATA controllers device mvs # Marvell 88SX50XX/88SX60XX/88SX70XX/SoC SATA device siis # SiliconImage SiI3124/SiI3132/SiI3531 SATA # SCSI Controllers device ahc # AHA2940 and onboard AIC7xxx devices device esp # AMD Am53C974 (Tekram DC-390(T)) device hptiop # Highpoint RocketRaid 3xxx series device isp # Qlogic family #device ispfw # Firmware for QLogic HBAs- normally a module device mpt # LSI-Logic MPT-Fusion device mps # LSI-Logic MPT-Fusion 2 device mpr # LSI-Logic MPT-Fusion 3 device sym # NCR/Symbios Logic device trm # Tekram DC395U/UW/F DC315U adapters device isci # Intel C600 SAS controller # ATA/SCSI peripherals device scbus # SCSI bus (required for ATA/SCSI) device ch # SCSI media changers device da # Direct Access (disks) device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) device ses # Enclosure Services (SES and SAF-TE) #device ctl # CAM Target Layer # RAID controllers interfaced to the SCSI subsystem device amr # AMI MegaRAID device arcmsr # Areca SATA II RAID device ciss # Compaq Smart RAID 5* device hptmv # Highpoint RocketRAID 182x device hptnr # Highpoint DC7280, R750 device hptrr # Highpoint RocketRAID 17xx, 22xx, 23xx, 25xx device hpt27xx # Highpoint RocketRAID 27xx device iir # Intel Integrated RAID device ips # IBM (Adaptec) ServeRAID device mly # Mylex AcceleRAID/eXtremeRAID device twa # 3ware 9000 series PATA/SATA RAID device tws # LSI 3ware 9750 SATA+SAS 6Gb/s RAID controller # RAID controllers device aac # Adaptec FSA RAID device aacp # SCSI passthrough for aac (requires CAM) device aacraid # Adaptec by PMC RAID device ida # Compaq Smart RAID device mfi # LSI MegaRAID SAS device mlx # Mylex DAC960 family device mrsas # LSI/Avago MegaRAID SAS/SATA, 6Gb/s and 12Gb/s device pmspcv # PMC-Sierra SAS/SATA Controller driver device pst # Promise Supertrak SX6000 device twe # 3ware ATA RAID # NVM Express (NVMe) support device nvme # base NVMe driver device nvd # expose NVMe namespace as disks, depends on nvme # atkbdc0 controls both the keyboard and the PS/2 mouse device atkbdc # AT keyboard controller device atkbd # AT keyboard device psm # PS/2 mouse device kbdmux # keyboard multiplexer device vga # VGA video card driver options VESA # Add support for VESA BIOS Extensions (VBE) device splash # 
Splash screen and screen saver support # syscons is the default console driver, resembling an SCO console device sc options SC_PIXEL_MODE # add support for the raster text mode # vt is the new video console driver device vt device vt_vga device agp # support several AGP chipsets # Power management support (see NOTES for more options) #device apm # PCCARD (PCMCIA) support # PCMCIA and cardbus bridge support device cbb # cardbus (yenta) bridge device pccard # PC Card (16-bit) bus device cardbus # CardBus (32-bit) bus # Serial (COM) ports device uart # Generic UART driver # Parallel port device ppc device ppbus # Parallel port bus (required) device lpt # Printer device ppi # Parallel port interface device #device vpo # Requires scbus and da device puc # Multi I/O cards and multi-channel UARTs # PCI/PCI-X/PCIe Ethernet NICs that use iflib infrastructure device iflib device em # Intel PRO/1000 Gigabit Ethernet Family device vmx # VMware VMXNET3 Ethernet # PCI Ethernet NICs. device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE device de # DEC/Intel DC21x4x (``Tulip'') device le # AMD Am7900 LANCE and Am79C9xx PCnet device ti # Alteon Networks Tigon I/II gigabit Ethernet device txp # 3Com 3cR990 (``Typhoon'') device vx # 3Com 3c590, 3c595 (``Vortex'') # PCI Ethernet NICs that use the common MII bus controller code. # NOTE: Be sure to keep the 'device miibus' line in order to use these NICs! device miibus # MII bus support device ae # Attansic/Atheros L2 FastEthernet device age # Attansic/Atheros L1 Gigabit Ethernet device alc # Atheros AR8131/AR8132 Ethernet device ale # Atheros AR8121/AR8113/AR8114 Ethernet device bce # Broadcom BCM5706/BCM5708 Gigabit Ethernet device bfe # Broadcom BCM440x 10/100 Ethernet device bge # Broadcom BCM570xx Gigabit Ethernet device cas # Sun Cassini/Cassini+ and NS DP83065 Saturn device dc # DEC/Intel 21143 and various workalikes device et # Agere ET1310 10/100/Gigabit Ethernet device fxp # Intel EtherExpress PRO/100B (82557, 82558) device gem # Sun GEM/Sun ERI/Apple GMAC device hme # Sun HME (Happy Meal Ethernet) device jme # JMicron JMC250 Gigabit/JMC260 Fast Ethernet device lge # Level 1 LXT1001 gigabit Ethernet device msk # Marvell/SysKonnect Yukon II Gigabit Ethernet device nfe # nVidia nForce MCP on-board Ethernet device nge # NatSemi DP83820 gigabit Ethernet device pcn # AMD Am79C97x PCI 10/100 (precedence over 'le') device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 device sf # Adaptec AIC-6915 (``Starfire'') device sge # Silicon Integrated Systems SiS190/191 device sis # Silicon Integrated Systems SiS 900/SiS 7016 device sk # SysKonnect SK-984x & SK-982x gigabit Ethernet device ste # Sundance ST201 (D-Link DFE-550TX) device stge # Sundance/Tamarack TC9021 gigabit Ethernet device tl # Texas Instruments ThunderLAN device tx # SMC EtherPower II (83c170 ``EPIC'') device vge # VIA VT612x gigabit Ethernet device vr # VIA Rhine, Rhine II device vte # DM&P Vortex86 RDC R6040 Fast Ethernet device wb # Winbond W89C840F device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') # ISA Ethernet NICs. pccard NICs included. 
device cs # Crystal Semiconductor CS89x0 NIC # 'device ed' requires 'device miibus' device ed # NE[12]000, SMC Ultra, 3c503, DS8390 cards device ex # Intel EtherExpress Pro/10 and Pro/10+ device ep # Etherlink III based cards device fe # Fujitsu MB8696x based cards device sn # SMC's 9000 series of Ethernet chips device xe # Xircom pccard Ethernet # Wireless NIC cards device wlan # 802.11 support options IEEE80211_DEBUG # enable debug msgs options IEEE80211_SUPPORT_MESH # enable 802.11s draft support device wlan_wep # 802.11 WEP support device wlan_ccmp # 802.11 CCMP support device wlan_tkip # 802.11 TKIP support device wlan_amrr # AMRR transmit rate control algorithm device an # Aironet 4500/4800 802.11 wireless NICs. device ath # Atheros NICs device ath_pci # Atheros pci/cardbus glue device ath_hal # pci/cardbus chip support options AH_AR5416_INTERRUPT_MITIGATION # AR5416 interrupt mitigation options ATH_ENABLE_11N # Enable 802.11n support for AR5416 and later device ath_rate_sample # SampleRate tx rate control for ath #device bwi # Broadcom BCM430x/BCM431x wireless NICs. #device bwn # Broadcom BCM43xx wireless NICs. device ipw # Intel 2100 wireless NICs. device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs. device iwn # Intel 4965/1000/5000/6000 wireless NICs. device malo # Marvell Libertas wireless NICs. device mwl # Marvell 88W8363 802.11n wireless NICs. device ral # Ralink Technology RT2500 wireless NICs. device wi # WaveLAN/Intersil/Symbol 802.11 wireless NICs. device wpi # Intel 3945ABG wireless NICs. # Pseudo devices. device crypto # core crypto support device loop # Network loopback device random # Entropy device device padlock_rng # VIA Padlock RNG device rdrand_rng # Intel Bull Mountain RNG device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. device bpf # Berkeley packet filter # USB support options USB_DEBUG # enable debug msgs device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface (USB 2.0) device xhci # XHCI PCI->USB interface (USB 3.0) device usb # USB Bus (required) device ukbd # Keyboard device umass # Disks/Mass storage - Requires scbus and da # Sound support device sound # Generic sound driver (required) device snd_cmi # CMedia CMI8338/CMI8738 device snd_csa # Crystal Semiconductor CS461x/428x device snd_emu10kx # Creative SoundBlaster Live! and Audigy device snd_es137x # Ensoniq AudioPCI ES137x device snd_hda # Intel High Definition Audio device snd_ich # Intel, NVidia and other ICH AC'97 Audio device snd_via8233 # VIA VT8233x Audio # MMC/SD device mmc # MMC/SD bus device mmcsd # MMC/SD memory card device sdhci # Generic PCI SD Host Controller # VirtIO support device virtio # Generic VirtIO bus (required) device virtio_pci # VirtIO PCI device device vtnet # VirtIO Ethernet device device virtio_blk # VirtIO Block device device virtio_scsi # VirtIO SCSI device device virtio_balloon # VirtIO Memory Balloon device # HyperV drivers and enhancement support device hyperv # HyperV drivers # Xen HVM Guest Optimizations # NOTE: XENHVM depends on xenpci. They must be added or removed together.
options XENHVM # Xen HVM kernel infrastructure device xenpci # Xen HVM Hypervisor services driver # evdev interface options EVDEV_SUPPORT # evdev support in legacy drivers device evdev # input event device support device uinput # install /dev/uinput cdev Index: projects/runtime-coverage-v2/sys/i386/conf/MINIMAL =================================================================== --- projects/runtime-coverage-v2/sys/i386/conf/MINIMAL (revision 347075) +++ projects/runtime-coverage-v2/sys/i386/conf/MINIMAL (revision 347076) @@ -1,155 +1,156 @@ # # MINIMAL -- Mostly Minimal kernel configuration file for FreeBSD/i386 # # Many definitions of minimal are possible. The one this file follows is # GENERIC, minus all functionality that can be replaced by loading kernel # modules. # # Exceptions: # o While UFS is buildable as a module, the current module lacks # some features (ACL, GJOURNAL) that GENERIC includes. # o acpi as a module has been reported flaky and not well tested, so # it is included in the kernel. # o random is included due to uncertainty... # o Many networking things are included # # For now, please run changes to this list past imp@freebsd.org # # For more information on this file, please read the config(5) manual page, # and/or the handbook section on Kernel Configuration Files: # # https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ cpu I486_CPU cpu I586_CPU cpu I686_CPU ident MINIMAL makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support options SCHED_ULE # ULE scheduler options PREEMPTION # Enable kernel thread preemption options INET # InterNETworking options INET6 # IPv6 communications protocols options TCP_OFFLOAD # TCP offload options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support options UFS_ACL # Support for access control lists options UFS_DIRHASH # Improve performance on big directories options UFS_GJOURNAL # Enable gjournal-based UFS journaling options QUOTA # Enable disk quotas for UFS options MD_ROOT # MD is a potential root device options COMPAT_FREEBSD4 # Compatible with FreeBSD4 options COMPAT_FREEBSD5 # Compatible with FreeBSD5 options COMPAT_FREEBSD6 # Compatible with FreeBSD6 options COMPAT_FREEBSD7 # Compatible with FreeBSD7 options COMPAT_FREEBSD9 # Compatible with FreeBSD9 options COMPAT_FREEBSD10 # Compatible with FreeBSD10 options COMPAT_FREEBSD11 # Compatible with FreeBSD11 +options COMPAT_FREEBSD12 # Compatible with FreeBSD12 options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI options KTRACE # ktrace(1) support options STACK # stack(9) support options SYSVSHM # SYSV-style shared memory options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed.
options KBD_INSTALL_CDEV # install a CDEV entry in /dev options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_FRAME # Ensure frames are compiled in options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data options INCLUDE_CONFIG_FILE # Include this file in kernel # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB # Support DDB. options GDB # Support remote GDB. options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default # Make an SMP-capable kernel by default options SMP # Symmetric MultiProcessor Kernel options EARLY_AP_STARTUP device apic # CPU frequency control device cpufreq # Bus support. device acpi options ACPI_DMAR device pci # atkbdc0 controls both the keyboard and the PS/2 mouse device atkbdc # AT keyboard controller device atkbd # AT keyboard device psm # PS/2 mouse device kbdmux # keyboard multiplexer device vga # VGA video card driver options VESA # Add support for VESA BIOS Extensions (VBE) device splash # Splash screen and screen saver support # syscons is the default console driver, resembling an SCO console device sc options SC_PIXEL_MODE # add support for the raster text mode # vt is the new video console driver device vt device vt_vga device vt_efifb device agp # support several AGP chipsets # Pseudo devices. device loop # Network loopback device random # Entropy device device padlock_rng # VIA Padlock RNG device rdrand_rng # Intel Bull Mountain RNG device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device gif # IPv6 and IPv4 tunneling # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. device bpf # Berkeley packet filter # Xen HVM Guest Optimizations # NOTE: XENHVM depends on xenpci. They must be added or removed together. options XENHVM # Xen HVM kernel infrastructure device xenpci # Xen HVM Hypervisor services driver # evdev interface options EVDEV_SUPPORT # evdev support in legacy drivers device evdev # input event device support device uinput # install /dev/uinput cdev Index: projects/runtime-coverage-v2/sys/i386/linux/linux.h =================================================================== --- projects/runtime-coverage-v2/sys/i386/linux/linux.h (revision 347075) +++ projects/runtime-coverage-v2/sys/i386/linux/linux.h (revision 347076) @@ -1,614 +1,606 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * $FreeBSD$ */ #ifndef _I386_LINUX_H_ #define _I386_LINUX_H_ #include /* for sigval union */ #include #include #define LINUX_LEGACY_SYSCALLS /* * debugging support */ extern u_char linux_debug_map[]; #define ldebug(name) isclr(linux_debug_map, LINUX_SYS_linux_ ## name) #define ARGS(nm, fmt) "linux(%ld/%ld): "#nm"("fmt")\n", \ (long)td->td_proc->p_pid, (long)td->td_tid #define LMSG(fmt) "linux(%ld/%ld): "fmt"\n", \ (long)td->td_proc->p_pid, (long)td->td_tid #define LINUX_DTRACE linuxulator #define LINUX_SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE) #define LINUX_USRSTACK LINUX_SHAREDPAGE #define PTRIN(v) (void *)(v) #define PTROUT(v) (l_uintptr_t)(v) #define CP(src,dst,fld) do { (dst).fld = (src).fld; } while (0) #define CP2(src,dst,sfld,dfld) do { (dst).dfld = (src).sfld; } while (0) #define PTRIN_CP(src,dst,fld) \ do { (dst).fld = PTRIN((src).fld); } while (0) /* * Provide a separate set of types for the Linux types. */ typedef int l_int; typedef int32_t l_long; typedef int64_t l_longlong; typedef short l_short; typedef unsigned int l_uint; typedef uint32_t l_ulong; typedef uint64_t l_ulonglong; typedef unsigned short l_ushort; typedef char *l_caddr_t; typedef l_ulong l_uintptr_t; typedef l_long l_clock_t; typedef l_int l_daddr_t; typedef l_ushort l_dev_t; typedef l_uint l_gid_t; typedef l_ushort l_gid16_t; typedef l_ulong l_ino_t; typedef l_int l_key_t; typedef l_longlong l_loff_t; typedef l_ushort l_mode_t; typedef l_long l_off_t; typedef l_int l_pid_t; typedef l_uint l_size_t; typedef l_long l_suseconds_t; typedef l_long l_time_t; typedef l_uint l_uid_t; typedef l_ushort l_uid16_t; typedef l_int l_timer_t; typedef l_int l_mqd_t; typedef l_ulong l_fd_mask; typedef struct { l_int val[2]; } l_fsid_t; typedef struct { l_time_t tv_sec; l_suseconds_t tv_usec; } l_timeval; #define l_fd_set fd_set /* * Miscellaneous */ #define LINUX_AT_COUNT 20 /* Count of used aux entry types. * Keep this synchronized with * linux_fixup_elf() code. 
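Because every l_* typedef above pins an exact width for the i386 Linux ABI, a compile-time check catches accidental drift. A minimal standalone sketch (not part of this change; the typedef names are restated locally, and a C11 compiler with _Static_assert is assumed):

    #include <stdint.h>

    /* Restated locally from the typedefs above. */
    typedef int32_t  l_long;
    typedef int64_t  l_longlong;
    typedef uint32_t l_ulong;

    /* Linux "long" is 32 bits on i386, regardless of the host ABI. */
    _Static_assert(sizeof(l_long) == 4, "l_long must be 32 bits");
    _Static_assert(sizeof(l_longlong) == 8, "l_longlong must be 64 bits");
    _Static_assert(sizeof(l_ulong) == 4, "l_ulong must be 32 bits");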
*/ struct l___sysctl_args { l_int *name; l_int nlen; void *oldval; l_size_t *oldlenp; void *newval; l_size_t newlen; l_ulong __spare[4]; }; /* Resource limits */ #define LINUX_RLIMIT_CPU 0 #define LINUX_RLIMIT_FSIZE 1 #define LINUX_RLIMIT_DATA 2 #define LINUX_RLIMIT_STACK 3 #define LINUX_RLIMIT_CORE 4 #define LINUX_RLIMIT_RSS 5 #define LINUX_RLIMIT_NPROC 6 #define LINUX_RLIMIT_NOFILE 7 #define LINUX_RLIMIT_MEMLOCK 8 #define LINUX_RLIMIT_AS 9 /* Address space limit */ #define LINUX_RLIM_NLIMITS 10 struct l_rlimit { l_ulong rlim_cur; l_ulong rlim_max; }; struct l_mmap_argv { l_uintptr_t addr; l_size_t len; l_int prot; l_int flags; l_int fd; l_off_t pgoff; } __packed; /* * stat family of syscalls */ struct l_timespec { l_time_t tv_sec; l_long tv_nsec; }; struct l_newstat { l_ushort st_dev; l_ushort __pad1; l_ulong st_ino; l_ushort st_mode; l_ushort st_nlink; l_ushort st_uid; l_ushort st_gid; l_ushort st_rdev; l_ushort __pad2; l_ulong st_size; l_ulong st_blksize; l_ulong st_blocks; struct l_timespec st_atim; struct l_timespec st_mtim; struct l_timespec st_ctim; l_ulong __unused4; l_ulong __unused5; }; struct l_stat { l_ushort st_dev; l_ulong st_ino; l_ushort st_mode; l_ushort st_nlink; l_ushort st_uid; l_ushort st_gid; l_ushort st_rdev; l_long st_size; struct l_timespec st_atim; struct l_timespec st_mtim; struct l_timespec st_ctim; l_long st_blksize; l_long st_blocks; l_ulong st_flags; l_ulong st_gen; }; struct l_stat64 { l_ushort st_dev; u_char __pad0[10]; l_ulong __st_ino; l_uint st_mode; l_uint st_nlink; l_ulong st_uid; l_ulong st_gid; l_ushort st_rdev; u_char __pad3[10]; l_longlong st_size; l_ulong st_blksize; l_ulong st_blocks; l_ulong __pad4; struct l_timespec st_atim; struct l_timespec st_mtim; struct l_timespec st_ctim; l_ulonglong st_ino; }; struct l_statfs64 { l_int f_type; l_int f_bsize; uint64_t f_blocks; uint64_t f_bfree; uint64_t f_bavail; uint64_t f_files; uint64_t f_ffree; l_fsid_t f_fsid; l_int f_namelen; l_int f_frsize; l_int f_flags; l_int f_spare[4]; }; #define LINUX_NSIG_WORDS 2 /* sigaction flags */ #define LINUX_SA_NOCLDSTOP 0x00000001 #define LINUX_SA_NOCLDWAIT 0x00000002 #define LINUX_SA_SIGINFO 0x00000004 #define LINUX_SA_RESTORER 0x04000000 #define LINUX_SA_ONSTACK 0x08000000 #define LINUX_SA_RESTART 0x10000000 #define LINUX_SA_INTERRUPT 0x20000000 #define LINUX_SA_NOMASK 0x40000000 #define LINUX_SA_ONESHOT 0x80000000 /* sigprocmask actions */ #define LINUX_SIG_BLOCK 0 #define LINUX_SIG_UNBLOCK 1 #define LINUX_SIG_SETMASK 2 /* sigaltstack */ #define LINUX_MINSIGSTKSZ 2048 typedef void (*l_handler_t)(l_int); typedef l_ulong l_osigset_t; typedef struct { l_handler_t lsa_handler; l_osigset_t lsa_mask; l_ulong lsa_flags; void (*lsa_restorer)(void); } l_osigaction_t; typedef struct { l_handler_t lsa_handler; l_ulong lsa_flags; void (*lsa_restorer)(void); l_sigset_t lsa_mask; } l_sigaction_t; typedef struct { void *ss_sp; l_int ss_flags; l_size_t ss_size; } l_stack_t; /* The Linux sigcontext, pretty much a standard 386 trapframe. 
*/ struct l_sigcontext { l_int sc_gs; l_int sc_fs; l_int sc_es; l_int sc_ds; l_int sc_edi; l_int sc_esi; l_int sc_ebp; l_int sc_esp; l_int sc_ebx; l_int sc_edx; l_int sc_ecx; l_int sc_eax; l_int sc_trapno; l_int sc_err; l_int sc_eip; l_int sc_cs; l_int sc_eflags; l_int sc_esp_at_signal; l_int sc_ss; l_int sc_387; l_int sc_mask; l_int sc_cr2; }; struct l_ucontext { l_ulong uc_flags; void *uc_link; l_stack_t uc_stack; struct l_sigcontext uc_mcontext; l_sigset_t uc_sigmask; }; #define LINUX_SI_MAX_SIZE 128 #define LINUX_SI_PAD_SIZE ((LINUX_SI_MAX_SIZE/sizeof(l_int)) - 3) typedef union l_sigval { l_int sival_int; l_uintptr_t sival_ptr; } l_sigval_t; typedef struct l_siginfo { l_int lsi_signo; l_int lsi_errno; l_int lsi_code; union { l_int _pad[LINUX_SI_PAD_SIZE]; struct { l_pid_t _pid; l_uid_t _uid; } _kill; struct { l_timer_t _tid; l_int _overrun; char _pad[sizeof(l_uid_t) - sizeof(l_int)]; l_sigval_t _sigval; l_int _sys_private; } _timer; struct { l_pid_t _pid; /* sender's pid */ l_uid_t _uid; /* sender's uid */ l_sigval_t _sigval; } _rt; struct { l_pid_t _pid; /* which child */ l_uid_t _uid; /* sender's uid */ l_int _status; /* exit code */ l_clock_t _utime; l_clock_t _stime; } _sigchld; struct { l_uintptr_t _addr; /* Faulting insn/memory ref. */ } _sigfault; struct { l_long _band; /* POLL_IN,POLL_OUT,POLL_MSG */ l_int _fd; } _sigpoll; } _sifields; } l_siginfo_t; #define lsi_pid _sifields._kill._pid #define lsi_uid _sifields._kill._uid #define lsi_tid _sifields._timer._tid #define lsi_overrun _sifields._timer._overrun #define lsi_sys_private _sifields._timer._sys_private #define lsi_status _sifields._sigchld._status #define lsi_utime _sifields._sigchld._utime #define lsi_stime _sifields._sigchld._stime #define lsi_value _sifields._rt._sigval #define lsi_int _sifields._rt._sigval.sival_int #define lsi_ptr _sifields._rt._sigval.sival_ptr #define lsi_addr _sifields._sigfault._addr #define lsi_band _sifields._sigpoll._band #define lsi_fd _sifields._sigpoll._fd struct l_fpreg { u_int16_t significand[4]; u_int16_t exponent; }; struct l_fpxreg { u_int16_t significand[4]; u_int16_t exponent; u_int16_t padding[3]; }; struct l_xmmreg { u_int32_t element[4]; }; struct l_fpstate { /* Regular FPU environment */ u_int32_t cw; u_int32_t sw; u_int32_t tag; u_int32_t ipoff; u_int32_t cssel; u_int32_t dataoff; u_int32_t datasel; struct l_fpreg _st[8]; u_int16_t status; u_int16_t magic; /* 0xffff = regular FPU data */ /* FXSR FPU environment */ u_int32_t _fxsr_env[6]; /* env is ignored. */ u_int32_t mxcsr; u_int32_t reserved; struct l_fpxreg _fxsr_st[8]; /* reg data is ignored. */ struct l_xmmreg _xmm[8]; u_int32_t padding[56]; }; /* * We make the stack look like Linux expects it when calling a signal * handler, but use the BSD way of calling the handler and sigreturn(). * This means that we need to pass the pointer to the handler too. * It is appended to the frame to not interfere with the rest of it. 
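The LINUX_SI_PAD_SIZE arithmetic a few lines up is meant to keep the whole siginfo at exactly LINUX_SI_MAX_SIZE bytes: three leading l_int fields plus a union whose largest member is the pad array. A standalone restatement of that invariant (hypothetical struct name; assumes a 4-byte int, as on i386):

    #include <stdio.h>

    #define SI_MAX_SIZE 128
    #define SI_PAD_SIZE ((SI_MAX_SIZE / sizeof(int)) - 3)

    struct si_shape {                       /* illustrative stand-in for l_siginfo_t */
        int signo, error, code;             /* the three fixed leading fields */
        union { int pad[SI_PAD_SIZE]; } u;  /* the pad dominates the union */
    };

    int main(void)
    {
        /* 3*4 + 29*4 == 128: the pad absorbs whatever the real members leave over. */
        printf("sizeof(struct si_shape) = %zu\n", sizeof(struct si_shape));
        return (0);
    }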
*/ struct l_sigframe { l_int sf_sig; struct l_sigcontext sf_sc; struct l_fpstate sf_fpstate; l_uint sf_extramask[LINUX_NSIG_WORDS-1]; l_handler_t sf_handler; }; struct l_rt_sigframe { l_int sf_sig; l_siginfo_t *sf_siginfo; struct l_ucontext *sf_ucontext; l_siginfo_t sf_si; struct l_ucontext sf_sc; l_handler_t sf_handler; }; extern struct sysentvec linux_sysvec; /* * arch specific open/fcntl flags */ #define LINUX_F_GETLK64 12 #define LINUX_F_SETLK64 13 #define LINUX_F_SETLKW64 14 union l_semun { l_int val; l_uintptr_t buf; l_ushort *array; l_uintptr_t __buf; l_uintptr_t __pad; }; -struct l_sockaddr { - l_ushort sa_family; - char sa_data[14]; -}; - struct l_ifmap { l_ulong mem_start; l_ulong mem_end; l_ushort base_addr; u_char irq; u_char dma; u_char port; }; - -#define LINUX_IFHWADDRLEN 6 -#define LINUX_IFNAMSIZ 16 struct l_ifreq { union { char ifrn_name[LINUX_IFNAMSIZ]; } ifr_ifrn; union { struct l_sockaddr ifru_addr; struct l_sockaddr ifru_dstaddr; struct l_sockaddr ifru_broadaddr; struct l_sockaddr ifru_netmask; struct l_sockaddr ifru_hwaddr; l_short ifru_flags[1]; l_int ifru_ivalue; l_int ifru_mtu; struct l_ifmap ifru_map; char ifru_slave[LINUX_IFNAMSIZ]; l_caddr_t ifru_data; } ifr_ifru; }; #define ifr_name ifr_ifrn.ifrn_name /* Interface name */ #define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */ #define ifr_ifindex ifr_ifru.ifru_ivalue /* Interface index */ /* * poll() */ #define LINUX_POLLIN 0x0001 #define LINUX_POLLPRI 0x0002 #define LINUX_POLLOUT 0x0004 #define LINUX_POLLERR 0x0008 #define LINUX_POLLHUP 0x0010 #define LINUX_POLLNVAL 0x0020 #define LINUX_POLLRDNORM 0x0040 #define LINUX_POLLRDBAND 0x0080 #define LINUX_POLLWRNORM 0x0100 #define LINUX_POLLWRBAND 0x0200 #define LINUX_POLLMSG 0x0400 struct l_pollfd { l_int fd; l_short events; l_short revents; }; struct l_user_desc { l_uint entry_number; l_uint base_addr; l_uint limit; l_uint seg_32bit:1; l_uint contents:2; l_uint read_exec_only:1; l_uint limit_in_pages:1; l_uint seg_not_present:1; l_uint useable:1; }; struct l_desc_struct { unsigned long a, b; }; #define LINUX_LOWERWORD 0x0000ffff /* * Macros which do the same thing as those in Linux include/asm-um/ldt-i386.h. * These convert a Linux user space descriptor to a machine one. */ #define LINUX_LDT_entry_a(info) \ ((((info)->base_addr & LINUX_LOWERWORD) << 16) | \ ((info)->limit & LINUX_LOWERWORD)) #define LINUX_ENTRY_B_READ_EXEC_ONLY 9 #define LINUX_ENTRY_B_CONTENTS 10 #define LINUX_ENTRY_B_SEG_NOT_PRESENT 15 #define LINUX_ENTRY_B_BASE_ADDR 16 #define LINUX_ENTRY_B_USEABLE 20 #define LINUX_ENTRY_B_SEG32BIT 22 #define LINUX_ENTRY_B_LIMIT 23 #define LINUX_LDT_entry_b(info) \ (((info)->base_addr & 0xff000000) | \ ((info)->limit & 0xf0000) | \ ((info)->contents << LINUX_ENTRY_B_CONTENTS) | \ (((info)->seg_not_present == 0) << LINUX_ENTRY_B_SEG_NOT_PRESENT) | \ (((info)->base_addr & 0x00ff0000) >> LINUX_ENTRY_B_BASE_ADDR) | \ (((info)->read_exec_only == 0) << LINUX_ENTRY_B_READ_EXEC_ONLY) | \ ((info)->seg_32bit << LINUX_ENTRY_B_SEG32BIT) | \ ((info)->useable << LINUX_ENTRY_B_USEABLE) | \ ((info)->limit_in_pages << LINUX_ENTRY_B_LIMIT) | 0x7000) #define LINUX_LDT_empty(info) \ ((info)->base_addr == 0 && \ (info)->limit == 0 && \ (info)->contents == 0 && \ (info)->seg_not_present == 1 && \ (info)->read_exec_only == 1 && \ (info)->seg_32bit == 0 && \ (info)->limit_in_pages == 0 && \ (info)->useable == 0) /* * Macros for converting segments. * They do the same as those in arch/i386/kernel/process.c in Linux.
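To make the descriptor bit-packing above concrete, here is a small userland harness for the entry_a half (the body is copied from LINUX_LDT_entry_a; the sample values are arbitrary):

    #include <stdio.h>
    #include <stdint.h>

    #define LOWERWORD 0x0000ffff

    /* Same packing as LINUX_LDT_entry_a above: low word of the base in the
     * high half of the descriptor word, low word of the limit in the low half. */
    static uint32_t
    ldt_entry_a(uint32_t base_addr, uint32_t limit)
    {
        return (((base_addr & LOWERWORD) << 16) | (limit & LOWERWORD));
    }

    int main(void)
    {
        printf("entry_a = 0x%08x\n", ldt_entry_a(0x12345678, 0x000fffff));
        /* prints 0x5678ffff */
        return (0);
    }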
*/ #define LINUX_GET_BASE(desc) \ ((((desc)->a >> 16) & LINUX_LOWERWORD) | \ (((desc)->b << 16) & 0x00ff0000) | \ ((desc)->b & 0xff000000)) #define LINUX_GET_LIMIT(desc) \ (((desc)->a & LINUX_LOWERWORD) | \ ((desc)->b & 0xf0000)) #define LINUX_GET_32BIT(desc) \ (((desc)->b >> LINUX_ENTRY_B_SEG32BIT) & 1) #define LINUX_GET_CONTENTS(desc) \ (((desc)->b >> LINUX_ENTRY_B_CONTENTS) & 3) #define LINUX_GET_WRITABLE(desc) \ (((desc)->b >> LINUX_ENTRY_B_READ_EXEC_ONLY) & 1) #define LINUX_GET_LIMIT_PAGES(desc) \ (((desc)->b >> LINUX_ENTRY_B_LIMIT) & 1) #define LINUX_GET_PRESENT(desc) \ (((desc)->b >> LINUX_ENTRY_B_SEG_NOT_PRESENT) & 1) #define LINUX_GET_USEABLE(desc) \ (((desc)->b >> LINUX_ENTRY_B_USEABLE) & 1) #define linux_copyout_rusage(r, u) copyout(r, u, sizeof(*r)) /* robust futexes */ struct linux_robust_list { struct linux_robust_list *next; }; struct linux_robust_list_head { struct linux_robust_list list; l_long futex_offset; struct linux_robust_list *pending_list; }; #endif /* !_I386_LINUX_H_ */ Index: projects/runtime-coverage-v2/sys/kern/kern_time.c =================================================================== --- projects/runtime-coverage-v2/sys/kern/kern_time.c (revision 347075) +++ projects/runtime-coverage-v2/sys/kern/kern_time.c (revision 347076) @@ -1,1761 +1,1763 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_time.c 8.1 (Berkeley) 6/10/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #define MAX_CLOCKS (CLOCK_MONOTONIC+1) #define CPUCLOCK_BIT 0x80000000 #define CPUCLOCK_PROCESS_BIT 0x40000000 #define CPUCLOCK_ID_MASK (~(CPUCLOCK_BIT|CPUCLOCK_PROCESS_BIT)) #define MAKE_THREAD_CPUCLOCK(tid) (CPUCLOCK_BIT|(tid)) #define MAKE_PROCESS_CPUCLOCK(pid) \ (CPUCLOCK_BIT|CPUCLOCK_PROCESS_BIT|(pid)) static struct kclock posix_clocks[MAX_CLOCKS]; static uma_zone_t itimer_zone = NULL; /* * Time of day and interval timer support. * * These routines provide the kernel entry points to get and set * the time-of-day and per-process interval timers. Subroutines * here provide support for adding and subtracting timeval structures * and decrementing interval timers, optionally reloading the interval * timers when they expire. */ static int settime(struct thread *, struct timeval *); static void timevalfix(struct timeval *); static int user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec *ua_rqtp, struct timespec *ua_rmtp); static void itimer_start(void); static int itimer_init(void *, int, int); static void itimer_fini(void *, int); static void itimer_enter(struct itimer *); static void itimer_leave(struct itimer *); static struct itimer *itimer_find(struct proc *, int); static void itimers_alloc(struct proc *); static void itimers_event_hook_exec(void *arg, struct proc *p, struct image_params *imgp); static void itimers_event_hook_exit(void *arg, struct proc *p); static int realtimer_create(struct itimer *); static int realtimer_gettime(struct itimer *, struct itimerspec *); static int realtimer_settime(struct itimer *, int, struct itimerspec *, struct itimerspec *); static int realtimer_delete(struct itimer *); static void realtimer_clocktime(clockid_t, struct timespec *); static void realtimer_expire(void *); int register_posix_clock(int, struct kclock *); void itimer_fire(struct itimer *it); int itimespecfix(struct timespec *ts); #define CLOCK_CALL(clock, call, arglist) \ ((*posix_clocks[clock].call) arglist) SYSINIT(posix_timer, SI_SUB_P1003_1B, SI_ORDER_FIRST+4, itimer_start, NULL); static int settime(struct thread *td, struct timeval *tv) { struct timeval delta, tv1, tv2; static struct timeval maxtime, laststep; struct timespec ts; microtime(&tv1); delta = *tv; timevalsub(&delta, &tv1); /* * If the system is secure, we do not allow the time to be * set to a value earlier than 1 second less than the highest * time we have yet seen. The worst a miscreant can do in * this circumstance is "freeze" time. He couldn't go * back to the past. * * We similarly do not allow the clock to be stepped more * than one second, nor more than once per second. This allows * a miscreant to make the clock march double-time, but no worse. */ if (securelevel_gt(td->td_ucred, 1) != 0) { if (delta.tv_sec < 0 || delta.tv_usec < 0) { /* * Update maxtime to latest time we've seen. 
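Restated outside the kernel, the securelevel policy described above allows at most a one-second step in either direction, at most once per second. A hedged sketch of just the forward clamp (illustrative names, not kernel API):

    #include <sys/time.h>

    /* Illustrative only: clamp a proposed time to at most one second ahead
     * of the current time, as the "+1 second" branch below does. */
    static struct timeval
    clamp_forward_step(struct timeval now, struct timeval proposed)
    {
        struct timeval delta = proposed;

        timersub(&delta, &now, &delta);      /* delta = proposed - now */
        if (delta.tv_sec > 1)
            proposed.tv_sec = now.tv_sec + 1;
        return (proposed);
    }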
*/ if (tv1.tv_sec > maxtime.tv_sec) maxtime = tv1; tv2 = *tv; timevalsub(&tv2, &maxtime); if (tv2.tv_sec < -1) { tv->tv_sec = maxtime.tv_sec - 1; printf("Time adjustment clamped to -1 second\n"); } } else { if (tv1.tv_sec == laststep.tv_sec) return (EPERM); if (delta.tv_sec > 1) { tv->tv_sec = tv1.tv_sec + 1; printf("Time adjustment clamped to +1 second\n"); } laststep = *tv; } } ts.tv_sec = tv->tv_sec; ts.tv_nsec = tv->tv_usec * 1000; tc_setclock(&ts); resettodr(); return (0); } #ifndef _SYS_SYSPROTO_H_ struct clock_getcpuclockid2_args { id_t id; int which; clockid_t *clock_id; }; #endif /* ARGSUSED */ int sys_clock_getcpuclockid2(struct thread *td, struct clock_getcpuclockid2_args *uap) { clockid_t clk_id; int error; error = kern_clock_getcpuclockid2(td, uap->id, uap->which, &clk_id); if (error == 0) error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t)); return (error); } int kern_clock_getcpuclockid2(struct thread *td, id_t id, int which, clockid_t *clk_id) { struct proc *p; pid_t pid; lwpid_t tid; int error; switch (which) { case CPUCLOCK_WHICH_PID: if (id != 0) { error = pget(id, PGET_CANSEE | PGET_NOTID, &p); if (error != 0) return (error); PROC_UNLOCK(p); pid = id; } else { pid = td->td_proc->p_pid; } *clk_id = MAKE_PROCESS_CPUCLOCK(pid); return (0); case CPUCLOCK_WHICH_TID: tid = id == 0 ? td->td_tid : id; *clk_id = MAKE_THREAD_CPUCLOCK(tid); return (0); default: return (EINVAL); } } #ifndef _SYS_SYSPROTO_H_ struct clock_gettime_args { clockid_t clock_id; struct timespec *tp; }; #endif /* ARGSUSED */ int sys_clock_gettime(struct thread *td, struct clock_gettime_args *uap) { struct timespec ats; int error; error = kern_clock_gettime(td, uap->clock_id, &ats); if (error == 0) error = copyout(&ats, uap->tp, sizeof(ats)); return (error); } static inline void cputick2timespec(uint64_t runtime, struct timespec *ats) { runtime = cputick2usec(runtime); ats->tv_sec = runtime / 1000000; ats->tv_nsec = runtime % 1000000 * 1000; } static void get_thread_cputime(struct thread *targettd, struct timespec *ats) { uint64_t runtime, curtime, switchtime; if (targettd == NULL) { /* current thread */ critical_enter(); switchtime = PCPU_GET(switchtime); curtime = cpu_ticks(); runtime = curthread->td_runtime; critical_exit(); runtime += curtime - switchtime; } else { thread_lock(targettd); runtime = targettd->td_runtime; thread_unlock(targettd); } cputick2timespec(runtime, ats); } static void get_process_cputime(struct proc *targetp, struct timespec *ats) { uint64_t runtime; struct rusage ru; PROC_STATLOCK(targetp); rufetch(targetp, &ru); runtime = targetp->p_rux.rux_runtime; if (curthread->td_proc == targetp) runtime += cpu_ticks() - PCPU_GET(switchtime); PROC_STATUNLOCK(targetp); cputick2timespec(runtime, ats); } static int get_cputime(struct thread *td, clockid_t clock_id, struct timespec *ats) { struct proc *p, *p2; struct thread *td2; lwpid_t tid; pid_t pid; int error; p = td->td_proc; if ((clock_id & CPUCLOCK_PROCESS_BIT) == 0) { tid = clock_id & CPUCLOCK_ID_MASK; td2 = tdfind(tid, p->p_pid); if (td2 == NULL) return (EINVAL); get_thread_cputime(td2, ats); PROC_UNLOCK(td2->td_proc); } else { pid = clock_id & CPUCLOCK_ID_MASK; error = pget(pid, PGET_CANSEE, &p2); if (error != 0) return (EINVAL); get_process_cputime(p2, ats); PROC_UNLOCK(p2); } return (0); } int kern_clock_gettime(struct thread *td, clockid_t clock_id, struct timespec *ats) { struct timeval sys, user; struct proc *p; p = td->td_proc; switch (clock_id) { case CLOCK_REALTIME: /* Default to precise.
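From userland the precise/fast split in this switch is selectable directly; a minimal sketch using the FreeBSD-specific clock ids:

    #include <stdio.h>
    #include <stdint.h>
    #include <time.h>

    int main(void)
    {
        struct timespec precise, fast;

        clock_gettime(CLOCK_REALTIME_PRECISE, &precise); /* nanotime() path */
        clock_gettime(CLOCK_REALTIME_FAST, &fast);       /* getnanotime(): cheaper, coarser */
        printf("precise: %jd.%09ld\n", (intmax_t)precise.tv_sec, precise.tv_nsec);
        printf("fast:    %jd.%09ld\n", (intmax_t)fast.tv_sec, fast.tv_nsec);
        return (0);
    }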
*/ case CLOCK_REALTIME_PRECISE: nanotime(ats); break; case CLOCK_REALTIME_FAST: getnanotime(ats); break; case CLOCK_VIRTUAL: PROC_LOCK(p); PROC_STATLOCK(p); calcru(p, &user, &sys); PROC_STATUNLOCK(p); PROC_UNLOCK(p); TIMEVAL_TO_TIMESPEC(&user, ats); break; case CLOCK_PROF: PROC_LOCK(p); PROC_STATLOCK(p); calcru(p, &user, &sys); PROC_STATUNLOCK(p); PROC_UNLOCK(p); timevaladd(&user, &sys); TIMEVAL_TO_TIMESPEC(&user, ats); break; case CLOCK_MONOTONIC: /* Default to precise. */ case CLOCK_MONOTONIC_PRECISE: case CLOCK_UPTIME: case CLOCK_UPTIME_PRECISE: nanouptime(ats); break; case CLOCK_UPTIME_FAST: case CLOCK_MONOTONIC_FAST: getnanouptime(ats); break; case CLOCK_SECOND: ats->tv_sec = time_second; ats->tv_nsec = 0; break; case CLOCK_THREAD_CPUTIME_ID: get_thread_cputime(NULL, ats); break; case CLOCK_PROCESS_CPUTIME_ID: PROC_LOCK(p); get_process_cputime(p, ats); PROC_UNLOCK(p); break; default: if ((int)clock_id >= 0) return (EINVAL); return (get_cputime(td, clock_id, ats)); } return (0); } #ifndef _SYS_SYSPROTO_H_ struct clock_settime_args { clockid_t clock_id; const struct timespec *tp; }; #endif /* ARGSUSED */ int sys_clock_settime(struct thread *td, struct clock_settime_args *uap) { struct timespec ats; int error; if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0) return (error); return (kern_clock_settime(td, uap->clock_id, &ats)); } static int allow_insane_settime = 0; SYSCTL_INT(_debug, OID_AUTO, allow_insane_settime, CTLFLAG_RWTUN, &allow_insane_settime, 0, "do not perform possibly restrictive checks on settime(2) args"); int kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats) { struct timeval atv; int error; if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0) return (error); if (clock_id != CLOCK_REALTIME) return (EINVAL); if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000 || ats->tv_sec < 0) return (EINVAL); - if (!allow_insane_settime && ats->tv_sec > 8000ULL * 365 * 24 * 60 * 60) + if (!allow_insane_settime && + (ats->tv_sec > 8000ULL * 365 * 24 * 60 * 60 || + ats->tv_sec < utc_offset())) return (EINVAL); /* XXX Don't convert nsec->usec and back */ TIMESPEC_TO_TIMEVAL(&atv, ats); error = settime(td, &atv); return (error); } #ifndef _SYS_SYSPROTO_H_ struct clock_getres_args { clockid_t clock_id; struct timespec *tp; }; #endif int sys_clock_getres(struct thread *td, struct clock_getres_args *uap) { struct timespec ts; int error; if (uap->tp == NULL) return (0); error = kern_clock_getres(td, uap->clock_id, &ts); if (error == 0) error = copyout(&ts, uap->tp, sizeof(ts)); return (error); } int kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts) { ts->tv_sec = 0; switch (clock_id) { case CLOCK_REALTIME: case CLOCK_REALTIME_FAST: case CLOCK_REALTIME_PRECISE: case CLOCK_MONOTONIC: case CLOCK_MONOTONIC_FAST: case CLOCK_MONOTONIC_PRECISE: case CLOCK_UPTIME: case CLOCK_UPTIME_FAST: case CLOCK_UPTIME_PRECISE: /* * Round up the result of the division cheaply by adding 1. * Rounding up is especially important if rounding down * would give 0. Perfect rounding is unimportant. */ ts->tv_nsec = 1000000000 / tc_getfrequency() + 1; break; case CLOCK_VIRTUAL: case CLOCK_PROF: /* Accurately round up here because we can do so cheaply. 
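That rounding is visible from userland via clock_getres(2); a minimal sketch:

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        struct timespec res;

        /* For timecounter-backed clocks this reports 1000000000 / tc_frequency,
         * rounded up by one nanosecond as described above. */
        if (clock_getres(CLOCK_MONOTONIC, &res) == 0)
            printf("CLOCK_MONOTONIC resolution: %ld ns\n", res.tv_nsec);
        return (0);
    }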
*/ ts->tv_nsec = howmany(1000000000, hz); break; case CLOCK_SECOND: ts->tv_sec = 1; ts->tv_nsec = 0; break; case CLOCK_THREAD_CPUTIME_ID: case CLOCK_PROCESS_CPUTIME_ID: cputime: /* sync with cputick2usec */ ts->tv_nsec = 1000000 / cpu_tickrate(); if (ts->tv_nsec == 0) ts->tv_nsec = 1000; break; default: if ((int)clock_id < 0) goto cputime; return (EINVAL); } return (0); } int kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt) { return (kern_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, rqt, rmt)); } static uint8_t nanowait[MAXCPU]; int kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec *rqt, struct timespec *rmt) { struct timespec ts, now; sbintime_t sbt, sbtt, prec, tmp; time_t over; int error; bool is_abs_real; if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000) return (EINVAL); if ((flags & ~TIMER_ABSTIME) != 0) return (EINVAL); switch (clock_id) { case CLOCK_REALTIME: case CLOCK_REALTIME_PRECISE: case CLOCK_REALTIME_FAST: case CLOCK_SECOND: is_abs_real = (flags & TIMER_ABSTIME) != 0; break; case CLOCK_MONOTONIC: case CLOCK_MONOTONIC_PRECISE: case CLOCK_MONOTONIC_FAST: case CLOCK_UPTIME: case CLOCK_UPTIME_PRECISE: case CLOCK_UPTIME_FAST: is_abs_real = false; break; case CLOCK_VIRTUAL: case CLOCK_PROF: case CLOCK_PROCESS_CPUTIME_ID: return (ENOTSUP); case CLOCK_THREAD_CPUTIME_ID: default: return (EINVAL); } do { ts = *rqt; if ((flags & TIMER_ABSTIME) != 0) { if (is_abs_real) td->td_rtcgen = atomic_load_acq_int(&rtc_generation); error = kern_clock_gettime(td, clock_id, &now); KASSERT(error == 0, ("kern_clock_gettime: %d", error)); timespecsub(&ts, &now, &ts); } if (ts.tv_sec < 0 || (ts.tv_sec == 0 && ts.tv_nsec == 0)) { error = EWOULDBLOCK; break; } if (ts.tv_sec > INT32_MAX / 2) { over = ts.tv_sec - INT32_MAX / 2; ts.tv_sec -= over; } else over = 0; tmp = tstosbt(ts); prec = tmp; prec >>= tc_precexp; if (TIMESEL(&sbt, tmp)) sbt += tc_tick_sbt; sbt += tmp; error = tsleep_sbt(&nanowait[curcpu], PWAIT | PCATCH, "nanslp", sbt, prec, C_ABSOLUTE); } while (error == 0 && is_abs_real && td->td_rtcgen == 0); td->td_rtcgen = 0; if (error != EWOULDBLOCK) { if (TIMESEL(&sbtt, tmp)) sbtt += tc_tick_sbt; if (sbtt >= sbt) return (0); if (error == ERESTART) error = EINTR; if ((flags & TIMER_ABSTIME) == 0 && rmt != NULL) { ts = sbttots(sbt - sbtt); ts.tv_sec += over; if (ts.tv_sec < 0) timespecclear(&ts); *rmt = ts; } return (error); } return (0); } #ifndef _SYS_SYSPROTO_H_ struct nanosleep_args { struct timespec *rqtp; struct timespec *rmtp; }; #endif /* ARGSUSED */ int sys_nanosleep(struct thread *td, struct nanosleep_args *uap) { return (user_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, uap->rqtp, uap->rmtp)); } #ifndef _SYS_SYSPROTO_H_ struct clock_nanosleep_args { clockid_t clock_id; int flags; struct timespec *rqtp; struct timespec *rmtp; }; #endif /* ARGSUSED */ int sys_clock_nanosleep(struct thread *td, struct clock_nanosleep_args *uap) { int error; error = user_clock_nanosleep(td, uap->clock_id, uap->flags, uap->rqtp, uap->rmtp); return (kern_posix_error(td, error)); } static int user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec *ua_rqtp, struct timespec *ua_rmtp) { struct timespec rmt, rqt; int error; error = copyin(ua_rqtp, &rqt, sizeof(rqt)); if (error) return (error); if (ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0 && !useracc(ua_rmtp, sizeof(rmt), VM_PROT_WRITE)) return (EFAULT); error = kern_clock_nanosleep(td, clock_id, flags, &rqt, &rmt); if (error == EINTR && 
ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0) { int error2; error2 = copyout(&rmt, ua_rmtp, sizeof(rmt)); if (error2) error = error2; } return (error); } #ifndef _SYS_SYSPROTO_H_ struct gettimeofday_args { struct timeval *tp; struct timezone *tzp; }; #endif /* ARGSUSED */ int sys_gettimeofday(struct thread *td, struct gettimeofday_args *uap) { struct timeval atv; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); error = copyout(&atv, uap->tp, sizeof (atv)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = 0; rtz.tz_dsttime = 0; error = copyout(&rtz, uap->tzp, sizeof (rtz)); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct settimeofday_args { struct timeval *tv; struct timezone *tzp; }; #endif /* ARGSUSED */ int sys_settimeofday(struct thread *td, struct settimeofday_args *uap) { struct timeval atv, *tvp; struct timezone atz, *tzp; int error; if (uap->tv) { error = copyin(uap->tv, &atv, sizeof(atv)); if (error) return (error); tvp = &atv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &atz, sizeof(atz)); if (error) return (error); tzp = &atz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int kern_settimeofday(struct thread *td, struct timeval *tv, struct timezone *tzp) { int error; error = priv_check(td, PRIV_SETTIMEOFDAY); if (error) return (error); /* Verify all parameters before changing time. */ if (tv) { if (tv->tv_usec < 0 || tv->tv_usec >= 1000000 || tv->tv_sec < 0) return (EINVAL); error = settime(td, tv); } return (error); } /* * Get value of an interval timer. The process virtual and profiling virtual * time timers are kept in the p_stats area, since they can be swapped out. * These are kept internally in the way they are specified externally: in * time until they expire. * * The real time interval timer is kept in the process table slot for the * process, and its value (it_value) is kept as an absolute time rather than * as a delta, so that it is easy to keep periodic real-time signals from * drifting. * * Virtual time timers are processed in the hardclock() routine of * kern_clock.c. The real time timer is processed by a timeout routine, * called from the softclock() routine. Since a callout may be delayed in * real time due to interrupt processing in the system, it is possible for * the real time timeout routine (realitexpire, given below), to be delayed * in real time past when it is supposed to occur. It does not suffice, * therefore, to reload the real timer .it_value from the real time timers * .it_interval. Rather, we compute the next time in absolute time the timer * should go off. */ #ifndef _SYS_SYSPROTO_H_ struct getitimer_args { u_int which; struct itimerval *itv; }; #endif int sys_getitimer(struct thread *td, struct getitimer_args *uap) { struct itimerval aitv; int error; error = kern_getitimer(td, uap->which, &aitv); if (error != 0) return (error); return (copyout(&aitv, uap->itv, sizeof (struct itimerval))); } int kern_getitimer(struct thread *td, u_int which, struct itimerval *aitv) { struct proc *p = td->td_proc; struct timeval ctv; if (which > ITIMER_PROF) return (EINVAL); if (which == ITIMER_REAL) { /* * Convert from absolute to relative time in .it_value * part of real time timer. If time for real time timer * has passed return 0, else return difference between * current time and time for the timer to go off. 
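The absolute-to-relative conversion described above is exactly what getitimer(2) callers observe: it_value always comes back as time remaining. A minimal sketch:

    #include <stdio.h>
    #include <stdint.h>
    #include <sys/time.h>
    #include <unistd.h>

    int main(void)
    {
        struct itimerval set = { { 0, 0 }, { 5, 0 } };  /* no interval, 5 s one-shot */
        struct itimerval left;

        setitimer(ITIMER_REAL, &set, NULL);
        sleep(1);
        getitimer(ITIMER_REAL, &left);   /* roughly 4 s remaining */
        printf("remaining: %jd.%06ld\n", (intmax_t)left.it_value.tv_sec,
            (long)left.it_value.tv_usec);
        return (0);
    }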
*/ PROC_LOCK(p); *aitv = p->p_realtimer; PROC_UNLOCK(p); if (timevalisset(&aitv->it_value)) { microuptime(&ctv); if (timevalcmp(&aitv->it_value, &ctv, <)) timevalclear(&aitv->it_value); else timevalsub(&aitv->it_value, &ctv); } } else { PROC_ITIMLOCK(p); *aitv = p->p_stats->p_timer[which]; PROC_ITIMUNLOCK(p); } #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktritimerval(aitv); #endif return (0); } #ifndef _SYS_SYSPROTO_H_ struct setitimer_args { u_int which; struct itimerval *itv, *oitv; }; #endif int sys_setitimer(struct thread *td, struct setitimer_args *uap) { struct itimerval aitv, oitv; int error; if (uap->itv == NULL) { uap->itv = uap->oitv; return (sys_getitimer(td, (struct getitimer_args *)uap)); } if ((error = copyin(uap->itv, &aitv, sizeof(struct itimerval)))) return (error); error = kern_setitimer(td, uap->which, &aitv, &oitv); if (error != 0 || uap->oitv == NULL) return (error); return (copyout(&oitv, uap->oitv, sizeof(struct itimerval))); } int kern_setitimer(struct thread *td, u_int which, struct itimerval *aitv, struct itimerval *oitv) { struct proc *p = td->td_proc; struct timeval ctv; sbintime_t sbt, pr; if (aitv == NULL) return (kern_getitimer(td, which, oitv)); if (which > ITIMER_PROF) return (EINVAL); #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktritimerval(aitv); #endif if (itimerfix(&aitv->it_value) || aitv->it_value.tv_sec > INT32_MAX / 2) return (EINVAL); if (!timevalisset(&aitv->it_value)) timevalclear(&aitv->it_interval); else if (itimerfix(&aitv->it_interval) || aitv->it_interval.tv_sec > INT32_MAX / 2) return (EINVAL); if (which == ITIMER_REAL) { PROC_LOCK(p); if (timevalisset(&p->p_realtimer.it_value)) callout_stop(&p->p_itcallout); microuptime(&ctv); if (timevalisset(&aitv->it_value)) { pr = tvtosbt(aitv->it_value) >> tc_precexp; timevaladd(&aitv->it_value, &ctv); sbt = tvtosbt(aitv->it_value); callout_reset_sbt(&p->p_itcallout, sbt, pr, realitexpire, p, C_ABSOLUTE); } *oitv = p->p_realtimer; p->p_realtimer = *aitv; PROC_UNLOCK(p); if (timevalisset(&oitv->it_value)) { if (timevalcmp(&oitv->it_value, &ctv, <)) timevalclear(&oitv->it_value); else timevalsub(&oitv->it_value, &ctv); } } else { if (aitv->it_interval.tv_sec == 0 && aitv->it_interval.tv_usec != 0 && aitv->it_interval.tv_usec < tick) aitv->it_interval.tv_usec = tick; if (aitv->it_value.tv_sec == 0 && aitv->it_value.tv_usec != 0 && aitv->it_value.tv_usec < tick) aitv->it_value.tv_usec = tick; PROC_ITIMLOCK(p); *oitv = p->p_stats->p_timer[which]; p->p_stats->p_timer[which] = *aitv; PROC_ITIMUNLOCK(p); } #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktritimerval(oitv); #endif return (0); } /* * Real interval timer expired: * send process whose timer expired an alarm signal. * If time is not set up to reload, then just return. * Else compute next time timer should go off which is > current time. * This is where delay in processing this timeout causes multiple * SIGALRM calls to be compressed into one. * tvtohz() always adds 1 to allow for the time until the next clock * interrupt being strictly less than 1 clock tick, but we don't want * that here since we want to appear to be in sync with the clock * interrupt even when we're delayed. 
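The catch-up idiom below, where realitexpire() advances it_value by whole intervals until it passes "now", has a direct userland analogue with absolute-time sleeps; a sketch using clock_nanosleep(2):

    #include <time.h>

    static int
    ts_le(const struct timespec *a, const struct timespec *b)
    {
        return (a->tv_sec < b->tv_sec ||
            (a->tv_sec == b->tv_sec && a->tv_nsec <= b->tv_nsec));
    }

    /* Advance an absolute deadline by whole periods until it lies in the
     * future, then block until it arrives.  Missed periods are absorbed,
     * so the schedule never drifts, mirroring realitexpire()'s do/while. */
    static void
    sleep_until_next(struct timespec *deadline, time_t period_sec)
    {
        struct timespec now;

        clock_gettime(CLOCK_MONOTONIC, &now);
        do {
            deadline->tv_sec += period_sec;
        } while (ts_le(deadline, &now));
        clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, deadline, NULL);
    }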
*/ void realitexpire(void *arg) { struct proc *p; struct timeval ctv; sbintime_t isbt; p = (struct proc *)arg; kern_psignal(p, SIGALRM); if (!timevalisset(&p->p_realtimer.it_interval)) { timevalclear(&p->p_realtimer.it_value); if (p->p_flag & P_WEXIT) wakeup(&p->p_itcallout); return; } isbt = tvtosbt(p->p_realtimer.it_interval); if (isbt >= sbt_timethreshold) getmicrouptime(&ctv); else microuptime(&ctv); do { timevaladd(&p->p_realtimer.it_value, &p->p_realtimer.it_interval); } while (timevalcmp(&p->p_realtimer.it_value, &ctv, <=)); callout_reset_sbt(&p->p_itcallout, tvtosbt(p->p_realtimer.it_value), isbt >> tc_precexp, realitexpire, p, C_ABSOLUTE); } /* * Check that a proposed value to load into the .it_value or * .it_interval part of an interval timer is acceptable, and * fix it to have at least minimal value (i.e. if it is less * than the resolution of the clock, round it up.) */ int itimerfix(struct timeval *tv) { if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000) return (EINVAL); if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < (u_int)tick / 16) tv->tv_usec = (u_int)tick / 16; return (0); } /* * Decrement an interval timer by a specified number * of microseconds, which must be less than a second, * i.e. < 1000000. If the timer expires, then reload * it. In this case, carry over (usec - old value) to * reduce the value reloaded into the timer so that * the timer does not drift. This routine assumes * that it is called in a context where the timers * on which it is operating cannot change in value. */ int itimerdecr(struct itimerval *itp, int usec) { if (itp->it_value.tv_usec < usec) { if (itp->it_value.tv_sec == 0) { /* expired, and already in next interval */ usec -= itp->it_value.tv_usec; goto expire; } itp->it_value.tv_usec += 1000000; itp->it_value.tv_sec--; } itp->it_value.tv_usec -= usec; usec = 0; if (timevalisset(&itp->it_value)) return (1); /* expired, exactly at end of interval */ expire: if (timevalisset(&itp->it_interval)) { itp->it_value = itp->it_interval; itp->it_value.tv_usec -= usec; if (itp->it_value.tv_usec < 0) { itp->it_value.tv_usec += 1000000; itp->it_value.tv_sec--; } } else itp->it_value.tv_usec = 0; /* sec is already 0 */ return (0); } /* * Add and subtract routines for timevals. * N.B.: subtract routine doesn't deal with * results which are before the beginning, * it just gets very confused in this case. * Caveat emptor. */ void timevaladd(struct timeval *t1, const struct timeval *t2) { t1->tv_sec += t2->tv_sec; t1->tv_usec += t2->tv_usec; timevalfix(t1); } void timevalsub(struct timeval *t1, const struct timeval *t2) { t1->tv_sec -= t2->tv_sec; t1->tv_usec -= t2->tv_usec; timevalfix(t1); } static void timevalfix(struct timeval *t1) { if (t1->tv_usec < 0) { t1->tv_sec--; t1->tv_usec += 1000000; } if (t1->tv_usec >= 1000000) { t1->tv_sec++; t1->tv_usec -= 1000000; } } /* * ratecheck(): simple time-based rate-limit checking. */ int ratecheck(struct timeval *lasttime, const struct timeval *mininterval) { struct timeval tv, delta; int rv = 0; getmicrouptime(&tv); /* NB: 10ms precision */ delta = tv; timevalsub(&delta, lasttime); /* * check for 0,0 is so that the message will be seen at least once, * even if interval is huge. */ if (timevalcmp(&delta, mininterval, >=) || (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) { *lasttime = tv; rv = 1; } return (rv); } /* * ppsratecheck(): packets (or events) per second limitation. * * Return 0 if the limit is to be enforced (e.g. the caller * should drop a packet because of the rate limitation). 
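The ppsratecheck() contract described here (return 0 means enforce the limit and drop; per the continuation, maxpps 0 never passes and -1 never limits) is compact enough to restate standalone, with caller-supplied tick state in place of the kernel's ticks/hz globals (names are illustrative):

    /* Userland restatement of ppsratecheck()'s contract. */
    static int
    pps_check(long *last_tick, int *curpps, int maxpps, long now_tick, long hz)
    {
        if (*last_tick == 0 || now_tick - *last_tick >= hz) {
            *last_tick = now_tick;   /* start a new one-second window */
            *curpps = 1;
            return (maxpps != 0);
        }
        (*curpps)++;                 /* NB: ignores potential overflow, as above */
        return (maxpps < 0 || *curpps <= maxpps);
    }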
* * maxpps of 0 always causes zero to be returned. maxpps of -1 * always causes 1 to be returned; this effectively defeats rate * limiting. * * Note that we maintain the struct timeval for compatibility * with other bsd systems. We reuse the storage and just monitor * clock ticks for minimal overhead. */ int ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps) { int now; /* * Reset the last time and counter if this is the first call * or more than a second has passed since the last update of * lasttime. */ now = ticks; if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) { lasttime->tv_sec = now; *curpps = 1; return (maxpps != 0); } else { (*curpps)++; /* NB: ignore potential overflow */ return (maxpps < 0 || *curpps <= maxpps); } } static void itimer_start(void) { struct kclock rt_clock = { .timer_create = realtimer_create, .timer_delete = realtimer_delete, .timer_settime = realtimer_settime, .timer_gettime = realtimer_gettime, .event_hook = NULL }; itimer_zone = uma_zcreate("itimer", sizeof(struct itimer), NULL, NULL, itimer_init, itimer_fini, UMA_ALIGN_PTR, 0); register_posix_clock(CLOCK_REALTIME, &rt_clock); register_posix_clock(CLOCK_MONOTONIC, &rt_clock); p31b_setcfg(CTL_P1003_1B_TIMERS, 200112L); p31b_setcfg(CTL_P1003_1B_DELAYTIMER_MAX, INT_MAX); p31b_setcfg(CTL_P1003_1B_TIMER_MAX, TIMER_MAX); EVENTHANDLER_REGISTER(process_exit, itimers_event_hook_exit, (void *)ITIMER_EV_EXIT, EVENTHANDLER_PRI_ANY); EVENTHANDLER_REGISTER(process_exec, itimers_event_hook_exec, (void *)ITIMER_EV_EXEC, EVENTHANDLER_PRI_ANY); } int register_posix_clock(int clockid, struct kclock *clk) { if ((unsigned)clockid >= MAX_CLOCKS) { printf("%s: invalid clockid\n", __func__); return (0); } posix_clocks[clockid] = *clk; return (1); } static int itimer_init(void *mem, int size, int flags) { struct itimer *it; it = (struct itimer *)mem; mtx_init(&it->it_mtx, "itimer lock", NULL, MTX_DEF); return (0); } static void itimer_fini(void *mem, int size) { struct itimer *it; it = (struct itimer *)mem; mtx_destroy(&it->it_mtx); } static void itimer_enter(struct itimer *it) { mtx_assert(&it->it_mtx, MA_OWNED); it->it_usecount++; } static void itimer_leave(struct itimer *it) { mtx_assert(&it->it_mtx, MA_OWNED); KASSERT(it->it_usecount > 0, ("invalid it_usecount")); if (--it->it_usecount == 0 && (it->it_flags & ITF_WANTED) != 0) wakeup(it); } #ifndef _SYS_SYSPROTO_H_ struct ktimer_create_args { clockid_t clock_id; struct sigevent * evp; int * timerid; }; #endif int sys_ktimer_create(struct thread *td, struct ktimer_create_args *uap) { struct sigevent *evp, ev; int id; int error; if (uap->evp == NULL) { evp = NULL; } else { error = copyin(uap->evp, &ev, sizeof(ev)); if (error != 0) return (error); evp = &ev; } error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1); if (error == 0) { error = copyout(&id, uap->timerid, sizeof(int)); if (error != 0) kern_ktimer_delete(td, id); } return (error); } int kern_ktimer_create(struct thread *td, clockid_t clock_id, struct sigevent *evp, int *timerid, int preset_id) { struct proc *p = td->td_proc; struct itimer *it; int id; int error; if (clock_id < 0 || clock_id >= MAX_CLOCKS) return (EINVAL); if (posix_clocks[clock_id].timer_create == NULL) return (EINVAL); if (evp != NULL) { if (evp->sigev_notify != SIGEV_NONE && evp->sigev_notify != SIGEV_SIGNAL && evp->sigev_notify != SIGEV_THREAD_ID) return (EINVAL); if ((evp->sigev_notify == SIGEV_SIGNAL || evp->sigev_notify == SIGEV_THREAD_ID) && !_SIG_VALID(evp->sigev_signo)) return (EINVAL); } if (p->p_itimers == 
NULL) itimers_alloc(p); it = uma_zalloc(itimer_zone, M_WAITOK); it->it_flags = 0; it->it_usecount = 0; it->it_active = 0; timespecclear(&it->it_time.it_value); timespecclear(&it->it_time.it_interval); it->it_overrun = 0; it->it_overrun_last = 0; it->it_clockid = clock_id; it->it_timerid = -1; it->it_proc = p; ksiginfo_init(&it->it_ksi); it->it_ksi.ksi_flags |= KSI_INS | KSI_EXT; error = CLOCK_CALL(clock_id, timer_create, (it)); if (error != 0) goto out; PROC_LOCK(p); if (preset_id != -1) { KASSERT(preset_id >= 0 && preset_id < 3, ("invalid preset_id")); id = preset_id; if (p->p_itimers->its_timers[id] != NULL) { PROC_UNLOCK(p); error = 0; goto out; } } else { /* * Find a free timer slot, skipping those reserved * for setitimer(). */ for (id = 3; id < TIMER_MAX; id++) if (p->p_itimers->its_timers[id] == NULL) break; if (id == TIMER_MAX) { PROC_UNLOCK(p); error = EAGAIN; goto out; } } it->it_timerid = id; p->p_itimers->its_timers[id] = it; if (evp != NULL) it->it_sigev = *evp; else { it->it_sigev.sigev_notify = SIGEV_SIGNAL; switch (clock_id) { default: case CLOCK_REALTIME: it->it_sigev.sigev_signo = SIGALRM; break; case CLOCK_VIRTUAL: it->it_sigev.sigev_signo = SIGVTALRM; break; case CLOCK_PROF: it->it_sigev.sigev_signo = SIGPROF; break; } it->it_sigev.sigev_value.sival_int = id; } if (it->it_sigev.sigev_notify == SIGEV_SIGNAL || it->it_sigev.sigev_notify == SIGEV_THREAD_ID) { it->it_ksi.ksi_signo = it->it_sigev.sigev_signo; it->it_ksi.ksi_code = SI_TIMER; it->it_ksi.ksi_value = it->it_sigev.sigev_value; it->it_ksi.ksi_timerid = id; } PROC_UNLOCK(p); *timerid = id; return (0); out: ITIMER_LOCK(it); CLOCK_CALL(it->it_clockid, timer_delete, (it)); ITIMER_UNLOCK(it); uma_zfree(itimer_zone, it); return (error); } #ifndef _SYS_SYSPROTO_H_ struct ktimer_delete_args { int timerid; }; #endif int sys_ktimer_delete(struct thread *td, struct ktimer_delete_args *uap) { return (kern_ktimer_delete(td, uap->timerid)); } static struct itimer * itimer_find(struct proc *p, int timerid) { struct itimer *it; PROC_LOCK_ASSERT(p, MA_OWNED); if ((p->p_itimers == NULL) || (timerid < 0) || (timerid >= TIMER_MAX) || (it = p->p_itimers->its_timers[timerid]) == NULL) { return (NULL); } ITIMER_LOCK(it); if ((it->it_flags & ITF_DELETING) != 0) { ITIMER_UNLOCK(it); it = NULL; } return (it); } int kern_ktimer_delete(struct thread *td, int timerid) { struct proc *p = td->td_proc; struct itimer *it; PROC_LOCK(p); it = itimer_find(p, timerid); if (it == NULL) { PROC_UNLOCK(p); return (EINVAL); } PROC_UNLOCK(p); it->it_flags |= ITF_DELETING; while (it->it_usecount > 0) { it->it_flags |= ITF_WANTED; msleep(it, &it->it_mtx, PPAUSE, "itimer", 0); } it->it_flags &= ~ITF_WANTED; CLOCK_CALL(it->it_clockid, timer_delete, (it)); ITIMER_UNLOCK(it); PROC_LOCK(p); if (KSI_ONQ(&it->it_ksi)) sigqueue_take(&it->it_ksi); p->p_itimers->its_timers[timerid] = NULL; PROC_UNLOCK(p); uma_zfree(itimer_zone, it); return (0); } #ifndef _SYS_SYSPROTO_H_ struct ktimer_settime_args { int timerid; int flags; const struct itimerspec * value; struct itimerspec * ovalue; }; #endif int sys_ktimer_settime(struct thread *td, struct ktimer_settime_args *uap) { struct itimerspec val, oval, *ovalp; int error; error = copyin(uap->value, &val, sizeof(val)); if (error != 0) return (error); ovalp = uap->ovalue != NULL ? 
&oval : NULL; error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp); if (error == 0 && uap->ovalue != NULL) error = copyout(ovalp, uap->ovalue, sizeof(*ovalp)); return (error); } int kern_ktimer_settime(struct thread *td, int timer_id, int flags, struct itimerspec *val, struct itimerspec *oval) { struct proc *p; struct itimer *it; int error; p = td->td_proc; PROC_LOCK(p); if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) { PROC_UNLOCK(p); error = EINVAL; } else { PROC_UNLOCK(p); itimer_enter(it); error = CLOCK_CALL(it->it_clockid, timer_settime, (it, flags, val, oval)); itimer_leave(it); ITIMER_UNLOCK(it); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct ktimer_gettime_args { int timerid; struct itimerspec * value; }; #endif int sys_ktimer_gettime(struct thread *td, struct ktimer_gettime_args *uap) { struct itimerspec val; int error; error = kern_ktimer_gettime(td, uap->timerid, &val); if (error == 0) error = copyout(&val, uap->value, sizeof(val)); return (error); } int kern_ktimer_gettime(struct thread *td, int timer_id, struct itimerspec *val) { struct proc *p; struct itimer *it; int error; p = td->td_proc; PROC_LOCK(p); if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) { PROC_UNLOCK(p); error = EINVAL; } else { PROC_UNLOCK(p); itimer_enter(it); error = CLOCK_CALL(it->it_clockid, timer_gettime, (it, val)); itimer_leave(it); ITIMER_UNLOCK(it); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct timer_getoverrun_args { int timerid; }; #endif int sys_ktimer_getoverrun(struct thread *td, struct ktimer_getoverrun_args *uap) { return (kern_ktimer_getoverrun(td, uap->timerid)); } int kern_ktimer_getoverrun(struct thread *td, int timer_id) { struct proc *p = td->td_proc; struct itimer *it; int error ; PROC_LOCK(p); if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) { PROC_UNLOCK(p); error = EINVAL; } else { td->td_retval[0] = it->it_overrun_last; ITIMER_UNLOCK(it); PROC_UNLOCK(p); error = 0; } return (error); } static int realtimer_create(struct itimer *it) { callout_init_mtx(&it->it_callout, &it->it_mtx, 0); return (0); } static int realtimer_delete(struct itimer *it) { mtx_assert(&it->it_mtx, MA_OWNED); /* * clear timer's value and interval to tell realtimer_expire * to not rearm the timer. 
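These ktimer_* entry points back the userland POSIX timer API, so the usual lifecycle maps one-to-one onto them; a minimal sketch with signal delivery (the default SIGALRM case from kern_ktimer_create above):

    #include <signal.h>
    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        timer_t tid;
        struct sigevent ev = {
            .sigev_notify = SIGEV_SIGNAL,
            .sigev_signo = SIGALRM,
        };
        struct itimerspec its = { .it_value = { .tv_sec = 1 } };  /* one-shot */

        if (timer_create(CLOCK_REALTIME, &ev, &tid) != 0)  /* sys_ktimer_create() */
            return (1);
        timer_settime(tid, 0, &its, NULL);                 /* sys_ktimer_settime() */
        timer_delete(tid);                                 /* sys_ktimer_delete() */
        return (0);
    }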
*/ timespecclear(&it->it_time.it_value); timespecclear(&it->it_time.it_interval); ITIMER_UNLOCK(it); callout_drain(&it->it_callout); ITIMER_LOCK(it); return (0); } static int realtimer_gettime(struct itimer *it, struct itimerspec *ovalue) { struct timespec cts; mtx_assert(&it->it_mtx, MA_OWNED); realtimer_clocktime(it->it_clockid, &cts); *ovalue = it->it_time; if (ovalue->it_value.tv_sec != 0 || ovalue->it_value.tv_nsec != 0) { timespecsub(&ovalue->it_value, &cts, &ovalue->it_value); if (ovalue->it_value.tv_sec < 0 || (ovalue->it_value.tv_sec == 0 && ovalue->it_value.tv_nsec == 0)) { ovalue->it_value.tv_sec = 0; ovalue->it_value.tv_nsec = 1; } } return (0); } static int realtimer_settime(struct itimer *it, int flags, struct itimerspec *value, struct itimerspec *ovalue) { struct timespec cts, ts; struct timeval tv; struct itimerspec val; mtx_assert(&it->it_mtx, MA_OWNED); val = *value; if (itimespecfix(&val.it_value)) return (EINVAL); if (timespecisset(&val.it_value)) { if (itimespecfix(&val.it_interval)) return (EINVAL); } else { timespecclear(&val.it_interval); } if (ovalue != NULL) realtimer_gettime(it, ovalue); it->it_time = val; if (timespecisset(&val.it_value)) { realtimer_clocktime(it->it_clockid, &cts); ts = val.it_value; if ((flags & TIMER_ABSTIME) == 0) { /* Convert to absolute time. */ timespecadd(&it->it_time.it_value, &cts, &it->it_time.it_value); } else { timespecsub(&ts, &cts, &ts); /* * We don't care if ts is negative, tvtohz() will * fix it. */ } TIMESPEC_TO_TIMEVAL(&tv, &ts); callout_reset(&it->it_callout, tvtohz(&tv), realtimer_expire, it); } else { callout_stop(&it->it_callout); } return (0); } static void realtimer_clocktime(clockid_t id, struct timespec *ts) { if (id == CLOCK_REALTIME) getnanotime(ts); else /* CLOCK_MONOTONIC */ getnanouptime(ts); } int itimer_accept(struct proc *p, int timerid, ksiginfo_t *ksi) { struct itimer *it; PROC_LOCK_ASSERT(p, MA_OWNED); it = itimer_find(p, timerid); if (it != NULL) { ksi->ksi_overrun = it->it_overrun; it->it_overrun_last = it->it_overrun; it->it_overrun = 0; ITIMER_UNLOCK(it); return (0); } return (EINVAL); } int itimespecfix(struct timespec *ts) { if (ts->tv_sec < 0 || ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000) return (EINVAL); if (ts->tv_sec == 0 && ts->tv_nsec != 0 && ts->tv_nsec < tick * 1000) ts->tv_nsec = tick * 1000; return (0); } /* Timeout callback for realtime timer */ static void realtimer_expire(void *arg) { struct timespec cts, ts; struct timeval tv; struct itimer *it; it = (struct itimer *)arg; realtimer_clocktime(it->it_clockid, &cts); /* Only fire if time is reached. */ if (timespeccmp(&cts, &it->it_time.it_value, >=)) { if (timespecisset(&it->it_time.it_interval)) { timespecadd(&it->it_time.it_value, &it->it_time.it_interval, &it->it_time.it_value); while (timespeccmp(&cts, &it->it_time.it_value, >=)) { if (it->it_overrun < INT_MAX) it->it_overrun++; else it->it_ksi.ksi_errno = ERANGE; timespecadd(&it->it_time.it_value, &it->it_time.it_interval, &it->it_time.it_value); } } else { /* single-shot timer?
*/ timespecclear(&it->it_time.it_value); } if (timespecisset(&it->it_time.it_value)) { timespecsub(&it->it_time.it_value, &cts, &ts); TIMESPEC_TO_TIMEVAL(&tv, &ts); callout_reset(&it->it_callout, tvtohz(&tv), realtimer_expire, it); } itimer_enter(it); ITIMER_UNLOCK(it); itimer_fire(it); ITIMER_LOCK(it); itimer_leave(it); } else if (timespecisset(&it->it_time.it_value)) { ts = it->it_time.it_value; timespecsub(&ts, &cts, &ts); TIMESPEC_TO_TIMEVAL(&tv, &ts); callout_reset(&it->it_callout, tvtohz(&tv), realtimer_expire, it); } } void itimer_fire(struct itimer *it) { struct proc *p = it->it_proc; struct thread *td; if (it->it_sigev.sigev_notify == SIGEV_SIGNAL || it->it_sigev.sigev_notify == SIGEV_THREAD_ID) { if (sigev_findtd(p, &it->it_sigev, &td) != 0) { ITIMER_LOCK(it); timespecclear(&it->it_time.it_value); timespecclear(&it->it_time.it_interval); callout_stop(&it->it_callout); ITIMER_UNLOCK(it); return; } if (!KSI_ONQ(&it->it_ksi)) { it->it_ksi.ksi_errno = 0; ksiginfo_set_sigev(&it->it_ksi, &it->it_sigev); tdsendsignal(p, td, it->it_ksi.ksi_signo, &it->it_ksi); } else { if (it->it_overrun < INT_MAX) it->it_overrun++; else it->it_ksi.ksi_errno = ERANGE; } PROC_UNLOCK(p); } } static void itimers_alloc(struct proc *p) { struct itimers *its; int i; its = malloc(sizeof (struct itimers), M_SUBPROC, M_WAITOK | M_ZERO); LIST_INIT(&its->its_virtual); LIST_INIT(&its->its_prof); TAILQ_INIT(&its->its_worklist); for (i = 0; i < TIMER_MAX; i++) its->its_timers[i] = NULL; PROC_LOCK(p); if (p->p_itimers == NULL) { p->p_itimers = its; PROC_UNLOCK(p); } else { PROC_UNLOCK(p); free(its, M_SUBPROC); } } static void itimers_event_hook_exec(void *arg, struct proc *p, struct image_params *imgp __unused) { itimers_event_hook_exit(arg, p); } /* Clean up timers when certain process events are triggered. */ static void itimers_event_hook_exit(void *arg, struct proc *p) { struct itimers *its; struct itimer *it; int event = (int)(intptr_t)arg; int i; if (p->p_itimers != NULL) { its = p->p_itimers; for (i = 0; i < MAX_CLOCKS; ++i) { if (posix_clocks[i].event_hook != NULL) CLOCK_CALL(i, event_hook, (p, i, event)); } /* * According to SUSv3, XSI interval timers should be inherited * by the new image. */ if (event == ITIMER_EV_EXEC) i = 3; else if (event == ITIMER_EV_EXIT) i = 0; else panic("unhandled event"); for (; i < TIMER_MAX; ++i) { if ((it = its->its_timers[i]) != NULL) kern_ktimer_delete(curthread, i); } if (its->its_timers[0] == NULL && its->its_timers[1] == NULL && its->its_timers[2] == NULL) { free(its, M_SUBPROC); p->p_itimers = NULL; } } } Index: projects/runtime-coverage-v2/sys/mips/conf/ERL =================================================================== --- projects/runtime-coverage-v2/sys/mips/conf/ERL (revision 347075) +++ projects/runtime-coverage-v2/sys/mips/conf/ERL (revision 347076) @@ -1,214 +1,215 @@ # # ERL - EdgeRouter Lite kernel config # Based on configuration from http://rtfm.net/FreeBSD/ERL # # For more information on this file, please read the config(5) manual page, # and/or the handbook section on Kernel Configuration Files: # # https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files.
# If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ ident ERL makeoptions ARCH_FLAGS="-march=octeon+ -mabi=64" makeoptions LDSCRIPT_NAME=ldscript.mips.octeon1 makeoptions KERNLOADADDR=0xffffffff80100000 # We don't need to build a trampolined version of the kernel. makeoptions WITHOUT_KERNEL_TRAMPOLINE=1 include "../cavium/std.octeon1" hints "OCTEON1.hints" #Default places to look for devices. makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols # Board-specific support that cannot be auto-detected at runtime. #options OCTEON_VENDOR_LANNER # Support for Lanner boards. #options OCTEON_VENDOR_RADISYS # Support for Radisys boards. options OCTEON_VENDOR_UBIQUITI # Support for Ubiquiti boards. #options OCTEON_VENDOR_GEFES # Support for GE LANIC boards #options OCTEON_BOARD_CAPK_0100ND # Support for CAPK-0100nd. # Compile for a specified Octeon model. If not specified, support for # detection at runtime will be used instead, which may give inferior # performance. # # See sys/contrib/octeon-sdk/octeon-model.h for possible values. options OCTEON_MODEL=OCTEON_CN50XX_PASS1 options SCHED_ULE # ULE scheduler options PREEMPTION # Enable kernel thread preemption options INET # InterNETworking options INET6 # IPv6 communications protocols options IPSEC # IP (v4/v6) security options TCP_HHOOK # hhook(9) framework for TCP options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support options UFS_ACL # Support for access control lists options UFS_DIRHASH # Improve performance on big directories options UFS_GJOURNAL # Enable gjournal-based UFS journaling options MD_ROOT # MD is a potential root device options NFSCL # Network Filesystem Client options NFSD # Network Filesystem Server options NFSLOCKD # Network Lock Manager options NFS_ROOT # NFS usable as /, requires NFSCL options MSDOSFS # MSDOS Filesystem options CD9660 # ISO 9660 Filesystem options PROCFS # Process filesystem (requires PSEUDOFS) options PSEUDOFS # Pseudo-filesystem framework options GEOM_PART_GPT # GUID Partition Tables. options GEOM_LABEL # Provides labelization options COMPAT_FREEBSD32 # Compatible with o32 binaries options COMPAT_FREEBSD10 # Compatible with FreeBSD10 options COMPAT_FREEBSD11 # Compatible with FreeBSD11 +options COMPAT_FREEBSD12 # Compatible with FreeBSD12 options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI options KTRACE # ktrace(1) support options STACK # stack(9) support options SYSVSHM # SYSV-style shared memory options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options MAC # TrustedBSD MAC Framework options KDTRACE_FRAME # Ensure frames are compiled in options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data options INCLUDE_CONFIG_FILE # Include this file in kernel options TMPFS # Temporary file system options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities # Debugging for use in -current #options KDB # Enable kernel debugger support. options DDB # Support DDB. #options GDB # Support remote GDB. 
#options DEADLKRES # Enable the deadlock resolver #options INVARIANTS # Enable calls of extra sanity checking #options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS #options WITNESS # Enable checks to detect deadlocks and cycles #options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed #options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones # Make an SMP-capable kernel by default options SMP # Symmetric MultiProcessor Kernel options ROOTDEVNAME=\"ufs:da0s2a\" # Default root filesystem. # ATA/SCSI peripherals device scbus # SCSI bus (required for ATA/SCSI) device ch # SCSI media changers device da # Direct Access (disks) device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) device ses # Enclosure Services (SES and SAF-TE) # Serial (COM) ports device uart # Generic UART driver # On-board Cavium Octeon Ethernet. # NOTE: Be sure to keep the 'device miibus' line in order to use these NICs! device octe # Cavium Octeon management Ethernet. device octm # Switch PHY support for the octe driver. These currently present a VLAN per # physical port, but may eventually provide support for DSA or similar instead. #device mv88e61xxphy # Marvell 88E61XX # Wireless NIC cards device wlan # 802.11 support options IEEE80211_DEBUG # enable debug msgs options IEEE80211_SUPPORT_MESH # enable 802.11s draft support device wlan_wep # 802.11 WEP support device wlan_ccmp # 802.11 CCMP support device wlan_tkip # 802.11 TKIP support device wlan_amrr # AMRR transmit rate control algorithm #device ath # Atheros NIC's #device ath_pci # Atheros pci/cardbus glue #device ath_hal # pci/cardbus chip support #device ath_rate_sample # SampleRate tx rate control for ath # Pseudo devices. device loop # Network loopback device random # Entropy device device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. device bpf # Berkeley packet filter # Hardware watchdog support. 
#device octeon_wdog # Octeon hardware watchdog # USB support options USB_DEBUG # enable debug msgs device octusb # Cavium Octeon on-board USB interface (USB 2.0) device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface (USB 2.0) device usb # USB Bus (required) #device udbp # USB Double Bulk Pipe devices device uhid # "Human Interface Devices" device ulpt # Printer device umass # Disks/Mass storage - Requires scbus and da device ums # Mouse device urio # Diamond Rio 500 MP3 player # USB Serial devices device u3g # USB-based 3G modems (Option, Huawei, Sierra) device uark # Technologies ARK3116 based serial adapters device ubsa # Belkin F5U103 and compatible serial adapters device uftdi # For FTDI usb serial adapters device uipaq # Some WinCE based devices device uplcom # Prolific PL-2303 serial adapters device uslcom # SI Labs CP2101/CP2102 serial adapters device uvisor # Visor and Palm devices device uvscom # USB serial support for DDI pocket's PHS # USB Ethernet, requires miibus device miibus # MII bus support device aue # ADMtek USB Ethernet device axe # ASIX Electronics USB Ethernet device cdce # Generic USB over Ethernet device cue # CATC USB Ethernet device kue # Kawasaki LSI USB Ethernet device rue # RealTek RTL8150 USB Ethernet device udav # Davicom DM9601E USB # USB Wireless device rum # Ralink Technology RT2501USB wireless NICs device uath # Atheros AR5523 wireless NICs device ural # Ralink Technology RT2500USB wireless NICs device zyd # ZyDAS zd1211/zd1211b wireless NICs # crypto subsystem device crypto # core crypto support (required for IPSEC) device cryptodev # /dev/crypto for access to h/w device cryptocteon # Octeon coprocessor 2 crypto offload # GPIO support #device gpio # PMC support #device hwpmc Index: projects/runtime-coverage-v2/sys/mips/conf/JZ4780 =================================================================== --- projects/runtime-coverage-v2/sys/mips/conf/JZ4780 (revision 347075) +++ projects/runtime-coverage-v2/sys/mips/conf/JZ4780 (revision 347076) @@ -1,113 +1,114 @@ # JZ4780 -- Kernel config for Ingenic JZ47XX boards # # $FreeBSD$ #NO_UNIVERSE # Note: SMP on 32-bit mips is no longer supported, which affects this config file. ident JZ4780 machine mips mipselhf cpu CPU_XBURST cpu CPU_MIPS4KC makeoptions KERNLOADADDR=0x80020000 makeoptions ARCH_FLAGS="-EL -march=mips32r2" # Don't build any modules yet. makeoptions MODULES_OVERRIDE="" files "../ingenic/files.jz4780" hints "JZ4780.hints" #Default places to look for devices. makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols options INTRNG # Borrow interrupt code from ARM options MIPS_NIRQ=264 # 8 cpuintc + 64 intc + 6 * 23 gpio options DDB options KDB options BREAK_TO_DEBUGGER options COMPAT_FREEBSD10 options COMPAT_FREEBSD11 +options COMPAT_FREEBSD12 options SCHED_4BSD #4BSD scheduler options INET #InterNETworking options NFSCL #Network Filesystem Client options NFS_ROOT #NFS usable as /, requires NFSCL options NFSLOCKD #Network Lock Manager options PSEUDOFS #Pseudo-filesystem framework options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions options FFS #Berkeley Fast Filesystem options SOFTUPDATES #Enable FFS soft updates support options UFS_ACL #Support for access control lists options UFS_DIRHASH #Improve performance on big directories #options ROOTDEVNAME=\"ufs:ada0\" options GEOM_LABEL # Provides labelization options GEOM_PART_GPT # GUID Partition Tables. #options GEOM_RAID # Soft RAID functionality. 
# Debugging for use in -current #options DEADLKRES #Enable the deadlock resolver options INVARIANTS #Enable calls of extra sanity checking options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS #options WITNESS #Enable checks to detect deadlocks and cycles #options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed # Make an SMP-capable kernel by default options SMP # Symmetric MultiProcessor Kernel device loop device ether #device le device miibus device bpf device md device uart device random device fdt_pinctrl device clk device regulator options EXT_RESOURCES device gpio device scbus device da device mmc device mmcsd device dme device iic device iicbus # Framebuffer console support device vt device kbdmux device hdmi device videomode device pty # USB support options USB_DEBUG # enable debug msgs options USB_HOST_ALIGN=128 # L2 cache line size device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface (USB 2.0) device dwcotg # DesignWare HS OTG controller device usb # USB Bus (required) #device udbp # USB Double Bulk Pipe devices device uhid # "Human Interface Devices" device ukbd # Allow keyboard like HIDs to control console #device ulpt # Printer device umass # Disks/Mass storage - Requires scbus and da device ums # Mouse # FDT support options FDT Index: projects/runtime-coverage-v2/sys/mips/conf/X1000 =================================================================== --- projects/runtime-coverage-v2/sys/mips/conf/X1000 (revision 347075) +++ projects/runtime-coverage-v2/sys/mips/conf/X1000 (revision 347076) @@ -1,95 +1,96 @@ # X1000 -- Kernel config for Ingenic X1000 boards # # $FreeBSD$ #NO_UNIVERSE ident X1000 machine mips mipsel cpu CPU_XBURST cpu CPU_MIPS4KC makeoptions KERNLOADADDR=0x80020000 makeoptions ARCH_FLAGS="-EL -march=mips32r2" # Don't build any modules yet. makeoptions MODULES_OVERRIDE="" files "../ingenic/files.x1000" hints "X1000.hints" #Default places to look for devices. makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols options INTRNG # Borrow interrupt code from ARM options MIPS_NIRQ=264 # 8 cpuintc + 64 intc + 6 * 23 gpio options DDB options KDB options BREAK_TO_DEBUGGER options COMPAT_FREEBSD10 options COMPAT_FREEBSD11 +options COMPAT_FREEBSD12 options SCHED_4BSD #4BSD scheduler options INET #InterNETworking options NFSCL #Network Filesystem Client options NFS_ROOT #NFS usable as /, requires NFSCL options NFSLOCKD #Network Lock Manager options PSEUDOFS #Pseudo-filesystem framework options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions options FFS #Berkeley Fast Filesystem options SOFTUPDATES #Enable FFS soft updates support options UFS_ACL #Support for access control lists options UFS_DIRHASH #Improve performance on big directories #options ROOTDEVNAME=\"ufs:ada0\" options GEOM_LABEL # Provides labelization options GEOM_PART_GPT # GUID Partition Tables. #options GEOM_RAID # Soft RAID functionality. 
# Debugging for use in -current #options DEADLKRES #Enable the deadlock resolver options INVARIANTS #Enable calls of extra sanity checking options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS #options WITNESS #Enable checks to detect deadlocks and cycles #options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed device loop device ether #device le device miibus device bpf device md device uart device random device fdt_pinctrl device clk device regulator options EXT_RESOURCES device gpio device scbus device da device mmc device mmcsd # USB support #options USB_DEBUG # enable debug msgs #options USB_HOST_ALIGN=128 # L2 cache line size #device ohci # OHCI PCI->USB interface #device ehci # EHCI PCI->USB interface (USB 2.0) #device dwcotg # DesignWare HS OTG controller #device usb # USB Bus (required) #device udbp # USB Double Bulk Pipe devices #device uhid # "Human Interface Devices" #device ulpt # Printer #device umass # Disks/Mass storage - Requires scbus and da #device ums # Mouse # FDT support options FDT Index: projects/runtime-coverage-v2/sys/mips/conf/std.BERI =================================================================== --- projects/runtime-coverage-v2/sys/mips/conf/std.BERI (revision 347075) +++ projects/runtime-coverage-v2/sys/mips/conf/std.BERI (revision 347076) @@ -1,64 +1,65 @@ # # BERI_TEMPLATE -- a template kernel configuration for the SRI/Cambridge # "BERI" (Bluespec Extensible RISC Implementation) FPGA soft core CPU. This # kernel configuration file will be included by other board-specific files, # and so contains only BERI features common across all board targets. # # $FreeBSD$ # machine mips mips64 cpu CPU_BERI options HZ=200 makeoptions ARCH_FLAGS="-march=mips64 -mabi=64" makeoptions KERNLOADADDR=0xffffffff80100000 include "../beri/std.beri" makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols makeoptions MODULES_OVERRIDE="" options DDB options KDB options ALT_BREAK_TO_DEBUGGER options KTRACE options CAPABILITY_MODE options CAPABILITIES options COMPAT_FREEBSD10 options COMPAT_FREEBSD11 +options COMPAT_FREEBSD12 options INTRNG options SCHED_ULE options FFS #Berkeley Fast Filesystem options INET options INET6 options TCP_HHOOK # hhook(9) framework for TCP options KGSSAPI options NFSCL options NFSLOCKD options NFS_ROOT # Debugging for use in -current #options DEADLKRES #Enable the deadlock resolver options INVARIANTS #Enable calls of extra sanity checking options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS #options WITNESS #Enable checks to detect deadlocks and cycles #options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed device crypto device cryptodev device ether device geom_map device loop device md device random device snp Index: projects/runtime-coverage-v2/sys/modules/dtb/allwinner/Makefile =================================================================== --- projects/runtime-coverage-v2/sys/modules/dtb/allwinner/Makefile (revision 347075) +++ projects/runtime-coverage-v2/sys/modules/dtb/allwinner/Makefile (revision 347076) @@ -1,58 +1,63 @@ # $FreeBSD$ # All the dts files for allwinner systems we support. 
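# The lists below are keyed on MACHINE_ARCH: armv7 boards in the .if
# branch, aarch64 (A64/H5) boards in the .elif branch.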
.if ${MACHINE_ARCH} == "armv7" DTS= \ sun4i-a10-cubieboard.dts \ sun4i-a10-olinuxino-lime.dts \ sun6i-a31s-sinovoip-bpi-m2.dts \ sun5i-a13-olinuxino.dts \ sun5i-r8-chip.dts \ sun7i-a20-bananapi.dts \ sun7i-a20-cubieboard2.dts \ sun7i-a20-lamobo-r1.dts \ sun7i-a20-olimex-som-evb.dts \ sun7i-a20-pcduino3.dts \ sun8i-a83t-bananapi-m3.dts \ sun8i-h2-plus-orangepi-r1.dts \ sun8i-h2-plus-orangepi-zero.dts \ sun8i-h3-nanopi-m1.dts \ sun8i-h3-nanopi-m1-plus.dts \ sun8i-h3-nanopi-neo.dts \ sun8i-h3-orangepi-one.dts \ sun8i-h3-orangepi-pc.dts \ sun8i-h3-orangepi-plus2e.dts DTSO= sun8i-a83t-sid.dtso \ sun8i-h3-sid.dtso LINKS= \ ${DTBDIR}/sun4i-a10-cubieboard.dtb ${DTBDIR}/cubieboard.dtb \ ${DTBDIR}/sun4i-a10-olinuxino-lime.dtb ${DTBDIR}/olinuxino-lime.dtb \ ${DTBDIR}/sun6i-a31s-sinovoip-bpi-m2.dtb ${DTBDIR}/bananapim2.dtb \ ${DTBDIR}/sun7i-a20-bananapi.dtb ${DTBDIR}/bananapi.dtb \ ${DTBDIR}/sun7i-a20-cubieboard2.dtb ${DTBDIR}/cubieboard2.dtb \ ${DTBDIR}/sun7i-a20-olimex-som-evb.dtb ${DTBDIR}/olimex-a20-som-evb.dtb \ ${DTBDIR}/sun7i-a20-pcduino3.dtb ${DTBDIR}/pcduino3.dtb \ ${DTBDIR}/sun8i-a83t-bananapi-m3.dtb ${DTBDIR}/sinovoip-bpi-m3.dtb \ ${DTBDIR}/sun8i-a83t-bananapi-m3.dtb ${DTBDIR}/sun8i-a83t-sinovoip-bpi-m3.dtb .elif ${MACHINE_ARCH} == "aarch64" DTS= \ allwinner/sun50i-a64-nanopi-a64.dts \ allwinner/sun50i-a64-olinuxino.dts \ allwinner/sun50i-a64-pine64-lts.dts \ allwinner/sun50i-a64-pine64-plus.dts \ allwinner/sun50i-a64-pine64.dts \ allwinner/sun50i-a64-sopine-baseboard.dts \ - allwinner/sun50i-h5-orangepi-pc2.dts + allwinner/sun50i-h5-orangepi-pc2.dts \ + allwinner/sun50i-h5-nanopi-neo2.dts DTSO= sun50i-a64-opp.dtso \ sun50i-a64-pwm.dtso \ sun50i-a64-rpwm.dtso \ sun50i-a64-sid.dtso \ sun50i-a64-ths.dtso \ - sun50i-a64-timer.dtso + sun50i-a64-timer.dtso \ + sun50i-h5-opp.dtso \ + sun50i-h5-sid.dtso \ + sun50i-h5-ths.dtso \ + sun50i-h5-nanopi-neo2-opp.dtso .endif .include Index: projects/runtime-coverage-v2/sys/modules/dtb/rockchip/Makefile =================================================================== --- projects/runtime-coverage-v2/sys/modules/dtb/rockchip/Makefile (revision 347075) +++ projects/runtime-coverage-v2/sys/modules/dtb/rockchip/Makefile (revision 347076) @@ -1,7 +1,7 @@ # $FreeBSD$ -# For now only for rk3328-rock64 dts file. DTS= \ - rockchip/rk3328-rock64.dts + rockchip/rk3328-rock64.dts \ + rockchip/rk3399-rockpro64.dts .include Index: projects/runtime-coverage-v2/sys/net/ieee8023ad_lacp.c =================================================================== --- projects/runtime-coverage-v2/sys/net/ieee8023ad_lacp.c (revision 347075) +++ projects/runtime-coverage-v2/sys/net/ieee8023ad_lacp.c (revision 347076) @@ -1,2178 +1,2218 @@ /* $NetBSD: ieee8023ad_lacp.c,v 1.3 2005/12/11 12:24:54 christos Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-NetBSD * * Copyright (c)2005 YAMAMOTO Takashi, * Copyright (c)2008 Andrew Thompson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ratelimit.h" #include #include #include #include #include #include #include /* hz */ #include /* for net/if.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * actor system priority and port priority. * XXX should be configurable. */ #define LACP_SYSTEM_PRIO 0x8000 #define LACP_PORT_PRIO 0x8000 const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 }; static const struct tlv_template lacp_info_tlv_template[] = { { LACP_TYPE_ACTORINFO, sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) }, { LACP_TYPE_PARTNERINFO, sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) }, { LACP_TYPE_COLLECTORINFO, sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) }, { 0, 0 }, }; static const struct tlv_template marker_info_tlv_template[] = { { MARKER_TYPE_INFO, sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) }, { 0, 0 }, }; static const struct tlv_template marker_response_tlv_template[] = { { MARKER_TYPE_RESPONSE, sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) }, { 0, 0 }, }; typedef void (*lacp_timer_func_t)(struct lacp_port *); static void lacp_fill_actorinfo(struct lacp_port *, struct lacp_peerinfo *); static void lacp_fill_markerinfo(struct lacp_port *, struct lacp_markerinfo *); static uint64_t lacp_aggregator_bandwidth(struct lacp_aggregator *); static void lacp_suppress_distributing(struct lacp_softc *, struct lacp_aggregator *); static void lacp_transit_expire(void *); static void lacp_update_portmap(struct lacp_softc *); static void lacp_select_active_aggregator(struct lacp_softc *); static uint16_t lacp_compose_key(struct lacp_port *); static int tlv_check(const void *, size_t, const struct tlvhdr *, const struct tlv_template *, boolean_t); static void lacp_tick(void *); static void lacp_fill_aggregator_id(struct lacp_aggregator *, const struct lacp_port *); static void lacp_fill_aggregator_id_peer(struct lacp_peerinfo *, const struct lacp_peerinfo *); static int lacp_aggregator_is_compatible(const struct lacp_aggregator *, const struct lacp_port *); static int lacp_peerinfo_is_compatible(const struct lacp_peerinfo *, const struct lacp_peerinfo *); static struct lacp_aggregator *lacp_aggregator_get(struct lacp_softc *, struct lacp_port *); static void lacp_aggregator_addref(struct lacp_softc *, struct lacp_aggregator *); static void lacp_aggregator_delref(struct lacp_softc *, struct lacp_aggregator *); /* receive machine */ static int lacp_pdu_input(struct lacp_port *, struct mbuf *); static int lacp_marker_input(struct lacp_port *, struct mbuf *); static void lacp_sm_rx(struct lacp_port *, const struct lacpdu *); static void lacp_sm_rx_timer(struct 
lacp_port *); static void lacp_sm_rx_set_expired(struct lacp_port *); static void lacp_sm_rx_update_ntt(struct lacp_port *, const struct lacpdu *); static void lacp_sm_rx_record_pdu(struct lacp_port *, const struct lacpdu *); static void lacp_sm_rx_update_selected(struct lacp_port *, const struct lacpdu *); static void lacp_sm_rx_record_default(struct lacp_port *); static void lacp_sm_rx_update_default_selected(struct lacp_port *); static void lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *, const struct lacp_peerinfo *); /* mux machine */ static void lacp_sm_mux(struct lacp_port *); static void lacp_set_mux(struct lacp_port *, enum lacp_mux_state); static void lacp_sm_mux_timer(struct lacp_port *); /* periodic transmit machine */ static void lacp_sm_ptx_update_timeout(struct lacp_port *, uint8_t); static void lacp_sm_ptx_tx_schedule(struct lacp_port *); static void lacp_sm_ptx_timer(struct lacp_port *); /* transmit machine */ static void lacp_sm_tx(struct lacp_port *); static void lacp_sm_assert_ntt(struct lacp_port *); static void lacp_run_timers(struct lacp_port *); static int lacp_compare_peerinfo(const struct lacp_peerinfo *, const struct lacp_peerinfo *); static int lacp_compare_systemid(const struct lacp_systemid *, const struct lacp_systemid *); static void lacp_port_enable(struct lacp_port *); static void lacp_port_disable(struct lacp_port *); static void lacp_select(struct lacp_port *); static void lacp_unselect(struct lacp_port *); static void lacp_disable_collecting(struct lacp_port *); static void lacp_enable_collecting(struct lacp_port *); static void lacp_disable_distributing(struct lacp_port *); static void lacp_enable_distributing(struct lacp_port *); static int lacp_xmit_lacpdu(struct lacp_port *); static int lacp_xmit_marker(struct lacp_port *); /* Debugging */ static void lacp_dump_lacpdu(const struct lacpdu *); static const char *lacp_format_partner(const struct lacp_peerinfo *, char *, size_t); static const char *lacp_format_lagid(const struct lacp_peerinfo *, const struct lacp_peerinfo *, char *, size_t); static const char *lacp_format_lagid_aggregator(const struct lacp_aggregator *, char *, size_t); static const char *lacp_format_state(uint8_t, char *, size_t); static const char *lacp_format_mac(const uint8_t *, char *, size_t); static const char *lacp_format_systemid(const struct lacp_systemid *, char *, size_t); static const char *lacp_format_portid(const struct lacp_portid *, char *, size_t); static void lacp_dprintf(const struct lacp_port *, const char *, ...) __attribute__((__format__(__printf__, 2, 3))); VNET_DEFINE_STATIC(int, lacp_debug); #define V_lacp_debug VNET(lacp_debug) SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD, 0, "ieee802.3ad"); SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(lacp_debug), 0, "Enable LACP debug logging (1=debug, 2=trace)"); VNET_DEFINE_STATIC(int, lacp_default_strict_mode) = 1; SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, default_strict_mode, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(lacp_default_strict_mode), 0, "LACP strict protocol compliance default"); #define LACP_DPRINTF(a) if (V_lacp_debug & 0x01) { lacp_dprintf a ; } #define LACP_TRACE(a) if (V_lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); } #define LACP_TPRINTF(a) if (V_lacp_debug & 0x04) { lacp_dprintf a ; } /* * partner administration variables. * XXX should be configurable. 
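 * Two defaults follow: an optimistic partner with SYNC, AGGREGATION,
 * COLLECTING and DISTRIBUTING asserted, and an all-zero strict partner;
 * lacp_sm_rx_record_default() picks one based on lsc_strict_mode.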
*/ static const struct lacp_peerinfo lacp_partner_admin_optimistic = { .lip_systemid = { .lsi_prio = 0xffff }, .lip_portid = { .lpi_prio = 0xffff }, .lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION | LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING, }; static const struct lacp_peerinfo lacp_partner_admin_strict = { .lip_systemid = { .lsi_prio = 0xffff }, .lip_portid = { .lpi_prio = 0xffff }, .lip_state = 0, }; static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = { [LACP_TIMER_CURRENT_WHILE] = lacp_sm_rx_timer, [LACP_TIMER_PERIODIC] = lacp_sm_ptx_timer, [LACP_TIMER_WAIT_WHILE] = lacp_sm_mux_timer, }; struct mbuf * lacp_input(struct lagg_port *lgp, struct mbuf *m) { struct lacp_port *lp = LACP_PORT(lgp); uint8_t subtype; if (m->m_pkthdr.len < sizeof(struct ether_header) + sizeof(subtype)) { m_freem(m); return (NULL); } m_copydata(m, sizeof(struct ether_header), sizeof(subtype), &subtype); switch (subtype) { case SLOWPROTOCOLS_SUBTYPE_LACP: lacp_pdu_input(lp, m); return (NULL); case SLOWPROTOCOLS_SUBTYPE_MARKER: lacp_marker_input(lp, m); return (NULL); } /* Not a subtype we are interested in */ return (m); } /* * lacp_pdu_input: process lacpdu */ static int lacp_pdu_input(struct lacp_port *lp, struct mbuf *m) { struct lacp_softc *lsc = lp->lp_lsc; struct lacpdu *du; int error = 0; if (m->m_pkthdr.len != sizeof(*du)) { goto bad; } if ((m->m_flags & M_MCAST) == 0) { goto bad; } if (m->m_len < sizeof(*du)) { m = m_pullup(m, sizeof(*du)); if (m == NULL) { return (ENOMEM); } } du = mtod(m, struct lacpdu *); if (memcmp(&du->ldu_eh.ether_dhost, ðermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) { goto bad; } /* * ignore the version for compatibility with * the future protocol revisions. */ #if 0 if (du->ldu_sph.sph_version != 1) { goto bad; } #endif /* * ignore tlv types for compatibility with * the future protocol revisions. 
*/ if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor, lacp_info_tlv_template, FALSE)) { goto bad; } if (V_lacp_debug > 0) { lacp_dprintf(lp, "lacpdu receive\n"); lacp_dump_lacpdu(du); } if ((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_rx_test) { LACP_TPRINTF((lp, "Dropping RX PDU\n")); goto bad; } LACP_LOCK(lsc); lacp_sm_rx(lp, du); LACP_UNLOCK(lsc); m_freem(m); return (error); bad: m_freem(m); return (EINVAL); } static void lacp_fill_actorinfo(struct lacp_port *lp, struct lacp_peerinfo *info) { struct lagg_port *lgp = lp->lp_lagg; struct lagg_softc *sc = lgp->lp_softc; info->lip_systemid.lsi_prio = htons(LACP_SYSTEM_PRIO); memcpy(&info->lip_systemid.lsi_mac, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); info->lip_portid.lpi_prio = htons(LACP_PORT_PRIO); info->lip_portid.lpi_portno = htons(lp->lp_ifp->if_index); info->lip_state = lp->lp_state; } static void lacp_fill_markerinfo(struct lacp_port *lp, struct lacp_markerinfo *info) { struct ifnet *ifp = lp->lp_ifp; /* Fill in the port index and system id (encoded as the MAC) */ info->mi_rq_port = htons(ifp->if_index); memcpy(&info->mi_rq_system, lp->lp_systemid.lsi_mac, ETHER_ADDR_LEN); info->mi_rq_xid = htonl(0); } static int lacp_xmit_lacpdu(struct lacp_port *lp) { struct lagg_port *lgp = lp->lp_lagg; struct mbuf *m; struct lacpdu *du; int error; LACP_LOCK_ASSERT(lp->lp_lsc); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { return (ENOMEM); } m->m_len = m->m_pkthdr.len = sizeof(*du); du = mtod(m, struct lacpdu *); memset(du, 0, sizeof(*du)); memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN); memcpy(&du->ldu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN); du->ldu_eh.ether_type = htons(ETHERTYPE_SLOW); du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP; du->ldu_sph.sph_version = 1; TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor)); du->ldu_actor = lp->lp_actor; TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO, sizeof(du->ldu_partner)); du->ldu_partner = lp->lp_partner; TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO, sizeof(du->ldu_collector)); du->ldu_collector.lci_maxdelay = 0; if (V_lacp_debug > 0) { lacp_dprintf(lp, "lacpdu transmit\n"); lacp_dump_lacpdu(du); } m->m_flags |= M_MCAST; /* * XXX should use higher priority queue. * otherwise network congestion can break aggregation. 
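 * A LACPDU lost to congestion can let the partner's current_while timer
 * expire and drop this port from the aggregate.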
*/ error = lagg_enqueue(lp->lp_ifp, m); return (error); } static int lacp_xmit_marker(struct lacp_port *lp) { struct lagg_port *lgp = lp->lp_lagg; struct mbuf *m; struct markerdu *mdu; int error; LACP_LOCK_ASSERT(lp->lp_lsc); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { return (ENOMEM); } m->m_len = m->m_pkthdr.len = sizeof(*mdu); mdu = mtod(m, struct markerdu *); memset(mdu, 0, sizeof(*mdu)); memcpy(&mdu->mdu_eh.ether_dhost, ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN); memcpy(&mdu->mdu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN); mdu->mdu_eh.ether_type = htons(ETHERTYPE_SLOW); mdu->mdu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_MARKER; mdu->mdu_sph.sph_version = 1; /* Bump the transaction id and copy over the marker info */ lp->lp_marker.mi_rq_xid = htonl(ntohl(lp->lp_marker.mi_rq_xid) + 1); TLV_SET(&mdu->mdu_tlv, MARKER_TYPE_INFO, sizeof(mdu->mdu_info)); mdu->mdu_info = lp->lp_marker; LACP_DPRINTF((lp, "marker transmit, port=%u, sys=%6D, id=%u\n", ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system, ":", ntohl(mdu->mdu_info.mi_rq_xid))); m->m_flags |= M_MCAST; error = lagg_enqueue(lp->lp_ifp, m); return (error); } void lacp_linkstate(struct lagg_port *lgp) { struct lacp_port *lp = LACP_PORT(lgp); struct lacp_softc *lsc = lp->lp_lsc; struct ifnet *ifp = lgp->lp_ifp; struct ifmediareq ifmr; int error = 0; u_int media; uint8_t old_state; uint16_t old_key; bzero((char *)&ifmr, sizeof(ifmr)); error = (*ifp->if_ioctl)(ifp, SIOCGIFXMEDIA, (caddr_t)&ifmr); if (error != 0) { bzero((char *)&ifmr, sizeof(ifmr)); error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr); } if (error != 0) return; LACP_LOCK(lsc); media = ifmr.ifm_active; LACP_DPRINTF((lp, "media changed 0x%x -> 0x%x, ether = %d, fdx = %d, " "link = %d\n", lp->lp_media, media, IFM_TYPE(media) == IFM_ETHER, (media & IFM_FDX) != 0, ifp->if_link_state == LINK_STATE_UP)); old_state = lp->lp_state; old_key = lp->lp_key; lp->lp_media = media; /* * If the port is not an active full duplex Ethernet link then it can * not be aggregated. 
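 * (802.3ad/802.1AX defines aggregation only for full-duplex,
 * point-to-point Ethernet links.)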
*/ if (IFM_TYPE(media) != IFM_ETHER || (media & IFM_FDX) == 0 || ifp->if_link_state != LINK_STATE_UP) { lacp_port_disable(lp); } else { lacp_port_enable(lp); } lp->lp_key = lacp_compose_key(lp); if (old_state != lp->lp_state || old_key != lp->lp_key) { LACP_DPRINTF((lp, "-> UNSELECTED\n")); lp->lp_selected = LACP_UNSELECTED; } LACP_UNLOCK(lsc); } static void lacp_tick(void *arg) { struct lacp_softc *lsc = arg; struct lacp_port *lp; LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) { if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) continue; CURVNET_SET(lp->lp_ifp->if_vnet); lacp_run_timers(lp); lacp_select(lp); lacp_sm_mux(lp); lacp_sm_tx(lp); lacp_sm_ptx_tx_schedule(lp); CURVNET_RESTORE(); } callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc); } int lacp_port_create(struct lagg_port *lgp) { struct lagg_softc *sc = lgp->lp_softc; struct lacp_softc *lsc = LACP_SOFTC(sc); struct lacp_port *lp; struct ifnet *ifp = lgp->lp_ifp; struct sockaddr_dl sdl; struct ifmultiaddr *rifma = NULL; int error; link_init_sdl(ifp, (struct sockaddr *)&sdl, IFT_ETHER); sdl.sdl_alen = ETHER_ADDR_LEN; bcopy(ðermulticastaddr_slowprotocols, LLADDR(&sdl), ETHER_ADDR_LEN); error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma); if (error) { printf("%s: ADDMULTI failed on %s\n", __func__, lgp->lp_ifp->if_xname); return (error); } lp = malloc(sizeof(struct lacp_port), M_DEVBUF, M_NOWAIT|M_ZERO); if (lp == NULL) return (ENOMEM); LACP_LOCK(lsc); lgp->lp_psc = lp; lp->lp_ifp = ifp; lp->lp_lagg = lgp; lp->lp_lsc = lsc; lp->lp_ifma = rifma; LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next); lacp_fill_actorinfo(lp, &lp->lp_actor); lacp_fill_markerinfo(lp, &lp->lp_marker); lp->lp_state = LACP_STATE_ACTIVITY; lp->lp_aggregator = NULL; lacp_sm_rx_set_expired(lp); LACP_UNLOCK(lsc); lacp_linkstate(lgp); return (0); } void lacp_port_destroy(struct lagg_port *lgp) { struct lacp_port *lp = LACP_PORT(lgp); struct lacp_softc *lsc = lp->lp_lsc; int i; LACP_LOCK(lsc); for (i = 0; i < LACP_NTIMER; i++) { LACP_TIMER_DISARM(lp, i); } lacp_disable_collecting(lp); lacp_disable_distributing(lp); lacp_unselect(lp); LIST_REMOVE(lp, lp_next); LACP_UNLOCK(lsc); /* The address may have already been removed by if_purgemaddrs() */ if (!lgp->lp_detaching) if_delmulti_ifma(lp->lp_ifma); free(lp, M_DEVBUF); } void lacp_req(struct lagg_softc *sc, void *data) { struct lacp_opreq *req = (struct lacp_opreq *)data; struct lacp_softc *lsc = LACP_SOFTC(sc); struct lacp_aggregator *la; bzero(req, sizeof(struct lacp_opreq)); /* * If the LACP softc is NULL, return with the opreq structure full of * zeros. It is normal for the softc to be NULL while the lagg is * being destroyed. 
*/ if (NULL == lsc) return; la = lsc->lsc_active_aggregator; LACP_LOCK(lsc); if (la != NULL) { req->actor_prio = ntohs(la->la_actor.lip_systemid.lsi_prio); memcpy(&req->actor_mac, &la->la_actor.lip_systemid.lsi_mac, ETHER_ADDR_LEN); req->actor_key = ntohs(la->la_actor.lip_key); req->actor_portprio = ntohs(la->la_actor.lip_portid.lpi_prio); req->actor_portno = ntohs(la->la_actor.lip_portid.lpi_portno); req->actor_state = la->la_actor.lip_state; req->partner_prio = ntohs(la->la_partner.lip_systemid.lsi_prio); memcpy(&req->partner_mac, &la->la_partner.lip_systemid.lsi_mac, ETHER_ADDR_LEN); req->partner_key = ntohs(la->la_partner.lip_key); req->partner_portprio = ntohs(la->la_partner.lip_portid.lpi_prio); req->partner_portno = ntohs(la->la_partner.lip_portid.lpi_portno); req->partner_state = la->la_partner.lip_state; } LACP_UNLOCK(lsc); } void lacp_portreq(struct lagg_port *lgp, void *data) { struct lacp_opreq *req = (struct lacp_opreq *)data; struct lacp_port *lp = LACP_PORT(lgp); struct lacp_softc *lsc = lp->lp_lsc; LACP_LOCK(lsc); req->actor_prio = ntohs(lp->lp_actor.lip_systemid.lsi_prio); memcpy(&req->actor_mac, &lp->lp_actor.lip_systemid.lsi_mac, ETHER_ADDR_LEN); req->actor_key = ntohs(lp->lp_actor.lip_key); req->actor_portprio = ntohs(lp->lp_actor.lip_portid.lpi_prio); req->actor_portno = ntohs(lp->lp_actor.lip_portid.lpi_portno); req->actor_state = lp->lp_actor.lip_state; req->partner_prio = ntohs(lp->lp_partner.lip_systemid.lsi_prio); memcpy(&req->partner_mac, &lp->lp_partner.lip_systemid.lsi_mac, ETHER_ADDR_LEN); req->partner_key = ntohs(lp->lp_partner.lip_key); req->partner_portprio = ntohs(lp->lp_partner.lip_portid.lpi_prio); req->partner_portno = ntohs(lp->lp_partner.lip_portid.lpi_portno); req->partner_state = lp->lp_partner.lip_state; LACP_UNLOCK(lsc); } static void lacp_disable_collecting(struct lacp_port *lp) { LACP_DPRINTF((lp, "collecting disabled\n")); lp->lp_state &= ~LACP_STATE_COLLECTING; } static void lacp_enable_collecting(struct lacp_port *lp) { LACP_DPRINTF((lp, "collecting enabled\n")); lp->lp_state |= LACP_STATE_COLLECTING; } static void lacp_disable_distributing(struct lacp_port *lp) { struct lacp_aggregator *la = lp->lp_aggregator; struct lacp_softc *lsc = lp->lp_lsc; struct lagg_softc *sc = lsc->lsc_softc; char buf[LACP_LAGIDSTR_MAX+1]; LACP_LOCK_ASSERT(lsc); if (la == NULL || (lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) { return; } KASSERT(!TAILQ_EMPTY(&la->la_ports), ("no aggregator ports")); KASSERT(la->la_nports > 0, ("nports invalid (%d)", la->la_nports)); KASSERT(la->la_refcnt >= la->la_nports, ("aggregator refcnt invalid")); LACP_DPRINTF((lp, "disable distributing on aggregator %s, " "nports %d -> %d\n", lacp_format_lagid_aggregator(la, buf, sizeof(buf)), la->la_nports, la->la_nports - 1)); TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q); la->la_nports--; sc->sc_active = la->la_nports; if (lsc->lsc_active_aggregator == la) { lacp_suppress_distributing(lsc, la); lacp_select_active_aggregator(lsc); /* regenerate the port map, the active aggregator has changed */ lacp_update_portmap(lsc); } lp->lp_state &= ~LACP_STATE_DISTRIBUTING; if_link_state_change(sc->sc_ifp, sc->sc_active ? 
LINK_STATE_UP : LINK_STATE_DOWN); } static void lacp_enable_distributing(struct lacp_port *lp) { struct lacp_aggregator *la = lp->lp_aggregator; struct lacp_softc *lsc = lp->lp_lsc; struct lagg_softc *sc = lsc->lsc_softc; char buf[LACP_LAGIDSTR_MAX+1]; LACP_LOCK_ASSERT(lsc); if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) { return; } LACP_DPRINTF((lp, "enable distributing on aggregator %s, " "nports %d -> %d\n", lacp_format_lagid_aggregator(la, buf, sizeof(buf)), la->la_nports, la->la_nports + 1)); KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid")); TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q); la->la_nports++; sc->sc_active = la->la_nports; lp->lp_state |= LACP_STATE_DISTRIBUTING; if (lsc->lsc_active_aggregator == la) { lacp_suppress_distributing(lsc, la); lacp_update_portmap(lsc); } else /* try to become the active aggregator */ lacp_select_active_aggregator(lsc); if_link_state_change(sc->sc_ifp, sc->sc_active ? LINK_STATE_UP : LINK_STATE_DOWN); } static void lacp_transit_expire(void *vp) { struct lacp_softc *lsc = vp; LACP_LOCK_ASSERT(lsc); CURVNET_SET(lsc->lsc_softc->sc_ifp->if_vnet); LACP_TRACE(NULL); CURVNET_RESTORE(); lsc->lsc_suppress_distributing = FALSE; } void lacp_attach(struct lagg_softc *sc) { struct lacp_softc *lsc; lsc = malloc(sizeof(struct lacp_softc), M_DEVBUF, M_WAITOK | M_ZERO); sc->sc_psc = lsc; lsc->lsc_softc = sc; lsc->lsc_hashkey = m_ether_tcpip_hash_init(); lsc->lsc_active_aggregator = NULL; lsc->lsc_strict_mode = VNET(lacp_default_strict_mode); LACP_LOCK_INIT(lsc); TAILQ_INIT(&lsc->lsc_aggregators); LIST_INIT(&lsc->lsc_ports); callout_init_mtx(&lsc->lsc_transit_callout, &lsc->lsc_mtx, 0); callout_init_mtx(&lsc->lsc_callout, &lsc->lsc_mtx, 0); /* if the lagg is already up then do the same */ if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) lacp_init(sc); } void lacp_detach(void *psc) { struct lacp_softc *lsc = (struct lacp_softc *)psc; KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators), ("aggregators still active")); KASSERT(lsc->lsc_active_aggregator == NULL, ("aggregator still attached")); callout_drain(&lsc->lsc_transit_callout); callout_drain(&lsc->lsc_callout); LACP_LOCK_DESTROY(lsc); free(lsc, M_DEVBUF); } void lacp_init(struct lagg_softc *sc) { struct lacp_softc *lsc = LACP_SOFTC(sc); LACP_LOCK(lsc); callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc); LACP_UNLOCK(lsc); } void lacp_stop(struct lagg_softc *sc) { struct lacp_softc *lsc = LACP_SOFTC(sc); LACP_LOCK(lsc); callout_stop(&lsc->lsc_transit_callout); callout_stop(&lsc->lsc_callout); LACP_UNLOCK(lsc); } struct lagg_port * lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m) { struct lacp_softc *lsc = LACP_SOFTC(sc); struct lacp_portmap *pm; struct lacp_port *lp; + struct lacp_port **map; uint32_t hash; + int count; if (__predict_false(lsc->lsc_suppress_distributing)) { LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__)); return (NULL); } pm = &lsc->lsc_pmap[lsc->lsc_activemap]; if (pm->pm_count == 0) { LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__)); return (NULL); } +#ifdef NUMA + if ((sc->sc_opts & LAGG_OPT_USE_NUMA) && + pm->pm_num_dom > 1 && m->m_pkthdr.numa_domain < MAXMEMDOM) { + count = pm->pm_numa[m->m_pkthdr.numa_domain].count; + if (count > 0) { + map = pm->pm_numa[m->m_pkthdr.numa_domain].map; + } else { + /* No ports on this domain; use global hash. 
*/ + map = pm->pm_map; + count = pm->pm_count; + } + } else +#endif + { + map = pm->pm_map; + count = pm->pm_count; + } if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) hash = m->m_pkthdr.flowid >> sc->flowid_shift; else hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey); - hash %= pm->pm_count; - lp = pm->pm_map[hash]; + hash %= count; + lp = map[hash]; + KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0, ("aggregated port is not distributing")); return (lp->lp_lagg); } #ifdef RATELIMIT struct lagg_port * lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t flowid) { struct lacp_softc *lsc = LACP_SOFTC(sc); struct lacp_portmap *pm; struct lacp_port *lp; uint32_t hash; if (__predict_false(lsc->lsc_suppress_distributing)) { LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__)); return (NULL); } pm = &lsc->lsc_pmap[lsc->lsc_activemap]; if (pm->pm_count == 0) { LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__)); return (NULL); } hash = flowid >> sc->flowid_shift; hash %= pm->pm_count; lp = pm->pm_map[hash]; return (lp->lp_lagg); } #endif /* * lacp_suppress_distributing: drop transmit packets for a while * to preserve packet ordering. */ static void lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la) { struct lacp_port *lp; if (lsc->lsc_active_aggregator != la) { return; } LACP_TRACE(NULL); lsc->lsc_suppress_distributing = TRUE; /* send a marker frame down each port to verify the queues are empty */ LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) { lp->lp_flags |= LACP_PORT_MARK; lacp_xmit_marker(lp); } /* set a timeout for the marker frames */ callout_reset(&lsc->lsc_transit_callout, LACP_TRANSIT_DELAY * hz / 1000, lacp_transit_expire, lsc); } static int lacp_compare_peerinfo(const struct lacp_peerinfo *a, const struct lacp_peerinfo *b) { return (memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state))); } static int lacp_compare_systemid(const struct lacp_systemid *a, const struct lacp_systemid *b) { return (memcmp(a, b, sizeof(*a))); } #if 0 /* unused */ static int lacp_compare_portid(const struct lacp_portid *a, const struct lacp_portid *b) { return (memcmp(a, b, sizeof(*a))); } #endif static uint64_t lacp_aggregator_bandwidth(struct lacp_aggregator *la) { struct lacp_port *lp; uint64_t speed; lp = TAILQ_FIRST(&la->la_ports); if (lp == NULL) { return (0); } speed = ifmedia_baudrate(lp->lp_media); speed *= la->la_nports; if (speed == 0) { LACP_DPRINTF((lp, "speed 0? media=0x%x nports=%d\n", lp->lp_media, la->la_nports)); } return (speed); } /* * lacp_select_active_aggregator: select an aggregator to be used to transmit * packets from lagg(4) interface. 
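 * Preference order: lowest partner system priority, then highest
 * aggregate bandwidth, with ties kept by the incumbent aggregator.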
*/ static void lacp_select_active_aggregator(struct lacp_softc *lsc) { struct lacp_aggregator *la; struct lacp_aggregator *best_la = NULL; uint64_t best_speed = 0; char buf[LACP_LAGIDSTR_MAX+1]; LACP_TRACE(NULL); TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) { uint64_t speed; if (la->la_nports == 0) { continue; } speed = lacp_aggregator_bandwidth(la); LACP_DPRINTF((NULL, "%s, speed=%jd, nports=%d\n", lacp_format_lagid_aggregator(la, buf, sizeof(buf)), speed, la->la_nports)); /* * This aggregator is chosen if the partner has a better * system priority or, the total aggregated speed is higher * or, it is already the chosen aggregator */ if ((best_la != NULL && LACP_SYS_PRI(la->la_partner) < LACP_SYS_PRI(best_la->la_partner)) || speed > best_speed || (speed == best_speed && la == lsc->lsc_active_aggregator)) { best_la = la; best_speed = speed; } } KASSERT(best_la == NULL || best_la->la_nports > 0, ("invalid aggregator refcnt")); KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports), ("invalid aggregator list")); if (lsc->lsc_active_aggregator != best_la) { LACP_DPRINTF((NULL, "active aggregator changed\n")); LACP_DPRINTF((NULL, "old %s\n", lacp_format_lagid_aggregator(lsc->lsc_active_aggregator, buf, sizeof(buf)))); } else { LACP_DPRINTF((NULL, "active aggregator not changed\n")); } LACP_DPRINTF((NULL, "new %s\n", lacp_format_lagid_aggregator(best_la, buf, sizeof(buf)))); if (lsc->lsc_active_aggregator != best_la) { lsc->lsc_active_aggregator = best_la; lacp_update_portmap(lsc); if (best_la) { lacp_suppress_distributing(lsc, best_la); } } } /* * Updated the inactive portmap array with the new list of ports and * make it live. */ static void lacp_update_portmap(struct lacp_softc *lsc) { struct lagg_softc *sc = lsc->lsc_softc; struct lacp_aggregator *la; struct lacp_portmap *p; struct lacp_port *lp; uint64_t speed; u_int newmap; int i; +#ifdef NUMA + int count; + uint8_t domain; +#endif newmap = lsc->lsc_activemap == 0 ? 1 : 0; p = &lsc->lsc_pmap[newmap]; la = lsc->lsc_active_aggregator; speed = 0; bzero(p, sizeof(struct lacp_portmap)); if (la != NULL && la->la_nports > 0) { p->pm_count = la->la_nports; i = 0; - TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) + TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) { p->pm_map[i++] = lp; +#ifdef NUMA + domain = lp->lp_ifp->if_numa_domain; + if (domain >= MAXMEMDOM) + continue; + count = p->pm_numa[domain].count; + p->pm_numa[domain].map[count] = lp; + p->pm_numa[domain].count++; +#endif + } KASSERT(i == p->pm_count, ("Invalid port count")); + +#ifdef NUMA + for (i = 0; i < MAXMEMDOM; i++) { + if (p->pm_numa[i].count != 0) + p->pm_num_dom++; + } +#endif speed = lacp_aggregator_bandwidth(la); } sc->sc_ifp->if_baudrate = speed; /* switch the active portmap over */ atomic_store_rel_int(&lsc->lsc_activemap, newmap); LACP_DPRINTF((NULL, "Set table %d with %d ports\n", lsc->lsc_activemap, lsc->lsc_pmap[lsc->lsc_activemap].pm_count)); } static uint16_t lacp_compose_key(struct lacp_port *lp) { struct lagg_port *lgp = lp->lp_lagg; struct lagg_softc *sc = lgp->lp_softc; u_int media = lp->lp_media; uint16_t key; if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) { /* * non-aggregatable links should have unique keys. * * XXX this isn't really unique as if_index is 16 bit. 
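 * (Only the low 15 bits of if_index fit in the key; bit 15 is the
 * non-aggregatable marker.)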
*/ /* bit 0..14: (some bits of) if_index of this port */ key = lp->lp_ifp->if_index; /* bit 15: 1 */ key |= 0x8000; } else { u_int subtype = IFM_SUBTYPE(media); KASSERT(IFM_TYPE(media) == IFM_ETHER, ("invalid media type")); KASSERT((media & IFM_FDX) != 0, ("aggregating HDX interface")); /* bit 0..4: IFM_SUBTYPE modulo speed */ switch (subtype) { case IFM_10_T: case IFM_10_2: case IFM_10_5: case IFM_10_STP: case IFM_10_FL: key = IFM_10_T; break; case IFM_100_TX: case IFM_100_FX: case IFM_100_T4: case IFM_100_VG: case IFM_100_T2: case IFM_100_T: case IFM_100_SGMII: key = IFM_100_TX; break; case IFM_1000_SX: case IFM_1000_LX: case IFM_1000_CX: case IFM_1000_T: case IFM_1000_KX: case IFM_1000_SGMII: case IFM_1000_CX_SGMII: key = IFM_1000_SX; break; case IFM_10G_LR: case IFM_10G_SR: case IFM_10G_CX4: case IFM_10G_TWINAX: case IFM_10G_TWINAX_LONG: case IFM_10G_LRM: case IFM_10G_T: case IFM_10G_KX4: case IFM_10G_KR: case IFM_10G_CR1: case IFM_10G_ER: case IFM_10G_SFI: case IFM_10G_AOC: key = IFM_10G_LR; break; case IFM_20G_KR2: key = IFM_20G_KR2; break; case IFM_2500_KX: case IFM_2500_T: case IFM_2500_X: key = IFM_2500_KX; break; case IFM_5000_T: case IFM_5000_KR: case IFM_5000_KR_S: case IFM_5000_KR1: key = IFM_5000_T; break; case IFM_50G_PCIE: case IFM_50G_CR2: case IFM_50G_KR2: case IFM_50G_SR2: case IFM_50G_LR2: case IFM_50G_LAUI2_AC: case IFM_50G_LAUI2: case IFM_50G_AUI2_AC: case IFM_50G_AUI2: case IFM_50G_CP: case IFM_50G_SR: case IFM_50G_LR: case IFM_50G_FR: case IFM_50G_KR_PAM4: case IFM_50G_AUI1_AC: case IFM_50G_AUI1: key = IFM_50G_PCIE; break; case IFM_56G_R4: key = IFM_56G_R4; break; case IFM_25G_PCIE: case IFM_25G_CR: case IFM_25G_KR: case IFM_25G_SR: case IFM_25G_LR: case IFM_25G_ACC: case IFM_25G_AOC: case IFM_25G_T: case IFM_25G_CR_S: case IFM_25G_CR1: case IFM_25G_KR_S: case IFM_25G_AUI: case IFM_25G_KR1: key = IFM_25G_PCIE; break; case IFM_40G_CR4: case IFM_40G_SR4: case IFM_40G_LR4: case IFM_40G_XLPPI: case IFM_40G_KR4: case IFM_40G_XLAUI: case IFM_40G_XLAUI_AC: case IFM_40G_ER4: key = IFM_40G_CR4; break; case IFM_100G_CR4: case IFM_100G_SR4: case IFM_100G_KR4: case IFM_100G_LR4: case IFM_100G_CAUI4_AC: case IFM_100G_CAUI4: case IFM_100G_AUI4_AC: case IFM_100G_AUI4: case IFM_100G_CR_PAM4: case IFM_100G_KR_PAM4: case IFM_100G_CP2: case IFM_100G_SR2: case IFM_100G_DR: case IFM_100G_KR2_PAM4: case IFM_100G_CAUI2_AC: case IFM_100G_CAUI2: case IFM_100G_AUI2_AC: case IFM_100G_AUI2: key = IFM_100G_CR4; break; case IFM_200G_CR4_PAM4: case IFM_200G_SR4: case IFM_200G_FR4: case IFM_200G_LR4: case IFM_200G_DR4: case IFM_200G_KR4_PAM4: case IFM_200G_AUI4_AC: case IFM_200G_AUI4: case IFM_200G_AUI8_AC: case IFM_200G_AUI8: key = IFM_200G_CR4_PAM4; break; case IFM_400G_FR8: case IFM_400G_LR8: case IFM_400G_DR4: case IFM_400G_AUI8_AC: case IFM_400G_AUI8: key = IFM_400G_FR8; break; default: key = subtype; break; } /* bit 5..14: (some bits of) if_index of lagg device */ key |= 0x7fe0 & ((sc->sc_ifp->if_index) << 5); /* bit 15: 0 */ } return (htons(key)); } static void lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la) { char buf[LACP_LAGIDSTR_MAX+1]; LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n", __func__, lacp_format_lagid(&la->la_actor, &la->la_partner, buf, sizeof(buf)), la->la_refcnt, la->la_refcnt + 1)); KASSERT(la->la_refcnt > 0, ("refcount <= 0")); la->la_refcnt++; KASSERT(la->la_refcnt > la->la_nports, ("invalid refcount")); } static void lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la) { char buf[LACP_LAGIDSTR_MAX+1]; 
LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n", __func__, lacp_format_lagid(&la->la_actor, &la->la_partner, buf, sizeof(buf)), la->la_refcnt, la->la_refcnt - 1)); KASSERT(la->la_refcnt > la->la_nports, ("invalid refcnt")); la->la_refcnt--; if (la->la_refcnt > 0) { return; } KASSERT(la->la_refcnt == 0, ("refcount not zero")); KASSERT(lsc->lsc_active_aggregator != la, ("aggregator active")); TAILQ_REMOVE(&lsc->lsc_aggregators, la, la_q); free(la, M_DEVBUF); } /* * lacp_aggregator_get: allocate an aggregator. */ static struct lacp_aggregator * lacp_aggregator_get(struct lacp_softc *lsc, struct lacp_port *lp) { struct lacp_aggregator *la; la = malloc(sizeof(*la), M_DEVBUF, M_NOWAIT); if (la) { la->la_refcnt = 1; la->la_nports = 0; TAILQ_INIT(&la->la_ports); la->la_pending = 0; TAILQ_INSERT_TAIL(&lsc->lsc_aggregators, la, la_q); } return (la); } /* * lacp_fill_aggregator_id: setup a newly allocated aggregator from a port. */ static void lacp_fill_aggregator_id(struct lacp_aggregator *la, const struct lacp_port *lp) { lacp_fill_aggregator_id_peer(&la->la_partner, &lp->lp_partner); lacp_fill_aggregator_id_peer(&la->la_actor, &lp->lp_actor); la->la_actor.lip_state = lp->lp_state & LACP_STATE_AGGREGATION; } static void lacp_fill_aggregator_id_peer(struct lacp_peerinfo *lpi_aggr, const struct lacp_peerinfo *lpi_port) { memset(lpi_aggr, 0, sizeof(*lpi_aggr)); lpi_aggr->lip_systemid = lpi_port->lip_systemid; lpi_aggr->lip_key = lpi_port->lip_key; } /* * lacp_aggregator_is_compatible: check if a port can join to an aggregator. */ static int lacp_aggregator_is_compatible(const struct lacp_aggregator *la, const struct lacp_port *lp) { if (!(lp->lp_state & LACP_STATE_AGGREGATION) || !(lp->lp_partner.lip_state & LACP_STATE_AGGREGATION)) { return (0); } if (!(la->la_actor.lip_state & LACP_STATE_AGGREGATION)) { return (0); } if (!lacp_peerinfo_is_compatible(&la->la_partner, &lp->lp_partner)) { return (0); } if (!lacp_peerinfo_is_compatible(&la->la_actor, &lp->lp_actor)) { return (0); } return (1); } static int lacp_peerinfo_is_compatible(const struct lacp_peerinfo *a, const struct lacp_peerinfo *b) { if (memcmp(&a->lip_systemid, &b->lip_systemid, sizeof(a->lip_systemid))) { return (0); } if (memcmp(&a->lip_key, &b->lip_key, sizeof(a->lip_key))) { return (0); } return (1); } static void lacp_port_enable(struct lacp_port *lp) { lp->lp_state |= LACP_STATE_AGGREGATION; } static void lacp_port_disable(struct lacp_port *lp) { lacp_set_mux(lp, LACP_MUX_DETACHED); lp->lp_state &= ~LACP_STATE_AGGREGATION; lp->lp_selected = LACP_UNSELECTED; lacp_sm_rx_record_default(lp); lp->lp_partner.lip_state &= ~LACP_STATE_AGGREGATION; lp->lp_state &= ~LACP_STATE_EXPIRED; } /* * lacp_select: select an aggregator. create one if necessary. */ static void lacp_select(struct lacp_port *lp) { struct lacp_softc *lsc = lp->lp_lsc; struct lacp_aggregator *la; char buf[LACP_LAGIDSTR_MAX+1]; if (lp->lp_aggregator) { return; } /* If we haven't heard from our peer, skip this step. */ if (lp->lp_state & LACP_STATE_DEFAULTED) return; KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE), ("timer_wait_while still active")); LACP_DPRINTF((lp, "port lagid=%s\n", lacp_format_lagid(&lp->lp_actor, &lp->lp_partner, buf, sizeof(buf)))); TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) { if (lacp_aggregator_is_compatible(la, lp)) { break; } } if (la == NULL) { la = lacp_aggregator_get(lsc, lp); if (la == NULL) { LACP_DPRINTF((lp, "aggregator creation failed\n")); /* * will retry on the next tick. 
*/ return; } lacp_fill_aggregator_id(la, lp); LACP_DPRINTF((lp, "aggregator created\n")); } else { LACP_DPRINTF((lp, "compatible aggregator found\n")); if (la->la_refcnt == LACP_MAX_PORTS) return; lacp_aggregator_addref(lsc, la); } LACP_DPRINTF((lp, "aggregator lagid=%s\n", lacp_format_lagid(&la->la_actor, &la->la_partner, buf, sizeof(buf)))); lp->lp_aggregator = la; lp->lp_selected = LACP_SELECTED; } /* * lacp_unselect: finish unselect/detach process. */ static void lacp_unselect(struct lacp_port *lp) { struct lacp_softc *lsc = lp->lp_lsc; struct lacp_aggregator *la = lp->lp_aggregator; KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE), ("timer_wait_while still active")); if (la == NULL) { return; } lp->lp_aggregator = NULL; lacp_aggregator_delref(lsc, la); } /* mux machine */ static void lacp_sm_mux(struct lacp_port *lp) { struct lagg_port *lgp = lp->lp_lagg; struct lagg_softc *sc = lgp->lp_softc; enum lacp_mux_state new_state; boolean_t p_sync = (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0; boolean_t p_collecting = (lp->lp_partner.lip_state & LACP_STATE_COLLECTING) != 0; enum lacp_selected selected = lp->lp_selected; struct lacp_aggregator *la; if (V_lacp_debug > 1) lacp_dprintf(lp, "%s: state= 0x%x, selected= 0x%x, " "p_sync= 0x%x, p_collecting= 0x%x\n", __func__, lp->lp_mux_state, selected, p_sync, p_collecting); re_eval: la = lp->lp_aggregator; KASSERT(lp->lp_mux_state == LACP_MUX_DETACHED || la != NULL, ("MUX not detached")); new_state = lp->lp_mux_state; switch (lp->lp_mux_state) { case LACP_MUX_DETACHED: if (selected != LACP_UNSELECTED) { new_state = LACP_MUX_WAITING; } break; case LACP_MUX_WAITING: KASSERT(la->la_pending > 0 || !LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE), ("timer_wait_while still active")); if (selected == LACP_SELECTED && la->la_pending == 0) { new_state = LACP_MUX_ATTACHED; } else if (selected == LACP_UNSELECTED) { new_state = LACP_MUX_DETACHED; } break; case LACP_MUX_ATTACHED: if (selected == LACP_SELECTED && p_sync) { new_state = LACP_MUX_COLLECTING; } else if (selected != LACP_SELECTED) { new_state = LACP_MUX_DETACHED; } break; case LACP_MUX_COLLECTING: if (selected == LACP_SELECTED && p_sync && p_collecting) { new_state = LACP_MUX_DISTRIBUTING; } else if (selected != LACP_SELECTED || !p_sync) { new_state = LACP_MUX_ATTACHED; } break; case LACP_MUX_DISTRIBUTING: if (selected != LACP_SELECTED || !p_sync || !p_collecting) { new_state = LACP_MUX_COLLECTING; lacp_dprintf(lp, "Interface stopped DISTRIBUTING, possible flapping\n"); sc->sc_flapping++; } break; default: panic("%s: unknown state", __func__); } if (lp->lp_mux_state == new_state) { return; } lacp_set_mux(lp, new_state); goto re_eval; } static void lacp_set_mux(struct lacp_port *lp, enum lacp_mux_state new_state) { struct lacp_aggregator *la = lp->lp_aggregator; if (lp->lp_mux_state == new_state) { return; } switch (new_state) { case LACP_MUX_DETACHED: lp->lp_state &= ~LACP_STATE_SYNC; lacp_disable_distributing(lp); lacp_disable_collecting(lp); lacp_sm_assert_ntt(lp); /* cancel timer */ if (LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)) { KASSERT(la->la_pending > 0, ("timer_wait_while not active")); la->la_pending--; } LACP_TIMER_DISARM(lp, LACP_TIMER_WAIT_WHILE); lacp_unselect(lp); break; case LACP_MUX_WAITING: LACP_TIMER_ARM(lp, LACP_TIMER_WAIT_WHILE, LACP_AGGREGATE_WAIT_TIME); la->la_pending++; break; case LACP_MUX_ATTACHED: lp->lp_state |= LACP_STATE_SYNC; lacp_disable_collecting(lp); lacp_sm_assert_ntt(lp); break; case LACP_MUX_COLLECTING: lacp_enable_collecting(lp); 
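 /* Collecting is enabled first; distributing stays off until the
  * partner reports COLLECTING as well (see lacp_sm_mux()). */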
lacp_disable_distributing(lp); lacp_sm_assert_ntt(lp); break; case LACP_MUX_DISTRIBUTING: lacp_enable_distributing(lp); break; default: panic("%s: unknown state", __func__); } LACP_DPRINTF((lp, "mux_state %d -> %d\n", lp->lp_mux_state, new_state)); lp->lp_mux_state = new_state; } static void lacp_sm_mux_timer(struct lacp_port *lp) { struct lacp_aggregator *la = lp->lp_aggregator; char buf[LACP_LAGIDSTR_MAX+1]; KASSERT(la->la_pending > 0, ("no pending event")); LACP_DPRINTF((lp, "%s: aggregator %s, pending %d -> %d\n", __func__, lacp_format_lagid(&la->la_actor, &la->la_partner, buf, sizeof(buf)), la->la_pending, la->la_pending - 1)); la->la_pending--; } /* periodic transmit machine */ static void lacp_sm_ptx_update_timeout(struct lacp_port *lp, uint8_t oldpstate) { if (LACP_STATE_EQ(oldpstate, lp->lp_partner.lip_state, LACP_STATE_TIMEOUT)) { return; } LACP_DPRINTF((lp, "partner timeout changed\n")); /* * FAST_PERIODIC -> SLOW_PERIODIC * or * SLOW_PERIODIC (-> PERIODIC_TX) -> FAST_PERIODIC * * let lacp_sm_ptx_tx_schedule update the timeout. */ LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC); /* * if timeout has been shortened, assert NTT. */ if ((lp->lp_partner.lip_state & LACP_STATE_TIMEOUT)) { lacp_sm_assert_ntt(lp); } } static void lacp_sm_ptx_tx_schedule(struct lacp_port *lp) { int timeout; if (!(lp->lp_state & LACP_STATE_ACTIVITY) && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) { /* * NO_PERIODIC */ LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC); return; } if (LACP_TIMER_ISARMED(lp, LACP_TIMER_PERIODIC)) { return; } timeout = (lp->lp_partner.lip_state & LACP_STATE_TIMEOUT) ? LACP_FAST_PERIODIC_TIME : LACP_SLOW_PERIODIC_TIME; LACP_TIMER_ARM(lp, LACP_TIMER_PERIODIC, timeout); } static void lacp_sm_ptx_timer(struct lacp_port *lp) { lacp_sm_assert_ntt(lp); } static void lacp_sm_rx(struct lacp_port *lp, const struct lacpdu *du) { int timeout; /* * check LACP_DISABLED first */ if (!(lp->lp_state & LACP_STATE_AGGREGATION)) { return; } /* * check loopback condition. */ if (!lacp_compare_systemid(&du->ldu_actor.lip_systemid, &lp->lp_actor.lip_systemid)) { return; } /* * EXPIRED, DEFAULTED, CURRENT -> CURRENT */ lacp_sm_rx_update_selected(lp, du); lacp_sm_rx_update_ntt(lp, du); lacp_sm_rx_record_pdu(lp, du); timeout = (lp->lp_state & LACP_STATE_TIMEOUT) ? LACP_SHORT_TIMEOUT_TIME : LACP_LONG_TIMEOUT_TIME; LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, timeout); lp->lp_state &= ~LACP_STATE_EXPIRED; /* * kick the transmit machine without waiting for the next tick.
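Two different TIMEOUT bits drive the timing here: lacp_sm_ptx_tx_schedule() picks the transmit interval from the partner's LACP_STATE_TIMEOUT bit, while lacp_sm_rx() arms CURRENT_WHILE from the local one, and each expiry is defined as three transmit intervals. A standalone sketch of that arithmetic, using the constants from ieee8023ad_lacp.h:

#include <stdio.h>

#define LACP_STATE_TIMEOUT      (1 << 1)
#define LACP_FAST_PERIODIC_TIME (1)    /* seconds */
#define LACP_SLOW_PERIODIC_TIME (30)
#define LACP_SHORT_TIMEOUT_TIME (3 * LACP_FAST_PERIODIC_TIME)
#define LACP_LONG_TIMEOUT_TIME  (3 * LACP_SLOW_PERIODIC_TIME)

int
main(void)
{
    unsigned char partner = LACP_STATE_TIMEOUT; /* partner wants fast rate */
    unsigned char local = 0;                    /* we advertise slow rate */
    int tx = (partner & LACP_STATE_TIMEOUT) ?
        LACP_FAST_PERIODIC_TIME : LACP_SLOW_PERIODIC_TIME;
    int expire = (local & LACP_STATE_TIMEOUT) ?
        LACP_SHORT_TIMEOUT_TIME : LACP_LONG_TIMEOUT_TIME;

    printf("send every %d s, expire partner info after %d s\n", tx, expire);
    return (0);
}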
*/ lacp_sm_tx(lp); } static void lacp_sm_rx_set_expired(struct lacp_port *lp) { lp->lp_partner.lip_state &= ~LACP_STATE_SYNC; lp->lp_partner.lip_state |= LACP_STATE_TIMEOUT; LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, LACP_SHORT_TIMEOUT_TIME); lp->lp_state |= LACP_STATE_EXPIRED; } static void lacp_sm_rx_timer(struct lacp_port *lp) { if ((lp->lp_state & LACP_STATE_EXPIRED) == 0) { /* CURRENT -> EXPIRED */ LACP_DPRINTF((lp, "%s: CURRENT -> EXPIRED\n", __func__)); lacp_sm_rx_set_expired(lp); } else { /* EXPIRED -> DEFAULTED */ LACP_DPRINTF((lp, "%s: EXPIRED -> DEFAULTED\n", __func__)); lacp_sm_rx_update_default_selected(lp); lacp_sm_rx_record_default(lp); lp->lp_state &= ~LACP_STATE_EXPIRED; } } static void lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du) { boolean_t active; uint8_t oldpstate; char buf[LACP_STATESTR_MAX+1]; LACP_TRACE(lp); oldpstate = lp->lp_partner.lip_state; active = (du->ldu_actor.lip_state & LACP_STATE_ACTIVITY) || ((lp->lp_state & LACP_STATE_ACTIVITY) && (du->ldu_partner.lip_state & LACP_STATE_ACTIVITY)); lp->lp_partner = du->ldu_actor; if (active && ((LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state, LACP_STATE_AGGREGATION) && !lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner)) || (du->ldu_partner.lip_state & LACP_STATE_AGGREGATION) == 0)) { /* * XXX Maintain legacy behavior of leaving the * LACP_STATE_SYNC bit unchanged from the partner's * advertisement if lsc_strict_mode is false. * TODO: We should re-examine the concept of the "strict mode" * to ensure it makes sense to maintain a non-strict mode. */ if (lp->lp_lsc->lsc_strict_mode) lp->lp_partner.lip_state |= LACP_STATE_SYNC; } else { lp->lp_partner.lip_state &= ~LACP_STATE_SYNC; } lp->lp_state &= ~LACP_STATE_DEFAULTED; if (oldpstate != lp->lp_partner.lip_state) { LACP_DPRINTF((lp, "old pstate %s\n", lacp_format_state(oldpstate, buf, sizeof(buf)))); LACP_DPRINTF((lp, "new pstate %s\n", lacp_format_state(lp->lp_partner.lip_state, buf, sizeof(buf)))); } lacp_sm_ptx_update_timeout(lp, oldpstate); } static void lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du) { LACP_TRACE(lp); if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) || !LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state, LACP_STATE_ACTIVITY | LACP_STATE_SYNC | LACP_STATE_AGGREGATION)) { LACP_DPRINTF((lp, "%s: assert ntt\n", __func__)); lacp_sm_assert_ntt(lp); } } static void lacp_sm_rx_record_default(struct lacp_port *lp) { uint8_t oldpstate; LACP_TRACE(lp); oldpstate = lp->lp_partner.lip_state; if (lp->lp_lsc->lsc_strict_mode) lp->lp_partner = lacp_partner_admin_strict; else lp->lp_partner = lacp_partner_admin_optimistic; lp->lp_state |= LACP_STATE_DEFAULTED; lacp_sm_ptx_update_timeout(lp, oldpstate); } static void lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp, const struct lacp_peerinfo *info) { LACP_TRACE(lp); if (lacp_compare_peerinfo(&lp->lp_partner, info) || !LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state, LACP_STATE_AGGREGATION)) { lp->lp_selected = LACP_UNSELECTED; /* mux machine will clean up lp->lp_aggregator */ } } static void lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du) { LACP_TRACE(lp); lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor); } static void lacp_sm_rx_update_default_selected(struct lacp_port *lp) { LACP_TRACE(lp); if (lp->lp_lsc->lsc_strict_mode) lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin_strict); else lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin_optimistic); } /* 
transmit machine */ static void lacp_sm_tx(struct lacp_port *lp) { int error = 0; if (!(lp->lp_state & LACP_STATE_AGGREGATION) #if 1 || (!(lp->lp_state & LACP_STATE_ACTIVITY) && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) #endif ) { lp->lp_flags &= ~LACP_PORT_NTT; } if (!(lp->lp_flags & LACP_PORT_NTT)) { return; } /* Rate limit to 3 PDUs per LACP_FAST_PERIODIC_TIME */ if (ppsratecheck(&lp->lp_last_lacpdu, &lp->lp_lacpdu_sent, (3 / LACP_FAST_PERIODIC_TIME)) == 0) { LACP_DPRINTF((lp, "rate limited pdu\n")); return; } if (((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_tx_test) == 0) { error = lacp_xmit_lacpdu(lp); } else { LACP_TPRINTF((lp, "Dropping TX PDU\n")); } if (error == 0) { lp->lp_flags &= ~LACP_PORT_NTT; } else { LACP_DPRINTF((lp, "lacpdu transmit failure, error %d\n", error)); } } static void lacp_sm_assert_ntt(struct lacp_port *lp) { lp->lp_flags |= LACP_PORT_NTT; } static void lacp_run_timers(struct lacp_port *lp) { int i; for (i = 0; i < LACP_NTIMER; i++) { KASSERT(lp->lp_timer[i] >= 0, ("invalid timer value %d", lp->lp_timer[i])); if (lp->lp_timer[i] == 0) { continue; } else if (--lp->lp_timer[i] <= 0) { if (lacp_timer_funcs[i]) { (*lacp_timer_funcs[i])(lp); } } } } int lacp_marker_input(struct lacp_port *lp, struct mbuf *m) { struct lacp_softc *lsc = lp->lp_lsc; struct lagg_port *lgp = lp->lp_lagg; struct lacp_port *lp2; struct markerdu *mdu; int error = 0; int pending = 0; if (m->m_pkthdr.len != sizeof(*mdu)) { goto bad; } if ((m->m_flags & M_MCAST) == 0) { goto bad; } if (m->m_len < sizeof(*mdu)) { m = m_pullup(m, sizeof(*mdu)); if (m == NULL) { return (ENOMEM); } } mdu = mtod(m, struct markerdu *); if (memcmp(&mdu->mdu_eh.ether_dhost, &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) { goto bad; } if (mdu->mdu_sph.sph_version != 1) { goto bad; } switch (mdu->mdu_tlv.tlv_type) { case MARKER_TYPE_INFO: if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv, marker_info_tlv_template, TRUE)) { goto bad; } mdu->mdu_tlv.tlv_type = MARKER_TYPE_RESPONSE; memcpy(&mdu->mdu_eh.ether_dhost, &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN); memcpy(&mdu->mdu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN); error = lagg_enqueue(lp->lp_ifp, m); break; case MARKER_TYPE_RESPONSE: if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv, marker_response_tlv_template, TRUE)) { goto bad; } LACP_DPRINTF((lp, "marker response, port=%u, sys=%6D, id=%u\n", ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system, ":", ntohl(mdu->mdu_info.mi_rq_xid))); /* Verify that it is the last marker we sent out */ if (memcmp(&mdu->mdu_info, &lp->lp_marker, sizeof(struct lacp_markerinfo))) goto bad; LACP_LOCK(lsc); lp->lp_flags &= ~LACP_PORT_MARK; if (lsc->lsc_suppress_distributing) { /* Check if any ports are waiting for a response */ LIST_FOREACH(lp2, &lsc->lsc_ports, lp_next) { if (lp2->lp_flags & LACP_PORT_MARK) { pending = 1; break; } } if (pending == 0) { /* All interface queues are clear */ LACP_DPRINTF((NULL, "queue flush complete\n")); lsc->lsc_suppress_distributing = FALSE; } } LACP_UNLOCK(lsc); m_freem(m); break; default: goto bad; } return (error); bad: LACP_DPRINTF((lp, "bad marker frame\n")); m_freem(m); return (EINVAL); } static int tlv_check(const void *p, size_t size, const struct tlvhdr *tlv, const struct tlv_template *tmpl, boolean_t check_type) { while (/* CONSTCOND */ 1) { if ((const char *)tlv - (const char *)p + sizeof(*tlv) > size) { return (EINVAL); } if ((check_type && tlv->tlv_type != tmpl->tmpl_type) || tlv->tlv_length != tmpl->tmpl_length) { return (EINVAL); } if (tmpl->tmpl_type ==
0) { break; } tlv = (const struct tlvhdr *) ((const char *)tlv + tlv->tlv_length); tmpl++; } return (0); } /* Debugging */ const char * lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen) { snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X", (int)mac[0], (int)mac[1], (int)mac[2], (int)mac[3], (int)mac[4], (int)mac[5]); return (buf); } const char * lacp_format_systemid(const struct lacp_systemid *sysid, char *buf, size_t buflen) { char macbuf[LACP_MACSTR_MAX+1]; snprintf(buf, buflen, "%04X,%s", ntohs(sysid->lsi_prio), lacp_format_mac(sysid->lsi_mac, macbuf, sizeof(macbuf))); return (buf); } const char * lacp_format_portid(const struct lacp_portid *portid, char *buf, size_t buflen) { snprintf(buf, buflen, "%04X,%04X", ntohs(portid->lpi_prio), ntohs(portid->lpi_portno)); return (buf); } const char * lacp_format_partner(const struct lacp_peerinfo *peer, char *buf, size_t buflen) { char sysid[LACP_SYSTEMIDSTR_MAX+1]; char portid[LACP_PORTIDSTR_MAX+1]; snprintf(buf, buflen, "(%s,%04X,%s)", lacp_format_systemid(&peer->lip_systemid, sysid, sizeof(sysid)), ntohs(peer->lip_key), lacp_format_portid(&peer->lip_portid, portid, sizeof(portid))); return (buf); } const char * lacp_format_lagid(const struct lacp_peerinfo *a, const struct lacp_peerinfo *b, char *buf, size_t buflen) { char astr[LACP_PARTNERSTR_MAX+1]; char bstr[LACP_PARTNERSTR_MAX+1]; #if 0 /* * there's a convention to display small numbered peer * in the left. */ if (lacp_compare_peerinfo(a, b) > 0) { const struct lacp_peerinfo *t; t = a; a = b; b = t; } #endif snprintf(buf, buflen, "[%s,%s]", lacp_format_partner(a, astr, sizeof(astr)), lacp_format_partner(b, bstr, sizeof(bstr))); return (buf); } const char * lacp_format_lagid_aggregator(const struct lacp_aggregator *la, char *buf, size_t buflen) { if (la == NULL) { return ("(none)"); } return (lacp_format_lagid(&la->la_actor, &la->la_partner, buf, buflen)); } const char * lacp_format_state(uint8_t state, char *buf, size_t buflen) { snprintf(buf, buflen, "%b", state, LACP_STATE_BITS); return (buf); } static void lacp_dump_lacpdu(const struct lacpdu *du) { char buf[LACP_PARTNERSTR_MAX+1]; char buf2[LACP_STATESTR_MAX+1]; printf("actor=%s\n", lacp_format_partner(&du->ldu_actor, buf, sizeof(buf))); printf("actor.state=%s\n", lacp_format_state(du->ldu_actor.lip_state, buf2, sizeof(buf2))); printf("partner=%s\n", lacp_format_partner(&du->ldu_partner, buf, sizeof(buf))); printf("partner.state=%s\n", lacp_format_state(du->ldu_partner.lip_state, buf2, sizeof(buf2))); printf("maxdelay=%d\n", ntohs(du->ldu_collector.lci_maxdelay)); } static void lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...) { va_list va; if (lp) { printf("%s: ", lp->lp_ifp->if_xname); } va_start(va, fmt); vprintf(fmt, va); va_end(va); } Index: projects/runtime-coverage-v2/sys/net/ieee8023ad_lacp.h =================================================================== --- projects/runtime-coverage-v2/sys/net/ieee8023ad_lacp.h (revision 347075) +++ projects/runtime-coverage-v2/sys/net/ieee8023ad_lacp.h (revision 347076) @@ -1,346 +1,353 @@ /* $NetBSD: ieee8023ad_impl.h,v 1.2 2005/12/10 23:21:39 elad Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-NetBSD * * Copyright (c)2005 YAMAMOTO Takashi, * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * IEEE802.3ad LACP * * implementation details. */ #define LACP_TIMER_CURRENT_WHILE 0 #define LACP_TIMER_PERIODIC 1 #define LACP_TIMER_WAIT_WHILE 2 #define LACP_NTIMER 3 #define LACP_TIMER_ARM(port, timer, val) \ (port)->lp_timer[(timer)] = (val) #define LACP_TIMER_DISARM(port, timer) \ (port)->lp_timer[(timer)] = 0 #define LACP_TIMER_ISARMED(port, timer) \ ((port)->lp_timer[(timer)] > 0) /* * IEEE802.3ad LACP * * protocol definitions. */ #define LACP_STATE_ACTIVITY (1<<0) #define LACP_STATE_TIMEOUT (1<<1) #define LACP_STATE_AGGREGATION (1<<2) #define LACP_STATE_SYNC (1<<3) #define LACP_STATE_COLLECTING (1<<4) #define LACP_STATE_DISTRIBUTING (1<<5) #define LACP_STATE_DEFAULTED (1<<6) #define LACP_STATE_EXPIRED (1<<7) #define LACP_PORT_NTT 0x00000001 #define LACP_PORT_MARK 0x00000002 #define LACP_STATE_BITS \ "\020" \ "\001ACTIVITY" \ "\002TIMEOUT" \ "\003AGGREGATION" \ "\004SYNC" \ "\005COLLECTING" \ "\006DISTRIBUTING" \ "\007DEFAULTED" \ "\010EXPIRED" #ifdef _KERNEL /* * IEEE802.3 slow protocols * * protocol (on-wire) definitions. * * XXX should be elsewhere. */ #define SLOWPROTOCOLS_SUBTYPE_LACP 1 #define SLOWPROTOCOLS_SUBTYPE_MARKER 2 struct slowprothdr { uint8_t sph_subtype; uint8_t sph_version; } __packed; /* * TLV on-wire structure. */ struct tlvhdr { uint8_t tlv_type; uint8_t tlv_length; /* uint8_t tlv_value[]; */ } __packed; /* * ... and our implementation. */ #define TLV_SET(tlv, type, length) \ do { \ (tlv)->tlv_type = (type); \ (tlv)->tlv_length = sizeof(*tlv) + (length); \ } while (/*CONSTCOND*/0) struct tlv_template { uint8_t tmpl_type; uint8_t tmpl_length; }; struct lacp_systemid { uint16_t lsi_prio; uint8_t lsi_mac[6]; } __packed; struct lacp_portid { uint16_t lpi_prio; uint16_t lpi_portno; } __packed; struct lacp_peerinfo { struct lacp_systemid lip_systemid; uint16_t lip_key; struct lacp_portid lip_portid; uint8_t lip_state; uint8_t lip_resv[3]; } __packed; struct lacp_collectorinfo { uint16_t lci_maxdelay; uint8_t lci_resv[12]; } __packed; struct lacpdu { struct ether_header ldu_eh; struct slowprothdr ldu_sph; struct tlvhdr ldu_tlv_actor; struct lacp_peerinfo ldu_actor; struct tlvhdr ldu_tlv_partner; struct lacp_peerinfo ldu_partner; struct tlvhdr ldu_tlv_collector; struct lacp_collectorinfo ldu_collector; struct tlvhdr ldu_tlv_term; uint8_t ldu_resv[50]; } __packed; /* * IEEE802.3ad marker protocol * * protocol (on-wire) definitions. 
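LACP_STATE_BITS is a description string for the kernel printf(9) %b conversion used by lacp_format_state(): the leading \020 selects hexadecimal output (020 octal = 16) and each following \00N byte names bit N, counted from 1. A plain-C sketch of the same decoding, so 0xc5 renders as 0xc5<ACTIVITY,AGGREGATION,DEFAULTED,EXPIRED>:

#include <stdio.h>

static const char *statenames[] = {
    "ACTIVITY", "TIMEOUT", "AGGREGATION", "SYNC",
    "COLLECTING", "DISTRIBUTING", "DEFAULTED", "EXPIRED",
};

static void
print_state(unsigned state)
{
    int i, first = 1;

    printf("%#x<", state);
    for (i = 0; i < 8; i++) {
        if (state & (1u << i)) {
            printf("%s%s", first ? "" : ",", statenames[i]);
            first = 0;
        }
    }
    printf(">\n");
}

int
main(void)
{
    print_state(0xc5);
    return (0);
}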
*/ struct lacp_markerinfo { uint16_t mi_rq_port; uint8_t mi_rq_system[ETHER_ADDR_LEN]; uint32_t mi_rq_xid; uint8_t mi_pad[2]; } __packed; struct markerdu { struct ether_header mdu_eh; struct slowprothdr mdu_sph; struct tlvhdr mdu_tlv; struct lacp_markerinfo mdu_info; struct tlvhdr mdu_tlv_term; uint8_t mdu_resv[90]; } __packed; #define MARKER_TYPE_INFO 0x01 #define MARKER_TYPE_RESPONSE 0x02 enum lacp_selected { LACP_UNSELECTED, LACP_STANDBY, /* not used in this implementation */ LACP_SELECTED, }; enum lacp_mux_state { LACP_MUX_DETACHED, LACP_MUX_WAITING, LACP_MUX_ATTACHED, LACP_MUX_COLLECTING, LACP_MUX_DISTRIBUTING, }; #define LACP_MAX_PORTS 32 +struct lacp_numa { + int count; + struct lacp_port *map[LACP_MAX_PORTS]; +}; + struct lacp_portmap { int pm_count; + int pm_num_dom; + struct lacp_numa pm_numa[MAXMEMDOM]; struct lacp_port *pm_map[LACP_MAX_PORTS]; }; struct lacp_port { TAILQ_ENTRY(lacp_port) lp_dist_q; LIST_ENTRY(lacp_port) lp_next; struct lacp_softc *lp_lsc; struct lagg_port *lp_lagg; struct ifnet *lp_ifp; struct lacp_peerinfo lp_partner; struct lacp_peerinfo lp_actor; struct lacp_markerinfo lp_marker; #define lp_state lp_actor.lip_state #define lp_key lp_actor.lip_key #define lp_systemid lp_actor.lip_systemid struct timeval lp_last_lacpdu; int lp_lacpdu_sent; enum lacp_mux_state lp_mux_state; enum lacp_selected lp_selected; int lp_flags; u_int lp_media; /* XXX redundant */ int lp_timer[LACP_NTIMER]; struct ifmultiaddr *lp_ifma; struct lacp_aggregator *lp_aggregator; }; struct lacp_aggregator { TAILQ_ENTRY(lacp_aggregator) la_q; int la_refcnt; /* num of ports which selected us */ int la_nports; /* num of distributing ports */ TAILQ_HEAD(, lacp_port) la_ports; /* distributing ports */ struct lacp_peerinfo la_partner; struct lacp_peerinfo la_actor; int la_pending; /* number of ports in wait_while */ }; struct lacp_softc { struct lagg_softc *lsc_softc; struct mtx lsc_mtx; struct lacp_aggregator *lsc_active_aggregator; TAILQ_HEAD(, lacp_aggregator) lsc_aggregators; boolean_t lsc_suppress_distributing; struct callout lsc_transit_callout; struct callout lsc_callout; LIST_HEAD(, lacp_port) lsc_ports; struct lacp_portmap lsc_pmap[2]; volatile u_int lsc_activemap; u_int32_t lsc_hashkey; struct { u_int32_t lsc_rx_test; u_int32_t lsc_tx_test; } lsc_debug; u_int32_t lsc_strict_mode; boolean_t lsc_fast_timeout; /* if set, fast timeout */ }; #define LACP_TYPE_ACTORINFO 1 #define LACP_TYPE_PARTNERINFO 2 #define LACP_TYPE_COLLECTORINFO 3 /* timeout values (in sec) */ #define LACP_FAST_PERIODIC_TIME (1) #define LACP_SLOW_PERIODIC_TIME (30) #define LACP_SHORT_TIMEOUT_TIME (3 * LACP_FAST_PERIODIC_TIME) #define LACP_LONG_TIMEOUT_TIME (3 * LACP_SLOW_PERIODIC_TIME) #define LACP_CHURN_DETECTION_TIME (60) #define LACP_AGGREGATE_WAIT_TIME (2) #define LACP_TRANSIT_DELAY 3000 /* in msec */ #define LACP_STATE_EQ(s1, s2, mask) \ ((((s1) ^ (s2)) & (mask)) == 0) #define LACP_SYS_PRI(peer) (peer).lip_systemid.lsi_prio #define LACP_PORT(_lp) ((struct lacp_port *)(_lp)->lp_psc) #define LACP_SOFTC(_sc) ((struct lacp_softc *)(_sc)->sc_psc) #define LACP_LOCK_INIT(_lsc) mtx_init(&(_lsc)->lsc_mtx, \ "lacp mtx", NULL, MTX_DEF) #define LACP_LOCK_DESTROY(_lsc) mtx_destroy(&(_lsc)->lsc_mtx) #define LACP_LOCK(_lsc) mtx_lock(&(_lsc)->lsc_mtx) #define LACP_UNLOCK(_lsc) mtx_unlock(&(_lsc)->lsc_mtx) #define LACP_LOCK_ASSERT(_lsc) mtx_assert(&(_lsc)->lsc_mtx, MA_OWNED) struct mbuf *lacp_input(struct lagg_port *, struct mbuf *); struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *); #ifdef RATELIMIT struct 
lagg_port *lacp_select_tx_port_by_hash(struct lagg_softc *, uint32_t); #endif void lacp_attach(struct lagg_softc *); void lacp_detach(void *); void lacp_init(struct lagg_softc *); void lacp_stop(struct lagg_softc *); int lacp_port_create(struct lagg_port *); void lacp_port_destroy(struct lagg_port *); void lacp_linkstate(struct lagg_port *); void lacp_req(struct lagg_softc *, void *); void lacp_portreq(struct lagg_port *, void *); static __inline int lacp_isactive(struct lagg_port *lgp) { struct lacp_port *lp = LACP_PORT(lgp); struct lacp_softc *lsc = lp->lp_lsc; struct lacp_aggregator *la = lp->lp_aggregator; /* This port is joined to the active aggregator */ if (la != NULL && la == lsc->lsc_active_aggregator) return (1); return (0); } static __inline int lacp_iscollecting(struct lagg_port *lgp) { struct lacp_port *lp = LACP_PORT(lgp); return ((lp->lp_state & LACP_STATE_COLLECTING) != 0); } static __inline int lacp_isdistributing(struct lagg_port *lgp) { struct lacp_port *lp = LACP_PORT(lgp); return ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0); } /* following constants don't include terminating NUL */ #define LACP_MACSTR_MAX (2*6 + 5) #define LACP_SYSTEMPRIOSTR_MAX (4) #define LACP_SYSTEMIDSTR_MAX (LACP_SYSTEMPRIOSTR_MAX + 1 + LACP_MACSTR_MAX) #define LACP_PORTPRIOSTR_MAX (4) #define LACP_PORTNOSTR_MAX (4) #define LACP_PORTIDSTR_MAX (LACP_PORTPRIOSTR_MAX + 1 + LACP_PORTNOSTR_MAX) #define LACP_KEYSTR_MAX (4) #define LACP_PARTNERSTR_MAX \ (1 + LACP_SYSTEMIDSTR_MAX + 1 + LACP_KEYSTR_MAX + 1 \ + LACP_PORTIDSTR_MAX + 1) #define LACP_LAGIDSTR_MAX \ (1 + LACP_PARTNERSTR_MAX + 1 + LACP_PARTNERSTR_MAX + 1) #define LACP_STATESTR_MAX (255) /* XXX */ #endif /* _KERNEL */ Index: projects/runtime-coverage-v2/sys/net/if_lagg.c =================================================================== --- projects/runtime-coverage-v2/sys/net/if_lagg.c (revision 347075) +++ projects/runtime-coverage-v2/sys/net/if_lagg.c (revision 347076) @@ -1,2267 +1,2278 @@ /* $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $ */ /* * Copyright (c) 2005, 2006 Reyk Floeter * Copyright (c) 2007 Andrew Thompson * Copyright (c) 2014, 2016 Marcelo Araujo * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
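The LACP_*STR_MAX constants above size the formatting buffers and deliberately exclude the terminating NUL, which is why every caller in ieee8023ad_lacp.c declares char buf[..._MAX + 1]. For example, LACP_MACSTR_MAX is 2*6 hex digits plus 5 dashes = 17; the same sizing in a standalone sketch mirroring lacp_format_mac():

#include <stdio.h>

#define LACP_MACSTR_MAX (2*6 + 5)       /* "00-11-22-33-44-55" */

int
main(void)
{
    unsigned char mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
    char buf[LACP_MACSTR_MAX + 1];      /* +1 for the NUL */

    snprintf(buf, sizeof(buf), "%02X-%02X-%02X-%02X-%02X-%02X",
        mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
    printf("%s\n", buf);
    return (0);
}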
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ratelimit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #endif #ifdef INET #include #include #endif #ifdef INET6 #include #include #include #endif #include #include #include #define LAGG_RLOCK() struct epoch_tracker lagg_et; epoch_enter_preempt(net_epoch_preempt, &lagg_et) #define LAGG_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &lagg_et) #define LAGG_RLOCK_ASSERT() MPASS(in_epoch(net_epoch_preempt)) #define LAGG_UNLOCK_ASSERT() MPASS(!in_epoch(net_epoch_preempt)) #define LAGG_SX_INIT(_sc) sx_init(&(_sc)->sc_sx, "if_lagg sx") #define LAGG_SX_DESTROY(_sc) sx_destroy(&(_sc)->sc_sx) #define LAGG_XLOCK(_sc) sx_xlock(&(_sc)->sc_sx) #define LAGG_XUNLOCK(_sc) sx_xunlock(&(_sc)->sc_sx) #define LAGG_SXLOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_sx, SA_LOCKED) #define LAGG_XLOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_sx, SA_XLOCKED) /* Special flags we should propagate to the lagg ports. */ static struct { int flag; int (*func)(struct ifnet *, int); } lagg_pflags[] = { {IFF_PROMISC, ifpromisc}, {IFF_ALLMULTI, if_allmulti}, {0, NULL} }; VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */ #define V_lagg_list VNET(lagg_list) VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx); #define V_lagg_list_mtx VNET(lagg_list_mtx) #define LAGG_LIST_LOCK_INIT(x) mtx_init(&V_lagg_list_mtx, \ "if_lagg list", NULL, MTX_DEF) #define LAGG_LIST_LOCK_DESTROY(x) mtx_destroy(&V_lagg_list_mtx) #define LAGG_LIST_LOCK(x) mtx_lock(&V_lagg_list_mtx) #define LAGG_LIST_UNLOCK(x) mtx_unlock(&V_lagg_list_mtx) eventhandler_tag lagg_detach_cookie = NULL; static int lagg_clone_create(struct if_clone *, int, caddr_t); static void lagg_clone_destroy(struct ifnet *); VNET_DEFINE_STATIC(struct if_clone *, lagg_cloner); #define V_lagg_cloner VNET(lagg_cloner) static const char laggname[] = "lagg"; static MALLOC_DEFINE(M_LAGG, laggname, "802.3AD Link Aggregation Interface"); static void lagg_capabilities(struct lagg_softc *); static int lagg_port_create(struct lagg_softc *, struct ifnet *); static int lagg_port_destroy(struct lagg_port *, int); static struct mbuf *lagg_input(struct ifnet *, struct mbuf *); static void lagg_linkstate(struct lagg_softc *); static void lagg_port_state(struct ifnet *, int); static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t); static int lagg_port_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static void lagg_port_ifdetach(void *arg __unused, struct ifnet *); #ifdef LAGG_PORT_STACKING static int lagg_port_checkstacking(struct lagg_softc *); #endif static void lagg_port2req(struct lagg_port *, struct lagg_reqport *); static void lagg_init(void *); static void lagg_stop(struct lagg_softc *); static int lagg_ioctl(struct ifnet *, u_long, caddr_t); #ifdef RATELIMIT static int lagg_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); static void lagg_snd_tag_free(struct m_snd_tag *); #endif static int lagg_setmulti(struct lagg_port *); static int lagg_clrmulti(struct lagg_port *); static int lagg_setcaps(struct lagg_port *, int cap); static int lagg_setflag(struct lagg_port *, int, int, int (*func)(struct ifnet *, int)); static int lagg_setflags(struct lagg_port *, int status); static 
uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt); static int lagg_transmit(struct ifnet *, struct mbuf *); static void lagg_qflush(struct ifnet *); static int lagg_media_change(struct ifnet *); static void lagg_media_status(struct ifnet *, struct ifmediareq *); static struct lagg_port *lagg_link_active(struct lagg_softc *, struct lagg_port *); /* Simple round robin */ static void lagg_rr_attach(struct lagg_softc *); static int lagg_rr_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* Active failover */ static int lagg_fail_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* Loadbalancing */ static void lagg_lb_attach(struct lagg_softc *); static void lagg_lb_detach(struct lagg_softc *); static int lagg_lb_port_create(struct lagg_port *); static void lagg_lb_port_destroy(struct lagg_port *); static int lagg_lb_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *); /* Broadcast */ static int lagg_bcast_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_bcast_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* 802.3ad LACP */ static void lagg_lacp_attach(struct lagg_softc *); static void lagg_lacp_detach(struct lagg_softc *); static int lagg_lacp_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); static void lagg_lacp_lladdr(struct lagg_softc *); /* lagg protocol table */ static const struct lagg_proto { lagg_proto pr_num; void (*pr_attach)(struct lagg_softc *); void (*pr_detach)(struct lagg_softc *); int (*pr_start)(struct lagg_softc *, struct mbuf *); struct mbuf * (*pr_input)(struct lagg_softc *, struct lagg_port *, struct mbuf *); int (*pr_addport)(struct lagg_port *); void (*pr_delport)(struct lagg_port *); void (*pr_linkstate)(struct lagg_port *); void (*pr_init)(struct lagg_softc *); void (*pr_stop)(struct lagg_softc *); void (*pr_lladdr)(struct lagg_softc *); void (*pr_request)(struct lagg_softc *, void *); void (*pr_portreq)(struct lagg_port *, void *); } lagg_protos[] = { { .pr_num = LAGG_PROTO_NONE }, { .pr_num = LAGG_PROTO_ROUNDROBIN, .pr_attach = lagg_rr_attach, .pr_start = lagg_rr_start, .pr_input = lagg_rr_input, }, { .pr_num = LAGG_PROTO_FAILOVER, .pr_start = lagg_fail_start, .pr_input = lagg_fail_input, }, { .pr_num = LAGG_PROTO_LOADBALANCE, .pr_attach = lagg_lb_attach, .pr_detach = lagg_lb_detach, .pr_start = lagg_lb_start, .pr_input = lagg_lb_input, .pr_addport = lagg_lb_port_create, .pr_delport = lagg_lb_port_destroy, }, { .pr_num = LAGG_PROTO_LACP, .pr_attach = lagg_lacp_attach, .pr_detach = lagg_lacp_detach, .pr_start = lagg_lacp_start, .pr_input = lagg_lacp_input, .pr_addport = lacp_port_create, .pr_delport = lacp_port_destroy, .pr_linkstate = lacp_linkstate, .pr_init = lacp_init, .pr_stop = lacp_stop, .pr_lladdr = lagg_lacp_lladdr, .pr_request = lacp_req, .pr_portreq = lacp_portreq, }, { .pr_num = LAGG_PROTO_BROADCAST, .pr_start = lagg_bcast_start, .pr_input = lagg_bcast_input, }, }; SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, "Link Aggregation"); /* Allow input on any failover links */ VNET_DEFINE_STATIC(int, lagg_failover_rx_all); #define V_lagg_failover_rx_all VNET(lagg_failover_rx_all) 
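Each aggregation protocol is one row of the lagg_protos table: the mandatory pr_start/pr_input hooks are called unconditionally, while every optional hook is dispatched through a lagg_proto_*() wrapper that checks for NULL, so simple protocols such as failover fill in only what they need. A reduced sketch of that optional-hook dispatch, with hypothetical ctx/proto_ops types:

#include <stddef.h>
#include <stdio.h>

struct ctx { int proto; };

static void lacp_like_init(struct ctx *c) { printf("init proto %d\n", c->proto); }

static const struct proto_ops {
    int num;
    void (*init)(struct ctx *);         /* optional hook */
} ops[] = {
    { 0, NULL },                        /* "none": no hooks */
    { 1, lacp_like_init },
};

static void
proto_init(struct ctx *c)
{
    if (ops[c->proto].init != NULL)     /* NULL-guard, as in lagg_proto_init() */
        ops[c->proto].init(c);
}

int
main(void)
{
    struct ctx c = { 1 };

    proto_init(&c);
    return (0);
}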
SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(lagg_failover_rx_all), 0, "Accept input from any interface in a failover lagg"); /* Default value for using flowid */ VNET_DEFINE_STATIC(int, def_use_flowid) = 0; #define V_def_use_flowid VNET(def_use_flowid) SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN, &VNET_NAME(def_use_flowid), 0, "Default setting for using flow id for load sharing"); +/* Default value for using numa */ +VNET_DEFINE_STATIC(int, def_use_numa) = 1; +#define V_def_use_numa VNET(def_use_numa) +SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_numa, CTLFLAG_RWTUN, + &VNET_NAME(def_use_numa), 0, + "Use numa to steer flows"); + /* Default value for flowid shift */ VNET_DEFINE_STATIC(int, def_flowid_shift) = 16; #define V_def_flowid_shift VNET(def_flowid_shift) SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN, &VNET_NAME(def_flowid_shift), 0, "Default setting for flowid shift for load sharing"); static void vnet_lagg_init(const void *unused __unused) { LAGG_LIST_LOCK_INIT(); SLIST_INIT(&V_lagg_list); V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create, lagg_clone_destroy, 0); } VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_lagg_init, NULL); static void vnet_lagg_uninit(const void *unused __unused) { if_clone_detach(V_lagg_cloner); LAGG_LIST_LOCK_DESTROY(); } VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_lagg_uninit, NULL); static int lagg_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: lagg_input_p = lagg_input; lagg_linkstate_p = lagg_port_state; lagg_detach_cookie = EVENTHANDLER_REGISTER( ifnet_departure_event, lagg_port_ifdetach, NULL, EVENTHANDLER_PRI_ANY); break; case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(ifnet_departure_event, lagg_detach_cookie); lagg_input_p = NULL; lagg_linkstate_p = NULL; break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t lagg_mod = { "if_lagg", lagg_modevent, 0 }; DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_lagg, 1); static void lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr) { LAGG_XLOCK_ASSERT(sc); KASSERT(sc->sc_proto == LAGG_PROTO_NONE, ("%s: sc %p has proto", __func__, sc)); if (sc->sc_ifflags & IFF_DEBUG) if_printf(sc->sc_ifp, "using proto %u\n", pr); if (lagg_protos[pr].pr_attach != NULL) lagg_protos[pr].pr_attach(sc); sc->sc_proto = pr; } static void lagg_proto_detach(struct lagg_softc *sc) { lagg_proto pr; LAGG_XLOCK_ASSERT(sc); pr = sc->sc_proto; sc->sc_proto = LAGG_PROTO_NONE; if (lagg_protos[pr].pr_detach != NULL) lagg_protos[pr].pr_detach(sc); } static int lagg_proto_start(struct lagg_softc *sc, struct mbuf *m) { return (lagg_protos[sc->sc_proto].pr_start(sc, m)); } static struct mbuf * lagg_proto_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) { return (lagg_protos[sc->sc_proto].pr_input(sc, lp, m)); } static int lagg_proto_addport(struct lagg_softc *sc, struct lagg_port *lp) { if (lagg_protos[sc->sc_proto].pr_addport == NULL) return (0); else return (lagg_protos[sc->sc_proto].pr_addport(lp)); } static void lagg_proto_delport(struct lagg_softc *sc, struct lagg_port *lp) { if (lagg_protos[sc->sc_proto].pr_delport != NULL) lagg_protos[sc->sc_proto].pr_delport(lp); } static void lagg_proto_linkstate(struct lagg_softc *sc, struct lagg_port *lp) { if (lagg_protos[sc->sc_proto].pr_linkstate != NULL) lagg_protos[sc->sc_proto].pr_linkstate(lp); } static void lagg_proto_init(struct 
lagg_softc *sc) { if (lagg_protos[sc->sc_proto].pr_init != NULL) lagg_protos[sc->sc_proto].pr_init(sc); } static void lagg_proto_stop(struct lagg_softc *sc) { if (lagg_protos[sc->sc_proto].pr_stop != NULL) lagg_protos[sc->sc_proto].pr_stop(sc); } static void lagg_proto_lladdr(struct lagg_softc *sc) { if (lagg_protos[sc->sc_proto].pr_lladdr != NULL) lagg_protos[sc->sc_proto].pr_lladdr(sc); } static void lagg_proto_request(struct lagg_softc *sc, void *v) { if (lagg_protos[sc->sc_proto].pr_request != NULL) lagg_protos[sc->sc_proto].pr_request(sc, v); } static void lagg_proto_portreq(struct lagg_softc *sc, struct lagg_port *lp, void *v) { if (lagg_protos[sc->sc_proto].pr_portreq != NULL) lagg_protos[sc->sc_proto].pr_portreq(lp, v); } /* * This routine is run via a vlan * config EVENT */ static void lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { struct lagg_softc *sc = ifp->if_softc; struct lagg_port *lp; if (ifp->if_softc != arg) /* Not our event */ return; LAGG_RLOCK(); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag); LAGG_RUNLOCK(); } /* * This routine is run via a vlan * unconfig EVENT */ static void lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { struct lagg_softc *sc = ifp->if_softc; struct lagg_port *lp; if (ifp->if_softc != arg) /* Not our event */ return; LAGG_RLOCK(); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag); LAGG_RUNLOCK(); } static int lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct lagg_softc *sc; struct ifnet *ifp; static const u_char eaddr[6]; /* 00:00:00:00:00:00 */ sc = malloc(sizeof(*sc), M_LAGG, M_WAITOK|M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { free(sc, M_LAGG); return (ENOSPC); } LAGG_SX_INIT(sc); LAGG_XLOCK(sc); if (V_def_use_flowid) sc->sc_opts |= LAGG_OPT_USE_FLOWID; + if (V_def_use_numa) + sc->sc_opts |= LAGG_OPT_USE_NUMA; sc->flowid_shift = V_def_flowid_shift; /* Hash all layers by default */ sc->sc_flags = MBUF_HASHFLAG_L2|MBUF_HASHFLAG_L3|MBUF_HASHFLAG_L4; lagg_proto_attach(sc, LAGG_PROTO_DEFAULT); CK_SLIST_INIT(&sc->sc_ports); /* Initialise pseudo media types */ ifmedia_init(&sc->sc_media, 0, lagg_media_change, lagg_media_status); ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); if_initname(ifp, laggname, unit); ifp->if_softc = sc; ifp->if_transmit = lagg_transmit; ifp->if_qflush = lagg_qflush; ifp->if_init = lagg_init; ifp->if_ioctl = lagg_ioctl; ifp->if_get_counter = lagg_get_counter; ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; #ifdef RATELIMIT ifp->if_snd_tag_alloc = lagg_snd_tag_alloc; ifp->if_snd_tag_free = lagg_snd_tag_free; #endif ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; /* * Attach as an ordinary ethernet device, children will be attached * as special device IFT_IEEE8023ADLAG.
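This is the user-visible half of the change: together with the struct lacp_numa buckets added to the portmap in ieee8023ad_lacp.h, new lagg interfaces now come up with LAGG_OPT_USE_NUMA set whenever net.link.lagg.default_use_numa (default 1) is enabled, so the distribution code can group active ports by memory domain, per the sysctl description "Use numa to steer flows". A user-space sketch of filling such per-domain buckets; the types and NDOMAINS are illustrative stand-ins, not the kernel definitions:

#include <stdio.h>
#include <string.h>

#define LACP_MAX_PORTS 32
#define NDOMAINS       4                /* stands in for MAXMEMDOM */

struct port { int domain; };

struct numa_bucket {
    int count;
    struct port *map[LACP_MAX_PORTS];
};

int
main(void)
{
    struct port ports[3] = { { 0 }, { 1 }, { 0 } };
    struct numa_bucket dom[NDOMAINS];
    int i;

    memset(dom, 0, sizeof(dom));
    for (i = 0; i < 3; i++) {           /* bucket each port by its domain */
        struct numa_bucket *b = &dom[ports[i].domain];

        b->map[b->count++] = &ports[i];
    }
    printf("domain 0: %d ports, domain 1: %d ports\n",
        dom[0].count, dom[1].count);
    return (0);
}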
*/ ether_ifattach(ifp, eaddr); sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); /* Insert into the global list of laggs */ LAGG_LIST_LOCK(); SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries); LAGG_LIST_UNLOCK(); LAGG_XUNLOCK(sc); return (0); } static void lagg_clone_destroy(struct ifnet *ifp) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; struct lagg_port *lp; LAGG_XLOCK(sc); sc->sc_destroying = 1; lagg_stop(sc); ifp->if_flags &= ~IFF_UP; EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach); EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach); /* Shutdown and remove lagg ports */ while ((lp = CK_SLIST_FIRST(&sc->sc_ports)) != NULL) lagg_port_destroy(lp, 1); /* Unhook the aggregation protocol */ lagg_proto_detach(sc); LAGG_XUNLOCK(sc); ifmedia_removeall(&sc->sc_media); ether_ifdetach(ifp); if_free(ifp); LAGG_LIST_LOCK(); SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries); LAGG_LIST_UNLOCK(); LAGG_SX_DESTROY(sc); free(sc, M_LAGG); } static void lagg_capabilities(struct lagg_softc *sc) { struct lagg_port *lp; int cap, ena, pena; uint64_t hwa; struct ifnet_hw_tsomax hw_tsomax; LAGG_XLOCK_ASSERT(sc); /* Get common enabled capabilities for the lagg ports */ ena = ~0; CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) ena &= lp->lp_ifp->if_capenable; ena = (ena == ~0 ? 0 : ena); /* * Apply common enabled capabilities back to the lagg ports. * May require several iterations if they are dependent. */ do { pena = ena; CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { lagg_setcaps(lp, ena); ena &= lp->lp_ifp->if_capenable; } } while (pena != ena); /* Get other capabilities from the lagg ports */ cap = ~0; hwa = ~(uint64_t)0; memset(&hw_tsomax, 0, sizeof(hw_tsomax)); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { cap &= lp->lp_ifp->if_capabilities; hwa &= lp->lp_ifp->if_hwassist; if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax); } cap = (cap == ~0 ? 0 : cap); hwa = (hwa == ~(uint64_t)0 ? 0 : hwa); if (sc->sc_ifp->if_capabilities != cap || sc->sc_ifp->if_capenable != ena || sc->sc_ifp->if_hwassist != hwa || if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) { sc->sc_ifp->if_capabilities = cap; sc->sc_ifp->if_capenable = ena; sc->sc_ifp->if_hwassist = hwa; getmicrotime(&sc->sc_ifp->if_lastchange); if (sc->sc_ifflags & IFF_DEBUG) if_printf(sc->sc_ifp, "capabilities 0x%08x enabled 0x%08x\n", cap, ena); } } static int lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) { struct lagg_softc *sc_ptr; struct lagg_port *lp, *tlp; struct ifreq ifr; int error, i, oldmtu; uint64_t *pval; LAGG_XLOCK_ASSERT(sc); if (sc->sc_ifp == ifp) { if_printf(sc->sc_ifp, "cannot add a lagg to itself as a port\n"); return (EINVAL); } /* Limit the maximal number of lagg ports */ if (sc->sc_count >= LAGG_MAX_PORTS) return (ENOSPC); /* Check if port has already been associated to a lagg */ if (ifp->if_lagg != NULL) { /* Port is already in the current lagg? 
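lagg_capabilities() computes the enabled-capability set as the bitwise AND over all member ports, then pushes it back down with lagg_setcaps(); because disabling one capability on a port can implicitly disable others (TSO depending on checksum offload, for instance), the do/while repeats until the intersection reaches a fixed point. The idea in miniature:

#include <stdio.h>

int
main(void)
{
    int caps[2] = { 0x7, 0x5 };     /* per-port enabled capability bits */
    int ena = ~0, pena, i;

    do {                            /* iterate until the AND is stable */
        pena = ena;
        for (i = 0; i < 2; i++) {
            caps[i] &= ena;         /* "setcaps": clamp this port */
            ena &= caps[i];         /* re-read what the port kept */
        }
    } while (pena != ena);
    printf("common enabled set: %#x\n", ena);
    return (0);
}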
*/ lp = (struct lagg_port *)ifp->if_lagg; if (lp->lp_softc == sc) return (EEXIST); return (EBUSY); } /* XXX Disallow non-ethernet interfaces (this should be any of 802) */ if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN) return (EPROTONOSUPPORT); /* Allow the first Ethernet member to define the MTU */ oldmtu = -1; if (CK_SLIST_EMPTY(&sc->sc_ports)) { sc->sc_ifp->if_mtu = ifp->if_mtu; } else if (sc->sc_ifp->if_mtu != ifp->if_mtu) { if (ifp->if_ioctl == NULL) { if_printf(sc->sc_ifp, "cannot change MTU for %s\n", ifp->if_xname); return (EINVAL); } oldmtu = ifp->if_mtu; strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)); ifr.ifr_mtu = sc->sc_ifp->if_mtu; error = (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr); if (error != 0) { if_printf(sc->sc_ifp, "invalid MTU for %s\n", ifp->if_xname); return (error); } ifr.ifr_mtu = oldmtu; } lp = malloc(sizeof(struct lagg_port), M_LAGG, M_WAITOK|M_ZERO); lp->lp_softc = sc; /* Check if port is a stacked lagg */ LAGG_LIST_LOCK(); SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) { if (ifp == sc_ptr->sc_ifp) { LAGG_LIST_UNLOCK(); free(lp, M_LAGG); if (oldmtu != -1) (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr); return (EINVAL); /* XXX disable stacking for the moment, it's untested */ #ifdef LAGG_PORT_STACKING lp->lp_flags |= LAGG_PORT_STACK; if (lagg_port_checkstacking(sc_ptr) >= LAGG_MAX_STACKING) { LAGG_LIST_UNLOCK(); free(lp, M_LAGG); if (oldmtu != -1) (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr); return (E2BIG); } #endif } } LAGG_LIST_UNLOCK(); if_ref(ifp); lp->lp_ifp = ifp; bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN); lp->lp_ifcapenable = ifp->if_capenable; if (CK_SLIST_EMPTY(&sc->sc_ports)) { bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); lagg_proto_lladdr(sc); EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); } else { if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); } lagg_setflags(lp, 1); if (CK_SLIST_EMPTY(&sc->sc_ports)) sc->sc_primary = lp; /* Change the interface type */ lp->lp_iftype = ifp->if_type; ifp->if_type = IFT_IEEE8023ADLAG; ifp->if_lagg = lp; lp->lp_ioctl = ifp->if_ioctl; ifp->if_ioctl = lagg_port_ioctl; lp->lp_output = ifp->if_output; ifp->if_output = lagg_port_output; /* Read port counters */ pval = lp->port_counters.val; for (i = 0; i < IFCOUNTERS; i++, pval++) *pval = ifp->if_get_counter(ifp, i); /* * Insert into the list of ports. * Keep ports sorted by if_index so that configuration is predictable * and the `ifconfig laggN create ...` command leads to the same * result each time. */ CK_SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) { if (tlp->lp_ifp->if_index < ifp->if_index && ( CK_SLIST_NEXT(tlp, lp_entries) == NULL || ((struct lagg_port*)CK_SLIST_NEXT(tlp, lp_entries))->lp_ifp->if_index > ifp->if_index)) break; } if (tlp != NULL) CK_SLIST_INSERT_AFTER(tlp, lp, lp_entries); else CK_SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries); sc->sc_count++; lagg_setmulti(lp); if ((error = lagg_proto_addport(sc, lp)) != 0) { /* Remove the port, without calling pr_delport.
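The insertion loop above keeps the port list sorted by if_index so that a given set of members always enumerates in the same order, making repeated `ifconfig laggN create ...` runs deterministic. The same sorted insert on a plain singly linked list (hypothetical struct node):

#include <stdio.h>

struct node { int index; struct node *next; };

static void
insert_sorted(struct node **head, struct node *n)
{
    struct node *t;

    /* find the element to insert after, exactly as the kernel loop does */
    for (t = *head; t != NULL; t = t->next) {
        if (t->index < n->index &&
            (t->next == NULL || t->next->index > n->index))
            break;
    }
    if (t != NULL) {
        n->next = t->next;
        t->next = n;
    } else {
        n->next = *head;    /* empty list or new smallest index */
        *head = n;
    }
}

int
main(void)
{
    struct node a = { 2, NULL }, b = { 1, NULL }, *head = NULL;

    insert_sorted(&head, &a);
    insert_sorted(&head, &b);
    printf("first if_index: %d\n", head->index);
    return (0);
}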
*/ lagg_port_destroy(lp, 0); if (oldmtu != -1) (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr); return (error); } /* Update lagg capabilities */ lagg_capabilities(sc); lagg_linkstate(sc); return (0); } #ifdef LAGG_PORT_STACKING static int lagg_port_checkstacking(struct lagg_softc *sc) { struct lagg_softc *sc_ptr; struct lagg_port *lp; int m = 0; LAGG_SXLOCK_ASSERT(sc); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (lp->lp_flags & LAGG_PORT_STACK) { sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc; m = MAX(m, lagg_port_checkstacking(sc_ptr)); } } return (m + 1); } #endif static void lagg_port_destroy_cb(epoch_context_t ec) { struct lagg_port *lp; struct ifnet *ifp; lp = __containerof(ec, struct lagg_port, lp_epoch_ctx); ifp = lp->lp_ifp; if_rele(ifp); free(lp, M_LAGG); } static int lagg_port_destroy(struct lagg_port *lp, int rundelport) { struct lagg_softc *sc = lp->lp_softc; struct lagg_port *lp_ptr, *lp0; struct ifnet *ifp = lp->lp_ifp; uint64_t *pval, vdiff; int i; LAGG_XLOCK_ASSERT(sc); if (rundelport) lagg_proto_delport(sc, lp); if (lp->lp_detaching == 0) lagg_clrmulti(lp); /* Restore interface */ ifp->if_type = lp->lp_iftype; ifp->if_ioctl = lp->lp_ioctl; ifp->if_output = lp->lp_output; ifp->if_lagg = NULL; /* Update detached port counters */ pval = lp->port_counters.val; for (i = 0; i < IFCOUNTERS; i++, pval++) { vdiff = ifp->if_get_counter(ifp, i) - *pval; sc->detached_counters.val[i] += vdiff; } /* Finally, remove the port from the lagg */ CK_SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries); sc->sc_count--; /* Update the primary interface */ if (lp == sc->sc_primary) { uint8_t lladdr[ETHER_ADDR_LEN]; if ((lp0 = CK_SLIST_FIRST(&sc->sc_ports)) == NULL) bzero(&lladdr, ETHER_ADDR_LEN); else bcopy(lp0->lp_lladdr, lladdr, ETHER_ADDR_LEN); sc->sc_primary = lp0; if (sc->sc_destroying == 0) { bcopy(lladdr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); lagg_proto_lladdr(sc); EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); } /* * Update lladdr for each port (new primary needs update * as well, to switch from old lladdr to its 'real' one) */ CK_SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries) if_setlladdr(lp_ptr->lp_ifp, lladdr, ETHER_ADDR_LEN); } if (lp->lp_ifflags) if_printf(ifp, "%s: lp_ifflags unclean\n", __func__); if (lp->lp_detaching == 0) { lagg_setflags(lp, 0); lagg_setcaps(lp, lp->lp_ifcapenable); if_setlladdr(ifp, lp->lp_lladdr, ETHER_ADDR_LEN); } /* * free the port and release its ifnet reference after a grace period has * elapsed.
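lagg_port_destroy() unlinks the port right away but defers if_rele() and free() to lagg_port_destroy_cb() through epoch_call(), so readers still traversing the list inside a LAGG_RLOCK() epoch section never touch freed memory. A toy model of that deferral; defer() and the drain step are hypothetical stand-ins for the epoch machinery:

#include <stdio.h>
#include <stdlib.h>

static void (*pending_cb)(void *);
static void *pending_arg;

static void
defer(void (*cb)(void *), void *arg)    /* epoch_call() stand-in */
{
    pending_cb = cb;
    pending_arg = arg;
}

static void
port_free(void *p)                      /* runs only after the grace period */
{
    printf("reclaiming %p\n", p);
    free(p);
}

int
main(void)
{
    void *port = malloc(16);

    /* the port is unlinked here; old readers may still hold the pointer */
    defer(port_free, port);
    /* ... all pre-existing readers drain ... */
    if (pending_cb != NULL)
        pending_cb(pending_arg);        /* grace period over: safe to free */
    return (0);
}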
*/ epoch_call(net_epoch_preempt, &lp->lp_epoch_ctx, lagg_port_destroy_cb); /* Update lagg capabilities */ lagg_capabilities(sc); lagg_linkstate(sc); return (0); } static int lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct lagg_reqport *rp = (struct lagg_reqport *)data; struct lagg_softc *sc; struct lagg_port *lp = NULL; int error = 0; /* Should be checked by the caller */ if (ifp->if_type != IFT_IEEE8023ADLAG || (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL) goto fallback; switch (cmd) { case SIOCGLAGGPORT: if (rp->rp_portname[0] == '\0' || ifunit(rp->rp_portname) != ifp) { error = EINVAL; break; } LAGG_RLOCK(); if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) { error = ENOENT; LAGG_RUNLOCK(); break; } lagg_port2req(lp, rp); LAGG_RUNLOCK(); break; case SIOCSIFCAP: if (lp->lp_ioctl == NULL) { error = EINVAL; break; } error = (*lp->lp_ioctl)(ifp, cmd, data); if (error) break; /* Update lagg interface capabilities */ LAGG_XLOCK(sc); lagg_capabilities(sc); LAGG_XUNLOCK(sc); VLAN_CAPABILITIES(sc->sc_ifp); break; case SIOCSIFMTU: /* Do not allow the MTU to be changed once joined */ error = EINVAL; break; default: goto fallback; } return (error); fallback: if (lp != NULL && lp->lp_ioctl != NULL) return ((*lp->lp_ioctl)(ifp, cmd, data)); return (EINVAL); } /* * Requests counter @cnt data. * * Counter value is calculated the following way: * 1) for each port, sum difference between current and "initial" measurements. * 2) add lagg logical interface counters. * 3) add data from detached_counters array. * * We also do the following things on ports attach/detach: * 1) On port attach we store all counters it has into port_counter array. * 2) On port detach we add the different between "initial" and * current counters data to detached_counters array. */ static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt) { struct lagg_softc *sc; struct lagg_port *lp; struct ifnet *lpifp; uint64_t newval, oldval, vsum; /* Revise this when we've got non-generic counters. */ KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); sc = (struct lagg_softc *)ifp->if_softc; vsum = 0; LAGG_RLOCK(); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { /* Saved attached value */ oldval = lp->port_counters.val[cnt]; /* current value */ lpifp = lp->lp_ifp; newval = lpifp->if_get_counter(lpifp, cnt); /* Calculate diff and save new */ vsum += newval - oldval; } LAGG_RUNLOCK(); /* * Add counter data which might be added by upper * layer protocols operating on logical interface. */ vsum += if_get_counter_default(ifp, cnt); /* * Add counter data from detached ports counters */ vsum += sc->detached_counters.val[cnt]; return (vsum); } /* * For direct output to child ports. */ static int lagg_port_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { struct lagg_port *lp = ifp->if_lagg; switch (dst->sa_family) { case pseudo_AF_HDRCMPLT: case AF_UNSPEC: return ((*lp->lp_output)(ifp, m, dst, ro)); } /* drop any other frames */ m_freem(m); return (ENETDOWN); } static void lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp) { struct lagg_port *lp; struct lagg_softc *sc; if ((lp = ifp->if_lagg) == NULL) return; /* If the ifnet is just being renamed, don't do anything. 
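Worked numerically, the three-part sum described in the comment above: a port that attached when its counter read 1000 and now reads 1500 contributes 500; a port that left after contributing 200 is remembered in detached_counters; and if the logical interface itself added, say, 3, the lagg reports 703. In sketch form:

#include <stdio.h>

int
main(void)
{
    unsigned long attach_val = 1000, now = 1500;    /* one attached port */
    unsigned long detached = 200, logical = 3;
    unsigned long vsum = 0;

    vsum += now - attach_val;   /* 1) per-port delta since attach */
    vsum += logical;            /* 2) counters of the lagg ifnet itself */
    vsum += detached;           /* 3) deltas banked when ports detached */
    printf("%lu\n", vsum);      /* 703 */
    return (0);
}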
*/ if (ifp->if_flags & IFF_RENAMING) return; sc = lp->lp_softc; LAGG_XLOCK(sc); lp->lp_detaching = 1; lagg_port_destroy(lp, 1); LAGG_XUNLOCK(sc); VLAN_CAPABILITIES(sc->sc_ifp); } static void lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp) { struct lagg_softc *sc = lp->lp_softc; strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname)); strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname)); rp->rp_prio = lp->lp_prio; rp->rp_flags = lp->lp_flags; lagg_proto_portreq(sc, lp, &rp->rp_psc); /* Add protocol specific flags */ switch (sc->sc_proto) { case LAGG_PROTO_FAILOVER: if (lp == sc->sc_primary) rp->rp_flags |= LAGG_PORT_MASTER; if (lp == lagg_link_active(sc, sc->sc_primary)) rp->rp_flags |= LAGG_PORT_ACTIVE; break; case LAGG_PROTO_ROUNDROBIN: case LAGG_PROTO_LOADBALANCE: case LAGG_PROTO_BROADCAST: if (LAGG_PORTACTIVE(lp)) rp->rp_flags |= LAGG_PORT_ACTIVE; break; case LAGG_PROTO_LACP: /* LACP has a different definition of active */ if (lacp_isactive(lp)) rp->rp_flags |= LAGG_PORT_ACTIVE; if (lacp_iscollecting(lp)) rp->rp_flags |= LAGG_PORT_COLLECTING; if (lacp_isdistributing(lp)) rp->rp_flags |= LAGG_PORT_DISTRIBUTING; break; } } static void lagg_init(void *xsc) { struct lagg_softc *sc = (struct lagg_softc *)xsc; struct ifnet *ifp = sc->sc_ifp; struct lagg_port *lp; LAGG_XLOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { LAGG_XUNLOCK(sc); return; } ifp->if_drv_flags |= IFF_DRV_RUNNING; /* * Update the port lladdrs if needed. * This might be if_setlladdr() notification * that lladdr has been changed. */ CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp->lp_ifp), ETHER_ADDR_LEN) != 0) if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ETHER_ADDR_LEN); } lagg_proto_init(sc); LAGG_XUNLOCK(sc); } static void lagg_stop(struct lagg_softc *sc) { struct ifnet *ifp = sc->sc_ifp; LAGG_XLOCK_ASSERT(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; lagg_proto_stop(sc); } static int lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; struct lagg_reqall *ra = (struct lagg_reqall *)data; struct lagg_reqopts *ro = (struct lagg_reqopts *)data; struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf; struct lagg_reqflags *rf = (struct lagg_reqflags *)data; struct ifreq *ifr = (struct ifreq *)data; struct lagg_port *lp; struct ifnet *tpif; struct thread *td = curthread; char *buf, *outbuf; int count, buflen, len, error = 0; bzero(&rpbuf, sizeof(rpbuf)); switch (cmd) { case SIOCGLAGG: LAGG_XLOCK(sc); buflen = sc->sc_count * sizeof(struct lagg_reqport); outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); ra->ra_proto = sc->sc_proto; lagg_proto_request(sc, &ra->ra_psc); count = 0; buf = outbuf; len = min(ra->ra_size, buflen); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (len < sizeof(rpbuf)) break; lagg_port2req(lp, &rpbuf); memcpy(buf, &rpbuf, sizeof(rpbuf)); count++; buf += sizeof(rpbuf); len -= sizeof(rpbuf); } LAGG_XUNLOCK(sc); ra->ra_ports = count; ra->ra_size = count * sizeof(rpbuf); error = copyout(outbuf, ra->ra_port, ra->ra_size); free(outbuf, M_TEMP); break; case SIOCSLAGG: error = priv_check(td, PRIV_NET_LAGG); if (error) break; if (ra->ra_proto >= LAGG_PROTO_MAX) { error = EPROTONOSUPPORT; break; } LAGG_XLOCK(sc); lagg_proto_detach(sc); LAGG_UNLOCK_ASSERT(); lagg_proto_attach(sc, ra->ra_proto); LAGG_XUNLOCK(sc); break; case SIOCGLAGGOPTS: LAGG_XLOCK(sc); ro->ro_opts = sc->sc_opts; if (sc->sc_proto == 
LAGG_PROTO_LACP) { struct lacp_softc *lsc; lsc = (struct lacp_softc *)sc->sc_psc; if (lsc->lsc_debug.lsc_tx_test != 0) ro->ro_opts |= LAGG_OPT_LACP_TXTEST; if (lsc->lsc_debug.lsc_rx_test != 0) ro->ro_opts |= LAGG_OPT_LACP_RXTEST; if (lsc->lsc_strict_mode != 0) ro->ro_opts |= LAGG_OPT_LACP_STRICT; if (lsc->lsc_fast_timeout != 0) ro->ro_opts |= LAGG_OPT_LACP_TIMEOUT; ro->ro_active = sc->sc_active; } else { ro->ro_active = 0; CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) ro->ro_active += LAGG_PORTACTIVE(lp); } ro->ro_bkt = sc->sc_bkt; ro->ro_flapping = sc->sc_flapping; ro->ro_flowid_shift = sc->flowid_shift; LAGG_XUNLOCK(sc); break; case SIOCSLAGGOPTS: if (sc->sc_proto == LAGG_PROTO_ROUNDROBIN) { if (ro->ro_bkt == 0) sc->sc_bkt = 1; // Minimum 1 packet per iface. else sc->sc_bkt = ro->ro_bkt; } error = priv_check(td, PRIV_NET_LAGG); if (error) break; if (ro->ro_opts == 0) break; /* * Set options. LACP options are stored in sc->sc_psc, * not in sc_opts. */ int valid, lacp; switch (ro->ro_opts) { case LAGG_OPT_USE_FLOWID: case -LAGG_OPT_USE_FLOWID: + case LAGG_OPT_USE_NUMA: + case -LAGG_OPT_USE_NUMA: case LAGG_OPT_FLOWIDSHIFT: valid = 1; lacp = 0; break; case LAGG_OPT_LACP_TXTEST: case -LAGG_OPT_LACP_TXTEST: case LAGG_OPT_LACP_RXTEST: case -LAGG_OPT_LACP_RXTEST: case LAGG_OPT_LACP_STRICT: case -LAGG_OPT_LACP_STRICT: case LAGG_OPT_LACP_TIMEOUT: case -LAGG_OPT_LACP_TIMEOUT: valid = lacp = 1; break; default: valid = lacp = 0; break; } LAGG_XLOCK(sc); if (valid == 0 || (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) { /* Invalid combination of options specified. */ error = EINVAL; LAGG_XUNLOCK(sc); break; /* Return from SIOCSLAGGOPTS. */ } /* * Store new options into sc->sc_opts except for * FLOWIDSHIFT and LACP options. */ if (lacp == 0) { if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT) sc->flowid_shift = ro->ro_flowid_shift; else if (ro->ro_opts > 0) sc->sc_opts |= ro->ro_opts; else sc->sc_opts &= ~ro->ro_opts; } else { struct lacp_softc *lsc; struct lacp_port *lp; lsc = (struct lacp_softc *)sc->sc_psc; switch (ro->ro_opts) { case LAGG_OPT_LACP_TXTEST: lsc->lsc_debug.lsc_tx_test = 1; break; case -LAGG_OPT_LACP_TXTEST: lsc->lsc_debug.lsc_tx_test = 0; break; case LAGG_OPT_LACP_RXTEST: lsc->lsc_debug.lsc_rx_test = 1; break; case -LAGG_OPT_LACP_RXTEST: lsc->lsc_debug.lsc_rx_test = 0; break; case LAGG_OPT_LACP_STRICT: lsc->lsc_strict_mode = 1; break; case -LAGG_OPT_LACP_STRICT: lsc->lsc_strict_mode = 0; break; case LAGG_OPT_LACP_TIMEOUT: LACP_LOCK(lsc); LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) lp->lp_state |= LACP_STATE_TIMEOUT; LACP_UNLOCK(lsc); lsc->lsc_fast_timeout = 1; break; case -LAGG_OPT_LACP_TIMEOUT: LACP_LOCK(lsc); LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) lp->lp_state &= ~LACP_STATE_TIMEOUT; LACP_UNLOCK(lsc); lsc->lsc_fast_timeout = 0; break; } } LAGG_XUNLOCK(sc); break; case SIOCGLAGGFLAGS: rf->rf_flags = 0; LAGG_XLOCK(sc); if (sc->sc_flags & MBUF_HASHFLAG_L2) rf->rf_flags |= LAGG_F_HASHL2; if (sc->sc_flags & MBUF_HASHFLAG_L3) rf->rf_flags |= LAGG_F_HASHL3; if (sc->sc_flags & MBUF_HASHFLAG_L4) rf->rf_flags |= LAGG_F_HASHL4; LAGG_XUNLOCK(sc); break; case SIOCSLAGGHASH: error = priv_check(td, PRIV_NET_LAGG); if (error) break; if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) { error = EINVAL; break; } LAGG_XLOCK(sc); sc->sc_flags = 0; if (rf->rf_flags & LAGG_F_HASHL2) sc->sc_flags |= MBUF_HASHFLAG_L2; if (rf->rf_flags & LAGG_F_HASHL3) sc->sc_flags |= MBUF_HASHFLAG_L3; if (rf->rf_flags & LAGG_F_HASHL4) sc->sc_flags |= MBUF_HASHFLAG_L4; LAGG_XUNLOCK(sc); break; case SIOCGLAGGPORT: if (rp->rp_portname[0] 
== '\0' || (tpif = ifunit_ref(rp->rp_portname)) == NULL) { error = EINVAL; break; } LAGG_RLOCK(); if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL || lp->lp_softc != sc) { error = ENOENT; LAGG_RUNLOCK(); if_rele(tpif); break; } lagg_port2req(lp, rp); LAGG_RUNLOCK(); if_rele(tpif); break; case SIOCSLAGGPORT: error = priv_check(td, PRIV_NET_LAGG); if (error) break; if (rp->rp_portname[0] == '\0' || (tpif = ifunit_ref(rp->rp_portname)) == NULL) { error = EINVAL; break; } #ifdef INET6 /* * A laggport interface should not have inet6 address * because two interfaces with a valid link-local * scope zone must not be merged in any form. This * restriction is needed to prevent violation of * link-local scope zone. Attempts to add a laggport * interface which has inet6 addresses triggers * removal of all inet6 addresses on the member * interface. */ if (in6ifa_llaonifp(tpif)) { in6_ifdetach(tpif); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " "IPv6 address scope violation.\n", tpif->if_xname); } #endif LAGG_XLOCK(sc); error = lagg_port_create(sc, tpif); LAGG_XUNLOCK(sc); if_rele(tpif); VLAN_CAPABILITIES(ifp); break; case SIOCSLAGGDELPORT: error = priv_check(td, PRIV_NET_LAGG); if (error) break; if (rp->rp_portname[0] == '\0' || (tpif = ifunit_ref(rp->rp_portname)) == NULL) { error = EINVAL; break; } LAGG_XLOCK(sc); if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL || lp->lp_softc != sc) { error = ENOENT; LAGG_XUNLOCK(sc); if_rele(tpif); break; } error = lagg_port_destroy(lp, 1); LAGG_XUNLOCK(sc); if_rele(tpif); VLAN_CAPABILITIES(ifp); break; case SIOCSIFFLAGS: /* Set flags on ports too */ LAGG_XLOCK(sc); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { lagg_setflags(lp, 1); } if (!(ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* * If interface is marked down and it is running, * then stop and disable it. */ lagg_stop(sc); LAGG_XUNLOCK(sc); } else if ((ifp->if_flags & IFF_UP) && !(ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* * If interface is marked up and it is stopped, then * start it. 
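SIOCSLAGGOPTS encodes set versus clear in the sign of ro_opts, which is why every option, including the new LAGG_OPT_USE_NUMA, appears twice in the case list: the positive value sets the bit and its negation clears it. The encoding as a standalone sketch (the bit value is hypothetical, and the kernel's clearing statement is written differently):

#include <stdio.h>

#define OPT_USE_NUMA 0x04               /* hypothetical bit value */

static unsigned
apply_opt(unsigned opts, int ro_opts)
{
    if (ro_opts > 0)
        opts |= ro_opts;                /* +OPT sets */
    else
        opts &= ~(unsigned)-ro_opts;    /* -OPT clears */
    return (opts);
}

int
main(void)
{
    unsigned o = 0;

    o = apply_opt(o, OPT_USE_NUMA);
    o = apply_opt(o, -OPT_USE_NUMA);
    printf("%#x\n", o);                 /* back to 0 */
    return (0);
}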
*/ LAGG_XUNLOCK(sc); (*ifp->if_init)(sc); } else LAGG_XUNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: LAGG_XLOCK(sc); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { lagg_clrmulti(lp); lagg_setmulti(lp); } LAGG_XUNLOCK(sc); error = 0; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); break; case SIOCSIFCAP: LAGG_XLOCK(sc); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (lp->lp_ioctl != NULL) (*lp->lp_ioctl)(lp->lp_ifp, cmd, data); } lagg_capabilities(sc); LAGG_XUNLOCK(sc); VLAN_CAPABILITIES(ifp); error = 0; break; case SIOCSIFMTU: LAGG_XLOCK(sc); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (lp->lp_ioctl != NULL) error = (*lp->lp_ioctl)(lp->lp_ifp, cmd, data); else error = EINVAL; if (error != 0) { if_printf(ifp, "failed to change MTU to %d on port %s, " "reverting all ports to original MTU (%d)\n", ifr->ifr_mtu, lp->lp_ifp->if_xname, ifp->if_mtu); break; } } if (error == 0) { ifp->if_mtu = ifr->ifr_mtu; } else { /* set every port back to the original MTU */ ifr->ifr_mtu = ifp->if_mtu; CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (lp->lp_ioctl != NULL) (*lp->lp_ioctl)(lp->lp_ifp, cmd, data); } } LAGG_XUNLOCK(sc); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } #ifdef RATELIMIT static int lagg_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; struct lagg_port *lp; struct lagg_lb *lb; uint32_t p; LAGG_RLOCK(); switch (sc->sc_proto) { case LAGG_PROTO_FAILOVER: lp = lagg_link_active(sc, sc->sc_primary); break; case LAGG_PROTO_LOADBALANCE: if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 || params->hdr.flowtype == M_HASHTYPE_NONE) { LAGG_RUNLOCK(); return (EOPNOTSUPP); } p = params->hdr.flowid >> sc->flowid_shift; p %= sc->sc_count; lb = (struct lagg_lb *)sc->sc_psc; lp = lb->lb_ports[p]; lp = lagg_link_active(sc, lp); break; case LAGG_PROTO_LACP: if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 || params->hdr.flowtype == M_HASHTYPE_NONE) { LAGG_RUNLOCK(); return (EOPNOTSUPP); } lp = lacp_select_tx_port_by_hash(sc, params->hdr.flowid); break; default: LAGG_RUNLOCK(); return (EOPNOTSUPP); } if (lp == NULL) { LAGG_RUNLOCK(); return (EOPNOTSUPP); } ifp = lp->lp_ifp; LAGG_RUNLOCK(); if (ifp == NULL || ifp->if_snd_tag_alloc == NULL || (ifp->if_capenable & IFCAP_TXRTLMT) == 0) return (EOPNOTSUPP); /* forward allocation request */ return (ifp->if_snd_tag_alloc(ifp, params, ppmt)); } static void lagg_snd_tag_free(struct m_snd_tag *tag) { tag->ifp->if_snd_tag_free(tag); } #endif static int lagg_setmulti(struct lagg_port *lp) { struct lagg_softc *sc = lp->lp_softc; struct ifnet *ifp = lp->lp_ifp; struct ifnet *scifp = sc->sc_ifp; struct lagg_mc *mc; struct ifmultiaddr *ifma; int error; IF_ADDR_WLOCK(scifp); CK_STAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mc = malloc(sizeof(struct lagg_mc), M_LAGG, M_NOWAIT); if (mc == NULL) { IF_ADDR_WUNLOCK(scifp); return (ENOMEM); } bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len); mc->mc_addr.sdl_index = ifp->if_index; mc->mc_ifma = NULL; SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries); } IF_ADDR_WUNLOCK(scifp); SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) { error = if_addmulti(ifp, (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma); if (error) return (error); } return (0); } static int lagg_clrmulti(struct lagg_port *lp) { struct lagg_mc *mc; LAGG_XLOCK_ASSERT(lp->lp_softc); 
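
The SIOCSIFMTU case above is deliberately all-or-nothing: the new MTU is offered to every port in turn, and the first refusal causes all ports to be pushed back to the lagg's original MTU so members never disagree. A stand-alone model of that try-then-revert idiom; set_mtu() and the port count are stand-ins for the port list and lp_ioctl, not kernel API:

#include <stdio.h>

#define NPORTS 3

static int
set_mtu(int port, int mtu)
{
	return ((port == 2 && mtu > 1500) ? -1 : 0); /* port 2 rejects jumbo */
}

int
main(void)
{
	int cur = 1500, want = 9000, i, failed = -1;

	for (i = 0; i < NPORTS; i++)
		if (set_mtu(i, want) != 0) {
			failed = i;
			break;
		}
	if (failed == -1) {
		cur = want;			/* every port accepted it */
	} else {
		for (i = 0; i < NPORTS; i++)	/* revert all ports */
			set_mtu(i, cur);
	}
	printf("resulting mtu %d\n", cur);	/* prints 1500 here */
	return (0);
}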
while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) { SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries); if (mc->mc_ifma && lp->lp_detaching == 0) if_delmulti_ifma(mc->mc_ifma); free(mc, M_LAGG); } return (0); } static int lagg_setcaps(struct lagg_port *lp, int cap) { struct ifreq ifr; if (lp->lp_ifp->if_capenable == cap) return (0); if (lp->lp_ioctl == NULL) return (ENXIO); ifr.ifr_reqcap = cap; return ((*lp->lp_ioctl)(lp->lp_ifp, SIOCSIFCAP, (caddr_t)&ifr)); } /* Handle a ref counted flag that should be set on the lagg port as well */ static int lagg_setflag(struct lagg_port *lp, int flag, int status, int (*func)(struct ifnet *, int)) { struct lagg_softc *sc = lp->lp_softc; struct ifnet *scifp = sc->sc_ifp; struct ifnet *ifp = lp->lp_ifp; int error; LAGG_XLOCK_ASSERT(sc); status = status ? (scifp->if_flags & flag) : 0; /* Now "status" contains the flag value or 0 */ /* * See if recorded ports status is different from what * we want it to be. If it is, flip it. We record ports * status in lp_ifflags so that we won't clear ports flag * we haven't set. In fact, we don't clear or set ports * flags directly, but get or release references to them. * That's why we can be sure that recorded flags still are * in accord with actual ports flags. */ if (status != (lp->lp_ifflags & flag)) { error = (*func)(ifp, status); if (error) return (error); lp->lp_ifflags &= ~flag; lp->lp_ifflags |= status; } return (0); } /* * Handle IFF_* flags that require certain changes on the lagg port * if "status" is true, update ports flags respective to the lagg * if "status" is false, forcedly clear the flags set on port. */ static int lagg_setflags(struct lagg_port *lp, int status) { int error, i; for (i = 0; lagg_pflags[i].flag; i++) { error = lagg_setflag(lp, lagg_pflags[i].flag, status, lagg_pflags[i].func); if (error) return (error); } return (0); } static int lagg_transmit(struct ifnet *ifp, struct mbuf *m) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; int error; LAGG_RLOCK(); /* We need a Tx algorithm and at least one port */ if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) { LAGG_RUNLOCK(); m_freem(m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENXIO); } ETHER_BPF_MTAP(ifp, m); error = lagg_proto_start(sc, m); LAGG_RUNLOCK(); if (error != 0) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } /* * The ifp->if_qflush entry point for lagg(4) is no-op. 
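
lagg_setflag() above never toggles a member's flags directly: lp_ifflags shadows exactly the flags the lagg itself acquired, so a flag the administrator set on the port is never released during teardown. A small model of that bookkeeping; FLAG_PROMISC and the prints are illustrative, not kernel names:

#include <stdio.h>

#define FLAG_PROMISC 0x1

static int lagg_flags = FLAG_PROMISC;	/* flags on the lagg interface */
static int port_shadow;			/* plays the role of lp_ifflags */

static void
sync_flag(int flag, int status)
{
	int want = status ? (lagg_flags & flag) : 0;

	if (want != (port_shadow & flag)) {
		/* here the kernel takes or drops a reference via (*func)() */
		port_shadow = (port_shadow & ~flag) | want;
		printf("flag 0x%x %s\n", flag, want ? "held" : "released");
	}
}

int
main(void)
{
	sync_flag(FLAG_PROMISC, 1);	/* propagate: takes the reference */
	sync_flag(FLAG_PROMISC, 1);	/* already in sync: no-op */
	sync_flag(FLAG_PROMISC, 0);	/* port leaving: release it */
	return (0);
}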
*/ static void lagg_qflush(struct ifnet *ifp __unused) { } static struct mbuf * lagg_input(struct ifnet *ifp, struct mbuf *m) { struct lagg_port *lp = ifp->if_lagg; struct lagg_softc *sc = lp->lp_softc; struct ifnet *scifp = sc->sc_ifp; LAGG_RLOCK(); if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || lp->lp_detaching != 0 || sc->sc_proto == LAGG_PROTO_NONE) { LAGG_RUNLOCK(); m_freem(m); return (NULL); } ETHER_BPF_MTAP(scifp, m); m = lagg_proto_input(sc, lp, m); if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) { m_freem(m); m = NULL; } LAGG_RUNLOCK(); return (m); } static int lagg_media_change(struct ifnet *ifp) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; if (sc->sc_ifflags & IFF_DEBUG) printf("%s\n", __func__); /* Ignore */ return (0); } static void lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; struct lagg_port *lp; imr->ifm_status = IFM_AVALID; imr->ifm_active = IFM_ETHER | IFM_AUTO; LAGG_RLOCK(); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (LAGG_PORTACTIVE(lp)) imr->ifm_status |= IFM_ACTIVE; } LAGG_RUNLOCK(); } static void lagg_linkstate(struct lagg_softc *sc) { struct lagg_port *lp; int new_link = LINK_STATE_DOWN; uint64_t speed; LAGG_XLOCK_ASSERT(sc); /* LACP handles link state itself */ if (sc->sc_proto == LAGG_PROTO_LACP) return; /* Our link is considered up if at least one of our ports is active */ LAGG_RLOCK(); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (lp->lp_ifp->if_link_state == LINK_STATE_UP) { new_link = LINK_STATE_UP; break; } } LAGG_RUNLOCK(); if_link_state_change(sc->sc_ifp, new_link); /* Update if_baudrate to reflect the max possible speed */ switch (sc->sc_proto) { case LAGG_PROTO_FAILOVER: sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ? sc->sc_primary->lp_ifp->if_baudrate : 0; break; case LAGG_PROTO_ROUNDROBIN: case LAGG_PROTO_LOADBALANCE: case LAGG_PROTO_BROADCAST: speed = 0; LAGG_RLOCK(); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) speed += lp->lp_ifp->if_baudrate; LAGG_RUNLOCK(); sc->sc_ifp->if_baudrate = speed; break; case LAGG_PROTO_LACP: /* LACP updates if_baudrate itself */ break; } } static void lagg_port_state(struct ifnet *ifp, int state) { struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg; struct lagg_softc *sc = NULL; if (lp != NULL) sc = lp->lp_softc; if (sc == NULL) return; LAGG_XLOCK(sc); lagg_linkstate(sc); lagg_proto_linkstate(sc, lp); LAGG_XUNLOCK(sc); } struct lagg_port * lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp) { struct lagg_port *lp_next, *rval = NULL; /* * Search a port which reports an active link state. */ /* * This is called with either LAGG_RLOCK() held or * LAGG_XLOCK(sc) held. 
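
lagg_linkstate() above also derives an aggregate if_baudrate: the sum of member speeds for roundrobin, loadbalance and broadcast, but only the primary's speed for failover, which transmits on one port at a time. A toy calculation with three hypothetical 10 Gb/s members:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t port_bps[3] = { 10000000000ULL, 10000000000ULL, 10000000000ULL };
	uint64_t sum = 0;
	int i;

	for (i = 0; i < 3; i++)
		sum += port_bps[i];
	/* roundrobin/loadbalance/broadcast advertise 30 Gb/s, failover 10 Gb/s */
	printf("aggregate %ju bps, failover %ju bps\n",
	    (uintmax_t)sum, (uintmax_t)port_bps[0]);
	return (0);
}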
*/ if (!in_epoch(net_epoch_preempt)) LAGG_XLOCK_ASSERT(sc); if (lp == NULL) goto search; if (LAGG_PORTACTIVE(lp)) { rval = lp; goto found; } if ((lp_next = CK_SLIST_NEXT(lp, lp_entries)) != NULL && LAGG_PORTACTIVE(lp_next)) { rval = lp_next; goto found; } search: CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) { if (LAGG_PORTACTIVE(lp_next)) { return (lp_next); } } found: return (rval); } int lagg_enqueue(struct ifnet *ifp, struct mbuf *m) { return (ifp->if_transmit)(ifp, m); } /* * Simple round robin aggregation */ static void lagg_rr_attach(struct lagg_softc *sc) { sc->sc_seq = 0; sc->sc_bkt_count = sc->sc_bkt; } static int lagg_rr_start(struct lagg_softc *sc, struct mbuf *m) { struct lagg_port *lp; uint32_t p; if (sc->sc_bkt_count == 0 && sc->sc_bkt > 0) sc->sc_bkt_count = sc->sc_bkt; if (sc->sc_bkt > 0) { atomic_subtract_int(&sc->sc_bkt_count, 1); if (atomic_cmpset_int(&sc->sc_bkt_count, 0, sc->sc_bkt)) p = atomic_fetchadd_32(&sc->sc_seq, 1); else p = sc->sc_seq; } else p = atomic_fetchadd_32(&sc->sc_seq, 1); p %= sc->sc_count; lp = CK_SLIST_FIRST(&sc->sc_ports); while (p--) lp = CK_SLIST_NEXT(lp, lp_entries); /* * Check the port's link state. This will return the next active * port if the link is down or the port is NULL. */ if ((lp = lagg_link_active(sc, lp)) == NULL) { m_freem(m); return (ENETDOWN); } /* Send mbuf */ return (lagg_enqueue(lp->lp_ifp, m)); } static struct mbuf * lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) { struct ifnet *ifp = sc->sc_ifp; /* Just pass in the packet to our lagg device */ m->m_pkthdr.rcvif = ifp; return (m); } /* * Broadcast mode */ static int lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m) { int active_ports = 0; int errors = 0; int ret; struct lagg_port *lp, *last = NULL; struct mbuf *m0; LAGG_RLOCK_ASSERT(); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (!LAGG_PORTACTIVE(lp)) continue; active_ports++; if (last != NULL) { m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (m0 == NULL) { ret = ENOBUFS; errors++; break; } ret = lagg_enqueue(last->lp_ifp, m0); if (ret != 0) errors++; } last = lp; } if (last == NULL) { m_freem(m); return (ENOENT); } if ((last = lagg_link_active(sc, last)) == NULL) { m_freem(m); return (ENETDOWN); } ret = lagg_enqueue(last->lp_ifp, m); if (ret != 0) errors++; if (errors == 0) return (ret); return (0); } static struct mbuf* lagg_bcast_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) { struct ifnet *ifp = sc->sc_ifp; /* Just pass in the packet to our lagg device */ m->m_pkthdr.rcvif = ifp; return (m); } /* * Active failover */ static int lagg_fail_start(struct lagg_softc *sc, struct mbuf *m) { struct lagg_port *lp; /* Use the master port if active or the next available port */ if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) { m_freem(m); return (ENETDOWN); } /* Send mbuf */ return (lagg_enqueue(lp->lp_ifp, m)); } static struct mbuf * lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) { struct ifnet *ifp = sc->sc_ifp; struct lagg_port *tmp_tp; if (lp == sc->sc_primary || V_lagg_failover_rx_all) { m->m_pkthdr.rcvif = ifp; return (m); } if (!LAGG_PORTACTIVE(sc->sc_primary)) { tmp_tp = lagg_link_active(sc, sc->sc_primary); /* * If tmp_tp is null, we've received a packet when all * our links are down. Weird, but process it anyways. 
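
lagg_rr_start() above extends plain round robin with a bucket: sc_bkt consecutive packets stay on one port before sc_seq advances, trading a little balance for fewer per-flow reorderings. A single-threaded model of the resulting schedule; the kernel uses the atomics shown above, and two ports with a bucket of three are illustrative values:

#include <stdio.h>

int
main(void)
{
	unsigned bkt = 3, bkt_count = 0, seq = 0, nports = 2;
	int pkt;

	for (pkt = 0; pkt < 12; pkt++) {
		unsigned p;

		if (bkt_count == 0)
			bkt_count = bkt;
		if (--bkt_count == 0) {
			bkt_count = bkt;	/* bucket drained: refill, advance port */
			p = seq++;
		} else
			p = seq;		/* stay on the current port */
		printf("packet %2d -> port %u\n", pkt, p % nports);
	}
	/* prints ports 0 0 0 1 1 1 0 0 0 1 1 1 */
	return (0);
}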
*/ if ((tmp_tp == NULL || tmp_tp == lp)) { m->m_pkthdr.rcvif = ifp; return (m); } } m_freem(m); return (NULL); } /* * Loadbalancing */ static void lagg_lb_attach(struct lagg_softc *sc) { struct lagg_port *lp; struct lagg_lb *lb; LAGG_XLOCK_ASSERT(sc); lb = malloc(sizeof(struct lagg_lb), M_LAGG, M_WAITOK | M_ZERO); lb->lb_key = m_ether_tcpip_hash_init(); sc->sc_psc = lb; CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lagg_lb_port_create(lp); } static void lagg_lb_detach(struct lagg_softc *sc) { struct lagg_lb *lb; lb = (struct lagg_lb *)sc->sc_psc; if (lb != NULL) free(lb, M_LAGG); } static int lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp) { struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc; struct lagg_port *lp_next; int i = 0, rv; rv = 0; bzero(&lb->lb_ports, sizeof(lb->lb_ports)); LAGG_XLOCK_ASSERT(sc); CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) { if (lp_next == lp) continue; if (i >= LAGG_MAX_PORTS) { rv = EINVAL; break; } if (sc->sc_ifflags & IFF_DEBUG) printf("%s: port %s at index %d\n", sc->sc_ifname, lp_next->lp_ifp->if_xname, i); lb->lb_ports[i++] = lp_next; } return (rv); } static int lagg_lb_port_create(struct lagg_port *lp) { struct lagg_softc *sc = lp->lp_softc; return (lagg_lb_porttable(sc, NULL)); } static void lagg_lb_port_destroy(struct lagg_port *lp) { struct lagg_softc *sc = lp->lp_softc; lagg_lb_porttable(sc, lp); } static int lagg_lb_start(struct lagg_softc *sc, struct mbuf *m) { struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc; struct lagg_port *lp = NULL; uint32_t p = 0; if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) p = m->m_pkthdr.flowid >> sc->flowid_shift; else p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key); p %= sc->sc_count; lp = lb->lb_ports[p]; /* * Check the port's link state. This will return the next active * port if the link is down or the port is NULL. 
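
lagg_lb_start() above reduces either the mbuf's flowid, shifted down by flowid_shift, or a freshly computed header hash, modulo the port count. A worked example of the index arithmetic with made-up numbers:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint32_t flowid = 0x1a2b3c4d;	/* e.g. an RSS hash from the NIC */
	unsigned shift = 16, nports = 4;
	uint32_t p = (flowid >> shift) % nports;

	/* 0x1a2b3c4d >> 16 = 0x1a2b = 6699, and 6699 % 4 = 3 */
	printf("flowid 0x%08x -> port index %u\n", (unsigned)flowid, (unsigned)p);
	return (0);
}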
*/ if ((lp = lagg_link_active(sc, lp)) == NULL) { m_freem(m); return (ENETDOWN); } /* Send mbuf */ return (lagg_enqueue(lp->lp_ifp, m)); } static struct mbuf * lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) { struct ifnet *ifp = sc->sc_ifp; /* Just pass in the packet to our lagg device */ m->m_pkthdr.rcvif = ifp; return (m); } /* * 802.3ad LACP */ static void lagg_lacp_attach(struct lagg_softc *sc) { struct lagg_port *lp; lacp_attach(sc); LAGG_XLOCK_ASSERT(sc); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lacp_port_create(lp); } static void lagg_lacp_detach(struct lagg_softc *sc) { struct lagg_port *lp; void *psc; LAGG_XLOCK_ASSERT(sc); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lacp_port_destroy(lp); psc = sc->sc_psc; sc->sc_psc = NULL; lacp_detach(psc); } static void lagg_lacp_lladdr(struct lagg_softc *sc) { struct lagg_port *lp; LAGG_SXLOCK_ASSERT(sc); /* purge all the lacp ports */ CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lacp_port_destroy(lp); /* add them back in */ CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lacp_port_create(lp); } static int lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m) { struct lagg_port *lp; lp = lacp_select_tx_port(sc, m); if (lp == NULL) { m_freem(m); return (ENETDOWN); } /* Send mbuf */ return (lagg_enqueue(lp->lp_ifp, m)); } static struct mbuf * lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) { struct ifnet *ifp = sc->sc_ifp; struct ether_header *eh; u_short etype; eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); /* Tap off LACP control messages */ if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) { m = lacp_input(lp, m); if (m == NULL) return (NULL); } /* * If the port is not collecting or not in the active aggregator then * free and return. */ if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) { m_freem(m); return (NULL); } m->m_pkthdr.rcvif = ifp; return (m); } Index: projects/runtime-coverage-v2/sys/net/if_lagg.h =================================================================== --- projects/runtime-coverage-v2/sys/net/if_lagg.h (revision 347075) +++ projects/runtime-coverage-v2/sys/net/if_lagg.h (revision 347076) @@ -1,267 +1,269 @@ /* $OpenBSD: if_trunk.h,v 1.11 2007/01/31 06:20:19 reyk Exp $ */ /* * Copyright (c) 2005, 2006 Reyk Floeter * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
* * $FreeBSD$ */ #ifndef _NET_LAGG_H #define _NET_LAGG_H /* * Global definitions */ #define LAGG_MAX_PORTS 32 /* logically */ #define LAGG_MAX_NAMESIZE 32 /* name of a protocol */ #define LAGG_MAX_STACKING 4 /* maximum number of stacked laggs */ /* Lagg flags */ #define LAGG_F_HASHL2 0x00000001 /* hash layer 2 */ #define LAGG_F_HASHL3 0x00000002 /* hash layer 3 */ #define LAGG_F_HASHL4 0x00000004 /* hash layer 4 */ #define LAGG_F_HASHMASK 0x00000007 /* Port flags */ #define LAGG_PORT_SLAVE 0x00000000 /* normal enslaved port */ #define LAGG_PORT_MASTER 0x00000001 /* primary port */ #define LAGG_PORT_STACK 0x00000002 /* stacked lagg port */ #define LAGG_PORT_ACTIVE 0x00000004 /* port is active */ #define LAGG_PORT_COLLECTING 0x00000008 /* port is receiving frames */ #define LAGG_PORT_DISTRIBUTING 0x00000010 /* port is sending frames */ #define LAGG_PORT_BITS "\20\01MASTER\02STACK\03ACTIVE\04COLLECTING" \ "\05DISTRIBUTING" /* Supported lagg PROTOs */ typedef enum { LAGG_PROTO_NONE = 0, /* no lagg protocol defined */ LAGG_PROTO_ROUNDROBIN, /* simple round robin */ LAGG_PROTO_FAILOVER, /* active failover */ LAGG_PROTO_LOADBALANCE, /* loadbalance */ LAGG_PROTO_LACP, /* 802.3ad lacp */ LAGG_PROTO_BROADCAST, /* broadcast */ LAGG_PROTO_MAX, } lagg_proto; struct lagg_protos { const char *lpr_name; lagg_proto lpr_proto; }; #define LAGG_PROTO_DEFAULT LAGG_PROTO_FAILOVER #define LAGG_PROTOS { \ { "failover", LAGG_PROTO_FAILOVER }, \ { "lacp", LAGG_PROTO_LACP }, \ { "loadbalance", LAGG_PROTO_LOADBALANCE }, \ { "roundrobin", LAGG_PROTO_ROUNDROBIN }, \ { "broadcast", LAGG_PROTO_BROADCAST }, \ { "none", LAGG_PROTO_NONE }, \ { "default", LAGG_PROTO_DEFAULT } \ } /* * lagg ioctls. */ /* * LACP current operational parameters structure. */ struct lacp_opreq { uint16_t actor_prio; uint8_t actor_mac[ETHER_ADDR_LEN]; uint16_t actor_key; uint16_t actor_portprio; uint16_t actor_portno; uint8_t actor_state; uint16_t partner_prio; uint8_t partner_mac[ETHER_ADDR_LEN]; uint16_t partner_key; uint16_t partner_portprio; uint16_t partner_portno; uint8_t partner_state; }; /* lagg port settings */ struct lagg_reqport { char rp_ifname[IFNAMSIZ]; /* name of the lagg */ char rp_portname[IFNAMSIZ]; /* name of the port */ u_int32_t rp_prio; /* port priority */ u_int32_t rp_flags; /* port flags */ union { struct lacp_opreq rpsc_lacp; } rp_psc; #define rp_lacpreq rp_psc.rpsc_lacp }; #define SIOCGLAGGPORT _IOWR('i', 140, struct lagg_reqport) #define SIOCSLAGGPORT _IOW('i', 141, struct lagg_reqport) #define SIOCSLAGGDELPORT _IOW('i', 142, struct lagg_reqport) /* lagg, ports and options */ struct lagg_reqall { char ra_ifname[IFNAMSIZ]; /* name of the lagg */ u_int ra_proto; /* lagg protocol */ size_t ra_size; /* size of buffer */ struct lagg_reqport *ra_port; /* allocated buffer */ int ra_ports; /* total port count */ union { struct lacp_opreq rpsc_lacp; } ra_psc; #define ra_lacpreq ra_psc.rpsc_lacp }; #define SIOCGLAGG _IOWR('i', 143, struct lagg_reqall) #define SIOCSLAGG _IOW('i', 144, struct lagg_reqall) struct lagg_reqflags { char rf_ifname[IFNAMSIZ]; /* name of the lagg */ uint32_t rf_flags; /* lagg protocol */ }; #define SIOCGLAGGFLAGS _IOWR('i', 145, struct lagg_reqflags) #define SIOCSLAGGHASH _IOW('i', 146, struct lagg_reqflags) struct lagg_reqopts { char ro_ifname[IFNAMSIZ]; /* name of the lagg */ int ro_opts; /* Option bitmap */ #define LAGG_OPT_NONE 0x00 #define LAGG_OPT_USE_FLOWID 0x01 /* enable use of flowid */ /* Pseudo flags which are used in ro_opts but not stored into sc_opts. 
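
The lagg_reqflags structure and the SIOCGLAGGFLAGS/SIOCSLAGGHASH pair defined above are the userland interface to the hash layers. A minimal sketch that enables L3 plus L4 hashing and reads the setting back, assuming root and an existing lagg0:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/ethernet.h>
#include <net/if_lagg.h>
#include <err.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct lagg_reqflags rf;
	int s;

	if ((s = socket(AF_LOCAL, SOCK_DGRAM, 0)) == -1)
		err(1, "socket");
	memset(&rf, 0, sizeof(rf));
	strlcpy(rf.rf_ifname, "lagg0", sizeof(rf.rf_ifname));
	rf.rf_flags = LAGG_F_HASHL3 | LAGG_F_HASHL4;	/* hash addresses + ports */
	if (ioctl(s, SIOCSLAGGHASH, &rf) == -1)
		err(1, "SIOCSLAGGHASH");
	if (ioctl(s, SIOCGLAGGFLAGS, &rf) == -1)
		err(1, "SIOCGLAGGFLAGS");
	printf("hash layers: 0x%x\n", (unsigned)rf.rf_flags);
	close(s);
	return (0);
}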
*/ #define LAGG_OPT_FLOWIDSHIFT 0x02 /* set flowid shift */ +#define LAGG_OPT_USE_NUMA 0x04 /* enable use of numa */ #define LAGG_OPT_FLOWIDSHIFT_MASK 0x1f /* flowid is uint32_t */ #define LAGG_OPT_LACP_STRICT 0x10 /* LACP strict mode */ #define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */ #define LAGG_OPT_LACP_RXTEST 0x40 /* LACP debug: rxtest */ #define LAGG_OPT_LACP_TIMEOUT 0x80 /* LACP timeout */ u_int ro_count; /* number of ports */ u_int ro_active; /* active port count */ u_int ro_flapping; /* number of flapping events */ int ro_flowid_shift; /* shift the flowid */ uint32_t ro_bkt; /* packet bucket for roundrobin */ }; #define SIOCGLAGGOPTS _IOWR('i', 152, struct lagg_reqopts) #define SIOCSLAGGOPTS _IOW('i', 153, struct lagg_reqopts) -#define LAGG_OPT_BITS "\020\001USE_FLOWID\005LACP_STRICT" \ - "\006LACP_TXTEST\007LACP_RXTEST" +#define LAGG_OPT_BITS "\020\001USE_FLOWID\003USE_NUMA" \ + "\005LACP_STRICT\006LACP_TXTEST" \ + "\007LACP_RXTEST" #ifdef _KERNEL /* * Internal kernel part */ #define LAGG_PORTACTIVE(_tp) ( \ ((_tp)->lp_ifp->if_link_state == LINK_STATE_UP) && \ ((_tp)->lp_ifp->if_flags & IFF_UP) \ ) struct lagg_ifreq { union { struct ifreq ifreq; struct { char ifr_name[IFNAMSIZ]; struct sockaddr_storage ifr_ss; } ifreq_storage; } ifreq; }; #define sc_ifflags sc_ifp->if_flags /* flags */ #define sc_ifname sc_ifp->if_xname /* name */ /* Private data used by the loadbalancing protocol */ struct lagg_lb { u_int32_t lb_key; struct lagg_port *lb_ports[LAGG_MAX_PORTS]; }; struct lagg_mc { struct sockaddr_dl mc_addr; struct ifmultiaddr *mc_ifma; SLIST_ENTRY(lagg_mc) mc_entries; }; struct lagg_counters { uint64_t val[IFCOUNTERS]; }; struct lagg_softc { struct ifnet *sc_ifp; /* virtual interface */ struct rmlock sc_mtx; struct sx sc_sx; int sc_proto; /* lagg protocol */ u_int sc_count; /* number of ports */ u_int sc_active; /* active port count */ u_int sc_flapping; /* number of flapping events */ struct lagg_port *sc_primary; /* primary port */ struct ifmedia sc_media; /* media config */ void *sc_psc; /* protocol data */ uint32_t sc_seq; /* sequence counter */ uint32_t sc_flags; int sc_destroying; /* destroying lagg */ CK_SLIST_HEAD(__tplhd, lagg_port) sc_ports; /* list of interfaces */ SLIST_ENTRY(lagg_softc) sc_entries; eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; struct callout sc_callout; u_int sc_opts; int flowid_shift; /* shift the flowid */ uint32_t sc_bkt; /* packets bucket for roundrobin */ uint32_t sc_bkt_count; /* packets bucket count for roundrobin */ struct lagg_counters detached_counters; /* detached ports sum */ }; struct lagg_port { struct ifnet *lp_ifp; /* physical interface */ struct lagg_softc *lp_softc; /* parent lagg */ uint8_t lp_lladdr[ETHER_ADDR_LEN]; u_char lp_iftype; /* interface type */ uint32_t lp_prio; /* port priority */ uint32_t lp_flags; /* port flags */ int lp_ifflags; /* saved ifp flags */ int lp_ifcapenable; /* saved ifp capenable */ void *lh_cookie; /* if state hook */ void *lp_psc; /* protocol data */ int lp_detaching; /* ifnet is detaching */ SLIST_HEAD(__mclhd, lagg_mc) lp_mc_head; /* multicast addresses */ /* Redirected callbacks */ int (*lp_ioctl)(struct ifnet *, u_long, caddr_t); int (*lp_output)(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); struct lagg_counters port_counters; /* ifp counters copy */ CK_SLIST_ENTRY(lagg_port) lp_entries; struct epoch_context lp_epoch_ctx; }; extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); extern void (*lagg_linkstate_p)(struct ifnet *, int); int
lagg_enqueue(struct ifnet *, struct mbuf *); SYSCTL_DECL(_net_link_lagg); #endif /* _KERNEL */ #endif /* _NET_LAGG_H */ Index: projects/runtime-coverage-v2/sys/net/iflib.c =================================================================== --- projects/runtime-coverage-v2/sys/net/iflib.c (revision 347075) +++ projects/runtime-coverage-v2/sys/net/iflib.c (revision 347076) @@ -1,6730 +1,6734 @@ /*- * Copyright (c) 2014-2018, Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_acpi.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifdi_if.h" #ifdef PCI_IOV #include #endif #include /* * enable accounting of every mbuf as it comes in to and goes out of * iflib's software descriptor references */ #define MEMORY_LOGGING 0 /* * Enable mbuf vectors for compressing long mbuf chains */ /* * NB: * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead * we prefetch needs to be determined by the time spent in m_free vis a vis * the cost of a prefetch. This will of course vary based on the workload: * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which * is quite expensive, thus suggesting very little prefetch. * - small packet forwarding which is just returning a single mbuf to * UMA will typically be very fast vis a vis the cost of a memory * access. 
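
The NB comment above is about choosing a prefetch distance: prefetch too far ahead and the lines are evicted before use, too near and nothing is hidden. A generic userland model of prefetch-ahead in a free loop; the distance of 4 is purely illustrative, and __builtin_prefetch is the GCC/Clang builtin rather than the kernel's prefetch() wrappers defined further down:

#include <stdlib.h>

#define PREFETCH_DIST 4	/* tunable: how far ahead to warm the cache */

static void
free_batch(void **items, int n)
{
	for (int i = 0; i < n; i++) {
		if (i + PREFETCH_DIST < n)
			__builtin_prefetch(items[i + PREFETCH_DIST]);
		free(items[i]);	/* stands in for m_free() */
	}
}

int
main(void)
{
	void *items[16];

	for (int i = 0; i < 16; i++)
		items[i] = malloc(64);
	free_batch(items, 16);
	return (0);
}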
*/ /* * File organization: * - private structures * - iflib private utility functions * - ifnet functions * - vlan registry and other exported functions * - iflib public core functions * * */ MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); struct iflib_txq; typedef struct iflib_txq *iflib_txq_t; struct iflib_rxq; typedef struct iflib_rxq *iflib_rxq_t; struct iflib_fl; typedef struct iflib_fl *iflib_fl_t; struct iflib_ctx; static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid); static void iflib_timer(void *arg); typedef struct iflib_filter_info { driver_filter_t *ifi_filter; void *ifi_filter_arg; struct grouptask *ifi_task; void *ifi_ctx; } *iflib_filter_info_t; struct iflib_ctx { KOBJ_FIELDS; /* * Pointer to hardware driver's softc */ void *ifc_softc; device_t ifc_dev; if_t ifc_ifp; cpuset_t ifc_cpus; if_shared_ctx_t ifc_sctx; struct if_softc_ctx ifc_softc_ctx; struct sx ifc_ctx_sx; struct mtx ifc_state_mtx; iflib_txq_t ifc_txqs; iflib_rxq_t ifc_rxqs; uint32_t ifc_if_flags; uint32_t ifc_flags; uint32_t ifc_max_fl_buf_size; uint32_t ifc_rx_mbuf_sz; int ifc_link_state; int ifc_link_irq; int ifc_watchdog_events; struct cdev *ifc_led_dev; struct resource *ifc_msix_mem; struct if_irq ifc_legacy_irq; struct grouptask ifc_admin_task; struct grouptask ifc_vflr_task; struct iflib_filter_info ifc_filter_info; struct ifmedia ifc_media; + struct ifmedia *ifc_mediap; struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; uint16_t ifc_sysctl_qs_eq_override; uint16_t ifc_sysctl_rx_budget; uint16_t ifc_sysctl_tx_abdicate; uint16_t ifc_sysctl_core_offset; #define CORE_OFFSET_UNSPECIFIED 0xffff uint8_t ifc_sysctl_separate_txrx; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; struct if_txrx ifc_txrx; #define isc_txd_encap ifc_txrx.ift_txd_encap #define isc_txd_flush ifc_txrx.ift_txd_flush #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update #define isc_rxd_available ifc_txrx.ift_rxd_available #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_flush ifc_txrx.ift_rxd_flush #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_legacy_intr ifc_txrx.ift_legacy_intr eventhandler_tag ifc_vlan_attach_event; eventhandler_tag ifc_vlan_detach_event; struct ether_addr ifc_mac; char ifc_mtx_name[16]; }; void * iflib_get_softc(if_ctx_t ctx) { return (ctx->ifc_softc); } device_t iflib_get_dev(if_ctx_t ctx) { return (ctx->ifc_dev); } if_t iflib_get_ifp(if_ctx_t ctx) { return (ctx->ifc_ifp); } struct ifmedia * iflib_get_media(if_ctx_t ctx) { - return (&ctx->ifc_media); + return (ctx->ifc_mediap); } uint32_t iflib_get_flags(if_ctx_t ctx) { return (ctx->ifc_flags); } void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) { bcopy(mac, ctx->ifc_mac.octet, ETHER_ADDR_LEN); } if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx) { return (&ctx->ifc_softc_ctx); } if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx) { return (ctx->ifc_sctx); } #define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2) #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*)) #define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & (CACHE_LINE_SIZE-1))) #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP) #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) typedef struct iflib_sw_rx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ caddr_t *ifsd_cl; /* direct 
cluster pointer for rx */ bus_addr_t *ifsd_ba; /* bus addr of cluster for rx */ } iflib_rxsd_array_t; typedef struct iflib_sw_tx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ bus_dmamap_t *ifsd_tso_map; /* bus_dma maps for TSO packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ } if_txsd_vec_t; /* magic number that should be high enough for any hardware */ #define IFLIB_MAX_TX_SEGS 128 #define IFLIB_RX_COPY_THRESH 128 #define IFLIB_MAX_RX_REFRESH 32 /* The minimum descriptors per second before we start coalescing */ #define IFLIB_MIN_DESC_SEC 16384 #define IFLIB_DEFAULT_TX_UPDATE_FREQ 16 #define IFLIB_QUEUE_IDLE 0 #define IFLIB_QUEUE_HUNG 1 #define IFLIB_QUEUE_WORKING 2 /* maximum number of txqs that can share an rx interrupt */ #define IFLIB_MAX_TX_SHARED_INTR 4 /* this should really scale with ring size - this is a fairly arbitrary value */ #define TX_BATCH_SIZE 32 #define IFLIB_RESTART_BUDGET 8 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) struct iflib_txq { qidx_t ift_in_use; qidx_t ift_cidx; qidx_t ift_cidx_processed; qidx_t ift_pidx; uint8_t ift_gen; uint8_t ift_br_offset; uint16_t ift_npending; uint16_t ift_db_pending; uint16_t ift_rs_pending; /* implicit pad */ uint8_t ift_txd_size[8]; uint64_t ift_processed; uint64_t ift_cleaned; uint64_t ift_cleaned_prev; #if MEMORY_LOGGING uint64_t ift_enqueued; uint64_t ift_dequeued; #endif uint64_t ift_no_tx_dma_setup; uint64_t ift_no_desc_avail; uint64_t ift_mbuf_defrag_failed; uint64_t ift_mbuf_defrag; uint64_t ift_map_failed; uint64_t ift_txd_encap_efbig; uint64_t ift_pullups; uint64_t ift_last_timer_tick; struct mtx ift_mtx; struct mtx ift_db_mtx; /* constant values */ if_ctx_t ift_ctx; struct ifmp_ring *ift_br; struct grouptask ift_task; qidx_t ift_size; uint16_t ift_id; struct callout ift_timer; if_txsd_vec_t ift_sds; uint8_t ift_qstatus; uint8_t ift_closed; uint8_t ift_update_freq; struct iflib_filter_info ift_filter_info; bus_dma_tag_t ift_buf_tag; bus_dma_tag_t ift_tso_buf_tag; iflib_dma_info_t ift_ifdi; #define MTX_NAME_LEN 16 char ift_mtx_name[MTX_NAME_LEN]; char ift_db_mtx_name[MTX_NAME_LEN]; bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ift_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); struct iflib_fl { qidx_t ifl_cidx; qidx_t ifl_pidx; qidx_t ifl_credits; uint8_t ifl_gen; uint8_t ifl_rxd_size; #if MEMORY_LOGGING uint64_t ifl_m_enqueued; uint64_t ifl_m_dequeued; uint64_t ifl_cl_enqueued; uint64_t ifl_cl_dequeued; #endif /* implicit pad */ bitstr_t *ifl_rx_bitmap; qidx_t ifl_fragidx; /* constant */ qidx_t ifl_size; uint16_t ifl_buf_size; uint16_t ifl_cltype; uma_zone_t ifl_zone; iflib_rxsd_array_t ifl_sds; iflib_rxq_t ifl_rxq; uint8_t ifl_id; bus_dma_tag_t ifl_buf_tag; iflib_dma_info_t ifl_ifdi; uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE); caddr_t ifl_vm_addrs[IFLIB_MAX_RX_REFRESH]; qidx_t ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH]; } __aligned(CACHE_LINE_SIZE); static inline qidx_t get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen) { qidx_t used; if (pidx > cidx) used = pidx - cidx; else if (pidx < cidx) used = size - cidx + pidx; else if (gen == 0 && pidx == cidx) used = 0; else if (gen == 1 && pidx == cidx) used = size; else panic("bad state"); return (used); } #define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen)) #define IDXDIFF(head, tail, wrap) \ ((head) >= 
(tail) ? (head) - (tail) : (wrap) - (tail) + (head)) struct iflib_rxq { /* If there is a separate completion queue - * these are the cq cidx and pidx. Otherwise * these are unused. */ qidx_t ifr_size; qidx_t ifr_cq_cidx; qidx_t ifr_cq_pidx; uint8_t ifr_cq_gen; uint8_t ifr_fl_offset; if_ctx_t ifr_ctx; iflib_fl_t ifr_fl; uint64_t ifr_rx_irq; struct pfil_head *pfil; uint16_t ifr_id; uint8_t ifr_lro_enabled; uint8_t ifr_nfl; uint8_t ifr_ntxqirq; uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR]; struct lro_ctrl ifr_lc; struct grouptask ifr_task; struct iflib_filter_info ifr_filter_info; iflib_dma_info_t ifr_ifdi; /* dynamically allocate if any drivers need a value substantially larger than this */ struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ifr_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); typedef struct if_rxsd { caddr_t *ifsd_cl; iflib_fl_t ifsd_fl; qidx_t ifsd_cidx; } *if_rxsd_t; /* multiple of word size */ #ifdef __LP64__ #define PKT_INFO_SIZE 6 #define RXD_INFO_SIZE 5 #define PKT_TYPE uint64_t #else #define PKT_INFO_SIZE 11 #define RXD_INFO_SIZE 8 #define PKT_TYPE uint32_t #endif #define PKT_LOOP_BOUND ((PKT_INFO_SIZE/3)*3) #define RXD_LOOP_BOUND ((RXD_INFO_SIZE/4)*4) typedef struct if_pkt_info_pad { PKT_TYPE pkt_val[PKT_INFO_SIZE]; } *if_pkt_info_pad_t; typedef struct if_rxd_info_pad { PKT_TYPE rxd_val[RXD_INFO_SIZE]; } *if_rxd_info_pad_t; CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info)); CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info)); static inline void pkt_info_zero(if_pkt_info_t pi) { if_pkt_info_pad_t pi_pad; pi_pad = (if_pkt_info_pad_t)pi; pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; #ifndef __LP64__ pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; #endif } static device_method_t iflib_pseudo_methods[] = { DEVMETHOD(device_attach, noop_attach), DEVMETHOD(device_detach, iflib_pseudo_detach), DEVMETHOD_END }; driver_t iflib_pseudodriver = { "iflib_pseudo", iflib_pseudo_methods, sizeof(struct iflib_ctx), }; static inline void rxd_info_zero(if_rxd_info_t ri) { if_rxd_info_pad_t ri_pad; int i; ri_pad = (if_rxd_info_pad_t)ri; for (i = 0; i < RXD_LOOP_BOUND; i += 4) { ri_pad->rxd_val[i] = 0; ri_pad->rxd_val[i+1] = 0; ri_pad->rxd_val[i+2] = 0; ri_pad->rxd_val[i+3] = 0; } #ifdef __LP64__ ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0; #endif } /* * Only allow a single packet to take up most 1/nth of the tx ring */ #define MAX_SINGLE_PACKET_FRACTION 12 #define IF_BAD_DMA (bus_addr_t)-1 #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) #define CTX_LOCK_INIT(_sc) sx_init(&(_sc)->ifc_ctx_sx, "iflib ctx lock") #define CTX_LOCK(ctx) sx_xlock(&(ctx)->ifc_ctx_sx) #define CTX_UNLOCK(ctx) sx_xunlock(&(ctx)->ifc_ctx_sx) #define CTX_LOCK_DESTROY(ctx) sx_destroy(&(ctx)->ifc_ctx_sx) #define STATE_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_state_mtx, _name, "iflib state lock", MTX_DEF) #define STATE_LOCK(ctx) mtx_lock(&(ctx)->ifc_state_mtx) #define STATE_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_state_mtx) #define STATE_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_state_mtx) #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) void iflib_set_detach(if_ctx_t ctx) { STATE_LOCK(ctx); ctx->ifc_flags |= IFC_IN_DETACH; STATE_UNLOCK(ctx); } /* Our boot-time initialization 
hook */ static int iflib_module_event_handler(module_t, int, void *); static moduledata_t iflib_moduledata = { "iflib", iflib_module_event_handler, NULL }; DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(iflib, 1); MODULE_DEPEND(iflib, pci, 1, 1, 1); MODULE_DEPEND(iflib, ether, 1, 1, 1); TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); TASKQGROUP_DEFINE(if_config_tqg, 1, 1); #ifndef IFLIB_DEBUG_COUNTERS #ifdef INVARIANTS #define IFLIB_DEBUG_COUNTERS 1 #else #define IFLIB_DEBUG_COUNTERS 0 #endif /* !INVARIANTS */ #endif static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, "iflib driver parameters"); /* * XXX need to ensure that this can't accidentally cause the head to be moved backwards */ static int iflib_min_tx_latency = 0; SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW, &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput"); static int iflib_no_tx_batch = 0; SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW, &iflib_no_tx_batch, 0, "minimize transmit latency at the possible expense of throughput"); #if IFLIB_DEBUG_COUNTERS static int iflib_tx_seen; static int iflib_tx_sent; static int iflib_tx_encap; static int iflib_rx_allocs; static int iflib_fl_refills; static int iflib_fl_refills_large; static int iflib_tx_frees; SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD, &iflib_tx_seen, 0, "# tx mbufs seen"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD, &iflib_tx_sent, 0, "# tx mbufs sent"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD, &iflib_tx_encap, 0, "# tx mbufs encapped"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD, &iflib_tx_frees, 0, "# tx frees"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD, &iflib_rx_allocs, 0, "# rx allocations"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD, &iflib_fl_refills, 0, "# refills"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD, &iflib_fl_refills_large, 0, "# large refills"); static int iflib_txq_drain_flushing; static int iflib_txq_drain_oactive; static int iflib_txq_drain_notready; SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD, &iflib_txq_drain_flushing, 0, "# drain flushes"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD, &iflib_txq_drain_oactive, 0, "# drain oactives"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD, &iflib_txq_drain_notready, 0, "# drain notready"); static int iflib_encap_load_mbuf_fail; static int iflib_encap_pad_mbuf_fail; static int iflib_encap_txq_avail_fail; static int iflib_encap_txd_encap_fail; SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD, &iflib_encap_load_mbuf_fail, 0, "# busdma load failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_pad_mbuf_fail, CTLFLAG_RD, &iflib_encap_pad_mbuf_fail, 0, "# runt frame pad failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD, &iflib_encap_txq_avail_fail, 0, "# txq avail failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD, &iflib_encap_txd_encap_fail, 0, "# driver encap failures"); static int iflib_task_fn_rxs; static int iflib_rx_intr_enables; static int iflib_fast_intrs; static int iflib_rx_unavail; static int iflib_rx_ctx_inactive; static int iflib_rx_if_input; static int iflib_rxd_flush; static int iflib_verbose_debug; SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD, &iflib_task_fn_rxs, 0, "# task_fn_rx calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD, 
&iflib_rx_intr_enables, 0, "# rx intr enables"); SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD, &iflib_fast_intrs, 0, "# fast_intr calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD, &iflib_rx_unavail, 0, "# times rxeof called with no available data"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD, &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD, &iflib_rx_if_input, 0, "# times rxeof called if_input"); SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD, &iflib_rxd_flush, 0, "# times rxd_flush called"); SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW, &iflib_verbose_debug, 0, "enable verbose debugging"); #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1) static void iflib_debug_reset(void) { iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs = iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees = iflib_txq_drain_flushing = iflib_txq_drain_oactive = iflib_txq_drain_notready = iflib_encap_load_mbuf_fail = iflib_encap_pad_mbuf_fail = iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail = iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs = iflib_rx_unavail = iflib_rx_ctx_inactive = iflib_rx_if_input = iflib_rxd_flush = 0; } #else #define DBG_COUNTER_INC(name) static void iflib_debug_reset(void) {} #endif #define IFLIB_DEBUG 0 static void iflib_tx_structures_free(if_ctx_t ctx); static void iflib_rx_structures_free(if_ctx_t ctx); static int iflib_queues_alloc(if_ctx_t ctx); static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget); static int iflib_qset_structures_setup(if_ctx_t ctx); static int iflib_msix_init(if_ctx_t ctx); static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, const char *str); static void iflib_txq_check_drain(iflib_txq_t txq, int budget); static uint32_t iflib_txq_can_drain(struct ifmp_ring *); #ifdef ALTQ static void iflib_altq_if_start(if_t ifp); static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m); #endif static int iflib_register(if_ctx_t); static void iflib_init_locked(if_ctx_t ctx); static void iflib_add_device_sysctl_pre(if_ctx_t ctx); static void iflib_add_device_sysctl_post(if_ctx_t ctx); static void iflib_ifmp_purge(iflib_txq_t txq); static void _iflib_pre_assert(if_softc_ctx_t scctx); static void iflib_if_init_locked(if_ctx_t ctx); static void iflib_free_intr_mem(if_ctx_t ctx); #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m); #endif static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets = SLIST_HEAD_INITIALIZER(cpu_offsets); struct cpu_offset { SLIST_ENTRY(cpu_offset) entries; cpuset_t set; unsigned int refcount; uint16_t offset; }; static struct mtx cpu_offset_mtx; MTX_SYSINIT(iflib_cpu_offset, &cpu_offset_mtx, "iflib_cpu_offset lock", MTX_DEF); NETDUMP_DEFINE(iflib); #ifdef DEV_NETMAP #include #include #include MODULE_DEPEND(iflib, netmap, 1, 1, 1); static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init); /* * device-specific sysctl variables: * * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. * During regular operations the CRC is stripped, but on some * hardware reception of frames not multiple of 64 is slower, * so using crcstrip=0 helps in benchmarks. 
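
The knobs above are ordinary sysctls, so they are reachable from C as well as from sysctl(8). A sketch that writes and then reads back net.iflib.min_tx_latency with sysctlbyname(3); the write requires privilege:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	int val = 1;
	size_t len = sizeof(val);

	/* favor latency over throughput, per the description string above */
	if (sysctlbyname("net.iflib.min_tx_latency", NULL, NULL,
	    &val, sizeof(val)) == -1)
		err(1, "set net.iflib.min_tx_latency");
	if (sysctlbyname("net.iflib.min_tx_latency", &val, &len, NULL, 0) == -1)
		err(1, "get net.iflib.min_tx_latency");
	printf("min_tx_latency=%d\n", val);
	return (0);
}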
* * iflib_rx_miss, iflib_rx_miss_bufs: * count packets that might be missed due to lost interrupts. */ SYSCTL_DECL(_dev_netmap); /* * The xl driver by default strips CRCs and we do not override it. */ int iflib_crcstrip = 1; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip, CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on rx frames"); int iflib_rx_miss, iflib_rx_miss_bufs; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss, CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed rx intr"); SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed rx intr bufs"); /* * Register/unregister. We are already under netmap lock. * Only called on the first register or the last unregister. */ static int iflib_netmap_register(struct netmap_adapter *na, int onoff) { struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; int status; CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if (!CTX_IS_VF(ctx)) IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); /* enable or disable flags and callbacks in na and ifp */ if (onoff) { nm_set_native_flags(na); } else { nm_clear_native_flags(na); } iflib_stop(ctx); iflib_init_locked(ctx); IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ? status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1; if (status) nm_clear_native_flags(na); CTX_UNLOCK(ctx); return (status); } static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init) { struct netmap_adapter *na = kring->na; u_int const lim = kring->nkr_num_slots - 1; u_int head = kring->rhead; struct netmap_ring *ring = kring->ring; bus_dmamap_t *map; struct if_rxd_update iru; if_ctx_t ctx = rxq->ifr_ctx; iflib_fl_t fl = &rxq->ifr_fl[0]; uint32_t refill_pidx, nic_i; #if IFLIB_DEBUG_COUNTERS int rf_count = 0; #endif if (nm_i == head && __predict_true(!init)) return 0; iru_init(&iru, rxq, 0 /* flid */); map = fl->ifl_sds.ifsd_map; refill_pidx = netmap_idx_k2n(kring, nm_i); /* * IMPORTANT: we must leave one free slot in the ring, * so move head back by one unit */ head = nm_prev(head, lim); nic_i = UINT_MAX; DBG_COUNTER_INC(fl_refills); while (nm_i != head) { #if IFLIB_DEBUG_COUNTERS if (++rf_count == 9) DBG_COUNTER_INC(fl_refills_large); #endif for (int tmp_pidx = 0; tmp_pidx < IFLIB_MAX_RX_REFRESH && nm_i != head; tmp_pidx++) { struct netmap_slot *slot = &ring->slot[nm_i]; void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[tmp_pidx]); uint32_t nic_i_dma = refill_pidx; nic_i = netmap_idx_k2n(kring, nm_i); MPASS(tmp_pidx < IFLIB_MAX_RX_REFRESH); if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ return netmap_ring_reinit(kring); fl->ifl_vm_addrs[tmp_pidx] = addr; if (__predict_false(init)) { netmap_load_map(na, fl->ifl_buf_tag, map[nic_i], addr); } else if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, fl->ifl_buf_tag, map[nic_i], addr); } slot->flags &= ~NS_BUF_CHANGED; nm_i = nm_next(nm_i, lim); fl->ifl_rxd_idxs[tmp_pidx] = nic_i = nm_next(nic_i, lim); if (nm_i != head && tmp_pidx < IFLIB_MAX_RX_REFRESH-1) continue; iru.iru_pidx = refill_pidx; iru.iru_count = tmp_pidx+1; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); refill_pidx = nic_i; for (int n = 0; n < iru.iru_count; n++) { bus_dmamap_sync(fl->ifl_buf_tag, map[nic_i_dma], BUS_DMASYNC_PREREAD); /* XXX - change this to not use the netmap func*/ nic_i_dma = nm_next(nic_i_dma, lim); } } } kring->nr_hwcur = head; 
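
netmap_fl_refill() above also shows the batching contract with the driver: up to IFLIB_MAX_RX_REFRESH descriptors are staged in ifl_rxd_idxs and ifl_vm_addrs, then a single isc_rxd_refill call hands the whole batch over. A stand-alone model of that staging loop, with refill() standing in for the driver method:

#include <stdio.h>

#define BATCH 32	/* mirrors IFLIB_MAX_RX_REFRESH */

static void
refill(const unsigned *idxs, unsigned count)
{
	/* one call per batch, like ctx->isc_rxd_refill(ctx->ifc_softc, &iru) */
	printf("refill %u descriptors starting at %u\n", count, idxs[0]);
}

int
main(void)
{
	unsigned idxs[BATCH], n = 0;

	for (unsigned i = 0; i < 100; i++) {
		idxs[n++] = i;			/* stage one descriptor index */
		if (n == BATCH || i == 99) {	/* batch full, or nothing left */
			refill(idxs, n);
			n = 0;
		}
	}
	return (0);
}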
bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if (__predict_true(nic_i != UINT_MAX)) { ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); DBG_COUNTER_INC(rxd_flush); } return (0); } /* * Reconcile kernel and user view of the transmit ring. * * All information is in the kring. * Userspace wants to send packets up to the one before kring->rhead, * kernel knows kring->nr_hwcur is the first unsent packet. * * Here we push packets out (as many as possible), and possibly * reclaim buffers from previously completed transmission. * * The caller (netmap) guarantees that there is only one instance * running at any time. Any interference with other driver * methods should be handled by the individual drivers. */ static int iflib_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct ifnet *ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap kring */ u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; struct if_pkt_info pi; /* * interrupts on every tx packet are expensive so request * them every half ring, or where NS_REPORT is set */ u_int report_frequency = kring->nkr_num_slots >> 1; /* device-specific */ if_ctx_t ctx = ifp->if_softc; iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * First part: process new packets to send. * nm_i is the current index in the netmap kring, * nic_i is the corresponding index in the NIC ring. * * If we have packets to send (nm_i != head) * iterate over the netmap ring, fetch length and update * the corresponding slot in the NIC ring. Some drivers also * need to update the buffer's physical address in the NIC slot * even NS_BUF_CHANGED is not set (PNMB computes the addresses). * * The netmap_reload_map() calls is especially expensive, * even when (as in this case) the tag is 0, so do only * when the buffer has actually changed. * * If possible do not set the report/intr bit on all slots, * but only a few times per ring or when NS_REPORT is set. * * Finally, on 10G and faster drivers, it might be useful * to prefetch the next slot and txr entry. */ nm_i = kring->nr_hwcur; if (nm_i != head) { /* we have new packets to send */ pkt_info_zero(&pi); pi.ipi_segs = txq->ift_segs; pi.ipi_qsidx = kring->ring_id; nic_i = netmap_idx_k2n(kring, nm_i); __builtin_prefetch(&ring->slot[nm_i]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]); __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]); for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; uint64_t paddr; void *addr = PNMB(na, slot, &paddr); int flags = (slot->flags & NS_REPORT || nic_i == 0 || nic_i == report_frequency) ? IPI_TX_INTR : 0; /* device-specific */ pi.ipi_len = len; pi.ipi_segs[0].ds_addr = paddr; pi.ipi_segs[0].ds_len = len; pi.ipi_nsegs = 1; pi.ipi_ndescs = 0; pi.ipi_pidx = nic_i; pi.ipi_flags = flags; /* Fill the slot in the NIC ring. 
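
One point worth making explicit here: a completion interrupt per descriptor would be prohibitive, so the encap loop sets IPI_TX_INTR only at slot 0, at the half-ring report_frequency mark, or when userspace asked via NS_REPORT. A toy count of what that costs over one traversal of a 1024-slot ring:

#include <stdio.h>

int
main(void)
{
	unsigned nslots = 1024, report = nslots >> 1;	/* every half ring */
	unsigned intrs = 0;

	for (unsigned nic_i = 0; nic_i < nslots; nic_i++)
		if (nic_i == 0 || nic_i == report)
			intrs++;	/* the kernel would set IPI_TX_INTR here */
	printf("%u tx interrupts per %u-slot traversal\n", intrs, nslots);
	return (0);
}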
*/ ctx->isc_txd_encap(ctx->ifc_softc, &pi); DBG_COUNTER_INC(tx_encap); /* prefetch for next round */ __builtin_prefetch(&ring->slot[nm_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]); NM_CHECK_ADDR_LEN(na, addr, len); if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, txq->ift_buf_tag, txq->ift_sds.ifsd_map[nic_i], addr); } /* make sure changes to the buffer are synced */ bus_dmamap_sync(txq->ift_buf_tag, txq->ift_sds.ifsd_map[nic_i], BUS_DMASYNC_PREWRITE); slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } kring->nr_hwcur = nm_i; /* synchronize the NIC ring */ bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* (re)start the tx unit up to slot nic_i (excluded) */ ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i); } /* * Second part: reclaim buffers for completed transmissions. * * If there are unclaimed buffers, attempt to reclaim them. * If none are reclaimed, and TX IRQs are not in use, do an initial * minimal delay, then trigger the tx handler which will spin in the * group task queue. */ if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) { if (iflib_tx_credits_update(ctx, txq)) { /* some tx completed, increment avail */ nic_i = txq->ift_cidx_processed; kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } } if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ)) if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) { callout_reset_on(&txq->ift_timer, hz < 2000 ? 1 : hz / 1000, iflib_timer, txq, txq->ift_timer.c_cpu); } return (0); } /* * Reconcile kernel and user view of the receive ring. * Same as for the txsync, this routine must be efficient. * The caller guarantees a single invocations, but races against * the rest of the driver should be handled here. * * On call, kring->rhead is the first packet that userspace wants * to keep, and kring->rcur is the wakeup point. * The kernel has previously reported packets up to kring->rtail. * * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective * of whether or not we received an interrupt. */ static int iflib_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; iflib_fl_t fl; uint32_t nm_i; /* index into the netmap ring */ uint32_t nic_i; /* index into the NIC ring */ u_int i, n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; struct if_rxd_info ri; struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; if (head > lim) return netmap_ring_reinit(kring); /* * XXX netmap_fl_refill() only ever (re)fills free list 0 so far. */ for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++) { bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); } /* * First part: import newly received packets. * * nm_i is the index of the next free slot in the netmap ring, * nic_i is the index of the next received packet in the NIC ring, * and they may differ in case if_init() has been called while * in netmap mode. 
For the receive ring we have * * nic_i = rxr->next_check; * nm_i = kring->nr_hwtail (previous) * and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size * * rxr->next_check is set to 0 on a ring reinit */ if (netmap_no_pendintr || force_update) { int crclen = iflib_crcstrip ? 0 : 4; int error, avail; for (i = 0; i < rxq->ifr_nfl; i++) { fl = &rxq->ifr_fl[i]; nic_i = fl->ifl_cidx; nm_i = netmap_idx_n2k(kring, nic_i); avail = ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, nic_i, USHRT_MAX); for (n = 0; avail > 0; n++, avail--) { rxd_info_zero(&ri); ri.iri_frags = rxq->ifr_frags; ri.iri_qsidx = kring->ring_id; ri.iri_ifp = ctx->ifc_ifp; ri.iri_cidx = nic_i; error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen; ring->slot[nm_i].flags = 0; bus_dmamap_sync(fl->ifl_buf_tag, fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } if (n) { /* update the state variables */ if (netmap_no_pendintr && !force_update) { /* diagnostics */ iflib_rx_miss ++; iflib_rx_miss_bufs += n; } fl->ifl_cidx = nic_i; kring->nr_hwtail = nm_i; } kring->nr_kflags &= ~NKR_PENDINTR; } } /* * Second part: skip past packets that userspace has released. * (kring->nr_hwcur to head excluded), * and make the buffers available for reception. * As usual nm_i is the index in the netmap ring, * nic_i is the index in the NIC ring, and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size */ /* XXX not sure how this will work with multiple free lists */ nm_i = kring->nr_hwcur; return (netmap_fl_refill(rxq, kring, nm_i, false)); } static void iflib_netmap_intr(struct netmap_adapter *na, int onoff) { struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; CTX_LOCK(ctx); if (onoff) { IFDI_INTR_ENABLE(ctx); } else { IFDI_INTR_DISABLE(ctx); } CTX_UNLOCK(ctx); } static int iflib_netmap_attach(if_ctx_t ctx) { struct netmap_adapter na; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; bzero(&na, sizeof(na)); na.ifp = ctx->ifc_ifp; na.na_flags = NAF_BDG_MAYSLEEP; MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); na.num_tx_desc = scctx->isc_ntxd[0]; na.num_rx_desc = scctx->isc_nrxd[0]; na.nm_txsync = iflib_netmap_txsync; na.nm_rxsync = iflib_netmap_rxsync; na.nm_register = iflib_netmap_register; na.nm_intr = iflib_netmap_intr; na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; return (netmap_attach(&na)); } static void iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_slot *slot; slot = netmap_reset(na, NR_TX, txq->ift_id, 0); if (slot == NULL) return; for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { /* * In netmap mode, set the map for the packet buffer. * NOTE: Some drivers (not this one) also need to set * the physical buffer address in the NIC ring. 
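
The invariant quoted above, nm_i == (nic_i + kring->nkr_hwofs) % ring_size, is the entire NIC-to-netmap index translation; hwofs is normally zero and becomes nonzero only after a ring reinit such as the if_init() case mentioned. A small model of the mapping and its inverse, with made-up ring size and offset:

#include <stdio.h>

int
main(void)
{
	unsigned ring_size = 8, hwofs = 3;	/* offset after a hypothetical reinit */

	for (unsigned nic_i = 0; nic_i < ring_size; nic_i++) {
		unsigned nm_i = (nic_i + hwofs) % ring_size;		  /* n2k */
		unsigned back = (nm_i + ring_size - hwofs) % ring_size;   /* k2n */
		printf("nic %u <-> nm %u (round-trip %u)\n", nic_i, nm_i, back);
	}
	return (0);
}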
* netmap_idx_n2k() maps a nic index, i, into the corresponding * netmap slot index, si */ int si = netmap_idx_n2k(na->tx_rings[txq->ift_id], i); netmap_load_map(na, txq->ift_buf_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si)); } } static void iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_kring *kring = na->rx_rings[rxq->ifr_id]; struct netmap_slot *slot; uint32_t nm_i; slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); if (slot == NULL) return; nm_i = netmap_idx_n2k(kring, 0); netmap_fl_refill(rxq, kring, nm_i, true); } static void iflib_netmap_timer_adjust(if_ctx_t ctx, iflib_txq_t txq, uint32_t *reset_on) { struct netmap_kring *kring; uint16_t txqid; txqid = txq->ift_id; kring = NA(ctx->ifc_ifp)->tx_rings[txqid]; if (kring->nr_hwcur != nm_next(kring->nr_hwtail, kring->nkr_num_slots - 1)) { bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if (ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) netmap_tx_irq(ctx->ifc_ifp, txqid); if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ)) { if (hz < 2000) *reset_on = 1; else *reset_on = hz / 1000; } } } #define iflib_netmap_detach(ifp) netmap_detach(ifp) #else #define iflib_netmap_txq_init(ctx, txq) #define iflib_netmap_rxq_init(ctx, rxq) #define iflib_netmap_detach(ifp) #define iflib_netmap_attach(ctx) (0) #define netmap_rx_irq(ifp, qid, budget) (0) #define netmap_tx_irq(ifp, qid) do {} while (0) #define iflib_netmap_timer_adjust(ctx, txq, reset_on) #endif #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } static __inline void prefetch2cachelines(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); #if (CACHE_LINE_SIZE < 128) __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long))))); #endif } #else #define prefetch(x) #define prefetch2cachelines(x) #endif static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) { iflib_fl_t fl; fl = &rxq->ifr_fl[flid]; iru->iru_paddrs = fl->ifl_bus_addrs; iru->iru_vaddrs = &fl->ifl_vm_addrs[0]; iru->iru_idxs = fl->ifl_rxd_idxs; iru->iru_qsidx = rxq->ifr_id; iru->iru_buf_size = fl->ifl_buf_size; iru->iru_flidx = fl->ifl_id; } static void _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) { if (err) return; *(bus_addr_t *) arg = segs[0].ds_addr; } int iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags) { int err; device_t dev = ctx->ifc_dev; err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ align, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->idi_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed: %d\n", __func__, err); goto fail_0; } err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map); if (err) { device_printf(dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, err); goto fail_1; } dma->idi_paddr = IF_BAD_DMA; err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr, size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); if (err || dma->idi_paddr == IF_BAD_DMA) { device_printf(dev, "%s: bus_dmamap_load failed: 
%d\n", __func__, err); goto fail_2; } dma->idi_size = size; return (0); fail_2: bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); fail_1: bus_dma_tag_destroy(dma->idi_tag); fail_0: dma->idi_tag = NULL; return (err); } int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags) { if_shared_ctx_t sctx = ctx->ifc_sctx; KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized")); return (iflib_dma_alloc_align(ctx, size, sctx->isc_q_align, dma, mapflags)); } int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count) { int i, err; iflib_dma_info_t *dmaiter; dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) { if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0) break; } if (err) iflib_dma_free_multi(dmalist, i); return (err); } void iflib_dma_free(iflib_dma_info_t dma) { if (dma->idi_tag == NULL) return; if (dma->idi_paddr != IF_BAD_DMA) { bus_dmamap_sync(dma->idi_tag, dma->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->idi_tag, dma->idi_map); dma->idi_paddr = IF_BAD_DMA; } if (dma->idi_vaddr != NULL) { bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); dma->idi_vaddr = NULL; } bus_dma_tag_destroy(dma->idi_tag); dma->idi_tag = NULL; } void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) { int i; iflib_dma_info_t *dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) iflib_dma_free(*dmaiter); } #ifdef EARLY_AP_STARTUP static const int iflib_started = 1; #else /* * We used to abuse the smp_started flag to decide if the queues have been * fully initialized (by late taskqgroup_adjust() calls in a SYSINIT()). * That gave bad races, since the SYSINIT() runs strictly after smp_started * is set. Run a SYSINIT() strictly after that to just set a usable * completion flag. 
*/ static int iflib_started; static void iflib_record_started(void *arg) { iflib_started = 1; } SYSINIT(iflib_record_started, SI_SUB_SMP + 1, SI_ORDER_FIRST, iflib_record_started, NULL); #endif static int iflib_fast_intr(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; int result; if (!iflib_started) return (FILTER_STRAY); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL) { result = info->ifi_filter(info->ifi_filter_arg); if ((result & FILTER_SCHEDULE_THREAD) == 0) return (result); } GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int iflib_fast_intr_rxtx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; if_ctx_t ctx; iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; iflib_txq_t txq; void *sc; int i, cidx, result; qidx_t txqid; if (!iflib_started) return (FILTER_STRAY); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL) { result = info->ifi_filter(info->ifi_filter_arg); if ((result & FILTER_SCHEDULE_THREAD) == 0) return (result); } ctx = rxq->ifr_ctx; sc = ctx->ifc_softc; MPASS(rxq->ifr_ntxqirq); for (i = 0; i < rxq->ifr_ntxqirq; i++) { txqid = rxq->ifr_txqid[i]; txq = &ctx->ifc_txqs[txqid]; bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if (!ctx->isc_txd_credits_update(sc, txqid, false)) { IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); continue; } GROUPTASK_ENQUEUE(&txq->ift_task); } if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) cidx = rxq->ifr_cq_cidx; else cidx = rxq->ifr_fl[0].ifl_cidx; if (iflib_rxd_avail(ctx, rxq, cidx, 1)) GROUPTASK_ENQUEUE(gtask); else { IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); DBG_COUNTER_INC(rx_intr_enables); } return (FILTER_HANDLED); } static int iflib_fast_intr_ctx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; int result; if (!iflib_started) return (FILTER_STRAY); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL) { result = info->ifi_filter(info->ifi_filter_arg); if ((result & FILTER_SCHEDULE_THREAD) == 0) return (result); } GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, driver_intr_t handler, void *arg, const char *name) { int rc, flags; struct resource *res; void *tag = NULL; device_t dev = ctx->ifc_dev; flags = RF_ACTIVE; if (ctx->ifc_flags & IFC_LEGACY) flags |= RF_SHAREABLE; MPASS(rid < 512); irq->ii_rid = rid; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid, flags); if (res == NULL) { device_printf(dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } irq->ii_res = res; KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL")); rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET, filter, handler, arg, &tag); if (rc != 0) { device_printf(dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name ? name : "unknown", rc); return (rc); } else if (name) bus_describe_intr(dev, res, tag, "%s", name); irq->ii_tag = tag; return (0); } /********************************************************************* * * Allocate DMA resources for TX buffers as well as memory for the TX * mbuf map. TX DMA maps (non-TSO/TSO) and TX mbuf map are kept in a * iflib_sw_tx_desc_array structure, storing all the information that * is needed to transmit a packet on the wire. This is called only * once at attach, setup is done every reset. 
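 * Two DMA tags back each TX queue: ift_buf_tag for ordinary frames and,
 * when the interface is TSO-capable, ift_tso_buf_tag sized for the TSO
 * limits (isc_tx_tso_size_max / isc_tx_tso_segments_max).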
* **********************************************************************/ static int iflib_txsd_alloc(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; bus_size_t tsomaxsize; int err, nsegments, ntsosegments; bool tso; nsegments = scctx->isc_tx_nsegments; ntsosegments = scctx->isc_tx_tso_segments_max; tsomaxsize = scctx->isc_tx_tso_size_max; if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_VLAN_MTU) tsomaxsize += sizeof(struct ether_vlan_header); MPASS(scctx->isc_ntxd[0] > 0); MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0); MPASS(nsegments > 0); if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) { MPASS(ntsosegments > 0); MPASS(sctx->isc_tso_maxsize >= tsomaxsize); } /* * Set up DMA tags for TX buffers. */ if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_tx_maxsize, /* maxsize */ nsegments, /* nsegments */ sctx->isc_tx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_buf_tag))) { device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err); device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n", (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize); goto fail; } tso = (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) != 0; if (tso && (err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ tsomaxsize, /* maxsize */ ntsosegments, /* nsegments */ sctx->isc_tso_maxsegsize,/* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_tso_buf_tag))) { device_printf(dev, "Unable to allocate TSO TX DMA tag: %d\n", err); goto fail; } /* Allocate memory for the TX mbuf map. */ if (!(txq->ift_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX mbuf map memory\n"); err = ENOMEM; goto fail; } /* * Create the DMA maps for TX buffers. 
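	 * One map is created per descriptor slot; the TSO map array is
	 * only allocated when the interface advertises IFCAP_TSO.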
*/ if ((txq->ift_sds.ifsd_map = (bus_dmamap_t *)malloc( sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate TX buffer DMA map memory\n"); err = ENOMEM; goto fail; } if (tso && (txq->ift_sds.ifsd_tso_map = (bus_dmamap_t *)malloc( sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate TSO TX buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { err = bus_dmamap_create(txq->ift_buf_tag, 0, &txq->ift_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } if (!tso) continue; err = bus_dmamap_create(txq->ift_tso_buf_tag, 0, &txq->ift_sds.ifsd_tso_map[i]); if (err != 0) { device_printf(dev, "Unable to create TSO TX DMA map\n"); goto fail; } } return (0); fail: /* We free all, it handles case where we are in the middle */ iflib_tx_structures_free(ctx); return (err); } static void iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i) { bus_dmamap_t map; map = NULL; if (txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[i]; if (map != NULL) { bus_dmamap_sync(txq->ift_buf_tag, map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_buf_tag, map); bus_dmamap_destroy(txq->ift_buf_tag, map); txq->ift_sds.ifsd_map[i] = NULL; } map = NULL; if (txq->ift_sds.ifsd_tso_map != NULL) map = txq->ift_sds.ifsd_tso_map[i]; if (map != NULL) { bus_dmamap_sync(txq->ift_tso_buf_tag, map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_tso_buf_tag, map); bus_dmamap_destroy(txq->ift_tso_buf_tag, map); txq->ift_sds.ifsd_tso_map[i] = NULL; } } static void iflib_txq_destroy(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; for (int i = 0; i < txq->ift_size; i++) iflib_txsd_destroy(ctx, txq, i); if (txq->ift_sds.ifsd_map != NULL) { free(txq->ift_sds.ifsd_map, M_IFLIB); txq->ift_sds.ifsd_map = NULL; } if (txq->ift_sds.ifsd_tso_map != NULL) { free(txq->ift_sds.ifsd_tso_map, M_IFLIB); txq->ift_sds.ifsd_tso_map = NULL; } if (txq->ift_sds.ifsd_m != NULL) { free(txq->ift_sds.ifsd_m, M_IFLIB); txq->ift_sds.ifsd_m = NULL; } if (txq->ift_buf_tag != NULL) { bus_dma_tag_destroy(txq->ift_buf_tag); txq->ift_buf_tag = NULL; } if (txq->ift_tso_buf_tag != NULL) { bus_dma_tag_destroy(txq->ift_tso_buf_tag); txq->ift_tso_buf_tag = NULL; } } static void iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) { struct mbuf **mp; mp = &txq->ift_sds.ifsd_m[i]; if (*mp == NULL) return; if (txq->ift_sds.ifsd_map != NULL) { bus_dmamap_sync(txq->ift_buf_tag, txq->ift_sds.ifsd_map[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[i]); } if (txq->ift_sds.ifsd_tso_map != NULL) { bus_dmamap_sync(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[i]); } m_free(*mp); DBG_COUNTER_INC(tx_frees); *mp = NULL; } static int iflib_txq_setup(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; iflib_dma_info_t di; int i; /* Set number of descriptors available */ txq->ift_qstatus = IFLIB_QUEUE_IDLE; /* XXX make configurable */ txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ; /* Reset indices */ txq->ift_cidx_processed = 0; txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0; txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; 
	    i++, di++)
		bzero((void *)di->idi_vaddr, di->idi_size);
	IFDI_TXQ_SETUP(ctx, txq->ift_id);
	for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++)
		bus_dmamap_sync(di->idi_tag, di->idi_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	return (0);
}

/*********************************************************************
 *
 *  Allocate DMA resources for RX buffers as well as memory for the RX
 *  mbuf map, direct RX cluster pointer map and RX cluster bus address
 *  map.  RX DMA map, RX mbuf map, direct RX cluster pointer map and
 *  RX cluster map are kept in an iflib_sw_rx_desc_array structure.
 *  Since we use one entry in iflib_sw_rx_desc_array per received
 *  packet, the maximum number of entries we'll need is equal to the
 *  number of hardware receive descriptors that we've allocated.
 *
 **********************************************************************/
static int
iflib_rxsd_alloc(iflib_rxq_t rxq)
{
	if_ctx_t ctx = rxq->ifr_ctx;
	if_shared_ctx_t sctx = ctx->ifc_sctx;
	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
	device_t dev = ctx->ifc_dev;
	iflib_fl_t fl;
	int err;

	MPASS(scctx->isc_nrxd[0] > 0);
	MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0);

	fl = rxq->ifr_fl;
	for (int i = 0; i < rxq->ifr_nfl; i++, fl++) {
		fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */
		/* Set up DMA tag for RX buffers. */
		err = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
		    1, 0,			/* alignment, bounds */
		    BUS_SPACE_MAXADDR,		/* lowaddr */
		    BUS_SPACE_MAXADDR,		/* highaddr */
		    NULL, NULL,			/* filter, filterarg */
		    sctx->isc_rx_maxsize,	/* maxsize */
		    sctx->isc_rx_nsegments,	/* nsegments */
		    sctx->isc_rx_maxsegsize,	/* maxsegsize */
		    0,				/* flags */
		    NULL,			/* lockfunc */
		    NULL,			/* lockarg */
		    &fl->ifl_buf_tag);
		if (err) {
			device_printf(dev,
			    "Unable to allocate RX DMA tag: %d\n", err);
			goto fail;
		}

		/* Allocate memory for the RX mbuf map. */
		if (!(fl->ifl_sds.ifsd_m =
		    (struct mbuf **) malloc(sizeof(struct mbuf *) *
		    scctx->isc_nrxd[rxq->ifr_fl_offset],
		    M_IFLIB, M_NOWAIT | M_ZERO))) {
			device_printf(dev,
			    "Unable to allocate RX mbuf map memory\n");
			err = ENOMEM;
			goto fail;
		}

		/* Allocate memory for the direct RX cluster pointer map. */
		if (!(fl->ifl_sds.ifsd_cl =
		    (caddr_t *) malloc(sizeof(caddr_t) *
		    scctx->isc_nrxd[rxq->ifr_fl_offset],
		    M_IFLIB, M_NOWAIT | M_ZERO))) {
			device_printf(dev,
			    "Unable to allocate RX cluster map memory\n");
			err = ENOMEM;
			goto fail;
		}

		/* Allocate memory for the RX cluster bus address map. */
		if (!(fl->ifl_sds.ifsd_ba =
		    (bus_addr_t *) malloc(sizeof(bus_addr_t) *
		    scctx->isc_nrxd[rxq->ifr_fl_offset],
		    M_IFLIB, M_NOWAIT | M_ZERO))) {
			device_printf(dev,
			    "Unable to allocate RX bus address map memory\n");
			err = ENOMEM;
			goto fail;
		}

		/*
		 * Create the DMA maps for RX buffers.
		 */
		if (!(fl->ifl_sds.ifsd_map =
		    (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) *
		    scctx->isc_nrxd[rxq->ifr_fl_offset],
		    M_IFLIB, M_NOWAIT | M_ZERO))) {
			device_printf(dev,
			    "Unable to allocate RX buffer DMA map memory\n");
			err = ENOMEM;
			goto fail;
		}
		for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) {
			err = bus_dmamap_create(fl->ifl_buf_tag, 0,
			    &fl->ifl_sds.ifsd_map[i]);
			if (err != 0) {
				device_printf(dev,
				    "Unable to create RX buffer DMA map\n");
				goto fail;
			}
		}
	}
	return (0);

fail:
	iflib_rx_structures_free(ctx);
	return (err);
}

/*
 * Internal service routines
 */

struct rxq_refill_cb_arg {
	int               error;
	bus_dma_segment_t seg;
	int               nseg;
};

static void
_rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct rxq_refill_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->seg = segs[0];
	cb_arg->nseg = nseg;
}

/**
 * _iflib_fl_refill - refill a free-buffer list
 * @ctx: the iflib context
 * @fl: the free list to refill
 * @count: the number of new buffers to allocate
 *
 * (Re)populate a free-buffer list with up to @count new packet buffers.
 * The caller must ensure that @count does not exceed the list's capacity.
 */
static void
_iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
{
	struct if_rxd_update iru;
	struct rxq_refill_cb_arg cb_arg;
	struct mbuf *m;
	caddr_t cl, *sd_cl;
	struct mbuf **sd_m;
	bus_dmamap_t *sd_map;
	bus_addr_t bus_addr, *sd_ba;
	int err, frag_idx, i, idx, n, pidx;
	qidx_t credits;

	sd_m = fl->ifl_sds.ifsd_m;
	sd_map = fl->ifl_sds.ifsd_map;
	sd_cl = fl->ifl_sds.ifsd_cl;
	sd_ba = fl->ifl_sds.ifsd_ba;
	pidx = fl->ifl_pidx;
	idx = pidx;
	frag_idx = fl->ifl_fragidx;
	credits = fl->ifl_credits;

	i = 0;
	n = count;
	MPASS(n > 0);
	MPASS(credits + n <= fl->ifl_size);

	if (pidx < fl->ifl_cidx)
		MPASS(pidx + n <= fl->ifl_cidx);
	if (pidx == fl->ifl_cidx && (credits < fl->ifl_size))
		MPASS(fl->ifl_gen == 0);
	if (pidx > fl->ifl_cidx)
		MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx);

	DBG_COUNTER_INC(fl_refills);
	if (n > 8)
		DBG_COUNTER_INC(fl_refills_large);
	iru_init(&iru, fl->ifl_rxq, fl->ifl_id);
	while (n--) {
		/*
		 * We allocate an uninitialized mbuf + cluster, mbuf is
		 * initialized after rx.
		 *
		 * If the cluster is still set then we know a minimum sized
		 * packet was received
		 */
		bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size,
		    &frag_idx);
		if (frag_idx < 0)
			bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx);
		MPASS(frag_idx >= 0);
		if ((cl = sd_cl[frag_idx]) == NULL) {
			if ((cl = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL)
				break;

			cb_arg.error = 0;
			MPASS(sd_map != NULL);
			err = bus_dmamap_load(fl->ifl_buf_tag, sd_map[frag_idx],
			    cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg,
			    BUS_DMA_NOWAIT);
			if (err != 0 || cb_arg.error) {
				/*
				 * !zone_pack ?
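				 * (A cluster that did not come from
				 * zone_pack is not freed on this error
				 * path, which appears to be what the
				 * question above is about.)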
*/ if (fl->ifl_zone == zone_pack) uma_zfree(fl->ifl_zone, cl); break; } sd_ba[frag_idx] = bus_addr = cb_arg.seg.ds_addr; sd_cl[frag_idx] = cl; #if MEMORY_LOGGING fl->ifl_cl_enqueued++; #endif } else { bus_addr = sd_ba[frag_idx]; } bus_dmamap_sync(fl->ifl_buf_tag, sd_map[frag_idx], BUS_DMASYNC_PREREAD); if (sd_m[frag_idx] == NULL) { if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { break; } sd_m[frag_idx] = m; } bit_set(fl->ifl_rx_bitmap, frag_idx); #if MEMORY_LOGGING fl->ifl_m_enqueued++; #endif DBG_COUNTER_INC(rx_allocs); fl->ifl_rxd_idxs[i] = frag_idx; fl->ifl_bus_addrs[i] = bus_addr; fl->ifl_vm_addrs[i] = cl; credits++; i++; MPASS(credits <= fl->ifl_size); if (++idx == fl->ifl_size) { fl->ifl_gen = 1; idx = 0; } if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); i = 0; pidx = idx; fl->ifl_pidx = idx; fl->ifl_credits = credits; } } if (i) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); fl->ifl_pidx = idx; fl->ifl_credits = credits; } DBG_COUNTER_INC(rxd_flush); if (fl->ifl_pidx == 0) pidx = fl->ifl_size - 1; else pidx = fl->ifl_pidx - 1; bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx); fl->ifl_fragidx = frag_idx; } static __inline void __iflib_fl_refill_lt(if_ctx_t ctx, iflib_fl_t fl, int max) { /* we avoid allowing pidx to catch up with cidx as it confuses ixl */ int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; #ifdef INVARIANTS int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; #endif MPASS(fl->ifl_credits <= fl->ifl_size); MPASS(reclaimable == delta); if (reclaimable > 0) _iflib_fl_refill(ctx, fl, min(max, reclaimable)); } uint8_t iflib_in_detach(if_ctx_t ctx) { bool in_detach; STATE_LOCK(ctx); in_detach = !!(ctx->ifc_flags & IFC_IN_DETACH); STATE_UNLOCK(ctx); return (in_detach); } static void iflib_fl_bufs_free(iflib_fl_t fl) { iflib_dma_info_t idi = fl->ifl_ifdi; bus_dmamap_t sd_map; uint32_t i; for (i = 0; i < fl->ifl_size; i++) { struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i]; caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i]; if (*sd_cl != NULL) { sd_map = fl->ifl_sds.ifsd_map[i]; bus_dmamap_sync(fl->ifl_buf_tag, sd_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fl->ifl_buf_tag, sd_map); if (*sd_cl != NULL) uma_zfree(fl->ifl_zone, *sd_cl); // XXX: Should this get moved out? if (iflib_in_detach(fl->ifl_rxq->ifr_ctx)) bus_dmamap_destroy(fl->ifl_buf_tag, sd_map); if (*sd_m != NULL) { m_init(*sd_m, M_NOWAIT, MT_DATA, 0); uma_zfree(zone_mbuf, *sd_m); } } else { MPASS(*sd_cl == NULL); MPASS(*sd_m == NULL); } #if MEMORY_LOGGING fl->ifl_m_dequeued++; fl->ifl_cl_dequeued++; #endif *sd_cl = NULL; *sd_m = NULL; } #ifdef INVARIANTS for (i = 0; i < fl->ifl_size; i++) { MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); MPASS(fl->ifl_sds.ifsd_m[i] == NULL); } #endif /* * Reset free list values */ fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0; bzero(idi->idi_vaddr, idi->idi_size); } /********************************************************************* * * Initialize a receive ring and its buffers. 
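 *  Called from iflib_init_locked() for each free list of each RX queue.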
 *
 **********************************************************************/
static int
iflib_fl_setup(iflib_fl_t fl)
{
	iflib_rxq_t rxq = fl->ifl_rxq;
	if_ctx_t ctx = rxq->ifr_ctx;

	bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1);
	/*
	** Free current RX buffer structs and their mbufs
	*/
	iflib_fl_bufs_free(fl);
	/* Now replenish the mbufs */
	MPASS(fl->ifl_credits == 0);
	fl->ifl_buf_size = ctx->ifc_rx_mbuf_sz;
	if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size)
		ctx->ifc_max_fl_buf_size = fl->ifl_buf_size;
	fl->ifl_cltype = m_gettype(fl->ifl_buf_size);
	fl->ifl_zone = m_getzone(fl->ifl_buf_size);

	/* avoid pre-allocating zillions of clusters to an idle card,
	 * potentially speeding up attach
	 */
	_iflib_fl_refill(ctx, fl, min(128, fl->ifl_size));
	MPASS(min(128, fl->ifl_size) == fl->ifl_credits);
	if (min(128, fl->ifl_size) != fl->ifl_credits)
		return (ENOBUFS);
	/*
	 * handle failure
	 */
	MPASS(rxq != NULL);
	MPASS(fl->ifl_ifdi != NULL);
	bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	return (0);
}

/*********************************************************************
 *
 *  Free receive ring data structures
 *
 **********************************************************************/
static void
iflib_rx_sds_free(iflib_rxq_t rxq)
{
	iflib_fl_t fl;
	int i, j;

	if (rxq->ifr_fl != NULL) {
		for (i = 0; i < rxq->ifr_nfl; i++) {
			fl = &rxq->ifr_fl[i];
			if (fl->ifl_buf_tag != NULL) {
				if (fl->ifl_sds.ifsd_map != NULL) {
					for (j = 0; j < fl->ifl_size; j++) {
						if (fl->ifl_sds.ifsd_map[j] == NULL)
							continue;
						bus_dmamap_sync(
						    fl->ifl_buf_tag,
						    fl->ifl_sds.ifsd_map[j],
						    BUS_DMASYNC_POSTREAD);
						bus_dmamap_unload(
						    fl->ifl_buf_tag,
						    fl->ifl_sds.ifsd_map[j]);
					}
				}
				bus_dma_tag_destroy(fl->ifl_buf_tag);
				fl->ifl_buf_tag = NULL;
			}
			free(fl->ifl_sds.ifsd_m, M_IFLIB);
			free(fl->ifl_sds.ifsd_cl, M_IFLIB);
			free(fl->ifl_sds.ifsd_ba, M_IFLIB);
			free(fl->ifl_sds.ifsd_map, M_IFLIB);
			fl->ifl_sds.ifsd_m = NULL;
			fl->ifl_sds.ifsd_cl = NULL;
			fl->ifl_sds.ifsd_ba = NULL;
			fl->ifl_sds.ifsd_map = NULL;
		}
		free(rxq->ifr_fl, M_IFLIB);
		rxq->ifr_fl = NULL;
		rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0;
	}
}

/*
 * MI (machine independent) logic
 */
static void
iflib_timer(void *arg)
{
	iflib_txq_t txq = arg;
	if_ctx_t ctx = txq->ift_ctx;
	if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
	uint64_t this_tick = ticks;
	uint32_t reset_on = hz / 2;

	if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
		return;

	/*
	** Check on the state of the TX queue(s); this
	** can be done without the lock because it's RO
	** and the HUNG state will be static if set.
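	** A queue is declared hung when it was already marked
	** IFLIB_QUEUE_HUNG and either no descriptors were cleaned since
	** the last scan or no pause frames were received.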
*/ if (this_tick - txq->ift_last_timer_tick >= hz / 2) { txq->ift_last_timer_tick = this_tick; IFDI_TIMER(ctx, txq->ift_id); if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && ((txq->ift_cleaned_prev == txq->ift_cleaned) || (sctx->isc_pause_frames == 0))) goto hung; if (ifmp_ring_is_stalled(txq->ift_br)) txq->ift_qstatus = IFLIB_QUEUE_HUNG; txq->ift_cleaned_prev = txq->ift_cleaned; } #ifdef DEV_NETMAP if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) iflib_netmap_timer_adjust(ctx, txq, &reset_on); #endif /* handle any laggards */ if (txq->ift_db_pending) GROUPTASK_ENQUEUE(&txq->ift_task); sctx->isc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) callout_reset_on(&txq->ift_timer, reset_on, iflib_timer, txq, txq->ift_timer.c_cpu); return; hung: device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); STATE_LOCK(ctx); if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); ctx->ifc_flags |= (IFC_DO_WATCHDOG|IFC_DO_RESET); iflib_admin_intr_deferred(ctx); STATE_UNLOCK(ctx); } static void iflib_calc_rx_mbuf_sz(if_ctx_t ctx) { if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; /* * XXX don't set the max_frame_size to larger * than the hardware can handle */ if (sctx->isc_max_frame_size <= MCLBYTES) ctx->ifc_rx_mbuf_sz = MCLBYTES; else ctx->ifc_rx_mbuf_sz = MJUMPAGESIZE; } uint32_t iflib_get_rx_mbuf_sz(if_ctx_t ctx) { return (ctx->ifc_rx_mbuf_sz); } static void iflib_init_locked(if_ctx_t ctx) { if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_t ifp = ctx->ifc_ifp; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, tx_ip_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) if_sethwassistbits(ifp, tx_ip6_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TSO4) if_sethwassistbits(ifp, CSUM_IP_TSO, 0); if (if_getcapenable(ifp) & IFCAP_TSO6) if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); iflib_netmap_txq_init(ctx, txq); } /* * Calculate a suitable Rx mbuf size prior to calling IFDI_INIT, so * that drivers can use the value when setting up the hardware receive * buffers. 
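	 * (The current heuristic selects MCLBYTES when the maximum frame
	 * fits, and MJUMPAGESIZE otherwise.)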
*/ iflib_calc_rx_mbuf_sz(ctx); #ifdef INVARIANTS i = if_getdrvflags(ifp); #endif IFDI_INIT(ctx); MPASS(if_getdrvflags(ifp) == i); for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { /* XXX this should really be done on a per-queue basis */ if (if_getcapenable(ifp) & IFCAP_NETMAP) { MPASS(rxq->ifr_id == i); iflib_netmap_rxq_init(ctx, rxq); continue; } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n"); goto done; } } } done: if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); IFDI_INTR_ENABLE(ctx); txq = ctx->ifc_txqs; for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); } static int iflib_media_change(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); int err; CTX_LOCK(ctx); if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0) iflib_init_locked(ctx); CTX_UNLOCK(ctx); return (err); } static void iflib_media_status(if_t ifp, struct ifmediareq *ifmr) { if_ctx_t ctx = if_getsoftc(ifp); CTX_LOCK(ctx); IFDI_UPDATE_ADMIN_STATUS(ctx); IFDI_MEDIA_STATUS(ctx, ifmr); CTX_UNLOCK(ctx); } void iflib_stop(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; iflib_dma_info_t di; iflib_fl_t fl; int i, j; /* Tell the stack that the interface is no longer active */ if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); DELAY(1000); IFDI_STOP(ctx); DELAY(1000); iflib_debug_reset(); /* Wait for current tx queue users to exit to disarm watchdog timer. */ for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) { /* make sure all transmitters have completed before proceeding XXX */ CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); /* clean any enqueued buffers */ iflib_ifmp_purge(txq); /* Free any existing tx buffers. */ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; txq->ift_pullups = 0; ifmp_ring_reset_stats(txq->ift_br); for (j = 0, di = txq->ift_ifdi; j < sctx->isc_ntxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); } for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) { /* make sure all transmitters have completed before proceeding XXX */ rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; for (j = 0, di = rxq->ifr_ifdi; j < sctx->isc_nrxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); /* also resets the free lists pidx/cidx */ for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) iflib_fl_bufs_free(fl); } } static inline caddr_t calc_next_rxd(iflib_fl_t fl, int cidx) { qidx_t size; int nrxd; caddr_t start, end, cur, next; nrxd = fl->ifl_size; size = fl->ifl_rxd_size; start = fl->ifl_ifdi->idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*nrxd; next = CACHE_PTR_NEXT(cur); return (next < end ? 
next : start); } static inline void prefetch_pkts(iflib_fl_t fl, int cidx) { int nextptr; int nrxd = fl->ifl_size; caddr_t next_rxd; nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1); prefetch(&fl->ifl_sds.ifsd_m[nextptr]); prefetch(&fl->ifl_sds.ifsd_cl[nextptr]); next_rxd = calc_next_rxd(fl, cidx); prefetch(next_rxd); prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]); } static struct mbuf * rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, bool unload, if_rxsd_t sd, int *pf_rv, if_rxd_info_t ri) { bus_dmamap_t map; iflib_fl_t fl; caddr_t payload; struct mbuf *m; int flid, cidx, len, next; map = NULL; flid = irf->irf_flid; cidx = irf->irf_idx; fl = &rxq->ifr_fl[flid]; sd->ifsd_fl = fl; sd->ifsd_cidx = cidx; m = fl->ifl_sds.ifsd_m[cidx]; sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx]; fl->ifl_credits--; #if MEMORY_LOGGING fl->ifl_m_dequeued++; #endif if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH) prefetch_pkts(fl, cidx); next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1); prefetch(&fl->ifl_sds.ifsd_map[next]); map = fl->ifl_sds.ifsd_map[cidx]; next = (cidx + CACHE_LINE_SIZE) & (fl->ifl_size-1); /* not valid assert if bxe really does SGE from non-contiguous elements */ MPASS(fl->ifl_cidx == cidx); bus_dmamap_sync(fl->ifl_buf_tag, map, BUS_DMASYNC_POSTREAD); if (rxq->pfil != NULL && PFIL_HOOKED_IN(rxq->pfil) && pf_rv != NULL) { payload = *sd->ifsd_cl; payload += ri->iri_pad; len = ri->iri_len - ri->iri_pad; *pf_rv = pfil_run_hooks(rxq->pfil, payload, ri->iri_ifp, len | PFIL_MEMPTR | PFIL_IN, NULL); switch (*pf_rv) { case PFIL_DROPPED: case PFIL_CONSUMED: /* * The filter ate it. Everything is recycled. */ m = NULL; unload = 0; break; case PFIL_REALLOCED: /* * The filter copied it. Everything is recycled. 
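			 * pfil_mem2mbuf() wraps the filter's copy in a
			 * fresh mbuf, so the original cluster and mbuf
			 * stay attached to the slot for reuse.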
*/ m = pfil_mem2mbuf(payload); unload = 0; break; case PFIL_PASS: /* * Filter said it was OK, so receive like * normal */ fl->ifl_sds.ifsd_m[cidx] = NULL; break; default: MPASS(0); } } else { fl->ifl_sds.ifsd_m[cidx] = NULL; *pf_rv = PFIL_PASS; } if (unload) bus_dmamap_unload(fl->ifl_buf_tag, map); fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1); if (__predict_false(fl->ifl_cidx == 0)) fl->ifl_gen = 0; bit_clear(fl->ifl_rx_bitmap, cidx); return (m); } static struct mbuf * assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd, int *pf_rv) { struct mbuf *m, *mh, *mt; caddr_t cl; int *pf_rv_ptr, flags, i, padlen; bool consumed; i = 0; mh = NULL; consumed = false; *pf_rv = PFIL_PASS; pf_rv_ptr = pf_rv; do { m = rxd_frag_to_sd(rxq, &ri->iri_frags[i], !consumed, sd, pf_rv_ptr, ri); MPASS(*sd->ifsd_cl != NULL); /* * Exclude zero-length frags & frags from * packets the filter has consumed or dropped */ if (ri->iri_frags[i].irf_len == 0 || consumed || *pf_rv == PFIL_CONSUMED || *pf_rv == PFIL_DROPPED) { if (mh == NULL) { /* everything saved here */ consumed = true; pf_rv_ptr = NULL; continue; } /* XXX we can save the cluster here, but not the mbuf */ m_init(m, M_NOWAIT, MT_DATA, 0); m_free(m); continue; } if (mh == NULL) { flags = M_PKTHDR|M_EXT; mh = mt = m; padlen = ri->iri_pad; } else { flags = M_EXT; mt->m_next = m; mt = m; /* assuming padding is only on the first fragment */ padlen = 0; } cl = *sd->ifsd_cl; *sd->ifsd_cl = NULL; /* Can these two be made one ? */ m_init(m, M_NOWAIT, MT_DATA, flags); m_cljset(m, cl, sd->ifsd_fl->ifl_cltype); /* * These must follow m_init and m_cljset */ m->m_data += padlen; ri->iri_len -= padlen; m->m_len = ri->iri_frags[i].irf_len; } while (++i < ri->iri_nfrags); return (mh); } /* * Process one software descriptor */ static struct mbuf * iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) { struct if_rxsd sd; struct mbuf *m; int pf_rv; /* should I merge this back in now that the two paths are basically duplicated? */ if (ri->iri_nfrags == 1 && ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) { m = rxd_frag_to_sd(rxq, &ri->iri_frags[0], false, &sd, &pf_rv, ri); if (pf_rv != PFIL_PASS && pf_rv != PFIL_REALLOCED) return (m); if (pf_rv == PFIL_PASS) { m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m)) m->m_data += 2; #endif memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); m->m_len = ri->iri_frags[0].irf_len; } } else { m = assemble_segments(rxq, ri, &sd, &pf_rv); if (pf_rv != PFIL_PASS && pf_rv != PFIL_REALLOCED) return (m); } m->m_pkthdr.len = ri->iri_len; m->m_pkthdr.rcvif = ri->iri_ifp; m->m_flags |= ri->iri_flags; m->m_pkthdr.ether_vtag = ri->iri_vtag; m->m_pkthdr.flowid = ri->iri_flowid; M_HASHTYPE_SET(m, ri->iri_rsstype); m->m_pkthdr.csum_flags = ri->iri_csum_flags; m->m_pkthdr.csum_data = ri->iri_csum_data; return (m); } #if defined(INET6) || defined(INET) static void iflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6) { CURVNET_SET(lc->ifp->if_vnet); #if defined(INET6) *v6 = VNET(ip6_forwarding); #endif #if defined(INET) *v4 = VNET(ipforwarding); #endif CURVNET_RESTORE(); } /* * Returns true if it's possible this packet could be LROed. * if it returns false, it is guaranteed that tcp_lro_rx() * would not return zero. 
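 * (That is, the frame is neither plain IPv4 nor IPv6, or forwarding is
 * enabled for its protocol, so handing it to LRO would be pointless.)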
*/ static bool iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding) { struct ether_header *eh; uint16_t eh_type; eh = mtod(m, struct ether_header *); eh_type = ntohs(eh->ether_type); switch (eh_type) { #if defined(INET6) case ETHERTYPE_IPV6: return !v6_forwarding; #endif #if defined (INET) case ETHERTYPE_IP: return !v4_forwarding; #endif } return false; } #else static void iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused) { } #endif static bool iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int avail, i; qidx_t *cidxp; struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; struct ifnet *ifp; int lro_enabled; bool v4_forwarding, v6_forwarding, lro_possible; /* * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ struct mbuf *m, *mh, *mt, *mf; lro_possible = v4_forwarding = v6_forwarding = false; ifp = ctx->ifc_ifp; mh = mt = NULL; MPASS(budget > 0); rx_pkts = rx_bytes = 0; if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else cidxp = &rxq->ifr_fl[0].ifl_cidx; if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); DBG_COUNTER_INC(rx_unavail); return (false); } /* pfil needs the vnet to be set */ CURVNET_SET_QUIET(ifp->if_vnet); for (budget_left = budget; budget_left > 0 && avail > 0;) { if (__predict_false(!CTX_ACTIVE(ctx))) { DBG_COUNTER_INC(rx_ctx_inactive); break; } /* * Reset client set fields to their default values */ rxd_info_zero(&ri); ri.iri_qsidx = rxq->ifr_id; ri.iri_cidx = *cidxp; ri.iri_ifp = ifp; ri.iri_frags = rxq->ifr_frags; err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); if (err) goto err; rx_pkts += 1; rx_bytes += ri.iri_len; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { *cidxp = ri.iri_cidx; /* Update our consumer index */ /* XXX NB: shurd - check if this is still safe */ while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) { rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; rxq->ifr_cq_gen = 0; } /* was this only a completion queue message? 
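			 * A completion with zero fragments carries no
			 * packet data, so there is nothing to hand to
			 * the stack.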
*/ if (__predict_false(ri.iri_nfrags == 0)) continue; } MPASS(ri.iri_nfrags != 0); MPASS(ri.iri_len != 0); /* will advance the cidx on the corresponding free lists */ m = iflib_rxd_pkt_get(rxq, &ri); avail--; budget_left--; if (avail == 0 && budget_left) avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); if (__predict_false(m == NULL)) continue; /* imm_pkt: -- cxgb */ if (mh == NULL) mh = mt = m; else { mt->m_nextpkt = m; mt = m; } } CURVNET_RESTORE(); /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); if (lro_enabled) iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding); mt = mf = NULL; while (mh != NULL) { m = mh; mh = mh->m_nextpkt; m->m_nextpkt = NULL; #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) continue; #endif rx_bytes += m->m_pkthdr.len; rx_pkts++; #if defined(INET6) || defined(INET) if (lro_enabled) { if (!lro_possible) { lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding); if (lro_possible && mf != NULL) { ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); mt = mf = NULL; } } if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC|CSUM_L4_VALID)) == (CSUM_L4_CALC|CSUM_L4_VALID)) { if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) continue; } } #endif if (lro_possible) { ifp->if_input(ifp, m); DBG_COUNTER_INC(rx_if_input); continue; } if (mf == NULL) mf = m; if (mt != NULL) mt->m_nextpkt = m; mt = m; } if (mf != NULL) { ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); } if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); /* * Flush any outstanding LRO work */ #if defined(INET6) || defined(INET) tcp_lro_flush_all(&rxq->ifr_lc); #endif if (avail) return true; return (iflib_rxd_avail(ctx, rxq, *cidxp, 1)); err: STATE_LOCK(ctx); ctx->ifc_flags |= IFC_DO_RESET; iflib_admin_intr_deferred(ctx); STATE_UNLOCK(ctx); return (false); } #define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1) static inline qidx_t txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (in_use > 4*minthresh) return (notify_count); if (in_use > 2*minthresh) return (notify_count >> 1); if (in_use > minthresh) return (notify_count >> 3); return (0); } static inline qidx_t txq_max_rs_deferred(iflib_txq_t txq) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (txq->ift_in_use > 4*minthresh) return (notify_count); if (txq->ift_in_use > 2*minthresh) return (notify_count >> 1); if (txq->ift_in_use > minthresh) return (notify_count >> 2); return (2); } #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) #define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use)) #define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq) #define TXQ_MAX_DB_CONSUMED(size) (size >> 4) /* forward compatibility for cxgb */ #define FIRST_QSET(ctx) 0 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) /* XXX we should be setting this to something 
other than zero */ #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) #define MAX_TX_DESC(ctx) max((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max, \ (ctx)->ifc_softc_ctx.isc_tx_nsegments) static inline bool iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring, qidx_t in_use) { qidx_t dbval, max; bool rang; rang = false; max = TXQ_MAX_DB_DEFERRED(txq, in_use); if (ring || txq->ift_db_pending >= max) { dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); txq->ift_db_pending = txq->ift_npending = 0; rang = true; } return (rang); } #ifdef PKT_DEBUG static void print_pkt(if_pkt_info_t pi) { printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n", pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx); printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n", pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag); printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n", pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto); } #endif #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) #define IS_TX_OFFLOAD4(pi) ((pi)->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP_TSO)) #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) #define IS_TX_OFFLOAD6(pi) ((pi)->ipi_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_TSO)) static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; struct ether_vlan_header *eh; struct mbuf *m; m = *mp; if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && M_WRITABLE(m) == 0) { if ((m = m_dup(m, M_NOWAIT)) == NULL) { return (ENOMEM); } else { m_freem(*mp); DBG_COUNTER_INC(tx_frees); *mp = m; } } /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. 
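	 * Only the outermost tag is examined here; evl_proto then holds
	 * the encapsulated ethertype.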
*/ if (__predict_false(m->m_len < sizeof(*eh))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL)) return (ENOMEM); } eh = mtod(m, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { pi->ipi_etype = ntohs(eh->evl_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { pi->ipi_etype = ntohs(eh->evl_encap_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN; } switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: { struct mbuf *n; struct ip *ip = NULL; struct tcphdr *th = NULL; int minthlen; minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); if (__predict_false(m->m_len < minthlen)) { /* * if this code bloat is causing too much of a hit * move it to a separate function and mark it noinline */ if (m->m_len == pi->ipi_ehdrlen) { n = m->m_next; MPASS(n); if (n->m_len >= sizeof(*ip)) { ip = (struct ip *)n->m_data; if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); } } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } } else { ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } pi->ipi_ip_hlen = ip->ip_hl << 2; pi->ipi_ipproto = ip->ip_p; pi->ipi_flags |= IPI_TX_IPV4; /* TCP checksum offload may require TCP header length */ if (IS_TX_OFFLOAD4(pi)) { if (__predict_true(pi->ipi_ipproto == IPPROTO_TCP)) { if (__predict_false(th == NULL)) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) return (ENOMEM); th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; pi->ipi_tcp_seq = th->th_seq; } if (IS_TSO4(pi)) { if (__predict_false(ip->ip_p != IPPROTO_TCP)) return (ENXIO); /* * TSO always requires hardware checksum offload. */ pi->ipi_csum_flags |= (CSUM_IP_TCP | CSUM_IP); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; if (sctx->isc_flags & IFLIB_TSO_INIT_IP) { ip->ip_sum = 0; ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz); } } } if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) ip->ip_sum = 0; break; } #endif #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); struct tcphdr *th; pi->ipi_ip_hlen = sizeof(struct ip6_hdr); if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) return (ENOMEM); } th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); /* XXX-BZ this will go badly in case of ext hdrs. 
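		 * (th assumes the TCP header immediately follows the fixed
		 * ip6_hdr; any extension header would shift it.)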
*/ pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_flags |= IPI_TX_IPV6; /* TCP checksum offload may require TCP header length */ if (IS_TX_OFFLOAD6(pi)) { if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) return (ENOMEM); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; pi->ipi_tcp_seq = th->th_seq; } if (IS_TSO6(pi)) { if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) return (ENXIO); /* * TSO always requires hardware checksum offload. */ pi->ipi_csum_flags |= CSUM_IP6_TCP; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; } } break; } #endif default: pi->ipi_csum_flags &= ~CSUM_OFFLOAD; pi->ipi_ip_hlen = 0; break; } *mp = m; return (0); } /* * If dodgy hardware rejects the scatter gather chain we've handed it * we'll need to remove the mbuf chain from ifsg_m[] before we can add the * m_defrag'd mbufs */ static __noinline struct mbuf * iflib_remove_mbuf(iflib_txq_t txq) { int ntxd, pidx; struct mbuf *m, **ifsd_m; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; pidx = txq->ift_pidx & (ntxd - 1); ifsd_m = txq->ift_sds.ifsd_m; m = ifsd_m[pidx]; ifsd_m[pidx] = NULL; bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[pidx]); if (txq->ift_sds.ifsd_tso_map != NULL) bus_dmamap_unload(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[pidx]); #if MEMORY_LOGGING txq->ift_dequeued++; #endif return (m); } static inline caddr_t calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid) { qidx_t size; int ntxd; caddr_t start, end, cur, next; ntxd = txq->ift_size; size = txq->ift_txd_size[qid]; start = txq->ift_ifdi[qid].idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*ntxd; next = CACHE_PTR_NEXT(cur); return (next < end ? next : start); } /* * Pad an mbuf to ensure a minimum ethernet frame size. 
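 * Used for hardware (IFLIB_NEED_ETHER_PAD) that wants short frames
 * padded by the driver rather than by the controller.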
 * min_frame_size is the frame size (less CRC) to pad the mbuf to
 */
static __noinline int
iflib_ether_pad(device_t dev, struct mbuf **m_head, uint16_t min_frame_size)
{
	/*
	 * 18 is enough bytes to pad an ARP packet to 46 bytes, and
	 * an ARP message is the smallest common payload I can think of
	 */
	static char pad[18];	/* just zeros */
	int n;
	struct mbuf *new_head;

	if (!M_WRITABLE(*m_head)) {
		new_head = m_dup(*m_head, M_NOWAIT);
		if (new_head == NULL) {
			m_freem(*m_head);
			device_printf(dev, "cannot pad short frame, m_dup() failed\n");
			DBG_COUNTER_INC(encap_pad_mbuf_fail);
			DBG_COUNTER_INC(tx_frees);
			return ENOMEM;
		}
		m_freem(*m_head);
		*m_head = new_head;
	}

	for (n = min_frame_size - (*m_head)->m_pkthdr.len;
	     n > 0; n -= sizeof(pad))
		if (!m_append(*m_head, min(n, sizeof(pad)), pad))
			break;

	if (n > 0) {
		m_freem(*m_head);
		device_printf(dev, "cannot pad short frame\n");
		DBG_COUNTER_INC(encap_pad_mbuf_fail);
		DBG_COUNTER_INC(tx_frees);
		return (ENOBUFS);
	}

	return 0;
}

static int
iflib_encap(iflib_txq_t txq, struct mbuf **m_headp)
{
	if_ctx_t		ctx;
	if_shared_ctx_t		sctx;
	if_softc_ctx_t		scctx;
	bus_dma_tag_t		buf_tag;
	bus_dma_segment_t	*segs;
	struct mbuf		*m_head, **ifsd_m;
	void			*next_txd;
	bus_dmamap_t		map;
	struct if_pkt_info	pi;
	int remap = 0;
	int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd;

	ctx = txq->ift_ctx;
	sctx = ctx->ifc_sctx;
	scctx = &ctx->ifc_softc_ctx;
	segs = txq->ift_segs;
	ntxd = txq->ift_size;
	m_head = *m_headp;
	map = NULL;

	/*
	 * If we're doing TSO the next descriptor to clean may be quite far ahead
	 */
	cidx = txq->ift_cidx;
	pidx = txq->ift_pidx;
	if (ctx->ifc_flags & IFC_PREFETCH) {
		next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1);
		if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) {
			next_txd = calc_next_txd(txq, cidx, 0);
			prefetch(next_txd);
		}

		/* prefetch the next cache line of mbuf pointers and flags */
		prefetch(&txq->ift_sds.ifsd_m[next]);
		prefetch(&txq->ift_sds.ifsd_map[next]);
		next = (cidx + CACHE_LINE_SIZE) & (ntxd-1);
	}
	map = txq->ift_sds.ifsd_map[pidx];
	ifsd_m = txq->ift_sds.ifsd_m;

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		buf_tag = txq->ift_tso_buf_tag;
		max_segs = scctx->isc_tx_tso_segments_max;
		map = txq->ift_sds.ifsd_tso_map[pidx];
		MPASS(buf_tag != NULL);
		MPASS(max_segs > 0);
	} else {
		buf_tag = txq->ift_buf_tag;
		max_segs = scctx->isc_tx_nsegments;
		map = txq->ift_sds.ifsd_map[pidx];
	}
	if ((sctx->isc_flags & IFLIB_NEED_ETHER_PAD) &&
	    __predict_false(m_head->m_pkthdr.len < scctx->isc_min_frame_size)) {
		err = iflib_ether_pad(ctx->ifc_dev, m_headp, scctx->isc_min_frame_size);
		if (err) {
			DBG_COUNTER_INC(encap_txd_encap_fail);
			return err;
		}
	}
	m_head = *m_headp;

	pkt_info_zero(&pi);
	pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST));
	pi.ipi_pidx = pidx;
	pi.ipi_qsidx = txq->ift_id;
	pi.ipi_len = m_head->m_pkthdr.len;
	pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags;
	pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ?
	    m_head->m_pkthdr.ether_vtag : 0;

	/* deliberate bitwise OR to make one condition */
	if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) {
		if (__predict_false((err =
		    iflib_parse_header(txq, &pi, m_headp)) != 0)) {
			DBG_COUNTER_INC(encap_txd_encap_fail);
			return (err);
		}
		m_head = *m_headp;
	}

retry:
	err = bus_dmamap_load_mbuf_sg(buf_tag, map, m_head, segs, &nsegs,
	    BUS_DMA_NOWAIT);
defrag:
	if (__predict_false(err)) {
		switch (err) {
		case EFBIG:
			/* try collapse once and defrag once */
			if (remap == 0) {
				m_head = m_collapse(*m_headp, M_NOWAIT, max_segs);
				/* try defrag if collapsing fails */
				if (m_head == NULL)
					remap++;
			}
			if (remap == 1) {
				txq->ift_mbuf_defrag++;
				m_head = m_defrag(*m_headp, M_NOWAIT);
			}
			/*
			 * remap should never be >1 unless bus_dmamap_load_mbuf_sg
			 * failed to map an mbuf that was run through m_defrag
			 */
			MPASS(remap <= 1);
			if (__predict_false(m_head == NULL || remap > 1))
				goto defrag_failed;
			remap++;
			*m_headp = m_head;
			goto retry;
			break;
		case ENOMEM:
			txq->ift_no_tx_dma_setup++;
			break;
		default:
			txq->ift_no_tx_dma_setup++;
			m_freem(*m_headp);
			DBG_COUNTER_INC(tx_frees);
			*m_headp = NULL;
			break;
		}
		txq->ift_map_failed++;
		DBG_COUNTER_INC(encap_load_mbuf_fail);
		DBG_COUNTER_INC(encap_txd_encap_fail);
		return (err);
	}
	ifsd_m[pidx] = m_head;
	/*
	 * XXX assumes a 1 to 1 relationship between segments and
	 *     descriptors - this does not hold true on all drivers, e.g.
	 *     cxgb
	 */
	if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) {
		txq->ift_no_desc_avail++;
		bus_dmamap_unload(buf_tag, map);
		DBG_COUNTER_INC(encap_txq_avail_fail);
		DBG_COUNTER_INC(encap_txd_encap_fail);
		if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0)
			GROUPTASK_ENQUEUE(&txq->ift_task);
		return (ENOBUFS);
	}
	/*
	 * On Intel cards we can greatly reduce the number of TX interrupts
	 * we see by only setting report status on every Nth descriptor.
	 * However, this also means that the driver will need to keep track
	 * of the descriptors that RS was set on to check them for the DD bit.
	 */
	txq->ift_rs_pending += nsegs + 1;
	if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) ||
	     iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs) <= MAX_TX_DESC(ctx) + 2) {
		pi.ipi_flags |= IPI_TX_INTR;
		txq->ift_rs_pending = 0;
	}

	pi.ipi_segs = segs;
	pi.ipi_nsegs = nsegs;

	MPASS(pidx >= 0 && pidx < txq->ift_size);
#ifdef PKT_DEBUG
	print_pkt(&pi);
#endif
	if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) {
		bus_dmamap_sync(buf_tag, map, BUS_DMASYNC_PREWRITE);
		DBG_COUNTER_INC(tx_encap);
		MPASS(pi.ipi_new_pidx < txq->ift_size);

		ndesc = pi.ipi_new_pidx - pi.ipi_pidx;
		if (pi.ipi_new_pidx < pi.ipi_pidx) {
			ndesc += txq->ift_size;
			txq->ift_gen = 1;
		}
		/*
		 * drivers can need as many as
		 * two sentinels
		 */
		MPASS(ndesc <= pi.ipi_nsegs + 2);
		MPASS(pi.ipi_new_pidx != pidx);
		MPASS(ndesc > 0);
		txq->ift_in_use += ndesc;

		/*
		 * We update the last software descriptor again here because there may
		 * be a sentinel and/or there may be more mbufs than segments
		 */
		txq->ift_pidx = pi.ipi_new_pidx;
		txq->ift_npending += pi.ipi_ndescs;
	} else {
		*m_headp = m_head = iflib_remove_mbuf(txq);
		if (err == EFBIG) {
			txq->ift_txd_encap_efbig++;
			if (remap < 2) {
				remap = 1;
				goto defrag;
			}
		}
		goto defrag_failed;
	}
	/*
	 * err can't possibly be non-zero here, so we don't need to test it
	 * to see if we need to DBG_COUNTER_INC(encap_txd_encap_fail).
*/ return (err); defrag_failed: txq->ift_mbuf_defrag_failed++; txq->ift_map_failed++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; DBG_COUNTER_INC(encap_txd_encap_fail); return (ENOMEM); } static void iflib_tx_desc_free(iflib_txq_t txq, int n) { uint32_t qsize, cidx, mask, gen; struct mbuf *m, **ifsd_m; bool do_prefetch; cidx = txq->ift_cidx; gen = txq->ift_gen; qsize = txq->ift_size; mask = qsize-1; ifsd_m = txq->ift_sds.ifsd_m; do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH); while (n-- > 0) { if (do_prefetch) { prefetch(ifsd_m[(cidx + 3) & mask]); prefetch(ifsd_m[(cidx + 4) & mask]); } if ((m = ifsd_m[cidx]) != NULL) { prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]); if (m->m_pkthdr.csum_flags & CSUM_TSO) { bus_dmamap_sync(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[cidx], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[cidx]); } else { bus_dmamap_sync(txq->ift_buf_tag, txq->ift_sds.ifsd_map[cidx], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[cidx]); } /* XXX we don't support any drivers that batch packets yet */ MPASS(m->m_nextpkt == NULL); m_freem(m); ifsd_m[cidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif DBG_COUNTER_INC(tx_frees); } if (__predict_false(++cidx == qsize)) { cidx = 0; gen = 0; } } txq->ift_cidx = cidx; txq->ift_gen = gen; } static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh) { int reclaim; if_ctx_t ctx = txq->ift_ctx; KASSERT(thresh >= 0, ("invalid threshold to reclaim")); MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size); /* * Need a rate-limiting check so that this isn't called every time */ iflib_tx_credits_update(ctx, txq); reclaim = DESC_RECLAIMABLE(txq); if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) { #ifdef INVARIANTS if (iflib_verbose_debug) { printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__, txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, reclaim, thresh); } #endif return (0); } iflib_tx_desc_free(txq, reclaim); txq->ift_cleaned += reclaim; txq->ift_in_use -= reclaim; return (reclaim); } static struct mbuf ** _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining) { int next, size; struct mbuf **items; size = r->size; next = (cidx + CACHE_PTR_INCREMENT) & (size-1); items = __DEVOLATILE(struct mbuf **, &r->items[0]); prefetch(items[(cidx + offset) & (size-1)]); if (remaining > 1) { prefetch2cachelines(&items[next]); prefetch2cachelines(items[(cidx + offset + 1) & (size-1)]); prefetch2cachelines(items[(cidx + offset + 2) & (size-1)]); prefetch2cachelines(items[(cidx + offset + 3) & (size-1)]); } return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); } static void iflib_txq_check_drain(iflib_txq_t txq, int budget) { ifmp_ring_check_drainage(txq->ift_br, budget); } static uint32_t iflib_txq_can_drain(struct ifmp_ring *r) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; if (TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) return (1); bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); return (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)); } static uint32_t iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; struct ifnet *ifp = ctx->ifc_ifp; struct mbuf **mp, *m; int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail; int reclaimed, err, 
in_use_prev, desc_used; bool do_prefetch, ring, rang; if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(txq_drain_notready); return (0); } reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); rang = iflib_txd_db_check(ctx, txq, reclaimed, txq->ift_in_use); avail = IDXDIFF(pidx, cidx, r->size); if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { DBG_COUNTER_INC(txq_drain_flushing); for (i = 0; i < avail; i++) { if (__predict_true(r->items[(cidx + i) & (r->size-1)] != (void *)txq)) m_free(r->items[(cidx + i) & (r->size-1)]); r->items[(cidx + i) & (r->size-1)] = NULL; } return (avail); } if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); DBG_COUNTER_INC(txq_drain_oactive); return (0); } if (reclaimed) txq->ift_qstatus = IFLIB_QUEUE_IDLE; consumed = mcast_sent = bytes_sent = pkt_sent = 0; count = MIN(avail, TX_BATCH_SIZE); #ifdef INVARIANTS if (iflib_verbose_debug) printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__, avail, ctx->ifc_flags, TXQ_AVAIL(txq)); #endif do_prefetch = (ctx->ifc_flags & IFC_PREFETCH); avail = TXQ_AVAIL(txq); err = 0; for (desc_used = i = 0; i < count && avail > MAX_TX_DESC(ctx) + 2; i++) { int rem = do_prefetch ? count - i : 0; mp = _ring_peek_one(r, cidx, i, rem); MPASS(mp != NULL && *mp != NULL); if (__predict_false(*mp == (struct mbuf *)txq)) { consumed++; reclaimed++; continue; } in_use_prev = txq->ift_in_use; err = iflib_encap(txq, mp); if (__predict_false(err)) { /* no room - bail out */ if (err == ENOBUFS) break; consumed++; /* we can't send this packet - skip it */ continue; } consumed++; pkt_sent++; m = *mp; DBG_COUNTER_INC(tx_sent); bytes_sent += m->m_pkthdr.len; mcast_sent += !!(m->m_flags & M_MCAST); avail = TXQ_AVAIL(txq); txq->ift_db_pending += (txq->ift_in_use - in_use_prev); desc_used += (txq->ift_in_use - in_use_prev); ETHER_BPF_MTAP(ifp, m); if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING))) break; rang = iflib_txd_db_check(ctx, txq, false, in_use_prev); } /* deliberate use of bitwise or to avoid gratuitous short-circuit */ ring = rang ? 
false : (iflib_min_tx_latency | err) || (TXQ_AVAIL(txq) < MAX_TX_DESC(ctx)); iflib_txd_db_check(ctx, txq, ring, txq->ift_in_use); if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); if (mcast_sent) if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); #ifdef INVARIANTS if (iflib_verbose_debug) printf("consumed=%d\n", consumed); #endif return (consumed); } static uint32_t iflib_txq_drain_always(struct ifmp_ring *r) { return (1); } static uint32_t iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { int i, avail; struct mbuf **mp; iflib_txq_t txq; txq = r->cookie; txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); avail = IDXDIFF(pidx, cidx, r->size); for (i = 0; i < avail; i++) { mp = _ring_peek_one(r, cidx, i, avail - i); if (__predict_false(*mp == (struct mbuf *)txq)) continue; m_freem(*mp); DBG_COUNTER_INC(tx_frees); } MPASS(ifmp_ring_is_stalled(r) == 0); return (avail); } static void iflib_ifmp_purge(iflib_txq_t txq) { struct ifmp_ring *r; r = txq->ift_br; r->drain = iflib_txq_drain_free; r->can_drain = iflib_txq_drain_always; ifmp_ring_check_drainage(r, r->size); r->drain = iflib_txq_drain; r->can_drain = iflib_txq_can_drain; } static void _task_fn_tx(void *context) { iflib_txq_t txq = context; if_ctx_t ctx = txq->ift_ctx; #if defined(ALTQ) || defined(DEV_NETMAP) if_t ifp = ctx->ifc_ifp; #endif int abdicate = ctx->ifc_sysctl_tx_abdicate; #ifdef IFLIB_DIAGNOSTICS txq->ift_cpu_exec_count[curcpu]++; #endif if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; #ifdef DEV_NETMAP if (if_getcapenable(ifp) & IFCAP_NETMAP) { bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)) netmap_tx_irq(ifp, txq->ift_id); IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); return; } #endif #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) iflib_altq_if_start(ifp); #endif if (txq->ift_db_pending) ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE, abdicate); else if (!abdicate) ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); /* * When abdicating, we always need to check drainage, not just when we don't enqueue */ if (abdicate) ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { #ifdef INVARIANTS int rc = #endif IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); } } static void _task_fn_rx(void *context) { iflib_rxq_t rxq = context; if_ctx_t ctx = rxq->ifr_ctx; bool more; uint16_t budget; #ifdef IFLIB_DIAGNOSTICS rxq->ifr_cpu_exec_count[curcpu]++; #endif DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; more = true; #ifdef DEV_NETMAP if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) { u_int work = 0; if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work)) { more = false; } } #endif budget = ctx->ifc_sysctl_rx_budget; if (budget == 0) budget = 16; /* XXX */ if (more == false || (more = iflib_rxeof(rxq, budget)) == false) { if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { #ifdef INVARIANTS int rc = #endif IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); DBG_COUNTER_INC(rx_intr_enables); } } if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & 
IFF_DRV_RUNNING))) return; if (more) GROUPTASK_ENQUEUE(&rxq->ifr_task); } static void _task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; iflib_txq_t txq; int i; bool oactive, running, do_reset, do_watchdog, in_detach; uint32_t reset_on = hz / 2; STATE_LOCK(ctx); running = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); oactive = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE); do_reset = (ctx->ifc_flags & IFC_DO_RESET); do_watchdog = (ctx->ifc_flags & IFC_DO_WATCHDOG); in_detach = (ctx->ifc_flags & IFC_IN_DETACH); ctx->ifc_flags &= ~(IFC_DO_RESET|IFC_DO_WATCHDOG); STATE_UNLOCK(ctx); if ((!running && !oactive) && !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN)) return; if (in_detach) return; CTX_LOCK(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); } if (do_watchdog) { ctx->ifc_watchdog_events++; IFDI_WATCHDOG_RESET(ctx); } IFDI_UPDATE_ADMIN_STATUS(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { #ifdef DEV_NETMAP reset_on = hz / 2; if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) iflib_netmap_timer_adjust(ctx, txq, &reset_on); #endif callout_reset_on(&txq->ift_timer, reset_on, iflib_timer, txq, txq->ift_timer.c_cpu); } IFDI_LINK_INTR_ENABLE(ctx); if (do_reset) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); if (LINK_ACTIVE(ctx) == 0) return; for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); } static void _task_fn_iov(void *context) { if_ctx_t ctx = context; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) && !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN)) return; CTX_LOCK(ctx); IFDI_VFLR_HANDLE(ctx); CTX_UNLOCK(ctx); } static int iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { int err; if_int_delay_info_t info; if_ctx_t ctx; info = (if_int_delay_info_t)arg1; ctx = info->iidi_ctx; info->iidi_req = req; info->iidi_oidp = oidp; CTX_LOCK(ctx); err = IFDI_SYSCTL_INT_DELAY(ctx, info); CTX_UNLOCK(ctx); return (err); } /********************************************************************* * * IFNET FUNCTIONS * **********************************************************************/ static void iflib_if_init_locked(if_ctx_t ctx) { iflib_stop(ctx); iflib_init_locked(ctx); } static void iflib_if_init(void *arg) { if_ctx_t ctx = arg; CTX_LOCK(ctx); iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static int iflib_if_transmit(if_t ifp, struct mbuf *m) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq; int err, qidx; int abdicate = ctx->ifc_sysctl_tx_abdicate; if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(tx_frees); m_freem(m); return (ENETDOWN); } MPASS(m->m_nextpkt == NULL); /* ALTQ-enabled interfaces always use queue 0. */ qidx = 0; if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m) && !ALTQ_IS_ENABLED(&ifp->if_snd)) qidx = QIDX(ctx, m); /* * XXX calculate buf_ring based on flowid (divvy up bits?) 
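 * (Queue selection recap: with more than one TX qset and a valid flow
 * hash on the mbuf, QIDX() maps the hash onto a qset; ALTQ-enabled
 * interfaces were pinned to qset 0 just above.)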
*/ txq = &ctx->ifc_txqs[qidx]; #ifdef DRIVER_BACKPRESSURE if (txq->ift_closed) { while (m != NULL) { next = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); DBG_COUNTER_INC(tx_frees); m = next; } return (ENOBUFS); } #endif #ifdef notyet qidx = count = 0; mp = marr; next = m; do { count++; next = next->m_nextpkt; } while (next != NULL); if (count > nitems(marr)) if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) { /* XXX check nextpkt */ m_freem(m); /* XXX simplify for now */ DBG_COUNTER_INC(tx_frees); return (ENOBUFS); } for (next = m, i = 0; next != NULL; i++) { mp[i] = next; next = next->m_nextpkt; mp[i]->m_nextpkt = NULL; } #endif DBG_COUNTER_INC(tx_seen); err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE, abdicate); if (abdicate) GROUPTASK_ENQUEUE(&txq->ift_task); if (err) { if (!abdicate) GROUPTASK_ENQUEUE(&txq->ift_task); /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); m_freem(m); DBG_COUNTER_INC(tx_frees); } return (err); } #ifdef ALTQ /* * The overall approach to integrating iflib with ALTQ is to continue to use * the iflib mp_ring machinery between the ALTQ queue(s) and the hardware * ring. Technically, when using ALTQ, queueing to an intermediate mp_ring * is redundant/unnecessary, but doing so minimizes the amount of * ALTQ-specific code required in iflib. It is assumed that the overhead of * redundantly queueing to an intermediate mp_ring is swamped by the * performance limitations inherent in using ALTQ. * * When ALTQ support is compiled in, all iflib drivers will use a transmit * routine, iflib_altq_if_transmit(), that checks if ALTQ is enabled for the * given interface. If ALTQ is enabled for an interface, then all * transmitted packets for that interface will be submitted to the ALTQ * subsystem via IFQ_ENQUEUE(). We don't use the legacy if_transmit() * implementation because it uses IFQ_HANDOFF(), which will duplicatively * update stats that the iflib machinery handles, and which is sensitive to * the disused IFF_DRV_OACTIVE flag. Additionally, iflib_altq_if_start() * will be installed as the start routine for use by ALTQ facilities that * need to trigger queue drains on a scheduled basis. * */ static void iflib_altq_if_start(if_t ifp) { struct ifaltq *ifq = &ifp->if_snd; struct mbuf *m; IFQ_LOCK(ifq); IFQ_DEQUEUE_NOLOCK(ifq, m); while (m != NULL) { iflib_if_transmit(ifp, m); IFQ_DEQUEUE_NOLOCK(ifq, m); } IFQ_UNLOCK(ifq); } static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m) { int err; if (ALTQ_IS_ENABLED(&ifp->if_snd)) { IFQ_ENQUEUE(&ifp->if_snd, m, err); if (err == 0) iflib_altq_if_start(ifp); } else err = iflib_if_transmit(ifp, m); return (err); } #endif /* ALTQ */ static void iflib_if_qflush(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq = ctx->ifc_txqs; int i; STATE_LOCK(ctx); ctx->ifc_flags |= IFC_QFLUSH; STATE_UNLOCK(ctx); for (i = 0; i < NTXQSETS(ctx); i++, txq++) while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br))) iflib_txq_check_drain(txq, 0); STATE_LOCK(ctx); ctx->ifc_flags &= ~IFC_QFLUSH; STATE_UNLOCK(ctx); /* * When ALTQ is enabled, this will also take care of purging the * ALTQ queue(s).
*/ if_qflush(ifp); } #define IFCAP_FLAGS (IFCAP_HWCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \ IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | \ IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM) static int iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) { if_ctx_t ctx = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int err = 0, reinit = 0, bits; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { if_setflagbits(ifp, IFF_UP,0); if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) reinit = 1; #ifdef INET if (!(if_getflags(ifp) & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else err = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: CTX_LOCK(ctx); if (ifr->ifr_mtu == if_getmtu(ifp)) { CTX_UNLOCK(ctx); break; } bits = if_getdrvflags(ifp); /* stop the driver and free any clusters before proceeding */ iflib_stop(ctx); if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { STATE_LOCK(ctx); if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) ctx->ifc_flags |= IFC_MULTISEG; else ctx->ifc_flags &= ~IFC_MULTISEG; STATE_UNLOCK(ctx); err = if_setmtu(ifp, ifr->ifr_mtu); } iflib_init_locked(ctx); STATE_LOCK(ctx); if_setdrvflags(ifp, bits); STATE_UNLOCK(ctx); CTX_UNLOCK(ctx); break; case SIOCSIFFLAGS: CTX_LOCK(ctx); if (if_getflags(ifp) & IFF_UP) { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); } } else reinit = 1; } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { iflib_stop(ctx); } ctx->ifc_if_flags = if_getflags(ifp); CTX_UNLOCK(ctx); break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); IFDI_MULTI_SET(ctx); IFDI_INTR_ENABLE(ctx); CTX_UNLOCK(ctx); } break; case SIOCSIFMEDIA: CTX_LOCK(ctx); IFDI_MEDIA_SET(ctx); CTX_UNLOCK(ctx); /* falls thru */ case SIOCGIFMEDIA: case SIOCGIFXMEDIA: - err = ifmedia_ioctl(ifp, ifr, &ctx->ifc_media, command); + err = ifmedia_ioctl(ifp, ifr, ctx->ifc_mediap, command); break; case SIOCGI2C: { struct ifi2creq i2c; err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (err != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { err = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { err = EINVAL; break; } if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0) err = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); break; } case SIOCSIFCAP: { int mask, setmask, oldmask; oldmask = if_getcapenable(ifp); mask = ifr->ifr_reqcap ^ oldmask; mask &= ctx->ifc_softc_ctx.isc_capabilities; setmask = 0; #ifdef TCP_OFFLOAD setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6); #endif setmask |= (mask & IFCAP_FLAGS); setmask |= (mask & IFCAP_WOL); /* * If any RX csum has changed, change all the ones that * are supported by the driver. 
*/ if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { setmask |= ctx->ifc_softc_ctx.isc_capabilities & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); } /* * We want to ensure that traffic has stopped before we change any of the flags. */ if (setmask) { CTX_LOCK(ctx); bits = if_getdrvflags(ifp); if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL) iflib_stop(ctx); STATE_LOCK(ctx); if_togglecapenable(ifp, setmask); STATE_UNLOCK(ctx); if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL) iflib_init_locked(ctx); STATE_LOCK(ctx); if_setdrvflags(ifp, bits); STATE_UNLOCK(ctx); CTX_UNLOCK(ctx); } if_vlancap(ifp); break; } case SIOCGPRIVATE_0: case SIOCSDRVSPEC: case SIOCGDRVSPEC: CTX_LOCK(ctx); err = IFDI_PRIV_IOCTL(ctx, command, data); CTX_UNLOCK(ctx); break; default: err = ether_ioctl(ifp, command, data); break; } if (reinit) iflib_if_init(ctx); return (err); } static uint64_t iflib_if_get_counter(if_t ifp, ift_counter cnt) { if_ctx_t ctx = if_getsoftc(ifp); return (IFDI_GET_COUNTER(ctx, cnt)); } /********************************************************************* * * OTHER FUNCTIONS EXPORTED TO THE STACK * **********************************************************************/ static void iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_REGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_UNREGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_led_func(void *arg, int onoff) { if_ctx_t ctx = arg; CTX_LOCK(ctx); IFDI_LED_FUNC(ctx, onoff); CTX_UNLOCK(ctx); } /********************************************************************* * * BUS FUNCTION DEFINITIONS * **********************************************************************/ int iflib_device_probe(device_t dev) { pci_vendor_info_t *ent; uint16_t pci_vendor_id, pci_device_id; uint16_t pci_subvendor_id, pci_subdevice_id; uint16_t pci_rev_id; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); pci_rev_id = pci_get_revid(dev); if (sctx->isc_parse_devinfo != NULL) sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id); ent = sctx->isc_vendor_info; while (ent->pvi_vendor_id != 0) { if (pci_vendor_id != ent->pvi_vendor_id) { ent++; continue; } if ((pci_device_id == ent->pvi_device_id) && ((pci_subvendor_id == ent->pvi_subvendor_id) || (ent->pvi_subvendor_id == 0)) && ((pci_subdevice_id == ent->pvi_subdevice_id) || (ent->pvi_subdevice_id == 0)) && ((pci_rev_id == ent->pvi_rev_id) || (ent->pvi_rev_id == 0))) { device_set_desc_copy(dev, ent->pvi_name); /* this needs to be changed to zero if the bus probing code * ever stops re-probing on best match because the sctx * may have its values overwritten by register calls * in subsequent probes */ return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } static void iflib_reset_qvalues(if_ctx_t ctx) { if_softc_ctx_t scctx =
&ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; device_t dev = ctx->ifc_dev; int i; scctx->isc_txrx_budget_bytes_max = IFLIB_MAX_TX_BYTES; scctx->isc_tx_qdepth = IFLIB_DEFAULT_TX_QDEPTH; /* * XXX sanity check that ntxd & nrxd are a power of 2 */ if (ctx->ifc_sysctl_ntxqs != 0) scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; if (ctx->ifc_sysctl_nrxqs != 0) scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; for (i = 0; i < sctx->isc_ntxqs; i++) { if (ctx->ifc_sysctl_ntxds[i] != 0) scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; else scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (ctx->ifc_sysctl_nrxds[i] != 0) scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; else scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; } if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; } if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } } } static void iflib_add_pfil(if_ctx_t ctx) { struct pfil_head *pfil; struct pfil_head_args pa; iflib_rxq_t rxq; int i; pa.pa_version = PFIL_VERSION; pa.pa_flags = PFIL_IN; pa.pa_type = PFIL_TYPE_ETHERNET; pa.pa_headname = ctx->ifc_ifp->if_xname; pfil = pfil_head_register(&pa); for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { rxq->pfil = pfil; } } static void iflib_rem_pfil(if_ctx_t ctx) { struct pfil_head *pfil; iflib_rxq_t rxq; int i; rxq = ctx->ifc_rxqs; pfil = rxq->pfil; for (i = 0; i < NRXQSETS(ctx); i++, rxq++) { rxq->pfil = NULL; } pfil_head_unregister(pfil); } static uint16_t get_ctx_core_offset(if_ctx_t ctx) { if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; struct cpu_offset *op; uint16_t qc; uint16_t ret = ctx->ifc_sysctl_core_offset; if (ret != CORE_OFFSET_UNSPECIFIED) return (ret); if (ctx->ifc_sysctl_separate_txrx) qc = scctx->isc_ntxqsets + scctx->isc_nrxqsets; else qc = max(scctx->isc_ntxqsets, scctx->isc_nrxqsets); mtx_lock(&cpu_offset_mtx); SLIST_FOREACH(op, &cpu_offsets, entries) { if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { ret = op->offset; op->offset += qc; MPASS(op->refcount < UINT_MAX); op->refcount++; break; } } if (ret == CORE_OFFSET_UNSPECIFIED) { ret = 0; op = malloc(sizeof(struct cpu_offset), M_IFLIB, M_NOWAIT | M_ZERO); if (op == NULL) { device_printf(ctx->ifc_dev, "allocation for cpu offset failed.\n"); } else { op->offset = qc; op->refcount = 1; CPU_COPY(&ctx->ifc_cpus, &op->set); SLIST_INSERT_HEAD(&cpu_offsets, op, entries); } } mtx_unlock(&cpu_offset_mtx); return (ret); } static void unref_ctx_core_offset(if_ctx_t ctx) { struct cpu_offset *op, *top; mtx_lock(&cpu_offset_mtx); SLIST_FOREACH_SAFE(op, &cpu_offsets, entries, top) { if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { MPASS(op->refcount > 0); op->refcount--; if (op->refcount == 0) { 
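/*
 * Last reference to this cpuset's offset record: unlink and free it so
 * that a later attach against the same cpuset starts queue placement at
 * core 0 again. The record pairs with get_ctx_core_offset() above; a
 * sketch of the intended lifecycle (not code from this file):
 *
 *	attach:	ctx->ifc_sysctl_core_offset = get_ctx_core_offset(ctx);
 *		(reuse a matching record, advancing op->offset by the
 *		queue count and bumping op->refcount, or insert a new one)
 *	detach:	unref_ctx_core_offset(ctx);
 *		(drop the refcount; the record is freed here at zero)
 */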
SLIST_REMOVE(&cpu_offsets, op, cpu_offset, entries); free(op, M_IFLIB); } break; } } mtx_unlock(&cpu_offset_mtx); } int iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) { int err, rid, msix; if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; int i; uint16_t main_txq; uint16_t main_rxq; ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); if (sc == NULL) { sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); device_set_softc(dev, ctx); ctx->ifc_flags |= IFC_SC_ALLOCATED; } ctx->ifc_sctx = sctx; ctx->ifc_dev = dev; ctx->ifc_softc = sc; if ((err = iflib_register(ctx)) != 0) { device_printf(dev, "iflib_register failed %d\n", err); goto fail_ctx_free; } iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; iflib_reset_qvalues(ctx); CTX_LOCK(ctx); if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); goto fail_unlock; } _iflib_pre_assert(scctx); ctx->ifc_txrx = *scctx->isc_txrx; + if (sctx->isc_flags & IFLIB_DRIVER_MEDIA) + ctx->ifc_mediap = scctx->isc_media; + #ifdef INVARIANTS - MPASS(scctx->isc_capabilities); if (scctx->isc_capabilities & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif if_setcapabilities(ifp, scctx->isc_capabilities | IFCAP_HWSTATS); if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS); if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; /* XXX change for per-queue sizes */ device_printf(dev, "Using %d tx descriptors and %d rx descriptors\n", scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); for (i = 0; i < sctx->isc_nrxqs; i++) { if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? */ device_printf(dev, "# rx descriptors must be a power of 2\n"); err = EINVAL; goto fail_iflib_detach; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (!powerof2(scctx->isc_ntxd[i])) { device_printf(dev, "# tx descriptors must be a power of 2"); err = EINVAL; goto fail_iflib_detach; } } if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_tso_segments_max = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ if (if_getcapabilities(ifp) & IFCAP_TSO) { /* * The stack can't handle a TSO size larger than IP_MAXPACKET, * but some MACs do. */ if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max, IP_MAXPACKET)); /* * Take maximum number of m_pullup(9)'s in iflib_parse_header() * into account. In the worst case, each of these calls will * add another mbuf and, thus, the requirement for another DMA * segment. So for best performance, it doesn't make sense to * advertize a maximum of TSO segments that typically will * require defragmentation in iflib_encap(). 
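 * For example, with an isc_tx_tso_segments_max of 32, the count
 * advertised below is 29, holding three segments in reserve for mbufs
 * added by m_pullup(9).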
*/ if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3); if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max); } if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, NULL, NULL, "admin"); /* Set up cpu set. If it fails, use the set of all CPUs. */ if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) { device_printf(dev, "Unable to fetch CPU list\n"); CPU_COPY(&all_cpus, &ctx->ifc_cpus); } MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0); /* ** Now set up MSI or MSI-X, should return us the number of supported ** vectors (will be 1 for a legacy interrupt and MSI). */ if (sctx->isc_flags & IFLIB_SKIP_MSIX) { msix = scctx->isc_vectors; } else if (scctx->isc_msix_bar != 0) /* * The simple fact that isc_msix_bar is not 0 does not mean we * have a good value there that is known to work. */ msix = iflib_msix_init(ctx); else { scctx->isc_vectors = 1; scctx->isc_ntxqsets = 1; scctx->isc_nrxqsets = 1; scctx->isc_intr = IFLIB_INTR_LEGACY; msix = 0; } /* Get memory for the station queues */ if ((err = iflib_queues_alloc(ctx))) { device_printf(dev, "Unable to allocate queue memory\n"); goto fail_intr_free; } if ((err = iflib_qset_structures_setup(ctx))) goto fail_queues; /* * Now that we know how many queues there are, get the core offset. */ ctx->ifc_sysctl_core_offset = get_ctx_core_offset(ctx); /* * Group taskqueues aren't properly set up until SMP is started, * so we disable interrupts until we can handle them post * SI_SUB_SMP. * * XXX: disabling interrupts doesn't actually work, at least for * the non-MSI case. When they occur before SI_SUB_SMP completes, * we do null handling and depend on this not causing too large an * interrupt storm. */ IFDI_INTR_DISABLE(ctx); if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) { device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err); goto fail_queues; } if (msix <= 1) { rid = 0; if (scctx->isc_intr == IFLIB_INTR_MSI) { MPASS(msix == 1); rid = 1; } if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { device_printf(dev, "iflib_legacy_setup failed %d\n", err); goto fail_queues; } } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } /* * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. * This must appear after the call to ether_ifattach() because * ether_ifattach() sets if_hdrlen to the default value.
*/ if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if ((err = iflib_netmap_attach(ctx))) { device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err); goto fail_detach; } *ctxp = ctx; NETDUMP_SET(ctx->ifc_ifp, iflib); if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); iflib_add_pfil(ctx); ctx->ifc_flags |= IFC_INIT_DONE; CTX_UNLOCK(ctx); return (0); fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_intr_free: iflib_free_intr_mem(ctx); fail_queues: iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); fail_iflib_detach: IFDI_DETACH(ctx); fail_unlock: CTX_UNLOCK(ctx); fail_ctx_free: if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); return (err); } int iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, struct iflib_cloneattach_ctx *clctx) { int err; if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; int i; void *sc; uint16_t main_txq; uint16_t main_rxq; ctx = malloc(sizeof(*ctx), M_IFLIB, M_WAITOK|M_ZERO); sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); ctx->ifc_flags |= IFC_SC_ALLOCATED; if (sctx->isc_flags & (IFLIB_PSEUDO|IFLIB_VIRTUAL)) ctx->ifc_flags |= IFC_PSEUDO; ctx->ifc_sctx = sctx; ctx->ifc_softc = sc; ctx->ifc_dev = dev; if ((err = iflib_register(ctx)) != 0) { device_printf(dev, "%s: iflib_register failed %d\n", __func__, err); goto fail_ctx_free; } iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; /* * XXX sanity check that ntxd & nrxd are a power of 2 */ iflib_reset_qvalues(ctx); CTX_LOCK(ctx); if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); goto fail_unlock; } if (sctx->isc_flags & IFLIB_GEN_MAC) ether_gen_addr(ifp, &ctx->ifc_mac); if ((err = IFDI_CLONEATTACH(ctx, clctx->cc_ifc, clctx->cc_name, clctx->cc_params)) != 0) { device_printf(dev, "IFDI_CLONEATTACH failed %d\n", err); goto fail_ctx_free; } - ifmedia_add(&ctx->ifc_media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); - ifmedia_add(&ctx->ifc_media, IFM_ETHER | IFM_AUTO, 0, NULL); - ifmedia_set(&ctx->ifc_media, IFM_ETHER | IFM_AUTO); + ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); + ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO); #ifdef INVARIANTS - MPASS(scctx->isc_capabilities); if (scctx->isc_capabilities & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif if_setcapabilities(ifp, scctx->isc_capabilities | IFCAP_HWSTATS | IFCAP_LINKSTATE); if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_LINKSTATE); ifp->if_flags |= IFF_NOGROUP; if (sctx->isc_flags & IFLIB_PSEUDO) { ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } *ctxp = ctx; /* * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. * This must appear after the call to ether_ifattach() because * ether_ifattach() sets if_hdrlen to the default value. 
*/ if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); ctx->ifc_flags |= IFC_INIT_DONE; return (0); } _iflib_pre_assert(scctx); ctx->ifc_txrx = *scctx->isc_txrx; if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; /* XXX change for per-queue sizes */ device_printf(dev, "Using %d tx descriptors and %d rx descriptors\n", scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); for (i = 0; i < sctx->isc_nrxqs; i++) { if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? */ device_printf(dev, "# rx descriptors must be a power of 2\n"); err = EINVAL; goto fail_iflib_detach; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (!powerof2(scctx->isc_ntxd[i])) { device_printf(dev, "# tx descriptors must be a power of 2"); err = EINVAL; goto fail_iflib_detach; } } if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_tso_segments_max = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ if (if_getcapabilities(ifp) & IFCAP_TSO) { /* * The stack can't handle a TSO size larger than IP_MAXPACKET, * but some MACs do. */ if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max, IP_MAXPACKET)); /* * Take maximum number of m_pullup(9)'s in iflib_parse_header() * into account. In the worst case, each of these calls will * add another mbuf and, thus, the requirement for another DMA * segment. So for best performance, it doesn't make sense to * advertize a maximum of TSO segments that typically will * require defragmentation in iflib_encap(). */ if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3); if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max); } if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, NULL, NULL, "admin"); /* XXX --- can support > 1 -- but keep it simple for now */ scctx->isc_intr = IFLIB_INTR_LEGACY; /* Get memory for the station queues */ if ((err = iflib_queues_alloc(ctx))) { device_printf(dev, "Unable to allocate queue memory\n"); goto fail_iflib_detach; } if ((err = iflib_qset_structures_setup(ctx))) { device_printf(dev, "qset structure setup failed %d\n", err); goto fail_queues; } /* * XXX What if anything do we want to do about interrupts? */ ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } /* * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. * This must appear after the call to ether_ifattach() because * ether_ifattach() sets if_hdrlen to the default value. 
*/ if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); /* XXX handle more than one queue */ for (i = 0; i < scctx->isc_nrxqsets; i++) IFDI_RX_CLSET(ctx, 0, i, ctx->ifc_rxqs[i].ifr_fl[0].ifl_sds.ifsd_cl); *ctxp = ctx; if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); ctx->ifc_flags |= IFC_INIT_DONE; CTX_UNLOCK(ctx); return (0); fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_queues: iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); fail_iflib_detach: IFDI_DETACH(ctx); fail_unlock: CTX_UNLOCK(ctx); fail_ctx_free: free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); return (err); } int iflib_pseudo_deregister(if_ctx_t ctx) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq; iflib_rxq_t rxq; int i, j; struct taskqgroup *tqg; iflib_fl_t fl; /* Unregister VLAN events */ if (ctx->ifc_vlan_attach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); if (ctx->ifc_vlan_detach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); ether_ifdetach(ifp); /* ether_ifdetach calls if_qflush - lock must be destroyed afterwards */ CTX_LOCK_DESTROY(ctx); /* XXX drain any dependent tasks */ tqg = qgroup_if_io_tqg; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); if (txq->ift_task.gt_uniq != NULL) taskqgroup_detach(tqg, &txq->ift_task); } for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { if (rxq->ifr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &rxq->ifr_task); for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) free(fl->ifl_rx_bitmap, M_IFLIB); } tqg = qgroup_if_config_tqg; if (ctx->ifc_admin_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); if_free(ifp); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); return (0); } int iflib_device_attach(device_t dev) { if_ctx_t ctx; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_enable_busmaster(dev); return (iflib_device_register(dev, NULL, sctx, &ctx)); } int iflib_device_deregister(if_ctx_t ctx) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq; iflib_rxq_t rxq; device_t dev = ctx->ifc_dev; int i, j; struct taskqgroup *tqg; iflib_fl_t fl; /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev, "Vlan in use, detach first\n"); return (EBUSY); } #ifdef PCI_IOV if (!CTX_IS_VF(ctx) && pci_iov_detach(dev) != 0) { device_printf(dev, "SR-IOV in use; detach first.\n"); return (EBUSY); } #endif STATE_LOCK(ctx); ctx->ifc_flags |= IFC_IN_DETACH; STATE_UNLOCK(ctx); CTX_LOCK(ctx); iflib_stop(ctx); CTX_UNLOCK(ctx); /* Unregister VLAN events */ if (ctx->ifc_vlan_attach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); if (ctx->ifc_vlan_detach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); iflib_netmap_detach(ifp); ether_ifdetach(ifp); iflib_rem_pfil(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ tqg = qgroup_if_io_tqg; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); if (txq->ift_task.gt_uniq != NULL) taskqgroup_detach(tqg, &txq->ift_task); } for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++,
rxq++) { if (rxq->ifr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &rxq->ifr_task); for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) free(fl->ifl_rx_bitmap, M_IFLIB); } tqg = qgroup_if_config_tqg; if (ctx->ifc_admin_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); CTX_LOCK(ctx); IFDI_DETACH(ctx); CTX_UNLOCK(ctx); /* ether_ifdetach calls if_qflush - lock must be destroyed afterwards */ CTX_LOCK_DESTROY(ctx); device_set_softc(ctx->ifc_dev, NULL); iflib_free_intr_mem(ctx); bus_generic_detach(dev); if_free(ifp); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); unref_ctx_core_offset(ctx); STATE_LOCK_DESTROY(ctx); free(ctx, M_IFLIB); return (0); } static void iflib_free_intr_mem(if_ctx_t ctx) { if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { iflib_irq_free(ctx, &ctx->ifc_legacy_irq); } if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(ctx->ifc_dev); } if (ctx->ifc_msix_mem != NULL) { bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, rman_get_rid(ctx->ifc_msix_mem), ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } } int iflib_device_detach(device_t dev) { if_ctx_t ctx = device_get_softc(dev); return (iflib_device_deregister(ctx)); } int iflib_device_suspend(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SUSPEND(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_shutdown(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SHUTDOWN(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_resume(device_t dev) { if_ctx_t ctx = device_get_softc(dev); iflib_txq_t txq = ctx->ifc_txqs; CTX_LOCK(ctx); IFDI_RESUME(ctx); iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); for (int i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); return (bus_generic_resume(dev)); } int iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_INIT(ctx, num_vfs, params); CTX_UNLOCK(ctx); return (error); } void iflib_device_iov_uninit(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_IOV_UNINIT(ctx); CTX_UNLOCK(ctx); } int iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_VF_ADD(ctx, vfnum, params); CTX_UNLOCK(ctx); return (error); } /********************************************************************* * * MODULE FUNCTION DEFINITIONS * **********************************************************************/ /* * - Start a fast taskqueue thread for each core * - Start a taskqueue for control operations */ static int iflib_module_init(void) { return (0); } static int iflib_module_event_handler(module_t mod, int what, void *arg) { int err; switch (what) { case MOD_LOAD: if ((err = iflib_module_init()) != 0) return (err); break; case MOD_UNLOAD: return (EBUSY); default: return (EOPNOTSUPP); } return (0); } /********************************************************************* * * PUBLIC FUNCTION DEFINITIONS * ordered as in iflib.h * **********************************************************************/ static void _iflib_assert(if_shared_ctx_t sctx) { MPASS(sctx->isc_tx_maxsize); MPASS(sctx->isc_tx_maxsegsize); MPASS(sctx->isc_rx_maxsize);
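/*
 * These assertions spell out the minimum contract a driver's shared
 * context must meet before iflib_register() proceeds. A minimal sketch
 * of an initializer that satisfies them; the name and values are purely
 * illustrative and not taken from any real driver:
 *
 *	static struct if_shared_ctx hypothetical_sctx = {
 *		.isc_magic = IFLIB_MAGIC,
 *		.isc_tx_maxsize = 65536,
 *		.isc_tx_maxsegsize = PAGE_SIZE,
 *		.isc_rx_maxsize = PAGE_SIZE,
 *		.isc_rx_nsegments = 1,
 *		.isc_rx_maxsegsize = PAGE_SIZE,
 *		.isc_nrxd_min = {32}, .isc_nrxd_max = {4096},
 *		.isc_nrxd_default = {1024},
 *		.isc_ntxd_min = {32}, .isc_ntxd_max = {4096},
 *		.isc_ntxd_default = {1024},
 *	};
 *
 * A zero in any of these fields trips the corresponding MPASS in an
 * INVARIANTS kernel.
 */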
MPASS(sctx->isc_rx_nsegments); MPASS(sctx->isc_rx_maxsegsize); MPASS(sctx->isc_nrxd_min[0]); MPASS(sctx->isc_nrxd_max[0]); MPASS(sctx->isc_nrxd_default[0]); MPASS(sctx->isc_ntxd_min[0]); MPASS(sctx->isc_ntxd_max[0]); MPASS(sctx->isc_ntxd_default[0]); } static void _iflib_pre_assert(if_softc_ctx_t scctx) { MPASS(scctx->isc_txrx->ift_txd_encap); MPASS(scctx->isc_txrx->ift_txd_flush); MPASS(scctx->isc_txrx->ift_txd_credits_update); MPASS(scctx->isc_txrx->ift_rxd_available); MPASS(scctx->isc_txrx->ift_rxd_pkt_get); MPASS(scctx->isc_txrx->ift_rxd_refill); MPASS(scctx->isc_txrx->ift_rxd_flush); } static int iflib_register(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; driver_t *driver = sctx->isc_driver; device_t dev = ctx->ifc_dev; if_t ifp; _iflib_assert(sctx); CTX_LOCK_INIT(ctx); STATE_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev)); ifp = ctx->ifc_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (ENOMEM); } /* * Initialize our context's device specific methods */ kobj_init((kobj_t) ctx, (kobj_class_t) driver); kobj_class_compile((kobj_class_t) driver); driver->refs++; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setsoftc(ifp, ctx); if_setdev(ifp, dev); if_setinitfn(ifp, iflib_if_init); if_setioctlfn(ifp, iflib_if_ioctl); #ifdef ALTQ if_setstartfn(ifp, iflib_altq_if_start); if_settransmitfn(ifp, iflib_altq_if_transmit); if_setsendqready(ifp); #else if_settransmitfn(ifp, iflib_if_transmit); #endif if_setqflushfn(ifp, iflib_if_qflush); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); ctx->ifc_vlan_attach_event = EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx, EVENTHANDLER_PRI_FIRST); ctx->ifc_vlan_detach_event = EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx, EVENTHANDLER_PRI_FIRST); - ifmedia_init(&ctx->ifc_media, IFM_IMASK, - iflib_media_change, iflib_media_status); - + if ((sctx->isc_flags & IFLIB_DRIVER_MEDIA) == 0) { + ctx->ifc_mediap = &ctx->ifc_media; + ifmedia_init(ctx->ifc_mediap, IFM_IMASK, + iflib_media_change, iflib_media_status); + } return (0); } static int iflib_queues_alloc(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int nrxqsets = scctx->isc_nrxqsets; int ntxqsets = scctx->isc_ntxqsets; iflib_txq_t txq; iflib_rxq_t rxq; iflib_fl_t fl = NULL; int i, j, cpu, err, txconf, rxconf; iflib_dma_info_t ifdip; uint32_t *rxqsizes = scctx->isc_rxqsizes; uint32_t *txqsizes = scctx->isc_txqsizes; uint8_t nrxqs = sctx->isc_nrxqs; uint8_t ntxqs = sctx->isc_ntxqs; int nfree_lists = sctx->isc_nfl ? 
sctx->isc_nfl : 1; caddr_t *vaddrs; uint64_t *paddrs; KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); /* Allocate the TX ring struct memory */ if (!(ctx->ifc_txqs = (iflib_txq_t) malloc(sizeof(struct iflib_txq) * ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); err = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(ctx->ifc_rxqs = (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); err = ENOMEM; goto rx_fail; } txq = ctx->ifc_txqs; rxq = ctx->ifc_rxqs; /* * XXX handle allocation failure */ for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate TX DMA info memory\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_ifdi = ifdip; for (j = 0; j < ntxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, 0)) { device_printf(dev, "Unable to allocate TX descriptors\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_txd_size[j] = scctx->isc_txd_size[j]; bzero((void *)ifdip->idi_vaddr, txqsizes[j]); } txq->ift_ctx = ctx; txq->ift_id = i; if (sctx->isc_flags & IFLIB_HAS_TXCQ) { txq->ift_br_offset = 1; } else { txq->ift_br_offset = 0; } /* XXX fix this */ txq->ift_timer.c_cpu = cpu; if (iflib_txsd_alloc(txq)) { device_printf(dev, "Critical Failure setting up TX buffers\n"); err = ENOMEM; goto err_tx_desc; } /* Initialize the TX lock */ snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:tx(%d):callout", device_get_nameunit(dev), txq->ift_id); mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); snprintf(txq->ift_db_mtx_name, MTX_NAME_LEN, "%s:tx(%d):db", device_get_nameunit(dev), txq->ift_id); err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain, iflib_txq_can_drain, M_IFLIB, M_WAITOK); if (err) { /* XXX free any allocated rings */ device_printf(dev, "Unable to allocate buf_ring\n"); goto err_tx_desc; } } for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate RX DMA info memory\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_ifdi = ifdip; /* XXX this needs to be changed if #rx queues != #tx queues */ rxq->ifr_ntxqirq = 1; rxq->ifr_txqid[0] = i; for (j = 0; j < nrxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, 0)) { device_printf(dev, "Unable to allocate RX descriptors\n"); err = ENOMEM; goto err_tx_desc; } bzero((void *)ifdip->idi_vaddr, rxqsizes[j]); } rxq->ifr_ctx = ctx; rxq->ifr_id = i; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { rxq->ifr_fl_offset = 1; } else { rxq->ifr_fl_offset = 0; } rxq->ifr_nfl = nfree_lists; if (!(fl = (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate free list memory\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_fl = fl; for (j = 0; j < nfree_lists; j++) { fl[j].ifl_rxq = rxq; fl[j].ifl_id = j; fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; fl[j].ifl_rxd_size = scctx->isc_rxd_size[j]; } /* Allocate receive buffers for the ring */ if (iflib_rxsd_alloc(rxq)) { device_printf(dev, "Critical Failure 
setting up receive buffers\n"); err = ENOMEM; goto err_rx_desc; } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, M_WAITOK); } /* TXQs */ vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); for (i = 0; i < ntxqsets; i++) { iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi; for (j = 0; j < ntxqs; j++, di++) { vaddrs[i*ntxqs + j] = di->idi_vaddr; paddrs[i*ntxqs + j] = di->idi_paddr; } } if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) { device_printf(ctx->ifc_dev, "Unable to allocate device TX queue\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); /* RXQs */ vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); for (i = 0; i < nrxqsets; i++) { iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi; for (j = 0; j < nrxqs; j++, di++) { vaddrs[i*nrxqs + j] = di->idi_vaddr; paddrs[i*nrxqs + j] = di->idi_paddr; } } if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) { device_printf(ctx->ifc_dev, "Unable to allocate device RX queue\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); return (0); /* XXX handle allocation failure changes */ err_rx_desc: err_tx_desc: rx_fail: if (ctx->ifc_rxqs != NULL) free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; if (ctx->ifc_txqs != NULL) free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; fail: return (err); } static int iflib_tx_structures_setup(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i; for (i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_setup(txq); return (0); } static void iflib_tx_structures_free(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; if_shared_ctx_t sctx = ctx->ifc_sctx; int i, j; for (i = 0; i < NTXQSETS(ctx); i++, txq++) { iflib_txq_destroy(txq); for (j = 0; j < sctx->isc_ntxqs; j++) iflib_dma_free(&txq->ift_ifdi[j]); } free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; IFDI_QUEUES_FREE(ctx); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int iflib_rx_structures_setup(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; int q; #if defined(INET6) || defined(INET) int i, err; #endif for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) { #if defined(INET6) || defined(INET) tcp_lro_free(&rxq->ifr_lc); if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, TCP_LRO_ENTRIES, min(1024, ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) { device_printf(ctx->ifc_dev, "LRO Initialization failed!\n"); goto fail; } rxq->ifr_lro_enabled = TRUE; #endif IFDI_RXQ_SETUP(ctx, rxq->ifr_id); } return (0); #if defined(INET6) || defined(INET) fail: /* * Free RX software descriptors allocated so far; we will only handle * the rings that completed, as the failing case will have * cleaned up for itself. 'q' failed, so it's the terminus. */ rxq = ctx->ifc_rxqs; for (i = 0; i < q; ++i, rxq++) { iflib_rx_sds_free(rxq); rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; } return (err); #endif } /********************************************************************* * * Free all receive rings.
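 * (iflib_rx_sds_free() is applied to each qset below, then the rxq
 * array itself is released; callers reach this from both the attach
 * failure paths and device deregistration.)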
* **********************************************************************/ static void iflib_rx_structures_free(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; for (int i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) { iflib_rx_sds_free(rxq); } free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; } static int iflib_qset_structures_setup(if_ctx_t ctx) { int err; /* * It is expected that the caller takes care of freeing queues if this * fails. */ if ((err = iflib_tx_structures_setup(ctx)) != 0) { device_printf(ctx->ifc_dev, "iflib_tx_structures_setup failed: %d\n", err); return (err); } if ((err = iflib_rx_structures_setup(ctx)) != 0) device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err); return (err); } int iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, const char *name) { return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); } #ifdef SMP static int find_nth(if_ctx_t ctx, int qid) { cpuset_t cpus; int i, cpuid, eqid, count; CPU_COPY(&ctx->ifc_cpus, &cpus); count = CPU_COUNT(&cpus); eqid = qid % count; /* clear up to the qid'th bit */ for (i = 0; i < eqid; i++) { cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); CPU_CLR(cpuid-1, &cpus); } cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); return (cpuid-1); } #ifdef SCHED_ULE extern struct cpu_group *cpu_top; /* CPU topology */ static int find_child_with_core(int cpu, struct cpu_group *grp) { int i; if (grp->cg_children == 0) return -1; MPASS(grp->cg_child); for (i = 0; i < grp->cg_children; i++) { if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) return i; } return -1; } /* * Find the nth "close" core to the specified core * "close" is defined as the deepest level that shares * at least an L2 cache. With threads, this will be * threads on the same core. If the shared cache is L3 * or higher, simply returns the same core. */ static int find_close_core(int cpu, int core_offset) { struct cpu_group *grp; int i; int fcpu; cpuset_t cs; grp = cpu_top; if (grp == NULL) return cpu; i = 0; while ((i = find_child_with_core(cpu, grp)) != -1) { /* If the child only has one cpu, don't descend */ if (grp->cg_child[i].cg_count <= 1) break; grp = &grp->cg_child[i]; } /* If they don't share at least an L2 cache, use the same CPU */ if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) return cpu; /* Now pick one */ CPU_COPY(&grp->cg_mask, &cs); /* Add the selected CPU offset to core offset. 
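 * (The first loop below finds the position of 'cpu' within its sharing
 * group, that position is added to core_offset, and the second loop
 * then steps core_offset modulo cg_count CPUs into the group mask to
 * pick the neighbor.)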
*/ for (i = 0; (fcpu = CPU_FFS(&cs)) != 0; i++) { if (fcpu - 1 == cpu) break; CPU_CLR(fcpu - 1, &cs); } MPASS(fcpu); core_offset += i; CPU_COPY(&grp->cg_mask, &cs); for (i = core_offset % grp->cg_count; i > 0; i--) { MPASS(CPU_FFS(&cs)); CPU_CLR(CPU_FFS(&cs) - 1, &cs); } MPASS(CPU_FFS(&cs)); return CPU_FFS(&cs) - 1; } #else static int find_close_core(int cpu, int core_offset __unused) { return cpu; } #endif static int get_core_offset(if_ctx_t ctx, iflib_intr_type_t type, int qid) { switch (type) { case IFLIB_INTR_TX: /* TX queues get cores which share at least an L2 cache with the corresponding RX queue */ /* XXX handle multiple RX threads per core and more than two cores per L2 group */ return qid / CPU_COUNT(&ctx->ifc_cpus) + 1; case IFLIB_INTR_RX: case IFLIB_INTR_RXTX: /* RX queues get the specified core */ return qid / CPU_COUNT(&ctx->ifc_cpus); default: return -1; } } #else #define get_core_offset(ctx, type, qid) CPU_FIRST() #define find_close_core(cpuid, tid) CPU_FIRST() #define find_nth(ctx, gid) CPU_FIRST() #endif /* Just to avoid copy/paste */ static inline int iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, int qid, struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, const char *name) { device_t dev; int co, cpuid, err, tid; dev = ctx->ifc_dev; co = ctx->ifc_sysctl_core_offset; if (ctx->ifc_sysctl_separate_txrx && type == IFLIB_INTR_TX) co += ctx->ifc_softc_ctx.isc_nrxqsets; cpuid = find_nth(ctx, qid + co); tid = get_core_offset(ctx, type, qid); MPASS(tid >= 0); cpuid = find_close_core(cpuid, tid); err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, dev, irq->ii_res, name); if (err) { device_printf(dev, "taskqgroup_attach_cpu failed %d\n", err); return (err); } #ifdef notyet if (cpuid > ctx->ifc_cpuid_highest) ctx->ifc_cpuid_highest = cpuid; #endif return 0; } int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, const char *name) { device_t dev; struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; gtask_fn_t *fn; int tqrid, err; driver_filter_t *intr_fast; void *q; info = &ctx->ifc_filter_info; tqrid = rid; switch (type) { /* XXX merge tx/rx for netmap?
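 * (Dispatch summary for the switch below: TX and RX each run their
 * grouptask on qgroup_if_io_tqg with the iflib_fast_intr filter, RXTX
 * uses iflib_fast_intr_rxtx so a single vector can service both
 * directions, and ADMIN attaches to qgroup_if_config_tqg with
 * iflib_fast_intr_ctx.)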
*/ case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; info = &ctx->ifc_txqs[qid].ift_filter_info; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); ctx->ifc_flags |= IFC_NETMAP_TX_IRQ; break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RXTX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr_rxtx; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_ADMIN: q = ctx; tqrid = -1; info = &ctx->ifc_filter_info; gtask = &ctx->ifc_admin_task; tqg = qgroup_if_config_tqg; fn = _task_fn_admin; intr_fast = iflib_fast_intr_ctx; break; default: panic("unknown net intr type"); } info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = q; dev = ctx->ifc_dev; err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name); if (err != 0) { device_printf(dev, "_iflib_irq_alloc failed %d\n", err); return (err); } if (type == IFLIB_INTR_ADMIN) return (0); if (tqrid != -1) { err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, q, name); if (err) return (err); } else { taskqgroup_attach(tqg, gtask, q, dev, irq->ii_res, name); } return (0); } void iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, const char *name) { struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; void *q; int err; switch (type) { case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; break; case IFLIB_INTR_IOV: q = ctx; gtask = &ctx->ifc_vflr_task; tqg = qgroup_if_config_tqg; fn = _task_fn_iov; break; default: panic("unknown net intr type"); } GROUPTASK_INIT(gtask, 0, fn, q); if (irq != NULL) { err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, q, name); if (err) taskqgroup_attach(tqg, gtask, q, ctx->ifc_dev, irq->ii_res, name); } else { taskqgroup_attach(tqg, gtask, q, NULL, NULL, name); } } void iflib_irq_free(if_ctx_t ctx, if_irq_t irq) { if (irq->ii_tag) bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); if (irq->ii_res) bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, rman_get_rid(irq->ii_res), irq->ii_res); } static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, const char *name) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_irq_t irq = &ctx->ifc_legacy_irq; iflib_filter_info_t info; device_t dev; struct grouptask *gtask; struct resource *res; struct taskqgroup *tqg; gtask_fn_t *fn; int tqrid; void *q; int err; q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; tqg = qgroup_if_io_tqg; tqrid = irq->ii_rid = *rid; fn = _task_fn_rx; ctx->ifc_flags |= IFC_LEGACY; info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = ctx; dev = ctx->ifc_dev; /* We allocate a single interrupt resource */ if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr_ctx, NULL, info, name)) != 0) return (err); GROUPTASK_INIT(gtask, 0, fn, q); res = irq->ii_res; 
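/*
 * Single-vector (INTx/MSI) wiring: the interrupt resource allocated
 * above fires iflib_fast_intr_ctx, and both the rxq[0] grouptask (here)
 * and the txq[0] grouptask (just below) are attached to that same
 * resource. For reference, condensed from the caller in
 * iflib_device_register() when MSI-X is unavailable (msix <= 1):
 *
 *	rid = (scctx->isc_intr == IFLIB_INTR_MSI) ? 1 : 0;
 *	err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr,
 *	    ctx->ifc_softc, &rid, "irq0");
 */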
taskqgroup_attach(tqg, gtask, q, dev, res, name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, dev, res, "tx"); return (0); } void iflib_led_create(if_ctx_t ctx) { ctx->ifc_led_dev = led_create(iflib_led_func, ctx, device_get_nameunit(ctx->ifc_dev)); } void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid) { GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid) { GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task); } void iflib_admin_intr_deferred(if_ctx_t ctx) { #ifdef INVARIANTS struct grouptask *gtask; gtask = &ctx->ifc_admin_task; MPASS(gtask != NULL && gtask->gt_taskqueue != NULL); #endif GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); } void iflib_iov_intr_deferred(if_ctx_t ctx) { GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task); } void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) { taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, NULL, NULL, name); } void iflib_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn, const char *name) { GROUPTASK_INIT(gtask, 0, fn, ctx); taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, NULL, NULL, name); } void iflib_config_gtask_deinit(struct grouptask *gtask) { taskqgroup_detach(qgroup_if_config_tqg, gtask); } void iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq = ctx->ifc_txqs; if_setbaudrate(ifp, baudrate); if (baudrate >= IF_Gbps(10)) { STATE_LOCK(ctx); ctx->ifc_flags |= IFC_PREFETCH; STATE_UNLOCK(ctx); } /* If link down, disable watchdog */ if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) txq->ift_qstatus = IFLIB_QUEUE_IDLE; } ctx->ifc_link_state = link_state; if_link_state_change(ifp, link_state); } static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) { int credits; #ifdef INVARIANTS int credits_pre = txq->ift_cidx_processed; #endif if (ctx->isc_txd_credits_update == NULL) return (0); bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0) return (0); txq->ift_processed += credits; txq->ift_cidx_processed += credits; MPASS(credits_pre + credits == txq->ift_cidx_processed); if (txq->ift_cidx_processed >= txq->ift_size) txq->ift_cidx_processed -= txq->ift_size; return (credits); } static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget) { iflib_fl_t fl; u_int i; for (i = 0, fl = &rxq->ifr_fl[0]; i < rxq->ifr_nfl; i++, fl++) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, budget)); } void iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name, const char *description, if_int_delay_info_t info, int offset, int value) { info->iidi_ctx = ctx; info->iidi_offset = offset; info->iidi_value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, iflib_sysctl_int_delay, "I", description); } struct sx * iflib_ctx_lock_get(if_ctx_t ctx) { return (&ctx->ifc_ctx_sx); } static int iflib_msix_init(if_ctx_t ctx) { device_t dev = ctx->ifc_dev; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int vectors, queues, rx_queues, 
tx_queues, queuemsgs, msgs; int iflib_num_tx_queues, iflib_num_rx_queues; int err, admincnt, bar; iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs; iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs; if (bootverbose) device_printf(dev, "msix_init qsets capped at %d\n", imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets)); bar = ctx->ifc_softc_ctx.isc_msix_bar; admincnt = sctx->isc_admin_intrcnt; /* Override by tuneable */ if (scctx->isc_disable_msix) goto msi; /* First try MSI-X */ if ((msgs = pci_msix_count(dev)) == 0) { if (bootverbose) device_printf(dev, "MSI-X not supported or disabled\n"); goto msi; } /* * bar == -1 => "trust me I know what I'm doing" * Some drivers are for hardware that is so shoddily * documented that no one knows which bars are which * so the developer has to map all bars. This hack * allows shoddy garbage to use MSI-X in this framework. */ if (bar != -1) { ctx->ifc_msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (ctx->ifc_msix_mem == NULL) { device_printf(dev, "Unable to map MSI-X table\n"); goto msi; } } #if IFLIB_DEBUG /* use only 1 qset in debug mode */ queuemsgs = min(msgs - admincnt, 1); #else queuemsgs = msgs - admincnt; #endif #ifdef RSS queues = imin(queuemsgs, rss_getnumbuckets()); #else queues = queuemsgs; #endif queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); if (bootverbose) device_printf(dev, "intr CPUs: %d queue msgs: %d admincnt: %d\n", CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) rx_queues = iflib_num_rx_queues; else rx_queues = queues; if (rx_queues > scctx->isc_nrxqsets) rx_queues = scctx->isc_nrxqsets; /* * We want this to be all logical CPUs by default */ if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) tx_queues = iflib_num_tx_queues; else tx_queues = mp_ncpus; if (tx_queues > scctx->isc_ntxqsets) tx_queues = scctx->isc_ntxqsets; if (ctx->ifc_sysctl_qs_eq_override == 0) { #ifdef INVARIANTS if (tx_queues != rx_queues) device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", min(rx_queues, tx_queues), min(rx_queues, tx_queues)); #endif tx_queues = min(rx_queues, tx_queues); rx_queues = min(rx_queues, tx_queues); } device_printf(dev, "Using %d rx queues %d tx queues\n", rx_queues, tx_queues); vectors = rx_queues + admincnt; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { device_printf(dev, "Using MSI-X interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; return (vectors); } else { device_printf(dev, "failed to allocate %d MSI-X vectors, err: %d - using MSI\n", vectors, err); bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } msi: vectors = pci_msi_count(dev); scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets = 1; scctx->isc_vectors = vectors; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) { device_printf(dev,"Using an MSI interrupt\n"); scctx->isc_intr = IFLIB_INTR_MSI; } else { scctx->isc_vectors = 1; device_printf(dev,"Using a Legacy interrupt\n"); scctx->isc_intr = IFLIB_INTR_LEGACY; } return (vectors); } static const char *ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" }; static int mp_ring_state_handler(SYSCTL_HANDLER_ARGS) { int rc; uint16_t *state = ((uint16_t *)oidp->oid_arg1); 
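/* oid_arg1 points at the mp_ring state word, viewed here as four uint16_t fields: pidx_head, pidx_tail, cidx and the ring state, decoded against ring_states[] below. */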
struct sbuf *sb; const char *ring_state = "UNKNOWN"; /* XXX needed ? */ rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 80, req); MPASS(sb != NULL); if (sb == NULL) return (ENOMEM); if (state[3] <= 3) ring_state = ring_states[state[3]]; sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s", state[0], state[1], state[2], ring_state); rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } enum iflib_ndesc_handler { IFLIB_NTXD_HANDLER, IFLIB_NRXD_HANDLER, }; static int mp_ndesc_handler(SYSCTL_HANDLER_ARGS) { if_ctx_t ctx = (void *)arg1; enum iflib_ndesc_handler type = arg2; char buf[256] = {0}; qidx_t *ndesc; char *p, *next; int nqs, rc, i; MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER); nqs = 8; switch(type) { case IFLIB_NTXD_HANDLER: ndesc = ctx->ifc_sysctl_ntxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_ntxqs; break; case IFLIB_NRXD_HANDLER: ndesc = ctx->ifc_sysctl_nrxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_nrxqs; break; default: panic("unhandled type"); } if (nqs == 0) nqs = 8; for (i=0; i<8; i++) { if (i >= nqs) break; if (i) strcat(buf, ","); sprintf(strchr(buf, 0), "%d", ndesc[i]); } rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (rc || req->newptr == NULL) return rc; for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; i++, p = strsep(&next, " ,")) { ndesc[i] = strtoul(p, NULL, 10); } return(rc); } #define NAME_BUFLEN 32 static void iflib_add_device_sysctl_pre(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child, *oid_list; struct sysctl_ctx_list *ctx_list; struct sysctl_oid *node; ctx_list = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib", CTLFLAG_RD, NULL, "IFLIB fields"); oid_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, "driver version"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, "# of rxqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, "disable MSI-X (default 0)"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, "set the rx budget"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate", CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0, "cause tx to abdicate instead of running to completion"); ctx->ifc_sysctl_core_offset = CORE_OFFSET_UNSPECIFIED; SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "core_offset", CTLFLAG_RDTUN, &ctx->ifc_sysctl_core_offset, 0, "offset to start using cores at"); SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx", CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0, "use separate cores for TX and RX"); /* XXX change for per-queue sizes */ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", "list of # of tx descriptors to use, 0 = use default #"); SYSCTL_ADD_PROC(ctx_list, oid_list, 
OID_AUTO, "override_nrxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of rx descriptors to use, 0 = use default #"); } static void iflib_add_device_sysctl_post(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx_list; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j; char namebuf[NAME_BUFLEN]; char *qfmt; struct sysctl_oid *queue_node, *fl_node, *node; struct sysctl_oid_list *queue_list, *fl_list; ctx_list = device_get_sysctl_ctx(dev); node = ctx->ifc_sysctl_node; child = SYSCTL_CHILDREN(node); if (scctx->isc_ntxqsets > 100) qfmt = "txq%03d"; else if (scctx->isc_ntxqsets > 10) qfmt = "txq%02d"; else qfmt = "txq%d"; for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued", CTLFLAG_RD, &txq->ift_dequeued, "total mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued", CTLFLAG_RD, &txq->ift_enqueued, "total mbufs enqueued"); #endif SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag", CTLFLAG_RD, &txq->ift_mbuf_defrag, "# of times m_defrag was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups", CTLFLAG_RD, &txq->ift_pullups, "# of times m_pullup was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txq->ift_no_desc_avail, "# of times no descriptors were available"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", CTLFLAG_RD, &txq->ift_map_failed, "# of times dma map failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig", CTLFLAG_RD, &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup", CTLFLAG_RD, &txq->ift_no_tx_dma_setup, "# of times map failed for other than EFBIG"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx", CTLFLAG_RD, &txq->ift_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx", CTLFLAG_RD, &txq->ift_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed", CTLFLAG_RD, &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use", CTLFLAG_RD, &txq->ift_in_use, 1, "descriptors in use"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed", CTLFLAG_RD, &txq->ift_processed, "descriptors procesed for clean"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned", CTLFLAG_RD, &txq->ift_cleaned, "total cleaned"); SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state", CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br->state), 0, mp_ring_state_handler, "A", "soft ring state"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues", CTLFLAG_RD, &txq->ift_br->enqueues, "# of enqueues to the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops", CTLFLAG_RD, &txq->ift_br->drops, "# of drops in the mp_ring for this queue"); 
SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts", CTLFLAG_RD, &txq->ift_br->starts, "# of normal consumer starts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls", CTLFLAG_RD, &txq->ift_br->stalls, "# of consumer stalls in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts", CTLFLAG_RD, &txq->ift_br->restarts, "# of consumer restarts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications", CTLFLAG_RD, &txq->ift_br->abdications, "# of consumer abdications in the mp_ring for this queue"); } if (scctx->isc_nrxqsets > 100) qfmt = "rxq%03d"; else if (scctx->isc_nrxqsets > 10) qfmt = "rxq%02d"; else qfmt = "rxq%d"; for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); if (sctx->isc_flags & IFLIB_HAS_RXCQ) { SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx", CTLFLAG_RD, &rxq->ifr_cq_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx", CTLFLAG_RD, &rxq->ifr_cq_cidx, 1, "Consumer Index"); } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j); fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "freelist Name"); fl_list = SYSCTL_CHILDREN(fl_node); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx", CTLFLAG_RD, &fl->ifl_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx", CTLFLAG_RD, &fl->ifl_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits", CTLFLAG_RD, &fl->ifl_credits, 1, "credits available"); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued", CTLFLAG_RD, &fl->ifl_m_enqueued, "mbufs allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued", CTLFLAG_RD, &fl->ifl_m_dequeued, "mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued", CTLFLAG_RD, &fl->ifl_cl_enqueued, "clusters allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued", CTLFLAG_RD, &fl->ifl_cl_dequeued, "clusters freed"); #endif } } } void iflib_request_reset(if_ctx_t ctx) { STATE_LOCK(ctx); ctx->ifc_flags |= IFC_DO_RESET; STATE_UNLOCK(ctx); } #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m) { struct mbuf *n; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; n = m; } else { MGETHDR(n, M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return (NULL); } bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; } return (n); } #endif #ifdef NETDUMP static void iflib_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) { if_ctx_t ctx; ctx = if_getsoftc(ifp); CTX_LOCK(ctx); *nrxr = NRXQSETS(ctx); *ncl = ctx->ifc_rxqs[0].ifr_fl->ifl_size; *clsize = ctx->ifc_rxqs[0].ifr_fl->ifl_buf_size; CTX_UNLOCK(ctx); } static void iflib_netdump_event(struct ifnet *ifp, enum netdump_ev event) { if_ctx_t ctx; if_softc_ctx_t scctx; iflib_fl_t fl; iflib_rxq_t rxq; int i, j; ctx = if_getsoftc(ifp); scctx = &ctx->ifc_softc_ctx; switch (event) { case NETDUMP_START: for (i = 0; i < scctx->isc_nrxqsets; i++) { rxq = 
&ctx->ifc_rxqs[i]; for (j = 0; j < rxq->ifr_nfl; j++) { fl = rxq->ifr_fl; fl->ifl_zone = m_getzone(fl->ifl_buf_size); } } iflib_no_tx_batch = 1; break; default: break; } } static int iflib_netdump_transmit(struct ifnet *ifp, struct mbuf *m) { if_ctx_t ctx; iflib_txq_t txq; int error; ctx = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); txq = &ctx->ifc_txqs[0]; error = iflib_encap(txq, &m); if (error == 0) (void)iflib_txd_db_check(ctx, txq, true, txq->ift_in_use); return (error); } static int iflib_netdump_poll(struct ifnet *ifp, int count) { if_ctx_t ctx; if_softc_ctx_t scctx; iflib_txq_t txq; int i; ctx = if_getsoftc(ifp); scctx = &ctx->ifc_softc_ctx; if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); txq = &ctx->ifc_txqs[0]; (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); for (i = 0; i < scctx->isc_nrxqsets; i++) (void)iflib_rxeof(&ctx->ifc_rxqs[i], 16 /* XXX */); return (0); } #endif /* NETDUMP */ Index: projects/runtime-coverage-v2/sys/net/iflib.h =================================================================== --- projects/runtime-coverage-v2/sys/net/iflib.h (revision 347075) +++ projects/runtime-coverage-v2/sys/net/iflib.h (revision 347076) @@ -1,457 +1,463 @@ /*- * Copyright (c) 2014-2017, Matthew Macy (mmacy@mattmacy.io) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __IFLIB_H_ #define __IFLIB_H_ #include #include #include #include #include #include struct if_clone; /* * The value type for indexing, limits max descriptors * to 65535 can be conditionally redefined to uint32_t * in the future if the need arises. 
*/ typedef uint16_t qidx_t; #define QIDX_INVALID 0xFFFF struct iflib_ctx; typedef struct iflib_ctx *if_ctx_t; struct if_shared_ctx; typedef struct if_shared_ctx *if_shared_ctx_t; struct if_int_delay_info; typedef struct if_int_delay_info *if_int_delay_info_t; struct if_pseudo; typedef struct if_pseudo *if_pseudo_t; /* * File organization: * - public structures * - iflib accessors * - iflib utility functions * - iflib core functions */ typedef struct if_rxd_frag { uint8_t irf_flid; qidx_t irf_idx; uint16_t irf_len; } *if_rxd_frag_t; /* bnxt supports 64 with hardware LRO enabled */ #define IFLIB_MAX_RX_SEGS 64 typedef struct if_rxd_info { /* set by iflib */ uint16_t iri_qsidx; /* qset index */ uint16_t iri_vtag; /* vlan tag - if flag set */ /* XXX redundant with the new irf_len field */ uint16_t iri_len; /* packet length */ qidx_t iri_cidx; /* consumer index of cq */ struct ifnet *iri_ifp; /* some drivers >1 interface per softc */ /* updated by driver */ if_rxd_frag_t iri_frags; uint32_t iri_flowid; /* RSS hash for packet */ uint32_t iri_csum_flags; /* m_pkthdr csum flags */ uint32_t iri_csum_data; /* m_pkthdr csum data */ uint8_t iri_flags; /* mbuf flags for packet */ uint8_t iri_nfrags; /* number of fragments in packet */ uint8_t iri_rsstype; /* RSS hash type */ uint8_t iri_pad; /* any padding in the received data */ } *if_rxd_info_t; typedef struct if_rxd_update { uint64_t *iru_paddrs; caddr_t *iru_vaddrs; qidx_t *iru_idxs; qidx_t iru_pidx; uint16_t iru_qsidx; uint16_t iru_count; uint16_t iru_buf_size; uint8_t iru_flidx; } *if_rxd_update_t; #define IPI_TX_INTR 0x1 /* send an interrupt when this packet is sent */ #define IPI_TX_IPV4 0x2 /* ethertype IPv4 */ #define IPI_TX_IPV6 0x4 /* ethertype IPv6 */ typedef struct if_pkt_info { bus_dma_segment_t *ipi_segs; /* physical addresses */ uint32_t ipi_len; /* packet length */ uint16_t ipi_qsidx; /* queue set index */ qidx_t ipi_nsegs; /* number of segments */ qidx_t ipi_ndescs; /* number of descriptors used by encap */ uint16_t ipi_flags; /* iflib per-packet flags */ qidx_t ipi_pidx; /* start pidx for encap */ qidx_t ipi_new_pidx; /* next available pidx post-encap */ /* offload handling */ uint8_t ipi_ehdrlen; /* ether header length */ uint8_t ipi_ip_hlen; /* ip header length */ uint8_t ipi_tcp_hlen; /* tcp header length */ uint8_t ipi_ipproto; /* ip protocol */ uint32_t ipi_csum_flags; /* packet checksum flags */ uint16_t ipi_tso_segsz; /* tso segment size */ uint16_t ipi_vtag; /* VLAN tag */ uint16_t ipi_etype; /* ether header type */ uint8_t ipi_tcp_hflags; /* tcp header flags */ uint8_t ipi_mflags; /* packet mbuf flags */ uint32_t ipi_tcp_seq; /* tcp seqno */ uint32_t ipi_tcp_sum; /* tcp csum */ } *if_pkt_info_t; typedef struct if_irq { struct resource *ii_res; int ii_rid; void *ii_tag; } *if_irq_t; struct if_int_delay_info { if_ctx_t iidi_ctx; /* Back-pointer to the iflib ctx (softc) */ int iidi_offset; /* Register offset to read/write */ int iidi_value; /* Current value in usecs */ struct sysctl_oid *iidi_oidp; struct sysctl_req *iidi_req; }; typedef enum { IFLIB_INTR_LEGACY, IFLIB_INTR_MSI, IFLIB_INTR_MSIX } iflib_intr_mode_t; /* * This really belongs in pciio.h or some place more general * but this is the only consumer for now. 
*/ typedef struct pci_vendor_info { uint32_t pvi_vendor_id; uint32_t pvi_device_id; uint32_t pvi_subvendor_id; uint32_t pvi_subdevice_id; uint32_t pvi_rev_id; uint32_t pvi_class_mask; caddr_t pvi_name; } pci_vendor_info_t; #define PVID(vendor, devid, name) {vendor, devid, 0, 0, 0, 0, name} #define PVID_OEM(vendor, devid, svid, sdevid, revid, name) {vendor, devid, svid, sdevid, revid, 0, name} #define PVID_END {0, 0, 0, 0, 0, 0, NULL} #define IFLIB_PNP_DESCR "U32:vendor;U32:device;U32:subvendor;U32:subdevice;" \ "U32:revision;U32:class;D:#" #define IFLIB_PNP_INFO(b, u, t) \ MODULE_PNP_INFO(IFLIB_PNP_DESCR, b, u, t, nitems(t) - 1) typedef struct if_txrx { int (*ift_txd_encap) (void *, if_pkt_info_t); void (*ift_txd_flush) (void *, uint16_t, qidx_t pidx); int (*ift_txd_credits_update) (void *, uint16_t qsidx, bool clear); int (*ift_rxd_available) (void *, uint16_t qsidx, qidx_t pidx, qidx_t budget); int (*ift_rxd_pkt_get) (void *, if_rxd_info_t ri); void (*ift_rxd_refill) (void * , if_rxd_update_t iru); void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, qidx_t pidx); int (*ift_legacy_intr) (void *); } *if_txrx_t; typedef struct if_softc_ctx { int isc_vectors; int isc_nrxqsets; int isc_ntxqsets; uint8_t isc_min_tx_latency; /* disable doorbell update batching */ uint8_t isc_rx_mvec_enable; /* generate mvecs on rx */ uint32_t isc_txrx_budget_bytes_max; int isc_msix_bar; /* can be model specific - initialize in attach_pre */ int isc_tx_nsegments; /* can be model specific - initialize in attach_pre */ int isc_ntxd[8]; int isc_nrxd[8]; uint32_t isc_txqsizes[8]; uint32_t isc_rxqsizes[8]; /* is there such thing as a descriptor that is more than 248 bytes ? */ uint8_t isc_txd_size[8]; uint8_t isc_rxd_size[8]; int isc_tx_tso_segments_max; int isc_tx_tso_size_max; int isc_tx_tso_segsize_max; int isc_tx_csum_flags; int isc_capabilities; int isc_capenable; int isc_rss_table_size; int isc_rss_table_mask; int isc_nrxqsets_max; int isc_ntxqsets_max; uint32_t isc_tx_qdepth; iflib_intr_mode_t isc_intr; uint16_t isc_max_frame_size; /* set at init time by driver */ uint16_t isc_min_frame_size; /* set at init time by driver, only used if IFLIB_NEED_ETHER_PAD is set. 
*/ uint32_t isc_pause_frames; /* set by driver for iflib_timer to detect */ pci_vendor_info_t isc_vendor_info; /* set by iflib prior to attach_pre */ int isc_disable_msix; if_txrx_t isc_txrx; + struct ifmedia *isc_media; } *if_softc_ctx_t; + /* * Initialization values for device */ struct if_shared_ctx { unsigned isc_magic; driver_t *isc_driver; bus_size_t isc_q_align; bus_size_t isc_tx_maxsize; bus_size_t isc_tx_maxsegsize; bus_size_t isc_tso_maxsize; bus_size_t isc_tso_maxsegsize; bus_size_t isc_rx_maxsize; bus_size_t isc_rx_maxsegsize; int isc_rx_nsegments; int isc_admin_intrcnt; /* # of admin/link interrupts */ /* fields necessary for probe */ pci_vendor_info_t *isc_vendor_info; const char *isc_driver_version; /* optional function to transform the read values to match the table*/ void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id, uint16_t *subdevice_id, uint16_t *rev_id); int isc_nrxd_min[8]; int isc_nrxd_default[8]; int isc_nrxd_max[8]; int isc_ntxd_min[8]; int isc_ntxd_default[8]; int isc_ntxd_max[8]; /* actively used during operation */ int isc_nfl __aligned(CACHE_LINE_SIZE); int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */ int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */ int isc_rx_process_limit; int isc_tx_reclaim_thresh; int isc_flags; const char *isc_name; }; typedef struct iflib_dma_info { bus_addr_t idi_paddr; caddr_t idi_vaddr; bus_dma_tag_t idi_tag; bus_dmamap_t idi_map; uint32_t idi_size; } *iflib_dma_info_t; #define IFLIB_MAGIC 0xCAFEF00D typedef enum { IFLIB_INTR_RX, IFLIB_INTR_TX, IFLIB_INTR_RXTX, IFLIB_INTR_ADMIN, IFLIB_INTR_IOV, } iflib_intr_type_t; #ifndef ETH_ADDR_LEN #define ETH_ADDR_LEN 6 #endif /* * Interface has a separate command queue for RX */ #define IFLIB_HAS_RXCQ 0x01 /* * Driver has already allocated vectors */ #define IFLIB_SKIP_MSIX 0x02 /* * Interface is a virtual function */ #define IFLIB_IS_VF 0x04 /* * Interface has a separate command queue for TX */ #define IFLIB_HAS_TXCQ 0x08 /* * Interface does checksum in place */ #define IFLIB_NEED_SCRATCH 0x10 /* * Interface doesn't expect in_pseudo for th_sum */ #define IFLIB_TSO_INIT_IP 0x20 /* * Interface doesn't align IP header */ #define IFLIB_DO_RX_FIXUP 0x40 /* * Driver needs csum zeroed for offloading */ #define IFLIB_NEED_ZERO_CSUM 0x80 /* * Driver needs frames padded to some minimum length */ #define IFLIB_NEED_ETHER_PAD 0x100 /* * Packets can be freed immediately after encap */ #define IFLIB_TXD_ENCAP_PIO 0x00200 /* * Use RX completion handler */ #define IFLIB_RX_COMPLETION 0x00400 /* * Skip refilling cluster free lists */ #define IFLIB_SKIP_CLREFILL 0x00800 /* * Don't reset on hang */ #define IFLIB_NO_HANG_RESET 0x01000 /* * Don't need/want most of the niceties of * queue management */ #define IFLIB_PSEUDO 0x02000 /* * No DMA support needed / wanted */ #define IFLIB_VIRTUAL 0x04000 /* * autogenerate a MAC address */ #define IFLIB_GEN_MAC 0x08000 /* * Interface needs admin task to ignore interface up/down status */ #define IFLIB_ADMIN_ALWAYS_RUN 0x10000 +/* + * Driver will pass the media + */ +#define IFLIB_DRIVER_MEDIA 0x20000 /* * field accessors */ void *iflib_get_softc(if_ctx_t ctx); device_t iflib_get_dev(if_ctx_t ctx); if_t iflib_get_ifp(if_ctx_t ctx); struct ifmedia *iflib_get_media(if_ctx_t ctx); if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx); if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx); void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]); void iflib_request_reset(if_ctx_t ctx); uint8_t iflib_in_detach(if_ctx_t ctx); 
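/*
 * A minimal usage sketch for the isc_media field and IFLIB_DRIVER_MEDIA
 * flag added in this revision -- driver names here are hypothetical, and
 * the hand-off point is an assumption: a driver that manages its own
 * ifmedia presumably sets the flag in its shared ctx and passes the media
 * pointer to iflib during attach_pre, e.g.:
 *
 *	static struct if_shared_ctx foo_sctx = {
 *		...
 *		.isc_flags = IFLIB_DRIVER_MEDIA,
 *	};
 *
 *	static int
 *	foo_if_attach_pre(if_ctx_t ctx)
 *	{
 *		struct foo_softc *sc = iflib_get_softc(ctx);
 *
 *		iflib_get_softc_ctx(ctx)->isc_media = &sc->foo_media;
 *		return (0);
 *	}
 */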
uint32_t iflib_get_rx_mbuf_sz(if_ctx_t ctx); /* * If the driver can plug cleanly in to newbus use these */ int iflib_device_probe(device_t); int iflib_device_attach(device_t); int iflib_device_detach(device_t); int iflib_device_suspend(device_t); int iflib_device_resume(device_t); int iflib_device_shutdown(device_t); int iflib_device_iov_init(device_t, uint16_t, const nvlist_t *); void iflib_device_iov_uninit(device_t); int iflib_device_iov_add_vf(device_t, uint16_t, const nvlist_t *); /* * If the driver can't plug cleanly in to newbus * use these */ int iflib_device_register(device_t dev, void *softc, if_shared_ctx_t sctx, if_ctx_t *ctxp); int iflib_device_deregister(if_ctx_t); int iflib_irq_alloc(if_ctx_t, if_irq_t, int, driver_filter_t, void *filter_arg, driver_intr_t, void *arg, const char *name); int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, const char *name); void iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, const char *name); void iflib_irq_free(if_ctx_t ctx, if_irq_t irq); void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name); void iflib_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn, const char *name); void iflib_config_gtask_deinit(struct grouptask *gtask); void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid); void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid); void iflib_admin_intr_deferred(if_ctx_t ctx); void iflib_iov_intr_deferred(if_ctx_t ctx); void iflib_link_state_change(if_ctx_t ctx, int linkstate, uint64_t baudrate); int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags); int iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags); void iflib_dma_free(iflib_dma_info_t dma); int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count); void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count); struct sx *iflib_ctx_lock_get(if_ctx_t); struct mtx *iflib_qset_lock_get(if_ctx_t, uint16_t); void iflib_led_create(if_ctx_t ctx); void iflib_add_int_delay_sysctl(if_ctx_t, const char *, const char *, if_int_delay_info_t, int, int); /* * Pseudo device support */ if_pseudo_t iflib_clone_register(if_shared_ctx_t); void iflib_clone_deregister(if_pseudo_t); #endif /* __IFLIB_H_ */ Index: projects/runtime-coverage-v2/sys/powerpc/booke/locore.S =================================================================== --- projects/runtime-coverage-v2/sys/powerpc/booke/locore.S (revision 347075) +++ projects/runtime-coverage-v2/sys/powerpc/booke/locore.S (revision 347076) @@ -1,940 +1,931 @@ /*- * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski * Copyright (C) 2006 Semihalf, Marian Balakowicz * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include "assym.inc" #include "opt_hwpmc_hooks.h" #include #include #include #include #include #include #include #include #define TMPSTACKSZ 16384 #ifdef __powerpc64__ #define GET_TOCBASE(r) \ mfspr r, SPR_SPRG8 #define TOC_RESTORE nop #define CMPI cmpdi #define CMPL cmpld #define LOAD ld #define LOADX ldarx #define STORE std #define STOREX stdcx. #define STU stdu #define CALLSIZE 48 #define REDZONE 288 #define THREAD_REG %r13 #define ADDR(x) \ .llong x #define WORD_SIZE 8 #else #define GET_TOCBASE(r) #define TOC_RESTORE #define CMPI cmpwi #define CMPL cmplw #define LOAD lwz #define LOADX lwarx #define STOREX stwcx. #define STORE stw #define STU stwu #define CALLSIZE 8 #define REDZONE 0 #define THREAD_REG %r2 #define ADDR(x) \ .long x #define WORD_SIZE 4 #endif .text .globl btext btext: /* * This symbol is here for the benefit of kvm_mkdb, and is supposed to * mark the start of kernel text. */ .globl kernel_text kernel_text: /* * Startup entry. Note, this must be the first thing in the text segment! */ .text .globl __start __start: /* * Assumptions on the boot loader: * - System memory starts from physical address 0 * - It's mapped by a single TLB1 entry * - TLB1 mapping is 1:1 pa to va * - Kernel is loaded at 64MB boundary * - All PID registers are set to the same value * - CPU is running in AS=0 * * Registers contents provided by the loader(8): * r1 : stack pointer * r3 : metadata pointer * * We rearrange the TLB1 layout as follows: * - Find TLB1 entry we started in * - Make sure it's protected, invalidate other entries * - Create temp entry in the second AS (make sure it's not TLB[1]) * - Switch to temp mapping * - Map 64MB of RAM in TLB1[1] * - Use AS=0, set EPN to VM_MIN_KERNEL_ADDRESS and RPN to kernel load address * - Switch to TLB1[1] mapping * - Invalidate temp mapping * * locore registers use: * r1 : stack pointer * r2 : trace pointer (AP only, for early diagnostics) * r3-r27 : scratch registers * r28 : temp TLB1 entry * r29 : initial TLB1 entry we started in * r30-r31 : arguments (metadata pointer) */ /* * Keep arguments in r30 & r31 for later use. */ mr %r30, %r3 mr %r31, %r4 /* * Initial cleanup */ li %r3, PSL_DE /* Keep debug exceptions for CodeWarrior. */ #ifdef __powerpc64__ oris %r3, %r3, PSL_CM@h #endif mtmsr %r3 isync /* * Initial HIDs configuration */ 1: mfpvr %r3 rlwinm %r3, %r3, 16, 16, 31 lis %r4, HID0_E500_DEFAULT_SET@h ori %r4, %r4, HID0_E500_DEFAULT_SET@l /* Check for e500mc and e5500 */ cmpli 0, 0, %r3, FSL_E500mc bne 2f lis %r4, HID0_E500MC_DEFAULT_SET@h ori %r4, %r4, HID0_E500MC_DEFAULT_SET@l b 3f 2: cmpli 0, 0, %r3, FSL_E5500 bne 3f lis %r4, HID0_E5500_DEFAULT_SET@h ori %r4, %r4, HID0_E5500_DEFAULT_SET@l 3: mtspr SPR_HID0, %r4 isync /* * E500mc and E5500 do not have HID1 register, so skip HID1 setup on * this core. 
*/ cmpli 0, 0, %r3, FSL_E500mc beq 1f cmpli 0, 0, %r3, FSL_E5500 beq 1f cmpli 0, 0, %r3, FSL_E6500 beq 1f lis %r3, HID1_E500_DEFAULT_SET@h ori %r3, %r3, HID1_E500_DEFAULT_SET@l mtspr SPR_HID1, %r3 isync 1: /* Invalidate all entries in TLB0 */ li %r3, 0 bl tlb_inval_all cmpwi %r30, 0 beq done_mapping /* * Locate the TLB1 entry that maps this code */ bl 1f 1: mflr %r3 bl tlb1_find_current /* the entry found is returned in r29 */ bl tlb1_inval_all_but_current /* * Create temporary mapping in AS=1 and switch to it */ bl tlb1_temp_mapping_as1 mfmsr %r3 ori %r3, %r3, (PSL_IS | PSL_DS) bl 2f 2: mflr %r4 addi %r4, %r4, (3f - 2b) mtspr SPR_SRR0, %r4 mtspr SPR_SRR1, %r3 rfi /* Switch context */ /* * Invalidate initial entry */ 3: mr %r3, %r29 bl tlb1_inval_entry /* * Setup final mapping in TLB1[1] and switch to it */ /* Final kernel mapping, map in 64 MB of RAM */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ li %r4, 0 /* Entry 0 */ rlwimi %r3, %r4, 16, 10, 15 mtspr SPR_MAS0, %r3 isync li %r3, (TLB_SIZE_64M << MAS1_TSIZE_SHIFT)@l oris %r3, %r3, (MAS1_VALID | MAS1_IPROT)@h mtspr SPR_MAS1, %r3 /* note TS was not filled, so it's TS=0 */ isync LOAD_ADDR(%r3, VM_MIN_KERNEL_ADDRESS) ori %r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */ mtspr SPR_MAS2, %r3 isync /* Discover phys load address */ bl 3f 3: mflr %r4 /* Use current address */ rlwinm %r4, %r4, 0, 0, 5 /* 64MB alignment mask */ ori %r4, %r4, (MAS3_SX | MAS3_SW | MAS3_SR)@l mtspr SPR_MAS3, %r4 /* Set RPN and protection */ isync - bl zero_mas7 + li %r4, 0 + mtspr SPR_MAS7, %r4 bl zero_mas8 isync tlbwe isync msync /* Switch to the above TLB1[1] mapping */ bl 4f 4: mflr %r4 #ifdef __powerpc64__ clrldi %r4, %r4, 38 clrrdi %r3, %r3, 12 #else rlwinm %r4, %r4, 0, 6, 31 /* Current offset from kernel load address */ rlwinm %r3, %r3, 0, 0, 19 #endif add %r4, %r4, %r3 /* Convert to kernel virtual address */ addi %r4, %r4, (5f - 4b) li %r3, PSL_DE /* Note AS=0 */ #ifdef __powerpc64__ oris %r3, %r3, PSL_CM@h #endif mtspr SPR_SRR0, %r4 mtspr SPR_SRR1, %r3 rfi /* * Invalidate temp mapping */ 5: mr %r3, %r28 bl tlb1_inval_entry done_mapping: #ifdef __powerpc64__ /* Set up the TOC pointer */ b 0f .align 3 0: nop bl 1f .llong __tocbase + 0x8000 - . 1: mflr %r2 ld %r1,0(%r2) add %r2,%r1,%r2 mtspr SPR_SPRG8, %r2 /* Get load offset */ ld %r31,-0x8000(%r2) /* First TOC entry is TOC base */ subf %r31,%r31,%r2 /* Subtract from real TOC base to get base */ /* Set up the stack pointer */ addis %r1,%r2,TOC_REF(tmpstack)@ha ld %r1,TOC_REF(tmpstack)@l(%r1) addi %r1,%r1,TMPSTACKSZ-96 add %r1,%r1,%r31 bl 1f .llong _DYNAMIC-. 1: mflr %r3 ld %r4,0(%r3) add %r3,%r4,%r3 mr %r4,%r31 #else /* * Setup a temporary stack */ bl 1f .long tmpstack-. 1: mflr %r1 lwz %r2,0(%r1) add %r1,%r1,%r2 addi %r1, %r1, (TMPSTACKSZ - 16) /* * Relocate kernel */ bl 1f .long _DYNAMIC-. .long _GLOBAL_OFFSET_TABLE_-. 
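/* The two .long words above hold the link-time offsets of _DYNAMIC and the GOT relative to this spot; the code below converts them to run-time addresses and derives the relocation base passed to elf_reloc_self(). */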
1: mflr %r5 lwz %r3,0(%r5) /* _DYNAMIC in %r3 */ add %r3,%r3,%r5 lwz %r4,4(%r5) /* GOT pointer */ add %r4,%r4,%r5 lwz %r4,4(%r4) /* got[0] is _DYNAMIC link addr */ subf %r4,%r4,%r3 /* subtract to calculate relocbase */ #endif bl CNAME(elf_reloc_self) TOC_RESTORE /* * Initialise exception vector offsets */ bl CNAME(ivor_setup) TOC_RESTORE /* * Set up arguments and jump to system initialization code */ mr %r3, %r30 mr %r4, %r31 /* Prepare core */ bl CNAME(booke_init) TOC_RESTORE /* Switch to thread0.td_kstack now */ mr %r1, %r3 li %r3, 0 STORE %r3, 0(%r1) /* Machine independent part, does not return */ bl CNAME(mi_startup) TOC_RESTORE /* NOT REACHED */ 5: b 5b #ifdef SMP /************************************************************************/ /* AP Boot page */ /************************************************************************/ .text .globl __boot_page .align 12 __boot_page: bl 1f .globl bp_trace bp_trace: .long 0 .globl bp_kernload bp_kernload: .long 0 /* * Initial configuration */ 1: mflr %r31 /* r31 holds the address of bp_trace */ /* Set HIDs */ mfpvr %r3 rlwinm %r3, %r3, 16, 16, 31 /* HID0 for E500 is default */ lis %r4, HID0_E500_DEFAULT_SET@h ori %r4, %r4, HID0_E500_DEFAULT_SET@l cmpli 0, 0, %r3, FSL_E500mc bne 2f lis %r4, HID0_E500MC_DEFAULT_SET@h ori %r4, %r4, HID0_E500MC_DEFAULT_SET@l b 3f 2: cmpli 0, 0, %r3, FSL_E5500 bne 3f lis %r4, HID0_E5500_DEFAULT_SET@h ori %r4, %r4, HID0_E5500_DEFAULT_SET@l 3: mtspr SPR_HID0, %r4 isync /* Enable branch prediction */ li %r3, BUCSR_BPEN mtspr SPR_BUCSR, %r3 isync /* Invalidate all entries in TLB0 */ li %r3, 0 bl tlb_inval_all /* * Find TLB1 entry which is translating us now */ bl 2f 2: mflr %r3 bl tlb1_find_current /* the entry number found is in r29 */ bl tlb1_inval_all_but_current /* * Create temporary translation in AS=1 and switch to it */ bl tlb1_temp_mapping_as1 mfmsr %r3 ori %r3, %r3, (PSL_IS | PSL_DS) #ifdef __powerpc64__ oris %r3, %r3, PSL_CM@h #endif bl 3f 3: mflr %r4 addi %r4, %r4, (4f - 3b) mtspr SPR_SRR0, %r4 mtspr SPR_SRR1, %r3 rfi /* Switch context */ /* * Invalidate initial entry */ 4: mr %r3, %r29 bl tlb1_inval_entry /* * Setup final mapping in TLB1[1] and switch to it */ /* Final kernel mapping, map in 64 MB of RAM */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ li %r4, 0 /* Entry 0 */ rlwimi %r3, %r4, 16, 4, 15 mtspr SPR_MAS0, %r3 isync li %r3, (TLB_SIZE_64M << MAS1_TSIZE_SHIFT)@l oris %r3, %r3, (MAS1_VALID | MAS1_IPROT)@h mtspr SPR_MAS1, %r3 /* note TS was not filled, so it's TS=0 */ isync LOAD_ADDR(%r3, VM_MIN_KERNEL_ADDRESS) ori %r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */ mtspr SPR_MAS2, %r3 isync /* Retrieve kernel load [physical] address from bp_kernload */ #ifdef __powerpc64__ b 0f .align 3 0: nop #endif bl 5f ADDR(bp_kernload) ADDR(__boot_page) 5: mflr %r3 #ifdef __powerpc64__ ld %r4, 0(%r3) ld %r5, 8(%r3) clrrdi %r3, %r3, 12 #else lwz %r4, 0(%r3) lwz %r5, 4(%r3) rlwinm %r3, %r3, 0, 0, 19 #endif sub %r4, %r4, %r5 /* offset of bp_kernload within __boot_page */ lwzx %r3, %r4, %r3 /* Set RPN and protection */ ori %r3, %r3, (MAS3_SX | MAS3_SW | MAS3_SR)@l mtspr SPR_MAS3, %r3 isync - bl zero_mas7 + li %r4, 0 + mtspr SPR_MAS7, %r4 bl zero_mas8 isync tlbwe isync msync /* Switch to the final mapping */ bl 6f 6: mflr %r3 rlwinm %r3, %r3, 0, 0xfff /* Offset from boot page start */ add %r3, %r3, %r5 /* Make this virtual address */ addi %r3, %r3, (7f - 6b) #ifdef __powerpc64__ lis %r4, PSL_CM@h /* Note AS=0 */ #else li %r4, 0 /* Note AS=0 */ #endif mtspr SPR_SRR0, %r3 mtspr SPR_SRR1, %r4 rfi 7: /* * At this
point we're running at virtual addresses VM_MIN_KERNEL_ADDRESS and * beyond so it's allowed to directly access all locations the kernel was linked * against. */ /* * Invalidate temp mapping */ mr %r3, %r28 bl tlb1_inval_entry #ifdef __powerpc64__ /* Set up the TOC pointer */ b 0f .align 3 0: nop bl 1f .llong __tocbase + 0x8000 - . 1: mflr %r2 ld %r1,0(%r2) add %r2,%r1,%r2 mtspr SPR_SPRG8, %r2 /* Set up the stack pointer */ addis %r1,%r2,TOC_REF(tmpstack)@ha ld %r1,TOC_REF(tmpstack)@l(%r1) addi %r1,%r1,TMPSTACKSZ-96 #else /* * Setup a temporary stack */ bl 1f .long tmpstack-. 1: mflr %r1 lwz %r2,0(%r1) add %r1,%r1,%r2 stw %r1, 0(%r1) addi %r1, %r1, (TMPSTACKSZ - 16) #endif /* * Initialise exception vector offsets */ bl CNAME(ivor_setup) TOC_RESTORE /* * Assign our pcpu instance */ bl 1f .long ap_pcpu-. 1: mflr %r4 lwz %r3, 0(%r4) add %r3, %r3, %r4 LOAD %r3, 0(%r3) mtsprg0 %r3 bl CNAME(pmap_bootstrap_ap) TOC_RESTORE bl CNAME(cpudep_ap_bootstrap) TOC_RESTORE /* Switch to the idle thread's kstack */ mr %r1, %r3 bl CNAME(machdep_ap_bootstrap) TOC_RESTORE /* NOT REACHED */ 6: b 6b #endif /* SMP */ #if defined (BOOKE_E500) /* * Invalidate all entries in the given TLB. * * r3 TLBSEL */ tlb_inval_all: rlwinm %r3, %r3, 3, (1 << 3) /* TLBSEL */ ori %r3, %r3, (1 << 2) /* INVALL */ tlbivax 0, %r3 isync msync tlbsync msync blr /* * expects address to look up in r3, returns entry number in r29 * * FIXME: the hidden assumption is we are now running in AS=0, but we should * retrieve actual AS from MSR[IS|DS] and put it in MAS6[SAS] */ tlb1_find_current: mfspr %r17, SPR_PID0 slwi %r17, %r17, MAS6_SPID0_SHIFT mtspr SPR_MAS6, %r17 isync tlbsx 0, %r3 mfspr %r17, SPR_MAS0 rlwinm %r29, %r17, 16, 26, 31 /* MAS0[ESEL] -> r29 */ /* Make sure we have IPROT set on the entry */ mfspr %r17, SPR_MAS1 oris %r17, %r17, MAS1_IPROT@h mtspr SPR_MAS1, %r17 isync tlbwe isync msync blr /* * Invalidates a single entry in TLB1. * * r3 ESEL * r4-r5 scratched */ tlb1_inval_entry: lis %r4, MAS0_TLBSEL1@h /* Select TLB1 */ rlwimi %r4, %r3, 16, 10, 15 /* Select our entry */ mtspr SPR_MAS0, %r4 isync tlbre li %r5, 0 /* MAS1[V] = 0 */ mtspr SPR_MAS1, %r5 isync tlbwe isync msync blr /* * r29 current entry number * r28 returned temp entry * r3-r5 scratched */ tlb1_temp_mapping_as1: /* Read our current translation */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ rlwimi %r3, %r29, 16, 10, 15 /* Select our current entry */ mtspr SPR_MAS0, %r3 isync tlbre /* * Prepare and write temp entry * * FIXME this is not robust against overflow i.e. when the current * entry is the last in TLB1 */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ addi %r28, %r29, 1 /* Use next entry. */ rlwimi %r3, %r28, 16, 10, 15 /* Select temp entry */ mtspr SPR_MAS0, %r3 isync mfspr %r5, SPR_MAS1 li %r4, 1 /* AS=1 */ rlwimi %r5, %r4, 12, 19, 19 li %r4, 0 /* Global mapping, TID=0 */ rlwimi %r5, %r4, 16, 8, 15 oris %r5, %r5, (MAS1_VALID | MAS1_IPROT)@h mtspr SPR_MAS1, %r5 isync mflr %r3 - bl zero_mas7 + li %r4, 0 + mtspr SPR_MAS7, %r4 bl zero_mas8 mtlr %r3 isync tlbwe isync msync blr /* * Loops over TLB1, invalidates all entries skipping the one which currently * maps this code. * * r29 current entry * r3-r5 scratched */ tlb1_inval_all_but_current: mfspr %r3, SPR_TLB1CFG /* Get number of entries */ andi. %r3, %r3, TLBCFG_NENTRY_MASK@l li %r4, 0 /* Start from Entry 0 */ 1: lis %r5, MAS0_TLBSEL1@h rlwimi %r5, %r4, 16, 10, 15 mtspr SPR_MAS0, %r5 isync tlbre mfspr %r5, SPR_MAS1 cmpw %r4, %r29 /* our current entry? 
*/ beq 2f rlwinm %r5, %r5, 0, 2, 31 /* clear VALID and IPROT bits */ mtspr SPR_MAS1, %r5 isync tlbwe isync msync 2: addi %r4, %r4, 1 cmpw %r4, %r3 /* Check if this is the last entry */ bne 1b blr /* - * MAS7 and MAS8 conditional zeroing. + * MAS8 conditional zeroing. */ -.globl zero_mas7 -zero_mas7: - mfpvr %r20 - rlwinm %r20, %r20, 16, 16, 31 - cmpli 0, 0, %r20, FSL_E500v1 - beq 1f - - li %r20, 0 - mtspr SPR_MAS7, %r20 -1: - blr - .globl zero_mas8 zero_mas8: mfpvr %r20 rlwinm %r20, %r20, 16, 16, 31 cmpli 0, 0, %r20, FSL_E500mc beq 1f cmpli 0, 0, %r20, FSL_E5500 beq 1f blr 1: li %r20, 0 mtspr SPR_MAS8, %r20 blr #endif #ifdef SMP .globl __boot_tlb1 /* * The __boot_tlb1 table is used to hold BSP TLB1 entries * marked with _TLB_ENTRY_SHARED flag during AP bootstrap. * The BSP fills in the table in tlb_ap_prep() function. Next, * AP loads its contents to TLB1 hardware in pmap_bootstrap_ap(). */ __boot_tlb1: .space TLB1_MAX_ENTRIES * TLB_ENTRY_SIZE __boot_page_padding: /* * Boot page needs to be exactly 4K, with the last word of this page * acting as the reset vector, so we need to stuff the remainder. * Upon release from holdoff CPU fetches the last word of the boot * page. */ .space 4092 - (__boot_page_padding - __boot_page) b __boot_page #endif /* SMP */ /************************************************************************/ /* locore subroutines */ /************************************************************************/ /* * Cache disable/enable/inval sequences according * to section 2.16 of E500CORE RM. */ ENTRY(dcache_inval) /* Invalidate d-cache */ mfspr %r3, SPR_L1CSR0 ori %r3, %r3, (L1CSR0_DCFI | L1CSR0_DCLFR)@l msync isync mtspr SPR_L1CSR0, %r3 isync 1: mfspr %r3, SPR_L1CSR0 andi. %r3, %r3, L1CSR0_DCFI bne 1b blr ENTRY(dcache_disable) /* Disable d-cache */ mfspr %r3, SPR_L1CSR0 li %r4, L1CSR0_DCE@l not %r4, %r4 and %r3, %r3, %r4 msync isync mtspr SPR_L1CSR0, %r3 isync blr ENTRY(dcache_enable) /* Enable d-cache */ mfspr %r3, SPR_L1CSR0 oris %r3, %r3, (L1CSR0_DCPE | L1CSR0_DCE)@h ori %r3, %r3, (L1CSR0_DCPE | L1CSR0_DCE)@l msync isync mtspr SPR_L1CSR0, %r3 isync blr ENTRY(icache_inval) /* Invalidate i-cache */ mfspr %r3, SPR_L1CSR1 ori %r3, %r3, (L1CSR1_ICFI | L1CSR1_ICLFR)@l isync mtspr SPR_L1CSR1, %r3 isync 1: mfspr %r3, SPR_L1CSR1 andi. %r3, %r3, L1CSR1_ICFI bne 1b blr ENTRY(icache_disable) /* Disable i-cache */ mfspr %r3, SPR_L1CSR1 li %r4, L1CSR1_ICE@l not %r4, %r4 and %r3, %r3, %r4 isync mtspr SPR_L1CSR1, %r3 isync blr ENTRY(icache_enable) /* Enable i-cache */ mfspr %r3, SPR_L1CSR1 oris %r3, %r3, (L1CSR1_ICPE | L1CSR1_ICE)@h ori %r3, %r3, (L1CSR1_ICPE | L1CSR1_ICE)@l isync mtspr SPR_L1CSR1, %r3 isync blr /* * L2 cache disable/enable/inval sequences for E500mc. */ ENTRY(l2cache_inval) mfspr %r3, SPR_L2CSR0 oris %r3, %r3, (L2CSR0_L2FI | L2CSR0_L2LFC)@h ori %r3, %r3, (L2CSR0_L2FI | L2CSR0_L2LFC)@l isync mtspr SPR_L2CSR0, %r3 isync 1: mfspr %r3, SPR_L2CSR0 andis. %r3, %r3, L2CSR0_L2FI@h bne 1b blr ENTRY(l2cache_enable) mfspr %r3, SPR_L2CSR0 oris %r3, %r3, (L2CSR0_L2E | L2CSR0_L2PE)@h isync mtspr SPR_L2CSR0, %r3 isync blr /* * Branch predictor setup. */ ENTRY(bpred_enable) mfspr %r3, SPR_BUCSR ori %r3, %r3, BUCSR_BBFI isync mtspr SPR_BUCSR, %r3 isync ori %r3, %r3, BUCSR_BPEN isync mtspr SPR_BUCSR, %r3 isync blr /* * XXX: This should be moved to a shared AIM/booke asm file, if one ever is * created. 
*/ ENTRY(get_spr) mfspr %r3, 0 blr /************************************************************************/ /* Data section */ /************************************************************************/ .data .align 3 GLOBAL(__startkernel) ADDR(begin) GLOBAL(__endkernel) ADDR(end) .align 4 tmpstack: .space TMPSTACKSZ tmpstackbound: .space 10240 /* XXX: this really should not be necessary */ #ifdef __powerpc64__ TOC_ENTRY(tmpstack) TOC_ENTRY(bp_kernload) #endif /* * Compiled KERNBASE locations */ .globl kernbase .set kernbase, KERNBASE #include Index: projects/runtime-coverage-v2/sys/powerpc/booke/trap_subr.S =================================================================== --- projects/runtime-coverage-v2/sys/powerpc/booke/trap_subr.S (revision 347075) +++ projects/runtime-coverage-v2/sys/powerpc/booke/trap_subr.S (revision 347076) @@ -1,1129 +1,1130 @@ /*- * Copyright (C) 2006-2009 Semihalf, Rafal Jaworowski * Copyright (C) 2006 Semihalf, Marian Balakowicz * Copyright (C) 2006 Juniper Networks, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * from: $NetBSD: trap_subr.S,v 1.20 2002/04/22 23:20:08 kleink Exp $ */ /* * NOTICE: This is not a standalone file. To use it, #include it in * your port's locore.S, like so: * * #include */ /* * SPRG usage notes * * SPRG0 - pcpu pointer * SPRG1 - all interrupts except TLB miss, critical, machine check * SPRG2 - critical * SPRG3 - machine check * SPRG4-6 - scratch * */ /* Get the per-CPU data structure */ #define GET_CPUINFO(r) mfsprg0 r #define RES_GRANULE 64 #define RES_LOCK 0 /* offset to the 'lock' word */ #ifdef __powerpc64__ #define RES_RECURSE 8 /* offset to the 'recurse' word */ #else #define RES_RECURSE 4 /* offset to the 'recurse' word */ #endif /* * Standard interrupt prolog * * sprg_sp - SPRG{1-3} reg used to temporarily store the SP * savearea - temp save area (pc_{tempsave, disisave, critsave, mchksave}) * isrr0-1 - save restore registers with CPU state at interrupt time (may be * SRR0-1, CSRR0-1, MCSRR0-1) * * 1. saves in the given savearea: * - R30-31 * - DEAR, ESR * - xSRR0-1 * * 2. saves CR -> R30 * * 3. switches to kstack if needed * * 4. notes: * - R31 can be used as scratch register until a new frame is laid on * the stack with FRAME_SETUP * * - potential TLB miss: NO.
Saveareas are always accessible via TLB1 * permanent entries, and within this prolog we do not dereference any * locations potentially not in the TLB */ #define STANDARD_PROLOG(sprg_sp, savearea, isrr0, isrr1) \ mtspr sprg_sp, %r1; /* Save SP */ \ GET_CPUINFO(%r1); /* Per-cpu structure */ \ STORE %r30, (savearea+CPUSAVE_R30)(%r1); \ STORE %r31, (savearea+CPUSAVE_R31)(%r1); \ mfdear %r30; \ mfesr %r31; \ STORE %r30, (savearea+CPUSAVE_BOOKE_DEAR)(%r1); \ STORE %r31, (savearea+CPUSAVE_BOOKE_ESR)(%r1); \ mfspr %r30, isrr0; \ mfspr %r31, isrr1; /* MSR at interrupt time */ \ STORE %r30, (savearea+CPUSAVE_SRR0)(%r1); \ STORE %r31, (savearea+CPUSAVE_SRR1)(%r1); \ isync; \ mfspr %r1, sprg_sp; /* Restore SP */ \ mfcr %r30; /* Save CR */ \ /* switch to per-thread kstack if intr taken in user mode */ \ mtcr %r31; /* MSR at interrupt time */ \ bf 17, 1f; \ GET_CPUINFO(%r1); /* Per-cpu structure */ \ LOAD %r1, PC_CURPCB(%r1); /* Per-thread kernel stack */ \ 1: #define STANDARD_CRIT_PROLOG(sprg_sp, savearea, isrr0, isrr1) \ mtspr sprg_sp, %r1; /* Save SP */ \ GET_CPUINFO(%r1); /* Per-cpu structure */ \ STORE %r30, (savearea+CPUSAVE_R30)(%r1); \ STORE %r31, (savearea+CPUSAVE_R31)(%r1); \ mfdear %r30; \ mfesr %r31; \ STORE %r30, (savearea+CPUSAVE_BOOKE_DEAR)(%r1); \ STORE %r31, (savearea+CPUSAVE_BOOKE_ESR)(%r1); \ mfspr %r30, isrr0; \ mfspr %r31, isrr1; /* MSR at interrupt time */ \ STORE %r30, (savearea+CPUSAVE_SRR0)(%r1); \ STORE %r31, (savearea+CPUSAVE_SRR1)(%r1); \ mfspr %r30, SPR_SRR0; \ mfspr %r31, SPR_SRR1; /* MSR at interrupt time */ \ STORE %r30, (savearea+BOOKE_CRITSAVE_SRR0)(%r1); \ STORE %r31, (savearea+BOOKE_CRITSAVE_SRR1)(%r1); \ isync; \ mfspr %r1, sprg_sp; /* Restore SP */ \ mfcr %r30; /* Save CR */ \ /* switch to per-thread kstack if intr taken in user mode */ \ mtcr %r31; /* MSR at interrupt time */ \ bf 17, 1f; \ GET_CPUINFO(%r1); /* Per-cpu structure */ \ LOAD %r1, PC_CURPCB(%r1); /* Per-thread kernel stack */ \ 1: /* * FRAME_SETUP assumes: * SPRG{1-3} SP at the time interrupt occurred * savearea r30-r31, DEAR, ESR, xSRR0-1 * r30 CR * r31 scratch * r1 kernel stack * * sprg_sp - SPRG reg containing SP at the time interrupt occurred * savearea - temp save * exc - exception number (EXC_xxx) * * 1. sets a new frame * 2. saves in the frame: * - R0, R1 (SP at the time of interrupt), R2, LR, CR * - R3-31 (R30-31 first restored from savearea) * - XER, CTR, DEAR, ESR (from savearea), xSRR0-1 * * Notes: * - potential TLB miss: YES, since we make dereferences to kstack, which * may not be covered (we can have up to two DTLB misses if unlucky * enough, i.e.
when the kstack crosses a page boundary and both pages are * untranslated) */ #ifdef __powerpc64__ #define SAVE_REGS(r) \ std %r3, FRAME_3+CALLSIZE(r); \ std %r4, FRAME_4+CALLSIZE(r); \ std %r5, FRAME_5+CALLSIZE(r); \ std %r6, FRAME_6+CALLSIZE(r); \ std %r7, FRAME_7+CALLSIZE(r); \ std %r8, FRAME_8+CALLSIZE(r); \ std %r9, FRAME_9+CALLSIZE(r); \ std %r10, FRAME_10+CALLSIZE(r); \ std %r11, FRAME_11+CALLSIZE(r); \ std %r12, FRAME_12+CALLSIZE(r); \ std %r13, FRAME_13+CALLSIZE(r); \ std %r14, FRAME_14+CALLSIZE(r); \ std %r15, FRAME_15+CALLSIZE(r); \ std %r16, FRAME_16+CALLSIZE(r); \ std %r17, FRAME_17+CALLSIZE(r); \ std %r18, FRAME_18+CALLSIZE(r); \ std %r19, FRAME_19+CALLSIZE(r); \ std %r20, FRAME_20+CALLSIZE(r); \ std %r21, FRAME_21+CALLSIZE(r); \ std %r22, FRAME_22+CALLSIZE(r); \ std %r23, FRAME_23+CALLSIZE(r); \ std %r24, FRAME_24+CALLSIZE(r); \ std %r25, FRAME_25+CALLSIZE(r); \ std %r26, FRAME_26+CALLSIZE(r); \ std %r27, FRAME_27+CALLSIZE(r); \ std %r28, FRAME_28+CALLSIZE(r); \ std %r29, FRAME_29+CALLSIZE(r); \ std %r30, FRAME_30+CALLSIZE(r); \ std %r31, FRAME_31+CALLSIZE(r) #define LD_REGS(r) \ ld %r3, FRAME_3+CALLSIZE(r); \ ld %r4, FRAME_4+CALLSIZE(r); \ ld %r5, FRAME_5+CALLSIZE(r); \ ld %r6, FRAME_6+CALLSIZE(r); \ ld %r7, FRAME_7+CALLSIZE(r); \ ld %r8, FRAME_8+CALLSIZE(r); \ ld %r9, FRAME_9+CALLSIZE(r); \ ld %r10, FRAME_10+CALLSIZE(r); \ ld %r11, FRAME_11+CALLSIZE(r); \ ld %r12, FRAME_12+CALLSIZE(r); \ ld %r13, FRAME_13+CALLSIZE(r); \ ld %r14, FRAME_14+CALLSIZE(r); \ ld %r15, FRAME_15+CALLSIZE(r); \ ld %r16, FRAME_16+CALLSIZE(r); \ ld %r17, FRAME_17+CALLSIZE(r); \ ld %r18, FRAME_18+CALLSIZE(r); \ ld %r19, FRAME_19+CALLSIZE(r); \ ld %r20, FRAME_20+CALLSIZE(r); \ ld %r21, FRAME_21+CALLSIZE(r); \ ld %r22, FRAME_22+CALLSIZE(r); \ ld %r23, FRAME_23+CALLSIZE(r); \ ld %r24, FRAME_24+CALLSIZE(r); \ ld %r25, FRAME_25+CALLSIZE(r); \ ld %r26, FRAME_26+CALLSIZE(r); \ ld %r27, FRAME_27+CALLSIZE(r); \ ld %r28, FRAME_28+CALLSIZE(r); \ ld %r29, FRAME_29+CALLSIZE(r); \ ld %r30, FRAME_30+CALLSIZE(r); \ ld %r31, FRAME_31+CALLSIZE(r) #else #define SAVE_REGS(r) \ stmw %r3, FRAME_3+CALLSIZE(r) #define LD_REGS(r) \ lmw %r3, FRAME_3+CALLSIZE(r) #endif #define FRAME_SETUP(sprg_sp, savearea, exc) \ mfspr %r31, sprg_sp; /* get saved SP */ \ /* establish a new stack frame and put everything on it */ \ STU %r31, -(FRAMELEN+REDZONE)(%r1); \ STORE %r0, FRAME_0+CALLSIZE(%r1); /* save r0 in the trapframe */ \ STORE %r31, FRAME_1+CALLSIZE(%r1); /* save SP " " */ \ STORE %r2, FRAME_2+CALLSIZE(%r1); /* save r2 " " */ \ mflr %r31; \ STORE %r31, FRAME_LR+CALLSIZE(%r1); /* save LR " " */ \ STORE %r30, FRAME_CR+CALLSIZE(%r1); /* save CR " " */ \ GET_CPUINFO(%r2); \ LOAD %r30, (savearea+CPUSAVE_R30)(%r2); /* get saved r30 */ \ LOAD %r31, (savearea+CPUSAVE_R31)(%r2); /* get saved r31 */ \ /* save R3-31 */ \ SAVE_REGS(%r1); \ /* save DEAR, ESR */ \ LOAD %r28, (savearea+CPUSAVE_BOOKE_DEAR)(%r2); \ LOAD %r29, (savearea+CPUSAVE_BOOKE_ESR)(%r2); \ STORE %r28, FRAME_BOOKE_DEAR+CALLSIZE(%r1); \ STORE %r29, FRAME_BOOKE_ESR+CALLSIZE(%r1); \ /* save XER, CTR, exc number */ \ mfxer %r3; \ mfctr %r4; \ STORE %r3, FRAME_XER+CALLSIZE(%r1); \ STORE %r4, FRAME_CTR+CALLSIZE(%r1); \ li %r5, exc; \ STORE %r5, FRAME_EXC+CALLSIZE(%r1); \ /* save DBCR0 */ \ mfspr %r3, SPR_DBCR0; \ STORE %r3, FRAME_BOOKE_DBCR0+CALLSIZE(%r1); \ /* save xSRR0-1 */ \ LOAD %r30, (savearea+CPUSAVE_SRR0)(%r2); \ LOAD %r31, (savearea+CPUSAVE_SRR1)(%r2); \ STORE %r30, FRAME_SRR0+CALLSIZE(%r1); \ STORE %r31, FRAME_SRR1+CALLSIZE(%r1); \ LOAD THREAD_REG, PC_CURTHREAD(%r2); \ /* * *
isrr0-1 - save restore registers to restore CPU state to (may be * SRR0-1, CSRR0-1, MCSRR0-1) * * Notes: * - potential TLB miss: YES. The dereferenced kstack may not be covered */ #define FRAME_LEAVE(isrr0, isrr1) \ wrteei 0; \ /* restore CTR, XER, LR, CR */ \ LOAD %r4, FRAME_CTR+CALLSIZE(%r1); \ LOAD %r5, FRAME_XER+CALLSIZE(%r1); \ LOAD %r6, FRAME_LR+CALLSIZE(%r1); \ LOAD %r7, FRAME_CR+CALLSIZE(%r1); \ mtctr %r4; \ mtxer %r5; \ mtlr %r6; \ mtcr %r7; \ /* restore DBCR0 */ \ LOAD %r4, FRAME_BOOKE_DBCR0+CALLSIZE(%r1); \ mtspr SPR_DBCR0, %r4; \ /* restore xSRR0-1 */ \ LOAD %r30, FRAME_SRR0+CALLSIZE(%r1); \ LOAD %r31, FRAME_SRR1+CALLSIZE(%r1); \ mtspr isrr0, %r30; \ mtspr isrr1, %r31; \ /* restore R2-31, SP */ \ LD_REGS(%r1); \ LOAD %r2, FRAME_2+CALLSIZE(%r1); \ LOAD %r0, FRAME_0+CALLSIZE(%r1); \ LOAD %r1, FRAME_1+CALLSIZE(%r1); \ isync /* * TLB miss prolog * * saves LR, CR, SRR0-1, R20-31 in the TLBSAVE area * * Notes: * - potential TLB miss: NO. It is crucial that we do not generate a TLB * miss within the TLB prolog itself! * - TLBSAVE is always translated */ #ifdef __powerpc64__ #define TLB_SAVE_REGS(br) \ std %r20, (TLBSAVE_BOOKE_R20)(br); \ std %r21, (TLBSAVE_BOOKE_R21)(br); \ std %r22, (TLBSAVE_BOOKE_R22)(br); \ std %r23, (TLBSAVE_BOOKE_R23)(br); \ std %r24, (TLBSAVE_BOOKE_R24)(br); \ std %r25, (TLBSAVE_BOOKE_R25)(br); \ std %r26, (TLBSAVE_BOOKE_R26)(br); \ std %r27, (TLBSAVE_BOOKE_R27)(br); \ std %r28, (TLBSAVE_BOOKE_R28)(br); \ std %r29, (TLBSAVE_BOOKE_R29)(br); \ std %r30, (TLBSAVE_BOOKE_R30)(br); \ std %r31, (TLBSAVE_BOOKE_R31)(br); #define TLB_RESTORE_REGS(br) \ ld %r20, (TLBSAVE_BOOKE_R20)(br); \ ld %r21, (TLBSAVE_BOOKE_R21)(br); \ ld %r22, (TLBSAVE_BOOKE_R22)(br); \ ld %r23, (TLBSAVE_BOOKE_R23)(br); \ ld %r24, (TLBSAVE_BOOKE_R24)(br); \ ld %r25, (TLBSAVE_BOOKE_R25)(br); \ ld %r26, (TLBSAVE_BOOKE_R26)(br); \ ld %r27, (TLBSAVE_BOOKE_R27)(br); \ ld %r28, (TLBSAVE_BOOKE_R28)(br); \ ld %r29, (TLBSAVE_BOOKE_R29)(br); \ ld %r30, (TLBSAVE_BOOKE_R30)(br); \ ld %r31, (TLBSAVE_BOOKE_R31)(br); #define TLB_NEST(outr,inr) \ rlwinm outr, inr, 7, 22, 24; /* 8 x TLBSAVE_LEN */ #else #define TLB_SAVE_REGS(br) \ stmw %r20, TLBSAVE_BOOKE_R20(br) #define TLB_RESTORE_REGS(br) \ lmw %r20, TLBSAVE_BOOKE_R20(br) #define TLB_NEST(outr,inr) \ rlwinm outr, inr, 6, 23, 25; /* 4 x TLBSAVE_LEN */ #endif #define TLB_PROLOG \ mtsprg4 %r1; /* Save SP */ \ mtsprg5 %r28; \ mtsprg6 %r29; \ /* calculate TLB nesting level and TLBSAVE instance address */ \ GET_CPUINFO(%r1); /* Per-cpu structure */ \ LOAD %r28, PC_BOOKE_TLB_LEVEL(%r1); \ TLB_NEST(%r29,%r28); \ addi %r28, %r28, 1; \ STORE %r28, PC_BOOKE_TLB_LEVEL(%r1); \ addi %r29, %r29, PC_BOOKE_TLBSAVE@l; \ add %r1, %r1, %r29; /* current TLBSAVE ptr */ \ \ /* save R20-31 */ \ mfsprg5 %r28; \ mfsprg6 %r29; \ TLB_SAVE_REGS(%r1); \ /* save LR, CR */ \ mflr %r30; \ mfcr %r31; \ STORE %r30, (TLBSAVE_BOOKE_LR)(%r1); \ STORE %r31, (TLBSAVE_BOOKE_CR)(%r1); \ /* save SRR0-1 */ \ mfsrr0 %r30; /* execution addr at interrupt time */ \ mfsrr1 %r31; /* MSR at interrupt time */ \ STORE %r30, (TLBSAVE_BOOKE_SRR0)(%r1); /* save SRR0 */ \ STORE %r31, (TLBSAVE_BOOKE_SRR1)(%r1); /* save SRR1 */ \ isync; \ mfsprg4 %r1 /* * restores LR, CR, SRR0-1, R20-31 from the TLBSAVE area * * same notes as for the TLB_PROLOG */ #define TLB_RESTORE \ mtsprg4 %r1; /* Save SP */ \ GET_CPUINFO(%r1); /* Per-cpu structure */ \ /* calculate TLB nesting level and TLBSAVE instance addr */ \ LOAD %r28, PC_BOOKE_TLB_LEVEL(%r1); \ subi %r28, %r28, 1; \ STORE %r28, PC_BOOKE_TLB_LEVEL(%r1); \ TLB_NEST(%r29,%r28); \ addi
%r29, %r29, PC_BOOKE_TLBSAVE@l; \ add %r1, %r1, %r29; \ \ /* restore LR, CR */ \ LOAD %r30, (TLBSAVE_BOOKE_LR)(%r1); \ LOAD %r31, (TLBSAVE_BOOKE_CR)(%r1); \ mtlr %r30; \ mtcr %r31; \ /* restore SRR0-1 */ \ LOAD %r30, (TLBSAVE_BOOKE_SRR0)(%r1); \ LOAD %r31, (TLBSAVE_BOOKE_SRR1)(%r1); \ mtsrr0 %r30; \ mtsrr1 %r31; \ /* restore R20-31 */ \ TLB_RESTORE_REGS(%r1); \ mfsprg4 %r1 #ifdef SMP #define TLB_LOCK \ GET_CPUINFO(%r20); \ LOAD %r21, PC_CURTHREAD(%r20); \ LOAD %r22, PC_BOOKE_TLB_LOCK(%r20); \ \ 1: LOADX %r23, 0, %r22; \ CMPI %r23, TLB_UNLOCKED; \ beq 2f; \ \ /* check if this is recursion */ \ CMPL cr0, %r21, %r23; \ bne- 1b; \ \ 2: /* try to acquire lock */ \ STOREX %r21, 0, %r22; \ bne- 1b; \ \ /* got it, update recursion counter */ \ lwz %r21, RES_RECURSE(%r22); \ addi %r21, %r21, 1; \ stw %r21, RES_RECURSE(%r22); \ isync; \ msync #define TLB_UNLOCK \ GET_CPUINFO(%r20); \ LOAD %r21, PC_CURTHREAD(%r20); \ LOAD %r22, PC_BOOKE_TLB_LOCK(%r20); \ \ /* update recursion counter */ \ lwz %r23, RES_RECURSE(%r22); \ subi %r23, %r23, 1; \ stw %r23, RES_RECURSE(%r22); \ \ cmplwi %r23, 0; \ bne 1f; \ isync; \ msync; \ \ /* release the lock */ \ li %r23, TLB_UNLOCKED; \ STORE %r23, 0(%r22); \ 1: isync; \ msync #else #define TLB_LOCK #define TLB_UNLOCK #endif /* SMP */ #define INTERRUPT(label) \ .globl label; \ .align 5; \ CNAME(label): /* * Interrupt handling routines in BookE can be flexibly placed and do not have * to live in pre-defined vector locations. Note they need to be TLB-mapped at * all times in order to be able to handle exceptions. We thus arrange for * them to be part of kernel text which is always TLB-accessible. * * The interrupt handling routines have to be 16-byte aligned; we align them * to 32 bytes (the cache line length), which supposedly performs better.
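 *
 * As a hedged C sketch of TLB_LOCK/TLB_UNLOCK above (illustrative names;
 * the real lock word and 'recurse' counter live at RES_LOCK/RES_RECURSE
 * inside each CPU's reservation granule):
 *
 *	void tlb_lock_acquire(struct tlb_res *l, uintptr_t self)
 *	{
 *		while (l->lock != self &&
 *		    atomic_cmpset_acq_ptr(&l->lock, TLB_UNLOCKED, self) == 0)
 *			;			- spin: held by another CPU
 *		l->recurse++;			- count nested acquisitions
 *	}
 *
 *	void tlb_lock_release(struct tlb_res *l)
 *	{
 *		if (--l->recurse == 0)
 *			atomic_store_rel_ptr(&l->lock, TLB_UNLOCKED);
 *	}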
* */ .text .globl CNAME(interrupt_vector_base) .align 5 interrupt_vector_base: /***************************************************************************** * Catch-all handler to handle uninstalled IVORs ****************************************************************************/ INTERRUPT(int_unknown) STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_RSVD) b trap_common /***************************************************************************** * Critical input interrupt ****************************************************************************/ INTERRUPT(int_critical_input) STANDARD_CRIT_PROLOG(SPR_SPRG2, PC_BOOKE_CRITSAVE, SPR_CSRR0, SPR_CSRR1) FRAME_SETUP(SPR_SPRG2, PC_BOOKE_CRITSAVE, EXC_CRIT) GET_TOCBASE(%r2) addi %r3, %r1, CALLSIZE bl CNAME(powerpc_interrupt) TOC_RESTORE FRAME_LEAVE(SPR_CSRR0, SPR_CSRR1) rfci /***************************************************************************** * Machine check interrupt ****************************************************************************/ INTERRUPT(int_machine_check) STANDARD_PROLOG(SPR_SPRG3, PC_BOOKE_MCHKSAVE, SPR_MCSRR0, SPR_MCSRR1) FRAME_SETUP(SPR_SPRG3, PC_BOOKE_MCHKSAVE, EXC_MCHK) GET_TOCBASE(%r2) addi %r3, %r1, CALLSIZE bl CNAME(powerpc_interrupt) TOC_RESTORE FRAME_LEAVE(SPR_MCSRR0, SPR_MCSRR1) rfmci /***************************************************************************** * Data storage interrupt ****************************************************************************/ INTERRUPT(int_data_storage) STANDARD_PROLOG(SPR_SPRG1, PC_DISISAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG1, PC_DISISAVE, EXC_DSI) b trap_common /***************************************************************************** * Instruction storage interrupt ****************************************************************************/ INTERRUPT(int_instr_storage) STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_ISI) b trap_common /***************************************************************************** * External input interrupt ****************************************************************************/ INTERRUPT(int_external_input) STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_EXI) b trap_common INTERRUPT(int_alignment) STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_ALI) b trap_common INTERRUPT(int_program) STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_PGM) b trap_common INTERRUPT(int_fpu) STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_FPU) b trap_common /***************************************************************************** * System call ****************************************************************************/ INTERRUPT(int_syscall) STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_SC) b trap_common /***************************************************************************** * Decrementer interrupt ****************************************************************************/ INTERRUPT(int_decrementer) STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_DECR) b trap_common /***************************************************************************** * Fixed interval timer 
****************************************************************************/ INTERRUPT(int_fixed_interval_timer) STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_FIT) b trap_common /***************************************************************************** * Watchdog interrupt ****************************************************************************/ INTERRUPT(int_watchdog) STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_WDOG) b trap_common /***************************************************************************** * Altivec Unavailable interrupt ****************************************************************************/ INTERRUPT(int_vec) STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_VEC) b trap_common /***************************************************************************** * Altivec Assist interrupt ****************************************************************************/ INTERRUPT(int_vecast) STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_VECAST_E) b trap_common #ifdef __SPE__ /***************************************************************************** * Floating point Assist interrupt ****************************************************************************/ INTERRUPT(int_spe_fpdata) STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_SPFPD) addi %r3, %r1, CALLSIZE bl spe_handle_fpdata FRAME_LEAVE(SPR_SRR0, SPR_SRR1) rfi INTERRUPT(int_spe_fpround) STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_SPFPR) addi %r3, %r1, CALLSIZE bl spe_handle_fpround FRAME_LEAVE(SPR_SRR0, SPR_SRR1) rfi #endif #ifdef HWPMC_HOOKS /***************************************************************************** * PMC Interrupt ****************************************************************************/ INTERRUPT(int_performance_counter) STANDARD_PROLOG(SPR_SPRG3, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) FRAME_SETUP(SPR_SPRG3, PC_TEMPSAVE, EXC_PERF) b trap_common #endif /***************************************************************************** * Data TLB miss interrupt * * There can be nested TLB misses - while handling a TLB miss we reference * data structures that may not be covered by translations. We support up to * TLB_NESTED_MAX-1 nested misses. * * Register usage: * r31 - dear * r30 - unused * r29 - saved mas0 * r28 - saved mas1 * r27 - saved mas2 * r26 - pmap address * r25 - pte address * * r20:r23 - scratch registers ****************************************************************************/ INTERRUPT(int_data_tlb_error) TLB_PROLOG TLB_LOCK mfdear %r31 /* * Save MAS0-MAS2 registers. There might be another tlb miss during * pte lookup overwriting current contents (which was hw filled). */ mfspr %r29, SPR_MAS0 mfspr %r28, SPR_MAS1 mfspr %r27, SPR_MAS2 /* Check faulting address. */ LOAD_ADDR(%r21, VM_MAXUSER_ADDRESS) CMPL cr0, %r31, %r21 blt search_user_pmap /* If it's a kernel address, allow only supervisor-mode misses. */ mfsrr1 %r21 mtcr %r21 bt 17, search_failed /* check MSR[PR] */ search_kernel_pmap: /* Load r26 with kernel_pmap address */ bl 1f #ifdef __powerpc64__ .llong kernel_pmap_store-. #else .long kernel_pmap_store-. #endif 1: mflr %r21 LOAD %r26, 0(%r21) add %r26, %r21, %r26 /* kernel_pmap_store in r26 */ /* Force kernel TID, set TID to 0 in MAS1.
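 *
 * In outline, the rest of the miss path (from tlb_miss_handle on) behaves
 * like this hedged C sketch; pte_lookup and tlb_fill_entry are the routines
 * below, while the revoked-entry helper name is illustrative:
 *
 *	pte = pte_lookup(pmap, dear);
 *	if (pte != NULL)
 *		tlb_fill_entry(pte);		- write a real translation
 *	else
 *		install_revoked_entry(dear);	- fake RPN, no access rights,
 *						  so the retried access raises
 *						  a DSI/ISI instead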
*/ li %r21, 0 rlwimi %r28, %r21, 0, 8, 15 /* clear TID bits */ tlb_miss_handle: /* This may result in nested tlb miss. */ bl pte_lookup /* returns PTE address in R25 */ CMPI %r25, 0 /* pte found? */ beq search_failed /* Finish up, write TLB entry. */ bl tlb_fill_entry tlb_miss_return: TLB_UNLOCK TLB_RESTORE rfi search_user_pmap: /* Load r26 with current user space process pmap */ GET_CPUINFO(%r26) LOAD %r26, PC_CURPMAP(%r26) b tlb_miss_handle search_failed: /* * Whenever we don't find a TLB mapping in PT, set a TLB0 entry with * the faulting virtual address anyway, but put a fake RPN and no * access rights. This should cause a following {D,I}SI exception. */ lis %r23, 0xffff0000@h /* revoke all permissions */ /* Load MAS registers. */ mtspr SPR_MAS0, %r29 mtspr SPR_MAS1, %r28 mtspr SPR_MAS2, %r27 mtspr SPR_MAS3, %r23 - bl zero_mas7 + li %r23, 0 + mtspr SPR_MAS7, %r23 bl zero_mas8 isync tlbwe msync isync b tlb_miss_return /***************************************************************************** * * Return pte address that corresponds to given pmap/va. If there is no valid * entry return 0. * * input: r26 - pmap * input: r31 - dear * output: r25 - pte address * * scratch regs used: r21 * ****************************************************************************/ pte_lookup: CMPI %r26, 0 beq 1f /* fail quickly if pmap is invalid */ #ifdef __powerpc64__ rldicl %r21, %r31, (64 - PP2D_L_L), (64 - PP2D_L_NUM) /* pp2d offset */ rldicl %r25, %r31, (64 - PP2D_H_L), (64 - PP2D_H_NUM) rldimi %r21, %r25, PP2D_L_NUM, (64 - (PP2D_L_NUM + PP2D_H_NUM)) slwi %r21, %r21, PP2D_ENTRY_SHIFT /* multiply by pp2d entry size */ addi %r25, %r26, PM_PP2D /* pmap pm_pp2d[] address */ add %r25, %r25, %r21 /* offset within pm_pp2d[] table */ ld %r25, 0(%r25) /* get pdir address, i.e. pmap->pm_pp2d[pp2d_idx] * */ cmpdi %r25, 0 beq 1f #if PAGE_SIZE < 65536 rldicl %r21, %r31, (64 - PDIR_L), (64 - PDIR_NUM) /* pdir offset */ slwi %r21, %r21, PDIR_ENTRY_SHIFT /* multiply by pdir entry size */ add %r25, %r25, %r21 /* offset within pdir table */ ld %r25, 0(%r25) /* get ptbl address, i.e. pmap->pm_pp2d[pp2d_idx][pdir_idx] */ cmpdi %r25, 0 beq 1f #endif rldicl %r21, %r31, (64 - PTBL_L), (64 - PTBL_NUM) /* ptbl offset */ slwi %r21, %r21, PTBL_ENTRY_SHIFT /* multiply by pte entry size */ #else srwi %r21, %r31, PDIR_SHIFT /* pdir offset */ slwi %r21, %r21, PDIR_ENTRY_SHIFT /* multiply by pdir entry size */ addi %r25, %r26, PM_PDIR /* pmap pm_dir[] address */ add %r25, %r25, %r21 /* offset within pm_pdir[] table */ /* * Get ptbl address, i.e. pmap->pm_pdir[pdir_idx] * This load may cause a Data TLB miss for non-kernel pmap! */ LOAD %r25, 0(%r25) CMPI %r25, 0 beq 2f lis %r21, PTBL_MASK@h ori %r21, %r21, PTBL_MASK@l and %r21, %r21, %r31 /* ptbl offset, multiply by ptbl entry size */ srwi %r21, %r21, (PTBL_SHIFT - PTBL_ENTRY_SHIFT) #endif add %r25, %r25, %r21 /* address of pte entry */ /* * Get pte->flags * This load may cause a Data TLB miss for non-kernel pmap! */ lwz %r21, PTE_FLAGS(%r25) andi. %r21, %r21, PTE_VALID@l bne 2f 1: li %r25, 0 2: blr /***************************************************************************** * * Load MAS1-MAS3 registers with data, write TLB entry * * input: * r29 - mas0 * r28 - mas1 * r27 - mas2 * r25 - pte * * output: none * * scratch regs: r21-r23 * ****************************************************************************/ tlb_fill_entry: /* * Update PTE flags: we have to do it atomically, as pmap_protect() * running on other CPUs could attempt to update the flags at the same * time. 
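 *
 * A hedged C equivalent of the lwarx/stwcx. sequence below (illustrative;
 * the real code works on the 32-bit flags word at PTE_FLAGS(pte)):
 *
 *	uint32_t old, new;
 *	do {
 *		old = pte->flags;
 *		new = old | PTE_REFERENCED;
 *		if (new & (PTE_SW | PTE_UW))	- writable mapping?
 *			new |= PTE_MODIFIED;
 *	} while (atomic_cmpset_32(&pte->flags, old, new) == 0);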
*/ li %r23, PTE_FLAGS 1: lwarx %r21, %r23, %r25 /* get pte->flags */ oris %r21, %r21, PTE_REFERENCED@h /* set referenced bit */ andi. %r22, %r21, (PTE_SW | PTE_UW)@l /* check if writable */ beq 2f ori %r21, %r21, PTE_MODIFIED@l /* set modified bit */ 2: stwcx. %r21, %r23, %r25 /* write it back */ bne- 1b /* Update MAS2. */ rlwimi %r27, %r21, 13, 27, 30 /* insert WIMG bits from pte */ /* Setup MAS3 value in r23. */ LOAD %r23, PTE_RPN(%r25) /* get pte->rpn */ #ifdef __powerpc64__ rldicr %r22, %r23, 52, 51 /* extract MAS3 portion of RPN */ rldicl %r23, %r23, 20, 54 /* extract MAS7 portion of RPN */ rlwimi %r22, %r21, 30, 26, 31 /* insert protection bits from pte */ #else rlwinm %r22, %r23, 20, 0, 11 /* extract MAS3 portion of RPN */ rlwimi %r22, %r21, 30, 26, 31 /* insert protection bits from pte */ rlwimi %r22, %r21, 20, 12, 19 /* insert lower 8 RPN bits to MAS3 */ rlwinm %r23, %r23, 20, 24, 31 /* MAS7 portion of RPN */ #endif /* Load MAS registers. */ mtspr SPR_MAS0, %r29 mtspr SPR_MAS1, %r28 mtspr SPR_MAS2, %r27 mtspr SPR_MAS3, %r22 mtspr SPR_MAS7, %r23 mflr %r21 bl zero_mas8 mtlr %r21 isync tlbwe isync msync blr /***************************************************************************** * Instruction TLB miss interrupt * * Same notes as for the Data TLB miss ****************************************************************************/ INTERRUPT(int_inst_tlb_error) TLB_PROLOG TLB_LOCK mfsrr0 %r31 /* faulting address */ /* * Save MAS0-MAS2 registers. There might be another tlb miss during pte * lookup overwriting current contents (which was hw filled). */ mfspr %r29, SPR_MAS0 mfspr %r28, SPR_MAS1 mfspr %r27, SPR_MAS2 mfsrr1 %r21 mtcr %r21 /* check MSR[PR] */ bt 17, search_user_pmap b search_kernel_pmap .globl interrupt_vector_top interrupt_vector_top: /***************************************************************************** * Debug interrupt ****************************************************************************/ INTERRUPT(int_debug) STANDARD_CRIT_PROLOG(SPR_SPRG2, PC_BOOKE_CRITSAVE, SPR_CSRR0, SPR_CSRR1) FRAME_SETUP(SPR_SPRG2, PC_BOOKE_CRITSAVE, EXC_DEBUG) bl int_debug_int FRAME_LEAVE(SPR_CSRR0, SPR_CSRR1) rfci INTERRUPT(int_debug_ed) STANDARD_CRIT_PROLOG(SPR_SPRG2, PC_BOOKE_CRITSAVE, SPR_DSRR0, SPR_DSRR1) FRAME_SETUP(SPR_SPRG2, PC_BOOKE_CRITSAVE, EXC_DEBUG) bl int_debug_int FRAME_LEAVE(SPR_DSRR0, SPR_DSRR1) rfdi /* .long 0x4c00004e */ /* Internal helper for debug interrupt handling. */ /* Common code between e500v1/v2 and e500mc-based cores. */ int_debug_int: mflr %r14 GET_CPUINFO(%r3) LOAD %r3, (PC_BOOKE_CRITSAVE+CPUSAVE_SRR0)(%r3) bl 0f ADDR(interrupt_vector_base-.) ADDR(interrupt_vector_top-.) 0: mflr %r5 LOAD %r4,0(%r5) /* interrupt_vector_base in r4 */ add %r4,%r4,%r5 CMPL cr0, %r3, %r4 blt trap_common LOAD %r4,WORD_SIZE(%r5) /* interrupt_vector_top in r4 */ add %r4,%r4,%r5 addi %r4,%r4,4 CMPL cr0, %r3, %r4 bge trap_common /* Disable single-stepping for the interrupt handlers. */ LOAD %r3, FRAME_SRR1+CALLSIZE(%r1); rlwinm %r3, %r3, 0, 23, 21 STORE %r3, FRAME_SRR1+CALLSIZE(%r1); /* Restore srr0 and srr1 as they could have been clobbered. 
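 *
 * The check-and-disable logic above amounts to this hedged C sketch
 * (illustrative; PSL_DE is assumed to be the BookE debug-enable MSR bit
 * that the rlwinm clears):
 *
 *	if (pc >= interrupt_vector_base && pc <= interrupt_vector_top)
 *		frame->srr1 &= ~PSL_DE;	- no single-stepping in handlers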
*/ GET_CPUINFO(%r4) LOAD %r3, (PC_BOOKE_CRITSAVE+BOOKE_CRITSAVE_SRR0)(%r4); mtspr SPR_SRR0, %r3 LOAD %r4, (PC_BOOKE_CRITSAVE+BOOKE_CRITSAVE_SRR1)(%r4); mtspr SPR_SRR1, %r4 mtlr %r14 blr /***************************************************************************** * Common trap code ****************************************************************************/ trap_common: /* Call C trap dispatcher */ GET_TOCBASE(%r2) addi %r3, %r1, CALLSIZE bl CNAME(powerpc_interrupt) TOC_RESTORE .globl CNAME(trapexit) /* exported for db_backtrace use */ CNAME(trapexit): /* disable interrupts */ wrteei 0 /* Test for a pending AST - this only makes sense for a user process */ LOAD %r5, FRAME_SRR1+CALLSIZE(%r1) mtcr %r5 bf 17, 1f GET_CPUINFO(%r3) LOAD %r4, PC_CURTHREAD(%r3) lwz %r4, TD_FLAGS(%r4) lis %r5, (TDF_ASTPENDING | TDF_NEEDRESCHED)@h ori %r5, %r5, (TDF_ASTPENDING | TDF_NEEDRESCHED)@l and. %r4, %r4, %r5 beq 1f /* re-enable interrupts before calling ast() */ wrteei 1 addi %r3, %r1, CALLSIZE bl CNAME(ast) TOC_RESTORE .globl CNAME(asttrapexit) /* db_backtrace code sentinel #2 */ CNAME(asttrapexit): b trapexit /* test ast ret value ? */ 1: FRAME_LEAVE(SPR_SRR0, SPR_SRR1) rfi #if defined(KDB) /* * Deliberate entry to dbtrap */ /* .globl CNAME(breakpoint)*/ ASENTRY_NOPROF(breakpoint) mtsprg1 %r1 mfmsr %r3 mtsrr1 %r3 li %r4, ~(PSL_EE | PSL_ME)@l oris %r4, %r4, ~(PSL_EE | PSL_ME)@h and %r3, %r3, %r4 mtmsr %r3 /* disable interrupts */ isync GET_CPUINFO(%r3) STORE %r30, (PC_DBSAVE+CPUSAVE_R30)(%r3) STORE %r31, (PC_DBSAVE+CPUSAVE_R31)(%r3) mflr %r31 mtsrr0 %r31 mfdear %r30 mfesr %r31 STORE %r30, (PC_DBSAVE+CPUSAVE_BOOKE_DEAR)(%r3) STORE %r31, (PC_DBSAVE+CPUSAVE_BOOKE_ESR)(%r3) mfsrr0 %r30 mfsrr1 %r31 STORE %r30, (PC_DBSAVE+CPUSAVE_SRR0)(%r3) STORE %r31, (PC_DBSAVE+CPUSAVE_SRR1)(%r3) isync mfcr %r30 /* * Now the kdb trap catching code. */ dbtrap: FRAME_SETUP(SPR_SPRG1, PC_DBSAVE, EXC_DEBUG) /* Call C trap code: */ GET_TOCBASE(%r2) addi %r3, %r1, CALLSIZE bl CNAME(db_trap_glue) TOC_RESTORE or. %r3, %r3, %r3 bne dbleave /* This wasn't for KDB, so switch to real trap: */ b trap_common dbleave: FRAME_LEAVE(SPR_SRR0, SPR_SRR1) rfi #endif /* KDB */ #ifdef SMP ENTRY(tlb_lock) GET_CPUINFO(%r5) LOAD %r5, PC_CURTHREAD(%r5) 1: LOADX %r4, 0, %r3 CMPI %r4, TLB_UNLOCKED bne 1b STOREX %r5, 0, %r3 bne- 1b isync msync blr ENTRY(tlb_unlock) isync msync li %r4, TLB_UNLOCKED STORE %r4, 0(%r3) isync msync blr /* * TLB miss spin locks. For each CPU we have a reservation granule * (RES_GRANULE bytes); only a single word from this granule will actually * be used as a spin lock for mutual exclusion between the TLB miss handler * and the pmap layer that manipulates page table contents. */ .data .align 5 GLOBAL(tlb0_miss_locks) .space RES_GRANULE * MAXCPU #endif Index: projects/runtime-coverage-v2/sys/powerpc/conf/GENERIC =================================================================== --- projects/runtime-coverage-v2/sys/powerpc/conf/GENERIC (revision 347075) +++ projects/runtime-coverage-v2/sys/powerpc/conf/GENERIC (revision 347076) @@ -1,233 +1,234 @@ # # GENERIC -- Generic kernel configuration file for FreeBSD/powerpc # # For more information on this file, please read the handbook section on # Kernel Configuration Files: # # https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the # latest information.
# # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ cpu AIM ident GENERIC machine powerpc powerpc makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols makeoptions WITH_CTF=1 # Platform support options POWERMAC #NewWorld Apple PowerMacs options PSIM #GDB PSIM ppc simulator options MAMBO #IBM Mambo Full System Simulator options PSERIES #PAPR-compliant systems options FDT options SCHED_ULE #ULE scheduler options PREEMPTION #Enable kernel thread preemption options VIMAGE # Subsystem virtualization, e.g. VNET options INET #InterNETworking options INET6 #IPv6 communications protocols options IPSEC # IP (v4/v6) security options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options TCP_HHOOK # hhook(9) framework for TCP options TCP_RFC7413 # TCP Fast Open options SCTP #Stream Control Transmission Protocol options FFS #Berkeley Fast Filesystem options SOFTUPDATES #Enable FFS soft updates support options UFS_ACL #Support for access control lists options UFS_DIRHASH #Improve performance on big directories options UFS_GJOURNAL #Enable gjournal-based UFS journaling options QUOTA #Enable disk quotas for UFS options MD_ROOT #MD is a potential root device options NFSCL #Network Filesystem Client options NFSD #Network Filesystem Server options NFSLOCKD #Network Lock Manager options NFS_ROOT #NFS usable as root device options MSDOSFS #MSDOS Filesystem options CD9660 #ISO 9660 Filesystem options PROCFS #Process filesystem (requires PSEUDOFS) options PSEUDOFS #Pseudo-filesystem framework options GEOM_PART_APM #Apple Partition Maps. options GEOM_PART_GPT #GUID Partition Tables. options GEOM_LABEL #Provides labelization options COMPAT_FREEBSD4 #Keep this for a while options COMPAT_FREEBSD5 #Compatible with FreeBSD5 options COMPAT_FREEBSD6 #Compatible with FreeBSD6 options COMPAT_FREEBSD7 #Compatible with FreeBSD7 options COMPAT_FREEBSD9 # Compatible with FreeBSD9 options COMPAT_FREEBSD10 # Compatible with FreeBSD10 options COMPAT_FREEBSD11 # Compatible with FreeBSD11 +options COMPAT_FREEBSD12 # Compatible with FreeBSD12 options SCSI_DELAY=5000 #Delay (in ms) before probing SCSI options KTRACE #ktrace(1) syscall trace support options STACK #stack(9) support options SYSVSHM #SYSV-style shared memory options SYSVMSG #SYSV-style message queues options SYSVSEM #SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data options INCLUDE_CONFIG_FILE # Include this file in kernel options RACCT # Resource accounting framework options RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default options RCTL # Resource limits # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. 
# For full debugger support use (turn off in stable branch): options DDB #Support DDB #options DEADLKRES #Enable the deadlock resolver options INVARIANTS #Enable calls of extra sanity checking options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS options WITNESS #Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default # Kernel dump features. options EKCD # Support for encrypted kernel dumps options GZIO # gzip-compressed kernel and user dumps options ZSTDIO # zstd-compressed kernel and user dumps options NETDUMP # netdump(4) client support # Make an SMP-capable kernel by default options SMP # Symmetric MultiProcessor Kernel # CPU frequency control device cpufreq # Standard busses device pci options PCI_HP # PCI-Express native HotPlug device agp # ATA controllers device ahci # AHCI-compatible SATA controllers device ata # Legacy ATA/SATA controllers device mvs # Marvell 88SX50XX/88SX60XX/88SX70XX/SoC SATA device siis # SiliconImage SiI3124/SiI3132/SiI3531 SATA # SCSI Controllers device ahc # AHA2940 and onboard AIC7xxx devices options AHC_ALLOW_MEMIO # Attempt to use memory mapped I/O device isp # Qlogic family device ispfw # Firmware module for Qlogic host adapters device mpt # LSI-Logic MPT-Fusion device sym # NCR/Symbios/LSI Logic 53C8XX/53C1010/53C1510D # ATA/SCSI peripherals device scbus # SCSI bus (required for ATA/SCSI) device da # Direct Access (disks) device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) # vt is the default console driver, resembling an SCO console device vt # Generic console driver (pulls in OF FB) device kbdmux # Serial (COM) ports device scc device uart device uart_z8530 # FireWire support device firewire # FireWire bus code device sbp # SCSI over FireWire (Requires scbus and da) device fwe # Ethernet over FireWire (non-standard!) # PCI Ethernet NICs that use the common MII bus controller code. device miibus # MII bus support device bge # Broadcom BCM570xx Gigabit Ethernet device bm # Apple BMAC Ethernet device gem # Sun GEM/Sun ERI/Apple GMAC device dc # DEC/Intel 21143 and various workalikes device fxp # Intel EtherExpress PRO/100B (82557, 82558) # Pseudo devices. device crypto # core crypto support device loop # Network loopback device random # Entropy device device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device ofwd # Open Firmware disks device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. 
device bpf #Berkeley packet filter # USB support options USB_DEBUG # enable debug msgs device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface device usb # USB Bus (required) device uhid # "Human Interface Devices" device ukbd # Keyboard options KBD_INSTALL_CDEV # install a CDEV entry in /dev device ulpt # Printer device umass # Disks/Mass storage - Requires scbus and da0 device ums # Mouse device atp # Apple USB touchpad device urio # Diamond Rio 500 MP3 player # USB Ethernet device aue # ADMtek USB Ethernet device axe # ASIX Electronics USB Ethernet device cdce # Generic USB over Ethernet device cue # CATC USB Ethernet device kue # Kawasaki LSI USB Ethernet # Wireless NIC cards options IEEE80211_SUPPORT_MESH # Misc device iicbus # I2C bus code device kiic # Keywest I2C device ad7417 # PowerMac7,2 temperature sensor device adt746x # PowerBook5,8 temperature sensor device ds1631 # PowerMac11,2 temperature sensor device ds1775 # PowerMac7,2 temperature sensor device fcu # Apple Fan Control Unit device max6690 # PowerMac7,2 temperature sensor device powermac_nvram # Open Firmware configuration NVRAM device smu # Apple System Management Unit device adm1030 # Apple G4 MDD fan controller device atibl # ATI-based backlight driver for PowerBooks/iBooks device nvbl # nVidia-based backlight driver for PowerBooks/iBooks # ADB support device adb device cuda device pmu # Sound support device sound # Generic sound driver (required) device snd_ai2s # Apple I2S audio device snd_davbus # Apple DAVBUS audio device snd_uaudio # USB Audio # evdev interface options EVDEV_SUPPORT # evdev support in legacy drivers device evdev # input event device support device uinput # install /dev/uinput cdev Index: projects/runtime-coverage-v2/sys/powerpc/conf/GENERIC64 =================================================================== --- projects/runtime-coverage-v2/sys/powerpc/conf/GENERIC64 (revision 347075) +++ projects/runtime-coverage-v2/sys/powerpc/conf/GENERIC64 (revision 347076) @@ -1,256 +1,257 @@ # # GENERIC -- Generic kernel configuration file for FreeBSD/powerpc # # For more information on this file, please read the handbook section on # Kernel Configuration Files: # # https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ cpu AIM ident GENERIC machine powerpc powerpc64 makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols makeoptions WITH_CTF=1 # Platform support options POWERMAC #NewWorld Apple PowerMacs options PS3 #Sony Playstation 3 options MAMBO #IBM Mambo Full System Simulator options PSERIES #PAPR-compliant systems (e.g. IBM p) options POWERNV #Non-virtualized OpenPOWER systems options FDT #Flattened Device Tree options SCHED_ULE #ULE scheduler options NUMA #Non-Uniform Memory Architecture support options PREEMPTION #Enable kernel thread preemption options VIMAGE # Subsystem virtualization, e.g. 
VNET options INET #InterNETworking options INET6 #IPv6 communications protocols options IPSEC # IP (v4/v6) security options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options TCP_OFFLOAD # TCP offload options TCP_BLACKBOX # Enhanced TCP event logging options TCP_HHOOK # hhook(9) framework for TCP options TCP_RFC7413 # TCP Fast Open options SCTP #Stream Control Transmission Protocol options FFS #Berkeley Fast Filesystem options SOFTUPDATES #Enable FFS soft updates support options UFS_ACL #Support for access control lists options UFS_DIRHASH #Improve performance on big directories options UFS_GJOURNAL #Enable gjournal-based UFS journaling options QUOTA #Enable disk quotas for UFS options MD_ROOT #MD is a potential root device options NFSCL #Network Filesystem Client options NFSD #Network Filesystem Server options NFSLOCKD #Network Lock Manager options NFS_ROOT #NFS usable as root device options MSDOSFS #MSDOS Filesystem options CD9660 #ISO 9660 Filesystem options PROCFS #Process filesystem (requires PSEUDOFS) options PSEUDOFS #Pseudo-filesystem framework options GEOM_PART_APM #Apple Partition Maps. options GEOM_PART_GPT #GUID Partition Tables. options GEOM_LABEL #Provides labelization options COMPAT_FREEBSD32 #Compatible with FreeBSD/powerpc binaries options COMPAT_FREEBSD5 #Compatible with FreeBSD5 options COMPAT_FREEBSD6 #Compatible with FreeBSD6 options COMPAT_FREEBSD7 #Compatible with FreeBSD7 options COMPAT_FREEBSD9 # Compatible with FreeBSD9 options COMPAT_FREEBSD10 # Compatible with FreeBSD10 options COMPAT_FREEBSD11 # Compatible with FreeBSD11 +options COMPAT_FREEBSD12 # Compatible with FreeBSD12 options SCSI_DELAY=5000 #Delay (in ms) before probing SCSI options KTRACE #ktrace(1) syscall trace support options STACK #stack(9) support options SYSVSHM #SYSV-style shared memory options SYSVMSG #SYSV-style message queues options SYSVSEM #SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data options INCLUDE_CONFIG_FILE # Include this file in kernel options RACCT # Resource accounting framework options RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default options RCTL # Resource limits # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB #Support DDB #options DEADLKRES #Enable the deadlock resolver options INVARIANTS #Enable calls of extra sanity checking options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS options WITNESS #Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default # Kernel dump features. 
options EKCD # Support for encrypted kernel dumps options GZIO # gzip-compressed kernel and user dumps options ZSTDIO # zstd-compressed kernel and user dumps options NETDUMP # netdump(4) client support # Make an SMP-capable kernel by default options SMP # Symmetric MultiProcessor Kernel # CPU frequency control device cpufreq # Standard busses device pci options PCI_HP # PCI-Express native HotPlug device agp # ATA controllers device ahci # AHCI-compatible SATA controllers device ata # Legacy ATA/SATA controllers device mvs # Marvell 88SX50XX/88SX60XX/88SX70XX/SoC SATA device siis # SiliconImage SiI3124/SiI3132/SiI3531 SATA # NVM Express (NVMe) support device nvme # base NVMe driver options NVME_USE_NVD=0 # prefer the cam(4) based nda(4) driver device nvd # expose NVMe namespaces as disks, depends on nvme # SCSI Controllers device ahc # AHA2940 and onboard AIC7xxx devices options AHC_ALLOW_MEMIO # Attempt to use memory mapped I/O device isp # Qlogic family device ispfw # Firmware module for Qlogic host adapters device mpt # LSI-Logic MPT-Fusion device mps # LSI-Logic MPT-Fusion 2 device sym # NCR/Symbios/LSI Logic 53C8XX/53C1010/53C1510D # ATA/SCSI peripherals device scbus # SCSI bus (required for ATA/SCSI) device ch # SCSI media changers device da # Direct Access (disks) device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) device ses # Enclosure Service (SES and SAF-TE) # vt is the default console driver, resembling an SCO console device vt # Core console driver device kbdmux # Serial (COM) ports device scc device uart device uart_z8530 device iflib # Ethernet hardware device em # Intel PRO/1000 Gigabit Ethernet Family device ix # Intel PRO/10GbE PCIE PF Ethernet Family device ixv # Intel PRO/10GbE PCIE VF Ethernet Family device glc # Sony Playstation 3 Ethernet device llan # IBM pSeries Virtual Ethernet device cxgbe # Chelsio 10/25G NIC # PCI Ethernet NICs that use the common MII bus controller code. device miibus # MII bus support device bge # Broadcom BCM570xx Gigabit Ethernet device gem # Sun GEM/Sun ERI/Apple GMAC device dc # DEC/Intel 21143 and various workalikes device fxp # Intel EtherExpress PRO/100B (82557, 82558) device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 # Pseudo devices. device crypto # core crypto support device loop # Network loopback device random # Entropy device device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device ofwd # Open Firmware disks device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. 
device bpf #Berkeley packet filter # USB support options USB_DEBUG # enable debug msgs device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface device xhci # XHCI PCI->USB interface device usb # USB Bus (required) device uhid # "Human Interface Devices" device ukbd # Keyboard options KBD_INSTALL_CDEV # install a CDEV entry in /dev device umass # Disks/Mass storage - Requires scbus and da0 device ums # Mouse # USB Ethernet device aue # ADMtek USB Ethernet device axe # ASIX Electronics USB Ethernet device cdce # Generic USB over Ethernet device cue # CATC USB Ethernet device kue # Kawasaki LSI USB Ethernet # Wireless NIC cards options IEEE80211_SUPPORT_MESH # FireWire support device firewire # FireWire bus code device sbp # SCSI over FireWire (Requires scbus and da) device fwe # Ethernet over FireWire (non-standard!) # Misc device iicbus # I2C bus code device iic device kiic # Keywest I2C device ad7417 # PowerMac7,2 temperature sensor device ds1631 # PowerMac11,2 temperature sensor device ds1775 # PowerMac7,2 temperature sensor device fcu # Apple Fan Control Unit device max6690 # PowerMac7,2 temperature sensor device powermac_nvram # Open Firmware configuration NVRAM device smu # Apple System Management Unit device atibl # ATI-based backlight driver for PowerBooks/iBooks device nvbl # nVidia-based backlight driver for PowerBooks/iBooks device opalflash # PowerNV embedded flash memory # ADB support device adb device pmu # Sound support device sound # Generic sound driver (required) device snd_ai2s # Apple I2S audio device snd_uaudio # USB Audio # Netmap provides direct access to TX/RX rings on supported NICs device netmap # netmap(4) support # evdev interface options EVDEV_SUPPORT # evdev support in legacy drivers device evdev # input event device support device uinput # install /dev/uinput cdev Index: projects/runtime-coverage-v2/sys/powerpc/conf/MPC85XX =================================================================== --- projects/runtime-coverage-v2/sys/powerpc/conf/MPC85XX (revision 347075) +++ projects/runtime-coverage-v2/sys/powerpc/conf/MPC85XX (revision 347076) @@ -1,120 +1,121 @@ # # Custom kernel for Freescale MPC85XX development boards like the CDS etc. # # $FreeBSD$ # cpu BOOKE cpu BOOKE_E500 ident MPC85XX machine powerpc powerpc include "dpaa/config.dpaa" makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols makeoptions WITH_CTF=1 makeoptions WERROR="-Werror -Wno-format -Wno-redundant-decls" options FPU_EMU options _KPOSIX_PRIORITY_SCHEDULING options ALT_BREAK_TO_DEBUGGER options BREAK_TO_DEBUGGER options BOOTP options BOOTP_NFSROOT #options BOOTP_NFSV3 options CD9660 options COMPAT_43 options DDB #options DEADLKRES options DEVICE_POLLING #options DIAGNOSTIC options FDT #makeoptions FDT_DTS_FILE=mpc8555cds.dts options FFS options GDB options GEOM_PART_GPT options INET options INET6 options TCP_HHOOK # hhook(9) framework for TCP options INVARIANTS options INVARIANT_SUPPORT options KDB options KTRACE options MD_ROOT options MPC85XX options MSDOSFS options NFS_ROOT options NFSCL options NFSLOCKD options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. 
options PROCFS options PSEUDOFS options SCHED_ULE options CAPABILITIES options CAPABILITY_MODE options SMP options SYSVMSG options SYSVSEM options SYSVSHM options WITNESS options WITNESS_SKIPSPIN options COMPAT_FREEBSD10 options COMPAT_FREEBSD11 +options COMPAT_FREEBSD12 options HWPMC_HOOKS options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data device ata device bpf device cfi device cpufreq device crypto device cryptodev device da device ds1307 device ds1553 device iflib device em device alc device ether device fxp device gpio device gpiopower device iic device iicbus #device isa device loop device md device miibus device mmc device mmcsd device pass device pci device quicc device random #device rl device scbus device scc device sdhci device sec device spibus device spigen device tsec device dpaa device tun device uart options USB_DEBUG # enable debug msgs #device uhci device ehci device ukbd device ums device umass device usb device vlan # P1022 DIU device diu device videomode device vt device fbd Index: projects/runtime-coverage-v2/sys/powerpc/conf/MPC85XXSPE =================================================================== --- projects/runtime-coverage-v2/sys/powerpc/conf/MPC85XXSPE (revision 347075) +++ projects/runtime-coverage-v2/sys/powerpc/conf/MPC85XXSPE (revision 347076) @@ -1,121 +1,122 @@ # # Custom kernel for Freescale MPC85XX development boards like the CDS etc. # # $FreeBSD$ # cpu BOOKE cpu BOOKE_E500 ident MPC85XXSPE machine powerpc powerpcspe include "dpaa/config.dpaa" makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols makeoptions WITH_CTF=1 makeoptions WERROR="-Werror -Wno-format -Wno-redundant-decls" options FPU_EMU options MAXCPU=2 options _KPOSIX_PRIORITY_SCHEDULING options ALT_BREAK_TO_DEBUGGER options BREAK_TO_DEBUGGER options BOOTP options BOOTP_NFSROOT #options BOOTP_NFSV3 options CD9660 options COMPAT_43 options DDB #options DEADLKRES options DEVICE_POLLING #options DIAGNOSTIC options FDT #makeoptions FDT_DTS_FILE=mpc8555cds.dts options FFS options GDB options GEOM_PART_GPT options INET options INET6 options TCP_HHOOK # hhook(9) framework for TCP options INVARIANTS options INVARIANT_SUPPORT options KDB options KTRACE options MD_ROOT options MPC85XX options MSDOSFS options NFS_ROOT options NFSCL options NFSLOCKD options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. 
options PROCFS options PSEUDOFS options SCHED_ULE options CAPABILITIES options CAPABILITY_MODE options SMP options SYSVMSG options SYSVSEM options SYSVSHM options WITNESS options WITNESS_SKIPSPIN options COMPAT_FREEBSD10 options COMPAT_FREEBSD11 +options COMPAT_FREEBSD12 options HWPMC_HOOKS options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data device ata device bpf device cfi device cpufreq device crypto device cryptodev device da device ds1307 device ds1553 device iflib device em device alc device ether device fxp device gpio device gpiopower device iic device iicbus #device isa device loop device md device miibus device mmc device mmcsd device pass device pci device quicc device random #device rl device scbus device scc device sdhci device sec device spibus device spigen device tsec device dpaa device tun device uart options USB_DEBUG # enable debug msgs #device uhci device ehci device ukbd device ums device umass device usb device vlan # P1022 DIU device diu device videomode device vt device fbd Index: projects/runtime-coverage-v2/sys/powerpc/powernv/opal_hmi.c =================================================================== --- projects/runtime-coverage-v2/sys/powerpc/powernv/opal_hmi.c (revision 347075) +++ projects/runtime-coverage-v2/sys/powerpc/powernv/opal_hmi.c (revision 347076) @@ -1,97 +1,76 @@ /*- * Copyright (c) 2019 Justin Hibbits * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "opal.h" static int -opal_hmi_handler2(struct trapframe *frame) -{ - int64_t flags; - int err; - - err = opal_call(OPAL_HANDLE_HMI2, vtophys(&flags)); - - /* XXX: At some point, handle the flags outvar. */ - if (err == OPAL_SUCCESS) { - mtspr(SPR_HMER, 0); - return (0); - } - - printf("HMI handler failed! OPAL error code: %d\n", err); - - return (-1); -} - -static int opal_hmi_handler(struct trapframe *frame) { int err; err = opal_call(OPAL_HANDLE_HMI); if (err == OPAL_SUCCESS) { mtspr(SPR_HMER, 0); return (0); } printf("HMI handler failed! OPAL error code: %d\n", err); return (-1); } static void opal_setup_hmi(void *data) { /* This only works for OPAL, so first make sure we have it. 
*/ if (opal_check() != 0) return; - if (opal_call(OPAL_CHECK_TOKEN, OPAL_HANDLE_HMI2) == OPAL_TOKEN_PRESENT) - hmi_handler = opal_hmi_handler2; - else if (opal_call(OPAL_CHECK_TOKEN, OPAL_HANDLE_HMI) == OPAL_TOKEN_PRESENT) + if (opal_call(OPAL_CHECK_TOKEN, OPAL_HANDLE_HMI) == OPAL_TOKEN_PRESENT) hmi_handler = opal_hmi_handler; else { printf("Warning: No OPAL HMI handler found.\n"); return; } if (bootverbose) printf("Installed OPAL HMI handler.\n"); } SYSINIT(opal_setup_hmi, SI_SUB_HYPERVISOR, SI_ORDER_ANY, opal_setup_hmi, NULL); Index: projects/runtime-coverage-v2/sys/powerpc/powerpc/swtch64.S =================================================================== --- projects/runtime-coverage-v2/sys/powerpc/powerpc/swtch64.S (revision 347075) +++ projects/runtime-coverage-v2/sys/powerpc/powerpc/swtch64.S (revision 347076) @@ -1,359 +1,360 @@ /* $FreeBSD$ */ /* $NetBSD: locore.S,v 1.24 2000/05/31 05:09:17 thorpej Exp $ */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "assym.inc" #include "opt_sched.h" #include #include #include #include #include #ifdef _CALL_ELF .abiversion _CALL_ELF #endif TOC_ENTRY(blocked_lock) /* * void cpu_throw(struct thread *old, struct thread *new) */ ENTRY(cpu_throw) mr %r13, %r4 li %r14,0 /* Tell cpu_switchin not to release a thread */ + li %r18,0 /* No old pcb flags. The old thread is extinguished. */ b cpu_switchin /* * void cpu_switch(struct thread *old, * struct thread *new, * struct mutex *mtx); * * Switch to a new thread saving the current state in the old thread. * * Internally clobbers (not visible outside of this file): * r18 - old thread pcb_flags * r19 - new thread pcb_flags */ ENTRY(cpu_switch) ld %r6,TD_PCB(%r3) /* Get the old thread's PCB ptr */ std %r12,PCB_CONTEXT(%r6) /* Save the non-volatile GP regs. These can now be used for scratch */ std %r14,PCB_CONTEXT+2*8(%r6) std %r15,PCB_CONTEXT+3*8(%r6) std %r16,PCB_CONTEXT+4*8(%r6) std %r17,PCB_CONTEXT+5*8(%r6) std %r18,PCB_CONTEXT+6*8(%r6) std %r19,PCB_CONTEXT+7*8(%r6) std %r20,PCB_CONTEXT+8*8(%r6) std %r21,PCB_CONTEXT+9*8(%r6) std %r22,PCB_CONTEXT+10*8(%r6) std %r23,PCB_CONTEXT+11*8(%r6) std %r24,PCB_CONTEXT+12*8(%r6) std %r25,PCB_CONTEXT+13*8(%r6) std %r26,PCB_CONTEXT+14*8(%r6) std %r27,PCB_CONTEXT+15*8(%r6) std %r28,PCB_CONTEXT+16*8(%r6) std %r29,PCB_CONTEXT+17*8(%r6) std %r30,PCB_CONTEXT+18*8(%r6) std %r31,PCB_CONTEXT+19*8(%r6) mfcr %r16 /* Save the condition register */ std %r16,PCB_CR(%r6) mflr %r16 /* Save the link register */ std %r16,PCB_LR(%r6) std %r1,PCB_SP(%r6) /* Save the stack pointer */ std %r2,PCB_TOC(%r6) /* Save the TOC pointer */ mr %r14,%r3 /* Copy the old thread ptr... */ mr %r13,%r4 /* and the new thread ptr in curthread*/ mr %r16,%r5 /* and the new lock */ mr %r17,%r6 /* and the PCB */ stdu %r1,-48(%r1) lwz %r18, PCB_FLAGS(%r17) andi. %r7, %r18, PCB_CFSCR beq 1f mfspr %r6, SPR_FSCR std %r6, PCB_FSCR(%r17) save_ebb: andi. %r0, %r6, FSCR_EBB beq save_lm mfspr %r7, SPR_EBBHR std %r7, PCB_EBB_EBBHR(%r17) mfspr %r7, SPR_EBBRR std %r7, PCB_EBB_EBBRR(%r17) mfspr %r7, SPR_BESCR std %r7, PCB_EBB_BESCR(%r17) save_lm: andi. %r0, %r6, FSCR_LM beq save_tar mfspr %r7, SPR_LMRR std %r7, PCB_LMON_LMRR(%r17) mfspr %r7, SPR_LMSER std %r7, PCB_LMON_LMSER(%r17) save_tar: andi. %r0, %r6, FSCR_TAR beq 1f mfspr %r7, SPR_TAR std %r7, PCB_TAR(%r17) 1: andi. %r7, %r18, PCB_CDSCR beq .L0 mfspr %r6, SPR_DSCRP std %r6, PCB_DSCR(%r17) .L0: /* Save FPU context if needed */ andi. %r7, %r18, PCB_FPU beq .L1 bl save_fpu nop .L1: mr %r3,%r14 /* restore old thread ptr */ /* Save Altivec context if needed */ andi. 
%r7, %r18, PCB_VEC beq .L2 bl save_vec nop .L2: mr %r3,%r14 /* restore old thread ptr */ bl pmap_deactivate /* Deactivate the current pmap */ nop sync /* Make sure all of that finished */ cpu_switchin: #if defined(SMP) && defined(SCHED_ULE) /* Wait for the new thread to become unblocked */ addis %r6,%r2,TOC_REF(blocked_lock)@ha ld %r6,TOC_REF(blocked_lock)@l(%r6) blocked_loop: ld %r7,TD_LOCK(%r13) cmpd %r6,%r7 beq- blocked_loop isync #endif ld %r17,TD_PCB(%r13) /* Get new PCB */ ld %r1,PCB_SP(%r17) /* Load the stack pointer */ addi %r1,%r1,-48 /* Remember about cpu_switch stack frame */ /* Release old thread now that we have a stack pointer set up */ cmpdi %r14,0 beq- 1f std %r16,TD_LOCK(%r14) /* ULE: update old thread's lock */ 1: mfsprg %r7,0 /* Get the pcpu pointer */ std %r13,PC_CURTHREAD(%r7) /* Store new current thread */ ld %r17,TD_PCB(%r13) /* Store new current PCB */ std %r17,PC_CURPCB(%r7) mr %r3,%r13 /* Get new thread ptr */ bl pmap_activate /* Activate the new address space */ nop lwz %r19, PCB_FLAGS(%r17) /* Restore FPU context if needed */ andi. %r6, %r19, PCB_FPU beq .L3 mr %r3,%r13 /* Pass curthread to enable_fpu */ bl enable_fpu nop .L3: /* Restore Altivec context if needed */ andi. %r6, %r19, PCB_VEC beq .L31 mr %r3,%r13 /* Pass curthread to enable_vec */ bl enable_vec nop .L31: /* Load custom DSCR on PowerISA 2.06+ CPUs. */ /* Load changed FSCR on PowerISA 2.07+ CPUs. */ or %r18,%r18,%r19 /* Restore Custom DSCR if needed (zeroes if in old but not new) */ andi. %r6, %r18, PCB_CDSCR beq .L32 ld %r7, PCB_DSCR(%r17) /* Load the DSCR register*/ mtspr SPR_DSCRP, %r7 .L32: /* Restore FSCR if needed (zeroes if in old but not new) */ andi. %r6, %r18, PCB_CFSCR beq .L4 ld %r7, PCB_FSCR(%r17) /* Load the FSCR register*/ mtspr SPR_FSCR, %r7 restore_ebb: andi. %r0, %r7, FSCR_EBB beq restore_lm ld %r6, PCB_EBB_EBBHR(%r17) mtspr SPR_EBBHR, %r6 ld %r6, PCB_EBB_EBBRR(%r17) mtspr SPR_EBBRR, %r6 ld %r6, PCB_EBB_BESCR(%r17) mtspr SPR_BESCR, %r6 restore_lm: andi. %r0, %r7, FSCR_LM beq restore_tar ld %r6, PCB_LMON_LMRR(%r17) mtspr SPR_LMRR, %r6 ld %r6, PCB_LMON_LMSER(%r17) mtspr SPR_LMSER, %r6 restore_tar: andi. %r0, %r7, FSCR_TAR beq .L4 ld %r6, PCB_TAR(%r17) mtspr SPR_TAR, %r6 /* thread to restore is in r3 */ .L4: addi %r1,%r1,48 mr %r3,%r17 /* Recover PCB ptr */ ld %r12,PCB_CONTEXT(%r3) /* Load the non-volatile GP regs. */ ld %r14,PCB_CONTEXT+2*8(%r3) ld %r15,PCB_CONTEXT+3*8(%r3) ld %r16,PCB_CONTEXT+4*8(%r3) ld %r17,PCB_CONTEXT+5*8(%r3) ld %r18,PCB_CONTEXT+6*8(%r3) ld %r19,PCB_CONTEXT+7*8(%r3) ld %r20,PCB_CONTEXT+8*8(%r3) ld %r21,PCB_CONTEXT+9*8(%r3) ld %r22,PCB_CONTEXT+10*8(%r3) ld %r23,PCB_CONTEXT+11*8(%r3) ld %r24,PCB_CONTEXT+12*8(%r3) ld %r25,PCB_CONTEXT+13*8(%r3) ld %r26,PCB_CONTEXT+14*8(%r3) ld %r27,PCB_CONTEXT+15*8(%r3) ld %r28,PCB_CONTEXT+16*8(%r3) ld %r29,PCB_CONTEXT+17*8(%r3) ld %r30,PCB_CONTEXT+18*8(%r3) ld %r31,PCB_CONTEXT+19*8(%r3) ld %r5,PCB_CR(%r3) /* Load the condition register */ mtcr %r5 ld %r5,PCB_LR(%r3) /* Load the link register */ mtlr %r5 ld %r1,PCB_SP(%r3) /* Load the stack pointer */ ld %r2,PCB_TOC(%r3) /* Load the TOC pointer */ /* * Perform a dummy stdcx. to clear any reservations we may have * inherited from the previous thread. It doesn't matter if the * stdcx succeeds or not. pcb_context[0] can be clobbered. */ stdcx. %r1, 0, %r3 blr /* * savectx(pcb) * Update pcb, saving current processor state */ ENTRY(savectx) std %r12,PCB_CONTEXT(%r3) /* Save the non-volatile GP regs. 
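For readers tracing the restore path above (labels .L31/.L32), a minimal C sketch of what the `or %r18,%r18,%r19` merge accomplishes; the struct pcb field names (pcb_flags, pcb_dscr, pcb_fscr) are assumed from the PCB_* asm offsets for illustration, not quoted from pcb.h. ORing the outgoing thread's flags in ensures each facility register is reloaded, to zero if the incoming thread never used it, whenever the outgoing thread had dirtied it.

	/* Sketch only; field names assumed, mirrors the asm around .L31/.L32. */
	static void
	restore_facility_regs(struct pcb *oldpcb, struct pcb *newpcb)
	{
		int flags;

		flags = oldpcb->pcb_flags | newpcb->pcb_flags;	/* or %r18,%r18,%r19 */
		if (flags & PCB_CDSCR)		/* either thread used a custom DSCR */
			mtspr(SPR_DSCRP, newpcb->pcb_dscr);
		if (flags & PCB_CFSCR)		/* likewise for FSCR-gated facilities */
			mtspr(SPR_FSCR, newpcb->pcb_fscr);
	}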
*/ std %r13,PCB_CONTEXT+1*8(%r3) std %r14,PCB_CONTEXT+2*8(%r3) std %r15,PCB_CONTEXT+3*8(%r3) std %r16,PCB_CONTEXT+4*8(%r3) std %r17,PCB_CONTEXT+5*8(%r3) std %r18,PCB_CONTEXT+6*8(%r3) std %r19,PCB_CONTEXT+7*8(%r3) std %r20,PCB_CONTEXT+8*8(%r3) std %r21,PCB_CONTEXT+9*8(%r3) std %r22,PCB_CONTEXT+10*8(%r3) std %r23,PCB_CONTEXT+11*8(%r3) std %r24,PCB_CONTEXT+12*8(%r3) std %r25,PCB_CONTEXT+13*8(%r3) std %r26,PCB_CONTEXT+14*8(%r3) std %r27,PCB_CONTEXT+15*8(%r3) std %r28,PCB_CONTEXT+16*8(%r3) std %r29,PCB_CONTEXT+17*8(%r3) std %r30,PCB_CONTEXT+18*8(%r3) std %r31,PCB_CONTEXT+19*8(%r3) mfcr %r4 /* Save the condition register */ std %r4,PCB_CR(%r3) std %r1,PCB_SP(%r3) /* Save the stack pointer */ std %r2,PCB_TOC(%r3) /* Save the TOC pointer */ mflr %r4 /* Save the link register */ std %r4,PCB_LR(%r3) blr /* * fork_trampoline() * Set up the return from cpu_fork() */ ENTRY_NOPROF(fork_trampoline) ld %r3,CF_FUNC(%r1) ld %r4,CF_ARG0(%r1) ld %r5,CF_ARG1(%r1) stdu %r1,-48(%r1) bl fork_exit nop addi %r1,%r1,48+CF_SIZE-FSP /* Allow 8 bytes in front of trapframe to simulate FRAME_SETUP does when allocating space for a frame pointer/saved LR */ bl trapexit nop Index: projects/runtime-coverage-v2/sys/riscv/riscv/nexus.c =================================================================== --- projects/runtime-coverage-v2/sys/riscv/riscv/nexus.c (revision 347075) +++ projects/runtime-coverage-v2/sys/riscv/riscv/nexus.c (revision 347076) @@ -1,394 +1,398 @@ /*- * Copyright 1998 Massachusetts Institute of Technology * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * This code implements a `root nexus' for RISC-V Architecture * machines. The function of the root nexus is to serve as an * attachment point for both processors and buses, and to manage * resources which are common to all of them. In particular, * this code implements the core resource managers for interrupt * requests, DMA requests (which rightfully should be a part of the * ISA code but it's easier to do it here for now), I/O port addresses, * and I/O memory address space. 
*/ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #include "ofw_bus_if.h" #endif extern struct bus_space memmap_bus; static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device"); struct nexus_device { struct resource_list nx_resources; }; #define DEVTONX(dev) ((struct nexus_device *)device_get_ivars(dev)) static struct rman mem_rman; static struct rman irq_rman; static device_probe_t nexus_fdt_probe; static int nexus_attach(device_t); static int nexus_print_child(device_t, device_t); static device_t nexus_add_child(device_t, u_int, const char *, int); static struct resource *nexus_alloc_resource(device_t, device_t, int, int *, u_long, u_long, u_long, u_int); static int nexus_activate_resource(device_t, device_t, int, int, struct resource *); static int nexus_config_intr(device_t dev, int irq, enum intr_trigger trig, enum intr_polarity pol); static struct resource_list *nexus_get_reslist(device_t, device_t); static int nexus_set_resource(device_t, device_t, int, int, u_long, u_long); static int nexus_deactivate_resource(device_t, device_t, int, int, struct resource *); static int nexus_setup_intr(device_t dev, device_t child, struct resource *res, int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep); static int nexus_teardown_intr(device_t, device_t, struct resource *, void *); static int nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, int icells, pcell_t *intr); static device_method_t nexus_methods[] = { /* Device interface */ DEVMETHOD(device_probe, nexus_fdt_probe), DEVMETHOD(device_attach, nexus_attach), /* OFW interface */ DEVMETHOD(ofw_bus_map_intr, nexus_ofw_map_intr), /* Bus interface */ DEVMETHOD(bus_print_child, nexus_print_child), DEVMETHOD(bus_add_child, nexus_add_child), DEVMETHOD(bus_alloc_resource, nexus_alloc_resource), DEVMETHOD(bus_activate_resource, nexus_activate_resource), DEVMETHOD(bus_config_intr, nexus_config_intr), DEVMETHOD(bus_get_resource_list, nexus_get_reslist), DEVMETHOD(bus_set_resource, nexus_set_resource), DEVMETHOD(bus_deactivate_resource, nexus_deactivate_resource), DEVMETHOD(bus_setup_intr, nexus_setup_intr), DEVMETHOD(bus_teardown_intr, nexus_teardown_intr), { 0, 0 } }; static driver_t nexus_fdt_driver = { "nexus", nexus_methods, 1 /* no softc */ }; static int nexus_fdt_probe(device_t dev) { device_quiet(dev); return (BUS_PROBE_DEFAULT); } static int nexus_attach(device_t dev) { mem_rman.rm_start = 0; mem_rman.rm_end = BUS_SPACE_MAXADDR; mem_rman.rm_type = RMAN_ARRAY; mem_rman.rm_descr = "I/O memory addresses"; if (rman_init(&mem_rman) || rman_manage_region(&mem_rman, 0, BUS_SPACE_MAXADDR)) panic("nexus_attach mem_rman"); irq_rman.rm_start = 0; irq_rman.rm_end = ~0; irq_rman.rm_type = RMAN_ARRAY; irq_rman.rm_descr = "Interrupts"; if (rman_init(&irq_rman) || rman_manage_region(&irq_rman, 0, ~0)) panic("nexus_attach irq_rman"); nexus_add_child(dev, 8, "timer", 0); nexus_add_child(dev, 9, "rcons", 0); nexus_add_child(dev, 10, "ofwbus", 0); bus_generic_probe(dev); bus_generic_attach(dev); return (0); } static int nexus_print_child(device_t bus, device_t child) { int retval = 0; retval += bus_print_child_header(bus, child); retval += printf("\n"); return (retval); } static device_t nexus_add_child(device_t bus, u_int order, const char *name, int unit) { device_t child; struct nexus_device *ndev; ndev = malloc(sizeof(struct nexus_device), M_NEXUSDEV, M_NOWAIT|M_ZERO); if 
(!ndev) return (0); resource_list_init(&ndev->nx_resources); child = device_add_child_ordered(bus, order, name, unit); /* should we free this in nexus_child_detached? */ device_set_ivars(child, ndev); return (child); } /* * Allocate a resource on behalf of child. NB: child is usually going to be a * child of one of our descendants, not a direct child of nexus0. * (Exceptions include footbridge.) */ static struct resource * nexus_alloc_resource(device_t bus, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct nexus_device *ndev = DEVTONX(child); struct resource *rv; struct resource_list_entry *rle; struct rman *rm; int needactivate = flags & RF_ACTIVE; /* * If this is an allocation of the "default" range for a given * RID, and we know what the resources for this device are * (ie. they aren't maintained by a child bus), then work out * the start/end values. */ if (RMAN_IS_DEFAULT_RANGE(start, end) && (count == 1)) { if (device_get_parent(child) != bus || ndev == NULL) return(NULL); rle = resource_list_find(&ndev->nx_resources, type, *rid); if (rle == NULL) return(NULL); start = rle->start; end = rle->end; count = rle->count; } switch (type) { case SYS_RES_IRQ: rm = &irq_rman; break; case SYS_RES_MEMORY: case SYS_RES_IOPORT: rm = &mem_rman; break; default: return (NULL); } rv = rman_reserve_resource(rm, start, end, count, flags, child); if (rv == NULL) return (NULL); rman_set_rid(rv, *rid); rman_set_bushandle(rv, rman_get_start(rv)); if (needactivate) { if (bus_activate_resource(child, type, *rid, rv)) { rman_release_resource(rv); return (NULL); } } return (rv); } static int nexus_config_intr(device_t dev, int irq, enum intr_trigger trig, enum intr_polarity pol) { return (EOPNOTSUPP); } static int nexus_setup_intr(device_t dev, device_t child, struct resource *res, int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep) { int error; if ((rman_get_flags(res) & RF_SHAREABLE) == 0) flags |= INTR_EXCL; /* We depend here on rman_activate_resource() being idempotent. */ error = rman_activate_resource(res); if (error) return (error); error = intr_setup_irq(child, res, filt, intr, arg, flags, cookiep); return (error); } static int nexus_teardown_intr(device_t dev, device_t child, struct resource *r, void *ih) { return (intr_teardown_irq(child, r, ih)); } static int nexus_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { int err; bus_addr_t paddr; bus_size_t psize; bus_space_handle_t vaddr; if ((err = rman_activate_resource(r)) != 0) return (err); /* * If this is a memory resource, map it into the kernel. 
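As context for the default-range branch in nexus_alloc_resource() above, a hedged sketch of a hypothetical direct child of nexus0; example_attach, the window addresses, and the RID are all illustrative. The window is recorded with bus_set_resource(), which lands in nx_resources via nexus_set_resource(), and a later default-range allocation (count of 1, wildcard start/end) looks it back up.

	/* Hypothetical nexus child; addresses and RID are illustrative only. */
	static int
	example_attach(device_t dev)
	{
		struct resource *res;
		int rid = 0;

		/* Record a fixed window; the parent stores it in nx_resources. */
		bus_set_resource(dev, SYS_RES_MEMORY, rid, 0x10000000ul, 0x1000ul);

		/* Default-range allocation; nexus resolves start/end/count. */
		res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
		return (res != NULL ? 0 : ENXIO);
	}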
*/ if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) { paddr = (bus_addr_t)rman_get_start(r); psize = (bus_size_t)rman_get_size(r); err = bus_space_map(&memmap_bus, paddr, psize, 0, &vaddr); if (err != 0) { rman_deactivate_resource(r); return (err); } rman_set_bustag(r, &memmap_bus); rman_set_virtual(r, (void *)vaddr); rman_set_bushandle(r, vaddr); } else if (type == SYS_RES_IRQ) { err = intr_activate_irq(child, r); if (err != 0) { rman_deactivate_resource(r); return (err); } } return (0); } static struct resource_list * nexus_get_reslist(device_t dev, device_t child) { struct nexus_device *ndev = DEVTONX(child); return (&ndev->nx_resources); } static int nexus_set_resource(device_t dev, device_t child, int type, int rid, u_long start, u_long count) { struct nexus_device *ndev = DEVTONX(child); struct resource_list *rl = &ndev->nx_resources; /* XXX this should return a success/failure indicator */ resource_list_add(rl, type, rid, start, start + count - 1, count); return(0); } static int nexus_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { bus_size_t psize; bus_space_handle_t vaddr; - psize = (bus_size_t)rman_get_size(r); - vaddr = rman_get_bushandle(r); + if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) { + psize = (bus_size_t)rman_get_size(r); + vaddr = rman_get_bushandle(r); - if (vaddr != 0) { - bus_space_unmap(&memmap_bus, vaddr, psize); - rman_set_virtual(r, NULL); - rman_set_bushandle(r, 0); + if (vaddr != 0) { + bus_space_unmap(&memmap_bus, vaddr, psize); + rman_set_virtual(r, NULL); + rman_set_bushandle(r, 0); + } + } else if (type == SYS_RES_IRQ) { + intr_deactivate_irq(child, r); } return (rman_deactivate_resource(r)); } static devclass_t nexus_fdt_devclass; EARLY_DRIVER_MODULE(nexus_fdt, root, nexus_fdt_driver, nexus_fdt_devclass, 0, 0, BUS_PASS_BUS + BUS_PASS_ORDER_FIRST); static int nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, int icells, pcell_t *intr) { struct intr_map_data_fdt *fdt_data; size_t len; u_int irq; len = sizeof(*fdt_data) + icells * sizeof(pcell_t); fdt_data = (struct intr_map_data_fdt *)intr_alloc_map_data( INTR_MAP_DATA_FDT, len, M_WAITOK | M_ZERO); fdt_data->iparent = iparent; fdt_data->ncells = icells; memcpy(fdt_data->cells, intr, icells * sizeof(pcell_t)); irq = intr_map_irq(NULL, iparent, (struct intr_map_data *)fdt_data); return (irq); } Index: projects/runtime-coverage-v2/sys/security/mac/mac_inet.c =================================================================== --- projects/runtime-coverage-v2/sys/security/mac/mac_inet.c (revision 347075) +++ projects/runtime-coverage-v2/sys/security/mac/mac_inet.c (revision 347076) @@ -1,507 +1,509 @@ /*- - * Copyright (c) 1999-2002, 2007, 2009 Robert N. M. Watson + * Copyright (c) 1999-2002, 2007, 2009, 2019 Robert N. M. Watson * Copyright (c) 2001 Ilmar S. Habibulin * Copyright (c) 2001-2004 Networks Associates Technology, Inc. * Copyright (c) 2006 SPARTA, Inc. * Copyright (c) 2008 Apple Inc. * All rights reserved. * * This software was developed by Robert Watson and Ilmar Habibulin for the * TrustedBSD Project. * * This software was developed for the FreeBSD Project in part by Network * Associates Laboratories, the Security Research Division of Network * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), * as part of the DARPA CHATS research program. * * This software was enhanced by SPARTA ISSO under SPAWAR contract * N66001-04-C-6019 ("SEFOS"). 
* * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct label * mac_inpcb_label_alloc(int flag) { struct label *label; int error; label = mac_labelzone_alloc(flag); if (label == NULL) return (NULL); if (flag & M_WAITOK) MAC_POLICY_CHECK(inpcb_init_label, label, flag); else MAC_POLICY_CHECK_NOSLEEP(inpcb_init_label, label, flag); if (error) { MAC_POLICY_PERFORM_NOSLEEP(inpcb_destroy_label, label); mac_labelzone_free(label); return (NULL); } return (label); } int mac_inpcb_init(struct inpcb *inp, int flag) { if (mac_labeled & MPC_OBJECT_INPCB) { inp->inp_label = mac_inpcb_label_alloc(flag); if (inp->inp_label == NULL) return (ENOMEM); } else inp->inp_label = NULL; return (0); } static struct label * mac_ipq_label_alloc(int flag) { struct label *label; int error; label = mac_labelzone_alloc(flag); if (label == NULL) return (NULL); if (flag & M_WAITOK) MAC_POLICY_CHECK(ipq_init_label, label, flag); else MAC_POLICY_CHECK_NOSLEEP(ipq_init_label, label, flag); if (error) { MAC_POLICY_PERFORM_NOSLEEP(ipq_destroy_label, label); mac_labelzone_free(label); return (NULL); } return (label); } int mac_ipq_init(struct ipq *q, int flag) { if (mac_labeled & MPC_OBJECT_IPQ) { q->ipq_label = mac_ipq_label_alloc(flag); if (q->ipq_label == NULL) return (ENOMEM); } else q->ipq_label = NULL; return (0); } static void mac_inpcb_label_free(struct label *label) { MAC_POLICY_PERFORM_NOSLEEP(inpcb_destroy_label, label); mac_labelzone_free(label); } void mac_inpcb_destroy(struct inpcb *inp) { if (inp->inp_label != NULL) { mac_inpcb_label_free(inp->inp_label); inp->inp_label = NULL; } } static void mac_ipq_label_free(struct label *label) { MAC_POLICY_PERFORM_NOSLEEP(ipq_destroy_label, label); mac_labelzone_free(label); } void mac_ipq_destroy(struct ipq *q) { if (q->ipq_label != NULL) { mac_ipq_label_free(q->ipq_label); q->ipq_label = NULL; } } void mac_inpcb_create(struct socket *so, struct inpcb *inp) { MAC_POLICY_PERFORM_NOSLEEP(inpcb_create, 
so, so->so_label, inp, inp->inp_label); } void mac_ipq_reassemble(struct ipq *q, struct mbuf *m) { struct label *label; if (mac_policy_count == 0) return; label = mac_mbuf_to_label(m); MAC_POLICY_PERFORM_NOSLEEP(ipq_reassemble, q, q->ipq_label, m, label); } void mac_netinet_fragment(struct mbuf *m, struct mbuf *frag) { struct label *mlabel, *fraglabel; if (mac_policy_count == 0) return; mlabel = mac_mbuf_to_label(m); fraglabel = mac_mbuf_to_label(frag); MAC_POLICY_PERFORM_NOSLEEP(netinet_fragment, m, mlabel, frag, fraglabel); } void mac_ipq_create(struct mbuf *m, struct ipq *q) { struct label *label; if (mac_policy_count == 0) return; label = mac_mbuf_to_label(m); MAC_POLICY_PERFORM_NOSLEEP(ipq_create, m, label, q, q->ipq_label); } void mac_inpcb_create_mbuf(struct inpcb *inp, struct mbuf *m) { struct label *mlabel; INP_LOCK_ASSERT(inp); if (mac_policy_count == 0) return; mlabel = mac_mbuf_to_label(m); MAC_POLICY_PERFORM_NOSLEEP(inpcb_create_mbuf, inp, inp->inp_label, m, mlabel); } int mac_ipq_match(struct mbuf *m, struct ipq *q) { struct label *label; int result; if (mac_policy_count == 0) return (1); label = mac_mbuf_to_label(m); result = 1; MAC_POLICY_BOOLEAN_NOSLEEP(ipq_match, &&, m, label, q, q->ipq_label); return (result); } void mac_netinet_arp_send(struct ifnet *ifp, struct mbuf *m) { struct label *mlabel; + int locked; if (mac_policy_count == 0) return; mlabel = mac_mbuf_to_label(m); - MAC_IFNET_LOCK(ifp); + MAC_IFNET_LOCK(ifp, locked); MAC_POLICY_PERFORM_NOSLEEP(netinet_arp_send, ifp, ifp->if_label, m, mlabel); - MAC_IFNET_UNLOCK(ifp); + MAC_IFNET_UNLOCK(ifp, locked); } void mac_netinet_icmp_reply(struct mbuf *mrecv, struct mbuf *msend) { struct label *mrecvlabel, *msendlabel; if (mac_policy_count == 0) return; mrecvlabel = mac_mbuf_to_label(mrecv); msendlabel = mac_mbuf_to_label(msend); MAC_POLICY_PERFORM_NOSLEEP(netinet_icmp_reply, mrecv, mrecvlabel, msend, msendlabel); } void mac_netinet_icmp_replyinplace(struct mbuf *m) { struct label *label; if (mac_policy_count == 0) return; label = mac_mbuf_to_label(m); MAC_POLICY_PERFORM_NOSLEEP(netinet_icmp_replyinplace, m, label); } void mac_netinet_igmp_send(struct ifnet *ifp, struct mbuf *m) { struct label *mlabel; + int locked; if (mac_policy_count == 0) return; mlabel = mac_mbuf_to_label(m); - MAC_IFNET_LOCK(ifp); + MAC_IFNET_LOCK(ifp, locked); MAC_POLICY_PERFORM_NOSLEEP(netinet_igmp_send, ifp, ifp->if_label, m, mlabel); - MAC_IFNET_UNLOCK(ifp); + MAC_IFNET_UNLOCK(ifp, locked); } void mac_netinet_tcp_reply(struct mbuf *m) { struct label *label; if (mac_policy_count == 0) return; label = mac_mbuf_to_label(m); MAC_POLICY_PERFORM_NOSLEEP(netinet_tcp_reply, m, label); } void mac_ipq_update(struct mbuf *m, struct ipq *q) { struct label *label; if (mac_policy_count == 0) return; label = mac_mbuf_to_label(m); MAC_POLICY_PERFORM_NOSLEEP(ipq_update, m, label, q, q->ipq_label); } MAC_CHECK_PROBE_DEFINE2(inpcb_check_deliver, "struct inpcb *", "struct mbuf *"); int mac_inpcb_check_deliver(struct inpcb *inp, struct mbuf *m) { struct label *label; int error; M_ASSERTPKTHDR(m); if (mac_policy_count == 0) return (0); label = mac_mbuf_to_label(m); MAC_POLICY_CHECK_NOSLEEP(inpcb_check_deliver, inp, inp->inp_label, m, label); MAC_CHECK_PROBE2(inpcb_check_deliver, error, inp, m); return (error); } MAC_CHECK_PROBE_DEFINE2(inpcb_check_visible, "struct ucred *", "struct inpcb *"); int mac_inpcb_check_visible(struct ucred *cred, struct inpcb *inp) { int error; INP_LOCK_ASSERT(inp); MAC_POLICY_CHECK_NOSLEEP(inpcb_check_visible, cred, inp, inp->inp_label); 
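The check above fans out to each registered policy's mpo_inpcb_check_visible method and folds the results with mac_error_select(). A sketch of the policy side; the module and the rule shown are hypothetical, purely to illustrate the signature and return convention:

	/* Hypothetical policy method; the rule is illustrative only. */
	static int
	example_inpcb_check_visible(struct ucred *cred, struct inpcb *inp,
	    struct label *inplabel)
	{
		/* Hide labeled sockets from unprivileged credentials. */
		if (cred->cr_uid != 0 && inplabel != NULL)
			return (ENOENT);
		return (0);
	}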
MAC_CHECK_PROBE2(inpcb_check_visible, error, cred, inp); return (error); } void mac_inpcb_sosetlabel(struct socket *so, struct inpcb *inp) { INP_WLOCK_ASSERT(inp); SOCK_LOCK_ASSERT(so); MAC_POLICY_PERFORM_NOSLEEP(inpcb_sosetlabel, so, so->so_label, inp, inp->inp_label); } void mac_netinet_firewall_reply(struct mbuf *mrecv, struct mbuf *msend) { struct label *mrecvlabel, *msendlabel; M_ASSERTPKTHDR(mrecv); M_ASSERTPKTHDR(msend); if (mac_policy_count == 0) return; mrecvlabel = mac_mbuf_to_label(mrecv); msendlabel = mac_mbuf_to_label(msend); MAC_POLICY_PERFORM_NOSLEEP(netinet_firewall_reply, mrecv, mrecvlabel, msend, msendlabel); } void mac_netinet_firewall_send(struct mbuf *m) { struct label *label; M_ASSERTPKTHDR(m); if (mac_policy_count == 0) return; label = mac_mbuf_to_label(m); MAC_POLICY_PERFORM_NOSLEEP(netinet_firewall_send, m, label); } /* * These functions really should be referencing the syncache structure * instead of the label. However, due to some of the complexities associated * with exposing this syncache structure we operate directly on its label * pointer. This should be OK since we aren't making any access control * decisions within this code directly, we are merely allocating and copying * label storage so we can properly initialize mbuf labels for any packets * the syncache code might create. */ void mac_syncache_destroy(struct label **label) { if (*label != NULL) { MAC_POLICY_PERFORM_NOSLEEP(syncache_destroy_label, *label); mac_labelzone_free(*label); *label = NULL; } } int mac_syncache_init(struct label **label) { int error; if (mac_labeled & MPC_OBJECT_SYNCACHE) { *label = mac_labelzone_alloc(M_NOWAIT); if (*label == NULL) return (ENOMEM); /* * Since we are holding the inpcb locks the policy can not * allocate policy specific label storage using M_WAITOK. So * we need to do a MAC_CHECK instead of the typical * MAC_PERFORM so we can propagate allocation failures back * to the syncache code. */ MAC_POLICY_CHECK_NOSLEEP(syncache_init_label, *label, M_NOWAIT); if (error) { MAC_POLICY_PERFORM_NOSLEEP(syncache_destroy_label, *label); mac_labelzone_free(*label); } return (error); } else *label = NULL; return (0); } void mac_syncache_create(struct label *label, struct inpcb *inp) { INP_WLOCK_ASSERT(inp); MAC_POLICY_PERFORM_NOSLEEP(syncache_create, label, inp); } void mac_syncache_create_mbuf(struct label *sc_label, struct mbuf *m) { struct label *mlabel; M_ASSERTPKTHDR(m); if (mac_policy_count == 0) return; mlabel = mac_mbuf_to_label(m); MAC_POLICY_PERFORM_NOSLEEP(syncache_create_mbuf, sc_label, m, mlabel); } Index: projects/runtime-coverage-v2/sys/security/mac/mac_internal.h =================================================================== --- projects/runtime-coverage-v2/sys/security/mac/mac_internal.h (revision 347075) +++ projects/runtime-coverage-v2/sys/security/mac/mac_internal.h (revision 347076) @@ -1,520 +1,536 @@ /*- - * Copyright (c) 1999-2002, 2006, 2009 Robert N. M. Watson + * Copyright (c) 1999-2002, 2006, 2009, 2019 Robert N. M. Watson * Copyright (c) 2001 Ilmar S. Habibulin * Copyright (c) 2001-2004 Networks Associates Technology, Inc. * Copyright (c) 2006 nCircle Network Security, Inc. * Copyright (c) 2006 SPARTA, Inc. * Copyright (c) 2009 Apple, Inc. * All rights reserved. * * This software was developed by Robert Watson and Ilmar Habibulin for the * TrustedBSD Project. * * This software was developed for the FreeBSD Project in part by Network * Associates Laboratories, the Security Research Division of Network * Associates, Inc. 
under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), * as part of the DARPA CHATS research program. * * This software was developed by Robert N. M. Watson for the TrustedBSD * Project under contract to nCircle Network Security, Inc. * * This software was enhanced by SPARTA ISSO under SPAWAR contract * N66001-04-C-6019 ("SEFOS"). * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SECURITY_MAC_MAC_INTERNAL_H_ #define _SECURITY_MAC_MAC_INTERNAL_H_ #ifndef _KERNEL #error "no user-serviceable parts inside" #endif #include #include /* * MAC Framework sysctl namespace. */ #ifdef SYSCTL_DECL SYSCTL_DECL(_security_mac); #endif /* SYSCTL_DECL */ /* * MAC Framework SDT DTrace probe namespace, macros for declaring entry * point probes, macros for invoking them. */ #ifdef SDT_PROVIDER_DECLARE SDT_PROVIDER_DECLARE(mac); /* MAC Framework-level events. */ SDT_PROVIDER_DECLARE(mac_framework); /* Entry points to MAC. 
*/ #define MAC_CHECK_PROBE_DEFINE4(name, arg0, arg1, arg2, arg3) \ SDT_PROBE_DEFINE5(mac_framework, , name, mac__check__err, \ "int", arg0, arg1, arg2, arg3); \ SDT_PROBE_DEFINE5(mac_framework, , name, mac__check__ok, \ "int", arg0, arg1, arg2, arg3); #define MAC_CHECK_PROBE_DEFINE3(name, arg0, arg1, arg2) \ SDT_PROBE_DEFINE4(mac_framework, , name, mac__check__err, \ "int", arg0, arg1, arg2); \ SDT_PROBE_DEFINE4(mac_framework, , name, mac__check__ok, \ "int", arg0, arg1, arg2); #define MAC_CHECK_PROBE_DEFINE2(name, arg0, arg1) \ SDT_PROBE_DEFINE3(mac_framework, , name, mac__check__err, \ "int", arg0, arg1); \ SDT_PROBE_DEFINE3(mac_framework, , name, mac__check__ok, \ "int", arg0, arg1); #define MAC_CHECK_PROBE_DEFINE1(name, arg0) \ SDT_PROBE_DEFINE2(mac_framework, , name, mac__check__err, \ "int", arg0); \ SDT_PROBE_DEFINE2(mac_framework, , name, mac__check__ok, \ "int", arg0); #define MAC_CHECK_PROBE4(name, error, arg0, arg1, arg2, arg3) do { \ if (SDT_PROBES_ENABLED()) { \ if (error) { \ SDT_PROBE5(mac_framework, , name, mac__check__err,\ error, arg0, arg1, arg2, arg3); \ } else { \ SDT_PROBE5(mac_framework, , name, mac__check__ok,\ 0, arg0, arg1, arg2, arg3); \ } \ } \ } while (0) #define MAC_CHECK_PROBE3(name, error, arg0, arg1, arg2) \ MAC_CHECK_PROBE4(name, error, arg0, arg1, arg2, 0) #define MAC_CHECK_PROBE2(name, error, arg0, arg1) \ MAC_CHECK_PROBE3(name, error, arg0, arg1, 0) #define MAC_CHECK_PROBE1(name, error, arg0) \ MAC_CHECK_PROBE2(name, error, arg0, 0) #endif #define MAC_GRANT_PROBE_DEFINE2(name, arg0, arg1) \ SDT_PROBE_DEFINE3(mac_framework, , name, mac__grant__err, \ "int", arg0, arg1); \ SDT_PROBE_DEFINE3(mac_framework, , name, mac__grant__ok, \ "int", arg0, arg1); #define MAC_GRANT_PROBE2(name, error, arg0, arg1) do { \ if (SDT_PROBES_ENABLED()) { \ if (error) { \ SDT_PROBE3(mac_framework, , name, mac__grant__err,\ error, arg0, arg1); \ } else { \ SDT_PROBE3(mac_framework, , name, mac__grant__ok,\ error, arg0, arg1); \ } \ } \ } while (0) /* * MAC Framework global types and typedefs. */ LIST_HEAD(mac_policy_list_head, mac_policy_conf); #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_MACTEMP); #endif /* * MAC labels -- in-kernel storage format. * * In general, struct label pointers are embedded in kernel data structures * representing objects that may be labeled (and protected). Struct label is * opaque to both kernel services that invoke the MAC Framework and MAC * policy modules. In particular, we do not wish to encode the layout of the * label structure into any ABIs. Historically, the slot array contained * unions of {long, void} but now contains uintptr_t. */ #define MAC_MAX_SLOTS 4 #define MAC_FLAG_INITIALIZED 0x0000001 /* Is initialized for use. */ struct label { int l_flags; intptr_t l_perpolicy[MAC_MAX_SLOTS]; }; /* * Flags for mac_labeled, a bitmask of object types need across the union of * all policies currently registered with the MAC Framework, used to key * whether or not labels are allocated and constructors for the type are * invoked. 
*/ #define MPC_OBJECT_CRED 0x0000000000000001 #define MPC_OBJECT_PROC 0x0000000000000002 #define MPC_OBJECT_VNODE 0x0000000000000004 #define MPC_OBJECT_INPCB 0x0000000000000008 #define MPC_OBJECT_SOCKET 0x0000000000000010 #define MPC_OBJECT_DEVFS 0x0000000000000020 #define MPC_OBJECT_MBUF 0x0000000000000040 #define MPC_OBJECT_IPQ 0x0000000000000080 #define MPC_OBJECT_IFNET 0x0000000000000100 #define MPC_OBJECT_BPFDESC 0x0000000000000200 #define MPC_OBJECT_PIPE 0x0000000000000400 #define MPC_OBJECT_MOUNT 0x0000000000000800 #define MPC_OBJECT_POSIXSEM 0x0000000000001000 #define MPC_OBJECT_POSIXSHM 0x0000000000002000 #define MPC_OBJECT_SYSVMSG 0x0000000000004000 #define MPC_OBJECT_SYSVMSQ 0x0000000000008000 #define MPC_OBJECT_SYSVSEM 0x0000000000010000 #define MPC_OBJECT_SYSVSHM 0x0000000000020000 #define MPC_OBJECT_SYNCACHE 0x0000000000040000 #define MPC_OBJECT_IP6Q 0x0000000000080000 /* * MAC Framework global variables. */ extern struct mac_policy_list_head mac_policy_list; extern struct mac_policy_list_head mac_static_policy_list; extern u_int mac_policy_count; extern uint64_t mac_labeled; extern struct mtx mac_ifnet_mtx; /* * MAC Framework infrastructure functions. */ int mac_error_select(int error1, int error2); void mac_policy_slock_nosleep(struct rm_priotracker *tracker); void mac_policy_slock_sleep(void); void mac_policy_sunlock_nosleep(struct rm_priotracker *tracker); void mac_policy_sunlock_sleep(void); struct label *mac_labelzone_alloc(int flags); void mac_labelzone_free(struct label *label); void mac_labelzone_init(void); void mac_init_label(struct label *label); void mac_destroy_label(struct label *label); int mac_check_structmac_consistent(struct mac *mac); int mac_allocate_slot(void); -#define MAC_IFNET_LOCK(ifp) mtx_lock(&mac_ifnet_mtx) -#define MAC_IFNET_UNLOCK(ifp) mtx_unlock(&mac_ifnet_mtx) +/* + * Lock ifnets to protect labels only if ifnet labels are in use. + */ +#define MAC_IFNET_LOCK(ifp, locked) do { \ + if (mac_labeled & MPC_OBJECT_IFNET) { \ + mtx_lock(&mac_ifnet_mtx); \ + locked = 1; \ + } else { \ + locked = 0; \ + } \ +} while (0) + +#define MAC_IFNET_UNLOCK(ifp, locked) do { \ + if (locked) { \ + mtx_unlock(&mac_ifnet_mtx); \ + locked = 0; \ + } \ +} while (0) /* * MAC Framework per-object type functions. It's not yet clear how the * namespaces, etc, should work for these, so for now, sort by object type. 
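Usage pattern for the reworked MAC_IFNET_LOCK/MAC_IFNET_UNLOCK macros above, as a minimal sketch; mac_example_ifnet_op is hypothetical, the real call sites are in mac_net.c and mac_inet.c elsewhere in this change. The caller supplies a local flag that MAC_IFNET_LOCK sets only when some loaded policy actually labels ifnets, and that MAC_IFNET_UNLOCK then consumes:

	/* Hypothetical caller showing the conditional lock/unlock pairing. */
	static void
	mac_example_ifnet_op(struct ifnet *ifp)
	{
		int locked;

		MAC_IFNET_LOCK(ifp, locked);	/* locks iff MPC_OBJECT_IFNET is set */
		MAC_POLICY_PERFORM_NOSLEEP(ifnet_create, ifp, ifp->if_label);
		MAC_IFNET_UNLOCK(ifp, locked);	/* unlocks iff locked above */
	}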
*/ struct label *mac_cred_label_alloc(void); void mac_cred_label_free(struct label *label); struct label *mac_pipe_label_alloc(void); void mac_pipe_label_free(struct label *label); struct label *mac_socket_label_alloc(int flag); void mac_socket_label_free(struct label *label); struct label *mac_vnode_label_alloc(void); void mac_vnode_label_free(struct label *label); int mac_cred_check_relabel(struct ucred *cred, struct label *newlabel); int mac_cred_externalize_label(struct label *label, char *elements, char *outbuf, size_t outbuflen); int mac_cred_internalize_label(struct label *label, char *string); void mac_cred_relabel(struct ucred *cred, struct label *newlabel); struct label *mac_mbuf_to_label(struct mbuf *m); void mac_pipe_copy_label(struct label *src, struct label *dest); int mac_pipe_externalize_label(struct label *label, char *elements, char *outbuf, size_t outbuflen); int mac_pipe_internalize_label(struct label *label, char *string); int mac_socket_label_set(struct ucred *cred, struct socket *so, struct label *label); void mac_socket_copy_label(struct label *src, struct label *dest); int mac_socket_externalize_label(struct label *label, char *elements, char *outbuf, size_t outbuflen); int mac_socket_internalize_label(struct label *label, char *string); int mac_vnode_externalize_label(struct label *label, char *elements, char *outbuf, size_t outbuflen); int mac_vnode_internalize_label(struct label *label, char *string); void mac_vnode_check_mmap_downgrade(struct ucred *cred, struct vnode *vp, int *prot); int vn_setlabel(struct vnode *vp, struct label *intlabel, struct ucred *cred); /* * MAC Framework composition macros invoke all registered MAC policies for a * specific entry point. They come in two forms: one which permits policies * to sleep/block, and another that does not. * * MAC_POLICY_CHECK performs the designated check by walking the policy * module list and checking with each as to how it feels about the request. * Note that it returns its value via 'error' in the scope of the caller. */ #define MAC_POLICY_CHECK(check, args...) do { \ struct mac_policy_conf *mpc; \ \ error = 0; \ LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) { \ if (mpc->mpc_ops->mpo_ ## check != NULL) \ error = mac_error_select( \ mpc->mpc_ops->mpo_ ## check (args), \ error); \ } \ if (!LIST_EMPTY(&mac_policy_list)) { \ mac_policy_slock_sleep(); \ LIST_FOREACH(mpc, &mac_policy_list, mpc_list) { \ if (mpc->mpc_ops->mpo_ ## check != NULL) \ error = mac_error_select( \ mpc->mpc_ops->mpo_ ## check (args), \ error); \ } \ mac_policy_sunlock_sleep(); \ } \ } while (0) #define MAC_POLICY_CHECK_NOSLEEP(check, args...) do { \ struct mac_policy_conf *mpc; \ \ error = 0; \ LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) { \ if (mpc->mpc_ops->mpo_ ## check != NULL) \ error = mac_error_select( \ mpc->mpc_ops->mpo_ ## check (args), \ error); \ } \ if (!LIST_EMPTY(&mac_policy_list)) { \ struct rm_priotracker tracker; \ \ mac_policy_slock_nosleep(&tracker); \ LIST_FOREACH(mpc, &mac_policy_list, mpc_list) { \ if (mpc->mpc_ops->mpo_ ## check != NULL) \ error = mac_error_select( \ mpc->mpc_ops->mpo_ ## check (args), \ error); \ } \ mac_policy_sunlock_nosleep(&tracker); \ } \ } while (0) /* * MAC_POLICY_GRANT performs the designated check by walking the policy * module list and checking with each as to how it feels about the request. * Unlike MAC_POLICY_CHECK, it grants if any policies return '0', and * otherwise returns EPERM. Note that it returns its value via 'error' in * the scope of the caller. 
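One non-obvious contract of MAC_POLICY_CHECK and MAC_POLICY_CHECK_NOSLEEP, shown as a sketch (mac_example_check is hypothetical): the macro assigns to an 'error' variable that must already exist in the caller's scope, which is why every framework entry point built on these macros declares one.

	/* Hypothetical entry point; 'error' is required by the macro expansion. */
	static int
	mac_example_check(struct ifnet *ifp, struct mbuf *m, struct label *mlabel)
	{
		int error;

		MAC_POLICY_CHECK_NOSLEEP(ifnet_check_transmit, ifp, ifp->if_label,
		    m, mlabel);
		return (error);
	}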
*/ #define MAC_POLICY_GRANT_NOSLEEP(check, args...) do { \ struct mac_policy_conf *mpc; \ \ error = EPERM; \ LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) { \ if (mpc->mpc_ops->mpo_ ## check != NULL) { \ if (mpc->mpc_ops->mpo_ ## check(args) == 0) \ error = 0; \ } \ } \ if (!LIST_EMPTY(&mac_policy_list)) { \ struct rm_priotracker tracker; \ \ mac_policy_slock_nosleep(&tracker); \ LIST_FOREACH(mpc, &mac_policy_list, mpc_list) { \ if (mpc->mpc_ops->mpo_ ## check != NULL) { \ if (mpc->mpc_ops->mpo_ ## check (args) \ == 0) \ error = 0; \ } \ } \ mac_policy_sunlock_nosleep(&tracker); \ } \ } while (0) /* * MAC_POLICY_BOOLEAN performs the designated boolean composition by walking * the module list, invoking each instance of the operation, and combining * the results using the passed C operator. Note that it returns its value * via 'result' in the scope of the caller, which should be initialized by * the caller in a meaningful way to get a meaningful result. */ #define MAC_POLICY_BOOLEAN(operation, composition, args...) do { \ struct mac_policy_conf *mpc; \ \ LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) { \ if (mpc->mpc_ops->mpo_ ## operation != NULL) \ result = result composition \ mpc->mpc_ops->mpo_ ## operation (args); \ } \ if (!LIST_EMPTY(&mac_policy_list)) { \ mac_policy_slock_sleep(); \ LIST_FOREACH(mpc, &mac_policy_list, mpc_list) { \ if (mpc->mpc_ops->mpo_ ## operation != NULL) \ result = result composition \ mpc->mpc_ops->mpo_ ## operation \ (args); \ } \ mac_policy_sunlock_sleep(); \ } \ } while (0) #define MAC_POLICY_BOOLEAN_NOSLEEP(operation, composition, args...) do {\ struct mac_policy_conf *mpc; \ \ LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) { \ if (mpc->mpc_ops->mpo_ ## operation != NULL) \ result = result composition \ mpc->mpc_ops->mpo_ ## operation (args); \ } \ if (!LIST_EMPTY(&mac_policy_list)) { \ struct rm_priotracker tracker; \ \ mac_policy_slock_nosleep(&tracker); \ LIST_FOREACH(mpc, &mac_policy_list, mpc_list) { \ if (mpc->mpc_ops->mpo_ ## operation != NULL) \ result = result composition \ mpc->mpc_ops->mpo_ ## operation \ (args); \ } \ mac_policy_sunlock_nosleep(&tracker); \ } \ } while (0) /* * MAC_POLICY_EXTERNALIZE queries each policy to see if it can generate an * externalized version of a label element by name. Policies declare whether * they have matched a particular element name, parsed from the string by * MAC_POLICY_EXTERNALIZE, and an error is returned if any element is matched * by no policy. */ #define MAC_POLICY_EXTERNALIZE(type, label, elementlist, outbuf, \ outbuflen) do { \ int claimed, first, ignorenotfound, savedlen; \ char *element_name, *element_temp; \ struct sbuf sb; \ \ error = 0; \ first = 1; \ sbuf_new(&sb, outbuf, outbuflen, SBUF_FIXEDLEN); \ element_temp = elementlist; \ while ((element_name = strsep(&element_temp, ",")) != NULL) { \ if (element_name[0] == '?') { \ element_name++; \ ignorenotfound = 1; \ } else \ ignorenotfound = 0; \ savedlen = sbuf_len(&sb); \ if (first) \ error = sbuf_printf(&sb, "%s/", element_name); \ else \ error = sbuf_printf(&sb, ",%s/", element_name); \ if (error == -1) { \ error = EINVAL; /* XXX: E2BIG? */ \ break; \ } \ claimed = 0; \ MAC_POLICY_CHECK(type ## _externalize_label, label, \ element_name, &sb, &claimed); \ if (error) \ break; \ if (claimed == 0 && ignorenotfound) { \ /* Revert last label name. */ \ sbuf_setpos(&sb, savedlen); \ } else if (claimed != 1) { \ error = EINVAL; /* XXX: ENOLABEL? 
*/ \ break; \ } else { \ first = 0; \ } \ } \ sbuf_finish(&sb); \ } while (0) /* * MAC_POLICY_INTERNALIZE presents parsed element names and data to each * policy to see if any is willing to claim it and internalize the label * data. If no policies match, an error is returned. */ #define MAC_POLICY_INTERNALIZE(type, label, instring) do { \ char *element, *element_name, *element_data; \ int claimed; \ \ error = 0; \ element = instring; \ while ((element_name = strsep(&element, ",")) != NULL) { \ element_data = element_name; \ element_name = strsep(&element_data, "/"); \ if (element_data == NULL) { \ error = EINVAL; \ break; \ } \ claimed = 0; \ MAC_POLICY_CHECK(type ## _internalize_label, label, \ element_name, element_data, &claimed); \ if (error) \ break; \ if (claimed != 1) { \ /* XXXMAC: Another error here? */ \ error = EINVAL; \ break; \ } \ } \ } while (0) /* * MAC_POLICY_PERFORM performs the designated operation by walking the policy * module list and invoking that operation for each policy. */ #define MAC_POLICY_PERFORM(operation, args...) do { \ struct mac_policy_conf *mpc; \ \ LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) { \ if (mpc->mpc_ops->mpo_ ## operation != NULL) \ mpc->mpc_ops->mpo_ ## operation (args); \ } \ if (!LIST_EMPTY(&mac_policy_list)) { \ mac_policy_slock_sleep(); \ LIST_FOREACH(mpc, &mac_policy_list, mpc_list) { \ if (mpc->mpc_ops->mpo_ ## operation != NULL) \ mpc->mpc_ops->mpo_ ## operation (args); \ } \ mac_policy_sunlock_sleep(); \ } \ } while (0) #define MAC_POLICY_PERFORM_NOSLEEP(operation, args...) do { \ struct mac_policy_conf *mpc; \ \ LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) { \ if (mpc->mpc_ops->mpo_ ## operation != NULL) \ mpc->mpc_ops->mpo_ ## operation (args); \ } \ if (!LIST_EMPTY(&mac_policy_list)) { \ struct rm_priotracker tracker; \ \ mac_policy_slock_nosleep(&tracker); \ LIST_FOREACH(mpc, &mac_policy_list, mpc_list) { \ if (mpc->mpc_ops->mpo_ ## operation != NULL) \ mpc->mpc_ops->mpo_ ## operation (args); \ } \ mac_policy_sunlock_nosleep(&tracker); \ } \ } while (0) #endif /* !_SECURITY_MAC_MAC_INTERNAL_H_ */ Index: projects/runtime-coverage-v2/sys/security/mac/mac_net.c =================================================================== --- projects/runtime-coverage-v2/sys/security/mac/mac_net.c (revision 347075) +++ projects/runtime-coverage-v2/sys/security/mac/mac_net.c (revision 347076) @@ -1,501 +1,508 @@ /*- - * Copyright (c) 1999-2002, 2009 Robert N. M. Watson + * Copyright (c) 1999-2002, 2009, 2019 Robert N. M. Watson * Copyright (c) 2001 Ilmar S. Habibulin * Copyright (c) 2001-2004 Networks Associates Technology, Inc. * Copyright (c) 2006 SPARTA, Inc. * Copyright (c) 2008 Apple Inc. * All rights reserved. * * This software was developed by Robert Watson and Ilmar Habibulin for the * TrustedBSD Project. * * This software was enhanced by SPARTA ISSO under SPAWAR contract * N66001-04-C-6019 ("SEFOS"). * * This software was developed for the FreeBSD Project in part by Network * Associates Laboratories, the Security Research Division of Network * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), * as part of the DARPA CHATS research program. * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * XXXRW: struct ifnet locking is incomplete in the network code, so we use * our own global mutex for struct ifnet. Non-ideal, but should help in the * SMP environment. + * + * This lock is acquired only if a loaded policy is using ifnet labeling. + * This should not ever change during a MAC policy check, itself, but could + * change during setup/return from a check, so we have to condition unlock on + * previous lock. */ struct mtx mac_ifnet_mtx; MTX_SYSINIT(mac_ifnet_mtx, &mac_ifnet_mtx, "mac_ifnet", MTX_DEF); /* * Retrieve the label associated with an mbuf by searching for the tag. * Depending on the value of mac_labelmbufs, it's possible that a label will * not be present, in which case NULL is returned. Policies must handle the * possibility of an mbuf not having label storage if they do not enforce * early loading. 
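The tag lookup documented above relies on the label being co-allocated immediately behind its m_tag header, hence the (struct label *)(tag + 1) arithmetic seen throughout this file; roughly:

	/*
	 * Layout assumed by mac_mbuf_to_label() and the tag init/copy/destroy
	 * routines (sketch):
	 *
	 *   +---------------+----------------+
	 *   | struct m_tag  | struct label   |
	 *   +---------------+----------------+
	 *   ^ tag            ^ (struct label *)(tag + 1)
	 *
	 * m_tag_get(PACKET_TAG_MACLABEL, sizeof(struct label), flag) allocates
	 * both pieces in one shot.
	 */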
*/ struct label * mac_mbuf_to_label(struct mbuf *m) { struct m_tag *tag; struct label *label; if (m == NULL) return (NULL); tag = m_tag_find(m, PACKET_TAG_MACLABEL, NULL); if (tag == NULL) return (NULL); label = (struct label *)(tag+1); return (label); } static struct label * mac_bpfdesc_label_alloc(void) { struct label *label; label = mac_labelzone_alloc(M_WAITOK); MAC_POLICY_PERFORM(bpfdesc_init_label, label); return (label); } void mac_bpfdesc_init(struct bpf_d *d) { if (mac_labeled & MPC_OBJECT_BPFDESC) d->bd_label = mac_bpfdesc_label_alloc(); else d->bd_label = NULL; } static struct label * mac_ifnet_label_alloc(void) { struct label *label; label = mac_labelzone_alloc(M_WAITOK); MAC_POLICY_PERFORM(ifnet_init_label, label); return (label); } void mac_ifnet_init(struct ifnet *ifp) { if (mac_labeled & MPC_OBJECT_IFNET) ifp->if_label = mac_ifnet_label_alloc(); else ifp->if_label = NULL; } int mac_mbuf_tag_init(struct m_tag *tag, int flag) { struct label *label; int error; label = (struct label *) (tag + 1); mac_init_label(label); if (flag & M_WAITOK) MAC_POLICY_CHECK(mbuf_init_label, label, flag); else MAC_POLICY_CHECK_NOSLEEP(mbuf_init_label, label, flag); if (error) { MAC_POLICY_PERFORM_NOSLEEP(mbuf_destroy_label, label); mac_destroy_label(label); } return (error); } int mac_mbuf_init(struct mbuf *m, int flag) { struct m_tag *tag; int error; M_ASSERTPKTHDR(m); if (mac_labeled & MPC_OBJECT_MBUF) { tag = m_tag_get(PACKET_TAG_MACLABEL, sizeof(struct label), flag); if (tag == NULL) return (ENOMEM); error = mac_mbuf_tag_init(tag, flag); if (error) { m_tag_free(tag); return (error); } m_tag_prepend(m, tag); } return (0); } static void mac_bpfdesc_label_free(struct label *label) { MAC_POLICY_PERFORM_NOSLEEP(bpfdesc_destroy_label, label); mac_labelzone_free(label); } void mac_bpfdesc_destroy(struct bpf_d *d) { if (d->bd_label != NULL) { mac_bpfdesc_label_free(d->bd_label); d->bd_label = NULL; } } static void mac_ifnet_label_free(struct label *label) { MAC_POLICY_PERFORM_NOSLEEP(ifnet_destroy_label, label); mac_labelzone_free(label); } void mac_ifnet_destroy(struct ifnet *ifp) { if (ifp->if_label != NULL) { mac_ifnet_label_free(ifp->if_label); ifp->if_label = NULL; } } void mac_mbuf_tag_destroy(struct m_tag *tag) { struct label *label; label = (struct label *)(tag+1); MAC_POLICY_PERFORM_NOSLEEP(mbuf_destroy_label, label); mac_destroy_label(label); } /* * mac_mbuf_tag_copy is called when an mbuf header is duplicated, in which * case the labels must also be duplicated. */ void mac_mbuf_tag_copy(struct m_tag *src, struct m_tag *dest) { struct label *src_label, *dest_label; src_label = (struct label *)(src+1); dest_label = (struct label *)(dest+1); /* * mac_mbuf_tag_init() is called on the target tag in m_tag_copy(), * so we don't need to call it here. 
*/ MAC_POLICY_PERFORM_NOSLEEP(mbuf_copy_label, src_label, dest_label); } void mac_mbuf_copy(struct mbuf *m_from, struct mbuf *m_to) { struct label *src_label, *dest_label; if (mac_policy_count == 0) return; src_label = mac_mbuf_to_label(m_from); dest_label = mac_mbuf_to_label(m_to); MAC_POLICY_PERFORM_NOSLEEP(mbuf_copy_label, src_label, dest_label); } static void mac_ifnet_copy_label(struct label *src, struct label *dest) { MAC_POLICY_PERFORM_NOSLEEP(ifnet_copy_label, src, dest); } static int mac_ifnet_externalize_label(struct label *label, char *elements, char *outbuf, size_t outbuflen) { int error; MAC_POLICY_EXTERNALIZE(ifnet, label, elements, outbuf, outbuflen); return (error); } static int mac_ifnet_internalize_label(struct label *label, char *string) { int error; MAC_POLICY_INTERNALIZE(ifnet, label, string); return (error); } void mac_ifnet_create(struct ifnet *ifp) { + int locked; if (mac_policy_count == 0) return; - MAC_IFNET_LOCK(ifp); + MAC_IFNET_LOCK(ifp, locked); MAC_POLICY_PERFORM_NOSLEEP(ifnet_create, ifp, ifp->if_label); - MAC_IFNET_UNLOCK(ifp); + MAC_IFNET_UNLOCK(ifp, locked); } void mac_bpfdesc_create(struct ucred *cred, struct bpf_d *d) { MAC_POLICY_PERFORM_NOSLEEP(bpfdesc_create, cred, d, d->bd_label); } void mac_bpfdesc_create_mbuf(struct bpf_d *d, struct mbuf *m) { struct label *label; /* Assume reader lock is enough. */ BPFD_LOCK_ASSERT(d); if (mac_policy_count == 0) return; label = mac_mbuf_to_label(m); MAC_POLICY_PERFORM_NOSLEEP(bpfdesc_create_mbuf, d, d->bd_label, m, label); } void mac_ifnet_create_mbuf(struct ifnet *ifp, struct mbuf *m) { struct label *label; + int locked; if (mac_policy_count == 0) return; label = mac_mbuf_to_label(m); - MAC_IFNET_LOCK(ifp); + MAC_IFNET_LOCK(ifp, locked); MAC_POLICY_PERFORM_NOSLEEP(ifnet_create_mbuf, ifp, ifp->if_label, m, label); - MAC_IFNET_UNLOCK(ifp); + MAC_IFNET_UNLOCK(ifp, locked); } MAC_CHECK_PROBE_DEFINE2(bpfdesc_check_receive, "struct bpf_d *", "struct ifnet *"); int mac_bpfdesc_check_receive(struct bpf_d *d, struct ifnet *ifp) { - int error; + int error, locked; /* Assume reader lock is enough. 
*/ BPFD_LOCK_ASSERT(d); if (mac_policy_count == 0) return (0); - MAC_IFNET_LOCK(ifp); + MAC_IFNET_LOCK(ifp, locked); MAC_POLICY_CHECK_NOSLEEP(bpfdesc_check_receive, d, d->bd_label, ifp, ifp->if_label); MAC_CHECK_PROBE2(bpfdesc_check_receive, error, d, ifp); - MAC_IFNET_UNLOCK(ifp); + MAC_IFNET_UNLOCK(ifp, locked); return (error); } MAC_CHECK_PROBE_DEFINE2(ifnet_check_transmit, "struct ifnet *", "struct mbuf *"); int mac_ifnet_check_transmit(struct ifnet *ifp, struct mbuf *m) { struct label *label; - int error; + int error, locked; M_ASSERTPKTHDR(m); if (mac_policy_count == 0) return (0); label = mac_mbuf_to_label(m); - MAC_IFNET_LOCK(ifp); + MAC_IFNET_LOCK(ifp, locked); MAC_POLICY_CHECK_NOSLEEP(ifnet_check_transmit, ifp, ifp->if_label, m, label); MAC_CHECK_PROBE2(ifnet_check_transmit, error, ifp, m); - MAC_IFNET_UNLOCK(ifp); + MAC_IFNET_UNLOCK(ifp, locked); return (error); } int mac_ifnet_ioctl_get(struct ucred *cred, struct ifreq *ifr, struct ifnet *ifp) { char *elements, *buffer; struct label *intlabel; struct mac mac; - int error; + int error, locked; if (!(mac_labeled & MPC_OBJECT_IFNET)) return (EINVAL); error = copyin(ifr_data_get_ptr(ifr), &mac, sizeof(mac)); if (error) return (error); error = mac_check_structmac_consistent(&mac); if (error) return (error); elements = malloc(mac.m_buflen, M_MACTEMP, M_WAITOK); error = copyinstr(mac.m_string, elements, mac.m_buflen, NULL); if (error) { free(elements, M_MACTEMP); return (error); } buffer = malloc(mac.m_buflen, M_MACTEMP, M_WAITOK | M_ZERO); intlabel = mac_ifnet_label_alloc(); - MAC_IFNET_LOCK(ifp); + MAC_IFNET_LOCK(ifp, locked); mac_ifnet_copy_label(ifp->if_label, intlabel); - MAC_IFNET_UNLOCK(ifp); + MAC_IFNET_UNLOCK(ifp, locked); error = mac_ifnet_externalize_label(intlabel, elements, buffer, mac.m_buflen); mac_ifnet_label_free(intlabel); if (error == 0) error = copyout(buffer, mac.m_string, strlen(buffer)+1); free(buffer, M_MACTEMP); free(elements, M_MACTEMP); return (error); } int mac_ifnet_ioctl_set(struct ucred *cred, struct ifreq *ifr, struct ifnet *ifp) { struct label *intlabel; struct mac mac; char *buffer; - int error; + int error, locked; if (!(mac_labeled & MPC_OBJECT_IFNET)) return (EINVAL); error = copyin(ifr_data_get_ptr(ifr), &mac, sizeof(mac)); if (error) return (error); error = mac_check_structmac_consistent(&mac); if (error) return (error); buffer = malloc(mac.m_buflen, M_MACTEMP, M_WAITOK); error = copyinstr(mac.m_string, buffer, mac.m_buflen, NULL); if (error) { free(buffer, M_MACTEMP); return (error); } intlabel = mac_ifnet_label_alloc(); error = mac_ifnet_internalize_label(intlabel, buffer); free(buffer, M_MACTEMP); if (error) { mac_ifnet_label_free(intlabel); return (error); } /* * XXX: Note that this is a redundant privilege check, since policies * impose this check themselves if required by the policy * Eventually, this should go away. 
*/ error = priv_check_cred(cred, PRIV_NET_SETIFMAC); if (error) { mac_ifnet_label_free(intlabel); return (error); } - MAC_IFNET_LOCK(ifp); + MAC_IFNET_LOCK(ifp, locked); MAC_POLICY_CHECK_NOSLEEP(ifnet_check_relabel, cred, ifp, ifp->if_label, intlabel); if (error) { - MAC_IFNET_UNLOCK(ifp); + MAC_IFNET_UNLOCK(ifp, locked); mac_ifnet_label_free(intlabel); return (error); } MAC_POLICY_PERFORM_NOSLEEP(ifnet_relabel, cred, ifp, ifp->if_label, intlabel); - MAC_IFNET_UNLOCK(ifp); + MAC_IFNET_UNLOCK(ifp, locked); mac_ifnet_label_free(intlabel); return (0); } Index: projects/runtime-coverage-v2/sys/sparc64/conf/GENERIC =================================================================== --- projects/runtime-coverage-v2/sys/sparc64/conf/GENERIC (revision 347075) +++ projects/runtime-coverage-v2/sys/sparc64/conf/GENERIC (revision 347076) @@ -1,261 +1,262 @@ # # GENERIC -- Generic kernel configuration file for FreeBSD/sparc64 # # For more information on this file, please read the config(5) manual page, # and/or the handbook section on Kernel Configuration Files: # # https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ cpu SUN4U ident GENERIC makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols # Platforms supported # At this time all platforms are supported, as-is. options SCHED_ULE # ULE scheduler options PREEMPTION # Enable kernel thread preemption options VIMAGE # Subsystem virtualization, e.g. VNET options INET # InterNETworking options INET6 # IPv6 communications protocols options IPSEC # IP (v4/v6) security options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options TCP_HHOOK # hhook(9) framework for TCP options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support options UFS_ACL # Support for access control lists options UFS_DIRHASH # Improve performance on big directories options UFS_GJOURNAL # Enable gjournal-based UFS journaling options QUOTA # Enable disk quotas for UFS options MD_ROOT # MD is a potential root device options NFSCL # Network Filesystem Client options NFSD # Network Filesystem Server options NFSLOCKD # Network Lock Manager options NFS_ROOT # NFS usable as /, requires NFSCL #options MSDOSFS # MSDOS Filesystem options CD9660 # ISO 9660 Filesystem options PROCFS # Process filesystem (requires PSEUDOFS) options PSEUDOFS # Pseudo-filesystem framework options GEOM_PART_GPT # GUID Partition Tables. 
options GEOM_LABEL # Provides labelization options COMPAT_FREEBSD5 # Compatible with FreeBSD5 options COMPAT_FREEBSD6 # Compatible with FreeBSD6 options COMPAT_FREEBSD7 # Compatible with FreeBSD7 options COMPAT_FREEBSD9 # Compatible with FreeBSD9 options COMPAT_FREEBSD10 # Compatible with FreeBSD10 options COMPAT_FREEBSD11 # Compatible with FreeBSD11 +options COMPAT_FREEBSD12 # Compatible with FreeBSD12 options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI options KTRACE # ktrace(1) support options STACK # stack(9) support options SYSVSHM # SYSV-style shared memory options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options INCLUDE_CONFIG_FILE # Include this file in kernel options RACCT # Resource accounting framework options RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default options RCTL # Resource limits # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB # Support DDB. options GDB # Support remote GDB. options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default # Kernel dump features. 
options EKCD # Support for encrypted kernel dumps options GZIO # gzip-compressed kernel and user dumps options ZSTDIO # zstd-compressed kernel and user dumps options NETDUMP # netdump(4) client support # Make an SMP-capable kernel by default options SMP # Symmetric MultiProcessor Kernel # Standard busses device ebus device isa device pci device sbus device central device fhc # Floppy drives #device fdc # ATA controllers device ahci # AHCI-compatible SATA controllers device ata # Legacy ATA/SATA controllers device mvs # Marvell 88SX50XX/88SX60XX/88SX70XX/SoC SATA device siis # SiliconImage SiI3124/SiI3132/SiI3531 SATA # SCSI Controllers device ahc # AHA2940 and onboard AIC7xxx devices options AHC_ALLOW_MEMIO # Attempt to use memory mapped I/O device esp # AMD Am53C974, Sun ESP and FAS families device isp # Qlogic family device ispfw # Firmware module for Qlogic host adapters device mpt # LSI-Logic MPT-Fusion device mps # LSI-Logic MPT-Fusion 2 device mpr # LSI-Logic MPT-Fusion 3 device sym # NCR/Symbios/LSI Logic 53C8XX/53C1010/53C1510D # ATA/SCSI peripherals device scbus # SCSI bus (required for ATA/SCSI) device ch # SCSI media changers device da # Direct Access (disks) device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) device ses # Enclosure Services (SES and SAF-TE) #device ctl # CAM Target Layer # RAID controllers #device amr # AMI MegaRAID #device mlx # Mylex DAC960 family # atkbdc0 controls both the keyboard and the PS/2 mouse device atkbdc # AT keyboard controller device atkbd # AT keyboard device psm # PS/2 mouse device kbdmux # keyboard multiplexer # syscons is the default console driver, resembling an SCO console device sc device creator # Creator, Creator3D and Elite3D framebuffers device machfb # ATI Mach64 framebuffers device splash # Splash screen and screen saver support options KBD_INSTALL_CDEV # install a CDEV entry in /dev # vt is the new video console driver #device vt # Builtin hardware device auxio # auxiliary I/O device device eeprom # eeprom (really a front-end for the MK48Txx) device mk48txx # Mostek MK48Txx clocks device rtc # rtc (really a front-end for the MC146818) device mc146818 # Motorola MC146818 and compatible clocks device epic # Sun Fire V215/V245 LEDs device sbbc # Sun BootBus controller (time-of-day clock for # Serengeti and StarCat, console for Serengeti, # requires device uart) # Serial (COM) ports device puc # Multi-channel uarts device scc # Serial communications controllers. device uart # Multi-uart driver # Parallel port #device ppc #device ppbus # Parallel port bus (required) #device lpt # Printer #device ppi # Parallel port interface device #device vpo # Requires scbus and da device iflib # PCI Ethernet NICs. #device de # DEC/Intel DC21x4x (``Tulip'') device em # Intel PRO/1000 adapter Gigabit Ethernet Card device le # AMD Am7900 LANCE and Am79C9xx PCnet device ti # Alteon Networks Tigon I/II gigabit Ethernet device txp # 3Com 3cR990 (``Typhoon'') #device vx # 3Com 3c590, 3c595 (``Vortex'') # PCI Ethernet NICs that use the common MII bus controller code. # NOTE: Be sure to keep the 'device miibus' line in order to use these NICs! 
device miibus # MII bus support #device bfe # Broadcom BCM440x 10/100 Ethernet device bge # Broadcom BCM570xx Gigabit Ethernet device cas # Sun Cassini/Cassini+ and NS DP83065 Saturn device dc # DEC/Intel 21143 and various workalikes device fxp # Intel EtherExpress PRO/100B (82557, 82558) device gem # Sun GEM/Sun ERI/Apple GMAC device hme # Sun HME (Happy Meal Ethernet) device nge # NatSemi DP83820 gigabit Ethernet #device pcn # AMD Am79C97x PCI 10/100 (precedence over 'le') device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 device sf # Adaptec AIC-6915 (``Starfire'') device sis # Silicon Integrated Systems SiS 900/SiS 7016 device sk # SysKonnect SK-984x & SK-982x gigabit Ethernet device ste # Sundance ST201 (D-Link DFE-550TX) device stge # Sundance/Tamarack TC9021 gigabit Ethernet #device tl # Texas Instruments ThunderLAN #device tx # SMC EtherPower II (83c170 ``EPIC'') device vr # VIA Rhine, Rhine II #device wb # Winbond W89C840F device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') # Wireless NIC cards device wlan # 802.11 support options IEEE80211_DEBUG # enable debug msgs options IEEE80211_SUPPORT_MESH # enable 802.11s D3.0 support device wlan_wep # 802.11 WEP support device wlan_ccmp # 802.11 CCMP support device wlan_tkip # 802.11 TKIP support device wlan_amrr # AMRR transmit rate control algorithm device ath # Atheros NICs device ath_pci # Atheros pci/cardbus glue device ath_hal # Atheros HAL (Hardware Access Layer) device ath_rate_sample # SampleRate tx rate control for ath # Pseudo devices. device crypto # core crypto support device loop # Network loopback device random # Entropy device device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. device bpf # Berkeley packet filter # USB support options USB_DEBUG # enable debug msgs device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface (USB 2.0) device usb # USB Bus (required) device ukbd # Keyboard device umass # Disks/Mass storage - Requires scbus and da # Sound support device sound # Generic sound driver (required) device snd_audiocs # Crystal Semiconductor CS4231 device snd_es137x # Ensoniq AudioPCI ES137x device snd_t4dwave # Acer Labs M5451 Index: projects/runtime-coverage-v2/sys/ufs/ufs/dir.h =================================================================== --- projects/runtime-coverage-v2/sys/ufs/ufs/dir.h (revision 347075) +++ projects/runtime-coverage-v2/sys/ufs/ufs/dir.h (revision 347076) @@ -1,158 +1,156 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)dir.h 8.2 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _UFS_UFS_DIR_H_ #define _UFS_UFS_DIR_H_ /* * Theoretically, directories can be more than 2Gb in length; however, in * practice this seems unlikely. So, we define the type doff_t as a 32-bit * quantity to keep down the cost of doing lookup on a 32-bit machine. */ #define doff_t int32_t #define MAXDIRSIZE (0x7fffffff) /* * A directory consists of some number of blocks of DIRBLKSIZ * bytes, where DIRBLKSIZ is chosen such that it can be transferred * to disk in a single atomic operation (e.g. 512 bytes on most machines). * * Each DIRBLKSIZ byte block contains some number of directory entry * structures, which are of variable length. Each directory entry has * a struct direct at the front of it, containing its inode number, * the length of the entry, and the length of the name contained in * the entry. These are followed by the name padded to a 4 byte boundary * with null bytes. All names are guaranteed null terminated. * The maximum length of a name in a directory is UFS_MAXNAMLEN. * * The macro DIRSIZ(fmt, dp) gives the amount of space required to represent * a directory entry. Free space in a directory is represented by * entries which have dp->d_reclen > DIRSIZ(fmt, dp). All DIRBLKSIZ bytes * in a directory block are claimed by the directory entries. This * usually results in the last entry in a directory having a large * dp->d_reclen. When entries are deleted from a directory, the * space is returned to the previous entry in the same directory * block by increasing its dp->d_reclen. If the first entry of * a directory block is free, then its dp->d_ino is set to 0. * Entries other than the first in a directory do not normally have * dp->d_ino set to 0. */ #define DIRBLKSIZ DEV_BSIZE #define UFS_MAXNAMLEN 255 struct direct { u_int32_t d_ino; /* inode number of entry */ u_int16_t d_reclen; /* length of this record */ u_int8_t d_type; /* file type, see below */ u_int8_t d_namlen; /* length of string in d_name */ char d_name[UFS_MAXNAMLEN + 1]; /* name with length <= UFS_MAXNAMLEN */ }; /* * File types */ #define DT_UNKNOWN 0 #define DT_FIFO 1 #define DT_CHR 2 #define DT_DIR 4 #define DT_BLK 6 #define DT_REG 8 #define DT_LNK 10 #define DT_SOCK 12 #define DT_WHT 14 /* * Convert between stat structure types and directory types.
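The sizing rules described above are easy to sanity-check in isolation. The following standalone sketch (an editor's illustration, not part of this change; DIR_HDR stands in for __offsetof(struct direct, d_name), which is 8 bytes here, and my_roundup2() mirrors the roundup2() macro from sys/param.h) evaluates both the old masking form and the new roundup2() form of the DIRECTSIZ macro defined a few lines below, and confirms they compute identical entry sizes:

#include <assert.h>

#define DIR_HDR		8	/* d_ino + d_reclen + d_type + d_namlen */
#define DIR_ROUNDUP	4	/* names are padded to a 4 byte boundary */
#define my_roundup2(x, y)	(((x) + ((y) - 1)) & ~((y) - 1))

#define OLD_DIRECTSIZ(namlen)	((DIR_HDR + ((namlen) + 1) + 3) & ~3)
#define NEW_DIRECTSIZ(namlen)	(my_roundup2(DIR_HDR + (namlen) + 1, DIR_ROUNDUP))

int
main(void)
{
	int namlen;

	assert(NEW_DIRECTSIZ(3) == 12);	/* "foo": 8 + 3 + 1 = 12, already aligned */
	assert(NEW_DIRECTSIZ(5) == 16);	/* "hello": 8 + 5 + 1 = 14, padded to 16 */
	for (namlen = 1; namlen <= 255; namlen++)
		assert(OLD_DIRECTSIZ(namlen) == NEW_DIRECTSIZ(namlen));
	return (0);
}

The rewrite is therefore purely cosmetic: only the spelling of the roundup changes, not the on-disk layout.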
*/ #define IFTODT(mode) (((mode) & 0170000) >> 12) #define DTTOIF(dirtype) ((dirtype) << 12) /* * The DIRSIZ macro gives the minimum record length which will hold * the directory entry. This requires the amount of space in struct direct * without the d_name field, plus enough space for the name with a terminating - * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary. - * - * + * null byte (dp->d_namlen + 1), rounded up to a 4 byte boundary. */ -#define DIRECTSIZ(namlen) \ - ((__offsetof(struct direct, d_name) + \ - ((namlen)+1)*sizeof(((struct direct *)0)->d_name[0]) + 3) & ~3) +#define DIR_ROUNDUP 4 /* Directory name roundup size */ +#define DIRECTSIZ(namlen) \ + (roundup2(__offsetof(struct direct, d_name) + (namlen) + 1, DIR_ROUNDUP)) #if (BYTE_ORDER == LITTLE_ENDIAN) #define DIRSIZ(oldfmt, dp) \ ((oldfmt) ? DIRECTSIZ((dp)->d_type) : DIRECTSIZ((dp)->d_namlen)) #else #define DIRSIZ(oldfmt, dp) \ DIRECTSIZ((dp)->d_namlen) #endif #define OLDDIRFMT 1 #define NEWDIRFMT 0 /* * Template for manipulating directories. Should use struct direct's, * but the name field is UFS_MAXNAMLEN - 1, and this just won't do. */ struct dirtemplate { u_int32_t dot_ino; int16_t dot_reclen; u_int8_t dot_type; u_int8_t dot_namlen; char dot_name[4]; /* must be multiple of 4 */ u_int32_t dotdot_ino; int16_t dotdot_reclen; u_int8_t dotdot_type; u_int8_t dotdot_namlen; char dotdot_name[4]; /* ditto */ }; /* * This is the old format of directories, sans type element. */ struct odirtemplate { u_int32_t dot_ino; int16_t dot_reclen; u_int16_t dot_namlen; char dot_name[4]; /* must be multiple of 4 */ u_int32_t dotdot_ino; int16_t dotdot_reclen; u_int16_t dotdot_namlen; char dotdot_name[4]; /* ditto */ }; #endif /* !_UFS_UFS_DIR_H_ */ Index: projects/runtime-coverage-v2/sys/ufs/ufs/ufs_bmap.c =================================================================== --- projects/runtime-coverage-v2/sys/ufs/ufs/ufs_bmap.c (revision 347075) +++ projects/runtime-coverage-v2/sys/ufs/ufs/ufs_bmap.c (revision 347076) @@ -1,387 +1,515 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +static ufs_lbn_t lbn_count(struct ufsmount *, int); +static int readindir(struct vnode *, ufs_lbn_t, ufs2_daddr_t, struct buf **); + /* * Bmap converts the logical block number of a file to its physical block * number on the disk. The conversion is done by using the logical block * number to index into the array of block pointers described by the dinode. */ int ufs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct bufobj **a_bop; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { ufs2_daddr_t blkno; int error; /* * Check for underlying vnode requests and ensure that logical * to physical mapping is requested. */ if (ap->a_bop != NULL) *ap->a_bop = &VFSTOUFS(ap->a_vp->v_mount)->um_devvp->v_bufobj; if (ap->a_bnp == NULL) return (0); error = ufs_bmaparray(ap->a_vp, ap->a_bn, &blkno, NULL, ap->a_runp, ap->a_runb); *ap->a_bnp = blkno; return (error); } +static int +readindir(vp, lbn, daddr, bpp) + struct vnode *vp; + ufs_lbn_t lbn; + ufs2_daddr_t daddr; + struct buf **bpp; +{ + struct buf *bp; + struct mount *mp; + struct ufsmount *ump; + int error; + + mp = vp->v_mount; + ump = VFSTOUFS(mp); + + bp = getblk(vp, lbn, mp->mnt_stat.f_iosize, 0, 0, 0); + if ((bp->b_flags & B_CACHE) == 0) { + KASSERT(daddr != 0, + ("readindir: indirect block not in cache")); + + bp->b_blkno = blkptrtodb(ump, daddr); + bp->b_iocmd = BIO_READ; + bp->b_flags &= ~B_INVAL; + bp->b_ioflags &= ~BIO_ERROR; + vfs_busy_pages(bp, 0); + bp->b_iooffset = dbtob(bp->b_blkno); + bstrategy(bp); +#ifdef RACCT + if (racct_enable) { + PROC_LOCK(curproc); + racct_add_buf(curproc, bp, 0); + PROC_UNLOCK(curproc); + } +#endif + curthread->td_ru.ru_inblock++; + error = bufwait(bp); + if (error != 0) { + brelse(bp); + return (error); + } + } + *bpp = bp; + return (0); +} + /* * Indirect blocks are now on the vnode for the file. They are given negative * logical block numbers. Indirect blocks are addressed by the negative * address of the first data block to which they point. Double indirect blocks * are addressed by one less than the address of the first indirect block to * which they point. Triple indirect blocks are addressed by one less than * the address of the first double indirect block to which they point. * * ufs_bmaparray does the bmap conversion, and if requested returns the * array of logical blocks which must be traversed to get to a block. * Each entry contains the offset into that block that gets you to the * next block and the disk address of the block (if it is assigned). 
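To make the negative-addressing rule concrete, here is a small userland sketch (an editor's illustration, not from this change; UFS_NDADDR == 12 and the 2048 pointers-per-block figure for a 16K-block UFS2 filesystem are assumptions) that computes the logical block number of the single indirect block mapping a given data block:

#include <stdio.h>

#define UFS_NDADDR	12	/* direct block pointers in the inode */

/*
 * The single indirect block holding "datalbn" is addressed by the
 * negative of the first data block it points to.
 */
static long
single_indir_lbn(long datalbn, long nindir)
{
	long first;

	first = UFS_NDADDR + ((datalbn - UFS_NDADDR) / nindir) * nindir;
	return (-first);
}

int
main(void)
{
	/* 16K blocks with 8-byte UFS2 pointers: nindir == 2048. */
	printf("%ld\n", single_indir_lbn(12, 2048));	/* -12 */
	printf("%ld\n", single_indir_lbn(2059, 2048));	/* -12 */
	printf("%ld\n", single_indir_lbn(2060, 2048));	/* -2060 */
	return (0);
}

By the same rule, the double indirect block covering the first of those ranges would live at -(UFS_NDADDR + nindir) - 1, one less than the first indirect block it points to.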
*/ int ufs_bmaparray(vp, bn, bnp, nbp, runp, runb) struct vnode *vp; ufs2_daddr_t bn; ufs2_daddr_t *bnp; struct buf *nbp; int *runp; int *runb; { struct inode *ip; struct buf *bp; struct ufsmount *ump; struct mount *mp; struct indir a[UFS_NIADDR+1], *ap; ufs2_daddr_t daddr; ufs_lbn_t metalbn; int error, num, maxrun = 0; int *nump; ap = NULL; ip = VTOI(vp); mp = vp->v_mount; ump = VFSTOUFS(mp); if (runp) { maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1; *runp = 0; } if (runb) { *runb = 0; } ap = a; nump = &num; error = ufs_getlbns(vp, bn, ap, nump); if (error) return (error); num = *nump; if (num == 0) { if (bn >= 0 && bn < UFS_NDADDR) { *bnp = blkptrtodb(ump, DIP(ip, i_db[bn])); } else if (bn < 0 && bn >= -UFS_NXADDR) { *bnp = blkptrtodb(ump, ip->i_din2->di_extb[-1 - bn]); if (*bnp == 0) *bnp = -1; if (nbp == NULL) panic("ufs_bmaparray: mapping ext data"); nbp->b_xflags |= BX_ALTDATA; return (0); } else { panic("ufs_bmaparray: blkno out of range"); } /* * Since this is FFS independent code, we are out of * scope for the definitions of BLK_NOCOPY and * BLK_SNAP, but we do know that they will fall in * the range 1..um_seqinc, so we use that test and * return a request for a zeroed out buffer if attempts * are made to read a BLK_NOCOPY or BLK_SNAP block. */ if ((ip->i_flags & SF_SNAPSHOT) && DIP(ip, i_db[bn]) > 0 && DIP(ip, i_db[bn]) < ump->um_seqinc) { *bnp = -1; } else if (*bnp == 0) { if (ip->i_flags & SF_SNAPSHOT) *bnp = blkptrtodb(ump, bn * ump->um_seqinc); else *bnp = -1; } else if (runp) { ufs2_daddr_t bnb = bn; for (++bn; bn < UFS_NDADDR && *runp < maxrun && is_sequential(ump, DIP(ip, i_db[bn - 1]), DIP(ip, i_db[bn])); ++bn, ++*runp); bn = bnb; if (runb && (bn > 0)) { for (--bn; (bn >= 0) && (*runb < maxrun) && is_sequential(ump, DIP(ip, i_db[bn]), DIP(ip, i_db[bn+1])); --bn, ++*runb); } } return (0); } /* Get disk address out of indirect block array */ daddr = DIP(ip, i_ib[ap->in_off]); for (bp = NULL, ++ap; --num; ++ap) { /* * Exit the loop if there is no disk address assigned yet and * the indirect block isn't in the cache, or if we were * looking for an indirect block and we've found it. */ metalbn = ap->in_lbn; if ((daddr == 0 && !incore(&vp->v_bufobj, metalbn)) || metalbn == bn) break; /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it.
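As a caller's-eye sketch of that fetch sequence (an editor's illustration using only names from this file; the incore() shortcut, run-length accounting and the UFS1 case are stripped, so this is a simplification rather than the committed code):

/*
 * Walk the ufs_getlbns() path: path[0].in_off indexes i_ib[] in the
 * inode, and each later element names an indirect block (in_lbn) plus
 * the slot within it (in_off) leading to the next level.
 */
static int
walk_indirs(struct vnode *vp, struct inode *ip, struct indir *path,
    int num, ufs2_daddr_t *daddrp)
{
	struct buf *bp;
	ufs2_daddr_t daddr;
	int error, level;

	daddr = DIP(ip, i_ib[path[0].in_off]);
	for (level = 1; level < num && daddr != 0; level++) {
		error = readindir(vp, path[level].in_lbn, daddr, &bp);
		if (error != 0)
			return (error);
		daddr = ((ufs2_daddr_t *)bp->b_data)[path[level].in_off];
		bqrelse(bp);
	}
	*daddrp = daddr;	/* 0 if a hole was hit along the way */
	return (0);
}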
*/ if (bp) bqrelse(bp); + error = readindir(vp, metalbn, daddr, &bp); + if (error != 0) + return (error); - bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0, 0); - if ((bp->b_flags & B_CACHE) == 0) { -#ifdef INVARIANTS - if (!daddr) - panic("ufs_bmaparray: indirect block not in cache"); -#endif - bp->b_blkno = blkptrtodb(ump, daddr); - bp->b_iocmd = BIO_READ; - bp->b_flags &= ~B_INVAL; - bp->b_ioflags &= ~BIO_ERROR; - vfs_busy_pages(bp, 0); - bp->b_iooffset = dbtob(bp->b_blkno); - bstrategy(bp); -#ifdef RACCT - if (racct_enable) { - PROC_LOCK(curproc); - racct_add_buf(curproc, bp, 0); - PROC_UNLOCK(curproc); - } -#endif /* RACCT */ - curthread->td_ru.ru_inblock++; - error = bufwait(bp); - if (error) { - brelse(bp); - return (error); - } - } - if (I_IS_UFS1(ip)) { daddr = ((ufs1_daddr_t *)bp->b_data)[ap->in_off]; if (num == 1 && daddr && runp) { for (bn = ap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((ufs1_daddr_t *)bp->b_data)[bn - 1], ((ufs1_daddr_t *)bp->b_data)[bn]); ++bn, ++*runp); bn = ap->in_off; if (runb && bn) { for (--bn; bn >= 0 && *runb < maxrun && is_sequential(ump, ((ufs1_daddr_t *)bp->b_data)[bn], ((ufs1_daddr_t *)bp->b_data)[bn+1]); --bn, ++*runb); } } continue; } daddr = ((ufs2_daddr_t *)bp->b_data)[ap->in_off]; if (num == 1 && daddr && runp) { for (bn = ap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((ufs2_daddr_t *)bp->b_data)[bn - 1], ((ufs2_daddr_t *)bp->b_data)[bn]); ++bn, ++*runp); bn = ap->in_off; if (runb && bn) { for (--bn; bn >= 0 && *runb < maxrun && is_sequential(ump, ((ufs2_daddr_t *)bp->b_data)[bn], ((ufs2_daddr_t *)bp->b_data)[bn + 1]); --bn, ++*runb); } } } if (bp) bqrelse(bp); /* * Since this is FFS independent code, we are out of scope for the * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they * will fall in the range 1..um_seqinc, so we use that test and * return a request for a zeroed out buffer if attempts are made * to read a BLK_NOCOPY or BLK_SNAP block. 
*/ if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){ *bnp = -1; return (0); } *bnp = blkptrtodb(ump, daddr); if (*bnp == 0) { if (ip->i_flags & SF_SNAPSHOT) *bnp = blkptrtodb(ump, bn * ump->um_seqinc); else *bnp = -1; } return (0); +} + +static ufs_lbn_t +lbn_count(ump, level) + struct ufsmount *ump; + int level; +{ + ufs_lbn_t blockcnt; + + for (blockcnt = 1; level > 0; level--) + blockcnt *= MNINDIR(ump); + return (blockcnt); +} + +int +ufs_bmap_seekdata(vp, offp) + struct vnode *vp; + off_t *offp; +{ + struct buf *bp; + struct indir a[UFS_NIADDR + 1], *ap; + struct inode *ip; + struct mount *mp; + struct ufsmount *ump; + ufs2_daddr_t bn, daddr, nextbn; + uint64_t bsize; + off_t numblks; + int error, num, num1, off; + + bp = NULL; + ip = VTOI(vp); + mp = vp->v_mount; + ump = VFSTOUFS(mp); + + if (vp->v_type != VREG || (ip->i_flags & SF_SNAPSHOT) != 0) + return (EINVAL); + if (*offp < 0 || *offp >= ip->i_size) + return (ENXIO); + + bsize = mp->mnt_stat.f_iosize; + for (bn = *offp / bsize, numblks = howmany(ip->i_size, bsize); + bn < numblks; bn = nextbn) { + if (bn < UFS_NDADDR) { + daddr = DIP(ip, i_db[bn]); + if (daddr != 0) + break; + nextbn = bn + 1; + continue; + } + + ap = a; + error = ufs_getlbns(vp, bn, ap, &num); + if (error != 0) + break; + MPASS(num >= 2); + daddr = DIP(ip, i_ib[ap->in_off]); + ap++, num--; + for (nextbn = UFS_NDADDR, num1 = num - 1; num1 > 0; num1--) + nextbn += lbn_count(ump, num1); + if (daddr == 0) { + nextbn += lbn_count(ump, num); + continue; + } + + for (; daddr != 0 && num > 0; ap++, num--) { + if (bp != NULL) + bqrelse(bp); + error = readindir(vp, ap->in_lbn, daddr, &bp); + if (error != 0) + return (error); + + /* + * Scan the indirect block until we find a non-zero + * pointer. + */ + off = ap->in_off; + do { + daddr = I_IS_UFS1(ip) ? + ((ufs1_daddr_t *)bp->b_data)[off] : + ((ufs2_daddr_t *)bp->b_data)[off]; + } while (daddr == 0 && ++off < MNINDIR(ump)); + nextbn += off * lbn_count(ump, num - 1); + + /* + * We need to recompute the LBNs of indirect + * blocks, so restart with the updated block offset. + */ + if (off != ap->in_off) + break; + } + if (num == 0) { + /* + * We found a data block. + */ + bn = nextbn; + break; + } + } + if (bp != NULL) + bqrelse(bp); + if (bn >= numblks) + error = ENXIO; + if (error == 0 && *offp < bn * bsize) + *offp = bn * bsize; + return (error); } /* * Create an array of logical block number/offset pairs which represent the * path of indirect blocks required to access a data block. The first "pair" * contains the logical block number of the appropriate single, double or * triple indirect block and the offset into the inode indirect block array. * Note, the logical block number of the inode single/double/triple indirect * block appears twice in the array, once with the offset into the i_ib and * once with the offset into the page itself. */ int ufs_getlbns(vp, bn, ap, nump) struct vnode *vp; ufs2_daddr_t bn; struct indir *ap; int *nump; { ufs2_daddr_t blockcnt; ufs_lbn_t metalbn, realbn; struct ufsmount *ump; int i, numlevels, off; ump = VFSTOUFS(vp->v_mount); if (nump) *nump = 0; numlevels = 0; realbn = bn; if (bn < 0) bn = -bn; /* The first UFS_NDADDR blocks are direct blocks. */ if (bn < UFS_NDADDR) return (0); /* * Determine the number of levels of indirection. After this loop * is done, blockcnt indicates the number of data blocks possible * at the previous level of indirection, and UFS_NIADDR - i is the * number of levels of indirection needed to locate the requested block. 
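The level-determination loop that follows is compact enough to model on its own. Here is a standalone mirror of it (an editor's sketch; the UFS_NDADDR/UFS_NIADDR values are the usual ones but are assumptions here, and EFBIG is reported as -1 to stay libc-only):

#include <stdio.h>

#define UFS_NDADDR	12	/* direct pointers per inode */
#define UFS_NIADDR	3	/* indirect pointers per inode */

/* Returns 1 for single, 2 for double, 3 for triple indirection. */
static int
indir_levels(long bn, long nindir)
{
	long blockcnt;
	int i;

	for (blockcnt = 1, i = UFS_NIADDR, bn -= UFS_NDADDR; ;
	    i--, bn -= blockcnt) {
		if (i == 0)
			return (-1);	/* past triple indirect: EFBIG */
		blockcnt *= nindir;
		if (bn < blockcnt)
			break;
	}
	return (UFS_NIADDR - i + 1);
}

int
main(void)
{
	printf("%d\n", indir_levels(12, 2048));		/* 1: single */
	printf("%d\n", indir_levels(5000, 2048));	/* 2: double */
	return (0);
}

The resulting UFS_NIADDR - i is also the index stored into the first path element's in_off, i.e. which of i_ib[0..2] roots the walk; ufs_bmap_seekdata() above leans on the same arithmetic through lbn_count() when it skips over entirely unallocated indirect subtrees.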
*/ for (blockcnt = 1, i = UFS_NIADDR, bn -= UFS_NDADDR; ; i--, bn -= blockcnt) { if (i == 0) return (EFBIG); blockcnt *= MNINDIR(ump); if (bn < blockcnt) break; } /* Calculate the address of the first meta-block. */ if (realbn >= 0) metalbn = -(realbn - bn + UFS_NIADDR - i); else metalbn = -(-realbn - bn + UFS_NIADDR - i); /* * At each iteration, off is the offset into the bap array which is * an array of disk addresses at the current level of indirection. * The logical block number and the offset in that block are stored * into the argument array. */ ap->in_lbn = metalbn; ap->in_off = off = UFS_NIADDR - i; ap++; for (++numlevels; i <= UFS_NIADDR; i++) { /* If searching for a meta-data block, quit when found. */ if (metalbn == realbn) break; blockcnt /= MNINDIR(ump); off = (bn / blockcnt) % MNINDIR(ump); ++numlevels; ap->in_lbn = metalbn; ap->in_off = off; ++ap; metalbn -= -1 + off * blockcnt; } if (nump) *nump = numlevels; return (0); } Index: projects/runtime-coverage-v2/sys/ufs/ufs/ufs_extern.h =================================================================== --- projects/runtime-coverage-v2/sys/ufs/ufs/ufs_extern.h (revision 347075) +++ projects/runtime-coverage-v2/sys/ufs/ufs/ufs_extern.h (revision 347076) @@ -1,129 +1,130 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ufs_extern.h 8.10 (Berkeley) 5/14/95 * $FreeBSD$ */ #ifndef _UFS_UFS_EXTERN_H_ #define _UFS_UFS_EXTERN_H_ struct componentname; struct direct; struct indir; struct inode; struct mount; struct thread; struct sockaddr; struct ucred; struct ufid; struct vfsconf; struct vnode; struct vop_bmap_args; struct vop_cachedlookup_args; struct vop_generic_args; struct vop_inactive_args; struct vop_reclaim_args; extern struct vop_vector ufs_fifoops; extern struct vop_vector ufs_vnodeops; int ufs_bmap(struct vop_bmap_args *); int ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *, struct buf *, int *, int *); +int ufs_bmap_seekdata(struct vnode *, off_t *); int ufs_fhtovp(struct mount *, struct ufid *, int, struct vnode **); int ufs_checkpath(ino_t, ino_t, struct inode *, struct ucred *, ino_t *); void ufs_dirbad(struct inode *, doff_t, char *); int ufs_dirbadentry(struct vnode *, struct direct *, int); int ufs_dirempty(struct inode *, ino_t, struct ucred *); int ufs_extread(struct vop_read_args *); int ufs_extwrite(struct vop_write_args *); void ufs_makedirentry(struct inode *, struct componentname *, struct direct *); int ufs_direnter(struct vnode *, struct vnode *, struct direct *, struct componentname *, struct buf *, int); int ufs_dirremove(struct vnode *, struct inode *, int, int); int ufs_dirrewrite(struct inode *, struct inode *, ino_t, int, int); int ufs_lookup_ino(struct vnode *, struct vnode **, struct componentname *, ino_t *); int ufs_getlbns(struct vnode *, ufs2_daddr_t, struct indir *, int *); int ufs_inactive(struct vop_inactive_args *); int ufs_init(struct vfsconf *); void ufs_itimes(struct vnode *vp); int ufs_lookup(struct vop_cachedlookup_args *); void ufs_prepare_reclaim(struct vnode *vp); int ufs_readdir(struct vop_readdir_args *); int ufs_reclaim(struct vop_reclaim_args *); void ffs_snapgone(struct inode *); vfs_root_t ufs_root; int ufs_uninit(struct vfsconf *); int ufs_vinit(struct mount *, struct vop_vector *, struct vnode **); #include SYSCTL_DECL(_vfs_ufs); /* * Soft update function prototypes. */ int softdep_setup_directory_add(struct buf *, struct inode *, off_t, ino_t, struct buf *, int); void softdep_change_directoryentry_offset(struct buf *, struct inode *, caddr_t, caddr_t, caddr_t, int); void softdep_setup_remove(struct buf *,struct inode *, struct inode *, int); void softdep_setup_directory_change(struct buf *, struct inode *, struct inode *, ino_t, int); void softdep_change_linkcnt(struct inode *); int softdep_slowdown(struct vnode *); void softdep_setup_create(struct inode *, struct inode *); void softdep_setup_dotdot_link(struct inode *, struct inode *); void softdep_setup_link(struct inode *, struct inode *); void softdep_setup_mkdir(struct inode *, struct inode *); void softdep_setup_rmdir(struct inode *, struct inode *); void softdep_setup_unlink(struct inode *, struct inode *); void softdep_revert_create(struct inode *, struct inode *); void softdep_revert_link(struct inode *, struct inode *); void softdep_revert_mkdir(struct inode *, struct inode *); void softdep_revert_rmdir(struct inode *, struct inode *); /* * Flags to low-level allocation routines. The low 16-bits are reserved * for IO_ flags from vnode.h. * * Note: The general vfs code typically limits the sequential heuristic * count to 127. See sequential_heuristic() in kern/vfs_vnops.c */ #define BA_CLRBUF 0x00010000 /* Clear invalid areas of buffer. */ #define BA_METAONLY 0x00020000 /* Return indirect block buffer. */ #define BA_UNMAPPED 0x00040000 /* Do not mmap resulting buffer.
*/ #define BA_SEQMASK 0x7F000000 /* Bits holding seq heuristic. */ #define BA_SEQSHIFT 24 #define BA_SEQMAX 0x7F #endif /* !_UFS_UFS_EXTERN_H_ */ Index: projects/runtime-coverage-v2/sys/ufs/ufs/ufs_lookup.c =================================================================== --- projects/runtime-coverage-v2/sys/ufs/ufs/ufs_lookup.c (revision 347075) +++ projects/runtime-coverage-v2/sys/ufs/ufs/ufs_lookup.c (revision 347076) @@ -1,1488 +1,1495 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_lookup.c 8.15 (Berkeley) 6/16/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_ufs.h" #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef UFS_DIRHASH #include #endif #include #include #ifdef DIAGNOSTIC static int dirchk = 1; #else static int dirchk = 0; #endif SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, ""); /* true if old FS format...*/ #define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) static int ufs_delete_denied(struct vnode *vdp, struct vnode *tdp, struct ucred *cred, struct thread *td) { int error; #ifdef UFS_ACL /* * NFSv4 Minor Version 1, draft-ietf-nfsv4-minorversion1-03.txt * * 3.16.2.1. ACE4_DELETE vs. ACE4_DELETE_CHILD */ /* * XXX: Is this check required? 
*/ error = VOP_ACCESS(vdp, VEXEC, cred, td); if (error) return (error); error = VOP_ACCESSX(tdp, VDELETE, cred, td); if (error == 0) return (0); error = VOP_ACCESSX(vdp, VDELETE_CHILD, cred, td); if (error == 0) return (0); error = VOP_ACCESSX(vdp, VEXPLICIT_DENY | VDELETE_CHILD, cred, td); if (error) return (error); #endif /* !UFS_ACL */ /* * Standard Unix access control - delete access requires VWRITE. */ error = VOP_ACCESS(vdp, VWRITE, cred, td); if (error) return (error); /* * If directory is "sticky", then user must own * the directory, or the file in it, else she * may not delete it (unless she's root). This * implements append-only directories. */ if ((VTOI(vdp)->i_mode & ISVTX) && VOP_ACCESS(vdp, VADMIN, cred, td) && VOP_ACCESS(tdp, VADMIN, cred, td)) return (EPERM); return (0); } /* * Convert a component of a pathname into a pointer to a locked inode. * This is a very central and rather complicated routine. * If the filesystem is not maintained in a strict tree hierarchy, * this can result in a deadlock situation (see comments in code below). * * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending * on whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it and the target of the pathname * exists, lookup returns both the target and its parent directory locked. * When creating or renaming and LOCKPARENT is specified, the target may * not be ".". When deleting and LOCKPARENT is specified, the target may * be ".", but the caller must check to ensure it does a vrele and vput * instead of two vputs. * * This routine is actually used as VOP_CACHEDLOOKUP method, and the * filesystem employs the generic vfs_cache_lookup() as VOP_LOOKUP * method. * * vfs_cache_lookup() performs the following for us: * check that it is a directory * check accessibility of directory * check for modification attempts on read-only mounts * if name found in cache * if at end of path and deleting or creating * drop it * else * return name. * return VOP_CACHEDLOOKUP() * * Overall outline of ufs_lookup: * * search for name in directory, to found or notfound * notfound: * if creating, return locked directory, leaving info on available slots * else return error * found: * if at end of path and deleting, return information to allow delete * if at end of path and rewriting (RENAME and LOCKPARENT), lock target * inode and return info to allow rewrite * if not at end, add name to cache; if at end and neither creating * nor deleting, add name to cache */ int ufs_lookup(ap) struct vop_cachedlookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { return (ufs_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL)); } int ufs_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp, ino_t *dd_ino) { struct inode *dp; /* inode for directory being searched */ struct buf *bp; /* a buffer of directory entries */ struct direct *ep; /* the current directory entry */ int entryoffsetinblock; /* offset of ep in bp's buffer */ enum {NONE, COMPACT, FOUND} slotstatus; doff_t slotoffset; /* offset of area with free space */ doff_t i_diroff; /* cached i_diroff value. */ doff_t i_offset; /* cached i_offset value.
*/ int slotsize; /* size of area at slotoffset */ int slotfreespace; /* amount of space free in slot */ int slotneeded; /* size of the entry we're seeking */ int numdirpasses; /* strategy for directory search */ doff_t endsearch; /* offset to end directory search */ doff_t prevoff; /* prev entry dp->i_offset */ struct vnode *pdp; /* saved dp during symlink work */ struct vnode *tdp; /* returned by VFS_VGET */ doff_t enduseful; /* pointer past last used dir slot */ u_long bmask; /* block offset mask */ int namlen, error; struct ucred *cred = cnp->cn_cred; int flags = cnp->cn_flags; int nameiop = cnp->cn_nameiop; ino_t ino, ino1; int ltype; if (vpp != NULL) *vpp = NULL; dp = VTOI(vdp); if (dp->i_effnlink == 0) return (ENOENT); /* * Create a vm object if vmiodirenable is enabled. * Alternatively we could call vnode_create_vobject * in VFS_VGET but we could end up creating objects * that are never used. */ vnode_create_vobject(vdp, DIP(dp, i_size), cnp->cn_thread); bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; #ifdef DEBUG_VFS_LOCKS /* * Assert that the directory vnode is locked, and locked * exclusively for the last component lookup for modifying * operations. * * The directory-modifying operations need to save * intermediate state in the inode between namei() call and * actual directory manipulations. See fields in the struct * inode marked as 'used during directory lookup'. We must * ensure that upgrade in namei() does not happen, since * upgrade might need to unlock vdp. If quotas are enabled, * getinoquota() also requires exclusive lock to modify inode. */ ASSERT_VOP_LOCKED(vdp, "ufs_lookup1"); if ((nameiop == CREATE || nameiop == DELETE || nameiop == RENAME) && (flags & (LOCKPARENT | ISLASTCN)) == (LOCKPARENT | ISLASTCN)) ASSERT_VOP_ELOCKED(vdp, "ufs_lookup2"); #endif restart: bp = NULL; slotoffset = -1; /* * We now have a segment name to search for, and a directory to search. * * Suppress search for slots unless creating * file and at end of pathname, in which case * we watch for a place to put the new file in * case it doesn't already exist. */ ino = 0; i_diroff = dp->i_diroff; slotstatus = FOUND; slotfreespace = slotsize = slotneeded = 0; if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN)) { slotstatus = NONE; slotneeded = DIRECTSIZ(cnp->cn_namelen); } #ifdef UFS_DIRHASH /* * Use dirhash for fast operations on large directories. The logic * to determine whether to hash the directory is contained within * ufsdirhash_build(); a zero return means that it decided to hash * this directory and it successfully built up the hash table. */ if (ufsdirhash_build(dp) == 0) { /* Look for a free slot if needed. */ enduseful = dp->i_size; if (slotstatus != FOUND) { slotoffset = ufsdirhash_findfree(dp, slotneeded, &slotsize); if (slotoffset >= 0) { slotstatus = COMPACT; enduseful = ufsdirhash_enduseful(dp); if (enduseful < 0) enduseful = dp->i_size; } } /* Look up the component. */ numdirpasses = 1; entryoffsetinblock = 0; /* silence compiler warning */ switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, &i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) { case 0: ep = (struct direct *)((char *)bp->b_data + (i_offset & bmask)); goto foundentry; case ENOENT: i_offset = roundup2(dp->i_size, DIRBLKSIZ); goto notfound; default: /* Something failed; just do a linear search. */ break; } } #endif /* UFS_DIRHASH */ /* * If there is cached information on a previous search of * this directory, pick up where we last left off. 
* We cache only lookups as these are the most common * and have the greatest payoff. Caching CREATE has little * benefit as it usually must search the entire directory * to determine that the entry does not exist. Caching the * location of the last DELETE or RENAME has not reduced * profiling time and hence has been removed in the interest * of simplicity. */ if (nameiop != LOOKUP || i_diroff == 0 || i_diroff >= dp->i_size) { entryoffsetinblock = 0; i_offset = 0; numdirpasses = 1; } else { i_offset = i_diroff; if ((entryoffsetinblock = i_offset & bmask) && (error = UFS_BLKATOFF(vdp, (off_t)i_offset, NULL, &bp))) return (error); numdirpasses = 2; nchstats.ncs_2passes++; } prevoff = i_offset; endsearch = roundup2(dp->i_size, DIRBLKSIZ); enduseful = 0; searchloop: while (i_offset < endsearch) { /* * If necessary, get the next directory block. */ if ((i_offset & bmask) == 0) { if (bp != NULL) brelse(bp); error = UFS_BLKATOFF(vdp, (off_t)i_offset, NULL, &bp); if (error) return (error); entryoffsetinblock = 0; } /* * If still looking for a slot, and at a DIRBLKSIZE * boundary, have to start looking for free space again. */ if (slotstatus == NONE && (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { slotoffset = -1; slotfreespace = 0; } /* * Get pointer to next entry. * Full validation checks are slow, so we only check * enough to insure forward progress through the * directory. Complete checks can be run by patching * "dirchk" to be true. */ ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); if (ep->d_reclen == 0 || ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) { int i; ufs_dirbad(dp, i_offset, "mangled entry"); i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); i_offset += i; entryoffsetinblock += i; continue; } /* * If an appropriate sized slot has not yet been found, * check to see if one is available. Also accumulate space * in the current block so that we can determine if * compaction is viable. */ if (slotstatus != FOUND) { int size = ep->d_reclen; if (ep->d_ino != 0) size -= DIRSIZ(OFSFMT(vdp), ep); if (size > 0) { if (size >= slotneeded) { slotstatus = FOUND; slotoffset = i_offset; slotsize = ep->d_reclen; } else if (slotstatus == NONE) { slotfreespace += size; if (slotoffset == -1) slotoffset = i_offset; if (slotfreespace >= slotneeded) { slotstatus = COMPACT; slotsize = i_offset + ep->d_reclen - slotoffset; } } } } /* * Check for a name match. */ if (ep->d_ino) { # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(vdp)) namlen = ep->d_type; else namlen = ep->d_namlen; # else namlen = ep->d_namlen; # endif if (namlen == cnp->cn_namelen && (cnp->cn_nameptr[0] == ep->d_name[0]) && !bcmp(cnp->cn_nameptr, ep->d_name, (unsigned)namlen)) { #ifdef UFS_DIRHASH foundentry: #endif /* * Save directory entry's inode number and * reclen in ndp->ni_ufs area, and release * directory buffer. */ if (vdp->v_mount->mnt_maxsymlinklen > 0 && ep->d_type == DT_WHT) { slotstatus = FOUND; slotoffset = i_offset; slotsize = ep->d_reclen; enduseful = dp->i_size; cnp->cn_flags |= ISWHITEOUT; numdirpasses--; goto notfound; } ino = ep->d_ino; goto found; } } prevoff = i_offset; i_offset += ep->d_reclen; entryoffsetinblock += ep->d_reclen; if (ep->d_ino) enduseful = i_offset; } notfound: /* * If we started in the middle of the directory and failed * to find our target, we must check the beginning as well. 
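The wrap-around bookkeeping here (numdirpasses, endsearch, i_diroff) reduces to a small pattern. A toy model of the same two-pass scan (an editor's illustration over an int array, not UFS code):

#include <stdio.h>

/*
 * Start at a cached offset; on a miss, make a second pass over the
 * prefix that was skipped, like the searchloop/notfound logic built
 * around i_diroff.
 */
static int
twopass_find(const int *v, int n, int cached, int key)
{
	int i, end, passes;

	i = (cached > 0 && cached < n) ? cached : 0;
	end = n;
	passes = (i != 0) ? 2 : 1;
	while (passes-- > 0) {
		for (; i < end; i++)
			if (v[i] == key)
				return (i);
		end = cached;	/* second pass covers the skipped prefix */
		i = 0;
	}
	return (-1);
}

int
main(void)
{
	int dir[] = { 7, 3, 9, 4, 1, 8 };

	printf("%d\n", twopass_find(dir, 6, 4, 9));	/* 2: found on pass two */
	return (0);
}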
*/ if (numdirpasses == 2) { numdirpasses--; i_offset = 0; endsearch = i_diroff; goto searchloop; } if (bp != NULL) brelse(bp); /* * If creating, and at end of pathname and current * directory has not been removed, then can consider * allowing file to be created. */ if ((nameiop == CREATE || nameiop == RENAME || (nameiop == DELETE && (cnp->cn_flags & DOWHITEOUT) && (cnp->cn_flags & ISWHITEOUT))) && (flags & ISLASTCN) && dp->i_effnlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. * * XXX: Fix the comment above. */ if (flags & WILLBEDIR) error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred, cnp->cn_thread); else error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread); if (error) return (error); /* * Return an indication of where the new directory * entry should be put. If we didn't find a slot, * then set dp->i_count to 0 indicating * that the new slot belongs at the end of the * directory. If we found a slot, then the new entry * can be put in the range from dp->i_offset to * dp->i_offset + dp->i_count. */ if (slotstatus == NONE) { dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ); dp->i_count = 0; enduseful = dp->i_offset; } else if (nameiop == DELETE) { dp->i_offset = slotoffset; if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; } else { dp->i_offset = slotoffset; dp->i_count = slotsize; if (enduseful < slotoffset + slotsize) enduseful = slotoffset + slotsize; } dp->i_endoff = roundup2(enduseful, DIRBLKSIZ); /* * We return with the directory locked, so that * the parameters we set up above will still be * valid if we actually decide to do a direnter(). * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory inode in ndp->ni_dvp. * The pathname buffer is saved so that the name * can be obtained later. * * NB - if the directory is unlocked, then this * information cannot be used. */ cnp->cn_flags |= SAVENAME; return (EJUSTRETURN); } /* * Insert name into cache (as non-existent) if appropriate. */ if ((cnp->cn_flags & MAKEENTRY) != 0) cache_enter(vdp, NULL, cnp); return (ENOENT); found: if (dd_ino != NULL) *dd_ino = ino; if (numdirpasses == 2) nchstats.ncs_pass2++; /* * Check that directory length properly reflects presence * of this entry. */ if (i_offset + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) { ufs_dirbad(dp, i_offset, "i_size too small"); dp->i_size = i_offset + DIRSIZ(OFSFMT(vdp), ep); DIP_SET(dp, i_size, dp->i_size); dp->i_flag |= IN_CHANGE | IN_UPDATE; } brelse(bp); /* * Found component in pathname. * If the final component of path name, save information * in the cache as to where the entry was found. */ if ((flags & ISLASTCN) && nameiop == LOOKUP) dp->i_diroff = rounddown2(i_offset, DIRBLKSIZ); /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. */ if (nameiop == DELETE && (flags & ISLASTCN)) { if (flags & LOCKPARENT) ASSERT_VOP_ELOCKED(vdp, __FUNCTION__); /* * Return pointer to current entry in dp->i_offset, * and distance past previous entry (if there * is a previous entry in this block) in dp->i_count. * Save directory inode pointer in ndp->ni_dvp for dirremove(). * * Technically we shouldn't be setting these in the * WANTPARENT case (first lookup in rename()), but any * lookups that will result in directory changes will * overwrite these. 
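A worked example of the slot arithmetic that follows (an editor's illustration with invented offsets): if the matched entry begins at i_offset 224 and the previous entry in the same DIRBLKSIZ block begins at prevoff 208, i_count becomes 224 - 208 = 16, the distance dirremove() needs to reach back so the previous entry's d_reclen can be grown over the deleted one; if instead the match sits exactly on a directory block boundary ((i_offset & (DIRBLKSIZ - 1)) == 0), there is no previous entry to extend and i_count is set to 0.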
*/ dp->i_offset = i_offset; if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; if (dd_ino != NULL) return (0); if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE, &tdp)) != 0) return (error); error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread); if (error) { vput(tdp); return (error); } if (dp->i_number == ino) { VREF(vdp); *vpp = vdp; vput(tdp); return (0); } *vpp = tdp; return (0); } /* * If rewriting (RENAME), return the inode and the * information required to rewrite the present directory * Must get inode of directory entry to verify it's a * regular file, or empty directory. */ if (nameiop == RENAME && (flags & ISLASTCN)) { if (flags & WILLBEDIR) error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred, cnp->cn_thread); else error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread); if (error) return (error); /* * Careful about locking second inode. * This can only occur if the target is ".". */ dp->i_offset = i_offset; if (dp->i_number == ino) return (EISDIR); if (dd_ino != NULL) return (0); if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE, &tdp)) != 0) return (error); error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread); if (error) { vput(tdp); return (error); } #ifdef SunOS_doesnt_do_that /* * The only purpose of this check is to return the correct * error. Assume that we want to rename directory "a" * to a file "b", and that we have no ACL_WRITE_DATA on * a containing directory, but we _do_ have ACL_APPEND_DATA. * In that case, the VOP_ACCESS check above will return 0, * and the operation will fail with ENOTDIR instead * of EACCES. */ if (tdp->v_type == VDIR) error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred, cnp->cn_thread); else error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread); if (error) { vput(tdp); return (error); } #endif *vpp = tdp; cnp->cn_flags |= SAVENAME; return (0); } if (dd_ino != NULL) return (0); /* * Step through the translation in the name. We do not `vput' the * directory because we may need it again if a symbolic link * is relative to the current directory. Instead we save it * unlocked as "pdp". We must get the target inode before unlocking * the directory to insure that the inode will not be removed * before we get it. We prevent deadlock by always fetching * inodes from the root, moving down the directory tree. Thus * when following backward pointers ".." we must unlock the * parent directory before getting the requested directory. * There is a potential race condition here if both the current * and parent directories are removed before the VFS_VGET for the * inode associated with ".." returns. We hope that this occurs * infrequently since we cannot avoid this race condition without * implementing a sophisticated deadlock detection algorithm. * Note also that this simple deadlock detection scheme will not * work if the filesystem has any hard links other than ".." * that point backwards in the directory structure. */ pdp = vdp; if (flags & ISDOTDOT) { error = vn_vget_ino(pdp, ino, cnp->cn_lkflags, &tdp); if (error) return (error); /* * Recheck that ".." entry in the vdp directory points * to the inode we looked up before vdp lock was * dropped. */ error = ufs_lookup_ino(pdp, NULL, cnp, &ino1); if (error) { vput(tdp); return (error); } if (ino1 != ino) { vput(tdp); goto restart; } *vpp = tdp; } else if (dp->i_number == ino) { VREF(vdp); /* we want ourself, ie "." */ /* * When we lookup "." we still can be asked to lock it * differently.
*/ ltype = cnp->cn_lkflags & LK_TYPE_MASK; if (ltype != VOP_ISLOCKED(vdp)) { if (ltype == LK_EXCLUSIVE) vn_lock(vdp, LK_UPGRADE | LK_RETRY); else /* if (ltype == LK_SHARED) */ vn_lock(vdp, LK_DOWNGRADE | LK_RETRY); /* * Relock for the "." case may leave us with * a reclaimed vnode. */ if (vdp->v_iflag & VI_DOOMED) { vrele(vdp); return (ENOENT); } } *vpp = vdp; } else { error = VFS_VGET(pdp->v_mount, ino, cnp->cn_lkflags, &tdp); if (error) return (error); *vpp = tdp; } /* * Insert name into cache if appropriate. */ if (cnp->cn_flags & MAKEENTRY) cache_enter(vdp, *vpp, cnp); return (0); } void ufs_dirbad(ip, offset, how) struct inode *ip; doff_t offset; char *how; { struct mount *mp; mp = ITOV(ip)->v_mount; if ((mp->mnt_flag & MNT_RDONLY) == 0) panic("ufs_dirbad: %s: bad dir ino %ju at offset %ld: %s", mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number, (long)offset, how); else (void)printf("%s: bad dir ino %ju at offset %ld: %s\n", mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number, (long)offset, how); } /* * Do consistency checking on a directory entry: * record length must be multiple of 4 * entry must fit in rest of its DIRBLKSIZ block * record must be large enough to contain entry * name is not longer than UFS_MAXNAMLEN * name must be as long as advertised, and null terminated */ int ufs_dirbadentry(dp, ep, entryoffsetinblock) struct vnode *dp; struct direct *ep; int entryoffsetinblock; { int i, namlen; # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(dp)) namlen = ep->d_type; else namlen = ep->d_namlen; # else namlen = ep->d_namlen; # endif if ((ep->d_reclen & 0x3) != 0 || ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > UFS_MAXNAMLEN) { /*return (1); */ printf("First bad\n"); goto bad; } if (ep->d_ino == 0) return (0); for (i = 0; i < namlen; i++) if (ep->d_name[i] == '\0') { /*return (1); */ printf("Second bad\n"); goto bad; } if (ep->d_name[i]) goto bad; return (0); bad: return (1); } /* * Construct a new directory entry after a call to namei, using the * parameters that it left in the componentname argument cnp. The * argument ip is the inode to which the new directory entry will refer. */ void ufs_makedirentry(ip, cnp, newdirp) struct inode *ip; struct componentname *cnp; struct direct *newdirp; { + u_int namelen; -#ifdef INVARIANTS - if ((cnp->cn_flags & SAVENAME) == 0) - panic("ufs_makedirentry: missing name"); -#endif + namelen = (unsigned)cnp->cn_namelen; + KASSERT((cnp->cn_flags & SAVENAME) != 0, + ("ufs_makedirentry: missing name")); + KASSERT(namelen <= UFS_MAXNAMLEN, + ("ufs_makedirentry: name too long")); newdirp->d_ino = ip->i_number; - newdirp->d_namlen = cnp->cn_namelen; - bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1); + newdirp->d_namlen = namelen; + + /* Zero out after-name padding */ + *(u_int32_t *)(&newdirp->d_name[namelen & ~(DIR_ROUNDUP - 1)]) = 0; + + bcopy(cnp->cn_nameptr, newdirp->d_name, namelen); + if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) newdirp->d_type = IFTODT(ip->i_mode); else { newdirp->d_type = 0; # if (BYTE_ORDER == LITTLE_ENDIAN) { u_char tmp = newdirp->d_namlen; newdirp->d_namlen = newdirp->d_type; newdirp->d_type = tmp; } # endif } } /* * Write a directory entry after a call to namei, using the parameters * that it left in nameidata. The argument dirp is the new directory * entry contents. Dvp is a pointer to the directory to be written, * which was left locked by namei.
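A note on the new padding store in ufs_makedirentry() above: d_name occupies DIR_ROUNDUP-sized units on disk, so zeroing the final aligned word before the bcopy() guarantees that the bytes between the name's NUL terminator and the next 4-byte boundary are cleared rather than stale kernel memory (the old namelen + 1 copy left them untouched). A standalone sketch of the same discipline (an editor's illustration; copy_name_padded() is hypothetical and assumes, as struct direct does, that the destination is 4-byte aligned):

#include <stdint.h>
#include <string.h>

#define DIR_ROUNDUP	4	/* names are stored in 4-byte units */

/*
 * dst must be 4-byte aligned (d_name sits 8 bytes into struct direct)
 * and must have room for roundup2(namelen + 1, DIR_ROUNDUP) bytes.
 */
static void
copy_name_padded(char *dst, const char *name, unsigned int namelen)
{
	/* The last aligned word holds the NUL and any tail padding. */
	*(uint32_t *)(void *)&dst[namelen & ~(DIR_ROUNDUP - 1)] = 0;
	memcpy(dst, name, namelen);
}

Because the zeroed word always covers offset namelen, the terminating NUL now comes from the store rather than from copying namelen + 1 bytes of the source string.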
Remaining parameters (dp->i_offset, * dp->i_count) indicate how the space for the new entry is to be obtained. * Non-null bp indicates that a directory is being created (for the * soft dependency code). */ int ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename) struct vnode *dvp; struct vnode *tvp; struct direct *dirp; struct componentname *cnp; struct buf *newdirbp; int isrename; { struct ucred *cr; struct thread *td; int newentrysize; struct inode *dp; struct buf *bp; u_int dsize; struct direct *ep, *nep; u_int64_t old_isize; int error, ret, blkoff, loc, spacefree, flags, namlen; char *dirbuf; td = curthread; /* XXX */ cr = td->td_ucred; dp = VTOI(dvp); newentrysize = DIRSIZ(OFSFMT(dvp), dirp); if (dp->i_count == 0) { /* * If dp->i_count is 0, then namei could find no * space in the directory. Here, dp->i_offset will * be on a directory block boundary and we will write the * new entry into a fresh block. */ if (dp->i_offset & (DIRBLKSIZ - 1)) panic("ufs_direnter: newblk"); flags = BA_CLRBUF; if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)) flags |= IO_SYNC; #ifdef QUOTA if ((error = getinoquota(dp)) != 0) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); return (error); } #endif old_isize = dp->i_size; vnode_pager_setsize(dvp, (u_long)dp->i_offset + DIRBLKSIZ); if ((error = UFS_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, cr, flags, &bp)) != 0) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); vnode_pager_setsize(dvp, (u_long)old_isize); return (error); } dp->i_size = dp->i_offset + DIRBLKSIZ; DIP_SET(dp, i_size, dp->i_size); dp->i_endoff = dp->i_size; dp->i_flag |= IN_CHANGE | IN_UPDATE; dirp->d_reclen = DIRBLKSIZ; blkoff = dp->i_offset & (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) { ufsdirhash_newblk(dp, dp->i_offset); ufsdirhash_add(dp, dirp, dp->i_offset); ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, dp->i_offset); } #endif if (DOINGSOFTDEP(dvp)) { /* * Ensure that the entire newly allocated block is a * valid directory so that future growth within the * block does not have to ensure that the block is * written before the inode. */ blkoff += DIRBLKSIZ; while (blkoff < bp->b_bcount) { ((struct direct *) (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; blkoff += DIRBLKSIZ; } if (softdep_setup_directory_add(bp, dp, dp->i_offset, dirp->d_ino, newdirbp, 1)) dp->i_flag |= IN_NEEDSYNC; if (newdirbp) bdwrite(newdirbp); bdwrite(bp); if ((dp->i_flag & IN_NEEDSYNC) == 0) return (UFS_UPDATE(dvp, 0)); /* * We have just allocated a directory block in an * indirect block. We must prevent holes in the * directory created if directory entries are * written out of order. To accomplish this we * fsync when we extend a directory into indirects. * During rename it's not safe to drop the tvp lock * so sync must be delayed until it is. * * This synchronous step could be removed if fsck and * the kernel were taught to fill in sparse * directories rather than panic. */ if (isrename) return (0); if (tvp != NULL) VOP_UNLOCK(tvp, 0); (void) VOP_FSYNC(dvp, MNT_WAIT, td); if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); return (error); } if (DOINGASYNC(dvp)) { bdwrite(bp); return (UFS_UPDATE(dvp, 0)); } error = bwrite(bp); ret = UFS_UPDATE(dvp, 1); if (error == 0) return (ret); return (error); } /* * If dp->i_count is non-zero, then namei found space for the new * entry in the range dp->i_offset to dp->i_offset + dp->i_count * in the directory. 
To use this space, we may have to compact * the entries located there, by copying them together towards the * beginning of the block, leaving the free space in one usable * chunk at the end. */ /* * Increase size of directory if entry eats into new space. * This should never push the size past a new multiple of * DIRBLKSIZ. * * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. */ if (dp->i_offset + dp->i_count > dp->i_size) { dp->i_size = dp->i_offset + dp->i_count; DIP_SET(dp, i_size, dp->i_size); } /* * Get the block containing the space for the new directory entry. */ error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp); if (error) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); return (error); } /* * Find space for the new entry. In the simple case, the entry at * offset base will have the space. If it does not, then namei * arranged that compacting the region dp->i_offset to * dp->i_offset + dp->i_count would yield the space. */ ep = (struct direct *)dirbuf; dsize = ep->d_ino ? DIRSIZ(OFSFMT(dvp), ep) : 0; spacefree = ep->d_reclen - dsize; for (loc = ep->d_reclen; loc < dp->i_count; ) { nep = (struct direct *)(dirbuf + loc); /* Trim the existing slot (NB: dsize may be zero). */ ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); /* Read nep->d_reclen now as the bcopy() may clobber it. */ loc += nep->d_reclen; if (nep->d_ino == 0) { /* * A mid-block unused entry. Such entries are * never created by the kernel, but fsck_ffs * can create them (and it doesn't fix them). * * Add up the free space, and initialise the * relocated entry since we don't bcopy it. */ spacefree += nep->d_reclen; ep->d_ino = 0; dsize = 0; continue; } dsize = DIRSIZ(OFSFMT(dvp), nep); spacefree += nep->d_reclen - dsize; #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_move(dp, nep, dp->i_offset + ((char *)nep - dirbuf), dp->i_offset + ((char *)ep - dirbuf)); #endif if (DOINGSOFTDEP(dvp)) softdep_change_directoryentry_offset(bp, dp, dirbuf, (caddr_t)nep, (caddr_t)ep, dsize); else bcopy((caddr_t)nep, (caddr_t)ep, dsize); } /* * Here, `ep' points to a directory entry containing `dsize' in-use * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, * then the entry is completely unused (dsize == 0). The value * of ep->d_reclen is always indeterminate. * * Update the pointer fields in the previous entry (if any), * copy in the new entry, and write out the block.
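 *
 * As an illustrative aside (not part of the original source), the
 * compaction loop above preserves this layout after every
 * iteration:
 *
 *   |<- packed live entries ->|<- dsize ->|<- spacefree ->|<- unscanned ->|
 *   dirbuf                    ep                          dirbuf + loc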
*/ # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(dvp)) namlen = ep->d_type; else namlen = ep->d_namlen; # else namlen = ep->d_namlen; # endif if (ep->d_ino == 0 || (ep->d_ino == UFS_WINO && namlen == dirp->d_namlen && bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { if (spacefree + dsize < newentrysize) panic("ufs_direnter: compact1"); dirp->d_reclen = spacefree + dsize; } else { if (spacefree < newentrysize) panic("ufs_direnter: compact2"); dirp->d_reclen = spacefree; ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); } #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL && (ep->d_ino == 0 || dirp->d_reclen == spacefree)) ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf)); #endif bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_checkblock(dp, dirbuf - (dp->i_offset & (DIRBLKSIZ - 1)), rounddown2(dp->i_offset, DIRBLKSIZ)); #endif if (DOINGSOFTDEP(dvp)) { (void) softdep_setup_directory_add(bp, dp, dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp, 0); if (newdirbp != NULL) bdwrite(newdirbp); bdwrite(bp); } else { if (DOINGASYNC(dvp)) { bdwrite(bp); error = 0; } else { error = bwrite(bp); } } dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * If all went well, and the directory can be shortened, proceed * with the truncation. Note that we have to unlock the inode for * the entry that we just entered, as the truncation may need to * lock other inodes which can lead to deadlock if we also hold a * lock on the newly entered node. */ if (isrename == 0 && error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) { if (tvp != NULL) VOP_UNLOCK(tvp, 0); error = UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_NORMAL | (DOINGASYNC(dvp) ? 0 : IO_SYNC), cr); if (error != 0) vn_printf(dvp, "ufs_direnter: failed to truncate, error %d\n", error); #ifdef UFS_DIRHASH if (error == 0 && dp->i_dirhash != NULL) ufsdirhash_dirtrunc(dp, dp->i_endoff); #endif error = 0; if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); } return (error); } /* * Remove a directory entry after a call to namei, using * the parameters which it left in nameidata. The entry * dp->i_offset contains the offset into the directory of the * entry to be eliminated. The dp->i_count field contains the * size of the previous record in the directory. If this * is 0, the first entry is being deleted, so we need only * zero the inode number to mark the entry as free. If the * entry is not the first in the directory, we must reclaim * the space of the now empty record by adding the record size * to the size of the previous entry. */ int ufs_dirremove(dvp, ip, flags, isrmdir) struct vnode *dvp; struct inode *ip; int flags; int isrmdir; { struct inode *dp; struct direct *ep, *rep; struct buf *bp; int error; dp = VTOI(dvp); /* * Adjust the link count early so softdep can block if necessary. */ if (ip) { ip->i_effnlink--; if (DOINGSOFTDEP(dvp)) { softdep_setup_unlink(dp, ip); } else { ip->i_nlink--; DIP_SET(ip, i_nlink, ip->i_nlink); ip->i_flag |= IN_CHANGE; } } if (flags & DOWHITEOUT) { /* * Whiteout entry: set d_ino to UFS_WINO. */ if ((error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) != 0) return (error); ep->d_ino = UFS_WINO; ep->d_type = DT_WHT; goto out; } if ((error = UFS_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) return (error); /* Set 'rep' to the entry being removed. */ if (dp->i_count == 0) rep = ep; else rep = (struct direct *)((char *)ep + ep->d_reclen); #ifdef UFS_DIRHASH /* * Remove the dirhash entry. 
This is complicated by the fact * that `ep' is the previous entry when dp->i_count != 0. */ if (dp->i_dirhash != NULL) ufsdirhash_remove(dp, rep, dp->i_offset); #endif if (ip && rep->d_ino != ip->i_number) panic("ufs_dirremove: ip %ju does not match dirent ino %ju\n", (uintmax_t)ip->i_number, (uintmax_t)rep->d_ino); if (dp->i_count == 0) { /* * First entry in block: set d_ino to zero. */ ep->d_ino = 0; } else { /* * Collapse new free space into previous entry. */ ep->d_reclen += rep->d_reclen; } #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_checkblock(dp, (char *)ep - ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)), rounddown2(dp->i_offset, DIRBLKSIZ)); #endif out: error = 0; if (DOINGSOFTDEP(dvp)) { if (ip) softdep_setup_remove(bp, dp, ip, isrmdir); if (softdep_slowdown(dvp)) error = bwrite(bp); else bdwrite(bp); } else { if (flags & DOWHITEOUT) error = bwrite(bp); else if (DOINGASYNC(dvp)) bdwrite(bp); else error = bwrite(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * If the last named reference to a snapshot goes away, * drop its snapshot reference so that it will be reclaimed * when last open reference goes away. */ if (ip != NULL && (ip->i_flags & SF_SNAPSHOT) != 0 && ip->i_effnlink == 0) UFS_SNAPGONE(ip); return (error); } /* * Rewrite an existing directory entry to point at the inode * supplied. The parameters describing the directory entry are * set up by a call to namei. */ int ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) struct inode *dp, *oip; ino_t newinum; int newtype; int isrmdir; { struct buf *bp; struct direct *ep; struct vnode *vdp = ITOV(dp); int error; /* * Drop the link before we lock the buf so softdep can block if * necessary. */ oip->i_effnlink--; if (DOINGSOFTDEP(vdp)) { softdep_setup_unlink(dp, oip); } else { oip->i_nlink--; DIP_SET(oip, i_nlink, oip->i_nlink); oip->i_flag |= IN_CHANGE; } error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); if (error) return (error); if (ep->d_namlen == 2 && ep->d_name[1] == '.' && ep->d_name[0] == '.' && ep->d_ino != oip->i_number) { brelse(bp); return (EIDRM); } ep->d_ino = newinum; if (!OFSFMT(vdp)) ep->d_type = newtype; if (DOINGSOFTDEP(vdp)) { softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); bdwrite(bp); } else { if (DOINGASYNC(vdp)) { bdwrite(bp); error = 0; } else { error = bwrite(bp); } } dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * If the last named reference to a snapshot goes away, * drop its snapshot reference so that it will be reclaimed * when last open reference goes away. */ if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_effnlink == 0) UFS_SNAPGONE(oip); return (error); } /* * Check if a directory is empty or not. * Inode supplied must be locked. * * Using a struct dirtemplate here is not precisely * what we want, but better than using a struct direct. * * NB: does not handle corrupted directories. */ int ufs_dirempty(ip, parentino, cred) struct inode *ip; ino_t parentino; struct ucred *cred; { doff_t off; struct dirtemplate dbuf; struct direct *dp = (struct direct *)&dbuf; int error, namlen; ssize_t count; #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) for (off = 0; off < ip->i_size; off += dp->d_reclen) { error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, &count, (struct thread *)0); /* * Since we read MINDIRSIZ, residual must * be 0 unless we're at end of file. 
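 *
 * (vn_rdwr() returns the number of untransferred bytes through its
 * next-to-last argument, so any short read -- whether from a
 * truncated directory or an I/O problem -- shows up here as a
 * non-zero count and the directory is conservatively reported as
 * non-empty.)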
*/ if (error || count != 0) return (0); /* avoid infinite loops */ if (dp->d_reclen == 0) return (0); /* skip empty entries */ if (dp->d_ino == 0 || dp->d_ino == UFS_WINO) continue; /* accept only "." and ".." */ # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(ITOV(ip))) namlen = dp->d_type; else namlen = dp->d_namlen; # else namlen = dp->d_namlen; # endif if (namlen > 2) return (0); if (dp->d_name[0] != '.') return (0); /* * At this point namlen must be 1 or 2. * 1 implies ".", 2 implies ".." if second * char is also "." */ if (namlen == 1 && dp->d_ino == ip->i_number) continue; if (dp->d_name[1] == '.' && dp->d_ino == parentino) continue; return (0); } return (1); } static int ufs_dir_dd_ino(struct vnode *vp, struct ucred *cred, ino_t *dd_ino, struct vnode **dd_vp) { struct dirtemplate dirbuf; struct vnode *ddvp; int error, namlen; ASSERT_VOP_LOCKED(vp, "ufs_dir_dd_ino"); if (vp->v_type != VDIR) return (ENOTDIR); /* * First check to see if we have it in the name cache. */ if ((ddvp = vn_dir_dd_ino(vp)) != NULL) { KASSERT(ddvp->v_mount == vp->v_mount, ("ufs_dir_dd_ino: Unexpected mount point crossing")); *dd_ino = VTOI(ddvp)->i_number; *dd_vp = ddvp; return (0); } /* * Have to read the directory. */ error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, NULL, NULL); if (error != 0) return (error); #if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(vp)) namlen = dirbuf.dotdot_type; else namlen = dirbuf.dotdot_namlen; #else namlen = dirbuf.dotdot_namlen; #endif if (namlen != 2 || dirbuf.dotdot_name[0] != '.' || dirbuf.dotdot_name[1] != '.') return (ENOTDIR); *dd_ino = dirbuf.dotdot_ino; *dd_vp = NULL; return (0); } /* * Check if source directory is in the path of the target directory. */ int ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, struct ucred *cred, ino_t *wait_ino) { struct mount *mp; struct vnode *tvp, *vp, *vp1; int error; ino_t dd_ino; vp = tvp = ITOV(target); mp = vp->v_mount; *wait_ino = 0; if (target->i_number == source_ino) return (EEXIST); if (target->i_number == parent_ino) return (0); if (target->i_number == UFS_ROOTINO) return (0); for (;;) { error = ufs_dir_dd_ino(vp, cred, &dd_ino, &vp1); if (error != 0) break; if (dd_ino == source_ino) { error = EINVAL; break; } if (dd_ino == UFS_ROOTINO) break; if (dd_ino == parent_ino) break; if (vp1 == NULL) { error = VFS_VGET(mp, dd_ino, LK_SHARED | LK_NOWAIT, &vp1); if (error != 0) { *wait_ino = dd_ino; break; } } KASSERT(dd_ino == VTOI(vp1)->i_number, ("directory %ju reparented\n", (uintmax_t)VTOI(vp1)->i_number)); if (vp != tvp) vput(vp); vp = vp1; } if (error == ENOTDIR) panic("checkpath: .. not a directory\n"); if (vp1 != NULL) vput(vp1); if (vp != tvp) vput(vp); return (error); } Index: projects/runtime-coverage-v2/sys/ufs/ufs/ufs_vnops.c =================================================================== --- projects/runtime-coverage-v2/sys/ufs/ufs/ufs_vnops.c (revision 347075) +++ projects/runtime-coverage-v2/sys/ufs/ufs/ufs_vnops.c (revision 347076) @@ -1,2792 +1,2795 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. 
and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_quota.h" #include "opt_suiddir.h" #include "opt_ufs.h" #include "opt_ffs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* XXX */ #include #include #include #include #include #include #include #include #include #ifdef UFS_DIRHASH #include #endif #ifdef UFS_GJOURNAL #include FEATURE(ufs_gjournal, "Journaling support through GEOM for UFS"); #endif #ifdef QUOTA FEATURE(ufs_quota, "UFS disk quotas support"); FEATURE(ufs_quota64, "64bit UFS disk quotas support"); #endif #ifdef SUIDDIR FEATURE(suiddir, "Give all new files in directory the same ownership as the directory"); #endif #include static vop_accessx_t ufs_accessx; static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *); static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *); static vop_close_t ufs_close; static vop_create_t ufs_create; static vop_getattr_t ufs_getattr; static vop_ioctl_t ufs_ioctl; static vop_link_t ufs_link; static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *, const char *); static vop_markatime_t ufs_markatime; static vop_mkdir_t ufs_mkdir; static vop_mknod_t ufs_mknod; static vop_open_t ufs_open; static vop_pathconf_t ufs_pathconf; static vop_print_t ufs_print; static vop_readlink_t ufs_readlink; static vop_remove_t ufs_remove; static vop_rename_t ufs_rename; static vop_rmdir_t ufs_rmdir; static vop_setattr_t ufs_setattr; static vop_strategy_t ufs_strategy; static vop_symlink_t ufs_symlink; static vop_whiteout_t ufs_whiteout; static vop_close_t ufsfifo_close; static vop_kqfilter_t ufsfifo_kqfilter; SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); /* * A virgin directory (no blushing please). 
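 *
 * In the template below "." takes a 12-byte record (the rounded-up
 * size of a one-character entry) and ".." is handed the remaining
 * DIRBLKSIZ - 12 bytes, so the two records exactly cover the first
 * directory block.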
*/ static struct dirtemplate mastertemplate = { 0, 12, DT_DIR, 1, ".", 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." }; static struct odirtemplate omastertemplate = { 0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".." }; static void ufs_itimes_locked(struct vnode *vp) { struct inode *ip; struct timespec ts; ASSERT_VI_LOCKED(vp, __func__); ip = VTOI(vp); if (UFS_RDONLY(ip)) goto out; if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) return; if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp)) ip->i_flag |= IN_LAZYMOD; else if (((vp->v_mount->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) || (ip->i_flag & (IN_CHANGE | IN_UPDATE))) ip->i_flag |= IN_MODIFIED; else if (ip->i_flag & IN_ACCESS) ip->i_flag |= IN_LAZYACCESS; vfs_timestamp(&ts); if (ip->i_flag & IN_ACCESS) { DIP_SET(ip, i_atime, ts.tv_sec); DIP_SET(ip, i_atimensec, ts.tv_nsec); } if (ip->i_flag & IN_UPDATE) { DIP_SET(ip, i_mtime, ts.tv_sec); DIP_SET(ip, i_mtimensec, ts.tv_nsec); } if (ip->i_flag & IN_CHANGE) { DIP_SET(ip, i_ctime, ts.tv_sec); DIP_SET(ip, i_ctimensec, ts.tv_nsec); DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1); } out: ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); } void ufs_itimes(struct vnode *vp) { VI_LOCK(vp); ufs_itimes_locked(vp); VI_UNLOCK(vp); } /* * Create a regular file */ static int ufs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { int error; error = ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), ap->a_dvp, ap->a_vpp, ap->a_cnp, "ufs_create"); if (error != 0) return (error); if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0) cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp); return (0); } /* * Mknod vnode call */ /* ARGSUSED */ static int ufs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode **vpp = ap->a_vpp; struct inode *ip; ino_t ino; int error; error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), ap->a_dvp, vpp, ap->a_cnp, "ufs_mknod"); if (error) return (error); ip = VTOI(*vpp); ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; if (vap->va_rdev != VNOVAL) { /* * Want to be able to use this to make badblock * inodes, so don't truncate the dev number. */ DIP_SET(ip, i_rdev, vap->va_rdev); } /* * Remove inode, then reload it through VFS_VGET so it is * checked to see if it is an alias of an existing entry in * the inode cache. XXX I don't believe this is necessary now. */ (*vpp)->v_type = VNON; ino = ip->i_number; /* Save this before vgone() invalidates ip. */ vgone(*vpp); vput(*vpp); error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); if (error) { *vpp = NULL; return (error); } return (0); } /* * Open called. */ /* ARGSUSED */ static int ufs_open(struct vop_open_args *ap) { struct vnode *vp = ap->a_vp; struct inode *ip; if (vp->v_type == VCHR || vp->v_type == VBLK) return (EOPNOTSUPP); ip = VTOI(vp); /* * Files marked append-only must be opened for appending. */ if ((ip->i_flags & APPEND) && (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) return (EPERM); vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td); return (0); } /* * Close called. * * Update the times on the inode. 
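 *
 * (The pending IN_ACCESS/IN_CHANGE/IN_UPDATE flags are folded into
 * the on-disk timestamps here only while other references to the
 * vnode remain; the final close leaves that work to the
 * inactive/reclaim path.)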
*/ /* ARGSUSED */ static int ufs_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; int usecount; VI_LOCK(vp); usecount = vp->v_usecount; if (usecount > 1) ufs_itimes_locked(vp); VI_UNLOCK(vp); return (0); } static int ufs_accessx(ap) struct vop_accessx_args /* { struct vnode *a_vp; accmode_t a_accmode; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); accmode_t accmode = ap->a_accmode; int error; #ifdef UFS_ACL struct acl *acl; acl_type_t type; #endif /* * Disallow write attempts on read-only filesystems; * unless the file is a socket, fifo, or a block or * character device resident on the filesystem. */ if (accmode & VMODIFY_PERMS) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); #ifdef QUOTA /* * Inode is accounted in the quotas only if struct * dquot is attached to it. VOP_ACCESS() is called * from vn_open_cred() and provides a convenient * point to call getinoquota(). The lock mode is * exclusive when the file is opening for write. */ if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { error = getinoquota(ip); if (error != 0) return (error); } #endif break; default: break; } } /* * If immutable bit set, nobody gets to write it. "& ~VADMIN_PERMS" * permits the owner of the file to remove the IMMUTABLE flag. */ if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) && (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT))) return (EPERM); #ifdef UFS_ACL if ((vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) != 0) { if (vp->v_mount->mnt_flag & MNT_NFS4ACLS) type = ACL_TYPE_NFS4; else type = ACL_TYPE_ACCESS; acl = acl_alloc(M_WAITOK); if (type == ACL_TYPE_NFS4) error = ufs_getacl_nfs4_internal(vp, acl, ap->a_td); else error = VOP_GETACL(vp, type, acl, ap->a_cred, ap->a_td); switch (error) { case 0: if (type == ACL_TYPE_NFS4) { error = vaccess_acl_nfs4(vp->v_type, ip->i_uid, ip->i_gid, acl, accmode, ap->a_cred, NULL); } else { error = vfs_unixify_accmode(&accmode); if (error == 0) error = vaccess_acl_posix1e(vp->v_type, ip->i_uid, ip->i_gid, acl, accmode, ap->a_cred, NULL); } break; default: if (error != EOPNOTSUPP) printf( "ufs_accessx(): Error retrieving ACL on object (%d).\n", error); /* * XXX: Fall back until debugged. Should * eventually possibly log an error, and return * EPERM for safety. 
*/ error = vfs_unixify_accmode(&accmode); if (error == 0) error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, accmode, ap->a_cred, NULL); } acl_free(acl); return (error); } #endif /* !UFS_ACL */ error = vfs_unixify_accmode(&accmode); if (error == 0) error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, accmode, ap->a_cred, NULL); return (error); } /* ARGSUSED */ static int ufs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct vattr *vap = ap->a_vap; VI_LOCK(vp); ufs_itimes_locked(vp); if (I_IS_UFS1(ip)) { vap->va_atime.tv_sec = ip->i_din1->di_atime; vap->va_atime.tv_nsec = ip->i_din1->di_atimensec; } else { vap->va_atime.tv_sec = ip->i_din2->di_atime; vap->va_atime.tv_nsec = ip->i_din2->di_atimensec; } VI_UNLOCK(vp); /* * Copy from inode table */ vap->va_fsid = dev2udev(ITOUMP(ip)->um_dev); vap->va_fileid = ip->i_number; vap->va_mode = ip->i_mode & ~IFMT; vap->va_nlink = ip->i_effnlink; vap->va_uid = ip->i_uid; vap->va_gid = ip->i_gid; if (I_IS_UFS1(ip)) { vap->va_rdev = ip->i_din1->di_rdev; vap->va_size = ip->i_din1->di_size; vap->va_mtime.tv_sec = ip->i_din1->di_mtime; vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec; vap->va_ctime.tv_sec = ip->i_din1->di_ctime; vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec; vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks); vap->va_filerev = ip->i_din1->di_modrev; } else { vap->va_rdev = ip->i_din2->di_rdev; vap->va_size = ip->i_din2->di_size; vap->va_mtime.tv_sec = ip->i_din2->di_mtime; vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec; vap->va_ctime.tv_sec = ip->i_din2->di_ctime; vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec; vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime; vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec; vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks); vap->va_filerev = ip->i_din2->di_modrev; } vap->va_flags = ip->i_flags; vap->va_gen = ip->i_gen; vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; vap->va_type = IFTOVT(ip->i_mode); return (0); } /* * Set attribute vnode op. called from several syscalls */ static int ufs_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct ucred *cred = ap->a_cred; struct thread *td = curthread; int error; /* * Check for unsettable attributes. */ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { return (EINVAL); } if (vap->va_flags != VNOVAL) { if ((vap->va_flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK | SF_SNAPSHOT | UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE | UF_SPARSE | UF_SYSTEM)) != 0) return (EOPNOTSUPP); if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * Callers may only modify the file flags on objects they * have VADMIN rights for. */ if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) return (error); /* * Unprivileged processes are not permitted to unset system * flags, or modify flags if any system flags are set. * Privileged non-jail processes may not modify system flags * if securelevel > 0 and any existing system flags are set. 
* Privileged jail processes behave like privileged non-jail * processes if the PR_ALLOW_CHFLAGS permission bit is set; * otherwise, they behave like unprivileged processes. */ if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) { if (ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { error = securelevel_gt(cred, 0); if (error) return (error); } /* The snapshot flag cannot be toggled. */ if ((vap->va_flags ^ ip->i_flags) & SF_SNAPSHOT) return (EPERM); } else { if (ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE)) return (EPERM); } ip->i_flags = vap->va_flags; DIP_SET(ip, i_flags, vap->va_flags); ip->i_flag |= IN_CHANGE; error = UFS_UPDATE(vp, 0); if (ip->i_flags & (IMMUTABLE | APPEND)) return (error); } /* * If immutable or append, no one can change any of its attributes * except the ones already handled (in some cases, file flags * including the immutability flags themselves for the superuser). */ if (ip->i_flags & (IMMUTABLE | APPEND)) return (EPERM); /* * Go through the fields and update iff not VNOVAL. */ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, td)) != 0) return (error); } if (vap->va_size != VNOVAL) { /* * XXX most of the following special cases should be in * callers instead of in N filesystems. The VDIR check * mostly already is. */ switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: /* * Truncation should have an effect in these cases. * Disallow it if the filesystem is read-only or * the file is being snapshotted. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((ip->i_flags & SF_SNAPSHOT) != 0) return (EPERM); break; default: /* * According to POSIX, the result is unspecified * for file types other than regular files, * directories and shared memory objects. We * don't support shared memory objects in the file * system, and have dubious support for truncating * symlinks. Just ignore the request in other cases. */ return (0); } if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL | ((vap->va_vaflags & VA_SYNC) != 0 ? 
IO_SYNC : 0), cred)) != 0) return (error); } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_birthtime.tv_sec != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((ip->i_flags & SF_SNAPSHOT) != 0) return (EPERM); error = vn_utimes_perm(vp, vap, cred, td); if (error != 0) return (error); ip->i_flag |= IN_CHANGE | IN_MODIFIED; if (vap->va_atime.tv_sec != VNOVAL) { ip->i_flag &= ~IN_ACCESS; DIP_SET(ip, i_atime, vap->va_atime.tv_sec); DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec); } if (vap->va_mtime.tv_sec != VNOVAL) { ip->i_flag &= ~IN_UPDATE; DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec); DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec); } if (vap->va_birthtime.tv_sec != VNOVAL && I_IS_UFS2(ip)) { ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec; ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec; } error = UFS_UPDATE(vp, 0); if (error) return (error); } error = 0; if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode & (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH))) return (EPERM); error = ufs_chmod(vp, (int)vap->va_mode, cred, td); } return (error); } #ifdef UFS_ACL static int ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode, int file_owner_id, struct ucred *cred, struct thread *td) { int error; struct acl *aclp; aclp = acl_alloc(M_WAITOK); error = ufs_getacl_nfs4_internal(vp, aclp, td); /* * We don't have to handle EOPNOTSUPP here, as the filesystem claims * it supports ACLs. */ if (error) goto out; acl_nfs4_sync_acl_from_mode(aclp, mode, file_owner_id); error = ufs_setacl_nfs4_internal(vp, aclp, td); out: acl_free(aclp); return (error); } #endif /* UFS_ACL */ /* * Mark this file's access time for update for vfs_mark_atime(). This * is called from execve() and mmap(). */ static int ufs_markatime(ap) struct vop_markatime_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); VI_LOCK(vp); ip->i_flag |= IN_ACCESS; VI_UNLOCK(vp); /* * XXXKIB No UFS_UPDATE(ap->a_vp, 0) there. */ return (0); } /* * Change the mode on a file. * Inode must be locked before calling. */ static int ufs_chmod(vp, mode, cred, td) struct vnode *vp; int mode; struct ucred *cred; struct thread *td; { struct inode *ip = VTOI(vp); int error; /* * To modify the permissions on a file, must possess VADMIN * for that file. */ if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred, td))) return (error); /* * Privileged processes may set the sticky bit on non-directories, * as well as set the setgid bit on a file with a group that the * process is not a member of. Both of these are allowed in * jail(8). */ if (vp->v_type != VDIR && (mode & S_ISTXT)) { if (priv_check_cred(cred, PRIV_VFS_STICKYFILE)) return (EFTYPE); } if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { error = priv_check_cred(cred, PRIV_VFS_SETGID); if (error) return (error); } /* * Deny setting setuid if we are not the file owner. 
*/ if ((mode & ISUID) && ip->i_uid != cred->cr_uid) { error = priv_check_cred(cred, PRIV_VFS_ADMIN); if (error) return (error); } ip->i_mode &= ~ALLPERMS; ip->i_mode |= (mode & ALLPERMS); DIP_SET(ip, i_mode, ip->i_mode); ip->i_flag |= IN_CHANGE; #ifdef UFS_ACL if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0) error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td); #endif if (error == 0 && (ip->i_flag & IN_CHANGE) != 0) error = UFS_UPDATE(vp, 0); return (error); } /* * Perform chown operation on inode ip; * inode must be locked prior to call. */ static int ufs_chown(vp, uid, gid, cred, td) struct vnode *vp; uid_t uid; gid_t gid; struct ucred *cred; struct thread *td; { struct inode *ip = VTOI(vp); uid_t ouid; gid_t ogid; int error = 0; #ifdef QUOTA int i; ufs2_daddr_t change; #endif if (uid == (uid_t)VNOVAL) uid = ip->i_uid; if (gid == (gid_t)VNOVAL) gid = ip->i_gid; /* * To modify the ownership of a file, must possess VADMIN for that * file. */ if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td))) return (error); /* * To change the owner of a file, or change the group of a file to a * group of which we are not a member, the caller must have * privilege. */ if (((uid != ip->i_uid && uid != cred->cr_uid) || (gid != ip->i_gid && !groupmember(gid, cred))) && (error = priv_check_cred(cred, PRIV_VFS_CHOWN))) return (error); ogid = ip->i_gid; ouid = ip->i_uid; #ifdef QUOTA if ((error = getinoquota(ip)) != 0) return (error); if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } change = DIP(ip, i_blocks); (void) chkdq(ip, -change, cred, CHOWN); (void) chkiq(ip, -1, cred, CHOWN); for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } #endif ip->i_gid = gid; DIP_SET(ip, i_gid, gid); ip->i_uid = uid; DIP_SET(ip, i_uid, uid); #ifdef QUOTA if ((error = getinoquota(ip)) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) goto good; else (void) chkdq(ip, -change, cred, CHOWN|FORCE); } for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } } ip->i_gid = ogid; DIP_SET(ip, i_gid, ogid); ip->i_uid = ouid; DIP_SET(ip, i_uid, ouid); if (getinoquota(ip) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } (void) chkdq(ip, change, cred, FORCE|CHOWN); (void) chkiq(ip, 1, cred, FORCE|CHOWN); (void) getinoquota(ip); } return (error); good: if (getinoquota(ip)) panic("ufs_chown: lost quota"); #endif /* QUOTA */ ip->i_flag |= IN_CHANGE; if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { ip->i_mode &= ~(ISUID | ISGID); DIP_SET(ip, i_mode, ip->i_mode); } } error = UFS_UPDATE(vp, 0); return (error); } static int ufs_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct inode *ip; struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; int error; struct thread *td; td = curthread; ip = VTOI(vp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(dvp)->i_flags & APPEND)) { error = EPERM; goto 
out; } #ifdef UFS_GJOURNAL ufs_gjournal_orphan(vp); #endif error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); if (ip->i_nlink <= 0) vp->v_vflag |= VV_NOSYNC; if ((ip->i_flags & SF_SNAPSHOT) != 0) { /* * Avoid deadlock where another thread is trying to * update the inodeblock for dvp and is waiting on * snaplk. Temporarily unlock the vnode lock for the * unlinked file and sync the directory. This should * allow vput() of the directory to not block later on * while holding the snapshot vnode locked, assuming * that the directory hasn't been unlinked too. */ VOP_UNLOCK(vp, 0); (void) VOP_FSYNC(dvp, MNT_WAIT, td); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } out: return (error); } static void print_bad_link_count(const char *funcname, struct vnode *dvp) { struct inode *dip; dip = VTOI(dvp); uprintf("%s: Bad link count %d on parent inode %jd in file system %s\n", funcname, dip->i_effnlink, (intmax_t)dip->i_number, dvp->v_mount->mnt_stat.f_mntonname); } /* * link vnode call */ static int ufs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct inode *ip; struct direct newdir; int error; #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_link: no name"); #endif if (VTOI(tdvp)->i_effnlink < 2) { print_bad_link_count("ufs_link", tdvp); error = EINVAL; goto out; } ip = VTOI(vp); if (ip->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto out; } /* * The file may have been removed after namei dropped the original * lock. */ if (ip->i_effnlink == 0) { error = ENOENT; goto out; } if (ip->i_flags & (IMMUTABLE | APPEND)) { error = EPERM; goto out; } ip->i_effnlink++; ip->i_nlink++; DIP_SET(ip, i_nlink, ip->i_nlink); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(vp)) softdep_setup_link(VTOI(tdvp), ip); error = UFS_UPDATE(vp, !DOINGSOFTDEP(vp) && !DOINGASYNC(vp)); if (!error) { ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0); } if (error) { ip->i_effnlink--; ip->i_nlink--; DIP_SET(ip, i_nlink, ip->i_nlink); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(vp)) softdep_revert_link(VTOI(tdvp), ip); } out: return (error); } /* * whiteout vnode call */ static int ufs_whiteout(ap) struct vop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; } */ *ap; { struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct direct newdir; int error = 0; switch (ap->a_flags) { case LOOKUP: /* 4.4 format directories support whiteout operations */ if (dvp->v_mount->mnt_maxsymlinklen > 0) return (0); return (EOPNOTSUPP); case CREATE: /* create a new directory whiteout */ #ifdef INVARIANTS if ((cnp->cn_flags & SAVENAME) == 0) panic("ufs_whiteout: missing name"); if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif newdir.d_ino = UFS_WINO; newdir.d_namlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); newdir.d_type = DT_WHT; error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0); break; case DELETE: /* remove an existing directory whiteout */ #ifdef INVARIANTS if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif cnp->cn_flags &= ~DOWHITEOUT; error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); break; default: panic("ufs_whiteout: unknown op"); } return (error); } static volatile int rename_restarts; SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts,
CTLFLAG_RD, __DEVOLATILE(int *, &rename_restarts), 0, "Times rename had to restart due to lock contention"); /* * Rename system call. * rename("foo", "bar"); * is essentially * unlink("bar"); * link("foo", "bar"); * unlink("foo"); * but ``atomically''. Can't do full commit without saving state in the * inode on disk which isn't feasible at this time. Best we can do is * always guarantee the target exists. * * Basic algorithm is: * * 1) Bump link count on source while we're linking it to the * target. This also ensures the inode won't be deleted out * from underneath us while we work (it may be truncated by * a concurrent `trunc' or `open' for creation). * 2) Link source to destination. If destination already exists, * delete it first. * 3) Unlink source reference to inode if still around. If a * directory was moved and the parent of the destination * is different from the source, patch the ".." entry in the * directory. */ static int ufs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *tvp = ap->a_tvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *nvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct thread *td = fcnp->cn_thread; struct inode *fip, *tip, *tdp, *fdp; struct direct newdir; off_t endoff; int doingdirectory, newparent; int error = 0; struct mount *mp; ino_t ino; #ifdef INVARIANTS if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("ufs_rename: no name"); #endif endoff = 0; mp = tdvp->v_mount; VOP_UNLOCK(tdvp, 0); if (tvp && tvp != tdvp) VOP_UNLOCK(tvp, 0); /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; mp = NULL; goto releout; } relock: /* * We need to acquire 2 to 4 locks depending on whether tvp is NULL * and fdvp and tdvp are the same directory. Subsequently we need * to double-check all paths and in the directory rename case we * need to verify that we are not creating a directory loop. To * handle this we acquire all but fdvp using non-blocking * acquisitions. If we fail to acquire any lock in the path we will * drop all held locks, acquire the new lock in a blocking fashion, * and then release it and restart the rename. This acquire/release * step ensures that we do not spin on a lock waiting for release. */ error = vn_lock(fdvp, LK_EXCLUSIVE); if (error) goto releout; if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { VOP_UNLOCK(fdvp, 0); error = vn_lock(tdvp, LK_EXCLUSIVE); if (error) goto releout; VOP_UNLOCK(tdvp, 0); atomic_add_int(&rename_restarts, 1); goto relock; } /* * Re-resolve fvp to be certain it still exists and fetch the * correct vnode. */ error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); if (error) { VOP_UNLOCK(fdvp, 0); VOP_UNLOCK(tdvp, 0); goto releout; } error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); if (error) { VOP_UNLOCK(fdvp, 0); VOP_UNLOCK(tdvp, 0); if (error != EBUSY) goto releout; error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); if (error != 0) goto releout; VOP_UNLOCK(nvp, 0); vrele(fvp); fvp = nvp; atomic_add_int(&rename_restarts, 1); goto relock; } vrele(fvp); fvp = nvp; /* * Re-resolve tvp and acquire the vnode lock if present.
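 *
 * As an illustrative aside (not part of the original source), each
 * of these acquisitions follows the non-blocking/restart pattern
 * described at the "relock" label; unlock_all() below is a
 * hypothetical stand-in for dropping every lock held so far:
 */
#if 0
/* Illustrative sketch only; never compiled. */
	if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
		unlock_all();			/* hypothetical helper */
		(void)vn_lock(vp, LK_EXCLUSIVE);	/* wait out the holder */
		VOP_UNLOCK(vp, 0);
		atomic_add_int(&rename_restarts, 1);
		goto relock;
	}
#endif
/*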
*/ error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino); if (error != 0 && error != EJUSTRETURN) { VOP_UNLOCK(fdvp, 0); VOP_UNLOCK(tdvp, 0); VOP_UNLOCK(fvp, 0); goto releout; } /* * If tvp disappeared we just carry on. */ if (error == EJUSTRETURN && tvp != NULL) { vrele(tvp); tvp = NULL; } /* * Get the tvp ino if the lookup succeeded. We may have to restart * if the non-blocking acquire fails. */ if (error == 0) { nvp = NULL; error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); if (tvp) vrele(tvp); tvp = nvp; if (error) { VOP_UNLOCK(fdvp, 0); VOP_UNLOCK(tdvp, 0); VOP_UNLOCK(fvp, 0); if (error != EBUSY) goto releout; error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); if (error != 0) goto releout; vput(nvp); atomic_add_int(&rename_restarts, 1); goto relock; } } fdp = VTOI(fdvp); fip = VTOI(fvp); tdp = VTOI(tdvp); tip = NULL; if (tvp) tip = VTOI(tvp); if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(tdvp)->i_flags & APPEND))) { error = EPERM; goto unlockout; } /* * Renaming a file to itself has no effect. The upper layers should * not call us in that case. However, things could change after * we drop the locks above. */ if (fvp == tvp) { error = 0; goto unlockout; } doingdirectory = 0; newparent = 0; ino = fip->i_number; if (fip->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto unlockout; } if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (fdp->i_flags & APPEND)) { error = EPERM; goto unlockout; } if ((fip->i_mode & IFMT) == IFDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || fdp == fip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { error = EINVAL; goto unlockout; } if (fdp->i_number != tdp->i_number) newparent = tdp->i_number; doingdirectory = 1; } if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) || (tvp != NULL && tvp->v_type == VDIR && tvp->v_mountedhere != NULL)) { error = EXDEV; goto unlockout; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory hierarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". */ if (doingdirectory && newparent) { error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); if (error) goto unlockout; error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred, &ino); /* * We encountered a lock that we have to wait for. Unlock * everything else and VGET before restarting. */ if (ino) { VOP_UNLOCK(fdvp, 0); VOP_UNLOCK(fvp, 0); VOP_UNLOCK(tdvp, 0); if (tvp) VOP_UNLOCK(tvp, 0); error = VFS_VGET(mp, ino, LK_SHARED, &nvp); if (error == 0) vput(nvp); atomic_add_int(&rename_restarts, 1); goto relock; } if (error) goto unlockout; if ((tcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost to startdir"); } if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 || tdp->i_effnlink == 0) panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp); /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, the link count * may be wrong, but correctable. */ fip->i_effnlink++; fip->i_nlink++; DIP_SET(fip, i_nlink, fip->i_nlink); fip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(fvp)) softdep_setup_link(tdp, fip); error = UFS_UPDATE(fvp, !DOINGSOFTDEP(fvp) && !DOINGASYNC(fvp)); if (error) goto bad; /* * 2) If target doesn't exist, link the target * to the source and unlink the source. 
* Otherwise, rewrite the target directory * entry to reference the source inode and * expunge the original entry's existence. */ if (tip == NULL) { if (ITODEV(tdp) != ITODEV(fip)) panic("ufs_rename: EXDEV"); if (doingdirectory && newparent) { /* * Account for ".." in new directory. * When source and destination have the same * parent we don't adjust the link count. The * actual link modification is completed when * .. is rewritten below. */ if (tdp->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto bad; } } ufs_makedirentry(fip, tcnp, &newdir); error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1); if (error) goto bad; /* Setup tdvp for directory compaction if needed. */ if (tdp->i_count && tdp->i_endoff && tdp->i_endoff < tdp->i_size) endoff = tdp->i_endoff; } else { if (ITODEV(tip) != ITODEV(tdp) || ITODEV(tip) != ITODEV(fip)) panic("ufs_rename: EXDEV"); /* * Short circuit rename(foo, foo). */ if (tip->i_number == fip->i_number) panic("ufs_rename: same file"); /* * If the parent directory is "sticky", then the caller * must possess VADMIN for the parent directory, or the * destination of the rename. This implements append-only * directories. */ if ((tdp->i_mode & S_ISTXT) && VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) && VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if ((tip->i_mode & IFMT) == IFDIR) { if ((tip->i_effnlink > 2) || !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } if (doingdirectory) { if (!newparent) { tdp->i_effnlink--; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(tdp); } tip->i_effnlink--; if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(tip); } error = ufs_dirrewrite(tdp, tip, fip->i_number, IFTODT(fip->i_mode), (doingdirectory && newparent) ? newparent : doingdirectory); if (error) { if (doingdirectory) { if (!newparent) { tdp->i_effnlink++; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(tdp); } tip->i_effnlink++; if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(tip); } } if (doingdirectory && !DOINGSOFTDEP(tvp)) { /* * The only stuff left in the directory is "." * and "..". The "." reference is inconsequential * since we are quashing it. We have removed the "." * reference and the reference in the parent directory, * but there may be other hard links. The soft * dependency code will arrange to do these operations * after the parent directory entry has been deleted on * disk, so when running with that code we avoid doing * them now. */ if (!newparent) { tdp->i_nlink--; DIP_SET(tdp, i_nlink, tdp->i_nlink); tdp->i_flag |= IN_CHANGE; } tip->i_nlink--; DIP_SET(tip, i_nlink, tip->i_nlink); tip->i_flag |= IN_CHANGE; } } /* * 3) Unlink the source. We have to resolve the path again to * fixup the directory offset and count for ufs_dirremove. */ if (fdvp == tdvp) { error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); if (error) panic("ufs_rename: from entry went away!"); if (ino != fip->i_number) panic("ufs_rename: ino mismatch %ju != %ju\n", (uintmax_t)ino, (uintmax_t)fip->i_number); } /* * If the source is a directory with a * new parent, the link count of the old * parent directory must be decremented * and ".." set to point to the new parent. 
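 *
 * (Setting fip->i_offset to mastertemplate.dot_reclen -- 12, the
 * record length of "." -- points ufs_dirrewrite() at the ".."
 * entry, which always sits second in the directory's first block.)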
*/ if (doingdirectory && newparent) { /* * If tip exists we simply use its link, otherwise we must * add a new one. */ if (tip == NULL) { tdp->i_effnlink++; tdp->i_nlink++; DIP_SET(tdp, i_nlink, tdp->i_nlink); tdp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tdvp)) softdep_setup_dotdot_link(tdp, fip); error = UFS_UPDATE(tdvp, !DOINGSOFTDEP(tdvp) && !DOINGASYNC(tdvp)); /* Don't go to bad here as the new link exists. */ if (error) goto unlockout; } else if (DOINGSUJ(tdvp)) /* Journal must account for each new link. */ softdep_setup_dotdot_link(tdp, fip); fip->i_offset = mastertemplate.dot_reclen; ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0); cache_purge(fdvp); } error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0); /* * The kern_renameat() looks up the fvp using the DELETE flag, which * causes the removal of the name cache entry for fvp. * As the relookup of the fvp is done in two steps: * ufs_lookup_ino() and then VFS_VGET(), another thread might do a * normal lookup of the from name just before the VFS_VGET() call, * causing the cache entry to be re-instantiated. * * The same issue also applies to tvp if it exists, as * otherwise we may have a stale name cache entry for the new * name that references the old i-node if it has other links * or open file descriptors. */ cache_purge(fvp); if (tvp) cache_purge(tvp); cache_purge_negative(tdvp); unlockout: vput(fdvp); vput(fvp); if (tvp) vput(tvp); /* * If compaction or fsync was requested, do it now that other locks * are no longer needed. */ if (error == 0 && endoff != 0) { error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | (DOINGASYNC(tdvp) ? 0 : IO_SYNC), tcnp->cn_cred); if (error != 0) vn_printf(tdvp, "ufs_rename: failed to truncate, error %d\n", error); #ifdef UFS_DIRHASH else if (tdp->i_dirhash != NULL) ufsdirhash_dirtrunc(tdp, endoff); #endif /* * Even if the directory compaction failed, rename was * successful. Do not propagate a UFS_TRUNCATE() error * to the caller. */ error = 0; } if (error == 0 && tdp->i_flag & IN_NEEDSYNC) error = VOP_FSYNC(tdvp, MNT_WAIT, td); vput(tdvp); return (error); bad: fip->i_effnlink--; fip->i_nlink--; DIP_SET(fip, i_nlink, fip->i_nlink); fip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(fvp)) softdep_revert_link(tdp, fip); goto unlockout; releout: vrele(fdvp); vrele(fvp); vrele(tdvp); if (tvp) vrele(tvp); return (error); } #ifdef UFS_ACL static int ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp, mode_t dmode, struct ucred *cred, struct thread *td) { int error; struct inode *ip = VTOI(tvp); struct acl *dacl, *acl; acl = acl_alloc(M_WAITOK); dacl = acl_alloc(M_WAITOK); /* * Retrieve default ACL from parent, if any. */ error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); switch (error) { case 0: /* * Retrieved a default ACL, so merge mode and ACL if * necessary. If the ACL is empty, fall through to * the "not defined or available" case. */ if (acl->acl_cnt != 0) { dmode = acl_posix1e_newfilemode(dmode, acl); ip->i_mode = dmode; DIP_SET(ip, i_mode, dmode); *dacl = *acl; ufs_sync_acl_from_inode(ip, acl); break; } /* FALLTHROUGH */ case EOPNOTSUPP: /* * Just use the mode as-is. */ ip->i_mode = dmode; DIP_SET(ip, i_mode, dmode); error = 0; goto out; default: goto out; } /* * XXX: If we abort now, will Soft Updates notify the extattr * code that the EAs for the file need to be released?
*/ error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); if (error == 0) error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td); switch (error) { case 0: break; case EOPNOTSUPP: /* * XXX: This should not happen, as EOPNOTSUPP above * was supposed to free acl. */ printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n"); /* panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()"); */ break; default: goto out; } out: acl_free(acl); acl_free(dacl); return (error); } static int ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp, mode_t mode, struct ucred *cred, struct thread *td) { int error; struct inode *ip = VTOI(tvp); struct acl *acl; acl = acl_alloc(M_WAITOK); /* * Retrieve default ACL for parent, if any. */ error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); switch (error) { case 0: /* * Retrieved a default ACL, so merge mode and ACL if * necessary. */ if (acl->acl_cnt != 0) { /* * Two possible ways for default ACL to not * be present. First, the EA can be * undefined, or second, the default ACL can * be blank. If it's blank, fall through to * the it's not defined case. */ mode = acl_posix1e_newfilemode(mode, acl); ip->i_mode = mode; DIP_SET(ip, i_mode, mode); ufs_sync_acl_from_inode(ip, acl); break; } /* FALLTHROUGH */ case EOPNOTSUPP: /* * Just use the mode as-is. */ ip->i_mode = mode; DIP_SET(ip, i_mode, mode); error = 0; goto out; default: goto out; } /* * XXX: If we abort now, will Soft Updates notify the extattr * code that the EAs for the file need to be released? */ error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); switch (error) { case 0: break; case EOPNOTSUPP: /* * XXX: This should not happen, as EOPNOTSUPP above was * supposed to free acl. */ printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " "but no VOP_SETACL()\n"); /* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " "but no VOP_SETACL()"); */ break; default: goto out; } out: acl_free(acl); return (error); } static int ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp, mode_t child_mode, struct ucred *cred, struct thread *td) { int error; struct acl *parent_aclp, *child_aclp; parent_aclp = acl_alloc(M_WAITOK); child_aclp = acl_alloc(M_WAITOK | M_ZERO); error = ufs_getacl_nfs4_internal(dvp, parent_aclp, td); if (error) goto out; acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp, child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR); error = ufs_setacl_nfs4_internal(tvp, child_aclp, td); if (error) goto out; out: acl_free(parent_aclp); acl_free(child_aclp); return (error); } #endif /* * Mkdir system call */ static int ufs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; struct vnode *tvp; struct buf *bp; struct dirtemplate dirtemplate, *dtp; struct direct newdir; int error, dmode; long blkoff; #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_mkdir: no name"); #endif dp = VTOI(dvp); if (dp->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto out; } dmode = vap->va_mode & 0777; dmode |= IFDIR; /* * Must simulate part of ufs_makeinode here to acquire the inode, * but not have it entered in the parent directory. The entry is * made later after writing "." and ".." entries. 
*/ if (dp->i_effnlink < 2) { print_bad_link_count("ufs_mkdir", dvp); error = EINVAL; goto out; } error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); if (error) goto out; ip = VTOI(tvp); ip->i_gid = dp->i_gid; DIP_SET(ip, i_gid, dp->i_gid); #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; gid_t ucred_group; ucp = cnp->cn_cred; #endif /* * If we are hacking owners here, (only do this where told to) * and we are not giving it TO root, (would subvert quotas) * then go ahead and give it to the other user. * The new directory also inherits the SUID bit. * If user's UID and dir UID are the same, * 'give it away' so that the SUID is still forced on. */ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (dp->i_mode & ISUID) && dp->i_uid) { dmode |= ISUID; ip->i_uid = dp->i_uid; DIP_SET(ip, i_uid, dp->i_uid); #ifdef QUOTA if (dp->i_uid != cnp->cn_cred->cr_uid) { /* * Make sure the correct user gets charged * for the space. * Make a dummy credential for the victim. * XXX This seems to never be accessed out of * our context so a stack variable is ok. */ refcount_init(&ucred.cr_ref, 1); ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups = &ucred_group; ucred.cr_groups[0] = dp->i_gid; ucp = &ucred; } #endif } else { ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); } #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); return (error); } #endif #endif /* !SUIDDIR */ ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = dmode; DIP_SET(ip, i_mode, dmode); tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 2; ip->i_nlink = 2; DIP_SET(ip, i_nlink, 2); if (cnp->cn_flags & ISWHITEOUT) { ip->i_flags |= UF_OPAQUE; DIP_SET(ip, i_flags, ip->i_flags); } /* * Bump link count in parent directory to reflect work done below. * Should be done before reference is created so cleanup is * possible if we crash. */ dp->i_effnlink++; dp->i_nlink++; DIP_SET(dp, i_nlink, dp->i_nlink); dp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(dvp)) softdep_setup_mkdir(dp, ip); error = UFS_UPDATE(dvp, !DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)); if (error) goto bad; #ifdef MAC if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, dvp, tvp, cnp); if (error) goto bad; } #endif #ifdef UFS_ACL if (dvp->v_mount->mnt_flag & MNT_ACLS) { error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } #endif /* !UFS_ACL */ /* * Initialize directory with "." and ".." from static template. 
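 *
 * (Illustrative sketch, assuming the modern template and the usual
 * DIRBLKSIZ of 512: the "." entry records the new directory's own
 * inode with a 12-byte record length, and the ".." entry records the
 * parent with a record length covering the remaining 500 bytes, so
 * valid entries always span the whole directory block.)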
*/ if (dvp->v_mount->mnt_maxsymlinklen > 0) dtp = &mastertemplate; else dtp = (struct dirtemplate *)&omastertemplate; dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; vnode_pager_setsize(tvp, DIRBLKSIZ); if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, BA_CLRBUF, &bp)) != 0) goto bad; ip->i_size = DIRBLKSIZ; DIP_SET(ip, i_size, DIRBLKSIZ); ip->i_flag |= IN_CHANGE | IN_UPDATE; bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); if (DOINGSOFTDEP(tvp)) { /* * Ensure that the entire newly allocated block is a * valid directory so that future growth within the * block does not have to ensure that the block is * written before the inode. */ blkoff = DIRBLKSIZ; while (blkoff < bp->b_bcount) { ((struct direct *) (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; blkoff += DIRBLKSIZ; } } if ((error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && !DOINGASYNC(tvp))) != 0) { (void)bwrite(bp); goto bad; } /* * Directory set up, now install its entry in the parent directory. * * If we are not doing soft dependencies, then we must write out the * buffer containing the new directory body before entering the new * name in the parent. If we are doing soft dependencies, then the * buffer containing the new directory body will be passed to and * released in the soft dependency code after the code has attached * an appropriate ordering dependency to the buffer which ensures that * the buffer is written before the new name is written in the parent. */ if (DOINGASYNC(dvp)) bdwrite(bp); else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp)))) goto bad; ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, tvp, &newdir, cnp, bp, 0); bad: if (error == 0) { *ap->a_vpp = tvp; } else { dp->i_effnlink--; dp->i_nlink--; DIP_SET(dp, i_nlink, dp->i_nlink); dp->i_flag |= IN_CHANGE; /* * No need to do an explicit VOP_TRUNCATE here, vrele will * do this for us because we set the link count to 0. */ ip->i_effnlink = 0; ip->i_nlink = 0; DIP_SET(ip, i_nlink, 0); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tvp)) softdep_revert_mkdir(dp, ip); vput(tvp); } out: return (error); } /* * Rmdir system call. */ static int ufs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; int error; ip = VTOI(vp); dp = VTOI(dvp); /* * Do not remove a directory that is in the process of being renamed. * Verify the directory is empty (and valid). Rmdir ".." will not be * valid since ".." will contain a reference to the current directory * and thus be non-empty. Do not allow the removal of mounted on * directories (this can happen when an NFS exported filesystem * tries to remove a locally mounted on directory). */ error = 0; if (dp->i_effnlink <= 2) { if (dp->i_effnlink == 2) print_bad_link_count("ufs_rmdir", dvp); error = EINVAL; goto out; } if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; } if ((dp->i_flags & APPEND) || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } if (vp->v_mountedhere != 0) { error = EINVAL; goto out; } #ifdef UFS_GJOURNAL ufs_gjournal_orphan(vp); #endif /* * Delete reference to directory before purging * inode. 
If we crash in between, the directory * will be reattached to lost+found. */ dp->i_effnlink--; ip->i_effnlink--; if (DOINGSOFTDEP(vp)) softdep_setup_rmdir(dp, ip); error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); if (error) { dp->i_effnlink++; ip->i_effnlink++; if (DOINGSOFTDEP(vp)) softdep_revert_rmdir(dp, ip); goto out; } cache_purge(dvp); /* * The only stuff left in the directory is "." and "..". The "." * reference is inconsequential since we are quashing it. The soft * dependency code will arrange to do these operations after * the parent directory entry has been deleted on disk, so * when running with that code we avoid doing them now. */ if (!DOINGSOFTDEP(vp)) { dp->i_nlink--; DIP_SET(dp, i_nlink, dp->i_nlink); dp->i_flag |= IN_CHANGE; error = UFS_UPDATE(dvp, 0); ip->i_nlink--; DIP_SET(ip, i_nlink, ip->i_nlink); ip->i_flag |= IN_CHANGE; } cache_purge(vp); #ifdef UFS_DIRHASH /* Kill any active hash; i_effnlink == 0, so it will not come back. */ if (ip->i_dirhash != NULL) ufsdirhash_free(ip); #endif out: return (error); } /* * symlink -- make a symbolic link */ static int ufs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; const char *a_target; } */ *ap; { struct vnode *vp, **vpp = ap->a_vpp; struct inode *ip; int len, error; error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, vpp, ap->a_cnp, "ufs_symlink"); if (error) return (error); vp = *vpp; len = strlen(ap->a_target); if (len < vp->v_mount->mnt_maxsymlinklen) { ip = VTOI(vp); bcopy(ap->a_target, SHORTLINK(ip), len); ip->i_size = len; DIP_SET(ip, i_size, len); ip->i_flag |= IN_CHANGE | IN_UPDATE; error = UFS_UPDATE(vp, 0); } else error = vn_rdwr(UIO_WRITE, vp, __DECONST(void *, ap->a_target), len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, ap->a_cnp->cn_cred, NOCRED, NULL, NULL); if (error) vput(vp); return (error); } /* * Vnode op for reading directories.
*/ int ufs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long **a_cookies; } */ *ap; { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct buf *bp; struct inode *ip; struct direct *dp, *edp; u_long *cookies; struct dirent dstdp; off_t offset, startoffset; size_t readcnt, skipcnt; ssize_t startresid; u_int ncookies; int error; if (uio->uio_offset < 0) return (EINVAL); ip = VTOI(vp); if (ip->i_effnlink == 0) return (0); if (ap->a_ncookies != NULL) { if (uio->uio_resid < 0) ncookies = 0; else ncookies = uio->uio_resid; if (uio->uio_offset >= ip->i_size) ncookies = 0; else if (ip->i_size - uio->uio_offset < ncookies) ncookies = ip->i_size - uio->uio_offset; ncookies = ncookies / (offsetof(struct direct, d_name) + 4) + 1; cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); *ap->a_ncookies = ncookies; *ap->a_cookies = cookies; } else { ncookies = 0; cookies = NULL; } offset = startoffset = uio->uio_offset; startresid = uio->uio_resid; error = 0; while (error == 0 && uio->uio_resid > 0 && uio->uio_offset < ip->i_size) { error = ffs_blkatoff(vp, uio->uio_offset, NULL, &bp); if (error) break; if (bp->b_offset + bp->b_bcount > ip->i_size) readcnt = ip->i_size - bp->b_offset; else readcnt = bp->b_bcount; skipcnt = (size_t)(uio->uio_offset - bp->b_offset) & ~(size_t)(DIRBLKSIZ - 1); offset = bp->b_offset + skipcnt; dp = (struct direct *)&bp->b_data[skipcnt]; edp = (struct direct *)&bp->b_data[readcnt]; while (error == 0 && uio->uio_resid > 0 && dp < edp) { if (dp->d_reclen <= offsetof(struct direct, d_name) || (caddr_t)dp + dp->d_reclen > (caddr_t)edp) { error = EIO; break; } #if BYTE_ORDER == LITTLE_ENDIAN /* Old filesystem format. */ if (vp->v_mount->mnt_maxsymlinklen <= 0) { dstdp.d_namlen = dp->d_type; dstdp.d_type = dp->d_namlen; } else #endif { dstdp.d_namlen = dp->d_namlen; dstdp.d_type = dp->d_type; } if (offsetof(struct direct, d_name) + dstdp.d_namlen > dp->d_reclen) { error = EIO; break; } if (offset < startoffset || dp->d_ino == 0) goto nextentry; dstdp.d_fileno = dp->d_ino; dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp); bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); /* NOTE: d_off is the offset of the *next* entry. */ dstdp.d_off = offset + dp->d_reclen; dirent_terminate(&dstdp); if (dstdp.d_reclen > uio->uio_resid) { if (uio->uio_resid == startresid) error = EINVAL; else error = EJUSTRETURN; break; } /* Advance dp. */ error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio); if (error) break; if (cookies != NULL) { KASSERT(ncookies > 0, ("ufs_readdir: cookies buffer too small")); *cookies = offset + dp->d_reclen; cookies++; ncookies--; } nextentry: offset += dp->d_reclen; dp = (struct direct *)((caddr_t)dp + dp->d_reclen); } bqrelse(bp); uio->uio_offset = offset; } /* We need to correct uio_offset. 
*/ uio->uio_offset = offset; if (error == EJUSTRETURN) error = 0; if (ap->a_ncookies != NULL) { if (error == 0) { *ap->a_ncookies -= ncookies; } else { free(*ap->a_cookies, M_TEMP); *ap->a_ncookies = 0; *ap->a_cookies = NULL; } } if (error == 0 && ap->a_eofflag) *ap->a_eofflag = ip->i_size <= uio->uio_offset; return (error); } /* * Return target name of a symbolic link */ static int ufs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); doff_t isize; isize = ip->i_size; if ((isize < vp->v_mount->mnt_maxsymlinklen) || DIP(ip, i_blocks) == 0) { /* XXX - for old fastlink support */ return (uiomove(SHORTLINK(ip), isize, ap->a_uio)); } return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); } /* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. * * In order to be able to swap to a file, the ufs_bmaparray() operation may not * deadlock on memory. See ufs_bmap() for details. */ static int ufs_strategy(ap) struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *ap; { struct buf *bp = ap->a_bp; struct vnode *vp = ap->a_vp; ufs2_daddr_t blkno; int error; if (bp->b_blkno == bp->b_lblkno) { error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL); bp->b_blkno = blkno; if (error) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return (0); } if ((long)bp->b_blkno == -1) vfs_bio_clrbuf(bp); } if ((long)bp->b_blkno == -1) { bufdone(bp); return (0); } bp->b_iooffset = dbtob(bp->b_blkno); BO_STRATEGY(VFSTOUFS(vp->v_mount)->um_bo, bp); return (0); } /* * Print out the contents of an inode. */ static int ufs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); printf("\tnlink=%d, effnlink=%d, size=%jd", ip->i_nlink, ip->i_effnlink, (intmax_t)ip->i_size); if (I_IS_UFS2(ip)) printf(", extsize %d", ip->i_din2->di_extsize); printf("\n\tgeneration=%jx, uid=%d, gid=%d, flags=0x%b\n", (uintmax_t)ip->i_gen, ip->i_uid, ip->i_gid, (u_int)ip->i_flags, PRINT_INODE_FLAGS); printf("\tino %lu, on dev %s", (u_long)ip->i_number, devtoname(ITODEV(ip))); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * Close wrapper for fifos. * * Update the times on the inode then do device close. */ static int ufsfifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; int usecount; VI_LOCK(vp); usecount = vp->v_usecount; if (usecount > 1) ufs_itimes_locked(vp); VI_UNLOCK(vp); return (fifo_specops.vop_close(ap)); } /* * Kqfilter wrapper for fifos. * * Fall through to ufs kqfilter routines if needed */ static int ufsfifo_kqfilter(ap) struct vop_kqfilter_args *ap; { int error; error = fifo_specops.vop_kqfilter(ap); if (error) error = vfs_kqfilter(ap); return (error); } /* * Return POSIX pathconf information applicable to ufs filesystems.
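 *
 * (For example, pathconf(2) with _PC_NAME_MAX on a UFS file reaches
 * this switch and reports UFS_MAXNAMLEN (255), and _PC_LINK_MAX
 * reports UFS_LINK_MAX; names not handled below fall back to
 * vop_stdpathconf().)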
*/ static int ufs_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { int error; error = 0; switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = UFS_LINK_MAX; break; case _PC_NAME_MAX: *ap->a_retval = UFS_MAXNAMLEN; break; case _PC_PIPE_BUF: if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) *ap->a_retval = PIPE_BUF; else error = EINVAL; break; case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; break; case _PC_NO_TRUNC: *ap->a_retval = 1; break; #ifdef UFS_ACL case _PC_ACL_EXTENDED: if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) *ap->a_retval = 1; else *ap->a_retval = 0; break; case _PC_ACL_NFS4: if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS) *ap->a_retval = 1; else *ap->a_retval = 0; break; #endif case _PC_ACL_PATH_MAX: #ifdef UFS_ACL if (ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) *ap->a_retval = ACL_MAX_ENTRIES; else *ap->a_retval = 3; #else *ap->a_retval = 3; #endif break; #ifdef MAC case _PC_MAC_PRESENT: if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL) *ap->a_retval = 1; else *ap->a_retval = 0; break; #endif case _PC_MIN_HOLE_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_PRIO_IO: *ap->a_retval = 0; break; case _PC_SYNC_IO: *ap->a_retval = 0; break; case _PC_ALLOC_SIZE_MIN: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; break; case _PC_FILESIZEBITS: *ap->a_retval = 64; break; case _PC_REC_INCR_XFER_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_MAX_XFER_SIZE: *ap->a_retval = -1; /* means ``unlimited'' */ break; case _PC_REC_MIN_XFER_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_XFER_ALIGN: *ap->a_retval = PAGE_SIZE; break; case _PC_SYMLINK_MAX: *ap->a_retval = MAXPATHLEN; break; default: error = vop_stdpathconf(ap); break; } return (error); } /* * Initialize the vnode associated with a new inode, handle aliased * vnodes. */ int ufs_vinit(mntp, fifoops, vpp) struct mount *mntp; struct vop_vector *fifoops; struct vnode **vpp; { struct inode *ip; struct vnode *vp; vp = *vpp; ASSERT_VOP_LOCKED(vp, "ufs_vinit"); ip = VTOI(vp); vp->v_type = IFTOVT(ip->i_mode); /* * Only unallocated inodes should be of type VNON. */ if (ip->i_mode != 0 && vp->v_type == VNON) return (EINVAL); if (vp->v_type == VFIFO) vp->v_op = fifoops; if (ip->i_number == UFS_ROOTINO) vp->v_vflag |= VV_ROOT; *vpp = vp; return (0); } /* * Allocate a new inode. * Vnode dvp must be locked. */ static int ufs_makeinode(mode, dvp, vpp, cnp, callfunc) int mode; struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; const char *callfunc; { struct inode *ip, *pdir; struct direct newdir; struct vnode *tvp; int error; pdir = VTOI(dvp); #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("%s: no name", callfunc); #endif *vpp = NULL; if ((mode & IFMT) == 0) mode |= IFREG; if (pdir->i_effnlink < 2) { print_bad_link_count(callfunc, dvp); return (EINVAL); } error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); if (error) return (error); ip = VTOI(tvp); ip->i_gid = pdir->i_gid; DIP_SET(ip, i_gid, pdir->i_gid); #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; gid_t ucred_group; ucp = cnp->cn_cred; #endif /* * If we are not the owner of the directory, * and we are hacking owners here, (only do this where told to) * and we are not giving it TO root, (would subvert quotas) * then go ahead and give it to the other user. * Note that this drops off the execute bits for security. 
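 *
 * (Worked example: a requested mode of 0755 becomes 0644 after the
 * "mode &= ~07111" below, since 07111 masks the setuid, setgid and
 * sticky bits together with all three execute bits.)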
*/ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (pdir->i_mode & ISUID) && (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { ip->i_uid = pdir->i_uid; DIP_SET(ip, i_uid, ip->i_uid); mode &= ~07111; #ifdef QUOTA /* * Make sure the correct user gets charged * for the space. * Quickly knock up a dummy credential for the victim. * XXX This seems to never be accessed out of our * context so a stack variable is ok. */ refcount_init(&ucred.cr_ref, 1); ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups = &ucred_group; ucred.cr_groups[0] = pdir->i_gid; ucp = &ucred; #endif } else { ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); } #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(pdir, ip); UFS_VFREE(tvp, ip->i_number, mode); vput(tvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(pdir, ip); UFS_VFREE(tvp, ip->i_number, mode); vput(tvp); return (error); } #endif #endif /* !SUIDDIR */ ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = mode; DIP_SET(ip, i_mode, mode); tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 1; ip->i_nlink = 1; DIP_SET(ip, i_nlink, 1); if (DOINGSOFTDEP(tvp)) softdep_setup_create(VTOI(dvp), ip); if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID)) { ip->i_mode &= ~ISGID; DIP_SET(ip, i_mode, ip->i_mode); } if (cnp->cn_flags & ISWHITEOUT) { ip->i_flags |= UF_OPAQUE; DIP_SET(ip, i_flags, ip->i_flags); } /* * Make sure inode goes to disk before directory entry. */ error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && !DOINGASYNC(tvp)); if (error) goto bad; #ifdef MAC if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, dvp, tvp, cnp); if (error) goto bad; } #endif #ifdef UFS_ACL if (dvp->v_mount->mnt_flag & MNT_ACLS) { error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp, mode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { error = ufs_do_nfs4_acl_inheritance(dvp, tvp, mode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } #endif /* !UFS_ACL */ ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0); if (error) goto bad; *vpp = tvp; return (0); bad: /* * Write error occurred trying to update the inode * or the directory so must deallocate the inode. */ ip->i_effnlink = 0; ip->i_nlink = 0; DIP_SET(ip, i_nlink, 0); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tvp)) softdep_revert_create(VTOI(dvp), ip); vput(tvp); return (error); } static int ufs_ioctl(struct vop_ioctl_args *ap) { + struct vnode *vp; + vp = ap->a_vp; switch (ap->a_command) { case FIOSEEKDATA: + return (ufs_bmap_seekdata(vp, (off_t *)ap->a_data)); case FIOSEEKHOLE: - return (vn_bmap_seekhole(ap->a_vp, ap->a_command, - (off_t *)ap->a_data, ap->a_cred)); + return (vn_bmap_seekhole(vp, ap->a_command, (off_t *)ap->a_data, + ap->a_cred)); default: return (ENOTTY); } } /* Global vfs data structures for ufs. 
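 *
 * An aside on ufs_ioctl() above, as a hedged userspace sketch: on
 * FreeBSD, lseek(2) implements SEEK_DATA and SEEK_HOLE via the
 * FIOSEEKDATA and FIOSEEKHOLE VOP_IOCTLs, so code such as
 *
 *	off_t data = lseek(fd, (off_t)0, SEEK_DATA);
 *	off_t hole = lseek(fd, data, SEEK_HOLE);
 *
 * now reaches ufs_bmap_seekdata() for the data case rather than the
 * generic vn_bmap_seekhole() path previously used for both cases.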
*/ struct vop_vector ufs_vnodeops = { .vop_default = &default_vnodeops, .vop_fsync = VOP_PANIC, .vop_read = VOP_PANIC, .vop_reallocblks = VOP_PANIC, .vop_write = VOP_PANIC, .vop_accessx = ufs_accessx, .vop_bmap = ufs_bmap, .vop_cachedlookup = ufs_lookup, .vop_close = ufs_close, .vop_create = ufs_create, .vop_getattr = ufs_getattr, .vop_inactive = ufs_inactive, .vop_ioctl = ufs_ioctl, .vop_link = ufs_link, .vop_lookup = vfs_cache_lookup, .vop_markatime = ufs_markatime, .vop_mkdir = ufs_mkdir, .vop_mknod = ufs_mknod, .vop_open = ufs_open, .vop_pathconf = ufs_pathconf, .vop_poll = vop_stdpoll, .vop_print = ufs_print, .vop_readdir = ufs_readdir, .vop_readlink = ufs_readlink, .vop_reclaim = ufs_reclaim, .vop_remove = ufs_remove, .vop_rename = ufs_rename, .vop_rmdir = ufs_rmdir, .vop_setattr = ufs_setattr, #ifdef MAC .vop_setlabel = vop_stdsetlabel_ea, #endif .vop_strategy = ufs_strategy, .vop_symlink = ufs_symlink, .vop_whiteout = ufs_whiteout, #ifdef UFS_EXTATTR .vop_getextattr = ufs_getextattr, .vop_deleteextattr = ufs_deleteextattr, .vop_setextattr = ufs_setextattr, #endif #ifdef UFS_ACL .vop_getacl = ufs_getacl, .vop_setacl = ufs_setacl, .vop_aclcheck = ufs_aclcheck, #endif }; struct vop_vector ufs_fifoops = { .vop_default = &fifo_specops, .vop_fsync = VOP_PANIC, .vop_accessx = ufs_accessx, .vop_close = ufsfifo_close, .vop_getattr = ufs_getattr, .vop_inactive = ufs_inactive, .vop_kqfilter = ufsfifo_kqfilter, .vop_markatime = ufs_markatime, .vop_pathconf = ufs_pathconf, .vop_print = ufs_print, .vop_read = VOP_PANIC, .vop_reclaim = ufs_reclaim, .vop_setattr = ufs_setattr, #ifdef MAC .vop_setlabel = vop_stdsetlabel_ea, #endif .vop_write = VOP_PANIC, #ifdef UFS_EXTATTR .vop_getextattr = ufs_getextattr, .vop_deleteextattr = ufs_deleteextattr, .vop_setextattr = ufs_setextattr, #endif #ifdef UFS_ACL .vop_getacl = ufs_getacl, .vop_setacl = ufs_setacl, .vop_aclcheck = ufs_aclcheck, #endif }; Index: projects/runtime-coverage-v2/sys/vm/vm_map.c =================================================================== --- projects/runtime-coverage-v2/sys/vm/vm_map.c (revision 347075) +++ projects/runtime-coverage-v2/sys/vm/vm_map.c (revision 347076) @@ -1,4749 +1,4767 @@ /*- * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_map.c 8.3 (Berkeley) 1/12/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Virtual memory mapping module. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Virtual memory maps provide for the mapping, protection, * and sharing of virtual memory objects. In addition, * this module provides for an efficient virtual copy of * memory from one map to another. * * Synchronization is required prior to most operations. * * Maps consist of an ordered doubly-linked list of simple * entries; a self-adjusting binary search tree of these * entries is used to speed up lookups. * * Since portions of maps are specified by start/end addresses, * which may not align with existing map entries, all * routines merely "clip" entries to these start/end values. * [That is, an entry is split into two, bordering at a * start or end value.] Note that these clippings may not * always be necessary (as the two resulting entries are then * not changed); however, the clipping is done for convenience. * * As mentioned above, virtual copy operations are performed * by copying VM object references from one map to * another, and then marking both regions as copy-on-write. 
*/ static struct mtx map_sleep_mtx; static uma_zone_t mapentzone; static uma_zone_t kmapentzone; static uma_zone_t mapzone; static uma_zone_t vmspace_zone; static int vmspace_zinit(void *mem, int size, int flags); static int vm_map_zinit(void *mem, int size, int flags); static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max); static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map); static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry); static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry); static int vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry); static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot, vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags); #ifdef INVARIANTS static void vm_map_zdtor(void *mem, int size, void *arg); static void vmspace_zdtor(void *mem, int size, void *arg); #endif static int vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow); static void vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry, vm_offset_t failed_addr); #define ENTRY_CHARGED(e) ((e)->cred != NULL || \ ((e)->object.vm_object != NULL && (e)->object.vm_object->cred != NULL && \ !((e)->eflags & MAP_ENTRY_NEEDS_COPY))) /* * PROC_VMSPACE_{UN,}LOCK() can be a noop as long as vmspaces are type * stable. */ #define PROC_VMSPACE_LOCK(p) do { } while (0) #define PROC_VMSPACE_UNLOCK(p) do { } while (0) /* * VM_MAP_RANGE_CHECK: [ internal use only ] * * Asserts that the starting and ending region * addresses fall within the valid range of the map. */ #define VM_MAP_RANGE_CHECK(map, start, end) \ { \ if (start < vm_map_min(map)) \ start = vm_map_min(map); \ if (end > vm_map_max(map)) \ end = vm_map_max(map); \ if (start > end) \ start = end; \ } /* * vm_map_startup: * * Initialize the vm_map module. Must be called before * any other vm_map routines. * * Map and entry structures are allocated from the general * purpose memory pool with some exceptions: * * - The kernel map and kmem submap are allocated statically. * - Kernel map entries are allocated out of a static pool. * * These restrictions are necessary since malloc() uses the * maps and requires map entries.
*/ void vm_map_startup(void) { mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF); mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL, #ifdef INVARIANTS vm_map_zdtor, #else NULL, #endif vm_map_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_prealloc(mapzone, MAX_KMAP); kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_MTXCLASS | UMA_ZONE_VM); mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL, #ifdef INVARIANTS vmspace_zdtor, #else NULL, #endif vmspace_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); } static int vmspace_zinit(void *mem, int size, int flags) { struct vmspace *vm; vm = (struct vmspace *)mem; vm->vm_map.pmap = NULL; (void)vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map), flags); PMAP_LOCK_INIT(vmspace_pmap(vm)); return (0); } static int vm_map_zinit(void *mem, int size, int flags) { vm_map_t map; map = (vm_map_t)mem; memset(map, 0, sizeof(*map)); mtx_init(&map->system_mtx, "vm map (system)", NULL, MTX_DEF | MTX_DUPOK); sx_init(&map->lock, "vm map (user)"); return (0); } #ifdef INVARIANTS static void vmspace_zdtor(void *mem, int size, void *arg) { struct vmspace *vm; vm = (struct vmspace *)mem; vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg); } static void vm_map_zdtor(void *mem, int size, void *arg) { vm_map_t map; map = (vm_map_t)mem; KASSERT(map->nentries == 0, ("map %p nentries == %d on free.", map, map->nentries)); KASSERT(map->size == 0, ("map %p size == %lu on free.", map, (unsigned long)map->size)); } #endif /* INVARIANTS */ /* * Allocate a vmspace structure, including a vm_map and pmap, * and initialize those structures. The refcnt is set to 1. * * If 'pinit' is NULL then the embedded pmap is initialized via pmap_pinit(). */ struct vmspace * vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit) { struct vmspace *vm; vm = uma_zalloc(vmspace_zone, M_WAITOK); KASSERT(vm->vm_map.pmap == NULL, ("vm_map.pmap must be NULL")); if (!pinit(vmspace_pmap(vm))) { uma_zfree(vmspace_zone, vm); return (NULL); } CTR1(KTR_VM, "vmspace_alloc: %p", vm); _vm_map_init(&vm->vm_map, vmspace_pmap(vm), min, max); vm->vm_refcnt = 1; vm->vm_shm = NULL; vm->vm_swrss = 0; vm->vm_tsize = 0; vm->vm_dsize = 0; vm->vm_ssize = 0; vm->vm_taddr = 0; vm->vm_daddr = 0; vm->vm_maxsaddr = 0; return (vm); } #ifdef RACCT static void vmspace_container_reset(struct proc *p) { PROC_LOCK(p); racct_set(p, RACCT_DATA, 0); racct_set(p, RACCT_STACK, 0); racct_set(p, RACCT_RSS, 0); racct_set(p, RACCT_MEMLOCK, 0); racct_set(p, RACCT_VMEM, 0); PROC_UNLOCK(p); } #endif static inline void vmspace_dofree(struct vmspace *vm) { CTR1(KTR_VM, "vmspace_free: %p", vm); /* * Make sure any SysV shm is freed, it might not have been in * exit1(). */ shmexit(vm); /* * Lock the map, to wait out all other references to it. * Delete all of the mappings and pages they hold, then call * the pmap module to reclaim anything left. 
*/ (void)vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map), vm_map_max(&vm->vm_map)); pmap_release(vmspace_pmap(vm)); vm->vm_map.pmap = NULL; uma_zfree(vmspace_zone, vm); } void vmspace_free(struct vmspace *vm) { WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmspace_free() called"); if (vm->vm_refcnt == 0) panic("vmspace_free: attempt to free already freed vmspace"); if (atomic_fetchadd_int(&vm->vm_refcnt, -1) == 1) vmspace_dofree(vm); } void vmspace_exitfree(struct proc *p) { struct vmspace *vm; PROC_VMSPACE_LOCK(p); vm = p->p_vmspace; p->p_vmspace = NULL; PROC_VMSPACE_UNLOCK(p); KASSERT(vm == &vmspace0, ("vmspace_exitfree: wrong vmspace")); vmspace_free(vm); } void vmspace_exit(struct thread *td) { int refcnt; struct vmspace *vm; struct proc *p; /* * Release user portion of address space. * This releases references to vnodes, * which could cause I/O if the file has been unlinked. * Need to do this early enough that we can still sleep. * * The last exiting process to reach this point releases as * much of the environment as it can. vmspace_dofree() is the * slower fallback in case another process had a temporary * reference to the vmspace. */ p = td->td_proc; vm = p->p_vmspace; atomic_add_int(&vmspace0.vm_refcnt, 1); refcnt = vm->vm_refcnt; do { if (refcnt > 1 && p->p_vmspace != &vmspace0) { /* Switch now since other proc might free vmspace */ PROC_VMSPACE_LOCK(p); p->p_vmspace = &vmspace0; PROC_VMSPACE_UNLOCK(p); pmap_activate(td); } } while (!atomic_fcmpset_int(&vm->vm_refcnt, &refcnt, refcnt - 1)); if (refcnt == 1) { if (p->p_vmspace != vm) { /* vmspace not yet freed, switch back */ PROC_VMSPACE_LOCK(p); p->p_vmspace = vm; PROC_VMSPACE_UNLOCK(p); pmap_activate(td); } pmap_remove_pages(vmspace_pmap(vm)); /* Switch now since this proc will free vmspace */ PROC_VMSPACE_LOCK(p); p->p_vmspace = &vmspace0; PROC_VMSPACE_UNLOCK(p); pmap_activate(td); vmspace_dofree(vm); } #ifdef RACCT if (racct_enable) vmspace_container_reset(p); #endif } /* Acquire reference to vmspace owned by another process. */ struct vmspace * vmspace_acquire_ref(struct proc *p) { struct vmspace *vm; int refcnt; PROC_VMSPACE_LOCK(p); vm = p->p_vmspace; if (vm == NULL) { PROC_VMSPACE_UNLOCK(p); return (NULL); } refcnt = vm->vm_refcnt; do { if (refcnt <= 0) { /* Avoid 0->1 transition */ PROC_VMSPACE_UNLOCK(p); return (NULL); } } while (!atomic_fcmpset_int(&vm->vm_refcnt, &refcnt, refcnt + 1)); if (vm != p->p_vmspace) { PROC_VMSPACE_UNLOCK(p); vmspace_free(vm); return (NULL); } PROC_VMSPACE_UNLOCK(p); return (vm); } /* * Switch between vmspaces in an AIO kernel process. * * The AIO kernel processes switch to and from a user process's * vmspace while performing an I/O operation on behalf of a user * process. The new vmspace is either the vmspace of a user process * obtained from an active AIO request or the initial vmspace of the * AIO kernel process (when it is idling). Because user processes * will block to drain any active AIO requests before proceeding in * exit() or execve(), the vmspace reference count for these vmspaces * can never be 0. This allows for a much simpler implementation than * the loop in vmspace_acquire_ref() above. Similarly, AIO kernel * processes hold an extra reference on their initial vmspace for the * life of the process so that this guarantee is true for any vmspace * passed as 'newvm'. */ void vmspace_switch_aio(struct vmspace *newvm) { struct vmspace *oldvm; /* XXX: Need some way to assert that this is an aio daemon. 
*/ KASSERT(newvm->vm_refcnt > 0, ("vmspace_switch_aio: newvm unreferenced")); oldvm = curproc->p_vmspace; if (oldvm == newvm) return; /* * Point to the new address space and refer to it. */ curproc->p_vmspace = newvm; atomic_add_int(&newvm->vm_refcnt, 1); /* Activate the new mapping. */ pmap_activate(curthread); /* Remove the daemon's reference to the old address space. */ KASSERT(oldvm->vm_refcnt > 1, ("vmspace_switch_aio: oldvm dropping last reference")); vmspace_free(oldvm); } void _vm_map_lock(vm_map_t map, const char *file, int line) { if (map->system_map) mtx_lock_flags_(&map->system_mtx, 0, file, line); else sx_xlock_(&map->lock, file, line); map->timestamp++; } static void vm_map_process_deferred(void) { struct thread *td; vm_map_entry_t entry, next; vm_object_t object; td = curthread; entry = td->td_map_def_user; td->td_map_def_user = NULL; while (entry != NULL) { next = entry->next; if ((entry->eflags & MAP_ENTRY_VN_WRITECNT) != 0) { /* * Decrement the object's writemappings and * possibly the vnode's v_writecount. */ KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0, ("Submap with writecount")); object = entry->object.vm_object; KASSERT(object != NULL, ("No object for writecount")); vnode_pager_release_writecount(object, entry->start, entry->end); } vm_map_entry_deallocate(entry, FALSE); entry = next; } } void _vm_map_unlock(vm_map_t map, const char *file, int line) { if (map->system_map) mtx_unlock_flags_(&map->system_mtx, 0, file, line); else { sx_xunlock_(&map->lock, file, line); vm_map_process_deferred(); } } void _vm_map_lock_read(vm_map_t map, const char *file, int line) { if (map->system_map) mtx_lock_flags_(&map->system_mtx, 0, file, line); else sx_slock_(&map->lock, file, line); } void _vm_map_unlock_read(vm_map_t map, const char *file, int line) { if (map->system_map) mtx_unlock_flags_(&map->system_mtx, 0, file, line); else { sx_sunlock_(&map->lock, file, line); vm_map_process_deferred(); } } int _vm_map_trylock(vm_map_t map, const char *file, int line) { int error; error = map->system_map ? !mtx_trylock_flags_(&map->system_mtx, 0, file, line) : !sx_try_xlock_(&map->lock, file, line); if (error == 0) map->timestamp++; return (error == 0); } int _vm_map_trylock_read(vm_map_t map, const char *file, int line) { int error; error = map->system_map ? !mtx_trylock_flags_(&map->system_mtx, 0, file, line) : !sx_try_slock_(&map->lock, file, line); return (error == 0); } /* * _vm_map_lock_upgrade: [ internal use only ] * * Tries to upgrade a read (shared) lock on the specified map to a write * (exclusive) lock. Returns the value "0" if the upgrade succeeds and a * non-zero value if the upgrade fails. If the upgrade fails, the map is * returned without a read or write lock held. * * Requires that the map be read locked. */ int _vm_map_lock_upgrade(vm_map_t map, const char *file, int line) { unsigned int last_timestamp; if (map->system_map) { mtx_assert_(&map->system_mtx, MA_OWNED, file, line); } else { if (!sx_try_upgrade_(&map->lock, file, line)) { last_timestamp = map->timestamp; sx_sunlock_(&map->lock, file, line); vm_map_process_deferred(); /* * If the map's timestamp does not change while the * map is unlocked, then the upgrade succeeds. 
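 *
 * (Every writer bumps map->timestamp in _vm_map_lock(), so an
 * unchanged timestamp across the unlocked window proves that no
 * writer modified the map in the interim.)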
*/ sx_xlock_(&map->lock, file, line); if (last_timestamp != map->timestamp) { sx_xunlock_(&map->lock, file, line); return (1); } } } map->timestamp++; return (0); } void _vm_map_lock_downgrade(vm_map_t map, const char *file, int line) { if (map->system_map) { mtx_assert_(&map->system_mtx, MA_OWNED, file, line); } else sx_downgrade_(&map->lock, file, line); } /* * vm_map_locked: * * Returns a non-zero value if the caller holds a write (exclusive) lock * on the specified map and the value "0" otherwise. */ int vm_map_locked(vm_map_t map) { if (map->system_map) return (mtx_owned(&map->system_mtx)); else return (sx_xlocked(&map->lock)); } #ifdef INVARIANTS static void _vm_map_assert_locked(vm_map_t map, const char *file, int line) { if (map->system_map) mtx_assert_(&map->system_mtx, MA_OWNED, file, line); else sx_assert_(&map->lock, SA_XLOCKED, file, line); } #define VM_MAP_ASSERT_LOCKED(map) \ _vm_map_assert_locked(map, LOCK_FILE, LOCK_LINE) #ifdef DIAGNOSTIC static int enable_vmmap_check = 1; #else static int enable_vmmap_check = 0; #endif SYSCTL_INT(_debug, OID_AUTO, vmmap_check, CTLFLAG_RWTUN, &enable_vmmap_check, 0, "Enable vm map consistency checking"); static void _vm_map_assert_consistent(vm_map_t map) { vm_map_entry_t entry; vm_map_entry_t child; vm_size_t max_left, max_right; if (!enable_vmmap_check) return; for (entry = map->header.next; entry != &map->header; entry = entry->next) { KASSERT(entry->prev->end <= entry->start, ("map %p prev->end = %jx, start = %jx", map, (uintmax_t)entry->prev->end, (uintmax_t)entry->start)); KASSERT(entry->start < entry->end, ("map %p start = %jx, end = %jx", map, (uintmax_t)entry->start, (uintmax_t)entry->end)); KASSERT(entry->end <= entry->next->start, ("map %p end = %jx, next->start = %jx", map, (uintmax_t)entry->end, (uintmax_t)entry->next->start)); KASSERT(entry->left == NULL || entry->left->start < entry->start, ("map %p left->start = %jx, start = %jx", map, (uintmax_t)entry->left->start, (uintmax_t)entry->start)); KASSERT(entry->right == NULL || entry->start < entry->right->start, ("map %p start = %jx, right->start = %jx", map, (uintmax_t)entry->start, (uintmax_t)entry->right->start)); child = entry->left; max_left = (child != NULL) ? child->max_free : entry->start - entry->prev->end; child = entry->right; max_right = (child != NULL) ? child->max_free : entry->next->start - entry->end; KASSERT(entry->max_free == MAX(max_left, max_right), ("map %p max = %jx, max_left = %jx, max_right = %jx", map, (uintmax_t)entry->max_free, (uintmax_t)max_left, (uintmax_t)max_right)); } } #define VM_MAP_ASSERT_CONSISTENT(map) \ _vm_map_assert_consistent(map) #else #define VM_MAP_ASSERT_LOCKED(map) #define VM_MAP_ASSERT_CONSISTENT(map) #endif /* INVARIANTS */ /* * _vm_map_unlock_and_wait: * * Atomically releases the lock on the specified map and puts the calling * thread to sleep. The calling thread will remain asleep until either * vm_map_wakeup() is performed on the map or the specified timeout is * exceeded. * * WARNING! This function does not perform deferred deallocations of * objects and map entries. Therefore, the calling thread is expected to * reacquire the map lock after reawakening and later perform an ordinary * unlock operation, such as vm_map_unlock(), before completing its * operation on the map. 
*/ int _vm_map_unlock_and_wait(vm_map_t map, int timo, const char *file, int line) { mtx_lock(&map_sleep_mtx); if (map->system_map) mtx_unlock_flags_(&map->system_mtx, 0, file, line); else sx_xunlock_(&map->lock, file, line); return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps", timo)); } /* * vm_map_wakeup: * * Awaken any threads that have slept on the map using * vm_map_unlock_and_wait(). */ void vm_map_wakeup(vm_map_t map) { /* * Acquire and release map_sleep_mtx to prevent a wakeup() * from being performed (and lost) between the map unlock * and the msleep() in _vm_map_unlock_and_wait(). */ mtx_lock(&map_sleep_mtx); mtx_unlock(&map_sleep_mtx); wakeup(&map->root); } void vm_map_busy(vm_map_t map) { VM_MAP_ASSERT_LOCKED(map); map->busy++; } void vm_map_unbusy(vm_map_t map) { VM_MAP_ASSERT_LOCKED(map); KASSERT(map->busy, ("vm_map_unbusy: not busy")); if (--map->busy == 0 && (map->flags & MAP_BUSY_WAKEUP)) { vm_map_modflags(map, 0, MAP_BUSY_WAKEUP); wakeup(&map->busy); } } void vm_map_wait_busy(vm_map_t map) { VM_MAP_ASSERT_LOCKED(map); while (map->busy) { vm_map_modflags(map, MAP_BUSY_WAKEUP, 0); if (map->system_map) msleep(&map->busy, &map->system_mtx, 0, "mbusy", 0); else sx_sleep(&map->busy, &map->lock, 0, "mbusy", 0); } map->timestamp++; } long vmspace_resident_count(struct vmspace *vmspace) { return pmap_resident_count(vmspace_pmap(vmspace)); } /* * vm_map_create: * * Creates and returns a new empty VM map with * the given physical map structure, and having * the given lower and upper address bounds. */ vm_map_t vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max) { vm_map_t result; result = uma_zalloc(mapzone, M_WAITOK); CTR1(KTR_VM, "vm_map_create: %p", result); _vm_map_init(result, pmap, min, max); return (result); } /* * Initialize an existing vm_map structure * such as that in the vmspace structure. */ static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max) { map->header.next = map->header.prev = &map->header; map->header.eflags = MAP_ENTRY_HEADER; map->needs_wakeup = FALSE; map->system_map = 0; map->pmap = pmap; map->header.end = min; map->header.start = max; map->flags = 0; map->root = NULL; map->timestamp = 0; map->busy = 0; map->anon_loc = 0; } void vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max) { _vm_map_init(map, pmap, min, max); mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK); sx_init(&map->lock, "user map"); } /* * vm_map_entry_dispose: [ internal use only ] * * Inverse of vm_map_entry_create. */ static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry) { uma_zfree(map->system_map ? kmapentzone : mapentzone, entry); } /* * vm_map_entry_create: [ internal use only ] * * Allocates a VM map entry for insertion. * No entry fields are filled in. */ static vm_map_entry_t vm_map_entry_create(vm_map_t map) { vm_map_entry_t new_entry; if (map->system_map) new_entry = uma_zalloc(kmapentzone, M_NOWAIT); else new_entry = uma_zalloc(mapentzone, M_WAITOK); if (new_entry == NULL) panic("vm_map_entry_create: kernel resources exhausted"); return (new_entry); } /* * vm_map_entry_set_behavior: * * Set the expected access behavior, either normal, random, or * sequential. */ static inline void vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior) { entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) | (behavior & MAP_ENTRY_BEHAV_MASK); } /* * vm_map_entry_set_max_free: * * Set the max_free field in a vm_map_entry. 
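 *
 * (Worked example: for an entry mapping [8K, 12K) with no children,
 * a predecessor ending at 4K and a successor starting at 20K,
 * max_free becomes MAX(8K - 4K, 20K - 12K) = 8K, the largest free
 * gap adjacent to the entry's subtree.)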
*/ static inline void vm_map_entry_set_max_free(vm_map_entry_t entry) { vm_map_entry_t child; vm_size_t max_left, max_right; child = entry->left; max_left = (child != NULL) ? child->max_free : entry->start - entry->prev->end; child = entry->right; max_right = (child != NULL) ? child->max_free : entry->next->start - entry->end; entry->max_free = MAX(max_left, max_right); } #define SPLAY_LEFT_STEP(root, y, rlist, test) do { \ y = root->left; \ if (y != NULL && (test)) { \ /* Rotate right and make y root. */ \ root->left = y->right; \ y->right = root; \ vm_map_entry_set_max_free(root); \ root = y; \ y = root->left; \ } \ /* Put root on rlist. */ \ root->left = rlist; \ rlist = root; \ root = y; \ } while (0) #define SPLAY_RIGHT_STEP(root, y, llist, test) do { \ y = root->right; \ if (y != NULL && (test)) { \ /* Rotate left and make y root. */ \ root->right = y->left; \ y->left = root; \ vm_map_entry_set_max_free(root); \ root = y; \ y = root->right; \ } \ /* Put root on llist. */ \ root->right = llist; \ llist = root; \ root = y; \ } while (0) /* * Walk down the tree until we find addr or a NULL pointer where addr would go, * breaking off left and right subtrees of nodes less than, or greater than * addr. Treat pointers to nodes with max_free < length as NULL pointers. * llist and rlist are the two sides in reverse order (bottom-up), with llist * linked by the right pointer and rlist linked by the left pointer in the * vm_map_entry. */ static vm_map_entry_t vm_map_splay_split(vm_offset_t addr, vm_size_t length, vm_map_entry_t root, vm_map_entry_t *out_llist, vm_map_entry_t *out_rlist) { vm_map_entry_t llist, rlist; vm_map_entry_t y; llist = NULL; rlist = NULL; while (root != NULL && root->max_free >= length) { if (addr < root->start) { SPLAY_LEFT_STEP(root, y, rlist, y->max_free >= length && addr < y->start); } else if (addr >= root->end) { SPLAY_RIGHT_STEP(root, y, llist, y->max_free >= length && addr >= y->end); } else break; } *out_llist = llist; *out_rlist = rlist; return (root); } static void vm_map_splay_findnext(vm_map_entry_t root, vm_map_entry_t *iolist) { vm_map_entry_t rlist, y; root = root->right; rlist = *iolist; while (root != NULL) SPLAY_LEFT_STEP(root, y, rlist, true); *iolist = rlist; } static void vm_map_splay_findprev(vm_map_entry_t root, vm_map_entry_t *iolist) { vm_map_entry_t llist, y; root = root->left; llist = *iolist; while (root != NULL) SPLAY_RIGHT_STEP(root, y, llist, true); *iolist = llist; } /* * Walk back up the two spines, flip the pointers and set max_free. The * subtrees of the root go at the bottom of llist and rlist. */ static vm_map_entry_t vm_map_splay_merge(vm_map_entry_t root, vm_map_entry_t llist, vm_map_entry_t rlist, vm_map_entry_t ltree, vm_map_entry_t rtree) { vm_map_entry_t y; while (llist != NULL) { y = llist->right; llist->right = ltree; vm_map_entry_set_max_free(llist); ltree = llist; llist = y; } while (rlist != NULL) { y = rlist->left; rlist->left = rtree; vm_map_entry_set_max_free(rlist); rtree = rlist; rlist = y; } /* * Final assembly: add ltree and rtree as subtrees of root. */ root->left = ltree; root->right = rtree; vm_map_entry_set_max_free(root); return (root); } /* * vm_map_entry_splay: * * The Sleator and Tarjan top-down splay algorithm with the * following variation. Max_free must be computed bottom-up, so * on the downward pass, maintain the left and right spines in * reverse order. Then, make a second pass up each side to fix * the pointers and compute max_free. The time bound is O(log n) * amortized. 
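 * (Sketch of the two passes: on the way down, vm_map_splay_split()
 * pushes nodes greater than addr onto rlist and nodes less than addr
 * onto llist, each spine linked in reverse order; vm_map_splay_merge()
 * then walks both spines back up, reattaching subtrees and
 * recomputing max_free bottom-up.)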
* * The new root is the vm_map_entry containing "addr", or else an * adjacent entry (lower if possible) if addr is not in the tree. * * The map must be locked, and leaves it so. * * Returns: the new root. */ static vm_map_entry_t vm_map_entry_splay(vm_offset_t addr, vm_map_entry_t root) { vm_map_entry_t llist, rlist; root = vm_map_splay_split(addr, 0, root, &llist, &rlist); if (root != NULL) { /* do nothing */ } else if (llist != NULL) { /* * Recover the greatest node in the left * subtree and make it the root. */ root = llist; llist = root->right; root->right = NULL; } else if (rlist != NULL) { /* * Recover the least node in the right * subtree and make it the root. */ root = rlist; rlist = root->left; root->left = NULL; } else { /* There is no root. */ return (NULL); } return (vm_map_splay_merge(root, llist, rlist, root->left, root->right)); } /* * vm_map_entry_{un,}link: * * Insert/remove entries from maps. */ static void vm_map_entry_link(vm_map_t map, vm_map_entry_t entry) { vm_map_entry_t llist, rlist, root; CTR3(KTR_VM, "vm_map_entry_link: map %p, nentries %d, entry %p", map, map->nentries, entry); VM_MAP_ASSERT_LOCKED(map); map->nentries++; root = map->root; root = vm_map_splay_split(entry->start, 0, root, &llist, &rlist); KASSERT(root == NULL, ("vm_map_entry_link: link object already mapped")); entry->prev = (llist == NULL) ? &map->header : llist; entry->next = (rlist == NULL) ? &map->header : rlist; entry->prev->next = entry->next->prev = entry; root = vm_map_splay_merge(entry, llist, rlist, NULL, NULL); map->root = entry; VM_MAP_ASSERT_CONSISTENT(map); } enum unlink_merge_type { UNLINK_MERGE_PREV, UNLINK_MERGE_NONE, UNLINK_MERGE_NEXT }; static void vm_map_entry_unlink(vm_map_t map, vm_map_entry_t entry, enum unlink_merge_type op) { vm_map_entry_t llist, rlist, root, y; VM_MAP_ASSERT_LOCKED(map); llist = entry->prev; rlist = entry->next; llist->next = rlist; rlist->prev = llist; root = map->root; root = vm_map_splay_split(entry->start, 0, root, &llist, &rlist); KASSERT(root != NULL, ("vm_map_entry_unlink: unlink object not mapped")); switch (op) { case UNLINK_MERGE_PREV: vm_map_splay_findprev(root, &llist); llist->end = root->end; y = root->right; root = llist; llist = root->right; root->right = y; break; case UNLINK_MERGE_NEXT: vm_map_splay_findnext(root, &rlist); rlist->start = root->start; rlist->offset = root->offset; y = root->left; root = rlist; rlist = root->left; root->left = y; break; case UNLINK_MERGE_NONE: vm_map_splay_findprev(root, &llist); vm_map_splay_findnext(root, &rlist); if (llist != NULL) { root = llist; llist = root->right; root->right = NULL; } else if (rlist != NULL) { root = rlist; rlist = root->left; root->left = NULL; } else root = NULL; break; } if (root != NULL) root = vm_map_splay_merge(root, llist, rlist, root->left, root->right); map->root = root; VM_MAP_ASSERT_CONSISTENT(map); map->nentries--; CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map, map->nentries, entry); } /* * vm_map_entry_resize_free: * * Recompute the amount of free space following a modified vm_map_entry * and propagate those values up the tree. Call this function after * resizing a map entry in-place by changing the end value, without a * call to vm_map_entry_link() or _unlink(). * * The map must be locked, and leaves it so. 
*/ static void vm_map_entry_resize_free(vm_map_t map, vm_map_entry_t entry) { vm_map_entry_t llist, rlist, root; VM_MAP_ASSERT_LOCKED(map); root = map->root; root = vm_map_splay_split(entry->start, 0, root, &llist, &rlist); KASSERT(root != NULL, ("vm_map_entry_resize_free: resize_free object not mapped")); vm_map_splay_findnext(root, &rlist); root->right = NULL; map->root = vm_map_splay_merge(root, llist, rlist, root->left, root->right); VM_MAP_ASSERT_CONSISTENT(map); CTR3(KTR_VM, "vm_map_entry_resize_free: map %p, nentries %d, entry %p", map, map->nentries, entry); } /* * vm_map_lookup_entry: [ internal use only ] * * Finds the map entry containing (or * immediately preceding) the specified address * in the given map; the entry is returned * in the "entry" parameter. The boolean * result indicates whether the address is * actually contained in the map. */ boolean_t vm_map_lookup_entry( vm_map_t map, vm_offset_t address, vm_map_entry_t *entry) /* OUT */ { vm_map_entry_t cur, lbound; boolean_t locked; /* * If the map is empty, then the map entry immediately preceding * "address" is the map's header. */ cur = map->root; if (cur == NULL) { *entry = &map->header; return (FALSE); } if (address >= cur->start && cur->end > address) { *entry = cur; return (TRUE); } if ((locked = vm_map_locked(map)) || sx_try_upgrade(&map->lock)) { /* * Splay requires a write lock on the map. However, it only * restructures the binary search tree; it does not otherwise * change the map. Thus, the map's timestamp need not change * on a temporary upgrade. */ map->root = cur = vm_map_entry_splay(address, cur); VM_MAP_ASSERT_CONSISTENT(map); if (!locked) sx_downgrade(&map->lock); /* * If "address" is contained within a map entry, the new root * is that map entry. Otherwise, the new root is a map entry * immediately before or after "address". */ if (address < cur->start) { *entry = &map->header; return (FALSE); } *entry = cur; return (address < cur->end); } /* * Since the map is only locked for read access, perform a * standard binary search tree lookup for "address". */ lbound = &map->header; do { if (address < cur->start) { cur = cur->left; } else if (cur->end <= address) { lbound = cur; cur = cur->right; } else { *entry = cur; return (TRUE); } } while (cur != NULL); *entry = lbound; return (FALSE); } /* * vm_map_insert: * * Inserts the given whole VM object into the target * map at the specified address range. The object's * size should match that of the address range. * * Requires that the map be locked, and leaves it so. * * If object is non-NULL, ref count must be bumped by caller * prior to making call to account for the new entry. */ int vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, int cow) { vm_map_entry_t new_entry, prev_entry, temp_entry; struct ucred *cred; vm_eflags_t protoeflags; vm_inherit_t inheritance; VM_MAP_ASSERT_LOCKED(map); KASSERT(object != kernel_object || (cow & MAP_COPY_ON_WRITE) == 0, ("vm_map_insert: kernel object and COW")); KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0, ("vm_map_insert: paradoxical MAP_NOFAULT request")); KASSERT((prot & ~max) == 0, ("prot %#x is not subset of max_prot %#x", prot, max)); /* * Check that the start and end points are not bogus. */ if (start < vm_map_min(map) || end > vm_map_max(map) || start >= end) return (KERN_INVALID_ADDRESS); /* * Find the entry prior to the proposed starting address; if it's part * of an existing entry, this range is bogus. 
*/ if (vm_map_lookup_entry(map, start, &temp_entry)) return (KERN_NO_SPACE); prev_entry = temp_entry; /* * Assert that the next entry doesn't overlap the end point. */ if (prev_entry->next->start < end) return (KERN_NO_SPACE); if ((cow & MAP_CREATE_GUARD) != 0 && (object != NULL || max != VM_PROT_NONE)) return (KERN_INVALID_ARGUMENT); protoeflags = 0; if (cow & MAP_COPY_ON_WRITE) protoeflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY; if (cow & MAP_NOFAULT) protoeflags |= MAP_ENTRY_NOFAULT; if (cow & MAP_DISABLE_SYNCER) protoeflags |= MAP_ENTRY_NOSYNC; if (cow & MAP_DISABLE_COREDUMP) protoeflags |= MAP_ENTRY_NOCOREDUMP; if (cow & MAP_STACK_GROWS_DOWN) protoeflags |= MAP_ENTRY_GROWS_DOWN; if (cow & MAP_STACK_GROWS_UP) protoeflags |= MAP_ENTRY_GROWS_UP; if (cow & MAP_VN_WRITECOUNT) protoeflags |= MAP_ENTRY_VN_WRITECNT; if ((cow & MAP_CREATE_GUARD) != 0) protoeflags |= MAP_ENTRY_GUARD; if ((cow & MAP_CREATE_STACK_GAP_DN) != 0) protoeflags |= MAP_ENTRY_STACK_GAP_DN; if ((cow & MAP_CREATE_STACK_GAP_UP) != 0) protoeflags |= MAP_ENTRY_STACK_GAP_UP; if (cow & MAP_INHERIT_SHARE) inheritance = VM_INHERIT_SHARE; else inheritance = VM_INHERIT_DEFAULT; cred = NULL; if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0) goto charged; if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) && ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) { if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start)) return (KERN_RESOURCE_SHORTAGE); KASSERT(object == NULL || (protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 || object->cred == NULL, ("overcommit: vm_map_insert o %p", object)); cred = curthread->td_ucred; } charged: /* Expand the kernel pmap, if necessary. */ if (map == kernel_map && end > kernel_vm_end) pmap_growkernel(end); if (object != NULL) { /* * OBJ_ONEMAPPING must be cleared unless this mapping * is trivially proven to be the only mapping for any * of the object's pages. (Object granularity * reference counting is insufficient to recognize * aliases with precision.) */ VM_OBJECT_WLOCK(object); if (object->ref_count > 1 || object->shadow_count != 0) vm_object_clear_flag(object, OBJ_ONEMAPPING); VM_OBJECT_WUNLOCK(object); } else if ((prev_entry->eflags & ~MAP_ENTRY_USER_WIRED) == protoeflags && (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 && prev_entry->end == start && (prev_entry->cred == cred || (prev_entry->object.vm_object != NULL && prev_entry->object.vm_object->cred == cred)) && vm_object_coalesce(prev_entry->object.vm_object, prev_entry->offset, (vm_size_t)(prev_entry->end - prev_entry->start), (vm_size_t)(end - prev_entry->end), cred != NULL && (protoeflags & MAP_ENTRY_NEEDS_COPY) == 0)) { /* * We were able to extend the object. Determine if we * can extend the previous map entry to include the * new range as well. */ if (prev_entry->inheritance == inheritance && prev_entry->protection == prot && prev_entry->max_protection == max && prev_entry->wired_count == 0) { KASSERT((prev_entry->eflags & MAP_ENTRY_USER_WIRED) == 0, ("prev_entry %p has incoherent wiring", prev_entry)); if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0) map->size += end - prev_entry->end; prev_entry->end = end; vm_map_entry_resize_free(map, prev_entry); vm_map_simplify_entry(map, prev_entry); return (KERN_SUCCESS); } /* * If we can extend the object but cannot extend the * map entry, we have to create a new map entry. We * must bump the ref count on the extended object to * account for it. object may be NULL. 
*/ object = prev_entry->object.vm_object; offset = prev_entry->offset + (prev_entry->end - prev_entry->start); vm_object_reference(object); if (cred != NULL && object != NULL && object->cred != NULL && !(prev_entry->eflags & MAP_ENTRY_NEEDS_COPY)) { /* Object already accounts for this uid. */ cred = NULL; } } if (cred != NULL) crhold(cred); /* * Create a new entry */ new_entry = vm_map_entry_create(map); new_entry->start = start; new_entry->end = end; new_entry->cred = NULL; new_entry->eflags = protoeflags; new_entry->object.vm_object = object; new_entry->offset = offset; new_entry->inheritance = inheritance; new_entry->protection = prot; new_entry->max_protection = max; new_entry->wired_count = 0; new_entry->wiring_thread = NULL; new_entry->read_ahead = VM_FAULT_READ_AHEAD_INIT; new_entry->next_read = start; KASSERT(cred == NULL || !ENTRY_CHARGED(new_entry), ("overcommit: vm_map_insert leaks vm_map %p", new_entry)); new_entry->cred = cred; /* * Insert the new entry into the list */ vm_map_entry_link(map, new_entry); if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0) map->size += new_entry->end - new_entry->start; /* * Try to coalesce the new entry with both the previous and next * entries in the list. Previously, we only attempted to coalesce * with the previous entry when object is NULL. Here, we handle the * other cases, which are less common. */ vm_map_simplify_entry(map, new_entry); if ((cow & (MAP_PREFAULT | MAP_PREFAULT_PARTIAL)) != 0) { vm_map_pmap_enter(map, start, prot, object, OFF_TO_IDX(offset), end - start, cow & MAP_PREFAULT_PARTIAL); } return (KERN_SUCCESS); } /* * vm_map_findspace: * * Find the first fit (lowest VM address) for "length" free bytes * beginning at address >= start in the given map. * * In a vm_map_entry, "max_free" is the maximum amount of * contiguous free space between an entry in its subtree and a * neighbor of that entry. This allows finding a free region in * one path down the tree, so O(log n) amortized with splay * trees. * * The map must be locked, and leaves it so. * * Returns: starting address if sufficient space, * vm_map_max(map)-length+1 if insufficient space. */ vm_offset_t vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length) { vm_map_entry_t llist, rlist, root, y; vm_size_t left_length; /* * Request must fit within min/max VM address and must avoid * address wrap. */ start = MAX(start, vm_map_min(map)); if (start + length > vm_map_max(map) || start + length < start) return (vm_map_max(map) - length + 1); /* Empty tree means wide open address space. */ if (map->root == NULL) return (start); /* * After splay, if start comes before root node, then there * must be a gap from start to the root. */ root = vm_map_splay_split(start, length, map->root, &llist, &rlist); if (root != NULL) start = root->end; else if (rlist != NULL) { root = rlist; rlist = root->left; root->left = NULL; } else { root = llist; llist = root->right; root->right = NULL; } map->root = vm_map_splay_merge(root, llist, rlist, root->left, root->right); VM_MAP_ASSERT_CONSISTENT(map); if (start + length <= root->start) return (start); /* * Root is the last node that might begin its gap before * start, and this is the last comparison where address * wrap might be a problem. */ if (root->right == NULL && start + length <= vm_map_max(map)) return (start); /* With max_free, can immediately tell if no solution. */ if (root->right == NULL || length > root->right->max_free) return (vm_map_max(map) - length + 1); /* * Splay for the least large-enough gap in the right subtree. 
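 */

/*
 * For contrast, the contract of vm_map_findspace() written as a linear
 * scan over a sorted array of allocated spans: return the lowest address
 * >= "start" with "length" free bytes, or max - length + 1 if there is
 * none.  The kernel reaches the same answer in amortized O(log n) by
 * consulting the cached "max_free" of each subtree; this flat model is
 * illustrative only.
 */
#include <stddef.h>

struct span {				/* an allocated [start, end) range */
	unsigned long start, end;
};

static unsigned long
findspace_linear(const struct span *e, size_t n, unsigned long max,
    unsigned long start, unsigned long length)
{
	size_t i;

	for (i = 0; i < n; i++) {	/* e[] is sorted by start */
		if (start + length < start)
			return (max - length + 1);	/* address wrap */
		if (start + length <= e[i].start)
			return (start);		/* the gap before e[i] fits */
		if (start < e[i].end)
			start = e[i].end;	/* step past the allocation */
	}
	if (start + length >= start && start + length <= max)
		return (start);			/* fits after the last span */
	return (max - length + 1);		/* sentinel: no space */
}

/*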
*/ llist = NULL; rlist = NULL; for (left_length = 0; ; left_length = root->left != NULL ? root->left->max_free : root->start - llist->end) { if (length <= left_length) SPLAY_LEFT_STEP(root, y, rlist, length <= (y->left != NULL ? y->left->max_free : y->start - llist->end)); else SPLAY_RIGHT_STEP(root, y, llist, length > (y->left != NULL ? y->left->max_free : y->start - root->end)); if (root == NULL) break; } root = llist; llist = root->right; if ((y = rlist) == NULL) root->right = NULL; else { rlist = y->left; y->left = NULL; root->right = y->right; } root = vm_map_splay_merge(root, llist, rlist, root->left, root->right); if (y != NULL) { y->right = root->right; vm_map_entry_set_max_free(y); root->right = y; vm_map_entry_set_max_free(root); } map->root = root; VM_MAP_ASSERT_CONSISTENT(map); return (root->end); } int vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_size_t length, vm_prot_t prot, vm_prot_t max, int cow) { vm_offset_t end; int result; end = start + length; KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 || object == NULL, ("vm_map_fixed: non-NULL backing object for stack")); vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if ((cow & MAP_CHECK_EXCL) == 0) vm_map_delete(map, start, end); if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) { result = vm_map_stack_locked(map, start, length, sgrowsiz, prot, max, cow); } else { result = vm_map_insert(map, object, offset, start, end, prot, max, cow); } vm_map_unlock(map); return (result); } static const int aslr_pages_rnd_64[2] = {0x1000, 0x10}; static const int aslr_pages_rnd_32[2] = {0x100, 0x4}; static int cluster_anon = 1; SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW, &cluster_anon, 0, "Cluster anonymous mappings: 0 = no, 1 = yes if no hint, 2 = always"); static bool clustering_anon_allowed(vm_offset_t addr) { switch (cluster_anon) { case 0: return (false); case 1: return (addr == 0); case 2: default: return (true); } } static long aslr_restarts; SYSCTL_LONG(_vm, OID_AUTO, aslr_restarts, CTLFLAG_RD, &aslr_restarts, 0, "Number of aslr failures"); #define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31) /* * Searches for the specified amount of free space in the given map with the * specified alignment. Performs an address-ordered, first-fit search from * the given address "*addr", with an optional upper bound "max_addr". If the * parameter "alignment" is zero, then the alignment is computed from the * given (object, offset) pair so as to enable the greatest possible use of * superpage mappings. Returns KERN_SUCCESS and the address of the free space * in "*addr" if successful. Otherwise, returns KERN_NO_SPACE. * * The map must be locked. Initially, there must be at least "length" bytes * of free space at the given address. */ static int vm_map_alignspace(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t length, vm_offset_t max_addr, vm_offset_t alignment) { vm_offset_t aligned_addr, free_addr; VM_MAP_ASSERT_LOCKED(map); free_addr = *addr; KASSERT(free_addr == vm_map_findspace(map, free_addr, length), ("caller failed to provide space %d at address %p", (int)length, (void*)free_addr)); for (;;) { /* * At the start of every iteration, the free space at address * "*addr" is at least "length" bytes. 
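 */

/*
 * The round-up step above, in isolation: for an unaligned address the
 * kernel's mask-then-add sequence is equivalent to the usual
 * (addr + align - 1) & ~(align - 1) idiom, and a wrapped result means the
 * request cannot be satisfied (the "aligned_addr < free_addr" test).
 * A sketch, assuming "align" is a power of two:
 */
#include <stdbool.h>
#include <stdint.h>

static bool
align_up(uintptr_t addr, uintptr_t align, uintptr_t *out)
{
	uintptr_t aligned;

	aligned = (addr + align - 1) & ~(align - 1);
	if (aligned < addr)
		return (false);		/* wrapped past the top of memory */
	*out = aligned;
	return (true);
}

/*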
*/ if (alignment == 0) pmap_align_superpage(object, offset, addr, length); else if ((*addr & (alignment - 1)) != 0) { *addr &= ~(alignment - 1); *addr += alignment; } aligned_addr = *addr; if (aligned_addr == free_addr) { /* * Alignment did not change "*addr", so "*addr" must * still provide sufficient free space. */ return (KERN_SUCCESS); } /* * Test for address wrap on "*addr". A wrapped "*addr" could * be a valid address, in which case vm_map_findspace() cannot * be relied upon to fail. */ if (aligned_addr < free_addr) return (KERN_NO_SPACE); *addr = vm_map_findspace(map, aligned_addr, length); if (*addr + length > vm_map_max(map) || (max_addr != 0 && *addr + length > max_addr)) return (KERN_NO_SPACE); free_addr = *addr; if (free_addr == aligned_addr) { /* * If a successful call to vm_map_findspace() did not * change "*addr", then "*addr" must still be aligned * and provide sufficient free space. */ return (KERN_SUCCESS); } } } /* * vm_map_find finds an unallocated region in the target address * map with the given length. The search is defined to be * first-fit from the specified address; the region found is * returned in the same parameter. * * If object is non-NULL, ref count must be bumped by caller * prior to making call to account for the new entry. */ int vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, /* IN/OUT */ vm_size_t length, vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max, int cow) { vm_offset_t alignment, curr_min_addr, min_addr; int gap, pidx, rv, try; bool cluster, en_aslr, update_anon; KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 || object == NULL, ("vm_map_find: non-NULL backing object for stack")); MPASS((cow & MAP_REMAP) == 0 || (find_space == VMFS_NO_SPACE && (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0)); if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL || (object->flags & OBJ_COLORED) == 0)) find_space = VMFS_ANY_SPACE; if (find_space >> 8 != 0) { KASSERT((find_space & 0xff) == 0, ("bad VMFS flags")); alignment = (vm_offset_t)1 << (find_space >> 8); } else alignment = 0; en_aslr = (map->flags & MAP_ASLR) != 0; update_anon = cluster = clustering_anon_allowed(*addr) && (map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 && find_space != VMFS_NO_SPACE && object == NULL && (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP | MAP_STACK_GROWS_DOWN)) == 0 && prot != PROT_NONE; curr_min_addr = min_addr = *addr; if (en_aslr && min_addr == 0 && !cluster && find_space != VMFS_NO_SPACE && (map->flags & MAP_ASLR_IGNSTART) != 0) curr_min_addr = min_addr = vm_map_min(map); try = 0; vm_map_lock(map); if (cluster) { curr_min_addr = map->anon_loc; if (curr_min_addr == 0) cluster = false; } if (find_space != VMFS_NO_SPACE) { KASSERT(find_space == VMFS_ANY_SPACE || find_space == VMFS_OPTIMAL_SPACE || find_space == VMFS_SUPER_SPACE || alignment != 0, ("unexpected VMFS flag")); again: /* * When creating an anonymous mapping, try clustering * with an existing anonymous mapping first. * * We make up to two attempts to find address space * for a given find_space value. The first attempt may * apply randomization or may cluster with an existing * anonymous mapping. If this first attempt fails, * perform a first-fit search of the available address * space. * * If all tries failed, and find_space is * VMFS_OPTIMAL_SPACE, fallback to VMFS_ANY_SPACE. * Again enable clustering and randomization. 
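 */

/*
 * The shape of the first-attempt randomization below: reserve enough
 * contiguous free space for the mapping plus a gap of whole pages, then
 * slide the start forward by a random page count within the gap, so the
 * result stays page-aligned and stays inside known-free space.  In this
 * sketch find_free() stands in for vm_map_findspace() and is assumed to
 * exist; arc4random() is the interface the kernel itself uses.  Callers
 * must still re-check the map bounds afterwards, as the "goto again"
 * paths below do.
 */
#include <stdint.h>
#include <stdlib.h>

static void
randomized_place(uintptr_t *addr, size_t length, size_t pagesize,
    uint32_t gap, uintptr_t (*find_free)(uintptr_t, size_t))
{
	*addr = find_free(*addr, length + (size_t)gap * pagesize);
	*addr += (arc4random() % gap) * pagesize;
}

/*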
*/ try++; MPASS(try <= 2); if (try == 2) { /* * Second try: we failed either to find a * suitable region for randomizing the * allocation, or to cluster with an existing * mapping. Retry with free run. */ curr_min_addr = (map->flags & MAP_ASLR_IGNSTART) != 0 ? vm_map_min(map) : min_addr; atomic_add_long(&aslr_restarts, 1); } if (try == 1 && en_aslr && !cluster) { /* * Find space for allocation, including * gap needed for later randomization. */ pidx = MAXPAGESIZES > 1 && pagesizes[1] != 0 && (find_space == VMFS_SUPER_SPACE || find_space == VMFS_OPTIMAL_SPACE) ? 1 : 0; gap = vm_map_max(map) > MAP_32BIT_MAX_ADDR && (max_addr == 0 || max_addr > MAP_32BIT_MAX_ADDR) ? aslr_pages_rnd_64[pidx] : aslr_pages_rnd_32[pidx]; *addr = vm_map_findspace(map, curr_min_addr, length + gap * pagesizes[pidx]); if (*addr + length + gap * pagesizes[pidx] > vm_map_max(map)) goto again; /* And randomize the start address. */ *addr += (arc4random() % gap) * pagesizes[pidx]; if (max_addr != 0 && *addr + length > max_addr) goto again; } else { *addr = vm_map_findspace(map, curr_min_addr, length); if (*addr + length > vm_map_max(map) || (max_addr != 0 && *addr + length > max_addr)) { if (cluster) { cluster = false; MPASS(try == 1); goto again; } rv = KERN_NO_SPACE; goto done; } } if (find_space != VMFS_ANY_SPACE && (rv = vm_map_alignspace(map, object, offset, addr, length, max_addr, alignment)) != KERN_SUCCESS) { if (find_space == VMFS_OPTIMAL_SPACE) { find_space = VMFS_ANY_SPACE; curr_min_addr = min_addr; cluster = update_anon; try = 0; goto again; } goto done; } } else if ((cow & MAP_REMAP) != 0) { if (*addr < vm_map_min(map) || *addr + length > vm_map_max(map) || *addr + length <= length) { rv = KERN_INVALID_ADDRESS; goto done; } vm_map_delete(map, *addr, *addr + length); } if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) { rv = vm_map_stack_locked(map, *addr, length, sgrowsiz, prot, max, cow); } else { rv = vm_map_insert(map, object, offset, *addr, *addr + length, prot, max, cow); } if (rv == KERN_SUCCESS && update_anon) map->anon_loc = *addr + length; done: vm_map_unlock(map); return (rv); } /* * vm_map_find_min() is a variant of vm_map_find() that takes an * additional parameter (min_addr) and treats the given address * (*addr) differently. Specifically, it treats *addr as a hint * and not as the minimum address where the mapping is created. * * This function works in two phases. First, it tries to * allocate above the hint. If that fails and the hint is * greater than min_addr, it performs a second pass, replacing * the hint with min_addr as the minimum address for the * allocation. */ int vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr, vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max, int cow) { vm_offset_t hint; int rv; hint = *addr; for (;;) { rv = vm_map_find(map, object, offset, addr, length, max_addr, find_space, prot, max, cow); if (rv == KERN_SUCCESS || min_addr >= hint) return (rv); *addr = hint = min_addr; } } /* * A map entry with any of the following flags set must not be merged with * another entry. 
*/ #define MAP_ENTRY_NOMERGE_MASK (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP | \ MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP) static bool vm_map_mergeable_neighbors(vm_map_entry_t prev, vm_map_entry_t entry) { KASSERT((prev->eflags & MAP_ENTRY_NOMERGE_MASK) == 0 || (entry->eflags & MAP_ENTRY_NOMERGE_MASK) == 0, ("vm_map_mergeable_neighbors: neither %p nor %p are mergeable", prev, entry)); return (prev->end == entry->start && prev->object.vm_object == entry->object.vm_object && (prev->object.vm_object == NULL || prev->offset + (prev->end - prev->start) == entry->offset) && prev->eflags == entry->eflags && prev->protection == entry->protection && prev->max_protection == entry->max_protection && prev->inheritance == entry->inheritance && prev->wired_count == entry->wired_count && prev->cred == entry->cred); } static void vm_map_merged_neighbor_dispose(vm_map_t map, vm_map_entry_t entry) { /* * If the backing object is a vnode object, vm_object_deallocate() * calls vrele(). However, vrele() does not lock the vnode because * the vnode has additional references. Thus, the map lock can be * kept without causing a lock-order reversal with the vnode lock. * * Since we count the number of virtual page mappings in * object->un_pager.vnp.writemappings, the writemappings value * should not be adjusted when the entry is disposed of. */ if (entry->object.vm_object != NULL) vm_object_deallocate(entry->object.vm_object); if (entry->cred != NULL) crfree(entry->cred); vm_map_entry_dispose(map, entry); } /* * vm_map_simplify_entry: * * Simplify the given map entry by merging with either neighbor. This * routine also has the ability to merge with both neighbors. * * The map must be locked. * * This routine guarantees that the passed entry remains valid (though * possibly extended). When merging, this routine may delete one or * both neighbors. */ void vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry) { vm_map_entry_t next, prev; if ((entry->eflags & MAP_ENTRY_NOMERGE_MASK) != 0) return; prev = entry->prev; if (vm_map_mergeable_neighbors(prev, entry)) { vm_map_entry_unlink(map, prev, UNLINK_MERGE_NEXT); vm_map_merged_neighbor_dispose(map, prev); } next = entry->next; if (vm_map_mergeable_neighbors(entry, next)) { vm_map_entry_unlink(map, next, UNLINK_MERGE_PREV); vm_map_merged_neighbor_dispose(map, next); } } /* * vm_map_clip_start: [ internal use only ] * * Asserts that the given entry begins at or after * the specified address; if necessary, * it splits the entry into two. */ #define vm_map_clip_start(map, entry, startaddr) \ { \ if (startaddr > entry->start) \ _vm_map_clip_start(map, entry, startaddr); \ } /* * This routine is called only when it is known that * the entry must be split. */ static void _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start) { vm_map_entry_t new_entry; VM_MAP_ASSERT_LOCKED(map); KASSERT(entry->end > start && entry->start < start, ("_vm_map_clip_start: invalid clip of entry %p", entry)); /* * Split off the front portion -- note that we must insert the new * entry BEFORE this one, so that this entry has the specified * starting address. */ vm_map_simplify_entry(map, entry); /* * If there is no object backing this entry, we might as well create * one now. If we defer it, an object can get created after the map * is clipped, and individual objects will be created for the split-up * map. This is a bit of a hack, but is also about the best place to * put this improvement. 
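 */

/*
 * Clipping splits one [start, end) entry into two entries meeting at the
 * clip address: the front half keeps the original start, the back half
 * keeps the original end, and the back half's offset into the backing
 * object grows by the size of the front half.  A minimal model over a
 * plain struct (illustrative names, no object or credential bookkeeping):
 */
#include <stdlib.h>

struct ventry {
	unsigned long start, end;	/* [start, end) */
	unsigned long offset;		/* offset into the backing object */
};

/*
 * Split *e at "at" (start < at < end): a new entry is returned for the
 * front half and *e becomes the back half, as in _vm_map_clip_start().
 */
static struct ventry *
clip_front(struct ventry *e, unsigned long at)
{
	struct ventry *front;

	if ((front = malloc(sizeof(*front))) == NULL)
		abort();
	*front = *e;
	front->end = at;		/* front: [start, at) */
	e->offset += at - e->start;	/* back half maps the later pages */
	e->start = at;			/* back: [at, end) */
	return (front);
}

/*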
*/ if (entry->object.vm_object == NULL && !map->system_map && (entry->eflags & MAP_ENTRY_GUARD) == 0) { vm_object_t object; object = vm_object_allocate(OBJT_DEFAULT, atop(entry->end - entry->start)); entry->object.vm_object = object; entry->offset = 0; if (entry->cred != NULL) { object->cred = entry->cred; object->charge = entry->end - entry->start; entry->cred = NULL; } } else if (entry->object.vm_object != NULL && ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) && entry->cred != NULL) { VM_OBJECT_WLOCK(entry->object.vm_object); KASSERT(entry->object.vm_object->cred == NULL, ("OVERCOMMIT: vm_entry_clip_start: both cred e %p", entry)); entry->object.vm_object->cred = entry->cred; entry->object.vm_object->charge = entry->end - entry->start; VM_OBJECT_WUNLOCK(entry->object.vm_object); entry->cred = NULL; } new_entry = vm_map_entry_create(map); *new_entry = *entry; new_entry->end = start; entry->offset += (start - entry->start); entry->start = start; if (new_entry->cred != NULL) crhold(entry->cred); vm_map_entry_link(map, new_entry); if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { vm_object_reference(new_entry->object.vm_object); /* * The object->un_pager.vnp.writemappings for the * object of MAP_ENTRY_VN_WRITECNT type entry shall be * kept as is here. The virtual pages are * re-distributed among the clipped entries, so the sum is * left the same. */ } } /* * vm_map_clip_end: [ internal use only ] * * Asserts that the given entry ends at or before * the specified address; if necessary, * it splits the entry into two. */ #define vm_map_clip_end(map, entry, endaddr) \ { \ if ((endaddr) < (entry->end)) \ _vm_map_clip_end((map), (entry), (endaddr)); \ } /* * This routine is called only when it is known that * the entry must be split. */ static void _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end) { vm_map_entry_t new_entry; VM_MAP_ASSERT_LOCKED(map); KASSERT(entry->start < end && entry->end > end, ("_vm_map_clip_end: invalid clip of entry %p", entry)); /* * If there is no object backing this entry, we might as well create * one now. If we defer it, an object can get created after the map * is clipped, and individual objects will be created for the split-up * map. This is a bit of a hack, but is also about the best place to * put this improvement. 
 */
	if (entry->object.vm_object == NULL && !map->system_map &&
	    (entry->eflags & MAP_ENTRY_GUARD) == 0) {
		vm_object_t object;
		object = vm_object_allocate(OBJT_DEFAULT,
		    atop(entry->end - entry->start));
		entry->object.vm_object = object;
		entry->offset = 0;
		if (entry->cred != NULL) {
			object->cred = entry->cred;
			object->charge = entry->end - entry->start;
			entry->cred = NULL;
		}
	} else if (entry->object.vm_object != NULL &&
	    ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
	    entry->cred != NULL) {
		VM_OBJECT_WLOCK(entry->object.vm_object);
		KASSERT(entry->object.vm_object->cred == NULL,
		    ("OVERCOMMIT: vm_entry_clip_end: both cred e %p", entry));
		entry->object.vm_object->cred = entry->cred;
		entry->object.vm_object->charge = entry->end - entry->start;
		VM_OBJECT_WUNLOCK(entry->object.vm_object);
		entry->cred = NULL;
	}

	/*
	 * Create a new entry and insert it AFTER the specified entry
	 */
	new_entry = vm_map_entry_create(map);
	*new_entry = *entry;

	new_entry->start = entry->end = end;
	new_entry->offset += (end - entry->start);
	if (new_entry->cred != NULL)
		crhold(entry->cred);

	vm_map_entry_link(map, new_entry);

	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
		vm_object_reference(new_entry->object.vm_object);
	}
}

/*
 *	vm_map_submap:		[ kernel use only ]
 *
 *	Mark the given range as handled by a subordinate map.
 *
 *	This range must have been created with vm_map_find,
 *	and no other operations may have been performed on this
 *	range prior to calling vm_map_submap.
 *
 *	Only a limited number of operations can be performed
 *	within this range after calling vm_map_submap:
 *	vm_fault
 *	[Don't try vm_map_copy!]
 *
 *	To remove a submapping, one must first remove the
 *	range from the superior map, and then destroy the
 *	submap (if desired).  [Better yet, don't try it.]
 */
int
vm_map_submap(
	vm_map_t map,
	vm_offset_t start,
	vm_offset_t end,
	vm_map_t submap)
{
	vm_map_entry_t entry;
	int result;

	result = KERN_INVALID_ARGUMENT;

	vm_map_lock(submap);
	submap->flags |= MAP_IS_SUB_MAP;
	vm_map_unlock(submap);

	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &entry)) {
		vm_map_clip_start(map, entry, start);
	} else
		entry = entry->next;

	vm_map_clip_end(map, entry, end);

	if ((entry->start == start) && (entry->end == end) &&
	    ((entry->eflags & MAP_ENTRY_COW) == 0) &&
	    (entry->object.vm_object == NULL)) {
		entry->object.sub_map = submap;
		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
		result = KERN_SUCCESS;
	}
	vm_map_unlock(map);

	if (result != KERN_SUCCESS) {
		vm_map_lock(submap);
		submap->flags &= ~MAP_IS_SUB_MAP;
		vm_map_unlock(submap);
	}
	return (result);
}

/*
 * The maximum number of pages to map if MAP_PREFAULT_PARTIAL is specified
 */
#define	MAX_INIT_PT	96

/*
 *	vm_map_pmap_enter:
 *
 *	Preload the specified map's pmap with mappings to the specified
 *	object's memory-resident pages.  No further physical pages are
 *	allocated, and no further virtual pages are retrieved from secondary
 *	storage.  If the specified flags include MAP_PREFAULT_PARTIAL, then a
 *	limited number of page mappings are created at the low-end of the
 *	specified address range.  (For this purpose, a superpage mapping
 *	counts as one page mapping.)  Otherwise, all resident pages within
 *	the specified address range are mapped.
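 */

/*
 * madvise(MADV_WILLNEED) is one way this prefaulting is reached from
 * userland: vm_map_madvise() forwards it here with MAP_PREFAULT_MADVISE,
 * which backs off when free pages are scarce.  A sketch of the call; the
 * advice only helps to the extent the object already has resident pages:
 */
#include <sys/mman.h>
#include <err.h>
#include <string.h>

int
main(void)
{
	size_t len = 64 * 4096;
	char *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,
	    -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	memset(p, 0, len);		/* make the pages resident */
	if (madvise(p, len, MADV_WILLNEED) == -1)
		err(1, "madvise");
	return (0);
}

/*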
*/ static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot, vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags) { vm_offset_t start; vm_page_t p, p_start; vm_pindex_t mask, psize, threshold, tmpidx; if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL) return; VM_OBJECT_RLOCK(object); if (object->type == OBJT_DEVICE || object->type == OBJT_SG) { VM_OBJECT_RUNLOCK(object); VM_OBJECT_WLOCK(object); if (object->type == OBJT_DEVICE || object->type == OBJT_SG) { pmap_object_init_pt(map->pmap, addr, object, pindex, size); VM_OBJECT_WUNLOCK(object); return; } VM_OBJECT_LOCK_DOWNGRADE(object); } psize = atop(size); if (psize + pindex > object->size) { if (object->size < pindex) { VM_OBJECT_RUNLOCK(object); return; } psize = object->size - pindex; } start = 0; p_start = NULL; threshold = MAX_INIT_PT; p = vm_page_find_least(object, pindex); /* * Assert: the variable p is either (1) the page with the * least pindex greater than or equal to the parameter pindex * or (2) NULL. */ for (; p != NULL && (tmpidx = p->pindex - pindex) < psize; p = TAILQ_NEXT(p, listq)) { /* * don't allow an madvise to blow away our really * free pages allocating pv entries. */ if (((flags & MAP_PREFAULT_MADVISE) != 0 && vm_page_count_severe()) || ((flags & MAP_PREFAULT_PARTIAL) != 0 && tmpidx >= threshold)) { psize = tmpidx; break; } if (p->valid == VM_PAGE_BITS_ALL) { if (p_start == NULL) { start = addr + ptoa(tmpidx); p_start = p; } /* Jump ahead if a superpage mapping is possible. */ if (p->psind > 0 && ((addr + ptoa(tmpidx)) & (pagesizes[p->psind] - 1)) == 0) { mask = atop(pagesizes[p->psind]) - 1; if (tmpidx + mask < psize && vm_page_ps_test(p, PS_ALL_VALID, NULL)) { p += mask; threshold += mask; } } } else if (p_start != NULL) { pmap_enter_object(map->pmap, start, addr + ptoa(tmpidx), p_start, prot); p_start = NULL; } } if (p_start != NULL) pmap_enter_object(map->pmap, start, addr + ptoa(psize), p_start, prot); VM_OBJECT_RUNLOCK(object); } /* * vm_map_protect: * * Sets the protection of the specified address * region in the target map. If "set_max" is * specified, the maximum protection is to be set; * otherwise, only the current protection is affected. */ int vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, vm_prot_t new_prot, boolean_t set_max) { - vm_map_entry_t current, entry; + vm_map_entry_t current, entry, in_tran; vm_object_t obj; struct ucred *cred; vm_prot_t old_prot; if (start == end) return (KERN_SUCCESS); +again: + in_tran = NULL; vm_map_lock(map); /* * Ensure that we are not concurrently wiring pages. vm_map_wire() may * need to fault pages into the map and will drop the map lock while * doing so, and the VM object may end up in an inconsistent state if we * update the protection on the map entry in between faults. */ vm_map_wait_busy(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { vm_map_clip_start(map, entry, start); } else { entry = entry->next; } /* * Make a first pass to check for protection violations. 
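 */

/*
 * vm_map_protect() is the engine behind mprotect(2).  Re-enabling write
 * access on a private mapping is what triggers the accounting pass below
 * (the debugger-sets-a-breakpoint case); restricting access only has to
 * update the pmap.  A userland sketch of both directions:
 */
#include <sys/mman.h>
#include <err.h>

int
main(void)
{
	size_t len = 4096;
	char *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,
	    -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	/* Restrict: pmap_protect() strips write access from the range. */
	if (mprotect(p, len, PROT_READ) == -1)
		err(1, "mprotect");
	/* Re-grant: still within max_protection, so this succeeds. */
	if (mprotect(p, len, PROT_READ | PROT_WRITE) == -1)
		err(1, "mprotect");
	return (0);
}

/*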
*/ for (current = entry; current->start < end; current = current->next) { if ((current->eflags & MAP_ENTRY_GUARD) != 0) continue; if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { vm_map_unlock(map); return (KERN_INVALID_ARGUMENT); } if ((new_prot & current->max_protection) != new_prot) { vm_map_unlock(map); return (KERN_PROTECTION_FAILURE); } + if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0) + in_tran = entry; + } + + /* + * Postpone the operation until all in transition map entries + * are stabilized. In-transition entry might already have its + * pages wired and wired_count incremented, but + * MAP_ENTRY_USER_WIRED flag not yet set, and visible to other + * threads because the map lock is dropped. In this case we + * would miss our call to vm_fault_copy_entry(). + */ + if (in_tran != NULL) { + in_tran->eflags |= MAP_ENTRY_NEEDS_WAKEUP; + vm_map_unlock_and_wait(map, 0); + goto again; } /* * Do an accounting pass for private read-only mappings that * now will do cow due to allowed write (e.g. debugger sets * breakpoint on text segment) */ for (current = entry; current->start < end; current = current->next) { vm_map_clip_end(map, current, end); if (set_max || ((new_prot & ~(current->protection)) & VM_PROT_WRITE) == 0 || ENTRY_CHARGED(current) || (current->eflags & MAP_ENTRY_GUARD) != 0) { continue; } cred = curthread->td_ucred; obj = current->object.vm_object; if (obj == NULL || (current->eflags & MAP_ENTRY_NEEDS_COPY)) { if (!swap_reserve(current->end - current->start)) { vm_map_unlock(map); return (KERN_RESOURCE_SHORTAGE); } crhold(cred); current->cred = cred; continue; } VM_OBJECT_WLOCK(obj); if (obj->type != OBJT_DEFAULT && obj->type != OBJT_SWAP) { VM_OBJECT_WUNLOCK(obj); continue; } /* * Charge for the whole object allocation now, since * we cannot distinguish between non-charged and * charged clipped mapping of the same object later. */ KASSERT(obj->charge == 0, ("vm_map_protect: object %p overcharged (entry %p)", obj, current)); if (!swap_reserve(ptoa(obj->size))) { VM_OBJECT_WUNLOCK(obj); vm_map_unlock(map); return (KERN_RESOURCE_SHORTAGE); } crhold(cred); obj->cred = cred; obj->charge = ptoa(obj->size); VM_OBJECT_WUNLOCK(obj); } /* * Go back and fix up protections. [Note that clipping is not * necessary the second time.] */ for (current = entry; current->start < end; current = current->next) { if ((current->eflags & MAP_ENTRY_GUARD) != 0) continue; old_prot = current->protection; if (set_max) current->protection = (current->max_protection = new_prot) & old_prot; else current->protection = new_prot; /* * For user wired map entries, the normal lazy evaluation of * write access upgrades through soft page faults is * undesirable. Instead, immediately copy any pages that are * copy-on-write and enable write access in the physical map. */ if ((current->eflags & MAP_ENTRY_USER_WIRED) != 0 && (current->protection & VM_PROT_WRITE) != 0 && (old_prot & VM_PROT_WRITE) == 0) vm_fault_copy_entry(map, map, current, current, NULL); /* * When restricting access, update the physical map. Worry * about copy-on-write here. */ if ((old_prot & ~current->protection) != 0) { #define MASK(entry) (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \ VM_PROT_ALL) pmap_protect(map->pmap, current->start, current->end, current->protection & MASK(current)); #undef MASK } vm_map_simplify_entry(map, current); } vm_map_unlock(map); return (KERN_SUCCESS); } /* * vm_map_madvise: * * This routine traverses a processes map handling the madvise * system call. 
Advisories are classified as either those effecting * the vm_map_entry structure, or those effecting the underlying * objects. */ int vm_map_madvise( vm_map_t map, vm_offset_t start, vm_offset_t end, int behav) { vm_map_entry_t current, entry; bool modify_map; /* * Some madvise calls directly modify the vm_map_entry, in which case * we need to use an exclusive lock on the map and we need to perform * various clipping operations. Otherwise we only need a read-lock * on the map. */ switch(behav) { case MADV_NORMAL: case MADV_SEQUENTIAL: case MADV_RANDOM: case MADV_NOSYNC: case MADV_AUTOSYNC: case MADV_NOCORE: case MADV_CORE: if (start == end) return (0); modify_map = true; vm_map_lock(map); break; case MADV_WILLNEED: case MADV_DONTNEED: case MADV_FREE: if (start == end) return (0); modify_map = false; vm_map_lock_read(map); break; default: return (EINVAL); } /* * Locate starting entry and clip if necessary. */ VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { if (modify_map) vm_map_clip_start(map, entry, start); } else { entry = entry->next; } if (modify_map) { /* * madvise behaviors that are implemented in the vm_map_entry. * * We clip the vm_map_entry so that behavioral changes are * limited to the specified address range. */ for (current = entry; current->start < end; current = current->next) { if (current->eflags & MAP_ENTRY_IS_SUB_MAP) continue; vm_map_clip_end(map, current, end); switch (behav) { case MADV_NORMAL: vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL); break; case MADV_SEQUENTIAL: vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL); break; case MADV_RANDOM: vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM); break; case MADV_NOSYNC: current->eflags |= MAP_ENTRY_NOSYNC; break; case MADV_AUTOSYNC: current->eflags &= ~MAP_ENTRY_NOSYNC; break; case MADV_NOCORE: current->eflags |= MAP_ENTRY_NOCOREDUMP; break; case MADV_CORE: current->eflags &= ~MAP_ENTRY_NOCOREDUMP; break; default: break; } vm_map_simplify_entry(map, current); } vm_map_unlock(map); } else { vm_pindex_t pstart, pend; /* * madvise behaviors that are implemented in the underlying * vm_object. * * Since we don't clip the vm_map_entry, we have to clip * the vm_object pindex and count. */ for (current = entry; current->start < end; current = current->next) { vm_offset_t useEnd, useStart; if (current->eflags & MAP_ENTRY_IS_SUB_MAP) continue; pstart = OFF_TO_IDX(current->offset); pend = pstart + atop(current->end - current->start); useStart = current->start; useEnd = current->end; if (current->start < start) { pstart += atop(start - current->start); useStart = start; } if (current->end > end) { pend -= atop(current->end - end); useEnd = end; } if (pstart >= pend) continue; /* * Perform the pmap_advise() before clearing * PGA_REFERENCED in vm_page_advise(). Otherwise, a * concurrent pmap operation, such as pmap_remove(), * could clear a reference in the pmap and set * PGA_REFERENCED on the page before the pmap_advise() * had completed. Consequently, the page would appear * referenced based upon an old reference that * occurred before this pmap_advise() ran. */ if (behav == MADV_DONTNEED || behav == MADV_FREE) pmap_advise(map->pmap, useStart, useEnd, behav); vm_object_madvise(current->object.vm_object, pstart, pend, behav); /* * Pre-populate paging structures in the * WILLNEED case. For wired entries, the * paging structures are already populated. 
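 */

/*
 * The entry-level/object-level split that vm_map_madvise() implements is
 * visible in the advice values a process passes: MADV_RANDOM only rewrites
 * the map entry's behavior hint (exclusive map lock, entry clipping), while
 * MADV_FREE reaches through to the backing object's pages (read lock,
 * pindex clipping).  Both travel through the same system call:
 */
#include <sys/mman.h>
#include <err.h>
#include <string.h>

int
main(void)
{
	size_t len = 64 * 4096;
	char *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,
	    -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	memset(p, 0xa5, len);			/* dirty the pages */
	/* Entry-level: disable read-ahead for this range. */
	if (madvise(p, len, MADV_RANDOM) == -1)
		err(1, "madvise(MADV_RANDOM)");
	/* Object-level: the dirty pages may now be freed without I/O. */
	if (madvise(p, len, MADV_FREE) == -1)
		err(1, "madvise(MADV_FREE)");
	return (0);
}

/*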
*/ if (behav == MADV_WILLNEED && current->wired_count == 0) { vm_map_pmap_enter(map, useStart, current->protection, current->object.vm_object, pstart, ptoa(pend - pstart), MAP_PREFAULT_MADVISE ); } } vm_map_unlock_read(map); } return (0); } /* * vm_map_inherit: * * Sets the inheritance of the specified address * range in the target map. Inheritance * affects how the map will be shared with * child maps at the time of vmspace_fork. */ int vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, vm_inherit_t new_inheritance) { vm_map_entry_t entry; vm_map_entry_t temp_entry; switch (new_inheritance) { case VM_INHERIT_NONE: case VM_INHERIT_COPY: case VM_INHERIT_SHARE: case VM_INHERIT_ZERO: break; default: return (KERN_INVALID_ARGUMENT); } if (start == end) return (KERN_SUCCESS); vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &temp_entry)) { entry = temp_entry; vm_map_clip_start(map, entry, start); } else entry = temp_entry->next; while (entry->start < end) { vm_map_clip_end(map, entry, end); if ((entry->eflags & MAP_ENTRY_GUARD) == 0 || new_inheritance != VM_INHERIT_ZERO) entry->inheritance = new_inheritance; vm_map_simplify_entry(map, entry); entry = entry->next; } vm_map_unlock(map); return (KERN_SUCCESS); } /* * vm_map_unwire: * * Implements both kernel and user unwiring. */ int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags) { vm_map_entry_t entry, first_entry, tmp_entry; vm_offset_t saved_start; unsigned int last_timestamp; int rv; boolean_t need_wakeup, result, user_unwire; if (start == end) return (KERN_SUCCESS); user_unwire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (!vm_map_lookup_entry(map, start, &first_entry)) { if (flags & VM_MAP_WIRE_HOLESOK) first_entry = first_entry->next; else { vm_map_unlock(map); return (KERN_INVALID_ADDRESS); } } last_timestamp = map->timestamp; entry = first_entry; while (entry->start < end) { if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { /* * We have not yet clipped the entry. */ saved_start = (start >= entry->start) ? start : entry->start; entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; if (vm_map_unlock_and_wait(map, 0)) { /* * Allow interruption of user unwiring? */ } vm_map_lock(map); if (last_timestamp+1 != map->timestamp) { /* * Look again for the entry because the map was * modified while it was unlocked. * Specifically, the entry may have been * clipped, merged, or deleted. */ if (!vm_map_lookup_entry(map, saved_start, &tmp_entry)) { if (flags & VM_MAP_WIRE_HOLESOK) tmp_entry = tmp_entry->next; else { if (saved_start == start) { /* * First_entry has been deleted. */ vm_map_unlock(map); return (KERN_INVALID_ADDRESS); } end = saved_start; rv = KERN_INVALID_ADDRESS; goto done; } } if (entry == first_entry) first_entry = tmp_entry; else first_entry = NULL; entry = tmp_entry; } last_timestamp = map->timestamp; continue; } vm_map_clip_start(map, entry, start); vm_map_clip_end(map, entry, end); /* * Mark the entry in case the map lock is released. (See * above.) */ KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 && entry->wiring_thread == NULL, ("owned map entry %p", entry)); entry->eflags |= MAP_ENTRY_IN_TRANSITION; entry->wiring_thread = curthread; /* * Check the map for holes in the specified region. * If VM_MAP_WIRE_HOLESOK was specified, skip this check. 
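 */

/*
 * vm_map_unwire() with VM_MAP_WIRE_USER is the path behind munlock(2),
 * undoing the wiring established by mlock(2)/vm_map_wire().  A userland
 * sketch (subject to resource limits on wired memory):
 */
#include <sys/mman.h>
#include <err.h>
#include <string.h>

int
main(void)
{
	size_t len = 16 * 4096;
	char *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,
	    -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	if (mlock(p, len) == -1)	/* wire: pages cannot be paged out */
		err(1, "mlock");
	memset(p, 0, len);		/* touches only resident, wired pages */
	if (munlock(p, len) == -1)	/* unwire: pages are pageable again */
		err(1, "munlock");
	return (0);
}

/*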
*/ if (((flags & VM_MAP_WIRE_HOLESOK) == 0) && (entry->end < end && entry->next->start > entry->end)) { end = entry->end; rv = KERN_INVALID_ADDRESS; goto done; } /* * If system unwiring, require that the entry is system wired. */ if (!user_unwire && vm_map_entry_system_wired_count(entry) == 0) { end = entry->end; rv = KERN_INVALID_ARGUMENT; goto done; } entry = entry->next; } rv = KERN_SUCCESS; done: need_wakeup = FALSE; if (first_entry == NULL) { result = vm_map_lookup_entry(map, start, &first_entry); if (!result && (flags & VM_MAP_WIRE_HOLESOK)) first_entry = first_entry->next; else KASSERT(result, ("vm_map_unwire: lookup failed")); } for (entry = first_entry; entry->start < end; entry = entry->next) { /* * If VM_MAP_WIRE_HOLESOK was specified, an empty * space in the unwired region could have been mapped * while the map lock was dropped for draining * MAP_ENTRY_IN_TRANSITION. Moreover, another thread * could be simultaneously wiring this new mapping * entry. Detect these cases and skip any entries * marked as in transition by us. */ if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 || entry->wiring_thread != curthread) { KASSERT((flags & VM_MAP_WIRE_HOLESOK) != 0, ("vm_map_unwire: !HOLESOK and new/changed entry")); continue; } if (rv == KERN_SUCCESS && (!user_unwire || (entry->eflags & MAP_ENTRY_USER_WIRED))) { if (user_unwire) entry->eflags &= ~MAP_ENTRY_USER_WIRED; if (entry->wired_count == 1) vm_map_entry_unwire(map, entry); else entry->wired_count--; } KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, ("vm_map_unwire: in-transition flag missing %p", entry)); KASSERT(entry->wiring_thread == curthread, ("vm_map_unwire: alien wire %p", entry)); entry->eflags &= ~MAP_ENTRY_IN_TRANSITION; entry->wiring_thread = NULL; if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; need_wakeup = TRUE; } vm_map_simplify_entry(map, entry); } vm_map_unlock(map); if (need_wakeup) vm_map_wakeup(map); return (rv); } /* * vm_map_wire_entry_failure: * * Handle a wiring failure on the given entry. * * The map should be locked. */ static void vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry, vm_offset_t failed_addr) { VM_MAP_ASSERT_LOCKED(map); KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 && entry->wired_count == 1, ("vm_map_wire_entry_failure: entry %p isn't being wired", entry)); KASSERT(failed_addr < entry->end, ("vm_map_wire_entry_failure: entry %p was fully wired", entry)); /* * If any pages at the start of this entry were successfully wired, * then unwire them. */ if (failed_addr > entry->start) { pmap_unwire(map->pmap, entry->start, failed_addr); vm_object_unwire(entry->object.vm_object, entry->offset, failed_addr - entry->start, PQ_ACTIVE); } /* * Assign an out-of-range value to represent the failure to wire this * entry. */ entry->wired_count = -1; } /* * vm_map_wire: * * Implements both kernel and user wiring. */ int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags) { vm_map_entry_t entry, first_entry, tmp_entry; vm_offset_t faddr, saved_end, saved_start; unsigned int last_timestamp; int rv; boolean_t need_wakeup, result, user_wire; vm_prot_t prot; if (start == end) return (KERN_SUCCESS); prot = 0; if (flags & VM_MAP_WIRE_WRITE) prot |= VM_PROT_WRITE; user_wire = (flags & VM_MAP_WIRE_USER) ? 
TRUE : FALSE; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (!vm_map_lookup_entry(map, start, &first_entry)) { if (flags & VM_MAP_WIRE_HOLESOK) first_entry = first_entry->next; else { vm_map_unlock(map); return (KERN_INVALID_ADDRESS); } } last_timestamp = map->timestamp; entry = first_entry; while (entry->start < end) { if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { /* * We have not yet clipped the entry. */ saved_start = (start >= entry->start) ? start : entry->start; entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; if (vm_map_unlock_and_wait(map, 0)) { /* * Allow interruption of user wiring? */ } vm_map_lock(map); if (last_timestamp + 1 != map->timestamp) { /* * Look again for the entry because the map was * modified while it was unlocked. * Specifically, the entry may have been * clipped, merged, or deleted. */ if (!vm_map_lookup_entry(map, saved_start, &tmp_entry)) { if (flags & VM_MAP_WIRE_HOLESOK) tmp_entry = tmp_entry->next; else { if (saved_start == start) { /* * first_entry has been deleted. */ vm_map_unlock(map); return (KERN_INVALID_ADDRESS); } end = saved_start; rv = KERN_INVALID_ADDRESS; goto done; } } if (entry == first_entry) first_entry = tmp_entry; else first_entry = NULL; entry = tmp_entry; } last_timestamp = map->timestamp; continue; } vm_map_clip_start(map, entry, start); vm_map_clip_end(map, entry, end); /* * Mark the entry in case the map lock is released. (See * above.) */ KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 && entry->wiring_thread == NULL, ("owned map entry %p", entry)); entry->eflags |= MAP_ENTRY_IN_TRANSITION; entry->wiring_thread = curthread; if ((entry->protection & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || (entry->protection & prot) != prot) { entry->eflags |= MAP_ENTRY_WIRE_SKIPPED; if ((flags & VM_MAP_WIRE_HOLESOK) == 0) { end = entry->end; rv = KERN_INVALID_ADDRESS; goto done; } goto next_entry; } if (entry->wired_count == 0) { entry->wired_count++; saved_start = entry->start; saved_end = entry->end; /* * Release the map lock, relying on the in-transition * mark. Mark the map busy for fork. */ vm_map_busy(map); vm_map_unlock(map); faddr = saved_start; do { /* * Simulate a fault to get the page and enter * it into the physical map. */ if ((rv = vm_fault(map, faddr, VM_PROT_NONE, VM_FAULT_WIRE)) != KERN_SUCCESS) break; } while ((faddr += PAGE_SIZE) < saved_end); vm_map_lock(map); vm_map_unbusy(map); if (last_timestamp + 1 != map->timestamp) { /* * Look again for the entry because the map was * modified while it was unlocked. The entry * may have been clipped, but NOT merged or * deleted. */ result = vm_map_lookup_entry(map, saved_start, &tmp_entry); KASSERT(result, ("vm_map_wire: lookup failed")); if (entry == first_entry) first_entry = tmp_entry; else first_entry = NULL; entry = tmp_entry; while (entry->end < saved_end) { /* * In case of failure, handle entries * that were not fully wired here; * fully wired entries are handled * later. */ if (rv != KERN_SUCCESS && faddr < entry->end) vm_map_wire_entry_failure(map, entry, faddr); entry = entry->next; } } last_timestamp = map->timestamp; if (rv != KERN_SUCCESS) { vm_map_wire_entry_failure(map, entry, faddr); end = entry->end; goto done; } } else if (!user_wire || (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) { entry->wired_count++; } /* * Check the map for holes in the specified region. * If VM_MAP_WIRE_HOLESOK was specified, skip this check. 
*/ next_entry: if ((flags & VM_MAP_WIRE_HOLESOK) == 0 && entry->end < end && entry->next->start > entry->end) { end = entry->end; rv = KERN_INVALID_ADDRESS; goto done; } entry = entry->next; } rv = KERN_SUCCESS; done: need_wakeup = FALSE; if (first_entry == NULL) { result = vm_map_lookup_entry(map, start, &first_entry); if (!result && (flags & VM_MAP_WIRE_HOLESOK)) first_entry = first_entry->next; else KASSERT(result, ("vm_map_wire: lookup failed")); } for (entry = first_entry; entry->start < end; entry = entry->next) { /* * If VM_MAP_WIRE_HOLESOK was specified, an empty * space in the unwired region could have been mapped * while the map lock was dropped for faulting in the * pages or draining MAP_ENTRY_IN_TRANSITION. * Moreover, another thread could be simultaneously * wiring this new mapping entry. Detect these cases * and skip any entries marked as in transition not by us. */ if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 || entry->wiring_thread != curthread) { KASSERT((flags & VM_MAP_WIRE_HOLESOK) != 0, ("vm_map_wire: !HOLESOK and new/changed entry")); continue; } if ((entry->eflags & MAP_ENTRY_WIRE_SKIPPED) != 0) goto next_entry_done; if (rv == KERN_SUCCESS) { if (user_wire) entry->eflags |= MAP_ENTRY_USER_WIRED; } else if (entry->wired_count == -1) { /* * Wiring failed on this entry. Thus, unwiring is * unnecessary. */ entry->wired_count = 0; } else if (!user_wire || (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) { /* * Undo the wiring. Wiring succeeded on this entry * but failed on a later entry. */ if (entry->wired_count == 1) vm_map_entry_unwire(map, entry); else entry->wired_count--; } next_entry_done: KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, ("vm_map_wire: in-transition flag missing %p", entry)); KASSERT(entry->wiring_thread == curthread, ("vm_map_wire: alien wire %p", entry)); entry->eflags &= ~(MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_WIRE_SKIPPED); entry->wiring_thread = NULL; if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; need_wakeup = TRUE; } vm_map_simplify_entry(map, entry); } vm_map_unlock(map); if (need_wakeup) vm_map_wakeup(map); return (rv); } /* * vm_map_sync * * Push any dirty cached pages in the address range to their pager. * If syncio is TRUE, dirty pages are written synchronously. * If invalidate is TRUE, any cached pages are freed as well. * * If the size of the region from start to end is zero, we are * supposed to flush all modified pages within the region containing * start. Unfortunately, a region can be split or coalesced with * neighboring regions, making it difficult to determine what the * original region was. Therefore, we approximate this requirement by * flushing the current region containing start. * * Returns an error if any part of the specified range is not mapped. */ int vm_map_sync( vm_map_t map, vm_offset_t start, vm_offset_t end, boolean_t syncio, boolean_t invalidate) { vm_map_entry_t current; vm_map_entry_t entry; vm_size_t size; vm_object_t object; vm_ooffset_t offset; unsigned int last_timestamp; boolean_t failed; vm_map_lock_read(map); VM_MAP_RANGE_CHECK(map, start, end); if (!vm_map_lookup_entry(map, start, &entry)) { vm_map_unlock_read(map); return (KERN_INVALID_ADDRESS); } else if (start == end) { start = entry->start; end = entry->end; } /* * Make a first pass to check for user-wired memory and holes. 
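 */

/*
 * vm_map_sync() backs msync(2); the two passes above are why an msync()
 * over a range containing a hole is rejected and why MS_INVALIDATE cannot
 * be applied to user-wired pages.  A userland sketch that pushes one dirty
 * page to its file synchronously (the path name is illustrative):
 */
#include <sys/mman.h>
#include <err.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	size_t len = 4096;
	char *p;
	int fd;

	fd = open("/tmp/msync.demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (fd == -1)
		err(1, "open");
	if (ftruncate(fd, (off_t)len) == -1)
		err(1, "ftruncate");
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	memcpy(p, "dirty", 5);
	if (msync(p, len, MS_SYNC) == -1)	/* synchronous writeback */
		err(1, "msync");
	close(fd);
	return (0);
}

/*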
 */
	for (current = entry; current->start < end; current = current->next) {
		if (invalidate && (current->eflags & MAP_ENTRY_USER_WIRED)) {
			vm_map_unlock_read(map);
			return (KERN_INVALID_ARGUMENT);
		}
		if (end > current->end &&
		    current->end != current->next->start) {
			vm_map_unlock_read(map);
			return (KERN_INVALID_ADDRESS);
		}
	}

	if (invalidate)
		pmap_remove(map->pmap, start, end);
	failed = FALSE;

	/*
	 * Make a second pass, cleaning/uncaching pages from the indicated
	 * objects as we go.
	 */
	for (current = entry; current->start < end;) {
		offset = current->offset + (start - current->start);
		size = (end <= current->end ? end : current->end) - start;
		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
			vm_map_t smap;
			vm_map_entry_t tentry;
			vm_size_t tsize;

			smap = current->object.sub_map;
			vm_map_lock_read(smap);
			(void) vm_map_lookup_entry(smap, offset, &tentry);
			tsize = tentry->end - offset;
			if (tsize < size)
				size = tsize;
			object = tentry->object.vm_object;
			offset = tentry->offset + (offset - tentry->start);
			vm_map_unlock_read(smap);
		} else {
			object = current->object.vm_object;
		}
		vm_object_reference(object);
		last_timestamp = map->timestamp;
		vm_map_unlock_read(map);
		if (!vm_object_sync(object, offset, size, syncio, invalidate))
			failed = TRUE;
		start += size;
		vm_object_deallocate(object);
		vm_map_lock_read(map);
		if (last_timestamp == map->timestamp ||
		    !vm_map_lookup_entry(map, start, &current))
			current = current->next;
	}

	vm_map_unlock_read(map);
	return (failed ? KERN_FAILURE : KERN_SUCCESS);
}

/*
 *	vm_map_entry_unwire:	[ internal use only ]
 *
 *	Make the region specified by this entry pageable.
 *
 *	The map in question should be locked.
 *	[This is the reason for this routine's existence.]
 */
static void
vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
{

	VM_MAP_ASSERT_LOCKED(map);
	KASSERT(entry->wired_count > 0,
	    ("vm_map_entry_unwire: entry %p isn't wired", entry));
	pmap_unwire(map->pmap, entry->start, entry->end);
	vm_object_unwire(entry->object.vm_object, entry->offset, entry->end -
	    entry->start, PQ_ACTIVE);
	entry->wired_count = 0;
}

static void
vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map)
{

	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0)
		vm_object_deallocate(entry->object.vm_object);
	uma_zfree(system_map ? kmapentzone : mapentzone, entry);
}

/*
 *	vm_map_entry_delete:	[ internal use only ]
 *
 *	Deallocate the given entry from the target map.
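 */

/*
 * For user maps, vm_map_entry_delete() below does not free the entry
 * immediately: it chains it onto curthread->td_map_def_user, and
 * vm_map_process_deferred() tears it down once the map lock is dropped,
 * because destruction may need locks (e.g. vnode locks) that must not be
 * taken under the map lock.  The pattern in miniature, with illustrative
 * names:
 */
#include <stddef.h>
#include <stdlib.h>

struct node {
	struct node *next;
	/* ... payload whose teardown needs locks we cannot take now ... */
};

static struct node *deferred_head;	/* per-thread in the kernel */

/* Called with the lock held: only chain, never destroy. */
static void
defer_free(struct node *n)
{
	n->next = deferred_head;
	deferred_head = n;
}

/* Called after the lock is dropped: destruction is now safe. */
static void
process_deferred(void)
{
	struct node *n, *next;

	for (n = deferred_head, deferred_head = NULL; n != NULL; n = next) {
		next = n->next;
		free(n);
	}
}

/*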
*/ static void vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry) { vm_object_t object; vm_pindex_t offidxstart, offidxend, count, size1; vm_size_t size; vm_map_entry_unlink(map, entry, UNLINK_MERGE_NONE); object = entry->object.vm_object; if ((entry->eflags & MAP_ENTRY_GUARD) != 0) { MPASS(entry->cred == NULL); MPASS((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0); MPASS(object == NULL); vm_map_entry_deallocate(entry, map->system_map); return; } size = entry->end - entry->start; map->size -= size; if (entry->cred != NULL) { swap_release_by_cred(size, entry->cred); crfree(entry->cred); } if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 && (object != NULL)) { KASSERT(entry->cred == NULL || object->cred == NULL || (entry->eflags & MAP_ENTRY_NEEDS_COPY), ("OVERCOMMIT vm_map_entry_delete: both cred %p", entry)); count = atop(size); offidxstart = OFF_TO_IDX(entry->offset); offidxend = offidxstart + count; VM_OBJECT_WLOCK(object); if (object->ref_count != 1 && ((object->flags & (OBJ_NOSPLIT | OBJ_ONEMAPPING)) == OBJ_ONEMAPPING || object == kernel_object)) { vm_object_collapse(object); /* * The option OBJPR_NOTMAPPED can be passed here * because vm_map_delete() already performed * pmap_remove() on the only mapping to this range * of pages. */ vm_object_page_remove(object, offidxstart, offidxend, OBJPR_NOTMAPPED); if (object->type == OBJT_SWAP) swap_pager_freespace(object, offidxstart, count); if (offidxend >= object->size && offidxstart < object->size) { size1 = object->size; object->size = offidxstart; if (object->cred != NULL) { size1 -= object->size; KASSERT(object->charge >= ptoa(size1), ("object %p charge < 0", object)); swap_release_by_cred(ptoa(size1), object->cred); object->charge -= ptoa(size1); } } } VM_OBJECT_WUNLOCK(object); } else entry->object.vm_object = NULL; if (map->system_map) vm_map_entry_deallocate(entry, TRUE); else { entry->next = curthread->td_map_def_user; curthread->td_map_def_user = entry; } } /* * vm_map_delete: [ internal use only ] * * Deallocates the given address range from the target * map. */ int vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end) { vm_map_entry_t entry; vm_map_entry_t first_entry; VM_MAP_ASSERT_LOCKED(map); if (start == end) return (KERN_SUCCESS); /* * Find the start of the region, and clip it */ if (!vm_map_lookup_entry(map, start, &first_entry)) entry = first_entry->next; else { entry = first_entry; vm_map_clip_start(map, entry, start); } /* * Step through all entries in this region */ while (entry->start < end) { vm_map_entry_t next; /* * Wait for wiring or unwiring of an entry to complete. * Also wait for any system wirings to disappear on * user maps. */ if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 || (vm_map_pmap(map) != kernel_pmap && vm_map_entry_system_wired_count(entry) != 0)) { unsigned int last_timestamp; vm_offset_t saved_start; vm_map_entry_t tmp_entry; saved_start = entry->start; entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; last_timestamp = map->timestamp; (void) vm_map_unlock_and_wait(map, 0); vm_map_lock(map); if (last_timestamp + 1 != map->timestamp) { /* * Look again for the entry because the map was * modified while it was unlocked. * Specifically, the entry may have been * clipped, merged, or deleted. 
*/ if (!vm_map_lookup_entry(map, saved_start, &tmp_entry)) entry = tmp_entry->next; else { entry = tmp_entry; vm_map_clip_start(map, entry, saved_start); } } continue; } vm_map_clip_end(map, entry, end); next = entry->next; /* * Unwire before removing addresses from the pmap; otherwise, * unwiring will put the entries back in the pmap. */ if (entry->wired_count != 0) vm_map_entry_unwire(map, entry); /* * Remove mappings for the pages, but only if the * mappings could exist. For instance, it does not * make sense to call pmap_remove() for guard entries. */ if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0 || entry->object.vm_object != NULL) pmap_remove(map->pmap, entry->start, entry->end); if (entry->end == map->anon_loc) map->anon_loc = entry->start; /* * Delete the entry only after removing all pmap * entries pointing to its pages. (Otherwise, its * page frames may be reallocated, and any modify bits * will be set in the wrong object!) */ vm_map_entry_delete(map, entry); entry = next; } return (KERN_SUCCESS); } /* * vm_map_remove: * * Remove the given address range from the target map. * This is the exported form of vm_map_delete. */ int vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end) { int result; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); result = vm_map_delete(map, start, end); vm_map_unlock(map); return (result); } /* * vm_map_check_protection: * * Assert that the target map allows the specified privilege on the * entire address region given. The entire region must be allocated. * * WARNING! This code does not and should not check whether the * contents of the region is accessible. For example a smaller file * might be mapped into a larger address space. * * NOTE! This code is also called by munmap(). * * The map must be locked. A read lock is sufficient. */ boolean_t vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end, vm_prot_t protection) { vm_map_entry_t entry; vm_map_entry_t tmp_entry; if (!vm_map_lookup_entry(map, start, &tmp_entry)) return (FALSE); entry = tmp_entry; while (start < end) { /* * No holes allowed! */ if (start < entry->start) return (FALSE); /* * Check protection associated with entry. */ if ((entry->protection & protection) != protection) return (FALSE); /* go to next entry */ start = entry->end; entry = entry->next; } return (TRUE); } /* * vm_map_copy_entry: * * Copies the contents of the source entry to the destination * entry. The entries *must* be aligned properly. */ static void vm_map_copy_entry( vm_map_t src_map, vm_map_t dst_map, vm_map_entry_t src_entry, vm_map_entry_t dst_entry, vm_ooffset_t *fork_charge) { vm_object_t src_object; vm_map_entry_t fake_entry; vm_offset_t size; struct ucred *cred; int charged; VM_MAP_ASSERT_LOCKED(dst_map); if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP) return; if (src_entry->wired_count == 0 || (src_entry->protection & VM_PROT_WRITE) == 0) { /* * If the source entry is marked needs_copy, it is already * write-protected. */ if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0 && (src_entry->protection & VM_PROT_WRITE) != 0) { pmap_protect(src_map->pmap, src_entry->start, src_entry->end, src_entry->protection & ~VM_PROT_WRITE); } /* * Make a copy of the object. 
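 */

/*
 * This needs-copy setup is what gives fork(2) its copy-on-write behavior
 * for private (VM_INHERIT_COPY) mappings: parent and child share the
 * write-protected object, and the first write in either process faults in
 * a private page.  Observable from userland:
 */
#include <sys/mman.h>
#include <sys/wait.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	char *p;
	pid_t pid;

	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	p[0] = 'A';
	if ((pid = fork()) == -1)
		err(1, "fork");
	if (pid == 0) {
		p[0] = 'B';	/* faults; the child gets its own page */
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	printf("parent still sees '%c'\n", p[0]);	/* prints 'A' */
	return (0);
}

/*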
*/ size = src_entry->end - src_entry->start; if ((src_object = src_entry->object.vm_object) != NULL) { VM_OBJECT_WLOCK(src_object); charged = ENTRY_CHARGED(src_entry); if (src_object->handle == NULL && (src_object->type == OBJT_DEFAULT || src_object->type == OBJT_SWAP)) { vm_object_collapse(src_object); if ((src_object->flags & (OBJ_NOSPLIT | OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) { vm_object_split(src_entry); src_object = src_entry->object.vm_object; } } vm_object_reference_locked(src_object); vm_object_clear_flag(src_object, OBJ_ONEMAPPING); if (src_entry->cred != NULL && !(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) { KASSERT(src_object->cred == NULL, ("OVERCOMMIT: vm_map_copy_entry: cred %p", src_object)); src_object->cred = src_entry->cred; src_object->charge = size; } VM_OBJECT_WUNLOCK(src_object); dst_entry->object.vm_object = src_object; if (charged) { cred = curthread->td_ucred; crhold(cred); dst_entry->cred = cred; *fork_charge += size; if (!(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) { crhold(cred); src_entry->cred = cred; *fork_charge += size; } } src_entry->eflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY; dst_entry->eflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY; dst_entry->offset = src_entry->offset; if (src_entry->eflags & MAP_ENTRY_VN_WRITECNT) { /* * MAP_ENTRY_VN_WRITECNT cannot * indicate write reference from * src_entry, since the entry is * marked as needs copy. Allocate a * fake entry that is used to * decrement object->un_pager.vnp.writecount * at the appropriate time. Attach * fake_entry to the deferred list. */ fake_entry = vm_map_entry_create(dst_map); fake_entry->eflags = MAP_ENTRY_VN_WRITECNT; src_entry->eflags &= ~MAP_ENTRY_VN_WRITECNT; vm_object_reference(src_object); fake_entry->object.vm_object = src_object; fake_entry->start = src_entry->start; fake_entry->end = src_entry->end; fake_entry->next = curthread->td_map_def_user; curthread->td_map_def_user = fake_entry; } pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start, dst_entry->end - dst_entry->start, src_entry->start); } else { dst_entry->object.vm_object = NULL; dst_entry->offset = 0; if (src_entry->cred != NULL) { dst_entry->cred = curthread->td_ucred; crhold(dst_entry->cred); *fork_charge += size; } } } else { /* * We don't want to make writeable wired pages copy-on-write. * Immediately copy these pages into the new map by simulating * page faults. The new pages are pageable. */ vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry, fork_charge); } } /* * vmspace_map_entry_forked: * Update the newly-forked vmspace each time a map entry is inherited * or copied. The values for vm_dsize and vm_tsize are approximate * (and mostly-obsolete ideas in the face of mmap(2) et al.) 
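 */

/*
 * Which arm of the inheritance switch in vmspace_fork() below an entry
 * takes is under userland control via minherit(2): INHERIT_SHARE makes
 * parent and child share the pages, INHERIT_ZERO hands the child a fresh
 * zero-filled range, and INHERIT_COPY (the default for private mappings)
 * is the copy-on-write case.  For example:
 */
#include <sys/mman.h>
#include <sys/wait.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	char *p;
	pid_t pid;

	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	/* VM_INHERIT_SHARE: fork clones the entry but shares the object. */
	if (minherit(p, 4096, INHERIT_SHARE) == -1)
		err(1, "minherit");
	p[0] = 'A';
	if ((pid = fork()) == -1)
		err(1, "fork");
	if (pid == 0) {
		p[0] = 'B';	/* shared: the parent sees this */
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	printf("parent sees '%c'\n", p[0]);	/* prints 'B' */
	return (0);
}

/*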
*/ static void vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2, vm_map_entry_t entry) { vm_size_t entrysize; vm_offset_t newend; if ((entry->eflags & MAP_ENTRY_GUARD) != 0) return; entrysize = entry->end - entry->start; vm2->vm_map.size += entrysize; if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) { vm2->vm_ssize += btoc(entrysize); } else if (entry->start >= (vm_offset_t)vm1->vm_daddr && entry->start < (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)) { newend = MIN(entry->end, (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)); vm2->vm_dsize += btoc(newend - entry->start); } else if (entry->start >= (vm_offset_t)vm1->vm_taddr && entry->start < (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)) { newend = MIN(entry->end, (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)); vm2->vm_tsize += btoc(newend - entry->start); } } /* * vmspace_fork: * Create a new process vmspace structure and vm_map * based on those of an existing process. The new map * is based on the old map, according to the inheritance * values on the regions in that map. * * XXX It might be worth coalescing the entries added to the new vmspace. * * The source map must not be locked. */ struct vmspace * vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge) { struct vmspace *vm2; vm_map_t new_map, old_map; vm_map_entry_t new_entry, old_entry; vm_object_t object; int error, locked; vm_inherit_t inh; old_map = &vm1->vm_map; /* Copy immutable fields of vm1 to vm2. */ vm2 = vmspace_alloc(vm_map_min(old_map), vm_map_max(old_map), pmap_pinit); if (vm2 == NULL) return (NULL); vm2->vm_taddr = vm1->vm_taddr; vm2->vm_daddr = vm1->vm_daddr; vm2->vm_maxsaddr = vm1->vm_maxsaddr; vm_map_lock(old_map); if (old_map->busy) vm_map_wait_busy(old_map); new_map = &vm2->vm_map; locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */ KASSERT(locked, ("vmspace_fork: lock failed")); error = pmap_vmspace_copy(new_map->pmap, old_map->pmap); if (error != 0) { sx_xunlock(&old_map->lock); sx_xunlock(&new_map->lock); vm_map_process_deferred(); vmspace_free(vm2); return (NULL); } new_map->anon_loc = old_map->anon_loc; old_entry = old_map->header.next; while (old_entry != &old_map->header) { if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) panic("vm_map_fork: encountered a submap"); inh = old_entry->inheritance; if ((old_entry->eflags & MAP_ENTRY_GUARD) != 0 && inh != VM_INHERIT_NONE) inh = VM_INHERIT_COPY; switch (inh) { case VM_INHERIT_NONE: break; case VM_INHERIT_SHARE: /* * Clone the entry, creating the shared object if necessary. */ object = old_entry->object.vm_object; if (object == NULL) { object = vm_object_allocate(OBJT_DEFAULT, atop(old_entry->end - old_entry->start)); old_entry->object.vm_object = object; old_entry->offset = 0; if (old_entry->cred != NULL) { object->cred = old_entry->cred; object->charge = old_entry->end - old_entry->start; old_entry->cred = NULL; } } /* * Add the reference before calling vm_object_shadow * to ensure that a shadow object is created. */ vm_object_reference(object); if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) { vm_object_shadow(&old_entry->object.vm_object, &old_entry->offset, old_entry->end - old_entry->start); old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; /* Transfer the second reference too. */ vm_object_reference( old_entry->object.vm_object); /* * As in vm_map_simplify_entry(), the * vnode lock will not be acquired in * this call to vm_object_deallocate().
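 *
 * (The VM_INHERIT_* values steering this switch are set from
 * userland with minherit(2).  For instance, a process that wants a
 * region to remain shared with its children, as handled by the
 * VM_INHERIT_SHARE case here, would do roughly the following, a
 * sketch assuming <sys/mman.h> and <err.h>:
 *
 *	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	if (minherit(p, len, INHERIT_SHARE) == -1)
 *		err(1, "minherit");
 *
 * after which parent and child observe each other's writes to the
 * region across fork().)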
*/ vm_object_deallocate(object); object = old_entry->object.vm_object; } VM_OBJECT_WLOCK(object); vm_object_clear_flag(object, OBJ_ONEMAPPING); if (old_entry->cred != NULL) { KASSERT(object->cred == NULL, ("vmspace_fork both cred")); object->cred = old_entry->cred; object->charge = old_entry->end - old_entry->start; old_entry->cred = NULL; } /* * Assert the correct state of the vnode * v_writecount while the object is locked, to * not relock it later for the assertion * correctness. */ if (old_entry->eflags & MAP_ENTRY_VN_WRITECNT && object->type == OBJT_VNODE) { KASSERT(((struct vnode *)object->handle)-> v_writecount > 0, ("vmspace_fork: v_writecount %p", object)); KASSERT(object->un_pager.vnp.writemappings > 0, ("vmspace_fork: vnp.writecount %p", object)); } VM_OBJECT_WUNLOCK(object); /* * Clone the entry, referencing the shared object. */ new_entry = vm_map_entry_create(new_map); *new_entry = *old_entry; new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION); new_entry->wiring_thread = NULL; new_entry->wired_count = 0; if (new_entry->eflags & MAP_ENTRY_VN_WRITECNT) { vnode_pager_update_writecount(object, new_entry->start, new_entry->end); } /* * Insert the entry into the new map -- we know we're * inserting at the end of the new map. */ vm_map_entry_link(new_map, new_entry); vmspace_map_entry_forked(vm1, vm2, new_entry); /* * Update the physical map */ pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, (old_entry->end - old_entry->start), old_entry->start); break; case VM_INHERIT_COPY: /* * Clone the entry and link into the map. */ new_entry = vm_map_entry_create(new_map); *new_entry = *old_entry; /* * Copied entry is COW over the old object. */ new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_VN_WRITECNT); new_entry->wiring_thread = NULL; new_entry->wired_count = 0; new_entry->object.vm_object = NULL; new_entry->cred = NULL; vm_map_entry_link(new_map, new_entry); vmspace_map_entry_forked(vm1, vm2, new_entry); vm_map_copy_entry(old_map, new_map, old_entry, new_entry, fork_charge); break; case VM_INHERIT_ZERO: /* * Create a new anonymous mapping entry modelled from * the old one. */ new_entry = vm_map_entry_create(new_map); memset(new_entry, 0, sizeof(*new_entry)); new_entry->start = old_entry->start; new_entry->end = old_entry->end; new_entry->eflags = old_entry->eflags & ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_VN_WRITECNT); new_entry->protection = old_entry->protection; new_entry->max_protection = old_entry->max_protection; new_entry->inheritance = VM_INHERIT_ZERO; vm_map_entry_link(new_map, new_entry); vmspace_map_entry_forked(vm1, vm2, new_entry); new_entry->cred = curthread->td_ucred; crhold(new_entry->cred); *fork_charge += (new_entry->end - new_entry->start); break; } old_entry = old_entry->next; } /* * Use inlined vm_map_unlock() to postpone handling the deferred * map entries, which cannot be done until both old_map and * new_map locks are released. */ sx_xunlock(&old_map->lock); sx_xunlock(&new_map->lock); vm_map_process_deferred(); return (vm2); } /* * Create a process's stack for exec_new_vmspace(). This function is never * asked to wire the newly created stack. */ int vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, vm_prot_t prot, vm_prot_t max, int cow) { vm_size_t growsize, init_ssize; rlim_t vmemlim; int rv; MPASS((map->flags & MAP_WIREFUTURE) == 0); growsize = sgrowsiz; init_ssize = (max_ssize < growsize) ? 
max_ssize : growsize; vm_map_lock(map); vmemlim = lim_cur(curthread, RLIMIT_VMEM); /* If we would blow our VMEM resource limit, no go */ if (map->size + init_ssize > vmemlim) { rv = KERN_NO_SPACE; goto out; } rv = vm_map_stack_locked(map, addrbos, max_ssize, growsize, prot, max, cow); out: vm_map_unlock(map); return (rv); } static int stack_guard_page = 1; SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN, &stack_guard_page, 0, "Specifies the number of guard pages for a stack that grows"); static int vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow) { vm_map_entry_t new_entry, prev_entry; vm_offset_t bot, gap_bot, gap_top, top; vm_size_t init_ssize, sgp; int orient, rv; /* * The stack orientation is piggybacked with the cow argument. * Extract it into orient and mask the cow argument so that we * don't pass it around further. */ orient = cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP); KASSERT(orient != 0, ("No stack grow direction")); KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP), ("bi-dir stack")); if (addrbos < vm_map_min(map) || addrbos + max_ssize > vm_map_max(map) || addrbos + max_ssize <= addrbos) return (KERN_INVALID_ADDRESS); sgp = (vm_size_t)stack_guard_page * PAGE_SIZE; if (sgp >= max_ssize) return (KERN_INVALID_ARGUMENT); init_ssize = growsize; if (max_ssize < init_ssize + sgp) init_ssize = max_ssize - sgp; /* If addr is already mapped, no go */ if (vm_map_lookup_entry(map, addrbos, &prev_entry)) return (KERN_NO_SPACE); /* * If we can't accommodate max_ssize in the current mapping, no go. */ if (prev_entry->next->start < addrbos + max_ssize) return (KERN_NO_SPACE); /* * We initially map a stack of only init_ssize. We will grow as * needed later. Depending on the orientation of the stack (i.e. * the grow direction) we either map at the top of the range, the * bottom of the range or in the middle. * * Note: we would normally expect prot and max to be VM_PROT_ALL, * and cow to be 0. Possibly we should eliminate these as input * parameters, and just pass these values here in the insert call. */ if (orient == MAP_STACK_GROWS_DOWN) { bot = addrbos + max_ssize - init_ssize; top = bot + init_ssize; gap_bot = addrbos; gap_top = bot; } else /* if (orient == MAP_STACK_GROWS_UP) */ { bot = addrbos; top = bot + init_ssize; gap_bot = top; gap_top = addrbos + max_ssize; } rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow); if (rv != KERN_SUCCESS) return (rv); new_entry = prev_entry->next; KASSERT(new_entry->end == top || new_entry->start == bot, ("Bad entry start/end for new stack entry")); KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 || (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0, ("new entry lacks MAP_ENTRY_GROWS_DOWN")); KASSERT((orient & MAP_STACK_GROWS_UP) == 0 || (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0, ("new entry lacks MAP_ENTRY_GROWS_UP")); rv = vm_map_insert(map, NULL, 0, gap_bot, gap_top, VM_PROT_NONE, VM_PROT_NONE, MAP_CREATE_GUARD | (orient == MAP_STACK_GROWS_DOWN ? MAP_CREATE_STACK_GAP_DN : MAP_CREATE_STACK_GAP_UP)); if (rv != KERN_SUCCESS) (void)vm_map_delete(map, bot, top); return (rv); } /* * Attempts to grow a vm stack entry. Returns KERN_SUCCESS if we * successfully grow the stack. 
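 *
 * To make the layout above concrete: for a grows-down stack with
 * addrbos = 0x10000000, max_ssize = 512K, growsize = 128K and one
 * guard page, vm_map_stack_locked() maps only the top 128K and
 * leaves the rest of the range as the gap entry:
 *
 *	bot = addrbos + max_ssize - init_ssize		= 0x10060000
 *	top = bot + init_ssize				= 0x10080000
 *	gap = [addrbos, bot)				= 384K
 *
 * Later faults into the gap (minus the guard) grow the stack, which
 * is what the function below implements.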
*/ static int vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry) { vm_map_entry_t stack_entry; struct proc *p; struct vmspace *vm; struct ucred *cred; vm_offset_t gap_end, gap_start, grow_start; size_t grow_amount, guard, max_grow; rlim_t lmemlim, stacklim, vmemlim; int rv, rv1; bool gap_deleted, grow_down, is_procstack; #ifdef notyet uint64_t limit; #endif #ifdef RACCT int error; #endif p = curproc; vm = p->p_vmspace; /* * Disallow stack growth when the access is performed by a * debugger or AIO daemon. The reason is that the wrong * resource limits are applied. */ if (map != &p->p_vmspace->vm_map || p->p_textvp == NULL) return (KERN_FAILURE); MPASS(!map->system_map); guard = stack_guard_page * PAGE_SIZE; lmemlim = lim_cur(curthread, RLIMIT_MEMLOCK); stacklim = lim_cur(curthread, RLIMIT_STACK); vmemlim = lim_cur(curthread, RLIMIT_VMEM); retry: /* If addr is not in a hole for a stack grow area, no need to grow. */ if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry)) return (KERN_FAILURE); if ((gap_entry->eflags & MAP_ENTRY_GUARD) == 0) return (KERN_SUCCESS); if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_DN) != 0) { stack_entry = gap_entry->next; if ((stack_entry->eflags & MAP_ENTRY_GROWS_DOWN) == 0 || stack_entry->start != gap_entry->end) return (KERN_FAILURE); grow_amount = round_page(stack_entry->start - addr); grow_down = true; } else if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_UP) != 0) { stack_entry = gap_entry->prev; if ((stack_entry->eflags & MAP_ENTRY_GROWS_UP) == 0 || stack_entry->end != gap_entry->start) return (KERN_FAILURE); grow_amount = round_page(addr + 1 - stack_entry->end); grow_down = false; } else { return (KERN_FAILURE); } max_grow = gap_entry->end - gap_entry->start; if (guard > max_grow) return (KERN_NO_SPACE); max_grow -= guard; if (grow_amount > max_grow) return (KERN_NO_SPACE); /* * If this is the main process stack, see if we're over the stack * limit. 
*/ is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr && addr < (vm_offset_t)p->p_sysent->sv_usrstack; if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) return (KERN_NO_SPACE); #ifdef RACCT if (racct_enable) { PROC_LOCK(p); if (is_procstack && racct_set(p, RACCT_STACK, ctob(vm->vm_ssize) + grow_amount)) { PROC_UNLOCK(p); return (KERN_NO_SPACE); } PROC_UNLOCK(p); } #endif grow_amount = roundup(grow_amount, sgrowsiz); if (grow_amount > max_grow) grow_amount = max_grow; if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) { grow_amount = trunc_page((vm_size_t)stacklim) - ctob(vm->vm_ssize); } #ifdef notyet PROC_LOCK(p); limit = racct_get_available(p, RACCT_STACK); PROC_UNLOCK(p); if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit)) grow_amount = limit - ctob(vm->vm_ssize); #endif if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) { if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) { rv = KERN_NO_SPACE; goto out; } #ifdef RACCT if (racct_enable) { PROC_LOCK(p); if (racct_set(p, RACCT_MEMLOCK, ptoa(pmap_wired_count(map->pmap)) + grow_amount)) { PROC_UNLOCK(p); rv = KERN_NO_SPACE; goto out; } PROC_UNLOCK(p); } #endif } /* If we would blow our VMEM resource limit, no go */ if (map->size + grow_amount > vmemlim) { rv = KERN_NO_SPACE; goto out; } #ifdef RACCT if (racct_enable) { PROC_LOCK(p); if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) { PROC_UNLOCK(p); rv = KERN_NO_SPACE; goto out; } PROC_UNLOCK(p); } #endif if (vm_map_lock_upgrade(map)) { gap_entry = NULL; vm_map_lock_read(map); goto retry; } if (grow_down) { grow_start = gap_entry->end - grow_amount; if (gap_entry->start + grow_amount == gap_entry->end) { gap_start = gap_entry->start; gap_end = gap_entry->end; vm_map_entry_delete(map, gap_entry); gap_deleted = true; } else { MPASS(gap_entry->start < gap_entry->end - grow_amount); gap_entry->end -= grow_amount; vm_map_entry_resize_free(map, gap_entry); gap_deleted = false; } rv = vm_map_insert(map, NULL, 0, grow_start, grow_start + grow_amount, stack_entry->protection, stack_entry->max_protection, MAP_STACK_GROWS_DOWN); if (rv != KERN_SUCCESS) { if (gap_deleted) { rv1 = vm_map_insert(map, NULL, 0, gap_start, gap_end, VM_PROT_NONE, VM_PROT_NONE, MAP_CREATE_GUARD | MAP_CREATE_STACK_GAP_DN); MPASS(rv1 == KERN_SUCCESS); } else { gap_entry->end += grow_amount; vm_map_entry_resize_free(map, gap_entry); } } } else { grow_start = stack_entry->end; cred = stack_entry->cred; if (cred == NULL && stack_entry->object.vm_object != NULL) cred = stack_entry->object.vm_object->cred; if (cred != NULL && !swap_reserve_by_cred(grow_amount, cred)) rv = KERN_NO_SPACE; /* Grow the underlying object if applicable. */ else if (stack_entry->object.vm_object == NULL || vm_object_coalesce(stack_entry->object.vm_object, stack_entry->offset, (vm_size_t)(stack_entry->end - stack_entry->start), (vm_size_t)grow_amount, cred != NULL)) { if (gap_entry->start + grow_amount == gap_entry->end) vm_map_entry_delete(map, gap_entry); else gap_entry->start += grow_amount; stack_entry->end += grow_amount; map->size += grow_amount; vm_map_entry_resize_free(map, stack_entry); rv = KERN_SUCCESS; } else rv = KERN_FAILURE; } if (rv == KERN_SUCCESS && is_procstack) vm->vm_ssize += btoc(grow_amount); /* * Heed the MAP_WIREFUTURE flag if it was set for this process. 
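 *
 * (The limits consulted above are the ordinary per-process resource
 * limits.  A userland sketch of how to inspect the same values,
 * assuming <sys/resource.h>:
 *
 *	struct rlimit rl;
 *	getrlimit(RLIMIT_STACK, &rl);	- caps ctob(vm_ssize) + growth
 *	getrlimit(RLIMIT_VMEM, &rl);	- caps the total map size
 *	getrlimit(RLIMIT_MEMLOCK, &rl);	- caps wired pages (MAP_WIREFUTURE)
 *
 * RACCT, when enabled, applies the corresponding accounting as well.)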
*/ if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) { vm_map_unlock(map); vm_map_wire(map, grow_start, grow_start + grow_amount, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); vm_map_lock_read(map); } else vm_map_lock_downgrade(map); out: #ifdef RACCT if (racct_enable && rv != KERN_SUCCESS) { PROC_LOCK(p); error = racct_set(p, RACCT_VMEM, map->size); KASSERT(error == 0, ("decreasing RACCT_VMEM failed")); if (!old_mlock) { error = racct_set(p, RACCT_MEMLOCK, ptoa(pmap_wired_count(map->pmap))); KASSERT(error == 0, ("decreasing RACCT_MEMLOCK failed")); } error = racct_set(p, RACCT_STACK, ctob(vm->vm_ssize)); KASSERT(error == 0, ("decreasing RACCT_STACK failed")); PROC_UNLOCK(p); } #endif return (rv); } /* * Unshare the specified VM space for exec. If other processes are * mapped to it, then create a new one. The new vmspace is null. */ int vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser) { struct vmspace *oldvmspace = p->p_vmspace; struct vmspace *newvmspace; KASSERT((curthread->td_pflags & TDP_EXECVMSPC) == 0, ("vmspace_exec recursed")); newvmspace = vmspace_alloc(minuser, maxuser, pmap_pinit); if (newvmspace == NULL) return (ENOMEM); newvmspace->vm_swrss = oldvmspace->vm_swrss; /* * This code is written this way for prototyping purposes. The * goal is to avoid running down the vmspace here, but to let the * other processes that are still using the vmspace finally * run it down. Even though there is little or no chance of blocking * here, it is a good idea to keep this form for future mods. */ PROC_VMSPACE_LOCK(p); p->p_vmspace = newvmspace; PROC_VMSPACE_UNLOCK(p); if (p == curthread->td_proc) pmap_activate(curthread); curthread->td_pflags |= TDP_EXECVMSPC; return (0); } /* * Unshare the specified VM space for forcing COW. This * is called by rfork, for the (RFMEM|RFPROC) == 0 case. */ int vmspace_unshare(struct proc *p) { struct vmspace *oldvmspace = p->p_vmspace; struct vmspace *newvmspace; vm_ooffset_t fork_charge; if (oldvmspace->vm_refcnt == 1) return (0); fork_charge = 0; newvmspace = vmspace_fork(oldvmspace, &fork_charge); if (newvmspace == NULL) return (ENOMEM); if (!swap_reserve_by_cred(fork_charge, p->p_ucred)) { vmspace_free(newvmspace); return (ENOMEM); } PROC_VMSPACE_LOCK(p); p->p_vmspace = newvmspace; PROC_VMSPACE_UNLOCK(p); if (p == curthread->td_proc) pmap_activate(curthread); vmspace_free(oldvmspace); return (0); } /* * vm_map_lookup: * * Finds the VM object, offset, and * protection for a given virtual address in the * specified map, assuming a page fault of the * type specified. * * Leaves the map in question locked for read; return * values are guaranteed until a vm_map_lookup_done * call is performed. Note that the map argument * is in/out; the returned map must be used in * the call to vm_map_lookup_done. * * A handle (out_entry) is returned for use in * vm_map_lookup_done, to make that fast. * * If a lookup is requested with "write protection" * specified, the map may be changed to perform virtual * copying operations, although the data referenced will * remain the same.
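 *
 * A hypothetical in-kernel caller, to make the contract concrete
 * (a sketch only; "va" and the error handling are placeholders):
 *
 *	vm_map_entry_t entry;
 *	vm_object_t obj;
 *	vm_pindex_t pidx;
 *	vm_prot_t prot;
 *	boolean_t wired;
 *
 *	if (vm_map_lookup(&map, va, VM_PROT_READ, &entry, &obj,
 *	    &pidx, &prot, &wired) != KERN_SUCCESS)
 *		return (EFAULT);
 *	... use obj and pidx; the map stays read-locked ...
 *	vm_map_lookup_done(map, entry);
 *
 * Note that "map" may be rewritten by the call when the address
 * falls within a submap, which is why it is passed by reference.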
*/ int vm_map_lookup(vm_map_t *var_map, /* IN/OUT */ vm_offset_t vaddr, vm_prot_t fault_typea, vm_map_entry_t *out_entry, /* OUT */ vm_object_t *object, /* OUT */ vm_pindex_t *pindex, /* OUT */ vm_prot_t *out_prot, /* OUT */ boolean_t *wired) /* OUT */ { vm_map_entry_t entry; vm_map_t map = *var_map; vm_prot_t prot; vm_prot_t fault_type = fault_typea; vm_object_t eobject; vm_size_t size; struct ucred *cred; RetryLookup: vm_map_lock_read(map); RetryLookupLocked: /* * Lookup the faulting address. */ if (!vm_map_lookup_entry(map, vaddr, out_entry)) { vm_map_unlock_read(map); return (KERN_INVALID_ADDRESS); } entry = *out_entry; /* * Handle submaps. */ if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { vm_map_t old_map = map; *var_map = map = entry->object.sub_map; vm_map_unlock_read(old_map); goto RetryLookup; } /* * Check whether this task is allowed to have this page. */ prot = entry->protection; if ((fault_typea & VM_PROT_FAULT_LOOKUP) != 0) { fault_typea &= ~VM_PROT_FAULT_LOOKUP; if (prot == VM_PROT_NONE && map != kernel_map && (entry->eflags & MAP_ENTRY_GUARD) != 0 && (entry->eflags & (MAP_ENTRY_STACK_GAP_DN | MAP_ENTRY_STACK_GAP_UP)) != 0 && vm_map_growstack(map, vaddr, entry) == KERN_SUCCESS) goto RetryLookupLocked; } fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) { vm_map_unlock_read(map); return (KERN_PROTECTION_FAILURE); } KASSERT((prot & VM_PROT_WRITE) == 0 || (entry->eflags & (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY)) != (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY), ("entry %p flags %x", entry, entry->eflags)); if ((fault_typea & VM_PROT_COPY) != 0 && (entry->max_protection & VM_PROT_WRITE) == 0 && (entry->eflags & MAP_ENTRY_COW) == 0) { vm_map_unlock_read(map); return (KERN_PROTECTION_FAILURE); } /* * If this page is not pageable, we have to get it for all possible * accesses. */ *wired = (entry->wired_count != 0); if (*wired) fault_type = entry->protection; size = entry->end - entry->start; /* * If the entry was copy-on-write, we either ... */ if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { /* * If we want to write the page, we may as well handle that * now since we've got the map locked. * * If we don't need to write the page, we just demote the * permissions allowed. */ if ((fault_type & VM_PROT_WRITE) != 0 || (fault_typea & VM_PROT_COPY) != 0) { /* * Make a new object, and place it in the object * chain. Note that no new references have appeared * -- one just moved from the map to the new * object. */ if (vm_map_lock_upgrade(map)) goto RetryLookup; if (entry->cred == NULL) { /* * The debugger owner is charged for * the memory. */ cred = curthread->td_ucred; crhold(cred); if (!swap_reserve_by_cred(size, cred)) { crfree(cred); vm_map_unlock(map); return (KERN_RESOURCE_SHORTAGE); } entry->cred = cred; } vm_object_shadow(&entry->object.vm_object, &entry->offset, size); entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; eobject = entry->object.vm_object; if (eobject->cred != NULL) { /* * The object was not shadowed. */ swap_release_by_cred(size, entry->cred); crfree(entry->cred); entry->cred = NULL; } else if (entry->cred != NULL) { VM_OBJECT_WLOCK(eobject); eobject->cred = entry->cred; eobject->charge = size; VM_OBJECT_WUNLOCK(eobject); entry->cred = NULL; } vm_map_lock_downgrade(map); } else { /* * We're attempting to read a copy-on-write page -- * don't allow writes. */ prot &= ~VM_PROT_WRITE; } } /* * Create an object if necessary. 
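 *
 * (The shadow-object path above is why writes through a private file
 * mapping never reach the file: the first write fault allocates a
 * shadow object that absorbs the modified pages.  A userland sketch,
 * with a hypothetical file name "data.bin":
 *
 *	int fd = open("data.bin", O_RDWR);
 *	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE, fd, 0);
 *	p[0] ^= 1;	- COW fault; "data.bin" is never modified
 *
 * A MAP_SHARED mapping, by contrast, takes the non-COW path and the
 * store would eventually be written back.)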
*/ if (entry->object.vm_object == NULL && !map->system_map) { if (vm_map_lock_upgrade(map)) goto RetryLookup; entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT, atop(size)); entry->offset = 0; if (entry->cred != NULL) { VM_OBJECT_WLOCK(entry->object.vm_object); entry->object.vm_object->cred = entry->cred; entry->object.vm_object->charge = size; VM_OBJECT_WUNLOCK(entry->object.vm_object); entry->cred = NULL; } vm_map_lock_downgrade(map); } /* * Return the object/offset from this entry. If the entry was * copy-on-write or empty, it has been fixed up. */ *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset); *object = entry->object.vm_object; *out_prot = prot; return (KERN_SUCCESS); } /* * vm_map_lookup_locked: * * Lookup the faulting address. A version of vm_map_lookup that returns * KERN_FAILURE instead of blocking on map lock or memory allocation. */ int vm_map_lookup_locked(vm_map_t *var_map, /* IN/OUT */ vm_offset_t vaddr, vm_prot_t fault_typea, vm_map_entry_t *out_entry, /* OUT */ vm_object_t *object, /* OUT */ vm_pindex_t *pindex, /* OUT */ vm_prot_t *out_prot, /* OUT */ boolean_t *wired) /* OUT */ { vm_map_entry_t entry; vm_map_t map = *var_map; vm_prot_t prot; vm_prot_t fault_type = fault_typea; /* * Lookup the faulting address. */ if (!vm_map_lookup_entry(map, vaddr, out_entry)) return (KERN_INVALID_ADDRESS); entry = *out_entry; /* * Fail if the entry refers to a submap. */ if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) return (KERN_FAILURE); /* * Check whether this task is allowed to have this page. */ prot = entry->protection; fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; if ((fault_type & prot) != fault_type) return (KERN_PROTECTION_FAILURE); /* * If this page is not pageable, we have to get it for all possible * accesses. */ *wired = (entry->wired_count != 0); if (*wired) fault_type = entry->protection; if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { /* * Fail if the entry was copy-on-write for a write fault. */ if (fault_type & VM_PROT_WRITE) return (KERN_FAILURE); /* * We're attempting to read a copy-on-write page -- * don't allow writes. */ prot &= ~VM_PROT_WRITE; } /* * Fail if an object should be created. */ if (entry->object.vm_object == NULL && !map->system_map) return (KERN_FAILURE); /* * Return the object/offset from this entry. If the entry was * copy-on-write or empty, it has been fixed up. */ *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset); *object = entry->object.vm_object; *out_prot = prot; return (KERN_SUCCESS); } /* * vm_map_lookup_done: * * Releases locks acquired by a vm_map_lookup * (according to the handle returned by that lookup). 
*/ void vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry) { /* * Unlock the main-level map */ vm_map_unlock_read(map); } vm_offset_t vm_map_max_KBI(const struct vm_map *map) { return (vm_map_max(map)); } vm_offset_t vm_map_min_KBI(const struct vm_map *map) { return (vm_map_min(map)); } pmap_t vm_map_pmap_KBI(vm_map_t map) { return (map->pmap); } #include "opt_ddb.h" #ifdef DDB #include #include static void vm_map_print(vm_map_t map) { vm_map_entry_t entry; db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n", (void *)map, (void *)map->pmap, map->nentries, map->timestamp); db_indent += 2; for (entry = map->header.next; entry != &map->header; entry = entry->next) { db_iprintf("map entry %p: start=%p, end=%p, eflags=%#x, \n", (void *)entry, (void *)entry->start, (void *)entry->end, entry->eflags); { static char *inheritance_name[4] = {"share", "copy", "none", "donate_copy"}; db_iprintf(" prot=%x/%x/%s", entry->protection, entry->max_protection, inheritance_name[(int)(unsigned char)entry->inheritance]); if (entry->wired_count != 0) db_printf(", wired"); } if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { db_printf(", share=%p, offset=0x%jx\n", (void *)entry->object.sub_map, (uintmax_t)entry->offset); if ((entry->prev == &map->header) || (entry->prev->object.sub_map != entry->object.sub_map)) { db_indent += 2; vm_map_print((vm_map_t)entry->object.sub_map); db_indent -= 2; } } else { if (entry->cred != NULL) db_printf(", ruid %d", entry->cred->cr_ruid); db_printf(", object=%p, offset=0x%jx", (void *)entry->object.vm_object, (uintmax_t)entry->offset); if (entry->object.vm_object && entry->object.vm_object->cred) db_printf(", obj ruid %d charge %jx", entry->object.vm_object->cred->cr_ruid, (uintmax_t)entry->object.vm_object->charge); if (entry->eflags & MAP_ENTRY_COW) db_printf(", copy (%s)", (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done"); db_printf("\n"); if ((entry->prev == &map->header) || (entry->prev->object.vm_object != entry->object.vm_object)) { db_indent += 2; vm_object_print((db_expr_t)(intptr_t) entry->object.vm_object, 0, 0, (char *)0); db_indent -= 2; } } } db_indent -= 2; } DB_SHOW_COMMAND(map, map) { if (!have_addr) { db_printf("usage: show map \n"); return; } vm_map_print((vm_map_t)addr); } DB_SHOW_COMMAND(procvm, procvm) { struct proc *p; if (have_addr) { p = db_lookup_proc(addr); } else { p = curproc; } db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n", (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map, (void *)vmspace_pmap(p->p_vmspace)); vm_map_print((vm_map_t)&p->p_vmspace->vm_map); } #endif /* DDB */ Index: projects/runtime-coverage-v2/sys/vm/vm_pageout.c =================================================================== --- projects/runtime-coverage-v2/sys/vm/vm_pageout.c (revision 347075) +++ projects/runtime-coverage-v2/sys/vm/vm_pageout.c (revision 347076) @@ -1,2112 +1,2110 @@ /*- * SPDX-License-Identifier: (BSD-4-Clause AND MIT-CMU) * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2005 Yahoo! Technologies Norway AS * All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * The proverbial page-out daemon. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * System initialization */ /* the kernel process "vm_pageout"*/ static void vm_pageout(void); static void vm_pageout_init(void); static int vm_pageout_clean(vm_page_t m, int *numpagedout); static int vm_pageout_cluster(vm_page_t m); static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, int starting_page_shortage); SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init, NULL); struct proc *pageproc; static struct kproc_desc page_kp = { "pagedaemon", vm_pageout, &pageproc }; SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, &page_kp); SDT_PROVIDER_DEFINE(vm); SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan); /* Pagedaemon activity rates, in subdivisions of one second. */ #define VM_LAUNDER_RATE 10 #define VM_INACT_SCAN_RATE 10 static int vm_pageout_oom_seq = 12; static int vm_pageout_update_period; static int disable_swap_pageouts; static int lowmem_period = 10; static int swapdev_enabled; static int vm_panic_on_oom = 0; SYSCTL_INT(_vm, OID_AUTO, panic_on_oom, CTLFLAG_RWTUN, &vm_panic_on_oom, 0, "panic on out of memory instead of killing the largest process"); SYSCTL_INT(_vm, OID_AUTO, pageout_update_period, CTLFLAG_RWTUN, &vm_pageout_update_period, 0, "Maximum active LRU update period"); SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RWTUN, &lowmem_period, 0, "Low memory callback period"); SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts, CTLFLAG_RWTUN, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages"); static int pageout_lock_miss; SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss, CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout"); SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq, CTLFLAG_RWTUN, &vm_pageout_oom_seq, 0, "back-to-back calls to oom detector to start OOM"); static int act_scan_laundry_weight = 3; SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RWTUN, &act_scan_laundry_weight, 0, "weight given to clean vs. 
dirty pages in active queue scans"); static u_int vm_background_launder_rate = 4096; SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RWTUN, &vm_background_launder_rate, 0, "background laundering rate, in kilobytes per second"); static u_int vm_background_launder_max = 20 * 1024; SYSCTL_UINT(_vm, OID_AUTO, background_launder_max, CTLFLAG_RWTUN, &vm_background_launder_max, 0, "background laundering cap, in kilobytes"); int vm_pageout_page_count = 32; int vm_page_max_wired; /* XXX max # of wired pages system-wide */ SYSCTL_INT(_vm, OID_AUTO, max_wired, CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count"); static u_int isqrt(u_int num); static int vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall); static void vm_pageout_laundry_worker(void *arg); struct scan_state { struct vm_batchqueue bq; struct vm_pagequeue *pq; vm_page_t marker; int maxscan; int scanned; }; static void vm_pageout_init_scan(struct scan_state *ss, struct vm_pagequeue *pq, vm_page_t marker, vm_page_t after, int maxscan) { vm_pagequeue_assert_locked(pq); KASSERT((marker->aflags & PGA_ENQUEUED) == 0, ("marker %p already enqueued", marker)); if (after == NULL) TAILQ_INSERT_HEAD(&pq->pq_pl, marker, plinks.q); else TAILQ_INSERT_AFTER(&pq->pq_pl, after, marker, plinks.q); vm_page_aflag_set(marker, PGA_ENQUEUED); vm_batchqueue_init(&ss->bq); ss->pq = pq; ss->marker = marker; ss->maxscan = maxscan; ss->scanned = 0; vm_pagequeue_unlock(pq); } static void vm_pageout_end_scan(struct scan_state *ss) { struct vm_pagequeue *pq; pq = ss->pq; vm_pagequeue_assert_locked(pq); KASSERT((ss->marker->aflags & PGA_ENQUEUED) != 0, ("marker %p not enqueued", ss->marker)); TAILQ_REMOVE(&pq->pq_pl, ss->marker, plinks.q); vm_page_aflag_clear(ss->marker, PGA_ENQUEUED); pq->pq_pdpages += ss->scanned; } /* * Add a small number of queued pages to a batch queue for later processing * without the corresponding queue lock held. The caller must have enqueued a * marker page at the desired start point for the scan. Pages will be * physically dequeued if the caller so requests. Otherwise, the returned * batch may contain marker pages, and it is up to the caller to handle them. * * When processing the batch queue, vm_page_queue() must be used to * determine whether the page has been logically dequeued by another thread. * Once this check is performed, the page lock guarantees that the page will * not be disassociated from the queue. 
*/ static __always_inline void vm_pageout_collect_batch(struct scan_state *ss, const bool dequeue) { struct vm_pagequeue *pq; vm_page_t m, marker; marker = ss->marker; pq = ss->pq; KASSERT((marker->aflags & PGA_ENQUEUED) != 0, ("marker %p not enqueued", ss->marker)); vm_pagequeue_lock(pq); for (m = TAILQ_NEXT(marker, plinks.q); m != NULL && ss->scanned < ss->maxscan && ss->bq.bq_cnt < VM_BATCHQUEUE_SIZE; m = TAILQ_NEXT(m, plinks.q), ss->scanned++) { if ((m->flags & PG_MARKER) == 0) { KASSERT((m->aflags & PGA_ENQUEUED) != 0, ("page %p not enqueued", m)); KASSERT((m->flags & PG_FICTITIOUS) == 0, ("Fictitious page %p cannot be in page queue", m)); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("Unmanaged page %p cannot be in page queue", m)); } else if (dequeue) continue; (void)vm_batchqueue_insert(&ss->bq, m); if (dequeue) { TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); vm_page_aflag_clear(m, PGA_ENQUEUED); } } TAILQ_REMOVE(&pq->pq_pl, marker, plinks.q); if (__predict_true(m != NULL)) TAILQ_INSERT_BEFORE(m, marker, plinks.q); else TAILQ_INSERT_TAIL(&pq->pq_pl, marker, plinks.q); if (dequeue) vm_pagequeue_cnt_add(pq, -ss->bq.bq_cnt); vm_pagequeue_unlock(pq); } /* Return the next page to be scanned, or NULL if the scan is complete. */ static __always_inline vm_page_t vm_pageout_next(struct scan_state *ss, const bool dequeue) { if (ss->bq.bq_cnt == 0) vm_pageout_collect_batch(ss, dequeue); return (vm_batchqueue_pop(&ss->bq)); } /* * Scan for pages at adjacent offsets within the given page's object that are * eligible for laundering, form a cluster of these pages and the given page, * and launder that cluster. */ static int vm_pageout_cluster(vm_page_t m) { vm_object_t object; vm_page_t mc[2 * vm_pageout_page_count], p, pb, ps; vm_pindex_t pindex; int ib, is, page_base, pageout_count; vm_page_assert_locked(m); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); pindex = m->pindex; vm_page_assert_unbusied(m); KASSERT(!vm_page_held(m), ("page %p is held", m)); pmap_remove_write(m); vm_page_unlock(m); mc[vm_pageout_page_count] = pb = ps = m; pageout_count = 1; page_base = vm_pageout_page_count; ib = 1; is = 1; /* * We can cluster only if the page is not clean, busy, or held, and * the page is in the laundry queue. * * During heavy mmap/modification loads the pageout * daemon can really fragment the underlying file * due to flushing pages out of order and not trying to * align the clusters (which leaves sporadic out-of-order * holes). To solve this problem we do the reverse scan * first and attempt to align our cluster, then do a * forward scan if room remains. */ more: while (ib != 0 && pageout_count < vm_pageout_page_count) { if (ib > pindex) { ib = 0; break; } if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) { ib = 0; break; } vm_page_test_dirty(p); if (p->dirty == 0) { ib = 0; break; } vm_page_lock(p); if (vm_page_held(p) || !vm_page_in_laundry(p)) { vm_page_unlock(p); ib = 0; break; } pmap_remove_write(p); vm_page_unlock(p); mc[--page_base] = pb = p; ++pageout_count; ++ib; /* * We are at an alignment boundary. Stop here, and switch * directions. Do not clear ib. 
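 *
 * A worked example: with vm_pageout_page_count = 32 and a target
 * page at pindex 70, the reverse scan gathers 69, 68, ..., 64 and
 * stops there because 70 - (ib - 1) == 64 is a multiple of 32; the
 * forward scan then extends the cluster from 71 upward until it is
 * 32 pages long or a clean, busy or held page is reached.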
*/ if ((pindex - (ib - 1)) % vm_pageout_page_count == 0) break; } while (pageout_count < vm_pageout_page_count && pindex + is < object->size) { if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p)) break; vm_page_test_dirty(p); if (p->dirty == 0) break; vm_page_lock(p); if (vm_page_held(p) || !vm_page_in_laundry(p)) { vm_page_unlock(p); break; } pmap_remove_write(p); vm_page_unlock(p); mc[page_base + pageout_count] = ps = p; ++pageout_count; ++is; } /* * If we exhausted our forward scan, continue with the reverse scan * when possible, even past an alignment boundary. This catches * boundary conditions. */ if (ib != 0 && pageout_count < vm_pageout_page_count) goto more; return (vm_pageout_flush(&mc[page_base], pageout_count, VM_PAGER_PUT_NOREUSE, 0, NULL, NULL)); } /* * vm_pageout_flush() - launder the given pages * * The given pages are laundered. Note that we set up for the start of * I/O (i.e., busy the page), mark it read-only, and bump the object * reference count all in here rather than in the parent. If we want * the parent to do more sophisticated things we may have to change * the ordering. * * Returned runlen is the count of pages between mreq and first * page after mreq with status VM_PAGER_AGAIN. * *eio is set to TRUE if pager returned VM_PAGER_ERROR or VM_PAGER_FAIL * for any page in runlen set. */ int vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen, boolean_t *eio) { vm_object_t object = mc[0]->object; int pageout_status[count]; int numpagedout = 0; int i, runlen; VM_OBJECT_ASSERT_WLOCKED(object); /* * Initiate I/O. Mark the pages busy and verify that they're valid * and read-only. * * We do not have to fix up the clean/dirty bits here... we can * allow the pager to do it after the I/O completes. * * NOTE! mc[i]->dirty may be partial or fragmented due to an * edge case with file fragments. */ for (i = 0; i < count; i++) { KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, ("vm_pageout_flush: partially invalid page %p index %d/%d", mc[i], i, count)); KASSERT((mc[i]->aflags & PGA_WRITEABLE) == 0, ("vm_pageout_flush: writeable page %p", mc[i])); vm_page_sbusy(mc[i]); } vm_object_pip_add(object, count); vm_pager_put_pages(object, mc, count, flags, pageout_status); runlen = count - mreq; if (eio != NULL) *eio = FALSE; for (i = 0; i < count; i++) { vm_page_t mt = mc[i]; KASSERT(pageout_status[i] == VM_PAGER_PEND || !pmap_page_is_write_mapped(mt), ("vm_pageout_flush: page %p is not write protected", mt)); switch (pageout_status[i]) { case VM_PAGER_OK: vm_page_lock(mt); if (vm_page_in_laundry(mt)) vm_page_deactivate_noreuse(mt); vm_page_unlock(mt); /* FALLTHROUGH */ case VM_PAGER_PEND: numpagedout++; break; case VM_PAGER_BAD: /* * The page is outside the object's range. We pretend * that the page out worked and clean the page, so the * changes will be lost if the page is reclaimed by * the page daemon. */ vm_page_undirty(mt); vm_page_lock(mt); if (vm_page_in_laundry(mt)) vm_page_deactivate_noreuse(mt); vm_page_unlock(mt); break; case VM_PAGER_ERROR: case VM_PAGER_FAIL: /* * If the page couldn't be paged out to swap because the * pager wasn't able to find space, place the page in * the PQ_UNSWAPPABLE holding queue. This is an * optimization that prevents the page daemon from * wasting CPU cycles on pages that cannot be reclaimed * because no swap device is configured. * * Otherwise, reactivate the page so that it doesn't * clog the laundry and inactive queues. (We will try * paging it out again later.)
*/ vm_page_lock(mt); if (object->type == OBJT_SWAP && pageout_status[i] == VM_PAGER_FAIL) { vm_page_unswappable(mt); numpagedout++; } else vm_page_activate(mt); vm_page_unlock(mt); if (eio != NULL && i >= mreq && i - mreq < runlen) *eio = TRUE; break; case VM_PAGER_AGAIN: if (i >= mreq && i - mreq < runlen) runlen = i - mreq; break; } /* * If the operation is still going, leave the page busy to * block all other accesses. Also, leave the paging in * progress indicator set so that we don't attempt an object * collapse. */ if (pageout_status[i] != VM_PAGER_PEND) { vm_object_pip_wakeup(object); vm_page_sunbusy(mt); } } if (prunlen != NULL) *prunlen = runlen; return (numpagedout); } static void vm_pageout_swapon(void *arg __unused, struct swdevt *sp __unused) { atomic_store_rel_int(&swapdev_enabled, 1); } static void vm_pageout_swapoff(void *arg __unused, struct swdevt *sp __unused) { if (swap_pager_nswapdev() == 1) atomic_store_rel_int(&swapdev_enabled, 0); } /* * Attempt to acquire all of the necessary locks to launder a page and * then call through the clustering layer to PUTPAGES. Wait a short * time for a vnode lock. * * Requires the page and object lock on entry, releases both before return. * Returns 0 on success and an errno otherwise. */ static int vm_pageout_clean(vm_page_t m, int *numpagedout) { struct vnode *vp; struct mount *mp; vm_object_t object; vm_pindex_t pindex; int error, lockmode; vm_page_assert_locked(m); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); error = 0; vp = NULL; mp = NULL; /* * The object is already known NOT to be dead. It * is possible for the vget() to block the whole * pageout daemon, but the new low-memory handling * code should prevent it. * * We can't wait forever for the vnode lock, we might * deadlock due to a vn_read() getting stuck in * vm_wait while holding this vnode. We skip the * vnode if we can't get it in a reasonable amount * of time. */ if (object->type == OBJT_VNODE) { vm_page_unlock(m); vp = object->handle; if (vp->v_type == VREG && vn_start_write(vp, &mp, V_NOWAIT) != 0) { mp = NULL; error = EDEADLK; goto unlock_all; } KASSERT(mp != NULL, ("vp %p with NULL v_mount", vp)); vm_object_reference_locked(object); pindex = m->pindex; VM_OBJECT_WUNLOCK(object); lockmode = MNT_SHARED_WRITES(vp->v_mount) ? LK_SHARED : LK_EXCLUSIVE; if (vget(vp, lockmode | LK_TIMELOCK, curthread)) { vp = NULL; error = EDEADLK; goto unlock_mp; } VM_OBJECT_WLOCK(object); /* * Ensure that the object and vnode were not disassociated * while locks were dropped. */ if (vp->v_object != object) { error = ENOENT; goto unlock_all; } vm_page_lock(m); /* * While the object and page were unlocked, the page * may have been: * (1) moved to a different queue, * (2) reallocated to a different object, * (3) reallocated to a different offset, or * (4) cleaned. */ if (!vm_page_in_laundry(m) || m->object != object || m->pindex != pindex || m->dirty == 0) { vm_page_unlock(m); error = ENXIO; goto unlock_all; } /* * The page may have been busied or referenced while the object * and page locks were released. */ if (vm_page_busied(m) || vm_page_held(m)) { vm_page_unlock(m); error = EBUSY; goto unlock_all; } } /* * If a page is dirty, then it is either being washed * (but not yet cleaned) or it is still in the * laundry. If it is still in the laundry, then we * start the cleaning operation. 
*/ if ((*numpagedout = vm_pageout_cluster(m)) == 0) error = EIO; unlock_all: VM_OBJECT_WUNLOCK(object); unlock_mp: vm_page_lock_assert(m, MA_NOTOWNED); if (mp != NULL) { if (vp != NULL) vput(vp); vm_object_deallocate(object); vn_finished_write(mp); } return (error); } /* * Attempt to launder the specified number of pages. * * Returns the number of pages successfully laundered. */ static int vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) { struct scan_state ss; struct vm_pagequeue *pq; struct mtx *mtx; vm_object_t object; vm_page_t m, marker; int act_delta, error, numpagedout, queue, starting_target; int vnodes_skipped; bool pageout_ok; mtx = NULL; object = NULL; starting_target = launder; vnodes_skipped = 0; /* * Scan the laundry queues for pages eligible to be laundered. We stop * once the target number of dirty pages have been laundered, or once * we've reached the end of the queue. A single iteration of this loop * may cause more than one page to be laundered because of clustering. * * As an optimization, we avoid laundering from PQ_UNSWAPPABLE when no * swap devices are configured. */ if (atomic_load_acq_int(&swapdev_enabled)) queue = PQ_UNSWAPPABLE; else queue = PQ_LAUNDRY; scan: marker = &vmd->vmd_markers[queue]; pq = &vmd->vmd_pagequeues[queue]; vm_pagequeue_lock(pq); vm_pageout_init_scan(&ss, pq, marker, NULL, pq->pq_cnt); while (launder > 0 && (m = vm_pageout_next(&ss, false)) != NULL) { if (__predict_false((m->flags & PG_MARKER) != 0)) continue; vm_page_change_lock(m, &mtx); recheck: /* * The page may have been disassociated from the queue * while locks were dropped. */ if (vm_page_queue(m) != queue) continue; /* * A requeue was requested, so this page gets a second * chance. */ if ((m->aflags & PGA_REQUEUE) != 0) { vm_page_requeue(m); continue; } /* * Held pages are essentially stuck in the queue. * * Wired pages may not be freed. Complete their removal * from the queue now to avoid needless revisits during * future scans. */ if (m->hold_count != 0) continue; if (m->wire_count != 0) { vm_page_dequeue_deferred(m); continue; } if (object != m->object) { if (object != NULL) VM_OBJECT_WUNLOCK(object); object = m->object; if (!VM_OBJECT_TRYWLOCK(object)) { mtx_unlock(mtx); /* Depends on type-stability. */ VM_OBJECT_WLOCK(object); mtx_lock(mtx); goto recheck; } } if (vm_page_busied(m)) continue; /* * Invalid pages can be easily freed. They cannot be * mapped; vm_page_free() asserts this. */ if (m->valid == 0) goto free_page; /* * If the page has been referenced and the object is not dead, * reactivate or requeue the page depending on whether the * object is mapped. * * Test PGA_REFERENCED after calling pmap_ts_referenced() so * that a reference from a concurrently destroyed mapping is * observed here and now. */ if (object->ref_count != 0) act_delta = pmap_ts_referenced(m); else { KASSERT(!pmap_page_is_mapped(m), ("page %p is mapped", m)); act_delta = 0; } if ((m->aflags & PGA_REFERENCED) != 0) { vm_page_aflag_clear(m, PGA_REFERENCED); act_delta++; } if (act_delta != 0) { if (object->ref_count != 0) { VM_CNT_INC(v_reactivated); vm_page_activate(m); /* * Increase the activation count if the page * was referenced while in the laundry queue. * This makes it less likely that the page will * be returned prematurely to the inactive * queue. */ m->act_count += act_delta + ACT_ADVANCE; /* * If this was a background laundering, count * activated pages towards our target. 
The * purpose of background laundering is to ensure * that pages are eventually cycled through the * laundry queue, and an activation is a valid * way out. */ if (!in_shortfall) launder--; continue; } else if ((object->flags & OBJ_DEAD) == 0) { vm_page_requeue(m); continue; } } /* * If the page appears to be clean at the machine-independent * layer, then remove all of its mappings from the pmap in * anticipation of freeing it. If, however, any of the page's * mappings allow write access, then the page may still be * modified until the last of those mappings are removed. */ if (object->ref_count != 0) { vm_page_test_dirty(m); if (m->dirty == 0) pmap_remove_all(m); } /* * Clean pages are freed, and dirty pages are paged out unless * they belong to a dead object. Requeueing dirty pages from * dead objects is pointless, as they are being paged out and * freed by the thread that destroyed the object. */ if (m->dirty == 0) { free_page: vm_page_free(m); VM_CNT_INC(v_dfree); } else if ((object->flags & OBJ_DEAD) == 0) { if (object->type != OBJT_SWAP && object->type != OBJT_DEFAULT) pageout_ok = true; else if (disable_swap_pageouts) pageout_ok = false; else pageout_ok = true; if (!pageout_ok) { vm_page_requeue(m); continue; } /* * Form a cluster with adjacent, dirty pages from the * same object, and page out that entire cluster. * * The adjacent, dirty pages must also be in the * laundry. However, their mappings are not checked * for new references. Consequently, a recently * referenced page may be paged out. However, that * page will not be prematurely reclaimed. After page * out, the page will be placed in the inactive queue, * where any new references will be detected and the * page reactivated. */ error = vm_pageout_clean(m, &numpagedout); if (error == 0) { launder -= numpagedout; ss.scanned += numpagedout; } else if (error == EDEADLK) { pageout_lock_miss++; vnodes_skipped++; } mtx = NULL; object = NULL; } } if (mtx != NULL) { mtx_unlock(mtx); mtx = NULL; } if (object != NULL) { VM_OBJECT_WUNLOCK(object); object = NULL; } vm_pagequeue_lock(pq); vm_pageout_end_scan(&ss); vm_pagequeue_unlock(pq); if (launder > 0 && queue == PQ_UNSWAPPABLE) { queue = PQ_LAUNDRY; goto scan; } /* * Wakeup the sync daemon if we skipped a vnode in a writeable object * and we didn't launder enough pages. */ if (vnodes_skipped > 0 && launder > 0) (void)speedup_syncer(); return (starting_target - launder); } /* * Compute the integer square root. */ static u_int isqrt(u_int num) { u_int bit, root, tmp; - bit = 1u << ((NBBY * sizeof(u_int)) - 2); - while (bit > num) - bit >>= 2; + bit = num != 0 ? (1u << ((fls(num) - 1) & ~1)) : 0; root = 0; while (bit != 0) { tmp = root + bit; root >>= 1; if (num >= tmp) { num -= tmp; root += bit; } bit >>= 2; } return (root); } /* * Perform the work of the laundry thread: periodically wake up and determine * whether any pages need to be laundered. If so, determine the number of pages * that need to be laundered, and launder them. */ static void vm_pageout_laundry_worker(void *arg) { struct vm_domain *vmd; struct vm_pagequeue *pq; uint64_t nclean, ndirty, nfreed; int domain, last_target, launder, shortfall, shortfall_cycle, target; bool in_shortfall; domain = (uintptr_t)arg; vmd = VM_DOMAIN(domain); pq = &vmd->vmd_pagequeues[PQ_LAUNDRY]; KASSERT(vmd->vmd_segs != 0, ("domain without segments")); shortfall = 0; in_shortfall = false; shortfall_cycle = 0; last_target = target = 0; nfreed = 0; /* * Calls to these handlers are serialized by the swap syscall lock. 
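 *
 * (On the isqrt() seed above: fls(num) - 1 is the index of the
 * highest set bit, and clearing its low bit ("& ~1") rounds that
 * index down to an even value, so "bit" starts at the largest power
 * of four <= num -- the textbook seed for a digit-by-digit square
 * root.  A quick userland check, assuming <strings.h> for fls(3):
 *
 *	u_int num = 100;
 *	u_int bit = num != 0 ? 1u << ((fls(num) - 1) & ~1) : 0;
 *	assert(bit == 64);	- largest power of four <= 100
 *
 * The replaced code found the same seed by shifting 1 << 30 down
 * two bits at a time, which wasted iterations for small inputs.)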
*/ (void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, vmd, EVENTHANDLER_PRI_ANY); (void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, vmd, EVENTHANDLER_PRI_ANY); /* * The pageout laundry worker is never done, so loop forever. */ for (;;) { KASSERT(target >= 0, ("negative target %d", target)); KASSERT(shortfall_cycle >= 0, ("negative cycle %d", shortfall_cycle)); launder = 0; /* * First determine whether we need to launder pages to meet a * shortage of free pages. */ if (shortfall > 0) { in_shortfall = true; shortfall_cycle = VM_LAUNDER_RATE / VM_INACT_SCAN_RATE; target = shortfall; } else if (!in_shortfall) goto trybackground; else if (shortfall_cycle == 0 || vm_laundry_target(vmd) <= 0) { /* * We recently entered shortfall and began laundering * pages. If we have completed that laundering run * (and we are no longer in shortfall) or we have met * our laundry target through other activity, then we * can stop laundering pages. */ in_shortfall = false; target = 0; goto trybackground; } launder = target / shortfall_cycle--; goto dolaundry; /* * There's no immediate need to launder any pages; see if we * meet the conditions to perform background laundering: * * 1. The ratio of dirty to clean inactive pages exceeds the * background laundering threshold, or * 2. we haven't yet reached the target of the current * background laundering run. * * The background laundering threshold is not a constant. * Instead, it is a slowly growing function of the number of * clean pages freed by the page daemon since the last * background laundering. Thus, as the ratio of dirty to * clean inactive pages grows, the amount of memory pressure * required to trigger laundering decreases. We ensure * that the threshold is non-zero after an inactive queue * scan, even if that scan failed to free a single clean page. */ trybackground: nclean = vmd->vmd_free_count + vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt; ndirty = vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt; if (target == 0 && ndirty * isqrt(howmany(nfreed + 1, vmd->vmd_free_target - vmd->vmd_free_min)) >= nclean) { target = vmd->vmd_background_launder_target; } /* * We have a non-zero background laundering target. If we've * laundered up to our maximum without observing a page daemon * request, just stop. This is a safety belt that ensures we * don't launder an excessive amount if memory pressure is low * and the ratio of dirty to clean pages is large. Otherwise, * proceed at the background laundering rate. */ if (target > 0) { if (nfreed > 0) { nfreed = 0; last_target = target; } else if (last_target - target >= vm_background_launder_max * PAGE_SIZE / 1024) { target = 0; } launder = vm_background_launder_rate * PAGE_SIZE / 1024; launder /= VM_LAUNDER_RATE; if (launder > target) launder = target; } dolaundry: if (launder > 0) { /* * Because of I/O clustering, the number of laundered * pages could exceed "target" by the maximum size of * a cluster minus one. */ target -= min(vm_pageout_launder(vmd, launder, in_shortfall), target); pause("laundp", hz / VM_LAUNDER_RATE); } /* * If we're not currently laundering pages and the page daemon * hasn't posted a new request, sleep until the page daemon * kicks us. */ vm_pagequeue_lock(pq); if (target == 0 && vmd->vmd_laundry_request == VM_LAUNDRY_IDLE) (void)mtx_sleep(&vmd->vmd_laundry_request, vm_pagequeue_lockptr(pq), PVM, "launds", 0); /* * If the pagedaemon has indicated that it's in shortfall, start * a shortfall laundering unless we're already in the middle of * one. This may preempt a background laundering. 
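 *
 * (To make the background threshold above concrete: suppose
 * vmd_free_target - vmd_free_min is 10000 pages.  Right after a
 * background run, with nfreed near 0, isqrt(howmany(1, 10000)) == 1,
 * so laundering starts only once ndirty >= nclean.  After the page
 * daemon has freed about 40000 clean pages, the multiplier becomes
 * isqrt(5) == 2, and a dirty count of only half the clean count is
 * enough to start a new run.)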
*/ if (vmd->vmd_laundry_request == VM_LAUNDRY_SHORTFALL && (!in_shortfall || shortfall_cycle == 0)) { shortfall = vm_laundry_target(vmd) + vmd->vmd_pageout_deficit; target = 0; } else shortfall = 0; if (target == 0) vmd->vmd_laundry_request = VM_LAUNDRY_IDLE; nfreed += vmd->vmd_clean_pages_freed; vmd->vmd_clean_pages_freed = 0; vm_pagequeue_unlock(pq); } } /* * Compute the number of pages we want to try to move from the * active queue to either the inactive or laundry queue. * * When scanning active pages during a shortage, we make clean pages * count more heavily towards the page shortage than dirty pages. * This is because dirty pages must be laundered before they can be * reused and thus have less utility when attempting to quickly * alleviate a free page shortage. However, this weighting also * causes the scan to deactivate dirty pages more aggressively, * improving the effectiveness of clustering. */ static int vm_pageout_active_target(struct vm_domain *vmd) { int shortage; shortage = vmd->vmd_inactive_target + vm_paging_target(vmd) - (vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt + vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt / act_scan_laundry_weight); shortage *= act_scan_laundry_weight; return (shortage); } /* * Scan the active queue. If there is no shortage of inactive pages, scan a * small portion of the queue in order to maintain quasi-LRU. */ static void vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage) { struct scan_state ss; struct mtx *mtx; vm_page_t m, marker; struct vm_pagequeue *pq; long min_scan; int act_delta, max_scan, scan_tick; marker = &vmd->vmd_markers[PQ_ACTIVE]; pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; vm_pagequeue_lock(pq); /* * If we're just idle polling attempt to visit every * active page within 'update_period' seconds. */ scan_tick = ticks; if (vm_pageout_update_period != 0) { min_scan = pq->pq_cnt; min_scan *= scan_tick - vmd->vmd_last_active_scan; min_scan /= hz * vm_pageout_update_period; } else min_scan = 0; if (min_scan > 0 || (page_shortage > 0 && pq->pq_cnt > 0)) vmd->vmd_last_active_scan = scan_tick; /* * Scan the active queue for pages that can be deactivated. Update * the per-page activity counter and use it to identify deactivation * candidates. Held pages may be deactivated. * * To avoid requeuing each page that remains in the active queue, we * implement the CLOCK algorithm. To keep the implementation of the * enqueue operation consistent for all page queues, we use two hands, * represented by marker pages. Scans begin at the first hand, which * precedes the second hand in the queue. When the two hands meet, * they are moved back to the head and tail of the queue, respectively, * and scanning resumes. */ max_scan = page_shortage > 0 ? pq->pq_cnt : min_scan; mtx = NULL; act_scan: vm_pageout_init_scan(&ss, pq, marker, &vmd->vmd_clock[0], max_scan); while ((m = vm_pageout_next(&ss, false)) != NULL) { if (__predict_false(m == &vmd->vmd_clock[1])) { vm_pagequeue_lock(pq); TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[0], plinks.q); TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[1], plinks.q); TAILQ_INSERT_HEAD(&pq->pq_pl, &vmd->vmd_clock[0], plinks.q); TAILQ_INSERT_TAIL(&pq->pq_pl, &vmd->vmd_clock[1], plinks.q); max_scan -= ss.scanned; vm_pageout_end_scan(&ss); goto act_scan; } if (__predict_false((m->flags & PG_MARKER) != 0)) continue; vm_page_change_lock(m, &mtx); /* * The page may have been disassociated from the queue * while locks were dropped. */ if (vm_page_queue(m) != PQ_ACTIVE) continue; /* * Wired pages are dequeued lazily. 
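 *
 * (For the act_count bookkeeping applied below: with the historical
 * constants from vm/vm_page.h -- ACT_ADVANCE 3, ACT_DECLINE 1,
 * ACT_MAX 64 -- a page referenced in every scan climbs by at least
 * 4 per visit toward the 64 cap, while an idle page decays by 1 per
 * visit.  A hot page that suddenly goes idle therefore survives up
 * to 64 active-queue visits before act_count reaches zero and it is
 * deactivated or laundered.)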
*/ if (m->wire_count != 0) { vm_page_dequeue_deferred(m); continue; } /* * Check to see "how much" the page has been used. * * Test PGA_REFERENCED after calling pmap_ts_referenced() so * that a reference from a concurrently destroyed mapping is * observed here and now. * * Perform an unsynchronized object ref count check. While * the page lock ensures that the page is not reallocated to * another object, in particular, one with unmanaged mappings * that cannot support pmap_ts_referenced(), two races are, * nonetheless, possible: * 1) The count was transitioning to zero, but we saw a non- * zero value. pmap_ts_referenced() will return zero * because the page is not mapped. * 2) The count was transitioning to one, but we saw zero. * This race delays the detection of a new reference. At * worst, we will deactivate and reactivate the page. */ if (m->object->ref_count != 0) act_delta = pmap_ts_referenced(m); else act_delta = 0; if ((m->aflags & PGA_REFERENCED) != 0) { vm_page_aflag_clear(m, PGA_REFERENCED); act_delta++; } /* * Advance or decay the act_count based on recent usage. */ if (act_delta != 0) { m->act_count += ACT_ADVANCE + act_delta; if (m->act_count > ACT_MAX) m->act_count = ACT_MAX; } else m->act_count -= min(m->act_count, ACT_DECLINE); if (m->act_count == 0) { /* * When not short for inactive pages, let dirty pages go * through the inactive queue before moving to the * laundry queues. This gives them some extra time to * be reactivated, potentially avoiding an expensive * pageout. However, during a page shortage, the * inactive queue is necessarily small, and so dirty * pages would only spend a trivial amount of time in * the inactive queue. Therefore, we might as well * place them directly in the laundry queue to reduce * queuing overhead. */ if (page_shortage <= 0) vm_page_deactivate(m); else { /* * Calling vm_page_test_dirty() here would * require acquisition of the object's write * lock. However, during a page shortage, * directing dirty pages into the laundry * queue is only an optimization and not a * requirement. Therefore, we simply rely on * the opportunistic updates to the page's * dirty field by the pmap. */ if (m->dirty == 0) { vm_page_deactivate(m); page_shortage -= act_scan_laundry_weight; } else { vm_page_launder(m); page_shortage--; } } } } if (mtx != NULL) { mtx_unlock(mtx); mtx = NULL; } vm_pagequeue_lock(pq); TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[0], plinks.q); TAILQ_INSERT_AFTER(&pq->pq_pl, marker, &vmd->vmd_clock[0], plinks.q); vm_pageout_end_scan(&ss); vm_pagequeue_unlock(pq); } static int vm_pageout_reinsert_inactive_page(struct scan_state *ss, vm_page_t m) { struct vm_domain *vmd; if (m->queue != PQ_INACTIVE || (m->aflags & PGA_ENQUEUED) != 0) return (0); vm_page_aflag_set(m, PGA_ENQUEUED); if ((m->aflags & PGA_REQUEUE_HEAD) != 0) { vmd = vm_pagequeue_domain(m); TAILQ_INSERT_BEFORE(&vmd->vmd_inacthead, m, plinks.q); vm_page_aflag_clear(m, PGA_REQUEUE | PGA_REQUEUE_HEAD); } else if ((m->aflags & PGA_REQUEUE) != 0) { TAILQ_INSERT_TAIL(&ss->pq->pq_pl, m, plinks.q); vm_page_aflag_clear(m, PGA_REQUEUE | PGA_REQUEUE_HEAD); } else TAILQ_INSERT_BEFORE(ss->marker, m, plinks.q); return (1); } /* * Re-add stuck pages to the inactive queue. We will examine them again * during the next scan. If the queue state of a page has changed since * it was physically removed from the page queue in * vm_pageout_collect_batch(), don't do anything with that page. 
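 *
 * Sketch of the per-page dispatch implemented by
 * vm_pageout_reinsert_inactive_page() above:
 *
 *	PGA_ENQUEUED set	-> page never left the queue; nothing to do
 *	PGA_REQUEUE_HEAD set	-> reinsert just before vmd_inacthead
 *	PGA_REQUEUE set		-> reinsert at the queue tail
 *	neither flag		-> restore in place, before the scan marker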
*/ static void vm_pageout_reinsert_inactive(struct scan_state *ss, struct vm_batchqueue *bq, vm_page_t m) { struct vm_pagequeue *pq; int delta; delta = 0; pq = ss->pq; if (m != NULL) { if (vm_batchqueue_insert(bq, m)) return; vm_pagequeue_lock(pq); delta += vm_pageout_reinsert_inactive_page(ss, m); } else vm_pagequeue_lock(pq); while ((m = vm_batchqueue_pop(bq)) != NULL) delta += vm_pageout_reinsert_inactive_page(ss, m); vm_pagequeue_cnt_add(pq, delta); vm_pagequeue_unlock(pq); vm_batchqueue_init(bq); } /* * Attempt to reclaim the requested number of pages from the inactive queue. * Returns true if the shortage was addressed. */ static int vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage, int *addl_shortage) { struct scan_state ss; struct vm_batchqueue rq; struct mtx *mtx; vm_page_t m, marker; struct vm_pagequeue *pq; vm_object_t object; int act_delta, addl_page_shortage, deficit, page_shortage; int starting_page_shortage; /* * The addl_page_shortage is an estimate of the number of temporarily * stuck pages in the inactive queue. In other words, the * number of pages from the inactive count that should be * discounted in setting the target for the active queue scan. */ addl_page_shortage = 0; /* * vmd_pageout_deficit counts the number of pages requested in * allocations that failed because of a free page shortage. We assume * that the allocations will be reattempted and thus include the deficit * in our scan target. */ deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit); starting_page_shortage = page_shortage = shortage + deficit; mtx = NULL; object = NULL; vm_batchqueue_init(&rq); /* * Start scanning the inactive queue for pages that we can free. The * scan will stop when we reach the target or we have scanned the * entire queue. (Note that m->act_count is not used to make * decisions for the inactive queue, only for the active queue.) */ marker = &vmd->vmd_markers[PQ_INACTIVE]; pq = &vmd->vmd_pagequeues[PQ_INACTIVE]; vm_pagequeue_lock(pq); vm_pageout_init_scan(&ss, pq, marker, NULL, pq->pq_cnt); while (page_shortage > 0 && (m = vm_pageout_next(&ss, true)) != NULL) { KASSERT((m->flags & PG_MARKER) == 0, ("marker page %p was dequeued", m)); vm_page_change_lock(m, &mtx); recheck: /* * The page may have been disassociated from the queue * while locks were dropped. */ if (vm_page_queue(m) != PQ_INACTIVE) { addl_page_shortage++; continue; } /* * The page was re-enqueued after the page queue lock was * dropped, or a requeue was requested. This page gets a second * chance. */ if ((m->aflags & (PGA_ENQUEUED | PGA_REQUEUE | PGA_REQUEUE_HEAD)) != 0) goto reinsert; /* * Held pages are essentially stuck in the queue. So, * they ought to be discounted from the inactive count. * See the description of addl_page_shortage above. * * Wired pages may not be freed. Complete their removal * from the queue now to avoid needless revisits during * future scans. */ if (m->hold_count != 0) { addl_page_shortage++; goto reinsert; } if (m->wire_count != 0) { vm_page_dequeue_deferred(m); continue; } if (object != m->object) { if (object != NULL) VM_OBJECT_WUNLOCK(object); object = m->object; if (!VM_OBJECT_TRYWLOCK(object)) { mtx_unlock(mtx); /* Depends on type-stability. */ VM_OBJECT_WLOCK(object); mtx_lock(mtx); goto recheck; } } if (vm_page_busied(m)) { /* * Don't mess with busy pages. Leave them at * the front of the queue. Most likely, they * are being paged out and will leave the * queue shortly after the scan finishes. So, * they ought to be discounted from the * inactive count. 
*/ addl_page_shortage++; goto reinsert; } /* * Invalid pages can be easily freed. They cannot be * mapped, vm_page_free() asserts this. */ if (m->valid == 0) goto free_page; /* * If the page has been referenced and the object is not dead, * reactivate or requeue the page depending on whether the * object is mapped. * * Test PGA_REFERENCED after calling pmap_ts_referenced() so * that a reference from a concurrently destroyed mapping is * observed here and now. */ if (object->ref_count != 0) act_delta = pmap_ts_referenced(m); else { KASSERT(!pmap_page_is_mapped(m), ("page %p is mapped", m)); act_delta = 0; } if ((m->aflags & PGA_REFERENCED) != 0) { vm_page_aflag_clear(m, PGA_REFERENCED); act_delta++; } if (act_delta != 0) { if (object->ref_count != 0) { VM_CNT_INC(v_reactivated); vm_page_activate(m); /* * Increase the activation count if the page * was referenced while in the inactive queue. * This makes it less likely that the page will * be returned prematurely to the inactive * queue. */ m->act_count += act_delta + ACT_ADVANCE; continue; } else if ((object->flags & OBJ_DEAD) == 0) { vm_page_aflag_set(m, PGA_REQUEUE); goto reinsert; } } /* * If the page appears to be clean at the machine-independent * layer, then remove all of its mappings from the pmap in * anticipation of freeing it. If, however, any of the page's * mappings allow write access, then the page may still be * modified until the last of those mappings are removed. */ if (object->ref_count != 0) { vm_page_test_dirty(m); if (m->dirty == 0) pmap_remove_all(m); } /* * Clean pages can be freed, but dirty pages must be sent back * to the laundry, unless they belong to a dead object. * Requeueing dirty pages from dead objects is pointless, as * they are being paged out and freed by the thread that * destroyed the object. */ if (m->dirty == 0) { free_page: /* * Because we dequeued the page and have already * checked for concurrent dequeue and enqueue * requests, we can safely disassociate the page * from the inactive queue. */ KASSERT((m->aflags & PGA_QUEUE_STATE_MASK) == 0, ("page %p has queue state", m)); m->queue = PQ_NONE; vm_page_free(m); page_shortage--; } else if ((object->flags & OBJ_DEAD) == 0) vm_page_launder(m); continue; reinsert: vm_pageout_reinsert_inactive(&ss, &rq, m); } if (mtx != NULL) mtx_unlock(mtx); if (object != NULL) VM_OBJECT_WUNLOCK(object); vm_pageout_reinsert_inactive(&ss, &rq, NULL); vm_pageout_reinsert_inactive(&ss, &ss.bq, NULL); vm_pagequeue_lock(pq); vm_pageout_end_scan(&ss); vm_pagequeue_unlock(pq); VM_CNT_ADD(v_dfree, starting_page_shortage - page_shortage); /* * Wake up the laundry thread so that it can perform any needed * laundering. If we didn't meet our target, we're in shortfall and * need to launder more aggressively. If PQ_LAUNDRY is empty and no * swap devices are configured, the laundry thread has no work to do, so * don't bother waking it up. * * The laundry thread uses the number of inactive queue scans elapsed * since the last laundering to determine whether to launder again, so * keep count. 
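 *
 * In short, the request posted below is (sketch, using the states
 * defined in this file):
 *
 *	target missed (page_shortage > 0)      -> VM_LAUNDRY_SHORTFALL
 *	target met, no shortfall outstanding   -> VM_LAUNDRY_BACKGROUND
 *	laundry queue empty and no swap device -> left at VM_LAUNDRY_IDLE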
*/ if (starting_page_shortage > 0) { pq = &vmd->vmd_pagequeues[PQ_LAUNDRY]; vm_pagequeue_lock(pq); if (vmd->vmd_laundry_request == VM_LAUNDRY_IDLE && (pq->pq_cnt > 0 || atomic_load_acq_int(&swapdev_enabled))) { if (page_shortage > 0) { vmd->vmd_laundry_request = VM_LAUNDRY_SHORTFALL; VM_CNT_INC(v_pdshortfalls); } else if (vmd->vmd_laundry_request != VM_LAUNDRY_SHORTFALL) vmd->vmd_laundry_request = VM_LAUNDRY_BACKGROUND; wakeup(&vmd->vmd_laundry_request); } vmd->vmd_clean_pages_freed += starting_page_shortage - page_shortage; vm_pagequeue_unlock(pq); } /* * Wake up the swapout daemon if we didn't free the targeted number of * pages. */ if (page_shortage > 0) vm_swapout_run(); /* * If the inactive queue scan fails repeatedly to meet its * target, kill the largest process. */ vm_pageout_mightbe_oom(vmd, page_shortage, starting_page_shortage); /* * Reclaim pages by swapping out idle processes, if configured to do so. */ vm_swapout_run_idle(); /* * See the description of addl_page_shortage above. */ *addl_shortage = addl_page_shortage + deficit; return (page_shortage <= 0); } static int vm_pageout_oom_vote; /* * The pagedaemon threads randomly select one to perform the * OOM. Trying to kill processes before all pagedaemons have * failed to reach the free page target is premature. */ static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, int starting_page_shortage) { int old_vote; if (starting_page_shortage <= 0 || starting_page_shortage != page_shortage) vmd->vmd_oom_seq = 0; else vmd->vmd_oom_seq++; if (vmd->vmd_oom_seq < vm_pageout_oom_seq) { if (vmd->vmd_oom) { vmd->vmd_oom = FALSE; atomic_subtract_int(&vm_pageout_oom_vote, 1); } return; } /* * Do not restart the vote sequence until the OOM condition is * cleared. */ vmd->vmd_oom_seq = 0; if (vmd->vmd_oom) return; vmd->vmd_oom = TRUE; old_vote = atomic_fetchadd_int(&vm_pageout_oom_vote, 1); if (old_vote != vm_ndomains - 1) return; /* * The current pagedaemon thread is the last in the quorum to * start OOM. Initiate the selection and signaling of the * victim. */ vm_pageout_oom(VM_OOM_MEM); /* * After one round of OOM terror, recall our vote. On the * next pass, the current pagedaemon will vote again if the low * memory condition persists, since vmd_oom is false again. */ vmd->vmd_oom = FALSE; atomic_subtract_int(&vm_pageout_oom_vote, 1); } /* * The OOM killer is the page daemon's action of last resort when * memory allocation requests have been stalled for a prolonged period * of time because it cannot reclaim memory. This function computes * the approximate number of physical pages that could be reclaimed if * the specified address space is destroyed. * * Private, anonymous memory owned by the address space is the * principal resource that we expect to recover after an OOM kill. * Since the physical pages mapped by the address space's COW entries * are typically shared pages, they are unlikely to be released and so * they are not counted. * * To get to the point where the page daemon runs the OOM killer, its * efforts to write back vnode-backed pages may have stalled. This * could be caused by a memory allocation deadlock in the write path * that might be resolved by an OOM kill. Therefore, physical pages * belonging to vnode-backed objects are counted, because they might * be freed without being written out first if the address space holds * the last reference to an unlinked vnode. * * Similarly, physical pages belonging to OBJT_PHYS objects are * counted because the address space might hold the last reference to * the object.
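 *
 * As a sketch, the loop below therefore amounts to:
 *
 *	for each map entry:
 *		skip submaps, entries without a backing object, and
 *		    NEEDS_COPY entries whose object is shared;
 *		for OBJT_DEFAULT, OBJT_SWAP, OBJT_PHYS and OBJT_VNODE
 *		    objects, add obj->resident_page_count to the total.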
*/ static long vm_pageout_oom_pagecount(struct vmspace *vmspace) { vm_map_t map; vm_map_entry_t entry; vm_object_t obj; long res; map = &vmspace->vm_map; KASSERT(!map->system_map, ("system map")); sx_assert(&map->lock, SA_LOCKED); res = 0; for (entry = map->header.next; entry != &map->header; entry = entry->next) { if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) continue; obj = entry->object.vm_object; if (obj == NULL) continue; if ((entry->eflags & MAP_ENTRY_NEEDS_COPY) != 0 && obj->ref_count != 1) continue; switch (obj->type) { case OBJT_DEFAULT: case OBJT_SWAP: case OBJT_PHYS: case OBJT_VNODE: res += obj->resident_page_count; break; } } return (res); } void vm_pageout_oom(int shortage) { struct proc *p, *bigproc; vm_offset_t size, bigsize; struct thread *td; struct vmspace *vm; bool breakout; /* * We keep the process bigproc locked once we find it to keep anyone * from messing with it; however, there is a possibility of * deadlock if process B is bigproc and one of its child processes * attempts to propagate a signal to B while we are waiting for A's * lock while walking this list. To avoid this, we don't block on * the process lock but just skip a process if it is already locked. */ bigproc = NULL; bigsize = 0; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); /* * If this is a system, protected or killed process, skip it. */ if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC | P_PROTECTED | P_SYSTEM | P_WEXIT)) != 0 || p->p_pid == 1 || P_KILLED(p) || (p->p_pid < 48 && swap_pager_avail != 0)) { PROC_UNLOCK(p); continue; } /* * If the process is in a non-running type state, * don't touch it. Check all the threads individually. */ breakout = false; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (!TD_ON_RUNQ(td) && !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td) && !TD_IS_SUSPENDED(td) && !TD_IS_SWAPPED(td)) { thread_unlock(td); breakout = true; break; } thread_unlock(td); } if (breakout) { PROC_UNLOCK(p); continue; } /* * get the process size */ vm = vmspace_acquire_ref(p); if (vm == NULL) { PROC_UNLOCK(p); continue; } _PHOLD_LITE(p); PROC_UNLOCK(p); sx_sunlock(&allproc_lock); if (!vm_map_trylock_read(&vm->vm_map)) { vmspace_free(vm); sx_slock(&allproc_lock); PRELE(p); continue; } size = vmspace_swap_count(vm); if (shortage == VM_OOM_MEM) size += vm_pageout_oom_pagecount(vm); vm_map_unlock_read(&vm->vm_map); vmspace_free(vm); sx_slock(&allproc_lock); /* * If this process is bigger than the biggest one, * remember it. */ if (size > bigsize) { if (bigproc != NULL) PRELE(bigproc); bigproc = p; bigsize = size; } else { PRELE(p); } } sx_sunlock(&allproc_lock); if (bigproc != NULL) { if (vm_panic_on_oom != 0) panic("out of swap space"); PROC_LOCK(bigproc); killproc(bigproc, "out of swap space"); sched_nice(bigproc, PRIO_MIN); _PRELE(bigproc); PROC_UNLOCK(bigproc); } } static bool vm_pageout_lowmem(void) { static int lowmem_ticks = 0; int last; last = atomic_load_int(&lowmem_ticks); while ((u_int)(ticks - last) / hz >= lowmem_period) { if (atomic_fcmpset_int(&lowmem_ticks, &last, ticks) == 0) continue; /* * Decrease registered cache sizes. */ SDT_PROBE0(vm, , , vm__lowmem_scan); EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_PAGES); /* * We do this explicitly after the caches have been * drained above. 
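 *
 * (Worked example of the gate above, with hypothetical settings: at
 * hz = 1000 and lowmem_period = 10, a second caller arriving within
 * 10000 ticks of the last pass sees (ticks - last) / hz < 10 and
 * returns false without invoking the handlers again.)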
*/ uma_reclaim(); return (true); } return (false); } static void vm_pageout_worker(void *arg) { struct vm_domain *vmd; u_int ofree; int addl_shortage, domain, shortage; bool target_met; domain = (uintptr_t)arg; vmd = VM_DOMAIN(domain); shortage = 0; target_met = true; /* * XXXKIB It could be useful to bind pageout daemon threads to * the cores belonging to the domain, from which vm_page_array * is allocated. */ KASSERT(vmd->vmd_segs != 0, ("domain without segments")); vmd->vmd_last_active_scan = ticks; /* * The pageout daemon worker is never done, so loop forever. */ while (TRUE) { vm_domain_pageout_lock(vmd); /* * We need to clear wanted before we check the limits. This * prevents races with wakers who will check wanted after they * reach the limit. */ atomic_store_int(&vmd->vmd_pageout_wanted, 0); /* * Might the page daemon need to run again? */ if (vm_paging_needed(vmd, vmd->vmd_free_count)) { /* * Yes. If the scan failed to produce enough free * pages, sleep uninterruptibly for some time in the * hope that the laundry thread will clean some pages. */ vm_domain_pageout_unlock(vmd); if (!target_met) pause("pwait", hz / VM_INACT_SCAN_RATE); } else { /* * No, sleep until the next wakeup or until pages * need to have their reference stats updated. */ if (mtx_sleep(&vmd->vmd_pageout_wanted, vm_domain_pageout_lockptr(vmd), PDROP | PVM, "psleep", hz / VM_INACT_SCAN_RATE) == 0) VM_CNT_INC(v_pdwakeups); } /* Prevent spurious wakeups by ensuring that wanted is set. */ atomic_store_int(&vmd->vmd_pageout_wanted, 1); /* * Use the controller to calculate how many pages to free in * this interval, and scan the inactive queue. If the lowmem * handlers appear to have freed up some pages, subtract the * difference from the inactive queue scan target. */ shortage = pidctrl_daemon(&vmd->vmd_pid, vmd->vmd_free_count); if (shortage > 0) { ofree = vmd->vmd_free_count; if (vm_pageout_lowmem() && vmd->vmd_free_count > ofree) shortage -= min(vmd->vmd_free_count - ofree, (u_int)shortage); target_met = vm_pageout_scan_inactive(vmd, shortage, &addl_shortage); } else addl_shortage = 0; /* * Scan the active queue. A positive value for shortage * indicates that we must aggressively deactivate pages to avoid * a shortfall. */ shortage = vm_pageout_active_target(vmd) + addl_shortage; vm_pageout_scan_active(vmd, shortage); } } /* * vm_pageout_init initialises basic pageout daemon settings. */ static void vm_pageout_init_domain(int domain) { struct vm_domain *vmd; struct sysctl_oid *oid; vmd = VM_DOMAIN(domain); vmd->vmd_interrupt_free_min = 2; /* * v_free_reserved needs to include enough for the largest * swap pager structures plus enough for any pv_entry structs * when paging. */ if (vmd->vmd_page_count > 1024) vmd->vmd_free_min = 4 + (vmd->vmd_page_count - 1024) / 200; else vmd->vmd_free_min = 4; vmd->vmd_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE + vmd->vmd_interrupt_free_min; vmd->vmd_free_reserved = vm_pageout_page_count + vmd->vmd_pageout_free_min + (vmd->vmd_page_count / 768); vmd->vmd_free_severe = vmd->vmd_free_min / 2; vmd->vmd_free_target = 4 * vmd->vmd_free_min + vmd->vmd_free_reserved; vmd->vmd_free_min += vmd->vmd_free_reserved; vmd->vmd_free_severe += vmd->vmd_free_reserved; vmd->vmd_inactive_target = (3 * vmd->vmd_free_target) / 2; if (vmd->vmd_inactive_target > vmd->vmd_free_count / 3) vmd->vmd_inactive_target = vmd->vmd_free_count / 3; /* * Set the default wakeup threshold to be 10% below the paging * target. This keeps the steady state out of shortfall. 
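 *
 * For example, with a hypothetical vmd_free_target of 50000 pages the
 * expression below yields (50000 / 10) * 9 = 45000, so the daemon is
 * woken once the free count dips within 10% of the target rather than
 * only after a shortfall has developed.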
*/ vmd->vmd_pageout_wakeup_thresh = (vmd->vmd_free_target / 10) * 9; /* * Target amount of memory to move out of the laundry queue during a * background laundering. This is proportional to the amount of system * memory. */ vmd->vmd_background_launder_target = (vmd->vmd_free_target - vmd->vmd_free_min) / 10; /* Initialize the pageout daemon pid controller. */ pidctrl_init(&vmd->vmd_pid, hz / VM_INACT_SCAN_RATE, vmd->vmd_free_target, PIDCTRL_BOUND, PIDCTRL_KPD, PIDCTRL_KID, PIDCTRL_KDD); oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(vmd->vmd_oid), OID_AUTO, "pidctrl", CTLFLAG_RD, NULL, ""); pidctrl_init_sysctl(&vmd->vmd_pid, SYSCTL_CHILDREN(oid)); } static void vm_pageout_init(void) { u_int freecount; int i; /* * Initialize some paging parameters. */ if (vm_cnt.v_page_count < 2000) vm_pageout_page_count = 8; freecount = 0; for (i = 0; i < vm_ndomains; i++) { struct vm_domain *vmd; vm_pageout_init_domain(i); vmd = VM_DOMAIN(i); vm_cnt.v_free_reserved += vmd->vmd_free_reserved; vm_cnt.v_free_target += vmd->vmd_free_target; vm_cnt.v_free_min += vmd->vmd_free_min; vm_cnt.v_inactive_target += vmd->vmd_inactive_target; vm_cnt.v_pageout_free_min += vmd->vmd_pageout_free_min; vm_cnt.v_interrupt_free_min += vmd->vmd_interrupt_free_min; vm_cnt.v_free_severe += vmd->vmd_free_severe; freecount += vmd->vmd_free_count; } /* * Set interval in seconds for active scan. We want to visit each * page at least once every ten minutes. This is to prevent worst * case paging behaviors with stale active LRU. */ if (vm_pageout_update_period == 0) vm_pageout_update_period = 600; if (vm_page_max_wired == 0) vm_page_max_wired = freecount / 3; } /* * vm_pageout is the high level pageout daemon. */ static void vm_pageout(void) { struct proc *p; struct thread *td; int error, first, i; p = curproc; td = curthread; swap_pager_swap_init(); for (first = -1, i = 0; i < vm_ndomains; i++) { if (VM_DOMAIN_EMPTY(i)) { if (bootverbose) printf("domain %d empty; skipping pageout\n", i); continue; } if (first == -1) first = i; else { error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i, p, NULL, 0, 0, "dom%d", i); if (error != 0) panic("starting pageout for domain %d: %d\n", i, error); } error = kthread_add(vm_pageout_laundry_worker, (void *)(uintptr_t)i, p, NULL, 0, 0, "laundry: dom%d", i); if (error != 0) panic("starting laundry for domain %d: %d", i, error); } error = kthread_add(uma_reclaim_worker, NULL, p, NULL, 0, 0, "uma"); if (error != 0) panic("starting uma_reclaim helper, error %d\n", error); snprintf(td->td_name, sizeof(td->td_name), "dom%d", first); vm_pageout_worker((void *)(uintptr_t)first); } /* * Perform an advisory wakeup of the page daemon. 
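 *
 * The wakeup is coordinated through vmd_pageout_wanted: a waker that
 * transitions the flag from zero re-asserts it under the pageout lock
 * and calls wakeup(), while vm_pageout_worker() clears the flag before
 * rechecking vm_paging_needed(), so a racing wakeup is never lost.
 * Sketch of the fast path below:
 *
 *	if (atomic_fetchadd_int(&vmd->vmd_pageout_wanted, 1) == 0) {
 *		vm_domain_pageout_lock(vmd);
 *		atomic_store_int(&vmd->vmd_pageout_wanted, 1);
 *		wakeup(&vmd->vmd_pageout_wanted);
 *		vm_domain_pageout_unlock(vmd);
 *	}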
*/ void pagedaemon_wakeup(int domain) { struct vm_domain *vmd; vmd = VM_DOMAIN(domain); vm_domain_pageout_assert_unlocked(vmd); if (curproc == pageproc) return; if (atomic_fetchadd_int(&vmd->vmd_pageout_wanted, 1) == 0) { vm_domain_pageout_lock(vmd); atomic_store_int(&vmd->vmd_pageout_wanted, 1); wakeup(&vmd->vmd_pageout_wanted); vm_domain_pageout_unlock(vmd); } } Index: projects/runtime-coverage-v2/tests/sys/vm/Makefile =================================================================== --- projects/runtime-coverage-v2/tests/sys/vm/Makefile (revision 347075) +++ projects/runtime-coverage-v2/tests/sys/vm/Makefile (revision 347076) @@ -1,9 +1,10 @@ # $FreeBSD$ PACKAGE= tests TESTSDIR= ${TESTSBASE}/sys/vm -ATF_TESTS_C+= mmap_test +ATF_TESTS_C+= mlock_test \ + mmap_test .include Index: projects/runtime-coverage-v2/tests/sys/vm/mlock_test.c =================================================================== --- projects/runtime-coverage-v2/tests/sys/vm/mlock_test.c (nonexistent) +++ projects/runtime-coverage-v2/tests/sys/vm/mlock_test.c (revision 347076) @@ -0,0 +1,174 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Mark Johnston + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +static void +test_wired_copy_on_write(void *addr, size_t len) +{ + int status, val; + pid_t pid; + + pid = fork(); + if (pid == -1) + atf_tc_fail("fork() failed: %s", strerror(errno)); + if (pid == 0) { + if (mlock(addr, len) != 0) + _exit(1); + if (ptrace(PT_TRACE_ME, 0, NULL, 0) != 0) + _exit(2); + if (raise(SIGSTOP) != 0) + _exit(3); + if (munlock(addr, len) != 0) + _exit(4); + _exit(0); + } + + ATF_REQUIRE(waitpid(pid, &status, 0) == pid); + ATF_REQUIRE_MSG(!WIFEXITED(status), + "child exited with status %d", WEXITSTATUS(status)); + ATF_REQUIRE(WIFSTOPPED(status)); + ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); + + errno = 0; + val = ptrace(PT_READ_D, pid, addr, 0); + ATF_REQUIRE(errno == 0); + ATF_REQUIRE(ptrace(PT_WRITE_D, pid, addr, val) == 0); + ATF_REQUIRE(ptrace(PT_CONTINUE, pid, (caddr_t)1, 0) == 0); + ATF_REQUIRE(waitpid(pid, &status, 0) == pid); + ATF_REQUIRE(WIFEXITED(status)); + ATF_REQUIRE_MSG(WEXITSTATUS(status) == 0, + "child exited with status %d", WEXITSTATUS(status)); +} + +/* + * Use ptrace(2) to trigger a copy-on-write fault of anonymous memory. + */ +ATF_TC_WITHOUT_HEAD(mlock__copy_on_write_anon); +ATF_TC_BODY(mlock__copy_on_write_anon, tc) +{ + char *addr; + int len; + + len = getpagesize(); + addr = mmap(NULL, len, PROT_READ, MAP_ANON, -1, 0); + ATF_REQUIRE(addr != MAP_FAILED); + + test_wired_copy_on_write(addr, len); +} + +/* + * Use ptrace(2) to trigger a copy-on-write fault of a read-only text page. + */ +ATF_TC_WITHOUT_HEAD(mlock__copy_on_write_vnode); +ATF_TC_BODY(mlock__copy_on_write_vnode, tc) +{ + void *addr; + int len; + + len = getpagesize(); + addr = (void *)((uintptr_t)test_wired_copy_on_write & ~(len - 1)); + + test_wired_copy_on_write(addr, len); +} + +/* + * Try truncating and then resizing an mlock()ed mapping. + */ +ATF_TC_WITHOUT_HEAD(mlock__truncate_and_resize); +ATF_TC_BODY(mlock__truncate_and_resize, tc) +{ + char filename[16]; + char *addr; + int fd, i, len; + + snprintf(filename, sizeof(filename), "tmp.XXXXXX"); + fd = mkstemp(filename); + ATF_REQUIRE(fd >= 0); + ATF_REQUIRE(unlink(filename) == 0); + + len = getpagesize(); + ATF_REQUIRE(ftruncate(fd, len) == 0); + + addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + ATF_REQUIRE(addr != MAP_FAILED); + ATF_REQUIRE(mlock(addr, len) == 0); + memset(addr, 1, len); + ATF_REQUIRE(ftruncate(fd, 0) == 0); + ATF_REQUIRE(ftruncate(fd, len) == 0); + for (i = 0; i < len; i++) + ATF_REQUIRE(addr[i] == 0); + ATF_REQUIRE(munlock(addr, len) == 0); +} + +/* + * Make sure that we can munlock() a truncated mapping.
+ */ +ATF_TC_WITHOUT_HEAD(mlock__truncate_and_unlock); +ATF_TC_BODY(mlock__truncate_and_unlock, tc) +{ + char filename[16]; + void *addr; + int fd, len; + + snprintf(filename, sizeof(filename), "tmp.XXXXXX"); + fd = mkstemp(filename); + ATF_REQUIRE(fd >= 0); + ATF_REQUIRE(unlink(filename) == 0); + + len = getpagesize(); + ATF_REQUIRE(ftruncate(fd, len) == 0); + + addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + ATF_REQUIRE(addr != MAP_FAILED); + ATF_REQUIRE(mlock(addr, len) == 0); + ATF_REQUIRE(ftruncate(fd, 0) == 0); + ATF_REQUIRE(munlock(addr, len) == 0); +} + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, mlock__copy_on_write_anon); + ATF_TP_ADD_TC(tp, mlock__copy_on_write_vnode); + ATF_TP_ADD_TC(tp, mlock__truncate_and_resize); + ATF_TP_ADD_TC(tp, mlock__truncate_and_unlock); + + return (atf_no_error()); +} Property changes on: projects/runtime-coverage-v2/tests/sys/vm/mlock_test.c ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: projects/runtime-coverage-v2/tools/boot/ci-qemu-test.sh =================================================================== --- projects/runtime-coverage-v2/tools/boot/ci-qemu-test.sh (revision 347075) +++ projects/runtime-coverage-v2/tools/boot/ci-qemu-test.sh (revision 347076) @@ -1,110 +1,110 @@ #!/bin/sh # Install loader, kernel, and enough of userland to boot in QEMU and echo # "Hello world." from init, as a very quick smoke test for CI. Uses QEMU's # virtual FAT filesystem to avoid the need to create a disk image. While # designed for CI automated testing, this script can also be run by hand as # a quick smoke-test. The rootgen.sh and related scripts generate much more # extensive tests for many combinations of boot env (ufs, zfs, geli, etc). # # $FreeBSD$ set -e die() { echo "$*" 1>&2 exit 1 } tempdir_cleanup() { trap - EXIT SIGINT SIGHUP SIGTERM SIGQUIT rm -rf ${ROOTDIR} } tempdir_setup() { # Create minimal directory structure and populate it. # Caller must cd ${SRCTOP} before calling this function. for dir in dev bin efi/boot etc lib libexec sbin usr/lib usr/libexec; do mkdir -p ${ROOTDIR}/${dir} done # Install kernel, loader and minimal userland. make -DNO_ROOT DESTDIR=${ROOTDIR} \ MODULES_OVERRIDE= \ WITHOUT_DEBUG_FILES=yes \ WITHOUT_KERNEL_SYMBOLS=yes \ installkernel for dir in stand \ lib/libc lib/libedit lib/ncurses \ libexec/rtld-elf \ bin/sh sbin/init sbin/shutdown; do make -DNO_ROOT DESTDIR=${ROOTDIR} INSTALL="install -U" \ WITHOUT_DEBUG_FILES= \ WITHOUT_MAN= \ WITHOUT_PROFILE= \ WITHOUT_TESTS= \ WITHOUT_TOOLCHAIN= \ -C ${dir} install done # Put loader in standard EFI location. mv ${ROOTDIR}/boot/loader.efi ${ROOTDIR}/efi/boot/BOOTx64.EFI # Configuration files. cat > ${ROOTDIR}/boot/loader.conf < ${ROOTDIR}/etc/rc <&1 | tee ${BOOTLOG} # Check whether we successfully booted... if grep -q 'Hello world.' ${BOOTLOG}; then echo "OK" else die "Did not boot successfully, see ${BOOTLOG}" fi Index: projects/runtime-coverage-v2/usr.bin/stat/stat.c =================================================================== --- projects/runtime-coverage-v2/usr.bin/stat/stat.c (revision 347075) +++ projects/runtime-coverage-v2/usr.bin/stat/stat.c (revision 347076) @@ -1,1096 +1,1097 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-NetBSD * * Copyright (c) 2002 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Andrew Brown.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #if 0 #ifndef lint __RCSID("$NetBSD: stat.c,v 1.33 2011/01/15 22:54:10 njoly Exp $" "$OpenBSD: stat.c,v 1.14 2009/06/24 09:44:25 sobrado Exp $"); #endif #endif __FBSDID("$FreeBSD$"); #if HAVE_CONFIG_H #include "config.h" #else /* HAVE_CONFIG_H */ #define HAVE_STRUCT_STAT_ST_FLAGS 1 #define HAVE_STRUCT_STAT_ST_GEN 1 #define HAVE_STRUCT_STAT_ST_BIRTHTIME 1 #define HAVE_STRUCT_STAT_ST_MTIMENSEC 1 #define HAVE_DEVNAME 1 #endif /* HAVE_CONFIG_H */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if HAVE_STRUCT_STAT_ST_FLAGS #define DEF_F "%#Xf " #define RAW_F "%f " #define SHELL_F " st_flags=%f" #else /* HAVE_STRUCT_STAT_ST_FLAGS */ #define DEF_F #define RAW_F #define SHELL_F #endif /* HAVE_STRUCT_STAT_ST_FLAGS */ #if HAVE_STRUCT_STAT_ST_BIRTHTIME #define DEF_B "\"%SB\" " #define RAW_B "%B " #define SHELL_B "st_birthtime=%B " #else /* HAVE_STRUCT_STAT_ST_BIRTHTIME */ #define DEF_B #define RAW_B #define SHELL_B #endif /* HAVE_STRUCT_STAT_ST_BIRTHTIME */ #if HAVE_STRUCT_STAT_ST_ATIM #define st_atimespec st_atim #define st_ctimespec st_ctim #define st_mtimespec st_mtim #endif /* HAVE_STRUCT_STAT_ST_ATIM */ #define DEF_FORMAT \ "%d %i %Sp %l %Su %Sg %r %z \"%Sa\" \"%Sm\" \"%Sc\" " DEF_B \ "%k %b " DEF_F "%N" #define RAW_FORMAT "%d %i %#p %l %u %g %r %z %a %m %c " RAW_B \ "%k %b " RAW_F "%N" #define LS_FORMAT "%Sp %l %Su %Sg %Z %Sm %N%SY" #define LSF_FORMAT "%Sp %l %Su %Sg %Z %Sm %N%T%SY" #define SHELL_FORMAT \ "st_dev=%d st_ino=%i st_mode=%#p st_nlink=%l " \ "st_uid=%u st_gid=%g st_rdev=%r st_size=%z " \ "st_atime=%a st_mtime=%m st_ctime=%c " SHELL_B \ "st_blksize=%k st_blocks=%b" SHELL_F #define LINUX_FORMAT \ " File: \"%N\"%n" \ " Size: %-11z FileType: %HT%n" \ " Mode: (%OMp%03OLp/%.10Sp) Uid: (%5u/%8Su) Gid: (%5g/%8Sg)%n" \ "Device: %Hd,%Ld Inode: %i Links: %l%n" \ "Access: %Sa%n" \ "Modify: %Sm%n" \ - "Change: %Sc" + "Change: %Sc%n" \ + " Birth: %SB" #define TIME_FORMAT "%b %e %T %Y" #define FLAG_POUND 0x01 #define FLAG_SPACE 0x02 #define FLAG_PLUS 0x04 #define FLAG_ZERO 0x08 #define FLAG_MINUS 0x10 /* * These format characters must all be unique, except the magic one. */ #define FMT_MAGIC '%' #define FMT_DOT '.' 
#define SIMPLE_NEWLINE 'n' #define SIMPLE_TAB 't' #define SIMPLE_PERCENT '%' #define SIMPLE_NUMBER '@' #define FMT_POUND '#' #define FMT_SPACE ' ' #define FMT_PLUS '+' #define FMT_ZERO '0' #define FMT_MINUS '-' #define FMT_DECIMAL 'D' #define FMT_OCTAL 'O' #define FMT_UNSIGNED 'U' #define FMT_HEX 'X' #define FMT_FLOAT 'F' #define FMT_STRING 'S' #define FMTF_DECIMAL 0x01 #define FMTF_OCTAL 0x02 #define FMTF_UNSIGNED 0x04 #define FMTF_HEX 0x08 #define FMTF_FLOAT 0x10 #define FMTF_STRING 0x20 #define HIGH_PIECE 'H' #define MIDDLE_PIECE 'M' #define LOW_PIECE 'L' #define SHOW_realpath 'R' #define SHOW_st_dev 'd' #define SHOW_st_ino 'i' #define SHOW_st_mode 'p' #define SHOW_st_nlink 'l' #define SHOW_st_uid 'u' #define SHOW_st_gid 'g' #define SHOW_st_rdev 'r' #define SHOW_st_atime 'a' #define SHOW_st_mtime 'm' #define SHOW_st_ctime 'c' #define SHOW_st_btime 'B' #define SHOW_st_size 'z' #define SHOW_st_blocks 'b' #define SHOW_st_blksize 'k' #define SHOW_st_flags 'f' #define SHOW_st_gen 'v' #define SHOW_symlink 'Y' #define SHOW_filetype 'T' #define SHOW_filename 'N' #define SHOW_sizerdev 'Z' void usage(const char *); void output(const struct stat *, const char *, const char *, int, int); int format1(const struct stat *, /* stat info */ const char *, /* the file name */ const char *, int, /* the format string itself */ char *, size_t, /* a place to put the output */ int, int, int, int, /* the parsed format */ int, int); int hex2byte(const char [2]); #if HAVE_STRUCT_STAT_ST_FLAGS char *xfflagstostr(unsigned long); #endif static const char *timefmt; static int linkfail; #define addchar(s, c, nl) \ do { \ (void)fputc((c), (s)); \ (*nl) = ((c) == '\n'); \ } while (0/*CONSTCOND*/) int main(int argc, char *argv[]) { struct stat st; int ch, rc, errs, am_readlink; int lsF, fmtchar, usestat, nfs_handle, fn, nonl, quiet; const char *statfmt, *options, *synopsis; char dname[sizeof _PATH_DEV + SPECNAMELEN] = _PATH_DEV; fhandle_t fhnd; const char *file; am_readlink = 0; lsF = 0; fmtchar = '\0'; usestat = 0; nfs_handle = 0; nonl = 0; quiet = 0; linkfail = 0; statfmt = NULL; timefmt = NULL; if (strcmp(getprogname(), "readlink") == 0) { am_readlink = 1; options = "fn"; synopsis = "[-fn] [file ...]"; statfmt = "%Y"; fmtchar = 'f'; quiet = 1; } else { options = "f:FHlLnqrst:x"; synopsis = "[-FLnq] [-f format | -l | -r | -s | -x] " "[-t timefmt] [file|handle ...]"; } while ((ch = getopt(argc, argv, options)) != -1) switch (ch) { case 'F': lsF = 1; break; case 'H': nfs_handle = 1; break; case 'L': usestat = 1; break; case 'n': nonl = 1; break; case 'q': quiet = 1; break; case 'f': if (am_readlink) { statfmt = "%R"; break; } statfmt = optarg; /* FALLTHROUGH */ case 'l': case 'r': case 's': case 'x': if (fmtchar != 0) errx(1, "can't use format '%c' with '%c'", fmtchar, ch); fmtchar = ch; break; case 't': timefmt = optarg; break; default: usage(synopsis); } argc -= optind; argv += optind; fn = 1; if (fmtchar == '\0') { if (lsF) fmtchar = 'l'; else { fmtchar = 'f'; statfmt = DEF_FORMAT; } } if (lsF && fmtchar != 'l') errx(1, "can't use format '%c' with -F", fmtchar); switch (fmtchar) { case 'f': /* statfmt already set */ break; case 'l': statfmt = lsF ? 
LSF_FORMAT : LS_FORMAT; break; case 'r': statfmt = RAW_FORMAT; break; case 's': statfmt = SHELL_FORMAT; break; case 'x': statfmt = LINUX_FORMAT; if (timefmt == NULL) timefmt = "%c"; break; default: usage(synopsis); /*NOTREACHED*/ } if (timefmt == NULL) timefmt = TIME_FORMAT; errs = 0; do { if (argc == 0) { if (fdevname_r(STDIN_FILENO, dname + sizeof _PATH_DEV - 1, SPECNAMELEN) != NULL) file = dname; else file = "(stdin)"; rc = fstat(STDIN_FILENO, &st); } else { int j; file = argv[0]; if (nfs_handle) { rc = 0; bzero(&fhnd, sizeof(fhnd)); j = MIN(2 * sizeof(fhnd), strlen(file)); if ((j & 1) != 0) { rc = -1; } else { while (j) { rc = hex2byte(&file[j - 2]); if (rc == -1) break; ((char*) &fhnd)[j / 2 - 1] = rc; j -= 2; } } if (rc == -1) errno = EINVAL; else rc = fhstat(&fhnd, &st); } else if (usestat) { /* * Try stat() and if it fails, fall back to * lstat() just in case we're examining a * broken symlink. */ if ((rc = stat(file, &st)) == -1 && errno == ENOENT && (rc = lstat(file, &st)) == -1) errno = ENOENT; } else rc = lstat(file, &st); } if (rc == -1) { errs = 1; linkfail = 1; if (!quiet) warn("%s: stat", file); } else output(&st, file, statfmt, fn, nonl); argv++; argc--; fn++; } while (argc > 0); return (am_readlink ? linkfail : errs); } #if HAVE_STRUCT_STAT_ST_FLAGS /* * fflagstostr() wrapper that leaks only once */ char * xfflagstostr(unsigned long fflags) { static char *str = NULL; if (str != NULL) free(str); str = fflagstostr(fflags); if (str == NULL) err(1, "fflagstostr"); return (str); } #endif /* HAVE_STRUCT_STAT_ST_FLAGS */ void usage(const char *synopsis) { (void)fprintf(stderr, "usage: %s %s\n", getprogname(), synopsis); exit(1); } /* * Parses a format string. */ void output(const struct stat *st, const char *file, const char *statfmt, int fn, int nonl) { int flags, size, prec, ofmt, hilo, what; char buf[PATH_MAX + 4 + 1]; const char *subfmt; int nl, t, i; nl = 1; while (*statfmt != '\0') { /* * Non-format characters go straight out. */ if (*statfmt != FMT_MAGIC) { addchar(stdout, *statfmt, &nl); statfmt++; continue; } /* * The current format "substring" starts here, * and then we skip the magic. */ subfmt = statfmt; statfmt++; /* * Some simple one-character "formats". */ switch (*statfmt) { case SIMPLE_NEWLINE: addchar(stdout, '\n', &nl); statfmt++; continue; case SIMPLE_TAB: addchar(stdout, '\t', &nl); statfmt++; continue; case SIMPLE_PERCENT: addchar(stdout, '%', &nl); statfmt++; continue; case SIMPLE_NUMBER: { char num[12], *p; snprintf(num, sizeof(num), "%d", fn); for (p = &num[0]; *p; p++) addchar(stdout, *p, &nl); statfmt++; continue; } } /* * This must be an actual format string. Format strings are * similar to printf(3) formats up to a point, and are of * the form: * * % required start of format * [-# +0] opt. format characters * size opt. field width * . opt. decimal separator, followed by * prec opt. precision * fmt opt. output specifier (string, numeric, etc.) * sub opt. sub field specifier (high, middle, low) * datum required field specifier (size, mode, etc) * * Only the % and the datum selector are required. All data * have reasonable default output forms. The "sub" specifier * only applies to certain data (mode, dev, rdev, filetype). * The symlink output defaults to STRING, yet will only emit * the leading " -> " if STRING is explicitly specified. The * sizerdev datum will generate rdev output for character or * block devices, and size output for all others. 
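 *
 * A few illustrative combinations (hypothetical invocations of the
 * grammar above):
 *
 *	%Sp		mode as a string, e.g. "drwxr-xr-x"
 *	%OMp%03OLp	middle and low mode pieces in octal, e.g. "0644"
 *	%8Su		owner name in an eight-column field
 *	%.3Fm		mtime as seconds with three fractional digits
 *	%HT		descriptive file type, e.g. "Directory"
 *	%N%SY		file name, then " -> target" for a symlink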
*/ flags = 0; do { if (*statfmt == FMT_POUND) flags |= FLAG_POUND; else if (*statfmt == FMT_SPACE) flags |= FLAG_SPACE; else if (*statfmt == FMT_PLUS) flags |= FLAG_PLUS; else if (*statfmt == FMT_ZERO) flags |= FLAG_ZERO; else if (*statfmt == FMT_MINUS) flags |= FLAG_MINUS; else break; statfmt++; } while (1/*CONSTCOND*/); size = -1; if (isdigit((unsigned)*statfmt)) { size = 0; while (isdigit((unsigned)*statfmt)) { size = (size * 10) + (*statfmt - '0'); statfmt++; if (size < 0) goto badfmt; } } prec = -1; if (*statfmt == FMT_DOT) { statfmt++; prec = 0; while (isdigit((unsigned)*statfmt)) { prec = (prec * 10) + (*statfmt - '0'); statfmt++; if (prec < 0) goto badfmt; } } #define fmtcase(x, y) case (y): (x) = (y); statfmt++; break #define fmtcasef(x, y, z) case (y): (x) = (z); statfmt++; break switch (*statfmt) { fmtcasef(ofmt, FMT_DECIMAL, FMTF_DECIMAL); fmtcasef(ofmt, FMT_OCTAL, FMTF_OCTAL); fmtcasef(ofmt, FMT_UNSIGNED, FMTF_UNSIGNED); fmtcasef(ofmt, FMT_HEX, FMTF_HEX); fmtcasef(ofmt, FMT_FLOAT, FMTF_FLOAT); fmtcasef(ofmt, FMT_STRING, FMTF_STRING); default: ofmt = 0; break; } switch (*statfmt) { fmtcase(hilo, HIGH_PIECE); fmtcase(hilo, MIDDLE_PIECE); fmtcase(hilo, LOW_PIECE); default: hilo = 0; break; } switch (*statfmt) { fmtcase(what, SHOW_realpath); fmtcase(what, SHOW_st_dev); fmtcase(what, SHOW_st_ino); fmtcase(what, SHOW_st_mode); fmtcase(what, SHOW_st_nlink); fmtcase(what, SHOW_st_uid); fmtcase(what, SHOW_st_gid); fmtcase(what, SHOW_st_rdev); fmtcase(what, SHOW_st_atime); fmtcase(what, SHOW_st_mtime); fmtcase(what, SHOW_st_ctime); fmtcase(what, SHOW_st_btime); fmtcase(what, SHOW_st_size); fmtcase(what, SHOW_st_blocks); fmtcase(what, SHOW_st_blksize); fmtcase(what, SHOW_st_flags); fmtcase(what, SHOW_st_gen); fmtcase(what, SHOW_symlink); fmtcase(what, SHOW_filetype); fmtcase(what, SHOW_filename); fmtcase(what, SHOW_sizerdev); default: goto badfmt; } #undef fmtcasef #undef fmtcase t = format1(st, file, subfmt, statfmt - subfmt, buf, sizeof(buf), flags, size, prec, ofmt, hilo, what); for (i = 0; i < t && i < (int)(sizeof(buf) - 1); i++) addchar(stdout, buf[i], &nl); continue; badfmt: errx(1, "%.*s: bad format", (int)(statfmt - subfmt + 1), subfmt); } if (!nl && !nonl) (void)fputc('\n', stdout); (void)fflush(stdout); } /* * Arranges output according to a single parsed format substring. */ int format1(const struct stat *st, const char *file, const char *fmt, int flen, char *buf, size_t blen, int flags, int size, int prec, int ofmt, int hilo, int what) { u_int64_t data; char *stmp, lfmt[24], tmp[20]; const char *sdata; char smode[12], sid[12], path[PATH_MAX + 4]; const struct timespec *tsp; struct timespec ts; struct tm *tm; int l, small, formats; tsp = NULL; formats = 0; small = 0; /* * First, pick out the data and tweak it based on hilo or * specified output format (symlink output only). */ switch (what) { case SHOW_st_dev: case SHOW_st_rdev: small = (sizeof(st->st_dev) == 4); data = (what == SHOW_st_dev) ? st->st_dev : st->st_rdev; #if HAVE_DEVNAME sdata = (what == SHOW_st_dev) ? devname(st->st_dev, S_IFBLK) : devname(st->st_rdev, S_ISCHR(st->st_mode) ? S_IFCHR : S_ISBLK(st->st_mode) ? 
S_IFBLK : 0U); if (sdata == NULL) sdata = "???"; #endif /* HAVE_DEVNAME */ if (hilo == HIGH_PIECE) { data = major(data); hilo = 0; } else if (hilo == LOW_PIECE) { data = minor((unsigned)data); hilo = 0; } formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED | FMTF_HEX | #if HAVE_DEVNAME FMTF_STRING; #else /* HAVE_DEVNAME */ 0; #endif /* HAVE_DEVNAME */ if (ofmt == 0) ofmt = FMTF_UNSIGNED; break; case SHOW_st_ino: small = (sizeof(st->st_ino) == 4); data = st->st_ino; sdata = NULL; formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED | FMTF_HEX; if (ofmt == 0) ofmt = FMTF_UNSIGNED; break; case SHOW_st_mode: small = (sizeof(st->st_mode) == 4); data = st->st_mode; strmode(st->st_mode, smode); stmp = smode; l = strlen(stmp); if (stmp[l - 1] == ' ') stmp[--l] = '\0'; if (hilo == HIGH_PIECE) { data >>= 12; stmp += 1; stmp[3] = '\0'; hilo = 0; } else if (hilo == MIDDLE_PIECE) { data = (data >> 9) & 07; stmp += 4; stmp[3] = '\0'; hilo = 0; } else if (hilo == LOW_PIECE) { data &= 0777; stmp += 7; stmp[3] = '\0'; hilo = 0; } sdata = stmp; formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED | FMTF_HEX | FMTF_STRING; if (ofmt == 0) ofmt = FMTF_OCTAL; break; case SHOW_st_nlink: small = (sizeof(st->st_dev) == 4); data = st->st_nlink; sdata = NULL; formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED | FMTF_HEX; if (ofmt == 0) ofmt = FMTF_UNSIGNED; break; case SHOW_st_uid: small = (sizeof(st->st_uid) == 4); data = st->st_uid; sdata = user_from_uid(st->st_uid, 1); if (sdata == NULL) { snprintf(sid, sizeof(sid), "(%ld)", (long)st->st_uid); sdata = sid; } formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED | FMTF_HEX | FMTF_STRING; if (ofmt == 0) ofmt = FMTF_UNSIGNED; break; case SHOW_st_gid: small = (sizeof(st->st_gid) == 4); data = st->st_gid; sdata = group_from_gid(st->st_gid, 1); if (sdata == NULL) { snprintf(sid, sizeof(sid), "(%ld)", (long)st->st_gid); sdata = sid; } formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED | FMTF_HEX | FMTF_STRING; if (ofmt == 0) ofmt = FMTF_UNSIGNED; break; case SHOW_st_atime: tsp = &st->st_atimespec; /* FALLTHROUGH */ case SHOW_st_mtime: if (tsp == NULL) tsp = &st->st_mtimespec; /* FALLTHROUGH */ case SHOW_st_ctime: if (tsp == NULL) tsp = &st->st_ctimespec; /* FALLTHROUGH */ #if HAVE_STRUCT_STAT_ST_BIRTHTIME case SHOW_st_btime: if (tsp == NULL) tsp = &st->st_birthtimespec; #endif /* HAVE_STRUCT_STAT_ST_BIRTHTIME */ ts = *tsp; /* copy so we can muck with it */ small = (sizeof(ts.tv_sec) == 4); data = ts.tv_sec; tm = localtime(&ts.tv_sec); if (tm == NULL) { ts.tv_sec = 0; tm = localtime(&ts.tv_sec); } (void)strftime(path, sizeof(path), timefmt, tm); sdata = path; formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED | FMTF_HEX | FMTF_FLOAT | FMTF_STRING; if (ofmt == 0) ofmt = FMTF_DECIMAL; break; case SHOW_st_size: small = (sizeof(st->st_size) == 4); data = st->st_size; sdata = NULL; formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED | FMTF_HEX; if (ofmt == 0) ofmt = FMTF_UNSIGNED; break; case SHOW_st_blocks: small = (sizeof(st->st_blocks) == 4); data = st->st_blocks; sdata = NULL; formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED | FMTF_HEX; if (ofmt == 0) ofmt = FMTF_UNSIGNED; break; case SHOW_st_blksize: small = (sizeof(st->st_blksize) == 4); data = st->st_blksize; sdata = NULL; formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED | FMTF_HEX; if (ofmt == 0) ofmt = FMTF_UNSIGNED; break; #if HAVE_STRUCT_STAT_ST_FLAGS case SHOW_st_flags: small = (sizeof(st->st_flags) == 4); data = st->st_flags; sdata = xfflagstostr(st->st_flags); if (*sdata == '\0') sdata = "-"; formats = 
FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED | FMTF_HEX | FMTF_STRING; if (ofmt == 0) ofmt = FMTF_UNSIGNED; break; #endif /* HAVE_STRUCT_STAT_ST_FLAGS */ #if HAVE_STRUCT_STAT_ST_GEN case SHOW_st_gen: small = (sizeof(st->st_gen) == 4); data = st->st_gen; sdata = NULL; formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED | FMTF_HEX; if (ofmt == 0) ofmt = FMTF_UNSIGNED; break; #endif /* HAVE_STRUCT_STAT_ST_GEN */ case SHOW_realpath: small = 0; data = 0; if (file == NULL) { (void)strlcpy(path, "(stdin)", sizeof(path)); sdata = path; } else { snprintf(path, sizeof(path), " -> "); if (realpath(file, path + 4) == NULL) { linkfail = 1; l = 0; path[0] = '\0'; } sdata = path + (ofmt == FMTF_STRING ? 0 : 4); } formats = FMTF_STRING; if (ofmt == 0) ofmt = FMTF_STRING; break; case SHOW_symlink: small = 0; data = 0; if (S_ISLNK(st->st_mode)) { snprintf(path, sizeof(path), " -> "); l = readlink(file, path + 4, sizeof(path) - 4 - 1); if (l == -1) { linkfail = 1; l = 0; path[0] = '\0'; } path[l + 4] = '\0'; sdata = path + (ofmt == FMTF_STRING ? 0 : 4); } else { linkfail = 1; sdata = ""; } formats = FMTF_STRING; if (ofmt == 0) ofmt = FMTF_STRING; break; case SHOW_filetype: small = 0; data = 0; sdata = ""; if (hilo == 0 || hilo == LOW_PIECE) { switch (st->st_mode & S_IFMT) { case S_IFIFO: sdata = "|"; break; case S_IFDIR: sdata = "/"; break; case S_IFREG: if (st->st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) sdata = "*"; break; case S_IFLNK: sdata = "@"; break; case S_IFSOCK: sdata = "="; break; #ifdef S_IFWHT case S_IFWHT: sdata = "%"; break; #endif /* S_IFWHT */ #ifdef S_IFDOOR case S_IFDOOR: sdata = ">"; break; #endif /* S_IFDOOR */ } hilo = 0; } else if (hilo == HIGH_PIECE) { switch (st->st_mode & S_IFMT) { case S_IFIFO: sdata = "Fifo File"; break; case S_IFCHR: sdata = "Character Device"; break; case S_IFDIR: sdata = "Directory"; break; case S_IFBLK: sdata = "Block Device"; break; case S_IFREG: sdata = "Regular File"; break; case S_IFLNK: sdata = "Symbolic Link"; break; case S_IFSOCK: sdata = "Socket"; break; #ifdef S_IFWHT case S_IFWHT: sdata = "Whiteout File"; break; #endif /* S_IFWHT */ #ifdef S_IFDOOR case S_IFDOOR: sdata = "Door"; break; #endif /* S_IFDOOR */ default: sdata = "???"; break; } hilo = 0; } formats = FMTF_STRING; if (ofmt == 0) ofmt = FMTF_STRING; break; case SHOW_filename: small = 0; data = 0; (void)strlcpy(path, file, sizeof(path)); sdata = path; formats = FMTF_STRING; if (ofmt == 0) ofmt = FMTF_STRING; break; case SHOW_sizerdev: if (S_ISCHR(st->st_mode) || S_ISBLK(st->st_mode)) { char majdev[20], mindev[20]; int l1, l2; l1 = format1(st, file, fmt, flen, majdev, sizeof(majdev), flags, size, prec, ofmt, HIGH_PIECE, SHOW_st_rdev); l2 = format1(st, file, fmt, flen, mindev, sizeof(mindev), flags, size, prec, ofmt, LOW_PIECE, SHOW_st_rdev); return (snprintf(buf, blen, "%.*s,%.*s", l1, majdev, l2, mindev)); } else { return (format1(st, file, fmt, flen, buf, blen, flags, size, prec, ofmt, 0, SHOW_st_size)); } /*NOTREACHED*/ default: errx(1, "%.*s: bad format", (int)flen, fmt); } /* * If a subdatum was specified but not supported, or an output * format was selected that is not supported, that's an error. */ if (hilo != 0 || (ofmt & formats) == 0) errx(1, "%.*s: bad format", (int)flen, fmt); /* * Assemble the format string for passing to printf(3). 
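 *
 * For instance, the LINUX_FORMAT field "%-11z" is assembled here into
 * "%-11llu": the minus flag, the field width, the "ll" length modifier
 * added for the 64-bit datum, and "u" for st_size's default unsigned
 * output form.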
*/ lfmt[0] = '\0'; (void)strcat(lfmt, "%"); if (flags & FLAG_POUND) (void)strcat(lfmt, "#"); if (flags & FLAG_SPACE) (void)strcat(lfmt, " "); if (flags & FLAG_PLUS) (void)strcat(lfmt, "+"); if (flags & FLAG_MINUS) (void)strcat(lfmt, "-"); if (flags & FLAG_ZERO) (void)strcat(lfmt, "0"); /* * Only the timespecs support the FLOAT output format, and that * requires work that differs from the other formats. */ if (ofmt == FMTF_FLOAT) { /* * Nothing after the decimal point, so just print seconds. */ if (prec == 0) { if (size != -1) { (void)snprintf(tmp, sizeof(tmp), "%d", size); (void)strcat(lfmt, tmp); } (void)strcat(lfmt, "lld"); return (snprintf(buf, blen, lfmt, (long long)ts.tv_sec)); } /* * Unspecified precision gets all the precision we have: * 9 digits. */ if (prec == -1) prec = 9; /* * Adjust the size for the decimal point and the digits * that will follow. */ size -= prec + 1; /* * Any leftover size that's legitimate will be used. */ if (size > 0) { (void)snprintf(tmp, sizeof(tmp), "%d", size); (void)strcat(lfmt, tmp); } /* Seconds: time_t cast to long long. */ (void)strcat(lfmt, "lld"); /* * The stuff after the decimal point always needs zero * filling. */ (void)strcat(lfmt, ".%0"); /* * We can "print" at most nine digits of precision. The * rest we will pad on at the end. * * Nanoseconds: long. */ (void)snprintf(tmp, sizeof(tmp), "%dld", MIN(prec, 9)); (void)strcat(lfmt, tmp); /* * For precision of less that nine digits, trim off the * less significant figures. */ for (; prec < 9; prec++) ts.tv_nsec /= 10; /* * Use the format, and then tack on any zeroes that * might be required to make up the requested precision. */ l = snprintf(buf, blen, lfmt, (long long)ts.tv_sec, ts.tv_nsec); for (; prec > 9 && l < (int)blen; prec--, l++) (void)strcat(buf, "0"); return (l); } /* * Add on size and precision, if specified, to the format. */ if (size != -1) { (void)snprintf(tmp, sizeof(tmp), "%d", size); (void)strcat(lfmt, tmp); } if (prec != -1) { (void)snprintf(tmp, sizeof(tmp), ".%d", prec); (void)strcat(lfmt, tmp); } /* * String output uses the temporary sdata. */ if (ofmt == FMTF_STRING) { if (sdata == NULL) errx(1, "%.*s: bad format", (int)flen, fmt); (void)strcat(lfmt, "s"); return (snprintf(buf, blen, lfmt, sdata)); } /* * Ensure that sign extension does not cause bad looking output * for some forms. */ if (small && ofmt != FMTF_DECIMAL) data = (u_int32_t)data; /* * The four "numeric" output forms. */ (void)strcat(lfmt, "ll"); switch (ofmt) { case FMTF_DECIMAL: (void)strcat(lfmt, "d"); break; case FMTF_OCTAL: (void)strcat(lfmt, "o"); break; case FMTF_UNSIGNED: (void)strcat(lfmt, "u"); break; case FMTF_HEX: (void)strcat(lfmt, "x"); break; } return (snprintf(buf, blen, lfmt, data)); } #define hex2nibble(c) (c <= '9' ? c - '0' : toupper(c) - 'A' + 10) int hex2byte(const char c[2]) { if (!(ishexnumber(c[0]) && ishexnumber(c[1]))) return -1; return (hex2nibble(c[0]) << 4) + hex2nibble(c[1]); } Index: projects/runtime-coverage-v2/usr.bin/stat/tests/stat_test.sh =================================================================== --- projects/runtime-coverage-v2/usr.bin/stat/tests/stat_test.sh (revision 347075) +++ projects/runtime-coverage-v2/usr.bin/stat/tests/stat_test.sh (revision 347076) @@ -1,244 +1,246 @@ # # Copyright (c) 2017 Dell EMC # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. 
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ atf_test_case F_flag F_flag_head() { atf_set "descr" "Verify the output format for -F" } F_flag_body() { # TODO: socket, whiteout file atf_check touch a atf_check mkdir b atf_check install -m 0777 /dev/null c atf_check ln -s a d atf_check mkfifo f atf_check -o match:'.* a' stat -Fn a atf_check -o match:'.* b/' stat -Fn b atf_check -o match:'.* c\*' stat -Fn c atf_check -o match:'.* d@' stat -Fn d atf_check -o match:'.* f\|' stat -Fn f } atf_test_case l_flag l_flag_head() { atf_set "descr" "Verify the output format for -l" } l_flag_body() { atf_check touch a atf_check ln a b atf_check ln -s a c atf_check mkdir d paths="a b c d" ls_out=ls.output stat_out=stat.output # NOTE: # - Even though stat -l claims to be equivalent to `ls -lT`, the # whitespace is a bit more liberal in the `ls -lT` output. # - `ls -ldT` is used to not recursively list the contents of # directories. for path in $paths; do atf_check -o save:$ls_out ls -ldT $path cat $ls_out atf_check -o save:$stat_out stat -l $path cat $stat_out echo "Comparing normalized whitespace" atf_check sed -i '' -E -e 's/[[:space:]]+/ /g' $ls_out atf_check sed -i '' -E -e 's/[[:space:]]+/ /g' $stat_out atf_check cmp $ls_out $stat_out done } atf_test_case n_flag n_flag_head() { atf_set "descr" "Verify that -n suppresses newline output for lines" } n_flag_body() { atf_check touch a b atf_check -o inline:"$(stat a | tr -d '\n')" stat -n a atf_check -o inline:"$(stat a b | tr -d '\n')" stat -n a b } atf_test_case q_flag q_flag_head() { atf_set "descr" "Verify that -q suppresses error messages from l?stat(2)" } q_flag_body() { ln -s nonexistent broken-link atf_check -s exit:1 stat -q nonexistent atf_check -s exit:1 stat -q nonexistent atf_check -o not-empty stat -q broken-link atf_check -o not-empty stat -qL broken-link } atf_test_case r_flag r_flag_head() { atf_set "descr" "Verify that -r displays output in 'raw mode'" } r_flag_body() { atf_check touch a # TODO: add more thorough checks. atf_check -o not-empty stat -r a } atf_test_case s_flag s_flag_head() { atf_set "descr" "Verify the output format for -s" } s_flag_body() { atf_check touch a atf_check ln a b atf_check ln -s a c atf_check mkdir d paths="a b c d" # The order/name of each of the fields is specified by stat(1) manpage. 
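# For example (illustrative invocation): `eval $(stat -s /etc/rc)`
# defines one shell variable per field below in the current shell,
# e.g. st_uid=0 and st_mode=0100644.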
fields="st_dev st_ino st_mode st_nlink" fields="$fields st_uid st_gid st_rdev st_size" fields="$fields st_uid st_gid st_mode" fields="$fields st_atime st_mtime st_ctime st_birthtime" fields="$fields st_blksize st_blocks st_flags" # NOTE: the following... # - ... relies on set -eu to ensure that the fields are set, as # documented, in stat(1). # - ... uses a subshell to ensure that the eval'ed variables don't # pollute the next iteration's behavior. for path in $paths; do ( set -eu eval $(stat -s $path) for field in $fields; do eval "$field=\$$field" done ) || atf_fail 'One or more fields not set by stat(1)' done } atf_test_case t_flag t_flag_head() { atf_set "descr" "Verify the output format for -t" } t_flag_body() { atf_check touch foo atf_check touch -d 1970-01-01T00:00:42 foo atf_check -o inline:'42\n' \ stat -t '%s' -f '%a' foo atf_check -o inline:'1970-01-01 00:00:42\n' \ stat -t '%F %H:%M:%S' -f '%Sa' foo } x_output_date() { local date_format='%a %b %e %H:%M:%S %Y' stat -t "$date_format" "$@" } x_output() { local path=$1; shift local atime_s=$(x_output_date -f '%Sa' $path) + local btime_s=$(x_output_date -f '%SB' $path) local ctime_s=$(x_output_date -f '%Sc' $path) local devid=$(stat -f '%Hd,%Ld' $path) local file_type_s=$(stat -f '%HT' $path) local gid=$(stat -f '%5g' $path) local groupname=$(stat -f '%8Sg' $path) local inode=$(stat -f '%i' $path) local mode=$(stat -f '%Mp%Lp' $path) local mode_s=$(stat -f '%Sp' $path) local mtime_s=$(x_output_date -f '%Sm' $path) local nlink=$(stat -f '%l' $path) local size_a=$(stat -f '%-11z' $path) local uid=$(stat -f '%5u' $path) local username=$(stat -f '%8Su' $path) cat < * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include <sys/param.h> #ifndef WITHOUT_CAPSICUM #include <sys/capsicum.h> #endif #include <sys/queue.h> #include <sys/errno.h> #include <sys/stat.h> #include <sys/ioctl.h> #include <sys/disk.h> #include <assert.h> #ifndef WITHOUT_CAPSICUM #include <capsicum_helpers.h> #endif #include <err.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <pthread.h> #include <pthread_np.h> #include <signal.h> #include <sysexits.h> #include <unistd.h> #include <machine/atomic.h> #include "bhyverun.h" #include "mevent.h" #include "block_if.h" #define BLOCKIF_SIG 0xb109b109 #define BLOCKIF_NUMTHR 8 -#define BLOCKIF_MAXREQ (64 + BLOCKIF_NUMTHR) +#define BLOCKIF_MAXREQ (BLOCKIF_RING_MAX + BLOCKIF_NUMTHR) enum blockop { BOP_READ, BOP_WRITE, BOP_FLUSH, BOP_DELETE }; enum blockstat { BST_FREE, BST_BLOCK, BST_PEND, BST_BUSY, BST_DONE }; struct blockif_elem { TAILQ_ENTRY(blockif_elem) be_link; struct blockif_req *be_req; enum blockop be_op; enum blockstat be_status; pthread_t be_tid; off_t be_block; }; struct blockif_ctxt { int bc_magic; int bc_fd; int bc_ischr; int bc_isgeom; int bc_candelete; int bc_rdonly; off_t bc_size; int bc_sectsz; int bc_psectsz; int bc_psectoff; int bc_closing; pthread_t bc_btid[BLOCKIF_NUMTHR]; pthread_mutex_t bc_mtx; pthread_cond_t bc_cond; /* Request elements and free/pending/busy queues */ TAILQ_HEAD(, blockif_elem) bc_freeq; TAILQ_HEAD(, blockif_elem) bc_pendq; TAILQ_HEAD(, blockif_elem) bc_busyq; struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; }; static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; struct blockif_sig_elem { pthread_mutex_t bse_mtx; pthread_cond_t bse_cond; int bse_pending; struct blockif_sig_elem *bse_next; }; static struct blockif_sig_elem *blockif_bse_head; static int blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, enum blockop op) { struct blockif_elem *be, *tbe; off_t off; int i; be = TAILQ_FIRST(&bc->bc_freeq); assert(be != NULL); assert(be->be_status == BST_FREE); TAILQ_REMOVE(&bc->bc_freeq, be, be_link); be->be_req = breq; be->be_op = op; switch (op) { case BOP_READ: case BOP_WRITE: case BOP_DELETE: off = breq->br_offset; for (i = 0; i < breq->br_iovcnt; i++) off += breq->br_iov[i].iov_len; break; default: off = OFF_MAX; } be->be_block = off; TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { if (tbe->be_block == breq->br_offset) break; } if (tbe == NULL) { TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { if (tbe->be_block == breq->br_offset) break; } } if (tbe == NULL) be->be_status = BST_PEND; else be->be_status = BST_BLOCK; TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); return (be->be_status == BST_PEND); } static int blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) { struct blockif_elem *be; TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { if (be->be_status == BST_PEND) break; assert(be->be_status == BST_BLOCK); } if (be == NULL) return (0); TAILQ_REMOVE(&bc->bc_pendq, be, be_link); be->be_status = BST_BUSY; be->be_tid = t; TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); *bep = be; return (1); } static void blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) { struct blockif_elem *tbe; if (be->be_status == BST_DONE || be->be_status == BST_BUSY) TAILQ_REMOVE(&bc->bc_busyq, be, be_link); else TAILQ_REMOVE(&bc->bc_pendq, be, be_link); TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { if (tbe->be_req->br_offset == be->be_block) tbe->be_status = BST_PEND; } be->be_tid = 0; be->be_status = BST_FREE; be->be_req = NULL; TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); } static void blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) { struct blockif_req *br; off_t arg[2]; ssize_t clen, len, off, boff, voff; int i, err; br = be->be_req; if (br->br_iovcnt <= 1) buf =
NULL; err = 0; switch (be->be_op) { case BOP_READ: if (buf == NULL) { if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, br->br_offset)) < 0) err = errno; else br->br_resid -= len; break; } i = 0; off = voff = 0; while (br->br_resid > 0) { len = MIN(br->br_resid, MAXPHYS); if (pread(bc->bc_fd, buf, len, br->br_offset + off) < 0) { err = errno; break; } boff = 0; do { clen = MIN(len - boff, br->br_iov[i].iov_len - voff); memcpy(br->br_iov[i].iov_base + voff, buf + boff, clen); if (clen < br->br_iov[i].iov_len - voff) voff += clen; else { i++; voff = 0; } boff += clen; } while (boff < len); off += len; br->br_resid -= len; } break; case BOP_WRITE: if (bc->bc_rdonly) { err = EROFS; break; } if (buf == NULL) { if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, br->br_offset)) < 0) err = errno; else br->br_resid -= len; break; } i = 0; off = voff = 0; while (br->br_resid > 0) { len = MIN(br->br_resid, MAXPHYS); boff = 0; do { clen = MIN(len - boff, br->br_iov[i].iov_len - voff); memcpy(buf + boff, br->br_iov[i].iov_base + voff, clen); if (clen < br->br_iov[i].iov_len - voff) voff += clen; else { i++; voff = 0; } boff += clen; } while (boff < len); if (pwrite(bc->bc_fd, buf, len, br->br_offset + off) < 0) { err = errno; break; } off += len; br->br_resid -= len; } break; case BOP_FLUSH: if (bc->bc_ischr) { if (ioctl(bc->bc_fd, DIOCGFLUSH)) err = errno; } else if (fsync(bc->bc_fd)) err = errno; break; case BOP_DELETE: if (!bc->bc_candelete) err = EOPNOTSUPP; else if (bc->bc_rdonly) err = EROFS; else if (bc->bc_ischr) { arg[0] = br->br_offset; arg[1] = br->br_resid; if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) err = errno; else br->br_resid = 0; } else err = EOPNOTSUPP; break; default: err = EINVAL; break; } be->be_status = BST_DONE; (*br->br_callback)(br, err); } static void * blockif_thr(void *arg) { struct blockif_ctxt *bc; struct blockif_elem *be; pthread_t t; uint8_t *buf; bc = arg; if (bc->bc_isgeom) buf = malloc(MAXPHYS); else buf = NULL; t = pthread_self(); pthread_mutex_lock(&bc->bc_mtx); for (;;) { while (blockif_dequeue(bc, t, &be)) { pthread_mutex_unlock(&bc->bc_mtx); blockif_proc(bc, be, buf); pthread_mutex_lock(&bc->bc_mtx); blockif_complete(bc, be); } /* Check ctxt status here to see if exit requested */ if (bc->bc_closing) break; pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); } pthread_mutex_unlock(&bc->bc_mtx); if (buf) free(buf); pthread_exit(NULL); return (NULL); } static void blockif_sigcont_handler(int signal, enum ev_type type, void *arg) { struct blockif_sig_elem *bse; for (;;) { /* * Process the entire list even if not intended for * this thread. 
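	 *
	 * (The list head is popped lock-free with atomic_cmpset_ptr()
	 * below, since this handler can race with blockif_cancel()
	 * pushing new elements onto the same list.)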
*/ do { bse = blockif_bse_head; if (bse == NULL) return; } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, (uintptr_t)bse, (uintptr_t)bse->bse_next)); pthread_mutex_lock(&bse->bse_mtx); bse->bse_pending = 0; pthread_cond_signal(&bse->bse_cond); pthread_mutex_unlock(&bse->bse_mtx); } } static void blockif_init(void) { mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); (void) signal(SIGCONT, SIG_IGN); } struct blockif_ctxt * blockif_open(const char *optstr, const char *ident) { char tname[MAXCOMLEN + 1]; char name[MAXPATHLEN]; char *nopt, *xopts, *cp; struct blockif_ctxt *bc; struct stat sbuf; struct diocgattr_arg arg; off_t size, psectsz, psectoff; int extra, fd, i, sectsz; int nocache, sync, ro, candelete, geom, ssopt, pssopt; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE }; #endif pthread_once(&blockif_once, blockif_init); fd = -1; ssopt = 0; nocache = 0; sync = 0; ro = 0; /* * The first element in the optstring is always a pathname. * Optional elements follow */ nopt = xopts = strdup(optstr); while (xopts != NULL) { cp = strsep(&xopts, ","); if (cp == nopt) /* file or device pathname */ continue; else if (!strcmp(cp, "nocache")) nocache = 1; else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) sync = 1; else if (!strcmp(cp, "ro")) ro = 1; else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2) ; else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) pssopt = ssopt; else { fprintf(stderr, "Invalid device option \"%s\"\n", cp); goto err; } } extra = 0; if (nocache) extra |= O_DIRECT; if (sync) extra |= O_SYNC; fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); if (fd < 0 && !ro) { /* Attempt a r/w fail with a r/o open */ fd = open(nopt, O_RDONLY | extra); ro = 1; } if (fd < 0) { warn("Could not open backing file: %s", nopt); goto err; } if (fstat(fd, &sbuf) < 0) { warn("Could not stat backing file %s", nopt); goto err; } #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK, CAP_WRITE); if (ro) cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE); if (caph_rights_limit(fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif /* * Deal with raw devices */ size = sbuf.st_size; sectsz = DEV_BSIZE; psectsz = psectoff = 0; candelete = geom = 0; if (S_ISCHR(sbuf.st_mode)) { if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || ioctl(fd, DIOCGSECTORSIZE, &sectsz)) { perror("Could not fetch dev blk/sector size"); goto err; } assert(size != 0); assert(sectsz != 0); if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); arg.len = sizeof(arg.value.i); if (ioctl(fd, DIOCGATTR, &arg) == 0) candelete = arg.value.i; if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0) geom = 1; } else psectsz = sbuf.st_blksize; #ifndef WITHOUT_CAPSICUM if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif if (ssopt != 0) { if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 || ssopt > pssopt) { fprintf(stderr, "Invalid sector size %d/%d\n", ssopt, pssopt); goto err; } /* * Some backend drivers (e.g. cd0, ada0) require that the I/O * size be a multiple of the device's sector size. * * Validate that the emulated sector size complies with this * requirement.
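		 *
		 * For example, an emulated sector size of 4096 on a
		 * 512-byte native device passes the check below
		 * (4096 >= 512 and 4096 % 512 == 0), whereas 512 on a
		 * 4096-byte native device is rejected.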
*/ if (S_ISCHR(sbuf.st_mode)) { if (ssopt < sectsz || (ssopt % sectsz) != 0) { fprintf(stderr, "Sector size %d incompatible " "with underlying device sector size %d\n", ssopt, sectsz); goto err; } } sectsz = ssopt; psectsz = pssopt; psectoff = 0; } bc = calloc(1, sizeof(struct blockif_ctxt)); if (bc == NULL) { perror("calloc"); goto err; } bc->bc_magic = BLOCKIF_SIG; bc->bc_fd = fd; bc->bc_ischr = S_ISCHR(sbuf.st_mode); bc->bc_isgeom = geom; bc->bc_candelete = candelete; bc->bc_rdonly = ro; bc->bc_size = size; bc->bc_sectsz = sectsz; bc->bc_psectsz = psectsz; bc->bc_psectoff = psectoff; pthread_mutex_init(&bc->bc_mtx, NULL); pthread_cond_init(&bc->bc_cond, NULL); TAILQ_INIT(&bc->bc_freeq); TAILQ_INIT(&bc->bc_pendq); TAILQ_INIT(&bc->bc_busyq); for (i = 0; i < BLOCKIF_MAXREQ; i++) { bc->bc_reqs[i].be_status = BST_FREE; TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); } for (i = 0; i < BLOCKIF_NUMTHR; i++) { pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); pthread_set_name_np(bc->bc_btid[i], tname); } return (bc); err: if (fd >= 0) close(fd); free(nopt); return (NULL); } static int blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, enum blockop op) { int err; err = 0; pthread_mutex_lock(&bc->bc_mtx); if (!TAILQ_EMPTY(&bc->bc_freeq)) { /* * Enqueue and inform the block i/o thread * that there is work available */ if (blockif_enqueue(bc, breq, op)) pthread_cond_signal(&bc->bc_cond); } else { /* * Callers are not allowed to enqueue more than * the specified blockif queue limit. Return an * error to indicate that the queue length has been * exceeded. */ err = E2BIG; } pthread_mutex_unlock(&bc->bc_mtx); return (err); } int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_READ)); } int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_WRITE)); } int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_FLUSH)); } int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_DELETE)); } int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) { struct blockif_elem *be; assert(bc->bc_magic == BLOCKIF_SIG); pthread_mutex_lock(&bc->bc_mtx); /* * Check pending requests. */ TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { if (be->be_req == breq) break; } if (be != NULL) { /* * Found it. */ blockif_complete(bc, be); pthread_mutex_unlock(&bc->bc_mtx); return (0); } /* * Check in-flight requests. */ TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { if (be->be_req == breq) break; } if (be == NULL) { /* * Didn't find it. */ pthread_mutex_unlock(&bc->bc_mtx); return (EINVAL); } /* * Interrupt the processing thread to force it to return * prematurely via its normal callback path.
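	 *
	 * (Concretely: a stack-allocated blockif_sig_elem is pushed onto
	 * the global list with a compare-and-swap loop, SIGCONT is
	 * delivered to the worker thread, and we sleep on bse_cond until
	 * the signal handler clears bse_pending.)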
*/ while (be->be_status == BST_BUSY) { struct blockif_sig_elem bse, *old_head; pthread_mutex_init(&bse.bse_mtx, NULL); pthread_cond_init(&bse.bse_cond, NULL); bse.bse_pending = 1; do { old_head = blockif_bse_head; bse.bse_next = old_head; } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, (uintptr_t)old_head, (uintptr_t)&bse)); pthread_kill(be->be_tid, SIGCONT); pthread_mutex_lock(&bse.bse_mtx); while (bse.bse_pending) pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); pthread_mutex_unlock(&bse.bse_mtx); } pthread_mutex_unlock(&bc->bc_mtx); /* * The processing thread has been interrupted. Since it's not * clear if the callback has been invoked yet, return EBUSY. */ return (EBUSY); } int blockif_close(struct blockif_ctxt *bc) { void *jval; int i; assert(bc->bc_magic == BLOCKIF_SIG); /* * Stop the block i/o thread */ pthread_mutex_lock(&bc->bc_mtx); bc->bc_closing = 1; pthread_mutex_unlock(&bc->bc_mtx); pthread_cond_broadcast(&bc->bc_cond); for (i = 0; i < BLOCKIF_NUMTHR; i++) pthread_join(bc->bc_btid[i], &jval); /* XXX Cancel queued i/o's ??? */ /* * Release resources */ bc->bc_magic = 0; close(bc->bc_fd); free(bc); return (0); } /* * Return virtual C/H/S values for a given block. Use the algorithm * outlined in the VHD specification to calculate values. */ void blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) { off_t sectors; /* total sectors of the block dev */ off_t hcyl; /* cylinders times heads */ uint16_t secpt; /* sectors per track */ uint8_t heads; assert(bc->bc_magic == BLOCKIF_SIG); sectors = bc->bc_size / bc->bc_sectsz; /* Clamp the size to the largest possible with CHS */ if (sectors > 65535UL*16*255) sectors = 65535UL*16*255; if (sectors >= 65536UL*16*63) { secpt = 255; heads = 16; hcyl = sectors / secpt; } else { secpt = 17; hcyl = sectors / secpt; heads = (hcyl + 1023) / 1024; if (heads < 4) heads = 4; if (hcyl >= (heads * 1024) || heads > 16) { secpt = 31; heads = 16; hcyl = sectors / secpt; } if (hcyl >= (heads * 1024)) { secpt = 63; heads = 16; hcyl = sectors / secpt; } } *c = hcyl / heads; *h = heads; *s = secpt; } /* * Accessors */ off_t blockif_size(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_size); } int blockif_sectsz(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_sectsz); } void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) { assert(bc->bc_magic == BLOCKIF_SIG); *size = bc->bc_psectsz; *off = bc->bc_psectoff; } int blockif_queuesz(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (BLOCKIF_MAXREQ - 1); } int blockif_is_ro(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_rdonly); } int blockif_candelete(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_candelete); } Index: projects/runtime-coverage-v2/usr.sbin/bhyve/block_if.h =================================================================== --- projects/runtime-coverage-v2/usr.sbin/bhyve/block_if.h (revision 347075) +++ projects/runtime-coverage-v2/usr.sbin/bhyve/block_if.h (revision 347076) @@ -1,72 +1,78 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 Peter Grehan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * The block API to be used by bhyve block-device emulations. The routines * are thread safe, with no assumptions about the context of the completion * callback - it may occur in the caller's context, or asynchronously in * another thread. */ #ifndef _BLOCK_IF_H_ #define _BLOCK_IF_H_ #include <sys/uio.h> #include <sys/unistd.h> -#define BLOCKIF_IOV_MAX 33 /* not practical to be IOV_MAX */ +/* + * BLOCKIF_IOV_MAX is the maximum number of scatter/gather entries in + * a single request. BLOCKIF_RING_MAX is the maximum number of + * pending requests that can be queued. + */ +#define BLOCKIF_IOV_MAX 128 /* not practical to be IOV_MAX */ +#define BLOCKIF_RING_MAX 128 struct blockif_req { int br_iovcnt; off_t br_offset; ssize_t br_resid; void (*br_callback)(struct blockif_req *req, int err); void *br_param; struct iovec br_iov[BLOCKIF_IOV_MAX]; }; struct blockif_ctxt; struct blockif_ctxt *blockif_open(const char *optstr, const char *ident); off_t blockif_size(struct blockif_ctxt *bc); void blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s); int blockif_sectsz(struct blockif_ctxt *bc); void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off); int blockif_queuesz(struct blockif_ctxt *bc); int blockif_is_ro(struct blockif_ctxt *bc); int blockif_candelete(struct blockif_ctxt *bc); int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_close(struct blockif_ctxt *bc); #endif /* _BLOCK_IF_H_ */ Index: projects/runtime-coverage-v2/usr.sbin/bhyve/pci_virtio_block.c =================================================================== --- projects/runtime-coverage-v2/usr.sbin/bhyve/pci_virtio_block.c (revision 347075) +++ projects/runtime-coverage-v2/usr.sbin/bhyve/pci_virtio_block.c (revision 347076) @@ -1,413 +1,424 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. + * Copyright (c) 2019 Joyent, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "pci_emul.h" #include "virtio.h" #include "block_if.h" -#define VTBLK_RINGSZ 64 +#define VTBLK_RINGSZ 128 +_Static_assert(VTBLK_RINGSZ <= BLOCKIF_RING_MAX, "Each ring entry must be able to queue a request"); + #define VTBLK_S_OK 0 #define VTBLK_S_IOERR 1 #define VTBLK_S_UNSUPP 2 #define VTBLK_BLK_ID_BYTES 20 + 1 /* Capability bits */ #define VTBLK_F_SEG_MAX (1 << 2) /* Maximum request segments */ #define VTBLK_F_BLK_SIZE (1 << 6) /* cfg block size valid */ #define VTBLK_F_FLUSH (1 << 9) /* Cache flush support */ #define VTBLK_F_TOPOLOGY (1 << 10) /* Optimal I/O alignment */ /* * Host capabilities */ #define VTBLK_S_HOSTCAPS \ ( VTBLK_F_SEG_MAX | \ VTBLK_F_BLK_SIZE | \ VTBLK_F_FLUSH | \ VTBLK_F_TOPOLOGY | \ VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */ /* * Config space "registers" */ struct vtblk_config { uint64_t vbc_capacity; uint32_t vbc_size_max; uint32_t vbc_seg_max; struct { uint16_t cylinders; uint8_t heads; uint8_t sectors; } vbc_geometry; uint32_t vbc_blk_size; struct { uint8_t physical_block_exp; uint8_t alignment_offset; uint16_t min_io_size; uint32_t opt_io_size; } vbc_topology; uint8_t vbc_writeback; } __packed; /* * Fixed-size block header */ struct virtio_blk_hdr { #define VBH_OP_READ 0 #define VBH_OP_WRITE 1 #define VBH_OP_FLUSH 4 #define VBH_OP_FLUSH_OUT 5 #define VBH_OP_IDENT 8 #define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */ uint32_t vbh_type; uint32_t vbh_ioprio; uint64_t vbh_sector; } __packed; /* * Debug printf */ static int pci_vtblk_debug; #define DPRINTF(params) if (pci_vtblk_debug) printf params #define WPRINTF(params) printf params struct pci_vtblk_ioreq { struct blockif_req io_req; struct pci_vtblk_softc *io_sc; uint8_t *io_status; uint16_t io_idx; }; /* * Per-device softc */ struct pci_vtblk_softc { struct virtio_softc vbsc_vs; pthread_mutex_t vsc_mtx; struct vqueue_info vbsc_vq; struct vtblk_config vbsc_cfg; struct blockif_ctxt *bc; char vbsc_ident[VTBLK_BLK_ID_BYTES]; struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ]; }; static void pci_vtblk_reset(void *); static void pci_vtblk_notify(void *, struct vqueue_info *); static int pci_vtblk_cfgread(void *, int, int, uint32_t *); static int pci_vtblk_cfgwrite(void *, int, int, uint32_t); static struct virtio_consts vtblk_vi_consts = { "vtblk", /* our name */ 1, /* we support 1 virtqueue */ sizeof(struct vtblk_config), /* config reg 
size */ pci_vtblk_reset, /* reset */ pci_vtblk_notify, /* device-wide qnotify */ pci_vtblk_cfgread, /* read PCI config */ pci_vtblk_cfgwrite, /* write PCI config */ NULL, /* apply negotiated features */ VTBLK_S_HOSTCAPS, /* our capabilities */ }; static void pci_vtblk_reset(void *vsc) { struct pci_vtblk_softc *sc = vsc; DPRINTF(("vtblk: device reset requested !\n")); vi_reset_dev(&sc->vbsc_vs); } static void pci_vtblk_done(struct blockif_req *br, int err) { struct pci_vtblk_ioreq *io = br->br_param; struct pci_vtblk_softc *sc = io->io_sc; /* convert errno into a virtio block error return */ if (err == EOPNOTSUPP || err == ENOSYS) *io->io_status = VTBLK_S_UNSUPP; else if (err != 0) *io->io_status = VTBLK_S_IOERR; else *io->io_status = VTBLK_S_OK; /* * Return the descriptor back to the host. * We wrote 1 byte (our status) to host. */ pthread_mutex_lock(&sc->vsc_mtx); vq_relchain(&sc->vbsc_vq, io->io_idx, 1); vq_endchains(&sc->vbsc_vq, 0); pthread_mutex_unlock(&sc->vsc_mtx); } static void pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) { struct virtio_blk_hdr *vbh; struct pci_vtblk_ioreq *io; int i, n; int err; ssize_t iolen; int writeop, type; struct iovec iov[BLOCKIF_IOV_MAX + 2]; uint16_t idx, flags[BLOCKIF_IOV_MAX + 2]; n = vq_getchain(vq, &idx, iov, BLOCKIF_IOV_MAX + 2, flags); /* * The first descriptor will be the read-only fixed header, * and the last is for status (hence +2 above and below). * The remaining iov's are the actual data I/O vectors. * * XXX - note - this fails on crash dump, which does a * VIRTIO_BLK_T_FLUSH with a zero transfer length */ assert(n >= 2 && n <= BLOCKIF_IOV_MAX + 2); io = &sc->vbsc_ios[idx]; assert((flags[0] & VRING_DESC_F_WRITE) == 0); assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr)); vbh = iov[0].iov_base; memcpy(&io->io_req.br_iov, &iov[1], sizeof(struct iovec) * (n - 2)); io->io_req.br_iovcnt = n - 2; io->io_req.br_offset = vbh->vbh_sector * DEV_BSIZE; io->io_status = iov[--n].iov_base; assert(iov[n].iov_len == 1); assert(flags[n] & VRING_DESC_F_WRITE); /* * XXX * The guest should not be setting the BARRIER flag because * we don't advertise the capability. */ type = vbh->vbh_type & ~VBH_FLAG_BARRIER; writeop = (type == VBH_OP_WRITE); iolen = 0; for (i = 1; i < n; i++) { /* * - write op implies read-only descriptor, * - read/ident op implies write-only descriptor, * therefore test the inverse of the descriptor bit * to the op. */ assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop); iolen += iov[i].iov_len; } io->io_req.br_resid = iolen; DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld\n\r", writeop ? "write" : "read/ident", iolen, i - 1, io->io_req.br_offset)); switch (type) { case VBH_OP_READ: err = blockif_read(sc->bc, &io->io_req); break; case VBH_OP_WRITE: err = blockif_write(sc->bc, &io->io_req); break; case VBH_OP_FLUSH: case VBH_OP_FLUSH_OUT: err = blockif_flush(sc->bc, &io->io_req); break; case VBH_OP_IDENT: /* Assume a single buffer */ /* S/n equal to buffer is not zero-terminated. 
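	 * The buffer is zeroed first, so a shorter ident comes out
	 * NUL-padded; only a serial number that exactly fills the
	 * buffer loses its terminator.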
*/ memset(iov[1].iov_base, 0, iov[1].iov_len); strncpy(iov[1].iov_base, sc->vbsc_ident, MIN(iov[1].iov_len, sizeof(sc->vbsc_ident))); pci_vtblk_done(&io->io_req, 0); return; default: pci_vtblk_done(&io->io_req, EOPNOTSUPP); return; } assert(err == 0); } static void pci_vtblk_notify(void *vsc, struct vqueue_info *vq) { struct pci_vtblk_softc *sc = vsc; while (vq_has_descs(vq)) pci_vtblk_proc(sc, vq); } static int pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { char bident[sizeof("XX:X:X")]; struct blockif_ctxt *bctxt; MD5_CTX mdctx; u_char digest[16]; struct pci_vtblk_softc *sc; off_t size; int i, sectsz, sts, sto; if (opts == NULL) { printf("virtio-block: backing device required\n"); return (1); } /* * The supplied backing file has to exist */ snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func); bctxt = blockif_open(opts, bident); if (bctxt == NULL) { perror("Could not open backing file"); return (1); } size = blockif_size(bctxt); sectsz = blockif_sectsz(bctxt); blockif_psectsz(bctxt, &sts, &sto); sc = calloc(1, sizeof(struct pci_vtblk_softc)); sc->bc = bctxt; for (i = 0; i < VTBLK_RINGSZ; i++) { struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i]; io->io_req.br_callback = pci_vtblk_done; io->io_req.br_param = io; io->io_sc = sc; io->io_idx = i; } pthread_mutex_init(&sc->vsc_mtx, NULL); /* init virtio softc and virtqueues */ vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq); sc->vbsc_vs.vs_mtx = &sc->vsc_mtx; sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ; /* sc->vbsc_vq.vq_notify = we have no per-queue notify */ /* * Create an identifier for the backing file. Use parts of the * md5 sum of the filename */ MD5Init(&mdctx); MD5Update(&mdctx, opts, strlen(opts)); MD5Final(digest, &mdctx); snprintf(sc->vbsc_ident, VTBLK_BLK_ID_BYTES, "BHYVE-%02X%02X-%02X%02X-%02X%02X", digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]); /* setup virtio block config space */ sc->vbsc_cfg.vbc_capacity = size / DEV_BSIZE; /* 512-byte units */ sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */ - sc->vbsc_cfg.vbc_seg_max = BLOCKIF_IOV_MAX; + + /* + * If Linux is presented with a seg_max greater than the virtio queue + * size, it can stumble into situations where it violates its own + * invariants and panics. For safety, we keep seg_max clamped, paying + * heed to the two extra descriptors needed for the header and status + * of a request. + */ + sc->vbsc_cfg.vbc_seg_max = MIN(VTBLK_RINGSZ - 2, BLOCKIF_IOV_MAX); sc->vbsc_cfg.vbc_geometry.cylinders = 0; /* no geometry */ sc->vbsc_cfg.vbc_geometry.heads = 0; sc->vbsc_cfg.vbc_geometry.sectors = 0; sc->vbsc_cfg.vbc_blk_size = sectsz; sc->vbsc_cfg.vbc_topology.physical_block_exp = (sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0; sc->vbsc_cfg.vbc_topology.alignment_offset = (sto != 0) ? ((sts - sto) / sectsz) : 0; sc->vbsc_cfg.vbc_topology.min_io_size = 0; sc->vbsc_cfg.vbc_topology.opt_io_size = 0; sc->vbsc_cfg.vbc_writeback = 0; /* * Should we move some of this into virtio.c? Could * have the device, class, and subdev_0 as fields in * the virtio constants structure. 
*/ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) { blockif_close(sc->bc); free(sc); return (1); } vi_set_io_bar(&sc->vbsc_vs, 0); return (0); } static int pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value) { DPRINTF(("vtblk: write to readonly reg %d\n\r", offset)); return (1); } static int pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval) { struct pci_vtblk_softc *sc = vsc; void *ptr; /* our caller has already verified offset and size */ ptr = (uint8_t *)&sc->vbsc_cfg + offset; memcpy(retval, ptr, size); return (0); } struct pci_devemu pci_de_vblk = { .pe_emu = "virtio-blk", .pe_init = pci_vtblk_init, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read }; PCI_EMUL_SET(pci_de_vblk); Index: projects/runtime-coverage-v2/usr.sbin/lpr/common_source/printcap.c =================================================================== --- projects/runtime-coverage-v2/usr.sbin/lpr/common_source/printcap.c (revision 347075) +++ projects/runtime-coverage-v2/usr.sbin/lpr/common_source/printcap.c (revision 347076) @@ -1,453 +1,447 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1983, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #if 0 #ifndef lint static char sccsid[] = "@(#)printcap.c 8.2 (Berkeley) 4/28/95"; #endif /* not lint */ #endif #include "lp.cdefs.h" /* A cross-platform version of <sys/cdefs.h> */ __FBSDID("$FreeBSD$"); #include #include #include #include #include #include /* required for lp.h, but not used here */ #include /* ditto */ #include "lp.h" #include "lp.local.h" #include "pathnames.h" /* * Routines and data used in processing the printcap file. */ -static char *printcapdb[2] = { _PATH_PRINTCAP, 0 }; /* list for cget* */ +static char *printcapdb[] = { __DECONST(char *, _PATH_PRINTCAP), NULL }; static char *capdb_canonical_name(const char *_bp); static int capdb_getaltlog(char *_bp, const char *_shrt, const char *_lng); static int capdb_getaltnum(char *_bp, const char *_shrt, const char *_lng, long _dflt, long *_result); static int capdb_getaltstr(char *_bp, const char *_shrt, const char *lng, const char *_dflt, char **_result); static int getprintcap_int(char *_bp, struct printer *_pp); /* * Change the name of the printcap file. Used by chkprintcap(8), * but could be used by other members of the suite with appropriate * security measures. */ void setprintcap(char *newfile) { printcapdb[0] = newfile; } /* * Read the printcap database for printer `printer' into the * struct printer pointed by `pp'. Return values are as for * cgetent(3): -1 means we could not find what we wanted, -2 * means a system error occurred (and errno is set), -3 if a * reference (`tc=') loop was detected, and 0 means success. * * Copied from lpr; should add additional capabilities as they * are required by the other programs in the suite so that * printcap-reading is consistent across the entire family. */ int getprintcap(const char *printer, struct printer *pp) { int status; - char *XXX; char *bp; - /* - * A bug in the declaration of cgetent(3) means that we have - * to hide the constness of its third argument. - */ - XXX = (char *)printer; - if ((status = cgetent(&bp, printcapdb, XXX)) < 0) + if ((status = cgetent(&bp, printcapdb, printer)) < 0) return status; status = getprintcap_int(bp, pp); free(bp); return status; } /* * Map the status values returned by cgetfirst/cgetnext into those * used by cgetent, returning truth if there are more records to * examine. This points out what is arguably a bug in the cget* * interface (or at least a nasty wart). */ static int firstnextmap(int *status) { switch (*status) { case 0: return 0; case 1: *status = 0; return 1; case 2: *status = 1; return 1; case -1: *status = -2; return 0; case -2: *status = -3; return 1; default: return 0; } } /* * Scan through the database of printers using cgetfirst/cgetnext. * Return false on error or end-of-database; else true. */ int firstprinter(struct printer *pp, int *error) { int status; char *bp; init_printer(pp); status = cgetfirst(&bp, printcapdb); if (firstnextmap(&status) == 0) { if (error) *error = status; return 0; } if (error) *error = status; status = getprintcap_int(bp, pp); free(bp); if (error && status) *error = status; return 1; } int nextprinter(struct printer *pp, int *error) { int status; char *bp; free_printer(pp); status = cgetnext(&bp, printcapdb); if (firstnextmap(&status) == 0) { if (error) *error = status; return 0; } if (error) *error = status; status = getprintcap_int(bp, pp); free(bp); if (error && status) *error = status; return 1; } void lastprinter(void) { cgetclose(); } /* * This must match the order of declaration of enum filter in lp.h.
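 * That order, per the two arrays below, is: cf (cifplot), df (dvi),
 * gf (plot), if (input), nf (ditroff), of (output), rf (fortran),
 * tf (troff) and vf (raster).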
*/ static const char *filters[] = { "cf", "df", "gf", "if", "nf", "of", "rf", "tf", "vf" }; static const char *longfilters[] = { "filt.cifplot", "filt.dvi", "filt.plot", "filt.input", "filt.ditroff", "filt.output", "filt.fortran", "filt.troff", "filt.raster" }; /* * Internal routine for both getprintcap() and nextprinter(). * Actually parse the printcap entry using cget* functions. * Also attempt to figure out the canonical name of the printer * and store a malloced copy of it in pp->printer. */ static int getprintcap_int(char *bp, struct printer *pp) { enum lpd_filters filt; char *rp_name; int error; if ((pp->printer = capdb_canonical_name(bp)) == NULL) return PCAPERR_OSERR; #define CHK(x) do {if ((x) == PCAPERR_OSERR) return PCAPERR_OSERR;}while(0) CHK(capdb_getaltstr(bp, "af", "acct.file", 0, &pp->acct_file)); CHK(capdb_getaltnum(bp, "br", "tty.rate", 0, &pp->baud_rate)); CHK(capdb_getaltnum(bp, "ct", "remote.timeout", DEFTIMEOUT, &pp->conn_timeout)); CHK(capdb_getaltnum(bp, "du", "daemon.user", DEFUID, &pp->daemon_user)); CHK(capdb_getaltstr(bp, "ff", "job.formfeed", DEFFF, &pp->form_feed)); CHK(capdb_getaltstr(bp, "lf", "spool.log", _PATH_CONSOLE, &pp->log_file)); CHK(capdb_getaltstr(bp, "lo", "spool.lock", DEFLOCK, &pp->lock_file)); CHK(capdb_getaltstr(bp, "lp", "tty.device", _PATH_DEFDEVLP, &pp->lp)); CHK(capdb_getaltnum(bp, "mc", "max.copies", DEFMAXCOPIES, &pp->max_copies)); CHK(capdb_getaltstr(bp, "ms", "tty.mode", 0, &pp->mode_set)); CHK(capdb_getaltnum(bp, "mx", "max.blocks", DEFMX, &pp->max_blocks)); CHK(capdb_getaltnum(bp, "pc", "acct.price", 0, &pp->price100)); CHK(capdb_getaltnum(bp, "pl", "page.length", DEFLENGTH, &pp->page_length)); CHK(capdb_getaltnum(bp, "pw", "page.width", DEFWIDTH, &pp->page_width)); CHK(capdb_getaltnum(bp, "px", "page.pwidth", 0, &pp->page_pwidth)); CHK(capdb_getaltnum(bp, "py", "page.plength", 0, &pp->page_plength)); CHK(capdb_getaltstr(bp, "rg", "daemon.restrictgrp", 0, &pp->restrict_grp)); CHK(capdb_getaltstr(bp, "rm", "remote.host", 0, &pp->remote_host)); CHK(capdb_getaltstr(bp, "rp", "remote.queue", DEFLP, &pp->remote_queue)); CHK(capdb_getaltstr(bp, "sd", "spool.dir", _PATH_DEFSPOOL, &pp->spool_dir)); CHK(capdb_getaltstr(bp, "sr", "stat.recv", 0, &pp->stat_recv)); CHK(capdb_getaltstr(bp, "ss", "stat.send", 0, &pp->stat_send)); CHK(capdb_getaltstr(bp, "st", "spool.status", DEFSTAT, &pp->status_file)); CHK(capdb_getaltstr(bp, "tr", "job.trailer", 0, &pp->trailer)); pp->resend_copies = capdb_getaltlog(bp, "rc", "remote.resend_copies"); pp->restricted = capdb_getaltlog(bp, "rs", "daemon.restricted"); pp->short_banner = capdb_getaltlog(bp, "sb", "banner.short"); pp->no_copies = capdb_getaltlog(bp, "sc", "job.no_copies"); pp->no_formfeed = capdb_getaltlog(bp, "sf", "job.no_formfeed"); pp->no_header = capdb_getaltlog(bp, "sh", "banner.disable"); pp->header_last = capdb_getaltlog(bp, "hl", "banner.last"); pp->rw = capdb_getaltlog(bp, "rw", "tty.rw"); pp->tof = !capdb_getaltlog(bp, "fo", "job.topofform"); /* * Decide if the remote printer name matches the local printer name. * If no name is given then we assume they mean them to match. * If a name is given see if the rp_name is one of the names for * this printer. 
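 * (cgetmatch() below performs that check: when the rp= value is not
 * among the entry's own names, the queue is treated as remote.)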
*/ pp->rp_matches_local = 1; CHK((error = capdb_getaltstr(bp, "rp", "remote.queue", 0, &rp_name))); if (error != PCAPERR_NOTFOUND && rp_name != NULL) { if (cgetmatch(bp,rp_name) != 0) pp->rp_matches_local = 0; free(rp_name); } /* * Filters: */ for (filt = 0; filt < LPF_COUNT; filt++) { CHK(capdb_getaltstr(bp, filters[filt], longfilters[filt], 0, &pp->filters[filt])); } return 0; } /* * Decode the error codes returned by cgetent() using the names we * made up for them from "lp.h". * This would have been much better done with Common Error, >sigh<. * Perhaps this can be fixed in the next incarnation of cget*. */ const char * pcaperr(int error) { switch(error) { case PCAPERR_TCOPEN: return "unresolved tc= expansion"; case PCAPERR_SUCCESS: return "no error"; case PCAPERR_NOTFOUND: return "printer not found"; case PCAPERR_OSERR: return strerror(errno); case PCAPERR_TCLOOP: return "loop detected in tc= expansion"; default: return "unknown printcap error"; } } /* * Initialize a `struct printer' to contain values harmless to * the other routines in liblpr. */ void init_printer(struct printer *pp) { static struct printer zero; *pp = zero; } /* * Free the dynamically-allocated strings in a `struct printer'. * Idempotent. */ void free_printer(struct printer *pp) { enum lpd_filters filt; #define cfree(x) do { if (x) free(x); } while(0) cfree(pp->printer); cfree(pp->acct_file); for (filt = 0; filt < LPF_COUNT; filt++) cfree(pp->filters[filt]); cfree(pp->form_feed); cfree(pp->log_file); cfree(pp->lock_file); cfree(pp->lp); cfree(pp->restrict_grp); cfree(pp->remote_host); cfree(pp->remote_queue); cfree(pp->spool_dir); cfree(pp->stat_recv); cfree(pp->stat_send); cfree(pp->status_file); cfree(pp->trailer); cfree(pp->mode_set); init_printer(pp); } /* * The following routines are part of what would be a sensible library * interface to capability databases. Maybe someday this will become * the default. */ /* * It provides similar functionality to cgetstr(), * except that it provides for both a long and a short * capability name and allows for a default to be specified. */ static int capdb_getaltstr(char *bp, const char *shrt, const char *lng, const char *dflt, char **result) { int status; - status = cgetstr(bp, (char *)/*XXX*/lng, result); + status = cgetstr(bp, lng, result); if (status >= 0 || status == PCAPERR_OSERR) return status; - status = cgetstr(bp, (char *)/*XXX*/shrt, result); + status = cgetstr(bp, shrt, result); if (status >= 0 || status == PCAPERR_OSERR) return status; if (dflt) { *result = strdup(dflt); if (*result == NULL) return PCAPERR_OSERR; return strlen(*result); } return PCAPERR_NOTFOUND; } /* * The same, only for integers. */ static int capdb_getaltnum(char *bp, const char *shrt, const char *lng, long dflt, long *result) { int status; - status = cgetnum(bp, (char *)/*XXX*/lng, result); + status = cgetnum(bp, lng, result); if (status >= 0) return status; - status = cgetnum(bp, (char *)/*XXX*/shrt, result); + status = cgetnum(bp, shrt, result); if (status >= 0) return status; *result = dflt; return 0; } /* * Likewise for logical values. There's no need for a default parameter * because the default is always false. */ static int capdb_getaltlog(char *bp, const char *shrt, const char *lng) { - if (cgetcap(bp, (char *)/*XXX*/lng, ':')) + if (cgetcap(bp, lng, ':')) return 1; - if (cgetcap(bp, (char *)/*XXX*/shrt, ':')) + if (cgetcap(bp, shrt, ':')) return 1; return 0; } /* * Also should be a part of a better cget* library. 
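 * (For an entry that begins "lp|lp0|line printer:", for instance, the
 * canonical name is "lp", i.e. the text before the first '|' or ':'.)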
* Given a capdb entry, attempt to figure out what its canonical name * is, and return a malloced copy of it. The canonical name is * considered to be the first one listed. */ static char * capdb_canonical_name(const char *bp) { char *retval; const char *nameend; nameend = strpbrk(bp, "|:"); if (nameend == NULL) nameend = bp + 1; if ((retval = malloc(nameend - bp + 1)) != NULL) { retval[0] = '\0'; strncat(retval, bp, nameend - bp); } return retval; } Index: projects/runtime-coverage-v2/usr.sbin/mountd/mountd.c =================================================================== --- projects/runtime-coverage-v2/usr.sbin/mountd/mountd.c (revision 347075) +++ projects/runtime-coverage-v2/usr.sbin/mountd/mountd.c (revision 347076) @@ -1,3286 +1,3295 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Herb Hasler and Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1989, 1993\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif /*not lint*/ #if 0 #ifndef lint static char sccsid[] = "@(#)mountd.c 8.15 (Berkeley) 5/1/95"; #endif /*not lint*/ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pathnames.h" #include "mntopts.h" #ifdef DEBUG #include #endif /* * Structures for keeping the mount list and export list */ struct mountlist { char ml_host[MNTNAMLEN+1]; char ml_dirp[MNTPATHLEN+1]; SLIST_ENTRY(mountlist) next; }; struct dirlist { struct dirlist *dp_left; struct dirlist *dp_right; int dp_flag; struct hostlist *dp_hosts; /* List of hosts this dir exported to */ char *dp_dirp; }; /* dp_flag bits */ #define DP_DEFSET 0x1 #define DP_HOSTSET 0x2 struct exportlist { struct dirlist *ex_dirl; struct dirlist *ex_defdir; int ex_flag; fsid_t ex_fs; char *ex_fsdir; char *ex_indexfile; int ex_numsecflavors; int ex_secflavors[MAXSECFLAVORS]; int ex_defnumsecflavors; int ex_defsecflavors[MAXSECFLAVORS]; SLIST_ENTRY(exportlist) entries; }; /* ex_flag bits */ #define EX_LINKED 0x1 struct netmsk { struct sockaddr_storage nt_net; struct sockaddr_storage nt_mask; char *nt_name; }; union grouptypes { struct addrinfo *gt_addrinfo; struct netmsk gt_net; }; struct grouplist { int gr_type; union grouptypes gr_ptr; struct grouplist *gr_next; int gr_numsecflavors; int gr_secflavors[MAXSECFLAVORS]; }; /* Group types */ #define GT_NULL 0x0 #define GT_HOST 0x1 #define GT_NET 0x2 #define GT_DEFAULT 0x3 #define GT_IGNORE 0x5 struct hostlist { int ht_flag; /* Uses DP_xx bits */ struct grouplist *ht_grp; struct hostlist *ht_next; }; struct fhreturn { int fhr_flag; int fhr_vers; nfsfh_t fhr_fh; int fhr_numsecflavors; int *fhr_secflavors; }; #define GETPORT_MAXTRY 20 /* Max tries to get a port # */ /* Global defs */ static char *add_expdir(struct dirlist **, char *, int); static void add_dlist(struct dirlist **, struct dirlist *, struct grouplist *, int, struct exportlist *); static void add_mlist(char *, char *); static int check_dirpath(char *); static int check_options(struct dirlist *); static int checkmask(struct sockaddr *sa); static int chk_host(struct dirlist *, struct sockaddr *, int *, int *, int *, int **); static char *strsep_quote(char **stringp, const char *delim); static int create_service(struct netconfig *nconf); static void complete_service(struct netconfig *nconf, char *port_str); static void clearout_service(void); static void del_mlist(char *hostp, char *dirp); static struct dirlist *dirp_search(struct dirlist *, char *); static int do_mount(struct exportlist *, struct grouplist *, int, struct xucred *, char *, int, struct statfs *); static int do_opt(char **, char **, struct exportlist *, struct grouplist *, int *, int *, struct xucred *); static struct exportlist *ex_search(fsid_t *); static struct exportlist *get_exp(void); static void free_dir(struct dirlist *); static void free_exp(struct exportlist *); static void free_grp(struct grouplist *); static void free_host(struct hostlist *); static void get_exportlist(void); static int get_host(char *, struct grouplist *, struct grouplist *); static struct hostlist *get_ht(void); static int get_line(void); static void get_mountlist(void); static int get_net(char *, struct netmsk *, int); static void getexp_err(struct exportlist *, struct grouplist *, const char *); 
static struct grouplist *get_grp(void); static void hang_dirp(struct dirlist *, struct grouplist *, struct exportlist *, int); static void huphandler(int sig); static int makemask(struct sockaddr_storage *ssp, int bitlen); static void mntsrv(struct svc_req *, SVCXPRT *); static void nextfield(char **, char **); static void out_of_mem(void); static void parsecred(char *, struct xucred *); static int parsesec(char *, struct exportlist *); static int put_exlist(struct dirlist *, XDR *, struct dirlist *, int *, int); static void *sa_rawaddr(struct sockaddr *sa, int *nbytes); static int sacmp(struct sockaddr *sa1, struct sockaddr *sa2, struct sockaddr *samask); static int scan_tree(struct dirlist *, struct sockaddr *); static void usage(void); static int xdr_dir(XDR *, char *); static int xdr_explist(XDR *, caddr_t); static int xdr_explist_brief(XDR *, caddr_t); static int xdr_explist_common(XDR *, caddr_t, int); static int xdr_fhs(XDR *, caddr_t); static int xdr_mlist(XDR *, caddr_t); static void terminate(int); static SLIST_HEAD(, exportlist) exphead = SLIST_HEAD_INITIALIZER(exphead); static SLIST_HEAD(, mountlist) mlhead = SLIST_HEAD_INITIALIZER(mlhead); static struct grouplist *grphead; static char *exnames_default[2] = { _PATH_EXPORTS, NULL }; static char **exnames; static char **hosts = NULL; static struct xucred def_anon = { XUCRED_VERSION, (uid_t)65534, 1, { (gid_t)65533 }, NULL }; static int force_v2 = 0; static int resvport_only = 1; static int nhosts = 0; static int dir_only = 1; static int dolog = 0; static int got_sighup = 0; static int xcreated = 0; static char *svcport_str = NULL; static int mallocd_svcport = 0; static int *sock_fd; static int sock_fdcnt; static int sock_fdpos; static int suspend_nfsd = 0; static int opt_flags; static int have_v6 = 1; static int v4root_phase = 0; static char v4root_dirpath[PATH_MAX + 1]; static int has_publicfh = 0; static struct pidfh *pfh = NULL; /* Bits for opt_flags above */ #define OP_MAPROOT 0x01 #define OP_MAPALL 0x02 /* 0x4 free */ #define OP_MASK 0x08 #define OP_NET 0x10 #define OP_ALLDIRS 0x40 #define OP_HAVEMASK 0x80 /* A mask was specified or inferred. */ #define OP_QUIET 0x100 #define OP_MASKLEN 0x200 #define OP_SEC 0x400 #ifdef DEBUG static int debug = 1; static void SYSLOG(int, const char *, ...) __printflike(2, 3); #define syslog SYSLOG #else static int debug = 0; #endif /* * Similar to strsep(), but it allows for quoted strings * and escaped characters. * * It returns the string (or NULL, if *stringp is NULL), * which is a de-quoted version of the string if necessary. * * It modifies *stringp in place. */ static char * strsep_quote(char **stringp, const char *delim) { char *srcptr, *dstptr, *retval; char quot = 0; if (stringp == NULL || *stringp == NULL) return (NULL); srcptr = dstptr = retval = *stringp; while (*srcptr) { /* * We're looking for several edge cases here. * First: if we're in quote state (quot != 0), * then we ignore the delim characters, but otherwise * process as normal, unless it is the quote character. * Second: if the current character is a backslash, * we take the next character as-is, without checking * for delim, quote, or backslash. Exception: if the * next character is a NUL, that's the end of the string. * Third: if the character is a quote character, we toggle * quote state. * Otherwise: check the current character for NUL, or * being in delim, and end the string if either is true. */ if (*srcptr == '\\') { srcptr++; /* * The edge case here is if the next character * is NUL, we want to stop processing. 
But if * it's not NUL, then we simply want to copy it. */ if (*srcptr) { *dstptr++ = *srcptr++; } continue; } if (quot == 0 && (*srcptr == '\'' || *srcptr == '"')) { quot = *srcptr++; continue; } if (quot && *srcptr == quot) { /* End of the quoted part */ quot = 0; srcptr++; continue; } if (!quot && strchr(delim, *srcptr)) break; *dstptr++ = *srcptr++; } *dstptr = 0; /* Terminate the string */ *stringp = (*srcptr == '\0') ? NULL : srcptr + 1; return (retval); } /* * Mountd server for NFS mount protocol as described in: * NFS: Network File System Protocol Specification, RFC1094, Appendix A * The optional arguments are the exports file name * default: _PATH_EXPORTS * and "-n" to allow nonroot mount. */ int main(int argc, char **argv) { fd_set readfds; struct netconfig *nconf; char *endptr, **hosts_bak; void *nc_handle; pid_t otherpid; in_port_t svcport; int c, k, s; int maxrec = RPC_MAXDATASIZE; int attempt_cnt, port_len, port_pos, ret; char **port_list; /* Check that another mountd isn't already running. */ pfh = pidfile_open(_PATH_MOUNTDPID, 0600, &otherpid); if (pfh == NULL) { if (errno == EEXIST) errx(1, "mountd already running, pid: %d.", otherpid); warn("cannot open or create pidfile"); } s = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP); if (s < 0) have_v6 = 0; else close(s); while ((c = getopt(argc, argv, "2deh:lnp:rS")) != -1) switch (c) { case '2': force_v2 = 1; break; case 'e': /* now a no-op, since this is the default */ break; case 'n': resvport_only = 0; break; case 'r': dir_only = 0; break; case 'd': debug = debug ? 0 : 1; break; case 'l': dolog = 1; break; case 'p': endptr = NULL; svcport = (in_port_t)strtoul(optarg, &endptr, 10); if (endptr == NULL || *endptr != '\0' || svcport == 0 || svcport >= IPPORT_MAX) usage(); svcport_str = strdup(optarg); break; case 'h': ++nhosts; hosts_bak = hosts; hosts_bak = realloc(hosts, nhosts * sizeof(char *)); if (hosts_bak == NULL) { if (hosts != NULL) { for (k = 0; k < nhosts; k++) free(hosts[k]); free(hosts); out_of_mem(); } } hosts = hosts_bak; hosts[nhosts - 1] = strdup(optarg); if (hosts[nhosts - 1] == NULL) { for (k = 0; k < (nhosts - 1); k++) free(hosts[k]); free(hosts); out_of_mem(); } break; case 'S': suspend_nfsd = 1; break; default: usage(); } if (modfind("nfsd") < 0) { /* Not present in kernel, try loading it */ if (kldload("nfsd") < 0 || modfind("nfsd") < 0) errx(1, "NFS server is not available"); } argc -= optind; argv += optind; grphead = (struct grouplist *)NULL; if (argc > 0) exnames = argv; else exnames = exnames_default; openlog("mountd", LOG_PID, LOG_DAEMON); if (debug) warnx("getting export list"); get_exportlist(); if (debug) warnx("getting mount list"); get_mountlist(); if (debug) warnx("here we go"); if (debug == 0) { daemon(0, 0); signal(SIGINT, SIG_IGN); signal(SIGQUIT, SIG_IGN); } signal(SIGHUP, huphandler); signal(SIGTERM, terminate); signal(SIGPIPE, SIG_IGN); pidfile_write(pfh); rpcb_unset(MOUNTPROG, MOUNTVERS, NULL); rpcb_unset(MOUNTPROG, MOUNTVERS3, NULL); rpc_control(RPC_SVC_CONNMAXREC_SET, &maxrec); if (!resvport_only) { if (sysctlbyname("vfs.nfsd.nfs_privport", NULL, NULL, &resvport_only, sizeof(resvport_only)) != 0 && errno != ENOENT) { syslog(LOG_ERR, "sysctl: %m"); exit(1); } } /* * If no hosts were specified, add a wildcard entry to bind to * INADDR_ANY. Otherwise make sure 127.0.0.1 and ::1 are added to the * list. 
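 * (So "mountd -h 192.0.2.1" on a host with IPv6 support ends up bound
 * to 192.0.2.1, ::1 and 127.0.0.1; the address here is illustrative.)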
*/ if (nhosts == 0) { hosts = malloc(sizeof(char *)); if (hosts == NULL) out_of_mem(); hosts[0] = "*"; nhosts = 1; } else { hosts_bak = hosts; if (have_v6) { hosts_bak = realloc(hosts, (nhosts + 2) * sizeof(char *)); if (hosts_bak == NULL) { for (k = 0; k < nhosts; k++) free(hosts[k]); free(hosts); out_of_mem(); } else hosts = hosts_bak; nhosts += 2; hosts[nhosts - 2] = "::1"; } else { hosts_bak = realloc(hosts, (nhosts + 1) * sizeof(char *)); if (hosts_bak == NULL) { for (k = 0; k < nhosts; k++) free(hosts[k]); free(hosts); out_of_mem(); } else { nhosts += 1; hosts = hosts_bak; } } hosts[nhosts - 1] = "127.0.0.1"; } attempt_cnt = 1; sock_fdcnt = 0; sock_fd = NULL; port_list = NULL; port_len = 0; nc_handle = setnetconfig(); while ((nconf = getnetconfig(nc_handle))) { if (nconf->nc_flag & NC_VISIBLE) { if (have_v6 == 0 && strcmp(nconf->nc_protofmly, "inet6") == 0) { /* DO NOTHING */ } else { ret = create_service(nconf); if (ret == 1) /* Ignore this call */ continue; if (ret < 0) { /* * Failed to bind port, so close off * all sockets created and try again * if the port# was dynamically * assigned via bind(2). */ clearout_service(); if (mallocd_svcport != 0 && attempt_cnt < GETPORT_MAXTRY) { free(svcport_str); svcport_str = NULL; mallocd_svcport = 0; } else { errno = EADDRINUSE; syslog(LOG_ERR, "bindresvport_sa: %m"); exit(1); } /* Start over at the first service. */ free(sock_fd); sock_fdcnt = 0; sock_fd = NULL; nc_handle = setnetconfig(); attempt_cnt++; } else if (mallocd_svcport != 0 && attempt_cnt == GETPORT_MAXTRY) { /* * For the last attempt, allow * different port #s for each nconf * by saving the svcport_str and * setting it back to NULL. */ port_list = realloc(port_list, (port_len + 1) * sizeof(char *)); if (port_list == NULL) out_of_mem(); port_list[port_len++] = svcport_str; svcport_str = NULL; mallocd_svcport = 0; } } } } /* * Successfully bound the ports, so call complete_service() to * do the rest of the setup on the service(s). */ sock_fdpos = 0; port_pos = 0; nc_handle = setnetconfig(); while ((nconf = getnetconfig(nc_handle))) { if (nconf->nc_flag & NC_VISIBLE) { if (have_v6 == 0 && strcmp(nconf->nc_protofmly, "inet6") == 0) { /* DO NOTHING */ } else if (port_list != NULL) { if (port_pos >= port_len) { syslog(LOG_ERR, "too many port#s"); exit(1); } complete_service(nconf, port_list[port_pos++]); } else complete_service(nconf, svcport_str); } } endnetconfig(nc_handle); free(sock_fd); if (port_list != NULL) { for (port_pos = 0; port_pos < port_len; port_pos++) free(port_list[port_pos]); free(port_list); } if (xcreated == 0) { syslog(LOG_ERR, "could not create any services"); exit(1); } /* Expand svc_run() here so that we can call get_exportlist(). */ for (;;) { if (got_sighup) { get_exportlist(); got_sighup = 0; } readfds = svc_fdset; switch (select(svc_maxfd + 1, &readfds, NULL, NULL, NULL)) { case -1: if (errno == EINTR) continue; syslog(LOG_ERR, "mountd died: select: %m"); exit(1); case 0: continue; default: svc_getreqset(&readfds); } } } /* * This routine creates and binds sockets on the appropriate * addresses. It gets called one time for each transport. * It returns 0 upon success, 1 if the call should be ignored, and -1 to * indicate that bind failed with EADDRINUSE. * Any file descriptors that have been created are stored in sock_fd and * the total count of them is maintained in sock_fdcnt.
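Note why svc_run() is open-coded above: huphandler() merely sets got_sighup, and the select loop re-reads the exports file in ordinary context, keeping get_exportlist()'s malloc/syslog work out of the signal handler. A stripped-down sketch of the same deferred-work pattern (names here are generic, not mountd's):

    #include <sys/select.h>
    #include <signal.h>
    #include <stdio.h>

    static volatile sig_atomic_t reload_wanted;

    static void
    hup(int sig)
    {
        (void)sig;
        reload_wanted = 1;    /* async-signal-safe: only set a flag */
    }

    int
    main(void)
    {
        fd_set fds;

        signal(SIGHUP, hup);
        for (;;) {
            if (reload_wanted) {    /* ordinary context: safe work here */
                reload_wanted = 0;
                printf("re-reading configuration\n");
            }
            FD_ZERO(&fds);
            FD_SET(0, &fds);
            /* On FreeBSD select(2) is not restarted: SIGHUP => EINTR. */
            if (select(1, &fds, NULL, NULL, NULL) < 0)
                continue;
        }
    }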
*/ static int create_service(struct netconfig *nconf) { struct addrinfo hints, *res = NULL; struct sockaddr_in *sin; struct sockaddr_in6 *sin6; struct __rpc_sockinfo si; int aicode; int fd; int nhostsbak; int one = 1; int r; u_int32_t host_addr[4]; /* IPv4 or IPv6 */ int mallocd_res; if ((nconf->nc_semantics != NC_TPI_CLTS) && (nconf->nc_semantics != NC_TPI_COTS) && (nconf->nc_semantics != NC_TPI_COTS_ORD)) return (1); /* not my type */ /* * XXX - using RPC library internal functions. */ if (!__rpc_nconf2sockinfo(nconf, &si)) { syslog(LOG_ERR, "cannot get information for %s", nconf->nc_netid); return (1); } /* Get mountd's address on this transport */ memset(&hints, 0, sizeof hints); hints.ai_family = si.si_af; hints.ai_socktype = si.si_socktype; hints.ai_protocol = si.si_proto; /* * Bind to specific IPs if asked to */ nhostsbak = nhosts; while (nhostsbak > 0) { --nhostsbak; sock_fd = realloc(sock_fd, (sock_fdcnt + 1) * sizeof(int)); if (sock_fd == NULL) out_of_mem(); sock_fd[sock_fdcnt++] = -1; /* Set invalid for now. */ mallocd_res = 0; hints.ai_flags = AI_PASSIVE; /* * XXX - using RPC library internal functions. */ if ((fd = __rpc_nconf2fd(nconf)) < 0) { int non_fatal = 0; if (errno == EAFNOSUPPORT && nconf->nc_semantics != NC_TPI_CLTS) non_fatal = 1; syslog(non_fatal ? LOG_DEBUG : LOG_ERR, "cannot create socket for %s", nconf->nc_netid); if (non_fatal != 0) continue; exit(1); } switch (hints.ai_family) { case AF_INET: if (inet_pton(AF_INET, hosts[nhostsbak], host_addr) == 1) { hints.ai_flags |= AI_NUMERICHOST; } else { /* * Skip if we have an AF_INET6 address. */ if (inet_pton(AF_INET6, hosts[nhostsbak], host_addr) == 1) { close(fd); continue; } } break; case AF_INET6: if (inet_pton(AF_INET6, hosts[nhostsbak], host_addr) == 1) { hints.ai_flags |= AI_NUMERICHOST; } else { /* * Skip if we have an AF_INET address. */ if (inet_pton(AF_INET, hosts[nhostsbak], host_addr) == 1) { close(fd); continue; } } /* * We're doing host-based access checks here, so don't * allow v4-in-v6 to confuse things. The kernel will * disable it by default on NFS sockets too. 
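In the switch above, inet_pton() doubles as an address classifier: a return value of 1 means the -h argument parses as that family, so a mismatched host/transport pair is skipped instead of handed to getaddrinfo(). A small standalone illustration of that test (hostnames are examples):

    #include <sys/socket.h>
    #include <arpa/inet.h>
    #include <stdio.h>

    /* Illustrative classifier in the spirit of the checks above. */
    static const char *
    addr_family_of(const char *host)
    {
        unsigned char buf[16];        /* room for an IPv6 address */

        if (inet_pton(AF_INET, host, buf) == 1)
            return ("inet");
        if (inet_pton(AF_INET6, host, buf) == 1)
            return ("inet6");
        return ("hostname");          /* left for getaddrinfo() */
    }

    int
    main(void)
    {
        printf("%s\n", addr_family_of("127.0.0.1"));        /* inet */
        printf("%s\n", addr_family_of("::1"));              /* inet6 */
        printf("%s\n", addr_family_of("nfs.example.org"));  /* hostname */
        return (0);
    }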
*/ if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof one) < 0) { syslog(LOG_ERR, "can't disable v4-in-v6 on IPv6 socket"); exit(1); } break; default: break; } /* * If no hosts were specified, just bind to INADDR_ANY */ if (strcmp("*", hosts[nhostsbak]) == 0) { if (svcport_str == NULL) { res = malloc(sizeof(struct addrinfo)); if (res == NULL) out_of_mem(); mallocd_res = 1; res->ai_flags = hints.ai_flags; res->ai_family = hints.ai_family; res->ai_protocol = hints.ai_protocol; switch (res->ai_family) { case AF_INET: sin = malloc(sizeof(struct sockaddr_in)); if (sin == NULL) out_of_mem(); sin->sin_family = AF_INET; sin->sin_port = htons(0); sin->sin_addr.s_addr = htonl(INADDR_ANY); res->ai_addr = (struct sockaddr*) sin; res->ai_addrlen = (socklen_t) sizeof(struct sockaddr_in); break; case AF_INET6: sin6 = malloc(sizeof(struct sockaddr_in6)); if (sin6 == NULL) out_of_mem(); sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(0); sin6->sin6_addr = in6addr_any; res->ai_addr = (struct sockaddr*) sin6; res->ai_addrlen = (socklen_t) sizeof(struct sockaddr_in6); break; default: syslog(LOG_ERR, "bad addr fam %d", res->ai_family); exit(1); } } else { if ((aicode = getaddrinfo(NULL, svcport_str, &hints, &res)) != 0) { syslog(LOG_ERR, "cannot get local address for %s: %s", nconf->nc_netid, gai_strerror(aicode)); close(fd); continue; } } } else { if ((aicode = getaddrinfo(hosts[nhostsbak], svcport_str, &hints, &res)) != 0) { syslog(LOG_ERR, "cannot get local address for %s: %s", nconf->nc_netid, gai_strerror(aicode)); close(fd); continue; } } /* Store the fd. */ sock_fd[sock_fdcnt - 1] = fd; /* Now, attempt the bind. */ r = bindresvport_sa(fd, res->ai_addr); if (r != 0) { if (errno == EADDRINUSE && mallocd_svcport != 0) { if (mallocd_res != 0) { free(res->ai_addr); free(res); } else freeaddrinfo(res); return (-1); } syslog(LOG_ERR, "bindresvport_sa: %m"); exit(1); } if (svcport_str == NULL) { svcport_str = malloc(NI_MAXSERV * sizeof(char)); if (svcport_str == NULL) out_of_mem(); mallocd_svcport = 1; if (getnameinfo(res->ai_addr, res->ai_addr->sa_len, NULL, NI_MAXHOST, svcport_str, NI_MAXSERV * sizeof(char), NI_NUMERICHOST | NI_NUMERICSERV)) errx(1, "Cannot get port number"); } if (mallocd_res != 0) { free(res->ai_addr); free(res); } else freeaddrinfo(res); res = NULL; } return (0); } /* * Called after all the create_service() calls have succeeded, to complete * the setup and registration. */ static void complete_service(struct netconfig *nconf, char *port_str) { struct addrinfo hints, *res = NULL; struct __rpc_sockinfo si; struct netbuf servaddr; SVCXPRT *transp = NULL; int aicode, fd, nhostsbak; int registered = 0; if ((nconf->nc_semantics != NC_TPI_CLTS) && (nconf->nc_semantics != NC_TPI_COTS) && (nconf->nc_semantics != NC_TPI_COTS_ORD)) return; /* not my type */ /* * XXX - using RPC library internal functions. */ if (!__rpc_nconf2sockinfo(nconf, &si)) { syslog(LOG_ERR, "cannot get information for %s", nconf->nc_netid); return; } nhostsbak = nhosts; while (nhostsbak > 0) { --nhostsbak; if (sock_fdpos >= sock_fdcnt) { /* Should never happen. */ syslog(LOG_ERR, "Ran out of socket fd's"); return; } fd = sock_fd[sock_fdpos++]; if (fd < 0) continue; /* * Using -1 tells listen(2) to use * kern.ipc.soacceptqueue for the backlog. 
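Because mountd compares the literal peer address against its host lists, a v4 client must arrive on a v4 socket rather than as a ::ffff:a.b.c.d mapped peer; hence IPV6_V6ONLY above. A minimal sketch of such a v6-only listener (port 0 and the error handling are illustrative):

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <err.h>
    #include <string.h>

    int
    main(void)
    {
        struct sockaddr_in6 sin6;
        int fd, one = 1;

        if ((fd = socket(AF_INET6, SOCK_STREAM, 0)) < 0)
            err(1, "socket");
        /* Refuse v4-mapped (::ffff:0:0/96) peers on this socket. */
        if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0)
            err(1, "setsockopt");
        memset(&sin6, 0, sizeof(sin6));
        sin6.sin6_family = AF_INET6;
        sin6.sin6_addr = in6addr_any;
        sin6.sin6_port = htons(0);      /* kernel-assigned port */
        if (bind(fd, (struct sockaddr *)&sin6, sizeof(sin6)) < 0)
            err(1, "bind");
        if (listen(fd, -1) < 0)         /* -1: use kern.ipc.soacceptqueue */
            err(1, "listen");
        return (0);
    }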
*/ if (nconf->nc_semantics != NC_TPI_CLTS) listen(fd, -1); if (nconf->nc_semantics == NC_TPI_CLTS) transp = svc_dg_create(fd, 0, 0); else transp = svc_vc_create(fd, RPC_MAXDATASIZE, RPC_MAXDATASIZE); if (transp != (SVCXPRT *) NULL) { if (!svc_reg(transp, MOUNTPROG, MOUNTVERS, mntsrv, NULL)) syslog(LOG_ERR, "can't register %s MOUNTVERS service", nconf->nc_netid); if (!force_v2) { if (!svc_reg(transp, MOUNTPROG, MOUNTVERS3, mntsrv, NULL)) syslog(LOG_ERR, "can't register %s MOUNTVERS3 service", nconf->nc_netid); } } else syslog(LOG_WARNING, "can't create %s services", nconf->nc_netid); if (registered == 0) { registered = 1; memset(&hints, 0, sizeof hints); hints.ai_flags = AI_PASSIVE; hints.ai_family = si.si_af; hints.ai_socktype = si.si_socktype; hints.ai_protocol = si.si_proto; if ((aicode = getaddrinfo(NULL, port_str, &hints, &res)) != 0) { syslog(LOG_ERR, "cannot get local address: %s", gai_strerror(aicode)); exit(1); } servaddr.buf = malloc(res->ai_addrlen); memcpy(servaddr.buf, res->ai_addr, res->ai_addrlen); servaddr.len = res->ai_addrlen; rpcb_set(MOUNTPROG, MOUNTVERS, nconf, &servaddr); rpcb_set(MOUNTPROG, MOUNTVERS3, nconf, &servaddr); xcreated++; freeaddrinfo(res); } } /* end while */ } /* * Clear out sockets after a failure to bind one of them, so that the * cycle of socket creation/binding can start anew. */ static void clearout_service(void) { int i; for (i = 0; i < sock_fdcnt; i++) { if (sock_fd[i] >= 0) { shutdown(sock_fd[i], SHUT_RDWR); close(sock_fd[i]); } } } static void usage(void) { fprintf(stderr, "usage: mountd [-2] [-d] [-e] [-l] [-n] [-p <port>] [-r] " "[-S] [-h <bindip>] [export_file ...]\n"); exit(1); } /* * The mount rpc service */ void mntsrv(struct svc_req *rqstp, SVCXPRT *transp) { struct exportlist *ep; struct dirlist *dp; struct fhreturn fhr; struct stat stb; struct statfs fsb; char host[NI_MAXHOST], numerichost[NI_MAXHOST]; int lookup_failed = 1; struct sockaddr *saddr; u_short sport; char rpcpath[MNTPATHLEN + 1], dirpath[MAXPATHLEN]; int bad = 0, defset, hostset; sigset_t sighup_mask; int numsecflavors, *secflavorsp; sigemptyset(&sighup_mask); sigaddset(&sighup_mask, SIGHUP); saddr = svc_getrpccaller(transp)->buf; switch (saddr->sa_family) { case AF_INET6: sport = ntohs(((struct sockaddr_in6 *)saddr)->sin6_port); break; case AF_INET: sport = ntohs(((struct sockaddr_in *)saddr)->sin_port); break; default: syslog(LOG_ERR, "request from unknown address family"); return; } switch (rqstp->rq_proc) { case MOUNTPROC_MNT: case MOUNTPROC_UMNT: case MOUNTPROC_UMNTALL: lookup_failed = getnameinfo(saddr, saddr->sa_len, host, sizeof host, NULL, 0, 0); } getnameinfo(saddr, saddr->sa_len, numerichost, sizeof numerichost, NULL, 0, NI_NUMERICHOST); switch (rqstp->rq_proc) { case NULLPROC: if (!svc_sendreply(transp, (xdrproc_t)xdr_void, NULL)) syslog(LOG_ERR, "can't send reply"); return; case MOUNTPROC_MNT: if (sport >= IPPORT_RESERVED && resvport_only) { syslog(LOG_NOTICE, "mount request from %s from unprivileged port", numerichost); svcerr_weakauth(transp); return; } if (!svc_getargs(transp, (xdrproc_t)xdr_dir, rpcpath)) { syslog(LOG_NOTICE, "undecodable mount request from %s", numerichost); svcerr_decode(transp); return; } /* * Get the real pathname and make sure it is a directory * or a regular file if the -r option was specified * and it exists.
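The sport test above is the entire reserved-port policy: unless -n was given, a request is honored only if its source port is below IPPORT_RESERVED (1024), which classically requires root on the client. The same check against a generic sockaddr, as a hedged sketch (the helper name is invented):

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <stdio.h>
    #include <string.h>

    /* Invented helper: 1 if saddr's source port is in the reserved range. */
    static int
    from_reserved_port(const struct sockaddr *saddr)
    {
        u_short sport;

        switch (saddr->sa_family) {
        case AF_INET:
            sport = ntohs(((const struct sockaddr_in *)saddr)->sin_port);
            break;
        case AF_INET6:
            sport = ntohs(((const struct sockaddr_in6 *)saddr)->sin6_port);
            break;
        default:
            return (0);    /* unknown family: never trusted */
        }
        return (sport < IPPORT_RESERVED);
    }

    int
    main(void)
    {
        struct sockaddr_in sin;

        memset(&sin, 0, sizeof(sin));
        sin.sin_family = AF_INET;
        sin.sin_port = htons(1023);
        printf("%d\n", from_reserved_port((struct sockaddr *)&sin)); /* 1 */
        return (0);
    }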
*/ if (realpath(rpcpath, dirpath) == NULL || stat(dirpath, &stb) < 0 || statfs(dirpath, &fsb) < 0) { chdir("/"); /* Just in case realpath doesn't */ syslog(LOG_NOTICE, "mount request from %s for non existent path %s", numerichost, dirpath); if (debug) warnx("stat failed on %s", dirpath); bad = ENOENT; /* We will send error reply later */ } if (!bad && !S_ISDIR(stb.st_mode) && (dir_only || !S_ISREG(stb.st_mode))) { syslog(LOG_NOTICE, "mount request from %s for non-directory path %s", numerichost, dirpath); if (debug) warnx("mounting non-directory %s", dirpath); bad = ENOTDIR; /* We will send error reply later */ } /* Check in the exports list */ sigprocmask(SIG_BLOCK, &sighup_mask, NULL); if (bad) ep = NULL; else ep = ex_search(&fsb.f_fsid); hostset = defset = 0; if (ep && (chk_host(ep->ex_defdir, saddr, &defset, &hostset, &numsecflavors, &secflavorsp) || ((dp = dirp_search(ep->ex_dirl, dirpath)) && chk_host(dp, saddr, &defset, &hostset, &numsecflavors, &secflavorsp)) || (defset && scan_tree(ep->ex_defdir, saddr) == 0 && scan_tree(ep->ex_dirl, saddr) == 0))) { if (bad) { if (!svc_sendreply(transp, (xdrproc_t)xdr_long, (caddr_t)&bad)) syslog(LOG_ERR, "can't send reply"); sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL); return; } if (hostset & DP_HOSTSET) { fhr.fhr_flag = hostset; fhr.fhr_numsecflavors = numsecflavors; fhr.fhr_secflavors = secflavorsp; } else { fhr.fhr_flag = defset; fhr.fhr_numsecflavors = ep->ex_defnumsecflavors; fhr.fhr_secflavors = ep->ex_defsecflavors; } fhr.fhr_vers = rqstp->rq_vers; /* Get the file handle */ memset(&fhr.fhr_fh, 0, sizeof(nfsfh_t)); if (getfh(dirpath, (fhandle_t *)&fhr.fhr_fh) < 0) { bad = errno; syslog(LOG_ERR, "can't get fh for %s", dirpath); if (!svc_sendreply(transp, (xdrproc_t)xdr_long, (caddr_t)&bad)) syslog(LOG_ERR, "can't send reply"); sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL); return; } if (!svc_sendreply(transp, (xdrproc_t)xdr_fhs, (caddr_t)&fhr)) syslog(LOG_ERR, "can't send reply"); if (!lookup_failed) add_mlist(host, dirpath); else add_mlist(numerichost, dirpath); if (debug) warnx("mount successful"); if (dolog) syslog(LOG_NOTICE, "mount request succeeded from %s for %s", numerichost, dirpath); } else { if (!bad) bad = EACCES; syslog(LOG_NOTICE, "mount request denied from %s for %s", numerichost, dirpath); } if (bad && !svc_sendreply(transp, (xdrproc_t)xdr_long, (caddr_t)&bad)) syslog(LOG_ERR, "can't send reply"); sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL); return; case MOUNTPROC_DUMP: if (!svc_sendreply(transp, (xdrproc_t)xdr_mlist, (caddr_t)NULL)) syslog(LOG_ERR, "can't send reply"); else if (dolog) syslog(LOG_NOTICE, "dump request succeeded from %s", numerichost); return; case MOUNTPROC_UMNT: if (sport >= IPPORT_RESERVED && resvport_only) { syslog(LOG_NOTICE, "umount request from %s from unprivileged port", numerichost); svcerr_weakauth(transp); return; } if (!svc_getargs(transp, (xdrproc_t)xdr_dir, rpcpath)) { syslog(LOG_NOTICE, "undecodable umount request from %s", numerichost); svcerr_decode(transp); return; } if (realpath(rpcpath, dirpath) == NULL) { syslog(LOG_NOTICE, "umount request from %s " "for non existent path %s", numerichost, dirpath); } if (!svc_sendreply(transp, (xdrproc_t)xdr_void, (caddr_t)NULL)) syslog(LOG_ERR, "can't send reply"); if (!lookup_failed) del_mlist(host, dirpath); del_mlist(numerichost, dirpath); if (dolog) syslog(LOG_NOTICE, "umount request succeeded from %s for %s", numerichost, dirpath); return; case MOUNTPROC_UMNTALL: if (sport >= IPPORT_RESERVED && resvport_only) { syslog(LOG_NOTICE, "umountall 
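A granted MNT request hinges on getfh(2), which turns a path into the opaque fhandle_t that the client later presents to nfsd. A hedged sketch of just that call (must run as root on FreeBSD; the program name and output are illustrative):

    #include <sys/param.h>
    #include <sys/mount.h>
    #include <err.h>
    #include <stdio.h>

    int
    main(int argc, char **argv)
    {
        fhandle_t fh;

        if (argc != 2)
            errx(1, "usage: fhdemo <path>");
        /* Requires root; fails with EPERM otherwise. */
        if (getfh(argv[1], &fh) < 0)
            err(1, "getfh(%s)", argv[1]);
        printf("got a %zu-byte file handle for %s\n", sizeof(fh), argv[1]);
        return (0);
    }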
request from %s from unprivileged port", numerichost); svcerr_weakauth(transp); return; } if (!svc_sendreply(transp, (xdrproc_t)xdr_void, (caddr_t)NULL)) syslog(LOG_ERR, "can't send reply"); if (!lookup_failed) del_mlist(host, NULL); del_mlist(numerichost, NULL); if (dolog) syslog(LOG_NOTICE, "umountall request succeeded from %s", numerichost); return; case MOUNTPROC_EXPORT: if (!svc_sendreply(transp, (xdrproc_t)xdr_explist, (caddr_t)NULL)) if (!svc_sendreply(transp, (xdrproc_t)xdr_explist_brief, (caddr_t)NULL)) syslog(LOG_ERR, "can't send reply"); if (dolog) syslog(LOG_NOTICE, "export request succeeded from %s", numerichost); return; default: svcerr_noproc(transp); return; } } /* * Xdr conversion for a dirpath string */ static int xdr_dir(XDR *xdrsp, char *dirp) { return (xdr_string(xdrsp, &dirp, MNTPATHLEN)); } /* * Xdr routine to generate file handle reply */ static int xdr_fhs(XDR *xdrsp, caddr_t cp) { struct fhreturn *fhrp = (struct fhreturn *)cp; u_long ok = 0, len, auth; int i; if (!xdr_long(xdrsp, &ok)) return (0); switch (fhrp->fhr_vers) { case 1: return (xdr_opaque(xdrsp, (caddr_t)&fhrp->fhr_fh, NFSX_V2FH)); case 3: len = NFSX_V3FH; if (!xdr_long(xdrsp, &len)) return (0); if (!xdr_opaque(xdrsp, (caddr_t)&fhrp->fhr_fh, len)) return (0); if (fhrp->fhr_numsecflavors) { if (!xdr_int(xdrsp, &fhrp->fhr_numsecflavors)) return (0); for (i = 0; i < fhrp->fhr_numsecflavors; i++) if (!xdr_int(xdrsp, &fhrp->fhr_secflavors[i])) return (0); return (1); } else { auth = AUTH_SYS; len = 1; if (!xdr_long(xdrsp, &len)) return (0); return (xdr_long(xdrsp, &auth)); } } return (0); } static int xdr_mlist(XDR *xdrsp, caddr_t cp __unused) { struct mountlist *mlp; int true = 1; int false = 0; char *strp; SLIST_FOREACH(mlp, &mlhead, next) { if (!xdr_bool(xdrsp, &true)) return (0); strp = &mlp->ml_host[0]; if (!xdr_string(xdrsp, &strp, MNTNAMLEN)) return (0); strp = &mlp->ml_dirp[0]; if (!xdr_string(xdrsp, &strp, MNTPATHLEN)) return (0); } if (!xdr_bool(xdrsp, &false)) return (0); return (1); } /* * Xdr conversion for export list */ static int xdr_explist_common(XDR *xdrsp, caddr_t cp __unused, int brief) { struct exportlist *ep; int false = 0; int putdef; sigset_t sighup_mask; sigemptyset(&sighup_mask); sigaddset(&sighup_mask, SIGHUP); sigprocmask(SIG_BLOCK, &sighup_mask, NULL); SLIST_FOREACH(ep, &exphead, entries) { putdef = 0; if (put_exlist(ep->ex_dirl, xdrsp, ep->ex_defdir, &putdef, brief)) goto errout; if (ep->ex_defdir && putdef == 0 && put_exlist(ep->ex_defdir, xdrsp, (struct dirlist *)NULL, &putdef, brief)) goto errout; } sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL); if (!xdr_bool(xdrsp, &false)) return (0); return (1); errout: sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL); return (0); } /* * Called from xdr_explist() to traverse the tree and export the * directory paths. 
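xdr_mlist() and xdr_explist_common() above both emit the classic XDR linked-list encoding: a TRUE boolean before every entry and a single FALSE after the last, so the decoder needs no element count up front. A self-contained sketch encoding a string list into a memory buffer with the same convention (list contents are illustrative):

    #include <rpc/rpc.h>
    #include <stdio.h>

    #define MAXNAME 255

    int
    main(void)
    {
        const char *names[] = { "/export", "/usr/src", NULL };
        char buf[512], *strp;
        XDR xdrs;
        bool_t more = TRUE, end = FALSE;
        int i;

        xdrmem_create(&xdrs, buf, sizeof(buf), XDR_ENCODE);
        for (i = 0; names[i] != NULL; i++) {
            strp = __DECONST(char *, names[i]);
            /* TRUE announces one more entry ... */
            if (!xdr_bool(&xdrs, &more) ||
                !xdr_string(&xdrs, &strp, MAXNAME))
                return (1);
        }
        /* ... and FALSE terminates the list. */
        if (!xdr_bool(&xdrs, &end))
            return (1);
        printf("encoded %u bytes\n", xdr_getpos(&xdrs));
        xdr_destroy(&xdrs);
        return (0);
    }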
*/ static int put_exlist(struct dirlist *dp, XDR *xdrsp, struct dirlist *adp, int *putdefp, int brief) { struct grouplist *grp; struct hostlist *hp; int true = 1; int false = 0; int gotalldir = 0; char *strp; if (dp) { if (put_exlist(dp->dp_left, xdrsp, adp, putdefp, brief)) return (1); if (!xdr_bool(xdrsp, &true)) return (1); strp = dp->dp_dirp; if (!xdr_string(xdrsp, &strp, MNTPATHLEN)) return (1); if (adp && !strcmp(dp->dp_dirp, adp->dp_dirp)) { gotalldir = 1; *putdefp = 1; } if (brief) { if (!xdr_bool(xdrsp, &true)) return (1); strp = "(...)"; if (!xdr_string(xdrsp, &strp, MNTPATHLEN)) return (1); } else if ((dp->dp_flag & DP_DEFSET) == 0 && (gotalldir == 0 || (adp->dp_flag & DP_DEFSET) == 0)) { hp = dp->dp_hosts; while (hp) { grp = hp->ht_grp; if (grp->gr_type == GT_HOST) { if (!xdr_bool(xdrsp, &true)) return (1); strp = grp->gr_ptr.gt_addrinfo->ai_canonname; if (!xdr_string(xdrsp, &strp, MNTNAMLEN)) return (1); } else if (grp->gr_type == GT_NET) { if (!xdr_bool(xdrsp, &true)) return (1); strp = grp->gr_ptr.gt_net.nt_name; if (!xdr_string(xdrsp, &strp, MNTNAMLEN)) return (1); } hp = hp->ht_next; if (gotalldir && hp == (struct hostlist *)NULL) { hp = adp->dp_hosts; gotalldir = 0; } } } if (!xdr_bool(xdrsp, &false)) return (1); if (put_exlist(dp->dp_right, xdrsp, adp, putdefp, brief)) return (1); } return (0); } static int xdr_explist(XDR *xdrsp, caddr_t cp) { return xdr_explist_common(xdrsp, cp, 0); } static int xdr_explist_brief(XDR *xdrsp, caddr_t cp) { return xdr_explist_common(xdrsp, cp, 1); } static char *line; static size_t linesize; static FILE *exp_file; /* * Get the export list from one, currently open file */ static void get_exportlist_one(void) { struct exportlist *ep; struct grouplist *grp, *tgrp; struct dirlist *dirhead; struct statfs fsb; struct xucred anon; char *cp, *endcp, *dirp, *hst, *usr, *dom, savedc; int len, has_host, exflags, got_nondir, dirplen, netgrp; v4root_phase = 0; dirhead = (struct dirlist *)NULL; while (get_line()) { if (debug) warnx("got line %s", line); cp = line; nextfield(&cp, &endcp); if (*cp == '#') goto nextline; /* * Set defaults. */ has_host = FALSE; anon = def_anon; exflags = MNT_EXPORTED; got_nondir = 0; opt_flags = 0; ep = (struct exportlist *)NULL; dirp = NULL; /* * Handle the V4 root dir. */ if (*cp == 'V' && *(cp + 1) == '4' && *(cp + 2) == ':') { /* * V4: just indicates that it is the v4 root point, * so skip over that and set v4root_phase. 
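For reference, the input this parser consumes looks like the following /etc/exports fragment (paths, addresses, and hosts are examples only); the V4: line is the special case tracked by v4root_phase:

    /export/work -maproot=root host1 host2
    /export/media -network 192.168.1.0 -mask 255.255.255.0
    V4: /export -sec=krb5:krb5i:sys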
*/ if (v4root_phase > 0) { syslog(LOG_ERR, "V4:duplicate line, ignored"); goto nextline; } v4root_phase = 1; cp += 3; nextfield(&cp, &endcp); } /* * Create new exports list entry */ len = endcp - cp; tgrp = grp = get_grp(); while (len > 0) { if (len > MNTNAMLEN) { getexp_err(ep, tgrp, "mountpoint too long"); goto nextline; } if (*cp == '-') { if (ep == (struct exportlist *)NULL) { getexp_err(ep, tgrp, "flag before export path definition"); goto nextline; } if (debug) warnx("doing opt %s", cp); got_nondir = 1; if (do_opt(&cp, &endcp, ep, grp, &has_host, &exflags, &anon)) { getexp_err(ep, tgrp, NULL); goto nextline; } } else if (*cp == '/') { savedc = *endcp; *endcp = '\0'; if (v4root_phase > 1) { if (dirp != NULL) { getexp_err(ep, tgrp, "Multiple V4 dirs"); goto nextline; } } if (check_dirpath(cp) && statfs(cp, &fsb) >= 0) { if ((fsb.f_flags & MNT_AUTOMOUNTED) != 0) syslog(LOG_ERR, "Warning: exporting of " "automounted fs %s not supported", cp); if (got_nondir) { getexp_err(ep, tgrp, "dirs must be first"); goto nextline; } if (v4root_phase == 1) { if (dirp != NULL) { getexp_err(ep, tgrp, "Multiple V4 dirs"); goto nextline; } if (strlen(v4root_dirpath) == 0) { strlcpy(v4root_dirpath, cp, sizeof (v4root_dirpath)); } else if (strcmp(v4root_dirpath, cp) != 0) { syslog(LOG_ERR, "different V4 dirpath %s", cp); getexp_err(ep, tgrp, NULL); goto nextline; } dirp = cp; v4root_phase = 2; got_nondir = 1; ep = get_exp(); } else { if (ep) { if (ep->ex_fs.val[0] != fsb.f_fsid.val[0] || ep->ex_fs.val[1] != fsb.f_fsid.val[1]) { getexp_err(ep, tgrp, "fsid mismatch"); goto nextline; } } else { /* * See if this directory is already * in the list. */ ep = ex_search(&fsb.f_fsid); if (ep == (struct exportlist *)NULL) { ep = get_exp(); ep->ex_fs = fsb.f_fsid; ep->ex_fsdir = strdup(fsb.f_mntonname); if (ep->ex_fsdir == NULL) out_of_mem(); if (debug) warnx( "making new ep fs=0x%x,0x%x", fsb.f_fsid.val[0], fsb.f_fsid.val[1]); } else if (debug) warnx("found ep fs=0x%x,0x%x", fsb.f_fsid.val[0], fsb.f_fsid.val[1]); } /* * Add dirpath to export mount point. */ dirp = add_expdir(&dirhead, cp, len); dirplen = len; } } else { getexp_err(ep, tgrp, "symbolic link in export path or statfs failed"); goto nextline; } *endcp = savedc; } else { savedc = *endcp; *endcp = '\0'; got_nondir = 1; if (ep == (struct exportlist *)NULL) { getexp_err(ep, tgrp, "host(s) before export path definition"); goto nextline; } /* * Get the host or netgroup. */ setnetgrent(cp); netgrp = getnetgrent(&hst, &usr, &dom); do { if (has_host) { grp->gr_next = get_grp(); grp = grp->gr_next; } if (netgrp) { if (hst == 0) { syslog(LOG_ERR, "null hostname in netgroup %s, skipping", cp); grp->gr_type = GT_IGNORE; } else if (get_host(hst, grp, tgrp)) { syslog(LOG_ERR, "bad host %s in netgroup %s, skipping", hst, cp); grp->gr_type = GT_IGNORE; } } else if (get_host(cp, grp, tgrp)) { syslog(LOG_ERR, "bad host %s, skipping", cp); grp->gr_type = GT_IGNORE; } has_host = TRUE; } while (netgrp && getnetgrent(&hst, &usr, &dom)); endnetgrent(); *endcp = savedc; } cp = endcp; nextfield(&cp, &endcp); len = endcp - cp; } if (check_options(dirhead)) { getexp_err(ep, tgrp, NULL); goto nextline; } if (!has_host) { grp->gr_type = GT_DEFAULT; if (debug) warnx("adding a default entry"); /* * Don't allow a network export to coincide with a list of * host(s) on the same line.
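A bare name field above is first tried as a netgroup: setnetgrent()/getnetgrent() walk its (host, user, domain) triples, and only when that yields nothing is the field treated as a plain hostname. A minimal sketch of the iteration (the netgroup name is illustrative and must exist in netgroup(5) or NIS):

    #include <netdb.h>
    #include <stdio.h>

    int
    main(void)
    {
        char *host, *user, *dom;
        int found = 0;

        setnetgrent("nfsclients");    /* illustrative netgroup name */
        while (getnetgrent(&host, &user, &dom)) {
            printf("member host: %s\n", host != NULL ? host : "(any)");
            found = 1;
        }
        endnetgrent();
        if (!found)
            printf("no such netgroup; fall back to hostname lookup\n");
        return (0);
    }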
*/ } else if ((opt_flags & OP_NET) && tgrp->gr_next) { getexp_err(ep, tgrp, "network/host conflict"); goto nextline; /* * If an export list was specified on this line, make sure * that we have at least one valid entry, otherwise skip it. */ } else { grp = tgrp; while (grp && grp->gr_type == GT_IGNORE) grp = grp->gr_next; if (! grp) { getexp_err(ep, tgrp, "no valid entries"); goto nextline; } } if (v4root_phase == 1) { getexp_err(ep, tgrp, "V4:root, no dirp, ignored"); goto nextline; } /* * Loop through hosts, pushing the exports into the kernel. * After loop, tgrp points to the start of the list and * grp points to the last entry in the list. */ grp = tgrp; do { if (do_mount(ep, grp, exflags, &anon, dirp, dirplen, &fsb)) { getexp_err(ep, tgrp, NULL); goto nextline; } } while (grp->gr_next && (grp = grp->gr_next)); /* * For V4: don't enter in mount lists. */ if (v4root_phase > 0 && v4root_phase <= 2) { /* * Since these structures aren't used by mountd, * free them up now. */ if (ep != NULL) free_exp(ep); while (tgrp != NULL) { grp = tgrp; tgrp = tgrp->gr_next; free_grp(grp); } goto nextline; } /* * Success. Update the data structures. */ if (has_host) { hang_dirp(dirhead, tgrp, ep, opt_flags); grp->gr_next = grphead; grphead = tgrp; } else { hang_dirp(dirhead, (struct grouplist *)NULL, ep, opt_flags); free_grp(grp); } dirhead = (struct dirlist *)NULL; if ((ep->ex_flag & EX_LINKED) == 0) { SLIST_INSERT_HEAD(&exphead, ep, entries); ep->ex_flag |= EX_LINKED; } nextline: v4root_phase = 0; if (dirhead) { free_dir(dirhead); dirhead = (struct dirlist *)NULL; } } } /* * Get the export list from all specified files */ static void get_exportlist(void) { struct exportlist *ep, *ep2; struct grouplist *grp, *tgrp; struct export_args export; struct iovec *iov; struct statfs *fsp, *mntbufp; struct xvfsconf vfc; char errmsg[255]; int num, i; int iovlen; int done; struct nfsex_args eargs; if (suspend_nfsd != 0) (void)nfssvc(NFSSVC_SUSPENDNFSD, NULL); v4root_dirpath[0] = '\0'; bzero(&export, sizeof(export)); export.ex_flags = MNT_DELEXPORT; iov = NULL; iovlen = 0; bzero(errmsg, sizeof(errmsg)); /* * First, get rid of the old list */ SLIST_FOREACH_SAFE(ep, &exphead, entries, ep2) { SLIST_REMOVE(&exphead, ep, exportlist, entries); free_exp(ep); } grp = grphead; while (grp) { tgrp = grp; grp = grp->gr_next; free_grp(tgrp); } grphead = (struct grouplist *)NULL; /* * and the old V4 root dir. */ bzero(&eargs, sizeof (eargs)); eargs.export.ex_flags = MNT_DELEXPORT; if (nfssvc(NFSSVC_V4ROOTEXPORT, (caddr_t)&eargs) < 0 && errno != ENOENT) syslog(LOG_ERR, "Can't delete exports for V4:"); /* * and clear flag that notes if a public fh has been exported. */ has_publicfh = 0; /* * And delete exports that are in the kernel for all local * filesystems. * XXX: Should know how to handle all local exportable filesystems. */ num = getmntinfo(&mntbufp, MNT_NOWAIT); if (num > 0) { build_iovec(&iov, &iovlen, "fstype", NULL, 0); build_iovec(&iov, &iovlen, "fspath", NULL, 0); build_iovec(&iov, &iovlen, "from", NULL, 0); build_iovec(&iov, &iovlen, "update", NULL, 0); build_iovec(&iov, &iovlen, "export", &export, sizeof(export)); build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg)); } for (i = 0; i < num; i++) { fsp = &mntbufp[i]; if (getvfsbyname(fsp->f_fstypename, &vfc) != 0) { syslog(LOG_ERR, "getvfsbyname() failed for %s", fsp->f_fstypename); continue; } /* * We do not need to delete "export" flag from * filesystems that do not have it set. 
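The teardown above leans on SLIST_FOREACH_SAFE(), whose extra lookahead variable lets the current element be freed mid-walk. A tiny demonstration of the idiom with a toy element type:

    #include <sys/queue.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct item {
        int v;
        SLIST_ENTRY(item) entries;
    };
    static SLIST_HEAD(, item) head = SLIST_HEAD_INITIALIZER(head);

    int
    main(void)
    {
        struct item *ip, *ip2;
        int i;

        for (i = 0; i < 3; i++) {
            if ((ip = calloc(1, sizeof(*ip))) == NULL)
                return (1);
            ip->v = i;
            SLIST_INSERT_HEAD(&head, ip, entries);
        }
        /* ip2 holds the lookahead, so freeing ip mid-walk is safe. */
        SLIST_FOREACH_SAFE(ip, &head, entries, ip2) {
            SLIST_REMOVE(&head, ip, item, entries);
            printf("freeing %d\n", ip->v);
            free(ip);
        }
        return (0);
    }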
*/ if (!(fsp->f_flags & MNT_EXPORTED)) continue; /* * Do not delete the export for a network filesystem by * passing the "export" arg to nmount(). * It only makes sense to do this for local filesystems. */ if (vfc.vfc_flags & VFCF_NETWORK) continue; iov[1].iov_base = fsp->f_fstypename; iov[1].iov_len = strlen(fsp->f_fstypename) + 1; iov[3].iov_base = fsp->f_mntonname; iov[3].iov_len = strlen(fsp->f_mntonname) + 1; iov[5].iov_base = fsp->f_mntfromname; iov[5].iov_len = strlen(fsp->f_mntfromname) + 1; errmsg[0] = '\0'; /* * EXDEV is returned when the path exists but is not a * mount point. This may happen if we raced with an unmount. */ if (nmount(iov, iovlen, fsp->f_flags) < 0 && errno != ENOENT && errno != ENOTSUP && errno != EXDEV) { syslog(LOG_ERR, "can't delete exports for %s: %m %s", fsp->f_mntonname, errmsg); } } if (iov != NULL) { /* Free strings allocated by strdup() in getmntopts.c */ free(iov[0].iov_base); /* fstype */ free(iov[2].iov_base); /* fspath */ free(iov[4].iov_base); /* from */ free(iov[6].iov_base); /* update */ free(iov[8].iov_base); /* export */ free(iov[10].iov_base); /* errmsg */ /* free iov, allocated by realloc() */ free(iov); iovlen = 0; } /* * Read in the exports file and build the list, calling * nmount() as we go along to push the export rules into the kernel. */ done = 0; for (i = 0; exnames[i] != NULL; i++) { if (debug) warnx("reading exports from %s", exnames[i]); if ((exp_file = fopen(exnames[i], "r")) == NULL) { syslog(LOG_WARNING, "can't open %s", exnames[i]); continue; } get_exportlist_one(); fclose(exp_file); done++; } if (done == 0) { syslog(LOG_ERR, "can't open any exports file"); exit(2); } /* * If there was no public fh, clear any previous one set. */ if (has_publicfh == 0) (void) nfssvc(NFSSVC_NOPUBLICFH, NULL); /* Resume the nfsd. If they weren't suspended, this is harmless. */ (void)nfssvc(NFSSVC_RESUMENFSD, NULL); } /* * Allocate an export list element */ static struct exportlist * get_exp(void) { struct exportlist *ep; ep = (struct exportlist *)calloc(1, sizeof (struct exportlist)); if (ep == (struct exportlist *)NULL) out_of_mem(); return (ep); } /* * Allocate a group list element */ static struct grouplist * get_grp(void) { struct grouplist *gp; gp = (struct grouplist *)calloc(1, sizeof (struct grouplist)); if (gp == (struct grouplist *)NULL) out_of_mem(); return (gp); } /* * Clean up upon an error in get_exportlist(). */ static void getexp_err(struct exportlist *ep, struct grouplist *grp, const char *reason) { struct grouplist *tgrp; if (!(opt_flags & OP_QUIET)) { if (reason != NULL) syslog(LOG_ERR, "bad exports list line '%s': %s", line, reason); else syslog(LOG_ERR, "bad exports list line '%s'", line); } if (ep && (ep->ex_flag & EX_LINKED) == 0) free_exp(ep); while (grp) { tgrp = grp; grp = grp->gr_next; free_grp(tgrp); } } /* * Search the export list for a matching fs. */ static struct exportlist * ex_search(fsid_t *fsid) { struct exportlist *ep; SLIST_FOREACH(ep, &exphead, entries) { if (ep->ex_fs.val[0] == fsid->val[0] && ep->ex_fs.val[1] == fsid->val[1]) return (ep); } return (ep); } /* * Add a directory path to the list.
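The delete pass above drives nmount(2) with name/value pairs assembled by build_iovec() from the mount(8) sources (mountd links getmntopts.c for it). A condensed, hedged sketch of clearing the export rules on one filesystem, assuming that helper is available; the fstype and path are examples:

    #include <sys/param.h>
    #include <sys/mount.h>
    #include <sys/uio.h>
    #include <err.h>
    #include <string.h>

    #include "mntopts.h"    /* build_iovec(), from src/sbin/mount */

    int
    main(void)
    {
        struct export_args export;
        struct iovec *iov = NULL;
        char errmsg[255];
        int iovlen = 0;

        bzero(&export, sizeof(export));
        export.ex_flags = MNT_DELEXPORT;    /* drop all export rules */
        bzero(errmsg, sizeof(errmsg));
        build_iovec(&iov, &iovlen, "fstype", "ufs", (size_t)-1);
        build_iovec(&iov, &iovlen, "fspath", "/export", (size_t)-1);
        build_iovec(&iov, &iovlen, "update", NULL, 0);
        build_iovec(&iov, &iovlen, "export", &export, sizeof(export));
        build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg));
        if (nmount(iov, iovlen, 0) < 0)
            err(1, "nmount: %s", errmsg);
        return (0);
    }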
*/ static char * add_expdir(struct dirlist **dpp, char *cp, int len) { struct dirlist *dp; dp = malloc(sizeof (struct dirlist)); if (dp == (struct dirlist *)NULL) out_of_mem(); dp->dp_left = *dpp; dp->dp_right = (struct dirlist *)NULL; dp->dp_flag = 0; dp->dp_hosts = (struct hostlist *)NULL; dp->dp_dirp = strndup(cp, len); if (dp->dp_dirp == NULL) out_of_mem(); *dpp = dp; return (dp->dp_dirp); } /* * Hang the dir list element off the dirpath binary tree as required * and update the entry for host. */ static void hang_dirp(struct dirlist *dp, struct grouplist *grp, struct exportlist *ep, int flags) { struct hostlist *hp; struct dirlist *dp2; if (flags & OP_ALLDIRS) { if (ep->ex_defdir) free((caddr_t)dp); else ep->ex_defdir = dp; if (grp == (struct grouplist *)NULL) { ep->ex_defdir->dp_flag |= DP_DEFSET; /* Save the default security flavors list. */ ep->ex_defnumsecflavors = ep->ex_numsecflavors; if (ep->ex_numsecflavors > 0) memcpy(ep->ex_defsecflavors, ep->ex_secflavors, sizeof(ep->ex_secflavors)); } else while (grp) { hp = get_ht(); hp->ht_grp = grp; hp->ht_next = ep->ex_defdir->dp_hosts; ep->ex_defdir->dp_hosts = hp; /* Save the security flavors list for this host set. */ grp->gr_numsecflavors = ep->ex_numsecflavors; if (ep->ex_numsecflavors > 0) memcpy(grp->gr_secflavors, ep->ex_secflavors, sizeof(ep->ex_secflavors)); grp = grp->gr_next; } } else { /* * Loop through the directories adding them to the tree. */ while (dp) { dp2 = dp->dp_left; add_dlist(&ep->ex_dirl, dp, grp, flags, ep); dp = dp2; } } } /* * Traverse the binary tree either updating a node that is already there * for the new directory or adding the new node. */ static void add_dlist(struct dirlist **dpp, struct dirlist *newdp, struct grouplist *grp, int flags, struct exportlist *ep) { struct dirlist *dp; struct hostlist *hp; int cmp; dp = *dpp; if (dp) { cmp = strcmp(dp->dp_dirp, newdp->dp_dirp); if (cmp > 0) { add_dlist(&dp->dp_left, newdp, grp, flags, ep); return; } else if (cmp < 0) { add_dlist(&dp->dp_right, newdp, grp, flags, ep); return; } else free((caddr_t)newdp); } else { dp = newdp; dp->dp_left = (struct dirlist *)NULL; *dpp = dp; } if (grp) { /* * Hang all of the host(s) off of the directory point. */ do { hp = get_ht(); hp->ht_grp = grp; hp->ht_next = dp->dp_hosts; dp->dp_hosts = hp; /* Save the security flavors list for this host set. */ grp->gr_numsecflavors = ep->ex_numsecflavors; if (ep->ex_numsecflavors > 0) memcpy(grp->gr_secflavors, ep->ex_secflavors, sizeof(ep->ex_secflavors)); grp = grp->gr_next; } while (grp); } else { dp->dp_flag |= DP_DEFSET; /* Save the default security flavors list. */ ep->ex_defnumsecflavors = ep->ex_numsecflavors; if (ep->ex_numsecflavors > 0) memcpy(ep->ex_defsecflavors, ep->ex_secflavors, sizeof(ep->ex_secflavors)); } } /* * Search for a dirpath on the export point. */ static struct dirlist * dirp_search(struct dirlist *dp, char *dirp) { int cmp; if (dp) { cmp = strcmp(dp->dp_dirp, dirp); if (cmp > 0) return (dirp_search(dp->dp_left, dirp)); else if (cmp < 0) return (dirp_search(dp->dp_right, dirp)); else return (dp); } return (dp); } /* * Scan for a host match in a directory tree. 
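ep->ex_dirl is an unbalanced binary tree keyed by strcmp() on the directory path, and dirp_search() above is the textbook recursive lookup. The same shape in a toy, self-contained form (simplified node type, sample paths):

    #include <stdio.h>
    #include <string.h>

    struct node {
        const char *path;
        struct node *left, *right;
    };

    static struct node *
    search(struct node *np, const char *path)
    {
        int cmp;

        if (np == NULL)
            return (NULL);
        cmp = strcmp(np->path, path);
        if (cmp > 0)
            return (search(np->left, path));
        if (cmp < 0)
            return (search(np->right, path));
        return (np);
    }

    int
    main(void)
    {
        struct node a = { "/export", NULL, NULL };
        struct node b = { "/usr/src", NULL, NULL };
        struct node root = { "/home", &a, &b };

        printf("%s\n", search(&root, "/usr/src") ? "found" : "missing");
        return (0);
    }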
*/ static int chk_host(struct dirlist *dp, struct sockaddr *saddr, int *defsetp, int *hostsetp, int *numsecflavors, int **secflavorsp) { struct hostlist *hp; struct grouplist *grp; struct addrinfo *ai; if (dp) { if (dp->dp_flag & DP_DEFSET) *defsetp = dp->dp_flag; hp = dp->dp_hosts; while (hp) { grp = hp->ht_grp; switch (grp->gr_type) { case GT_HOST: ai = grp->gr_ptr.gt_addrinfo; for (; ai; ai = ai->ai_next) { if (!sacmp(ai->ai_addr, saddr, NULL)) { *hostsetp = (hp->ht_flag | DP_HOSTSET); if (numsecflavors != NULL) { *numsecflavors = grp->gr_numsecflavors; *secflavorsp = grp->gr_secflavors; } return (1); } } break; case GT_NET: if (!sacmp(saddr, (struct sockaddr *) &grp->gr_ptr.gt_net.nt_net, (struct sockaddr *) &grp->gr_ptr.gt_net.nt_mask)) { *hostsetp = (hp->ht_flag | DP_HOSTSET); if (numsecflavors != NULL) { *numsecflavors = grp->gr_numsecflavors; *secflavorsp = grp->gr_secflavors; } return (1); } break; } hp = hp->ht_next; } } return (0); } /* * Scan tree for a host that matches the address. */ static int scan_tree(struct dirlist *dp, struct sockaddr *saddr) { int defset, hostset; if (dp) { if (scan_tree(dp->dp_left, saddr)) return (1); if (chk_host(dp, saddr, &defset, &hostset, NULL, NULL)) return (1); if (scan_tree(dp->dp_right, saddr)) return (1); } return (0); } /* * Traverse the dirlist tree and free it up. */ static void free_dir(struct dirlist *dp) { if (dp) { free_dir(dp->dp_left); free_dir(dp->dp_right); free_host(dp->dp_hosts); free(dp->dp_dirp); free(dp); } } /* * Parse a colon separated list of security flavors */ static int parsesec(char *seclist, struct exportlist *ep) { char *cp, savedc; int flavor; ep->ex_numsecflavors = 0; for (;;) { cp = strchr(seclist, ':'); if (cp) { savedc = *cp; *cp = '\0'; } if (!strcmp(seclist, "sys")) flavor = AUTH_SYS; else if (!strcmp(seclist, "krb5")) flavor = RPCSEC_GSS_KRB5; else if (!strcmp(seclist, "krb5i")) flavor = RPCSEC_GSS_KRB5I; else if (!strcmp(seclist, "krb5p")) flavor = RPCSEC_GSS_KRB5P; else { if (cp) *cp = savedc; syslog(LOG_ERR, "bad sec flavor: %s", seclist); return (1); } if (ep->ex_numsecflavors == MAXSECFLAVORS) { if (cp) *cp = savedc; syslog(LOG_ERR, "too many sec flavors: %s", seclist); return (1); } ep->ex_secflavors[ep->ex_numsecflavors] = flavor; ep->ex_numsecflavors++; if (cp) { *cp = savedc; seclist = cp + 1; } else { break; } } return (0); } /* * Parse the option string and update fields. * Option arguments may either be -
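parsesec() splits its argument in place by temporarily NUL-terminating at each ':'. The same splitting can be sketched with strsep(3); the flavor numbers below are the standard RPC assignments (AUTH_SYS and the RPCSEC_GSS Kerberos pseudo-flavors), hard-coded here for illustration rather than taken from mountd's headers:

    #include <stdio.h>
    #include <string.h>

    #define MAXSECFLAVORS 5    /* mirrors the limit used above */

    /* Standard flavor numbers, hard-coded for illustration. */
    static int
    name_to_flavor(const char *name)
    {
        if (strcmp(name, "sys") == 0)
            return (1);         /* AUTH_SYS */
        if (strcmp(name, "krb5") == 0)
            return (390003);    /* RPCSEC_GSS_KRB5 */
        if (strcmp(name, "krb5i") == 0)
            return (390004);    /* RPCSEC_GSS_KRB5I */
        if (strcmp(name, "krb5p") == 0)
            return (390005);    /* RPCSEC_GSS_KRB5P */
        return (-1);
    }

    int
    main(void)
    {
        char list[] = "krb5:krb5i:sys";
        char *next = list, *tok;
        int flavors[MAXSECFLAVORS], f, n = 0;

        while ((tok = strsep(&next, ":")) != NULL) {
            f = name_to_flavor(tok);
            if (n == MAXSECFLAVORS || f == -1)
                return (1);    /* too many, or unknown flavor */
            flavors[n++] = f;
        }
        printf("parsed %d flavors, first is %d\n", n, flavors[0]);
        return (0);
    }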