diff --git a/sys/kern/vfs_mountroot.c b/sys/kern/vfs_mountroot.c index 5361f41276a0..3c506378ad2f 100644 --- a/sys/kern/vfs_mountroot.c +++ b/sys/kern/vfs_mountroot.c @@ -1,1169 +1,1169 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2010 Marcel Moolenaar * Copyright (c) 1999-2004 Poul-Henning Kamp * Copyright (c) 1999 Michael Smith * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_rootdevname.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * The root filesystem is detailed in the kernel environment variable * vfs.root.mountfrom, which is expected to be in the general format * * :[][ :[] ...] * vfsname := the name of a VFS known to the kernel and capable * of being mounted as root * path := disk device name or other data used by the filesystem * to locate its physical store * * If the environment variable vfs.root.mountfrom is a space separated list, * each list element is tried in turn and the root filesystem will be mounted * from the first one that succeeds. * * The environment variable vfs.root.mountfrom.options is a comma delimited * set of string mount options. These mount options must be parseable * by nmount() in the kernel. */ static int parse_mount(char **); static struct mntarg *parse_mountroot_options(struct mntarg *, const char *); static int sysctl_vfs_root_mount_hold(SYSCTL_HANDLER_ARGS); static void vfs_mountroot_wait(void); static int vfs_mountroot_wait_if_neccessary(const char *fs, const char *dev); /* * The vnode of the system's root (/ in the filesystem, without chroot * active.) */ struct vnode *rootvnode; /* * Mount of the system's /dev. */ struct mount *rootdevmp; char *rootdevnames[2] = {NULL, NULL}; struct mtx root_holds_mtx; MTX_SYSINIT(root_holds, &root_holds_mtx, "root_holds", MTX_DEF); static TAILQ_HEAD(, root_hold_token) root_holds = TAILQ_HEAD_INITIALIZER(root_holds); enum action { A_CONTINUE, A_PANIC, A_REBOOT, A_RETRY }; enum rh_flags { RH_FREE, RH_ALLOC, RH_ARG, }; static enum action root_mount_onfail = A_CONTINUE; static int root_mount_mddev; static int root_mount_complete; /* By default wait up to 3 seconds for devices to appear. */ static int root_mount_timeout = 3; TUNABLE_INT("vfs.mountroot.timeout", &root_mount_timeout); static int root_mount_always_wait = 0; SYSCTL_INT(_vfs, OID_AUTO, root_mount_always_wait, CTLFLAG_RDTUN, &root_mount_always_wait, 0, "Wait for root mount holds even if the root device already exists"); SYSCTL_PROC(_vfs, OID_AUTO, root_mount_hold, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vfs_root_mount_hold, "A", "List of root mount hold tokens"); static int sysctl_vfs_root_mount_hold(SYSCTL_HANDLER_ARGS) { struct sbuf sb; struct root_hold_token *h; int error; sbuf_new(&sb, NULL, 256, SBUF_AUTOEXTEND | SBUF_INCLUDENUL); mtx_lock(&root_holds_mtx); TAILQ_FOREACH(h, &root_holds, list) { if (h != TAILQ_FIRST(&root_holds)) sbuf_putc(&sb, ' '); sbuf_printf(&sb, "%s", h->who); } mtx_unlock(&root_holds_mtx); error = sbuf_finish(&sb); if (error == 0) error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb)); sbuf_delete(&sb); return (error); } struct root_hold_token * root_mount_hold(const char *identifier) { struct root_hold_token *h; h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK); h->flags = RH_ALLOC; h->who = identifier; mtx_lock(&root_holds_mtx); TSHOLD("root mount"); TAILQ_INSERT_TAIL(&root_holds, h, list); mtx_unlock(&root_holds_mtx); return (h); } void root_mount_hold_token(const char *identifier, struct root_hold_token *h) { #ifdef INVARIANTS struct root_hold_token *t; #endif h->flags = RH_ARG; h->who = identifier; mtx_lock(&root_holds_mtx); #ifdef INVARIANTS TAILQ_FOREACH(t, &root_holds, list) { if (t == h) { panic("Duplicate mount hold by '%s' on %p", identifier, h); } } #endif TSHOLD("root mount"); TAILQ_INSERT_TAIL(&root_holds, h, list); mtx_unlock(&root_holds_mtx); } void root_mount_rel(struct root_hold_token *h) { if (h == NULL || h->flags == RH_FREE) return; mtx_lock(&root_holds_mtx); TAILQ_REMOVE(&root_holds, h, list); TSRELEASE("root mount"); wakeup(&root_holds); mtx_unlock(&root_holds_mtx); if (h->flags == RH_ALLOC) { free(h, M_DEVBUF); } else h->flags = RH_FREE; } int root_mounted(void) { /* No mutex is acquired here because int stores are atomic. */ return (root_mount_complete); } static void set_rootvnode(void) { if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode)) panic("set_rootvnode: Cannot find root vnode"); VOP_UNLOCK(rootvnode); pwd_set_rootvnode(); } static int vfs_mountroot_devfs(struct thread *td, struct mount **mpp) { struct vfsoptlist *opts; struct vfsconf *vfsp; struct mount *mp; int error; *mpp = NULL; if (rootdevmp != NULL) { /* * Already have /dev; this happens during rerooting. */ error = vfs_busy(rootdevmp, 0); if (error != 0) return (error); *mpp = rootdevmp; } else { vfsp = vfs_byname("devfs"); KASSERT(vfsp != NULL, ("Could not find devfs by name")); if (vfsp == NULL) return (ENOENT); mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred); error = VFS_MOUNT(mp); KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error)); if (error) return (error); error = VFS_STATFS(mp, &mp->mnt_stat); KASSERT(error == 0, ("VFS_STATFS(devfs) failed %d", error)); if (error) return (error); opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); TAILQ_INIT(opts); mp->mnt_opt = opts; mtx_lock(&mountlist_mtx); TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); *mpp = mp; rootdevmp = mp; vfs_op_exit(mp); } set_rootvnode(); error = kern_symlinkat(td, "/", AT_FDCWD, "dev", UIO_SYSSPACE); if (error) printf("kern_symlink /dev -> / returns %d\n", error); return (error); } static void vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs) { struct nameidata nd; struct mount *mporoot, *mpnroot; struct vnode *vp, *vporoot, *vpdevfs; char *fspath; int error; mpnroot = TAILQ_NEXT(mpdevfs, mnt_list); /* Shuffle the mountlist. */ mtx_lock(&mountlist_mtx); mporoot = TAILQ_FIRST(&mountlist); TAILQ_REMOVE(&mountlist, mpdevfs, mnt_list); if (mporoot != mpdevfs) { TAILQ_REMOVE(&mountlist, mpnroot, mnt_list); TAILQ_INSERT_HEAD(&mountlist, mpnroot, mnt_list); } TAILQ_INSERT_TAIL(&mountlist, mpdevfs, mnt_list); mtx_unlock(&mountlist_mtx); cache_purgevfs(mporoot); if (mporoot != mpdevfs) cache_purgevfs(mpdevfs); if (VFS_ROOT(mporoot, LK_EXCLUSIVE, &vporoot)) panic("vfs_mountroot_shuffle: Cannot find root vnode"); VI_LOCK(vporoot); vporoot->v_iflag &= ~VI_MOUNT; vn_irflag_unset_locked(vporoot, VIRF_MOUNTPOINT); vporoot->v_mountedhere = NULL; VI_UNLOCK(vporoot); mporoot->mnt_flag &= ~MNT_ROOTFS; mporoot->mnt_vnodecovered = NULL; vput(vporoot); /* Set up the new rootvnode, and purge the cache */ mpnroot->mnt_vnodecovered = NULL; set_rootvnode(); cache_purgevfs(rootvnode->v_mount); if (mporoot != mpdevfs) { /* Remount old root under /.mount or /mnt */ fspath = "/.mount"; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath); error = namei(&nd); if (error) { NDFREE_PNBUF(&nd); fspath = "/mnt"; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath); error = namei(&nd); } if (!error) { vp = nd.ni_vp; error = (vp->v_type == VDIR) ? 0 : ENOTDIR; if (!error) error = vinvalbuf(vp, V_SAVE, 0, 0); if (!error) { cache_purge(vp); VI_LOCK(vp); mporoot->mnt_vnodecovered = vp; vn_irflag_set_locked(vp, VIRF_MOUNTPOINT); vp->v_mountedhere = mporoot; strlcpy(mporoot->mnt_stat.f_mntonname, fspath, MNAMELEN); VI_UNLOCK(vp); VOP_UNLOCK(vp); } else vput(vp); } NDFREE_PNBUF(&nd); if (error) printf("mountroot: unable to remount previous root " "under /.mount or /mnt (error %d)\n", error); } /* Remount devfs under /dev */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev"); error = namei(&nd); if (!error) { vp = nd.ni_vp; error = (vp->v_type == VDIR) ? 0 : ENOTDIR; if (!error) error = vinvalbuf(vp, V_SAVE, 0, 0); if (!error) { vpdevfs = mpdevfs->mnt_vnodecovered; if (vpdevfs != NULL) { cache_purge(vpdevfs); VI_LOCK(vpdevfs); vn_irflag_unset_locked(vpdevfs, VIRF_MOUNTPOINT); vpdevfs->v_mountedhere = NULL; VI_UNLOCK(vpdevfs); vrele(vpdevfs); } VI_LOCK(vp); mpdevfs->mnt_vnodecovered = vp; vn_irflag_set_locked(vp, VIRF_MOUNTPOINT); vp->v_mountedhere = mpdevfs; VI_UNLOCK(vp); VOP_UNLOCK(vp); } else vput(vp); } if (error) printf("mountroot: unable to remount devfs under /dev " "(error %d)\n", error); NDFREE_PNBUF(&nd); if (mporoot == mpdevfs) { vfs_unbusy(mpdevfs); /* Unlink the no longer needed /dev/dev -> / symlink */ error = kern_funlinkat(td, AT_FDCWD, "/dev/dev", FD_NONE, UIO_SYSSPACE, 0, 0); if (error) printf("mountroot: unable to unlink /dev/dev " "(error %d)\n", error); } } /* * Configuration parser. */ /* Parser character classes. */ #define CC_WHITESPACE -1 #define CC_NONWHITESPACE -2 /* Parse errors. */ #define PE_EOF -1 #define PE_EOL -2 static __inline int parse_peek(char **conf) { return (**conf); } static __inline void parse_poke(char **conf, int c) { **conf = c; } static __inline void parse_advance(char **conf) { (*conf)++; } static int parse_skipto(char **conf, int mc) { int c, match; while (1) { c = parse_peek(conf); if (c == 0) return (PE_EOF); switch (mc) { case CC_WHITESPACE: match = (c == ' ' || c == '\t' || c == '\n') ? 1 : 0; break; case CC_NONWHITESPACE: if (c == '\n') return (PE_EOL); match = (c != ' ' && c != '\t') ? 1 : 0; break; default: match = (c == mc) ? 1 : 0; break; } if (match) break; parse_advance(conf); } return (0); } static int parse_token(char **conf, char **tok) { char *p; size_t len; int error; *tok = NULL; error = parse_skipto(conf, CC_NONWHITESPACE); if (error) return (error); p = *conf; error = parse_skipto(conf, CC_WHITESPACE); len = *conf - p; *tok = malloc(len + 1, M_TEMP, M_WAITOK | M_ZERO); bcopy(p, *tok, len); return (0); } static void parse_dir_ask_printenv(const char *var) { char *val; val = kern_getenv(var); if (val != NULL) { printf(" %s=%s\n", var, val); freeenv(val); } } static int parse_dir_ask(char **conf) { char name[80]; char *mnt; int error; vfs_mountroot_wait(); printf("\nLoader variables:\n"); parse_dir_ask_printenv("vfs.root.mountfrom"); parse_dir_ask_printenv("vfs.root.mountfrom.options"); printf("\nManual root filesystem specification:\n"); printf(" : [options]\n"); printf(" Mount using filesystem \n"); printf(" and with the specified (optional) option list.\n"); printf("\n"); printf(" eg. ufs:/dev/da0s1a\n"); printf(" zfs:zroot/ROOT/default\n"); printf(" cd9660:/dev/cd0 ro\n"); printf(" (which is equivalent to: "); printf("mount -t cd9660 -o ro /dev/cd0 /)\n"); printf("\n"); printf(" ? List valid disk boot devices\n"); printf(" . Yield 1 second (for background tasks)\n"); printf(" Abort manual input\n"); do { error = EINVAL; printf("\nmountroot> "); cngets(name, sizeof(name), GETS_ECHO); if (name[0] == '\0') break; if (name[0] == '?' && name[1] == '\0') { printf("\nList of GEOM managed disk devices:\n "); g_dev_print(); continue; } if (name[0] == '.' && name[1] == '\0') { pause("rmask", hz); continue; } mnt = name; error = parse_mount(&mnt); if (error == -1) printf("Invalid file system specification.\n"); } while (error != 0); return (error); } static int parse_dir_md(char **conf) { struct stat sb; struct thread *td; struct md_ioctl *mdio; char *path, *tok; int error, fd, len; td = curthread; fd = -1; error = parse_token(conf, &tok); if (error) return (error); len = strlen(tok); mdio = malloc(sizeof(*mdio) + len + 1, M_TEMP, M_WAITOK | M_ZERO); path = (void *)(mdio + 1); bcopy(tok, path, len); free(tok, M_TEMP); /* Get file status. */ error = kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &sb, NULL); if (error) goto out; /* Open /dev/mdctl so that we can attach/detach. */ error = kern_openat(td, AT_FDCWD, "/dev/" MDCTL_NAME, UIO_SYSSPACE, O_RDWR, 0); if (error) goto out; fd = td->td_retval[0]; mdio->md_version = MDIOVERSION; mdio->md_type = MD_VNODE; if (root_mount_mddev != -1) { mdio->md_unit = root_mount_mddev; (void)kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio); /* Ignore errors. We don't care. */ root_mount_mddev = -1; } mdio->md_file = (void *)(mdio + 1); mdio->md_options = MD_AUTOUNIT | MD_READONLY; mdio->md_mediasize = sb.st_size; mdio->md_unit = 0; error = kern_ioctl(td, fd, MDIOCATTACH, (void *)mdio); if (error) goto out; if (mdio->md_unit > 9) { printf("rootmount: too many md units\n"); mdio->md_file = NULL; mdio->md_options = 0; mdio->md_mediasize = 0; error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio); /* Ignore errors. We don't care. */ error = ERANGE; goto out; } root_mount_mddev = mdio->md_unit; printf(MD_NAME "%u attached to %s\n", root_mount_mddev, mdio->md_file); out: if (fd >= 0) (void)kern_close(td, fd); free(mdio, M_TEMP); return (error); } static int parse_dir_onfail(char **conf) { char *action; int error; error = parse_token(conf, &action); if (error) return (error); if (!strcmp(action, "continue")) root_mount_onfail = A_CONTINUE; else if (!strcmp(action, "panic")) root_mount_onfail = A_PANIC; else if (!strcmp(action, "reboot")) root_mount_onfail = A_REBOOT; else if (!strcmp(action, "retry")) root_mount_onfail = A_RETRY; else { printf("rootmount: %s: unknown action\n", action); error = EINVAL; } free(action, M_TEMP); return (0); } static int parse_dir_timeout(char **conf) { char *tok, *endtok; long secs; int error; error = parse_token(conf, &tok); if (error) return (error); secs = strtol(tok, &endtok, 0); error = (secs < 0 || *endtok != '\0') ? EINVAL : 0; if (!error) root_mount_timeout = secs; free(tok, M_TEMP); return (error); } static int parse_directive(char **conf) { char *dir; int error; error = parse_token(conf, &dir); if (error) return (error); if (strcmp(dir, ".ask") == 0) error = parse_dir_ask(conf); else if (strcmp(dir, ".md") == 0) error = parse_dir_md(conf); else if (strcmp(dir, ".onfail") == 0) error = parse_dir_onfail(conf); else if (strcmp(dir, ".timeout") == 0) error = parse_dir_timeout(conf); else { printf("mountroot: invalid directive `%s'\n", dir); /* Ignore the rest of the line. */ (void)parse_skipto(conf, '\n'); error = EINVAL; } free(dir, M_TEMP); return (error); } static bool parse_mount_dev_present(const char *dev) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, dev); error = namei(&nd); if (error != 0) return (false); vrele(nd.ni_vp); NDFREE_PNBUF(&nd); return (true); } #define ERRMSGL 255 static int parse_mount(char **conf) { char *errmsg; struct mntarg *ma; char *dev, *fs, *opts, *tok; int delay, error, timeout; error = parse_token(conf, &tok); if (error) return (error); fs = tok; error = parse_skipto(&tok, ':'); if (error) { free(fs, M_TEMP); return (error); } parse_poke(&tok, '\0'); parse_advance(&tok); dev = tok; if (root_mount_mddev != -1) { /* Handle substitution for the md unit number. */ tok = strstr(dev, "md#"); if (tok != NULL) tok[2] = '0' + root_mount_mddev; } /* Parse options. */ error = parse_token(conf, &tok); opts = (error == 0) ? tok : NULL; printf("Trying to mount root from %s:%s [%s]...\n", fs, dev, (opts != NULL) ? opts : ""); errmsg = malloc(ERRMSGL, M_TEMP, M_WAITOK | M_ZERO); if (vfs_byname(fs) == NULL) { strlcpy(errmsg, "unknown file system", ERRMSGL); error = ENOENT; goto out; } error = vfs_mountroot_wait_if_neccessary(fs, dev); if (error != 0) goto out; delay = hz / 10; timeout = root_mount_timeout * hz; for (;;) { ma = NULL; ma = mount_arg(ma, "fstype", fs, -1); ma = mount_arg(ma, "fspath", "/", -1); ma = mount_arg(ma, "from", dev, -1); ma = mount_arg(ma, "errmsg", errmsg, ERRMSGL); ma = mount_arg(ma, "ro", NULL, 0); ma = parse_mountroot_options(ma, opts); error = kernel_mount(ma, MNT_ROOTFS); - if (error == 0 || timeout <= 0) + if (error == 0 || error == EILSEQ || timeout <= 0) break; if (root_mount_timeout * hz == timeout || (bootverbose && timeout % hz == 0)) { printf("Mounting from %s:%s failed with error %d; " "retrying for %d more second%s\n", fs, dev, error, timeout / hz, (timeout / hz > 1) ? "s" : ""); } pause("rmretry", delay); timeout -= delay; } out: if (error) { printf("Mounting from %s:%s failed with error %d", fs, dev, error); if (errmsg[0] != '\0') printf(": %s", errmsg); printf(".\n"); } free(fs, M_TEMP); free(errmsg, M_TEMP); if (opts != NULL) free(opts, M_TEMP); /* kernel_mount can return -1 on error. */ return ((error < 0) ? EDOOFUS : error); } #undef ERRMSGL static int vfs_mountroot_parse(struct sbuf *sb, struct mount *mpdevfs) { struct mount *mp; char *conf; int error; root_mount_mddev = -1; retry: conf = sbuf_data(sb); mp = TAILQ_NEXT(mpdevfs, mnt_list); error = (mp == NULL) ? 0 : EDOOFUS; root_mount_onfail = A_CONTINUE; while (mp == NULL) { error = parse_skipto(&conf, CC_NONWHITESPACE); if (error == PE_EOL) { parse_advance(&conf); continue; } if (error < 0) break; switch (parse_peek(&conf)) { case '#': error = parse_skipto(&conf, '\n'); break; case '.': error = parse_directive(&conf); break; default: error = parse_mount(&conf); if (error == -1) { printf("mountroot: invalid file system " "specification.\n"); error = 0; } break; } if (error < 0) break; /* Ignore any trailing garbage on the line. */ if (parse_peek(&conf) != '\n') { printf("mountroot: advancing to next directive...\n"); (void)parse_skipto(&conf, '\n'); } mp = TAILQ_NEXT(mpdevfs, mnt_list); } if (mp != NULL) return (0); /* * We failed to mount (a new) root. */ switch (root_mount_onfail) { case A_CONTINUE: break; case A_PANIC: panic("mountroot: unable to (re-)mount root."); /* NOTREACHED */ case A_RETRY: goto retry; case A_REBOOT: kern_reboot(RB_NOSYNC); /* NOTREACHED */ } return (error); } static void vfs_mountroot_conf0(struct sbuf *sb) { char *s, *tok, *mnt, *opt; int error; sbuf_printf(sb, ".onfail panic\n"); sbuf_printf(sb, ".timeout %d\n", root_mount_timeout); if (boothowto & RB_ASKNAME) sbuf_printf(sb, ".ask\n"); #ifdef ROOTDEVNAME if (boothowto & RB_DFLTROOT) sbuf_printf(sb, "%s\n", ROOTDEVNAME); #endif if (boothowto & RB_CDROM) { sbuf_printf(sb, "cd9660:/dev/cd0 ro\n"); sbuf_printf(sb, ".timeout 0\n"); sbuf_printf(sb, "cd9660:/dev/cd1 ro\n"); sbuf_printf(sb, ".timeout %d\n", root_mount_timeout); } s = kern_getenv("vfs.root.mountfrom"); if (s != NULL) { opt = kern_getenv("vfs.root.mountfrom.options"); tok = s; error = parse_token(&tok, &mnt); while (!error) { sbuf_printf(sb, "%s %s\n", mnt, (opt != NULL) ? opt : ""); free(mnt, M_TEMP); error = parse_token(&tok, &mnt); } if (opt != NULL) freeenv(opt); freeenv(s); } if (rootdevnames[0] != NULL) sbuf_printf(sb, "%s\n", rootdevnames[0]); if (rootdevnames[1] != NULL) sbuf_printf(sb, "%s\n", rootdevnames[1]); #ifdef ROOTDEVNAME if (!(boothowto & RB_DFLTROOT)) sbuf_printf(sb, "%s\n", ROOTDEVNAME); #endif if (!(boothowto & RB_ASKNAME)) sbuf_printf(sb, ".ask\n"); } static int vfs_mountroot_readconf(struct thread *td, struct sbuf *sb) { static char buf[128]; struct nameidata nd; off_t ofs; ssize_t resid; int error, flags, len; NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/.mount.conf"); flags = FREAD; error = vn_open(&nd, &flags, 0, NULL); if (error) return (error); NDFREE_PNBUF(&nd); ofs = 0; len = sizeof(buf) - 1; while (1) { error = vn_rdwr(UIO_READ, nd.ni_vp, buf, len, ofs, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); if (error) break; if (resid == len) break; buf[len - resid] = 0; sbuf_printf(sb, "%s", buf); ofs += len - resid; } VOP_UNLOCK(nd.ni_vp); vn_close(nd.ni_vp, FREAD, td->td_ucred, td); return (error); } static void vfs_mountroot_wait(void) { struct root_hold_token *h; struct thread *td; struct timeval lastfail; int curfail; TSENTER(); curfail = 0; lastfail.tv_sec = 0; ppsratecheck(&lastfail, &curfail, 1); td = curthread; while (1) { g_waitidle(td); mtx_lock(&root_holds_mtx); if (TAILQ_EMPTY(&root_holds)) { mtx_unlock(&root_holds_mtx); break; } if (ppsratecheck(&lastfail, &curfail, 1)) { printf("Root mount waiting for:"); TAILQ_FOREACH(h, &root_holds, list) printf(" %s", h->who); printf("\n"); } TSWAIT("root mount"); msleep(&root_holds, &root_holds_mtx, PZERO | PDROP, "roothold", hz); TSUNWAIT("root mount"); } g_waitidle(td); TSEXIT(); } static int vfs_mountroot_wait_if_neccessary(const char *fs, const char *dev) { int delay, timeout; /* * In case of ZFS and NFS we don't have a way to wait for * specific device. Also do the wait if the user forced that * behaviour by setting vfs.root_mount_always_wait=1. */ if (strcmp(fs, "zfs") == 0 || strstr(fs, "nfs") != NULL || dev[0] == '\0' || root_mount_always_wait != 0) { vfs_mountroot_wait(); return (0); } /* * Otherwise, no point in waiting if the device is already there. * Note that we must wait for GEOM to finish reconfiguring itself, * eg for geom_part(4) to finish tasting. */ g_waitidle(curthread); if (parse_mount_dev_present(dev)) return (0); /* * No luck. Let's wait. This code looks weird, but it's that way * to behave exactly as it used to work before. */ vfs_mountroot_wait(); if (parse_mount_dev_present(dev)) return (0); printf("mountroot: waiting for device %s...\n", dev); delay = hz / 10; timeout = root_mount_timeout * hz; do { pause("rmdev", delay); timeout -= delay; } while (timeout > 0 && !parse_mount_dev_present(dev)); if (timeout <= 0) return (ENODEV); return (0); } void vfs_mountroot(void) { struct mount *mp; struct sbuf *sb; struct thread *td; time_t timebase; int error; mtx_assert(&Giant, MA_NOTOWNED); TSENTER(); td = curthread; sb = sbuf_new_auto(); vfs_mountroot_conf0(sb); sbuf_finish(sb); error = vfs_mountroot_devfs(td, &mp); while (!error) { error = vfs_mountroot_parse(sb, mp); if (!error) { vfs_mountroot_shuffle(td, mp); sbuf_clear(sb); error = vfs_mountroot_readconf(td, sb); sbuf_finish(sb); } } sbuf_delete(sb); /* * Iterate over all currently mounted file systems and use * the time stamp found to check and/or initialize the RTC. * Call inittodr() only once and pass it the largest of the * timestamps we encounter. */ timebase = 0; mtx_lock(&mountlist_mtx); mp = TAILQ_FIRST(&mountlist); while (mp != NULL) { if (mp->mnt_time > timebase) timebase = mp->mnt_time; mp = TAILQ_NEXT(mp, mnt_list); } mtx_unlock(&mountlist_mtx); inittodr(timebase); /* Keep prison0's root in sync with the global rootvnode. */ mtx_lock(&prison0.pr_mtx); prison0.pr_root = rootvnode; vref(prison0.pr_root); mtx_unlock(&prison0.pr_mtx); mtx_lock(&root_holds_mtx); atomic_store_rel_int(&root_mount_complete, 1); wakeup(&root_mount_complete); mtx_unlock(&root_holds_mtx); EVENTHANDLER_INVOKE(mountroot); TSEXIT(); } static struct mntarg * parse_mountroot_options(struct mntarg *ma, const char *options) { char *p; char *name, *name_arg; char *val, *val_arg; char *opts; if (options == NULL || options[0] == '\0') return (ma); p = opts = strdup(options, M_MOUNT); if (opts == NULL) { return (ma); } while((name = strsep(&p, ",")) != NULL) { if (name[0] == '\0') break; val = strchr(name, '='); if (val != NULL) { *val = '\0'; ++val; } if (strcmp(name, "rw") == 0 || strcmp(name, "noro") == 0) { /* * The first time we mount the root file system, * we need to mount 'ro', so We need to ignore * 'rw' and 'noro' mount options. */ continue; } name_arg = strdup(name, M_MOUNT); val_arg = NULL; if (val != NULL) val_arg = strdup(val, M_MOUNT); ma = mount_arg(ma, name_arg, val_arg, (val_arg != NULL ? -1 : 0)); } free(opts, M_MOUNT); return (ma); } diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c index 705f8c9c961d..31f30ba5d175 100644 --- a/sys/ufs/ffs/ffs_subr.c +++ b/sys/ufs/ffs/ffs_subr.c @@ -1,1066 +1,1088 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_subr.c 8.5 (Berkeley) 3/21/95 */ #include __FBSDID("$FreeBSD$"); #include +#include #include #ifndef _KERNEL #include #include #include #include #include #include #include uint32_t calculate_crc32c(uint32_t, const void *, size_t); uint32_t ffs_calc_sbhash(struct fs *); struct malloc_type; #define UFS_MALLOC(size, type, flags) malloc(size) #define UFS_FREE(ptr, type) free(ptr) #define maxphys MAXPHYS #else /* _KERNEL */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define UFS_MALLOC(size, type, flags) malloc(size, type, flags) #define UFS_FREE(ptr, type) free(ptr, type) #endif /* _KERNEL */ /* * Verify an inode check-hash. */ int ffs_verify_dinode_ckhash(struct fs *fs, struct ufs2_dinode *dip) { uint32_t ckhash, save_ckhash; /* * Return success if unallocated or we are not doing inode check-hash. */ if (dip->di_mode == 0 || (fs->fs_metackhash & CK_INODE) == 0) return (0); /* * Exclude di_ckhash from the crc32 calculation, e.g., always use * a check-hash value of zero when calculating the check-hash. */ save_ckhash = dip->di_ckhash; dip->di_ckhash = 0; ckhash = calculate_crc32c(~0L, (void *)dip, sizeof(*dip)); dip->di_ckhash = save_ckhash; if (save_ckhash == ckhash) return (0); return (EINVAL); } /* * Update an inode check-hash. */ void ffs_update_dinode_ckhash(struct fs *fs, struct ufs2_dinode *dip) { if (dip->di_mode == 0 || (fs->fs_metackhash & CK_INODE) == 0) return; /* * Exclude old di_ckhash from the crc32 calculation, e.g., always use * a check-hash value of zero when calculating the new check-hash. */ dip->di_ckhash = 0; dip->di_ckhash = calculate_crc32c(~0L, (void *)dip, sizeof(*dip)); } /* * These are the low-level functions that actually read and write * the superblock and its associated data. */ static off_t sblock_try[] = SBLOCKSEARCH; static int readsuper(void *, struct fs **, off_t, int, int (*)(void *, off_t, void **, int)); static int validate_sblock(struct fs *, int); /* * Read a superblock from the devfd device. * * If an alternate superblock is specified, it is read. Otherwise the * set of locations given in the SBLOCKSEARCH list is searched for a * superblock. Memory is allocated for the superblock by the readfunc and * is returned. If filltype is non-NULL, additional memory is allocated * of type filltype and filled in with the superblock summary information. * All memory is freed when any error is returned. * * If a superblock is found, zero is returned. Otherwise one of the * following error values is returned: * EIO: non-existent or truncated superblock. * EIO: error reading summary information. * ENOENT: no usable known superblock found. + * EILSEQ: filesystem with wrong byte order found. * ENOMEM: failed to allocate space for the superblock. * EINVAL: The previous newfs operation on this volume did not complete. * The administrator must complete newfs before using this volume. */ int ffs_sbget(void *devfd, struct fs **fsp, off_t sblock, int flags, struct malloc_type *filltype, int (*readfunc)(void *devfd, off_t loc, void **bufp, int size)) { struct fs *fs; struct fs_summary_info *fs_si; int i, error; uint64_t size, blks; uint8_t *space; int32_t *lp; char *buf; fs = NULL; *fsp = NULL; if (sblock != UFS_STDSB) { if ((error = readsuper(devfd, &fs, sblock, flags | UFS_ALTSBLK, readfunc)) != 0) { if (fs != NULL) UFS_FREE(fs, filltype); return (error); } } else { for (i = 0; sblock_try[i] != -1; i++) { if ((error = readsuper(devfd, &fs, sblock_try[i], flags, readfunc)) == 0) { if ((flags & UFS_NOCSUM) != 0) { *fsp = fs; return (0); } break; } if (fs != NULL) { UFS_FREE(fs, filltype); fs = NULL; } if (error == ENOENT) continue; return (error); } if (sblock_try[i] == -1) return (ENOENT); } /* * Read in the superblock summary information. */ size = fs->fs_cssize; blks = howmany(size, fs->fs_fsize); if (fs->fs_contigsumsize > 0) size += fs->fs_ncg * sizeof(int32_t); size += fs->fs_ncg * sizeof(u_int8_t); if ((fs_si = UFS_MALLOC(sizeof(*fs_si), filltype, M_NOWAIT)) == NULL) { UFS_FREE(fs, filltype); return (ENOMEM); } bzero(fs_si, sizeof(*fs_si)); fs->fs_si = fs_si; if ((space = UFS_MALLOC(size, filltype, M_NOWAIT)) == NULL) { UFS_FREE(fs->fs_si, filltype); UFS_FREE(fs, filltype); return (ENOMEM); } fs->fs_csp = (struct csum *)space; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; buf = NULL; error = (*readfunc)(devfd, dbtob(fsbtodb(fs, fs->fs_csaddr + i)), (void **)&buf, size); if (error) { if (buf != NULL) UFS_FREE(buf, filltype); UFS_FREE(fs->fs_csp, filltype); UFS_FREE(fs->fs_si, filltype); UFS_FREE(fs, filltype); return (error); } memcpy(space, buf, size); UFS_FREE(buf, filltype); space += size; } if (fs->fs_contigsumsize > 0) { fs->fs_maxcluster = lp = (int32_t *)space; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; space = (uint8_t *)lp; } size = fs->fs_ncg * sizeof(u_int8_t); fs->fs_contigdirs = (u_int8_t *)space; bzero(fs->fs_contigdirs, size); *fsp = fs; return (0); } /* * Try to read a superblock from the location specified by sblockloc. * Return zero on success or an errno on failure. */ static int readsuper(void *devfd, struct fs **fsp, off_t sblockloc, int flags, int (*readfunc)(void *devfd, off_t loc, void **bufp, int size)) { struct fs *fs; int error, res; uint32_t ckhash; error = (*readfunc)(devfd, sblockloc, (void **)fsp, SBLOCKSIZE); if (error != 0) return (error); fs = *fsp; if (fs->fs_magic == FS_BAD_MAGIC) return (EINVAL); /* * For UFS1 with a 65536 block size, the first backup superblock * is at the same location as the UFS2 superblock. Since SBLOCK_UFS2 * is the first location checked, the first backup is the superblock * that will be accessed. Here we fail the lookup so that we can * retry with the correct location for the UFS1 superblock. */ if (fs->fs_magic == FS_UFS1_MAGIC && (flags & UFS_ALTSBLK) == 0 && fs->fs_bsize == SBLOCK_UFS2 && sblockloc == SBLOCK_UFS2) return (ENOENT); if ((error = validate_sblock(fs, flags)) > 0) return (error); /* * If the filesystem has been run on a kernel without * metadata check hashes, disable them. */ if ((fs->fs_flags & FS_METACKHASH) == 0) fs->fs_metackhash = 0; /* * Clear any check-hashes that are not maintained * by this kernel. Also clear any unsupported flags. */ fs->fs_metackhash &= CK_SUPPORTED; fs->fs_flags &= FS_SUPPORTED; if (fs->fs_ckhash != (ckhash = ffs_calc_sbhash(fs))) { if ((flags & (UFS_NOMSG | UFS_NOHASHFAIL)) == (UFS_NOMSG | UFS_NOHASHFAIL)) return (0); if ((flags & UFS_NOMSG) != 0) return (EINTEGRITY); #ifdef _KERNEL res = uprintf("Superblock check-hash failed: recorded " "check-hash 0x%x != computed check-hash 0x%x%s\n", fs->fs_ckhash, ckhash, (flags & UFS_NOHASHFAIL) != 0 ? " (Ignored)" : ""); #else res = 0; #endif /* * Print check-hash failure if no controlling terminal * in kernel or always if in user-mode (libufs). */ if (res == 0) printf("Superblock check-hash failed: recorded " "check-hash 0x%x != computed check-hash " "0x%x%s\n", fs->fs_ckhash, ckhash, (flags & UFS_NOHASHFAIL) ? " (Ignored)" : ""); if ((flags & UFS_NOHASHFAIL) != 0) return (0); return (EINTEGRITY); } /* Have to set for old filesystems that predate this field */ fs->fs_sblockactualloc = sblockloc; /* Not yet any summary information */ fs->fs_si = NULL; return (0); } /* * Verify the filesystem values. */ #define ILOG2(num) (fls(num) - 1) #ifdef STANDALONE_SMALL #define MPRINT(...) do { } while (0) #else #define MPRINT(...) if (prtmsg) printf(__VA_ARGS__) #endif #define FCHK(lhs, op, rhs, fmt) \ if (lhs op rhs) { \ MPRINT("UFS%d superblock failed: %s (" #fmt ") %s %s (" \ #fmt ")\n", fs->fs_magic == FS_UFS1_MAGIC ? 1 : 2, \ #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs); \ if (error < 0) \ return (ENOENT); \ if (error == 0) \ error = ENOENT; \ } #define WCHK(lhs, op, rhs, fmt) \ if (lhs op rhs) { \ MPRINT("UFS%d superblock failed: %s (" #fmt ") %s %s (" \ #fmt ")%s\n", fs->fs_magic == FS_UFS1_MAGIC ? 1 : 2,\ #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs, wmsg);\ if (error == 0) \ error = warnerr; \ } #define FCHK2(lhs1, op1, rhs1, lhs2, op2, rhs2, fmt) \ if (lhs1 op1 rhs1 && lhs2 op2 rhs2) { \ MPRINT("UFS%d superblock failed: %s (" #fmt ") %s %s (" \ #fmt ") && %s (" #fmt ") %s %s (" #fmt ")\n", \ fs->fs_magic == FS_UFS1_MAGIC ? 1 : 2, #lhs1, \ (intmax_t)lhs1, #op1, #rhs1, (intmax_t)rhs1, #lhs2, \ (intmax_t)lhs2, #op2, #rhs2, (intmax_t)rhs2); \ if (error < 0) \ return (ENOENT); \ if (error == 0) \ error = ENOENT; \ } #define WCHK2(lhs1, op1, rhs1, lhs2, op2, rhs2, fmt) \ if (lhs1 op1 rhs1 && lhs2 op2 rhs2) { \ MPRINT("UFS%d superblock failed: %s (" #fmt ") %s %s (" \ #fmt ") && %s (" #fmt ") %s %s (" #fmt ")%s\n", \ fs->fs_magic == FS_UFS1_MAGIC ? 1 : 2, #lhs1, \ (intmax_t)lhs1, #op1, #rhs1, (intmax_t)rhs1, #lhs2, \ (intmax_t)lhs2, #op2, #rhs2, (intmax_t)rhs2, wmsg); \ if (error == 0) \ error = warnerr; \ } static int validate_sblock(struct fs *fs, int flags) { u_long i, sectorsize; u_int64_t maxfilesize, sizepb; int error, prtmsg, warnerr; char *wmsg; error = 0; sectorsize = dbtob(1); prtmsg = ((flags & UFS_NOMSG) == 0); warnerr = (flags & UFS_NOWARNFAIL) == UFS_NOWARNFAIL ? 0 : ENOENT; wmsg = warnerr ? "" : " (Ignored)"; + /* + * Check for endian mismatch between machine and filesystem. + */ + if (((fs->fs_magic != FS_UFS2_MAGIC) && + (bswap32(fs->fs_magic) == FS_UFS2_MAGIC)) || + ((fs->fs_magic != FS_UFS1_MAGIC) && + (bswap32(fs->fs_magic) == FS_UFS1_MAGIC))) { + MPRINT("UFS superblock failed due to endian mismatch " + "between machine and filesystem\n"); + return(EILSEQ); + } /* * If just validating for recovery, then do just the minimal * checks needed for the superblock fields needed to find * alternate superblocks. */ if ((flags & UFS_FSRONLY) == UFS_FSRONLY && (fs->fs_magic == FS_UFS1_MAGIC || fs->fs_magic == FS_UFS2_MAGIC)) { error = -1; /* fail on first error */ if (fs->fs_magic == FS_UFS2_MAGIC) { FCHK(fs->fs_sblockloc, !=, SBLOCK_UFS2, %#jx); } else if (fs->fs_magic == FS_UFS1_MAGIC) { FCHK(fs->fs_sblockloc, <, 0, %jd); FCHK(fs->fs_sblockloc, >, SBLOCK_UFS1, %jd); } FCHK(fs->fs_frag, <, 1, %jd); FCHK(fs->fs_frag, >, MAXFRAG, %jd); FCHK(fs->fs_bsize, <, MINBSIZE, %jd); FCHK(fs->fs_bsize, >, MAXBSIZE, %jd); FCHK(fs->fs_bsize, <, roundup(sizeof(struct fs), DEV_BSIZE), %jd); FCHK(fs->fs_fsize, <, sectorsize, %jd); FCHK(fs->fs_fsize * fs->fs_frag, !=, fs->fs_bsize, %jd); FCHK(powerof2(fs->fs_fsize), ==, 0, %jd); FCHK(fs->fs_sbsize, >, SBLOCKSIZE, %jd); FCHK(fs->fs_sbsize, <, (signed)sizeof(struct fs), %jd); FCHK(fs->fs_sbsize % sectorsize, !=, 0, %jd); FCHK(fs->fs_fpg, <, 3 * fs->fs_frag, %jd); FCHK(fs->fs_ncg, <, 1, %jd); FCHK(fs->fs_fsbtodb, !=, ILOG2(fs->fs_fsize / sectorsize), %jd); FCHK(fs->fs_old_cgoffset, <, 0, %jd); FCHK2(fs->fs_old_cgoffset, >, 0, ~fs->fs_old_cgmask, <, 0, %jd); FCHK(fs->fs_old_cgoffset * (~fs->fs_old_cgmask), >, fs->fs_fpg, %jd); FCHK(fs->fs_sblkno, !=, roundup( howmany(fs->fs_sblockloc + SBLOCKSIZE, fs->fs_fsize), fs->fs_frag), %jd); return (error); } if (fs->fs_magic == FS_UFS2_MAGIC) { if ((flags & UFS_ALTSBLK) == 0) FCHK2(fs->fs_sblockactualloc, !=, SBLOCK_UFS2, fs->fs_sblockactualloc, !=, 0, %jd); FCHK(fs->fs_sblockloc, !=, SBLOCK_UFS2, %#jx); FCHK(fs->fs_maxsymlinklen, !=, ((UFS_NDADDR + UFS_NIADDR) * sizeof(ufs2_daddr_t)), %jd); FCHK(fs->fs_nindir, !=, fs->fs_bsize / sizeof(ufs2_daddr_t), %jd); FCHK(fs->fs_inopb, !=, fs->fs_bsize / sizeof(struct ufs2_dinode), %jd); } else if (fs->fs_magic == FS_UFS1_MAGIC) { if ((flags & UFS_ALTSBLK) == 0) FCHK(fs->fs_sblockactualloc, >, SBLOCK_UFS1, %jd); FCHK(fs->fs_sblockloc, <, 0, %jd); FCHK(fs->fs_sblockloc, >, SBLOCK_UFS1, %jd); FCHK(fs->fs_nindir, !=, fs->fs_bsize / sizeof(ufs1_daddr_t), %jd); FCHK(fs->fs_inopb, !=, fs->fs_bsize / sizeof(struct ufs1_dinode), %jd); FCHK(fs->fs_maxsymlinklen, !=, ((UFS_NDADDR + UFS_NIADDR) * sizeof(ufs1_daddr_t)), %jd); WCHK(fs->fs_old_inodefmt, !=, FS_44INODEFMT, %jd); WCHK(fs->fs_old_rotdelay, !=, 0, %jd); WCHK(fs->fs_old_rps, !=, 60, %jd); WCHK(fs->fs_old_nspf, !=, fs->fs_fsize / sectorsize, %jd); FCHK(fs->fs_old_cpg, !=, 1, %jd); WCHK(fs->fs_old_interleave, !=, 1, %jd); WCHK(fs->fs_old_trackskew, !=, 0, %jd); WCHK(fs->fs_old_cpc, !=, 0, %jd); WCHK(fs->fs_old_postblformat, !=, 1, %jd); FCHK(fs->fs_old_nrpos, !=, 1, %jd); WCHK(fs->fs_old_spc, !=, fs->fs_fpg * fs->fs_old_nspf, %jd); WCHK(fs->fs_old_nsect, !=, fs->fs_old_spc, %jd); WCHK(fs->fs_old_npsect, !=, fs->fs_old_spc, %jd); FCHK(fs->fs_old_ncyl, !=, fs->fs_ncg, %jd); } else { /* Bad magic number, so assume not a superblock */ return (ENOENT); } FCHK(fs->fs_bsize, <, MINBSIZE, %jd); FCHK(fs->fs_bsize, >, MAXBSIZE, %jd); FCHK(fs->fs_bsize, <, roundup(sizeof(struct fs), DEV_BSIZE), %jd); FCHK(powerof2(fs->fs_bsize), ==, 0, %jd); FCHK(fs->fs_frag, <, 1, %jd); FCHK(fs->fs_frag, >, MAXFRAG, %jd); FCHK(fs->fs_frag, !=, numfrags(fs, fs->fs_bsize), %jd); FCHK(fs->fs_fsize, <, sectorsize, %jd); FCHK(fs->fs_fsize * fs->fs_frag, !=, fs->fs_bsize, %jd); FCHK(powerof2(fs->fs_fsize), ==, 0, %jd); FCHK(fs->fs_fpg, <, 3 * fs->fs_frag, %jd); FCHK(fs->fs_ncg, <, 1, %jd); FCHK(fs->fs_ipg, <, fs->fs_inopb, %jd); FCHK((u_int64_t)fs->fs_ipg * fs->fs_ncg, >, (((int64_t)(1)) << 32) - INOPB(fs), %jd); FCHK(fs->fs_cstotal.cs_nifree, <, 0, %jd); FCHK(fs->fs_cstotal.cs_nifree, >, (u_int64_t)fs->fs_ipg * fs->fs_ncg, %jd); FCHK(fs->fs_cstotal.cs_ndir, <, 0, %jd); FCHK(fs->fs_cstotal.cs_ndir, >, ((u_int64_t)fs->fs_ipg * fs->fs_ncg) - fs->fs_cstotal.cs_nifree, %jd); FCHK(fs->fs_sbsize, >, SBLOCKSIZE, %jd); FCHK(fs->fs_sbsize, <, (signed)sizeof(struct fs), %jd); FCHK(fs->fs_maxbsize, <, fs->fs_bsize, %jd); FCHK(powerof2(fs->fs_maxbsize), ==, 0, %jd); FCHK(fs->fs_maxbsize, >, FS_MAXCONTIG * fs->fs_bsize, %jd); FCHK(fs->fs_bmask, !=, ~(fs->fs_bsize - 1), %#jx); FCHK(fs->fs_fmask, !=, ~(fs->fs_fsize - 1), %#jx); FCHK(fs->fs_qbmask, !=, ~fs->fs_bmask, %#jx); FCHK(fs->fs_qfmask, !=, ~fs->fs_fmask, %#jx); FCHK(fs->fs_bshift, !=, ILOG2(fs->fs_bsize), %jd); FCHK(fs->fs_fshift, !=, ILOG2(fs->fs_fsize), %jd); FCHK(fs->fs_fragshift, !=, ILOG2(fs->fs_frag), %jd); FCHK(fs->fs_fsbtodb, !=, ILOG2(fs->fs_fsize / sectorsize), %jd); FCHK(fs->fs_old_cgoffset, <, 0, %jd); FCHK2(fs->fs_old_cgoffset, >, 0, ~fs->fs_old_cgmask, <, 0, %jd); FCHK(fs->fs_old_cgoffset * (~fs->fs_old_cgmask), >, fs->fs_fpg, %jd); /* * If anything has failed up to this point, it is usafe to proceed * as checks below may divide by zero or make other fatal calculations. * So if we have any errors at this point, give up. */ if (error) return (error); FCHK(fs->fs_sbsize % sectorsize, !=, 0, %jd); FCHK(fs->fs_ipg % fs->fs_inopb, !=, 0, %jd); FCHK(fs->fs_sblkno, !=, roundup( howmany(fs->fs_sblockloc + SBLOCKSIZE, fs->fs_fsize), fs->fs_frag), %jd); FCHK(fs->fs_cblkno, !=, fs->fs_sblkno + roundup(howmany(SBLOCKSIZE, fs->fs_fsize), fs->fs_frag), %jd); FCHK(fs->fs_iblkno, !=, fs->fs_cblkno + fs->fs_frag, %jd); FCHK(fs->fs_dblkno, !=, fs->fs_iblkno + fs->fs_ipg / INOPF(fs), %jd); FCHK(fs->fs_cgsize, >, fs->fs_bsize, %jd); FCHK(fs->fs_cgsize, <, fs->fs_fsize, %jd); FCHK(fs->fs_cgsize % fs->fs_fsize, !=, 0, %jd); /* * This test is valid, however older versions of growfs failed * to correctly update fs_dsize so will fail this test. Thus we * exclude it from the requirements. */ #ifdef notdef WCHK(fs->fs_dsize, !=, fs->fs_size - fs->fs_sblkno - fs->fs_ncg * (fs->fs_dblkno - fs->fs_sblkno) - howmany(fs->fs_cssize, fs->fs_fsize), %jd); #endif WCHK(fs->fs_metaspace, <, 0, %jd); WCHK(fs->fs_metaspace, >, fs->fs_fpg / 2, %jd); WCHK(fs->fs_minfree, >, 99, %jd%%); maxfilesize = fs->fs_bsize * UFS_NDADDR - 1; for (sizepb = fs->fs_bsize, i = 0; i < UFS_NIADDR; i++) { sizepb *= NINDIR(fs); maxfilesize += sizepb; } WCHK(fs->fs_maxfilesize, !=, maxfilesize, %jd); /* * These values have a tight interaction with each other that * makes it hard to tightly bound them. So we can only check * that they are within a broader possible range. * * The size cannot always be accurately determined, but ensure * that it is consistent with the number of cylinder groups (fs_ncg) * and the number of fragments per cylinder group (fs_fpg). Ensure * that the summary information size is correct and that it starts * and ends in the data area of the same cylinder group. */ FCHK(fs->fs_size, <, 8 * fs->fs_frag, %jd); FCHK(fs->fs_size, <=, ((int64_t)fs->fs_ncg - 1) * fs->fs_fpg, %jd); FCHK(fs->fs_size, >, (int64_t)fs->fs_ncg * fs->fs_fpg, %jd); /* * If we are not requested to read in the csum data stop here * as the correctness of the remaining values is only important * to bound the space needed to be allocated to hold the csum data. */ if ((flags & UFS_NOCSUM) != 0) return (error); FCHK(fs->fs_csaddr, <, 0, %jd); FCHK(fs->fs_cssize, !=, fragroundup(fs, fs->fs_ncg * sizeof(struct csum)), %jd); FCHK(dtog(fs, fs->fs_csaddr), >, fs->fs_ncg, %jd); FCHK(fs->fs_csaddr, <, cgdmin(fs, dtog(fs, fs->fs_csaddr)), %jd); FCHK(dtog(fs, fs->fs_csaddr + howmany(fs->fs_cssize, fs->fs_fsize)), >, dtog(fs, fs->fs_csaddr), %jd); /* * With file system clustering it is possible to allocate * many contiguous blocks. The kernel variable maxphys defines * the maximum transfer size permitted by the controller and/or * buffering. The fs_maxcontig parameter controls the maximum * number of blocks that the filesystem will read or write * in a single transfer. It is calculated when the filesystem * is created as maxphys / fs_bsize. The loader uses a maxphys * of 128K even when running on a system that supports larger * values. If the filesystem was built on a system that supports * a larger maxphys (1M is typical) it will have configured * fs_maxcontig for that larger system. So we bound the upper * allowable limit for fs_maxconfig to be able to at least * work with a 1M maxphys on the smallest block size filesystem: * 1M / 4096 == 256. There is no harm in allowing the mounting of * filesystems that make larger than maxphys I/O requests because * those (mostly 32-bit machines) can (very slowly) handle I/O * requests that exceed maxphys. */ WCHK(fs->fs_maxcontig, <, 0, %jd); WCHK(fs->fs_maxcontig, >, MAX(256, maxphys / fs->fs_bsize), %jd); FCHK2(fs->fs_maxcontig, ==, 0, fs->fs_contigsumsize, !=, 0, %jd); FCHK2(fs->fs_maxcontig, >, 1, fs->fs_contigsumsize, !=, MIN(fs->fs_maxcontig, FS_MAXCONTIG), %jd); return (error); } /* * Make an extensive search to find a superblock. If the superblock * in the standard place cannot be used, try looking for one of the * backup superblocks. * * Flags are made up of the following or'ed together options: * * UFS_NOMSG indicates that superblock inconsistency error messages * should not be printed. * * UFS_NOCSUM causes only the superblock itself to be returned, but does * not read in any auxillary data structures like the cylinder group * summary information. */ int ffs_sbsearch(void *devfd, struct fs **fsp, int reqflags, struct malloc_type *filltype, int (*readfunc)(void *devfd, off_t loc, void **bufp, int size)) { struct fsrecovery *fsr; struct fs *protofs; void *fsrbuf; char *cp; long nocsum, flags, msg, cg; off_t sblk, secsize; int error; msg = (reqflags & UFS_NOMSG) == 0; nocsum = reqflags & UFS_NOCSUM; /* * Try normal superblock read and return it if it works. * * Suppress messages if it fails until we find out if * failure can be avoided. */ flags = UFS_NOMSG | nocsum; - if (ffs_sbget(devfd, fsp, UFS_STDSB, flags, filltype, readfunc) == 0) - return (0); + error = ffs_sbget(devfd, fsp, UFS_STDSB, flags, filltype, readfunc); + /* + * If successful or endian error, no need to try further. + */ + if (error == 0 || error == EILSEQ) { + if (msg && error == EILSEQ) + printf("UFS superblock failed due to endian mismatch " + "between machine and filesystem\n"); + return (error); + } /* * First try: ignoring hash failures. */ flags |= UFS_NOHASHFAIL; if (msg) flags &= ~UFS_NOMSG; if (ffs_sbget(devfd, fsp, UFS_STDSB, flags, filltype, readfunc) == 0) return (0); /* * Next up is to check if fields of the superblock that are * needed to find backup superblocks are usable. */ if (msg) printf("Attempted recovery for standard superblock: failed\n"); flags = UFS_FSRONLY | UFS_NOHASHFAIL | UFS_NOMSG; if (ffs_sbget(devfd, &protofs, UFS_STDSB, flags, filltype, readfunc) == 0) { if (msg) printf("Attempt extraction of recovery data from " "standard superblock.\n"); } else { /* * Final desperation is to see if alternate superblock * parameters have been saved in the boot area. */ if (msg) printf("Attempted extraction of recovery data from " "standard superblock: failed\nAttempt to find " "boot zone recovery data.\n"); /* * Look to see if recovery information has been saved. * If so we can generate a prototype superblock based * on that information. * * We need fragments-per-group, number of cylinder groups, * location of the superblock within the cylinder group, and * the conversion from filesystem fragments to disk blocks. * * When building a UFS2 filesystem, newfs(8) stores these * details at the end of the boot block area at the start * of the filesystem partition. If they have been overwritten * by a boot block, we fail. But usually they are there * and we can use them. * * We could ask the underlying device for its sector size, * but some devices lie. So we just try a plausible range. */ error = ENOENT; + fsrbuf = NULL; for (secsize = dbtob(1); secsize <= SBLOCKSIZE; secsize *= 2) if ((error = (*readfunc)(devfd, (SBLOCK_UFS2 - secsize), &fsrbuf, secsize)) == 0) break; if (error != 0) goto trynowarn; cp = fsrbuf; /* type change to keep compiler happy */ fsr = (struct fsrecovery *)&cp[secsize - sizeof *fsr]; if (fsr->fsr_magic != FS_UFS2_MAGIC || (protofs = UFS_MALLOC(SBLOCKSIZE, filltype, M_NOWAIT)) == NULL) { UFS_FREE(fsrbuf, filltype); goto trynowarn; } memset(protofs, 0, sizeof(struct fs)); protofs->fs_fpg = fsr->fsr_fpg; protofs->fs_fsbtodb = fsr->fsr_fsbtodb; protofs->fs_sblkno = fsr->fsr_sblkno; protofs->fs_magic = fsr->fsr_magic; protofs->fs_ncg = fsr->fsr_ncg; UFS_FREE(fsrbuf, filltype); } /* * Scan looking for alternative superblocks. */ flags = nocsum; if (!msg) flags |= UFS_NOMSG; for (cg = 0; cg < protofs->fs_ncg; cg++) { sblk = fsbtodb(protofs, cgsblock(protofs, cg)); if (msg) printf("Try cg %ld at sblock loc %jd\n", cg, (intmax_t)sblk); if (ffs_sbget(devfd, fsp, dbtob(sblk), flags, filltype, readfunc) == 0) { if (msg) printf("Succeeded with alternate superblock " "at %jd\n", (intmax_t)sblk); UFS_FREE(protofs, filltype); return (0); } } UFS_FREE(protofs, filltype); /* * Our alternate superblock strategies failed. Our last ditch effort * is to see if the standard superblock has only non-critical errors. */ trynowarn: flags = UFS_NOWARNFAIL | UFS_NOMSG | nocsum; if (msg) { printf("Finding an alternate superblock failed.\nCheck for " "only non-critical errors in standard superblock\n"); flags &= ~UFS_NOMSG; } if (ffs_sbget(devfd, fsp, UFS_STDSB, flags, filltype, readfunc) != 0) { if (msg) printf("Failed, superblock has critical errors\n"); return (ENOENT); } if (msg) printf("Success, using standard superblock with " "non-critical errors.\n"); return (0); } /* * Write a superblock to the devfd device from the memory pointed to by fs. * Write out the superblock summary information if it is present. * * If the write is successful, zero is returned. Otherwise one of the * following error values is returned: * EIO: failed to write superblock. * EIO: failed to write superblock summary information. */ int ffs_sbput(void *devfd, struct fs *fs, off_t loc, int (*writefunc)(void *devfd, off_t loc, void *buf, int size)) { int i, error, blks, size; uint8_t *space; /* * If there is summary information, write it first, so if there * is an error, the superblock will not be marked as clean. */ if (fs->fs_si != NULL && fs->fs_csp != NULL) { blks = howmany(fs->fs_cssize, fs->fs_fsize); space = (uint8_t *)fs->fs_csp; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; if ((error = (*writefunc)(devfd, dbtob(fsbtodb(fs, fs->fs_csaddr + i)), space, size)) != 0) return (error); space += size; } } fs->fs_fmod = 0; #ifndef _KERNEL { struct fs_summary_info *fs_si; fs->fs_time = time(NULL); /* Clear the pointers for the duration of writing. */ fs_si = fs->fs_si; fs->fs_si = NULL; fs->fs_ckhash = ffs_calc_sbhash(fs); error = (*writefunc)(devfd, loc, fs, fs->fs_sbsize); fs->fs_si = fs_si; } #else /* _KERNEL */ fs->fs_time = time_second; fs->fs_ckhash = ffs_calc_sbhash(fs); error = (*writefunc)(devfd, loc, fs, fs->fs_sbsize); #endif /* _KERNEL */ return (error); } /* * Calculate the check-hash for a superblock. */ uint32_t ffs_calc_sbhash(struct fs *fs) { uint32_t ckhash, save_ckhash; /* * A filesystem that was using a superblock ckhash may be moved * to an older kernel that does not support ckhashes. The * older kernel will clear the FS_METACKHASH flag indicating * that it does not update hashes. When the disk is moved back * to a kernel capable of ckhashes it disables them on mount: * * if ((fs->fs_flags & FS_METACKHASH) == 0) * fs->fs_metackhash = 0; * * This leaves (fs->fs_metackhash & CK_SUPERBLOCK) == 0) with an * old stale value in the fs->fs_ckhash field. Thus the need to * just accept what is there. */ if ((fs->fs_metackhash & CK_SUPERBLOCK) == 0) return (fs->fs_ckhash); save_ckhash = fs->fs_ckhash; fs->fs_ckhash = 0; /* * If newly read from disk, the caller is responsible for * verifying that fs->fs_sbsize <= SBLOCKSIZE. */ ckhash = calculate_crc32c(~0L, (void *)fs, fs->fs_sbsize); fs->fs_ckhash = save_ckhash; return (ckhash); } /* * Update the frsum fields to reflect addition or deletion * of some frags. */ void ffs_fragacct(struct fs *fs, int fragmap, int32_t fraglist[], int cnt) { int inblk; int field, subfield; int siz, pos; inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1; fragmap <<= 1; for (siz = 1; siz < fs->fs_frag; siz++) { if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0) continue; field = around[siz]; subfield = inside[siz]; for (pos = siz; pos <= fs->fs_frag; pos++) { if ((fragmap & field) == subfield) { fraglist[siz] += cnt; pos += siz; field <<= siz; subfield <<= siz; } field <<= 1; subfield <<= 1; } } } /* * block operations * * check if a block is available */ int ffs_isblock(struct fs *fs, unsigned char *cp, ufs1_daddr_t h) { unsigned char mask; switch ((int)fs->fs_frag) { case 8: return (cp[h] == 0xff); case 4: mask = 0x0f << ((h & 0x1) << 2); return ((cp[h >> 1] & mask) == mask); case 2: mask = 0x03 << ((h & 0x3) << 1); return ((cp[h >> 2] & mask) == mask); case 1: mask = 0x01 << (h & 0x7); return ((cp[h >> 3] & mask) == mask); default: #ifdef _KERNEL panic("ffs_isblock"); #endif break; } return (0); } /* * check if a block is free */ int ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h) { switch ((int)fs->fs_frag) { case 8: return (cp[h] == 0); case 4: return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); case 2: return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); case 1: return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); default: #ifdef _KERNEL panic("ffs_isfreeblock"); #endif break; } return (0); } /* * take a block out of the map */ void ffs_clrblock(struct fs *fs, u_char *cp, ufs1_daddr_t h) { switch ((int)fs->fs_frag) { case 8: cp[h] = 0; return; case 4: cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); return; case 2: cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); return; case 1: cp[h >> 3] &= ~(0x01 << (h & 0x7)); return; default: #ifdef _KERNEL panic("ffs_clrblock"); #endif break; } } /* * put a block into the map */ void ffs_setblock(struct fs *fs, unsigned char *cp, ufs1_daddr_t h) { switch ((int)fs->fs_frag) { case 8: cp[h] = 0xff; return; case 4: cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); return; case 2: cp[h >> 2] |= (0x03 << ((h & 0x3) << 1)); return; case 1: cp[h >> 3] |= (0x01 << (h & 0x7)); return; default: #ifdef _KERNEL panic("ffs_setblock"); #endif break; } } /* * Update the cluster map because of an allocation or free. * * Cnt == 1 means free; cnt == -1 means allocating. */ void ffs_clusteracct(struct fs *fs, struct cg *cgp, ufs1_daddr_t blkno, int cnt) { int32_t *sump; int32_t *lp; u_char *freemapp, *mapp; int i, start, end, forw, back, map; u_int bit; if (fs->fs_contigsumsize <= 0) return; freemapp = cg_clustersfree(cgp); sump = cg_clustersum(cgp); /* * Allocate or clear the actual block. */ if (cnt > 0) setbit(freemapp, blkno); else clrbit(freemapp, blkno); /* * Find the size of the cluster going forward. */ start = blkno + 1; end = start + fs->fs_contigsumsize; if (end >= cgp->cg_nclusterblks) end = cgp->cg_nclusterblks; mapp = &freemapp[start / NBBY]; map = *mapp++; bit = 1U << (start % NBBY); for (i = start; i < end; i++) { if ((map & bit) == 0) break; if ((i & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } forw = i - start; /* * Find the size of the cluster going backward. */ start = blkno - 1; end = start - fs->fs_contigsumsize; if (end < 0) end = -1; mapp = &freemapp[start / NBBY]; map = *mapp--; bit = 1U << (start % NBBY); for (i = start; i > end; i--) { if ((map & bit) == 0) break; if ((i & (NBBY - 1)) != 0) { bit >>= 1; } else { map = *mapp--; bit = 1U << (NBBY - 1); } } back = start - i; /* * Account for old cluster and the possibly new forward and * back clusters. */ i = back + forw + 1; if (i > fs->fs_contigsumsize) i = fs->fs_contigsumsize; sump[i] += cnt; if (back > 0) sump[back] -= cnt; if (forw > 0) sump[forw] -= cnt; /* * Update cluster summary information. */ lp = &sump[fs->fs_contigsumsize]; for (i = fs->fs_contigsumsize; i > 0; i--) if (*lp-- > 0) break; fs->fs_maxcluster[cgp->cg_cgx] = i; }