diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index 8e547b07a3c8..06992838ecd3 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -1,1606 +1,1620 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Copyright (c) 1999 Michael Smith * All rights reserved. * Copyright (c) 1999 Poul-Henning Kamp * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_rootdevname.h" #include "opt_ddb.h" #include "opt_mac.h" #ifdef DDB #include #endif #define ROOTNAME "root_device" #define VFS_MOUNTARG_SIZE_MAX (1024 * 64) static void checkdirs(struct vnode *olddp, struct vnode *newdp); static void gets(char *cp); static int vfs_domount(struct thread *td, const char *fstype, char *fspath, int fsflags, void *fsdata, int compat); static int vfs_mount_alloc(struct vnode *dvp, struct vfsconf *vfsp, const char *fspath, struct thread *td, struct mount **mpp); static int vfs_mountroot_ask(void); static int vfs_mountroot_try(const char *mountfrom); static int vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions); static int usermount = 0; SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "Unprivileged users may mount and unmount file systems"); +static int mount_root_delay = 5; +TUNABLE_INT("mount_root_delay", &mount_root_delay); MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure"); /* List of mounted filesystems. */ struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); /* For any iteration/modification of mountlist */ struct mtx mountlist_mtx; /* * The vnode of the system's root (/ in the filesystem, without chroot * active.) */ struct vnode *rootvnode; /* * The root filesystem is detailed in the kernel environment variable * vfs.root.mountfrom, which is expected to be in the general format * * :[] * vfsname := the name of a VFS known to the kernel and capable * of being mounted as root * path := disk device name or other data used by the filesystem * to locate its physical store */ /* * The root specifiers we will try if RB_CDROM is specified. */ static char *cdrom_rootdevnames[] = { "cd9660:cd0", "cd9660:acd0", NULL }; /* legacy find-root code */ char *rootdevnames[2] = {NULL, NULL}; struct cdev *rootdev = NULL; #ifdef ROOTDEVNAME const char *ctrootdevname = ROOTDEVNAME; #else const char *ctrootdevname = NULL; #endif /* * Has to be dynamic as the value of rootdev can change; however, it can't * change after the root is mounted, so a user process can't access this * sysctl until after the value is unchangeable. */ static int sysctl_rootdev(SYSCTL_HANDLER_ARGS) { int error; /* _RD prevents this from happening. */ KASSERT(req->newptr == NULL, ("Attempt to change root device name")); if (rootdev != NULL) error = sysctl_handle_string(oidp, rootdev->si_name, 0, req); else error = sysctl_handle_string(oidp, "", 0, req); return (error); } SYSCTL_PROC(_kern, OID_AUTO, rootdev, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, sysctl_rootdev, "A", "Root file system device"); /* Remove one mount option. */ static void vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt) { TAILQ_REMOVE(opts, opt, link); free(opt->name, M_MOUNT); if (opt->value != NULL) free(opt->value, M_MOUNT); #ifdef INVARIANTS else if (opt->len != 0) panic("%s: mount option with NULL value but length != 0", __func__); #endif free(opt, M_MOUNT); } /* Release all resources related to the mount options. */ static void vfs_freeopts(struct vfsoptlist *opts) { struct vfsopt *opt; while (!TAILQ_EMPTY(opts)) { opt = TAILQ_FIRST(opts); vfs_freeopt(opts, opt); } free(opts, M_MOUNT); } /* * Check if options are equal (with or without the "no" prefix). */ static int vfs_equalopts(const char *opt1, const char *opt2) { /* "opt" vs. "opt" or "noopt" vs. "noopt" */ if (strcmp(opt1, opt2) == 0) return (1); /* "noopt" vs. "opt" */ if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0) return (1); /* "opt" vs. "noopt" */ if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0) return (1); return (0); } /* * If a mount option is specified several times, * (with or without the "no" prefix) only keep * the last occurence of it. */ static void vfs_sanitizeopts(struct vfsoptlist *opts) { struct vfsopt *opt, *opt2, *tmp; TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) { opt2 = TAILQ_PREV(opt, vfsoptlist, link); while (opt2 != NULL) { if (vfs_equalopts(opt->name, opt2->name)) { tmp = TAILQ_PREV(opt2, vfsoptlist, link); vfs_freeopt(opts, opt2); opt2 = tmp; } else { opt2 = TAILQ_PREV(opt2, vfsoptlist, link); } } } } /* * Build a linked list of mount options from a struct uio. */ static int vfs_buildopts(struct uio *auio, struct vfsoptlist **options) { struct vfsoptlist *opts; struct vfsopt *opt; size_t memused; unsigned int i, iovcnt; int error, namelen, optlen; opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); TAILQ_INIT(opts); memused = 0; iovcnt = auio->uio_iovcnt; for (i = 0; i < iovcnt; i += 2) { opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); namelen = auio->uio_iov[i].iov_len; optlen = auio->uio_iov[i + 1].iov_len; opt->name = malloc(namelen, M_MOUNT, M_WAITOK); opt->value = NULL; opt->len = 0; /* * Do this early, so jumps to "bad" will free the current * option. */ TAILQ_INSERT_TAIL(opts, opt, link); memused += sizeof(struct vfsopt) + optlen + namelen; /* * Avoid consuming too much memory, and attempts to overflow * memused. */ if (memused > VFS_MOUNTARG_SIZE_MAX || optlen > VFS_MOUNTARG_SIZE_MAX || namelen > VFS_MOUNTARG_SIZE_MAX) { error = EINVAL; goto bad; } if (auio->uio_segflg == UIO_SYSSPACE) { bcopy(auio->uio_iov[i].iov_base, opt->name, namelen); } else { error = copyin(auio->uio_iov[i].iov_base, opt->name, namelen); if (error) goto bad; } /* Ensure names are null-terminated strings. */ if (opt->name[namelen - 1] != '\0') { error = EINVAL; goto bad; } if (optlen != 0) { opt->len = optlen; opt->value = malloc(optlen, M_MOUNT, M_WAITOK); if (auio->uio_segflg == UIO_SYSSPACE) { bcopy(auio->uio_iov[i + 1].iov_base, opt->value, optlen); } else { error = copyin(auio->uio_iov[i + 1].iov_base, opt->value, optlen); if (error) goto bad; } } } vfs_sanitizeopts(opts); *options = opts; return (0); bad: vfs_freeopts(opts); return (error); } /* * Merge the old mount options with the new ones passed * in the MNT_UPDATE case. */ static void vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts) { struct vfsopt *opt, *opt2, *new; TAILQ_FOREACH(opt, opts, link) { /* * Check that this option hasn't been redefined * nor cancelled with a "no" mount option. */ opt2 = TAILQ_FIRST(toopts); while (opt2 != NULL) { if (strcmp(opt2->name, opt->name) == 0) goto next; if (strncmp(opt2->name, "no", 2) == 0 && strcmp(opt2->name + 2, opt->name) == 0) { vfs_freeopt(toopts, opt2); goto next; } opt2 = TAILQ_NEXT(opt2, link); } /* We want this option, duplicate it. */ new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK); strcpy(new->name, opt->name); if (opt->len != 0) { new->value = malloc(opt->len, M_MOUNT, M_WAITOK); bcopy(opt->value, new->value, opt->len); } else { new->value = NULL; } new->len = opt->len; TAILQ_INSERT_TAIL(toopts, new, link); next: continue; } } /* * New mount API. */ int nmount(td, uap) struct thread *td; struct nmount_args /* { struct iovec *iovp; unsigned int iovcnt; int flags; } */ *uap; { struct uio *auio; struct iovec *iov; unsigned int i; int error; u_int iovcnt; iovcnt = uap->iovcnt; /* * Check that we have an even number of iovec's * and that we have at least two options. */ if ((iovcnt & 1) || (iovcnt < 4)) return (EINVAL); error = copyinuio(uap->iovp, iovcnt, &auio); if (error) return (error); iov = auio->uio_iov; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > MMAXOPTIONLEN) { free(auio, M_IOV); return (EINVAL); } iov++; } error = vfs_donmount(td, uap->flags, auio); free(auio, M_IOV); return (error); } int kernel_mount(struct iovec *iovp, u_int iovcnt, int flags) { struct uio auio; int error; /* * Check that we have an even number of iovec's * and that we have at least two options. */ if ((iovcnt & 1) || (iovcnt < 4)) return (EINVAL); auio.uio_iov = iovp; auio.uio_iovcnt = iovcnt; auio.uio_segflg = UIO_SYSSPACE; error = vfs_donmount(curthread, flags, &auio); return (error); } int kernel_vmount(int flags, ...) { struct iovec *iovp; struct uio auio; va_list ap; u_int iovcnt, iovlen, len; const char *cp; char *buf, *pos; size_t n; int error, i; len = 0; va_start(ap, flags); for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++) len += strlen(cp) + 1; va_end(ap); if (iovcnt < 4 || iovcnt & 1) return (EINVAL); iovlen = iovcnt * sizeof (struct iovec); MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK); MALLOC(buf, char *, len, M_MOUNT, M_WAITOK); pos = buf; va_start(ap, flags); for (i = 0; i < iovcnt; i++) { cp = va_arg(ap, const char *); copystr(cp, pos, len - (pos - buf), &n); iovp[i].iov_base = pos; iovp[i].iov_len = n; pos += n; } va_end(ap); auio.uio_iov = iovp; auio.uio_iovcnt = iovcnt; auio.uio_segflg = UIO_SYSSPACE; error = vfs_donmount(curthread, flags, &auio); FREE(iovp, M_MOUNT); FREE(buf, M_MOUNT); return (error); } /* * Allocate and initialize the mount point struct. */ static int vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath, struct thread *td, struct mount **mpp) { struct mount *mp; mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO); TAILQ_INIT(&mp->mnt_nvnodelist); mp->mnt_nvnodelistsize = 0; mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); vfs_busy(mp, LK_NOWAIT, 0, td); mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_vfc = vfsp; vfsp->vfc_refcount++; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); mp->mnt_vnodecovered = vp; mp->mnt_cred = crdup(td->td_ucred); mp->mnt_stat.f_owner = td->td_ucred->cr_uid; strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN); mp->mnt_iosize_max = DFLTPHYS; #ifdef MAC mac_init_mount(mp); mac_create_mount(td->td_ucred, mp); #endif *mpp = mp; return (0); } /* * Destroy the mount struct previously allocated by vfs_mount_alloc(). */ void vfs_mount_destroy(struct mount *mp, struct thread *td) { mp->mnt_vfc->vfc_refcount--; if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) panic("unmount: dangling vnode"); vfs_unbusy(mp,td); lockdestroy(&mp->mnt_lock); mtx_destroy(&mp->mnt_mtx); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup(mp); #ifdef MAC mac_destroy_mount(mp); #endif if (mp->mnt_opt != NULL) vfs_freeopts(mp->mnt_opt); crfree(mp->mnt_cred); free(mp, M_MOUNT); } static int vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions) { struct vfsoptlist *optlist; char *fstype, *fspath; int error, fstypelen, fspathlen; error = vfs_buildopts(fsoptions, &optlist); if (error) return (error); /* * We need these two options before the others, * and they are mandatory for any filesystem. * Ensure they are NUL terminated as well. */ fstypelen = 0; error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen); if (error || fstype[fstypelen - 1] != '\0') { error = EINVAL; goto bail; } fspathlen = 0; error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen); if (error || fspath[fspathlen - 1] != '\0') { error = EINVAL; goto bail; } /* * Be ultra-paranoid about making sure the type and fspath * variables will fit in our mp buffers, including the * terminating NUL. */ if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) { error = ENAMETOOLONG; goto bail; } error = vfs_domount(td, fstype, fspath, fsflags, optlist, 0); bail: if (error) vfs_freeopts(optlist); return (error); } /* * Old mount API. */ #ifndef _SYS_SYSPROTO_H_ struct mount_args { char *type; char *path; int flags; caddr_t data; }; #endif /* ARGSUSED */ int mount(td, uap) struct thread *td; struct mount_args /* { char *type; char *path; int flags; caddr_t data; } */ *uap; { char *fstype; char *fspath; int error; fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK); fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK); /* * vfs_mount() actually takes a kernel string for `type' and * `path' now, so extract them. */ error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL); if (error == 0) error = copyinstr(uap->path, fspath, MNAMELEN, NULL); if (error == 0) error = vfs_domount(td, fstype, fspath, uap->flags, uap->data, 1); free(fstype, M_TEMP); free(fspath, M_TEMP); return (error); } /* * vfs_mount(): actually attempt a filesystem mount. * * This routine is designed to be a "generic" entry point for routines * that wish to mount a filesystem. All parameters except `fsdata' are * pointers into kernel space. `fsdata' is currently still a pointer * into userspace. */ int vfs_mount(td, fstype, fspath, fsflags, fsdata) struct thread *td; const char *fstype; char *fspath; int fsflags; void *fsdata; { return (vfs_domount(td, fstype, fspath, fsflags, fsdata, 1)); } /* * vfs_domount(): actually attempt a filesystem mount. */ static int vfs_domount( struct thread *td, /* Flags common to all filesystems. */ const char *fstype, /* Filesystem type. */ char *fspath, /* Mount path. */ int fsflags, /* Flags common to all filesystems. */ void *fsdata, /* Options local to the filesystem. */ int compat /* Invocation from compat syscall. */ ) { linker_file_t lf; struct vnode *vp; struct mount *mp; struct vfsconf *vfsp; int error, flag = 0, kern_flag = 0; struct vattr va; struct nameidata nd; /* * Be ultra-paranoid about making sure the type and fspath * variables will fit in our mp buffers, including the * terminating NUL. */ if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) return (ENAMETOOLONG); if (jailed(td->td_ucred)) return (EPERM); if (usermount == 0) { if ((error = suser(td)) != 0) return (error); } /* * Do not allow NFS export or MNT_SUIDDIR by unprivileged users. */ if (fsflags & (MNT_EXPORTED | MNT_SUIDDIR)) { if ((error = suser(td)) != 0) return (error); } /* * Silently enforce MNT_NODEV, MNT_NOSUID and MNT_USER for * unprivileged users. */ if (suser(td) != 0) fsflags |= MNT_NODEV | MNT_NOSUID | MNT_USER; /* * Get vnode to be covered */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if (fsflags & MNT_UPDATE) { if ((vp->v_vflag & VV_ROOT) == 0) { vput(vp); return (EINVAL); } mp = vp->v_mount; flag = mp->mnt_flag; kern_flag = mp->mnt_kern_flag; /* * We only allow the filesystem to be reloaded if it * is currently mounted read-only. */ if ((fsflags & MNT_RELOAD) && ((mp->mnt_flag & MNT_RDONLY) == 0)) { vput(vp); return (EOPNOTSUPP); /* Needs translation */ } /* * Only privileged root, or (if MNT_USER is set) the user that * did the original mount is permitted to update it. */ error = vfs_suser(mp, td); if (error) { vput(vp); return (error); } if (vfs_busy(mp, LK_NOWAIT, 0, td)) { vput(vp); return (EBUSY); } VI_LOCK(vp); if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) { VI_UNLOCK(vp); vfs_unbusy(mp, td); vput(vp); return (EBUSY); } vp->v_iflag |= VI_MOUNT; VI_UNLOCK(vp); mp->mnt_flag |= fsflags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT); VOP_UNLOCK(vp, 0, td); if (compat == 0) { mp->mnt_optnew = fsdata; vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt); } } else { /* * If the user is not root, ensure that they own the directory * onto which we are attempting to mount. */ error = VOP_GETATTR(vp, &va, td->td_ucred, td); if (error) { vput(vp); return (error); } if (va.va_uid != td->td_ucred->cr_uid) { if ((error = suser(td)) != 0) { vput(vp); return (error); } } if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) { vput(vp); return (error); } if (vp->v_type != VDIR) { vput(vp); return (ENOTDIR); } vfsp = vfs_byname(fstype); if (vfsp == NULL) { /* Only load modules for root (very important!). */ if ((error = suser(td)) != 0) { vput(vp); return (error); } error = securelevel_gt(td->td_ucred, 0); if (error) { vput(vp); return (error); } error = linker_load_module(NULL, fstype, NULL, NULL, &lf); if (error || lf == NULL) { vput(vp); if (lf == NULL) error = ENODEV; return (error); } lf->userrefs++; /* Look up again to see if the VFS was loaded. */ vfsp = vfs_byname(fstype); if (vfsp == NULL) { lf->userrefs--; linker_file_unload(lf, LINKER_UNLOAD_FORCE); vput(vp); return (ENODEV); } } VI_LOCK(vp); if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) { VI_UNLOCK(vp); vput(vp); return (EBUSY); } vp->v_iflag |= VI_MOUNT; VI_UNLOCK(vp); /* * Allocate and initialize the filesystem. */ error = vfs_mount_alloc(vp, vfsp, fspath, td, &mp); if (error) { vput(vp); return (error); } VOP_UNLOCK(vp, 0, td); /* XXXMAC: pass to vfs_mount_alloc? */ if (compat == 0) mp->mnt_optnew = fsdata; } /* * Check if the fs implements the type VFS_[O]MOUNT() * function we are looking for. */ if ((compat == 0) == (mp->mnt_op->vfs_omount != NULL)) { printf("%s doesn't support the %s mount syscall\n", mp->mnt_vfc->vfc_name, compat ? "old" : "new"); VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); if (mp->mnt_flag & MNT_UPDATE) vfs_unbusy(mp, td); else vfs_mount_destroy(mp, td); vrele(vp); return (EOPNOTSUPP); } /* * Set the mount level flags. */ if (fsflags & MNT_RDONLY) mp->mnt_flag |= MNT_RDONLY; else if (mp->mnt_flag & MNT_RDONLY) mp->mnt_kern_flag |= MNTK_WANTRDWR; mp->mnt_flag &=~ MNT_UPDATEMASK; mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE); /* * Mount the filesystem. * XXX The final recipients of VFS_MOUNT just overwrite the ndp they * get. No freeing of cn_pnbuf. */ if (compat) error = VFS_OMOUNT(mp, fspath, fsdata, td); else error = VFS_MOUNT(mp, td); if (!error) { if (mp->mnt_opt != NULL) vfs_freeopts(mp->mnt_opt); mp->mnt_opt = mp->mnt_optnew; } /* * Prevent external consumers of mount options from reading * mnt_optnew. */ mp->mnt_optnew = NULL; if (mp->mnt_flag & MNT_UPDATE) { if (mp->mnt_kern_flag & MNTK_WANTRDWR) mp->mnt_flag &= ~MNT_RDONLY; mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT); mp->mnt_kern_flag &= ~MNTK_WANTRDWR; if (error) { mp->mnt_flag = flag; mp->mnt_kern_flag = kern_flag; } if ((mp->mnt_flag & MNT_RDONLY) == 0) { if (mp->mnt_syncer == NULL) error = vfs_allocate_syncvnode(mp); } else { if (mp->mnt_syncer != NULL) vrele(mp->mnt_syncer); mp->mnt_syncer = NULL; } vfs_unbusy(mp, td); VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); vrele(vp); return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* * Put the new filesystem on the mount list after root. */ cache_purge(vp); if (!error) { struct vnode *newdp; VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); vp->v_mountedhere = mp; mtx_lock(&mountlist_mtx); TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); vfs_event_signal(NULL, VQ_MOUNT, 0); if (VFS_ROOT(mp, &newdp, td)) panic("mount: lost mount"); checkdirs(vp, newdp); vput(newdp); VOP_UNLOCK(vp, 0, td); if ((mp->mnt_flag & MNT_RDONLY) == 0) error = vfs_allocate_syncvnode(mp); vfs_unbusy(mp, td); if (error || (error = VFS_START(mp, 0, td)) != 0) vrele(vp); } else { VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); vfs_mount_destroy(mp, td); vput(vp); } return (error); } /* * Scan all active processes to see if any of them have a current * or root directory of `olddp'. If so, replace them with the new * mount point. */ static void checkdirs(olddp, newdp) struct vnode *olddp, *newdp; { struct filedesc *fdp; struct proc *p; int nrele; if (vrefcnt(olddp) == 1) return; sx_slock(&allproc_lock); LIST_FOREACH(p, &allproc, p_list) { mtx_lock(&fdesc_mtx); fdp = p->p_fd; if (fdp == NULL) { mtx_unlock(&fdesc_mtx); continue; } nrele = 0; FILEDESC_LOCK(fdp); if (fdp->fd_cdir == olddp) { VREF(newdp); fdp->fd_cdir = newdp; nrele++; } if (fdp->fd_rdir == olddp) { VREF(newdp); fdp->fd_rdir = newdp; nrele++; } FILEDESC_UNLOCK(fdp); mtx_unlock(&fdesc_mtx); while (nrele--) vrele(olddp); } sx_sunlock(&allproc_lock); if (rootvnode == olddp) { vrele(rootvnode); VREF(newdp); rootvnode = newdp; } } /* * Unmount a filesystem. * * Note: unmount takes a path to the vnode mounted on as argument, * not special file (as before). */ #ifndef _SYS_SYSPROTO_H_ struct unmount_args { char *path; int flags; }; #endif /* ARGSUSED */ int unmount(td, uap) struct thread *td; register struct unmount_args /* { char *path; int flags; } */ *uap; { struct mount *mp; char *pathbuf; int error, id0, id1; if (jailed(td->td_ucred)) return (EPERM); if (usermount == 0) { if ((error = suser(td)) != 0) return (error); } pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK); error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL); if (error) { free(pathbuf, M_TEMP); return (error); } if (uap->flags & MNT_BYFSID) { /* Decode the filesystem ID. */ if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) { free(pathbuf, M_TEMP); return (EINVAL); } mtx_lock(&mountlist_mtx); TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == id0 && mp->mnt_stat.f_fsid.val[1] == id1) break; } mtx_unlock(&mountlist_mtx); } else { mtx_lock(&mountlist_mtx); TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0) break; } mtx_unlock(&mountlist_mtx); } free(pathbuf, M_TEMP); if (mp == NULL) { /* * Previously we returned ENOENT for a nonexistent path and * EINVAL for a non-mountpoint. We cannot tell these apart * now, so in the !MNT_BYFSID case return the more likely * EINVAL for compatibility. */ return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL); } /* * Only privileged root, or (if MNT_USER is set) the user that did the * original mount is permitted to unmount this filesystem. */ error = vfs_suser(mp, td); if (error) return (error); /* * Don't allow unmounting the root filesystem. */ if (mp->mnt_flag & MNT_ROOTFS) return (EINVAL); return (dounmount(mp, uap->flags, td)); } /* * Do the actual filesystem unmount. */ int dounmount(mp, flags, td) struct mount *mp; int flags; struct thread *td; { struct vnode *coveredvp, *fsrootvp; int error; int async_flag; mtx_lock(&mountlist_mtx); if (mp->mnt_kern_flag & MNTK_UNMOUNT) { mtx_unlock(&mountlist_mtx); return (EBUSY); } mp->mnt_kern_flag |= MNTK_UNMOUNT; /* Allow filesystems to detect that a forced unmount is in progress. */ if (flags & MNT_FORCE) mp->mnt_kern_flag |= MNTK_UNMOUNTF; error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK | ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td); if (error) { mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup(mp); return (error); } vn_start_write(NULL, &mp, V_WAIT); if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); vfs_msync(mp, MNT_WAIT); async_flag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; cache_purgevfs(mp); /* remove cache entries for this file sys */ if (mp->mnt_syncer != NULL) vrele(mp->mnt_syncer); /* * For forced unmounts, move process cdir/rdir refs on the fs root * vnode to the covered vnode. For non-forced unmounts we want * such references to cause an EBUSY error. */ if ((flags & MNT_FORCE) && VFS_ROOT(mp, &fsrootvp, td) == 0) { if (mp->mnt_vnodecovered != NULL) checkdirs(fsrootvp, mp->mnt_vnodecovered); if (fsrootvp == rootvnode) { vrele(rootvnode); rootvnode = NULL; } vput(fsrootvp); } if (((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) || (flags & MNT_FORCE)) { error = VFS_UNMOUNT(mp, flags, td); } vn_finished_write(mp); if (error) { /* Undo cdir/rdir and rootvnode changes made above. */ if ((flags & MNT_FORCE) && VFS_ROOT(mp, &fsrootvp, td) == 0) { if (mp->mnt_vnodecovered != NULL) checkdirs(mp->mnt_vnodecovered, fsrootvp); if (rootvnode == NULL) { rootvnode = fsrootvp; vref(rootvnode); } vput(fsrootvp); } if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) (void) vfs_allocate_syncvnode(mp); mtx_lock(&mountlist_mtx); mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); mp->mnt_flag |= async_flag; lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup(mp); return (error); } mtx_lock(&mountlist_mtx); TAILQ_REMOVE(&mountlist, mp, mnt_list); if ((coveredvp = mp->mnt_vnodecovered) != NULL) coveredvp->v_mountedhere = NULL; mtx_unlock(&mountlist_mtx); vfs_event_signal(NULL, VQ_UNMOUNT, 0); vfs_mount_destroy(mp, td); if (coveredvp != NULL) vrele(coveredvp); return (0); } /* * Lookup a filesystem type, and if found allocate and initialize * a mount structure for it. * * Devname is usually updated by mount(8) after booting. */ int vfs_rootmountalloc(fstypename, devname, mpp) char *fstypename; char *devname; struct mount **mpp; { struct thread *td = curthread; /* XXX */ struct vfsconf *vfsp; struct mount *mp; int error; if (fstypename == NULL) return (ENODEV); vfsp = vfs_byname(fstypename); if (vfsp == NULL) return (ENODEV); error = vfs_mount_alloc(NULLVP, vfsp, "/", td, &mp); if (error) return (error); mp->mnt_flag |= MNT_RDONLY | MNT_ROOTFS; strlcpy(mp->mnt_stat.f_mntfromname, devname, MNAMELEN); *mpp = mp; return (0); } /* * Find and mount the root filesystem */ void vfs_mountroot(void) { char *cp; - int error, i, asked = 0; - + int asked, error, i, nrootdevs; + + asked = 0; + error = EDOOFUS; + nrootdevs = sizeof(rootdevnames) / sizeof(rootdevnames[0]); + if (mount_root_delay <= 0) + mount_root_delay = 1; + for (; mount_root_delay > 0; mount_root_delay--) { + /* + * Wait for GEOM to settle down + */ + g_waitidle(); - /* - * Wait for GEOM to settle down - */ - g_waitidle(); + /* + * We are booted with instructions to prompt for the root filesystem. + */ + if (boothowto & RB_ASKNAME) { + if (!vfs_mountroot_ask()) + return; + asked = 1; + } - /* - * We are booted with instructions to prompt for the root filesystem. - */ - if (boothowto & RB_ASKNAME) { - if (!vfs_mountroot_ask()) - return; - asked = 1; - } + /* + * The root filesystem information is compiled in, and we are + * booted with instructions to use it. + */ + if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) { + if ((error = vfs_mountroot_try(ctrootdevname)) == 0) + return; + ctrootdevname = NULL; + } - /* - * The root filesystem information is compiled in, and we are - * booted with instructions to use it. - */ - if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) { - if (!vfs_mountroot_try(ctrootdevname)) - return; - ctrootdevname = NULL; - } + /* + * We've been given the generic "use CDROM as root" flag. This is + * necessary because one media may be used in many different + * devices, so we need to search for them. + */ + if (boothowto & RB_CDROM) { + for (i = 0; cdrom_rootdevnames[i] != NULL; i++) { + error = vfs_mountroot_try(cdrom_rootdevnames[i]); + if (error == 0) + return; + } + } - /* - * We've been given the generic "use CDROM as root" flag. This is - * necessary because one media may be used in many different - * devices, so we need to search for them. - */ - if (boothowto & RB_CDROM) { - for (i = 0; cdrom_rootdevnames[i] != NULL; i++) { - if (!vfs_mountroot_try(cdrom_rootdevnames[i])) + /* + * Try to use the value read by the loader from /etc/fstab, or + * supplied via some other means. This is the preferred + * mechanism. + */ + cp = getenv("vfs.root.mountfrom"); + if (cp != NULL) { + error = vfs_mountroot_try(cp); + freeenv(cp); + if (error == 0) return; } - } - - /* - * Try to use the value read by the loader from /etc/fstab, or - * supplied via some other means. This is the preferred - * mechanism. - */ - cp = getenv("vfs.root.mountfrom"); - if (cp != NULL) { - error = vfs_mountroot_try(cp); - freeenv(cp); - if (!error) - return; - } - /* - * Try values that may have been computed by code during boot - */ - if (!vfs_mountroot_try(rootdevnames[0])) - return; - if (!vfs_mountroot_try(rootdevnames[1])) - return; + /* + * Try values that may have been computed by code during boot + */ + for (i = 0; i < nrootdevs; i++) { + if ((error = vfs_mountroot_try(rootdevnames[i])) == 0) + return; + } - /* - * If we (still) have a compiled-in default, try it. - */ - if (ctrootdevname != NULL) - if (!vfs_mountroot_try(ctrootdevname)) + /* + * If we (still) have a compiled-in default, try it. + */ + if ((error = vfs_mountroot_try(ctrootdevname)) == 0) return; + tsleep(&mount_root_delay, PRIBIO, "mroot", hz); + } + printf("Root mount failed: %d.\n", error); /* * Everything so far has failed, prompt on the console if we haven't * already tried that. */ if (!asked) if (!vfs_mountroot_ask()) return; panic("Root mount failed, startup aborted."); } /* * Mount (mountfrom) as the root filesystem. */ static int vfs_mountroot_try(const char *mountfrom) { struct mount *mp; char *vfsname, *path; const char *devname; int error; char patt[32]; - int s; vfsname = NULL; path = NULL; mp = NULL; error = EINVAL; if (mountfrom == NULL) return (error); /* don't complain */ - s = splcam(); /* Overkill, but annoying without it */ - printf("Mounting root from %s\n", mountfrom); - splx(s); + if (bootverbose) { + int s; + + s = splcam(); /* Overkill, but annoying without it */ + printf("Trying to mount root from %s\n", mountfrom); + splx(s); + } /* parse vfs name and path */ vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK); path = malloc(MNAMELEN, M_MOUNT, M_WAITOK); vfsname[0] = path[0] = 0; sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN); if (sscanf(mountfrom, patt, vfsname, path) < 1) goto done; /* allocate a root mount */ error = vfs_rootmountalloc(vfsname, path[0] != 0 ? path : ROOTNAME, &mp); if (error != 0) { printf("Can't allocate root mount for filesystem '%s': %d\n", vfsname, error); goto done; } /* * do our best to set rootdev * XXX: This does not belong here! */ if (path[0] != '\0') { struct cdev *diskdev; diskdev = getdiskbyname(path); if (diskdev != NULL) rootdev = diskdev; - else - printf("setrootbyname failed\n"); } /* If the root device is a type "memory disk", mount RW */ if (rootdev != NULL && devsw(rootdev) != NULL) { devname = devtoname(rootdev); if (devname[0] == 'm' && devname[1] == 'd') mp->mnt_flag &= ~MNT_RDONLY; } error = VFS_OMOUNT(mp, NULL, NULL, curthread); done: if (vfsname != NULL) free(vfsname, M_MOUNT); if (path != NULL) free(path, M_MOUNT); if (error != 0) { if (mp != NULL) vfs_mount_destroy(mp, curthread); - printf("Root mount failed: %d\n", error); } else { - /* register with list of mounted filesystems */ mtx_lock(&mountlist_mtx); TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); /* sanity check system clock against root fs timestamp */ inittodr(mp->mnt_time); vfs_unbusy(mp, curthread); error = VFS_START(mp, 0, curthread); + if (error == 0) + printf("Mounted root from %s.\n", mountfrom); } return (error); } /* * Spin prompting on the console for a suitable root filesystem */ static int vfs_mountroot_ask(void) { char name[128]; + int error; for(;;) { printf("\nManual root filesystem specification:\n"); printf(" : Mount using filesystem \n"); #if defined(__i386__) || defined(__ia64__) printf(" eg. ufs:da0s1a\n"); #else printf(" eg. ufs:/dev/da0a\n"); #endif printf(" ? List valid disk boot devices\n"); printf(" Abort manual input\n"); printf("\nmountroot> "); gets(name); if (name[0] == '\0') return (1); if (name[0] == '?') { printf("\nList of GEOM managed disk devices:\n "); g_dev_print(); continue; } - if (!vfs_mountroot_try(name)) + if ((error = vfs_mountroot_try(name)) == 0) return (0); + else + printf("Root mount failed: %d\n", error); } } /* * Local helper function for vfs_mountroot_ask. */ static void gets(char *cp) { char *lp; int c; lp = cp; for (;;) { printf("%c", c = cngetc() & 0177); switch (c) { case -1: case '\n': case '\r': *lp++ = '\0'; return; case '\b': case '\177': if (lp > cp) { printf(" \b"); lp--; } continue; case '#': lp--; if (lp < cp) lp = cp; continue; case '@': case 'u' & 037: lp = cp; printf("%c", '\n'); continue; default: *lp++ = c; } } } /* * Convert a given name to the cdev pointer of the device, which is probably * but not by definition, a disk. Mount a DEVFS (on nothing), look the name * up, extract the cdev from the vnode and unmount it again. Unfortunately * we cannot use the vnode directly (because we unmount the DEVFS again) * so the filesystems still have to do the bdevvp() stunt. */ struct cdev * getdiskbyname(char *name) { char *cp = name; struct cdev *dev = NULL; struct thread *td = curthread; struct vfsconf *vfsp; struct mount *mp = NULL; struct vnode *vroot = NULL; struct nameidata nid; int error; if (!bcmp(cp, "/dev/", 5)) cp += 5; do { vfsp = vfs_byname("devfs"); if (vfsp == NULL) break; error = vfs_mount_alloc(NULLVP, vfsp, "/dev", td, &mp); if (error) break; mp->mnt_flag |= MNT_RDONLY; error = VFS_MOUNT(mp, curthread); if (error) break; VFS_START(mp, 0, td); VFS_ROOT(mp, &vroot, td); VOP_UNLOCK(vroot, 0, td); NDINIT(&nid, LOOKUP, NOCACHE|FOLLOW, UIO_SYSSPACE, cp, curthread); nid.ni_startdir = vroot; nid.ni_pathlen = strlen(cp); nid.ni_cnd.cn_cred = curthread->td_ucred; nid.ni_cnd.cn_nameptr = cp; error = lookup(&nid); if (error) break; dev = vn_todev (nid.ni_vp); NDFREE(&nid, 0); } while (0); if (vroot != NULL) VFS_UNMOUNT(mp, 0, td); if (mp != NULL) vfs_mount_destroy(mp, td); return (dev); } /* Show the struct cdev *for a disk specified by name */ #ifdef DDB DB_SHOW_COMMAND(disk, db_getdiskbyname) { struct cdev *dev; if (modif[0] == '\0') { db_error("usage: show disk/devicename"); return; } dev = getdiskbyname(modif); if (dev != NULL) db_printf("struct cdev *= %p\n", dev); else db_printf("No disk device matched.\n"); } #endif /* * Get a mount option by its name. * * Return 0 if the option was found, ENOENT otherwise. * If len is non-NULL it will be filled with the length * of the option. If buf is non-NULL, it will be filled * with the address of the option. */ int vfs_getopt(opts, name, buf, len) struct vfsoptlist *opts; const char *name; void **buf; int *len; { struct vfsopt *opt; KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) == 0) { if (len != NULL) *len = opt->len; if (buf != NULL) *buf = opt->value; return (0); } } return (ENOENT); } /* * Find and copy a mount option. * * The size of the buffer has to be specified * in len, if it is not the same length as the * mount option, EINVAL is returned. * Returns ENOENT if the option is not found. */ int vfs_copyopt(opts, name, dest, len) struct vfsoptlist *opts; const char *name; void *dest; int len; { struct vfsopt *opt; KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL")); TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) == 0) { if (len != opt->len) return (EINVAL); bcopy(opt->value, dest, opt->len); return (0); } } return (ENOENT); } /* * This is a helper function for filesystems to traverse their * vnodes. See MNT_VNODE_FOREACH() in sys/mount.h */ struct vnode * __mnt_vnode_next(struct vnode **nvp, struct mount *mp) { struct vnode *vp; mtx_assert(&mp->mnt_mtx, MA_OWNED); vp = *nvp; /* Check if we are done */ if (vp == NULL) return (NULL); /* If our next vnode is no longer ours, start over */ if (vp->v_mount != mp) vp = TAILQ_FIRST(&mp->mnt_nvnodelist); /* Save pointer to next vnode in list */ if (vp != NULL) *nvp = TAILQ_NEXT(vp, v_nmntvnodes); else *nvp = NULL; return (vp); } diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index 8fc3741513c6..a8a5937b922c 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1,1526 +1,1523 @@ /* * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_mac.h" #include "opt_quota.h" #include "opt_ufs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include uma_zone_t uma_inode, uma_ufs1, uma_ufs2; static int ffs_sbupdate(struct ufsmount *, int); static int ffs_reload(struct mount *, struct thread *); static int ffs_mountfs(struct vnode *, struct mount *, struct thread *); static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, ufs2_daddr_t); static void ffs_oldfscompat_write(struct fs *, struct ufsmount *); static void ffs_ifree(struct ufsmount *ump, struct inode *ip); static vfs_init_t ffs_init; static vfs_uninit_t ffs_uninit; static vfs_extattrctl_t ffs_extattrctl; static vfs_omount_t ffs_omount; static struct vfsops ufs_vfsops = { .vfs_extattrctl = ffs_extattrctl, .vfs_fhtovp = ffs_fhtovp, .vfs_init = ffs_init, .vfs_omount = ffs_omount, .vfs_quotactl = ufs_quotactl, .vfs_root = ufs_root, .vfs_start = ufs_start, .vfs_statfs = ffs_statfs, .vfs_sync = ffs_sync, .vfs_uninit = ffs_uninit, .vfs_unmount = ffs_unmount, .vfs_vget = ffs_vget, .vfs_vptofh = ffs_vptofh, }; VFS_SET(ufs_vfsops, ufs, 0); /* * ffs_omount * * Called when mounting local physical media * * PARAMETERS: * mountroot * mp mount point structure * path NULL (flag for root mount!!!) * data * ndp * p process (user credentials check [statfs]) * * mount * mp mount point structure * path path to mount point * data pointer to argument struct in user space * ndp mount point namei() return (used for * credentials on reload), reused to look * up block device. * p process (user credentials check) * * RETURNS: 0 Success * !0 error number (errno.h) * * LOCK STATE: * * ENTRY * mount point is locked * EXIT * mount point is locked * * NOTES: * A NULL path can be used for a flag since the mount * system call will fail with EFAULT in copyinstr in * namei() if it is a genuine NULL from the user. */ static int ffs_omount(struct mount *mp, char *path, caddr_t data, struct thread *td) { size_t size; struct vnode *devvp, *rootvp; struct ufs_args args; struct ufsmount *ump = 0; struct fs *fs; int error, flags; mode_t accessmode; struct nameidata ndp; if (uma_inode == NULL) { uma_inode = uma_zcreate("FFS inode", sizeof(struct inode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_ufs1 = uma_zcreate("FFS1 dinode", sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_ufs2 = uma_zcreate("FFS2 dinode", sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } /* * Use NULL path to indicate we are mounting the root filesystem. */ if (path == NULL) { - if ((error = bdevvp(rootdev, &rootvp))) { - printf("ffs_mountroot: can't find rootvp\n"); + if ((error = bdevvp(rootdev, &rootvp)) != 0) return (error); - } - if ((error = ffs_mountfs(rootvp, mp, td)) != 0) return (error); return (0); } /* * Mounting non-root filesystem or updating a filesystem */ if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0) return (error); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_fs; devvp = ump->um_devvp; if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) return (error); /* * Flush any dirty data. */ if ((error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) != 0) { vn_finished_write(mp); return (error); } /* * Check for and optionally get rid of files open * for writing. */ flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; if (mp->mnt_flag & MNT_SOFTDEP) { error = softdep_flushfiles(mp, flags, td); } else { error = ffs_flushfiles(mp, flags, td); } if (error) { vn_finished_write(mp); return (error); } if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("%s: %s: blocks %jd files %d\n", fs->fs_fsmnt, "update error", (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } fs->fs_ronly = 1; if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) fs->fs_clean = 1; if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) { fs->fs_ronly = 0; fs->fs_clean = 0; vn_finished_write(mp); return (error); } vn_finished_write(mp); } if ((mp->mnt_flag & MNT_RELOAD) && (error = ffs_reload(mp, td)) != 0) return (error); if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ if (suser(td)) { vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, td->td_ucred, td)) != 0) { VOP_UNLOCK(devvp, 0, td); return (error); } VOP_UNLOCK(devvp, 0, td); } fs->fs_flags &= ~FS_UNCLEAN; if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; if ((mp->mnt_flag & MNT_FORCE) || ((fs->fs_flags & FS_NEEDSFSCK) == 0 && (fs->fs_flags & FS_DOSOFTDEP))) { printf("WARNING: %s was not %s\n", fs->fs_fsmnt, "properly dismounted"); } else { printf( "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", fs->fs_fsmnt); return (EPERM); } } if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) return (error); fs->fs_ronly = 0; fs->fs_clean = 0; if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) { vn_finished_write(mp); return (error); } /* check to see if we need to start softdep */ if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, td->td_ucred))){ vn_finished_write(mp); return (error); } if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); vn_finished_write(mp); } /* * Soft updates is incompatible with "async", * so if we are doing softupdates stop the user * from setting the async flag in an update. * Softdep_mount() clears it in an initial mount * or ro->rw remount. */ if (mp->mnt_flag & MNT_SOFTDEP) mp->mnt_flag &= ~MNT_ASYNC; /* * If not updating name, process export requests. */ if (args.fspec == 0) return (vfs_export(mp, &args.export)); /* * If this is a snapshot request, take the snapshot. */ if (mp->mnt_flag & MNT_SNAPSHOT) return (ffs_snapshot(mp, args.fspec)); } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible disk device. */ NDINIT(&ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td); if ((error = namei(&ndp)) != 0) return (error); NDFREE(&ndp, NDF_ONLY_PNBUF); devvp = ndp.ni_vp; if (!vn_isdisk(devvp, &error)) { vrele(devvp); return (error); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ if (suser(td)) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){ vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, td); } if (mp->mnt_flag & MNT_UPDATE) { /* * Update only * * If it's not the same vnode, or at least the same device * then it's not correct. */ if (devvp != ump->um_devvp && devvp->v_rdev != ump->um_devvp->v_rdev) error = EINVAL; /* needs translation */ vrele(devvp); if (error) return (error); } else { /* * New mount * * We need the name for the mount point (also used for * "last mounted on") copied in. If an error occurs, * the mount point is discarded by the upper level code. * Note that vfs_mount() populates f_mntonname for us. */ if ((error = ffs_mountfs(devvp, mp, td)) != 0) { vrele(devvp); return (error); } } /* * Save "mounted from" device name info for mount point (NULL pad). */ copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); return (0); } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. */ static int ffs_reload(struct mount *mp, struct thread *td) { struct vnode *vp, *nvp, *devvp; struct inode *ip; void *space; struct buf *bp; struct fs *fs, *newfs; ufs2_daddr_t sblockloc; int i, blks, size, error; int32_t *lp; if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); /* * Step 1: invalidate all cached meta-data. */ devvp = VFSTOUFS(mp)->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); if (vinvalbuf(devvp, 0, td->td_ucred, td, 0, 0) != 0) panic("ffs_reload: dirty1"); /* * Only VMIO the backing device if the backing device is a real * disk device. See ffs_mountfs() for more details. */ if (vn_isdisk(devvp, NULL)) vfs_object_create(devvp, td, td->td_ucred); VOP_UNLOCK(devvp, 0, td); /* * Step 2: re-read superblock from disk. */ fs = VFSTOUFS(mp)->um_fs; if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize, NOCRED, &bp)) != 0) return (error); newfs = (struct fs *)bp->b_data; if ((newfs->fs_magic != FS_UFS1_MAGIC && newfs->fs_magic != FS_UFS2_MAGIC) || newfs->fs_bsize > MAXBSIZE || newfs->fs_bsize < sizeof(struct fs)) { brelse(bp); return (EIO); /* XXX needs translation */ } /* * Copy pointer fields back into superblock before copying in XXX * new superblock. These should really be in the ufsmount. XXX * Note that important parameters (eg fs_ncg) are unchanged. */ newfs->fs_csp = fs->fs_csp; newfs->fs_maxcluster = fs->fs_maxcluster; newfs->fs_contigdirs = fs->fs_contigdirs; newfs->fs_active = fs->fs_active; /* The file system is still read-only. */ newfs->fs_ronly = 1; sblockloc = fs->fs_sblockloc; bcopy(newfs, fs, (u_int)fs->fs_sbsize); brelse(bp); mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc); if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("%s: reload pending error: blocks %jd files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } /* * Step 3: re-read summary information from disk. */ blks = howmany(fs->fs_cssize, fs->fs_fsize); space = fs->fs_csp; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, NOCRED, &bp); if (error) return (error); bcopy(bp->b_data, space, (u_int)size); space = (char *)space + size; brelse(bp); } /* * We no longer know anything about clusters per cylinder group. */ if (fs->fs_contigsumsize > 0) { lp = fs->fs_maxcluster; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; } loop: MNT_ILOCK(mp); MNT_VNODE_FOREACH(vp, mp, nvp) { VI_LOCK(vp); if (vp->v_iflag & VI_XLOCK) { VI_UNLOCK(vp); continue; } MNT_IUNLOCK(mp); /* * Step 4: invalidate all inactive vnodes. */ if (vp->v_usecount == 0) { vgonel(vp, td); goto loop; } /* * Step 5: invalidate all cached file data. */ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { goto loop; } if (vinvalbuf(vp, 0, td->td_ucred, td, 0, 0)) panic("ffs_reload: dirty2"); /* * Step 6: re-read inode data for all active vnodes. */ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { VOP_UNLOCK(vp, 0, td); vrele(vp); return (error); } ffs_load_inode(bp, ip, fs, ip->i_number); ip->i_effnlink = ip->i_nlink; brelse(bp); VOP_UNLOCK(vp, 0, td); vrele(vp); MNT_ILOCK(mp); } MNT_IUNLOCK(mp); return (0); } /* * Possible superblock locations ordered from most to least likely. */ static int sblock_try[] = SBLOCKSEARCH; /* * Common code for mount and mountroot */ static int ffs_mountfs(devvp, mp, td) struct vnode *devvp; struct mount *mp; struct thread *td; { struct ufsmount *ump; struct buf *bp; struct fs *fs; struct cdev *dev; void *space; ufs2_daddr_t sblockloc; int error, i, blks, size, ronly; int32_t *lp; struct ucred *cred; size_t strsize; dev = devvp->v_rdev; cred = td ? td->td_ucred : NOCRED; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); if (vcount(devvp) > 1) return (EBUSY); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0); if (error) { VOP_UNLOCK(devvp, 0, td); return (error); } /* * Only VMIO the backing device if the backing device is a real * disk device. * Note that it is optional that the backing device be VMIOed. This * increases the opportunity for metadata caching. */ if (vn_isdisk(devvp, NULL)) vfs_object_create(devvp, td, cred); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; /* * XXX: open the device with read and write access even if only * read access is needed now. Write access is needed if the * filesystem is ever mounted read/write, and we don't change the * access mode for remounts. */ #ifdef notyet error = VOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, FSCRED, td, -1); #else error = VOP_OPEN(devvp, FREAD | FWRITE, FSCRED, td, -1); #endif VOP_UNLOCK(devvp, 0, td); if (error) return (error); if (devvp->v_rdev->si_iosize_max != 0) mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; if (mp->mnt_iosize_max > MAXPHYS) mp->mnt_iosize_max = MAXPHYS; bp = NULL; ump = NULL; fs = NULL; sblockloc = 0; /* * Try reading the superblock in each of its possible locations. */ for (i = 0; sblock_try[i] != -1; i++) { if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, cred, &bp)) != 0) goto out; fs = (struct fs *)bp->b_data; sblockloc = sblock_try[i]; if ((fs->fs_magic == FS_UFS1_MAGIC || (fs->fs_magic == FS_UFS2_MAGIC && (fs->fs_sblockloc == sblockloc || (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) && fs->fs_bsize <= MAXBSIZE && fs->fs_bsize >= sizeof(struct fs)) break; brelse(bp); bp = NULL; } if (sblock_try[i] == -1) { error = EINVAL; /* XXX needs translation */ goto out; } fs->fs_fmod = 0; fs->fs_flags &= ~FS_INDEXDIRS; /* no support for directory indicies */ fs->fs_flags &= ~FS_UNCLEAN; if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; if (ronly || (mp->mnt_flag & MNT_FORCE) || ((fs->fs_flags & FS_NEEDSFSCK) == 0 && (fs->fs_flags & FS_DOSOFTDEP))) { printf( "WARNING: %s was not properly dismounted\n", fs->fs_fsmnt); } else { printf( "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", fs->fs_fsmnt); error = EPERM; goto out; } if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) && (mp->mnt_flag & MNT_FORCE)) { printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } } if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("%s: mount pending error: blocks %jd files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO); ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK); if (fs->fs_magic == FS_UFS1_MAGIC) { ump->um_fstype = UFS1; ump->um_balloc = ffs_balloc_ufs1; } else { ump->um_fstype = UFS2; ump->um_balloc = ffs_balloc_ufs2; } ump->um_blkatoff = ffs_blkatoff; ump->um_truncate = ffs_truncate; ump->um_update = ffs_update; ump->um_valloc = ffs_valloc; ump->um_vfree = ffs_vfree; ump->um_ifree = ffs_ifree; bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize); if (fs->fs_sbsize < SBLOCKSIZE) bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); bp = NULL; fs = ump->um_fs; ffs_oldfscompat_read(fs, ump, sblockloc); fs->fs_ronly = ronly; size = fs->fs_cssize; blks = howmany(size, fs->fs_fsize); if (fs->fs_contigsumsize > 0) size += fs->fs_ncg * sizeof(int32_t); size += fs->fs_ncg * sizeof(u_int8_t); space = malloc((u_long)size, M_UFSMNT, M_WAITOK); fs->fs_csp = space; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, cred, &bp)) != 0) { free(fs->fs_csp, M_UFSMNT); goto out; } bcopy(bp->b_data, space, (u_int)size); space = (char *)space + size; brelse(bp); bp = NULL; } if (fs->fs_contigsumsize > 0) { fs->fs_maxcluster = lp = space; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; space = lp; } size = fs->fs_ncg * sizeof(u_int8_t); fs->fs_contigdirs = (u_int8_t *)space; bzero(fs->fs_contigdirs, size); fs->fs_active = NULL; mp->mnt_data = (qaddr_t)ump; mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0]; mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || vfs_getvfs(&mp->mnt_stat.f_fsid)) vfs_getnewfsid(mp); mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; mp->mnt_flag |= MNT_LOCAL; if ((fs->fs_flags & FS_MULTILABEL) != 0) #ifdef MAC mp->mnt_flag |= MNT_MULTILABEL; #else printf( "WARNING: %s: multilabel flag on fs but no MAC support\n", fs->fs_fsmnt); #endif if ((fs->fs_flags & FS_ACLS) != 0) #ifdef UFS_ACL mp->mnt_flag |= MNT_ACLS; #else printf( "WARNING: %s: ACLs flag on fs but no ACLs support\n", fs->fs_fsmnt); #endif ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = fs->fs_nindir; ump->um_bptrtodb = fs->fs_fsbtodb; ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; #ifdef UFS_EXTATTR ufs_extattr_uepm_init(&ump->um_extattr); #endif devvp->v_rdev->si_mountpoint = mp; /* * Set FS local "last mounted on" information (NULL pad) */ copystr( mp->mnt_stat.f_mntonname, /* mount point*/ fs->fs_fsmnt, /* copy area*/ sizeof(fs->fs_fsmnt) - 1, /* max size*/ &strsize); /* real size*/ bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize); if( mp->mnt_flag & MNT_ROOTFS) { /* * Root mount; update timestamp in mount structure. * this will be used by the common root mount code * to update the system clock. */ mp->mnt_time = fs->fs_time; } if (ronly == 0) { if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, cred)) != 0) { free(fs->fs_csp, M_UFSMNT); goto out; } if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); fs->fs_fmod = 1; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT); } /* * Initialize filesystem stat information in mount struct. */ (void)VFS_STATFS(mp, &mp->mnt_stat, td); #ifdef UFS_EXTATTR #ifdef UFS_EXTATTR_AUTOSTART /* * * Auto-starting does the following: * - check for /.attribute in the fs, and extattr_start if so * - for each file in .attribute, enable that file with * an attribute of the same name. * Not clear how to report errors -- probably eat them. * This would all happen while the filesystem was busy/not * available, so would effectively be "atomic". */ (void) ufs_extattr_autostart(mp, td); #endif /* !UFS_EXTATTR_AUTOSTART */ #endif /* !UFS_EXTATTR */ return (0); out: devvp->v_rdev->si_mountpoint = NULL; if (bp) brelse(bp); /* XXX: see comment above VOP_OPEN. */ #ifdef notyet (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, cred, td); #else (void)VOP_CLOSE(devvp, FREAD | FWRITE, cred, td); #endif if (ump) { free(ump->um_fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } #include int bigcgs = 0; SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, ""); /* * Sanity checks for loading old filesystem superblocks. * See ffs_oldfscompat_write below for unwound actions. * * XXX - Parts get retired eventually. * Unfortunately new bits get added. */ static void ffs_oldfscompat_read(fs, ump, sblockloc) struct fs *fs; struct ufsmount *ump; ufs2_daddr_t sblockloc; { off_t maxfilesize; /* * If not yet done, update fs_flags location and value of fs_sblockloc. */ if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { fs->fs_flags = fs->fs_old_flags; fs->fs_old_flags |= FS_FLAGS_UPDATED; fs->fs_sblockloc = sblockloc; } /* * If not yet done, update UFS1 superblock with new wider fields. */ if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) { fs->fs_maxbsize = fs->fs_bsize; fs->fs_time = fs->fs_old_time; fs->fs_size = fs->fs_old_size; fs->fs_dsize = fs->fs_old_dsize; fs->fs_csaddr = fs->fs_old_csaddr; fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir; fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree; fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree; fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree; } if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_old_inodefmt < FS_44INODEFMT) { fs->fs_maxfilesize = (u_quad_t) 1LL << 39; fs->fs_qbmask = ~fs->fs_bmask; fs->fs_qfmask = ~fs->fs_fmask; } if (fs->fs_magic == FS_UFS1_MAGIC) { ump->um_savedmaxfilesize = fs->fs_maxfilesize; maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1; if (fs->fs_maxfilesize > maxfilesize) fs->fs_maxfilesize = maxfilesize; } /* Compatibility for old filesystems */ if (fs->fs_avgfilesize <= 0) fs->fs_avgfilesize = AVFILESIZ; if (fs->fs_avgfpdir <= 0) fs->fs_avgfpdir = AFPDIR; if (bigcgs) { fs->fs_save_cgsize = fs->fs_cgsize; fs->fs_cgsize = fs->fs_bsize; } } /* * Unwinding superblock updates for old filesystems. * See ffs_oldfscompat_read above for details. * * XXX - Parts get retired eventually. * Unfortunately new bits get added. */ static void ffs_oldfscompat_write(fs, ump) struct fs *fs; struct ufsmount *ump; { /* * Copy back UFS2 updated fields that UFS1 inspects. */ if (fs->fs_magic == FS_UFS1_MAGIC) { fs->fs_old_time = fs->fs_time; fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir; fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree; fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree; fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree; fs->fs_maxfilesize = ump->um_savedmaxfilesize; } if (bigcgs) { fs->fs_cgsize = fs->fs_save_cgsize; fs->fs_save_cgsize = 0; } } /* * unmount system call */ int ffs_unmount(mp, mntflags, td) struct mount *mp; int mntflags; struct thread *td; { struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; int error, flags; flags = 0; if (mntflags & MNT_FORCE) { flags |= FORCECLOSE; } #ifdef UFS_EXTATTR if ((error = ufs_extattr_stop(mp, td))) { if (error != EOPNOTSUPP) printf("ffs_unmount: ufs_extattr_stop returned %d\n", error); } else { ufs_extattr_uepm_destroy(&ump->um_extattr); } #endif if (mp->mnt_flag & MNT_SOFTDEP) { if ((error = softdep_flushfiles(mp, flags, td)) != 0) return (error); } else { if ((error = ffs_flushfiles(mp, flags, td)) != 0) return (error); } fs = ump->um_fs; if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("%s: unmount pending error: blocks %jd files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } if (fs->fs_ronly == 0) { fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1; error = ffs_sbupdate(ump, MNT_WAIT); if (error) { fs->fs_clean = 0; return (error); } } ump->um_devvp->v_rdev->si_mountpoint = NULL; vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0); /* XXX: see comment above VOP_OPEN. */ #ifdef notyet error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE, NOCRED, td); #else error = VOP_CLOSE(ump->um_devvp, FREAD | FWRITE, NOCRED, td); #endif vrele(ump->um_devvp); free(fs->fs_csp, M_UFSMNT); free(fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Flush out all the files in a filesystem. */ int ffs_flushfiles(mp, flags, td) struct mount *mp; int flags; struct thread *td; { struct ufsmount *ump; int error; ump = VFSTOUFS(mp); #ifdef QUOTA if (mp->mnt_flag & MNT_QUOTA) { int i; error = vflush(mp, 0, SKIPSYSTEM|flags, td); if (error) return (error); for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_quotas[i] == NULLVP) continue; quotaoff(td, mp, i); } /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. */ } #endif ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles"); if (ump->um_devvp->v_vflag & VV_COPYONWRITE) { if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0) return (error); ffs_snapshot_unmount(mp); /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. */ } /* * Flush all the files. */ if ((error = vflush(mp, 0, flags, td)) != 0) return (error); /* * Flush filesystem metadata. */ vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td); error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td); VOP_UNLOCK(ump->um_devvp, 0, td); return (error); } /* * Get filesystem statistics. */ int ffs_statfs(mp, sbp, td) struct mount *mp; struct statfs *sbp; struct thread *td; { struct ufsmount *ump; struct fs *fs; ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC) panic("ffs_statfs"); sbp->f_version = STATFS_VERSION; sbp->f_bsize = fs->fs_fsize; sbp->f_iosize = fs->fs_bsize; sbp->f_blocks = fs->fs_dsize; sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks); sbp->f_bavail = freespace(fs, fs->fs_minfree) + dbtofsb(fs, fs->fs_pendingblocks); sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes; sbp->f_namemax = NAME_MAX; if (sbp != &mp->mnt_stat) { sbp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; sbp->f_type = mp->mnt_vfc->vfc_typenum; sbp->f_syncwrites = mp->mnt_stat.f_syncwrites; sbp->f_asyncwrites = mp->mnt_stat.f_asyncwrites; sbp->f_syncreads = mp->mnt_stat.f_syncreads; sbp->f_asyncreads = mp->mnt_stat.f_asyncreads; sbp->f_owner = mp->mnt_stat.f_owner; sbp->f_fsid = mp->mnt_stat.f_fsid; bcopy((caddr_t)mp->mnt_stat.f_fstypename, (caddr_t)&sbp->f_fstypename[0], MFSNAMELEN); bcopy((caddr_t)mp->mnt_stat.f_mntonname, (caddr_t)&sbp->f_mntonname[0], MNAMELEN); bcopy((caddr_t)mp->mnt_stat.f_mntfromname, (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); } return (0); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked `MPBUSY'. */ int ffs_sync(mp, waitfor, cred, td) struct mount *mp; int waitfor; struct ucred *cred; struct thread *td; { struct vnode *nvp, *vp, *devvp; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; int error, count, wait, lockreq, allerror = 0; fs = ump->um_fs; if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ printf("fs = %s\n", fs->fs_fsmnt); panic("ffs_sync: rofs mod"); } /* * Write back each (modified) inode. */ wait = 0; lockreq = LK_EXCLUSIVE | LK_NOWAIT; if (waitfor == MNT_WAIT) { wait = 1; lockreq = LK_EXCLUSIVE; } lockreq |= LK_INTERLOCK; MNT_ILOCK(mp); loop: MNT_VNODE_FOREACH(vp, mp, nvp) { /* * Depend on the mntvnode_slock to keep things stable enough * for a quick test. Since there might be hundreds of * thousands of vnodes, we cannot afford even a subroutine * call unless there's a good chance that we have work to do. */ VI_LOCK(vp); if (vp->v_iflag & VI_XLOCK) { VI_UNLOCK(vp); continue; } ip = VTOI(vp); if (vp->v_type == VNON || ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && TAILQ_EMPTY(&vp->v_dirtyblkhd))) { VI_UNLOCK(vp); continue; } MNT_IUNLOCK(mp); if ((error = vget(vp, lockreq, td)) != 0) { MNT_ILOCK(mp); if (error == ENOENT) goto loop; continue; } if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0) allerror = error; VOP_UNLOCK(vp, 0, td); vrele(vp); MNT_ILOCK(mp); } MNT_IUNLOCK(mp); /* * Force stale filesystem control information to be flushed. */ if (waitfor == MNT_WAIT) { if ((error = softdep_flushworklist(ump->um_mountp, &count, td))) allerror = error; /* Flushed work items may create new vnodes to clean */ if (allerror == 0 && count) { MNT_ILOCK(mp); goto loop; } } #ifdef QUOTA qsync(mp); #endif devvp = ump->um_devvp; VI_LOCK(devvp); if (waitfor != MNT_LAZY && (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) { vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td); if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0) allerror = error; VOP_UNLOCK(devvp, 0, td); if (allerror == 0 && waitfor == MNT_WAIT) { MNT_ILOCK(mp); goto loop; } } else VI_UNLOCK(devvp); /* * Write back modified superblock. */ if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0) allerror = error; return (allerror); } int ffs_vget(mp, ino, flags, vpp) struct mount *mp; ino_t ino; int flags; struct vnode **vpp; { struct thread *td = curthread; /* XXX */ struct fs *fs; struct inode *ip; struct ufsmount *ump; struct buf *bp; struct vnode *vp; struct cdev *dev; int error; ump = VFSTOUFS(mp); dev = ump->um_dev; /* * We do not lock vnode creation as it is believed to be too * expensive for such rare case as simultaneous creation of vnode * for same ino by different processes. We just allow them to race * and check later to decide who wins. Let the race begin! */ if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0) return (error); if (*vpp != NULL) return (0); /* * If this MALLOC() is performed after the getnewvnode() * it might block, leaving a vnode with a NULL v_data to be * found by ffs_sync() if a sync happens to fire right then, * which will cause a panic because ffs_sync() blindly * dereferences vp->v_data (as well it should). */ ip = uma_zalloc(uma_inode, M_WAITOK); /* Allocate a new vnode/inode. */ error = getnewvnode("ufs", mp, ffs_vnodeop_p, &vp); if (error) { *vpp = NULL; uma_zfree(uma_inode, ip); return (error); } bzero((caddr_t)ip, sizeof(struct inode)); /* * FFS supports recursive locking. */ fs = ump->um_fs; vp->v_vnlock->lk_flags |= LK_CANRECURSE; vp->v_data = ip; vp->v_bsize = fs->fs_bsize; ip->i_vnode = vp; ip->i_ump = ump; ip->i_fs = fs; ip->i_dev = dev; ip->i_number = ino; #ifdef QUOTA { int i; for (i = 0; i < MAXQUOTAS; i++) ip->i_dquot[i] = NODQUOT; } #endif /* * Exclusively lock the vnode before adding to hash. Note, that we * must not release nor downgrade the lock (despite flags argument * says) till it is fully initialized. */ lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td); /* * Atomicaly (in terms of ufs_hash operations) check the hash for * duplicate of vnode being created and add it to the hash. If a * duplicate vnode was found, it will be vget()ed from hash for us. */ if ((error = ufs_ihashins(ip, flags, vpp)) != 0) { vput(vp); *vpp = NULL; return (error); } /* We lost the race, then throw away our vnode and return existing */ if (*vpp != NULL) { vput(vp); return (0); } /* Read in the disk contents for the inode, copy into the inode. */ error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ brelse(bp); vput(vp); *vpp = NULL; return (error); } if (ip->i_ump->um_fstype == UFS1) ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK); else ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK); ffs_load_inode(bp, ip, fs, ino); if (DOINGSOFTDEP(vp)) softdep_load_inodeblock(ip); else ip->i_effnlink = ip->i_nlink; bqrelse(bp); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); if (error) { vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization. */ VREF(ip->i_devvp); /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (ip->i_gen == 0) { ip->i_gen = arc4random() / 2 + 1; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { ip->i_flag |= IN_MODIFIED; DIP_SET(ip, i_gen, ip->i_gen); } } /* * Ensure that uid and gid are correct. This is a temporary * fix until fsck has been changed to do the update. */ if (fs->fs_magic == FS_UFS1_MAGIC && /* XXX */ fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */ ip->i_uid = ip->i_din1->di_ouid; /* XXX */ ip->i_gid = ip->i_din1->di_ogid; /* XXX */ } /* XXX */ #ifdef MAC if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) { /* * If this vnode is already allocated, and we're running * multi-label, attempt to perform a label association * from the extended attributes on the inode. */ error = mac_associate_vnode_extattr(mp, vp); if (error) { /* ufs_inactive will release ip->i_devvp ref. */ vput(vp); *vpp = NULL; return (error); } } #endif *vpp = vp; return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is valid * - call ffs_vget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the given client host has export rights and return * those rights via. exflagsp and credanonp */ int ffs_fhtovp(mp, fhp, vpp) struct mount *mp; struct fid *fhp; struct vnode **vpp; { struct ufid *ufhp; struct fs *fs; ufhp = (struct ufid *)fhp; fs = VFSTOUFS(mp)->um_fs; if (ufhp->ufid_ino < ROOTINO || ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg) return (ESTALE); return (ufs_fhtovp(mp, ufhp, vpp)); } /* * Vnode pointer to File handle */ /* ARGSUSED */ int ffs_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { struct inode *ip; struct ufid *ufhp; ip = VTOI(vp); ufhp = (struct ufid *)fhp; ufhp->ufid_len = sizeof(struct ufid); ufhp->ufid_ino = ip->i_number; ufhp->ufid_gen = ip->i_gen; return (0); } /* * Initialize the filesystem. */ static int ffs_init(vfsp) struct vfsconf *vfsp; { softdep_initialize(); return (ufs_init(vfsp)); } /* * Undo the work of ffs_init(). */ static int ffs_uninit(vfsp) struct vfsconf *vfsp; { int ret; ret = ufs_uninit(vfsp); softdep_uninitialize(); return (ret); } /* * Write a superblock and associated information back to disk. */ static int ffs_sbupdate(mp, waitfor) struct ufsmount *mp; int waitfor; { struct fs *fs = mp->um_fs; struct buf *bp; int blks; void *space; int i, size, error, allerror = 0; if (fs->fs_ronly == 1 && (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) != (MNT_RDONLY | MNT_UPDATE)) panic("ffs_sbupdate: write read-only filesystem"); /* * First write back the summary information. */ blks = howmany(fs->fs_cssize, fs->fs_fsize); space = fs->fs_csp; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), size, 0, 0, 0); bcopy(space, bp->b_data, (u_int)size); space = (char *)space + size; if (waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) allerror = error; } /* * Now write back the superblock itself. If any errors occurred * up to this point, then fail so that the superblock avoids * being written out as clean. */ if (allerror) return (allerror); if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 && (fs->fs_flags & FS_FLAGS_UPDATED) == 0) { printf("%s: correcting fs_sblockloc from %jd to %d\n", fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1); fs->fs_sblockloc = SBLOCK_UFS1; } if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 && (fs->fs_flags & FS_FLAGS_UPDATED) == 0) { printf("%s: correcting fs_sblockloc from %jd to %d\n", fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2); fs->fs_sblockloc = SBLOCK_UFS2; } bp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize, 0, 0, 0); fs->fs_fmod = 0; fs->fs_time = time_second; bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); ffs_oldfscompat_write((struct fs *)bp->b_data, mp); if (waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) allerror = error; return (allerror); } static int ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp, int attrnamespace, const char *attrname, struct thread *td) { #ifdef UFS_EXTATTR return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace, attrname, td)); #else return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname, td)); #endif } static void ffs_ifree(struct ufsmount *ump, struct inode *ip) { if (ump->um_fstype == UFS1 && ip->i_din1 != NULL) uma_zfree(uma_ufs1, ip->i_din1); else if (ip->i_din2 != NULL) uma_zfree(uma_ufs2, ip->i_din2); uma_zfree(uma_inode, ip); }