Index: lib/libc/sys/reboot.2
===================================================================
--- lib/libc/sys/reboot.2
+++ lib/libc/sys/reboot.2
@@ -113,6 +113,13 @@
 before the processor is halted or rebooted.
 This option may be useful if file system changes have been made manually
 or if the processor is on fire.
+.It Dv RB_REROOT
+Instead of rebooting, unmount all filesystems except the one containing
+the currently-running executable, and mount the root filesystem using the
+same mechanism which is used during normal boot, based on the
+vfs.root.mountfrom
+.Xr kenv 1
+variable.
 .It Dv RB_RDONLY
 Initially mount the root file system read-only.
 This is currently the default, and this option has been deprecated.
Index: sys/kern/kern_shutdown.c
===================================================================
--- sys/kern/kern_shutdown.c
+++ sys/kern/kern_shutdown.c
@@ -50,6 +50,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -150,10 +151,16 @@
 static struct pcb dumppcb;		/* Registers. */
 lwpid_t dumptid;			/* Thread ID. */
 
+static struct cdevsw reroot_cdevsw = {
+	.d_version = D_VERSION,
+	.d_name = "reroot",
+};
+
 static void poweroff_wait(void *, int);
 static void shutdown_halt(void *junk, int howto);
 static void shutdown_panic(void *junk, int howto);
 static void shutdown_reset(void *junk, int howto);
+static int kern_reroot(void);
 
 /* register various local shutdown events */
 static void
@@ -173,6 +180,26 @@
 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL);
 
 /*
+ * The only reason this exists is to create the /dev/reroot/ directory,
+ * used by reroot code in init(8) as a mountpoint for tmpfs.
+ */
+static void
+reroot_conf(void *unused)
+{
+	int error;
+	struct cdev *cdev;
+
+	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev,
+	    &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot");
+	if (error != 0) {
+		printf("%s: failed to create device node, error %d\n",
+		    __func__, error);
+	}
+}
+
+SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL);
+
+/*
  * The system call that results in a reboot.
  */
 /* ARGSUSED */
@@ -188,9 +215,13 @@
 	if (error == 0)
 		error = priv_check(td, PRIV_REBOOT);
 	if (error == 0) {
-		mtx_lock(&Giant);
-		kern_reboot(uap->opt);
-		mtx_unlock(&Giant);
+		if (uap->opt & RB_REROOT) {
+			error = kern_reroot();
+		} else {
+			mtx_lock(&Giant);
+			kern_reboot(uap->opt);
+			mtx_unlock(&Giant);
+		}
 	}
 	return (error);
 }
@@ -336,6 +367,110 @@
 }
 
 /*
+ * The system call that results in changing the rootfs.
+ *
+ * Only the init process may do this; any other caller gets EPERM.
+ * Returns 0 on success, or an errno value if the filesystem holding
+ * the currently-running executable cannot be locked and busied.
+ */
+static int
+kern_reroot(void)
+{
+	struct vnode *oldrootvnode, *vp;
+	struct mount *mp, *devmp;
+	int error;
+
+	if (curproc != initproc)
+		return (EPERM);
+
+	/*
+	 * Mark the filesystem containing currently-running executable
+	 * (the temporary copy of init(8)) busy.
+	 */
+	vp = curproc->p_textvp;
+	mp = vp->v_mount;
+	error = vn_lock(vp, LK_SHARED);
+	if (error != 0)
+		return (error);
+	error = vfs_busy(mp, MBF_NOWAIT);
+	if (error != 0) {
+		/*
+		 * The non-waiting attempt failed.  Hold a reference on the
+		 * mount, drop the vnode lock, and retry without MBF_NOWAIT.
+		 */
+		vfs_ref(mp);
+		VOP_UNLOCK(vp, 0);
+		error = vfs_busy(mp, 0);
+		vn_lock(vp, LK_SHARED | LK_RETRY);
+		vfs_rel(mp);
+		if (error != 0) {
+			VOP_UNLOCK(vp, 0);
+			return (ENOENT);
+		}
+		if (vp->v_iflag & VI_DOOMED) {
+			VOP_UNLOCK(vp, 0);
+			vfs_unbusy(mp);
+			return (ENOENT);
+		}
+	}
+	VOP_UNLOCK(vp, 0);
+
+	/*
+	 * Remove the filesystem containing currently-running executable
+	 * from the mount list, to prevent it from being unmounted
+	 * by vfs_unmountall(), and to avoid confusing vfs_mountroot().
+	 *
+	 * Also preserve /dev - forcibly unmounting it could cause driver
+	 * reinitialization.
+	 */
+
+	vfs_ref(rootdevmp);
+	devmp = rootdevmp;
+	rootdevmp = NULL;
+
+	mtx_lock(&mountlist_mtx);
+	TAILQ_REMOVE(&mountlist, mp, mnt_list);
+	TAILQ_REMOVE(&mountlist, devmp, mnt_list);
+	mtx_unlock(&mountlist_mtx);
+
+	oldrootvnode = rootvnode;
+
+	/*
+	 * Unmount everything except for the two filesystems preserved above.
+	 */
+	vfs_unmountall();
+
+	/*
+	 * Add /dev back; vfs_mountroot() will move it into its new place.
+	 */
+	mtx_lock(&mountlist_mtx);
+	TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list);
+	mtx_unlock(&mountlist_mtx);
+	rootdevmp = devmp;
+	vfs_rel(rootdevmp);
+
+	/*
+	 * Mount the new rootfs.
+	 */
+	vfs_mountroot();
+
+	/*
+	 * Update all references to the old rootvnode.
+	 */
+	mountcheckdirs(oldrootvnode, rootvnode);
+
+	/*
+	 * Add the temporary filesystem back and unbusy it.
+	 */
+	mtx_lock(&mountlist_mtx);
+	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	mtx_unlock(&mountlist_mtx);
+	vfs_unbusy(mp);
+
+	return (0);
+}
+
+/*
  * If the shutdown was a clean halt, behave accordingly.
  */
 static void
Index: sys/kern/vfs_mountroot.c
===================================================================
--- sys/kern/vfs_mountroot.c
+++ sys/kern/vfs_mountroot.c
@@ -220,28 +220,39 @@
 
 	*mpp = NULL;
 
-	vfsp = vfs_byname("devfs");
-	KASSERT(vfsp != NULL, ("Could not find devfs by name"));
-	if (vfsp == NULL)
-		return (ENOENT);
+	if (rootdevmp != NULL) {
+		/*
+		 * Already have /dev; this happens during rerooting.
+		 */
+		error = vfs_busy(rootdevmp, 0);
+		if (error != 0)
+			return (error);
+		*mpp = rootdevmp;
+	} else {
+		vfsp = vfs_byname("devfs");
+		KASSERT(vfsp != NULL, ("Could not find devfs by name"));
+		if (vfsp == NULL)
+			return (ENOENT);
 
-	mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
+		mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
 
-	error = VFS_MOUNT(mp);
-	KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
-	if (error)
-		return (error);
+		error = VFS_MOUNT(mp);
+		KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
+		if (error)
+			return (error);
 
-	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
-	TAILQ_INIT(opts);
-	mp->mnt_opt = opts;
+		opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
+		TAILQ_INIT(opts);
+		mp->mnt_opt = opts;
+
+		mtx_lock(&mountlist_mtx);
+		TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
+		mtx_unlock(&mountlist_mtx);
 
-	mtx_lock(&mountlist_mtx);
-	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
-	mtx_unlock(&mountlist_mtx);
+		*mpp = mp;
+		rootdevmp = mp;
+	}
 
-	*mpp = mp;
-	rootdevmp = mp;
 	set_rootvnode();
 
 	error = kern_symlinkat(td, "/", AT_FDCWD, "dev", UIO_SYSSPACE);
Index: sys/sys/reboot.h
===================================================================
--- sys/sys/reboot.h
+++ sys/sys/reboot.h
@@ -59,6 +59,7 @@
 #define	RB_RESERVED1	0x40000	/* reserved for internal use of boot blocks */
 #define	RB_RESERVED2	0x80000	/* reserved for internal use of boot blocks */
 #define	RB_PAUSE	0x100000 /* pause after each output line during probe */
+#define	RB_REROOT	0x200000 /* unmount the rootfs and mount it again */
 #define	RB_MULTIPLE	0x20000000	/* use multiple consoles */
 #define	RB_BOOTINFO	0x80000000	/* have `struct bootinfo *' arg */
 