Index: stable/10/sys/dev/null/null.c
===================================================================
--- stable/10/sys/dev/null/null.c (revision 291214)
+++ stable/10/sys/dev/null/null.c (revision 291215)
@@ -1,181 +1,178 @@
/*-
 * Copyright (c) 2000 Mark R. V. Murray & Jeroen C. van Gelderen
 * Copyright (c) 2001-2004 Mark R. V. Murray
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer
 * in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
-#include
#include
#include
#include
#include
#include

/* For use with destroy_dev(9). */
static struct cdev *null_dev;
static struct cdev *zero_dev;

static d_write_t null_write;
static d_ioctl_t null_ioctl;
static d_ioctl_t zero_ioctl;
static d_read_t zero_read;

/*
 * Switch table for the null device: reads are routed to nullop, writes to
 * null_write and ioctls to null_ioctl.
 */
static struct cdevsw null_cdevsw = {
	.d_version = D_VERSION,
	.d_read = (d_read_t *)nullop,
	.d_write = null_write,
	.d_ioctl = null_ioctl,
	.d_name = "null",
};

/*
 * Switch table for the zero device: reads are served by zero_read, writes
 * share null_write with the null device, ioctls go to zero_ioctl, and the
 * D_MMAP_ANON flag is set.
 */
static struct cdevsw zero_cdevsw = {
	.d_version = D_VERSION,
	.d_read = zero_read,
	.d_write = null_write,
	.d_ioctl = zero_ioctl,
	.d_name = "zero",
	.d_flags = D_MMAP_ANON,
};

/*
 * Write handler shared by both devices: discards the data by zeroing
 * uio_resid, so the whole request is reported as consumed. Always returns 0.
 */
/* ARGSUSED */
static int
null_write(struct cdev *dev __unused, struct uio *uio, int flags __unused)
{
	uio->uio_resid = 0;

	return (0);
}

/*
 * ioctl handler for the null device. Returns 0 or an errno value.
 * - DIOCSKERNELDUMP: calls set_dumper(NULL, NULL, td). The removed (-)
 *   lines ran priv_check(td, PRIV_SETDUMPER) here first; the added (+)
 *   line drops that local check and passes td to set_dumper instead.
 *   NOTE(review): presumably set_dumper now performs the privilege check
 *   itself; its body is not visible in this hunk, confirm.
 * - FIONBIO: accepted, no action.
 * - FIOASYNC: EINVAL when the int pointed to by data is non-zero.
 * - anything else: ENOIOCTL.
 * Note that data is tagged __unused although the FIOASYNC case reads it.
 */
/* ARGSUSED */
static int
null_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data __unused,
    int flags __unused, struct thread *td)
{
	int error;
	error = 0;

	switch (cmd) {
	case DIOCSKERNELDUMP:
-		error = priv_check(td, PRIV_SETDUMPER);
-		if (error == 0)
-			error = set_dumper(NULL, NULL);
+		error = set_dumper(NULL, NULL, td);
		break;
	case FIONBIO:
		break;
	case FIOASYNC:
		if (*(int *)data != 0)
			error = EINVAL;
		break;
	default:
		error = ENOIOCTL;
	}
	return (error);
}

/*
 * ioctl handler for the zero device: same FIONBIO and FIOASYNC handling as
 * null_ioctl, with no DIOCSKERNELDUMP case; every other command yields
 * ENOIOCTL.
 */
/* ARGSUSED */
static int
zero_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data __unused,
    int flags __unused, struct thread *td)
{
	int error;
	error = 0;

	switch (cmd) {
	case FIONBIO:
		break;
	case FIOASYNC:
		if (*(int *)data != 0)
			error = EINVAL;
		break;
	default:
		error = ENOIOCTL;
	}
	return (error);
}

/*
 * Read handler for the zero device: copies from zero_region with uiomove(),
 * at most ZERO_REGION_SIZE bytes per pass, until uio_resid reaches 0 or
 * uiomove() reports an error. Returns the last uiomove() result (0 when the
 * request was empty or fully satisfied).
 */
/* ARGSUSED */
static int
zero_read(struct cdev *dev __unused, struct uio *uio, int flags __unused)
{
	void *zbuf;
	ssize_t len;
	int error = 0;

	KASSERT(uio->uio_rw == UIO_READ,
	    ("Can't be in %s for write", __func__));
	/* zero_region is cast with __DECONST to fit the uiomove() argument. */
	zbuf = __DECONST(void *, zero_region);
	while (uio->uio_resid > 0 && error == 0) {
		len = uio->uio_resid;
		if (len > ZERO_REGION_SIZE)
			len = ZERO_REGION_SIZE;
		error = uiomove(zbuf, len, uio);
	}

	return (error);
}

/*
 * Module event handler.
 * - MOD_LOAD: prints a banner when bootverbose is set, then creates the
 *   null and zero device nodes (UID_ROOT, GID_WHEEL, mode 0666) and keeps
 *   them in null_dev and zero_dev.
 * - MOD_UNLOAD: destroys both device nodes.
 * - MOD_SHUTDOWN: nothing to do.
 * - any other event: returns EOPNOTSUPP.
 * Handled events return 0.
 */
/* ARGSUSED */
static int
null_modevent(module_t mod __unused, int type, void *data __unused)
{
	switch(type) {
	case MOD_LOAD:
		if (bootverbose)
			printf("null: 
\n");
		null_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD, &null_cdevsw, 0,
		    NULL, UID_ROOT, GID_WHEEL, 0666, "null");
		zero_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD, &zero_cdevsw, 0,
		    NULL, UID_ROOT, GID_WHEEL, 0666, "zero");
		break;

	case MOD_UNLOAD:
		destroy_dev(null_dev);
		destroy_dev(zero_dev);
		break;

	case MOD_SHUTDOWN:
		break;

	default:
		return (EOPNOTSUPP);
	}

	return (0);
}

DEV_MODULE(null, null_modevent, NULL);
MODULE_VERSION(null, 1);
Index: stable/10/sys/geom/geom_dev.c
===================================================================
--- stable/10/sys/geom/geom_dev.c (revision 291214)
+++ stable/10/sys/geom/geom_dev.c (revision 291215)
@@ -1,709 +1,709 @@
/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. The names of the authors may not be used to endorse or promote
 * products derived from this software without specific prior written
 * permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * Per-consumer state, stored in the private field of the consumer.
 * sc_mtx guards sc_open and sc_active; sc_dev and sc_alias are the device
 * node and its optional alias; sc_open is adjusted by g_dev_open() and
 * g_dev_close(); sc_active counts requests started by g_dev_strategy() and
 * not yet completed by g_dev_done().
 */
struct g_dev_softc {
	struct mtx sc_mtx;
	struct cdev *sc_dev;
	struct cdev *sc_alias;
	int sc_open;
	int sc_active;
};

static d_open_t g_dev_open;
static d_close_t g_dev_close;
static d_strategy_t g_dev_strategy;
static d_ioctl_t g_dev_ioctl;

/*
 * Device switch used for every device node created by g_dev_taste().
 * Reads and writes go through physread and physwrite; the remaining entry
 * points are implemented in this file.
 */
static struct cdevsw g_dev_cdevsw = {
	.d_version = D_VERSION,
	.d_open = g_dev_open,
	.d_close = g_dev_close,
	.d_read = physread,
	.d_write = physwrite,
	.d_ioctl = g_dev_ioctl,
	.d_strategy = g_dev_strategy,
	.d_name = "g_dev",
	.d_flags = D_DISK | D_TRACKCLOSE,
};

static g_init_t g_dev_init;
static g_fini_t g_dev_fini;
static g_taste_t g_dev_taste;
static g_orphan_t g_dev_orphan;
static g_attrchanged_t g_dev_attrchanged;

/* GEOM class descriptor named DEV, wiring up the callbacks declared above. */
static struct g_class g_dev_class = {
	.name = "DEV",
	.version = G_VERSION,
	.init = g_dev_init,
	.fini = g_dev_fini,
	.taste = g_dev_taste,
	.orphan = g_dev_orphan,
	.attrchanged = g_dev_attrchanged
};

/*
 * We target 262144 (8 x 32768) sectors by default as this significantly
 * increases the throughput on commonly used SSD's with a marginal
 * increase in non-interruptible request latency. 
*/
static uint64_t g_dev_del_max_sectors = 262144;
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, dev, CTLFLAG_RW, 0, "GEOM_DEV stuff");
SYSCTL_QUAD(_kern_geom_dev, OID_AUTO, delete_max_sectors, CTLFLAG_RW,
    &g_dev_del_max_sectors, 0, "Maximum number of sectors in a single "
    "delete request sent to the provider. Larger requests are chunked "
    "so they can be interrupted. (0 = disable chunking)");

/*
 * Value of the dumpdev kernel environment variable, fetched in g_dev_init()
 * and released in g_dev_fini(), or earlier by init_dumpdev() once a matching
 * device has been configured.
 */
static char *dumpdev = NULL;

/* Class init hook: remember the dumpdev environment setting, if any. */
static void
g_dev_init(struct g_class *mp)
{

	dumpdev = getenv("dumpdev");
}

/* Class fini hook: release the environment string held in dumpdev. */
static void
g_dev_fini(struct g_class *mp)
{

	freeenv(dumpdev);
}

/*
 * Select dev as the kernel dump device, or clear the setting when dev is
 * NULL. For a real device the GEOM::kerneldump attribute is queried for the
 * full range (offset 0, length OFF_MAX) and the resulting dumper info plus
 * the device name are handed to set_dumper(); on success the device is
 * flagged SI_DUMPDEV. The added (+) lines thread the calling thread td
 * through to set_dumper(), which gained a third argument in this revision.
 * Returns 0 or the error from g_io_getattr() or set_dumper().
 */
static int
-g_dev_setdumpdev(struct cdev *dev)
+g_dev_setdumpdev(struct cdev *dev, struct thread *td)
{
	struct g_kerneldump kd;
	struct g_consumer *cp;
	int error, len;

	if (dev == NULL)
-		return (set_dumper(NULL, NULL));
+		return (set_dumper(NULL, NULL, td));

	cp = dev->si_drv2;
	len = sizeof(kd);
	kd.offset = 0;
	kd.length = OFF_MAX;
	error = g_io_getattr("GEOM::kerneldump", cp, &len, &kd);
	if (error == 0) {
-		error = set_dumper(&kd.di, devtoname(dev));
+		error = set_dumper(&kd.di, devtoname(dev), td);
		if (error == 0)
			dev->si_flags |= SI_DUMPDEV;
	}
	return (error);
}

/*
 * If a dumpdev environment value is pending and names this device, try to
 * configure it as the dump device on behalf of curthread. On success the
 * environment string is freed and cleared so later devices are not matched.
 */
static void
init_dumpdev(struct cdev *dev)
{

	if (dumpdev == NULL)
		return;
	if (strcmp(devtoname(dev), dumpdev) != 0)
		return;
-	if (g_dev_setdumpdev(dev) == 0) {
+	if (g_dev_setdumpdev(dev, curthread) == 0) {
		freeenv(dumpdev);
		dumpdev = NULL;
	}
}

/*
 * Event handler (posted via g_post_event from g_dev_done and
 * g_dev_callback) that tears down one g_dev instance: sends the DESTROY
 * notification, drops any access counts still held, then detaches and
 * destroys the consumer and geom and frees the softc.
 */
static void
g_dev_destroy(void *arg, int flags __unused)
{
	struct g_consumer *cp;
	struct g_geom *gp;
	struct g_dev_softc *sc;
	char buf[SPECNAMELEN + 6];

	g_topology_assert();
	cp = arg;
	gp = cp->geom;
	sc = cp->private;
	g_trace(G_T_TOPOLOGY, "g_dev_destroy(%p(%s))", cp, gp->name);
	snprintf(buf, sizeof(buf), "cdev=%s", gp->name);
	devctl_notify_f("GEOM", "DEV", "DESTROY", buf, M_WAITOK);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	g_detach(cp);
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	mtx_destroy(&sc->sc_mtx);
	g_free(sc);
}

/*
 * Print the names of all geoms of the DEV class on one line, separated by
 * single spaces and followed by a newline.
 */
void
g_dev_print(void)
{
	struct 
g_geom *gp;
	char const *p = "";

	LIST_FOREACH(gp, &g_dev_class.geom, geom) {
		printf("%s%s", p, gp->name);
		p = " ";
	}
	printf("\n");
}

/*
 * Attribute-change callback.
 * - GEOM::media: send MEDIACHANGE notifications (DEVFS/CDEV and GEOM/DEV)
 *   for the device node and, when present, for its alias.
 * - GEOM::physpath: briefly take read access, fetch the physical path and
 *   create or refresh the alias via make_dev_physpath_alias(); if the path
 *   is unavailable or empty and an alias exists, destroy it.
 * Any other attribute is ignored.
 */
static void
g_dev_attrchanged(struct g_consumer *cp, const char *attr)
{
	struct g_dev_softc *sc;
	struct cdev *dev;
	char buf[SPECNAMELEN + 6];

	sc = cp->private;
	if (strcmp(attr, "GEOM::media") == 0) {
		dev = sc->sc_dev;
		snprintf(buf, sizeof(buf), "cdev=%s", dev->si_name);
		devctl_notify_f("DEVFS", "CDEV", "MEDIACHANGE", buf, M_WAITOK);
		devctl_notify_f("GEOM", "DEV", "MEDIACHANGE", buf, M_WAITOK);
		dev = sc->sc_alias;
		if (dev != NULL) {
			snprintf(buf, sizeof(buf), "cdev=%s", dev->si_name);
			devctl_notify_f("DEVFS", "CDEV", "MEDIACHANGE", buf,
			    M_WAITOK);
			devctl_notify_f("GEOM", "DEV", "MEDIACHANGE", buf,
			    M_WAITOK);
		}
		return;
	}

	if (strcmp(attr, "GEOM::physpath") != 0)
		return;

	if (g_access(cp, 1, 0, 0) == 0) {
		char *physpath;
		int error, physpath_len;

		physpath_len = MAXPATHLEN;
		physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
		error =
		    g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
		g_access(cp, -1, 0, 0);
		if (error == 0 && strlen(physpath) != 0) {
			struct cdev *old_alias_dev;
			struct cdev **alias_devp;

			dev = sc->sc_dev;
			old_alias_dev = sc->sc_alias;
			alias_devp = (struct cdev **)&sc->sc_alias;
			make_dev_physpath_alias(MAKEDEV_WAITOK, alias_devp,
			    dev, old_alias_dev, physpath);
		} else if (sc->sc_alias) {
			destroy_dev((struct cdev *)sc->sc_alias);
			sc->sc_alias = NULL;
		}
		g_free(physpath);
	}
}

/*
 * Return the provider behind a device node, or NULL when dev is NULL or is
 * not a node using g_dev_cdevsw.
 */
struct g_provider *
g_dev_getprovider(struct cdev *dev)
{
	struct g_consumer *cp;

	g_topology_assert();
	if (dev == NULL)
		return (NULL);
	if (dev->si_devsw != &g_dev_cdevsw)
		return (NULL);
	cp = dev->si_drv2;
	return (cp->provider);
}

/*
 * Taste callback: create a geom, softc and consumer for provider pp, attach
 * to it and create the device node (root:operator, mode 0640). Returns the
 * new geom, or NULL after undoing everything if make_dev_p() fails.
 * An optional alias is created when a kern.devalias.<prefix> environment
 * variable matches a prefix of the geom name (longest prefix first); the
 * alias name is the variable value followed by the rest of the geom name.
 * Both nodes are offered to init_dumpdev(), the physpath alias is set up,
 * and a CREATE notification is sent.
 */
static struct g_geom *
g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	struct g_dev_softc *sc;
	int error, len;
	struct cdev *dev, *adev;
	char buf[SPECNAMELEN + 6], *val;

	g_trace(G_T_TOPOLOGY, "dev_taste(%s,%s)", mp->name, pp->name);
	g_topology_assert();
	gp = 
g_new_geomf(mp, "%s", pp->name);
	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
	mtx_init(&sc->sc_mtx, "g_dev", NULL, MTX_DEF);
	cp = g_new_consumer(gp);
	cp->private = sc;
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	KASSERT(error == 0,
	    ("g_dev_taste(%s) failed to g_attach, err=%d", pp->name, error));
	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev,
	    &g_dev_cdevsw, NULL, UID_ROOT, GID_OPERATOR, 0640, "%s", gp->name);
	if (error != 0) {
		printf("%s: make_dev_p() failed (gp->name=%s, error=%d)\n",
		    __func__, gp->name, error);
		g_detach(cp);
		g_destroy_consumer(cp);
		g_destroy_geom(gp);
		mtx_destroy(&sc->sc_mtx);
		g_free(sc);
		return (NULL);
	}
	dev->si_flags |= SI_UNMAPPED;
	sc->sc_dev = dev;

	/* Search for device alias name and create it if found. */
	adev = NULL;
	for (len = MIN(strlen(gp->name), sizeof(buf) - 15); len > 0; len--) {
		snprintf(buf, sizeof(buf), "kern.devalias.%s", gp->name);
		/* 14 is the length of the kern.devalias. prefix. */
		buf[14 + len] = 0;
		val = getenv(buf);
		if (val != NULL) {
			snprintf(buf, sizeof(buf), "%s%s",
			    val, gp->name + len);
			freeenv(val);
			/*
			 * NOTE(review): the result of make_dev_alias_p() is
			 * not checked before adev is dereferenced, and adev
			 * was set to NULL above; confirm the call cannot
			 * fail here.
			 */
			make_dev_alias_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK,
			    &adev, dev, "%s", buf);
			adev->si_flags |= SI_UNMAPPED;
			break;
		}
	}

	dev->si_iosize_max = MAXPHYS;
	dev->si_drv2 = cp;
	init_dumpdev(dev);
	if (adev != NULL) {
		adev->si_iosize_max = MAXPHYS;
		adev->si_drv2 = cp;
		init_dumpdev(adev);
	}

	g_dev_attrchanged(cp, "GEOM::physpath");
	snprintf(buf, sizeof(buf), "cdev=%s", gp->name);
	devctl_notify_f("GEOM", "DEV", "CREATE", buf, M_WAITOK);

	return (gp);
}

/*
 * Open entry point. FREAD and FWRITE are turned into access-count deltas
 * r and w (e stays 0 unless notyet is defined). Returns ENXIO when the
 * consumer is not yet attached to the node, EINVAL when no access is
 * requested, the securelevel_ge() error for write opens at securelevel 2 or
 * above, or the g_access() result. On success sc_open is raised under
 * sc_mtx, and a thread sleeping on sc_active is woken if this is the first
 * open while requests are still in flight.
 */
static int
g_dev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct g_consumer *cp;
	struct g_dev_softc *sc;
	int error, r, w, e;

	cp = dev->si_drv2;
	if (cp == NULL)
		return(ENXIO); /* g_dev_taste() not done yet */
	g_trace(G_T_ACCESS, "g_dev_open(%s, %d, %d, %p)",
	    cp->geom->name, flags, fmt, td);

	r = flags & FREAD ? 1 : 0;
	w = flags & FWRITE ? 1 : 0;
#ifdef notyet
	e = flags & O_EXCL ? 1 : 0;
#else
	e = 0;
#endif

	/*
	 * This happens on attempt to open a device node with O_EXEC. 
*/
	if (r + w + e == 0)
		return (EINVAL);

	if (w) {
		/*
		 * When running in very secure mode, do not allow
		 * opens for writing of any disks.
		 */
		error = securelevel_ge(td->td_ucred, 2);
		if (error)
			return (error);
	}
	g_topology_lock();
	error = g_access(cp, r, w, e);
	g_topology_unlock();
	if (error == 0) {
		sc = cp->private;
		mtx_lock(&sc->sc_mtx);
		if (sc->sc_open == 0 && sc->sc_active != 0)
			wakeup(&sc->sc_active);
		sc->sc_open += r + w + e;
		mtx_unlock(&sc->sc_mtx);
	}
	return(error);
}

/*
 * Close entry point, the mirror image of g_dev_open(): the deltas are
 * negative, sc_open is lowered first, and if that was the last open while
 * requests are still active the caller sleeps until sc_active drains (or
 * the device is opened again) before releasing access with g_access().
 * Returns ENXIO, EINVAL or the g_access() result.
 */
static int
g_dev_close(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct g_consumer *cp;
	struct g_dev_softc *sc;
	int error, r, w, e;

	cp = dev->si_drv2;
	if (cp == NULL)
		return(ENXIO);
	g_trace(G_T_ACCESS, "g_dev_close(%s, %d, %d, %p)",
	    cp->geom->name, flags, fmt, td);

	r = flags & FREAD ? -1 : 0;
	w = flags & FWRITE ? -1 : 0;
#ifdef notyet
	e = flags & O_EXCL ? -1 : 0;
#else
	e = 0;
#endif

	/*
	 * The vgonel(9) - caused by eg. forced unmount of devfs - calls
	 * VOP_CLOSE(9) on devfs vnode without any FREAD or FWRITE flags,
	 * which would result in zero deltas, which in turn would cause
	 * panic in g_access(9).
	 *
	 * Note that we cannot zero the counters (ie. do "r = cp->acr"
	 * etc) instead, because the consumer might be opened in another
	 * devfs instance.
	 */
	if (r + w + e == 0)
		return (EINVAL);

	sc = cp->private;
	mtx_lock(&sc->sc_mtx);
	sc->sc_open += r + w + e;
	/*
	 * NOTE(review): the priority argument is 0 and the wait message is
	 * the string PRIBIO, which looks like a priority constant name used
	 * as a label; confirm against msleep(9).
	 */
	while (sc->sc_open == 0 && sc->sc_active != 0)
		msleep(&sc->sc_active, &sc->sc_mtx, 0, "PRIBIO", 0);
	mtx_unlock(&sc->sc_mtx);
	g_topology_lock();
	error = g_access(cp, r, w, e);
	g_topology_unlock();
	return (error);
}

/*
 * XXX: Until we have unmessed the ioctl situation, there is a race against
 * XXX: a concurrent orphanization. We cannot close it by holding topology
 * XXX: since that would prevent us from doing our job, and stalling events
 * XXX: will break (actually: stall) the BSD disklabel hacks. 
*/
/*
 * ioctl entry point for GEOM device nodes. Returns 0 or an errno value.
 * Simple geometry queries are answered from the provider fields, attribute
 * queries go through g_io_getattr() using the length encoded in cmd,
 * DIOCSKERNELDUMP sets or clears the dump device via g_dev_setdumpdev()
 * (now passing td along, per the added lines), DIOCGDELETE issues deletes
 * in chunks, and unknown commands are forwarded to the ioctl method of the
 * provider geom when it has one (ENOIOCTL otherwise).
 */
static int
g_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
{
	struct g_consumer *cp;
	struct g_provider *pp;
	off_t offset, length, chunk;
	int i, error;

	cp = dev->si_drv2;
	pp = cp->provider;

	error = 0;
	KASSERT(cp->acr || cp->acw,
	    ("Consumer with zero access count in g_dev_ioctl"));

	/* Size of the ioctl argument, used as buffer length below. */
	i = IOCPARM_LEN(cmd);
	switch (cmd) {
	case DIOCGSECTORSIZE:
		*(u_int *)data = cp->provider->sectorsize;
		if (*(u_int *)data == 0)
			error = ENOENT;
		break;
	case DIOCGMEDIASIZE:
		*(off_t *)data = cp->provider->mediasize;
		if (*(off_t *)data == 0)
			error = ENOENT;
		break;
	case DIOCGFWSECTORS:
		error = g_io_getattr("GEOM::fwsectors", cp, &i, data);
		if (error == 0 && *(u_int *)data == 0)
			error = ENOENT;
		break;
	case DIOCGFWHEADS:
		error = g_io_getattr("GEOM::fwheads", cp, &i, data);
		if (error == 0 && *(u_int *)data == 0)
			error = ENOENT;
		break;
	case DIOCGFRONTSTUFF:
		error = g_io_getattr("GEOM::frontstuff", cp, &i, data);
		break;
	case DIOCSKERNELDUMP:
		/* A zero argument clears the dump device, non-zero selects dev. */
		if (*(u_int *)data == 0)
-			error = g_dev_setdumpdev(NULL);
+			error = g_dev_setdumpdev(NULL, td);
		else
-			error = g_dev_setdumpdev(dev);
+			error = g_dev_setdumpdev(dev, td);
		break;
	case DIOCGFLUSH:
		error = g_io_flush(cp);
		break;
	case DIOCGDELETE:
		/* data holds two off_t values: offset, then length. */
		offset = ((off_t *)data)[0];
		length = ((off_t *)data)[1];
		if ((offset % cp->provider->sectorsize) != 0 ||
		    (length % cp->provider->sectorsize) != 0 || length <= 0) {
			printf("%s: offset=%jd length=%jd\n", __func__, offset,
			    length);
			error = EINVAL;
			break;
		}
		while (length > 0) {
			chunk = length;
			if (g_dev_del_max_sectors != 0 && chunk >
			    g_dev_del_max_sectors * cp->provider->sectorsize) {
				chunk = g_dev_del_max_sectors *
				    cp->provider->sectorsize;
			}
			error = g_delete_data(cp, offset, chunk);
			length -= chunk;
			offset += chunk;
			if (error)
				break;
			/*
			 * Since the request size can be large, the service
			 * time can likewise be large. We make this ioctl
			 * interruptible by checking for signals for each bio. 
*/
			if (SIGPENDING(td))
				break;
		}
		break;
	case DIOCGIDENT:
		error = g_io_getattr("GEOM::ident", cp, &i, data);
		break;
	case DIOCGPROVIDERNAME:
		if (pp == NULL)
			return (ENOENT);
		strlcpy(data, pp->name, i);
		break;
	case DIOCGSTRIPESIZE:
		*(off_t *)data = cp->provider->stripesize;
		break;
	case DIOCGSTRIPEOFFSET:
		*(off_t *)data = cp->provider->stripeoffset;
		break;
	case DIOCGPHYSPATH:
		error = g_io_getattr("GEOM::physpath", cp, &i, data);
		if (error == 0 && *(char *)data == '\0')
			error = ENOENT;
		break;
	case DIOCGATTR: {
		struct diocgattr_arg *arg = (struct diocgattr_arg *)data;

		/* Reject a caller-supplied length larger than the value buffer. */
		if (arg->len > sizeof(arg->value)) {
			error = EINVAL;
			break;
		}
		error = g_io_getattr(arg->name, cp, &arg->len, &arg->value);
		break;
	}
	default:
		if (cp->provider->geom->ioctl != NULL) {
			error = cp->provider->geom->ioctl(cp->provider, cmd, data, fflag, td);
		} else {
			error = ENOIOCTL;
		}
	}

	return (error);
}

/*
 * Completion handler for the cloned bio created in g_dev_strategy().
 * Copies error and completion count to the parent, computes the parent
 * residual, flags BIO_ERROR on failure and destroys the clone. It then
 * drops sc_active under sc_mtx; when the count reaches zero it wakes a
 * closer waiting in g_dev_close() if nothing is open, and schedules
 * g_dev_destroy() if the device node is already gone (sc_dev is NULL).
 * Finally the parent bio is completed with biodone().
 */
static void
g_dev_done(struct bio *bp2)
{
	struct g_consumer *cp;
	struct g_dev_softc *sc;
	struct bio *bp;
	int destroy;

	cp = bp2->bio_from;
	sc = cp->private;
	bp = bp2->bio_parent;
	bp->bio_error = bp2->bio_error;
	bp->bio_completed = bp2->bio_completed;
	bp->bio_resid = bp->bio_length - bp2->bio_completed;
	if (bp2->bio_error != 0) {
		g_trace(G_T_BIO, "g_dev_done(%p) had error %d",
		    bp2, bp2->bio_error);
		bp->bio_flags |= BIO_ERROR;
	} else {
		g_trace(G_T_BIO, "g_dev_done(%p/%p) resid %ld completed %jd",
		    bp2, bp, bp2->bio_resid, (intmax_t)bp2->bio_completed);
	}
	g_destroy_bio(bp2);
	destroy = 0;
	mtx_lock(&sc->sc_mtx);
	if ((--sc->sc_active) == 0) {
		if (sc->sc_open == 0)
			wakeup(&sc->sc_active);
		if (sc->sc_dev == NULL)
			destroy = 1;
	}
	mtx_unlock(&sc->sc_mtx);
	if (destroy)
		g_post_event(g_dev_destroy, cp, M_NOWAIT, NULL);
	biodone(bp);
}

/*
 * Strategy entry point: forwards a read, write, delete or flush bio to the
 * provider. With INVARIANTS, requests whose offset or byte count is not a
 * multiple of the sector size are finished with EINVAL. Otherwise
 * sc_active is raised, the bio is cloned (retrying every hz / 10 ticks
 * until g_clone_bio() succeeds) and the clone is issued with g_dev_done()
 * as its completion handler.
 */
static void
g_dev_strategy(struct bio *bp)
{
	struct g_consumer *cp;
	struct bio *bp2;
	struct cdev *dev;
	struct g_dev_softc *sc;

	KASSERT(bp->bio_cmd == BIO_READ ||
	    bp->bio_cmd == BIO_WRITE ||
	    bp->bio_cmd == BIO_DELETE ||
	    bp->bio_cmd == BIO_FLUSH,
	    ("Wrong bio_cmd bio=%p cmd=%d", bp, bp->bio_cmd));
	dev 
= bp->bio_dev;
	cp = dev->si_drv2;
	sc = cp->private;
	KASSERT(cp->acr || cp->acw,
	    ("Consumer with zero access count in g_dev_strategy"));
#ifdef INVARIANTS
	if ((bp->bio_offset % cp->provider->sectorsize) != 0 ||
	    (bp->bio_bcount % cp->provider->sectorsize) != 0) {
		bp->bio_resid = bp->bio_bcount;
		biofinish(bp, NULL, EINVAL);
		return;
	}
#endif
	mtx_lock(&sc->sc_mtx);
	KASSERT(sc->sc_open > 0, ("Closed device in g_dev_strategy"));
	sc->sc_active++;
	mtx_unlock(&sc->sc_mtx);

	for (;;) {
		/*
		 * XXX: This is not an ideal solution, but I believe it to be
		 * XXX: deadlock safe, all things considered.
		 */
		bp2 = g_clone_bio(bp);
		if (bp2 != NULL)
			break;
		pause("gdstrat", hz / 10);
	}
	KASSERT(bp2 != NULL, ("XXX: ENOMEM in a bad place"));
	bp2->bio_done = g_dev_done;
	g_trace(G_T_BIO,
	    "g_dev_strategy(%p/%p) offset %jd length %jd data %p cmd %d",
	    bp, bp2, (intmax_t)bp->bio_offset, (intmax_t)bp2->bio_length,
	    bp2->bio_data, bp2->bio_cmd);
	g_io_request(bp2, cp);
	KASSERT(cp->acr || cp->acw,
	    ("g_dev_strategy raced with g_dev_close and lost"));

}

/*
 * g_dev_callback()
 *
 * Called by devfs when asynchronous device destruction is completed.
 * - Mark that we have no attached device any more.
 * - If there are no outstanding requests, schedule geom destruction.
 * Otherwise destruction will be scheduled later by g_dev_done().
 */
static void
g_dev_callback(void *arg)
{
	struct g_consumer *cp;
	struct g_dev_softc *sc;
	int destroy;

	cp = arg;
	sc = cp->private;
	g_trace(G_T_TOPOLOGY, "g_dev_callback(%p(%s))", cp, cp->geom->name);

	mtx_lock(&sc->sc_mtx);
	sc->sc_dev = NULL;
	sc->sc_alias = NULL;
	/* Decide under the mutex so the check cannot race with g_dev_done(). */
	destroy = (sc->sc_active == 0);
	mtx_unlock(&sc->sc_mtx);
	if (destroy)
		g_post_event(g_dev_destroy, cp, M_WAITOK, NULL);
}

/*
 * g_dev_orphan()
 *
 * Called from below when the provider orphaned us.
 * - Clear any dump settings.
 * - Request asynchronous device destruction to prevent any more requests
 * from coming in. The provider is already marked with an error, so
 * anything which comes in in the interrim will be returned immediately. 
*/
/*
 * In this revision set_dumper() takes a thread argument; curthread is
 * supplied here and the return value is explicitly discarded with a void
 * cast.
 */
static void
g_dev_orphan(struct g_consumer *cp)
{
	struct cdev *dev;
	struct g_dev_softc *sc;

	g_topology_assert();
	sc = cp->private;
	dev = sc->sc_dev;
	g_trace(G_T_TOPOLOGY, "g_dev_orphan(%p(%s))", cp, cp->geom->name);

	/* Reset any dump-area set on this device */
	if (dev->si_flags & SI_DUMPDEV)
-		set_dumper(NULL, NULL);
+		(void)set_dumper(NULL, NULL, curthread);

	/* Destroy the struct cdev *so we get no more requests */
	destroy_dev_sched_cb(dev, g_dev_callback, cp);
}

DECLARE_GEOM_CLASS(g_dev_class, g_dev);
Index: stable/10/sys/kern/kern_shutdown.c
===================================================================
--- stable/10/sys/kern/kern_shutdown.c (revision 291214)
+++ stable/10/sys/kern/kern_shutdown.c (revision 291215)
@@ -1,901 +1,906 @@
/*-
 * Copyright (c) 1986, 1988, 1991, 1993
 * The Regents of the University of California. All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_kdb.h" #include "opt_panic.h" #include "opt_sched.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef PANIC_REBOOT_WAIT_TIME #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ #endif static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME; SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RW | CTLFLAG_TUN, &panic_reboot_wait_time, 0, "Seconds to wait before rebooting after a panic"); TUNABLE_INT("kern.panic_reboot_wait_time", &panic_reboot_wait_time); /* * Note that stdarg.h and the ANSI style va_start macro is used for both * ANSI and traditional C compilers. 
*/
#include

#ifdef KDB
/*
 * Whether a panic enters the debugger: defaults to 0 when KDB_UNATTENDED is
 * defined and to 1 otherwise. Adjustable via debug.debugger_on_panic.
 */
#ifdef KDB_UNATTENDED
int debugger_on_panic = 0;
#else
int debugger_on_panic = 1;
#endif
SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic,
    CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_TUN,
    &debugger_on_panic, 0, "Run debugger on kernel panic");
TUNABLE_INT("debug.debugger_on_panic", &debugger_on_panic);

/*
 * Whether a panic prints a stack trace: defaults to 1 when KDB_TRACE is
 * defined and to 0 otherwise. Adjustable via debug.trace_on_panic.
 */
#ifdef KDB_TRACE
static int trace_on_panic = 1;
#else
static int trace_on_panic = 0;
#endif
SYSCTL_INT(_debug, OID_AUTO, trace_on_panic,
    CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_TUN,
    &trace_on_panic, 0, "Print stack trace on kernel panic");
TUNABLE_INT("debug.trace_on_panic", &trace_on_panic);
#endif /* KDB */

/* kern.sync_on_panic knob, off by default. */
static int sync_on_panic = 0;
SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RW | CTLFLAG_TUN,
    &sync_on_panic, 0, "Do a sync before rebooting from a panic");
TUNABLE_INT("kern.sync_on_panic", &sync_on_panic);

static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0,
    "Shutdown environment");

/*
 * Verbosity used by kern_reboot() when listing buffers that could not be
 * synced: a value above 0 prints each busy buffer, above 1 also prints the
 * vnode. Initialised to 1 only when DIAGNOSTIC is defined.
 */
#ifndef DIAGNOSTIC
static int show_busybufs;
#else
static int show_busybufs = 1;
#endif
SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW,
    &show_busybufs, 0, "");

int suspend_blocked = 0;
SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW,
    &suspend_blocked, 0, "Block suspend due to a pending shutdown");

/*
 * Variable panicstr contains argument to first call to panic; used as flag
 * to indicate that the kernel has already called panic.
 */
const char *panicstr;

int dumping; /* system is dumping */
int rebooting; /* system is rebooting */
static struct dumperinfo dumper; /* our selected dumper */

/* Context information for dump-debuggers. */
static struct pcb dumppcb; /* Registers. */
lwpid_t dumptid; /* Thread ID. 
*/

static void poweroff_wait(void *, int);
static void shutdown_halt(void *junk, int howto);
static void shutdown_panic(void *junk, int howto);
static void shutdown_reset(void *junk, int howto);

/*
 * Runs once via SYSINIT and hooks the local handlers into the
 * shutdown_final event: poweroff_wait at SHUTDOWN_PRI_FIRST, shutdown_halt
 * and shutdown_panic at SHUTDOWN_PRI_LAST + 100, and shutdown_reset at
 * SHUTDOWN_PRI_LAST + 200.
 */
/* register various local shutdown events */
static void
shutdown_conf(void *unused)
{

	EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL,
	    SHUTDOWN_PRI_FIRST);
	EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL,
	    SHUTDOWN_PRI_LAST + 100);
	EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL,
	    SHUTDOWN_PRI_LAST + 100);
	EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL,
	    SHUTDOWN_PRI_LAST + 200);
}

SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL);

/*
 * The system call that results in a reboot.
 */
/*
 * With MAC compiled in, the MAC reboot check runs first; then the caller
 * must pass priv_check(td, PRIV_REBOOT). Only when both succeed is
 * kern_reboot(uap->opt) called, with Giant held. Returns the first error
 * encountered, or 0.
 */
/* ARGSUSED */
int
sys_reboot(struct thread *td, struct reboot_args *uap)
{
	int error;

	error = 0;
#ifdef MAC
	error = mac_system_check_reboot(td->td_ucred, uap->opt);
#endif
	if (error == 0)
		error = priv_check(td, PRIV_REBOOT);
	if (error == 0) {
		mtx_lock(&Giant);
		kern_reboot(uap->opt);
		mtx_unlock(&Giant);
	}
	return (error);
}

/*
 * Called by events that want to shut down.. 
e.g on a PC
 */
/* Extra reboot flags recorded by shutdown_nice() and merged by kern_reboot(). */
static int shutdown_howto = 0;

/*
 * Request an orderly shutdown: remember howto, then signal init with SIGINT
 * so that it shuts the system down. Without an init process, reboot
 * directly with RB_NOSYNC.
 */
void
shutdown_nice(int howto)
{

	shutdown_howto = howto;

	/* Send a signal to init(8) and have it shutdown the world */
	if (initproc != NULL) {
		PROC_LOCK(initproc);
		kern_psignal(initproc, SIGINT);
		PROC_UNLOCK(initproc);
	} else {
		/* No init(8) running, so simply reboot */
		kern_reboot(RB_NOSYNC);
	}
	return;
}

/*
 * Set to 0 by kern_reboot() once the filesystem sync has been started; the
 * sync block only runs while this is still negative.
 */
static int waittime = -1;

/*
 * Print the uptime as days, hours, minutes and seconds. Larger units are
 * omitted while they are zero; once one unit has been printed, all smaller
 * units follow (tracked by f).
 */
static void
print_uptime(void)
{
	int f;
	struct timespec ts;

	getnanouptime(&ts);
	printf("Uptime: ");
	f = 0;
	if (ts.tv_sec >= 86400) {
		printf("%ldd", (long)ts.tv_sec / 86400);
		ts.tv_sec %= 86400;
		f = 1;
	}
	if (f || ts.tv_sec >= 3600) {
		printf("%ldh", (long)ts.tv_sec / 3600);
		ts.tv_sec %= 3600;
		f = 1;
	}
	if (f || ts.tv_sec >= 60) {
		printf("%ldm", (long)ts.tv_sec / 60);
		ts.tv_sec %= 60;
		f = 1;
	}
	printf("%lds\n", (long)ts.tv_sec);
}

/*
 * Write a kernel dump with the currently selected dumper. Returns EBUSY if
 * a dump is already in progress, ENXIO if no dumper is configured, and 0
 * otherwise. The register context and thread id are saved first. With DDB,
 * a pending textdump is written instead of a core dump when textdump is
 * true.
 */
int
doadump(boolean_t textdump)
{
	boolean_t coredump;

	if (dumping)
		return (EBUSY);
	if (dumper.dumper == NULL)
		return (ENXIO);

	savectx(&dumppcb);
	dumptid = curthread->td_tid;
	dumping++;

	coredump = TRUE;
#ifdef DDB
	if (textdump && textdump_pending) {
		coredump = FALSE;
		textdump_dumpsys(&dumper);
	}
#endif
	if (coredump)
		dumpsys(&dumper);

	dumping--;
	return (0);
}

/*
 * Return 1 if the buffer still counts as busy for shutdown purposes: it is
 * locked and has neither B_INVAL nor B_PERSISTENT set, or it is a delayed
 * write (B_DELWRI) that is not invalid. Return 0 otherwise.
 */
static int
isbufbusy(struct buf *bp)
{
	if (((bp->b_flags & (B_INVAL | B_PERSISTENT)) == 0 &&
	    BUF_ISLOCKED(bp)) ||
	    ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI))
		return (1);
	return (0);
}

/*
 * Shutdown the system cleanly to prepare for reboot, halt, or power off.
 */
/*
 * Order of operations, as coded below: bind to CPU 0 (SMP only), set
 * rebooting, merge flags from shutdown_nice(), run shutdown_pre_sync
 * handlers, sync and (unless panicking or buffers remain busy) unmount
 * filesystems, turn swap off, print the uptime, run shutdown_post_sync
 * handlers, optionally dump, run shutdown_final handlers, then spin
 * forever. This function does not return.
 */
void
kern_reboot(int howto)
{
	static int first_buf_printf = 1;

#if defined(SMP)
	/*
	 * Bind us to CPU 0 so that all shutdown code runs there. Some
	 * systems don't shutdown properly (i.e., ACPI power off) if we
	 * run on another processor.
	 */
	if (!SCHEDULER_STOPPED()) {
		thread_lock(curthread);
		sched_bind(curthread, 0);
		thread_unlock(curthread);
		KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0"));
	}
#endif
	/* We're in the process of rebooting. 
*/
	rebooting = 1;

	/* collect extra flags that shutdown_nice might have set */
	howto |= shutdown_howto;

	/* We are out of the debugger now. */
	kdb_active = 0;

	/*
	 * Do any callouts that should be done BEFORE syncing the filesystems.
	 */
	EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);

	/*
	 * Now sync filesystems
	 */
	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
		register struct buf *bp;
		int iter, nbusy, pbusy;
#ifndef PREEMPTION
		int subiter;
#endif

		waittime = 0;

		wdog_kern_pat(WD_LASTVAL);
		sys_sync(curthread, NULL);

		/*
		 * With soft updates, some buffers that are
		 * written will be remarked as dirty until other
		 * buffers are written.
		 */
		/*
		 * Up to 20 passes; the pass counter restarts whenever the
		 * number of busy buffers dropped since the previous pass.
		 */
		for (iter = pbusy = 0; iter < 20; iter++) {
			nbusy = 0;
			for (bp = &buf[nbuf]; --bp >= buf; )
				if (isbufbusy(bp))
					nbusy++;
			if (nbusy == 0) {
				if (first_buf_printf)
					printf("All buffers synced.");
				break;
			}
			if (first_buf_printf) {
				printf("Syncing disks, buffers remaining... ");
				first_buf_printf = 0;
			}
			printf("%d ", nbusy);
			if (nbusy < pbusy)
				iter = 0;
			pbusy = nbusy;

			wdog_kern_pat(WD_LASTVAL);
			sys_sync(curthread, NULL);

#ifdef PREEMPTION
			/*
			 * Drop Giant and spin for a while to allow
			 * interrupt threads to run.
			 */
			DROP_GIANT();
			DELAY(50000 * iter);
			PICKUP_GIANT();
#else
			/*
			 * Drop Giant and context switch several times to
			 * allow interrupt threads to run.
			 */
			DROP_GIANT();
			for (subiter = 0; subiter < 50 * iter; subiter++) {
				thread_lock(curthread);
				mi_switch(SW_VOL, NULL);
				thread_unlock(curthread);
				DELAY(1000);
			}
			PICKUP_GIANT();
#endif
		}
		printf("\n");
		/*
		 * Count only busy local buffers to prevent forcing
		 * a fsck if we're just a client of a wedged NFS server
		 */
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if (isbufbusy(bp)) {
#if 0
/* XXX: This is bogus. 
We should probably have a BO_REMOTE flag instead */
				if (bp->b_dev == NULL) {
					TAILQ_REMOVE(&mountlist,
					    bp->b_vp->v_mount, mnt_list);
					continue;
				}
#endif
				nbusy++;
				if (show_busybufs > 0) {
					printf(
	    "%d: buf:%p, vnode:%p, flags:%0x, blkno:%jd, lblkno:%jd, buflock:",
					    nbusy, bp, bp->b_vp, bp->b_flags,
					    (intmax_t)bp->b_blkno,
					    (intmax_t)bp->b_lblkno);
					BUF_LOCKPRINTINFO(bp);
					if (show_busybufs > 1)
						vn_printf(bp->b_vp,
						    "vnode content: ");
				}
			}
		}
		if (nbusy) {
			/*
			 * Failed to sync all blocks. Indicate this and don't
			 * unmount filesystems (thus forcing an fsck on reboot).
			 */
			printf("Giving up on %d buffers\n", nbusy);
			DELAY(5000000); /* 5 seconds */
		} else {
			if (!first_buf_printf)
				printf("Final sync complete\n");
			/*
			 * Unmount filesystems
			 */
			if (panicstr == 0)
				vfs_unmountall();
		}
		swapoff_all();
		DELAY(100000); /* wait for console output to finish */
	}

	print_uptime();

	cngrab();

	/*
	 * Ok, now do things that assume all filesystem activity has
	 * been completed.
	 */
	EVENTHANDLER_INVOKE(shutdown_post_sync, howto);

	/* Dump only when RB_DUMP is set without RB_HALT. */
	if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping)
		doadump(TRUE);

	/* Now that we're going to really halt the system... */
	EVENTHANDLER_INVOKE(shutdown_final, howto);

	for(;;) ; /* safety against shutdown_reset not working */
	/* NOTREACHED */
}

/*
 * If the shutdown was a clean halt, behave accordingly.
 */
/*
 * With RB_HALT set, print the halted banner and wait for a console key.
 * If cngetc() returns -1 the CPU is halted; for any other key the function
 * returns so that later shutdown_final handlers run. The RB_HALT bit is
 * cleared only in the local copy of howto.
 */
static void
shutdown_halt(void *junk, int howto)
{

	if (howto & RB_HALT) {
		printf("\n");
		printf("The operating system has halted.\n");
		printf("Please press any key to reboot.\n\n");
		switch (cngetc()) {
		case -1: /* No console, just die */
			cpu_halt();
			/* NOTREACHED */
		default:
			howto &= ~RB_HALT;
			break;
		}
	}
}

/*
 * Check to see if the system paniced, pause and then reboot
 * according to the specified delay. 
*/ static void shutdown_panic(void *junk, int howto) { int loop; if (howto & RB_DUMP) { if (panic_reboot_wait_time != 0) { if (panic_reboot_wait_time != -1) { printf("Automatic reboot in %d seconds - " "press a key on the console to abort\n", panic_reboot_wait_time); for (loop = panic_reboot_wait_time * 10; loop > 0; --loop) { DELAY(1000 * 100); /* 1/10th second */ /* Did user type a key? */ if (cncheckc() != -1) break; } if (!loop) return; } } else { /* zero time specified - reboot NOW */ return; } printf("--> Press a key on the console to reboot,\n"); printf("--> or switch off the system now.\n"); cngetc(); } } /* * Everything done, now reset */ static void shutdown_reset(void *junk, int howto) { printf("Rebooting...\n"); DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ /* * Acquiring smp_ipi_mtx here has a double effect: * - it disables interrupts avoiding CPU0 preemption * by fast handlers (thus deadlocking against other CPUs) * - it avoids deadlocks against smp_rendezvous() or, more * generally, threads busy-waiting, with this spinlock held, * and waiting for responses by threads on other CPUs * (ie. smp_tlb_shootdown()). * * For the !SMP case it just needs to handle the former problem. 
*/ #ifdef SMP mtx_lock_spin(&smp_ipi_mtx); #else spinlock_enter(); #endif /* cpu_boot(howto); */ /* doesn't do anything at the moment */ cpu_reset(); /* NOTREACHED */ /* assuming reset worked */ } #if defined(WITNESS) || defined(INVARIANTS) static int kassert_warn_only = 0; #ifdef KDB static int kassert_do_kdb = 0; #endif #ifdef KTR static int kassert_do_ktr = 0; #endif static int kassert_do_log = 1; static int kassert_log_pps_limit = 4; static int kassert_log_mute_at = 0; static int kassert_log_panic_at = 0; static int kassert_warnings = 0; SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW, NULL, "kassert options"); SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, CTLFLAG_RW | CTLFLAG_TUN, &kassert_warn_only, 0, "KASSERT triggers a panic (1) or just a warning (0)"); TUNABLE_INT("debug.kassert.warn_only", &kassert_warn_only); #ifdef KDB SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, CTLFLAG_RW | CTLFLAG_TUN, &kassert_do_kdb, 0, "KASSERT will enter the debugger"); TUNABLE_INT("debug.kassert.do_kdb", &kassert_do_kdb); #endif #ifdef KTR SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, CTLFLAG_RW | CTLFLAG_TUN, &kassert_do_ktr, 0, "KASSERT does a KTR, set this to the KTRMASK you want"); TUNABLE_INT("debug.kassert.do_ktr", &kassert_do_ktr); #endif SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, CTLFLAG_RW | CTLFLAG_TUN, &kassert_do_log, 0, "KASSERT triggers a panic (1) or just a warning (0)"); TUNABLE_INT("debug.kassert.do_log", &kassert_do_log); SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RW | CTLFLAG_TUN, &kassert_warnings, 0, "number of KASSERTs that have been triggered"); TUNABLE_INT("debug.kassert.warnings", &kassert_warnings); SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, CTLFLAG_RW | CTLFLAG_TUN, &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic"); TUNABLE_INT("debug.kassert.log_panic_at", &kassert_log_panic_at); SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, CTLFLAG_RW | CTLFLAG_TUN, &kassert_log_pps_limit, 0, "limit number of 
log messages per second"); TUNABLE_INT("debug.kassert.log_pps_limit", &kassert_log_pps_limit); SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, CTLFLAG_RW | CTLFLAG_TUN, &kassert_log_mute_at, 0, "max number of KASSERTS to log"); TUNABLE_INT("debug.kassert.log_mute_at", &kassert_log_mute_at); static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0, kassert_sysctl_kassert, "I", "set to trigger a test kassert"); static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS) { int error, i; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error == 0) { i = 0; error = sysctl_handle_int(oidp, &i, 0, req); } if (error != 0 || req->newptr == NULL) return (error); KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i)); return (0); } /* * Called by KASSERT, this decides if we will panic * or if we will log via printf and/or ktr. */ void kassert_panic(const char *fmt, ...) { static char buf[256]; va_list ap; va_start(ap, fmt); (void)vsnprintf(buf, sizeof(buf), fmt, ap); va_end(ap); /* * panic if we're not just warning, or if we've exceeded * kassert_log_panic_at warnings. */ if (!kassert_warn_only || (kassert_log_panic_at > 0 && kassert_warnings >= kassert_log_panic_at)) { va_start(ap, fmt); vpanic(fmt, ap); /* NORETURN */ } #ifdef KTR if (kassert_do_ktr) CTR0(ktr_mask, buf); #endif /* KTR */ /* * log if we've not yet met the mute limit. */ if (kassert_do_log && (kassert_log_mute_at == 0 || kassert_warnings < kassert_log_mute_at)) { static struct timeval lasterr; static int curerr; if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) { printf("KASSERT failed: %s\n", buf); kdb_backtrace(); } } #ifdef KDB if (kassert_do_kdb) { kdb_enter(KDB_WHY_KASSERT, buf); } #endif atomic_add_int(&kassert_warnings, 1); } #endif /* * Panic is called on unresolvable fatal errors. It prints "panic: mesg", * and then reboots. 
If we are called twice, then we avoid trying to sync * the disks as this often leads to recursive panics. */ void panic(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vpanic(fmt, ap); } void vpanic(const char *fmt, va_list ap) { #ifdef SMP cpuset_t other_cpus; #endif struct thread *td = curthread; int bootopt, newpanic; static char buf[256]; spinlock_enter(); #ifdef SMP /* * stop_cpus_hard(other_cpus) should prevent multiple CPUs from * concurrently entering panic. Only the winner will proceed * further. */ if (panicstr == NULL && !kdb_active) { other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); stop_cpus_hard(other_cpus); } /* * We set stop_scheduler here and not in the block above, * because we want to ensure that if panic has been called and * stop_scheduler_on_panic is true, then stop_scheduler will * always be set. Even if panic has been entered from kdb. */ td->td_stopsched = 1; #endif bootopt = RB_AUTOBOOT; newpanic = 0; if (panicstr) bootopt |= RB_NOSYNC; else { bootopt |= RB_DUMP; panicstr = fmt; newpanic = 1; } if (newpanic) { (void)vsnprintf(buf, sizeof(buf), fmt, ap); panicstr = buf; cngrab(); printf("panic: %s\n", buf); } else { printf("panic: "); vprintf(fmt, ap); printf("\n"); } #ifdef SMP printf("cpuid = %d\n", PCPU_GET(cpuid)); #endif #ifdef KDB if (newpanic && trace_on_panic) kdb_backtrace(); if (debugger_on_panic) kdb_enter(KDB_WHY_PANIC, "panic"); #endif /*thread_lock(td); */ td->td_flags |= TDF_INPANIC; /* thread_unlock(td); */ if (!sync_on_panic) bootopt |= RB_NOSYNC; kern_reboot(bootopt); } /* * Support for poweroff delay. * * Please note that setting this delay too short might power off your machine * before the write cache on your hard disk has been flushed, leading to * soft-updates inconsistencies. 
*/
#ifndef POWEROFF_DELAY
# define POWEROFF_DELAY 5000
#endif
static int poweroff_delay = POWEROFF_DELAY;

SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
    &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)");

/*
 * Shutdown handler: pause before powering off.  Returns immediately
 * unless RB_POWEROFF is set in howto and poweroff_delay is positive.
 * The delay is kept in msec and scaled by 1000 for DELAY().
 */
static void
poweroff_wait(void *junk, int howto)
{

	if (!(howto & RB_POWEROFF) || poweroff_delay <= 0)
		return;
	DELAY(poweroff_delay * 1000);
}

/*
 * Some system processes (e.g. syncer) need to be stopped at appropriate
 * points in their main loops prior to a system shutdown, so that they
 * won't interfere with the shutdown process (e.g. by holding a disk buf
 * to cause sync to fail).  For each of these system processes, register
 * shutdown_kproc() as a handler for one of shutdown events.
 */
static int kproc_shutdown_wait = 60;
SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
    &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process");

/*
 * Suspend the system process passed as arg (a struct proc *), waiting at
 * most kproc_shutdown_wait seconds.  Does nothing while panicking.
 * EWOULDBLOCK is reported as "timed out"; every other result, including
 * other errors, is reported as "done".
 */
void
kproc_shutdown(void *arg, int howto)
{
	struct proc *p;
	int error;

	if (panicstr)
		return;

	p = (struct proc *)arg;
	printf("Waiting (max %d seconds) for system process `%s' to stop...",
	    kproc_shutdown_wait, p->p_comm);
	error = kproc_suspend(p, kproc_shutdown_wait * hz);

	if (error == EWOULDBLOCK)
		printf("timed out\n");
	else
		printf("done\n");
}

/*
 * Thread counterpart of kproc_shutdown(): arg is a struct thread * and
 * the suspension goes through kthread_suspend() with the same timeout
 * and the same reporting.
 */
void
kthread_shutdown(void *arg, int howto)
{
	struct thread *td;
	int error;

	if (panicstr)
		return;

	td = (struct thread *)arg;
	printf("Waiting (max %d seconds) for system thread `%s' to stop...",
	    kproc_shutdown_wait, td->td_name);
	error = kthread_suspend(td, kproc_shutdown_wait * hz);

	if (error == EWOULDBLOCK)
		printf("timed out\n");
	else
		printf("done\n");
}

/* Name of the current dump device, sized like struct cdev's si_name. */
static char dumpdevname[sizeof(((struct cdev*)NULL)->si_name)];
SYSCTL_STRING(_kern_shutdown, OID_AUTO, dumpdevname, CTLFLAG_RD,
    dumpdevname, 0, "Device for kernel dumps");

/* Registration of dumpers */
/*
 * The hunk below adds a thread argument and moves the PRIV_SETDUMPER
 * check into set_dumper() itself.  With the change applied:
 *  - a failing priv_check() result is returned before anything is touched;
 *  - di == NULL clears the registered dumper and dumpdevname, returning 0;
 *  - EBUSY is returned when a dumper is already registered;
 *  - otherwise *di is copied into dumper and devname into dumpdevname,
 *    with a console notice if the name had to be truncated.
 */
int
-set_dumper(struct dumperinfo *di, const char *devname)
+set_dumper(struct dumperinfo *di, const char *devname, struct thread *td)
{
	size_t wantcopy;
+	int error;
+
+	error = priv_check(td, PRIV_SETDUMPER);
+	if (error != 0)
+		return (error);

	if (di == NULL) {
		bzero(&dumper, sizeof dumper);
		dumpdevname[0] = '\0';
		return (0);
	}
	if (dumper.dumper != NULL)
		return (EBUSY);
	dumper = *di;
	/* strlcpy() returns the length it wanted to copy. */
	wantcopy = strlcpy(dumpdevname, devname, sizeof(dumpdevname));
	if (wantcopy >= sizeof(dumpdevname)) {
		printf("set_dumper: device name truncated from '%s' -> '%s'\n",
			devname, dumpdevname);
	}
	return (0);
}

/* Call dumper with bounds checking. */
/*
 * A non-empty write that starts before di->mediaoffset, or that would
 * extend past di->mediasize bytes from it, is refused with ENOSPC after
 * printing a diagnostic.  Otherwise the result of di->dumper() is
 * returned.  Zero-length requests skip the range check.
 */
int
dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical,
    off_t offset, size_t length)
{

	if (length != 0 && (offset < di->mediaoffset ||
	    offset - di->mediaoffset + length > di->mediasize)) {
		printf("Attempt to write outside dump device boundaries.\n"
	    "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n",
		    (intmax_t)offset, (intmax_t)di->mediaoffset,
		    (uintmax_t)length, (intmax_t)di->mediasize);
		return (ENOSPC);
	}
	return (di->dumper(di->priv, virtual, physical, offset, length));
}

/*
 * Fill in a kernel dump header.  kdh is zeroed first; the string fields
 * are copied with strlcpy() bounded by each field's size and the numeric
 * fields are stored through htod32()/htod64().  panicstring is only
 * filled in when panicstr is set.  The parity field is computed last,
 * once every other field is in place.
 */
void
mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver,
    uint64_t dumplen, uint32_t blksz)
{

	bzero(kdh, sizeof(*kdh));
	strlcpy(kdh->magic, magic, sizeof(kdh->magic));
	strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
	kdh->version = htod32(KERNELDUMPVERSION);
	kdh->architectureversion = htod32(archver);
	kdh->dumplength = htod64(dumplen);
	kdh->dumptime = htod64(time_second);
	kdh->blocksize = htod32(blksz);
	strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname));
	strlcpy(kdh->versionstring, version, sizeof(kdh->versionstring));
	if (panicstr != NULL)
		strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
	kdh->parity = kerneldump_parity(kdh);
}
Index: stable/10/sys/sys/conf.h
===================================================================
--- stable/10/sys/sys/conf.h	(revision 291214)
+++ stable/10/sys/sys/conf.h	(revision 291215)
@@ -1,348 +1,348 @@
/*-
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
* Copyright (c) 2000 * Poul-Henning Kamp. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*
 *	@(#)conf.h	8.5 (Berkeley) 1/9/95
 * $FreeBSD$
 */

#ifndef _SYS_CONF_H_
#define	_SYS_CONF_H_

/* NOTE(review): the header names on the two #include lines below are missing in this copy. */
#ifdef _KERNEL
#include
#else
#include
#endif

struct snapdata;
struct devfs_dirent;
struct cdevsw;
struct file;

/*
 * Per-device record.  The SI_* constants are defined directly after
 * si_flags and are presumably its flag bits.  si_name is a fixed-size
 * buffer of SPECNAMELEN + 1 bytes.
 */
struct cdev {
	void		*si_spare0;
	u_int		si_flags;
#define	SI_ETERNAL	0x0001	/* never destroyed */
#define	SI_ALIAS	0x0002	/* carrier of alias name */
#define	SI_NAMED	0x0004	/* make_dev{_alias} has been called */
#define	SI_CHEAPCLONE	0x0008	/* can be removed_dev'ed when vnode reclaims */
#define	SI_CHILD	0x0010	/* child of another struct cdev **/
#define	SI_DUMPDEV	0x0080	/* is kernel dumpdev */
#define	SI_CLONELIST	0x0200	/* on a clone list */
#define	SI_UNMAPPED	0x0400	/* can handle unmapped I/O */
#define	SI_NOSPLIT	0x0800	/* I/O should not be split up */
	struct timespec	si_atime;
	struct timespec	si_ctime;
	struct timespec	si_mtime;
	uid_t		si_uid;
	gid_t		si_gid;
	mode_t		si_mode;
	struct ucred	*si_cred;	/* cached clone-time credential */
	int		si_drv0;	/* value returned by dev2unit() */
	int		si_refcount;
	LIST_ENTRY(cdev)	si_list;
	LIST_ENTRY(cdev)	si_clone;
	LIST_HEAD(, cdev)	si_children;
	LIST_ENTRY(cdev)	si_siblings;
	struct cdev *si_parent;
	struct mount	*si_mountpt;
	void		*si_drv1, *si_drv2;
	struct cdevsw	*si_devsw;
	int		si_iosize_max;	/* maximum I/O size (for physio &al) */
	u_long		si_usecount;
	u_long		si_threadcount;
	union {
		struct snapdata *__sid_snapdata;
	} __si_u;
	char		si_name[SPECNAMELEN + 1];
};

/* Shorthand for the single member of the __si_u union. */
#define	si_snapdata	__si_u.__sid_snapdata

#ifdef _KERNEL

/*
 * Definitions of device driver entry switches
 */

struct bio;
struct buf;
struct thread;
struct uio;
struct knote;
struct clonedevs;
struct vm_object;
struct vnode;

/*
 * Note: d_thread_t is provided as a transition aid for those drivers
 * that treat struct proc/struct thread as an opaque data type and
 * exist in substantially the same form in both 4.x and 5.x.  Writers
 * of drivers that dip into the d_thread_t structure should use
 * struct thread or struct proc as appropriate for the version of the
 * OS they are using.
It is provided in lieu of each device driver
 * inventing its own way of doing this.  While it does violate style(9)
 * in a number of ways, this violation is deemed to be less
 * important than the benefits that a uniform API between releases
 * gives.
 *
 * Users of struct thread/struct proc that aren't device drivers should
 * not use d_thread_t.
 */

typedef struct thread d_thread_t;

/*
 * Function types for the driver entry points; struct cdevsw stores one
 * pointer of each type.
 */
typedef int d_open_t(struct cdev *dev, int oflags, int devtype, struct thread *td);
typedef int d_fdopen_t(struct cdev *dev, int oflags, struct thread *td, struct file *fp);
typedef int d_close_t(struct cdev *dev, int fflag, int devtype, struct thread *td);
typedef void d_strategy_t(struct bio *bp);
typedef int d_ioctl_t(struct cdev *dev, u_long cmd, caddr_t data,
		      int fflag, struct thread *td);

typedef int d_read_t(struct cdev *dev, struct uio *uio, int ioflag);
typedef int d_write_t(struct cdev *dev, struct uio *uio, int ioflag);
typedef int d_poll_t(struct cdev *dev, int events, struct thread *td);
typedef int d_kqfilter_t(struct cdev *dev, struct knote *kn);
typedef int d_mmap_t(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
		     int nprot, vm_memattr_t *memattr);
typedef int d_mmap_single_t(struct cdev *cdev, vm_ooffset_t *offset,
    vm_size_t size, struct vm_object **object, int nprot);
typedef void d_purge_t(struct cdev *dev);

/* Dump-write callback type; see the per-argument notes. */
typedef int dumper_t(
	void *_priv,		/* Private to the driver. */
	void *_virtual,		/* Virtual (mapped) address. */
	vm_offset_t _physical,	/* Physical address of virtual. */
	off_t _offset,		/* Byte-offset to write at. */
	size_t _length);	/* Number of bytes to dump. */

#endif /* _KERNEL */

/*
 * Types for d_flags.
 */
#define	D_TAPE	0x0001
#define	D_DISK	0x0002
#define	D_TTY	0x0004
#define	D_MEM	0x0008

#ifdef _KERNEL

/* Covers the low 16 bits, which include all of the D_* type values above. */
#define	D_TYPEMASK	0xffff

/*
 * Flags for d_flags which the drivers can set.
*/
#define	D_TRACKCLOSE	0x00080000	/* track all closes */
#define	D_MMAP_ANON	0x00100000	/* special treatment in vm_mmap.c */
#define	D_NEEDGIANT	0x00400000	/* driver want Giant */
#define	D_NEEDMINOR	0x00800000	/* driver uses clone_create() */

/*
 * Version numbers.
 */
#define	D_VERSION_00	0x20011966
#define	D_VERSION_01	0x17032005	/* Add d_uid,gid,mode & kind */
#define	D_VERSION_02	0x28042009	/* Add d_mmap_single */
#define	D_VERSION_03	0x17122009	/* d_mmap takes memattr,vm_ooffset_t */
#define	D_VERSION	D_VERSION_03

/*
 * Flags used for internal housekeeping
 */
#define	D_INIT		0x80000000	/* cdevsw initialized */

/*
 * Character device switch table
 *
 * One function pointer per driver entry point, plus the version, flags
 * and name of the driver.
 */
struct cdevsw {
	int			d_version;
	u_int			d_flags;
	const char		*d_name;
	d_open_t		*d_open;
	d_fdopen_t		*d_fdopen;
	d_close_t		*d_close;
	d_read_t		*d_read;
	d_write_t		*d_write;
	d_ioctl_t		*d_ioctl;
	d_poll_t		*d_poll;
	d_mmap_t		*d_mmap;
	d_strategy_t		*d_strategy;
	dumper_t		*d_dump;
	d_kqfilter_t		*d_kqfilter;
	d_purge_t		*d_purge;
	d_mmap_single_t		*d_mmap_single;

	int32_t			d_spare0[3];
	void			*d_spare1[3];

	/* These fields should not be messed with by drivers */
	LIST_HEAD(, cdev)	d_devs;
	int			d_spare2;
	union {
		struct cdevsw		*gianttrick;
		SLIST_ENTRY(cdevsw)	postfree_list;
	} __d_giant;
};
/* Shorthands for the members of the __d_giant union. */
#define	d_gianttrick		__d_giant.gianttrick
#define	d_postfree_list		__d_giant.postfree_list

struct module;

struct devsw_module_data {
	int	(*chainevh)(struct module *, int, void *); /* next handler */
	void	*chainarg;	/* arg for next event handler */
	/* Do not initialize fields hereafter */
};

/*
 * Expands to a static moduledata_t called <name>_mod, initialised with
 * the stringified name, evh and arg, followed by a DECLARE_MODULE() for
 * it at SI_SUB_DRIVERS with order ord.
 */
#define	DEV_MODULE_ORDERED(name, evh, arg, ord)				\
static moduledata_t name##_mod = {					\
    #name,								\
    evh,								\
    arg									\
};									\
DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, ord)

/* DEV_MODULE_ORDERED() with the order fixed at SI_ORDER_MIDDLE. */
#define	DEV_MODULE(name, evh, arg)					\
    DEV_MODULE_ORDERED(name, evh, arg, SI_ORDER_MIDDLE)

void clone_setup(struct clonedevs **cdp);
void clone_cleanup(struct clonedevs **);
#define	CLONE_UNITMASK 0xfffff
#define	CLONE_FLAG0 (CLONE_UNITMASK + 1)
int clone_create(struct clonedevs **, struct cdevsw *, int *unit, struct cdev **dev, int extra);

int	count_dev(struct cdev *_dev);
void	delist_dev(struct cdev *_dev);
void	destroy_dev(struct cdev *_dev);
int	destroy_dev_sched(struct cdev *dev);
int	destroy_dev_sched_cb(struct cdev *dev, void (*cb)(void *), void *arg);
void	destroy_dev_drain(struct cdevsw *csw);
void	drain_dev_clone_events(void);
struct cdevsw *dev_refthread(struct cdev *_dev, int *_ref);
struct cdevsw *devvn_refthread(struct vnode *vp, struct cdev **devp, int *_ref);
void	dev_relthread(struct cdev *_dev, int _ref);
void	dev_depends(struct cdev *_pdev, struct cdev *_cdev);
void	dev_ref(struct cdev *dev);
void	dev_refl(struct cdev *dev);
void	dev_rel(struct cdev *dev);
void	dev_strategy(struct cdev *dev, struct buf *bp);
void	dev_strategy_csw(struct cdev *dev, struct cdevsw *csw, struct buf *bp);
/*
 * Device creation.  The __printflike() pairs give the positions of the
 * format string and of the first variadic argument.
 */
struct cdev *make_dev(struct cdevsw *_devsw, int _unit, uid_t _uid, gid_t _gid,
		int _perms, const char *_fmt, ...) __printflike(6, 7);
struct cdev *make_dev_cred(struct cdevsw *_devsw, int _unit,
		struct ucred *_cr, uid_t _uid, gid_t _gid, int _perms,
		const char *_fmt, ...) __printflike(7, 8);
/* MAKEDEV_* flag bits. */
#define	MAKEDEV_REF		0x01
#define	MAKEDEV_WHTOUT		0x02
#define	MAKEDEV_NOWAIT		0x04
#define	MAKEDEV_WAITOK		0x08
#define	MAKEDEV_ETERNAL		0x10
#define	MAKEDEV_CHECKNAME	0x20
struct cdev *make_dev_credf(int _flags,
		struct cdevsw *_devsw, int _unit,
		struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode,
		const char *_fmt, ...) __printflike(8, 9);
int	make_dev_p(int _flags, struct cdev **_cdev, struct cdevsw *_devsw,
		struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode,
		const char *_fmt, ...) __printflike(8, 9);
struct cdev *make_dev_alias(struct cdev *_pdev, const char *_fmt, ...)
		__printflike(2, 3);
int	make_dev_alias_p(int _flags, struct cdev **_cdev, struct cdev *_pdev,
		const char *_fmt, ...) __printflike(4, 5);
int	make_dev_physpath_alias(int _flags, struct cdev **_cdev,
		struct cdev *_pdev, struct cdev *_old_alias,
		const char *_physpath);
void	dev_lock(void);
void	dev_unlock(void);
void	setconf(void);

/* MAKEDEV_ETERNAL_KLD is 0 when built as a KLD module, MAKEDEV_ETERNAL otherwise. */
#ifdef KLD_MODULE
#define	MAKEDEV_ETERNAL_KLD	0
#else
#define	MAKEDEV_ETERNAL_KLD	MAKEDEV_ETERNAL
#endif

/* Unit number of a device: the si_drv0 member of struct cdev. */
#define	dev2unit(d)	((d)->si_drv0)

typedef	void (*cdevpriv_dtr_t)(void *data);
int	devfs_get_cdevpriv(void **datap);
int	devfs_set_cdevpriv(void *priv, cdevpriv_dtr_t dtr);
void	devfs_clear_cdevpriv(void);
void	devfs_fpdrop(struct file *fp);	/* XXX This is not public KPI */

ino_t	devfs_alloc_cdp_inode(void);
void	devfs_free_cdp_inode(ino_t ino);

#define		UID_ROOT	0
#define		UID_BIN		3
#define		UID_UUCP	66
#define		UID_NOBODY	65534

#define		GID_WHEEL	0
#define		GID_KMEM	2
#define		GID_TTY		4
#define		GID_OPERATOR	5
#define		GID_BIN		7
#define		GID_GAMES	13
#define		GID_DIALER	68
#define		GID_NOBODY	65534

typedef void (*dev_clone_fn)(void *arg, struct ucred *cred, char *name,
	    int namelen, struct cdev **result);

int dev_stdclone(char *_name, char **_namep, const char *_stem, int *_unit);
EVENTHANDLER_DECLARE(dev_clone, dev_clone_fn);

/* Stuff relating to kernel-dump */

struct dumperinfo {
	dumper_t *dumper;	/* Dumping function. */
	void    *priv;		/* Private parts. */
	u_int   blocksize;	/* Size of block in bytes. */
	u_int	maxiosize;	/* Max size allowed for an individual I/O */
	off_t   mediaoffset;	/* Initial offset in bytes. */
	off_t   mediasize;	/* Space available in bytes. */
};

/* The hunk below adds a struct thread * argument to set_dumper(). */
-int set_dumper(struct dumperinfo *, const char *_devname);
+int set_dumper(struct dumperinfo *, const char *_devname, struct thread *td);
int dump_write(struct dumperinfo *, void *, vm_offset_t, off_t, size_t);
void dumpsys(struct dumperinfo *);
int doadump(boolean_t);
extern int dumping;		/* system is dumping */

#endif /* _KERNEL */

#endif /* !_SYS_CONF_H_ */
Index: stable/10
===================================================================
--- stable/10	(revision 291214)
+++ stable/10	(revision 291215)

Property changes on: stable/10
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r274366