diff --git a/sbin/mdconfig/mdconfig.8 b/sbin/mdconfig/mdconfig.8 index d5000cf1903a..a437e40d58ec 100644 --- a/sbin/mdconfig/mdconfig.8 +++ b/sbin/mdconfig/mdconfig.8 @@ -1,320 +1,329 @@ .\" Copyright (c) 1993 University of Utah. .\" Copyright (c) 1980, 1989, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" Copyright (c) 2000 .\" Poul-Henning Kamp All rights reserved. .\" .\" This code is derived from software contributed to Berkeley by .\" the Systems Programming Group of the University of Utah Computer .\" Science Department. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)vnconfig.8 8.1 (Berkeley) 6/5/93 .\" from: src/usr.sbin/vnconfig/vnconfig.8,v 1.19 2000/12/27 15:30:29 .\" .\" $FreeBSD$ .\" .Dd October 10, 2015 .Dt MDCONFIG 8 .Os .Sh NAME .Nm mdconfig .Nd create and control memory disks .Sh SYNOPSIS .Nm .Fl a .Fl t Ar type .Op Fl n .Oo Fl o Oo Cm no Oc Ns Ar option Oc ... .Op Fl f Ar file .Op Fl s Ar size .Op Fl S Ar sectorsize .Op Fl u Ar unit .Op Fl x Ar sectors/track .Op Fl y Ar heads/cylinder .Nm .Fl d .Fl u Ar unit .Op Fl o Oo Cm no Oc Ns Ar force .Nm .Fl r .Fl u Ar unit .Fl s Ar size .Op Fl o Oo Cm no Oc Ns Ar force .Nm .Fl l .Op Fl n .Op Fl v .Op Fl f Ar file .Op Fl u Ar unit .Nm .Ar file .Sh DESCRIPTION The .Nm utility creates and controls .Xr md 4 devices. .Pp Options indicate an action to be performed: .Bl -tag -width indent .It Fl a Attach a memory disk. This will configure and attach a memory disk with the parameters specified and attach it to the system. If the .Fl u Ar unit option is not provided, the newly created device name will be printed on stdout. .It Fl d Detach a memory disk from the system and release all resources. .It Fl r Resize a memory disk. .It Fl t Ar type Select the type of the memory disk. .Bl -tag -width "malloc" .It Cm malloc Storage for this type of memory disk is allocated with .Xr malloc 9 . This limits the size to the malloc bucket limit in the kernel. 
If the .Fl o Cm reserve option is not set, creating and filling a large malloc-backed memory disk is a very easy way to panic the system. .It Cm vnode A file specified with .Fl f Ar file becomes the backing store for this memory disk. .It Cm swap Storage for this type of memory disk is allocated from buffer memory. Pages get pushed out to swap when the system is under memory pressure, otherwise they stay in the operating memory. Using .Cm swap backing is generally preferred instead of using .Cm malloc backing. .It Cm null Bitsink; all writes do nothing, all reads return zeroes. .El .It Fl f Ar file Filename to use for the vnode type memory disk. The .Fl a and .Fl t Ar vnode options are implied if not specified. .It Fl l List configured devices. If given with .Fl u , display details about that particular device. If given with .Fl f Ar file , display .Xr md 4 device names of which .Ar file is used as the backing store. If both of .Fl u and .Fl f options are specified, display devices which match the two conditions. If the .Fl v option is specified, show all details. .It Fl n When printing .Xr md 4 device names, print only the unit number without the .Xr md 4 prefix. .It Fl s Ar size Size of the memory disk. .Ar Size is the number of 512 byte sectors unless suffixed with a .Cm b , k , m , g , t , or .Cm p which denotes byte, kilobyte, megabyte, gigabyte, terabyte and petabyte respectively. When used without the .Fl r option, the .Fl a and .Fl t Ar swap options are implied if not specified. .It Fl S Ar sectorsize Sectorsize to use for the memory disk, in bytes. .It Fl x Ar sectors/track See the description of the .Fl y option below. .It Fl y Ar heads/cylinder For .Cm malloc or .Cm vnode backed devices, the .Fl x and .Fl y options can be used to specify a synthetic geometry. This is useful for constructing bootable images for later download to other devices. .It Fl o Oo Cm no Oc Ns Ar option Set or reset options. 
.Bl -tag -width indent .It Oo Cm no Oc Ns Cm async For .Cm vnode backed devices: avoid .Dv IO_SYNC for increased performance but at the risk of deadlocking the entire kernel. .It Oo Cm no Oc Ns Cm reserve Allocate and reserve all needed storage from the start, rather than as needed. .It Oo Cm no Oc Ns Cm cluster Enable clustering on this disk. .It Oo Cm no Oc Ns Cm compress Enable/disable compression features to reduce memory usage. .It Oo Cm no Oc Ns Cm force Disable/enable extra sanity checks to prevent the user from doing something that might adversely affect the system. This can be used with the .Fl d flag to forcibly destroy an .Xr md 4 disk that is still in use. .It Oo Cm no Oc Ns Cm readonly Enable/disable readonly mode. +.It Oo Cm no Oc Ns Cm verify +For +.Cm vnode +backed devices: enable/disable requesting verification of the +file used for backing store. +The type of verification depends on which security features are available. +One example of verification is testing file integrity with +checksums or cryptographic signatures. .El .It Fl u Ar unit Request a specific unit number or device name for the .Xr md 4 device instead of automatic allocation. If a device name is specified, it must start with .Dq md followed by the unit number. .El .Pp The last form, .Nm .Ar file , is provided for convenience as an abbreviation of .Nm .Fl a .Fl t Ar vnode .Fl f Ar file . .Sh EXAMPLES Create a disk with .Pa /tmp/boot.flp as backing storage. 
The name of the allocated unit will be printed on stdout, such as .Dq Li md0 : .Bd -literal -offset indent mdconfig /tmp/boot.flp .Ed .Pp Create a 1 gigabyte swap backed memory disk named .Dq Li md3 : .Bd -literal -offset indent mdconfig -s 1g -u md3 .Ed .Pp Detach and free all resources used by .Pa /dev/md3 : .Bd -literal -offset indent mdconfig -du md3 .Ed .Pp Show detailed information on current memory disks: .Bd -literal -offset indent mdconfig -lv .Ed .Pp Resize the .Dq Li md3 memory disk to 2 gigabytes: .Bd -literal -offset indent mdconfig -rs 2g -u md3 .Ed .Pp Create a 1 gigabyte swap backed disk, initialize an .Xr ffs 7 file system on it, and mount it on .Pa /tmp : .Bd -literal -offset indent mdconfig -s 1g -u md10 newfs -U /dev/md10 mount /dev/md10 /tmp chmod 1777 /tmp .Ed .Pp Create a memory disk out of an ISO 9660 CD image file, using the first available .Xr md 4 device, and then mount it: .Bd -literal -offset indent mount -t cd9660 /dev/`mdconfig -f cdimage.iso` /mnt .Ed .Pp Create a file-backed device from a hard disk image that begins with 512K of raw header information. .Xr gnop 8 is used to skip over the header information, positioning .Pa md1.nop to the start of the filesystem in the image. .Bd -literal -offset indent mdconfig -u md1 -f diskimage.img gnop create -o 512K md1 mount /dev/md1.nop /mnt .Ed .Sh SEE ALSO +.Xr open 2 , .Xr md 4 , .Xr ffs 7 , .Xr gpart 8 , .Xr mdmfs 8 , .Xr malloc 9 .Sh HISTORY The .Nm utility first appeared in .Fx 5.0 as a cleaner replacement for the .Xr vn 4 and .Xr vnconfig 8 combo. .Sh AUTHORS The .Nm utility was written by .An Poul-Henning Kamp Aq Mt phk@FreeBSD.org . diff --git a/sbin/mdconfig/mdconfig.c b/sbin/mdconfig/mdconfig.c index f1c013c8b9fe..12c2a82c44d7 100644 --- a/sbin/mdconfig/mdconfig.c +++ b/sbin/mdconfig/mdconfig.c @@ -1,572 +1,576 @@ /*- * Copyright (c) 2000-2004 Poul-Henning Kamp * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. 
* * Portions of this software were developed by Edward Tomasz Napierala * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct md_ioctl mdio; static enum {UNSET, ATTACH, DETACH, RESIZE, LIST} action = UNSET; static int nflag; static void usage(void); static void md_set_file(const char *); static int md_find(const char *, const char *); static int md_query(const char *, const int, const char *); static int md_list(const char *, int, const char *); static char *geom_config_get(struct gconf *g, const char *name); static void md_prthumanval(char *length); #define OPT_VERBOSE 0x01 #define OPT_UNIT 0x02 #define OPT_DONE 0x04 #define OPT_LIST 0x10 #define CLASS_NAME_MD "MD" static void usage(void) { fprintf(stderr, "usage: mdconfig -a -t type [-n] [-o [no]option] ... [-f file]\n" " [-s size] [-S sectorsize] [-u unit]\n" " [-x sectors/track] [-y heads/cylinder]\n" " mdconfig -d -u unit [-o [no]force]\n" " mdconfig -r -u unit -s size [-o [no]force]\n" " mdconfig -l [-v] [-n] [-f file] [-u unit]\n" " mdconfig file\n"); fprintf(stderr, "\t\ttype = {malloc, vnode, swap}\n"); fprintf(stderr, "\t\toption = {cluster, compress, reserve}\n"); fprintf(stderr, "\t\tsize = %%d (512 byte blocks), %%db (B),\n"); fprintf(stderr, "\t\t %%dk (kB), %%dm (MB), %%dg (GB), \n"); fprintf(stderr, "\t\t %%dt (TB), or %%dp (PB)\n"); exit(1); } int main(int argc, char **argv) { int ch, fd, i, vflag; char *p; char *fflag = NULL, *sflag = NULL, *tflag = NULL, *uflag = NULL; bzero(&mdio, sizeof(mdio)); mdio.md_file = malloc(PATH_MAX); if (mdio.md_file == NULL) err(1, "could not allocate memory"); vflag = 0; bzero(mdio.md_file, PATH_MAX); if (argc == 1) usage(); while ((ch = getopt(argc, argv, "ab:df:lno:rs:S:t:u:vx:y:")) != -1) { switch (ch) { case 'a': if (action != UNSET && action != ATTACH) errx(1, "-a is mutually exclusive " "with -d, -r, and -l"); action = ATTACH; break; case 'd': if (action != 
UNSET && action != DETACH) errx(1, "-d is mutually exclusive " "with -a, -r, and -l"); action = DETACH; mdio.md_options |= MD_AUTOUNIT; break; case 'r': if (action != UNSET && action != RESIZE) errx(1, "-r is mutually exclusive " "with -a, -d, and -l"); action = RESIZE; mdio.md_options |= MD_AUTOUNIT; break; case 'l': if (action != UNSET && action != LIST) errx(1, "-l is mutually exclusive " "with -a, -r, and -d"); action = LIST; mdio.md_options |= MD_AUTOUNIT; break; case 'n': nflag = 1; break; case 't': if (tflag != NULL) errx(1, "-t can be passed only once"); tflag = optarg; if (!strcmp(optarg, "malloc")) { mdio.md_type = MD_MALLOC; mdio.md_options |= MD_AUTOUNIT | MD_COMPRESS; } else if (!strcmp(optarg, "vnode")) { mdio.md_type = MD_VNODE; mdio.md_options |= MD_CLUSTER | MD_AUTOUNIT | MD_COMPRESS; } else if (!strcmp(optarg, "swap")) { mdio.md_type = MD_SWAP; mdio.md_options |= MD_CLUSTER | MD_AUTOUNIT | MD_COMPRESS; } else if (!strcmp(optarg, "null")) { mdio.md_type = MD_NULL; mdio.md_options |= MD_CLUSTER | MD_AUTOUNIT | MD_COMPRESS; } else errx(1, "unknown type: %s", optarg); break; case 'f': if (fflag != NULL) errx(1, "-f can be passed only once"); fflag = realpath(optarg, NULL); if (fflag == NULL) err(1, "realpath"); break; case 'o': if (!strcmp(optarg, "async")) mdio.md_options |= MD_ASYNC; else if (!strcmp(optarg, "noasync")) mdio.md_options &= ~MD_ASYNC; else if (!strcmp(optarg, "cluster")) mdio.md_options |= MD_CLUSTER; else if (!strcmp(optarg, "nocluster")) mdio.md_options &= ~MD_CLUSTER; else if (!strcmp(optarg, "compress")) mdio.md_options |= MD_COMPRESS; else if (!strcmp(optarg, "nocompress")) mdio.md_options &= ~MD_COMPRESS; else if (!strcmp(optarg, "force")) mdio.md_options |= MD_FORCE; else if (!strcmp(optarg, "noforce")) mdio.md_options &= ~MD_FORCE; else if (!strcmp(optarg, "readonly")) mdio.md_options |= MD_READONLY; else if (!strcmp(optarg, "noreadonly")) mdio.md_options &= ~MD_READONLY; else if (!strcmp(optarg, "reserve")) mdio.md_options |= 
MD_RESERVE; else if (!strcmp(optarg, "noreserve")) mdio.md_options &= ~MD_RESERVE; + else if (!strcmp(optarg, "verify")) + mdio.md_options |= MD_VERIFY; + else if (!strcmp(optarg, "noverify")) + mdio.md_options &= ~MD_VERIFY; else errx(1, "unknown option: %s", optarg); break; case 'S': mdio.md_sectorsize = strtoul(optarg, &p, 0); break; case 's': if (sflag != NULL) errx(1, "-s can be passed only once"); sflag = optarg; mdio.md_mediasize = (off_t)strtoumax(optarg, &p, 0); if (p == NULL || *p == '\0') mdio.md_mediasize *= DEV_BSIZE; else if (*p == 'b' || *p == 'B') ; /* do nothing */ else if (*p == 'k' || *p == 'K') mdio.md_mediasize <<= 10; else if (*p == 'm' || *p == 'M') mdio.md_mediasize <<= 20; else if (*p == 'g' || *p == 'G') mdio.md_mediasize <<= 30; else if (*p == 't' || *p == 'T') { mdio.md_mediasize <<= 30; mdio.md_mediasize <<= 10; } else if (*p == 'p' || *p == 'P') { mdio.md_mediasize <<= 30; mdio.md_mediasize <<= 20; } else errx(1, "unknown suffix on -s argument"); break; case 'u': if (!strncmp(optarg, _PATH_DEV, sizeof(_PATH_DEV) - 1)) optarg += sizeof(_PATH_DEV) - 1; if (!strncmp(optarg, MD_NAME, sizeof(MD_NAME) - 1)) optarg += sizeof(MD_NAME) - 1; uflag = optarg; break; case 'v': vflag = OPT_VERBOSE; break; case 'x': mdio.md_fwsectors = strtoul(optarg, &p, 0); break; case 'y': mdio.md_fwheads = strtoul(optarg, &p, 0); break; default: usage(); } } argc -= optind; argv += optind; if (action == UNSET) action = ATTACH; if (action == ATTACH) { if (tflag == NULL) { /* * Try to infer the type based on other arguments. 
*/ if (fflag != NULL || argc > 0) { /* Imply ``-t vnode'' */ mdio.md_type = MD_VNODE; mdio.md_options |= MD_CLUSTER | MD_AUTOUNIT | MD_COMPRESS; } else if (sflag != NULL) { /* Imply ``-t swap'' */ mdio.md_type = MD_SWAP; mdio.md_options |= MD_CLUSTER | MD_AUTOUNIT | MD_COMPRESS; } else errx(1, "unable to determine type"); } if ((fflag != NULL || argc > 0) && mdio.md_type != MD_VNODE) errx(1, "only -t vnode can be used with file name"); if (mdio.md_type == MD_VNODE) { if (fflag != NULL) { if (argc != 0) usage(); md_set_file(fflag); } else { if (argc != 1) usage(); md_set_file(*argv); } if ((mdio.md_options & MD_READONLY) == 0 && access(mdio.md_file, W_OK) < 0 && (errno == EACCES || errno == EPERM || errno == EROFS)) { warnx("WARNING: opening backing store: %s " "readonly", mdio.md_file); mdio.md_options |= MD_READONLY; } } if ((mdio.md_type == MD_MALLOC || mdio.md_type == MD_SWAP || mdio.md_type == MD_NULL) && sflag == NULL) errx(1, "must specify -s for -t malloc, -t swap, " "or -t null"); if (mdio.md_type == MD_VNODE && mdio.md_file[0] == '\0') errx(1, "must specify -f for -t vnode"); } else { if (mdio.md_sectorsize != 0) errx(1, "-S can only be used with -a"); if (action != RESIZE && sflag != NULL) errx(1, "-s can only be used with -a and -r"); if (mdio.md_fwsectors != 0) errx(1, "-x can only be used with -a"); if (mdio.md_fwheads != 0) errx(1, "-y can only be used with -a"); if (fflag != NULL && action != LIST) errx(1, "-f can only be used with -a and -l"); if (tflag != NULL) errx(1, "-t can only be used with -a"); if (argc > 0) errx(1, "file can only be used with -a"); if ((action != DETACH && action != RESIZE) && (mdio.md_options & ~MD_AUTOUNIT) != 0) errx(1, "-o can only be used with -a, -d, and -r"); if (action == DETACH && (mdio.md_options & ~(MD_FORCE | MD_AUTOUNIT)) != 0) errx(1, "only -o [no]force can be used with -d"); if (action == RESIZE && (mdio.md_options & ~(MD_FORCE | MD_RESERVE | MD_AUTOUNIT)) != 0) errx(1, "only -o [no]force and -o [no]reserve 
can be used with -r"); } if (action == RESIZE && sflag == NULL) errx(1, "must specify -s for -r"); if (action != LIST && vflag == OPT_VERBOSE) errx(1, "-v can only be used with -l"); if (uflag != NULL) { mdio.md_unit = strtoul(uflag, &p, 0); if (mdio.md_unit == (unsigned)ULONG_MAX || *p != '\0') errx(1, "bad unit: %s", uflag); mdio.md_options &= ~MD_AUTOUNIT; } mdio.md_version = MDIOVERSION; if (!kld_isloaded("g_md") && kld_load("geom_md") == -1) err(1, "failed to load geom_md module"); fd = open(_PATH_DEV MDCTL_NAME, O_RDWR, 0); if (fd < 0) err(1, "open(%s%s)", _PATH_DEV, MDCTL_NAME); if (action == ATTACH) { i = ioctl(fd, MDIOCATTACH, &mdio); if (i < 0) err(1, "ioctl(%s%s)", _PATH_DEV, MDCTL_NAME); if (mdio.md_options & MD_AUTOUNIT) printf("%s%d\n", nflag ? "" : MD_NAME, mdio.md_unit); } else if (action == DETACH) { if (mdio.md_options & MD_AUTOUNIT) errx(1, "-d requires -u"); i = ioctl(fd, MDIOCDETACH, &mdio); if (i < 0) err(1, "ioctl(%s%s)", _PATH_DEV, MDCTL_NAME); } else if (action == RESIZE) { if (mdio.md_options & MD_AUTOUNIT) errx(1, "-r requires -u"); i = ioctl(fd, MDIOCRESIZE, &mdio); if (i < 0) err(1, "ioctl(%s%s)", _PATH_DEV, MDCTL_NAME); } else if (action == LIST) { if (mdio.md_options & MD_AUTOUNIT) { /* * Listing all devices. This is why we pass NULL * together with OPT_LIST. */ return (md_list(NULL, OPT_LIST | vflag, fflag)); } else return (md_query(uflag, vflag, fflag)); } else usage(); close(fd); return (0); } static void md_set_file(const char *fn) { struct stat sb; int fd; if (realpath(fn, mdio.md_file) == NULL) err(1, "could not find full path for %s", fn); fd = open(mdio.md_file, O_RDONLY); if (fd < 0) err(1, "could not open %s", fn); if (fstat(fd, &sb) == -1) err(1, "could not stat %s", fn); if (!S_ISREG(sb.st_mode)) errx(1, "%s is not a regular file", fn); if (mdio.md_mediasize == 0) mdio.md_mediasize = sb.st_size; close(fd); } /* * Lists md(4) disks. Is used also as a query routine, since it handles XML * interface. 
'units' can be NULL for listing memory disks. It might be a * comma-separated string containing md(4) disk names. 'opt' distinguishes * between list and query mode. */ static int md_list(const char *units, int opt, const char *fflag) { struct gmesh gm; struct gprovider *pp; struct gconf *gc; struct gident *gid; struct devstat *gsp; struct ggeom *gg; struct gclass *gcl; void *sq; int retcode, ffound, ufound; char *type, *file, *length; type = file = length = NULL; retcode = geom_gettree(&gm); if (retcode != 0) return (-1); retcode = geom_stats_open(); if (retcode != 0) return (-1); sq = geom_stats_snapshot_get(); if (sq == NULL) return (-1); ffound = ufound = 0; while ((gsp = geom_stats_snapshot_next(sq)) != NULL) { gid = geom_lookupid(&gm, gsp->id); if (gid == NULL) continue; if (gid->lg_what == ISPROVIDER) { pp = gid->lg_ptr; gg = pp->lg_geom; gcl = gg->lg_class; if (strcmp(gcl->lg_name, CLASS_NAME_MD) != 0) continue; if ((opt & OPT_UNIT) && (units != NULL)) { retcode = md_find(units, pp->lg_name); if (retcode != 1) continue; else ufound = 1; } gc = &pp->lg_config; type = geom_config_get(gc, "type"); if (type != NULL && (strcmp(type, "vnode") == 0 || strcmp(type, "preload") == 0)) { file = geom_config_get(gc, "file"); if (fflag != NULL && strcmp(fflag, file) != 0) continue; else ffound = 1; } else if (fflag != NULL) continue; if (nflag && strncmp(pp->lg_name, MD_NAME, 2) == 0) printf("%s", pp->lg_name + 2); else printf("%s", pp->lg_name); if (opt & OPT_VERBOSE || ((opt & OPT_UNIT) && fflag == NULL)) { length = geom_config_get(gc, "length"); printf("\t%s\t", type); if (length != NULL) md_prthumanval(length); if (file != NULL) { printf("\t%s", file); file = NULL; } } opt |= OPT_DONE; if ((opt & OPT_LIST) && !(opt & OPT_VERBOSE)) printf(" "); else printf("\n"); } } if ((opt & OPT_LIST) && (opt & OPT_DONE) && !(opt & OPT_VERBOSE)) printf("\n"); /* XXX: Check if it's enough to clean everything. 
*/ geom_stats_snapshot_free(sq); if (opt & OPT_UNIT) { if (((fflag == NULL) && ufound) || ((fflag == NULL) && (units != NULL) && ufound) || ((fflag != NULL) && ffound) || ((fflag != NULL) && (units != NULL) && ufound && ffound)) return (0); } else if (opt & OPT_LIST) { if ((fflag == NULL) || ((fflag != NULL) && ffound)) return (0); } return (-1); } /* * Returns value of 'name' from gconfig structure. */ static char * geom_config_get(struct gconf *g, const char *name) { struct gconfig *gce; LIST_FOREACH(gce, g, lg_config) { if (strcmp(gce->lg_name, name) == 0) return (gce->lg_val); } return (NULL); } /* * List is comma separated list of MD disks. name is a * device name we look for. Returns 1 if found and 0 * otherwise. */ static int md_find(const char *list, const char *name) { int ret; char num[PATH_MAX]; char *ptr, *p, *u; ret = 0; ptr = strdup(list); if (ptr == NULL) return (-1); for (p = ptr; (u = strsep(&p, ",")) != NULL;) { if (strncmp(u, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) u += sizeof(_PATH_DEV) - 1; /* Just in case user specified number instead of full name */ snprintf(num, sizeof(num), "%s%s", MD_NAME, u); if (strcmp(u, name) == 0 || strcmp(num, name) == 0) { ret = 1; break; } } free(ptr); return (ret); } static void md_prthumanval(char *length) { char buf[6]; uintmax_t bytes; char *endptr; errno = 0; bytes = strtoumax(length, &endptr, 10); if (errno != 0 || *endptr != '\0' || bytes > INT64_MAX) return; humanize_number(buf, sizeof(buf), (int64_t)bytes, "", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); (void)printf("%6s", buf); } static int md_query(const char *name, const int opt, const char *fflag) { return (md_list(name, opt | OPT_UNIT, fflag)); } diff --git a/sbin/mount/mount.c b/sbin/mount/mount.c index 0cd2f860548d..e474eac483e5 100644 --- a/sbin/mount/mount.c +++ b/sbin/mount/mount.c @@ -1,963 +1,964 @@ /*- * Copyright (c) 1980, 1989, 1993, 1994 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1989, 1993, 1994\n\ The Regents of the University of California. 
All rights reserved.\n"; #if 0 static char sccsid[] = "@(#)mount.c 8.25 (Berkeley) 5/8/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "extern.h" #include "mntopts.h" #include "pathnames.h" /* `meta' options */ #define MOUNT_META_OPTION_FSTAB "fstab" #define MOUNT_META_OPTION_CURRENT "current" static int debug, fstab_style, verbose; struct cpa { char **a; ssize_t sz; int c; }; char *catopt(char *, const char *); struct statfs *getmntpt(const char *); int hasopt(const char *, const char *); int ismounted(struct fstab *, struct statfs *, int); int isremountable(const char *); void mangle(char *, struct cpa *); char *update_options(char *, char *, int); int mountfs(const char *, const char *, const char *, int, const char *, const char *); void remopt(char *, const char *); void prmount(struct statfs *); void putfsent(struct statfs *); void usage(void); char *flags2opts(int); /* Map from mount options to printable formats. */ static struct opt { uint64_t o_opt; const char *o_name; } optnames[] = { { MNT_ASYNC, "asynchronous" }, { MNT_EXPORTED, "NFS exported" }, { MNT_LOCAL, "local" }, { MNT_NOATIME, "noatime" }, { MNT_NOEXEC, "noexec" }, { MNT_NOSUID, "nosuid" }, { MNT_NOSYMFOLLOW, "nosymfollow" }, { MNT_QUOTA, "with quotas" }, { MNT_RDONLY, "read-only" }, { MNT_SYNCHRONOUS, "synchronous" }, { MNT_UNION, "union" }, { MNT_NOCLUSTERR, "noclusterr" }, { MNT_NOCLUSTERW, "noclusterw" }, { MNT_SUIDDIR, "suiddir" }, { MNT_SOFTDEP, "soft-updates" }, { MNT_SUJ, "journaled soft-updates" }, { MNT_MULTILABEL, "multilabel" }, { MNT_ACLS, "acls" }, { MNT_NFS4ACLS, "nfsv4acls" }, { MNT_GJOURNAL, "gjournal" }, { MNT_AUTOMOUNTED, "automounted" }, + { MNT_VERIFIED, "verified" }, { 0, NULL } }; /* * List of VFS types that can be remounted without becoming mounted on top * of each other. 
* XXX Is this list correct? */ static const char * remountable_fs_names[] = { "ufs", "ffs", "ext2fs", 0 }; static const char userquotaeq[] = "userquota="; static const char groupquotaeq[] = "groupquota="; static char *mountprog = NULL; static int use_mountprog(const char *vfstype) { /* XXX: We need to get away from implementing external mount * programs for every filesystem, and move towards having * each filesystem properly implement the nmount() system call. */ unsigned int i; const char *fs[] = { "cd9660", "mfs", "msdosfs", "nfs", "nullfs", "smbfs", "udf", "unionfs", NULL }; if (mountprog != NULL) return (1); for (i = 0; fs[i] != NULL; ++i) { if (strcmp(vfstype, fs[i]) == 0) return (1); } return (0); } static int exec_mountprog(const char *name, const char *execname, char *const argv[]) { pid_t pid; int status; switch (pid = fork()) { case -1: /* Error. */ warn("fork"); exit (1); case 0: /* Child. */ /* Go find an executable. */ execvP(execname, _PATH_SYSPATH, argv); if (errno == ENOENT) { warn("exec %s not found", execname); if (execname[0] != '/') { warnx("in path: %s", _PATH_SYSPATH); } } exit(1); default: /* Parent. */ if (waitpid(pid, &status, 0) < 0) { warn("waitpid"); return (1); } if (WIFEXITED(status)) { if (WEXITSTATUS(status) != 0) return (WEXITSTATUS(status)); } else if (WIFSIGNALED(status)) { warnx("%s: %s", name, sys_siglist[WTERMSIG(status)]); return (1); } break; } return (0); } static int specified_ro(const char *arg) { char *optbuf, *opt; int ret = 0; optbuf = strdup(arg); if (optbuf == NULL) err(1, NULL); for (opt = optbuf; (opt = strtok(opt, ",")) != NULL; opt = NULL) { if (strcmp(opt, "ro") == 0) { ret = 1; break; } } free(optbuf); return (ret); } static void restart_mountd(void) { struct pidfh *pfh; pid_t mountdpid; pfh = pidfile_open(_PATH_MOUNTDPID, 0600, &mountdpid); if (pfh != NULL) { /* Mountd is not running. */ pidfile_remove(pfh); return; } if (errno != EEXIST) { /* Cannot open pidfile for some reason. 
*/ return; } /* We have mountd(8) PID in mountdpid varible, let's signal it. */ if (kill(mountdpid, SIGHUP) == -1) err(1, "signal mountd"); } int main(int argc, char *argv[]) { const char *mntfromname, **vfslist, *vfstype; struct fstab *fs; struct statfs *mntbuf; int all, ch, i, init_flags, late, failok, mntsize, rval, have_fstab, ro; int onlylate; char *cp, *ep, *options; all = init_flags = late = onlylate = 0; ro = 0; options = NULL; vfslist = NULL; vfstype = "ufs"; while ((ch = getopt(argc, argv, "adF:fLlno:prt:uvw")) != -1) switch (ch) { case 'a': all = 1; break; case 'd': debug = 1; break; case 'F': setfstab(optarg); break; case 'f': init_flags |= MNT_FORCE; break; case 'L': onlylate = 1; late = 1; break; case 'l': late = 1; break; case 'n': /* For compatibility with the Linux version of mount. */ break; case 'o': if (*optarg) { options = catopt(options, optarg); if (specified_ro(optarg)) ro = 1; } break; case 'p': fstab_style = 1; verbose = 1; break; case 'r': options = catopt(options, "ro"); ro = 1; break; case 't': if (vfslist != NULL) errx(1, "only one -t option may be specified"); vfslist = makevfslist(optarg); vfstype = optarg; break; case 'u': init_flags |= MNT_UPDATE; break; case 'v': verbose = 1; break; case 'w': options = catopt(options, "noro"); break; case '?': default: usage(); /* NOTREACHED */ } argc -= optind; argv += optind; #define BADTYPE(type) \ (strcmp(type, FSTAB_RO) && \ strcmp(type, FSTAB_RW) && strcmp(type, FSTAB_RQ)) if ((init_flags & MNT_UPDATE) && (ro == 0)) options = catopt(options, "noro"); rval = 0; switch (argc) { case 0: if ((mntsize = getmntinfo(&mntbuf, MNT_NOWAIT)) == 0) err(1, "getmntinfo"); if (all) { while ((fs = getfsent()) != NULL) { if (BADTYPE(fs->fs_type)) continue; if (checkvfsname(fs->fs_vfstype, vfslist)) continue; if (hasopt(fs->fs_mntops, "noauto")) continue; if (!hasopt(fs->fs_mntops, "late") && onlylate) continue; if (hasopt(fs->fs_mntops, "late") && !late) continue; if (hasopt(fs->fs_mntops, "failok")) failok 
= 1; else failok = 0; if (!(init_flags & MNT_UPDATE) && ismounted(fs, mntbuf, mntsize)) continue; options = update_options(options, fs->fs_mntops, mntbuf->f_flags); if (mountfs(fs->fs_vfstype, fs->fs_spec, fs->fs_file, init_flags, options, fs->fs_mntops) && !failok) rval = 1; } } else if (fstab_style) { for (i = 0; i < mntsize; i++) { if (checkvfsname(mntbuf[i].f_fstypename, vfslist)) continue; putfsent(&mntbuf[i]); } } else { for (i = 0; i < mntsize; i++) { if (checkvfsname(mntbuf[i].f_fstypename, vfslist)) continue; if (!verbose && (mntbuf[i].f_flags & MNT_IGNORE) != 0) continue; prmount(&mntbuf[i]); } } exit(rval); case 1: if (vfslist != NULL) usage(); rmslashes(*argv, *argv); if (init_flags & MNT_UPDATE) { mntfromname = NULL; have_fstab = 0; if ((mntbuf = getmntpt(*argv)) == NULL) errx(1, "not currently mounted %s", *argv); /* * Only get the mntflags from fstab if both mntpoint * and mntspec are identical. Also handle the special * case where just '/' is mounted and 'spec' is not * identical with the one from fstab ('/dev' is missing * in the spec-string at boot-time). 
*/ if ((fs = getfsfile(mntbuf->f_mntonname)) != NULL) { if (strcmp(fs->fs_spec, mntbuf->f_mntfromname) == 0 && strcmp(fs->fs_file, mntbuf->f_mntonname) == 0) { have_fstab = 1; mntfromname = mntbuf->f_mntfromname; } else if (argv[0][0] == '/' && argv[0][1] == '\0') { fs = getfsfile("/"); have_fstab = 1; mntfromname = fs->fs_spec; } } if (have_fstab) { options = update_options(options, fs->fs_mntops, mntbuf->f_flags); } else { mntfromname = mntbuf->f_mntfromname; options = update_options(options, NULL, mntbuf->f_flags); } rval = mountfs(mntbuf->f_fstypename, mntfromname, mntbuf->f_mntonname, init_flags, options, 0); break; } if ((fs = getfsfile(*argv)) == NULL && (fs = getfsspec(*argv)) == NULL) errx(1, "%s: unknown special file or file system", *argv); if (BADTYPE(fs->fs_type)) errx(1, "%s has unknown file system type", *argv); rval = mountfs(fs->fs_vfstype, fs->fs_spec, fs->fs_file, init_flags, options, fs->fs_mntops); break; case 2: /* * If -t flag has not been specified, the path cannot be * found, spec contains either a ':' or a '@', then assume * that an NFS file system is being specified ala Sun. * Check if the hostname contains only allowed characters * to reduce false positives. IPv6 addresses containing * ':' will be correctly parsed only if the separator is '@'. * The definition of a valid hostname is taken from RFC 1034. */ if (vfslist == NULL && ((ep = strchr(argv[0], '@')) != NULL || (ep = strchr(argv[0], ':')) != NULL)) { if (*ep == '@') { cp = ep + 1; ep = cp + strlen(cp); } else cp = argv[0]; while (cp != ep) { if (!isdigit(*cp) && !isalpha(*cp) && *cp != '.' && *cp != '-' && *cp != ':') break; cp++; } if (cp == ep) vfstype = "nfs"; } rval = mountfs(vfstype, argv[0], argv[1], init_flags, options, NULL); break; default: usage(); /* NOTREACHED */ } /* * If the mount was successfully, and done by root, tell mountd the * good news. 
/*
 * Report whether `option' is in effect in the comma-separated option
 * list `mntopts'.
 *
 * A leading "no" on `option' asks for the negated form: hasopt(l, "noauto")
 * is true when "noauto" is in effect.  List entries are matched without
 * regard to case.  The list is scanned left to right and every matching
 * entry overwrites the result, so the last mention of an option (either
 * "foo" or "nofoo") wins.  Empty entries (",,") are ignored.
 *
 * The list is scanned in place: nothing is allocated and no strtok(3)
 * state is touched, so the function cannot fail and is safe to call while
 * another tokenisation is in progress.  A NULL list or option yields 0.
 */
int
hasopt(const char *mntopts, const char *option)
{
	const char *tok, *next;
	size_t optlen, toklen;
	int negative, found;

	if (mntopts == NULL || option == NULL)
		return (0);

	if (option[0] == 'n' && option[1] == 'o') {
		negative = 1;
		option += 2;
	} else
		negative = 0;
	optlen = strlen(option);

	found = 0;
	for (tok = mntopts; *tok != '\0'; tok = next) {
		toklen = strcspn(tok, ",");
		next = tok + toklen;
		if (*next == ',')
			next++;
		if (toklen == 0)
			continue;

		if (toklen >= 2 && tok[0] == 'n' && tok[1] == 'o') {
			/*
			 * A "no"-prefixed entry is only ever compared with
			 * its prefix stripped, never as a whole word.
			 */
			if (toklen - 2 == optlen &&
			    strncasecmp(tok + 2, option, optlen) == 0)
				found = negative;
		} else if (toklen == optlen &&
		    strncasecmp(tok, option, optlen) == 0)
			found = !negative;
	}
	return (found);
}
8 : sa->sz * 2; sa->a = realloc(sa->a, sizeof(*sa->a) * sa->sz); if (sa->a == NULL) errx(1, "realloc failed"); } sa->a[++sa->c] = arg; } int mountfs(const char *vfstype, const char *spec, const char *name, int flags, const char *options, const char *mntopts) { struct statfs sf; int i, ret; char *optbuf, execname[PATH_MAX], mntpath[PATH_MAX]; static struct cpa mnt_argv; /* resolve the mountpoint with realpath(3) */ if (checkpath(name, mntpath) != 0) { warn("%s", mntpath); return (1); } name = mntpath; if (mntopts == NULL) mntopts = ""; optbuf = catopt(strdup(mntopts), options); if (strcmp(name, "/") == 0) flags |= MNT_UPDATE; if (flags & MNT_FORCE) optbuf = catopt(optbuf, "force"); if (flags & MNT_RDONLY) optbuf = catopt(optbuf, "ro"); /* * XXX * The mount_mfs (newfs) command uses -o to select the * optimization mode. We don't pass the default "-o rw" * for that reason. */ if (flags & MNT_UPDATE) optbuf = catopt(optbuf, "update"); /* Compatibility glue. */ if (strcmp(vfstype, "msdos") == 0) { warnx( "Using \"-t msdosfs\", since \"-t msdos\" is deprecated."); vfstype = "msdosfs"; } /* Construct the name of the appropriate mount command */ (void)snprintf(execname, sizeof(execname), "mount_%s", vfstype); mnt_argv.c = -1; append_arg(&mnt_argv, execname); mangle(optbuf, &mnt_argv); if (mountprog != NULL) strlcpy(execname, mountprog, sizeof(execname)); append_arg(&mnt_argv, strdup(spec)); append_arg(&mnt_argv, strdup(name)); append_arg(&mnt_argv, NULL); if (debug) { if (use_mountprog(vfstype)) printf("exec: %s", execname); else printf("mount -t %s", vfstype); for (i = 1; i < mnt_argv.c; i++) (void)printf(" %s", mnt_argv.a[i]); (void)printf("\n"); free(optbuf); free(mountprog); mountprog = NULL; return (0); } if (use_mountprog(vfstype)) { ret = exec_mountprog(name, execname, mnt_argv.a); } else { ret = mount_fs(vfstype, mnt_argv.c, mnt_argv.a); } free(optbuf); free(mountprog); mountprog = NULL; if (verbose) { if (statfs(name, &sf) < 0) { warn("statfs %s", name); return 
/*
 * Append option string `s1' to the heap-allocated option list `s0',
 * separating the two with a comma.
 *
 * Ownership: `s0' (which may be NULL) is consumed.  When something is
 * appended, a newly allocated string is returned and `s0' is freed; when
 * `s1' is NULL or empty, `s0' itself is returned untouched.  The caller
 * owns the returned pointer.
 *
 * A NULL or empty `s0' produces a copy of `s1' with no leading comma.
 * Allocation failure terminates the program through errx(3) on every
 * path, so a non-empty `s1' never results in a NULL return.
 */
char *
catopt(char *s0, const char *s1)
{
	char *cp;
	size_t len0, len1;

	if (s1 == NULL || *s1 == '\0')
		return (s0);

	len0 = (s0 != NULL) ? strlen(s0) : 0;
	len1 = strlen(s1);

	/* Room for s0, an optional ',' separator, s1 and the NUL. */
	cp = malloc(len0 + len1 + 2);
	if (cp == NULL)
		errx(1, "malloc failed");

	if (len0 != 0) {
		memcpy(cp, s0, len0);
		cp[len0++] = ',';
	}
	memcpy(cp + len0, s1, len1 + 1);

	free(s0);
	return (cp);
}
nmount(). * or external mount program. noauto is * only used to prevent mounting a filesystem * when 'mount -a' is specified, and is * not a real mount option. */ continue; } else if (strcmp(p, "late") == 0) { /* * "late" is used to prevent certain file * systems from being mounted before late * in the boot cycle; for instance, * loopback NFS mounts can't be mounted * before mountd starts. */ continue; } else if (strcmp(p, "failok") == 0) { /* * "failok" is used to prevent certain file * systems from being causing the system to * drop into single user mode in the boot * cycle, and is not a real mount option. */ continue; } else if (strncmp(p, "mountprog", 9) == 0) { /* * "mountprog" is used to force the use of * userland mount programs. */ val = strchr(p, '='); if (val != NULL) { ++val; if (*val != '\0') mountprog = strdup(val); } if (mountprog == NULL) { errx(1, "Need value for -o mountprog"); } continue; } else if (strcmp(p, "userquota") == 0) { continue; } else if (strncmp(p, userquotaeq, sizeof(userquotaeq) - 1) == 0) { continue; } else if (strcmp(p, "groupquota") == 0) { continue; } else if (strncmp(p, groupquotaeq, sizeof(groupquotaeq) - 1) == 0) { continue; } else if (*p == '-') { append_arg(a, p); p = strchr(p, '='); if (p != NULL) { *p = '\0'; append_arg(a, p + 1); } } else { append_arg(a, strdup("-o")); append_arg(a, p); } } } char * update_options(char *opts, char *fstab, int curflags) { char *o, *p; char *cur; char *expopt, *newopt, *tmpopt; if (opts == NULL) return (strdup("")); /* remove meta options from list */ remopt(fstab, MOUNT_META_OPTION_FSTAB); remopt(fstab, MOUNT_META_OPTION_CURRENT); cur = flags2opts(curflags); /* * Expand all meta-options passed to us first. 
/*
 * Delete every entry equal to `opt' from the comma-separated list held in
 * `string'.  The list is rewritten in place; surviving entries are packed
 * towards the front and joined by single commas.  Nothing happens when
 * either argument is NULL or empty.
 */
void
remopt(char *string, const char *opt)
{
	char *rest, *tok, *out;
	size_t len;

	if (string == NULL || opt == NULL)
		return;
	if (*string == '\0' || *opt == '\0')
		return;

	out = string;
	rest = string;
	while ((tok = strsep(&rest, ",")) != NULL) {
		if (strcmp(opt, tok) == 0)
			continue;

		/* Step past the separator written after the last kept entry. */
		if (*out == ',' && *tok != '\0')
			out++;

		/* The write cursor never runs ahead of the token being read. */
		len = strlen(tok);
		memmove(out, tok, len + 1);
		out += len;
		*out = ',';
	}
	/* Replace the trailing separator (if any) with the terminator. */
	*out = '\0';
}
"\t" : " "); l = strlen(ent->f_mntonname); printf("%s%s%s%s", ent->f_mntonname, l < 8 ? "\t" : "", l < 16 ? "\t" : "", l < 24 ? "\t" : " "); printf("%s\t", ent->f_fstypename); l = strlen(opts); printf("%s%s", opts, l < 8 ? "\t" : " "); free(opts); if ((fst = getfsspec(ent->f_mntfromname))) printf("\t%u %u\n", fst->fs_freq, fst->fs_passno); else if ((fst = getfsfile(ent->f_mntonname))) printf("\t%u %u\n", fst->fs_freq, fst->fs_passno); else if (strcmp(ent->f_fstypename, "ufs") == 0) { if (strcmp(ent->f_mntonname, "/") == 0) printf("\t1 1\n"); else printf("\t2 2\n"); } else printf("\t0 0\n"); } char * flags2opts(int flags) { char *res; res = NULL; if (flags & MNT_RDONLY) res = catopt(res, "ro"); if (flags & MNT_SYNCHRONOUS) res = catopt(res, "sync"); if (flags & MNT_NOEXEC) res = catopt(res, "noexec"); if (flags & MNT_NOSUID) res = catopt(res, "nosuid"); if (flags & MNT_UNION) res = catopt(res, "union"); if (flags & MNT_ASYNC) res = catopt(res, "async"); if (flags & MNT_NOATIME) res = catopt(res, "noatime"); if (flags & MNT_NOCLUSTERR) res = catopt(res, "noclusterr"); if (flags & MNT_NOCLUSTERW) res = catopt(res, "noclusterw"); if (flags & MNT_NOSYMFOLLOW) res = catopt(res, "nosymfollow"); if (flags & MNT_SUIDDIR) res = catopt(res, "suiddir"); if (flags & MNT_MULTILABEL) res = catopt(res, "multilabel"); if (flags & MNT_ACLS) res = catopt(res, "acls"); if (flags & MNT_NFS4ACLS) res = catopt(res, "nfsv4acls"); return (res); } diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index ae33c1e208e8..dd0b31c63bdc 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -1,1880 +1,1890 @@ /*- * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. 
Poul-Henning Kamp * ---------------------------------------------------------------------------- * * $FreeBSD$ * */ /*- * The following functions are based in the vn(4) driver: mdstart_swap(), * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(), * and as such under the following copyright: * * Copyright (c) 1988 University of Utah. * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah Hdr: vn.c 1.13 94/04/02 * * from: @(#)vn.c 8.6 (Berkeley) 4/1/94 * From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03 */ #include "opt_rootdevname.h" #include "opt_geom.h" #include "opt_md.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #define MD_MODVER 1 #define MD_SHUTDOWN 0x10000 /* Tell worker thread to terminate. */ #define MD_EXITING 0x20000 /* Worker thread is exiting. */ #ifndef MD_NSECT #define MD_NSECT (10000 * 2) #endif static MALLOC_DEFINE(M_MD, "md_disk", "Memory Disk"); static MALLOC_DEFINE(M_MDSECT, "md_sectors", "Memory Disk Sectors"); static int md_debug; SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0, "Enable md(4) debug messages"); static int md_malloc_wait; SYSCTL_INT(_vm, OID_AUTO, md_malloc_wait, CTLFLAG_RW, &md_malloc_wait, 0, "Allow malloc to wait for memory allocations"); #if defined(MD_ROOT) && !defined(MD_ROOT_FSTYPE) #define MD_ROOT_FSTYPE "ufs" #endif #if defined(MD_ROOT) /* * Preloaded image gets put here. */ #if defined(MD_ROOT_SIZE) /* * We put the mfs_root symbol into the oldmfs section of the kernel object file. 
* Applications that patch the object with the image can determine * the size looking at the oldmfs section size within the kernel. */ u_char mfs_root[MD_ROOT_SIZE*1024] __attribute__ ((section ("oldmfs"))); const int mfs_root_size = sizeof(mfs_root); #else extern volatile u_char __weak_symbol mfs_root; extern volatile u_char __weak_symbol mfs_root_end; __GLOBL(mfs_root); __GLOBL(mfs_root_end); #define mfs_root_size ((uintptr_t)(&mfs_root_end - &mfs_root)) #endif #endif static g_init_t g_md_init; static g_fini_t g_md_fini; static g_start_t g_md_start; static g_access_t g_md_access; static void g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp); static struct cdev *status_dev = NULL; static struct sx md_sx; static struct unrhdr *md_uh; static d_ioctl_t mdctlioctl; static struct cdevsw mdctl_cdevsw = { .d_version = D_VERSION, .d_ioctl = mdctlioctl, .d_name = MD_NAME, }; struct g_class g_md_class = { .name = "MD", .version = G_VERSION, .init = g_md_init, .fini = g_md_fini, .start = g_md_start, .access = g_md_access, .dumpconf = g_md_dumpconf, }; DECLARE_GEOM_CLASS(g_md_class, g_md); static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(md_softc_list); #define NINDIR (PAGE_SIZE / sizeof(uintptr_t)) #define NMASK (NINDIR-1) static int nshift; static int md_vnode_pbuf_freecnt; struct indir { uintptr_t *array; u_int total; u_int used; u_int shift; }; struct md_s { int unit; LIST_ENTRY(md_s) list; struct bio_queue_head bio_queue; struct mtx queue_mtx; struct mtx stat_mtx; struct cdev *dev; enum md_types type; off_t mediasize; unsigned sectorsize; unsigned opencount; unsigned fwheads; unsigned fwsectors; unsigned flags; char name[20]; struct proc *procp; struct g_geom *gp; struct g_provider *pp; int (*start)(struct md_s *sc, struct bio *bp); struct devstat *devstat; /* MD_MALLOC related fields */ struct indir *indir; uma_zone_t uma; /* MD_PRELOAD related fields */ u_char *pl_ptr; size_t 
pl_len; /* MD_VNODE related fields */ struct vnode *vnode; char file[PATH_MAX]; struct ucred *cred; /* MD_SWAP related fields */ vm_object_t object; }; static struct indir * new_indir(u_int shift) { struct indir *ip; ip = malloc(sizeof *ip, M_MD, (md_malloc_wait ? M_WAITOK : M_NOWAIT) | M_ZERO); if (ip == NULL) return (NULL); ip->array = malloc(sizeof(uintptr_t) * NINDIR, M_MDSECT, (md_malloc_wait ? M_WAITOK : M_NOWAIT) | M_ZERO); if (ip->array == NULL) { free(ip, M_MD); return (NULL); } ip->total = NINDIR; ip->shift = shift; return (ip); } static void del_indir(struct indir *ip) { free(ip->array, M_MDSECT); free(ip, M_MD); } static void destroy_indir(struct md_s *sc, struct indir *ip) { int i; for (i = 0; i < NINDIR; i++) { if (!ip->array[i]) continue; if (ip->shift) destroy_indir(sc, (struct indir*)(ip->array[i])); else if (ip->array[i] > 255) uma_zfree(sc->uma, (void *)(ip->array[i])); } del_indir(ip); } /* * This function does the math and allocates the top level "indir" structure * for a device of "size" sectors. */ static struct indir * dimension(off_t size) { off_t rcnt; struct indir *ip; int layer; rcnt = size; layer = 0; while (rcnt > NINDIR) { rcnt /= NINDIR; layer++; } /* * XXX: the top layer is probably not fully populated, so we allocate * too much space for ip->array in here. 
*/ ip = malloc(sizeof *ip, M_MD, M_WAITOK | M_ZERO); ip->array = malloc(sizeof(uintptr_t) * NINDIR, M_MDSECT, M_WAITOK | M_ZERO); ip->total = NINDIR; ip->shift = layer * nshift; return (ip); } /* * Read a given sector */ static uintptr_t s_read(struct indir *ip, off_t offset) { struct indir *cip; int idx; uintptr_t up; if (md_debug > 1) printf("s_read(%jd)\n", (intmax_t)offset); up = 0; for (cip = ip; cip != NULL;) { if (cip->shift) { idx = (offset >> cip->shift) & NMASK; up = cip->array[idx]; cip = (struct indir *)up; continue; } idx = offset & NMASK; return (cip->array[idx]); } return (0); } /* * Write a given sector, prune the tree if the value is 0 */ static int s_write(struct indir *ip, off_t offset, uintptr_t ptr) { struct indir *cip, *lip[10]; int idx, li; uintptr_t up; if (md_debug > 1) printf("s_write(%jd, %p)\n", (intmax_t)offset, (void *)ptr); up = 0; li = 0; cip = ip; for (;;) { lip[li++] = cip; if (cip->shift) { idx = (offset >> cip->shift) & NMASK; up = cip->array[idx]; if (up != 0) { cip = (struct indir *)up; continue; } /* Allocate branch */ cip->array[idx] = (uintptr_t)new_indir(cip->shift - nshift); if (cip->array[idx] == 0) return (ENOSPC); cip->used++; up = cip->array[idx]; cip = (struct indir *)up; continue; } /* leafnode */ idx = offset & NMASK; up = cip->array[idx]; if (up != 0) cip->used--; cip->array[idx] = ptr; if (ptr != 0) cip->used++; break; } if (cip->used != 0 || li == 1) return (0); li--; while (cip->used == 0 && cip != ip) { li--; idx = (offset >> lip[li]->shift) & NMASK; up = lip[li]->array[idx]; KASSERT(up == (uintptr_t)cip, ("md screwed up")); del_indir(cip); lip[li]->array[idx] = 0; lip[li]->used--; cip = lip[li]; } return (0); } static int g_md_access(struct g_provider *pp, int r, int w, int e) { struct md_s *sc; sc = pp->geom->softc; if (sc == NULL) { if (r <= 0 && w <= 0 && e <= 0) return (0); return (ENXIO); } r += pp->acr; w += pp->acw; e += pp->ace; if ((sc->flags & MD_READONLY) != 0 && w > 0) return (EROFS); if ((pp->acr 
+ pp->acw + pp->ace) == 0 && (r + w + e) > 0) { sc->opencount = 1; } else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) { sc->opencount = 0; } return (0); } static void g_md_start(struct bio *bp) { struct md_s *sc; sc = bp->bio_to->geom->softc; if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) { mtx_lock(&sc->stat_mtx); devstat_start_transaction_bio(sc->devstat, bp); mtx_unlock(&sc->stat_mtx); } mtx_lock(&sc->queue_mtx); bioq_disksort(&sc->bio_queue, bp); mtx_unlock(&sc->queue_mtx); wakeup(sc); } #define MD_MALLOC_MOVE_ZERO 1 #define MD_MALLOC_MOVE_FILL 2 #define MD_MALLOC_MOVE_READ 3 #define MD_MALLOC_MOVE_WRITE 4 #define MD_MALLOC_MOVE_CMP 5 static int md_malloc_move_ma(vm_page_t **mp, int *ma_offs, unsigned sectorsize, void *ptr, u_char fill, int op) { struct sf_buf *sf; vm_page_t m, *mp1; char *p, first; off_t *uc; unsigned n; int error, i, ma_offs1, sz, first_read; m = NULL; error = 0; sf = NULL; /* if (op == MD_MALLOC_MOVE_CMP) { gcc */ first = 0; first_read = 0; uc = ptr; mp1 = *mp; ma_offs1 = *ma_offs; /* } */ sched_pin(); for (n = sectorsize; n != 0; n -= sz) { sz = imin(PAGE_SIZE - *ma_offs, n); if (m != **mp) { if (sf != NULL) sf_buf_free(sf); m = **mp; sf = sf_buf_alloc(m, SFB_CPUPRIVATE | (md_malloc_wait ? 
0 : SFB_NOWAIT)); if (sf == NULL) { error = ENOMEM; break; } } p = (char *)sf_buf_kva(sf) + *ma_offs; switch (op) { case MD_MALLOC_MOVE_ZERO: bzero(p, sz); break; case MD_MALLOC_MOVE_FILL: memset(p, fill, sz); break; case MD_MALLOC_MOVE_READ: bcopy(ptr, p, sz); cpu_flush_dcache(p, sz); break; case MD_MALLOC_MOVE_WRITE: bcopy(p, ptr, sz); break; case MD_MALLOC_MOVE_CMP: for (i = 0; i < sz; i++, p++) { if (!first_read) { *uc = (u_char)*p; first = *p; first_read = 1; } else if (*p != first) { error = EDOOFUS; break; } } break; default: KASSERT(0, ("md_malloc_move_ma unknown op %d\n", op)); break; } if (error != 0) break; *ma_offs += sz; *ma_offs %= PAGE_SIZE; if (*ma_offs == 0) (*mp)++; ptr = (char *)ptr + sz; } if (sf != NULL) sf_buf_free(sf); sched_unpin(); if (op == MD_MALLOC_MOVE_CMP && error != 0) { *mp = mp1; *ma_offs = ma_offs1; } return (error); } static int md_malloc_move_vlist(bus_dma_segment_t **pvlist, int *pma_offs, unsigned len, void *ptr, u_char fill, int op) { bus_dma_segment_t *vlist; uint8_t *p, *end, first; off_t *uc; int ma_offs, seg_len; vlist = *pvlist; ma_offs = *pma_offs; uc = ptr; for (; len != 0; len -= seg_len) { seg_len = imin(vlist->ds_len - ma_offs, len); p = (uint8_t *)(uintptr_t)vlist->ds_addr + ma_offs; switch (op) { case MD_MALLOC_MOVE_ZERO: bzero(p, seg_len); break; case MD_MALLOC_MOVE_FILL: memset(p, fill, seg_len); break; case MD_MALLOC_MOVE_READ: bcopy(ptr, p, seg_len); cpu_flush_dcache(p, seg_len); break; case MD_MALLOC_MOVE_WRITE: bcopy(p, ptr, seg_len); break; case MD_MALLOC_MOVE_CMP: end = p + seg_len; first = *uc = *p; /* Confirm all following bytes match the first */ while (++p < end) { if (*p != first) return (EDOOFUS); } break; default: KASSERT(0, ("md_malloc_move_vlist unknown op %d\n", op)); break; } ma_offs += seg_len; if (ma_offs == vlist->ds_len) { ma_offs = 0; vlist++; } ptr = (uint8_t *)ptr + seg_len; } *pvlist = vlist; *pma_offs = ma_offs; return (0); } static int mdstart_malloc(struct md_s *sc, struct bio *bp) { 
u_char *dst; vm_page_t *m; bus_dma_segment_t *vlist; int i, error, error1, ma_offs, notmapped; off_t secno, nsec, uc; uintptr_t sp, osp; switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; default: return (EOPNOTSUPP); } notmapped = (bp->bio_flags & BIO_UNMAPPED) != 0; vlist = (bp->bio_flags & BIO_VLIST) != 0 ? (bus_dma_segment_t *)bp->bio_data : NULL; if (notmapped) { m = bp->bio_ma; ma_offs = bp->bio_ma_offset; dst = NULL; KASSERT(vlist == NULL, ("vlists cannot be unmapped")); } else if (vlist != NULL) { ma_offs = bp->bio_ma_offset; dst = NULL; } else { dst = bp->bio_data; } nsec = bp->bio_length / sc->sectorsize; secno = bp->bio_offset / sc->sectorsize; error = 0; while (nsec--) { osp = s_read(sc->indir, secno); if (bp->bio_cmd == BIO_DELETE) { if (osp != 0) error = s_write(sc->indir, secno, 0); } else if (bp->bio_cmd == BIO_READ) { if (osp == 0) { if (notmapped) { error = md_malloc_move_ma(&m, &ma_offs, sc->sectorsize, NULL, 0, MD_MALLOC_MOVE_ZERO); } else if (vlist != NULL) { error = md_malloc_move_vlist(&vlist, &ma_offs, sc->sectorsize, NULL, 0, MD_MALLOC_MOVE_ZERO); } else bzero(dst, sc->sectorsize); } else if (osp <= 255) { if (notmapped) { error = md_malloc_move_ma(&m, &ma_offs, sc->sectorsize, NULL, osp, MD_MALLOC_MOVE_FILL); } else if (vlist != NULL) { error = md_malloc_move_vlist(&vlist, &ma_offs, sc->sectorsize, NULL, osp, MD_MALLOC_MOVE_FILL); } else memset(dst, osp, sc->sectorsize); } else { if (notmapped) { error = md_malloc_move_ma(&m, &ma_offs, sc->sectorsize, (void *)osp, 0, MD_MALLOC_MOVE_READ); } else if (vlist != NULL) { error = md_malloc_move_vlist(&vlist, &ma_offs, sc->sectorsize, (void *)osp, 0, MD_MALLOC_MOVE_READ); } else { bcopy((void *)osp, dst, sc->sectorsize); cpu_flush_dcache(dst, sc->sectorsize); } } osp = 0; } else if (bp->bio_cmd == BIO_WRITE) { if (sc->flags & MD_COMPRESS) { if (notmapped) { error1 = md_malloc_move_ma(&m, &ma_offs, sc->sectorsize, &uc, 0, MD_MALLOC_MOVE_CMP); i = error1 == 0 ? 
sc->sectorsize : 0; } else if (vlist != NULL) { error1 = md_malloc_move_vlist(&vlist, &ma_offs, sc->sectorsize, &uc, 0, MD_MALLOC_MOVE_CMP); i = error1 == 0 ? sc->sectorsize : 0; } else { uc = dst[0]; for (i = 1; i < sc->sectorsize; i++) { if (dst[i] != uc) break; } } } else { i = 0; uc = 0; } if (i == sc->sectorsize) { if (osp != uc) error = s_write(sc->indir, secno, uc); } else { if (osp <= 255) { sp = (uintptr_t)uma_zalloc(sc->uma, md_malloc_wait ? M_WAITOK : M_NOWAIT); if (sp == 0) { error = ENOSPC; break; } if (notmapped) { error = md_malloc_move_ma(&m, &ma_offs, sc->sectorsize, (void *)sp, 0, MD_MALLOC_MOVE_WRITE); } else if (vlist != NULL) { error = md_malloc_move_vlist( &vlist, &ma_offs, sc->sectorsize, (void *)sp, 0, MD_MALLOC_MOVE_WRITE); } else { bcopy(dst, (void *)sp, sc->sectorsize); } error = s_write(sc->indir, secno, sp); } else { if (notmapped) { error = md_malloc_move_ma(&m, &ma_offs, sc->sectorsize, (void *)osp, 0, MD_MALLOC_MOVE_WRITE); } else if (vlist != NULL) { error = md_malloc_move_vlist( &vlist, &ma_offs, sc->sectorsize, (void *)osp, 0, MD_MALLOC_MOVE_WRITE); } else { bcopy(dst, (void *)osp, sc->sectorsize); } osp = 0; } } } else { error = EOPNOTSUPP; } if (osp > 255) uma_zfree(sc->uma, (void*)osp); if (error != 0) break; secno++; if (!notmapped && vlist == NULL) dst += sc->sectorsize; } bp->bio_resid = 0; return (error); } static void mdcopyto_vlist(void *src, bus_dma_segment_t *vlist, off_t offset, off_t len) { off_t seg_len; while (offset >= vlist->ds_len) { offset -= vlist->ds_len; vlist++; } while (len != 0) { seg_len = omin(len, vlist->ds_len - offset); bcopy(src, (void *)(uintptr_t)(vlist->ds_addr + offset), seg_len); offset = 0; src = (uint8_t *)src + seg_len; len -= seg_len; vlist++; } } static void mdcopyfrom_vlist(bus_dma_segment_t *vlist, off_t offset, void *dst, off_t len) { off_t seg_len; while (offset >= vlist->ds_len) { offset -= vlist->ds_len; vlist++; } while (len != 0) { seg_len = omin(len, vlist->ds_len - offset); 
bcopy((void *)(uintptr_t)(vlist->ds_addr + offset), dst, seg_len); offset = 0; dst = (uint8_t *)dst + seg_len; len -= seg_len; vlist++; } } static int mdstart_preload(struct md_s *sc, struct bio *bp) { uint8_t *p; p = sc->pl_ptr + bp->bio_offset; switch (bp->bio_cmd) { case BIO_READ: if ((bp->bio_flags & BIO_VLIST) != 0) { mdcopyto_vlist(p, (bus_dma_segment_t *)bp->bio_data, bp->bio_ma_offset, bp->bio_length); } else { bcopy(p, bp->bio_data, bp->bio_length); } cpu_flush_dcache(bp->bio_data, bp->bio_length); break; case BIO_WRITE: if ((bp->bio_flags & BIO_VLIST) != 0) { mdcopyfrom_vlist((bus_dma_segment_t *)bp->bio_data, bp->bio_ma_offset, p, bp->bio_length); } else { bcopy(bp->bio_data, p, bp->bio_length); } break; } bp->bio_resid = 0; return (0); } static int mdstart_vnode(struct md_s *sc, struct bio *bp) { int error; struct uio auio; struct iovec aiov; struct iovec *piov; struct mount *mp; struct vnode *vp; struct buf *pb; bus_dma_segment_t *vlist; struct thread *td; off_t iolen, len, zerosize; int ma_offs, npages; switch (bp->bio_cmd) { case BIO_READ: auio.uio_rw = UIO_READ; break; case BIO_WRITE: case BIO_DELETE: auio.uio_rw = UIO_WRITE; break; case BIO_FLUSH: break; default: return (EOPNOTSUPP); } td = curthread; vp = sc->vnode; pb = NULL; piov = NULL; ma_offs = bp->bio_ma_offset; len = bp->bio_length; /* * VNODE I/O * * If an error occurs, we set BIO_ERROR but we do not set * B_INVAL because (for a write anyway), the buffer is * still valid. */ if (bp->bio_cmd == BIO_FLUSH) { (void) vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(vp, MNT_WAIT, td); VOP_UNLOCK(vp, 0); vn_finished_write(mp); return (error); } auio.uio_offset = (vm_ooffset_t)bp->bio_offset; auio.uio_resid = bp->bio_length; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; if (bp->bio_cmd == BIO_DELETE) { /* * Emulate BIO_DELETE by writing zeros. 
*/ zerosize = ZERO_REGION_SIZE - (ZERO_REGION_SIZE % sc->sectorsize); auio.uio_iovcnt = howmany(bp->bio_length, zerosize); piov = malloc(sizeof(*piov) * auio.uio_iovcnt, M_MD, M_WAITOK); auio.uio_iov = piov; while (len > 0) { piov->iov_base = __DECONST(void *, zero_region); piov->iov_len = len; if (len > zerosize) piov->iov_len = zerosize; len -= piov->iov_len; piov++; } piov = auio.uio_iov; } else if ((bp->bio_flags & BIO_VLIST) != 0) { piov = malloc(sizeof(*piov) * bp->bio_ma_n, M_MD, M_WAITOK); auio.uio_iov = piov; vlist = (bus_dma_segment_t *)bp->bio_data; while (len > 0) { piov->iov_base = (void *)(uintptr_t)(vlist->ds_addr + ma_offs); piov->iov_len = vlist->ds_len - ma_offs; if (piov->iov_len > len) piov->iov_len = len; len -= piov->iov_len; ma_offs = 0; vlist++; piov++; } auio.uio_iovcnt = piov - auio.uio_iov; piov = auio.uio_iov; } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { pb = getpbuf(&md_vnode_pbuf_freecnt); bp->bio_resid = len; unmapped_step: npages = atop(min(MAXPHYS, round_page(len + (ma_offs & PAGE_MASK)))); iolen = min(ptoa(npages) - (ma_offs & PAGE_MASK), len); KASSERT(iolen > 0, ("zero iolen")); pmap_qenter((vm_offset_t)pb->b_data, &bp->bio_ma[atop(ma_offs)], npages); aiov.iov_base = (void *)((vm_offset_t)pb->b_data + (ma_offs & PAGE_MASK)); aiov.iov_len = iolen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_resid = iolen; } else { aiov.iov_base = bp->bio_data; aiov.iov_len = bp->bio_length; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; } /* * When reading set IO_DIRECT to try to avoid double-caching * the data. When writing IO_DIRECT is not optimal. */ if (auio.uio_rw == UIO_READ) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_READ(vp, &auio, IO_DIRECT, sc->cred); VOP_UNLOCK(vp, 0); } else { (void) vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_WRITE(vp, &auio, sc->flags & MD_ASYNC ? 
0 : IO_SYNC, sc->cred); VOP_UNLOCK(vp, 0); vn_finished_write(mp); + if (error == 0) + sc->flags &= ~MD_VERIFY; } if (pb != NULL) { pmap_qremove((vm_offset_t)pb->b_data, npages); if (error == 0) { len -= iolen; bp->bio_resid -= iolen; ma_offs += iolen; if (len > 0) goto unmapped_step; } relpbuf(pb, &md_vnode_pbuf_freecnt); } free(piov, M_MD); if (pb == NULL) bp->bio_resid = auio.uio_resid; return (error); } static int mdstart_swap(struct md_s *sc, struct bio *bp) { vm_page_t m; u_char *p; vm_pindex_t i, lastp; bus_dma_segment_t *vlist; int rv, ma_offs, offs, len, lastend; switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; default: return (EOPNOTSUPP); } p = bp->bio_data; ma_offs = (bp->bio_flags & (BIO_UNMAPPED|BIO_VLIST)) != 0 ? bp->bio_ma_offset : 0; vlist = (bp->bio_flags & BIO_VLIST) != 0 ? (bus_dma_segment_t *)bp->bio_data : NULL; /* * offs is the offset at which to start operating on the * next (ie, first) page. lastp is the last page on * which we're going to operate. lastend is the ending * position within that last page (ie, PAGE_SIZE if * we're operating on complete aligned pages). */ offs = bp->bio_offset % PAGE_SIZE; lastp = (bp->bio_offset + bp->bio_length - 1) / PAGE_SIZE; lastend = (bp->bio_offset + bp->bio_length - 1) % PAGE_SIZE + 1; rv = VM_PAGER_OK; VM_OBJECT_WLOCK(sc->object); vm_object_pip_add(sc->object, 1); for (i = bp->bio_offset / PAGE_SIZE; i <= lastp; i++) { len = ((i == lastp) ? lastend : PAGE_SIZE) - offs; m = vm_page_grab(sc->object, i, VM_ALLOC_SYSTEM); if (bp->bio_cmd == BIO_READ) { if (m->valid == VM_PAGE_BITS_ALL) rv = VM_PAGER_OK; else rv = vm_pager_get_pages(sc->object, &m, 1, NULL, NULL); if (rv == VM_PAGER_ERROR) { vm_page_xunbusy(m); break; } else if (rv == VM_PAGER_FAIL) { /* * Pager does not have the page. Zero * the allocated page, and mark it as * valid. Do not set dirty, the page * can be recreated if thrown out. 
*/ pmap_zero_page(m); m->valid = VM_PAGE_BITS_ALL; } if ((bp->bio_flags & BIO_UNMAPPED) != 0) { pmap_copy_pages(&m, offs, bp->bio_ma, ma_offs, len); } else if ((bp->bio_flags & BIO_VLIST) != 0) { physcopyout_vlist(VM_PAGE_TO_PHYS(m) + offs, vlist, ma_offs, len); cpu_flush_dcache(p, len); } else { physcopyout(VM_PAGE_TO_PHYS(m) + offs, p, len); cpu_flush_dcache(p, len); } } else if (bp->bio_cmd == BIO_WRITE) { if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL) rv = vm_pager_get_pages(sc->object, &m, 1, NULL, NULL); else rv = VM_PAGER_OK; if (rv == VM_PAGER_ERROR) { vm_page_xunbusy(m); break; } if ((bp->bio_flags & BIO_UNMAPPED) != 0) { pmap_copy_pages(bp->bio_ma, ma_offs, &m, offs, len); } else if ((bp->bio_flags & BIO_VLIST) != 0) { physcopyin_vlist(vlist, ma_offs, VM_PAGE_TO_PHYS(m) + offs, len); } else { physcopyin(p, VM_PAGE_TO_PHYS(m) + offs, len); } m->valid = VM_PAGE_BITS_ALL; } else if (bp->bio_cmd == BIO_DELETE) { if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL) rv = vm_pager_get_pages(sc->object, &m, 1, NULL, NULL); else rv = VM_PAGER_OK; if (rv == VM_PAGER_ERROR) { vm_page_xunbusy(m); break; } if (len != PAGE_SIZE) { pmap_zero_page_area(m, offs, len); vm_page_clear_dirty(m, offs, len); m->valid = VM_PAGE_BITS_ALL; } else vm_pager_page_unswapped(m); } vm_page_xunbusy(m); vm_page_lock(m); if (bp->bio_cmd == BIO_DELETE && len == PAGE_SIZE) vm_page_free(m); else vm_page_activate(m); vm_page_unlock(m); if (bp->bio_cmd == BIO_WRITE) { vm_page_dirty(m); vm_pager_page_unswapped(m); } /* Actions on further pages start at offset 0 */ p += PAGE_SIZE - offs; offs = 0; ma_offs += len; } vm_object_pip_wakeup(sc->object); VM_OBJECT_WUNLOCK(sc->object); return (rv != VM_PAGER_ERROR ? 
0 : ENOSPC); } static int mdstart_null(struct md_s *sc, struct bio *bp) { switch (bp->bio_cmd) { case BIO_READ: bzero(bp->bio_data, bp->bio_length); cpu_flush_dcache(bp->bio_data, bp->bio_length); break; case BIO_WRITE: break; } bp->bio_resid = 0; return (0); } static void md_kthread(void *arg) { struct md_s *sc; struct bio *bp; int error; sc = arg; thread_lock(curthread); sched_prio(curthread, PRIBIO); thread_unlock(curthread); if (sc->type == MD_VNODE) curthread->td_pflags |= TDP_NORUNNINGBUF; for (;;) { mtx_lock(&sc->queue_mtx); if (sc->flags & MD_SHUTDOWN) { sc->flags |= MD_EXITING; mtx_unlock(&sc->queue_mtx); kproc_exit(0); } bp = bioq_takefirst(&sc->bio_queue); if (!bp) { msleep(sc, &sc->queue_mtx, PRIBIO | PDROP, "mdwait", 0); continue; } mtx_unlock(&sc->queue_mtx); if (bp->bio_cmd == BIO_GETATTR) { + int isv = ((sc->flags & MD_VERIFY) != 0); + if ((sc->fwsectors && sc->fwheads && (g_handleattr_int(bp, "GEOM::fwsectors", sc->fwsectors) || g_handleattr_int(bp, "GEOM::fwheads", sc->fwheads))) || g_handleattr_int(bp, "GEOM::candelete", 1)) error = -1; + else if (g_handleattr_int(bp, "MNT::verified", isv)) + error = -1; else error = EOPNOTSUPP; } else { error = sc->start(sc, bp); } if (error != -1) { bp->bio_completed = bp->bio_length; if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) devstat_end_transaction_bio(sc->devstat, bp); g_io_deliver(bp, error); } } } static struct md_s * mdfind(int unit) { struct md_s *sc; LIST_FOREACH(sc, &md_softc_list, list) { if (sc->unit == unit) break; } return (sc); } static struct md_s * mdnew(int unit, int *errp, enum md_types type) { struct md_s *sc; int error; *errp = 0; if (unit == -1) unit = alloc_unr(md_uh); else unit = alloc_unr_specific(md_uh, unit); if (unit == -1) { *errp = EBUSY; return (NULL); } sc = (struct md_s *)malloc(sizeof *sc, M_MD, M_WAITOK | M_ZERO); sc->type = type; bioq_init(&sc->bio_queue); mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF); mtx_init(&sc->stat_mtx, "md stat", NULL, 
MTX_DEF); sc->unit = unit; sprintf(sc->name, "md%d", unit); LIST_INSERT_HEAD(&md_softc_list, sc, list); error = kproc_create(md_kthread, sc, &sc->procp, 0, 0,"%s", sc->name); if (error == 0) return (sc); LIST_REMOVE(sc, list); mtx_destroy(&sc->stat_mtx); mtx_destroy(&sc->queue_mtx); free_unr(md_uh, sc->unit); free(sc, M_MD); *errp = error; return (NULL); } static void mdinit(struct md_s *sc) { struct g_geom *gp; struct g_provider *pp; g_topology_lock(); gp = g_new_geomf(&g_md_class, "md%d", sc->unit); gp->softc = sc; pp = g_new_providerf(gp, "md%d", sc->unit); pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; pp->mediasize = sc->mediasize; pp->sectorsize = sc->sectorsize; switch (sc->type) { case MD_MALLOC: case MD_VNODE: case MD_SWAP: pp->flags |= G_PF_ACCEPT_UNMAPPED; break; case MD_PRELOAD: case MD_NULL: break; } sc->gp = gp; sc->pp = pp; g_error_provider(pp, 0); g_topology_unlock(); sc->devstat = devstat_new_entry("md", sc->unit, sc->sectorsize, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); } static int mdcreate_malloc(struct md_s *sc, struct md_ioctl *mdio) { uintptr_t sp; int error; off_t u; error = 0; if (mdio->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE)) return (EINVAL); if (mdio->md_sectorsize != 0 && !powerof2(mdio->md_sectorsize)) return (EINVAL); /* Compression doesn't make sense if we have reserved space */ if (mdio->md_options & MD_RESERVE) mdio->md_options &= ~MD_COMPRESS; if (mdio->md_fwsectors != 0) sc->fwsectors = mdio->md_fwsectors; if (mdio->md_fwheads != 0) sc->fwheads = mdio->md_fwheads; sc->flags = mdio->md_options & (MD_COMPRESS | MD_FORCE); sc->indir = dimension(sc->mediasize / sc->sectorsize); sc->uma = uma_zcreate(sc->name, sc->sectorsize, NULL, NULL, NULL, NULL, 0x1ff, 0); if (mdio->md_options & MD_RESERVE) { off_t nsectors; nsectors = sc->mediasize / sc->sectorsize; for (u = 0; u < nsectors; u++) { sp = (uintptr_t)uma_zalloc(sc->uma, (md_malloc_wait ? 
M_WAITOK : M_NOWAIT) | M_ZERO); if (sp != 0) error = s_write(sc->indir, u, sp); else error = ENOMEM; if (error != 0) break; } } return (error); } static int mdsetcred(struct md_s *sc, struct ucred *cred) { char *tmpbuf; int error = 0; /* * Set credits in our softc */ if (sc->cred) crfree(sc->cred); sc->cred = crhold(cred); /* * Horrible kludge to establish credentials for NFS XXX. */ if (sc->vnode) { struct uio auio; struct iovec aiov; tmpbuf = malloc(sc->sectorsize, M_TEMP, M_WAITOK); bzero(&auio, sizeof(auio)); aiov.iov_base = tmpbuf; aiov.iov_len = sc->sectorsize; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_resid = aiov.iov_len; vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY); error = VOP_READ(sc->vnode, &auio, 0, sc->cred); VOP_UNLOCK(sc->vnode, 0); free(tmpbuf, M_TEMP); } return (error); } static int mdcreate_vnode(struct md_s *sc, struct md_ioctl *mdio, struct thread *td) { struct vattr vattr; struct nameidata nd; char *fname; int error, flags; /* * Kernel-originated requests must have the filename appended * to the mdio structure to protect against malicious software. */ fname = mdio->md_file; if ((void *)fname != (void *)(mdio + 1)) { error = copyinstr(fname, sc->file, sizeof(sc->file), NULL); if (error != 0) return (error); } else strlcpy(sc->file, fname, sizeof(sc->file)); /* * If the user specified that this is a read only device, don't * set the FWRITE mask before trying to open the backing store. */ - flags = FREAD | ((mdio->md_options & MD_READONLY) ? 0 : FWRITE); + flags = FREAD | ((mdio->md_options & MD_READONLY) ? 0 : FWRITE) \ + | ((mdio->md_options & MD_VERIFY) ? 
0 : O_VERIFY); NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, sc->file, td); error = vn_open(&nd, &flags, 0, NULL); if (error != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp->v_type != VREG) { error = EINVAL; goto bad; } error = VOP_GETATTR(nd.ni_vp, &vattr, td->td_ucred); if (error != 0) goto bad; if (VOP_ISLOCKED(nd.ni_vp) != LK_EXCLUSIVE) { vn_lock(nd.ni_vp, LK_UPGRADE | LK_RETRY); if (nd.ni_vp->v_iflag & VI_DOOMED) { /* Forced unmount. */ error = EBADF; goto bad; } } nd.ni_vp->v_vflag |= VV_MD; VOP_UNLOCK(nd.ni_vp, 0); if (mdio->md_fwsectors != 0) sc->fwsectors = mdio->md_fwsectors; if (mdio->md_fwheads != 0) sc->fwheads = mdio->md_fwheads; - sc->flags = mdio->md_options & (MD_FORCE | MD_ASYNC); + sc->flags = mdio->md_options & (MD_FORCE | MD_ASYNC | MD_VERIFY); if (!(flags & FWRITE)) sc->flags |= MD_READONLY; sc->vnode = nd.ni_vp; error = mdsetcred(sc, td->td_ucred); if (error != 0) { sc->vnode = NULL; vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY); nd.ni_vp->v_vflag &= ~VV_MD; goto bad; } return (0); bad: VOP_UNLOCK(nd.ni_vp, 0); (void)vn_close(nd.ni_vp, flags, td->td_ucred, td); return (error); } static int mddestroy(struct md_s *sc, struct thread *td) { if (sc->gp) { sc->gp->softc = NULL; g_topology_lock(); g_wither_geom(sc->gp, ENXIO); g_topology_unlock(); sc->gp = NULL; sc->pp = NULL; } if (sc->devstat) { devstat_remove_entry(sc->devstat); sc->devstat = NULL; } mtx_lock(&sc->queue_mtx); sc->flags |= MD_SHUTDOWN; wakeup(sc); while (!(sc->flags & MD_EXITING)) msleep(sc->procp, &sc->queue_mtx, PRIBIO, "mddestroy", hz / 10); mtx_unlock(&sc->queue_mtx); mtx_destroy(&sc->stat_mtx); mtx_destroy(&sc->queue_mtx); if (sc->vnode != NULL) { vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY); sc->vnode->v_vflag &= ~VV_MD; VOP_UNLOCK(sc->vnode, 0); (void)vn_close(sc->vnode, sc->flags & MD_READONLY ? 
FREAD : (FREAD|FWRITE), sc->cred, td); } if (sc->cred != NULL) crfree(sc->cred); if (sc->object != NULL) vm_object_deallocate(sc->object); if (sc->indir) destroy_indir(sc, sc->indir); if (sc->uma) uma_zdestroy(sc->uma); LIST_REMOVE(sc, list); free_unr(md_uh, sc->unit); free(sc, M_MD); return (0); } static int mdresize(struct md_s *sc, struct md_ioctl *mdio) { int error, res; vm_pindex_t oldpages, newpages; switch (sc->type) { case MD_VNODE: case MD_NULL: break; case MD_SWAP: if (mdio->md_mediasize <= 0 || (mdio->md_mediasize % PAGE_SIZE) != 0) return (EDOM); oldpages = OFF_TO_IDX(round_page(sc->mediasize)); newpages = OFF_TO_IDX(round_page(mdio->md_mediasize)); if (newpages < oldpages) { VM_OBJECT_WLOCK(sc->object); vm_object_page_remove(sc->object, newpages, 0, 0); swap_pager_freespace(sc->object, newpages, oldpages - newpages); swap_release_by_cred(IDX_TO_OFF(oldpages - newpages), sc->cred); sc->object->charge = IDX_TO_OFF(newpages); sc->object->size = newpages; VM_OBJECT_WUNLOCK(sc->object); } else if (newpages > oldpages) { res = swap_reserve_by_cred(IDX_TO_OFF(newpages - oldpages), sc->cred); if (!res) return (ENOMEM); if ((mdio->md_options & MD_RESERVE) || (sc->flags & MD_RESERVE)) { error = swap_pager_reserve(sc->object, oldpages, newpages - oldpages); if (error < 0) { swap_release_by_cred( IDX_TO_OFF(newpages - oldpages), sc->cred); return (EDOM); } } VM_OBJECT_WLOCK(sc->object); sc->object->charge = IDX_TO_OFF(newpages); sc->object->size = newpages; VM_OBJECT_WUNLOCK(sc->object); } break; default: return (EOPNOTSUPP); } sc->mediasize = mdio->md_mediasize; g_topology_lock(); g_resize_provider(sc->pp, sc->mediasize); g_topology_unlock(); return (0); } static int mdcreate_swap(struct md_s *sc, struct md_ioctl *mdio, struct thread *td) { vm_ooffset_t npage; int error; /* * Range check. Disallow negative sizes and sizes not being * multiple of page size. 
*/ if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0) return (EDOM); /* * Allocate an OBJT_SWAP object. * * Note the truncation. */ + if ((mdio->md_options & MD_VERIFY) != 0) + return (EINVAL); npage = mdio->md_mediasize / PAGE_SIZE; if (mdio->md_fwsectors != 0) sc->fwsectors = mdio->md_fwsectors; if (mdio->md_fwheads != 0) sc->fwheads = mdio->md_fwheads; sc->object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * npage, VM_PROT_DEFAULT, 0, td->td_ucred); if (sc->object == NULL) return (ENOMEM); sc->flags = mdio->md_options & (MD_FORCE | MD_RESERVE); if (mdio->md_options & MD_RESERVE) { if (swap_pager_reserve(sc->object, 0, npage) < 0) { error = EDOM; goto finish; } } error = mdsetcred(sc, td->td_ucred); finish: if (error != 0) { vm_object_deallocate(sc->object); sc->object = NULL; } return (error); } static int mdcreate_null(struct md_s *sc, struct md_ioctl *mdio, struct thread *td) { /* * Range check. Disallow negative sizes and sizes not being * multiple of page size. */ if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0) return (EDOM); return (0); } static int xmdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { struct md_ioctl *mdio; struct md_s *sc; int error, i; unsigned sectsize; if (md_debug) printf("mdctlioctl(%s %lx %p %x %p)\n", devtoname(dev), cmd, addr, flags, td); mdio = (struct md_ioctl *)addr; if (mdio->md_version != MDIOVERSION) return (EINVAL); /* * We assert the version number in the individual ioctl * handlers instead of out here because (a) it is possible we * may add another ioctl in the future which doesn't read an * mdio, and (b) the correct return value for an unknown ioctl * is ENOIOCTL, not EINVAL. 
*/ error = 0; switch (cmd) { case MDIOCATTACH: switch (mdio->md_type) { case MD_MALLOC: case MD_PRELOAD: case MD_VNODE: case MD_SWAP: case MD_NULL: break; default: return (EINVAL); } if (mdio->md_sectorsize == 0) sectsize = DEV_BSIZE; else sectsize = mdio->md_sectorsize; if (sectsize > MAXPHYS || mdio->md_mediasize < sectsize) return (EINVAL); if (mdio->md_options & MD_AUTOUNIT) sc = mdnew(-1, &error, mdio->md_type); else { if (mdio->md_unit > INT_MAX) return (EINVAL); sc = mdnew(mdio->md_unit, &error, mdio->md_type); } if (sc == NULL) return (error); if (mdio->md_options & MD_AUTOUNIT) mdio->md_unit = sc->unit; sc->mediasize = mdio->md_mediasize; sc->sectorsize = sectsize; error = EDOOFUS; switch (sc->type) { case MD_MALLOC: sc->start = mdstart_malloc; error = mdcreate_malloc(sc, mdio); break; case MD_PRELOAD: /* * We disallow attaching preloaded memory disks via * ioctl. Preloaded memory disks are automatically * attached in g_md_init(). */ error = EOPNOTSUPP; break; case MD_VNODE: sc->start = mdstart_vnode; error = mdcreate_vnode(sc, mdio, td); break; case MD_SWAP: sc->start = mdstart_swap; error = mdcreate_swap(sc, mdio, td); break; case MD_NULL: sc->start = mdstart_null; error = mdcreate_null(sc, mdio, td); break; } if (error != 0) { mddestroy(sc, td); return (error); } /* Prune off any residual fractional sector */ i = sc->mediasize % sc->sectorsize; sc->mediasize -= i; mdinit(sc); return (0); case MDIOCDETACH: if (mdio->md_mediasize != 0 || (mdio->md_options & ~MD_FORCE) != 0) return (EINVAL); sc = mdfind(mdio->md_unit); if (sc == NULL) return (ENOENT); if (sc->opencount != 0 && !(sc->flags & MD_FORCE) && !(mdio->md_options & MD_FORCE)) return (EBUSY); return (mddestroy(sc, td)); case MDIOCRESIZE: if ((mdio->md_options & ~(MD_FORCE | MD_RESERVE)) != 0) return (EINVAL); sc = mdfind(mdio->md_unit); if (sc == NULL) return (ENOENT); if (mdio->md_mediasize < sc->sectorsize) return (EINVAL); if (mdio->md_mediasize < sc->mediasize && !(sc->flags & MD_FORCE) && 
!(mdio->md_options & MD_FORCE)) return (EBUSY); return (mdresize(sc, mdio)); case MDIOCQUERY: sc = mdfind(mdio->md_unit); if (sc == NULL) return (ENOENT); mdio->md_type = sc->type; mdio->md_options = sc->flags; mdio->md_mediasize = sc->mediasize; mdio->md_sectorsize = sc->sectorsize; if (sc->type == MD_VNODE || (sc->type == MD_PRELOAD && mdio->md_file != NULL)) error = copyout(sc->file, mdio->md_file, strlen(sc->file) + 1); return (error); case MDIOCLIST: i = 1; LIST_FOREACH(sc, &md_softc_list, list) { if (i == MDNPAD - 1) mdio->md_pad[i] = -1; else mdio->md_pad[i++] = sc->unit; } mdio->md_pad[0] = i - 1; return (0); default: return (ENOIOCTL); }; } static int mdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { int error; sx_xlock(&md_sx); error = xmdctlioctl(dev, cmd, addr, flags, td); sx_xunlock(&md_sx); return (error); } static void md_preloaded(u_char *image, size_t length, const char *name) { struct md_s *sc; int error; sc = mdnew(-1, &error, MD_PRELOAD); if (sc == NULL) return; sc->mediasize = length; sc->sectorsize = DEV_BSIZE; sc->pl_ptr = image; sc->pl_len = length; sc->start = mdstart_preload; if (name != NULL) strlcpy(sc->file, name, sizeof(sc->file)); #if defined(MD_ROOT) && !defined(ROOTDEVNAME) if (sc->unit == 0) rootdevnames[0] = MD_ROOT_FSTYPE ":/dev/md0"; #endif mdinit(sc); if (name != NULL) { printf("%s%d: Preloaded image <%s> %zd bytes at %p\n", MD_NAME, sc->unit, name, length, image); } else { printf("%s%d: Embedded image %zd bytes at %p\n", MD_NAME, sc->unit, length, image); } } static void g_md_init(struct g_class *mp __unused) { caddr_t mod; u_char *ptr, *name, *type; unsigned len; int i; /* figure out log2(NINDIR) */ for (i = NINDIR, nshift = -1; i; nshift++) i >>= 1; mod = NULL; sx_init(&md_sx, "MD config lock"); g_topology_unlock(); md_uh = new_unrhdr(0, INT_MAX, NULL); #ifdef MD_ROOT if (mfs_root_size != 0) { sx_xlock(&md_sx); md_preloaded(__DEVOLATILE(u_char *, &mfs_root), mfs_root_size, NULL); 
sx_xunlock(&md_sx); } #endif /* XXX: are preload_* static or do they need Giant ? */ while ((mod = preload_search_next_name(mod)) != NULL) { name = (char *)preload_search_info(mod, MODINFO_NAME); if (name == NULL) continue; type = (char *)preload_search_info(mod, MODINFO_TYPE); if (type == NULL) continue; if (strcmp(type, "md_image") && strcmp(type, "mfs_root")) continue; ptr = preload_fetch_addr(mod); len = preload_fetch_size(mod); if (ptr != NULL && len != 0) { sx_xlock(&md_sx); md_preloaded(ptr, len, name); sx_xunlock(&md_sx); } } md_vnode_pbuf_freecnt = nswbuf / 10; status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL, 0600, MDCTL_NAME); g_topology_lock(); } static void g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp) { struct md_s *mp; char *type; mp = gp->softc; if (mp == NULL) return; switch (mp->type) { case MD_MALLOC: type = "malloc"; break; case MD_PRELOAD: type = "preload"; break; case MD_VNODE: type = "vnode"; break; case MD_SWAP: type = "swap"; break; case MD_NULL: type = "null"; break; default: type = "unknown"; break; } if (pp != NULL) { if (indent == NULL) { sbuf_printf(sb, " u %d", mp->unit); sbuf_printf(sb, " s %ju", (uintmax_t) mp->sectorsize); sbuf_printf(sb, " f %ju", (uintmax_t) mp->fwheads); sbuf_printf(sb, " fs %ju", (uintmax_t) mp->fwsectors); sbuf_printf(sb, " l %ju", (uintmax_t) mp->mediasize); sbuf_printf(sb, " t %s", type); if ((mp->type == MD_VNODE && mp->vnode != NULL) || (mp->type == MD_PRELOAD && mp->file[0] != '\0')) sbuf_printf(sb, " file %s", mp->file); } else { sbuf_printf(sb, "%s%d\n", indent, mp->unit); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t) mp->sectorsize); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t) mp->fwheads); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t) mp->fwsectors); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t) mp->mediasize); sbuf_printf(sb, "%s%s\n", indent, (mp->flags & MD_COMPRESS) == 0 ? 
"off": "on"); sbuf_printf(sb, "%s%s\n", indent, (mp->flags & MD_READONLY) == 0 ? "read-write": "read-only"); sbuf_printf(sb, "%s%s\n", indent, type); if ((mp->type == MD_VNODE && mp->vnode != NULL) || (mp->type == MD_PRELOAD && mp->file[0] != '\0')) { sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", mp->file); sbuf_printf(sb, "\n"); } } } } static void g_md_fini(struct g_class *mp __unused) { sx_destroy(&md_sx); if (status_dev != NULL) destroy_dev(status_dev); delete_unrhdr(md_uh); } diff --git a/sys/fs/cd9660/cd9660_vfsops.c b/sys/fs/cd9660/cd9660_vfsops.c index ca654565901b..08e2b436c149 100644 --- a/sys/fs/cd9660/cd9660_vfsops.c +++ b/sys/fs/cd9660/cd9660_vfsops.c @@ -1,850 +1,855 @@ /*- * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension * Support code is derived from software contributed to Berkeley * by Atsushi Murai (amurai@spec.co.jp). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)cd9660_vfsops.c 8.18 (Berkeley) 5/22/95 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_ISOFSMNT, "isofs_mount", "ISOFS mount structure"); MALLOC_DEFINE(M_ISOFSNODE, "isofs_node", "ISOFS vnode private part"); struct iconv_functions *cd9660_iconv = NULL; static vfs_mount_t cd9660_mount; static vfs_cmount_t cd9660_cmount; static vfs_unmount_t cd9660_unmount; static vfs_root_t cd9660_root; static vfs_statfs_t cd9660_statfs; static vfs_vget_t cd9660_vget; static vfs_fhtovp_t cd9660_fhtovp; static struct vfsops cd9660_vfsops = { .vfs_fhtovp = cd9660_fhtovp, .vfs_mount = cd9660_mount, .vfs_cmount = cd9660_cmount, .vfs_root = cd9660_root, .vfs_statfs = cd9660_statfs, .vfs_unmount = cd9660_unmount, .vfs_vget = cd9660_vget, }; VFS_SET(cd9660_vfsops, cd9660, VFCF_READONLY); MODULE_VERSION(cd9660, 1); static int cd9660_vfs_hash_cmp(struct vnode *vp, void *pino); static int iso_mountfs(struct vnode *devvp, struct mount *mp); /* * VFS Operations. 
*/ static int cd9660_cmount(struct mntarg *ma, void *data, uint64_t flags) { struct iso_args args; struct export_args exp; int error; error = copyin(data, &args, sizeof args); if (error) return (error); vfs_oexport_conv(&args.export, &exp); ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN); ma = mount_arg(ma, "export", &exp, sizeof(exp)); ma = mount_argsu(ma, "cs_disk", args.cs_disk, 64); ma = mount_argsu(ma, "cs_local", args.cs_local, 64); ma = mount_argf(ma, "ssector", "%u", args.ssector); ma = mount_argb(ma, !(args.flags & ISOFSMNT_NORRIP), "norrip"); ma = mount_argb(ma, args.flags & ISOFSMNT_GENS, "nogens"); ma = mount_argb(ma, args.flags & ISOFSMNT_EXTATT, "noextatt"); ma = mount_argb(ma, !(args.flags & ISOFSMNT_NOJOLIET), "nojoliet"); ma = mount_argb(ma, args.flags & ISOFSMNT_BROKENJOLIET, "nobrokenjoliet"); ma = mount_argb(ma, args.flags & ISOFSMNT_KICONV, "nokiconv"); error = kernel_mount(ma, flags); return (error); } static int cd9660_mount(struct mount *mp) { struct vnode *devvp; struct thread *td; char *fspec; int error; accmode_t accmode; struct nameidata ndp; struct iso_mnt *imp = NULL; td = curthread; /* * Unconditionally mount as read-only. */ MNT_ILOCK(mp); mp->mnt_flag |= MNT_RDONLY; MNT_IUNLOCK(mp); fspec = vfs_getopts(mp->mnt_optnew, "from", &error); if (error) return (error); imp = VFSTOISOFS(mp); if (mp->mnt_flag & MNT_UPDATE) { if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0)) return (0); } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. 
*/ NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); if ((error = namei(&ndp))) return (error); NDFREE(&ndp, NDF_ONLY_PNBUF); devvp = ndp.ni_vp; if (!vn_isdisk(devvp, &error)) { vput(devvp); return (error); } /* * Verify that user has necessary permissions on the device, * or has superuser abilities */ accmode = VREAD; error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); if (error) error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error) { vput(devvp); return (error); } if ((mp->mnt_flag & MNT_UPDATE) == 0) { error = iso_mountfs(devvp, mp); if (error) vrele(devvp); } else { if (devvp != imp->im_devvp) error = EINVAL; /* needs translation */ vput(devvp); } if (error) return (error); vfs_mountedfrom(mp, fspec); return (0); } /* * Common code for mount and mountroot */ static int iso_mountfs(devvp, mp) struct vnode *devvp; struct mount *mp; { struct iso_mnt *isomp = NULL; struct buf *bp = NULL; struct buf *pribp = NULL, *supbp = NULL; struct cdev *dev; int error = EINVAL; int high_sierra = 0; int iso_bsize; int iso_blknum; int joliet_level; + int isverified = 0; struct iso_volume_descriptor *vdp = NULL; struct iso_primary_descriptor *pri = NULL; struct iso_sierra_primary_descriptor *pri_sierra = NULL; struct iso_supplementary_descriptor *sup = NULL; struct iso_directory_record *rootp; int logical_block_size, ssector; struct g_consumer *cp; struct bufobj *bo; char *cs_local, *cs_disk; dev = devvp->v_rdev; dev_ref(dev); g_topology_lock(); error = g_vfs_open(devvp, &cp, "cd9660", 0); + if (error == 0) + g_getattr("MNT::verified", cp, &isverified); g_topology_unlock(); VOP_UNLOCK(devvp, 0); if (error) goto out; if (devvp->v_rdev->si_iosize_max != 0) mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; if (mp->mnt_iosize_max > MAXPHYS) mp->mnt_iosize_max = MAXPHYS; bo = &devvp->v_bufobj; /* This is the "logical sector size". The standard says this * should be 2048 or the physical sector size on the device, * whichever is greater. 
*/ if ((ISO_DEFAULT_BLOCK_SIZE % cp->provider->sectorsize) != 0) { error = EINVAL; goto out; } iso_bsize = cp->provider->sectorsize; joliet_level = 0; if (1 != vfs_scanopt(mp->mnt_optnew, "ssector", "%d", &ssector)) ssector = 0; for (iso_blknum = 16 + ssector; iso_blknum < 100 + ssector; iso_blknum++) { if ((error = bread(devvp, iso_blknum * btodb(ISO_DEFAULT_BLOCK_SIZE), iso_bsize, NOCRED, &bp)) != 0) goto out; vdp = (struct iso_volume_descriptor *)bp->b_data; if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) { if (bcmp (vdp->id_sierra, ISO_SIERRA_ID, sizeof vdp->id_sierra) != 0) { error = EINVAL; goto out; } else high_sierra = 1; } switch (isonum_711 (high_sierra? vdp->type_sierra: vdp->type)){ case ISO_VD_PRIMARY: if (pribp == NULL) { pribp = bp; bp = NULL; pri = (struct iso_primary_descriptor *)vdp; pri_sierra = (struct iso_sierra_primary_descriptor *)vdp; } break; case ISO_VD_SUPPLEMENTARY: if (supbp == NULL) { supbp = bp; bp = NULL; sup = (struct iso_supplementary_descriptor *)vdp; if (!vfs_flagopt(mp->mnt_optnew, "nojoliet", NULL, 0)) { if (bcmp(sup->escape, "%/@", 3) == 0) joliet_level = 1; if (bcmp(sup->escape, "%/C", 3) == 0) joliet_level = 2; if (bcmp(sup->escape, "%/E", 3) == 0) joliet_level = 3; if ((isonum_711 (sup->flags) & 1) && !vfs_flagopt(mp->mnt_optnew, "brokenjoliet", NULL, 0)) joliet_level = 0; } } break; case ISO_VD_END: goto vd_end; default: break; } if (bp != NULL) { brelse(bp); bp = NULL; } } vd_end: if (bp != NULL) { brelse(bp); bp = NULL; } if (pri == NULL) { error = EINVAL; goto out; } logical_block_size = isonum_723 (high_sierra? pri_sierra->logical_block_size: pri->logical_block_size); if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE || (logical_block_size & (logical_block_size - 1)) != 0) { error = EINVAL; goto out; } rootp = (struct iso_directory_record *) (high_sierra? 
pri_sierra->root_directory_record: pri->root_directory_record); isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK | M_ZERO); isomp->im_cp = cp; isomp->im_bo = bo; isomp->logical_block_size = logical_block_size; isomp->volume_space_size = isonum_733 (high_sierra? pri_sierra->volume_space_size: pri->volume_space_size); isomp->joliet_level = 0; /* * Since an ISO9660 multi-session CD can also access previous * sessions, we have to include them into the space consider- * ations. This doesn't yield a very accurate number since * parts of the old sessions might be inaccessible now, but we * can't do much better. This is also important for the NFS * filehandle validation. */ isomp->volume_space_size += ssector; bcopy (rootp, isomp->root, sizeof isomp->root); isomp->root_extent = isonum_733 (rootp->extent); isomp->root_size = isonum_733 (rootp->size); isomp->im_bmask = logical_block_size - 1; isomp->im_bshift = ffs(logical_block_size) - 1; pribp->b_flags |= B_AGE; brelse(pribp); pribp = NULL; rootp = NULL; pri = NULL; pri_sierra = NULL; mp->mnt_data = isomp; mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = 0; MNT_ILOCK(mp); + if (isverified) + mp->mnt_flag |= MNT_VERIFIED; mp->mnt_flag |= MNT_LOCAL; mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED; MNT_IUNLOCK(mp); isomp->im_mountp = mp; isomp->im_dev = dev; isomp->im_devvp = devvp; vfs_flagopt(mp->mnt_optnew, "norrip", &isomp->im_flags, ISOFSMNT_NORRIP); vfs_flagopt(mp->mnt_optnew, "gens", &isomp->im_flags, ISOFSMNT_GENS); vfs_flagopt(mp->mnt_optnew, "extatt", &isomp->im_flags, ISOFSMNT_EXTATT); vfs_flagopt(mp->mnt_optnew, "nojoliet", &isomp->im_flags, ISOFSMNT_NOJOLIET); vfs_flagopt(mp->mnt_optnew, "kiconv", &isomp->im_flags, ISOFSMNT_KICONV); /* Check the Rock Ridge Extension support */ if (!(isomp->im_flags & ISOFSMNT_NORRIP)) { if ((error = bread(isomp->im_devvp, (isomp->root_extent + isonum_711(((struct iso_directory_record 
*)isomp->root)-> ext_attr_length)) << (isomp->im_bshift - DEV_BSHIFT), isomp->logical_block_size, NOCRED, &bp)) != 0) goto out; rootp = (struct iso_directory_record *)bp->b_data; if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) { isomp->im_flags |= ISOFSMNT_NORRIP; } else { isomp->im_flags &= ~ISOFSMNT_GENS; } /* * The contents are valid, * but they will get reread as part of another vnode, so... */ bp->b_flags |= B_AGE; brelse(bp); bp = NULL; rootp = NULL; } if (isomp->im_flags & ISOFSMNT_KICONV && cd9660_iconv) { cs_local = vfs_getopts(mp->mnt_optnew, "cs_local", &error); if (error) goto out; cs_disk = vfs_getopts(mp->mnt_optnew, "cs_disk", &error); if (error) goto out; cd9660_iconv->open(cs_local, cs_disk, &isomp->im_d2l); cd9660_iconv->open(cs_disk, cs_local, &isomp->im_l2d); } else { isomp->im_d2l = NULL; isomp->im_l2d = NULL; } if (high_sierra) { /* this effectively ignores all the mount flags */ if (bootverbose) log(LOG_INFO, "cd9660: High Sierra Format\n"); isomp->iso_ftype = ISO_FTYPE_HIGH_SIERRA; } else switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) { default: isomp->iso_ftype = ISO_FTYPE_DEFAULT; break; case ISOFSMNT_GENS|ISOFSMNT_NORRIP: isomp->iso_ftype = ISO_FTYPE_9660; break; case 0: if (bootverbose) log(LOG_INFO, "cd9660: RockRidge Extension\n"); isomp->iso_ftype = ISO_FTYPE_RRIP; break; } /* Decide whether to use the Joliet descriptor */ if (isomp->iso_ftype != ISO_FTYPE_RRIP && joliet_level) { if (bootverbose) log(LOG_INFO, "cd9660: Joliet Extension (Level %d)\n", joliet_level); rootp = (struct iso_directory_record *) sup->root_directory_record; bcopy (rootp, isomp->root, sizeof isomp->root); isomp->root_extent = isonum_733 (rootp->extent); isomp->root_size = isonum_733 (rootp->size); isomp->joliet_level = joliet_level; supbp->b_flags |= B_AGE; } if (supbp) { brelse(supbp); supbp = NULL; sup = NULL; } return 0; out: if (bp != NULL) brelse(bp); if (pribp != NULL) brelse(pribp); if (supbp != NULL) brelse(supbp); if (cp != NULL) 
{ g_topology_lock(); g_vfs_close(cp); g_topology_unlock(); } if (isomp) { free(isomp, M_ISOFSMNT); mp->mnt_data = NULL; } dev_rel(dev); return error; } /* * unmount system call */ static int cd9660_unmount(mp, mntflags) struct mount *mp; int mntflags; { struct iso_mnt *isomp; int error, flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; if ((error = vflush(mp, 0, flags, curthread))) return (error); isomp = VFSTOISOFS(mp); if (isomp->im_flags & ISOFSMNT_KICONV && cd9660_iconv) { if (isomp->im_d2l) cd9660_iconv->close(isomp->im_d2l); if (isomp->im_l2d) cd9660_iconv->close(isomp->im_l2d); } g_topology_lock(); g_vfs_close(isomp->im_cp); g_topology_unlock(); vrele(isomp->im_devvp); dev_rel(isomp->im_dev); free(isomp, M_ISOFSMNT); mp->mnt_data = NULL; MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_LOCAL; MNT_IUNLOCK(mp); return (error); } /* * Return root of a filesystem */ static int cd9660_root(mp, flags, vpp) struct mount *mp; int flags; struct vnode **vpp; { struct iso_mnt *imp = VFSTOISOFS(mp); struct iso_directory_record *dp = (struct iso_directory_record *)imp->root; cd_ino_t ino = isodirino(dp, imp); /* * With RRIP we must use the `.' entry of the root directory. * Simply tell vget, that it's a relocated directory. */ return (cd9660_vget_internal(mp, ino, flags, vpp, imp->iso_ftype == ISO_FTYPE_RRIP, dp)); } /* * Get filesystem statistics. 
*/ static int cd9660_statfs(mp, sbp) struct mount *mp; struct statfs *sbp; { struct iso_mnt *isomp; isomp = VFSTOISOFS(mp); sbp->f_bsize = isomp->logical_block_size; sbp->f_iosize = sbp->f_bsize; /* XXX */ sbp->f_blocks = isomp->volume_space_size; sbp->f_bfree = 0; /* total free blocks */ sbp->f_bavail = 0; /* blocks free for non superuser */ sbp->f_files = 0; /* total files */ sbp->f_ffree = 0; /* free file nodes */ return 0; } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is in range * - call iget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the generation number matches */ /* ARGSUSED */ static int cd9660_fhtovp(mp, fhp, flags, vpp) struct mount *mp; struct fid *fhp; int flags; struct vnode **vpp; { struct ifid ifh; struct iso_node *ip; struct vnode *nvp; int error; memcpy(&ifh, fhp, sizeof(ifh)); #ifdef ISOFS_DBG printf("fhtovp: ino %d, start %ld\n", ifh.ifid_ino, ifh.ifid_start); #endif if ((error = VFS_VGET(mp, ifh.ifid_ino, LK_EXCLUSIVE, &nvp)) != 0) { *vpp = NULLVP; return (error); } ip = VTOI(nvp); if (ip->inode.iso_mode == 0) { vput(nvp); *vpp = NULLVP; return (ESTALE); } *vpp = nvp; vnode_create_vobject(*vpp, ip->i_size, curthread); return (0); } /* * Conform to standard VFS interface; can't vget arbitrary inodes beyond 4GB * into media with current inode scheme and 32-bit ino_t. This shouldn't be * needed for anything other than nfsd, and who exports a mounted DVD over NFS? */ static int cd9660_vget(mp, ino, flags, vpp) struct mount *mp; ino_t ino; int flags; struct vnode **vpp; { /* * XXXX * It would be nice if we didn't always set the `relocated' flag * and force the extra read, but I don't want to think about fixing * that right now. 
*/ return (cd9660_vget_internal(mp, ino, flags, vpp, #if 0 VFSTOISOFS(mp)->iso_ftype == ISO_FTYPE_RRIP, #else 0, #endif (struct iso_directory_record *)0)); } /* Use special comparator for full 64-bit ino comparison. */ static int cd9660_vfs_hash_cmp(vp, pino) struct vnode *vp; void *pino; { struct iso_node *ip; cd_ino_t ino; ip = VTOI(vp); ino = *(cd_ino_t *)pino; return (ip->i_number != ino); } int cd9660_vget_internal(mp, ino, flags, vpp, relocated, isodir) struct mount *mp; cd_ino_t ino; int flags; struct vnode **vpp; int relocated; struct iso_directory_record *isodir; { struct iso_mnt *imp; struct iso_node *ip; struct buf *bp; struct vnode *vp; struct cdev *dev; int error; struct thread *td; td = curthread; error = vfs_hash_get(mp, ino, flags, td, vpp, cd9660_vfs_hash_cmp, &ino); if (error || *vpp != NULL) return (error); /* * We must promote to an exclusive lock for vnode creation. This * can happen if lookup is passed LOCKSHARED. */ if ((flags & LK_TYPE_MASK) == LK_SHARED) { flags &= ~LK_TYPE_MASK; flags |= LK_EXCLUSIVE; } /* * We do not lock vnode creation as it is believed to be too * expensive for such rare case as simultaneous creation of vnode * for same ino by different processes. We just allow them to race * and check later to decide who wins. Let the race begin! */ imp = VFSTOISOFS(mp); dev = imp->im_dev; /* Allocate a new vnode/iso_node. 
*/ if ((error = getnewvnode("isofs", mp, &cd9660_vnodeops, &vp)) != 0) { *vpp = NULLVP; return (error); } ip = malloc(sizeof(struct iso_node), M_ISOFSNODE, M_WAITOK | M_ZERO); vp->v_data = ip; ip->i_vnode = vp; ip->i_number = ino; lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); error = insmntque(vp, mp); if (error != 0) { free(ip, M_ISOFSNODE); *vpp = NULLVP; return (error); } error = vfs_hash_insert(vp, ino, flags, td, vpp, cd9660_vfs_hash_cmp, &ino); if (error || *vpp != NULL) return (error); if (isodir == NULL) { int lbn, off; lbn = lblkno(imp, ino); if (lbn >= imp->volume_space_size) { vput(vp); printf("fhtovp: lbn exceed volume space %d\n", lbn); return (ESTALE); } off = blkoff(imp, ino); if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) { vput(vp); printf("fhtovp: crosses block boundary %d\n", off + ISO_DIRECTORY_RECORD_SIZE); return (ESTALE); } error = bread(imp->im_devvp, lbn << (imp->im_bshift - DEV_BSHIFT), imp->logical_block_size, NOCRED, &bp); if (error) { vput(vp); brelse(bp); printf("fhtovp: bread error %d\n",error); return (error); } isodir = (struct iso_directory_record *)(bp->b_data + off); if (off + isonum_711(isodir->length) > imp->logical_block_size) { vput(vp); brelse(bp); printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n", off +isonum_711(isodir->length), off, isonum_711(isodir->length)); return (ESTALE); } #if 0 if (isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length) != ifhp->ifid_start) { brelse(bp); printf("fhtovp: file start miss %d vs %d\n", isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length), ifhp->ifid_start); return (ESTALE); } #endif } else bp = NULL; ip->i_mnt = imp; if (relocated) { /* * On relocated directories we must * read the `.' entry out of a dir. 
*/ ip->iso_start = ino >> imp->im_bshift; if (bp != NULL) brelse(bp); if ((error = cd9660_blkatoff(vp, (off_t)0, NULL, &bp)) != 0) { vput(vp); return (error); } isodir = (struct iso_directory_record *)bp->b_data; } ip->iso_extent = isonum_733(isodir->extent); ip->i_size = isonum_733(isodir->size); ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent; /* * Setup time stamp, attribute */ vp->v_type = VNON; switch (imp->iso_ftype) { default: /* ISO_FTYPE_9660 */ { struct buf *bp2; int off; if ((imp->im_flags & ISOFSMNT_EXTATT) && (off = isonum_711(isodir->ext_attr_length))) cd9660_blkatoff(vp, (off_t)-(off << imp->im_bshift), NULL, &bp2); else bp2 = NULL; cd9660_defattr(isodir, ip, bp2, ISO_FTYPE_9660); cd9660_deftstamp(isodir, ip, bp2, ISO_FTYPE_9660); if (bp2) brelse(bp2); break; } case ISO_FTYPE_RRIP: cd9660_rrip_analyze(isodir, ip, imp); break; } brelse(bp); /* * Initialize the associated vnode */ switch (vp->v_type = IFTOVT(ip->inode.iso_mode)) { case VFIFO: vp->v_op = &cd9660_fifoops; break; default: VN_LOCK_ASHARE(vp); break; } if (ip->iso_extent == imp->root_extent) vp->v_vflag |= VV_ROOT; /* * XXX need generation number? */ *vpp = vp; return (0); } diff --git a/sys/geom/uzip/g_uzip.c b/sys/geom/uzip/g_uzip.c index 34f061c10fe4..c7d866cc643b 100644 --- a/sys/geom/uzip/g_uzip.c +++ b/sys/geom/uzip/g_uzip.c @@ -1,901 +1,922 @@ /*- * Copyright (c) 2004 Max Khon * Copyright (c) 2014 Juniper Networks, Inc. * Copyright (c) 2006-2016 Maxim Sobolev * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_geom.h" MALLOC_DEFINE(M_GEOM_UZIP, "geom_uzip", "GEOM UZIP data structures"); FEATURE(geom_uzip, "GEOM read-only compressed disks support"); struct g_uzip_blk { uint64_t offset; uint32_t blen; unsigned char last:1; unsigned char padded:1; #define BLEN_UNDEF UINT32_MAX }; #ifndef ABS #define ABS(a) ((a) < 0 ? 
-(a) : (a)) #endif #define BLK_IN_RANGE(mcn, bcn, ilen) \ (((bcn) != BLEN_UNDEF) && ( \ ((ilen) >= 0 && (mcn >= bcn) && (mcn <= ((intmax_t)(bcn) + (ilen)))) || \ ((ilen) < 0 && (mcn <= bcn) && (mcn >= ((intmax_t)(bcn) + (ilen)))) \ )) #ifdef GEOM_UZIP_DEBUG # define GEOM_UZIP_DBG_DEFAULT 3 #else # define GEOM_UZIP_DBG_DEFAULT 0 #endif #define GUZ_DBG_ERR 1 #define GUZ_DBG_INFO 2 #define GUZ_DBG_IO 3 #define GUZ_DBG_TOC 4 #define GUZ_DEV_SUFX ".uzip" #define GUZ_DEV_NAME(p) (p GUZ_DEV_SUFX) static char g_uzip_attach_to[MAXPATHLEN] = {"*"}; static char g_uzip_noattach_to[MAXPATHLEN] = {GUZ_DEV_NAME("*")}; TUNABLE_STR("kern.geom.uzip.attach_to", g_uzip_attach_to, sizeof(g_uzip_attach_to)); TUNABLE_STR("kern.geom.uzip.noattach_to", g_uzip_noattach_to, sizeof(g_uzip_noattach_to)); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, uzip, CTLFLAG_RW, 0, "GEOM_UZIP stuff"); static u_int g_uzip_debug = GEOM_UZIP_DBG_DEFAULT; SYSCTL_UINT(_kern_geom_uzip, OID_AUTO, debug, CTLFLAG_RWTUN, &g_uzip_debug, 0, "Debug level (0-4)"); static u_int g_uzip_debug_block = BLEN_UNDEF; SYSCTL_UINT(_kern_geom_uzip, OID_AUTO, debug_block, CTLFLAG_RWTUN, &g_uzip_debug_block, 0, "Debug operations around specific cluster#"); #define DPRINTF(lvl, a) \ if ((lvl) <= g_uzip_debug) { \ printf a; \ } #define DPRINTF_BLK(lvl, cn, a) \ if ((lvl) <= g_uzip_debug || \ BLK_IN_RANGE(cn, g_uzip_debug_block, 8) || \ BLK_IN_RANGE(cn, g_uzip_debug_block, -8)) { \ printf a; \ } #define DPRINTF_BRNG(lvl, bcn, ecn, a) \ KASSERT(bcn < ecn, ("DPRINTF_BRNG: invalid range (%ju, %ju)", \ (uintmax_t)bcn, (uintmax_t)ecn)); \ if (((lvl) <= g_uzip_debug) || \ BLK_IN_RANGE(g_uzip_debug_block, bcn, \ (intmax_t)ecn - (intmax_t)bcn)) { \ printf a; \ } #define UZIP_CLASS_NAME "UZIP" /* * Maximum allowed valid block size (to prevent foot-shooting) */ #define MAX_BLKSZ (MAXPHYS) static char CLOOP_MAGIC_START[] = "#!/bin/sh\n"; static void g_uzip_read_done(struct bio *bp); static void g_uzip_do(struct g_uzip_softc *, 
struct bio *bp); static void g_uzip_softc_free(struct g_uzip_softc *sc, struct g_geom *gp) { if (gp != NULL) { DPRINTF(GUZ_DBG_INFO, ("%s: %d requests, %d cached\n", gp->name, sc->req_total, sc->req_cached)); } mtx_lock(&sc->queue_mtx); sc->wrkthr_flags |= GUZ_SHUTDOWN; wakeup(sc); while (!(sc->wrkthr_flags & GUZ_EXITING)) { msleep(sc->procp, &sc->queue_mtx, PRIBIO, "guzfree", hz / 10); } mtx_unlock(&sc->queue_mtx); sc->dcp->free(sc->dcp); free(sc->toc, M_GEOM_UZIP); mtx_destroy(&sc->queue_mtx); mtx_destroy(&sc->last_mtx); free(sc->last_buf, M_GEOM_UZIP); free(sc, M_GEOM_UZIP); } static int g_uzip_cached(struct g_geom *gp, struct bio *bp) { struct g_uzip_softc *sc; off_t ofs; size_t blk, blkofs, usz; sc = gp->softc; ofs = bp->bio_offset + bp->bio_completed; blk = ofs / sc->blksz; mtx_lock(&sc->last_mtx); if (blk == sc->last_blk) { blkofs = ofs % sc->blksz; usz = sc->blksz - blkofs; if (bp->bio_resid < usz) usz = bp->bio_resid; memcpy(bp->bio_data + bp->bio_completed, sc->last_buf + blkofs, usz); sc->req_cached++; mtx_unlock(&sc->last_mtx); DPRINTF(GUZ_DBG_IO, ("%s/%s: %p: offset=%jd: got %jd bytes " "from cache\n", __func__, gp->name, bp, (intmax_t)ofs, (intmax_t)usz)); bp->bio_completed += usz; bp->bio_resid -= usz; if (bp->bio_resid == 0) { g_io_deliver(bp, 0); return (1); } } else mtx_unlock(&sc->last_mtx); return (0); } #define BLK_ENDS(sc, bi) ((sc)->toc[(bi)].offset + \ (sc)->toc[(bi)].blen) #define BLK_IS_CONT(sc, bi) (BLK_ENDS((sc), (bi) - 1) == \ (sc)->toc[(bi)].offset) #define BLK_IS_NIL(sc, bi) ((sc)->toc[(bi)].blen == 0) #define TOFF_2_BOFF(sc, pp, bi) ((sc)->toc[(bi)].offset - \ (sc)->toc[(bi)].offset % (pp)->sectorsize) #define TLEN_2_BLEN(sc, pp, bp, ei) roundup(BLK_ENDS((sc), (ei)) - \ (bp)->bio_offset, (pp)->sectorsize) static int g_uzip_request(struct g_geom *gp, struct bio *bp) { struct g_uzip_softc *sc; struct bio *bp2; struct g_consumer *cp; struct g_provider *pp; off_t ofs, start_blk_ofs; size_t i, start_blk, end_blk, zsize; if 
(g_uzip_cached(gp, bp) != 0) return (1); sc = gp->softc; cp = LIST_FIRST(&gp->consumer); pp = cp->provider; ofs = bp->bio_offset + bp->bio_completed; start_blk = ofs / sc->blksz; KASSERT(start_blk < sc->nblocks, ("start_blk out of range")); end_blk = howmany(ofs + bp->bio_resid, sc->blksz); KASSERT(end_blk <= sc->nblocks, ("end_blk out of range")); for (; BLK_IS_NIL(sc, start_blk) && start_blk < end_blk; start_blk++) { /* Fill in any leading Nil blocks */ start_blk_ofs = ofs % sc->blksz; zsize = MIN(sc->blksz - start_blk_ofs, bp->bio_resid); DPRINTF_BLK(GUZ_DBG_IO, start_blk, ("%s/%s: %p/%ju: " "filling %ju zero bytes\n", __func__, gp->name, gp, (uintmax_t)bp->bio_completed, (uintmax_t)zsize)); bzero(bp->bio_data + bp->bio_completed, zsize); bp->bio_completed += zsize; bp->bio_resid -= zsize; ofs += zsize; } if (start_blk == end_blk) { KASSERT(bp->bio_resid == 0, ("bp->bio_resid is invalid")); /* * No non-Nil data is left, complete request immediately. */ DPRINTF(GUZ_DBG_IO, ("%s/%s: %p: all done returning %ju " "bytes\n", __func__, gp->name, gp, (uintmax_t)bp->bio_completed)); g_io_deliver(bp, 0); return (1); } for (i = start_blk + 1; i < end_blk; i++) { /* Trim discontinuous areas if any */ if (!BLK_IS_CONT(sc, i)) { end_blk = i; break; } } DPRINTF_BRNG(GUZ_DBG_IO, start_blk, end_blk, ("%s/%s: %p: " "start=%u (%ju[%jd]), end=%u (%ju)\n", __func__, gp->name, bp, (u_int)start_blk, (uintmax_t)sc->toc[start_blk].offset, (intmax_t)sc->toc[start_blk].blen, (u_int)end_blk, (uintmax_t)BLK_ENDS(sc, end_blk - 1))); bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return (1); } bp2->bio_done = g_uzip_read_done; bp2->bio_offset = TOFF_2_BOFF(sc, pp, start_blk); while (1) { bp2->bio_length = TLEN_2_BLEN(sc, pp, bp2, end_blk - 1); if (bp2->bio_length <= MAXPHYS) { break; } if (end_blk == (start_blk + 1)) { break; } end_blk--; } DPRINTF(GUZ_DBG_IO, ("%s/%s: bp2->bio_length = %jd, " "bp2->bio_offset = %jd\n", __func__, gp->name, (intmax_t)bp2->bio_length, 
(intmax_t)bp2->bio_offset)); bp2->bio_data = malloc(bp2->bio_length, M_GEOM_UZIP, M_NOWAIT); if (bp2->bio_data == NULL) { g_destroy_bio(bp2); g_io_deliver(bp, ENOMEM); return (1); } DPRINTF_BRNG(GUZ_DBG_IO, start_blk, end_blk, ("%s/%s: %p: " "reading %jd bytes from offset %jd\n", __func__, gp->name, bp, (intmax_t)bp2->bio_length, (intmax_t)bp2->bio_offset)); g_io_request(bp2, cp); return (0); } static void g_uzip_read_done(struct bio *bp) { struct bio *bp2; struct g_geom *gp; struct g_uzip_softc *sc; bp2 = bp->bio_parent; gp = bp2->bio_to->geom; sc = gp->softc; mtx_lock(&sc->queue_mtx); bioq_disksort(&sc->bio_queue, bp); mtx_unlock(&sc->queue_mtx); wakeup(sc); } static int g_uzip_memvcmp(const void *memory, unsigned char val, size_t size) { const u_char *mm; mm = (const u_char *)memory; return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0; } static void g_uzip_do(struct g_uzip_softc *sc, struct bio *bp) { struct bio *bp2; struct g_provider *pp; struct g_consumer *cp; struct g_geom *gp; char *data, *data2; off_t ofs; size_t blk, blkofs, len, ulen, firstblk; int err; bp2 = bp->bio_parent; gp = bp2->bio_to->geom; cp = LIST_FIRST(&gp->consumer); pp = cp->provider; bp2->bio_error = bp->bio_error; if (bp2->bio_error != 0) goto done; /* Make sure there's forward progress. 
*/ if (bp->bio_completed == 0) { bp2->bio_error = ECANCELED; goto done; } ofs = bp2->bio_offset + bp2->bio_completed; firstblk = blk = ofs / sc->blksz; blkofs = ofs % sc->blksz; data = bp->bio_data + sc->toc[blk].offset % pp->sectorsize; data2 = bp2->bio_data + bp2->bio_completed; while (bp->bio_completed && bp2->bio_resid) { if (blk > firstblk && !BLK_IS_CONT(sc, blk)) { DPRINTF_BLK(GUZ_DBG_IO, blk, ("%s/%s: %p: backref'ed " "cluster #%u requested, looping around\n", __func__, gp->name, bp2, (u_int)blk)); goto done; } ulen = MIN(sc->blksz - blkofs, bp2->bio_resid); len = sc->toc[blk].blen; DPRINTF(GUZ_DBG_IO, ("%s/%s: %p/%ju: data2=%p, ulen=%u, " "data=%p, len=%u\n", __func__, gp->name, gp, bp->bio_completed, data2, (u_int)ulen, data, (u_int)len)); if (len == 0) { /* All zero block: no cache update */ zero_block: bzero(data2, ulen); } else if (len <= bp->bio_completed) { mtx_lock(&sc->last_mtx); err = sc->dcp->decompress(sc->dcp, gp->name, data, len, sc->last_buf); if (err != 0 && sc->toc[blk].last != 0) { /* * Last block decompression has failed, check * if it's just zero padding. */ if (g_uzip_memvcmp(data, '\0', len) == 0) { sc->toc[blk].blen = 0; sc->last_blk = -1; mtx_unlock(&sc->last_mtx); len = 0; goto zero_block; } } if (err != 0) { sc->last_blk = -1; mtx_unlock(&sc->last_mtx); bp2->bio_error = EILSEQ; DPRINTF(GUZ_DBG_ERR, ("%s/%s: decompress" "(%p, %ju, %ju) failed\n", __func__, gp->name, sc->dcp, (uintmax_t)blk, (uintmax_t)len)); goto done; } sc->last_blk = blk; memcpy(data2, sc->last_buf + blkofs, ulen); mtx_unlock(&sc->last_mtx); err = sc->dcp->rewind(sc->dcp, gp->name); if (err != 0) { bp2->bio_error = EILSEQ; DPRINTF(GUZ_DBG_ERR, ("%s/%s: rewind(%p) " "failed\n", __func__, gp->name, sc->dcp)); goto done; } data += len; } else break; data2 += ulen; bp2->bio_completed += ulen; bp2->bio_resid -= ulen; bp->bio_completed -= len; blkofs = 0; blk++; } done: /* Finish processing the request. 
*/ free(bp->bio_data, M_GEOM_UZIP); g_destroy_bio(bp); if (bp2->bio_error != 0 || bp2->bio_resid == 0) g_io_deliver(bp2, bp2->bio_error); else g_uzip_request(gp, bp2); } static void g_uzip_start(struct bio *bp) { struct g_provider *pp; struct g_geom *gp; struct g_uzip_softc *sc; pp = bp->bio_to; gp = pp->geom; DPRINTF(GUZ_DBG_IO, ("%s/%s: %p: cmd=%d, offset=%jd, length=%jd, " "buffer=%p\n", __func__, gp->name, bp, bp->bio_cmd, (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length, bp->bio_data)); sc = gp->softc; sc->req_total++; + if (bp->bio_cmd == BIO_GETATTR) { + struct bio *bp2; + struct g_consumer *cp; + struct g_geom *gp; + struct g_provider *pp; + + /* pass on MNT:* requests and ignore others */ + if (strncmp(bp->bio_attribute, "MNT:", 4) == 0) { + bp2 = g_clone_bio(bp); + if (bp2 == NULL) { + g_io_deliver(bp, ENOMEM); + return; + } + bp2->bio_done = g_std_done; + pp = bp->bio_to; + gp = pp->geom; + cp = LIST_FIRST(&gp->consumer); + g_io_request(bp2, cp); + return; + } + } if (bp->bio_cmd != BIO_READ) { g_io_deliver(bp, EOPNOTSUPP); return; } bp->bio_resid = bp->bio_length; bp->bio_completed = 0; g_uzip_request(gp, bp); } static void g_uzip_orphan(struct g_consumer *cp) { struct g_geom *gp; g_trace(G_T_TOPOLOGY, "%s(%p/%s)", __func__, cp, cp->provider->name); g_topology_assert(); gp = cp->geom; g_uzip_softc_free(gp->softc, gp); gp->softc = NULL; g_wither_geom(gp, ENXIO); } static int g_uzip_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); KASSERT (cp != NULL, ("g_uzip_access but no consumer")); if (cp->acw + dw > 0) return (EROFS); return (g_access(cp, dr, dw, de)); } static void g_uzip_spoiled(struct g_consumer *cp) { struct g_geom *gp; G_VALID_CONSUMER(cp); gp = cp->geom; g_trace(G_T_TOPOLOGY, "%s(%p/%s)", __func__, cp, gp->name); g_topology_assert(); g_uzip_softc_free(gp->softc, gp); gp->softc = NULL; g_wither_geom(gp, ENXIO); } static int g_uzip_parse_toc(struct 
g_uzip_softc *sc, struct g_provider *pp, struct g_geom *gp) { uint32_t i, j, backref_to; uint64_t max_offset, min_offset; struct g_uzip_blk *last_blk; min_offset = sizeof(struct cloop_header) + (sc->nblocks + 1) * sizeof(uint64_t); max_offset = sc->toc[0].offset - 1; last_blk = &sc->toc[0]; for (i = 0; i < sc->nblocks; i++) { /* First do some bounds checking */ if ((sc->toc[i].offset < min_offset) || (sc->toc[i].offset > pp->mediasize)) { goto error_offset; } DPRINTF_BLK(GUZ_DBG_IO, i, ("%s: cluster #%u " "offset=%ju max_offset=%ju\n", gp->name, (u_int)i, (uintmax_t)sc->toc[i].offset, (uintmax_t)max_offset)); backref_to = BLEN_UNDEF; if (sc->toc[i].offset < max_offset) { /* * For the backref'ed blocks search already parsed * TOC entries for the matching offset and copy the * size from matched entry. */ for (j = 0; j <= i; j++) { if (sc->toc[j].offset == sc->toc[i].offset && !BLK_IS_NIL(sc, j)) { break; } if (j != i) { continue; } DPRINTF(GUZ_DBG_ERR, ("%s: cannot match " "backref'ed offset at cluster #%u\n", gp->name, i)); return (-1); } sc->toc[i].blen = sc->toc[j].blen; backref_to = j; } else { last_blk = &sc->toc[i]; /* * For the "normal blocks" seek forward until we hit * block whose offset is larger than ours and assume * it's going to be the next one. 
*/ for (j = i + 1; j < sc->nblocks; j++) { if (sc->toc[j].offset > max_offset) { break; } } sc->toc[i].blen = sc->toc[j].offset - sc->toc[i].offset; if (BLK_ENDS(sc, i) > pp->mediasize) { DPRINTF(GUZ_DBG_ERR, ("%s: cluster #%u " "extends past media boundary (%ju > %ju)\n", gp->name, (u_int)i, (uintmax_t)BLK_ENDS(sc, i), (intmax_t)pp->mediasize)); return (-1); } KASSERT(max_offset <= sc->toc[i].offset, ( "%s: max_offset is incorrect: %ju", gp->name, (uintmax_t)max_offset)); max_offset = BLK_ENDS(sc, i) - 1; } DPRINTF_BLK(GUZ_DBG_TOC, i, ("%s: cluster #%u, original %u " "bytes, in %u bytes", gp->name, i, sc->blksz, sc->toc[i].blen)); if (backref_to != BLEN_UNDEF) { DPRINTF_BLK(GUZ_DBG_TOC, i, (" (->#%u)", (u_int)backref_to)); } DPRINTF_BLK(GUZ_DBG_TOC, i, ("\n")); } last_blk->last = 1; /* Do a second pass to validate block lengths */ for (i = 0; i < sc->nblocks; i++) { if (sc->toc[i].blen > sc->dcp->max_blen) { if (sc->toc[i].last == 0) { DPRINTF(GUZ_DBG_ERR, ("%s: cluster #%u " "length (%ju) exceeds " "max_blen (%ju)\n", gp->name, i, (uintmax_t)sc->toc[i].blen, (uintmax_t)sc->dcp->max_blen)); return (-1); } DPRINTF(GUZ_DBG_INFO, ("%s: cluster #%u extra " "padding is detected, trimmed to %ju\n", gp->name, i, (uintmax_t)sc->dcp->max_blen)); sc->toc[i].blen = sc->dcp->max_blen; sc->toc[i].padded = 1; } } return (0); error_offset: DPRINTF(GUZ_DBG_ERR, ("%s: cluster #%u: invalid offset %ju, " "min_offset=%ju mediasize=%jd\n", gp->name, (u_int)i, sc->toc[i].offset, min_offset, pp->mediasize)); return (-1); } static struct g_geom * g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags) { int error; uint32_t i, total_offsets, offsets_read, blk; void *buf; struct cloop_header *header; struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp2; struct g_uzip_softc *sc; enum { G_UZIP = 1, G_ULZMA } type; g_trace(G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, pp->name); g_topology_assert(); /* Skip providers that are already open for writing. 
*/ if (pp->acw > 0) return (NULL); if ((fnmatch(g_uzip_attach_to, pp->name, 0) != 0) || (fnmatch(g_uzip_noattach_to, pp->name, 0) == 0)) { DPRINTF(GUZ_DBG_INFO, ("%s(%s,%s), ignoring\n", __func__, mp->name, pp->name)); return (NULL); } buf = NULL; /* * Create geom instance. */ gp = g_new_geomf(mp, GUZ_DEV_NAME("%s"), pp->name); cp = g_new_consumer(gp); error = g_attach(cp, pp); if (error == 0) error = g_access(cp, 1, 0, 0); if (error) { goto e1; } g_topology_unlock(); /* * Read cloop header, look for CLOOP magic, perform * other validity checks. */ DPRINTF(GUZ_DBG_INFO, ("%s: media sectorsize %u, mediasize %jd\n", gp->name, pp->sectorsize, (intmax_t)pp->mediasize)); buf = g_read_data(cp, 0, pp->sectorsize, NULL); if (buf == NULL) goto e2; header = (struct cloop_header *) buf; if (strncmp(header->magic, CLOOP_MAGIC_START, sizeof(CLOOP_MAGIC_START) - 1) != 0) { DPRINTF(GUZ_DBG_ERR, ("%s: no CLOOP magic\n", gp->name)); goto e3; } switch (header->magic[CLOOP_OFS_COMPR]) { case CLOOP_COMP_LZMA: case CLOOP_COMP_LZMA_DDP: type = G_ULZMA; if (header->magic[CLOOP_OFS_VERSN] < CLOOP_MINVER_LZMA) { DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n", gp->name)); goto e3; } DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_LZMA image found\n", gp->name)); break; case CLOOP_COMP_LIBZ: case CLOOP_COMP_LIBZ_DDP: type = G_UZIP; if (header->magic[CLOOP_OFS_VERSN] < CLOOP_MINVER_ZLIB) { DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n", gp->name)); goto e3; } DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_ZLIB image found\n", gp->name)); break; default: DPRINTF(GUZ_DBG_ERR, ("%s: unsupported image type\n", gp->name)); goto e3; } /* * Initialize softc and read offsets. 
*/ sc = malloc(sizeof(*sc), M_GEOM_UZIP, M_WAITOK | M_ZERO); gp->softc = sc; sc->blksz = ntohl(header->blksz); sc->nblocks = ntohl(header->nblocks); if (sc->blksz % 512 != 0) { printf("%s: block size (%u) should be multiple of 512.\n", gp->name, sc->blksz); goto e4; } if (sc->blksz > MAX_BLKSZ) { printf("%s: block size (%u) should not be larger than %d.\n", gp->name, sc->blksz, MAX_BLKSZ); } total_offsets = sc->nblocks + 1; if (sizeof(struct cloop_header) + total_offsets * sizeof(uint64_t) > pp->mediasize) { printf("%s: media too small for %u blocks\n", gp->name, sc->nblocks); goto e4; } sc->toc = malloc(total_offsets * sizeof(struct g_uzip_blk), M_GEOM_UZIP, M_WAITOK | M_ZERO); offsets_read = MIN(total_offsets, (pp->sectorsize - sizeof(*header)) / sizeof(uint64_t)); for (i = 0; i < offsets_read; i++) { sc->toc[i].offset = be64toh(((uint64_t *) (header + 1))[i]); sc->toc[i].blen = BLEN_UNDEF; } DPRINTF(GUZ_DBG_INFO, ("%s: %u offsets in the first sector\n", gp->name, offsets_read)); for (blk = 1; offsets_read < total_offsets; blk++) { uint32_t nread; free(buf, M_GEOM); buf = g_read_data( cp, blk * pp->sectorsize, pp->sectorsize, NULL); if (buf == NULL) goto e5; nread = MIN(total_offsets - offsets_read, pp->sectorsize / sizeof(uint64_t)); DPRINTF(GUZ_DBG_TOC, ("%s: %u offsets read from sector %d\n", gp->name, nread, blk)); for (i = 0; i < nread; i++) { sc->toc[offsets_read + i].offset = be64toh(((uint64_t *) buf)[i]); sc->toc[offsets_read + i].blen = BLEN_UNDEF; } offsets_read += nread; } free(buf, M_GEOM); buf = NULL; offsets_read -= 1; DPRINTF(GUZ_DBG_INFO, ("%s: done reading %u block offsets from %u " "sectors\n", gp->name, offsets_read, blk)); if (sc->nblocks != offsets_read) { DPRINTF(GUZ_DBG_ERR, ("%s: read %s offsets than expected " "blocks\n", gp->name, sc->nblocks < offsets_read ? 
"more" : "less")); goto e5; } if (type == G_UZIP) { sc->dcp = g_uzip_zlib_ctor(sc->blksz); } else { sc->dcp = g_uzip_lzma_ctor(sc->blksz); } if (sc->dcp == NULL) { goto e5; } /* * "Fake" last+1 block, to make it easier for the TOC parser to * iterate without making the last element a special case. */ sc->toc[sc->nblocks].offset = pp->mediasize; /* Massage TOC (table of contents), make sure it is sound */ if (g_uzip_parse_toc(sc, pp, gp) != 0) { DPRINTF(GUZ_DBG_ERR, ("%s: TOC error\n", gp->name)); goto e6; } mtx_init(&sc->last_mtx, "geom_uzip cache", NULL, MTX_DEF); mtx_init(&sc->queue_mtx, "geom_uzip wrkthread", NULL, MTX_DEF); bioq_init(&sc->bio_queue); sc->last_blk = -1; sc->last_buf = malloc(sc->blksz, M_GEOM_UZIP, M_WAITOK); sc->req_total = 0; sc->req_cached = 0; sc->uzip_do = &g_uzip_do; error = kproc_create(g_uzip_wrkthr, sc, &sc->procp, 0, 0, "%s", gp->name); if (error != 0) { goto e7; } g_topology_lock(); pp2 = g_new_providerf(gp, "%s", gp->name); pp2->sectorsize = 512; pp2->mediasize = (off_t)sc->nblocks * sc->blksz; pp2->stripesize = pp->stripesize; pp2->stripeoffset = pp->stripeoffset; g_error_provider(pp2, 0); g_access(cp, -1, 0, 0); DPRINTF(GUZ_DBG_INFO, ("%s: taste ok (%d, %jd), (%d, %d), %x\n", gp->name, pp2->sectorsize, (intmax_t)pp2->mediasize, pp2->stripeoffset, pp2->stripesize, pp2->flags)); DPRINTF(GUZ_DBG_INFO, ("%s: %u x %u blocks\n", gp->name, sc->nblocks, sc->blksz)); return (gp); e7: free(sc->last_buf, M_GEOM); mtx_destroy(&sc->queue_mtx); mtx_destroy(&sc->last_mtx); e6: sc->dcp->free(sc->dcp); e5: free(sc->toc, M_GEOM); e4: free(gp->softc, M_GEOM_UZIP); e3: if (buf != NULL) { free(buf, M_GEOM); } e2: g_topology_lock(); g_access(cp, -1, 0, 0); e1: g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); return (NULL); } static int g_uzip_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { struct g_provider *pp; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, gp->name); g_topology_assert(); if (gp->softc == 
NULL) { DPRINTF(GUZ_DBG_ERR, ("%s(%s): gp->softc == NULL\n", __func__, gp->name)); return (ENXIO); } KASSERT(gp != NULL, ("NULL geom")); pp = LIST_FIRST(&gp->provider); KASSERT(pp != NULL, ("NULL provider")); if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) return (EBUSY); g_uzip_softc_free(gp->softc, gp); gp->softc = NULL; g_wither_geom(gp, ENXIO); return (0); } static struct g_class g_uzip_class = { .name = UZIP_CLASS_NAME, .version = G_VERSION, .taste = g_uzip_taste, .destroy_geom = g_uzip_destroy_geom, .start = g_uzip_start, .orphan = g_uzip_orphan, .access = g_uzip_access, .spoiled = g_uzip_spoiled, }; DECLARE_GEOM_CLASS(g_uzip_class, g_uzip); MODULE_DEPEND(g_uzip, zlib, 1, 1, 1); diff --git a/sys/sys/mdioctl.h b/sys/sys/mdioctl.h index 4974cf84e8ba..9a7642923ccd 100644 --- a/sys/sys/mdioctl.h +++ b/sys/sys/mdioctl.h @@ -1,92 +1,93 @@ /*- * Copyright (c) 1988 University of Utah. * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah $Hdr: fdioctl.h 1.1 90/07/09$ * * @(#)vnioctl.h 8.1 (Berkeley) 6/10/93 * * From: src/sys/sys/vnioctl.h,v 1.4 * * $FreeBSD$ */ #ifndef _SYS_MDIOCTL_H_ #define _SYS_MDIOCTL_H_ enum md_types {MD_MALLOC, MD_PRELOAD, MD_VNODE, MD_SWAP, MD_NULL}; /* * Ioctl definitions for memory disk pseudo-device. */ #define MDNPAD 97 struct md_ioctl { unsigned md_version; /* Structure layout version */ unsigned md_unit; /* unit number */ enum md_types md_type ; /* type of disk */ char *md_file; /* pathname of file to mount */ off_t md_mediasize; /* size of disk in bytes */ unsigned md_sectorsize; /* sectorsize */ unsigned md_options; /* options */ u_int64_t md_base; /* base address */ int md_fwheads; /* firmware heads */ int md_fwsectors; /* firmware sectors */ int md_pad[MDNPAD]; /* padding for future ideas */ }; #define MD_NAME "md" #define MDCTL_NAME "mdctl" #define MDIOVERSION 0 /* * Before you can use a unit, it must be configured with MDIOCSET. * The configuration persists across opens and closes of the device; * an MDIOCCLR must be used to reset a configuration. An attempt to * MDIOCSET an already active unit will return EBUSY. 
*/ #define MDIOCATTACH _IOWR('m', 0, struct md_ioctl) /* attach disk */ #define MDIOCDETACH _IOWR('m', 1, struct md_ioctl) /* detach disk */ #define MDIOCQUERY _IOWR('m', 2, struct md_ioctl) /* query status */ #define MDIOCLIST _IOWR('m', 3, struct md_ioctl) /* query status */ #define MDIOCRESIZE _IOWR('m', 4, struct md_ioctl) /* resize disk */ #define MD_CLUSTER 0x01 /* Don't cluster */ #define MD_RESERVE 0x02 /* Pre-reserve swap */ #define MD_AUTOUNIT 0x04 /* Assign next free unit */ #define MD_READONLY 0x08 /* Readonly mode */ #define MD_COMPRESS 0x10 /* Compression mode */ #define MD_FORCE 0x20 /* Don't try to prevent foot-shooting */ #define MD_ASYNC 0x40 /* Asynchronous mode */ +#define MD_VERIFY 0x80 /* Open file with O_VERIFY (vnode only) */ #endif /* _SYS_MDIOCTL_H_*/ diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 519b4cf94f52..152b2586153c 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -1,987 +1,988 @@ /*- * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mount.h 8.21 (Berkeley) 5/20/95 * $FreeBSD$ */ #ifndef _SYS_MOUNT_H_ #define _SYS_MOUNT_H_ #include #include #ifdef _KERNEL #include #include #include #include #endif /* * NOTE: When changing statfs structure, mount structure, MNT_* flags or * MNTK_* flags also update DDB show mount command in vfs_subr.c. */ typedef struct fsid { int32_t val[2]; } fsid_t; /* filesystem id type */ /* * File identifier. * These are unique per filesystem on a single machine. */ #define MAXFIDSZ 16 struct fid { u_short fid_len; /* length of data in bytes */ u_short fid_data0; /* force longword alignment */ char fid_data[MAXFIDSZ]; /* data (variable length) */ }; /* * filesystem statistics */ #define MFSNAMELEN 16 /* length of type name including null */ #define MNAMELEN 1024 /* size of on/from name bufs */ #define STATFS_VERSION 0x20140518 /* current version number */ struct statfs { uint32_t f_version; /* structure version number */ uint32_t f_type; /* type of filesystem */ uint64_t f_flags; /* copy of mount exported flags */ uint64_t f_bsize; /* filesystem fragment size */ uint64_t f_iosize; /* optimal transfer block size */ uint64_t f_blocks; /* total data blocks in filesystem */ uint64_t f_bfree; /* free blocks in filesystem */ int64_t f_bavail; /* free blocks avail to non-superuser */ uint64_t f_files; /* total file nodes in filesystem */ int64_t f_ffree; /* free nodes avail to non-superuser */ uint64_t f_syncwrites; /* count of sync writes since mount */ 
uint64_t f_asyncwrites; /* count of async writes since mount */ uint64_t f_syncreads; /* count of sync reads since mount */ uint64_t f_asyncreads; /* count of async reads since mount */ uint64_t f_spare[10]; /* unused spare */ uint32_t f_namemax; /* maximum filename length */ uid_t f_owner; /* user that mounted the filesystem */ fsid_t f_fsid; /* filesystem id */ char f_charspare[80]; /* spare string space */ char f_fstypename[MFSNAMELEN]; /* filesystem type name */ char f_mntfromname[MNAMELEN]; /* mounted filesystem */ char f_mntonname[MNAMELEN]; /* directory on which mounted */ }; #if defined(_WANT_FREEBSD11_STATFS) || defined(_KERNEL) #define FREEBSD11_STATFS_VERSION 0x20030518 /* current version number */ struct freebsd11_statfs { uint32_t f_version; /* structure version number */ uint32_t f_type; /* type of filesystem */ uint64_t f_flags; /* copy of mount exported flags */ uint64_t f_bsize; /* filesystem fragment size */ uint64_t f_iosize; /* optimal transfer block size */ uint64_t f_blocks; /* total data blocks in filesystem */ uint64_t f_bfree; /* free blocks in filesystem */ int64_t f_bavail; /* free blocks avail to non-superuser */ uint64_t f_files; /* total file nodes in filesystem */ int64_t f_ffree; /* free nodes avail to non-superuser */ uint64_t f_syncwrites; /* count of sync writes since mount */ uint64_t f_asyncwrites; /* count of async writes since mount */ uint64_t f_syncreads; /* count of sync reads since mount */ uint64_t f_asyncreads; /* count of async reads since mount */ uint64_t f_spare[10]; /* unused spare */ uint32_t f_namemax; /* maximum filename length */ uid_t f_owner; /* user that mounted the filesystem */ fsid_t f_fsid; /* filesystem id */ char f_charspare[80]; /* spare string space */ char f_fstypename[16]; /* filesystem type name */ char f_mntfromname[88]; /* mounted filesystem */ char f_mntonname[88]; /* directory on which mounted */ }; #endif /* _WANT_FREEBSD11_STATFS || _KERNEL */ #ifdef _KERNEL #define OMFSNAMELEN 16 /* length 
of fs type name, including null */ #define OMNAMELEN (88 - 2 * sizeof(long)) /* size of on/from name bufs */ /* XXX getfsstat.2 is out of date with write and read counter changes here. */ /* XXX statfs.2 is out of date with read counter changes here. */ struct ostatfs { long f_spare2; /* placeholder */ long f_bsize; /* fundamental filesystem block size */ long f_iosize; /* optimal transfer block size */ long f_blocks; /* total data blocks in filesystem */ long f_bfree; /* free blocks in fs */ long f_bavail; /* free blocks avail to non-superuser */ long f_files; /* total file nodes in filesystem */ long f_ffree; /* free file nodes in fs */ fsid_t f_fsid; /* filesystem id */ uid_t f_owner; /* user that mounted the filesystem */ int f_type; /* type of filesystem */ int f_flags; /* copy of mount exported flags */ long f_syncwrites; /* count of sync writes since mount */ long f_asyncwrites; /* count of async writes since mount */ char f_fstypename[OMFSNAMELEN]; /* fs type name */ char f_mntonname[OMNAMELEN]; /* directory on which mounted */ long f_syncreads; /* count of sync reads since mount */ long f_asyncreads; /* count of async reads since mount */ short f_spares1; /* unused spare */ char f_mntfromname[OMNAMELEN];/* mounted filesystem */ short f_spares2; /* unused spare */ /* * XXX on machines where longs are aligned to 8-byte boundaries, there * is an unnamed int32_t here. This spare was after the apparent end * of the struct until we bit off the read counters from f_mntonname. */ long f_spare[2]; /* unused spare */ }; TAILQ_HEAD(vnodelst, vnode); /* Mount options list */ TAILQ_HEAD(vfsoptlist, vfsopt); struct vfsopt { TAILQ_ENTRY(vfsopt) link; char *name; void *value; int len; int pos; int seen; }; /* * Structure per mounted filesystem. Each mounted filesystem has an * array of operations and an instance record. The filesystems are * put on a doubly linked list. 
* * Lock reference: * l - mnt_listmtx * m - mountlist_mtx * i - interlock * v - vnode freelist mutex * * Unmarked fields are considered stable as long as a ref is held. * */ struct mount { struct mtx mnt_mtx; /* mount structure interlock */ int mnt_gen; /* struct mount generation */ #define mnt_startzero mnt_list TAILQ_ENTRY(mount) mnt_list; /* (m) mount list */ struct vfsops *mnt_op; /* operations on fs */ struct vfsconf *mnt_vfc; /* configuration info */ struct vnode *mnt_vnodecovered; /* vnode we mounted on */ struct vnode *mnt_syncer; /* syncer vnode */ int mnt_ref; /* (i) Reference count */ struct vnodelst mnt_nvnodelist; /* (i) list of vnodes */ int mnt_nvnodelistsize; /* (i) # of vnodes */ int mnt_writeopcount; /* (i) write syscalls pending */ int mnt_kern_flag; /* (i) kernel only flags */ uint64_t mnt_flag; /* (i) flags shared with user */ struct vfsoptlist *mnt_opt; /* current mount options */ struct vfsoptlist *mnt_optnew; /* new options passed to fs */ int mnt_maxsymlinklen; /* max size of short symlink */ struct statfs mnt_stat; /* cache of filesystem stats */ struct ucred *mnt_cred; /* credentials of mounter */ void * mnt_data; /* private data */ time_t mnt_time; /* last time written*/ int mnt_iosize_max; /* max size for clusters, etc */ struct netexport *mnt_export; /* export list */ struct label *mnt_label; /* MAC label for the fs */ u_int mnt_hashseed; /* Random seed for vfs_hash */ int mnt_lockref; /* (i) Lock reference count */ int mnt_secondary_writes; /* (i) # of secondary writes */ int mnt_secondary_accwrites;/* (i) secondary wr. 
starts */ struct thread *mnt_susp_owner; /* (i) thread owning suspension */ #define mnt_endzero mnt_gjprovider char *mnt_gjprovider; /* gjournal provider name */ struct mtx mnt_listmtx; struct vnodelst mnt_activevnodelist; /* (l) list of active vnodes */ int mnt_activevnodelistsize;/* (l) # of active vnodes */ struct vnodelst mnt_tmpfreevnodelist; /* (l) list of free vnodes */ int mnt_tmpfreevnodelistsize;/* (l) # of free vnodes */ struct lock mnt_explock; /* vfs_export walkers lock */ TAILQ_ENTRY(mount) mnt_upper_link; /* (m) we in the all uppers */ TAILQ_HEAD(, mount) mnt_uppers; /* (m) upper mounts over us*/ }; /* * Definitions for MNT_VNODE_FOREACH_ALL. */ struct vnode *__mnt_vnode_next_all(struct vnode **mvp, struct mount *mp); struct vnode *__mnt_vnode_first_all(struct vnode **mvp, struct mount *mp); void __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp); #define MNT_VNODE_FOREACH_ALL(vp, mp, mvp) \ for (vp = __mnt_vnode_first_all(&(mvp), (mp)); \ (vp) != NULL; vp = __mnt_vnode_next_all(&(mvp), (mp))) #define MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp) \ do { \ MNT_ILOCK(mp); \ __mnt_vnode_markerfree_all(&(mvp), (mp)); \ /* MNT_IUNLOCK(mp); -- done in above function */ \ mtx_assert(MNT_MTX(mp), MA_NOTOWNED); \ } while (0) /* * Definitions for MNT_VNODE_FOREACH_ACTIVE. 
*/ struct vnode *__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp); struct vnode *__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp); void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *); #define MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) \ for (vp = __mnt_vnode_first_active(&(mvp), (mp)); \ (vp) != NULL; vp = __mnt_vnode_next_active(&(mvp), (mp))) #define MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp) \ __mnt_vnode_markerfree_active(&(mvp), (mp)) #define MNT_ILOCK(mp) mtx_lock(&(mp)->mnt_mtx) #define MNT_ITRYLOCK(mp) mtx_trylock(&(mp)->mnt_mtx) #define MNT_IUNLOCK(mp) mtx_unlock(&(mp)->mnt_mtx) #define MNT_MTX(mp) (&(mp)->mnt_mtx) #define MNT_REF(mp) (mp)->mnt_ref++ #define MNT_REL(mp) do { \ KASSERT((mp)->mnt_ref > 0, ("negative mnt_ref")); \ (mp)->mnt_ref--; \ if ((mp)->mnt_ref == 0) \ wakeup((mp)); \ } while (0) #endif /* _KERNEL */ /* * User specifiable flags, stored in mnt_flag. */ #define MNT_RDONLY 0x0000000000000001ULL /* read only filesystem */ #define MNT_SYNCHRONOUS 0x0000000000000002ULL /* fs written synchronously */ #define MNT_NOEXEC 0x0000000000000004ULL /* can't exec from filesystem */ #define MNT_NOSUID 0x0000000000000008ULL /* don't honor setuid fs bits */ #define MNT_NFS4ACLS 0x0000000000000010ULL /* enable NFS version 4 ACLs */ #define MNT_UNION 0x0000000000000020ULL /* union with underlying fs */ #define MNT_ASYNC 0x0000000000000040ULL /* fs written asynchronously */ #define MNT_SUIDDIR 0x0000000000100000ULL /* special SUID dir handling */ #define MNT_SOFTDEP 0x0000000000200000ULL /* using soft updates */ #define MNT_NOSYMFOLLOW 0x0000000000400000ULL /* do not follow symlinks */ #define MNT_GJOURNAL 0x0000000002000000ULL /* GEOM journal support enabled */ #define MNT_MULTILABEL 0x0000000004000000ULL /* MAC support for objects */ #define MNT_ACLS 0x0000000008000000ULL /* ACL support enabled */ #define MNT_NOATIME 0x0000000010000000ULL /* dont update file access time */ #define MNT_NOCLUSTERR 0x0000000040000000ULL 
/* disable cluster read */ #define MNT_NOCLUSTERW 0x0000000080000000ULL /* disable cluster write */ #define MNT_SUJ 0x0000000100000000ULL /* using journaled soft updates */ #define MNT_AUTOMOUNTED 0x0000000200000000ULL /* mounted by automountd(8) */ /* * NFS export related mount flags. */ #define MNT_EXRDONLY 0x0000000000000080ULL /* exported read only */ #define MNT_EXPORTED 0x0000000000000100ULL /* filesystem is exported */ #define MNT_DEFEXPORTED 0x0000000000000200ULL /* exported to the world */ #define MNT_EXPORTANON 0x0000000000000400ULL /* anon uid mapping for all */ #define MNT_EXKERB 0x0000000000000800ULL /* exported with Kerberos */ #define MNT_EXPUBLIC 0x0000000020000000ULL /* public export (WebNFS) */ /* * Flags set by internal operations, * but visible to the user. * XXX some of these are not quite right.. (I've never seen the root flag set) */ #define MNT_LOCAL 0x0000000000001000ULL /* filesystem is stored locally */ #define MNT_QUOTA 0x0000000000002000ULL /* quotas are enabled on fs */ #define MNT_ROOTFS 0x0000000000004000ULL /* identifies the root fs */ #define MNT_USER 0x0000000000008000ULL /* mounted by a user */ #define MNT_IGNORE 0x0000000000800000ULL /* do not show entry in df */ +#define MNT_VERIFIED 0x0000000400000000ULL /* filesystem is verified */ /* * Mask of flags that are visible to statfs(). * XXX I think that this could now become (~(MNT_CMDFLAGS)) * but the 'mount' program may need changing to handle this. 
*/ #define MNT_VISFLAGMASK (MNT_RDONLY | MNT_SYNCHRONOUS | MNT_NOEXEC | \ MNT_NOSUID | MNT_UNION | MNT_SUJ | \ MNT_ASYNC | MNT_EXRDONLY | MNT_EXPORTED | \ MNT_DEFEXPORTED | MNT_EXPORTANON| MNT_EXKERB | \ MNT_LOCAL | MNT_USER | MNT_QUOTA | \ MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \ MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \ MNT_IGNORE | MNT_EXPUBLIC | MNT_NOSYMFOLLOW | \ MNT_GJOURNAL | MNT_MULTILABEL | MNT_ACLS | \ - MNT_NFS4ACLS | MNT_AUTOMOUNTED) + MNT_NFS4ACLS | MNT_AUTOMOUNTED | MNT_VERIFIED) /* Mask of flags that can be updated. */ #define MNT_UPDATEMASK (MNT_NOSUID | MNT_NOEXEC | \ MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | \ MNT_NOATIME | \ MNT_NOSYMFOLLOW | MNT_IGNORE | \ MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | \ MNT_ACLS | MNT_USER | MNT_NFS4ACLS | \ MNT_AUTOMOUNTED) /* * External filesystem command modifier flags. * Unmount can use the MNT_FORCE flag. * XXX: These are not STATES and really should be somewhere else. * XXX: MNT_BYFSID and MNT_NONBUSY collide with MNT_ACLS and MNT_MULTILABEL, * but because MNT_ACLS and MNT_MULTILABEL are only used for mount(2), * and MNT_BYFSID and MNT_NONBUSY are only used for unmount(2), * it's harmless. */ #define MNT_UPDATE 0x0000000000010000ULL /* not real mount, just update */ #define MNT_DELEXPORT 0x0000000000020000ULL /* delete export host lists */ #define MNT_RELOAD 0x0000000000040000ULL /* reload filesystem data */ #define MNT_FORCE 0x0000000000080000ULL /* force unmount or readonly */ #define MNT_SNAPSHOT 0x0000000001000000ULL /* snapshot the filesystem */ #define MNT_NONBUSY 0x0000000004000000ULL /* check vnode use counts. */ #define MNT_BYFSID 0x0000000008000000ULL /* specify filesystem by ID. */ #define MNT_CMDFLAGS (MNT_UPDATE | MNT_DELEXPORT | MNT_RELOAD | \ MNT_FORCE | MNT_SNAPSHOT | MNT_NONBUSY | \ MNT_BYFSID) /* * Internal filesystem control flags stored in mnt_kern_flag. * * MNTK_UNMOUNT locks the mount entry so that name lookup cannot proceed * past the mount point. 
This keeps the subtree stable during mounts * and unmounts. * * MNTK_UNMOUNTF permits filesystems to detect a forced unmount while * dounmount() is still waiting to lock the mountpoint. This allows * the filesystem to cancel operations that might otherwise deadlock * with the unmount attempt (used by NFS). * * MNTK_NOINSMNTQ is strict subset of MNTK_UNMOUNT. They are separated * to allow for failed unmount attempt to restore the syncer vnode for * the mount. */ #define MNTK_UNMOUNTF 0x00000001 /* forced unmount in progress */ #define MNTK_ASYNC 0x00000002 /* filtered async flag */ #define MNTK_SOFTDEP 0x00000004 /* async disabled by softdep */ #define MNTK_NOINSMNTQ 0x00000008 /* insmntque is not allowed */ #define MNTK_DRAINING 0x00000010 /* lock draining is happening */ #define MNTK_REFEXPIRE 0x00000020 /* refcount expiring is happening */ #define MNTK_EXTENDED_SHARED 0x00000040 /* Allow shared locking for more ops */ #define MNTK_SHARED_WRITES 0x00000080 /* Allow shared locking for writes */ #define MNTK_NO_IOPF 0x00000100 /* Disallow page faults during reads and writes. Filesystem shall properly handle i/o state on EFAULT. */ #define MNTK_VGONE_UPPER 0x00000200 #define MNTK_VGONE_WAITER 0x00000400 #define MNTK_LOOKUP_EXCL_DOTDOT 0x00000800 #define MNTK_MARKER 0x00001000 #define MNTK_UNMAPPED_BUFS 0x00002000 #define MNTK_USES_BCACHE 0x00004000 /* FS uses the buffer cache. 
*/ #define MNTK_NOASYNC 0x00800000 /* disable async */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ #define MNTK_SUSPEND 0x08000000 /* request write suspension */ #define MNTK_SUSPEND2 0x04000000 /* block secondary writes */ #define MNTK_SUSPENDED 0x10000000 /* write operations are suspended */ #define MNTK_NULL_NOCACHE 0x20000000 /* auto disable cache for nullfs mounts over this fs */ #define MNTK_LOOKUP_SHARED 0x40000000 /* FS supports shared lock lookups */ #define MNTK_NOKNOTE 0x80000000 /* Don't send KNOTEs from VOP hooks */ #ifdef _KERNEL static inline int MNT_SHARED_WRITES(struct mount *mp) { return (mp != NULL && (mp->mnt_kern_flag & MNTK_SHARED_WRITES) != 0); } static inline int MNT_EXTENDED_SHARED(struct mount *mp) { return (mp != NULL && (mp->mnt_kern_flag & MNTK_EXTENDED_SHARED) != 0); } #endif /* * Sysctl CTL_VFS definitions. * * Second level identifier specifies which filesystem. Second level * identifier VFS_VFSCONF returns information about all filesystems. * Second level identifier VFS_GENERIC is non-terminal. */ #define VFS_VFSCONF 0 /* get configured filesystems */ #define VFS_GENERIC 0 /* generic filesystem information */ /* * Third level identifiers for VFS_GENERIC are given below; third * level identifiers for specific filesystems are given in their * mount specific header files. */ #define VFS_MAXTYPENUM 1 /* int: highest defined filesystem type */ #define VFS_CONF 2 /* struct: vfsconf for filesystem given as next argument */ /* * Flags for various system call interfaces. 
* * waitfor flags to vfs_sync() and getfsstat() */ #define MNT_WAIT 1 /* synchronously wait for I/O to complete */ #define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ #define MNT_LAZY 3 /* push data not written by filesystem syncer */ #define MNT_SUSPEND 4 /* Suspend file system after sync */ /* * Generic file handle */ struct fhandle { fsid_t fh_fsid; /* Filesystem id of mount point */ struct fid fh_fid; /* Filesys specific id */ }; typedef struct fhandle fhandle_t; /* * Old export arguments without security flavor list */ struct oexport_args { int ex_flags; /* export related flags */ uid_t ex_root; /* mapping for root uid */ struct xucred ex_anon; /* mapping for anonymous user */ struct sockaddr *ex_addr; /* net address to which exported */ u_char ex_addrlen; /* and the net address length */ struct sockaddr *ex_mask; /* mask of valid bits in saddr */ u_char ex_masklen; /* and the smask length */ char *ex_indexfile; /* index file for WebNFS URLs */ }; /* * Export arguments for local filesystem mount calls. */ #define MAXSECFLAVORS 5 struct export_args { int ex_flags; /* export related flags */ uid_t ex_root; /* mapping for root uid */ struct xucred ex_anon; /* mapping for anonymous user */ struct sockaddr *ex_addr; /* net address to which exported */ u_char ex_addrlen; /* and the net address length */ struct sockaddr *ex_mask; /* mask of valid bits in saddr */ u_char ex_masklen; /* and the smask length */ char *ex_indexfile; /* index file for WebNFS URLs */ int ex_numsecflavors; /* security flavor count */ int ex_secflavors[MAXSECFLAVORS]; /* list of security flavors */ }; /* * Structure holding information for a publicly exported filesystem * (WebNFS). Currently the specs allow just for one such filesystem. 
*/ struct nfs_public { int np_valid; /* Do we hold valid information */ fhandle_t np_handle; /* Filehandle for pub fs (internal) */ struct mount *np_mount; /* Mountpoint of exported fs */ char *np_index; /* Index file */ }; /* * Filesystem configuration information. One of these exists for each * type of filesystem supported by the kernel. These are searched at * mount time to identify the requested filesystem. * * XXX: Never change the first two arguments! */ struct vfsconf { u_int vfc_version; /* ABI version number */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ struct vfsops *vfc_vfsops; /* filesystem operations vector */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ struct vfsoptdecl *vfc_opts; /* mount options */ TAILQ_ENTRY(vfsconf) vfc_list; /* list of vfscons */ }; /* Userland version of the struct vfsconf. */ struct xvfsconf { struct vfsops *vfc_vfsops; /* filesystem operations vector */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ struct vfsconf *vfc_next; /* next in list */ }; #ifndef BURN_BRIDGES struct ovfsconf { void *vfc_vfsops; char vfc_name[32]; int vfc_index; int vfc_refcount; int vfc_flags; }; #endif /* * NB: these flags refer to IMPLEMENTATION properties, not properties of * any actual mounts; i.e., it does not make sense to change the flags. 
*/ #define VFCF_STATIC 0x00010000 /* statically compiled into kernel */ #define VFCF_NETWORK 0x00020000 /* may get data over the network */ #define VFCF_READONLY 0x00040000 /* writes are not implemented */ #define VFCF_SYNTHETIC 0x00080000 /* data does not represent real files */ #define VFCF_LOOPBACK 0x00100000 /* aliases some other mounted FS */ #define VFCF_UNICODE 0x00200000 /* stores file names as Unicode */ #define VFCF_JAIL 0x00400000 /* can be mounted from within a jail */ #define VFCF_DELEGADMIN 0x00800000 /* supports delegated administration */ #define VFCF_SBDRY 0x01000000 /* defer stop requests */ typedef uint32_t fsctlop_t; struct vfsidctl { int vc_vers; /* should be VFSIDCTL_VERS1 (below) */ fsid_t vc_fsid; /* fsid to operate on */ char vc_fstypename[MFSNAMELEN]; /* type of fs 'nfs' or '*' */ fsctlop_t vc_op; /* operation VFS_CTL_* (below) */ void *vc_ptr; /* pointer to data structure */ size_t vc_len; /* sizeof said structure */ u_int32_t vc_spare[12]; /* spare (must be zero) */ }; /* vfsidctl API version. */ #define VFS_CTL_VERS1 0x01 /* * New style VFS sysctls, do not reuse/conflict with the namespace for * private sysctls. * All "global" sysctl ops have the 33rd bit set: * 0x...1.... * Private sysctl ops should have the 33rd bit unset. */ #define VFS_CTL_QUERY 0x00010001 /* anything wrong? 
(vfsquery) */ #define VFS_CTL_TIMEO 0x00010002 /* set timeout for vfs notification */ #define VFS_CTL_NOLOCKS 0x00010003 /* disable file locking */ struct vfsquery { u_int32_t vq_flags; u_int32_t vq_spare[31]; }; /* vfsquery flags */ #define VQ_NOTRESP 0x0001 /* server down */ #define VQ_NEEDAUTH 0x0002 /* server bad auth */ #define VQ_LOWDISK 0x0004 /* we're low on space */ #define VQ_MOUNT 0x0008 /* new filesystem arrived */ #define VQ_UNMOUNT 0x0010 /* filesystem has left */ #define VQ_DEAD 0x0020 /* filesystem is dead, needs force unmount */ #define VQ_ASSIST 0x0040 /* filesystem needs assistance from external program */ #define VQ_NOTRESPLOCK 0x0080 /* server lockd down */ #define VQ_FLAG0100 0x0100 /* placeholder */ #define VQ_FLAG0200 0x0200 /* placeholder */ #define VQ_FLAG0400 0x0400 /* placeholder */ #define VQ_FLAG0800 0x0800 /* placeholder */ #define VQ_FLAG1000 0x1000 /* placeholder */ #define VQ_FLAG2000 0x2000 /* placeholder */ #define VQ_FLAG4000 0x4000 /* placeholder */ #define VQ_FLAG8000 0x8000 /* placeholder */ #ifdef _KERNEL /* Point a sysctl request at a vfsidctl's data. */ #define VCTLTOREQ(vc, req) \ do { \ (req)->newptr = (vc)->vc_ptr; \ (req)->newlen = (vc)->vc_len; \ (req)->newidx = 0; \ } while (0) #endif struct iovec; struct uio; #ifdef _KERNEL /* * vfs_busy specific flags and mask. */ #define MBF_NOWAIT 0x01 #define MBF_MNTLSTLOCK 0x02 #define MBF_MASK (MBF_NOWAIT | MBF_MNTLSTLOCK) #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_MOUNT); MALLOC_DECLARE(M_STATFS); #endif extern int maxvfsconf; /* highest defined filesystem type */ TAILQ_HEAD(vfsconfhead, vfsconf); extern struct vfsconfhead vfsconf; /* * Operations supported on mounted filesystem. 
*/ struct mount_args; struct nameidata; struct sysctl_req; struct mntarg; typedef int vfs_cmount_t(struct mntarg *ma, void *data, uint64_t flags); typedef int vfs_unmount_t(struct mount *mp, int mntflags); typedef int vfs_root_t(struct mount *mp, int flags, struct vnode **vpp); typedef int vfs_quotactl_t(struct mount *mp, int cmds, uid_t uid, void *arg); typedef int vfs_statfs_t(struct mount *mp, struct statfs *sbp); typedef int vfs_sync_t(struct mount *mp, int waitfor); typedef int vfs_vget_t(struct mount *mp, ino_t ino, int flags, struct vnode **vpp); typedef int vfs_fhtovp_t(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp); typedef int vfs_checkexp_t(struct mount *mp, struct sockaddr *nam, int *extflagsp, struct ucred **credanonp, int *numsecflavors, int **secflavors); typedef int vfs_init_t(struct vfsconf *); typedef int vfs_uninit_t(struct vfsconf *); typedef int vfs_extattrctl_t(struct mount *mp, int cmd, struct vnode *filename_vp, int attrnamespace, const char *attrname); typedef int vfs_mount_t(struct mount *mp); typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op, struct sysctl_req *req); typedef void vfs_susp_clean_t(struct mount *mp); typedef void vfs_notify_lowervp_t(struct mount *mp, struct vnode *lowervp); typedef void vfs_purge_t(struct mount *mp); struct vfsops { vfs_mount_t *vfs_mount; vfs_cmount_t *vfs_cmount; vfs_unmount_t *vfs_unmount; vfs_root_t *vfs_root; vfs_quotactl_t *vfs_quotactl; vfs_statfs_t *vfs_statfs; vfs_sync_t *vfs_sync; vfs_vget_t *vfs_vget; vfs_fhtovp_t *vfs_fhtovp; vfs_checkexp_t *vfs_checkexp; vfs_init_t *vfs_init; vfs_uninit_t *vfs_uninit; vfs_extattrctl_t *vfs_extattrctl; vfs_sysctl_t *vfs_sysctl; vfs_susp_clean_t *vfs_susp_clean; vfs_notify_lowervp_t *vfs_reclaim_lowervp; vfs_notify_lowervp_t *vfs_unlink_lowervp; vfs_purge_t *vfs_purge; vfs_mount_t *vfs_spare[6]; /* spares for ABI compat */ }; vfs_statfs_t __vfs_statfs; #define VFS_PROLOGUE(MP) do { \ struct mount *mp__; \ int _prev_stops; \ \ mp__ = 
(MP); \ _prev_stops = sigdeferstop((mp__ != NULL && \ (mp__->mnt_vfc->vfc_flags & VFCF_SBDRY) != 0) ? \ SIGDEFERSTOP_SILENT : SIGDEFERSTOP_NOP); #define VFS_EPILOGUE(MP) \ sigallowstop(_prev_stops); \ } while (0) #define VFS_MOUNT(MP) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_mount)(MP); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_UNMOUNT(MP, FORCE) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_unmount)(MP, FORCE); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_ROOT(MP, FLAGS, VPP) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_root)(MP, FLAGS, VPP); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_QUOTACTL(MP, C, U, A) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_STATFS(MP, SBP) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = __vfs_statfs((MP), (SBP)); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_SYNC(MP, WAIT) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_sync)(MP, WAIT); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_VGET(MP, INO, FLAGS, VPP) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_vget)(MP, INO, FLAGS, VPP); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_FHTOVP(MP, FIDP, FLAGS, VPP) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, FLAGS, VPP); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_CHECKEXP(MP, NAM, EXFLG, CRED, NUMSEC, SEC) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_checkexp)(MP, NAM, EXFLG, CRED, NUMSEC,\ SEC); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_EXTATTRCTL(MP, C, FN, NS, N) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_extattrctl)(MP, C, FN, NS, N); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_SYSCTL(MP, OP, REQ) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_sysctl)(MP, OP, REQ); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_SUSP_CLEAN(MP) do { \ if (*(MP)->mnt_op->vfs_susp_clean != NULL) { \ VFS_PROLOGUE(MP); \ 
(*(MP)->mnt_op->vfs_susp_clean)(MP); \ VFS_EPILOGUE(MP); \ } \ } while (0) #define VFS_RECLAIM_LOWERVP(MP, VP) do { \ if (*(MP)->mnt_op->vfs_reclaim_lowervp != NULL) { \ VFS_PROLOGUE(MP); \ (*(MP)->mnt_op->vfs_reclaim_lowervp)((MP), (VP)); \ VFS_EPILOGUE(MP); \ } \ } while (0) #define VFS_UNLINK_LOWERVP(MP, VP) do { \ if (*(MP)->mnt_op->vfs_unlink_lowervp != NULL) { \ VFS_PROLOGUE(MP); \ (*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP)); \ VFS_EPILOGUE(MP); \ } \ } while (0) #define VFS_PURGE(MP) do { \ if (*(MP)->mnt_op->vfs_purge != NULL) { \ VFS_PROLOGUE(MP); \ (*(MP)->mnt_op->vfs_purge)(MP); \ VFS_EPILOGUE(MP); \ } \ } while (0) #define VFS_KNOTE_LOCKED(vp, hint) do \ { \ if (((vp)->v_vflag & VV_NOKNOTE) == 0) \ VN_KNOTE((vp), (hint), KNF_LISTLOCKED); \ } while (0) #define VFS_KNOTE_UNLOCKED(vp, hint) do \ { \ if (((vp)->v_vflag & VV_NOKNOTE) == 0) \ VN_KNOTE((vp), (hint), 0); \ } while (0) #define VFS_NOTIFY_UPPER_RECLAIM 1 #define VFS_NOTIFY_UPPER_UNLINK 2 #include /* * Version numbers. 
*/ #define VFS_VERSION_00 0x19660120 #define VFS_VERSION_01 0x20121030 #define VFS_VERSION VFS_VERSION_01 #define VFS_SET(vfsops, fsname, flags) \ static struct vfsconf fsname ## _vfsconf = { \ .vfc_version = VFS_VERSION, \ .vfc_name = #fsname, \ .vfc_vfsops = &vfsops, \ .vfc_typenum = -1, \ .vfc_flags = flags, \ }; \ static moduledata_t fsname ## _mod = { \ #fsname, \ vfs_modevent, \ & fsname ## _vfsconf \ }; \ DECLARE_MODULE(fsname, fsname ## _mod, SI_SUB_VFS, SI_ORDER_MIDDLE) /* * exported vnode operations */ int dounmount(struct mount *, int, struct thread *); int kernel_mount(struct mntarg *ma, uint64_t flags); int kernel_vmount(int flags, ...); struct mntarg *mount_arg(struct mntarg *ma, const char *name, const void *val, int len); struct mntarg *mount_argb(struct mntarg *ma, int flag, const char *name); struct mntarg *mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...); struct mntarg *mount_argsu(struct mntarg *ma, const char *name, const void *val, int len); void statfs_scale_blocks(struct statfs *sf, long max_size); struct vfsconf *vfs_byname(const char *); struct vfsconf *vfs_byname_kld(const char *, struct thread *td, int *); void vfs_mount_destroy(struct mount *); void vfs_event_signal(fsid_t *, u_int32_t, intptr_t); void vfs_freeopts(struct vfsoptlist *opts); void vfs_deleteopt(struct vfsoptlist *opts, const char *name); int vfs_buildopts(struct uio *auio, struct vfsoptlist **options); int vfs_flagopt(struct vfsoptlist *opts, const char *name, uint64_t *w, uint64_t val); int vfs_getopt(struct vfsoptlist *, const char *, void **, int *); int vfs_getopt_pos(struct vfsoptlist *opts, const char *name); int vfs_getopt_size(struct vfsoptlist *opts, const char *name, off_t *value); char *vfs_getopts(struct vfsoptlist *, const char *, int *error); int vfs_copyopt(struct vfsoptlist *, const char *, void *, int); int vfs_filteropt(struct vfsoptlist *, const char **legal); void vfs_opterror(struct vfsoptlist *opts, const char *fmt, ...); int 
vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...); int vfs_setopt(struct vfsoptlist *opts, const char *name, void *value, int len); int vfs_setopt_part(struct vfsoptlist *opts, const char *name, void *value, int len); int vfs_setopts(struct vfsoptlist *opts, const char *name, const char *value); int vfs_setpublicfs /* set publicly exported fs */ (struct mount *, struct netexport *, struct export_args *); void vfs_msync(struct mount *, int); int vfs_busy(struct mount *, int); int vfs_export /* process mount export info */ (struct mount *, struct export_args *); void vfs_allocate_syncvnode(struct mount *); void vfs_deallocate_syncvnode(struct mount *); int vfs_donmount(struct thread *td, uint64_t fsflags, struct uio *fsoptions); void vfs_getnewfsid(struct mount *); struct cdev *vfs_getrootfsid(struct mount *); struct mount *vfs_getvfs(fsid_t *); /* return vfs given fsid */ struct mount *vfs_busyfs(fsid_t *); int vfs_modevent(module_t, int, void *); void vfs_mount_error(struct mount *, const char *, ...); void vfs_mountroot(void); /* mount our root filesystem */ void vfs_mountedfrom(struct mount *, const char *from); void vfs_notify_upper(struct vnode *, int); void vfs_oexport_conv(const struct oexport_args *oexp, struct export_args *exp); void vfs_ref(struct mount *); void vfs_rel(struct mount *); struct mount *vfs_mount_alloc(struct vnode *, struct vfsconf *, const char *, struct ucred *); int vfs_suser(struct mount *, struct thread *); void vfs_unbusy(struct mount *); void vfs_unmountall(void); extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ extern struct mtx mountlist_mtx; extern struct nfs_public nfs_pub; extern struct sx vfsconf_sx; #define vfsconf_lock() sx_xlock(&vfsconf_sx) #define vfsconf_unlock() sx_xunlock(&vfsconf_sx) #define vfsconf_slock() sx_slock(&vfsconf_sx) #define vfsconf_sunlock() sx_sunlock(&vfsconf_sx) /* * Declarations for these vfs default operations are located in * kern/vfs_default.c. 
They will be automatically used to replace * null entries in VFS ops tables when registering a new filesystem * type in the global table. */ vfs_root_t vfs_stdroot; vfs_quotactl_t vfs_stdquotactl; vfs_statfs_t vfs_stdstatfs; vfs_sync_t vfs_stdsync; vfs_sync_t vfs_stdnosync; vfs_vget_t vfs_stdvget; vfs_fhtovp_t vfs_stdfhtovp; vfs_checkexp_t vfs_stdcheckexp; vfs_init_t vfs_stdinit; vfs_uninit_t vfs_stduninit; vfs_extattrctl_t vfs_stdextattrctl; vfs_sysctl_t vfs_stdsysctl; void syncer_suspend(void); void syncer_resume(void); #else /* !_KERNEL */ #include struct stat; __BEGIN_DECLS int fhopen(const struct fhandle *, int); int fhstat(const struct fhandle *, struct stat *); int fhstatfs(const struct fhandle *, struct statfs *); int fstatfs(int, struct statfs *); int getfh(const char *, fhandle_t *); int getfsstat(struct statfs *, long, int); int getmntinfo(struct statfs **, int); int lgetfh(const char *, fhandle_t *); int mount(const char *, const char *, int, void *); int nmount(struct iovec *, unsigned int, int); int statfs(const char *, struct statfs *); int unmount(const char *, int); /* C library stuff */ int getvfsbyname(const char *, struct xvfsconf *); __END_DECLS #endif /* _KERNEL */ #endif /* !_SYS_MOUNT_H_ */