diff --git a/lib/libzfs/libzfs_diff.c b/lib/libzfs/libzfs_diff.c index 5b3f93465d89..31e18aad27a0 100644 --- a/lib/libzfs/libzfs_diff.c +++ b/lib/libzfs/libzfs_diff.c @@ -1,826 +1,826 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ /* * zfs diff support */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libzfs_impl.h" #define ZDIFF_SNAPDIR "/.zfs/snapshot/" #define ZDIFF_SHARESDIR "/.zfs/shares/" #define ZDIFF_PREFIX "zfs-diff-%d" #define ZDIFF_ADDED '+' #define ZDIFF_MODIFIED 'M' #define ZDIFF_REMOVED '-' #define ZDIFF_RENAMED 'R' static boolean_t do_name_cmp(const char *fpath, const char *tpath) { char *fname, *tname; fname = strrchr(fpath, '/') + 1; tname = strrchr(tpath, '/') + 1; return (strcmp(fname, tname) == 0); } typedef struct differ_info { zfs_handle_t *zhp; char *fromsnap; char *frommnt; char *tosnap; char *tomnt; char *ds; char *dsmnt; char *tmpsnap; char errbuf[1024]; boolean_t isclone; boolean_t scripted; boolean_t classify; boolean_t timestamped; uint64_t shares; int zerr; int cleanupfd; int outputfd; int datafd; } differ_info_t; /* * Given a {dsname, object id}, get the object path */ static int get_stats_for_obj(differ_info_t *di, const char *dsname, uint64_t obj, char *pn, int maxlen, zfs_stat_t *sb) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int error; (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name)); zc.zc_obj = obj; errno = 0; error = ioctl(di->zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_STATS, &zc); di->zerr = errno; /* we can get stats even if we failed to get a path */ (void) memcpy(sb, &zc.zc_stat, sizeof (zfs_stat_t)); if (error == 0) { ASSERT(di->zerr == 0); (void) strlcpy(pn, zc.zc_value, maxlen); return (0); } if (di->zerr == EPERM) { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "The sys_config privilege or diff delegated permission " "is needed\nto discover path names")); return (-1); } else { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Unable to determine path or stats for " "object %lld in %s"), (longlong_t)obj, dsname); return (-1); } } /* * stream_bytes * * Prints a file name out a character at a time. If the character is * not in the range of what we consider "printable" ASCII, display it * as an escaped 3-digit octal value. ASCII values less than a space * are all control characters and we declare the upper end as the * DELete character. This also is the last 7-bit ASCII character. * We choose to treat all 8-bit ASCII as not printable for this * application. */ static void stream_bytes(FILE *fp, const char *string) { while (*string) { if (*string > ' ' && *string != '\\' && *string < '\177') (void) fprintf(fp, "%c", *string++); else (void) fprintf(fp, "\\%03o", *string++); } } static void print_what(FILE *fp, mode_t what) { char symbol; switch (what & S_IFMT) { case S_IFBLK: symbol = 'B'; break; case S_IFCHR: symbol = 'C'; break; case S_IFDIR: symbol = '/'; break; case S_IFDOOR: symbol = '>'; break; case S_IFIFO: symbol = '|'; break; case S_IFLNK: symbol = '@'; break; case S_IFPORT: symbol = 'P'; break; case S_IFSOCK: symbol = '='; break; case S_IFREG: symbol = 'F'; break; default: symbol = '?'; break; } (void) fprintf(fp, "%c", symbol); } static void print_cmn(FILE *fp, differ_info_t *di, const char *file) { stream_bytes(fp, di->dsmnt); stream_bytes(fp, file); } static void print_rename(FILE *fp, differ_info_t *di, const char *old, const char *new, zfs_stat_t *isb) { if (di->timestamped) (void) fprintf(fp, "%10lld.%09lld\t", (longlong_t)isb->zs_ctime[0], (longlong_t)isb->zs_ctime[1]); (void) fprintf(fp, "%c\t", ZDIFF_RENAMED); if (di->classify) { print_what(fp, isb->zs_mode); (void) fprintf(fp, "\t"); } print_cmn(fp, di, old); if (di->scripted) (void) fprintf(fp, "\t"); else (void) fprintf(fp, " -> "); print_cmn(fp, di, new); (void) fprintf(fp, "\n"); } static void print_link_change(FILE *fp, differ_info_t *di, int delta, const char *file, zfs_stat_t *isb) { if (di->timestamped) (void) fprintf(fp, "%10lld.%09lld\t", (longlong_t)isb->zs_ctime[0], (longlong_t)isb->zs_ctime[1]); (void) fprintf(fp, "%c\t", ZDIFF_MODIFIED); if (di->classify) { print_what(fp, isb->zs_mode); (void) fprintf(fp, "\t"); } print_cmn(fp, di, file); (void) fprintf(fp, "\t(%+d)", delta); (void) fprintf(fp, "\n"); } static void print_file(FILE *fp, differ_info_t *di, char type, const char *file, zfs_stat_t *isb) { if (di->timestamped) (void) fprintf(fp, "%10lld.%09lld\t", (longlong_t)isb->zs_ctime[0], (longlong_t)isb->zs_ctime[1]); (void) fprintf(fp, "%c\t", type); if (di->classify) { print_what(fp, isb->zs_mode); (void) fprintf(fp, "\t"); } print_cmn(fp, di, file); (void) fprintf(fp, "\n"); } static int write_inuse_diffs_one(FILE *fp, differ_info_t *di, uint64_t dobj) { struct zfs_stat fsb, tsb; boolean_t same_name; mode_t fmode, tmode; char fobjname[MAXPATHLEN], tobjname[MAXPATHLEN]; int fobjerr, tobjerr; int change; if (dobj == di->shares) return (0); /* * Check the from and to snapshots for info on the object. If * we get ENOENT, then the object just didn't exist in that * snapshot. If we get ENOTSUP, then we tried to get * info on a non-ZPL object, which we don't care about anyway. */ fobjerr = get_stats_for_obj(di, di->fromsnap, dobj, fobjname, MAXPATHLEN, &fsb); if (fobjerr && di->zerr != ENOENT && di->zerr != ENOTSUP) return (-1); tobjerr = get_stats_for_obj(di, di->tosnap, dobj, tobjname, MAXPATHLEN, &tsb); if (tobjerr && di->zerr != ENOENT && di->zerr != ENOTSUP) return (-1); /* * Unallocated object sharing the same meta dnode block */ if (fobjerr && tobjerr) { ASSERT(di->zerr == ENOENT || di->zerr == ENOTSUP); di->zerr = 0; return (0); } di->zerr = 0; /* negate get_stats_for_obj() from side that failed */ fmode = fsb.zs_mode & S_IFMT; tmode = tsb.zs_mode & S_IFMT; if (fmode == S_IFDIR || tmode == S_IFDIR || fsb.zs_links == 0 || tsb.zs_links == 0) change = 0; else change = tsb.zs_links - fsb.zs_links; if (fobjerr) { if (change) { print_link_change(fp, di, change, tobjname, &tsb); return (0); } print_file(fp, di, ZDIFF_ADDED, tobjname, &tsb); return (0); } else if (tobjerr) { if (change) { print_link_change(fp, di, change, fobjname, &fsb); return (0); } print_file(fp, di, ZDIFF_REMOVED, fobjname, &fsb); return (0); } if (fmode != tmode && fsb.zs_gen == tsb.zs_gen) tsb.zs_gen++; /* Force a generational difference */ same_name = do_name_cmp(fobjname, tobjname); /* Simple modification or no change */ if (fsb.zs_gen == tsb.zs_gen) { /* No apparent changes. Could we assert !this? */ if (fsb.zs_ctime[0] == tsb.zs_ctime[0] && fsb.zs_ctime[1] == tsb.zs_ctime[1]) return (0); if (change) { print_link_change(fp, di, change, change > 0 ? fobjname : tobjname, &tsb); } else if (same_name) { print_file(fp, di, ZDIFF_MODIFIED, fobjname, &tsb); } else { print_rename(fp, di, fobjname, tobjname, &tsb); } return (0); } else { /* file re-created or object re-used */ print_file(fp, di, ZDIFF_REMOVED, fobjname, &fsb); print_file(fp, di, ZDIFF_ADDED, tobjname, &tsb); return (0); } } static int write_inuse_diffs(FILE *fp, differ_info_t *di, dmu_diff_record_t *dr) { uint64_t o; int err; for (o = dr->ddr_first; o <= dr->ddr_last; o++) { if ((err = write_inuse_diffs_one(fp, di, o))) return (err); } return (0); } static int describe_free(FILE *fp, differ_info_t *di, uint64_t object, char *namebuf, int maxlen) { struct zfs_stat sb; if (get_stats_for_obj(di, di->fromsnap, object, namebuf, maxlen, &sb) != 0) { /* Let it slide, if in the delete queue on from side */ if (di->zerr == ENOENT && sb.zs_links == 0) { di->zerr = 0; return (0); } return (-1); } print_file(fp, di, ZDIFF_REMOVED, namebuf, &sb); return (0); } static int write_free_diffs(FILE *fp, differ_info_t *di, dmu_diff_record_t *dr) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; libzfs_handle_t *lhdl = di->zhp->zfs_hdl; char fobjname[MAXPATHLEN]; (void) strlcpy(zc.zc_name, di->fromsnap, sizeof (zc.zc_name)); zc.zc_obj = dr->ddr_first - 1; ASSERT(di->zerr == 0); while (zc.zc_obj < dr->ddr_last) { int err; err = ioctl(lhdl->libzfs_fd, ZFS_IOC_NEXT_OBJ, &zc); if (err == 0) { if (zc.zc_obj == di->shares) { zc.zc_obj++; continue; } if (zc.zc_obj > dr->ddr_last) { break; } err = describe_free(fp, di, zc.zc_obj, fobjname, MAXPATHLEN); if (err) break; } else if (errno == ESRCH) { break; } else { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "next allocated object (> %lld) find failure"), (longlong_t)zc.zc_obj); di->zerr = errno; break; } } if (di->zerr) return (-1); return (0); } static void * differ(void *arg) { differ_info_t *di = arg; dmu_diff_record_t dr; FILE *ofp; int err = 0; if ((ofp = fdopen(di->outputfd, "w")) == NULL) { di->zerr = errno; (void) strerror_r(errno, di->errbuf, sizeof (di->errbuf)); (void) close(di->datafd); return ((void *)-1); } for (;;) { char *cp = (char *)&dr; int len = sizeof (dr); int rv; do { rv = read(di->datafd, cp, len); cp += rv; len -= rv; } while (len > 0 && rv > 0); if (rv < 0 || (rv == 0 && len != sizeof (dr))) { di->zerr = EPIPE; break; } else if (rv == 0) { /* end of file at a natural breaking point */ break; } switch (dr.ddr_type) { case DDR_FREE: err = write_free_diffs(ofp, di, &dr); break; case DDR_INUSE: err = write_inuse_diffs(ofp, di, &dr); break; default: di->zerr = EPIPE; break; } if (err || di->zerr) break; } (void) fclose(ofp); (void) close(di->datafd); if (err) return ((void *)-1); if (di->zerr) { ASSERT(di->zerr == EINVAL); (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Internal error: bad data from diff IOCTL")); return ((void *)-1); } return ((void *)0); } static int find_shares_object(differ_info_t *di) { char fullpath[MAXPATHLEN]; struct stat64 sb = { 0 }; (void) strlcpy(fullpath, di->dsmnt, MAXPATHLEN); (void) strlcat(fullpath, ZDIFF_SHARESDIR, MAXPATHLEN); if (stat64(fullpath, &sb) != 0) { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Cannot stat %s"), fullpath); return (zfs_error(di->zhp->zfs_hdl, EZFS_DIFF, di->errbuf)); } di->shares = (uint64_t)sb.st_ino; return (0); } static int make_temp_snapshot(differ_info_t *di) { libzfs_handle_t *hdl = di->zhp->zfs_hdl; zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; (void) snprintf(zc.zc_value, sizeof (zc.zc_value), ZDIFF_PREFIX, getpid()); (void) strlcpy(zc.zc_name, di->ds, sizeof (zc.zc_name)); zc.zc_cleanup_fd = di->cleanupfd; if (ioctl(hdl->libzfs_fd, ZFS_IOC_TMP_SNAPSHOT, &zc) != 0) { int err = errno; if (err == EPERM) { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "The diff delegated " "permission is needed in order\nto create a " "just-in-time snapshot for diffing\n")); return (zfs_error(hdl, EZFS_DIFF, di->errbuf)); } else { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Cannot create just-in-time " "snapshot of '%s'"), zc.zc_name); return (zfs_standard_error(hdl, err, di->errbuf)); } } di->tmpsnap = zfs_strdup(hdl, zc.zc_value); di->tosnap = zfs_asprintf(hdl, "%s@%s", di->ds, di->tmpsnap); return (0); } static void teardown_differ_info(differ_info_t *di) { free(di->ds); free(di->dsmnt); free(di->fromsnap); free(di->frommnt); free(di->tosnap); free(di->tmpsnap); free(di->tomnt); (void) close(di->cleanupfd); } static int get_snapshot_names(differ_info_t *di, const char *fromsnap, const char *tosnap) { libzfs_handle_t *hdl = di->zhp->zfs_hdl; char *atptrf = NULL; char *atptrt = NULL; int fdslen, fsnlen; int tdslen, tsnlen; /* * Can accept * dataset@snap1 * dataset@snap1 dataset@snap2 * dataset@snap1 @snap2 * dataset@snap1 dataset * @snap1 dataset@snap2 */ if (tosnap == NULL) { /* only a from snapshot given, must be valid */ (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Badly formed snapshot name %s"), fromsnap); if (!zfs_validate_name(hdl, fromsnap, ZFS_TYPE_SNAPSHOT, B_FALSE)) { return (zfs_error(hdl, EZFS_INVALIDNAME, di->errbuf)); } atptrf = strchr(fromsnap, '@'); ASSERT(atptrf != NULL); fdslen = atptrf - fromsnap; di->fromsnap = zfs_strdup(hdl, fromsnap); di->ds = zfs_strdup(hdl, fromsnap); di->ds[fdslen] = '\0'; /* the to snap will be a just-in-time snap of the head */ return (make_temp_snapshot(di)); } (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Unable to determine which snapshots to compare")); atptrf = strchr(fromsnap, '@'); atptrt = strchr(tosnap, '@'); fdslen = atptrf ? atptrf - fromsnap : strlen(fromsnap); tdslen = atptrt ? atptrt - tosnap : strlen(tosnap); fsnlen = strlen(fromsnap) - fdslen; /* includes @ sign */ tsnlen = strlen(tosnap) - tdslen; /* includes @ sign */ if (fsnlen <= 1 || tsnlen == 1 || (fdslen == 0 && tdslen == 0) || (fsnlen == 0 && tsnlen == 0)) { return (zfs_error(hdl, EZFS_INVALIDNAME, di->errbuf)); } else if ((fdslen > 0 && tdslen > 0) && ((tdslen != fdslen || strncmp(fromsnap, tosnap, fdslen) != 0))) { /* * not the same dataset name, might be okay if * tosnap is a clone of a fromsnap descendant. */ char origin[ZFS_MAXNAMELEN]; zprop_source_t src; zfs_handle_t *zhp; di->ds = zfs_alloc(di->zhp->zfs_hdl, tdslen + 1); (void) strncpy(di->ds, tosnap, tdslen); di->ds[tdslen] = '\0'; zhp = zfs_open(hdl, di->ds, ZFS_TYPE_FILESYSTEM); while (zhp != NULL) { (void) zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), &src, NULL, 0, B_FALSE); if (strncmp(origin, fromsnap, fsnlen) == 0) break; (void) zfs_close(zhp); zhp = zfs_open(hdl, origin, ZFS_TYPE_FILESYSTEM); } if (zhp == NULL) { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Not an earlier snapshot from the same fs")); return (zfs_error(hdl, EZFS_INVALIDNAME, di->errbuf)); } else { (void) zfs_close(zhp); } di->isclone = B_TRUE; di->fromsnap = zfs_strdup(hdl, fromsnap); if (tsnlen) { di->tosnap = zfs_strdup(hdl, tosnap); } else { return (make_temp_snapshot(di)); } } else { int dslen = fdslen ? fdslen : tdslen; di->ds = zfs_alloc(hdl, dslen + 1); (void) strncpy(di->ds, fdslen ? fromsnap : tosnap, dslen); di->ds[dslen] = '\0'; di->fromsnap = zfs_asprintf(hdl, "%s%s", di->ds, atptrf); if (tsnlen) { di->tosnap = zfs_asprintf(hdl, "%s%s", di->ds, atptrt); } else { return (make_temp_snapshot(di)); } } return (0); } static int get_mountpoint(differ_info_t *di, char *dsnm, char **mntpt) { boolean_t mounted; mounted = is_mounted(di->zhp->zfs_hdl, dsnm, mntpt); if (mounted == B_FALSE) { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Cannot diff an unmounted snapshot")); return (zfs_error(di->zhp->zfs_hdl, EZFS_BADTYPE, di->errbuf)); } /* Avoid a double slash at the beginning of root-mounted datasets */ if (**mntpt == '/' && *(*mntpt + 1) == '\0') **mntpt = '\0'; return (0); } static int get_mountpoints(differ_info_t *di) { char *strptr; char *frommntpt; /* * first get the mountpoint for the parent dataset */ if (get_mountpoint(di, di->ds, &di->dsmnt) != 0) return (-1); strptr = strchr(di->tosnap, '@'); ASSERT3P(strptr, !=, NULL); di->tomnt = zfs_asprintf(di->zhp->zfs_hdl, "%s%s%s", di->dsmnt, ZDIFF_SNAPDIR, ++strptr); strptr = strchr(di->fromsnap, '@'); ASSERT3P(strptr, !=, NULL); frommntpt = di->dsmnt; if (di->isclone) { char *mntpt; int err; *strptr = '\0'; err = get_mountpoint(di, di->fromsnap, &mntpt); *strptr = '@'; if (err != 0) return (-1); frommntpt = mntpt; } di->frommnt = zfs_asprintf(di->zhp->zfs_hdl, "%s%s%s", frommntpt, ZDIFF_SNAPDIR, ++strptr); if (di->isclone) free(frommntpt); return (0); } static int setup_differ_info(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, differ_info_t *di) { di->zhp = zhp; - di->cleanupfd = open(ZFS_DEV, O_RDWR|O_EXCL); + di->cleanupfd = open(ZFS_DEV, O_RDWR); VERIFY(di->cleanupfd >= 0); if (get_snapshot_names(di, fromsnap, tosnap) != 0) return (-1); if (get_mountpoints(di) != 0) return (-1); if (find_shares_object(di) != 0) return (-1); return (0); } int zfs_show_diffs(zfs_handle_t *zhp, int outfd, const char *fromsnap, const char *tosnap, int flags) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; char errbuf[1024]; differ_info_t di = { 0 }; pthread_t tid; int pipefd[2]; int iocerr; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "zfs diff failed")); if (setup_differ_info(zhp, fromsnap, tosnap, &di)) { teardown_differ_info(&di); return (-1); } if (pipe(pipefd)) { zfs_error_aux(zhp->zfs_hdl, strerror(errno)); teardown_differ_info(&di); return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED, errbuf)); } di.scripted = (flags & ZFS_DIFF_PARSEABLE); di.classify = (flags & ZFS_DIFF_CLASSIFY); di.timestamped = (flags & ZFS_DIFF_TIMESTAMP); di.outputfd = outfd; di.datafd = pipefd[0]; if (pthread_create(&tid, NULL, differ, &di)) { zfs_error_aux(zhp->zfs_hdl, strerror(errno)); (void) close(pipefd[0]); (void) close(pipefd[1]); teardown_differ_info(&di); return (zfs_error(zhp->zfs_hdl, EZFS_THREADCREATEFAILED, errbuf)); } /* do the ioctl() */ (void) strlcpy(zc.zc_value, di.fromsnap, strlen(di.fromsnap) + 1); (void) strlcpy(zc.zc_name, di.tosnap, strlen(di.tosnap) + 1); zc.zc_cookie = pipefd[1]; iocerr = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DIFF, &zc); if (iocerr != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "Unable to obtain diffs")); if (errno == EPERM) { zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "\n The sys_mount privilege or diff delegated " "permission is needed\n to execute the " "diff ioctl")); } else if (errno == EXDEV) { zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "\n Not an earlier snapshot from the same fs")); } else if (errno != EPIPE || di.zerr == 0) { zfs_error_aux(zhp->zfs_hdl, strerror(errno)); } (void) close(pipefd[1]); (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); teardown_differ_info(&di); if (di.zerr != 0 && di.zerr != EPIPE) { zfs_error_aux(zhp->zfs_hdl, strerror(di.zerr)); return (zfs_error(zhp->zfs_hdl, EZFS_DIFF, di.errbuf)); } else { return (zfs_error(zhp->zfs_hdl, EZFS_DIFFDATA, errbuf)); } } (void) close(pipefd[1]); (void) pthread_join(tid, NULL); if (di.zerr != 0) { zfs_error_aux(zhp->zfs_hdl, strerror(di.zerr)); return (zfs_error(zhp->zfs_hdl, EZFS_DIFF, di.errbuf)); } teardown_differ_info(&di); return (0); } diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 95c4b35688b5..48e77178f5ec 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -1,3021 +1,3021 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_namecheck.h" #include "zfs_prop.h" #include "zfs_fletcher.h" #include "libzfs_impl.h" #include #include #include /* in libzfs_dataset.c */ extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *); static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t, int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *); static const zio_cksum_t zero_cksum = { { 0 } }; typedef struct dedup_arg { int inputfd; int outputfd; libzfs_handle_t *dedup_hdl; } dedup_arg_t; typedef struct dataref { uint64_t ref_guid; uint64_t ref_object; uint64_t ref_offset; } dataref_t; typedef struct dedup_entry { struct dedup_entry *dde_next; zio_cksum_t dde_chksum; uint64_t dde_prop; dataref_t dde_ref; } dedup_entry_t; #define MAX_DDT_PHYSMEM_PERCENT 20 #define SMALLEST_POSSIBLE_MAX_DDT_MB 128 typedef struct dedup_table { dedup_entry_t **dedup_hash_array; umem_cache_t *ddecache; uint64_t max_ddt_size; /* max dedup table size in bytes */ uint64_t cur_ddt_size; /* current dedup table size in bytes */ uint64_t ddt_count; int numhashbits; boolean_t ddt_full; } dedup_table_t; static int high_order_bit(uint64_t n) { int count; for (count = 0; n != 0; count++) n >>= 1; return (count); } static size_t ssread(void *buf, size_t len, FILE *stream) { size_t outlen; if ((outlen = fread(buf, len, 1, stream)) == 0) return (0); return (outlen); } static void ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp, zio_cksum_t *cs, uint64_t prop, dataref_t *dr) { dedup_entry_t *dde; if (ddt->cur_ddt_size >= ddt->max_ddt_size) { if (ddt->ddt_full == B_FALSE) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Dedup table full. Deduplication will continue " "with existing table entries")); ddt->ddt_full = B_TRUE; } return; } if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT)) != NULL) { assert(*ddepp == NULL); dde->dde_next = NULL; dde->dde_chksum = *cs; dde->dde_prop = prop; dde->dde_ref = *dr; *ddepp = dde; ddt->cur_ddt_size += sizeof (dedup_entry_t); ddt->ddt_count++; } } /* * Using the specified dedup table, do a lookup for an entry with * the checksum cs. If found, return the block's reference info * in *dr. Otherwise, insert a new entry in the dedup table, using * the reference information specified by *dr. * * return value: true - entry was found * false - entry was not found */ static boolean_t ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs, uint64_t prop, dataref_t *dr) { uint32_t hashcode; dedup_entry_t **ddepp; hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits); for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL; ddepp = &((*ddepp)->dde_next)) { if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) && (*ddepp)->dde_prop == prop) { *dr = (*ddepp)->dde_ref; return (B_TRUE); } } ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr); return (B_FALSE); } static int cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd) { fletcher_4_incremental_native(buf, len, zc); return (write(outfd, buf, len)); } /* * This function is started in a separate thread when the dedup option * has been requested. The main send thread determines the list of * snapshots to be included in the send stream and makes the ioctl calls * for each one. But instead of having the ioctl send the output to the * the output fd specified by the caller of zfs_send()), the * ioctl is told to direct the output to a pipe, which is read by the * alternate thread running THIS function. This function does the * dedup'ing by: * 1. building a dedup table (the DDT) * 2. doing checksums on each data block and inserting a record in the DDT * 3. looking for matching checksums, and * 4. sending a DRR_WRITE_BYREF record instead of a write record whenever * a duplicate block is found. * The output of this function then goes to the output fd requested * by the caller of zfs_send(). */ static void * cksummer(void *arg) { dedup_arg_t *dda = arg; char *buf = malloc(1<<20); dmu_replay_record_t thedrr; dmu_replay_record_t *drr = &thedrr; struct drr_begin *drrb = &thedrr.drr_u.drr_begin; struct drr_end *drre = &thedrr.drr_u.drr_end; struct drr_object *drro = &thedrr.drr_u.drr_object; struct drr_write *drrw = &thedrr.drr_u.drr_write; struct drr_spill *drrs = &thedrr.drr_u.drr_spill; FILE *ofp; int outfd; dmu_replay_record_t wbr_drr = {0}; struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref; dedup_table_t ddt; zio_cksum_t stream_cksum; uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE); uint64_t numbuckets; ddt.max_ddt_size = MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100, SMALLEST_POSSIBLE_MAX_DDT_MB<<20); numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t)); /* * numbuckets must be a power of 2. Increase number to * a power of 2 if necessary. */ if (!ISP2(numbuckets)) numbuckets = 1 << high_order_bit(numbuckets); ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *)); ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0); ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *); ddt.numhashbits = high_order_bit(numbuckets) - 1; ddt.ddt_full = B_FALSE; /* Initialize the write-by-reference block. */ wbr_drr.drr_type = DRR_WRITE_BYREF; wbr_drr.drr_payloadlen = 0; outfd = dda->outputfd; ofp = fdopen(dda->inputfd, "r"); while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) { switch (drr->drr_type) { case DRR_BEGIN: { int fflags; ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); /* set the DEDUP feature flag for this stream */ fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); fflags |= (DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS); DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags); if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) { int sz = drr->drr_payloadlen; if (sz > 1<<20) { free(buf); buf = malloc(sz); } (void) ssread(buf, sz, ofp); if (ferror(stdin)) perror("fread"); if (cksum_and_write(buf, sz, &stream_cksum, outfd) == -1) goto out; } break; } case DRR_END: { /* use the recalculated checksum */ ZIO_SET_CHECKSUM(&drre->drr_checksum, stream_cksum.zc_word[0], stream_cksum.zc_word[1], stream_cksum.zc_word[2], stream_cksum.zc_word[3]); if ((write(outfd, drr, sizeof (dmu_replay_record_t))) == -1) goto out; break; } case DRR_OBJECT: { if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; if (drro->drr_bonuslen > 0) { (void) ssread(buf, P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), ofp); if (cksum_and_write(buf, P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), &stream_cksum, outfd) == -1) goto out; } break; } case DRR_SPILL: { if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; (void) ssread(buf, drrs->drr_length, ofp); if (cksum_and_write(buf, drrs->drr_length, &stream_cksum, outfd) == -1) goto out; break; } case DRR_FREEOBJECTS: { if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; break; } case DRR_WRITE: { dataref_t dataref; (void) ssread(buf, drrw->drr_length, ofp); /* * Use the existing checksum if it's dedup-capable, * else calculate a SHA256 checksum for it. */ if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum, zero_cksum) || !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) { SHA256_CTX ctx; zio_cksum_t tmpsha256; SHA256Init(&ctx); SHA256Update(&ctx, buf, drrw->drr_length); SHA256Final(&tmpsha256, &ctx); drrw->drr_key.ddk_cksum.zc_word[0] = BE_64(tmpsha256.zc_word[0]); drrw->drr_key.ddk_cksum.zc_word[1] = BE_64(tmpsha256.zc_word[1]); drrw->drr_key.ddk_cksum.zc_word[2] = BE_64(tmpsha256.zc_word[2]); drrw->drr_key.ddk_cksum.zc_word[3] = BE_64(tmpsha256.zc_word[3]); drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256; drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP; } dataref.ref_guid = drrw->drr_toguid; dataref.ref_object = drrw->drr_object; dataref.ref_offset = drrw->drr_offset; if (ddt_update(dda->dedup_hdl, &ddt, &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop, &dataref)) { /* block already present in stream */ wbr_drrr->drr_object = drrw->drr_object; wbr_drrr->drr_offset = drrw->drr_offset; wbr_drrr->drr_length = drrw->drr_length; wbr_drrr->drr_toguid = drrw->drr_toguid; wbr_drrr->drr_refguid = dataref.ref_guid; wbr_drrr->drr_refobject = dataref.ref_object; wbr_drrr->drr_refoffset = dataref.ref_offset; wbr_drrr->drr_checksumtype = drrw->drr_checksumtype; wbr_drrr->drr_checksumflags = drrw->drr_checksumtype; wbr_drrr->drr_key.ddk_cksum = drrw->drr_key.ddk_cksum; wbr_drrr->drr_key.ddk_prop = drrw->drr_key.ddk_prop; if (cksum_and_write(&wbr_drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; } else { /* block not previously seen */ if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; if (cksum_and_write(buf, drrw->drr_length, &stream_cksum, outfd) == -1) goto out; } break; } case DRR_FREE: { if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; break; } default: (void) printf("INVALID record type 0x%x\n", drr->drr_type); /* should never happen, so assert */ assert(B_FALSE); } } out: umem_cache_destroy(ddt.ddecache); free(ddt.dedup_hash_array); free(buf); (void) fclose(ofp); return (NULL); } /* * Routines for dealing with the AVL tree of fs-nvlists */ typedef struct fsavl_node { avl_node_t fn_node; nvlist_t *fn_nvfs; char *fn_snapname; uint64_t fn_guid; } fsavl_node_t; static int fsavl_compare(const void *arg1, const void *arg2) { const fsavl_node_t *fn1 = arg1; const fsavl_node_t *fn2 = arg2; if (fn1->fn_guid > fn2->fn_guid) return (+1); else if (fn1->fn_guid < fn2->fn_guid) return (-1); else return (0); } /* * Given the GUID of a snapshot, find its containing filesystem and * (optionally) name. */ static nvlist_t * fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname) { fsavl_node_t fn_find; fsavl_node_t *fn; fn_find.fn_guid = snapguid; fn = avl_find(avl, &fn_find, NULL); if (fn) { if (snapname) *snapname = fn->fn_snapname; return (fn->fn_nvfs); } return (NULL); } static void fsavl_destroy(avl_tree_t *avl) { fsavl_node_t *fn; void *cookie; if (avl == NULL) return; cookie = NULL; while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL) free(fn); avl_destroy(avl); free(avl); } /* * Given an nvlist, produce an avl tree of snapshots, ordered by guid */ static avl_tree_t * fsavl_create(nvlist_t *fss) { avl_tree_t *fsavl; nvpair_t *fselem = NULL; if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL) return (NULL); avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t), offsetof(fsavl_node_t, fn_node)); while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) { nvlist_t *nvfs, *snaps; nvpair_t *snapelem = NULL; VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs)); VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps)); while ((snapelem = nvlist_next_nvpair(snaps, snapelem)) != NULL) { fsavl_node_t *fn; uint64_t guid; VERIFY(0 == nvpair_value_uint64(snapelem, &guid)); if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) { fsavl_destroy(fsavl); return (NULL); } fn->fn_nvfs = nvfs; fn->fn_snapname = nvpair_name(snapelem); fn->fn_guid = guid; /* * Note: if there are multiple snaps with the * same GUID, we ignore all but one. */ if (avl_find(fsavl, fn, NULL) == NULL) avl_add(fsavl, fn); else free(fn); } } return (fsavl); } /* * Routines for dealing with the giant nvlist of fs-nvlists, etc. */ typedef struct send_data { uint64_t parent_fromsnap_guid; nvlist_t *parent_snaps; nvlist_t *fss; nvlist_t *snapprops; const char *fromsnap; const char *tosnap; boolean_t recursive; /* * The header nvlist is of the following format: * { * "tosnap" -> string * "fromsnap" -> string (if incremental) * "fss" -> { * id -> { * * "name" -> string (full name; for debugging) * "parentfromsnap" -> number (guid of fromsnap in parent) * * "props" -> { name -> value (only if set here) } * "snaps" -> { name (lastname) -> number (guid) } * "snapprops" -> { name (lastname) -> { name -> value } } * * "origin" -> number (guid) (if clone) * "sent" -> boolean (not on-disk) * } * } * } * */ } send_data_t; static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv); static int send_iterate_snap(zfs_handle_t *zhp, void *arg) { send_data_t *sd = arg; uint64_t guid = zhp->zfs_dmustats.dds_guid; char *snapname; nvlist_t *nv; snapname = strrchr(zhp->zfs_name, '@')+1; VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid)); /* * NB: if there is no fromsnap here (it's a newly created fs in * an incremental replication), we will substitute the tosnap. */ if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) || (sd->parent_fromsnap_guid == 0 && sd->tosnap && strcmp(snapname, sd->tosnap) == 0)) { sd->parent_fromsnap_guid = guid; } VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); send_iterate_prop(zhp, nv); VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv)); nvlist_free(nv); zfs_close(zhp); return (0); } static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv) { nvpair_t *elem = NULL; while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) { char *propname = nvpair_name(elem); zfs_prop_t prop = zfs_name_to_prop(propname); nvlist_t *propnv; if (!zfs_prop_user(propname)) { /* * Realistically, this should never happen. However, * we want the ability to add DSL properties without * needing to make incompatible version changes. We * need to ignore unknown properties to allow older * software to still send datasets containing these * properties, with the unknown properties elided. */ if (prop == ZPROP_INVAL) continue; if (zfs_prop_readonly(prop)) continue; } verify(nvpair_value_nvlist(elem, &propnv) == 0); if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION || prop == ZFS_PROP_REFQUOTA || prop == ZFS_PROP_REFRESERVATION) { char *source; uint64_t value; verify(nvlist_lookup_uint64(propnv, ZPROP_VALUE, &value) == 0); if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) continue; /* * May have no source before SPA_VERSION_RECVD_PROPS, * but is still modifiable. */ if (nvlist_lookup_string(propnv, ZPROP_SOURCE, &source) == 0) { if ((strcmp(source, zhp->zfs_name) != 0) && (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)) continue; } } else { char *source; if (nvlist_lookup_string(propnv, ZPROP_SOURCE, &source) != 0) continue; if ((strcmp(source, zhp->zfs_name) != 0) && (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)) continue; } if (zfs_prop_user(propname) || zfs_prop_get_type(prop) == PROP_TYPE_STRING) { char *value; verify(nvlist_lookup_string(propnv, ZPROP_VALUE, &value) == 0); VERIFY(0 == nvlist_add_string(nv, propname, value)); } else { uint64_t value; verify(nvlist_lookup_uint64(propnv, ZPROP_VALUE, &value) == 0); VERIFY(0 == nvlist_add_uint64(nv, propname, value)); } } } /* * recursively generate nvlists describing datasets. See comment * for the data structure send_data_t above for description of contents * of the nvlist. */ static int send_iterate_fs(zfs_handle_t *zhp, void *arg) { send_data_t *sd = arg; nvlist_t *nvfs, *nv; int rv = 0; uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid; uint64_t guid = zhp->zfs_dmustats.dds_guid; char guidstring[64]; VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0)); VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name)); VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap", sd->parent_fromsnap_guid)); if (zhp->zfs_dmustats.dds_origin[0]) { zfs_handle_t *origin = zfs_open(zhp->zfs_hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT); if (origin == NULL) return (-1); VERIFY(0 == nvlist_add_uint64(nvfs, "origin", origin->zfs_dmustats.dds_guid)); } /* iterate over props */ VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); send_iterate_prop(zhp, nv); VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv)); nvlist_free(nv); /* iterate over snaps, and set sd->parent_fromsnap_guid */ sd->parent_fromsnap_guid = 0; VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0)); VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0)); (void) zfs_iter_snapshots(zhp, send_iterate_snap, sd); VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps)); VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops)); nvlist_free(sd->parent_snaps); nvlist_free(sd->snapprops); /* add this fs to nvlist */ (void) snprintf(guidstring, sizeof (guidstring), "0x%llx", (longlong_t)guid); VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs)); nvlist_free(nvfs); /* iterate over children */ if (sd->recursive) rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd); sd->parent_fromsnap_guid = parent_fromsnap_guid_save; zfs_close(zhp); return (rv); } static int gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp) { zfs_handle_t *zhp; send_data_t sd = { 0 }; int error; zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return (EZFS_BADTYPE); VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0)); sd.fromsnap = fromsnap; sd.tosnap = tosnap; sd.recursive = recursive; if ((error = send_iterate_fs(zhp, &sd)) != 0) { nvlist_free(sd.fss); if (avlp != NULL) *avlp = NULL; *nvlp = NULL; return (error); } if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) { nvlist_free(sd.fss); *nvlp = NULL; return (EZFS_NOMEM); } *nvlp = sd.fss; return (0); } /* * Routines for dealing with the sorted snapshot functionality */ typedef struct zfs_node { zfs_handle_t *zn_handle; avl_node_t zn_avlnode; } zfs_node_t; static int zfs_sort_snaps(zfs_handle_t *zhp, void *data) { avl_tree_t *avl = data; zfs_node_t *node; zfs_node_t search; search.zn_handle = zhp; node = avl_find(avl, &search, NULL); if (node) { /* * If this snapshot was renamed while we were creating the * AVL tree, it's possible that we already inserted it under * its old name. Remove the old handle before adding the new * one. */ zfs_close(node->zn_handle); avl_remove(avl, node); free(node); } node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t)); node->zn_handle = zhp; avl_add(avl, node); return (0); } static int zfs_snapshot_compare(const void *larg, const void *rarg) { zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle; zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle; uint64_t lcreate, rcreate; /* * Sort them according to creation time. We use the hidden * CREATETXG property to get an absolute ordering of snapshots. */ lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG); rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG); if (lcreate < rcreate) return (-1); else if (lcreate > rcreate) return (+1); else return (0); } int zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data) { int ret = 0; zfs_node_t *node; avl_tree_t avl; void *cookie = NULL; avl_create(&avl, zfs_snapshot_compare, sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode)); ret = zfs_iter_snapshots(zhp, zfs_sort_snaps, &avl); for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node)) ret |= callback(node->zn_handle, data); while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL) free(node); avl_destroy(&avl); return (ret); } /* * Routines specific to "zfs send" */ typedef struct send_dump_data { /* these are all just the short snapname (the part after the @) */ const char *fromsnap; const char *tosnap; char prevsnap[ZFS_MAXNAMELEN]; uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; boolean_t verbose; int outfd; boolean_t err; nvlist_t *fss; avl_tree_t *fsavl; snapfilter_cb_t *filter_cb; void *filter_cb_arg; nvlist_t *debugnv; char holdtag[ZFS_MAXNAMELEN]; int cleanup_fd; } send_dump_data_t; /* * Dumps a backup of the given snapshot (incremental from fromsnap if it's not * NULL) to the file descriptor specified by outfd. */ static int dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj, boolean_t fromorigin, int outfd, nvlist_t *debugnv) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *thisdbg; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); assert(fromsnap_obj == 0 || !fromorigin); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); zc.zc_cookie = outfd; zc.zc_obj = fromorigin; zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zc.zc_fromobj = fromsnap_obj; VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0)); if (fromsnap && fromsnap[0] != '\0') { VERIFY(0 == nvlist_add_string(thisdbg, "fromsnap", fromsnap)); } if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) { char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "warning: cannot send '%s'"), zhp->zfs_name); VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno)); if (debugnv) { VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg)); } nvlist_free(thisdbg); switch (errno) { case EXDEV: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "not an earlier snapshot from the same fs")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); case ENOENT: if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_SNAPSHOT)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incremental source (@%s) does not exist"), zc.zc_value); } return (zfs_error(hdl, EZFS_NOENT, errbuf)); case EDQUOT: case EFBIG: case EIO: case ENOLINK: case ENOSPC: case ENOSTR: case ENXIO: case EPIPE: case ERANGE: case EFAULT: case EROFS: zfs_error_aux(hdl, strerror(errno)); return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); default: return (zfs_standard_error(hdl, errno, errbuf)); } } if (debugnv) VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg)); nvlist_free(thisdbg); return (0); } static int hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd) { zfs_handle_t *pzhp; int error = 0; char *thissnap; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); /* * zfs_send() only opens a cleanup_fd for sends that need it, * e.g. replication and doall. */ if (sdd->cleanup_fd == -1) return (0); thissnap = strchr(zhp->zfs_name, '@') + 1; *(thissnap - 1) = '\0'; pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET); *(thissnap - 1) = '@'; /* * It's OK if the parent no longer exists. The send code will * handle that error. */ if (pzhp) { error = zfs_hold(pzhp, thissnap, sdd->holdtag, B_FALSE, B_TRUE, B_TRUE, sdd->cleanup_fd, zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID), zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG)); zfs_close(pzhp); } return (error); } static int dump_snapshot(zfs_handle_t *zhp, void *arg) { send_dump_data_t *sdd = arg; char *thissnap; int err; boolean_t isfromsnap, istosnap; boolean_t exclude = B_FALSE; thissnap = strchr(zhp->zfs_name, '@') + 1; isfromsnap = (sdd->fromsnap != NULL && strcmp(sdd->fromsnap, thissnap) == 0); if (!sdd->seenfrom && isfromsnap) { err = hold_for_send(zhp, sdd); if (err == 0) { sdd->seenfrom = B_TRUE; (void) strcpy(sdd->prevsnap, thissnap); sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); } else if (err == ENOENT) { err = 0; } zfs_close(zhp); return (err); } if (sdd->seento || !sdd->seenfrom) { zfs_close(zhp); return (0); } istosnap = (strcmp(sdd->tosnap, thissnap) == 0); if (istosnap) sdd->seento = B_TRUE; if (!sdd->doall && !isfromsnap && !istosnap) { if (sdd->replicate) { char *snapname; nvlist_t *snapprops; /* * Filter out all intermediate snapshots except origin * snapshots needed to replicate clones. */ nvlist_t *nvfs = fsavl_find(sdd->fsavl, zhp->zfs_dmustats.dds_guid, &snapname); VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snapprops", &snapprops)); VERIFY(0 == nvlist_lookup_nvlist(snapprops, thissnap, &snapprops)); exclude = !nvlist_exists(snapprops, "is_clone_origin"); } else { exclude = B_TRUE; } } /* * If a filter function exists, call it to determine whether * this snapshot will be sent. */ if (exclude || (sdd->filter_cb != NULL && sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) { /* * This snapshot is filtered out. Don't send it, and don't * set prevsnap_obj, so it will be as if this snapshot didn't * exist, and the next accepted snapshot will be sent as * an incremental from the last accepted one, or as the * first (and full) snapshot in the case of a replication, * non-incremental send. */ zfs_close(zhp); return (0); } err = hold_for_send(zhp, sdd); if (err) { if (err == ENOENT) err = 0; zfs_close(zhp); return (err); } /* send it */ if (sdd->verbose) { (void) fprintf(stderr, "sending from @%s to %s\n", sdd->prevsnap, zhp->zfs_name); } err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj, sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate), sdd->outfd, sdd->debugnv); (void) strcpy(sdd->prevsnap, thissnap); sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zfs_close(zhp); return (err); } static int dump_filesystem(zfs_handle_t *zhp, void *arg) { int rv = 0; send_dump_data_t *sdd = arg; boolean_t missingfrom = B_FALSE; zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", zhp->zfs_name, sdd->tosnap); if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) { (void) fprintf(stderr, "WARNING: " "could not send %s@%s: does not exist\n", zhp->zfs_name, sdd->tosnap); sdd->err = B_TRUE; return (0); } if (sdd->replicate && sdd->fromsnap) { /* * If this fs does not have fromsnap, and we're doing * recursive, we need to send a full stream from the * beginning (or an incremental from the origin if this * is a clone). If we're doing non-recursive, then let * them get the error. */ (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", zhp->zfs_name, sdd->fromsnap); if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) { missingfrom = B_TRUE; } } sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0; sdd->prevsnap_obj = 0; if (sdd->fromsnap == NULL || missingfrom) sdd->seenfrom = B_TRUE; rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg); if (!sdd->seenfrom) { (void) fprintf(stderr, "WARNING: could not send %s@%s:\n" "incremental source (%s@%s) does not exist\n", zhp->zfs_name, sdd->tosnap, zhp->zfs_name, sdd->fromsnap); sdd->err = B_TRUE; } else if (!sdd->seento) { if (sdd->fromsnap) { (void) fprintf(stderr, "WARNING: could not send %s@%s:\n" "incremental source (%s@%s) " "is not earlier than it\n", zhp->zfs_name, sdd->tosnap, zhp->zfs_name, sdd->fromsnap); } else { (void) fprintf(stderr, "WARNING: " "could not send %s@%s: does not exist\n", zhp->zfs_name, sdd->tosnap); } sdd->err = B_TRUE; } return (rv); } static int dump_filesystems(zfs_handle_t *rzhp, void *arg) { send_dump_data_t *sdd = arg; nvpair_t *fspair; boolean_t needagain, progress; if (!sdd->replicate) return (dump_filesystem(rzhp, sdd)); /* Mark the clone origin snapshots. */ for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; fspair = nvlist_next_nvpair(sdd->fss, fspair)) { nvlist_t *nvfs; uint64_t origin_guid = 0; VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs)); (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid); if (origin_guid != 0) { char *snapname; nvlist_t *origin_nv = fsavl_find(sdd->fsavl, origin_guid, &snapname); if (origin_nv != NULL) { nvlist_t *snapprops; VERIFY(0 == nvlist_lookup_nvlist(origin_nv, "snapprops", &snapprops)); VERIFY(0 == nvlist_lookup_nvlist(snapprops, snapname, &snapprops)); VERIFY(0 == nvlist_add_boolean( snapprops, "is_clone_origin")); } } } again: needagain = progress = B_FALSE; for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; fspair = nvlist_next_nvpair(sdd->fss, fspair)) { nvlist_t *fslist; char *fsname; zfs_handle_t *zhp; int err; uint64_t origin_guid = 0; VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0); if (nvlist_lookup_boolean(fslist, "sent") == 0) continue; VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0); (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid); if (origin_guid != 0) { nvlist_t *origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL); if (origin_nv != NULL && nvlist_lookup_boolean(origin_nv, "sent") == ENOENT) { /* * origin has not been sent yet; * skip this clone. */ needagain = B_TRUE; continue; } } zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET); if (zhp == NULL) return (-1); err = dump_filesystem(zhp, sdd); VERIFY(nvlist_add_boolean(fslist, "sent") == 0); progress = B_TRUE; zfs_close(zhp); if (err) return (err); } if (needagain) { assert(progress); goto again; } return (0); } /* * Generate a send stream for the dataset identified by the argument zhp. * * The content of the send stream is the snapshot identified by * 'tosnap'. Incremental streams are requested in two ways: * - from the snapshot identified by "fromsnap" (if non-null) or * - from the origin of the dataset identified by zhp, which must * be a clone. In this case, "fromsnap" is null and "fromorigin" * is TRUE. * * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM) * if "replicate" is set. If "doall" is set, dump all the intermediate * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall" * case too. If "props" is set, send properties. */ int zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, sendflags_t flags, int outfd, snapfilter_cb_t filter_func, void *cb_arg, nvlist_t **debugnvp) { char errbuf[1024]; send_dump_data_t sdd = { 0 }; int err; nvlist_t *fss = NULL; avl_tree_t *fsavl = NULL; static uint64_t holdseq; int spa_version; boolean_t holdsnaps = B_FALSE; pthread_t tid; int pipefd[2]; dedup_arg_t dda = { 0 }; int featureflags = 0; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot send '%s'"), zhp->zfs_name); if (fromsnap && fromsnap[0] == '\0') { zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "zero-length incremental source")); return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf)); } if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) { uint64_t version; version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); if (version >= ZPL_VERSION_SA) { featureflags |= DMU_BACKUP_FEATURE_SA_SPILL; } } if (zfs_spa_version(zhp, &spa_version) == 0 && spa_version >= SPA_VERSION_USERREFS && (flags.doall || flags.replicate)) holdsnaps = B_TRUE; if (flags.dedup) { featureflags |= (DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS); if ((err = pipe(pipefd))) { zfs_error_aux(zhp->zfs_hdl, strerror(errno)); return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED, errbuf)); } dda.outputfd = outfd; dda.inputfd = pipefd[1]; dda.dedup_hdl = zhp->zfs_hdl; if ((err = pthread_create(&tid, NULL, cksummer, &dda))) { (void) close(pipefd[0]); (void) close(pipefd[1]); zfs_error_aux(zhp->zfs_hdl, strerror(errno)); return (zfs_error(zhp->zfs_hdl, EZFS_THREADCREATEFAILED, errbuf)); } } if (flags.replicate || flags.doall || flags.props) { dmu_replay_record_t drr = { 0 }; char *packbuf = NULL; size_t buflen = 0; zio_cksum_t zc = { { 0 } }; if (flags.replicate || flags.props) { nvlist_t *hdrnv; VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0)); if (fromsnap) { VERIFY(0 == nvlist_add_string(hdrnv, "fromsnap", fromsnap)); } VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap)); if (!flags.replicate) { VERIFY(0 == nvlist_add_boolean(hdrnv, "not_recursive")); } err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name, fromsnap, tosnap, flags.replicate, &fss, &fsavl); if (err) goto err_out; VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss)); err = nvlist_pack(hdrnv, &packbuf, &buflen, NV_ENCODE_XDR, 0); if (debugnvp) *debugnvp = hdrnv; else nvlist_free(hdrnv); if (err) { fsavl_destroy(fsavl); nvlist_free(fss); goto stderr_out; } } /* write first begin record */ drr.drr_type = DRR_BEGIN; drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.drr_versioninfo, DMU_COMPOUNDSTREAM); DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.drr_versioninfo, featureflags); (void) snprintf(drr.drr_u.drr_begin.drr_toname, sizeof (drr.drr_u.drr_begin.drr_toname), "%s@%s", zhp->zfs_name, tosnap); drr.drr_payloadlen = buflen; err = cksum_and_write(&drr, sizeof (drr), &zc, outfd); /* write header nvlist */ if (err != -1 && packbuf != NULL) { err = cksum_and_write(packbuf, buflen, &zc, outfd); } free(packbuf); if (err == -1) { fsavl_destroy(fsavl); nvlist_free(fss); err = errno; goto stderr_out; } /* write end record */ if (err != -1) { bzero(&drr, sizeof (drr)); drr.drr_type = DRR_END; drr.drr_u.drr_end.drr_checksum = zc; err = write(outfd, &drr, sizeof (drr)); if (err == -1) { fsavl_destroy(fsavl); nvlist_free(fss); err = errno; goto stderr_out; } } } /* dump each stream */ sdd.fromsnap = fromsnap; sdd.tosnap = tosnap; if (flags.dedup) sdd.outfd = pipefd[0]; else sdd.outfd = outfd; sdd.replicate = flags.replicate; sdd.doall = flags.doall; sdd.fromorigin = flags.fromorigin; sdd.fss = fss; sdd.fsavl = fsavl; sdd.verbose = flags.verbose; sdd.filter_cb = filter_func; sdd.filter_cb_arg = cb_arg; if (debugnvp) sdd.debugnv = *debugnvp; if (holdsnaps) { ++holdseq; (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag), ".send-%d-%llu", getpid(), (u_longlong_t)holdseq); - sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL); + sdd.cleanup_fd = open(ZFS_DEV, O_RDWR); if (sdd.cleanup_fd < 0) { err = errno; goto stderr_out; } } else { sdd.cleanup_fd = -1; } err = dump_filesystems(zhp, &sdd); fsavl_destroy(fsavl); nvlist_free(fss); if (flags.dedup) { (void) close(pipefd[0]); (void) pthread_join(tid, NULL); } if (sdd.cleanup_fd != -1) { VERIFY(0 == close(sdd.cleanup_fd)); sdd.cleanup_fd = -1; } if (flags.replicate || flags.doall || flags.props) { /* * write final end record. NB: want to do this even if * there was some error, because it might not be totally * failed. */ dmu_replay_record_t drr = { 0 }; drr.drr_type = DRR_END; if (write(outfd, &drr, sizeof (drr)) == -1) { return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf)); } } return (err || sdd.err); stderr_out: err = zfs_standard_error(zhp->zfs_hdl, err, errbuf); err_out: if (sdd.cleanup_fd != -1) VERIFY(0 == close(sdd.cleanup_fd)); if (flags.dedup) { (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); (void) close(pipefd[0]); } return (err); } /* * Routines specific to "zfs recv" */ static int recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen, boolean_t byteswap, zio_cksum_t *zc) { char *cp = buf; int rv; int len = ilen; do { rv = read(fd, cp, len); cp += rv; len -= rv; } while (rv > 0); if (rv < 0 || len != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to read from stream")); return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN, "cannot receive"))); } if (zc) { if (byteswap) fletcher_4_incremental_byteswap(buf, ilen, zc); else fletcher_4_incremental_native(buf, ilen, zc); } return (0); } static int recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp, boolean_t byteswap, zio_cksum_t *zc) { char *buf; int err; buf = zfs_alloc(hdl, len); if (buf == NULL) return (ENOMEM); err = recv_read(hdl, fd, buf, len, byteswap, zc); if (err != 0) { free(buf); return (err); } err = nvlist_unpack(buf, len, nvp, 0); free(buf); if (err != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "stream (malformed nvlist)")); return (EINVAL); } return (0); } static int recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, int baselen, char *newname, recvflags_t flags) { static int seq; zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int err; prop_changelist_t *clp; zfs_handle_t *zhp; zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); if (zhp == NULL) return (-1); clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, flags.force ? MS_FORCE : 0); zfs_close(zhp); if (clp == NULL) return (-1); err = changelist_prefix(clp); if (err) return (err); zc.zc_objset_type = DMU_OST_ZFS; (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); if (tryname) { (void) strcpy(newname, tryname); (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value)); if (flags.verbose) { (void) printf("attempting rename %s to %s\n", zc.zc_name, zc.zc_value); } err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); if (err == 0) changelist_rename(clp, name, tryname); } else { err = ENOENT; } if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) { seq++; (void) strncpy(newname, name, baselen); (void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen, "recv-%ld-%u", (long) getpid(), seq); (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value)); if (flags.verbose) { (void) printf("failed - trying rename %s to %s\n", zc.zc_name, zc.zc_value); } err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); if (err == 0) changelist_rename(clp, name, newname); if (err && flags.verbose) { (void) printf("failed (%u) - " "will try again on next pass\n", errno); } err = EAGAIN; } else if (flags.verbose) { if (err == 0) (void) printf("success\n"); else (void) printf("failed (%u)\n", errno); } (void) changelist_postfix(clp); changelist_free(clp); return (err); } static int recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen, char *newname, recvflags_t flags) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int err = 0; prop_changelist_t *clp; zfs_handle_t *zhp; boolean_t defer = B_FALSE; int spa_version; zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); if (zhp == NULL) return (-1); clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, flags.force ? MS_FORCE : 0); if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT && zfs_spa_version(zhp, &spa_version) == 0 && spa_version >= SPA_VERSION_USERREFS) defer = B_TRUE; zfs_close(zhp); if (clp == NULL) return (-1); err = changelist_prefix(clp); if (err) return (err); zc.zc_objset_type = DMU_OST_ZFS; zc.zc_defer_destroy = defer; (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); if (flags.verbose) (void) printf("attempting destroy %s\n", zc.zc_name); err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc); if (err == 0) { if (flags.verbose) (void) printf("success\n"); changelist_remove(clp, zc.zc_name); } (void) changelist_postfix(clp); changelist_free(clp); /* * Deferred destroy might destroy the snapshot or only mark it to be * destroyed later, and it returns success in either case. */ if (err != 0 || (defer && zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT))) { err = recv_rename(hdl, name, NULL, baselen, newname, flags); } return (err); } typedef struct guid_to_name_data { uint64_t guid; char *name; } guid_to_name_data_t; static int guid_to_name_cb(zfs_handle_t *zhp, void *arg) { guid_to_name_data_t *gtnd = arg; int err; if (zhp->zfs_dmustats.dds_guid == gtnd->guid) { (void) strcpy(gtnd->name, zhp->zfs_name); zfs_close(zhp); return (EEXIST); } err = zfs_iter_children(zhp, guid_to_name_cb, gtnd); zfs_close(zhp); return (err); } static int guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid, char *name) { /* exhaustive search all local snapshots */ guid_to_name_data_t gtnd; int err = 0; zfs_handle_t *zhp; char *cp; gtnd.guid = guid; gtnd.name = name; if (strchr(parent, '@') == NULL) { zhp = make_dataset_handle(hdl, parent); if (zhp != NULL) { err = zfs_iter_children(zhp, guid_to_name_cb, >nd); zfs_close(zhp); if (err == EEXIST) return (0); } } cp = strchr(parent, '/'); if (cp) *cp = '\0'; zhp = make_dataset_handle(hdl, parent); if (cp) *cp = '/'; if (zhp) { err = zfs_iter_children(zhp, guid_to_name_cb, >nd); zfs_close(zhp); } return (err == EEXIST ? 0 : ENOENT); } /* * Return true if dataset guid1 is created before guid2. */ static int created_before(libzfs_handle_t *hdl, avl_tree_t *avl, uint64_t guid1, uint64_t guid2) { nvlist_t *nvfs; char *fsname, *snapname; char buf[ZFS_MAXNAMELEN]; int rv; zfs_node_t zn1, zn2; if (guid2 == 0) return (0); if (guid1 == 0) return (1); nvfs = fsavl_find(avl, guid1, &snapname); VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); zn1.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); if (zn1.zn_handle == NULL) return (-1); nvfs = fsavl_find(avl, guid2, &snapname); VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); zn2.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); if (zn2.zn_handle == NULL) { zfs_close(zn2.zn_handle); return (-1); } rv = (zfs_snapshot_compare(&zn1, &zn2) == -1); zfs_close(zn1.zn_handle); zfs_close(zn2.zn_handle); return (rv); } static int recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl, nvlist_t *renamed) { nvlist_t *local_nv; avl_tree_t *local_avl; nvpair_t *fselem, *nextfselem; char *fromsnap; char newname[ZFS_MAXNAMELEN]; int error; boolean_t needagain, progress, recursive; char *s1, *s2; VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap)); recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); if (flags.dryrun) return (0); again: needagain = progress = B_FALSE; if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL, recursive, &local_nv, &local_avl)) != 0) return (error); /* * Process deletes and renames */ for (fselem = nvlist_next_nvpair(local_nv, NULL); fselem; fselem = nextfselem) { nvlist_t *nvfs, *snaps; nvlist_t *stream_nvfs = NULL; nvpair_t *snapelem, *nextsnapelem; uint64_t fromguid = 0; uint64_t originguid = 0; uint64_t stream_originguid = 0; uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid; char *fsname, *stream_fsname; nextfselem = nvlist_next_nvpair(local_nv, fselem); VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs)); VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps)); VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap", &parent_fromsnap_guid)); (void) nvlist_lookup_uint64(nvfs, "origin", &originguid); /* * First find the stream's fs, so we can check for * a different origin (due to "zfs promote") */ for (snapelem = nvlist_next_nvpair(snaps, NULL); snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) { uint64_t thisguid; VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid)); stream_nvfs = fsavl_find(stream_avl, thisguid, NULL); if (stream_nvfs != NULL) break; } /* check for promote */ (void) nvlist_lookup_uint64(stream_nvfs, "origin", &stream_originguid); if (stream_nvfs && originguid != stream_originguid) { switch (created_before(hdl, local_avl, stream_originguid, originguid)) { case 1: { /* promote it! */ zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; nvlist_t *origin_nvfs; char *origin_fsname; if (flags.verbose) (void) printf("promoting %s\n", fsname); origin_nvfs = fsavl_find(local_avl, originguid, NULL); VERIFY(0 == nvlist_lookup_string(origin_nvfs, "name", &origin_fsname)); (void) strlcpy(zc.zc_value, origin_fsname, sizeof (zc.zc_value)); (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); if (error == 0) progress = B_TRUE; break; } default: break; case -1: fsavl_destroy(local_avl); nvlist_free(local_nv); return (-1); } /* * We had/have the wrong origin, therefore our * list of snapshots is wrong. Need to handle * them on the next pass. */ needagain = B_TRUE; continue; } for (snapelem = nvlist_next_nvpair(snaps, NULL); snapelem; snapelem = nextsnapelem) { uint64_t thisguid; char *stream_snapname; nvlist_t *found, *props; nextsnapelem = nvlist_next_nvpair(snaps, snapelem); VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid)); found = fsavl_find(stream_avl, thisguid, &stream_snapname); /* check for delete */ if (found == NULL) { char name[ZFS_MAXNAMELEN]; if (!flags.force) continue; (void) snprintf(name, sizeof (name), "%s@%s", fsname, nvpair_name(snapelem)); error = recv_destroy(hdl, name, strlen(fsname)+1, newname, flags); if (error) needagain = B_TRUE; else progress = B_TRUE; continue; } stream_nvfs = found; if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops", &props) && 0 == nvlist_lookup_nvlist(props, stream_snapname, &props)) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; zc.zc_cookie = B_TRUE; /* received */ (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", fsname, nvpair_name(snapelem)); if (zcmd_write_src_nvlist(hdl, &zc, props) == 0) { (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); zcmd_free_nvlists(&zc); } } /* check for different snapname */ if (strcmp(nvpair_name(snapelem), stream_snapname) != 0) { char name[ZFS_MAXNAMELEN]; char tryname[ZFS_MAXNAMELEN]; (void) snprintf(name, sizeof (name), "%s@%s", fsname, nvpair_name(snapelem)); (void) snprintf(tryname, sizeof (name), "%s@%s", fsname, stream_snapname); error = recv_rename(hdl, name, tryname, strlen(fsname)+1, newname, flags); if (error) needagain = B_TRUE; else progress = B_TRUE; } if (strcmp(stream_snapname, fromsnap) == 0) fromguid = thisguid; } /* check for delete */ if (stream_nvfs == NULL) { if (!flags.force) continue; error = recv_destroy(hdl, fsname, strlen(tofs)+1, newname, flags); if (error) needagain = B_TRUE; else progress = B_TRUE; continue; } if (fromguid == 0) { if (flags.verbose) { (void) printf("local fs %s does not have " "fromsnap (%s in stream); must have " "been deleted locally; ignoring\n", fsname, fromsnap); } continue; } VERIFY(0 == nvlist_lookup_string(stream_nvfs, "name", &stream_fsname)); VERIFY(0 == nvlist_lookup_uint64(stream_nvfs, "parentfromsnap", &stream_parent_fromsnap_guid)); s1 = strrchr(fsname, '/'); s2 = strrchr(stream_fsname, '/'); /* * Check for rename. If the exact receive path is specified, it * does not count as a rename, but we still need to check the * datasets beneath it. */ if ((stream_parent_fromsnap_guid != 0 && parent_fromsnap_guid != 0 && stream_parent_fromsnap_guid != parent_fromsnap_guid) || ((flags.isprefix || strcmp(tofs, fsname) != 0) && (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) { nvlist_t *parent; char tryname[ZFS_MAXNAMELEN]; parent = fsavl_find(local_avl, stream_parent_fromsnap_guid, NULL); /* * NB: parent might not be found if we used the * tosnap for stream_parent_fromsnap_guid, * because the parent is a newly-created fs; * we'll be able to rename it after we recv the * new fs. */ if (parent != NULL) { char *pname; VERIFY(0 == nvlist_lookup_string(parent, "name", &pname)); (void) snprintf(tryname, sizeof (tryname), "%s%s", pname, strrchr(stream_fsname, '/')); } else { tryname[0] = '\0'; if (flags.verbose) { (void) printf("local fs %s new parent " "not found\n", fsname); } } newname[0] = '\0'; error = recv_rename(hdl, fsname, tryname, strlen(tofs)+1, newname, flags); if (renamed != NULL && newname[0] != '\0') { VERIFY(0 == nvlist_add_boolean(renamed, newname)); } if (error) needagain = B_TRUE; else progress = B_TRUE; } } fsavl_destroy(local_avl); nvlist_free(local_nv); if (needagain && progress) { /* do another pass to fix up temporary names */ if (flags.verbose) (void) printf("another pass:\n"); goto again; } return (needagain); } static int zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { nvlist_t *stream_nv = NULL; avl_tree_t *stream_avl = NULL; char *fromsnap = NULL; char *cp; char tofs[ZFS_MAXNAMELEN]; char sendfs[ZFS_MAXNAMELEN]; char errbuf[1024]; dmu_replay_record_t drre; int error; boolean_t anyerr = B_FALSE; boolean_t softerr = B_FALSE; boolean_t recursive; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); assert(drr->drr_type == DRR_BEGIN); assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC); assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) == DMU_COMPOUNDSTREAM); /* * Read in the nvlist from the stream. */ if (drr->drr_payloadlen != 0) { error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen, &stream_nv, flags.byteswap, zc); if (error) { error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } } recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); if (recursive && strchr(destname, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot specify snapshot name for multi-snapshot stream")); error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } /* * Read in the end record and verify checksum. */ if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre), flags.byteswap, NULL))) goto out; if (flags.byteswap) { drre.drr_type = BSWAP_32(drre.drr_type); drre.drr_u.drr_end.drr_checksum.zc_word[0] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]); drre.drr_u.drr_end.drr_checksum.zc_word[1] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]); drre.drr_u.drr_end.drr_checksum.zc_word[2] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]); drre.drr_u.drr_end.drr_checksum.zc_word[3] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]); } if (drre.drr_type != DRR_END) { error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incorrect header checksum")); error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap); if (drr->drr_payloadlen != 0) { nvlist_t *stream_fss; VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss", &stream_fss)); if ((stream_avl = fsavl_create(stream_fss)) == NULL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "couldn't allocate avl tree")); error = zfs_error(hdl, EZFS_NOMEM, errbuf); goto out; } if (fromsnap != NULL) { nvlist_t *renamed = NULL; nvpair_t *pair = NULL; (void) strlcpy(tofs, destname, ZFS_MAXNAMELEN); if (flags.isprefix) { struct drr_begin *drrb = &drr->drr_u.drr_begin; int i; if (flags.istail) { cp = strrchr(drrb->drr_toname, '/'); if (cp == NULL) { (void) strlcat(tofs, "/", ZFS_MAXNAMELEN); i = 0; } else { i = (cp - drrb->drr_toname); } } else { i = strcspn(drrb->drr_toname, "/@"); } /* zfs_receive_one() will create_parents() */ (void) strlcat(tofs, &drrb->drr_toname[i], ZFS_MAXNAMELEN); *strchr(tofs, '@') = '\0'; } if (recursive && !flags.dryrun && !flags.nomount) { VERIFY(0 == nvlist_alloc(&renamed, NV_UNIQUE_NAME, 0)); } softerr = recv_incremental_replication(hdl, tofs, flags, stream_nv, stream_avl, renamed); /* Unmount renamed filesystems before receiving. */ while ((pair = nvlist_next_nvpair(renamed, pair)) != NULL) { zfs_handle_t *zhp; prop_changelist_t *clp = NULL; zhp = zfs_open(hdl, nvpair_name(pair), ZFS_TYPE_FILESYSTEM); if (zhp != NULL) { clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, 0, 0); zfs_close(zhp); if (clp != NULL) { softerr |= changelist_prefix(clp); changelist_free(clp); } } } nvlist_free(renamed); } } /* * Get the fs specified by the first path in the stream (the top level * specified by 'zfs send') and pass it to each invocation of * zfs_receive_one(). */ (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN); if ((cp = strchr(sendfs, '@')) != NULL) *cp = '\0'; /* Finally, receive each contained stream */ do { /* * we should figure out if it has a recoverable * error, in which case do a recv_skip() and drive on. * Note, if we fail due to already having this guid, * zfs_receive_one() will take care of it (ie, * recv_skip() and return 0). */ error = zfs_receive_impl(hdl, destname, flags, fd, sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd, action_handlep); if (error == ENODATA) { error = 0; break; } anyerr |= error; } while (error == 0); if (drr->drr_payloadlen != 0 && fromsnap != NULL) { /* * Now that we have the fs's they sent us, try the * renames again. */ softerr = recv_incremental_replication(hdl, tofs, flags, stream_nv, stream_avl, NULL); } out: fsavl_destroy(stream_avl); if (stream_nv) nvlist_free(stream_nv); if (softerr) error = -2; if (anyerr) error = -1; return (error); } static void trunc_prop_errs(int truncated) { ASSERT(truncated != 0); if (truncated == 1) (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "1 more property could not be set\n")); else (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "%d more properties could not be set\n"), truncated); } static int recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) { dmu_replay_record_t *drr; void *buf = malloc(1<<20); char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive:")); /* XXX would be great to use lseek if possible... */ drr = buf; while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t), byteswap, NULL) == 0) { if (byteswap) drr->drr_type = BSWAP_32(drr->drr_type); switch (drr->drr_type) { case DRR_BEGIN: /* NB: not to be used on v2 stream packages */ if (drr->drr_payloadlen != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid substream header")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } break; case DRR_END: free(buf); return (0); case DRR_OBJECT: if (byteswap) { drr->drr_u.drr_object.drr_bonuslen = BSWAP_32(drr->drr_u.drr_object. drr_bonuslen); } (void) recv_read(hdl, fd, buf, P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8), B_FALSE, NULL); break; case DRR_WRITE: if (byteswap) { drr->drr_u.drr_write.drr_length = BSWAP_64(drr->drr_u.drr_write.drr_length); } (void) recv_read(hdl, fd, buf, drr->drr_u.drr_write.drr_length, B_FALSE, NULL); break; case DRR_SPILL: if (byteswap) { drr->drr_u.drr_write.drr_length = BSWAP_64(drr->drr_u.drr_spill.drr_length); } (void) recv_read(hdl, fd, buf, drr->drr_u.drr_spill.drr_length, B_FALSE, NULL); break; case DRR_WRITE_BYREF: case DRR_FREEOBJECTS: case DRR_FREE: break; default: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid record type")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } } free(buf); return (-1); } /* * Restores a backup of tosnap from the file descriptor specified by infd. */ static int zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, recvflags_t flags, dmu_replay_record_t *drr, dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; time_t begin_time; int ioctl_err, ioctl_errno, err; char *cp; struct drr_begin *drrb = &drr->drr_u.drr_begin; char errbuf[1024]; char prop_errbuf[1024]; const char *chopprefix; boolean_t newfs = B_FALSE; boolean_t stream_wantsnewfs; uint64_t parent_snapguid = 0; prop_changelist_t *clp = NULL; nvlist_t *snapprops_nvlist = NULL; zprop_errflags_t prop_errflags; boolean_t recursive; begin_time = time(NULL); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); if (stream_avl != NULL) { char *snapname; nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid, &snapname); nvlist_t *props; int ret; (void) nvlist_lookup_uint64(fs, "parentfromsnap", &parent_snapguid); err = nvlist_lookup_nvlist(fs, "props", &props); if (err) VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); if (flags.canmountoff) { VERIFY(0 == nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0)); } ret = zcmd_write_src_nvlist(hdl, &zc, props); if (err) nvlist_free(props); if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) { VERIFY(0 == nvlist_lookup_nvlist(props, snapname, &snapprops_nvlist)); } if (ret != 0) return (-1); } cp = NULL; /* * Determine how much of the snapshot name stored in the stream * we are going to tack on to the name they specified on the * command line, and how much we are going to chop off. * * If they specified a snapshot, chop the entire name stored in * the stream. */ if (flags.istail) { /* * A filesystem was specified with -e. We want to tack on only * the tail of the sent snapshot path. */ if (strchr(tosnap, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "argument - snapshot not allowed with -e")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } chopprefix = strrchr(sendfs, '/'); if (chopprefix == NULL) { /* * The tail is the poolname, so we need to * prepend a path separator. */ int len = strlen(drrb->drr_toname); cp = malloc(len + 2); cp[0] = '/'; (void) strcpy(&cp[1], drrb->drr_toname); chopprefix = cp; } else { chopprefix = drrb->drr_toname + (chopprefix - sendfs); } } else if (flags.isprefix) { /* * A filesystem was specified with -d. We want to tack on * everything but the first element of the sent snapshot path * (all but the pool name). */ if (strchr(tosnap, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "argument - snapshot not allowed with -d")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } chopprefix = strchr(drrb->drr_toname, '/'); if (chopprefix == NULL) chopprefix = strchr(drrb->drr_toname, '@'); } else if (strchr(tosnap, '@') == NULL) { /* * If a filesystem was specified without -d or -e, we want to * tack on everything after the fs specified by 'zfs send'. */ chopprefix = drrb->drr_toname + strlen(sendfs); } else { /* A snapshot was specified as an exact path (no -d or -e). */ if (recursive) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot specify snapshot name for multi-snapshot " "stream")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } chopprefix = drrb->drr_toname + strlen(drrb->drr_toname); } ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname); ASSERT(chopprefix > drrb->drr_toname); ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname)); ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' || chopprefix[0] == '\0'); /* * Determine name of destination snapshot, store in zc_value. */ (void) strcpy(zc.zc_top_ds, tosnap); (void) strcpy(zc.zc_value, tosnap); (void) strlcat(zc.zc_value, chopprefix, sizeof (zc.zc_value)); free(cp); if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) { zcmd_free_nvlists(&zc); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } /* * Determine the name of the origin snapshot, store in zc_string. */ if (drrb->drr_flags & DRR_FLAG_CLONE) { if (guid_to_name(hdl, tosnap, drrb->drr_fromguid, zc.zc_string) != 0) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "local origin for clone %s does not exist"), zc.zc_value); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } if (flags.verbose) (void) printf("found clone origin %s\n", zc.zc_string); } stream_wantsnewfs = (drrb->drr_fromguid == 0 || (drrb->drr_flags & DRR_FLAG_CLONE)); if (stream_wantsnewfs) { /* * if the parent fs does not exist, look for it based on * the parent snap GUID */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive new filesystem stream")); (void) strcpy(zc.zc_name, zc.zc_value); cp = strrchr(zc.zc_name, '/'); if (cp) *cp = '\0'; if (cp && !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { char suffix[ZFS_MAXNAMELEN]; (void) strcpy(suffix, strrchr(zc.zc_value, '/')); if (guid_to_name(hdl, tosnap, parent_snapguid, zc.zc_value) == 0) { *strchr(zc.zc_value, '@') = '\0'; (void) strcat(zc.zc_value, suffix); } } } else { /* * if the fs does not exist, look for it based on the * fromsnap GUID */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive incremental stream")); (void) strcpy(zc.zc_name, zc.zc_value); *strchr(zc.zc_name, '@') = '\0'; /* * If the exact receive path was specified and this is the * topmost path in the stream, then if the fs does not exist we * should look no further. */ if ((flags.isprefix || (*(chopprefix = drrb->drr_toname + strlen(sendfs)) != '\0' && *chopprefix != '@')) && !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { char snap[ZFS_MAXNAMELEN]; (void) strcpy(snap, strchr(zc.zc_value, '@')); if (guid_to_name(hdl, tosnap, drrb->drr_fromguid, zc.zc_value) == 0) { *strchr(zc.zc_value, '@') = '\0'; (void) strcat(zc.zc_value, snap); } } } (void) strcpy(zc.zc_name, zc.zc_value); *strchr(zc.zc_name, '@') = '\0'; if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { zfs_handle_t *zhp; /* * Destination fs exists. Therefore this should either * be an incremental, or the stream specifies a new fs * (full stream or clone) and they want us to blow it * away (and have therefore specified -F and removed any * snapshots). */ if (stream_wantsnewfs) { if (!flags.force) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' exists\n" "must specify -F to overwrite it"), zc.zc_name); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination has snapshots (eg. %s)\n" "must destroy them to overwrite it"), zc.zc_name); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } } if ((zhp = zfs_open(hdl, zc.zc_name, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { zcmd_free_nvlists(&zc); return (-1); } if (stream_wantsnewfs && zhp->zfs_dmustats.dds_origin[0]) { zcmd_free_nvlists(&zc); zfs_close(zhp); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' is a clone\n" "must destroy it to overwrite it"), zc.zc_name); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM && stream_wantsnewfs) { /* We can't do online recv in this case */ clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0); if (clp == NULL) { zfs_close(zhp); zcmd_free_nvlists(&zc); return (-1); } if (changelist_prefix(clp) != 0) { changelist_free(clp); zfs_close(zhp); zcmd_free_nvlists(&zc); return (-1); } } zfs_close(zhp); } else { /* * Destination filesystem does not exist. Therefore we better * be creating a new filesystem (either from a full backup, or * a clone). It would therefore be invalid if the user * specified only the pool name (i.e. if the destination name * contained no slash character). */ if (!stream_wantsnewfs || (cp = strrchr(zc.zc_name, '/')) == NULL) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' does not exist"), zc.zc_name); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } /* * Trim off the final dataset component so we perform the * recvbackup ioctl to the filesystems's parent. */ *cp = '\0'; if (flags.isprefix && !flags.istail && !flags.dryrun && create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) { zcmd_free_nvlists(&zc); return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); } newfs = B_TRUE; } zc.zc_begin_record = drr_noswap->drr_u.drr_begin; zc.zc_cookie = infd; zc.zc_guid = flags.force; if (flags.verbose) { (void) printf("%s %s stream of %s into %s\n", flags.dryrun ? "would receive" : "receiving", drrb->drr_fromguid ? "incremental" : "full", drrb->drr_toname, zc.zc_value); (void) fflush(stdout); } if (flags.dryrun) { zcmd_free_nvlists(&zc); return (recv_skip(hdl, infd, flags.byteswap)); } zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf; zc.zc_nvlist_dst_size = sizeof (prop_errbuf); zc.zc_cleanup_fd = cleanup_fd; zc.zc_action_handle = *action_handlep; err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc); ioctl_errno = errno; prop_errflags = (zprop_errflags_t)zc.zc_obj; if (err == 0) { nvlist_t *prop_errors; VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, zc.zc_nvlist_dst_size, &prop_errors, 0)); nvpair_t *prop_err = NULL; while ((prop_err = nvlist_next_nvpair(prop_errors, prop_err)) != NULL) { char tbuf[1024]; zfs_prop_t prop; int intval; prop = zfs_name_to_prop(nvpair_name(prop_err)); (void) nvpair_value_int32(prop_err, &intval); if (strcmp(nvpair_name(prop_err), ZPROP_N_MORE_ERRORS) == 0) { trunc_prop_errs(intval); break; } else { (void) snprintf(tbuf, sizeof (tbuf), dgettext(TEXT_DOMAIN, "cannot receive %s property on %s"), nvpair_name(prop_err), zc.zc_name); zfs_setprop_error(hdl, prop, intval, tbuf); } } nvlist_free(prop_errors); } zc.zc_nvlist_dst = 0; zc.zc_nvlist_dst_size = 0; zcmd_free_nvlists(&zc); if (err == 0 && snapprops_nvlist) { zfs_cmd_t zc2 = { "\0", "\0", "\0", "\0", 0 }; (void) strcpy(zc2.zc_name, zc.zc_value); zc2.zc_cookie = B_TRUE; /* received */ if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) { (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2); zcmd_free_nvlists(&zc2); } } if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) { /* * It may be that this snapshot already exists, * in which case we want to consume & ignore it * rather than failing. */ avl_tree_t *local_avl; nvlist_t *local_nv, *fs; cp = strchr(zc.zc_value, '@'); /* * XXX Do this faster by just iterating over snaps in * this fs. Also if zc_value does not exist, we will * get a strange "does not exist" error message. */ *cp = '\0'; if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE, &local_nv, &local_avl) == 0) { *cp = '@'; fs = fsavl_find(local_avl, drrb->drr_toguid, NULL); fsavl_destroy(local_avl); nvlist_free(local_nv); if (fs != NULL) { if (flags.verbose) { (void) printf("snap %s already exists; " "ignoring\n", zc.zc_value); } err = ioctl_err = recv_skip(hdl, infd, flags.byteswap); } } *cp = '@'; } if (ioctl_err != 0) { switch (ioctl_errno) { case ENODEV: cp = strchr(zc.zc_value, '@'); *cp = '\0'; zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "most recent snapshot of %s does not\n" "match incremental source"), zc.zc_value); (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); *cp = '@'; break; case ETXTBSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination %s has been modified\n" "since most recent snapshot"), zc.zc_name); (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); break; case EEXIST: cp = strchr(zc.zc_value, '@'); if (newfs) { /* it's the containing fs that exists */ *cp = '\0'; } zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination already exists")); (void) zfs_error_fmt(hdl, EZFS_EXISTS, dgettext(TEXT_DOMAIN, "cannot restore to %s"), zc.zc_value); *cp = '@'; break; case EINVAL: (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ECKSUM: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid stream (checksum mismatch)")); (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded to receive this stream.")); (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EDQUOT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination %s space quota exceeded"), zc.zc_name); (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); break; default: (void) zfs_standard_error(hdl, ioctl_errno, errbuf); } } /* * Mount the target filesystem (if created). Also mount any * children of the target filesystem if we did a replication * receive (indicated by stream_avl being non-NULL). */ cp = strchr(zc.zc_value, '@'); if (cp && (ioctl_err == 0 || !newfs)) { zfs_handle_t *h; *cp = '\0'; h = zfs_open(hdl, zc.zc_value, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (h != NULL) { if (h->zfs_type == ZFS_TYPE_VOLUME) { *cp = '@'; } else if (newfs || stream_avl) { /* * Track the first/top of hierarchy fs, * for mounting and sharing later. */ if (top_zfs && *top_zfs == NULL) *top_zfs = zfs_strdup(hdl, zc.zc_value); } zfs_close(h); } *cp = '@'; } if (clp) { err |= changelist_postfix(clp); changelist_free(clp); } if (prop_errflags & ZPROP_ERR_NOCLEAR) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " "failed to clear unreceived properties on %s"), zc.zc_name); (void) fprintf(stderr, "\n"); } if (prop_errflags & ZPROP_ERR_NORESTORE) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " "failed to restore original properties on %s"), zc.zc_name); (void) fprintf(stderr, "\n"); } if (err || ioctl_err) return (-1); *action_handlep = zc.zc_action_handle; if (flags.verbose) { char buf1[64]; char buf2[64]; uint64_t bytes = zc.zc_cookie; time_t delta = time(NULL) - begin_time; if (delta == 0) delta = 1; zfs_nicenum(bytes, buf1, sizeof (buf1)); zfs_nicenum(bytes/delta, buf2, sizeof (buf1)); (void) printf("received %sB stream in %lu seconds (%sB/sec)\n", buf1, delta, buf2); } return (0); } static int zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags, int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { int err; dmu_replay_record_t drr, drr_noswap; struct drr_begin *drrb = &drr.drr_u.drr_begin; char errbuf[1024]; zio_cksum_t zcksum = { { 0 } }; uint64_t featureflags; int hdrtype; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); if (flags.isprefix && !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs " "(%s) does not exist"), tosnap); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } /* read in the BEGIN record */ if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE, &zcksum))) return (err); if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) { /* It's the double end record at the end of a package */ return (ENODATA); } /* the kernel needs the non-byteswapped begin record */ drr_noswap = drr; flags.byteswap = B_FALSE; if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { /* * We computed the checksum in the wrong byteorder in * recv_read() above; do it again correctly. */ bzero(&zcksum, sizeof (zio_cksum_t)); fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum); flags.byteswap = B_TRUE; drr.drr_type = BSWAP_32(drr.drr_type); drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen); drrb->drr_magic = BSWAP_64(drrb->drr_magic); drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); drrb->drr_type = BSWAP_32(drrb->drr_type); drrb->drr_flags = BSWAP_32(drrb->drr_flags); drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); } if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "stream (bad magic number)")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo); if (!DMU_STREAM_SUPPORTED(featureflags) || (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "stream has unsupported feature, feature flags = %lx"), featureflags); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } if (strchr(drrb->drr_toname, '@') == NULL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "stream (bad snapshot name)")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) { char nonpackage_sendfs[ZFS_MAXNAMELEN]; if (sendfs == NULL) { /* * We were not called from zfs_receive_package(). Get * the fs specified by 'zfs send'. */ char *cp; (void) strlcpy(nonpackage_sendfs, drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN); if ((cp = strchr(nonpackage_sendfs, '@')) != NULL) *cp = '\0'; sendfs = nonpackage_sendfs; } return (zfs_receive_one(hdl, infd, tosnap, flags, &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd, action_handlep)); } else { assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_COMPOUNDSTREAM); return (zfs_receive_package(hdl, infd, tosnap, flags, &drr, &zcksum, top_zfs, cleanup_fd, action_handlep)); } } /* * Restores a backup of tosnap from the file descriptor specified by infd. * Return 0 on total success, -2 if some things couldn't be * destroyed/renamed/promoted, -1 if some things couldn't be received. * (-1 will override -2). */ int zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags, int infd, avl_tree_t *stream_avl) { char *top_zfs = NULL; int err; int cleanup_fd; uint64_t action_handle = 0; - cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL); + cleanup_fd = open(ZFS_DEV, O_RDWR); VERIFY(cleanup_fd >= 0); err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL, stream_avl, &top_zfs, cleanup_fd, &action_handle); VERIFY(0 == close(cleanup_fd)); if (err == 0 && !flags.nomount && top_zfs) { zfs_handle_t *zhp; prop_changelist_t *clp; zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM); if (zhp != NULL) { clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, CL_GATHER_MOUNT_ALWAYS, 0); zfs_close(zhp); if (clp != NULL) { /* mount and share received datasets */ err = changelist_postfix(clp); changelist_free(clp); } } if (zhp == NULL || clp == NULL || err) err = -1; } if (top_zfs) free(top_zfs); return (err); } diff --git a/module/zfs/include/sys/zfs_ioctl.h b/module/zfs/include/sys/zfs_ioctl.h index ad41561ad6f3..4a21cc8e0f48 100644 --- a/module/zfs/include/sys/zfs_ioctl.h +++ b/module/zfs/include/sys/zfs_ioctl.h @@ -1,352 +1,346 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_ZFS_IOCTL_H #define _SYS_ZFS_IOCTL_H #include #include #include #include #include #include #ifdef _KERNEL #include #endif /* _KERNEL */ #ifdef __cplusplus extern "C" { #endif /* * Property values for snapdir */ #define ZFS_SNAPDIR_HIDDEN 0 #define ZFS_SNAPDIR_VISIBLE 1 /* * Field manipulation macros for the drr_versioninfo field of the * send stream header. */ /* * Header types for zfs send streams. */ typedef enum drr_headertype { DMU_SUBSTREAM = 0x1, DMU_COMPOUNDSTREAM = 0x2 } drr_headertype_t; #define DMU_GET_STREAM_HDRTYPE(vi) BF64_GET((vi), 0, 2) #define DMU_SET_STREAM_HDRTYPE(vi, x) BF64_SET((vi), 0, 2, x) #define DMU_GET_FEATUREFLAGS(vi) BF64_GET((vi), 2, 30) #define DMU_SET_FEATUREFLAGS(vi, x) BF64_SET((vi), 2, 30, x) /* * Feature flags for zfs send streams (flags in drr_versioninfo) */ #define DMU_BACKUP_FEATURE_DEDUP (0x1) #define DMU_BACKUP_FEATURE_DEDUPPROPS (0x2) #define DMU_BACKUP_FEATURE_SA_SPILL (0x4) /* * Mask of all supported backup features */ #define DMU_BACKUP_FEATURE_MASK (DMU_BACKUP_FEATURE_DEDUP | \ DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL) /* Are all features in the given flag word currently supported? */ #define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK)) /* * The drr_versioninfo field of the dmu_replay_record has the * following layout: * * 64 56 48 40 32 24 16 8 0 * +-------+-------+-------+-------+-------+-------+-------+-------+ * | reserved | feature-flags |C|S| * +-------+-------+-------+-------+-------+-------+-------+-------+ * * The low order two bits indicate the header type: SUBSTREAM (0x1) * or COMPOUNDSTREAM (0x2). Using two bits for this is historical: * this field used to be a version number, where the two version types * were 1 and 2. Using two bits for this allows earlier versions of * the code to be able to recognize send streams that don't use any * of the features indicated by feature flags. */ #define DMU_BACKUP_MAGIC 0x2F5bacbacULL #define DRR_FLAG_CLONE (1<<0) #define DRR_FLAG_CI_DATA (1<<1) /* * flags in the drr_checksumflags field in the DRR_WRITE and * DRR_WRITE_BYREF blocks */ #define DRR_CHECKSUM_DEDUP (1<<0) #define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP) /* * zfs ioctl command structure */ typedef struct dmu_replay_record { enum { DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS, DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF, DRR_SPILL, DRR_NUMTYPES } drr_type; uint32_t drr_payloadlen; union { struct drr_begin { uint64_t drr_magic; uint64_t drr_versioninfo; /* was drr_version */ uint64_t drr_creation_time; dmu_objset_type_t drr_type; uint32_t drr_flags; uint64_t drr_toguid; uint64_t drr_fromguid; char drr_toname[MAXNAMELEN]; } drr_begin; struct drr_end { zio_cksum_t drr_checksum; uint64_t drr_toguid; } drr_end; struct drr_object { uint64_t drr_object; dmu_object_type_t drr_type; dmu_object_type_t drr_bonustype; uint32_t drr_blksz; uint32_t drr_bonuslen; uint8_t drr_checksumtype; uint8_t drr_compress; uint8_t drr_pad[6]; uint64_t drr_toguid; /* bonus content follows */ } drr_object; struct drr_freeobjects { uint64_t drr_firstobj; uint64_t drr_numobjs; uint64_t drr_toguid; } drr_freeobjects; struct drr_write { uint64_t drr_object; dmu_object_type_t drr_type; uint32_t drr_pad; uint64_t drr_offset; uint64_t drr_length; uint64_t drr_toguid; uint8_t drr_checksumtype; uint8_t drr_checksumflags; uint8_t drr_pad2[6]; ddt_key_t drr_key; /* deduplication key */ /* content follows */ } drr_write; struct drr_free { uint64_t drr_object; uint64_t drr_offset; uint64_t drr_length; uint64_t drr_toguid; } drr_free; struct drr_write_byref { /* where to put the data */ uint64_t drr_object; uint64_t drr_offset; uint64_t drr_length; uint64_t drr_toguid; /* where to find the prior copy of the data */ uint64_t drr_refguid; uint64_t drr_refobject; uint64_t drr_refoffset; /* properties of the data */ uint8_t drr_checksumtype; uint8_t drr_checksumflags; uint8_t drr_pad2[6]; ddt_key_t drr_key; /* deduplication key */ } drr_write_byref; struct drr_spill { uint64_t drr_object; uint64_t drr_length; uint64_t drr_toguid; uint64_t drr_pad[4]; /* needed for crypto */ /* spill data follows */ } drr_spill; } drr_u; } dmu_replay_record_t; /* diff record range types */ typedef enum diff_type { DDR_NONE = 0x1, DDR_INUSE = 0x2, DDR_FREE = 0x4 } diff_type_t; /* * The diff reports back ranges of free or in-use objects. */ typedef struct dmu_diff_record { uint64_t ddr_type; uint64_t ddr_first; uint64_t ddr_last; } dmu_diff_record_t; typedef struct zinject_record { uint64_t zi_objset; uint64_t zi_object; uint64_t zi_start; uint64_t zi_end; uint64_t zi_guid; uint32_t zi_level; uint32_t zi_error; uint64_t zi_type; uint32_t zi_freq; uint32_t zi_failfast; char zi_func[MAXNAMELEN]; uint32_t zi_iotype; int32_t zi_duration; uint64_t zi_timer; } zinject_record_t; #define ZINJECT_NULL 0x1 #define ZINJECT_FLUSH_ARC 0x2 #define ZINJECT_UNLOAD_SPA 0x4 #define ZEVENT_NONBLOCK 0x1 #define ZEVENT_SIZE 1024 typedef struct zfs_share { uint64_t z_exportdata; uint64_t z_sharedata; uint64_t z_sharetype; /* 0 = share, 1 = unshare */ uint64_t z_sharemax; /* max length of share string */ } zfs_share_t; /* * ZFS file systems may behave the usual, POSIX-compliant way, where * name lookups are case-sensitive. They may also be set up so that * all the name lookups are case-insensitive, or so that only some * lookups, the ones that set an FIGNORECASE flag, are case-insensitive. */ typedef enum zfs_case { ZFS_CASE_SENSITIVE, ZFS_CASE_INSENSITIVE, ZFS_CASE_MIXED } zfs_case_t; typedef struct zfs_cmd { char zc_name[MAXPATHLEN]; char zc_value[MAXPATHLEN * 2]; char zc_string[MAXNAMELEN]; char zc_top_ds[MAXPATHLEN]; uint64_t zc_guid; uint64_t zc_nvlist_conf; /* really (char *) */ uint64_t zc_nvlist_conf_size; uint64_t zc_nvlist_src; /* really (char *) */ uint64_t zc_nvlist_src_size; uint64_t zc_nvlist_dst; /* really (char *) */ uint64_t zc_nvlist_dst_size; uint64_t zc_cookie; uint64_t zc_objset_type; uint64_t zc_perm_action; uint64_t zc_history; /* really (char *) */ uint64_t zc_history_len; uint64_t zc_history_offset; uint64_t zc_obj; uint64_t zc_iflags; /* internal to zfs(7fs) */ zfs_share_t zc_share; dmu_objset_stats_t zc_objset_stats; struct drr_begin zc_begin_record; zinject_record_t zc_inject_record; boolean_t zc_defer_destroy; boolean_t zc_temphold; uint64_t zc_action_handle; int zc_cleanup_fd; uint8_t zc_pad[4]; /* alignment */ uint64_t zc_sendobj; uint64_t zc_fromobj; uint64_t zc_createtxg; zfs_stat_t zc_stat; } zfs_cmd_t; typedef struct zfs_useracct { char zu_domain[256]; uid_t zu_rid; uint32_t zu_pad; uint64_t zu_space; } zfs_useracct_t; #define ZFSDEV_MAX_MINOR (1 << 16) #define ZFS_MIN_MINOR (ZFSDEV_MAX_MINOR + 1) #define ZPOOL_EXPORT_AFTER_SPLIT 0x1 #ifdef _KERNEL typedef struct zfs_creat { nvlist_t *zct_zplprops; nvlist_t *zct_props; } zfs_creat_t; -extern dev_info_t *zfs_dip; - extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr); extern int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr); extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr); extern int zfs_busy(void); extern int zfs_unmount_snap(const char *, void *); -/* - * ZFS minor numbers can refer to either a control device instance or - * a zvol. Depending on the value of zss_type, zss_data points to either - * a zvol_state_t or a zfs_onexit_t. - */ -enum zfs_soft_state_type { - ZSST_ZVOL, - ZSST_CTLDEV +enum zfsdev_state_type { + ZST_ONEXIT, + ZST_ZEVENT, + ZST_ALL, }; -typedef struct zfs_soft_state { - enum zfs_soft_state_type zss_type; - void *zss_data; -} zfs_soft_state_t; +typedef struct zfsdev_state { + list_node_t zs_next; /* next zfsdev_state_t link */ + struct file *zs_file; /* associated file struct */ + minor_t zs_minor; /* made up minor number */ + void *zs_onexit; /* onexit data */ + void *zs_zevent; /* zevent data */ +} zfsdev_state_t; -extern void *zfsdev_get_soft_state(minor_t minor, - enum zfs_soft_state_type which); +extern void *zfsdev_get_state(minor_t minor, enum zfsdev_state_type which); +extern minor_t zfsdev_getminor(struct file *filp); extern minor_t zfsdev_minor_alloc(void); -extern void *zfsdev_state; -extern kmutex_t zfsdev_state_lock; - #endif /* _KERNEL */ #ifdef __cplusplus } #endif #endif /* _SYS_ZFS_IOCTL_H */ diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 2302c0f3c6e0..99287e443d95 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1,5196 +1,5242 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include + +#include #include "zfs_namecheck.h" #include "zfs_prop.h" #include "zfs_deleg.h" #include "zfs_comutil.h" -extern struct modlfs zfs_modlfs; +kmutex_t zfsdev_state_lock; +list_t zfsdev_state_list; extern void zfs_init(void); extern void zfs_fini(void); -ldi_ident_t zfs_li = NULL; -dev_info_t *zfs_dip; - typedef int zfs_ioc_func_t(zfs_cmd_t *); typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *); typedef enum { NO_NAME, POOL_NAME, DATASET_NAME } zfs_ioc_namecheck_t; typedef enum { POOL_CHECK_NONE = 1 << 0, POOL_CHECK_SUSPENDED = 1 << 1, POOL_CHECK_READONLY = 1 << 2 } zfs_ioc_poolcheck_t; typedef struct zfs_ioc_vec { zfs_ioc_func_t *zvec_func; zfs_secpolicy_func_t *zvec_secpolicy; zfs_ioc_namecheck_t zvec_namecheck; boolean_t zvec_his_log; zfs_ioc_poolcheck_t zvec_pool_check; } zfs_ioc_vec_t; /* This array is indexed by zfs_userquota_prop_t */ static const char *userquota_perms[] = { ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_PERM_GROUPQUOTA, }; static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc); static int zfs_check_settable(const char *name, nvpair_t *property, cred_t *cr); static int zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errors); static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *, boolean_t *); int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **); /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */ void __dprintf(const char *file, const char *func, int line, const char *fmt, ...) { const char *newfile; char buf[512]; va_list adx; /* * Get rid of annoying "../common/" prefix to filename. */ newfile = strrchr(file, '/'); if (newfile != NULL) { newfile = newfile + 1; /* Get rid of leading / */ } else { newfile = file; } va_start(adx, fmt); (void) vsnprintf(buf, sizeof (buf), fmt, adx); va_end(adx); /* * To get this data, use the zfs-dprintf probe as so: * dtrace -q -n 'zfs-dprintf \ * /stringof(arg0) == "dbuf.c"/ \ * {printf("%s: %s", stringof(arg1), stringof(arg3))}' * arg0 = file name * arg1 = function name * arg2 = line number * arg3 = message */ DTRACE_PROBE4(zfs__dprintf, char *, newfile, char *, func, int, line, char *, buf); } static void history_str_free(char *buf) { kmem_free(buf, HIS_MAX_RECORD_LEN); } static char * history_str_get(zfs_cmd_t *zc) { char *buf; if (zc->zc_history == 0) return (NULL); buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP); if (copyinstr((void *)(uintptr_t)zc->zc_history, buf, HIS_MAX_RECORD_LEN, NULL) != 0) { history_str_free(buf); return (NULL); } buf[HIS_MAX_RECORD_LEN -1] = '\0'; return (buf); } /* * Check to see if the named dataset is currently defined as bootable */ static boolean_t zfs_is_bootfs(const char *name) { objset_t *os; if (dmu_objset_hold(name, FTAG, &os) == 0) { boolean_t ret; ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os))); dmu_objset_rele(os, FTAG); return (ret); } return (B_FALSE); } /* * zfs_earlier_version * * Return non-zero if the spa version is less than requested version. */ static int zfs_earlier_version(const char *name, int version) { spa_t *spa; if (spa_open(name, &spa, FTAG) == 0) { if (spa_version(spa) < version) { spa_close(spa, FTAG); return (1); } spa_close(spa, FTAG); } return (0); } /* * zpl_earlier_version * * Return TRUE if the ZPL version is less than requested version. */ static boolean_t zpl_earlier_version(const char *name, int version) { objset_t *os; boolean_t rc = B_TRUE; if (dmu_objset_hold(name, FTAG, &os) == 0) { uint64_t zplversion; if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); return (B_TRUE); } /* XXX reading from non-owned objset */ if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0) rc = zplversion < version; dmu_objset_rele(os, FTAG); } return (rc); } static void zfs_log_history(zfs_cmd_t *zc) { spa_t *spa; char *buf; if ((buf = history_str_get(zc)) == NULL) return; if (spa_open(zc->zc_name, &spa, FTAG) == 0) { if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY) (void) spa_history_log(spa, buf, LOG_CMD_NORMAL); spa_close(spa, FTAG); } history_str_free(buf); } /* * Policy for top-level read operations (list pools). Requires no privileges, * and can be used in the local zone, as there is no associated dataset. */ /* ARGSUSED */ static int zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr) { return (0); } /* * Policy for dataset read operations (list children, get statistics). Requires * no privileges, but must be visible in the local zone. */ /* ARGSUSED */ static int zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr) { if (INGLOBALZONE(curproc) || zone_dataset_visible(zc->zc_name, NULL)) return (0); return (ENOENT); } static int zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr) { int writable = 1; /* * The dataset must be visible by this zone -- check this first * so they don't see EPERM on something they shouldn't know about. */ if (!INGLOBALZONE(curproc) && !zone_dataset_visible(dataset, &writable)) return (ENOENT); if (INGLOBALZONE(curproc)) { /* * If the fs is zoned, only root can access it from the * global zone. */ if (secpolicy_zfs(cr) && zoned) return (EPERM); } else { /* * If we are in a local zone, the 'zoned' property must be set. */ if (!zoned) return (EPERM); /* must be writable by this zone */ if (!writable) return (EPERM); } return (0); } static int zfs_dozonecheck(const char *dataset, cred_t *cr) { uint64_t zoned; if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL)) return (ENOENT); return (zfs_dozonecheck_impl(dataset, zoned, cr)); } static int zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr) { uint64_t zoned; rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) { rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); return (ENOENT); } rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); return (zfs_dozonecheck_impl(dataset, zoned, cr)); } int zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) { int error; error = zfs_dozonecheck(name, cr); if (error == 0) { error = secpolicy_zfs(cr); if (error) error = dsl_deleg_access(name, perm, cr); } return (error); } int zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, const char *perm, cred_t *cr) { int error; error = zfs_dozonecheck_ds(name, ds, cr); if (error == 0) { error = secpolicy_zfs(cr); if (error) error = dsl_deleg_access_impl(ds, perm, cr); } return (error); } /* * Policy for setting the security label property. * * Returns 0 for success, non-zero for access and other errors. */ static int zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) { #ifdef HAVE_MLSLABEL char ds_hexsl[MAXNAMELEN]; bslabel_t ds_sl, new_sl; boolean_t new_default = FALSE; uint64_t zoned; int needed_priv = -1; int error; /* First get the existing dataset label. */ error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 1, sizeof (ds_hexsl), &ds_hexsl, NULL); if (error) return (EPERM); if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0) new_default = TRUE; /* The label must be translatable */ if (!new_default && (hexstr_to_label(strval, &new_sl) != 0)) return (EINVAL); /* * In a non-global zone, disallow attempts to set a label that * doesn't match that of the zone; otherwise no other checks * are needed. */ if (!INGLOBALZONE(curproc)) { if (new_default || !blequal(&new_sl, CR_SL(CRED()))) return (EPERM); return (0); } /* * For global-zone datasets (i.e., those whose zoned property is * "off", verify that the specified new label is valid for the * global zone. */ if (dsl_prop_get_integer(name, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) return (EPERM); if (!zoned) { if (zfs_check_global_label(name, strval) != 0) return (EPERM); } /* * If the existing dataset label is nondefault, check if the * dataset is mounted (label cannot be changed while mounted). * Get the zfsvfs; if there isn't one, then the dataset isn't * mounted (or isn't a dataset, doesn't exist, ...). */ if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) { objset_t *os; static char *setsl_tag = "setsl_tag"; /* * Try to own the dataset; abort if there is any error, * (e.g., already mounted, in use, or other error). */ error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, setsl_tag, &os); if (error) return (EPERM); dmu_objset_disown(os, setsl_tag); if (new_default) { needed_priv = PRIV_FILE_DOWNGRADE_SL; goto out_check; } if (hexstr_to_label(strval, &new_sl) != 0) return (EPERM); if (blstrictdom(&ds_sl, &new_sl)) needed_priv = PRIV_FILE_DOWNGRADE_SL; else if (blstrictdom(&new_sl, &ds_sl)) needed_priv = PRIV_FILE_UPGRADE_SL; } else { /* dataset currently has a default label */ if (!new_default) needed_priv = PRIV_FILE_UPGRADE_SL; } out_check: if (needed_priv != -1) return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL)); return (0); #else return ENOTSUP; #endif /* HAVE_MLSLABEL */ } static int zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval, cred_t *cr) { char *strval; /* * Check permissions for special properties. */ switch (prop) { default: break; case ZFS_PROP_ZONED: /* * Disallow setting of 'zoned' from within a local zone. */ if (!INGLOBALZONE(curproc)) return (EPERM); break; case ZFS_PROP_QUOTA: if (!INGLOBALZONE(curproc)) { uint64_t zoned; char setpoint[MAXNAMELEN]; /* * Unprivileged users are allowed to modify the * quota on things *under* (ie. contained by) * the thing they own. */ if (dsl_prop_get_integer(dsname, "zoned", &zoned, setpoint)) return (EPERM); if (!zoned || strlen(dsname) <= strlen(setpoint)) return (EPERM); } break; case ZFS_PROP_MLSLABEL: if (!is_system_labeled()) return (EPERM); if (nvpair_value_string(propval, &strval) == 0) { int err; err = zfs_set_slabel_policy(dsname, strval, CRED()); if (err != 0) return (err); } break; } return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr)); } int zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr) { int error; error = zfs_dozonecheck(zc->zc_name, cr); if (error) return (error); /* * permission to set permissions will be evaluated later in * dsl_deleg_can_allow() */ return (0); } int zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_ROLLBACK, cr)); } int zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) { spa_t *spa; dsl_pool_t *dp; dsl_dataset_t *ds; char *cp; int error; /* * Generate the current snapshot name from the given objsetid, then * use that name for the secpolicy/zone checks. */ cp = strchr(zc->zc_name, '@'); if (cp == NULL) return (EINVAL); error = spa_open(zc->zc_name, &spa, FTAG); if (error) return (error); dp = spa_get_dsl(spa); rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); spa_close(spa, FTAG); if (error) return (error); dsl_dataset_name(ds, zc->zc_name); error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds, ZFS_DELEG_PERM_SEND, cr); dsl_dataset_rele(ds, FTAG); return (error); } +#ifdef HAVE_ZPL static int zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) { vnode_t *vp; int error; if ((error = lookupname(zc->zc_value, UIO_SYSSPACE, NO_FOLLOW, NULL, &vp)) != 0) return (error); /* Now make sure mntpnt and dataset are ZFS */ if (vp->v_vfsp->vfs_fstype != zfsfstype || (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource), zc->zc_name) != 0)) { VN_RELE(vp); return (EPERM); } VN_RELE(vp); return (dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_SHARE, cr)); } +#endif /* HAVE_ZPL */ int zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) { +#ifdef HAVE_ZPL if (!INGLOBALZONE(curproc)) return (EPERM); if (secpolicy_nfs(cr) == 0) { return (0); } else { return (zfs_secpolicy_deleg_share(zc, cr)); } +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } int zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) { +#ifdef HAVE_ZPL if (!INGLOBALZONE(curproc)) return (EPERM); if (secpolicy_smb(cr) == 0) { return (0); } else { return (zfs_secpolicy_deleg_share(zc, cr)); } +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } static int zfs_get_parent(const char *datasetname, char *parent, int parentsize) { char *cp; /* * Remove the @bla or /bla from the end of the name to get the parent. */ (void) strncpy(parent, datasetname, parentsize); cp = strrchr(parent, '@'); if (cp != NULL) { cp[0] = '\0'; } else { cp = strrchr(parent, '/'); if (cp == NULL) return (ENOENT); cp[0] = '\0'; } return (0); } int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) { int error; if ((error = zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr)); } static int zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_destroy_perms(zc->zc_name, cr)); } /* * Destroying snapshots with delegated permissions requires * descendent mount and destroy permissions. * Reassemble the full filesystem@snap name so dsl_deleg_access() * can do the correct permission check. * * Since this routine is used when doing a recursive destroy of snapshots * and destroying snapshots requires descendent permissions, a successfull * check of the top level snapshot applies to snapshots of all descendent * datasets as well. */ static int zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr) { int error; char *dsname; dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value); error = zfs_secpolicy_destroy_perms(dsname, cr); strfree(dsname); return (error); } int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) { char parentname[MAXNAMELEN]; int error; if ((error = zfs_secpolicy_write_perms(from, ZFS_DELEG_PERM_RENAME, cr)) != 0) return (error); if ((error = zfs_secpolicy_write_perms(from, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); if ((error = zfs_get_parent(to, parentname, sizeof (parentname))) != 0) return (error); if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_CREATE, cr)) != 0) return (error); if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); return (error); } static int zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr)); } static int zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr) { char parentname[MAXNAMELEN]; objset_t *clone; int error; error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_PROMOTE, cr); if (error) return (error); error = dmu_objset_hold(zc->zc_name, FTAG, &clone); if (error == 0) { dsl_dataset_t *pclone = NULL; dsl_dir_t *dd; dd = clone->os_dsl_dataset->ds_dir; rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dd->dd_pool, dd->dd_phys->dd_origin_obj, FTAG, &pclone); rw_exit(&dd->dd_pool->dp_config_rwlock); if (error) { dmu_objset_rele(clone, FTAG); return (error); } error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr); dsl_dataset_name(pclone, parentname); dmu_objset_rele(clone, FTAG); dsl_dataset_rele(pclone, FTAG); if (error == 0) error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_PROMOTE, cr); } return (error); } static int zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr) { int error; if ((error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_RECEIVE, cr)) != 0) return (error); if ((error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_CREATE, cr)); } int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) { return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_SNAPSHOT, cr)); } static int zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr)); } static int zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) { char parentname[MAXNAMELEN]; int error; if ((error = zfs_get_parent(zc->zc_name, parentname, sizeof (parentname))) != 0) return (error); if (zc->zc_value[0] != '\0') { if ((error = zfs_secpolicy_write_perms(zc->zc_value, ZFS_DELEG_PERM_CLONE, cr)) != 0) return (error); } if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_CREATE, cr)) != 0) return (error); error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_MOUNT, cr); return (error); } +#ifdef HAVE_ZPL static int zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr) { int error; error = secpolicy_fs_unmount(cr, NULL); if (error) { error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr); } return (error); } +#endif /* HAVE_ZPL */ /* * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires * SYS_CONFIG privilege, which is not available in a local zone. */ /* ARGSUSED */ static int zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr) { if (secpolicy_sys_config(cr, B_FALSE) != 0) return (EPERM); return (0); } /* * Policy for object to name lookups. */ /* ARGSUSED */ static int zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr) { int error; if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0) return (0); error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr); return (error); } /* * Policy for fault injection. Requires all privileges. */ /* ARGSUSED */ static int zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr) { return (secpolicy_zinject(cr)); } static int zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr) { zfs_prop_t prop = zfs_name_to_prop(zc->zc_value); if (prop == ZPROP_INVAL) { if (!zfs_prop_user(zc->zc_value)) return (EINVAL); return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_USERPROP, cr)); } else { return (zfs_secpolicy_setprop(zc->zc_name, prop, NULL, cr)); } } static int zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) { int err = zfs_secpolicy_read(zc, cr); if (err) return (err); if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); if (zc->zc_value[0] == 0) { /* * They are asking about a posix uid/gid. If it's * themself, allow it. */ if (zc->zc_objset_type == ZFS_PROP_USERUSED || zc->zc_objset_type == ZFS_PROP_USERQUOTA) { if (zc->zc_guid == crgetuid(cr)) return (0); } else { if (groupmember(zc->zc_guid, cr)) return (0); } } return (zfs_secpolicy_write_perms(zc->zc_name, userquota_perms[zc->zc_objset_type], cr)); } static int zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr) { int err = zfs_secpolicy_read(zc, cr); if (err) return (err); if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); return (zfs_secpolicy_write_perms(zc->zc_name, userquota_perms[zc->zc_objset_type], cr)); } static int zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, NULL, cr)); } static int zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_HOLD, cr)); } static int zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_RELEASE, cr)); } /* * Policy for allowing temporary snapshots to be taken or released */ static int zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr) { /* * A temporary snapshot is the same as a snapshot, * hold, destroy and release all rolled into one. * Delegated diff alone is sufficient that we allow this. */ int error; if ((error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr)) == 0) return (0); error = zfs_secpolicy_snapshot(zc, cr); if (!error) error = zfs_secpolicy_hold(zc, cr); if (!error) error = zfs_secpolicy_release(zc, cr); if (!error) error = zfs_secpolicy_destroy(zc, cr); return (error); } /* * Returns the nvlist as specified by the user in the zfs_cmd_t. */ static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp) { char *packed; int error; nvlist_t *list = NULL; /* * Read in and unpack the user-supplied nvlist. */ if (size == 0) return (EINVAL); packed = kmem_alloc(size, KM_SLEEP); if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size, iflag)) != 0) { kmem_free(packed, size); return (error); } if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) { kmem_free(packed, size); return (error); } kmem_free(packed, size); *nvp = list; return (0); } static int fit_error_list(zfs_cmd_t *zc, nvlist_t **errors) { size_t size; VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); if (size > zc->zc_nvlist_dst_size) { nvpair_t *more_errors; int n = 0; if (zc->zc_nvlist_dst_size < 1024) return (ENOMEM); VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0); more_errors = nvlist_prev_nvpair(*errors, NULL); do { nvpair_t *pair = nvlist_prev_nvpair(*errors, more_errors); VERIFY(nvlist_remove_nvpair(*errors, pair) == 0); n++; VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); } while (size > zc->zc_nvlist_dst_size); VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0); VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0); ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); ASSERT(size <= zc->zc_nvlist_dst_size); } return (0); } static int put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) { char *packed = NULL; int error = 0; size_t size; VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0); if (size > zc->zc_nvlist_dst_size) { error = ENOMEM; } else { packed = kmem_alloc(size, KM_SLEEP); VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE, KM_SLEEP) == 0); if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst, size, zc->zc_iflags) != 0) error = EFAULT; kmem_free(packed, size); } zc->zc_nvlist_dst_size = size; return (error); } +#ifdef HAVE_ZPL static int getzfsvfs(const char *dsname, zfsvfs_t **zfvp) { objset_t *os; int error; error = dmu_objset_hold(dsname, FTAG, &os); if (error) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); return (EINVAL); } mutex_enter(&os->os_user_ptr_lock); *zfvp = dmu_objset_get_user(os); if (*zfvp) { VFS_HOLD((*zfvp)->z_vfs); } else { error = ESRCH; } mutex_exit(&os->os_user_ptr_lock); dmu_objset_rele(os, FTAG); return (error); } +#endif /* * Find a zfsvfs_t for a mounted filesystem, or create our own, in which * case its z_vfs will be NULL, and it will be opened as the owner. */ static int zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer) { +#ifdef HAVE_ZPL int error = 0; if (getzfsvfs(name, zfvp) != 0) error = zfsvfs_create(name, zfvp); if (error == 0) { rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER : RW_READER, tag); if ((*zfvp)->z_unmounted) { /* * XXX we could probably try again, since the unmounting * thread should be just about to disassociate the * objset from the zfsvfs. */ rrw_exit(&(*zfvp)->z_teardown_lock, tag); return (EBUSY); } } return (error); +#else + return ENOTSUP; +#endif } static void zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag) { +#ifdef HAVE_ZPL rrw_exit(&zfsvfs->z_teardown_lock, tag); if (zfsvfs->z_vfs) { VFS_RELE(zfsvfs->z_vfs); } else { dmu_objset_disown(zfsvfs->z_os, zfsvfs); zfsvfs_free(zfsvfs); } +#endif } static int zfs_ioc_pool_create(zfs_cmd_t *zc) { int error; nvlist_t *config, *props = NULL; nvlist_t *rootprops = NULL; nvlist_t *zplprops = NULL; char *buf; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config))) return (error); if (zc->zc_nvlist_src_size != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) { nvlist_free(config); return (error); } if (props) { nvlist_t *nvl = NULL; uint64_t version = SPA_VERSION; (void) nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), &version); if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) { error = EINVAL; goto pool_props_bad; } (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl); if (nvl) { error = nvlist_dup(nvl, &rootprops, KM_SLEEP); if (error != 0) { nvlist_free(config); nvlist_free(props); return (error); } (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS); } VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); error = zfs_fill_zplprops_root(version, rootprops, zplprops, NULL); if (error) goto pool_props_bad; } buf = history_str_get(zc); error = spa_create(zc->zc_name, config, props, buf, zplprops); /* * Set the remaining root properties */ if (!error && (error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) (void) spa_destroy(zc->zc_name); if (buf != NULL) history_str_free(buf); pool_props_bad: nvlist_free(rootprops); nvlist_free(zplprops); nvlist_free(config); nvlist_free(props); return (error); } static int zfs_ioc_pool_destroy(zfs_cmd_t *zc) { int error; zfs_log_history(zc); error = spa_destroy(zc->zc_name); if (error == 0) zvol_remove_minors(zc->zc_name); return (error); } static int zfs_ioc_pool_import(zfs_cmd_t *zc) { nvlist_t *config, *props = NULL; uint64_t guid; int error; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config)) != 0) return (error); if (zc->zc_nvlist_src_size != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) { nvlist_free(config); return (error); } if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 || guid != zc->zc_guid) error = EINVAL; else error = spa_import(zc->zc_name, config, props, zc->zc_cookie); if (zc->zc_nvlist_dst != 0) { int err; if ((err = put_nvlist(zc, config)) != 0) error = err; } nvlist_free(config); if (props) nvlist_free(props); return (error); } static int zfs_ioc_pool_export(zfs_cmd_t *zc) { int error; boolean_t force = (boolean_t)zc->zc_cookie; boolean_t hardforce = (boolean_t)zc->zc_guid; zfs_log_history(zc); error = spa_export(zc->zc_name, NULL, force, hardforce); if (error == 0) zvol_remove_minors(zc->zc_name); return (error); } static int zfs_ioc_pool_configs(zfs_cmd_t *zc) { nvlist_t *configs; int error; if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL) return (EEXIST); error = put_nvlist(zc, configs); nvlist_free(configs); return (error); } static int zfs_ioc_pool_stats(zfs_cmd_t *zc) { nvlist_t *config; int error; int ret = 0; error = spa_get_stats(zc->zc_name, &config, zc->zc_value, sizeof (zc->zc_value)); if (config != NULL) { ret = put_nvlist(zc, config); nvlist_free(config); /* * The config may be present even if 'error' is non-zero. * In this case we return success, and preserve the real errno * in 'zc_cookie'. */ zc->zc_cookie = error; } else { ret = error; } return (ret); } /* * Try to import the given pool, returning pool stats as appropriate so that * user land knows which devices are available and overall pool health. */ static int zfs_ioc_pool_tryimport(zfs_cmd_t *zc) { nvlist_t *tryconfig, *config; int error; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &tryconfig)) != 0) return (error); config = spa_tryimport(tryconfig); nvlist_free(tryconfig); if (config == NULL) return (EINVAL); error = put_nvlist(zc, config); nvlist_free(config); return (error); } /* * inputs: * zc_name name of the pool * zc_cookie scan func (pool_scan_func_t) */ static int zfs_ioc_pool_scan(zfs_cmd_t *zc) { spa_t *spa; int error; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if (zc->zc_cookie == POOL_SCAN_NONE) error = spa_scan_stop(spa); else error = spa_scan(spa, zc->zc_cookie); spa_close(spa, FTAG); return (error); } static int zfs_ioc_pool_freeze(zfs_cmd_t *zc) { spa_t *spa; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error == 0) { spa_freeze(spa); spa_close(spa, FTAG); } return (error); } static int zfs_ioc_pool_upgrade(zfs_cmd_t *zc) { spa_t *spa; int error; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) { spa_close(spa, FTAG); return (EINVAL); } spa_upgrade(spa, zc->zc_cookie); spa_close(spa, FTAG); return (error); } static int zfs_ioc_pool_get_history(zfs_cmd_t *zc) { spa_t *spa; char *hist_buf; uint64_t size; int error; if ((size = zc->zc_history_len) == 0) return (EINVAL); if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) { spa_close(spa, FTAG); return (ENOTSUP); } hist_buf = kmem_alloc(size, KM_SLEEP); if ((error = spa_history_get(spa, &zc->zc_history_offset, &zc->zc_history_len, hist_buf)) == 0) { error = ddi_copyout(hist_buf, (void *)(uintptr_t)zc->zc_history, zc->zc_history_len, zc->zc_iflags); } spa_close(spa, FTAG); kmem_free(hist_buf, size); return (error); } static int zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc) { int error; if ((error = dsl_dsobj_to_dsname(zc->zc_name,zc->zc_obj,zc->zc_value))) return (error); return (0); } /* * inputs: * zc_name name of filesystem * zc_obj object to find * * outputs: * zc_value name of object */ static int zfs_ioc_obj_to_path(zfs_cmd_t *zc) { objset_t *os; int error; /* XXX reading from objset not owned */ if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); return (EINVAL); } error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value, sizeof (zc->zc_value)); dmu_objset_rele(os, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_obj object to find * * outputs: * zc_stat stats on object * zc_value path to object */ static int zfs_ioc_obj_to_stats(zfs_cmd_t *zc) { objset_t *os; int error; /* XXX reading from objset not owned */ if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); return (EINVAL); } error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value, sizeof (zc->zc_value)); dmu_objset_rele(os, FTAG); return (error); } static int zfs_ioc_vdev_add(zfs_cmd_t *zc) { spa_t *spa; int error; nvlist_t *config, **l2cache, **spares; uint_t nl2cache = 0, nspares = 0; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config); (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache); (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES, &spares, &nspares); /* * A root pool with concatenated devices is not supported. * Thus, can not add a device to a root pool. * * Intent log device can not be added to a rootpool because * during mountroot, zil is replayed, a seperated log device * can not be accessed during the mountroot time. * * l2cache and spare devices are ok to be added to a rootpool. */ if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) { nvlist_free(config); spa_close(spa, FTAG); return (EDOM); } if (error == 0) { error = spa_vdev_add(spa, config); nvlist_free(config); } spa_close(spa, FTAG); return (error); } /* * inputs: * zc_name name of the pool * zc_nvlist_conf nvlist of devices to remove * zc_cookie to stop the remove? */ static int zfs_ioc_vdev_remove(zfs_cmd_t *zc) { spa_t *spa; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE); spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_set_state(zfs_cmd_t *zc) { spa_t *spa; int error; vdev_state_t newstate = VDEV_STATE_UNKNOWN; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); switch (zc->zc_cookie) { case VDEV_STATE_ONLINE: error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate); break; case VDEV_STATE_OFFLINE: error = vdev_offline(spa, zc->zc_guid, zc->zc_obj); break; case VDEV_STATE_FAULTED: if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED && zc->zc_obj != VDEV_AUX_EXTERNAL) zc->zc_obj = VDEV_AUX_ERR_EXCEEDED; error = vdev_fault(spa, zc->zc_guid, zc->zc_obj); break; case VDEV_STATE_DEGRADED: if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED && zc->zc_obj != VDEV_AUX_EXTERNAL) zc->zc_obj = VDEV_AUX_ERR_EXCEEDED; error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj); break; default: error = EINVAL; } zc->zc_cookie = newstate; spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_attach(zfs_cmd_t *zc) { spa_t *spa; int replacing = zc->zc_cookie; nvlist_t *config; int error; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config)) == 0) { error = spa_vdev_attach(spa, zc->zc_guid, config, replacing); nvlist_free(config); } spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_detach(zfs_cmd_t *zc) { spa_t *spa; int error; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE); spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_split(zfs_cmd_t *zc) { spa_t *spa; nvlist_t *config, *props = NULL; int error; boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT); if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config))) { spa_close(spa, FTAG); return (error); } if (zc->zc_nvlist_src_size != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) { spa_close(spa, FTAG); nvlist_free(config); return (error); } error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp); spa_close(spa, FTAG); nvlist_free(config); nvlist_free(props); return (error); } static int zfs_ioc_vdev_setpath(zfs_cmd_t *zc) { spa_t *spa; char *path = zc->zc_value; uint64_t guid = zc->zc_guid; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); error = spa_vdev_setpath(spa, guid, path); spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_setfru(zfs_cmd_t *zc) { spa_t *spa; char *fru = zc->zc_value; uint64_t guid = zc->zc_guid; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); error = spa_vdev_setfru(spa, guid, fru); spa_close(spa, FTAG); return (error); } static int zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os) { int error = 0; nvlist_t *nv; dmu_objset_fast_stat(os, &zc->zc_objset_stats); if (zc->zc_nvlist_dst != 0 && (error = dsl_prop_get_all(os, &nv)) == 0) { dmu_objset_stats(os, nv); /* * NB: zvol_get_stats() will read the objset contents, * which we aren't supposed to do with a * DS_MODE_USER hold, because it could be * inconsistent. So this is a bit of a workaround... * XXX reading with out owning */ if (!zc->zc_objset_stats.dds_inconsistent) { if (dmu_objset_type(os) == DMU_OST_ZVOL) error = zvol_get_stats(os, nv); } if (error == 0) error = put_nvlist(zc, nv); nvlist_free(nv); } return (error); } /* * inputs: * zc_name name of filesystem * zc_nvlist_dst_size size of buffer for property nvlist * * outputs: * zc_objset_stats stats * zc_nvlist_dst property nvlist * zc_nvlist_dst_size size of property nvlist */ static int zfs_ioc_objset_stats(zfs_cmd_t *zc) { objset_t *os = NULL; int error; if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) return (error); error = zfs_ioc_objset_stats_impl(zc, os); dmu_objset_rele(os, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_nvlist_dst_size size of buffer for property nvlist * * outputs: * zc_nvlist_dst received property nvlist * zc_nvlist_dst_size size of received property nvlist * * Gets received properties (distinct from local properties on or after * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from * local property values. */ static int zfs_ioc_objset_recvd_props(zfs_cmd_t *zc) { objset_t *os = NULL; int error; nvlist_t *nv; if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) return (error); /* * Without this check, we would return local property values if the * caller has not already received properties on or after * SPA_VERSION_RECVD_PROPS. */ if (!dsl_prop_get_hasrecvd(os)) { dmu_objset_rele(os, FTAG); return (ENOTSUP); } if (zc->zc_nvlist_dst != 0 && (error = dsl_prop_get_received(os, &nv)) == 0) { error = put_nvlist(zc, nv); nvlist_free(nv); } dmu_objset_rele(os, FTAG); return (error); } static int nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop) { uint64_t value; int error; /* * zfs_get_zplprop() will either find a value or give us * the default value (if there is one). */ if ((error = zfs_get_zplprop(os, prop, &value)) != 0) return (error); VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0); return (0); } /* * inputs: * zc_name name of filesystem * zc_nvlist_dst_size size of buffer for zpl property nvlist * * outputs: * zc_nvlist_dst zpl property nvlist * zc_nvlist_dst_size size of zpl property nvlist */ static int zfs_ioc_objset_zplprops(zfs_cmd_t *zc) { objset_t *os; int err; /* XXX reading without owning */ if ((err = dmu_objset_hold(zc->zc_name, FTAG, &os))) return (err); dmu_objset_fast_stat(os, &zc->zc_objset_stats); /* * NB: nvl_add_zplprop() will read the objset contents, * which we aren't supposed to do with a DS_MODE_USER * hold, because it could be inconsistent. */ if (zc->zc_nvlist_dst != 0 && !zc->zc_objset_stats.dds_inconsistent && dmu_objset_type(os) == DMU_OST_ZFS) { nvlist_t *nv; VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 && (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 && (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 && (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0) err = put_nvlist(zc, nv); nvlist_free(nv); } else { err = ENOENT; } dmu_objset_rele(os, FTAG); return (err); } static boolean_t dataset_name_hidden(const char *name) { /* * Skip over datasets that are not visible in this zone, * internal datasets (which have a $ in their name), and * temporary datasets (which have a % in their name). */ if (strchr(name, '$') != NULL) return (B_TRUE); if (strchr(name, '%') != NULL) return (B_TRUE); if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL)) return (B_TRUE); return (B_FALSE); } /* * inputs: * zc_name name of filesystem * zc_cookie zap cursor * zc_nvlist_dst_size size of buffer for property nvlist * * outputs: * zc_name name of next filesystem * zc_cookie zap cursor * zc_objset_stats stats * zc_nvlist_dst property nvlist * zc_nvlist_dst_size size of property nvlist */ static int zfs_ioc_dataset_list_next(zfs_cmd_t *zc) { objset_t *os; int error; char *p; size_t orig_len = strlen(zc->zc_name); top: if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) { if (error == ENOENT) error = ESRCH; return (error); } p = strrchr(zc->zc_name, '/'); if (p == NULL || p[1] != '\0') (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name)); p = zc->zc_name + strlen(zc->zc_name); /* * Pre-fetch the datasets. dmu_objset_prefetch() always returns 0 * but is not declared void because its called by dmu_objset_find(). */ if (zc->zc_cookie == 0) { uint64_t cookie = 0; int len = sizeof (zc->zc_name) - (p - zc->zc_name); while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) (void) dmu_objset_prefetch(p, NULL); } do { error = dmu_dir_list_next(os, sizeof (zc->zc_name) - (p - zc->zc_name), p, NULL, &zc->zc_cookie); if (error == ENOENT) error = ESRCH; } while (error == 0 && dataset_name_hidden(zc->zc_name) && !(zc->zc_iflags & FKIOCTL)); dmu_objset_rele(os, FTAG); /* * If it's an internal dataset (ie. with a '$' in its name), * don't try to get stats for it, otherwise we'll return ENOENT. */ if (error == 0 && strchr(zc->zc_name, '$') == NULL) { error = zfs_ioc_objset_stats(zc); /* fill in the stats */ if (error == ENOENT) { /* We lost a race with destroy, get the next one. */ zc->zc_name[orig_len] = '\0'; goto top; } } return (error); } /* * inputs: * zc_name name of filesystem * zc_cookie zap cursor * zc_nvlist_dst_size size of buffer for property nvlist * * outputs: * zc_name name of next snapshot * zc_objset_stats stats * zc_nvlist_dst property nvlist * zc_nvlist_dst_size size of property nvlist */ static int zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) { objset_t *os; int error; top: if (zc->zc_cookie == 0) (void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch, NULL, DS_FIND_SNAPSHOTS); error = dmu_objset_hold(zc->zc_name, FTAG, &os); if (error) return (error == ENOENT ? ESRCH : error); /* * A dataset name of maximum length cannot have any snapshots, * so exit immediately. */ if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) { dmu_objset_rele(os, FTAG); return (ESRCH); } error = dmu_snapshot_list_next(os, sizeof (zc->zc_name) - strlen(zc->zc_name), zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie, NULL); if (error == 0) { dsl_dataset_t *ds; dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; /* * Since we probably don't have a hold on this snapshot, * it's possible that the objsetid could have been destroyed * and reused for a new objset. It's OK if this happens during * a zfs send operation, since the new createtxg will be * beyond the range we're interested in. */ rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); if (error) { if (error == ENOENT) { /* Racing with destroy, get the next one. */ *strchr(zc->zc_name, '@') = '\0'; dmu_objset_rele(os, FTAG); goto top; } } else { objset_t *ossnap; error = dmu_objset_from_ds(ds, &ossnap); if (error == 0) error = zfs_ioc_objset_stats_impl(zc, ossnap); dsl_dataset_rele(ds, FTAG); } } else if (error == ENOENT) { error = ESRCH; } dmu_objset_rele(os, FTAG); /* if we failed, undo the @ that we tacked on to zc_name */ if (error) *strchr(zc->zc_name, '@') = '\0'; return (error); } static int zfs_prop_set_userquota(const char *dsname, nvpair_t *pair) { +#ifdef HAVE_ZPL const char *propname = nvpair_name(pair); uint64_t *valary; unsigned int vallen; const char *domain; char *dash; zfs_userquota_prop_t type; uint64_t rid; uint64_t quota; zfsvfs_t *zfsvfs; int err; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair) != 0) return (EINVAL); } /* * A correctly constructed propname is encoded as * userquota@-. */ if ((dash = strchr(propname, '-')) == NULL || nvpair_value_uint64_array(pair, &valary, &vallen) != 0 || vallen != 3) return (EINVAL); domain = dash + 1; type = valary[0]; rid = valary[1]; quota = valary[2]; err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE); if (err == 0) { err = zfs_set_userquota(zfsvfs, type, domain, rid, quota); zfsvfs_rele(zfsvfs, FTAG); } return (err); +#else + return ENOTSUP; +#endif } /* * If the named property is one that has a special function to set its value, * return 0 on success and a positive error code on failure; otherwise if it is * not one of the special properties handled by this function, return -1. * * XXX: It would be better for callers of the property interface if we handled * these special cases in dsl_prop.c (in the dsl layer). */ static int zfs_prop_set_special(const char *dsname, zprop_source_t source, nvpair_t *pair) { const char *propname = nvpair_name(pair); zfs_prop_t prop = zfs_name_to_prop(propname); uint64_t intval; int err; if (prop == ZPROP_INVAL) { if (zfs_prop_userquota(propname)) return (zfs_prop_set_userquota(dsname, pair)); return (-1); } if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair) == 0); } if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) return (-1); VERIFY(0 == nvpair_value_uint64(pair, &intval)); switch (prop) { case ZFS_PROP_QUOTA: err = dsl_dir_set_quota(dsname, source, intval); break; case ZFS_PROP_REFQUOTA: err = dsl_dataset_set_quota(dsname, source, intval); break; case ZFS_PROP_RESERVATION: err = dsl_dir_set_reservation(dsname, source, intval); break; case ZFS_PROP_REFRESERVATION: err = dsl_dataset_set_reservation(dsname, source, intval); break; case ZFS_PROP_VOLSIZE: err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip), intval); break; case ZFS_PROP_VERSION: { zfsvfs_t *zfsvfs; if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0) break; +#ifdef HAVE_ZPL err = zfs_set_version(zfsvfs, intval); +#endif zfsvfs_rele(zfsvfs, FTAG); if (err == 0 && intval >= ZPL_VERSION_USERSPACE) { zfs_cmd_t *zc; zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); (void) strcpy(zc->zc_name, dsname); (void) zfs_ioc_userspace_upgrade(zc); kmem_free(zc, sizeof (zfs_cmd_t)); } break; } default: err = -1; } return (err); } /* * This function is best effort. If it fails to set any of the given properties, * it continues to set as many as it can and returns the first error * encountered. If the caller provides a non-NULL errlist, it also gives the * complete list of names of all the properties it failed to set along with the * corresponding error numbers. The caller is responsible for freeing the * returned errlist. * * If every property is set successfully, zero is returned and the list pointed * at by errlist is NULL. */ int zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, nvlist_t **errlist) { nvpair_t *pair; nvpair_t *propval; int rv = 0; uint64_t intval; char *strval; nvlist_t *genericnvl; nvlist_t *errors; nvlist_t *retrynvl; VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); retry: pair = NULL; while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { const char *propname = nvpair_name(pair); zfs_prop_t prop = zfs_name_to_prop(propname); int err = 0; /* decode the property value */ propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &propval) != 0) err = EINVAL; } /* Validate value type */ if (err == 0 && prop == ZPROP_INVAL) { if (zfs_prop_user(propname)) { if (nvpair_type(propval) != DATA_TYPE_STRING) err = EINVAL; } else if (zfs_prop_userquota(propname)) { if (nvpair_type(propval) != DATA_TYPE_UINT64_ARRAY) err = EINVAL; } } else if (err == 0) { if (nvpair_type(propval) == DATA_TYPE_STRING) { if (zfs_prop_get_type(prop) != PROP_TYPE_STRING) err = EINVAL; } else if (nvpair_type(propval) == DATA_TYPE_UINT64) { const char *unused; VERIFY(nvpair_value_uint64(propval, &intval) == 0); switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: break; case PROP_TYPE_STRING: err = EINVAL; break; case PROP_TYPE_INDEX: if (zfs_prop_index_to_string(prop, intval, &unused) != 0) err = EINVAL; break; default: cmn_err(CE_PANIC, "unknown property type"); } } else { err = EINVAL; } } /* Validate permissions */ if (err == 0) err = zfs_check_settable(dsname, pair, CRED()); if (err == 0) { err = zfs_prop_set_special(dsname, source, pair); if (err == -1) { /* * For better performance we build up a list of * properties to set in a single transaction. */ err = nvlist_add_nvpair(genericnvl, pair); } else if (err != 0 && nvl != retrynvl) { /* * This may be a spurious error caused by * receiving quota and reservation out of order. * Try again in a second pass. */ err = nvlist_add_nvpair(retrynvl, pair); } } if (err != 0) VERIFY(nvlist_add_int32(errors, propname, err) == 0); } if (nvl != retrynvl && !nvlist_empty(retrynvl)) { nvl = retrynvl; goto retry; } if (!nvlist_empty(genericnvl) && dsl_props_set(dsname, source, genericnvl) != 0) { /* * If this fails, we still want to set as many properties as we * can, so try setting them individually. */ pair = NULL; while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) { const char *propname = nvpair_name(pair); int err = 0; propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &propval) == 0); } if (nvpair_type(propval) == DATA_TYPE_STRING) { VERIFY(nvpair_value_string(propval, &strval) == 0); err = dsl_prop_set(dsname, propname, source, 1, strlen(strval) + 1, strval); } else { VERIFY(nvpair_value_uint64(propval, &intval) == 0); err = dsl_prop_set(dsname, propname, source, 8, 1, &intval); } if (err != 0) { VERIFY(nvlist_add_int32(errors, propname, err) == 0); } } } nvlist_free(genericnvl); nvlist_free(retrynvl); if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { nvlist_free(errors); errors = NULL; } else { VERIFY(nvpair_value_int32(pair, &rv) == 0); } if (errlist == NULL) nvlist_free(errors); else *errlist = errors; return (rv); } /* * Check that all the properties are valid user properties. */ static int zfs_check_userprops(char *fsname, nvlist_t *nvl) { nvpair_t *pair = NULL; int error = 0; while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { const char *propname = nvpair_name(pair); char *valstr; if (!zfs_prop_user(propname) || nvpair_type(pair) != DATA_TYPE_STRING) return (EINVAL); if ((error = zfs_secpolicy_write_perms(fsname, ZFS_DELEG_PERM_USERPROP, CRED()))) return (error); if (strlen(propname) >= ZAP_MAXNAMELEN) return (ENAMETOOLONG); VERIFY(nvpair_value_string(pair, &valstr) == 0); if (strlen(valstr) >= ZAP_MAXVALUELEN) return (E2BIG); } return (0); } static void props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops) { nvpair_t *pair; VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); pair = NULL; while ((pair = nvlist_next_nvpair(props, pair)) != NULL) { if (nvlist_exists(skipped, nvpair_name(pair))) continue; VERIFY(nvlist_add_nvpair(*newprops, pair) == 0); } } static int clear_received_props(objset_t *os, const char *fs, nvlist_t *props, nvlist_t *skipped) { int err = 0; nvlist_t *cleared_props = NULL; props_skip(props, skipped, &cleared_props); if (!nvlist_empty(cleared_props)) { /* * Acts on local properties until the dataset has received * properties at least once on or after SPA_VERSION_RECVD_PROPS. */ zprop_source_t flags = (ZPROP_SRC_NONE | (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0)); err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL); } nvlist_free(cleared_props); return (err); } /* * inputs: * zc_name name of filesystem * zc_value name of property to set * zc_nvlist_src{_size} nvlist of properties to apply * zc_cookie received properties flag * * outputs: * zc_nvlist_dst{_size} error for each unapplied received property */ static int zfs_ioc_set_prop(zfs_cmd_t *zc) { nvlist_t *nvl; boolean_t received = zc->zc_cookie; zprop_source_t source = (received ? ZPROP_SRC_RECEIVED : ZPROP_SRC_LOCAL); nvlist_t *errors = NULL; int error; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &nvl)) != 0) return (error); if (received) { nvlist_t *origprops; objset_t *os; if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) { if (dsl_prop_get_received(os, &origprops) == 0) { (void) clear_received_props(os, zc->zc_name, origprops, nvl); nvlist_free(origprops); } dsl_prop_set_hasrecvd(os); dmu_objset_rele(os, FTAG); } } error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors); if (zc->zc_nvlist_dst != 0 && errors != NULL) { (void) put_nvlist(zc, errors); } nvlist_free(errors); nvlist_free(nvl); return (error); } /* * inputs: * zc_name name of filesystem * zc_value name of property to inherit * zc_cookie revert to received value if TRUE * * outputs: none */ static int zfs_ioc_inherit_prop(zfs_cmd_t *zc) { const char *propname = zc->zc_value; zfs_prop_t prop = zfs_name_to_prop(propname); boolean_t received = zc->zc_cookie; zprop_source_t source = (received ? ZPROP_SRC_NONE /* revert to received value, if any */ : ZPROP_SRC_INHERITED); /* explicitly inherit */ if (received) { nvlist_t *dummy; nvpair_t *pair; zprop_type_t type; int err; /* * zfs_prop_set_special() expects properties in the form of an * nvpair with type info. */ if (prop == ZPROP_INVAL) { if (!zfs_prop_user(propname)) return (EINVAL); type = PROP_TYPE_STRING; } else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) { return (EINVAL); } else { type = zfs_prop_get_type(prop); } VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0); switch (type) { case PROP_TYPE_STRING: VERIFY(0 == nvlist_add_string(dummy, propname, "")); break; case PROP_TYPE_NUMBER: case PROP_TYPE_INDEX: VERIFY(0 == nvlist_add_uint64(dummy, propname, 0)); break; default: nvlist_free(dummy); return (EINVAL); } pair = nvlist_next_nvpair(dummy, NULL); err = zfs_prop_set_special(zc->zc_name, source, pair); nvlist_free(dummy); if (err != -1) return (err); /* special property already handled */ } else { /* * Only check this in the non-received case. We want to allow * 'inherit -S' to revert non-inheritable properties like quota * and reservation to the received or default values even though * they are not considered inheritable. */ if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop)) return (EINVAL); } /* the property name has been validated by zfs_secpolicy_inherit() */ return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL)); } static int zfs_ioc_pool_set_props(zfs_cmd_t *zc) { nvlist_t *props; spa_t *spa; int error; nvpair_t *pair; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) return (error); /* * If the only property is the configfile, then just do a spa_lookup() * to handle the faulted case. */ pair = nvlist_next_nvpair(props, NULL); if (pair != NULL && strcmp(nvpair_name(pair), zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 && nvlist_next_nvpair(props, pair) == NULL) { mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(zc->zc_name)) != NULL) { spa_configfile_set(spa, props, B_FALSE); spa_config_sync(spa, B_FALSE, B_TRUE); } mutex_exit(&spa_namespace_lock); if (spa != NULL) { nvlist_free(props); return (0); } } if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) { nvlist_free(props); return (error); } error = spa_prop_set(spa, props); nvlist_free(props); spa_close(spa, FTAG); return (error); } static int zfs_ioc_pool_get_props(zfs_cmd_t *zc) { spa_t *spa; int error; nvlist_t *nvp = NULL; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) { /* * If the pool is faulted, there may be properties we can still * get (such as altroot and cachefile), so attempt to get them * anyway. */ mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(zc->zc_name)) != NULL) error = spa_prop_get(spa, &nvp); mutex_exit(&spa_namespace_lock); } else { error = spa_prop_get(spa, &nvp); spa_close(spa, FTAG); } if (error == 0 && zc->zc_nvlist_dst != 0) error = put_nvlist(zc, nvp); else error = EFAULT; nvlist_free(nvp); return (error); } /* * inputs: * zc_name name of filesystem * zc_nvlist_src{_size} nvlist of delegated permissions * zc_perm_action allow/unallow flag * * outputs: none */ static int zfs_ioc_set_fsacl(zfs_cmd_t *zc) { int error; nvlist_t *fsaclnv = NULL; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &fsaclnv)) != 0) return (error); /* * Verify nvlist is constructed correctly */ if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) { nvlist_free(fsaclnv); return (EINVAL); } /* * If we don't have PRIV_SYS_MOUNT, then validate * that user is allowed to hand out each permission in * the nvlist(s) */ error = secpolicy_zfs(CRED()); if (error) { if (zc->zc_perm_action == B_FALSE) { error = dsl_deleg_can_allow(zc->zc_name, fsaclnv, CRED()); } else { error = dsl_deleg_can_unallow(zc->zc_name, fsaclnv, CRED()); } } if (error == 0) error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action); nvlist_free(fsaclnv); return (error); } /* * inputs: * zc_name name of filesystem * * outputs: * zc_nvlist_src{_size} nvlist of delegated permissions */ static int zfs_ioc_get_fsacl(zfs_cmd_t *zc) { nvlist_t *nvp; int error; if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) { error = put_nvlist(zc, nvp); nvlist_free(nvp); } return (error); } +#ifdef HAVE_ZPL /* * Search the vfs list for a specified resource. Returns a pointer to it * or NULL if no suitable entry is found. The caller of this routine * is responsible for releasing the returned vfs pointer. */ static vfs_t * zfs_get_vfs(const char *resource) { struct vfs *vfsp; struct vfs *vfs_found = NULL; vfs_list_read_lock(); vfsp = rootvfs; do { if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) { VFS_HOLD(vfsp); vfs_found = vfsp; break; } vfsp = vfsp->vfs_next; } while (vfsp != rootvfs); vfs_list_unlock(); return (vfs_found); } +#endif /* HAVE_ZPL */ /* ARGSUSED */ static void zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) { zfs_creat_t *zct = arg; zfs_create_fs(os, cr, zct->zct_zplprops, tx); } #define ZFS_PROP_UNDEFINED ((uint64_t)-1) /* * inputs: * createprops list of properties requested by creator * default_zplver zpl version to use if unspecified in createprops * fuids_ok fuids allowed in this version of the spa? * os parent objset pointer (NULL if root fs) * * outputs: * zplprops values for the zplprops we attach to the master node object * is_ci true if requested file system will be purely case-insensitive * * Determine the settings for utf8only, normalization and * casesensitivity. Specific values may have been requested by the * creator and/or we can inherit values from the parent dataset. If * the file system is of too early a vintage, a creator can not * request settings for these properties, even if the requested * setting is the default value. We don't actually want to create dsl * properties for these, so remove them from the source nvlist after * processing. */ static int zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver, boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops, nvlist_t *zplprops, boolean_t *is_ci) { uint64_t sense = ZFS_PROP_UNDEFINED; uint64_t norm = ZFS_PROP_UNDEFINED; uint64_t u8 = ZFS_PROP_UNDEFINED; ASSERT(zplprops != NULL); /* * Pull out creator prop choices, if any. */ if (createprops) { (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_VERSION), &zplver); (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm); (void) nvlist_remove_all(createprops, zfs_prop_to_name(ZFS_PROP_NORMALIZE)); (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8); (void) nvlist_remove_all(createprops, zfs_prop_to_name(ZFS_PROP_UTF8ONLY)); (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_CASE), &sense); (void) nvlist_remove_all(createprops, zfs_prop_to_name(ZFS_PROP_CASE)); } /* * If the zpl version requested is whacky or the file system * or pool is version is too "young" to support normalization * and the creator tried to set a value for one of the props, * error out. */ if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) || (zplver >= ZPL_VERSION_FUID && !fuids_ok) || (zplver >= ZPL_VERSION_SA && !sa_ok) || (zplver < ZPL_VERSION_NORMALIZATION && (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED || sense != ZFS_PROP_UNDEFINED))) return (ENOTSUP); /* * Put the version in the zplprops */ VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0); if (norm == ZFS_PROP_UNDEFINED) VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0); VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0); /* * If we're normalizing, names must always be valid UTF-8 strings. */ if (norm) u8 = 1; if (u8 == ZFS_PROP_UNDEFINED) VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0); VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0); if (sense == ZFS_PROP_UNDEFINED) VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0); VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0); if (is_ci) *is_ci = (sense == ZFS_CASE_INSENSITIVE); return (0); } static int zfs_fill_zplprops(const char *dataset, nvlist_t *createprops, nvlist_t *zplprops, boolean_t *is_ci) { boolean_t fuids_ok, sa_ok; uint64_t zplver = ZPL_VERSION; objset_t *os = NULL; char parentname[MAXNAMELEN]; char *cp; spa_t *spa; uint64_t spa_vers; int error; (void) strlcpy(parentname, dataset, sizeof (parentname)); cp = strrchr(parentname, '/'); ASSERT(cp != NULL); cp[0] = '\0'; if ((error = spa_open(dataset, &spa, FTAG)) != 0) return (error); spa_vers = spa_version(spa); spa_close(spa, FTAG); zplver = zfs_zpl_version_map(spa_vers); fuids_ok = (zplver >= ZPL_VERSION_FUID); sa_ok = (zplver >= ZPL_VERSION_SA); /* * Open parent object set so we can inherit zplprop values. */ if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0) return (error); error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops, zplprops, is_ci); dmu_objset_rele(os, FTAG); return (error); } static int zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops, nvlist_t *zplprops, boolean_t *is_ci) { boolean_t fuids_ok; boolean_t sa_ok; uint64_t zplver = ZPL_VERSION; int error; zplver = zfs_zpl_version_map(spa_vers); fuids_ok = (zplver >= ZPL_VERSION_FUID); sa_ok = (zplver >= ZPL_VERSION_SA); error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok, createprops, zplprops, is_ci); return (error); } /* * inputs: * zc_objset_type type of objset to create (fs vs zvol) * zc_name name of new objset * zc_value name of snapshot to clone from (may be empty) * zc_nvlist_src{_size} nvlist of properties to apply * * outputs: none */ static int zfs_ioc_create(zfs_cmd_t *zc) { objset_t *clone; int error = 0; zfs_creat_t zct; nvlist_t *nvprops = NULL; void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); dmu_objset_type_t type = zc->zc_objset_type; switch (type) { case DMU_OST_ZFS: cbfunc = zfs_create_cb; break; case DMU_OST_ZVOL: cbfunc = zvol_create_cb; break; default: cbfunc = NULL; break; } if (strchr(zc->zc_name, '@') || strchr(zc->zc_name, '%')) return (EINVAL); if (zc->zc_nvlist_src != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &nvprops)) != 0) return (error); zct.zct_zplprops = NULL; zct.zct_props = nvprops; if (zc->zc_value[0] != '\0') { /* * We're creating a clone of an existing snapshot. */ zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) { nvlist_free(nvprops); return (EINVAL); } error = dmu_objset_hold(zc->zc_value, FTAG, &clone); if (error) { nvlist_free(nvprops); return (error); } error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0); dmu_objset_rele(clone, FTAG); if (error) { nvlist_free(nvprops); return (error); } } else { boolean_t is_insensitive = B_FALSE; if (cbfunc == NULL) { nvlist_free(nvprops); return (EINVAL); } if (type == DMU_OST_ZVOL) { uint64_t volsize, volblocksize; if (nvprops == NULL || nvlist_lookup_uint64(nvprops, zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0) { nvlist_free(nvprops); return (EINVAL); } if ((error = nvlist_lookup_uint64(nvprops, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize)) != 0 && error != ENOENT) { nvlist_free(nvprops); return (EINVAL); } if (error != 0) volblocksize = zfs_prop_default_numeric( ZFS_PROP_VOLBLOCKSIZE); if ((error = zvol_check_volblocksize( volblocksize)) != 0 || (error = zvol_check_volsize(volsize, volblocksize)) != 0) { nvlist_free(nvprops); return (error); } } else if (type == DMU_OST_ZFS) { int error; /* * We have to have normalization and * case-folding flags correct when we do the * file system creation, so go figure them out * now. */ VERIFY(nvlist_alloc(&zct.zct_zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); error = zfs_fill_zplprops(zc->zc_name, nvprops, zct.zct_zplprops, &is_insensitive); if (error != 0) { nvlist_free(nvprops); nvlist_free(zct.zct_zplprops); return (error); } } error = dmu_objset_create(zc->zc_name, type, is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); nvlist_free(zct.zct_zplprops); } /* * It would be nice to do this atomically. */ if (error == 0) { error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL, nvprops, NULL); if (error != 0) (void) dmu_objset_destroy(zc->zc_name, B_FALSE); } nvlist_free(nvprops); return (error); } /* * inputs: * zc_name name of filesystem * zc_value short name of snapshot * zc_cookie recursive flag * zc_nvlist_src[_size] property list * * outputs: * zc_value short snapname (i.e. part after the '@') */ static int zfs_ioc_snapshot(zfs_cmd_t *zc) { nvlist_t *nvprops = NULL; int error; boolean_t recursive = zc->zc_cookie; if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) return (EINVAL); if (zc->zc_nvlist_src != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &nvprops)) != 0) return (error); error = zfs_check_userprops(zc->zc_name, nvprops); if (error) goto out; if (!nvlist_empty(nvprops) && zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) { error = ENOTSUP; goto out; } error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL, nvprops, recursive, B_FALSE, -1); out: nvlist_free(nvprops); return (error); } int zfs_unmount_snap(const char *name, void *arg) { +#ifdef HAVE_ZPL vfs_t *vfsp = NULL; if (arg) { char *snapname = arg; char *fullname = kmem_asprintf("%s@%s", name, snapname); vfsp = zfs_get_vfs(fullname); strfree(fullname); } else if (strchr(name, '@')) { vfsp = zfs_get_vfs(name); } if (vfsp) { /* * Always force the unmount for snapshots. */ int flag = MS_FORCE; int err; if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) { VFS_RELE(vfsp); return (err); } VFS_RELE(vfsp); if ((err = dounmount(vfsp, flag, kcred)) != 0) return (err); } +#endif /* HAVE_ZPL */ return (0); } /* * inputs: * zc_name name of filesystem * zc_value short name of snapshot * zc_defer_destroy mark for deferred destroy * * outputs: none */ static int zfs_ioc_destroy_snaps(zfs_cmd_t *zc) { int err; if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) return (EINVAL); err = dmu_objset_find(zc->zc_name, zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN); if (err) return (err); return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value, zc->zc_defer_destroy)); } /* * inputs: * zc_name name of dataset to destroy * zc_objset_type type of objset * zc_defer_destroy mark for deferred destroy * * outputs: none */ static int zfs_ioc_destroy(zfs_cmd_t *zc) { int err; if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) { err = zfs_unmount_snap(zc->zc_name, NULL); if (err) return (err); } err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy); if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0) (void) zvol_remove_minor(zc->zc_name); return (err); } /* * inputs: * zc_name name of dataset to rollback (to most recent snapshot) * * outputs: none */ static int zfs_ioc_rollback(zfs_cmd_t *zc) { +#ifdef HAVE_ZPL dsl_dataset_t *ds, *clone; int error; zfsvfs_t *zfsvfs; char *clone_name; error = dsl_dataset_hold(zc->zc_name, FTAG, &ds); if (error) return (error); /* must not be a snapshot */ if (dsl_dataset_is_snapshot(ds)) { dsl_dataset_rele(ds, FTAG); return (EINVAL); } /* must have a most recent snapshot */ if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { dsl_dataset_rele(ds, FTAG); return (EINVAL); } /* * Create clone of most recent snapshot. */ clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name); error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT); if (error) goto out; error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone); if (error) goto out; /* * Do clone swap. */ if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) { error = zfs_suspend_fs(zfsvfs); if (error == 0) { int resume_err; if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { error = dsl_dataset_clone_swap(clone, ds, B_TRUE); dsl_dataset_disown(ds, FTAG); ds = NULL; } else { error = EBUSY; } resume_err = zfs_resume_fs(zfsvfs, zc->zc_name); error = error ? error : resume_err; } VFS_RELE(zfsvfs->z_vfs); } else { if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { error = dsl_dataset_clone_swap(clone, ds, B_TRUE); dsl_dataset_disown(ds, FTAG); ds = NULL; } else { error = EBUSY; } } /* * Destroy clone (which also closes it). */ (void) dsl_dataset_destroy(clone, FTAG, B_FALSE); out: strfree(clone_name); if (ds) dsl_dataset_rele(ds, FTAG); return (error); +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } /* * inputs: * zc_name old name of dataset * zc_value new name of dataset * zc_cookie recursive flag (only valid for snapshots) * * outputs: none */ static int zfs_ioc_rename(zfs_cmd_t *zc) { boolean_t recursive = zc->zc_cookie & 1; zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || strchr(zc->zc_value, '%')) return (EINVAL); /* * Unmount snapshot unless we're doing a recursive rename, * in which case the dataset code figures out which snapshots * to unmount. */ if (!recursive && strchr(zc->zc_name, '@') != NULL && zc->zc_objset_type == DMU_OST_ZFS) { int err = zfs_unmount_snap(zc->zc_name, NULL); if (err) return (err); } if (zc->zc_objset_type == DMU_OST_ZVOL) (void) zvol_remove_minor(zc->zc_name); return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive)); } static int zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) { const char *propname = nvpair_name(pair); boolean_t issnap = (strchr(dsname, '@') != NULL); zfs_prop_t prop = zfs_name_to_prop(propname); uint64_t intval; int err; if (prop == ZPROP_INVAL) { if (zfs_prop_user(propname)) { if ((err = zfs_secpolicy_write_perms(dsname, ZFS_DELEG_PERM_USERPROP, cr))) return (err); return (0); } if (!issnap && zfs_prop_userquota(propname)) { const char *perm = NULL; const char *uq_prefix = zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA]; const char *gq_prefix = zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA]; if (strncmp(propname, uq_prefix, strlen(uq_prefix)) == 0) { perm = ZFS_DELEG_PERM_USERQUOTA; } else if (strncmp(propname, gq_prefix, strlen(gq_prefix)) == 0) { perm = ZFS_DELEG_PERM_GROUPQUOTA; } else { /* USERUSED and GROUPUSED are read-only */ return (EINVAL); } if ((err = zfs_secpolicy_write_perms(dsname, perm, cr))) return (err); return (0); } return (EINVAL); } if (issnap) return (EINVAL); if (nvpair_type(pair) == DATA_TYPE_NVLIST) { /* * dsl_prop_get_all_impl() returns properties in this * format. */ nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair) == 0); } /* * Check that this value is valid for this pool version */ switch (prop) { case ZFS_PROP_COMPRESSION: /* * If the user specified gzip compression, make sure * the SPA supports it. We ignore any errors here since * we'll catch them later. */ if (nvpair_type(pair) == DATA_TYPE_UINT64 && nvpair_value_uint64(pair, &intval) == 0) { if (intval >= ZIO_COMPRESS_GZIP_1 && intval <= ZIO_COMPRESS_GZIP_9 && zfs_earlier_version(dsname, SPA_VERSION_GZIP_COMPRESSION)) { return (ENOTSUP); } if (intval == ZIO_COMPRESS_ZLE && zfs_earlier_version(dsname, SPA_VERSION_ZLE_COMPRESSION)) return (ENOTSUP); /* * If this is a bootable dataset then * verify that the compression algorithm * is supported for booting. We must return * something other than ENOTSUP since it * implies a downrev pool version. */ if (zfs_is_bootfs(dsname) && !BOOTFS_COMPRESS_VALID(intval)) { return (ERANGE); } } break; case ZFS_PROP_COPIES: if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS)) return (ENOTSUP); break; case ZFS_PROP_DEDUP: if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP)) return (ENOTSUP); break; case ZFS_PROP_SHARESMB: if (zpl_earlier_version(dsname, ZPL_VERSION_FUID)) return (ENOTSUP); break; case ZFS_PROP_ACLINHERIT: if (nvpair_type(pair) == DATA_TYPE_UINT64 && nvpair_value_uint64(pair, &intval) == 0) { if (intval == ZFS_ACL_PASSTHROUGH_X && zfs_earlier_version(dsname, SPA_VERSION_PASSTHROUGH_X)) return (ENOTSUP); } break; default: break; } return (zfs_secpolicy_setprop(dsname, prop, pair, CRED())); } /* * Removes properties from the given props list that fail permission checks * needed to clear them and to restore them in case of a receive error. For each * property, make sure we have both set and inherit permissions. * * Returns the first error encountered if any permission checks fail. If the * caller provides a non-NULL errlist, it also gives the complete list of names * of all the properties that failed a permission check along with the * corresponding error numbers. The caller is responsible for freeing the * returned errlist. * * If every property checks out successfully, zero is returned and the list * pointed at by errlist is NULL. */ static int zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist) { zfs_cmd_t *zc; nvpair_t *pair, *next_pair; nvlist_t *errors; int err, rv = 0; if (props == NULL) return (0); VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP); (void) strcpy(zc->zc_name, dataset); pair = nvlist_next_nvpair(props, NULL); while (pair != NULL) { next_pair = nvlist_next_nvpair(props, pair); (void) strcpy(zc->zc_value, nvpair_name(pair)); if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 || (err = zfs_secpolicy_inherit(zc, CRED())) != 0) { VERIFY(nvlist_remove_nvpair(props, pair) == 0); VERIFY(nvlist_add_int32(errors, zc->zc_value, err) == 0); } pair = next_pair; } kmem_free(zc, sizeof (zfs_cmd_t)); if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { nvlist_free(errors); errors = NULL; } else { VERIFY(nvpair_value_int32(pair, &rv) == 0); } if (errlist == NULL) nvlist_free(errors); else *errlist = errors; return (rv); } static boolean_t propval_equals(nvpair_t *p1, nvpair_t *p2) { if (nvpair_type(p1) == DATA_TYPE_NVLIST) { /* dsl_prop_get_all_impl() format */ nvlist_t *attrs; VERIFY(nvpair_value_nvlist(p1, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p1) == 0); } if (nvpair_type(p2) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(p2, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p2) == 0); } if (nvpair_type(p1) != nvpair_type(p2)) return (B_FALSE); if (nvpair_type(p1) == DATA_TYPE_STRING) { char *valstr1, *valstr2; VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0); VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0); return (strcmp(valstr1, valstr2) == 0); } else { uint64_t intval1, intval2; VERIFY(nvpair_value_uint64(p1, &intval1) == 0); VERIFY(nvpair_value_uint64(p2, &intval2) == 0); return (intval1 == intval2); } } /* * Remove properties from props if they are not going to change (as determined * by comparison with origprops). Remove them from origprops as well, since we * do not need to clear or restore properties that won't change. */ static void props_reduce(nvlist_t *props, nvlist_t *origprops) { nvpair_t *pair, *next_pair; if (origprops == NULL) return; /* all props need to be received */ pair = nvlist_next_nvpair(props, NULL); while (pair != NULL) { const char *propname = nvpair_name(pair); nvpair_t *match; next_pair = nvlist_next_nvpair(props, pair); if ((nvlist_lookup_nvpair(origprops, propname, &match) != 0) || !propval_equals(pair, match)) goto next; /* need to set received value */ /* don't clear the existing received value */ (void) nvlist_remove_nvpair(origprops, match); /* don't bother receiving the property */ (void) nvlist_remove_nvpair(props, pair); next: pair = next_pair; } } #ifdef DEBUG static boolean_t zfs_ioc_recv_inject_err; #endif /* * inputs: * zc_name name of containing filesystem * zc_nvlist_src{_size} nvlist of properties to apply * zc_value name of snapshot to create * zc_string name of clone origin (if DRR_FLAG_CLONE) * zc_cookie file descriptor to recv from * zc_begin_record the BEGIN record of the stream (not byteswapped) * zc_guid force flag * zc_cleanup_fd cleanup-on-exit file descriptor * zc_action_handle handle for this guid/ds mapping (or zero on first call) * * outputs: * zc_cookie number of bytes read * zc_nvlist_dst{_size} error for each unapplied received property * zc_obj zprop_errflags_t * zc_action_handle handle for this guid/ds mapping */ static int zfs_ioc_recv(zfs_cmd_t *zc) { file_t *fp; objset_t *os; dmu_recv_cookie_t drc; boolean_t force = (boolean_t)zc->zc_guid; int fd; int error = 0; int props_error = 0; nvlist_t *errors; offset_t off; nvlist_t *props = NULL; /* sent properties */ nvlist_t *origprops = NULL; /* existing properties */ objset_t *origin = NULL; char *tosnap; char tofs[ZFS_MAXNAMELEN]; boolean_t first_recvd_props = B_FALSE; if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || strchr(zc->zc_value, '@') == NULL || strchr(zc->zc_value, '%')) return (EINVAL); (void) strcpy(tofs, zc->zc_value); tosnap = strchr(tofs, '@'); *tosnap++ = '\0'; if (zc->zc_nvlist_src != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props)) != 0) return (error); fd = zc->zc_cookie; fp = getf(fd); if (fp == NULL) { nvlist_free(props); return (EBADF); } VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) { if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) && !dsl_prop_get_hasrecvd(os)) { first_recvd_props = B_TRUE; } /* * If new received properties are supplied, they are to * completely replace the existing received properties, so stash * away the existing ones. */ if (dsl_prop_get_received(os, &origprops) == 0) { nvlist_t *errlist = NULL; /* * Don't bother writing a property if its value won't * change (and avoid the unnecessary security checks). * * The first receive after SPA_VERSION_RECVD_PROPS is a * special case where we blow away all local properties * regardless. */ if (!first_recvd_props) props_reduce(props, origprops); if (zfs_check_clearable(tofs, origprops, &errlist) != 0) (void) nvlist_merge(errors, errlist, 0); nvlist_free(errlist); } dmu_objset_rele(os, FTAG); } if (zc->zc_string[0]) { error = dmu_objset_hold(zc->zc_string, FTAG, &origin); if (error) goto out; } error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds, &zc->zc_begin_record, force, origin, &drc); if (origin) dmu_objset_rele(origin, FTAG); if (error) goto out; /* * Set properties before we receive the stream so that they are applied * to the new data. Note that we must call dmu_recv_stream() if * dmu_recv_begin() succeeds. */ if (props) { nvlist_t *errlist; if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) { if (drc.drc_newfs) { if (spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) first_recvd_props = B_TRUE; } else if (origprops != NULL) { if (clear_received_props(os, tofs, origprops, first_recvd_props ? NULL : props) != 0) zc->zc_obj |= ZPROP_ERR_NOCLEAR; } else { zc->zc_obj |= ZPROP_ERR_NOCLEAR; } dsl_prop_set_hasrecvd(os); } else if (!drc.drc_newfs) { zc->zc_obj |= ZPROP_ERR_NOCLEAR; } (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, props, &errlist); (void) nvlist_merge(errors, errlist, 0); nvlist_free(errlist); } if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) { /* * Caller made zc->zc_nvlist_dst less than the minimum expected * size or supplied an invalid address. */ props_error = EINVAL; } off = fp->f_offset; error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd, &zc->zc_action_handle); if (error == 0) { +#ifdef HAVE_ZPL zfsvfs_t *zfsvfs = NULL; if (getzfsvfs(tofs, &zfsvfs) == 0) { /* online recv */ int end_err; error = zfs_suspend_fs(zfsvfs); /* * If the suspend fails, then the recv_end will * likely also fail, and clean up after itself. */ end_err = dmu_recv_end(&drc); if (error == 0) error = zfs_resume_fs(zfsvfs, tofs); error = error ? error : end_err; VFS_RELE(zfsvfs->z_vfs); } else { error = dmu_recv_end(&drc); } +#else + error = dmu_recv_end(&drc); +#endif /* HAVE_ZPL */ } zc->zc_cookie = off - fp->f_offset; if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; #ifdef DEBUG if (zfs_ioc_recv_inject_err) { zfs_ioc_recv_inject_err = B_FALSE; error = 1; } #endif /* * On error, restore the original props. */ if (error && props) { if (dmu_objset_hold(tofs, FTAG, &os) == 0) { if (clear_received_props(os, tofs, props, NULL) != 0) { /* * We failed to clear the received properties. * Since we may have left a $recvd value on the * system, we can't clear the $hasrecvd flag. */ zc->zc_obj |= ZPROP_ERR_NORESTORE; } else if (first_recvd_props) { dsl_prop_unset_hasrecvd(os); } dmu_objset_rele(os, FTAG); } else if (!drc.drc_newfs) { /* We failed to clear the received properties. */ zc->zc_obj |= ZPROP_ERR_NORESTORE; } if (origprops == NULL && !drc.drc_newfs) { /* We failed to stash the original properties. */ zc->zc_obj |= ZPROP_ERR_NORESTORE; } /* * dsl_props_set() will not convert RECEIVED to LOCAL on or * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL * explictly if we're restoring local properties cleared in the * first new-style receive. */ if (origprops != NULL && zfs_set_prop_nvlist(tofs, (first_recvd_props ? ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED), origprops, NULL) != 0) { /* * We stashed the original properties but failed to * restore them. */ zc->zc_obj |= ZPROP_ERR_NORESTORE; } } out: nvlist_free(props); nvlist_free(origprops); nvlist_free(errors); releasef(fd); if (error == 0) error = props_error; return (error); } /* * inputs: * zc_name name of snapshot to send * zc_cookie file descriptor to send stream to * zc_obj fromorigin flag (mutually exclusive with zc_fromobj) * zc_sendobj objsetid of snapshot to send * zc_fromobj objsetid of incremental fromsnap (may be zero) * * outputs: none */ static int zfs_ioc_send(zfs_cmd_t *zc) { objset_t *fromsnap = NULL; objset_t *tosnap; file_t *fp; int error; offset_t off; dsl_dataset_t *ds; dsl_dataset_t *dsfrom = NULL; spa_t *spa; dsl_pool_t *dp; error = spa_open(zc->zc_name, &spa, FTAG); if (error) return (error); dp = spa_get_dsl(spa); rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); if (error) { spa_close(spa, FTAG); return (error); } error = dmu_objset_from_ds(ds, &tosnap); if (error) { dsl_dataset_rele(ds, FTAG); spa_close(spa, FTAG); return (error); } if (zc->zc_fromobj != 0) { rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom); rw_exit(&dp->dp_config_rwlock); spa_close(spa, FTAG); if (error) { dsl_dataset_rele(ds, FTAG); return (error); } error = dmu_objset_from_ds(dsfrom, &fromsnap); if (error) { dsl_dataset_rele(dsfrom, FTAG); dsl_dataset_rele(ds, FTAG); return (error); } } else { spa_close(spa, FTAG); } fp = getf(zc->zc_cookie); if (fp == NULL) { dsl_dataset_rele(ds, FTAG); if (dsfrom) dsl_dataset_rele(dsfrom, FTAG); return (EBADF); } off = fp->f_offset; error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; releasef(zc->zc_cookie); if (dsfrom) dsl_dataset_rele(dsfrom, FTAG); dsl_dataset_rele(ds, FTAG); return (error); } static int zfs_ioc_inject_fault(zfs_cmd_t *zc) { int id, error; error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id, &zc->zc_inject_record); if (error == 0) zc->zc_guid = (uint64_t)id; return (error); } static int zfs_ioc_clear_fault(zfs_cmd_t *zc) { return (zio_clear_fault((int)zc->zc_guid)); } static int zfs_ioc_inject_list_next(zfs_cmd_t *zc) { int id = (int)zc->zc_guid; int error; error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name), &zc->zc_inject_record); zc->zc_guid = id; return (error); } static int zfs_ioc_error_log(zfs_cmd_t *zc) { spa_t *spa; int error; size_t count = (size_t)zc->zc_nvlist_dst_size; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst, &count); if (error == 0) zc->zc_nvlist_dst_size = count; else zc->zc_nvlist_dst_size = spa_get_errlog_size(spa); spa_close(spa, FTAG); return (error); } static int zfs_ioc_clear(zfs_cmd_t *zc) { spa_t *spa; vdev_t *vd; int error; /* * On zpool clear we also fix up missing slogs */ mutex_enter(&spa_namespace_lock); spa = spa_lookup(zc->zc_name); if (spa == NULL) { mutex_exit(&spa_namespace_lock); return (EIO); } if (spa_get_log_state(spa) == SPA_LOG_MISSING) { /* we need to let spa_open/spa_load clear the chains */ spa_set_log_state(spa, SPA_LOG_CLEAR); } spa->spa_last_open_failed = 0; mutex_exit(&spa_namespace_lock); if (zc->zc_cookie & ZPOOL_NO_REWIND) { error = spa_open(zc->zc_name, &spa, FTAG); } else { nvlist_t *policy; nvlist_t *config = NULL; if (zc->zc_nvlist_src == 0) return (EINVAL); if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) { error = spa_open_rewind(zc->zc_name, &spa, FTAG, policy, &config); if (config != NULL) { int err; if ((err = put_nvlist(zc, config)) != 0) error = err; nvlist_free(config); } nvlist_free(policy); } } if (error) return (error); spa_vdev_state_enter(spa, SCL_NONE); if (zc->zc_guid == 0) { vd = NULL; } else { vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE); if (vd == NULL) { (void) spa_vdev_state_exit(spa, NULL, ENODEV); spa_close(spa, FTAG); return (ENODEV); } } vdev_clear(spa, vd); (void) spa_vdev_state_exit(spa, NULL, 0); /* * Resume any suspended I/Os. */ if (zio_resume(spa) != 0) error = EIO; spa_close(spa, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_value name of origin snapshot * * outputs: * zc_string name of conflicting snapshot, if there is one */ static int zfs_ioc_promote(zfs_cmd_t *zc) { char *cp; /* * We don't need to unmount *all* the origin fs's snapshots, but * it's easier. */ cp = strchr(zc->zc_value, '@'); if (cp) *cp = '\0'; (void) dmu_objset_find(zc->zc_value, zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS); return (dsl_dataset_promote(zc->zc_name, zc->zc_string)); } /* * Retrieve a single {user|group}{used|quota}@... property. * * inputs: * zc_name name of filesystem * zc_objset_type zfs_userquota_prop_t * zc_value domain name (eg. "S-1-234-567-89") * zc_guid RID/UID/GID * * outputs: * zc_cookie property value */ static int zfs_ioc_userspace_one(zfs_cmd_t *zc) { +#ifdef HAVE_ZPL zfsvfs_t *zfsvfs; int error; if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE); if (error) return (error); error = zfs_userspace_one(zfsvfs, zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie); zfsvfs_rele(zfsvfs, FTAG); return (error); +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } /* * inputs: * zc_name name of filesystem * zc_cookie zap cursor * zc_objset_type zfs_userquota_prop_t * zc_nvlist_dst[_size] buffer to fill (not really an nvlist) * * outputs: * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t) * zc_cookie zap cursor */ static int zfs_ioc_userspace_many(zfs_cmd_t *zc) { +#ifdef HAVE_ZPL zfsvfs_t *zfsvfs; int bufsize = zc->zc_nvlist_dst_size; if (bufsize <= 0) return (ENOMEM); int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE); if (error) return (error); void *buf = kmem_alloc(bufsize, KM_SLEEP); error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie, buf, &zc->zc_nvlist_dst_size); if (error == 0) { error = xcopyout(buf, (void *)(uintptr_t)zc->zc_nvlist_dst, zc->zc_nvlist_dst_size); } kmem_free(buf, bufsize); zfsvfs_rele(zfsvfs, FTAG); return (error); +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } /* * inputs: * zc_name name of filesystem * * outputs: * none */ static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) { +#ifdef HAVE_ZPL objset_t *os; int error = 0; zfsvfs_t *zfsvfs; if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) { if (!dmu_objset_userused_enabled(zfsvfs->z_os)) { /* * If userused is not enabled, it may be because the * objset needs to be closed & reopened (to grow the * objset_phys_t). Suspend/resume the fs will do that. */ error = zfs_suspend_fs(zfsvfs); if (error == 0) error = zfs_resume_fs(zfsvfs, zc->zc_name); } if (error == 0) error = dmu_objset_userspace_upgrade(zfsvfs->z_os); VFS_RELE(zfsvfs->z_vfs); } else { /* XXX kind of reading contents without owning */ error = dmu_objset_hold(zc->zc_name, FTAG, &os); if (error) return (error); error = dmu_objset_userspace_upgrade(os); dmu_objset_rele(os, FTAG); } return (error); +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } /* * We don't want to have a hard dependency * against some special symbols in sharefs * nfs, and smbsrv. Determine them if needed when * the first file system is shared. * Neither sharefs, nfs or smbsrv are unloadable modules. */ +#ifdef HAVE_ZPL int (*znfsexport_fs)(void *arg); int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t); int (*zsmbexport_fs)(void *arg, boolean_t add_share); int zfs_nfsshare_inited; int zfs_smbshare_inited; ddi_modhandle_t nfs_mod; ddi_modhandle_t sharefs_mod; ddi_modhandle_t smbsrv_mod; kmutex_t zfs_share_lock; static int zfs_init_sharefs() { int error; ASSERT(MUTEX_HELD(&zfs_share_lock)); /* Both NFS and SMB shares also require sharetab support. */ if (sharefs_mod == NULL && ((sharefs_mod = ddi_modopen("fs/sharefs", KRTLD_MODE_FIRST, &error)) == NULL)) { return (ENOSYS); } if (zshare_fs == NULL && ((zshare_fs = (int (*)(enum sharefs_sys_op, share_t *, uint32_t)) ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) { return (ENOSYS); } return (0); } +#endif /* HAVE_ZPL */ static int zfs_ioc_share(zfs_cmd_t *zc) { +#ifdef HAVE_ZPL int error; int opcode; switch (zc->zc_share.z_sharetype) { case ZFS_SHARE_NFS: case ZFS_UNSHARE_NFS: if (zfs_nfsshare_inited == 0) { mutex_enter(&zfs_share_lock); if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs", KRTLD_MODE_FIRST, &error)) == NULL)) { mutex_exit(&zfs_share_lock); return (ENOSYS); } if (znfsexport_fs == NULL && ((znfsexport_fs = (int (*)(void *)) ddi_modsym(nfs_mod, "nfs_export", &error)) == NULL)) { mutex_exit(&zfs_share_lock); return (ENOSYS); } error = zfs_init_sharefs(); if (error) { mutex_exit(&zfs_share_lock); return (ENOSYS); } zfs_nfsshare_inited = 1; mutex_exit(&zfs_share_lock); } break; case ZFS_SHARE_SMB: case ZFS_UNSHARE_SMB: if (zfs_smbshare_inited == 0) { mutex_enter(&zfs_share_lock); if (smbsrv_mod == NULL && ((smbsrv_mod = ddi_modopen("drv/smbsrv", KRTLD_MODE_FIRST, &error)) == NULL)) { mutex_exit(&zfs_share_lock); return (ENOSYS); } if (zsmbexport_fs == NULL && ((zsmbexport_fs = (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod, "smb_server_share", &error)) == NULL)) { mutex_exit(&zfs_share_lock); return (ENOSYS); } error = zfs_init_sharefs(); if (error) { mutex_exit(&zfs_share_lock); return (ENOSYS); } zfs_smbshare_inited = 1; mutex_exit(&zfs_share_lock); } break; default: return (EINVAL); } switch (zc->zc_share.z_sharetype) { case ZFS_SHARE_NFS: case ZFS_UNSHARE_NFS: if (error = znfsexport_fs((void *) (uintptr_t)zc->zc_share.z_exportdata)) return (error); break; case ZFS_SHARE_SMB: case ZFS_UNSHARE_SMB: if (error = zsmbexport_fs((void *) (uintptr_t)zc->zc_share.z_exportdata, zc->zc_share.z_sharetype == ZFS_SHARE_SMB ? B_TRUE: B_FALSE)) { return (error); } break; } opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS || zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ? SHAREFS_ADD : SHAREFS_REMOVE; /* * Add or remove share from sharetab */ error = zshare_fs(opcode, (void *)(uintptr_t)zc->zc_share.z_sharedata, zc->zc_share.z_sharemax); return (error); - +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } ace_t full_access[] = { {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0} }; /* * inputs: * zc_name name of containing filesystem * zc_obj object # beyond which we want next in-use object # * * outputs: * zc_obj next in-use object # */ static int zfs_ioc_next_obj(zfs_cmd_t *zc) { objset_t *os = NULL; int error; error = dmu_objset_hold(zc->zc_name, FTAG, &os); if (error) return (error); error = dmu_object_next(os, &zc->zc_obj, B_FALSE, os->os_dsl_dataset->ds_phys->ds_prev_snap_txg); dmu_objset_rele(os, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_value prefix name for snapshot * zc_cleanup_fd cleanup-on-exit file descriptor for calling process * * outputs: */ static int zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) { char *snap_name; int error; snap_name = kmem_asprintf("%s-%016llx", zc->zc_value, (u_longlong_t)ddi_get_lbolt64()); if (strlen(snap_name) >= MAXNAMELEN) { strfree(snap_name); return (E2BIG); } error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name, NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd); if (error != 0) { strfree(snap_name); return (error); } (void) strcpy(zc->zc_value, snap_name); strfree(snap_name); return (0); } /* * inputs: * zc_name name of "to" snapshot * zc_value name of "from" snapshot * zc_cookie file descriptor to write diff data on * * outputs: * dmu_diff_record_t's to the file descriptor */ static int zfs_ioc_diff(zfs_cmd_t *zc) { objset_t *fromsnap; objset_t *tosnap; file_t *fp; offset_t off; int error; error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap); if (error) return (error); error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap); if (error) { dmu_objset_rele(tosnap, FTAG); return (error); } fp = getf(zc->zc_cookie); if (fp == NULL) { dmu_objset_rele(fromsnap, FTAG); dmu_objset_rele(tosnap, FTAG); return (EBADF); } off = fp->f_offset; error = dmu_diff(tosnap, fromsnap, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; releasef(zc->zc_cookie); dmu_objset_rele(fromsnap, FTAG); dmu_objset_rele(tosnap, FTAG); return (error); } /* * Remove all ACL files in shares dir */ +#ifdef HAVE_ZPL static int zfs_smb_acl_purge(znode_t *dzp) { zap_cursor_t zc; zap_attribute_t zap; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; int error; for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); (error = zap_cursor_retrieve(&zc, &zap)) == 0; zap_cursor_advance(&zc)) { if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred, NULL, 0)) != 0) break; } zap_cursor_fini(&zc); return (error); } +#endif /* HAVE ZPL */ static int zfs_ioc_smb_acl(zfs_cmd_t *zc) { +#ifdef HAVE_ZPL vnode_t *vp; znode_t *dzp; vnode_t *resourcevp = NULL; znode_t *sharedir; zfsvfs_t *zfsvfs; nvlist_t *nvlist; char *src, *target; vattr_t vattr; vsecattr_t vsec; int error = 0; if ((error = lookupname(zc->zc_value, UIO_SYSSPACE, NO_FOLLOW, NULL, &vp)) != 0) return (error); /* Now make sure mntpnt and dataset are ZFS */ if (vp->v_vfsp->vfs_fstype != zfsfstype || (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource), zc->zc_name) != 0)) { VN_RELE(vp); return (EINVAL); } dzp = VTOZ(vp); zfsvfs = dzp->z_zfsvfs; ZFS_ENTER(zfsvfs); /* * Create share dir if its missing. */ mutex_enter(&zfsvfs->z_lock); if (zfsvfs->z_shares_dir == 0) { dmu_tx_t *tx; tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE, ZFS_SHARES_DIR); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); } else { error = zfs_create_share_dir(zfsvfs, tx); dmu_tx_commit(tx); } if (error) { mutex_exit(&zfsvfs->z_lock); VN_RELE(vp); ZFS_EXIT(zfsvfs); return (error); } } mutex_exit(&zfsvfs->z_lock); ASSERT(zfsvfs->z_shares_dir); if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) { VN_RELE(vp); ZFS_EXIT(zfsvfs); return (error); } switch (zc->zc_cookie) { case ZFS_SMB_ACL_ADD: vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; vattr.va_type = VREG; vattr.va_mode = S_IFREG|0777; vattr.va_uid = 0; vattr.va_gid = 0; vsec.vsa_mask = VSA_ACE; vsec.vsa_aclentp = &full_access; vsec.vsa_aclentsz = sizeof (full_access); vsec.vsa_aclcnt = 1; error = VOP_CREATE(ZTOV(sharedir), zc->zc_string, &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec); if (resourcevp) VN_RELE(resourcevp); break; case ZFS_SMB_ACL_REMOVE: error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred, NULL, 0); break; case ZFS_SMB_ACL_RENAME: if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) { VN_RELE(vp); ZFS_EXIT(zfsvfs); return (error); } if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) || nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET, &target)) { VN_RELE(vp); VN_RELE(ZTOV(sharedir)); ZFS_EXIT(zfsvfs); nvlist_free(nvlist); return (error); } error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target, kcred, NULL, 0); nvlist_free(nvlist); break; case ZFS_SMB_ACL_PURGE: error = zfs_smb_acl_purge(sharedir); break; default: error = EINVAL; break; } VN_RELE(vp); VN_RELE(ZTOV(sharedir)); ZFS_EXIT(zfsvfs); return (error); +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } /* * inputs: * zc_name name of filesystem * zc_value short name of snap * zc_string user-supplied tag for this hold * zc_cookie recursive flag * zc_temphold set if hold is temporary * zc_cleanup_fd cleanup-on-exit file descriptor for calling process * zc_sendobj if non-zero, the objid for zc_name@zc_value * zc_createtxg if zc_sendobj is non-zero, snap must have zc_createtxg * * outputs: none */ static int zfs_ioc_hold(zfs_cmd_t *zc) { boolean_t recursive = zc->zc_cookie; spa_t *spa; dsl_pool_t *dp; dsl_dataset_t *ds; int error; minor_t minor = 0; if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) return (EINVAL); if (zc->zc_sendobj == 0) { return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value, zc->zc_string, recursive, zc->zc_temphold, zc->zc_cleanup_fd)); } if (recursive) return (EINVAL); error = spa_open(zc->zc_name, &spa, FTAG); if (error) return (error); dp = spa_get_dsl(spa); rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); spa_close(spa, FTAG); if (error) return (error); /* * Until we have a hold on this snapshot, it's possible that * zc_sendobj could've been destroyed and reused as part * of a later txg. Make sure we're looking at the right object. */ if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) { dsl_dataset_rele(ds, FTAG); return (ENOENT); } if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) { error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor); if (error) { dsl_dataset_rele(ds, FTAG); return (error); } } error = dsl_dataset_user_hold_for_send(ds, zc->zc_string, zc->zc_temphold); if (minor != 0) { if (error == 0) { dsl_register_onexit_hold_cleanup(ds, zc->zc_string, minor); } zfs_onexit_fd_rele(zc->zc_cleanup_fd); } dsl_dataset_rele(ds, FTAG); return (error); } /* * inputs: * zc_name name of dataset from which we're releasing a user hold * zc_value short name of snap * zc_string user-supplied tag for this hold * zc_cookie recursive flag * * outputs: none */ static int zfs_ioc_release(zfs_cmd_t *zc) { boolean_t recursive = zc->zc_cookie; if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) return (EINVAL); return (dsl_dataset_user_release(zc->zc_name, zc->zc_value, zc->zc_string, recursive)); } /* * inputs: * zc_name name of filesystem * * outputs: * zc_nvlist_src{_size} nvlist of snapshot holds */ static int zfs_ioc_get_holds(zfs_cmd_t *zc) { nvlist_t *nvp; int error; if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) { error = put_nvlist(zc, nvp); nvlist_free(nvp); } return (error); } /* * inputs: * zc_guid flags (ZEVENT_NONBLOCK) * * outputs: * zc_nvlist_dst next nvlist event * zc_cookie dropped events since last get * zc_cleanup_fd cleanup-on-exit file descriptor */ static int zfs_ioc_events_next(zfs_cmd_t *zc) { zfs_zevent_t *ze; nvlist_t *event = NULL; minor_t minor; uint64_t dropped = 0; int error; error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze); if (error != 0) return (error); do { error = zfs_zevent_next(ze, &event, &dropped); if (event != NULL) { zc->zc_cookie = dropped; error = put_nvlist(zc, event); nvlist_free(event); } if (zc->zc_guid & ZEVENT_NONBLOCK) break; if ((error == 0) || (error != ENOENT)) break; error = zfs_zevent_wait(ze); if (error) break; } while (1); zfs_zevent_fd_rele(zc->zc_cleanup_fd); return (error); } /* * outputs: * zc_cookie cleared events count */ static int zfs_ioc_events_clear(zfs_cmd_t *zc) { int count; zfs_zevent_drain_all(&count); zc->zc_cookie = count; return 0; } /* * pool create, destroy, and export don't log the history as part of * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export * do the logging of those commands. */ static zfs_ioc_vec_t zfs_ioc_vec[] = { { zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_pool_destroy, zfs_secpolicy_config, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_NONE }, { zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_pool_configs, zfs_secpolicy_none, NO_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY }, { zfs_ioc_pool_upgrade, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_vdev_set_state, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_vdev_setpath, zfs_secpolicy_config, POOL_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_vdev_setfru, zfs_secpolicy_config, POOL_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_objset_stats, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED }, { zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED }, { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED }, { zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE, POOL_CHECK_NONE }, { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_NONE }, { zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED }, { zfs_ioc_pool_set_props, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED }, { zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED }, { zfs_ioc_events_next, zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE }, { zfs_ioc_events_clear, zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE }, }; int pool_status_check(const char *name, zfs_ioc_namecheck_t type, zfs_ioc_poolcheck_t check) { spa_t *spa; int error; ASSERT(type == POOL_NAME || type == DATASET_NAME); if (check & POOL_CHECK_NONE) return (0); error = spa_open(name, &spa, FTAG); if (error == 0) { if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa)) error = EAGAIN; else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa)) error = EROFS; spa_close(spa, FTAG); } return (error); } +static void * +zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which) +{ + zfsdev_state_t *zs; + + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + + for (zs = list_head(&zfsdev_state_list); zs != NULL; + zs = list_next(&zfsdev_state_list, zs)) { + if (zs->zs_minor == minor) { + switch (which) { + case ZST_ONEXIT: return (zs->zs_onexit); + case ZST_ZEVENT: return (zs->zs_zevent); + case ZST_ALL: return (zs); + } + } + } + + return NULL; +} + +void * +zfsdev_get_state(minor_t minor, enum zfsdev_state_type which) +{ + void *ptr; + + mutex_enter(&zfsdev_state_lock); + ptr = zfsdev_get_state_impl(minor, which); + mutex_exit(&zfsdev_state_lock); + + return ptr; +} + +minor_t +zfsdev_getminor(struct file *filp) +{ + ASSERT(filp != NULL); + ASSERT(filp->private_data != NULL); + + return (((zfsdev_state_t *)filp->private_data)->zs_minor); +} + /* - * Find a free minor number. + * Find a free minor number. The zfsdev_state_list is expected to + * be short since it is only a list of currently open file handles. */ minor_t zfsdev_minor_alloc(void) { - static minor_t last_minor; + static minor_t last_minor = 0; minor_t m; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); for (m = last_minor + 1; m != last_minor; m++) { if (m > ZFSDEV_MAX_MINOR) m = 1; - if (ddi_get_soft_state(zfsdev_state, m) == NULL) { + if (zfsdev_get_state_impl(m, ZST_ALL) == NULL) { last_minor = m; return (m); } } return (0); } static int -zfs_ctldev_init(dev_t *devp) +zfsdev_state_init(struct file *filp) { + zfsdev_state_t *zs; minor_t minor; - zfs_soft_state_t *zs; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); - ASSERT(getminor(*devp) == 0); - minor = zfsdev_minor_alloc(); - if (minor == 0) - return (ENXIO); + minor = zfsdev_minor_alloc(); + if (minor == 0) + return (ENXIO); + + zs = kmem_zalloc( sizeof(zfsdev_state_t), KM_SLEEP); + if (zs == NULL) + return (ENOMEM); - if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) - return (EAGAIN); + zs->zs_file = filp; + zs->zs_minor = minor; + filp->private_data = zs; - *devp = makedevice(getemajor(*devp), minor); + zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit); + zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent); - zs = ddi_get_soft_state(zfsdev_state, minor); - zs->zss_type = ZSST_CTLDEV; - zfs_onexit_init((zfs_onexit_t **)&zs->zss_data); + list_insert_tail(&zfsdev_state_list, zs); return (0); } -static void -zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor) +static int +zfsdev_state_destroy(struct file *filp) { - ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + zfsdev_state_t *zs; - zfs_onexit_destroy(zo); - ddi_soft_state_free(zfsdev_state, minor); -} + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + ASSERT(filp->private_data != NULL); -void * -zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which) -{ - zfs_soft_state_t *zp; + zs = filp->private_data; + zfs_onexit_destroy(zs->zs_onexit); + zfs_zevent_destroy(zs->zs_zevent); - zp = ddi_get_soft_state(zfsdev_state, minor); - if (zp == NULL || zp->zss_type != which) - return (NULL); + list_remove(&zfsdev_state_list, zs); + kmem_free(zs, sizeof(zfsdev_state_t)); - return (zp->zss_data); + return 0; } static int -zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr) +zfsdev_open(struct inode *ino, struct file *filp) { - int error = 0; - - if (getminor(*devp) != 0) - return (zvol_open(devp, flag, otyp, cr)); + int error; - /* This is the control device. Allocate a new minor if requested. */ - if (flag & FEXCL) { - mutex_enter(&zfsdev_state_lock); - error = zfs_ctldev_init(devp); - mutex_exit(&zfsdev_state_lock); - } + mutex_enter(&zfsdev_state_lock); + error = zfsdev_state_init(filp); + mutex_exit(&zfsdev_state_lock); - return (error); + return (-error); } static int -zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr) +zfsdev_release(struct inode *ino, struct file *filp) { - zfs_onexit_t *zo; - minor_t minor = getminor(dev); - - if (minor == 0) - return (0); + int error; mutex_enter(&zfsdev_state_lock); - zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV); - if (zo == NULL) { - mutex_exit(&zfsdev_state_lock); - return (zvol_close(dev, flag, otyp, cr)); - } - zfs_ctldev_destroy(zo, minor); + error = zfsdev_state_destroy(filp); mutex_exit(&zfsdev_state_lock); - return (0); + return (-error); } -static int -zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) +static long +zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) { zfs_cmd_t *zc; uint_t vec; - int error, rc; - minor_t minor = getminor(dev); - - if (minor != 0 && - zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL) - return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp)); + int error, rc, flag = 0; vec = cmd - ZFS_IOC; - ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip)); - if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) - return (EINVAL); + return (-EINVAL); zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag); if (error != 0) error = EFAULT; if ((error == 0) && !(flag & FKIOCTL)) - error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr); + error = zfs_ioc_vec[vec].zvec_secpolicy(zc, NULL); /* * Ensure that all pool/dataset names are valid before we pass down to * the lower layers. */ if (error == 0) { zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; zc->zc_iflags = flag & FKIOCTL; switch (zfs_ioc_vec[vec].zvec_namecheck) { case POOL_NAME: if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) error = EINVAL; error = pool_status_check(zc->zc_name, zfs_ioc_vec[vec].zvec_namecheck, zfs_ioc_vec[vec].zvec_pool_check); break; case DATASET_NAME: if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) error = EINVAL; error = pool_status_check(zc->zc_name, zfs_ioc_vec[vec].zvec_namecheck, zfs_ioc_vec[vec].zvec_pool_check); break; case NO_NAME: break; } } if (error == 0) error = zfs_ioc_vec[vec].zvec_func(zc); rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag); if (error == 0) { if (rc != 0) error = EFAULT; if (zfs_ioc_vec[vec].zvec_his_log) zfs_log_history(zc); } kmem_free(zc, sizeof (zfs_cmd_t)); - return (error); + return (-error); } -static int -zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +#ifdef CONFIG_COMPAT +static long +zfsdev_compat_ioctl(struct file *filp, unsigned cmd, unsigned long arg) { - if (cmd != DDI_ATTACH) - return (DDI_FAILURE); - - if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0, - DDI_PSEUDO, 0) == DDI_FAILURE) - return (DDI_FAILURE); - - zfs_dip = dip; + return zfsdev_ioctl(filp, cmd, arg); +} +#else +#define zfs_compat_ioctl NULL +#endif - ddi_report_dev(dip); +static const struct file_operations zfsdev_fops = { + .open = zfsdev_open, + .release = zfsdev_release, + .unlocked_ioctl = zfsdev_ioctl, + .compat_ioctl = zfsdev_compat_ioctl, + .owner = THIS_MODULE, +}; - return (DDI_SUCCESS); -} +static struct miscdevice zfs_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = ZFS_DRIVER, + .fops = &zfsdev_fops, +}; static int -zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +zfs_attach(void) { - if (spa_busy() || zfs_busy() || zvol_busy()) - return (DDI_FAILURE); - - if (cmd != DDI_DETACH) - return (DDI_FAILURE); + int error; - zfs_dip = NULL; + mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&zfsdev_state_list, sizeof (zfsdev_state_t), + offsetof(zfsdev_state_t, zs_next)); - ddi_prop_remove_all(dip); - ddi_remove_minor_node(dip, NULL); + error = misc_register(&zfs_misc); + if (error) { + printk(KERN_INFO "ZFS: misc_register() failed %d\n", error); + return (error); + } - return (DDI_SUCCESS); + return (0); } -/*ARGSUSED*/ -static int -zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) +static void +zfs_detach(void) { - switch (infocmd) { - case DDI_INFO_DEVT2DEVINFO: - *result = zfs_dip; - return (DDI_SUCCESS); + int error; - case DDI_INFO_DEVT2INSTANCE: - *result = (void *)0; - return (DDI_SUCCESS); - } + error = misc_deregister(&zfs_misc); + if (error) + printk(KERN_INFO "ZFS: misc_deregister() failed %d\n", error); - return (DDI_FAILURE); + mutex_destroy(&zfsdev_state_lock); + list_destroy(&zfsdev_state_list); } -/* - * OK, so this is a little weird. - * - * /dev/zfs is the control node, i.e. minor 0. - * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0. - * - * /dev/zfs has basically nothing to do except serve up ioctls, - * so most of the standard driver entry points are in zvol.c. - */ -static struct cb_ops zfs_cb_ops = { - zfsdev_open, /* open */ - zfsdev_close, /* close */ - zvol_strategy, /* strategy */ - nodev, /* print */ - zvol_dump, /* dump */ - zvol_read, /* read */ - zvol_write, /* write */ - zfsdev_ioctl, /* ioctl */ - nodev, /* devmap */ - nodev, /* mmap */ - nodev, /* segmap */ - nochpoll, /* poll */ - ddi_prop_op, /* prop_op */ - NULL, /* streamtab */ - D_NEW | D_MP | D_64BIT, /* Driver compatibility flag */ - CB_REV, /* version */ - nodev, /* async read */ - nodev, /* async write */ -}; - -static struct dev_ops zfs_dev_ops = { - DEVO_REV, /* version */ - 0, /* refcnt */ - zfs_info, /* info */ - nulldev, /* identify */ - nulldev, /* probe */ - zfs_attach, /* attach */ - zfs_detach, /* detach */ - nodev, /* reset */ - &zfs_cb_ops, /* driver operations */ - NULL, /* no bus operations */ - NULL, /* power */ - ddi_quiesce_not_needed, /* quiesce */ -}; - -static struct modldrv zfs_modldrv = { - &mod_driverops, - "ZFS storage pool", - &zfs_dev_ops -}; - -static struct modlinkage modlinkage = { - MODREV_1, - (void *)&zfs_modlfs, - (void *)&zfs_modldrv, - NULL -}; - - +#ifdef HAVE_ZPL uint_t zfs_fsyncer_key; extern uint_t rrw_tsd_key; +#endif + +#ifdef DEBUG +#define ZFS_DEBUG_STR " (DEBUG mode)" +#else +#define ZFS_DEBUG_STR "" +#endif int _init(void) { int error; spa_init(FREAD | FWRITE); zfs_init(); - zvol_init(); - if ((error = mod_install(&modlinkage)) != 0) { - zvol_fini(); - zfs_fini(); - spa_fini(); - return (error); - } + if ((error = zvol_init()) != 0) + goto out1; + + if ((error = zfs_attach()) != 0) + goto out2; +#ifdef HAVE_ZPL tsd_create(&zfs_fsyncer_key, NULL); tsd_create(&rrw_tsd_key, NULL); - error = ldi_ident_from_mod(&modlinkage, &zfs_li); - ASSERT(error == 0); mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL); +#endif /* HAVE_ZPL */ + + printk(KERN_NOTICE "ZFS: Loaded ZFS Filesystem v%s%s\n", + ZFS_META_VERSION, ZFS_DEBUG_STR); return (0); + +out2: + (void) zvol_fini(); +out1: + zfs_fini(); + spa_fini(); + printk(KERN_NOTICE "ZFS: Failed to Load ZFS Filesystem v%s%s" + ", rc = %d\n", ZFS_META_VERSION, ZFS_DEBUG_STR, error); + + return (error); } int _fini(void) { - int error; - - if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled) - return (EBUSY); - - if ((error = mod_remove(&modlinkage)) != 0) - return (error); - + zfs_detach(); zvol_fini(); zfs_fini(); spa_fini(); +#ifdef HAVE_ZPL if (zfs_nfsshare_inited) (void) ddi_modclose(nfs_mod); if (zfs_smbshare_inited) (void) ddi_modclose(smbsrv_mod); if (zfs_nfsshare_inited || zfs_smbshare_inited) (void) ddi_modclose(sharefs_mod); - tsd_destroy(&zfs_fsyncer_key); - ldi_ident_release(zfs_li); - zfs_li = NULL; mutex_destroy(&zfs_share_lock); + tsd_destroy(&zfs_fsyncer_key); +#endif /* HAVE_ZPL */ - return (error); -} + printk(KERN_NOTICE "ZFS: Unloaded ZFS Filesystem v%s%s\n", + ZFS_META_VERSION, ZFS_DEBUG_STR); -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); + return (0); } + +#ifdef HAVE_SPL +spl_module_init(_init); +spl_module_exit(_fini); + +MODULE_DESCRIPTION("ZFS"); +MODULE_AUTHOR(ZFS_META_AUTHOR); +MODULE_LICENSE(ZFS_META_LICENSE); +#endif /* HAVE_SPL */ diff --git a/module/zfs/zfs_onexit.c b/module/zfs/zfs_onexit.c index 9706de2b42c2..2f60b5e4de15 100644 --- a/module/zfs/zfs_onexit.c +++ b/module/zfs/zfs_onexit.c @@ -1,246 +1,247 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include /* * ZFS kernel routines may add/delete callback routines to be invoked * upon process exit (triggered via the close operation from the /dev/zfs * driver). * * These cleanup callbacks are intended to allow for the accumulation * of kernel state across multiple ioctls. User processes participate - * by opening ZFS_DEV with O_EXCL. This causes the ZFS driver to do a - * clone-open, generating a unique minor number. The process then passes - * along that file descriptor to each ioctl that might have a cleanup operation. + * simply by opening ZFS_DEV. This causes the ZFS driver to do create + * some private data for the file descriptor and generating a unique + * minor number. The process then passes along that file descriptor to + * each ioctl that might have a cleanup operation. * * Consumers of the onexit routines should call zfs_onexit_fd_hold() early * on to validate the given fd and add a reference to its file table entry. * This allows the consumer to do its work and then add a callback, knowing * that zfs_onexit_add_cb() won't fail with EBADF. When finished, consumers * should call zfs_onexit_fd_rele(). * * A simple example is zfs_ioc_recv(), where we might create an AVL tree * with dataset/GUID mappings and then reuse that tree on subsequent * zfs_ioc_recv() calls. * * On the first zfs_ioc_recv() call, dmu_recv_stream() will kmem_alloc() * the AVL tree and pass it along with a callback function to * zfs_onexit_add_cb(). The zfs_onexit_add_cb() routine will register the * callback and return an action handle. * * The action handle is then passed from user space to subsequent * zfs_ioc_recv() calls, so that dmu_recv_stream() can fetch its AVL tree * by calling zfs_onexit_cb_data() with the device minor number and * action handle. * * If the user process exits abnormally, the callback is invoked implicitly * as part of the driver close operation. Once the user space process is * finished with the accumulated kernel state, it can also just call close(2) * on the cleanup fd to trigger the cleanup callback. */ void zfs_onexit_init(zfs_onexit_t **zop) { zfs_onexit_t *zo; zo = *zop = kmem_zalloc(sizeof (zfs_onexit_t), KM_SLEEP); mutex_init(&zo->zo_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zo->zo_actions, sizeof (zfs_onexit_action_node_t), offsetof(zfs_onexit_action_node_t, za_link)); } void zfs_onexit_destroy(zfs_onexit_t *zo) { zfs_onexit_action_node_t *ap; mutex_enter(&zo->zo_lock); while ((ap = list_head(&zo->zo_actions)) != NULL) { list_remove(&zo->zo_actions, ap); mutex_exit(&zo->zo_lock); ap->za_func(ap->za_data); kmem_free(ap, sizeof (zfs_onexit_action_node_t)); mutex_enter(&zo->zo_lock); } mutex_exit(&zo->zo_lock); list_destroy(&zo->zo_actions); mutex_destroy(&zo->zo_lock); kmem_free(zo, sizeof (zfs_onexit_t)); } static int zfs_onexit_minor_to_state(minor_t minor, zfs_onexit_t **zo) { - *zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV); + *zo = zfsdev_get_state(minor, ZST_ONEXIT); if (*zo == NULL) return (EBADF); return (0); } /* * Consumers might need to operate by minor number instead of fd, since * they might be running in another thread (e.g. txg_sync_thread). Callers * of this function must call zfs_onexit_fd_rele() when they're finished * using the minor number. */ int zfs_onexit_fd_hold(int fd, minor_t *minorp) { file_t *fp; zfs_onexit_t *zo; fp = getf(fd); if (fp == NULL) return (EBADF); - *minorp = getminor(fp->f_vnode->v_rdev); + *minorp = zfsdev_getminor(fp->f_file); return (zfs_onexit_minor_to_state(*minorp, &zo)); } void zfs_onexit_fd_rele(int fd) { releasef(fd); } /* * Add a callback to be invoked when the calling process exits. */ int zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, uint64_t *action_handle) { zfs_onexit_t *zo; zfs_onexit_action_node_t *ap; int error; error = zfs_onexit_minor_to_state(minor, &zo); if (error) return (error); ap = kmem_alloc(sizeof (zfs_onexit_action_node_t), KM_SLEEP); list_link_init(&ap->za_link); ap->za_func = func; ap->za_data = data; mutex_enter(&zo->zo_lock); list_insert_tail(&zo->zo_actions, ap); mutex_exit(&zo->zo_lock); if (action_handle) *action_handle = (uint64_t)(uintptr_t)ap; return (0); } static zfs_onexit_action_node_t * zfs_onexit_find_cb(zfs_onexit_t *zo, uint64_t action_handle) { zfs_onexit_action_node_t *match; zfs_onexit_action_node_t *ap; list_t *l; ASSERT(MUTEX_HELD(&zo->zo_lock)); match = (zfs_onexit_action_node_t *)(uintptr_t)action_handle; l = &zo->zo_actions; for (ap = list_head(l); ap != NULL; ap = list_next(l, ap)) { if (match == ap) break; } return (ap); } /* * Delete the callback, triggering it first if 'fire' is set. */ int zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire) { zfs_onexit_t *zo; zfs_onexit_action_node_t *ap; int error; error = zfs_onexit_minor_to_state(minor, &zo); if (error) return (error); mutex_enter(&zo->zo_lock); ap = zfs_onexit_find_cb(zo, action_handle); if (ap != NULL) { list_remove(&zo->zo_actions, ap); mutex_exit(&zo->zo_lock); if (fire) ap->za_func(ap->za_data); kmem_free(ap, sizeof (zfs_onexit_action_node_t)); } else { mutex_exit(&zo->zo_lock); error = ENOENT; } return (error); } /* * Return the data associated with this callback. This allows consumers * of the cleanup-on-exit interfaces to stash kernel data across system * calls, knowing that it will be cleaned up if the calling process exits. */ int zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data) { zfs_onexit_t *zo; zfs_onexit_action_node_t *ap; int error; *data = NULL; error = zfs_onexit_minor_to_state(minor, &zo); if (error) return (error); mutex_enter(&zo->zo_lock); ap = zfs_onexit_find_cb(zo, action_handle); if (ap != NULL) *data = ap->za_data; else error = ENOENT; mutex_exit(&zo->zo_lock); return (error); } diff --git a/scripts/common.sh.in b/scripts/common.sh similarity index 81% copy from scripts/common.sh.in copy to scripts/common.sh index 00418696c8e4..66097570d76e 100644 --- a/scripts/common.sh.in +++ b/scripts/common.sh @@ -1,373 +1,443 @@ #!/bin/bash # # Common support functions for testing scripts. If a .script-config # files is available it will be sourced so in-tree kernel modules and # utilities will be used. If no .script-config can be found then the # installed kernel modules and utilities will be used. basedir="$(dirname $0)" SCRIPT_CONFIG=.script-config if [ -f "${basedir}/../${SCRIPT_CONFIG}" ]; then . "${basedir}/../${SCRIPT_CONFIG}" else MODULES=(zlib_deflate spl splat zavl znvpair zunicode zcommon zfs) fi PROG="" CLEANUP= VERBOSE= VERBOSE_FLAG= FORCE= FORCE_FLAG= DUMP_LOG= ERROR= RAID0S=() RAID10S=() RAIDZS=() RAIDZ2S=() +TESTS_RUN=${TESTS_RUN:-'*'} +TESTS_SKIP=${TESTS_SKIP:-} -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libexecdir=@libexecdir@ -pkglibexecdir=${libexecdir}/@PACKAGE@ -bindir=@bindir@ -sbindir=@sbindir@ +prefix=/usr/local +exec_prefix=${prefix} +libexecdir=${exec_prefix}/libexec +pkglibexecdir=${libexecdir}/zfs +bindir=${exec_prefix}/bin +sbindir=${exec_prefix}/sbin ETCDIR=${ETCDIR:-/etc} DEVDIR=${DEVDIR:-/dev/disk/zpool} ZPOOLDIR=${ZPOOLDIR:-${pkglibexecdir}/zpool-config} +ZPIOSDIR=${ZPIOSDIR:-${pkglibexecdir}/zpios-test} +ZPIOSPROFILEDIR=${ZPIOSPROFILEDIR:-${pkglibexecdir}/zpios-profile} ZDB=${ZDB:-${sbindir}/zdb} ZFS=${ZFS:-${sbindir}/zfs} ZINJECT=${ZINJECT:-${sbindir}/zinject} ZPOOL=${ZPOOL:-${sbindir}/zpool} ZPOOL_ID=${ZPOOL_ID:-${bindir}/zpool_id} ZTEST=${ZTEST:-${sbindir}/ztest} +ZPIOS=${ZPIOS:-${sbindir}/zpios} COMMON_SH=${COMMON_SH:-${pkglibexecdir}/common.sh} ZFS_SH=${ZFS_SH:-${pkglibexecdir}/zfs.sh} ZPOOL_CREATE_SH=${ZPOOL_CREATE_SH:-${pkglibexecdir}/zpool-create.sh} +ZPIOS_SH=${ZPIOS_SH:-${pkglibexecdir}/zpios.sh} +ZPIOS_SURVEY_SH=${ZPIOS_SURVEY_SH:-${pkglibexecdir}/zpios-survey.sh} LDMOD=${LDMOD:-/sbin/modprobe} LSMOD=${LSMOD:-/sbin/lsmod} RMMOD=${RMMOD:-/sbin/rmmod} INFOMOD=${INFOMOD:-/sbin/modinfo} LOSETUP=${LOSETUP:-/sbin/losetup} SYSCTL=${SYSCTL:-/sbin/sysctl} UDEVADM=${UDEVADM:-/sbin/udevadm} AWK=${AWK:-/usr/bin/awk} +COLOR_BLACK="\033[0;30m" +COLOR_DK_GRAY="\033[1;30m" +COLOR_BLUE="\033[0;34m" +COLOR_LT_BLUE="\033[1;34m" +COLOR_GREEN="\033[0;32m" +COLOR_LT_GREEN="\033[1;32m" +COLOR_CYAN="\033[0;36m" +COLOR_LT_CYAN="\033[1;36m" +COLOR_RED="\033[0;31m" +COLOR_LT_RED="\033[1;31m" +COLOR_PURPLE="\033[0;35m" +COLOR_LT_PURPLE="\033[1;35m" +COLOR_BROWN="\033[0;33m" +COLOR_YELLOW="\033[1;33m" +COLOR_LT_GRAY="\033[0;37m" +COLOR_WHITE="\033[1;37m" +COLOR_RESET="\033[0m" + die() { echo -e "${PROG}: $1" >&2 exit 1 } msg() { if [ ${VERBOSE} ]; then echo "$@" fi } pass() { - echo "PASS" + echo -e "${COLOR_GREEN}Pass${COLOR_RESET}" } fail() { - echo "FAIL ($1)" + echo -e "${COLOR_RED}Fail${COLOR_RESET} ($1)" exit $1 } +skip() { + echo -e "${COLOR_BROWN}Skip${COLOR_RESET}" +} + spl_dump_log() { ${SYSCTL} -w kernel.spl.debug.dump=1 &>/dev/null local NAME=`dmesg | tail -n 1 | cut -f5 -d' '` ${SPLBUILD}/cmd/spl ${NAME} >${NAME}.log echo echo "Dumped debug log: ${NAME}.log" tail -n1 ${NAME}.log echo return 0 } check_modules() { local LOADED_MODULES=() local MISSING_MODULES=() for MOD in ${MODULES[*]}; do local NAME=`basename $MOD .ko` if ${LSMOD} | egrep -q "^${NAME}"; then LOADED_MODULES=(${NAME} ${LOADED_MODULES[*]}) fi if [ ${INFOMOD} ${MOD} 2>/dev/null ]; then MISSING_MODULES=("\t${MOD}\n" ${MISSING_MODULES[*]}) fi done if [ ${#LOADED_MODULES[*]} -gt 0 ]; then ERROR="Unload these modules with '${PROG} -u':\n" ERROR="${ERROR}${LOADED_MODULES[*]}" return 1 fi if [ ${#MISSING_MODULES[*]} -gt 0 ]; then ERROR="The following modules can not be found," ERROR="${ERROR} ensure your source trees are built:\n" ERROR="${ERROR}${MISSING_MODULES[*]}" return 1 fi return 0 } load_module() { local NAME=`basename $1 .ko` if [ ${VERBOSE} ]; then echo "Loading ${NAME} ($@)" fi ${LDMOD} $* || ERROR="Failed to load $1" return 1 return 0 } load_modules() { mkdir -p /etc/zfs for MOD in ${MODULES[*]}; do local NAME=`basename ${MOD} .ko` local VALUE= for OPT in "$@"; do OPT_NAME=`echo ${OPT} | cut -f1 -d'='` if [ ${NAME} = "${OPT_NAME}" ]; then VALUE=`echo ${OPT} | cut -f2- -d'='` fi done load_module ${MOD} ${VALUE} || return 1 done if [ ${VERBOSE} ]; then echo "Successfully loaded ZFS module stack" fi return 0 } unload_module() { local NAME=`basename $1 .ko` if [ ${VERBOSE} ]; then echo "Unloading ${NAME} ($@)" fi ${RMMOD} ${NAME} || ERROR="Failed to unload ${NAME}" return 1 return 0 } unload_modules() { local MODULES_REVERSE=( $(echo ${MODULES[@]} | ${AWK} '{for (i=NF;i>=1;i--) printf $i" "} END{print ""}') ) for MOD in ${MODULES_REVERSE[*]}; do local NAME=`basename ${MOD} .ko` local USE_COUNT=`${LSMOD} | egrep "^${NAME} "| ${AWK} '{print $3}'` if [ "${USE_COUNT}" = 0 ] ; then if [ "${DUMP_LOG}" -a ${NAME} = "spl" ]; then spl_dump_log fi unload_module ${MOD} || return 1 fi done if [ ${VERBOSE} ]; then echo "Successfully unloaded ZFS module stack" fi return 0 } unused_loop_device() { for DEVICE in `ls -1 /dev/loop*`; do ${LOSETUP} ${DEVICE} &>/dev/null if [ $? -ne 0 ]; then echo ${DEVICE} return fi done die "Error: Unable to find unused loopback device" } # # This can be slightly dangerous because the loop devices we are # cleanup up may not be ours. However, if the devices are currently # in use we will not be able to remove them, and we only remove # devices which include 'zpool' in the name. So any damage we might # do should be limited to other zfs related testing. # cleanup_loop_devices() { local TMP_FILE=`mktemp` ${LOSETUP} -a | tr -d '()' >${TMP_FILE} ${AWK} -F":" -v losetup="$LOSETUP" \ '/zpool/ { system("losetup -d "$1) }' ${TMP_FILE} ${AWK} -F" " '/zpool/ { system("rm -f "$3) }' ${TMP_FILE} rm -f ${TMP_FILE} } # # The following udev helper functions assume that the provided # udev rules file will create a /dev/disk/zpool/ # disk mapping. In this mapping each CHANNEL is represented by # the letters a-z, and the RANK is represented by the numbers # 1-n. A CHANNEL should identify a group of RANKS which are all # attached to a single controller, each RANK represents a disk. # This provides a simply mechanism to locate a specific drive # given a known hardware configuration. # udev_setup() { local SRC_PATH=$1 # When running in tree manually contruct symlinks in tree to # the proper devices. Symlinks are installed for all entires # in the config file regardless of if that device actually # exists. When installed as a package udev can be relied on for # this and it will only create links for devices which exist. if [ ${INTREE} ]; then PWD=`pwd` mkdir -p ${DEVDIR}/ cd ${DEVDIR}/ ${AWK} '!/^#/ && /./ { system( \ "ln -f -s /dev/disk/by-path/"$2" "$1";" \ "ln -f -s /dev/disk/by-path/"$2"-part1 "$1"p1;" \ "ln -f -s /dev/disk/by-path/"$2"-part9 "$1"p9;" \ ) }' $SRC_PATH cd ${PWD} else DST_FILE=`basename ${SRC_PATH} | cut -f1-2 -d'.'` DST_PATH=/etc/zfs/${DST_FILE} if [ -e ${DST_PATH} ]; then die "Error: Config ${DST_PATH} already exists" fi cp ${SRC_PATH} ${DST_PATH} if [ -f ${UDEVADM} ]; then ${UDEVADM} trigger ${UDEVADM} settle else /sbin/udevtrigger /sbin/udevsettle fi fi return 0 } udev_cleanup() { local SRC_PATH=$1 if [ ${INTREE} ]; then PWD=`pwd` cd ${DEVDIR}/ ${AWK} '!/^#/ && /./ { system( \ "rm -f "$1" "$1"p1 "$1"p9") }' $SRC_PATH cd ${PWD} fi return 0 } udev_cr2d() { local CHANNEL=`echo "obase=16; $1+96" | bc` local RANK=$2 printf "\x${CHANNEL}${RANK}" } udev_raid0_setup() { local RANKS=$1 local CHANNELS=$2 local IDX=0 RAID0S=() for RANK in `seq 1 ${RANKS}`; do for CHANNEL in `seq 1 ${CHANNELS}`; do DISK=`udev_cr2d ${CHANNEL} ${RANK}` RAID0S[${IDX}]="${DEVDIR}/${DISK}" let IDX=IDX+1 done done return 0 } udev_raid10_setup() { local RANKS=$1 local CHANNELS=$2 local IDX=0 RAID10S=() for RANK in `seq 1 ${RANKS}`; do for CHANNEL1 in `seq 1 2 ${CHANNELS}`; do let CHANNEL2=CHANNEL1+1 DISK1=`udev_cr2d ${CHANNEL1} ${RANK}` DISK2=`udev_cr2d ${CHANNEL2} ${RANK}` GROUP="${DEVDIR}/${DISK1} ${DEVDIR}/${DISK2}" RAID10S[${IDX}]="mirror ${GROUP}" let IDX=IDX+1 done done return 0 } udev_raidz_setup() { local RANKS=$1 local CHANNELS=$2 RAIDZS=() for RANK in `seq 1 ${RANKS}`; do RAIDZ=("raidz") for CHANNEL in `seq 1 ${CHANNELS}`; do DISK=`udev_cr2d ${CHANNEL} ${RANK}` RAIDZ[${CHANNEL}]="${DEVDIR}/${DISK}" done RAIDZS[${RANK}]="${RAIDZ[*]}" done return 0 } udev_raidz2_setup() { local RANKS=$1 local CHANNELS=$2 RAIDZ2S=() for RANK in `seq 1 ${RANKS}`; do RAIDZ2=("raidz2") for CHANNEL in `seq 1 ${CHANNELS}`; do DISK=`udev_cr2d ${CHANNEL} ${RANK}` RAIDZ2[${CHANNEL}]="${DEVDIR}/${DISK}" done RAIDZ2S[${RANK}]="${RAIDZ2[*]}" done return 0 } + +run_one_test() { + local TEST_NUM=$1 + local TEST_NAME=$2 + + printf "%-4d %-36s " ${TEST_NUM} "${TEST_NAME}" + test_${TEST_NUM} +} + +skip_one_test() { + local TEST_NUM=$1 + local TEST_NAME=$2 + + printf "%-4d %-36s " ${TEST_NUM} "${TEST_NAME}" + skip +} + +run_test() { + local TEST_NUM=$1 + local TEST_NAME=$2 + + for i in ${TESTS_SKIP[@]}; do + if [[ $i == ${TEST_NUM} ]] ; then + skip_one_test ${TEST_NUM} "${TEST_NAME}" + return 0 + fi + done + + if [ "${TESTS_RUN[0]}" = "*" ]; then + run_one_test ${TEST_NUM} "${TEST_NAME}" + else + for i in ${TESTS_RUN[@]}; do + if [[ $i == ${TEST_NUM} ]] ; then + run_one_test ${TEST_NUM} "${TEST_NAME}" + return 0 + fi + done + + skip_one_test ${TEST_NUM} "${TEST_NAME}" + fi +} diff --git a/scripts/common.sh.in b/scripts/common.sh.in index 00418696c8e4..0a8399f18412 100644 --- a/scripts/common.sh.in +++ b/scripts/common.sh.in @@ -1,373 +1,438 @@ #!/bin/bash # # Common support functions for testing scripts. If a .script-config # files is available it will be sourced so in-tree kernel modules and # utilities will be used. If no .script-config can be found then the # installed kernel modules and utilities will be used. basedir="$(dirname $0)" SCRIPT_CONFIG=.script-config if [ -f "${basedir}/../${SCRIPT_CONFIG}" ]; then . "${basedir}/../${SCRIPT_CONFIG}" else MODULES=(zlib_deflate spl splat zavl znvpair zunicode zcommon zfs) fi PROG="" CLEANUP= VERBOSE= VERBOSE_FLAG= FORCE= FORCE_FLAG= DUMP_LOG= ERROR= RAID0S=() RAID10S=() RAIDZS=() RAIDZ2S=() +TESTS_RUN=${TESTS_RUN:-'*'} +TESTS_SKIP=${TESTS_SKIP:-} prefix=@prefix@ exec_prefix=@exec_prefix@ libexecdir=@libexecdir@ pkglibexecdir=${libexecdir}/@PACKAGE@ bindir=@bindir@ sbindir=@sbindir@ ETCDIR=${ETCDIR:-/etc} DEVDIR=${DEVDIR:-/dev/disk/zpool} ZPOOLDIR=${ZPOOLDIR:-${pkglibexecdir}/zpool-config} ZDB=${ZDB:-${sbindir}/zdb} ZFS=${ZFS:-${sbindir}/zfs} ZINJECT=${ZINJECT:-${sbindir}/zinject} ZPOOL=${ZPOOL:-${sbindir}/zpool} ZPOOL_ID=${ZPOOL_ID:-${bindir}/zpool_id} ZTEST=${ZTEST:-${sbindir}/ztest} COMMON_SH=${COMMON_SH:-${pkglibexecdir}/common.sh} ZFS_SH=${ZFS_SH:-${pkglibexecdir}/zfs.sh} ZPOOL_CREATE_SH=${ZPOOL_CREATE_SH:-${pkglibexecdir}/zpool-create.sh} LDMOD=${LDMOD:-/sbin/modprobe} LSMOD=${LSMOD:-/sbin/lsmod} RMMOD=${RMMOD:-/sbin/rmmod} INFOMOD=${INFOMOD:-/sbin/modinfo} LOSETUP=${LOSETUP:-/sbin/losetup} SYSCTL=${SYSCTL:-/sbin/sysctl} UDEVADM=${UDEVADM:-/sbin/udevadm} AWK=${AWK:-/usr/bin/awk} +COLOR_BLACK="\033[0;30m" +COLOR_DK_GRAY="\033[1;30m" +COLOR_BLUE="\033[0;34m" +COLOR_LT_BLUE="\033[1;34m" +COLOR_GREEN="\033[0;32m" +COLOR_LT_GREEN="\033[1;32m" +COLOR_CYAN="\033[0;36m" +COLOR_LT_CYAN="\033[1;36m" +COLOR_RED="\033[0;31m" +COLOR_LT_RED="\033[1;31m" +COLOR_PURPLE="\033[0;35m" +COLOR_LT_PURPLE="\033[1;35m" +COLOR_BROWN="\033[0;33m" +COLOR_YELLOW="\033[1;33m" +COLOR_LT_GRAY="\033[0;37m" +COLOR_WHITE="\033[1;37m" +COLOR_RESET="\033[0m" + die() { echo -e "${PROG}: $1" >&2 exit 1 } msg() { if [ ${VERBOSE} ]; then echo "$@" fi } pass() { - echo "PASS" + echo -e "${COLOR_GREEN}Pass${COLOR_RESET}" } fail() { - echo "FAIL ($1)" + echo -e "${COLOR_RED}Fail${COLOR_RESET} ($1)" exit $1 } +skip() { + echo -e "${COLOR_BROWN}Skip${COLOR_RESET}" +} + spl_dump_log() { ${SYSCTL} -w kernel.spl.debug.dump=1 &>/dev/null local NAME=`dmesg | tail -n 1 | cut -f5 -d' '` ${SPLBUILD}/cmd/spl ${NAME} >${NAME}.log echo echo "Dumped debug log: ${NAME}.log" tail -n1 ${NAME}.log echo return 0 } check_modules() { local LOADED_MODULES=() local MISSING_MODULES=() for MOD in ${MODULES[*]}; do local NAME=`basename $MOD .ko` if ${LSMOD} | egrep -q "^${NAME}"; then LOADED_MODULES=(${NAME} ${LOADED_MODULES[*]}) fi if [ ${INFOMOD} ${MOD} 2>/dev/null ]; then MISSING_MODULES=("\t${MOD}\n" ${MISSING_MODULES[*]}) fi done if [ ${#LOADED_MODULES[*]} -gt 0 ]; then ERROR="Unload these modules with '${PROG} -u':\n" ERROR="${ERROR}${LOADED_MODULES[*]}" return 1 fi if [ ${#MISSING_MODULES[*]} -gt 0 ]; then ERROR="The following modules can not be found," ERROR="${ERROR} ensure your source trees are built:\n" ERROR="${ERROR}${MISSING_MODULES[*]}" return 1 fi return 0 } load_module() { local NAME=`basename $1 .ko` if [ ${VERBOSE} ]; then echo "Loading ${NAME} ($@)" fi ${LDMOD} $* || ERROR="Failed to load $1" return 1 return 0 } load_modules() { mkdir -p /etc/zfs for MOD in ${MODULES[*]}; do local NAME=`basename ${MOD} .ko` local VALUE= for OPT in "$@"; do OPT_NAME=`echo ${OPT} | cut -f1 -d'='` if [ ${NAME} = "${OPT_NAME}" ]; then VALUE=`echo ${OPT} | cut -f2- -d'='` fi done load_module ${MOD} ${VALUE} || return 1 done if [ ${VERBOSE} ]; then echo "Successfully loaded ZFS module stack" fi return 0 } unload_module() { local NAME=`basename $1 .ko` if [ ${VERBOSE} ]; then echo "Unloading ${NAME} ($@)" fi ${RMMOD} ${NAME} || ERROR="Failed to unload ${NAME}" return 1 return 0 } unload_modules() { local MODULES_REVERSE=( $(echo ${MODULES[@]} | ${AWK} '{for (i=NF;i>=1;i--) printf $i" "} END{print ""}') ) for MOD in ${MODULES_REVERSE[*]}; do local NAME=`basename ${MOD} .ko` local USE_COUNT=`${LSMOD} | egrep "^${NAME} "| ${AWK} '{print $3}'` if [ "${USE_COUNT}" = 0 ] ; then if [ "${DUMP_LOG}" -a ${NAME} = "spl" ]; then spl_dump_log fi unload_module ${MOD} || return 1 fi done if [ ${VERBOSE} ]; then echo "Successfully unloaded ZFS module stack" fi return 0 } unused_loop_device() { for DEVICE in `ls -1 /dev/loop*`; do ${LOSETUP} ${DEVICE} &>/dev/null if [ $? -ne 0 ]; then echo ${DEVICE} return fi done die "Error: Unable to find unused loopback device" } # # This can be slightly dangerous because the loop devices we are # cleanup up may not be ours. However, if the devices are currently # in use we will not be able to remove them, and we only remove # devices which include 'zpool' in the name. So any damage we might # do should be limited to other zfs related testing. # cleanup_loop_devices() { local TMP_FILE=`mktemp` ${LOSETUP} -a | tr -d '()' >${TMP_FILE} ${AWK} -F":" -v losetup="$LOSETUP" \ '/zpool/ { system("losetup -d "$1) }' ${TMP_FILE} ${AWK} -F" " '/zpool/ { system("rm -f "$3) }' ${TMP_FILE} rm -f ${TMP_FILE} } # # The following udev helper functions assume that the provided # udev rules file will create a /dev/disk/zpool/ # disk mapping. In this mapping each CHANNEL is represented by # the letters a-z, and the RANK is represented by the numbers # 1-n. A CHANNEL should identify a group of RANKS which are all # attached to a single controller, each RANK represents a disk. # This provides a simply mechanism to locate a specific drive # given a known hardware configuration. # udev_setup() { local SRC_PATH=$1 # When running in tree manually contruct symlinks in tree to # the proper devices. Symlinks are installed for all entires # in the config file regardless of if that device actually # exists. When installed as a package udev can be relied on for # this and it will only create links for devices which exist. if [ ${INTREE} ]; then PWD=`pwd` mkdir -p ${DEVDIR}/ cd ${DEVDIR}/ ${AWK} '!/^#/ && /./ { system( \ "ln -f -s /dev/disk/by-path/"$2" "$1";" \ "ln -f -s /dev/disk/by-path/"$2"-part1 "$1"p1;" \ "ln -f -s /dev/disk/by-path/"$2"-part9 "$1"p9;" \ ) }' $SRC_PATH cd ${PWD} else DST_FILE=`basename ${SRC_PATH} | cut -f1-2 -d'.'` DST_PATH=/etc/zfs/${DST_FILE} if [ -e ${DST_PATH} ]; then die "Error: Config ${DST_PATH} already exists" fi cp ${SRC_PATH} ${DST_PATH} if [ -f ${UDEVADM} ]; then ${UDEVADM} trigger ${UDEVADM} settle else /sbin/udevtrigger /sbin/udevsettle fi fi return 0 } udev_cleanup() { local SRC_PATH=$1 if [ ${INTREE} ]; then PWD=`pwd` cd ${DEVDIR}/ ${AWK} '!/^#/ && /./ { system( \ "rm -f "$1" "$1"p1 "$1"p9") }' $SRC_PATH cd ${PWD} fi return 0 } udev_cr2d() { local CHANNEL=`echo "obase=16; $1+96" | bc` local RANK=$2 printf "\x${CHANNEL}${RANK}" } udev_raid0_setup() { local RANKS=$1 local CHANNELS=$2 local IDX=0 RAID0S=() for RANK in `seq 1 ${RANKS}`; do for CHANNEL in `seq 1 ${CHANNELS}`; do DISK=`udev_cr2d ${CHANNEL} ${RANK}` RAID0S[${IDX}]="${DEVDIR}/${DISK}" let IDX=IDX+1 done done return 0 } udev_raid10_setup() { local RANKS=$1 local CHANNELS=$2 local IDX=0 RAID10S=() for RANK in `seq 1 ${RANKS}`; do for CHANNEL1 in `seq 1 2 ${CHANNELS}`; do let CHANNEL2=CHANNEL1+1 DISK1=`udev_cr2d ${CHANNEL1} ${RANK}` DISK2=`udev_cr2d ${CHANNEL2} ${RANK}` GROUP="${DEVDIR}/${DISK1} ${DEVDIR}/${DISK2}" RAID10S[${IDX}]="mirror ${GROUP}" let IDX=IDX+1 done done return 0 } udev_raidz_setup() { local RANKS=$1 local CHANNELS=$2 RAIDZS=() for RANK in `seq 1 ${RANKS}`; do RAIDZ=("raidz") for CHANNEL in `seq 1 ${CHANNELS}`; do DISK=`udev_cr2d ${CHANNEL} ${RANK}` RAIDZ[${CHANNEL}]="${DEVDIR}/${DISK}" done RAIDZS[${RANK}]="${RAIDZ[*]}" done return 0 } udev_raidz2_setup() { local RANKS=$1 local CHANNELS=$2 RAIDZ2S=() for RANK in `seq 1 ${RANKS}`; do RAIDZ2=("raidz2") for CHANNEL in `seq 1 ${CHANNELS}`; do DISK=`udev_cr2d ${CHANNEL} ${RANK}` RAIDZ2[${CHANNEL}]="${DEVDIR}/${DISK}" done RAIDZ2S[${RANK}]="${RAIDZ2[*]}" done return 0 } + +run_one_test() { + local TEST_NUM=$1 + local TEST_NAME=$2 + + printf "%-4d %-36s " ${TEST_NUM} "${TEST_NAME}" + test_${TEST_NUM} +} + +skip_one_test() { + local TEST_NUM=$1 + local TEST_NAME=$2 + + printf "%-4d %-36s " ${TEST_NUM} "${TEST_NAME}" + skip +} + +run_test() { + local TEST_NUM=$1 + local TEST_NAME=$2 + + for i in ${TESTS_SKIP[@]}; do + if [[ $i == ${TEST_NUM} ]] ; then + skip_one_test ${TEST_NUM} "${TEST_NAME}" + return 0 + fi + done + + if [ "${TESTS_RUN[0]}" = "*" ]; then + run_one_test ${TEST_NUM} "${TEST_NAME}" + else + for i in ${TESTS_RUN[@]}; do + if [[ $i == ${TEST_NUM} ]] ; then + run_one_test ${TEST_NUM} "${TEST_NAME}" + return 0 + fi + done + + skip_one_test ${TEST_NUM} "${TEST_NAME}" + fi +} diff --git a/scripts/zconfig.sh b/scripts/zconfig.sh index 98f00fa0edae..c2e5d9754007 100755 --- a/scripts/zconfig.sh +++ b/scripts/zconfig.sh @@ -1,572 +1,564 @@ #!/bin/bash # # ZFS/ZPOOL configuration test script. basedir="$(dirname $0)" SCRIPT_COMMON=common.sh if [ -f "${basedir}/${SCRIPT_COMMON}" ]; then . "${basedir}/${SCRIPT_COMMON}" else echo "Missing helper script ${SCRIPT_COMMON}" && exit 1 fi PROG=zconfig.sh usage() { cat << EOF USAGE: $0 [hvc] DESCRIPTION: ZFS/ZPOOL configuration tests OPTIONS: -h Show this message -v Verbose -c Cleanup lo+file devices at start EOF } -while getopts 'hvc?' OPTION; do +while getopts 'hvct:s:?' OPTION; do case $OPTION in h) usage exit 1 ;; v) VERBOSE=1 ;; c) CLEANUP=1 ;; + t) + TESTS_RUN=($OPTARG) + ;; + s) + TESTS_SKIP=($OPTARG) + ;; ?) usage exit ;; esac done if [ $(id -u) != 0 ]; then die "Must run as root" fi # Perform pre-cleanup is requested if [ ${CLEANUP} ]; then cleanup_loop_devices rm -f /tmp/zpool.cache.* fi zconfig_partition() { local DEVICE=$1 local START=$2 local END=$3 local TMP_FILE=`mktemp` /sbin/sfdisk -q ${DEVICE} << EOF &>${TMP_FILE} || fail 4 ${START},${END} ; ; ; EOF rm ${TMP_FILE} } # Validate persistent zpool.cache configuration. -zconfig_test1() { +test_1() { local POOL_NAME=test1 local TMP_FILE1=`mktemp` local TMP_FILE2=`mktemp` local TMP_CACHE=`mktemp -p /tmp zpool.cache.XXXXXXXX` - echo -n "test 1 - persistent zpool.cache: " - # Create a pool save its status for comparison. ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 1 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 || fail 2 ${ZPOOL} status ${POOL_NAME} >${TMP_FILE1} || fail 3 # Unload/load the module stack and verify the pool persists. ${ZFS_SH} -u || fail 4 ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 5 ${ZPOOL} status ${POOL_NAME} >${TMP_FILE2} || fail 6 cmp ${TMP_FILE1} ${TMP_FILE2} || fail 7 # Cleanup the test pool and temporary files ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 -d || fail 8 rm -f ${TMP_FILE1} ${TMP_FILE2} ${TMP_CACHE} || fail 9 ${ZFS_SH} -u || fail 10 pass } -zconfig_test1 +run_test 1 "persistent zpool.cache" # Validate ZFS disk scanning and import w/out zpool.cache configuration. -zconfig_test2() { +test_2() { local POOL_NAME=test2 local TMP_FILE1=`mktemp` local TMP_FILE2=`mktemp` local TMP_CACHE=`mktemp -p /tmp zpool.cache.XXXXXXXX` - echo -n "test 2 - scan disks for pools to import: " - # Create a pool save its status for comparison. ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 1 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 || fail 2 ${ZPOOL} status ${POOL_NAME} >${TMP_FILE1} || fail 3 # Unload the module stack, remove the cache file, load the module # stack and attempt to probe the disks to import the pool. As # a cross check verify the old pool state against the imported. ${ZFS_SH} -u || fail 4 rm -f ${TMP_CACHE} || fail 5 ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 6 ${ZPOOL} import | grep ${POOL_NAME} >/dev/null || fail 7 ${ZPOOL} import ${POOL_NAME} || fail 8 ${ZPOOL} status ${POOL_NAME} >${TMP_FILE2} || fail 9 cmp ${TMP_FILE1} ${TMP_FILE2} || fail 10 # Cleanup the test pool and temporary files ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 -d || fail 11 rm -f ${TMP_FILE1} ${TMP_FILE2} || fail 12 ${ZFS_SH} -u || fail 13 pass } -zconfig_test2 +run_test 2 "scan disks for pools to import" zconfig_zvol_device_stat() { local EXPECT=$1 local POOL_NAME=/dev/$2 local ZVOL_NAME=/dev/$3 local SNAP_NAME=/dev/$4 local CLONE_NAME=/dev/$5 local COUNT=0 # Briefly delay for udev sleep 1 # Pool exists stat ${POOL_NAME} &>/dev/null && let COUNT=$COUNT+1 # Volume and partitions stat ${ZVOL_NAME} &>/dev/null && let COUNT=$COUNT+1 stat ${ZVOL_NAME}1 &>/dev/null && let COUNT=$COUNT+1 stat ${ZVOL_NAME}2 &>/dev/null && let COUNT=$COUNT+1 # Snapshot with partitions stat ${SNAP_NAME} &>/dev/null && let COUNT=$COUNT+1 stat ${SNAP_NAME}1 &>/dev/null && let COUNT=$COUNT+1 stat ${SNAP_NAME}2 &>/dev/null && let COUNT=$COUNT+1 # Clone with partitions stat ${CLONE_NAME} &>/dev/null && let COUNT=$COUNT+1 stat ${CLONE_NAME}1 &>/dev/null && let COUNT=$COUNT+1 stat ${CLONE_NAME}2 &>/dev/null && let COUNT=$COUNT+1 if [ $EXPECT -ne $COUNT ]; then return 1 fi return 0 } # zpool import/export device check # (1 volume, 2 partitions, 1 snapshot, 1 clone) -zconfig_test3() { +test_3() { local POOL_NAME=tank local ZVOL_NAME=volume local SNAP_NAME=snap local CLONE_NAME=clone local FULL_ZVOL_NAME=${POOL_NAME}/${ZVOL_NAME} local FULL_SNAP_NAME=${POOL_NAME}/${ZVOL_NAME}@${SNAP_NAME} local FULL_CLONE_NAME=${POOL_NAME}/${CLONE_NAME} local TMP_CACHE=`mktemp -p /tmp zpool.cache.XXXXXXXX` - echo -n "test 3 - zpool import/export device: " - # Create a pool, volume, partition, snapshot, and clone. ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 1 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 || fail 2 ${ZFS} create -V 100M ${FULL_ZVOL_NAME} || fail 3 zconfig_partition /dev/${FULL_ZVOL_NAME} 0 64 || fail 4 ${ZFS} snapshot ${FULL_SNAP_NAME} || fail 5 ${ZFS} clone ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 6 # Verify the devices were created zconfig_zvol_device_stat 10 ${POOL_NAME} ${FULL_ZVOL_NAME} \ ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 7 # Export the pool ${ZPOOL} export ${POOL_NAME} || fail 8 # verify the devices were removed zconfig_zvol_device_stat 0 ${POOL_NAME} ${FULL_ZVOL_NAME} \ ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 9 # Import the pool, wait 1 second for udev ${ZPOOL} import ${POOL_NAME} || fail 10 # Verify the devices were created zconfig_zvol_device_stat 10 ${POOL_NAME} ${FULL_ZVOL_NAME} \ ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 11 # Destroy the pool and consequently the devices ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 -d || fail 12 # verify the devices were removed zconfig_zvol_device_stat 0 ${POOL_NAME} ${FULL_ZVOL_NAME} \ ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 13 ${ZFS_SH} -u || fail 14 rm -f ${TMP_CACHE} || fail 15 pass } -zconfig_test3 +run_test 3 "zpool import/export device" # zpool insmod/rmmod device check (1 volume, 1 snapshot, 1 clone) -zconfig_test4() { +test_4() { POOL_NAME=tank ZVOL_NAME=volume SNAP_NAME=snap CLONE_NAME=clone FULL_ZVOL_NAME=${POOL_NAME}/${ZVOL_NAME} FULL_SNAP_NAME=${POOL_NAME}/${ZVOL_NAME}@${SNAP_NAME} FULL_CLONE_NAME=${POOL_NAME}/${CLONE_NAME} TMP_CACHE=`mktemp -p /tmp zpool.cache.XXXXXXXX` - echo -n "test 4 - zpool insmod/rmmod device: " - # Create a pool, volume, snapshot, and clone ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 1 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 || fail 2 ${ZFS} create -V 100M ${FULL_ZVOL_NAME} || fail 3 zconfig_partition /dev/${FULL_ZVOL_NAME} 0 64 || fail 4 ${ZFS} snapshot ${FULL_SNAP_NAME} || fail 5 ${ZFS} clone ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 6 # Verify the devices were created zconfig_zvol_device_stat 10 ${POOL_NAME} ${FULL_ZVOL_NAME} \ ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 7 # Unload the modules ${ZFS_SH} -u || fail 8 # Verify the devices were removed zconfig_zvol_device_stat 0 ${POOL_NAME} ${FULL_ZVOL_NAME} \ ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 9 # Load the modules, wait 1 second for udev ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 10 # Verify the devices were created zconfig_zvol_device_stat 10 ${POOL_NAME} ${FULL_ZVOL_NAME} \ ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 11 # Destroy the pool and consequently the devices ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 -d || fail 12 # Verify the devices were removed zconfig_zvol_device_stat 0 ${POOL_NAME} ${FULL_ZVOL_NAME} \ ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 13 ${ZFS_SH} -u || fail 14 rm -f ${TMP_CACHE} || fail 15 pass } -zconfig_test4 +run_test 4 "zpool insmod/rmmod device" # ZVOL volume sanity check -zconfig_test5() { +test_5() { local POOL_NAME=tank local ZVOL_NAME=fish local FULL_NAME=${POOL_NAME}/${ZVOL_NAME} local SRC_DIR=/bin/ local TMP_CACHE=`mktemp -p /tmp zpool.cache.XXXXXXXX` - echo -n "test 5 - zvol+ext3 volume: " - # Create a pool and volume. ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 1 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 || fail 2 ${ZFS} create -V 400M ${FULL_NAME} || fail 3 # Partition the volume, for a 400M volume there will be # 812 cylinders, 16 heads, and 63 sectors per track. zconfig_partition /dev/${FULL_NAME} 0 812 # Format the partition with ext3. /sbin/mkfs.ext3 -q /dev/${FULL_NAME}1 || fail 5 # Mount the ext3 filesystem and copy some data to it. mkdir -p /tmp/${ZVOL_NAME}1 || fail 6 mount /dev/${FULL_NAME}1 /tmp/${ZVOL_NAME}1 || fail 7 cp -RL ${SRC_DIR} /tmp/${ZVOL_NAME}1 || fail 8 sync # Verify the copied files match the original files. diff -ur ${SRC_DIR} /tmp/${ZVOL_NAME}1${SRC_DIR} &>/dev/null || fail 9 # Remove the files, umount, destroy the volume and pool. rm -Rf /tmp/${ZVOL_NAME}1${SRC_DIR}* || fail 10 umount /tmp/${ZVOL_NAME}1 || fail 11 rmdir /tmp/${ZVOL_NAME}1 || fail 12 ${ZFS} destroy ${FULL_NAME} || fail 13 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 -d || fail 14 ${ZFS_SH} -u || fail 15 rm -f ${TMP_CACHE} || fail 16 pass } -zconfig_test5 +run_test 5 "zvol+ext3 volume" # ZVOL snapshot sanity check -zconfig_test6() { +test_6() { local POOL_NAME=tank local ZVOL_NAME=fish local SNAP_NAME=pristine local FULL_ZVOL_NAME=${POOL_NAME}/${ZVOL_NAME} local FULL_SNAP_NAME=${POOL_NAME}/${ZVOL_NAME}@${SNAP_NAME} local SRC_DIR=/bin/ local TMP_CACHE=`mktemp -p /tmp zpool.cache.XXXXXXXX` - echo -n "test 6 - zvol+ext2 snapshot: " - # Create a pool and volume. ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 1 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 || fail 2 ${ZFS} create -V 400M ${FULL_ZVOL_NAME} || fail 3 # Partition the volume, for a 400M volume there will be # 812 cylinders, 16 heads, and 63 sectors per track. zconfig_partition /dev/${FULL_ZVOL_NAME} 0 812 # Format the partition with ext2 (no journal). /sbin/mkfs.ext2 -q /dev/${FULL_ZVOL_NAME}1 || fail 5 # Mount the ext3 filesystem and copy some data to it. mkdir -p /tmp/${ZVOL_NAME}1 || fail 6 mount /dev/${FULL_ZVOL_NAME}1 /tmp/${ZVOL_NAME}1 || fail 7 # Snapshot the pristine ext2 filesystem and mount it read-only. ${ZFS} snapshot ${FULL_SNAP_NAME} && sleep 1 || fail 8 mkdir -p /tmp/${SNAP_NAME}1 || fail 9 mount /dev/${FULL_SNAP_NAME}1 /tmp/${SNAP_NAME}1 &>/dev/null || fail 10 # Copy to original volume cp -RL ${SRC_DIR} /tmp/${ZVOL_NAME}1 || fail 11 sync # Verify the copied files match the original files, # and the copied files do NOT appear in the snapshot. diff -ur ${SRC_DIR} /tmp/${ZVOL_NAME}1${SRC_DIR} &>/dev/null || fail 12 diff -ur ${SRC_DIR} /tmp/${SNAP_NAME}1${SRC_DIR} &>/dev/null && fail 13 # umount, destroy the snapshot, volume, and pool. umount /tmp/${SNAP_NAME}1 || fail 14 rmdir /tmp/${SNAP_NAME}1 || fail 15 ${ZFS} destroy ${FULL_SNAP_NAME} || fail 16 umount /tmp/${ZVOL_NAME}1 || fail 17 rmdir /tmp/${ZVOL_NAME}1 || fail 18 ${ZFS} destroy ${FULL_ZVOL_NAME} || fail 19 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 -d || fail 20 ${ZFS_SH} -u || fail 21 rm -f ${TMP_CACHE} || fail 22 pass } -zconfig_test6 +run_test 6 "zvol+ext2 snapshot" # ZVOL clone sanity check -zconfig_test7() { +test_7() { local POOL_NAME=tank local ZVOL_NAME=fish local SNAP_NAME=pristine local CLONE_NAME=clone local FULL_ZVOL_NAME=${POOL_NAME}/${ZVOL_NAME} local FULL_SNAP_NAME=${POOL_NAME}/${ZVOL_NAME}@${SNAP_NAME} local FULL_CLONE_NAME=${POOL_NAME}/${CLONE_NAME} local SRC_DIR=/bin/ local TMP_CACHE=`mktemp -p /tmp zpool.cache.XXXXXXXX` - echo -n "test 7 - zvol+ext2 clone: " - # Create a pool and volume. ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 1 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 || fail 2 ${ZFS} create -V 400M ${FULL_ZVOL_NAME} || fail 3 # Partition the volume, for a 400M volume there will be # 812 cylinders, 16 heads, and 63 sectors per track. zconfig_partition /dev/${FULL_ZVOL_NAME} 0 812 # Format the partition with ext2 (no journal). /sbin/mkfs.ext2 -q /dev/${FULL_ZVOL_NAME}1 || fail 5 # Mount the ext3 filesystem and copy some data to it. mkdir -p /tmp/${ZVOL_NAME}1 || fail 6 mount /dev/${FULL_ZVOL_NAME}1 /tmp/${ZVOL_NAME}1 || fail 7 # Snapshot the pristine ext2 filesystem and mount it read-only. ${ZFS} snapshot ${FULL_SNAP_NAME} && sleep 1 || fail 8 mkdir -p /tmp/${SNAP_NAME}1 || fail 9 mount /dev/${FULL_SNAP_NAME}1 /tmp/${SNAP_NAME}1 &>/dev/null || fail 10 # Copy to original volume. cp -RL ${SRC_DIR} /tmp/${ZVOL_NAME}1 || fail 11 sync # Verify the copied files match the original files, # and the copied files do NOT appear in the snapshot. diff -ur ${SRC_DIR} /tmp/${ZVOL_NAME}1${SRC_DIR} &>/dev/null || fail 12 diff -ur ${SRC_DIR} /tmp/${SNAP_NAME}1${SRC_DIR} &>/dev/null && fail 13 # Clone from the original pristine snapshot ${ZFS} clone ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} && sleep 1 || fail 14 mkdir -p /tmp/${CLONE_NAME}1 || fail 15 mount /dev/${FULL_CLONE_NAME}1 /tmp/${CLONE_NAME}1 || fail 16 # Verify the clone matches the pristine snapshot, # and the files copied to the original volume are NOT there. diff -ur /tmp/${SNAP_NAME}1 /tmp/${CLONE_NAME}1 &>/dev/null || fail 17 diff -ur /tmp/${ZVOL_NAME}1 /tmp/${CLONE_NAME}1 &>/dev/null && fail 18 # Copy to cloned volume. cp -RL ${SRC_DIR} /tmp/${CLONE_NAME}1 || fail 19 sync # Verify the clone matches the modified original volume. diff -ur /tmp/${ZVOL_NAME}1 /tmp/${CLONE_NAME}1 &>/dev/null || fail 20 # umount, destroy the snapshot, volume, and pool. umount /tmp/${CLONE_NAME}1 || fail 21 rmdir /tmp/${CLONE_NAME}1 || fail 22 ${ZFS} destroy ${FULL_CLONE_NAME} || fail 23 umount /tmp/${SNAP_NAME}1 || fail 24 rmdir /tmp/${SNAP_NAME}1 || fail 25 ${ZFS} destroy ${FULL_SNAP_NAME} || fail 26 umount /tmp/${ZVOL_NAME}1 || fail 27 rmdir /tmp/${ZVOL_NAME}1 || fail 28 ${ZFS} destroy ${FULL_ZVOL_NAME} || fail 29 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 -d || fail 30 ${ZFS_SH} -u || fail 31 rm -f ${TMP_CACHE} || fail 32 pass } -zconfig_test7 +run_test 7 "zvol+ext2 clone" # Send/Receive sanity check test_8() { local POOL_NAME1=tank1 local POOL_NAME2=tank2 local ZVOL_NAME=fish local SNAP_NAME=snap local FULL_ZVOL_NAME1=${POOL_NAME1}/${ZVOL_NAME} local FULL_ZVOL_NAME2=${POOL_NAME2}/${ZVOL_NAME} local FULL_SNAP_NAME1=${POOL_NAME1}/${ZVOL_NAME}@${SNAP_NAME} local FULL_SNAP_NAME2=${POOL_NAME2}/${ZVOL_NAME}@${SNAP_NAME} local SRC_DIR=/bin/ local TMP_CACHE=`mktemp -p /tmp zpool.cache.XXXXXXXX` # Create two pools and a volume ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 1 ${ZPOOL_CREATE_SH} -p ${POOL_NAME1} -c lo-raidz2 || fail 2 ${ZPOOL_CREATE_SH} -p ${POOL_NAME2} -c lo-raidz2 || fail 3 ${ZFS} create -V 400M ${FULL_ZVOL_NAME1} || fail 4 # Partition the volume, for a 400M volume there will be # 812 cylinders, 16 heads, and 63 sectors per track. zconfig_partition /dev/${FULL_ZVOL_NAME1} 0 812 # Format the partition with ext2. /sbin/mkfs.ext2 -q /dev/${FULL_ZVOL_NAME1}1 || fail 5 # Mount the ext3 filesystem and copy some data to it. mkdir -p /tmp/${FULL_ZVOL_NAME1}1 || fail 6 mount /dev/${FULL_ZVOL_NAME1}1 /tmp/${FULL_ZVOL_NAME1}1 || fail 7 cp -RL ${SRC_DIR} /tmp/${FULL_ZVOL_NAME1}1 || fail 8 sync || fail 9 # Snapshot the ext3 filesystem so it may be sent. ${ZFS} snapshot ${FULL_SNAP_NAME1} && sleep 1 || fail 11 # Send/receive the snapshot from POOL_NAME1 to POOL_NAME2 (${ZFS} send ${FULL_SNAP_NAME1} | \ ${ZFS} receive ${FULL_ZVOL_NAME2}) && sleep 1 || fail 12 # Mount the sent ext3 filesystem. mkdir -p /tmp/${FULL_ZVOL_NAME2}1 || fail 13 mount /dev/${FULL_ZVOL_NAME2}1 /tmp/${FULL_ZVOL_NAME2}1 || fail 14 # Verify the contents of the volumes match diff -ur /tmp/${FULL_ZVOL_NAME1}1 /tmp/${FULL_ZVOL_NAME2}1 \ &>/dev/null || fail 15 # Umount, destroy the volume and pool. umount /tmp/${FULL_ZVOL_NAME1}1 || fail 16 umount /tmp/${FULL_ZVOL_NAME2}1 || fail 17 rmdir /tmp/${FULL_ZVOL_NAME1}1 || fail 18 rmdir /tmp/${FULL_ZVOL_NAME2}1 || fail 19 rmdir /tmp/${POOL_NAME1} || fail 20 rmdir /tmp/${POOL_NAME2} || fail 21 ${ZFS} destroy ${FULL_SNAP_NAME1} || fail 22 ${ZFS} destroy ${FULL_SNAP_NAME2} || fail 23 ${ZFS} destroy ${FULL_ZVOL_NAME1} || fail 24 ${ZFS} destroy ${FULL_ZVOL_NAME2} || fail 25 ${ZPOOL_CREATE_SH} -p ${POOL_NAME1} -c lo-raidz2 -d || fail 26 ${ZPOOL_CREATE_SH} -p ${POOL_NAME2} -c lo-raidz2 -d || fail 27 ${ZFS_SH} -u || fail 28 rm -f ${TMP_CACHE} || fail 29 pass } run_test 8 "zfs send/receive" # zpool event sanity check test_9() { local POOL_NAME=tank local ZVOL_NAME=fish local FULL_NAME=${POOL_NAME}/${ZVOL_NAME} local TMP_CACHE=`mktemp -p /tmp zpool.cache.XXXXXXXX` local TMP_EVENTS=`mktemp -p /tmp zpool.events.XXXXXXXX` # Create a pool and volume. ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 1 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 || fail 2 ${ZFS} create -V 400M ${FULL_NAME} || fail 3 # Dump the events, there should be at least 5 lines. ${ZPOOL} events >${TMP_EVENTS} || fail 4 EVENTS=`wc -l ${TMP_EVENTS} | cut -f1 -d' '` [ $EVENTS -lt 5 ] && fail 5 # Clear the events and ensure there are none. ${ZPOOL} events -c >/dev/null || fail 6 ${ZPOOL} events >${TMP_EVENTS} || fail 7 EVENTS=`wc -l ${TMP_EVENTS} | cut -f1 -d' '` [ $EVENTS -gt 1 ] && fail 8 ${ZFS} destroy ${FULL_NAME} || fail 9 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 -d || fail 10 ${ZFS_SH} -u || fail 11 rm -f ${TMP_CACHE} || fail 12 rm -f ${TMP_EVENTS} || fail 13 pass } run_test 9 "zpool events" exit 0