Index: stable/11/include/rpcsvc/rquota.x =================================================================== --- stable/11/include/rpcsvc/rquota.x (revision 339007) +++ stable/11/include/rpcsvc/rquota.x (revision 339008) @@ -1,67 +1,144 @@ +/* @(#)rquota.x 2.1 88/08/01 4.0 RPCSRC */ +/* @(#)rquota.x 1.2 87/09/20 Copyr 1987 Sun Micro */ + /* * Remote quota protocol * Requires unix authentication */ #ifndef RPC_HDR -%#ifndef lint -%/*static char sccsid[] = "from: @(#)rquota.x 1.2 87/09/20 Copyr 1987 Sun Micro";*/ -%/*static char sccsid[] = "from: @(#)rquota.x 2.1 88/08/01 4.0 RPCSRC";*/ -%#endif /* not lint */ %#include %__FBSDID("$FreeBSD$"); #endif const RQ_PATHLEN = 1024; +struct sq_dqblk { + unsigned int rq_bhardlimit; /* absolute limit on disk blks alloc */ + unsigned int rq_bsoftlimit; /* preferred limit on disk blks */ + unsigned int rq_curblocks; /* current block count */ + unsigned int rq_fhardlimit; /* absolute limit on allocated files */ + unsigned int rq_fsoftlimit; /* preferred file limit */ + unsigned int rq_curfiles; /* current # allocated files */ + unsigned int rq_btimeleft; /* time left for excessive disk use */ + unsigned int rq_ftimeleft; /* time left for excessive files */ +}; + struct getquota_args { string gqa_pathp; /* path to filesystem of interest */ - int gqa_uid; /* inquire about quota for uid */ + int gqa_uid; /* Inquire about quota for uid */ }; +struct setquota_args { + int sqa_qcmd; + string sqa_pathp; /* path to filesystem of interest */ + int sqa_id; /* Set quota for uid */ + sq_dqblk sqa_dqblk; +}; + +struct ext_getquota_args { + string gqa_pathp; /* path to filesystem of interest */ + int gqa_type; /* Type of quota info is needed about */ + int gqa_id; /* Inquire about quota for id */ +}; + +struct ext_setquota_args { + int sqa_qcmd; + string sqa_pathp; /* path to filesystem of interest */ + int sqa_id; /* Set quota for id */ + int sqa_type; /* Type of quota to set */ + sq_dqblk sqa_dqblk; +}; + /* * remote quota structure */ struct rquota { int rq_bsize; /* block size for block counts */ bool rq_active; /* indicates whether quota is active */ unsigned int rq_bhardlimit; /* absolute limit on disk blks alloc */ unsigned int rq_bsoftlimit; /* preferred limit on disk blks */ unsigned int rq_curblocks; /* current block count */ unsigned int rq_fhardlimit; /* absolute limit on allocated files */ unsigned int rq_fsoftlimit; /* preferred file limit */ unsigned int rq_curfiles; /* current # allocated files */ unsigned int rq_btimeleft; /* time left for excessive disk use */ unsigned int rq_ftimeleft; /* time left for excessive files */ }; enum gqr_status { Q_OK = 1, /* quota returned */ - Q_NOQUOTA = 2, /* noquota for uid */ + Q_NOQUOTA = 2, /* noquota for uid */ Q_EPERM = 3 /* no permission to access quota */ }; union getquota_rslt switch (gqr_status status) { case Q_OK: rquota gqr_rquota; /* valid if status == Q_OK */ case Q_NOQUOTA: void; case Q_EPERM: void; }; +union setquota_rslt switch (gqr_status status) { +case Q_OK: + rquota sqr_rquota; /* valid if status == Q_OK */ +case Q_NOQUOTA: + void; +case Q_EPERM: + void; +}; + program RQUOTAPROG { version RQUOTAVERS { /* * Get all quotas */ getquota_rslt RQUOTAPROC_GETQUOTA(getquota_args) = 1; /* * Get active quotas only */ getquota_rslt RQUOTAPROC_GETACTIVEQUOTA(getquota_args) = 2; + + /* + * Set all quotas + */ + setquota_rslt + RQUOTAPROC_SETQUOTA(setquota_args) = 3; + + /* + * Get active quotas only + */ + setquota_rslt + RQUOTAPROC_SETACTIVEQUOTA(setquota_args) = 4; } = 1; + version EXT_RQUOTAVERS { + /* + * Get all quotas + */ + getquota_rslt + RQUOTAPROC_GETQUOTA(ext_getquota_args) = 1; + + /* + * Get active quotas only + */ + getquota_rslt + RQUOTAPROC_GETACTIVEQUOTA(ext_getquota_args) = 2; + + /* + * Set all quotas + */ + setquota_rslt + RQUOTAPROC_SETQUOTA(ext_setquota_args) = 3; + + /* + * Set active quotas only + */ + setquota_rslt + RQUOTAPROC_SETACTIVEQUOTA(ext_setquota_args) = 4; + } = 2; } = 100011; Index: stable/11/lib/libutil/quotafile.c =================================================================== --- stable/11/lib/libutil/quotafile.c (revision 339007) +++ stable/11/lib/libutil/quotafile.c (revision 339008) @@ -1,595 +1,598 @@ /*- * Copyright (c) 2008 Dag-Erling Coïdan Smørgrav * Copyright (c) 2008 Marshall Kirk McKusick * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct quotafile { int fd; /* -1 means using quotactl for access */ int accmode; /* access mode */ int wordsize; /* 32-bit or 64-bit limits */ int quotatype; /* USRQUOTA or GRPQUOTA */ dev_t dev; /* device */ char fsname[MAXPATHLEN + 1]; /* mount point of filesystem */ char qfname[MAXPATHLEN + 1]; /* quota file if not using quotactl */ }; static const char *qfextension[] = INITQFNAMES; /* * Check to see if a particular quota is to be enabled. */ static int hasquota(struct fstab *fs, int type, char *qfnamep, int qfbufsize) { char *opt; char *cp; struct statfs sfb; char buf[BUFSIZ]; static char initname, usrname[100], grpname[100]; /* * 1) we only need one of these * 2) fstab may specify a different filename */ if (!initname) { (void)snprintf(usrname, sizeof(usrname), "%s%s", qfextension[USRQUOTA], QUOTAFILENAME); (void)snprintf(grpname, sizeof(grpname), "%s%s", qfextension[GRPQUOTA], QUOTAFILENAME); initname = 1; } strcpy(buf, fs->fs_mntops); for (opt = strtok(buf, ","); opt; opt = strtok(NULL, ",")) { if ((cp = strchr(opt, '='))) *cp++ = '\0'; if (type == USRQUOTA && strcmp(opt, usrname) == 0) break; if (type == GRPQUOTA && strcmp(opt, grpname) == 0) break; } if (!opt) return (0); /* * Ensure that the filesystem is mounted. */ if (statfs(fs->fs_file, &sfb) != 0 || strcmp(fs->fs_file, sfb.f_mntonname)) { return (0); } if (cp) { strncpy(qfnamep, cp, qfbufsize); } else { (void)snprintf(qfnamep, qfbufsize, "%s/%s.%s", fs->fs_file, QUOTAFILENAME, qfextension[type]); } return (1); } struct quotafile * quota_open(struct fstab *fs, int quotatype, int openflags) { struct quotafile *qf; struct dqhdr64 dqh; struct group *grp; struct stat st; int qcmd, serrno; - if (strcmp(fs->fs_vfstype, "ufs")) - return (NULL); if ((qf = calloc(1, sizeof(*qf))) == NULL) return (NULL); qf->fd = -1; qf->quotatype = quotatype; strlcpy(qf->fsname, fs->fs_file, sizeof(qf->fsname)); if (stat(qf->fsname, &st) != 0) goto error; qf->dev = st.st_dev; - serrno = hasquota(fs, quotatype, qf->qfname, sizeof(qf->qfname)); qcmd = QCMD(Q_GETQUOTASIZE, quotatype); if (quotactl(qf->fsname, qcmd, 0, &qf->wordsize) == 0) return (qf); + /* We only check the quota file for ufs */ + if (strcmp(fs->fs_vfstype, "ufs")) { + errno = 0; + goto error; + } + serrno = hasquota(fs, quotatype, qf->qfname, sizeof(qf->qfname)); if (serrno == 0) { errno = EOPNOTSUPP; goto error; } qf->accmode = openflags & O_ACCMODE; if ((qf->fd = open(qf->qfname, qf->accmode|O_CLOEXEC)) < 0 && (openflags & O_CREAT) != O_CREAT) goto error; /* File open worked, so process it */ if (qf->fd != -1) { qf->wordsize = 32; switch (read(qf->fd, &dqh, sizeof(dqh))) { case -1: goto error; case sizeof(dqh): if (strcmp(dqh.dqh_magic, Q_DQHDR64_MAGIC) != 0) { /* no magic, assume 32 bits */ qf->wordsize = 32; return (qf); } if (be32toh(dqh.dqh_version) != Q_DQHDR64_VERSION || be32toh(dqh.dqh_hdrlen) != sizeof(struct dqhdr64) || be32toh(dqh.dqh_reclen) != sizeof(struct dqblk64)) { /* correct magic, wrong version / lengths */ errno = EINVAL; goto error; } qf->wordsize = 64; return (qf); default: qf->wordsize = 32; return (qf); } /* not reached */ } /* open failed, but O_CREAT was specified, so create a new file */ if ((qf->fd = open(qf->qfname, O_RDWR|O_CREAT|O_TRUNC|O_CLOEXEC, 0)) < 0) goto error; qf->wordsize = 64; memset(&dqh, 0, sizeof(dqh)); memcpy(dqh.dqh_magic, Q_DQHDR64_MAGIC, sizeof(dqh.dqh_magic)); dqh.dqh_version = htobe32(Q_DQHDR64_VERSION); dqh.dqh_hdrlen = htobe32(sizeof(struct dqhdr64)); dqh.dqh_reclen = htobe32(sizeof(struct dqblk64)); if (write(qf->fd, &dqh, sizeof(dqh)) != sizeof(dqh)) { /* it was one we created ourselves */ unlink(qf->qfname); goto error; } grp = getgrnam(QUOTAGROUP); fchown(qf->fd, 0, grp ? grp->gr_gid : 0); fchmod(qf->fd, 0640); return (qf); error: serrno = errno; /* did we have an open file? */ if (qf->fd != -1) close(qf->fd); free(qf); errno = serrno; return (NULL); } void quota_close(struct quotafile *qf) { if (qf->fd != -1) close(qf->fd); free(qf); } int quota_on(struct quotafile *qf) { int qcmd; qcmd = QCMD(Q_QUOTAON, qf->quotatype); return (quotactl(qf->fsname, qcmd, 0, qf->qfname)); } int quota_off(struct quotafile *qf) { return (quotactl(qf->fsname, QCMD(Q_QUOTAOFF, qf->quotatype), 0, 0)); } const char * quota_fsname(const struct quotafile *qf) { return (qf->fsname); } const char * quota_qfname(const struct quotafile *qf) { return (qf->qfname); } int quota_check_path(const struct quotafile *qf, const char *path) { struct stat st; if (stat(path, &st) == -1) return (-1); return (st.st_dev == qf->dev); } int quota_maxid(struct quotafile *qf) { struct stat st; int maxid; if (stat(qf->qfname, &st) < 0) return (0); switch (qf->wordsize) { case 32: maxid = st.st_size / sizeof(struct dqblk32) - 1; break; case 64: maxid = st.st_size / sizeof(struct dqblk64) - 2; break; default: maxid = 0; break; } return (maxid > 0 ? maxid : 0); } static int quota_read32(struct quotafile *qf, struct dqblk *dqb, int id) { struct dqblk32 dqb32; off_t off; off = id * sizeof(struct dqblk32); if (lseek(qf->fd, off, SEEK_SET) == -1) return (-1); switch (read(qf->fd, &dqb32, sizeof(dqb32))) { case 0: memset(dqb, 0, sizeof(*dqb)); return (0); case sizeof(dqb32): dqb->dqb_bhardlimit = dqb32.dqb_bhardlimit; dqb->dqb_bsoftlimit = dqb32.dqb_bsoftlimit; dqb->dqb_curblocks = dqb32.dqb_curblocks; dqb->dqb_ihardlimit = dqb32.dqb_ihardlimit; dqb->dqb_isoftlimit = dqb32.dqb_isoftlimit; dqb->dqb_curinodes = dqb32.dqb_curinodes; dqb->dqb_btime = dqb32.dqb_btime; dqb->dqb_itime = dqb32.dqb_itime; return (0); default: return (-1); } } static int quota_read64(struct quotafile *qf, struct dqblk *dqb, int id) { struct dqblk64 dqb64; off_t off; off = sizeof(struct dqhdr64) + id * sizeof(struct dqblk64); if (lseek(qf->fd, off, SEEK_SET) == -1) return (-1); switch (read(qf->fd, &dqb64, sizeof(dqb64))) { case 0: memset(dqb, 0, sizeof(*dqb)); return (0); case sizeof(dqb64): dqb->dqb_bhardlimit = be64toh(dqb64.dqb_bhardlimit); dqb->dqb_bsoftlimit = be64toh(dqb64.dqb_bsoftlimit); dqb->dqb_curblocks = be64toh(dqb64.dqb_curblocks); dqb->dqb_ihardlimit = be64toh(dqb64.dqb_ihardlimit); dqb->dqb_isoftlimit = be64toh(dqb64.dqb_isoftlimit); dqb->dqb_curinodes = be64toh(dqb64.dqb_curinodes); dqb->dqb_btime = be64toh(dqb64.dqb_btime); dqb->dqb_itime = be64toh(dqb64.dqb_itime); return (0); default: return (-1); } } int quota_read(struct quotafile *qf, struct dqblk *dqb, int id) { int qcmd; if (qf->fd == -1) { qcmd = QCMD(Q_GETQUOTA, qf->quotatype); return (quotactl(qf->fsname, qcmd, id, dqb)); } switch (qf->wordsize) { case 32: return (quota_read32(qf, dqb, id)); case 64: return (quota_read64(qf, dqb, id)); default: errno = EINVAL; return (-1); } /* not reached */ } #define CLIP32(u64) ((u64) > UINT32_MAX ? UINT32_MAX : (uint32_t)(u64)) static int quota_write32(struct quotafile *qf, const struct dqblk *dqb, int id) { struct dqblk32 dqb32; off_t off; dqb32.dqb_bhardlimit = CLIP32(dqb->dqb_bhardlimit); dqb32.dqb_bsoftlimit = CLIP32(dqb->dqb_bsoftlimit); dqb32.dqb_curblocks = CLIP32(dqb->dqb_curblocks); dqb32.dqb_ihardlimit = CLIP32(dqb->dqb_ihardlimit); dqb32.dqb_isoftlimit = CLIP32(dqb->dqb_isoftlimit); dqb32.dqb_curinodes = CLIP32(dqb->dqb_curinodes); dqb32.dqb_btime = CLIP32(dqb->dqb_btime); dqb32.dqb_itime = CLIP32(dqb->dqb_itime); off = id * sizeof(struct dqblk32); if (lseek(qf->fd, off, SEEK_SET) == -1) return (-1); if (write(qf->fd, &dqb32, sizeof(dqb32)) == sizeof(dqb32)) return (0); return (-1); } static int quota_write64(struct quotafile *qf, const struct dqblk *dqb, int id) { struct dqblk64 dqb64; off_t off; dqb64.dqb_bhardlimit = htobe64(dqb->dqb_bhardlimit); dqb64.dqb_bsoftlimit = htobe64(dqb->dqb_bsoftlimit); dqb64.dqb_curblocks = htobe64(dqb->dqb_curblocks); dqb64.dqb_ihardlimit = htobe64(dqb->dqb_ihardlimit); dqb64.dqb_isoftlimit = htobe64(dqb->dqb_isoftlimit); dqb64.dqb_curinodes = htobe64(dqb->dqb_curinodes); dqb64.dqb_btime = htobe64(dqb->dqb_btime); dqb64.dqb_itime = htobe64(dqb->dqb_itime); off = sizeof(struct dqhdr64) + id * sizeof(struct dqblk64); if (lseek(qf->fd, off, SEEK_SET) == -1) return (-1); if (write(qf->fd, &dqb64, sizeof(dqb64)) == sizeof(dqb64)) return (0); return (-1); } int quota_write_usage(struct quotafile *qf, struct dqblk *dqb, int id) { struct dqblk dqbuf; int qcmd; if (qf->fd == -1) { qcmd = QCMD(Q_SETUSE, qf->quotatype); return (quotactl(qf->fsname, qcmd, id, dqb)); } /* * Have to do read-modify-write of quota in file. */ if ((qf->accmode & O_RDWR) != O_RDWR) { errno = EBADF; return (-1); } if (quota_read(qf, &dqbuf, id) != 0) return (-1); /* * Reset time limit if have a soft limit and were * previously under it, but are now over it. */ if (dqbuf.dqb_bsoftlimit && id != 0 && dqbuf.dqb_curblocks < dqbuf.dqb_bsoftlimit && dqb->dqb_curblocks >= dqbuf.dqb_bsoftlimit) dqbuf.dqb_btime = 0; if (dqbuf.dqb_isoftlimit && id != 0 && dqbuf.dqb_curinodes < dqbuf.dqb_isoftlimit && dqb->dqb_curinodes >= dqbuf.dqb_isoftlimit) dqbuf.dqb_itime = 0; dqbuf.dqb_curinodes = dqb->dqb_curinodes; dqbuf.dqb_curblocks = dqb->dqb_curblocks; /* * Write it back. */ switch (qf->wordsize) { case 32: return (quota_write32(qf, &dqbuf, id)); case 64: return (quota_write64(qf, &dqbuf, id)); default: errno = EINVAL; return (-1); } /* not reached */ } int quota_write_limits(struct quotafile *qf, struct dqblk *dqb, int id) { struct dqblk dqbuf; int qcmd; if (qf->fd == -1) { qcmd = QCMD(Q_SETQUOTA, qf->quotatype); return (quotactl(qf->fsname, qcmd, id, dqb)); } /* * Have to do read-modify-write of quota in file. */ if ((qf->accmode & O_RDWR) != O_RDWR) { errno = EBADF; return (-1); } if (quota_read(qf, &dqbuf, id) != 0) return (-1); /* * Reset time limit if have a soft limit and were * previously under it, but are now over it * or if there previously was no soft limit, but * now have one and are over it. */ if (dqbuf.dqb_bsoftlimit && id != 0 && dqbuf.dqb_curblocks < dqbuf.dqb_bsoftlimit && dqbuf.dqb_curblocks >= dqb->dqb_bsoftlimit) dqb->dqb_btime = 0; if (dqbuf.dqb_bsoftlimit == 0 && id != 0 && dqb->dqb_bsoftlimit > 0 && dqbuf.dqb_curblocks >= dqb->dqb_bsoftlimit) dqb->dqb_btime = 0; if (dqbuf.dqb_isoftlimit && id != 0 && dqbuf.dqb_curinodes < dqbuf.dqb_isoftlimit && dqbuf.dqb_curinodes >= dqb->dqb_isoftlimit) dqb->dqb_itime = 0; if (dqbuf.dqb_isoftlimit == 0 && id !=0 && dqb->dqb_isoftlimit > 0 && dqbuf.dqb_curinodes >= dqb->dqb_isoftlimit) dqb->dqb_itime = 0; dqb->dqb_curinodes = dqbuf.dqb_curinodes; dqb->dqb_curblocks = dqbuf.dqb_curblocks; /* * Write it back. */ switch (qf->wordsize) { case 32: return (quota_write32(qf, dqb, id)); case 64: return (quota_write64(qf, dqb, id)); default: errno = EINVAL; return (-1); } /* not reached */ } /* * Convert a quota file from one format to another. */ int quota_convert(struct quotafile *qf, int wordsize) { struct quotafile *newqf; struct dqhdr64 dqh; struct dqblk dqblk; struct group *grp; int serrno, maxid, id, fd; /* * Quotas must not be active and quotafile must be open * for reading and writing. */ if ((qf->accmode & O_RDWR) != O_RDWR || qf->fd == -1) { errno = EBADF; return (-1); } if ((wordsize != 32 && wordsize != 64) || wordsize == qf->wordsize) { errno = EINVAL; return (-1); } maxid = quota_maxid(qf); if ((newqf = calloc(1, sizeof(*qf))) == NULL) { errno = ENOMEM; return (-1); } *newqf = *qf; snprintf(newqf->qfname, MAXPATHLEN + 1, "%s_%d.orig", qf->qfname, qf->wordsize); if (rename(qf->qfname, newqf->qfname) < 0) { free(newqf); return (-1); } if ((newqf->fd = open(qf->qfname, O_RDWR|O_CREAT|O_TRUNC|O_CLOEXEC, 0)) < 0) { serrno = errno; goto error; } newqf->wordsize = wordsize; if (wordsize == 64) { memset(&dqh, 0, sizeof(dqh)); memcpy(dqh.dqh_magic, Q_DQHDR64_MAGIC, sizeof(dqh.dqh_magic)); dqh.dqh_version = htobe32(Q_DQHDR64_VERSION); dqh.dqh_hdrlen = htobe32(sizeof(struct dqhdr64)); dqh.dqh_reclen = htobe32(sizeof(struct dqblk64)); if (write(newqf->fd, &dqh, sizeof(dqh)) != sizeof(dqh)) { serrno = errno; goto error; } } grp = getgrnam(QUOTAGROUP); fchown(newqf->fd, 0, grp ? grp->gr_gid : 0); fchmod(newqf->fd, 0640); for (id = 0; id <= maxid; id++) { if ((quota_read(qf, &dqblk, id)) < 0) break; switch (newqf->wordsize) { case 32: if ((quota_write32(newqf, &dqblk, id)) < 0) break; continue; case 64: if ((quota_write64(newqf, &dqblk, id)) < 0) break; continue; default: errno = EINVAL; break; } } if (id < maxid) { serrno = errno; goto error; } /* * Update the passed in quotafile to reference the new file * of the converted format size. */ fd = qf->fd; qf->fd = newqf->fd; newqf->fd = fd; qf->wordsize = newqf->wordsize; quota_close(newqf); return (0); error: /* put back the original file */ (void) rename(newqf->qfname, qf->qfname); quota_close(newqf); errno = serrno; return (-1); } Index: stable/11/libexec/rpc.rquotad/rquotad.c =================================================================== --- stable/11/libexec/rpc.rquotad/rquotad.c (revision 339007) +++ stable/11/libexec/rpc.rquotad/rquotad.c (revision 339008) @@ -1,222 +1,317 @@ /* * by Manuel Bouyer (bouyer@ensta.fr) * * There is no copyright, you can use it as you want. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include -static void rquota_service(struct svc_req *request, SVCXPRT *transp); +static void rquota_service_1(struct svc_req *request, SVCXPRT *transp); +static void rquota_service_2(struct svc_req *request, SVCXPRT *transp); static void sendquota(struct svc_req *request, SVCXPRT *transp); -static void initfs(void); -static int getfsquota(long id, char *path, struct dqblk *dqblk); +static void sendquota_extended(struct svc_req *request, SVCXPRT *transp); +static int getfsquota(int type, long id, char *path, struct dqblk *dqblk); -static struct quotafile **qfa; /* array of qfs */ -static int nqf, szqf; /* number of qfs and size of array */ static int from_inetd = 1; +static int debug = 0; static void cleanup(int sig) { (void)sig; (void)rpcb_unset(RQUOTAPROG, RQUOTAVERS, NULL); exit(0); } int -main(void) +main(int argc, char **argv) { SVCXPRT *transp; int ok; struct sockaddr_storage from; socklen_t fromlen; + int vers; + int ch; + while ((ch = getopt(argc, argv, "d")) != -1) { + switch (ch) { + case 'd': + debug++; + break; + default: + break; + } + } + fromlen = sizeof(from); if (getsockname(0, (struct sockaddr *)&from, &fromlen) < 0) from_inetd = 0; if (!from_inetd) { - daemon(0, 0); + if (!debug) + daemon(0, 0); (void)rpcb_unset(RQUOTAPROG, RQUOTAVERS, NULL); (void)signal(SIGINT, cleanup); (void)signal(SIGTERM, cleanup); (void)signal(SIGHUP, cleanup); } openlog("rpc.rquotad", LOG_CONS|LOG_PID, LOG_DAEMON); /* create and register the service */ if (from_inetd) { transp = svc_tli_create(0, NULL, NULL, 0, 0); if (transp == NULL) { syslog(LOG_ERR, "couldn't create udp service."); exit(1); } + vers = RQUOTAVERS; ok = svc_reg(transp, RQUOTAPROG, RQUOTAVERS, - rquota_service, NULL); + rquota_service_1, NULL); + if (ok) { + vers = EXT_RQUOTAVERS; + ok = svc_reg(transp, RQUOTAPROG, EXT_RQUOTAVERS, + rquota_service_2, NULL); + } } else { - ok = svc_create(rquota_service, + vers = RQUOTAVERS; + ok = svc_create(rquota_service_1, RQUOTAPROG, RQUOTAVERS, "udp"); + if (ok) { + vers = EXT_RQUOTAVERS; + ok = svc_create(rquota_service_2, + RQUOTAPROG, EXT_RQUOTAVERS, "udp"); + + } } if (!ok) { syslog(LOG_ERR, - "unable to register (RQUOTAPROG, RQUOTAVERS, %s)", - from_inetd ? "(inetd)" : "udp"); + "unable to register (RQUOTAPROG, %s, %s)", + vers == RQUOTAVERS ? "RQUOTAVERS" : "EXT_RQUOTAVERS", + from_inetd ? "(inetd)" : "udp"); exit(1); } - initfs(); svc_run(); syslog(LOG_ERR, "svc_run returned"); exit(1); } static void -rquota_service(struct svc_req *request, SVCXPRT *transp) +rquota_service_2(struct svc_req *request, SVCXPRT *transp) { switch (request->rq_proc) { case NULLPROC: (void)svc_sendreply(transp, (xdrproc_t)xdr_void, (char *)NULL); break; case RQUOTAPROC_GETQUOTA: case RQUOTAPROC_GETACTIVEQUOTA: + sendquota_extended(request, transp); + break; + default: + svcerr_noproc(transp); + break; + } + if (from_inetd) + exit(0); +} + +static void +rquota_service_1(struct svc_req *request, SVCXPRT *transp) +{ + + switch (request->rq_proc) { + case NULLPROC: + (void)svc_sendreply(transp, (xdrproc_t)xdr_void, (char *)NULL); + break; + case RQUOTAPROC_GETQUOTA: + case RQUOTAPROC_GETACTIVEQUOTA: sendquota(request, transp); break; default: svcerr_noproc(transp); break; } if (from_inetd) exit(0); } /* read quota for the specified id, and send it */ static void sendquota(struct svc_req *request, SVCXPRT *transp) { struct getquota_args getq_args; struct getquota_rslt getq_rslt; struct dqblk dqblk; struct timeval timev; int scale; bzero(&getq_args, sizeof(getq_args)); if (!svc_getargs(transp, (xdrproc_t)xdr_getquota_args, &getq_args)) { svcerr_decode(transp); return; } if (request->rq_cred.oa_flavor != AUTH_UNIX) { /* bad auth */ getq_rslt.status = Q_EPERM; - } else if (!getfsquota(getq_args.gqa_uid, getq_args.gqa_pathp, &dqblk)) { + } else if (!getfsquota(USRQUOTA, getq_args.gqa_uid, getq_args.gqa_pathp, &dqblk)) { /* failed, return noquota */ getq_rslt.status = Q_NOQUOTA; } else { gettimeofday(&timev, NULL); getq_rslt.status = Q_OK; getq_rslt.getquota_rslt_u.gqr_rquota.rq_active = TRUE; scale = 1 << flsll(dqblk.dqb_bhardlimit >> 32); getq_rslt.getquota_rslt_u.gqr_rquota.rq_bsize = DEV_BSIZE * scale; getq_rslt.getquota_rslt_u.gqr_rquota.rq_bhardlimit = dqblk.dqb_bhardlimit / scale; getq_rslt.getquota_rslt_u.gqr_rquota.rq_bsoftlimit = dqblk.dqb_bsoftlimit / scale; getq_rslt.getquota_rslt_u.gqr_rquota.rq_curblocks = dqblk.dqb_curblocks / scale; getq_rslt.getquota_rslt_u.gqr_rquota.rq_fhardlimit = dqblk.dqb_ihardlimit; getq_rslt.getquota_rslt_u.gqr_rquota.rq_fsoftlimit = dqblk.dqb_isoftlimit; getq_rslt.getquota_rslt_u.gqr_rquota.rq_curfiles = dqblk.dqb_curinodes; getq_rslt.getquota_rslt_u.gqr_rquota.rq_btimeleft = dqblk.dqb_btime - timev.tv_sec; getq_rslt.getquota_rslt_u.gqr_rquota.rq_ftimeleft = dqblk.dqb_itime - timev.tv_sec; } if (!svc_sendreply(transp, (xdrproc_t)xdr_getquota_rslt, &getq_rslt)) svcerr_systemerr(transp); if (!svc_freeargs(transp, (xdrproc_t)xdr_getquota_args, &getq_args)) { syslog(LOG_ERR, "unable to free arguments"); exit(1); } } static void -initfs(void) +sendquota_extended(struct svc_req *request, SVCXPRT *transp) { - struct fstab *fs; + struct ext_getquota_args getq_args; + struct getquota_rslt getq_rslt; + struct dqblk dqblk; + struct timeval timev; + int scale; - setfsent(); - szqf = 8; - if ((qfa = malloc(szqf * sizeof *qfa)) == NULL) - goto enomem; - while ((fs = getfsent())) { - if (strcmp(fs->fs_vfstype, "ufs")) - continue; - if (nqf >= szqf) { - szqf *= 2; - if ((qfa = reallocf(qfa, szqf * sizeof *qfa)) == NULL) - goto enomem; - } - if ((qfa[nqf] = quota_open(fs, USRQUOTA, O_RDONLY)) == NULL) { - if (errno != EOPNOTSUPP) - goto fserr; - continue; - } - ++nqf; - /* XXX */ + bzero(&getq_args, sizeof(getq_args)); + if (!svc_getargs(transp, (xdrproc_t)xdr_ext_getquota_args, &getq_args)) { + svcerr_decode(transp); + return; } - endfsent(); - return; -enomem: - syslog(LOG_ERR, "out of memory"); - exit(1); -fserr: - syslog(LOG_ERR, "%s: %s", fs->fs_file, strerror(errno)); - exit(1); + if (request->rq_cred.oa_flavor != AUTH_UNIX) { + /* bad auth */ + getq_rslt.status = Q_EPERM; + } else if (!getfsquota(getq_args.gqa_type, getq_args.gqa_id, getq_args.gqa_pathp, &dqblk)) { + /* failed, return noquota */ + getq_rslt.status = Q_NOQUOTA; + } else { + gettimeofday(&timev, NULL); + getq_rslt.status = Q_OK; + getq_rslt.getquota_rslt_u.gqr_rquota.rq_active = TRUE; + scale = 1 << flsll(dqblk.dqb_bhardlimit >> 32); + getq_rslt.getquota_rslt_u.gqr_rquota.rq_bsize = + DEV_BSIZE * scale; + getq_rslt.getquota_rslt_u.gqr_rquota.rq_bhardlimit = + dqblk.dqb_bhardlimit / scale; + getq_rslt.getquota_rslt_u.gqr_rquota.rq_bsoftlimit = + dqblk.dqb_bsoftlimit / scale; + getq_rslt.getquota_rslt_u.gqr_rquota.rq_curblocks = + dqblk.dqb_curblocks / scale; + getq_rslt.getquota_rslt_u.gqr_rquota.rq_fhardlimit = + dqblk.dqb_ihardlimit; + getq_rslt.getquota_rslt_u.gqr_rquota.rq_fsoftlimit = + dqblk.dqb_isoftlimit; + getq_rslt.getquota_rslt_u.gqr_rquota.rq_curfiles = + dqblk.dqb_curinodes; + getq_rslt.getquota_rslt_u.gqr_rquota.rq_btimeleft = + dqblk.dqb_btime - timev.tv_sec; + getq_rslt.getquota_rslt_u.gqr_rquota.rq_ftimeleft = + dqblk.dqb_itime - timev.tv_sec; + } + if (!svc_sendreply(transp, (xdrproc_t)xdr_getquota_rslt, &getq_rslt)) + svcerr_systemerr(transp); + if (!svc_freeargs(transp, (xdrproc_t)xdr_getquota_args, &getq_args)) { + syslog(LOG_ERR, "unable to free arguments"); + exit(1); + } } /* * gets the quotas for id, filesystem path. * Return 0 if fail, 1 otherwise */ static int -getfsquota(long id, char *path, struct dqblk *dqblk) +getfsquota(int type, long id, char *path, struct dqblk *dqblk) { - int i; + struct quotafile *qf; + /* + * Remote quota checking is limited to mounted filesystems. + * Since UFS and ZFS support the quota system calls, we + * only need to make an fstab object that has the path, and + * a blank name for the filesystem type. + * This allows the quota_open() call to work the way we + * expect it to. + * + * The static char declaration is because compiler warnings + * don't allow passing a const char * to a char *. + */ + int rv; + static char blank[] = ""; + struct fstab fst; - for (i = 0; i < nqf; ++i) - if (quota_check_path(qfa[i], path) == 1) - return (quota_read(qfa[i], dqblk, id) == 0); - return (0); + fst.fs_file = path; + fst.fs_mntops = blank; + fst.fs_vfstype = blank; + + if (type != USRQUOTA && type != GRPQUOTA) + return (0); + + qf = quota_open(&fst, type, O_RDONLY); + if (debug) + warnx("quota_open(<%s, %s>, %d) returned %p", + fst.fs_file, fst.fs_mntops, type, + qf); + if (qf == NULL) + return (0); + + rv = quota_read(qf, dqblk, id) == 0; + quota_close(qf); + if (debug) + warnx("getfsquota(%d, %ld, %s, %p) -> %d", + type, id, path, dqblk, rv); + return (rv); } Index: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c =================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c (revision 339007) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c (revision 339008) @@ -1,2590 +1,2751 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Pawel Jakub Dawidek . * All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Nexenta Systems, Inc. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include + #include "zfs_comutil.h" struct mtx zfs_debug_mtx; MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); int zfs_super_owner; SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, "File system owner can perform privileged operation on his file systems"); int zfs_debug_level; SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0, "Debug level"); SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); static int zfs_version_acl = ZFS_ACL_VERSION; SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, "ZFS_ACL_VERSION"); static int zfs_version_spa = SPA_VERSION; SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, "SPA_VERSION"); static int zfs_version_zpl = ZPL_VERSION; SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, "ZPL_VERSION"); +static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg); static int zfs_mount(vfs_t *vfsp); static int zfs_umount(vfs_t *vfsp, int fflag); static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); static int zfs_statfs(vfs_t *vfsp, struct statfs *statp); static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); static int zfs_sync(vfs_t *vfsp, int waitfor); static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, struct ucred **credanonp, int *numsecflavors, int **secflavors); static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp); static void zfs_objset_close(zfsvfs_t *zfsvfs); static void zfs_freevfs(vfs_t *vfsp); struct vfsops zfs_vfsops = { .vfs_mount = zfs_mount, .vfs_unmount = zfs_umount, .vfs_root = zfs_root, .vfs_statfs = zfs_statfs, .vfs_vget = zfs_vget, .vfs_sync = zfs_sync, .vfs_checkexp = zfs_checkexp, .vfs_fhtovp = zfs_fhtovp, + .vfs_quotactl = zfs_quotactl, }; VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); /* * We need to keep a count of active fs's. * This is necessary to prevent our module * from being unloaded after a umount -f */ static uint32_t zfs_active_fs_count = 0; + +static int +zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp) +{ + int error = 0; + char buf[32]; + int err; + uint64_t usedobj, quotaobj; + uint64_t quota, used = 0; + timespec_t now; + + usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; + quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; + + if (quotaobj == 0 || zfsvfs->z_replay) { + error = ENOENT; + goto done; + } + (void)sprintf(buf, "%llx", (longlong_t)id); + if ((error = zap_lookup(zfsvfs->z_os, quotaobj, + buf, sizeof(quota), 1, "a)) != 0) { + dprintf("%s(%d): quotaobj lookup failed\n", __FUNCTION__, __LINE__); + goto done; + } + /* + * quota(8) uses bsoftlimit as "quoota", and hardlimit as "limit". + * So we set them to be the same. + */ + dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota); + error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof(used), 1, &used); + if (error && error != ENOENT) { + dprintf("%s(%d): usedobj failed; %d\n", __FUNCTION__, __LINE__, error); + goto done; + } + dqp->dqb_curblocks = btodb(used); + dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0; + vfs_timestamp(&now); + /* + * Setting this to 0 causes FreeBSD quota(8) to print + * the number of days since the epoch, which isn't + * particularly useful. + */ + dqp->dqb_btime = dqp->dqb_itime = now.tv_sec; +done: + return (error); +} + +static int +zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg) +{ + zfsvfs_t *zfsvfs = vfsp->vfs_data; + struct thread *td; + int cmd, type, error = 0; + int bitsize; + uint64_t fuid; + zfs_userquota_prop_t quota_type; + struct dqblk64 dqblk = { 0 }; + + td = curthread; + cmd = cmds >> SUBCMDSHIFT; + type = cmds & SUBCMDMASK; + + ZFS_ENTER(zfsvfs); + if (id == -1) { + switch (type) { + case USRQUOTA: + id = td->td_ucred->cr_ruid; + break; + case GRPQUOTA: + id = td->td_ucred->cr_rgid; + break; + default: + error = EINVAL; + if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF) + vfs_unbusy(vfsp); + goto done; + } + } + /* + * Map BSD type to: + * ZFS_PROP_USERUSED, + * ZFS_PROP_USERQUOTA, + * ZFS_PROP_GROUPUSED, + * ZFS_PROP_GROUPQUOTA + */ + switch (cmd) { + case Q_SETQUOTA: + case Q_SETQUOTA32: + if (type == USRQUOTA) + quota_type = ZFS_PROP_USERQUOTA; + else if (type == GRPQUOTA) + quota_type = ZFS_PROP_GROUPQUOTA; + else + error = EINVAL; + break; + case Q_GETQUOTA: + case Q_GETQUOTA32: + if (type == USRQUOTA) + quota_type = ZFS_PROP_USERUSED; + else if (type == GRPQUOTA) + quota_type = ZFS_PROP_GROUPUSED; + else + error = EINVAL; + break; + } + + /* + * Depending on the cmd, we may need to get + * the ruid and domain (see fuidstr_to_sid?), + * the fuid (how?), or other information. + * Create fuid using zfs_fuid_create(zfsvfs, id, + * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)? + * I think I can use just the id? + * + * Look at zfs_fuid_overquota() to look up a quota. + * zap_lookup(something, quotaobj, fuidstring, sizeof(long long), 1, "a) + * + * See zfs_set_userquota() to set a quota. + */ + if ((u_int)type >= MAXQUOTAS) { + error = EINVAL; + goto done; + } + + switch (cmd) { + case Q_GETQUOTASIZE: + bitsize = 64; + error = copyout(&bitsize, arg, sizeof(int)); + break; + case Q_QUOTAON: + // As far as I can tell, you can't turn quotas on or off on zfs + error = 0; + vfs_unbusy(vfsp); + break; + case Q_QUOTAOFF: + error = ENOTSUP; + vfs_unbusy(vfsp); + break; + case Q_SETQUOTA: + error = copyin(&dqblk, arg, sizeof(dqblk)); + if (error == 0) + error = zfs_set_userquota(zfsvfs, quota_type, + "", id, dbtob(dqblk.dqb_bhardlimit)); + break; + case Q_GETQUOTA: + error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk); + if (error == 0) + error = copyout(&dqblk, arg, sizeof(dqblk)); + break; + default: + error = EINVAL; + break; + } +done: + ZFS_EXIT(zfsvfs); + return (error); +} /*ARGSUSED*/ static int zfs_sync(vfs_t *vfsp, int waitfor) { /* * Data integrity is job one. We don't want a compromised kernel * writing to the storage pool, so we never sync during panic. */ if (panicstr) return (0); /* * Ignore the system syncher. ZFS already commits async data * at zfs_txg_timeout intervals. */ if (waitfor == MNT_LAZY) return (0); if (vfsp != NULL) { /* * Sync a specific filesystem. */ zfsvfs_t *zfsvfs = vfsp->vfs_data; dsl_pool_t *dp; int error; error = vfs_stdsync(vfsp, waitfor); if (error != 0) return (error); ZFS_ENTER(zfsvfs); dp = dmu_objset_pool(zfsvfs->z_os); /* * If the system is shutting down, then skip any * filesystems which may exist on a suspended pool. */ if (sys_shutdown && spa_suspended(dp->dp_spa)) { ZFS_EXIT(zfsvfs); return (0); } if (zfsvfs->z_log != NULL) zil_commit(zfsvfs->z_log, 0); ZFS_EXIT(zfsvfs); } else { /* * Sync all ZFS filesystems. This is what happens when you * run sync(1M). Unlike other filesystems, ZFS honors the * request by waiting for all pools to commit all dirty data. */ spa_sync_allpools(); } return (0); } #ifndef __FreeBSD_kernel__ static int zfs_create_unique_device(dev_t *dev) { major_t new_major; do { ASSERT3U(zfs_minor, <=, MAXMIN32); minor_t start = zfs_minor; do { mutex_enter(&zfs_dev_mtx); if (zfs_minor >= MAXMIN32) { /* * If we're still using the real major * keep out of /dev/zfs and /dev/zvol minor * number space. If we're using a getudev()'ed * major number, we can use all of its minors. */ if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) zfs_minor = ZFS_MIN_MINOR; else zfs_minor = 0; } else { zfs_minor++; } *dev = makedevice(zfs_major, zfs_minor); mutex_exit(&zfs_dev_mtx); } while (vfs_devismounted(*dev) && zfs_minor != start); if (zfs_minor == start) { /* * We are using all ~262,000 minor numbers for the * current major number. Create a new major number. */ if ((new_major = getudev()) == (major_t)-1) { cmn_err(CE_WARN, "zfs_mount: Can't get unique major " "device number."); return (-1); } mutex_enter(&zfs_dev_mtx); zfs_major = new_major; zfs_minor = 0; mutex_exit(&zfs_dev_mtx); } else { break; } /* CONSTANTCONDITION */ } while (1); return (0); } #endif /* !__FreeBSD_kernel__ */ static void atime_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; if (newval == TRUE) { zfsvfs->z_atime = TRUE; zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); } else { zfsvfs->z_atime = FALSE; zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); } } static void xattr_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; if (newval == TRUE) { /* XXX locking on vfs_flag? */ #ifdef TODO zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; #endif vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); } else { /* XXX locking on vfs_flag? */ #ifdef TODO zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; #endif vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); } } static void blksz_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os))); ASSERT3U(newval, >=, SPA_MINBLOCKSIZE); ASSERT(ISP2(newval)); zfsvfs->z_max_blksz = newval; zfsvfs->z_vfs->mnt_stat.f_iosize = newval; } static void readonly_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; if (newval) { /* XXX locking on vfs_flag? */ zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); } else { /* XXX locking on vfs_flag? */ zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); } } static void setuid_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; if (newval == FALSE) { zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); } else { zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); } } static void exec_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; if (newval == FALSE) { zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); } else { zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); } } /* * The nbmand mount option can be changed at mount time. * We can't allow it to be toggled on live file systems or incorrect * behavior may be seen from cifs clients * * This property isn't registered via dsl_prop_register(), but this callback * will be called when a file system is first mounted */ static void nbmand_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; if (newval == FALSE) { vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); } else { vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); } } static void snapdir_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; zfsvfs->z_show_ctldir = newval; } static void vscan_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; zfsvfs->z_vscan = newval; } static void acl_mode_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; zfsvfs->z_acl_mode = newval; } static void acl_inherit_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; zfsvfs->z_acl_inherit = newval; } static int zfs_register_callbacks(vfs_t *vfsp) { struct dsl_dataset *ds = NULL; objset_t *os = NULL; zfsvfs_t *zfsvfs = NULL; uint64_t nbmand; boolean_t readonly = B_FALSE; boolean_t do_readonly = B_FALSE; boolean_t setuid = B_FALSE; boolean_t do_setuid = B_FALSE; boolean_t exec = B_FALSE; boolean_t do_exec = B_FALSE; #ifdef illumos boolean_t devices = B_FALSE; boolean_t do_devices = B_FALSE; #endif boolean_t xattr = B_FALSE; boolean_t do_xattr = B_FALSE; boolean_t atime = B_FALSE; boolean_t do_atime = B_FALSE; int error = 0; ASSERT(vfsp); zfsvfs = vfsp->vfs_data; ASSERT(zfsvfs); os = zfsvfs->z_os; /* * This function can be called for a snapshot when we update snapshot's * mount point, which isn't really supported. */ if (dmu_objset_is_snapshot(os)) return (EOPNOTSUPP); /* * The act of registering our callbacks will destroy any mount * options we may have. In order to enable temporary overrides * of mount options, we stash away the current values and * restore them after we register the callbacks. */ if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) || !spa_writeable(dmu_objset_spa(os))) { readonly = B_TRUE; do_readonly = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { readonly = B_FALSE; do_readonly = B_TRUE; } if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { setuid = B_FALSE; do_setuid = B_TRUE; } else { if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { setuid = B_FALSE; do_setuid = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { setuid = B_TRUE; do_setuid = B_TRUE; } } if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { exec = B_FALSE; do_exec = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { exec = B_TRUE; do_exec = B_TRUE; } if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { xattr = B_FALSE; do_xattr = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { xattr = B_TRUE; do_xattr = B_TRUE; } if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { atime = B_FALSE; do_atime = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { atime = B_TRUE; do_atime = B_TRUE; } /* * We need to enter pool configuration here, so that we can use * dsl_prop_get_int_ds() to handle the special nbmand property below. * dsl_prop_get_integer() can not be used, because it has to acquire * spa_namespace_lock and we can not do that because we already hold * z_teardown_lock. The problem is that spa_write_cachefile() is called * with spa_namespace_lock held and the function calls ZFS vnode * operations to write the cache file and thus z_teardown_lock is * acquired after spa_namespace_lock. */ ds = dmu_objset_ds(os); dsl_pool_config_enter(dmu_objset_pool(os), FTAG); /* * nbmand is a special property. It can only be changed at * mount time. * * This is weird, but it is documented to only be changeable * at mount time. */ if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { nbmand = B_FALSE; } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { nbmand = B_TRUE; } else if (error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand) != 0) { dsl_pool_config_exit(dmu_objset_pool(os), FTAG); return (error); } /* * Register property callbacks. * * It would probably be fine to just check for i/o error from * the first prop_register(), but I guess I like to go * overboard... */ error = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs); #ifdef illumos error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs); #endif error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (error) goto unregister; /* * Invoke our callbacks to restore temporary mount options. */ if (do_readonly) readonly_changed_cb(zfsvfs, readonly); if (do_setuid) setuid_changed_cb(zfsvfs, setuid); if (do_exec) exec_changed_cb(zfsvfs, exec); if (do_xattr) xattr_changed_cb(zfsvfs, xattr); if (do_atime) atime_changed_cb(zfsvfs, atime); nbmand_changed_cb(zfsvfs, nbmand); return (0); unregister: dsl_prop_unregister_all(ds, zfsvfs); return (error); } static int zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, uint64_t *userp, uint64_t *groupp) { /* * Is it a valid type of object to track? */ if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) return (SET_ERROR(ENOENT)); /* * If we have a NULL data pointer * then assume the id's aren't changing and * return EEXIST to the dmu to let it know to * use the same ids */ if (data == NULL) return (SET_ERROR(EEXIST)); if (bonustype == DMU_OT_ZNODE) { znode_phys_t *znp = data; *userp = znp->zp_uid; *groupp = znp->zp_gid; } else { int hdrsize; sa_hdr_phys_t *sap = data; sa_hdr_phys_t sa = *sap; boolean_t swap = B_FALSE; ASSERT(bonustype == DMU_OT_SA); if (sa.sa_magic == 0) { /* * This should only happen for newly created * files that haven't had the znode data filled * in yet. */ *userp = 0; *groupp = 0; return (0); } if (sa.sa_magic == BSWAP_32(SA_MAGIC)) { sa.sa_magic = SA_MAGIC; sa.sa_layout_info = BSWAP_16(sa.sa_layout_info); swap = B_TRUE; } else { VERIFY3U(sa.sa_magic, ==, SA_MAGIC); } hdrsize = sa_hdrsize(&sa); VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t)); *userp = *((uint64_t *)((uintptr_t)data + hdrsize + SA_UID_OFFSET)); *groupp = *((uint64_t *)((uintptr_t)data + hdrsize + SA_GID_OFFSET)); if (swap) { *userp = BSWAP_64(*userp); *groupp = BSWAP_64(*groupp); } } return (0); } static void fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, char *domainbuf, int buflen, uid_t *ridp) { uint64_t fuid; const char *domain; fuid = zfs_strtonum(fuidstr, NULL); domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid)); if (domain) (void) strlcpy(domainbuf, domain, buflen); else domainbuf[0] = '\0'; *ridp = FUID_RID(fuid); } static uint64_t zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type) { switch (type) { case ZFS_PROP_USERUSED: return (DMU_USERUSED_OBJECT); case ZFS_PROP_GROUPUSED: return (DMU_GROUPUSED_OBJECT); case ZFS_PROP_USERQUOTA: return (zfsvfs->z_userquota_obj); case ZFS_PROP_GROUPQUOTA: return (zfsvfs->z_groupquota_obj); } return (0); } int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, uint64_t *cookiep, void *vbuf, uint64_t *bufsizep) { int error; zap_cursor_t zc; zap_attribute_t za; zfs_useracct_t *buf = vbuf; uint64_t obj; if (!dmu_objset_userspace_present(zfsvfs->z_os)) return (SET_ERROR(ENOTSUP)); obj = zfs_userquota_prop_to_obj(zfsvfs, type); if (obj == 0) { *bufsizep = 0; return (0); } for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep); (error = zap_cursor_retrieve(&zc, &za)) == 0; zap_cursor_advance(&zc)) { if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > *bufsizep) break; fuidstr_to_sid(zfsvfs, za.za_name, buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); buf->zu_space = za.za_first_integer; buf++; } if (error == ENOENT) error = 0; ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep); *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf; *cookiep = zap_cursor_serialize(&zc); zap_cursor_fini(&zc); return (error); } /* * buf must be big enough (eg, 32 bytes) */ static int id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid, char *buf, boolean_t addok) { uint64_t fuid; int domainid = 0; if (domain && domain[0]) { domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok); if (domainid == -1) return (SET_ERROR(ENOENT)); } fuid = FUID_ENCODE(domainid, rid); (void) sprintf(buf, "%llx", (longlong_t)fuid); return (0); } int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, const char *domain, uint64_t rid, uint64_t *valp) { char buf[32]; int err; uint64_t obj; *valp = 0; if (!dmu_objset_userspace_present(zfsvfs->z_os)) return (SET_ERROR(ENOTSUP)); obj = zfs_userquota_prop_to_obj(zfsvfs, type); if (obj == 0) return (0); err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE); if (err) return (err); err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp); if (err == ENOENT) err = 0; return (err); } int zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, const char *domain, uint64_t rid, uint64_t quota) { char buf[32]; int err; dmu_tx_t *tx; uint64_t *objp; boolean_t fuid_dirtied; if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA) return (SET_ERROR(EINVAL)); if (zfsvfs->z_version < ZPL_VERSION_USERSPACE) return (SET_ERROR(ENOTSUP)); objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj : &zfsvfs->z_groupquota_obj; err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE); if (err) return (err); fuid_dirtied = zfsvfs->z_fuid_dirty; tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL); if (*objp == 0) { dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, zfs_userquota_prop_prefixes[type]); } if (fuid_dirtied) zfs_fuid_txhold(zfsvfs, tx); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { dmu_tx_abort(tx); return (err); } mutex_enter(&zfsvfs->z_lock); if (*objp == 0) { *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA, DMU_OT_NONE, 0, tx); VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); } mutex_exit(&zfsvfs->z_lock); if (quota == 0) { err = zap_remove(zfsvfs->z_os, *objp, buf, tx); if (err == ENOENT) err = 0; } else { err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, "a, tx); } ASSERT(err == 0); if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); dmu_tx_commit(tx); return (err); } boolean_t zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid) { char buf[32]; uint64_t used, quota, usedobj, quotaobj; int err; usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; if (quotaobj == 0 || zfsvfs->z_replay) return (B_FALSE); (void) sprintf(buf, "%llx", (longlong_t)fuid); err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a); if (err != 0) return (B_FALSE); err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used); if (err != 0) return (B_FALSE); return (used >= quota); } boolean_t zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup) { uint64_t fuid; uint64_t quotaobj; quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; fuid = isgroup ? zp->z_gid : zp->z_uid; if (quotaobj == 0 || zfsvfs->z_replay) return (B_FALSE); return (zfs_fuid_overquota(zfsvfs, isgroup, fuid)); } /* * Associate this zfsvfs with the given objset, which must be owned. * This will cache a bunch of on-disk state from the objset in the * zfsvfs. */ static int zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) { int error; uint64_t val; zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE; zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; zfsvfs->z_os = os; error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); if (error != 0) return (error); if (zfsvfs->z_version > zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { (void) printf("Can't mount a version %lld file system " "on a version %lld pool\n. Pool must be upgraded to mount " "this file system.", (u_longlong_t)zfsvfs->z_version, (u_longlong_t)spa_version(dmu_objset_spa(os))); return (SET_ERROR(ENOTSUP)); } error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val); if (error != 0) return (error); zfsvfs->z_norm = (int)val; error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val); if (error != 0) return (error); zfsvfs->z_utf8 = (val != 0); error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val); if (error != 0) return (error); zfsvfs->z_case = (uint_t)val; /* * Fold case on file systems that are always or sometimes case * insensitive. */ if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || zfsvfs->z_case == ZFS_CASE_MIXED) zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); uint64_t sa_obj = 0; if (zfsvfs->z_use_sa) { /* should either have both of these objects or none */ error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); if (error != 0) return (error); } error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table); if (error != 0) return (error); if (zfsvfs->z_version >= ZPL_VERSION_SA) sa_register_update_callback(os, zfs_sa_upgrade); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &zfsvfs->z_root); if (error != 0) return (error); ASSERT(zfsvfs->z_root != 0); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, &zfsvfs->z_unlinkedobj); if (error != 0) return (error); error = zap_lookup(os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 8, 1, &zfsvfs->z_userquota_obj); if (error == ENOENT) zfsvfs->z_userquota_obj = 0; else if (error != 0) return (error); error = zap_lookup(os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 8, 1, &zfsvfs->z_groupquota_obj); if (error == ENOENT) zfsvfs->z_groupquota_obj = 0; else if (error != 0) return (error); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, &zfsvfs->z_fuid_obj); if (error == ENOENT) zfsvfs->z_fuid_obj = 0; else if (error != 0) return (error); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, &zfsvfs->z_shares_dir); if (error == ENOENT) zfsvfs->z_shares_dir = 0; else if (error != 0) return (error); /* * Only use the name cache if we are looking for a * name on a file system that does not require normalization * or case folding. We can also look there if we happen to be * on a non-normalizing, mixed sensitivity file system IF we * are looking for the exact name (which is always the case on * FreeBSD). */ zfsvfs->z_use_namecache = !zfsvfs->z_norm || ((zfsvfs->z_case == ZFS_CASE_MIXED) && !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER)); return (0); } #if defined(__FreeBSD__) taskq_t *zfsvfs_taskq; static void zfsvfs_task_unlinked_drain(void *context, int pending __unused) { zfs_unlinked_drain((zfsvfs_t *)context); } #endif int zfsvfs_create(const char *osname, zfsvfs_t **zfvp) { objset_t *os; zfsvfs_t *zfsvfs; int error; /* * XXX: Fix struct statfs so this isn't necessary! * * The 'osname' is used as the filesystem's special node, which means * it must fit in statfs.f_mntfromname, or else it can't be * enumerated, so libzfs_mnttab_find() returns NULL, which causes * 'zfs unmount' to think it's not mounted when it is. */ if (strlen(osname) >= MNAMELEN) return (SET_ERROR(ENAMETOOLONG)); zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); /* * We claim to always be readonly so we can open snapshots; * other ZPL code will prevent us from writing to snapshots. */ error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os); if (error != 0) { kmem_free(zfsvfs, sizeof (zfsvfs_t)); return (error); } error = zfsvfs_create_impl(zfvp, zfsvfs, os); if (error != 0) { dmu_objset_disown(os, zfsvfs); } return (error); } int zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os) { int error; zfsvfs->z_vfs = NULL; zfsvfs->z_parent = zfsvfs; mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); #if defined(__FreeBSD__) TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0, zfsvfs_task_unlinked_drain, zfsvfs); #endif #ifdef DIAGNOSTIC rrm_init(&zfsvfs->z_teardown_lock, B_TRUE); #else rrm_init(&zfsvfs->z_teardown_lock, B_FALSE); #endif rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); error = zfsvfs_init(zfsvfs, os); if (error != 0) { *zfvp = NULL; kmem_free(zfsvfs, sizeof (zfsvfs_t)); return (error); } *zfvp = zfsvfs; return (0); } static int zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) { int error; error = zfs_register_callbacks(zfsvfs->z_vfs); if (error) return (error); zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); /* * If we are not mounting (ie: online recv), then we don't * have to worry about replaying the log as we blocked all * operations out since we closed the ZIL. */ if (mounting) { boolean_t readonly; /* * During replay we remove the read only flag to * allow replays to succeed. */ readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; if (readonly != 0) zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; else zfs_unlinked_drain(zfsvfs); /* * Parse and replay the intent log. * * Because of ziltest, this must be done after * zfs_unlinked_drain(). (Further note: ziltest * doesn't use readonly mounts, where * zfs_unlinked_drain() isn't called.) This is because * ziltest causes spa_sync() to think it's committed, * but actually it is not, so the intent log contains * many txg's worth of changes. * * In particular, if object N is in the unlinked set in * the last txg to actually sync, then it could be * actually freed in a later txg and then reallocated * in a yet later txg. This would write a "create * object N" record to the intent log. Normally, this * would be fine because the spa_sync() would have * written out the fact that object N is free, before * we could write the "create object N" intent log * record. * * But when we are in ziltest mode, we advance the "open * txg" without actually spa_sync()-ing the changes to * disk. So we would see that object N is still * allocated and in the unlinked set, and there is an * intent log record saying to allocate it. */ if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { if (zil_replay_disable) { zil_destroy(zfsvfs->z_log, B_FALSE); } else { zfsvfs->z_replay = B_TRUE; zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector); zfsvfs->z_replay = B_FALSE; } } zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ } /* * Set the objset user_ptr to track its zfsvfs. */ mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); dmu_objset_set_user(zfsvfs->z_os, zfsvfs); mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); return (0); } extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */ void zfsvfs_free(zfsvfs_t *zfsvfs) { int i; /* * This is a barrier to prevent the filesystem from going away in * zfs_znode_move() until we can safely ensure that the filesystem is * not unmounted. We consider the filesystem valid before the barrier * and invalid after the barrier. */ rw_enter(&zfsvfs_lock, RW_READER); rw_exit(&zfsvfs_lock); zfs_fuid_destroy(zfsvfs); mutex_destroy(&zfsvfs->z_znodes_lock); mutex_destroy(&zfsvfs->z_lock); list_destroy(&zfsvfs->z_all_znodes); rrm_destroy(&zfsvfs->z_teardown_lock); rw_destroy(&zfsvfs->z_teardown_inactive_lock); rw_destroy(&zfsvfs->z_fuid_lock); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_destroy(&zfsvfs->z_hold_mtx[i]); kmem_free(zfsvfs, sizeof (zfsvfs_t)); } static void zfs_set_fuid_feature(zfsvfs_t *zfsvfs) { zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); if (zfsvfs->z_vfs) { if (zfsvfs->z_use_fuids) { vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR); vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE); } else { vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR); vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE); } } zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); } static int zfs_domount(vfs_t *vfsp, char *osname) { uint64_t recordsize, fsid_guid; int error = 0; zfsvfs_t *zfsvfs; vnode_t *vp; ASSERT(vfsp); ASSERT(osname); error = zfsvfs_create(osname, &zfsvfs); if (error) return (error); zfsvfs->z_vfs = vfsp; #ifdef illumos /* Initialize the generic filesystem structure. */ vfsp->vfs_bcount = 0; vfsp->vfs_data = NULL; if (zfs_create_unique_device(&mount_dev) == -1) { error = SET_ERROR(ENODEV); goto out; } ASSERT(vfs_devismounted(mount_dev) == 0); #endif if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL)) goto out; zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; vfsp->vfs_data = zfsvfs; vfsp->mnt_flag |= MNT_LOCAL; vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED; vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */ /* * The fsid is 64 bits, composed of an 8-bit fs type, which * separates our fsid from any other filesystem types, and a * 56-bit objset unique ID. The objset unique ID is unique to * all objsets open on this system, provided by unique_create(). * The 8-bit fs type must be put in the low bits of fsid[1] * because that's where other Solaris filesystems put it. */ fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); vfsp->vfs_fsid.val[0] = fsid_guid; vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | vfsp->mnt_vfc->vfc_typenum & 0xFF; /* * Set features for file system. */ zfs_set_fuid_feature(zfsvfs); if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); } vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED); if (dmu_objset_is_snapshot(zfsvfs->z_os)) { uint64_t pval; atime_changed_cb(zfsvfs, B_FALSE); readonly_changed_cb(zfsvfs, B_TRUE); if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) goto out; xattr_changed_cb(zfsvfs, pval); zfsvfs->z_issnap = B_TRUE; zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); dmu_objset_set_user(zfsvfs->z_os, zfsvfs); mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); } else { error = zfsvfs_setup(zfsvfs, B_TRUE); } vfs_mountedfrom(vfsp, osname); if (!zfsvfs->z_issnap) zfsctl_create(zfsvfs); out: if (error) { dmu_objset_disown(zfsvfs->z_os, zfsvfs); zfsvfs_free(zfsvfs); } else { atomic_inc_32(&zfs_active_fs_count); } return (error); } void zfs_unregister_callbacks(zfsvfs_t *zfsvfs) { objset_t *os = zfsvfs->z_os; if (!dmu_objset_is_snapshot(os)) dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs); } #ifdef SECLABEL /* * Convert a decimal digit string to a uint64_t integer. */ static int str_to_uint64(char *str, uint64_t *objnum) { uint64_t num = 0; while (*str) { if (*str < '0' || *str > '9') return (SET_ERROR(EINVAL)); num = num*10 + *str++ - '0'; } *objnum = num; return (0); } /* * The boot path passed from the boot loader is in the form of * "rootpool-name/root-filesystem-object-number'. Convert this * string to a dataset name: "rootpool-name/root-filesystem-name". */ static int zfs_parse_bootfs(char *bpath, char *outpath) { char *slashp; uint64_t objnum; int error; if (*bpath == 0 || *bpath == '/') return (SET_ERROR(EINVAL)); (void) strcpy(outpath, bpath); slashp = strchr(bpath, '/'); /* if no '/', just return the pool name */ if (slashp == NULL) { return (0); } /* if not a number, just return the root dataset name */ if (str_to_uint64(slashp+1, &objnum)) { return (0); } *slashp = '\0'; error = dsl_dsobj_to_dsname(bpath, objnum, outpath); *slashp = '/'; return (error); } /* * Check that the hex label string is appropriate for the dataset being * mounted into the global_zone proper. * * Return an error if the hex label string is not default or * admin_low/admin_high. For admin_low labels, the corresponding * dataset must be readonly. */ int zfs_check_global_label(const char *dsname, const char *hexsl) { if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0) return (0); if (strcasecmp(hexsl, ADMIN_HIGH) == 0) return (0); if (strcasecmp(hexsl, ADMIN_LOW) == 0) { /* must be readonly */ uint64_t rdonly; if (dsl_prop_get_integer(dsname, zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL)) return (SET_ERROR(EACCES)); return (rdonly ? 0 : EACCES); } return (SET_ERROR(EACCES)); } /* * Determine whether the mount is allowed according to MAC check. * by comparing (where appropriate) label of the dataset against * the label of the zone being mounted into. If the dataset has * no label, create one. * * Returns 0 if access allowed, error otherwise (e.g. EACCES) */ static int zfs_mount_label_policy(vfs_t *vfsp, char *osname) { int error, retv; zone_t *mntzone = NULL; ts_label_t *mnt_tsl; bslabel_t *mnt_sl; bslabel_t ds_sl; char ds_hexsl[MAXNAMELEN]; retv = EACCES; /* assume the worst */ /* * Start by getting the dataset label if it exists. */ error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 1, sizeof (ds_hexsl), &ds_hexsl, NULL); if (error) return (SET_ERROR(EACCES)); /* * If labeling is NOT enabled, then disallow the mount of datasets * which have a non-default label already. No other label checks * are needed. */ if (!is_system_labeled()) { if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) return (0); return (SET_ERROR(EACCES)); } /* * Get the label of the mountpoint. If mounting into the global * zone (i.e. mountpoint is not within an active zone and the * zoned property is off), the label must be default or * admin_low/admin_high only; no other checks are needed. */ mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE); if (mntzone->zone_id == GLOBAL_ZONEID) { uint64_t zoned; zone_rele(mntzone); if (dsl_prop_get_integer(osname, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) return (SET_ERROR(EACCES)); if (!zoned) return (zfs_check_global_label(osname, ds_hexsl)); else /* * This is the case of a zone dataset being mounted * initially, before the zone has been fully created; * allow this mount into global zone. */ return (0); } mnt_tsl = mntzone->zone_slabel; ASSERT(mnt_tsl != NULL); label_hold(mnt_tsl); mnt_sl = label2bslabel(mnt_tsl); if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) { /* * The dataset doesn't have a real label, so fabricate one. */ char *str = NULL; if (l_to_str_internal(mnt_sl, &str) == 0 && dsl_prop_set_string(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), ZPROP_SRC_LOCAL, str) == 0) retv = 0; if (str != NULL) kmem_free(str, strlen(str) + 1); } else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) { /* * Now compare labels to complete the MAC check. If the * labels are equal then allow access. If the mountpoint * label dominates the dataset label, allow readonly access. * Otherwise, access is denied. */ if (blequal(mnt_sl, &ds_sl)) retv = 0; else if (bldominates(mnt_sl, &ds_sl)) { vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); retv = 0; } } label_rele(mnt_tsl); zone_rele(mntzone); return (retv); } #endif /* SECLABEL */ #ifdef OPENSOLARIS_MOUNTROOT static int zfs_mountroot(vfs_t *vfsp, enum whymountroot why) { int error = 0; static int zfsrootdone = 0; zfsvfs_t *zfsvfs = NULL; znode_t *zp = NULL; vnode_t *vp = NULL; char *zfs_bootfs; char *zfs_devid; ASSERT(vfsp); /* * The filesystem that we mount as root is defined in the * boot property "zfs-bootfs" with a format of * "poolname/root-dataset-objnum". */ if (why == ROOT_INIT) { if (zfsrootdone++) return (SET_ERROR(EBUSY)); /* * the process of doing a spa_load will require the * clock to be set before we could (for example) do * something better by looking at the timestamp on * an uberblock, so just set it to -1. */ clkset(-1); if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) { cmn_err(CE_NOTE, "spa_get_bootfs: can not get " "bootfs name"); return (SET_ERROR(EINVAL)); } zfs_devid = spa_get_bootprop("diskdevid"); error = spa_import_rootpool(rootfs.bo_name, zfs_devid); if (zfs_devid) spa_free_bootprop(zfs_devid); if (error) { spa_free_bootprop(zfs_bootfs); cmn_err(CE_NOTE, "spa_import_rootpool: error %d", error); return (error); } if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) { spa_free_bootprop(zfs_bootfs); cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d", error); return (error); } spa_free_bootprop(zfs_bootfs); if (error = vfs_lock(vfsp)) return (error); if (error = zfs_domount(vfsp, rootfs.bo_name)) { cmn_err(CE_NOTE, "zfs_domount: error %d", error); goto out; } zfsvfs = (zfsvfs_t *)vfsp->vfs_data; ASSERT(zfsvfs); if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) { cmn_err(CE_NOTE, "zfs_zget: error %d", error); goto out; } vp = ZTOV(zp); mutex_enter(&vp->v_lock); vp->v_flag |= VROOT; mutex_exit(&vp->v_lock); rootvp = vp; /* * Leave rootvp held. The root file system is never unmounted. */ vfs_add((struct vnode *)0, vfsp, (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); out: vfs_unlock(vfsp); return (error); } else if (why == ROOT_REMOUNT) { readonly_changed_cb(vfsp->vfs_data, B_FALSE); vfsp->vfs_flag |= VFS_REMOUNT; /* refresh mount options */ zfs_unregister_callbacks(vfsp->vfs_data); return (zfs_register_callbacks(vfsp)); } else if (why == ROOT_UNMOUNT) { zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); (void) zfs_sync(vfsp, 0, 0); return (0); } /* * if "why" is equal to anything else other than ROOT_INIT, * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. */ return (SET_ERROR(ENOTSUP)); } #endif /* OPENSOLARIS_MOUNTROOT */ static int getpoolname(const char *osname, char *poolname) { char *p; p = strchr(osname, '/'); if (p == NULL) { if (strlen(osname) >= MAXNAMELEN) return (ENAMETOOLONG); (void) strcpy(poolname, osname); } else { if (p - osname >= MAXNAMELEN) return (ENAMETOOLONG); (void) strncpy(poolname, osname, p - osname); poolname[p - osname] = '\0'; } return (0); } /*ARGSUSED*/ static int zfs_mount(vfs_t *vfsp) { kthread_t *td = curthread; vnode_t *mvp = vfsp->mnt_vnodecovered; cred_t *cr = td->td_ucred; char *osname; int error = 0; int canwrite; #ifdef illumos if (mvp->v_type != VDIR) return (SET_ERROR(ENOTDIR)); mutex_enter(&mvp->v_lock); if ((uap->flags & MS_REMOUNT) == 0 && (uap->flags & MS_OVERLAY) == 0 && (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { mutex_exit(&mvp->v_lock); return (SET_ERROR(EBUSY)); } mutex_exit(&mvp->v_lock); /* * ZFS does not support passing unparsed data in via MS_DATA. * Users should use the MS_OPTIONSTR interface; this means * that all option parsing is already done and the options struct * can be interrogated. */ if ((uap->flags & MS_DATA) && uap->datalen > 0) return (SET_ERROR(EINVAL)); /* * Get the objset name (the "special" mount argument). */ if (error = pn_get(uap->spec, fromspace, &spn)) return (error); osname = spn.pn_path; #else /* !illumos */ if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_ZFS)) return (SET_ERROR(EPERM)); if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) return (SET_ERROR(EINVAL)); /* * If full-owner-access is enabled and delegated administration is * turned on, we must set nosuid. */ if (zfs_super_owner && dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { secpolicy_fs_mount_clearopts(cr, vfsp); } #endif /* illumos */ /* * Check for mount privilege? * * If we don't have privilege then see if * we have local permission to allow it */ error = secpolicy_fs_mount(cr, mvp, vfsp); if (error) { if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0) goto out; if (!(vfsp->vfs_flag & MS_REMOUNT)) { vattr_t vattr; /* * Make sure user is the owner of the mount point * or has sufficient privileges. */ vattr.va_mask = AT_UID; vn_lock(mvp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(mvp, &vattr, cr)) { VOP_UNLOCK(mvp, 0); goto out; } if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { VOP_UNLOCK(mvp, 0); goto out; } VOP_UNLOCK(mvp, 0); } secpolicy_fs_mount_clearopts(cr, vfsp); } /* * Refuse to mount a filesystem if we are in a local zone and the * dataset is not visible. */ if (!INGLOBALZONE(curthread) && (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { error = SET_ERROR(EPERM); goto out; } #ifdef SECLABEL error = zfs_mount_label_policy(vfsp, osname); if (error) goto out; #endif vfsp->vfs_flag |= MNT_NFS4ACLS; /* * When doing a remount, we simply refresh our temporary properties * according to those options set in the current VFS options. */ if (vfsp->vfs_flag & MS_REMOUNT) { zfsvfs_t *zfsvfs = vfsp->vfs_data; /* * Refresh mount options with z_teardown_lock blocking I/O while * the filesystem is in an inconsistent state. * The lock also serializes this code with filesystem * manipulations between entry to zfs_suspend_fs() and return * from zfs_resume_fs(). */ rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); zfs_unregister_callbacks(zfsvfs); error = zfs_register_callbacks(vfsp); rrm_exit(&zfsvfs->z_teardown_lock, FTAG); goto out; } /* Initial root mount: try hard to import the requested root pool. */ if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 && (vfsp->vfs_flag & MNT_UPDATE) == 0) { char pname[MAXNAMELEN]; error = getpoolname(osname, pname); if (error == 0) error = spa_import_rootpool(pname); if (error) goto out; } DROP_GIANT(); error = zfs_domount(vfsp, osname); PICKUP_GIANT(); #ifdef illumos /* * Add an extra VFS_HOLD on our parent vfs so that it can't * disappear due to a forced unmount. */ if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap) VFS_HOLD(mvp->v_vfsp); #endif out: return (error); } static int zfs_statfs(vfs_t *vfsp, struct statfs *statp) { zfsvfs_t *zfsvfs = vfsp->vfs_data; uint64_t refdbytes, availbytes, usedobjs, availobjs; statp->f_version = STATFS_VERSION; ZFS_ENTER(zfsvfs); dmu_objset_space(zfsvfs->z_os, &refdbytes, &availbytes, &usedobjs, &availobjs); /* * The underlying storage pool actually uses multiple block sizes. * We report the fragsize as the smallest block size we support, * and we report our blocksize as the filesystem's maximum blocksize. */ statp->f_bsize = SPA_MINBLOCKSIZE; statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; /* * The following report "total" blocks of various kinds in the * file system, but reported in terms of f_frsize - the * "fragment" size. */ statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; statp->f_bfree = availbytes / statp->f_bsize; statp->f_bavail = statp->f_bfree; /* no root reservation */ /* * statvfs() should really be called statufs(), because it assumes * static metadata. ZFS doesn't preallocate files, so the best * we can do is report the max that could possibly fit in f_files, * and that minus the number actually used in f_ffree. * For f_ffree, report the smaller of the number of object available * and the number of blocks (each object will take at least a block). */ statp->f_ffree = MIN(availobjs, statp->f_bfree); statp->f_files = statp->f_ffree + usedobjs; /* * We're a zfs filesystem. */ (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, sizeof(statp->f_mntfromname)); strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, sizeof(statp->f_mntonname)); statp->f_namemax = MAXNAMELEN - 1; ZFS_EXIT(zfsvfs); return (0); } static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) { zfsvfs_t *zfsvfs = vfsp->vfs_data; znode_t *rootzp; int error; ZFS_ENTER(zfsvfs); error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); if (error == 0) *vpp = ZTOV(rootzp); ZFS_EXIT(zfsvfs); if (error == 0) { error = vn_lock(*vpp, flags); if (error != 0) { VN_RELE(*vpp); *vpp = NULL; } } return (error); } /* * Teardown the zfsvfs::z_os. * * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock' * and 'z_teardown_inactive_lock' held. */ static int zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) { znode_t *zp; rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); if (!unmounting) { /* * We purge the parent filesystem's vfsp as the parent * filesystem and all of its snapshots have their vnode's * v_vfsp set to the parent's filesystem's vfsp. Note, * 'z_parent' is self referential for non-snapshots. */ (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); #ifdef FREEBSD_NAMECACHE cache_purgevfs(zfsvfs->z_parent->z_vfs, true); #endif } /* * Close the zil. NB: Can't close the zil while zfs_inactive * threads are blocked as zil_close can call zfs_inactive. */ if (zfsvfs->z_log) { zil_close(zfsvfs->z_log); zfsvfs->z_log = NULL; } rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); /* * If we are not unmounting (ie: online recv) and someone already * unmounted this file system while we were doing the switcheroo, * or a reopen of z_os failed then just bail out now. */ if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { rw_exit(&zfsvfs->z_teardown_inactive_lock); rrm_exit(&zfsvfs->z_teardown_lock, FTAG); return (SET_ERROR(EIO)); } /* * At this point there are no vops active, and any new vops will * fail with EIO since we have z_teardown_lock for writer (only * relavent for forced unmount). * * Release all holds on dbufs. */ mutex_enter(&zfsvfs->z_znodes_lock); for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; zp = list_next(&zfsvfs->z_all_znodes, zp)) if (zp->z_sa_hdl) { ASSERT(ZTOV(zp)->v_count >= 0); zfs_znode_dmu_fini(zp); } mutex_exit(&zfsvfs->z_znodes_lock); /* * If we are unmounting, set the unmounted flag and let new vops * unblock. zfs_inactive will have the unmounted behavior, and all * other vops will fail with EIO. */ if (unmounting) { zfsvfs->z_unmounted = B_TRUE; rw_exit(&zfsvfs->z_teardown_inactive_lock); rrm_exit(&zfsvfs->z_teardown_lock, FTAG); } /* * z_os will be NULL if there was an error in attempting to reopen * zfsvfs, so just return as the properties had already been * unregistered and cached data had been evicted before. */ if (zfsvfs->z_os == NULL) return (0); /* * Unregister properties. */ zfs_unregister_callbacks(zfsvfs); /* * Evict cached data */ if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) && !(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)) txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); dmu_objset_evict_dbufs(zfsvfs->z_os); return (0); } /*ARGSUSED*/ static int zfs_umount(vfs_t *vfsp, int fflag) { kthread_t *td = curthread; zfsvfs_t *zfsvfs = vfsp->vfs_data; objset_t *os; cred_t *cr = td->td_ucred; int ret; ret = secpolicy_fs_unmount(cr, vfsp); if (ret) { if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), ZFS_DELEG_PERM_MOUNT, cr)) return (ret); } /* * We purge the parent filesystem's vfsp as the parent filesystem * and all of its snapshots have their vnode's v_vfsp set to the * parent's filesystem's vfsp. Note, 'z_parent' is self * referential for non-snapshots. */ (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); /* * Unmount any snapshots mounted under .zfs before unmounting the * dataset itself. */ if (zfsvfs->z_ctldir != NULL) { if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) return (ret); } if (fflag & MS_FORCE) { /* * Mark file system as unmounted before calling * vflush(FORCECLOSE). This way we ensure no future vnops * will be called and risk operating on DOOMED vnodes. */ rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); zfsvfs->z_unmounted = B_TRUE; rrm_exit(&zfsvfs->z_teardown_lock, FTAG); } /* * Flush all the files. */ ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td); if (ret != 0) return (ret); #ifdef illumos if (!(fflag & MS_FORCE)) { /* * Check the number of active vnodes in the file system. * Our count is maintained in the vfs structure, but the * number is off by 1 to indicate a hold on the vfs * structure itself. * * The '.zfs' directory maintains a reference of its * own, and any active references underneath are * reflected in the vnode count. */ if (zfsvfs->z_ctldir == NULL) { if (vfsp->vfs_count > 1) return (SET_ERROR(EBUSY)); } else { if (vfsp->vfs_count > 2 || zfsvfs->z_ctldir->v_count > 1) return (SET_ERROR(EBUSY)); } } #endif while (taskqueue_cancel(zfsvfs_taskq->tq_queue, &zfsvfs->z_unlinked_drain_task, NULL) != 0) taskqueue_drain(zfsvfs_taskq->tq_queue, &zfsvfs->z_unlinked_drain_task); VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); os = zfsvfs->z_os; /* * z_os will be NULL if there was an error in * attempting to reopen zfsvfs. */ if (os != NULL) { /* * Unset the objset user_ptr. */ mutex_enter(&os->os_user_ptr_lock); dmu_objset_set_user(os, NULL); mutex_exit(&os->os_user_ptr_lock); /* * Finally release the objset */ dmu_objset_disown(os, zfsvfs); } /* * We can now safely destroy the '.zfs' directory node. */ if (zfsvfs->z_ctldir != NULL) zfsctl_destroy(zfsvfs); zfs_freevfs(vfsp); return (0); } static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) { zfsvfs_t *zfsvfs = vfsp->vfs_data; znode_t *zp; int err; /* * zfs_zget() can't operate on virtual entries like .zfs/ or * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP. * This will make NFS to switch to LOOKUP instead of using VGET. */ if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR || (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir)) return (EOPNOTSUPP); ZFS_ENTER(zfsvfs); err = zfs_zget(zfsvfs, ino, &zp); if (err == 0 && zp->z_unlinked) { vrele(ZTOV(zp)); err = EINVAL; } if (err == 0) *vpp = ZTOV(zp); ZFS_EXIT(zfsvfs); if (err == 0) err = vn_lock(*vpp, flags); if (err != 0) *vpp = NULL; return (err); } static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, struct ucred **credanonp, int *numsecflavors, int **secflavors) { zfsvfs_t *zfsvfs = vfsp->vfs_data; /* * If this is regular file system vfsp is the same as * zfsvfs->z_parent->z_vfs, but if it is snapshot, * zfsvfs->z_parent->z_vfs represents parent file system * which we have to use here, because only this file system * has mnt_export configured. */ return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, credanonp, numsecflavors, secflavors)); } CTASSERT(SHORT_FID_LEN <= sizeof(struct fid)); CTASSERT(LONG_FID_LEN <= sizeof(struct fid)); static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) { struct componentname cn; zfsvfs_t *zfsvfs = vfsp->vfs_data; znode_t *zp; vnode_t *dvp; uint64_t object = 0; uint64_t fid_gen = 0; uint64_t gen_mask; uint64_t zp_gen; int i, err; *vpp = NULL; ZFS_ENTER(zfsvfs); /* * On FreeBSD we can get snapshot's mount point or its parent file * system mount point depending if snapshot is already mounted or not. */ if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { zfid_long_t *zlfid = (zfid_long_t *)fidp; uint64_t objsetid = 0; uint64_t setgen = 0; for (i = 0; i < sizeof (zlfid->zf_setid); i++) objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); for (i = 0; i < sizeof (zlfid->zf_setgen); i++) setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); ZFS_EXIT(zfsvfs); err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); if (err) return (SET_ERROR(EINVAL)); ZFS_ENTER(zfsvfs); } if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { zfid_short_t *zfid = (zfid_short_t *)fidp; for (i = 0; i < sizeof (zfid->zf_object); i++) object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); for (i = 0; i < sizeof (zfid->zf_gen); i++) fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); } else { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } /* * A zero fid_gen means we are in .zfs or the .zfs/snapshot * directory tree. If the object == zfsvfs->z_shares_dir, then * we are in the .zfs/shares directory tree. */ if ((fid_gen == 0 && (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) || (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) { ZFS_EXIT(zfsvfs); VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp)); if (object == ZFSCTL_INO_SNAPDIR) { cn.cn_nameptr = "snapshot"; cn.cn_namelen = strlen(cn.cn_nameptr); cn.cn_nameiop = LOOKUP; cn.cn_flags = ISLASTCN | LOCKLEAF; cn.cn_lkflags = flags; VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); vput(dvp); } else if (object == zfsvfs->z_shares_dir) { /* * XXX This branch must not be taken, * if it is, then the lookup below will * explode. */ cn.cn_nameptr = "shares"; cn.cn_namelen = strlen(cn.cn_nameptr); cn.cn_nameiop = LOOKUP; cn.cn_flags = ISLASTCN; cn.cn_lkflags = flags; VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); vput(dvp); } else { *vpp = dvp; } return (err); } gen_mask = -1ULL >> (64 - 8 * i); dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); if (err = zfs_zget(zfsvfs, object, &zp)) { ZFS_EXIT(zfsvfs); return (err); } (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, sizeof (uint64_t)); zp_gen = zp_gen & gen_mask; if (zp_gen == 0) zp_gen = 1; if (zp->z_unlinked || zp_gen != fid_gen) { dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); vrele(ZTOV(zp)); ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } *vpp = ZTOV(zp); ZFS_EXIT(zfsvfs); err = vn_lock(*vpp, flags); if (err == 0) vnode_create_vobject(*vpp, zp->z_size, curthread); else *vpp = NULL; return (err); } /* * Block out VOPs and close zfsvfs_t::z_os * * Note, if successful, then we return with the 'z_teardown_lock' and * 'z_teardown_inactive_lock' write held. We leave ownership of the underlying * dataset and objset intact so that they can be atomically handed off during * a subsequent rollback or recv operation and the resume thereafter. */ int zfs_suspend_fs(zfsvfs_t *zfsvfs) { int error; if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) return (error); return (0); } /* * Rebuild SA and release VOPs. Note that ownership of the underlying dataset * is an invariant across any of the operations that can be performed while the * filesystem was suspended. Whether it succeeded or failed, the preconditions * are the same: the relevant objset and associated dataset are owned by * zfsvfs, held, and long held on entry. */ int zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) { int err; znode_t *zp; ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock)); ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); /* * We already own this, so just update the objset_t, as the one we * had before may have been evicted. */ objset_t *os; VERIFY3P(ds->ds_owner, ==, zfsvfs); VERIFY(dsl_dataset_long_held(ds)); VERIFY0(dmu_objset_from_ds(ds, &os)); err = zfsvfs_init(zfsvfs, os); if (err != 0) goto bail; VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); zfs_set_fuid_feature(zfsvfs); /* * Attempt to re-establish all the active znodes with * their dbufs. If a zfs_rezget() fails, then we'll let * any potential callers discover that via ZFS_ENTER_VERIFY_VP * when they try to use their znode. */ mutex_enter(&zfsvfs->z_znodes_lock); for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = list_next(&zfsvfs->z_all_znodes, zp)) { (void) zfs_rezget(zp); } mutex_exit(&zfsvfs->z_znodes_lock); bail: /* release the VOPs */ rw_exit(&zfsvfs->z_teardown_inactive_lock); rrm_exit(&zfsvfs->z_teardown_lock, FTAG); if (err) { /* * Since we couldn't setup the sa framework, try to force * unmount this file system. */ if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) { vfs_ref(zfsvfs->z_vfs); (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); } } return (err); } static void zfs_freevfs(vfs_t *vfsp) { zfsvfs_t *zfsvfs = vfsp->vfs_data; #ifdef illumos /* * If this is a snapshot, we have an extra VFS_HOLD on our parent * from zfs_mount(). Release it here. If we came through * zfs_mountroot() instead, we didn't grab an extra hold, so * skip the VFS_RELE for rootvfs. */ if (zfsvfs->z_issnap && (vfsp != rootvfs)) VFS_RELE(zfsvfs->z_parent->z_vfs); #endif zfsvfs_free(zfsvfs); atomic_dec_32(&zfs_active_fs_count); } #ifdef __i386__ static int desiredvnodes_backup; #endif static void zfs_vnodes_adjust(void) { #ifdef __i386__ int newdesiredvnodes; desiredvnodes_backup = desiredvnodes; /* * We calculate newdesiredvnodes the same way it is done in * vntblinit(). If it is equal to desiredvnodes, it means that * it wasn't tuned by the administrator and we can tune it down. */ newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 * vm_kmem_size / (5 * (sizeof(struct vm_object) + sizeof(struct vnode)))); if (newdesiredvnodes == desiredvnodes) desiredvnodes = (3 * newdesiredvnodes) / 4; #endif } static void zfs_vnodes_adjust_back(void) { #ifdef __i386__ desiredvnodes = desiredvnodes_backup; #endif } void zfs_init(void) { printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n"); /* * Initialize .zfs directory structures */ zfsctl_init(); /* * Initialize znode cache, vnode ops, etc... */ zfs_znode_init(); /* * Reduce number of vnodes. Originally number of vnodes is calculated * with UFS inode in mind. We reduce it here, because it's too big for * ZFS/i386. */ zfs_vnodes_adjust(); dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); #if defined(__FreeBSD__) zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0); #endif } void zfs_fini(void) { #if defined(__FreeBSD__) taskq_destroy(zfsvfs_taskq); #endif zfsctl_fini(); zfs_znode_fini(); zfs_vnodes_adjust_back(); } int zfs_busy(void) { return (zfs_active_fs_count != 0); } int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) { int error; objset_t *os = zfsvfs->z_os; dmu_tx_t *tx; if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) return (SET_ERROR(EINVAL)); if (newvers < zfsvfs->z_version) return (SET_ERROR(EINVAL)); if (zfs_spa_version_map(newvers) > spa_version(dmu_objset_spa(zfsvfs->z_os))) return (SET_ERROR(ENOTSUP)); tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, ZFS_SA_ATTRS); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); } error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); return (error); } error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, &newvers, tx); if (error) { dmu_tx_commit(tx); return (error); } if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { uint64_t sa_obj; ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, SPA_VERSION_SA); sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, DMU_OT_NONE, 0, tx); error = zap_add(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); ASSERT0(error); VERIFY(0 == sa_set_sa_object(os, sa_obj)); sa_register_update_callback(os, zfs_sa_upgrade); } spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx, "from %llu to %llu", zfsvfs->z_version, newvers); dmu_tx_commit(tx); zfsvfs->z_version = newvers; zfs_set_fuid_feature(zfsvfs); return (0); } /* * Read a property stored within the master node. */ int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) { const char *pname; int error = ENOENT; /* * Look up the file system's value for the property. For the * version property, we look up a slightly different string. */ if (prop == ZFS_PROP_VERSION) pname = ZPL_VERSION_STR; else pname = zfs_prop_to_name(prop); if (os != NULL) { ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); } if (error == ENOENT) { /* No value set, use the default value */ switch (prop) { case ZFS_PROP_VERSION: *value = ZPL_VERSION; break; case ZFS_PROP_NORMALIZE: case ZFS_PROP_UTF8ONLY: *value = 0; break; case ZFS_PROP_CASE: *value = ZFS_CASE_SENSITIVE; break; default: return (error); } error = 0; } return (error); } /* * Return true if the coresponding vfs's unmounted flag is set. * Otherwise return false. * If this function returns true we know VFS unmount has been initiated. */ boolean_t zfs_get_vfs_flag_unmounted(objset_t *os) { zfsvfs_t *zfvp; boolean_t unmounted = B_FALSE; ASSERT(dmu_objset_type(os) == DMU_OST_ZFS); mutex_enter(&os->os_user_ptr_lock); zfvp = dmu_objset_get_user(os); if (zfvp != NULL && zfvp->z_vfs != NULL && (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT)) unmounted = B_TRUE; mutex_exit(&os->os_user_ptr_lock); return (unmounted); } #ifdef _KERNEL void zfsvfs_update_fromname(const char *oldname, const char *newname) { char tmpbuf[MAXPATHLEN]; struct mount *mp; char *fromname; size_t oldlen; oldlen = strlen(oldname); mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { fromname = mp->mnt_stat.f_mntfromname; if (strcmp(fromname, oldname) == 0) { (void)strlcpy(fromname, newname, sizeof(mp->mnt_stat.f_mntfromname)); continue; } if (strncmp(fromname, oldname, oldlen) == 0 && (fromname[oldlen] == '/' || fromname[oldlen] == '@')) { (void)snprintf(tmpbuf, sizeof(tmpbuf), "%s%s", newname, fromname + oldlen); (void)strlcpy(fromname, tmpbuf, sizeof(mp->mnt_stat.f_mntfromname)); continue; } } mtx_unlock(&mountlist_mtx); } #endif Index: stable/11/usr.bin/quota/quota.c =================================================================== --- stable/11/usr.bin/quota/quota.c (revision 339007) +++ stable/11/usr.bin/quota/quota.c (revision 339008) @@ -1,687 +1,697 @@ /* * Copyright (c) 1980, 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Robert Elz at The University of Melbourne. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1990, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif #ifndef lint static const char sccsid[] = "from: @(#)quota.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ /* * Disk quota reporting program. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static const char *qfextension[] = INITQFNAMES; struct quotause { struct quotause *next; long flags; struct dqblk dqblk; char fsname[MAXPATHLEN + 1]; }; static char *timeprt(int64_t seconds); static struct quotause *getprivs(long id, int quotatype); static void usage(void); static int showuid(u_long uid); static int showgid(u_long gid); static int showusrname(char *name); static int showgrpname(char *name); static int showquotas(int type, u_long id, const char *name); static void showrawquotas(int type, u_long id, struct quotause *qup); static void heading(int type, u_long id, const char *name, const char *tag); static int getufsquota(struct fstab *fs, struct quotause *qup, long id, int quotatype); static int getnfsquota(struct statfs *fst, struct quotause *qup, long id, int quotatype); -static int callaurpc(char *host, int prognum, int versnum, int procnum, +static enum clnt_stat callaurpc(char *host, int prognum, int versnum, int procnum, xdrproc_t inproc, char *in, xdrproc_t outproc, char *out); static int alldigits(char *s); static int hflag; static int lflag; static int rflag; static int qflag; static int vflag; static char *filename = NULL; int main(int argc, char *argv[]) { int ngroups; gid_t mygid, gidset[NGROUPS]; int i, ch, gflag = 0, uflag = 0, errflag = 0; while ((ch = getopt(argc, argv, "f:ghlrquv")) != -1) { switch(ch) { case 'f': filename = optarg; break; case 'g': gflag++; break; case 'h': hflag++; break; case 'l': lflag++; break; case 'q': qflag++; break; case 'r': rflag++; break; case 'u': uflag++; break; case 'v': vflag++; break; default: usage(); } } argc -= optind; argv += optind; if (!uflag && !gflag) uflag++; if (argc == 0) { if (uflag) errflag += showuid(getuid()); if (gflag) { mygid = getgid(); ngroups = getgroups(NGROUPS, gidset); if (ngroups < 0) err(1, "getgroups"); errflag += showgid(mygid); for (i = 0; i < ngroups; i++) if (gidset[i] != mygid) errflag += showgid(gidset[i]); } return(errflag); } if (uflag && gflag) usage(); if (uflag) { for (; argc > 0; argc--, argv++) { if (alldigits(*argv)) errflag += showuid(atoi(*argv)); else errflag += showusrname(*argv); } return(errflag); } if (gflag) { for (; argc > 0; argc--, argv++) { if (alldigits(*argv)) errflag += showgid(atoi(*argv)); else errflag += showgrpname(*argv); } } return(errflag); } static void usage(void) { fprintf(stderr, "%s\n%s\n%s\n", "usage: quota [-ghlu] [-f path] [-v | -q | -r]", " quota [-hlu] [-f path] [-v | -q | -r] user ...", " quota -g [-hl] [-f path] [-v | -q | -r] group ..."); exit(1); } /* * Print out quotas for a specified user identifier. */ static int showuid(u_long uid) { struct passwd *pwd = getpwuid(uid); const char *name; if (pwd == NULL) name = "(no account)"; else name = pwd->pw_name; return(showquotas(USRQUOTA, uid, name)); } /* * Print out quotas for a specifed user name. */ static int showusrname(char *name) { struct passwd *pwd = getpwnam(name); if (pwd == NULL) { warnx("%s: unknown user", name); return(1); } return(showquotas(USRQUOTA, pwd->pw_uid, name)); } /* * Print out quotas for a specified group identifier. */ static int showgid(u_long gid) { struct group *grp = getgrgid(gid); const char *name; if (grp == NULL) name = "(no entry)"; else name = grp->gr_name; return(showquotas(GRPQUOTA, gid, name)); } /* * Print out quotas for a specifed group name. */ static int showgrpname(char *name) { struct group *grp = getgrnam(name); if (grp == NULL) { warnx("%s: unknown group", name); return(1); } return(showquotas(GRPQUOTA, grp->gr_gid, name)); } static void prthumanval(int len, u_int64_t bytes) { char buf[len + 1]; /* * Limit the width to 5 bytes as that is what users expect. */ humanize_number(buf, MIN(sizeof(buf), 5), bytes, "", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); (void)printf(" %*s", len, buf); } static int showquotas(int type, u_long id, const char *name) { struct quotause *qup; struct quotause *quplist; const char *msgi, *msgb; const char *nam; char *bgrace = NULL, *igrace = NULL; int lines = 0, overquota = 0; static time_t now; if (now == 0) time(&now); quplist = getprivs(id, type); for (qup = quplist; qup; qup = qup->next) { msgi = NULL; if (qup->dqblk.dqb_ihardlimit && qup->dqblk.dqb_curinodes >= qup->dqblk.dqb_ihardlimit) { overquota++; msgi = "File limit reached on"; } else if (qup->dqblk.dqb_isoftlimit && qup->dqblk.dqb_curinodes >= qup->dqblk.dqb_isoftlimit) { overquota++; if (qup->dqblk.dqb_itime > now) msgi = "In file grace period on"; else msgi = "Over file quota on"; } msgb = NULL; if (qup->dqblk.dqb_bhardlimit && qup->dqblk.dqb_curblocks >= qup->dqblk.dqb_bhardlimit) { overquota++; msgb = "Block limit reached on"; } else if (qup->dqblk.dqb_bsoftlimit && qup->dqblk.dqb_curblocks >= qup->dqblk.dqb_bsoftlimit) { overquota++; if (qup->dqblk.dqb_btime > now) msgb = "In block grace period on"; else msgb = "Over block quota on"; } if (rflag) { showrawquotas(type, id, qup); continue; } if (!vflag && qup->dqblk.dqb_isoftlimit == 0 && qup->dqblk.dqb_ihardlimit == 0 && qup->dqblk.dqb_bsoftlimit == 0 && qup->dqblk.dqb_bhardlimit == 0) continue; if (qflag) { if ((msgi != NULL || msgb != NULL) && lines++ == 0) heading(type, id, name, ""); if (msgi != NULL) printf("\t%s %s\n", msgi, qup->fsname); if (msgb != NULL) printf("\t%s %s\n", msgb, qup->fsname); continue; } if (!vflag && qup->dqblk.dqb_curblocks == 0 && qup->dqblk.dqb_curinodes == 0) continue; if (lines++ == 0) heading(type, id, name, ""); nam = qup->fsname; if (strlen(qup->fsname) > 15) { printf("%s\n", qup->fsname); nam = ""; } printf("%-15s", nam); if (hflag) { prthumanval(7, dbtob(qup->dqblk.dqb_curblocks)); printf("%c", (msgb == NULL) ? ' ' : '*'); prthumanval(7, dbtob(qup->dqblk.dqb_bsoftlimit)); prthumanval(7, dbtob(qup->dqblk.dqb_bhardlimit)); } else { printf(" %7ju%c %7ju %7ju", (uintmax_t)dbtob(qup->dqblk.dqb_curblocks) / 1024, (msgb == NULL) ? ' ' : '*', (uintmax_t)dbtob(qup->dqblk.dqb_bsoftlimit) / 1024, (uintmax_t)dbtob(qup->dqblk.dqb_bhardlimit) / 1024); } if (msgb != NULL) bgrace = timeprt(qup->dqblk.dqb_btime); if (msgi != NULL) igrace = timeprt(qup->dqblk.dqb_itime); printf("%8s %6ju%c %6ju %6ju%8s\n" , (msgb == NULL) ? "" : bgrace , (uintmax_t)qup->dqblk.dqb_curinodes , (msgi == NULL) ? ' ' : '*' , (uintmax_t)qup->dqblk.dqb_isoftlimit , (uintmax_t)qup->dqblk.dqb_ihardlimit , (msgi == NULL) ? "" : igrace ); if (msgb != NULL) free(bgrace); if (msgi != NULL) free(igrace); } if (!qflag && !rflag && lines == 0) heading(type, id, name, "none"); return (overquota); } static void showrawquotas(int type, u_long id, struct quotause *qup) { time_t t; printf("Raw %s quota information for id %lu on %s\n", type == USRQUOTA ? "user" : "group", id, qup->fsname); printf("block hard limit: %ju\n", (uintmax_t)qup->dqblk.dqb_bhardlimit); printf("block soft limit: %ju\n", (uintmax_t)qup->dqblk.dqb_bsoftlimit); printf("current block count: %ju\n", (uintmax_t)qup->dqblk.dqb_curblocks); printf("i-node hard limit: %ju\n", (uintmax_t)qup->dqblk.dqb_ihardlimit); printf("i-node soft limit: %ju\n", (uintmax_t)qup->dqblk.dqb_isoftlimit); printf("current i-node count: %ju\n", (uintmax_t)qup->dqblk.dqb_curinodes); printf("block grace time: %jd", (intmax_t)qup->dqblk.dqb_btime); if (qup->dqblk.dqb_btime != 0) { t = qup->dqblk.dqb_btime; printf(" %s", ctime(&t)); } else { printf("\n"); } printf("i-node grace time: %jd", (intmax_t)qup->dqblk.dqb_itime); if (qup->dqblk.dqb_itime != 0) { t = qup->dqblk.dqb_itime; printf(" %s", ctime(&t)); } else { printf("\n"); } } static void heading(int type, u_long id, const char *name, const char *tag) { printf("Disk quotas for %s %s (%cid %lu): %s\n", qfextension[type], name, *qfextension[type], id, tag); if (!qflag && tag[0] == '\0') { printf("%-15s %7s %8s %7s %7s %6s %7s %6s%8s\n" , "Filesystem" , "usage" , "quota" , "limit" , "grace" , "files" , "quota" , "limit" , "grace" ); } } /* * Calculate the grace period and return a printable string for it. */ static char * timeprt(int64_t seconds) { time_t hours, minutes; char *buf; static time_t now; if (now == 0) time(&now); if (now > seconds) { if ((buf = strdup("none")) == NULL) errx(1, "strdup() failed in timeprt()"); return (buf); } seconds -= now; minutes = (seconds + 30) / 60; hours = (minutes + 30) / 60; if (hours >= 36) { if (asprintf(&buf, "%lddays", ((long)hours + 12) / 24) < 0) errx(1, "asprintf() failed in timeprt(1)"); return (buf); } if (minutes >= 60) { if (asprintf(&buf, "%2ld:%ld", (long)minutes / 60, (long)minutes % 60) < 0) errx(1, "asprintf() failed in timeprt(2)"); return (buf); } if (asprintf(&buf, "%2ld", (long)minutes) < 0) errx(1, "asprintf() failed in timeprt(3)"); return (buf); } /* * Collect the requested quota information. */ static struct quotause * getprivs(long id, int quotatype) { struct quotause *qup, *quptail = NULL; struct fstab *fs; struct quotause *quphead; struct statfs *fst; int nfst, i; struct statfs sfb; qup = quphead = (struct quotause *)0; if (filename != NULL && statfs(filename, &sfb) != 0) err(1, "cannot statfs %s", filename); nfst = getmntinfo(&fst, MNT_NOWAIT); if (nfst == 0) errx(2, "no filesystems mounted!"); setfsent(); for (i = 0; i < nfst; i++) { if (qup == NULL) { if ((qup = (struct quotause *)malloc(sizeof *qup)) == NULL) errx(2, "out of memory"); } /* * See if the user requested a specific file system * or specified a file inside a mounted file system. */ if (filename != NULL && strcmp(sfb.f_mntonname, fst[i].f_mntonname) != 0) continue; if (strcmp(fst[i].f_fstypename, "nfs") == 0) { if (lflag) continue; if (getnfsquota(&fst[i], qup, id, quotatype) == 0) continue; } else if (strcmp(fst[i].f_fstypename, "ufs") == 0) { /* * XXX * UFS filesystems must be in /etc/fstab, and must * indicate that they have quotas on (?!) This is quite * unlike SunOS where quotas can be enabled/disabled * on a filesystem independent of /etc/fstab, and it * will still print quotas for them. */ if ((fs = getfsspec(fst[i].f_mntfromname)) == NULL) continue; if (getufsquota(fs, qup, id, quotatype) == 0) continue; } else continue; strcpy(qup->fsname, fst[i].f_mntonname); if (quphead == NULL) quphead = qup; else quptail->next = qup; quptail = qup; quptail->next = 0; qup = NULL; } if (qup) free(qup); endfsent(); return (quphead); } /* * Check to see if a particular quota is available. */ static int getufsquota(struct fstab *fs, struct quotause *qup, long id, int quotatype) { struct quotafile *qf; if ((qf = quota_open(fs, quotatype, O_RDONLY)) == NULL) return (0); if (quota_read(qf, &qup->dqblk, id) != 0) return (0); quota_close(qf); return (1); } static int getnfsquota(struct statfs *fst, struct quotause *qup, long id, int quotatype) { - struct getquota_args gq_args; + struct ext_getquota_args gq_args; + struct getquota_args old_gq_args; struct getquota_rslt gq_rslt; struct dqblk *dqp = &qup->dqblk; struct timeval tv; char *cp, host[NI_MAXHOST]; + enum clnt_stat call_stat; if (fst->f_flags & MNT_LOCAL) return (0); /* - * rpc.rquotad does not support group quotas - */ - if (quotatype != USRQUOTA) - return (0); - - /* * must be some form of "hostname:/path" */ cp = fst->f_mntfromname; do { cp = strrchr(cp, ':'); } while (cp != NULL && *(cp + 1) != '/'); if (cp == NULL) { warnx("cannot find hostname for %s", fst->f_mntfromname); return (0); } memset(host, 0, sizeof(host)); memcpy(host, fst->f_mntfromname, cp - fst->f_mntfromname); host[sizeof(host) - 1] = '\0'; /* Avoid attempting the RPC for special amd(8) filesystems. */ if (strncmp(fst->f_mntfromname, "pid", 3) == 0 && strchr(fst->f_mntfromname, '@') != NULL) return (0); gq_args.gqa_pathp = cp + 1; - gq_args.gqa_uid = id; - if (callaurpc(host, RQUOTAPROG, RQUOTAVERS, - RQUOTAPROC_GETQUOTA, (xdrproc_t)xdr_getquota_args, (char *)&gq_args, - (xdrproc_t)xdr_getquota_rslt, (char *)&gq_rslt) != 0) - return (0); + gq_args.gqa_id = id; + gq_args.gqa_type = quotatype; + call_stat = callaurpc(host, RQUOTAPROG, EXT_RQUOTAVERS, + RQUOTAPROC_GETQUOTA, (xdrproc_t)xdr_ext_getquota_args, (char *)&gq_args, + (xdrproc_t)xdr_getquota_rslt, (char *)&gq_rslt); + if (call_stat == RPC_PROGVERSMISMATCH) { + if (quotatype == USRQUOTA) { + old_gq_args.gqa_pathp = cp + 1; + old_gq_args.gqa_uid = id; + call_stat = callaurpc(host, RQUOTAPROG, RQUOTAVERS, + RQUOTAPROC_GETQUOTA, (xdrproc_t)xdr_getquota_args, (char *)&old_gq_args, + (xdrproc_t)xdr_getquota_rslt, (char *)&gq_rslt); + } else { + /* Old rpc quota does not support group type */ + return (0); + } + } + if (call_stat != 0) + return (call_stat); + switch (gq_rslt.status) { case Q_NOQUOTA: break; case Q_EPERM: warnx("quota permission error, host: %s", fst->f_mntfromname); break; case Q_OK: gettimeofday(&tv, NULL); /* blocks*/ dqp->dqb_bhardlimit = gq_rslt.getquota_rslt_u.gqr_rquota.rq_bhardlimit * (gq_rslt.getquota_rslt_u.gqr_rquota.rq_bsize / DEV_BSIZE); dqp->dqb_bsoftlimit = gq_rslt.getquota_rslt_u.gqr_rquota.rq_bsoftlimit * (gq_rslt.getquota_rslt_u.gqr_rquota.rq_bsize / DEV_BSIZE); dqp->dqb_curblocks = gq_rslt.getquota_rslt_u.gqr_rquota.rq_curblocks * (gq_rslt.getquota_rslt_u.gqr_rquota.rq_bsize / DEV_BSIZE); /* inodes */ dqp->dqb_ihardlimit = gq_rslt.getquota_rslt_u.gqr_rquota.rq_fhardlimit; dqp->dqb_isoftlimit = gq_rslt.getquota_rslt_u.gqr_rquota.rq_fsoftlimit; dqp->dqb_curinodes = gq_rslt.getquota_rslt_u.gqr_rquota.rq_curfiles; /* grace times */ dqp->dqb_btime = tv.tv_sec + gq_rslt.getquota_rslt_u.gqr_rquota.rq_btimeleft; dqp->dqb_itime = tv.tv_sec + gq_rslt.getquota_rslt_u.gqr_rquota.rq_ftimeleft; return (1); default: warnx("bad rpc result, host: %s", fst->f_mntfromname); break; } return (0); } -static int +static enum clnt_stat callaurpc(char *host, int prognum, int versnum, int procnum, xdrproc_t inproc, char *in, xdrproc_t outproc, char *out) { enum clnt_stat clnt_stat; struct timeval timeout, tottimeout; CLIENT *client = NULL; client = clnt_create(host, prognum, versnum, "udp"); if (client == NULL) return ((int)rpc_createerr.cf_stat); timeout.tv_usec = 0; timeout.tv_sec = 6; CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, (char *)(void *)&timeout); client->cl_auth = authunix_create_default(); tottimeout.tv_sec = 25; tottimeout.tv_usec = 0; clnt_stat = clnt_call(client, procnum, inproc, in, outproc, out, tottimeout); - - return ((int) clnt_stat); + return (clnt_stat); } static int alldigits(char *s) { int c; c = *s++; do { if (!isdigit(c)) return (0); } while ((c = *s++)); return (1); } Index: stable/11 =================================================================== --- stable/11 (revision 339007) +++ stable/11 (revision 339008) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r336017,338799