diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -3259,8 +3259,16 @@ { int error; - error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, - &exp->nes_numsecflavor, exp->nes_secflavors); + error = 0; + *credp = NULL; + MNT_ILOCK(mp); + if (mp->mnt_exjail == NULL || + mp->mnt_exjail->cr_prison != curthread->td_ucred->cr_prison) + error = EACCES; + MNT_IUNLOCK(mp); + if (error == 0) + error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, + &exp->nes_numsecflavor, exp->nes_secflavors); if (error) { if (NFSD_VNET(nfs_rootfhset)) { exp->nes_exflag = 0; @@ -3294,8 +3302,14 @@ /* Make sure the server replies ESTALE to the client. */ error = ESTALE; if (nam && !error) { - error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, - &exp->nes_numsecflavor, exp->nes_secflavors); + MNT_ILOCK(mp); + if (mp->mnt_exjail == NULL || + mp->mnt_exjail->cr_prison != curthread->td_ucred->cr_prison) + error = EACCES; + MNT_IUNLOCK(mp); + if (error == 0) + error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, + &exp->nes_numsecflavor, exp->nes_secflavors); if (error) { if (NFSD_VNET(nfs_rootfhset)) { exp->nes_exflag = 0; @@ -3465,7 +3479,7 @@ struct nameidata nd; fhandle_t fh; - error = vfs_export(NFSD_VNET(nfsv4root_mnt), &nfsexargp->export); + error = vfs_export(NFSD_VNET(nfsv4root_mnt), &nfsexargp->export, false); if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) NFSD_VNET(nfs_rootfhset) = 0; else if (error == 0) { diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -3332,6 +3332,7 @@ { sx_assert(&allprison_lock, SA_XLOCKED); mtx_assert(&pr->pr_mtx, MA_NOTOWNED); + vfs_exjail_delete(pr); shm_remove_prison(pr); (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); } diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c --- a/sys/kern/vfs_export.c +++ b/sys/kern/vfs_export.c @@ -52,6 +52,7 @@ #include #include 
#include +#include #include #include #include @@ -296,12 +297,18 @@ * and the passed in netexport. * Struct export_args *argp is the variable used to twiddle options, * the structure is described in sys/mount.h + * The do_exjail argument should be true if *mp is in the mountlist + * and false if not. It is not in the mountlist for the NFSv4 rootfs + * fake mount point just used for exports. */ int -vfs_export(struct mount *mp, struct export_args *argp) +vfs_export(struct mount *mp, struct export_args *argp, bool do_exjail) { struct netexport *nep; + struct ucred *cr; + struct prison *pr; int error; + bool new_nep; if ((argp->ex_flags & (MNT_DELEXPORT | MNT_EXPORTED)) == 0) return (EINVAL); @@ -312,6 +319,7 @@ return (EINVAL); error = 0; + pr = curthread->td_ucred->cr_prison; lockmgr(&mp->mnt_explock, LK_EXCLUSIVE, NULL); nep = mp->mnt_export; if (argp->ex_flags & MNT_DELEXPORT) { @@ -319,6 +327,21 @@ error = ENOENT; goto out; } + MNT_ILOCK(mp); + if (mp->mnt_exjail != NULL && mp->mnt_exjail->cr_prison != pr && + pr == &prison0) { + MNT_IUNLOCK(mp); + /* EXDEV will not get logged by mountd(8). */ + error = EXDEV; + goto out; + } else if (mp->mnt_exjail != NULL && + mp->mnt_exjail->cr_prison != pr) { + MNT_IUNLOCK(mp); + /* EPERM will get logged by mountd(8). 
*/ + error = EPERM; + goto out; + } + MNT_IUNLOCK(mp); if (mp->mnt_flag & MNT_EXPUBLIC) { vfs_setpublicfs(NULL, NULL, NULL); MNT_ILOCK(mp); @@ -330,18 +353,51 @@ free(nep, M_MOUNT); nep = NULL; MNT_ILOCK(mp); + cr = mp->mnt_exjail; + mp->mnt_exjail = NULL; mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); MNT_IUNLOCK(mp); + if (cr != NULL) { + atomic_subtract_int(&pr->pr_exportcnt, 1); + crfree(cr); + } } if (argp->ex_flags & MNT_EXPORTED) { + new_nep = false; + MNT_ILOCK(mp); + if (mp->mnt_exjail == NULL) { + MNT_IUNLOCK(mp); + if (do_exjail && nep != NULL) { + vfs_free_addrlist(nep); + memset(nep, 0, sizeof(*nep)); + new_nep = true; + } + } else if (mp->mnt_exjail->cr_prison != pr) { + MNT_IUNLOCK(mp); + error = EPERM; + goto out; + } else + MNT_IUNLOCK(mp); if (nep == NULL) { - nep = malloc(sizeof(struct netexport), M_MOUNT, M_WAITOK | M_ZERO); + nep = malloc(sizeof(struct netexport), M_MOUNT, + M_WAITOK | M_ZERO); mp->mnt_export = nep; + new_nep = true; } if (argp->ex_flags & MNT_EXPUBLIC) { - if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) + if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) { + if (new_nep) { + mp->mnt_export = NULL; + free(nep, M_MOUNT); + } goto out; + } + new_nep = false; MNT_ILOCK(mp); + if (do_exjail && mp->mnt_exjail == NULL) { + mp->mnt_exjail = crhold(curthread->td_ucred); + atomic_add_int(&pr->pr_exportcnt, 1); + } mp->mnt_flag |= MNT_EXPUBLIC; MNT_IUNLOCK(mp); } @@ -349,9 +405,18 @@ argp->ex_numsecflavors = 1; argp->ex_secflavors[0] = AUTH_SYS; } - if ((error = vfs_hang_addrlist(mp, nep, argp))) + if ((error = vfs_hang_addrlist(mp, nep, argp))) { + if (new_nep) { + mp->mnt_export = NULL; + free(nep, M_MOUNT); + } goto out; + } MNT_ILOCK(mp); + if (do_exjail && mp->mnt_exjail == NULL) { + mp->mnt_exjail = crhold(curthread->td_ucred); + atomic_add_int(&pr->pr_exportcnt, 1); + } mp->mnt_flag |= MNT_EXPORTED; MNT_IUNLOCK(mp); } @@ -371,6 +436,97 @@ return (error); } +/* + * Get rid of credential references for this prison. 
+ */
+void
+vfs_exjail_delete(struct prison *pr)
+{
+	struct mount *mp;
+	struct ucred *cr;	/* mnt_exjail reference detached from a mount */
+	int error, i;		/* i: references still expected to be found */
+
+	/*
+	 * Since this function is called from prison_cleanup() after
+	 * all processes in the prison have exited, the value of
+	 * pr_exportcnt can no longer increase. It is possible for
+	 * a dismount of a file system exported within this prison
+	 * to be in progress. In this case, the file system is no
+	 * longer in the mountlist and the mnt_exjail will be freed
+	 * by vfs_mount_destroy() at some time. As such, pr_exportcnt
+	 * and, therefore "i", is the upper bound on the number of
+	 * mnt_exjail entries to be found by this function.
+	 */
+	i = atomic_load_int(&pr->pr_exportcnt);
+	KASSERT(i >= 0, ("vfs_exjail_delete: pr_exportcnt negative"));
+	if (i == 0)
+		return;		/* no exports are counted for this prison */
+	mtx_lock(&mountlist_mtx);
+tryagain:
+	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
+		MNT_ILOCK(mp);	/* mnt_exjail is an interlock-protected field */
+		if (mp->mnt_exjail != NULL &&
+		    mp->mnt_exjail->cr_prison == pr) {
+			MNT_IUNLOCK(mp);
+			error = vfs_busy(mp, MBF_MNTLSTLOCK | MBF_NOWAIT);
+			if (error != 0) {
+				/*
+				 * If the vfs_busy() fails, we still want to
+				 * get rid of mnt_exjail for two reasons:
+				 * - a credential reference will result in
+				 * a prison not being removed
+				 * - setting mnt_exjail NULL indicates that
+				 * the exports are no longer valid
+				 * The now invalid exports will be deleted
+				 * when the file system is dismounted or
+				 * the file system is re-exported by mountd.
+				 */
+				cr = NULL;
+				MNT_ILOCK(mp);	/* recheck: interlock was dropped */
+				if (mp->mnt_exjail != NULL &&
+				    mp->mnt_exjail->cr_prison == pr) {
+					cr = mp->mnt_exjail;
+					mp->mnt_exjail = NULL;
+				}
+				MNT_IUNLOCK(mp);
+				if (cr != NULL) {
+					crfree(cr);
+					i--;	/* one fewer left to find */
+				}
+				if (i == 0)
+					break;
+				continue;	/* NOTE(review): assumes mountlist_mtx is still held */
+			}
+			cr = NULL;
+			lockmgr(&mp->mnt_explock, LK_EXCLUSIVE, NULL);	/* guards mnt_export */
+			MNT_ILOCK(mp);	/* recheck: interlock was dropped */
+			if (mp->mnt_exjail != NULL &&
+			    mp->mnt_exjail->cr_prison == pr) {
+				cr = mp->mnt_exjail;
+				mp->mnt_exjail = NULL;
+				mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
+				MNT_IUNLOCK(mp);
+				vfs_free_addrlist(mp->mnt_export);
+				free(mp->mnt_export, M_MOUNT);
+				mp->mnt_export = NULL;
+			} else
+				MNT_IUNLOCK(mp);
+			lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
+			if (cr != NULL) {
+				crfree(cr);
+				i--;	/* one fewer left to find */
+			}
+			mtx_lock(&mountlist_mtx);	/* presumably dropped by vfs_busy() */
+			vfs_unbusy(mp);
+			if (i == 0)
+				break;
+			goto tryagain;	/* rescan from the list head */
+		}
+		MNT_IUNLOCK(mp);	/* no mnt_exjail for pr on this mount */
+	}
+	mtx_unlock(&mountlist_mtx);
+}
+
 /*
  * Set the publicly exported filesystem (WebNFS). Currently, only
  * one public filesystem is possible in the spec (RFC 2054 and 2055)
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -761,6 +761,11 @@
 #endif
 	if (mp->mnt_opt != NULL)
 		vfs_freeopts(mp->mnt_opt);
+	if (mp->mnt_exjail != NULL) {
+		atomic_subtract_int(&mp->mnt_exjail->cr_prison->pr_exportcnt,
+		    1);
+		crfree(mp->mnt_exjail);
+	}
 	if (mp->mnt_export != NULL) {
 		vfs_free_addrlist(mp->mnt_export);
 		free(mp->mnt_export, M_MOUNT);
@@ -1395,7 +1400,7 @@
 			} else
 				export_error = EINVAL;
 			if (export_error == 0)
-				export_error = vfs_export(mp, &export);
+				export_error = vfs_export(mp, &export, true);
 			free(export.ex_groups, M_TEMP);
 			break;
 		case (sizeof(export)):
@@ -1417,7 +1422,7 @@
 			else
 				export_error = EINVAL;
 			if (export_error == 0)
-				export_error = vfs_export(mp, &export);
+				export_error = vfs_export(mp, &export, true);
 			free(grps, M_TEMP);
 			break;
 		default:
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -196,7 +196,8 @@
 	int
pr_enforce_statfs; /* (p) statfs permission */ int pr_devfs_rsnum; /* (p) devfs ruleset */ enum prison_state pr_state; /* (q) state in life cycle */ - int pr_spare[2]; + volatile int pr_exportcnt; /* (r) count of mount exports */ + int pr_spare; int pr_osreldate; /* (c) kern.osreldate value */ unsigned long pr_hostid; /* (p) jail hostid */ char pr_name[MAXHOSTNAMELEN]; /* (p) admin jail name */ diff --git a/sys/sys/mount.h b/sys/sys/mount.h --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -216,6 +216,7 @@ * i - interlock * v - vnode freelist mutex * d - deferred unmount list mutex + * e - mnt_explock * * Unmarked fields are considered stable as long as a ref is held. * @@ -245,13 +246,14 @@ void * mnt_data; /* private data */ time_t mnt_time; /* last time written*/ int mnt_iosize_max; /* max size for clusters, etc */ - struct netexport *mnt_export; /* export list */ + struct netexport *mnt_export; /* (e) export list */ struct label *mnt_label; /* MAC label for the fs */ u_int mnt_hashseed; /* Random seed for vfs_hash */ int mnt_lockref; /* (i) Lock reference count */ int mnt_secondary_writes; /* (i) # of secondary writes */ int mnt_secondary_accwrites;/* (i) secondary wr. starts */ struct thread *mnt_susp_owner; /* (i) thread owning suspension */ + struct ucred *mnt_exjail; /* (i) jail which did exports */ #define mnt_endzero mnt_gjprovider char *mnt_gjprovider; /* gjournal provider name */ struct mtx mnt_listmtx; @@ -1015,8 +1017,9 @@ (struct mount *, struct netexport *, struct export_args *); void vfs_periodic(struct mount *, int); int vfs_busy(struct mount *, int); +void vfs_exjail_delete(struct prison *); int vfs_export /* process mount export info */ - (struct mount *, struct export_args *); + (struct mount *, struct export_args *, bool); void vfs_free_addrlist(struct netexport *); void vfs_allocate_syncvnode(struct mount *); void vfs_deallocate_syncvnode(struct mount *);