Index: lib/libc/sys/fcntl.2 =================================================================== --- lib/libc/sys/fcntl.2 +++ lib/libc/sys/fcntl.2 @@ -28,7 +28,7 @@ .\" @(#)fcntl.2 8.2 (Berkeley) 1/12/94 .\" $FreeBSD$ .\" -.Dd Nov 15, 2018 +.Dd September 4, 2019 .Dt FCNTL 2 .Os .Sh NAME @@ -180,6 +180,11 @@ A zero value in .Fa arg turns off read ahead. +.It Dv F_ADD_SEALS +Add seals to the file as described below, if the underlying filesystem supports +seals. +.It Dv F_GET_SEALS +Get seals associated with the file, if the underlying filesystem supports seals. .El .Pp The flags for the @@ -217,6 +222,37 @@ upon availability of data to be read. .El .Pp +The seals that may be applied with +.Dv F_ADD_SEALS +are as follows: +.Bl -tag -width F_SEAL_SHRINK +.It Dv F_SEAL_SEAL +Prevent any further seals from being applied to the file. +.It Dv F_SEAL_SHRINK +Prevent the file from being shrunk with +.Xr ftruncate 2 . +.It Dv F_SEAL_GROW +Prevent the file from being enlarged with +.Xr ftruncate 2 . +.It Dv F_SEAL_WRITE +Prevent any further +.Xr write 2 +calls to the file. +Any writes in progress will finish before +.Fn fcntl +returns. +If any writeable mappings exist, F_ADD_SEALS will fail and return +.Er EBUSY . +.El +.Pp +Seals are on a per-inode basis and require support by the underlying filesystem. +If the underlying filesystem does not support seals, +.Dv F_ADD_SEALS +and +.Dv F_GET_SEALS +will fail and return +.Er EINVAL . +.Pp Several commands are available for doing advisory file locking; they all operate on the following structure: .Bd -literal @@ -528,6 +564,14 @@ and .Fa fd is not a valid file descriptor open for writing. +.It Bq Er EBUSY +The argument +.Fa cmd +is +.Dv F_ADD_SEALS , +attempting to set +.Dv F_SEAL_WRITE , +and writeable mappings to the file exist. .It Bq Er EDEADLK The argument .Fa cmd @@ -565,6 +609,14 @@ .Pp The argument .Fa cmd +is +.Dv F_ADD_SEALS +or +.Dv F_GET_SEALS , +and the underlying filesystem does not support sealing. 
+.Pp +The argument +.Fa cmd is invalid. .It Bq Er EMFILE The argument @@ -624,6 +676,15 @@ and the process ID or process group given as an argument is in a different session than the caller. +.Pp +The +.Fa cmd +argument +is +.Dv F_ADD_SEALS +and the +.Dv F_SEAL_SEAL +seal has already been set. .It Bq Er ESRCH The .Fa cmd Index: sys/kern/kern_descrip.c =================================================================== --- sys/kern/kern_descrip.c +++ sys/kern/kern_descrip.c @@ -489,7 +489,7 @@ struct filedescent *fde; struct proc *p; struct vnode *vp; - int error, flg, tmp; + int error, flg, seals, tmp; uint64_t bsize; off_t foffset; @@ -756,6 +756,25 @@ fdrop(fp, td); break; + case F_ADD_SEALS: + error = fget_unlocked(fdp, fd, &cap_no_rights, &fp, NULL); + if (error != 0) + break; + error = fo_add_seals(fp, arg); + fdrop(fp, td); + break; + + case F_GET_SEALS: + error = fget_unlocked(fdp, fd, &cap_no_rights, &fp, NULL); + if (error != 0) + break; + /* Propagate the fileops errno, as F_ADD_SEALS does. */ + error = fo_get_seals(fp, &seals); + if (error == 0) + td->td_retval[0] = seals; + fdrop(fp, td); + break; + case F_RDAHEAD: arg = arg ? 
128 * 1024: 0; /* FALLTHROUGH */ Index: sys/kern/uipc_shm.c =================================================================== --- sys/kern/uipc_shm.c +++ sys/kern/uipc_shm.c @@ -115,6 +115,16 @@ #define SHM_HASH(fnv) (&shm_dictionary[(fnv) & shm_hash]) +#define SHM_SEAL_XLOCK(shmfd) sx_xlock(&(shmfd)->shm_seal_sx) +#define SHM_SEAL_XUNLOCK(shmfd) sx_xunlock(&(shmfd)->shm_seal_sx) +#define SHM_SEAL_SLOCK(shmfd) sx_slock(&(shmfd)->shm_seal_sx) +#define SHM_SEAL_SUNLOCK(shmfd) sx_sunlock(&(shmfd)->shm_seal_sx) +#define SHM_SEAL_UNLOCK(shmfd) sx_unlock(&(shmfd)->shm_seal_sx) +#define SHM_SEAL_DOWNGRADE(shmfd) sx_downgrade(&(shmfd)->shm_seal_sx) +/* Satisfied by either a shared or an exclusive hold of the seal lock. */ +#define SHM_SEAL_ASSERT_LOCKED(shmfd) \ + sx_assert(&(shmfd)->shm_seal_sx, SA_LOCKED) + static void shm_init(void *arg); static void shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd); static struct shmfd *shm_lookup(char *path, Fnv32_t fnv); @@ -131,6 +141,8 @@ static fo_seek_t shm_seek; static fo_fill_kinfo_t shm_fill_kinfo; static fo_mmap_t shm_mmap; +static fo_get_seals_t shm_get_seals; +static fo_add_seals_t shm_add_seals; /* File descriptor operations. 
*/ struct fileops shm_ops = { @@ -148,6 +160,8 @@ .fo_seek = shm_seek, .fo_fill_kinfo = shm_fill_kinfo, .fo_mmap = shm_mmap, + .fo_get_seals = shm_get_seals, + .fo_add_seals = shm_add_seals, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; @@ -323,6 +337,11 @@ if (error) return (error); #endif + SHM_SEAL_SLOCK(shmfd); + if ((shmfd->shm_seals & F_SEAL_WRITE) != 0) { + SHM_SEAL_SUNLOCK(shmfd); + return (EPERM); + } foffset_lock_uio(fp, uio, flags); if ((flags & FOF_OFFSET) == 0) { rl_cookie = rangelock_wlock(&shmfd->shm_rl, 0, OFF_MAX, @@ -335,6 +354,7 @@ error = uiomove_object(shmfd->shm_object, shmfd->shm_size, uio); rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); foffset_unlock_uio(fp, uio, flags); + SHM_SEAL_SUNLOCK(shmfd); return (error); } @@ -427,8 +447,8 @@ return (0); } -int -shm_dotruncate(struct shmfd *shmfd, off_t length) +static int +shm_dotruncate_locked(struct shmfd *shmfd, off_t length) { vm_object_t object; vm_page_t m; @@ -437,6 +457,8 @@ int base, rv; KASSERT(length >= 0, ("shm_dotruncate: length < 0")); + SHM_SEAL_ASSERT_LOCKED(shmfd); + object = shmfd->shm_object; VM_OBJECT_WLOCK(object); if (length == shmfd->shm_size) { @@ -447,6 +469,11 @@ /* Are we shrinking? If so, trim the end. */ if (length < shmfd->shm_size) { + if ((shmfd->shm_seals & F_SEAL_SHRINK) != 0) { + VM_OBJECT_WUNLOCK(object); + return (EPERM); + } + /* * Disallow any requests to shrink the size if this * object is mapped into the kernel. @@ -517,6 +544,11 @@ swap_release_by_cred(delta, object->cred); object->charge -= delta; } else { + if ((shmfd->shm_seals & F_SEAL_GROW) != 0) { + VM_OBJECT_WUNLOCK(object); + return (EPERM); + } + /* Try to reserve additional swap space. 
*/ delta = IDX_TO_OFF(nobjsize - object->size); if (!swap_reserve_by_cred(delta, object->cred)) { @@ -535,6 +567,17 @@ return (0); } +int +shm_dotruncate(struct shmfd *shmfd, off_t length) +{ + int rv; + + SHM_SEAL_SLOCK(shmfd); /* excludes shm_add_seals() */ + rv = shm_dotruncate_locked(shmfd, length); + SHM_SEAL_SUNLOCK(shmfd); + return (rv); +} + /* * shmfd object management including creation and reference counting * routines. @@ -563,6 +606,7 @@ shmfd->shm_ino = alloc_unr64(&shm_ino_unr); refcount_init(&shmfd->shm_refs, 1); mtx_init(&shmfd->shm_mtx, "shmrl", NULL, MTX_DEF); + sx_init(&shmfd->shm_seal_sx, "shmseal"); rangelock_init(&shmfd->shm_rl); #ifdef MAC mac_posixshm_init(shmfd); @@ -589,6 +633,7 @@ mac_posixshm_destroy(shmfd); #endif rangelock_destroy(&shmfd->shm_rl); + sx_destroy(&shmfd->shm_seal_sx); mtx_destroy(&shmfd->shm_mtx); vm_object_deallocate(shmfd->shm_object); free(shmfd, M_SHMFD); @@ -900,6 +945,8 @@ shmfd = fp->f_data; maxprot = VM_PROT_NONE; + SHM_SEAL_SLOCK(shmfd); + /* FREAD should always be set. */ if ((fp->f_flag & FREAD) != 0) maxprot |= VM_PROT_EXECUTE | VM_PROT_READ; @@ -908,9 +955,16 @@ writecnt = (flags & MAP_SHARED) != 0 && (prot & VM_PROT_WRITE) != 0; + if (writecnt && (shmfd->shm_seals & F_SEAL_WRITE) != 0) { + SHM_SEAL_SUNLOCK(shmfd); + return (EPERM); + } + /* NOTE(review): maxprot is not trimmed by the seal; confirm mprotect(2) cannot add write later. */ /* Don't permit shared writable mappings on read-only descriptors. */ - if (writecnt && (maxprot & VM_PROT_WRITE) == 0) + if (writecnt && (maxprot & VM_PROT_WRITE) == 0) { + SHM_SEAL_SUNLOCK(shmfd); return (EACCES); + } maxprot &= cap_maxprot; /* See comment in vn_mmap(). 
*/ @@ -918,13 +972,17 @@ #ifdef _LP64 objsize > OFF_MAX || #endif - foff < 0 || foff > OFF_MAX - objsize) + foff < 0 || foff > OFF_MAX - objsize) { + SHM_SEAL_SUNLOCK(shmfd); return (EINVAL); + } #ifdef MAC error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, flags); - if (error != 0) + if (error != 0) { + SHM_SEAL_SUNLOCK(shmfd); return (error); + } #endif mtx_lock(&shm_timestamp_lock); @@ -942,6 +1000,7 @@ objsize); vm_object_deallocate(shmfd->shm_object); } + SHM_SEAL_SUNLOCK(shmfd); return (error); } @@ -1149,6 +1208,66 @@ return (res); } +/* + * fo_add_seals handler for shm objects. Fails with EINVAL for undefined + * seal bits, EPERM once F_SEAL_SEAL is set, and EBUSY when F_SEAL_WRITE is + * requested while writable mappings exist; otherwise records the seals. + */ +static int +shm_add_seals(struct file *fp, int seals) +{ + struct shmfd *shmfd; + vm_ooffset_t writemappings; + int nseals; + + /* + * Undefined bits cannot be enforced, yet would be stored and then + * reported back by F_GET_SEALS as if they were. + */ + if ((seals & ~(F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | + F_SEAL_WRITE)) != 0) + return (EINVAL); + + shmfd = fp->f_data; + SHM_SEAL_XLOCK(shmfd); + /* Even already-set seals should result in EPERM. */ + if ((shmfd->shm_seals & F_SEAL_SEAL) != 0) { + SHM_SEAL_XUNLOCK(shmfd); + return (EPERM); + } + nseals = seals & ~shmfd->shm_seals; + if ((nseals & F_SEAL_WRITE) != 0) { + /* + * RLOCK because we're already protected from a writable mapping + * being added during this process by the shm_seals lock. 
+ */ + VM_OBJECT_RLOCK(shmfd->shm_object); + writemappings = shmfd->shm_object->un_pager.swp.writemappings; + VM_OBJECT_RUNLOCK(shmfd->shm_object); + /* kmappings are also writable */ + if (writemappings > 0) { + SHM_SEAL_XUNLOCK(shmfd); + return (EBUSY); + } + } + shmfd->shm_seals |= nseals; + SHM_SEAL_XUNLOCK(shmfd); + return (0); +} + +/* fo_get_seals handler: report the seals currently set on the shm object. */ +static int +shm_get_seals(struct file *fp, int *seals) +{ + struct shmfd *shmfd; + + shmfd = fp->f_data; + SHM_SEAL_SLOCK(shmfd); + *seals = shmfd->shm_seals; + SHM_SEAL_SUNLOCK(shmfd); + return (0); +} + static int sysctl_posix_shm_list(SYSCTL_HANDLER_ARGS) { Index: sys/sys/fcntl.h =================================================================== --- sys/sys/fcntl.h +++ sys/sys/fcntl.h @@ -248,7 +248,15 @@ #endif #if __BSD_VISIBLE #define F_DUP2FD_CLOEXEC 18 /* Like F_DUP2FD, but FD_CLOEXEC is set */ -#endif +#define F_ADD_SEALS 19 +#define F_GET_SEALS 20 + +/* Seals (F_ADD_SEALS, F_GET_SEALS). */ +#define F_SEAL_SEAL 0x0001 /* Prevent adding seals */ +#define F_SEAL_SHRINK 0x0002 /* May not shrink */ +#define F_SEAL_GROW 0x0004 /* May not grow */ +#define F_SEAL_WRITE 0x0008 /* May not write */ +#endif /* __BSD_VISIBLE */ /* file descriptor flags (F_GETFD, F_SETFD) */ #define FD_CLOEXEC 1 /* close-on-exec flag */ Index: sys/sys/file.h =================================================================== --- sys/sys/file.h +++ sys/sys/file.h @@ -123,6 +123,8 @@ vm_size_t size, vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff, struct thread *td); typedef int fo_aio_queue_t(struct file *fp, struct kaiocb *job); +typedef int fo_add_seals_t(struct file *fp, int seals); +typedef int fo_get_seals_t(struct file *fp, int *seals); typedef int fo_flags_t; struct fileops { @@ -141,6 +143,8 @@ fo_fill_kinfo_t *fo_fill_kinfo; fo_mmap_t *fo_mmap; fo_aio_queue_t *fo_aio_queue; + fo_add_seals_t *fo_add_seals; + fo_get_seals_t *fo_get_seals; fo_flags_t fo_flags; /* DFLAG_* below */ }; @@ -426,6 +430,24 @@ return 
((*fp->f_ops->fo_aio_queue)(fp, job)); } +/* Add seals via the file's fileops; EINVAL if the op is absent. */ +static __inline int +fo_add_seals(struct file *fp, int seals) +{ + fo_add_seals_t *op = fp->f_ops->fo_add_seals; + + return (op != NULL ? (*op)(fp, seals) : EINVAL); +} + +/* Fetch seals via the file's fileops; EINVAL if the op is absent. */ +static __inline int +fo_get_seals(struct file *fp, int *seals) +{ + fo_get_seals_t *op = fp->f_ops->fo_get_seals; + + return (op != NULL ? (*op)(fp, seals) : EINVAL); +} + #endif /* _KERNEL */ #endif /* !SYS_FILE_H */ Index: sys/sys/mman.h =================================================================== --- sys/sys/mman.h +++ sys/sys/mman.h @@ -210,6 +210,7 @@ #include #include #include +#include #include struct file; @@ -238,6 +239,9 @@ struct rangelock shm_rl; struct mtx shm_mtx; + + int shm_seals; + struct sx shm_seal_sx; }; #endif