Index: sys/kern/uipc_shm.c
===================================================================
--- sys/kern/uipc_shm.c
+++ sys/kern/uipc_shm.c
@@ -131,6 +131,8 @@
     void *rl_cookie);
 static int	shm_copyin_path(struct thread *td, const char *userpath_in,
     char **path_out);
+static int	shm_deallocate(struct shmfd *shmfd, off_t *offset,
+    off_t *length, int flags, void *rl_cookie, struct thread *td);
 
 static fo_rdwr_t	shm_read;
 static fo_rdwr_t	shm_write;
@@ -146,6 +148,7 @@
 static fo_get_seals_t	shm_get_seals;
 static fo_add_seals_t	shm_add_seals;
 static fo_fallocate_t	shm_fallocate;
+static fo_fspacectl_t	shm_fspacectl;
 
 /* File descriptor operations. */
 struct fileops shm_ops = {
@@ -166,6 +169,7 @@
 	.fo_get_seals = shm_get_seals,
 	.fo_add_seals = shm_add_seals,
 	.fo_fallocate = shm_fallocate,
+	.fo_fspacectl = shm_fspacectl,
 	.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE,
 };
 
@@ -626,14 +630,72 @@
 	return (error);
 }
 
+/*
+ * Zero the byte range [base, end) of the page at index idx, paging it in
+ * first if it is not resident but the pager has a copy.  The vm object write
+ * lock must be held by the caller.  It may be dropped and reacquired while
+ * paging in, but it is always held again on return, including on error.
+ */
+static int
+shm_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base,
+    int end)
+{
+	vm_page_t m;
+	int rv;
+
+	VM_OBJECT_ASSERT_WLOCKED(object);
+	KASSERT(base >= 0, ("%s: base %d", __func__, base));
+	KASSERT(end <= PAGE_SIZE, ("%s: end %d", __func__, end));
+
+retry:
+	m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
+	if (m != NULL) {
+		MPASS(vm_page_all_valid(m));
+	} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
+		m = vm_page_alloc(object, idx,
+		    VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
+		if (m == NULL)
+			goto retry;
+		vm_object_pip_add(object, 1);
+		VM_OBJECT_WUNLOCK(object);
+		rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
+		VM_OBJECT_WLOCK(object);
+		vm_object_pip_wakeup(object);
+		if (rv == VM_PAGER_OK) {
+			/*
+			 * Since the page was not resident, and therefore not
+			 * recently accessed, immediately enqueue it for
+			 * asynchronous laundering.  The current operation is
+			 * not regarded as an access.
+			 */
+			vm_page_launder(m);
+		} else {
+			/*
+			 * Leave the object locked; every caller owns the
+			 * lock and is responsible for dropping it.
+			 */
+			vm_page_free(m);
+			return (EIO);
+		}
+	}
+	if (m != NULL) {
+		pmap_zero_page_area(m, base, end - base);
+		KASSERT(vm_page_all_valid(m), ("%s: page %p is invalid",
+		    __func__, m));
+		vm_page_set_dirty(m);
+		vm_page_xunbusy(m);
+	}
+
+	return (0);
+}
+
 static int
 shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
 {
 	vm_object_t object;
-	vm_page_t m;
-	vm_pindex_t idx, nobjsize;
+	vm_pindex_t nobjsize;
 	vm_ooffset_t delta;
-	int base, rv;
+	int base, error;
 
 	KASSERT(length >= 0, ("shm_dotruncate: length < 0"));
 	object = shmfd->shm_object;
@@ -660,45 +722,12 @@
 		 */
 		base = length & PAGE_MASK;
 		if (base != 0) {
-			idx = OFF_TO_IDX(length);
-retry:
-			m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
-			if (m != NULL) {
-				MPASS(vm_page_all_valid(m));
-			} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
-				m = vm_page_alloc(object, idx,
-				    VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
-				if (m == NULL)
-					goto retry;
-				vm_object_pip_add(object, 1);
-				VM_OBJECT_WUNLOCK(object);
-				rv = vm_pager_get_pages(object, &m, 1, NULL,
-				    NULL);
-				VM_OBJECT_WLOCK(object);
-				vm_object_pip_wakeup(object);
-				if (rv == VM_PAGER_OK) {
-					/*
-					 * Since the page was not resident,
-					 * and therefore not recently
-					 * accessed, immediately enqueue it
-					 * for asynchronous laundering.  The
-					 * current operation is not regarded
-					 * as an access.
-					 */
-					vm_page_launder(m);
-				} else {
-					vm_page_free(m);
-					VM_OBJECT_WUNLOCK(object);
-					return (EIO);
-				}
-			}
-			if (m != NULL) {
-				pmap_zero_page_area(m, base, PAGE_SIZE - base);
-				KASSERT(vm_page_all_valid(m),
-				    ("shm_dotruncate: page %p is invalid", m));
-				vm_page_set_dirty(m);
-				vm_page_xunbusy(m);
-			}
+			error = shm_partial_page_invalidate(object,
+			    OFF_TO_IDX(length), base, PAGE_SIZE);
+			if (error != 0) {
+				VM_OBJECT_WUNLOCK(object);
+				return (error);
+			}
 		}
 
 		delta = IDX_TO_OFF(object->size - nobjsize);
@@ -1874,6 +1903,111 @@
 	return (0);
 }
 
+/*
+ * Zero the byte range [*offset, *offset + *length) of the object backing
+ * shmfd.  Partial pages at either end are zeroed in place and whole pages
+ * in between are removed from the object.  On return *offset and *length
+ * describe the part of the range that was not processed.
+ */
+static int
+shm_deallocate(struct shmfd *shmfd, off_t *offset, off_t *length, int flags,
+    void *rl_cookie, struct thread *td)
+{
+	vm_object_t object;
+	vm_pindex_t pistart, pi, piend;
+	off_t off, len;
+	int startofs, endofs, end;
+	int error;
+
+	object = shmfd->shm_object;
+	off = *offset;
+	len = *length;
+	startofs = off & PAGE_MASK;
+	endofs = (off + len) & PAGE_MASK;
+	pistart = OFF_TO_IDX(off);
+	piend = OFF_TO_IDX(off + len);
+	pi = (off > OFF_MAX - PAGE_MASK) ? piend :
+	    OFF_TO_IDX(off + PAGE_MASK);
+	error = 0;
+
+	VM_OBJECT_WLOCK(object);
+
+	if (startofs != 0) {
+		end = pistart != piend ? PAGE_SIZE : endofs;
+		error = shm_partial_page_invalidate(object, pistart, startofs,
+		    end);
+		if (error)
+			goto out;
+		off += end - startofs;
+		len -= end - startofs;
+	}
+
+	if (pi < piend) {
+		vm_object_page_remove(object, pi, piend, 0);
+		off += IDX_TO_OFF(piend - pi);
+		len -= IDX_TO_OFF(piend - pi);
+	}
+
+	if (endofs != 0 && pistart != piend) {
+		error = shm_partial_page_invalidate(object, piend, 0, endofs);
+		if (error)
+			goto out;
+		off += endofs;
+		len -= endofs;
+	}
+
+out:
+	VM_OBJECT_WUNLOCK(object);
+	*offset = off;
+	*length = len;
+	return (error);
+}
+
+/*
+ * fo_fspacectl method for POSIX shared memory objects.  Only SPACECTL_DEALLOC
+ * with no flags is supported, and it is refused with EPERM when the object is
+ * write-sealed.  *offset and *length are updated only on success.
+ */
+static int
+shm_fspacectl(struct file *fp, int cmd, off_t *offset, off_t *length, int flags,
+    struct ucred *active_cred, struct thread *td)
+{
+	void *rl_cookie;
+	struct shmfd *shmfd;
+	off_t off, len;
+	int error;
+
+	/* This assumes that the caller already checked for overflow. */
+	error = 0;
+	shmfd = fp->f_data;
+	off = *offset;
+	len = *length;
+
+	if (cmd != SPACECTL_DEALLOC || off < 0 || len <= 0 ||
+	    len > OFF_MAX - off || flags != 0)
+		return (EINVAL);
+
+	rl_cookie = rangelock_wlock(&shmfd->shm_rl, off, off + len,
+	    &shmfd->shm_mtx);
+	switch (cmd) {
+	case SPACECTL_DEALLOC:
+		if ((shmfd->shm_seals & F_SEAL_WRITE) != 0) {
+			error = EPERM;
+			break;
+		}
+		error = shm_deallocate(shmfd, &off, &len, flags, rl_cookie, td);
+		if (error != 0)
+			break;
+		*offset = off;
+		*length = len;
+		break;
+	default:
+		panic("%s: unknown cmd %d", __func__, cmd);
+	}
+	rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx);
+	return (error);
+}
+
 static int
 shm_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td)
 {
Index: tests/sys/posixshm/posixshm_test.c
===================================================================
--- tests/sys/posixshm/posixshm_test.c
+++ tests/sys/posixshm/posixshm_test.c
@@ -1,6 +1,9 @@
 /*-
  * Copyright (c) 2006 Robert N. M. Watson
- * All rights reserved.
+ * Copyright (c) 2021 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Ka Ho Ng
+ * under sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -173,6 +176,136 @@
 	close(fd);
 }
 
+static off_t shm_max_pages = 32;
+static const char byte_to_fill = 0x5f;
+
+/*
+ * Fill len bytes of fd starting at offset with byte_to_fill.  Returns 0 on
+ * success and 1 on allocation or write failure.
+ */
+static int
+shm_fill(int fd, off_t offset, off_t len)
+{
+	int error;
+	size_t blen;
+	char *buf;
+
+	error = 0;
+	buf = malloc(PAGE_SIZE);
+	if (buf == NULL)
+		return (1);
+
+	while (len > 0) {
+		blen = len < (off_t)PAGE_SIZE ? len : PAGE_SIZE;
+		memset(buf, byte_to_fill, blen);
+		if (pwrite(fd, buf, blen, offset) != (ssize_t)blen) {
+			error = 1;
+			break;
+		}
+		len -= blen;
+		offset += blen;
+	}
+
+	free(buf);
+	return (error);
+}
+
+/*
+ * Verify that the hole [hole_start, hole_start + hole_len), clipped to shm_sz,
+ * reads back as zeroes, that the rest of the object still holds byte_to_fill,
+ * and that the object size is still shm_sz.  Returns 0 if all checks pass.
+ */
+static int
+check_content_dealloc(int fd, off_t hole_start, off_t hole_len, off_t shm_sz)
+{
+	int error;
+	size_t blen;
+	off_t offset, resid;
+	struct stat statbuf;
+	char *buf, *sblk;
+
+	error = 0;
+	buf = malloc(PAGE_SIZE * 2);
+	if (buf == NULL)
+		return (1);
+	sblk = buf + PAGE_SIZE;
+
+	memset(sblk, 0, PAGE_SIZE);
+
+	if ((uint64_t)hole_start + hole_len > (uint64_t)shm_sz)
+		hole_len = shm_sz - hole_start;
+
+	/*
+	 * Check hole is zeroed.
+	 */
+	offset = hole_start;
+	resid = hole_len;
+	while (resid > 0) {
+		blen = resid < (off_t)PAGE_SIZE ? resid : PAGE_SIZE;
+		if (pread(fd, buf, blen, offset) != (ssize_t)blen) {
+			error = 1;
+			break;
+		}
+		if (memcmp(buf, sblk, blen) != 0) {
+			error = 1;
+			break;
+		}
+		resid -= blen;
+		offset += blen;
+	}
+
+	memset(sblk, byte_to_fill, PAGE_SIZE);
+
+	/*
+	 * Check file region before hole still holds the fill pattern.
+	 */
+	offset = 0;
+	resid = hole_start;
+	while (resid > 0) {
+		blen = resid < (off_t)PAGE_SIZE ? resid : PAGE_SIZE;
+		if (pread(fd, buf, blen, offset) != (ssize_t)blen) {
+			error = 1;
+			break;
+		}
+		if (memcmp(buf, sblk, blen) != 0) {
+			error = 1;
+			break;
+		}
+		resid -= blen;
+		offset += blen;
+	}
+
+	/*
+	 * Check file region after hole still holds the fill pattern.
+	 */
+	offset = hole_start + hole_len;
+	resid = shm_sz - offset;
+	while (resid > 0) {
+		blen = resid < (off_t)PAGE_SIZE ? resid : PAGE_SIZE;
+		if (pread(fd, buf, blen, offset) != (ssize_t)blen) {
+			error = 1;
+			break;
+		}
+		if (memcmp(buf, sblk, blen) != 0) {
+			error = 1;
+			break;
+		}
+		resid -= blen;
+		offset += blen;
+	}
+
+	/*
+	 * Check file size matches with expected file size.
+	 */
+	if (fstat(fd, &statbuf) == -1)
+		error = -1;
+	else if (statbuf.st_size != shm_sz)
+		error = -1;
+
+	free(buf);
+	return (error);
+}
+
 ATF_TC_WITHOUT_HEAD(remap_object);
 ATF_TC_BODY(remap_object, tc)
 {
@@ -958,6 +1091,83 @@
 	close(fd);
 }
 
+/*
+ * Exercise fspacectl(SPACECTL_DEALLOC) on a shm object with page-aligned and
+ * unaligned ranges that end inside the object, at OFF_MAX, and past its end.
+ */
+ATF_TC_WITHOUT_HEAD(fspacectl);
+ATF_TC_BODY(fspacectl, tc)
+{
+	struct spacectl_range range;
+	off_t offset, length, shm_sz;
+	int fd, error;
+
+	shm_sz = shm_max_pages << PAGE_SHIFT;
+
+	fd = shm_open("/testtest", O_RDWR | O_CREAT, 0666);
+	ATF_REQUIRE_MSG(fd >= 0, "shm_open failed; errno:%d", errno);
+	ATF_REQUIRE_MSG((error = posix_fallocate(fd, 0, shm_sz)) == 0,
+	    "posix_fallocate failed; error=%d", error);
+
+	/* Aligned fspacectl(fd, SPACECTL_DEALLOC, ...) */
+	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+	range.r_offset = offset = PAGE_SIZE;
+	range.r_len = length = ((shm_max_pages - 1) << PAGE_SHIFT) -
+	    range.r_offset;
+	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+	    "Aligned fspacectl failed; errno=%d", errno);
+	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+	    "Aligned fspacectl content checking failed");
+
+	/* Unaligned fspacectl(fd, SPACECTL_DEALLOC, ...) */
+	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+	range.r_offset = offset = 1 << (PAGE_SHIFT - 1);
+	range.r_len = length = ((shm_max_pages - 1) << PAGE_SHIFT) +
+	    (1 << (PAGE_SHIFT - 1)) - offset;
+	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+	    "Unaligned fspacectl failed; errno=%d", errno);
+	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+	    "Unaligned fspacectl content checking failed");
+
+	/* Aligned fspacectl(fd, SPACECTL_DEALLOC, ...) to OFF_MAX */
+	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+	range.r_offset = offset = PAGE_SIZE;
+	range.r_len = length = OFF_MAX - offset;
+	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+	    "Aligned fspacectl to OFF_MAX failed; errno=%d", errno);
+	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+	    "Aligned fspacectl to OFF_MAX content checking failed");
+
+	/* Unaligned fspacectl(fd, SPACECTL_DEALLOC, ...) to OFF_MAX */
+	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+	range.r_offset = offset = 1 << (PAGE_SHIFT - 1);
+	range.r_len = length = OFF_MAX - offset;
+	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+	    "Unaligned fspacectl to OFF_MAX failed; errno=%d", errno);
+	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+	    "Unaligned fspacectl to OFF_MAX content checking failed");
+
+	/* Aligned fspacectl(fd, SPACECTL_DEALLOC, ...) past shm_sz */
+	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+	range.r_offset = offset = PAGE_SIZE;
+	range.r_len = length = ((shm_max_pages + 1) << PAGE_SHIFT) - offset;
+	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+	    "Aligned fspacectl past shm_sz failed; errno=%d", errno);
+	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+	    "Aligned fspacectl past shm_sz content checking failed");
+
+	/* Unaligned fspacectl(fd, SPACECTL_DEALLOC, ...) past shm_sz */
+	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+	range.r_offset = offset = 1 << (PAGE_SHIFT - 1);
+	range.r_len = length = ((shm_max_pages + 1) << PAGE_SHIFT) - offset;
+	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+	    "Unaligned fspacectl past shm_sz failed; errno=%d", errno);
+	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+	    "Unaligned fspacectl past shm_sz content checking failed");
+
+	ATF_REQUIRE(close(fd) == 0);
+}
+
 static int
 shm_open_large(int psind, int policy, size_t sz)
 {
@@ -1704,6 +1914,7 @@
 	ATF_TP_ADD_TC(tp, cloexec);
 	ATF_TP_ADD_TC(tp, mode);
 	ATF_TP_ADD_TC(tp, fallocate);
+	ATF_TP_ADD_TC(tp, fspacectl);
 	ATF_TP_ADD_TC(tp, largepage_basic);
 	ATF_TP_ADD_TC(tp, largepage_config);
 	ATF_TP_ADD_TC(tp, largepage_mmap);