Changeset View
Standalone View
sys/kern/uipc_shm.c
/*- | /*- | ||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD | * SPDX-License-Identifier: BSD-2-Clause-FreeBSD | ||||
* | * | ||||
* Copyright (c) 2006, 2011, 2016-2017 Robert N. M. Watson | * Copyright (c) 2006, 2011, 2016-2017 Robert N. M. Watson | ||||
* Copyright 2020 The FreeBSD Foundation | |||||
* All rights reserved. | * All rights reserved. | ||||
* | * | ||||
* Portions of this software were developed by BAE Systems, the University of | * Portions of this software were developed by BAE Systems, the University of | ||||
* Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL | * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL | ||||
* contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent | * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent | ||||
* Computing (TC) research program. | * Computing (TC) research program. | ||||
* | * | ||||
* Portions of this software were developed by Konstantin Belousov | |||||
* under sponsorship from the FreeBSD Foundation. | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | * Redistribution and use in source and binary forms, with or without | ||||
* modification, are permitted provided that the following conditions | * modification, are permitted provided that the following conditions | ||||
* are met: | * are met: | ||||
* 1. Redistributions of source code must retain the above copyright | * 1. Redistributions of source code must retain the above copyright | ||||
* notice, this list of conditions and the following disclaimer. | * notice, this list of conditions and the following disclaimer. | ||||
* 2. Redistributions in binary form must reproduce the above copyright | * 2. Redistributions in binary form must reproduce the above copyright | ||||
* notice, this list of conditions and the following disclaimer in the | * notice, this list of conditions and the following disclaimer in the | ||||
* documentation and/or other materials provided with the distribution. | * documentation and/or other materials provided with the distribution. | ||||
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | |||||
#include <sys/sbuf.h> | #include <sys/sbuf.h> | ||||
#include <sys/stat.h> | #include <sys/stat.h> | ||||
#include <sys/syscallsubr.h> | #include <sys/syscallsubr.h> | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <sys/sysproto.h> | #include <sys/sysproto.h> | ||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/sx.h> | #include <sys/sx.h> | ||||
#include <sys/time.h> | #include <sys/time.h> | ||||
#include <sys/vmmeter.h> | |||||
#include <sys/vnode.h> | #include <sys/vnode.h> | ||||
#include <sys/unistd.h> | #include <sys/unistd.h> | ||||
#include <sys/user.h> | #include <sys/user.h> | ||||
#include <security/audit/audit.h> | #include <security/audit/audit.h> | ||||
#include <security/mac/mac_framework.h> | #include <security/mac/mac_framework.h> | ||||
#include <vm/vm.h> | #include <vm/vm.h> | ||||
Show All 24 Lines | |||||
static dev_t shm_dev_ino; | static dev_t shm_dev_ino; | ||||
#define SHM_HASH(fnv) (&shm_dictionary[(fnv) & shm_hash]) | #define SHM_HASH(fnv) (&shm_dictionary[(fnv) & shm_hash]) | ||||
static void shm_init(void *arg); | static void shm_init(void *arg); | ||||
static void shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd); | static void shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd); | ||||
static struct shmfd *shm_lookup(char *path, Fnv32_t fnv); | static struct shmfd *shm_lookup(char *path, Fnv32_t fnv); | ||||
static int shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred); | static int shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred); | ||||
static int shm_dotruncate_cookie(struct shmfd *shmfd, off_t length, | |||||
void *rl_cookie); | |||||
static int shm_dotruncate_locked(struct shmfd *shmfd, off_t length, | static int shm_dotruncate_locked(struct shmfd *shmfd, off_t length, | ||||
void *rl_cookie); | void *rl_cookie); | ||||
static int shm_copyin_path(struct thread *td, const char *userpath_in, | static int shm_copyin_path(struct thread *td, const char *userpath_in, | ||||
char **path_out); | char **path_out); | ||||
static fo_rdwr_t shm_read; | static fo_rdwr_t shm_read; | ||||
static fo_rdwr_t shm_write; | static fo_rdwr_t shm_write; | ||||
static fo_truncate_t shm_truncate; | static fo_truncate_t shm_truncate; | ||||
Show All 23 Lines | struct fileops shm_ops = { | ||||
.fo_chown = shm_chown, | .fo_chown = shm_chown, | ||||
.fo_sendfile = vn_sendfile, | .fo_sendfile = vn_sendfile, | ||||
.fo_seek = shm_seek, | .fo_seek = shm_seek, | ||||
.fo_fill_kinfo = shm_fill_kinfo, | .fo_fill_kinfo = shm_fill_kinfo, | ||||
.fo_mmap = shm_mmap, | .fo_mmap = shm_mmap, | ||||
.fo_get_seals = shm_get_seals, | .fo_get_seals = shm_get_seals, | ||||
.fo_add_seals = shm_add_seals, | .fo_add_seals = shm_add_seals, | ||||
.fo_fallocate = shm_fallocate, | .fo_fallocate = shm_fallocate, | ||||
.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE | .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE, | ||||
}; | }; | ||||
FEATURE(posix_shm, "POSIX shared memory"); | FEATURE(posix_shm, "POSIX shared memory"); | ||||
static SYSCTL_NODE(_vm, OID_AUTO, largepages, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, | |||||
""); | |||||
static int largepage_reclaim_tries = 1; | |||||
SYSCTL_INT(_vm_largepages, OID_AUTO, reclaim_tries, | |||||
CTLFLAG_RWTUN, &largepage_reclaim_tries, 0, | |||||
"Number of contig reclaims before giving up for default alloc policy"); | |||||
static int | static int | ||||
uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio) | uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio) | ||||
{ | { | ||||
vm_page_t m; | vm_page_t m; | ||||
vm_pindex_t idx; | vm_pindex_t idx; | ||||
size_t tlen; | size_t tlen; | ||||
int error, offset, rv; | int error, offset, rv; | ||||
▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines | if (len == 0) | ||||
break; | break; | ||||
error = uiomove_object_page(obj, len, uio); | error = uiomove_object_page(obj, len, uio); | ||||
if (error != 0 || resid == uio->uio_resid) | if (error != 0 || resid == uio->uio_resid) | ||||
break; | break; | ||||
} | } | ||||
return (error); | return (error); | ||||
} | } | ||||
static u_long count_largepages[MAXPAGESIZES]; | |||||
static int | static int | ||||
shm_largepage_phys_populate(vm_object_t object, vm_pindex_t pidx, | |||||
int fault_type, vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last) | |||||
{ | |||||
vm_page_t m; | |||||
int psind; | |||||
psind = object->un_pager.phys.data_val; | |||||
if (psind == 0 || pidx >= object->size) | |||||
return (VM_PAGER_FAIL); | |||||
*first = rounddown2(pidx, pagesizes[psind] / PAGE_SIZE); | |||||
markj: Can be rounddown2(). | |||||
/* | |||||
* We only busy the first page in the superpage run. It is | |||||
* useless to busy whole run since we only remove full | |||||
* superpage, and it takes too long to busy e.g. 512 * 512 == | |||||
Done Inline Actions"it takes" markj: "it takes" | |||||
* 262144 pages constituing 1G amd64 superage. | |||||
Done Inline ActionsDid you look at all at the "object busy" mechanism that Jeff added? It effectively allows one to busy all pages in an object. markj: Did you look at all at the "object busy" mechanism that Jeff added? It effectively allows one… | |||||
Done Inline ActionsWell, vm_object_busy() prevents new busy, or rather, it allows new busy but quickly backs them out. But existing busy pages are left as is, so something that iterates over all pages in the superpage would be needed, like vm_page_ps_test(). But all of that is not needed there, the busy of the single page is performed only to avoid a truncation while pmap_enter() is done, in some future where truncation will be implemented for largepage shmfd. kib: Well, vm_object_busy() prevents new busy, or rather, it allows new busy but quickly backs them… | |||||
*/ | |||||
m = vm_page_grab(object, *first, VM_ALLOC_NORMAL | VM_ALLOC_NOCREAT); | |||||
Done Inline ActionsWe should probably add VM_ALLOC_NOCREAT here. markj: We should probably add VM_ALLOC_NOCREAT here. | |||||
Done Inline ActionsThis is pure cosmetic. kib: This is pure cosmetic. | |||||
Done Inline ActionsI meant to say, also assert that m != NULL, so we can verify that a new page allocation did not occur. markj: I meant to say, also assert that m != NULL, so we can verify that a new page allocation did not… | |||||
Done Inline ActionsOk, I added assert, but I think that system would already get some damage anyway. It should either panic or loop infinitely. kib: Ok, I added assert, but I think that system would already get some damage anyway. It should… | |||||
MPASS(m != NULL); | |||||
Done Inline ActionsThis is wrong if pidx % atop(pagesizes[psind]) == 0. It can be written a bit more simply as *last = *first + atop(pagesizes[psind]) - 1. The fault code does not really use this value for largepage mappings, but it still makes some assertions about it. markj: This is wrong if `pidx % atop(pagesizes[psind]) == 0`. It can be written a bit more simply as… | |||||
*last = *first + atop(pagesizes[psind]) - 1; | |||||
return (VM_PAGER_OK); | |||||
} | |||||
static boolean_t | |||||
Done Inline ActionsStyle, extra space after object,. markj: Style, extra space after `object,`. | |||||
shm_largepage_phys_haspage(vm_object_t object, vm_pindex_t pindex, | |||||
int *before, int *after) | |||||
{ | |||||
int psind; | |||||
psind = object->un_pager.phys.data_val; | |||||
if (psind == 0 || pindex >= object->size) | |||||
return (FALSE); | |||||
if (before != NULL) { | |||||
Done Inline ActionsCan be rounddown2(). markj: Can be rounddown2(). | |||||
*before = pindex - rounddown2(pindex, pagesizes[psind] / | |||||
PAGE_SIZE); | |||||
} | |||||
if (after != NULL) { | |||||
Done Inline ActionsCan be roundup2(). markj: Can be roundup2(). | |||||
*after = roundup2(pindex, pagesizes[psind] / PAGE_SIZE) - | |||||
pindex; | |||||
} | |||||
return (TRUE); | |||||
} | |||||
/*
 * Phys-pager constructor callback for largepage shmfd objects.  Nothing to
 * do: all per-object state is set up by the caller of phys_pager_allocate().
 */
static void
shm_largepage_phys_ctor(vm_object_t object, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred)
{
}
static void | |||||
shm_largepage_phys_dtor(vm_object_t object) | |||||
{ | |||||
int psind; | |||||
psind = object->un_pager.phys.data_val; | |||||
Done Inline ActionsPerhaps assert object->size == 0 || psind != 0? markj: Perhaps assert `object->size == 0 || psind != 0`? | |||||
if (psind != 0) { | |||||
atomic_subtract_long(&count_largepages[psind], | |||||
object->size / (pagesizes[psind] / PAGE_SIZE)); | |||||
vm_wire_sub(object->size); | |||||
} else { | |||||
KASSERT(object->size == 0, | |||||
("largepage phys obj %p not initialized bit size %#jx > 0", | |||||
object, (uintmax_t)object->size)); | |||||
} | |||||
} | |||||
/*
 * Pager operations installed for largepage (OBJT_PHYS) shmfd backing
 * objects; passed to phys_pager_allocate() in shm_alloc().
 */
static struct phys_pager_ops shm_largepage_phys_ops = {
	.phys_pg_populate = shm_largepage_phys_populate,
	.phys_pg_haspage = shm_largepage_phys_haspage,
	.phys_pg_ctor = shm_largepage_phys_ctor,
	.phys_pg_dtor = shm_largepage_phys_dtor,
};
/*
 * Returns true if the shmfd is backed by a largepage phys-pager object
 * (shm_alloc() creates an OBJT_PHYS object only for largepage shmfds).
 */
static inline bool
shm_largepage(struct shmfd *shmfd)
{
	return (shmfd->shm_object->type == OBJT_PHYS);
}
static int | |||||
shm_seek(struct file *fp, off_t offset, int whence, struct thread *td) | shm_seek(struct file *fp, off_t offset, int whence, struct thread *td) | ||||
{ | { | ||||
struct shmfd *shmfd; | struct shmfd *shmfd; | ||||
off_t foffset; | off_t foffset; | ||||
int error; | int error; | ||||
shmfd = fp->f_data; | shmfd = fp->f_data; | ||||
foffset = foffset_lock(fp, 0); | foffset = foffset_lock(fp, 0); | ||||
▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines | shm_write(struct file *fp, struct uio *uio, struct ucred *active_cred, | ||||
off_t size; | off_t size; | ||||
shmfd = fp->f_data; | shmfd = fp->f_data; | ||||
#ifdef MAC | #ifdef MAC | ||||
error = mac_posixshm_check_write(active_cred, fp->f_cred, shmfd); | error = mac_posixshm_check_write(active_cred, fp->f_cred, shmfd); | ||||
if (error) | if (error) | ||||
return (error); | return (error); | ||||
#endif | #endif | ||||
if (shm_largepage(shmfd) && shmfd->shm_lp_psind == 0) | |||||
return (EINVAL); | |||||
foffset_lock_uio(fp, uio, flags); | foffset_lock_uio(fp, uio, flags); | ||||
if (uio->uio_resid > OFF_MAX - uio->uio_offset) { | if (uio->uio_resid > OFF_MAX - uio->uio_offset) { | ||||
/* | /* | ||||
* Overflow is only an error if we're supposed to expand on | * Overflow is only an error if we're supposed to expand on | ||||
* write. Otherwise, we'll just truncate the write to the | * write. Otherwise, we'll just truncate the write to the | ||||
* size of the file, which can only grow up to OFF_MAX. | * size of the file, which can only grow up to OFF_MAX. | ||||
*/ | */ | ||||
if ((shmfd->shm_flags & SHM_GROW_ON_WRITE) != 0) { | if ((shmfd->shm_flags & SHM_GROW_ON_WRITE) != 0) { | ||||
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | |||||
#endif | #endif | ||||
return (shm_dotruncate(shmfd, length)); | return (shm_dotruncate(shmfd, length)); | ||||
} | } | ||||
int | int | ||||
shm_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, | shm_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, | ||||
struct thread *td) | struct thread *td) | ||||
{ | { | ||||
struct shmfd *shmfd; | |||||
struct shm_largepage_conf *conf; | |||||
void *rl_cookie; | |||||
shmfd = fp->f_data; | |||||
switch (com) { | switch (com) { | ||||
case FIONBIO: | case FIONBIO: | ||||
case FIOASYNC: | case FIOASYNC: | ||||
/* | /* | ||||
* Allow fcntl(fd, F_SETFL, O_NONBLOCK) to work, | * Allow fcntl(fd, F_SETFL, O_NONBLOCK) to work, | ||||
* just like it would on an unlinked regular file | * just like it would on an unlinked regular file | ||||
*/ | */ | ||||
return (0); | return (0); | ||||
case FIOSSHMLPGCNF: | |||||
if (!shm_largepage(shmfd)) | |||||
return (ENOTTY); | |||||
conf = data; | |||||
if (shmfd->shm_lp_psind != 0 && | |||||
conf->psind != shmfd->shm_lp_psind) | |||||
return (EINVAL); | |||||
if (conf->psind <= 0 || conf->psind >= MAXPAGESIZES || | |||||
pagesizes[conf->psind] == 0) | |||||
return (EINVAL); | |||||
if (conf->alloc_policy != SHM_LARGEPAGE_ALLOC_DEFAULT && | |||||
conf->alloc_policy != SHM_LARGEPAGE_ALLOC_NOWAIT && | |||||
conf->alloc_policy != SHM_LARGEPAGE_ALLOC_HARD) | |||||
return (EINVAL); | |||||
rl_cookie = rangelock_wlock(&shmfd->shm_rl, 0, OFF_MAX, | |||||
&shmfd->shm_mtx); | |||||
shmfd->shm_lp_psind = conf->psind; | |||||
shmfd->shm_lp_alloc_policy = conf->alloc_policy; | |||||
shmfd->shm_object->un_pager.phys.data_val = conf->psind; | |||||
rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); | |||||
return (0); | |||||
case FIOGSHMLPGCNF: | |||||
if (!shm_largepage(shmfd)) | |||||
return (ENOTTY); | |||||
conf = data; | |||||
rl_cookie = rangelock_rlock(&shmfd->shm_rl, 0, OFF_MAX, | |||||
&shmfd->shm_mtx); | |||||
conf->psind = shmfd->shm_lp_psind; | |||||
conf->alloc_policy = shmfd->shm_lp_alloc_policy; | |||||
rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); | |||||
return (0); | |||||
default: | default: | ||||
return (ENOTTY); | return (ENOTTY); | ||||
} | } | ||||
} | } | ||||
static int | static int | ||||
shm_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, | shm_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, | ||||
struct thread *td) | struct thread *td) | ||||
Show All 26 Lines | #endif | ||||
sb->st_birthtim = shmfd->shm_birthtime; | sb->st_birthtim = shmfd->shm_birthtime; | ||||
sb->st_mode = S_IFREG | shmfd->shm_mode; /* XXX */ | sb->st_mode = S_IFREG | shmfd->shm_mode; /* XXX */ | ||||
sb->st_uid = shmfd->shm_uid; | sb->st_uid = shmfd->shm_uid; | ||||
sb->st_gid = shmfd->shm_gid; | sb->st_gid = shmfd->shm_gid; | ||||
mtx_unlock(&shm_timestamp_lock); | mtx_unlock(&shm_timestamp_lock); | ||||
sb->st_dev = shm_dev_ino; | sb->st_dev = shm_dev_ino; | ||||
sb->st_ino = shmfd->shm_ino; | sb->st_ino = shmfd->shm_ino; | ||||
sb->st_nlink = shmfd->shm_object->ref_count; | sb->st_nlink = shmfd->shm_object->ref_count; | ||||
sb->st_blocks = shmfd->shm_object->size / | |||||
(pagesizes[shmfd->shm_lp_psind] >> PAGE_SHIFT); | |||||
return (0); | return (0); | ||||
} | } | ||||
static int | static int | ||||
shm_close(struct file *fp, struct thread *td) | shm_close(struct file *fp, struct thread *td) | ||||
{ | { | ||||
struct shmfd *shmfd; | struct shmfd *shmfd; | ||||
▲ Show 20 Lines • Show All 140 Lines • ▼ Show 20 Lines | retry: | ||||
mtx_lock(&shm_timestamp_lock); | mtx_lock(&shm_timestamp_lock); | ||||
vfs_timestamp(&shmfd->shm_ctime); | vfs_timestamp(&shmfd->shm_ctime); | ||||
shmfd->shm_mtime = shmfd->shm_ctime; | shmfd->shm_mtime = shmfd->shm_ctime; | ||||
mtx_unlock(&shm_timestamp_lock); | mtx_unlock(&shm_timestamp_lock); | ||||
object->size = nobjsize; | object->size = nobjsize; | ||||
return (0); | return (0); | ||||
} | } | ||||
static int | |||||
shm_dotruncate_largepage(struct shmfd *shmfd, off_t length, void *rl_cookie) | |||||
{ | |||||
vm_object_t object; | |||||
vm_page_t m; | |||||
vm_pindex_t newobjsz, oldobjsz; | |||||
int aflags, error, i, psind, try; | |||||
KASSERT(length >= 0, ("shm_dotruncate: length < 0")); | |||||
object = shmfd->shm_object; | |||||
VM_OBJECT_ASSERT_WLOCKED(object); | |||||
rangelock_cookie_assert(rl_cookie, RA_WLOCKED); | |||||
oldobjsz = object->size; | |||||
newobjsz = OFF_TO_IDX(length); | |||||
if (length == shmfd->shm_size) | |||||
return (0); | |||||
psind = shmfd->shm_lp_psind; | |||||
if (psind == 0 && length != 0) | |||||
return (EINVAL); | |||||
if ((length & (pagesizes[psind] - 1)) != 0) | |||||
return (EINVAL); | |||||
if (length < shmfd->shm_size) { | |||||
if ((shmfd->shm_seals & F_SEAL_SHRINK) != 0) | |||||
return (EPERM); | |||||
if (shmfd->shm_kmappings > 0) | |||||
return (EBUSY); | |||||
return (ENOTSUP); /* Pages are unmanaged. */ | |||||
#if 0 | |||||
vm_object_page_remove(object, newobjsz, oldobjsz, 0); | |||||
object->size = newobjsz; | |||||
shmfd->shm_size = length; | |||||
return (0); | |||||
#endif | |||||
} | |||||
aflags = VM_ALLOC_NORMAL | VM_ALLOC_ZERO; | |||||
if (shmfd->shm_lp_alloc_policy == SHM_LARGEPAGE_ALLOC_NOWAIT) | |||||
aflags |= VM_ALLOC_WAITFAIL; | |||||
try = 0; | |||||
/* | |||||
* Extend shmfd and object, keeping all already fully | |||||
* allocated large pages intact even on error, because dropped | |||||
* object lock might allowed mapping of them. | |||||
*/ | |||||
while (object->size < newobjsz) { | |||||
m = vm_page_alloc_contig(object, object->size, aflags, | |||||
pagesizes[psind] / PAGE_SIZE, 0, ~0, | |||||
pagesizes[psind], 0, | |||||
Done Inline ActionsWe (reasonably) do not wire every page here, so the system wired page count does not reflect allocated large pages. I think you could call vm_wire_add(atop(pagesizes[psind])) here and decrement in the pager dtor. markj: We (reasonably) do not wire every page here, so the system wired page count does not reflect… | |||||
Done Inline ActionsBTW I think we are very close to a moment where int argument for vm_wire_add etc is no longer enough. kib: BTW I think we are very close to a moment where int argument for vm_wire_add etc is no longer… | |||||
Done Inline ActionsYes, I started writing a patch to widen page counters to u_long. markj: Yes, I started writing a patch to widen page counters to u_long. | |||||
VM_MEMATTR_DEFAULT); | |||||
if (m == NULL) { | |||||
VM_OBJECT_WUNLOCK(object); | |||||
if (shmfd->shm_lp_alloc_policy == | |||||
SHM_LARGEPAGE_ALLOC_NOWAIT || | |||||
(shmfd->shm_lp_alloc_policy == | |||||
SHM_LARGEPAGE_ALLOC_DEFAULT && | |||||
Done Inline ActionsI am thinking about a function that attempts to reclaim from the domain(s) specified by the policy. The implementation should:
markj: I am thinking about a function that attempts to reclaim from the domain(s) specified by the… | |||||
try >= largepage_reclaim_tries)) | |||||
return (ENOMEM); | |||||
markjUnsubmitted Done Inline ActionsNeed to relock here as well. markj: Need to relock here as well. | |||||
error = vm_page_reclaim_contig(aflags, | |||||
pagesizes[psind] / PAGE_SIZE, 0, ~0, | |||||
pagesizes[psind], 0) ? 0 : | |||||
Done Inline ActionsSo in the default policy, if insufficient contiguous memory is available we will just keep retrying in a loop? markj: So in the default policy, if insufficient contiguous memory is available we will just keep… | |||||
Done Inline ActionsYes, this should be improved somehow. I think the improvement should be mostly local to the function that you propose above. The detail that I do not like now is that vm_wait() sleep is uninterruptible. I think that e.g. vm_wait_intr() should be added, then the thread_susp_check() call below can be removed. I restructured the loop to make it easier to change. kib: Yes, this should be improved somehow. I think the improvement should be mostly local to the… | |||||
vm_wait_intr(object); | |||||
if (error != 0) { | |||||
VM_OBJECT_WUNLOCK(object); | |||||
Done Inline ActionsThis function is called with the object locked, but here we're returning with the object unlocked. markj: This function is called with the object locked, but here we're returning with the object… | |||||
kibAuthorUnsubmitted Done Inline ActionsThis should be VM_OBJECT_WLOCK(). kib: This should be VM_OBJECT_WLOCK(). | |||||
return (error); | |||||
} | |||||
try++; | |||||
VM_OBJECT_WLOCK(object); | |||||
continue; | |||||
} | |||||
try = 0; | |||||
for (i = 0; i < pagesizes[psind] / PAGE_SIZE; i++) { | |||||
if ((m[i].flags & PG_ZERO) == 0) | |||||
pmap_zero_page(&m[i]); | |||||
vm_page_valid(&m[i]); | |||||
vm_page_xunbusy(&m[i]); | |||||
} | |||||
object->size += OFF_TO_IDX(pagesizes[psind]); | |||||
Done Inline ActionsIt seems reasonable to define a local variable for shmfd->shm_lp_psind. markj: It seems reasonable to define a local variable for `shmfd->shm_lp_psind`. | |||||
shmfd->shm_size += pagesizes[psind]; | |||||
atomic_add_long(&count_largepages[psind], 1); | |||||
vm_wire_add(atop(pagesizes[psind])); | |||||
} | |||||
return (0); | |||||
} | |||||
static int | |||||
shm_dotruncate_cookie(struct shmfd *shmfd, off_t length, void *rl_cookie) | |||||
{ | |||||
int error; | |||||
VM_OBJECT_WLOCK(shmfd->shm_object); | |||||
error = shm_largepage(shmfd) ? shm_dotruncate_largepage(shmfd, length, | |||||
rl_cookie) : shm_dotruncate_locked(shmfd, length, rl_cookie); | |||||
VM_OBJECT_WUNLOCK(shmfd->shm_object); | |||||
return (error); | |||||
} | |||||
int | int | ||||
shm_dotruncate(struct shmfd *shmfd, off_t length) | shm_dotruncate(struct shmfd *shmfd, off_t length) | ||||
{ | { | ||||
void *rl_cookie; | void *rl_cookie; | ||||
int error; | int error; | ||||
rl_cookie = rangelock_wlock(&shmfd->shm_rl, 0, OFF_MAX, | rl_cookie = rangelock_wlock(&shmfd->shm_rl, 0, OFF_MAX, | ||||
&shmfd->shm_mtx); | &shmfd->shm_mtx); | ||||
VM_OBJECT_WLOCK(shmfd->shm_object); | error = shm_dotruncate_cookie(shmfd, length, rl_cookie); | ||||
error = shm_dotruncate_locked(shmfd, length, rl_cookie); | |||||
VM_OBJECT_WUNLOCK(shmfd->shm_object); | |||||
rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); | rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); | ||||
return (error); | return (error); | ||||
} | } | ||||
/* | /* | ||||
* shmfd object management including creation and reference counting | * shmfd object management including creation and reference counting | ||||
* routines. | * routines. | ||||
*/ | */ | ||||
struct shmfd * | struct shmfd * | ||||
shm_alloc(struct ucred *ucred, mode_t mode) | shm_alloc(struct ucred *ucred, mode_t mode, bool largepage) | ||||
{ | { | ||||
struct shmfd *shmfd; | struct shmfd *shmfd; | ||||
shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO); | shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO); | ||||
shmfd->shm_size = 0; | shmfd->shm_size = 0; | ||||
shmfd->shm_uid = ucred->cr_uid; | shmfd->shm_uid = ucred->cr_uid; | ||||
shmfd->shm_gid = ucred->cr_gid; | shmfd->shm_gid = ucred->cr_gid; | ||||
shmfd->shm_mode = mode; | shmfd->shm_mode = mode; | ||||
if (largepage) { | |||||
shmfd->shm_object = phys_pager_allocate(NULL, | |||||
&shm_largepage_phys_ops, NULL, shmfd->shm_size, | |||||
VM_PROT_DEFAULT, 0, ucred); | |||||
shmfd->shm_lp_alloc_policy = SHM_LARGEPAGE_ALLOC_DEFAULT; | |||||
} else { | |||||
shmfd->shm_object = vm_pager_allocate(OBJT_SWAP, NULL, | shmfd->shm_object = vm_pager_allocate(OBJT_SWAP, NULL, | ||||
shmfd->shm_size, VM_PROT_DEFAULT, 0, ucred); | shmfd->shm_size, VM_PROT_DEFAULT, 0, ucred); | ||||
} | |||||
KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate")); | KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate")); | ||||
vfs_timestamp(&shmfd->shm_birthtime); | vfs_timestamp(&shmfd->shm_birthtime); | ||||
shmfd->shm_atime = shmfd->shm_mtime = shmfd->shm_ctime = | shmfd->shm_atime = shmfd->shm_mtime = shmfd->shm_ctime = | ||||
shmfd->shm_birthtime; | shmfd->shm_birthtime; | ||||
shmfd->shm_ino = alloc_unr64(&shm_ino_unr); | shmfd->shm_ino = alloc_unr64(&shm_ino_unr); | ||||
refcount_init(&shmfd->shm_refs, 1); | refcount_init(&shmfd->shm_refs, 1); | ||||
mtx_init(&shmfd->shm_mtx, "shmrl", NULL, MTX_DEF); | mtx_init(&shmfd->shm_mtx, "shmrl", NULL, MTX_DEF); | ||||
rangelock_init(&shmfd->shm_rl); | rangelock_init(&shmfd->shm_rl); | ||||
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | if (flags & FWRITE) | ||||
accmode |= VWRITE; | accmode |= VWRITE; | ||||
mtx_lock(&shm_timestamp_lock); | mtx_lock(&shm_timestamp_lock); | ||||
error = vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, shmfd->shm_gid, | error = vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, shmfd->shm_gid, | ||||
accmode, ucred); | accmode, ucred); | ||||
mtx_unlock(&shm_timestamp_lock); | mtx_unlock(&shm_timestamp_lock); | ||||
return (error); | return (error); | ||||
} | } | ||||
/* | |||||
* Dictionary management. We maintain an in-kernel dictionary to map | |||||
* paths to shmfd objects. We use the FNV hash on the path to store | |||||
* the mappings in a hash table. | |||||
*/ | |||||
static void | static void | ||||
shm_init(void *arg) | shm_init(void *arg) | ||||
{ | { | ||||
char name[32]; | |||||
int i; | |||||
mtx_init(&shm_timestamp_lock, "shm timestamps", NULL, MTX_DEF); | mtx_init(&shm_timestamp_lock, "shm timestamps", NULL, MTX_DEF); | ||||
sx_init(&shm_dict_lock, "shm dictionary"); | sx_init(&shm_dict_lock, "shm dictionary"); | ||||
shm_dictionary = hashinit(1024, M_SHMFD, &shm_hash); | shm_dictionary = hashinit(1024, M_SHMFD, &shm_hash); | ||||
new_unrhdr64(&shm_ino_unr, 1); | new_unrhdr64(&shm_ino_unr, 1); | ||||
shm_dev_ino = devfs_alloc_cdp_inode(); | shm_dev_ino = devfs_alloc_cdp_inode(); | ||||
KASSERT(shm_dev_ino > 0, ("shm dev inode not initialized")); | KASSERT(shm_dev_ino > 0, ("shm dev inode not initialized")); | ||||
for (i = 1; i < MAXPAGESIZES; i++) { | |||||
if (pagesizes[i] == 0) | |||||
break; | |||||
#define M (1024 * 1024) | |||||
#define G (1024 * M) | |||||
if (pagesizes[i] >= G) | |||||
snprintf(name, sizeof(name), "%luG", pagesizes[i] / G); | |||||
else if (pagesizes[i] >= M) | |||||
snprintf(name, sizeof(name), "%luM", pagesizes[i] / M); | |||||
else | |||||
snprintf(name, sizeof(name), "%lu", pagesizes[i]); | |||||
#undef G | |||||
#undef M | |||||
SYSCTL_ADD_ULONG(NULL, SYSCTL_STATIC_CHILDREN(_vm_largepages), | |||||
OID_AUTO, name, CTLFLAG_RD, &count_largepages[i], | |||||
"number of non-transient largepages allocated"); | |||||
} | } | ||||
} | |||||
SYSINIT(shm_init, SI_SUB_SYSV_SHM, SI_ORDER_ANY, shm_init, NULL); | SYSINIT(shm_init, SI_SUB_SYSV_SHM, SI_ORDER_ANY, shm_init, NULL); | ||||
/* | |||||
* Dictionary management. We maintain an in-kernel dictionary to map | |||||
* paths to shmfd objects. We use the FNV hash on the path to store | |||||
* the mappings in a hash table. | |||||
*/ | |||||
static struct shmfd * | static struct shmfd * | ||||
shm_lookup(char *path, Fnv32_t fnv) | shm_lookup(char *path, Fnv32_t fnv) | ||||
{ | { | ||||
struct shm_mapping *map; | struct shm_mapping *map; | ||||
LIST_FOREACH(map, SHM_HASH(fnv), sm_link) { | LIST_FOREACH(map, SHM_HASH(fnv), sm_link) { | ||||
if (map->sm_fnv != fnv) | if (map->sm_fnv != fnv) | ||||
continue; | continue; | ||||
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode, | ||||
struct filedesc *fdp; | struct filedesc *fdp; | ||||
struct shmfd *shmfd; | struct shmfd *shmfd; | ||||
struct file *fp; | struct file *fp; | ||||
char *path; | char *path; | ||||
void *rl_cookie; | void *rl_cookie; | ||||
Fnv32_t fnv; | Fnv32_t fnv; | ||||
mode_t cmode; | mode_t cmode; | ||||
int error, fd, initial_seals; | int error, fd, initial_seals; | ||||
bool largepage; | |||||
if ((shmflags & ~(SHM_ALLOW_SEALING | SHM_GROW_ON_WRITE)) != 0) | if ((shmflags & ~(SHM_ALLOW_SEALING | SHM_GROW_ON_WRITE | | ||||
SHM_LARGEPAGE)) != 0) | |||||
return (EINVAL); | return (EINVAL); | ||||
initial_seals = F_SEAL_SEAL; | initial_seals = F_SEAL_SEAL; | ||||
if ((shmflags & SHM_ALLOW_SEALING) != 0) | if ((shmflags & SHM_ALLOW_SEALING) != 0) | ||||
initial_seals &= ~F_SEAL_SEAL; | initial_seals &= ~F_SEAL_SEAL; | ||||
#ifdef CAPABILITY_MODE | #ifdef CAPABILITY_MODE | ||||
/* | /* | ||||
* shm_open(2) is only allowed for anonymous objects. | * shm_open(2) is only allowed for anonymous objects. | ||||
*/ | */ | ||||
if (IN_CAPABILITY_MODE(td) && (userpath != SHM_ANON)) | if (IN_CAPABILITY_MODE(td) && (userpath != SHM_ANON)) | ||||
return (ECAPMODE); | return (ECAPMODE); | ||||
Done Inline ActionsWe set shmfd->shm_flags = shmflags after every shm_open2(), not just the one that created the object. So, if an object is created with SHM_LARGEPAGE, a subsequent shm_open2() of the object without SHM_LARGEPAGE will clobber the flag. markj: We set `shmfd->shm_flags = shmflags` after every shm_open2(), not just the one that created the… | |||||
Done Inline ActionsUgh, this is my bug. =( kevans: Ugh, this is my bug. =( | |||||
#endif | #endif | ||||
AUDIT_ARG_FFLAGS(flags); | AUDIT_ARG_FFLAGS(flags); | ||||
AUDIT_ARG_MODE(mode); | AUDIT_ARG_MODE(mode); | ||||
if ((flags & O_ACCMODE) != O_RDONLY && (flags & O_ACCMODE) != O_RDWR) | if ((flags & O_ACCMODE) != O_RDONLY && (flags & O_ACCMODE) != O_RDWR) | ||||
return (EINVAL); | return (EINVAL); | ||||
if ((flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC)) != 0) | if ((flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC)) != 0) | ||||
return (EINVAL); | return (EINVAL); | ||||
largepage = (shmflags & SHM_LARGEPAGE) != 0; | |||||
/* | /* | ||||
* Currently only F_SEAL_SEAL may be set when creating or opening shmfd. | * Currently only F_SEAL_SEAL may be set when creating or opening shmfd. | ||||
* If the decision is made later to allow additional seals, care must be | * If the decision is made later to allow additional seals, care must be | ||||
* taken below to ensure that the seals are properly set if the shmfd | * taken below to ensure that the seals are properly set if the shmfd | ||||
* already existed -- this currently assumes that only F_SEAL_SEAL can | * already existed -- this currently assumes that only F_SEAL_SEAL can | ||||
* be set and doesn't take further precautions to ensure the validity of | * be set and doesn't take further precautions to ensure the validity of | ||||
* the seals being added with respect to current mappings. | * the seals being added with respect to current mappings. | ||||
*/ | */ | ||||
Show All 17 Lines | #endif | ||||
/* A SHM_ANON path pointer creates an anonymous object. */ | /* A SHM_ANON path pointer creates an anonymous object. */ | ||||
if (userpath == SHM_ANON) { | if (userpath == SHM_ANON) { | ||||
/* A read-only anonymous object is pointless. */ | /* A read-only anonymous object is pointless. */ | ||||
if ((flags & O_ACCMODE) == O_RDONLY) { | if ((flags & O_ACCMODE) == O_RDONLY) { | ||||
fdclose(td, fp, fd); | fdclose(td, fp, fd); | ||||
fdrop(fp, td); | fdrop(fp, td); | ||||
return (EINVAL); | return (EINVAL); | ||||
} | } | ||||
shmfd = shm_alloc(td->td_ucred, cmode); | shmfd = shm_alloc(td->td_ucred, cmode, largepage); | ||||
shmfd->shm_seals = initial_seals; | shmfd->shm_seals = initial_seals; | ||||
} else { | } else { | ||||
error = shm_copyin_path(td, userpath, &path); | error = shm_copyin_path(td, userpath, &path); | ||||
if (error != 0) { | if (error != 0) { | ||||
fdclose(td, fp, fd); | fdclose(td, fp, fd); | ||||
fdrop(fp, td); | fdrop(fp, td); | ||||
return (error); | return (error); | ||||
} | } | ||||
AUDIT_ARG_UPATH1_CANON(path); | AUDIT_ARG_UPATH1_CANON(path); | ||||
fnv = fnv_32_str(path, FNV1_32_INIT); | fnv = fnv_32_str(path, FNV1_32_INIT); | ||||
sx_xlock(&shm_dict_lock); | sx_xlock(&shm_dict_lock); | ||||
shmfd = shm_lookup(path, fnv); | shmfd = shm_lookup(path, fnv); | ||||
if (shmfd == NULL) { | if (shmfd == NULL) { | ||||
/* Object does not yet exist, create it if requested. */ | /* Object does not yet exist, create it if requested. */ | ||||
if (flags & O_CREAT) { | if (flags & O_CREAT) { | ||||
#ifdef MAC | #ifdef MAC | ||||
error = mac_posixshm_check_create(td->td_ucred, | error = mac_posixshm_check_create(td->td_ucred, | ||||
path); | path); | ||||
if (error == 0) { | if (error == 0) { | ||||
#endif | #endif | ||||
shmfd = shm_alloc(td->td_ucred, cmode); | shmfd = shm_alloc(td->td_ucred, cmode, | ||||
largepage); | |||||
shmfd->shm_seals = initial_seals; | shmfd->shm_seals = initial_seals; | ||||
shm_insert(path, fnv, shmfd); | shm_insert(path, fnv, shmfd); | ||||
#ifdef MAC | #ifdef MAC | ||||
} | } | ||||
#endif | #endif | ||||
} else { | } else { | ||||
free(path, M_SHMFD); | free(path, M_SHMFD); | ||||
error = ENOENT; | error = ENOENT; | ||||
▲ Show 20 Lines • Show All 266 Lines • ▼ Show 20 Lines | out_locked: | ||||
sx_xunlock(&shm_dict_lock); | sx_xunlock(&shm_dict_lock); | ||||
out: | out: | ||||
free(path_from, M_SHMFD); | free(path_from, M_SHMFD); | ||||
free(path_to, M_SHMFD); | free(path_to, M_SHMFD); | ||||
return (error); | return (error); | ||||
} | } | ||||
int | static int | ||||
shm_mmap_large(struct shmfd *shmfd, vm_map_t map, vm_offset_t *addr, | |||||
vm_size_t size, vm_prot_t prot, vm_prot_t max_prot, int flags, | |||||
vm_ooffset_t foff, bool writecounted, struct thread *td) | |||||
{ | |||||
struct vmspace *vms; | |||||
vm_map_entry_t next_entry, prev_entry; | |||||
vm_offset_t align, mask, maxaddr; | |||||
int docow, error, rv, try; | |||||
bool curmap; | |||||
if (shmfd->shm_lp_psind == 0) | |||||
return (EINVAL); | |||||
/* MAP_PRIVATE is disabled */ | |||||
if ((flags & ~(MAP_SHARED | MAP_FIXED | MAP_EXCL | | |||||
Done Inline ActionsWe should probably handle MAP_ALIGNMENT_MASK here, since it could in principle be a multiple of the superpage size. markj: We should probably handle MAP_ALIGNMENT_MASK here, since it could in principle be a multiple of… | |||||
MAP_NOCORE | | |||||
#ifdef MAP_32BIT | |||||
MAP_32BIT | | |||||
#endif | |||||
MAP_ALIGNMENT_MASK)) != 0) | |||||
return (EINVAL); | |||||
vms = td->td_proc->p_vmspace; | |||||
curmap = map == &vms->vm_map; | |||||
if (curmap) { | |||||
error = kern_mmap_racct_check(td, map, size); | |||||
if (error != 0) | |||||
return (error); | |||||
} | |||||
docow = shmfd->shm_lp_psind << MAP_SPLIT_BOUNDARY_SHIFT; | |||||
docow |= MAP_INHERIT_SHARE; | |||||
if ((flags & MAP_NOCORE) != 0) | |||||
docow |= MAP_DISABLE_COREDUMP; | |||||
if (writecounted) | |||||
docow |= MAP_WRITECOUNT; | |||||
mask = pagesizes[shmfd->shm_lp_psind] - 1; | |||||
if ((foff & mask) != 0) | |||||
return (EINVAL); | |||||
maxaddr = vm_map_max(map); | |||||
#ifdef MAP_32BIT | |||||
if ((flags & MAP_32BIT) != 0 && maxaddr > MAP_32BIT_MAX_ADDR) | |||||
maxaddr = MAP_32BIT_MAX_ADDR; | |||||
#endif | |||||
if (size == 0 || (size & mask) != 0 || | |||||
(*addr != 0 && ((*addr & mask) != 0 || | |||||
*addr + size < *addr || *addr + size > maxaddr))) | |||||
return (EINVAL); | |||||
align = flags & MAP_ALIGNMENT_MASK; | |||||
if (align == 0) { | |||||
align = pagesizes[shmfd->shm_lp_psind]; | |||||
} else if (align == MAP_ALIGNED_SUPER) { | |||||
if (shmfd->shm_lp_psind != 1) | |||||
return (EINVAL); | |||||
align = pagesizes[1]; | |||||
} else { | |||||
align >>= MAP_ALIGNMENT_SHIFT; | |||||
align = 1ULL << align; | |||||
/* Also handles overflow. */ | |||||
if (align < pagesizes[shmfd->shm_lp_psind]) | |||||
return (EINVAL); | |||||
} | |||||
vm_map_lock(map); | |||||
if ((flags & MAP_FIXED) == 0) { | |||||
try = 1; | |||||
if (curmap && (*addr == 0 || | |||||
(*addr >= round_page((vm_offset_t)vms->vm_taddr) && | |||||
*addr < round_page((vm_offset_t)vms->vm_daddr + | |||||
lim_max(td, RLIMIT_DATA))))) { | |||||
Done Inline Actionsvm_map_delete() can fail now, but we are not checking the return value. vm_map_insert() will fail, so an error is returned, but it will be ENOMEM or ENOSPC instead of EINVAL as the documentation indicates. markj: `vm_map_delete()` can fail now, but we are not checking the return value. `vm_map_insert()`… | |||||
*addr = roundup2((vm_offset_t)vms->vm_daddr + | |||||
lim_max(td, RLIMIT_DATA), | |||||
pagesizes[shmfd->shm_lp_psind]); | |||||
} | |||||
again: | |||||
rv = vm_map_find_aligned(map, addr, size, maxaddr, align); | |||||
if (rv != KERN_SUCCESS) { | |||||
if (try == 1) { | |||||
try = 2; | |||||
*addr = vm_map_min(map); | |||||
if ((*addr & mask) != 0) | |||||
*addr = (*addr + mask) & mask; | |||||
goto again; | |||||
} | |||||
goto fail1; | |||||
} | |||||
} else if ((flags & MAP_EXCL) == 0) { | |||||
rv = vm_map_delete(map, *addr, *addr + size); | |||||
if (rv != KERN_SUCCESS) | |||||
goto fail1; | |||||
} else { | |||||
error = ENOSPC; | |||||
if (vm_map_lookup_entry(map, *addr, &prev_entry)) | |||||
goto fail; | |||||
next_entry = vm_map_entry_succ(prev_entry); | |||||
if (next_entry->start < *addr + size) | |||||
goto fail; | |||||
} | |||||
rv = vm_map_insert(map, shmfd->shm_object, foff, *addr, *addr + size, | |||||
prot, max_prot, docow); | |||||
fail1: | |||||
error = vm_mmap_to_errno(rv); | |||||
fail: | |||||
vm_map_unlock(map); | |||||
return (error); | |||||
} | |||||
static int | |||||
shm_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t objsize, | shm_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t objsize, | ||||
vm_prot_t prot, vm_prot_t cap_maxprot, int flags, | vm_prot_t prot, vm_prot_t cap_maxprot, int flags, | ||||
vm_ooffset_t foff, struct thread *td) | vm_ooffset_t foff, struct thread *td) | ||||
{ | { | ||||
struct shmfd *shmfd; | struct shmfd *shmfd; | ||||
vm_prot_t maxprot; | vm_prot_t maxprot; | ||||
int error; | int error; | ||||
bool writecnt; | bool writecnt; | ||||
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | #endif | ||||
mtx_lock(&shm_timestamp_lock); | mtx_lock(&shm_timestamp_lock); | ||||
vfs_timestamp(&shmfd->shm_atime); | vfs_timestamp(&shmfd->shm_atime); | ||||
mtx_unlock(&shm_timestamp_lock); | mtx_unlock(&shm_timestamp_lock); | ||||
vm_object_reference(shmfd->shm_object); | vm_object_reference(shmfd->shm_object); | ||||
if (writecnt) | if (writecnt) | ||||
vm_pager_update_writecount(shmfd->shm_object, 0, objsize); | vm_pager_update_writecount(shmfd->shm_object, 0, objsize); | ||||
if (shm_largepage(shmfd)) { | |||||
error = shm_mmap_large(shmfd, map, addr, objsize, prot, | |||||
maxprot, flags, foff, writecnt, td); | |||||
} else { | |||||
error = vm_mmap_object(map, addr, objsize, prot, maxprot, flags, | error = vm_mmap_object(map, addr, objsize, prot, maxprot, flags, | ||||
shmfd->shm_object, foff, writecnt, td); | shmfd->shm_object, foff, writecnt, td); | ||||
} | |||||
if (error != 0) { | if (error != 0) { | ||||
if (writecnt) | if (writecnt) | ||||
vm_pager_release_writecount(shmfd->shm_object, 0, | vm_pager_release_writecount(shmfd->shm_object, 0, | ||||
objsize); | objsize); | ||||
vm_object_deallocate(shmfd->shm_object); | vm_object_deallocate(shmfd->shm_object); | ||||
} | } | ||||
out: | out: | ||||
rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); | rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); | ||||
▲ Show 20 Lines • Show All 251 Lines • ▼ Show 20 Lines | shm_get_seals(struct file *fp, int *seals) | ||||
struct shmfd *shmfd; | struct shmfd *shmfd; | ||||
shmfd = fp->f_data; | shmfd = fp->f_data; | ||||
*seals = shmfd->shm_seals; | *seals = shmfd->shm_seals; | ||||
return (0); | return (0); | ||||
} | } | ||||
static int | static int | ||||
shm_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) | shm_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) | ||||
Done Inline ActionsThis function is missing largepage handling. markj: This function is missing largepage handling. | |||||
{ | { | ||||
void *rl_cookie; | void *rl_cookie; | ||||
struct shmfd *shmfd; | struct shmfd *shmfd; | ||||
size_t size; | size_t size; | ||||
int error; | int error; | ||||
/* This assumes that the caller already checked for overflow. */ | /* This assumes that the caller already checked for overflow. */ | ||||
error = 0; | error = 0; | ||||
shmfd = fp->f_data; | shmfd = fp->f_data; | ||||
size = offset + len; | size = offset + len; | ||||
/* | /* | ||||
* Just grab the rangelock for the range that we may be attempting to | * Just grab the rangelock for the range that we may be attempting to | ||||
* grow, rather than blocking read/write for regions we won't be | * grow, rather than blocking read/write for regions we won't be | ||||
* touching while this (potential) resize is in progress. Other | * touching while this (potential) resize is in progress. Other | ||||
* attempts to resize the shmfd will have to take a write lock from 0 to | * attempts to resize the shmfd will have to take a write lock from 0 to | ||||
* OFF_MAX, so this being potentially beyond the current usable range of | * OFF_MAX, so this being potentially beyond the current usable range of | ||||
* the shmfd is not necessarily a concern. If other mechanisms are | * the shmfd is not necessarily a concern. If other mechanisms are | ||||
* added to grow a shmfd, this may need to be re-evaluated. | * added to grow a shmfd, this may need to be re-evaluated. | ||||
*/ | */ | ||||
rl_cookie = rangelock_wlock(&shmfd->shm_rl, offset, size, | rl_cookie = rangelock_wlock(&shmfd->shm_rl, offset, size, | ||||
&shmfd->shm_mtx); | &shmfd->shm_mtx); | ||||
if (size > shmfd->shm_size) { | if (size > shmfd->shm_size) | ||||
VM_OBJECT_WLOCK(shmfd->shm_object); | error = shm_dotruncate_cookie(shmfd, size, rl_cookie); | ||||
error = shm_dotruncate_locked(shmfd, size, rl_cookie); | |||||
VM_OBJECT_WUNLOCK(shmfd->shm_object); | |||||
} | |||||
rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); | rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); | ||||
/* Translate to posix_fallocate(2) return value as needed. */ | /* Translate to posix_fallocate(2) return value as needed. */ | ||||
if (error == ENOMEM) | if (error == ENOMEM) | ||||
error = ENOSPC; | error = ENOSPC; | ||||
return (error); | return (error); | ||||
} | } | ||||
static int | static int | ||||
▲ Show 20 Lines • Show All 69 Lines • Show Last 20 Lines |
Can be rounddown2().