Changeset View
Standalone View
sys/kern/vfs_aio.c
Show First 20 Lines • Show All 95 Lines • ▼ Show 20 Lines | |||||
#ifndef MAX_AIO_QUEUE | #ifndef MAX_AIO_QUEUE | ||||
#define MAX_AIO_QUEUE 1024 /* Bigger than MAX_AIO_QUEUE_PER_PROC */ | #define MAX_AIO_QUEUE 1024 /* Bigger than MAX_AIO_QUEUE_PER_PROC */ | ||||
#endif | #endif | ||||
#ifndef MAX_BUF_AIO | #ifndef MAX_BUF_AIO | ||||
#define MAX_BUF_AIO 16 | #define MAX_BUF_AIO 16 | ||||
#endif | #endif | ||||
#ifndef MAX_VFS_XFER | |||||
#define MAX_VFS_XFER (32 * 1024 * 1024) /* 32MB - DMU_MAX_ACCESS/2 */ | |||||
#endif | |||||
FEATURE(aio, "Asynchronous I/O"); | FEATURE(aio, "Asynchronous I/O"); | ||||
SYSCTL_DECL(_p1003_1b); | SYSCTL_DECL(_p1003_1b); | ||||
static MALLOC_DEFINE(M_LIO, "lio", "listio aio control block list"); | static MALLOC_DEFINE(M_LIO, "lio", "listio aio control block list"); | ||||
static MALLOC_DEFINE(M_AIOS, "aios", "aio_suspend aio control block list"); | static MALLOC_DEFINE(M_AIOS, "aios", "aio_suspend aio control block list"); | ||||
static SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | static SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | ||||
"Async IO management"); | "Async IO management"); | ||||
▲ Show 20 Lines • Show All 202 Lines • ▼ Show 20 Lines | |||||
static void aio_process_sync(struct kaiocb *job); | static void aio_process_sync(struct kaiocb *job); | ||||
static void aio_process_mlock(struct kaiocb *job); | static void aio_process_mlock(struct kaiocb *job); | ||||
static void aio_schedule_fsync(void *context, int pending); | static void aio_schedule_fsync(void *context, int pending); | ||||
static int aio_newproc(int *); | static int aio_newproc(int *); | ||||
int aio_aqueue(struct thread *td, struct aiocb *ujob, | int aio_aqueue(struct thread *td, struct aiocb *ujob, | ||||
struct aioliojob *lio, int type, struct aiocb_ops *ops); | struct aioliojob *lio, int type, struct aiocb_ops *ops); | ||||
static int aio_queue_file(struct file *fp, struct kaiocb *job); | static int aio_queue_file(struct file *fp, struct kaiocb *job); | ||||
static void aio_biowakeup(struct bio *bp); | static void aio_biowakeup(struct bio *bp); | ||||
static void aio_ubiowakeup(struct uio_bio *ubio); | |||||
static void aio_proc_rundown(void *arg, struct proc *p); | static void aio_proc_rundown(void *arg, struct proc *p); | ||||
static void aio_proc_rundown_exec(void *arg, struct proc *p, | static void aio_proc_rundown_exec(void *arg, struct proc *p, | ||||
struct image_params *imgp); | struct image_params *imgp); | ||||
static int aio_queue_vfs(struct kaiocb *job); | |||||
static int aio_qbio(struct proc *p, struct kaiocb *job); | static int aio_qbio(struct proc *p, struct kaiocb *job); | ||||
static void aio_daemon(void *param); | static void aio_daemon(void *param); | ||||
static void aio_bio_done_notify(struct proc *userp, struct kaiocb *job); | static void aio_bio_done_notify(struct proc *userp, struct kaiocb *job); | ||||
static bool aio_clear_cancel_function_locked(struct kaiocb *job); | static bool aio_clear_cancel_function_locked(struct kaiocb *job); | ||||
static int aio_kick(struct proc *userp); | static int aio_kick(struct proc *userp); | ||||
static void aio_kick_nowait(struct proc *userp); | static void aio_kick_nowait(struct proc *userp); | ||||
static void aio_kick_helper(void *context, int pending); | static void aio_kick_helper(void *context, int pending); | ||||
static int filt_aioattach(struct knote *kn); | static int filt_aioattach(struct knote *kn); | ||||
▲ Show 20 Lines • Show All 946 Lines • ▼ Show 20 Lines | aio_qbio(struct proc *p, struct kaiocb *job) | ||||
bp->bio_bcount = cb->aio_nbytes; | bp->bio_bcount = cb->aio_nbytes; | ||||
bp->bio_done = aio_biowakeup; | bp->bio_done = aio_biowakeup; | ||||
bp->bio_offset = cb->aio_offset; | bp->bio_offset = cb->aio_offset; | ||||
bp->bio_cmd = cb->aio_lio_opcode == LIO_WRITE ? BIO_WRITE : BIO_READ; | bp->bio_cmd = cb->aio_lio_opcode == LIO_WRITE ? BIO_WRITE : BIO_READ; | ||||
bp->bio_dev = dev; | bp->bio_dev = dev; | ||||
bp->bio_caller1 = (void *)job; | bp->bio_caller1 = (void *)job; | ||||
prot = VM_PROT_READ; | prot = VM_PROT_READ; | ||||
/* Reading from disk means writing to memory */ | |||||
if (cb->aio_lio_opcode == LIO_READ) | if (cb->aio_lio_opcode == LIO_READ) | ||||
prot |= VM_PROT_WRITE; /* Less backwards than it looks */ | prot |= VM_PROT_WRITE; | ||||
job->npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, | job->npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, | ||||
(vm_offset_t)cb->aio_buf, bp->bio_length, prot, job->pages, | (vm_offset_t)cb->aio_buf, bp->bio_length, prot, job->pages, | ||||
nitems(job->pages)); | nitems(job->pages)); | ||||
if (job->npages < 0) { | if (job->npages < 0) { | ||||
error = EFAULT; | error = EFAULT; | ||||
goto doerror; | goto doerror; | ||||
} | } | ||||
if (pbuf != NULL) { | if (pbuf != NULL) { | ||||
Show All 25 Lines | doerror: | ||||
} | } | ||||
g_destroy_bio(bp); | g_destroy_bio(bp); | ||||
job->bp = NULL; | job->bp = NULL; | ||||
unref: | unref: | ||||
dev_relthread(dev, ref); | dev_relthread(dev, ref); | ||||
return (error); | return (error); | ||||
} | } | ||||
/* | |||||
* aio_queue_vfs works similarly to aio_qbio. It checks | |||||
* that it supports the aio operation in question and | |||||
* then if the vnode's file system support asynchronous | |||||
* requests. It then sets up the request by holding the | |||||
* user's pages with the appropriate permissions. If that | |||||
* succeeds it call VOP_UBOP. The uio_bio callback | |||||
* aio_ubiowakeup will be called when the operation completes. | |||||
*/ | |||||
static int | |||||
aio_queue_vfs(struct kaiocb *job) | |||||
{ | |||||
struct aiocb *cb; | |||||
struct file *fp; | |||||
struct vnode *vp; | |||||
struct uio_bio *ubio, ubio_local; | |||||
vm_prot_t prot; | |||||
uint32_t io_size, bio_size; | |||||
int error, cmd; | |||||
vm_offset_t page_offset; | |||||
cb = &job->uaiocb; | |||||
fp = job->fd_file; | |||||
if (!(cb->aio_lio_opcode == LIO_WRITE || | |||||
cb->aio_lio_opcode == LIO_READ)) | |||||
return (-1); | |||||
if (fp == NULL || fp->f_type != DTYPE_VNODE) | |||||
return (-1); | |||||
vp = fp->f_vnode; | |||||
/* | |||||
* Zero length read should always succeed | |||||
* if supported. | |||||
*/ | |||||
bzero(&ubio_local, sizeof(ubio_local)); | |||||
ubio_local.uio_cmd = UIO_BIO_READ; | |||||
if (VOP_UBOP(vp, &ubio_local, FOF_OFFSET) == EOPNOTSUPP) | |||||
return (-1); | |||||
/* | |||||
* Don't punt here - XXX | |||||
*/ | |||||
if (cb->aio_nbytes > MAX_VFS_XFER) | |||||
return (-1); | |||||
page_offset = ((vm_offset_t)cb->aio_buf) & PAGE_MASK; | |||||
cmd = cb->aio_lio_opcode == LIO_WRITE ? UIO_BIO_WRITE : UIO_BIO_READ; | |||||
io_size = cb->aio_nbytes + page_offset + PAGE_MASK; | |||||
io_size &= ~PAGE_MASK; | |||||
bio_size = sizeof(*ubio); | |||||
asomers: Why not use `<< PAGE_SHIFT`? | |||||
if (io_size <= MAXPHYS) { | |||||
Not Done Inline ActionsIs io_pages the same thing as btoc(cb->aio_nbytes + cb->aio_buf) - btoc(cb->aio_buf) + 1? asomers: Is io_pages the same thing as `btoc(cb->aio_nbytes + cb->aio_buf) - btoc(cb->aio_buf) + 1`? | |||||
ubio = malloc(bio_size, M_AIOS, M_WAITOK); | |||||
ubio->uio_ma = job->pages; | |||||
Not Done Inline Actionspages is a field from the bio backend's union member. But you're adding a new backend. I think you should create a new union member in struct kaiocb, and put vfs_pages or something in there. Especially because I'm eliminating the pages field in D27624. Also, what initializes pages at this point? I thought it was only used by aio_qbio, which hasn't been called yet, right? asomers: pages is a field from the bio backend's union member. But you're adding a new backend. I… | |||||
} else { | |||||
bio_size += sizeof(vm_page_t )*btoc(io_size); | |||||
ubio = malloc(bio_size, M_AIOS, M_WAITOK); | |||||
ubio->uio_ma = (vm_page_t*)(ubio + 1); | |||||
} | |||||
ubio->uio_cmd = cmd; | |||||
ubio->uio_error = 0; | |||||
ubio->uio_flags = 0; | |||||
ubio->uio_ma_offset = page_offset; | |||||
ubio->uio_offset = cb->aio_offset; | |||||
ubio->uio_resid = cb->aio_nbytes; | |||||
Not Done Inline ActionsJust say "Reading from disk means writing into memory" scottl: Just say "Reading from disk means writing into memory" | |||||
ubio->uio_td = curthread; | |||||
ubio->uio_bio_done = aio_ubiowakeup; | |||||
ubio->uio_arg = job; | |||||
prot = VM_PROT_READ; | |||||
/* Reading from disk means writing to memory */ | |||||
if (cb->aio_lio_opcode == LIO_READ) | |||||
prot |= VM_PROT_WRITE; | |||||
ubio->uio_ma_cnt = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, | |||||
(vm_offset_t)cb->aio_buf, cb->aio_nbytes, prot, ubio->uio_ma, | |||||
btoc(MAX(io_size, MAXPHYS))); | |||||
if (ubio->uio_ma_cnt < 0) { | |||||
error = EFAULT; | |||||
goto err; | |||||
} | |||||
error = VOP_UBOP(vp, ubio, FOF_OFFSET); | |||||
if (error == EINPROGRESS || error == 0) | |||||
return (0); | |||||
err: | |||||
free(ubio, M_AIOS); | |||||
return (error); | |||||
Not Done Inline ActionsThis is dead code. Since io_pages is unsigned, you'll always return -1 in the previous block before you get here. Or maybe this check should come before the previous one? asomers: This is dead code. Since io_pages is unsigned, you'll always return -1 in the previous block… | |||||
} | |||||
#ifdef COMPAT_FREEBSD6 | #ifdef COMPAT_FREEBSD6 | ||||
static int | static int | ||||
convert_old_sigevent(struct osigevent *osig, struct sigevent *nsig) | convert_old_sigevent(struct osigevent *osig, struct sigevent *nsig) | ||||
{ | { | ||||
/* | /* | ||||
* Only SIGEV_NONE, SIGEV_SIGNAL, and SIGEV_KEVENT are | * Only SIGEV_NONE, SIGEV_SIGNAL, and SIGEV_KEVENT are | ||||
* supported by AIO with the old sigevent structure. | * supported by AIO with the old sigevent structure. | ||||
▲ Show 20 Lines • Show All 352 Lines • ▼ Show 20 Lines | |||||
{ | { | ||||
struct kaioinfo *ki; | struct kaioinfo *ki; | ||||
struct kaiocb *job2; | struct kaiocb *job2; | ||||
struct vnode *vp; | struct vnode *vp; | ||||
struct mount *mp; | struct mount *mp; | ||||
int error; | int error; | ||||
bool safe; | bool safe; | ||||
error = aio_queue_vfs(job); | |||||
Not Done — inline comment from jhb: Maybe call it `aio_queue_vfs`? qasync is a pretty generic name, but the function is VFS-specific. | |||||
if (error >= 0) | |||||
return (error); | |||||
ki = job->userproc->p_aioinfo; | ki = job->userproc->p_aioinfo; | ||||
error = aio_qbio(job->userproc, job); | error = aio_qbio(job->userproc, job); | ||||
if (error >= 0) | if (error >= 0) | ||||
return (error); | return (error); | ||||
safe = false; | safe = false; | ||||
if (fp->f_type == DTYPE_VNODE) { | if (fp->f_type == DTYPE_VNODE) { | ||||
vp = fp->f_vnode; | vp = fp->f_vnode; | ||||
if (vp->v_type == VREG || vp->v_type == VDIR) { | if (vp->v_type == VREG || vp->v_type == VDIR) { | ||||
▲ Show 20 Lines • Show All 616 Lines • ▼ Show 20 Lines | sys_lio_listio(struct thread *td, struct lio_listio_args *uap) | ||||
acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK); | acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK); | ||||
error = copyin(uap->acb_list, acb_list, nent * sizeof(acb_list[0])); | error = copyin(uap->acb_list, acb_list, nent * sizeof(acb_list[0])); | ||||
if (error == 0) | if (error == 0) | ||||
error = kern_lio_listio(td, uap->mode, uap->acb_list, acb_list, | error = kern_lio_listio(td, uap->mode, uap->acb_list, acb_list, | ||||
nent, sigp, &aiocb_ops); | nent, sigp, &aiocb_ops); | ||||
free(acb_list, M_LIO); | free(acb_list, M_LIO); | ||||
return (error); | return (error); | ||||
} | |||||
/* | |||||
* aio_ubiowakeup is the uio_bio completion callback for | |||||
* aio_queue_vfs. It just drops the hold on the pages | |||||
* from aio_queue_vfs and marks the aio as completed. | |||||
*/ | |||||
static void | |||||
aio_ubiowakeup(struct uio_bio *ubio) | |||||
{ | |||||
struct kaiocb *job = (struct kaiocb *)ubio->uio_arg; | |||||
size_t nbytes; | |||||
int error; | |||||
vm_page_unhold_pages(ubio->uio_ma, ubio->uio_ma_cnt); | |||||
nbytes = job->uaiocb.aio_nbytes - ubio->uio_resid; | |||||
error = 0; | |||||
if (ubio->uio_flags & UIO_BIO_ERROR) | |||||
error = ubio->uio_error; | |||||
if (error) | |||||
aio_complete(job, -1, error); | |||||
else | |||||
aio_complete(job, nbytes, 0); | |||||
free(ubio, M_AIOS); | |||||
} | } | ||||
static void | static void | ||||
aio_biowakeup(struct bio *bp) | aio_biowakeup(struct bio *bp) | ||||
{ | { | ||||
struct kaiocb *job = (struct kaiocb *)bp->bio_caller1; | struct kaiocb *job = (struct kaiocb *)bp->bio_caller1; | ||||
struct proc *userp; | struct proc *userp; | ||||
struct kaioinfo *ki; | struct kaioinfo *ki; | ||||
▲ Show 20 Lines • Show All 649 Lines • Show Last 20 Lines |
Why not use << PAGE_SHIFT?