Changeset View
Standalone View
sys/kern/vfs_aio.c
Show First 20 Lines • Show All 553 Lines • ▼ Show 20 Lines | aio_free_entry(struct kaiocb *job) | ||||
* knlist_delete(). This does mean that it is possible for the | * knlist_delete(). This does mean that it is possible for the | ||||
* thread pointer at close time to differ from the thread pointer | * thread pointer at close time to differ from the thread pointer | ||||
* at open time, but this is already true of file descriptors in | * at open time, but this is already true of file descriptors in | ||||
* a multithreaded process. | * a multithreaded process. | ||||
*/ | */ | ||||
if (job->fd_file) | if (job->fd_file) | ||||
fdrop(job->fd_file, curthread); | fdrop(job->fd_file, curthread); | ||||
crfree(job->cred); | crfree(job->cred); | ||||
if (job->uiop != &job->uio) | |||||
free(job->uiop, M_IOV); | |||||
uma_zfree(aiocb_zone, job); | uma_zfree(aiocb_zone, job); | ||||
AIO_LOCK(ki); | AIO_LOCK(ki); | ||||
return (0); | return (0); | ||||
} | } | ||||
static void | static void | ||||
aio_proc_rundown_exec(void *arg, struct proc *p, | aio_proc_rundown_exec(void *arg, struct proc *p, | ||||
▲ Show 20 Lines • Show All 179 Lines • ▼ Show 20 Lines | |||||
*/ | */ | ||||
static void | static void | ||||
aio_process_rw(struct kaiocb *job) | aio_process_rw(struct kaiocb *job) | ||||
{ | { | ||||
struct ucred *td_savedcred; | struct ucred *td_savedcred; | ||||
struct thread *td; | struct thread *td; | ||||
struct aiocb *cb; | struct aiocb *cb; | ||||
struct file *fp; | struct file *fp; | ||||
struct uio auio; | |||||
struct iovec aiov; | |||||
ssize_t cnt; | ssize_t cnt; | ||||
long msgsnd_st, msgsnd_end; | long msgsnd_st, msgsnd_end; | ||||
long msgrcv_st, msgrcv_end; | long msgrcv_st, msgrcv_end; | ||||
long oublock_st, oublock_end; | long oublock_st, oublock_end; | ||||
long inblock_st, inblock_end; | long inblock_st, inblock_end; | ||||
int error; | int error, opcode; | ||||
KASSERT(job->uaiocb.aio_lio_opcode == LIO_READ || | KASSERT(job->uaiocb.aio_lio_opcode == LIO_READ || | ||||
job->uaiocb.aio_lio_opcode == LIO_WRITE, | job->uaiocb.aio_lio_opcode == LIO_READV || | ||||
job->uaiocb.aio_lio_opcode == LIO_WRITE || | |||||
job->uaiocb.aio_lio_opcode == LIO_WRITEV, | |||||
("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode)); | ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode)); | ||||
aio_switch_vmspace(job); | aio_switch_vmspace(job); | ||||
td = curthread; | td = curthread; | ||||
td_savedcred = td->td_ucred; | td_savedcred = td->td_ucred; | ||||
td->td_ucred = job->cred; | td->td_ucred = job->cred; | ||||
job->uiop->uio_td = td; | |||||
cb = &job->uaiocb; | cb = &job->uaiocb; | ||||
fp = job->fd_file; | fp = job->fd_file; | ||||
aiov.iov_base = (void *)(uintptr_t)cb->aio_buf; | opcode = job->uaiocb.aio_lio_opcode; | ||||
aiov.iov_len = cb->aio_nbytes; | cnt = job->uiop->uio_resid; | ||||
auio.uio_iov = &aiov; | |||||
auio.uio_iovcnt = 1; | |||||
auio.uio_offset = cb->aio_offset; | |||||
auio.uio_resid = cb->aio_nbytes; | |||||
cnt = cb->aio_nbytes; | |||||
auio.uio_segflg = UIO_USERSPACE; | |||||
auio.uio_td = td; | |||||
msgrcv_st = td->td_ru.ru_msgrcv; | msgrcv_st = td->td_ru.ru_msgrcv; | ||||
msgsnd_st = td->td_ru.ru_msgsnd; | msgsnd_st = td->td_ru.ru_msgsnd; | ||||
inblock_st = td->td_ru.ru_inblock; | inblock_st = td->td_ru.ru_inblock; | ||||
oublock_st = td->td_ru.ru_oublock; | oublock_st = td->td_ru.ru_oublock; | ||||
/* | /* | ||||
* aio_aqueue() acquires a reference to the file that is | * aio_aqueue() acquires a reference to the file that is | ||||
* released in aio_free_entry(). | * released in aio_free_entry(). | ||||
*/ | */ | ||||
if (cb->aio_lio_opcode == LIO_READ) { | if (opcode == LIO_READ || opcode == LIO_READV) { | ||||
auio.uio_rw = UIO_READ; | if (job->uiop->uio_resid == 0) | ||||
if (auio.uio_resid == 0) | |||||
error = 0; | error = 0; | ||||
else | else | ||||
error = fo_read(fp, &auio, fp->f_cred, FOF_OFFSET, td); | error = fo_read(fp, job->uiop, fp->f_cred, FOF_OFFSET, | ||||
td); | |||||
} else { | } else { | ||||
if (fp->f_type == DTYPE_VNODE) | if (fp->f_type == DTYPE_VNODE) | ||||
bwillwrite(); | bwillwrite(); | ||||
auio.uio_rw = UIO_WRITE; | error = fo_write(fp, job->uiop, fp->f_cred, FOF_OFFSET, td); | ||||
error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, td); | |||||
} | } | ||||
msgrcv_end = td->td_ru.ru_msgrcv; | msgrcv_end = td->td_ru.ru_msgrcv; | ||||
msgsnd_end = td->td_ru.ru_msgsnd; | msgsnd_end = td->td_ru.ru_msgsnd; | ||||
inblock_end = td->td_ru.ru_inblock; | inblock_end = td->td_ru.ru_inblock; | ||||
oublock_end = td->td_ru.ru_oublock; | oublock_end = td->td_ru.ru_oublock; | ||||
job->msgrcv = msgrcv_end - msgrcv_st; | job->msgrcv = msgrcv_end - msgrcv_st; | ||||
job->msgsnd = msgsnd_end - msgsnd_st; | job->msgsnd = msgsnd_end - msgsnd_st; | ||||
job->inblock = inblock_end - inblock_st; | job->inblock = inblock_end - inblock_st; | ||||
job->outblock = oublock_end - oublock_st; | job->outblock = oublock_end - oublock_st; | ||||
if ((error) && (auio.uio_resid != cnt)) { | if (error != 0 && (job->uiop->uio_resid != cnt)) { | ||||
kib: Extra () — the parentheses around `job->uiop->uio_resid != cnt` in the condition above are redundant.
if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) | if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) | ||||
error = 0; | error = 0; | ||||
if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE)) { | if (error == EPIPE && | ||||
(opcode == LIO_WRITE || opcode == LIO_WRITEV)) { | |||||
PROC_LOCK(job->userproc); | PROC_LOCK(job->userproc); | ||||
kern_psignal(job->userproc, SIGPIPE); | kern_psignal(job->userproc, SIGPIPE); | ||||
PROC_UNLOCK(job->userproc); | PROC_UNLOCK(job->userproc); | ||||
} | } | ||||
} | } | ||||
cnt -= auio.uio_resid; | cnt -= job->uiop->uio_resid; | ||||
td->td_ucred = td_savedcred; | td->td_ucred = td_savedcred; | ||||
if (error) | if (error) | ||||
aio_complete(job, -1, error); | aio_complete(job, -1, error); | ||||
else | else | ||||
aio_complete(job, cnt, 0); | aio_complete(job, cnt, 0); | ||||
} | } | ||||
static void | static void | ||||
▲ Show 20 Lines • Show All 367 Lines • ▼ Show 20 Lines | |||||
* structure's reference count, preventing its deallocation for the | * structure's reference count, preventing its deallocation for the | ||||
* duration of this call. | * duration of this call. | ||||
*/ | */ | ||||
static int | static int | ||||
aio_qbio(struct proc *p, struct kaiocb *job) | aio_qbio(struct proc *p, struct kaiocb *job) | ||||
{ | { | ||||
struct aiocb *cb; | struct aiocb *cb; | ||||
struct file *fp; | struct file *fp; | ||||
struct bio *bp; | |||||
struct buf *pbuf; | struct buf *pbuf; | ||||
struct vnode *vp; | struct vnode *vp; | ||||
struct cdevsw *csw; | struct cdevsw *csw; | ||||
struct cdev *dev; | struct cdev *dev; | ||||
struct kaioinfo *ki; | struct kaioinfo *ki; | ||||
struct vm_page **pages; | off_t offset; | ||||
int error, npages, poff, ref; | int bio_cmd, error, i, iovcnt, opcode, poff, ref; | ||||
vm_prot_t prot; | vm_prot_t prot; | ||||
cb = &job->uaiocb; | cb = &job->uaiocb; | ||||
fp = job->fd_file; | fp = job->fd_file; | ||||
opcode = cb->aio_lio_opcode; | |||||
if (!(cb->aio_lio_opcode == LIO_WRITE || | if (!(opcode == LIO_WRITE || opcode == LIO_WRITEV || | ||||
cb->aio_lio_opcode == LIO_READ)) | opcode == LIO_READ || opcode == LIO_READV)) | ||||
return (-1); | return (-1); | ||||
if (fp == NULL || fp->f_type != DTYPE_VNODE) | if (fp == NULL || fp->f_type != DTYPE_VNODE) | ||||
return (-1); | return (-1); | ||||
vp = fp->f_vnode; | vp = fp->f_vnode; | ||||
if (vp->v_type != VCHR) | if (vp->v_type != VCHR) | ||||
return (-1); | return (-1); | ||||
if (vp->v_bufobj.bo_bsize == 0) | if (vp->v_bufobj.bo_bsize == 0) | ||||
return (-1); | return (-1); | ||||
if (cb->aio_nbytes % vp->v_bufobj.bo_bsize) | |||||
bio_cmd = opcode == LIO_WRITE || opcode == LIO_WRITEV ? BIO_WRITE : | |||||
BIO_READ; | |||||
iovcnt = job->uiop->uio_iovcnt; | |||||
if (iovcnt > max_buf_aio) | |||||
return (-1); | return (-1); | ||||
for (i = 0; i < iovcnt; i++) { | |||||
if (job->uiop->uio_iov[i].iov_len % vp->v_bufobj.bo_bsize != 0) | |||||
return (-1); | |||||
} | |||||
offset = cb->aio_offset; | |||||
ref = 0; | ref = 0; | ||||
csw = devvn_refthread(vp, &dev, &ref); | csw = devvn_refthread(vp, &dev, &ref); | ||||
if (csw == NULL) | if (csw == NULL) | ||||
return (ENXIO); | return (ENXIO); | ||||
if ((csw->d_flags & D_DISK) == 0) { | if ((csw->d_flags & D_DISK) == 0) { | ||||
error = -1; | error = -1; | ||||
goto unref; | goto unref; | ||||
} | } | ||||
if (cb->aio_nbytes > dev->si_iosize_max) { | if (job->uiop->uio_resid > dev->si_iosize_max) { | ||||
error = -1; | error = -1; | ||||
goto unref; | goto unref; | ||||
} | } | ||||
ki = p->p_aioinfo; | ki = p->p_aioinfo; | ||||
poff = (vm_offset_t)cb->aio_buf & PAGE_MASK; | job->error = 0; | ||||
if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) { | atomic_store_int(&job->nbio, iovcnt); | ||||
if (cb->aio_nbytes > maxphys) { | for (i = 0; i < iovcnt; i++) { | ||||
struct vm_page** pages; | |||||
struct bio *bp; | |||||
void *buf; | |||||
size_t nbytes; | |||||
int npages; | |||||
buf = job->uiop->uio_iov[i].iov_base; | |||||
nbytes = job->uiop->uio_iov[i].iov_len; | |||||
if (nbytes > maxphys) { | |||||
kib: So if an error occurs in the loop after some iovs have already been queued, you return -1 and aio_queue_file() re-queues them? Am I missing something?
asomers: If `aio_qbio` returns `-1`, then `aio_queue_file` will queue the job using the slow path, `aio_process_rw`.
kib: So you confirm my observation, and it is a valid bug.
asomers: No, it's not a bug. `aio_queue_file` tries the fast path first, and if that doesn't work it tries the slow path. Perhaps you're thinking that `aio_process_rw` calls `aio_qbio`? It doesn't. Only `aio_queue_file` does.
kib: The loop processes the iovs one by one. If, e.g., the first iov was queued but the second causes an error, aio_queue_file() requeues everything, so the first iov is queued twice.
asomers: OK, I see what you're saying. Some of the iovs will be issued twice in that case. I think I can fix that.
error = -1; | error = -1; | ||||
goto unref; | goto unref; | ||||
} | } | ||||
bp = g_alloc_bio(); | |||||
poff = (vm_offset_t)buf & PAGE_MASK; | |||||
if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) { | |||||
pbuf = NULL; | pbuf = NULL; | ||||
pages = malloc(sizeof(vm_page_t) * (atop(round_page( | pages = malloc(sizeof(vm_page_t) * (atop(round_page( | ||||
cb->aio_nbytes)) + 1), M_TEMP, M_WAITOK | M_ZERO); | nbytes)) + 1), M_TEMP, M_WAITOK | M_ZERO); | ||||
} else { | } else { | ||||
if (cb->aio_nbytes > maxphys) { | if (ki->kaio_buffer_count + iovcnt > max_buf_aio) { | ||||
error = -1; | g_destroy_bio(bp); | ||||
goto unref; | |||||
} | |||||
if (ki->kaio_buffer_count >= max_buf_aio) { | |||||
error = EAGAIN; | error = EAGAIN; | ||||
goto unref; | goto unref; | ||||
} | } | ||||
pbuf = uma_zalloc(pbuf_zone, M_WAITOK); | pbuf = uma_zalloc(pbuf_zone, M_WAITOK); | ||||
BUF_KERNPROC(pbuf); | BUF_KERNPROC(pbuf); | ||||
AIO_LOCK(ki); | AIO_LOCK(ki); | ||||
ki->kaio_buffer_count++; | ki->kaio_buffer_count++; | ||||
AIO_UNLOCK(ki); | AIO_UNLOCK(ki); | ||||
pages = pbuf->b_pages; | pages = pbuf->b_pages; | ||||
} | } | ||||
bp = g_alloc_bio(); | |||||
bp->bio_length = cb->aio_nbytes; | bp->bio_length = nbytes; | ||||
bp->bio_bcount = cb->aio_nbytes; | bp->bio_bcount = nbytes; | ||||
bp->bio_done = aio_biowakeup; | bp->bio_done = aio_biowakeup; | ||||
bp->bio_offset = cb->aio_offset; | bp->bio_offset = offset; | ||||
bp->bio_cmd = cb->aio_lio_opcode == LIO_WRITE ? BIO_WRITE : BIO_READ; | bp->bio_cmd = bio_cmd; | ||||
bp->bio_dev = dev; | bp->bio_dev = dev; | ||||
bp->bio_caller1 = job; | bp->bio_caller1 = job; | ||||
bp->bio_caller2 = pbuf; | bp->bio_caller2 = pbuf; | ||||
prot = VM_PROT_READ; | prot = VM_PROT_READ; | ||||
if (cb->aio_lio_opcode == LIO_READ) | if (opcode == LIO_READ || opcode == LIO_READV) | ||||
prot |= VM_PROT_WRITE; /* Less backwards than it looks */ | prot |= VM_PROT_WRITE; /* Less backwards than it looks */ | ||||
npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, | npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, | ||||
(vm_offset_t)cb->aio_buf, bp->bio_length, prot, pages, | (vm_offset_t)buf, bp->bio_length, prot, pages, | ||||
atop(maxphys) + 1); | atop(maxphys) + 1); | ||||
if (npages < 0) { | if (npages < 0) { | ||||
if (pbuf != NULL) { | |||||
AIO_LOCK(ki); | |||||
ki->kaio_buffer_count--; | |||||
AIO_UNLOCK(ki); | |||||
uma_zfree(pbuf_zone, pbuf); | |||||
} else { | |||||
free(pages, M_TEMP); | |||||
} | |||||
g_destroy_bio(bp); | |||||
error = EFAULT; | error = EFAULT; | ||||
goto doerror; | goto unref; | ||||
} | } | ||||
if (pbuf != NULL) { | if (pbuf != NULL) { | ||||
pmap_qenter((vm_offset_t)pbuf->b_data, pages, npages); | pmap_qenter((vm_offset_t)pbuf->b_data, pages, npages); | ||||
bp->bio_data = pbuf->b_data + poff; | bp->bio_data = pbuf->b_data + poff; | ||||
atomic_add_int(&num_buf_aio, 1); | |||||
pbuf->b_npages = npages; | pbuf->b_npages = npages; | ||||
atomic_add_int(&num_buf_aio, 1); | |||||
} else { | } else { | ||||
bp->bio_ma = pages; | bp->bio_ma = pages; | ||||
bp->bio_ma_n = npages; | bp->bio_ma_n = npages; | ||||
bp->bio_ma_offset = poff; | bp->bio_ma_offset = poff; | ||||
bp->bio_data = unmapped_buf; | bp->bio_data = unmapped_buf; | ||||
bp->bio_flags |= BIO_UNMAPPED; | bp->bio_flags |= BIO_UNMAPPED; | ||||
atomic_add_int(&num_unmapped_aio, 1); | atomic_add_int(&num_unmapped_aio, 1); | ||||
} | } | ||||
/* Perform transfer. */ | /* Perform transfer. */ | ||||
kib: `struct bio *` (space before the star). BTW, mallocarray(9) might be better.
csw->d_strategy(bp); | csw->d_strategy(bp); | ||||
offset += nbytes; | |||||
} | |||||
dev_relthread(dev, ref); | dev_relthread(dev, ref); | ||||
return (0); | return (0); | ||||
doerror: | |||||
if (pbuf != NULL) { | |||||
AIO_LOCK(ki); | |||||
ki->kaio_buffer_count--; | |||||
AIO_UNLOCK(ki); | |||||
uma_zfree(pbuf_zone, pbuf); | |||||
} else { | |||||
free(pages, M_TEMP); | |||||
} | |||||
g_destroy_bio(bp); | |||||
unref: | unref: | ||||
dev_relthread(dev, ref); | dev_relthread(dev, ref); | ||||
kib: You need to qremove and unhold the pages for the already-processed buffers.
return (error); | return (error); | ||||
} | } | ||||
#ifdef COMPAT_FREEBSD6 | #ifdef COMPAT_FREEBSD6 | ||||
static int | static int | ||||
convert_old_sigevent(struct osigevent *osig, struct sigevent *nsig) | convert_old_sigevent(struct osigevent *osig, struct sigevent *nsig) | ||||
{ | { | ||||
Show All 18 Lines | convert_old_sigevent(struct osigevent *osig, struct sigevent *nsig) | ||||
} | } | ||||
return (0); | return (0); | ||||
} | } | ||||
static int | static int | ||||
aiocb_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob) | aiocb_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob) | ||||
{ | { | ||||
struct oaiocb *ojob; | struct oaiocb *ojob; | ||||
int error; | int error; | ||||
kib: Why `-i`? Also, the cleanup for kaio_buffer_count might be more logical in destroy_bios.
asomers: Leftover from an earlier revision. It should be just `iovcnt`.
bzero(kjob, sizeof(struct aiocb)); | bzero(kjob, sizeof(struct aiocb)); | ||||
error = copyin(ujob, kjob, sizeof(struct oaiocb)); | error = copyin(ujob, kjob, sizeof(struct oaiocb)); | ||||
if (error) | if (error) | ||||
return (error); | return (error); | ||||
ojob = (struct oaiocb *)kjob; | ojob = (struct oaiocb *)kjob; | ||||
return (convert_old_sigevent(&ojob->aio_sigevent, &kjob->aio_sigevent)); | return (convert_old_sigevent(&ojob->aio_sigevent, &kjob->aio_sigevent)); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 134 Lines • ▼ Show 20 Lines | if ((job->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL || | ||||
uma_zfree(aiocb_zone, job); | uma_zfree(aiocb_zone, job); | ||||
return (EINVAL); | return (EINVAL); | ||||
} | } | ||||
ksiginfo_init(&job->ksi); | ksiginfo_init(&job->ksi); | ||||
/* Save userspace address of the job info. */ | /* Save userspace address of the job info. */ | ||||
job->ujob = ujob; | job->ujob = ujob; | ||||
jhb: The blank line here in the old code is a style feature. :)
/* Get the opcode. */ | /* Get the opcode. */ | ||||
if (type != LIO_NOP) | if (type != LIO_NOP) | ||||
job->uaiocb.aio_lio_opcode = type; | job->uaiocb.aio_lio_opcode = type; | ||||
opcode = job->uaiocb.aio_lio_opcode; | opcode = job->uaiocb.aio_lio_opcode; | ||||
/* | /* | ||||
* Validate the opcode and fetch the file object for the specified | * Validate the opcode and fetch the file object for the specified | ||||
* file descriptor. | * file descriptor. | ||||
* | * | ||||
* XXXRW: Moved the opcode validation up here so that we don't | * XXXRW: Moved the opcode validation up here so that we don't | ||||
* retrieve a file descriptor without knowing what the capability | * retrieve a file descriptor without knowing what the capability | ||||
* should be. | * should be. | ||||
*/ | */ | ||||
fd = job->uaiocb.aio_fildes; | fd = job->uaiocb.aio_fildes; | ||||
switch (opcode) { | switch (opcode) { | ||||
case LIO_WRITE: | case LIO_WRITE: | ||||
case LIO_WRITEV: | |||||
error = fget_write(td, fd, &cap_pwrite_rights, &fp); | error = fget_write(td, fd, &cap_pwrite_rights, &fp); | ||||
break; | break; | ||||
case LIO_READ: | case LIO_READ: | ||||
case LIO_READV: | |||||
error = fget_read(td, fd, &cap_pread_rights, &fp); | error = fget_read(td, fd, &cap_pread_rights, &fp); | ||||
break; | break; | ||||
case LIO_SYNC: | case LIO_SYNC: | ||||
error = fget(td, fd, &cap_fsync_rights, &fp); | error = fget(td, fd, &cap_fsync_rights, &fp); | ||||
break; | break; | ||||
case LIO_MLOCK: | case LIO_MLOCK: | ||||
fp = NULL; | fp = NULL; | ||||
break; | break; | ||||
Show All 9 Lines | if (error) { | ||||
return (error); | return (error); | ||||
} | } | ||||
if (opcode == LIO_SYNC && fp->f_vnode == NULL) { | if (opcode == LIO_SYNC && fp->f_vnode == NULL) { | ||||
error = EINVAL; | error = EINVAL; | ||||
goto aqueue_fail; | goto aqueue_fail; | ||||
} | } | ||||
if ((opcode == LIO_READ || opcode == LIO_WRITE) && | if ((opcode == LIO_READ || opcode == LIO_READV || | ||||
opcode == LIO_WRITE || opcode == LIO_WRITEV) && | |||||
job->uaiocb.aio_offset < 0 && | job->uaiocb.aio_offset < 0 && | ||||
(fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) { | (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) { | ||||
error = EINVAL; | error = EINVAL; | ||||
goto aqueue_fail; | goto aqueue_fail; | ||||
} | } | ||||
job->fd_file = fp; | job->fd_file = fp; | ||||
Show All 36 Lines | no_kqueue: | ||||
ops->store_error(ujob, EINPROGRESS); | ops->store_error(ujob, EINPROGRESS); | ||||
job->uaiocb._aiocb_private.error = EINPROGRESS; | job->uaiocb._aiocb_private.error = EINPROGRESS; | ||||
job->userproc = p; | job->userproc = p; | ||||
job->cred = crhold(td->td_ucred); | job->cred = crhold(td->td_ucred); | ||||
job->jobflags = KAIOCB_QUEUEING; | job->jobflags = KAIOCB_QUEUEING; | ||||
job->lio = lj; | job->lio = lj; | ||||
switch (opcode) { | |||||
case LIO_READV: | |||||
case LIO_WRITEV: | |||||
/* malloc a uio */ | |||||
error = copyinuio(job->uaiocb.aio_iov, job->uaiocb.aio_iovcnt, | |||||
jhb: This needs to be done in the syscalls themselves, as different ABIs need to populate the uio. For example, the freebsd32 versions need to use freebsd32_copyinuio rather than copyinuio. Either that, or you have to make a new ops->copyinuio callback that matches the copyinuio signature. It can use copyinuio for the native ABI and freebsd32_copyinuio for freebsd32.
asomers: Good catch!
&job->uiop); | |||||
if (error) | |||||
goto aqueue_fail; | |||||
break; | |||||
case LIO_READ: | |||||
case LIO_WRITE: | |||||
/* Use the inline uio */ | |||||
job->iov[0].iov_base = (void *)(uintptr_t)job->uaiocb.aio_buf; | |||||
job->iov[0].iov_len = job->uaiocb.aio_nbytes; | |||||
job->uio.uio_iov = job->iov; | |||||
job->uio.uio_iovcnt = 1; | |||||
job->uio.uio_resid = job->uaiocb.aio_nbytes; | |||||
job->uio.uio_segflg = UIO_USERSPACE; | |||||
/* FALLTHROUGH */ | |||||
default: | |||||
job->uiop = &job->uio; | |||||
break; | |||||
} | |||||
switch (opcode) { | |||||
case LIO_READ: | |||||
case LIO_READV: | |||||
job->uiop->uio_rw = UIO_READ; | |||||
break; | |||||
case LIO_WRITE: | |||||
case LIO_WRITEV: | |||||
job->uiop->uio_rw = UIO_WRITE; | |||||
break; | |||||
} | |||||
job->uiop->uio_offset = job->uaiocb.aio_offset; | |||||
job->uiop->uio_td = td; | |||||
if (opcode == LIO_MLOCK) { | if (opcode == LIO_MLOCK) { | ||||
aio_schedule(job, aio_process_mlock); | aio_schedule(job, aio_process_mlock); | ||||
error = 0; | error = 0; | ||||
} else if (fp->f_ops->fo_aio_queue == NULL) | } else if (fp->f_ops->fo_aio_queue == NULL) | ||||
error = aio_queue_file(fp, job); | error = aio_queue_file(fp, job); | ||||
else | else | ||||
error = fo_aio_queue(fp, job); | error = fo_aio_queue(fp, job); | ||||
if (error) | if (error) | ||||
Show All 14 Lines | if (job->jobflags & KAIOCB_FINISHED) { | ||||
*/ | */ | ||||
aio_bio_done_notify(p, job); | aio_bio_done_notify(p, job); | ||||
} else | } else | ||||
TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, job, plist); | TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, job, plist); | ||||
AIO_UNLOCK(ki); | AIO_UNLOCK(ki); | ||||
return (0); | return (0); | ||||
aqueue_fail: | aqueue_fail: | ||||
if (job->uiop != &job->uio) | |||||
free(job->uiop, M_IOV); | |||||
knlist_delete(&job->klist, curthread, 0); | knlist_delete(&job->klist, curthread, 0); | ||||
if (fp) | if (fp) | ||||
fdrop(fp, td); | fdrop(fp, td); | ||||
uma_zfree(aiocb_zone, job); | uma_zfree(aiocb_zone, job); | ||||
ops->store_error(ujob, error); | ops->store_error(ujob, error); | ||||
return (error); | return (error); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines | aio_queue_file(struct file *fp, struct kaiocb *job) | ||||
if (!(safe || enable_aio_unsafe)) { | if (!(safe || enable_aio_unsafe)) { | ||||
counted_warning(&unsafe_warningcnt, | counted_warning(&unsafe_warningcnt, | ||||
"is attempting to use unsafe AIO requests"); | "is attempting to use unsafe AIO requests"); | ||||
return (EOPNOTSUPP); | return (EOPNOTSUPP); | ||||
} | } | ||||
switch (job->uaiocb.aio_lio_opcode) { | switch (job->uaiocb.aio_lio_opcode) { | ||||
case LIO_READ: | case LIO_READ: | ||||
case LIO_READV: | |||||
case LIO_WRITE: | case LIO_WRITE: | ||||
case LIO_WRITEV: | |||||
aio_schedule(job, aio_process_rw); | aio_schedule(job, aio_process_rw); | ||||
error = 0; | error = 0; | ||||
break; | break; | ||||
case LIO_SYNC: | case LIO_SYNC: | ||||
AIO_LOCK(ki); | AIO_LOCK(ki); | ||||
TAILQ_FOREACH(job2, &ki->kaio_jobqueue, plist) { | TAILQ_FOREACH(job2, &ki->kaio_jobqueue, plist) { | ||||
if (job2->fd_file == job->fd_file && | if (job2->fd_file == job->fd_file && | ||||
job2->uaiocb.aio_lio_opcode != LIO_SYNC && | job2->uaiocb.aio_lio_opcode != LIO_SYNC && | ||||
▲ Show 20 Lines • Show All 357 Lines • ▼ Show 20 Lines | |||||
int | int | ||||
sys_aio_read(struct thread *td, struct aio_read_args *uap) | sys_aio_read(struct thread *td, struct aio_read_args *uap) | ||||
{ | { | ||||
return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READ, &aiocb_ops)); | return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READ, &aiocb_ops)); | ||||
} | } | ||||
int | |||||
sys_aio_readv(struct thread *td, struct aio_readv_args *uap) | |||||
{ | |||||
return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READV, &aiocb_ops)); | |||||
} | |||||
/* syscall - asynchronous write to a file (REALTIME) */ | /* syscall - asynchronous write to a file (REALTIME) */ | ||||
#ifdef COMPAT_FREEBSD6 | #ifdef COMPAT_FREEBSD6 | ||||
int | int | ||||
freebsd6_aio_write(struct thread *td, struct freebsd6_aio_write_args *uap) | freebsd6_aio_write(struct thread *td, struct freebsd6_aio_write_args *uap) | ||||
{ | { | ||||
return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE, | return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE, | ||||
&aiocb_ops_osigevent)); | &aiocb_ops_osigevent)); | ||||
} | } | ||||
#endif | #endif | ||||
int | int | ||||
sys_aio_write(struct thread *td, struct aio_write_args *uap) | sys_aio_write(struct thread *td, struct aio_write_args *uap) | ||||
{ | { | ||||
return (aio_aqueue(td, uap->aiocbp, NULL, LIO_WRITE, &aiocb_ops)); | return (aio_aqueue(td, uap->aiocbp, NULL, LIO_WRITE, &aiocb_ops)); | ||||
} | } | ||||
int | int | ||||
sys_aio_writev(struct thread *td, struct aio_writev_args *uap) | |||||
{ | |||||
return (aio_aqueue(td, uap->aiocbp, NULL, LIO_WRITEV, &aiocb_ops)); | |||||
} | |||||
int | |||||
sys_aio_mlock(struct thread *td, struct aio_mlock_args *uap) | sys_aio_mlock(struct thread *td, struct aio_mlock_args *uap) | ||||
{ | { | ||||
return (aio_aqueue(td, uap->aiocbp, NULL, LIO_MLOCK, &aiocb_ops)); | return (aio_aqueue(td, uap->aiocbp, NULL, LIO_MLOCK, &aiocb_ops)); | ||||
} | } | ||||
static int | static int | ||||
kern_lio_listio(struct thread *td, int mode, struct aiocb * const *uacb_list, | kern_lio_listio(struct thread *td, int mode, struct aiocb * const *uacb_list, | ||||
▲ Show 20 Lines • Show All 209 Lines • ▼ Show 20 Lines | sys_lio_listio(struct thread *td, struct lio_listio_args *uap) | ||||
return (error); | return (error); | ||||
} | } | ||||
static void | static void | ||||
aio_biowakeup(struct bio *bp) | aio_biowakeup(struct bio *bp) | ||||
{ | { | ||||
struct kaiocb *job = (struct kaiocb *)bp->bio_caller1; | struct kaiocb *job = (struct kaiocb *)bp->bio_caller1; | ||||
struct kaioinfo *ki; | struct kaioinfo *ki; | ||||
struct buf *pbuf = (struct buf*)bp->bio_caller2; | struct buf *pbuf = (struct buf*)bp->bio_caller2; | ||||
size_t nbytes; | size_t nbytes; | ||||
int error, nblks; | int error, opcode, nblks; | ||||
opcode = job->uaiocb.aio_lio_opcode; | |||||
kib: Space before `*` in the cast type, i.e. `(struct buf *)`.
/* Release mapping into kernel space. */ | /* Release mapping into kernel space. */ | ||||
if (pbuf != NULL) { | if (pbuf != NULL) { | ||||
MPASS(pbuf->b_npages <= atop(maxphys) + 1); | MPASS(pbuf->b_npages <= atop(maxphys) + 1); | ||||
pmap_qremove((vm_offset_t)pbuf->b_data, pbuf->b_npages); | pmap_qremove((vm_offset_t)pbuf->b_data, pbuf->b_npages); | ||||
vm_page_unhold_pages(pbuf->b_pages, pbuf->b_npages); | vm_page_unhold_pages(pbuf->b_pages, pbuf->b_npages); | ||||
uma_zfree(pbuf_zone, pbuf); | uma_zfree(pbuf_zone, pbuf); | ||||
atomic_subtract_int(&num_buf_aio, 1); | atomic_subtract_int(&num_buf_aio, 1); | ||||
ki = job->userproc->p_aioinfo; | ki = job->userproc->p_aioinfo; | ||||
AIO_LOCK(ki); | AIO_LOCK(ki); | ||||
ki->kaio_buffer_count--; | ki->kaio_buffer_count--; | ||||
AIO_UNLOCK(ki); | AIO_UNLOCK(ki); | ||||
} else { | } else { | ||||
MPASS(bp->bio_ma_n <= atop(maxphys) + 1); | MPASS(bp->bio_ma_n <= atop(maxphys) + 1); | ||||
vm_page_unhold_pages(bp->bio_ma, bp->bio_ma_n); | vm_page_unhold_pages(bp->bio_ma, bp->bio_ma_n); | ||||
free(bp->bio_ma, M_TEMP); | free(bp->bio_ma, M_TEMP); | ||||
atomic_subtract_int(&num_unmapped_aio, 1); | atomic_subtract_int(&num_unmapped_aio, 1); | ||||
} | } | ||||
nbytes = job->uaiocb.aio_nbytes - bp->bio_resid; | nbytes = bp->bio_bcount - bp->bio_resid; | ||||
atomic_add_acq_long(&job->nbytes, nbytes); | |||||
nblks = btodb(nbytes); | |||||
error = 0; | error = 0; | ||||
/* | |||||
* If multiple bios experienced an error, the job will reflect the | |||||
* error of whichever failed bio completed last. | |||||
*/ | |||||
if (bp->bio_flags & BIO_ERROR) | if (bp->bio_flags & BIO_ERROR) | ||||
error = bp->bio_error; | atomic_set_int(&job->error, bp->bio_error); | ||||
nblks = btodb(nbytes); | if (opcode == LIO_WRITE || opcode == LIO_WRITEV) | ||||
if (job->uaiocb.aio_lio_opcode == LIO_WRITE) | atomic_add_int(&job->outblock, nblks); | ||||
job->outblock += nblks; | |||||
else | else | ||||
job->inblock += nblks; | atomic_add_int(&job->inblock, nblks); | ||||
atomic_subtract_int(&job->nbio, 1); | |||||
if (error) | |||||
aio_complete(job, -1, error); | |||||
else | |||||
aio_complete(job, nbytes, 0); | |||||
g_destroy_bio(bp); | g_destroy_bio(bp); | ||||
if (atomic_load_int(&job->nbio) == 0) { | |||||
if (atomic_load_int(&job->error)) | |||||
aio_complete(job, -1, job->error); | |||||
else | |||||
aio_complete(job, atomic_load_int(&job->nbytes), 0); | |||||
} | } | ||||
} | |||||
/* syscall - wait for the next completion of an aio request */ | /* syscall - wait for the next completion of an aio request */ | ||||
static int | static int | ||||
kern_aio_waitcomplete(struct thread *td, struct aiocb **ujobp, | kern_aio_waitcomplete(struct thread *td, struct aiocb **ujobp, | ||||
struct timespec *ts, struct aiocb_ops *ops) | struct timespec *ts, struct aiocb_ops *ops) | ||||
{ | { | ||||
struct proc *p = td->td_proc; | struct proc *p = td->td_proc; | ||||
struct timeval atv; | struct timeval atv; | ||||
▲ Show 20 Lines • Show All 274 Lines • ▼ Show 20 Lines | aiocb32_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob) | ||||
CP(job32, *kjob, aio_fildes); | CP(job32, *kjob, aio_fildes); | ||||
CP(job32, *kjob, aio_offset); | CP(job32, *kjob, aio_offset); | ||||
PTRIN_CP(job32, *kjob, aio_buf); | PTRIN_CP(job32, *kjob, aio_buf); | ||||
CP(job32, *kjob, aio_nbytes); | CP(job32, *kjob, aio_nbytes); | ||||
CP(job32, *kjob, aio_lio_opcode); | CP(job32, *kjob, aio_lio_opcode); | ||||
CP(job32, *kjob, aio_reqprio); | CP(job32, *kjob, aio_reqprio); | ||||
CP(job32, *kjob, _aiocb_private.status); | CP(job32, *kjob, _aiocb_private.status); | ||||
CP(job32, *kjob, _aiocb_private.error); | CP(job32, *kjob, _aiocb_private.error); | ||||
PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo); | PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo); | ||||
jhb: Style nit: add a blank line before this comment.
return (convert_old_sigevent32(&job32.aio_sigevent, | return (convert_old_sigevent32(&job32.aio_sigevent, | ||||
&kjob->aio_sigevent)); | &kjob->aio_sigevent)); | ||||
} | } | ||||
#endif | #endif | ||||
static int | static int | ||||
aiocb32_copyin(struct aiocb *ujob, struct aiocb *kjob) | aiocb32_copyin(struct aiocb *ujob, struct aiocb *kjob) | ||||
{ | { | ||||
▲ Show 20 Lines • Show All 153 Lines • ▼ Show 20 Lines | |||||
int | int | ||||
freebsd32_aio_read(struct thread *td, struct freebsd32_aio_read_args *uap) | freebsd32_aio_read(struct thread *td, struct freebsd32_aio_read_args *uap) | ||||
{ | { | ||||
return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ, | return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ, | ||||
&aiocb32_ops)); | &aiocb32_ops)); | ||||
} | } | ||||
int | |||||
freebsd32_aio_readv(struct thread *td, struct freebsd32_aio_readv_args *uap) | |||||
{ | |||||
return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READV, | |||||
&aiocb32_ops)); | |||||
} | |||||
#ifdef COMPAT_FREEBSD6 | #ifdef COMPAT_FREEBSD6 | ||||
int | int | ||||
freebsd6_freebsd32_aio_write(struct thread *td, | freebsd6_freebsd32_aio_write(struct thread *td, | ||||
struct freebsd6_freebsd32_aio_write_args *uap) | struct freebsd6_freebsd32_aio_write_args *uap) | ||||
{ | { | ||||
return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE, | return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE, | ||||
&aiocb32_ops_osigevent)); | &aiocb32_ops_osigevent)); | ||||
} | } | ||||
#endif | #endif | ||||
int | int | ||||
freebsd32_aio_write(struct thread *td, struct freebsd32_aio_write_args *uap) | freebsd32_aio_write(struct thread *td, struct freebsd32_aio_write_args *uap) | ||||
{ | { | ||||
return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE, | return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE, | ||||
&aiocb32_ops)); | |||||
} | |||||
int | |||||
freebsd32_aio_writev(struct thread *td, struct freebsd32_aio_writev_args *uap) | |||||
{ | |||||
return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITEV, | |||||
&aiocb32_ops)); | &aiocb32_ops)); | ||||
} | } | ||||
int | int | ||||
freebsd32_aio_mlock(struct thread *td, struct freebsd32_aio_mlock_args *uap) | freebsd32_aio_mlock(struct thread *td, struct freebsd32_aio_mlock_args *uap) | ||||
{ | { | ||||
return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_MLOCK, | return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_MLOCK, | ||||
▲ Show 20 Lines • Show All 128 Lines • Show Last 20 Lines |
Extra ()