Index: sys/kern/vfs_aio.c =================================================================== --- sys/kern/vfs_aio.c +++ sys/kern/vfs_aio.c @@ -1048,6 +1048,13 @@ } } +static void +aio_switch_vmspace(struct aiocblist *aiocbe) +{ + + vmspace_switch_aio(aiocbe->userproc->p_vmspace); +} + /* * The AIO daemon, most of the actual work is done in aio_process_*, * but the setup (and address space mgmt) is done in this routine. @@ -1058,18 +1065,20 @@ struct aiocblist *aiocbe; struct aiothreadlist *aiop; struct kaioinfo *ki; - struct proc *curcp, *mycp, *userp; - struct vmspace *myvm, *tmpvm; + struct proc *p, *userp; + struct vmspace *myvm; struct thread *td = curthread; int id = (intptr_t)_id; /* - * Local copies of curproc (cp) and vmspace (myvm) + * Hold an extra reference on the daemon's own vmspace so that + * switching to a job's vmspace cannot free it; the reference + * is dropped with vmspace_free(myvm) when the daemon exits. */ - mycp = td->td_proc; - myvm = mycp->p_vmspace; + p = td->td_proc; + myvm = vmspace_acquire_ref(p); - KASSERT(mycp->p_textvp == NULL, ("kthread has a textvp")); + KASSERT(p->p_textvp == NULL, ("kthread has a textvp")); /* * Allocate and ready the aio control info. There is one aiop structure @@ -1091,12 +1100,6 @@ mtx_lock(&aio_job_mtx); for (;;) { /* - * curcp is the current daemon process context. - * userp is the current user process context. - */ - curcp = mycp; - - /* * Take daemon off of free queue */ if (aiop->aiothreadflags & AIOP_FREE) { @@ -1114,34 +1117,7 @@ /* * Connect to process address space for user program. */ - if (userp != curcp) { - /* - * Save the current address space that we are - * connected to. - */ - tmpvm = mycp->p_vmspace; - - /* - * Point to the new user address space, and - * refer to it. - */ - mycp->p_vmspace = userp->p_vmspace; - atomic_add_int(&mycp->p_vmspace->vm_refcnt, 1); - - /* Activate the new mapping. 
*/ - pmap_activate(FIRST_THREAD_IN_PROC(mycp)); - - /* - * If the old address space wasn't the daemons - * own address space, then we need to remove the - * daemon's reference from the other process - * that it was acting on behalf of. - */ - if (tmpvm != myvm) { - vmspace_free(tmpvm); - } - curcp = userp; - } + aio_switch_vmspace(aiocbe); ki = userp->p_aioinfo; @@ -1175,34 +1151,13 @@ /* * Disconnect from user address space. */ - if (curcp != mycp) { - + if (p->p_vmspace != myvm) { mtx_unlock(&aio_job_mtx); - - /* Get the user address space to disconnect from. */ - tmpvm = mycp->p_vmspace; - - /* Get original address space for daemon. */ - mycp->p_vmspace = myvm; - - /* Activate the daemon's address space. */ - pmap_activate(FIRST_THREAD_IN_PROC(mycp)); -#ifdef DIAGNOSTIC - if (tmpvm == myvm) { - printf("AIOD: vmspace problem -- %d\n", - mycp->p_pid); - } -#endif - /* Remove our vmspace reference. */ - vmspace_free(tmpvm); - - curcp = mycp; - + vmspace_switch_aio(myvm); mtx_lock(&aio_job_mtx); /* * We have to restart to avoid race, we only sleep if - * no job can be selected, that should be - * curcp == mycp. + * no job can be selected. */ continue; } @@ -1217,29 +1172,23 @@ * thereby freeing resources. 
*/ if (msleep(aiop->aiothread, &aio_job_mtx, PRIBIO, "aiordy", - aiod_lifetime)) { - if (TAILQ_EMPTY(&aio_jobs)) { - if ((aiop->aiothreadflags & AIOP_FREE) && - (num_aio_procs > target_aio_procs)) { - TAILQ_REMOVE(&aio_freeproc, aiop, list); - num_aio_procs--; - mtx_unlock(&aio_job_mtx); - uma_zfree(aiop_zone, aiop); - free_unr(aiod_unr, id); -#ifdef DIAGNOSTIC - if (mycp->p_vmspace->vm_refcnt <= 1) { - printf("AIOD: bad vm refcnt for" - " exiting daemon: %d\n", - mycp->p_vmspace->vm_refcnt); - } -#endif - kproc_exit(0); - } - } - } + aiod_lifetime) == EWOULDBLOCK && TAILQ_EMPTY(&aio_jobs) && + (aiop->aiothreadflags & AIOP_FREE) && + (num_aio_procs > target_aio_procs)) + break; } + TAILQ_REMOVE(&aio_freeproc, aiop, list); + num_aio_procs--; mtx_unlock(&aio_job_mtx); - panic("shouldn't be here\n"); + uma_zfree(aiop_zone, aiop); + free_unr(aiod_unr, id); + vmspace_free(myvm); + + KASSERT(p->p_vmspace == myvm, + ("AIOD: bad vmspace for exiting daemon")); + KASSERT(myvm->vm_refcnt > 1, + ("AIOD: bad vm refcnt for exiting daemon: %d", myvm->vm_refcnt)); + kproc_exit(0); } /* Index: sys/vm/vm_extern.h =================================================================== --- sys/vm/vm_extern.h +++ sys/vm/vm_extern.h @@ -106,6 +106,7 @@ struct vmspace *vmspace_acquire_ref(struct proc *); void vmspace_free(struct vmspace *); void vmspace_exitfree(struct proc *); +void vmspace_switch_aio(struct vmspace *); void vnode_pager_setsize(struct vnode *, vm_ooffset_t); int vslock(void *, size_t); void vsunlock(void *, size_t); Index: sys/vm/vm_map.c =================================================================== --- sys/vm/vm_map.c +++ sys/vm/vm_map.c @@ -451,6 +451,53 @@ return (vm); } +/* + * Switch between vmspaces in an AIO kernel process. + * + * The AIO kernel processes switch to a user process's vmspace while + * performing an I/O operation on behalf of a user process. 
The new + * vmspace is either the vmspace of a user process obtained from an + * active AIO request or the initial vmspace of the AIO kernel process + * (when it is idling). Because user processes will block to drain + * any active AIO requests before proceeding in exit() or execve(), + * the vmspace reference count for these vmspaces can never be 0. + * This allows for a much simpler implementation than the loop in + * vmspace_acquire_ref() above. Similarly, AIO kernel processes hold + * an extra reference on their initial vmspace for the life of the + * process so that this guarantee is true for any vmspace passed as + * 'newvm'. + */ +void +vmspace_switch_aio(struct vmspace *newvm) +{ + struct vmspace *oldvm; + + /* XXX: Need some way to assert that this is an aio daemon. */ + + /* + * The plain increment below relies on the guarantee described + * above that 'newvm' is still referenced; assert it. + */ + KASSERT(newvm->vm_refcnt > 0, + ("vmspace_switch_aio: newvm unreferenced")); + + oldvm = curproc->p_vmspace; + if (oldvm == newvm) + return; + + /* + * Point to the new address space and take a reference on it. + */ + curproc->p_vmspace = newvm; + atomic_add_int(&newvm->vm_refcnt, 1); + + /* Activate the new mapping. */ + pmap_activate(curthread); + + /* Remove the daemon's reference to the old address space. */ + vmspace_free(oldvm); +} + void _vm_map_lock(vm_map_t map, const char *file, int line) {