Index: head/sys/dev/md/md.c
===================================================================
--- head/sys/dev/md/md.c
+++ head/sys/dev/md/md.c
@@ -1118,10 +1118,7 @@
                         }
                         vm_page_valid(m);
-                        if (m->dirty != VM_PAGE_BITS_ALL) {
-                                vm_page_dirty(m);
-                                vm_pager_page_unswapped(m);
-                        }
+                        vm_page_set_dirty(m);
                 } else if (bp->bio_cmd == BIO_DELETE) {
                         if (len == PAGE_SIZE || vm_page_all_valid(m))
                                 rv = VM_PAGER_OK;
@@ -1138,10 +1135,7 @@
                         /* Page is valid. */
                         if (len != PAGE_SIZE) {
                                 pmap_zero_page_area(m, offs, len);
-                                if (m->dirty != VM_PAGE_BITS_ALL) {
-                                        vm_page_dirty(m);
-                                        vm_pager_page_unswapped(m);
-                                }
+                                vm_page_set_dirty(m);
                         } else {
                                 vm_pager_page_unswapped(m);
                                 vm_page_free(m);
Index: head/sys/fs/tmpfs/tmpfs_subr.c
===================================================================
--- head/sys/fs/tmpfs/tmpfs_subr.c
+++ head/sys/fs/tmpfs/tmpfs_subr.c
@@ -1505,9 +1505,8 @@
                 }
                 if (m != NULL) {
                         pmap_zero_page_area(m, base, PAGE_SIZE - base);
-                        vm_page_dirty(m);
+                        vm_page_set_dirty(m);
                         vm_page_xunbusy(m);
-                        vm_pager_page_unswapped(m);
                 }
         }
 
Index: head/sys/kern/uipc_shm.c
===================================================================
--- head/sys/kern/uipc_shm.c
+++ head/sys/kern/uipc_shm.c
@@ -198,7 +198,7 @@
          * type object.
          */
         rv = vm_page_grab_valid(&m, obj, idx,
-            VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_NOBUSY);
+            VM_ALLOC_NORMAL | VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY);
         if (rv != VM_PAGER_OK) {
                 VM_OBJECT_WUNLOCK(obj);
                 printf("uiomove_object: vm_obj %p idx %jd pager error %d\n",
@@ -207,13 +207,10 @@
         }
         VM_OBJECT_WUNLOCK(obj);
         error = uiomove_fromphys(&m, offset, tlen, uio);
-        if (uio->uio_rw == UIO_WRITE && error == 0) {
-                VM_OBJECT_WLOCK(obj);
-                vm_page_dirty(m);
-                vm_pager_page_unswapped(m);
-                VM_OBJECT_WUNLOCK(obj);
-        }
-        vm_page_unwire(m, PQ_ACTIVE);
+        if (uio->uio_rw == UIO_WRITE && error == 0)
+                vm_page_set_dirty(m);
+        vm_page_aflag_set(m, PGA_REFERENCED);
+        vm_page_sunbusy(m);
 
         return (error);
 }
@@ -527,9 +524,8 @@
                         pmap_zero_page_area(m, base, PAGE_SIZE - base);
                         KASSERT(vm_page_all_valid(m),
                             ("shm_dotruncate: page %p is invalid", m));
-                        vm_page_dirty(m);
+                        vm_page_set_dirty(m);
                         vm_page_xunbusy(m);
-                        vm_pager_page_unswapped(m);
                 }
         }
         delta = IDX_TO_OFF(object->size - nobjsize);
Index: head/sys/vm/swap_pager.c
===================================================================
--- head/sys/vm/swap_pager.c
+++ head/sys/vm/swap_pager.c
@@ -155,6 +155,9 @@
 static u_long swap_reserved;
 static u_long swap_total;
 static int sysctl_page_shift(SYSCTL_HANDLER_ARGS);
+
+static SYSCTL_NODE(_vm_stats, OID_AUTO, swap, CTLFLAG_RD, 0, "VM swap stats");
+
 SYSCTL_PROC(_vm, OID_AUTO, swap_reserved, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
     &swap_reserved, 0, sysctl_page_shift, "A",
     "Amount of swap storage needed to back all allocated anonymous memory.");
@@ -173,6 +176,16 @@
 SYSCTL_ULONG(_vm, OID_AUTO, swap_maxpages, CTLFLAG_RD, &swap_maxpages, 0,
     "Maximum amount of swap supported");
 
+static counter_u64_t swap_free_deferred;
+SYSCTL_COUNTER_U64(_vm_stats_swap, OID_AUTO, free_deferred,
+    CTLFLAG_RD, &swap_free_deferred,
+    "Number of pages that deferred freeing swap space");
+
+static counter_u64_t swap_free_completed;
+SYSCTL_COUNTER_U64(_vm_stats_swap, OID_AUTO, free_completed,
+    CTLFLAG_RD, &swap_free_completed,
+    "Number of deferred frees completed");
+
 /* bits from overcommit */
 #define SWAP_RESERVE_FORCE_ON           (1 << 0)
 #define SWAP_RESERVE_RLIMIT_ON          (1 << 1)
@@ -513,6 +526,15 @@
         sx_init(&swdev_syscall_lock, "swsysc");
 }
 
+static void
+swap_pager_counters(void)
+{
+
+        swap_free_deferred = counter_u64_alloc(M_WAITOK);
+        swap_free_completed = counter_u64_alloc(M_WAITOK);
+}
+SYSINIT(swap_counters, SI_SUB_CPU, SI_ORDER_ANY, swap_pager_counters, NULL);
+
 /*
  * SWAP_PAGER_SWAP_INIT() - swap pager initialization from pageout process
  *
@@ -1112,14 +1134,37 @@
  *
  *      This routine may not sleep.
  *
- *      The object containing the page must be locked.
+ *      The object containing the page may be locked.
  */
 static void
 swap_pager_unswapped(vm_page_t m)
 {
         struct swblk *sb;
+        vm_object_t obj;
 
-        VM_OBJECT_ASSERT_WLOCKED(m->object);
+        /*
+         * Handle enqueing deferred frees first.  If we do not have the
+         * object lock we wait for the page daemon to clear the space.
+         */
+        obj = m->object;
+        if (!VM_OBJECT_WOWNED(obj)) {
+                VM_PAGE_OBJECT_BUSY_ASSERT(m);
+                /*
+                 * The caller is responsible for synchronization but we
+                 * will harmlessly handle races.  This is typically provided
+                 * by only calling unswapped() when a page transitions from
+                 * clean to dirty.
+                 */
+                if ((m->a.flags & (PGA_SWAP_SPACE | PGA_SWAP_FREE)) ==
+                    PGA_SWAP_SPACE) {
+                        vm_page_aflag_set(m, PGA_SWAP_FREE);
+                        counter_u64_add(swap_free_deferred, 1);
+                }
+                return;
+        }
+        if ((m->a.flags & PGA_SWAP_FREE) != 0)
+                counter_u64_add(swap_free_completed, 1);
+        vm_page_aflag_clear(m, PGA_SWAP_FREE | PGA_SWAP_SPACE);
 
         /*
          * The meta data only exists if the object is OBJT_SWAP
@@ -1436,6 +1481,7 @@
                 VM_OBJECT_WLOCK(object);
                 for (j = 0; j < n; ++j) {
                         mreq = ma[i + j];
+                        vm_page_aflag_clear(mreq, PGA_SWAP_FREE);
                         addr = swp_pager_meta_build(mreq->object,
                             mreq->pindex, blk + j);
                         if (addr != SWAPBLK_NONE)
@@ -1560,6 +1606,9 @@
                         wakeup(&object->handle);
                 }
 
+                /* We always have space after I/O, successful or not. */
+                vm_page_aflag_set(m, PGA_SWAP_SPACE);
+
                 if (bp->b_ioflags & BIO_ERROR) {
                         /*
                          * If an error occurs I'd love to throw the swapblk
@@ -1581,6 +1630,7 @@
                                  * then finish the I/O.
                                  */
                                 MPASS(m->dirty == VM_PAGE_BITS_ALL);
+                                /* PQ_UNSWAPPABLE? */
                                 vm_page_lock(m);
                                 vm_page_activate(m);
                                 vm_page_unlock(m);
Index: head/sys/vm/vm_fault.c
===================================================================
--- head/sys/vm/vm_fault.c
+++ head/sys/vm/vm_fault.c
@@ -214,7 +214,7 @@
 
 static void
 vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_prot_t prot,
-    vm_prot_t fault_type, int fault_flags, bool excl)
+    vm_prot_t fault_type, int fault_flags)
 {
         bool need_dirty;
 
@@ -223,7 +223,6 @@
             (m->oflags & VPO_UNMANAGED) != 0)
                 return;
 
-        VM_OBJECT_ASSERT_LOCKED(m->object);
         VM_PAGE_OBJECT_BUSY_ASSERT(m);
 
         need_dirty = ((fault_type & VM_PROT_WRITE) != 0 &&
@@ -232,49 +231,29 @@
 
         vm_object_set_writeable_dirty(m->object);
 
-        if (!excl)
-                /*
-                 * If two callers of vm_fault_dirty() with excl ==
-                 * FALSE, one for the map entry with MAP_ENTRY_NOSYNC
-                 * flag set, other with flag clear, race, it is
-                 * possible for the no-NOSYNC thread to see m->dirty
-                 * != 0 and not clear PGA_NOSYNC. Take vm_page lock
-                 * around manipulation of PGA_NOSYNC and
-                 * vm_page_dirty() call to avoid the race.
-                 */
-                vm_page_lock(m);
-
-        /*
-         * If this is a NOSYNC mmap we do not want to set PGA_NOSYNC
-         * if the page is already dirty to prevent data written with
-         * the expectation of being synced from not being synced.
-         * Likewise if this entry does not request NOSYNC then make
-         * sure the page isn't marked NOSYNC. Applications sharing
-         * data should use the same flags to avoid ping ponging.
-         */
-        if ((entry->eflags & MAP_ENTRY_NOSYNC) != 0) {
-                if (m->dirty == 0) {
-                        vm_page_aflag_set(m, PGA_NOSYNC);
-                }
-        } else {
-                vm_page_aflag_clear(m, PGA_NOSYNC);
-        }
-
         /*
          * If the fault is a write, we know that this page is being
          * written NOW so dirty it explicitly to save on
          * pmap_is_modified() calls later.
          *
          * Also, since the page is now dirty, we can possibly tell
-         * the pager to release any swap backing the page. Calling
-         * the pager requires a write lock on the object.
+         * the pager to release any swap backing the page.
          */
-        if (need_dirty)
-                vm_page_dirty(m);
-        if (!excl)
-                vm_page_unlock(m);
-        else if (need_dirty)
-                vm_pager_page_unswapped(m);
+        if (need_dirty && vm_page_set_dirty(m) == 0) {
+                /*
+                 * If this is a NOSYNC mmap we do not want to set PGA_NOSYNC
+                 * if the page is already dirty to prevent data written with
+                 * the expectation of being synced from not being synced.
+                 * Likewise if this entry does not request NOSYNC then make
+                 * sure the page isn't marked NOSYNC. Applications sharing
+                 * data should use the same flags to avoid ping ponging.
+                 */
+                if ((entry->eflags & MAP_ENTRY_NOSYNC) != 0)
+                        vm_page_aflag_set(m, PGA_NOSYNC);
+                else
+                        vm_page_aflag_clear(m, PGA_NOSYNC);
+        }
+
 }
 
 /*
@@ -344,7 +323,7 @@
                 *m_hold = m;
                 vm_page_wire(m);
         }
-        vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags, false);
+        vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags);
         if (psind == 0 && !wired)
                 vm_fault_prefault(fs, vaddr, PFBAK, PFFOR, true);
         VM_OBJECT_RUNLOCK(fs->first_object);
@@ -502,7 +481,7 @@
         for (i = 0; i < npages; i++) {
                 vm_fault_populate_check_page(&m[i]);
                 vm_fault_dirty(fs->entry, &m[i], prot, fault_type,
-                    fault_flags, true);
+                    fault_flags);
         }
         VM_OBJECT_WUNLOCK(fs->first_object);
         rv = pmap_enter(fs->map->pmap, vaddr, m, prot, fault_type |
@@ -1381,7 +1360,7 @@
         fs.entry->next_read = vaddr + ptoa(ahead) + PAGE_SIZE;
 
         vm_page_assert_xbusied(fs.m);
-        vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags, true);
+        vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags);
 
         /*
          * Page must be completely valid or it is not fit to
Index: head/sys/vm/vm_page.h
===================================================================
--- head/sys/vm/vm_page.h
+++ head/sys/vm/vm_page.h
@@ -429,6 +429,10 @@
  * PGA_REQUEUE_HEAD is a special flag for enqueuing pages near the head of
  * the inactive queue, thus bypassing LRU. The page lock must be held to
  * set this flag, and the queue lock for the page must be held to clear it.
+ *
+ * PGA_SWAP_FREE is used to defer freeing swap space to the pageout daemon
+ * when the context that dirties the page does not have the object write lock
+ * held.
  */
 #define PGA_WRITEABLE   0x0001          /* page may be mapped writeable */
 #define PGA_REFERENCED  0x0002          /* page has been referenced */
@@ -438,6 +442,8 @@
 #define PGA_REQUEUE     0x0020          /* page is due to be requeued */
 #define PGA_REQUEUE_HEAD 0x0040         /* page requeue should bypass LRU */
 #define PGA_NOSYNC      0x0080          /* do not collect for syncer */
+#define PGA_SWAP_FREE   0x0100          /* page with swap space was dirtied */
+#define PGA_SWAP_SPACE  0x0200          /* page has allocated swap space */
 
 #define PGA_QUEUE_OP_MASK       (PGA_DEQUEUE | PGA_REQUEUE | PGA_REQUEUE_HEAD)
 #define PGA_QUEUE_STATE_MASK    (PGA_ENQUEUED | PGA_QUEUE_OP_MASK)
@@ -647,6 +653,7 @@
 int vm_page_sbusied(vm_page_t m);
 vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start,
     vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options);
+vm_page_bits_t vm_page_set_dirty(vm_page_t m);
 void vm_page_set_valid_range(vm_page_t m, int base, int size);
 int vm_page_sleep_if_busy(vm_page_t m, const char *msg);
 int vm_page_sleep_if_xbusy(vm_page_t m, const char *msg);
Index: head/sys/vm/vm_page.c
===================================================================
--- head/sys/vm/vm_page.c
+++ head/sys/vm/vm_page.c
@@ -1584,6 +1584,10 @@
         KASSERT((m->ref_count & VPRC_OBJREF) != 0,
             ("page %p is missing its object ref", m));
 
+        /* Deferred free of swap space. */
+        if ((m->a.flags & PGA_SWAP_FREE) != 0)
+                vm_pager_page_unswapped(m);
+
         mrem = vm_radix_remove(&object->rtree, m->pindex);
         KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m));
 
@@ -4633,6 +4637,62 @@
 #endif  /* PAGE_SIZE */
 }
 
+static inline vm_page_bits_t
+vm_page_bits_swap(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t newbits)
+{
+#if PAGE_SIZE == 32768
+        uint64_t old;
+
+        old = *bits;
+        while (atomic_fcmpset_64(bits, &old, newbits) == 0);
+        return (old);
+#elif PAGE_SIZE == 16384
+        uint32_t old;
+
+        old = *bits;
+        while (atomic_fcmpset_32(bits, &old, newbits) == 0);
+        return (old);
+#elif (PAGE_SIZE == 8192) && defined(atomic_fcmpset_16)
+        uint16_t old;
+
+        old = *bits;
+        while (atomic_fcmpset_16(bits, &old, newbits) == 0);
+        return (old);
+#elif (PAGE_SIZE == 4096) && defined(atomic_fcmpset_8)
+        uint8_t old;
+
+        old = *bits;
+        while (atomic_fcmpset_8(bits, &old, newbits) == 0);
+        return (old);
+#else   /* PAGE_SIZE <= 4096*/
+        uintptr_t addr;
+        uint32_t old, new, mask;
+        int shift;
+
+        addr = (uintptr_t)bits;
+        /*
+         * Use a trick to perform a 32-bit atomic on the
+         * containing aligned word, to not depend on the existence
+         * of atomic_{set, swap, clear}_{8, 16}.
+         */
+        shift = addr & (sizeof(uint32_t) - 1);
+#if BYTE_ORDER == BIG_ENDIAN
+        shift = (sizeof(uint32_t) - sizeof(vm_page_bits_t) - shift) * NBBY;
+#else
+        shift *= NBBY;
+#endif
+        addr &= ~(sizeof(uint32_t) - 1);
+        mask = VM_PAGE_BITS_ALL << shift;
+
+        old = *bits;
+        do {
+                new = old & ~mask;
+                new |= newbits << shift;
+        } while (atomic_fcmpset_32((uint32_t *)addr, &old, new) == 0);
+        return (old >> shift);
+#endif  /* PAGE_SIZE */
+}
+
 /*
  * vm_page_set_valid_range:
  *
@@ -4688,6 +4748,28 @@
                 m->valid |= pagebits;
         else
                 vm_page_bits_set(m, &m->valid, pagebits);
+}
+
+/*
+ * Set the page dirty bits and free the invalid swap space if
+ * present.  Returns the previous dirty bits.
+ */
+vm_page_bits_t
+vm_page_set_dirty(vm_page_t m)
+{
+        vm_page_bits_t old;
+
+        VM_PAGE_OBJECT_BUSY_ASSERT(m);
+
+        if (vm_page_xbusied(m) && !pmap_page_is_write_mapped(m)) {
+                old = m->dirty;
+                m->dirty = VM_PAGE_BITS_ALL;
+        } else
+                old = vm_page_bits_swap(m, &m->dirty, VM_PAGE_BITS_ALL);
+        if (old == 0 && (m->a.flags & PGA_SWAP_SPACE) != 0)
+                vm_pager_page_unswapped(m);
+
+        return (old);
 }
 
 /*
Index: head/sys/vm/vm_pageout.c
===================================================================
--- head/sys/vm/vm_pageout.c
+++ head/sys/vm/vm_pageout.c
@@ -1307,6 +1307,14 @@
                         act_delta++;
                 }
 
+                /* Deferred free of swap space. */
+                if ((m->a.flags & PGA_SWAP_FREE) != 0 &&
+                    VM_OBJECT_TRYWLOCK(object)) {
+                        if (m->object == object)
+                                vm_pager_page_unswapped(m);
+                        VM_OBJECT_WUNLOCK(object);
+                }
+
                 /*
                  * Advance or decay the act_count based on recent usage.
                  */
@@ -1541,6 +1549,10 @@
                         addl_page_shortage++;
                         goto reinsert;
                 }
+
+                /* Deferred free of swap space. */
+                if ((m->a.flags & PGA_SWAP_FREE) != 0)
+                        vm_pager_page_unswapped(m);
 
                 /*
                  * Re-check for wirings now that we hold the object lock and
Index: head/sys/vm/vm_pager.h
===================================================================
--- head/sys/vm/vm_pager.h
+++ head/sys/vm/vm_pager.h
@@ -179,9 +179,6 @@
  *
  *      Destroy swap associated with the page.
  *
- *      The object containing the page must be locked.
- *      This function may not block.
- *
  *      XXX: A much better name would be "vm_pager_page_dirtied()"
  *      XXX: It is not obvious if this could be profitably used by any
  *      XXX: pagers besides the swap_pager or if it should even be a
@@ -191,7 +188,6 @@
 vm_pager_page_unswapped(vm_page_t m)
 {
 
-        VM_OBJECT_ASSERT_LOCKED(m->object);
         if (pagertab[m->object->type]->pgo_pageunswapped)
                 (*pagertab[m->object->type]->pgo_pageunswapped)(m);
 }
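For context, a hedged sketch (not part of the patch) of how a caller changes under this diff: the old idiom dirtied a page and released its swap backing by hand under the object write lock, while the new vm_page_set_dirty() only requires the page to be busied and lets swap_pager_unswapped() defer the free via PGA_SWAP_FREE when the lock is not held. The surrounding variables (obj, m) are hypothetical placeholders.

/* Old idiom: object write lock required, swap space freed immediately. */
VM_OBJECT_WLOCK(obj);
if (m->dirty != VM_PAGE_BITS_ALL) {
        vm_page_dirty(m);
        vm_pager_page_unswapped(m);
}
VM_OBJECT_WUNLOCK(obj);

/*
 * New idiom: a busied page suffices.  vm_page_set_dirty() frees the swap
 * space right away if the object write lock happens to be held, or sets
 * PGA_SWAP_FREE so the page daemon completes the free later.
 */
vm_page_set_dirty(m);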