diff --git a/sys/fs/tmpfs/tmpfs.h b/sys/fs/tmpfs/tmpfs.h
--- a/sys/fs/tmpfs/tmpfs.h
+++ b/sys/fs/tmpfs/tmpfs.h
@@ -528,6 +528,37 @@
  * specific ones.
  */
 
+static inline struct vnode *
+VM_TO_TMPFS_VP(vm_object_t obj)
+{
+	struct tmpfs_node *node;
+
+	if ((obj->flags & OBJ_TMPFS) == 0)
+		return (NULL);
+
+	/*
+	 * swp_priv is the back-pointer to the tmpfs node, if any,
+	 * which uses the vm object as backing store.  The object
+	 * handle is not used to avoid locking sw_alloc_sx on tmpfs
+	 * node instantiation/destroy.
+	 */
+	node = obj->un_pager.swp.swp_priv;
+	return (node->tn_vnode);
+}
+
+static inline struct tmpfs_mount *
+VM_TO_TMPFS_MP(vm_object_t obj)
+{
+	struct tmpfs_node *node;
+
+	if ((obj->flags & OBJ_TMPFS) == 0)
+		return (NULL);
+
+	node = obj->un_pager.swp.swp_priv;
+	MPASS(node->tn_type == VREG);
+	return (node->tn_reg.tn_tmp);
+}
+
 static inline struct tmpfs_mount *
 VFS_TO_TMPFS(struct mount *mp)
 {
diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -113,7 +113,7 @@
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
-	vp = object->un_pager.swp.swp_tmpfs;
+	vp = VM_TO_TMPFS_VP(object);
 
 	/*
 	 * Forced unmount?
@@ -194,15 +194,14 @@
 
 	/*
 	 * Tmpfs VREG node, which was reclaimed, has tmpfs_pager_type
-	 * type, but not OBJ_TMPFS flag.  In this case there is no
-	 * v_writecount to adjust.
+	 * type.  In this case there is no v_writecount to adjust.
*/ if (vp_heldp != NULL) VM_OBJECT_RLOCK(object); else VM_OBJECT_ASSERT_LOCKED(object); if ((object->flags & OBJ_TMPFS) != 0) { - vp = object->un_pager.swp.swp_tmpfs; + vp = VM_TO_TMPFS_VP(object); if (vp != NULL) { *vpp = vp; if (vp_heldp != NULL) { @@ -215,6 +214,38 @@ VM_OBJECT_RUNLOCK(object); } +static void +tmpfs_page_inserted(vm_object_t obj, vm_page_t m) +{ + struct tmpfs_mount *tm; + + tm = VM_TO_TMPFS_MP(obj); + if (tm != NULL && !vm_pager_has_page(obj, m->pindex, NULL, NULL)) + atomic_add_long(&tm->tm_pages_used, 1); +} + +static void +tmpfs_page_removed(vm_object_t obj, vm_page_t m) +{ + struct tmpfs_mount *tm; + + tm = VM_TO_TMPFS_MP(obj); + if (tm != NULL && !vm_pager_has_page(obj, m->pindex, NULL, NULL)) + atomic_add_long(&tm->tm_pages_used, -1); +} + +static boolean_t +tmpfs_can_alloc_page(vm_object_t obj, vm_pindex_t pindex) +{ + struct tmpfs_mount *tm; + + tm = VM_TO_TMPFS_MP(obj); + if (tm == NULL || vm_pager_has_page(obj, pindex, NULL, NULL) || + tm->tm_pages_max == 0) + return (true); + return (tm->tm_pages_max > atomic_load_long(&tm->tm_pages_used)); +} + struct pagerops tmpfs_pager_ops = { .pgo_kvme_type = KVME_TYPE_VNODE, .pgo_alloc = tmpfs_pager_alloc, @@ -223,6 +254,9 @@ .pgo_release_writecount = tmpfs_pager_release_writecount, .pgo_mightbedirty = vm_object_mightbedirty_, .pgo_getvp = tmpfs_pager_getvp, + .pgo_page_inserted = tmpfs_page_inserted, + .pgo_page_removed = tmpfs_page_removed, + .pgo_can_alloc_page = tmpfs_can_alloc_page, }; static int @@ -572,9 +606,10 @@ case VREG: nnode->tn_reg.tn_aobj = vm_pager_allocate(tmpfs_pager_type, NULL, 0, - VM_PROT_DEFAULT, 0, - NULL /* XXXKIB - tmpfs needs swap reservation */); - /* OBJ_TMPFS is set together with the setting of vp->v_object */ + VM_PROT_DEFAULT, 0, + NULL /* XXXKIB - tmpfs needs swap reservation */); + nnode->tn_reg.tn_aobj->un_pager.swp.swp_priv = nnode; + vm_object_set_flag(nnode->tn_reg.tn_aobj, OBJ_TMPFS); nnode->tn_reg.tn_tmp = tmp; break; @@ -665,16 +700,16 @@ switch 
(node->tn_type) {
 	case VREG:
 		uobj = node->tn_reg.tn_aobj;
-		if (uobj != NULL && uobj->size != 0)
-			atomic_subtract_long(&tmp->tm_pages_used, uobj->size);
-
-		tmpfs_free_tmp(tmp);
-
+		node->tn_reg.tn_aobj = NULL;
 		if (uobj != NULL) {
-			KASSERT((uobj->flags & OBJ_TMPFS) == 0,
-			    ("leaked OBJ_TMPFS node %p vm_obj %p", node, uobj));
+			VM_OBJECT_WLOCK(uobj);
+			vm_object_clear_flag(uobj, OBJ_TMPFS);
+			atomic_add_long(&tmp->tm_pages_used,
+			    -swap_pager_total_pages(uobj));
+			VM_OBJECT_WUNLOCK(uobj);
 			vm_object_deallocate(uobj);
 		}
+		tmpfs_free_tmp(tmp);
 		break;
 	case VLNK:
 		tmpfs_free_tmp(tmp);
@@ -816,8 +851,6 @@
 		want_vrele = true;
 	}
 
-	vm_object_clear_flag(obj, OBJ_TMPFS);
-	obj->un_pager.swp.swp_tmpfs = NULL;
 	if (vp->v_writecount < 0)
 		vp->v_writecount = 0;
 	VI_UNLOCK(vp);
@@ -955,8 +988,6 @@
 	VI_LOCK(vp);
 	KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs"));
 	vp->v_object = object;
-	object->un_pager.swp.swp_tmpfs = vp;
-	vm_object_set_flag(object, OBJ_TMPFS);
 	vn_irflag_set_locked(vp, VIRF_PGREAD | VIRF_TEXT_REF);
 	VI_UNLOCK(vp);
 	VM_OBJECT_WUNLOCK(object);
@@ -1706,7 +1737,6 @@
 int
 tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr)
 {
-	struct tmpfs_mount *tmp;
 	struct tmpfs_node *node;
 	vm_object_t uobj;
 	vm_pindex_t idx, newpages, oldpages;
@@ -1718,7 +1748,6 @@
 
 	node = VP_TO_TMPFS_NODE(vp);
 	uobj = node->tn_reg.tn_aobj;
-	tmp = VFS_TO_TMPFS(vp->v_mount);
 
 	/*
 	 * Convert the old and new sizes to the number of pages needed to
@@ -1736,10 +1765,6 @@
 		return (0);
 	}
 
-	if (newpages > oldpages &&
-	    !tmpfs_pages_check_avail(tmp, newpages - oldpages))
-		return (ENOSPC);
-
 	VM_OBJECT_WLOCK(uobj);
 	if (newsize < oldsize) {
 		/*
@@ -1765,8 +1790,6 @@
 	uobj->size = newpages;
 	VM_OBJECT_WUNLOCK(uobj);
 
-	atomic_add_long(&tmp->tm_pages_used, newpages - oldpages);
-
 	node->tn_size = newsize;
 	return (0);
 }
diff --git a/sys/fs/tmpfs/tmpfs_vfsops.c b/sys/fs/tmpfs/tmpfs_vfsops.c
--- a/sys/fs/tmpfs/tmpfs_vfsops.c
+++ b/sys/fs/tmpfs/tmpfs_vfsops.c
@@ -44,6 +44,7 @@
  */
 
 #include "opt_tmpfs.h"
+#include "opt_ddb.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
@@ -245,7 +246,7 @@
 			VM_OBJECT_RUNLOCK(object);
 			continue;
 		}
-		vp = object->un_pager.swp.swp_tmpfs;
+		vp = VM_TO_TMPFS_VP(object);
 		if (vp->v_mount != mp) {
 			VM_OBJECT_RUNLOCK(object);
 			continue;
@@ -556,7 +557,11 @@
 	TMPFS_UNLOCK(tmp);
 
 	mtx_destroy(&tmp->tm_allnode_lock);
-	MPASS(tmp->tm_pages_used == 0);
+	/*
+	 * We cannot assert that tmp->tm_pages_used == 0 there,
+	 * because tmpfs vm_objects might be still mapped by some
+	 * process and outlive the mount due to reference counting.
+	 */
 	MPASS(tmp->tm_nodes_inuse == 0);
 
 	free(tmp, M_TMPFSMNT);
@@ -696,3 +701,44 @@
 	.vfs_uninit =		tmpfs_uninit,
 };
 VFS_SET(tmpfs_vfsops, tmpfs, VFCF_JAIL);
+
+#ifdef DDB
+#include <ddb/ddb.h>
+
+static void
+db_print_tmpfs(struct mount *mp, struct tmpfs_mount *tmp)
+{
+	db_printf("mp %p (%s) tmp %p\n", mp,
+	    mp->mnt_stat.f_mntonname, tmp);
+	db_printf(
+	    "\tsize max %ju pages max %lu pages used %lu\n"
+	    "\tinodes max %ju inodes inuse %ju refcount %ju\n"
+	    "\tmaxfilesize %ju r%c %snamecache %smtime\n",
+	    (uintmax_t)tmp->tm_size_max, tmp->tm_pages_max, tmp->tm_pages_used,
+	    (uintmax_t)tmp->tm_nodes_max, (uintmax_t)tmp->tm_nodes_inuse,
+	    (uintmax_t)tmp->tm_refcount, (uintmax_t)tmp->tm_maxfilesize,
+	    tmp->tm_ronly ? 'o' : 'w', tmp->tm_nonc ? "no" : "",
+	    tmp->tm_nomtime ? 
"no" : "");
+}
+
+DB_SHOW_COMMAND(tmpfs, db_show_tmpfs)
+{
+	struct mount *mp;
+	struct tmpfs_mount *tmp;
+
+	if (have_addr) {
+		mp = (struct mount *)addr;
+		tmp = VFS_TO_TMPFS(mp);
+		db_print_tmpfs(mp, tmp);
+		return;
+	}
+
+	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
+		if (strcmp(mp->mnt_stat.f_fstypename, tmpfs_vfsconf.vfc_name) ==
+		    0) {
+			tmp = VFS_TO_TMPFS(mp);
+			db_print_tmpfs(mp, tmp);
+		}
+	}
+}
+#endif /* DDB */
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -225,9 +225,11 @@
 		    VM_ALLOC_NORMAL | VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY);
 		if (rv != VM_PAGER_OK) {
 			VM_OBJECT_WUNLOCK(obj);
-			printf("uiomove_object: vm_obj %p idx %jd pager error %d\n",
-			    obj, idx, rv);
-			return (EIO);
+			if (bootverbose) {
+				printf("uiomove_object: vm_obj %p idx %jd "
+				    "pager error %d\n", obj, idx, rv);
+			}
+			return (rv == VM_PAGER_AGAIN ? ENOSPC : EIO);
 		}
 		VM_OBJECT_WUNLOCK(obj);
 
diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h
--- a/sys/vm/swap_pager.h
+++ b/sys/vm/swap_pager.h
@@ -83,6 +83,7 @@
 int swap_pager_reserve(vm_object_t, vm_pindex_t, vm_pindex_t);
 void swap_pager_status(int *total, int *used);
 u_long swap_pager_swapped_pages(vm_object_t object);
+u_long swap_pager_total_pages(vm_object_t object);
 void swapoff_all(void);
 bool swap_pager_init_object(vm_object_t object, void *handle,
     struct ucred *cred, vm_ooffset_t size, vm_ooffset_t offset);
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1795,6 +1795,39 @@
 	return (res);
 }
 
+/*
+ * Calculates the total number of used pages, either resident or
+ * swapped.  If the page is simultaneously resident and swapped, it is
+ * counted only once.
+ */
+u_long
+swap_pager_total_pages(vm_object_t object)
+{
+	struct swblk *sb;
+	vm_pindex_t pindex;
+	u_long res;
+	int i;
+
+	VM_OBJECT_ASSERT_LOCKED(object);
+	MPASS((object->flags & OBJ_SWAP) != 0);
+
+	res = object->resident_page_count;
+	if (pctrie_is_empty(&object->un_pager.swp.swp_blks))
+		return (res);
+
+	for (pindex = 0; (sb = SWAP_PCTRIE_LOOKUP_GE(
+	    &object->un_pager.swp.swp_blks, pindex)) != NULL;) {
+		pindex = sb->p + SWAP_META_PAGES;
+		for (i = 0; i < SWAP_META_PAGES; i++) {
+			if (sb->d[i] == SWAPBLK_NONE)
+				continue;
+			if (vm_page_lookup(object, sb->p + i) == NULL)
+				res++;
+		}
+	}
+	return (res);
+}
+
 /*
  *	swap_pager_swapoff_object:
  *
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -1212,6 +1212,10 @@
 #if VM_NRESERVLEVEL > 0
 		vm_object_color(fs->object, atop(fs->vaddr) - fs->pindex);
 #endif
+		if (!vm_pager_can_alloc_page(fs->object, fs->pindex)) {
+			unlock_and_deallocate(fs);
+			return (FAULT_FAILURE);
+		}
 		fs->m = vm_page_alloc(fs->object, fs->pindex,
 		    P_KILLED(curproc) ? VM_ALLOC_SYSTEM : 0);
 	}
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -156,20 +156,13 @@
 		/*
 		 * Swap pager
 		 *
-		 *	swp_tmpfs - back-pointer to the tmpfs vnode,
-		 *		     if any, which uses the vm object
-		 *		     as backing store.  The handle
-		 *		     cannot be reused for linking,
-		 *		     because the vnode can be
-		 *		     reclaimed and recreated, making
-		 *		     the handle changed and hash-chain
-		 *		     invalid.
-		 *
-		 *	swp_blks - pc-trie of the allocated swap blocks.
+		 *	swp_priv - pager-private.
+		 *	swp_blks - pc-trie of the allocated swap blocks.
+		 *	writemappings - count of bytes mapped for write
+		 *
 		 */
 		struct {
-			void *swp_tmpfs;
+			void *swp_priv;
 			struct pctrie swp_blks;
 			vm_ooffset_t writemappings;
 		} swp;
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -1483,6 +1483,7 @@
 		return (1);
 	}
 	vm_page_insert_radixdone(m, object, mpred);
+	vm_pager_page_inserted(object, m);
 	return (0);
 }
 
@@ -1557,6 +1558,8 @@
 	if ((m->a.flags & PGA_SWAP_FREE) != 0)
 		vm_pager_page_unswapped(m);
 
+	vm_pager_page_removed(object, m);
+
 	m->object = NULL;
 	mrem = vm_radix_remove(&object->rtree, m->pindex);
 	KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m));
@@ -2023,6 +2026,8 @@
 
 	flags = 0;
 	m = NULL;
+	if (!vm_pager_can_alloc_page(object, pindex))
+		return (NULL);
 again:
 #if VM_NRESERVLEVEL > 0
 	/*
@@ -4693,6 +4698,8 @@
 			*mp = NULL;
 			return (VM_PAGER_FAIL);
 		} else if ((m = vm_page_alloc(object, pindex, pflags)) == NULL) {
+			if (!vm_pager_can_alloc_page(object, pindex))
+				return (VM_PAGER_AGAIN);
 			goto retrylookup;
 		}
 
diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h
--- a/sys/vm/vm_pager.h
+++ b/sys/vm/vm_pager.h
@@ -69,6 +69,9 @@
     bool *vp_heldp);
 typedef void pgo_freespace_t(vm_object_t object, vm_pindex_t start,
     vm_size_t size);
+typedef void pgo_page_inserted_t(vm_object_t object, vm_page_t m);
+typedef void pgo_page_removed_t(vm_object_t object, vm_page_t m);
+typedef boolean_t pgo_can_alloc_page_t(vm_object_t object, vm_pindex_t pindex);
 
 struct pagerops {
 	int pgo_kvme_type;
@@ -87,6 +90,9 @@
 	pgo_mightbedirty_t	*pgo_mightbedirty;
 	pgo_getvp_t		*pgo_getvp;
 	pgo_freespace_t		*pgo_freespace;
+	pgo_page_inserted_t	*pgo_page_inserted;
+	pgo_page_removed_t	*pgo_page_removed;
+	pgo_can_alloc_page_t	*pgo_can_alloc_page;
 };
 
 extern const struct pagerops defaultpagerops;
@@ -249,6 +255,35 @@
 	method(object, start, size);
 }
 
+static __inline void
+vm_pager_page_inserted(vm_object_t object, vm_page_t m)
+{
+	pgo_page_inserted_t *method;
+
+	method = pagertab[object->type]->pgo_page_inserted;
+	if (method != NULL)
+		method(object, m);
+}
+
+static __inline void
+vm_pager_page_removed(vm_object_t object, vm_page_t m)
+{
+	pgo_page_removed_t *method;
+
+	method = pagertab[object->type]->pgo_page_removed;
+	if (method != NULL)
+		method(object, m);
+}
+
+static __inline bool
+vm_pager_can_alloc_page(vm_object_t object, vm_pindex_t pindex)
+{
+	pgo_can_alloc_page_t *method;
+
+	method = pagertab[object->type]->pgo_can_alloc_page;
+	return (method != NULL ? method(object, pindex) : true);
+}
+
 int vm_pager_alloc_dyn_type(struct pagerops *ops, int base_type);
 void vm_pager_free_dyn_type(objtype_t type);