diff --git a/sys/fs/tmpfs/tmpfs.h b/sys/fs/tmpfs/tmpfs.h --- a/sys/fs/tmpfs/tmpfs.h +++ b/sys/fs/tmpfs/tmpfs.h @@ -148,6 +148,7 @@ * (i) tn_interlock * (m) tmpfs_mount tm_allnode_lock * (c) stable after creation + * (v) tn_reg.tn_aobj vm_object lock */ struct tmpfs_node { /* @@ -299,6 +300,7 @@ */ vm_object_t tn_aobj; /* (c) */ struct tmpfs_mount *tn_tmp; /* (c) */ + vm_pindex_t tn_pages; /* (v) */ } tn_reg; } tn_spec; /* (v) */ }; @@ -528,6 +530,37 @@ * specific ones. */ +static inline struct vnode * +VM_TO_TMPFS_VP(vm_object_t obj) +{ + struct tmpfs_node *node; + + if ((obj->flags & OBJ_TMPFS) == 0) + return (NULL); + + /* + * swp_priv is the back-pointer to the tmpfs node, if any, + * which uses the vm object as backing store. The object + * handle is not used to avoid locking sw_alloc_sx on tmpfs + * node instantiation/destruction. + */ + node = obj->un_pager.swp.swp_priv; + return (node->tn_vnode); +} + +static inline struct tmpfs_mount * +VM_TO_TMPFS_MP(vm_object_t obj) +{ + struct tmpfs_node *node; + + if ((obj->flags & OBJ_TMPFS) == 0) + return (NULL); + + node = obj->un_pager.swp.swp_priv; + MPASS(node->tn_type == VREG); + return (node->tn_reg.tn_tmp); +} + static inline struct tmpfs_mount * VFS_TO_TMPFS(struct mount *mp) { diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -113,7 +113,7 @@ VM_OBJECT_ASSERT_WLOCKED(object); - vp = object->un_pager.swp.swp_tmpfs; + vp = VM_TO_TMPFS_VP(object); /* * Forced unmount? @@ -194,15 +194,14 @@ /* * Tmpfs VREG node, which was reclaimed, has tmpfs_pager_type - * type, but not OBJ_TMPFS flag. In this case there is no - * v_writecount to adjust. + * type. In this case there is no v_writecount to adjust. 
*/ if (vp_heldp != NULL) VM_OBJECT_RLOCK(object); else VM_OBJECT_ASSERT_LOCKED(object); if ((object->flags & OBJ_TMPFS) != 0) { - vp = object->un_pager.swp.swp_tmpfs; + vp = VM_TO_TMPFS_VP(object); if (vp != NULL) { *vpp = vp; if (vp_heldp != NULL) { @@ -215,6 +214,80 @@ VM_OBJECT_RUNLOCK(object); } +static void +tmpfs_pager_freespace(vm_object_t obj, vm_pindex_t start, vm_size_t size) +{ + struct tmpfs_node *node; + struct tmpfs_mount *tm; + vm_page_t m; + vm_pindex_t c; + + swap_pager_freespace(obj, start, size); + if ((obj->flags & OBJ_TMPFS) == 0) + return; + + for (c = 0, m = vm_page_find_least(obj, start); + m != NULL && m->pindex < start + size; m = TAILQ_NEXT(m, listq)) + c++; + + node = obj->un_pager.swp.swp_priv; + MPASS(node->tn_type == VREG); + tm = node->tn_reg.tn_tmp; + + atomic_add_long(&tm->tm_pages_used, -c); + node->tn_reg.tn_pages -= c; +} + +static void +tmpfs_page_inserted(vm_object_t obj, vm_page_t m) +{ + struct tmpfs_node *node; + struct tmpfs_mount *tm; + + if ((obj->flags & OBJ_TMPFS) == 0) + return; + + node = obj->un_pager.swp.swp_priv; + MPASS(node->tn_type == VREG); + tm = node->tn_reg.tn_tmp; + + if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) { + atomic_add_long(&tm->tm_pages_used, 1); + node->tn_reg.tn_pages += 1; + } +} + +static void +tmpfs_page_removed(vm_object_t obj, vm_page_t m) +{ + struct tmpfs_node *node; + struct tmpfs_mount *tm; + + if ((obj->flags & OBJ_TMPFS) == 0) + return; + + node = obj->un_pager.swp.swp_priv; + MPASS(node->tn_type == VREG); + tm = node->tn_reg.tn_tmp; + + if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) { + atomic_add_long(&tm->tm_pages_used, -1); + node->tn_reg.tn_pages -= 1; + } +} + +static boolean_t +tmpfs_can_alloc_page(vm_object_t obj, vm_pindex_t pindex) +{ + struct tmpfs_mount *tm; + + tm = VM_TO_TMPFS_MP(obj); + if (tm == NULL || vm_pager_has_page(obj, pindex, NULL, NULL) || + tm->tm_pages_max == 0) + return (true); + return (tm->tm_pages_max > atomic_load_long(&tm->tm_pages_used)); 
+} + struct pagerops tmpfs_pager_ops = { .pgo_kvme_type = KVME_TYPE_VNODE, .pgo_alloc = tmpfs_pager_alloc, @@ -223,6 +296,10 @@ .pgo_release_writecount = tmpfs_pager_release_writecount, .pgo_mightbedirty = vm_object_mightbedirty_, .pgo_getvp = tmpfs_pager_getvp, + .pgo_freespace = tmpfs_pager_freespace, + .pgo_page_inserted = tmpfs_page_inserted, + .pgo_page_removed = tmpfs_page_removed, + .pgo_can_alloc_page = tmpfs_can_alloc_page, }; static int @@ -572,9 +649,10 @@ case VREG: nnode->tn_reg.tn_aobj = vm_pager_allocate(tmpfs_pager_type, NULL, 0, - VM_PROT_DEFAULT, 0, - NULL /* XXXKIB - tmpfs needs swap reservation */); - /* OBJ_TMPFS is set together with the setting of vp->v_object */ + VM_PROT_DEFAULT, 0, + NULL /* XXXKIB - tmpfs needs swap reservation */); + nnode->tn_reg.tn_aobj->un_pager.swp.swp_priv = nnode; + vm_object_set_flag(nnode->tn_reg.tn_aobj, OBJ_TMPFS); nnode->tn_reg.tn_tmp = tmp; break; @@ -665,16 +743,16 @@ switch (node->tn_type) { case VREG: uobj = node->tn_reg.tn_aobj; - if (uobj != NULL && uobj->size != 0) - atomic_subtract_long(&tmp->tm_pages_used, uobj->size); - - tmpfs_free_tmp(tmp); - + node->tn_reg.tn_aobj = NULL; if (uobj != NULL) { - KASSERT((uobj->flags & OBJ_TMPFS) == 0, - ("leaked OBJ_TMPFS node %p vm_obj %p", node, uobj)); + VM_OBJECT_WLOCK(uobj); + vm_object_clear_flag(uobj, OBJ_TMPFS); + atomic_add_long(&tmp->tm_pages_used, + -node->tn_reg.tn_pages); + VM_OBJECT_WUNLOCK(uobj); vm_object_deallocate(uobj); } + tmpfs_free_tmp(tmp); break; case VLNK: tmpfs_free_tmp(tmp); @@ -816,8 +894,6 @@ want_vrele = true; } - vm_object_clear_flag(obj, OBJ_TMPFS); - obj->un_pager.swp.swp_tmpfs = NULL; if (vp->v_writecount < 0) vp->v_writecount = 0; VI_UNLOCK(vp); @@ -955,8 +1031,6 @@ VI_LOCK(vp); KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs")); vp->v_object = object; - object->un_pager.swp.swp_tmpfs = vp; - vm_object_set_flag(object, OBJ_TMPFS); vn_irflag_set_locked(vp, VIRF_PGREAD | VIRF_TEXT_REF); VI_UNLOCK(vp); 
VM_OBJECT_WUNLOCK(object); @@ -1706,7 +1780,6 @@ int tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr) { - struct tmpfs_mount *tmp; struct tmpfs_node *node; vm_object_t uobj; vm_pindex_t idx, newpages, oldpages; @@ -1718,7 +1791,6 @@ node = VP_TO_TMPFS_NODE(vp); uobj = node->tn_reg.tn_aobj; - tmp = VFS_TO_TMPFS(vp->v_mount); /* * Convert the old and new sizes to the number of pages needed to @@ -1736,10 +1808,6 @@ return (0); } - if (newpages > oldpages && - !tmpfs_pages_check_avail(tmp, newpages - oldpages)) - return (ENOSPC); - VM_OBJECT_WLOCK(uobj); if (newsize < oldsize) { /* @@ -1765,8 +1833,6 @@ uobj->size = newpages; VM_OBJECT_WUNLOCK(uobj); - atomic_add_long(&tmp->tm_pages_used, newpages - oldpages); - node->tn_size = newsize; return (0); } diff --git a/sys/fs/tmpfs/tmpfs_vfsops.c b/sys/fs/tmpfs/tmpfs_vfsops.c --- a/sys/fs/tmpfs/tmpfs_vfsops.c +++ b/sys/fs/tmpfs/tmpfs_vfsops.c @@ -43,6 +43,7 @@ * allocate and release resources. */ +#include "opt_ddb.h" #include "opt_tmpfs.h" #include @@ -245,7 +246,7 @@ VM_OBJECT_RUNLOCK(object); continue; } - vp = object->un_pager.swp.swp_tmpfs; - if (vp->v_mount != mp) { + vp = VM_TO_TMPFS_VP(object); + if (vp == NULL || vp->v_mount != mp) { VM_OBJECT_RUNLOCK(object); continue; @@ -556,7 +557,11 @@ TMPFS_UNLOCK(tmp); mtx_destroy(&tmp->tm_allnode_lock); - MPASS(tmp->tm_pages_used == 0); + /* + * We cannot assert that tmp->tm_pages_used == 0 here, + * because tmpfs vm_objects might still be mapped by some + * process and outlive the mount due to reference counting. 
+ */ MPASS(tmp->tm_nodes_inuse == 0); free(tmp, M_TMPFSMNT); @@ -696,3 +701,44 @@ .vfs_uninit = tmpfs_uninit, }; VFS_SET(tmpfs_vfsops, tmpfs, VFCF_JAIL); + +#ifdef DDB +#include <ddb/ddb.h> + +static void +db_print_tmpfs(struct mount *mp, struct tmpfs_mount *tmp) +{ + db_printf("mp %p (%s) tmp %p\n", mp, + mp->mnt_stat.f_mntonname, tmp); + db_printf( + "\tsize max %ju pages max %lu pages used %lu\n" + "\tinodes max %ju inodes inuse %ju refcount %ju\n" + "\tmaxfilesize %ju r%c %snamecache %smtime\n", + (uintmax_t)tmp->tm_size_max, tmp->tm_pages_max, tmp->tm_pages_used, + (uintmax_t)tmp->tm_nodes_max, (uintmax_t)tmp->tm_nodes_inuse, + (uintmax_t)tmp->tm_refcount, (uintmax_t)tmp->tm_maxfilesize, + tmp->tm_ronly ? 'o' : 'w', tmp->tm_nonc ? "no" : "", + tmp->tm_nomtime ? "no" : ""); +} + +DB_SHOW_COMMAND(tmpfs, db_show_tmpfs) +{ + struct mount *mp; + struct tmpfs_mount *tmp; + + if (have_addr) { + mp = (struct mount *)addr; + tmp = VFS_TO_TMPFS(mp); + db_print_tmpfs(mp, tmp); + return; + } + + TAILQ_FOREACH(mp, &mountlist, mnt_list) { + if (strcmp(mp->mnt_stat.f_fstypename, tmpfs_vfsconf.vfc_name) == + 0) { + tmp = VFS_TO_TMPFS(mp); + db_print_tmpfs(mp, tmp); + } + } +} +#endif /* DDB */ diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -471,9 +471,12 @@ sb->st_gen = node->tn_gen; if (vp->v_type == VREG) { obj = node->tn_reg.tn_aobj; - sb->st_blocks = (u_quad_t)obj->resident_page_count * PAGE_SIZE; - } else + VM_OBJECT_RLOCK(obj); + sb->st_blocks = ptoa(node->tn_reg.tn_pages); + VM_OBJECT_RUNLOCK(obj); + } else { sb->st_blocks = node->tn_size; + } sb->st_blocks /= S_BLKSIZE; return (vop_stat_helper_post(v, error)); } @@ -506,12 +509,15 @@ vap->va_gen = node->tn_gen; vap->va_flags = node->tn_flags; vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ? 
- node->tn_rdev : NODEV; + node->tn_rdev : NODEV; if (vp->v_type == VREG) { obj = node->tn_reg.tn_aobj; - vap->va_bytes = (u_quad_t)obj->resident_page_count * PAGE_SIZE; - } else + VM_OBJECT_RLOCK(obj); + vap->va_bytes = ptoa(node->tn_reg.tn_pages); + VM_OBJECT_RUNLOCK(obj); + } else { vap->va_bytes = node->tn_size; + } vap->va_filerev = 0; return (0); diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -225,9 +225,11 @@ VM_ALLOC_NORMAL | VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY); if (rv != VM_PAGER_OK) { VM_OBJECT_WUNLOCK(obj); - printf("uiomove_object: vm_obj %p idx %jd pager error %d\n", - obj, idx, rv); - return (EIO); + if (bootverbose) { + printf("uiomove_object: vm_obj %p idx %jd " + "pager error %d\n", obj, idx, rv); + } + return (rv == VM_PAGER_AGAIN ? ENOSPC : EIO); } VM_OBJECT_WUNLOCK(obj); diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h --- a/sys/vm/swap_pager.h +++ b/sys/vm/swap_pager.h @@ -78,6 +78,8 @@ int swap_dev_info(int name, struct xswdev *xs, char *devname, size_t len); void swap_pager_copy(vm_object_t, vm_object_t, vm_pindex_t, int); vm_pindex_t swap_pager_find_least(vm_object_t object, vm_pindex_t pindex); +void swap_pager_freespace(vm_object_t object, vm_pindex_t start, + vm_size_t size); void swap_pager_swap_init(void); int swap_pager_nswapdev(void); int swap_pager_reserve(vm_object_t, vm_pindex_t, vm_pindex_t); diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -436,8 +436,6 @@ vm_offset_t start, vm_offset_t end); static void swap_pager_release_writecount(vm_object_t object, vm_offset_t start, vm_offset_t end); -static void swap_pager_freespace(vm_object_t object, vm_pindex_t start, - vm_size_t size); const struct pagerops swappagerops = { .pgo_kvme_type = KVME_TYPE_SWAP, @@ -980,9 +978,10 @@ * * The object must be locked. 
*/ -static void +void swap_pager_freespace(vm_object_t object, vm_pindex_t start, vm_size_t size) { + MPASS((object->flags & OBJ_SWAP) != 0); swp_pager_meta_free(object, start, size); } diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -1212,6 +1212,10 @@ #if VM_NRESERVLEVEL > 0 vm_object_color(fs->object, atop(fs->vaddr) - fs->pindex); #endif + if (!vm_pager_can_alloc_page(fs->object, fs->pindex)) { + unlock_and_deallocate(fs); + return (FAULT_FAILURE); + } fs->m = vm_page_alloc(fs->object, fs->pindex, P_KILLED(curproc) ? VM_ALLOC_SYSTEM : 0); } diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -156,20 +156,13 @@ /* * Swap pager * - * swp_tmpfs - back-pointer to the tmpfs vnode, - * if any, which uses the vm object - * as backing store. The handle - * cannot be reused for linking, - * because the vnode can be - * reclaimed and recreated, making - * the handle changed and hash-chain - * invalid. - * - * swp_blks - pc-trie of the allocated swap blocks. + * swp_priv - pager-private. + * swp_blks - pc-trie of the allocated swap blocks. 
+ * writemappings - count of bytes mapped for write * */ struct { - void *swp_tmpfs; + void *swp_priv; struct pctrie swp_blks; vm_ooffset_t writemappings; } swp; diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -1483,6 +1483,7 @@ return (1); } vm_page_insert_radixdone(m, object, mpred); + vm_pager_page_inserted(object, m); return (0); } @@ -1557,6 +1558,8 @@ if ((m->a.flags & PGA_SWAP_FREE) != 0) vm_pager_page_unswapped(m); + vm_pager_page_removed(object, m); + m->object = NULL; mrem = vm_radix_remove(&object->rtree, m->pindex); KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m)); @@ -2023,6 +2026,8 @@ flags = 0; m = NULL; + if (!vm_pager_can_alloc_page(object, pindex)) + return (NULL); again: #if VM_NRESERVLEVEL > 0 /* @@ -4693,6 +4698,8 @@ *mp = NULL; return (VM_PAGER_FAIL); } else if ((m = vm_page_alloc(object, pindex, pflags)) == NULL) { + if (!vm_pager_can_alloc_page(object, pindex)) + return (VM_PAGER_AGAIN); goto retrylookup; } diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h --- a/sys/vm/vm_pager.h +++ b/sys/vm/vm_pager.h @@ -69,6 +69,9 @@ bool *vp_heldp); typedef void pgo_freespace_t(vm_object_t object, vm_pindex_t start, vm_size_t size); +typedef void pgo_page_inserted_t(vm_object_t object, vm_page_t m); +typedef void pgo_page_removed_t(vm_object_t object, vm_page_t m); +typedef boolean_t pgo_can_alloc_page_t(vm_object_t object, vm_pindex_t pindex); struct pagerops { int pgo_kvme_type; @@ -87,6 +90,9 @@ pgo_mightbedirty_t *pgo_mightbedirty; pgo_getvp_t *pgo_getvp; pgo_freespace_t *pgo_freespace; + pgo_page_inserted_t *pgo_page_inserted; + pgo_page_removed_t *pgo_page_removed; + pgo_can_alloc_page_t *pgo_can_alloc_page; }; extern const struct pagerops defaultpagerops; @@ -249,6 +255,35 @@ method(object, start, size); } +static __inline void +vm_pager_page_inserted(vm_object_t object, vm_page_t m) +{ + pgo_page_inserted_t *method; + + method = pagertab[object->type]->pgo_page_inserted; + if 
(method != NULL) + method(object, m); +} + +static __inline void +vm_pager_page_removed(vm_object_t object, vm_page_t m) +{ + pgo_page_removed_t *method; + + method = pagertab[object->type]->pgo_page_removed; + if (method != NULL) + method(object, m); +} + +static __inline bool +vm_pager_can_alloc_page(vm_object_t object, vm_pindex_t pindex) +{ + pgo_can_alloc_page_t *method; + + method = pagertab[object->type]->pgo_can_alloc_page; + return (method != NULL ? method(object, pindex) : true); +} + int vm_pager_alloc_dyn_type(struct pagerops *ops, int base_type); void vm_pager_free_dyn_type(objtype_t type);