diff --git a/sys/fs/tmpfs/tmpfs.h b/sys/fs/tmpfs/tmpfs.h --- a/sys/fs/tmpfs/tmpfs.h +++ b/sys/fs/tmpfs/tmpfs.h @@ -148,6 +148,7 @@ * (i) tn_interlock * (m) tmpfs_mount tm_allnode_lock * (c) stable after creation + * (v) tn_reg.tn_aobj vm_object lock */ struct tmpfs_node { /* @@ -299,6 +300,7 @@ */ vm_object_t tn_aobj; /* (c) */ struct tmpfs_mount *tn_tmp; /* (c) */ + vm_pindex_t tn_pages; /* (v) */ } tn_reg; } tn_spec; /* (v) */ }; @@ -528,6 +530,37 @@ * specific ones. */ +static inline struct vnode * +VM_TO_TMPFS_VP(vm_object_t obj) +{ + struct tmpfs_node *node; + + if ((obj->flags & OBJ_TMPFS) == 0) + return (NULL); + + /* + * swp_priv is the back-pointer to the tmpfs node, if any, + * which uses the vm object as backing store. The object + * handle is not used to avoid locking sw_alloc_sx on tmpfs + * node instantiation/destroy. + */ + node = obj->un_pager.swp.swp_priv; + return (node->tn_vnode); +} + +static inline struct tmpfs_mount * +VM_TO_TMPFS_MP(vm_object_t obj) +{ + struct tmpfs_node *node; + + if ((obj->flags & OBJ_TMPFS) == 0) + return (NULL); + + node = obj->un_pager.swp.swp_priv; + MPASS(node->tn_type == VREG); + return (node->tn_reg.tn_tmp); +} + static inline struct tmpfs_mount * VFS_TO_TMPFS(struct mount *mp) { diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -113,7 +113,7 @@ VM_OBJECT_ASSERT_WLOCKED(object); - vp = object->un_pager.swp.swp_tmpfs; + vp = VM_TO_TMPFS_VP(object); /* * Forced unmount? @@ -194,15 +194,14 @@ /* * Tmpfs VREG node, which was reclaimed, has tmpfs_pager_type - * type, but not OBJ_TMPFS flag. In this case there is no - * v_writecount to adjust. + * type. In this case there is no v_writecount to adjust. 
*/ if (vp_heldp != NULL) VM_OBJECT_RLOCK(object); else VM_OBJECT_ASSERT_LOCKED(object); if ((object->flags & OBJ_TMPFS) != 0) { - vp = object->un_pager.swp.swp_tmpfs; + vp = VM_TO_TMPFS_VP(object); if (vp != NULL) { *vpp = vp; if (vp_heldp != NULL) { @@ -215,6 +214,87 @@ VM_OBJECT_RUNLOCK(object); } +static void +tmpfs_pager_freespace(vm_object_t obj, vm_pindex_t start, vm_size_t size) +{ + struct tmpfs_node *node; + struct tmpfs_mount *tm; + vm_size_t c; + + swap_pager_freespace(obj, start, size, &c); + if ((obj->flags & OBJ_TMPFS) == 0 || c == 0) + return; + + node = obj->un_pager.swp.swp_priv; + MPASS(node->tn_type == VREG); + tm = node->tn_reg.tn_tmp; + + KASSERT(tm->tm_pages_used >= c, + ("tmpfs tm %p pages %jd free %jd", tm, + (uintmax_t)tm->tm_pages_used, (uintmax_t)c)); + atomic_add_long(&tm->tm_pages_used, -c); + KASSERT(node->tn_reg.tn_pages >= c, + ("tmpfs node %p pages %jd free %jd", node, + (uintmax_t)node->tn_reg.tn_pages, (uintmax_t)c)); + node->tn_reg.tn_pages -= c; +} + +static void +tmpfs_page_inserted(vm_object_t obj, vm_page_t m) +{ + struct tmpfs_node *node; + struct tmpfs_mount *tm; + + if ((obj->flags & OBJ_TMPFS) == 0) + return; + + node = obj->un_pager.swp.swp_priv; + MPASS(node->tn_type == VREG); + tm = node->tn_reg.tn_tmp; + + if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) { + atomic_add_long(&tm->tm_pages_used, 1); + node->tn_reg.tn_pages += 1; + } +} + +static void +tmpfs_page_removed(vm_object_t obj, vm_page_t m) +{ + struct tmpfs_node *node; + struct tmpfs_mount *tm; + + if ((obj->flags & OBJ_TMPFS) == 0) + return; + + node = obj->un_pager.swp.swp_priv; + MPASS(node->tn_type == VREG); + tm = node->tn_reg.tn_tmp; + + if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) { + KASSERT(tm->tm_pages_used >= 1, + ("tmpfs tm %p pages %jd free 1", tm, + (uintmax_t)tm->tm_pages_used)); + atomic_add_long(&tm->tm_pages_used, -1); + KASSERT(node->tn_reg.tn_pages >= 1, + ("tmpfs node %p pages %jd free 1", node, + 
(uintmax_t)node->tn_reg.tn_pages)); + node->tn_reg.tn_pages -= 1; + } +} + +static boolean_t +tmpfs_can_alloc_page(vm_object_t obj, vm_pindex_t pindex) +{ + struct tmpfs_mount *tm; + + tm = VM_TO_TMPFS_MP(obj); + if (tm == NULL || vm_pager_has_page(obj, pindex, NULL, NULL) || + tm->tm_pages_max == 0) + return (true); + return (tm->tm_pages_max > atomic_load_long(&tm->tm_pages_used)); +} + struct pagerops tmpfs_pager_ops = { .pgo_kvme_type = KVME_TYPE_VNODE, .pgo_alloc = tmpfs_pager_alloc, @@ -223,6 +303,10 @@ .pgo_release_writecount = tmpfs_pager_release_writecount, .pgo_mightbedirty = vm_object_mightbedirty_, .pgo_getvp = tmpfs_pager_getvp, + .pgo_freespace = tmpfs_pager_freespace, + .pgo_page_inserted = tmpfs_page_inserted, + .pgo_page_removed = tmpfs_page_removed, + .pgo_can_alloc_page = tmpfs_can_alloc_page, }; static int @@ -572,10 +656,12 @@ case VREG: nnode->tn_reg.tn_aobj = vm_pager_allocate(tmpfs_pager_type, NULL, 0, - VM_PROT_DEFAULT, 0, - NULL /* XXXKIB - tmpfs needs swap reservation */); - /* OBJ_TMPFS is set together with the setting of vp->v_object */ + VM_PROT_DEFAULT, 0, + NULL /* XXXKIB - tmpfs needs swap reservation */); + nnode->tn_reg.tn_aobj->un_pager.swp.swp_priv = nnode; + vm_object_set_flag(nnode->tn_reg.tn_aobj, OBJ_TMPFS); nnode->tn_reg.tn_tmp = tmp; + nnode->tn_reg.tn_pages = 0; break; default: @@ -665,16 +751,31 @@ switch (node->tn_type) { case VREG: uobj = node->tn_reg.tn_aobj; - if (uobj != NULL && uobj->size != 0) - atomic_subtract_long(&tmp->tm_pages_used, uobj->size); - + node->tn_reg.tn_aobj = NULL; + if (uobj != NULL) { + VM_OBJECT_WLOCK(uobj); + KASSERT((uobj->flags & OBJ_TMPFS) != 0, + ("tmpfs node %p uobj %p not tmpfs", node, uobj)); + vm_object_clear_flag(uobj, OBJ_TMPFS); + KASSERT(tmp->tm_pages_used >= node->tn_reg.tn_pages, + ("tmpfs tmp %p node %p pages %jd free %jd", tmp, + node, (uintmax_t)tmp->tm_pages_used, + (uintmax_t)node->tn_reg.tn_pages)); + atomic_add_long(&tmp->tm_pages_used, + -node->tn_reg.tn_pages); + 
VM_OBJECT_WUNLOCK(uobj); + } tmpfs_free_tmp(tmp); - if (uobj != NULL) { - KASSERT((uobj->flags & OBJ_TMPFS) == 0, - ("leaked OBJ_TMPFS node %p vm_obj %p", node, uobj)); + /* + * vm_object_deallocate() must not be called while + * owning tm_allnode_lock, because deallocate might + * sleep. Call it after tmpfs_free_tmp() does the + * unlock. + */ + if (uobj != NULL) vm_object_deallocate(uobj); - } + break; case VLNK: tmpfs_free_tmp(tmp); @@ -816,8 +917,6 @@ want_vrele = true; } - vm_object_clear_flag(obj, OBJ_TMPFS); - obj->un_pager.swp.swp_tmpfs = NULL; if (vp->v_writecount < 0) vp->v_writecount = 0; VI_UNLOCK(vp); @@ -955,8 +1054,6 @@ VI_LOCK(vp); KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs")); vp->v_object = object; - object->un_pager.swp.swp_tmpfs = vp; - vm_object_set_flag(object, OBJ_TMPFS); vn_irflag_set_locked(vp, VIRF_PGREAD | VIRF_TEXT_REF); VI_UNLOCK(vp); VM_OBJECT_WUNLOCK(object); @@ -1706,7 +1803,6 @@ int tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr) { - struct tmpfs_mount *tmp; struct tmpfs_node *node; vm_object_t uobj; vm_pindex_t idx, newpages, oldpages; @@ -1718,7 +1814,6 @@ node = VP_TO_TMPFS_NODE(vp); uobj = node->tn_reg.tn_aobj; - tmp = VFS_TO_TMPFS(vp->v_mount); /* * Convert the old and new sizes to the number of pages needed to @@ -1736,10 +1831,6 @@ return (0); } - if (newpages > oldpages && - !tmpfs_pages_check_avail(tmp, newpages - oldpages)) - return (ENOSPC); - VM_OBJECT_WLOCK(uobj); if (newsize < oldsize) { /* @@ -1765,8 +1856,6 @@ uobj->size = newpages; VM_OBJECT_WUNLOCK(uobj); - atomic_add_long(&tmp->tm_pages_used, newpages - oldpages); - node->tn_size = newsize; return (0); } diff --git a/sys/fs/tmpfs/tmpfs_vfsops.c b/sys/fs/tmpfs/tmpfs_vfsops.c --- a/sys/fs/tmpfs/tmpfs_vfsops.c +++ b/sys/fs/tmpfs/tmpfs_vfsops.c @@ -43,6 +43,7 @@ * allocate and release resources. 
*/ +#include "opt_ddb.h" #include "opt_tmpfs.h" #include @@ -245,7 +246,7 @@ VM_OBJECT_RUNLOCK(object); continue; } - vp = object->un_pager.swp.swp_tmpfs; + vp = VM_TO_TMPFS_VP(object); if (vp->v_mount != mp) { VM_OBJECT_RUNLOCK(object); continue; @@ -556,7 +557,11 @@ TMPFS_UNLOCK(tmp); mtx_destroy(&tmp->tm_allnode_lock); - MPASS(tmp->tm_pages_used == 0); + /* + * We cannot assert that tmp->tm_pages_used == 0 there, + * because tmpfs vm_objects might be still mapped by some + * process and outlive the mount due to reference counting. + */ MPASS(tmp->tm_nodes_inuse == 0); free(tmp, M_TMPFSMNT); @@ -696,3 +701,44 @@ .vfs_uninit = tmpfs_uninit, }; VFS_SET(tmpfs_vfsops, tmpfs, VFCF_JAIL); + +#ifdef DDB +#include + +static void +db_print_tmpfs(struct mount *mp, struct tmpfs_mount *tmp) +{ + db_printf("mp %p (%s) tmp %p\n", mp, + mp->mnt_stat.f_mntonname, tmp); + db_printf( + "\tsize max %ju pages max %lu pages used %lu\n" + "\tinodes max %ju inodes inuse %ju refcount %ju\n" + "\tmaxfilesize %ju r%c %snamecache %smtime\n", + (uintmax_t)tmp->tm_size_max, tmp->tm_pages_max, tmp->tm_pages_used, + (uintmax_t)tmp->tm_nodes_max, (uintmax_t)tmp->tm_nodes_inuse, + (uintmax_t)tmp->tm_refcount, (uintmax_t)tmp->tm_maxfilesize, + tmp->tm_ronly ? 'o' : 'w', tmp->tm_nonc ? "no" : "", + tmp->tm_nomtime ? 
"no" : ""); +} + +DB_SHOW_COMMAND(tmpfs, db_show_tmpfs) +{ + struct mount *mp; + struct tmpfs_mount *tmp; + + if (have_addr) { + mp = (struct mount *)addr; + tmp = VFS_TO_TMPFS(mp); + db_print_tmpfs(mp, tmp); + return; + } + + TAILQ_FOREACH(mp, &mountlist, mnt_list) { + if (strcmp(mp->mnt_stat.f_fstypename, tmpfs_vfsconf.vfc_name) == + 0) { + tmp = VFS_TO_TMPFS(mp); + db_print_tmpfs(mp, tmp); + } + } +} +#endif /* DDB */ diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -437,7 +437,6 @@ { struct vnode *vp = v->a_vp; struct stat *sb = v->a_sb; - vm_object_t obj; struct tmpfs_node *node; int error; @@ -470,10 +469,19 @@ sb->st_flags = node->tn_flags; sb->st_gen = node->tn_gen; if (vp->v_type == VREG) { - obj = node->tn_reg.tn_aobj; - sb->st_blocks = (u_quad_t)obj->resident_page_count * PAGE_SIZE; - } else +#ifdef __ILP32__ + vm_object_t obj = node->tn_reg.tn_aobj; + + /* Handle torn read */ + VM_OBJECT_RLOCK(obj); +#endif + sb->st_blocks = ptoa(node->tn_reg.tn_pages); +#ifdef __ILP32__ + VM_OBJECT_RUNLOCK(obj); +#endif + } else { sb->st_blocks = node->tn_size; + } sb->st_blocks /= S_BLKSIZE; return (vop_stat_helper_post(v, error)); } @@ -506,12 +514,15 @@ vap->va_gen = node->tn_gen; vap->va_flags = node->tn_flags; vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ? 
- node->tn_rdev : NODEV; + node->tn_rdev : NODEV; if (vp->v_type == VREG) { obj = node->tn_reg.tn_aobj; - vap->va_bytes = (u_quad_t)obj->resident_page_count * PAGE_SIZE; - } else + VM_OBJECT_RLOCK(obj); + vap->va_bytes = ptoa(node->tn_reg.tn_pages); + VM_OBJECT_RUNLOCK(obj); + } else { vap->va_bytes = node->tn_size; + } vap->va_filerev = 0; return (0); diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -225,9 +225,11 @@ VM_ALLOC_NORMAL | VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY); if (rv != VM_PAGER_OK) { VM_OBJECT_WUNLOCK(obj); - printf("uiomove_object: vm_obj %p idx %jd pager error %d\n", - obj, idx, rv); - return (EIO); + if (bootverbose) { + printf("uiomove_object: vm_obj %p idx %jd " + "pager error %d\n", obj, idx, rv); + } + return (rv == VM_PAGER_AGAIN ? ENOSPC : EIO); } VM_OBJECT_WUNLOCK(obj); @@ -345,6 +347,58 @@ return (shmfd->shm_object->type == OBJT_PHYS); } +static void +shm_pager_freespace(vm_object_t obj, vm_pindex_t start, vm_size_t size) +{ + struct shmfd *shm; + vm_size_t c; + + swap_pager_freespace(obj, start, size, &c); + if (c == 0) + return; + + shm = obj->un_pager.swp.swp_priv; + if (shm == NULL) + return; + KASSERT(shm->shm_pages >= c, + ("shm %p pages %jd free %jd", shm, + (uintmax_t)shm->shm_pages, (uintmax_t)c)); + shm->shm_pages -= c; +} + +static void +shm_page_inserted(vm_object_t obj, vm_page_t m) +{ + struct shmfd *shm; + + shm = obj->un_pager.swp.swp_priv; + if (shm == NULL) + return; + shm->shm_pages += 1; +} + +static void +shm_page_removed(vm_object_t obj, vm_page_t m) +{ + struct shmfd *shm; + + shm = obj->un_pager.swp.swp_priv; + if (shm == NULL) + return; + KASSERT(shm->shm_pages >= 1, + ("shm %p pages %jd free 1", shm, + (uintmax_t)shm->shm_pages)); + shm->shm_pages -= 1; +} + +static struct pagerops shm_swap_pager_ops = { + .pgo_kvme_type = KVME_TYPE_SWAP, + .pgo_freespace = shm_pager_freespace, + .pgo_page_inserted = shm_page_inserted, + .pgo_page_removed = 
shm_page_removed, +}; +static int shmfd_pager_type = -1; + static int shm_seek(struct file *fp, off_t offset, int whence, struct thread *td) { @@ -561,7 +615,6 @@ bzero(sb, sizeof(*sb)); sb->st_blksize = PAGE_SIZE; sb->st_size = shmfd->shm_size; - sb->st_blocks = howmany(sb->st_size, sb->st_blksize); mtx_lock(&shm_timestamp_lock); sb->st_atim = shmfd->shm_atime; sb->st_ctim = shmfd->shm_ctime; @@ -574,8 +627,12 @@ sb->st_dev = shm_dev_ino; sb->st_ino = shmfd->shm_ino; sb->st_nlink = shmfd->shm_object->ref_count; - sb->st_blocks = shmfd->shm_object->size / - (pagesizes[shmfd->shm_lp_psind] >> PAGE_SHIFT); + if (shm_largepage(shmfd)) { + sb->st_blocks = shmfd->shm_object->size / + (pagesizes[shmfd->shm_lp_psind] >> PAGE_SHIFT); + } else { + sb->st_blocks = shmfd->shm_pages; + } return (0); } @@ -886,7 +943,7 @@ VM_PROT_DEFAULT, 0, ucred); shmfd->shm_lp_alloc_policy = SHM_LARGEPAGE_ALLOC_DEFAULT; } else { - shmfd->shm_object = vm_pager_allocate(OBJT_SWAP, NULL, + shmfd->shm_object = vm_pager_allocate(shmfd_pager_type, NULL, shmfd->shm_size, VM_PROT_DEFAULT, 0, ucred); } KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate")); @@ -916,6 +973,7 @@ void shm_drop(struct shmfd *shmfd) { + vm_object_t obj; if (refcount_release(&shmfd->shm_refs)) { #ifdef MAC @@ -923,7 +981,13 @@ #endif rangelock_destroy(&shmfd->shm_rl); mtx_destroy(&shmfd->shm_mtx); - vm_object_deallocate(shmfd->shm_object); + obj = shmfd->shm_object; + if (!shm_largepage(shmfd)) { + VM_OBJECT_WLOCK(obj); + obj->un_pager.swp.swp_priv = NULL; + VM_OBJECT_WUNLOCK(obj); + } + vm_object_deallocate(obj); free(shmfd, M_SHMFD); } } @@ -962,6 +1026,9 @@ new_unrhdr64(&shm_ino_unr, 1); shm_dev_ino = devfs_alloc_cdp_inode(); KASSERT(shm_dev_ino > 0, ("shm dev inode not initialized")); + shmfd_pager_type = vm_pager_alloc_dyn_type(&shm_swap_pager_ops, + OBJT_SWAP); + MPASS(shmfd_pager_type != -1); for (i = 1; i < MAXPAGESIZES; i++) { if (pagesizes[i] == 0) diff --git a/sys/sys/mman.h b/sys/sys/mman.h --- 
a/sys/sys/mman.h +++ b/sys/sys/mman.h @@ -268,6 +268,7 @@ struct shmfd { vm_ooffset_t shm_size; vm_object_t shm_object; + vm_pindex_t shm_pages; /* allocated pages */ int shm_refs; uid_t shm_uid; gid_t shm_gid; diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h --- a/sys/vm/swap_pager.h +++ b/sys/vm/swap_pager.h @@ -78,6 +78,8 @@ int swap_dev_info(int name, struct xswdev *xs, char *devname, size_t len); void swap_pager_copy(vm_object_t, vm_object_t, vm_pindex_t, int); vm_pindex_t swap_pager_find_least(vm_object_t object, vm_pindex_t pindex); +void swap_pager_freespace(vm_object_t object, vm_pindex_t start, + vm_size_t size, vm_size_t *freed); void swap_pager_swap_init(void); int swap_pager_nswapdev(void); int swap_pager_reserve(vm_object_t, vm_pindex_t, vm_pindex_t); diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -436,7 +436,7 @@ vm_offset_t start, vm_offset_t end); static void swap_pager_release_writecount(vm_object_t object, vm_offset_t start, vm_offset_t end); -static void swap_pager_freespace(vm_object_t object, vm_pindex_t start, +static void swap_pager_freespace_pgo(vm_object_t object, vm_pindex_t start, vm_size_t size); const struct pagerops swappagerops = { @@ -451,7 +451,7 @@ .pgo_pageunswapped = swap_pager_unswapped, /* remove swap related to page */ .pgo_update_writecount = swap_pager_update_writecount, .pgo_release_writecount = swap_pager_release_writecount, - .pgo_freespace = swap_pager_freespace, + .pgo_freespace = swap_pager_freespace_pgo, }; /* @@ -483,9 +483,10 @@ * Metadata functions */ static daddr_t swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t); -static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t); +static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_size_t, + vm_size_t *); static void swp_pager_meta_transfer(vm_object_t src, vm_object_t dst, - vm_pindex_t pindex, vm_pindex_t count); + vm_pindex_t pindex, vm_pindex_t count, vm_size_t *freed); 
static void swp_pager_meta_free_all(vm_object_t); static daddr_t swp_pager_meta_lookup(vm_object_t, vm_pindex_t); @@ -980,11 +981,21 @@ * * The object must be locked. */ +void +swap_pager_freespace(vm_object_t object, vm_pindex_t start, vm_size_t size, + vm_size_t *freed) +{ + MPASS((object->flags & OBJ_SWAP) != 0); + + swp_pager_meta_free(object, start, size, freed); +} + static void -swap_pager_freespace(vm_object_t object, vm_pindex_t start, vm_size_t size) +swap_pager_freespace_pgo(vm_object_t object, vm_pindex_t start, vm_size_t size) { + MPASS((object->flags & OBJ_SWAP) != 0); - swp_pager_meta_free(object, start, size); + swp_pager_meta_free(object, start, size, NULL); } /* @@ -1008,7 +1019,7 @@ n = MIN(size - i, INT_MAX); blk = swp_pager_getswapspace(&n); if (blk == SWAPBLK_NONE) { - swp_pager_meta_free(object, start, i); + swp_pager_meta_free(object, start, i, NULL); VM_OBJECT_WUNLOCK(object); return (-1); } @@ -1096,7 +1107,8 @@ /* * Transfer source to destination. */ - swp_pager_meta_transfer(srcobject, dstobject, offset, dstobject->size); + swp_pager_meta_transfer(srcobject, dstobject, offset, dstobject->size, + NULL); /* * Free left over swap blocks in source. @@ -2119,16 +2131,22 @@ */ static void swp_pager_meta_transfer(vm_object_t srcobject, vm_object_t dstobject, - vm_pindex_t pindex, vm_pindex_t count) + vm_pindex_t pindex, vm_pindex_t count, vm_size_t *moved) { struct swblk *sb; + vm_page_t m; daddr_t n_free, s_free; vm_pindex_t offset, last; + vm_size_t mc; int i, limit, start; VM_OBJECT_ASSERT_WLOCKED(srcobject); + MPASS(moved == NULL || dstobject == NULL); + + mc = 0; + m = NULL; if (count == 0 || pctrie_is_empty(&srcobject->un_pager.swp.swp_blks)) - return; + goto out; swp_pager_init_freerange(&s_free, &n_free); offset = pindex; @@ -2150,6 +2168,14 @@ swp_pager_update_freerange(&s_free, &n_free, sb->d[i]); } + if (moved != NULL) { + if (m != NULL && m->pindex != pindex + i - 1) + m = NULL; + m = m != NULL ? 
vm_page_next(m) : + vm_page_lookup(srcobject, pindex + i); + if (m == NULL || vm_page_none_valid(m)) + mc++; + } sb->d[i] = SWAPBLK_NONE; } pindex = sb->p + SWAP_META_PAGES; @@ -2161,6 +2187,9 @@ } } swp_pager_freeswapspace(s_free, n_free); +out: + if (moved != NULL) + *moved = mc; } /* @@ -2174,9 +2203,10 @@ * with resident pages. */ static void -swp_pager_meta_free(vm_object_t object, vm_pindex_t pindex, vm_pindex_t count) +swp_pager_meta_free(vm_object_t object, vm_pindex_t pindex, vm_pindex_t count, + vm_size_t *freed) { - swp_pager_meta_transfer(object, NULL, pindex, count); + swp_pager_meta_transfer(object, NULL, pindex, count, freed); } /* diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -1212,6 +1212,10 @@ #if VM_NRESERVLEVEL > 0 vm_object_color(fs->object, atop(fs->vaddr) - fs->pindex); #endif + if (!vm_pager_can_alloc_page(fs->object, fs->pindex)) { + unlock_and_deallocate(fs); + return (FAULT_FAILURE); + } fs->m = vm_page_alloc(fs->object, fs->pindex, P_KILLED(curproc) ? VM_ALLOC_SYSTEM : 0); } diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -156,20 +156,13 @@ /* * Swap pager * - * swp_tmpfs - back-pointer to the tmpfs vnode, - * if any, which uses the vm object - * as backing store. The handle - * cannot be reused for linking, - * because the vnode can be - * reclaimed and recreated, making - * the handle changed and hash-chain - * invalid. - * - * swp_blks - pc-trie of the allocated swap blocks. + * swp_priv - pager-private. + * swp_blks - pc-trie of the allocated swap blocks. 
+ * writemappings - count of bytes mapped for write * */ struct { - void *swp_tmpfs; + void *swp_priv; struct pctrie swp_blks; vm_ooffset_t writemappings; } swp; diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -1483,6 +1483,7 @@ return (1); } vm_page_insert_radixdone(m, object, mpred); + vm_pager_page_inserted(object, m); return (0); } @@ -1557,6 +1558,8 @@ if ((m->a.flags & PGA_SWAP_FREE) != 0) vm_pager_page_unswapped(m); + vm_pager_page_removed(object, m); + m->object = NULL; mrem = vm_radix_remove(&object->rtree, m->pindex); KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m)); @@ -1879,6 +1882,7 @@ vm_page_insert_radixdone(m, new_object, mpred); vm_page_dirty(m); + vm_pager_page_inserted(new_object, m); return (0); } @@ -2023,6 +2027,8 @@ flags = 0; m = NULL; + if (!vm_pager_can_alloc_page(object, pindex)) + return (NULL); again: #if VM_NRESERVLEVEL > 0 /* @@ -4694,6 +4700,8 @@ *mp = NULL; return (VM_PAGER_FAIL); } else if ((m = vm_page_alloc(object, pindex, pflags)) == NULL) { + if (!vm_pager_can_alloc_page(object, pindex)) + return (VM_PAGER_AGAIN); goto retrylookup; } diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h --- a/sys/vm/vm_pager.h +++ b/sys/vm/vm_pager.h @@ -69,6 +69,9 @@ bool *vp_heldp); typedef void pgo_freespace_t(vm_object_t object, vm_pindex_t start, vm_size_t size); +typedef void pgo_page_inserted_t(vm_object_t object, vm_page_t m); +typedef void pgo_page_removed_t(vm_object_t object, vm_page_t m); +typedef boolean_t pgo_can_alloc_page_t(vm_object_t object, vm_pindex_t pindex); struct pagerops { int pgo_kvme_type; @@ -87,6 +90,9 @@ pgo_mightbedirty_t *pgo_mightbedirty; pgo_getvp_t *pgo_getvp; pgo_freespace_t *pgo_freespace; + pgo_page_inserted_t *pgo_page_inserted; + pgo_page_removed_t *pgo_page_removed; + pgo_can_alloc_page_t *pgo_can_alloc_page; }; extern const struct pagerops defaultpagerops; @@ -249,6 +255,35 @@ method(object, start, size); } +static __inline void 
+vm_pager_page_inserted(vm_object_t object, vm_page_t m) +{ + pgo_page_inserted_t *method; + + method = pagertab[object->type]->pgo_page_inserted; + if (method != NULL) + method(object, m); +} + +static __inline void +vm_pager_page_removed(vm_object_t object, vm_page_t m) +{ + pgo_page_removed_t *method; + + method = pagertab[object->type]->pgo_page_removed; + if (method != NULL) + method(object, m); +} + +static __inline bool +vm_pager_can_alloc_page(vm_object_t object, vm_pindex_t pindex) +{ + pgo_can_alloc_page_t *method; + + method = pagertab[object->type]->pgo_can_alloc_page; + return (method != NULL ? method(object, pindex) : true); +} + int vm_pager_alloc_dyn_type(struct pagerops *ops, int base_type); void vm_pager_free_dyn_type(objtype_t type); diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c --- a/sys/vm/vm_pager.c +++ b/sys/vm/vm_pager.c @@ -430,6 +430,9 @@ FIX(mightbedirty); FIX(getvp); FIX(freespace); + FIX(page_inserted); + FIX(page_removed); + FIX(can_alloc_page); #undef FIX } pagertab[res] = ops; /* XXXKIB should be rel, but acq is too much */ diff --git a/usr.bin/posixshmcontrol/posixshmcontrol.c b/usr.bin/posixshmcontrol/posixshmcontrol.c --- a/usr.bin/posixshmcontrol/posixshmcontrol.c +++ b/usr.bin/posixshmcontrol/posixshmcontrol.c @@ -404,6 +404,8 @@ char sizebuf[8]; struct stat st; int error, fd, ret; + struct shm_largepage_conf conf_dummy; + bool largepage; fd = shm_open(path, O_RDONLY, 0); if (fd == -1) { @@ -442,9 +444,13 @@ (long)st.st_ctim.tv_nsec); printf("birth\t%ld.%09ld\n", (long)st.st_birthtim.tv_sec, (long)st.st_birthtim.tv_nsec); - if (st.st_blocks != 0) + error = ioctl(fd, FIOGSHMLPGCNF, &conf_dummy); + largepage = error == 0; + if (st.st_blocks != 0 && largepage) printf("pagesz\t%jd\n", roundup((uintmax_t)st.st_size, PAGE_SIZE) / st.st_blocks); + else + printf("pages\t%jd\n", st.st_blocks); } close(fd); return (ret);