Changeset View
Standalone View
sys/fs/tmpfs/tmpfs_subr.c
Show First 20 Lines • Show All 107 Lines • ▼ Show 20 Lines | |||||
static void | static void | ||||
tmpfs_pager_writecount_recalc(vm_object_t object, vm_offset_t old, | tmpfs_pager_writecount_recalc(vm_object_t object, vm_offset_t old, | ||||
vm_offset_t new) | vm_offset_t new) | ||||
{ | { | ||||
struct vnode *vp; | struct vnode *vp; | ||||
VM_OBJECT_ASSERT_WLOCKED(object); | VM_OBJECT_ASSERT_WLOCKED(object); | ||||
vp = object->un_pager.swp.swp_tmpfs; | vp = VM_TO_TMPFS_VP(object); | ||||
/* | /* | ||||
* Forced unmount? | * Forced unmount? | ||||
*/ | */ | ||||
if (vp == NULL) { | if (vp == NULL) { | ||||
KASSERT((object->flags & OBJ_TMPFS_VREF) == 0, | KASSERT((object->flags & OBJ_TMPFS_VREF) == 0, | ||||
("object %p with OBJ_TMPFS_VREF but without vnode", | ("object %p with OBJ_TMPFS_VREF but without vnode", | ||||
object)); | object)); | ||||
▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines | |||||
static void | static void | ||||
tmpfs_pager_getvp(vm_object_t object, struct vnode **vpp, bool *vp_heldp) | tmpfs_pager_getvp(vm_object_t object, struct vnode **vpp, bool *vp_heldp) | ||||
{ | { | ||||
struct vnode *vp; | struct vnode *vp; | ||||
/* | /* | ||||
* Tmpfs VREG node, which was reclaimed, has tmpfs_pager_type | * Tmpfs VREG node, which was reclaimed, has tmpfs_pager_type | ||||
* type, but not OBJ_TMPFS flag. In this case there is no | * type. In this case there is no v_writecount to adjust. | ||||
* v_writecount to adjust. | |||||
*/ | */ | ||||
if (vp_heldp != NULL) | if (vp_heldp != NULL) | ||||
VM_OBJECT_RLOCK(object); | VM_OBJECT_RLOCK(object); | ||||
else | else | ||||
VM_OBJECT_ASSERT_LOCKED(object); | VM_OBJECT_ASSERT_LOCKED(object); | ||||
if ((object->flags & OBJ_TMPFS) != 0) { | if ((object->flags & OBJ_TMPFS) != 0) { | ||||
vp = object->un_pager.swp.swp_tmpfs; | vp = VM_TO_TMPFS_VP(object); | ||||
if (vp != NULL) { | if (vp != NULL) { | ||||
*vpp = vp; | *vpp = vp; | ||||
if (vp_heldp != NULL) { | if (vp_heldp != NULL) { | ||||
vhold(vp); | vhold(vp); | ||||
*vp_heldp = true; | *vp_heldp = true; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
if (vp_heldp != NULL) | if (vp_heldp != NULL) | ||||
VM_OBJECT_RUNLOCK(object); | VM_OBJECT_RUNLOCK(object); | ||||
} | } | ||||
static void | |||||
tmpfs_pager_freespace(vm_object_t obj, vm_pindex_t start, vm_size_t size) | |||||
{ | |||||
struct tmpfs_node *node; | |||||
struct tmpfs_mount *tm; | |||||
vm_size_t c; | |||||
swap_pager_freespace(obj, start, size, &c); | |||||
if ((obj->flags & OBJ_TMPFS) == 0 || c == 0) | |||||
return; | |||||
node = obj->un_pager.swp.swp_priv; | |||||
MPASS(node->tn_type == VREG); | |||||
tm = node->tn_reg.tn_tmp; | |||||
KASSERT(tm->tm_pages_used >= c, | |||||
("tmpfs tm %p pages %jd free %jd", tm, | |||||
(uintmax_t)tm->tm_pages_used, (uintmax_t)c)); | |||||
atomic_add_long(&tm->tm_pages_used, -c); | |||||
KASSERT(node->tn_reg.tn_pages >= c, | |||||
("tmpfs node %p pages %jd free %jd", node, | |||||
(uintmax_t)node->tn_reg.tn_pages, (uintmax_t)c)); | |||||
node->tn_reg.tn_pages -= c; | |||||
} | |||||
static void | |||||
tmpfs_page_inserted(vm_object_t obj, vm_page_t m) | |||||
{ | |||||
struct tmpfs_node *node; | |||||
struct tmpfs_mount *tm; | |||||
if ((obj->flags & OBJ_TMPFS) == 0) | |||||
return; | |||||
node = obj->un_pager.swp.swp_priv; | |||||
MPASS(node->tn_type == VREG); | |||||
markj: Suppose I write 4096 bytes to a new tmpfs file, so tn_pages == 1 and the page is dirty. Suppose… | |||||
Done Inline Actions: So it must be handled in tmpfs_pager_freespace(); thank you for noting this. kib: So it must be handled in tmpfs_pager_freespace(); thank you for noting this. | |||||
Done Inline Actions: I'm still confused about the scenario where we have a resident page and a copy exists in the pager. Should we count it once or twice? For tmpfs, I think it makes more sense to count it twice. But then the vm_pager_has_page() checks in tmpfs_page_inserted() and tmpfs_page_removed() should be removed, and tmpfs needs to intercept pageout ops and increment the count if a new swap block is to be allocated. And possibly putpages should return an error if the mount size limit would be exceeded. If we count such a page just once, however, then tmpfs_pager_freespace() should only decrement the count if there is no resident copy of the page. Otherwise:
markj: I'm still confused about the scenario where we have a resident page and a copy exists in the… | |||||
Done Inline Actions: I believe that we should count the page only once in tn_pages if it exists both in swap and in a queue. tn_pages counts all page-sized ranges in the tmpfs node that are not holes. I agree that there is then a bug in swap_pager_freespace(), which should be fixed. kib: I believe that we should only count the page once in tn_pages, if it exists both in swap and in… | |||||
tm = node->tn_reg.tn_tmp; | |||||
if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) { | |||||
atomic_add_long(&tm->tm_pages_used, 1); | |||||
node->tn_reg.tn_pages += 1; | |||||
} | |||||
} | |||||
static void | |||||
tmpfs_page_removed(vm_object_t obj, vm_page_t m) | |||||
{ | |||||
struct tmpfs_node *node; | |||||
struct tmpfs_mount *tm; | |||||
if ((obj->flags & OBJ_TMPFS) == 0) | |||||
return; | |||||
node = obj->un_pager.swp.swp_priv; | |||||
MPASS(node->tn_type == VREG); | |||||
tm = node->tn_reg.tn_tmp; | |||||
if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) { | |||||
KASSERT(tm->tm_pages_used >= 1, | |||||
("tmpfs tm %p pages %jd free 1", tm, | |||||
(uintmax_t)tm->tm_pages_used)); | |||||
atomic_add_long(&tm->tm_pages_used, -1); | |||||
KASSERT(node->tn_reg.tn_pages >= 1, | |||||
("tmpfs node %p pages %jd free 1", node, | |||||
(uintmax_t)node->tn_reg.tn_pages)); | |||||
node->tn_reg.tn_pages -= 1; | |||||
} | |||||
} | |||||
static boolean_t | |||||
tmpfs_can_alloc_page(vm_object_t obj, vm_pindex_t pindex) | |||||
{ | |||||
struct tmpfs_mount *tm; | |||||
tm = VM_TO_TMPFS_MP(obj); | |||||
if (tm == NULL || vm_pager_has_page(obj, pindex, NULL, NULL) || | |||||
tm->tm_pages_max == 0) | |||||
return (true); | |||||
return (tm->tm_pages_max > atomic_load_long(&tm->tm_pages_used)); | |||||
} | |||||
struct pagerops tmpfs_pager_ops = { | struct pagerops tmpfs_pager_ops = { | ||||
.pgo_kvme_type = KVME_TYPE_VNODE, | .pgo_kvme_type = KVME_TYPE_VNODE, | ||||
.pgo_alloc = tmpfs_pager_alloc, | .pgo_alloc = tmpfs_pager_alloc, | ||||
.pgo_set_writeable_dirty = vm_object_set_writeable_dirty_, | .pgo_set_writeable_dirty = vm_object_set_writeable_dirty_, | ||||
.pgo_update_writecount = tmpfs_pager_update_writecount, | .pgo_update_writecount = tmpfs_pager_update_writecount, | ||||
.pgo_release_writecount = tmpfs_pager_release_writecount, | .pgo_release_writecount = tmpfs_pager_release_writecount, | ||||
.pgo_mightbedirty = vm_object_mightbedirty_, | .pgo_mightbedirty = vm_object_mightbedirty_, | ||||
.pgo_getvp = tmpfs_pager_getvp, | .pgo_getvp = tmpfs_pager_getvp, | ||||
.pgo_freespace = tmpfs_pager_freespace, | |||||
.pgo_page_inserted = tmpfs_page_inserted, | |||||
.pgo_page_removed = tmpfs_page_removed, | |||||
.pgo_can_alloc_page = tmpfs_can_alloc_page, | |||||
}; | }; | ||||
static int | static int | ||||
tmpfs_node_ctor(void *mem, int size, void *arg, int flags) | tmpfs_node_ctor(void *mem, int size, void *arg, int flags) | ||||
{ | { | ||||
struct tmpfs_node *node; | struct tmpfs_node *node; | ||||
node = mem; | node = mem; | ||||
▲ Show 20 Lines • Show All 333 Lines • ▼ Show 20 Lines | case VLNK: | ||||
atomic_store_ptr(&nnode->tn_link_target, symlink); | atomic_store_ptr(&nnode->tn_link_target, symlink); | ||||
atomic_store_char((char *)&nnode->tn_link_smr, symlink_smr); | atomic_store_char((char *)&nnode->tn_link_smr, symlink_smr); | ||||
atomic_thread_fence_rel(); | atomic_thread_fence_rel(); | ||||
break; | break; | ||||
case VREG: | case VREG: | ||||
nnode->tn_reg.tn_aobj = | nnode->tn_reg.tn_aobj = | ||||
vm_pager_allocate(tmpfs_pager_type, NULL, 0, | vm_pager_allocate(tmpfs_pager_type, NULL, 0, | ||||
VM_PROT_DEFAULT, 0, | VM_PROT_DEFAULT, 0, | ||||
NULL /* XXXKIB - tmpfs needs swap reservation */); | NULL /* XXXKIB - tmpfs needs swap reservation */); | ||||
/* OBJ_TMPFS is set together with the setting of vp->v_object */ | nnode->tn_reg.tn_aobj->un_pager.swp.swp_priv = nnode; | ||||
vm_object_set_flag(nnode->tn_reg.tn_aobj, OBJ_TMPFS); | |||||
nnode->tn_reg.tn_tmp = tmp; | nnode->tn_reg.tn_tmp = tmp; | ||||
nnode->tn_reg.tn_pages = 0; | |||||
break; | break; | ||||
default: | default: | ||||
panic("tmpfs_alloc_node: type %p %d", nnode, | panic("tmpfs_alloc_node: type %p %d", nnode, | ||||
(int)nnode->tn_type); | (int)nnode->tn_type); | ||||
} | } | ||||
TMPFS_LOCK(tmp); | TMPFS_LOCK(tmp); | ||||
▲ Show 20 Lines • Show All 73 Lines • ▼ Show 20 Lines | case VMARKER: | ||||
panic("%s: bad type %d for node %p", __func__, | panic("%s: bad type %d for node %p", __func__, | ||||
(int)node->tn_type, node); | (int)node->tn_type, node); | ||||
} | } | ||||
#endif | #endif | ||||
switch (node->tn_type) { | switch (node->tn_type) { | ||||
case VREG: | case VREG: | ||||
uobj = node->tn_reg.tn_aobj; | uobj = node->tn_reg.tn_aobj; | ||||
if (uobj != NULL && uobj->size != 0) | node->tn_reg.tn_aobj = NULL; | ||||
atomic_subtract_long(&tmp->tm_pages_used, uobj->size); | if (uobj != NULL) { | ||||
VM_OBJECT_WLOCK(uobj); | |||||
KASSERT((uobj->flags & OBJ_TMPFS) != 0, | |||||
("tmpfs node %p uobj %p not tmpfs", node, uobj)); | |||||
vm_object_clear_flag(uobj, OBJ_TMPFS); | |||||
KASSERT(tmp->tm_pages_used >= node->tn_reg.tn_pages, | |||||
("tmpfs tmp %p node %p pages %jd free %jd", tmp, | |||||
node, (uintmax_t)tmp->tm_pages_used, | |||||
(uintmax_t)node->tn_reg.tn_pages)); | |||||
atomic_add_long(&tmp->tm_pages_used, | |||||
-node->tn_reg.tn_pages); | |||||
VM_OBJECT_WUNLOCK(uobj); | |||||
} | |||||
tmpfs_free_tmp(tmp); | tmpfs_free_tmp(tmp); | ||||
if (uobj != NULL) { | /* | ||||
KASSERT((uobj->flags & OBJ_TMPFS) == 0, | * vm_object_deallocate() must not be called while | ||||
("leaked OBJ_TMPFS node %p vm_obj %p", node, uobj)); | * owning tm_allnode_lock, because deallocate might | ||||
* sleep. Call it after tmpfs_free_tmp() does the | |||||
Done Inline Actions: Shouldn't this be a subtraction? markj: Shouldn't this be a subtraction? | |||||
Done Inline Actions: Of course; this was an untested last-minute change to switch to tn_pages. kib: Of course; this was an untested last-minute change to switch to tn_pages. | |||||
* unlock. | |||||
*/ | |||||
if (uobj != NULL) | |||||
vm_object_deallocate(uobj); | vm_object_deallocate(uobj); | ||||
} | |||||
break; | break; | ||||
case VLNK: | case VLNK: | ||||
tmpfs_free_tmp(tmp); | tmpfs_free_tmp(tmp); | ||||
symlink = node->tn_link_target; | symlink = node->tn_link_target; | ||||
atomic_store_ptr(&node->tn_link_target, NULL); | atomic_store_ptr(&node->tn_link_target, NULL); | ||||
if (atomic_load_char(&node->tn_link_smr)) { | if (atomic_load_char(&node->tn_link_smr)) { | ||||
cache_symlink_free(symlink, node->tn_size + 1); | cache_symlink_free(symlink, node->tn_size + 1); | ||||
▲ Show 20 Lines • Show All 125 Lines • ▼ Show 20 Lines | tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj) | ||||
* May be going through forced unmount. | * May be going through forced unmount. | ||||
*/ | */ | ||||
want_vrele = false; | want_vrele = false; | ||||
if ((obj->flags & OBJ_TMPFS_VREF) != 0) { | if ((obj->flags & OBJ_TMPFS_VREF) != 0) { | ||||
vm_object_clear_flag(obj, OBJ_TMPFS_VREF); | vm_object_clear_flag(obj, OBJ_TMPFS_VREF); | ||||
want_vrele = true; | want_vrele = true; | ||||
} | } | ||||
vm_object_clear_flag(obj, OBJ_TMPFS); | |||||
obj->un_pager.swp.swp_tmpfs = NULL; | |||||
if (vp->v_writecount < 0) | if (vp->v_writecount < 0) | ||||
vp->v_writecount = 0; | vp->v_writecount = 0; | ||||
VI_UNLOCK(vp); | VI_UNLOCK(vp); | ||||
VM_OBJECT_WUNLOCK(obj); | VM_OBJECT_WUNLOCK(obj); | ||||
if (want_vrele) { | if (want_vrele) { | ||||
vrele(vp); | vrele(vp); | ||||
} | } | ||||
} | } | ||||
▲ Show 20 Lines • Show All 121 Lines • ▼ Show 20 Lines | KASSERT((object->flags & OBJ_TMPFS_VREF) == 0, | ||||
("%s: object %p with OBJ_TMPFS_VREF but without vnode", | ("%s: object %p with OBJ_TMPFS_VREF but without vnode", | ||||
__func__, object)); | __func__, object)); | ||||
KASSERT(object->un_pager.swp.writemappings == 0, | KASSERT(object->un_pager.swp.writemappings == 0, | ||||
("%s: object %p has writemappings", | ("%s: object %p has writemappings", | ||||
__func__, object)); | __func__, object)); | ||||
VI_LOCK(vp); | VI_LOCK(vp); | ||||
KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs")); | KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs")); | ||||
vp->v_object = object; | vp->v_object = object; | ||||
object->un_pager.swp.swp_tmpfs = vp; | |||||
vm_object_set_flag(object, OBJ_TMPFS); | |||||
vn_irflag_set_locked(vp, VIRF_PGREAD | VIRF_TEXT_REF); | vn_irflag_set_locked(vp, VIRF_PGREAD | VIRF_TEXT_REF); | ||||
VI_UNLOCK(vp); | VI_UNLOCK(vp); | ||||
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
break; | break; | ||||
case VDIR: | case VDIR: | ||||
MPASS(node->tn_dir.tn_parent != NULL); | MPASS(node->tn_dir.tn_parent != NULL); | ||||
if (node->tn_dir.tn_parent == node) | if (node->tn_dir.tn_parent == node) | ||||
vp->v_vflag |= VV_ROOT; | vp->v_vflag |= VV_ROOT; | ||||
▲ Show 20 Lines • Show All 733 Lines • ▼ Show 20 Lines | |||||
* size 'newsize'. 'vp' must point to a vnode that represents a regular file. | * size 'newsize'. 'vp' must point to a vnode that represents a regular file. | ||||
* 'newsize' must be positive. | * 'newsize' must be positive. | ||||
* | * | ||||
* Returns zero on success or an appropriate error code on failure. | * Returns zero on success or an appropriate error code on failure. | ||||
*/ | */ | ||||
int | int | ||||
tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr) | tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr) | ||||
{ | { | ||||
struct tmpfs_mount *tmp; | |||||
struct tmpfs_node *node; | struct tmpfs_node *node; | ||||
vm_object_t uobj; | vm_object_t uobj; | ||||
vm_pindex_t idx, newpages, oldpages; | vm_pindex_t idx, newpages, oldpages; | ||||
off_t oldsize; | off_t oldsize; | ||||
int base, error; | int base, error; | ||||
MPASS(vp->v_type == VREG); | MPASS(vp->v_type == VREG); | ||||
MPASS(newsize >= 0); | MPASS(newsize >= 0); | ||||
node = VP_TO_TMPFS_NODE(vp); | node = VP_TO_TMPFS_NODE(vp); | ||||
uobj = node->tn_reg.tn_aobj; | uobj = node->tn_reg.tn_aobj; | ||||
tmp = VFS_TO_TMPFS(vp->v_mount); | |||||
/* | /* | ||||
* Convert the old and new sizes to the number of pages needed to | * Convert the old and new sizes to the number of pages needed to | ||||
* store them. It may happen that we do not need to do anything | * store them. It may happen that we do not need to do anything | ||||
* because the last allocated page can accommodate the change on | * because the last allocated page can accommodate the change on | ||||
* its own. | * its own. | ||||
*/ | */ | ||||
oldsize = node->tn_size; | oldsize = node->tn_size; | ||||
oldpages = OFF_TO_IDX(oldsize + PAGE_MASK); | oldpages = OFF_TO_IDX(oldsize + PAGE_MASK); | ||||
MPASS(oldpages == uobj->size); | MPASS(oldpages == uobj->size); | ||||
newpages = OFF_TO_IDX(newsize + PAGE_MASK); | newpages = OFF_TO_IDX(newsize + PAGE_MASK); | ||||
if (__predict_true(newpages == oldpages && newsize >= oldsize)) { | if (__predict_true(newpages == oldpages && newsize >= oldsize)) { | ||||
node->tn_size = newsize; | node->tn_size = newsize; | ||||
return (0); | return (0); | ||||
} | } | ||||
if (newpages > oldpages && | |||||
!tmpfs_pages_check_avail(tmp, newpages - oldpages)) | |||||
return (ENOSPC); | |||||
VM_OBJECT_WLOCK(uobj); | VM_OBJECT_WLOCK(uobj); | ||||
if (newsize < oldsize) { | if (newsize < oldsize) { | ||||
/* | /* | ||||
* Zero the truncated part of the last page. | * Zero the truncated part of the last page. | ||||
*/ | */ | ||||
base = newsize & PAGE_MASK; | base = newsize & PAGE_MASK; | ||||
if (base != 0) { | if (base != 0) { | ||||
idx = OFF_TO_IDX(newsize); | idx = OFF_TO_IDX(newsize); | ||||
error = tmpfs_partial_page_invalidate(uobj, idx, base, | error = tmpfs_partial_page_invalidate(uobj, idx, base, | ||||
PAGE_SIZE, ignerr); | PAGE_SIZE, ignerr); | ||||
if (error != 0) { | if (error != 0) { | ||||
VM_OBJECT_WUNLOCK(uobj); | VM_OBJECT_WUNLOCK(uobj); | ||||
return (error); | return (error); | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Release any swap space and free any whole pages. | * Release any swap space and free any whole pages. | ||||
*/ | */ | ||||
if (newpages < oldpages) | if (newpages < oldpages) | ||||
vm_object_page_remove(uobj, newpages, 0, 0); | vm_object_page_remove(uobj, newpages, 0, 0); | ||||
} | } | ||||
uobj->size = newpages; | uobj->size = newpages; | ||||
VM_OBJECT_WUNLOCK(uobj); | VM_OBJECT_WUNLOCK(uobj); | ||||
atomic_add_long(&tmp->tm_pages_used, newpages - oldpages); | |||||
node->tn_size = newsize; | node->tn_size = newsize; | ||||
return (0); | return (0); | ||||
} | } | ||||
/* | /* | ||||
* Punch hole in the aobj associated with the regular file pointed to by 'vp'. | * Punch hole in the aobj associated with the regular file pointed to by 'vp'. | ||||
* Requests completely beyond the end-of-file are converted to no-op. | * Requests completely beyond the end-of-file are converted to no-op. | ||||
▲ Show 20 Lines • Show All 501 Lines • Show Last 20 Lines |
Suppose I write 4096 bytes to a new tmpfs file, so tn_pages == 1 and the page is dirty. Suppose the page is laundered, so a copy exists on the swap device. Suppose that the page is not freed after this point (but it remains clean). Then I truncate the file to 0 bytes. vm_object_page_remove() will free the page, but here we will not decrement tn_pages because the pager still has a copy. Then vm_object_page_remove() frees swap space in the truncated region, but nothing will decrement tn_pages, I think.