Page MenuHomeFreeBSD

D20863.id60489.diff
No OneTemporary

D20863.id60489.diff

Index: sys/kern/subr_blist.c
===================================================================
--- sys/kern/subr_blist.c
+++ sys/kern/subr_blist.c
@@ -295,7 +295,7 @@
* not be allocated.
*/
daddr_t
-blist_alloc(blist_t bl, int *count, int maxcount)
+blist_alloc(blist_t bl, daddr_t *io_cursor, int *count, int maxcount)
{
daddr_t blk, cursor;
@@ -310,14 +310,14 @@
* non-zero. When the cursor is zero, an allocation failure will
* stop further iterations.
*/
- for (cursor = bl->bl_cursor;; cursor = 0) {
+ for (cursor = *io_cursor;; cursor = 0) {
blk = blst_meta_alloc(bl->bl_root, cursor, count, maxcount,
bl->bl_radix);
if (blk != SWAPBLK_NONE) {
bl->bl_avail -= *count;
- bl->bl_cursor = blk + *count;
- if (bl->bl_cursor == bl->bl_blocks)
- bl->bl_cursor = 0;
+ *io_cursor = blk + *count;
+ if (*io_cursor == bl->bl_blocks)
+ *io_cursor = 0;
return (blk);
}
if (cursor == 0)
@@ -404,8 +404,7 @@
void
blist_print(blist_t bl)
{
- printf("BLIST avail = %jd, cursor = %08jx {\n",
- (uintmax_t)bl->bl_avail, (uintmax_t)bl->bl_cursor);
+ printf("BLIST avail = %jd {\n", (uintmax_t)bl->bl_avail);
if (bl->bl_root->bm_bitmap != 0)
blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4);
Index: sys/sys/blist.h
===================================================================
--- sys/sys/blist.h
+++ sys/sys/blist.h
@@ -81,7 +81,6 @@
daddr_t bl_blocks; /* area of coverage */
daddr_t bl_avail; /* # available blocks */
u_daddr_t bl_radix; /* coverage radix */
- daddr_t bl_cursor; /* next-fit search starts at */
blmeta_t bl_root[1]; /* root of radix tree */
} *blist_t;
@@ -92,7 +91,7 @@
struct sbuf;
-daddr_t blist_alloc(blist_t blist, int *count, int maxcount);
+daddr_t blist_alloc(blist_t blist, daddr_t *cursor, int *count, int maxcount);
daddr_t blist_avail(blist_t blist);
blist_t blist_create(daddr_t blocks, int flags);
void blist_destroy(blist_t blist);
Index: sys/vm/swap_pager.h
===================================================================
--- sys/vm/swap_pager.h
+++ sys/vm/swap_pager.h
@@ -57,6 +57,7 @@
*/
struct swdevt {
int sw_flags;
+ int sw_priority;
int sw_nblks;
int sw_used;
dev_t sw_dev;
@@ -65,12 +66,15 @@
swblk_t sw_first;
swblk_t sw_end;
struct blist *sw_blist;
+ daddr_t sw_cursor; /* next-fit search starts at */
+ daddr_t sw_trimmer; /* where to look for free space to trim */
TAILQ_ENTRY(swdevt) sw_list;
sw_strategy_t *sw_strategy;
sw_close_t *sw_close;
};
#define SW_UNMAPPED 0x01
+#define SW_TRIM 0x02
#define SW_CLOSING 0x04
#ifdef _KERNEL
Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -400,8 +400,9 @@
static void swp_sizecheck(void);
static void swp_pager_async_iodone(struct buf *bp);
static bool swp_pager_swblk_empty(struct swblk *sb, int start, int limit);
-static int swapongeom(struct vnode *);
-static int swaponvp(struct thread *, struct vnode *, u_long);
+static int swapongeom(struct vnode *, int flags, int priority);
+static int swaponvp(struct thread *, struct vnode *, u_long nblks,
+ int flags, int priority);
static int swapoff_one(struct swdevt *sp, struct ucred *cred);
/*
@@ -737,7 +738,8 @@
if (sp == NULL)
sp = TAILQ_FIRST(&swtailq);
if ((sp->sw_flags & SW_CLOSING) == 0)
- blk = blist_alloc(sp->sw_blist, &npages, mpages);
+ blk = blist_alloc(sp->sw_blist, &sp->sw_cursor,
+ &npages, mpages);
if (blk != SWAPBLK_NONE)
break;
sp = TAILQ_NEXT(sp, sw_list);
@@ -800,7 +802,149 @@
}
+/*
+ * Completion handler installed as b_iodone on the BIO_DELETE buf built by
+ * swapdev_trim().  Releases the buf, bumps nsw_wcount_async (waking a
+ * waiter when the count becomes 1), and puts the trimmed blocks back into
+ * the owning device's blist.
+ */
+static void
+swp_pager_async_trimdone(struct buf *bp)
+{
+	struct swdevt *dev;
+	daddr_t first;
+	int count;
+
+	/* Copy what we need out of the buf before it goes back to the zone. */
+	dev = (struct swdevt *)bp->b_fsprivate1;
+	first = bp->b_blkno;
+	count = bp->b_npages;
+	uma_zfree(swwbuf_zone, bp);
+
+	mtx_lock(&swbuf_mtx);
+	nsw_wcount_async++;
+	if (nsw_wcount_async == 1)
+		wakeup(&nsw_wcount_async);
+	mtx_unlock(&swbuf_mtx);
+
+	/* b_blkno was set to blk + sw_first; the blist is indexed from 0. */
+	mtx_lock(&sw_dev_mtx);
+	blist_free(dev->sw_blist, first - dev->sw_first, count);
+	mtx_unlock(&sw_dev_mtx);
+}
+
+static int trimzone_numer = 1;	/* trim zone is roughly numer/denom of the device */
+static int trimzone_denom = 8;	/* see swapdev_in_trim_zone() */
+static int min_trim_alloc = 1;	/* initial count swapdev_trim() passes to blist_alloc() */
+static int max_trim_alloc = 32768;	/* maxcount swapdev_trim() passes to blist_alloc() */
/*
+ * The 'trim zone' is the small range of addresses just ahead of the cursor
+ * likely to be allocated soon. When the trimmer falls into the trim zone,
+ * we allocate blocks with the trimmer.
+ */
+static bool
+swapdev_in_trim_zone(daddr_t start, daddr_t end, daddr_t nblks)
+{
+	daddr_t gap, zone;
+
+	/* Forward distance from start to end, wrapping at nblks. */
+	gap = (end + nblks - start) % nblks;
+	/* Width of the zone, as a fraction of the device size. */
+	zone = nblks / trimzone_denom;
+	return (gap / trimzone_numer < zone);
+}
+
+
+/*
+ * SWAPDEV_TRIM() - take free swap space from one device and build a
+ *		    BIO_DELETE request for it
+ *
+ *	Must be called with sw_dev_mtx held.
+ *
+ *	Returns NULL with sw_dev_mtx held when no trim is started: trimming
+ *	is disabled on the device, the trimmer is outside the trim zone,
+ *	nothing is free, the allocation landed too close to the cursor, no
+ *	async write slot is available, or no buf could be allocated.  On the
+ *	last two paths the mutex is dropped and reacquired before returning.
+ *
+ *	Otherwise returns a buf describing the allocated range with
+ *	sw_dev_mtx RELEASED.  The caller passes the buf to the device's
+ *	strategy routine; swp_pager_async_trimdone() frees the range back to
+ *	the blist and returns the async write slot on completion.
+ */
+static struct buf *
+swapdev_trim(struct swdevt *sp)
+{
+	struct buf *bp;
+	daddr_t blk;
+	u_long nblks;
+	int npages;
+
+	/* Quit if trimming is disabled. */
+	if ((sp->sw_flags & SW_TRIM) == 0)
+		return (NULL);
+
+	/* Quit if the cursor is too far behind the trimmer. */
+	nblks = sp->sw_nblks;
+	if (!swapdev_in_trim_zone(sp->sw_cursor, sp->sw_trimmer, nblks))
+		return (NULL);
+
+	/* Grab a (hopefully) large run of free space allocated long ago. */
+	npages = min_trim_alloc;
+	blk = blist_alloc(sp->sw_blist, &sp->sw_trimmer,
+	    &npages, max_trim_alloc);
+
+	/* Quit if there's nothing free. */
+	if (blk == SWAPBLK_NONE)
+		return (NULL);
+	if (swapdev_in_trim_zone(sp->sw_cursor, sp->sw_trimmer, nblks)) {
+		/*
+		 * Trim allocation is too close to the cursor.  Either we've
+		 * wrapped around and jumped the cursor, or we've found too
+		 * little free space close to the cursor to move the trimmer
+		 * out of the trim zone.  Bail out.
+		 */
+		CTR5(KTR_SPARE5, "%s: cursor %p blk %p size %5d trimmer %p, too close",
+		    __func__,
+		    (void*)sp->sw_cursor, (void*)blk, npages, (void*)sp->sw_trimmer);
+		blist_free(sp->sw_blist, blk, npages);
+		return (NULL);
+	}
+
+	/* Reserve an async write slot; sw_dev_mtx is dropped first. */
+	mtx_unlock(&sw_dev_mtx);
+	mtx_lock(&swbuf_mtx);
+	if (nsw_wcount_async == 0) {
+		mtx_unlock(&swbuf_mtx);
+		CTR5(KTR_SPARE5, "%s: cursor %p blk %p size %5d trimmer %p, low count",
+		    __func__,
+		    (void*)sp->sw_cursor, (void*)blk, npages, (void*)sp->sw_trimmer);
+		/* Undo the allocation and rewind the trimmer to retry later. */
+		mtx_lock(&sw_dev_mtx);
+		blist_free(sp->sw_blist, blk, npages);
+		sp->sw_trimmer = blk;
+		return (NULL);
+	}
+	nsw_wcount_async--;
+	mtx_unlock(&swbuf_mtx);
+	bp = uma_zalloc(swwbuf_zone, M_NOWAIT);
+	if (bp == NULL) {
+		/*
+		 * Give back the async write slot reserved above; otherwise
+		 * every failed allocation would permanently shrink
+		 * nsw_wcount_async.  Wake a waiter the same way the
+		 * completion handler does.
+		 */
+		mtx_lock(&swbuf_mtx);
+		if (++nsw_wcount_async == 1)
+			wakeup(&nsw_wcount_async);
+		mtx_unlock(&swbuf_mtx);
+		CTR5(KTR_SPARE5, "%s: cursor %p blk %p size %5d trimmer %p, no buf",
+		    __func__,
+		    (void*)sp->sw_cursor, (void*)blk, npages, (void*)sp->sw_trimmer);
+		/* Undo the allocation and rewind the trimmer to retry later. */
+		mtx_lock(&sw_dev_mtx);
+		blist_free(sp->sw_blist, blk, npages);
+		sp->sw_trimmer = blk;
+		return (NULL);
+	}
+	CTR5(KTR_SPARE5, "%s: cursor %p blk %p size %5d trimmer %p, start trim",
+	    __func__,
+	    (void*)sp->sw_cursor, (void*)blk, npages, (void*)sp->sw_trimmer);
+
+	/* Describe the range as an asynchronous delete with no data. */
+	bp->b_flags = B_ASYNC;
+	bp->b_data = NULL;
+	bp->b_iocmd = BIO_DELETE;
+	bp->b_rcred = crhold(thread0.td_ucred);
+	bp->b_wcred = crhold(thread0.td_ucred);
+	bp->b_bcount = PAGE_SIZE * npages;
+	bp->b_bufsize = PAGE_SIZE * npages;
+	/* blist offsets are device-relative; b_blkno includes sw_first. */
+	bp->b_blkno = blk + sp->sw_first;
+	bp->b_npages = npages;
+	bp->b_iodone = swp_pager_async_trimdone;
+	/* The completion handler recovers the device from b_fsprivate1. */
+	bp->b_fsprivate1 = sp;
+	BUF_KERNPROC(bp);
+	return (bp);
+}
+
+/*
+ * Walk the swap device list and start a trim on the first device for
+ * which swapdev_trim() hands back a buf.  At most one trim is started
+ * per call.
+ */
+static void
+swp_pager_trimswapspace(void)
+{
+	struct swdevt *dev;
+	struct buf *trimbuf;
+
+	mtx_lock(&sw_dev_mtx);
+	for (dev = TAILQ_FIRST(&swtailq); dev != NULL;
+	    dev = TAILQ_NEXT(dev, sw_list)) {
+		trimbuf = swapdev_trim(dev);
+		if (trimbuf != NULL) {
+			/* sw_dev_mtx released in swapdev_trim */
+			dev->sw_strategy(trimbuf, dev);
+			return;
+		}
+	}
+	mtx_unlock(&sw_dev_mtx);
+}
+
+/*
* SWP_PAGER_FREESWAPSPACE() - free raw swap space
*
* This routine returns the specified swap blocks back to the bitmap.
@@ -908,8 +1052,7 @@
blk = swp_pager_getswapspace(&n, 1);
if (blk == SWAPBLK_NONE) {
swp_pager_meta_free(object, start, i);
- VM_OBJECT_WUNLOCK(object);
- return (-1);
+ break;
}
for (j = 0; j < n; ++j) {
addr = swp_pager_meta_build(object,
@@ -919,9 +1062,10 @@
addr);
}
}
+ VM_OBJECT_WUNLOCK(object);
swp_pager_freeswapspace(s_free, n_free);
- VM_OBJECT_WUNLOCK(object);
- return (0);
+ swp_pager_trimswapspace();
+ return (i == size ? 0 : -1);
}
/*
@@ -1471,6 +1615,7 @@
swp_pager_async_iodone(bp);
}
swp_pager_freeswapspace(s_free, n_free);
+ swp_pager_trimswapspace();
VM_OBJECT_WLOCK(object);
}
@@ -2178,12 +2323,8 @@
};
#endif
-/*
- * MPSAFE
- */
-/* ARGSUSED */
-int
-sys_swapon(struct thread *td, struct swapon_args *uap)
+static int
+kern_swapon(struct thread *td, const char *name, int flags, int priority)
{
struct vattr attr;
struct vnode *vp;
@@ -2206,7 +2347,7 @@
}
NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
- uap->name, td);
+ name, td);
error = namei(&nd);
if (error)
goto done;
@@ -2215,7 +2356,7 @@
vp = nd.ni_vp;
if (vn_isdisk(vp, &error)) {
- error = swapongeom(vp);
+ error = swapongeom(vp, flags, priority);
} else if (vp->v_type == VREG &&
(vp->v_mount->mnt_vfc->vfc_flags & VFCF_NETWORK) != 0 &&
(error = VOP_GETATTR(vp, &attr, td->td_ucred)) == 0) {
@@ -2223,7 +2364,8 @@
* Allow direct swapping to NFS regular files in the same
* way that nfs_mountroot() sets up diskless swapping.
*/
- error = swaponvp(td, vp, attr.va_size / DEV_BSIZE);
+ error = swaponvp(td, vp, attr.va_size / DEV_BSIZE,
+ flags, priority);
}
if (error)
@@ -2233,6 +2375,14 @@
return (error);
}
+/*
+ * System call entry point: forwards uap->name to kern_swapon() with the
+ * SW_TRIM flag set and a priority of 0.
+ */
+/* ARGSUSED */
+int
+sys_swapon(struct thread *td, struct swapon_args *uap)
+{
+
+	return (kern_swapon(td, uap->name, SW_TRIM, 0));
+}
+
/*
* Check that the total amount of swap currently configured does not
* exceed half the theoretical maximum. If it does, print a warning
@@ -2259,7 +2409,8 @@
static void
swaponsomething(struct vnode *vp, void *id, u_long nblks,
- sw_strategy_t *strategy, sw_close_t *close, dev_t dev, int flags)
+ sw_strategy_t *strategy, sw_close_t *close, dev_t dev,
+ int flags, int priority)
{
struct swdevt *sp, *tsp;
swblk_t dvbase;
@@ -2295,8 +2446,11 @@
sp->sw_strategy = strategy;
sp->sw_close = close;
sp->sw_flags = flags;
+ sp->sw_priority = priority;
sp->sw_blist = blist_create(nblks, M_WAITOK);
+ sp->sw_cursor = 0;
+ sp->sw_trimmer = 0;
/*
* Do not free the first two block in order to avoid overwriting
* any bsd label at the front of the partition
@@ -2736,7 +2890,7 @@
}
swapgeom_acquire(cp);
mtx_unlock(&sw_dev_mtx);
- if (bp->b_iocmd == BIO_WRITE)
+ if (bp->b_iocmd == BIO_WRITE || bp->b_iocmd == BIO_DELETE)
bio = g_new_bio();
else
bio = g_alloc_bio();
@@ -2819,7 +2973,7 @@
}
static int
-swapongeom_locked(struct cdev *dev, struct vnode *vp)
+swapongeom_locked(struct cdev *dev, struct vnode *vp, int flags, int priority)
{
struct g_provider *pp;
struct g_consumer *cp;
@@ -2859,14 +3013,15 @@
return (error);
}
nblks = pp->mediasize / DEV_BSIZE;
+ if ((pp->flags & G_PF_ACCEPT_UNMAPPED) != 0)
+ flags |= SW_UNMAPPED;
swaponsomething(vp, cp, nblks, swapgeom_strategy,
- swapgeom_close, dev2udev(dev),
- (pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 ? SW_UNMAPPED : 0);
+ swapgeom_close, dev2udev(dev), flags, priority);
return (0);
}
static int
-swapongeom(struct vnode *vp)
+swapongeom(struct vnode *vp, int flags, int priority)
{
int error;
@@ -2875,7 +3030,7 @@
error = ENOENT;
} else {
g_topology_lock();
- error = swapongeom_locked(vp->v_rdev, vp);
+ error = swapongeom_locked(vp->v_rdev, vp, flags, priority);
g_topology_unlock();
}
VOP_UNLOCK(vp, 0);
@@ -2922,7 +3077,8 @@
static int
-swaponvp(struct thread *td, struct vnode *vp, u_long nblks)
+swaponvp(struct thread *td, struct vnode *vp, u_long nblks, int flags,
+ int priority)
{
struct swdevt *sp;
int error;
@@ -2949,7 +3105,7 @@
return (error);
swaponsomething(vp, vp, nblks, swapdev_strategy, swapdev_close,
- NODEV, 0);
+ NODEV, flags, priority);
return (0);
}

File Metadata

Mime Type
text/plain
Expires
Tue, Feb 10, 3:16 PM (22 h, 4 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28627171
Default Alt Text
D20863.id60489.diff (11 KB)

Event Timeline