Page MenuHomeFreeBSD

D18351.id65452.diff
No OneTemporary

D18351.id65452.diff

Index: sys/cam/cam_iosched.c
===================================================================
--- sys/cam/cam_iosched.c
+++ sys/cam/cam_iosched.c
@@ -281,6 +281,8 @@
int trim_ticks; /* Max ticks to hold trims */
int last_trim_tick; /* Last 'tick' time ld a trim */
int queued_trims; /* Number of trims in the queue */
+ int max_trims; /* Maximum number of trims pending at once */
+ int pend_trims; /* Number of pending trims now */
#ifdef CAM_IOSCHED_DYNAMIC
int read_bias; /* Read bias setting */
int current_read_bias; /* Current read bias state */
@@ -707,11 +709,6 @@
}
#endif
-/*
- * Trim or similar currently pending completion. Should only be set for
- * those drivers wishing only one Trim active at a time.
- */
-#define CAM_IOSCHED_FLAG_TRIM_ACTIVE (1ul << 0)
/* Callout active, and needs to be torn down */
#define CAM_IOSCHED_FLAG_CALLOUT_ACTIVE (1ul << 1)
@@ -755,6 +752,19 @@
static inline bool
cam_iosched_has_more_trim(struct cam_iosched_softc *isc)
{
+ struct bio *bp;
+
+ bp = bioq_first(&isc->trim_queue);
+#ifdef CAM_IOSCHED_DYNAMIC
+ if (do_dynamic_iosched) {
+ /*
+ * If we're limiting trims, then defer action on trims
+ * for a bit.
+ */
+ if (bp == NULL || cam_iosched_limiter_caniop(&isc->trim_stats, bp) != 0)
+ return false;
+ }
+#endif
/*
* If we've set a trim_goal, then if we exceed that allow trims
@@ -771,8 +781,7 @@
return false;
}
- return !(isc->flags & CAM_IOSCHED_FLAG_TRIM_ACTIVE) &&
- bioq_first(&isc->trim_queue);
+ return isc->pend_trims <= isc->max_trims && bp != NULL;
}
#define cam_iosched_sort_queue(isc) ((isc)->sort_io_queue >= 0 ? \
@@ -1096,6 +1105,7 @@
(*iscp)->sort_io_queue = -1;
bioq_init(&(*iscp)->bio_queue);
bioq_init(&(*iscp)->trim_queue);
+ (*iscp)->max_trims = 1;
#ifdef CAM_IOSCHED_DYNAMIC
if (do_dynamic_iosched) {
bioq_init(&(*iscp)->write_queue);
@@ -1389,10 +1399,17 @@
struct bio *
cam_iosched_get_trim(struct cam_iosched_softc *isc)
{
+#ifdef CAM_IOSCHED_DYNAMIC
+ struct bio *bp;
+#endif
if (!cam_iosched_has_more_trim(isc))
return NULL;
#ifdef CAM_IOSCHED_DYNAMIC
+ bp = bioq_first(&isc->trim_queue);
+ if (bp == NULL)
+ return NULL;
+
/*
* If pending read, prefer that based on current read bias setting. The
* read bias is shared for both writes and TRIMs, but on TRIMs the bias
@@ -1414,6 +1431,26 @@
*/
isc->current_read_bias = isc->read_bias;
}
+
+ /*
+ * See if our current limiter allows this I/O. Because we only call this
+ * here, and not in next_trim, the 'bandwidth' limits for trims won't
+ * work, while the iops or max queued limits will work. It's tricky
+ * because we want the limits to be from the perspective of the
+ * "commands sent to the device." To make iops work, we need to check
+ * only here (since we want all the ops we combine to count as one). To
+ * make bw limits work, we'd need to check in next_trim, but that would
+ * have the effect of limiting the iops as seen from the upper layers.
+ */
+ if (cam_iosched_limiter_iop(&isc->trim_stats, bp) != 0) {
+ if (iosched_debug)
+ printf("Can't trim because limiter says no.\n");
+ isc->trim_stats.state_flags |= IOP_RATE_LIMITED;
+ return NULL;
+ }
+ isc->current_read_bias = isc->read_bias;
+ isc->trim_stats.state_flags &= ~IOP_RATE_LIMITED;
+ /* cam_iosched_next_trim below does the proper bookkeeping */
#endif
return cam_iosched_next_trim(isc);
}
@@ -1496,6 +1533,41 @@
cam_iosched_queue_work(struct cam_iosched_softc *isc, struct bio *bp)
{
+ /*
+ * A BIO_SPEEDUP from the upper layers means that they have a block
+ * shortage. At the present, this is only sent when we're trying to
+ * allocate blocks, but have a shortage before giving up. bio_length is
+ * the size of their shortage. We will complete just enough BIO_DELETEs
+ * in the queue to satisfy the need. If bio_length is 0, we'll complete
+ * them all. This allows the scheduler to delay BIO_DELETEs to improve
+ * read/write performance without worrying about the upper layers. When
+ * it's possibly a problem, we respond by pretending the BIO_DELETEs
+ * just worked. We can't do anything about the BIO_DELETEs in the
+ * hardware, though. We have to wait for them to complete.
+ */
+ if (bp->bio_cmd == BIO_SPEEDUP) {
+ off_t len;
+ struct bio *nbp;
+
+ len = 0;
+ while (bioq_first(&isc->trim_queue) &&
+ (bp->bio_length == 0 || len < bp->bio_length)) {
+ nbp = bioq_takefirst(&isc->trim_queue);
+ len += nbp->bio_length;
+ nbp->bio_error = 0;
+ biodone(nbp);
+ }
+ if (bp->bio_length > 0) {
+ if (bp->bio_length > len)
+ bp->bio_resid = bp->bio_length - len;
+ else
+ bp->bio_resid = 0;
+ }
+ bp->bio_error = 0;
+ biodone(bp);
+ return;
+ }
+
/*
* If we get a BIO_FLUSH, and we're doing delayed BIO_DELETEs then we
* set the last tick time to one less than the current ticks minus the
@@ -1569,7 +1641,7 @@
cam_iosched_trim_done(struct cam_iosched_softc *isc)
{
- isc->flags &= ~CAM_IOSCHED_FLAG_TRIM_ACTIVE;
+ isc->pend_trims--;
}
/*
@@ -1637,7 +1709,7 @@
cam_iosched_submit_trim(struct cam_iosched_softc *isc)
{
- isc->flags |= CAM_IOSCHED_FLAG_TRIM_ACTIVE;
+ isc->pend_trims++;
}
/*
@@ -1863,7 +1935,7 @@
db_printf("in_reads: %d\n", isc->read_stats.in);
db_printf("out_reads: %d\n", isc->read_stats.out);
db_printf("queued_reads: %d\n", isc->read_stats.queued);
- db_printf("Current Q len %d\n", biolen(&isc->bio_queue));
+ db_printf("Read Q len %d\n", biolen(&isc->bio_queue));
db_printf("pending_writes: %d\n", isc->write_stats.pending);
db_printf("min_writes: %d\n", isc->write_stats.min);
db_printf("max_writes: %d\n", isc->write_stats.max);
@@ -1871,7 +1943,7 @@
db_printf("in_writes: %d\n", isc->write_stats.in);
db_printf("out_writes: %d\n", isc->write_stats.out);
db_printf("queued_writes: %d\n", isc->write_stats.queued);
- db_printf("Current Q len %d\n", biolen(&isc->write_queue));
+ db_printf("Write Q len %d\n", biolen(&isc->write_queue));
db_printf("pending_trims: %d\n", isc->trim_stats.pending);
db_printf("min_trims: %d\n", isc->trim_stats.min);
db_printf("max_trims: %d\n", isc->trim_stats.max);
@@ -1879,11 +1951,11 @@
db_printf("in_trims: %d\n", isc->trim_stats.in);
db_printf("out_trims: %d\n", isc->trim_stats.out);
db_printf("queued_trims: %d\n", isc->trim_stats.queued);
- db_printf("Current Q len %d\n", biolen(&isc->trim_queue));
+ db_printf("Trim Q len %d\n", biolen(&isc->trim_queue));
db_printf("read_bias: %d\n", isc->read_bias);
db_printf("current_read_bias: %d\n", isc->current_read_bias);
- db_printf("Trim active? %s\n",
- (isc->flags & CAM_IOSCHED_FLAG_TRIM_ACTIVE) ? "yes" : "no");
+ db_printf("Trims active %d\n", isc->pend_trims);
+ db_printf("Max trims active %d\n", isc->max_trims);
}
#endif
#endif
Index: sys/cam/nvme/nvme_da.c
===================================================================
--- sys/cam/nvme/nvme_da.c
+++ sys/cam/nvme/nvme_da.c
@@ -177,6 +177,14 @@
SYSCTL_INT(_kern_cam_nda, OID_AUTO, max_trim, CTLFLAG_RDTUN,
&nda_max_trim_entries, NDA_MAX_TRIM_ENTRIES,
"Maximum number of BIO_DELETE to send down as a DSM TRIM.");
+static int nda_goal_trim_entries = NDA_MAX_TRIM_ENTRIES / 2;
+SYSCTL_INT(_kern_cam_nda, OID_AUTO, goal_trim, CTLFLAG_RDTUN,
+ &nda_goal_trim_entries, NDA_MAX_TRIM_ENTRIES / 2,
+ "Number of BIO_DELETE to try to accumulate before sending a DSM TRIM.");
+static int nda_trim_ticks = 50; /* 50ms ~ 1000 Hz */
+SYSCTL_INT(_kern_cam_nda, OID_AUTO, trim_ticks, CTLFLAG_RDTUN,
+ &nda_trim_ticks, 50,
+ "Number of ticks to hold BIO_DELETEs before sending down a trim");
/*
* All NVMe media is non-rotational, so all nvme device instances
@@ -741,6 +749,9 @@
free(softc, M_DEVBUF);
return(CAM_REQ_CMP_ERR);
}
+ /* Statically set these for the moment */
+ cam_iosched_set_trim_goal(softc->cam_iosched, nda_goal_trim_entries);
+ cam_iosched_set_trim_ticks(softc->cam_iosched, nda_trim_ticks);
/* ident_data parsing */
Index: sys/geom/geom.h
===================================================================
--- sys/geom/geom.h
+++ sys/geom/geom.h
@@ -343,6 +343,7 @@
int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr);
int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp);
int g_io_flush(struct g_consumer *cp);
+int g_io_speedup(size_t shortage, u_int flags, size_t *resid, struct g_consumer *cp);
int g_register_classifier(struct g_classifier_hook *hook);
void g_unregister_classifier(struct g_classifier_hook *hook);
void g_io_request(struct bio *bp, struct g_consumer *cp);
Index: sys/geom/geom_io.c
===================================================================
--- sys/geom/geom_io.c
+++ sys/geom/geom_io.c
@@ -340,6 +340,42 @@
return (error);
}
+/*
+ * Send a BIO_SPEEDUP down the stack. This is used to tell the lower layers that
+ * the upper layers have detected a resource shortage. The lower layers are
+ * advised to stop delaying I/O that they might be holding for performance
+ * reasons and to schedule it (non-trims) or complete it successfully (trims) as
+ * quickly as it can. bio_length is the amount of the shortage. This call
+ * should be non-blocking. bio_resid is used to communicate back if the lower
+ * layers couldn't find bio_length worth of I/O to schedule or discard. A length
+ * of 0 means to do as much as you can (schedule the h/w queues full, discard
+ * all trims). flags are a hint from the upper layers to the lower layers what
+ * operation should be done.
+ */
+int
+g_io_speedup(size_t shortage, u_int flags, size_t *resid, struct g_consumer *cp)
+{
+ struct bio *bp;
+ int error;
+
+ KASSERT((flags & (BIO_SPEEDUP_TRIM | BIO_SPEEDUP_WRITE)) != 0,
+ ("Invalid flags passed to g_io_speedup: %#x", flags));
+ g_trace(G_T_BIO, "bio_speedup(%s, %zu, %#x)", cp->provider->name,
+ shortage, flags);
+ bp = g_new_bio();
+ if (bp == NULL)
+ return (ENOMEM);
+ bp->bio_cmd = BIO_SPEEDUP;
+ bp->bio_length = shortage;
+ bp->bio_done = NULL;
+ bp->bio_flags |= flags;
+ g_io_request(bp, cp);
+ error = biowait(bp, "gflush");
+ *resid = bp->bio_resid;
+ g_destroy_bio(bp);
+ return (error);
+}
+
int
g_io_flush(struct g_consumer *cp)
{
Index: sys/sys/bio.h
===================================================================
--- sys/sys/bio.h
+++ sys/sys/bio.h
@@ -53,6 +53,7 @@
#define BIO_CMD1 0x07 /* Available for local hacks */
#define BIO_CMD2 0x08 /* Available for local hacks */
#define BIO_ZONE 0x09 /* Zone command */
+#define BIO_SPEEDUP 0x0a /* Upper layers face shortage */
/* bio_flags */
#define BIO_ERROR 0x01 /* An error occurred processing this bio. */
@@ -70,6 +71,9 @@
#define PRINT_BIO_FLAGS "\20\7vlist\6transient_mapping\5unmapped" \
"\4ordered\3onqueue\2done\1error"
+#define BIO_SPEEDUP_WRITE 0x4000 /* Resource shortage at upper layers */
+#define BIO_SPEEDUP_TRIM 0x8000 /* Resource shortage at upper layers */
+
#ifdef _KERNEL
struct disk;
struct bio;
Index: sys/ufs/ffs/ffs_softdep.c
===================================================================
--- sys/ufs/ffs/ffs_softdep.c
+++ sys/ufs/ffs/ffs_softdep.c
@@ -13352,7 +13352,9 @@
{
struct ufsmount *ump;
struct mount *mp;
+ struct g_consumer *cp;
long starttime;
+ size_t resid;
ufs2_daddr_t needed;
int error, failed_vnode;
@@ -13374,6 +13376,7 @@
mp = vp->v_mount;
ump = VFSTOUFS(mp);
+ cp = (struct g_consumer *)ump->um_devvp->v_bufobj.bo_private;
mtx_assert(UFS_MTX(ump), MA_OWNED);
UFS_UNLOCK(ump);
error = ffs_update(vp, 1);
@@ -13428,6 +13431,9 @@
}
starttime = time_second;
retry:
+ if (resource == FLUSH_BLOCKS_WAIT &&
+ fs->fs_cstotal.cs_nbfree <= needed)
+ g_io_speedup(needed * fs->fs_bsize, BIO_SPEEDUP_TRIM, &resid, cp);
if ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 &&
fs->fs_cstotal.cs_nbfree <= needed) ||
(resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 &&
@@ -13574,6 +13580,8 @@
{
struct mount *mp;
struct ufsmount *ump;
+ struct g_consumer *cp;
+ size_t resid;
int error;
bool req;
@@ -13585,6 +13593,8 @@
return;
if (ffs_own_mount(mp) && MOUNTEDSOFTDEP(mp)) {
ump = VFSTOUFS(mp);
+ cp = (struct g_consumer *)ump->um_devvp->v_bufobj.bo_private;
+ g_io_speedup(0, BIO_SPEEDUP_TRIM, &resid, cp);
for (;;) {
req = false;
ACQUIRE_LOCK(ump);

File Metadata

Mime Type
text/plain
Expires
Sun, Feb 8, 4:45 PM (11 h, 30 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28475174
Default Alt Text
D18351.id65452.diff (12 KB)

Event Timeline