Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F144293503
D18351.id65452.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
12 KB
Referenced Files
None
Subscribers
None
D18351.id65452.diff
View Options
Index: sys/cam/cam_iosched.c
===================================================================
--- sys/cam/cam_iosched.c
+++ sys/cam/cam_iosched.c
@@ -281,6 +281,8 @@
int trim_ticks; /* Max ticks to hold trims */
int last_trim_tick; /* Last 'tick' time ld a trim */
int queued_trims; /* Number of trims in the queue */
+ int max_trims; /* Maximum number of trims pending at once */
+ int pend_trims; /* Number of pending trims now */
#ifdef CAM_IOSCHED_DYNAMIC
int read_bias; /* Read bias setting */
int current_read_bias; /* Current read bias state */
@@ -707,11 +709,6 @@
}
#endif
-/*
- * Trim or similar currently pending completion. Should only be set for
- * those drivers wishing only one Trim active at a time.
- */
-#define CAM_IOSCHED_FLAG_TRIM_ACTIVE (1ul << 0)
/* Callout active, and needs to be torn down */
#define CAM_IOSCHED_FLAG_CALLOUT_ACTIVE (1ul << 1)
@@ -755,6 +752,19 @@
static inline bool
cam_iosched_has_more_trim(struct cam_iosched_softc *isc)
{
+ struct bio *bp;
+
+ bp = bioq_first(&isc->trim_queue);
+#ifdef CAM_IOSCHED_DYNAMIC
+ if (do_dynamic_iosched) {
+ /*
+ * If we're limiting trims, then defer action on trims
+ * for a bit.
+ */
+ if (bp == NULL || cam_iosched_limiter_caniop(&isc->trim_stats, bp) != 0)
+ return false;
+ }
+#endif
/*
* If we've set a trim_goal, then if we exceed that allow trims
@@ -771,8 +781,7 @@
return false;
}
- return !(isc->flags & CAM_IOSCHED_FLAG_TRIM_ACTIVE) &&
- bioq_first(&isc->trim_queue);
+ return isc->pend_trims <= isc->max_trims && bp != NULL;
}
#define cam_iosched_sort_queue(isc) ((isc)->sort_io_queue >= 0 ? \
@@ -1096,6 +1105,7 @@
(*iscp)->sort_io_queue = -1;
bioq_init(&(*iscp)->bio_queue);
bioq_init(&(*iscp)->trim_queue);
+ (*iscp)->max_trims = 1;
#ifdef CAM_IOSCHED_DYNAMIC
if (do_dynamic_iosched) {
bioq_init(&(*iscp)->write_queue);
@@ -1389,10 +1399,17 @@
struct bio *
cam_iosched_get_trim(struct cam_iosched_softc *isc)
{
+#ifdef CAM_IOSCHED_DYNAMIC
+ struct bio *bp;
+#endif
if (!cam_iosched_has_more_trim(isc))
return NULL;
#ifdef CAM_IOSCHED_DYNAMIC
+ bp = bioq_first(&isc->trim_queue);
+ if (bp == NULL)
+ return NULL;
+
/*
* If pending read, prefer that based on current read bias setting. The
* read bias is shared for both writes and TRIMs, but on TRIMs the bias
@@ -1414,6 +1431,26 @@
*/
isc->current_read_bias = isc->read_bias;
}
+
+ /*
+ * See if our current limiter allows this I/O. Because we only call this
+ * here, and not in next_trim, the 'bandwidth' limits for trims won't
+ * work, while the iops or max queued limits will work. It's tricky
+ * because we want the limits to be from the perspective of the
+ * "commands sent to the device." To make iops work, we need to check
+ * only here (since we want all the ops we combine to count as one). To
+ * make bw limits work, we'd need to check in next_trim, but that would
+ * have the effect of limiting the iops as seen from the upper layers.
+ */
+ if (cam_iosched_limiter_iop(&isc->trim_stats, bp) != 0) {
+ if (iosched_debug)
+ printf("Can't trim because limiter says no.\n");
+ isc->trim_stats.state_flags |= IOP_RATE_LIMITED;
+ return NULL;
+ }
+ isc->current_read_bias = isc->read_bias;
+ isc->trim_stats.state_flags &= ~IOP_RATE_LIMITED;
+ /* cam_iosched_next_trim below does the proper bookkeeping */
#endif
return cam_iosched_next_trim(isc);
}
@@ -1496,6 +1533,41 @@
cam_iosched_queue_work(struct cam_iosched_softc *isc, struct bio *bp)
{
+ /*
+ * A BIO_SPEEDUP from the upper layers means that they have a block
+ * shortage. At present, this is only sent when we're trying to
+ * allocate blocks, but have a shortage before giving up. bio_length is
+ * the size of their shortage. We will complete just enough BIO_DELETEs
+ * in the queue to satisfy the need. If bio_length is 0, we'll complete
+ * them all. This allows the scheduler to delay BIO_DELETEs to improve
+ * read/write performance without worrying about the upper layers. When
+ * it's possibly a problem, we respond by pretending the BIO_DELETEs
+ * just worked. We can't do anything about the BIO_DELETEs in the
+ * hardware, though. We have to wait for them to complete.
+ */
+ if (bp->bio_cmd == BIO_SPEEDUP) {
+ off_t len;
+ struct bio *nbp;
+
+ len = 0;
+ while (bioq_first(&isc->trim_queue) &&
+ (bp->bio_length == 0 || len < bp->bio_length)) {
+ nbp = bioq_takefirst(&isc->trim_queue);
+ len += nbp->bio_length;
+ nbp->bio_error = 0;
+ biodone(nbp);
+ }
+ if (bp->bio_length > 0) {
+ if (bp->bio_length > len)
+ bp->bio_resid = bp->bio_length - len;
+ else
+ bp->bio_resid = 0;
+ }
+ bp->bio_error = 0;
+ biodone(bp);
+ return;
+ }
+
/*
* If we get a BIO_FLUSH, and we're doing delayed BIO_DELETEs then we
* set the last tick time to one less than the current ticks minus the
@@ -1569,7 +1641,7 @@
cam_iosched_trim_done(struct cam_iosched_softc *isc)
{
- isc->flags &= ~CAM_IOSCHED_FLAG_TRIM_ACTIVE;
+ isc->pend_trims--;
}
/*
@@ -1637,7 +1709,7 @@
cam_iosched_submit_trim(struct cam_iosched_softc *isc)
{
- isc->flags |= CAM_IOSCHED_FLAG_TRIM_ACTIVE;
+ isc->pend_trims++;
}
/*
@@ -1863,7 +1935,7 @@
db_printf("in_reads: %d\n", isc->read_stats.in);
db_printf("out_reads: %d\n", isc->read_stats.out);
db_printf("queued_reads: %d\n", isc->read_stats.queued);
- db_printf("Current Q len %d\n", biolen(&isc->bio_queue));
+ db_printf("Read Q len %d\n", biolen(&isc->bio_queue));
db_printf("pending_writes: %d\n", isc->write_stats.pending);
db_printf("min_writes: %d\n", isc->write_stats.min);
db_printf("max_writes: %d\n", isc->write_stats.max);
@@ -1871,7 +1943,7 @@
db_printf("in_writes: %d\n", isc->write_stats.in);
db_printf("out_writes: %d\n", isc->write_stats.out);
db_printf("queued_writes: %d\n", isc->write_stats.queued);
- db_printf("Current Q len %d\n", biolen(&isc->write_queue));
+ db_printf("Write Q len %d\n", biolen(&isc->write_queue));
db_printf("pending_trims: %d\n", isc->trim_stats.pending);
db_printf("min_trims: %d\n", isc->trim_stats.min);
db_printf("max_trims: %d\n", isc->trim_stats.max);
@@ -1879,11 +1951,11 @@
db_printf("in_trims: %d\n", isc->trim_stats.in);
db_printf("out_trims: %d\n", isc->trim_stats.out);
db_printf("queued_trims: %d\n", isc->trim_stats.queued);
- db_printf("Current Q len %d\n", biolen(&isc->trim_queue));
+ db_printf("Trim Q len %d\n", biolen(&isc->trim_queue));
db_printf("read_bias: %d\n", isc->read_bias);
db_printf("current_read_bias: %d\n", isc->current_read_bias);
- db_printf("Trim active? %s\n",
- (isc->flags & CAM_IOSCHED_FLAG_TRIM_ACTIVE) ? "yes" : "no");
+ db_printf("Trims active %d\n", isc->pend_trims);
+ db_printf("Max trims active %d\n", isc->max_trims);
}
#endif
#endif
Index: sys/cam/nvme/nvme_da.c
===================================================================
--- sys/cam/nvme/nvme_da.c
+++ sys/cam/nvme/nvme_da.c
@@ -177,6 +177,14 @@
SYSCTL_INT(_kern_cam_nda, OID_AUTO, max_trim, CTLFLAG_RDTUN,
&nda_max_trim_entries, NDA_MAX_TRIM_ENTRIES,
"Maximum number of BIO_DELETE to send down as a DSM TRIM.");
+static int nda_goal_trim_entries = NDA_MAX_TRIM_ENTRIES / 2;
+SYSCTL_INT(_kern_cam_nda, OID_AUTO, goal_trim, CTLFLAG_RDTUN,
+ &nda_goal_trim_entries, NDA_MAX_TRIM_ENTRIES / 2,
+ "Number of BIO_DELETE to try to accumulate before sending a DSM TRIM.");
+static int nda_trim_ticks = 50; /* 50ms ~ 1000 Hz */
+SYSCTL_INT(_kern_cam_nda, OID_AUTO, trim_ticks, CTLFLAG_RDTUN,
+ &nda_trim_ticks, 50,
+ "Number of ticks to hold BIO_DELETEs before sending down a trim");
/*
* All NVMe media is non-rotational, so all nvme device instances
@@ -741,6 +749,9 @@
free(softc, M_DEVBUF);
return(CAM_REQ_CMP_ERR);
}
+ /* Statically set these for the moment */
+ cam_iosched_set_trim_goal(softc->cam_iosched, nda_goal_trim_entries);
+ cam_iosched_set_trim_ticks(softc->cam_iosched, nda_trim_ticks);
/* ident_data parsing */
Index: sys/geom/geom.h
===================================================================
--- sys/geom/geom.h
+++ sys/geom/geom.h
@@ -343,6 +343,7 @@
int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr);
int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp);
int g_io_flush(struct g_consumer *cp);
+int g_io_speedup(size_t shortage, u_int flags, size_t *resid, struct g_consumer *cp);
int g_register_classifier(struct g_classifier_hook *hook);
void g_unregister_classifier(struct g_classifier_hook *hook);
void g_io_request(struct bio *bp, struct g_consumer *cp);
Index: sys/geom/geom_io.c
===================================================================
--- sys/geom/geom_io.c
+++ sys/geom/geom_io.c
@@ -340,6 +340,42 @@
return (error);
}
+/*
+ * Send a BIO_SPEEDUP down the stack. This is used to tell the lower layers that
+ * the upper layers have detected a resource shortage. The lower layers are
+ * advised to stop delaying I/O that they might be holding for performance
+ * reasons and to schedule it (non-trims) or complete it successfully (trims) as
+ * quickly as they can. bio_length is the amount of the shortage. This call
+ * should be non-blocking. bio_resid is used to communicate back if the lower
+ * layers couldn't find bio_length worth of I/O to schedule or discard. A length
+ * of 0 means to do as much as you can (schedule the h/w queues full, discard
+ * all trims). flags are a hint from the upper layers to the lower layers what
+ * operation should be done.
+ */
+int
+g_io_speedup(size_t shortage, u_int flags, size_t *resid, struct g_consumer *cp)
+{
+ struct bio *bp;
+ int error;
+
+ KASSERT((flags & (BIO_SPEEDUP_TRIM | BIO_SPEEDUP_WRITE)) != 0,
+ ("Invalid flags passed to g_io_speedup: %#x", flags));
+ g_trace(G_T_BIO, "bio_speedup(%s, %zu, %#x)", cp->provider->name,
+ shortage, flags);
+ bp = g_new_bio();
+ if (bp == NULL)
+ return (ENOMEM);
+ bp->bio_cmd = BIO_SPEEDUP;
+ bp->bio_length = shortage;
+ bp->bio_done = NULL;
+ bp->bio_flags |= flags;
+ g_io_request(bp, cp);
+ error = biowait(bp, "gflush");
+ *resid = bp->bio_resid;
+ g_destroy_bio(bp);
+ return (error);
+}
+
int
g_io_flush(struct g_consumer *cp)
{
Index: sys/sys/bio.h
===================================================================
--- sys/sys/bio.h
+++ sys/sys/bio.h
@@ -53,6 +53,7 @@
#define BIO_CMD1 0x07 /* Available for local hacks */
#define BIO_CMD2 0x08 /* Available for local hacks */
#define BIO_ZONE 0x09 /* Zone command */
+#define BIO_SPEEDUP 0x0a /* Upper layers face shortage */
/* bio_flags */
#define BIO_ERROR 0x01 /* An error occurred processing this bio. */
@@ -70,6 +71,9 @@
#define PRINT_BIO_FLAGS "\20\7vlist\6transient_mapping\5unmapped" \
"\4ordered\3onqueue\2done\1error"
+#define BIO_SPEEDUP_WRITE 0x4000 /* Resource shortage at upper layers */
+#define BIO_SPEEDUP_TRIM 0x8000 /* Resource shortage at upper layers */
+
#ifdef _KERNEL
struct disk;
struct bio;
Index: sys/ufs/ffs/ffs_softdep.c
===================================================================
--- sys/ufs/ffs/ffs_softdep.c
+++ sys/ufs/ffs/ffs_softdep.c
@@ -13352,7 +13352,9 @@
{
struct ufsmount *ump;
struct mount *mp;
+ struct g_consumer *cp;
long starttime;
+ size_t resid;
ufs2_daddr_t needed;
int error, failed_vnode;
@@ -13374,6 +13376,7 @@
mp = vp->v_mount;
ump = VFSTOUFS(mp);
+ cp = (struct g_consumer *)ump->um_devvp->v_bufobj.bo_private;
mtx_assert(UFS_MTX(ump), MA_OWNED);
UFS_UNLOCK(ump);
error = ffs_update(vp, 1);
@@ -13428,6 +13431,9 @@
}
starttime = time_second;
retry:
+ if (resource == FLUSH_BLOCKS_WAIT &&
+ fs->fs_cstotal.cs_nbfree <= needed)
+ g_io_speedup(needed * fs->fs_bsize, BIO_SPEEDUP_TRIM, &resid, cp);
if ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 &&
fs->fs_cstotal.cs_nbfree <= needed) ||
(resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 &&
@@ -13574,6 +13580,8 @@
{
struct mount *mp;
struct ufsmount *ump;
+ struct g_consumer *cp;
+ size_t resid;
int error;
bool req;
@@ -13585,6 +13593,8 @@
return;
if (ffs_own_mount(mp) && MOUNTEDSOFTDEP(mp)) {
ump = VFSTOUFS(mp);
+ cp = (struct g_consumer *)ump->um_devvp->v_bufobj.bo_private;
+ g_io_speedup(0, BIO_SPEEDUP_TRIM, &resid, cp);
for (;;) {
req = false;
ACQUIRE_LOCK(ump);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Feb 8, 4:45 PM (11 h, 30 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28475174
Default Alt Text
D18351.id65452.diff (12 KB)
Attached To
Mode
D18351: Add BIO_SPEEDUP
Attached
Detach File
Event Timeline
Log In to Comment