D18351.id51588.diff
Index: sys/cam/cam_iosched.c
===================================================================
--- sys/cam/cam_iosched.c
+++ sys/cam/cam_iosched.c
@@ -281,11 +281,15 @@
int trim_ticks; /* Max ticks to hold trims */
int last_trim_tick; /* Last 'tick' time we queued a trim */
int queued_trims; /* Number of trims in the queue */
+ int max_trims; /* Maximum number of trims pending at once */
+ int pend_trims; /* Number of pending trims now */
#ifdef CAM_IOSCHED_DYNAMIC
int read_bias; /* Read bias setting */
int current_read_bias; /* Current read bias state */
int total_ticks;
int load; /* EMA of 'load average' of disk / 2^16 */
+ int speedup_ticks; /* When != 0, don't delay I/O for performance */
+#define SPEEDUP_TICKS 11
struct bio_queue_head write_queue;
struct iop_stats read_stats, write_stats, trim_stats;
@@ -574,6 +578,11 @@
isc->this_frac = (uint32_t)delta >> 16; /* Note: discards seconds -- should be 0; harmless if not */
isc->last_time = now;
+ if (isc->speedup_ticks > 0) {
+ isc->current_read_bias = 1;
+ isc->speedup_ticks--;
+ }
+
cam_iosched_cl_maybe_steer(&isc->cl);
cam_iosched_limiter_tick(&isc->read_stats);
@@ -707,11 +716,6 @@
}
#endif
-/*
- * Trim or similar currently pending completion. Should only be set for
- * those drivers wishing only one Trim active at a time.
- */
-#define CAM_IOSCHED_FLAG_TRIM_ACTIVE (1ul << 0)
/* Callout active, and needs to be torn down */
#define CAM_IOSCHED_FLAG_CALLOUT_ACTIVE (1ul << 1)
@@ -755,6 +759,19 @@
static inline bool
cam_iosched_has_more_trim(struct cam_iosched_softc *isc)
{
+ struct bio *bp;
+
+ bp = bioq_first(&isc->trim_queue);
+#ifdef CAM_IOSCHED_DYNAMIC
+ if (do_dynamic_iosched) {
+ /*
+ * If we're limiting trims, then defer action on trims
+ * for a bit.
+ */
+ if (bp == NULL || cam_iosched_limiter_caniop(&isc->trim_stats, bp) != 0)
+ return false;
+ }
+#endif
/*
* If we've set a trim_goal, then if we exceed that allow trims
@@ -771,8 +788,7 @@
return false;
}
- return !(isc->flags & CAM_IOSCHED_FLAG_TRIM_ACTIVE) &&
- bioq_first(&isc->trim_queue);
+ return isc->pend_trims < isc->max_trims && bp != NULL;
}
#define cam_iosched_sort_queue(isc) ((isc)->sort_io_queue >= 0 ? \
@@ -1096,6 +1112,7 @@
(*iscp)->sort_io_queue = -1;
bioq_init(&(*iscp)->bio_queue);
bioq_init(&(*iscp)->trim_queue);
+ (*iscp)->max_trims = 1;
#ifdef CAM_IOSCHED_DYNAMIC
if (do_dynamic_iosched) {
bioq_init(&(*iscp)->write_queue);
@@ -1311,7 +1328,7 @@
/*
* See if our current limiter allows this I/O.
*/
- if (cam_iosched_limiter_iop(&isc->write_stats, bp) != 0) {
+ if (isc->speedup_ticks == 0 && cam_iosched_limiter_iop(&isc->write_stats, bp) != 0) {
if (iosched_debug)
printf("Can't write because limiter says no.\n");
isc->write_stats.state_flags |= IOP_RATE_LIMITED;
@@ -1322,7 +1339,10 @@
* Let's do this: We've passed all the gates and we're a go
* to schedule the I/O in the SIM.
*/
- isc->current_read_bias = isc->read_bias;
+ if (isc->speedup_ticks > 0)
+ isc->current_read_bias = 1;
+ else
+ isc->current_read_bias = isc->read_bias;
bioq_remove(&isc->write_queue, bp);
if (bp->bio_cmd == BIO_WRITE) {
isc->write_stats.queued--;
@@ -1389,31 +1409,59 @@
struct bio *
cam_iosched_get_trim(struct cam_iosched_softc *isc)
{
+#ifdef CAM_IOSCHED_DYNAMIC
+ struct bio *bp;
+#endif
if (!cam_iosched_has_more_trim(isc))
return NULL;
#ifdef CAM_IOSCHED_DYNAMIC
+ if (!do_dynamic_iosched)
+ return cam_iosched_next_trim(isc);
+
+ bp = bioq_first(&isc->trim_queue);
+ if (bp == NULL)
+ return NULL;
+
/*
* If pending read, prefer that based on current read bias setting. The
* read bias is shared for both writes and TRIMs, but on TRIMs the bias
- * is for a combined TRIM not a single TRIM request that's come in.
+ * is for a combined TRIM not a single TRIM request that's come in. If
+ * we do return NULL, we're not rate-limiting TRIMs, so we don't change
+ * the limiter flag.
*/
- if (do_dynamic_iosched) {
- if (bioq_first(&isc->bio_queue) && isc->current_read_bias) {
- if (iosched_debug)
- printf("Reads present and current_read_bias is %d"
- " queued trims %d queued reads %d\n",
- isc->current_read_bias, isc->trim_stats.queued,
- isc->read_stats.queued);
- isc->current_read_bias--;
- /* We're not limiting TRIMS, per se, just doing reads first */
- return NULL;
- }
- /*
- * We're going to do a trim, so reset the bias.
- */
- isc->current_read_bias = isc->read_bias;
+ if (bioq_first(&isc->bio_queue) && isc->current_read_bias) {
+ if (iosched_debug)
+ printf("Reads present and current_read_bias is %d"
+ " queued trims %d queued reads %d\n",
+ isc->current_read_bias, isc->trim_stats.queued,
+ isc->read_stats.queued);
+ isc->current_read_bias--;
+ return NULL;
+ }
+
+ /*
+ * See if our current limiter allows this I/O. Because we only call this
+ * here, and not in next_trim, the 'bandwidth' limits for trims won't
+ * work, while the iops or max queued limits will work. It's tricky
+ * because we want the limits to be from the perspective of the
+ * "commands sent to the device." To make iops work, we need to check
+ * only here (since we want all the ops we combine to count as one). To
+ * make bw limits work, we'd need to check in next_trim, but that would
+ * have the effect of limiting the iops as seen from the upper layers.
+ */
+ if (isc->speedup_ticks == 0 && cam_iosched_limiter_iop(&isc->trim_stats, bp) != 0) {
+ if (iosched_debug)
+ printf("Can't trim because limiter says no.\n");
+ isc->trim_stats.state_flags |= IOP_RATE_LIMITED;
+ return NULL;
}
+ if (isc->speedup_ticks > 0)
+ isc->current_read_bias = 1;
+ else
+ isc->current_read_bias = isc->read_bias;
+ isc->trim_stats.state_flags &= ~IOP_RATE_LIMITED;
+ /* cam_iosched_next_trim below keeps proper book */
#endif
return cam_iosched_next_trim(isc);
}
@@ -1496,6 +1544,60 @@
cam_iosched_queue_work(struct cam_iosched_softc *isc, struct bio *bp)
{
+ /*
+ * A BIO_SPEEDUP from the upper layers means that they have a block
+ * shortage. At present, this is sent only when we're trying to
+ * allocate blocks and hit a shortage before giving up. bio_length is
+ * the size of their shortage. We will complete just enough BIO_DELETEs
+ * in the queue to satisfy the need. If bio_length is 0, we'll complete
+ * them all. This allows the scheduler to delay BIO_DELETEs to improve
+ * read/write performance without worrying about the upper layers: when
+ * the delay might be a problem, we respond by pretending the delayed
+ * BIO_DELETEs just worked. We can't do anything about the BIO_DELETEs
+ * already in the hardware, though; we have to wait for those to complete.
+ */
+ if (bp->bio_cmd == BIO_SPEEDUP) {
+ off_t len;
+ struct bio *nbp;
+
+ /*
+ * A request of 0 length (of either type) puts us into a special mode.
+ */
+ if (bp->bio_length == 0) {
+ isc->speedup_ticks = SPEEDUP_TICKS;
+ bp->bio_error = 0;
+ biodone(bp);
+ return;
+ }
+
+ /*
+ * Ignore non-trim speedup requests.
+ */
+ if ((bp->bio_flags & BIO_SPEEDUP_TRIM) == 0) {
+ bp->bio_error = 0;
+ biodone(bp);
+ return;
+ }
+
+ len = 0;
+ while (bioq_first(&isc->trim_queue) &&
+ (bp->bio_length == 0 || len < bp->bio_length)) {
+ nbp = bioq_takefirst(&isc->trim_queue);
+ len += nbp->bio_length;
+ nbp->bio_error = 0;
+ biodone(nbp);
+ }
+ if (bp->bio_length > 0) {
+ if (bp->bio_length > len)
+ bp->bio_resid = bp->bio_length - len;
+ else
+ bp->bio_resid = 0;
+ }
+ bp->bio_error = 0;
+ biodone(bp);
+ return;
+ }
+
/*
* If we get a BIO_FLUSH, and we're doing delayed BIO_DELETEs then we
* set the last tick time to one less than the current ticks minus the
@@ -1569,7 +1671,7 @@
cam_iosched_trim_done(struct cam_iosched_softc *isc)
{
- isc->flags &= ~CAM_IOSCHED_FLAG_TRIM_ACTIVE;
+ isc->pend_trims--;
}
/*
@@ -1637,7 +1739,7 @@
cam_iosched_submit_trim(struct cam_iosched_softc *isc)
{
- isc->flags |= CAM_IOSCHED_FLAG_TRIM_ACTIVE;
+ isc->pend_trims++;
}
/*
@@ -1863,7 +1965,7 @@
db_printf("in_reads: %d\n", isc->read_stats.in);
db_printf("out_reads: %d\n", isc->read_stats.out);
db_printf("queued_reads: %d\n", isc->read_stats.queued);
- db_printf("Current Q len %d\n", biolen(&isc->bio_queue));
+ db_printf("Read Q len %d\n", biolen(&isc->bio_queue));
db_printf("pending_writes: %d\n", isc->write_stats.pending);
db_printf("min_writes: %d\n", isc->write_stats.min);
db_printf("max_writes: %d\n", isc->write_stats.max);
@@ -1871,7 +1973,7 @@
db_printf("in_writes: %d\n", isc->write_stats.in);
db_printf("out_writes: %d\n", isc->write_stats.out);
db_printf("queued_writes: %d\n", isc->write_stats.queued);
- db_printf("Current Q len %d\n", biolen(&isc->write_queue));
+ db_printf("Write Q len %d\n", biolen(&isc->write_queue));
db_printf("pending_trims: %d\n", isc->trim_stats.pending);
db_printf("min_trims: %d\n", isc->trim_stats.min);
db_printf("max_trims: %d\n", isc->trim_stats.max);
@@ -1879,11 +1981,11 @@
db_printf("in_trims: %d\n", isc->trim_stats.in);
db_printf("out_trims: %d\n", isc->trim_stats.out);
db_printf("queued_trims: %d\n", isc->trim_stats.queued);
- db_printf("Current Q len %d\n", biolen(&isc->trim_queue));
+ db_printf("Trim Q len %d\n", biolen(&isc->trim_queue));
db_printf("read_bias: %d\n", isc->read_bias);
db_printf("current_read_bias: %d\n", isc->current_read_bias);
- db_printf("Trim active? %s\n",
- (isc->flags & CAM_IOSCHED_FLAG_TRIM_ACTIVE) ? "yes" : "no");
+ db_printf("Trims active %d\n", isc->pend_trims);
+ db_printf("Max trims active %d\n", isc->max_trims);
}
#endif
#endif
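
For context: the pend_trims / max_trims counters above replace the old single-bit CAM_IOSCHED_FLAG_TRIM_ACTIVE, so the scheduler can account for more than one trim in flight. A minimal sketch of how a periph driver is expected to bracket a trim (xxx_start_trim and xxx_trim_done are hypothetical names for illustration; only the cam_iosched_* calls and biodone() are real):

	static void
	xxx_start_trim(struct cam_iosched_softc *isc)
	{
		struct bio *bp;

		/* NULL when the limiter, read bias, or trim goal says wait. */
		bp = cam_iosched_get_trim(isc);
		if (bp == NULL)
			return;
		cam_iosched_submit_trim(isc);	/* isc->pend_trims++ */
		/* ... build and dispatch the TRIM/UNMAP command for bp ... */
	}

	static void
	xxx_trim_done(struct cam_iosched_softc *isc, struct bio *bp)
	{
		cam_iosched_trim_done(isc);	/* isc->pend_trims-- */
		biodone(bp);			/* complete the original request */
	}
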
Index: sys/geom/geom.h
===================================================================
--- sys/geom/geom.h
+++ sys/geom/geom.h
@@ -336,6 +336,7 @@
int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr);
int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp);
int g_io_flush(struct g_consumer *cp);
+int g_io_speedup(size_t shortage, u_int flags, size_t *resid, struct g_consumer *cp);
int g_register_classifier(struct g_classifier_hook *hook);
void g_unregister_classifier(struct g_classifier_hook *hook);
void g_io_request(struct bio *bp, struct g_consumer *cp);
Index: sys/geom/geom_io.c
===================================================================
--- sys/geom/geom_io.c
+++ sys/geom/geom_io.c
@@ -338,6 +338,42 @@
return (error);
}
+/*
+ * Send a BIO_SPEEDUP down the stack. It tells the lower layers that the upper
+ * layers have encountered a resource shortage. The lower layers are advised to
+ * stop delaying bio transactions that they might be holding for performance
+ * reasons and to schedule them (read/write/flush) or complete them successfully
+ * (trims) as quickly as they can. bio_length is the amount of the shortage.
+ * bio_resid is used to communicate back if the lower layers couldn't find
+ * bio_length worth of I/O to schedule or discard. A length of 0 means to do as
+ * much as you can (schedule the h/w queues full, discard all trims). flags are
+ * a hint from the upper layers to the lower layers about which operations
+ * should be sped up. The call should be non-blocking.
+ */
+int
+g_io_speedup(size_t shortage, u_int flags, size_t *resid, struct g_consumer *cp)
+{
+ struct bio *bp;
+ int error;
+
+ KASSERT((flags & (BIO_SPEEDUP_TRIM | BIO_SPEEDUP_WRITE)) != 0,
+ ("Invalid flags passed to g_io_speedup: %#x", flags));
+ g_trace(G_T_BIO, "bio_speedup(%s, %zu, %#x)", cp->provider->name,
+ shortage, flags);
+ bp = g_new_bio();
+ if (bp == NULL)
+ return (ENOMEM);
+ bp->bio_cmd = BIO_SPEEDUP;
+ bp->bio_length = shortage;
+ bp->bio_done = NULL;
+ bp->bio_flags |= flags;
+ g_io_request(bp, cp);
+ error = biowait(bp, "gflush");
+ /* Callers that don't care (e.g. softdep_speedup) pass NULL for resid. */
+ if (resid != NULL)
+ *resid = bp->bio_resid;
+ g_destroy_bio(bp);
+ return (error);
+}
+
int
g_io_flush(struct g_consumer *cp)
{
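
A hedged usage sketch of the new helper (cp is assumed to be a valid, attached g_consumer; the 16 MB shortage is an illustrative number, not from this diff):

	size_t resid;
	int error;

	/* Ask the lower layers to complete ~16 MB worth of held-back trims. */
	error = g_io_speedup(16 * 1024 * 1024, BIO_SPEEDUP_TRIM, &resid, cp);
	if (error == 0 && resid != 0) {
		/* The lower layers found less than the requested shortage. */
		printf("speedup fell short by %zu bytes\n", resid);
	}

Note that the helper sleeps in biowait() until the lower layers complete the BIO_SPEEDUP, which they are expected to do promptly without waiting for new I/O.
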
Index: sys/sys/bio.h
===================================================================
--- sys/sys/bio.h
+++ sys/sys/bio.h
@@ -53,6 +53,7 @@
#define BIO_CMD1 0x07 /* Available for local hacks */
#define BIO_CMD2 0x08 /* Available for local hacks */
#define BIO_ZONE 0x09 /* Zone command */
+#define BIO_SPEEDUP 0x0a /* Upper layers face shortage */
/* bio_flags */
#define BIO_ERROR 0x01 /* An error occurred processing this bio. */
@@ -67,6 +68,9 @@
#define BIO_TRANSIENT_MAPPING 0x20
#define BIO_VLIST 0x40
+#define BIO_SPEEDUP_WRITE 0x4000 /* Resource shortage at upper layers */
+#define BIO_SPEEDUP_TRIM 0x8000 /* Resource shortage at upper layers */
+
#ifdef _KERNEL
struct disk;
struct bio;
Index: sys/ufs/ffs/ffs_softdep.c
===================================================================
--- sys/ufs/ffs/ffs_softdep.c
+++ sys/ufs/ffs/ffs_softdep.c
@@ -903,7 +903,6 @@
int, struct pagedep **);
static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t,
struct pagedep **);
-static void pause_timer(void *);
static int request_cleanup(struct mount *, int);
static int softdep_request_cleanup_flush(struct mount *, struct ufsmount *);
static void schedule_cleanup(struct mount *);
@@ -1256,9 +1255,6 @@
*/
static int max_softdeps; /* maximum number of structs before slowdown */
static int tickdelay = 2; /* number of ticks to pause during slowdown */
-static int proc_waiting; /* tracks whether we have a timeout posted */
-static int *stat_countp; /* statistic to count in proc_waiting timeout */
-static struct callout softdep_callout;
static int req_clear_inodedeps; /* syncer process flush some inodedeps */
static int req_clear_remove; /* syncer process flush some freeblks */
static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? */
@@ -1448,6 +1444,7 @@
LOCK_OWNED(ump);
worklist_speedup(ump->um_mountp);
+ g_io_speedup(0, BIO_SPEEDUP_WRITE, NULL, ump->um_cp);
bd_speedup();
/*
* If we have global shortages, then we need other
@@ -2429,9 +2426,6 @@
bioops.io_deallocate = softdep_deallocate_dependencies;
bioops.io_countdeps = softdep_count_dependencies;
softdep_ast_cleanup = softdep_ast_cleanup_proc;
-
- /* Initialize the callout with an mtx. */
- callout_init_mtx(&softdep_callout, &lk, 0);
}
/*
@@ -2448,8 +2442,6 @@
bioops.io_deallocate = NULL;
bioops.io_countdeps = NULL;
softdep_ast_cleanup = NULL;
-
- callout_drain(&softdep_callout);
}
/*
@@ -13328,6 +13320,7 @@
struct ufsmount *ump;
struct mount *mp;
long starttime;
+ size_t resid;
ufs2_daddr_t needed;
int error, failed_vnode;
@@ -13402,14 +13395,17 @@
}
starttime = time_second;
retry:
- if ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 &&
+ if (resource == FLUSH_BLOCKS_WAIT &&
+ fs->fs_cstotal.cs_nbfree <= needed)
+ g_io_speedup(needed * fs->fs_bsize, BIO_SPEEDUP_TRIM, &resid,
+ ump->um_cp);
+ while ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 &&
fs->fs_cstotal.cs_nbfree <= needed) ||
(resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 &&
fs->fs_cstotal.cs_nifree <= needed)) {
ACQUIRE_LOCK(ump);
if (ump->softdep_on_worklist > 0 &&
- process_worklist_item(UFSTOVFS(ump),
- ump->softdep_on_worklist, LK_NOWAIT) != 0)
+ process_worklist_item(UFSTOVFS(ump), 1, LK_NOWAIT) != 0)
stat_worklist_push += 1;
FREE_LOCK(ump);
}
@@ -13640,11 +13636,7 @@
* If we are resource constrained on inode dependencies, try
* flushing some dirty inodes. Otherwise, we are constrained
* by file deletions, so try accelerating flushes of directories
- * with removal dependencies. We would like to do the cleanup
- * here, but we probably hold an inode locked at this point and
- * that might deadlock against one that we try to clean. So,
- * the best that we can do is request the syncer daemon to do
- * the cleanup for us.
+ * with removal dependencies.
*/
switch (resource) {
@@ -13654,7 +13646,7 @@
stat_ino_limit_push += 1;
req_clear_inodedeps += 1;
FREE_GBLLOCK(&lk);
- stat_countp = &stat_ino_limit_hit;
+ clear_inodedeps(mp);
break;
case FLUSH_BLOCKS:
@@ -13663,50 +13655,16 @@
stat_blk_limit_push += 1;
req_clear_remove += 1;
FREE_GBLLOCK(&lk);
- stat_countp = &stat_blk_limit_hit;
+ g_io_speedup(0, BIO_SPEEDUP_TRIM | BIO_SPEEDUP_WRITE, NULL, ump->um_cp);
+ clear_remove(mp);
break;
default:
panic("request_cleanup: unknown type");
}
- /*
- * Hopefully the syncer daemon will catch up and awaken us.
- * We wait at most tickdelay before proceeding in any case.
- */
- ACQUIRE_GBLLOCK(&lk);
- FREE_LOCK(ump);
- proc_waiting += 1;
- if (callout_pending(&softdep_callout) == FALSE)
- callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2,
- pause_timer, 0);
-
- if ((td->td_pflags & TDP_KTHREAD) == 0)
- msleep((caddr_t)&proc_waiting, &lk, PPAUSE, "softupdate", 0);
- proc_waiting -= 1;
- FREE_GBLLOCK(&lk);
- ACQUIRE_LOCK(ump);
return (1);
}
-/*
- * Awaken processes pausing in request_cleanup and clear proc_waiting
- * to indicate that there is no longer a timer running. Pause_timer
- * will be called with the global softdep mutex (&lk) locked.
- */
-static void
-pause_timer(arg)
- void *arg;
-{
-
- GBLLOCK_OWNED(&lk);
- /*
- * The callout_ API has acquired mtx and will hold it around this
- * function call.
- */
- *stat_countp += proc_waiting;
- wakeup(&proc_waiting);
-}
-
/*
* If requested, try removing inode or removal dependencies.
*/
@@ -13730,14 +13688,12 @@
FREE_GBLLOCK(&lk);
clear_inodedeps(mp);
ACQUIRE_GBLLOCK(&lk);
- wakeup(&proc_waiting);
}
if (req_clear_remove) {
req_clear_remove -= 1;
FREE_GBLLOCK(&lk);
clear_remove(mp);
ACQUIRE_GBLLOCK(&lk);
- wakeup(&proc_waiting);
}
FREE_GBLLOCK(&lk);
}
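
For scale, the request_cleanup() hunk above converts the shortage from filesystem blocks into bytes before passing it down. A worked illustration with made-up values (needed and the block size are hypothetical; ump->um_cp is the mount's GEOM consumer as in the diff):

	size_t resid;
	int64_t needed = 256;	/* blocks short, illustrative */
	int bsize = 32768;	/* fs->fs_bsize, illustrative */

	/* 256 blocks * 32768 bytes/block = 8 MiB of queued BIO_DELETEs. */
	g_io_speedup((size_t)needed * bsize, BIO_SPEEDUP_TRIM, &resid, ump->um_cp);

In this revision resid is collected but not otherwise consulted; the while loop that follows the call keeps pushing worklist items until the shortage clears.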
