Index: sys/cam/cam_iosched.c =================================================================== --- sys/cam/cam_iosched.c +++ sys/cam/cam_iosched.c @@ -281,11 +281,15 @@ int trim_ticks; /* Max ticks to hold trims */ int last_trim_tick; /* Last 'tick' time ld a trim */ int queued_trims; /* Number of trims in the queue */ + int max_trims; /* Maximum number of trims pending at once */ + int pend_trims; /* Number of pending trims now */ #ifdef CAM_IOSCHED_DYNAMIC int read_bias; /* Read bias setting */ int current_read_bias; /* Current read bias state */ int total_ticks; int load; /* EMA of 'load average' of disk / 2^16 */ + int speedup_ticks; /* When != 0, don't delay I/O for performance */ +#define SPEEDUP_TICKS 11 struct bio_queue_head write_queue; struct iop_stats read_stats, write_stats, trim_stats; @@ -574,6 +578,11 @@ isc->this_frac = (uint32_t)delta >> 16; /* Note: discards seconds -- should be 0 harmless if not */ isc->last_time = now; + if (isc->speedup_ticks > 0) { + isc->current_read_bias = 1; + isc->speedup_ticks--; + } + cam_iosched_cl_maybe_steer(&isc->cl); cam_iosched_limiter_tick(&isc->read_stats); @@ -707,11 +716,6 @@ } #endif -/* - * Trim or similar currently pending completion. Should only be set for - * those drivers wishing only one Trim active at a time. - */ -#define CAM_IOSCHED_FLAG_TRIM_ACTIVE (1ul << 0) /* Callout active, and needs to be torn down */ #define CAM_IOSCHED_FLAG_CALLOUT_ACTIVE (1ul << 1) @@ -755,6 +759,19 @@ static inline bool cam_iosched_has_more_trim(struct cam_iosched_softc *isc) { + struct bio *bp; + + bp = bioq_first(&isc->trim_queue); +#ifdef CAM_IOSCHED_DYNAMIC + if (do_dynamic_iosched) { + /* + * If we're limiting trims, then defer action on trims + * for a bit. 
+ */ + if (bp == NULL || cam_iosched_limiter_caniop(&isc->trim_stats, bp) != 0) + return false; + } +#endif /* * If we've set a trim_goal, then if we exceed that allow trims @@ -771,8 +788,7 @@ return false; } - return !(isc->flags & CAM_IOSCHED_FLAG_TRIM_ACTIVE) && - bioq_first(&isc->trim_queue); + return isc->pend_trims <= isc->max_trims && bp != NULL; } #define cam_iosched_sort_queue(isc) ((isc)->sort_io_queue >= 0 ? \ @@ -1096,6 +1112,7 @@ (*iscp)->sort_io_queue = -1; bioq_init(&(*iscp)->bio_queue); bioq_init(&(*iscp)->trim_queue); + (*iscp)->max_trims = 1; #ifdef CAM_IOSCHED_DYNAMIC if (do_dynamic_iosched) { bioq_init(&(*iscp)->write_queue); @@ -1311,7 +1328,7 @@ /* * See if our current limiter allows this I/O. */ - if (cam_iosched_limiter_iop(&isc->write_stats, bp) != 0) { + if (isc->speedup_ticks == 0 && cam_iosched_limiter_iop(&isc->write_stats, bp) != 0) { if (iosched_debug) printf("Can't write because limiter says no.\n"); isc->write_stats.state_flags |= IOP_RATE_LIMITED; @@ -1322,7 +1339,10 @@ * Let's do this: We've passed all the gates and we're a go * to schedule the I/O in the SIM. */ - isc->current_read_bias = isc->read_bias; + if (isc->speedup_ticks > 0) + isc->current_read_bias = 1; + else + isc->current_read_bias = isc->read_bias; bioq_remove(&isc->write_queue, bp); if (bp->bio_cmd == BIO_WRITE) { isc->write_stats.queued--; @@ -1389,31 +1409,59 @@ struct bio * cam_iosched_get_trim(struct cam_iosched_softc *isc) { +#ifdef CAM_IOSCHED_DYNAMIC + struct bio *bp; +#endif if (!cam_iosched_has_more_trim(isc)) return NULL; #ifdef CAM_IOSCHED_DYNAMIC + if (!do_dynamic_iosched) + return cam_iosched_next_trim(isc); + + bp = bioq_first(&isc->trim_queue); + if (bp == NULL) + return NULL; + /* * If pending read, prefer that based on current read bias setting. The * read bias is shared for both writes and TRIMs, but on TRIMs the bias - * is for a combined TRIM not a single TRIM request that's come in. 
+ * is for a combined TRIM not a single TRIM request that's come in. If + * we do return NULL, we're not rate-limiting TRIMs, so we don't change + * the limiter flag. */ - if (do_dynamic_iosched) { - if (bioq_first(&isc->bio_queue) && isc->current_read_bias) { - if (iosched_debug) - printf("Reads present and current_read_bias is %d" - " queued trims %d queued reads %d\n", - isc->current_read_bias, isc->trim_stats.queued, - isc->read_stats.queued); - isc->current_read_bias--; - /* We're not limiting TRIMS, per se, just doing reads first */ - return NULL; - } - /* - * We're going to do a trim, so reset the bias. - */ - isc->current_read_bias = isc->read_bias; + if (bioq_first(&isc->bio_queue) && isc->current_read_bias) { + if (iosched_debug) + printf("Reads present and current_read_bias is %d" + " queued trims %d queued reads %d\n", + isc->current_read_bias, isc->trim_stats.queued, + isc->read_stats.queued); + isc->current_read_bias--; + return NULL; + } + + /* + * See if our current limiter allows this I/O. Because we only call this + * here, and not in next_trim, the 'bandwidth' limits for trims won't + * work, while the iops or max queued limits will work. It's tricky + * because we want the limits to be from the perspective of the + * "commands sent to the device." To make iops work, we need to check + * only here (since we want all the ops we combine to count as one). To + * make bw limits work, we'd need to check in next_trim, but that would + * have the effect of limiting the iops as seen from the upper layers. 
+ */ + if (isc->speedup_ticks == 0 && cam_iosched_limiter_iop(&isc->trim_stats, bp) != 0) { + if (iosched_debug) + printf("Can't trim because limiter says no.\n"); + isc->trim_stats.state_flags |= IOP_RATE_LIMITED; + return NULL; } + if (isc->speedup_ticks > 0) + isc->current_read_bias = 1; + else + isc->current_read_bias = isc->read_bias; + isc->trim_stats.state_flags &= ~IOP_RATE_LIMITED; + /* cam_iosched_next_trim below keeps proper book */ #endif return cam_iosched_next_trim(isc); } @@ -1496,6 +1544,60 @@ cam_iosched_queue_work(struct cam_iosched_softc *isc, struct bio *bp) { + /* + * A BIO_SPEEDUP from the upper layers means that they have a block + * shortage. At the present, this is only sent when we're trying to + * allocate blocks, but have a shortage before giving up. bio_length is + * the size of their shortage. We will complete just enough BIO_DELETEs + * in the queue to satisfy the need. If bio_length is 0, we'll complete + * them all. This allows the scheduler to delay BIO_DELETEs to improve + * read/write performance without worrying about the upper layers. When + * it's possibly a problem, we respond by pretending the BIO_DELETEs + * just worked. We can't do anything about the BIO_DELETEs in the + * hardware, though. We have to wait for them to complete. + */ + if (bp->bio_cmd == BIO_SPEEDUP) { + off_t len; + struct bio *nbp; + + /* + * Either request of 0 length puts us into a special mode. + */ + if (bp->bio_length == 0) { + isc->speedup_ticks = SPEEDUP_TICKS; + bp->bio_error = 0; + biodone(bp); + return; + } + + /* + * Ignore non-trim speedup requests. 
+ */ + if ((bp->bio_flags & BIO_SPEEDUP_TRIM) == 0) { + bp->bio_error = 0; + biodone(bp); + return; + } + + len = 0; + while (bioq_first(&isc->trim_queue) && + (bp->bio_length == 0 || len < bp->bio_length)) { + nbp = bioq_takefirst(&isc->trim_queue); + len += nbp->bio_length; + nbp->bio_error = 0; + biodone(nbp); + } + if (bp->bio_length > 0) { + if (bp->bio_length > len) + bp->bio_resid = bp->bio_length - len; + else + bp->bio_resid = 0; + } + bp->bio_error = 0; + biodone(bp); + return; + } + /* * If we get a BIO_FLUSH, and we're doing delayed BIO_DELETEs then we * set the last tick time to one less than the current ticks minus the @@ -1569,7 +1671,7 @@ cam_iosched_trim_done(struct cam_iosched_softc *isc) { - isc->flags &= ~CAM_IOSCHED_FLAG_TRIM_ACTIVE; + isc->pend_trims--; } /* @@ -1637,7 +1739,7 @@ cam_iosched_submit_trim(struct cam_iosched_softc *isc) { - isc->flags |= CAM_IOSCHED_FLAG_TRIM_ACTIVE; + isc->pend_trims++; } /* @@ -1863,7 +1965,7 @@ db_printf("in_reads: %d\n", isc->read_stats.in); db_printf("out_reads: %d\n", isc->read_stats.out); db_printf("queued_reads: %d\n", isc->read_stats.queued); - db_printf("Current Q len %d\n", biolen(&isc->bio_queue)); + db_printf("Read Q len %d\n", biolen(&isc->bio_queue)); db_printf("pending_writes: %d\n", isc->write_stats.pending); db_printf("min_writes: %d\n", isc->write_stats.min); db_printf("max_writes: %d\n", isc->write_stats.max); @@ -1871,7 +1973,7 @@ db_printf("in_writes: %d\n", isc->write_stats.in); db_printf("out_writes: %d\n", isc->write_stats.out); db_printf("queued_writes: %d\n", isc->write_stats.queued); - db_printf("Current Q len %d\n", biolen(&isc->write_queue)); + db_printf("Write Q len %d\n", biolen(&isc->write_queue)); db_printf("pending_trims: %d\n", isc->trim_stats.pending); db_printf("min_trims: %d\n", isc->trim_stats.min); db_printf("max_trims: %d\n", isc->trim_stats.max); @@ -1879,11 +1981,11 @@ db_printf("in_trims: %d\n", isc->trim_stats.in); db_printf("out_trims: %d\n", isc->trim_stats.out); 
db_printf("queued_trims: %d\n", isc->trim_stats.queued); - db_printf("Current Q len %d\n", biolen(&isc->trim_queue)); + db_printf("Trim Q len %d\n", biolen(&isc->trim_queue)); db_printf("read_bias: %d\n", isc->read_bias); db_printf("current_read_bias: %d\n", isc->current_read_bias); - db_printf("Trim active? %s\n", - (isc->flags & CAM_IOSCHED_FLAG_TRIM_ACTIVE) ? "yes" : "no"); + db_printf("Trims active %d\n", isc->pend_trims); + db_printf("Max trims active %d\n", isc->max_trims); } #endif #endif Index: sys/geom/geom.h =================================================================== --- sys/geom/geom.h +++ sys/geom/geom.h @@ -336,6 +336,7 @@ int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr); int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp); int g_io_flush(struct g_consumer *cp); +int g_io_speedup(size_t shortage, u_int flags, size_t *resid, struct g_consumer *cp); int g_register_classifier(struct g_classifier_hook *hook); void g_unregister_classifier(struct g_classifier_hook *hook); void g_io_request(struct bio *bp, struct g_consumer *cp); Index: sys/geom/geom_io.c =================================================================== --- sys/geom/geom_io.c +++ sys/geom/geom_io.c @@ -338,6 +338,42 @@ return (error); } +/* + * Send a BIO_SPEEDUP down the stack. It tells the lower layers that the upper + * layers have encountered a resource shortage. The lower layers are advised to + * stop delaying bio transactions that they might be holding for performance + * reasons and to schedule them (read/write/flush) or complete them successfully + * (trims) as quickly as it can. bio_length is the amount of the shortage. + * bio_resid is used to communicate back if the lower layers couldn't find + * bio_length worth of I/O to schedule or discard. A length of 0 means to do as + * much as you can (schedule the h/w queues full, discard all trims). 
flags are + * a hint from the upper layers to the lower layers what operation should be + * done. The call should be non-blocking. + */ +int +g_io_speedup(size_t shortage, u_int flags, size_t *resid, struct g_consumer *cp) +{ + struct bio *bp; + int error; + + KASSERT((flags & (BIO_SPEEDUP_TRIM | BIO_SPEEDUP_WRITE)) != 0, + ("Invalid flags passed to g_io_speedup: %#x", flags)); + g_trace(G_T_BIO, "bio_speedup(%s, %zu, %#x)", cp->provider->name, + shortage, flags); + bp = g_new_bio(); + if (bp == NULL) + return (ENOMEM); + bp->bio_cmd = BIO_SPEEDUP; + bp->bio_length = shortage; + bp->bio_done = NULL; + bp->bio_flags |= flags; + g_io_request(bp, cp); + error = biowait(bp, "gflush"); + *resid = bp->bio_resid; + g_destroy_bio(bp); + return (error); +} + int g_io_flush(struct g_consumer *cp) { Index: sys/sys/bio.h =================================================================== --- sys/sys/bio.h +++ sys/sys/bio.h @@ -53,6 +53,7 @@ #define BIO_CMD1 0x07 /* Available for local hacks */ #define BIO_CMD2 0x08 /* Available for local hacks */ #define BIO_ZONE 0x09 /* Zone command */ +#define BIO_SPEEDUP 0x0a /* Upper layers face shortage */ /* bio_flags */ #define BIO_ERROR 0x01 /* An error occurred processing this bio. 
*/ @@ -67,6 +68,9 @@ #define BIO_TRANSIENT_MAPPING 0x20 #define BIO_VLIST 0x40 +#define BIO_SPEEDUP_WRITE 0x4000 /* Resource shortage at upper layers */ +#define BIO_SPEEDUP_TRIM 0x8000 /* Resource shortage at upper layers */ + #ifdef _KERNEL struct disk; struct bio; Index: sys/ufs/ffs/ffs_softdep.c =================================================================== --- sys/ufs/ffs/ffs_softdep.c +++ sys/ufs/ffs/ffs_softdep.c @@ -903,7 +903,6 @@ int, struct pagedep **); static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t, struct pagedep **); -static void pause_timer(void *); static int request_cleanup(struct mount *, int); static int softdep_request_cleanup_flush(struct mount *, struct ufsmount *); static void schedule_cleanup(struct mount *); @@ -1256,9 +1255,6 @@ */ static int max_softdeps; /* maximum number of structs before slowdown */ static int tickdelay = 2; /* number of ticks to pause during slowdown */ -static int proc_waiting; /* tracks whether we have a timeout posted */ -static int *stat_countp; /* statistic to count in proc_waiting timeout */ -static struct callout softdep_callout; static int req_clear_inodedeps; /* syncer process flush some inodedeps */ static int req_clear_remove; /* syncer process flush some freeblks */ static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? */ @@ -1448,6 +1444,7 @@ LOCK_OWNED(ump); worklist_speedup(ump->um_mountp); + g_io_speedup(0, BIO_SPEEDUP_WRITE, NULL, ump->um_cp); bd_speedup(); /* * If we have global shortages, then we need other @@ -2429,9 +2426,6 @@ bioops.io_deallocate = softdep_deallocate_dependencies; bioops.io_countdeps = softdep_count_dependencies; softdep_ast_cleanup = softdep_ast_cleanup_proc; - - /* Initialize the callout with an mtx. 
*/ - callout_init_mtx(&softdep_callout, &lk, 0); } /* @@ -2448,8 +2442,6 @@ bioops.io_deallocate = NULL; bioops.io_countdeps = NULL; softdep_ast_cleanup = NULL; - - callout_drain(&softdep_callout); } /* @@ -13328,6 +13320,7 @@ struct ufsmount *ump; struct mount *mp; long starttime; + size_t resid; ufs2_daddr_t needed; int error, failed_vnode; @@ -13402,14 +13395,17 @@ } starttime = time_second; retry: - if ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 && + if (resource == FLUSH_BLOCKS_WAIT && + fs->fs_cstotal.cs_nbfree <= needed) + g_io_speedup(needed * fs->fs_bsize, BIO_SPEEDUP_TRIM, &resid, + ump->um_cp); + while ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 && fs->fs_cstotal.cs_nbfree <= needed) || (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 && fs->fs_cstotal.cs_nifree <= needed)) { ACQUIRE_LOCK(ump); if (ump->softdep_on_worklist > 0 && - process_worklist_item(UFSTOVFS(ump), - ump->softdep_on_worklist, LK_NOWAIT) != 0) + process_worklist_item(UFSTOVFS(ump), 1, LK_NOWAIT) != 0) stat_worklist_push += 1; FREE_LOCK(ump); } @@ -13640,11 +13636,7 @@ * If we are resource constrained on inode dependencies, try * flushing some dirty inodes. Otherwise, we are constrained * by file deletions, so try accelerating flushes of directories - * with removal dependencies. We would like to do the cleanup - * here, but we probably hold an inode locked at this point and - * that might deadlock against one that we try to clean. So, - * the best that we can do is request the syncer daemon to do - * the cleanup for us. + * with removal dependencies. 
*/ switch (resource) { @@ -13654,7 +13646,7 @@ stat_ino_limit_push += 1; req_clear_inodedeps += 1; FREE_GBLLOCK(&lk); - stat_countp = &stat_ino_limit_hit; + clear_inodedeps(mp); break; case FLUSH_BLOCKS: @@ -13663,50 +13655,16 @@ stat_blk_limit_push += 1; req_clear_remove += 1; FREE_GBLLOCK(&lk); - stat_countp = &stat_blk_limit_hit; + g_io_speedup(0, BIO_SPEEDUP_TRIM | BIO_SPEEDUP_WRITE, NULL, ump->um_cp); + clear_remove(mp); break; default: panic("request_cleanup: unknown type"); } - /* - * Hopefully the syncer daemon will catch up and awaken us. - * We wait at most tickdelay before proceeding in any case. - */ - ACQUIRE_GBLLOCK(&lk); - FREE_LOCK(ump); - proc_waiting += 1; - if (callout_pending(&softdep_callout) == FALSE) - callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2, - pause_timer, 0); - - if ((td->td_pflags & TDP_KTHREAD) == 0) - msleep((caddr_t)&proc_waiting, &lk, PPAUSE, "softupdate", 0); - proc_waiting -= 1; - FREE_GBLLOCK(&lk); - ACQUIRE_LOCK(ump); return (1); } -/* - * Awaken processes pausing in request_cleanup and clear proc_waiting - * to indicate that there is no longer a timer running. Pause_timer - * will be called with the global softdep mutex (&lk) locked. - */ -static void -pause_timer(arg) - void *arg; -{ - - GBLLOCK_OWNED(&lk); - /* - * The callout_ API has acquired mtx and will hold it around this - * function call. - */ - *stat_countp += proc_waiting; - wakeup(&proc_waiting); -} - /* * If requested, try removing inode or removal dependencies. */ @@ -13730,14 +13688,12 @@ FREE_GBLLOCK(&lk); clear_inodedeps(mp); ACQUIRE_GBLLOCK(&lk); - wakeup(&proc_waiting); } if (req_clear_remove) { req_clear_remove -= 1; FREE_GBLLOCK(&lk); clear_remove(mp); ACQUIRE_GBLLOCK(&lk); - wakeup(&proc_waiting); } FREE_GBLLOCK(&lk); }