Index: sys/geom/geom_io.c
===================================================================
--- sys/geom/geom_io.c
+++ sys/geom/geom_io.c
@@ -688,7 +688,7 @@
 		bp->bio_driver2 = NULL;
 		bp->bio_pflags = 0;
 		g_io_request(bp, cp);
-		pace++;
+		atomic_add_int(&pace, 1);
 		return;
 	}
@@ -777,10 +777,31 @@
 		}
 		CTR0(KTR_GEOM, "g_down has work to do");
 		g_bioq_unlock(&g_bio_run_down);
-		if (pace > 0) {
+		if (atomic_readandclear_int(&pace) > 0) {
+			/*
+			 * There has been at least one memory allocation
+			 * failure since the last I/O completed. Pause 1ms to
+			 * give the system a chance to free up memory. We only
+			 * do this once because a large number of allocations
+			 * can fail in the direct dispatch case and there's no
+			 * relationship between the number of these failures and
+			 * the length of the outage. If there's still an outage,
+			 * we'll pause again and again until it's
+			 * resolved. Older versions paused longer and once per
+			 * allocation failure. This was OK for a single threaded
+			 * g_down, but with direct dispatch would lead to a max
+			 * of 10 IOPS for minutes at a time when transient memory
+			 * issues prevented allocation for a batch of requests
+			 * from the upper layers.
+			 *
+			 * XXX This pacing is really lame. It needs to be solved
+			 * by other methods. This is OK, but in the worst case
+			 * scenario all memory is tied up waiting for I/O to
+			 * complete which can never happen since we can't
+			 * allocate bios for that I/O.
+			 */
-			CTR1(KTR_GEOM, "g_down pacing self (pace %d)", pace);
-			pause("g_down", hz/10);
-			pace--;
+			CTR0(KTR_GEOM, "g_down pacing self");
+			pause("g_down", min(hz/1000, 1));
 		}
 		CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp,
 		    bp->bio_to->name);
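
For reference, below is a minimal userland sketch of the read-and-clear pacing pattern the patch adopts: allocation failures bump an atomic counter, and the consumer loop atomically swaps it to zero and pauses roughly 1ms once per batch of failures rather than once per failure. It uses C11 atomics (atomic_fetch_add/atomic_exchange) standing in for the kernel's atomic_add_int()/atomic_readandclear_int(), and nanosleep() standing in for pause(9); the note_alloc_failure() and maybe_pace() helpers are illustrative names only, not GEOM functions.

#define _POSIX_C_SOURCE 200809L
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

/* Nonzero when at least one allocation failure occurred since the last check. */
static atomic_int pace;

/* Producer side: called where an allocation (e.g. a bio clone) fails. */
static void
note_alloc_failure(void)
{

	atomic_fetch_add(&pace, 1);
}

/* Consumer side: run once per loop iteration of the down-thread analogue. */
static void
maybe_pace(void)
{
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 1000000 };	/* ~1ms */

	/* Read and clear in one step so concurrent failures are not lost. */
	if (atomic_exchange(&pace, 0) > 0) {
		/* Pause once, regardless of how many failures were recorded. */
		nanosleep(&ts, NULL);
	}
}

int
main(void)
{

	note_alloc_failure();
	note_alloc_failure();
	maybe_pace();		/* sleeps ~1ms once, counter is now clear */
	maybe_pace();		/* no failures since last check: no sleep */
	printf("done\n");
	return (0);
}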