D18351.id51300.diff

Index: sys/cam/cam_iosched.c
===================================================================
--- sys/cam/cam_iosched.c
+++ sys/cam/cam_iosched.c
@@ -286,6 +286,8 @@
int current_read_bias; /* Current read bias state */
int total_ticks;
int load; /* EMA of 'load average' of disk / 2^16 */
+ int speedup_ticks; /* When != 0, don't delay I/O for performance */
+#define SPEEDUP_TICKS 11
struct bio_queue_head write_queue;
struct iop_stats read_stats, write_stats, trim_stats;
@@ -574,6 +576,11 @@
isc->this_frac = (uint32_t)delta >> 16; /* Note: discards seconds -- should be 0 harmless if not */
isc->last_time = now;
+ if (isc->speedup_ticks > 0) {
+ isc->current_read_bias = 1;
+ isc->speedup_ticks--;
+ }
+
cam_iosched_cl_maybe_steer(&isc->cl);
cam_iosched_limiter_tick(&isc->read_stats);
@@ -1311,7 +1318,7 @@
/*
* See if our current limiter allows this I/O.
*/
- if (cam_iosched_limiter_iop(&isc->write_stats, bp) != 0) {
+ if (isc->speedup_ticks == 0 && cam_iosched_limiter_iop(&isc->write_stats, bp) != 0) {
if (iosched_debug)
printf("Can't write because limiter says no.\n");
isc->write_stats.state_flags |= IOP_RATE_LIMITED;
@@ -1322,7 +1329,10 @@
* Let's do this: We've passed all the gates and we're a go
* to schedule the I/O in the SIM.
*/
- isc->current_read_bias = isc->read_bias;
+ if (isc->speedup_ticks > 0)
+ isc->current_read_bias = 1;
+ else
+ isc->current_read_bias = isc->read_bias;
bioq_remove(&isc->write_queue, bp);
if (bp->bio_cmd == BIO_WRITE) {
isc->write_stats.queued--;
@@ -1496,6 +1506,60 @@
cam_iosched_queue_work(struct cam_iosched_softc *isc, struct bio *bp)
{
+ /*
+ * A BIO_SPEEDUP from the upper layers means that they have a block
+ * shortage. At present, this is only sent when they are trying to
+ * allocate blocks and hit a shortage before giving up. bio_length is
+ * the size of their shortage. We will complete just enough BIO_DELETEs
+ * in the queue to satisfy the need. If bio_length is 0, we'll complete
+ * them all. This allows the scheduler to delay BIO_DELETEs to improve
+ * read/write performance without worrying about the upper layers. When
+ * that delay could become a problem, we respond by pretending the
+ * queued BIO_DELETEs just worked. We can't do anything about BIO_DELETEs
+ * already in the hardware, though; we have to wait for them to complete.
+ */
+ if (bp->bio_cmd == BIO_SPEEDUP) {
+ off_t len;
+ struct bio *nbp;
+
+ /*
+ * A zero-length request of either kind puts us into speedup mode.
+ */
+ if (bp->bio_length == 0) {
+ isc->speedup_ticks = SPEEDUP_TICKS;
+ bp->bio_error = 0;
+ biodone(bp);
+ return;
+ }
+
+ /*
+ * Ignore non-trim speedup requests.
+ */
+ if ((bp->bio_flags & BIO_SPEEDUP_TRIM) == 0) {
+ bp->bio_error = 0;
+ biodone(bp);
+ return;
+ }
+
+ len = 0;
+ while (bioq_first(&isc->trim_queue) &&
+ (bp->bio_length == 0 || len < bp->bio_length)) {
+ nbp = bioq_takefirst(&isc->trim_queue);
+ len += nbp->bio_length;
+ nbp->bio_error = 0;
+ biodone(nbp);
+ }
+ if (bp->bio_length > 0) {
+ if (bp->bio_length > len)
+ bp->bio_resid = bp->bio_length - len;
+ else
+ bp->bio_resid = 0;
+ }
+ bp->bio_error = 0;
+ biodone(bp);
+ return;
+ }
+
/*
* If we get a BIO_FLUSH, and we're doing delayed BIO_DELETEs then we
* set the last tick time to one less than the current ticks minus the
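
The BIO_SPEEDUP handler added above drains queued BIO_DELETEs until the
reported shortage is covered and returns any shortfall in bio_resid. The
following standalone sketch illustrates that budget-and-residual pattern in
isolation; struct fake_trim and the complete_trim callback are hypothetical
stand-ins for struct bio and biodone(), not part of this change.

#include <stddef.h>
#include <sys/types.h>

struct fake_trim {
	struct fake_trim *next;
	off_t length;
};

/*
 * Complete queued trims until 'budget' bytes have been satisfied; a budget
 * of 0 means complete them all.  The return value is the unsatisfied
 * remainder, which plays the role of bio_resid in the real handler.
 */
static off_t
drain_trims(struct fake_trim **head, off_t budget,
    void (*complete_trim)(struct fake_trim *))
{
	off_t done = 0;
	struct fake_trim *t;

	while ((t = *head) != NULL && (budget == 0 || done < budget)) {
		*head = t->next;
		done += t->length;
		complete_trim(t);	/* analogous to biodone() */
	}
	if (budget == 0 || done >= budget)
		return (0);
	return (budget - done);
}
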
Index: sys/geom/geom.h
===================================================================
--- sys/geom/geom.h
+++ sys/geom/geom.h
@@ -336,6 +336,7 @@
int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr);
int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp);
int g_io_flush(struct g_consumer *cp);
+int g_io_speedup(size_t shortage, u_int flags, size_t *resid, struct g_consumer *cp);
int g_register_classifier(struct g_classifier_hook *hook);
void g_unregister_classifier(struct g_classifier_hook *hook);
void g_io_request(struct bio *bp, struct g_consumer *cp);
Index: sys/geom/geom_io.c
===================================================================
--- sys/geom/geom_io.c
+++ sys/geom/geom_io.c
@@ -338,6 +338,43 @@
return (error);
}
+/*
+ * Send a BIO_SPEEDUP down the stack. It tells the lower layers that the upper
+ * layers have encountered a resource shortage. The lower layers are advised to
+ * stop delaying bio transactions that they might be holding for performance
+ * reasons and to schedule them (read/write/flush) or complete them successfully
+ * (trims) as quickly as they can. bio_length is the amount of the shortage.
+ * bio_resid is used to communicate back if the lower layers couldn't find
+ * bio_length worth of I/O to schedule or discard. A length of 0 means to do as
+ * much as possible (schedule the h/w queues full, discard all trims). flags are
+ * a hint from the upper layers to the lower layers about which operations
+ * should be sped up. The call should be non-blocking.
+ */
+int
+g_io_speedup(size_t shortage, u_int flags, size_t *resid, struct g_consumer *cp)
+{
+ struct bio *bp;
+ int error;
+
+ KASSERT((flags & (BIO_SPEEDUP_TRIM | BIO_SPEEDUP_WRITE)) != 0,
+ ("Invalid flags passed to g_io_speedup: %#x", flags));
+ g_trace(G_T_BIO, "bio_speedup(%s, %zu, %#x)", cp->provider->name,
+ shortage, flags);
+ bp = g_new_bio();
+ if (bp == NULL)
+ return (ENOMEM);
+ bp->bio_cmd = BIO_SPEEDUP;
+ bp->bio_length = shortage;
+ bp->bio_done = NULL;
+ bp->bio_flags |= flags;
+ g_io_request(bp, cp);
+ error = biowait(bp, "gflush");
+ if (resid != NULL)
+ *resid = bp->bio_resid;
+ g_destroy_bio(bp);
+ return (error);
+}
+
int
g_io_flush(struct g_consumer *cp)
{
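
A possible caller of the new g_io_speedup() interface, roughly how a
filesystem holding a struct g_consumer might nudge queued trims along when it
runs short of free blocks. This is a hedged usage sketch assuming the usual
geom headers; example_request_trim_speedup() and shortage_bytes are
illustrative names, not part of the patch.

static int
example_request_trim_speedup(struct g_consumer *cp, size_t shortage_bytes)
{
	size_t resid;
	int error;

	/* Ask the lower layers to complete queued trims covering the shortage. */
	error = g_io_speedup(shortage_bytes, BIO_SPEEDUP_TRIM, &resid, cp);
	if (error != 0)
		return (error);	/* e.g. ENOMEM when no bio could be allocated */
	if (resid != 0) {
		/*
		 * Fewer than shortage_bytes worth of queued trims were found;
		 * the caller may need to fall back to waiting for in-flight
		 * trims or writes to finish.
		 */
	}
	return (0);
}
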
Index: sys/sys/bio.h
===================================================================
--- sys/sys/bio.h
+++ sys/sys/bio.h
@@ -53,6 +53,7 @@
#define BIO_CMD1 0x07 /* Available for local hacks */
#define BIO_CMD2 0x08 /* Available for local hacks */
#define BIO_ZONE 0x09 /* Zone command */
+#define BIO_SPEEDUP 0x0a /* Upper layers face shortage */
/* bio_flags */
#define BIO_ERROR 0x01 /* An error occurred processing this bio. */
@@ -67,6 +68,9 @@
#define BIO_TRANSIENT_MAPPING 0x20
#define BIO_VLIST 0x40
+#define BIO_SPEEDUP_WRITE 0x4000 /* Shortage at upper layers: push out delayed writes */
+#define BIO_SPEEDUP_TRIM 0x8000 /* Shortage at upper layers: complete queued trims */
+
#ifdef _KERNEL
struct disk;
struct bio;
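
The flag bits are hints, and both may be set on one BIO_SPEEDUP. A sketch of
how a provider's start routine might dispatch on them is below; it assumes a
GEOM class context, and example_hurry_trims()/example_hurry_writes() are
hypothetical helpers, not part of this patch.

static void
example_start(struct bio *bp)
{

	switch (bp->bio_cmd) {
	case BIO_SPEEDUP:
		/* Honor each flag bit independently. */
		if (bp->bio_flags & BIO_SPEEDUP_TRIM)
			example_hurry_trims(bp->bio_length);
		if (bp->bio_flags & BIO_SPEEDUP_WRITE)
			example_hurry_writes(bp->bio_length);
		bp->bio_resid = 0;	/* report the whole request satisfied */
		g_io_deliver(bp, 0);
		break;
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		break;
	}
}
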
Index: sys/ufs/ffs/ffs_softdep.c
===================================================================
--- sys/ufs/ffs/ffs_softdep.c
+++ sys/ufs/ffs/ffs_softdep.c
@@ -903,7 +903,6 @@
int, struct pagedep **);
static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t,
struct pagedep **);
-static void pause_timer(void *);
static int request_cleanup(struct mount *, int);
static int softdep_request_cleanup_flush(struct mount *, struct ufsmount *);
static void schedule_cleanup(struct mount *);
@@ -1256,9 +1255,6 @@
*/
static int max_softdeps; /* maximum number of structs before slowdown */
static int tickdelay = 2; /* number of ticks to pause during slowdown */
-static int proc_waiting; /* tracks whether we have a timeout posted */
-static int *stat_countp; /* statistic to count in proc_waiting timeout */
-static struct callout softdep_callout;
static int req_clear_inodedeps; /* syncer process flush some inodedeps */
static int req_clear_remove; /* syncer process flush some freeblks */
static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? */
@@ -1448,6 +1444,7 @@
LOCK_OWNED(ump);
worklist_speedup(ump->um_mountp);
+ g_io_speedup(0, BIO_SPEEDUP_WRITE, NULL, ump->um_cp);
bd_speedup();
/*
* If we have global shortages, then we need other
@@ -2429,9 +2426,6 @@
bioops.io_deallocate = softdep_deallocate_dependencies;
bioops.io_countdeps = softdep_count_dependencies;
softdep_ast_cleanup = softdep_ast_cleanup_proc;
-
- /* Initialize the callout with an mtx. */
- callout_init_mtx(&softdep_callout, &lk, 0);
}
/*
@@ -2448,8 +2442,6 @@
bioops.io_deallocate = NULL;
bioops.io_countdeps = NULL;
softdep_ast_cleanup = NULL;
-
- callout_drain(&softdep_callout);
}
/*
@@ -13328,6 +13320,7 @@
struct ufsmount *ump;
struct mount *mp;
long starttime;
+ size_t resid;
ufs2_daddr_t needed;
int error, failed_vnode;
@@ -13402,6 +13395,10 @@
}
starttime = time_second;
retry:
+ if (resource == FLUSH_BLOCKS_WAIT &&
+ fs->fs_cstotal.cs_nbfree <= needed)
+ g_io_speedup(needed * fs->fs_bsize, BIO_SPEEDUP_TRIM, &resid,
+ ump->um_cp);
if ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 &&
fs->fs_cstotal.cs_nbfree <= needed) ||
(resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 &&
@@ -13640,11 +13637,7 @@
* If we are resource constrained on inode dependencies, try
* flushing some dirty inodes. Otherwise, we are constrained
* by file deletions, so try accelerating flushes of directories
- * with removal dependencies. We would like to do the cleanup
- * here, but we probably hold an inode locked at this point and
- * that might deadlock against one that we try to clean. So,
- * the best that we can do is request the syncer daemon to do
- * the cleanup for us.
+ * with removal dependencies.
*/
switch (resource) {
@@ -13654,7 +13647,7 @@
stat_ino_limit_push += 1;
req_clear_inodedeps += 1;
FREE_GBLLOCK(&lk);
- stat_countp = &stat_ino_limit_hit;
+ clear_inodedeps(mp);
break;
case FLUSH_BLOCKS:
@@ -13663,50 +13656,16 @@
stat_blk_limit_push += 1;
req_clear_remove += 1;
FREE_GBLLOCK(&lk);
- stat_countp = &stat_blk_limit_hit;
+ g_io_speedup(0, BIO_SPEEDUP_TRIM | BIO_SPEEDUP_WRITE, NULL, ump->um_cp);
+ clear_remove(mp);
break;
default:
panic("request_cleanup: unknown type");
}
- /*
- * Hopefully the syncer daemon will catch up and awaken us.
- * We wait at most tickdelay before proceeding in any case.
- */
- ACQUIRE_GBLLOCK(&lk);
- FREE_LOCK(ump);
- proc_waiting += 1;
- if (callout_pending(&softdep_callout) == FALSE)
- callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2,
- pause_timer, 0);
-
- if ((td->td_pflags & TDP_KTHREAD) == 0)
- msleep((caddr_t)&proc_waiting, &lk, PPAUSE, "softupdate", 0);
- proc_waiting -= 1;
- FREE_GBLLOCK(&lk);
- ACQUIRE_LOCK(ump);
return (1);
}
-/*
- * Awaken processes pausing in request_cleanup and clear proc_waiting
- * to indicate that there is no longer a timer running. Pause_timer
- * will be called with the global softdep mutex (&lk) locked.
- */
-static void
-pause_timer(arg)
- void *arg;
-{
-
- GBLLOCK_OWNED(&lk);
- /*
- * The callout_ API has acquired mtx and will hold it around this
- * function call.
- */
- *stat_countp += proc_waiting;
- wakeup(&proc_waiting);
-}
-
/*
* If requested, try removing inode or removal dependencies.
*/
@@ -13730,14 +13689,12 @@
FREE_GBLLOCK(&lk);
clear_inodedeps(mp);
ACQUIRE_GBLLOCK(&lk);
- wakeup(&proc_waiting);
}
if (req_clear_remove) {
req_clear_remove -= 1;
FREE_GBLLOCK(&lk);
clear_remove(mp);
ACQUIRE_GBLLOCK(&lk);
- wakeup(&proc_waiting);
}
FREE_GBLLOCK(&lk);
}
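
The ffs_softdep.c hunks above replace the old pause_timer()/callout throttle
with direct cleanup calls plus BIO_SPEEDUP requests. The key piece of
arithmetic sits at the retry: label, where a shortage counted in filesystem
blocks is converted to bytes before being handed to GEOM. A minimal sketch of
just that step, with an illustrative helper name:

/*
 * 'needed' is a count of filesystem blocks still required and fs_bsize is
 * the block size in bytes; their product is the byte shortage BIO_SPEEDUP
 * asks the lower layers to cover by completing queued trims.
 */
static void
example_trim_shortage(struct fs *fs, struct g_consumer *cp, ufs2_daddr_t needed)
{
	size_t resid;

	if (fs->fs_cstotal.cs_nbfree <= needed)
		(void)g_io_speedup((size_t)needed * fs->fs_bsize,
		    BIO_SPEEDUP_TRIM, &resid, cp);
}
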
