Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F136990070
D21895.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
6 KB
Referenced Files
None
Subscribers
None
D21895.diff
View Options
Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
===================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2019 Datto Inc.
*/
/* Copyright (c) 2013 by Saso Kiselkov. All rights reserved. */
/* Copyright (c) 2013, Joyent, Inc. All rights reserved. */
@@ -62,14 +63,15 @@
&zfs_nopwrite_enabled, 0, "Enable nopwrite feature");
/*
- * Tunable to control percentage of dirtied blocks from frees in one TXG.
- * After this threshold is crossed, additional dirty blocks from frees
- * wait until the next TXG.
+ * Tunable to control percentage of dirtied L1 blocks from frees allowed into
+ * one TXG. After this threshold is crossed, additional dirty blocks from frees
+ * will wait until the next TXG.
* A value of zero will disable this throttle.
*/
-uint32_t zfs_per_txg_dirty_frees_percent = 30;
+uint32_t zfs_per_txg_dirty_frees_percent = 5;
SYSCTL_INT(_vfs_zfs, OID_AUTO, per_txg_dirty_frees_percent, CTLFLAG_RWTUN,
- &zfs_per_txg_dirty_frees_percent, 0, "Percentage of dirtied blocks from frees in one txg");
+ &zfs_per_txg_dirty_frees_percent, 0,
+ "Percentage of dirtied indirect blocks from frees allowed in one txg");
/*
* This can be used for testing, to ensure that certain actions happen
@@ -683,11 +685,13 @@
*
* On input, *start should be the first offset that does not need to be
* freed (e.g. "offset + length"). On return, *start will be the first
- * offset that should be freed.
+ * offset that should be freed and *l1blks is set to the number of level 1
+ * indirect blocks found within the chunk.
*/
static int
-get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
+get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum, uint64_t *l1blks)
{
+ uint64_t blks;
uint64_t maxblks = DMU_MAX_ACCESS >> (dn->dn_indblkshift + 1);
/* bytes of data covered by a level-1 indirect block */
uint64_t iblkrange =
@@ -695,13 +699,23 @@
ASSERT3U(minimum, <=, *start);
- if (*start - minimum <= iblkrange * maxblks) {
+ /*
+ * Check if we can free the entire range assuming that all of the
+ * L1 blocks in this range have data. If we can, we use this
+ * worst case value as an estimate so we can avoid having to look
+ * at the object's actual data.
+ */
+ uint64_t total_l1blks =
+ (roundup(*start, iblkrange) - (minimum / iblkrange * iblkrange)) /
+ iblkrange;
+ if (total_l1blks <= maxblks) {
+ *l1blks = total_l1blks;
*start = minimum;
return (0);
}
ASSERT(ISP2(iblkrange));
- for (uint64_t blks = 0; *start > minimum && blks < maxblks; blks++) {
+ for (blks = 0; *start > minimum && blks < maxblks; blks++) {
int err;
/*
@@ -711,6 +725,7 @@
* to search.
*/
(*start)--;
+
err = dnode_next_offset(dn,
DNODE_FIND_BACKWARDS, start, 2, 1, 0);
@@ -719,6 +734,7 @@
*start = minimum;
break;
} else if (err != 0) {
+ *l1blks = blks;
return (err);
}
@@ -727,6 +743,8 @@
}
if (*start < minimum)
*start = minimum;
+ *l1blks = blks;
+
return (0);
}
@@ -762,14 +780,14 @@
dirty_frees_threshold =
zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;
else
- dirty_frees_threshold = zfs_dirty_data_max / 4;
+ dirty_frees_threshold = zfs_dirty_data_max / 20;
if (length == DMU_OBJECT_END || offset + length > object_size)
length = object_size - offset;
while (length != 0) {
uint64_t chunk_end, chunk_begin, chunk_len;
- uint64_t long_free_dirty_all_txgs = 0;
+ uint64_t l1blks;
dmu_tx_t *tx;
if (dmu_objset_zfs_unmounting(dn->dn_objset))
@@ -778,7 +796,7 @@
chunk_end = chunk_begin = offset + length;
/* move chunk_begin backwards to the beginning of this chunk */
- err = get_next_chunk(dn, &chunk_begin, offset);
+ err = get_next_chunk(dn, &chunk_begin, offset, &l1blks);
if (err)
return (err);
ASSERT3U(chunk_begin, >=, offset);
@@ -786,24 +804,6 @@
chunk_len = chunk_end - chunk_begin;
- mutex_enter(&dp->dp_lock);
- for (int t = 0; t < TXG_SIZE; t++) {
- long_free_dirty_all_txgs +=
- dp->dp_long_free_dirty_pertxg[t];
- }
- mutex_exit(&dp->dp_lock);
-
- /*
- * To avoid filling up a TXG with just frees wait for
- * the next TXG to open before freeing more chunks if
- * we have reached the threshold of frees
- */
- if (dirty_frees_threshold != 0 &&
- long_free_dirty_all_txgs >= dirty_frees_threshold) {
- txg_wait_open(dp, 0);
- continue;
- }
-
tx = dmu_tx_create(os);
dmu_tx_hold_free(tx, dn->dn_object, chunk_begin, chunk_len);
@@ -818,13 +818,42 @@
return (err);
}
+ uint64_t txg = dmu_tx_get_txg(tx);
+
mutex_enter(&dp->dp_lock);
- dp->dp_long_free_dirty_pertxg[dmu_tx_get_txg(tx) & TXG_MASK] +=
- chunk_len;
+ uint64_t long_free_dirty =
+ dp->dp_long_free_dirty_pertxg[txg & TXG_MASK];
mutex_exit(&dp->dp_lock);
+
+ /*
+ * To avoid filling up a TXG with just frees, wait for
+ * the next TXG to open before freeing more chunks if
+ * we have reached the threshold of frees.
+ */
+ if (dirty_frees_threshold != 0 &&
+ long_free_dirty >= dirty_frees_threshold) {
+ dmu_tx_commit(tx);
+ txg_wait_open(dp, 0);
+ continue;
+ }
+
+ /*
+ * In order to prevent unnecessary write throttling, for each
+ * TXG, we track the cumulative size of L1 blocks being dirtied
+ * in dnode_free_range() below. We compare this number to a
+ * tunable threshold, past which we stop adding L1 blocks
+ * dirtied by frees to the open TXG. See
+ * dmu_free_long_range_impl() for details. The threshold
+ * prevents write throttle activation due to L1 blocks dirtied
+ * by frees taking up a large percentage of zfs_dirty_data_max.
+ */
+ mutex_enter(&dp->dp_lock);
+ dp->dp_long_free_dirty_pertxg[txg & TXG_MASK] +=
+ l1blks << dn->dn_indblkshift;
+ mutex_exit(&dp->dp_lock);
DTRACE_PROBE3(free__long__range,
- uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,
- uint64_t, dmu_tx_get_txg(tx));
+ uint64_t, long_free_dirty, uint64_t, chunk_len,
+ uint64_t, txg);
dnode_free_range(dn, chunk_begin, chunk_len, tx);
dmu_tx_commit(tx);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Nov 21, 10:36 PM (5 h, 18 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
25817834
Default Alt Text
D21895.diff (6 KB)
Attached To
Mode
D21895: MFZol: Fix performance of "zfs recv" with many deletions
Attached
Detach File
Event Timeline
Log In to Comment