Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F131661772
D2789.id6133.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
13 KB
Referenced Files
None
Subscribers
None
D2789.id6133.diff
View Options
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -847,6 +847,7 @@
zbookmark_phys_t l2rcb_zb; /* original bookmark */
int l2rcb_flags; /* original flags */
enum zio_compress l2rcb_compress; /* applied compress */
+ void *l2rcb_data; /* temporary buffer */
} l2arc_read_callback_t;
typedef struct l2arc_write_callback {
@@ -861,8 +862,8 @@
/* compression applied to buffer data */
enum zio_compress b_compress;
/* real alloc'd buffer size depending on b_compress applied */
- int b_asize;
- /* temporary buffer holder for in-flight compressed data */
+ uint64_t b_asize;
+ /* temporary buffer holder for in-flight compressed or padded data */
void *b_tmp_cdata;
};
@@ -889,7 +890,7 @@
static void l2arc_hdr_stat_add(void);
static void l2arc_hdr_stat_remove(void);
-static boolean_t l2arc_compress_buf(l2arc_buf_hdr_t *);
+static boolean_t l2arc_transform_buf(l2arc_buf_hdr_t *, boolean_t);
static void l2arc_decompress_zio(zio_t *, arc_buf_hdr_t *, enum zio_compress);
static void l2arc_release_cdata_buf(arc_buf_hdr_t *);
@@ -1716,6 +1717,7 @@
arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr)
{
l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
+ size_t align, asize, len;
ASSERT(MUTEX_HELD(&l2arc_buflist_mtx));
@@ -1723,9 +1725,12 @@
return;
ASSERT(HDR_L2_WRITING(hdr));
- arc_buf_free_on_write(l2hdr->b_tmp_cdata, hdr->b_size,
- zio_data_buf_free);
+
ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write);
+ len = hdr->b_size;
+ align = (size_t)1 << l2hdr->b_dev->l2ad_vdev->vdev_ashift;
+ asize = P2ROUNDUP(len, align);
+ arc_buf_free_on_write(l2hdr->b_tmp_cdata, asize, zio_data_buf_free);
l2hdr->b_tmp_cdata = NULL;
}
@@ -3545,6 +3550,7 @@
!HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) &&
!(l2arc_noprefetch && HDR_PREFETCH(hdr))) {
l2arc_read_callback_t *cb;
+ void* b_data;
DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
ARCSTAT_BUMP(arcstat_l2_hits);
@@ -3557,6 +3563,14 @@
cb->l2rcb_zb = *zb;
cb->l2rcb_flags = zio_flags;
cb->l2rcb_compress = b_compress;
+ if (b_asize > hdr->b_size) {
+ ASSERT3U(b_compress, ==,
+ ZIO_COMPRESS_OFF);
+ b_data = zio_data_buf_alloc(b_asize);
+ cb->l2rcb_data = b_data;
+ } else {
+ b_data = buf->b_data;
+ }
ASSERT(addr >= VDEV_LABEL_START_SIZE &&
addr + size < vd->vdev_psize -
@@ -3569,6 +3583,7 @@
* was squashed to zero size by compression.
*/
if (b_compress == ZIO_COMPRESS_EMPTY) {
+ ASSERT3U(b_asize, ==, 0);
rzio = zio_null(pio, spa, vd,
l2arc_read_done, cb,
zio_flags | ZIO_FLAG_DONT_CACHE |
@@ -3577,7 +3592,7 @@
ZIO_FLAG_DONT_RETRY);
} else {
rzio = zio_read_phys(pio, vd, addr,
- b_asize, buf->b_data,
+ b_asize, b_data,
ZIO_CHECKSUM_OFF,
l2arc_read_done, cb, priority,
zio_flags | ZIO_FLAG_DONT_CACHE |
@@ -4788,9 +4803,9 @@
abl2 = hdr->b_l2hdr;
/*
- * Release the temporary compressed buffer as soon as possible.
+ * Release the temporary buffer as soon as possible.
*/
- if (abl2->b_compress != ZIO_COMPRESS_OFF)
+ if (abl2->b_tmp_cdata != NULL)
l2arc_release_cdata_buf(hdr);
hash_lock = HDR_LOCK(hdr);
@@ -4867,6 +4882,32 @@
ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
/*
+ * If the data was read into a temporary buffer,
+ * move it and free the buffer.
+ */
+ if (cb->l2rcb_data != NULL) {
+ ASSERT3U(hdr->b_size, <, zio->io_size);
+ ASSERT3U(cb->l2rcb_compress, ==, ZIO_COMPRESS_OFF);
+ if (zio->io_error == 0)
+ bcopy(cb->l2rcb_data, buf->b_data, hdr->b_size);
+
+ /*
+ * The following must be done regardless of whether
+ * there was an error:
+ * - free the temporary buffer
+ * - point zio to the real ARC buffer
+ * - set zio size accordingly
+ * These are required because zio is either re-used for
+ * an I/O of the block in the case of the error
+ * or the zio is passed to arc_read_done() and it
+ * needs real data.
+ */
+ zio_data_buf_free(cb->l2rcb_data, zio->io_size);
+ zio->io_size = zio->io_orig_size = hdr->b_size;
+ zio->io_data = zio->io_orig_data = buf->b_data;
+ }
+
+ /*
* If the buffer was compressed, decompress it first.
*/
if (cb->l2rcb_compress != ZIO_COMPRESS_OFF)
@@ -5174,6 +5215,7 @@
kmutex_t *hash_lock;
uint64_t buf_sz;
uint64_t buf_a_sz;
+ size_t align;
if (arc_warm == B_FALSE)
hdr_prev = list_next(list, hdr);
@@ -5211,7 +5253,8 @@
* disk block size.
*/
buf_sz = hdr->b_size;
- buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
+ align = (size_t)1 << dev->l2ad_vdev->vdev_ashift;
+ buf_a_sz = P2ROUNDUP(buf_sz, align);
if ((write_asize + buf_a_sz) > target_sz) {
full = B_TRUE;
@@ -5288,27 +5331,16 @@
}
/*
- * Note that elsewhere in this file arcstat_l2_asize
- * and the used space on l2ad_vdev are updated using b_asize,
- * which is not necessarily rounded up to the device block size.
- * Too keep accounting consistent we do the same here as well:
- * stats_size accumulates the sum of b_asize of the written buffers,
- * while write_asize accumulates the sum of b_asize rounded up
- * to the device block size.
- * The latter sum is used only to validate the corectness of the code.
- */
- uint64_t stats_size = 0;
- write_asize = 0;
-
- /*
* Now start writing the buffers. We're starting at the write head
* and work backwards, retracing the course of the buffer selector
* loop above.
*/
+ write_asize = 0;
for (hdr = list_prev(dev->l2ad_buflist, head); hdr;
hdr = list_prev(dev->l2ad_buflist, hdr)) {
l2arc_buf_hdr_t *l2hdr;
uint64_t buf_sz;
+ boolean_t compress;
/*
* We shouldn't need to lock the buffer here, since we flagged
@@ -5320,36 +5352,36 @@
l2hdr = hdr->b_l2hdr;
l2hdr->b_daddr = dev->l2ad_hand;
- if ((hdr->b_flags & ARC_FLAG_L2COMPRESS) &&
- l2hdr->b_asize >= buf_compress_minsz) {
- if (l2arc_compress_buf(l2hdr)) {
- /*
- * If compression succeeded, enable headroom
- * boost on the next scan cycle.
- */
- *headroom_boost = B_TRUE;
- }
- }
-
/*
- * Pick up the buffer data we had previously stashed away
- * (and now potentially also compressed).
+ * Save a pointer to the original buffer data we had previously
+ * stashed away.
*/
buf_data = l2hdr->b_tmp_cdata;
- buf_sz = l2hdr->b_asize;
+
+ compress = (hdr->b_flags & ARC_FLAG_L2COMPRESS) &&
+ l2hdr->b_asize >= buf_compress_minsz;
+ if (l2arc_transform_buf(l2hdr, compress)) {
+ /*
+ * If compression succeeded, enable headroom
+ * boost on the next scan cycle.
+ */
+ *headroom_boost = B_TRUE;
+ }
/*
- * If the data has not been compressed, then clear b_tmp_cdata
- * to make sure that it points only to a temporary compression
- * buffer.
+ * Get the new buffer size that accounts for compression
+ * and padding.
*/
- if (!L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress))
- l2hdr->b_tmp_cdata = NULL;
+ buf_sz = l2hdr->b_asize;
/* Compression may have squashed the buffer to zero length. */
if (buf_sz != 0) {
- uint64_t buf_a_sz;
-
+ /*
+ * If the data was padded or compressed, then it
+ * is in a new buffer.
+ */
+ if (l2hdr->b_tmp_cdata != NULL)
+ buf_data = l2hdr->b_tmp_cdata;
wzio = zio_write_phys(pio, dev->l2ad_vdev,
dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF,
NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE,
@@ -5359,14 +5391,8 @@
zio_t *, wzio);
(void) zio_nowait(wzio);
- stats_size += buf_sz;
-
- /*
- * Keep the clock hand suitably device-aligned.
- */
- buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
- write_asize += buf_a_sz;
- dev->l2ad_hand += buf_a_sz;
+ write_asize += buf_sz;
+ dev->l2ad_hand += buf_sz;
}
}
@@ -5376,8 +5402,8 @@
ARCSTAT_BUMP(arcstat_l2_writes_sent);
ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize);
ARCSTAT_INCR(arcstat_l2_size, write_sz);
- ARCSTAT_INCR(arcstat_l2_asize, stats_size);
- vdev_space_update(dev->l2ad_vdev, stats_size, 0, 0);
+ ARCSTAT_INCR(arcstat_l2_asize, write_asize);
+ vdev_space_update(dev->l2ad_vdev, write_asize, 0, 0);
/*
* Bump device hand to the device start if it is approaching the end.
@@ -5397,12 +5423,18 @@
}
/*
- * Compresses an L2ARC buffer.
+ * Transforms, possibly compresses and pads, an L2ARC buffer.
* The data to be compressed must be prefilled in l2hdr->b_tmp_cdata and its
* size in l2hdr->b_asize. This routine tries to compress the data and
* depending on the compression result there are three possible outcomes:
- * *) The buffer was incompressible. The original l2hdr contents were left
- * untouched and are ready for writing to an L2 device.
+ * *) The buffer was incompressible. The buffer size was already ashift aligned.
+ * The original l2hdr contents were left untouched except for b_tmp_cdata,
+ * which is reset to NULL. The caller must keep a pointer to the original
+ * data.
+ * *) The buffer was incompressible. The buffer size was not ashift aligned.
+ * b_tmp_cdata was replaced with a temporary data buffer which holds a padded
+ * (aligned) copy of the data. Once writing is done, invoke
+ * l2arc_release_cdata_buf on this l2hdr to free the temporary buffer.
* *) The buffer was all-zeros, so there is no need to write it to an L2
* device. To indicate this situation b_tmp_cdata is NULL'ed, b_asize is
* set to zero and b_compress is set to ZIO_COMPRESS_EMPTY.
@@ -5416,22 +5448,27 @@
* buffer was incompressible).
*/
static boolean_t
-l2arc_compress_buf(l2arc_buf_hdr_t *l2hdr)
+l2arc_transform_buf(l2arc_buf_hdr_t *l2hdr, boolean_t compress)
{
void *cdata;
- size_t csize, len, rounded;
+ size_t align, asize, csize, len, rounded;
ASSERT(l2hdr->b_compress == ZIO_COMPRESS_OFF);
ASSERT(l2hdr->b_tmp_cdata != NULL);
len = l2hdr->b_asize;
- cdata = zio_data_buf_alloc(len);
- csize = zio_compress_data(ZIO_COMPRESS_LZ4, l2hdr->b_tmp_cdata,
- cdata, l2hdr->b_asize);
+ align = (size_t)1 << l2hdr->b_dev->l2ad_vdev->vdev_ashift;
+ asize = P2ROUNDUP(len, align);
+ cdata = zio_data_buf_alloc(asize);
+ if (compress)
+ csize = zio_compress_data(ZIO_COMPRESS_LZ4, l2hdr->b_tmp_cdata,
+ cdata, len);
+ else
+ csize = len;
if (csize == 0) {
/* zero block, indicate that there's nothing to write */
- zio_data_buf_free(cdata, len);
+ zio_data_buf_free(cdata, asize);
l2hdr->b_compress = ZIO_COMPRESS_EMPTY;
l2hdr->b_asize = 0;
l2hdr->b_tmp_cdata = NULL;
@@ -5439,8 +5476,7 @@
return (B_TRUE);
}
- rounded = P2ROUNDUP(csize,
- (size_t)1 << l2hdr->b_dev->l2ad_vdev->vdev_ashift);
+ rounded = P2ROUNDUP(csize, align);
if (rounded < len) {
/*
* Compression succeeded, we'll keep the cdata around for
@@ -5451,16 +5487,35 @@
csize = rounded;
}
l2hdr->b_compress = ZIO_COMPRESS_LZ4;
- l2hdr->b_asize = csize;
+ l2hdr->b_asize = rounded;
l2hdr->b_tmp_cdata = cdata;
ARCSTAT_BUMP(arcstat_l2_compress_successes);
return (B_TRUE);
} else {
/*
- * Compression failed, release the compressed buffer.
- * l2hdr will be left unmodified.
+ * Compression did not save space.
*/
- zio_data_buf_free(cdata, len);
+ if (P2PHASE(len, align) != 0) {
+ /*
+ * Use compression buffer for a copy of data padded to
+ * the proper size. Compression algorithm remains set
+ * to ZIO_COMPRESS_OFF.
+ */
+ ASSERT3U(len, <, asize);
+ bcopy(l2hdr->b_tmp_cdata, cdata, len);
+ bzero((char *)cdata + len, asize - len);
+ l2hdr->b_asize = asize;
+ l2hdr->b_tmp_cdata = cdata;
+ } else {
+ ASSERT3U(len, ==, asize);
+ /*
+ * The original buffer is good as is,
+ * release the compressed buffer.
+ * l2hdr will be left unmodified except for b_tmp_cdata.
+ */
+ zio_data_buf_free(cdata, asize);
+ l2hdr->b_tmp_cdata = NULL;
+ }
ARCSTAT_BUMP(arcstat_l2_compress_failures);
return (B_FALSE);
}
@@ -5530,7 +5585,7 @@
/*
* Releases the temporary b_tmp_cdata buffer in an l2arc header structure.
- * This buffer serves as a temporary holder of compressed data while
+ * This buffer serves as a temporary holder of compressed or padded data while
* the buffer entry is being written to an l2arc device. Once that is
* done, we can dispose of it.
*/
@@ -5538,18 +5593,14 @@
l2arc_release_cdata_buf(arc_buf_hdr_t *hdr)
{
l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
+ size_t align, asize, len;
- ASSERT(L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress));
- if (l2hdr->b_compress != ZIO_COMPRESS_EMPTY) {
- /*
- * If the data was compressed, then we've allocated a
- * temporary buffer for it, so now we need to release it.
- */
- ASSERT(l2hdr->b_tmp_cdata != NULL);
- zio_data_buf_free(l2hdr->b_tmp_cdata, hdr->b_size);
+ if (l2hdr->b_tmp_cdata != NULL) {
+ len = hdr->b_size;
+ align = (size_t)1 << l2hdr->b_dev->l2ad_vdev->vdev_ashift;
+ asize = P2ROUNDUP(len, align);
+ zio_data_buf_free(l2hdr->b_tmp_cdata, asize);
l2hdr->b_tmp_cdata = NULL;
- } else {
- ASSERT(l2hdr->b_tmp_cdata == NULL);
}
}
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
@@ -2730,11 +2730,13 @@
ASSERT0(P2PHASE(zio->io_size, align));
} else {
/*
- * For physical writes, we allow 512b aligned writes and assume
- * the device will perform a read-modify-write as necessary.
+ * For physical I/O we allow alignment to the
+ * logical block size of the top-level vdev.
*/
- ASSERT0(P2PHASE(zio->io_offset, SPA_MINBLOCKSIZE));
- ASSERT0(P2PHASE(zio->io_size, SPA_MINBLOCKSIZE));
+ uint64_t log_align =
+ 1ULL << vd->vdev_top->vdev_logical_ashift;
+ ASSERT0(P2PHASE(zio->io_offset, log_align));
+ ASSERT0(P2PHASE(zio->io_size, log_align));
}
VERIFY(zio->io_type == ZIO_TYPE_READ || spa_writeable(spa));
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Oct 11, 3:28 AM (14 h, 27 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
23560982
Default Alt Text
D2789.id6133.diff (13 KB)
Attached To
Mode
D2789: l2arc: make sure that all writes honor ashift of a cache device
Attached
Detach File
Event Timeline
Log In to Comment