Page MenuHomeFreeBSD

D2789.id6133.diff
No OneTemporary

D2789.id6133.diff

Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -847,6 +847,7 @@
zbookmark_phys_t l2rcb_zb; /* original bookmark */
int l2rcb_flags; /* original flags */
enum zio_compress l2rcb_compress; /* applied compress */
+ void *l2rcb_data; /* temporary buffer */
} l2arc_read_callback_t;
typedef struct l2arc_write_callback {
@@ -861,8 +862,8 @@
/* compression applied to buffer data */
enum zio_compress b_compress;
/* real alloc'd buffer size depending on b_compress applied */
- int b_asize;
- /* temporary buffer holder for in-flight compressed data */
+ uint64_t b_asize;
+ /* temporary buffer holder for in-flight compressed or padded data */
void *b_tmp_cdata;
};
@@ -889,7 +890,7 @@
static void l2arc_hdr_stat_add(void);
static void l2arc_hdr_stat_remove(void);
-static boolean_t l2arc_compress_buf(l2arc_buf_hdr_t *);
+static boolean_t l2arc_transform_buf(l2arc_buf_hdr_t *, boolean_t);
static void l2arc_decompress_zio(zio_t *, arc_buf_hdr_t *, enum zio_compress);
static void l2arc_release_cdata_buf(arc_buf_hdr_t *);
@@ -1716,6 +1717,7 @@
arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr)
{
l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
+ size_t align, asize, len;
ASSERT(MUTEX_HELD(&l2arc_buflist_mtx));
@@ -1723,9 +1725,12 @@
return;
ASSERT(HDR_L2_WRITING(hdr));
- arc_buf_free_on_write(l2hdr->b_tmp_cdata, hdr->b_size,
- zio_data_buf_free);
+
ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write);
+ len = hdr->b_size;
+ align = (size_t)1 << l2hdr->b_dev->l2ad_vdev->vdev_ashift;
+ asize = P2ROUNDUP(len, align);
+ arc_buf_free_on_write(l2hdr->b_tmp_cdata, asize, zio_data_buf_free);
l2hdr->b_tmp_cdata = NULL;
}
@@ -3545,6 +3550,7 @@
!HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) &&
!(l2arc_noprefetch && HDR_PREFETCH(hdr))) {
l2arc_read_callback_t *cb;
+ void* b_data;
DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
ARCSTAT_BUMP(arcstat_l2_hits);
@@ -3557,6 +3563,14 @@
cb->l2rcb_zb = *zb;
cb->l2rcb_flags = zio_flags;
cb->l2rcb_compress = b_compress;
+ if (b_asize > hdr->b_size) {
+ ASSERT3U(b_compress, ==,
+ ZIO_COMPRESS_OFF);
+ b_data = zio_data_buf_alloc(b_asize);
+ cb->l2rcb_data = b_data;
+ } else {
+ b_data = buf->b_data;
+ }
ASSERT(addr >= VDEV_LABEL_START_SIZE &&
addr + size < vd->vdev_psize -
@@ -3569,6 +3583,7 @@
* was squashed to zero size by compression.
*/
if (b_compress == ZIO_COMPRESS_EMPTY) {
+ ASSERT3U(b_asize, ==, 0);
rzio = zio_null(pio, spa, vd,
l2arc_read_done, cb,
zio_flags | ZIO_FLAG_DONT_CACHE |
@@ -3577,7 +3592,7 @@
ZIO_FLAG_DONT_RETRY);
} else {
rzio = zio_read_phys(pio, vd, addr,
- b_asize, buf->b_data,
+ b_asize, b_data,
ZIO_CHECKSUM_OFF,
l2arc_read_done, cb, priority,
zio_flags | ZIO_FLAG_DONT_CACHE |
@@ -4788,9 +4803,9 @@
abl2 = hdr->b_l2hdr;
/*
- * Release the temporary compressed buffer as soon as possible.
+ * Release the temporary buffer as soon as possible.
*/
- if (abl2->b_compress != ZIO_COMPRESS_OFF)
+ if (abl2->b_tmp_cdata != NULL)
l2arc_release_cdata_buf(hdr);
hash_lock = HDR_LOCK(hdr);
@@ -4867,6 +4882,32 @@
ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
/*
+ * If the data was read into a temporary buffer,
+ * move it and free the buffer.
+ */
+ if (cb->l2rcb_data != NULL) {
+ ASSERT3U(hdr->b_size, <, zio->io_size);
+ ASSERT3U(cb->l2rcb_compress, ==, ZIO_COMPRESS_OFF);
+ if (zio->io_error == 0)
+ bcopy(cb->l2rcb_data, buf->b_data, hdr->b_size);
+
+ /*
+ * The following must be done regardless of whether
+ * there was an error:
+ * - free the temporary buffer
+ * - point zio to the real ARC buffer
+ * - set zio size accordingly
+ * These are required because zio is either re-used for
+ * an I/O of the block in the case of the error
+ * or the zio is passed to arc_read_done() and it
+ * needs real data.
+ */
+ zio_data_buf_free(cb->l2rcb_data, zio->io_size);
+ zio->io_size = zio->io_orig_size = hdr->b_size;
+ zio->io_data = zio->io_orig_data = buf->b_data;
+ }
+
+ /*
* If the buffer was compressed, decompress it first.
*/
if (cb->l2rcb_compress != ZIO_COMPRESS_OFF)
@@ -5174,6 +5215,7 @@
kmutex_t *hash_lock;
uint64_t buf_sz;
uint64_t buf_a_sz;
+ size_t align;
if (arc_warm == B_FALSE)
hdr_prev = list_next(list, hdr);
@@ -5211,7 +5253,8 @@
* disk block size.
*/
buf_sz = hdr->b_size;
- buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
+ align = (size_t)1 << dev->l2ad_vdev->vdev_ashift;
+ buf_a_sz = P2ROUNDUP(buf_sz, align);
if ((write_asize + buf_a_sz) > target_sz) {
full = B_TRUE;
@@ -5288,27 +5331,16 @@
}
/*
- * Note that elsewhere in this file arcstat_l2_asize
- * and the used space on l2ad_vdev are updated using b_asize,
- * which is not necessarily rounded up to the device block size.
- * Too keep accounting consistent we do the same here as well:
- * stats_size accumulates the sum of b_asize of the written buffers,
- * while write_asize accumulates the sum of b_asize rounded up
- * to the device block size.
- * The latter sum is used only to validate the corectness of the code.
- */
- uint64_t stats_size = 0;
- write_asize = 0;
-
- /*
* Now start writing the buffers. We're starting at the write head
* and work backwards, retracing the course of the buffer selector
* loop above.
*/
+ write_asize = 0;
for (hdr = list_prev(dev->l2ad_buflist, head); hdr;
hdr = list_prev(dev->l2ad_buflist, hdr)) {
l2arc_buf_hdr_t *l2hdr;
uint64_t buf_sz;
+ boolean_t compress;
/*
* We shouldn't need to lock the buffer here, since we flagged
@@ -5320,36 +5352,36 @@
l2hdr = hdr->b_l2hdr;
l2hdr->b_daddr = dev->l2ad_hand;
- if ((hdr->b_flags & ARC_FLAG_L2COMPRESS) &&
- l2hdr->b_asize >= buf_compress_minsz) {
- if (l2arc_compress_buf(l2hdr)) {
- /*
- * If compression succeeded, enable headroom
- * boost on the next scan cycle.
- */
- *headroom_boost = B_TRUE;
- }
- }
-
/*
- * Pick up the buffer data we had previously stashed away
- * (and now potentially also compressed).
+ * Save a pointer to the original buffer data we had previously
+ * stashed away.
*/
buf_data = l2hdr->b_tmp_cdata;
- buf_sz = l2hdr->b_asize;
+
+ compress = (hdr->b_flags & ARC_FLAG_L2COMPRESS) &&
+ l2hdr->b_asize >= buf_compress_minsz;
+ if (l2arc_transform_buf(l2hdr, compress)) {
+ /*
+ * If compression succeeded, enable headroom
+ * boost on the next scan cycle.
+ */
+ *headroom_boost = B_TRUE;
+ }
/*
- * If the data has not been compressed, then clear b_tmp_cdata
- * to make sure that it points only to a temporary compression
- * buffer.
+ * Get the new buffer size that accounts for compression
+ * and padding.
*/
- if (!L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress))
- l2hdr->b_tmp_cdata = NULL;
+ buf_sz = l2hdr->b_asize;
/* Compression may have squashed the buffer to zero length. */
if (buf_sz != 0) {
- uint64_t buf_a_sz;
-
+ /*
+ * If the data was padded or compressed, then it
+ * it is in a new buffer.
+ */
+ if (l2hdr->b_tmp_cdata != NULL)
+ buf_data = l2hdr->b_tmp_cdata;
wzio = zio_write_phys(pio, dev->l2ad_vdev,
dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF,
NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE,
@@ -5359,14 +5391,8 @@
zio_t *, wzio);
(void) zio_nowait(wzio);
- stats_size += buf_sz;
-
- /*
- * Keep the clock hand suitably device-aligned.
- */
- buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
- write_asize += buf_a_sz;
- dev->l2ad_hand += buf_a_sz;
+ write_asize += buf_sz;
+ dev->l2ad_hand += buf_sz;
}
}
@@ -5376,8 +5402,8 @@
ARCSTAT_BUMP(arcstat_l2_writes_sent);
ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize);
ARCSTAT_INCR(arcstat_l2_size, write_sz);
- ARCSTAT_INCR(arcstat_l2_asize, stats_size);
- vdev_space_update(dev->l2ad_vdev, stats_size, 0, 0);
+ ARCSTAT_INCR(arcstat_l2_asize, write_asize);
+ vdev_space_update(dev->l2ad_vdev, write_asize, 0, 0);
/*
* Bump device hand to the device start if it is approaching the end.
@@ -5397,12 +5423,18 @@
}
/*
- * Compresses an L2ARC buffer.
+ * Transforms, possibly compresses and pads, an L2ARC buffer.
* The data to be compressed must be prefilled in l2hdr->b_tmp_cdata and its
* size in l2hdr->b_asize. This routine tries to compress the data and
* depending on the compression result there are three possible outcomes:
- * *) The buffer was incompressible. The original l2hdr contents were left
- * untouched and are ready for writing to an L2 device.
+ * *) The buffer was incompressible. The buffer size was already ashift aligned.
+ * The original l2hdr contents were left untouched except for b_tmp_cdata,
+ * which is reset to NULL. The caller must keep a pointer to the original
+ * data.
+ * *) The buffer was incompressible. The buffer size was not ashift aligned.
+ * b_tmp_cdata was replaced with a temporary data buffer which holds a padded
+ * (aligned) copy of the data. Once writing is done, invoke
+ * l2arc_release_cdata_buf on this l2hdr to free the temporary buffer.
* *) The buffer was all-zeros, so there is no need to write it to an L2
* device. To indicate this situation b_tmp_cdata is NULL'ed, b_asize is
* set to zero and b_compress is set to ZIO_COMPRESS_EMPTY.
@@ -5416,22 +5448,27 @@
* buffer was incompressible).
*/
static boolean_t
-l2arc_compress_buf(l2arc_buf_hdr_t *l2hdr)
+l2arc_transform_buf(l2arc_buf_hdr_t *l2hdr, boolean_t compress)
{
void *cdata;
- size_t csize, len, rounded;
+ size_t align, asize, csize, len, rounded;
ASSERT(l2hdr->b_compress == ZIO_COMPRESS_OFF);
ASSERT(l2hdr->b_tmp_cdata != NULL);
len = l2hdr->b_asize;
- cdata = zio_data_buf_alloc(len);
- csize = zio_compress_data(ZIO_COMPRESS_LZ4, l2hdr->b_tmp_cdata,
- cdata, l2hdr->b_asize);
+ align = (size_t)1 << l2hdr->b_dev->l2ad_vdev->vdev_ashift;
+ asize = P2ROUNDUP(len, align);
+ cdata = zio_data_buf_alloc(asize);
+ if (compress)
+ csize = zio_compress_data(ZIO_COMPRESS_LZ4, l2hdr->b_tmp_cdata,
+ cdata, len);
+ else
+ csize = len;
if (csize == 0) {
/* zero block, indicate that there's nothing to write */
- zio_data_buf_free(cdata, len);
+ zio_data_buf_free(cdata, asize);
l2hdr->b_compress = ZIO_COMPRESS_EMPTY;
l2hdr->b_asize = 0;
l2hdr->b_tmp_cdata = NULL;
@@ -5439,8 +5476,7 @@
return (B_TRUE);
}
- rounded = P2ROUNDUP(csize,
- (size_t)1 << l2hdr->b_dev->l2ad_vdev->vdev_ashift);
+ rounded = P2ROUNDUP(csize, align);
if (rounded < len) {
/*
* Compression succeeded, we'll keep the cdata around for
@@ -5451,16 +5487,35 @@
csize = rounded;
}
l2hdr->b_compress = ZIO_COMPRESS_LZ4;
- l2hdr->b_asize = csize;
+ l2hdr->b_asize = rounded;
l2hdr->b_tmp_cdata = cdata;
ARCSTAT_BUMP(arcstat_l2_compress_successes);
return (B_TRUE);
} else {
/*
- * Compression failed, release the compressed buffer.
- * l2hdr will be left unmodified.
+ * Compression did not save space.
*/
- zio_data_buf_free(cdata, len);
+ if (P2PHASE(len, align) != 0) {
+ /*
+ * Use compression buffer for a copy of data padded to
+ * the proper size. Compression algorithm remains set
+ * to ZIO_COMPRESS_OFF.
+ */
+ ASSERT3U(len, <, asize);
+ bcopy(l2hdr->b_tmp_cdata, cdata, len);
+ bzero((char *)cdata + len, asize - len);
+ l2hdr->b_asize = asize;
+ l2hdr->b_tmp_cdata = cdata;
+ } else {
+ ASSERT3U(len, ==, asize);
+ /*
+ * The original buffer is good as is,
+ * release the compressed buffer.
+ * l2hdr will be left unmodified except for b_tmp_cdata.
+ */
+ zio_data_buf_free(cdata, asize);
+ l2hdr->b_tmp_cdata = NULL;
+ }
ARCSTAT_BUMP(arcstat_l2_compress_failures);
return (B_FALSE);
}
@@ -5530,7 +5585,7 @@
/*
* Releases the temporary b_tmp_cdata buffer in an l2arc header structure.
- * This buffer serves as a temporary holder of compressed data while
+ * This buffer serves as a temporary holder of compressed or padded data while
* the buffer entry is being written to an l2arc device. Once that is
* done, we can dispose of it.
*/
@@ -5538,18 +5593,14 @@
l2arc_release_cdata_buf(arc_buf_hdr_t *hdr)
{
l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
+ size_t align, asize, len;
- ASSERT(L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress));
- if (l2hdr->b_compress != ZIO_COMPRESS_EMPTY) {
- /*
- * If the data was compressed, then we've allocated a
- * temporary buffer for it, so now we need to release it.
- */
- ASSERT(l2hdr->b_tmp_cdata != NULL);
- zio_data_buf_free(l2hdr->b_tmp_cdata, hdr->b_size);
+ if (l2hdr->b_tmp_cdata != NULL) {
+ len = hdr->b_size;
+ align = (size_t)1 << l2hdr->b_dev->l2ad_vdev->vdev_ashift;
+ asize = P2ROUNDUP(len, align);
+ zio_data_buf_free(l2hdr->b_tmp_cdata, asize);
l2hdr->b_tmp_cdata = NULL;
- } else {
- ASSERT(l2hdr->b_tmp_cdata == NULL);
}
}
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
@@ -2730,11 +2730,13 @@
ASSERT0(P2PHASE(zio->io_size, align));
} else {
/*
- * For physical writes, we allow 512b aligned writes and assume
- * the device will perform a read-modify-write as necessary.
+ * For the physical io we allow alignment
+ * to a logical block size.
*/
- ASSERT0(P2PHASE(zio->io_offset, SPA_MINBLOCKSIZE));
- ASSERT0(P2PHASE(zio->io_size, SPA_MINBLOCKSIZE));
+ uint64_t log_align =
+ 1ULL << vd->vdev_top->vdev_logical_ashift;
+ ASSERT0(P2PHASE(zio->io_offset, log_align));
+ ASSERT0(P2PHASE(zio->io_size, log_align));
}
VERIFY(zio->io_type == ZIO_TYPE_READ || spa_writeable(spa));

File Metadata

Mime Type
text/plain
Expires
Sat, Oct 11, 3:28 AM (14 h, 27 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
23560982
Default Alt Text
D2789.id6133.diff (13 KB)

Event Timeline