Changeset View
Changeset View
Standalone View
Standalone View
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
Show First 20 Lines • Show All 6,176 Lines • ▼ Show 20 Lines | |||||
* Returns the number of bytes actually written (which may be smaller than | * Returns the number of bytes actually written (which may be smaller than | ||||
* the delta by which the device hand has changed due to alignment). | * the delta by which the device hand has changed due to alignment). | ||||
*/ | */ | ||||
static uint64_t | static uint64_t | ||||
l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, | l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, | ||||
boolean_t *headroom_boost) | boolean_t *headroom_boost) | ||||
{ | { | ||||
arc_buf_hdr_t *hdr, *hdr_prev, *head; | arc_buf_hdr_t *hdr, *hdr_prev, *head; | ||||
uint64_t write_asize, write_psize, write_sz, headroom, | uint64_t write_asize, write_sz, headroom, buf_compress_minsz; | ||||
buf_compress_minsz; | |||||
void *buf_data; | void *buf_data; | ||||
boolean_t full; | boolean_t full; | ||||
l2arc_write_callback_t *cb; | l2arc_write_callback_t *cb; | ||||
zio_t *pio, *wzio; | zio_t *pio, *wzio; | ||||
uint64_t guid = spa_load_guid(spa); | uint64_t guid = spa_load_guid(spa); | ||||
const boolean_t do_headroom_boost = *headroom_boost; | const boolean_t do_headroom_boost = *headroom_boost; | ||||
int try; | int try; | ||||
ASSERT(dev->l2ad_vdev != NULL); | ASSERT(dev->l2ad_vdev != NULL); | ||||
/* Lower the flag now, we might want to raise it again later. */ | /* Lower the flag now, we might want to raise it again later. */ | ||||
*headroom_boost = B_FALSE; | *headroom_boost = B_FALSE; | ||||
pio = NULL; | pio = NULL; | ||||
write_sz = write_asize = write_psize = 0; | write_sz = write_asize = 0; | ||||
full = B_FALSE; | full = B_FALSE; | ||||
head = kmem_cache_alloc(hdr_l2only_cache, KM_PUSHPAGE); | head = kmem_cache_alloc(hdr_l2only_cache, KM_PUSHPAGE); | ||||
head->b_flags |= ARC_FLAG_L2_WRITE_HEAD; | head->b_flags |= ARC_FLAG_L2_WRITE_HEAD; | ||||
head->b_flags |= ARC_FLAG_HAS_L2HDR; | head->b_flags |= ARC_FLAG_HAS_L2HDR; | ||||
ARCSTAT_BUMP(arcstat_l2_write_buffer_iter); | ARCSTAT_BUMP(arcstat_l2_write_buffer_iter); | ||||
/* | /* | ||||
* We will want to try to compress buffers that are at least 2x the | * We will want to try to compress buffers that are at least 2x the | ||||
Show All 25 Lines | for (try = 0; try <= 3; try++) { | ||||
headroom = target_sz * l2arc_headroom; | headroom = target_sz * l2arc_headroom; | ||||
if (do_headroom_boost) | if (do_headroom_boost) | ||||
headroom = (headroom * l2arc_headroom_boost) / 100; | headroom = (headroom * l2arc_headroom_boost) / 100; | ||||
for (; hdr; hdr = hdr_prev) { | for (; hdr; hdr = hdr_prev) { | ||||
kmutex_t *hash_lock; | kmutex_t *hash_lock; | ||||
uint64_t buf_sz; | uint64_t buf_sz; | ||||
uint64_t buf_a_sz; | |||||
if (arc_warm == B_FALSE) | if (arc_warm == B_FALSE) | ||||
hdr_prev = multilist_sublist_next(mls, hdr); | hdr_prev = multilist_sublist_next(mls, hdr); | ||||
else | else | ||||
hdr_prev = multilist_sublist_prev(mls, hdr); | hdr_prev = multilist_sublist_prev(mls, hdr); | ||||
ARCSTAT_INCR(arcstat_l2_write_buffer_bytes_scanned, hdr->b_size); | ARCSTAT_INCR(arcstat_l2_write_buffer_bytes_scanned, hdr->b_size); | ||||
hash_lock = HDR_LOCK(hdr); | hash_lock = HDR_LOCK(hdr); | ||||
Show All 15 Lines | for (; hdr; hdr = hdr_prev) { | ||||
break; | break; | ||||
} | } | ||||
if (!l2arc_write_eligible(guid, hdr)) { | if (!l2arc_write_eligible(guid, hdr)) { | ||||
mutex_exit(hash_lock); | mutex_exit(hash_lock); | ||||
continue; | continue; | ||||
} | } | ||||
if ((write_sz + hdr->b_size) > target_sz) { | /* | ||||
* Assume that the buffer is not going to be compressed | |||||
* and could take more space on disk because of a larger | |||||
* disk block size. | |||||
*/ | |||||
buf_sz = hdr->b_size; | |||||
buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz); | |||||
if ((write_asize + buf_a_sz) > target_sz) { | |||||
full = B_TRUE; | full = B_TRUE; | ||||
mutex_exit(hash_lock); | mutex_exit(hash_lock); | ||||
ARCSTAT_BUMP(arcstat_l2_write_full); | ARCSTAT_BUMP(arcstat_l2_write_full); | ||||
break; | break; | ||||
} | } | ||||
if (pio == NULL) { | if (pio == NULL) { | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | for (; hdr; hdr = hdr_prev) { | ||||
* of this function. Thus, we can't simply | * of this function. Thus, we can't simply | ||||
* change the b_flags field to denote that the | * change the b_flags field to denote that the | ||||
* IO has been sent. We can change the b_daddr | * IO has been sent. We can change the b_daddr | ||||
* field of the L2 portion, though, since we'll | * field of the L2 portion, though, since we'll | ||||
* be holding the l2ad_mtx; which is why we're | * be holding the l2ad_mtx; which is why we're | ||||
* using it to denote the header's state change. | * using it to denote the header's state change. | ||||
*/ | */ | ||||
hdr->b_l2hdr.b_daddr = L2ARC_ADDR_UNSET; | hdr->b_l2hdr.b_daddr = L2ARC_ADDR_UNSET; | ||||
buf_sz = hdr->b_size; | |||||
hdr->b_flags |= ARC_FLAG_HAS_L2HDR; | hdr->b_flags |= ARC_FLAG_HAS_L2HDR; | ||||
mutex_enter(&dev->l2ad_mtx); | mutex_enter(&dev->l2ad_mtx); | ||||
list_insert_head(&dev->l2ad_buflist, hdr); | list_insert_head(&dev->l2ad_buflist, hdr); | ||||
mutex_exit(&dev->l2ad_mtx); | mutex_exit(&dev->l2ad_mtx); | ||||
/* | /* | ||||
* Compute and store the buffer cksum before | * Compute and store the buffer cksum before | ||||
* writing. On debug the cksum is verified first. | * writing. On debug the cksum is verified first. | ||||
*/ | */ | ||||
arc_cksum_verify(hdr->b_l1hdr.b_buf); | arc_cksum_verify(hdr->b_l1hdr.b_buf); | ||||
arc_cksum_compute(hdr->b_l1hdr.b_buf, B_TRUE); | arc_cksum_compute(hdr->b_l1hdr.b_buf, B_TRUE); | ||||
mutex_exit(hash_lock); | mutex_exit(hash_lock); | ||||
write_sz += buf_sz; | write_sz += buf_sz; | ||||
write_asize += buf_a_sz; | |||||
} | } | ||||
multilist_sublist_unlock(mls); | multilist_sublist_unlock(mls); | ||||
if (full == B_TRUE) | if (full == B_TRUE) | ||||
break; | break; | ||||
} | } | ||||
/* No buffers selected for writing? */ | /* No buffers selected for writing? */ | ||||
if (pio == NULL) { | if (pio == NULL) { | ||||
ASSERT0(write_sz); | ASSERT0(write_sz); | ||||
ASSERT(!HDR_HAS_L1HDR(head)); | ASSERT(!HDR_HAS_L1HDR(head)); | ||||
kmem_cache_free(hdr_l2only_cache, head); | kmem_cache_free(hdr_l2only_cache, head); | ||||
return (0); | return (0); | ||||
} | } | ||||
mutex_enter(&dev->l2ad_mtx); | mutex_enter(&dev->l2ad_mtx); | ||||
/* | /* | ||||
* Note that elsewhere in this file arcstat_l2_asize | |||||
* and the used space on l2ad_vdev are updated using b_asize, | |||||
* which is not necessarily rounded up to the device block size. | |||||
* To keep accounting consistent we do the same here as well: | |||||
* stats_size accumulates the sum of b_asize of the written buffers, | |||||
* while write_asize accumulates the sum of b_asize rounded up | |||||
* to the device block size. | |||||
* The latter sum is used only to validate the correctness of the code. | |||||
*/ | |||||
uint64_t stats_size = 0; | |||||
write_asize = 0; | |||||
/* | |||||
* Now start writing the buffers. We're starting at the write head | * Now start writing the buffers. We're starting at the write head | ||||
* and work backwards, retracing the course of the buffer selector | * and work backwards, retracing the course of the buffer selector | ||||
* loop above. | * loop above. | ||||
*/ | */ | ||||
for (hdr = list_prev(&dev->l2ad_buflist, head); hdr; | for (hdr = list_prev(&dev->l2ad_buflist, head); hdr; | ||||
hdr = list_prev(&dev->l2ad_buflist, hdr)) { | hdr = list_prev(&dev->l2ad_buflist, hdr)) { | ||||
uint64_t buf_sz; | uint64_t buf_sz; | ||||
Show All 36 Lines | for (hdr = list_prev(&dev->l2ad_buflist, head); hdr; | ||||
* We need to do this regardless if buf_sz is zero or | * We need to do this regardless if buf_sz is zero or | ||||
* not, otherwise, when this l2hdr is evicted we'll | * not, otherwise, when this l2hdr is evicted we'll | ||||
* remove a reference that was never added. | * remove a reference that was never added. | ||||
*/ | */ | ||||
(void) refcount_add_many(&dev->l2ad_alloc, buf_sz, hdr); | (void) refcount_add_many(&dev->l2ad_alloc, buf_sz, hdr); | ||||
/* Compression may have squashed the buffer to zero length. */ | /* Compression may have squashed the buffer to zero length. */ | ||||
if (buf_sz != 0) { | if (buf_sz != 0) { | ||||
uint64_t buf_p_sz; | uint64_t buf_a_sz; | ||||
wzio = zio_write_phys(pio, dev->l2ad_vdev, | wzio = zio_write_phys(pio, dev->l2ad_vdev, | ||||
dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF, | dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF, | ||||
NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE, | NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE, | ||||
ZIO_FLAG_CANFAIL, B_FALSE); | ZIO_FLAG_CANFAIL, B_FALSE); | ||||
DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev, | DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev, | ||||
zio_t *, wzio); | zio_t *, wzio); | ||||
(void) zio_nowait(wzio); | (void) zio_nowait(wzio); | ||||
write_asize += buf_sz; | stats_size += buf_sz; | ||||
/* | /* | ||||
* Keep the clock hand suitably device-aligned. | * Keep the clock hand suitably device-aligned. | ||||
*/ | */ | ||||
buf_p_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz); | buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz); | ||||
write_psize += buf_p_sz; | write_asize += buf_a_sz; | ||||
dev->l2ad_hand += buf_p_sz; | dev->l2ad_hand += buf_a_sz; | ||||
} | } | ||||
} | } | ||||
mutex_exit(&dev->l2ad_mtx); | mutex_exit(&dev->l2ad_mtx); | ||||
ASSERT3U(write_asize, <=, target_sz); | ASSERT3U(write_asize, <=, target_sz); | ||||
ARCSTAT_BUMP(arcstat_l2_writes_sent); | ARCSTAT_BUMP(arcstat_l2_writes_sent); | ||||
ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize); | ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize); | ||||
ARCSTAT_INCR(arcstat_l2_size, write_sz); | ARCSTAT_INCR(arcstat_l2_size, write_sz); | ||||
ARCSTAT_INCR(arcstat_l2_asize, write_asize); | ARCSTAT_INCR(arcstat_l2_asize, stats_size); | ||||
vdev_space_update(dev->l2ad_vdev, write_asize, 0, 0); | vdev_space_update(dev->l2ad_vdev, stats_size, 0, 0); | ||||
/* | /* | ||||
* Bump device hand to the device start if it is approaching the end. | * Bump device hand to the device start if it is approaching the end. | ||||
* l2arc_evict() will already have evicted ahead for this case. | * l2arc_evict() will already have evicted ahead for this case. | ||||
*/ | */ | ||||
if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) { | if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) { | ||||
dev->l2ad_hand = dev->l2ad_start; | dev->l2ad_hand = dev->l2ad_start; | ||||
dev->l2ad_first = B_FALSE; | dev->l2ad_first = B_FALSE; | ||||
▲ Show 20 Lines • Show All 447 Lines • Show Last 20 Lines |