Changeset View
Changeset View
Standalone View
Standalone View
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 1,554 Lines • ▼ Show 20 Lines | typedef struct l2arc_data_free { | ||||
arc_buf_contents_t l2df_type; | arc_buf_contents_t l2df_type; | ||||
list_node_t l2df_list_node; | list_node_t l2df_list_node; | ||||
} l2arc_data_free_t; | } l2arc_data_free_t; | ||||
static kmutex_t l2arc_feed_thr_lock; | static kmutex_t l2arc_feed_thr_lock; | ||||
static kcondvar_t l2arc_feed_thr_cv; | static kcondvar_t l2arc_feed_thr_cv; | ||||
static uint8_t l2arc_thread_exit; | static uint8_t l2arc_thread_exit; | ||||
static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *); | static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *, boolean_t); | ||||
static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *); | static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *); | ||||
static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *); | static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *, boolean_t); | ||||
static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *); | static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *); | ||||
static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *); | static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *); | ||||
static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag); | static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag); | ||||
static void arc_hdr_free_pabd(arc_buf_hdr_t *); | static void arc_hdr_free_pabd(arc_buf_hdr_t *); | ||||
static void arc_hdr_alloc_pabd(arc_buf_hdr_t *); | static void arc_hdr_alloc_pabd(arc_buf_hdr_t *, boolean_t); | ||||
static void arc_access(arc_buf_hdr_t *, kmutex_t *); | static void arc_access(arc_buf_hdr_t *, kmutex_t *); | ||||
static boolean_t arc_is_overflowing(); | static boolean_t arc_is_overflowing(); | ||||
static void arc_buf_watch(arc_buf_t *); | static void arc_buf_watch(arc_buf_t *); | ||||
static void arc_prune_async(int64_t); | static void arc_prune_async(int64_t); | ||||
static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *); | static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *); | ||||
static uint32_t arc_bufc_to_flags(arc_buf_contents_t); | static uint32_t arc_bufc_to_flags(arc_buf_contents_t); | ||||
static inline void arc_hdr_set_flags(arc_buf_hdr_t *hdr, arc_flags_t flags); | static inline void arc_hdr_set_flags(arc_buf_hdr_t *hdr, arc_flags_t flags); | ||||
▲ Show 20 Lines • Show All 1,769 Lines • ▼ Show 20 Lines | #endif | ||||
} | } | ||||
/* clean up the buf */ | /* clean up the buf */ | ||||
buf->b_hdr = NULL; | buf->b_hdr = NULL; | ||||
kmem_cache_free(buf_cache, buf); | kmem_cache_free(buf_cache, buf); | ||||
} | } | ||||
static void | static void | ||||
arc_hdr_alloc_pabd(arc_buf_hdr_t *hdr) | arc_hdr_alloc_pabd(arc_buf_hdr_t *hdr, boolean_t do_adapt) | ||||
{ | { | ||||
ASSERT3U(HDR_GET_LSIZE(hdr), >, 0); | ASSERT3U(HDR_GET_LSIZE(hdr), >, 0); | ||||
ASSERT(HDR_HAS_L1HDR(hdr)); | ASSERT(HDR_HAS_L1HDR(hdr)); | ||||
ASSERT(!HDR_SHARED_DATA(hdr)); | ASSERT(!HDR_SHARED_DATA(hdr)); | ||||
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); | ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); | ||||
hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); | hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr, do_adapt); | ||||
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; | hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; | ||||
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); | ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); | ||||
ARCSTAT_INCR(arcstat_compressed_size, arc_hdr_size(hdr)); | ARCSTAT_INCR(arcstat_compressed_size, arc_hdr_size(hdr)); | ||||
ARCSTAT_INCR(arcstat_uncompressed_size, HDR_GET_LSIZE(hdr)); | ARCSTAT_INCR(arcstat_uncompressed_size, HDR_GET_LSIZE(hdr)); | ||||
} | } | ||||
static void | static void | ||||
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines | arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, | ||||
hdr->b_l1hdr.b_bufcnt = 0; | hdr->b_l1hdr.b_bufcnt = 0; | ||||
hdr->b_l1hdr.b_buf = NULL; | hdr->b_l1hdr.b_buf = NULL; | ||||
/* | /* | ||||
* Allocate the hdr's buffer. This will contain either | * Allocate the hdr's buffer. This will contain either | ||||
* the compressed or uncompressed data depending on the block | * the compressed or uncompressed data depending on the block | ||||
* it references and compressed arc enablement. | * it references and compressed arc enablement. | ||||
*/ | */ | ||||
arc_hdr_alloc_pabd(hdr); | arc_hdr_alloc_pabd(hdr, B_TRUE); | ||||
ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); | ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); | ||||
return (hdr); | return (hdr); | ||||
} | } | ||||
/* | /* | ||||
* Transition between the two allocation states for the arc_buf_hdr struct. | * Transition between the two allocation states for the arc_buf_hdr struct. | ||||
* The arc_buf_hdr struct can be allocated with (hdr_full_cache) or without | * The arc_buf_hdr struct can be allocated with (hdr_full_cache) or without | ||||
▲ Show 20 Lines • Show All 1,704 Lines • ▼ Show 20 Lines | arc_is_overflowing(void) | ||||
* 2 * aggsum_borrow_multiplier * NUM_CPUS * the average size of a block | * 2 * aggsum_borrow_multiplier * NUM_CPUS * the average size of a block | ||||
* in the ARC. In practice, that's in the tens of MB, which is low | * in the ARC. In practice, that's in the tens of MB, which is low | ||||
* enough to be safe. | * enough to be safe. | ||||
*/ | */ | ||||
return (aggsum_lower_bound(&arc_size) >= arc_c + overflow); | return (aggsum_lower_bound(&arc_size) >= arc_c + overflow); | ||||
} | } | ||||
static abd_t * | static abd_t * | ||||
arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag) | arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag, boolean_t do_adapt) | ||||
{ | { | ||||
arc_buf_contents_t type = arc_buf_type(hdr); | arc_buf_contents_t type = arc_buf_type(hdr); | ||||
arc_get_data_impl(hdr, size, tag); | arc_get_data_impl(hdr, size, tag, do_adapt); | ||||
if (type == ARC_BUFC_METADATA) { | if (type == ARC_BUFC_METADATA) { | ||||
return (abd_alloc(size, B_TRUE)); | return (abd_alloc(size, B_TRUE)); | ||||
} else { | } else { | ||||
ASSERT(type == ARC_BUFC_DATA); | ASSERT(type == ARC_BUFC_DATA); | ||||
return (abd_alloc(size, B_FALSE)); | return (abd_alloc(size, B_FALSE)); | ||||
} | } | ||||
} | } | ||||
static void * | static void * | ||||
arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag) | arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag) | ||||
{ | { | ||||
arc_buf_contents_t type = arc_buf_type(hdr); | arc_buf_contents_t type = arc_buf_type(hdr); | ||||
arc_get_data_impl(hdr, size, tag); | arc_get_data_impl(hdr, size, tag, B_TRUE); | ||||
if (type == ARC_BUFC_METADATA) { | if (type == ARC_BUFC_METADATA) { | ||||
return (zio_buf_alloc(size)); | return (zio_buf_alloc(size)); | ||||
} else { | } else { | ||||
ASSERT(type == ARC_BUFC_DATA); | ASSERT(type == ARC_BUFC_DATA); | ||||
return (zio_data_buf_alloc(size)); | return (zio_data_buf_alloc(size)); | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Allocate a block and return it to the caller. If we are hitting the | * Allocate a block and return it to the caller. If we are hitting the | ||||
* hard limit for the cache size, we must sleep, waiting for the eviction | * hard limit for the cache size, we must sleep, waiting for the eviction | ||||
* thread to catch up. If we're past the target size but below the hard | * thread to catch up. If we're past the target size but below the hard | ||||
* limit, we'll only signal the reclaim thread and continue on. | * limit, we'll only signal the reclaim thread and continue on. | ||||
*/ | */ | ||||
static void | static void | ||||
arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag) | arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag, boolean_t do_adapt) | ||||
{ | { | ||||
arc_state_t *state = hdr->b_l1hdr.b_state; | arc_state_t *state = hdr->b_l1hdr.b_state; | ||||
arc_buf_contents_t type = arc_buf_type(hdr); | arc_buf_contents_t type = arc_buf_type(hdr); | ||||
if (do_adapt) | |||||
arc_adapt(size, state); | arc_adapt(size, state); | ||||
/* | /* | ||||
* If arc_size is currently overflowing, and has grown past our | * If arc_size is currently overflowing, and has grown past our | ||||
* upper limit, we must be adding data faster than the evict | * upper limit, we must be adding data faster than the evict | ||||
* thread can evict. Thus, to ensure we don't compound the | * thread can evict. Thus, to ensure we don't compound the | ||||
* problem by adding more data and forcing arc_size to grow even | * problem by adding more data and forcing arc_size to grow even | ||||
* further past its target size, we halt and wait for the | * further past its target size, we halt and wait for the | ||||
* eviction thread to catch up. | * eviction thread to catch up. | ||||
▲ Show 20 Lines • Show All 734 Lines • ▼ Show 20 Lines | if (hdr == NULL) { | ||||
* This hdr is in the ghost list so we access it | * This hdr is in the ghost list so we access it | ||||
* to move it out of the ghost list before we | * to move it out of the ghost list before we | ||||
* initiate the read. If it's a prefetch then | * initiate the read. If it's a prefetch then | ||||
* it won't have a callback so we'll remove the | * it won't have a callback so we'll remove the | ||||
* reference that arc_buf_alloc_impl() created. We | * reference that arc_buf_alloc_impl() created. We | ||||
* do this after we've called arc_access() to | * do this after we've called arc_access() to | ||||
* avoid hitting an assert in remove_reference(). | * avoid hitting an assert in remove_reference(). | ||||
*/ | */ | ||||
arc_adapt(arc_hdr_size(hdr), hdr->b_l1hdr.b_state); | |||||
arc_access(hdr, hash_lock); | arc_access(hdr, hash_lock); | ||||
arc_hdr_alloc_pabd(hdr); | arc_hdr_alloc_pabd(hdr, B_FALSE); | ||||
} | } | ||||
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); | ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); | ||||
size = arc_hdr_size(hdr); | size = arc_hdr_size(hdr); | ||||
/* | /* | ||||
* If compression is enabled on the hdr, then we will do | * If compression is enabled on the hdr, then we will do | ||||
* RAW I/O and will store the compressed data in the hdr's | * RAW I/O and will store the compressed data in the hdr's | ||||
* data block. Otherwise, the hdr's data block will contain | * data block. Otherwise, the hdr's data block will contain | ||||
▲ Show 20 Lines • Show All 397 Lines • ▼ Show 20 Lines | if (arc_buf_is_shared(buf)) { | ||||
* Now we need to recreate the hdr's b_pabd. Since we | * Now we need to recreate the hdr's b_pabd. Since we | ||||
* have lastbuf handy, we try to share with it, but if | * have lastbuf handy, we try to share with it, but if | ||||
* we can't then we allocate a new b_pabd and copy the | * we can't then we allocate a new b_pabd and copy the | ||||
* data from buf into it. | * data from buf into it. | ||||
*/ | */ | ||||
if (arc_can_share(hdr, lastbuf)) { | if (arc_can_share(hdr, lastbuf)) { | ||||
arc_share_buf(hdr, lastbuf); | arc_share_buf(hdr, lastbuf); | ||||
} else { | } else { | ||||
arc_hdr_alloc_pabd(hdr); | arc_hdr_alloc_pabd(hdr, B_TRUE); | ||||
abd_copy_from_buf(hdr->b_l1hdr.b_pabd, | abd_copy_from_buf(hdr->b_l1hdr.b_pabd, | ||||
buf->b_data, psize); | buf->b_data, psize); | ||||
} | } | ||||
VERIFY3P(lastbuf->b_data, !=, NULL); | VERIFY3P(lastbuf->b_data, !=, NULL); | ||||
} else if (HDR_SHARED_DATA(hdr)) { | } else if (HDR_SHARED_DATA(hdr)) { | ||||
/* | /* | ||||
* Uncompressed shared buffers are always at the end | * Uncompressed shared buffers are always at the end | ||||
* of the list. Compressed buffers don't have the | * of the list. Compressed buffers don't have the | ||||
▲ Show 20 Lines • Show All 146 Lines • ▼ Show 20 Lines | #endif | ||||
* doing so would cause the ARC to be full of linear ABDs if we write a | * doing so would cause the ARC to be full of linear ABDs if we write a | ||||
* lot of shareable data. As a compromise, we check whether scattered | * lot of shareable data. As a compromise, we check whether scattered | ||||
* ABDs are allowed, and assume that if they are then the user wants | * ABDs are allowed, and assume that if they are then the user wants | ||||
* the ARC to be primarily filled with them regardless of the data being | * the ARC to be primarily filled with them regardless of the data being | ||||
* written. Therefore, if they're allowed then we allocate one and copy | * written. Therefore, if they're allowed then we allocate one and copy | ||||
* the data into it; otherwise, we share the data directly if we can. | * the data into it; otherwise, we share the data directly if we can. | ||||
*/ | */ | ||||
if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) { | if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) { | ||||
arc_hdr_alloc_pabd(hdr); | arc_hdr_alloc_pabd(hdr, B_TRUE); | ||||
/* | /* | ||||
* Ideally, we would always copy the io_abd into b_pabd, but the | * Ideally, we would always copy the io_abd into b_pabd, but the | ||||
* user may have disabled compressed ARC, thus we must check the | * user may have disabled compressed ARC, thus we must check the | ||||
* hdr's compression setting rather than the io_bp's. | * hdr's compression setting rather than the io_bp's. | ||||
*/ | */ | ||||
if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) { | if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) { | ||||
ASSERT3U(BP_GET_COMPRESS(zio->io_bp), !=, | ASSERT3U(BP_GET_COMPRESS(zio->io_bp), !=, | ||||
▲ Show 20 Lines • Show All 2,018 Lines • Show Last 20 Lines |