Changeset View
Changeset View
Standalone View
Standalone View
sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
Show First 20 Lines • Show All 179 Lines • ▼ Show 20 Lines | |||||
static uint_t | static uint_t | ||||
abd_chunkcnt_for_bytes(size_t size) | abd_chunkcnt_for_bytes(size_t size) | ||||
{ | { | ||||
return (P2ROUNDUP(size, PAGESIZE) / PAGESIZE); | return (P2ROUNDUP(size, PAGESIZE) / PAGESIZE); | ||||
} | } | ||||
abd_t * | abd_t * | ||||
abd_alloc_struct(size_t size) | abd_alloc_struct_impl(size_t size) | ||||
{ | { | ||||
/* | /* | ||||
* In Linux we do not use the size passed in during ABD | * In Linux we do not use the size passed in during ABD | ||||
* allocation, so we just ignore it. | * allocation, so we just ignore it. | ||||
*/ | */ | ||||
abd_t *abd = kmem_cache_alloc(abd_cache, KM_PUSHPAGE); | abd_t *abd = kmem_cache_alloc(abd_cache, KM_PUSHPAGE); | ||||
ASSERT3P(abd, !=, NULL); | ASSERT3P(abd, !=, NULL); | ||||
list_link_init(&abd->abd_gang_link); | |||||
mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL); | |||||
ABDSTAT_INCR(abdstat_struct_size, sizeof (abd_t)); | ABDSTAT_INCR(abdstat_struct_size, sizeof (abd_t)); | ||||
return (abd); | return (abd); | ||||
} | } | ||||
void | void | ||||
abd_free_struct(abd_t *abd) | abd_free_struct_impl(abd_t *abd) | ||||
{ | { | ||||
mutex_destroy(&abd->abd_mtx); | |||||
ASSERT(!list_link_active(&abd->abd_gang_link)); | |||||
kmem_cache_free(abd_cache, abd); | kmem_cache_free(abd_cache, abd); | ||||
ABDSTAT_INCR(abdstat_struct_size, -(int)sizeof (abd_t)); | ABDSTAT_INCR(abdstat_struct_size, -(int)sizeof (abd_t)); | ||||
} | } | ||||
#ifdef _KERNEL | #ifdef _KERNEL | ||||
/* | /* | ||||
* Mark zfs data pages so they can be excluded from kernel crash dumps | * Mark zfs data pages so they can be excluded from kernel crash dumps | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 251 Lines • ▼ Show 20 Lines | abd_alloc_zero_scatter(void) | ||||
while (sg_alloc_table(&table, nr_pages, gfp)) { | while (sg_alloc_table(&table, nr_pages, gfp)) { | ||||
ABDSTAT_BUMP(abdstat_scatter_sg_table_retry); | ABDSTAT_BUMP(abdstat_scatter_sg_table_retry); | ||||
schedule_timeout_interruptible(1); | schedule_timeout_interruptible(1); | ||||
} | } | ||||
ASSERT3U(table.nents, ==, nr_pages); | ASSERT3U(table.nents, ==, nr_pages); | ||||
abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE); | abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE); | ||||
abd_zero_scatter->abd_flags = ABD_FLAG_OWNER; | abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER; | ||||
ABD_SCATTER(abd_zero_scatter).abd_offset = 0; | ABD_SCATTER(abd_zero_scatter).abd_offset = 0; | ||||
ABD_SCATTER(abd_zero_scatter).abd_sgl = table.sgl; | ABD_SCATTER(abd_zero_scatter).abd_sgl = table.sgl; | ||||
ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages; | ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages; | ||||
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE; | abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE; | ||||
abd_zero_scatter->abd_parent = NULL; | |||||
abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK | ABD_FLAG_ZEROS; | abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK | ABD_FLAG_ZEROS; | ||||
zfs_refcount_create(&abd_zero_scatter->abd_children); | |||||
abd_for_each_sg(abd_zero_scatter, sg, nr_pages, i) { | abd_for_each_sg(abd_zero_scatter, sg, nr_pages, i) { | ||||
sg_set_page(sg, abd_zero_page, PAGESIZE, 0); | sg_set_page(sg, abd_zero_page, PAGESIZE, 0); | ||||
} | } | ||||
ABDSTAT_BUMP(abdstat_scatter_cnt); | ABDSTAT_BUMP(abdstat_scatter_cnt); | ||||
ABDSTAT_INCR(abdstat_scatter_data_size, PAGESIZE); | ABDSTAT_INCR(abdstat_scatter_data_size, PAGESIZE); | ||||
ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk); | ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk); | ||||
▲ Show 20 Lines • Show All 103 Lines • ▼ Show 20 Lines | |||||
{ | { | ||||
unsigned nr_pages = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE); | unsigned nr_pages = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE); | ||||
struct scatterlist *sg; | struct scatterlist *sg; | ||||
int i; | int i; | ||||
abd_zero_page = umem_alloc_aligned(PAGESIZE, 64, KM_SLEEP); | abd_zero_page = umem_alloc_aligned(PAGESIZE, 64, KM_SLEEP); | ||||
memset(abd_zero_page, 0, PAGESIZE); | memset(abd_zero_page, 0, PAGESIZE); | ||||
abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE); | abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE); | ||||
abd_zero_scatter->abd_flags = ABD_FLAG_OWNER; | abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER; | ||||
abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK | ABD_FLAG_ZEROS; | abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK | ABD_FLAG_ZEROS; | ||||
ABD_SCATTER(abd_zero_scatter).abd_offset = 0; | ABD_SCATTER(abd_zero_scatter).abd_offset = 0; | ||||
ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages; | ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages; | ||||
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE; | abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE; | ||||
abd_zero_scatter->abd_parent = NULL; | |||||
zfs_refcount_create(&abd_zero_scatter->abd_children); | zfs_refcount_create(&abd_zero_scatter->abd_children); | ||||
ABD_SCATTER(abd_zero_scatter).abd_sgl = vmem_alloc(nr_pages * | ABD_SCATTER(abd_zero_scatter).abd_sgl = vmem_alloc(nr_pages * | ||||
sizeof (struct scatterlist), KM_SLEEP); | sizeof (struct scatterlist), KM_SLEEP); | ||||
sg_init_table(ABD_SCATTER(abd_zero_scatter).abd_sgl, nr_pages); | sg_init_table(ABD_SCATTER(abd_zero_scatter).abd_sgl, nr_pages); | ||||
abd_for_each_sg(abd_zero_scatter, sg, nr_pages, i) { | abd_for_each_sg(abd_zero_scatter, sg, nr_pages, i) { | ||||
sg_set_page(sg, abd_zero_page, PAGESIZE, 0); | sg_set_page(sg, abd_zero_page, PAGESIZE, 0); | ||||
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines | abd_verify_scatter(abd_t *abd) | ||||
abd_for_each_sg(abd, sg, n, i) { | abd_for_each_sg(abd, sg, n, i) { | ||||
ASSERT3P(sg_page(sg), !=, NULL); | ASSERT3P(sg_page(sg), !=, NULL); | ||||
} | } | ||||
} | } | ||||
static void | static void | ||||
abd_free_zero_scatter(void) | abd_free_zero_scatter(void) | ||||
{ | { | ||||
zfs_refcount_destroy(&abd_zero_scatter->abd_children); | |||||
ABDSTAT_BUMPDOWN(abdstat_scatter_cnt); | ABDSTAT_BUMPDOWN(abdstat_scatter_cnt); | ||||
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGESIZE); | ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGESIZE); | ||||
ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk); | ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk); | ||||
abd_free_sg_table(abd_zero_scatter); | abd_free_sg_table(abd_zero_scatter); | ||||
abd_free_struct(abd_zero_scatter); | abd_free_struct(abd_zero_scatter); | ||||
abd_zero_scatter = NULL; | abd_zero_scatter = NULL; | ||||
ASSERT3P(abd_zero_page, !=, NULL); | ASSERT3P(abd_zero_page, !=, NULL); | ||||
▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines | abd_free_linear_page(abd_t *abd) | ||||
struct scatterlist *sg = abd->abd_u.abd_linear.abd_sgl; | struct scatterlist *sg = abd->abd_u.abd_linear.abd_sgl; | ||||
abd->abd_flags &= ~ABD_FLAG_LINEAR; | abd->abd_flags &= ~ABD_FLAG_LINEAR; | ||||
abd->abd_flags &= ~ABD_FLAG_LINEAR_PAGE; | abd->abd_flags &= ~ABD_FLAG_LINEAR_PAGE; | ||||
ABD_SCATTER(abd).abd_nents = 1; | ABD_SCATTER(abd).abd_nents = 1; | ||||
ABD_SCATTER(abd).abd_offset = 0; | ABD_SCATTER(abd).abd_offset = 0; | ||||
ABD_SCATTER(abd).abd_sgl = sg; | ABD_SCATTER(abd).abd_sgl = sg; | ||||
abd_free_chunks(abd); | abd_free_chunks(abd); | ||||
zfs_refcount_destroy(&abd->abd_children); | |||||
abd_update_scatter_stats(abd, ABDSTAT_DECR); | abd_update_scatter_stats(abd, ABDSTAT_DECR); | ||||
abd_free_struct(abd); | |||||
} | } | ||||
/* | /* | ||||
* If we're going to use this ABD for doing I/O using the block layer, the | * If we're going to use this ABD for doing I/O using the block layer, the | ||||
* consumer of the ABD data doesn't care if it's scattered or not, and we don't | * consumer of the ABD data doesn't care if it's scattered or not, and we don't | ||||
* plan to store this ABD in memory for a long period of time, we should | * plan to store this ABD in memory for a long period of time, we should | ||||
* allocate the ABD type that requires the least data copying to do the I/O. | * allocate the ABD type that requires the least data copying to do the I/O. | ||||
* | * | ||||
* On Linux the optimal thing to do would be to use abd_get_offset() and | * On Linux the optimal thing to do would be to use abd_get_offset() and | ||||
* construct a new ABD which shares the original pages thereby eliminating | * construct a new ABD which shares the original pages thereby eliminating | ||||
* the copy. But for the moment a new linear ABD is allocated until this | * the copy. But for the moment a new linear ABD is allocated until this | ||||
* performance optimization can be implemented. | * performance optimization can be implemented. | ||||
*/ | */ | ||||
abd_t * | abd_t * | ||||
abd_alloc_for_io(size_t size, boolean_t is_metadata) | abd_alloc_for_io(size_t size, boolean_t is_metadata) | ||||
{ | { | ||||
return (abd_alloc(size, is_metadata)); | return (abd_alloc(size, is_metadata)); | ||||
} | } | ||||
abd_t * | abd_t * | ||||
abd_get_offset_scatter(abd_t *sabd, size_t off) | abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off) | ||||
{ | { | ||||
abd_t *abd = NULL; | |||||
int i = 0; | int i = 0; | ||||
struct scatterlist *sg = NULL; | struct scatterlist *sg = NULL; | ||||
abd_verify(sabd); | abd_verify(sabd); | ||||
ASSERT3U(off, <=, sabd->abd_size); | ASSERT3U(off, <=, sabd->abd_size); | ||||
size_t new_offset = ABD_SCATTER(sabd).abd_offset + off; | size_t new_offset = ABD_SCATTER(sabd).abd_offset + off; | ||||
if (abd == NULL) | |||||
abd = abd_alloc_struct(0); | abd = abd_alloc_struct(0); | ||||
/* | /* | ||||
* Even if this buf is filesystem metadata, we only track that | * Even if this buf is filesystem metadata, we only track that | ||||
* if we own the underlying data buffer, which is not true in | * if we own the underlying data buffer, which is not true in | ||||
* this case. Therefore, we don't ever use ABD_FLAG_META here. | * this case. Therefore, we don't ever use ABD_FLAG_META here. | ||||
*/ | */ | ||||
abd->abd_flags = 0; | |||||
abd_for_each_sg(sabd, sg, ABD_SCATTER(sabd).abd_nents, i) { | abd_for_each_sg(sabd, sg, ABD_SCATTER(sabd).abd_nents, i) { | ||||
if (new_offset < sg->length) | if (new_offset < sg->length) | ||||
break; | break; | ||||
new_offset -= sg->length; | new_offset -= sg->length; | ||||
} | } | ||||
ABD_SCATTER(abd).abd_sgl = sg; | ABD_SCATTER(abd).abd_sgl = sg; | ||||
▲ Show 20 Lines • Show All 131 Lines • ▼ Show 20 Lines | |||||
* bio_nr_pages for ABD. | * bio_nr_pages for ABD. | ||||
* @off is the offset in @abd | * @off is the offset in @abd | ||||
*/ | */ | ||||
unsigned long | unsigned long | ||||
abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off) | abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off) | ||||
{ | { | ||||
unsigned long pos; | unsigned long pos; | ||||
while (abd_is_gang(abd)) | if (abd_is_gang(abd)) { | ||||
abd = abd_gang_get_offset(abd, &off); | unsigned long count = 0; | ||||
ASSERT(!abd_is_gang(abd)); | for (abd_t *cabd = abd_gang_get_offset(abd, &off); | ||||
cabd != NULL && size != 0; | |||||
cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) { | |||||
ASSERT3U(off, <, cabd->abd_size); | |||||
int mysize = MIN(size, cabd->abd_size - off); | |||||
count += abd_nr_pages_off(cabd, mysize, off); | |||||
size -= mysize; | |||||
off = 0; | |||||
} | |||||
return (count); | |||||
} | |||||
if (abd_is_linear(abd)) | if (abd_is_linear(abd)) | ||||
pos = (unsigned long)abd_to_buf(abd) + off; | pos = (unsigned long)abd_to_buf(abd) + off; | ||||
else | else | ||||
pos = ABD_SCATTER(abd).abd_offset + off; | pos = ABD_SCATTER(abd).abd_offset + off; | ||||
return ((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) - | return (((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) - | ||||
(pos >> PAGE_SHIFT); | (pos >> PAGE_SHIFT)); | ||||
} | } | ||||
static unsigned int | static unsigned int | ||||
bio_map(struct bio *bio, void *buf_ptr, unsigned int bio_size) | bio_map(struct bio *bio, void *buf_ptr, unsigned int bio_size) | ||||
{ | { | ||||
unsigned int offset, size, i; | unsigned int offset, size, i; | ||||
struct page *page; | struct page *page; | ||||
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines | |||||
* bio_map for ABD. | * bio_map for ABD. | ||||
* @off is the offset in @abd | * @off is the offset in @abd | ||||
* Remaining IO size is returned | * Remaining IO size is returned | ||||
*/ | */ | ||||
unsigned int | unsigned int | ||||
abd_bio_map_off(struct bio *bio, abd_t *abd, | abd_bio_map_off(struct bio *bio, abd_t *abd, | ||||
unsigned int io_size, size_t off) | unsigned int io_size, size_t off) | ||||
{ | { | ||||
int i; | |||||
struct abd_iter aiter; | struct abd_iter aiter; | ||||
ASSERT3U(io_size, <=, abd->abd_size - off); | ASSERT3U(io_size, <=, abd->abd_size - off); | ||||
if (abd_is_linear(abd)) | if (abd_is_linear(abd)) | ||||
return (bio_map(bio, ((char *)abd_to_buf(abd)) + off, io_size)); | return (bio_map(bio, ((char *)abd_to_buf(abd)) + off, io_size)); | ||||
ASSERT(!abd_is_linear(abd)); | ASSERT(!abd_is_linear(abd)); | ||||
if (abd_is_gang(abd)) | if (abd_is_gang(abd)) | ||||
return (abd_gang_bio_map_off(bio, abd, io_size, off)); | return (abd_gang_bio_map_off(bio, abd, io_size, off)); | ||||
abd_iter_init(&aiter, abd); | abd_iter_init(&aiter, abd); | ||||
abd_iter_advance(&aiter, off); | abd_iter_advance(&aiter, off); | ||||
for (i = 0; i < bio->bi_max_vecs; i++) { | for (int i = 0; i < bio->bi_max_vecs; i++) { | ||||
struct page *pg; | struct page *pg; | ||||
size_t len, sgoff, pgoff; | size_t len, sgoff, pgoff; | ||||
struct scatterlist *sg; | struct scatterlist *sg; | ||||
if (io_size <= 0) | if (io_size <= 0) | ||||
break; | break; | ||||
sg = aiter.iter_sg; | sg = aiter.iter_sg; | ||||
Show All 28 Lines |