diff --git a/module/zfs/zap_leaf.c b/module/zfs/zap_leaf.c index b867ac407458..ad21882e9986 100644 --- a/module/zfs/zap_leaf.c +++ b/module/zfs/zap_leaf.c @@ -1,872 +1,872 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ /* * The 512-byte leaf is broken into 32 16-byte chunks. * chunk number n means l_chunk[n], even though the header precedes it. * the names are stored null-terminated. */ #include #include #include #include #include #include #include #include #include static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry); #define CHAIN_END 0xffff /* end of the chunk chain */ /* half the (current) minimum block size */ #define MAX_ARRAY_BYTES (8<<10) #define LEAF_HASH(l, h) \ ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \ ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(l)-(l)->l_phys->l_hdr.lh_prefix_len))) #define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)]) static void zap_memset(void *a, int c, size_t n) { char *cp = a; char *cpend = cp + n; while (cp < cpend) *cp++ = c; } static void stv(int len, void *addr, uint64_t value) { switch (len) { case 1: *(uint8_t *)addr = value; return; case 2: *(uint16_t *)addr = value; return; case 4: *(uint32_t *)addr = value; return; case 8: *(uint64_t *)addr = value; return; } ASSERT(!"bad int len"); } static uint64_t ldv(int len, const void *addr) { switch (len) { case 1: return (*(uint8_t *)addr); case 2: return (*(uint16_t *)addr); case 4: return (*(uint32_t *)addr); case 8: return (*(uint64_t *)addr); } ASSERT(!"bad int len"); return (0xFEEDFACEDEADBEEFULL); } void zap_leaf_byteswap(zap_leaf_phys_t *buf, int size) { int i; zap_leaf_t l; l.l_bs = highbit(size)-1; l.l_phys = buf; buf->l_hdr.lh_block_type = BSWAP_64(buf->l_hdr.lh_block_type); buf->l_hdr.lh_prefix = BSWAP_64(buf->l_hdr.lh_prefix); buf->l_hdr.lh_magic = BSWAP_32(buf->l_hdr.lh_magic); buf->l_hdr.lh_nfree = BSWAP_16(buf->l_hdr.lh_nfree); buf->l_hdr.lh_nentries = BSWAP_16(buf->l_hdr.lh_nentries); buf->l_hdr.lh_prefix_len = BSWAP_16(buf->l_hdr.lh_prefix_len); buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist); for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++) buf->l_hash[i] = BSWAP_16(buf->l_hash[i]); for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) { zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i); struct zap_leaf_entry *le; switch (lc->l_free.lf_type) { case ZAP_CHUNK_ENTRY: le = &lc->l_entry; le->le_type = BSWAP_8(le->le_type); le->le_value_intlen = BSWAP_8(le->le_value_intlen); le->le_next = BSWAP_16(le->le_next); le->le_name_chunk = BSWAP_16(le->le_name_chunk); le->le_name_numints = BSWAP_16(le->le_name_numints); le->le_value_chunk = BSWAP_16(le->le_value_chunk); le->le_value_numints = BSWAP_16(le->le_value_numints); le->le_cd = BSWAP_32(le->le_cd); le->le_hash = BSWAP_64(le->le_hash); break; case ZAP_CHUNK_FREE: lc->l_free.lf_type = BSWAP_8(lc->l_free.lf_type); lc->l_free.lf_next = BSWAP_16(lc->l_free.lf_next); break; case ZAP_CHUNK_ARRAY: lc->l_array.la_type = BSWAP_8(lc->l_array.la_type); lc->l_array.la_next = BSWAP_16(lc->l_array.la_next); /* la_array doesn't need swapping */ break; default: ASSERT(!"bad leaf type"); } } } void zap_leaf_init(zap_leaf_t *l, boolean_t sort) { int i; l->l_bs = highbit(l->l_dbuf->db_size)-1; zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header)); zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l)); for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE; ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1; } ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END; l->l_phys->l_hdr.lh_block_type = ZBT_LEAF; l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC; l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l); if (sort) l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; } /* * Routines which manipulate leaf chunks (l_chunk[]). */ static uint16_t zap_leaf_chunk_alloc(zap_leaf_t *l) { int chunk; ASSERT(l->l_phys->l_hdr.lh_nfree > 0); chunk = l->l_phys->l_hdr.lh_freelist; ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE); l->l_phys->l_hdr.lh_freelist = ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next; l->l_phys->l_hdr.lh_nfree--; return (chunk); } static void zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk) { struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free; ASSERT3U(l->l_phys->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l)); ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ASSERT(zlf->lf_type != ZAP_CHUNK_FREE); zlf->lf_type = ZAP_CHUNK_FREE; zlf->lf_next = l->l_phys->l_hdr.lh_freelist; bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */ l->l_phys->l_hdr.lh_freelist = chunk; l->l_phys->l_hdr.lh_nfree++; } /* * Routines which manipulate leaf arrays (zap_leaf_array type chunks). */ static uint16_t zap_leaf_array_create(zap_leaf_t *l, const char *buf, int integer_size, int num_integers) { uint16_t chunk_head; uint16_t *chunkp = &chunk_head; int byten = 0; uint64_t value = 0; int shift = (integer_size-1)*8; int len = num_integers; ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES); while (len > 0) { uint16_t chunk = zap_leaf_chunk_alloc(l); struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; int i; la->la_type = ZAP_CHUNK_ARRAY; for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) { if (byten == 0) value = ldv(integer_size, buf); la->la_array[i] = value >> shift; value <<= 8; if (++byten == integer_size) { byten = 0; buf += integer_size; if (--len == 0) break; } } *chunkp = chunk; chunkp = &la->la_next; } *chunkp = CHAIN_END; return (chunk_head); } static void zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp) { uint16_t chunk = *chunkp; *chunkp = CHAIN_END; while (chunk != CHAIN_END) { int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next; ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==, ZAP_CHUNK_ARRAY); zap_leaf_chunk_free(l, chunk); chunk = nextchunk; } } /* array_len and buf_len are in integers, not bytes */ static void zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk, int array_int_len, int array_len, int buf_int_len, uint64_t buf_len, void *buf) { int len = MIN(array_len, buf_len); int byten = 0; uint64_t value = 0; char *p = buf; ASSERT3U(array_int_len, <=, buf_int_len); /* Fast path for one 8-byte integer */ if (array_int_len == 8 && buf_int_len == 8 && len == 1) { struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; uint8_t *ip = la->la_array; uint64_t *buf64 = buf; *buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 | (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 | (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 | (uint64_t)ip[6] << 8 | (uint64_t)ip[7]; return; } /* Fast path for an array of 1-byte integers (eg. the entry name) */ if (array_int_len == 1 && buf_int_len == 1 && buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) { while (chunk != CHAIN_END) { struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; bcopy(la->la_array, p, ZAP_LEAF_ARRAY_BYTES); p += ZAP_LEAF_ARRAY_BYTES; chunk = la->la_next; } return; } while (len > 0) { struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; int i; ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) { value = (value << 8) | la->la_array[i]; byten++; if (byten == array_int_len) { stv(buf_int_len, p, value); byten = 0; len--; if (len == 0) return; p += buf_int_len; } } chunk = la->la_next; } } static boolean_t zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, int chunk, int array_numints) { int bseen = 0; if (zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY) { uint64_t *thiskey; boolean_t match; ASSERT(zn->zn_key_intlen == sizeof (*thiskey)); thiskey = kmem_alloc(array_numints * sizeof (*thiskey), - KM_SLEEP); + KM_PUSHPAGE); zap_leaf_array_read(l, chunk, sizeof (*thiskey), array_numints, sizeof (*thiskey), array_numints, thiskey); match = bcmp(thiskey, zn->zn_key_orig, array_numints * sizeof (*thiskey)) == 0; kmem_free(thiskey, array_numints * sizeof (*thiskey)); return (match); } ASSERT(zn->zn_key_intlen == 1); if (zn->zn_matchtype == MT_FIRST) { - char *thisname = kmem_alloc(array_numints, KM_SLEEP); + char *thisname = kmem_alloc(array_numints, KM_PUSHPAGE); boolean_t match; zap_leaf_array_read(l, chunk, sizeof (char), array_numints, sizeof (char), array_numints, thisname); match = zap_match(zn, thisname); kmem_free(thisname, array_numints); return (match); } /* * Fast path for exact matching. * First check that the lengths match, so that we don't read * past the end of the zn_key_orig array. */ if (array_numints != zn->zn_key_orig_numints) return (B_FALSE); while (bseen < array_numints) { struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; int toread = MIN(array_numints - bseen, ZAP_LEAF_ARRAY_BYTES); ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); if (bcmp(la->la_array, (char *)zn->zn_key_orig + bseen, toread)) break; chunk = la->la_next; bseen += toread; } return (bseen == array_numints); } /* * Routines which manipulate leaf entries. */ int zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh) { uint16_t *chunkp; struct zap_leaf_entry *le; ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); again: for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash); *chunkp != CHAIN_END; chunkp = &le->le_next) { uint16_t chunk = *chunkp; le = ZAP_LEAF_ENTRY(l, chunk); ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); if (le->le_hash != zn->zn_hash) continue; /* * NB: the entry chain is always sorted by cd on * normalized zap objects, so this will find the * lowest-cd match for MT_FIRST. */ ASSERT(zn->zn_matchtype == MT_EXACT || (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED)); if (zap_leaf_array_match(l, zn, le->le_name_chunk, le->le_name_numints)) { zeh->zeh_num_integers = le->le_value_numints; zeh->zeh_integer_size = le->le_value_intlen; zeh->zeh_cd = le->le_cd; zeh->zeh_hash = le->le_hash; zeh->zeh_chunkp = chunkp; zeh->zeh_leaf = l; return (0); } } /* * NB: we could of course do this in one pass, but that would be * a pain. We'll see if MT_BEST is even used much. */ if (zn->zn_matchtype == MT_BEST) { zn->zn_matchtype = MT_FIRST; goto again; } return (ENOENT); } /* Return (h1,cd1 >= h2,cd2) */ #define HCD_GTEQ(h1, cd1, h2, cd2) \ ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE)) int zap_leaf_lookup_closest(zap_leaf_t *l, uint64_t h, uint32_t cd, zap_entry_handle_t *zeh) { uint16_t chunk; uint64_t besth = -1ULL; uint32_t bestcd = -1U; uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1; uint16_t lh; struct zap_leaf_entry *le; ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) { for (chunk = l->l_phys->l_hash[lh]; chunk != CHAIN_END; chunk = le->le_next) { le = ZAP_LEAF_ENTRY(l, chunk); ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) && HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) { ASSERT3U(bestlh, >=, lh); bestlh = lh; besth = le->le_hash; bestcd = le->le_cd; zeh->zeh_num_integers = le->le_value_numints; zeh->zeh_integer_size = le->le_value_intlen; zeh->zeh_cd = le->le_cd; zeh->zeh_hash = le->le_hash; zeh->zeh_fakechunk = chunk; zeh->zeh_chunkp = &zeh->zeh_fakechunk; zeh->zeh_leaf = l; } } } return (bestcd == -1U ? ENOENT : 0); } int zap_entry_read(const zap_entry_handle_t *zeh, uint8_t integer_size, uint64_t num_integers, void *buf) { struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp); ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); if (le->le_value_intlen > integer_size) return (EINVAL); zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk, le->le_value_intlen, le->le_value_numints, integer_size, num_integers, buf); if (zeh->zeh_num_integers > num_integers) return (EOVERFLOW); return (0); } int zap_entry_read_name(zap_t *zap, const zap_entry_handle_t *zeh, uint16_t buflen, char *buf) { struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp); ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 8, le->le_name_numints, 8, buflen / 8, buf); } else { zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1, le->le_name_numints, 1, buflen, buf); } if (le->le_name_numints > buflen) return (EOVERFLOW); return (0); } int zap_entry_update(zap_entry_handle_t *zeh, uint8_t integer_size, uint64_t num_integers, const void *buf) { int delta_chunks; zap_leaf_t *l = zeh->zeh_leaf; struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp); delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) - ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen); if ((int)l->l_phys->l_hdr.lh_nfree < delta_chunks) return (EAGAIN); zap_leaf_array_free(l, &le->le_value_chunk); le->le_value_chunk = zap_leaf_array_create(l, buf, integer_size, num_integers); le->le_value_numints = num_integers; le->le_value_intlen = integer_size; return (0); } void zap_entry_remove(zap_entry_handle_t *zeh) { uint16_t entry_chunk; struct zap_leaf_entry *le; zap_leaf_t *l = zeh->zeh_leaf; ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk); entry_chunk = *zeh->zeh_chunkp; le = ZAP_LEAF_ENTRY(l, entry_chunk); ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); zap_leaf_array_free(l, &le->le_name_chunk); zap_leaf_array_free(l, &le->le_value_chunk); *zeh->zeh_chunkp = le->le_next; zap_leaf_chunk_free(l, entry_chunk); l->l_phys->l_hdr.lh_nentries--; } int zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd, uint8_t integer_size, uint64_t num_integers, const void *buf, zap_entry_handle_t *zeh) { uint16_t chunk; uint16_t *chunkp; struct zap_leaf_entry *le; uint64_t valuelen; int numchunks; uint64_t h = zn->zn_hash; valuelen = integer_size * num_integers; numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(zn->zn_key_orig_numints * zn->zn_key_intlen) + ZAP_LEAF_ARRAY_NCHUNKS(valuelen); if (numchunks > ZAP_LEAF_NUMCHUNKS(l)) return (E2BIG); if (cd == ZAP_NEED_CD) { /* find the lowest unused cd */ if (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) { cd = 0; for (chunk = *LEAF_HASH_ENTPTR(l, h); chunk != CHAIN_END; chunk = le->le_next) { le = ZAP_LEAF_ENTRY(l, chunk); if (le->le_cd > cd) break; if (le->le_hash == h) { ASSERT3U(cd, ==, le->le_cd); cd++; } } } else { /* old unsorted format; do it the O(n^2) way */ for (cd = 0; ; cd++) { for (chunk = *LEAF_HASH_ENTPTR(l, h); chunk != CHAIN_END; chunk = le->le_next) { le = ZAP_LEAF_ENTRY(l, chunk); if (le->le_hash == h && le->le_cd == cd) { break; } } /* If this cd is not in use, we are good. */ if (chunk == CHAIN_END) break; } } /* * We would run out of space in a block before we could * store enough entries to run out of CD values. */ ASSERT3U(cd, <, zap_maxcd(zn->zn_zap)); } if (l->l_phys->l_hdr.lh_nfree < numchunks) return (EAGAIN); /* make the entry */ chunk = zap_leaf_chunk_alloc(l); le = ZAP_LEAF_ENTRY(l, chunk); le->le_type = ZAP_CHUNK_ENTRY; le->le_name_chunk = zap_leaf_array_create(l, zn->zn_key_orig, zn->zn_key_intlen, zn->zn_key_orig_numints); le->le_name_numints = zn->zn_key_orig_numints; le->le_value_chunk = zap_leaf_array_create(l, buf, integer_size, num_integers); le->le_value_numints = num_integers; le->le_value_intlen = integer_size; le->le_hash = h; le->le_cd = cd; /* link it into the hash chain */ /* XXX if we did the search above, we could just use that */ chunkp = zap_leaf_rehash_entry(l, chunk); l->l_phys->l_hdr.lh_nentries++; zeh->zeh_leaf = l; zeh->zeh_num_integers = num_integers; zeh->zeh_integer_size = le->le_value_intlen; zeh->zeh_cd = le->le_cd; zeh->zeh_hash = le->le_hash; zeh->zeh_chunkp = chunkp; return (0); } /* * Determine if there is another entry with the same normalized form. * For performance purposes, either zn or name must be provided (the * other can be NULL). Note, there usually won't be any hash * conflicts, in which case we don't need the concatenated/normalized * form of the name. But all callers have one of these on hand anyway, * so might as well take advantage. A cleaner but slower interface * would accept neither argument, and compute the normalized name as * needed (using zap_name_alloc(zap_entry_read_name(zeh))). */ boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn, const char *name, zap_t *zap) { uint64_t chunk; struct zap_leaf_entry *le; boolean_t allocdzn = B_FALSE; if (zap->zap_normflags == 0) return (B_FALSE); for (chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash); chunk != CHAIN_END; chunk = le->le_next) { le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk); if (le->le_hash != zeh->zeh_hash) continue; if (le->le_cd == zeh->zeh_cd) continue; if (zn == NULL) { zn = zap_name_alloc(zap, name, MT_FIRST); allocdzn = B_TRUE; } if (zap_leaf_array_match(zeh->zeh_leaf, zn, le->le_name_chunk, le->le_name_numints)) { if (allocdzn) zap_name_free(zn); return (B_TRUE); } } if (allocdzn) zap_name_free(zn); return (B_FALSE); } /* * Routines for transferring entries between leafs. */ static uint16_t * zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry) { struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry); struct zap_leaf_entry *le2; uint16_t *chunkp; /* * keep the entry chain sorted by cd * NB: this will not cause problems for unsorted leafs, though * it is unnecessary there. */ for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash); *chunkp != CHAIN_END; chunkp = &le2->le_next) { le2 = ZAP_LEAF_ENTRY(l, *chunkp); if (le2->le_cd > le->le_cd) break; } le->le_next = *chunkp; *chunkp = entry; return (chunkp); } static uint16_t zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl) { uint16_t new_chunk; uint16_t *nchunkp = &new_chunk; while (chunk != CHAIN_END) { uint16_t nchunk = zap_leaf_chunk_alloc(nl); struct zap_leaf_array *nla = &ZAP_LEAF_CHUNK(nl, nchunk).l_array; struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; int nextchunk = la->la_next; ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l)); *nla = *la; /* structure assignment */ zap_leaf_chunk_free(l, chunk); chunk = nextchunk; *nchunkp = nchunk; nchunkp = &nla->la_next; } *nchunkp = CHAIN_END; return (new_chunk); } static void zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl) { struct zap_leaf_entry *le, *nle; uint16_t chunk; le = ZAP_LEAF_ENTRY(l, entry); ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); chunk = zap_leaf_chunk_alloc(nl); nle = ZAP_LEAF_ENTRY(nl, chunk); *nle = *le; /* structure assignment */ (void) zap_leaf_rehash_entry(nl, chunk); nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl); nle->le_value_chunk = zap_leaf_transfer_array(l, le->le_value_chunk, nl); zap_leaf_chunk_free(l, entry); l->l_phys->l_hdr.lh_nentries--; nl->l_phys->l_hdr.lh_nentries++; } /* * Transfer the entries whose hash prefix ends in 1 to the new leaf. */ void zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort) { int i; int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len; /* set new prefix and prefix_len */ l->l_phys->l_hdr.lh_prefix <<= 1; l->l_phys->l_hdr.lh_prefix_len++; nl->l_phys->l_hdr.lh_prefix = l->l_phys->l_hdr.lh_prefix | 1; nl->l_phys->l_hdr.lh_prefix_len = l->l_phys->l_hdr.lh_prefix_len; /* break existing hash chains */ zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l)); if (sort) l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; /* * Transfer entries whose hash bit 'bit' is set to nl; rehash * the remaining entries * * NB: We could find entries via the hashtable instead. That * would be O(hashents+numents) rather than O(numblks+numents), * but this accesses memory more sequentially, and when we're * called, the block is usually pretty full. */ for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i); if (le->le_type != ZAP_CHUNK_ENTRY) continue; if (le->le_hash & (1ULL << bit)) zap_leaf_transfer_entry(l, i, nl); else (void) zap_leaf_rehash_entry(l, i); } } void zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs) { int i, n; n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - l->l_phys->l_hdr.lh_prefix_len; n = MIN(n, ZAP_HISTOGRAM_SIZE-1); zs->zs_leafs_with_2n_pointers[n]++; n = l->l_phys->l_hdr.lh_nentries/5; n = MIN(n, ZAP_HISTOGRAM_SIZE-1); zs->zs_blocks_with_n5_entries[n]++; n = ((1<l_phys->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 / (1<zs_blocks_n_tenths_full[n]++; for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) { int nentries = 0; int chunk = l->l_phys->l_hash[i]; while (chunk != CHAIN_END) { struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, chunk); n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_numints) + ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen); n = MIN(n, ZAP_HISTOGRAM_SIZE-1); zs->zs_entries_using_n_chunks[n]++; chunk = le->le_next; nentries++; } n = nentries; n = MIN(n, ZAP_HISTOGRAM_SIZE-1); zs->zs_buckets_with_n_entries[n]++; } } diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index 178ab02719a5..d5b97dac91bd 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -1,1500 +1,1500 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #ifdef _KERNEL #include #endif static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags); uint64_t zap_getflags(zap_t *zap) { if (zap->zap_ismicro) return (0); return (zap->zap_u.zap_fat.zap_phys->zap_flags); } int zap_hashbits(zap_t *zap) { if (zap_getflags(zap) & ZAP_FLAG_HASH64) return (48); else return (28); } uint32_t zap_maxcd(zap_t *zap) { if (zap_getflags(zap) & ZAP_FLAG_HASH64) return ((1<<16)-1); else return (-1U); } static uint64_t zap_hash(zap_name_t *zn) { zap_t *zap = zn->zn_zap; uint64_t h = 0; if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) { ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY); h = *(uint64_t *)zn->zn_key_orig; } else { h = zap->zap_salt; ASSERT(h != 0); ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { int i; const uint64_t *wp = zn->zn_key_norm; ASSERT(zn->zn_key_intlen == 8); for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) { int j; uint64_t word = *wp; for (j = 0; j < zn->zn_key_intlen; j++) { h = (h >> 8) ^ zfs_crc64_table[(h ^ word) & 0xFF]; word >>= NBBY; } } } else { int i, len; const uint8_t *cp = zn->zn_key_norm; /* * We previously stored the terminating null on * disk, but didn't hash it, so we need to * continue to not hash it. (The * zn_key_*_numints includes the terminating * null for non-binary keys.) */ len = zn->zn_key_norm_numints - 1; ASSERT(zn->zn_key_intlen == 1); for (i = 0; i < len; cp++, i++) { h = (h >> 8) ^ zfs_crc64_table[(h ^ *cp) & 0xFF]; } } } /* * Don't use all 64 bits, since we need some in the cookie for * the collision differentiator. We MUST use the high bits, * since those are the ones that we first pay attention to when * chosing the bucket. */ h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1); return (h); } static int zap_normalize(zap_t *zap, const char *name, char *namenorm) { size_t inlen, outlen; int err; ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY)); inlen = strlen(name) + 1; outlen = ZAP_MAXNAMELEN; err = 0; (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen, zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL | U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err); return (err); } boolean_t zap_match(zap_name_t *zn, const char *matchname) { ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY)); if (zn->zn_matchtype == MT_FIRST) { char norm[ZAP_MAXNAMELEN]; if (zap_normalize(zn->zn_zap, matchname, norm) != 0) return (B_FALSE); return (strcmp(zn->zn_key_norm, norm) == 0); } else { /* MT_BEST or MT_EXACT */ return (strcmp(zn->zn_key_orig, matchname) == 0); } } void zap_name_free(zap_name_t *zn) { kmem_free(zn, sizeof (zap_name_t)); } zap_name_t * zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) { zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_PUSHPAGE); zn->zn_zap = zap; zn->zn_key_intlen = sizeof (*key); zn->zn_key_orig = key; zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; zn->zn_matchtype = mt; if (zap->zap_normflags) { if (zap_normalize(zap, key, zn->zn_normbuf) != 0) { zap_name_free(zn); return (NULL); } zn->zn_key_norm = zn->zn_normbuf; zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; } else { if (mt != MT_EXACT) { zap_name_free(zn); return (NULL); } zn->zn_key_norm = zn->zn_key_orig; zn->zn_key_norm_numints = zn->zn_key_orig_numints; } zn->zn_hash = zap_hash(zn); return (zn); } zap_name_t * zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) { - zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); + zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_PUSHPAGE); ASSERT(zap->zap_normflags == 0); zn->zn_zap = zap; zn->zn_key_intlen = sizeof (*key); zn->zn_key_orig = zn->zn_key_norm = key; zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints; zn->zn_matchtype = MT_EXACT; zn->zn_hash = zap_hash(zn); return (zn); } static void mzap_byteswap(mzap_phys_t *buf, size_t size) { int i, max; buf->mz_block_type = BSWAP_64(buf->mz_block_type); buf->mz_salt = BSWAP_64(buf->mz_salt); buf->mz_normflags = BSWAP_64(buf->mz_normflags); max = (size / MZAP_ENT_LEN) - 1; for (i = 0; i < max; i++) { buf->mz_chunk[i].mze_value = BSWAP_64(buf->mz_chunk[i].mze_value); buf->mz_chunk[i].mze_cd = BSWAP_32(buf->mz_chunk[i].mze_cd); } } void zap_byteswap(void *buf, size_t size) { uint64_t block_type; block_type = *(uint64_t *)buf; if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) { /* ASSERT(magic == ZAP_LEAF_MAGIC); */ mzap_byteswap(buf, size); } else { fzap_byteswap(buf, size); } } static int mze_compare(const void *arg1, const void *arg2) { const mzap_ent_t *mze1 = arg1; const mzap_ent_t *mze2 = arg2; if (mze1->mze_hash > mze2->mze_hash) return (+1); if (mze1->mze_hash < mze2->mze_hash) return (-1); if (mze1->mze_cd > mze2->mze_cd) return (+1); if (mze1->mze_cd < mze2->mze_cd) return (-1); return (0); } static void mze_insert(zap_t *zap, int chunkid, uint64_t hash) { mzap_ent_t *mze; ASSERT(zap->zap_ismicro); ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); mze = kmem_alloc(sizeof (mzap_ent_t), KM_PUSHPAGE); mze->mze_chunkid = chunkid; mze->mze_hash = hash; mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0); avl_add(&zap->zap_m.zap_avl, mze); } static mzap_ent_t * mze_find(zap_name_t *zn) { mzap_ent_t mze_tofind; mzap_ent_t *mze; avl_index_t idx; avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl; ASSERT(zn->zn_zap->zap_ismicro); ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); mze_tofind.mze_hash = zn->zn_hash; mze_tofind.mze_cd = 0; again: mze = avl_find(avl, &mze_tofind, &idx); if (mze == NULL) mze = avl_nearest(avl, idx, AVL_AFTER); for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) return (mze); } if (zn->zn_matchtype == MT_BEST) { zn->zn_matchtype = MT_FIRST; goto again; } return (NULL); } static uint32_t mze_find_unused_cd(zap_t *zap, uint64_t hash) { mzap_ent_t mze_tofind; mzap_ent_t *mze; avl_index_t idx; avl_tree_t *avl = &zap->zap_m.zap_avl; uint32_t cd; ASSERT(zap->zap_ismicro); ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); mze_tofind.mze_hash = hash; mze_tofind.mze_cd = 0; cd = 0; for (mze = avl_find(avl, &mze_tofind, &idx); mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { if (mze->mze_cd != cd) break; cd++; } return (cd); } static void mze_remove(zap_t *zap, mzap_ent_t *mze) { ASSERT(zap->zap_ismicro); ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); avl_remove(&zap->zap_m.zap_avl, mze); kmem_free(mze, sizeof (mzap_ent_t)); } static void mze_destroy(zap_t *zap) { mzap_ent_t *mze; void *avlcookie = NULL; while ((mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))) kmem_free(mze, sizeof (mzap_ent_t)); avl_destroy(&zap->zap_m.zap_avl); } static zap_t * mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) { zap_t *winner; zap_t *zap; int i; ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); zap = kmem_zalloc(sizeof (zap_t), KM_PUSHPAGE); rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, NULL); rw_enter(&zap->zap_rwlock, RW_WRITER); zap->zap_objset = os; zap->zap_object = obj; zap->zap_dbuf = db; if (*(uint64_t *)db->db_data != ZBT_MICRO) { mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); zap->zap_f.zap_block_shift = highbit(db->db_size) - 1; } else { zap->zap_ismicro = TRUE; } /* * Make sure that zap_ismicro is set before we let others see * it, because zap_lockdir() checks zap_ismicro without the lock * held. */ winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict); if (winner != NULL) { rw_exit(&zap->zap_rwlock); rw_destroy(&zap->zap_rwlock); if (!zap->zap_ismicro) mutex_destroy(&zap->zap_f.zap_num_entries_mtx); kmem_free(zap, sizeof (zap_t)); return (winner); } if (zap->zap_ismicro) { zap->zap_salt = zap->zap_m.zap_phys->mz_salt; zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags; zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; avl_create(&zap->zap_m.zap_avl, mze_compare, sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; if (mze->mze_name[0]) { zap_name_t *zn; zap->zap_m.zap_num_entries++; zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT); mze_insert(zap, i, zn->zn_hash); zap_name_free(zn); } } } else { zap->zap_salt = zap->zap_f.zap_phys->zap_salt; zap->zap_normflags = zap->zap_f.zap_phys->zap_normflags; ASSERT3U(sizeof (struct zap_leaf_header), ==, 2*ZAP_LEAF_CHUNKSIZE); /* * The embedded pointer table should not overlap the * other members. */ ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >, &zap->zap_f.zap_phys->zap_salt); /* * The embedded pointer table should end at the end of * the block */ ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap, 1<zap_f.zap_phys, ==, zap->zap_dbuf->db_size); } rw_exit(&zap->zap_rwlock); return (zap); } int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) { zap_t *zap; dmu_buf_t *db; krw_t lt; int err; *zapp = NULL; err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH); if (err) return (err); #ifdef ZFS_DEBUG { dmu_object_info_t doi; dmu_object_info_from_db(db, &doi); ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap); } #endif zap = dmu_buf_get_user(db); if (zap == NULL) zap = mzap_open(os, obj, db); /* * We're checking zap_ismicro without the lock held, in order to * tell what type of lock we want. Once we have some sort of * lock, see if it really is the right type. In practice this * can only be different if it was upgraded from micro to fat, * and micro wanted WRITER but fat only needs READER. */ lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti; rw_enter(&zap->zap_rwlock, lt); if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) { /* it was upgraded, now we only need reader */ ASSERT(lt == RW_WRITER); ASSERT(RW_READER == (!zap->zap_ismicro && fatreader) ? RW_READER : lti); rw_downgrade(&zap->zap_rwlock); lt = RW_READER; } zap->zap_objset = os; if (lt == RW_WRITER) dmu_buf_will_dirty(db, tx); ASSERT3P(zap->zap_dbuf, ==, db); ASSERT(!zap->zap_ismicro || zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks); if (zap->zap_ismicro && tx && adding && zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; if (newsz > MZAP_MAX_BLKSZ) { dprintf("upgrading obj %llu: num_entries=%u\n", obj, zap->zap_m.zap_num_entries); *zapp = zap; return (mzap_upgrade(zapp, tx, 0)); } err = dmu_object_set_blocksize(os, obj, newsz, 0, tx); ASSERT3U(err, ==, 0); zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; } *zapp = zap; return (0); } void zap_unlockdir(zap_t *zap) { rw_exit(&zap->zap_rwlock); dmu_buf_rele(zap->zap_dbuf, NULL); } static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags) { mzap_phys_t *mzp; int i, sz, nchunks; int err = 0; zap_t *zap = *zapp; ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); sz = zap->zap_dbuf->db_size; mzp = kmem_alloc(sz, KM_PUSHPAGE | KM_NODEBUG); bcopy(zap->zap_dbuf->db_data, mzp, sz); nchunks = zap->zap_m.zap_num_chunks; if (!flags) { err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, 1ULL << fzap_default_block_shift, 0, tx); if (err) { kmem_free(mzp, sz); return (err); } } dprintf("upgrading obj=%llu with %u chunks\n", zap->zap_object, nchunks); /* XXX destroy the avl later, so we can use the stored hash value */ mze_destroy(zap); fzap_upgrade(zap, tx, flags); for (i = 0; i < nchunks; i++) { mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; zap_name_t *zn; if (mze->mze_name[0] == 0) continue; dprintf("adding %s=%llu\n", mze->mze_name, mze->mze_value); zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT); err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx); zap = zn->zn_zap; /* fzap_add_cd() may change zap */ zap_name_free(zn); if (err) break; } kmem_free(mzp, sz); *zapp = zap; return (err); } static void mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, dmu_tx_t *tx) { dmu_buf_t *db; mzap_phys_t *zp; VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); #ifdef ZFS_DEBUG { dmu_object_info_t doi; dmu_object_info_from_db(db, &doi); ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap); } #endif dmu_buf_will_dirty(db, tx); zp = db->db_data; zp->mz_block_type = ZBT_MICRO; zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; zp->mz_normflags = normflags; dmu_buf_rele(db, FTAG); if (flags != 0) { zap_t *zap; /* Only fat zap supports flags; upgrade immediately. */ VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER, B_FALSE, B_FALSE, &zap)); VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags)); zap_unlockdir(zap); } } int zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { return (zap_create_claim_norm(os, obj, 0, ot, bonustype, bonuslen, tx)); } int zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags, dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { int err; err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx); if (err != 0) return (err); mzap_create_impl(os, obj, normflags, 0, tx); return (0); } uint64_t zap_create(objset_t *os, dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx)); } uint64_t zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); mzap_create_impl(os, obj, normflags, 0, tx); return (obj); } uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT && leaf_blockshift <= SPA_MAXBLOCKSHIFT && indirect_blockshift >= SPA_MINBLOCKSHIFT && indirect_blockshift <= SPA_MAXBLOCKSHIFT); VERIFY(dmu_object_set_blocksize(os, obj, 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0); mzap_create_impl(os, obj, normflags, flags, tx); return (obj); } int zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx) { /* * dmu_object_free will free the object number and free the * data. Freeing the data will cause our pageout function to be * called, which will destroy our data (zap_leaf_t's and zap_t). */ return (dmu_object_free(os, zapobj, tx)); } _NOTE(ARGSUSED(0)) void zap_evict(dmu_buf_t *db, void *vzap) { zap_t *zap = vzap; rw_destroy(&zap->zap_rwlock); if (zap->zap_ismicro) mze_destroy(zap); else mutex_destroy(&zap->zap_f.zap_num_entries_mtx); kmem_free(zap, sizeof (zap_t)); } int zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) { zap_t *zap; int err; err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); if (!zap->zap_ismicro) { err = fzap_count(zap, count); } else { *count = zap->zap_m.zap_num_entries; } zap_unlockdir(zap); return (err); } /* * zn may be NULL; if not specified, it will be computed if needed. * See also the comment above zap_entry_normalization_conflict(). */ static boolean_t mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze) { mzap_ent_t *other; int direction = AVL_BEFORE; boolean_t allocdzn = B_FALSE; if (zap->zap_normflags == 0) return (B_FALSE); again: for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction); other && other->mze_hash == mze->mze_hash; other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { if (zn == NULL) { zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name, MT_FIRST); allocdzn = B_TRUE; } if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { if (allocdzn) zap_name_free(zn); return (B_TRUE); } } if (direction == AVL_BEFORE) { direction = AVL_AFTER; goto again; } if (allocdzn) zap_name_free(zn); return (B_FALSE); } /* * Routines for manipulating attributes. */ int zap_lookup(objset_t *os, uint64_t zapobj, const char *name, uint64_t integer_size, uint64_t num_integers, void *buf) { return (zap_lookup_norm(os, zapobj, name, integer_size, num_integers, buf, MT_EXACT, NULL, 0, NULL)); } int zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, uint64_t integer_size, uint64_t num_integers, void *buf, matchtype_t mt, char *realname, int rn_len, boolean_t *ncp) { zap_t *zap; int err; mzap_ent_t *mze; zap_name_t *zn; err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); zn = zap_name_alloc(zap, name, mt); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } if (!zap->zap_ismicro) { err = fzap_lookup(zn, integer_size, num_integers, buf, realname, rn_len, ncp); } else { mze = mze_find(zn); if (mze == NULL) { err = ENOENT; } else { if (num_integers < 1) { err = EOVERFLOW; } else if (integer_size != 8) { err = EINVAL; } else { *(uint64_t *)buf = MZE_PHYS(zap, mze)->mze_value; (void) strlcpy(realname, MZE_PHYS(zap, mze)->mze_name, rn_len); if (ncp) { *ncp = mzap_normalization_conflict(zap, zn, mze); } } } } zap_name_free(zn); zap_unlockdir(zap); return (err); } int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints) { zap_t *zap; int err; zap_name_t *zn; err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } fzap_prefetch(zn); zap_name_free(zn); zap_unlockdir(zap); return (err); } int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf) { zap_t *zap; int err; zap_name_t *zn; err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } err = fzap_lookup(zn, integer_size, num_integers, buf, NULL, 0, NULL); zap_name_free(zn); zap_unlockdir(zap); return (err); } int zap_contains(objset_t *os, uint64_t zapobj, const char *name) { int err = (zap_lookup_norm(os, zapobj, name, 0, 0, NULL, MT_EXACT, NULL, 0, NULL)); if (err == EOVERFLOW || err == EINVAL) err = 0; /* found, but skipped reading the value */ return (err); } int zap_length(objset_t *os, uint64_t zapobj, const char *name, uint64_t *integer_size, uint64_t *num_integers) { zap_t *zap; int err; mzap_ent_t *mze; zap_name_t *zn; err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); zn = zap_name_alloc(zap, name, MT_EXACT); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } if (!zap->zap_ismicro) { err = fzap_length(zn, integer_size, num_integers); } else { mze = mze_find(zn); if (mze == NULL) { err = ENOENT; } else { if (integer_size) *integer_size = 8; if (num_integers) *num_integers = 1; } } zap_name_free(zn); zap_unlockdir(zap); return (err); } int zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints, uint64_t *integer_size, uint64_t *num_integers) { zap_t *zap; int err; zap_name_t *zn; err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } err = fzap_length(zn, integer_size, num_integers); zap_name_free(zn); zap_unlockdir(zap); return (err); } static void mzap_addent(zap_name_t *zn, uint64_t value) { int i; zap_t *zap = zn->zn_zap; int start = zap->zap_m.zap_alloc_next; uint32_t cd; ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); #ifdef ZFS_DEBUG for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { ASSERTV(mzap_ent_phys_t *mze=&zap->zap_m.zap_phys->mz_chunk[i]); ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0); } #endif cd = mze_find_unused_cd(zap, zn->zn_hash); /* given the limited size of the microzap, this can't happen */ ASSERT(cd < zap_maxcd(zap)); again: for (i = start; i < zap->zap_m.zap_num_chunks; i++) { mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; if (mze->mze_name[0] == 0) { mze->mze_value = value; mze->mze_cd = cd; (void) strcpy(mze->mze_name, zn->zn_key_orig); zap->zap_m.zap_num_entries++; zap->zap_m.zap_alloc_next = i+1; if (zap->zap_m.zap_alloc_next == zap->zap_m.zap_num_chunks) zap->zap_m.zap_alloc_next = 0; mze_insert(zap, i, zn->zn_hash); return; } } if (start != 0) { start = 0; goto again; } ASSERT(!"out of entries!"); } int zap_add(objset_t *os, uint64_t zapobj, const char *key, int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) { zap_t *zap; int err; mzap_ent_t *mze; const uint64_t *intval = val; zap_name_t *zn; err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); if (err) return (err); zn = zap_name_alloc(zap, key, MT_EXACT); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } if (!zap->zap_ismicro) { err = fzap_add(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_add() may change zap */ } else if (integer_size != 8 || num_integers != 1 || strlen(key) >= MZAP_NAME_LEN) { err = mzap_upgrade(&zn->zn_zap, tx, 0); if (err == 0) err = fzap_add(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_add() may change zap */ } else { mze = mze_find(zn); if (mze != NULL) { err = EEXIST; } else { mzap_addent(zn, *intval); } } ASSERT(zap == zn->zn_zap); zap_name_free(zn); if (zap != NULL) /* may be NULL if fzap_add() failed */ zap_unlockdir(zap); return (err); } int zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints, int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) { zap_t *zap; int err; zap_name_t *zn; err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); if (err) return (err); zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } err = fzap_add(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_add() may change zap */ zap_name_free(zn); if (zap != NULL) /* may be NULL if fzap_add() failed */ zap_unlockdir(zap); return (err); } int zap_update(objset_t *os, uint64_t zapobj, const char *name, int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) { zap_t *zap; mzap_ent_t *mze; const uint64_t *intval = val; zap_name_t *zn; int err; #ifdef ZFS_DEBUG uint64_t oldval; /* * If there is an old value, it shouldn't change across the * lockdir (eg, due to bprewrite's xlation). */ if (integer_size == 8 && num_integers == 1) (void) zap_lookup(os, zapobj, name, 8, 1, &oldval); #endif err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); if (err) return (err); zn = zap_name_alloc(zap, name, MT_EXACT); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } if (!zap->zap_ismicro) { err = fzap_update(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_update() may change zap */ } else if (integer_size != 8 || num_integers != 1 || strlen(name) >= MZAP_NAME_LEN) { dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", zapobj, integer_size, num_integers, name); err = mzap_upgrade(&zn->zn_zap, tx, 0); if (err == 0) err = fzap_update(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_update() may change zap */ } else { mze = mze_find(zn); if (mze != NULL) { ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval); MZE_PHYS(zap, mze)->mze_value = *intval; } else { mzap_addent(zn, *intval); } } ASSERT(zap == zn->zn_zap); zap_name_free(zn); if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ zap_unlockdir(zap); return (err); } int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints, int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) { zap_t *zap; zap_name_t *zn; int err; err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); if (err) return (err); zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } err = fzap_update(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_update() may change zap */ zap_name_free(zn); if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ zap_unlockdir(zap); return (err); } int zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) { return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx)); } int zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, matchtype_t mt, dmu_tx_t *tx) { zap_t *zap; int err; mzap_ent_t *mze; zap_name_t *zn; err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); if (err) return (err); zn = zap_name_alloc(zap, name, mt); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } if (!zap->zap_ismicro) { err = fzap_remove(zn, tx); } else { mze = mze_find(zn); if (mze == NULL) { err = ENOENT; } else { zap->zap_m.zap_num_entries--; bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid], sizeof (mzap_ent_phys_t)); mze_remove(zap, mze); } } zap_name_free(zn); zap_unlockdir(zap); return (err); } int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints, dmu_tx_t *tx) { zap_t *zap; int err; zap_name_t *zn; err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); if (err) return (err); zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); } err = fzap_remove(zn, tx); zap_name_free(zn); zap_unlockdir(zap); return (err); } /* * Routines for iterating over the attributes. */ void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, uint64_t serialized) { zc->zc_objset = os; zc->zc_zap = NULL; zc->zc_leaf = NULL; zc->zc_zapobj = zapobj; zc->zc_serialized = serialized; zc->zc_hash = 0; zc->zc_cd = 0; } void zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) { zap_cursor_init_serialized(zc, os, zapobj, 0); } void zap_cursor_fini(zap_cursor_t *zc) { if (zc->zc_zap) { rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); zap_unlockdir(zc->zc_zap); zc->zc_zap = NULL; } if (zc->zc_leaf) { rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); zap_put_leaf(zc->zc_leaf); zc->zc_leaf = NULL; } zc->zc_objset = NULL; } uint64_t zap_cursor_serialize(zap_cursor_t *zc) { if (zc->zc_hash == -1ULL) return (-1ULL); if (zc->zc_zap == NULL) return (zc->zc_serialized); ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0); ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); /* * We want to keep the high 32 bits of the cursor zero if we can, so * that 32-bit programs can access this. So usually use a small * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits * of the cursor. * * [ collision differentiator | zap_hashbits()-bit hash value ] */ return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) | ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap))); } int zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) { int err; avl_index_t idx; mzap_ent_t mze_tofind; mzap_ent_t *mze; if (zc->zc_hash == -1ULL) return (ENOENT); if (zc->zc_zap == NULL) { int hb; err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, RW_READER, TRUE, FALSE, &zc->zc_zap); if (err) return (err); /* * To support zap_cursor_init_serialized, advance, retrieve, * we must add to the existing zc_cd, which may already * be 1 due to the zap_cursor_advance. */ ASSERT(zc->zc_hash == 0); hb = zap_hashbits(zc->zc_zap); zc->zc_hash = zc->zc_serialized << (64 - hb); zc->zc_cd += zc->zc_serialized >> hb; if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ zc->zc_cd = 0; } else { rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); } if (!zc->zc_zap->zap_ismicro) { err = fzap_cursor_retrieve(zc->zc_zap, zc, za); } else { err = ENOENT; mze_tofind.mze_hash = zc->zc_hash; mze_tofind.mze_cd = zc->zc_cd; mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); if (mze == NULL) { mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, idx, AVL_AFTER); } if (mze) { mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); za->za_normalization_conflict = mzap_normalization_conflict(zc->zc_zap, NULL, mze); za->za_integer_length = 8; za->za_num_integers = 1; za->za_first_integer = mzep->mze_value; (void) strcpy(za->za_name, mzep->mze_name); zc->zc_hash = mze->mze_hash; zc->zc_cd = mze->mze_cd; err = 0; } else { zc->zc_hash = -1ULL; } } rw_exit(&zc->zc_zap->zap_rwlock); return (err); } void zap_cursor_advance(zap_cursor_t *zc) { if (zc->zc_hash == -1ULL) return; zc->zc_cd++; } int zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt) { int err = 0; mzap_ent_t *mze; zap_name_t *zn; if (zc->zc_zap == NULL) { err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, RW_READER, TRUE, FALSE, &zc->zc_zap); if (err) return (err); } else { rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); } zn = zap_name_alloc(zc->zc_zap, name, mt); if (zn == NULL) { rw_exit(&zc->zc_zap->zap_rwlock); return (ENOTSUP); } if (!zc->zc_zap->zap_ismicro) { err = fzap_cursor_move_to_key(zc, zn); } else { mze = mze_find(zn); if (mze == NULL) { err = ENOENT; goto out; } zc->zc_hash = mze->mze_hash; zc->zc_cd = mze->mze_cd; } out: zap_name_free(zn); rw_exit(&zc->zc_zap->zap_rwlock); return (err); } int zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) { int err; zap_t *zap; err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); bzero(zs, sizeof (zap_stats_t)); if (zap->zap_ismicro) { zs->zs_blocksize = zap->zap_dbuf->db_size; zs->zs_num_entries = zap->zap_m.zap_num_entries; zs->zs_num_blocks = 1; } else { fzap_get_stats(zap, zs); } zap_unlockdir(zap); return (0); } int zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, uint64_t *towrite, uint64_t *tooverwrite) { zap_t *zap; int err = 0; /* * Since, we don't have a name, we cannot figure out which blocks will * be affected in this operation. So, account for the worst case : * - 3 blocks overwritten: target leaf, ptrtbl block, header block * - 4 new blocks written if adding: * - 2 blocks for possibly split leaves, * - 2 grown ptrtbl blocks * * This also accomodates the case where an add operation to a fairly * large microzap results in a promotion to fatzap. */ if (name == NULL) { *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; return (err); } /* * We lock the zap with adding == FALSE. Because, if we pass * the actual value of add, it could trigger a mzap_upgrade(). * At present we are just evaluating the possibility of this operation * and hence we donot want to trigger an upgrade. */ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); if (err) return (err); if (!zap->zap_ismicro) { zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT); if (zn) { err = fzap_count_write(zn, add, towrite, tooverwrite); zap_name_free(zn); } else { /* * We treat this case as similar to (name == NULL) */ *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; } } else { /* * We are here if (name != NULL) and this is a micro-zap. * We account for the header block depending on whether it * is freeable. * * Incase of an add-operation it is hard to find out * if this add will promote this microzap to fatzap. * Hence, we consider the worst case and account for the * blocks assuming this microzap would be promoted to a * fatzap. * * 1 block overwritten : header block * 4 new blocks written : 2 new split leaf, 2 grown * ptrtbl blocks */ if (dmu_buf_freeable(zap->zap_dbuf)) *tooverwrite += SPA_MAXBLOCKSIZE; else *towrite += SPA_MAXBLOCKSIZE; if (add) { *towrite += 4 * SPA_MAXBLOCKSIZE; } } zap_unlockdir(zap); return (err); } #if defined(_KERNEL) && defined(HAVE_SPL) EXPORT_SYMBOL(zap_create); EXPORT_SYMBOL(zap_create_norm); EXPORT_SYMBOL(zap_create_flags); EXPORT_SYMBOL(zap_create_claim); EXPORT_SYMBOL(zap_create_claim_norm); EXPORT_SYMBOL(zap_destroy); EXPORT_SYMBOL(zap_lookup); EXPORT_SYMBOL(zap_lookup_norm); EXPORT_SYMBOL(zap_lookup_uint64); EXPORT_SYMBOL(zap_contains); EXPORT_SYMBOL(zap_prefetch_uint64); EXPORT_SYMBOL(zap_count_write); EXPORT_SYMBOL(zap_add); EXPORT_SYMBOL(zap_add_uint64); EXPORT_SYMBOL(zap_update); EXPORT_SYMBOL(zap_update_uint64); EXPORT_SYMBOL(zap_length); EXPORT_SYMBOL(zap_length_uint64); EXPORT_SYMBOL(zap_remove); EXPORT_SYMBOL(zap_remove_norm); EXPORT_SYMBOL(zap_remove_uint64); EXPORT_SYMBOL(zap_count); EXPORT_SYMBOL(zap_value_search); EXPORT_SYMBOL(zap_join); EXPORT_SYMBOL(zap_join_increment); EXPORT_SYMBOL(zap_add_int); EXPORT_SYMBOL(zap_remove_int); EXPORT_SYMBOL(zap_lookup_int); EXPORT_SYMBOL(zap_increment_int); EXPORT_SYMBOL(zap_add_int_key); EXPORT_SYMBOL(zap_lookup_int_key); EXPORT_SYMBOL(zap_increment); EXPORT_SYMBOL(zap_cursor_init); EXPORT_SYMBOL(zap_cursor_fini); EXPORT_SYMBOL(zap_cursor_retrieve); EXPORT_SYMBOL(zap_cursor_advance); EXPORT_SYMBOL(zap_cursor_serialize); EXPORT_SYMBOL(zap_cursor_move_to_key); EXPORT_SYMBOL(zap_cursor_init_serialized); EXPORT_SYMBOL(zap_get_stats); #endif