Index: head/sys/cddl/contrib/opensolaris/common/unicode/u8_textprep.c
===================================================================
--- head/sys/cddl/contrib/opensolaris/common/unicode/u8_textprep.c (revision 366779)
+++ head/sys/cddl/contrib/opensolaris/common/unicode/u8_textprep.c (nonexistent)
@@ -1,2130 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-
-
-/*
- * UTF-8 text preparation functions (PSARC/2007/149, PSARC/2007/458).
- *
- * Man pages: u8_textprep_open(9F), u8_textprep_buf(9F), u8_textprep_close(9F),
- * u8_textprep_str(9F), u8_strcmp(9F), and u8_validate(9F). See also
- * the section 3C man pages.
- * Interface stability: Committed.
- */
-
-#include <sys/types.h>
-#ifdef _KERNEL
-#include <sys/param.h>
-#include <sys/sysmacros.h>
-#include <sys/systm.h>
-#include <sys/debug.h>
-#include <sys/kmem.h>
-#include <sys/sunddi.h>
-#else
-#include <strings.h>
-#endif /* _KERNEL */
-#include <sys/byteorder.h>
-#include <sys/errno.h>
-#include <sys/u8_textprep.h>
-#include <sys/u8_textprep_data.h>
-
-
-/* The maximum possible number of bytes in a UTF-8 character. */
-#define U8_MB_CUR_MAX (4)
-
-/*
- * The maximum number of bytes needed for a UTF-8 character to cover
- * U+0000 - U+FFFF, i.e., the coding space of now deprecated UCS-2.
- */
-#define U8_MAX_BYTES_UCS2 (3)
-
-/* The maximum possible number of bytes in a Stream-Safe Text. */
-#define U8_STREAM_SAFE_TEXT_MAX (128)
-
-/*
- * The maximum number of characters in a combining/conjoining sequence and
- * the actual upper-bound limit of a combining/conjoining sequence.
- */
-#define U8_MAX_CHARS_A_SEQ (32)
-#define U8_UPPER_LIMIT_IN_A_SEQ (31)
-
-/* The combining class value for Starter. */
-#define U8_COMBINING_CLASS_STARTER (0)
-
-/*
- * Some Hangul-related macros follow below.
- *
- * The first and the last of Hangul syllables, Hangul Jamo Leading consonants,
- * Vowels, and optional Trailing consonants in Unicode scalar values.
- *
- * Note that U8_HANGUL_JAMO_T_FIRST below is 0x11A7, not the actual U+11A8.
- * Since the trailing consonant is optional, we pre-calculate by subtracting
- * one.
- *
- * Each of the 19 modern leading consonants yields a total of 588 possible
- * syllables, since Hangul has 21 modern vowels and 27 modern trailing
- * consonants plus one for the no-trailing-consonant case, i.e., 21 x 28 = 588.
- *
- * There are also a bunch of Hangul-related macros below. Bear in mind that
- * U8_HANGUL_JAMO_1ST_BYTE can be used to check whether a character might be
- * a Hangul Jamo, but a match does not guarantee that it is one; it only
- * makes it likely.
- */
-#define U8_HANGUL_SYL_FIRST (0xAC00U)
-#define U8_HANGUL_SYL_LAST (0xD7A3U)
-
-#define U8_HANGUL_JAMO_L_FIRST (0x1100U)
-#define U8_HANGUL_JAMO_L_LAST (0x1112U)
-#define U8_HANGUL_JAMO_V_FIRST (0x1161U)
-#define U8_HANGUL_JAMO_V_LAST (0x1175U)
-#define U8_HANGUL_JAMO_T_FIRST (0x11A7U)
-#define U8_HANGUL_JAMO_T_LAST (0x11C2U)
-
-#define U8_HANGUL_V_COUNT (21)
-#define U8_HANGUL_VT_COUNT (588)
-#define U8_HANGUL_T_COUNT (28)
-
-#define U8_HANGUL_JAMO_1ST_BYTE (0xE1U)
-
-#define U8_SAVE_HANGUL_AS_UTF8(s, i, j, k, b) \
- (s)[(i)] = (uchar_t)(0xE0U | ((uint32_t)(b) & 0xF000U) >> 12); \
- (s)[(j)] = (uchar_t)(0x80U | ((uint32_t)(b) & 0x0FC0U) >> 6); \
- (s)[(k)] = (uchar_t)(0x80U | ((uint32_t)(b) & 0x003FU));
-
-#define U8_HANGUL_JAMO_L(u) \
- ((u) >= U8_HANGUL_JAMO_L_FIRST && (u) <= U8_HANGUL_JAMO_L_LAST)
-
-#define U8_HANGUL_JAMO_V(u) \
- ((u) >= U8_HANGUL_JAMO_V_FIRST && (u) <= U8_HANGUL_JAMO_V_LAST)
-
-#define U8_HANGUL_JAMO_T(u) \
- ((u) > U8_HANGUL_JAMO_T_FIRST && (u) <= U8_HANGUL_JAMO_T_LAST)
-
-#define U8_HANGUL_JAMO(u) \
- ((u) >= U8_HANGUL_JAMO_L_FIRST && (u) <= U8_HANGUL_JAMO_T_LAST)
-
-#define U8_HANGUL_SYLLABLE(u) \
- ((u) >= U8_HANGUL_SYL_FIRST && (u) <= U8_HANGUL_SYL_LAST)
-
-#define U8_HANGUL_COMPOSABLE_L_V(s, u) \
- ((s) == U8_STATE_HANGUL_L && U8_HANGUL_JAMO_V((u)))
-
-#define U8_HANGUL_COMPOSABLE_LV_T(s, u) \
- ((s) == U8_STATE_HANGUL_LV && U8_HANGUL_JAMO_T((u)))
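-
-/*
- * Illustrative sketch (not part of the original file): the arithmetic
- * behind the Hangul macros above, worked for one concrete syllable.
- * For U+D55C (HANGUL SYLLABLE HAN), the offset from the first syllable
- * is 0xD55C - 0xAC00 = 10588, so L = 0x1100 + 10588 / 588 = 0x1112,
- * V = 0x1161 + (10588 % 588) / 28 = 0x1161, and T = 0x11A7 + 10588 % 28
- * = 0x11AB. A minimal helper, assuming only the macros defined above:
- */
-#if 0
-static void
-hangul_decompose_example(uint32_t syl, uint32_t *l, uint32_t *v, uint32_t *t)
-{
- uint32_t u = syl - U8_HANGUL_SYL_FIRST;
-
- *l = U8_HANGUL_JAMO_L_FIRST + u / U8_HANGUL_VT_COUNT;
- *v = U8_HANGUL_JAMO_V_FIRST + (u % U8_HANGUL_VT_COUNT) / U8_HANGUL_T_COUNT;
- /* T == U8_HANGUL_JAMO_T_FIRST (0x11A7) means no trailing consonant. */
- *t = U8_HANGUL_JAMO_T_FIRST + u % U8_HANGUL_T_COUNT;
-}
-#endif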
-
-/* The types of decomposition mappings. */
-#define U8_DECOMP_BOTH (0xF5U)
-#define U8_DECOMP_CANONICAL (0xF6U)
-
-/* The indicator for 16-bit table. */
-#define U8_16BIT_TABLE_INDICATOR (0x8000U)
-
-/* The following are some convenience macros. */
-#define U8_PUT_3BYTES_INTO_UTF32(u, b1, b2, b3) \
- (u) = ((((uint32_t)(b1) & 0x0F) << 12) | \
- (((uint32_t)(b2) & 0x3F) << 6) | \
- ((uint32_t)(b3) & 0x3F));
-#define U8_SIMPLE_SWAP(a, b, t) \
- (t) = (a); \
- (a) = (b); \
- (b) = (t);
-
-#define U8_ASCII_TOUPPER(c) \
- (((c) >= 'a' && (c) <= 'z') ? (c) - 'a' + 'A' : (c))
-
-#define U8_ASCII_TOLOWER(c) \
- (((c) >= 'A' && (c) <= 'Z') ? (c) - 'A' + 'a' : (c))
-
-#define U8_ISASCII(c) (((uchar_t)(c)) < 0x80U)
-/*
- * The following macro assumes that the two characters that are to be
- * swapped are adjacent to each other and 'a' comes before 'b'.
- *
- * If these assumptions are not met, the macro will fail.
- */
-#define U8_SWAP_COMB_MARKS(a, b) \
- for (k = 0; k < disp[(a)]; k++) \
- u8t[k] = u8s[start[(a)] + k]; \
- for (k = 0; k < disp[(b)]; k++) \
- u8s[start[(a)] + k] = u8s[start[(b)] + k]; \
- start[(b)] = start[(a)] + disp[(b)]; \
- for (k = 0; k < disp[(a)]; k++) \
- u8s[start[(b)] + k] = u8t[k]; \
- U8_SIMPLE_SWAP(comb_class[(a)], comb_class[(b)], tc); \
- U8_SIMPLE_SWAP(disp[(a)], disp[(b)], tc);
-
-/* The possible states during normalization. */
-typedef enum {
- U8_STATE_START = 0,
- U8_STATE_HANGUL_L = 1,
- U8_STATE_HANGUL_LV = 2,
- U8_STATE_HANGUL_LVT = 3,
- U8_STATE_HANGUL_V = 4,
- U8_STATE_HANGUL_T = 5,
- U8_STATE_COMBINING_MARK = 6
-} u8_normalization_states_t;
-
-/*
- * The three tables below are used to check that the bytes of a given UTF-8
- * character are valid and contain no malformed byte values.
- *
- * UTF-8 originally had a quite relaxed binary representation, but after some
- * security-related issues arose, the Unicode Consortium defined and announced
- * the UTF-8 Corrigendum in Unicode 3.1 and then refined it one more time in
- * Unicode 3.2. The following three tables are based on that.
- */
-
-#define U8_ILLEGAL_NEXT_BYTE_COMMON(c) ((c) < 0x80 || (c) > 0xBF)
-
-#define I_ U8_ILLEGAL_CHAR
-#define O_ U8_OUT_OF_RANGE_CHAR
-
-const int8_t u8_number_of_bytes[0x100] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
-/* 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F */
- I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
-
-/* 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F */
- I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
-
-/* A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF */
- I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
-
-/* B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF */
- I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
-
-/* C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF */
- I_, I_, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-
-/* D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF */
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-
-/* E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF */
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-
-/* F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF */
- 4, 4, 4, 4, 4, O_, O_, O_, O_, O_, O_, O_, O_, O_, O_, O_,
-};
-
-#undef I_
-#undef O_
-
-const uint8_t u8_valid_min_2nd_byte[0x100] = {
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
-/* C0 C1 C2 C3 C4 C5 C6 C7 */
- 0, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-/* C8 C9 CA CB CC CD CE CF */
- 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-/* D0 D1 D2 D3 D4 D5 D6 D7 */
- 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-/* D8 D9 DA DB DC DD DE DF */
- 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-/* E0 E1 E2 E3 E4 E5 E6 E7 */
- 0xa0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-/* E8 E9 EA EB EC ED EE EF */
- 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-/* F0 F1 F2 F3 F4 F5 F6 F7 */
- 0x90, 0x80, 0x80, 0x80, 0x80, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
-};
-
-const uint8_t u8_valid_max_2nd_byte[0x100] = {
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
-/* C0 C1 C2 C3 C4 C5 C6 C7 */
- 0, 0, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
-/* C8 C9 CA CB CC CD CE CF */
- 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
-/* D0 D1 D2 D3 D4 D5 D6 D7 */
- 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
-/* D8 D9 DA DB DC DD DE DF */
- 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
-/* E0 E1 E2 E3 E4 E5 E6 E7 */
- 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
-/* E8 E9 EA EB EC ED EE EF */
- 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x9f, 0xbf, 0xbf,
-/* F0 F1 F2 F3 F4 F5 F6 F7 */
- 0xbf, 0xbf, 0xbf, 0xbf, 0x8f, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
-};
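-
-/*
- * Illustrative sketch (not part of the original file): how the three
- * tables above combine to validate one multibyte character. The special
- * entries encode the Unicode 3.2 rules: 0xE0 requires a second byte of
- * at least 0xA0 (rejecting overlong forms), 0xED allows at most 0x9F
- * (rejecting UTF-16 surrogate code points), and 0xF4 allows at most
- * 0x8F (capping the range at U+10FFFF).
- */
-#if 0
-static boolean_t
-u8_char_is_valid_example(const uchar_t *s, size_t avail)
-{
- int sz = u8_number_of_bytes[s[0]];
- int i;
-
- if (sz <= 0 || (size_t)sz > avail)
-  return (B_FALSE);
- if (sz > 1 && (s[1] < u8_valid_min_2nd_byte[s[0]] ||
-  s[1] > u8_valid_max_2nd_byte[s[0]]))
-  return (B_FALSE);
- for (i = 2; i < sz; i++) /* trailing bytes are plain 0x80 - 0xBF */
-  if (U8_ILLEGAL_NEXT_BYTE_COMMON(s[i]))
-   return (B_FALSE);
- return (B_TRUE);
-}
-#endif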
-
-
-/*
- * The u8_validate() function validates the given UTF-8 character string and
- * calculates its byte length. It is quite similar to mblen(3C) except that
- * it will also validate against a list of characters if required, and it is
- * specific to UTF-8 and Unicode.
- */
-int
-u8_validate(char *u8str, size_t n, char **list, int flag, int *errnum)
-{
- uchar_t *ib;
- uchar_t *ibtail;
- uchar_t **p;
- uchar_t *s1;
- uchar_t *s2;
- uchar_t f;
- int sz;
- size_t i;
- int ret_val;
- boolean_t second;
- boolean_t no_need_to_validate_entire;
- boolean_t check_additional;
- boolean_t validate_ucs2_range_only;
-
- if (! u8str)
- return (0);
-
- ib = (uchar_t *)u8str;
- ibtail = ib + n;
-
- ret_val = 0;
-
- no_need_to_validate_entire = ! (flag & U8_VALIDATE_ENTIRE);
- check_additional = flag & U8_VALIDATE_CHECK_ADDITIONAL;
- validate_ucs2_range_only = flag & U8_VALIDATE_UCS2_RANGE;
-
- while (ib < ibtail) {
- /*
- * The first byte of a UTF-8 character tells how many
- * bytes will follow for the character. If the first byte
- * is an illegal byte value or out of range value, we just
- * return -1 with an appropriate error number.
- */
- sz = u8_number_of_bytes[*ib];
- if (sz == U8_ILLEGAL_CHAR) {
- *errnum = EILSEQ;
- return (-1);
- }
-
- if (sz == U8_OUT_OF_RANGE_CHAR ||
- (validate_ucs2_range_only && sz > U8_MAX_BYTES_UCS2)) {
- *errnum = ERANGE;
- return (-1);
- }
-
- /*
- * If we don't have enough bytes to check on, that's also
- * an error. As you can see, we give illegal byte sequence
- * checking higher priority than EINVAL cases.
- */
- if ((ibtail - ib) < sz) {
- *errnum = EINVAL;
- return (-1);
- }
-
- if (sz == 1) {
- ib++;
- ret_val++;
- } else {
- /*
- * Check on the multi-byte UTF-8 character. For more
- * details on this, see comment added for the used
- * data structures at the beginning of the file.
- */
- f = *ib++;
- ret_val++;
- second = B_TRUE;
- for (i = 1; i < sz; i++) {
- if (second) {
- if (*ib < u8_valid_min_2nd_byte[f] ||
- *ib > u8_valid_max_2nd_byte[f]) {
- *errnum = EILSEQ;
- return (-1);
- }
- second = B_FALSE;
- } else if (U8_ILLEGAL_NEXT_BYTE_COMMON(*ib)) {
- *errnum = EILSEQ;
- return (-1);
- }
- ib++;
- ret_val++;
- }
- }
-
- if (check_additional) {
- for (p = (uchar_t **)list, i = 0; p[i]; i++) {
- s1 = ib - sz;
- s2 = p[i];
- while (s1 < ib) {
- if (*s1 != *s2 || *s2 == '\0')
- break;
- s1++;
- s2++;
- }
-
- if (s1 >= ib && *s2 == '\0') {
- *errnum = EBADF;
- return (-1);
- }
- }
- }
-
- if (no_need_to_validate_entire)
- break;
- }
-
- return (ret_val);
-}
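-
-/*
- * Example use (illustrative, not part of the original file): validate
- * an entire buffer and restrict it to the UCS-2 coding space. The 'buf'
- * and 'buflen' names are hypothetical.
- */
-#if 0
- int err, len;
-
- len = u8_validate(buf, buflen, NULL,
-  U8_VALIDATE_ENTIRE | U8_VALIDATE_UCS2_RANGE, &err);
- if (len < 0)
-  return (err); /* EILSEQ, ERANGE, or EINVAL as described above. */
-#endif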
-
-/*
- * The do_case_conv() function looks at the mapping tables and returns the
- * mapped bytes if any are found. If not found, the input bytes are returned.
- * The function always terminates the returned bytes with a null character,
- * assuming that there is plenty of room to do so.
- *
- * The case conversions are simple case conversions mapping a character to
- * another character as specified in the Unicode data. The byte size of
- * the mapped character could be different from that of the input character.
- *
- * The return value is the byte length of the returned character excluding
- * the terminating null byte.
- */
-static size_t
-do_case_conv(int uv, uchar_t *u8s, uchar_t *s, int sz, boolean_t is_it_toupper)
-{
- size_t i;
- uint16_t b1 = 0;
- uint16_t b2 = 0;
- uint16_t b3 = 0;
- uint16_t b3_tbl;
- uint16_t b3_base;
- uint16_t b4 = 0;
- size_t start_id;
- size_t end_id;
-
- /*
- * At this point, the only possible values for sz are 2, 3, and 4.
- * The u8s should point to a vector that is well beyond the size of
- * 5 bytes.
- */
- if (sz == 2) {
- b3 = u8s[0] = s[0];
- b4 = u8s[1] = s[1];
- } else if (sz == 3) {
- b2 = u8s[0] = s[0];
- b3 = u8s[1] = s[1];
- b4 = u8s[2] = s[2];
- } else if (sz == 4) {
- b1 = u8s[0] = s[0];
- b2 = u8s[1] = s[1];
- b3 = u8s[2] = s[2];
- b4 = u8s[3] = s[3];
- } else {
- /* This should not be possible, but handle it just in case as a fallback. */
- if (is_it_toupper)
- *u8s = U8_ASCII_TOUPPER(*s);
- else
- *u8s = U8_ASCII_TOLOWER(*s);
- u8s[1] = '\0';
-
- return (1);
- }
- u8s[sz] = '\0';
-
- /*
- * Let's find out if we have a corresponding character.
- */
- b1 = u8_common_b1_tbl[uv][b1];
- if (b1 == U8_TBL_ELEMENT_NOT_DEF)
- return ((size_t)sz);
-
- b2 = u8_case_common_b2_tbl[uv][b1][b2];
- if (b2 == U8_TBL_ELEMENT_NOT_DEF)
- return ((size_t)sz);
-
- if (is_it_toupper) {
- b3_tbl = u8_toupper_b3_tbl[uv][b2][b3].tbl_id;
- if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
- return ((size_t)sz);
-
- start_id = u8_toupper_b4_tbl[uv][b3_tbl][b4];
- end_id = u8_toupper_b4_tbl[uv][b3_tbl][b4 + 1];
-
- /* Either there is no match or an error at the table. */
- if (start_id >= end_id || (end_id - start_id) > U8_MB_CUR_MAX)
- return ((size_t)sz);
-
- b3_base = u8_toupper_b3_tbl[uv][b2][b3].base;
-
- for (i = 0; start_id < end_id; start_id++)
- u8s[i++] = u8_toupper_final_tbl[uv][b3_base + start_id];
- } else {
- b3_tbl = u8_tolower_b3_tbl[uv][b2][b3].tbl_id;
- if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
- return ((size_t)sz);
-
- start_id = u8_tolower_b4_tbl[uv][b3_tbl][b4];
- end_id = u8_tolower_b4_tbl[uv][b3_tbl][b4 + 1];
-
- if (start_id >= end_id || (end_id - start_id) > U8_MB_CUR_MAX)
- return ((size_t)sz);
-
- b3_base = u8_tolower_b3_tbl[uv][b2][b3].base;
-
- for (i = 0; start_id < end_id; start_id++)
- u8s[i++] = u8_tolower_final_tbl[uv][b3_base + start_id];
- }
-
- /*
- * If i is still zero, that means there is no corresponding character.
- */
- if (i == 0)
- return ((size_t)sz);
-
- u8s[i] = '\0';
-
- return (i);
-}
-
-/*
- * The do_case_compare() function compares the two input strings, s1 and s2,
- * one character at a time, doing case conversions if applicable, and returns
- * the comparison result like strcmp() does.
- *
- * Since, empirically, most text data consists of 7-bit ASCII characters,
- * we treat 7-bit ASCII characters as a special case to yield faster
- * processing.
- */
-static int
-do_case_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1,
- size_t n2, boolean_t is_it_toupper, int *errnum)
-{
- int f;
- int sz1;
- int sz2;
- size_t j;
- size_t i1;
- size_t i2;
- uchar_t u8s1[U8_MB_CUR_MAX + 1];
- uchar_t u8s2[U8_MB_CUR_MAX + 1];
-
- i1 = i2 = 0;
- while (i1 < n1 && i2 < n2) {
- /*
- * Find out the byte length of this UTF-8 character in
- * string s1 and also whether its start byte is illegal;
- * if it is, set a proper error number, yet still treat
- * the byte as a character.
- */
- sz1 = u8_number_of_bytes[*s1];
- if (sz1 < 0) {
- *errnum = EILSEQ;
- sz1 = 1;
- }
-
- /*
- * Mainly for 7-bit ASCII characters, we do a quick case
- * conversion right here.
- *
- * If we don't have enough bytes for this character, issue
- * an EINVAL error and use what is available.
- *
- * If we have enough bytes, find out if there is
- * a corresponding uppercase character and if so, copy over
- * the bytes for a comparison later. If there is no
- * corresponding uppercase character, then, use what we have
- * for the comparison.
- */
- if (sz1 == 1) {
- if (is_it_toupper)
- u8s1[0] = U8_ASCII_TOUPPER(*s1);
- else
- u8s1[0] = U8_ASCII_TOLOWER(*s1);
- s1++;
- u8s1[1] = '\0';
- } else if ((i1 + sz1) > n1) {
- *errnum = EINVAL;
- for (j = 0; (i1 + j) < n1; )
- u8s1[j++] = *s1++;
- u8s1[j] = '\0';
- } else {
- (void) do_case_conv(uv, u8s1, s1, sz1, is_it_toupper);
- s1 += sz1;
- }
-
- /* Do the same for the string s2. */
- sz2 = u8_number_of_bytes[*s2];
- if (sz2 < 0) {
- *errnum = EILSEQ;
- sz2 = 1;
- }
-
- if (sz2 == 1) {
- if (is_it_toupper)
- u8s2[0] = U8_ASCII_TOUPPER(*s2);
- else
- u8s2[0] = U8_ASCII_TOLOWER(*s2);
- s2++;
- u8s2[1] = '\0';
- } else if ((i2 + sz2) > n2) {
- *errnum = EINVAL;
- for (j = 0; (i2 + j) < n2; )
- u8s2[j++] = *s2++;
- u8s2[j] = '\0';
- } else {
- (void) do_case_conv(uv, u8s2, s2, sz2, is_it_toupper);
- s2 += sz2;
- }
-
- /* Now compare the two characters. */
- if (sz1 == 1 && sz2 == 1) {
- if (*u8s1 > *u8s2)
- return (1);
- if (*u8s1 < *u8s2)
- return (-1);
- } else {
- f = strcmp((const char *)u8s1, (const char *)u8s2);
- if (f != 0)
- return (f);
- }
-
- /*
- * They were the same. Let's move on to the next
- * characters then.
- */
- i1 += sz1;
- i2 += sz2;
- }
-
- /*
- * We compared until the end of either or both strings.
- *
- * If we reached or went past the ends of both strings, they are
- * the same.
- *
- * If we reached only one of the two ends, the other string still
- * has characters remaining, which determines the return value.
- */
- if (i1 >= n1) {
- if (i2 >= n2)
- return (0);
- return (-1);
- }
- return (1);
-}
-
-/*
- * The combining_class() function checks the given bytes and finds the
- * corresponding Unicode combining class value. The return value 0 means
- * the character is a Starter. Any illegal UTF-8 character will also be
- * treated as a Starter.
- */
-static uchar_t
-combining_class(size_t uv, uchar_t *s, size_t sz)
-{
- uint16_t b1 = 0;
- uint16_t b2 = 0;
- uint16_t b3 = 0;
- uint16_t b4 = 0;
-
- if (sz == 1 || sz > 4)
- return (0);
-
- if (sz == 2) {
- b3 = s[0];
- b4 = s[1];
- } else if (sz == 3) {
- b2 = s[0];
- b3 = s[1];
- b4 = s[2];
- } else if (sz == 4) {
- b1 = s[0];
- b2 = s[1];
- b3 = s[2];
- b4 = s[3];
- }
-
- b1 = u8_common_b1_tbl[uv][b1];
- if (b1 == U8_TBL_ELEMENT_NOT_DEF)
- return (0);
-
- b2 = u8_combining_class_b2_tbl[uv][b1][b2];
- if (b2 == U8_TBL_ELEMENT_NOT_DEF)
- return (0);
-
- b3 = u8_combining_class_b3_tbl[uv][b2][b3];
- if (b3 == U8_TBL_ELEMENT_NOT_DEF)
- return (0);
-
- return (u8_combining_class_b4_tbl[uv][b3][b4]);
-}
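-
-/*
- * For example (illustrative note, not part of the original file):
- * U+0301 COMBINING ACUTE ACCENT is 0xCC 0x81 in UTF-8 and carries
- * canonical combining class 230 in the Unicode data, so with the
- * tables populated accordingly one would expect:
- *
- * combining_class(uv, (uchar_t *)"\xCC\x81", 2) == 230
- */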
-
-/*
- * The do_decomp() function finds a matching decomposition, if any,
- * and returns it. If there is no match, the input bytes are copied and
- * returned. The function also checks whether the input is a Hangul
- * syllable, decomposes it if necessary, and returns.
- *
- * To save time, a single-byte 7-bit ASCII character should be handled
- * by the caller.
- *
- * The function returns the number of bytes returned, not counting the
- * always-appended terminating null byte. It also returns a state telling
- * whether a Hangul character was decomposed, which the caller then uses.
- */
-static size_t
-do_decomp(size_t uv, uchar_t *u8s, uchar_t *s, int sz,
- boolean_t canonical_decomposition, u8_normalization_states_t *state)
-{
- uint16_t b1 = 0;
- uint16_t b2 = 0;
- uint16_t b3 = 0;
- uint16_t b3_tbl;
- uint16_t b3_base;
- uint16_t b4 = 0;
- size_t start_id;
- size_t end_id;
- size_t i;
- uint32_t u1;
-
- if (sz == 2) {
- b3 = u8s[0] = s[0];
- b4 = u8s[1] = s[1];
- u8s[2] = '\0';
- } else if (sz == 3) {
- /* Convert it to a Unicode scalar value. */
- U8_PUT_3BYTES_INTO_UTF32(u1, s[0], s[1], s[2]);
-
- /*
- * If this is a Hangul syllable, we decompose it into
- * a leading consonant, a vowel, and an optional trailing
- * consonant and then return.
- */
- if (U8_HANGUL_SYLLABLE(u1)) {
- u1 -= U8_HANGUL_SYL_FIRST;
-
- b1 = U8_HANGUL_JAMO_L_FIRST + u1 / U8_HANGUL_VT_COUNT;
- b2 = U8_HANGUL_JAMO_V_FIRST + (u1 % U8_HANGUL_VT_COUNT)
- / U8_HANGUL_T_COUNT;
- b3 = u1 % U8_HANGUL_T_COUNT;
-
- U8_SAVE_HANGUL_AS_UTF8(u8s, 0, 1, 2, b1);
- U8_SAVE_HANGUL_AS_UTF8(u8s, 3, 4, 5, b2);
- if (b3) {
- b3 += U8_HANGUL_JAMO_T_FIRST;
- U8_SAVE_HANGUL_AS_UTF8(u8s, 6, 7, 8, b3);
-
- u8s[9] = '\0';
- *state = U8_STATE_HANGUL_LVT;
- return (9);
- }
-
- u8s[6] = '\0';
- *state = U8_STATE_HANGUL_LV;
- return (6);
- }
-
- b2 = u8s[0] = s[0];
- b3 = u8s[1] = s[1];
- b4 = u8s[2] = s[2];
- u8s[3] = '\0';
-
- /*
- * If this is a Hangul Jamo, we know there is nothing
- * further that we can decompose.
- */
- if (U8_HANGUL_JAMO_L(u1)) {
- *state = U8_STATE_HANGUL_L;
- return (3);
- }
-
- if (U8_HANGUL_JAMO_V(u1)) {
- if (*state == U8_STATE_HANGUL_L)
- *state = U8_STATE_HANGUL_LV;
- else
- *state = U8_STATE_HANGUL_V;
- return (3);
- }
-
- if (U8_HANGUL_JAMO_T(u1)) {
- if (*state == U8_STATE_HANGUL_LV)
- *state = U8_STATE_HANGUL_LVT;
- else
- *state = U8_STATE_HANGUL_T;
- return (3);
- }
- } else if (sz == 4) {
- b1 = u8s[0] = s[0];
- b2 = u8s[1] = s[1];
- b3 = u8s[2] = s[2];
- b4 = u8s[3] = s[3];
- u8s[4] = '\0';
- } else {
- /*
- * This is a fallback and should not happen if the function
- * was called properly.
- */
- u8s[0] = s[0];
- u8s[1] = '\0';
- *state = U8_STATE_START;
- return (1);
- }
-
- /*
- * At this point, this routine does not know what it will get.
- * The caller should sort it out if the state isn't a Hangul one.
- */
- *state = U8_STATE_START;
-
- /* Try to find matching decomposition mapping byte sequence. */
- b1 = u8_common_b1_tbl[uv][b1];
- if (b1 == U8_TBL_ELEMENT_NOT_DEF)
- return ((size_t)sz);
-
- b2 = u8_decomp_b2_tbl[uv][b1][b2];
- if (b2 == U8_TBL_ELEMENT_NOT_DEF)
- return ((size_t)sz);
-
- b3_tbl = u8_decomp_b3_tbl[uv][b2][b3].tbl_id;
- if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
- return ((size_t)sz);
-
- /*
- * If b3_tbl is bigger than or equal to U8_16BIT_TABLE_INDICATOR
- * which is 0x8000, this means we couldn't fit the mappings into
- * the cardinality of an unsigned byte.
- */
- if (b3_tbl >= U8_16BIT_TABLE_INDICATOR) {
- b3_tbl -= U8_16BIT_TABLE_INDICATOR;
- start_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4];
- end_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4 + 1];
- } else {
- start_id = u8_decomp_b4_tbl[uv][b3_tbl][b4];
- end_id = u8_decomp_b4_tbl[uv][b3_tbl][b4 + 1];
- }
-
- /* This also means there wasn't any matching decomposition. */
- if (start_id >= end_id)
- return ((size_t)sz);
-
- /*
- * The final table for decomposition mappings has three types of
- * byte sequences depending on whether a mapping is for compatibility
- * decomposition, canonical decomposition, or both, as follows:
- *
- * (1) Compatibility decomposition mappings:
- *
- * +---+---+-...-+---+
- * | B0| B1| ... | Bm|
- * +---+---+-...-+---+
- *
- * The first byte, B0, is always less than 0xF5 (U8_DECOMP_BOTH).
- *
- * (2) Canonical decomposition mappings:
- *
- * +---+---+---+-...-+---+
- * | T | b0| b1| ... | bn|
- * +---+---+---+-...-+---+
- *
- * where the first byte, T, is 0xF6 (U8_DECOMP_CANONICAL).
- *
- * (3) Both mappings:
- *
- * +---+---+---+---+-...-+---+---+---+-...-+---+
- * | T | D | b0| b1| ... | bn| B0| B1| ... | Bm|
- * +---+---+---+---+-...-+---+---+---+-...-+---+
- *
- * where T is 0xF5 (U8_DECOMP_BOTH) and D is a displacement
- * byte, b0 to bn are canonical mapping bytes and B0 to Bm are
- * compatibility mapping bytes.
- *
- * Note that compatibility decomposition means doing recursive
- * decompositions using both compatibility decomposition mappings and
- * canonical decomposition mappings. On the other hand, canonical
- * decomposition means doing recursive decompositions using only
- * canonical decomposition mappings. Since the table we have has gone
- * through the recursions already, we do not need to do so during
- * runtime, i.e., the table has been completely flattened out
- * already.
- */
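-
- /*
- * To make the layout above concrete (hypothetical values, not taken
- * from the actual tables): a "both" entry such as
- *
- * F5 03 b0 b1 B0 B1
- *
- * has T = 0xF5 and displacement D = 3. The canonical path below skips
- * T and D and copies the D - 1 = 2 canonical bytes b0 b1, while the
- * compatibility path skips T plus D bytes from D's position and copies
- * the compatibility bytes starting at B0.
- */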
-
- b3_base = u8_decomp_b3_tbl[uv][b2][b3].base;
-
- /* Get the type, T, of the byte sequence. */
- b1 = u8_decomp_final_tbl[uv][b3_base + start_id];
-
- /*
- * If necessary, adjust start_id, end_id, or both. Note that if
- * this is a compatibility decomposition mapping, there is no
- * adjustment.
- */
- if (canonical_decomposition) {
- /* Is the mapping only for compatibility decomposition? */
- if (b1 < U8_DECOMP_BOTH)
- return ((size_t)sz);
-
- start_id++;
-
- if (b1 == U8_DECOMP_BOTH) {
- end_id = start_id +
- u8_decomp_final_tbl[uv][b3_base + start_id];
- start_id++;
- }
- } else {
- /*
- * Unless this is a compatibility decomposition mapping,
- * we adjust the start_id.
- */
- if (b1 == U8_DECOMP_BOTH) {
- start_id++;
- start_id += u8_decomp_final_tbl[uv][b3_base + start_id];
- } else if (b1 == U8_DECOMP_CANONICAL) {
- start_id++;
- }
- }
-
- for (i = 0; start_id < end_id; start_id++)
- u8s[i++] = u8_decomp_final_tbl[uv][b3_base + start_id];
- u8s[i] = '\0';
-
- return (i);
-}
-
-/*
- * The find_composition_start() function uses the given character bytes to
- * find the matching composition mappings, if any, and returns the address
- * of the composition mappings as explained in do_composition().
- */
-static uchar_t *
-find_composition_start(size_t uv, uchar_t *s, size_t sz)
-{
- uint16_t b1 = 0;
- uint16_t b2 = 0;
- uint16_t b3 = 0;
- uint16_t b3_tbl;
- uint16_t b3_base;
- uint16_t b4 = 0;
- size_t start_id;
- size_t end_id;
-
- if (sz == 1) {
- b4 = s[0];
- } else if (sz == 2) {
- b3 = s[0];
- b4 = s[1];
- } else if (sz == 3) {
- b2 = s[0];
- b3 = s[1];
- b4 = s[2];
- } else if (sz == 4) {
- b1 = s[0];
- b2 = s[1];
- b3 = s[2];
- b4 = s[3];
- } else {
- /*
- * This is a fallback and should not happen if the function
- * was called properly.
- */
- return (NULL);
- }
-
- b1 = u8_composition_b1_tbl[uv][b1];
- if (b1 == U8_TBL_ELEMENT_NOT_DEF)
- return (NULL);
-
- b2 = u8_composition_b2_tbl[uv][b1][b2];
- if (b2 == U8_TBL_ELEMENT_NOT_DEF)
- return (NULL);
-
- b3_tbl = u8_composition_b3_tbl[uv][b2][b3].tbl_id;
- if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
- return (NULL);
-
- if (b3_tbl >= U8_16BIT_TABLE_INDICATOR) {
- b3_tbl -= U8_16BIT_TABLE_INDICATOR;
- start_id = u8_composition_b4_16bit_tbl[uv][b3_tbl][b4];
- end_id = u8_composition_b4_16bit_tbl[uv][b3_tbl][b4 + 1];
- } else {
- start_id = u8_composition_b4_tbl[uv][b3_tbl][b4];
- end_id = u8_composition_b4_tbl[uv][b3_tbl][b4 + 1];
- }
-
- if (start_id >= end_id)
- return (NULL);
-
- b3_base = u8_composition_b3_tbl[uv][b2][b3].base;
-
- return ((uchar_t *)&(u8_composition_final_tbl[uv][b3_base + start_id]));
-}
-
-/*
- * The blocked() function checks the combining class values of the previous
- * characters in this sequence and returns whether the current character is
- * blocked or not.
- */
-static boolean_t
-blocked(uchar_t *comb_class, size_t last)
-{
- uchar_t my_comb_class;
- size_t i;
-
- my_comb_class = comb_class[last];
- for (i = 1; i < last; i++)
- if (comb_class[i] >= my_comb_class ||
- comb_class[i] == U8_COMBINING_CLASS_STARTER)
- return (B_TRUE);
-
- return (B_FALSE);
-}
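-
-/*
- * For example (illustrative note, not part of the original file): in
- * the sequence <e, COMBINING DOT BELOW (class 220), COMBINING ACUTE
- * ACCENT (class 230)>, the acute at index 2 is not blocked, since
- * 220 < 230, so composing e + acute may still be attempted. With the
- * two marks in the opposite order, the dot below would be blocked,
- * since 230 >= 220.
- */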
-
-/*
- * The do_composition() function reads the character string pointed to by
- * 's', does any necessary canonical composition, and then copies the result
- * back to 's'.
- *
- * The input argument 's' cannot contain more than 32 characters.
- */
-static size_t
-do_composition(size_t uv, uchar_t *s, uchar_t *comb_class, uchar_t *start,
- uchar_t *disp, size_t last, uchar_t **os, uchar_t *oslast)
-{
- uchar_t t[U8_STREAM_SAFE_TEXT_MAX + 1];
- uchar_t tc[U8_MB_CUR_MAX];
- uint8_t saved_marks[U8_MAX_CHARS_A_SEQ];
- size_t saved_marks_count;
- uchar_t *p;
- uchar_t *saved_p;
- uchar_t *q;
- size_t i;
- size_t saved_i;
- size_t j;
- size_t k;
- size_t l;
- size_t C;
- size_t saved_l;
- size_t size;
- uint32_t u1;
- uint32_t u2;
- boolean_t match_not_found = B_TRUE;
-
- /*
- * This should never happen unless the callers are doing some strange
- * and unexpected things.
- *
- * The "last" is the index pointing to the last character not last + 1.
- */
- if (last >= U8_MAX_CHARS_A_SEQ)
- last = U8_UPPER_LIMIT_IN_A_SEQ;
-
- for (i = l = 0; i <= last; i++) {
- /*
- * For the last character, or for any non-Starters at the
- * beginning, we have no chance to do composition, so we just
- * copy them to the temporary buffer.
- */
- if (i >= last || comb_class[i] != U8_COMBINING_CLASS_STARTER) {
-SAVE_THE_CHAR:
- p = s + start[i];
- size = disp[i];
- for (k = 0; k < size; k++)
- t[l++] = *p++;
- continue;
- }
-
- /*
- * If this could be the start of Hangul Jamos, we try to
- * conjoin them.
- */
- if (s[start[i]] == U8_HANGUL_JAMO_1ST_BYTE) {
- U8_PUT_3BYTES_INTO_UTF32(u1, s[start[i]],
- s[start[i] + 1], s[start[i] + 2]);
- U8_PUT_3BYTES_INTO_UTF32(u2, s[start[i] + 3],
- s[start[i] + 4], s[start[i] + 5]);
-
- if (U8_HANGUL_JAMO_L(u1) && U8_HANGUL_JAMO_V(u2)) {
- u1 -= U8_HANGUL_JAMO_L_FIRST;
- u2 -= U8_HANGUL_JAMO_V_FIRST;
- u1 = U8_HANGUL_SYL_FIRST +
- (u1 * U8_HANGUL_V_COUNT + u2) *
- U8_HANGUL_T_COUNT;
-
- i += 2;
- if (i <= last) {
- U8_PUT_3BYTES_INTO_UTF32(u2,
- s[start[i]], s[start[i] + 1],
- s[start[i] + 2]);
-
- if (U8_HANGUL_JAMO_T(u2)) {
- u1 += u2 -
- U8_HANGUL_JAMO_T_FIRST;
- i++;
- }
- }
-
- U8_SAVE_HANGUL_AS_UTF8(t + l, 0, 1, 2, u1);
- i--;
- l += 3;
- continue;
- }
- }
-
- /*
- * Let's then find out if this Starter has a composition
- * mapping.
- */
- p = find_composition_start(uv, s + start[i], disp[i]);
- if (p == NULL)
- goto SAVE_THE_CHAR;
-
- /*
- * We have a Starter with a composition mapping and the next
- * character is a non-Starter. Let's try to find out if
- * we can do composition.
- */
-
- saved_p = p;
- saved_i = i;
- saved_l = l;
- saved_marks_count = 0;
-
-TRY_THE_NEXT_MARK:
- q = s + start[++i];
- size = disp[i];
-
- /*
- * The next for() loop compares the non-Starter pointed by
- * 'q' with the possible (joinable) characters pointed by 'p'.
- *
- * The composition final table entry pointed by the 'p'
- * looks like the following:
- *
- * +---+---+---+-...-+---+---+---+---+-...-+---+---+
- * | C | b0| b2| ... | bn| F | B0| B1| ... | Bm| F |
- * +---+---+---+-...-+---+---+---+---+-...-+---+---+
- *
- * where C is the count byte indicating the number of
- * mapping pairs, where each pair looks like
- * (b0-bn F, B0-Bm F). The b0-bn are the bytes of the second
- * character of a canonical decomposition and the B0-Bm are
- * the bytes of a matching composite character. The F is
- * a filler byte after each character as the separator.
- */
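-
- /*
- * To make that concrete (hypothetical values, not taken from the
- * actual tables): with C = 2, an entry such as
- *
- * 02 b0 b1 F B0 F b0' F B0' B1' F
- *
- * offers two pairs. If the non-Starter at 'q' matches b0 b1, the
- * composite B0 replaces the pair; otherwise the scan below skips
- * past the two fillers to the next pair starting at b0'.
- */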
-
- match_not_found = B_TRUE;
-
- for (C = *p++; C > 0; C--) {
- for (k = 0; k < size; p++, k++)
- if (*p != q[k])
- break;
-
- /* Have we found it? */
- if (k >= size && *p == U8_TBL_ELEMENT_FILLER) {
- match_not_found = B_FALSE;
-
- l = saved_l;
-
- while (*++p != U8_TBL_ELEMENT_FILLER)
- t[l++] = *p;
-
- break;
- }
-
- /* We didn't find; skip to the next pair. */
- if (*p != U8_TBL_ELEMENT_FILLER)
- while (*++p != U8_TBL_ELEMENT_FILLER)
- ;
- while (*++p != U8_TBL_ELEMENT_FILLER)
- ;
- p++;
- }
-
- /*
- * If there was no match, we will need to save the combining
- * mark for later appending. After that, if the next one
- * is a non-Starter and not blocked, we try once again
- * to do composition with the next non-Starter.
- *
- * If there was no match and this was a Starter, then this
- * is a new start.
- *
- * If there was a match and a composition done and we have
- * more to check on, then, we retrieve a new composition final
- * table entry for the composite and then try to do the
- * composition again.
- */
-
- if (match_not_found) {
- if (comb_class[i] == U8_COMBINING_CLASS_STARTER) {
- i--;
- goto SAVE_THE_CHAR;
- }
-
- saved_marks[saved_marks_count++] = i;
- }
-
- if (saved_l == l) {
- while (i < last) {
- if (blocked(comb_class, i + 1))
- saved_marks[saved_marks_count++] = ++i;
- else
- break;
- }
- if (i < last) {
- p = saved_p;
- goto TRY_THE_NEXT_MARK;
- }
- } else if (i < last) {
- p = find_composition_start(uv, t + saved_l,
- l - saved_l);
- if (p != NULL) {
- saved_p = p;
- goto TRY_THE_NEXT_MARK;
- }
- }
-
- /*
- * There is no more composition possible.
- *
- * If there was no composition whatsoever, we copy over
- * the original Starter and then append any remaining
- * non-Starters to the target string sequentially after that.
- */
-
- if (saved_l == l) {
- p = s + start[saved_i];
- size = disp[saved_i];
- for (j = 0; j < size; j++)
- t[l++] = *p++;
- }
-
- for (k = 0; k < saved_marks_count; k++) {
- p = s + start[saved_marks[k]];
- size = disp[saved_marks[k]];
- for (j = 0; j < size; j++)
- t[l++] = *p++;
- }
- }
-
- /*
- * If the last character is a Starter and we have a character
- * (possibly another Starter) that can be turned into a composite,
- * we compose them, and keep doing so until no more composition
- * is possible.
- */
- if (comb_class[last] == U8_COMBINING_CLASS_STARTER) {
- p = *os;
- saved_l = l - disp[last];
-
- while (p < oslast) {
- size = u8_number_of_bytes[*p];
- if (size <= 1 || (p + size) > oslast)
- break;
-
- saved_p = p;
-
- for (i = 0; i < size; i++)
- tc[i] = *p++;
-
- q = find_composition_start(uv, t + saved_l,
- l - saved_l);
- if (q == NULL) {
- p = saved_p;
- break;
- }
-
- match_not_found = B_TRUE;
-
- for (C = *q++; C > 0; C--) {
- for (k = 0; k < size; q++, k++)
- if (*q != tc[k])
- break;
-
- if (k >= size && *q == U8_TBL_ELEMENT_FILLER) {
- match_not_found = B_FALSE;
-
- l = saved_l;
-
- while (*++q != U8_TBL_ELEMENT_FILLER) {
- /*
- * This is practically
- * impossible but we don't
- * want to take any chances.
- */
- if (l >=
- U8_STREAM_SAFE_TEXT_MAX) {
- p = saved_p;
- goto SAFE_RETURN;
- }
- t[l++] = *q;
- }
-
- break;
- }
-
- if (*q != U8_TBL_ELEMENT_FILLER)
- while (*++q != U8_TBL_ELEMENT_FILLER)
- ;
- while (*++q != U8_TBL_ELEMENT_FILLER)
- ;
- q++;
- }
-
- if (match_not_found) {
- p = saved_p;
- break;
- }
- }
-SAFE_RETURN:
- *os = p;
- }
-
- /*
- * Now we copy over the temporary string to the target string.
- * Since composition always reduces the number of characters or
- * keeps it the same, we don't need to worry about buffer
- * overflow here.
- */
- for (i = 0; i < l; i++)
- s[i] = t[i];
- s[l] = '\0';
-
- return (l);
-}
-
-/*
- * The collect_a_seq() function checks the given string s, collects
- * a sequence of characters at u8s, and returns the sequence. While it
- * collects a sequence, it also applies case conversion, canonical or
- * compatibility decomposition, and canonical composition, or some or all
- * of them, in that order.
- *
- * The collected sequence cannot be bigger than 32 characters: once it
- * has more than 31 characters, the sequence is terminated with a U+034F
- * COMBINING GRAPHEME JOINER (CGJ) character and turned into Stream-Safe
- * Text. The collected sequence is always terminated with a null byte.
- * The return value is the byte length of the sequence, possibly 0, and
- * does not include the terminating null byte.
- */
-static size_t
-collect_a_seq(size_t uv, uchar_t *u8s, uchar_t **source, uchar_t *slast,
- boolean_t is_it_toupper,
- boolean_t is_it_tolower,
- boolean_t canonical_decomposition,
- boolean_t compatibility_decomposition,
- boolean_t canonical_composition,
- int *errnum, u8_normalization_states_t *state)
-{
- uchar_t *s;
- int sz;
- int saved_sz;
- size_t i;
- size_t j;
- size_t k;
- size_t l;
- uchar_t comb_class[U8_MAX_CHARS_A_SEQ];
- uchar_t disp[U8_MAX_CHARS_A_SEQ];
- uchar_t start[U8_MAX_CHARS_A_SEQ];
- uchar_t u8t[U8_MB_CUR_MAX];
- uchar_t uts[U8_STREAM_SAFE_TEXT_MAX + 1];
- uchar_t tc;
- size_t last;
- size_t saved_last;
- uint32_t u1;
-
- /*
- * Save the source string pointer; we will return a changed
- * pointer through it if we do any processing.
- */
- s = *source;
-
- /*
- * The following is a fallback just in case callers are not
- * checking the string boundaries before calling.
- */
- if (s >= slast) {
- u8s[0] = '\0';
-
- return (0);
- }
-
- /*
- * First, let's collect a character and do case conversion
- * if necessary.
- */
-
- sz = u8_number_of_bytes[*s];
-
- if (sz < 0) {
- *errnum = EILSEQ;
-
- u8s[0] = *s++;
- u8s[1] = '\0';
-
- *source = s;
-
- return (1);
- }
-
- if (sz == 1) {
- if (is_it_toupper)
- u8s[0] = U8_ASCII_TOUPPER(*s);
- else if (is_it_tolower)
- u8s[0] = U8_ASCII_TOLOWER(*s);
- else
- u8s[0] = *s;
- s++;
- u8s[1] = '\0';
- } else if ((s + sz) > slast) {
- *errnum = EINVAL;
-
- for (i = 0; s < slast; )
- u8s[i++] = *s++;
- u8s[i] = '\0';
-
- *source = s;
-
- return (i);
- } else {
- if (is_it_toupper || is_it_tolower) {
- i = do_case_conv(uv, u8s, s, sz, is_it_toupper);
- s += sz;
- sz = i;
- } else {
- for (i = 0; i < sz; )
- u8s[i++] = *s++;
- u8s[i] = '\0';
- }
- }
-
- /*
- * Then canonical/compatibility decomposition follows, with
- * an optional canonical composition. Note that canonical
- * composition is done only when a decomposition has been
- * done.
- */
- if (canonical_decomposition || compatibility_decomposition) {
- if (sz == 1) {
- *state = U8_STATE_START;
-
- saved_sz = 1;
-
- comb_class[0] = 0;
- start[0] = 0;
- disp[0] = 1;
-
- last = 1;
- } else {
- saved_sz = do_decomp(uv, u8s, u8s, sz,
- canonical_decomposition, state);
-
- last = 0;
-
- for (i = 0; i < saved_sz; ) {
- sz = u8_number_of_bytes[u8s[i]];
-
- comb_class[last] = combining_class(uv,
- u8s + i, sz);
- start[last] = i;
- disp[last] = sz;
-
- last++;
- i += sz;
- }
-
- /*
- * Decomposition yields various Hangul-related
- * states but does not flag combining marks. We
- * need to find that out here by checking the
- * last character.
- */
- if (*state == U8_STATE_START) {
- if (comb_class[last - 1])
- *state = U8_STATE_COMBINING_MARK;
- }
- }
-
- saved_last = last;
-
- while (s < slast) {
- sz = u8_number_of_bytes[*s];
-
- /*
- * If this is an illegal character, an incomplete
- * character, or a 7-bit ASCII Starter character,
- * then we have collected a sequence; break and let
- * the next call deal with these cases.
- *
- * Note that this is okay only if you are using this
- * function with a fixed length string, not on
- * a buffer with multiple calls of one chunk at a time.
- */
- if (sz <= 1) {
- break;
- } else if ((s + sz) > slast) {
- break;
- } else {
- /*
- * If the previous character was a Hangul Jamo
- * and this character is a Hangul Jamo that
- * can be conjoined, we collect the Jamo.
- */
- if (*s == U8_HANGUL_JAMO_1ST_BYTE) {
- U8_PUT_3BYTES_INTO_UTF32(u1,
- *s, *(s + 1), *(s + 2));
-
- if (U8_HANGUL_COMPOSABLE_L_V(*state,
- u1)) {
- i = 0;
- *state = U8_STATE_HANGUL_LV;
- goto COLLECT_A_HANGUL;
- }
-
- if (U8_HANGUL_COMPOSABLE_LV_T(*state,
- u1)) {
- i = 0;
- *state = U8_STATE_HANGUL_LVT;
- goto COLLECT_A_HANGUL;
- }
- }
-
- /*
- * Regardless of whatever it was, if this is
- * a Starter, we don't collect the character
- * since that's a new start and we will deal
- * with it at the next time.
- */
- i = combining_class(uv, s, sz);
- if (i == U8_COMBINING_CLASS_STARTER)
- break;
-
- /*
- * We know the current character is a combining
- * mark. If the previous character wasn't
- * a Starter (not Hangul) or a combining mark,
- * then, we don't collect this combining mark.
- */
- if (*state != U8_STATE_START &&
- *state != U8_STATE_COMBINING_MARK)
- break;
-
- *state = U8_STATE_COMBINING_MARK;
-COLLECT_A_HANGUL:
- /*
- * If we collected a Starter and up to 30
- * combining marks, i.e., 31 characters total,
- * we terminate this degenerately long
- * combining sequence with a U+034F COMBINING
- * GRAPHEME JOINER (CGJ) which is 0xCD 0x8F in
- * UTF-8 and turn this into a Stream-Safe
- * Text. This will be extremely rare but
- * possible.
- *
- * The following will also guarantee that
- * we are not writing more than 32 characters
- * plus a NULL at u8s[].
- */
- if (last >= U8_UPPER_LIMIT_IN_A_SEQ) {
-TURN_STREAM_SAFE:
- *state = U8_STATE_START;
- comb_class[last] = 0;
- start[last] = saved_sz;
- disp[last] = 2;
- last++;
-
- u8s[saved_sz++] = 0xCD;
- u8s[saved_sz++] = 0x8F;
-
- break;
- }
-
- /*
- * Some combining marks also decompose into
- * another combining mark or marks.
- */
- if (*state == U8_STATE_COMBINING_MARK) {
- k = last;
- l = sz;
- i = do_decomp(uv, uts, s, sz,
- canonical_decomposition, state);
- for (j = 0; j < i; ) {
- sz = u8_number_of_bytes[uts[j]];
-
- comb_class[last] =
- combining_class(uv,
- uts + j, sz);
- start[last] = saved_sz + j;
- disp[last] = sz;
-
- last++;
- if (last >=
- U8_UPPER_LIMIT_IN_A_SEQ) {
- last = k;
- goto TURN_STREAM_SAFE;
- }
- j += sz;
- }
-
- *state = U8_STATE_COMBINING_MARK;
- sz = i;
- s += l;
-
- for (i = 0; i < sz; i++)
- u8s[saved_sz++] = uts[i];
- } else {
- comb_class[last] = i;
- start[last] = saved_sz;
- disp[last] = sz;
- last++;
-
- for (i = 0; i < sz; i++)
- u8s[saved_sz++] = *s++;
- }
-
- /*
- * If this is U+0345 COMBINING GREEK
- * YPOGEGRAMMENI (0xCD 0x85 in UTF-8), a.k.a.,
- * iota subscript, and it needs to be converted to
- * an uppercase letter, we convert it to U+0399 GREEK
- * CAPITAL LETTER IOTA (0xCE 0x99 in UTF-8),
- * i.e., convert to capital adscript form as
- * specified in the Unicode standard.
- *
- * This is the only special case of (ambiguous)
- * case conversion at combining marks and
- * the standard will probably never have
- * anything similar in the future.
- */
- if (is_it_toupper && sz >= 2 &&
- u8s[saved_sz - 2] == 0xCD &&
- u8s[saved_sz - 1] == 0x85) {
- u8s[saved_sz - 2] = 0xCE;
- u8s[saved_sz - 1] = 0x99;
- }
- }
- }
-
- /*
- * Let's try to ensure a canonical ordering for the collected
- * combining marks. We do this only if we have collected
- * at least one more non-Starter. (The decomposition mapping
- * data tables have fully (and recursively) expanded and
- * canonically ordered decompositions.)
- *
- * The U8_SWAP_COMB_MARKS() convenience macro has some
- * assumptions, and we meet those assumptions here.
- */
- last--;
- if (last >= saved_last) {
- for (i = 0; i < last; i++)
- for (j = last; j > i; j--)
- if (comb_class[j] &&
- comb_class[j - 1] > comb_class[j]) {
- U8_SWAP_COMB_MARKS(j - 1, j);
- }
- }
-
- *source = s;
-
- if (! canonical_composition) {
- u8s[saved_sz] = '\0';
- return (saved_sz);
- }
-
- /*
- * Now do the canonical composition. Note that we do this
- * only after a canonical or compatibility decomposition to
- * finish up NFC or NFKC.
- */
- sz = do_composition(uv, u8s, comb_class, start, disp, last,
- &s, slast);
- }
-
- *source = s;
-
- return ((size_t)sz);
-}
-
-/*
- * The do_norm_compare() function does string comparison based on Unicode
- * simple case mappings and Unicode Normalization definitions.
- *
- * It does so by collecting one sequence of characters at a time and
- * comparing the collected sequences from the two strings.
- *
- * The meanings of the return values are the same as for the usual strcmp().
- */
-static int
-do_norm_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1, size_t n2,
- int flag, int *errnum)
-{
- int result;
- size_t sz1;
- size_t sz2;
- uchar_t u8s1[U8_STREAM_SAFE_TEXT_MAX + 1];
- uchar_t u8s2[U8_STREAM_SAFE_TEXT_MAX + 1];
- uchar_t *s1last;
- uchar_t *s2last;
- boolean_t is_it_toupper;
- boolean_t is_it_tolower;
- boolean_t canonical_decomposition;
- boolean_t compatibility_decomposition;
- boolean_t canonical_composition;
- u8_normalization_states_t state;
-
- s1last = s1 + n1;
- s2last = s2 + n2;
-
- is_it_toupper = flag & U8_TEXTPREP_TOUPPER;
- is_it_tolower = flag & U8_TEXTPREP_TOLOWER;
- canonical_decomposition = flag & U8_CANON_DECOMP;
- compatibility_decomposition = flag & U8_COMPAT_DECOMP;
- canonical_composition = flag & U8_CANON_COMP;
-
- while (s1 < s1last && s2 < s2last) {
- /*
- * If the current character is 7-bit ASCII and is the last
- * character, or if the current character and the next
- * character are both 7-bit ASCII characters, we treat the
- * current character as a sequence by itself.
- *
- * In all other cases, we need to call collect_a_seq().
- */
-
- if (U8_ISASCII(*s1) && ((s1 + 1) >= s1last ||
- ((s1 + 1) < s1last && U8_ISASCII(*(s1 + 1))))) {
- if (is_it_toupper)
- u8s1[0] = U8_ASCII_TOUPPER(*s1);
- else if (is_it_tolower)
- u8s1[0] = U8_ASCII_TOLOWER(*s1);
- else
- u8s1[0] = *s1;
- u8s1[1] = '\0';
- sz1 = 1;
- s1++;
- } else {
- state = U8_STATE_START;
- sz1 = collect_a_seq(uv, u8s1, &s1, s1last,
- is_it_toupper, is_it_tolower,
- canonical_decomposition,
- compatibility_decomposition,
- canonical_composition, errnum, &state);
- }
-
- if (U8_ISASCII(*s2) && ((s2 + 1) >= s2last ||
- ((s2 + 1) < s2last && U8_ISASCII(*(s2 + 1))))) {
- if (is_it_toupper)
- u8s2[0] = U8_ASCII_TOUPPER(*s2);
- else if (is_it_tolower)
- u8s2[0] = U8_ASCII_TOLOWER(*s2);
- else
- u8s2[0] = *s2;
- u8s2[1] = '\0';
- sz2 = 1;
- s2++;
- } else {
- state = U8_STATE_START;
- sz2 = collect_a_seq(uv, u8s2, &s2, s2last,
- is_it_toupper, is_it_tolower,
- canonical_decomposition,
- compatibility_decomposition,
- canonical_composition, errnum, &state);
- }
-
- /*
- * Now compare the two characters. If they are the same,
- * we move on to the next character sequences.
- */
- if (sz1 == 1 && sz2 == 1) {
- if (*u8s1 > *u8s2)
- return (1);
- if (*u8s1 < *u8s2)
- return (-1);
- } else {
- result = strcmp((const char *)u8s1, (const char *)u8s2);
- if (result != 0)
- return (result);
- }
- }
-
- /*
- * We compared until the end of either or both strings.
- *
- * If we reached or went past the ends of both strings, they are
- * the same.
- *
- * If we reached only one end, the other string still has
- * characters remaining, which determines the return value.
- */
- if (s1 >= s1last) {
- if (s2 >= s2last)
- return (0);
- return (-1);
- }
- return (1);
-}
-
-/*
- * The u8_strcmp() function compares two UTF-8 strings much like strcmp().
- * For the comparison, however, equivalency based on Unicode Normalization
- * and on Unicode simple case conversion mappings can be requested and
- * checked against.
- */
-int
-u8_strcmp(const char *s1, const char *s2, size_t n, int flag, size_t uv,
- int *errnum)
-{
- int f;
- size_t n1;
- size_t n2;
-
- *errnum = 0;
-
- /*
- * Check on the requested Unicode version, case conversion, and
- * normalization flag values.
- */
-
- if (uv > U8_UNICODE_LATEST) {
- *errnum = ERANGE;
- uv = U8_UNICODE_LATEST;
- }
-
- if (flag == 0) {
- flag = U8_STRCMP_CS;
- } else {
- f = flag & (U8_STRCMP_CS | U8_STRCMP_CI_UPPER |
- U8_STRCMP_CI_LOWER);
- if (f == 0) {
- flag |= U8_STRCMP_CS;
- } else if (f != U8_STRCMP_CS && f != U8_STRCMP_CI_UPPER &&
- f != U8_STRCMP_CI_LOWER) {
- *errnum = EBADF;
- flag = U8_STRCMP_CS;
- }
-
- f = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP | U8_CANON_COMP);
- if (f && f != U8_STRCMP_NFD && f != U8_STRCMP_NFC &&
- f != U8_STRCMP_NFKD && f != U8_STRCMP_NFKC) {
- *errnum = EBADF;
- flag = U8_STRCMP_CS;
- }
- }
-
- if (flag == U8_STRCMP_CS) {
- return (n == 0 ? strcmp(s1, s2) : strncmp(s1, s2, n));
- }
-
- n1 = strlen(s1);
- n2 = strlen(s2);
- if (n != 0) {
- if (n < n1)
- n1 = n;
- if (n < n2)
- n2 = n;
- }
-
- /*
- * Simple case conversions can be done much faster, so we do
- * them separately here.
- */
- if (flag == U8_STRCMP_CI_UPPER) {
- return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2,
- n1, n2, B_TRUE, errnum));
- } else if (flag == U8_STRCMP_CI_LOWER) {
- return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2,
- n1, n2, B_FALSE, errnum));
- }
-
- return (do_norm_compare(uv, (uchar_t *)s1, (uchar_t *)s2, n1, n2,
- flag, errnum));
-}
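-
-/*
- * Example use (illustrative, not part of the original file): compare a
- * precomposed U+00E9 against "e" + U+0301 case-insensitively under NFC
- * equivalence.
- */
-#if 0
- int err = 0;
- int r;
-
- r = u8_strcmp("\xC3\xA9", "e\xCC\x81", 0,
-  U8_STRCMP_CI_LOWER | U8_STRCMP_NFC, U8_UNICODE_LATEST, &err);
- /* r == 0: the two encodings are canonically equivalent. */
-#endif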
-
-size_t
-u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen,
- int flag, size_t unicode_version, int *errnum)
-{
- int f;
- int sz;
- uchar_t *ib;
- uchar_t *ibtail;
- uchar_t *ob;
- uchar_t *obtail;
- boolean_t do_not_ignore_null;
- boolean_t do_not_ignore_invalid;
- boolean_t is_it_toupper;
- boolean_t is_it_tolower;
- boolean_t canonical_decomposition;
- boolean_t compatibility_decomposition;
- boolean_t canonical_composition;
- size_t ret_val;
- size_t i;
- size_t j;
- uchar_t u8s[U8_STREAM_SAFE_TEXT_MAX + 1];
- u8_normalization_states_t state;
-
- if (unicode_version > U8_UNICODE_LATEST) {
- *errnum = ERANGE;
- return ((size_t)-1);
- }
-
- f = flag & (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER);
- if (f == (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER)) {
- *errnum = EBADF;
- return ((size_t)-1);
- }
-
- f = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP | U8_CANON_COMP);
- if (f && f != U8_TEXTPREP_NFD && f != U8_TEXTPREP_NFC &&
- f != U8_TEXTPREP_NFKD && f != U8_TEXTPREP_NFKC) {
- *errnum = EBADF;
- return ((size_t)-1);
- }
-
- if (inarray == NULL || *inlen == 0)
- return (0);
-
- if (outarray == NULL) {
- *errnum = E2BIG;
- return ((size_t)-1);
- }
-
- ib = (uchar_t *)inarray;
- ob = (uchar_t *)outarray;
- ibtail = ib + *inlen;
- obtail = ob + *outlen;
-
- do_not_ignore_null = !(flag & U8_TEXTPREP_IGNORE_NULL);
- do_not_ignore_invalid = !(flag & U8_TEXTPREP_IGNORE_INVALID);
- is_it_toupper = flag & U8_TEXTPREP_TOUPPER;
- is_it_tolower = flag & U8_TEXTPREP_TOLOWER;
-
- ret_val = 0;
-
- /*
- * If we don't have a normalization flag set, we do the simple case
- * conversion based text preparation separately below. Text
- * preparation involving normalization is done in the else block,
- * again separately, since it takes much more time and resources
- * than simple case conversion.
- */
- if (f == 0) {
- while (ib < ibtail) {
- if (*ib == '\0' && do_not_ignore_null)
- break;
-
- sz = u8_number_of_bytes[*ib];
-
- if (sz < 0) {
- if (do_not_ignore_invalid) {
- *errnum = EILSEQ;
- ret_val = (size_t)-1;
- break;
- }
-
- sz = 1;
- ret_val++;
- }
-
- if (sz == 1) {
- if (ob >= obtail) {
- *errnum = E2BIG;
- ret_val = (size_t)-1;
- break;
- }
-
- if (is_it_toupper)
- *ob = U8_ASCII_TOUPPER(*ib);
- else if (is_it_tolower)
- *ob = U8_ASCII_TOLOWER(*ib);
- else
- *ob = *ib;
- ib++;
- ob++;
- } else if ((ib + sz) > ibtail) {
- if (do_not_ignore_invalid) {
- *errnum = EINVAL;
- ret_val = (size_t)-1;
- break;
- }
-
- if ((obtail - ob) < (ibtail - ib)) {
- *errnum = E2BIG;
- ret_val = (size_t)-1;
- break;
- }
-
- /*
- * We treat the remaining incomplete character
- * bytes as a character.
- */
- ret_val++;
-
- while (ib < ibtail)
- *ob++ = *ib++;
- } else {
- if (is_it_toupper || is_it_tolower) {
- i = do_case_conv(unicode_version, u8s,
- ib, sz, is_it_toupper);
-
- if ((obtail - ob) < i) {
- *errnum = E2BIG;
- ret_val = (size_t)-1;
- break;
- }
-
- ib += sz;
-
- for (sz = 0; sz < i; sz++)
- *ob++ = u8s[sz];
- } else {
- if ((obtail - ob) < sz) {
- *errnum = E2BIG;
- ret_val = (size_t)-1;
- break;
- }
-
- for (i = 0; i < sz; i++)
- *ob++ = *ib++;
- }
- }
- }
- } else {
- canonical_decomposition = flag & U8_CANON_DECOMP;
- compatibility_decomposition = flag & U8_COMPAT_DECOMP;
- canonical_composition = flag & U8_CANON_COMP;
-
- while (ib < ibtail) {
- if (*ib == '\0' && do_not_ignore_null)
- break;
-
- /*
- * If the current character is a 7-bit ASCII
- * character and it is the last character, or
- * if the current character is a 7-bit ASCII
- * character and the next character is also a 7-bit
- * ASCII character, we copy over this character
- * without going through collect_a_seq().
- *
- * In all other cases, we need to look further with
- * the collect_a_seq() function.
- */
- if (U8_ISASCII(*ib) && ((ib + 1) >= ibtail ||
- ((ib + 1) < ibtail && U8_ISASCII(*(ib + 1))))) {
- if (ob >= obtail) {
- *errnum = E2BIG;
- ret_val = (size_t)-1;
- break;
- }
-
- if (is_it_toupper)
- *ob = U8_ASCII_TOUPPER(*ib);
- else if (is_it_tolower)
- *ob = U8_ASCII_TOLOWER(*ib);
- else
- *ob = *ib;
- ib++;
- ob++;
- } else {
- *errnum = 0;
- state = U8_STATE_START;
-
- j = collect_a_seq(unicode_version, u8s,
- &ib, ibtail,
- is_it_toupper,
- is_it_tolower,
- canonical_decomposition,
- compatibility_decomposition,
- canonical_composition,
- errnum, &state);
-
- if (*errnum && do_not_ignore_invalid) {
- ret_val = (size_t)-1;
- break;
- }
-
- if ((obtail - ob) < j) {
- *errnum = E2BIG;
- ret_val = (size_t)-1;
- break;
- }
-
- for (i = 0; i < j; i++)
- *ob++ = u8s[i];
- }
- }
- }
-
- *inlen = ibtail - ib;
- *outlen = obtail - ob;
-
- return (ret_val);
-}
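-
-/*
- * Example use (illustrative, not part of the original file): normalize
- * a buffer to NFC. The 'in' and 'out' names are hypothetical; on
- * success the number of bytes produced is the original output size
- * minus the returned *outlen (the space left over).
- */
-#if 0
- size_t inlen = strlen(in);
- size_t outlen = sizeof (out);
- int err = 0;
-
- if (u8_textprep_str(in, &inlen, out, &outlen, U8_TEXTPREP_NFC,
-  U8_UNICODE_LATEST, &err) == (size_t)-1)
-  return (err); /* E2BIG, EBADF, EILSEQ, EINVAL, or ERANGE */
-#endif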
Property changes on: head/sys/cddl/contrib/opensolaris/common/unicode/u8_textprep.c
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: head/sys/contrib/openzfs/CODE_OF_CONDUCT.md
===================================================================
--- head/sys/contrib/openzfs/CODE_OF_CONDUCT.md (revision 366779)
+++ head/sys/contrib/openzfs/CODE_OF_CONDUCT.md (revision 366780)
@@ -1,2 +1,2 @@
The [OpenZFS Code of Conduct](http://www.open-zfs.org/wiki/Code_of_Conduct)
-applies to spaces associated with the ZFS on Linux project, including GitHub.
+applies to spaces associated with the OpenZFS project, including GitHub.
Index: head/sys/contrib/openzfs/META
===================================================================
--- head/sys/contrib/openzfs/META (revision 366779)
+++ head/sys/contrib/openzfs/META (revision 366780)
@@ -1,10 +1,10 @@
Meta: 1
Name: zfs
Branch: 1.0
Version: 2.0.0
-Release: rc2
+Release: rc3
Release-Tags: relext
License: CDDL
Author: OpenZFS
-Linux-Maximum: 5.8
+Linux-Maximum: 5.9
Linux-Minimum: 3.10
Index: head/sys/contrib/openzfs/README.md
===================================================================
--- head/sys/contrib/openzfs/README.md (revision 366779)
+++ head/sys/contrib/openzfs/README.md (revision 366780)
@@ -1,35 +1,35 @@
![img](https://openzfs.github.io/openzfs-docs/_static/img/logo/480px-Open-ZFS-Secondary-Logo-Colour-halfsize.png)
OpenZFS is an advanced file system and volume manager which was originally
developed for Solaris and is now maintained by the OpenZFS community.
This repository contains the code for running OpenZFS on Linux and FreeBSD.
[![codecov](https://codecov.io/gh/openzfs/zfs/branch/master/graph/badge.svg)](https://codecov.io/gh/openzfs/zfs)
[![coverity](https://scan.coverity.com/projects/1973/badge.svg)](https://scan.coverity.com/projects/openzfs-zfs)
# Official Resources
* [Documentation](https://openzfs.github.io/openzfs-docs/) - for using and developing this repo
* [ZoL Site](https://zfsonlinux.org) - Linux release info & links
* [Mailing lists](https://openzfs.github.io/openzfs-docs/Project%20and%20Community/Mailing%20Lists.html)
* [OpenZFS site](http://open-zfs.org/) - for conference videos and info on other platforms (illumos, OSX, Windows, etc)
# Installation
-Full documentation for installing OpenZFS on your favorite Linux distribution can
-be found at the [ZoL Site](https://zfsonlinux.org/).
+Full documentation for installing OpenZFS on your favorite operating system can
+be found at the [Getting Started Page](https://openzfs.github.io/openzfs-docs/Getting%20Started/index.html).
# Contribute & Develop
We have a separate document with [contribution guidelines](./.github/CONTRIBUTING.md).
We have a [Code of Conduct](./CODE_OF_CONDUCT.md).
# Release
OpenZFS is released under a CDDL license.
For more details see the NOTICE, LICENSE and COPYRIGHT files; `UCRL-CODE-235197`
# Supported Kernels
* The `META` file contains the officially recognized supported Linux kernel versions.
* Supported FreeBSD versions are 12-STABLE and 13-CURRENT.
Index: head/sys/contrib/openzfs/cmd/arc_summary/arc_summary2
===================================================================
--- head/sys/contrib/openzfs/cmd/arc_summary/arc_summary2 (revision 366779)
+++ head/sys/contrib/openzfs/cmd/arc_summary/arc_summary2 (revision 366780)
@@ -1,1093 +1,1093 @@
#!/usr/bin/env python2
#
# $Id: arc_summary.pl,v 388:e27800740aa2 2011-07-08 02:53:29Z jhell $
#
# Copyright (c) 2008 Ben Rockwood <benr@cuddletech.com>,
# Copyright (c) 2010 Martin Matuska <mm@FreeBSD.org>,
# Copyright (c) 2010-2011 Jason J. Hellenthal <jhell@DataIX.net>,
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# If you are having trouble using this script from cron(8), please try
# adjusting your PATH before reporting problems.
#
# Note that some of this code uses older constructs (eg getopt instead of
# argparse, subprocess.Popen() instead of subprocess.run()) because we need
# to support some very old versions of Python.
#
"""Print statistics on the ZFS Adjustable Replacement Cache (ARC)
Provides basic information on the ARC, its efficiency, the L2ARC (if present),
the Data Management Unit (DMU), Virtual Devices (VDEVs), and tunables. See the
in-source documentation and code at
-https://github.com/zfsonlinux/zfs/blob/master/module/zfs/arc.c for details.
+https://github.com/openzfs/zfs/blob/master/module/zfs/arc.c for details.
"""
import getopt
import os
import sys
import time
import errno
from subprocess import Popen, PIPE
from decimal import Decimal as D
if sys.platform.startswith('freebsd'):
# Requires py27-sysctl on FreeBSD
import sysctl
def load_kstats(namespace):
"""Collect information on a specific subsystem of the ARC"""
base = 'kstat.zfs.misc.%s.' % namespace
return [(kstat.name, D(kstat.value)) for kstat in sysctl.filter(base)]
def load_tunables():
return dict((ctl.name, ctl.value) for ctl in sysctl.filter('vfs.zfs'))
elif sys.platform.startswith('linux'):
def load_kstats(namespace):
"""Collect information on a specific subsystem of the ARC"""
kstat = 'kstat.zfs.misc.%s.%%s' % namespace
path = '/proc/spl/kstat/zfs/%s' % namespace
with open(path) as f:
entries = [line.strip().split() for line in f][2:] # Skip header
return [(kstat % name, D(value)) for name, _, value in entries]
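# Illustration (hypothetical /proc line, not taken from a real system):
# "hits    4    12345" in /proc/spl/kstat/zfs/arcstats becomes the tuple
# ('kstat.zfs.misc.arcstats.hits', Decimal('12345')).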
def load_tunables():
basepath = '/sys/module/zfs/parameters'
tunables = {}
for name in os.listdir(basepath):
if not name:
continue
path = '%s/%s' % (basepath, name)
with open(path) as f:
value = f.read()
tunables[name] = value.strip()
return tunables
show_tunable_descriptions = False
alternate_tunable_layout = False
def handle_Exception(ex_cls, ex, tb):
if isinstance(ex, IOError):
if ex.errno == errno.EPIPE:
sys.exit()
if isinstance(ex, KeyboardInterrupt):
sys.exit()
sys.excepthook = handle_Exception
def get_Kstat():
"""Collect information on the ZFS subsystem from the /proc virtual
file system. The name "kstat" is a holdover from the Solaris utility
of the same name.
"""
Kstat = {}
Kstat.update(load_kstats('arcstats'))
Kstat.update(load_kstats('zfetchstats'))
Kstat.update(load_kstats('vdev_cache_stats'))
return Kstat
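# Illustration: after get_Kstat() returns, counters are read with fully
# qualified keys such as Kstat["kstat.zfs.misc.arcstats.hits"], holding
# Decimal values that the formatting helpers below consume.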
def fBytes(b=0):
"""Return human-readable representation of a byte value in
powers of 2 (eg "KiB" for "kibibytes", etc) to two decimal
points. Values smaller than one KiB are returned without
decimal points.
"""
prefixes = [
[2**80, "YiB"], # yobibytes (yotta)
[2**70, "ZiB"], # zebibytes (zetta)
[2**60, "EiB"], # exbibytes (exa)
[2**50, "PiB"], # pebibytes (peta)
[2**40, "TiB"], # tebibytes (tera)
[2**30, "GiB"], # gibibytes (giga)
[2**20, "MiB"], # mebibytes (mega)
[2**10, "KiB"]] # kibibytes (kilo)
if b >= 2**10:
for limit, unit in prefixes:
if b >= limit:
value = b / limit
break
result = "%0.2f\t%s" % (value, unit)
else:
result = "%d\tBytes" % b
return result
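# Example (illustrative values only):
#   fBytes(512)  -> "512\tBytes"
#   fBytes(2048) -> "2.00\tKiB"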
def fHits(hits=0):
"""Create a human-readable representation of the number of hits.
The single-letter symbols used are SI to avoid the confusion caused
by the different "short scale" and "long scale" representations in
English, which use the same words for different values. See
https://en.wikipedia.org/wiki/Names_of_large_numbers and
https://physics.nist.gov/cuu/Units/prefixes.html
"""
numbers = [
[10**24, 'Y'], # yotta (septillion)
[10**21, 'Z'], # zetta (sextillion)
[10**18, 'E'], # exa (quintillion)
[10**15, 'P'], # peta (quadrillion)
[10**12, 'T'], # tera (trillion)
[10**9, 'G'], # giga (billion)
[10**6, 'M'], # mega (million)
[10**3, 'k']] # kilo (thousand)
if hits >= 1000:
for limit, symbol in numbers:
if hits >= limit:
value = hits/limit
break
result = "%0.2f%s" % (value, symbol)
else:
result = "%d" % hits
return result
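# Example (illustrative; Kstat counters are Decimals, so division is exact):
#   fHits(999)        -> "999"
#   fHits(D(1500000)) -> "1.50M"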
def fPerc(lVal=0, rVal=0, Decimal=2):
"""Calculate percentage value and return in human-readable format"""
if rVal > 0:
return str("%0." + str(Decimal) + "f") % (100 * (lVal / rVal)) + "%"
else:
return str("%0." + str(Decimal) + "f") % 100 + "%"
def get_arc_summary(Kstat):
"""Collect general data on the ARC"""
output = {}
memory_throttle_count = Kstat[
"kstat.zfs.misc.arcstats.memory_throttle_count"
]
if memory_throttle_count > 0:
output['health'] = 'THROTTLED'
else:
output['health'] = 'HEALTHY'
output['memory_throttle_count'] = fHits(memory_throttle_count)
# ARC Misc.
deleted = Kstat["kstat.zfs.misc.arcstats.deleted"]
mutex_miss = Kstat["kstat.zfs.misc.arcstats.mutex_miss"]
evict_skip = Kstat["kstat.zfs.misc.arcstats.evict_skip"]
# ARC Misc.
output["arc_misc"] = {}
output["arc_misc"]["deleted"] = fHits(deleted)
output["arc_misc"]['mutex_miss'] = fHits(mutex_miss)
output["arc_misc"]['evict_skips'] = fHits(evict_skip)
# ARC Sizing
arc_size = Kstat["kstat.zfs.misc.arcstats.size"]
mru_size = Kstat["kstat.zfs.misc.arcstats.mru_size"]
mfu_size = Kstat["kstat.zfs.misc.arcstats.mfu_size"]
meta_limit = Kstat["kstat.zfs.misc.arcstats.arc_meta_limit"]
meta_size = Kstat["kstat.zfs.misc.arcstats.arc_meta_used"]
dnode_limit = Kstat["kstat.zfs.misc.arcstats.arc_dnode_limit"]
dnode_size = Kstat["kstat.zfs.misc.arcstats.dnode_size"]
target_max_size = Kstat["kstat.zfs.misc.arcstats.c_max"]
target_min_size = Kstat["kstat.zfs.misc.arcstats.c_min"]
target_size = Kstat["kstat.zfs.misc.arcstats.c"]
target_size_ratio = (target_max_size / target_min_size)
# ARC Sizing
output['arc_sizing'] = {}
output['arc_sizing']['arc_size'] = {
'per': fPerc(arc_size, target_max_size),
'num': fBytes(arc_size),
}
output['arc_sizing']['target_max_size'] = {
'ratio': target_size_ratio,
'num': fBytes(target_max_size),
}
output['arc_sizing']['target_min_size'] = {
'per': fPerc(target_min_size, target_max_size),
'num': fBytes(target_min_size),
}
output['arc_sizing']['target_size'] = {
'per': fPerc(target_size, target_max_size),
'num': fBytes(target_size),
}
output['arc_sizing']['meta_limit'] = {
'per': fPerc(meta_limit, target_max_size),
'num': fBytes(meta_limit),
}
output['arc_sizing']['meta_size'] = {
'per': fPerc(meta_size, meta_limit),
'num': fBytes(meta_size),
}
output['arc_sizing']['dnode_limit'] = {
'per': fPerc(dnode_limit, meta_limit),
'num': fBytes(dnode_limit),
}
output['arc_sizing']['dnode_size'] = {
'per': fPerc(dnode_size, dnode_limit),
'num': fBytes(dnode_size),
}
# ARC Hash Breakdown
output['arc_hash_break'] = {}
output['arc_hash_break']['hash_chain_max'] = Kstat[
"kstat.zfs.misc.arcstats.hash_chain_max"
]
output['arc_hash_break']['hash_chains'] = Kstat[
"kstat.zfs.misc.arcstats.hash_chains"
]
output['arc_hash_break']['hash_collisions'] = Kstat[
"kstat.zfs.misc.arcstats.hash_collisions"
]
output['arc_hash_break']['hash_elements'] = Kstat[
"kstat.zfs.misc.arcstats.hash_elements"
]
output['arc_hash_break']['hash_elements_max'] = Kstat[
"kstat.zfs.misc.arcstats.hash_elements_max"
]
output['arc_size_break'] = {}
output['arc_size_break']['recently_used_cache_size'] = {
'per': fPerc(mru_size, mru_size + mfu_size),
'num': fBytes(mru_size),
}
output['arc_size_break']['frequently_used_cache_size'] = {
'per': fPerc(mfu_size, mru_size + mfu_size),
'num': fBytes(mfu_size),
}
# ARC Hash Breakdown
hash_chain_max = Kstat["kstat.zfs.misc.arcstats.hash_chain_max"]
hash_chains = Kstat["kstat.zfs.misc.arcstats.hash_chains"]
hash_collisions = Kstat["kstat.zfs.misc.arcstats.hash_collisions"]
hash_elements = Kstat["kstat.zfs.misc.arcstats.hash_elements"]
hash_elements_max = Kstat["kstat.zfs.misc.arcstats.hash_elements_max"]
output['arc_hash_break'] = {}
output['arc_hash_break']['elements_max'] = fHits(hash_elements_max)
output['arc_hash_break']['elements_current'] = {
'per': fPerc(hash_elements, hash_elements_max),
'num': fHits(hash_elements),
}
output['arc_hash_break']['collisions'] = fHits(hash_collisions)
output['arc_hash_break']['chain_max'] = fHits(hash_chain_max)
output['arc_hash_break']['chains'] = fHits(hash_chains)
return output
def _arc_summary(Kstat):
"""Print information on the ARC"""
# ARC Sizing
arc = get_arc_summary(Kstat)
sys.stdout.write("ARC Summary: (%s)\n" % arc['health'])
sys.stdout.write("\tMemory Throttle Count:\t\t\t%s\n" %
arc['memory_throttle_count'])
sys.stdout.write("\n")
# ARC Misc.
sys.stdout.write("ARC Misc:\n")
sys.stdout.write("\tDeleted:\t\t\t\t%s\n" % arc['arc_misc']['deleted'])
sys.stdout.write("\tMutex Misses:\t\t\t\t%s\n" %
arc['arc_misc']['mutex_miss'])
sys.stdout.write("\tEvict Skips:\t\t\t\t%s\n" %
arc['arc_misc']['evict_skips'])
sys.stdout.write("\n")
# ARC Sizing
sys.stdout.write("ARC Size:\t\t\t\t%s\t%s\n" % (
arc['arc_sizing']['arc_size']['per'],
arc['arc_sizing']['arc_size']['num']
)
)
sys.stdout.write("\tTarget Size: (Adaptive)\t\t%s\t%s\n" % (
arc['arc_sizing']['target_size']['per'],
arc['arc_sizing']['target_size']['num'],
)
)
sys.stdout.write("\tMin Size (Hard Limit):\t\t%s\t%s\n" % (
arc['arc_sizing']['target_min_size']['per'],
arc['arc_sizing']['target_min_size']['num'],
)
)
sys.stdout.write("\tMax Size (High Water):\t\t%d:1\t%s\n" % (
arc['arc_sizing']['target_max_size']['ratio'],
arc['arc_sizing']['target_max_size']['num'],
)
)
sys.stdout.write("\nARC Size Breakdown:\n")
sys.stdout.write("\tRecently Used Cache Size:\t%s\t%s\n" % (
arc['arc_size_break']['recently_used_cache_size']['per'],
arc['arc_size_break']['recently_used_cache_size']['num'],
)
)
sys.stdout.write("\tFrequently Used Cache Size:\t%s\t%s\n" % (
arc['arc_size_break']['frequently_used_cache_size']['per'],
arc['arc_size_break']['frequently_used_cache_size']['num'],
)
)
sys.stdout.write("\tMetadata Size (Hard Limit):\t%s\t%s\n" % (
arc['arc_sizing']['meta_limit']['per'],
arc['arc_sizing']['meta_limit']['num'],
)
)
sys.stdout.write("\tMetadata Size:\t\t\t%s\t%s\n" % (
arc['arc_sizing']['meta_size']['per'],
arc['arc_sizing']['meta_size']['num'],
)
)
sys.stdout.write("\tDnode Size (Hard Limit):\t%s\t%s\n" % (
arc['arc_sizing']['dnode_limit']['per'],
arc['arc_sizing']['dnode_limit']['num'],
)
)
sys.stdout.write("\tDnode Size:\t\t\t%s\t%s\n" % (
arc['arc_sizing']['dnode_size']['per'],
arc['arc_sizing']['dnode_size']['num'],
)
)
sys.stdout.write("\n")
# ARC Hash Breakdown
sys.stdout.write("ARC Hash Breakdown:\n")
sys.stdout.write("\tElements Max:\t\t\t\t%s\n" %
arc['arc_hash_break']['elements_max'])
sys.stdout.write("\tElements Current:\t\t%s\t%s\n" % (
arc['arc_hash_break']['elements_current']['per'],
arc['arc_hash_break']['elements_current']['num'],
)
)
sys.stdout.write("\tCollisions:\t\t\t\t%s\n" %
arc['arc_hash_break']['collisions'])
sys.stdout.write("\tChain Max:\t\t\t\t%s\n" %
arc['arc_hash_break']['chain_max'])
sys.stdout.write("\tChains:\t\t\t\t\t%s\n" %
arc['arc_hash_break']['chains'])
def get_arc_efficiency(Kstat):
"""Collect information on the efficiency of the ARC"""
output = {}
arc_hits = Kstat["kstat.zfs.misc.arcstats.hits"]
arc_misses = Kstat["kstat.zfs.misc.arcstats.misses"]
demand_data_hits = Kstat["kstat.zfs.misc.arcstats.demand_data_hits"]
demand_data_misses = Kstat["kstat.zfs.misc.arcstats.demand_data_misses"]
demand_metadata_hits = Kstat[
"kstat.zfs.misc.arcstats.demand_metadata_hits"
]
demand_metadata_misses = Kstat[
"kstat.zfs.misc.arcstats.demand_metadata_misses"
]
mfu_ghost_hits = Kstat["kstat.zfs.misc.arcstats.mfu_ghost_hits"]
mfu_hits = Kstat["kstat.zfs.misc.arcstats.mfu_hits"]
mru_ghost_hits = Kstat["kstat.zfs.misc.arcstats.mru_ghost_hits"]
mru_hits = Kstat["kstat.zfs.misc.arcstats.mru_hits"]
prefetch_data_hits = Kstat["kstat.zfs.misc.arcstats.prefetch_data_hits"]
prefetch_data_misses = Kstat[
"kstat.zfs.misc.arcstats.prefetch_data_misses"
]
prefetch_metadata_hits = Kstat[
"kstat.zfs.misc.arcstats.prefetch_metadata_hits"
]
prefetch_metadata_misses = Kstat[
"kstat.zfs.misc.arcstats.prefetch_metadata_misses"
]
anon_hits = arc_hits - (
mfu_hits + mru_hits + mfu_ghost_hits + mru_ghost_hits
)
arc_accesses_total = (arc_hits + arc_misses)
demand_data_total = (demand_data_hits + demand_data_misses)
prefetch_data_total = (prefetch_data_hits + prefetch_data_misses)
real_hits = (mfu_hits + mru_hits)
output["total_accesses"] = fHits(arc_accesses_total)
output["cache_hit_ratio"] = {
'per': fPerc(arc_hits, arc_accesses_total),
'num': fHits(arc_hits),
}
output["cache_miss_ratio"] = {
'per': fPerc(arc_misses, arc_accesses_total),
'num': fHits(arc_misses),
}
output["actual_hit_ratio"] = {
'per': fPerc(real_hits, arc_accesses_total),
'num': fHits(real_hits),
}
output["data_demand_efficiency"] = {
'per': fPerc(demand_data_hits, demand_data_total),
'num': fHits(demand_data_total),
}
if prefetch_data_total > 0:
output["data_prefetch_efficiency"] = {
'per': fPerc(prefetch_data_hits, prefetch_data_total),
'num': fHits(prefetch_data_total),
}
if anon_hits > 0:
output["cache_hits_by_cache_list"] = {}
output["cache_hits_by_cache_list"]["anonymously_used"] = {
'per': fPerc(anon_hits, arc_hits),
'num': fHits(anon_hits),
}
output["most_recently_used"] = {
'per': fPerc(mru_hits, arc_hits),
'num': fHits(mru_hits),
}
output["most_frequently_used"] = {
'per': fPerc(mfu_hits, arc_hits),
'num': fHits(mfu_hits),
}
output["most_recently_used_ghost"] = {
'per': fPerc(mru_ghost_hits, arc_hits),
'num': fHits(mru_ghost_hits),
}
output["most_frequently_used_ghost"] = {
'per': fPerc(mfu_ghost_hits, arc_hits),
'num': fHits(mfu_ghost_hits),
}
output["cache_hits_by_data_type"] = {}
output["cache_hits_by_data_type"]["demand_data"] = {
'per': fPerc(demand_data_hits, arc_hits),
'num': fHits(demand_data_hits),
}
output["cache_hits_by_data_type"]["prefetch_data"] = {
'per': fPerc(prefetch_data_hits, arc_hits),
'num': fHits(prefetch_data_hits),
}
output["cache_hits_by_data_type"]["demand_metadata"] = {
'per': fPerc(demand_metadata_hits, arc_hits),
'num': fHits(demand_metadata_hits),
}
output["cache_hits_by_data_type"]["prefetch_metadata"] = {
'per': fPerc(prefetch_metadata_hits, arc_hits),
'num': fHits(prefetch_metadata_hits),
}
output["cache_misses_by_data_type"] = {}
output["cache_misses_by_data_type"]["demand_data"] = {
'per': fPerc(demand_data_misses, arc_misses),
'num': fHits(demand_data_misses),
}
output["cache_misses_by_data_type"]["prefetch_data"] = {
'per': fPerc(prefetch_data_misses, arc_misses),
'num': fHits(prefetch_data_misses),
}
output["cache_misses_by_data_type"]["demand_metadata"] = {
'per': fPerc(demand_metadata_misses, arc_misses),
'num': fHits(demand_metadata_misses),
}
output["cache_misses_by_data_type"]["prefetch_metadata"] = {
'per': fPerc(prefetch_metadata_misses, arc_misses),
'num': fHits(prefetch_metadata_misses),
}
return output
def _arc_efficiency(Kstat):
"""Print information on the efficiency of the ARC"""
arc = get_arc_efficiency(Kstat)
sys.stdout.write("ARC Total accesses:\t\t\t\t\t%s\n" %
arc['total_accesses'])
sys.stdout.write("\tCache Hit Ratio:\t\t%s\t%s\n" % (
arc['cache_hit_ratio']['per'],
arc['cache_hit_ratio']['num'],
)
)
sys.stdout.write("\tCache Miss Ratio:\t\t%s\t%s\n" % (
arc['cache_miss_ratio']['per'],
arc['cache_miss_ratio']['num'],
)
)
sys.stdout.write("\tActual Hit Ratio:\t\t%s\t%s\n" % (
arc['actual_hit_ratio']['per'],
arc['actual_hit_ratio']['num'],
)
)
sys.stdout.write("\n")
sys.stdout.write("\tData Demand Efficiency:\t\t%s\t%s\n" % (
arc['data_demand_efficiency']['per'],
arc['data_demand_efficiency']['num'],
)
)
if 'data_prefetch_efficiency' in arc:
sys.stdout.write("\tData Prefetch Efficiency:\t%s\t%s\n" % (
arc['data_prefetch_efficiency']['per'],
arc['data_prefetch_efficiency']['num'],
)
)
sys.stdout.write("\n")
sys.stdout.write("\tCACHE HITS BY CACHE LIST:\n")
if 'cache_hits_by_cache_list' in arc:
sys.stdout.write("\t Anonymously Used:\t\t%s\t%s\n" % (
arc['cache_hits_by_cache_list']['anonymously_used']['per'],
arc['cache_hits_by_cache_list']['anonymously_used']['num'],
)
)
sys.stdout.write("\t Most Recently Used:\t\t%s\t%s\n" % (
arc['most_recently_used']['per'],
arc['most_recently_used']['num'],
)
)
sys.stdout.write("\t Most Frequently Used:\t\t%s\t%s\n" % (
arc['most_frequently_used']['per'],
arc['most_frequently_used']['num'],
)
)
sys.stdout.write("\t Most Recently Used Ghost:\t%s\t%s\n" % (
arc['most_recently_used_ghost']['per'],
arc['most_recently_used_ghost']['num'],
)
)
sys.stdout.write("\t Most Frequently Used Ghost:\t%s\t%s\n" % (
arc['most_frequently_used_ghost']['per'],
arc['most_frequently_used_ghost']['num'],
)
)
sys.stdout.write("\n\tCACHE HITS BY DATA TYPE:\n")
sys.stdout.write("\t Demand Data:\t\t\t%s\t%s\n" % (
arc["cache_hits_by_data_type"]['demand_data']['per'],
arc["cache_hits_by_data_type"]['demand_data']['num'],
)
)
sys.stdout.write("\t Prefetch Data:\t\t%s\t%s\n" % (
arc["cache_hits_by_data_type"]['prefetch_data']['per'],
arc["cache_hits_by_data_type"]['prefetch_data']['num'],
)
)
sys.stdout.write("\t Demand Metadata:\t\t%s\t%s\n" % (
arc["cache_hits_by_data_type"]['demand_metadata']['per'],
arc["cache_hits_by_data_type"]['demand_metadata']['num'],
)
)
sys.stdout.write("\t Prefetch Metadata:\t\t%s\t%s\n" % (
arc["cache_hits_by_data_type"]['prefetch_metadata']['per'],
arc["cache_hits_by_data_type"]['prefetch_metadata']['num'],
)
)
sys.stdout.write("\n\tCACHE MISSES BY DATA TYPE:\n")
sys.stdout.write("\t Demand Data:\t\t\t%s\t%s\n" % (
arc["cache_misses_by_data_type"]['demand_data']['per'],
arc["cache_misses_by_data_type"]['demand_data']['num'],
)
)
sys.stdout.write("\t Prefetch Data:\t\t%s\t%s\n" % (
arc["cache_misses_by_data_type"]['prefetch_data']['per'],
arc["cache_misses_by_data_type"]['prefetch_data']['num'],
)
)
sys.stdout.write("\t Demand Metadata:\t\t%s\t%s\n" % (
arc["cache_misses_by_data_type"]['demand_metadata']['per'],
arc["cache_misses_by_data_type"]['demand_metadata']['num'],
)
)
sys.stdout.write("\t Prefetch Metadata:\t\t%s\t%s\n" % (
arc["cache_misses_by_data_type"]['prefetch_metadata']['per'],
arc["cache_misses_by_data_type"]['prefetch_metadata']['num'],
)
)
def get_l2arc_summary(Kstat):
"""Collection information on the L2ARC"""
output = {}
l2_abort_lowmem = Kstat["kstat.zfs.misc.arcstats.l2_abort_lowmem"]
l2_cksum_bad = Kstat["kstat.zfs.misc.arcstats.l2_cksum_bad"]
l2_evict_lock_retry = Kstat["kstat.zfs.misc.arcstats.l2_evict_lock_retry"]
l2_evict_reading = Kstat["kstat.zfs.misc.arcstats.l2_evict_reading"]
l2_feeds = Kstat["kstat.zfs.misc.arcstats.l2_feeds"]
l2_free_on_write = Kstat["kstat.zfs.misc.arcstats.l2_free_on_write"]
l2_hdr_size = Kstat["kstat.zfs.misc.arcstats.l2_hdr_size"]
l2_hits = Kstat["kstat.zfs.misc.arcstats.l2_hits"]
l2_io_error = Kstat["kstat.zfs.misc.arcstats.l2_io_error"]
l2_misses = Kstat["kstat.zfs.misc.arcstats.l2_misses"]
l2_rw_clash = Kstat["kstat.zfs.misc.arcstats.l2_rw_clash"]
l2_size = Kstat["kstat.zfs.misc.arcstats.l2_size"]
l2_asize = Kstat["kstat.zfs.misc.arcstats.l2_asize"]
l2_writes_done = Kstat["kstat.zfs.misc.arcstats.l2_writes_done"]
l2_writes_error = Kstat["kstat.zfs.misc.arcstats.l2_writes_error"]
l2_writes_sent = Kstat["kstat.zfs.misc.arcstats.l2_writes_sent"]
l2_access_total = (l2_hits + l2_misses)
output['l2_health_count'] = (l2_writes_error + l2_cksum_bad + l2_io_error)
output['l2_access_total'] = l2_access_total
output['l2_size'] = l2_size
output['l2_asize'] = l2_asize
if l2_size > 0 and l2_access_total > 0:
if output['l2_health_count'] > 0:
output["health"] = "DEGRADED"
else:
output["health"] = "HEALTHY"
output["low_memory_aborts"] = fHits(l2_abort_lowmem)
output["free_on_write"] = fHits(l2_free_on_write)
output["rw_clashes"] = fHits(l2_rw_clash)
output["bad_checksums"] = fHits(l2_cksum_bad)
output["io_errors"] = fHits(l2_io_error)
output["l2_arc_size"] = {}
output["l2_arc_size"]["adative"] = fBytes(l2_size)
output["l2_arc_size"]["actual"] = {
'per': fPerc(l2_asize, l2_size),
'num': fBytes(l2_asize)
}
output["l2_arc_size"]["head_size"] = {
'per': fPerc(l2_hdr_size, l2_size),
'num': fBytes(l2_hdr_size),
}
output["l2_arc_evicts"] = {}
output["l2_arc_evicts"]['lock_retries'] = fHits(l2_evict_lock_retry)
output["l2_arc_evicts"]['reading'] = fHits(l2_evict_reading)
output['l2_arc_breakdown'] = {}
output['l2_arc_breakdown']['value'] = fHits(l2_access_total)
output['l2_arc_breakdown']['hit_ratio'] = {
'per': fPerc(l2_hits, l2_access_total),
'num': fHits(l2_hits),
}
output['l2_arc_breakdown']['miss_ratio'] = {
'per': fPerc(l2_misses, l2_access_total),
'num': fHits(l2_misses),
}
output['l2_arc_breakdown']['feeds'] = fHits(l2_feeds)
output['l2_arc_buffer'] = {}
output['l2_arc_writes'] = {}
output['l2_writes_done'] = l2_writes_done
output['l2_writes_sent'] = l2_writes_sent
if l2_writes_done != l2_writes_sent:
output['l2_arc_writes']['writes_sent'] = {
'value': "FAULTED",
'num': fHits(l2_writes_sent),
}
output['l2_arc_writes']['done_ratio'] = {
'per': fPerc(l2_writes_done, l2_writes_sent),
'num': fHits(l2_writes_done),
}
output['l2_arc_writes']['error_ratio'] = {
'per': fPerc(l2_writes_error, l2_writes_sent),
'num': fHits(l2_writes_error),
}
else:
output['l2_arc_writes']['writes_sent'] = {
'per': fPerc(100, 100),
'num': fHits(l2_writes_sent),
}
return output
def _l2arc_summary(Kstat):
"""Print information on the L2ARC"""
arc = get_l2arc_summary(Kstat)
if arc['l2_size'] > 0 and arc['l2_access_total'] > 0:
sys.stdout.write("L2 ARC Summary: ")
if arc['l2_health_count'] > 0:
sys.stdout.write("(DEGRADED)\n")
else:
sys.stdout.write("(HEALTHY)\n")
sys.stdout.write("\tLow Memory Aborts:\t\t\t%s\n" %
arc['low_memory_aborts'])
sys.stdout.write("\tFree on Write:\t\t\t\t%s\n" % arc['free_on_write'])
sys.stdout.write("\tR/W Clashes:\t\t\t\t%s\n" % arc['rw_clashes'])
sys.stdout.write("\tBad Checksums:\t\t\t\t%s\n" % arc['bad_checksums'])
sys.stdout.write("\tIO Errors:\t\t\t\t%s\n" % arc['io_errors'])
sys.stdout.write("\n")
sys.stdout.write("L2 ARC Size: (Adaptive)\t\t\t\t%s\n" %
arc["l2_arc_size"]["adative"])
sys.stdout.write("\tCompressed:\t\t\t%s\t%s\n" % (
arc["l2_arc_size"]["actual"]["per"],
arc["l2_arc_size"]["actual"]["num"],
)
)
sys.stdout.write("\tHeader Size:\t\t\t%s\t%s\n" % (
arc["l2_arc_size"]["head_size"]["per"],
arc["l2_arc_size"]["head_size"]["num"],
)
)
sys.stdout.write("\n")
if arc["l2_arc_evicts"]['lock_retries'] != '0' or \
arc["l2_arc_evicts"]["reading"] != '0':
sys.stdout.write("L2 ARC Evicts:\n")
sys.stdout.write("\tLock Retries:\t\t\t\t%s\n" %
arc["l2_arc_evicts"]['lock_retries'])
sys.stdout.write("\tUpon Reading:\t\t\t\t%s\n" %
arc["l2_arc_evicts"]["reading"])
sys.stdout.write("\n")
sys.stdout.write("L2 ARC Breakdown:\t\t\t\t%s\n" %
arc['l2_arc_breakdown']['value'])
sys.stdout.write("\tHit Ratio:\t\t\t%s\t%s\n" % (
arc['l2_arc_breakdown']['hit_ratio']['per'],
arc['l2_arc_breakdown']['hit_ratio']['num'],
)
)
sys.stdout.write("\tMiss Ratio:\t\t\t%s\t%s\n" % (
arc['l2_arc_breakdown']['miss_ratio']['per'],
arc['l2_arc_breakdown']['miss_ratio']['num'],
)
)
sys.stdout.write("\tFeeds:\t\t\t\t\t%s\n" %
arc['l2_arc_breakdown']['feeds'])
sys.stdout.write("\n")
sys.stdout.write("L2 ARC Writes:\n")
if arc['l2_writes_done'] != arc['l2_writes_sent']:
sys.stdout.write("\tWrites Sent: (%s)\t\t\t\t%s\n" % (
arc['l2_arc_writes']['writes_sent']['value'],
arc['l2_arc_writes']['writes_sent']['num'],
)
)
sys.stdout.write("\t Done Ratio:\t\t\t%s\t%s\n" % (
arc['l2_arc_writes']['done_ratio']['per'],
arc['l2_arc_writes']['done_ratio']['num'],
)
)
sys.stdout.write("\t Error Ratio:\t\t\t%s\t%s\n" % (
arc['l2_arc_writes']['error_ratio']['per'],
arc['l2_arc_writes']['error_ratio']['num'],
)
)
else:
sys.stdout.write("\tWrites Sent:\t\t\t%s\t%s\n" % (
arc['l2_arc_writes']['writes_sent']['per'],
arc['l2_arc_writes']['writes_sent']['num'],
)
)
def get_dmu_summary(Kstat):
"""Collect information on the DMU"""
output = {}
zfetch_hits = Kstat["kstat.zfs.misc.zfetchstats.hits"]
zfetch_misses = Kstat["kstat.zfs.misc.zfetchstats.misses"]
zfetch_access_total = (zfetch_hits + zfetch_misses)
output['zfetch_access_total'] = zfetch_access_total
if zfetch_access_total > 0:
output['dmu'] = {}
output['dmu']['efficiency'] = {}
output['dmu']['efficiency']['value'] = fHits(zfetch_access_total)
output['dmu']['efficiency']['hit_ratio'] = {
'per': fPerc(zfetch_hits, zfetch_access_total),
'num': fHits(zfetch_hits),
}
output['dmu']['efficiency']['miss_ratio'] = {
'per': fPerc(zfetch_misses, zfetch_access_total),
'num': fHits(zfetch_misses),
}
return output
def _dmu_summary(Kstat):
"""Print information on the DMU"""
arc = get_dmu_summary(Kstat)
if arc['zfetch_access_total'] > 0:
sys.stdout.write("DMU Prefetch Efficiency:\t\t\t\t\t%s\n" %
arc['dmu']['efficiency']['value'])
sys.stdout.write("\tHit Ratio:\t\t\t%s\t%s\n" % (
arc['dmu']['efficiency']['hit_ratio']['per'],
arc['dmu']['efficiency']['hit_ratio']['num'],
)
)
sys.stdout.write("\tMiss Ratio:\t\t\t%s\t%s\n" % (
arc['dmu']['efficiency']['miss_ratio']['per'],
arc['dmu']['efficiency']['miss_ratio']['num'],
)
)
sys.stdout.write("\n")
def get_vdev_summary(Kstat):
"""Collect information on the VDEVs"""
output = {}
vdev_cache_delegations = \
Kstat["kstat.zfs.misc.vdev_cache_stats.delegations"]
vdev_cache_misses = Kstat["kstat.zfs.misc.vdev_cache_stats.misses"]
vdev_cache_hits = Kstat["kstat.zfs.misc.vdev_cache_stats.hits"]
vdev_cache_total = (vdev_cache_misses + vdev_cache_hits +
vdev_cache_delegations)
output['vdev_cache_total'] = vdev_cache_total
if vdev_cache_total > 0:
output['summary'] = fHits(vdev_cache_total)
output['hit_ratio'] = {
'per': fPerc(vdev_cache_hits, vdev_cache_total),
'num': fHits(vdev_cache_hits),
}
output['miss_ratio'] = {
'per': fPerc(vdev_cache_misses, vdev_cache_total),
'num': fHits(vdev_cache_misses),
}
output['delegations'] = {
'per': fPerc(vdev_cache_delegations, vdev_cache_total),
'num': fHits(vdev_cache_delegations),
}
return output
def _vdev_summary(Kstat):
"""Print information on the VDEVs"""
arc = get_vdev_summary(Kstat)
if arc['vdev_cache_total'] > 0:
sys.stdout.write("VDEV Cache Summary:\t\t\t\t%s\n" % arc['summary'])
sys.stdout.write("\tHit Ratio:\t\t\t%s\t%s\n" % (
arc['hit_ratio']['per'],
arc['hit_ratio']['num'],
))
sys.stdout.write("\tMiss Ratio:\t\t\t%s\t%s\n" % (
arc['miss_ratio']['per'],
arc['miss_ratio']['num'],
))
sys.stdout.write("\tDelegations:\t\t\t%s\t%s\n" % (
arc['delegations']['per'],
arc['delegations']['num'],
))
def _tunable_summary(Kstat):
"""Print information on tunables, including descriptions if requested"""
global show_tunable_descriptions
global alternate_tunable_layout
tunables = load_tunables()
descriptions = {}
if show_tunable_descriptions:
command = ["/sbin/modinfo", "zfs", "-0"]
try:
p = Popen(command, stdin=PIPE, stdout=PIPE,
stderr=PIPE, shell=False, close_fds=True)
p.wait()
# By default, Python 2 returns a string as the first element of the
# tuple from p.communicate(), while Python 3 returns bytes which
# must be decoded first. The better way to do this would be with
# subprocess.run() or at least .check_output(), but this fails on
# CentOS 6 because of its old version of Python 2
desc = bytes.decode(p.communicate()[0])
description_list = desc.strip().split('\0')
if p.returncode == 0:
for tunable in description_list:
if tunable[0:5] == 'parm:':
tunable = tunable[5:].strip()
name, description = tunable.split(':', 1)
if not description:
description = "Description unavailable"
descriptions[name] = description
else:
sys.stderr.write("%s: '%s' exited with code %i\n" %
(sys.argv[0], command[0], p.returncode))
sys.stderr.write("Tunable descriptions will be disabled.\n")
except OSError as e:
sys.stderr.write("%s: Cannot run '%s': %s\n" %
(sys.argv[0], command[0], e.strerror))
sys.stderr.write("Tunable descriptions will be disabled.\n")
sys.stdout.write("ZFS Tunables:\n")
if alternate_tunable_layout:
fmt = "\t%s=%s\n"
else:
fmt = "\t%-50s%s\n"
for name in sorted(tunables.keys()):
if show_tunable_descriptions and name in descriptions:
sys.stdout.write("\t# %s\n" % descriptions[name])
sys.stdout.write(fmt % (name, tunables[name]))
unSub = [
_arc_summary,
_arc_efficiency,
_l2arc_summary,
_dmu_summary,
_vdev_summary,
_tunable_summary
]
def zfs_header():
"""Print title string with date"""
daydate = time.strftime('%a %b %d %H:%M:%S %Y')
sys.stdout.write('\n'+'-'*72+'\n')
sys.stdout.write('ZFS Subsystem Report\t\t\t\t%s' % daydate)
sys.stdout.write('\n')
def usage():
"""Print usage information"""
sys.stdout.write("Usage: arc_summary [-h] [-a] [-d] [-p PAGE]\n\n")
sys.stdout.write("\t -h, --help : "
"Print this help message and exit\n")
sys.stdout.write("\t -a, --alternate : "
"Show an alternate sysctl layout\n")
sys.stdout.write("\t -d, --description : "
"Show the sysctl descriptions\n")
sys.stdout.write("\t -p PAGE, --page=PAGE : "
"Select a single output page to display,\n")
sys.stdout.write("\t "
"should be an integer between 1 and " +
str(len(unSub)) + "\n\n")
sys.stdout.write("Examples:\n")
sys.stdout.write("\tarc_summary -a\n")
sys.stdout.write("\tarc_summary -p 4\n")
sys.stdout.write("\tarc_summary -ad\n")
sys.stdout.write("\tarc_summary --page=2\n")
def main():
"""Main function"""
global show_tunable_descriptions
global alternate_tunable_layout
try:
opts, args = getopt.getopt(
sys.argv[1:],
"adp:h", ["alternate", "description", "page=", "help"]
)
except getopt.error as e:
sys.stderr.write("Error: %s\n" % e.msg)
usage()
sys.exit(1)
args = {}
for opt, arg in opts:
if opt in ('-a', '--alternate'):
args['a'] = True
if opt in ('-d', '--description'):
args['d'] = True
if opt in ('-p', '--page'):
args['p'] = arg
if opt in ('-h', '--help'):
usage()
sys.exit(0)
Kstat = get_Kstat()
alternate_tunable_layout = 'a' in args
show_tunable_descriptions = 'd' in args
pages = []
if 'p' in args:
try:
pages.append(unSub[int(args['p']) - 1])
except (IndexError, ValueError):
sys.stderr.write('the argument to -p must be between 1 and ' +
str(len(unSub)) + '\n')
sys.exit(1)
else:
pages = unSub
zfs_header()
for page in pages:
page(Kstat)
sys.stdout.write("\n")
if __name__ == '__main__':
main()
Index: head/sys/contrib/openzfs/cmd/arc_summary/arc_summary3
===================================================================
--- head/sys/contrib/openzfs/cmd/arc_summary/arc_summary3 (revision 366779)
+++ head/sys/contrib/openzfs/cmd/arc_summary/arc_summary3 (revision 366780)
@@ -1,943 +1,943 @@
#!/usr/bin/env python3
#
# Copyright (c) 2008 Ben Rockwood <benr@cuddletech.com>,
# Copyright (c) 2010 Martin Matuska <mm@FreeBSD.org>,
# Copyright (c) 2010-2011 Jason J. Hellenthal <jhell@DataIX.net>,
# Copyright (c) 2017 Scot W. Stevenson <scot.stevenson@gmail.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
"""Print statistics on the ZFS ARC Cache and other information
Provides basic information on the ARC, its efficiency, the L2ARC (if present),
the Data Management Unit (DMU), Virtual Devices (VDEVs), and tunables. See
the in-source documentation and code at
-https://github.com/zfsonlinux/zfs/blob/master/module/zfs/arc.c for details.
+https://github.com/openzfs/zfs/blob/master/module/zfs/arc.c for details.
The original introduction to arc_summary can be found at
http://cuddletech.com/?p=454
"""
import argparse
import os
import subprocess
import sys
import time
-DESCRIPTION = 'Print ARC and other statistics for ZFS on Linux'
+DESCRIPTION = 'Print ARC and other statistics for OpenZFS'
INDENT = ' '*8
LINE_LENGTH = 72
DATE_FORMAT = '%a %b %d %H:%M:%S %Y'
TITLE = 'ZFS Subsystem Report'
SECTIONS = 'arc archits dmu l2arc spl tunables vdev zil'.split()
SECTION_HELP = 'print info from one section ('+' '.join(SECTIONS)+')'
# Tunables and SPL are handled separately because they come from
# different sources
SECTION_PATHS = {'arc': 'arcstats',
'dmu': 'dmu_tx',
'l2arc': 'arcstats', # L2ARC stuff lives in arcstats
'vdev': 'vdev_cache_stats',
'xuio': 'xuio_stats',
'zfetch': 'zfetchstats',
'zil': 'zil'}
parser = argparse.ArgumentParser(description=DESCRIPTION)
parser.add_argument('-a', '--alternate', action='store_true', default=False,
help='use alternate formatting for tunables and SPL',
dest='alt')
parser.add_argument('-d', '--description', action='store_true', default=False,
help='print descriptions with tunables and SPL',
dest='desc')
parser.add_argument('-g', '--graph', action='store_true', default=False,
help='print graph on ARC use and exit', dest='graph')
parser.add_argument('-p', '--page', type=int, dest='page',
help='print page by number (DEPRECATED, use "-s")')
parser.add_argument('-r', '--raw', action='store_true', default=False,
help='dump all available data with minimal formatting',
dest='raw')
parser.add_argument('-s', '--section', dest='section', help=SECTION_HELP)
ARGS = parser.parse_args()
if sys.platform.startswith('freebsd'):
# Requires py36-sysctl on FreeBSD
import sysctl
VDEV_CACHE_SIZE = 'vdev.cache_size'
def load_kstats(section):
base = 'kstat.zfs.misc.{section}.'.format(section=section)
# base is removed from the name
fmt = lambda kstat: '{name} : {value}'.format(name=kstat.name[len(base):],
value=kstat.value)
return [fmt(kstat) for kstat in sysctl.filter(base)]
def get_params(base):
cut = 8 # = len('vfs.zfs.')
return {ctl.name[cut:]: str(ctl.value) for ctl in sysctl.filter(base)}
def get_tunable_params():
return get_params('vfs.zfs')
def get_vdev_params():
return get_params('vfs.zfs.vdev')
def get_version_impl(request):
# FreeBSD reports versions for zpl and spa instead of zfs and spl.
name = {'zfs': 'zpl',
'spl': 'spa'}[request]
mib = 'vfs.zfs.version.{}'.format(name)
version = sysctl.filter(mib)[0].value
return '{} version {}'.format(name, version)
def get_descriptions(_request):
# py-sysctl doesn't give descriptions, so we have to shell out.
command = ['sysctl', '-d', 'vfs.zfs']
# The recommended way to do this is with subprocess.run(). However,
# some installed versions of Python are < 3.5, so we offer them
# the option of doing it the old way (for now)
if 'run' in dir(subprocess):
info = subprocess.run(command, stdout=subprocess.PIPE,
universal_newlines=True)
lines = info.stdout.split('\n')
else:
info = subprocess.check_output(command, universal_newlines=True)
lines = info.split('\n')
def fmt(line):
name, desc = line.split(':', 1)
return (name.strip(), desc.strip())
return dict([fmt(line) for line in lines if len(line) > 0])
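# Illustration (hypothetical sysctl -d line, not from a real system):
# "vfs.zfs.arc_max: Maximum ARC size in bytes" is split on the first ':'
# by fmt() into ('vfs.zfs.arc_max', 'Maximum ARC size in bytes').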
elif sys.platform.startswith('linux'):
KSTAT_PATH = '/proc/spl/kstat/zfs'
SPL_PATH = '/sys/module/spl/parameters'
TUNABLES_PATH = '/sys/module/zfs/parameters'
VDEV_CACHE_SIZE = 'zfs_vdev_cache_size'
def load_kstats(section):
path = os.path.join(KSTAT_PATH, section)
with open(path) as f:
return list(f)[2:] # Get rid of header
def get_params(basepath):
"""Collect information on the Solaris Porting Layer (SPL) or the
tunables, depending on the PATH given. Does not check if PATH is
legal.
"""
result = {}
for name in os.listdir(basepath):
path = os.path.join(basepath, name)
with open(path) as f:
value = f.read()
result[name] = value.strip()
return result
def get_spl_params():
return get_params(SPL_PATH)
def get_tunable_params():
return get_params(TUNABLES_PATH)
def get_vdev_params():
return get_params(TUNABLES_PATH)
def get_version_impl(request):
# The original arc_summary called /sbin/modinfo/{spl,zfs} to get
# the version information. We switch to /sys/module/{spl,zfs}/version
# to make sure we get what is really loaded in the kernel
command = ["cat", "/sys/module/{0}/version".format(request)]
req = request.upper()
# The recommended way to do this is with subprocess.run(). However,
# some installed versions of Python are < 3.5, so we offer them
# the option of doing it the old way (for now)
if 'run' in dir(subprocess):
info = subprocess.run(command, stdout=subprocess.PIPE,
universal_newlines=True)
version = info.stdout.strip()
else:
info = subprocess.check_output(command, universal_newlines=True)
version = info.strip()
return version
def get_descriptions(request):
"""Get the descriptions of the Solaris Porting Layer (SPL) or the
tunables, return with minimal formatting.
"""
if request not in ('spl', 'zfs'):
print('ERROR: description of "{0}" requested'.format(request))
sys.exit(1)
descs = {}
target_prefix = 'parm:'
# We would prefer to do this with /sys/modules -- see the discussion at
# get_version() -- but there isn't a way to get the descriptions from
# there, so we fall back on modinfo
command = ["/sbin/modinfo", request, "-0"]
# The recommended way to do this is with subprocess.run(). However,
# some installed versions of Python are < 3.5, so we offer them
# the option of doing it the old way (for now)
info = ''
try:
if 'run' in dir(subprocess):
info = subprocess.run(command, stdout=subprocess.PIPE,
universal_newlines=True)
raw_output = info.stdout.split('\0')
else:
info = subprocess.check_output(command,
universal_newlines=True)
raw_output = info.split('\0')
except subprocess.CalledProcessError:
print("Error: Descriptions not available",
"(can't access kernel module)")
sys.exit(1)
for line in raw_output:
if not line.startswith(target_prefix):
continue
line = line[len(target_prefix):].strip()
name, raw_desc = line.split(':', 1)
desc = raw_desc.rsplit('(', 1)[0]
if desc == '':
desc = '(No description found)'
descs[name.strip()] = desc.strip()
return descs
def cleanup_line(single_line):
"""Format a raw line of data from /proc and isolate the name value
part, returning a tuple with each. Currently, this gets rid of the
middle '4'. For example "arc_no_grow 4 0" returns the tuple
("arc_no_grow", "0").
"""
name, _, value = single_line.split()
return name, value
def draw_graph(kstats_dict):
"""Draw a primitive graph representing the basic information on the
ARC -- its size and the proportion used by MFU and MRU -- and quit.
We use the maximum size of the ARC to calculate how full it is. This is a
very rough representation.
"""
arc_stats = isolate_section('arcstats', kstats_dict)
GRAPH_INDENT = ' '*4
GRAPH_WIDTH = 60
arc_size = f_bytes(arc_stats['size'])
arc_perc = f_perc(arc_stats['size'], arc_stats['c_max'])
mfu_size = f_bytes(arc_stats['mfu_size'])
mru_size = f_bytes(arc_stats['mru_size'])
meta_limit = f_bytes(arc_stats['arc_meta_limit'])
meta_size = f_bytes(arc_stats['arc_meta_used'])
dnode_limit = f_bytes(arc_stats['arc_dnode_limit'])
dnode_size = f_bytes(arc_stats['dnode_size'])
info_form = ('ARC: {0} ({1}) MFU: {2} MRU: {3} META: {4} ({5}) '
'DNODE {6} ({7})')
info_line = info_form.format(arc_size, arc_perc, mfu_size, mru_size,
meta_size, meta_limit, dnode_size,
dnode_limit)
info_spc = ' '*int((GRAPH_WIDTH-len(info_line))/2)
info_line = GRAPH_INDENT+info_spc+info_line
graph_line = GRAPH_INDENT+'+'+('-'*(GRAPH_WIDTH-2))+'+'
mfu_perc = float(int(arc_stats['mfu_size'])/int(arc_stats['c_max']))
mru_perc = float(int(arc_stats['mru_size'])/int(arc_stats['c_max']))
arc_perc = float(int(arc_stats['size'])/int(arc_stats['c_max']))
total_ticks = float(arc_perc)*GRAPH_WIDTH
mfu_ticks = mfu_perc*GRAPH_WIDTH
mru_ticks = mru_perc*GRAPH_WIDTH
other_ticks = total_ticks-(mfu_ticks+mru_ticks)
core_form = 'F'*int(mfu_ticks)+'R'*int(mru_ticks)+'O'*int(other_ticks)
core_spc = ' '*(GRAPH_WIDTH-(2+len(core_form)))
core_line = GRAPH_INDENT+'|'+core_form+core_spc+'|'
for line in ('', info_line, graph_line, core_line, graph_line, ''):
print(line)
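# Illustration (hypothetical output): for an ARC about half full and
# dominated by MFU data, the 60-column core line might render roughly as
#   |FFFFFFFFFFFFFFFFFFFFFFRRRRRROO                            |
# where F, R and O mark MFU, MRU and other buffers respectively.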
def f_bytes(byte_string):
"""Return human-readable representation of a byte value in
powers of 2 (eg "KiB" for "kibibytes", etc) to two decimal
points. Values smaller than one KiB are returned without
decimal points. Note "bytes" is a reserved keyword.
"""
prefixes = ([2**80, "YiB"], # yobibytes (yotta)
[2**70, "ZiB"], # zebibytes (zetta)
[2**60, "EiB"], # exbibytes (exa)
[2**50, "PiB"], # pebibytes (peta)
[2**40, "TiB"], # tebibytes (tera)
[2**30, "GiB"], # gibibytes (giga)
[2**20, "MiB"], # mebibytes (mega)
[2**10, "KiB"]) # kibibytes (kilo)
bites = int(byte_string)
if bites >= 2**10:
for limit, unit in prefixes:
if bites >= limit:
value = bites / limit
break
result = '{0:.1f} {1}'.format(value, unit)
else:
result = '{0} Bytes'.format(bites)
return result
def f_hits(hits_string):
"""Create a human-readable representation of the number of hits.
The single-letter symbols used are SI to avoid the confusion caused
by the different "short scale" and "long scale" representations in
English, which use the same words for different values. See
https://en.wikipedia.org/wiki/Names_of_large_numbers and:
https://physics.nist.gov/cuu/Units/prefixes.html
"""
numbers = ([10**24, 'Y'], # yotta (septillion)
[10**21, 'Z'], # zetta (sextillion)
[10**18, 'E'], # exa (quintillion)
[10**15, 'P'], # peta (quadrillion)
[10**12, 'T'], # tera (trillion)
[10**9, 'G'], # giga (billion)
[10**6, 'M'], # mega (million)
[10**3, 'k']) # kilo (thousand)
hits = int(hits_string)
if hits >= 1000:
for limit, symbol in numbers:
if hits >= limit:
value = hits/limit
break
result = "%0.1f%s" % (value, symbol)
else:
result = "%d" % hits
return result
def f_perc(value1, value2):
"""Calculate percentage and return in human-readable form. If
rounding produces the result '0.0' though the first number is
not zero, include a 'less-than' symbol to avoid confusion.
Division by zero is handled by returning 'n/a'; no error
is called.
"""
v1 = float(value1)
v2 = float(value2)
try:
perc = 100 * v1/v2
except ZeroDivisionError:
result = 'n/a'
else:
result = '{0:0.1f} %'.format(perc)
if result == '0.0 %' and v1 > 0:
result = '< 0.1 %'
return result
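# Example (illustrative): f_perc(1, 3) -> '33.3 %',
# f_perc(1, 100000) -> '< 0.1 %', f_perc(5, 0) -> 'n/a'.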
def format_raw_line(name, value):
"""For the --raw option for the tunable and SPL outputs, decide on the
correct formatting based on the --alternate flag.
"""
if ARGS.alt:
result = '{0}{1}={2}'.format(INDENT, name, value)
else:
spc = LINE_LENGTH-(len(INDENT)+len(value))
result = '{0}{1:<{spc}}{2}'.format(INDENT, name, value, spc=spc)
return result
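# Example (illustrative): with --alternate, format_raw_line('hits', '42')
# yields INDENT + 'hits=42'; otherwise the name is left-justified and the
# value padded so that it ends at column LINE_LENGTH.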
def get_kstats():
"""Collect information on the ZFS subsystem. The step does not perform any
further processing, giving us the option to only work on what is actually
needed. The name "kstat" is a holdover from the Solaris utility of the same
name.
"""
result = {}
for section in SECTION_PATHS.values():
if section not in result:
result[section] = load_kstats(section)
return result
def get_version(request):
"""Get the version number of ZFS or SPL on this machine for header.
Returns an error string, but does not raise an error, if we can't
get the ZFS/SPL version.
"""
if request not in ('spl', 'zfs'):
error_msg = '(ERROR: "{0}" requested)'.format(request)
return error_msg
return get_version_impl(request)
def print_header():
"""Print the initial heading with date and time as well as info on the
kernel and ZFS versions. This is not called for the graph.
"""
# datetime is now recommended over time but we keep the exact formatting
# from the older version of arc_summary in case there are scripts
# that expect it in this way
daydate = time.strftime(DATE_FORMAT)
spc_date = LINE_LENGTH-len(daydate)
sys_version = os.uname()
sys_msg = sys_version.sysname+' '+sys_version.release
zfs = get_version('zfs')
spc_zfs = LINE_LENGTH-len(zfs)
machine_msg = 'Machine: '+sys_version.nodename+' ('+sys_version.machine+')'
spl = get_version('spl')
spc_spl = LINE_LENGTH-len(spl)
print('\n'+('-'*LINE_LENGTH))
print('{0:<{spc}}{1}'.format(TITLE, daydate, spc=spc_date))
print('{0:<{spc}}{1}'.format(sys_msg, zfs, spc=spc_zfs))
print('{0:<{spc}}{1}\n'.format(machine_msg, spl, spc=spc_spl))
def print_raw(kstats_dict):
"""Print all available data from the system in a minimally sorted format.
This can be used as a source to be piped through 'grep'.
"""
sections = sorted(kstats_dict.keys())
for section in sections:
print('\n{0}:'.format(section.upper()))
lines = sorted(kstats_dict[section])
for line in lines:
name, value = cleanup_line(line)
print(format_raw_line(name, value))
# Tunables and SPL must be handled separately because they come from a
# different source and have descriptions the user might request
print()
section_spl()
section_tunables()
def isolate_section(section_name, kstats_dict):
"""From the complete information on all sections, retrieve only those
for one section.
"""
try:
section_data = kstats_dict[section_name]
except KeyError:
print('ERROR: Data on {0} not available'.format(section_name))
sys.exit(1)
section_dict = dict(cleanup_line(l) for l in section_data)
return section_dict
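# Illustration: isolate_section('arcstats', kstats) returns a plain dict
# keyed by bare stat names, e.g. {'hits': '12345', 'misses': '678', ...},
# with all values kept as strings.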
# Formatted output helper functions
def prt_1(text, value):
"""Print text and one value, no indent"""
spc = ' '*(LINE_LENGTH-(len(text)+len(value)))
print('{0}{spc}{1}'.format(text, value, spc=spc))
def prt_i1(text, value):
"""Print text and one value, with indent"""
spc = ' '*(LINE_LENGTH-(len(INDENT)+len(text)+len(value)))
print(INDENT+'{0}{spc}{1}'.format(text, value, spc=spc))
def prt_2(text, value1, value2):
"""Print text and two values, no indent"""
values = '{0:>9} {1:>9}'.format(value1, value2)
spc = ' '*(LINE_LENGTH-(len(text)+len(values)+2))
print('{0}{spc} {1}'.format(text, values, spc=spc))
def prt_i2(text, value1, value2):
"""Print text and two values, with indent"""
values = '{0:>9} {1:>9}'.format(value1, value2)
spc = ' '*(LINE_LENGTH-(len(INDENT)+len(text)+len(values)+2))
print(INDENT+'{0}{spc} {1}'.format(text, values, spc=spc))
# The section output concentrates on important parameters instead of
# being exhaustive (that is what the --raw parameter is for)
def section_arc(kstats_dict):
"""Give basic information on the ARC, MRU and MFU. This is the first
and most used section.
"""
arc_stats = isolate_section('arcstats', kstats_dict)
throttle = arc_stats['memory_throttle_count']
if throttle == '0':
health = 'HEALTHY'
else:
health = 'THROTTLED'
prt_1('ARC status:', health)
prt_i1('Memory throttle count:', throttle)
print()
arc_size = arc_stats['size']
arc_target_size = arc_stats['c']
arc_max = arc_stats['c_max']
arc_min = arc_stats['c_min']
mfu_size = arc_stats['mfu_size']
mru_size = arc_stats['mru_size']
meta_limit = arc_stats['arc_meta_limit']
meta_size = arc_stats['arc_meta_used']
dnode_limit = arc_stats['arc_dnode_limit']
dnode_size = arc_stats['dnode_size']
target_size_ratio = '{0}:1'.format(int(arc_max) // int(arc_min))
prt_2('ARC size (current):',
f_perc(arc_size, arc_max), f_bytes(arc_size))
prt_i2('Target size (adaptive):',
f_perc(arc_target_size, arc_max), f_bytes(arc_target_size))
prt_i2('Min size (hard limit):',
f_perc(arc_min, arc_max), f_bytes(arc_min))
prt_i2('Max size (high water):',
target_size_ratio, f_bytes(arc_max))
caches_size = int(mfu_size)+int(mru_size)
prt_i2('Most Frequently Used (MFU) cache size:',
f_perc(mfu_size, caches_size), f_bytes(mfu_size))
prt_i2('Most Recently Used (MRU) cache size:',
f_perc(mru_size, caches_size), f_bytes(mru_size))
prt_i2('Metadata cache size (hard limit):',
f_perc(meta_limit, arc_max), f_bytes(meta_limit))
prt_i2('Metadata cache size (current):',
f_perc(meta_size, meta_limit), f_bytes(meta_size))
prt_i2('Dnode cache size (hard limit):',
f_perc(dnode_limit, meta_limit), f_bytes(dnode_limit))
prt_i2('Dnode cache size (current):',
f_perc(dnode_size, dnode_limit), f_bytes(dnode_size))
print()
print('ARC hash breakdown:')
prt_i1('Elements max:', f_hits(arc_stats['hash_elements_max']))
prt_i2('Elements current:',
f_perc(arc_stats['hash_elements'], arc_stats['hash_elements_max']),
f_hits(arc_stats['hash_elements']))
prt_i1('Collisions:', f_hits(arc_stats['hash_collisions']))
prt_i1('Chain max:', f_hits(arc_stats['hash_chain_max']))
prt_i1('Chains:', f_hits(arc_stats['hash_chains']))
print()
print('ARC misc:')
prt_i1('Deleted:', f_hits(arc_stats['deleted']))
prt_i1('Mutex misses:', f_hits(arc_stats['mutex_miss']))
prt_i1('Eviction skips:', f_hits(arc_stats['evict_skip']))
print()
def section_archits(kstats_dict):
"""Print information on how the caches are accessed ("arc hits").
"""
arc_stats = isolate_section('arcstats', kstats_dict)
all_accesses = int(arc_stats['hits'])+int(arc_stats['misses'])
actual_hits = int(arc_stats['mfu_hits'])+int(arc_stats['mru_hits'])
prt_1('ARC total accesses (hits + misses):', f_hits(all_accesses))
ta_todo = (('Cache hit ratio:', arc_stats['hits']),
('Cache miss ratio:', arc_stats['misses']),
('Actual hit ratio (MFU + MRU hits):', actual_hits))
for title, value in ta_todo:
prt_i2(title, f_perc(value, all_accesses), f_hits(value))
dd_total = int(arc_stats['demand_data_hits']) +\
int(arc_stats['demand_data_misses'])
prt_i2('Data demand efficiency:',
f_perc(arc_stats['demand_data_hits'], dd_total),
f_hits(dd_total))
dp_total = int(arc_stats['prefetch_data_hits']) +\
int(arc_stats['prefetch_data_misses'])
prt_i2('Data prefetch efficiency:',
f_perc(arc_stats['prefetch_data_hits'], dp_total),
f_hits(dp_total))
known_hits = int(arc_stats['mfu_hits']) +\
int(arc_stats['mru_hits']) +\
int(arc_stats['mfu_ghost_hits']) +\
int(arc_stats['mru_ghost_hits'])
anon_hits = int(arc_stats['hits'])-known_hits
print()
print('Cache hits by cache type:')
cl_todo = (('Most frequently used (MFU):', arc_stats['mfu_hits']),
('Most recently used (MRU):', arc_stats['mru_hits']),
('Most frequently used (MFU) ghost:',
arc_stats['mfu_ghost_hits']),
('Most recently used (MRU) ghost:',
arc_stats['mru_ghost_hits']))
for title, value in cl_todo:
prt_i2(title, f_perc(value, arc_stats['hits']), f_hits(value))
# For some reason, anon_hits can turn negative, which is weird. Until we
# have figured out why this happens, we just hide the problem, following
# the behavior of the original arc_summary.
if anon_hits >= 0:
prt_i2('Anonymously used:',
f_perc(anon_hits, arc_stats['hits']), f_hits(anon_hits))
print()
print('Cache hits by data type:')
dt_todo = (('Demand data:', arc_stats['demand_data_hits']),
('Demand prefetch data:', arc_stats['prefetch_data_hits']),
('Demand metadata:', arc_stats['demand_metadata_hits']),
('Demand prefetch metadata:',
arc_stats['prefetch_metadata_hits']))
for title, value in dt_todo:
prt_i2(title, f_perc(value, arc_stats['hits']), f_hits(value))
print()
print('Cache misses by data type:')
dm_todo = (('Demand data:', arc_stats['demand_data_misses']),
('Demand prefetch data:',
arc_stats['prefetch_data_misses']),
('Demand metadata:', arc_stats['demand_metadata_misses']),
('Demand prefetch metadata:',
arc_stats['prefetch_metadata_misses']))
for title, value in dm_todo:
prt_i2(title, f_perc(value, arc_stats['misses']), f_hits(value))
print()
def section_dmu(kstats_dict):
"""Collect information on the DMU"""
zfetch_stats = isolate_section('zfetchstats', kstats_dict)
zfetch_access_total = int(zfetch_stats['hits'])+int(zfetch_stats['misses'])
prt_1('DMU prefetch efficiency:', f_hits(zfetch_access_total))
prt_i2('Hit ratio:', f_perc(zfetch_stats['hits'], zfetch_access_total),
f_hits(zfetch_stats['hits']))
prt_i2('Miss ratio:', f_perc(zfetch_stats['misses'], zfetch_access_total),
f_hits(zfetch_stats['misses']))
print()
def section_l2arc(kstats_dict):
"""Collect information on L2ARC device if present. If not, tell user
that we're skipping the section.
"""
# The L2ARC statistics live in the same section as the normal ARC stuff
arc_stats = isolate_section('arcstats', kstats_dict)
if arc_stats['l2_size'] == '0':
print('L2ARC not detected, skipping section\n')
return
l2_errors = int(arc_stats['l2_writes_error']) +\
int(arc_stats['l2_cksum_bad']) +\
int(arc_stats['l2_io_error'])
l2_access_total = int(arc_stats['l2_hits'])+int(arc_stats['l2_misses'])
health = 'HEALTHY'
if l2_errors > 0:
health = 'DEGRADED'
prt_1('L2ARC status:', health)
l2_todo = (('Low memory aborts:', 'l2_abort_lowmem'),
('Free on write:', 'l2_free_on_write'),
('R/W clashes:', 'l2_rw_clash'),
('Bad checksums:', 'l2_cksum_bad'),
('I/O errors:', 'l2_io_error'))
for title, value in l2_todo:
prt_i1(title, f_hits(arc_stats[value]))
print()
prt_1('L2ARC size (adaptive):', f_bytes(arc_stats['l2_size']))
prt_i2('Compressed:', f_perc(arc_stats['l2_asize'], arc_stats['l2_size']),
f_bytes(arc_stats['l2_asize']))
prt_i2('Header size:',
f_perc(arc_stats['l2_hdr_size'], arc_stats['l2_size']),
f_bytes(arc_stats['l2_hdr_size']))
print()
prt_1('L2ARC breakdown:', f_hits(l2_access_total))
prt_i2('Hit ratio:',
f_perc(arc_stats['l2_hits'], l2_access_total),
f_hits(arc_stats['l2_hits']))
prt_i2('Miss ratio:',
f_perc(arc_stats['l2_misses'], l2_access_total),
f_hits(arc_stats['l2_misses']))
prt_i1('Feeds:', f_hits(arc_stats['l2_feeds']))
print()
print('L2ARC writes:')
if arc_stats['l2_writes_done'] != arc_stats['l2_writes_sent']:
prt_i2('Writes sent:', 'FAULTED', f_hits(arc_stats['l2_writes_sent']))
prt_i2('Done ratio:',
f_perc(arc_stats['l2_writes_done'],
arc_stats['l2_writes_sent']),
f_hits(arc_stats['l2_writes_done']))
prt_i2('Error ratio:',
f_perc(arc_stats['l2_writes_error'],
arc_stats['l2_writes_sent']),
f_hits(arc_stats['l2_writes_error']))
else:
prt_i2('Writes sent:', '100 %', f_hits(arc_stats['l2_writes_sent']))
print()
print('L2ARC evicts:')
prt_i1('Lock retries:', f_hits(arc_stats['l2_evict_lock_retry']))
prt_i1('Upon reading:', f_hits(arc_stats['l2_evict_reading']))
print()
def section_spl(*_):
"""Print the SPL parameters, if requested with alternative format
and/or descriptions. This does not use kstats.
"""
if sys.platform.startswith('freebsd'):
# No SPL support in FreeBSD
return
spls = get_spl_params()
keylist = sorted(spls.keys())
print('Solaris Porting Layer (SPL):')
if ARGS.desc:
descriptions = get_descriptions('spl')
for key in keylist:
value = spls[key]
if ARGS.desc:
try:
print(INDENT+'#', descriptions[key])
except KeyError:
print(INDENT+'# (No description found)') # paranoid
print(format_raw_line(key, value))
print()
def section_tunables(*_):
"""Print the tunables, if requested with alternative format and/or
descriptions. This does not use kstats.
"""
tunables = get_tunable_params()
keylist = sorted(tunables.keys())
print('Tunables:')
if ARGS.desc:
descriptions = get_descriptions('zfs')
for key in keylist:
value = tunables[key]
if ARGS.desc:
try:
print(INDENT+'#', descriptions[key])
except KeyError:
print(INDENT+'# (No description found)') # paranoid
print(format_raw_line(key, value))
print()
def section_vdev(kstats_dict):
"""Collect information on VDEV caches"""
# Currently [Nov 2017] the VDEV cache is disabled, because it is actually
# harmful. When this is the case, we just skip the whole entry. See
- # https://github.com/zfsonlinux/zfs/blob/master/module/zfs/vdev_cache.c
+ # https://github.com/openzfs/zfs/blob/master/module/zfs/vdev_cache.c
# for details
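# Note: zfs_vdev_cache_size has defaulted to 0 (disabled) for this
# reason, which is what the check below detects.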
tunables = get_vdev_params()
if tunables[VDEV_CACHE_SIZE] == '0':
print('VDEV cache disabled, skipping section\n')
return
vdev_stats = isolate_section('vdev_cache_stats', kstats_dict)
vdev_cache_total = int(vdev_stats['hits']) +\
int(vdev_stats['misses']) +\
int(vdev_stats['delegations'])
prt_1('VDEV cache summary:', f_hits(vdev_cache_total))
prt_i2('Hit ratio:', f_perc(vdev_stats['hits'], vdev_cache_total),
f_hits(vdev_stats['hits']))
prt_i2('Miss ratio:', f_perc(vdev_stats['misses'], vdev_cache_total),
f_hits(vdev_stats['misses']))
prt_i2('Delegations:', f_perc(vdev_stats['delegations'], vdev_cache_total),
f_hits(vdev_stats['delegations']))
print()
def section_zil(kstats_dict):
"""Collect information on the ZFS Intent Log. Some of the information
- taken from https://github.com/zfsonlinux/zfs/blob/master/include/sys/zil.h
+ taken from https://github.com/openzfs/zfs/blob/master/include/sys/zil.h
"""
zil_stats = isolate_section('zil', kstats_dict)
prt_1('ZIL committed transactions:',
f_hits(zil_stats['zil_itx_count']))
prt_i1('Commit requests:', f_hits(zil_stats['zil_commit_count']))
prt_i1('Flushes to stable storage:',
f_hits(zil_stats['zil_commit_writer_count']))
prt_i2('Transactions to SLOG storage pool:',
f_bytes(zil_stats['zil_itx_metaslab_slog_bytes']),
f_hits(zil_stats['zil_itx_metaslab_slog_count']))
prt_i2('Transactions to non-SLOG storage pool:',
f_bytes(zil_stats['zil_itx_metaslab_normal_bytes']),
f_hits(zil_stats['zil_itx_metaslab_normal_count']))
print()
section_calls = {'arc': section_arc,
'archits': section_archits,
'dmu': section_dmu,
'l2arc': section_l2arc,
'spl': section_spl,
'tunables': section_tunables,
'vdev': section_vdev,
'zil': section_zil}
def main():
"""Run program. The options to draw a graph and to print all data raw are
treated separately because they come with their own call.
"""
kstats = get_kstats()
if ARGS.graph:
draw_graph(kstats)
sys.exit(0)
print_header()
if ARGS.raw:
print_raw(kstats)
elif ARGS.section:
try:
section_calls[ARGS.section](kstats)
except KeyError:
print('Error: Section "{0}" unknown'.format(ARGS.section))
sys.exit(1)
elif ARGS.page:
print('WARNING: Pages are deprecated, please use "--section"\n')
pages_to_calls = {1: 'arc',
2: 'archits',
3: 'l2arc',
4: 'dmu',
5: 'vdev',
6: 'tunables'}
try:
call = pages_to_calls[ARGS.page]
except KeyError:
print('Error: Page "{0}" not supported'.format(ARGS.page))
sys.exit(1)
else:
section_calls[call](kstats)
else:
# If no parameters were given, print all sections. The sequence can be
# changed by hand here if desired
calls = sorted(section_calls.keys())
for section in calls:
section_calls[section](kstats)
sys.exit(0)
if __name__ == '__main__':
main()
Index: head/sys/contrib/openzfs/cmd/dbufstat/dbufstat.in
===================================================================
--- head/sys/contrib/openzfs/cmd/dbufstat/dbufstat.in (revision 366779)
+++ head/sys/contrib/openzfs/cmd/dbufstat/dbufstat.in (revision 366780)
@@ -1,669 +1,684 @@
#!/usr/bin/env @PYTHON_SHEBANG@
#
# Print out statistics for all cached dmu buffers. This information
# is available through the dbufs kstat and may be post-processed as
# needed by the script.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License, Version 1.0 only
# (the "License"). You may not use this file except in compliance
# with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (C) 2013 Lawrence Livermore National Security, LLC.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
#
# This script must remain compatible with Python 2.6+ and Python 3.4+.
#
import sys
import getopt
import errno
import re
bhdr = ["pool", "objset", "object", "level", "blkid", "offset", "dbsize"]
bxhdr = ["pool", "objset", "object", "level", "blkid", "offset", "dbsize",
"meta", "state", "dbholds", "dbc", "list", "atype", "flags",
"count", "asize", "access", "mru", "gmru", "mfu", "gmfu", "l2",
"l2_dattr", "l2_asize", "l2_comp", "aholds", "dtype", "btype",
"data_bs", "meta_bs", "bsize", "lvls", "dholds", "blocks", "dsize"]
bincompat = ["cached", "direct", "indirect", "bonus", "spill"]
dhdr = ["pool", "objset", "object", "dtype", "cached"]
dxhdr = ["pool", "objset", "object", "dtype", "btype", "data_bs", "meta_bs",
"bsize", "lvls", "dholds", "blocks", "dsize", "cached", "direct",
"indirect", "bonus", "spill"]
dincompat = ["level", "blkid", "offset", "dbsize", "meta", "state", "dbholds",
"dbc", "list", "atype", "flags", "count", "asize", "access",
"mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "l2_asize",
"l2_comp", "aholds"]
thdr = ["pool", "objset", "dtype", "cached"]
txhdr = ["pool", "objset", "dtype", "cached", "direct", "indirect",
"bonus", "spill"]
tincompat = ["object", "level", "blkid", "offset", "dbsize", "meta", "state",
"dbc", "dbholds", "list", "atype", "flags", "count", "asize",
"access", "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr",
"l2_asize", "l2_comp", "aholds", "btype", "data_bs", "meta_bs",
"bsize", "lvls", "dholds", "blocks", "dsize"]
cols = {
# hdr: [size, scale, description]
"pool": [15, -1, "pool name"],
"objset": [6, -1, "dataset identification number"],
"object": [10, -1, "object number"],
"level": [5, -1, "indirection level of buffer"],
"blkid": [8, -1, "block number of buffer"],
"offset": [12, 1024, "offset in object of buffer"],
"dbsize": [7, 1024, "size of buffer"],
"meta": [4, -1, "is this buffer metadata?"],
"state": [5, -1, "state of buffer (read, cached, etc)"],
"dbholds": [7, 1000, "number of holds on buffer"],
"dbc": [3, -1, "in dbuf cache"],
"list": [4, -1, "which ARC list contains this buffer"],
"atype": [7, -1, "ARC header type (data or metadata)"],
"flags": [9, -1, "ARC read flags"],
"count": [5, -1, "ARC data count"],
"asize": [7, 1024, "size of this ARC buffer"],
"access": [10, -1, "time this ARC buffer was last accessed"],
"mru": [5, 1000, "hits while on the ARC's MRU list"],
"gmru": [5, 1000, "hits while on the ARC's MRU ghost list"],
"mfu": [5, 1000, "hits while on the ARC's MFU list"],
"gmfu": [5, 1000, "hits while on the ARC's MFU ghost list"],
"l2": [5, 1000, "hits while on the L2ARC"],
"l2_dattr": [8, -1, "L2ARC disk address/offset"],
"l2_asize": [8, 1024, "L2ARC alloc'd size (depending on compression)"],
"l2_comp": [21, -1, "L2ARC compression algorithm for buffer"],
"aholds": [6, 1000, "number of holds on this ARC buffer"],
"dtype": [27, -1, "dnode type"],
"btype": [27, -1, "bonus buffer type"],
"data_bs": [7, 1024, "data block size"],
"meta_bs": [7, 1024, "metadata block size"],
"bsize": [6, 1024, "bonus buffer size"],
"lvls": [6, -1, "number of indirection levels"],
"dholds": [6, 1000, "number of holds on dnode"],
"blocks": [8, 1000, "number of allocated blocks"],
"dsize": [12, 1024, "size of dnode"],
"cached": [6, 1024, "bytes cached for all blocks"],
"direct": [6, 1024, "bytes cached for direct blocks"],
"indirect": [8, 1024, "bytes cached for indirect blocks"],
"bonus": [5, 1024, "bytes cached for bonus buffer"],
"spill": [5, 1024, "bytes cached for spill block"],
}
hdr = None
xhdr = None
sep = " " # Default separator is 2 spaces
cmd = ("Usage: dbufstat [-bdhnrtvx] [-i file] [-f fields] [-o file] "
"[-s string] [-F filter]\n")
raw = 0
+if sys.platform.startswith("freebsd"):
+ import io
+ # Requires py-sysctl on FreeBSD
+ import sysctl
+
+ def default_ifile():
+ dbufs = sysctl.filter("kstat.zfs.misc.dbufs")[0].value
+ sys.stdin = io.StringIO(dbufs)
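+ # Returning "-" tells main() to read from the stdin we just primed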
+ return "-"
+
+elif sys.platform.startswith("linux"):
+ def default_ifile():
+ return "/proc/spl/kstat/zfs/dbufs"
+
+
def print_incompat_helper(incompat):
cnt = 0
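# Print the incompatible field names tab-indented, wrapping to a new
# line after every nine comma-separated entries.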
for key in sorted(incompat):
if cnt == 0:
sys.stderr.write("\t")
elif cnt > 8:
sys.stderr.write(",\n\t")
cnt = 0
else:
sys.stderr.write(", ")
sys.stderr.write("%s" % key)
cnt += 1
sys.stderr.write("\n\n")
def detailed_usage():
sys.stderr.write("%s\n" % cmd)
sys.stderr.write("Field definitions incompatible with '-b' option:\n")
print_incompat_helper(bincompat)
sys.stderr.write("Field definitions incompatible with '-d' option:\n")
print_incompat_helper(dincompat)
sys.stderr.write("Field definitions incompatible with '-t' option:\n")
print_incompat_helper(tincompat)
sys.stderr.write("Field definitions are as follows:\n")
for key in sorted(cols.keys()):
sys.stderr.write("%11s : %s\n" % (key, cols[key][2]))
sys.stderr.write("\n")
sys.exit(0)
def usage():
sys.stderr.write("%s\n" % cmd)
sys.stderr.write("\t -b : Print table of information for each dbuf\n")
sys.stderr.write("\t -d : Print table of information for each dnode\n")
sys.stderr.write("\t -h : Print this help message\n")
sys.stderr.write("\t -n : Exclude header from output\n")
sys.stderr.write("\t -r : Print raw values\n")
sys.stderr.write("\t -t : Print table of information for each dnode type"
"\n")
sys.stderr.write("\t -v : List all possible field headers and definitions"
"\n")
sys.stderr.write("\t -x : Print extended stats\n")
sys.stderr.write("\t -i : Redirect input from the specified file\n")
sys.stderr.write("\t -f : Specify specific fields to print (see -v)\n")
sys.stderr.write("\t -o : Redirect output to the specified file\n")
sys.stderr.write("\t -s : Override default field separator with custom "
"character or string\n")
sys.stderr.write("\t -F : Filter output by value or regex\n")
sys.stderr.write("\nExamples:\n")
sys.stderr.write("\tdbufstat -d -o /tmp/d.log\n")
sys.stderr.write("\tdbufstat -t -s \",\" -o /tmp/t.log\n")
sys.stderr.write("\tdbufstat -v\n")
sys.stderr.write("\tdbufstat -d -f pool,object,objset,dsize,cached\n")
sys.stderr.write("\tdbufstat -bx -F dbc=1,objset=54,pool=testpool\n")
sys.stderr.write("\n")
sys.exit(1)
def prettynum(sz, scale, num=0):
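# A sketch of the scaling behaviour (assuming raw is unset): e.g.
# prettynum(5, 1024, 2048) returns " 2.0K", while scale == -1 skips
# unit conversion and prints the value as-is.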
global raw
suffix = [' ', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']
index = 0
save = 0
if raw or scale == -1:
return "%*s" % (sz, num)
# Values between 0 and 1 are rounding artifacts; print them as 0
elif 0 < num < 1:
num = 0
while num > scale and index < 5:
save = num
num = num / scale
index += 1
if index == 0:
return "%*d" % (sz, num)
if (save / scale) < 10:
return "%*.1f%s" % (sz - 1, num, suffix[index])
else:
return "%*d%s" % (sz - 1, num, suffix[index])
def print_values(v):
global hdr
global sep
try:
for col in hdr:
sys.stdout.write("%s%s" % (
prettynum(cols[col][0], cols[col][1], v[col]), sep))
sys.stdout.write("\n")
except IOError as e:
if e.errno == errno.EPIPE:
sys.exit(1)
def print_header():
global hdr
global sep
try:
for col in hdr:
sys.stdout.write("%*s%s" % (cols[col][0], col, sep))
sys.stdout.write("\n")
except IOError as e:
if e.errno == errno.EPIPE:
sys.exit(1)
def get_typestring(t):
ot_strings = [
"DMU_OT_NONE",
# general:
"DMU_OT_OBJECT_DIRECTORY",
"DMU_OT_OBJECT_ARRAY",
"DMU_OT_PACKED_NVLIST",
"DMU_OT_PACKED_NVLIST_SIZE",
"DMU_OT_BPOBJ",
"DMU_OT_BPOBJ_HDR",
# spa:
"DMU_OT_SPACE_MAP_HEADER",
"DMU_OT_SPACE_MAP",
# zil:
"DMU_OT_INTENT_LOG",
# dmu:
"DMU_OT_DNODE",
"DMU_OT_OBJSET",
# dsl:
"DMU_OT_DSL_DIR",
"DMU_OT_DSL_DIR_CHILD_MAP",
"DMU_OT_DSL_DS_SNAP_MAP",
"DMU_OT_DSL_PROPS",
"DMU_OT_DSL_DATASET",
# zpl:
"DMU_OT_ZNODE",
"DMU_OT_OLDACL",
"DMU_OT_PLAIN_FILE_CONTENTS",
"DMU_OT_DIRECTORY_CONTENTS",
"DMU_OT_MASTER_NODE",
"DMU_OT_UNLINKED_SET",
# zvol:
"DMU_OT_ZVOL",
"DMU_OT_ZVOL_PROP",
# other; for testing only!
"DMU_OT_PLAIN_OTHER",
"DMU_OT_UINT64_OTHER",
"DMU_OT_ZAP_OTHER",
# new object types:
"DMU_OT_ERROR_LOG",
"DMU_OT_SPA_HISTORY",
"DMU_OT_SPA_HISTORY_OFFSETS",
"DMU_OT_POOL_PROPS",
"DMU_OT_DSL_PERMS",
"DMU_OT_ACL",
"DMU_OT_SYSACL",
"DMU_OT_FUID",
"DMU_OT_FUID_SIZE",
"DMU_OT_NEXT_CLONES",
"DMU_OT_SCAN_QUEUE",
"DMU_OT_USERGROUP_USED",
"DMU_OT_USERGROUP_QUOTA",
"DMU_OT_USERREFS",
"DMU_OT_DDT_ZAP",
"DMU_OT_DDT_STATS",
"DMU_OT_SA",
"DMU_OT_SA_MASTER_NODE",
"DMU_OT_SA_ATTR_REGISTRATION",
"DMU_OT_SA_ATTR_LAYOUTS",
"DMU_OT_SCAN_XLATE",
"DMU_OT_DEDUP",
"DMU_OT_DEADLIST",
"DMU_OT_DEADLIST_HDR",
"DMU_OT_DSL_CLONES",
"DMU_OT_BPOBJ_SUBOBJ"]
otn_strings = {
0x80: "DMU_OTN_UINT8_DATA",
0xc0: "DMU_OTN_UINT8_METADATA",
0x81: "DMU_OTN_UINT16_DATA",
0xc1: "DMU_OTN_UINT16_METADATA",
0x82: "DMU_OTN_UINT32_DATA",
0xc2: "DMU_OTN_UINT32_METADATA",
0x83: "DMU_OTN_UINT64_DATA",
0xc3: "DMU_OTN_UINT64_METADATA",
0x84: "DMU_OTN_ZAP_DATA",
0xc4: "DMU_OTN_ZAP_METADATA",
0xa0: "DMU_OTN_UINT8_ENC_DATA",
0xe0: "DMU_OTN_UINT8_ENC_METADATA",
0xa1: "DMU_OTN_UINT16_ENC_DATA",
0xe1: "DMU_OTN_UINT16_ENC_METADATA",
0xa2: "DMU_OTN_UINT32_ENC_DATA",
0xe2: "DMU_OTN_UINT32_ENC_METADATA",
0xa3: "DMU_OTN_UINT64_ENC_DATA",
0xe3: "DMU_OTN_UINT64_ENC_METADATA",
0xa4: "DMU_OTN_ZAP_ENC_DATA",
0xe4: "DMU_OTN_ZAP_ENC_METADATA"}
# If "-rr" option is used, don't convert to string representation
if raw > 1:
return "%i" % t
try:
if t < len(ot_strings):
return ot_strings[t]
else:
return otn_strings[t]
except (IndexError, KeyError):
return "(UNKNOWN)"
def get_compstring(c):
comp_strings = ["ZIO_COMPRESS_INHERIT", "ZIO_COMPRESS_ON",
"ZIO_COMPRESS_OFF", "ZIO_COMPRESS_LZJB",
"ZIO_COMPRESS_EMPTY", "ZIO_COMPRESS_GZIP_1",
"ZIO_COMPRESS_GZIP_2", "ZIO_COMPRESS_GZIP_3",
"ZIO_COMPRESS_GZIP_4", "ZIO_COMPRESS_GZIP_5",
"ZIO_COMPRESS_GZIP_6", "ZIO_COMPRESS_GZIP_7",
"ZIO_COMPRESS_GZIP_8", "ZIO_COMPRESS_GZIP_9",
"ZIO_COMPRESS_ZLE", "ZIO_COMPRESS_LZ4",
"ZIO_COMPRESS_ZSTD", "ZIO_COMPRESS_FUNCTION"]
# If "-rr" option is used, don't convert to string representation
if raw > 1:
return "%i" % c
try:
return comp_strings[c]
except IndexError:
return "%i" % c
def parse_line(line, labels):
global hdr
new = dict()
val = None
for col in hdr:
# These are "special" fields computed in the update_dict
# function, prevent KeyError exception on labels[col] for these.
if col not in ['bonus', 'cached', 'direct', 'indirect', 'spill']:
val = line[labels[col]]
if col in ['pool', 'flags']:
new[col] = str(val)
elif col in ['dtype', 'btype']:
new[col] = get_typestring(int(val))
elif col in ['l2_comp']:
new[col] = get_compstring(int(val))
else:
new[col] = int(val)
return new
def update_dict(d, k, line, labels):
pool = line[labels['pool']]
objset = line[labels['objset']]
key = line[labels[k]]
dbsize = int(line[labels['dbsize']])
blkid = int(line[labels['blkid']])
level = int(line[labels['level']])
if pool not in d:
d[pool] = dict()
if objset not in d[pool]:
d[pool][objset] = dict()
if key not in d[pool][objset]:
d[pool][objset][key] = parse_line(line, labels)
d[pool][objset][key]['bonus'] = 0
d[pool][objset][key]['cached'] = 0
d[pool][objset][key]['direct'] = 0
d[pool][objset][key]['indirect'] = 0
d[pool][objset][key]['spill'] = 0
d[pool][objset][key]['cached'] += dbsize
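# A blkid of -1 denotes the bonus buffer and -2 the spill block,
# matching DMU_BONUS_BLKID and DMU_SPILL_BLKID in the kernel headers.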
if blkid == -1:
d[pool][objset][key]['bonus'] += dbsize
elif blkid == -2:
d[pool][objset][key]['spill'] += dbsize
else:
if level == 0:
d[pool][objset][key]['direct'] += dbsize
else:
d[pool][objset][key]['indirect'] += dbsize
return d
def skip_line(vals, filters):
'''
Determines if a line should be skipped during printing
based on a set of filters
'''
if len(filters) == 0:
return False
for key in vals:
if key in filters:
val = prettynum(cols[key][0], cols[key][1], vals[key]).strip()
# we want a full match here
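# Wrapping the filter in (?:...)\Z forces it to consume the entire
# value, since re.match() only anchors at the start of the string.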
if re.match("(?:" + filters[key] + r")\Z", val) is None:
return True
return False
def print_dict(d, filters, noheader):
if not noheader:
print_header()
for pool in list(d.keys()):
for objset in list(d[pool].keys()):
for v in list(d[pool][objset].values()):
if not skip_line(v, filters):
print_values(v)
def dnodes_build_dict(filehandle):
labels = dict()
dnodes = dict()
# The first 3 lines are header information; skip the first two
for i in range(2):
next(filehandle)
# The third line contains the labels and index locations
for i, v in enumerate(next(filehandle).split()):
labels[v] = i
# The rest of the file is buffer information
for line in filehandle:
update_dict(dnodes, 'object', line.split(), labels)
return dnodes
def types_build_dict(filehandle):
labels = dict()
types = dict()
# The first 3 lines are header information; skip the first two
for i in range(2):
next(filehandle)
# The third line contains the labels and index locations
for i, v in enumerate(next(filehandle).split()):
labels[v] = i
# The rest of the file is buffer information
for line in filehandle:
update_dict(types, 'dtype', line.split(), labels)
return types
def buffers_print_all(filehandle, filters, noheader):
labels = dict()
# The first 3 lines are header information; skip the first two
for i in range(2):
next(filehandle)
# The third line contains the labels and index locations
for i, v in enumerate(next(filehandle).split()):
labels[v] = i
if not noheader:
print_header()
# The rest of the file is buffer information
for line in filehandle:
vals = parse_line(line.split(), labels)
if not skip_line(vals, filters):
print_values(vals)
def main():
global hdr
global sep
global raw
desired_cols = None
bflag = False
dflag = False
hflag = False
ifile = None
ofile = None
tflag = False
vflag = False
xflag = False
nflag = False
filters = dict()
try:
opts, args = getopt.getopt(
sys.argv[1:],
"bdf:hi:o:rs:tvxF:n",
[
"buffers",
"dnodes",
"columns",
"help",
"infile",
"outfile",
"separator",
"types",
"verbose",
"extended",
"filter"
]
)
except getopt.error:
usage()
opts = None
for opt, arg in opts:
if opt in ('-b', '--buffers'):
bflag = True
if opt in ('-d', '--dnodes'):
dflag = True
if opt in ('-f', '--columns'):
desired_cols = arg
if opt in ('-h', '--help'):
hflag = True
if opt in ('-i', '--infile'):
ifile = arg
if opt in ('-o', '--outfile'):
ofile = arg
if opt in ('-r', '--raw'):
raw += 1
if opt in ('-s', '--separator'):
sep = arg
if opt in ('-t', '--types'):
tflag = True
if opt in ('-v', '--verbose'):
vflag = True
if opt in ('-x', '--extended'):
xflag = True
if opt in ('-n', '--noheader'):
nflag = True
if opt in ('-F', '--filter'):
fils = [x.strip() for x in arg.split(",")]
for fil in fils:
f = [x.strip() for x in fil.split("=")]
if len(f) != 2:
sys.stderr.write("Invalid filter '%s'.\n" % fil)
sys.exit(1)
if f[0] not in cols:
sys.stderr.write("Invalid field '%s' in filter.\n" % f[0])
sys.exit(1)
if f[0] in filters:
sys.stderr.write("Field '%s' specified multiple times in "
"filter.\n" % f[0])
sys.exit(1)
try:
re.compile("(?:" + f[1] + r")\Z")
except re.error:
sys.stderr.write("Invalid regex for field '%s' in "
"filter.\n" % f[0])
sys.exit(1)
filters[f[0]] = f[1]
if hflag or (xflag and desired_cols):
usage()
if vflag:
detailed_usage()
# Ensure that at most one of the b, d, or t flags is set
if (bflag and dflag) or (bflag and tflag) or (dflag and tflag):
usage()
if bflag:
hdr = bxhdr if xflag else bhdr
elif tflag:
hdr = txhdr if xflag else thdr
else: # Even if dflag is False, it's the default if none set
dflag = True
hdr = dxhdr if xflag else dhdr
if desired_cols:
hdr = desired_cols.split(",")
invalid = []
incompat = []
for ele in hdr:
if ele not in cols:
invalid.append(ele)
elif ((bflag and bincompat and ele in bincompat) or
(dflag and dincompat and ele in dincompat) or
(tflag and tincompat and ele in tincompat)):
incompat.append(ele)
if len(invalid) > 0:
sys.stderr.write("Invalid column definition! -- %s\n" % invalid)
usage()
if len(incompat) > 0:
sys.stderr.write("Incompatible field specified! -- %s\n" %
incompat)
usage()
if ofile:
try:
tmp = open(ofile, "w")
sys.stdout = tmp
except IOError:
sys.stderr.write("Cannot open %s for writing\n" % ofile)
sys.exit(1)
if not ifile:
- ifile = '/proc/spl/kstat/zfs/dbufs'
+ ifile = default_ifile()
if ifile is not "-":
try:
tmp = open(ifile, "r")
sys.stdin = tmp
except IOError:
sys.stderr.write("Cannot open %s for reading\n" % ifile)
sys.exit(1)
if bflag:
buffers_print_all(sys.stdin, filters, nflag)
if dflag:
print_dict(dnodes_build_dict(sys.stdin), filters, nflag)
if tflag:
print_dict(types_build_dict(sys.stdin), filters, nflag)
if __name__ == '__main__':
main()
Index: head/sys/contrib/openzfs/cmd/zdb/zdb.c
===================================================================
--- head/sys/contrib/openzfs/cmd/zdb/zdb.c (revision 366779)
+++ head/sys/contrib/openzfs/cmd/zdb/zdb.c (revision 366780)
@@ -1,8598 +1,8612 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2019 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Nexenta Systems, Inc.
* Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
* Copyright (c) 2015, 2017, Intel Corporation.
* Copyright (c) 2020 Datto Inc.
* Copyright (c) 2020, The FreeBSD Foundation [1]
*
* [1] Portions of this software were developed by Allan Jude
* under sponsorship from the FreeBSD Foundation.
*/
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/dmu.h>
#include <sys/zap.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_sa.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab_impl.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_bookmark.h>
#include <sys/dbuf.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <sys/dmu_send.h>
#include <sys/dmu_traverse.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/zfs_fuid.h>
#include <sys/arc.h>
#include <sys/arc_impl.h>
#include <sys/ddt.h>
#include <sys/zfeature.h>
#include <sys/abd.h>
#include <sys/blkptr.h>
#include <sys/dsl_crypt.h>
#include <sys/dsl_scan.h>
#include <sys/btree.h>
#include <zfs_comutil.h>
#include <sys/zstd/zstd.h>
#include <libnvpair.h>
#include <libzutil.h>
#include "zdb.h"
#define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
zio_compress_table[(idx)].ci_name : "UNKNOWN")
#define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
zio_checksum_table[(idx)].ci_name : "UNKNOWN")
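/*
 * ZDB_OT_TYPE collapses the DMU_OTN_* pseudo-types onto their generic
 * counterparts (DMU_OT_ZAP_OTHER, DMU_OT_UINT64_OTHER) so the value can
 * index fixed-size per-type tables; anything else out of range maps to
 * DMU_OT_NUMTYPES.
 */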
#define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : \
(idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA ? \
DMU_OT_ZAP_OTHER : \
(idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? \
DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES)
static char *
zdb_ot_name(dmu_object_type_t type)
{
if (type < DMU_OT_NUMTYPES)
return (dmu_ot[type].ot_name);
else if ((type & DMU_OT_NEWTYPE) &&
((type & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS))
return (dmu_ot_byteswap[type & DMU_OT_BYTESWAP_MASK].ob_name);
else
return ("UNKNOWN");
}
extern int reference_tracking_enable;
extern int zfs_recover;
extern unsigned long zfs_arc_meta_min, zfs_arc_meta_limit;
extern int zfs_vdev_async_read_max_active;
extern boolean_t spa_load_verify_dryrun;
extern int zfs_reconstruct_indirect_combinations_max;
extern int zfs_btree_verify_intensity;
static const char cmdname[] = "zdb";
uint8_t dump_opt[256];
typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
uint64_t *zopt_metaslab = NULL;
static unsigned zopt_metaslab_args = 0;
typedef struct zopt_object_range {
uint64_t zor_obj_start;
uint64_t zor_obj_end;
uint64_t zor_flags;
} zopt_object_range_t;
zopt_object_range_t *zopt_object_ranges = NULL;
static unsigned zopt_object_args = 0;
static int flagbits[256];
#define ZOR_FLAG_PLAIN_FILE 0x0001
#define ZOR_FLAG_DIRECTORY 0x0002
#define ZOR_FLAG_SPACE_MAP 0x0004
#define ZOR_FLAG_ZAP 0x0008
#define ZOR_FLAG_ALL_TYPES -1
#define ZOR_SUPPORTED_FLAGS (ZOR_FLAG_PLAIN_FILE | \
ZOR_FLAG_DIRECTORY | \
ZOR_FLAG_SPACE_MAP | \
ZOR_FLAG_ZAP)
#define ZDB_FLAG_CHECKSUM 0x0001
#define ZDB_FLAG_DECOMPRESS 0x0002
#define ZDB_FLAG_BSWAP 0x0004
#define ZDB_FLAG_GBH 0x0008
#define ZDB_FLAG_INDIRECT 0x0010
#define ZDB_FLAG_RAW 0x0020
#define ZDB_FLAG_PRINT_BLKPTR 0x0040
#define ZDB_FLAG_VERBOSE 0x0080
uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */
static int leaked_objects = 0;
static range_tree_t *mos_refd_objs;
static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *,
boolean_t);
static void mos_obj_refd(uint64_t);
static void mos_obj_refd_multiple(uint64_t);
static int dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t free,
dmu_tx_t *tx);
typedef struct sublivelist_verify {
/* all ALLOC'd blkptr_t in one sub-livelist */
zfs_btree_t sv_all_allocs;
/* all FREE'd blkptr_t in one sub-livelist */
zfs_btree_t sv_all_frees;
/* FREE's that haven't yet matched to an ALLOC, in one sub-livelist */
zfs_btree_t sv_pair;
/* ALLOC's without a matching FREE, accumulates across sub-livelists */
zfs_btree_t sv_leftover;
} sublivelist_verify_t;
static int
livelist_compare(const void *larg, const void *rarg)
{
const blkptr_t *l = larg;
const blkptr_t *r = rarg;
/* Sort them according to dva[0] */
uint64_t l_dva0_vdev, r_dva0_vdev;
l_dva0_vdev = DVA_GET_VDEV(&l->blk_dva[0]);
r_dva0_vdev = DVA_GET_VDEV(&r->blk_dva[0]);
if (l_dva0_vdev < r_dva0_vdev)
return (-1);
else if (l_dva0_vdev > r_dva0_vdev)
return (+1);
/* if vdevs are equal, sort by offsets. */
uint64_t l_dva0_offset;
uint64_t r_dva0_offset;
l_dva0_offset = DVA_GET_OFFSET(&l->blk_dva[0]);
r_dva0_offset = DVA_GET_OFFSET(&r->blk_dva[0]);
if (l_dva0_offset < r_dva0_offset) {
return (-1);
} else if (l_dva0_offset > r_dva0_offset) {
return (+1);
}
/*
* Since we're storing blkptrs without cancelling FREE/ALLOC pairs,
* it's possible the offsets are equal. In that case, sort by txg
*/
if (l->blk_birth < r->blk_birth) {
return (-1);
} else if (l->blk_birth > r->blk_birth) {
return (+1);
}
return (0);
}
typedef struct sublivelist_verify_block {
dva_t svb_dva;
/*
* We need this to check if the block marked as allocated
* in the livelist was freed (and potentially reallocated)
* in the metaslab spacemaps at a later TXG.
*/
uint64_t svb_allocated_txg;
} sublivelist_verify_block_t;
static void zdb_print_blkptr(const blkptr_t *bp, int flags);
static int
sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free,
dmu_tx_t *tx)
{
ASSERT3P(tx, ==, NULL);
struct sublivelist_verify *sv = arg;
char blkbuf[BP_SPRINTF_LEN];
zfs_btree_index_t where;
if (free) {
zfs_btree_add(&sv->sv_pair, bp);
/* Check if the FREE is a duplicate */
if (zfs_btree_find(&sv->sv_all_frees, bp, &where) != NULL) {
snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp,
free);
(void) printf("\tERROR: Duplicate FREE: %s\n", blkbuf);
} else {
zfs_btree_add_idx(&sv->sv_all_frees, bp, &where);
}
} else {
/* Check if the ALLOC has been freed */
if (zfs_btree_find(&sv->sv_pair, bp, &where) != NULL) {
zfs_btree_remove_idx(&sv->sv_pair, &where);
} else {
for (int i = 0; i < SPA_DVAS_PER_BP; i++) {
if (DVA_IS_EMPTY(&bp->blk_dva[i]))
break;
sublivelist_verify_block_t svb = {
.svb_dva = bp->blk_dva[i],
.svb_allocated_txg = bp->blk_birth
};
if (zfs_btree_find(&sv->sv_leftover, &svb,
&where) == NULL) {
zfs_btree_add_idx(&sv->sv_leftover,
&svb, &where);
}
}
}
/* Check if the ALLOC is a duplicate */
if (zfs_btree_find(&sv->sv_all_allocs, bp, &where) != NULL) {
snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp,
free);
(void) printf("\tERROR: Duplicate ALLOC: %s\n", blkbuf);
} else {
zfs_btree_add_idx(&sv->sv_all_allocs, bp, &where);
}
}
return (0);
}
static int
sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle)
{
int err;
char blkbuf[BP_SPRINTF_LEN];
struct sublivelist_verify *sv = args;
zfs_btree_create(&sv->sv_all_allocs, livelist_compare,
sizeof (blkptr_t));
zfs_btree_create(&sv->sv_all_frees, livelist_compare,
sizeof (blkptr_t));
zfs_btree_create(&sv->sv_pair, livelist_compare,
sizeof (blkptr_t));
err = bpobj_iterate_nofree(&dle->dle_bpobj, sublivelist_verify_blkptr,
sv, NULL);
zfs_btree_clear(&sv->sv_all_allocs);
zfs_btree_destroy(&sv->sv_all_allocs);
zfs_btree_clear(&sv->sv_all_frees);
zfs_btree_destroy(&sv->sv_all_frees);
blkptr_t *e;
zfs_btree_index_t *cookie = NULL;
while ((e = zfs_btree_destroy_nodes(&sv->sv_pair, &cookie)) != NULL) {
snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), e, B_TRUE);
(void) printf("\tERROR: Unmatched FREE: %s\n", blkbuf);
}
zfs_btree_destroy(&sv->sv_pair);
return (err);
}
static int
livelist_block_compare(const void *larg, const void *rarg)
{
const sublivelist_verify_block_t *l = larg;
const sublivelist_verify_block_t *r = rarg;
if (DVA_GET_VDEV(&l->svb_dva) < DVA_GET_VDEV(&r->svb_dva))
return (-1);
else if (DVA_GET_VDEV(&l->svb_dva) > DVA_GET_VDEV(&r->svb_dva))
return (+1);
if (DVA_GET_OFFSET(&l->svb_dva) < DVA_GET_OFFSET(&r->svb_dva))
return (-1);
else if (DVA_GET_OFFSET(&l->svb_dva) > DVA_GET_OFFSET(&r->svb_dva))
return (+1);
if (DVA_GET_ASIZE(&l->svb_dva) < DVA_GET_ASIZE(&r->svb_dva))
return (-1);
else if (DVA_GET_ASIZE(&l->svb_dva) > DVA_GET_ASIZE(&r->svb_dva))
return (+1);
return (0);
}
/*
* Check for errors in a livelist while tracking all unfreed ALLOCs in the
* sublivelist_verify_t: sv->sv_leftover
*/
static void
livelist_verify(dsl_deadlist_t *dl, void *arg)
{
sublivelist_verify_t *sv = arg;
dsl_deadlist_iterate(dl, sublivelist_verify_func, sv);
}
/*
* Check for errors in the livelist entry and discard the intermediary
* data structures
*/
/* ARGSUSED */
static int
sublivelist_verify_lightweight(void *args, dsl_deadlist_entry_t *dle)
{
sublivelist_verify_t sv;
zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
sizeof (sublivelist_verify_block_t));
int err = sublivelist_verify_func(&sv, dle);
zfs_btree_clear(&sv.sv_leftover);
zfs_btree_destroy(&sv.sv_leftover);
return (err);
}
typedef struct metaslab_verify {
/*
* Tree containing all the leftover ALLOCs from the livelists
* that are part of this metaslab.
*/
zfs_btree_t mv_livelist_allocs;
/*
* Metaslab information.
*/
uint64_t mv_vdid;
uint64_t mv_msid;
uint64_t mv_start;
uint64_t mv_end;
/*
* What's currently allocated for this metaslab.
*/
range_tree_t *mv_allocated;
} metaslab_verify_t;
typedef void ll_iter_t(dsl_deadlist_t *ll, void *arg);
typedef int (*zdb_log_sm_cb_t)(spa_t *spa, space_map_entry_t *sme, uint64_t txg,
void *arg);
typedef struct unflushed_iter_cb_arg {
spa_t *uic_spa;
uint64_t uic_txg;
void *uic_arg;
zdb_log_sm_cb_t uic_cb;
} unflushed_iter_cb_arg_t;
static int
iterate_through_spacemap_logs_cb(space_map_entry_t *sme, void *arg)
{
unflushed_iter_cb_arg_t *uic = arg;
return (uic->uic_cb(uic->uic_spa, sme, uic->uic_txg, uic->uic_arg));
}
static void
iterate_through_spacemap_logs(spa_t *spa, zdb_log_sm_cb_t cb, void *arg)
{
if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
return;
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
space_map_t *sm = NULL;
VERIFY0(space_map_open(&sm, spa_meta_objset(spa),
sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT));
unflushed_iter_cb_arg_t uic = {
.uic_spa = spa,
.uic_txg = sls->sls_txg,
.uic_arg = arg,
.uic_cb = cb
};
VERIFY0(space_map_iterate(sm, space_map_length(sm),
iterate_through_spacemap_logs_cb, &uic));
space_map_close(sm);
}
spa_config_exit(spa, SCL_CONFIG, FTAG);
}
static void
verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg,
uint64_t offset, uint64_t size)
{
sublivelist_verify_block_t svb;
DVA_SET_VDEV(&svb.svb_dva, mv->mv_vdid);
DVA_SET_OFFSET(&svb.svb_dva, offset);
DVA_SET_ASIZE(&svb.svb_dva, size);
zfs_btree_index_t where;
uint64_t end_offset = offset + size;
/*
* Look for an exact match for spacemap entry in the livelist entries.
* Then, look for other livelist entries that fall within the range
* of the spacemap entry as it may have been condensed
*/
sublivelist_verify_block_t *found =
zfs_btree_find(&mv->mv_livelist_allocs, &svb, &where);
if (found == NULL) {
found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where);
}
for (; found != NULL && DVA_GET_VDEV(&found->svb_dva) == mv->mv_vdid &&
DVA_GET_OFFSET(&found->svb_dva) < end_offset;
found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
if (found->svb_allocated_txg <= txg) {
(void) printf("ERROR: Livelist ALLOC [%llx:%llx] "
"from TXG %llx FREED at TXG %llx\n",
(u_longlong_t)DVA_GET_OFFSET(&found->svb_dva),
(u_longlong_t)DVA_GET_ASIZE(&found->svb_dva),
(u_longlong_t)found->svb_allocated_txg,
(u_longlong_t)txg);
}
}
}
static int
metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg)
{
metaslab_verify_t *mv = arg;
uint64_t offset = sme->sme_offset;
uint64_t size = sme->sme_run;
uint64_t txg = sme->sme_txg;
if (sme->sme_type == SM_ALLOC) {
if (range_tree_contains(mv->mv_allocated,
offset, size)) {
(void) printf("ERROR: DOUBLE ALLOC: "
"%llu [%llx:%llx] "
"%llu:%llu LOG_SM\n",
(u_longlong_t)txg, (u_longlong_t)offset,
(u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
(u_longlong_t)mv->mv_msid);
} else {
range_tree_add(mv->mv_allocated,
offset, size);
}
} else {
if (!range_tree_contains(mv->mv_allocated,
offset, size)) {
(void) printf("ERROR: DOUBLE FREE: "
"%llu [%llx:%llx] "
"%llu:%llu LOG_SM\n",
(u_longlong_t)txg, (u_longlong_t)offset,
(u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
(u_longlong_t)mv->mv_msid);
} else {
range_tree_remove(mv->mv_allocated,
offset, size);
}
}
if (sme->sme_type != SM_ALLOC) {
/*
* If something is freed in the spacemap, verify that
* it is not listed as allocated in the livelist.
*/
verify_livelist_allocs(mv, txg, offset, size);
}
return (0);
}
static int
spacemap_check_sm_log_cb(spa_t *spa, space_map_entry_t *sme,
uint64_t txg, void *arg)
{
metaslab_verify_t *mv = arg;
uint64_t offset = sme->sme_offset;
uint64_t vdev_id = sme->sme_vdev;
vdev_t *vd = vdev_lookup_top(spa, vdev_id);
/* skip indirect vdevs */
if (!vdev_is_concrete(vd))
return (0);
if (vdev_id != mv->mv_vdid)
return (0);
metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
if (ms->ms_id != mv->mv_msid)
return (0);
if (txg < metaslab_unflushed_txg(ms))
return (0);
ASSERT3U(txg, ==, sme->sme_txg);
return (metaslab_spacemap_validation_cb(sme, mv));
}
static void
spacemap_check_sm_log(spa_t *spa, metaslab_verify_t *mv)
{
iterate_through_spacemap_logs(spa, spacemap_check_sm_log_cb, mv);
}
static void
spacemap_check_ms_sm(space_map_t *sm, metaslab_verify_t *mv)
{
if (sm == NULL)
return;
VERIFY0(space_map_iterate(sm, space_map_length(sm),
metaslab_spacemap_validation_cb, mv));
}
static void iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg);
/*
* Transfer blocks from sv_leftover tree to the mv_livelist_allocs if
* they are part of that metaslab (mv_msid).
*/
static void
mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv)
{
zfs_btree_index_t where;
sublivelist_verify_block_t *svb;
ASSERT3U(zfs_btree_numnodes(&mv->mv_livelist_allocs), ==, 0);
for (svb = zfs_btree_first(&sv->sv_leftover, &where);
svb != NULL;
svb = zfs_btree_next(&sv->sv_leftover, &where, &where)) {
if (DVA_GET_VDEV(&svb->svb_dva) != mv->mv_vdid)
continue;
if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start &&
(DVA_GET_OFFSET(&svb->svb_dva) +
DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_start) {
(void) printf("ERROR: Found block that crosses "
"metaslab boundary: <%llu:%llx:%llx>\n",
(u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
(u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
(u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
continue;
}
if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start)
continue;
if (DVA_GET_OFFSET(&svb->svb_dva) >= mv->mv_end)
continue;
if ((DVA_GET_OFFSET(&svb->svb_dva) +
DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_end) {
(void) printf("ERROR: Found block that crosses "
"metaslab boundary: <%llu:%llx:%llx>\n",
(u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
(u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
(u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
continue;
}
zfs_btree_add(&mv->mv_livelist_allocs, svb);
}
for (svb = zfs_btree_first(&mv->mv_livelist_allocs, &where);
svb != NULL;
svb = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
zfs_btree_remove(&sv->sv_leftover, svb);
}
}
/*
* [Livelist Check]
* Iterate through all the sublivelists and:
* - report leftover frees
* - report double ALLOCs/FREEs
* - record leftover ALLOCs together with their TXG [see Cross Check]
*
* [Spacemap Check]
* for each metaslab:
* - iterate over spacemap and then the metaslab's entries in the
* spacemap log, then report any double FREEs and ALLOCs (do not
* blow up).
*
* [Cross Check]
* After finishing the Livelist Check phase and while being in the
* Spacemap Check phase, we find all the recorded leftover ALLOCs
* of the livelist check that are part of the metaslab that we are
* currently looking at in the Spacemap Check. We report any entries
* that are marked as ALLOCs in the livelists but have been actually
* freed (and potentially allocated again) after their TXG stamp in
* the spacemaps. Also report any ALLOCs from the livelists that
* belong to indirect vdevs (e.g. their vdev completed removal).
*
* Note that this will miss Log Spacemap entries that cancelled each other
* out before being flushed to the metaslab, so we are not guaranteed
* to match all erroneous ALLOCs.
*/
static void
livelist_metaslab_validate(spa_t *spa)
{
(void) printf("Verifying deleted livelist entries\n");
sublivelist_verify_t sv;
zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
sizeof (sublivelist_verify_block_t));
iterate_deleted_livelists(spa, livelist_verify, &sv);
(void) printf("Verifying metaslab entries\n");
vdev_t *rvd = spa->spa_root_vdev;
for (uint64_t c = 0; c < rvd->vdev_children; c++) {
vdev_t *vd = rvd->vdev_child[c];
if (!vdev_is_concrete(vd))
continue;
for (uint64_t mid = 0; mid < vd->vdev_ms_count; mid++) {
metaslab_t *m = vd->vdev_ms[mid];
(void) fprintf(stderr,
"\rverifying concrete vdev %llu, "
"metaslab %llu of %llu ...",
(longlong_t)vd->vdev_id,
(longlong_t)mid,
(longlong_t)vd->vdev_ms_count);
uint64_t shift, start;
range_seg_type_t type =
metaslab_calculate_range_tree_type(vd, m,
&start, &shift);
metaslab_verify_t mv;
mv.mv_allocated = range_tree_create(NULL,
type, NULL, start, shift);
mv.mv_vdid = vd->vdev_id;
mv.mv_msid = m->ms_id;
mv.mv_start = m->ms_start;
mv.mv_end = m->ms_start + m->ms_size;
zfs_btree_create(&mv.mv_livelist_allocs,
livelist_block_compare,
sizeof (sublivelist_verify_block_t));
mv_populate_livelist_allocs(&mv, &sv);
spacemap_check_ms_sm(m->ms_sm, &mv);
spacemap_check_sm_log(spa, &mv);
range_tree_vacate(mv.mv_allocated, NULL, NULL);
range_tree_destroy(mv.mv_allocated);
zfs_btree_clear(&mv.mv_livelist_allocs);
zfs_btree_destroy(&mv.mv_livelist_allocs);
}
}
(void) fprintf(stderr, "\n");
/*
* If there are any segments in the leftover tree after we walked
* through all the metaslabs in the concrete vdevs then this means
* that we have segments in the livelists that belong to indirect
* vdevs and are marked as allocated.
*/
if (zfs_btree_numnodes(&sv.sv_leftover) == 0) {
zfs_btree_destroy(&sv.sv_leftover);
return;
}
(void) printf("ERROR: Found livelist blocks marked as allocated "
"for indirect vdevs:\n");
zfs_btree_index_t *where = NULL;
sublivelist_verify_block_t *svb;
while ((svb = zfs_btree_destroy_nodes(&sv.sv_leftover, &where)) !=
NULL) {
int vdev_id = DVA_GET_VDEV(&svb->svb_dva);
ASSERT3U(vdev_id, <, rvd->vdev_children);
vdev_t *vd = rvd->vdev_child[vdev_id];
ASSERT(!vdev_is_concrete(vd));
(void) printf("<%d:%llx:%llx> TXG %llx\n",
vdev_id, (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
(u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva),
(u_longlong_t)svb->svb_allocated_txg);
}
(void) printf("\n");
zfs_btree_destroy(&sv.sv_leftover);
}
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
* debugging facilities.
*/
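/*
 * libumem looks these functions up in the binary at startup, so
 * returning the strings here has the same effect as setting the
 * UMEM_DEBUG and UMEM_LOGGING environment variables.
 */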
const char *
_umem_debug_init(void)
{
return ("default,verbose"); /* $UMEM_DEBUG setting */
}
const char *
_umem_logging_init(void)
{
return ("fail,contents"); /* $UMEM_LOGGING setting */
}
static void
usage(void)
{
(void) fprintf(stderr,
"Usage:\t%s [-AbcdDFGhikLMPsvXy] [-e [-V] [-p <path> ...]] "
"[-I <inflight I/Os>]\n"
"\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
"\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]]\n"
"\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
"\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]\n"
"\t%s [-v] <bookmark>\n"
"\t%s -C [-A] [-U <cache>]\n"
"\t%s -l [-Aqu] <device>\n"
"\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
"[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
"\t%s -O <dataset> <path>\n"
"\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
"\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
"\t%s -E [-A] word0:word1:...:word15\n"
"\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
"<poolname>\n\n",
cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
cmdname, cmdname, cmdname);
(void) fprintf(stderr, " Dataset name must include at least one "
"separator character '/' or '@'\n");
(void) fprintf(stderr, " If dataset name is specified, only that "
"dataset is dumped\n");
(void) fprintf(stderr, " If object numbers or object number "
"ranges are specified, only those\n"
" objects or ranges are dumped.\n\n");
(void) fprintf(stderr,
" Object ranges take the form <start>:<end>[:<flags>]\n"
" start Starting object number\n"
" end Ending object number, or -1 for no upper bound\n"
" flags Optional flags to select object types:\n"
" A All objects (this is the default)\n"
" d ZFS directories\n"
" f ZFS files \n"
" m SPA space maps\n"
" z ZAPs\n"
" - Negate effect of next flag\n\n");
(void) fprintf(stderr, " Options to control amount of output:\n");
(void) fprintf(stderr, " -b block statistics\n");
(void) fprintf(stderr, " -c checksum all metadata (twice for "
"all data) blocks\n");
(void) fprintf(stderr, " -C config (or cachefile if alone)\n");
(void) fprintf(stderr, " -d dataset(s)\n");
(void) fprintf(stderr, " -D dedup statistics\n");
(void) fprintf(stderr, " -E decode and display block from an "
"embedded block pointer\n");
(void) fprintf(stderr, " -h pool history\n");
(void) fprintf(stderr, " -i intent logs\n");
(void) fprintf(stderr, " -l read label contents\n");
(void) fprintf(stderr, " -k examine the checkpointed state "
"of the pool\n");
(void) fprintf(stderr, " -L disable leak tracking (do not "
"load spacemaps)\n");
(void) fprintf(stderr, " -m metaslabs\n");
(void) fprintf(stderr, " -M metaslab groups\n");
(void) fprintf(stderr, " -O perform object lookups by path\n");
(void) fprintf(stderr, " -R read and display block from a "
"device\n");
(void) fprintf(stderr, " -s report stats on zdb's I/O\n");
(void) fprintf(stderr, " -S simulate dedup to measure effect\n");
(void) fprintf(stderr, " -v verbose (applies to all "
"others)\n");
(void) fprintf(stderr, " -y perform livelist and metaslab "
"validation on any livelists being deleted\n\n");
(void) fprintf(stderr, " Below options are intended for use "
"with other options:\n");
(void) fprintf(stderr, " -A ignore assertions (-A), enable "
"panic recovery (-AA) or both (-AAA)\n");
(void) fprintf(stderr, " -e pool is exported/destroyed/"
"has altroot/not in a cachefile\n");
(void) fprintf(stderr, " -F attempt automatic rewind within "
"safe range of transaction groups\n");
(void) fprintf(stderr, " -G dump zfs_dbgmsg buffer before "
"exiting\n");
(void) fprintf(stderr, " -I <number of inflight I/Os> -- "
"specify the maximum number of\n "
"checksumming I/Os [default is 200]\n");
(void) fprintf(stderr, " -o <variable>=<value> set global "
"variable to an unsigned 32-bit integer\n");
(void) fprintf(stderr, " -p <path> -- use one or more with "
"-e to specify path to vdev dir\n");
(void) fprintf(stderr, " -P print numbers in parseable form\n");
(void) fprintf(stderr, " -q don't print label contents\n");
(void) fprintf(stderr, " -t <txg> -- highest txg to use when "
"searching for uberblocks\n");
(void) fprintf(stderr, " -u uberblock\n");
(void) fprintf(stderr, " -U <cachefile_path> -- use alternate "
"cachefile\n");
(void) fprintf(stderr, " -V do verbatim import\n");
(void) fprintf(stderr, " -x <dumpdir> -- "
"dump all read blocks into specified directory\n");
(void) fprintf(stderr, " -X attempt extreme rewind (does not "
"work with dataset)\n");
(void) fprintf(stderr, " -Y attempt all reconstruction "
"combinations for split blocks\n");
(void) fprintf(stderr, " -Z show ZSTD headers \n");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
exit(1);
}
static void
dump_debug_buffer(void)
{
if (dump_opt['G']) {
(void) printf("\n");
(void) fflush(stdout);
zfs_dbgmsg_print("zdb");
}
}
/*
* Called for usage errors that are discovered after a call to spa_open(),
* dmu_bonus_hold(), or pool_match(). abort() is called for other errors.
*/
static void
fatal(const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
(void) fprintf(stderr, "%s: ", cmdname);
(void) vfprintf(stderr, fmt, ap);
va_end(ap);
(void) fprintf(stderr, "\n");
dump_debug_buffer();
exit(1);
}
/* ARGSUSED */
static void
dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
{
nvlist_t *nv;
size_t nvsize = *(uint64_t *)data;
char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
umem_free(packed, nvsize);
dump_nvlist(nv, 8);
nvlist_free(nv);
}
/* ARGSUSED */
static void
dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
{
spa_history_phys_t *shp = data;
if (shp == NULL)
return;
(void) printf("\t\tpool_create_len = %llu\n",
(u_longlong_t)shp->sh_pool_create_len);
(void) printf("\t\tphys_max_off = %llu\n",
(u_longlong_t)shp->sh_phys_max_off);
(void) printf("\t\tbof = %llu\n",
(u_longlong_t)shp->sh_bof);
(void) printf("\t\teof = %llu\n",
(u_longlong_t)shp->sh_eof);
(void) printf("\t\trecords_lost = %llu\n",
(u_longlong_t)shp->sh_records_lost);
}
static void
zdb_nicenum(uint64_t num, char *buf, size_t buflen)
{
if (dump_opt['P'])
(void) snprintf(buf, buflen, "%llu", (u_longlong_t)num);
else
nicenum(num, buf, buflen);
}
static const char histo_stars[] = "****************************************";
static const uint64_t histo_width = sizeof (histo_stars) - 1;
static void
dump_histogram(const uint64_t *histo, int size, int offset)
{
int i;
int minidx = size - 1;
int maxidx = 0;
uint64_t max = 0;
for (i = 0; i < size; i++) {
if (histo[i] > max)
max = histo[i];
if (histo[i] > 0 && i > maxidx)
maxidx = i;
if (histo[i] > 0 && i < minidx)
minidx = i;
}
if (max < histo_width)
max = histo_width;
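/*
 * Each bucket prints roughly histo[i] * histo_width / max stars by
 * indexing into histo_stars from its right-hand end.
 */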
for (i = minidx; i <= maxidx; i++) {
(void) printf("\t\t\t%3u: %6llu %s\n",
i + offset, (u_longlong_t)histo[i],
&histo_stars[(max - histo[i]) * histo_width / max]);
}
}
static void
dump_zap_stats(objset_t *os, uint64_t object)
{
int error;
zap_stats_t zs;
error = zap_get_stats(os, object, &zs);
if (error)
return;
if (zs.zs_ptrtbl_len == 0) {
ASSERT(zs.zs_num_blocks == 1);
(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
(u_longlong_t)zs.zs_blocksize,
(u_longlong_t)zs.zs_num_entries);
return;
}
(void) printf("\tFat ZAP stats:\n");
(void) printf("\t\tPointer table:\n");
(void) printf("\t\t\t%llu elements\n",
(u_longlong_t)zs.zs_ptrtbl_len);
(void) printf("\t\t\tzt_blk: %llu\n",
(u_longlong_t)zs.zs_ptrtbl_zt_blk);
(void) printf("\t\t\tzt_numblks: %llu\n",
(u_longlong_t)zs.zs_ptrtbl_zt_numblks);
(void) printf("\t\t\tzt_shift: %llu\n",
(u_longlong_t)zs.zs_ptrtbl_zt_shift);
(void) printf("\t\t\tzt_blks_copied: %llu\n",
(u_longlong_t)zs.zs_ptrtbl_blks_copied);
(void) printf("\t\t\tzt_nextblk: %llu\n",
(u_longlong_t)zs.zs_ptrtbl_nextblk);
(void) printf("\t\tZAP entries: %llu\n",
(u_longlong_t)zs.zs_num_entries);
(void) printf("\t\tLeaf blocks: %llu\n",
(u_longlong_t)zs.zs_num_leafs);
(void) printf("\t\tTotal blocks: %llu\n",
(u_longlong_t)zs.zs_num_blocks);
(void) printf("\t\tzap_block_type: 0x%llx\n",
(u_longlong_t)zs.zs_block_type);
(void) printf("\t\tzap_magic: 0x%llx\n",
(u_longlong_t)zs.zs_magic);
(void) printf("\t\tzap_salt: 0x%llx\n",
(u_longlong_t)zs.zs_salt);
(void) printf("\t\tLeafs with 2^n pointers:\n");
dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0);
(void) printf("\t\tBlocks with n*5 entries:\n");
dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0);
(void) printf("\t\tBlocks n/10 full:\n");
dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0);
(void) printf("\t\tEntries with n chunks:\n");
dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0);
(void) printf("\t\tBuckets with n entries:\n");
dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0);
}
/*ARGSUSED*/
static void
dump_none(objset_t *os, uint64_t object, void *data, size_t size)
{
}
/*ARGSUSED*/
static void
dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
{
(void) printf("\tUNKNOWN OBJECT TYPE\n");
}
/*ARGSUSED*/
static void
dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
{
}
/*ARGSUSED*/
static void
dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
{
uint64_t *arr;
uint64_t oursize;
if (dump_opt['d'] < 6)
return;
if (data == NULL) {
dmu_object_info_t doi;
VERIFY0(dmu_object_info(os, object, &doi));
size = doi.doi_max_offset;
/*
* We cap the size at 1 mebibyte here to prevent
* allocation failures and nigh-infinite printing if the
* object is extremely large.
*/
oursize = MIN(size, 1 << 20);
arr = kmem_alloc(oursize, KM_SLEEP);
int err = dmu_read(os, object, 0, oursize, arr, 0);
if (err != 0) {
(void) printf("got error %u from dmu_read\n", err);
kmem_free(arr, oursize);
return;
}
} else {
/*
* Even though the allocation is already done in this code path,
* we still cap the size to prevent excessive printing.
*/
oursize = MIN(size, 1 << 20);
arr = data;
}
if (size == 0) {
(void) printf("\t\t[]\n");
return;
}
(void) printf("\t\t[%0llx", (u_longlong_t)arr[0]);
for (size_t i = 1; i * sizeof (uint64_t) < oursize; i++) {
if (i % 4 != 0)
(void) printf(", %0llx", (u_longlong_t)arr[i]);
else
(void) printf(",\n\t\t%0llx", (u_longlong_t)arr[i]);
}
if (oursize != size)
(void) printf(", ... ");
(void) printf("]\n");
if (data == NULL)
kmem_free(arr, oursize);
}
/*ARGSUSED*/
static void
dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
{
zap_cursor_t zc;
zap_attribute_t attr;
void *prop;
unsigned i;
dump_zap_stats(os, object);
(void) printf("\n");
for (zap_cursor_init(&zc, os, object);
zap_cursor_retrieve(&zc, &attr) == 0;
zap_cursor_advance(&zc)) {
(void) printf("\t\t%s = ", attr.za_name);
if (attr.za_num_integers == 0) {
(void) printf("\n");
continue;
}
prop = umem_zalloc(attr.za_num_integers *
attr.za_integer_length, UMEM_NOFAIL);
(void) zap_lookup(os, object, attr.za_name,
attr.za_integer_length, attr.za_num_integers, prop);
if (attr.za_integer_length == 1) {
- (void) printf("%s", (char *)prop);
+ if (strcmp(attr.za_name,
+ DSL_CRYPTO_KEY_MASTER_KEY) == 0 ||
+ strcmp(attr.za_name,
+ DSL_CRYPTO_KEY_HMAC_KEY) == 0 ||
+ strcmp(attr.za_name, DSL_CRYPTO_KEY_IV) == 0 ||
+ strcmp(attr.za_name, DSL_CRYPTO_KEY_MAC) == 0 ||
+ strcmp(attr.za_name, DMU_POOL_CHECKSUM_SALT) == 0) {
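+ /*
+ * These attributes hold raw binary data (keys, IVs, MACs,
+ * the checksum salt) rather than printable strings, so
+ * hex-dump them byte by byte.
+ */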
+ uint8_t *u8 = prop;
+
+ for (i = 0; i < attr.za_num_integers; i++) {
+ (void) printf("%02x", u8[i]);
+ }
+ } else {
+ (void) printf("%s", (char *)prop);
+ }
} else {
for (i = 0; i < attr.za_num_integers; i++) {
switch (attr.za_integer_length) {
case 2:
(void) printf("%u ",
((uint16_t *)prop)[i]);
break;
case 4:
(void) printf("%u ",
((uint32_t *)prop)[i]);
break;
case 8:
(void) printf("%lld ",
(u_longlong_t)((int64_t *)prop)[i]);
break;
}
}
}
(void) printf("\n");
umem_free(prop, attr.za_num_integers * attr.za_integer_length);
}
zap_cursor_fini(&zc);
}
static void
dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size)
{
bpobj_phys_t *bpop = data;
uint64_t i;
char bytes[32], comp[32], uncomp[32];
/* make sure the output won't get truncated */
CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
if (bpop == NULL)
return;
zdb_nicenum(bpop->bpo_bytes, bytes, sizeof (bytes));
zdb_nicenum(bpop->bpo_comp, comp, sizeof (comp));
zdb_nicenum(bpop->bpo_uncomp, uncomp, sizeof (uncomp));
(void) printf("\t\tnum_blkptrs = %llu\n",
(u_longlong_t)bpop->bpo_num_blkptrs);
(void) printf("\t\tbytes = %s\n", bytes);
if (size >= BPOBJ_SIZE_V1) {
(void) printf("\t\tcomp = %s\n", comp);
(void) printf("\t\tuncomp = %s\n", uncomp);
}
if (size >= BPOBJ_SIZE_V2) {
(void) printf("\t\tsubobjs = %llu\n",
(u_longlong_t)bpop->bpo_subobjs);
(void) printf("\t\tnum_subobjs = %llu\n",
(u_longlong_t)bpop->bpo_num_subobjs);
}
if (size >= sizeof (*bpop)) {
(void) printf("\t\tnum_freed = %llu\n",
(u_longlong_t)bpop->bpo_num_freed);
}
if (dump_opt['d'] < 5)
return;
for (i = 0; i < bpop->bpo_num_blkptrs; i++) {
char blkbuf[BP_SPRINTF_LEN];
blkptr_t bp;
int err = dmu_read(os, object,
i * sizeof (bp), sizeof (bp), &bp, 0);
if (err != 0) {
(void) printf("got error %u from dmu_read\n", err);
break;
}
snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp,
BP_GET_FREE(&bp));
(void) printf("\t%s\n", blkbuf);
}
}
/* ARGSUSED */
static void
dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size)
{
dmu_object_info_t doi;
int64_t i;
VERIFY0(dmu_object_info(os, object, &doi));
uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP);
int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0);
if (err != 0) {
(void) printf("got error %u from dmu_read\n", err);
kmem_free(subobjs, doi.doi_max_offset);
return;
}
int64_t last_nonzero = -1;
for (i = 0; i < doi.doi_max_offset / 8; i++) {
if (subobjs[i] != 0)
last_nonzero = i;
}
for (i = 0; i <= last_nonzero; i++) {
(void) printf("\t%llu\n", (u_longlong_t)subobjs[i]);
}
kmem_free(subobjs, doi.doi_max_offset);
}
/*ARGSUSED*/
static void
dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
{
dump_zap_stats(os, object);
/* contents are printed elsewhere, properly decoded */
}
/*ARGSUSED*/
static void
dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
{
zap_cursor_t zc;
zap_attribute_t attr;
dump_zap_stats(os, object);
(void) printf("\n");
for (zap_cursor_init(&zc, os, object);
zap_cursor_retrieve(&zc, &attr) == 0;
zap_cursor_advance(&zc)) {
(void) printf("\t\t%s = ", attr.za_name);
if (attr.za_num_integers == 0) {
(void) printf("\n");
continue;
}
(void) printf(" %llx : [%d:%d:%d]\n",
(u_longlong_t)attr.za_first_integer,
(int)ATTR_LENGTH(attr.za_first_integer),
(int)ATTR_BSWAP(attr.za_first_integer),
(int)ATTR_NUM(attr.za_first_integer));
}
zap_cursor_fini(&zc);
}
/*ARGSUSED*/
static void
dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
{
zap_cursor_t zc;
zap_attribute_t attr;
uint16_t *layout_attrs;
unsigned i;
dump_zap_stats(os, object);
(void) printf("\n");
for (zap_cursor_init(&zc, os, object);
zap_cursor_retrieve(&zc, &attr) == 0;
zap_cursor_advance(&zc)) {
(void) printf("\t\t%s = [", attr.za_name);
if (attr.za_num_integers == 0) {
(void) printf("\n");
continue;
}
VERIFY(attr.za_integer_length == 2);
layout_attrs = umem_zalloc(attr.za_num_integers *
attr.za_integer_length, UMEM_NOFAIL);
VERIFY(zap_lookup(os, object, attr.za_name,
attr.za_integer_length,
attr.za_num_integers, layout_attrs) == 0);
for (i = 0; i != attr.za_num_integers; i++)
(void) printf(" %d ", (int)layout_attrs[i]);
(void) printf("]\n");
umem_free(layout_attrs,
attr.za_num_integers * attr.za_integer_length);
}
zap_cursor_fini(&zc);
}
/*ARGSUSED*/
static void
dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
{
zap_cursor_t zc;
zap_attribute_t attr;
const char *typenames[] = {
/* 0 */ "not specified",
/* 1 */ "FIFO",
/* 2 */ "Character Device",
/* 3 */ "3 (invalid)",
/* 4 */ "Directory",
/* 5 */ "5 (invalid)",
/* 6 */ "Block Device",
/* 7 */ "7 (invalid)",
/* 8 */ "Regular File",
/* 9 */ "9 (invalid)",
/* 10 */ "Symbolic Link",
/* 11 */ "11 (invalid)",
/* 12 */ "Socket",
/* 13 */ "Door",
/* 14 */ "Event Port",
/* 15 */ "15 (invalid)",
};
dump_zap_stats(os, object);
(void) printf("\n");
for (zap_cursor_init(&zc, os, object);
zap_cursor_retrieve(&zc, &attr) == 0;
zap_cursor_advance(&zc)) {
(void) printf("\t\t%s = %lld (type: %s)\n",
attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
}
zap_cursor_fini(&zc);
}
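
/*
 * The get_*_refcount() helpers below count on-disk space maps that
 * carry the enlarged, histogram-bearing space_map_phys_t. Their sum
 * must equal the SPACEMAP_HISTOGRAM feature refcount, which
 * verify_spacemap_refcounts() checks.
 */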
static int
get_dtl_refcount(vdev_t *vd)
{
int refcount = 0;
if (vd->vdev_ops->vdev_op_leaf) {
space_map_t *sm = vd->vdev_dtl_sm;
if (sm != NULL &&
sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
return (1);
return (0);
}
for (unsigned c = 0; c < vd->vdev_children; c++)
refcount += get_dtl_refcount(vd->vdev_child[c]);
return (refcount);
}
static int
get_metaslab_refcount(vdev_t *vd)
{
int refcount = 0;
if (vd->vdev_top == vd) {
for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
space_map_t *sm = vd->vdev_ms[m]->ms_sm;
if (sm != NULL &&
sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
refcount++;
}
}
for (unsigned c = 0; c < vd->vdev_children; c++)
refcount += get_metaslab_refcount(vd->vdev_child[c]);
return (refcount);
}
static int
get_obsolete_refcount(vdev_t *vd)
{
uint64_t obsolete_sm_object;
int refcount = 0;
VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
if (vd->vdev_top == vd && obsolete_sm_object != 0) {
dmu_object_info_t doi;
VERIFY0(dmu_object_info(vd->vdev_spa->spa_meta_objset,
obsolete_sm_object, &doi));
if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
refcount++;
}
} else {
ASSERT3P(vd->vdev_obsolete_sm, ==, NULL);
ASSERT3U(obsolete_sm_object, ==, 0);
}
for (unsigned c = 0; c < vd->vdev_children; c++) {
refcount += get_obsolete_refcount(vd->vdev_child[c]);
}
return (refcount);
}
static int
get_prev_obsolete_spacemap_refcount(spa_t *spa)
{
uint64_t prev_obj =
spa->spa_condensing_indirect_phys.scip_prev_obsolete_sm_object;
if (prev_obj != 0) {
dmu_object_info_t doi;
VERIFY0(dmu_object_info(spa->spa_meta_objset, prev_obj, &doi));
if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
return (1);
}
}
return (0);
}
static int
get_checkpoint_refcount(vdev_t *vd)
{
int refcount = 0;
if (vd->vdev_top == vd && vd->vdev_top_zap != 0 &&
zap_contains(spa_meta_objset(vd->vdev_spa),
vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) == 0)
refcount++;
for (uint64_t c = 0; c < vd->vdev_children; c++)
refcount += get_checkpoint_refcount(vd->vdev_child[c]);
return (refcount);
}
static int
get_log_spacemap_refcount(spa_t *spa)
{
return (avl_numnodes(&spa->spa_sm_logs_by_txg));
}
static int
verify_spacemap_refcounts(spa_t *spa)
{
uint64_t expected_refcount = 0;
uint64_t actual_refcount;
(void) feature_get_refcount(spa,
&spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
&expected_refcount);
actual_refcount = get_dtl_refcount(spa->spa_root_vdev);
actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
actual_refcount += get_obsolete_refcount(spa->spa_root_vdev);
actual_refcount += get_prev_obsolete_spacemap_refcount(spa);
actual_refcount += get_checkpoint_refcount(spa->spa_root_vdev);
actual_refcount += get_log_spacemap_refcount(spa);
if (expected_refcount != actual_refcount) {
(void) printf("space map refcount mismatch: expected %lld != "
"actual %lld\n",
(longlong_t)expected_refcount,
(longlong_t)actual_refcount);
return (2);
}
return (0);
}
static void
dump_spacemap(objset_t *os, space_map_t *sm)
{
const char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
"INVALID", "INVALID", "INVALID", "INVALID" };
if (sm == NULL)
return;
(void) printf("space map object %llu:\n",
(longlong_t)sm->sm_object);
(void) printf(" smp_length = 0x%llx\n",
(longlong_t)sm->sm_phys->smp_length);
(void) printf(" smp_alloc = 0x%llx\n",
(longlong_t)sm->sm_phys->smp_alloc);
if (dump_opt['d'] < 6 && dump_opt['m'] < 4)
return;
/*
* Print out the freelist entries in both encoded and decoded form.
*/
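	/*
	 * Each entry is one or two 64-bit words: a debug/padding word, a
	 * one-word ALLOC or FREE extent (offset and run scaled by
	 * sm_shift), or a two-word extent that can also carry a vdev id
	 * (used by log space maps). A prefix in the high bits selects
	 * the form; the SM*_DECODE macros unpack the fields.
	 */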
uint8_t mapshift = sm->sm_shift;
int64_t alloc = 0;
uint64_t word, entry_id = 0;
for (uint64_t offset = 0; offset < space_map_length(sm);
offset += sizeof (word)) {
VERIFY0(dmu_read(os, space_map_object(sm), offset,
sizeof (word), &word, DMU_READ_PREFETCH));
if (sm_entry_is_debug(word)) {
uint64_t de_txg = SM_DEBUG_TXG_DECODE(word);
uint64_t de_sync_pass = SM_DEBUG_SYNCPASS_DECODE(word);
if (de_txg == 0) {
(void) printf(
"\t [%6llu] PADDING\n",
(u_longlong_t)entry_id);
} else {
(void) printf(
"\t [%6llu] %s: txg %llu pass %llu\n",
(u_longlong_t)entry_id,
ddata[SM_DEBUG_ACTION_DECODE(word)],
(u_longlong_t)de_txg,
(u_longlong_t)de_sync_pass);
}
entry_id++;
continue;
}
uint8_t words;
char entry_type;
uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID;
if (sm_entry_is_single_word(word)) {
entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
'A' : 'F';
entry_off = (SM_OFFSET_DECODE(word) << mapshift) +
sm->sm_start;
entry_run = SM_RUN_DECODE(word) << mapshift;
words = 1;
} else {
/* it is a two-word entry so we read another word */
ASSERT(sm_entry_is_double_word(word));
uint64_t extra_word;
offset += sizeof (extra_word);
VERIFY0(dmu_read(os, space_map_object(sm), offset,
sizeof (extra_word), &extra_word,
DMU_READ_PREFETCH));
ASSERT3U(offset, <=, space_map_length(sm));
entry_run = SM2_RUN_DECODE(word) << mapshift;
entry_vdev = SM2_VDEV_DECODE(word);
entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ?
'A' : 'F';
entry_off = (SM2_OFFSET_DECODE(extra_word) <<
mapshift) + sm->sm_start;
words = 2;
}
(void) printf("\t [%6llu] %c range:"
" %010llx-%010llx size: %06llx vdev: %06llu words: %u\n",
(u_longlong_t)entry_id,
entry_type, (u_longlong_t)entry_off,
(u_longlong_t)(entry_off + entry_run),
(u_longlong_t)entry_run,
(u_longlong_t)entry_vdev, words);
if (entry_type == 'A')
alloc += entry_run;
else
alloc -= entry_run;
entry_id++;
}
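	/* The running ALLOC-minus-FREE total must match the summary. */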
if (alloc != space_map_allocated(sm)) {
(void) printf("space_map_object alloc (%lld) INCONSISTENT "
"with space map summary (%lld)\n",
(longlong_t)space_map_allocated(sm), (longlong_t)alloc);
}
}
static void
dump_metaslab_stats(metaslab_t *msp)
{
char maxbuf[32];
range_tree_t *rt = msp->ms_allocatable;
zfs_btree_t *t = &msp->ms_allocatable_by_size;
int free_pct = range_tree_space(rt) * 100 / msp->ms_size;
	/* make sure nicenum has enough space */
CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ);
zdb_nicenum(metaslab_largest_allocatable(msp), maxbuf, sizeof (maxbuf));
(void) printf("\t %25s %10lu %7s %6s %4s %4d%%\n",
"segments", zfs_btree_numnodes(t), "maxsize", maxbuf,
"freepct", free_pct);
(void) printf("\tIn-memory histogram:\n");
dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
}
static void
dump_metaslab(metaslab_t *msp)
{
vdev_t *vd = msp->ms_group->mg_vd;
spa_t *spa = vd->vdev_spa;
space_map_t *sm = msp->ms_sm;
char freebuf[32];
zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf,
sizeof (freebuf));
(void) printf(
"\tmetaslab %6llu offset %12llx spacemap %6llu free %5s\n",
(u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
(u_longlong_t)space_map_object(sm), freebuf);
if (dump_opt['m'] > 2 && !dump_opt['L']) {
mutex_enter(&msp->ms_lock);
VERIFY0(metaslab_load(msp));
range_tree_stat_verify(msp->ms_allocatable);
dump_metaslab_stats(msp);
metaslab_unload(msp);
mutex_exit(&msp->ms_lock);
}
if (dump_opt['m'] > 1 && sm != NULL &&
spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
/*
		 * The space map histogram represents free space in
		 * power-of-two chunks: bucket 0 counts segments of size
		 * 2^sm_shift, bucket 1 of 2^(sm_shift + 1), and so on.
*/
(void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
(u_longlong_t)msp->ms_fragmentation);
dump_histogram(sm->sm_phys->smp_histogram,
SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
}
ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
if (spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) {
(void) printf("\tFlush data:\n\tunflushed txg=%llu\n\n",
(u_longlong_t)metaslab_unflushed_txg(msp));
}
}
static void
print_vdev_metaslab_header(vdev_t *vd)
{
vdev_alloc_bias_t alloc_bias = vd->vdev_alloc_bias;
const char *bias_str = "";
if (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) {
bias_str = VDEV_ALLOC_BIAS_LOG;
} else if (alloc_bias == VDEV_BIAS_SPECIAL) {
bias_str = VDEV_ALLOC_BIAS_SPECIAL;
} else if (alloc_bias == VDEV_BIAS_DEDUP) {
bias_str = VDEV_ALLOC_BIAS_DEDUP;
}
uint64_t ms_flush_data_obj = 0;
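	/*
	 * Only pools with the log spacemap feature have an unflushed-phys
	 * entry in the top-level vdev ZAP, so ENOENT is tolerated here.
	 */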
if (vd->vdev_top_zap != 0) {
int error = zap_lookup(spa_meta_objset(vd->vdev_spa),
vd->vdev_top_zap, VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS,
sizeof (uint64_t), 1, &ms_flush_data_obj);
if (error != ENOENT) {
ASSERT0(error);
}
}
(void) printf("\tvdev %10llu %s",
(u_longlong_t)vd->vdev_id, bias_str);
if (ms_flush_data_obj != 0) {
(void) printf(" ms_unflushed_phys object %llu",
(u_longlong_t)ms_flush_data_obj);
}
(void) printf("\n\t%-10s%5llu %-19s %-15s %-12s\n",
"metaslabs", (u_longlong_t)vd->vdev_ms_count,
"offset", "spacemap", "free");
(void) printf("\t%15s %19s %15s %12s\n",
"---------------", "-------------------",
"---------------", "------------");
}
static void
dump_metaslab_groups(spa_t *spa)
{
vdev_t *rvd = spa->spa_root_vdev;
metaslab_class_t *mc = spa_normal_class(spa);
uint64_t fragmentation;
metaslab_class_histogram_verify(mc);
for (unsigned c = 0; c < rvd->vdev_children; c++) {
vdev_t *tvd = rvd->vdev_child[c];
metaslab_group_t *mg = tvd->vdev_mg;
if (mg == NULL || mg->mg_class != mc)
continue;
metaslab_group_histogram_verify(mg);
mg->mg_fragmentation = metaslab_group_fragmentation(mg);
(void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
"fragmentation",
(u_longlong_t)tvd->vdev_id,
(u_longlong_t)tvd->vdev_ms_count);
if (mg->mg_fragmentation == ZFS_FRAG_INVALID) {
(void) printf("%3s\n", "-");
} else {
(void) printf("%3llu%%\n",
(u_longlong_t)mg->mg_fragmentation);
}
dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
}
(void) printf("\tpool %s\tfragmentation", spa_name(spa));
fragmentation = metaslab_class_fragmentation(mc);
if (fragmentation == ZFS_FRAG_INVALID)
(void) printf("\t%3s\n", "-");
else
(void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation);
dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
}
static void
print_vdev_indirect(vdev_t *vd)
{
vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
vdev_indirect_births_t *vib = vd->vdev_indirect_births;
if (vim == NULL) {
ASSERT3P(vib, ==, NULL);
return;
}
ASSERT3U(vdev_indirect_mapping_object(vim), ==,
vic->vic_mapping_object);
ASSERT3U(vdev_indirect_births_object(vib), ==,
vic->vic_births_object);
(void) printf("indirect births obj %llu:\n",
(longlong_t)vic->vic_births_object);
(void) printf(" vib_count = %llu\n",
(longlong_t)vdev_indirect_births_count(vib));
for (uint64_t i = 0; i < vdev_indirect_births_count(vib); i++) {
vdev_indirect_birth_entry_phys_t *cur_vibe =
&vib->vib_entries[i];
(void) printf("\toffset %llx -> txg %llu\n",
(longlong_t)cur_vibe->vibe_offset,
(longlong_t)cur_vibe->vibe_phys_birth_txg);
}
(void) printf("\n");
(void) printf("indirect mapping obj %llu:\n",
(longlong_t)vic->vic_mapping_object);
(void) printf(" vim_max_offset = 0x%llx\n",
(longlong_t)vdev_indirect_mapping_max_offset(vim));
(void) printf(" vim_bytes_mapped = 0x%llx\n",
(longlong_t)vdev_indirect_mapping_bytes_mapped(vim));
(void) printf(" vim_count = %llu\n",
(longlong_t)vdev_indirect_mapping_num_entries(vim));
if (dump_opt['d'] <= 5 && dump_opt['m'] <= 3)
return;
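	/*
	 * Each mapping entry translates an offset range of this (removed)
	 * vdev into a destination DVA; the obsolete count is the number
	 * of bytes of that entry that have since been freed.
	 */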
uint32_t *counts = vdev_indirect_mapping_load_obsolete_counts(vim);
for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
vdev_indirect_mapping_entry_phys_t *vimep =
&vim->vim_entries[i];
(void) printf("\t<%llx:%llx:%llx> -> "
"<%llx:%llx:%llx> (%x obsolete)\n",
(longlong_t)vd->vdev_id,
(longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
(longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
(longlong_t)DVA_GET_VDEV(&vimep->vimep_dst),
(longlong_t)DVA_GET_OFFSET(&vimep->vimep_dst),
(longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
counts[i]);
}
(void) printf("\n");
uint64_t obsolete_sm_object;
VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
if (obsolete_sm_object != 0) {
objset_t *mos = vd->vdev_spa->spa_meta_objset;
(void) printf("obsolete space map object %llu:\n",
(u_longlong_t)obsolete_sm_object);
ASSERT(vd->vdev_obsolete_sm != NULL);
ASSERT3U(space_map_object(vd->vdev_obsolete_sm), ==,
obsolete_sm_object);
dump_spacemap(mos, vd->vdev_obsolete_sm);
(void) printf("\n");
}
}
static void
dump_metaslabs(spa_t *spa)
{
vdev_t *vd, *rvd = spa->spa_root_vdev;
uint64_t m, c = 0, children = rvd->vdev_children;
(void) printf("\nMetaslabs:\n");
if (!dump_opt['d'] && zopt_metaslab_args > 0) {
c = zopt_metaslab[0];
if (c >= children)
(void) fatal("bad vdev id: %llu", (u_longlong_t)c);
if (zopt_metaslab_args > 1) {
vd = rvd->vdev_child[c];
print_vdev_metaslab_header(vd);
for (m = 1; m < zopt_metaslab_args; m++) {
if (zopt_metaslab[m] < vd->vdev_ms_count)
dump_metaslab(
vd->vdev_ms[zopt_metaslab[m]]);
else
(void) fprintf(stderr, "bad metaslab "
"number %llu\n",
(u_longlong_t)zopt_metaslab[m]);
}
(void) printf("\n");
return;
}
children = c + 1;
}
for (; c < children; c++) {
vd = rvd->vdev_child[c];
print_vdev_metaslab_header(vd);
print_vdev_indirect(vd);
for (m = 0; m < vd->vdev_ms_count; m++)
dump_metaslab(vd->vdev_ms[m]);
(void) printf("\n");
}
}
static void
dump_log_spacemaps(spa_t *spa)
{
if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
return;
(void) printf("\nLog Space Maps in Pool:\n");
for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
space_map_t *sm = NULL;
VERIFY0(space_map_open(&sm, spa_meta_objset(spa),
sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT));
(void) printf("Log Spacemap object %llu txg %llu\n",
(u_longlong_t)sls->sls_sm_obj, (u_longlong_t)sls->sls_txg);
dump_spacemap(spa->spa_meta_objset, sm);
space_map_close(sm);
}
(void) printf("\n");
}
static void
dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
{
const ddt_phys_t *ddp = dde->dde_phys;
const ddt_key_t *ddk = &dde->dde_key;
const char *types[4] = { "ditto", "single", "double", "triple" };
char blkbuf[BP_SPRINTF_LEN];
blkptr_t blk;
int p;
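	/* One phys slot per copies class: ditto, single, double, triple. */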
for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
if (ddp->ddp_phys_birth == 0)
continue;
ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
(void) printf("index %llx refcnt %llu %s %s\n",
(u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
types[p], blkbuf);
}
}
static void
dump_dedup_ratio(const ddt_stat_t *dds)
{
double rL, rP, rD, D, dedup, compress, copies;
if (dds->dds_blocks == 0)
return;
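	/*
	 * The r* values count every reference; D counts each block once.
	 * So rD / D is the dedup ratio, rL / rP the compression ratio,
	 * and rD / rP the expansion due to extra (ditto) copies.
	 */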
rL = (double)dds->dds_ref_lsize;
rP = (double)dds->dds_ref_psize;
rD = (double)dds->dds_ref_dsize;
D = (double)dds->dds_dsize;
dedup = rD / D;
compress = rL / rP;
copies = rD / rP;
(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
"dedup * compress / copies = %.2f\n\n",
dedup, compress, copies, dedup * compress / copies);
}
static void
dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
{
char name[DDT_NAMELEN];
ddt_entry_t dde;
uint64_t walk = 0;
dmu_object_info_t doi;
uint64_t count, dspace, mspace;
int error;
error = ddt_object_info(ddt, type, class, &doi);
if (error == ENOENT)
return;
ASSERT(error == 0);
error = ddt_object_count(ddt, type, class, &count);
ASSERT(error == 0);
if (count == 0)
return;
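	/* On-disk footprint and an estimate of the in-core footprint. */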
dspace = doi.doi_physical_blocks_512 << 9;
mspace = doi.doi_fill_count * doi.doi_data_block_size;
ddt_object_name(ddt, type, class, name);
(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
name,
(u_longlong_t)count,
(u_longlong_t)(dspace / count),
(u_longlong_t)(mspace / count));
if (dump_opt['D'] < 3)
return;
zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
if (dump_opt['D'] < 4)
return;
if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
return;
(void) printf("%s contents:\n\n", name);
while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
dump_dde(ddt, &dde, walk);
ASSERT3U(error, ==, ENOENT);
(void) printf("\n");
}
static void
dump_all_ddts(spa_t *spa)
{
ddt_histogram_t ddh_total;
ddt_stat_t dds_total;
bzero(&ddh_total, sizeof (ddh_total));
bzero(&dds_total, sizeof (dds_total));
for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
ddt_t *ddt = spa->spa_ddt[c];
for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
for (enum ddt_class class = 0; class < DDT_CLASSES;
class++) {
dump_ddt(ddt, type, class);
}
}
}
ddt_get_dedup_stats(spa, &dds_total);
if (dds_total.dds_blocks == 0) {
(void) printf("All DDTs are empty\n");
return;
}
(void) printf("\n");
if (dump_opt['D'] > 1) {
(void) printf("DDT histogram (aggregated over all DDTs):\n");
ddt_get_dedup_histogram(spa, &ddh_total);
zpool_dump_ddt(&dds_total, &ddh_total);
}
dump_dedup_ratio(&dds_total);
}
static void
dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
{
char *prefix = arg;
(void) printf("%s [%llu,%llu) length %llu\n",
prefix,
(u_longlong_t)start,
(u_longlong_t)(start + size),
(u_longlong_t)(size));
}
static void
dump_dtl(vdev_t *vd, int indent)
{
spa_t *spa = vd->vdev_spa;
boolean_t required;
const char *name[DTL_TYPES] = { "missing", "partial", "scrub",
"outage" };
char prefix[256];
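	/*
	 * vdev_dtl_required() expects the vdev state locks to be held,
	 * so take them briefly just for that check.
	 */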
spa_vdev_state_enter(spa, SCL_NONE);
required = vdev_dtl_required(vd);
(void) spa_vdev_state_exit(spa, NULL, 0);
if (indent == 0)
(void) printf("\nDirty time logs:\n\n");
(void) printf("\t%*s%s [%s]\n", indent, "",
vd->vdev_path ? vd->vdev_path :
vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
required ? "DTL-required" : "DTL-expendable");
for (int t = 0; t < DTL_TYPES; t++) {
range_tree_t *rt = vd->vdev_dtl[t];
if (range_tree_space(rt) == 0)
continue;
(void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
indent + 2, "", name[t]);
range_tree_walk(rt, dump_dtl_seg, prefix);
if (dump_opt['d'] > 5 && vd->vdev_children == 0)
dump_spacemap(spa->spa_meta_objset,
vd->vdev_dtl_sm);
}
for (unsigned c = 0; c < vd->vdev_children; c++)
dump_dtl(vd->vdev_child[c], indent + 4);
}
static void
dump_history(spa_t *spa)
{
nvlist_t **events = NULL;
char *buf;
uint64_t resid, len, off = 0;
uint_t num = 0;
int error;
time_t tsec;
struct tm t;
char tbuf[30];
char internalstr[MAXPATHLEN];
if ((buf = malloc(SPA_OLD_MAXBLOCKSIZE)) == NULL) {
(void) fprintf(stderr, "%s: unable to allocate I/O buffer\n",
__func__);
return;
}
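	/*
	 * Read the on-disk history in SPA_OLD_MAXBLOCKSIZE chunks.
	 * zpool_history_unpack() returns, via resid, the length of a
	 * trailing partial record, so rewind the offset by that much
	 * before the next read.
	 */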
do {
len = SPA_OLD_MAXBLOCKSIZE;
if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
(void) fprintf(stderr, "Unable to read history: "
"error %d\n", error);
free(buf);
return;
}
if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
break;
off -= resid;
} while (len != 0);
(void) printf("\nHistory:\n");
for (unsigned i = 0; i < num; i++) {
uint64_t time, txg, ievent;
char *cmd, *intstr;
boolean_t printed = B_FALSE;
if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
&time) != 0)
goto next;
if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
&cmd) != 0) {
if (nvlist_lookup_uint64(events[i],
ZPOOL_HIST_INT_EVENT, &ievent) != 0)
goto next;