D37354.diff

diff --git a/sbin/Makefile b/sbin/Makefile
--- a/sbin/Makefile
+++ b/sbin/Makefile
@@ -20,6 +20,7 @@
ffsinfo \
fsck \
fsck_ffs \
+ fsck_hammer2 \
fsck_msdosfs \
fsdb \
fsirand \
@@ -28,6 +29,7 @@
ggate \
growfs \
gvinum \
+ hammer2 \
ifconfig \
init \
kldconfig \
@@ -43,12 +45,14 @@
mount \
mount_cd9660 \
mount_fusefs \
+ mount_hammer2 \
mount_msdosfs \
mount_nfs \
mount_nullfs \
mount_udf \
mount_unionfs \
newfs \
+ newfs_hammer2 \
newfs_msdos \
nfsiod \
nos-tun \
diff --git a/sbin/fsck_hammer2/Makefile b/sbin/fsck_hammer2/Makefile
new file mode 100644
--- /dev/null
+++ b/sbin/fsck_hammer2/Makefile
@@ -0,0 +1,18 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+PROG= fsck_hammer2
+SRCS= fsck_hammer2.c test.c ondisk.c subs.c xxhash.c gsb_crc32.c
+MAN= fsck_hammer2.8
+
+.PATH: ${SRCTOP}/sbin/hammer2 ${SRCTOP}/sys/fs/hammer2/xxhash ${SRCTOP}/sys/libkern
+
+WARNS?= 3
+
+CFLAGS+= -DXXH_NAMESPACE=h2_
+CFLAGS+= -I${SRCTOP}/sys
+CFLAGS+= -I${SRCTOP}/sbin/hammer2
+
+LIBADD= md
+
+.include <bsd.prog.mk>
diff --git a/sbin/fsck_hammer2/fsck_hammer2.h b/sbin/fsck_hammer2/fsck_hammer2.h
new file mode 100644
--- /dev/null
+++ b/sbin/fsck_hammer2/fsck_hammer2.h
@@ -0,0 +1,56 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef FSCK_HAMMER2_H_
+#define FSCK_HAMMER2_H_
+
+extern int DebugOpt;
+extern int ForceOpt;
+extern int VerboseOpt;
+extern int QuietOpt;
+extern int CountEmpty;
+extern int ScanBest;
+extern int ScanPFS;
+extern int PrintPFS;
+extern int NumPFSNames;
+extern char **PFSNames;
+extern long BlockrefCacheCount;
+
+int test_hammer2(const char *);
+
+#endif /* !FSCK_HAMMER2_H_ */
diff --git a/sbin/fsck_hammer2/fsck_hammer2.8 b/sbin/fsck_hammer2/fsck_hammer2.8
new file mode 100644
--- /dev/null
+++ b/sbin/fsck_hammer2/fsck_hammer2.8
@@ -0,0 +1,100 @@
+.\" Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+.\" Copyright (c) 2019 The DragonFly Project
+.\" All rights reserved.
+.\"
+.\" This code is derived from software contributed to The DragonFly Project
+.\" by Matthew Dillon <dillon@backplane.com>
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\"
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in
+.\" the documentation and/or other materials provided with the
+.\" distribution.
+.\" 3. Neither the name of The DragonFly Project nor the names of its
+.\" contributors may be used to endorse or promote products derived
+.\" from this software without specific, prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+.\" FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+.\" COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+.\" INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd September 18, 2022
+.Dt FSCK_HAMMER2 8
+.Os
+.Sh NAME
+.Nm fsck_hammer2
+.Nd HAMMER2 file system consistency checker
+.Sh SYNOPSIS
+.Nm
+.Op Fl f
+.Op Fl v
+.Op Fl q
+.Op Fl e
+.Op Fl b
+.Op Fl p
+.Op Fl P
+.Op Fl l Ar pfs_names
+.Op Fl c Ar cache_count
+.Ar special
+.Sh DESCRIPTION
+The
+.Nm
+utility verifies a
+.Tn HAMMER2
+file system.
+The following options are available:
+.Bl -tag -width indent
+.It Fl f
+Force option.
+Continue checking even after an error is detected.
+.It Fl v
+Verbose option.
+Print blockref data on failure if possible.
+.It Fl q
+Quiet option.
+.It Fl e
+Count empty blockrefs.
+.It Fl b
+Scan only best zone.
+.It Fl p
+Scan each PFS separately.
+.It Fl P
+Print PFS information.
+.It Fl l
+Specify PFS names when
+.Fl p
+is used.
+.It Fl c
+Specify blockref cache count.
+Statistics for a verified subtree are cached once it accumulates this
+many blockrefs, and reused when the same blockref is visited again.
+.El
+.Sh SEE ALSO
+.Xr fsck 8 ,
+.Xr hammer2 8 ,
+.Xr mount_hammer2 8 ,
+.Xr newfs_hammer2 8
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Dx 5.7 .
+.Sh AUTHORS
+.An Tomohiro Kusumi Aq Mt tkusumi@netbsd.org
+.Pp
+The
+.Nm
+utility was ported to
+.Fx
+by
+.An Tomohiro Kusumi Aq Mt tkusumi@netbsd.org .
diff --git a/sbin/fsck_hammer2/fsck_hammer2.c b/sbin/fsck_hammer2/fsck_hammer2.c
new file mode 100644
--- /dev/null
+++ b/sbin/fsck_hammer2/fsck_hammer2.c
@@ -0,0 +1,187 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+
+#include "fsck_hammer2.h"
+
+int DebugOpt;
+int ForceOpt;
+int VerboseOpt;
+int QuietOpt;
+int CountEmpty;
+int ScanBest;
+int ScanPFS;
+int PrintPFS;
+int NumPFSNames;
+char **PFSNames;
+long BlockrefCacheCount = -1;
+
+static void
+init_pfs_names(const char *names)
+{
+ char *name, *h, *p;
+ int siz = 32;
+
+ PFSNames = calloc(siz, sizeof(char *));
+ p = strdup(names);
+ h = p;
+
+ while ((name = p) != NULL) {
+ p = strchr(p, ',');
+ if (p)
+ *p++ = 0;
+ if (strlen(name)) {
+ if (NumPFSNames > siz - 1) {
+ siz *= 2;
+ PFSNames = realloc(PFSNames,
+ siz * sizeof(char *));
+ }
+ PFSNames[NumPFSNames++] = strdup(name);
+ }
+ }
+ free(h);
+
+ if (DebugOpt) {
+ int i;
+ for (i = 0; i < NumPFSNames; i++)
+ printf("PFSNames[%d]=\"%s\"\n", i, PFSNames[i]);
+ }
+}
+
+static void
+cleanup_pfs_names(void)
+{
+ int i;
+
+ for (i = 0; i < NumPFSNames; i++)
+ free(PFSNames[i]);
+ free(PFSNames);
+}
+
+static void
+usage(void)
+{
+ fprintf(stderr, "fsck_hammer2 [-f] [-v] [-q] [-e] [-b] [-p] [-P] "
+ "[-l pfs_names] [-c cache_count] special\n");
+ exit(1);
+}
+
+int
+main(int ac, char **av)
+{
+ int i, ch;
+
+ while ((ch = getopt(ac, av, "dfvqebpPl:c:")) != -1) {
+ switch(ch) {
+ case 'd':
+ DebugOpt++;
+ break;
+ case 'f':
+ ForceOpt = 1;
+ break;
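+ /* Each -v first cancels a pending -q, and vice versa. */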
+ case 'v':
+ if (QuietOpt)
+ --QuietOpt;
+ else
+ ++VerboseOpt;
+ break;
+ case 'q':
+ if (VerboseOpt)
+ --VerboseOpt;
+ else
+ ++QuietOpt;
+ break;
+ case 'e':
+ CountEmpty = 1;
+ break;
+ case 'b':
+ ScanBest = 1;
+ break;
+ case 'p':
+ ScanPFS = 1;
+ break;
+ case 'P':
+ PrintPFS = 1;
+ break;
+ case 'l':
+ init_pfs_names(optarg);
+ break;
+ case 'c':
+ errno = 0;
+ BlockrefCacheCount = strtol(optarg, NULL, 10);
+ if (errno == ERANGE &&
+ (BlockrefCacheCount == LONG_MIN ||
+ BlockrefCacheCount == LONG_MAX)) {
+ perror("strtol");
+ exit(1);
+ }
+ break;
+ default:
+ usage();
+ /* not reached */
+ break;
+ }
+ }
+
+ ac -= optind;
+ av += optind;
+ if (ac < 1) {
+ usage();
+ /* not reached */
+ }
+
+ for (i = 0; i < ac; i++) {
+ if (ac != 1)
+ printf("%s\n", av[i]);
+ if (test_hammer2(av[i]) == -1)
+ exit(1);
+ if (i != ac - 1)
+ printf("----------------------------------------"
+ "----------------------------------------\n");
+ }
+
+ cleanup_pfs_names();
+
+ return 0;
+}
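
The -c handler above follows the standard strtol(3) overflow protocol: clear errno, call strtol(), and treat LONG_MIN/LONG_MAX with errno set to ERANGE as overflow. A minimal self-contained sketch of that idiom, extended with an end-pointer check the patch omits (parse_count is an illustrative name, not part of this change):

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustrative helper: parse a base-10 long, rejecting overflow and junk. */
static int
parse_count(const char *s, long *out)
{
	char *end;

	errno = 0;
	*out = strtol(s, &end, 10);
	if (errno == ERANGE && (*out == LONG_MIN || *out == LONG_MAX))
		return -1;	/* magnitude does not fit in a long */
	if (end == s || *end != '\0')
		return -1;	/* no digits, or trailing garbage */
	return 0;
}

int
main(void)
{
	long v;

	if (parse_count("1024", &v) == 0)
		printf("cache count %ld\n", v);
	return 0;
}
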
diff --git a/sbin/fsck_hammer2/test.c b/sbin/fsck_hammer2/test.c
new file mode 100644
--- /dev/null
+++ b/sbin/fsck_hammer2/test.c
@@ -0,0 +1,1349 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/tree.h>
+#include <sys/queue.h>
+#include <sys/ttycom.h>
+#include <sys/disk.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+
+#include <crypto/sha2/sha256.h>
+
+#include <fs/hammer2/hammer2_disk.h>
+#include <fs/hammer2/hammer2_xxhash.h>
+
+#include "hammer2_subs.h"
+#include "fsck_hammer2.h"
+
+struct blockref_msg {
+ TAILQ_ENTRY(blockref_msg) entry;
+ hammer2_blockref_t bref;
+ void *msg;
+};
+
+TAILQ_HEAD(blockref_list, blockref_msg);
+
+struct blockref_entry {
+ RB_ENTRY(blockref_entry) entry;
+ hammer2_off_t data_off;
+ struct blockref_list head;
+};
+
+static int
+blockref_cmp(struct blockref_entry *b1, struct blockref_entry *b2)
+{
+ if (b1->data_off < b2->data_off)
+ return -1;
+ if (b1->data_off > b2->data_off)
+ return 1;
+ return 0;
+}
+
+RB_HEAD(blockref_tree, blockref_entry);
+RB_PROTOTYPE(blockref_tree, blockref_entry, entry, blockref_cmp);
+RB_GENERATE(blockref_tree, blockref_entry, entry, blockref_cmp);
+
+typedef struct {
+ struct blockref_tree root;
+ uint8_t type; /* HAMMER2_BREF_TYPE_VOLUME or FREEMAP */
+ uint64_t total_blockref;
+ uint64_t total_empty;
+ uint64_t total_bytes;
+ union {
+ /* use volume or freemap depending on type value */
+ struct {
+ uint64_t total_inode;
+ uint64_t total_indirect;
+ uint64_t total_data;
+ uint64_t total_dirent;
+ } volume;
+ struct {
+ uint64_t total_freemap_node;
+ uint64_t total_freemap_leaf;
+ } freemap;
+ };
+} blockref_stats_t;
+
+typedef struct {
+ uint64_t total_blockref;
+ uint64_t total_empty;
+ uint64_t total_bytes;
+ struct {
+ uint64_t total_inode;
+ uint64_t total_indirect;
+ uint64_t total_data;
+ uint64_t total_dirent;
+ } volume;
+ struct {
+ uint64_t total_freemap_node;
+ uint64_t total_freemap_leaf;
+ } freemap;
+ long count;
+} delta_stats_t;
+
+static void print_blockref_entry(struct blockref_tree *);
+static void init_blockref_stats(blockref_stats_t *, uint8_t);
+static void cleanup_blockref_stats(blockref_stats_t *);
+static void init_delta_root(struct blockref_tree *);
+static void cleanup_delta_root(struct blockref_tree *);
+static void print_blockref_stats(const blockref_stats_t *, bool);
+static int verify_volume_header(const hammer2_volume_data_t *);
+static int read_media(const hammer2_blockref_t *, hammer2_media_data_t *,
+ size_t *);
+static int verify_blockref(const hammer2_volume_data_t *,
+ const hammer2_blockref_t *, bool, blockref_stats_t *,
+ struct blockref_tree *, delta_stats_t *, int, int);
+static void print_pfs(const hammer2_inode_data_t *);
+static char *get_inode_filename(const hammer2_inode_data_t *);
+static int init_pfs_blockref(const hammer2_volume_data_t *,
+ const hammer2_blockref_t *, struct blockref_list *);
+static void cleanup_pfs_blockref(struct blockref_list *);
+static void print_media(FILE *, int, const hammer2_blockref_t *,
+ const hammer2_media_data_t *, size_t);
+
+static int best_zone = -1;
+
+#define TAB 8
+
+static void
+tfprintf(FILE *fp, int tab, const char *ctl, ...)
+{
+ va_list va;
+ int ret;
+
+ ret = fprintf(fp, "%*s", tab * TAB, "");
+ if (ret < 0)
+ return;
+
+ va_start(va, ctl);
+ vfprintf(fp, ctl, va);
+ va_end(va);
+}
+
+static void
+tsnprintf(char *str, size_t siz, int tab, const char *ctl, ...)
+{
+ va_list va;
+ int ret;
+
+ ret = snprintf(str, siz, "%*s", tab * TAB, "");
+ if (ret < 0 || ret >= (int)siz)
+ return;
+
+ va_start(va, ctl);
+ vsnprintf(str + ret, siz - ret, ctl, va);
+ va_end(va);
+}
+
+static void
+tprintf_zone(int tab, int i, const hammer2_blockref_t *bref)
+{
+ tfprintf(stdout, tab, "zone.%d %016jx%s\n",
+ i, (uintmax_t)bref->data_off,
+ (!ScanBest && i == best_zone) ? " (best)" : "");
+}
+
+static int
+init_root_blockref(int i, uint8_t type, hammer2_blockref_t *bref)
+{
+ hammer2_off_t off;
+
+ assert(type == HAMMER2_BREF_TYPE_EMPTY ||
+ type == HAMMER2_BREF_TYPE_VOLUME ||
+ type == HAMMER2_BREF_TYPE_FREEMAP);
+ memset(bref, 0, sizeof(*bref));
+ bref->type = type;
+ bref->data_off = (i * HAMMER2_ZONE_BYTES64) | HAMMER2_PBUFRADIX;
+ off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
+
+ return lseek(hammer2_get_root_volume_fd(),
+ off - hammer2_get_root_volume_offset(), SEEK_SET);
+}
+
+static int
+find_best_zone(void)
+{
+ hammer2_blockref_t best;
+ int i, best_i = -1;
+
+ memset(&best, 0, sizeof(best));
+
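+ /* The best zone is the valid volume header with the highest mirror_tid. */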
+ for (i = 0; i < HAMMER2_NUM_VOLHDRS; ++i) {
+ hammer2_volume_data_t voldata;
+ hammer2_blockref_t broot;
+ ssize_t ret;
+
+ if (i * HAMMER2_ZONE_BYTES64 >=
+ hammer2_get_root_volume_size())
+ break;
+ init_root_blockref(i, HAMMER2_BREF_TYPE_EMPTY, &broot);
+ ret = read(hammer2_get_root_volume_fd(), &voldata,
+ HAMMER2_PBUFSIZE);
+ if (ret == HAMMER2_PBUFSIZE) {
+ if ((voldata.magic != HAMMER2_VOLUME_ID_HBO) &&
+ (voldata.magic != HAMMER2_VOLUME_ID_ABO))
+ continue;
+ broot.mirror_tid = voldata.mirror_tid;
+ if (best_i < 0 || best.mirror_tid < broot.mirror_tid) {
+ best_i = i;
+ best = broot;
+ }
+ } else if (ret == -1) {
+ perror("read");
+ return -1;
+ } else {
+ tfprintf(stderr, 1, "Failed to read volume header\n");
+ return -1;
+ }
+ }
+
+ return best_i;
+}
+
+static int
+test_volume_header(void)
+{
+ bool failed = false;
+ int i;
+
+ for (i = 0; i < HAMMER2_NUM_VOLHDRS; ++i) {
+ hammer2_volume_data_t voldata;
+ hammer2_blockref_t broot;
+ ssize_t ret;
+
+ if (ScanBest && i != best_zone)
+ continue;
+ if (i * HAMMER2_ZONE_BYTES64 >=
+ hammer2_get_root_volume_size()) {
+ tfprintf(stderr, 0, "zone.%d exceeds volume size\n", i);
+ break;
+ }
+ init_root_blockref(i, HAMMER2_BREF_TYPE_EMPTY, &broot);
+ ret = read(hammer2_get_root_volume_fd(), &voldata,
+ HAMMER2_PBUFSIZE);
+ if (ret == HAMMER2_PBUFSIZE) {
+ tprintf_zone(0, i, &broot);
+ if (verify_volume_header(&voldata) == -1)
+ failed = true;
+ } else if (ret == -1) {
+ perror("read");
+ return -1;
+ } else {
+ tfprintf(stderr, 1, "Failed to read volume header\n");
+ return -1;
+ }
+ }
+
+ return failed ? -1 : 0;
+}
+
+static int
+test_blockref(uint8_t type)
+{
+ struct blockref_tree droot;
+ bool failed = false;
+ int i;
+
+ init_delta_root(&droot);
+ for (i = 0; i < HAMMER2_NUM_VOLHDRS; ++i) {
+ hammer2_volume_data_t voldata;
+ hammer2_blockref_t broot;
+ ssize_t ret;
+
+ if (ScanBest && i != best_zone)
+ continue;
+ if (i * HAMMER2_ZONE_BYTES64 >=
+ hammer2_get_root_volume_size()) {
+ tfprintf(stderr, 0, "zone.%d exceeds volume size\n", i);
+ break;
+ }
+ init_root_blockref(i, type, &broot);
+ ret = read(hammer2_get_root_volume_fd(), &voldata,
+ HAMMER2_PBUFSIZE);
+ if (ret == HAMMER2_PBUFSIZE) {
+ blockref_stats_t bstats;
+ init_blockref_stats(&bstats, type);
+ delta_stats_t ds;
+ memset(&ds, 0, sizeof(ds));
+ tprintf_zone(0, i, &broot);
+ if (verify_blockref(&voldata, &broot, false, &bstats,
+ &droot, &ds, 0, 0) == -1)
+ failed = true;
+ print_blockref_stats(&bstats, true);
+ print_blockref_entry(&bstats.root);
+ cleanup_blockref_stats(&bstats);
+ } else if (ret == -1) {
+ perror("read");
+ failed = true;
+ goto end;
+ } else {
+ tfprintf(stderr, 1, "Failed to read volume header\n");
+ failed = true;
+ goto end;
+ }
+ }
+end:
+ cleanup_delta_root(&droot);
+ return failed ? -1 : 0;
+}
+
+static int
+test_pfs_blockref(void)
+{
+ struct blockref_tree droot;
+ uint8_t type = HAMMER2_BREF_TYPE_VOLUME;
+ bool failed = false;
+ int i;
+
+ init_delta_root(&droot);
+ for (i = 0; i < HAMMER2_NUM_VOLHDRS; ++i) {
+ hammer2_volume_data_t voldata;
+ hammer2_blockref_t broot;
+ ssize_t ret;
+
+ if (ScanBest && i != best_zone)
+ continue;
+ if (i * HAMMER2_ZONE_BYTES64 >=
+ hammer2_get_root_volume_size()) {
+ tfprintf(stderr, 0, "zone.%d exceeds volume size\n", i);
+ break;
+ }
+ init_root_blockref(i, type, &broot);
+ ret = read(hammer2_get_root_volume_fd(), &voldata,
+ HAMMER2_PBUFSIZE);
+ if (ret == HAMMER2_PBUFSIZE) {
+ struct blockref_list blist;
+ struct blockref_msg *p;
+ int count = 0;
+
+ tprintf_zone(0, i, &broot);
+ TAILQ_INIT(&blist);
+ if (init_pfs_blockref(&voldata, &broot, &blist) == -1) {
+ tfprintf(stderr, 1, "Failed to read PFS "
+ "blockref\n");
+ failed = true;
+ continue;
+ }
+ if (TAILQ_EMPTY(&blist)) {
+ tfprintf(stderr, 1, "Failed to find PFS "
+ "blockref\n");
+ failed = true;
+ continue;
+ }
+ TAILQ_FOREACH(p, &blist, entry) {
+ blockref_stats_t bstats;
+ bool found = false;
+ char *f = get_inode_filename(p->msg);
+ if (NumPFSNames) {
+ int j;
+ for (j = 0; j < NumPFSNames; j++)
+ if (!strcmp(PFSNames[j], f))
+ found = true;
+ } else
+ found = true;
+ if (!found) {
+ free(f);
+ continue;
+ }
+ count++;
+ if (PrintPFS) {
+ print_pfs(p->msg);
+ free(f);
+ continue;
+ }
+ tfprintf(stdout, 1, "%s\n", f);
+ free(f);
+ init_blockref_stats(&bstats, type);
+ delta_stats_t ds;
+ memset(&ds, 0, sizeof(ds));
+ if (verify_blockref(&voldata, &p->bref, false,
+ &bstats, &droot, &ds, 0, 0) == -1)
+ failed = true;
+ print_blockref_stats(&bstats, true);
+ print_blockref_entry(&bstats.root);
+ cleanup_blockref_stats(&bstats);
+ }
+ cleanup_pfs_blockref(&blist);
+ if (NumPFSNames && !count) {
+ tfprintf(stderr, 1, "PFS not found\n");
+ failed = true;
+ }
+ } else if (ret == -1) {
+ perror("read");
+ failed = true;
+ goto end;
+ } else {
+ tfprintf(stderr, 1, "Failed to read volume header\n");
+ failed = true;
+ goto end;
+ }
+ }
+end:
+ cleanup_delta_root(&droot);
+ return failed ? -1 : 0;
+}
+
+static int
+charsperline(void)
+{
+ int columns;
+ char *cp;
+ struct winsize ws;
+
+ columns = 0;
+ if (ioctl(0, TIOCGWINSZ, &ws) != -1)
+ columns = ws.ws_col;
+ if (columns == 0 && (cp = getenv("COLUMNS")))
+ columns = atoi(cp);
+ if (columns == 0)
+ columns = 80; /* last resort */
+
+ return columns;
+}
+
+static void
+cleanup_blockref_msg(struct blockref_list *head)
+{
+ struct blockref_msg *p;
+
+ while ((p = TAILQ_FIRST(head)) != NULL) {
+ TAILQ_REMOVE(head, p, entry);
+ free(p->msg);
+ free(p);
+ }
+ assert(TAILQ_EMPTY(head));
+}
+
+static void
+cleanup_blockref_entry(struct blockref_tree *root)
+{
+ struct blockref_entry *e;
+
+ while ((e = RB_ROOT(root)) != NULL) {
+ RB_REMOVE(blockref_tree, root, e);
+ cleanup_blockref_msg(&e->head);
+ free(e);
+ }
+ assert(RB_EMPTY(root));
+}
+
+static void
+add_blockref_msg(struct blockref_list *head, const hammer2_blockref_t *bref,
+ const void *msg, size_t siz)
+{
+ struct blockref_msg *m;
+ void *p;
+
+ m = calloc(1, sizeof(*m));
+ assert(m);
+ m->bref = *bref;
+ p = calloc(1, siz);
+ assert(p);
+ memcpy(p, msg, siz);
+ m->msg = p;
+
+ TAILQ_INSERT_TAIL(head, m, entry);
+}
+
+static void
+add_blockref_entry(struct blockref_tree *root, const hammer2_blockref_t *bref,
+ const void *msg, size_t siz)
+{
+ struct blockref_entry *e, bref_find;
+
+ memset(&bref_find, 0, sizeof(bref_find));
+ bref_find.data_off = bref->data_off;
+ e = RB_FIND(blockref_tree, root, &bref_find);
+ if (!e) {
+ e = calloc(1, sizeof(*e));
+ assert(e);
+ TAILQ_INIT(&e->head);
+ e->data_off = bref->data_off;
+ }
+
+ add_blockref_msg(&e->head, bref, msg, siz);
+
+ RB_INSERT(blockref_tree, root, e);
+}
+
+static void
+__print_blockref(FILE *fp, int tab, const hammer2_blockref_t *bref,
+ const char *msg)
+{
+ tfprintf(fp, tab, "%016jx %-12s %016jx/%-2d%s%s\n",
+ (uintmax_t)bref->data_off,
+ hammer2_breftype_to_str(bref->type),
+ (uintmax_t)bref->key,
+ bref->keybits,
+ msg ? " " : "",
+ msg ? msg : "");
+}
+
+static void
+print_blockref(FILE *fp, const hammer2_blockref_t *bref, const char *msg)
+{
+ __print_blockref(fp, 1, bref, msg);
+}
+
+static void
+print_blockref_debug(FILE *fp, int depth, int index,
+ const hammer2_blockref_t *bref, const char *msg)
+{
+ if (DebugOpt > 1) {
+ char buf[256];
+ int i;
+
+ memset(buf, 0, sizeof(buf));
+ for (i = 0; i < depth * 2; i++)
+ strlcat(buf, " ", sizeof(buf));
+ tfprintf(fp, 1, "%s", buf);
+ fprintf(fp, "%-2d %-3d ", depth, index);
+ __print_blockref(fp, 0, bref, msg);
+ } else if (DebugOpt > 0)
+ print_blockref(fp, bref, msg);
+}
+
+static void
+print_blockref_msg(const struct blockref_list *head)
+{
+ struct blockref_msg *m;
+
+ TAILQ_FOREACH(m, head, entry) {
+ hammer2_blockref_t *bref = &m->bref;
+ print_blockref(stderr, bref, m->msg);
+ if (VerboseOpt > 0) {
+ hammer2_media_data_t media;
+ size_t bytes;
+ if (!read_media(bref, &media, &bytes))
+ print_media(stderr, 2, bref, &media, bytes);
+ else
+ tfprintf(stderr, 2, "Failed to read media\n");
+ }
+ }
+}
+
+static void
+print_blockref_entry(struct blockref_tree *root)
+{
+ struct blockref_entry *e;
+
+ RB_FOREACH(e, blockref_tree, root)
+ print_blockref_msg(&e->head);
+}
+
+static void
+init_blockref_stats(blockref_stats_t *bstats, uint8_t type)
+{
+ memset(bstats, 0, sizeof(*bstats));
+ RB_INIT(&bstats->root);
+ bstats->type = type;
+}
+
+static void
+cleanup_blockref_stats(blockref_stats_t *bstats)
+{
+ cleanup_blockref_entry(&bstats->root);
+}
+
+static void
+init_delta_root(struct blockref_tree *droot)
+{
+ RB_INIT(droot);
+}
+
+static void
+cleanup_delta_root(struct blockref_tree *droot)
+{
+ cleanup_blockref_entry(droot);
+}
+
+static void
+print_blockref_stats(const blockref_stats_t *bstats, bool newline)
+{
+ size_t siz = charsperline();
+ char *buf = calloc(1, siz);
+ char emptybuf[128];
+
+ assert(buf);
+
+ if (CountEmpty)
+ snprintf(emptybuf, sizeof(emptybuf), ", %ju empty",
+ (uintmax_t)bstats->total_empty);
+ else
+ strlcpy(emptybuf, "", sizeof(emptybuf));
+
+ switch (bstats->type) {
+ case HAMMER2_BREF_TYPE_VOLUME:
+ tsnprintf(buf, siz, 1, "%ju blockref (%ju inode, %ju indirect, "
+ "%ju data, %ju dirent%s), %s",
+ (uintmax_t)bstats->total_blockref,
+ (uintmax_t)bstats->volume.total_inode,
+ (uintmax_t)bstats->volume.total_indirect,
+ (uintmax_t)bstats->volume.total_data,
+ (uintmax_t)bstats->volume.total_dirent,
+ emptybuf,
+ sizetostr(bstats->total_bytes));
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP:
+ tsnprintf(buf, siz, 1, "%ju blockref (%ju node, %ju leaf%s), "
+ "%s",
+ (uintmax_t)bstats->total_blockref,
+ (uintmax_t)bstats->freemap.total_freemap_node,
+ (uintmax_t)bstats->freemap.total_freemap_leaf,
+ emptybuf,
+ sizetostr(bstats->total_bytes));
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ if (newline) {
+ printf("%s\n", buf);
+ } else {
+ printf("%s\r", buf);
+ fflush(stdout);
+ }
+ free(buf);
+}
+
+static int
+verify_volume_header(const hammer2_volume_data_t *voldata)
+{
+ hammer2_crc32_t crc0, crc1;
+ const char *p = (const char*)voldata;
+
+ if ((voldata->magic != HAMMER2_VOLUME_ID_HBO) &&
+ (voldata->magic != HAMMER2_VOLUME_ID_ABO)) {
+ tfprintf(stderr, 1, "Bad magic %jX\n", voldata->magic);
+ return -1;
+ }
+
+ if (voldata->magic == HAMMER2_VOLUME_ID_ABO)
+ tfprintf(stderr, 1, "Reverse endian\n");
+
+ crc0 = voldata->icrc_sects[HAMMER2_VOL_ICRC_SECT0];
+ crc1 = hammer2_icrc32(p + HAMMER2_VOLUME_ICRC0_OFF,
+ HAMMER2_VOLUME_ICRC0_SIZE);
+ if (crc0 != crc1) {
+ tfprintf(stderr, 1, "Bad HAMMER2_VOL_ICRC_SECT0 CRC\n");
+ return -1;
+ }
+
+ crc0 = voldata->icrc_sects[HAMMER2_VOL_ICRC_SECT1];
+ crc1 = hammer2_icrc32(p + HAMMER2_VOLUME_ICRC1_OFF,
+ HAMMER2_VOLUME_ICRC1_SIZE);
+ if (crc0 != crc1) {
+ tfprintf(stderr, 1, "Bad HAMMER2_VOL_ICRC_SECT1 CRC\n");
+ return -1;
+ }
+
+ crc0 = voldata->icrc_volheader;
+ crc1 = hammer2_icrc32(p + HAMMER2_VOLUME_ICRCVH_OFF,
+ HAMMER2_VOLUME_ICRCVH_SIZE);
+ if (crc0 != crc1) {
+ tfprintf(stderr, 1, "Bad volume header CRC\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+read_media(const hammer2_blockref_t *bref, hammer2_media_data_t *media,
+ size_t *media_bytes)
+{
+ hammer2_off_t io_off, io_base;
+ size_t bytes, io_bytes, boff;
+ int fd;
+
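+ /* The low bits of data_off encode the size radix (log2); 0 means no data. */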
+ bytes = (bref->data_off & HAMMER2_OFF_MASK_RADIX);
+ if (bytes)
+ bytes = (size_t)1 << bytes;
+ if (media_bytes)
+ *media_bytes = bytes;
+
+ if (!bytes)
+ return 0;
+
+ io_off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
+ io_base = io_off & ~(hammer2_off_t)(HAMMER2_LBUFSIZE - 1);
+ boff = io_off - io_base;
+
+ io_bytes = HAMMER2_LBUFSIZE;
+ while (io_bytes + boff < bytes)
+ io_bytes <<= 1;
+
+ if (io_bytes > sizeof(*media))
+ return -1;
+ fd = hammer2_get_volume_fd(io_off);
+ if (lseek(fd, io_base - hammer2_get_volume_offset(io_base), SEEK_SET)
+ == -1)
+ return -2;
+ if (read(fd, media, io_bytes) != (ssize_t)io_bytes)
+ return -2;
+ if (boff)
+ memmove(media, (char *)media + boff, bytes);
+
+ return 0;
+}
+
+static void
+load_delta_stats(blockref_stats_t *bstats, const delta_stats_t *dstats)
+{
+ bstats->total_blockref += dstats->total_blockref;
+ bstats->total_empty += dstats->total_empty;
+ bstats->total_bytes += dstats->total_bytes;
+
+ switch (bstats->type) {
+ case HAMMER2_BREF_TYPE_VOLUME:
+ bstats->volume.total_inode += dstats->volume.total_inode;
+ bstats->volume.total_indirect += dstats->volume.total_indirect;
+ bstats->volume.total_data += dstats->volume.total_data;
+ bstats->volume.total_dirent += dstats->volume.total_dirent;
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP:
+ bstats->freemap.total_freemap_node +=
+ dstats->freemap.total_freemap_node;
+ bstats->freemap.total_freemap_leaf +=
+ dstats->freemap.total_freemap_leaf;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static void
+accumulate_delta_stats(delta_stats_t *dst, const delta_stats_t *src)
+{
+ dst->total_blockref += src->total_blockref;
+ dst->total_empty += src->total_empty;
+ dst->total_bytes += src->total_bytes;
+
+ dst->volume.total_inode += src->volume.total_inode;
+ dst->volume.total_indirect += src->volume.total_indirect;
+ dst->volume.total_data += src->volume.total_data;
+ dst->volume.total_dirent += src->volume.total_dirent;
+
+ dst->freemap.total_freemap_node += src->freemap.total_freemap_node;
+ dst->freemap.total_freemap_leaf += src->freemap.total_freemap_leaf;
+
+ dst->count += src->count;
+}
+
+static int
+verify_blockref(const hammer2_volume_data_t *voldata,
+ const hammer2_blockref_t *bref, bool norecurse, blockref_stats_t *bstats,
+ struct blockref_tree *droot, delta_stats_t *dstats, int depth, int index)
+{
+ hammer2_media_data_t media;
+ hammer2_blockref_t *bscan;
+ int i, bcount;
+ bool failed = false;
+ size_t bytes;
+ uint32_t cv;
+ uint64_t cv64;
+ char msg[256];
+ SHA256_CTX hash_ctx;
+ union {
+ uint8_t digest[SHA256_DIGEST_LENGTH];
+ uint64_t digest64[SHA256_DIGEST_LENGTH/8];
+ } u;
+
+ /* only for DebugOpt > 1 */
+ if (DebugOpt > 1)
+ print_blockref_debug(stdout, depth, index, bref, NULL);
+
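+ /* Reuse cached subtree statistics if this exact blockref was seen before. */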
+ if (bref->data_off) {
+ struct blockref_entry *e, bref_find;
+ memset(&bref_find, 0, sizeof(bref_find));
+ bref_find.data_off = bref->data_off;
+ e = RB_FIND(blockref_tree, droot, &bref_find);
+ if (e) {
+ struct blockref_msg *m;
+ TAILQ_FOREACH(m, &e->head, entry) {
+ delta_stats_t *ds = m->msg;
+ if (!memcmp(&m->bref, bref, sizeof(*bref))) {
+ /* delta contains cached delta */
+ accumulate_delta_stats(dstats, ds);
+ load_delta_stats(bstats, ds);
+ print_blockref_debug(stdout, depth,
+ index, &m->bref, "cache-hit");
+ return 0;
+ }
+ }
+ }
+ }
+
+ bstats->total_blockref++;
+ dstats->total_blockref++;
+
+ switch (bref->type) {
+ case HAMMER2_BREF_TYPE_EMPTY:
+ if (CountEmpty) {
+ bstats->total_empty++;
+ dstats->total_empty++;
+ } else {
+ bstats->total_blockref--;
+ dstats->total_blockref--;
+ }
+ break;
+ case HAMMER2_BREF_TYPE_INODE:
+ bstats->volume.total_inode++;
+ dstats->volume.total_inode++;
+ break;
+ case HAMMER2_BREF_TYPE_INDIRECT:
+ bstats->volume.total_indirect++;
+ dstats->volume.total_indirect++;
+ break;
+ case HAMMER2_BREF_TYPE_DATA:
+ bstats->volume.total_data++;
+ dstats->volume.total_data++;
+ break;
+ case HAMMER2_BREF_TYPE_DIRENT:
+ bstats->volume.total_dirent++;
+ dstats->volume.total_dirent++;
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP_NODE:
+ bstats->freemap.total_freemap_node++;
+ dstats->freemap.total_freemap_node++;
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
+ bstats->freemap.total_freemap_leaf++;
+ dstats->freemap.total_freemap_leaf++;
+ break;
+ case HAMMER2_BREF_TYPE_VOLUME:
+ bstats->total_blockref--;
+ dstats->total_blockref--;
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP:
+ bstats->total_blockref--;
+ dstats->total_blockref--;
+ break;
+ default:
+ snprintf(msg, sizeof(msg), "Invalid blockref type %d",
+ bref->type);
+ add_blockref_entry(&bstats->root, bref, msg, strlen(msg) + 1);
+ print_blockref_debug(stdout, depth, index, bref, msg);
+ failed = true;
+ break;
+ }
+
+ switch (read_media(bref, &media, &bytes)) {
+ case -1:
+ strlcpy(msg, "Bad I/O bytes", sizeof(msg));
+ add_blockref_entry(&bstats->root, bref, msg, strlen(msg) + 1);
+ print_blockref_debug(stdout, depth, index, bref, msg);
+ return -1;
+ case -2:
+ strlcpy(msg, "Failed to read media", sizeof(msg));
+ add_blockref_entry(&bstats->root, bref, msg, strlen(msg) + 1);
+ print_blockref_debug(stdout, depth, index, bref, msg);
+ return -1;
+ default:
+ break;
+ }
+
+ if (bref->type != HAMMER2_BREF_TYPE_VOLUME &&
+ bref->type != HAMMER2_BREF_TYPE_FREEMAP) {
+ bstats->total_bytes += bytes;
+ dstats->total_bytes += bytes;
+ }
+
+ if (!CountEmpty && bref->type == HAMMER2_BREF_TYPE_EMPTY) {
+ assert(bytes == 0);
+ bstats->total_bytes -= bytes;
+ dstats->total_bytes -= bytes;
+ }
+
+ if (!DebugOpt && QuietOpt <= 0 && (bstats->total_blockref % 100) == 0)
+ print_blockref_stats(bstats, false);
+
+ if (!bytes)
+ goto end;
+
+ switch (HAMMER2_DEC_CHECK(bref->methods)) {
+ case HAMMER2_CHECK_ISCSI32:
+ cv = hammer2_icrc32(&media, bytes);
+ if (bref->check.iscsi32.value != cv) {
+ strlcpy(msg, "Bad HAMMER2_CHECK_ISCSI32", sizeof(msg));
+ add_blockref_entry(&bstats->root, bref, msg,
+ strlen(msg) + 1);
+ print_blockref_debug(stdout, depth, index, bref, msg);
+ failed = true;
+ }
+ break;
+ case HAMMER2_CHECK_XXHASH64:
+ cv64 = XXH64(&media, bytes, XXH_HAMMER2_SEED);
+ if (bref->check.xxhash64.value != cv64) {
+ strlcpy(msg, "Bad HAMMER2_CHECK_XXHASH64", sizeof(msg));
+ add_blockref_entry(&bstats->root, bref, msg,
+ strlen(msg) + 1);
+ print_blockref_debug(stdout, depth, index, bref, msg);
+ failed = true;
+ }
+ break;
+ case HAMMER2_CHECK_SHA192:
+ SHA256_Init(&hash_ctx);
+ SHA256_Update(&hash_ctx, &media, bytes);
+ SHA256_Final(u.digest, &hash_ctx);
+ u.digest64[2] ^= u.digest64[3];
+ if (memcmp(u.digest, bref->check.sha192.data,
+ sizeof(bref->check.sha192.data))) {
+ strlcpy(msg, "Bad HAMMER2_CHECK_SHA192", sizeof(msg));
+ add_blockref_entry(&bstats->root, bref, msg,
+ strlen(msg) + 1);
+ print_blockref_debug(stdout, depth, index, bref, msg);
+ failed = true;
+ }
+ break;
+ case HAMMER2_CHECK_FREEMAP:
+ cv = hammer2_icrc32(&media, bytes);
+ if (bref->check.freemap.icrc32 != cv) {
+ strlcpy(msg, "Bad HAMMER2_CHECK_FREEMAP", sizeof(msg));
+ add_blockref_entry(&bstats->root, bref, msg,
+ strlen(msg) + 1);
+ print_blockref_debug(stdout, depth, index, bref, msg);
+ failed = true;
+ }
+ break;
+ }
+
+ switch (bref->type) {
+ case HAMMER2_BREF_TYPE_INODE:
+ if (!(media.ipdata.meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA)) {
+ bscan = &media.ipdata.u.blockset.blockref[0];
+ bcount = HAMMER2_SET_COUNT;
+ } else {
+ bscan = NULL;
+ bcount = 0;
+ }
+ break;
+ case HAMMER2_BREF_TYPE_INDIRECT:
+ bscan = &media.npdata[0];
+ bcount = bytes / sizeof(hammer2_blockref_t);
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP_NODE:
+ bscan = &media.npdata[0];
+ bcount = bytes / sizeof(hammer2_blockref_t);
+ break;
+ case HAMMER2_BREF_TYPE_VOLUME:
+ bscan = &media.voldata.sroot_blockset.blockref[0];
+ bcount = HAMMER2_SET_COUNT;
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP:
+ bscan = &media.voldata.freemap_blockset.blockref[0];
+ bcount = HAMMER2_SET_COUNT;
+ break;
+ default:
+ bscan = NULL;
+ bcount = 0;
+ break;
+ }
+
+ if (ForceOpt)
+ norecurse = false;
+ /*
+ * If verification failed, still verify the direct children, but do
+ * not recurse beyond them.  Anything deeper is probably garbage.
+ */
+ for (i = 0; norecurse == false && i < bcount; ++i) {
+ delta_stats_t ds;
+ memset(&ds, 0, sizeof(ds));
+ if (verify_blockref(voldata, &bscan[i], failed, bstats, droot,
+ &ds, depth + 1, i) == -1)
+ return -1;
+ if (!failed)
+ accumulate_delta_stats(dstats, &ds);
+ }
+end:
+ if (failed)
+ return -1;
+
+ dstats->count++;
+ if (bref->data_off && BlockrefCacheCount > 0 &&
+ dstats->count >= BlockrefCacheCount) {
+ assert(bytes);
+ add_blockref_entry(droot, bref, dstats, sizeof(*dstats));
+ print_blockref_debug(stdout, depth, index, bref, "cache-add");
+ }
+
+ return 0;
+}
+
+static void
+print_pfs(const hammer2_inode_data_t *ipdata)
+{
+ const hammer2_inode_meta_t *meta = &ipdata->meta;
+ char *f, *pfs_id_str = NULL;
+ const char *type_str;
+ uuid_t uuid;
+
+ f = get_inode_filename(ipdata);
+ uuid = meta->pfs_clid;
+ hammer2_uuid_to_str(&uuid, &pfs_id_str);
+ if (meta->pfs_type == HAMMER2_PFSTYPE_MASTER) {
+ if (meta->pfs_subtype == HAMMER2_PFSSUBTYPE_NONE)
+ type_str = "MASTER";
+ else
+ type_str = hammer2_pfssubtype_to_str(meta->pfs_subtype);
+ } else {
+ type_str = hammer2_pfstype_to_str(meta->pfs_type);
+ }
+ tfprintf(stdout, 1, "%-11s %s %s\n", type_str, pfs_id_str, f);
+
+ free(f);
+ free(pfs_id_str);
+}
+
+static char*
+get_inode_filename(const hammer2_inode_data_t *ipdata)
+{
+ char *p = malloc(HAMMER2_INODE_MAXNAME + 1);
+
+ memcpy(p, ipdata->filename, sizeof(ipdata->filename));
+ p[HAMMER2_INODE_MAXNAME] = '\0';
+
+ return p;
+}
+
+static void
+__add_pfs_blockref(const hammer2_blockref_t *bref, struct blockref_list *blist,
+ const hammer2_inode_data_t *ipdata)
+{
+ struct blockref_msg *newp, *p;
+
+ newp = calloc(1, sizeof(*newp));
+ newp->bref = *bref;
+ newp->msg = calloc(1, sizeof(*ipdata));
+ memcpy(newp->msg, ipdata, sizeof(*ipdata));
+
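+ /* Insertion sort: keep the PFS list ordered by inode filename. */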
+ p = TAILQ_FIRST(blist);
+ while (p) {
+ char *f1 = get_inode_filename(newp->msg);
+ char *f2 = get_inode_filename(p->msg);
+ if (strcmp(f1, f2) <= 0) {
+ TAILQ_INSERT_BEFORE(p, newp, entry);
+ free(f1);
+ free(f2);
+ break;
+ }
+ p = TAILQ_NEXT(p, entry);
+ free(f1);
+ free(f2);
+ }
+ if (!p)
+ TAILQ_INSERT_TAIL(blist, newp, entry);
+}
+
+static int
+init_pfs_blockref(const hammer2_volume_data_t *voldata,
+ const hammer2_blockref_t *bref, struct blockref_list *blist)
+{
+ hammer2_media_data_t media;
+ hammer2_inode_data_t ipdata;
+ hammer2_blockref_t *bscan;
+ int i, bcount;
+ size_t bytes;
+
+ if (read_media(bref, &media, &bytes))
+ return -1;
+ if (!bytes)
+ return 0;
+
+ switch (bref->type) {
+ case HAMMER2_BREF_TYPE_INODE:
+ ipdata = media.ipdata;
+ if (ipdata.meta.pfs_type == HAMMER2_PFSTYPE_SUPROOT) {
+ bscan = &ipdata.u.blockset.blockref[0];
+ bcount = HAMMER2_SET_COUNT;
+ } else {
+ bscan = NULL;
+ bcount = 0;
+ if (ipdata.meta.op_flags & HAMMER2_OPFLAG_PFSROOT)
+ __add_pfs_blockref(bref, blist, &ipdata);
+ else
+ assert(0); /* should only see SUPROOT or PFS */
+ }
+ break;
+ case HAMMER2_BREF_TYPE_INDIRECT:
+ bscan = &media.npdata[0];
+ bcount = bytes / sizeof(hammer2_blockref_t);
+ break;
+ case HAMMER2_BREF_TYPE_VOLUME:
+ bscan = &media.voldata.sroot_blockset.blockref[0];
+ bcount = HAMMER2_SET_COUNT;
+ break;
+ default:
+ bscan = NULL;
+ bcount = 0;
+ break;
+ }
+
+ for (i = 0; i < bcount; ++i)
+ if (init_pfs_blockref(voldata, &bscan[i], blist) == -1)
+ return -1;
+ return 0;
+}
+
+static void
+cleanup_pfs_blockref(struct blockref_list *blist)
+{
+ cleanup_blockref_msg(blist);
+}
+
+static void
+print_media(FILE *fp, int tab, const hammer2_blockref_t *bref,
+ const hammer2_media_data_t *media, size_t media_bytes)
+{
+ const hammer2_blockref_t *bscan;
+ const hammer2_inode_data_t *ipdata;
+ int i, bcount, namelen;
+ char *str = NULL;
+ uuid_t uuid;
+
+ switch (bref->type) {
+ case HAMMER2_BREF_TYPE_INODE:
+ ipdata = &media->ipdata;
+ namelen = ipdata->meta.name_len;
+ if (namelen > HAMMER2_INODE_MAXNAME)
+ namelen = 0;
+ tfprintf(fp, tab, "filename \"%*.*s\"\n", namelen, namelen,
+ ipdata->filename);
+ tfprintf(fp, tab, "version %d\n", ipdata->meta.version);
+ if ((ipdata->meta.op_flags & HAMMER2_OPFLAG_PFSROOT) ||
+ ipdata->meta.pfs_type == HAMMER2_PFSTYPE_SUPROOT)
+ tfprintf(fp, tab, "pfs_subtype %d (%s)\n",
+ ipdata->meta.pfs_subtype,
+ hammer2_pfssubtype_to_str(ipdata->meta.pfs_subtype));
+ tfprintf(fp, tab, "uflags 0x%08x\n", ipdata->meta.uflags);
+ if (ipdata->meta.rmajor || ipdata->meta.rminor) {
+ tfprintf(fp, tab, "rmajor %d\n", ipdata->meta.rmajor);
+ tfprintf(fp, tab, "rminor %d\n", ipdata->meta.rminor);
+ }
+ tfprintf(fp, tab, "ctime %s\n",
+ hammer2_time64_to_str(ipdata->meta.ctime, &str));
+ tfprintf(fp, tab, "mtime %s\n",
+ hammer2_time64_to_str(ipdata->meta.mtime, &str));
+ tfprintf(fp, tab, "atime %s\n",
+ hammer2_time64_to_str(ipdata->meta.atime, &str));
+ tfprintf(fp, tab, "btime %s\n",
+ hammer2_time64_to_str(ipdata->meta.btime, &str));
+ uuid = ipdata->meta.uid;
+ tfprintf(fp, tab, "uid %s\n", hammer2_uuid_to_str(&uuid, &str));
+ uuid = ipdata->meta.gid;
+ tfprintf(fp, tab, "gid %s\n", hammer2_uuid_to_str(&uuid, &str));
+ tfprintf(fp, tab, "type %s\n",
+ hammer2_iptype_to_str(ipdata->meta.type));
+ tfprintf(fp, tab, "op_flags 0x%02x\n", ipdata->meta.op_flags);
+ tfprintf(fp, tab, "cap_flags 0x%04x\n", ipdata->meta.cap_flags);
+ tfprintf(fp, tab, "mode %-7o\n", ipdata->meta.mode);
+ tfprintf(fp, tab, "inum 0x%016jx\n", ipdata->meta.inum);
+ tfprintf(fp, tab, "size %ju ", (uintmax_t)ipdata->meta.size);
+ if (ipdata->meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA &&
+ ipdata->meta.size <= HAMMER2_EMBEDDED_BYTES)
+ printf("(embedded data)\n");
+ else
+ printf("\n");
+ tfprintf(fp, tab, "nlinks %ju\n",
+ (uintmax_t)ipdata->meta.nlinks);
+ tfprintf(fp, tab, "iparent 0x%016jx\n",
+ (uintmax_t)ipdata->meta.iparent);
+ tfprintf(fp, tab, "name_key 0x%016jx\n",
+ (uintmax_t)ipdata->meta.name_key);
+ tfprintf(fp, tab, "name_len %u\n", ipdata->meta.name_len);
+ tfprintf(fp, tab, "ncopies %u\n", ipdata->meta.ncopies);
+ tfprintf(fp, tab, "comp_algo %u\n", ipdata->meta.comp_algo);
+ tfprintf(fp, tab, "target_type %u\n", ipdata->meta.target_type);
+ tfprintf(fp, tab, "check_algo %u\n", ipdata->meta.check_algo);
+ if ((ipdata->meta.op_flags & HAMMER2_OPFLAG_PFSROOT) ||
+ ipdata->meta.pfs_type == HAMMER2_PFSTYPE_SUPROOT) {
+ tfprintf(fp, tab, "pfs_nmasters %u\n",
+ ipdata->meta.pfs_nmasters);
+ tfprintf(fp, tab, "pfs_type %u (%s)\n",
+ ipdata->meta.pfs_type,
+ hammer2_pfstype_to_str(ipdata->meta.pfs_type));
+ tfprintf(fp, tab, "pfs_inum 0x%016jx\n",
+ (uintmax_t)ipdata->meta.pfs_inum);
+ uuid = ipdata->meta.pfs_clid;
+ tfprintf(fp, tab, "pfs_clid %s\n",
+ hammer2_uuid_to_str(&uuid, &str));
+ uuid = ipdata->meta.pfs_fsid;
+ tfprintf(fp, tab, "pfs_fsid %s\n",
+ hammer2_uuid_to_str(&uuid, &str));
+ tfprintf(fp, tab, "pfs_lsnap_tid 0x%016jx\n",
+ (uintmax_t)ipdata->meta.pfs_lsnap_tid);
+ }
+ tfprintf(fp, tab, "data_quota %ju\n",
+ (uintmax_t)ipdata->meta.data_quota);
+ tfprintf(fp, tab, "data_count %ju\n",
+ (uintmax_t)bref->embed.stats.data_count);
+ tfprintf(fp, tab, "inode_quota %ju\n",
+ (uintmax_t)ipdata->meta.inode_quota);
+ tfprintf(fp, tab, "inode_count %ju\n",
+ (uintmax_t)bref->embed.stats.inode_count);
+ break;
+ case HAMMER2_BREF_TYPE_INDIRECT:
+ bcount = media_bytes / sizeof(hammer2_blockref_t);
+ for (i = 0; i < bcount; ++i) {
+ bscan = &media->npdata[i];
+ tfprintf(fp, tab, "%3d %016jx %-12s %016jx/%-2d\n",
+ i, (uintmax_t)bscan->data_off,
+ hammer2_breftype_to_str(bscan->type),
+ (uintmax_t)bscan->key,
+ bscan->keybits);
+ }
+ break;
+ case HAMMER2_BREF_TYPE_DIRENT:
+ if (bref->embed.dirent.namlen <= sizeof(bref->check.buf)) {
+ tfprintf(fp, tab, "filename \"%*.*s\"\n",
+ bref->embed.dirent.namlen,
+ bref->embed.dirent.namlen,
+ bref->check.buf);
+ } else {
+ tfprintf(fp, tab, "filename \"%*.*s\"\n",
+ bref->embed.dirent.namlen,
+ bref->embed.dirent.namlen,
+ media->buf);
+ }
+ tfprintf(fp, tab, "inum 0x%016jx\n",
+ (uintmax_t)bref->embed.dirent.inum);
+ tfprintf(fp, tab, "namlen %d\n",
+ (uintmax_t)bref->embed.dirent.namlen);
+ tfprintf(fp, tab, "type %s\n",
+ hammer2_iptype_to_str(bref->embed.dirent.type));
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP_NODE:
+ bcount = media_bytes / sizeof(hammer2_blockref_t);
+ for (i = 0; i < bcount; ++i) {
+ bscan = &media->npdata[i];
+ tfprintf(fp, tab, "%3d %016jx %-12s %016jx/%-2d\n",
+ i, (uintmax_t)bscan->data_off,
+ hammer2_breftype_to_str(bscan->type),
+ (uintmax_t)bscan->key,
+ bscan->keybits);
+ }
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
+ for (i = 0; i < HAMMER2_FREEMAP_COUNT; ++i) {
+ hammer2_off_t data_off = bref->key +
+ i * HAMMER2_FREEMAP_LEVEL0_SIZE;
+#if HAMMER2_BMAP_ELEMENTS != 8
+#error "HAMMER2_BMAP_ELEMENTS != 8"
+#endif
+ tfprintf(fp, tab, "%016jx %04d.%04x (avail=%7d) "
+ "%016jx %016jx %016jx %016jx "
+ "%016jx %016jx %016jx %016jx\n",
+ data_off, i, media->bmdata[i].class,
+ media->bmdata[i].avail,
+ media->bmdata[i].bitmapq[0],
+ media->bmdata[i].bitmapq[1],
+ media->bmdata[i].bitmapq[2],
+ media->bmdata[i].bitmapq[3],
+ media->bmdata[i].bitmapq[4],
+ media->bmdata[i].bitmapq[5],
+ media->bmdata[i].bitmapq[6],
+ media->bmdata[i].bitmapq[7]);
+ }
+ break;
+ default:
+ break;
+ }
+ if (str)
+ free(str);
+}
+
+int
+test_hammer2(const char *devpath)
+{
+ bool failed = false;
+
+ hammer2_init_volumes(devpath, 1);
+
+ best_zone = find_best_zone();
+ if (best_zone == -1)
+ fprintf(stderr, "Failed to find best zone\n");
+
+ if (PrintPFS) {
+ if (test_pfs_blockref() == -1)
+ failed = true;
+ goto end; /* print PFS info and exit */
+ }
+
+ printf("volume header\n");
+ if (test_volume_header() == -1) {
+ failed = true;
+ if (!ForceOpt)
+ goto end;
+ }
+
+ printf("freemap\n");
+ if (test_blockref(HAMMER2_BREF_TYPE_FREEMAP) == -1) {
+ failed = true;
+ if (!ForceOpt)
+ goto end;
+ }
+ printf("volume\n");
+ if (!ScanPFS) {
+ if (test_blockref(HAMMER2_BREF_TYPE_VOLUME) == -1) {
+ failed = true;
+ if (!ForceOpt)
+ goto end;
+ }
+ } else {
+ if (test_pfs_blockref() == -1) {
+ failed = true;
+ if (!ForceOpt)
+ goto end;
+ }
+ }
+end:
+ hammer2_cleanup_volumes();
+
+ return failed ? -1 : 0;
+}
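
test.c builds its blockref caches on the sys/tree.h red-black macros, keying each entry on bref->data_off exactly as blockref_cmp does. A stripped-down sketch of that RB pattern, runnable on its own (off_node and off_tree are illustrative names):

#include <sys/tree.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustrative node keyed on a 64-bit media offset, like blockref_entry. */
struct off_node {
	RB_ENTRY(off_node) entry;
	unsigned long long data_off;
};

static int
off_cmp(struct off_node *a, struct off_node *b)
{
	if (a->data_off < b->data_off)
		return -1;
	if (a->data_off > b->data_off)
		return 1;
	return 0;
}

RB_HEAD(off_tree, off_node);
RB_PROTOTYPE(off_tree, off_node, entry, off_cmp);
RB_GENERATE(off_tree, off_node, entry, off_cmp);

int
main(void)
{
	struct off_tree root = RB_INITIALIZER(&root);
	struct off_node *n, find;

	n = calloc(1, sizeof(*n));
	if (n == NULL)
		return 1;
	n->data_off = 0x10000;	/* key: media offset */
	RB_INSERT(off_tree, &root, n);

	find.data_off = 0x10000;
	printf("lookup: %s\n",
	    RB_FIND(off_tree, &root, &find) ? "hit" : "miss");
	return 0;
}
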
diff --git a/sbin/hammer2/Makefile b/sbin/hammer2/Makefile
new file mode 100644
--- /dev/null
+++ b/sbin/hammer2/Makefile
@@ -0,0 +1,18 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+PROG= hammer2
+SRCS= cmd_debug.c cmd_pfs.c cmd_stat.c cmd_volume.c main.c ondisk.c \
+ print_inode.c subs.c xxhash.c gsb_crc32.c
+MAN= hammer2.8
+
+.PATH: ${SRCTOP}/sys/fs/hammer2/xxhash ${SRCTOP}/sys/libkern
+
+WARNS?= 3
+
+CFLAGS+= -DXXH_NAMESPACE=h2_
+CFLAGS+= -I${SRCTOP}/sys
+
+LIBADD= md
+
+.include <bsd.prog.mk>
diff --git a/sbin/hammer2/cmd_debug.c b/sbin/hammer2/cmd_debug.c
new file mode 100644
--- /dev/null
+++ b/sbin/hammer2/cmd_debug.c
@@ -0,0 +1,953 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "hammer2.h"
+
+#include <crypto/sha2/sha256.h>
+
+#define GIG (1024LL*1024*1024)
+
+static int show_all_volume_headers = 0;
+static int show_tab = 2;
+static int show_depth = -1;
+static hammer2_tid_t show_min_mirror_tid = 0;
+static hammer2_tid_t show_min_modify_tid = 0;
+
+static void count_blocks(hammer2_bmap_data_t *bmap, int value,
+ hammer2_off_t *accum16, hammer2_off_t *accum64);
+
+/************************************************************************
+ * SHOW *
+ ************************************************************************/
+
+static void show_volhdr(hammer2_volume_data_t *voldata, int bi);
+static void show_bref(hammer2_volume_data_t *voldata, int tab,
+ int bi, hammer2_blockref_t *bref, int norecurse);
+static void tabprintf(int tab, const char *ctl, ...);
+
+static hammer2_off_t TotalAccum16[4]; /* includes TotalAccum64 */
+static hammer2_off_t TotalAccum64[4];
+static hammer2_off_t TotalUnavail;
+static hammer2_off_t TotalFreemap;
+
+static
+hammer2_off_t
+get_next_volume(hammer2_volume_data_t *voldata, hammer2_off_t volu_loff)
+{
+ hammer2_off_t ret = -1;
+ int i;
+
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ hammer2_off_t tmp = voldata->volu_loff[i];
+ if (tmp > volu_loff) {
+ ret = tmp;
+ break;
+ }
+ }
+ return ret;
+}
+
+int
+cmd_show(const char *devpath, int which)
+{
+ hammer2_blockref_t broot;
+ hammer2_blockref_t best;
+ hammer2_media_data_t media;
+ hammer2_media_data_t best_media;
+ hammer2_off_t off, volu_loff, next_volu_loff = 0;
+ int fd;
+ int i;
+ int best_i;
+ char *env;
+
+ memset(TotalAccum16, 0, sizeof(TotalAccum16));
+ memset(TotalAccum64, 0, sizeof(TotalAccum64));
+ TotalUnavail = TotalFreemap = 0;
+
+ env = getenv("HAMMER2_SHOW_ALL_VOLUME_HEADERS");
+ if (env != NULL) {
+ show_all_volume_headers = (int)strtol(env, NULL, 0);
+ if (errno)
+ show_all_volume_headers = 0;
+ }
+ env = getenv("HAMMER2_SHOW_TAB");
+ if (env != NULL) {
+ show_tab = (int)strtol(env, NULL, 0);
+ if (errno || show_tab < 0 || show_tab > 8)
+ show_tab = 2;
+ }
+ env = getenv("HAMMER2_SHOW_DEPTH");
+ if (env != NULL) {
+ show_depth = (int)strtol(env, NULL, 0);
+ if (errno || show_depth < 0)
+ show_depth = -1;
+ }
+ env = getenv("HAMMER2_SHOW_MIN_MIRROR_TID");
+ if (env != NULL) {
+ show_min_mirror_tid = (hammer2_tid_t)strtoull(env, NULL, 16);
+ if (errno)
+ show_min_mirror_tid = 0;
+ }
+ env = getenv("HAMMER2_SHOW_MIN_MODIFY_TID");
+ if (env != NULL) {
+ show_min_modify_tid = (hammer2_tid_t)strtoull(env, NULL, 16);
+ if (errno)
+ show_min_modify_tid = 0;
+ }
+
+ hammer2_init_volumes(devpath, 1);
+ int all_volume_headers = VerboseOpt >= 3 || show_all_volume_headers;
+next_volume:
+ volu_loff = next_volu_loff;
+ next_volu_loff = -1;
+ printf("%s\n", hammer2_get_volume_path(volu_loff));
+ /*
+ * Show the tree using the best volume header.
+ * -vvv will show the tree for all four volume headers.
+ */
+ best_i = -1;
+ bzero(&best, sizeof(best));
+ bzero(&best_media, sizeof(best_media));
+ for (i = 0; i < HAMMER2_NUM_VOLHDRS; ++i) {
+ bzero(&broot, sizeof(broot));
+ broot.data_off = (i * HAMMER2_ZONE_BYTES64) | HAMMER2_PBUFRADIX;
+ off = broot.data_off & ~HAMMER2_OFF_MASK_RADIX;
+ fd = hammer2_get_volume_fd(volu_loff);
+ lseek(fd, off, SEEK_SET);
+ if (read(fd, &media, HAMMER2_PBUFSIZE) ==
+ (ssize_t)HAMMER2_PBUFSIZE) {
+ broot.mirror_tid = media.voldata.mirror_tid;
+ if (best_i < 0 || best.mirror_tid < broot.mirror_tid) {
+ best_i = i;
+ best = broot;
+ best_media = media;
+ }
+ printf("Volume header %d: mirror_tid=%016jx\n",
+ i, (intmax_t)broot.mirror_tid);
+
+ if (all_volume_headers) {
+ switch(which) {
+ case 0:
+ broot.type = HAMMER2_BREF_TYPE_VOLUME;
+ show_bref(&media.voldata, 0, i, &broot,
+ 0);
+ break;
+ case 1:
+ broot.type = HAMMER2_BREF_TYPE_FREEMAP;
+ show_bref(&media.voldata, 0, i, &broot,
+ 0);
+ break;
+ default:
+ show_volhdr(&media.voldata, i);
+ if (i == 0)
+ next_volu_loff = get_next_volume(&media.voldata, volu_loff);
+ break;
+ }
+ if (i != HAMMER2_NUM_VOLHDRS - 1)
+ printf("\n");
+ }
+ }
+ }
+ if (next_volu_loff != (hammer2_off_t)-1) {
+ printf("---------------------------------------------\n");
+ goto next_volume;
+ }
+
+ if (!all_volume_headers) {
+ switch(which) {
+ case 0:
+ best.type = HAMMER2_BREF_TYPE_VOLUME;
+ show_bref(&best_media.voldata, 0, best_i, &best, 0);
+ break;
+ case 1:
+ best.type = HAMMER2_BREF_TYPE_FREEMAP;
+ show_bref(&best_media.voldata, 0, best_i, &best, 0);
+ break;
+ default:
+ show_volhdr(&best_media.voldata, best_i);
+ next_volu_loff = get_next_volume(&best_media.voldata, volu_loff);
+ if (next_volu_loff != (hammer2_off_t)-1) {
+ printf("---------------------------------------------\n");
+ goto next_volume;
+ }
+ break;
+ }
+ }
+
+ if (which == 1 && VerboseOpt < 3) {
+ printf("Total unallocated storage: %6.3fGiB (%6.3fGiB in 64KB chunks)\n",
+ (double)TotalAccum16[0] / GIG,
+ (double)TotalAccum64[0] / GIG);
+ printf("Total possibly free storage: %6.3fGiB (%6.3fGiB in 64KB chunks)\n",
+ (double)TotalAccum16[2] / GIG,
+ (double)TotalAccum64[2] / GIG);
+ printf("Total allocated storage: %6.3fGiB (%6.3fGiB in 64KB chunks)\n",
+ (double)TotalAccum16[3] / GIG,
+ (double)TotalAccum64[3] / GIG);
+ printf("Total unavailable storage: %6.3fGiB\n",
+ (double)TotalUnavail / GIG);
+ printf("Total freemap storage: %6.3fGiB\n",
+ (double)TotalFreemap / GIG);
+ }
+ hammer2_cleanup_volumes();
+
+ return 0;
+}
+
+static void
+show_volhdr(hammer2_volume_data_t *voldata, int bi)
+{
+ uint32_t i;
+ char *str;
+ const char *name;
+ char *buf;
+ uuid_t uuid;
+
+ printf("\nVolume header %d {\n", bi);
+ printf(" magic 0x%016jx\n", (intmax_t)voldata->magic);
+ printf(" boot_beg 0x%016jx\n", (intmax_t)voldata->boot_beg);
+ printf(" boot_end 0x%016jx (%6.2fMB)\n",
+ (intmax_t)voldata->boot_end,
+ (double)(voldata->boot_end - voldata->boot_beg) /
+ (1024.0*1024.0));
+ printf(" aux_beg 0x%016jx\n", (intmax_t)voldata->aux_beg);
+ printf(" aux_end 0x%016jx (%6.2fMB)\n",
+ (intmax_t)voldata->aux_end,
+ (double)(voldata->aux_end - voldata->aux_beg) /
+ (1024.0*1024.0));
+ printf(" volu_size 0x%016jx (%6.2fGiB)\n",
+ (intmax_t)voldata->volu_size,
+ (double)voldata->volu_size / GIG);
+ printf(" version %d\n", voldata->version);
+ printf(" flags 0x%08x\n", voldata->flags);
+ printf(" copyid %d\n", voldata->copyid);
+ printf(" freemap_vers %d\n", voldata->freemap_version);
+ printf(" peer_type %d\n", voldata->peer_type);
+ printf(" volu_id %d\n", voldata->volu_id);
+ printf(" nvolumes %d\n", voldata->nvolumes);
+
+ str = NULL;
+ uuid = voldata->fsid;
+ hammer2_uuid_to_str(&uuid, &str);
+ printf(" fsid %s\n", str);
+ free(str);
+
+ str = NULL;
+ uuid = voldata->fstype;
+ hammer2_uuid_to_str(&uuid, &str);
+ printf(" fstype %s\n", str);
+ if (!strcmp(str, "5cbb9ad1-862d-11dc-a94d-01301bb8a9f5"))
+ name = "DragonFly HAMMER2";
+ else
+ name = "?";
+ printf(" (%s)\n", name);
+ free(str);
+
+ printf(" allocator_size 0x%016jx (%6.2fGiB)\n",
+ voldata->allocator_size,
+ (double)voldata->allocator_size / GIG);
+ printf(" allocator_free 0x%016jx (%6.2fGiB)\n",
+ voldata->allocator_free,
+ (double)voldata->allocator_free / GIG);
+ printf(" allocator_beg 0x%016jx (%6.2fGiB)\n",
+ voldata->allocator_beg,
+ (double)voldata->allocator_beg / GIG);
+
+ printf(" mirror_tid 0x%016jx\n", voldata->mirror_tid);
+ printf(" reserved0080 0x%016jx\n", voldata->reserved0080);
+ printf(" reserved0088 0x%016jx\n", voldata->reserved0088);
+ printf(" freemap_tid 0x%016jx\n", voldata->freemap_tid);
+ printf(" bulkfree_tid 0x%016jx\n", voldata->bulkfree_tid);
+ for (i = 0; i < nitems(voldata->reserved00A0); ++i) {
+ printf(" reserved00A0/%u 0x%016jx\n",
+ i, voldata->reserved00A0[i]);
+ }
+ printf(" total_size 0x%016jx\n", voldata->total_size);
+
+ printf(" copyexists ");
+ for (i = 0; i < nitems(voldata->copyexists); ++i)
+ printf(" 0x%02x", voldata->copyexists[i]);
+ printf("\n");
+
+ /*
+ * NOTE: Index numbers and ICRC_SECTn definitions are not matched;
+ * the ICRC for sector 0, for example, actually uses the last
+ * index.
+ *
+ * NOTE: A mismatch of the whole-voldata CRC is not critical, since
+ * certain sub-areas of the volume header have their own
+ * CRCs.
+ */
+ printf("\n");
+ for (i = 0; i < nitems(voldata->icrc_sects); ++i) {
+ printf(" icrc_sects[%u] ", i);
+ switch(i) {
+ case HAMMER2_VOL_ICRC_SECT0:
+ printf("0x%08x/0x%08x",
+ hammer2_icrc32((char *)voldata +
+ HAMMER2_VOLUME_ICRC0_OFF,
+ HAMMER2_VOLUME_ICRC0_SIZE),
+ voldata->icrc_sects[HAMMER2_VOL_ICRC_SECT0]);
+ if (hammer2_icrc32((char *)voldata +
+ HAMMER2_VOLUME_ICRC0_OFF,
+ HAMMER2_VOLUME_ICRC0_SIZE) ==
+ voldata->icrc_sects[HAMMER2_VOL_ICRC_SECT0]) {
+ printf(" (OK)");
+ } else {
+ printf(" (FAILED)");
+ }
+ break;
+ case HAMMER2_VOL_ICRC_SECT1:
+ printf("0x%08x/0x%08x",
+ hammer2_icrc32((char *)voldata +
+ HAMMER2_VOLUME_ICRC1_OFF,
+ HAMMER2_VOLUME_ICRC1_SIZE),
+ voldata->icrc_sects[HAMMER2_VOL_ICRC_SECT1]);
+ if (hammer2_icrc32((char *)voldata +
+ HAMMER2_VOLUME_ICRC1_OFF,
+ HAMMER2_VOLUME_ICRC1_SIZE) ==
+ voldata->icrc_sects[HAMMER2_VOL_ICRC_SECT1]) {
+ printf(" (OK)");
+ } else {
+ printf(" (FAILED)");
+ }
+
+ break;
+ default:
+ printf("0x%08x (reserved)", voldata->icrc_sects[i]);
+ break;
+ }
+ printf("\n");
+ }
+ printf(" icrc_volhdr 0x%08x/0x%08x",
+ hammer2_icrc32((char *)voldata + HAMMER2_VOLUME_ICRCVH_OFF,
+ HAMMER2_VOLUME_ICRCVH_SIZE),
+ voldata->icrc_volheader);
+ if (hammer2_icrc32((char *)voldata + HAMMER2_VOLUME_ICRCVH_OFF,
+ HAMMER2_VOLUME_ICRCVH_SIZE) ==
+ voldata->icrc_volheader) {
+ printf(" (OK)\n");
+ } else {
+ printf(" (FAILED - not a critical error)\n");
+ }
+
+ /*
+ * The super-root and freemap blocksets (not recursed)
+ */
+ printf("\n");
+ printf(" sroot_blockset {\n");
+ for (i = 0; i < HAMMER2_SET_COUNT; ++i) {
+ show_bref(voldata, 16, i,
+ &voldata->sroot_blockset.blockref[i], 2);
+ }
+ printf(" }\n");
+
+ printf(" freemap_blockset {\n");
+ for (i = 0; i < HAMMER2_SET_COUNT; ++i) {
+ show_bref(voldata, 16, i,
+ &voldata->freemap_blockset.blockref[i], 2);
+ }
+ printf(" }\n");
+
+ buf = calloc(1, sizeof(voldata->volu_loff));
+ if (bcmp(buf, voldata->volu_loff, sizeof(voldata->volu_loff))) {
+ printf("\n");
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ hammer2_off_t loff = voldata->volu_loff[i];
+ if (loff != (hammer2_off_t)-1)
+ printf(" volu_loff[%d] 0x%016jx\n", i, loff);
+ }
+ }
+ free(buf);
+
+ printf("}\n");
+}
+
+static void
+show_bref(hammer2_volume_data_t *voldata, int tab, int bi,
+ hammer2_blockref_t *bref, int norecurse)
+{
+ hammer2_media_data_t media;
+ hammer2_blockref_t *bscan;
+ hammer2_off_t tmp;
+ int i, bcount, namelen, failed, obrace, fd;
+ int type_pad;
+ size_t bytes;
+ const char *type_str;
+ char *str = NULL;
+ uint32_t cv;
+ uint64_t cv64;
+ static int init_tab = -1;
+ uuid_t uuid;
+
+ SHA256_CTX hash_ctx;
+ union {
+ uint8_t digest[SHA256_DIGEST_LENGTH];
+ uint64_t digest64[SHA256_DIGEST_LENGTH/8];
+ } u;
+
+ /* omit if smaller than minimum mirror_tid threshold */
+ if (bref->mirror_tid < show_min_mirror_tid)
+ return;
+ /* omit if smaller than minimum modify_tid threshold */
+ if (bref->modify_tid < show_min_modify_tid) {
+ if (bref->modify_tid)
+ return;
+ else if (bref->type == HAMMER2_BREF_TYPE_INODE && !bref->leaf_count)
+ return;
+ }
+
+ if (init_tab == -1)
+ init_tab = tab;
+
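+ /*
+ * The low bits of data_off encode the size radix of the
+ * referenced block; e.g. a radix of 16 means a 65536-byte
+ * block, and 0 means the bref carries no data.
+ */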
+ bytes = (bref->data_off & HAMMER2_OFF_MASK_RADIX);
+ if (bytes)
+ bytes = (size_t)1 << bytes;
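+ /*
+ * Read the media through an LBUFSIZE-aligned window large
+ * enough to cover the block, then slide the payload to the
+ * front of the buffer if it was not already aligned.
+ */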
+ if (bytes) {
+ hammer2_off_t io_off;
+ hammer2_off_t io_base;
+ size_t io_bytes;
+ size_t boff;
+
+ io_off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
+ io_base = io_off & ~(hammer2_off_t)(HAMMER2_LBUFSIZE - 1);
+ boff = io_off - io_base;
+
+ io_bytes = HAMMER2_LBUFSIZE;
+ while (io_bytes + boff < bytes)
+ io_bytes <<= 1;
+
+ if (io_bytes > sizeof(media)) {
+ printf("(bad block size %zu)\n", bytes);
+ return;
+ }
+ if (bref->type != HAMMER2_BREF_TYPE_DATA || VerboseOpt >= 1) {
+ fd = hammer2_get_volume_fd(io_off);
+ lseek(fd, io_base - hammer2_get_volume_offset(io_base),
+ SEEK_SET);
+ if (read(fd, &media, io_bytes) != (ssize_t)io_bytes) {
+ printf("(media read failed)\n");
+ return;
+ }
+ if (boff)
+ bcopy((char *)&media + boff, &media, bytes);
+ }
+ }
+
+ bscan = NULL;
+ bcount = 0;
+ namelen = 0;
+ failed = 0;
+ obrace = 1;
+
+ type_str = hammer2_breftype_to_str(bref->type);
+ type_pad = 8 - strlen(type_str);
+ if (type_pad < 0)
+ type_pad = 0;
+
+ switch(bref->type) {
+ case HAMMER2_BREF_TYPE_INODE:
+ assert(bytes);
+ if (!(media.ipdata.meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA)) {
+ bscan = &media.ipdata.u.blockset.blockref[0];
+ bcount = HAMMER2_SET_COUNT;
+ }
+ break;
+ case HAMMER2_BREF_TYPE_INDIRECT:
+ assert(bytes);
+ bscan = &media.npdata[0];
+ bcount = bytes / sizeof(hammer2_blockref_t);
+ break;
+ case HAMMER2_BREF_TYPE_VOLUME:
+ bscan = &media.voldata.sroot_blockset.blockref[0];
+ bcount = HAMMER2_SET_COUNT;
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP:
+ bscan = &media.voldata.freemap_blockset.blockref[0];
+ bcount = HAMMER2_SET_COUNT;
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP_NODE:
+ assert(bytes);
+ bscan = &media.npdata[0];
+ bcount = bytes / sizeof(hammer2_blockref_t);
+ break;
+ }
+
+ if (QuietOpt > 0) {
+ tabprintf(tab,
+ "%s.%-3d %016jx %016jx/%-2d "
+ "vol=%d mir=%016jx mod=%016jx leafcnt=%d ",
+ type_str, bi, (intmax_t)bref->data_off,
+ (intmax_t)bref->key, (intmax_t)bref->keybits,
+ hammer2_get_volume_id(bref->data_off),
+ (intmax_t)bref->mirror_tid,
+ (intmax_t)bref->modify_tid,
+ bref->leaf_count);
+ } else {
+ tabprintf(tab, "%s.%-3d%*.*s %016jx %016jx/%-2d ",
+ type_str, bi, type_pad, type_pad, "",
+ (intmax_t)bref->data_off,
+ (intmax_t)bref->key, (intmax_t)bref->keybits);
+ /*if (norecurse > 1)*/ {
+ printf("\n");
+ tabprintf(tab + 13, "");
+ }
+ printf("vol=%d mir=%016jx mod=%016jx lfcnt=%d ",
+ hammer2_get_volume_id(bref->data_off),
+ (intmax_t)bref->mirror_tid, (intmax_t)bref->modify_tid,
+ bref->leaf_count);
+ if (/*norecurse > 1 && */ (bcount || bref->flags ||
+ bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
+ bref->type == HAMMER2_BREF_TYPE_FREEMAP_LEAF)) {
+ printf("\n");
+ tabprintf(tab + 13, "");
+ }
+ }
+
+ if (bcount)
+ printf("bcnt=%d ", bcount);
+ if (bref->flags)
+ printf("flags=%02x ", bref->flags);
+ if (bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
+ bref->type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
+ printf("bigmask=%08x avail=%ju ",
+ bref->check.freemap.bigmask,
+ (uintmax_t)bref->check.freemap.avail);
+ }
+
+ /*
+ * Check data integrity in verbose mode, otherwise we are just doing
+ * a quick meta-data scan. Meta-data integrity is always checked.
+ * (Also see the check above that ensures the media data is loaded,
+ * otherwise there's no data to check!).
+ *
+ * WARNING! bref->check state may be used for other things when
+ * bref has no data (bytes == 0).
+ */
+ if (bytes &&
+ (bref->type != HAMMER2_BREF_TYPE_DATA || VerboseOpt >= 1)) {
+ if (!(QuietOpt > 0)) {
+ /*if (norecurse > 1)*/ {
+ printf("\n");
+ tabprintf(tab + 13, "");
+ }
+ }
+
+ switch(HAMMER2_DEC_CHECK(bref->methods)) {
+ case HAMMER2_CHECK_NONE:
+ printf("meth=%02x ", bref->methods);
+ break;
+ case HAMMER2_CHECK_DISABLED:
+ printf("meth=%02x ", bref->methods);
+ break;
+ case HAMMER2_CHECK_ISCSI32:
+ cv = hammer2_icrc32(&media, bytes);
+ if (bref->check.iscsi32.value != cv) {
+ printf("(icrc %02x:%08x/%08x failed) ",
+ bref->methods,
+ bref->check.iscsi32.value,
+ cv);
+ failed = 1;
+ } else {
+ printf("meth=%02x iscsi32=%08x ",
+ bref->methods, cv);
+ }
+ break;
+ case HAMMER2_CHECK_XXHASH64:
+ cv64 = XXH64(&media, bytes, XXH_HAMMER2_SEED);
+ if (bref->check.xxhash64.value != cv64) {
+ printf("(xxhash64 %02x:%016jx/%016jx failed) ",
+ bref->methods,
+ bref->check.xxhash64.value,
+ cv64);
+ failed = 1;
+ } else {
+ printf("meth=%02x xxh=%016jx ",
+ bref->methods, cv64);
+ }
+ break;
+ case HAMMER2_CHECK_SHA192:
+ SHA256_Init(&hash_ctx);
+ SHA256_Update(&hash_ctx, &media, bytes);
+ SHA256_Final(u.digest, &hash_ctx);
+ u.digest64[2] ^= u.digest64[3];
+ if (memcmp(u.digest, bref->check.sha192.data,
+ sizeof(bref->check.sha192.data))) {
+ printf("(sha192 failed) ");
+ failed = 1;
+ } else {
+ printf("meth=%02x ", bref->methods);
+ }
+ break;
+ case HAMMER2_CHECK_FREEMAP:
+ cv = hammer2_icrc32(&media, bytes);
+ if (bref->check.freemap.icrc32 != cv) {
+ printf("(fcrc %02x:%08x/%08x failed) ",
+ bref->methods,
+ bref->check.freemap.icrc32,
+ cv);
+ failed = 1;
+ } else {
+ printf("meth=%02x fcrc=%08x ",
+ bref->methods, cv);
+ }
+ break;
+ }
+ }
+
+ tab += show_tab;
+
+ if (QuietOpt > 0) {
+ obrace = 0;
+ printf("\n");
+ goto skip_data;
+ }
+
+ switch(bref->type) {
+ case HAMMER2_BREF_TYPE_EMPTY:
+ if (norecurse)
+ printf("\n");
+ obrace = 0;
+ break;
+ case HAMMER2_BREF_TYPE_DIRENT:
+ printf("{\n");
+ if (bref->embed.dirent.namlen <= sizeof(bref->check.buf)) {
+ tabprintf(tab, "filename \"%*.*s\"\n",
+ bref->embed.dirent.namlen,
+ bref->embed.dirent.namlen,
+ bref->check.buf);
+ } else {
+ tabprintf(tab, "filename \"%*.*s\"\n",
+ bref->embed.dirent.namlen,
+ bref->embed.dirent.namlen,
+ media.buf);
+ }
+ tabprintf(tab, "inum 0x%016jx\n",
+ (uintmax_t)bref->embed.dirent.inum);
+ tabprintf(tab, "nlen %d\n", bref->embed.dirent.namlen);
+ tabprintf(tab, "type %s\n",
+ hammer2_iptype_to_str(bref->embed.dirent.type));
+ break;
+ case HAMMER2_BREF_TYPE_INODE:
+ printf("{\n");
+ namelen = media.ipdata.meta.name_len;
+ if (namelen > HAMMER2_INODE_MAXNAME)
+ namelen = 0;
+ tabprintf(tab, "filename \"%*.*s\"\n",
+ namelen, namelen, media.ipdata.filename);
+ tabprintf(tab, "version %d\n", media.ipdata.meta.version);
+ if ((media.ipdata.meta.op_flags & HAMMER2_OPFLAG_PFSROOT) ||
+ media.ipdata.meta.pfs_type == HAMMER2_PFSTYPE_SUPROOT) {
+ tabprintf(tab, "pfs_st %d (%s)\n",
+ media.ipdata.meta.pfs_subtype,
+ hammer2_pfssubtype_to_str(media.ipdata.meta.pfs_subtype));
+ }
+ tabprintf(tab, "uflags 0x%08x\n",
+ media.ipdata.meta.uflags);
+ if (media.ipdata.meta.rmajor || media.ipdata.meta.rminor) {
+ tabprintf(tab, "rmajor %d\n",
+ media.ipdata.meta.rmajor);
+ tabprintf(tab, "rminor %d\n",
+ media.ipdata.meta.rminor);
+ }
+ tabprintf(tab, "ctime %s\n",
+ hammer2_time64_to_str(media.ipdata.meta.ctime, &str));
+ tabprintf(tab, "mtime %s\n",
+ hammer2_time64_to_str(media.ipdata.meta.mtime, &str));
+ tabprintf(tab, "atime %s\n",
+ hammer2_time64_to_str(media.ipdata.meta.atime, &str));
+ tabprintf(tab, "btime %s\n",
+ hammer2_time64_to_str(media.ipdata.meta.btime, &str));
+ uuid = media.ipdata.meta.uid;
+ tabprintf(tab, "uid %s\n",
+ hammer2_uuid_to_str(&uuid, &str));
+ uuid = media.ipdata.meta.gid;
+ tabprintf(tab, "gid %s\n",
+ hammer2_uuid_to_str(&uuid, &str));
+ tabprintf(tab, "type %s\n",
+ hammer2_iptype_to_str(media.ipdata.meta.type));
+ tabprintf(tab, "opflgs 0x%02x\n",
+ media.ipdata.meta.op_flags);
+ tabprintf(tab, "capflgs 0x%04x\n",
+ media.ipdata.meta.cap_flags);
+ tabprintf(tab, "mode %-7o\n",
+ media.ipdata.meta.mode);
+ tabprintf(tab, "inum 0x%016jx\n",
+ media.ipdata.meta.inum);
+ tabprintf(tab, "size %ju ",
+ (uintmax_t)media.ipdata.meta.size);
+ if (media.ipdata.meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA &&
+ media.ipdata.meta.size <= HAMMER2_EMBEDDED_BYTES)
+ printf("(embedded data)\n");
+ else
+ printf("\n");
+ tabprintf(tab, "nlinks %ju\n",
+ (uintmax_t)media.ipdata.meta.nlinks);
+ tabprintf(tab, "iparent 0x%016jx\n",
+ (uintmax_t)media.ipdata.meta.iparent);
+ tabprintf(tab, "name_key 0x%016jx\n",
+ (uintmax_t)media.ipdata.meta.name_key);
+ tabprintf(tab, "name_len %u\n",
+ media.ipdata.meta.name_len);
+ tabprintf(tab, "ncopies %u\n",
+ media.ipdata.meta.ncopies);
+ tabprintf(tab, "compalg %u\n",
+ media.ipdata.meta.comp_algo);
+ tabprintf(tab, "target_t %u\n",
+ media.ipdata.meta.target_type);
+ tabprintf(tab, "checkalg %u\n",
+ media.ipdata.meta.check_algo);
+ if ((media.ipdata.meta.op_flags & HAMMER2_OPFLAG_PFSROOT) ||
+ media.ipdata.meta.pfs_type == HAMMER2_PFSTYPE_SUPROOT) {
+ tabprintf(tab, "pfs_nmas %u\n",
+ media.ipdata.meta.pfs_nmasters);
+ tabprintf(tab, "pfs_type %u (%s)\n",
+ media.ipdata.meta.pfs_type,
+ hammer2_pfstype_to_str(media.ipdata.meta.pfs_type));
+ tabprintf(tab, "pfs_inum 0x%016jx\n",
+ (uintmax_t)media.ipdata.meta.pfs_inum);
+ uuid = media.ipdata.meta.pfs_clid;
+ tabprintf(tab, "pfs_clid %s\n",
+ hammer2_uuid_to_str(&uuid, &str));
+ uuid = media.ipdata.meta.pfs_fsid;
+ tabprintf(tab, "pfs_fsid %s\n",
+ hammer2_uuid_to_str(&uuid, &str));
+ tabprintf(tab, "pfs_lsnap_tid 0x%016jx\n",
+ (uintmax_t)media.ipdata.meta.pfs_lsnap_tid);
+ }
+ tabprintf(tab, "data_quota %ju\n",
+ (uintmax_t)media.ipdata.meta.data_quota);
+ tabprintf(tab, "data_count %ju\n",
+ (uintmax_t)bref->embed.stats.data_count);
+ tabprintf(tab, "inode_quota %ju\n",
+ (uintmax_t)media.ipdata.meta.inode_quota);
+ tabprintf(tab, "inode_count %ju\n",
+ (uintmax_t)bref->embed.stats.inode_count);
+ break;
+ case HAMMER2_BREF_TYPE_INDIRECT:
+ printf("{\n");
+ break;
+ case HAMMER2_BREF_TYPE_DATA:
+ printf("\n");
+ obrace = 0;
+ break;
+ case HAMMER2_BREF_TYPE_VOLUME:
+ printf("mirror_tid=%016jx freemap_tid=%016jx ",
+ media.voldata.mirror_tid,
+ media.voldata.freemap_tid);
+ printf("{\n");
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP:
+ printf("mirror_tid=%016jx freemap_tid=%016jx ",
+ media.voldata.mirror_tid,
+ media.voldata.freemap_tid);
+ printf("{\n");
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
+ printf("{\n");
+ tmp = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
+ tmp &= HAMMER2_SEGMASK;
+ tmp /= HAMMER2_PBUFSIZE;
+ assert(tmp >= HAMMER2_ZONE_FREEMAP_00);
+ assert(tmp < HAMMER2_ZONE_FREEMAP_END);
+ tmp -= HAMMER2_ZONE_FREEMAP_00;
+ tmp /= HAMMER2_ZONE_FREEMAP_INC;
+ tabprintf(tab, "rotation=%d\n", (int)tmp);
+
+ for (i = 0; i < HAMMER2_FREEMAP_COUNT; ++i) {
+ hammer2_off_t data_off = bref->key +
+ i * HAMMER2_FREEMAP_LEVEL0_SIZE;
+#if HAMMER2_BMAP_ELEMENTS != 8
+#error "cmd_debug.c: HAMMER2_BMAP_ELEMENTS expected to be 8"
+#endif
+ tabprintf(tab + 4, "%016jx %04d.%04x linear=%06x avail=%06x "
+ "%016jx %016jx %016jx %016jx "
+ "%016jx %016jx %016jx %016jx\n",
+ data_off, i, media.bmdata[i].class,
+ media.bmdata[i].linear,
+ media.bmdata[i].avail,
+ media.bmdata[i].bitmapq[0],
+ media.bmdata[i].bitmapq[1],
+ media.bmdata[i].bitmapq[2],
+ media.bmdata[i].bitmapq[3],
+ media.bmdata[i].bitmapq[4],
+ media.bmdata[i].bitmapq[5],
+ media.bmdata[i].bitmapq[6],
+ media.bmdata[i].bitmapq[7]);
+ }
+ tabprintf(tab, "}\n");
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP_NODE:
+ printf("{\n");
+ tmp = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
+ tmp &= HAMMER2_SEGMASK;
+ tmp /= HAMMER2_PBUFSIZE;
+ assert(tmp >= HAMMER2_ZONE_FREEMAP_00);
+ assert(tmp < HAMMER2_ZONE_FREEMAP_END);
+ tmp -= HAMMER2_ZONE_FREEMAP_00;
+ tmp /= HAMMER2_ZONE_FREEMAP_INC;
+ tabprintf(tab, "rotation=%d\n", (int)tmp);
+ break;
+ default:
+ printf("\n");
+ obrace = 0;
+ break;
+ }
+ if (str)
+ free(str);
+
+skip_data:
+ /*
+ * Update statistics.
+ */
+ switch(bref->type) {
+ case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
+ for (i = 0; i < HAMMER2_FREEMAP_COUNT; ++i) {
+ hammer2_off_t data_off = bref->key +
+ i * HAMMER2_FREEMAP_LEVEL0_SIZE;
+ if (data_off >= voldata->aux_end &&
+ data_off < hammer2_get_total_size()) {
+ int j;
+ for (j = 0; j < 4; ++j)
+ count_blocks(&media.bmdata[i], j,
+ &TotalAccum16[j],
+ &TotalAccum64[j]);
+ } else
+ TotalUnavail += HAMMER2_FREEMAP_LEVEL0_SIZE;
+ }
+ TotalFreemap += HAMMER2_FREEMAP_LEVEL1_SIZE;
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * Recurse if norecurse == 0. If the CRC failed, pass norecurse = 1.
+ * That is, if an indirect or inode fails we still try to list its
+ * direct children to help with debugging, but go no further than
+ * that because they are probably garbage.
+ */
+ if (show_depth == -1 || ((tab - init_tab) / show_tab) < show_depth) {
+ for (i = 0; norecurse == 0 && i < bcount; ++i) {
+ if (bscan[i].type != HAMMER2_BREF_TYPE_EMPTY) {
+ show_bref(voldata, tab, i, &bscan[i],
+ failed);
+ }
+ }
+ }
+ tab -= show_tab;
+ if (obrace) {
+ if (bref->type == HAMMER2_BREF_TYPE_INODE)
+ tabprintf(tab, "} (%s.%d, \"%*.*s\")\n",
+ type_str, bi, namelen, namelen,
+ media.ipdata.filename);
+ else
+ tabprintf(tab, "} (%s.%d)\n", type_str, bi);
+ }
+}
+
+static
+void
+count_blocks(hammer2_bmap_data_t *bmap, int value,
+ hammer2_off_t *accum16, hammer2_off_t *accum64)
+{
+ int i, j, bits;
+ hammer2_bitmap_t value16, value64;
+
+ bits = (int)sizeof(hammer2_bitmap_t) * 8;
+ assert(bits == 64);
+
+ value16 = value;
+ assert(value16 < 4);
+ value64 = (value16 << 6) | (value16 << 4) | (value16 << 2) | value16;
+ assert(value64 < 256);
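+ /*
+ * Example: for value == 2, value16 == 0x02 and value64 == 0xaa,
+ * i.e. the 2-bit state replicated across all four pairs of a
+ * byte so a fully-matching 64KB chunk compares in one step.
+ */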
+
+ for (i = 0; i < HAMMER2_BMAP_ELEMENTS; ++i) {
+ hammer2_bitmap_t bm = bmap->bitmapq[i];
+ hammer2_bitmap_t bm_save = bm;
+ hammer2_bitmap_t mask;
+
+ mask = 0x03; /* 2 bits per 16KB */
+ for (j = 0; j < bits; j += 2) {
+ if ((bm & mask) == value16)
+ *accum16 += 16384;
+ bm >>= 2;
+ }
+
+ bm = bm_save;
+ mask = 0xFF; /* 8 bits per 64KB chunk */
+ for (j = 0; j < bits; j += 8) {
+ if ((bm & mask) == value64)
+ *accum64 += 65536;
+ bm >>= 8;
+ }
+ }
+}
+
+int
+cmd_dumpchain(const char *path, u_int flags)
+{
+ int dummy = (int)flags;
+ int ecode = 0;
+ int fd;
+
+ fd = open(path, O_RDONLY);
+ if (fd >= 0) {
+ if (ioctl(fd, HAMMER2IOC_DEBUG_DUMP, &dummy) < 0) {
+ fprintf(stderr, "%s: %s\n", path, strerror(errno));
+ ecode = 1;
+ }
+ close(fd);
+ } else {
+ fprintf(stderr, "unable to open %s\n", path);
+ ecode = 1;
+ }
+ return ecode;
+}
+
+static
+void
+tabprintf(int tab, const char *ctl, ...)
+{
+ va_list va;
+
+ printf("%*.*s", tab, tab, "");
+ va_start(va, ctl);
+ vprintf(ctl, va);
+ va_end(va);
+}
diff --git a/sbin/hammer2/cmd_pfs.c b/sbin/hammer2/cmd_pfs.c
new file mode 100644
--- /dev/null
+++ b/sbin/hammer2/cmd_pfs.c
@@ -0,0 +1,152 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "hammer2.h"
+
+struct pfs_entry {
+ TAILQ_ENTRY(pfs_entry) entry;
+ char name[NAME_MAX+1];
+ char s[NAME_MAX+1];
+};
+
+int
+cmd_pfs_list(int ac, char **av)
+{
+ hammer2_ioc_pfs_t pfs;
+ int ecode = 0;
+ int fd;
+ int i;
+ int all = 0;
+ char *pfs_id_str = NULL;
+ const char *type_str;
+ TAILQ_HEAD(, pfs_entry) head;
+ struct pfs_entry *p, *e;
+
+ if (ac == 1 && av[0] == NULL) {
+ av = get_hammer2_mounts(&ac);
+ all = 1;
+ }
+
+ for (i = 0; i < ac; ++i) {
+ if ((fd = hammer2_ioctl_handle(av[i])) < 0)
+ return(1);
+ bzero(&pfs, sizeof(pfs));
+ TAILQ_INIT(&head);
+ if (i)
+ printf("\n");
+
+ while ((pfs.name_key = pfs.name_next) != (hammer2_key_t)-1) {
+ if (ioctl(fd, HAMMER2IOC_PFS_GET, &pfs) < 0) {
+ perror("ioctl");
+ ecode = 1;
+ break;
+ }
+ hammer2_uuid_to_str(&pfs.pfs_clid, &pfs_id_str);
+ if (pfs.pfs_type == HAMMER2_PFSTYPE_MASTER) {
+ if (pfs.pfs_subtype == HAMMER2_PFSSUBTYPE_NONE)
+ type_str = "MASTER";
+ else
+ type_str = hammer2_pfssubtype_to_str(
+ pfs.pfs_subtype);
+ } else {
+ type_str = hammer2_pfstype_to_str(pfs.pfs_type);
+ }
+ e = calloc(1, sizeof(*e));
+ snprintf(e->name, sizeof(e->name), "%s", pfs.name);
+ snprintf(e->s, sizeof(e->s), "%-11s %s",
+ type_str, pfs_id_str);
+ free(pfs_id_str);
+ pfs_id_str = NULL;
+
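+ /* Keep the list sorted by PFS name. */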
+ p = TAILQ_FIRST(&head);
+ while (p) {
+ if (strcmp(e->name, p->name) <= 0) {
+ TAILQ_INSERT_BEFORE(p, e, entry);
+ break;
+ }
+ p = TAILQ_NEXT(p, entry);
+ }
+ if (!p)
+ TAILQ_INSERT_TAIL(&head, e, entry);
+ }
+ close(fd);
+
+ printf("Type "
+ "ClusterId (pfs_clid) "
+ "Label on %s\n", av[i]);
+ while ((p = TAILQ_FIRST(&head)) != NULL) {
+ printf("%s %s\n", p->s, p->name);
+ TAILQ_REMOVE(&head, p, entry);
+ free(p);
+ }
+ }
+
+ if (all)
+ put_hammer2_mounts(ac, av);
+
+ return (ecode);
+}
+
+int
+cmd_pfs_getid(const char *sel_path, const char *name, int privateid)
+{
+ hammer2_ioc_pfs_t pfs;
+ int ecode = 0;
+ int fd;
+ char *pfs_id_str = NULL;
+
+ if ((fd = hammer2_ioctl_handle(sel_path)) < 0)
+ return(1);
+ bzero(&pfs, sizeof(pfs));
+
+ snprintf(pfs.name, sizeof(pfs.name), "%s", name);
+ if (ioctl(fd, HAMMER2IOC_PFS_LOOKUP, &pfs) < 0) {
+ perror("ioctl");
+ ecode = 1;
+ } else {
+ if (privateid)
+ hammer2_uuid_to_str(&pfs.pfs_fsid, &pfs_id_str);
+ else
+ hammer2_uuid_to_str(&pfs.pfs_clid, &pfs_id_str);
+ printf("%s\n", pfs_id_str);
+ free(pfs_id_str);
+ pfs_id_str = NULL;
+ }
+ close(fd);
+ return (ecode);
+}
diff --git a/sbin/hammer2/cmd_stat.c b/sbin/hammer2/cmd_stat.c
new file mode 100644
--- /dev/null
+++ b/sbin/hammer2/cmd_stat.c
@@ -0,0 +1,151 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "hammer2.h"
+
+static const char *compmodestr(uint8_t comp_algo);
+static const char *checkmodestr(uint8_t check_algo);
+
+/*
+ * Print inode statistics (number of copies, data/inode usage, compression
+ * and check modes, and quotas) for each given path.
+ */
+int
+cmd_stat(int ac, const char **av)
+{
+ hammer2_ioc_inode_t ino;
+ const char *cdir = ".";
+ int ecode = 0;
+ int w;
+ int i;
+ int fd;
+
+ if (ac == 0) {
+ ac = 1;
+ av = &cdir;
+ }
+ for (i = w = 0; i < ac; ++i) {
+ if (w < (int)strlen(av[i]))
+ w = (int)strlen(av[i]);
+ }
+ if (w < 16)
+ w = 16;
+ printf("%-*.*s ncp data-use inode-use comp check quota\n",
+ w, w, "PATH");
+ for (i = 0; i < ac; ++i) {
+ if ((fd = open(av[i], O_RDONLY)) < 0) {
+ fprintf(stderr, "%s: %s\n", av[i], strerror(errno));
+ ecode = 1;
+ continue;
+ }
+ if (ioctl(fd, HAMMER2IOC_INODE_GET, &ino) < 0) {
+ fprintf(stderr, "%s: %s\n", av[i], strerror(errno));
+ ecode = 1;
+ continue;
+ }
+ printf("%-*.*s ", w, w, av[i]);
+ printf("%3d ", ino.ip_data.meta.ncopies);
+ printf("%9s ", sizetostr(ino.data_count));
+ printf("%9s ", counttostr(ino.inode_count));
+ printf("%-18s ", compmodestr(ino.ip_data.meta.comp_algo));
+ printf("%-12s ", checkmodestr(ino.ip_data.meta.check_algo));
+ if (ino.ip_data.meta.data_quota ||
+ ino.ip_data.meta.inode_quota) {
+ printf("%s",
+ sizetostr(ino.ip_data.meta.data_quota));
+ printf("/%-12s",
+ counttostr(ino.ip_data.meta.inode_quota));
+ }
+ printf("\n");
+ }
+ return ecode;
+}
+
+static
+const char *
+compmodestr(uint8_t comp_algo)
+{
+ static char buf[64];
+ static const char *comps[] = HAMMER2_COMP_STRINGS;
+ int comp = HAMMER2_DEC_ALGO(comp_algo);
+ int level = HAMMER2_DEC_LEVEL(comp_algo);
+
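+	/*
+	 * comp_algo encodes both the algorithm and an optional level;
+	 * a level of 0 is reported as "<algo>:default".
+	 */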
+ if (level) {
+ if (comp >= 0 && comp < HAMMER2_COMP_STRINGS_COUNT)
+ snprintf(buf, sizeof(buf), "%s:%d",
+ comps[comp], level);
+ else
+ snprintf(buf, sizeof(buf), "unknown(%d):%d",
+ comp, level);
+ } else {
+ if (comp >= 0 && comp < HAMMER2_COMP_STRINGS_COUNT)
+ snprintf(buf, sizeof(buf), "%s:default",
+ comps[comp]);
+ else
+ snprintf(buf, sizeof(buf), "unknown(%d):default",
+ comp);
+ }
+ return (buf);
+}
+
+static
+const char *
+checkmodestr(uint8_t check_algo)
+{
+ static char buf[64];
+ static const char *checks[] = HAMMER2_CHECK_STRINGS;
+ int check = HAMMER2_DEC_ALGO(check_algo);
+ int level = HAMMER2_DEC_LEVEL(check_algo);
+
+ /*
+ * NOTE: Check algorithms normally do not encode any level.
+ */
+ if (level) {
+ if (check >= 0 && check < HAMMER2_CHECK_STRINGS_COUNT)
+ snprintf(buf, sizeof(buf), "%s:%d",
+ checks[check], level);
+ else
+ snprintf(buf, sizeof(buf), "unknown(%d):%d",
+ check, level);
+ } else {
+ if (check >= 0 && check < HAMMER2_CHECK_STRINGS_COUNT)
+ snprintf(buf, sizeof(buf), "%s", checks[check]);
+ else
+ snprintf(buf, sizeof(buf), "unknown(%d)", check);
+ }
+ return (buf);
+}
diff --git a/sbin/hammer2/cmd_volume.c b/sbin/hammer2/cmd_volume.c
new file mode 100644
--- /dev/null
+++ b/sbin/hammer2/cmd_volume.c
@@ -0,0 +1,108 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2020 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2020 The DragonFly Project
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "hammer2.h"
+
+int
+cmd_volume_list(int ac, char **av)
+{
+ hammer2_ioc_volume_list_t vollist;
+ hammer2_ioc_volume_t *entry;
+ int fd, i, j, n, w, all = 0, ecode = 0;
+
+ if (ac == 1 && av[0] == NULL) {
+ av = get_hammer2_mounts(&ac);
+ all = 1;
+ }
+ vollist.volumes = calloc(HAMMER2_MAX_VOLUMES, sizeof(*vollist.volumes));
+
+ for (i = 0; i < ac; ++i) {
+ if (i)
+ printf("\n");
+ if (ac > 1 || all)
+ printf("%s\n", av[i]);
+ if ((fd = hammer2_ioctl_handle(av[i])) < 0) {
+ ecode = 1;
+ goto failed;
+ }
+
+ vollist.nvolumes = HAMMER2_MAX_VOLUMES;
+ if (ioctl(fd, HAMMER2IOC_VOLUME_LIST, &vollist) < 0) {
+ perror("ioctl");
+ close(fd);
+ ecode = 1;
+ goto failed;
+ }
+
+ w = 0;
+ for (j = 0; j < vollist.nvolumes; ++j) {
+ entry = &vollist.volumes[j];
+ n = (int)strlen(entry->path);
+ if (n > w)
+ w = n;
+ }
+
+ if (QuietOpt > 0) {
+ for (j = 0; j < vollist.nvolumes; ++j) {
+ entry = &vollist.volumes[j];
+ printf("%s\n", entry->path);
+ }
+ } else {
+ printf("version %d\n", vollist.version);
+ printf("@%s\n", vollist.pfs_name);
+ for (j = 0; j < vollist.nvolumes; ++j) {
+ entry = &vollist.volumes[j];
+ printf("volume%-2d %-*.*s %s",
+ entry->id, w, w, entry->path,
+ sizetostr(entry->size));
+ if (VerboseOpt > 0)
+ printf(" 0x%016jx 0x%016jx",
+ (intmax_t)entry->offset,
+ (intmax_t)entry->size);
+ printf("\n");
+ }
+ }
+ close(fd);
+ }
+failed:
+ free(vollist.volumes);
+ if (all)
+ put_hammer2_mounts(ac, av);
+
+ return (ecode);
+}
diff --git a/sbin/hammer2/hammer2.h b/sbin/hammer2/hammer2.h
new file mode 100644
--- /dev/null
+++ b/sbin/hammer2/hammer2.h
@@ -0,0 +1,100 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef HAMMER2_HAMMER2_H_
+#define HAMMER2_HAMMER2_H_
+
+/*
+ * Rollup headers for hammer2 utility
+ */
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/disk.h>
+#include <dirent.h>
+
+#include <fs/hammer2/hammer2_disk.h>
+#include <fs/hammer2/hammer2_mount.h>
+#include <fs/hammer2/hammer2_ioctl.h>
+#include <fs/hammer2/hammer2_xxhash.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stddef.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <uuid.h>
+#include <assert.h>
+
+#include "hammer2_subs.h"
+
+/* user-specifiable check modes only */
+#define HAMMER2_CHECK_STRINGS { "none", "disabled", "crc32", \
+ "xxhash64", "sha192" }
+#define HAMMER2_CHECK_STRINGS_COUNT 5
+
+#define HAMMER2_COMP_STRINGS { "none", "autozero", "lz4", "zlib" }
+#define HAMMER2_COMP_STRINGS_COUNT 4
+
+extern int VerboseOpt;
+extern int QuietOpt;
+
+/*
+ * Hammer2 command APIs
+ */
+int cmd_pfs_getid(const char *sel_path, const char *name, int privateid);
+int cmd_pfs_list(int ac, char **av);
+int cmd_info(int ac, const char **av);
+int cmd_mountall(int ac, const char **av);
+int cmd_stat(int ac, const char **av);
+int cmd_dumpchain(const char *path, u_int flags);
+int cmd_show(const char *devpath, int which);
+int cmd_volume_list(int ac, char **av);
+
+void print_inode(const char *path);
+
+#endif /* !HAMMER2_HAMMER2_H_ */
diff --git a/sbin/hammer2/hammer2.8 b/sbin/hammer2/hammer2.8
new file mode 100644
--- /dev/null
+++ b/sbin/hammer2/hammer2.8
@@ -0,0 +1,137 @@
+.\" Copyright (c) 2015-2019 The DragonFly Project. All rights reserved.
+.\"
+.\" This code is derived from software contributed to The DragonFly Project
+.\" by Matthew Dillon <dillon@backplane.com>
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\"
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in
+.\" the documentation and/or other materials provided with the
+.\" distribution.
+.\" 3. Neither the name of The DragonFly Project nor the names of its
+.\" contributors may be used to endorse or promote products derived
+.\" from this software without specific, prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+.\" FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+.\" COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+.\" INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd September 18, 2022
+.Dt HAMMER2 8
+.Os
+.Sh NAME
+.Nm hammer2
+.Nd hammer2 file system utility
+.Sh SYNOPSIS
+.Nm
+.Fl h
+.Nm
+.Op Fl s Ar path
+.Ar command
+.Op Ar argument ...
+.Sh DESCRIPTION
+The
+.Nm
+utility provides miscellaneous support functions for a
+read-only HAMMER2 file system.
+.Pp
+The options are as follows:
+.Bl -tag -width indent
+.It Fl s Ar path
+Specify the path to a mounted HAMMER2 filesystem.
+At least one PFS on a HAMMER2 filesystem must be mounted for the utility
+to act on all PFSs managed by that filesystem.
+Every HAMMER2 filesystem typically has a PFS called "LOCAL" for this purpose.
+.El
+.Pp
+.Nm
+directives are as shown below.
+Note that most directives require you to either be CD'd into a hammer2
+filesystem, specify a path to a mounted hammer2 filesystem via the
+.Fl s
+option, or specify a path after the directive, depending on the directive.
+All hammer2 filesystems have a PFS called "LOCAL", which is typically mounted
+locally on the host in order to be able to issue commands for other PFSs
+on the filesystem.
+The mount also enables PFS configuration scanning for that filesystem.
+.Bl -tag -width indent
+.\" ==== pfs-list ====
+.It Cm pfs-list Op path...
+List all PFSs associated with all mounted hammer2 storage devices.
+The list may be restricted to a particular filesystem using
+.Fl s Ar mount .
+.Pp
+Note that hammer2 PFSs associated with storage devices which have not been
+mounted in any fashion will not be listed.
+At least one hammer2 label must be mounted for the PFSs on that device to be
+visible.
+.\" ==== pfs-clid ====
+.It Cm pfs-clid Ar label
+Print the cluster id for a PFS specified by name.
+.\" ==== pfs-fsid ====
+.It Cm pfs-fsid Ar label
+Print the unique filesystem id for a PFS specified by name.
+.\" ==== stat ====
+.It Cm stat Op path...
+Print the inode statistics, compression, and other meta-data associated
+with a list of paths.
+.\" ==== show ====
+.It Cm show Ar devpath
+Dump the radix tree for the HAMMER2 filesystem by scanning a
+block device directly.
+No mount is required.
+.\" ==== freemap ====
+.It Cm freemap Ar devpath
+Dump the freemap tree for the HAMMER2 filesystem by scanning a
+block device directly.
+No mount is required.
+.\" ==== volhdr ====
+.It Cm volhdr Ar devpath
+Dump the volume header for the HAMMER2 filesystem by scanning a
+block device directly.
+No mount is required.
+.\" ==== volume-list ====
+.It Cm volume-list Op path...
+List all volumes associated with all mounted hammer2 storage devices.
+The list may be restricted to a particular filesystem using
+.Fl s Ar mount .
+.Pp
+Note that hammer2 volumes associated with storage devices which have not been
+mounted in any fashion will not be listed.
+At least one hammer2 label must be mounted for the volumes on that device to be
+visible.
+.\" ==== printinode ====
+.It Cm printinode Ar path
+Dump inode.
+.El
+.Sh SEE ALSO
+.Xr mount_hammer2 8 ,
+.Xr newfs_hammer2 8
+.Sh HISTORY
+The
+.Nm
+utility first appeared in
+.Dx 4.1 .
+.Sh AUTHORS
+.An Matthew Dillon Aq Mt dillon@backplane.com
+.Pp
+The
+.Nm
+utility was ported to
+.Fx
+by
+.An Tomohiro Kusumi Aq Mt tkusumi@netbsd.org .
diff --git a/sbin/hammer2/hammer2_subs.h b/sbin/hammer2/hammer2_subs.h
new file mode 100644
--- /dev/null
+++ b/sbin/hammer2/hammer2_subs.h
@@ -0,0 +1,121 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef HAMMER2_HAMMER2_SUBS_H_
+#define HAMMER2_HAMMER2_SUBS_H_
+
+#include <sys/types.h>
+#include <uuid.h>
+
+#include <fs/hammer2/hammer2_disk.h>
+
+typedef struct hammer2_volume {
+ int fd;
+ int id;
+ char *path;
+ hammer2_off_t offset;
+ hammer2_off_t size;
+} hammer2_volume_t;
+
+typedef struct hammer2_ondisk {
+ int version;
+ int nvolumes;
+ hammer2_volume_t volumes[HAMMER2_MAX_VOLUMES];
+ hammer2_off_t total_size;
+ hammer2_off_t free_size;
+ uuid_t fsid;
+ uuid_t fstype;
+} hammer2_ondisk_t;
+
+/*
+ * Misc functions
+ */
+int hammer2_ioctl_handle(const char *sel_path);
+const char *hammer2_time64_to_str(uint64_t htime64, char **strp);
+const char *hammer2_uuid_to_str(const uuid_t *uuid, char **strp);
+const char *hammer2_iptype_to_str(uint8_t type);
+const char *hammer2_pfstype_to_str(uint8_t type);
+const char *hammer2_pfssubtype_to_str(uint8_t subtype);
+const char *hammer2_breftype_to_str(uint8_t type);
+const char *sizetostr(hammer2_off_t size);
+const char *counttostr(hammer2_off_t size);
+hammer2_off_t check_volume(int fd);
+hammer2_key_t dirhash(const unsigned char *name, size_t len);
+
+uint32_t calculate_crc32c(uint32_t, const void *, size_t);
+
+char **get_hammer2_mounts(int *acp);
+void put_hammer2_mounts(int ac, char **av);
+
+void hammer2_init_ondisk(hammer2_ondisk_t *fsp);
+void hammer2_install_volume(hammer2_volume_t *vol, int fd, int id,
+ const char *path, hammer2_off_t offset, hammer2_off_t size);
+void hammer2_uninstall_volume(hammer2_volume_t *vol);
+void hammer2_verify_volumes(hammer2_ondisk_t *fsp,
+ const hammer2_volume_data_t *rootvoldata);
+void hammer2_print_volumes(const hammer2_ondisk_t *fsp);
+void hammer2_init_volumes(const char *blkdevs, int rdonly);
+void hammer2_cleanup_volumes(void);
+
+int hammer2_get_volume_fd(hammer2_off_t offset);
+int hammer2_get_root_volume_fd(void);
+int hammer2_get_volume_id(hammer2_off_t offset);
+int hammer2_get_root_volume_id(void);
+const char *hammer2_get_volume_path(hammer2_off_t offset);
+const char *hammer2_get_root_volume_path(void);
+hammer2_off_t hammer2_get_volume_offset(hammer2_off_t offset);
+hammer2_off_t hammer2_get_root_volume_offset(void);
+hammer2_off_t hammer2_get_volume_size(hammer2_off_t offset);
+hammer2_off_t hammer2_get_root_volume_size(void);
+
+hammer2_off_t hammer2_get_total_size(void);
+hammer2_volume_data_t* hammer2_read_root_volume_header(void);
+
+static __inline
+uint32_t
+hammer2_icrc32(const void *buf, size_t size)
+{
+ return (~calculate_crc32c(-1, buf, size));
+}
+
+static __inline
+uint32_t
+hammer2_icrc32c(const void *buf, size_t size, uint32_t ocrc)
+{
+ return (~calculate_crc32c(~ocrc, buf, size));
+}
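+
+/*
+ * hammer2_icrc32c() allows the iCRC to be computed incrementally over
+ * split buffers, e.g. (a sketch, assuming calculate_crc32c() chains on
+ * its first argument):
+ *
+ *	crc = hammer2_icrc32(buf, len1);
+ *	crc = hammer2_icrc32c((const char *)buf + len1, len2, crc);
+ *
+ * which should match hammer2_icrc32() over the full len1 + len2 bytes.
+ */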
+#endif /* !HAMMER2_HAMMER2_SUBS_H_ */
diff --git a/sbin/hammer2/main.c b/sbin/hammer2/main.c
new file mode 100644
--- /dev/null
+++ b/sbin/hammer2/main.c
@@ -0,0 +1,224 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2019 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * XXX HAMMER2 userspace consists of sbin/{hammer2,newfs_hammer2,
+ * mount_hammer2,fsck_hammer2}. These are basically the same as DragonFly's,
+ * except that write-related code is currently removed. Avoid non-functional
+ * changes in the name of cleanup, which would make it harder to sync with
+ * DragonFly.
+ */
+
+#include "hammer2.h"
+
+int VerboseOpt;
+int QuietOpt;
+
+static void usage(int code);
+
+int
+main(int ac, char **av)
+{
+ char *sel_path = NULL;
+ int ecode = 0;
+ int ch;
+
+ /*
+ * Core options
+ */
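+	/* Note that -v and -q cancel each other out before stacking. */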
+ while ((ch = getopt(ac, av, "s:vq")) != -1) {
+ switch(ch) {
+ case 's':
+ sel_path = strdup(optarg);
+ break;
+ case 'v':
+ if (QuietOpt)
+ --QuietOpt;
+ else
+ ++VerboseOpt;
+ break;
+ case 'q':
+ if (VerboseOpt)
+ --VerboseOpt;
+ else
+ ++QuietOpt;
+ break;
+ default:
+ fprintf(stderr, "Unknown option: %c\n", ch);
+ usage(1);
+ /* not reached */
+ break;
+ }
+ }
+
+ /*
+ * Adjust, then process the command
+ */
+ ac -= optind;
+ av += optind;
+ if (ac < 1) {
+ fprintf(stderr, "Missing command\n");
+ usage(1);
+ /* not reached */
+ }
+
+ if (strcmp(av[0], "dumpchain") == 0) {
+ if (ac < 2)
+ ecode = cmd_dumpchain(".", (u_int)-1);
+ else if (ac < 3)
+ ecode = cmd_dumpchain(av[1], (u_int)-1);
+ else
+ ecode = cmd_dumpchain(av[1],
+ (u_int)strtoul(av[2], NULL, 0));
+ } else if (strcmp(av[0], "pfs-clid") == 0) {
+ /*
+ * Print cluster id (uuid) for specific PFS
+ */
+ if (ac < 2) {
+ fprintf(stderr, "pfs-clid: requires name\n");
+ usage(1);
+ }
+ ecode = cmd_pfs_getid(sel_path, av[1], 0);
+ } else if (strcmp(av[0], "pfs-fsid") == 0) {
+ /*
+ * Print private id (uuid) for specific PFS
+ */
+ if (ac < 2) {
+ fprintf(stderr, "pfs-fsid: requires name\n");
+ usage(1);
+ }
+ ecode = cmd_pfs_getid(sel_path, av[1], 1);
+ } else if (strcmp(av[0], "pfs-list") == 0) {
+ /*
+ * List all PFSs
+ */
+ if (ac >= 2) {
+ ecode = cmd_pfs_list(ac - 1,
+ (char **)(void *)&av[1]);
+ } else {
+ ecode = cmd_pfs_list(1, &sel_path);
+ }
+ } else if (strcmp(av[0], "stat") == 0) {
+ ecode = cmd_stat(ac - 1, (const char **)(void *)&av[1]);
+ } else if (strcmp(av[0], "show") == 0) {
+ /*
+ * Raw dump of filesystem. Use -v to check all crc's, and
+ * -vv to dump bulk file data.
+ */
+ if (ac != 2) {
+ fprintf(stderr, "show: requires device path\n");
+ usage(1);
+ } else {
+ cmd_show(av[1], 0);
+ }
+ } else if (strcmp(av[0], "freemap") == 0) {
+ /*
+ * Raw dump of freemap. Use -v to check all CRCs, and
+ * -vv to dump bulk file data.
+ */
+ if (ac != 2) {
+ fprintf(stderr, "freemap: requires device path\n");
+ usage(1);
+ } else {
+ cmd_show(av[1], 1);
+ }
+ } else if (strcmp(av[0], "volhdr") == 0) {
+ /*
+ * Dump the volume header.
+ */
+ if (ac != 2) {
+ fprintf(stderr, "volhdr: requires device path\n");
+ usage(1);
+ } else {
+ cmd_show(av[1], 2);
+ }
+ } else if (strcmp(av[0], "volume-list") == 0) {
+ /*
+ * List all volumes
+ */
+ if (ac >= 2) {
+ ecode = cmd_volume_list(ac - 1,
+ (char **)(void *)&av[1]);
+ } else {
+ ecode = cmd_volume_list(1, &sel_path);
+ }
+ } else if (strcmp(av[0], "printinode") == 0) {
+ if (ac != 2) {
+ fprintf(stderr,
+ "printinode: requires directory/file path\n");
+ usage(1);
+ } else {
+ print_inode(av[1]);
+ }
+ } else {
+ fprintf(stderr, "Unrecognized command: %s\n", av[0]);
+ usage(1);
+ }
+
+ return (ecode);
+}
+
+static
+void
+usage(int code)
+{
+ fprintf(stderr,
+ "hammer2 [options] command [argument ...]\n"
+ " -s path Select filesystem\n"
+ "\n"
+ " pfs-list [<path>...] "
+ "List PFSs\n"
+ " pfs-clid <label> "
+ "Print cluster id for specific PFS\n"
+ " pfs-fsid <label> "
+ "Print private id for specific PFS\n"
+ " stat [<path>...] "
+ "Return inode quota & config\n"
+ " show <devpath> "
+ "Raw hammer2 media dump for topology\n"
+ " freemap <devpath> "
+ "Raw hammer2 media dump for freemap\n"
+ " volhdr <devpath> "
+ "Raw hammer2 media dump for the volume header(s)\n"
+ " volume-list [<path>...] "
+ "List volumes\n"
+ " printinode <path> "
+ "Dump inode\n"
+ );
+ exit(code);
+}
diff --git a/sbin/hammer2/ondisk.c b/sbin/hammer2/ondisk.c
new file mode 100644
--- /dev/null
+++ b/sbin/hammer2/ondisk.c
@@ -0,0 +1,698 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2020 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2020 The DragonFly Project
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fstab.h>
+#include <assert.h>
+#include <errno.h>
+#include <err.h>
+
+#include <fs/hammer2/hammer2_disk.h>
+
+#include "hammer2_subs.h"
+
+static hammer2_ondisk_t fso;
+static int hammer2_volumes_initialized;
+
+static void
+hammer2_init_volume(hammer2_volume_t *vol)
+{
+ vol->fd = -1;
+ vol->id = -1;
+ vol->offset = (hammer2_off_t)-1;
+ vol->size = (hammer2_off_t)-1;
+}
+
+void
+hammer2_init_ondisk(hammer2_ondisk_t *fsp)
+{
+ int i;
+
+ bzero(fsp, sizeof(*fsp));
+ fsp->version = -1;
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i)
+ hammer2_init_volume(&fsp->volumes[i]);
+}
+
+void
+hammer2_install_volume(hammer2_volume_t *vol, int fd, int id, const char *path,
+ hammer2_off_t offset, hammer2_off_t size)
+{
+ bzero(vol, sizeof(*vol));
+ vol->fd = fd;
+ vol->id = id;
+ vol->path = strdup(path);
+ vol->offset = offset;
+ vol->size = size;
+}
+
+void
+hammer2_uninstall_volume(hammer2_volume_t *vol)
+{
+ fsync(vol->fd);
+ close(vol->fd);
+ free(vol->path);
+ hammer2_init_volume(vol);
+}
+
+/*
+ * Locate a valid volume header. If any of the four volume headers is
+ * good, choose the best one based on mirror_tid.
+ */
+static int
+hammer2_read_volume_header(int fd, const char *path,
+ hammer2_volume_data_t *voldata)
+{
+ hammer2_volume_data_t vd;
+ hammer2_tid_t mirror_tid = -1;
+ hammer2_off_t size = check_volume(fd);
+ hammer2_crc32_t crc0, crc1;
+ const char *p;
+ int i, zone = -1;
+ ssize_t ret;
+
+ for (i = 0; i < HAMMER2_NUM_VOLHDRS; ++i) {
+ if (i * HAMMER2_ZONE_BYTES64 >= size)
+ break;
+ if (lseek(fd, i * HAMMER2_ZONE_BYTES64, SEEK_SET) == -1)
+ break;
+ ret = read(fd, &vd, HAMMER2_PBUFSIZE);
+ if (ret == -1) {
+ fprintf(stderr, "%s #%d: read %s\n",
+ path, i, strerror(errno));
+ continue;
+ }
+ if (ret != HAMMER2_PBUFSIZE) {
+ fprintf(stderr, "%s #%d: read %s\n",
+ path, i, strerror(errno));
+ continue;
+ }
+
+ p = (const char*)&vd;
+ /* verify volume header magic */
+ if ((vd.magic != HAMMER2_VOLUME_ID_HBO) &&
+ (vd.magic != HAMMER2_VOLUME_ID_ABO)) {
+ fprintf(stderr, "%s #%d: bad magic\n", path, i);
+ continue;
+ }
+
+ if (vd.magic == HAMMER2_VOLUME_ID_ABO) {
+ /* XXX: Reversed-endianness filesystem */
+ fprintf(stderr,
+ "%s #%d: reverse-endian filesystem detected",
+ path, i);
+ continue;
+ }
+
+ /* verify volume header CRC's */
+ crc0 = vd.icrc_sects[HAMMER2_VOL_ICRC_SECT0];
+ crc1 = hammer2_icrc32(p + HAMMER2_VOLUME_ICRC0_OFF,
+ HAMMER2_VOLUME_ICRC0_SIZE);
+ if (crc0 != crc1) {
+ fprintf(stderr,
+ "%s #%d: volume header crc mismatch "
+ "sect0 %08x/%08x\n",
+ path, i, crc0, crc1);
+ continue;
+ }
+
+ crc0 = vd.icrc_sects[HAMMER2_VOL_ICRC_SECT1];
+ crc1 = hammer2_icrc32(p + HAMMER2_VOLUME_ICRC1_OFF,
+ HAMMER2_VOLUME_ICRC1_SIZE);
+ if (crc0 != crc1) {
+ fprintf(stderr,
+ "%s #%d: volume header crc mismatch "
+ "sect1 %08x/%08x",
+ path, i, crc0, crc1);
+ continue;
+ }
+
+ crc0 = vd.icrc_volheader;
+ crc1 = hammer2_icrc32(p + HAMMER2_VOLUME_ICRCVH_OFF,
+ HAMMER2_VOLUME_ICRCVH_SIZE);
+ if (crc0 != crc1) {
+ fprintf(stderr,
+ "%s #%d: volume header crc mismatch "
+ "vh %08x/%08x",
+ path, i, crc0, crc1);
+ continue;
+ }
+ if (zone == -1 || mirror_tid < vd.mirror_tid) {
+ bcopy(&vd, voldata, sizeof(vd));
+ mirror_tid = vd.mirror_tid;
+ zone = i;
+ }
+ }
+ return(zone);
+}
+
+static void
+hammer2_err_uuid_mismatch(uuid_t *uuid1, uuid_t *uuid2, const char *id)
+{
+ char *p1 = NULL, *p2 = NULL;
+
+ hammer2_uuid_to_str(uuid1, &p1);
+ hammer2_uuid_to_str(uuid2, &p2);
+
+ errx(1, "%s uuid mismatch %s vs %s", id, p1, p2);
+
+ free(p1);
+ free(p2);
+}
+
+static void
+hammer2_add_volume(const char *path, int rdonly)
+{
+ hammer2_volume_data_t voldata;
+ hammer2_volume_t *vol;
+ struct stat st;
+ int fd, i;
+ uuid_t uuid;
+
+ fd = open(path, rdonly ? O_RDONLY : O_RDWR);
+ if (fd == -1)
+ err(1, "open");
+
+ if (fstat(fd, &st) == -1)
+ err(1, "fstat");
+ if (!S_ISCHR(st.st_mode) && !S_ISREG(st.st_mode))
+ errx(1, "Unsupported file type");
+
+ if (hammer2_read_volume_header(fd, path, &voldata) >= 0) {
+ i = voldata.volu_id;
+ if (i < 0 || i >= HAMMER2_MAX_VOLUMES)
+ errx(1, "%s has bad volume id %d", path, i);
+ vol = &fso.volumes[i];
+ if (vol->id != -1)
+ errx(1, "volume id %d already initialized", i);
+ /* all headers must have the same version, nvolumes and uuid */
+ if (!fso.nvolumes) {
+ fso.version = voldata.version;
+ fso.nvolumes = voldata.nvolumes;
+ fso.fsid = voldata.fsid;
+ fso.fstype = voldata.fstype;
+ } else {
+ if (fso.version != (int)voldata.version)
+ errx(1, "Volume version mismatch %d vs %d",
+ fso.version, (int)voldata.version);
+ if (fso.nvolumes != voldata.nvolumes)
+ errx(1, "Volume count mismatch %d vs %d",
+ fso.nvolumes, voldata.nvolumes);
+ uuid = voldata.fsid;
+ if (!uuid_equal(&fso.fsid, &uuid, NULL))
+ hammer2_err_uuid_mismatch(&fso.fsid,
+ &uuid,
+ "fsid");
+ uuid = voldata.fstype;
+ if (!uuid_equal(&fso.fstype, &uuid, NULL))
+ hammer2_err_uuid_mismatch(&fso.fstype,
+ &uuid,
+ "fstype");
+ }
+ /* all per-volume tests passed */
+ hammer2_install_volume(vol, fd, i, path,
+ voldata.volu_loff[i], voldata.volu_size);
+ fso.total_size += vol->size;
+ } else {
+ errx(1, "No valid volume headers found!");
+ }
+}
+
+static void
+hammer2_verify_volumes_common(const hammer2_ondisk_t *fsp)
+{
+ const hammer2_volume_t *vol;
+ hammer2_off_t size;
+ struct stat *st;
+ const char *path;
+ int i, j, nvolumes = 0;
+
+ if (fsp->version == -1)
+ errx(1, "Bad volume version %d", fsp->version);
+
+ /* check initialized volume count */
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ vol = &fsp->volumes[i];
+ if (vol->id != -1)
+ nvolumes++;
+ }
+
+ /* fsp->nvolumes hasn't been verified yet, use nvolumes */
+ st = calloc(nvolumes, sizeof(*st));
+
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ vol = &fsp->volumes[i];
+ if (vol->id == -1)
+ continue;
+ path = vol->path;
+ /* check volumes are unique */
+ if (stat(path, &st[i]) != 0)
+ errx(1, "Failed to stat %s", path);
+ if (fstat(vol->fd, &st[i]) != 0)
+ errx(1, "Failed to fstat %d", vol->fd);
+ for (j = 0; j < i; ++j) {
+ if ((st[i].st_ino == st[j].st_ino) &&
+ (st[i].st_dev == st[j].st_dev))
+ errx(1, "%s specified more than once", path);
+ }
+ /* check volume fields are initialized */
+ if (vol->fd == -1)
+ errx(1, "%s has bad fd %d", path, vol->fd);
+ if (vol->offset == (hammer2_off_t)-1)
+ errx(1, "%s has bad offset 0x%016jx", path,
+ (intmax_t)vol->offset);
+ if (vol->size == (hammer2_off_t)-1)
+ errx(1, "%s has bad size 0x%016jx", path,
+ (intmax_t)vol->size);
+ /* check volume size vs block device size */
+ size = check_volume(vol->fd);
+ printf("checkvolu header %d %016jx/%016jx\n", i, vol->size, size);
+ if (vol->size > size)
+ errx(1, "%s's size 0x%016jx exceeds device size 0x%016jx",
+ path, (intmax_t)vol->size, (intmax_t)size);
+ }
+ free(st);
+}
+
+static void
+hammer2_verify_volumes_1(hammer2_ondisk_t *fsp,
+ const hammer2_volume_data_t *rootvoldata)
+{
+ const hammer2_volume_t *vol;
+ hammer2_off_t off;
+ const char *path;
+ int i, nvolumes = 0;
+
+ /* check initialized volume count */
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ vol = &fsp->volumes[i];
+ if (vol->id != -1)
+ nvolumes++;
+ }
+ if (nvolumes != 1)
+ errx(1, "Only 1 volume supported");
+ fsp->nvolumes = nvolumes; /* adjust with actual count */
+
+ /* check volume header */
+ if (rootvoldata) {
+ if (rootvoldata->volu_id)
+ errx(1, "Volume id %d must be 0", rootvoldata->volu_id);
+ if (rootvoldata->nvolumes)
+ errx(1, "Volume count %d must be 0",
+ rootvoldata->nvolumes);
+ if (rootvoldata->total_size)
+ errx(1, "Total size 0x%016jx must be 0",
+ (intmax_t)rootvoldata->total_size);
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ off = rootvoldata->volu_loff[i];
+ if (off)
+ errx(1, "Volume offset[%d] 0x%016jx must be 0",
+ i, (intmax_t)off);
+ }
+ }
+
+ /* check volume */
+ vol = &fsp->volumes[0];
+ path = vol->path;
+ if (vol->id)
+ errx(1, "%s has non zero id %d", path, vol->id);
+ if (vol->offset)
+ errx(1, "%s has non zero offset 0x%016jx", path,
+ (intmax_t)vol->offset);
+ if (vol->size & HAMMER2_VOLUME_ALIGNMASK64)
+ errx(1, "%s's size is not 0x%016jx aligned", path,
+ (intmax_t)HAMMER2_VOLUME_ALIGN);
+}
+
+static void
+hammer2_verify_volumes_2(const hammer2_ondisk_t *fsp,
+ const hammer2_volume_data_t *rootvoldata)
+{
+ const hammer2_volume_t *vol;
+ hammer2_off_t off;
+ const char *path;
+ int i, nvolumes = 0;
+
+ /* check initialized volume count */
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ vol = &fsp->volumes[i];
+ if (vol->id != -1)
+ nvolumes++;
+ }
+ if (fsp->nvolumes != nvolumes)
+ errx(1, "Volume count mismatch %d vs %d",
+ fsp->nvolumes, nvolumes);
+
+ /* check volume header */
+ if (rootvoldata) {
+ if (rootvoldata->volu_id != HAMMER2_ROOT_VOLUME)
+ errx(1, "Volume id %d must be %d",
+ rootvoldata->volu_id, HAMMER2_ROOT_VOLUME);
+ if (rootvoldata->nvolumes != fso.nvolumes)
+ errx(1, "Volume header requires %d devices, %d specified",
+ rootvoldata->nvolumes, fso.nvolumes);
+ if (rootvoldata->total_size != fso.total_size)
+ errx(1, "Total size 0x%016jx does not equal sum of "
+ "volumes 0x%016jx",
+ (intmax_t)rootvoldata->total_size,
+ (intmax_t)fso.total_size);
+ for (i = 0; i < nvolumes; ++i) {
+ off = rootvoldata->volu_loff[i];
+ if (off == (hammer2_off_t)-1)
+ errx(1, "Volume offset[%d] 0x%016jx must not be -1",
+ i, (intmax_t)off);
+ }
+ for (i = nvolumes; i < HAMMER2_MAX_VOLUMES; ++i) {
+ off = rootvoldata->volu_loff[i];
+ if (off != (hammer2_off_t)-1)
+ errx(1, "Volume offset[%d] 0x%016jx must be -1",
+ i, (intmax_t)off);
+ }
+ }
+
+ /* check volumes */
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ vol = &fsp->volumes[i];
+ if (vol->id == -1)
+ continue;
+ path = vol->path;
+ /* check offset */
+ if (vol->offset & HAMMER2_FREEMAP_LEVEL1_MASK)
+ errx(1, "%s's offset 0x%016jx not 0x%016jx aligned",
+ path, (intmax_t)vol->offset,
+ (intmax_t)HAMMER2_FREEMAP_LEVEL1_SIZE);
+ /* check vs previous volume */
+ if (i) {
+ if (vol->id != (vol-1)->id + 1)
+ errx(1, "%s has inconsistent id %d", path,
+ vol->id);
+ if (vol->offset != (vol-1)->offset + (vol-1)->size)
+ errx(1, "%s has inconsistent offset 0x%016jx",
+ path, (intmax_t)vol->offset);
+ } else { /* first */
+ if (vol->offset)
+ errx(1, "%s has non zero offset 0x%016jx", path,
+ (intmax_t)vol->offset);
+ }
+ /* check size for non-last and last volumes */
+ if (i != fsp->nvolumes - 1) {
+ if (vol->size < HAMMER2_FREEMAP_LEVEL1_SIZE)
+ errx(1, "%s's size must be >= 0x%016jx", path,
+ (intmax_t)HAMMER2_FREEMAP_LEVEL1_SIZE);
+ if (vol->size & HAMMER2_FREEMAP_LEVEL1_MASK)
+ errx(1, "%s's size is not 0x%016jx aligned",
+ path,
+ (intmax_t)HAMMER2_FREEMAP_LEVEL1_SIZE);
+ } else { /* last */
+ if (vol->size & HAMMER2_VOLUME_ALIGNMASK64)
+ errx(1, "%s's size is not 0x%016jx aligned",
+ path, (intmax_t)HAMMER2_VOLUME_ALIGN);
+ }
+ }
+}
+
+void
+hammer2_verify_volumes(hammer2_ondisk_t *fsp,
+ const hammer2_volume_data_t *rootvoldata)
+{
+ hammer2_verify_volumes_common(fsp);
+ if (fsp->version >= HAMMER2_VOL_VERSION_MULTI_VOLUMES)
+ hammer2_verify_volumes_2(fsp, rootvoldata);
+ else
+ hammer2_verify_volumes_1(fsp, rootvoldata);
+ assert(fsp->nvolumes > 0);
+}
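+/*
+ * Version dispatch note: headers at VOL_VERSION_MULTI_VOLUMES or newer
+ * may span multiple devices and carry per-volume id/offset fields;
+ * older headers are restricted to a single volume.
+ */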
+
+void
+hammer2_print_volumes(const hammer2_ondisk_t *fsp)
+{
+ const hammer2_volume_t *vol;
+ int i, n, w = 0;
+
+ for (i = 0; i < fsp->nvolumes; ++i) {
+ vol = &fsp->volumes[i];
+ n = (int)strlen(vol->path);
+ if (n > w)
+ w = n;
+ }
+
+ printf("total %-*.*s 0x%016jx 0x%016jx\n",
+ w, w, "", (intmax_t)0, (intmax_t)fsp->total_size);
+
+ for (i = 0; i < fsp->nvolumes; ++i) {
+ vol = &fsp->volumes[i];
+ printf("volume%-2d %-*.*s 0x%016jx 0x%016jx%s\n",
+ vol->id, w, w, vol->path, (intmax_t)vol->offset,
+ (intmax_t)vol->size,
+ (vol->id == HAMMER2_ROOT_VOLUME ?
+ " (root volume)" : ""));
+ }
+}
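+/*
+ * Hypothetical two-volume output (sketch):
+ *
+ * total            0x0000000000000000 0x0000000080000000
+ * volume0 /dev/da0 0x0000000000000000 0x0000000040000000 (root volume)
+ * volume1 /dev/da1 0x0000000040000000 0x0000000040000000
+ */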
+
+void
+hammer2_init_volumes(const char *blkdevs, int rdonly)
+{
+ hammer2_volume_data_t *rootvoldata;
+ char *base, *p, *devpath;
+
+ if (hammer2_volumes_initialized)
+ errx(1, "Already initialized");
+ if (!blkdevs)
+ errx(1, "NULL blkdevs");
+
+ hammer2_init_ondisk(&fso);
+ base = p = strdup(blkdevs);
+ while ((devpath = p) != NULL) {
+ if ((p = strchr(p, ':')) != NULL)
+ *p++ = 0;
+ /* DragonFly uses getdevpath(3) here */
+ if (strchr(devpath, ':'))
+ hammer2_init_volumes(devpath, rdonly);
+ else
+ hammer2_add_volume(devpath, rdonly);
+ }
+ free(base); /* p is NULL at loop exit; free the strdup()ed buffer */
+ hammer2_volumes_initialized = 1;
+
+ rootvoldata = hammer2_read_root_volume_header();
+ hammer2_verify_volumes(&fso, rootvoldata);
+ free(rootvoldata);
+}
+
+void
+hammer2_cleanup_volumes(void)
+{
+ hammer2_volume_t *vol;
+ int i;
+
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ vol = &fso.volumes[i];
+ if (vol->id == -1)
+ continue;
+ hammer2_uninstall_volume(vol);
+ }
+ hammer2_volumes_initialized = 0;
+}
+
+typedef void (*callback)(const hammer2_volume_t*, void *data);
+
+static int
+hammer2_get_volume_attr(hammer2_off_t offset, callback fn, void *data)
+{
+ hammer2_volume_t *vol;
+ int i;
+
+ assert(hammer2_volumes_initialized == 1);
+ offset &= ~HAMMER2_OFF_MASK_RADIX;
+
+ /* do binary search if users really use this many supported volumes */
+ for (i = 0; i < fso.nvolumes; ++i) {
+ vol = &fso.volumes[i];
+ if ((offset >= vol->offset) &&
+ (offset < vol->offset + vol->size)) {
+ fn(vol, data);
+ return(0);
+ }
+ }
+
+ return(-1);
+}
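+/*
+ * The hammer2_get_volume_*() accessors below pair this lookup with a
+ * one-line callback each; e.g. hammer2_get_volume_fd(0) resolves the
+ * volume covering offset 0 (the root volume) and returns its fd.
+ */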
+
+/* fd */
+static void
+hammer2_volume_fd_cb(const hammer2_volume_t *vol, void *data)
+{
+ *(int*)data = vol->fd;
+}
+
+int
+hammer2_get_volume_fd(hammer2_off_t offset)
+{
+ int ret = 0;
+
+ if (hammer2_get_volume_attr(offset, hammer2_volume_fd_cb, &ret) < 0)
+ return(-1);
+ return(ret);
+}
+
+int
+hammer2_get_root_volume_fd(void)
+{
+ return(hammer2_get_volume_fd(0));
+}
+
+/* id */
+static void
+hammer2_volume_id_cb(const hammer2_volume_t *vol, void *data)
+{
+ *(int*)data = vol->id;
+}
+
+int
+hammer2_get_volume_id(hammer2_off_t offset)
+{
+ int ret = 0;
+
+ if (hammer2_get_volume_attr(offset, hammer2_volume_id_cb, &ret) < 0)
+ return(-1);
+ return(ret);
+}
+
+int
+hammer2_get_root_volume_id(void)
+{
+ return(hammer2_get_volume_id(0));
+}
+
+/* path */
+static void
+hammer2_volume_path_cb(const hammer2_volume_t *vol, void *data)
+{
+ *(const char**)data = vol->path;
+}
+
+const char *
+hammer2_get_volume_path(hammer2_off_t offset)
+{
+ const char *ret = NULL;
+
+ if (hammer2_get_volume_attr(offset, hammer2_volume_path_cb, &ret) < 0)
+ return(NULL);
+ return(ret);
+}
+
+const char *
+hammer2_get_root_volume_path(void)
+{
+ return(hammer2_get_volume_path(0));
+}
+
+/* offset */
+static void
+hammer2_volume_offset_cb(const hammer2_volume_t *vol, void *data)
+{
+ *(hammer2_off_t*)data = vol->offset;
+}
+
+hammer2_off_t
+hammer2_get_volume_offset(hammer2_off_t offset)
+{
+ hammer2_off_t ret = 0;
+
+ if (hammer2_get_volume_attr(offset, hammer2_volume_offset_cb, &ret) < 0)
+ return(-1);
+ return(ret);
+}
+
+hammer2_off_t
+hammer2_get_root_volume_offset(void)
+{
+ return(hammer2_get_volume_offset(0));
+}
+
+/* size */
+static void
+hammer2_volume_size_cb(const hammer2_volume_t *vol, void *data)
+{
+ *(hammer2_off_t*)data = vol->size;
+}
+
+hammer2_off_t
+hammer2_get_volume_size(hammer2_off_t offset)
+{
+ hammer2_off_t ret = 0;
+
+ if (hammer2_get_volume_attr(offset, hammer2_volume_size_cb, &ret) < 0)
+ return(-1);
+ return(ret);
+}
+
+hammer2_off_t
+hammer2_get_root_volume_size(void)
+{
+ return(hammer2_get_volume_size(0));
+}
+
+/* total size */
+hammer2_off_t
+hammer2_get_total_size(void)
+{
+ return(fso.total_size);
+}
+
+hammer2_volume_data_t*
+hammer2_read_root_volume_header(void)
+{
+ hammer2_volume_data_t *voldata;
+ int fd = hammer2_get_root_volume_fd();
+ const char *path = hammer2_get_root_volume_path();
+
+ if (fd == -1)
+ return(NULL);
+
+ voldata = calloc(1, sizeof(*voldata));
+ if (hammer2_read_volume_header(fd, path, voldata) >= 0)
+ return(voldata);
+ else
+ errx(1, "Failed to read volume header");
+}
diff --git a/sbin/hammer2/print_inode.c b/sbin/hammer2/print_inode.c
new file mode 100644
--- /dev/null
+++ b/sbin/hammer2/print_inode.c
@@ -0,0 +1,126 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2013-2019 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "hammer2.h"
+
+static void
+hexdump_inode(const void *data, size_t len)
+{
+ const unsigned char *p = data;
+ size_t i;
+
+ if (VerboseOpt <= 0)
+ return;
+
+ for (i = 0; i < len; i++) {
+ printf("%02X", *p);
+ if (i && !((i + 1) % 16))
+ printf("\n");
+ else if (i != len - 1)
+ printf(" ");
+ p++;
+ }
+ printf("\n");
+}
+
+void
+print_inode(const char *path)
+{
+ hammer2_ioc_inode_t inode;
+ hammer2_inode_data_t *ipdata;
+ hammer2_inode_meta_t *meta;
+ char *str = NULL;
+ int fd;
+ uuid_t uuid;
+
+ fd = hammer2_ioctl_handle(path);
+ if (fd == -1)
+ return;
+
+ if (ioctl(fd, HAMMER2IOC_INODE_GET, &inode) == -1) {
+ printf("ioctl(HAMMER2IOC_INODE_GET) failed\n");
+ close(fd);
+ return;
+ }
+ ipdata = &inode.ip_data;
+ meta = &ipdata->meta;
+
+ hexdump_inode(meta, sizeof(*meta));
+
+ printf("version = %u\n", meta->version);
+ printf("pfs_subtype = %u (%s)\n", meta->pfs_subtype,
+ hammer2_pfssubtype_to_str(meta->pfs_subtype));
+ printf("uflags = 0x%x\n", (unsigned int)meta->uflags);
+ printf("rmajor = %u\n", meta->rmajor);
+ printf("rminor = %u\n", meta->rminor);
+ printf("ctime = %s\n", hammer2_time64_to_str(meta->ctime, &str));
+ printf("mtime = %s\n", hammer2_time64_to_str(meta->mtime, &str));
+ printf("atime = %s\n", hammer2_time64_to_str(meta->atime, &str));
+ printf("btime = %s\n", hammer2_time64_to_str(meta->btime, &str));
+ uuid = meta->uid;
+ printf("uid = %s\n", hammer2_uuid_to_str(&uuid, &str));
+ uuid = meta->gid;
+ printf("gid = %s\n", hammer2_uuid_to_str(&uuid, &str));
+ printf("type = %u (%s)\n", meta->type,
+ hammer2_iptype_to_str(meta->type));
+ printf("op_flags = 0x%x\n", meta->op_flags);
+ printf("cap_flags = 0x%x\n", meta->cap_flags);
+ printf("mode = 0%o\n", meta->mode);
+ printf("inum = 0x%jx\n", (uintmax_t)meta->inum);
+ printf("size = %ju\n", (uintmax_t)meta->size);
+ printf("nlinks = %ju\n", (uintmax_t)meta->nlinks);
+ printf("iparent = 0x%jx\n", (uintmax_t)meta->iparent);
+ printf("name_key = 0x%jx\n", (uintmax_t)meta->name_key);
+ printf("name_len = %u\n", meta->name_len);
+ printf("ncopies = %u\n", meta->ncopies);
+ printf("comp_algo = %u\n", meta->comp_algo);
+ printf("target_type = %u\n", meta->target_type);
+ printf("check_algo = %u\n", meta->check_algo);
+ printf("pfs_nmasters = %u\n", meta->pfs_nmasters);
+ printf("pfs_type = %u (%s)\n", meta->pfs_type,
+ hammer2_pfstype_to_str(meta->pfs_type));
+ printf("pfs_inum = 0x%jx\n", (uintmax_t)meta->pfs_inum);
+ uuid = meta->pfs_clid;
+ printf("pfs_clid = %s\n", hammer2_uuid_to_str(&uuid, &str));
+ uuid = meta->pfs_fsid;
+ printf("pfs_fsid = %s\n", hammer2_uuid_to_str(&uuid, &str));
+ printf("data_quota = 0x%jx\n", (uintmax_t)meta->data_quota);
+ printf("inode_quota = 0x%jx\n", (uintmax_t)meta->inode_quota);
+ printf("pfs_lsnap_tid = 0x%jx\n", (uintmax_t)meta->pfs_lsnap_tid);
+ printf("decrypt_check = 0x%jx\n", (uintmax_t)meta->decrypt_check);
+
+ free(str);
+ close(fd);
+}
diff --git a/sbin/hammer2/subs.c b/sbin/hammer2/subs.c
new file mode 100644
--- /dev/null
+++ b/sbin/hammer2/subs.c
@@ -0,0 +1,387 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/statvfs.h>
+#include <sys/disk.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <err.h>
+#include <uuid.h>
+
+#include <fs/hammer2/hammer2_disk.h>
+#include <fs/hammer2/hammer2_ioctl.h>
+
+#include "hammer2_subs.h"
+
+/*
+ * Obtain a file descriptor that the caller can execute ioctl()'s on.
+ */
+int
+hammer2_ioctl_handle(const char *sel_path)
+{
+ struct hammer2_ioc_version info;
+ int fd;
+
+ if (sel_path == NULL)
+ sel_path = ".";
+
+ fd = open(sel_path, O_RDONLY, 0);
+ if (fd < 0) {
+ fprintf(stderr, "hammer2: Unable to open %s: %s\n",
+ sel_path, strerror(errno));
+ return(-1);
+ }
+ if (ioctl(fd, HAMMER2IOC_VERSION_GET, &info) < 0) {
+ fprintf(stderr, "hammer2: '%s' is not a hammer2 filesystem\n",
+ sel_path);
+ close(fd);
+ return(-1);
+ }
+ return (fd);
+}
+
+const char *
+hammer2_time64_to_str(uint64_t htime64, char **strp)
+{
+ struct tm *tp;
+ time_t t;
+
+ if (*strp) {
+ free(*strp);
+ *strp = NULL;
+ }
+ *strp = malloc(64);
+ t = htime64 / 1000000;
+ tp = localtime(&t);
+ strftime(*strp, 64, "%d-%b-%Y %H:%M:%S", tp);
+ return (*strp);
+}
+
+const char *
+hammer2_uuid_to_str(const uuid_t *uuid, char **strp)
+{
+ uint32_t status;
+ if (*strp) {
+ free(*strp);
+ *strp = NULL;
+ }
+ uuid_to_string(uuid, strp, &status);
+ return (*strp);
+}
+
+const char *
+hammer2_iptype_to_str(uint8_t type)
+{
+ switch(type) {
+ case HAMMER2_OBJTYPE_UNKNOWN:
+ return("UNKNOWN");
+ case HAMMER2_OBJTYPE_DIRECTORY:
+ return("DIR");
+ case HAMMER2_OBJTYPE_REGFILE:
+ return("FILE");
+ case HAMMER2_OBJTYPE_FIFO:
+ return("FIFO");
+ case HAMMER2_OBJTYPE_CDEV:
+ return("CDEV");
+ case HAMMER2_OBJTYPE_BDEV:
+ return("BDEV");
+ case HAMMER2_OBJTYPE_SOFTLINK:
+ return("SOFTLINK");
+ case HAMMER2_OBJTYPE_SOCKET:
+ return("SOCKET");
+ case HAMMER2_OBJTYPE_WHITEOUT:
+ return("WHITEOUT");
+ default:
+ return("ILLEGAL");
+ }
+}
+
+const char *
+hammer2_pfstype_to_str(uint8_t type)
+{
+ switch(type) {
+ case HAMMER2_PFSTYPE_NONE:
+ return("NONE");
+ case HAMMER2_PFSTYPE_SUPROOT:
+ return("SUPROOT");
+ case HAMMER2_PFSTYPE_MASTER:
+ return("MASTER");
+ default:
+ return("ILLEGAL");
+ }
+}
+
+const char *
+hammer2_pfssubtype_to_str(uint8_t subtype)
+{
+ switch(subtype) {
+ case HAMMER2_PFSSUBTYPE_NONE:
+ return("NONE");
+ default:
+ return("ILLEGAL");
+ }
+}
+
+const char *
+hammer2_breftype_to_str(uint8_t type)
+{
+ switch(type) {
+ case HAMMER2_BREF_TYPE_EMPTY:
+ return("empty");
+ case HAMMER2_BREF_TYPE_INODE:
+ return("inode");
+ case HAMMER2_BREF_TYPE_INDIRECT:
+ return("indirect");
+ case HAMMER2_BREF_TYPE_DATA:
+ return("data");
+ case HAMMER2_BREF_TYPE_DIRENT:
+ return("dirent");
+ case HAMMER2_BREF_TYPE_FREEMAP_NODE:
+ return("freemap_node");
+ case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
+ return("freemap_leaf");
+ case HAMMER2_BREF_TYPE_INVALID:
+ return("invalid");
+ case HAMMER2_BREF_TYPE_FREEMAP:
+ return("freemap");
+ case HAMMER2_BREF_TYPE_VOLUME:
+ return("volume");
+ default:
+ return("unknown");
+ }
+}
+
+const char *
+sizetostr(hammer2_off_t size)
+{
+ static char buf[32];
+
+ if (size < 1024 / 2) {
+ snprintf(buf, sizeof(buf), "%6.2fB", (double)size);
+ } else if (size < 1024 * 1024 / 2) {
+ snprintf(buf, sizeof(buf), "%6.2fKB",
+ (double)size / 1024);
+ } else if (size < 1024 * 1024 * 1024LL / 2) {
+ snprintf(buf, sizeof(buf), "%6.2fMB",
+ (double)size / (1024 * 1024));
+ } else if (size < 1024 * 1024 * 1024LL * 1024LL / 2) {
+ snprintf(buf, sizeof(buf), "%6.2fGB",
+ (double)size / (1024 * 1024 * 1024LL));
+ } else {
+ snprintf(buf, sizeof(buf), "%6.2fTB",
+ (double)size / (1024 * 1024 * 1024LL * 1024LL));
+ }
+ return(buf);
+}
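+/*
+ * Example (sketch): sizetostr(1536) formats as "  1.50KB" and
+ * sizetostr((hammer2_off_t)3 << 30) as "  3.00GB".  The static buffer
+ * is reused, so the result must be consumed before the next call.
+ */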
+
+const char *
+counttostr(hammer2_off_t size)
+{
+ static char buf[32];
+
+ if (size < 1024 / 2) {
+ snprintf(buf, sizeof(buf), "%jd",
+ (intmax_t)size);
+ } else if (size < 1024 * 1024 / 2) {
+ snprintf(buf, sizeof(buf), "%jd",
+ (intmax_t)size);
+ } else if (size < 1024 * 1024 * 1024LL / 2) {
+ snprintf(buf, sizeof(buf), "%6.2fM",
+ (double)size / (1024 * 1024));
+ } else if (size < 1024 * 1024 * 1024LL * 1024LL / 2) {
+ snprintf(buf, sizeof(buf), "%6.2fG",
+ (double)(size / (1024 * 1024 * 1024LL)));
+ } else {
+ snprintf(buf, sizeof(buf), "%6.2fT",
+ (double)(size / (1024 * 1024 * 1024LL * 1024LL)));
+ }
+ return(buf);
+}
+
+hammer2_off_t
+check_volume(int fd)
+{
+ struct stat st;
+ hammer2_off_t size;
+
+ /*
+ * Get basic information about the volume
+ */
+ if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0) {
+ /*
+ * Allow the formatting of regular files as HAMMER2 volumes
+ */
+ if (fstat(fd, &st) < 0)
+ err(1, "Unable to stat fd %d", fd);
+ if (!S_ISREG(st.st_mode))
+ errx(1, "Unsupported file type for fd %d", fd);
+ size = st.st_size;
+ } else {
+ /*
+ * When formatting a block device as a HAMMER2 volume the
+ * sector size must be compatible. HAMMER2 uses 64K
+ * filesystem buffers but logical buffers for direct I/O
+ * can be as small as HAMMER2_LOGSIZE (16KB).
+ */
+ int media_blksize;
+ if (!ioctl(fd, DIOCGSECTORSIZE, &media_blksize) &&
+ (media_blksize > HAMMER2_PBUFSIZE ||
+ HAMMER2_PBUFSIZE % media_blksize)) {
+ errx(1, "A media sector size of %d is not supported",
+ media_blksize);
+ }
+ }
+ return(size);
+}
+
+/*
+ * Borrow HAMMER1's directory hash algorithm #1 with a few modifications.
+ * The filename is split into fields which are hashed separately and then
+ * added together.
+ *
+ * Differences include: bit 63 must be set to 1 for HAMMER2 (HAMMER1 sets
+ * it to 0), this is because bit63=0 is used for hidden hardlinked inodes.
+ * (This means we do not need to do a 0-check/or-with-0x100000000 either).
+ *
+ * Also, the iscsi crc code is used instead of the old crc32 code.
+ */
+hammer2_key_t
+dirhash(const unsigned char *name, size_t len)
+{
+ const unsigned char *aname = name;
+ uint32_t crcx;
+ uint64_t key;
+ size_t i, j;
+
+ key = 0;
+
+ /*
+ * m32
+ */
+ crcx = 0;
+ for (i = j = 0; i < len; ++i) {
+ if (aname[i] == '.' ||
+ aname[i] == '-' ||
+ aname[i] == '_' ||
+ aname[i] == '~') {
+ if (i != j)
+ crcx += hammer2_icrc32(aname + j, i - j);
+ j = i + 1;
+ }
+ }
+ if (i != j)
+ crcx += hammer2_icrc32(aname + j, i - j);
+
+ /*
+ * The directory hash utilizes the top 32 bits of the 64-bit key.
+ * Bit 63 must be set to 1.
+ */
+ crcx |= 0x80000000U;
+ key |= (uint64_t)crcx << 32;
+
+ /*
+ * l16 - crc of entire filename
+ * This crc reduces degenerate hash collision conditions.
+ */
+ crcx = hammer2_icrc32(aname, len);
+ crcx = crcx ^ (crcx << 16);
+ key |= crcx & 0xFFFF0000U;
+
+ /*
+ * Set bit 15. This allows readdir to strip bit 63 so a positive
+ * 64-bit cookie/offset can always be returned, and still guarantee
+ * that the values 0x0000-0x7FFF are available for artificial entries
+ * ('.' and '..').
+ */
+ key |= 0x8000U;
+
+ return (key);
+}
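+/*
+ * Example (sketch): for the name "foo.bar" the m32 pass sums the
+ * iscsi crc32 of the fields "foo" and "bar" (split at the '.'), the
+ * l16 pass hashes "foo.bar" as a whole, and bits 63 and 15 of the
+ * resulting key are always set.
+ */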
+
+char **
+get_hammer2_mounts(int *acp)
+{
+ struct statfs *fs;
+ char **av;
+ int n;
+ int w;
+ int i;
+
+ /*
+ * Get a stable list of mount points
+ */
+again:
+ n = getfsstat(NULL, 0, MNT_NOWAIT);
+ if (n < 0)
+ err(1, "getfsstat");
+ av = calloc(n, sizeof(char *));
+ fs = calloc(n, sizeof(struct statfs));
+ if (getfsstat(fs, sizeof(*fs) * n, MNT_NOWAIT) != n) {
+ free(av);
+ free(fs);
+ goto again;
+ }
+
+ /*
+ * Pull out hammer2 filesystems only
+ */
+ for (i = w = 0; i < n; ++i) {
+ if (strcmp(fs[i].f_fstypename, "hammer2") != 0)
+ continue;
+ av[w++] = strdup(fs[i].f_mntonname);
+ }
+ *acp = w;
+ free(fs);
+
+ return av;
+}
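+/*
+ * Typical usage (sketch), paired with put_hammer2_mounts() below:
+ *
+ * int ac;
+ * char **av = get_hammer2_mounts(&ac);
+ * for (int i = 0; i < ac; ++i)
+ * printf("%s\n", av[i]);
+ * put_hammer2_mounts(ac, av);
+ */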
+
+void
+put_hammer2_mounts(int ac, char **av)
+{
+ while (--ac >= 0)
+ free(av[ac]);
+ free(av);
+}
diff --git a/sbin/mount_hammer2/Makefile b/sbin/mount_hammer2/Makefile
new file mode 100644
--- /dev/null
+++ b/sbin/mount_hammer2/Makefile
@@ -0,0 +1,13 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+PROG= mount_hammer2
+SRCS= mount_hammer2.c getmntopts.c
+MAN= mount_hammer2.8
+
+MOUNT= ${.CURDIR:H}/mount
+CFLAGS+= -I${MOUNT}
+CFLAGS+= -I${SRCTOP}/sys
+.PATH: ${MOUNT}
+
+.include <bsd.prog.mk>
diff --git a/sbin/mount_hammer2/mount_hammer2.8 b/sbin/mount_hammer2/mount_hammer2.8
new file mode 100644
--- /dev/null
+++ b/sbin/mount_hammer2/mount_hammer2.8
@@ -0,0 +1,104 @@
+.\" Copyright (c) 2017 The DragonFly Project. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\"
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in
+.\" the documentation and/or other materials provided with the
+.\" distribution.
+.\" 3. Neither the name of The DragonFly Project nor the names of its
+.\" contributors may be used to endorse or promote products derived
+.\" from this software without specific, prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+.\" FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+.\" COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+.\" INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd September 18, 2022
+.Dt MOUNT_HAMMER2 8
+.Os
+.Sh NAME
+.Nm mount_hammer2
+.Nd mount a HAMMER2 file system
+.Sh SYNOPSIS
+.Nm
+.Op Fl o Ar options
+.Ar special Ns Op Cm @ Ns Ar label
+.Ar node
+.Nm
+.Op Fl o Ar options
+.Cm @ Ns Ar label
+.Ar node
+.Sh DESCRIPTION
+The
+.Nm
+utility mounts a
+.Nm HAMMER2
+file system backed by
+.Ar special
+file at mount point
+.Ar node .
+The PFS named
+.Ar label
+is mounted.
+.Pp
+.Cm @ Ns Ar label
+(no
+.Ar special )
+is a short form that mounts
+.Ar label
+from an already mounted
+.Nm HAMMER2
+file system.
+If there are multiple mounts with the same
+.Ar label ,
+the first one mounted takes precedence over others.
+.Pp
+The
+.Nm HAMMER2
+file system on
+.Fx
+is currently mounted read-only, regardless of the options specified.
+Only host-endian
+.Nm HAMMER2
+volumes are supported.
+.Pp
+The options are as follows:
+.Bl -tag -width indent
+.It Fl o Ar options
+Options are specified with a
+.Fl o
+flag followed by a comma separated string of options.
+See the
+.Xr mount 8
+man page for possible options and their meanings.
+.El
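+.Sh EXAMPLES
+Mount the
+.Ql DATA
+PFS from a hypothetical device
+.Pa /dev/ada1p4 :
+.Pp
+.Dl # mount_hammer2 /dev/ada1p4@DATA /mnt
+.Pp
+Mount the
+.Ql ROOT
+PFS of an already mounted
+.Nm HAMMER2
+file system:
+.Pp
+.Dl # mount_hammer2 @ROOT /mnt2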
+.Sh SEE ALSO
+.Xr hammer2 8 ,
+.Xr newfs_hammer2 8
+.Sh HISTORY
+The
+.Nm
+utility first appeared in
+.Dx 3.3 .
+.Sh AUTHORS
+This manual page was written by
+.An Thomas Nikolajsen .
+.Pp
+The
+.Nm
+utility was ported to
+.Fx
+by
+.An Tomohiro Kusumi Aq Mt tkusumi@netbsd.org .
diff --git a/sbin/mount_hammer2/mount_hammer2.c b/sbin/mount_hammer2/mount_hammer2.c
new file mode 100644
--- /dev/null
+++ b/sbin/mount_hammer2/mount_hammer2.c
@@ -0,0 +1,157 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2015 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <fs/hammer2/hammer2_mount.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <err.h>
+#include <sysexits.h>
+#include <mntopts.h>
+
+static void usage(const char *ctl, ...);
+
+static struct mntopt mopts[] = {
+ MOPT_STDOPTS,
+ MOPT_UPDATE,
+ MOPT_END
+};
+
+/*
+ * Usage: mount_hammer2 [volume] [mtpt]
+ */
+int
+main(int ac, char *av[])
+{
+ struct iovec *iov;
+ char mntpath[MAXPATHLEN];
+ char fstype[] = "hammer2";
+ char *mountpt, *devpath = NULL;
+ int ch, iovlen = 0, mount_flags = 0;
+ int hflags = HMNT2_LOCAL; /* force local, not optional */
+
+ while ((ch = getopt(ac, av, "o:")) != -1) {
+ switch(ch) {
+ case 'o':
+ getmntopts(optarg, mopts, &mount_flags, &hflags);
+ break;
+ default:
+ usage("unknown option: -%c", ch);
+ /* not reached */
+ }
+ }
+ ac -= optind;
+ av += optind;
+
+ /*
+ * New mount
+ */
+ if (ac != 2) {
+ usage("missing parameter(s) (special[@label] node)");
+ /* not reached */
+ }
+
+ devpath = strdup(av[0]);
+ mountpt = av[1];
+
+ if (devpath[0] == 0) {
+ fprintf(stderr, "mount_hammer2: empty device path\n");
+ exit(1);
+ }
+
+ /*
+ * Automatically add @DATA if no label specified.
+ */
+ if (strchr(devpath, '@') == NULL) {
+ char *p2;
+ asprintf(&p2, "%s@DATA", devpath);
+ free(devpath);
+ devpath = p2;
+ }
+
+ /*
+ * Try to mount it, prefix if necessary.
+ */
+ if (!strchr(devpath, ':') && devpath[0] != '/' && devpath[0] != '@') {
+ char *p2;
+ asprintf(&p2, "/dev/%s", devpath);
+ free(devpath);
+ devpath = p2;
+ }
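+
+ /*
+ * A bare name such as "ada1p4" (hypothetical example) has now
+ * become "/dev/ada1p4@DATA"; absolute paths and "@label" forms
+ * pass through with only the default label applied.
+ */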
+
+ /*
+ * Resolve the mountpoint with realpath(3) and remove unnecessary
+ * slashes from the devicename if there are any.
+ */
+ if (checkpath(mountpt, mntpath) != 0)
+ err(EX_USAGE, "%s", mntpath);
+ if (devpath)
+ rmslashes(devpath, devpath);
+
+ mount_flags |= MNT_RDONLY; /* currently write unsupported */
+ build_iovec(&iov, &iovlen, "fstype", fstype, (size_t)-1);
+ build_iovec(&iov, &iovlen, "fspath", mntpath, (size_t)-1);
+ build_iovec(&iov, &iovlen, "from", devpath, (size_t)-1);
+ build_iovec(&iov, &iovlen, "hflags", &hflags, sizeof(hflags));
+ if (nmount(iov, iovlen, mount_flags) < 0)
+ err(1, "%s", devpath);
+
+ free(devpath);
+
+ return (0);
+}
+
+static
+void
+usage(const char *ctl, ...)
+{
+ va_list va;
+
+ va_start(va, ctl);
+ fprintf(stderr, "mount_hammer2: ");
+ vfprintf(stderr, ctl, va);
+ va_end(va);
+ fprintf(stderr, "\n");
+ fprintf(stderr, " mount_hammer2 [-o options] special[@label] node\n");
+ fprintf(stderr, " mount_hammer2 [-o options] @label node\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "options:\n"
+ " <standard_mount_options>\n"
+ );
+ exit(1);
+}
diff --git a/sbin/newfs_hammer2/Makefile b/sbin/newfs_hammer2/Makefile
new file mode 100644
--- /dev/null
+++ b/sbin/newfs_hammer2/Makefile
@@ -0,0 +1,16 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+PROG= newfs_hammer2
+SRCS= newfs_hammer2.c mkfs_hammer2.c ondisk.c subs.c xxhash.c gsb_crc32.c
+MAN= newfs_hammer2.8
+
+.PATH: ${SRCTOP}/sbin/hammer2 ${SRCTOP}/sys/fs/hammer2/xxhash ${SRCTOP}/sys/libkern
+
+WARNS?= 3
+
+CFLAGS+= -DXXH_NAMESPACE=h2_
+CFLAGS+= -I${SRCTOP}/sys
+CFLAGS+= -I${SRCTOP}/sbin/hammer2
+
+.include <bsd.prog.mk>
diff --git a/sbin/newfs_hammer2/mkfs_hammer2.h b/sbin/newfs_hammer2/mkfs_hammer2.h
new file mode 100644
--- /dev/null
+++ b/sbin/newfs_hammer2/mkfs_hammer2.h
@@ -0,0 +1,79 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef NEWFS_HAMMER2_H_
+#define NEWFS_HAMMER2_H_
+
+#include <fs/hammer2/hammer2_disk.h>
+
+#include <uuid.h>
+
+#include "hammer2_subs.h"
+
+#define HAMMER2_LABEL_NONE 0
+#define HAMMER2_LABEL_BOOT 1
+#define HAMMER2_LABEL_ROOT 2
+#define HAMMER2_LABEL_DATA 3
+
+#define MAXLABELS HAMMER2_SET_COUNT
+
+typedef struct {
+ int Hammer2Version;
+ uuid_t Hammer2_FSType; /* filesystem type id for HAMMER2 */
+ uuid_t Hammer2_VolFSID; /* unique filesystem id in volu header */
+ uuid_t Hammer2_SupCLID; /* PFS cluster id in super-root inode */
+ uuid_t Hammer2_SupFSID; /* PFS unique id in super-root inode */
+ uuid_t Hammer2_PfsCLID[MAXLABELS];
+ uuid_t Hammer2_PfsFSID[MAXLABELS];
+ hammer2_off_t BootAreaSize;
+ hammer2_off_t AuxAreaSize;
+ char *Label[MAXLABELS];
+ int NLabels;
+ int CompType; /* default LZ4 */
+ int CheckType; /* default XXHASH64 */
+ int DefaultLabelType;
+ int DebugOpt;
+} hammer2_mkfs_options_t;
+
+void hammer2_mkfs_init(hammer2_mkfs_options_t *opt);
+void hammer2_mkfs_cleanup(hammer2_mkfs_options_t *opt);
+
+int64_t getsize(const char *str, int64_t minval, int64_t maxval, int pw);
+
+void hammer2_mkfs(int ac, char **av, hammer2_mkfs_options_t *opt);
+
+#endif /* !NEWFS_HAMMER2_H_ */
diff --git a/sbin/newfs_hammer2/mkfs_hammer2.c b/sbin/newfs_hammer2/mkfs_hammer2.c
new file mode 100644
--- /dev/null
+++ b/sbin/newfs_hammer2/mkfs_hammer2.c
@@ -0,0 +1,811 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/sysctl.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <err.h>
+#include <uuid.h>
+
+#include <fs/hammer2/hammer2_disk.h>
+#include <fs/hammer2/hammer2_xxhash.h>
+
+#include "mkfs_hammer2.h"
+#include "hammer2_subs.h"
+
+static uint64_t nowtime(void);
+static int blkrefary_cmp(const void *b1, const void *b2);
+static void alloc_direct(hammer2_off_t *basep, hammer2_blockref_t *bref,
+ size_t bytes);
+
+static int
+get_hammer2_version(void)
+{
+ int version = HAMMER2_VOL_VERSION_DEFAULT;
+ size_t olen = sizeof(version);
+
+ if (sysctlbyname("vfs.hammer2.supported_version",
+ &version, &olen, NULL, 0) == 0) {
+ if (version >= HAMMER2_VOL_VERSION_WIP) {
+ version = HAMMER2_VOL_VERSION_WIP - 1;
+ fprintf(stderr,
+ "newfs_hammer2: WARNING: HAMMER2 VFS "
+ "supports higher version than I "
+ "understand.\n"
+ "Using default version %d\n",
+ version);
+ }
+ } else {
+ fprintf(stderr,
+ "newfs_hammer2: WARNING: HAMMER2 VFS not "
+ "loaded, cannot get version info.\n"
+ "Using default version %d\n",
+ version);
+ }
+ return(version);
+}
+
+void
+hammer2_mkfs_init(hammer2_mkfs_options_t *opt)
+{
+ uint32_t status;
+
+ memset(opt, 0, sizeof(*opt));
+
+ opt->Hammer2Version = get_hammer2_version();
+ opt->Label[opt->NLabels++] = strdup("LOCAL");
+ opt->CompType = HAMMER2_COMP_NEWFS_DEFAULT; /* LZ4 */
+ opt->CheckType = HAMMER2_CHECK_XXHASH64;
+ opt->DefaultLabelType = HAMMER2_LABEL_NONE;
+
+ /*
+ * Generate a filesystem id and lookup the filesystem type
+ */
+ srandomdev();
+ uuidgen(&opt->Hammer2_VolFSID, 1);
+ uuidgen(&opt->Hammer2_SupCLID, 1);
+ uuidgen(&opt->Hammer2_SupFSID, 1);
+ uuid_from_string(HAMMER2_UUID_STRING, &opt->Hammer2_FSType, &status);
+ /*uuid_name_lookup(&Hammer2_FSType, "DragonFly HAMMER2", &status);*/
+ if (status != uuid_s_ok) {
+ errx(1, "Unable to parse the DragonFly HAMMER2 "
+ "filesystem type uuid");
+ }
+}
+
+void
+hammer2_mkfs_cleanup(hammer2_mkfs_options_t *opt)
+{
+ int i;
+
+ for (i = 0; i < opt->NLabels; i++)
+ free(opt->Label[i]);
+}
+
+static void
+adjust_options(hammer2_ondisk_t *fso, hammer2_mkfs_options_t *opt)
+{
+ /*
+ * Adjust Label[] and NLabels.
+ */
+ switch (opt->DefaultLabelType) {
+ case HAMMER2_LABEL_BOOT:
+ opt->Label[opt->NLabels++] = strdup("BOOT");
+ break;
+ case HAMMER2_LABEL_ROOT:
+ opt->Label[opt->NLabels++] = strdup("ROOT");
+ break;
+ case HAMMER2_LABEL_DATA:
+ opt->Label[opt->NLabels++] = strdup("DATA");
+ break;
+ case HAMMER2_LABEL_NONE:
+ /* nothing to do */
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ /*
+ * Calculate defaults for the boot area size and round to the
+ * volume alignment boundary.
+ *
+ * NOTE: These areas are currently not used for booting but are
+ * reserved for future filesystem expansion.
+ */
+ hammer2_off_t BootAreaSize = opt->BootAreaSize;
+ if (BootAreaSize == 0) {
+ BootAreaSize = HAMMER2_BOOT_NOM_BYTES;
+ while (BootAreaSize > fso->total_size / 20)
+ BootAreaSize >>= 1;
+ if (BootAreaSize < HAMMER2_BOOT_MIN_BYTES)
+ BootAreaSize = HAMMER2_BOOT_MIN_BYTES;
+ } else if (BootAreaSize < HAMMER2_BOOT_MIN_BYTES) {
+ BootAreaSize = HAMMER2_BOOT_MIN_BYTES;
+ }
+ BootAreaSize = (BootAreaSize + HAMMER2_VOLUME_ALIGNMASK64) &
+ ~HAMMER2_VOLUME_ALIGNMASK64;
+ opt->BootAreaSize = BootAreaSize;
+
+ /*
+ * Calculate defaults for the aux area size and round to the
+ * volume alignment boundary.
+ *
+ * NOTE: These areas are currently not used for logging but are
+ * reserved for future filesystem expansion.
+ */
+ hammer2_off_t AuxAreaSize = opt->AuxAreaSize;
+ if (AuxAreaSize == 0) {
+ AuxAreaSize = HAMMER2_AUX_NOM_BYTES;
+ while (AuxAreaSize > fso->total_size / 20)
+ AuxAreaSize >>= 1;
+ if (AuxAreaSize < HAMMER2_AUX_MIN_BYTES)
+ AuxAreaSize = HAMMER2_AUX_MIN_BYTES;
+ } else if (AuxAreaSize < HAMMER2_AUX_MIN_BYTES) {
+ AuxAreaSize = HAMMER2_AUX_MIN_BYTES;
+ }
+ AuxAreaSize = (AuxAreaSize + HAMMER2_VOLUME_ALIGNMASK64) &
+ ~HAMMER2_VOLUME_ALIGNMASK64;
+ opt->AuxAreaSize = AuxAreaSize;
+}
+
+/*
+ * Convert a string to a 64 bit signed integer with various requirements.
+ */
+int64_t
+getsize(const char *str, int64_t minval, int64_t maxval, int powerof2)
+{
+ int64_t val;
+ char *ptr;
+
+ val = strtoll(str, &ptr, 0);
+ switch(*ptr) {
+ case 't':
+ case 'T':
+ val *= 1024;
+ /* fall through */
+ case 'g':
+ case 'G':
+ val *= 1024;
+ /* fall through */
+ case 'm':
+ case 'M':
+ val *= 1024;
+ /* fall through */
+ case 'k':
+ case 'K':
+ val *= 1024;
+ break;
+ default:
+ errx(1, "Unknown suffix in number '%s'", str);
+ /* not reached */
+ }
+ if (ptr[1]) {
+ errx(1, "Unknown suffix in number '%s'", str);
+ /* not reached */
+ }
+ if (val < minval) {
+ errx(1, "Value too small: %s, min is %s",
+ str, sizetostr(minval));
+ /* not reached */
+ }
+ if (val > maxval) {
+ errx(1, "Value too large: %s, max is %s",
+ str, sizetostr(maxval));
+ /* not reached */
+ }
+ if ((powerof2 & 1) && (val ^ (val - 1)) != ((val << 1) - 1)) {
+ errx(1, "Value not power of 2: %s", str);
+ /* not reached */
+ }
+ if ((powerof2 & 2) && (val & HAMMER2_NEWFS_ALIGNMASK)) {
+ errx(1, "Value not an integral multiple of %dK: %s",
+ HAMMER2_NEWFS_ALIGN / 1024, str);
+ /* not reached */
+ }
+ return(val);
+}
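+/*
+ * Example (sketch): getsize("64K", ...) yields 65536 and
+ * getsize("1g", ...) yields 1073741824.  A number without one of the
+ * k/m/g/t suffixes is rejected, as are values outside [minval,
+ * maxval] or failing the requested power-of-2/alignment checks.
+ */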
+
+static uint64_t
+nowtime(void)
+{
+ struct timeval tv;
+ uint64_t xtime;
+
+ gettimeofday(&tv, NULL);
+ xtime = tv.tv_sec * 1000000LL + tv.tv_usec;
+ return(xtime);
+}
+
+static hammer2_off_t
+format_hammer2_misc(hammer2_volume_t *vol, hammer2_mkfs_options_t *opt,
+ hammer2_off_t boot_base, hammer2_off_t aux_base)
+{
+ char *buf = malloc(HAMMER2_PBUFSIZE);
+ hammer2_off_t alloc_base = aux_base + opt->AuxAreaSize;
+ hammer2_off_t tmp_base;
+ size_t n;
+ int i;
+
+ /*
+ * Clear the entire 4MB reserve for the first 2G zone.
+ */
+ bzero(buf, HAMMER2_PBUFSIZE);
+ tmp_base = 0;
+ for (i = 0; i < HAMMER2_ZONE_BLOCKS_SEG; ++i) {
+ n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE, tmp_base);
+ if (n != HAMMER2_PBUFSIZE) {
+ perror("write");
+ exit(1);
+ }
+ tmp_base += HAMMER2_PBUFSIZE;
+ }
+
+ /*
+ * Make sure alloc_base won't cross the reserved area at the
+ * beginning of each 1GB.
+ *
+ * Reserve space for the super-root inode and the root inode.
+ * Make sure they are in the same 64K block to simplify our code.
+ */
+ assert((alloc_base & HAMMER2_PBUFMASK) == 0);
+ assert(alloc_base < HAMMER2_FREEMAP_LEVEL1_SIZE);
+
+ /*
+ * Clear the boot/aux area.
+ */
+ for (tmp_base = boot_base; tmp_base < alloc_base;
+ tmp_base += HAMMER2_PBUFSIZE) {
+ n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE, tmp_base);
+ if (n != HAMMER2_PBUFSIZE) {
+ perror("write (boot/aux)");
+ exit(1);
+ }
+ }
+
+ free(buf);
+ return(alloc_base);
+}
+
+static hammer2_off_t
+format_hammer2_inode(hammer2_volume_t *vol, hammer2_mkfs_options_t *opt,
+ hammer2_blockref_t *sroot_blockrefp,
+ hammer2_off_t alloc_base)
+{
+ char *buf = malloc(HAMMER2_PBUFSIZE);
+ hammer2_inode_data_t *rawip;
+ hammer2_blockref_t sroot_blockref;
+ hammer2_blockref_t root_blockref[MAXLABELS];
+ uint64_t now;
+ size_t n;
+ int i;
+
+ bzero(buf, HAMMER2_PBUFSIZE);
+ bzero(&sroot_blockref, sizeof(sroot_blockref));
+ bzero(root_blockref, sizeof(root_blockref));
+ now = nowtime();
+ alloc_base &= ~HAMMER2_PBUFMASK64;
+ alloc_direct(&alloc_base, &sroot_blockref, HAMMER2_INODE_BYTES);
+
+ for (i = 0; i < opt->NLabels; ++i) {
+ uuidgen(&opt->Hammer2_PfsCLID[i], 1);
+ uuidgen(&opt->Hammer2_PfsFSID[i], 1);
+
+ alloc_direct(&alloc_base, &root_blockref[i],
+ HAMMER2_INODE_BYTES);
+ assert(((sroot_blockref.data_off ^ root_blockref[i].data_off) &
+ ~HAMMER2_PBUFMASK64) == 0);
+
+ /*
+ * Format the root directory inode, which is left empty.
+ */
+ rawip = (void *)(buf + (HAMMER2_OFF_MASK_LO &
+ root_blockref[i].data_off));
+ rawip->meta.version = HAMMER2_INODE_VERSION_ONE;
+ rawip->meta.ctime = now;
+ rawip->meta.mtime = now;
+ /* rawip->atime = now; NOT IMPL MUST BE ZERO */
+ rawip->meta.btime = now;
+ rawip->meta.type = HAMMER2_OBJTYPE_DIRECTORY;
+ rawip->meta.mode = 0755;
+ rawip->meta.inum = 1; /* root inode, inumber 1 */
+ rawip->meta.nlinks = 1; /* directory link count compat */
+
+ rawip->meta.name_len = strlen(opt->Label[i]);
+ bcopy(opt->Label[i], rawip->filename, rawip->meta.name_len);
+ rawip->meta.name_key =
+ dirhash(rawip->filename, rawip->meta.name_len);
+
+ /*
+ * Compression mode and supported copyids.
+ *
+ * Do not allow compression when creating any "BOOT" label
+ * (pfs-create also does the same if the pfs is named "BOOT")
+ */
+ if (strcasecmp(opt->Label[i], "BOOT") == 0) {
+ rawip->meta.comp_algo = HAMMER2_ENC_ALGO(
+ HAMMER2_COMP_AUTOZERO);
+ rawip->meta.check_algo = HAMMER2_ENC_ALGO(
+ HAMMER2_CHECK_XXHASH64);
+ } else {
+ rawip->meta.comp_algo = HAMMER2_ENC_ALGO(
+ opt->CompType);
+ rawip->meta.check_algo = HAMMER2_ENC_ALGO(
+ HAMMER2_CHECK_XXHASH64);
+ }
+
+ /*
+ * NOTE: We leave nmasters set to 0, which means that we
+ * don't know how many masters there are. The quorum
+ * calculation will effectively be 1 ( 0 / 2 + 1 ).
+ */
+ rawip->meta.pfs_clid = opt->Hammer2_PfsCLID[i];
+ rawip->meta.pfs_fsid = opt->Hammer2_PfsFSID[i];
+ rawip->meta.pfs_type = HAMMER2_PFSTYPE_MASTER;
+ rawip->meta.op_flags |= HAMMER2_OPFLAG_PFSROOT;
+
+ /* first allocatable inode number */
+ rawip->meta.pfs_inum = 16;
+
+ /* rawip->u.blockset is left empty */
+
+ /*
+ * The root blockref will be stored in the super-root inode as
+ * one of the ~4 PFS root directories. The copyid here is the
+ * actual copyid of the storage ref.
+ *
+ * The key field for a PFS root directory's blockref is
+ * essentially the name key for the entry.
+ */
+ root_blockref[i].key = rawip->meta.name_key;
+ root_blockref[i].copyid = HAMMER2_COPYID_LOCAL;
+ root_blockref[i].keybits = 0;
+ root_blockref[i].check.xxhash64.value =
+ XXH64(rawip, sizeof(*rawip), XXH_HAMMER2_SEED);
+ root_blockref[i].type = HAMMER2_BREF_TYPE_INODE;
+ root_blockref[i].methods =
+ HAMMER2_ENC_CHECK(HAMMER2_CHECK_XXHASH64) |
+ HAMMER2_ENC_COMP(HAMMER2_COMP_NONE);
+ root_blockref[i].mirror_tid = 16;
+ root_blockref[i].flags = HAMMER2_BREF_FLAG_PFSROOT;
+ }
+
+ /*
+ * Format the super-root directory inode, giving it ~4 PFS root
+ * directories (root_blockref).
+ *
+ * The superroot contains ~4 directories pointing at the PFS root
+ * inodes (named via the label). Inodes contain one blockset which
+ * is fully associative so we can put the entry anywhere without
+ * having to worry about the hash. Use index 0.
+ */
+ rawip = (void *)(buf + (HAMMER2_OFF_MASK_LO & sroot_blockref.data_off));
+ rawip->meta.version = HAMMER2_INODE_VERSION_ONE;
+ rawip->meta.ctime = now;
+ rawip->meta.mtime = now;
+ /* rawip->meta.atime = now; NOT IMPL MUST BE ZERO */
+ rawip->meta.btime = now;
+ rawip->meta.type = HAMMER2_OBJTYPE_DIRECTORY;
+ rawip->meta.mode = 0700; /* super-root - root only */
+ rawip->meta.inum = 0; /* super root inode, inumber 0 */
+ rawip->meta.nlinks = 2; /* directory link count compat */
+
+ rawip->meta.name_len = 0; /* super-root is unnamed */
+ rawip->meta.name_key = 0;
+
+ rawip->meta.comp_algo = HAMMER2_ENC_ALGO(HAMMER2_COMP_AUTOZERO);
+ rawip->meta.check_algo = HAMMER2_ENC_ALGO(HAMMER2_CHECK_XXHASH64);
+
+ /*
+ * The super-root is flagged as a PFS and typically given its own
+ * random FSID, making it possible to mirror an entire HAMMER2 disk,
+ * snapshots and all, if desired. PFS ids are used to match up
+ * mirror sources and targets and cluster copy sources and targets.
+ *
+ * (XXX whole-disk logical mirroring is not really supported in
+ * the first attempt because each PFS is in its own modify/mirror
+ * transaction id domain, so normal mechanics cannot cross a PFS
+ * boundary).
+ */
+ rawip->meta.pfs_clid = opt->Hammer2_SupCLID;
+ rawip->meta.pfs_fsid = opt->Hammer2_SupFSID;
+ rawip->meta.pfs_type = HAMMER2_PFSTYPE_SUPROOT;
+ snprintf((char*)rawip->filename, sizeof(rawip->filename), "SUPROOT");
+ rawip->meta.name_key = 0;
+ rawip->meta.name_len = strlen((char*)rawip->filename);
+
+ /* The super-root has an inode number of 0 */
+ rawip->meta.pfs_inum = 0;
+
+ /*
+ * Currently newfs_hammer2 just throws the PFS inodes into the
+ * top-level block table at the volume root and doesn't try to
+ * create an indirect block, so we are limited to ~4 at filesystem
+ * creation time. More can be added after mounting.
+ */
+ qsort(root_blockref, opt->NLabels, sizeof(root_blockref[0]),
+ blkrefary_cmp);
+ for (i = 0; i < opt->NLabels; ++i)
+ rawip->u.blockset.blockref[i] = root_blockref[i];
+
+ /*
+ * The sroot blockref will be stored in the volume header.
+ */
+ sroot_blockref.copyid = HAMMER2_COPYID_LOCAL;
+ sroot_blockref.keybits = 0;
+ sroot_blockref.check.xxhash64.value =
+ XXH64(rawip, sizeof(*rawip), XXH_HAMMER2_SEED);
+ sroot_blockref.type = HAMMER2_BREF_TYPE_INODE;
+ sroot_blockref.methods = HAMMER2_ENC_CHECK(HAMMER2_CHECK_XXHASH64) |
+ HAMMER2_ENC_COMP(HAMMER2_COMP_AUTOZERO);
+ sroot_blockref.mirror_tid = 16;
+ rawip = NULL;
+
+ /*
+ * Write out the 64K HAMMER2 block containing the root and sroot.
+ */
+ assert((sroot_blockref.data_off & ~HAMMER2_PBUFMASK64) ==
+ ((alloc_base - 1) & ~HAMMER2_PBUFMASK64));
+ n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE,
+ sroot_blockref.data_off & ~HAMMER2_PBUFMASK64);
+ if (n != HAMMER2_PBUFSIZE) {
+ perror("write");
+ exit(1);
+ }
+ *sroot_blockrefp = sroot_blockref;
+
+ free(buf);
+ return(alloc_base);
+}
+
+/*
+ * Create the volume header, the super-root directory inode, and
+ * the writable snapshot subdirectory (named via the label) which
+ * is to be the initial mount point, or at least the first mount point.
+ * newfs_hammer2 doesn't format the freemap bitmaps for these.
+ *
+ * 0 4MB
+ * [----reserved_area----][boot_area][aux_area]
+ * [[vol_hdr][freemap]...] [sroot][root][root]...
+ * \ ^\ ^ ^
+ * \--------------------------------------/ \---/-----/---...
+ *
+ * NOTE: The total size is 8MB-aligned to avoid edge cases.
+ */
+static void
+format_hammer2(hammer2_ondisk_t *fso, hammer2_mkfs_options_t *opt, int index)
+{
+ char *buf = malloc(HAMMER2_PBUFSIZE);
+ hammer2_volume_t *vol = &fso->volumes[index];
+ hammer2_volume_data_t *voldata;
+ hammer2_blockset_t sroot_blockset;
+ hammer2_off_t boot_base = HAMMER2_ZONE_SEG;
+ hammer2_off_t aux_base = boot_base + opt->BootAreaSize;
+ hammer2_off_t alloc_base;
+ size_t n;
+ int i;
+
+ /*
+ * Make sure we can write to the last usable block.
+ */
+ bzero(buf, HAMMER2_PBUFSIZE);
+ n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE,
+ vol->size - HAMMER2_PBUFSIZE);
+ if (n != HAMMER2_PBUFSIZE) {
+ perror("write (at-end-of-volume)");
+ exit(1);
+ }
+
+ /*
+ * Format misc area and sroot/root inodes for the root volume.
+ */
+ bzero(&sroot_blockset, sizeof(sroot_blockset));
+ if (vol->id == HAMMER2_ROOT_VOLUME) {
+ alloc_base = format_hammer2_misc(vol, opt, boot_base, aux_base);
+ alloc_base = format_hammer2_inode(vol, opt,
+ &sroot_blockset.blockref[0],
+ alloc_base);
+ } else {
+ alloc_base = 0;
+ for (i = 0; i < HAMMER2_SET_COUNT; ++i)
+ sroot_blockset.blockref[i].type = HAMMER2_BREF_TYPE_INVALID;
+ }
+
+ /*
+ * Format the volume header.
+ *
+ * The volume header points to sroot_blockset. Also be absolutely
+ * sure that allocator_beg is set for the root volume.
+ */
+ assert(HAMMER2_VOLUME_BYTES <= HAMMER2_PBUFSIZE);
+ bzero(buf, HAMMER2_PBUFSIZE);
+ voldata = (void *)buf;
+
+ voldata->magic = HAMMER2_VOLUME_ID_HBO;
+ if (vol->id == HAMMER2_ROOT_VOLUME) {
+ voldata->boot_beg = boot_base;
+ voldata->boot_end = boot_base + opt->BootAreaSize;
+ voldata->aux_beg = aux_base;
+ voldata->aux_end = aux_base + opt->AuxAreaSize;
+ }
+ voldata->volu_size = vol->size;
+ voldata->version = opt->Hammer2Version;
+ voldata->flags = 0;
+
+ if (voldata->version >= HAMMER2_VOL_VERSION_MULTI_VOLUMES) {
+ voldata->volu_id = vol->id;
+ voldata->nvolumes = fso->nvolumes;
+ voldata->total_size = fso->total_size;
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ if (i < fso->nvolumes)
+ voldata->volu_loff[i] = fso->volumes[i].offset;
+ else
+ voldata->volu_loff[i] = (hammer2_off_t)-1;
+ }
+ }
+
+ voldata->fsid = opt->Hammer2_VolFSID;
+ voldata->fstype = opt->Hammer2_FSType;
+
+#define DMSG_PEER_HAMMER2 3 /* server: h2 mounted volume */
+ voldata->peer_type = DMSG_PEER_HAMMER2; /* LNK_CONN identification */
+
+ assert(vol->id == HAMMER2_ROOT_VOLUME || alloc_base == 0);
+ voldata->allocator_size = fso->free_size;
+ if (vol->id == HAMMER2_ROOT_VOLUME) {
+ voldata->allocator_free = fso->free_size;
+ voldata->allocator_beg = alloc_base;
+ }
+
+ voldata->sroot_blockset = sroot_blockset;
+ voldata->mirror_tid = 16; /* all blockref mirror TIDs set to 16 */
+ voldata->freemap_tid = 16; /* all blockref mirror TIDs set to 16 */
+ voldata->icrc_sects[HAMMER2_VOL_ICRC_SECT1] =
+ hammer2_icrc32((char *)voldata + HAMMER2_VOLUME_ICRC1_OFF,
+ HAMMER2_VOLUME_ICRC1_SIZE);
+
+ /*
+ * Set ICRC_SECT0 after all remaining elements of sect0 have been
+ * populated in the volume header. Note that ICRC_SECT* (except for
+ * SECT0) are part of sect0.
+ */
+ voldata->icrc_sects[HAMMER2_VOL_ICRC_SECT0] =
+ hammer2_icrc32((char *)voldata + HAMMER2_VOLUME_ICRC0_OFF,
+ HAMMER2_VOLUME_ICRC0_SIZE);
+ voldata->icrc_volheader =
+ hammer2_icrc32((char *)voldata + HAMMER2_VOLUME_ICRCVH_OFF,
+ HAMMER2_VOLUME_ICRCVH_SIZE);
+
+ /*
+ * Write the volume header and all alternates.
+ */
+ for (i = 0; i < HAMMER2_NUM_VOLHDRS; ++i) {
+ if (i * HAMMER2_ZONE_BYTES64 >= vol->size)
+ break;
+ n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE,
+ i * HAMMER2_ZONE_BYTES64);
+ if (n != HAMMER2_PBUFSIZE) {
+ perror("write");
+ exit(1);
+ }
+ }
+ fsync(vol->fd);
+
+ /*
+ * Cleanup
+ */
+ free(buf);
+}
+
+static void
+alloc_direct(hammer2_off_t *basep, hammer2_blockref_t *bref, size_t bytes)
+{
+ int radix;
+
+ radix = 0;
+ assert(bytes);
+ while ((bytes & 1) == 0) {
+ bytes >>= 1;
+ ++radix;
+ }
+ assert(bytes == 1);
+ if (radix < HAMMER2_RADIX_MIN)
+ radix = HAMMER2_RADIX_MIN;
+
+ bzero(bref, sizeof(*bref));
+ bref->data_off = *basep | radix;
+ bref->vradix = radix;
+
+ *basep += 1U << radix;
+}
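
alloc_direct() packs the allocation size into the low bits of data_off as a
radix (log2 of the byte count), leaving the rest of the offset byte-addressed.
A sketch of the matching decode, assuming HAMMER2_OFF_MASK_RADIX covers the
radix bits as in hammer2_disk.h (bref_bytes is an illustrative helper):

    static size_t
    bref_bytes(const hammer2_blockref_t *bref)
    {
            int radix = (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);

            /* A radix of 0 means the bref carries no data reference. */
            return (radix ? (size_t)1 << radix : 0);
    }

The same decode appears in hammer2_chain_alloc() later in this patch.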
+
+static int
+blkrefary_cmp(const void *b1, const void *b2)
+{
+ const hammer2_blockref_t *bref1 = b1;
+ const hammer2_blockref_t *bref2 = b2;
+
+ if (bref1->key < bref2->key)
+ return(-1);
+ if (bref1->key > bref2->key)
+ return(1);
+ return 0;
+}
+
+void
+hammer2_mkfs(int ac, char **av, hammer2_mkfs_options_t *opt)
+{
+ hammer2_off_t reserved_size;
+ hammer2_ondisk_t fso;
+ int i;
+ char *vol_fsid = NULL;
+ char *sup_clid_name = NULL;
+ char *sup_fsid_name = NULL;
+ char *pfs_clid_name = NULL;
+ char *pfs_fsid_name = NULL;
+
+ /*
+ * Sanity check basic filesystem structures. No cookies for us
+ * if it gets broken!
+ */
+ assert(sizeof(hammer2_volume_data_t) == HAMMER2_VOLUME_BYTES);
+ assert(sizeof(hammer2_inode_data_t) == HAMMER2_INODE_BYTES);
+ assert(sizeof(hammer2_blockref_t) == HAMMER2_BLOCKREF_BYTES);
+
+ /*
+ * Construct volumes information.
+	 * 1GB alignment (level1 freemap size) for all volumes except the last.
+	 * The last volume typically gets 8MB alignment, to avoid edge cases with
+	 * reserved blocks and so that RAID stripes (if any) operate efficiently.
+ */
+ hammer2_init_ondisk(&fso);
+ fso.version = opt->Hammer2Version;
+ fso.nvolumes = ac;
+ for (i = 0; i < fso.nvolumes; ++i) {
+ hammer2_volume_t *vol = &fso.volumes[i];
+ hammer2_off_t size;
+ int fd = open(av[i], O_RDWR);
+ if (fd < 0)
+ err(1, "Unable to open %s R+W", av[i]);
+ size = check_volume(fd);
+ if (i == fso.nvolumes - 1)
+ size &= ~HAMMER2_VOLUME_ALIGNMASK64;
+ else
+ size &= ~HAMMER2_FREEMAP_LEVEL1_MASK;
+ hammer2_install_volume(vol, fd, i, av[i], fso.total_size, size);
+ fso.total_size += size;
+ }
+
+ /*
+ * Verify volumes constructed above.
+ */
+ for (i = 0; i < fso.nvolumes; ++i) {
+ hammer2_volume_t *vol = &fso.volumes[i];
+ printf("Volume %-15s size %s\n", vol->path,
+ sizetostr(vol->size));
+ }
+ hammer2_verify_volumes(&fso, NULL);
+
+ /*
+ * Adjust options.
+ */
+ adjust_options(&fso, opt);
+
+ /*
+ * We'll need to stuff this in the volume header soon.
+ */
+ hammer2_uuid_to_str(&opt->Hammer2_VolFSID, &vol_fsid);
+ hammer2_uuid_to_str(&opt->Hammer2_SupCLID, &sup_clid_name);
+ hammer2_uuid_to_str(&opt->Hammer2_SupFSID, &sup_fsid_name);
+
+ /*
+ * Calculate the amount of reserved space. HAMMER2_ZONE_SEG (4MB)
+ * is reserved at the beginning of every 1GB of storage, rounded up.
+ * Thus a 200MB filesystem will still have a 4MB reserve area.
+ *
+ * We also include the boot and aux areas in the reserve. The
+ * reserve is used to help 'df' calculate the amount of available
+ * space.
+ *
+ * XXX I kinda screwed up and made the reserved area on the LEVEL1
+ * boundary rather than the ZONE boundary. LEVEL1 is on 1GB
+ * boundaries rather than 2GB boundaries. Stick with the LEVEL1
+ * boundary.
+ */
+ reserved_size = ((fso.total_size + HAMMER2_FREEMAP_LEVEL1_MASK) /
+ HAMMER2_FREEMAP_LEVEL1_SIZE) * HAMMER2_ZONE_SEG64;
+
+ fso.free_size = fso.total_size - reserved_size - opt->BootAreaSize - opt->AuxAreaSize;
+ if ((int64_t)fso.free_size < 0) {
+ fprintf(stderr, "Not enough free space\n");
+ exit(1);
+ }
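
Numerically: a 200MB filesystem rounds up to one 1GB level-1 extent and
reserves 4MB, while a 10GB filesystem spans ten extents and reserves 40MB.
A standalone sketch of the same round-up, assuming the freemap constants
from hammer2_disk.h (calc_reserve is an illustrative helper):

    static hammer2_off_t
    calc_reserve(hammer2_off_t total_size)
    {
            hammer2_off_t nlevel1;

            /* Round up to whole 1GB level-1 freemap extents. */
            nlevel1 = (total_size + HAMMER2_FREEMAP_LEVEL1_MASK) /
                HAMMER2_FREEMAP_LEVEL1_SIZE;
            return (nlevel1 * HAMMER2_ZONE_SEG64);  /* 4MB per extent */
    }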
+
+ /*
+ * Format HAMMER2 volumes.
+ */
+ for (i = 0; i < fso.nvolumes; ++i)
+ format_hammer2(&fso, opt, i);
+
+ printf("---------------------------------------------\n");
+ printf("version: %d\n", opt->Hammer2Version);
+ printf("total-size: %s (%jd bytes)\n",
+ sizetostr(fso.total_size),
+ (intmax_t)fso.total_size);
+ printf("boot-area-size: %s (%jd bytes)\n",
+ sizetostr(opt->BootAreaSize),
+ (intmax_t)opt->BootAreaSize);
+ printf("aux-area-size: %s (%jd bytes)\n",
+ sizetostr(opt->AuxAreaSize),
+ (intmax_t)opt->AuxAreaSize);
+ printf("topo-reserved: %s (%jd bytes)\n",
+ sizetostr(reserved_size),
+ (intmax_t)reserved_size);
+ printf("free-size: %s (%jd bytes)\n",
+ sizetostr(fso.free_size),
+ (intmax_t)fso.free_size);
+ printf("vol-fsid: %s\n", vol_fsid);
+ printf("sup-clid: %s\n", sup_clid_name);
+ printf("sup-fsid: %s\n", sup_fsid_name);
+ for (i = 0; i < opt->NLabels; ++i) {
+ printf("PFS \"%s\"\n", opt->Label[i]);
+ hammer2_uuid_to_str(&opt->Hammer2_PfsCLID[i], &pfs_clid_name);
+ hammer2_uuid_to_str(&opt->Hammer2_PfsFSID[i], &pfs_fsid_name);
+ printf(" clid %s\n", pfs_clid_name);
+ printf(" fsid %s\n", pfs_fsid_name);
+ }
+ if (opt->DebugOpt) {
+ printf("---------------------------------------------\n");
+ hammer2_print_volumes(&fso);
+ }
+
+ free(vol_fsid);
+ free(sup_clid_name);
+ free(sup_fsid_name);
+ free(pfs_clid_name);
+ free(pfs_fsid_name);
+
+ for (i = 0; i < fso.nvolumes; ++i)
+ hammer2_uninstall_volume(&fso.volumes[i]);
+}
diff --git a/sbin/newfs_hammer2/newfs_hammer2.8 b/sbin/newfs_hammer2/newfs_hammer2.8
new file mode 100644
--- /dev/null
+++ b/sbin/newfs_hammer2/newfs_hammer2.8
@@ -0,0 +1,194 @@
+.\" Copyright (c) 2011-2014 The DragonFly Project. All rights reserved.
+.\"
+.\" This code is derived from software contributed to The DragonFly Project
+.\" by Matthew Dillon <dillon@backplane.com>
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\"
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in
+.\" the documentation and/or other materials provided with the
+.\" distribution.
+.\" 3. Neither the name of The DragonFly Project nor the names of its
+.\" contributors may be used to endorse or promote products derived
+.\" from this software without specific, prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+.\" FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+.\" COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+.\" INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd September 18, 2022
+.Dt NEWFS_HAMMER2 8
+.Os
+.Sh NAME
+.Nm newfs_hammer2
+.Nd construct a new HAMMER2 file system
+.Sh SYNOPSIS
+.Nm
+.Op Fl b Ar bootsize
+.Op Fl r Ar auxsize
+.Op Fl V Ar version
+.Op Fl L Ar label ...
+.Ar special ...
+.Sh DESCRIPTION
+The
+.Nm
+utility prepares a
+.Nm HAMMER2
+volume on the specified block device.
+.Nm HAMMER2
+volumes can contain any number of named PFSs (Pseudo FileSystems).
+.Pp
+The
+.Nm
+utility always creates a PFS named "LOCAL", which may be used for
+device-specific configuration.
+This PFS should never be used for generic data.
+.Pp
+If no
+.Fl L
+option is specified,
+.Nm
+will create "DATA".
+.Pp
+You can override the default PFS name by specifying one or more
+.Fl L
+options.
+.Pp
+You can specify
+.Fl L Ar none
+if you do not want
+.Nm
+to create any PFSs other than "LOCAL".
+.Pp
+.Nm HAMMER2
+file systems are sector-size agnostic; however, the
+.Dx
+implementation requires the sector size to be no larger than 16K.
+.Nm HAMMER2
+file systems start at a relative offset of 0 and leave no room for
+in-band disklabels (old, traditional BSD labels).
+They may only be created using out-of-band disk labels, which is the
+default on
+.Dx
+via
+.Po
+.Xr disklabel 5
+or
+.Xr gpt 8
+labels
+.Pc ,
+or with
+old-style disklabels as long as
+the partition does not overlap the label area (i.e., has a starting
+sector greater than 16).
+.Pp
+.Nm HAMMER2
+file systems are designed for large storage systems, up to 1 Exabyte, and
+may not operate efficiently on small storage systems.
+The minimum recommended file system size is 50GB.
+In addition,
+.Nm HAMMER2
+file systems operating normally, with automatic snapshots, do not
+immediately reclaim space when files are deleted.
+A regular system maintenance job, run once a day by
+.Xr periodic 8 ,
+handles reclamation.
+.Pp
+.Nm HAMMER2
+works best when the machine's normal workload would not otherwise fill
+the file system up in the course of 60 days of operation.
+.Pp
+The options are as follows:
+.Bl -tag -width indent
+.It Fl b Ar bootsize
+Specify a fixed area in which a boot-related kernel and data can be stored.
+The
+.Ar bootsize
+is specified in bytes.
+By default a boot area of approximately 64MB will be created.
+This area is not currently used for booting and may be repurposed in the
+future.
+.It Fl r Ar auxsize
+Specify a fixed area in which an aux-related kernel and data can be stored.
+The
+.Ar auxsize
+is specified in bytes.
+By default an aux area of approximately 256MB will be created.
+This area is not currently used and may be repurposed in the
+future.
+.It Fl V Ar version
+Specify the
+.Nm HAMMER2
+file system version to format.
+By default
+.Nm
+formats the file system using the highest production version number
+supported by the
+.Nm HAMMER2
+VFS by checking the
+.Va vfs.hammer2.supported_version
+sysctl.
+If you need to maintain compatibility with an older version of
+.Nm HAMMER2
+you may specify the version with this option.
+.It Fl L Ar label
+By default
+.Nm
+always creates local master PFSs on the new volume named "LOCAL"
+and "DATA".
+.Pp
+If you specify one or more label options to create your own named local
+PFSs,
+.Nm
+will not create any of the conditional default PFSs (i.e., "DATA").
+However, "LOCAL" is still always created and should not be
+specified with this option.
+If you do not want any PFSs to be created (other than "LOCAL"), use
+.Fl L Ar none .
+.El
+.Pp
+The
+.Ar bootsize
+and
+.Ar auxsize
+must be given with a suffix of
+.Cm K , M , G
+or
+.Cm T
+meaning kilobytes, megabytes, gigabytes and terabytes.
+Lower-case suffixes are also accepted.
+These options create reserved blocks of space on the target volume
+but are not currently used by the filesystem for anything.
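+.Pp
+As a purely illustrative example (the device name is a placeholder),
+the following invocation would create a file system with a 64MB boot
+area, a 256MB aux area, and a "DATA" PFS in addition to the
+always-created "LOCAL":
+.Pp
+.Dl newfs_hammer2 -b 64M -r 256M -L DATA /dev/da0s1a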
+.Sh SEE ALSO
+.Xr hammer2 8 ,
+.Xr mount_hammer2 8
+.Sh HISTORY
+The
+.Nm
+utility first appeared in
+.Dx 3.1
+but was not enabled unconditionally until
+.Dx 4.9 .
+.Sh AUTHORS
+.An Matthew Dillon Aq Mt dillon@backplane.com
+.Pp
+The
+.Nm
+utility was ported to
+.Fx
+by
+.An Tomohiro Kusumi Aq Mt tkusumi@netbsd.org .
diff --git a/sbin/newfs_hammer2/newfs_hammer2.c b/sbin/newfs_hammer2/newfs_hammer2.c
new file mode 100644
--- /dev/null
+++ b/sbin/newfs_hammer2/newfs_hammer2.c
@@ -0,0 +1,154 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2015 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <string.h>
+#include <err.h>
+
+#include "mkfs_hammer2.h"
+
+static void usage(void);
+
+int
+main(int ac, char **av)
+{
+ hammer2_mkfs_options_t opt;
+ int ch;
+ int label_specified = 0;
+
+ /*
+ * Initialize option structure.
+ */
+ hammer2_mkfs_init(&opt);
+
+ /*
+ * Parse arguments.
+ */
+ while ((ch = getopt(ac, av, "L:b:r:V:d")) != -1) {
+ switch(ch) {
+ case 'b':
+ opt.BootAreaSize = getsize(optarg,
+ HAMMER2_NEWFS_ALIGN,
+ HAMMER2_BOOT_MAX_BYTES, 2);
+ break;
+ case 'r':
+ opt.AuxAreaSize = getsize(optarg,
+ HAMMER2_NEWFS_ALIGN,
+ HAMMER2_AUX_MAX_BYTES, 2);
+ break;
+ case 'V':
+ opt.Hammer2Version = strtol(optarg, NULL, 0);
+ if (opt.Hammer2Version < HAMMER2_VOL_VERSION_MIN ||
+ opt.Hammer2Version >= HAMMER2_VOL_VERSION_WIP) {
+ errx(1, "I don't understand how to format "
+ "HAMMER2 version %d",
+ opt.Hammer2Version);
+ }
+ break;
+ case 'L':
+ label_specified = 1;
+ if (strcasecmp(optarg, "none") == 0) {
+ break;
+ }
+ if (opt.NLabels >= MAXLABELS) {
+ errx(1, "Limit of %d local labels",
+ MAXLABELS - 1);
+ }
+ if (strlen(optarg) == 0) {
+ errx(1, "Volume label '%s' cannot be 0-length",
+ optarg);
+ }
+ if (strlen(optarg) >= HAMMER2_INODE_MAXNAME) {
+ errx(1, "Volume label '%s' is too long "
+ "(%d chars max)",
+ optarg,
+ HAMMER2_INODE_MAXNAME - 1);
+ }
+ opt.Label[opt.NLabels++] = strdup(optarg);
+ break;
+ case 'd':
+ opt.DebugOpt = 1;
+ break;
+ default:
+ usage();
+ break;
+ }
+ }
+
+ ac -= optind;
+ av += optind;
+
+ if (ac == 0)
+ errx(1, "You must specify at least one disk device");
+ if (ac > HAMMER2_MAX_VOLUMES)
+ errx(1, "The maximum number of volumes is %d",
+ HAMMER2_MAX_VOLUMES);
+
+ /*
+ * Check default label type.
+ */
+ if (!label_specified)
+ opt.DefaultLabelType = HAMMER2_LABEL_DATA;
+
+ /*
+ * Create Hammer2 filesystem.
+ */
+ hammer2_mkfs(ac, av, &opt);
+
+ /*
+ * Cleanup option structure.
+ */
+ hammer2_mkfs_cleanup(&opt);
+
+ return(0);
+}
+
+static
+void
+usage(void)
+{
+ fprintf(stderr,
+ "usage: newfs_hammer2 [-b bootsize] [-r auxsize] "
+ "[-V version] [-L label ...] special ...\n"
+ );
+ exit(1);
+}
diff --git a/sys/fs/hammer2/hammer2.h b/sys/fs/hammer2/hammer2.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2.h
@@ -0,0 +1,736 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * HAMMER2 in-memory cache of media structures.
+ *
+ * This header file contains structures used internally by the HAMMER2
+ * implementation. See hammer2_disk.h for on-disk structures.
+ *
+ * There is an in-memory representation of all on-media data structures.
+ * Almost everything is represented by a hammer2_chain structure in-memory.
+ * Other higher-level structures typically map to chains.
+ *
+ * A great deal of data is accessed simply via its buffer cache buffer,
+ * which is mapped for the duration of the chain's lock. HAMMER2 must
+ * implement its own buffer cache layer on top of the system layer to
+ * allow for different threads to lock different sub-block-sized buffers.
+ *
+ * The in-memory representation may remain cached even after the related
+ * data has been detached.
+ */
+
+#ifndef _FS_HAMMER2_HAMMER2_H_
+#define _FS_HAMMER2_HAMMER2_H_
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/errno.h>
+#include <sys/gsb_crc32.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/sx.h>
+#include <sys/tree.h>
+#include <sys/uuid.h>
+#include <sys/vnode.h>
+
+#include <machine/atomic.h>
+#include <vm/uma.h>
+
+#include "hammer2_disk.h"
+#include "hammer2_rb.h"
+
+/* KASSERT variant from DragonFly */
+#ifdef INVARIANTS
+#define KKASSERT(exp) do { if (__predict_false(!(exp))) \
+ panic("assertion \"%s\" failed " \
+ "in %s at %s:%u", #exp, __func__, \
+ __FILE__, __LINE__); } while (0)
+#else
+#define KKASSERT(exp) do { } while (0)
+#endif
+
+/* printf(9) variants for HAMMER2 */
+#ifdef INVARIANTS
+#define HFMT "%s(%s|%d): "
+#define HARGS __func__, \
+ curproc ? curproc->p_comm : "-", \
+ curthread ? curthread->td_tid : -1
+#else
+#define HFMT "%s: "
+#define HARGS __func__
+#endif
+
+#define hprintf(X, ...) printf(HFMT X, HARGS, ## __VA_ARGS__)
+#define hpanic(X, ...) panic(HFMT X, HARGS, ## __VA_ARGS__)
+
+#ifdef INVARIANTS
+#define debug_hprintf hprintf
+#else
+#define debug_hprintf(X, ...) do { } while (0)
+#endif
+
+struct hammer2_chain;
+struct hammer2_dev;
+struct hammer2_inode;
+struct hammer2_io;
+struct hammer2_pfs;
+union hammer2_xop;
+
+typedef struct hammer2_chain hammer2_chain_t;
+typedef struct hammer2_dev hammer2_dev_t;
+typedef struct hammer2_inode hammer2_inode_t;
+typedef struct hammer2_io hammer2_io_t;
+typedef struct hammer2_pfs hammer2_pfs_t;
+typedef union hammer2_xop hammer2_xop_t;
+
+/*
+ * Mutex and lock shims.
+ * Normal synchronous non-abortable locks can be substituted for spinlocks.
+ * FreeBSD HAMMER2 currently uses sx(9) for both mtx and spinlock.
+ */
+typedef struct sx hammer2_mtx_t;
+
+/* Zero on success. */
+#define hammer2_mtx_init(p, s) sx_init(p, s)
+#define hammer2_mtx_init_recurse(p, s) sx_init_flags(p, s, SX_RECURSE)
+#define hammer2_mtx_ex(p) sx_xlock(p)
+#define hammer2_mtx_ex_try(p) (!sx_try_xlock(p))
+#define hammer2_mtx_sh(p) sx_slock(p)
+#define hammer2_mtx_sh_try(p) (!sx_try_slock(p))
+#define hammer2_mtx_unlock(p) sx_unlock(p)
+#define hammer2_mtx_destroy(p) sx_destroy(p)
+#define hammer2_mtx_sleep(c, p, s) sx_sleep(c, p, 0, s, 0)
+#define hammer2_mtx_wakeup(c) wakeup(c)
+
+/* sx_try_upgrade panics on INVARIANTS if already exclusively locked. */
+#define hammer2_mtx_upgrade_try(p) (!sx_try_upgrade(p))
+
+/* Non-zero if exclusively locked by the calling thread. */
+#define hammer2_mtx_owned(p) sx_xlocked(p)
+
+#define hammer2_mtx_assert_locked(p) sx_assert(p, SA_LOCKED)
+#define hammer2_mtx_assert_unlocked(p) sx_assert(p, SA_UNLOCKED)
+#define hammer2_mtx_assert_ex(p) sx_assert(p, SA_XLOCKED)
+#define hammer2_mtx_assert_sh(p) sx_assert(p, SA_SLOCKED)
+
+typedef struct sx hammer2_spin_t;
+
+/* Zero on success. */
+#define hammer2_spin_init(p, s) sx_init(p, s)
+#define hammer2_spin_ex(p) sx_xlock(p)
+#define hammer2_spin_sh(p) sx_slock(p)
+#define hammer2_spin_unex(p) sx_xunlock(p)
+#define hammer2_spin_unsh(p) sx_sunlock(p)
+#define hammer2_spin_destroy(p) sx_destroy(p)
+
+#define hammer2_spin_assert_locked(p) sx_assert(p, SA_LOCKED)
+#define hammer2_spin_assert_unlocked(p) sx_assert(p, SA_UNLOCKED)
+#define hammer2_spin_assert_ex(p) sx_assert(p, SA_XLOCKED)
+#define hammer2_spin_assert_sh(p) sx_assert(p, SA_SLOCKED)
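
These shims keep DragonFly's lock vocabulary while backing everything with
sx(9). A minimal usage sketch under that assumption (example_locked_update
is an illustrative helper; chain->lock is the per-chain hammer2_mtx_t
declared later in this header):

    static void
    example_locked_update(hammer2_chain_t *chain)
    {
            hammer2_mtx_ex(&chain->lock);           /* sx_xlock() */
            hammer2_mtx_assert_ex(&chain->lock);
            /* ... modify fields covered by the lock ... */
            hammer2_mtx_unlock(&chain->lock);       /* sx_unlock() */
    }

Note that the *_try wrappers invert the sx(9) return value, so zero means
success, matching the "Zero on success" convention noted above.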
+
+/* per HAMMER2 list of device vnode */
+TAILQ_HEAD(hammer2_devvp_list, hammer2_devvp); /* <-> hammer2_devvp::entry */
+typedef struct hammer2_devvp_list hammer2_devvp_list_t;
+
+/* per PFS list of LRU chain */
+TAILQ_HEAD(hammer2_chain_list, hammer2_chain); /* <-> hammer2_chain::entry */
+typedef struct hammer2_chain_list hammer2_chain_list_t;
+
+/* per PFS list of inode */
+LIST_HEAD(hammer2_ipdep_list, hammer2_inode); /* <-> hammer2_inode::entry */
+typedef struct hammer2_ipdep_list hammer2_ipdep_list_t;
+
+/* per HAMMER2 rbtree of dio */
+RB_HEAD(hammer2_io_tree, hammer2_io); /* <-> hammer2_io::rbnode */
+typedef struct hammer2_io_tree hammer2_io_tree_t;
+
+/* per PFS rbtree of inode */
+RB_HEAD(hammer2_inode_tree, hammer2_inode); /* <-> hammer2_inode::rbnode */
+typedef struct hammer2_inode_tree hammer2_inode_tree_t;
+
+/* per chain rbtree of sub-chain */
+RB_HEAD(hammer2_chain_tree, hammer2_chain); /* <-> hammer2_chain::rbnode */
+typedef struct hammer2_chain_tree hammer2_chain_tree_t;
+
+/*
+ * HAMMER2 dio - Management structure wrapping system buffer cache.
+ *
+ * HAMMER2 uses an I/O abstraction that allows it to cache and manipulate
+ * fixed-sized filesystem buffers fronted by variable-sized hammer2_chain
+ * structures.
+ */
+struct hammer2_io {
+ RB_ENTRY(hammer2_io) rbnode; /* indexed by device offset */
+ hammer2_mtx_t lock;
+ hammer2_dev_t *hmp;
+ struct vnode *devvp;
+ struct buf *bp;
+ unsigned int refs;
+ off_t dbase; /* offset of devvp within volumes */
+ off_t pbase;
+ int psize;
+ int act; /* activity */
+ int ticks;
+ int error;
+};
+
+#define HAMMER2_DIO_GOOD 0x40000000U /* dio->bp is stable */
+#define HAMMER2_DIO_MASK 0x00FFFFFFU
+
+/*
+ * The chain structure tracks a portion of the media topology from the
+ * root (volume) down. Chains represent volumes, inodes, indirect blocks,
+ * data blocks, and freemap nodes and leafs.
+ */
+/*
+ * Core topology for chain (embedded in chain). Protected by a spinlock.
+ */
+struct hammer2_chain_core {
+ hammer2_chain_tree_t rbtree; /* sub-chains */
+ hammer2_spin_t spin;
+ int live_zero; /* blockref array opt */
+ unsigned int chain_count; /* live + deleted chains under core */
+ int generation; /* generation number (inserts only) */
+};
+
+typedef struct hammer2_chain_core hammer2_chain_core_t;
+
+/*
+ * Primary chain structure keeps track of the topology in-memory.
+ */
+struct hammer2_chain {
+ RB_ENTRY(hammer2_chain) rbnode; /* live chain(s) */
+ TAILQ_ENTRY(hammer2_chain) entry; /* 0-refs LRU */
+ hammer2_mtx_t lock;
+ hammer2_mtx_t inp_lock;
+ hammer2_chain_core_t core;
+ hammer2_blockref_t bref;
+ hammer2_dev_t *hmp;
+ hammer2_pfs_t *pmp; /* A PFS or super-root (spmp) */
+ hammer2_chain_t *parent;
+ hammer2_io_t *dio; /* physical data buffer */
+ hammer2_media_data_t *data; /* data pointer shortcut */
+ unsigned int refs;
+ unsigned int lockcnt;
+ unsigned int flags; /* for HAMMER2_CHAIN_xxx */
+ unsigned int bytes; /* physical data size */
+ int error; /* on-lock data error state */
+ int cache_index; /* heur speeds up lookup */
+};
+
+#define HAMMER2_CHAIN_ALLOCATED 0x00000002 /* kmalloc'd chain */
+#define HAMMER2_CHAIN_DESTROY 0x00000004
+#define HAMMER2_CHAIN_TESTEDGOOD 0x00000100 /* crc tested good */
+#define HAMMER2_CHAIN_COUNTEDBREFS 0x00002000 /* block table stats */
+#define HAMMER2_CHAIN_ONRBTREE 0x00004000 /* on parent RB tree */
+#define HAMMER2_CHAIN_ONLRU 0x00008000 /* on LRU list */
+#define HAMMER2_CHAIN_RELEASE 0x00020000 /* don't keep around */
+#define HAMMER2_CHAIN_IOINPROG 0x00100000 /* I/O interlock */
+#define HAMMER2_CHAIN_IOSIGNAL 0x00200000 /* I/O interlock */
+#define HAMMER2_CHAIN_LRUHINT 0x01000000 /* was reused */
+
+/*
+ * HAMMER2 error codes, used by chain->error and cluster->error. The error
+ * code is typically set on-lock unless no I/O was requested, and set on
+ * I/O otherwise. If set for a cluster it generally means that the cluster
+ * code could not find a valid copy to present.
+ *
+ * All HAMMER2 error codes are flags and can be accumulated by ORing them
+ * together.
+ *
+ * EIO - An I/O error occurred
+ * CHECK - I/O succeeded but did not match the check code
+ *
+ * NOTE: API allows callers to check zero/non-zero to determine if an error
+ * condition exists.
+ *
+ * NOTE: Chain's data field is usually NULL on an IO error but not necessarily
+ * NULL on other errors. Check chain->error, not chain->data.
+ */
+#define HAMMER2_ERROR_EIO 0x00000001 /* device I/O error */
+#define HAMMER2_ERROR_CHECK 0x00000002 /* check code error */
+#define HAMMER2_ERROR_ENOENT 0x00000040 /* entry not found */
+#define HAMMER2_ERROR_EAGAIN 0x00000100 /* retry */
+#define HAMMER2_ERROR_ABORTED 0x00001000 /* aborted operation */
+
+/*
+ * Flags passed to hammer2_chain_lookup() and hammer2_chain_next().
+ *
+ * NOTES:
+ * SHARED - The input chain is expected to be locked shared,
+ * and the output chain is locked shared.
+ * ALWAYS - Always resolve the data.
+ */
+#define HAMMER2_LOOKUP_SHARED 0x00000100
+#define HAMMER2_LOOKUP_ALWAYS 0x00000800 /* resolve data */
+
+/*
+ * Flags passed to hammer2_chain_lock().
+ */
+#define HAMMER2_RESOLVE_MAYBE 2
+#define HAMMER2_RESOLVE_ALWAYS 3
+#define HAMMER2_RESOLVE_MASK 0x0F
+
+#define HAMMER2_RESOLVE_SHARED 0x10 /* request shared lock */
+#define HAMMER2_RESOLVE_LOCKAGAIN 0x20 /* another shared lock */
+
+/*
+ * HAMMER2 cluster - A set of chains representing the same entity.
+ *
+ * Currently a valid cluster can only have a single chain (nchains == 1)
+ * representing the entity.
+ */
+#define HAMMER2_XOPFIFO 16
+
+#define HAMMER2_MAXCLUSTER 8
+#define HAMMER2_XOPMASK_VOP ((uint32_t)0x80000000U)
+
+#define HAMMER2_XOPMASK_ALLDONE (HAMMER2_XOPMASK_VOP)
+
+struct hammer2_cluster_item {
+ hammer2_chain_t *chain;
+ uint32_t flags; /* for HAMMER2_CITEM_xxx */
+ int error;
+};
+
+typedef struct hammer2_cluster_item hammer2_cluster_item_t;
+
+#define HAMMER2_CITEM_NULL 0x00000004
+
+struct hammer2_cluster {
+ hammer2_cluster_item_t array[HAMMER2_MAXCLUSTER];
+ hammer2_pfs_t *pmp;
+ hammer2_chain_t *focus; /* current focus (or mod) */
+ int nchains;
+ int error; /* error code valid on lock */
+};
+
+typedef struct hammer2_cluster hammer2_cluster_t;
+
+/*
+ * HAMMER2 inode.
+ */
+struct hammer2_inode {
+ RB_ENTRY(hammer2_inode) rbnode; /* inumber lookup (HL) */
+ LIST_ENTRY(hammer2_inode) entry;
+ hammer2_mtx_t lock; /* inode lock */
+ hammer2_spin_t cluster_spin; /* update cluster */
+ hammer2_cluster_t cluster;
+ hammer2_inode_meta_t meta; /* copy of meta-data */
+ hammer2_pfs_t *pmp; /* PFS mount */
+ struct vnode *vp;
+ unsigned int refs; /* +vpref, +flushref */
+ unsigned int flags; /* for HAMMER2_INODE_xxx */
+};
+
+#define HAMMER2_INODE_ONRBTREE 0x0008
+
+/*
+ * HAMMER2 XOP - container for VOP/XOP operation.
+ *
+ * This structure is used to distribute a VOP operation across multiple
+ * nodes. In FreeBSD HAMMER2, XOP is currently just a function called by
+ * VOP to handle chains.
+ */
+typedef void (*hammer2_xop_func_t)(union hammer2_xop *, int);
+
+struct hammer2_xop_desc {
+ hammer2_xop_func_t storage_func; /* local storage function */
+ const char *id;
+};
+
+typedef struct hammer2_xop_desc hammer2_xop_desc_t;
+
+struct hammer2_xop_fifo {
+ hammer2_chain_t **array;
+ int *errors;
+ int ri;
+ int wi;
+ int flags;
+};
+
+typedef struct hammer2_xop_fifo hammer2_xop_fifo_t;
+
+struct hammer2_xop_head {
+ hammer2_xop_fifo_t collect[HAMMER2_MAXCLUSTER];
+ hammer2_cluster_t cluster;
+ hammer2_xop_desc_t *desc;
+ hammer2_inode_t *ip1;
+ hammer2_io_t *focus_dio;
+ hammer2_key_t collect_key;
+ uint32_t run_mask;
+ uint32_t chk_mask;
+ int fifo_size;
+ int error;
+ char *name1;
+ size_t name1_len;
+};
+
+typedef struct hammer2_xop_head hammer2_xop_head_t;
+
+#define fifo_mask(xop_head) ((xop_head)->fifo_size - 1)
+
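
The collect FIFOs are power-of-two rings: ri and wi are free-running
indices, fifo_mask() wraps them into the array, and wi - ri is the current
depth. A producer-side sketch (fifo_push is an illustrative helper; the
real producer is hammer2_xop_feed() in hammer2_admin.c later in this
patch):

    static void
    fifo_push(hammer2_xop_head_t *xop, int clindex, hammer2_chain_t *chain)
    {
            hammer2_xop_fifo_t *fifo = &xop->collect[clindex];

            /* Caller must first ensure wi - ri < xop->fifo_size. */
            fifo->array[fifo->wi & fifo_mask(xop)] = chain;
            ++fifo->wi;
    }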
+struct hammer2_xop_readdir {
+ hammer2_xop_head_t head;
+ hammer2_key_t lkey;
+};
+
+struct hammer2_xop_nresolve {
+ hammer2_xop_head_t head;
+};
+
+struct hammer2_xop_lookup {
+ hammer2_xop_head_t head;
+ hammer2_key_t lhc;
+};
+
+struct hammer2_xop_bmap {
+ hammer2_xop_head_t head;
+ daddr_t lbn;
+ daddr_t pbn;
+ int runp;
+ int runb;
+};
+
+struct hammer2_xop_strategy {
+ hammer2_xop_head_t head;
+ hammer2_key_t lbase;
+ struct buf *bp;
+};
+
+typedef struct hammer2_xop_readdir hammer2_xop_readdir_t;
+typedef struct hammer2_xop_nresolve hammer2_xop_nresolve_t;
+typedef struct hammer2_xop_lookup hammer2_xop_lookup_t;
+typedef struct hammer2_xop_bmap hammer2_xop_bmap_t;
+typedef struct hammer2_xop_strategy hammer2_xop_strategy_t;
+
+union hammer2_xop {
+ hammer2_xop_head_t head;
+ hammer2_xop_readdir_t xop_readdir;
+ hammer2_xop_nresolve_t xop_nresolve;
+ hammer2_xop_lookup_t xop_lookup;
+ hammer2_xop_bmap_t xop_bmap;
+ hammer2_xop_strategy_t xop_strategy;
+};
+
+/*
+ * Device vnode management structure.
+ */
+struct hammer2_devvp {
+ TAILQ_ENTRY(hammer2_devvp) entry;
+ struct vnode *devvp; /* device vnode */
+ char *path; /* device vnode path */
+ int open; /* 1 if devvp open */
+};
+
+typedef struct hammer2_devvp hammer2_devvp_t;
+
+/*
+ * Volume management structure.
+ */
+struct hammer2_volume {
+ hammer2_devvp_t *dev; /* device vnode management */
+ hammer2_off_t offset; /* offset within volumes */
+ hammer2_off_t size; /* volume size */
+ int id; /* volume id */
+};
+
+typedef struct hammer2_volume hammer2_volume_t;
+
+/*
+ * Global (per partition) management structure, represents a hard block
+ * device. Typically referenced by hammer2_chain structures when applicable.
+ *
+ * Note that a single hammer2_dev can be indirectly tied to multiple system
+ * mount points. There is no direct relationship. System mounts are
+ * per-cluster-id, not per-block-device, and a single hard mount might contain
+ * many PFSs.
+ */
+struct hammer2_dev {
+ TAILQ_ENTRY(hammer2_dev) mntentry; /* hammer2_mntlist */
+ hammer2_devvp_list_t devvp_list; /* list of device vnodes including *devvp */
+ hammer2_io_tree_t iotree;
+ hammer2_mtx_t iotree_lock; /* iotree, iolruq access */
+ hammer2_pfs_t *spmp; /* super-root pmp for transactions */
+ struct vnode *devvp; /* device vnode for root volume */
+ hammer2_chain_t vchain; /* anchor chain (topology) */
+ hammer2_volume_data_t voldata;
+ hammer2_volume_t volumes[HAMMER2_MAX_VOLUMES]; /* list of volumes */
+ hammer2_off_t total_size; /* total size of volumes */
+ uint32_t hflags; /* HMNT2 flags applicable to device */
+ int mount_count; /* number of actively mounted PFSs */
+ int nvolumes; /* total number of volumes */
+ int iofree_count;
+};
+
+/*
+ * Per-cluster management structure. This structure will be tied to a
+ * system mount point if the system is mounting the PFS.
+ *
+ * This structure is also used to represent the super-root that hangs off
+ * of a hard mount point. The super-root is not really a cluster element.
+ * In this case the spmp_hmp field will be non-NULL. It's just easier to do
+ * this than to special case super-root manipulation in the hammer2_chain*
+ * code as being only hammer2_dev-related.
+ *
+ * WARNING! The chains making up pfs->iroot's cluster are accounted for in
+ * hammer2_dev->mount_count when the pfs is associated with a mount
+ * point.
+ */
+struct hammer2_pfs {
+ TAILQ_ENTRY(hammer2_pfs) mntentry; /* hammer2_pfslist */
+ hammer2_inode_tree_t inum_tree; /* (not applicable to spmp) */
+ hammer2_chain_list_t lru_list; /* basis for LRU tests */
+ hammer2_ipdep_list_t *ipdep_lists; /* inode dependencies for XOP */
+ hammer2_spin_t inum_spin; /* inumber lookup */
+ hammer2_spin_t lru_spin;
+ hammer2_mtx_t xop_lock;
+ struct mount *mp;
+ struct uuid pfs_clid;
+ hammer2_inode_t *iroot; /* PFS root inode */
+ hammer2_dev_t *spmp_hmp; /* only if super-root pmp */
+ hammer2_dev_t *force_local; /* only if 'local' mount */
+ hammer2_dev_t *pfs_hmps[HAMMER2_MAXCLUSTER];
+ char *pfs_names[HAMMER2_MAXCLUSTER];
+ uint8_t pfs_types[HAMMER2_MAXCLUSTER];
+ int flags; /* for HAMMER2_PMPF_xxx */
+ int lru_count; /* #of chains on LRU */
+ unsigned long ipdep_mask;
+ char mntpt[128];
+};
+
+#define HAMMER2_PMPF_SPMP 0x00000001
+#define HAMMER2_PMPF_WAITING 0x10000000
+
+#define HAMMER2_IHASH_SIZE 16
+
+/*
+ * NOTE: The LRU list contains at least all the chains with refs == 0
+ * that can be recycled, and may contain additional chains which
+ * cannot.
+ */
+#define HAMMER2_LRU_LIMIT 4096
+
+#define HAMMER2_CHECK_NULL 0x00000001
+
+#define MPTOPMP(mp) ((hammer2_pfs_t *)(mp)->mnt_data)
+#define VTOI(vp) ((hammer2_inode_t *)(vp)->v_data)
+
+MALLOC_DECLARE(M_HAMMER2);
+extern uma_zone_t zone_buffer_read;
+extern uma_zone_t zone_xops;
+
+extern int hammer2_cluster_meta_read;
+extern int hammer2_cluster_data_read;
+extern long hammer2_inode_allocs;
+extern long hammer2_chain_allocs;
+extern long hammer2_dio_allocs;
+extern int hammer2_dio_limit;
+
+extern struct vop_vector hammer2_vnodeops;
+extern struct vop_vector hammer2_fifoops;
+
+extern hammer2_xop_desc_t hammer2_readdir_desc;
+extern hammer2_xop_desc_t hammer2_nresolve_desc;
+extern hammer2_xop_desc_t hammer2_lookup_desc;
+extern hammer2_xop_desc_t hammer2_bmap_desc;
+extern hammer2_xop_desc_t hammer2_strategy_read_desc;
+
+/* hammer2_admin.c */
+void *hammer2_xop_alloc(hammer2_inode_t *);
+void hammer2_xop_setname(hammer2_xop_head_t *, const char *, size_t);
+void hammer2_xop_start(hammer2_xop_head_t *, hammer2_xop_desc_t *);
+void hammer2_xop_retire(hammer2_xop_head_t *, uint32_t);
+int hammer2_xop_feed(hammer2_xop_head_t *, hammer2_chain_t *, int, int);
+int hammer2_xop_collect(hammer2_xop_head_t *, int);
+
+/* hammer2_chain.c */
+void hammer2_chain_init(hammer2_chain_t *);
+void hammer2_chain_ref(hammer2_chain_t *);
+void hammer2_chain_ref_hold(hammer2_chain_t *);
+void hammer2_chain_drop(hammer2_chain_t *);
+void hammer2_chain_unhold(hammer2_chain_t *);
+void hammer2_chain_drop_unhold(hammer2_chain_t *);
+void hammer2_chain_rehold(hammer2_chain_t *);
+int hammer2_chain_lock(hammer2_chain_t *, int);
+void hammer2_chain_unlock(hammer2_chain_t *);
+hammer2_chain_t *hammer2_chain_lookup_init(hammer2_chain_t *, int);
+void hammer2_chain_lookup_done(hammer2_chain_t *);
+hammer2_chain_t *hammer2_chain_lookup(hammer2_chain_t **, hammer2_key_t *,
+ hammer2_key_t, hammer2_key_t, int *, int);
+hammer2_chain_t *hammer2_chain_next(hammer2_chain_t **, hammer2_chain_t *,
+ hammer2_key_t *, hammer2_key_t, hammer2_key_t, int *, int);
+int hammer2_chain_inode_find(hammer2_pfs_t *, hammer2_key_t, int, int,
+ hammer2_chain_t **, hammer2_chain_t **);
+int hammer2_chain_dirent_test(const hammer2_chain_t *, const char *, size_t);
+void hammer2_dump_chain(hammer2_chain_t *, int, int, int *, char, unsigned int);
+
+/* hammer2_cluster.c */
+uint8_t hammer2_cluster_type(const hammer2_cluster_t *);
+void hammer2_cluster_bref(const hammer2_cluster_t *, hammer2_blockref_t *);
+void hammer2_dummy_xop_from_chain(hammer2_xop_head_t *, hammer2_chain_t *);
+void hammer2_cluster_unhold(hammer2_cluster_t *);
+void hammer2_cluster_rehold(hammer2_cluster_t *);
+int hammer2_cluster_check(hammer2_cluster_t *, hammer2_key_t, int);
+
+/* hammer2_inode.c */
+void hammer2_inode_lock(hammer2_inode_t *, int);
+void hammer2_inode_unlock(hammer2_inode_t *);
+hammer2_chain_t *hammer2_inode_chain(hammer2_inode_t *, int, int);
+hammer2_chain_t *hammer2_inode_chain_and_parent(hammer2_inode_t *, int,
+ hammer2_chain_t **, int);
+hammer2_inode_t *hammer2_inode_lookup(hammer2_pfs_t *, hammer2_tid_t);
+void hammer2_inode_ref(hammer2_inode_t *);
+void hammer2_inode_drop(hammer2_inode_t *);
+int hammer2_igetv(hammer2_inode_t *, int, struct vnode **);
+hammer2_inode_t *hammer2_inode_get(hammer2_pfs_t *, hammer2_xop_head_t *,
+ hammer2_tid_t, int);
+hammer2_key_t hammer2_inode_data_count(const hammer2_inode_t *);
+hammer2_key_t hammer2_inode_inode_count(const hammer2_inode_t *);
+
+/* hammer2_io.c */
+hammer2_io_t *hammer2_io_getblk(hammer2_dev_t *, int, off_t, int, int);
+void hammer2_io_putblk(hammer2_io_t **);
+void hammer2_io_cleanup(hammer2_dev_t *, hammer2_io_tree_t *);
+char *hammer2_io_data(hammer2_io_t *, off_t);
+int hammer2_io_bread(hammer2_dev_t *, int, off_t, int, hammer2_io_t **);
+void hammer2_io_bqrelse(hammer2_io_t **);
+
+/* hammer2_ioctl.c */
+int hammer2_ioctl_impl(hammer2_inode_t *, unsigned long, void *, int,
+ struct ucred *);
+
+/* hammer2_ondisk.c */
+int hammer2_open_devvp(struct mount *, const hammer2_devvp_list_t *);
+int hammer2_close_devvp(const hammer2_devvp_list_t *);
+int hammer2_init_devvp(const struct mount *, const char *,
+ hammer2_devvp_list_t *);
+void hammer2_cleanup_devvp(hammer2_devvp_list_t *);
+int hammer2_init_volumes(const hammer2_devvp_list_t *, hammer2_volume_t *,
+ hammer2_volume_data_t *, struct vnode **);
+hammer2_volume_t *hammer2_get_volume(hammer2_dev_t *, hammer2_off_t);
+
+/* hammer2_strategy.c */
+int hammer2_strategy(struct vop_strategy_args *);
+void hammer2_xop_strategy_read(hammer2_xop_t *, int);
+
+/* hammer2_subr.c */
+int hammer2_get_dtype(uint8_t);
+int hammer2_get_vtype(uint8_t);
+void hammer2_time_to_timespec(uint64_t, struct timespec *);
+uint32_t hammer2_to_unix_xid(const struct uuid *);
+hammer2_key_t hammer2_dirhash(const unsigned char *, size_t);
+int hammer2_calc_logical(hammer2_inode_t *, hammer2_off_t, hammer2_key_t *,
+ hammer2_key_t *);
+int hammer2_get_logical(void);
+const char *hammer2_breftype_to_str(uint8_t);
+
+/* hammer2_xops.c */
+void hammer2_xop_readdir(hammer2_xop_t *, int);
+void hammer2_xop_nresolve(hammer2_xop_t *, int);
+void hammer2_xop_lookup(hammer2_xop_t *, int);
+void hammer2_xop_bmap(hammer2_xop_t *, int);
+
+static __inline int
+hammer2_error_to_errno(int error)
+{
+ if (!error)
+ return (0);
+ else if (error & HAMMER2_ERROR_EIO)
+ return (EIO);
+ else if (error & HAMMER2_ERROR_CHECK)
+ return (EDOM);
+ else if (error & HAMMER2_ERROR_ENOENT)
+ return (ENOENT);
+ else if (error & HAMMER2_ERROR_EAGAIN)
+ return (EAGAIN);
+ else if (error & HAMMER2_ERROR_ABORTED)
+ return (EINTR);
+ else
+ return (EDOM);
+}
+
+static __inline const hammer2_media_data_t *
+hammer2_xop_gdata(hammer2_xop_head_t *xop)
+{
+ hammer2_chain_t *focus = xop->cluster.focus;
+ const void *data;
+
+ if (focus->dio) {
+ if ((xop->focus_dio = focus->dio) != NULL)
+ atomic_add_32(&xop->focus_dio->refs, 1);
+ data = focus->data;
+ } else {
+ data = focus->data;
+ }
+
+ return (data);
+}
+
+static __inline void
+hammer2_xop_pdata(hammer2_xop_head_t *xop)
+{
+ if (xop->focus_dio)
+ hammer2_io_putblk(&xop->focus_dio);
+}
+
+static __inline void
+hammer2_assert_cluster(const hammer2_cluster_t *cluster)
+{
+ /* Currently a valid cluster can only have 1 nchains. */
+ KASSERT(cluster->nchains == 1,
+ ("unexpected cluster nchains %d", cluster->nchains));
+}
+
+static __inline uint32_t
+hammer2_icrc32(const void *buf, size_t size)
+{
+ return (~calculate_crc32c(-1, buf, size));
+}
+
+static __inline uint32_t
+hammer2_icrc32c(const void *buf, size_t size, uint32_t ocrc)
+{
+ return (~calculate_crc32c(~ocrc, buf, size));
+}
+#endif /* !_FS_HAMMER2_HAMMER2_H_ */
diff --git a/sys/fs/hammer2/hammer2_admin.c b/sys/fs/hammer2/hammer2_admin.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_admin.c
@@ -0,0 +1,449 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/limits.h>
+#include <sys/malloc.h>
+#include <sys/queue.h>
+
+#include <vm/uma.h>
+
+#include "hammer2.h"
+
+#define H2XOPDESCRIPTOR(label) \
+ hammer2_xop_desc_t hammer2_##label##_desc = { \
+ .storage_func = hammer2_xop_##label, \
+ .id = #label \
+ }
+
+H2XOPDESCRIPTOR(readdir);
+H2XOPDESCRIPTOR(nresolve);
+H2XOPDESCRIPTOR(lookup);
+H2XOPDESCRIPTOR(bmap);
+H2XOPDESCRIPTOR(strategy_read);
+
+/*
+ * Allocate or reallocate the XOP FIFO. This doesn't exist in DragonFly,
+ * where XOP is handled by dedicated kernel threads; when the FIFO stalls
+ * there, the threads wait for the frontend to collect results.
+ */
+static void
+hammer2_xop_fifo_alloc(hammer2_xop_fifo_t *fifo, int fifo_size)
+{
+ struct malloc_type *type = M_HAMMER2;
+ int flags = M_WAITOK | M_ZERO;
+ size_t size;
+
+ KKASSERT((fifo_size & (fifo_size - 1)) == 0);
+ KKASSERT(fifo_size >= HAMMER2_XOPFIFO);
+ KKASSERT(fifo_size <= INT_MAX);
+
+ size = sizeof(hammer2_chain_t*) * fifo_size;
+ if (!fifo->array)
+ fifo->array = malloc(size, type, flags);
+ else
+ fifo->array = realloc(fifo->array, size, type, flags);
+ KKASSERT(fifo->array);
+
+ size = sizeof(int) * fifo_size;
+ if (!fifo->errors)
+ fifo->errors = malloc(size, type, flags);
+ else
+ fifo->errors = realloc(fifo->errors, size, type, flags);
+ KKASSERT(fifo->errors);
+}
+
+/*
+ * Allocate an XOP request.
+ * Once allocated, an XOP request can be started, collected, and retired,
+ * and can be retired early if desired.
+ */
+void *
+hammer2_xop_alloc(hammer2_inode_t *ip)
+{
+ hammer2_xop_t *xop;
+
+ xop = uma_zalloc(zone_xops, M_WAITOK | M_ZERO);
+ KKASSERT(xop->head.cluster.array[0].chain == NULL);
+
+ xop->head.ip1 = ip;
+ xop->head.cluster.nchains = ip->cluster.nchains;
+ xop->head.cluster.pmp = ip->pmp;
+ hammer2_assert_cluster(&ip->cluster);
+
+ /* run_mask - Frontend associated with XOP. */
+ xop->head.run_mask = HAMMER2_XOPMASK_VOP;
+
+ hammer2_xop_fifo_t *fifo = &xop->head.collect[0];
+ xop->head.fifo_size = HAMMER2_XOPFIFO;
+ hammer2_xop_fifo_alloc(fifo, xop->head.fifo_size);
+
+ hammer2_inode_ref(ip);
+
+ return (xop);
+}
+
+void
+hammer2_xop_setname(hammer2_xop_head_t *xop, const char *name, size_t name_len)
+{
+ xop->name1 = malloc(name_len + 1, M_HAMMER2, M_WAITOK | M_ZERO);
+ xop->name1_len = name_len;
+ bcopy(name, xop->name1, name_len);
+}
+
+/*
+ * (Backend) Returns non-zero if the frontend is still attached.
+ */
+static __inline int
+hammer2_xop_active(const hammer2_xop_head_t *xop)
+{
+ if (xop->run_mask & HAMMER2_XOPMASK_VOP)
+ return (1);
+ else
+ return (0);
+}
+
+/*
+ * hashinit(9) based hash to track inode dependencies.
+ */
+static __inline int
+xop_ipdep_value(const hammer2_inode_t *ip)
+{
+ int idx;
+
+ hammer2_mtx_assert_ex(&ip->pmp->xop_lock);
+
+ KKASSERT(ip != NULL);
+ idx = ip->meta.inum % HAMMER2_IHASH_SIZE;
+ KKASSERT(idx >= 0 && idx < HAMMER2_IHASH_SIZE);
+
+ return (idx);
+}
+
+static int
+xop_testset_ipdep(hammer2_inode_t *ip)
+{
+ hammer2_ipdep_list_t *ipdep;
+ hammer2_inode_t *iptmp;
+
+ hammer2_mtx_assert_ex(&ip->pmp->xop_lock);
+
+ ipdep = &ip->pmp->ipdep_lists[xop_ipdep_value(ip)];
+ LIST_FOREACH(iptmp, ipdep, entry)
+ if (iptmp == ip)
+ return (1); /* collision */
+
+ LIST_INSERT_HEAD(ipdep, ip, entry);
+ return (0);
+}
+
+static void
+xop_unset_ipdep(hammer2_inode_t *ip)
+{
+ hammer2_ipdep_list_t *ipdep;
+ hammer2_inode_t *iptmp;
+
+ hammer2_mtx_assert_ex(&ip->pmp->xop_lock);
+
+ ipdep = &ip->pmp->ipdep_lists[xop_ipdep_value(ip)];
+ LIST_FOREACH(iptmp, ipdep, entry)
+ if (iptmp == ip) {
+ LIST_REMOVE(ip, entry);
+ return;
+ }
+}
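
The ipdep hash only buckets inodes; serialization is still per inode. An
XOP on an inode already present on its bucket's list sleeps until retire
removes the inode and wakes the PFS. A sketch of the bucket computation
(ipdep_index is an illustrative helper; inum is assumed to be the 64-bit
inode number from ip->meta):

    static int
    ipdep_index(uint64_t inum)
    {
            /*
             * With HAMMER2_IHASH_SIZE of 16, inum 5 and inum 21 share
             * bucket 5; sharing a bucket only lengthens the list walk,
             * it does not serialize distinct inodes.
             */
            return ((int)(inum % HAMMER2_IHASH_SIZE));
    }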
+
+/*
+ * Start an XOP request, queueing it to all nodes in the cluster to
+ * execute the cluster op.
+ */
+void
+hammer2_xop_start(hammer2_xop_head_t *xop, hammer2_xop_desc_t *desc)
+{
+ hammer2_inode_t *ip = xop->ip1;
+ hammer2_pfs_t *pmp = ip->pmp;
+ uint32_t mask;
+ int i;
+
+ hammer2_assert_cluster(&ip->cluster);
+ xop->desc = desc;
+
+ for (i = 0; i < ip->cluster.nchains; ++i) {
+ if (ip->cluster.array[i].chain) {
+ atomic_set_32(&xop->run_mask, 1LLU << i);
+ atomic_set_32(&xop->chk_mask, 1LLU << i);
+ }
+ }
+
+ for (i = 0; i < ip->cluster.nchains; ++i) {
+ mask = 1LLU << i;
+ if (hammer2_xop_active(xop)) {
+ hammer2_mtx_ex(&pmp->xop_lock);
+again:
+ if (xop_testset_ipdep(ip)) {
+ pmp->flags |= HAMMER2_PMPF_WAITING;
+ hammer2_mtx_sleep(pmp, &pmp->xop_lock, "h2pmp");
+ goto again;
+ }
+ hammer2_mtx_unlock(&pmp->xop_lock);
+
+ xop->desc->storage_func((hammer2_xop_t *)xop, i);
+ hammer2_xop_retire(xop, mask);
+ } else {
+ hammer2_xop_feed(xop, NULL, i, ECONNABORTED);
+ hammer2_xop_retire(xop, mask);
+ }
+ }
+}
+
+/*
+ * Retire an XOP. Used by both the VOP frontend and the XOP backend.
+ */
+void
+hammer2_xop_retire(hammer2_xop_head_t *xop, uint32_t mask)
+{
+ hammer2_pfs_t *pmp;
+ hammer2_chain_t *chain;
+ hammer2_xop_fifo_t *fifo;
+
+ uint32_t omask;
+ int i;
+
+ /* Remove the frontend collector or remove a backend feeder. */
+ KASSERT(xop->run_mask & mask, ("%x vs %x", xop->run_mask, mask));
+ omask = atomic_fetchadd_32(&xop->run_mask, -mask);
+
+	/* If other entities remain, the last one out will clean up. */
+ if ((omask & HAMMER2_XOPMASK_ALLDONE) != mask)
+ return;
+
+ /*
+ * All collectors are gone, we can cleanup and dispose of the XOP.
+ * Cleanup the collection cluster.
+ */
+ for (i = 0; i < xop->cluster.nchains; ++i) {
+ xop->cluster.array[i].flags = 0;
+ chain = xop->cluster.array[i].chain;
+ if (chain) {
+ xop->cluster.array[i].chain = NULL;
+ hammer2_chain_drop_unhold(chain);
+ }
+ }
+
+ /*
+ * Cleanup the fifos. Since we are the only entity left on this
+ * xop we don't have to worry about fifo flow control.
+ */
+ mask = xop->chk_mask;
+ for (i = 0; mask && i < HAMMER2_MAXCLUSTER; ++i) {
+ fifo = &xop->collect[i];
+ while (fifo->ri != fifo->wi) {
+ chain = fifo->array[fifo->ri & fifo_mask(xop)];
+ if (chain)
+ hammer2_chain_drop_unhold(chain);
+ ++fifo->ri;
+ }
+ mask &= ~(1U << i);
+ }
+
+ /* The inode is only held at this point, simply drop it. */
+ if (xop->ip1) {
+ pmp = xop->ip1->pmp;
+ hammer2_mtx_ex(&pmp->xop_lock);
+ xop_unset_ipdep(xop->ip1);
+ if (pmp->flags & HAMMER2_PMPF_WAITING) {
+ pmp->flags &= ~HAMMER2_PMPF_WAITING;
+ hammer2_mtx_wakeup(pmp);
+ }
+ hammer2_mtx_unlock(&pmp->xop_lock);
+
+ hammer2_inode_drop(xop->ip1);
+ xop->ip1 = NULL;
+ }
+
+ if (xop->name1) {
+ free(xop->name1, M_HAMMER2);
+ xop->name1 = NULL;
+ xop->name1_len = 0;
+ }
+
+ for (i = 0; i < xop->cluster.nchains; ++i) {
+ fifo = &xop->collect[i];
+ free(fifo->array, M_HAMMER2);
+ free(fifo->errors, M_HAMMER2);
+ }
+
+ uma_zfree(zone_xops, xop);
+}
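
hammer2_xop_retire() leans on a single atomic_fetchadd_32(): subtracting
the caller's mask bit releases its participation, and the pre-subtraction
value reveals whether it was the last holder. A generic sketch of the
pattern (release_mask_bit is an illustrative helper and assumes each
participant's bit was set exactly once):

    static int
    release_mask_bit(volatile uint32_t *maskp, uint32_t mybit)
    {
            uint32_t omask;

            /* Adding -mybit clears mybit (two's-complement wrap). */
            omask = atomic_fetchadd_32(maskp, -mybit);
            return (omask == mybit);        /* non-zero: we were last */
    }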
+
+/*
+ * (Backend) Feed chain data.
+ * The chain must be locked (either shared or exclusive). The caller may
+ * unlock and drop the chain on return. This function will add an extra
+ * ref and hold the chain's data for the pass-back.
+ *
+ * No xop lock is needed because we are only manipulating fields under
+ * our direct control.
+ *
+ * Returns 0 on success and a HAMMER2 error code if sync is permanently
+ * lost. The caller retains a ref on the chain but by convention
+ * the lock is typically inherited by the xop (caller loses lock).
+ *
+ * Returns non-zero on error. In this situation the caller retains a
+ * ref on the chain but loses the lock (we unlock here).
+ */
+int
+hammer2_xop_feed(hammer2_xop_head_t *xop, hammer2_chain_t *chain, int clindex,
+ int error)
+{
+ hammer2_xop_fifo_t *fifo;
+
+	/* Early termination (typically of xop_readdir). */
+ if (hammer2_xop_active(xop) == 0) {
+ error = HAMMER2_ERROR_ABORTED;
+ goto done;
+ }
+
+ /*
+ * Entry into the XOP collector.
+ * We own the fifo->wi for our clindex.
+ */
+ fifo = &xop->collect[clindex];
+ while (fifo->ri == fifo->wi - xop->fifo_size) {
+ if ((xop->run_mask & HAMMER2_XOPMASK_VOP) == 0) {
+ error = HAMMER2_ERROR_ABORTED;
+ goto done;
+ }
+ xop->fifo_size *= 2;
+ hammer2_xop_fifo_alloc(fifo, xop->fifo_size);
+ }
+
+ if (chain)
+ hammer2_chain_ref_hold(chain);
+ if (error == 0 && chain)
+ error = chain->error;
+ fifo->errors[fifo->wi & fifo_mask(xop)] = error;
+ fifo->array[fifo->wi & fifo_mask(xop)] = chain;
+ ++fifo->wi;
+
+ error = 0;
+done:
+ return (error);
+}
+
+/*
+ * (Frontend) collect a response from a running cluster op.
+ * Responses are collected into a cohesive response >= collect_key.
+ *
+ * Returns 0 on success plus a filled out xop->cluster structure.
+ * Return ENOENT on normal termination.
+ * Otherwise return an error.
+ */
+int
+hammer2_xop_collect(hammer2_xop_head_t *xop, int flags)
+{
+ hammer2_xop_fifo_t *fifo;
+ hammer2_chain_t *chain;
+ hammer2_key_t lokey;
+ int i, keynull, adv, error;
+
+ /*
+ * First loop tries to advance pieces of the cluster which
+ * are out of sync.
+ */
+ lokey = HAMMER2_KEY_MAX;
+ keynull = HAMMER2_CHECK_NULL;
+
+ for (i = 0; i < xop->cluster.nchains; ++i) {
+ chain = xop->cluster.array[i].chain;
+ if (chain == NULL) {
+ adv = 1;
+ } else if (chain->bref.key < xop->collect_key) {
+ adv = 1;
+ } else {
+ keynull &= ~HAMMER2_CHECK_NULL;
+ if (lokey > chain->bref.key)
+ lokey = chain->bref.key;
+ adv = 0;
+ }
+ if (adv == 0)
+ continue;
+
+		/* Advance element if possible; the advanced element may be NULL. */
+ if (chain)
+ hammer2_chain_drop_unhold(chain);
+
+ fifo = &xop->collect[i];
+ if (fifo->ri != fifo->wi) {
+ chain = fifo->array[fifo->ri & fifo_mask(xop)];
+ error = fifo->errors[fifo->ri & fifo_mask(xop)];
+ ++fifo->ri;
+ xop->cluster.array[i].chain = chain;
+ xop->cluster.array[i].error = error;
+ if (chain == NULL)
+ xop->cluster.array[i].flags |=
+ HAMMER2_CITEM_NULL;
+ --i; /* Loop on same index. */
+ } else {
+ /*
+ * Retain CITEM_NULL flag. If set just repeat EOF.
+ * If not, the NULL,0 combination indicates an
+ * operation in-progress.
+ */
+ xop->cluster.array[i].chain = NULL;
+ /* Retain any CITEM_NULL setting. */
+ }
+ }
+
+ /*
+ * Determine whether the lowest collected key meets clustering
+ * requirements. Returns HAMMER2_ERROR_*:
+ *
+ * 0 - key valid, cluster can be returned.
+ * ENOENT - normal end of scan, return ENOENT.
+ * EIO - IO error or CRC check error from hammer2_cluster_check().
+ */
+ error = hammer2_cluster_check(&xop->cluster, lokey, keynull);
+
+ if (lokey == HAMMER2_KEY_MAX)
+ xop->collect_key = lokey;
+ else
+ xop->collect_key = lokey + 1;
+
+ return (error);
+}
diff --git a/sys/fs/hammer2/hammer2_chain.c b/sys/fs/hammer2/hammer2_chain.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_chain.c
@@ -0,0 +1,1940 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/queue.h>
+#include <sys/tree.h>
+
+#include <crypto/sha2/sha256.h>
+
+#include "hammer2.h"
+#include "hammer2_xxhash.h"
+
+static hammer2_chain_t *hammer2_combined_find(hammer2_chain_t *,
+ hammer2_blockref_t *, int, hammer2_key_t *, hammer2_key_t, hammer2_key_t,
+ hammer2_blockref_t **);
+static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *, int);
+static void hammer2_chain_lru_flush(hammer2_pfs_t *);
+static void hammer2_chain_load_data(hammer2_chain_t *);
+static int hammer2_chain_testcheck(const hammer2_chain_t *, void *);
+
+/*
+ * Basic RBTree for chains.
+ */
+static int
+hammer2_chain_cmp(const hammer2_chain_t *chain1, const hammer2_chain_t *chain2)
+{
+ hammer2_key_t c1_beg, c1_end, c2_beg, c2_end;
+
+ /*
+	 * Compare chains. Overlaps are not supposed to happen; to catch
+	 * any software issues early, we count overlaps as a match.
+ */
+ c1_beg = chain1->bref.key;
+ c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1;
+ c2_beg = chain2->bref.key;
+ c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1;
+
+ if (c1_end < c2_beg) /* fully to the left */
+ return (-1);
+ if (c1_beg > c2_end) /* fully to the right */
+ return (1);
+ return (0); /* overlap (must not cross edge boundary) */
+}
+
+RB_GENERATE_STATIC(hammer2_chain_tree, hammer2_chain, rbnode,
+ hammer2_chain_cmp);
+RB_SCAN_INFO(hammer2_chain_tree, hammer2_chain);
+RB_GENERATE_SCAN_STATIC(hammer2_chain_tree, hammer2_chain, rbnode);
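
hammer2_chain_cmp() orders chains by the inclusive key range
[key, key + 2^keybits - 1], and any overlap compares equal so that
RB_INSERT surfaces collisions immediately. For example, a chain with key
0x0000 and keybits 16 covers keys 0x0000-0xFFFF and sorts strictly before
one keyed at 0x10000. A sketch of the range computation (bref_range is an
illustrative helper):

    static void
    bref_range(const hammer2_blockref_t *bref, hammer2_key_t *begp,
        hammer2_key_t *endp)
    {
            *begp = bref->key;
            *endp = bref->key + ((hammer2_key_t)1 << bref->keybits) - 1;
    }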
+
+/*
+ * Assert that a chain has no media data associated with it.
+ */
+static __inline void
+hammer2_chain_assert_no_data(const hammer2_chain_t *chain)
+{
+ KKASSERT(chain->dio == NULL);
+
+ if (chain->bref.type != HAMMER2_BREF_TYPE_VOLUME &&
+ chain->bref.type != HAMMER2_BREF_TYPE_FREEMAP &&
+ chain->data)
+ hpanic("chain %p still has data", chain);
+}
+
+/*
+ * Allocate a new disconnected chain element representing the specified
+ * bref. chain->refs is set to 1 and the passed bref is copied to
+ * chain->bref. chain->bytes is derived from the bref.
+ *
+ * Returns a referenced but unlocked (because there is no core) chain.
+ */
+static hammer2_chain_t *
+hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
+ hammer2_blockref_t *bref)
+{
+ hammer2_chain_t *chain;
+ unsigned int bytes;
+
+ /*
+ * Special case - radix of 0 indicates a chain that does not
+ * need a data reference (context is completely embedded in the
+ * bref).
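+ * A non-zero radix encodes the data size directly, e.g. a radix
+ * of 10 implies a 1 << 10 = 1KB media reference.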
+ */
+ if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
+ bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
+ else
+ bytes = 0;
+
+ switch (bref->type) {
+ case HAMMER2_BREF_TYPE_INODE:
+ case HAMMER2_BREF_TYPE_INDIRECT:
+ case HAMMER2_BREF_TYPE_DATA:
+ case HAMMER2_BREF_TYPE_DIRENT:
+ case HAMMER2_BREF_TYPE_FREEMAP_NODE:
+ case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
+ case HAMMER2_BREF_TYPE_FREEMAP:
+ case HAMMER2_BREF_TYPE_VOLUME:
+ chain = malloc(sizeof(*chain), M_HAMMER2, M_WAITOK | M_ZERO);
+ atomic_add_long(&hammer2_chain_allocs, 1);
+ break;
+ case HAMMER2_BREF_TYPE_EMPTY:
+ default:
+ hpanic("bad blockref type %d", bref->type);
+ break;
+ }
+
+ /*
+ * Initialize the new chain structure. pmp must be set to NULL for
+ * chains belonging to the super-root topology of a device mount.
+ */
+ if (pmp == hmp->spmp)
+ chain->pmp = NULL;
+ else
+ chain->pmp = pmp;
+
+ chain->hmp = hmp;
+ chain->bref = *bref;
+ chain->bytes = bytes;
+ chain->refs = 1;
+ chain->flags = HAMMER2_CHAIN_ALLOCATED;
+
+ hammer2_chain_init(chain);
+
+ return (chain);
+}
+
+/*
+ * A common function to initialize chains including vchain.
+ */
+void
+hammer2_chain_init(hammer2_chain_t *chain)
+{
+ RB_INIT(&chain->core.rbtree);
+ hammer2_mtx_init_recurse(&chain->lock, "h2ch_lk");
+ hammer2_mtx_init(&chain->inp_lock, "h2ch_inplk");
+ hammer2_spin_init(&chain->core.spin, "h2ch_cosp");
+}
+
+/*
+ * Add a reference to a chain element, preventing its destruction.
+ * Can be called with spinlock held.
+ */
+void
+hammer2_chain_ref(hammer2_chain_t *chain)
+{
+ if (atomic_fetchadd_int(&chain->refs, 1) == 0) {
+ /*
+ * Just flag that the chain was used and should be recycled
+ * when the LRU scan encounters it later.
+ */
+ if (chain->flags & HAMMER2_CHAIN_ONLRU)
+ atomic_set_int(&chain->flags, HAMMER2_CHAIN_LRUHINT);
+ }
+}
+
+/*
+ * Ref a locked chain and force the data to be held across an unlock.
+ * Chain must be currently locked.
+ */
+void
+hammer2_chain_ref_hold(hammer2_chain_t *chain)
+{
+ hammer2_mtx_assert_locked(&chain->lock);
+
+ atomic_add_int(&chain->lockcnt, 1);
+ hammer2_chain_ref(chain);
+}
+
+/*
+ * Insert the chain in the core rbtree.
+ */
+static int
+hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain,
+ int generation)
+{
+ hammer2_chain_t *xchain __diagused;
+ int error = 0;
+
+ hammer2_spin_ex(&parent->core.spin);
+
+ /* Interlocked by spinlock, check for race. */
+ if (parent->core.generation != generation) {
+ error = HAMMER2_ERROR_EAGAIN;
+ goto failed;
+ }
+
+ /* Insert chain. */
+ xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain);
+ KASSERT(xchain == NULL,
+ ("collision %p %p %016jx", chain, xchain, chain->bref.key));
+
+ atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
+ chain->parent = parent;
+ ++parent->core.chain_count;
+ ++parent->core.generation; /* XXX incs for _get() too */
+failed:
+ hammer2_spin_unex(&parent->core.spin);
+
+ return (error);
+}
+
+/*
+ * Drop the caller's reference to the chain. When the ref count drops to
+ * zero this function will try to disassociate the chain from its parent and
+ * deallocate it, then recursively drop the parent using the implied ref
+ * from the chain's chain->parent.
+ *
+ * Nobody should own chain's mutex on the 1->0 transition, unless this drop
+ * races an acquisition by another cpu. Therefore we can loop if we are
+ * unable to acquire the mutex, and refs is unlikely to be 1 unless we again
+ * race against another drop.
+ */
+void
+hammer2_chain_drop(hammer2_chain_t *chain)
+{
+ unsigned int refs;
+
+ KKASSERT(chain->refs > 0);
+
+ while (chain) {
+ refs = chain->refs;
+ __compiler_membar();
+
+ KKASSERT(refs > 0);
+ if (refs == 1) {
+ if (hammer2_mtx_ex_try(&chain->lock) == 0)
+ chain = hammer2_chain_lastdrop(chain, 0);
+ /* Retry the same chain, or chain from lastdrop. */
+ } else {
+ if (atomic_cmpset_int(&chain->refs, refs, refs - 1))
+ break;
+ /* Retry the same chain. */
+ }
+ cpu_spinwait();
+ }
+}
+
+/*
+ * Unhold a held and probably not-locked chain, ensuring that the data is
+ * dropped on the 1->0 transition of lockcnt by obtaining an exclusive
+ * lock and then simply unlocking the chain.
+ */
+void
+hammer2_chain_unhold(hammer2_chain_t *chain)
+{
+ unsigned int lockcnt;
+ int iter = 0;
+
+ for (;;) {
+ lockcnt = chain->lockcnt;
+ __compiler_membar();
+
+ if (lockcnt > 1) {
+ if (atomic_cmpset_int(&chain->lockcnt, lockcnt,
+ lockcnt - 1))
+ break;
+ } else if (hammer2_mtx_ex_try(&chain->lock) == 0) {
+ hammer2_chain_unlock(chain);
+ break;
+ } else {
+ /*
+ * This situation can easily occur on SMP due to
+ * the gap between the 1->0 transition and the
+ * final unlock. We cannot safely block on the
+ * mutex because lockcnt might go above 1.
+ */
+ if (++iter > 1000) {
+ if (iter > 1000 + hz) {
+ hprintf("h2race1\n");
+ iter = 1000;
+ }
+ pause("h2race1", 1);
+ }
+ cpu_spinwait();
+ }
+ }
+}
+
+void
+hammer2_chain_drop_unhold(hammer2_chain_t *chain)
+{
+ hammer2_chain_unhold(chain);
+ hammer2_chain_drop(chain);
+}
+
+void
+hammer2_chain_rehold(hammer2_chain_t *chain)
+{
+ hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED);
+ atomic_add_int(&chain->lockcnt, 1);
+ hammer2_chain_unlock(chain);
+}
+
+/*
+ * Handles the (potential) last drop of chain->refs from 1->0. Called with
+ * the mutex exclusively locked, refs == 1, and lockcnt 0. SMP races are
+ * possible against refs and lockcnt. We must dispose of the mutex on chain.
+ *
+ * This function returns an unlocked chain for recursive drop or NULL. It
+ * can return the same chain if it determines it has raced another ref.
+ *
+ * The chain cannot be freed if it has any children.
+ * The core spinlock is allowed to nest child-to-parent (not parent-to-child).
+ */
+static hammer2_chain_t *
+hammer2_chain_lastdrop(hammer2_chain_t *chain, int depth)
+{
+ hammer2_pfs_t *pmp;
+ hammer2_chain_t *parent, *rdrop;
+
+ hammer2_mtx_assert_ex(&chain->lock);
+
+ /*
+ * We need chain's spinlock to interlock the sub-tree test.
+ * We already have chain's mutex, protecting chain->parent.
+ * Remember that chain->refs can be in flux.
+ */
+ hammer2_spin_ex(&chain->core.spin);
+
+ if (chain->parent != NULL) {
+ /* Nothing to do for read-only mount. */
+ } else if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME ||
+ chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP) {
+ /* Nothing to do for read-only mount. */
+ } else {
+ /*
+ * The chain has no parent and can be flagged for destruction.
+ * This can happen, e.g., via
+ * hammer2_chain_lookup()
+ * hammer2_chain_get()
+ * hammer2_chain_insert() -> HAMMER2_ERROR_EAGAIN
+ * hammer2_chain_drop() (chain->parent still NULL)
+ * hammer2_chain_lastdrop()
+ */
+ atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
+ }
+
+ /*
+ * If any children exist we must leave the chain intact with refs == 0.
+ * They exist because chains with refs are retained below us.
+ *
+ * Retry (return chain) if we fail to transition the refs to 0, else
+ * return NULL indicating there is nothing more to do.
+ *
+ * Chains with children are NOT put on the LRU list.
+ */
+ if (chain->core.chain_count) {
+ if (atomic_cmpset_int(&chain->refs, 1, 0)) {
+ hammer2_spin_unex(&chain->core.spin);
+ hammer2_chain_assert_no_data(chain);
+ hammer2_mtx_unlock(&chain->lock);
+ chain = NULL;
+ } else {
+ hammer2_spin_unex(&chain->core.spin);
+ hammer2_mtx_unlock(&chain->lock);
+ }
+ return (chain);
+ }
+ /* Spinlock still held. */
+ /* No chains left under us. */
+
+ /*
+ * chain->core has no children left so no accessors can get to our
+ * chain from there. Now we have to lock the parent core to interlock
+ * remaining possible accessors that might bump chain's refs before
+ * we can safely drop chain's refs with intent to free the chain.
+ */
+ pmp = chain->pmp; /* can be NULL */
+ rdrop = NULL;
+ parent = chain->parent;
+
+ /*
+ * WARNING! chain's spin lock is still held here, and other spinlocks
+ * will be acquired and released in the code below. We
+ * cannot be making fancy procedure calls!
+ */
+
+ /*
+ * We can cache the chain if it is associated with a pmp
+ * and not flagged as being destroyed or requesting a full
+ * release. In this situation the chain is not removed
+ * from its parent, i.e. it can still be looked up.
+ *
+ * We intentionally do not cache DATA chains because these
+ * were likely used to load data into the logical buffer cache
+ * and will not be accessed again for some time.
+ */
+ if ((chain->flags &
+ (HAMMER2_CHAIN_DESTROY | HAMMER2_CHAIN_RELEASE)) == 0 &&
+ chain->pmp && chain->bref.type != HAMMER2_BREF_TYPE_DATA) {
+ if (parent)
+ hammer2_spin_ex(&parent->core.spin);
+ if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) {
+ /*
+ * 1->0 transition failed, retry. Do not drop
+ * the chain's data yet!
+ */
+ if (parent)
+ hammer2_spin_unex(&parent->core.spin);
+ hammer2_spin_unex(&chain->core.spin);
+ hammer2_mtx_unlock(&chain->lock);
+ return (chain);
+ }
+
+ /* Success. */
+ hammer2_chain_assert_no_data(chain);
+
+ /*
+ * Make sure we are on the LRU list, clean up excessive
+ * LRU entries. We can only really drop one but there might
+ * be other entries that we can remove from the lru_list
+ * without dropping.
+ *
+ * NOTE: HAMMER2_CHAIN_ONLRU may only be safely set when
+ * chain->core.spin AND pmp->lru_spin are held, but
+ * can be safely cleared only holding pmp->lru_spin.
+ */
+ if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) {
+ hammer2_spin_ex(&pmp->lru_spin);
+ if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) {
+ atomic_set_int(&chain->flags,
+ HAMMER2_CHAIN_ONLRU);
+ TAILQ_INSERT_TAIL(&pmp->lru_list, chain, entry);
+ atomic_add_int(&pmp->lru_count, 1);
+ }
+ if (pmp->lru_count < HAMMER2_LRU_LIMIT)
+ depth = 1; /* Disable lru_list flush. */
+ hammer2_spin_unex(&pmp->lru_spin);
+ } else {
+ /* Disable lru_list flush. */
+ depth = 1;
+ }
+
+ if (parent)
+ hammer2_spin_unex(&parent->core.spin);
+ hammer2_spin_unex(&chain->core.spin);
+ hammer2_mtx_unlock(&chain->lock);
+
+ /*
+ * lru_list hysteresis (see above for depth overrides).
+ * Note that depth also prevents excessive lastdrop recursion.
+ */
+ if (depth == 0)
+ hammer2_chain_lru_flush(pmp);
+ return (NULL);
+ }
+
+ /* Make sure we are not on the LRU list. */
+ if (chain->flags & HAMMER2_CHAIN_ONLRU) {
+ hammer2_spin_ex(&pmp->lru_spin);
+ if (chain->flags & HAMMER2_CHAIN_ONLRU) {
+ atomic_add_int(&pmp->lru_count, -1);
+ atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU);
+ TAILQ_REMOVE(&pmp->lru_list, chain, entry);
+ }
+ hammer2_spin_unex(&pmp->lru_spin);
+ }
+
+ /*
+ * Spinlock the parent and try to drop the last ref on chain.
+ * On success determine if we should dispose of the chain
+ * (remove the chain from its parent, etc).
+ *
+ * Normal core locks are top-down recursive but we define
+ * core spinlocks as bottom-up recursive, so this is safe.
+ */
+ if (parent) {
+ hammer2_spin_ex(&parent->core.spin);
+ if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) {
+ /* 1->0 transition failed, retry. */
+ hammer2_spin_unex(&parent->core.spin);
+ hammer2_spin_unex(&chain->core.spin);
+ hammer2_mtx_unlock(&chain->lock);
+ return (chain);
+ }
+
+ /*
+ * 1->0 transition successful, parent spin held to prevent
+ * new lookups, chain spinlock held to protect parent field.
+ * Remove chain from the parent.
+ */
+ if (chain->flags & HAMMER2_CHAIN_ONRBTREE) {
+ RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree,
+ chain);
+ atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
+ --parent->core.chain_count;
+ chain->parent = NULL;
+ }
+
+ /*
+ * If our chain was the last chain in the parent's core the
+ * core is now empty and its parent might have to be
+ * re-dropped if it has 0 refs.
+ */
+ if (parent->core.chain_count == 0) {
+ rdrop = parent;
+ atomic_add_int(&rdrop->refs, 1);
+ /*
+ if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0)
+ rdrop = NULL;
+ */
+ }
+ hammer2_spin_unex(&parent->core.spin);
+ } else {
+ /* No-parent case. */
+ if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) {
+ /* 1->0 transition failed, retry. */
+ hammer2_spin_unex(&chain->core.spin);
+ hammer2_mtx_unlock(&chain->lock);
+ return (chain);
+ }
+ }
+
+ /*
+ * Successful 1->0 transition, no parent, no children... no way for
+ * anyone to ref this chain any more. We can clean-up and free it.
+ *
+ * We still have the core spinlock, and core's chain_count is 0.
+ * Any parent spinlock is gone.
+ */
+ hammer2_spin_unex(&chain->core.spin);
+ hammer2_chain_assert_no_data(chain);
+ hammer2_mtx_unlock(&chain->lock);
+ KKASSERT(RB_EMPTY(&chain->core.rbtree) && chain->core.chain_count == 0);
+
+ /*
+ * All locks are gone, no pointers remain to the chain, finish
+ * freeing it.
+ */
+ if (chain->flags & HAMMER2_CHAIN_ALLOCATED) {
+ atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ALLOCATED);
+ hammer2_mtx_destroy(&chain->lock);
+ hammer2_mtx_destroy(&chain->inp_lock);
+ hammer2_spin_destroy(&chain->core.spin);
+ chain->hmp = NULL;
+ free(chain, M_HAMMER2);
+ atomic_add_long(&hammer2_chain_allocs, -1);
+ }
+
+ /* Possible chaining loop when parent re-drop needed. */
+ return (rdrop);
+}
+
+/*
+ * Heuristic flush of the LRU, try to reduce the number of entries
+ * on the LRU to (HAMMER2_LRU_LIMIT * 2 / 3). This procedure is called
+ * only when lru_count exceeds HAMMER2_LRU_LIMIT.
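+ * For example, if HAMMER2_LRU_LIMIT were 1024, the flush would drain
+ * the list down to roughly 682 entries.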
+ */
+static void
+hammer2_chain_lru_flush(hammer2_pfs_t *pmp)
+{
+ hammer2_chain_t *chain;
+ unsigned int refs;
+again:
+ chain = NULL;
+ hammer2_spin_ex(&pmp->lru_spin);
+ while (pmp->lru_count > HAMMER2_LRU_LIMIT * 2 / 3) {
+ /*
+ * Pick a chain off the lru_list, just recycle it quickly
+ * if LRUHINT is set (the chain was ref'd but left on
+ * the lru_list, so cycle to the end).
+ */
+ chain = TAILQ_FIRST(&pmp->lru_list);
+ TAILQ_REMOVE(&pmp->lru_list, chain, entry);
+
+ if (chain->flags & HAMMER2_CHAIN_LRUHINT) {
+ atomic_clear_int(&chain->flags, HAMMER2_CHAIN_LRUHINT);
+ TAILQ_INSERT_TAIL(&pmp->lru_list, chain, entry);
+ chain = NULL;
+ continue;
+ }
+
+ /*
+ * Ok, we are off the LRU. We must adjust refs before we
+ * can safely clear the ONLRU flag.
+ */
+ atomic_add_int(&pmp->lru_count, -1);
+ if (atomic_cmpset_int(&chain->refs, 0, 1)) {
+ atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU);
+ atomic_set_int(&chain->flags, HAMMER2_CHAIN_RELEASE);
+ break;
+ }
+ atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU);
+ chain = NULL;
+ }
+ hammer2_spin_unex(&pmp->lru_spin);
+ if (chain == NULL)
+ return;
+
+ /*
+ * If we picked a chain off the lru list we may be able to lastdrop
+ * it. Use a depth of 1 to prevent excessive lastdrop recursion.
+ */
+ while (chain) {
+ refs = chain->refs;
+ __compiler_membar();
+ KKASSERT(refs > 0);
+
+ if (refs == 1) {
+ if (hammer2_mtx_ex_try(&chain->lock) == 0)
+ chain = hammer2_chain_lastdrop(chain, 1);
+ /* Retry the same chain, or chain from lastdrop. */
+ } else {
+ if (atomic_cmpset_int(&chain->refs, refs, refs - 1))
+ break;
+ /* Retry the same chain. */
+ }
+ cpu_spinwait();
+ }
+ goto again;
+}
+
+/*
+ * Called on the last lock release (1->0 lockcnt transition) to detach
+ * chain->data and return the chain's dio, if any, for the caller to
+ * release.
+ */
+static hammer2_io_t *
+hammer2_chain_drop_data(hammer2_chain_t *chain)
+{
+ hammer2_io_t *dio;
+
+ if ((dio = chain->dio) != NULL) {
+ chain->dio = NULL;
+ chain->data = NULL;
+ } else {
+ switch (chain->bref.type) {
+ case HAMMER2_BREF_TYPE_VOLUME:
+ case HAMMER2_BREF_TYPE_FREEMAP:
+ break;
+ default:
+ if (chain->data != NULL) {
+ hammer2_spin_unex(&chain->core.spin);
+ hpanic("chain data not NULL: "
+ "chain=%p refs=%d bref=%016jx.%02x "
+ "parent=%p dio=%p data=%p",
+ chain, chain->refs, chain->bref.data_off,
+ chain->bref.type, chain->parent, chain->dio,
+ chain->data);
+ }
+ KKASSERT(chain->data == NULL);
+ break;
+ }
+ }
+ return (dio);
+}
+
+/*
+ * Lock a referenced chain element, acquiring its data with I/O if necessary,
+ * and specify how you would like the data to be resolved.
+ *
+ * If an I/O or other fatal error occurs, chain->error will be set to non-zero.
+ *
+ * The lock is allowed to recurse, multiple locking ops will aggregate
+ * the requested resolve types. Once data is assigned it will not be
+ * removed until the last unlock.
+ *
+ * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for DATA chains.
+ * (typically used to avoid device/logical buffer
+ * aliasing for data)
+ *
+ * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element.
+ *
+ * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise
+ * it will be locked exclusive.
+ *
+ * NOTE: Embedded elements (volume header, inodes) are always resolved
+ * regardless.
+ *
+ * NOTE: (data) elements are normally locked RESOLVE_MAYBE
+ * so as not to instantiate a device buffer, which could alias against
+ * a logical file buffer. However, if ALWAYS is specified the
+ * device buffer will be instantiated anyway.
+ *
+ * NOTE: The return value is currently always 0.
+ *
+ * WARNING! This function blocks on I/O if data needs to be fetched. This
+ * blocking can run concurrent with other compatible lock holders
+ * who do not need data returning.
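+ *
+ * Typical usage (illustrative sketch only, mirroring the
+ * lookup_init/lookup_done helpers below):
+ *
+ *	hammer2_chain_ref(chain);
+ *	hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
+ *	(access chain->data, check chain->error)
+ *	hammer2_chain_unlock(chain);
+ *	hammer2_chain_drop(chain);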
+ */
+int
+hammer2_chain_lock(hammer2_chain_t *chain, int how)
+{
+ KKASSERT(chain->refs > 0);
+
+ /*
+ * Get the appropriate lock. If LOCKAGAIN is flagged with
+ * SHARED the caller expects a shared lock to already be
+ * present and we are giving it another ref. This case must
+ * importantly not block if there is a pending exclusive lock
+ * request.
+ */
+ atomic_add_int(&chain->lockcnt, 1);
+ if (how & HAMMER2_RESOLVE_SHARED) {
+ if (how & HAMMER2_RESOLVE_LOCKAGAIN) {
+ if (hammer2_mtx_owned(&chain->lock))
+ hammer2_mtx_ex(&chain->lock);
+ else
+ hammer2_mtx_sh(&chain->lock);
+ } else {
+ hammer2_mtx_sh(&chain->lock);
+ }
+ } else {
+ hammer2_mtx_ex(&chain->lock);
+ }
+
+ /*
+ * If we already have a valid data pointer no further action is
+ * necessary.
+ */
+ if (chain->data)
+ return (0);
+
+ /*
+ * Do we have to resolve the data? This is generally only
+ * applicable to HAMMER2_BREF_TYPE_DATA which is special-cased.
+ * Other bref types expect the data to be there.
+ */
+ switch (how & HAMMER2_RESOLVE_MASK) {
+ case HAMMER2_RESOLVE_MAYBE:
+ if (chain->bref.type == HAMMER2_BREF_TYPE_DATA)
+ return (0);
+ /* fall through */
+ case HAMMER2_RESOLVE_ALWAYS:
+ default:
+ break;
+ }
+
+ /* Caller requires data. */
+ hammer2_chain_load_data(chain);
+
+ return (0);
+}
+
+/*
+ * Issue I/O and install chain->data. Caller must hold a chain lock, lock
+ * may be of any type.
+ *
+ * Once chain->data is set it cannot be disposed of until all locks are
+ * released.
+ */
+static void
+hammer2_chain_load_data(hammer2_chain_t *chain)
+{
+ hammer2_dev_t *hmp;
+ hammer2_blockref_t *bref;
+ char *bdata;
+ int error;
+
+ /*
+ * Degenerate case, data already present, or chain has no media
+ * reference to load.
+ */
+ if (chain->data)
+ return;
+ if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
+ return;
+
+ hmp = chain->hmp;
+ KKASSERT(hmp != NULL);
+
+ /*
+ * inp_lock protects HAMMER2_CHAIN_{IOINPROG,SIGNAL} bits.
+ * DragonFly uses tsleep_interlock(9) here without taking mutex.
+ */
+ hammer2_mtx_ex(&chain->inp_lock);
+again:
+ if (chain->flags & HAMMER2_CHAIN_IOINPROG) {
+ atomic_set_int(&chain->flags, HAMMER2_CHAIN_IOSIGNAL);
+ hammer2_mtx_sleep(&chain->flags, &chain->inp_lock, "h2ch");
+ goto again;
+ }
+ atomic_set_int(&chain->flags, HAMMER2_CHAIN_IOINPROG);
+ hammer2_mtx_unlock(&chain->inp_lock);
+
+ /*
+ * We own CHAIN_IOINPROG.
+ * Degenerate case if we raced another load.
+ */
+ if (chain->data)
+ goto done;
+
+ /* We must resolve to a device buffer by issuing I/O. */
+ bref = &chain->bref;
+ error = hammer2_io_bread(hmp, bref->type, bref->data_off, chain->bytes,
+ &chain->dio);
+ if (error) {
+ chain->error = HAMMER2_ERROR_EIO;
+ hprintf("bread error %d at data_off %016jx\n",
+ error, (intmax_t)bref->data_off);
+ hammer2_io_bqrelse(&chain->dio);
+ goto done;
+ }
+ chain->error = 0;
+
+ bdata = hammer2_io_data(chain->dio, chain->bref.data_off);
+
+ if ((chain->flags & HAMMER2_CHAIN_TESTEDGOOD) == 0) {
+ if (hammer2_chain_testcheck(chain, bdata) == 0)
+ chain->error = HAMMER2_ERROR_CHECK;
+ else
+ atomic_set_int(&chain->flags, HAMMER2_CHAIN_TESTEDGOOD);
+ }
+
+ switch (bref->type) {
+ case HAMMER2_BREF_TYPE_VOLUME:
+ case HAMMER2_BREF_TYPE_FREEMAP:
+ hpanic("unresolved volume header");
+ break;
+ case HAMMER2_BREF_TYPE_DIRENT:
+ KKASSERT(chain->bytes != 0);
+ /* fall through */
+ case HAMMER2_BREF_TYPE_INODE:
+ case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
+ case HAMMER2_BREF_TYPE_INDIRECT:
+ case HAMMER2_BREF_TYPE_DATA:
+ case HAMMER2_BREF_TYPE_FREEMAP_NODE:
+ default:
+ /* Point data at the device buffer and leave dio intact. */
+ chain->data = (void *)bdata;
+ break;
+ }
+done:
+ /* Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested. */
+ KKASSERT(chain->flags & HAMMER2_CHAIN_IOINPROG);
+ hammer2_mtx_ex(&chain->inp_lock);
+ atomic_clear_int(&chain->flags, HAMMER2_CHAIN_IOINPROG);
+ if (chain->flags & HAMMER2_CHAIN_IOSIGNAL) {
+ atomic_clear_int(&chain->flags, HAMMER2_CHAIN_IOSIGNAL);
+ hammer2_mtx_wakeup(&chain->flags);
+ }
+ hammer2_mtx_unlock(&chain->inp_lock);
+}
+
+/*
+ * Unlock and deref a chain element.
+ *
+ * Remember that the presence of children under chain prevent the chain's
+ * destruction but do not add additional references, so the dio will still
+ * be dropped.
+ */
+void
+hammer2_chain_unlock(hammer2_chain_t *chain)
+{
+ hammer2_io_t *dio;
+ unsigned int lockcnt;
+ int iter = 0;
+
+ /*
+ * If multiple locks are present (or being attempted) on this
+ * particular chain we can just unlock, drop refs, and return.
+ *
+ * Otherwise fall-through on the 1->0 transition.
+ */
+ for (;;) {
+ lockcnt = chain->lockcnt;
+ KKASSERT(lockcnt > 0);
+ __compiler_membar();
+
+ if (lockcnt > 1) {
+ if (atomic_cmpset_int(&chain->lockcnt, lockcnt,
+ lockcnt - 1)) {
+ hammer2_mtx_unlock(&chain->lock);
+ return;
+ }
+ } else if (hammer2_mtx_owned(&chain->lock) ||
+ hammer2_mtx_upgrade_try(&chain->lock) == 0) {
+ /* While holding the mutex exclusively. */
+ if (atomic_cmpset_int(&chain->lockcnt, 1, 0))
+ break;
+ } else {
+ /*
+ * This situation can easily occur on SMP due to
+ * the gap between the 1->0 transition and the
+ * final unlock. We cannot safely block on the
+ * mutex because lockcnt might go above 1.
+ */
+ if (++iter > 1000) {
+ if (iter > 1000 + hz) {
+ hprintf("h2race2\n");
+ iter = 1000;
+ }
+ pause("h2race2", 1);
+ }
+ cpu_spinwait();
+ }
+ }
+
+ /*
+ * Last unlock / mutex upgraded to exclusive. Drop the data
+ * reference.
+ */
+ dio = hammer2_chain_drop_data(chain);
+ if (dio)
+ hammer2_io_bqrelse(&dio);
+ hammer2_mtx_unlock(&chain->lock);
+}
+
+/*
+ * This calculates the point at which all remaining blockrefs are empty.
+ * This routine can only be called on a live chain.
+ *
+ * Caller holds the chain locked, but possibly with a shared lock. We
+ * must use an exclusive spinlock to prevent corruption.
+ *
+ * NOTE: Flag is not set until after the count is complete, allowing
+ * callers to test the flag without holding the spinlock.
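+ *
+ * core.live_zero ends up as the index just past the last non-empty
+ * blockref; e.g. with non-empty brefs only at indices 0 and 3 it
+ * becomes 4.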
+ */
+static void
+hammer2_chain_countbrefs(hammer2_chain_t *chain, hammer2_blockref_t *base,
+ int count)
+{
+ hammer2_mtx_assert_locked(&chain->lock);
+
+ hammer2_spin_ex(&chain->core.spin);
+ if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) {
+ if (base) {
+ while (--count >= 0)
+ if (base[count].type != HAMMER2_BREF_TYPE_EMPTY)
+ break;
+ chain->core.live_zero = count + 1;
+ } else {
+ chain->core.live_zero = 0;
+ }
+ atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS);
+ }
+ hammer2_spin_unex(&chain->core.spin);
+}
+
+/*
+ * This function returns the chain at the nearest key within the specified
+ * range. The returned chain will be referenced but not locked.
+ *
+ * This function will recurse through chain->rbtree as necessary and will
+ * return a *key_nextp suitable for iteration. *key_nextp is only set if
+ * the iteration value is less than the current value of *key_nextp.
+ *
+ * The caller should use (*key_nextp) to calculate the actual range of
+ * the returned element, which will be (key_beg to *key_nextp - 1), because
+ * there might be another element which is superior to the returned element
+ * and overlaps it.
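+ *
+ * For example, if the returned element spans keys 0x10-0x1f but another
+ * element begins at 0x18, (*key_nextp) is truncated to 0x18 and the
+ * caller should treat the result as covering only 0x10-0x17.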
+ *
+ * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL
+ * chains continue to be returned. On EOF (*key_nextp) may overflow since
+ * it will wind up being (key_end + 1).
+ *
+ * WARNING! Must be called with child's spinlock held. Spinlock remains
+ * held through the operation.
+ */
+struct hammer2_chain_find_info {
+ hammer2_chain_t *best;
+ hammer2_key_t key_beg;
+ hammer2_key_t key_end;
+ hammer2_key_t key_next;
+};
+
+static int hammer2_chain_find_cmp(hammer2_chain_t *, void *);
+static int hammer2_chain_find_callback(hammer2_chain_t *, void *);
+
+static hammer2_chain_t *
+hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp,
+ hammer2_key_t key_beg, hammer2_key_t key_end)
+{
+ struct hammer2_chain_find_info info;
+
+ info.best = NULL;
+ info.key_beg = key_beg;
+ info.key_end = key_end;
+ info.key_next = *key_nextp;
+
+ RB_SCAN(hammer2_chain_tree, &parent->core.rbtree,
+ hammer2_chain_find_cmp, hammer2_chain_find_callback, &info);
+ *key_nextp = info.key_next;
+
+ return (info.best);
+}
+
+static int
+hammer2_chain_find_cmp(hammer2_chain_t *child, void *data)
+{
+ struct hammer2_chain_find_info *info = data;
+ hammer2_key_t child_beg, child_end;
+
+ child_beg = child->bref.key;
+ child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1;
+
+ if (child_end < info->key_beg)
+ return (-1);
+ if (child_beg > info->key_end)
+ return (1);
+ return (0);
+}
+
+static int
+hammer2_chain_find_callback(hammer2_chain_t *child, void *data)
+{
+ struct hammer2_chain_find_info *info = data;
+ hammer2_chain_t *best;
+ hammer2_key_t child_end;
+
+ if ((best = info->best) == NULL) {
+ /* No previous best. Assign best. */
+ info->best = child;
+ } else if (best->bref.key <= info->key_beg &&
+ child->bref.key <= info->key_beg) {
+ /* Illegal overlap. */
+ KKASSERT(0);
+ } else if (child->bref.key < best->bref.key) {
+ /*
+ * Child has a nearer key and best is not flush with key_beg.
+ * Set best to child. Truncate key_next to the old best key.
+ */
+ info->best = child;
+ if (info->key_next > best->bref.key || info->key_next == 0)
+ info->key_next = best->bref.key;
+ } else if (child->bref.key == best->bref.key) {
+ /*
+ * If our current best is flush with the child then this
+ * is an illegal overlap.
+ *
+ * key_next will automatically be limited to the smaller of
+ * the two end-points.
+ */
+ KKASSERT(0);
+ } else {
+ /*
+ * Keep the current best but truncate key_next to the child's
+ * base.
+ *
+ * key_next will also automatically be limited to the smaller
+ * of the two end-points (probably not necessary for this case
+ * but we do it anyway).
+ */
+ if (info->key_next > child->bref.key || info->key_next == 0)
+ info->key_next = child->bref.key;
+ }
+
+ /* Always truncate key_next based on child's end-of-range. */
+ child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits);
+ if (child_end && (info->key_next > child_end || info->key_next == 0))
+ info->key_next = child_end;
+
+ return (0);
+}
+
+/*
+ * Retrieve the specified chain from a media blockref, creating the
+ * in-memory chain structure which reflects it. The returned chain is
+ * held and locked according to (how) (HAMMER2_RESOLVE_*). The caller must
+ * handle crc-checks and so forth, and should check chain->error before
+ * assuming that the data is good.
+ *
+ * To handle insertion races the caller passes in the generation number
+ * of the parent's core. NULL will be returned if the generation number
+ * changes before we have a chance to insert the chain. Insert races can
+ * occur because the parent might be held shared.
+ *
+ * Caller must hold the parent locked shared or exclusive since we may
+ * need the parent's bref array to find our block.
+ *
+ * WARNING! chain->pmp is always set to NULL for any chain representing
+ * part of the super-root topology.
+ */
+static hammer2_chain_t *
+hammer2_chain_get(hammer2_chain_t *parent, int generation,
+ hammer2_blockref_t *bref, int how)
+{
+ hammer2_dev_t *hmp = parent->hmp;
+ hammer2_chain_t *chain;
+ int error;
+
+ hammer2_mtx_assert_locked(&parent->lock);
+
+ /*
+ * Allocate a chain structure representing the existing media
+ * entry. Resulting chain has one ref and is not locked.
+ */
+ if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
+ chain = hammer2_chain_alloc(hmp, NULL, bref);
+ else
+ chain = hammer2_chain_alloc(hmp, parent->pmp, bref);
+ /* Ref'd chain returned. */
+
+ /* Chain must be locked to avoid unexpected ripouts. */
+ hammer2_chain_lock(chain, how);
+
+ /*
+ * Link the chain into its parent. A spinlock is required to safely
+ * access the RBTREE, and it is possible to collide with another
+ * hammer2_chain_get() operation because the caller might only hold
+ * a shared lock on the parent.
+ */
+ KKASSERT(parent->refs > 0);
+ error = hammer2_chain_insert(parent, chain, generation);
+ if (error) {
+ KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0);
+ hammer2_chain_unlock(chain);
+ hammer2_chain_drop(chain);
+ chain = NULL;
+ } else {
+ KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
+ }
+
+ /*
+ * Return our new chain referenced but not locked, or NULL if
+ * a race occurred.
+ */
+ return (chain);
+}
+
+/*
+ * Lookup initialization/completion API.
+ */
+hammer2_chain_t *
+hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags)
+{
+ hammer2_chain_ref(parent);
+
+ if (flags & HAMMER2_LOOKUP_SHARED)
+ hammer2_chain_lock(parent,
+ HAMMER2_RESOLVE_ALWAYS | HAMMER2_RESOLVE_SHARED);
+ else
+ hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
+
+ return (parent);
+}
+
+void
+hammer2_chain_lookup_done(hammer2_chain_t *parent)
+{
+ if (parent) {
+ hammer2_chain_unlock(parent);
+ hammer2_chain_drop(parent);
+ }
+}
+
+/*
+ * Take the locked chain and return a locked parent. The chain is unlocked
+ * and dropped. *chainp is set to the returned parent as a convenience.
+ * Pass HAMMER2_RESOLVE_* flags in flags.
+ *
+ * This will work even if the chain is errored, and the caller can check
+ * parent->error on return if desired since the parent will be locked.
+ */
+static hammer2_chain_t *
+hammer2_chain_repparent(hammer2_chain_t **chainp, int flags)
+{
+ hammer2_chain_t *chain, *parent;
+
+ chain = *chainp;
+ hammer2_mtx_assert_locked(&chain->lock);
+
+ parent = chain->parent;
+ KKASSERT(parent);
+
+ hammer2_chain_ref(parent);
+ /* DragonFly uses flags|HAMMER2_RESOLVE_NONBLOCK followed by reptrack */
+ hammer2_chain_lock(parent, flags);
+
+ hammer2_chain_unlock(chain);
+ hammer2_chain_drop(chain);
+ *chainp = parent;
+
+ return (parent);
+}
+
+/*
+ * Locate the first chain whose key range overlaps (key_beg, key_end) inclusive.
+ * (*parentp) typically points to an inode but can also point to a related
+ * indirect block and this function will recurse upwards and find the inode
+ * or the nearest undeleted indirect block covering the key range.
+ *
+ * This function unconditionally sets *errorp, replacing any previous value.
+ *
+ * (*parentp) must be exclusive or shared locked (depending on flags) and
+ * referenced and can be an inode or an existing indirect block within the
+ * inode.
+ *
+ * If (*parent) is errored out, this function will not attempt to recurse
+ * the radix tree and will return NULL along with an appropriate *errorp.
+ * If NULL is returned and *errorp is 0, the requested lookup could not be
+ * located.
+ *
+ * On return (*parentp) will be modified to point at the deepest parent chain
+ * element encountered during the search, as a helper for an insertion or
+ * deletion.
+ *
+ * The new (*parentp) will be locked shared or exclusive (depending on flags),
+ * and referenced, and the old will be unlocked and dereferenced (no change
+ * if they are both the same). This is particularly important if the caller
+ * wishes to insert a new chain, (*parentp) will be set properly even if NULL
+ * is returned, as long as no error occurred.
+ *
+ * The matching chain will be returned locked according to flags.
+ *
+ * --
+ * NULL is returned if no match was found, but (*parentp) will still
+ * potentially be adjusted.
+ *
+ * On return (*key_nextp) will point to an iterative value for key_beg.
+ * (If NULL is returned (*key_nextp) is set to (key_end + 1)).
+ *
+ * This function will also recurse up the chain if the key is not within the
+ * current parent's range. (*parentp) can never be set to NULL. An iteration
+ * can simply allow (*parentp) to float inside the loop.
+ *
+ * NOTE! chain->data is not always resolved. By default it will not be
+ * resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF. Use
+ * HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/
+ * BREF_TYPE_DATA as the device buffer can alias the logical file
+ * buffer).
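+ *
+ * Illustrative iteration sketch (an assumption-free pattern drawn from
+ * how this API is used together with hammer2_chain_next() below):
+ *
+ *	parent = hammer2_chain_lookup_init(base, 0);
+ *	chain = hammer2_chain_lookup(&parent, &key_next, key_beg, key_end,
+ *	    &error, 0);
+ *	while (chain) {
+ *		(process chain, check chain->error)
+ *		chain = hammer2_chain_next(&parent, chain, &key_next,
+ *		    key_next, key_end, &error, 0);
+ *	}
+ *	hammer2_chain_lookup_done(parent);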
+ */
+hammer2_chain_t *
+hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp,
+ hammer2_key_t key_beg, hammer2_key_t key_end, int *errorp, int flags)
+{
+ hammer2_chain_t *chain, *parent;
+ hammer2_blockref_t bsave, *base, *bref;
+ hammer2_key_t scan_beg, scan_end;
+ int how_always = HAMMER2_RESOLVE_ALWAYS;
+ int how_maybe = HAMMER2_RESOLVE_MAYBE;
+ int how, count, generation, maxloops = 300000;
+
+ if (flags & HAMMER2_LOOKUP_ALWAYS) {
+ how_maybe = how_always;
+ how = HAMMER2_RESOLVE_ALWAYS;
+ } else {
+ how = HAMMER2_RESOLVE_MAYBE;
+ }
+ if (flags & HAMMER2_LOOKUP_SHARED) {
+ how_maybe |= HAMMER2_RESOLVE_SHARED;
+ how_always |= HAMMER2_RESOLVE_SHARED;
+ how |= HAMMER2_RESOLVE_SHARED;
+ }
+
+ /*
+ * Recurse (*parentp) upward if necessary until the parent completely
+ * encloses the key range or we hit the inode.
+ */
+ parent = *parentp;
+ hammer2_mtx_assert_locked(&parent->lock);
+ *errorp = 0;
+
+ while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
+ parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
+ scan_beg = parent->bref.key;
+ scan_end = scan_beg +
+ ((hammer2_key_t)1 << parent->bref.keybits) - 1;
+ if (key_beg >= scan_beg && key_end <= scan_end)
+ break;
+ parent = hammer2_chain_repparent(parentp, how_maybe);
+ }
+again:
+ if (--maxloops == 0)
+ hpanic("maxloops");
+
+ /*
+ * No lookup is possible if the parent is errored. We delayed
+ * this check as long as we could to ensure that the parent backup,
+ * embedded data code could still execute.
+ */
+ if (parent->error) {
+ *errorp = parent->error;
+ return (NULL);
+ }
+
+ /*
+ * Locate the blockref array. Currently we do a fully associative
+ * search through the array.
+ */
+ switch (parent->bref.type) {
+ case HAMMER2_BREF_TYPE_INODE:
+ /*
+ * Special shortcut for embedded data returns the inode
+ * itself. Callers must detect this condition and access
+ * the embedded data (the strategy code does this for us).
+ *
+ * This is only applicable to regular files and softlinks.
+ *
+ * We need a second lock on parent. Since we already have
+ * a lock we must pass LOCKAGAIN to prevent unexpected
+ * blocking (we don't want to block on a second shared
+ * ref if an exclusive lock is pending).
+ */
+ if (parent->data->ipdata.meta.op_flags &
+ HAMMER2_OPFLAG_DIRECTDATA) {
+ hammer2_chain_ref(parent);
+ hammer2_chain_lock(parent,
+ how_always | HAMMER2_RESOLVE_LOCKAGAIN);
+ *key_nextp = key_end + 1;
+ return (parent);
+ }
+ base = &parent->data->ipdata.u.blockset.blockref[0];
+ count = HAMMER2_SET_COUNT;
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP_NODE:
+ case HAMMER2_BREF_TYPE_INDIRECT:
+ KKASSERT(parent->data);
+ base = &parent->data->npdata[0];
+ count = parent->bytes / sizeof(hammer2_blockref_t);
+ break;
+ case HAMMER2_BREF_TYPE_VOLUME:
+ base = &parent->data->voldata.sroot_blockset.blockref[0];
+ count = HAMMER2_SET_COUNT;
+ break;
+ case HAMMER2_BREF_TYPE_FREEMAP:
+ base = &parent->data->blkset.blockref[0];
+ count = HAMMER2_SET_COUNT;
+ break;
+ default:
+ hpanic("bad blockref type %d", parent->bref.type);
+ break;
+ }
+
+ /*
+ * Merged scan to find next candidate.
+ *
+ * hammer2_base_*() functions require the parent->core.live_* fields
+ * to be synchronized.
+ *
+ * We need to hold the spinlock to access the block array and RB tree
+ * and to interlock chain creation.
+ */
+ if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
+ hammer2_chain_countbrefs(parent, base, count);
+
+ /* Combined search. */
+ hammer2_spin_ex(&parent->core.spin);
+ chain = hammer2_combined_find(parent, base, count, key_nextp, key_beg,
+ key_end, &bref);
+ generation = parent->core.generation;
+
+ /* Exhausted parent chain, iterate. */
+ if (bref == NULL) {
+ KKASSERT(chain == NULL);
+ hammer2_spin_unex(&parent->core.spin);
+ if (key_beg == key_end) /* Short cut single-key case. */
+ return (NULL);
+
+ /* Stop if we reached the end of the iteration. */
+ if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT &&
+ parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE)
+ return (NULL);
+
+ /*
+ * Calculate next key, stop if we reached the end of the
+ * iteration, otherwise go up one level and loop.
+ */
+ key_beg = parent->bref.key +
+ ((hammer2_key_t)1 << parent->bref.keybits);
+ if (key_beg == 0 || key_beg > key_end)
+ return (NULL);
+ parent = hammer2_chain_repparent(parentp, how_maybe);
+ goto again;
+ }
+
+ /* Selected from blockref or in-memory chain. */
+ bsave = *bref;
+ if (chain == NULL) {
+ hammer2_spin_unex(&parent->core.spin);
+ if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT ||
+ bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE)
+ chain = hammer2_chain_get(parent, generation, &bsave,
+ how_maybe);
+ else
+ chain = hammer2_chain_get(parent, generation, &bsave,
+ how);
+ if (chain == NULL)
+ goto again;
+ } else {
+ hammer2_chain_ref(chain);
+ hammer2_spin_unex(&parent->core.spin);
+ /*
+ * chain is referenced but not locked. We must lock the
+ * chain to obtain definitive state.
+ */
+ if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT ||
+ bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE)
+ hammer2_chain_lock(chain, how_maybe);
+ else
+ hammer2_chain_lock(chain, how);
+ KKASSERT(chain->parent == parent);
+ }
+ if (bcmp(&bsave, &chain->bref, sizeof(bsave)) ||
+ chain->parent != parent) {
+ hammer2_chain_unlock(chain);
+ hammer2_chain_drop(chain);
+ chain = NULL;
+ goto again;
+ }
+
+ /*
+ * If the chain element is an indirect block it becomes the new
+ * parent and we loop on it.
+ *
+ * The parent always has to be locked with at least RESOLVE_MAYBE
+ * so we can access its data. It might need a fixup if the caller
+ * passed incompatible flags. Be careful not to cause a deadlock
+ * as a data-load requires an exclusive lock.
+ */
+ if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
+ chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
+ hammer2_chain_unlock(parent);
+ hammer2_chain_drop(parent);
+ *parentp = parent = chain;
+ chain = NULL;
+ goto again;
+ }
+
+ /*
+ * All done, return the locked chain.
+ *
+ * NOTE! A chain->error must be tested by the caller upon return.
+ * *errorp is only set based on issues which occur while
+ * trying to reach the chain.
+ */
+ return (chain);
+}
+
+/*
+ * After having issued a lookup we can iterate all matching keys.
+ *
+ * If chain is non-NULL we continue the iteration from just after its index.
+ * If chain is NULL we assume the parent was exhausted and continue the
+ * iteration at the next parent.
+ *
+ * If a fatal error occurs (typically an I/O error), a dummy chain is
+ * returned with chain->error and error-identifying information set. This
+ * chain will assert if you try to do anything fancy with it.
+ *
+ * XXX Depending on where the error occurs we should allow continued iteration.
+ *
+ * parent must be locked on entry and remains locked throughout. chain's
+ * lock status must match flags. Chain is always at least referenced.
+ */
+hammer2_chain_t *
+hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain,
+ hammer2_key_t *key_nextp, hammer2_key_t key_beg, hammer2_key_t key_end,
+ int *errorp, int flags)
+{
+ hammer2_chain_t *parent;
+ int how_maybe;
+
+ /* Calculate locking flags for upward recursion. */
+ how_maybe = HAMMER2_RESOLVE_MAYBE;
+ if (flags & HAMMER2_LOOKUP_SHARED)
+ how_maybe |= HAMMER2_RESOLVE_SHARED;
+
+ parent = *parentp;
+ hammer2_mtx_assert_locked(&parent->lock);
+ *errorp = 0;
+
+ /* Calculate the next index and recalculate the parent if necessary. */
+ if (chain) {
+ key_beg = chain->bref.key +
+ ((hammer2_key_t)1 << chain->bref.keybits);
+ hammer2_chain_unlock(chain);
+ hammer2_chain_drop(chain);
+ /*
+ * chain invalid past this point, but we can still do a
+ * pointer comparison w/parent.
+ *
+ * Any scan where the lookup returned degenerate data embedded
+ * in the inode has an invalid index and must terminate.
+ */
+ if (chain == parent)
+ return (NULL);
+ if (key_beg == 0 || key_beg > key_end)
+ return (NULL);
+ chain = NULL;
+ } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT &&
+ parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) {
+ /* We reached the end of the iteration. */
+ return (NULL);
+ } else {
+ /*
+ * Continue iteration with next parent unless the current
+ * parent covers the range.
+ */
+ key_beg = parent->bref.key +
+ ((hammer2_key_t)1 << parent->bref.keybits);
+ if (key_beg == 0 || key_beg > key_end)
+ return (NULL);
+ parent = hammer2_chain_repparent(parentp, how_maybe);
+ }
+
+ /* And execute. */
+ return (hammer2_chain_lookup(parentp, key_nextp, key_beg, key_end,
+ errorp, flags));
+}
+
+/*
+ * Returns the index of the nearest element in the blockref array >= elm.
+ * Returns (count) if no element could be found.
+ *
+ * Sets *key_nextp to the next key for loop purposes but does not modify
+ * it if the next key would be higher than the current value of *key_nextp.
+ * Note that *key_nextp can overflow to 0, which should be tested by the
+ * caller.
+ *
+ * WARNING! Must be called with parent's spinlock held. Spinlock remains
+ * held through the operation.
+ */
+static int
+hammer2_base_find(hammer2_chain_t *parent, hammer2_blockref_t *base, int count,
+ hammer2_key_t *key_nextp, hammer2_key_t key_beg, hammer2_key_t key_end)
+{
+ hammer2_blockref_t *scan;
+ hammer2_key_t scan_end;
+ int i, limit;
+
+ /*
+ * Require that the live chain already has its brefs counted so we can
+ * optimize operations.
+ */
+ KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS);
+
+ /* Degenerate case. */
+ if (count == 0 || base == NULL)
+ return (count);
+
+ /*
+ * Sequential optimization using parent->cache_index. This is
+ * the most likely scenario.
+ *
+ * We can avoid trailing empty entries on live chains, otherwise
+ * we might have to check the whole block array.
+ */
+ i = parent->cache_index; /* SMP RACE OK */
+ __compiler_membar();
+ limit = parent->core.live_zero;
+ if (i >= limit)
+ i = limit - 1;
+ if (i < 0)
+ i = 0;
+ KKASSERT(i < count);
+
+ /* Search backwards. */
+ scan = &base[i];
+ while (i > 0 && (scan->type == HAMMER2_BREF_TYPE_EMPTY ||
+ scan->key > key_beg)) {
+ --scan;
+ --i;
+ }
+ parent->cache_index = i;
+
+ /*
+ * Search forwards, stop when we find a scan element which
+ * encloses the key or until we know that there are no further
+ * elements.
+ */
+ while (i < count) {
+ if (scan->type != HAMMER2_BREF_TYPE_EMPTY) {
+ scan_end = scan->key +
+ ((hammer2_key_t)1 << scan->keybits) - 1;
+ if (scan->key > key_beg || scan_end >= key_beg)
+ break;
+ }
+ if (i >= limit)
+ return (count);
+ ++scan;
+ ++i;
+ }
+ if (i != count) {
+ parent->cache_index = i;
+ if (i >= limit) {
+ i = count;
+ } else {
+ scan_end = scan->key +
+ ((hammer2_key_t)1 << scan->keybits);
+ if (scan_end && (*key_nextp > scan_end ||
+ *key_nextp == 0))
+ *key_nextp = scan_end;
+ }
+ }
+ return (i);
+}
+
+/*
+ * Do a combined search and return the next match either from the blockref
+ * array or from the in-memory chain. Sets *brefp to the returned bref in
+ * both cases, or sets it to NULL if the search is exhausted. Only returns
+ * a non-NULL chain if the search matched from the in-memory chain.
+ *
+ * When no in-memory chain matches, the next blockref array match is
+ * still returned via a non-NULL *brefp.
+ *
+ * The returned chain is not locked or referenced. Use the returned bref
+ * to determine whether the search is exhausted. Iterate if the base find
+ * is chosen but matches a deleted chain.
+ *
+ * WARNING! Must be called with parent's spinlock held. Spinlock remains
+ * held through the operation.
+ */
+static hammer2_chain_t *
+hammer2_combined_find(hammer2_chain_t *parent, hammer2_blockref_t *base,
+ int count, hammer2_key_t *key_nextp, hammer2_key_t key_beg,
+ hammer2_key_t key_end, hammer2_blockref_t **brefp)
+{
+ hammer2_chain_t *chain;
+ hammer2_blockref_t *bref;
+ int i;
+
+ /* Lookup in block array and in rbtree. */
+ *key_nextp = key_end + 1;
+ i = hammer2_base_find(parent, base, count, key_nextp, key_beg, key_end);
+ chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end);
+
+ /* Neither matched. */
+ if (i == count && chain == NULL) {
+ *brefp = NULL;
+ return (NULL);
+ }
+
+ /* Only chain matched. */
+ if (i == count) {
+ bref = &chain->bref;
+ goto found;
+ }
+
+ /* Only blockref matched. */
+ if (chain == NULL) {
+ bref = &base[i];
+ goto found;
+ }
+
+ /*
+ * Both in-memory and blockref matched, select the nearer element.
+ *
+ * If both are flush with the left-hand side or both are the
+ * same distance away, select the chain. In this situation the
+ * chain must have been loaded from the matching blockmap.
+ */
+ if ((chain->bref.key <= key_beg && base[i].key <= key_beg) ||
+ chain->bref.key == base[i].key) {
+ KKASSERT(chain->bref.key == base[i].key);
+ bref = &chain->bref;
+ goto found;
+ }
+
+ /* Select the nearer key. */
+ if (chain->bref.key < base[i].key) {
+ bref = &chain->bref;
+ } else {
+ bref = &base[i];
+ chain = NULL;
+ }
+
+ /* If the bref is out of bounds we've exhausted our search. */
+found:
+ if (bref->key > key_end) {
+ *brefp = NULL;
+ chain = NULL;
+ } else {
+ *brefp = bref;
+ }
+ return (chain);
+}
+
+/*
+ * Returns non-zero on success, 0 on failure.
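+ * Success means the media data matches the check code stored in the bref.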
+ */
+static int
+hammer2_chain_testcheck(const hammer2_chain_t *chain, void *bdata)
+{
+ static int count = 0;
+ int r = 0;
+
+ switch (HAMMER2_DEC_CHECK(chain->bref.methods)) {
+ case HAMMER2_CHECK_NONE:
+ case HAMMER2_CHECK_DISABLED:
+ r = 1;
+ break;
+ case HAMMER2_CHECK_ISCSI32:
+ r = chain->bref.check.iscsi32.value ==
+ hammer2_icrc32(bdata, chain->bytes);
+ break;
+ case HAMMER2_CHECK_XXHASH64:
+ r = chain->bref.check.xxhash64.value ==
+ XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED);
+ break;
+ case HAMMER2_CHECK_SHA192:
+ {
+ SHA256_CTX hash_ctx;
+ union {
+ uint8_t digest[SHA256_DIGEST_LENGTH];
+ uint64_t digest64[SHA256_DIGEST_LENGTH/8];
+ } u;
+
+ SHA256_Init(&hash_ctx);
+ SHA256_Update(&hash_ctx, bdata, chain->bytes);
+ SHA256_Final(u.digest, &hash_ctx);
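+ /* Fold the 256-bit digest down to the 192 bits stored in the bref. */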
+ u.digest64[2] ^= u.digest64[3];
+ r = bcmp(u.digest, chain->bref.check.sha192.data,
+ sizeof(chain->bref.check.sha192.data)) == 0;
+ }
+ break;
+ case HAMMER2_CHECK_FREEMAP:
+ r = chain->bref.check.freemap.icrc32 ==
+ hammer2_icrc32(bdata, chain->bytes);
+ break;
+ default:
+ hpanic("unknown check type %02x\n", chain->bref.methods);
+ break;
+ }
+
+ if (r == 0 && count < 1000) {
+ hprintf("failed: chain %s %016jx %016jx/%-2d meth=%02x "
+ "mir=%016jx mod=%016jx flags=%08x\n",
+ hammer2_breftype_to_str(chain->bref.type),
+ chain->bref.data_off, chain->bref.key, chain->bref.keybits,
+ chain->bref.methods, chain->bref.mirror_tid,
+ chain->bref.modify_tid, chain->flags);
+ count++;
+ if (count >= 1000)
+ hprintf("gave up\n");
+ }
+
+ return (r);
+}
+
+/*
+ * Acquire the chain and parent representing the specified inode for the
+ * device at the specified cluster index.
+ *
+ * The flags passed in are LOOKUP flags, not RESOLVE flags.
+ *
+ * If we are unable to locate the inode, HAMMER2_ERROR_EIO or
+ * HAMMER2_ERROR_CHECK is returned. In case of error, *chainp and/or
+ * *parentp may still be returned non-NULL.
+ *
+ * The caller may pass-in a locked *parentp and/or *chainp, or neither.
+ * They will be unlocked and released by this function. The *parentp and
+ * *chainp representing the located inode are returned locked.
+ *
+ * The returned error includes any error on the returned chain in addition to
+ * errors incurred while trying to lookup the inode.
+ */
+int
+hammer2_chain_inode_find(hammer2_pfs_t *pmp, hammer2_key_t inum, int clindex,
+ int flags, hammer2_chain_t **parentp, hammer2_chain_t **chainp)
+{
+ hammer2_inode_t *ip;
+ hammer2_chain_t *parent, *rchain;
+ hammer2_key_t key_dummy;
+ int resolve_flags, error;
+
+ resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ?
+ HAMMER2_RESOLVE_SHARED : 0;
+
+ /* Caller expects us to replace these. */
+ if (*chainp) {
+ hammer2_chain_unlock(*chainp);
+ hammer2_chain_drop(*chainp);
+ *chainp = NULL;
+ }
+ if (*parentp) {
+ hammer2_chain_unlock(*parentp);
+ hammer2_chain_drop(*parentp);
+ *parentp = NULL;
+ }
+
+ /*
+ * Be very careful, this is a backend function and we CANNOT
+ * lock any frontend inode structure we find. But we have to
+ * look the inode up this way first in case it exists but is
+ * detached from the radix tree.
+ */
+ ip = hammer2_inode_lookup(pmp, inum);
+ if (ip) {
+ *chainp = hammer2_inode_chain_and_parent(ip, clindex, parentp,
+ resolve_flags);
+ hammer2_inode_drop(ip);
+ if (*chainp)
+ return ((*chainp)->error);
+ /* *chainp is NULL here; only the parent may still need release. */
+ if (*parentp) {
+ hammer2_chain_unlock(*parentp);
+ hammer2_chain_drop(*parentp);
+ *parentp = NULL;
+ }
+ }
+
+ /*
+ * Inodes hang off of the iroot (bit 63 is clear, differentiating
+ * inodes from root directory entries in the key lookup).
+ */
+ parent = hammer2_inode_chain(pmp->iroot, clindex, resolve_flags);
+ rchain = NULL;
+ if (parent) {
+ /*
+ * NOTE: rchain can be returned as NULL even if error == 0
+ * (i.e. not found)
+ */
+ rchain = hammer2_chain_lookup(&parent, &key_dummy, inum, inum,
+ &error, flags);
+ /*
+ * Propagate a chain-specific error to the caller.
+ *
+ * Even when the chain is not errored we must validate the inode
+ * number, because all hell breaks loose if it is wrong. It should
+ * always match, so log to the console and simulate a CHECK error
+ * if it does not.
+ */
+ if (error == 0 && rchain) {
+ error = rchain->error;
+ if (error == 0 && rchain->data)
+ if (inum != rchain->data->ipdata.meta.inum) {
+ hprintf("lookup inum %ju, got valid "
+ "inode but with inum %ju\n",
+ inum,
+ rchain->data->ipdata.meta.inum);
+ error = HAMMER2_ERROR_CHECK;
+ rchain->error = error;
+ }
+ }
+ } else {
+ error = HAMMER2_ERROR_EIO;
+ }
+ *parentp = parent;
+ *chainp = rchain;
+
+ return (error);
+}
+
+/*
+ * Returns non-zero if the chain (INODE or DIRENT) matches the filename.
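+ *
+ * Dirent names short enough to fit in bref.check.buf are compared there;
+ * longer names are stored in the chain's data block.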
+ */
+int
+hammer2_chain_dirent_test(const hammer2_chain_t *chain, const char *name,
+ size_t name_len)
+{
+ const hammer2_inode_data_t *ripdata;
+
+ if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
+ ripdata = &chain->data->ipdata;
+ if (ripdata->meta.name_len == name_len &&
+ bcmp(ripdata->filename, name, name_len) == 0)
+ return (1);
+ }
+ if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT &&
+ chain->bref.embed.dirent.namlen == name_len) {
+ if (name_len > sizeof(chain->bref.check.buf) &&
+ bcmp(chain->data->buf, name, name_len) == 0)
+ return (1);
+ if (name_len <= sizeof(chain->bref.check.buf) &&
+ bcmp(chain->bref.check.buf, name, name_len) == 0)
+ return (1);
+ }
+
+ return (0);
+}
+
+void
+hammer2_dump_chain(hammer2_chain_t *chain, int tab, int bi, int *countp,
+ char pfx, unsigned int flags)
+{
+ hammer2_chain_t *scan, *parent;
+ int i;
+
+ --*countp;
+ if (*countp == 0) {
+ printf("%*.*s...\n", tab, tab, "");
+ return;
+ }
+ if (*countp < 0)
+ return;
+
+ printf("%*.*s%c-chain %p %s.%-3d %016jx %016jx/%-2d mir=%016jx "
+ "mod=%016jx\n",
+ tab, tab, "", pfx, chain,
+ hammer2_breftype_to_str(chain->bref.type), bi,
+ chain->bref.data_off, chain->bref.key, chain->bref.keybits,
+ chain->bref.mirror_tid, chain->bref.modify_tid);
+
+ printf("%*.*s [%08x] (%s) refs=%d",
+ tab, tab, "", chain->flags,
+ (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data) ?
+ (char *)chain->data->ipdata.filename : "?",
+ chain->refs);
+
+ parent = chain->parent;
+ if (parent)
+ printf("\n%*.*s p=%p [pflags %08x prefs %d]",
+ tab, tab, "", parent, parent->flags, parent->refs);
+
+ if (RB_EMPTY(&chain->core.rbtree)) {
+ printf("\n");
+ } else {
+ i = 0;
+ printf(" {\n");
+ RB_FOREACH(scan, hammer2_chain_tree, &chain->core.rbtree) {
+ if ((scan->flags & flags) || flags == (unsigned int)-1)
+ hammer2_dump_chain(scan, tab + 4, i, countp,
+ 'a', flags);
+ i++;
+ }
+ if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data)
+ printf("%*.*s}(%s)\n", tab, tab, "",
+ chain->data->ipdata.filename);
+ else
+ printf("%*.*s}\n", tab, tab, "");
+ }
+}
diff --git a/sys/fs/hammer2/hammer2_cluster.c b/sys/fs/hammer2/hammer2_cluster.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_cluster.c
@@ -0,0 +1,189 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include "hammer2.h"
+
+/*
+ * Returns the bref type of the cluster's focus.
+ *
+ * If the cluster is errored, returns HAMMER2_BREF_TYPE_EMPTY (0).
+ * The cluster must be locked.
+ */
+uint8_t
+hammer2_cluster_type(const hammer2_cluster_t *cluster)
+{
+ if (cluster->error == 0) {
+ KKASSERT(cluster->focus != NULL);
+ return (cluster->focus->bref.type);
+ }
+
+ return (0);
+}
+
+/*
+ * Returns the bref of the cluster's focus, sans any data-offset information
+ * (since offset information is per-node and wouldn't be useful).
+ *
+ * If the cluster is errored, returns an empty bref.
+ * The cluster must be locked.
+ */
+void
+hammer2_cluster_bref(const hammer2_cluster_t *cluster, hammer2_blockref_t *bref)
+{
+ if (cluster->error == 0) {
+ KKASSERT(cluster->focus != NULL);
+ *bref = cluster->focus->bref;
+ bref->data_off = 0;
+ } else {
+ bzero(bref, sizeof(*bref));
+ }
+}
+
+/*
+ * Create a degenerate cluster with one ref from a single locked chain.
+ * The returned cluster will be focused on the chain and inherit its
+ * error state.
+ *
+ * The chain's lock and reference are transferred to the new cluster, so
+ * the caller should not try to unlock the chain separately.
+ */
+void
+hammer2_dummy_xop_from_chain(hammer2_xop_head_t *xop, hammer2_chain_t *chain)
+{
+ hammer2_cluster_t *cluster = &xop->cluster;
+
+ bzero(xop, sizeof(*xop));
+
+ cluster->array[0].chain = chain;
+ cluster->nchains = 1;
+ cluster->focus = chain;
+ cluster->pmp = chain->pmp;
+ cluster->error = chain->error;
+
+ hammer2_assert_cluster(cluster);
+}
+
+void
+hammer2_cluster_unhold(hammer2_cluster_t *cluster)
+{
+ hammer2_chain_t *chain;
+ int i;
+
+ for (i = 0; i < cluster->nchains; ++i) {
+ chain = cluster->array[i].chain;
+ if (chain)
+ hammer2_chain_unhold(chain);
+ }
+}
+
+void
+hammer2_cluster_rehold(hammer2_cluster_t *cluster)
+{
+ hammer2_chain_t *chain;
+ int i;
+
+ for (i = 0; i < cluster->nchains; ++i) {
+ chain = cluster->array[i].chain;
+ if (chain)
+ hammer2_chain_rehold(chain);
+ }
+}
+
+/*
+ * This is used by the XOPS subsystem to calculate the state of
+ * the collection and tell hammer2_xop_collect() what to do with it.
+ */
+int
+hammer2_cluster_check(hammer2_cluster_t *cluster, hammer2_key_t key, int flags)
+{
+ hammer2_pfs_t *pmp;
+ hammer2_chain_t *chain;
+ int i, error;
+
+ cluster->focus = NULL;
+ cluster->error = 0;
+ hammer2_assert_cluster(cluster);
+
+ pmp = cluster->pmp;
+ KKASSERT(pmp != NULL);
+
+ /*
+ * NOTE: A NULL chain is not necessarily an error, it could be
+ * e.g. a lookup failure or the end of an iteration.
+ * Process normally.
+ */
+ for (i = 0; i < cluster->nchains; ++i) {
+ chain = cluster->array[i].chain;
+ error = cluster->array[i].error;
+
+ switch (pmp->pfs_types[i]) {
+ case HAMMER2_PFSTYPE_MASTER:
+ case HAMMER2_PFSTYPE_SUPROOT:
+ cluster->focus = chain;
+ cluster->error = error;
+ break;
+ default:
+ hpanic("invalid PFS type %d", pmp->pfs_types[i]);
+ break;
+ }
+ }
+
+ if (flags & HAMMER2_CHECK_NULL) {
+ if (cluster->error == 0)
+ cluster->error = HAMMER2_ERROR_ENOENT;
+ return (cluster->error);
+ }
+
+ if (cluster->focus == NULL)
+ return (HAMMER2_ERROR_EIO);
+
+ for (i = 0; i < cluster->nchains; ++i) {
+ chain = cluster->array[i].chain;
+ if (i == 0) {
+ KKASSERT(chain != NULL);
+ KKASSERT(chain == cluster->focus);
+ KKASSERT(chain->bref.key == key);
+ } else {
+ KKASSERT(chain == NULL);
+ }
+ }
+
+ return (cluster->error);
+}
diff --git a/sys/fs/hammer2/hammer2_disk.h b/sys/fs/hammer2/hammer2_disk.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_disk.h
@@ -0,0 +1,1134 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _FS_HAMMER2_DISK_H_
+#define _FS_HAMMER2_DISK_H_
+
+#include <sys/types.h>
+#include <sys/uuid.h>
+
+/*
+ * The structures below represent the on-disk media structures for the HAMMER2
+ * filesystem. Note that all fields for on-disk structures are naturally
+ * aligned. The host endian format is typically used - compatibility is
+ * possible if the implementation detects reversed endian and adjusts accesses
+ * accordingly.
+ *
+ * HAMMER2 primarily revolves around the directory topology: inodes,
+ * directory entries, and block tables. Block device buffer cache buffers
+ * are always 64KB. Logical file buffers are typically 16KB. All data
+ * references utilize 64-bit byte offsets.
+ *
+ * Free block management is handled independently using blocks reserved by
+ * the media topology.
+ */
+
+/*
+ * The data at the end of a file or directory may be a fragment in order
+ * to optimize storage efficiency. The minimum fragment size is 1KB.
+ * Since allocations are in powers of 2 fragments must also be sized in
+ * powers of 2 (1024, 2048, ... 65536).
+ *
+ * For the moment the maximum allocation size is HAMMER2_PBUFSIZE (64K),
+ * which is 2^16. Larger extents may be supported in the future. Smaller
+ * fragments might be supported in the future (down to 64 bytes is possible),
+ * but probably will not be.
+ *
+ * A full indirect block supports 512 x 128-byte blockrefs in a 64KB
+ * buffer. Indirect blocks down to 1KB are supported to keep small
+ * directories small.
+ *
+ * A maximally sized file (2^64-1 bytes) requires ~6 indirect block levels
+ * using 64KB indirect blocks (128 byte refs, 512 or radix 9 per indblk).
+ *
+ * 16(datablk) + 9 + 9 + 9 + 9 + 9 + 9 = ~70.
+ * 16(datablk) + 7 + 9 + 9 + 9 + 9 + 9 = ~68. (smaller top level indblk)
+ *
+ * The actual depth depends on copies redundancy and whether the filesystem
+ * has chosen to use a smaller indirect block size at the top level or not.
+ */
+#define HAMMER2_ALLOC_MIN 1024 /* minimum allocation size */
+#define HAMMER2_RADIX_MIN 10 /* minimum allocation size 2^N */
+#define HAMMER2_ALLOC_MAX 65536 /* maximum allocation size */
+#define HAMMER2_RADIX_MAX 16 /* maximum allocation size 2^N */
+#define HAMMER2_RADIX_KEY 64 /* number of bits in key */
+
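As an illustration of these constants (the helper name is hypothetical), an allocation size is mapped to its radix by rounding up to the next power of 2 within the supported range:

/* Illustrative: smallest supported radix whose 2^radix covers 'bytes'. */
static int
example_alloc_radix(size_t bytes)
{
	int radix = HAMMER2_RADIX_MIN;			/* 1KB minimum */

	while (radix < HAMMER2_RADIX_MAX && ((size_t)1 << radix) < bytes)
		++radix;
	return (radix);					/* 10..16 */
}
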
+/*
+ * HAMMER2_LBUFSIZE - Nominal buffer size for I/O rollups.
+ *
+ * HAMMER2_PBUFSIZE - Topological block size used by files for all
+ * blocks except the block straddling EOF.
+ *
+ * HAMMER2_SEGSIZE - Allocation map segment size, typically 4MB
+ * (space represented by a level0 bitmap).
+ */
+#define HAMMER2_SEGSIZE (1 << HAMMER2_FREEMAP_LEVEL0_RADIX)
+#define HAMMER2_SEGRADIX HAMMER2_FREEMAP_LEVEL0_RADIX
+
+#define HAMMER2_PBUFRADIX 16 /* physical buf (1<<16) bytes */
+#define HAMMER2_PBUFSIZE 65536
+#define HAMMER2_LBUFRADIX 14 /* logical buf (1<<14) bytes */
+#define HAMMER2_LBUFSIZE 16384
+
+#define HAMMER2_IND_BYTES_MIN 4096
+#define HAMMER2_IND_BYTES_NOM HAMMER2_LBUFSIZE
+#define HAMMER2_IND_BYTES_MAX HAMMER2_PBUFSIZE
+#define HAMMER2_IND_RADIX_MIN 12
+#define HAMMER2_IND_RADIX_NOM HAMMER2_LBUFRADIX
+#define HAMMER2_IND_RADIX_MAX HAMMER2_PBUFRADIX
+#define HAMMER2_IND_COUNT_MIN (HAMMER2_IND_BYTES_MIN / \
+ sizeof(hammer2_blockref_t))
+#define HAMMER2_IND_COUNT_NOM (HAMMER2_IND_BYTES_NOM / \
+ sizeof(hammer2_blockref_t))
+#define HAMMER2_IND_COUNT_MAX (HAMMER2_IND_BYTES_MAX / \
+ sizeof(hammer2_blockref_t))
+
+/*
+ * In HAMMER2, arrays of blockrefs are fully set-associative, meaning that
+ * any element can occur at any index and holes can be anywhere.
+ *
+ * Inodes embed either 512 bytes of direct data or an array of 4 blockrefs,
+ * resulting in highly efficient storage for files <= 512 bytes and for files
+ * <= 512KB. Up to 4 directory entries can be referenced from a directory
+ * without requiring an indirect block.
+ */
+#define HAMMER2_SET_RADIX 2 /* radix 2 = 4 entries */
+#define HAMMER2_SET_COUNT (1 << HAMMER2_SET_RADIX)
+#define HAMMER2_EMBEDDED_BYTES 512 /* inode blockset/dd size */
+#define HAMMER2_EMBEDDED_RADIX 9
+
+#define HAMMER2_PBUFMASK (HAMMER2_PBUFSIZE - 1)
+#define HAMMER2_LBUFMASK (HAMMER2_LBUFSIZE - 1)
+#define HAMMER2_SEGMASK (HAMMER2_SEGSIZE - 1)
+
+#define HAMMER2_LBUFMASK64 ((hammer2_off_t)HAMMER2_LBUFMASK)
+#define HAMMER2_PBUFSIZE64 ((hammer2_off_t)HAMMER2_PBUFSIZE)
+#define HAMMER2_PBUFMASK64 ((hammer2_off_t)HAMMER2_PBUFMASK)
+#define HAMMER2_SEGSIZE64 ((hammer2_off_t)HAMMER2_SEGSIZE)
+#define HAMMER2_SEGMASK64 ((hammer2_off_t)HAMMER2_SEGMASK)
+
+#define HAMMER2_UUID_STRING "5cbb9ad1-862d-11dc-a94d-01301bb8a9f5"
+
+/*
+ * A 4MB segment is reserved at the beginning of each 1GB. This segment
+ * contains the volume header (or backup volume header), the free block
+ * table, and possibly other information in the future.
+ *
+ * 4MB = 64 x 64K blocks. Each 4MB segment is broken down as follows:
+ *
+ * ==========
+ * 0 volume header (for the first four 2GB zones)
+ * 1 freemap00 level1 FREEMAP_LEAF (256 x 128B bitmap data per 1GB)
+ * 2 level2 FREEMAP_NODE (256 x 128B indirect block per 256GB)
+ * 3 level3 FREEMAP_NODE (256 x 128B indirect block per 64TB)
+ * 4 level4 FREEMAP_NODE (256 x 128B indirect block per 16PB)
+ * 5 level5 FREEMAP_NODE (256 x 128B indirect block per 4EB)
+ * 6 freemap01 level1 (rotation)
+ * 7 level2
+ * 8 level3
+ * 9 level4
+ * 10 level5
+ * 11 freemap02 level1 (rotation)
+ * 12 level2
+ * 13 level3
+ * 14 level4
+ * 15 level5
+ * 16 freemap03 level1 (rotation)
+ * 17 level2
+ * 18 level3
+ * 19 level4
+ * 20 level5
+ * 21 freemap04 level1 (rotation)
+ * 22 level2
+ * 23 level3
+ * 24 level4
+ * 25 level5
+ * 26 freemap05 level1 (rotation)
+ * 27 level2
+ * 28 level3
+ * 29 level4
+ * 30 level5
+ * 31 freemap06 level1 (rotation)
+ * 32 level2
+ * 33 level3
+ * 34 level4
+ * 35 level5
+ * 36 freemap07 level1 (rotation)
+ * 37 level2
+ * 38 level3
+ * 39 level4
+ * 40 level5
+ * 41 unused
+ * .. unused
+ * 63 unused
+ * ==========
+ *
+ * The first four 2GB zones contain volume headers and volume header backups.
+ * After that the volume header block# is reserved for future use. Similarly,
+ * there are many blocks related to various Freemap levels which are not
+ * used in every segment and those are also reserved for future use.
+ * Note that each FREEMAP_LEAF or FREEMAP_NODE uses 32KB out of its 64KB slot.
+ *
+ * Freemap (see the FREEMAP document)
+ *
+ * The freemap utilizes blocks #1-40 in 8 sets of 5 blocks. Each block in
+ * a set represents a level of depth in the freemap topology. Eight sets
+ * exist to prevent live updates from disturbing the state of the freemap
+ * were a crash/reboot to occur. That is, a live update is not committed
+ * until the update's flush reaches the volume root. There are FOUR volume
+ * roots representing the last four synchronization points, so the freemap
+ * must be consistent no matter which volume root is chosen by the mount
+ * code.
+ *
+ * Each freemap set is 5 x 64K blocks and represents the 1GB, 256GB, 64TB,
+ * 16PB and 4EB indirect map. The volume header itself has a set of 4 freemap
+ * blockrefs representing another 2 bits, giving us a total 64 bits of
+ * representable address space.
+ *
+ * The Level 0 64KB block represents 1GB of storage represented by 32KB
+ * (256 x struct hammer2_bmap_data). Each structure represents 4MB of storage
+ * and has a 512 bit bitmap, using 2 bits to represent a 16KB chunk of
+ * storage. These 2 bits represent the following states:
+ *
+ * 00 Free
+ * 01 (reserved) (Possibly partially allocated)
+ * 10 Possibly free
+ * 11 Allocated
+ *
+ * One important thing to note here is that the freemap resolution is 16KB,
+ * but the minimum storage allocation size is 1KB. The hammer2 vfs keeps
+ * track of sub-allocations in memory, which means that on an unmount or reboot
+ * the entire 16KB of a partially allocated block will be considered fully
+ * allocated. It is possible for fragmentation to build up over time, but
+ * defragmentation is fairly easy to accomplish since all modifications
+ * allocate a new block.
+ *
+ * The second thing to note is that due to the way snapshots and inode
+ * replication works, deleting a file cannot immediately free the related
+ * space. Furthermore, deletions often do not bother to traverse the
+ * block subhierarchy being deleted. And to go even further, whole
+ * sub-directory trees can be deleted simply by deleting the directory inode
+ * at the top. So even though we have a symbol to represent a 'possibly free'
+ * block (binary 10), only the bulk free scanning code can actually use it.
+ * Normal 'rm's or other deletions do not.
+ *
+ * WARNING! ZONE_SEG and VOLUME_ALIGN must be a multiple of 1<<LEVEL0_RADIX
+ * (i.e. a multiple of 4MB). VOLUME_ALIGN must be >= ZONE_SEG.
+ *
+ * In Summary:
+ *
+ * (1) Modifications to freemap blocks 'allocate' a new copy (aka use a block
+ * from the next set). The new copy is reused until a flush occurs at
+ * which point the next modification will then rotate to the next set.
+ */
+#define HAMMER2_VOLUME_ALIGN (8 * 1024 * 1024)
+#define HAMMER2_VOLUME_ALIGN64 ((hammer2_off_t)HAMMER2_VOLUME_ALIGN)
+#define HAMMER2_VOLUME_ALIGNMASK (HAMMER2_VOLUME_ALIGN - 1)
+#define HAMMER2_VOLUME_ALIGNMASK64 ((hammer2_off_t)HAMMER2_VOLUME_ALIGNMASK)
+
+#define HAMMER2_NEWFS_ALIGN (HAMMER2_VOLUME_ALIGN)
+#define HAMMER2_NEWFS_ALIGN64 ((hammer2_off_t)HAMMER2_VOLUME_ALIGN)
+#define HAMMER2_NEWFS_ALIGNMASK (HAMMER2_VOLUME_ALIGN - 1)
+#define HAMMER2_NEWFS_ALIGNMASK64 ((hammer2_off_t)HAMMER2_NEWFS_ALIGNMASK)
+
+#define HAMMER2_ZONE_BYTES64 (2LLU * 1024 * 1024 * 1024)
+#define HAMMER2_ZONE_MASK64 (HAMMER2_ZONE_BYTES64 - 1)
+#define HAMMER2_ZONE_SEG (4 * 1024 * 1024)
+#define HAMMER2_ZONE_SEG64 ((hammer2_off_t)HAMMER2_ZONE_SEG)
+#define HAMMER2_ZONE_BLOCKS_SEG (HAMMER2_ZONE_SEG / HAMMER2_PBUFSIZE)
+
+#define HAMMER2_ZONE_FREEMAP_INC 5 /* 5 deep */
+
+#define HAMMER2_ZONE_VOLHDR 0 /* volume header or backup */
+#define HAMMER2_ZONE_FREEMAP_00 1 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_01 6 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_02 11 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_03 16 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_04 21 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_05 26 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_06 31 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_07 36 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_END 41 /* (non-inclusive) */
+ /* zone 41-63 unused */
+#define HAMMER2_ZONE_END 64 /* non-inclusive */
+
+#define HAMMER2_NFREEMAPS 8 /* FREEMAP_00 - FREEMAP_07 */
+
+ /* relative to FREEMAP_x */
+#define HAMMER2_ZONEFM_LEVEL1 0 /* 1GB leafmap */
+#define HAMMER2_ZONEFM_LEVEL2 1 /* 256GB indmap */
+#define HAMMER2_ZONEFM_LEVEL3 2 /* 64TB indmap */
+#define HAMMER2_ZONEFM_LEVEL4 3 /* 16PB indmap */
+#define HAMMER2_ZONEFM_LEVEL5 4 /* 4EB indmap */
+/* LEVEL6 is a set of 4 blockrefs in the volume header 16EB */
+
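The zone-relative 64KB block number of any freemap copy/level pair follows directly from the defines above; a hypothetical helper:

/* Illustrative: block # within a 2GB zone of a freemap copy/level. */
static int
example_freemap_block(int which, int level)
{
	/* which: 0..HAMMER2_NFREEMAPS-1, level: HAMMER2_ZONEFM_LEVEL1..5 */
	return (HAMMER2_ZONE_FREEMAP_00 +
	    which * HAMMER2_ZONE_FREEMAP_INC + level);
}
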
+/*
+ * Freemap radix. Assumes a set-count of 4, 128-byte blockrefs,
+ * 32KB indirect block for freemap (LEVELN_PSIZE below).
+ *
+ * Leaf entry represents 4MB of storage broken down into a 512-bit
+ * bitmap, 2-bits per entry. So a coarse bitmap item represents 16KB.
+ */
+_Static_assert(HAMMER2_SET_COUNT == 4, "freemap assumes SET_COUNT is 4");
+#define HAMMER2_FREEMAP_LEVEL6_RADIX 64 /* 16EB (end) */
+#define HAMMER2_FREEMAP_LEVEL5_RADIX 62 /* 4EB */
+#define HAMMER2_FREEMAP_LEVEL4_RADIX 54 /* 16PB */
+#define HAMMER2_FREEMAP_LEVEL3_RADIX 46 /* 64TB */
+#define HAMMER2_FREEMAP_LEVEL2_RADIX 38 /* 256GB */
+#define HAMMER2_FREEMAP_LEVEL1_RADIX 30 /* 1GB */
+#define HAMMER2_FREEMAP_LEVEL0_RADIX 22 /* 4MB (x 256 in l-1 leaf) */
+
+#define HAMMER2_FREEMAP_LEVELN_PSIZE 32768 /* physical bytes */
+
+#define HAMMER2_FREEMAP_LEVEL5_SIZE ((hammer2_off_t)1 << \
+ HAMMER2_FREEMAP_LEVEL5_RADIX)
+#define HAMMER2_FREEMAP_LEVEL4_SIZE ((hammer2_off_t)1 << \
+ HAMMER2_FREEMAP_LEVEL4_RADIX)
+#define HAMMER2_FREEMAP_LEVEL3_SIZE ((hammer2_off_t)1 << \
+ HAMMER2_FREEMAP_LEVEL3_RADIX)
+#define HAMMER2_FREEMAP_LEVEL2_SIZE ((hammer2_off_t)1 << \
+ HAMMER2_FREEMAP_LEVEL2_RADIX)
+#define HAMMER2_FREEMAP_LEVEL1_SIZE ((hammer2_off_t)1 << \
+ HAMMER2_FREEMAP_LEVEL1_RADIX)
+#define HAMMER2_FREEMAP_LEVEL0_SIZE ((hammer2_off_t)1 << \
+ HAMMER2_FREEMAP_LEVEL0_RADIX)
+
+#define HAMMER2_FREEMAP_LEVEL5_MASK (HAMMER2_FREEMAP_LEVEL5_SIZE - 1)
+#define HAMMER2_FREEMAP_LEVEL4_MASK (HAMMER2_FREEMAP_LEVEL4_SIZE - 1)
+#define HAMMER2_FREEMAP_LEVEL3_MASK (HAMMER2_FREEMAP_LEVEL3_SIZE - 1)
+#define HAMMER2_FREEMAP_LEVEL2_MASK (HAMMER2_FREEMAP_LEVEL2_SIZE - 1)
+#define HAMMER2_FREEMAP_LEVEL1_MASK (HAMMER2_FREEMAP_LEVEL1_SIZE - 1)
+#define HAMMER2_FREEMAP_LEVEL0_MASK (HAMMER2_FREEMAP_LEVEL0_SIZE - 1)
+
+#define HAMMER2_FREEMAP_COUNT (int)(HAMMER2_FREEMAP_LEVELN_PSIZE / \
+ sizeof(hammer2_bmap_data_t))
+
+/*
+ * XXX I made a mistake and made the reserved area begin at each LEVEL1 zone,
+ * which is on a 1GB demark. This will eat a little more space but for
+ * now we retain compatibility and make FMZONEBASE every 1GB
+ */
+#define H2FMZONEBASE(key) ((key) & ~HAMMER2_FREEMAP_LEVEL1_MASK)
+#define H2FMBASE(key, radix) rounddown2(key, (hammer2_off_t)1 << (radix))
+
+/*
+ * 16KB bitmap granularity (x2 bits per entry).
+ */
+#define HAMMER2_FREEMAP_BLOCK_RADIX 14
+#define HAMMER2_FREEMAP_BLOCK_SIZE (1 << HAMMER2_FREEMAP_BLOCK_RADIX)
+#define HAMMER2_FREEMAP_BLOCK_MASK (HAMMER2_FREEMAP_BLOCK_SIZE - 1)
+
+/*
+ * bitmap[] structure. 2 bits per HAMMER2_FREEMAP_BLOCK_SIZE.
+ *
+ * 8 x 64-bit elements, 2 bits per block.
+ * 32 blocks (radix 5) per element.
+ * representing INDEX_SIZE bytes worth of storage per element.
+ */
+typedef uint64_t hammer2_bitmap_t;
+
+#define HAMMER2_BMAP_ALLONES ((hammer2_bitmap_t)-1)
+#define HAMMER2_BMAP_ELEMENTS 8
+#define HAMMER2_BMAP_BITS_PER_ELEMENT 64
+#define HAMMER2_BMAP_INDEX_RADIX 5 /* 32 blocks per element */
+#define HAMMER2_BMAP_BLOCKS_PER_ELEMENT (1 << HAMMER2_BMAP_INDEX_RADIX)
+
+#define HAMMER2_BMAP_INDEX_SIZE (HAMMER2_FREEMAP_BLOCK_SIZE * \
+ HAMMER2_BMAP_BLOCKS_PER_ELEMENT)
+#define HAMMER2_BMAP_INDEX_MASK (HAMMER2_BMAP_INDEX_SIZE - 1)
+
+#define HAMMER2_BMAP_SIZE (HAMMER2_BMAP_INDEX_SIZE * \
+ HAMMER2_BMAP_ELEMENTS)
+#define HAMMER2_BMAP_MASK (HAMMER2_BMAP_SIZE - 1)
+
+/*
+ * Two linear areas can be reserved after the initial 4MB segment in the base
+ * zone (the one starting at offset 0). These areas are NOT managed by the
+ * block allocator and do not fall under the HAMMER2 crc checking rules
+ * rooted at the volume header (but can be self-CRCd internally, depending
+ * on their use).
+ */
+#define HAMMER2_BOOT_MIN_BYTES HAMMER2_VOLUME_ALIGN
+#define HAMMER2_BOOT_NOM_BYTES (64*1024*1024)
+#define HAMMER2_BOOT_MAX_BYTES (256*1024*1024)
+
+#define HAMMER2_AUX_MIN_BYTES HAMMER2_VOLUME_ALIGN
+#define HAMMER2_AUX_NOM_BYTES (256*1024*1024)
+#define HAMMER2_AUX_MAX_BYTES (1024*1024*1024)
+
+/*
+ * Most HAMMER2 types are implemented as unsigned 64-bit integers.
+ * Transaction ids are monotonic.
+ *
+ * We utilize 32-bit iSCSI CRCs.
+ */
+typedef uint64_t hammer2_tid_t;
+typedef uint64_t hammer2_off_t;
+typedef uint64_t hammer2_key_t;
+typedef uint32_t hammer2_crc32_t;
+
+/*
+ * Miscellaneous ranges (all are unsigned).
+ */
+#define HAMMER2_TID_MIN 1ULL
+#define HAMMER2_TID_MAX 0xFFFFFFFFFFFFFFFFULL
+#define HAMMER2_KEY_MIN 0ULL
+#define HAMMER2_KEY_MAX 0xFFFFFFFFFFFFFFFFULL
+
+/*
+ * HAMMER2 data offset special cases and masking.
+ *
+ * All HAMMER2 data offsets have to be broken down into a 64K buffer base
+ * offset (HAMMER2_OFF_MASK_HI) and a 64K buffer index (HAMMER2_OFF_MASK_LO).
+ *
+ * Indexes into physical buffers are always 64-byte aligned. The low 6 bits
+ * of the data offset field specify, as a power of 2, the size of the data
+ * chunk being pointed to. The theoretical minimum radix is thus 6 (the space
+ * needed in the low bits of the data offset field). However, the practical
+ * minimum allocation chunk size is 1KB (a radix of 10), so HAMMER2 sets
+ * HAMMER2_RADIX_MIN to 10. The maximum radix is currently 16 (64KB), but
+ * we fully intend to support larger extents in the future.
+ *
+ * WARNING! A radix of 0 (such as when data_off is all 0's) is a special
+ * case which means no data associated with the blockref, and
+ * not the '1 byte' it would otherwise calculate to.
+ */
+#define HAMMER2_OFF_MASK 0xFFFFFFFFFFFFFFC0ULL
+#define HAMMER2_OFF_MASK_LO (HAMMER2_OFF_MASK & HAMMER2_PBUFMASK64)
+#define HAMMER2_OFF_MASK_HI (~HAMMER2_PBUFMASK64)
+#define HAMMER2_OFF_MASK_RADIX 0x000000000000003FULL
+
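A sketch of the decoding described above (the helper name is illustrative); note the radix-0 special case:

/* Illustrative: split a data_off into physical offset and byte size. */
static void
example_decode_data_off(hammer2_off_t data_off, hammer2_off_t *offp,
    size_t *bytesp)
{
	int radix = (int)(data_off & HAMMER2_OFF_MASK_RADIX);

	*offp = data_off & HAMMER2_OFF_MASK;	  /* 64-byte aligned base */
	*bytesp = radix ? (size_t)1 << radix : 0; /* radix 0 = no data */
}
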
+/*
+ * HAMMER2 directory support and pre-defined keys
+ */
+#define HAMMER2_DIRHASH_VISIBLE 0x8000000000000000ULL
+#define HAMMER2_DIRHASH_USERMSK 0x7FFFFFFFFFFFFFFFULL
+#define HAMMER2_DIRHASH_LOMASK 0x0000000000007FFFULL
+
+#define HAMMER2_SROOT_KEY 0x0000000000000000ULL /* volume to sroot */
+#define HAMMER2_BOOT_KEY 0xd9b36ce135528000ULL /* sroot to BOOT PFS */
+
+/*
+ * HAMMER2 directory entry header (embedded in blockref) exactly 16 bytes
+ */
+struct hammer2_dirent_head {
+ hammer2_tid_t inum; /* inode number */
+ uint16_t namlen; /* name length */
+ uint8_t type; /* OBJTYPE_* */
+ uint8_t unused0B;
+ uint8_t unused0C[4];
+} __packed;
+
+typedef struct hammer2_dirent_head hammer2_dirent_head_t;
+
+/*
+ * The media block reference structure. This forms the core of the HAMMER2
+ * media topology recursion. This 128-byte data structure is embedded in the
+ * volume header, in inodes (which are also directory entries), and in
+ * indirect blocks.
+ *
+ * A blockref references a single media item, which typically can be a
+ * directory entry (aka inode), indirect block, or data block.
+ *
+ * The primary feature a blockref represents is the ability to validate
+ * the entire tree underneath it via its check code. Any modification to
+ * anything propagates up the blockref tree all the way to the root, replacing
+ * the related blocks and compounding the generated check code.
+ *
+ * The check code can be a simple 32-bit iscsi code, a 64-bit crc, or as
+ * complex as a 512 bit cryptographic hash. I originally used a 64-byte
+ * blockref but later expanded it to 128 bytes to be able to support the
+ * larger check code as well as to embed statistics for quota operation.
+ *
+ * Simple check codes are not sufficient for unverified dedup. Even with
+ * a maximally-sized check code, unverified dedup should only be used in
+ * subdirectory trees where you do not need 100% data integrity.
+ *
+ * Unverified dedup is deduping based on meta-data only without verifying
+ * that the data blocks are actually identical. Verified dedup guarantees
+ * integrity but is a far more I/O-expensive operation.
+ *
+ * --
+ *
+ * mirror_tid - per cluster node modified (propagated upward by flush)
+ * modify_tid - clc record modified (not propagated).
+ * update_tid - clc record updated (propagated upward on verification)
+ *
+ * CLC - Stands for 'Cluster Level Change', identifiers which are identical
+ * within the topology across all cluster nodes (when fully
+ * synchronized).
+ *
+ * NOTE: The range of keys represented by the blockref is (key) to
+ * ((key) + (1LL << keybits) - 1). HAMMER2 usually populates
+ * blocks bottom-up, inserting a new root when radix expansion
+ * is required.
+ *
+ * leaf_count - Helps manage leaf collapse calculations when indirect
+ * blocks become mostly empty. This value caps out at
+ * HAMMER2_BLOCKREF_LEAF_MAX (65535).
+ *
+ * Used by the chain code to determine when to pull leafs up
+ * from nearly empty indirect blocks. For the purposes of this
+ * calculation, BREF_TYPE_INODE is considered a leaf, along
+ * with DIRENT and DATA.
+ *
+ * RESERVED FIELDS
+ *
+ * A number of blockref fields are reserved and should generally be set to
+ * 0 for future compatibility.
+ *
+ * FUTURE BLOCKREF EXPANSION
+ *
+ * CONTENT ADDRESSABLE INDEXING (future) - Using a 256 or 512-bit check code.
+ */
+struct hammer2_blockref { /* MUST BE EXACTLY 128 BYTES */
+ uint8_t type; /* type of underlying item */
+ uint8_t methods; /* check method & compression method */
+ uint8_t copyid; /* specify which copy this is */
+ uint8_t keybits; /* #of keybits masked off 0=leaf */
+ uint8_t vradix; /* virtual data/meta-data size */
+ uint8_t flags; /* blockref flags */
+ uint16_t leaf_count; /* leaf aggregation count */
+ hammer2_key_t key; /* key specification */
+ hammer2_tid_t mirror_tid; /* media flush topology & freemap */
+ hammer2_tid_t modify_tid; /* clc modify (not propagated) */
+ hammer2_off_t data_off; /* low 6 bits is phys size (radix)*/
+ hammer2_tid_t update_tid; /* clc modify (propagated upward) */
+ union {
+ char buf[16];
+
+ /*
+ * Directory entry header (BREF_TYPE_DIRENT)
+ *
+ * NOTE: check.buf contains filename if <= 64 bytes. Longer
+ * filenames are stored in a data reference of size
+ * HAMMER2_ALLOC_MIN (at least 256, typically 1024).
+ *
+ * NOTE: inode structure may contain a copy of a recently
+ * associated filename, for recovery purposes.
+ *
+ * NOTE: Superroot entries are INODEs, not DIRENTs. Code
+ * allows both cases.
+ */
+ hammer2_dirent_head_t dirent;
+
+ /*
+ * Statistics aggregation (BREF_TYPE_INODE, BREF_TYPE_INDIRECT)
+ */
+ struct {
+ hammer2_key_t data_count;
+ hammer2_key_t inode_count;
+ } stats;
+ } embed;
+ union { /* check info */
+ char buf[64];
+ struct {
+ uint32_t value;
+ uint32_t reserved[15];
+ } iscsi32;
+ struct {
+ uint64_t value;
+ uint64_t reserved[7];
+ } xxhash64;
+ struct {
+ char data[24];
+ char reserved[40];
+ } sha192;
+ struct {
+ char data[32];
+ char reserved[32];
+ } sha256;
+ struct {
+ char data[64];
+ } sha512;
+
+ /*
+ * Freemap hints are embedded in addition to the icrc32.
+ *
+ * bigmask - Radixes available for allocation (0-31).
+ * Heuristical (may be permissive but not
+ * restrictive). Typically only radix values
+ * 10-16 are used (i.e. (1<<10) through (1<<16)).
+ *
+ * avail - Total available space remaining, in bytes
+ */
+ struct {
+ uint32_t icrc32;
+ uint32_t bigmask; /* available radixes */
+ uint64_t avail; /* total available bytes */
+ char reserved[48];
+ } freemap;
+ } check;
+} __packed;
+
+typedef struct hammer2_blockref hammer2_blockref_t;
+
+#define HAMMER2_BLOCKREF_BYTES 128 /* blockref struct in bytes */
+#define HAMMER2_BLOCKREF_RADIX 7
+
+#define HAMMER2_BLOCKREF_LEAF_MAX 65535
+
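Per the NOTE in the comment above, the inclusive key range represented by a blockref can be recovered as below (a sketch; 'bref' is an assumed hammer2_blockref_t pointer, and keybits == 64 covers the entire key space so it would need special casing to avoid an undefined shift):

	/* Illustrative: inclusive key range covered by a blockref. */
	hammer2_key_t key_beg = bref->key;
	hammer2_key_t key_end = bref->key +
	    (((hammer2_key_t)1 << bref->keybits) - 1);
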
+/*
+ * On-media and off-media blockref types.
+ * types >= 128 are pseudo values that should never be present on-media.
+ */
+#define HAMMER2_BREF_TYPE_EMPTY 0
+#define HAMMER2_BREF_TYPE_INODE 1
+#define HAMMER2_BREF_TYPE_INDIRECT 2
+#define HAMMER2_BREF_TYPE_DATA 3
+#define HAMMER2_BREF_TYPE_DIRENT 4
+#define HAMMER2_BREF_TYPE_FREEMAP_NODE 5
+#define HAMMER2_BREF_TYPE_FREEMAP_LEAF 6
+#define HAMMER2_BREF_TYPE_INVALID 7
+#define HAMMER2_BREF_TYPE_FREEMAP 254 /* pseudo-type */
+#define HAMMER2_BREF_TYPE_VOLUME 255 /* pseudo-type */
+
+#define HAMMER2_BREF_FLAG_PFSROOT 0x01 /* see also related opflag */
+#define HAMMER2_BREF_FLAG_EMERG_MIP 0x04 /* emerg modified-in-place */
+
+/*
+ * Encode/decode check mode and compression mode for bref.methods.
+ * The compression level is not encoded in bref.methods.
+ */
+#define HAMMER2_ENC_CHECK(n) (((n) & 15) << 4)
+#define HAMMER2_DEC_CHECK(n) (((n) >> 4) & 15)
+#define HAMMER2_ENC_COMP(n) ((n) & 15)
+#define HAMMER2_DEC_COMP(n) ((n) & 15)
+
+#define HAMMER2_CHECK_NONE 0
+#define HAMMER2_CHECK_DISABLED 1
+#define HAMMER2_CHECK_ISCSI32 2
+#define HAMMER2_CHECK_XXHASH64 3
+#define HAMMER2_CHECK_SHA192 4
+#define HAMMER2_CHECK_FREEMAP 5
+
+#define HAMMER2_CHECK_DEFAULT HAMMER2_CHECK_XXHASH64
+
+/*
+ * Encode/decode check or compression algorithm request in
+ * ipdata->meta.check_algo and ipdata->meta.comp_algo.
+ */
+#define HAMMER2_ENC_ALGO(n) (n)
+#define HAMMER2_DEC_ALGO(n) ((n) & 15)
+#define HAMMER2_ENC_LEVEL(n) ((n) << 4)
+#define HAMMER2_DEC_LEVEL(n) (((n) >> 4) & 15)
+
+#define HAMMER2_COMP_NONE 0
+#define HAMMER2_COMP_AUTOZERO 1
+#define HAMMER2_COMP_LZ4 2
+#define HAMMER2_COMP_ZLIB 3
+
+#define HAMMER2_COMP_NEWFS_DEFAULT HAMMER2_COMP_LZ4
+
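For example, a bref.methods byte requesting the default xxhash64 check with LZ4 compression is built and taken apart with the macros above:

	/* Illustrative: encode and decode a bref.methods byte. */
	uint8_t methods = HAMMER2_ENC_CHECK(HAMMER2_CHECK_XXHASH64) |
	    HAMMER2_ENC_COMP(HAMMER2_COMP_LZ4);
	/* HAMMER2_DEC_CHECK(methods) == HAMMER2_CHECK_XXHASH64 */
	/* HAMMER2_DEC_COMP(methods) == HAMMER2_COMP_LZ4 */
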
+/*
+ * HAMMER2 block references are collected into sets of 4 blockrefs. These
+ * sets are fully associative, meaning the elements making up a set may
+ * contain duplicate entries and holes, but valid elements are always sorted.
+ *
+ * When redundancy is desired a set may contain several duplicate
+ * entries pointing to different copies of the same data. Up to 4 copies
+ * are supported, but this is not yet implemented.
+ *
+ * When a set fills up another level of indirection is inserted, moving
+ * some or all of the set's contents into indirect blocks placed under the
+ * set. This is a top-down approach in that indirect blocks are not created
+ * until the set actually becomes full (that is, the entries in the set can
+ * shortcut the indirect blocks when the set is not full). Depending on how
+ * things are filled multiple indirect blocks will eventually be created.
+ */
+struct hammer2_blockset {
+ hammer2_blockref_t blockref[HAMMER2_SET_COUNT];
+};
+
+typedef struct hammer2_blockset hammer2_blockset_t;
+
+/*
+ * hammer2_bmap_data - A freemap entry in the LEVEL1 block.
+ *
+ * Each 128-byte entry contains the bitmap and meta-data required to manage
+ * a LEVEL0 (4MB) block of storage. The storage is managed in 256 x 16KB
+ * chunks.
+ *
+ * A smaller allocation granularity is supported via the linear iterator
+ * and/or must otherwise be tracked in RAM.
+ *
+ * (data structure must be 128 bytes exactly)
+ *
+ * linear - A BYTE linear allocation offset used for sub-16KB allocations
+ * only. May contain values between 0 and 4MB. Must be ignored
+ * if 16KB-aligned (i.e. force bitmap scan), otherwise may be
+ * used to sub-allocate within the 16KB block (which is already
+ * marked as allocated in the bitmap).
+ *
+ * Sub-allocations need only be 1KB-aligned and do not have to be
+ * size-aligned, and 16KB or larger allocations do not update this
+ * field, resulting in pretty good packing.
+ *
+ * Please note that file data granularity may be limited by
+ * other issues such as buffer cache direct-mapping and the
+ * desire to support sector sizes up to 16KB (so H2 only issues
+ * I/O's in multiples of 16KB anyway).
+ *
+ * class - Clustering class. Cleared to 0 only if the entire leaf becomes
+ * free. Used to cluster device buffers so all elements must have
+ * the same device block size, but may mix logical sizes.
+ *
+ * Typically integrated with the blockref type in the upper 8 bits
+ * to localize inodes and indirect blocks, improving bulk free scans
+ * and directory scans.
+ *
+ * bitmap - Two bits per 16KB allocation block arranged in arrays of
+ * 64-bit elements, 256x2 bits representing ~4MB worth of media
+ * storage. Bit patterns are as follows:
+ *
+ * 00 Unallocated
+ * 01 (reserved)
+ * 10 Possibly free
+ * 11 Allocated
+ *
+ * ==========
+ * level6 freemap
+ * blockref[0] : 4EB
+ * blockref[1] : 4EB
+ * blockref[2] : 4EB
+ * blockref[3] : 4EB
+ * -----------------------------------------------------------------------
+ * 4 x 128B = 512B : 4 x 4EB = 16EB
+ *
+ * level2-5 FREEMAP_NODE
+ * blockref[0] : 1GB,256GB,64TB,16PB
+ * blockref[1] : 1GB,256GB,64TB,16PB
+ * ...
+ * blockref[255] : 1GB,256GB,64TB,16PB
+ * -----------------------------------------------------------------------
+ * 256 x 128B = 32KB : 256 x 1GB,256GB,64TB,16PB = 256GB,64TB,16PB,4EB
+ *
+ * level1 FREEMAP_LEAF
+ * bmap_data[0] : 8 x 8B = 512bits = 256 x 2bits -> 256 x 16KB = 4MB
+ * bmap_data[1] : 8 x 8B = 512bits = 256 x 2bits -> 256 x 16KB = 4MB
+ * ...
+ * bmap_data[255] : 8 x 8B = 512bits = 256 x 2bits -> 256 x 16KB = 4MB
+ * -----------------------------------------------------------------------
+ * 256 x 128B = 32KB : 256 x 4MB = 1GB
+ * ==========
+ */
+struct hammer2_bmap_data {
+ int32_t linear; /* 00 linear sub-granular allocation offset */
+ uint16_t class; /* 04-05 clustering class ((type<<8)|radix) */
+ uint8_t reserved06; /* 06 */
+ uint8_t reserved07; /* 07 */
+ uint32_t reserved08; /* 08 */
+ uint32_t reserved0C; /* 0C */
+ uint32_t reserved10; /* 10 */
+ uint32_t reserved14; /* 14 */
+ uint32_t reserved18; /* 18 */
+ uint32_t avail; /* 1C */
+ uint32_t reserved20[8]; /* 20-3F 256 bits manages 128K/1KB/2-bits */
+ /* 40-7F 512 bits manages 4MB of storage */
+ hammer2_bitmap_t bitmapq[HAMMER2_BMAP_ELEMENTS];
+} __packed;
+
+typedef struct hammer2_bmap_data hammer2_bmap_data_t;
+
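As a sketch of how the two-bit states are read out of bitmapq[] (the helper is hypothetical; 'loff' is a byte offset within the 4MB managed by this entry):

/* Illustrative: 2-bit state (00/01/10/11) of one 16KB block. */
static int
example_bmap_state(const hammer2_bmap_data_t *bmap, hammer2_off_t loff)
{
	int block = (int)((loff & HAMMER2_BMAP_MASK) >>
	    HAMMER2_FREEMAP_BLOCK_RADIX);		/* 0..255 */
	int elem = block >> HAMMER2_BMAP_INDEX_RADIX;	/* 0..7 */
	int bit = (block & (HAMMER2_BMAP_BLOCKS_PER_ELEMENT - 1)) * 2;

	return ((int)(bmap->bitmapq[elem] >> bit) & 3);
}
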
+/*
+ * The inode number is stored in the inode rather than being
+ * based on the location of the inode (since the location moves every time
+ * the inode or anything underneath the inode is modified).
+ *
+ * The inode is 1024 bytes, made up of 256 bytes of meta-data, 256 bytes
+ * for the filename, and 512 bytes worth of direct file data OR an embedded
+ * blockset. The in-memory hammer2_inode structure contains only the mostly-
+ * node-independent meta-data portion (some flags are node-specific and will
+ * not be synchronized). The rest of the inode is node-specific and chain I/O
+ * is required to obtain it.
+ *
+ * Directories represent one inode per blockref. Inodes are not laid out
+ * as a file but instead are represented by the related blockrefs. The
+ * blockrefs, in turn, are indexed by the 64-bit directory hash key. Remember
+ * that blocksets are fully associative, so a certain degree of efficiency is
+ * achieved just from that.
+ *
+ * Up to 512 bytes of direct data can be embedded in an inode, and since
+ * inodes are essentially directory entries this also means that small data
+ * files end up simply being laid out linearly in the directory, resulting
+ * in fewer seeks and highly optimal access.
+ *
+ * The compression mode can be changed at any time in the inode and is
+ * recorded on a blockref-by-blockref basis.
+ */
+#define HAMMER2_INODE_BYTES 1024 /* (asserted by code) */
+#define HAMMER2_INODE_MAXNAME 256 /* maximum name in bytes */
+#define HAMMER2_INODE_VERSION_ONE 1
+
+#define HAMMER2_INODE_START 1024 /* dynamically allocated */
+
+struct hammer2_inode_meta {
+ uint16_t version; /* 0000 inode data version */
+ uint8_t reserved02; /* 0002 */
+ uint8_t pfs_subtype; /* 0003 pfs sub-type */
+
+ /*
+ * core inode attributes, inode type, misc flags
+ */
+ uint32_t uflags; /* 0004 chflags */
+ uint32_t rmajor; /* 0008 available for device nodes */
+ uint32_t rminor; /* 000C available for device nodes */
+ uint64_t ctime; /* 0010 inode change time */
+ uint64_t mtime; /* 0018 modified time */
+ uint64_t atime; /* 0020 access time (unsupported) */
+ uint64_t btime; /* 0028 birth time */
+ struct uuid uid; /* 0030 uid / degenerate unix uid */
+ struct uuid gid; /* 0040 gid / degenerate unix gid */
+
+ uint8_t type; /* 0050 object type */
+ uint8_t op_flags; /* 0051 operational flags */
+ uint16_t cap_flags; /* 0052 capability flags */
+ uint32_t mode; /* 0054 unix modes (typ low 16 bits) */
+
+ /*
+ * inode size, identification, localized recursive configuration
+ * for compression and backup copies.
+ *
+ * NOTE: Nominal parent inode number (iparent) is only applicable
+ * for directories but can also help for files during
+ * catastrophic recovery.
+ */
+ hammer2_tid_t inum; /* 0058 inode number */
+ hammer2_off_t size; /* 0060 size of file */
+ uint64_t nlinks; /* 0068 hard links (typ only dirs) */
+ hammer2_tid_t iparent; /* 0070 nominal parent inum */
+ hammer2_key_t name_key; /* 0078 full filename key */
+ uint16_t name_len; /* 0080 filename length */
+ uint8_t ncopies; /* 0082 ncopies to local media */
+ uint8_t comp_algo; /* 0083 compression request & algo */
+
+ /*
+ * These fields are currently only applicable to PFSROOTs.
+ */
+ uint8_t target_type; /* 0084 hardlink target type */
+ uint8_t check_algo; /* 0085 check code request & algo */
+ uint8_t pfs_nmasters; /* 0086 (if PFSROOT) if multi-master */
+ uint8_t pfs_type; /* 0087 (if PFSROOT) node type */
+ hammer2_tid_t pfs_inum; /* 0088 (if PFSROOT) inum allocator */
+ struct uuid pfs_clid; /* 0090 (if PFSROOT) cluster uuid */
+ struct uuid pfs_fsid; /* 00A0 (if PFSROOT) unique uuid */
+
+ /*
+ * Quotas and aggregate sub-tree inode and data counters. Note that
+ * quotas are not replicated downward, they are explicitly set by
+ * the sysop and in-memory structures keep track of inheritance.
+ */
+ hammer2_key_t data_quota; /* 00B0 subtree quota in bytes */
+ hammer2_key_t unusedB8; /* 00B8 subtree byte count */
+ hammer2_key_t inode_quota; /* 00C0 subtree quota inode count */
+ hammer2_key_t unusedC8; /* 00C8 subtree inode count */
+
+ /*
+ * The last snapshot tid is tested against modify_tid to determine
+ * when a copy must be made of a data block whose check mode has been
+ * disabled (a disabled check mode allows data blocks to be updated
+ * in place instead of copy-on-write).
+ */
+ hammer2_tid_t pfs_lsnap_tid; /* 00D0 last snapshot tid */
+ hammer2_tid_t reservedD8; /* 00D8 (avail) */
+
+ /*
+ * Tracks (possibly degenerate) free areas covering all sub-tree
+ * allocations under inode, not counting the inode itself.
+ * 0/0 indicates an empty entry. Fully set-associative.
+ *
+ * (not yet implemented)
+ */
+ uint64_t decrypt_check; /* 00E0 decryption validator */
+ hammer2_off_t reservedE8[3]; /* 00E8/F0/F8 */
+} __packed;
+
+typedef struct hammer2_inode_meta hammer2_inode_meta_t;
+
+struct hammer2_inode_data {
+ hammer2_inode_meta_t meta; /* 0000-00FF */
+ unsigned char filename[HAMMER2_INODE_MAXNAME];
+ /* 0100-01FF (256 char, unterminated) */
+ union { /* 0200-03FF (64x8 = 512 bytes) */
+ hammer2_blockset_t blockset;
+ char data[HAMMER2_EMBEDDED_BYTES];
+ } u;
+} __packed;
+
+typedef struct hammer2_inode_data hammer2_inode_data_t;
+
+#define HAMMER2_OPFLAG_DIRECTDATA 0x01
+#define HAMMER2_OPFLAG_PFSROOT 0x02 /* (see also bref flag) */
+
+#define HAMMER2_OBJTYPE_UNKNOWN 0
+#define HAMMER2_OBJTYPE_DIRECTORY 1
+#define HAMMER2_OBJTYPE_REGFILE 2
+#define HAMMER2_OBJTYPE_FIFO 4
+#define HAMMER2_OBJTYPE_CDEV 5
+#define HAMMER2_OBJTYPE_BDEV 6
+#define HAMMER2_OBJTYPE_SOFTLINK 7
+#define HAMMER2_OBJTYPE_UNUSED08 8
+#define HAMMER2_OBJTYPE_SOCKET 9
+#define HAMMER2_OBJTYPE_WHITEOUT 10
+
+#define HAMMER2_COPYID_LOCAL ((uint8_t)-1)
+
+/*
+ * PFS types identify the role of a PFS within a cluster.
+ */
+#define HAMMER2_PFSTYPE_NONE 0x00
+#define HAMMER2_PFSTYPE_MASTER 0x06
+#define HAMMER2_PFSTYPE_SUPROOT 0x08
+#define HAMMER2_PFSTYPE_MAX 16
+
+#define HAMMER2_PFSSUBTYPE_NONE 0
+
+/*
+ * The volume header eats a 64K block at the beginning of each 2GB zone
+ * up to four copies.
+ *
+ * All information is stored in host byte order. The volume header's magic
+ * number may be checked to determine the byte order. If you wish to mount
+ * between machines w/ different endian modes you'll need filesystem code
+ * which acts on the media data consistently (either all one way or all the
+ * other). Our code currently does not do that.
+ *
+ * A read-write mount may have to recover missing allocations by doing an
+ * incremental mirror scan looking for modifications made after alloc_tid.
+ * If alloc_tid == last_tid then no recovery operation is needed. Recovery
+ * operations are usually very, very fast.
+ *
+ * Read-only mounts do not need to do any recovery; access to the filesystem
+ * topology is always consistent after a crash (is always consistent, period).
+ * However, there may be shortcutted blockref updates present from deep in
+ * the tree which are stored in the volume header and must be tracked on
+ * the fly.
+ *
+ * NOTE: sroot_blockset points to the super-root directory, not the root
+ * directory. The root directory will be a subdirectory under the
+ * super-root.
+ *
+ * The super-root directory contains all root directories and all
+ * snapshots (readonly or writable). It is possible to do a
+ * null-mount of the super-root using special path constructions
+ * relative to your mounted root.
+ */
+#define HAMMER2_VOLUME_ID_HBO 0x48414d3205172011LLU
+#define HAMMER2_VOLUME_ID_ABO 0x11201705324d4148LLU
+
+/*
+ * If volume version is HAMMER2_VOL_VERSION_MULTI_VOLUMES or above, max
+ * HAMMER2_MAX_VOLUMES volumes are supported. There must be 1 (and only 1)
+ * volume with volume id HAMMER2_ROOT_VOLUME.
+ * Otherwise the filesystem supports only 1 volume, and that volume must
+ * have volume id HAMMER2_ROOT_VOLUME (0), which was a reserved field then.
+ */
+#define HAMMER2_MAX_VOLUMES 64
+#define HAMMER2_ROOT_VOLUME 0
+
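Because a volume header copy occupies the first 64KB block of each of the first four 2GB zones, candidate media offsets can be enumerated as below (a sketch; HAMMER2_NUM_VOLHDRS is defined later in this header, and small volumes hold fewer copies):

	/* Illustrative: media offsets of the (up to) four volume headers. */
	hammer2_off_t volhdr_off[HAMMER2_NUM_VOLHDRS];
	int i;

	for (i = 0; i < HAMMER2_NUM_VOLHDRS; ++i)
		volhdr_off[i] = (hammer2_off_t)i * HAMMER2_ZONE_BYTES64;
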
+struct hammer2_volume_data {
+ /*
+ * sector #0 - 512 bytes
+ */
+ uint64_t magic; /* 0000 Signature */
+ hammer2_off_t boot_beg; /* 0008 Boot area (future) */
+ hammer2_off_t boot_end; /* 0010 (size = end - beg) */
+ hammer2_off_t aux_beg; /* 0018 Aux area (future) */
+ hammer2_off_t aux_end; /* 0020 (size = end - beg) */
+ hammer2_off_t volu_size; /* 0028 Volume size, bytes */
+
+ uint32_t version; /* 0030 */
+ uint32_t flags; /* 0034 */
+ uint8_t copyid; /* 0038 copyid of phys vol */
+ uint8_t freemap_version; /* 0039 freemap algorithm */
+ uint8_t peer_type; /* 003A HAMMER2_PEER_xxx */
+ uint8_t volu_id; /* 003B */
+ uint8_t nvolumes; /* 003C */
+ uint8_t reserved003D; /* 003D */
+ uint16_t reserved003E; /* 003E */
+
+ struct uuid fsid; /* 0040 */
+ struct uuid fstype; /* 0050 */
+
+ /*
+ * allocator_size is precalculated at newfs time and does not include
+ * reserved blocks, boot, or aux areas.
+ *
+ * Initial non-reserved-area allocations do not use the freemap
+ * but instead adjust alloc_iterator. Dynamic allocations take
+ * over starting at (allocator_beg). This makes newfs_hammer2's
+ * job a lot easier and can also serve as a testing jig.
+ */
+ hammer2_off_t allocator_size; /* 0060 Total data space */
+ hammer2_off_t allocator_free; /* 0068 Free space */
+ hammer2_off_t allocator_beg; /* 0070 Initial allocations */
+
+ /*
+ * mirror_tid reflects the highest committed change for this
+ * block device regardless of whether it is to the super-root
+ * or to a PFS or whatever.
+ *
+ * freemap_tid reflects the highest committed freemap change for
+ * this block device.
+ */
+ hammer2_tid_t mirror_tid; /* 0078 committed tid (vol) */
+ hammer2_tid_t reserved0080; /* 0080 */
+ hammer2_tid_t reserved0088; /* 0088 */
+ hammer2_tid_t freemap_tid; /* 0090 committed tid (fmap) */
+ hammer2_tid_t bulkfree_tid; /* 0098 bulkfree incremental */
+ hammer2_tid_t reserved00A0[4]; /* 00A0-00BF */
+
+ hammer2_off_t total_size; /* 00C0 Total volume size, bytes */
+
+ uint32_t copyexists[8]; /* 00C8-00E7 unused */
+ char reserved00E8[248]; /* 00E8-01DF */
+
+ /*
+ * 32 bit CRC array at the end of the first 512 byte sector.
+ *
+ * icrc_sects[7] - First 512-4 bytes of volume header (including all
+ * the other icrc's except this one).
+ *
+ * icrc_sects[6] - Sector 1 (512 bytes) of volume header, which is
+ * the blockset for the root.
+ *
+ * icrc_sects[5] - Sector 2
+ * icrc_sects[4] - Sector 3
+ * icrc_sects[3] - Sector 4 (the freemap blockset)
+ */
+ hammer2_crc32_t icrc_sects[8]; /* 01E0-01FF */
+
+ /*
+ * sector #1 - 512 bytes
+ *
+ * The entire sector is used by a blockset, but currently only first
+ * blockref is used.
+ */
+ hammer2_blockset_t sroot_blockset; /* 0200-03FF Superroot dir */
+
+ /*
+ * sector #2-6
+ */
+ char sector2[512]; /* 0400-05FF reserved */
+ char sector3[512]; /* 0600-07FF reserved */
+ hammer2_blockset_t freemap_blockset; /* 0800-09FF freemap */
+ char sector5[512]; /* 0A00-0BFF reserved */
+ char sector6[512]; /* 0C00-0DFF reserved */
+
+ /*
+ * sector #7 - 512 bytes
+ * Maximum 64 volume offsets within logical offset.
+ */
+ hammer2_off_t volu_loff[HAMMER2_MAX_VOLUMES];
+
+ /*
+ * sector #8-71 - 32768 bytes for unused 256 volconf array.
+ */
+ char reserved_volconf[0x8000]; /* 1000-8FFF reserved */
+
+ /*
+ * Remaining sections are reserved for future use.
+ */
+ char reserved9000[0x6FFC]; /* 9000-FFFB reserved */
+
+ /*
+ * icrc on entire volume header
+ */
+ hammer2_crc32_t icrc_volheader; /* FFFC-FFFF full volume icrc*/
+} __packed;
+
+typedef struct hammer2_volume_data hammer2_volume_data_t;
+
+/*
+ * Various parts of the volume header have their own iCRCs.
+ *
+ * The first 512 bytes has its own iCRC stored at the end of the 512 bytes
+ * and not included in the icrc calculation.
+ *
+ * The second 512 bytes also has its own iCRC but it is stored in the first
+ * 512 bytes so it covers the entire second 512 bytes.
+ *
+ * The whole volume block (64KB) has an iCRC covering all but the last 4 bytes,
+ * which is where the iCRC for the whole volume is stored. This is currently
+ * a catch-all for anything not individually iCRCd.
+ */
+#define HAMMER2_VOL_ICRC_SECT0 7
+#define HAMMER2_VOL_ICRC_SECT1 6
+
+#define HAMMER2_VOLUME_BYTES 65536
+
+#define HAMMER2_VOLUME_ICRC0_OFF 0
+#define HAMMER2_VOLUME_ICRC1_OFF 512
+#define HAMMER2_VOLUME_ICRCVH_OFF 0
+
+#define HAMMER2_VOLUME_ICRC0_SIZE (512 - 4)
+#define HAMMER2_VOLUME_ICRC1_SIZE (512)
+#define HAMMER2_VOLUME_ICRCVH_SIZE (65536 - 4)
+
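A sketch of verifying the sector #0 iCRC with these constants; icrc32() stands in for whichever 32-bit iSCSI CRC routine the build provides and is an assumption here:

/* Illustrative: check the sector #0 iCRC of a volume header copy. */
static int
example_check_sect0(const hammer2_volume_data_t *voldata)
{
	hammer2_crc32_t crc;

	crc = icrc32((const char *)voldata + HAMMER2_VOLUME_ICRC0_OFF,
	    HAMMER2_VOLUME_ICRC0_SIZE);
	return (crc == voldata->icrc_sects[HAMMER2_VOL_ICRC_SECT0]);
}
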
+#define HAMMER2_VOL_VERSION_MULTI_VOLUMES 2
+
+#define HAMMER2_VOL_VERSION_MIN 1
+#define HAMMER2_VOL_VERSION_DEFAULT HAMMER2_VOL_VERSION_MULTI_VOLUMES
+#define HAMMER2_VOL_VERSION_WIP (HAMMER2_VOL_VERSION_MULTI_VOLUMES + 1)
+
+#define HAMMER2_NUM_VOLHDRS 4
+
+union hammer2_media_data {
+ hammer2_volume_data_t voldata;
+ hammer2_inode_data_t ipdata;
+ hammer2_blockset_t blkset;
+ hammer2_blockref_t npdata[HAMMER2_IND_COUNT_MAX];
+ hammer2_bmap_data_t bmdata[HAMMER2_FREEMAP_COUNT];
+ char buf[HAMMER2_PBUFSIZE];
+} __packed;
+
+typedef union hammer2_media_data hammer2_media_data_t;
+
+_Static_assert((1 << HAMMER2_SET_RADIX) == HAMMER2_SET_COUNT,
+ "hammer2 direct radix is incorrect");
+_Static_assert((1 << HAMMER2_PBUFRADIX) == HAMMER2_PBUFSIZE,
+ "HAMMER2_PBUFRADIX and HAMMER2_PBUFSIZE are inconsistent");
+_Static_assert((1 << HAMMER2_RADIX_MIN) == HAMMER2_ALLOC_MIN,
+ "HAMMER2_RADIX_MIN and HAMMER2_ALLOC_MIN are inconsistent");
+
+_Static_assert(sizeof(struct hammer2_blockref) == HAMMER2_BLOCKREF_BYTES,
+ "struct hammer2_blockref size != HAMMER2_BLOCKREF_BYTES");
+_Static_assert(sizeof(struct hammer2_inode_data) == HAMMER2_INODE_BYTES,
+ "struct hammer2_inode_data size != HAMMER2_INODE_BYTES");
+_Static_assert(sizeof(struct hammer2_volume_data) == HAMMER2_VOLUME_BYTES,
+ "struct hammer2_volume_data size != HAMMER2_VOLUME_BYTES");
+
+#endif /* !_FS_HAMMER2_DISK_H_ */
diff --git a/sys/fs/hammer2/hammer2_inode.c b/sys/fs/hammer2/hammer2_inode.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_inode.c
@@ -0,0 +1,581 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/tree.h>
+#include <sys/vnode.h>
+
+#include "hammer2.h"
+
+static void hammer2_inode_repoint(hammer2_inode_t *, hammer2_cluster_t *);
+static void hammer2_inode_repoint_one(hammer2_inode_t *, hammer2_cluster_t *,
+ int);
+
+static int
+hammer2_inode_cmp(const hammer2_inode_t *ip1, const hammer2_inode_t *ip2)
+{
+ if (ip1->meta.inum < ip2->meta.inum)
+ return (-1);
+ if (ip1->meta.inum > ip2->meta.inum)
+ return (1);
+ return (0);
+}
+
+RB_GENERATE_STATIC(hammer2_inode_tree, hammer2_inode, rbnode,
+ hammer2_inode_cmp);
+
+/*
+ * HAMMER2 offers shared and exclusive locks on inodes.
+ * Pass a mask of flags for options:
+ * - pass HAMMER2_RESOLVE_SHARED if a shared lock is desired.
+ * - pass HAMMER2_RESOLVE_ALWAYS if you need the inode's meta-data.
+ * Most front-end inode locks do.
+ */
+void
+hammer2_inode_lock(hammer2_inode_t *ip, int how)
+{
+ hammer2_inode_ref(ip);
+
+ if (how & HAMMER2_RESOLVE_SHARED)
+ hammer2_mtx_sh(&ip->lock);
+ else
+ hammer2_mtx_ex(&ip->lock);
+}
+
+void
+hammer2_inode_unlock(hammer2_inode_t *ip)
+{
+ hammer2_mtx_unlock(&ip->lock);
+ hammer2_inode_drop(ip);
+}
+
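A minimal usage sketch of the locking API described above ('ip' and 'size' are assumed locals):

	/* Illustrative: read inode meta-data under a shared lock. */
	hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED | HAMMER2_RESOLVE_ALWAYS);
	size = ip->meta.size;
	hammer2_inode_unlock(ip);
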
+/*
+ * Select a chain out of an inode's cluster and lock it.
+ * The inode does not have to be locked.
+ */
+hammer2_chain_t *
+hammer2_inode_chain(hammer2_inode_t *ip, int clindex, int how)
+{
+ hammer2_chain_t *chain;
+ hammer2_cluster_t *cluster;
+
+ hammer2_spin_sh(&ip->cluster_spin);
+ cluster = &ip->cluster;
+ if (clindex >= cluster->nchains)
+ chain = NULL;
+ else
+ chain = cluster->array[clindex].chain;
+ if (chain) {
+ hammer2_chain_ref(chain);
+ hammer2_spin_unsh(&ip->cluster_spin);
+ hammer2_chain_lock(chain, how);
+ } else {
+ hammer2_spin_unsh(&ip->cluster_spin);
+ }
+
+ return (chain);
+}
+
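A usage sketch: the returned chain is referenced and locked, so the caller pairs it with an unlock and a drop:

	/* Illustrative: examine cluster element 0 of an inode. */
	chain = hammer2_inode_chain(ip, 0, HAMMER2_RESOLVE_ALWAYS);
	if (chain != NULL) {
		/* ... inspect chain->bref, chain->error, etc ... */
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
	}
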
+hammer2_chain_t *
+hammer2_inode_chain_and_parent(hammer2_inode_t *ip, int clindex,
+ hammer2_chain_t **parentp, int how)
+{
+ hammer2_chain_t *chain, *parent;
+
+ for (;;) {
+ hammer2_spin_sh(&ip->cluster_spin);
+ if (clindex >= ip->cluster.nchains)
+ chain = NULL;
+ else
+ chain = ip->cluster.array[clindex].chain;
+ if (chain) {
+ hammer2_chain_ref(chain);
+ hammer2_spin_unsh(&ip->cluster_spin);
+ hammer2_chain_lock(chain, how);
+ } else {
+ hammer2_spin_unsh(&ip->cluster_spin);
+ }
+
+ /* Get parent, lock order must be (parent, chain). */
+ KKASSERT(chain != NULL);
+ parent = chain->parent;
+ if (parent) {
+ hammer2_chain_ref(parent);
+ hammer2_chain_unlock(chain);
+ hammer2_chain_lock(parent, how);
+ hammer2_chain_lock(chain, how);
+ }
+ if (ip->cluster.array[clindex].chain == chain &&
+ chain->parent == parent)
+ break;
+
+ /* Retry. */
+ hammer2_chain_unlock(chain);
+ hammer2_chain_drop(chain);
+ if (parent) {
+ hammer2_chain_unlock(parent);
+ hammer2_chain_drop(parent);
+ }
+ }
+ *parentp = parent;
+
+ return (chain);
+}
+
+/*
+ * Lookup an inode by inode number.
+ */
+hammer2_inode_t *
+hammer2_inode_lookup(hammer2_pfs_t *pmp, hammer2_tid_t inum)
+{
+ hammer2_inode_t *ip, find;
+
+ KKASSERT(pmp);
+ if (pmp->spmp_hmp) {
+ ip = NULL;
+ } else {
+ hammer2_spin_ex(&pmp->inum_spin);
+ bzero(&find, sizeof(find));
+ find.meta.inum = inum;
+ ip = RB_FIND(hammer2_inode_tree, &pmp->inum_tree, &find);
+ if (ip)
+ hammer2_inode_ref(ip);
+ hammer2_spin_unex(&pmp->inum_spin);
+ }
+
+ return (ip);
+}
+
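A usage sketch: the lookup returns a referenced but unlocked inode, which the caller must eventually drop:

	/* Illustrative: find an in-memory inode by inode number. */
	ip = hammer2_inode_lookup(pmp, inum);
	if (ip != NULL) {
		/* ... use ip ... */
		hammer2_inode_drop(ip);	/* release the lookup's ref */
	}
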
+/*
+ * Adding a ref to an inode is only legal if the inode already has at least
+ * one ref.
+ * Can be called with spinlock held.
+ */
+void
+hammer2_inode_ref(hammer2_inode_t *ip)
+{
+ atomic_add_int(&ip->refs, 1);
+}
+
+/*
+ * Drop an inode reference, freeing the inode when the last reference goes
+ * away.
+ */
+void
+hammer2_inode_drop(hammer2_inode_t *ip)
+{
+ hammer2_pfs_t *pmp;
+ unsigned int refs;
+
+ while (ip) {
+ refs = ip->refs;
+ __compiler_membar();
+ if (refs == 1) {
+ /*
+ * Transition to zero, must interlock with
+ * the inode inumber lookup tree (if applicable).
+ * It should not be possible for anyone to race
+ * the transition to 0.
+ */
+ pmp = ip->pmp;
+ KKASSERT(pmp);
+ hammer2_spin_ex(&pmp->inum_spin);
+
+ if (atomic_cmpset_int(&ip->refs, 1, 0)) {
+ if (ip->flags & HAMMER2_INODE_ONRBTREE) {
+ atomic_clear_int(&ip->flags,
+ HAMMER2_INODE_ONRBTREE);
+ RB_REMOVE(hammer2_inode_tree,
+ &pmp->inum_tree, ip);
+ }
+ hammer2_spin_unex(&pmp->inum_spin);
+ ip->pmp = NULL;
+
+ /*
+ * Cleaning out ip->cluster isn't entirely
+ * trivial.
+ */
+ hammer2_inode_repoint(ip, NULL);
+ hammer2_mtx_destroy(&ip->lock);
+ hammer2_spin_destroy(&ip->cluster_spin);
+
+ free(ip, M_HAMMER2);
+ atomic_add_long(&hammer2_inode_allocs, -1);
+ ip = NULL; /* Will terminate loop. */
+ } else {
+ hammer2_spin_unex(&ip->pmp->inum_spin);
+ }
+ } else {
+ /* Non zero transition. */
+ if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
+ break;
+ }
+ }
+}
+
+/*
+ * Get the vnode associated with the given inode, allocating the vnode if
+ * necessary. The vnode will be returned exclusively locked.
+ *
+ * The caller must lock the inode (shared or exclusive).
+ */
+int
+hammer2_igetv(hammer2_inode_t *ip, int flags, struct vnode **vpp)
+{
+ struct mount *mp;
+ struct vnode *vp = NULL;
+ struct thread *td = curthread;
+ hammer2_tid_t inum;
+ int error;
+
+ hammer2_mtx_assert_locked(&ip->lock);
+
+ KKASSERT(ip);
+ KKASSERT(ip->pmp);
+ KKASSERT(ip->pmp->mp);
+ mp = ip->pmp->mp;
+ inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK;
+
+ error = vfs_hash_get(mp, inum, flags, td, vpp, NULL, NULL);
+ if (error || *vpp != NULL)
+ return (error);
+
+ error = getnewvnode("hammer2", mp, &hammer2_vnodeops, &vp);
+ if (error) {
+ *vpp = NULL;
+ return (error);
+ }
+ KKASSERT(vp);
+
+ lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
+ vp->v_data = ip;
+ ip->vp = vp;
+ hammer2_inode_ref(ip); /* vp association */
+
+ error = insmntque(vp, mp);
+ if (error) {
+ *vpp = NULL;
+ return (error);
+ }
+
+ error = vfs_hash_insert(vp, inum, flags, td, vpp, NULL, NULL);
+ if (error || *vpp != NULL)
+ return (error);
+
+ KASSERT(ip->meta.mode, ("mode 0"));
+ KASSERT(ip->meta.type, ("type 0"));
+ vp->v_type = hammer2_get_vtype(ip->meta.type);
+ KASSERT(vp->v_type != VBAD, ("VBAD"));
+ KASSERT(vp->v_type != VNON, ("VNON"));
+
+ if (vp->v_type == VFIFO)
+ vp->v_op = &hammer2_fifoops;
+ KASSERT(vp->v_op, ("NULL vnode ops"));
+
+ if (inum == 1)
+ vp->v_vflag |= VV_ROOT;
+
+ *vpp = vp;
+ return (0);
+}
+
+/*
+ * Returns the inode associated with the arguments, allocating a new
+ * hammer2_inode structure if necessary, then synchronizing it to the passed
+ * xop cluster. When synchronizing, if idx >= 0, only cluster index (idx)
+ * is synchronized. Otherwise the whole cluster is synchronized. inum will
+ * be extracted from the passed-in xop and the inum argument will be ignored.
+ *
+ * If xop is passed as NULL then a new hammer2_inode is allocated with the
+ * specified inum, and returned. For normal inodes, the inode will be
+ * indexed in memory and if it already exists the existing ip will be
+ * returned instead of allocating a new one. The superroot and PFS inodes
+ * are not indexed in memory.
+ *
+ * The returned inode will be locked and the caller may dispose of both
+ * via hammer2_inode_unlock() + hammer2_inode_drop().
+ *
+ * The hammer2_inode structure regulates the interface between the high level
+ * kernel VNOPS API and the filesystem backend (the chains).
+ */
+hammer2_inode_t *
+hammer2_inode_get(hammer2_pfs_t *pmp, hammer2_xop_head_t *xop,
+ hammer2_tid_t inum, int idx)
+{
+ hammer2_inode_t *nip;
+ const hammer2_inode_data_t *iptmp, *nipdata;
+
+ KKASSERT(xop == NULL ||
+ hammer2_cluster_type(&xop->cluster) == HAMMER2_BREF_TYPE_INODE);
+ KKASSERT(pmp);
+
+ if (xop) {
+ iptmp = &hammer2_xop_gdata(xop)->ipdata;
+ inum = iptmp->meta.inum;
+ hammer2_xop_pdata(xop);
+ }
+again:
+ nip = hammer2_inode_lookup(pmp, inum);
+ if (nip) {
+ /*
+ * We may have to unhold the cluster to avoid a deadlock
+ * against vnlru (and possibly other XOPs).
+ */
+ if (xop) {
+ if (hammer2_mtx_ex_try(&nip->lock) != 0) {
+ hammer2_cluster_unhold(&xop->cluster);
+ hammer2_mtx_ex(&nip->lock);
+ hammer2_cluster_rehold(&xop->cluster);
+ }
+ } else {
+ hammer2_mtx_ex(&nip->lock);
+ }
+
+ /*
+ * Handle SMP race (not applicable to the super-root spmp
+ * which can't index inodes due to duplicative inode numbers).
+ */
+ if (pmp->spmp_hmp == NULL &&
+ (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
+ hammer2_mtx_unlock(&nip->lock);
+ hammer2_inode_drop(nip);
+ goto again;
+ }
+ if (xop) {
+ if (idx >= 0)
+ hammer2_inode_repoint_one(nip, &xop->cluster,
+ idx);
+ else
+ hammer2_inode_repoint(nip, &xop->cluster);
+ }
+ return (nip);
+ }
+
+ /*
+ * We couldn't find the inode number; create a new inode and try to
+ * insert it, handling insertion races.
+ */
+ nip = malloc(sizeof(*nip), M_HAMMER2, M_WAITOK | M_ZERO);
+ atomic_add_long(&hammer2_inode_allocs, 1);
+ hammer2_spin_init(&nip->cluster_spin, "h2ip_clsp");
+
+ nip->cluster.pmp = pmp;
+ if (xop) {
+ nipdata = &hammer2_xop_gdata(xop)->ipdata;
+ nip->meta = nipdata->meta;
+ hammer2_xop_pdata(xop);
+ hammer2_inode_repoint(nip, &xop->cluster);
+ } else {
+ nip->meta.inum = inum;
+ }
+
+ nip->pmp = pmp;
+
+ /*
+ * The ref and lock on nip give it a state compatible with the state
+ * after a hammer2_inode_lock() call.
+ */
+ nip->refs = 1;
+ hammer2_mtx_init(&nip->lock, "h2ip_lk");
+ hammer2_mtx_ex(&nip->lock);
+
+ /*
+ * Attempt to add the inode. If it fails we raced another inode
+ * get. Undo all the work and try again.
+ */
+ if (pmp->spmp_hmp == NULL) {
+ hammer2_spin_ex(&pmp->inum_spin);
+ if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
+ hammer2_spin_unex(&pmp->inum_spin);
+ hammer2_mtx_unlock(&nip->lock);
+ hammer2_inode_drop(nip);
+ goto again;
+ }
+ atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
+ hammer2_spin_unex(&pmp->inum_spin);
+ }
+
+ return (nip);
+}
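+
+/*
+ * A hedged usage sketch for the xop == NULL case (the caller-side names
+ * are hypothetical; see the function comment above for the exact
+ * contract):
+ *
+ *	ip = hammer2_inode_get(pmp, NULL, inum, -1);
+ *	...inspect ip->meta...
+ *	hammer2_inode_unlock(ip);
+ *	hammer2_inode_drop(ip);
+ */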
+
+/*
+ * Repoint ip->cluster's chains to cluster's chains and fixup the default
+ * focus. All items, valid or invalid, are repointed.
+ *
+ * Cluster may be NULL to clean out any chains in ip->cluster.
+ */
+static void
+hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
+{
+ hammer2_chain_t *dropch[HAMMER2_MAXCLUSTER];
+ hammer2_chain_t *ochain, *nchain;
+ int i;
+
+ bzero(dropch, sizeof(dropch));
+
+ /*
+ * Replace chains in ip->cluster with chains from cluster and
+ * adjust the focus if necessary.
+ *
+ * NOTE: nchain and/or ochain can be NULL due to gaps
+ * in the cluster arrays.
+ */
+ hammer2_spin_ex(&ip->cluster_spin);
+ for (i = 0; cluster && i < cluster->nchains; ++i) {
+ /* Do not replace elements which are the same. */
+ nchain = cluster->array[i].chain;
+ if (i < ip->cluster.nchains) {
+ ochain = ip->cluster.array[i].chain;
+ if (ochain == nchain)
+ continue;
+ } else {
+ ochain = NULL;
+ }
+
+ /* Make adjustments. */
+ ip->cluster.array[i].chain = nchain;
+ if (nchain)
+ hammer2_chain_ref(nchain);
+ dropch[i] = ochain;
+ }
+
+ /* Release any left-over chains in ip->cluster. */
+ while (i < ip->cluster.nchains) {
+ nchain = ip->cluster.array[i].chain;
+ if (nchain)
+ ip->cluster.array[i].chain = NULL;
+ dropch[i] = nchain;
+ ++i;
+ }
+
+ /*
+ * Fixup fields. Note that the inode-embedded cluster is never
+ * directly locked.
+ */
+ if (cluster) {
+ ip->cluster.nchains = cluster->nchains;
+ ip->cluster.focus = cluster->focus;
+ hammer2_assert_cluster(&ip->cluster);
+ } else {
+ ip->cluster.nchains = 0;
+ ip->cluster.focus = NULL;
+ }
+
+ hammer2_spin_unex(&ip->cluster_spin);
+
+ /* Cleanup outside of spinlock. */
+ while (--i >= 0)
+ if (dropch[i])
+ hammer2_chain_drop(dropch[i]);
+}
+
+/*
+ * Repoint a single element from the cluster to the ip. Does not change
+ * the focus and requires the inode to be re-locked to clean up flags.
+ */
+static void
+hammer2_inode_repoint_one(hammer2_inode_t *ip, hammer2_cluster_t *cluster,
+ int idx)
+{
+ hammer2_chain_t *ochain, *nchain;
+ int i;
+
+ hammer2_spin_ex(&ip->cluster_spin);
+ KKASSERT(idx < cluster->nchains);
+ if (idx < ip->cluster.nchains) {
+ ochain = ip->cluster.array[idx].chain;
+ nchain = cluster->array[idx].chain;
+ } else {
+ ochain = NULL;
+ nchain = cluster->array[idx].chain;
+ for (i = ip->cluster.nchains; i <= idx; ++i)
+ bzero(&ip->cluster.array[i],
+ sizeof(ip->cluster.array[i]));
+ ip->cluster.nchains = idx + 1;
+ hammer2_assert_cluster(&ip->cluster);
+ }
+ if (ochain != nchain) {
+ /* Make adjustments. */
+ ip->cluster.array[idx].chain = nchain;
+ }
+ hammer2_spin_unex(&ip->cluster_spin);
+
+ if (ochain != nchain) {
+ if (nchain)
+ hammer2_chain_ref(nchain);
+ if (ochain)
+ hammer2_chain_drop(ochain);
+ }
+}
+
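+/*
+ * Return the largest data_count advertised by any chain in the inode's
+ * cluster.
+ */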
+hammer2_key_t
+hammer2_inode_data_count(const hammer2_inode_t *ip)
+{
+ hammer2_chain_t *chain;
+ hammer2_key_t count = 0;
+ int i;
+
+ for (i = 0; i < ip->cluster.nchains; ++i) {
+ chain = ip->cluster.array[i].chain;
+ if (chain == NULL)
+ continue;
+ if (count < chain->bref.embed.stats.data_count)
+ count = chain->bref.embed.stats.data_count;
+ }
+
+ return (count);
+}
+
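+/*
+ * Return the largest inode_count advertised by any chain in the inode's
+ * cluster.
+ */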
+hammer2_key_t
+hammer2_inode_inode_count(const hammer2_inode_t *ip)
+{
+ hammer2_chain_t *chain;
+ hammer2_key_t count = 0;
+ int i;
+
+ for (i = 0; i < ip->cluster.nchains; ++i) {
+ chain = ip->cluster.array[i].chain;
+ if (chain == NULL)
+ continue;
+ if (count < chain->bref.embed.stats.inode_count)
+ count = chain->bref.embed.stats.inode_count;
+ }
+
+ return (count);
+}
diff --git a/sys/fs/hammer2/hammer2_io.c b/sys/fs/hammer2/hammer2_io.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_io.c
@@ -0,0 +1,367 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/tree.h>
+
+#include "hammer2.h"
+
+#define HAMMER2_DOP_READ 1
+
+/*
+ * Implements an abstraction layer for buffered device I/O.
+ * Can be used as an OS-abstraction but the main purpose is to allow larger
+ * buffers to be used against hammer2_chain's using smaller allocations,
+ * without causing deadlocks.
+ */
+static int hammer2_io_cleanup_callback(hammer2_io_t *, void *);
+
+static int
+hammer2_io_cmp(const hammer2_io_t *io1, const hammer2_io_t *io2)
+{
+ if (io1->pbase < io2->pbase)
+ return (-1);
+ if (io1->pbase > io2->pbase)
+ return (1);
+ return (0);
+}
+
+RB_GENERATE_STATIC(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp);
+RB_SCAN_INFO(hammer2_io_tree, hammer2_io);
+RB_GENERATE_SCAN_STATIC(hammer2_io_tree, hammer2_io, rbnode);
+
+struct hammer2_cleanupcb_info {
+ struct hammer2_io_tree tmptree;
+ int count;
+};
+
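+/*
+ * dio->refs packs a reference count (the HAMMER2_DIO_MASK bits) together
+ * with state flags such as HAMMER2_DIO_GOOD.
+ */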
+static __inline void
+hammer2_assert_io_refs(const hammer2_io_t *dio)
+{
+ hammer2_mtx_assert_ex(&dio->lock);
+ KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);
+}
+
+/*
+ * Returns the locked DIO corresponding to the data|radix offset.
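+ *
+ * Hypothetical worked example of the encoding: the low 6 bits
+ * (HAMMER2_OFF_MASK_RADIX) of data_off hold an allocation radix and the
+ * rest a byte offset, so data_off 0x0000000000410006 names a 1 << 6 = 64
+ * byte allocation at byte offset 0x410000, backed by the
+ * HAMMER2_PBUFSIZE-sized physical buffer containing that offset.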
+ */
+static hammer2_io_t *
+hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off)
+{
+ hammer2_volume_t *vol;
+ hammer2_io_t *dio, *xio, find;
+ hammer2_key_t lbase, pbase, pmask;
+ uint64_t refs;
+ int lsize, psize;
+
+ hammer2_mtx_assert_ex(&hmp->iotree_lock);
+
+ psize = HAMMER2_PBUFSIZE;
+ pmask = ~(hammer2_off_t)(psize - 1);
+ if ((int)(data_off & HAMMER2_OFF_MASK_RADIX))
+ lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
+ else
+ lsize = 0;
+ lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
+ pbase = lbase & pmask;
+
+ if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase)
+ hpanic("illegal base: %016jx %016jx+%08x / %016jx\n",
+ pbase, lbase, lsize, pmask);
+
+ /* Access or allocate dio, bump dio->refs to prevent destruction. */
+ bzero(&find, sizeof(find));
+ find.pbase = pbase;
+ dio = RB_FIND(hammer2_io_tree, &hmp->iotree, &find);
+ if (dio) {
+ hammer2_mtx_ex(&dio->lock);
+ refs = atomic_fetchadd_32(&dio->refs, 1);
+ if ((refs & HAMMER2_DIO_MASK) == 0)
+ atomic_add_int(&dio->hmp->iofree_count, -1);
+ } else {
+ vol = hammer2_get_volume(hmp, pbase);
+ dio = malloc(sizeof(*dio), M_HAMMER2, M_WAITOK | M_ZERO);
+ dio->hmp = hmp;
+ dio->devvp = vol->dev->devvp;
+ dio->dbase = vol->offset;
+ KKASSERT((dio->dbase & HAMMER2_FREEMAP_LEVEL1_MASK) == 0);
+ dio->pbase = pbase;
+ dio->psize = psize;
+ dio->refs = 1;
+ dio->act = 5;
+ hammer2_mtx_init(&dio->lock, "h2io_inplk");
+ hammer2_mtx_ex(&dio->lock);
+ xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
+ if (xio == NULL) {
+ atomic_add_long(&hammer2_dio_allocs, 1);
+ } else {
+ refs = atomic_fetchadd_32(&xio->refs, 1);
+ if ((refs & HAMMER2_DIO_MASK) == 0)
+ atomic_add_int(&xio->hmp->iofree_count, -1);
+ hammer2_mtx_unlock(&dio->lock);
+ hammer2_mtx_destroy(&dio->lock);
+ free(dio, M_HAMMER2);
+ dio = xio;
+ hammer2_mtx_ex(&dio->lock);
+ }
+ }
+
+ dio->ticks = ticks;
+ if (dio->act < 10)
+ ++dio->act;
+
+ hammer2_assert_io_refs(dio);
+
+ return (dio);
+}
+
+/*
+ * Acquire the requested dio.
+ * If DIO_GOOD is set the buffer already exists and is good to go.
+ */
+hammer2_io_t *
+hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize, int op)
+{
+ hammer2_io_t *dio;
+ daddr_t lblkno;
+ off_t peof;
+ int error, hce;
+
+ KKASSERT(op == HAMMER2_DOP_READ);
+ KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
+
+ hammer2_mtx_ex(&hmp->iotree_lock);
+ dio = hammer2_io_alloc(hmp, lbase);
+ hammer2_assert_io_refs(dio); /* dio locked + refs > 0 */
+ hammer2_mtx_unlock(&hmp->iotree_lock);
+
+ if (dio->refs & HAMMER2_DIO_GOOD) {
+ hammer2_mtx_unlock(&dio->lock);
+ return (dio);
+ }
+
+ KKASSERT(dio->bp == NULL);
+ if (btype == HAMMER2_BREF_TYPE_DATA)
+ hce = hammer2_cluster_data_read;
+ else
+ hce = hammer2_cluster_meta_read;
+
+ lblkno = (dio->pbase - dio->dbase) / DEV_BSIZE;
+ if (hce > 0) {
+ peof = (dio->pbase + HAMMER2_SEGMASK64) & ~HAMMER2_SEGMASK64;
+ peof -= dio->dbase;
+ error = cluster_read(dio->devvp, peof, lblkno, dio->psize,
+ NOCRED, HAMMER2_PBUFSIZE * hce, hce, 0, &dio->bp);
+ } else {
+ error = bread(dio->devvp, lblkno, dio->psize, NOCRED, &dio->bp);
+ }
+
+ if (dio->bp)
+ BUF_KERNPROC(dio->bp);
+ dio->error = error;
+ if (error == 0)
+ dio->refs |= HAMMER2_DIO_GOOD;
+
+ hammer2_mtx_unlock(&dio->lock);
+
+ /* XXX error handling */
+
+ return (dio);
+}
+
+/*
+ * Release our ref on *diop.
+ * On the 1->0 transition we clear DIO_GOOD and dispose of dio->bp.
+ */
+void
+hammer2_io_putblk(hammer2_io_t **diop)
+{
+ hammer2_dev_t *hmp;
+ hammer2_io_t *dio;
+ struct buf *bp;
+ struct hammer2_cleanupcb_info info;
+ int dio_limit;
+
+ dio = *diop;
+ *diop = NULL;
+
+ hammer2_mtx_ex(&dio->lock);
+ if ((dio->refs & HAMMER2_DIO_MASK) == 0) {
+ hammer2_mtx_unlock(&dio->lock);
+ return; /* lost race */
+ }
+ hammer2_assert_io_refs(dio);
+
+ /*
+ * Drop refs.
+ * On the 1->0 transition clear DIO_GOOD.
+ * On any other transition we can return early.
+ */
+ if ((dio->refs & HAMMER2_DIO_MASK) == 1) {
+ dio->refs--;
+ dio->refs &= ~HAMMER2_DIO_GOOD;
+ } else {
+ dio->refs--;
+ hammer2_mtx_unlock(&dio->lock);
+ return;
+ }
+
+ /* Lastdrop (1->0 transition) case. */
+ bp = dio->bp;
+ dio->bp = NULL;
+
+ /*
+ * A HAMMER2 with write support may write out the buffer here
+ * instead of just disposing of it.
+ */
+ if (bp)
+ brelse(bp);
+
+ /* Update iofree_count before disposing of the dio. */
+ hmp = dio->hmp;
+ atomic_add_int(&hmp->iofree_count, 1);
+
+ KKASSERT(!(dio->refs & HAMMER2_DIO_GOOD));
+ hammer2_mtx_unlock(&dio->lock);
+ /* Another process may come in and get/put this dio. */
+
+ /*
+ * We cache free buffers so re-use cases can use a shared lock,
+ * but if too many build up we have to clean them out.
+ */
+ hammer2_mtx_ex(&hmp->iotree_lock);
+ dio_limit = hammer2_dio_limit;
+ if (dio_limit < 256)
+ dio_limit = 256;
+ if (dio_limit > 1024*1024)
+ dio_limit = 1024*1024;
+ if (hmp->iofree_count > dio_limit) {
+ RB_INIT(&info.tmptree);
+ if (hmp->iofree_count > dio_limit) {
+ info.count = hmp->iofree_count / 5;
+ RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
+ hammer2_io_cleanup_callback, &info);
+ }
+ hammer2_io_cleanup(hmp, &info.tmptree);
+ }
+ hammer2_mtx_unlock(&hmp->iotree_lock);
+}
+
+/*
+ * Cleanup dio with zero refs.
+ */
+static int
+hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
+{
+ struct hammer2_cleanupcb_info *info = arg;
+ hammer2_io_t *xio __diagused;
+ int act;
+
+ /* Only a putblk'd dio (zero refs) is eligible; lock to verify. */
+ hammer2_mtx_ex(&dio->lock);
+ if ((dio->refs & HAMMER2_DIO_MASK) == 0) {
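+ /*
+ * Age the dio: each full second since it was last used costs one
+ * point of dio->act, and only a dio whose act has decayed to zero
+ * is moved to the temporary tree for destruction.
+ */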
+ if (dio->act > 0) {
+ act = dio->act - (ticks - dio->ticks) / hz - 1;
+ if (act > 0) {
+ dio->act = act;
+ hammer2_mtx_unlock(&dio->lock);
+ return (0);
+ }
+ dio->act = 0;
+ }
+ KKASSERT(dio->bp == NULL);
+ if (info->count > 0) {
+ RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
+ xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
+ KKASSERT(xio == NULL);
+ --info->count;
+ }
+ }
+ hammer2_mtx_unlock(&dio->lock);
+
+ return (0);
+}
+
+void
+hammer2_io_cleanup(hammer2_dev_t *hmp, hammer2_io_tree_t *tree)
+{
+ hammer2_io_t *dio;
+
+ while ((dio = RB_ROOT(tree)) != NULL) {
+ RB_REMOVE(hammer2_io_tree, tree, dio);
+ KKASSERT(dio->bp == NULL &&
+ (dio->refs & HAMMER2_DIO_MASK) == 0);
+
+ hammer2_mtx_destroy(&dio->lock);
+ free(dio, M_HAMMER2);
+ atomic_add_long(&hammer2_dio_allocs, -1);
+ atomic_add_int(&hmp->iofree_count, -1);
+ }
+}
+
+char *
+hammer2_io_data(hammer2_io_t *dio, off_t lbase)
+{
+ struct buf *bp;
+ int off;
+
+ bp = dio->bp;
+ KASSERT(bp != NULL, ("NULL dio buf"));
+
+ lbase -= dio->dbase;
+ off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_offset;
+ KASSERT(off >= 0 && off < bp->b_bufsize, ("bad offset"));
+
+ return (bp->b_data + off);
+}
+
+int
+hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
+ hammer2_io_t **diop)
+{
+ *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_READ);
+ return ((*diop)->error);
+}
+
+void
+hammer2_io_bqrelse(hammer2_io_t **diop)
+{
+ hammer2_io_putblk(diop);
+}
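+
+/*
+ * Read-side usage pairs hammer2_io_bread() with hammer2_io_bqrelse(), as
+ * a rough sketch (the caller-side names are hypothetical):
+ *
+ *	hammer2_io_t *dio;
+ *	error = hammer2_io_bread(hmp, btype, lbase, lsize, &dio);
+ *	if (error == 0)
+ *		data = hammer2_io_data(dio, lbase);
+ *	hammer2_io_bqrelse(&dio);
+ */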
diff --git a/sys/fs/hammer2/hammer2_ioctl.h b/sys/fs/hammer2/hammer2_ioctl.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_ioctl.h
@@ -0,0 +1,124 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _FS_HAMMER2_IOCTL_H_
+#define _FS_HAMMER2_IOCTL_H_
+
+#include <sys/param.h>
+#include <sys/ioccom.h>
+#include <sys/syslimits.h>
+
+#include "hammer2_disk.h"
+
+/*
+ * Ioctl to get version.
+ */
+struct hammer2_ioc_version {
+ int version;
+ char reserved[256 - 4];
+};
+
+typedef struct hammer2_ioc_version hammer2_ioc_version_t;
+
+/*
+ * Ioctls to manage PFSs.
+ *
+ * PFSs can be clustered by matching their pfs_clid, and the PFSs making up
+ * a cluster can be uniquely identified by combining the vol_id with
+ * the pfs_clid.
+ */
+struct hammer2_ioc_pfs {
+ hammer2_key_t name_key; /* super-root directory scan */
+ hammer2_key_t name_next; /* (GET only) */
+ uint8_t pfs_type;
+ uint8_t pfs_subtype;
+ uint8_t reserved0012;
+ uint8_t reserved0013;
+ uint32_t pfs_flags;
+ uint64_t reserved0018;
+ struct uuid pfs_fsid; /* identifies PFS instance */
+ struct uuid pfs_clid; /* identifies PFS cluster */
+ char name[NAME_MAX+1]; /* PFS label */
+};
+
+typedef struct hammer2_ioc_pfs hammer2_ioc_pfs_t;
+
+/*
+ * Ioctl to manage inodes.
+ */
+struct hammer2_ioc_inode {
+ uint32_t flags;
+ void *unused;
+ hammer2_key_t data_count;
+ hammer2_key_t inode_count;
+ hammer2_inode_data_t ip_data;
+};
+
+typedef struct hammer2_ioc_inode hammer2_ioc_inode_t;
+
+/*
+ * Ioctl to manage volumes.
+ */
+struct hammer2_ioc_volume {
+ char path[MAXPATHLEN];
+ int id;
+ hammer2_off_t offset;
+ hammer2_off_t size;
+};
+
+typedef struct hammer2_ioc_volume hammer2_ioc_volume_t;
+
+struct hammer2_ioc_volume_list {
+ hammer2_ioc_volume_t *volumes;
+ int nvolumes;
+ int version;
+ char pfs_name[HAMMER2_INODE_MAXNAME];
+};
+
+typedef struct hammer2_ioc_volume_list hammer2_ioc_volume_list_t;
+
+/*
+ * Ioctl list.
+ */
+#define HAMMER2IOC_VERSION_GET _IOWR('h', 64, struct hammer2_ioc_version)
+#define HAMMER2IOC_PFS_GET _IOWR('h', 80, struct hammer2_ioc_pfs)
+#define HAMMER2IOC_PFS_LOOKUP _IOWR('h', 83, struct hammer2_ioc_pfs)
+#define HAMMER2IOC_INODE_GET _IOWR('h', 86, struct hammer2_ioc_inode)
+#define HAMMER2IOC_DEBUG_DUMP _IOWR('h', 91, int)
+#define HAMMER2IOC_VOLUME_LIST _IOWR('h', 97, struct hammer2_ioc_volume_list)
+
+#endif /* !_FS_HAMMER2_IOCTL_H_ */
diff --git a/sys/fs/hammer2/hammer2_ioctl.c b/sys/fs/hammer2/hammer2_ioctl.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_ioctl.c
@@ -0,0 +1,346 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include "hammer2.h"
+#include "hammer2_ioctl.h"
+
+/*
+ * Retrieve ondisk version.
+ */
+static int
+hammer2_ioctl_version_get(hammer2_inode_t *ip, void *data)
+{
+ hammer2_ioc_version_t *version = data;
+ hammer2_dev_t *hmp = ip->pmp->pfs_hmps[0];
+
+ if (hmp == NULL)
+ return (EINVAL);
+
+ version->version = hmp->voldata.version;
+
+ return (0);
+}
+
+/*
+ * Used to scan and retrieve PFS information. PFSs are directories under
+ * the super-root.
+ *
+ * To scan PFSs pass name_key=0. The function will scan for the next
+ * PFS and set all fields, as well as set name_next to the next key.
+ * When no PFSs remain, name_next is set to (hammer2_key_t)-1.
+ *
+ * To retrieve a particular PFS by key, specify the key but note that
+ * the ioctl will return the lowest key >= specified_key, so the caller
+ * must verify the key.
+ *
+ * To retrieve the PFS associated with the file descriptor, pass
+ * name_key set to (hammer2_key_t)-1.
+ */
+static int
+hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data)
+{
+ hammer2_ioc_pfs_t *pfs = data;
+ hammer2_dev_t *hmp = ip->pmp->pfs_hmps[0];
+ const hammer2_inode_data_t *ripdata;
+ hammer2_chain_t *chain, *parent;
+ hammer2_key_t key_next, save_key;
+ int error = 0;
+
+ if (hmp == NULL)
+ return (EINVAL);
+
+ save_key = pfs->name_key;
+
+ if (save_key == (hammer2_key_t)-1) {
+ hammer2_inode_lock(ip->pmp->iroot, 0);
+ parent = NULL;
+ chain = hammer2_inode_chain(ip->pmp->iroot, 0,
+ HAMMER2_RESOLVE_ALWAYS | HAMMER2_RESOLVE_SHARED);
+ } else {
+ hammer2_inode_lock(hmp->spmp->iroot, 0);
+ parent = hammer2_inode_chain(hmp->spmp->iroot, 0,
+ HAMMER2_RESOLVE_ALWAYS | HAMMER2_RESOLVE_SHARED);
+ chain = hammer2_chain_lookup(&parent, &key_next, pfs->name_key,
+ HAMMER2_KEY_MAX, &error, HAMMER2_LOOKUP_SHARED);
+ }
+
+ /* Locate next PFS. */
+ while (chain) {
+ if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
+ break;
+ if (parent == NULL) {
+ hammer2_chain_unlock(chain);
+ hammer2_chain_drop(chain);
+ chain = NULL;
+ break;
+ }
+ chain = hammer2_chain_next(&parent, chain, &key_next, key_next,
+ HAMMER2_KEY_MAX, &error, HAMMER2_LOOKUP_SHARED);
+ }
+ error = hammer2_error_to_errno(error);
+
+ /* Load the data being returned by the ioctl. */
+ if (chain && chain->error == 0) {
+ ripdata = &chain->data->ipdata;
+ pfs->name_key = ripdata->meta.name_key;
+ pfs->pfs_type = ripdata->meta.pfs_type;
+ pfs->pfs_subtype = ripdata->meta.pfs_subtype;
+ pfs->pfs_clid = ripdata->meta.pfs_clid;
+ pfs->pfs_fsid = ripdata->meta.pfs_fsid;
+ KKASSERT(ripdata->meta.name_len < sizeof(pfs->name));
+ bcopy(ripdata->filename, pfs->name, ripdata->meta.name_len);
+ pfs->name[ripdata->meta.name_len] = 0;
+
+ /*
+ * Calculate name_next, if any. We are only accessing
+ * chain->bref so we can ignore chain->error (if the key
+ * is used later it will error then).
+ */
+ if (parent == NULL) {
+ pfs->name_next = (hammer2_key_t)-1;
+ } else {
+ chain = hammer2_chain_next(&parent, chain, &key_next,
+ key_next, HAMMER2_KEY_MAX, &error,
+ HAMMER2_LOOKUP_SHARED);
+ if (chain)
+ pfs->name_next = chain->bref.key;
+ else
+ pfs->name_next = (hammer2_key_t)-1;
+ }
+ } else {
+ pfs->name_next = (hammer2_key_t)-1;
+ error = ENOENT;
+ }
+
+ if (chain) {
+ hammer2_chain_unlock(chain);
+ hammer2_chain_drop(chain);
+ }
+ if (parent) {
+ hammer2_chain_unlock(parent);
+ hammer2_chain_drop(parent);
+ }
+
+ if (save_key == (hammer2_key_t)-1)
+ hammer2_inode_unlock(ip->pmp->iroot);
+ else
+ hammer2_inode_unlock(hmp->spmp->iroot);
+
+ return (error);
+}
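+
+/*
+ * A hypothetical userland scan over all PFSs, matching the contract
+ * described above (fd setup, headers and error handling omitted):
+ *
+ *	hammer2_ioc_pfs_t pfs;
+ *
+ *	bzero(&pfs, sizeof(pfs));
+ *	while (pfs.name_key != (hammer2_key_t)-1) {
+ *		if (ioctl(fd, HAMMER2IOC_PFS_GET, &pfs) == -1)
+ *			break;
+ *		printf("%s\n", pfs.name);
+ *		pfs.name_key = pfs.name_next;
+ *	}
+ */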
+
+/*
+ * Find a specific PFS by name.
+ */
+static int
+hammer2_ioctl_pfs_lookup(hammer2_inode_t *ip, void *data)
+{
+ hammer2_ioc_pfs_t *pfs = data;
+ hammer2_dev_t *hmp = ip->pmp->pfs_hmps[0];
+ const hammer2_inode_data_t *ripdata;
+ hammer2_chain_t *chain, *parent;
+ hammer2_key_t key_next, lhc;
+ size_t len;
+ int error = 0;
+
+ if (hmp == NULL)
+ return (EINVAL);
+
+ hammer2_inode_lock(hmp->spmp->iroot, HAMMER2_RESOLVE_SHARED);
+ parent = hammer2_inode_chain(hmp->spmp->iroot, 0,
+ HAMMER2_RESOLVE_ALWAYS | HAMMER2_RESOLVE_SHARED);
+
+ pfs->name[sizeof(pfs->name) - 1] = 0;
+ len = strlen(pfs->name);
+ lhc = hammer2_dirhash(pfs->name, len);
+
+ chain = hammer2_chain_lookup(&parent, &key_next, lhc,
+ lhc + HAMMER2_DIRHASH_LOMASK, &error, HAMMER2_LOOKUP_SHARED);
+ while (chain) {
+ if (hammer2_chain_dirent_test(chain, pfs->name, len))
+ break;
+ chain = hammer2_chain_next(&parent, chain, &key_next, key_next,
+ lhc + HAMMER2_DIRHASH_LOMASK, &error,
+ HAMMER2_LOOKUP_SHARED);
+ }
+ error = hammer2_error_to_errno(error);
+
+ /* Load the data being returned by the ioctl. */
+ if (chain && chain->error == 0) {
+ KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
+ ripdata = &chain->data->ipdata;
+ pfs->name_key = ripdata->meta.name_key;
+ pfs->pfs_type = ripdata->meta.pfs_type;
+ pfs->pfs_subtype = ripdata->meta.pfs_subtype;
+ pfs->pfs_clid = ripdata->meta.pfs_clid;
+ pfs->pfs_fsid = ripdata->meta.pfs_fsid;
+
+ hammer2_chain_unlock(chain);
+ hammer2_chain_drop(chain);
+ } else if (error == 0) {
+ error = ENOENT;
+ }
+ if (parent) {
+ hammer2_chain_unlock(parent);
+ hammer2_chain_drop(parent);
+ }
+
+ hammer2_inode_unlock(hmp->spmp->iroot);
+
+ return (error);
+}
+
+/*
+ * Retrieve the raw inode structure, non-inclusive of node-specific data.
+ */
+static int
+hammer2_ioctl_inode_get(hammer2_inode_t *ip, void *data)
+{
+ hammer2_ioc_inode_t *ino = data;
+
+ hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
+
+ ino->data_count = hammer2_inode_data_count(ip);
+ ino->inode_count = hammer2_inode_inode_count(ip);
+ bzero(&ino->ip_data, sizeof(ino->ip_data));
+ ino->ip_data.meta = ip->meta;
+
+ hammer2_inode_unlock(ip);
+
+ return (0);
+}
+
+/*
+ * Recursively dump chains of a given inode.
+ */
+static int
+hammer2_ioctl_debug_dump(hammer2_inode_t *ip, unsigned int flags)
+{
+#ifdef INVARIANTS
+ hammer2_chain_t *chain;
+ int i, count = 100000;
+
+ for (i = 0; i < ip->cluster.nchains; ++i) {
+ chain = ip->cluster.array[i].chain;
+ if (chain) {
+ hprintf("cluster #%d\n", i);
+ hammer2_dump_chain(chain, 0, 0, &count, 'i', flags);
+ }
+ }
+
+ return (0);
+#else
+ return (EOPNOTSUPP);
+#endif
+}
+
+/*
+ * Get a list of volumes. The caller passes the capacity of its volumes[]
+ * array in nvolumes; on return, nvolumes holds the number of entries
+ * actually copied out.
+ */
+static int
+hammer2_ioctl_volume_list(hammer2_inode_t *ip, void *data)
+{
+ hammer2_ioc_volume_list_t *vollist = data;
+ hammer2_ioc_volume_t entry;
+ hammer2_volume_t *vol;
+ hammer2_dev_t *hmp = ip->pmp->pfs_hmps[0];
+ int i, error = 0, cnt = 0;
+
+ if (hmp == NULL)
+ return (EINVAL);
+
+ for (i = 0; i < hmp->nvolumes; ++i) {
+ if (cnt >= vollist->nvolumes)
+ break;
+ vol = &hmp->volumes[i];
+ bzero(&entry, sizeof(entry));
+ /* Copy hammer2_volume_t fields. */
+ entry.id = vol->id;
+ bcopy(vol->dev->path, entry.path, sizeof(entry.path));
+ entry.offset = vol->offset;
+ entry.size = vol->size;
+ error = copyout(&entry, &vollist->volumes[cnt], sizeof(entry));
+ if (error)
+ return (error);
+ cnt++;
+ }
+ vollist->nvolumes = cnt;
+ vollist->version = hmp->voldata.version;
+ bcopy(ip->pmp->pfs_names[0], vollist->pfs_name,
+ sizeof(vollist->pfs_name));
+
+ return (error);
+}
+
+int
+hammer2_ioctl_impl(hammer2_inode_t *ip, unsigned long com, void *data,
+ int fflag, struct ucred *cred)
+{
+ int error;
+
+ switch (com) {
+ case HAMMER2IOC_VERSION_GET:
+ error = hammer2_ioctl_version_get(ip, data);
+ break;
+ case HAMMER2IOC_PFS_GET:
+ error = hammer2_ioctl_pfs_get(ip, data);
+ break;
+ case HAMMER2IOC_PFS_LOOKUP:
+ error = hammer2_ioctl_pfs_lookup(ip, data);
+ break;
+ case HAMMER2IOC_INODE_GET:
+ error = hammer2_ioctl_inode_get(ip, data);
+ break;
+ case HAMMER2IOC_DEBUG_DUMP:
+ error = hammer2_ioctl_debug_dump(ip, *(unsigned int *)data);
+ break;
+ case HAMMER2IOC_VOLUME_LIST:
+ error = hammer2_ioctl_volume_list(ip, data);
+ break;
+ default:
+ error = EOPNOTSUPP;
+ break;
+ }
+
+ return (error);
+}
diff --git a/sys/fs/hammer2/hammer2_lz4.h b/sys/fs/hammer2/hammer2_lz4.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_lz4.h
@@ -0,0 +1,93 @@
+/*
+ LZ4 - Fast LZ compression algorithm
+ Header File
+ Copyright (C) 2011-2013, Yann Collet.
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+ - LZ4 source repository : http://code.google.com/p/lz4/
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+//**************************************
+// Compiler Options
+//**************************************
+//Should go here if they are needed
+
+//****************************
+// Simple Functions
+//****************************
+
+int LZ4_decompress_safe (char* source, char* dest, int inputSize,
+ int maxOutputSize);
+
+/*
+LZ4_decompress_safe() :
+ maxOutputSize :
+ is the size of the destination buffer (which must be already allocated)
+ return :
+ the number of bytes decoded in the destination buffer
+ (necessarily <= maxOutputSize)
+ If the source stream is malformed or too large, the function will
+ stop decoding and return a negative result.
+ This function is protected against any kind of buffer overflow attempts
+ (never writes outside of output buffer, and never reads outside of
+ input buffer). It is therefore protected against malicious data packets
+*/
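+
+/*
+Usage sketch (the buffer names are hypothetical; a negative return value
+signals a malformed or truncated stream):
+
+    int n = LZ4_decompress_safe(inbuf, outbuf, inlen, outcap);
+    if (n < 0)
+        handle_corrupt_input();
+*/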
+
+
+//****************************
+// Advanced Functions
+//****************************
+
+int LZ4_compress_limitedOutput(char* source, char* dest, int inputSize,
+ int maxOutputSize);
+
+/*
+LZ4_compress_limitedOutput() :
+ Compress 'inputSize' bytes from 'source' into an output buffer 'dest'
+ of maximum size 'maxOutputSize'.
+ If it cannot achieve it, compression will stop, and the result of
+ the function will be zero.
+ This function never writes outside of provided output buffer.
+
+ inputSize :
+ Max supported value is ~1.9GB
+ maxOutputSize :
+ is the size of the destination buffer (which must be already allocated)
+ return :
+ the number of bytes written in buffer 'dest' or 0 if the compression fails
+*/
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/sys/fs/hammer2/hammer2_lz4.c b/sys/fs/hammer2/hammer2_lz4.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_lz4.c
@@ -0,0 +1,525 @@
+/*
+ LZ4 - Fast LZ compression algorithm
+ Copyright (C) 2011-2013, Yann Collet.
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+ - LZ4 source repository : http://code.google.com/p/lz4/
+*/
+
+/*
+Note : this source file requires "hammer2_lz4_encoder.h"
+*/
+
+//**************************************
+// Tuning parameters
+//**************************************
+// MEMORY_USAGE :
+// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB;
+// 16 -> 64KB; 20 -> 1MB; etc.)
+// Increasing memory usage improves compression ratio
+// Reduced memory usage can improve speed, due to cache effect
+// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+#define MEMORY_USAGE 14
+
+// HEAPMODE :
+// Select how default compression function will allocate memory for its
+// hash table,
+// in memory stack (0:default, fastest), or in memory heap (1:requires
+// memory allocation (malloc)).
+// Default allocation strategy is to use stack (HEAPMODE 0)
+// Note : explicit functions *_stack* and *_heap* are unaffected by this setting
+#define HEAPMODE 1
+
+// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
+// This will provide a small boost to performance for big endian cpu,
+// but the resulting compressed stream will be incompatible with little-endian CPU.
+// You can set this option to 1 in situations where data will remain within
+// closed environment
+// This option is useless on Little_Endian CPU (such as x86)
+//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1
+
+
+//**************************************
+// CPU Feature Detection
+//**************************************
+// 32 or 64 bits ?
+#if (defined(__x86_64__) || defined(_M_X64)) // Detects 64 bits mode
+# define LZ4_ARCH64 1
+#else
+# define LZ4_ARCH64 0
+#endif
+
+//This reduced library code is only Little Endian compatible;
+//if the need arises, please look for the appropriate defines in the
+//original complete LZ4 library.
+//Same is true for unaligned memory access which is enabled by default,
+//hardware bit count, also enabled by default, and Microsoft/Visual
+//Studio compilers.
+
+//**************************************
+// Compiler Options
+//**************************************
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
+/* "restrict" is a known keyword */
+#else
+# define restrict // Disable restrict
+#endif
+
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
+# define expect(expr,value) (__builtin_expect ((expr),(value)) )
+#else
+# define expect(expr,value) (expr)
+#endif
+
+#define likely(expr) expect((expr) != 0, 1)
+#define unlikely(expr) expect((expr) != 0, 0)
+
+
+//**************************************
+// Includes
+//**************************************
+#include "hammer2.h"
+#include "hammer2_lz4.h"
+#include <sys/malloc.h> //for malloc macros, hammer2.h includes sys/param.h
+
+
+//Declaration for kmalloc functions
+static MALLOC_DEFINE(C_HASHTABLE, "comphashtable",
+ "A hash table used by LZ4 compression function.");
+
+
+//**************************************
+// Basic Types
+//**************************************
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
+# include <sys/stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+#else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+#endif
+
+#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
+# define _PACKED __attribute__ ((packed))
+#else
+# define _PACKED
+#endif
+
+#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+# pragma pack(push, 1)
+#endif
+
+typedef struct _U16_S { U16 v; } _PACKED U16_S;
+typedef struct _U32_S { U32 v; } _PACKED U32_S;
+typedef struct _U64_S { U64 v; } _PACKED U64_S;
+
+#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+# pragma pack(pop)
+#endif
+
+#define A64(x) (((U64_S *)(x))->v)
+#define A32(x) (((U32_S *)(x))->v)
+#define A16(x) (((U16_S *)(x))->v)
+
+
+//**************************************
+// Constants
+//**************************************
+#define HASHTABLESIZE (1 << MEMORY_USAGE)
+
+#define MINMATCH 4
+
+#define COPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (COPYLENGTH+MINMATCH)
+#define MINLENGTH (MFLIMIT+1)
+
+#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
+#define SKIPSTRENGTH 6
+// Increasing this value will make the compression run slower on
+// incompressible data
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+
+#define ML_BITS 4
+#define ML_MASK ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+
+//**************************************
+// Architecture-specific macros
+//**************************************
+#if LZ4_ARCH64 // 64-bit
+# define STEPSIZE 8
+# define UARCH U64
+# define AARCH A64
+# define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8;
+# define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d)
+# define LZ4_SECURECOPY(s,d,e) if (d<e) LZ4_WILDCOPY(s,d,e)
+# define HTYPE U32
+# define INITBASE(base) BYTE* base = ip
+#else // 32-bit
+# define STEPSIZE 4
+# define UARCH U32
+# define AARCH A32
+# define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4;
+# define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
+# define LZ4_SECURECOPY LZ4_WILDCOPY
+# define HTYPE BYTE*
+# define INITBASE(base) int base = 0
+#endif
+
+#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
+# define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); \
+ v = lz4_bswap16(v); \
+ d = (s) - v; }
+# define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); \
+ v = lz4_bswap16(v); \
+ A16(p) = v; \
+ p+=2; }
+#else // Little Endian
+# define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
+# define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
+#endif
+
+
+//**************************************
+// Macros
+//**************************************
+#define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e);
+#define LZ4_BLINDCOPY(s,d,l) { BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; }
+
+
+//****************************
+// Private functions
+//****************************
+#if LZ4_ARCH64
+
+static
+inline
+int
+LZ4_NbCommonBytes (register U64 val)
+{
+#if defined(LZ4_BIG_ENDIAN)
+ #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanReverse64( &r, val );
+ return (int)(r>>3);
+ #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_clzll(val) >> 3);
+ #else
+ int r;
+ if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
+ if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+ r += (!val);
+ return r;
+ #endif
+#else
+ #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanForward64( &r, val );
+ return (int)(r>>3);
+ #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_ctzll(val) >> 3);
+ #else
+ static int DeBruijnBytePos[64] = {
+ 0, 0, 0, 0, 0, 1, 1, 2, 0, 3,
+ 1, 3, 1, 4, 2, 7, 0, 2, 3, 6,
+ 1, 5, 3, 5, 1, 3, 4, 4, 2, 5,
+ 6, 7, 7, 0, 1, 2, 3, 3, 4, 6,
+ 2, 6, 5, 5, 3, 4, 5, 6, 7, 1,
+ 2, 4, 6, 4, 4, 5, 7, 2, 6, 5,
+ 7, 6, 7, 7 };
+ return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
+ #endif
+#endif
+}
+
+#else
+
+static
+inline
+int
+LZ4_NbCommonBytes (register U32 val)
+{
+#if defined(LZ4_BIG_ENDIAN)
+# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanReverse( &r, val );
+ return (int)(r>>3);
+# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_clz(val) >> 3);
+# else
+ int r;
+ if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+ r += (!val);
+ return r;
+# endif
+#else
+# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r;
+ _BitScanForward( &r, val );
+ return (int)(r>>3);
+# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_ctz(val) >> 3);
+# else
+ static int DeBruijnBytePos[32] = {
+ 0, 0, 3, 0, 3, 1, 3, 0, 3, 2,
+ 2, 1, 3, 2, 0, 1, 3, 3, 1, 2,
+ 2, 2, 2, 0, 3, 1, 2, 0, 1, 0,
+ 1, 1 };
+ return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+# endif
+#endif
+}
+
+#endif
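+
+/*
+ * In both variants LZ4_NbCommonBytes(diff) returns the number of
+ * low-order zero bytes in 'diff', i.e. how many leading bytes two
+ * little-endian words have in common; e.g. diff == 0x00000000FF000000
+ * yields 3.
+ */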
+
+
+
+//******************************
+// Compression functions
+//******************************
+
+#include "hammer2_lz4_encoder.h"
+
+/*
+void* LZ4_create(void);
+int LZ4_free(void* ctx);
+
+Used to allocate and free hashTable memory
+to be used by the LZ4_compress_heap* family of functions.
+LZ4_create() returns NULL if memory allocation fails.
+*/
+void*
+LZ4_create(void)
+{
+ return malloc(HASHTABLESIZE, C_HASHTABLE, M_WAITOK);
+}
+
+int
+LZ4_free(void* ctx)
+{
+ free(ctx, C_HASHTABLE);
+ return 0;
+}
+
+int
+LZ4_compress_limitedOutput(char* source, char* dest, int inputSize, int maxOutputSize)
+{
+ void* ctx = LZ4_create();
+ int result;
+ if (ctx == NULL) return 0; // Failed allocation => compression not done
+ if (inputSize < LZ4_64KLIMIT)
+ result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest,
+ inputSize, maxOutputSize);
+ else result = LZ4_compress_heap_limitedOutput(ctx, source, dest,
+ inputSize, maxOutputSize);
+ LZ4_free(ctx);
+ return result;
+}
+
+
+//****************************
+// Decompression functions
+//****************************
+
+typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive;
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive;
+typedef enum { full = 0, partial = 1 } exit_directive;
+
+
+// This generic decompression function covers all use cases.
+// It shall be instantiated several times, using different sets of directives.
+// Note that it is essential this generic function is really inlined,
+// in order to remove useless branches during compilation optimisation.
+static
+inline
+int LZ4_decompress_generic(
+ char* source,
+ char* dest,
+ int inputSize,
+ int outputSize,
+ // outputSize must be != 0; if endOnInput==endOnInputSize,
+ // this value is the max size of the output buffer.
+
+ int endOnInput, // endOnOutputSize, endOnInputSize
+ int prefix64k, // noPrefix, withPrefix
+ int partialDecoding, // full, partial
+ int targetOutputSize // only used if partialDecoding==partial
+ )
+{
+ // Local Variables
+ BYTE* restrict ip = (BYTE*) source;
+ BYTE* ref;
+ BYTE* iend = ip + inputSize;
+
+ BYTE* op = (BYTE*) dest;
+ BYTE* oend = op + outputSize;
+ BYTE* cpy;
+ BYTE* oexit = op + targetOutputSize;
+
+ size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
+#if LZ4_ARCH64
+ size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
+#endif
+
+
+ // Special case
+ if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;
+ // targetOutputSize too large, better decode everything
+ if unlikely(outputSize==0) goto _output_error;
+ // Empty output buffer
+
+
+ // Main Loop
+ while (1)
+ {
+ unsigned token;
+ size_t length;
+
+ // get runlength
+ token = *ip++;
+ if ((length=(token>>ML_BITS)) == RUN_MASK)
+ {
+ unsigned s=255;
+ while (((endOnInput)?ip<iend:1) && (s==255))
+ {
+ s = *ip++;
+ length += s;
+ }
+ }
+
+ // copy literals
+ cpy = op+length;
+ if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT))
+ || (ip+length>iend-(2+1+LASTLITERALS))) )
+ || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
+ {
+ if (partialDecoding)
+ {
+ if (cpy > oend) goto _output_error;
+ // Error : write attempt beyond end of output buffer
+ if ((endOnInput) && (ip+length > iend)) goto _output_error;
+ // Error : read attempt beyond end of input buffer
+ }
+ else
+ {
+ if ((!endOnInput) && (cpy != oend)) goto _output_error;
+ // Error : block decoding must stop exactly there,
+ // due to parsing restrictions
+ if ((endOnInput) && ((ip+length != iend) || (cpy > oend)))
+ goto _output_error;
+ // Error : not enough place for another match (min 4) + 5 literals
+ }
+ memcpy(op, ip, length);
+ ip += length;
+ op += length;
+ break;
+ // Necessarily EOF, due to parsing restrictions
+ }
+ LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
+
+ // get offset
+ LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
+ if ((prefix64k==noPrefix) && unlikely(ref < (BYTE*)dest))
+ goto _output_error; // Error : offset outside destination buffer
+
+ // get matchlength
+ if ((length=(token&ML_MASK)) == ML_MASK)
+ {
+ while (endOnInput ? ip<iend-(LASTLITERALS+1) : 1)
+ // A minimum nb of input bytes must remain for LASTLITERALS + token
+ {
+ unsigned s = *ip++;
+ length += s;
+ if (s==255) continue;
+ break;
+ }
+ }
+
+ // copy repeated sequence
+ if unlikely((op-ref)<STEPSIZE)
+ {
+#if LZ4_ARCH64
+ size_t dec64 = dec64table[op-ref];
+#else
+ const size_t dec64 = 0;
+#endif
+ op[0] = ref[0];
+ op[1] = ref[1];
+ op[2] = ref[2];
+ op[3] = ref[3];
+ op += 4, ref += 4; ref -= dec32table[op-ref];
+ A32(op) = A32(ref);
+ op += STEPSIZE-4; ref -= dec64;
+ } else { LZ4_COPYSTEP(ref,op); }
+ cpy = op + length - (STEPSIZE-4);
+
+ if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
+ {
+ if (cpy > oend-LASTLITERALS) goto _output_error;
+ // Error : last 5 bytes must be literals
+ LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
+ while(op<cpy) *op++=*ref++;
+ op=cpy;
+ continue;
+ }
+ LZ4_WILDCOPY(ref, op, cpy);
+ op=cpy; // correction
+ }
+
+ // end of decoding
+ if (endOnInput)
+ return (int) (((char*)op)-dest); // Nb of output bytes decoded
+ else
+ return (int) (((char*)ip)-source); // Nb of input bytes read
+
+ // Overflow error detected
+_output_error:
+ return (int) (-(((char*)ip)-source))-1;
+}
+
+
+int
+LZ4_decompress_safe(char* source, char* dest, int inputSize, int maxOutputSize)
+{
+ return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize,
+ endOnInputSize, noPrefix, full, 0);
+}
diff --git a/sys/fs/hammer2/hammer2_lz4_encoder.h b/sys/fs/hammer2/hammer2_lz4_encoder.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_lz4_encoder.h
@@ -0,0 +1,467 @@
+/*
+ LZ4 Encoder - Part of LZ4 compression algorithm
+ Copyright (C) 2011-2013, Yann Collet.
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+ - LZ4 source repository : http://code.google.com/p/lz4/
+*/
+
+/* lz4_encoder.h must be included into lz4.c
+ The objective of this file is to create a single LZ4 compression function
+ source which will be instantiated multiple times with minor variations
+ depending on a set of #define.
+*/
+
+void*
+LZ4_create(void);
+int
+LZ4_free(void* ctx);
+
+int
+LZ4_compress_heap_limitedOutput(
+ void* ctx,
+ char* source,
+ char* dest,
+ int inputSize,
+ int maxOutputSize);
+
+int
+LZ4_compress64k_heap_limitedOutput(
+ void* ctx,
+ char* source,
+ char* dest,
+ int inputSize,
+ int maxOutputSize);
+
+
+//****************************
+// Local definitions
+//****************************
+
+#ifdef COMPRESS_64K
+# define HASHLOG (MEMORY_USAGE-1)
+# define CURRENT_H_TYPE U16
+# define CURRENTBASE(base) BYTE* base = ip
+#else
+# define HASHLOG (MEMORY_USAGE-2)
+# define CURRENT_H_TYPE HTYPE
+# define CURRENTBASE(base) INITBASE(base)
+#endif
+
+#define HASHTABLE_NBCELLS (1U<<HASHLOG)
+#define LZ4_HASH(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG))
+#define LZ4_HASHVALUE(p) LZ4_HASH(A32(p))
+
+
+
+//****************************
+// Function code
+//****************************
+
+int
+LZ4_compress_heap_limitedOutput(
+ void* ctx,
+ char* source,
+ char* dest,
+ int inputSize,
+ int maxOutputSize)
+{
+ CURRENT_H_TYPE* HashTable = (CURRENT_H_TYPE*)ctx;
+
+ BYTE* ip = (BYTE*) source;
+ CURRENTBASE(base);
+ BYTE* anchor = ip;
+ BYTE* iend = ip + inputSize;
+ BYTE* mflimit = iend - MFLIMIT;
+#define matchlimit (iend - LASTLITERALS)
+
+ BYTE* op = (BYTE*) dest;
+ BYTE* oend = op + maxOutputSize;
+
+ int length;
+ int skipStrength = SKIPSTRENGTH;
+ U32 forwardH;
+
+
+ // Init
+ if (inputSize<MINLENGTH) goto _last_literals;
+
+ memset((void*)HashTable, 0, HASHTABLESIZE);
+
+ // First Byte
+ HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base);
+ ip++;
+ forwardH = LZ4_HASHVALUE(ip);
+
+ // Main Loop
+ for ( ; ; )
+ {
+ int findMatchAttempts = (1U << skipStrength) + 3;
+ BYTE* forwardIp = ip;
+ BYTE* ref;
+ BYTE* token;
+
+ // Find a match
+ do {
+ U32 h = forwardH;
+ int step = findMatchAttempts++ >> skipStrength;
+ ip = forwardIp;
+ forwardIp = ip + step;
+
+ if unlikely(forwardIp > mflimit) {
+ goto _last_literals;
+ }
+
+ forwardH = LZ4_HASHVALUE(forwardIp);
+ ref = base + HashTable[h];
+ HashTable[h] = (CURRENT_H_TYPE)(ip - base);
+
+ } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
+
+ // Catch up
+ while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) {
+ ip--;
+ ref--;
+ }
+
+ // Encode Literal length
+ length = (int)(ip - anchor);
+ token = op++;
+
+ if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend)
+ return 0; // Check output limit
+
+ if (length>=(int)RUN_MASK)
+ {
+ int len = length-RUN_MASK;
+ *token=(RUN_MASK<<ML_BITS);
+ for(; len >= 255 ; len-=255)
+ *op++ = 255;
+ *op++ = (BYTE)len;
+ }
+ else *token = (BYTE)(length<<ML_BITS);
+
+ // Copy Literals
+ LZ4_BLINDCOPY(anchor, op, length);
+
+_next_match:
+ // Encode Offset
+ LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
+
+ // Start Counting
+ ip+=MINMATCH; ref+=MINMATCH; // MinMatch already verified
+ anchor = ip;
+ while likely(ip<matchlimit-(STEPSIZE-1))
+ {
+ UARCH diff = AARCH(ref) ^ AARCH(ip);
+ if (!diff) {
+ ip+=STEPSIZE;
+ ref+=STEPSIZE;
+ continue;
+ }
+ ip += LZ4_NbCommonBytes(diff);
+ goto _endCount;
+ }
+ if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) {
+ ip+=4;
+ ref+=4;
+ }
+ if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) {
+ ip+=2;
+ ref+=2;
+ }
+ if ((ip<matchlimit) && (*ref == *ip))
+ ip++;
+_endCount:
+
+ // Encode MatchLength
+ length = (int)(ip - anchor);
+
+ if unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend)
+ return 0; // Check output limit
+
+ if (length>=(int)ML_MASK)
+ {
+ *token += ML_MASK;
+ length -= ML_MASK;
+ for (; length > 509 ; length-=510) {
+ *op++ = 255;
+ *op++ = 255;
+ }
+ if (length >= 255) {
+ length-=255;
+ *op++ = 255;
+ }
+ *op++ = (BYTE)length;
+ }
+ else *token += (BYTE)length;
+
+ // Test end of chunk
+ if (ip > mflimit) {
+ anchor = ip;
+ break;
+ }
+
+ // Fill table
+ HashTable[LZ4_HASHVALUE(ip-2)] = (CURRENT_H_TYPE)(ip - 2 - base);
+
+ // Test next position
+ ref = base + HashTable[LZ4_HASHVALUE(ip)];
+ HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base);
+ if ((ref >= ip - MAX_DISTANCE) && (A32(ref) == A32(ip))) {
+ token = op++;
+ *token=0;
+ goto _next_match;
+ }
+
+ // Prepare next loop
+ anchor = ip++;
+ forwardH = LZ4_HASHVALUE(ip);
+ }
+
+_last_literals:
+ // Encode Last Literals
+ {
+ int lastRun = (int)(iend - anchor);
+
+ if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)
+ return 0; // Check output limit
+
+ if (lastRun>=(int)RUN_MASK) {
+ *op++=(RUN_MASK<<ML_BITS);
+ lastRun-=RUN_MASK;
+ for(; lastRun >= 255 ; lastRun-=255)
+ *op++ = 255;
+ *op++ = (BYTE) lastRun;
+ }
+ else *op++ = (BYTE)(lastRun<<ML_BITS);
+ memcpy(op, anchor, iend - anchor);
+ op += iend-anchor;
+ }
+
+ // End
+ return (int) (((char*)op)-dest);
+}
+
+int
+LZ4_compress64k_heap_limitedOutput(
+ void* ctx,
+ char* source,
+ char* dest,
+ int inputSize,
+ int maxOutputSize)
+{
+ CURRENT_H_TYPE* HashTable = (CURRENT_H_TYPE*)ctx;
+
+ BYTE* ip = (BYTE*) source;
+ CURRENTBASE(base);
+ BYTE* anchor = ip;
+ BYTE* iend = ip + inputSize;
+ BYTE* mflimit = iend - MFLIMIT;
+#define matchlimit (iend - LASTLITERALS)
+
+ BYTE* op = (BYTE*) dest;
+ BYTE* oend = op + maxOutputSize;
+
+ int length;
+ int skipStrength = SKIPSTRENGTH;
+ U32 forwardH;
+
+
+ // Init
+ if (inputSize<MINLENGTH) goto _last_literals;
+
+ memset((void*)HashTable, 0, HASHTABLESIZE);
+
+ // First Byte
+ HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base);
+ ip++;
+ forwardH = LZ4_HASHVALUE(ip);
+
+ // Main Loop
+ for ( ; ; )
+ {
+ int findMatchAttempts = (1U << skipStrength) + 3;
+ BYTE* forwardIp = ip;
+ BYTE* ref;
+ BYTE* token;
+
+ // Find a match
+ do {
+ U32 h = forwardH;
+ int step = findMatchAttempts++ >> skipStrength;
+ ip = forwardIp;
+ forwardIp = ip + step;
+
+ if unlikely(forwardIp > mflimit) {
+ goto _last_literals;
+ }
+
+ forwardH = LZ4_HASHVALUE(forwardIp);
+ ref = base + HashTable[h];
+ HashTable[h] = (CURRENT_H_TYPE)(ip - base);
+
+ } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
+
+ // Catch up
+ while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) {
+ ip--;
+ ref--;
+ }
+
+ // Encode Literal length
+ length = (int)(ip - anchor);
+ token = op++;
+
+ if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend)
+ return 0; // Check output limit
+
+ if (length>=(int)RUN_MASK)
+ {
+ int len = length-RUN_MASK;
+ *token=(RUN_MASK<<ML_BITS);
+ for(; len >= 255 ; len-=255)
+ *op++ = 255;
+ *op++ = (BYTE)len;
+ }
+ else *token = (BYTE)(length<<ML_BITS);
+
+ // Copy Literals
+ LZ4_BLINDCOPY(anchor, op, length);
+
+_next_match:
+ // Encode Offset
+ LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
+
+ // Start Counting
+ ip+=MINMATCH; ref+=MINMATCH; // MinMatch already verified
+ anchor = ip;
+ while likely(ip<matchlimit-(STEPSIZE-1))
+ {
+ UARCH diff = AARCH(ref) ^ AARCH(ip);
+ if (!diff) {
+ ip+=STEPSIZE;
+ ref+=STEPSIZE;
+ continue;
+ }
+ ip += LZ4_NbCommonBytes(diff);
+ goto _endCount;
+ }
+ if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) {
+ ip+=4;
+ ref+=4;
+ }
+ if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) {
+ ip+=2;
+ ref+=2;
+ }
+ if ((ip<matchlimit) && (*ref == *ip))
+ ip++;
+_endCount:
+
+ // Encode MatchLength
+ length = (int)(ip - anchor);
+
+ if unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend)
+ return 0; // Check output limit
+
+ if (length>=(int)ML_MASK)
+ {
+ *token += ML_MASK;
+ length -= ML_MASK;
+ for (; length > 509 ; length-=510) {
+ *op++ = 255;
+ *op++ = 255;
+ }
+ if (length >= 255) {
+ length-=255;
+ *op++ = 255;
+ }
+ *op++ = (BYTE)length;
+ }
+ else *token += (BYTE)length;
+
+ // Test end of chunk
+ if (ip > mflimit) {
+ anchor = ip;
+ break;
+ }
+
+ // Fill table
+ HashTable[LZ4_HASHVALUE(ip-2)] = (CURRENT_H_TYPE)(ip - 2 - base);
+
+ // Test next position
+ ref = base + HashTable[LZ4_HASHVALUE(ip)];
+ HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base);
+ if ((ref >= ip - MAX_DISTANCE) && (A32(ref) == A32(ip))) {
+ token = op++;
+ *token=0;
+ goto _next_match;
+ }
+
+ // Prepare next loop
+ anchor = ip++;
+ forwardH = LZ4_HASHVALUE(ip);
+ }
+
+_last_literals:
+ // Encode Last Literals
+ {
+ int lastRun = (int)(iend - anchor);
+
+ if (((char*)op - dest) + lastRun + 1 +
+ ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)
+ return 0; // Check output limit
+
+ if (lastRun>=(int)RUN_MASK) {
+ *op++=(RUN_MASK<<ML_BITS);
+ lastRun-=RUN_MASK;
+ for(; lastRun >= 255 ; lastRun-=255)
+ *op++ = 255;
+ *op++ = (BYTE) lastRun;
+ }
+ else *op++ = (BYTE)(lastRun<<ML_BITS);
+ memcpy(op, anchor, iend - anchor);
+ op += iend-anchor;
+ }
+
+ // End
+ return (int) (((char*)op)-dest);
+}
+
+//****************************
+// Clean defines
+//****************************
+
+// Locally Generated
+#undef HASHLOG
+#undef HASHTABLE_NBCELLS
+#undef LZ4_HASH
+#undef LZ4_HASHVALUE
+#undef CURRENT_H_TYPE
+#undef CURRENTBASE
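
The run/match lengths in the two encoders above use LZ4's spill encoding: a
4-bit nibble in the token holds lengths up to RUN_MASK/ML_MASK, and anything
larger spills into a run of 255 bytes terminated by a byte below 255. A
minimal standalone sketch of that scheme (the function names and the
round-trip check are illustrative only, not part of this port):

#include <assert.h>

#define RUN_MASK 15	/* matches the encoders above */

/* Emit the overflow bytes for a length whose token nibble is RUN_MASK. */
static unsigned char *
put_len(unsigned char *op, int len)
{
	len -= RUN_MASK;
	while (len >= 255) {
		*op++ = 255;
		len -= 255;
	}
	*op++ = (unsigned char)len;
	return (op);
}

/* Decode the same run; starts from the RUN_MASK taken out of the token. */
static const unsigned char *
get_len(const unsigned char *ip, int *lenp)
{
	int len = RUN_MASK;
	unsigned char b;

	do {
		b = *ip++;
		len += b;
	} while (b == 255);
	*lenp = len;
	return (ip);
}

int
main(void)
{
	unsigned char buf[8];
	int len;

	assert(put_len(buf, 700) - buf == 3);	/* 700 = 15+255+255+175 */
	get_len(buf, &len);
	assert(len == 700);
	return (0);
}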
diff --git a/sys/fs/hammer2/hammer2_mount.h b/sys/fs/hammer2/hammer2_mount.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_mount.h
@@ -0,0 +1,45 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _FS_HAMMER2_MOUNT_H_
+#define _FS_HAMMER2_MOUNT_H_
+
+#define HMNT2_LOCAL 0x00000002
+
+#define HMNT2_DEVFLAGS (HMNT2_LOCAL)
+
+#endif /* !_FS_HAMMER2_MOUNT_H_ */
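
HMNT2_LOCAL is the only device flag carried by HMNT2_DEVFLAGS here, and
hammer2_mount() later in this diff fetches "hflags" from the nmount(2)
option list and masks it with HMNT2_DEVFLAGS. A hypothetical sketch of the
userland side of that handshake; the actual mount_hammer2(8) added
elsewhere in this review may build its option list differently:

#include <sys/uio.h>
#include <sys/mount.h>
#include <stdint.h>
#include <string.h>

#define HMNT2_LOCAL 0x00000002	/* mirrors the header above */

static void
set_iov(struct iovec *iov, const char *name, const void *val, size_t len)
{
	iov[0].iov_base = (void *)(uintptr_t)name;
	iov[0].iov_len = strlen(name) + 1;
	iov[1].iov_base = (void *)(uintptr_t)val;
	iov[1].iov_len = len;
}

/* Mount dev@LABEL on mntpt read-only; returns 0 or -1 with errno set. */
int
mount_hammer2_sketch(const char *dev_at_label, const char *mntpt)
{
	struct iovec iov[8];
	int hflags = HMNT2_LOCAL;	/* the kernel side asserts this */

	set_iov(&iov[0], "fstype", "hammer2", sizeof("hammer2"));
	set_iov(&iov[2], "fspath", mntpt, strlen(mntpt) + 1);
	set_iov(&iov[4], "from", dev_at_label, strlen(dev_at_label) + 1);
	set_iov(&iov[6], "hflags", &hflags, sizeof(hflags));
	return (nmount(iov, 8, MNT_RDONLY));	/* write is unsupported */
}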
diff --git a/sys/fs/hammer2/hammer2_ondisk.c b/sys/fs/hammer2/hammer2_ondisk.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_ondisk.c
@@ -0,0 +1,758 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/fcntl.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/priv.h>
+#include <sys/queue.h>
+#include <sys/uuid.h>
+#include <sys/vnode.h>
+
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
+
+#include "hammer2.h"
+
+static int
+hammer2_lookup_device(const struct mount *mp, const char *path,
+ struct vnode **devvpp)
+{
+ struct nameidata nd, *ndp = &nd;
+ struct vnode *devvp;
+ struct thread *td = curthread;
+ accmode_t accmode;
+ int error;
+
+ KKASSERT(path);
+ KKASSERT(*path != '\0');
+
+ NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path);
+ if ((error = namei(ndp)) != 0)
+ return (error);
+ NDFREE_PNBUF(ndp);
+ devvp = ndp->ni_vp;
+ KKASSERT(devvp);
+
+ if (!vn_isdisk_error(devvp, &error)) {
+ KKASSERT(error);
+ hprintf("%s not a block device %d\n", path, error);
+ vput(devvp);
+ return (error);
+ }
+
+ accmode = VREAD;
+ if ((mp->mnt_flag & MNT_RDONLY) == 0)
+ accmode |= VWRITE;
+
+ error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
+ if (error)
+ error = priv_check(td, PRIV_VFS_MOUNT_PERM);
+ if (error) {
+ vput(devvp);
+ return (error);
+ }
+ VOP_UNLOCK(devvp);
+ *devvpp = devvp;
+
+ return (error);
+}
+
+int
+hammer2_open_devvp(struct mount *mp, const hammer2_devvp_list_t *devvpl)
+{
+ hammer2_devvp_t *e;
+ struct vnode *devvp;
+ struct bufobj *bo;
+ struct g_consumer *cp;
+ int lblksize, error;
+
+ TAILQ_FOREACH(e, devvpl, entry) {
+ devvp = e->devvp;
+ KKASSERT(devvp);
+
+ /* XXX: use VOP_ACCESS to check FS perms */
+ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
+ g_topology_lock();
+ error = g_vfs_open(devvp, &cp, "hammer2", 0);
+ g_topology_unlock();
+ VOP_UNLOCK(devvp);
+ if (error)
+ return (error);
+
+ bo = &devvp->v_bufobj;
+ bo->bo_private = cp;
+ bo->bo_ops = g_vfs_bufops;
+ if (devvp->v_rdev->si_iosize_max != 0)
+ mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
+ if (mp->mnt_iosize_max > maxphys)
+ mp->mnt_iosize_max = maxphys;
+
+ lblksize = hammer2_get_logical();
+ if ((lblksize % cp->provider->sectorsize) != 0 ||
+ lblksize < cp->provider->sectorsize) {
+ hprintf("invalid sector size %d vs lblksize %d\n",
+ cp->provider->sectorsize, lblksize);
+ return (EINVAL);
+ }
+
+ e->open = 1;
+ KKASSERT(e->open);
+ }
+
+ return (0);
+}
+
+int
+hammer2_close_devvp(const hammer2_devvp_list_t *devvpl)
+{
+ hammer2_devvp_t *e;
+ struct g_consumer *cp;
+
+ TAILQ_FOREACH(e, devvpl, entry) {
+ if (e->open) {
+ g_topology_lock();
+ cp = e->devvp->v_bufobj.bo_private;
+ KASSERT(cp, ("NULL GEOM consumer"));
+ g_vfs_close(cp);
+ g_topology_unlock();
+ e->open = 0;
+ }
+ }
+
+ return (0);
+}
+
+int
+hammer2_init_devvp(const struct mount *mp, const char *blkdevs,
+ hammer2_devvp_list_t *devvpl)
+{
+ hammer2_devvp_t *e;
+ struct vnode *devvp;
+ const char *p;
+ char *path;
+ int i, error = 0;
+
+ KKASSERT(TAILQ_EMPTY(devvpl));
+ KKASSERT(blkdevs); /* Could be empty string. */
+ p = blkdevs;
+
+ path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO);
+ while (1) {
+ strcpy(path, "");
+ if (*p != '/')
+ strcpy(path, "/dev/"); /* Relative path. */
+
+ /* Scan beyond "/dev/". */
+ for (i = strlen(path); i < MAXPATHLEN-1; ++i) {
+ if (*p == '\0') {
+ break;
+ } else if (*p == ':') {
+ p++;
+ break;
+ } else {
+ path[i] = *p;
+ p++;
+ }
+ }
+ path[i] = '\0';
+ /* Path shorter than "/dev/" means invalid or done. */
+ if (strlen(path) <= strlen("/dev/")) {
+ if (strlen(p)) {
+ hprintf("ignore incomplete path %s\n", path);
+ continue;
+ } else {
+ /* End of string. */
+ KKASSERT(*p == '\0');
+ break;
+ }
+ }
+
+ /* Lookup path for device vnode. */
+ KKASSERT(strncmp(path, "/dev/", 5) == 0);
+ devvp = NULL;
+ error = hammer2_lookup_device(mp, path, &devvp);
+ if (error) {
+ KKASSERT(!devvp);
+ hprintf("failed to lookup %s %d\n", path, error);
+ break;
+ }
+ KKASSERT(devvp);
+
+ /* Keep device vnode and path. */
+ e = malloc(sizeof(*e), M_HAMMER2, M_WAITOK | M_ZERO);
+ e->devvp = devvp;
+ e->path = strdup(path, M_HAMMER2);
+ TAILQ_INSERT_TAIL(devvpl, e, entry);
+ }
+
+ return (error);
+}
+
+void
+hammer2_cleanup_devvp(hammer2_devvp_list_t *devvpl)
+{
+ hammer2_devvp_t *e;
+
+ while (!TAILQ_EMPTY(devvpl)) {
+ e = TAILQ_FIRST(devvpl);
+ TAILQ_REMOVE(devvpl, e, entry);
+
+ /* Cleanup device vnode. */
+ KKASSERT(e->devvp);
+ vrele(e->devvp);
+ e->devvp = NULL;
+
+ /* Cleanup path. */
+ KKASSERT(e->path);
+ free(e->path, M_HAMMER2);
+ e->path = NULL;
+
+ free(e, M_HAMMER2);
+ }
+}
+
+static int
+hammer2_verify_volumes_common(const hammer2_volume_t *volumes)
+{
+ const hammer2_volume_t *vol;
+ const struct g_consumer *cp;
+ const char *path;
+ int i;
+
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ vol = &volumes[i];
+ if (vol->id == -1)
+ continue;
+ path = vol->dev->path;
+
+ /* Check volume fields are initialized. */
+ if (!vol->dev->devvp) {
+ hprintf("%s has NULL devvp\n", path);
+ return (EINVAL);
+ }
+ if (vol->offset == (hammer2_off_t)-1) {
+ hprintf("%s has bad offset %016jx\n",
+ path, (intmax_t)vol->offset);
+ return (EINVAL);
+ }
+ if (vol->size == (hammer2_off_t)-1) {
+ hprintf("%s has bad size %016jx\n",
+ path, (intmax_t)vol->size);
+ return (EINVAL);
+ }
+
+ /* Check volume size vs block device size. */
+ cp = vol->dev->devvp->v_bufobj.bo_private;
+ KASSERT(cp, ("NULL GEOM consumer"));
+ KASSERT(cp->provider, ("NULL GEOM provider"));
+ if (vol->size > cp->provider->mediasize) {
+ hprintf("%s's size %016jx exceeds device size %016jx\n",
+ path, (intmax_t)vol->size, (intmax_t)cp->provider->mediasize);
+ return (EINVAL);
+ }
+ }
+
+ return (0);
+}
+
+static int
+hammer2_verify_volumes_1(const hammer2_volume_t *volumes,
+ const hammer2_volume_data_t *rootvoldata)
+{
+ const hammer2_volume_t *vol;
+ hammer2_off_t off;
+ const char *path;
+ int i, nvolumes = 0;
+
+ /* Check initialized volume count. */
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ vol = &volumes[i];
+ if (vol->id != -1)
+ nvolumes++;
+ }
+ if (nvolumes != 1) {
+ hprintf("only 1 volume supported\n");
+ return (EINVAL);
+ }
+
+ /* Check volume header. */
+ if (rootvoldata->volu_id) {
+ hprintf("volume id %d must be 0\n", rootvoldata->volu_id);
+ return (EINVAL);
+ }
+ if (rootvoldata->nvolumes) {
+ hprintf("volume count %d must be 0\n", rootvoldata->nvolumes);
+ return (EINVAL);
+ }
+ if (rootvoldata->total_size) {
+ hprintf("total size %016jx must be 0\n",
+ (intmax_t)rootvoldata->total_size);
+ return (EINVAL);
+ }
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ off = rootvoldata->volu_loff[i];
+ if (off) {
+ hprintf("volume offset[%d] %016jx must be 0\n",
+ i, (intmax_t)off);
+ return (EINVAL);
+ }
+ }
+
+ /* Check volume. */
+ vol = &volumes[0];
+ path = vol->dev->path;
+ if (vol->id) {
+ hprintf("%s has non zero id %d\n", path, vol->id);
+ return (EINVAL);
+ }
+ if (vol->offset) {
+ hprintf("%s has non zero offset %016jx\n",
+ path, (intmax_t)vol->offset);
+ return (EINVAL);
+ }
+ if (vol->size & HAMMER2_VOLUME_ALIGNMASK64) {
+ hprintf("%s's size is not %016jx aligned\n",
+ path, (intmax_t)HAMMER2_VOLUME_ALIGN);
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+static int
+hammer2_verify_volumes_2(const hammer2_volume_t *volumes,
+ const hammer2_volume_data_t *rootvoldata)
+{
+ const hammer2_volume_t *vol;
+ hammer2_off_t off, total_size = 0;
+ const char *path;
+ int i, nvolumes = 0;
+
+ /* Check initialized volume count. */
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ vol = &volumes[i];
+ if (vol->id != -1) {
+ nvolumes++;
+ total_size += vol->size;
+ }
+ }
+
+ /* Check volume header. */
+ if (rootvoldata->volu_id != HAMMER2_ROOT_VOLUME) {
+ hprintf("volume id %d must be %d\n",
+ rootvoldata->volu_id, HAMMER2_ROOT_VOLUME);
+ return (EINVAL);
+ }
+ if (rootvoldata->nvolumes != nvolumes) {
+ hprintf("volume header requires %d devices, %d specified\n",
+ rootvoldata->nvolumes, nvolumes);
+ return (EINVAL);
+ }
+ if (rootvoldata->total_size != total_size) {
+ hprintf("total size %016jx does not equal sum of volumes "
+ "%016jx\n",
+ (intmax_t)rootvoldata->total_size, (intmax_t)total_size);
+ return (EINVAL);
+ }
+ for (i = 0; i < nvolumes; ++i) {
+ off = rootvoldata->volu_loff[i];
+ if (off == (hammer2_off_t)-1) {
+ hprintf("volume offset[%d] %016jx must not be -1\n",
+ i, (intmax_t)off);
+ return (EINVAL);
+ }
+ }
+ for (i = nvolumes; i < HAMMER2_MAX_VOLUMES; ++i) {
+ off = rootvoldata->volu_loff[i];
+ if (off != (hammer2_off_t)-1) {
+ hprintf("volume offset[%d] %016jx must be -1\n",
+ i, (intmax_t)off);
+ return (EINVAL);
+ }
+ }
+
+ /* Check volumes. */
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ vol = &volumes[i];
+ if (vol->id == -1)
+ continue;
+ path = vol->dev->path;
+ /* Check offset. */
+ if (vol->offset & HAMMER2_FREEMAP_LEVEL1_MASK) {
+ hprintf("%s's offset %016jx not %016jx aligned\n",
+ path, (intmax_t)vol->offset,
+ (intmax_t)HAMMER2_FREEMAP_LEVEL1_SIZE);
+ return (EINVAL);
+ }
+ /* Check vs previous volume. */
+ if (i) {
+ if (vol->id <= (vol-1)->id) {
+ hprintf("%s has inconsistent id %d\n",
+ path, vol->id);
+ return (EINVAL);
+ }
+ if (vol->offset != (vol-1)->offset + (vol-1)->size) {
+ hprintf("%s has inconsistent offset %016jx\n",
+ path, (intmax_t)vol->offset);
+ return (EINVAL);
+ }
+ } else { /* first */
+ if (vol->offset) {
+ hprintf("%s has non zero offset %016jx\n",
+ path, (intmax_t)vol->offset);
+ return (EINVAL);
+ }
+ }
+ /* Check size for non-last and last volumes. */
+ if (i != rootvoldata->nvolumes - 1) {
+ if (vol->size < HAMMER2_FREEMAP_LEVEL1_SIZE) {
+ hprintf("%s's size must be >= %016jx\n",
+ path,
+ (intmax_t)HAMMER2_FREEMAP_LEVEL1_SIZE);
+ return (EINVAL);
+ }
+ if (vol->size & HAMMER2_FREEMAP_LEVEL1_MASK) {
+ hprintf("%s's size is not %016jx aligned\n",
+ path,
+ (intmax_t)HAMMER2_FREEMAP_LEVEL1_SIZE);
+ return (EINVAL);
+ }
+ } else { /* last */
+ if (vol->size & HAMMER2_VOLUME_ALIGNMASK64) {
+ hprintf("%s's size is not %016jx aligned\n",
+ path,
+ (intmax_t)HAMMER2_VOLUME_ALIGN);
+ return (EINVAL);
+ }
+ }
+ }
+
+ return (0);
+}
+
+static int
+hammer2_verify_volumes(const hammer2_volume_t *volumes,
+ const hammer2_volume_data_t *rootvoldata)
+{
+ int error;
+
+ error = hammer2_verify_volumes_common(volumes);
+ if (error)
+ return (error);
+
+ if (rootvoldata->version >= HAMMER2_VOL_VERSION_MULTI_VOLUMES)
+ return (hammer2_verify_volumes_2(volumes, rootvoldata));
+ else
+ return (hammer2_verify_volumes_1(volumes, rootvoldata));
+}
+
+/*
+ * Returns the zone# of the selected volume header, or < 0 on failure.
+ */
+static int
+hammer2_read_volume_header(struct vnode *devvp, const char *path,
+ hammer2_volume_data_t *voldata)
+{
+ hammer2_volume_data_t *vd;
+ hammer2_crc32_t crc0, crc1;
+ const struct g_consumer *cp;
+ struct buf *bp = NULL;
+ off_t blkoff;
+ daddr_t blkno;
+ int i, zone = -1;
+
+ cp = devvp->v_bufobj.bo_private;
+ KASSERT(cp, ("NULL GEOM consumer"));
+
+ /*
+	 * There are up to 4 copies of the volume header (syncs iterate
+	 * between them, so there is no single master). We don't trust the
+	 * volu_size field, so we don't know precisely how large the
+	 * filesystem is; depend on the OS to return an error if we read
+	 * beyond the block device's EOF.
+ */
+ for (i = 0; i < HAMMER2_NUM_VOLHDRS; ++i) {
+ /* Ignore if blkoff is beyond media size. */
+ blkoff = (off_t)i * HAMMER2_ZONE_BYTES64;
+ if (blkoff >= cp->provider->mediasize)
+ continue;
+
+ /* FreeBSD bread(9) doesn't fail when blkno is beyond the media size. */
+ blkno = blkoff / DEV_BSIZE;
+ if (bread(devvp, blkno, HAMMER2_VOLUME_BYTES, NOCRED, &bp)) {
+ bp = NULL;
+ continue;
+ }
+
+ vd = (struct hammer2_volume_data *)bp->b_data;
+ /* Verify volume header magic. */
+ if ((vd->magic != HAMMER2_VOLUME_ID_HBO) &&
+ (vd->magic != HAMMER2_VOLUME_ID_ABO)) {
+ hprintf("%s #%d: bad magic\n", path, i);
+ brelse(bp);
+ bp = NULL;
+ continue;
+ }
+ if (vd->magic == HAMMER2_VOLUME_ID_ABO) {
+ /* XXX: Reversed-endianness filesystem. */
+ hprintf("%s #%d: reverse-endian filesystem detected\n",
+ path, i);
+ brelse(bp);
+ bp = NULL;
+ continue;
+ }
+
+ /* Verify volume header CRC's. */
+ crc0 = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT0];
+ crc1 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC0_OFF,
+ HAMMER2_VOLUME_ICRC0_SIZE);
+ if (crc0 != crc1) {
+ hprintf("%s #%d: volume header crc mismatch sect0 "
+ "%08x/%08x\n",
+ path, i, crc0, crc1);
+ brelse(bp);
+ bp = NULL;
+ continue;
+ }
+ crc0 = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT1];
+ crc1 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC1_OFF,
+ HAMMER2_VOLUME_ICRC1_SIZE);
+ if (crc0 != crc1) {
+ hprintf("%s #%d: volume header crc mismatch sect1 "
+ "%08x/%08x\n",
+ path, i, crc0, crc1);
+ brelse(bp);
+ bp = NULL;
+ continue;
+ }
+ crc0 = vd->icrc_volheader;
+ crc1 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRCVH_OFF,
+ HAMMER2_VOLUME_ICRCVH_SIZE);
+ if (crc0 != crc1) {
+ hprintf("%s #%d: volume header crc mismatch vh "
+ "%08x/%08x\n",
+ path, i, crc0, crc1);
+ brelse(bp);
+ bp = NULL;
+ continue;
+ }
+
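+ /* Keep the copy with the highest mirror_tid (most recently synced). */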
+ if (zone == -1 || voldata->mirror_tid < vd->mirror_tid) {
+ *voldata = *vd;
+ zone = i;
+ }
+ brelse(bp);
+ bp = NULL;
+ }
+
+ if (zone == -1) {
+ hprintf("%s has no valid volume headers\n", path);
+ return (-EINVAL);
+ }
+ return (zone);
+}
+
+static void
+hammer2_print_uuid_mismatch(struct uuid *uuid1, struct uuid *uuid2,
+ const char *id)
+{
+ char buf1[64], buf2[64];
+
+ snprintf_uuid(buf1, sizeof(buf1), uuid1);
+ snprintf_uuid(buf2, sizeof(buf2), uuid2);
+
+ hprintf("%s uuid mismatch %s vs %s\n", id, buf1, buf2);
+}
+
+int
+hammer2_init_volumes(const hammer2_devvp_list_t *devvpl,
+ hammer2_volume_t *volumes, hammer2_volume_data_t *rootvoldata,
+ struct vnode **rootvoldevvp)
+{
+ hammer2_volume_data_t *voldata;
+ hammer2_volume_t *vol;
+ hammer2_devvp_t *e;
+ struct vnode *devvp;
+ struct uuid fsid, fstype;
+ const char *path;
+ int i, error = 0, version = -1, nvolumes = 0;
+ int zone __diagused;
+
+ for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
+ vol = &volumes[i];
+ vol->dev = NULL;
+ vol->id = -1;
+ vol->offset = (hammer2_off_t)-1;
+ vol->size = (hammer2_off_t)-1;
+ }
+
+ voldata = malloc(sizeof(*voldata), M_HAMMER2, M_WAITOK | M_ZERO);
+ bzero(&fsid, sizeof(fsid));
+ bzero(&fstype, sizeof(fstype));
+ bzero(rootvoldata, sizeof(*rootvoldata));
+
+ TAILQ_FOREACH(e, devvpl, entry) {
+ devvp = e->devvp;
+ path = e->path;
+ KKASSERT(devvp);
+
+ /* Returns a negative error or a non-negative zone#. */
+ error = hammer2_read_volume_header(devvp, path, voldata);
+ if (error < 0) {
+ hprintf("failed to read %s's volume header\n", path);
+ error = -error;
+ goto done;
+ }
+ zone = error;
+ error = 0; /* Reset error. */
+
+ /* Check volume ID. */
+ if (voldata->volu_id >= HAMMER2_MAX_VOLUMES) {
+ hprintf("%s has bad volume id %d\n",
+ path, voldata->volu_id);
+ error = EINVAL;
+ goto done;
+ }
+ vol = &volumes[voldata->volu_id];
+ if (vol->id != -1) {
+ hprintf("volume id %d already initialized\n",
+ voldata->volu_id);
+ error = EINVAL;
+ goto done;
+ }
+
+ /* All headers must have the same version, nvolumes and uuid. */
+ if (version == -1) {
+ version = voldata->version;
+ nvolumes = voldata->nvolumes;
+ fsid = voldata->fsid;
+ fstype = voldata->fstype;
+ } else {
+ if (version != (int)voldata->version) {
+ hprintf("volume version mismatch %d vs %d\n",
+ version, (int)voldata->version);
+ error = ENXIO;
+ goto done;
+ }
+ if (nvolumes != voldata->nvolumes) {
+ hprintf("volume count mismatch %d vs %d\n",
+ nvolumes, voldata->nvolumes);
+ error = ENXIO;
+ goto done;
+ }
+ if (bcmp(&fsid, &voldata->fsid, sizeof(fsid))) {
+ hammer2_print_uuid_mismatch(&fsid,
+ &voldata->fsid, "fsid");
+ error = ENXIO;
+ goto done;
+ }
+ if (bcmp(&fstype, &voldata->fstype, sizeof(fstype))) {
+ hammer2_print_uuid_mismatch(&fstype,
+ &voldata->fstype, "fstype");
+ error = ENXIO;
+ goto done;
+ }
+ }
+ if (version < HAMMER2_VOL_VERSION_MIN ||
+ version > HAMMER2_VOL_VERSION_WIP) {
+ hprintf("bad volume version %d\n", version);
+ error = EINVAL;
+ goto done;
+ }
+
+ /* All per-volume tests passed. */
+ vol->dev = e;
+ vol->id = voldata->volu_id;
+ vol->offset = voldata->volu_loff[vol->id];
+ vol->size = voldata->volu_size;
+ if (vol->id == HAMMER2_ROOT_VOLUME) {
+ bcopy(voldata, rootvoldata, sizeof(*rootvoldata));
+ KKASSERT(*rootvoldevvp == NULL);
+ *rootvoldevvp = devvp;
+ }
+ debug_hprintf("\"%s\" zone=%d id=%d offset=%016jx size=%016jx\n",
+ path, zone, vol->id, (intmax_t)vol->offset,
+ (intmax_t)vol->size);
+ }
+done:
+ if (!error) {
+ if (!rootvoldata->version) {
+ hprintf("root volume not found\n");
+ error = EINVAL;
+ }
+ if (!error)
+ error = hammer2_verify_volumes(volumes, rootvoldata);
+ }
+ free(voldata, M_HAMMER2);
+
+ return (error);
+}
+
+hammer2_volume_t*
+hammer2_get_volume(hammer2_dev_t *hmp, hammer2_off_t offset)
+{
+ hammer2_volume_t *vol, *ret = NULL;
+ int i;
+
+ offset &= ~HAMMER2_OFF_MASK_RADIX;
+
+ /* Linear scan; switch to binary search if many volumes become common. */
+ for (i = 0; i < hmp->nvolumes; ++i) {
+ vol = &hmp->volumes[i];
+ if ((offset >= vol->offset) &&
+ (offset < vol->offset + vol->size)) {
+ ret = vol;
+ break;
+ }
+ }
+
+ if (!ret)
+ hpanic("no volume for offset %016jx", (intmax_t)offset);
+
+ KKASSERT(ret);
+ KKASSERT(ret->dev);
+ KKASSERT(ret->dev->devvp);
+ KKASSERT(ret->dev->path);
+
+ return (ret);
+}
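
hammer2_read_volume_header() above is the recovery-critical piece: any of
the four header copies may be stale, and the freshest valid copy (highest
mirror_tid) wins. A compilable userland sketch of the same selection loop;
the 2 GiB zone spacing and 64 KiB header size are assumed values, and the
CRC/field helpers are deliberate placeholders rather than the real layout:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define NUM_VOLHDRS	4			/* HAMMER2_NUM_VOLHDRS */
#define ZONE_BYTES	((off_t)2 << 30)	/* assumed zone spacing */
#define VOLHDR_BYTES	65536			/* assumed header size */

/* Placeholders: the kernel verifies three icrc sections and compares
 * voldata->mirror_tid; the real field offsets are omitted on purpose. */
static int volhdr_crc_ok(const char *hdr) { (void)hdr; return (1); }
static uint64_t
volhdr_mirror_tid(const char *hdr)
{
	uint64_t tid;

	memcpy(&tid, hdr, sizeof(tid));		/* placeholder offset */
	return (tid);
}

/* Copy out the freshest valid header and return its zone#, or -1. */
static int
pick_volhdr(int fd, char best[VOLHDR_BYTES])
{
	char buf[VOLHDR_BYTES];
	uint64_t best_tid = 0;
	int i, zone = -1;

	for (i = 0; i < NUM_VOLHDRS; ++i) {
		if (pread(fd, buf, sizeof(buf), (off_t)i * ZONE_BYTES) !=
		    (ssize_t)sizeof(buf))
			continue;	/* past EOF or I/O error */
		if (!volhdr_crc_ok(buf))
			continue;
		if (zone == -1 || volhdr_mirror_tid(buf) > best_tid) {
			best_tid = volhdr_mirror_tid(buf);
			memcpy(best, buf, sizeof(buf));
			zone = i;
		}
	}
	return (zone);
}

int
main(int argc, char **argv)
{
	static char hdr[VOLHDR_BYTES];
	int fd, zone;

	if (argc != 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return (1);
	zone = pick_volhdr(fd, hdr);
	printf("best zone: %d\n", zone);
	close(fd);
	return (zone < 0);
}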
diff --git a/sys/fs/hammer2/hammer2_rb.h b/sys/fs/hammer2/hammer2_rb.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_rb.h
@@ -0,0 +1,140 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _FS_HAMMER2_RB_H_
+#define _FS_HAMMER2_RB_H_
+
+/* prototype */
+#define RB_SCAN_INFO(name, type) \
+struct name##_scan_info { \
+ struct name##_scan_info *link; \
+ struct type *node; \
+}
+
+#define RB_PROTOTYPE_SCAN(name, type, field) \
+ _RB_PROTOTYPE_SCAN(name, type, field,)
+
+#define RB_PROTOTYPE_SCAN_STATIC(name, type, field) \
+ _RB_PROTOTYPE_SCAN(name, type, field, __unused static)
+
+#define _RB_PROTOTYPE_SCAN(name, type, field, STORQUAL) \
+STORQUAL int name##_RB_SCAN(struct name *, int (*)(struct type *, void *),\
+ int (*)(struct type *, void *), void *); \
+RB_SCAN_INFO(name, type)
+
+/* generate */
+#define RB_GENERATE_SCAN(name, type, field) \
+ _RB_GENERATE_SCAN(name, type, field,)
+
+#define RB_GENERATE_SCAN_STATIC(name, type, field) \
+ _RB_GENERATE_SCAN(name, type, field, __unused static)
+
+#define _RB_GENERATE_SCAN(name, type, field, STORQUAL) \
+/* \
+ * Issue a callback for all matching items. The scan function must \
+ * return < 0 for items below the desired range, 0 for items within \
+ * the range, and > 0 for items beyond the range. Any item may be \
+ * deleted while the scan is in progress. \
+ */ \
+static int \
+name##_SCANCMP_ALL(struct type *type __unused, void *data __unused) \
+{ \
+ return (0); \
+} \
+ \
+static __inline int \
+_##name##_RB_SCAN(struct name *head, \
+ int (*scancmp)(struct type *, void *), \
+ int (*callback)(struct type *, void *), \
+ void *data) \
+{ \
+ struct name##_scan_info info; \
+ struct type *best; \
+ struct type *tmp; \
+ int count; \
+ int comp; \
+ \
+ if (scancmp == NULL) \
+ scancmp = name##_SCANCMP_ALL; \
+ \
+ /* \
+ * Locate the first element. \
+ */ \
+ tmp = RB_ROOT(head); \
+ best = NULL; \
+ while (tmp) { \
+ comp = scancmp(tmp, data); \
+ if (comp < 0) { \
+ tmp = RB_RIGHT(tmp, field); \
+ } else if (comp > 0) { \
+ tmp = RB_LEFT(tmp, field); \
+ } else { \
+ best = tmp; \
+ if (RB_LEFT(tmp, field) == NULL) \
+ break; \
+ tmp = RB_LEFT(tmp, field); \
+ } \
+ } \
+ count = 0; \
+ if (best) { \
+ info.node = RB_NEXT(name, head, best); \
+ while ((comp = callback(best, data)) >= 0) { \
+ count += comp; \
+ best = info.node; \
+ if (best == NULL || scancmp(best, data) != 0) \
+ break; \
+ info.node = RB_NEXT(name, head, best); \
+ } \
+ if (comp < 0) /* error or termination */ \
+ count = comp; \
+ } \
+ return (count); \
+} \
+ \
+STORQUAL int \
+name##_RB_SCAN(struct name *head, \
+ int (*scancmp)(struct type *, void *), \
+ int (*callback)(struct type *, void *), \
+ void *data) \
+{ \
+ return _##name##_RB_SCAN(head, scancmp, callback, data); \
+}
+
+#define RB_SCAN(name, root, cmp, callback, data) \
+ name##_RB_SCAN(root, cmp, callback, data)
+
+#endif /* !_FS_HAMMER2_RB_H_ */
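
RB_SCAN() has no counterpart in stock <sys/tree.h>, so a small userland
example may help; the tree type, keys, and range predicate below are
invented purely for illustration:

#include <sys/tree.h>
#include <stdio.h>
#include <stdlib.h>

#include "hammer2_rb.h"		/* the header above */

struct node {
	RB_ENTRY(node) entry;
	int key;
};

static int
node_cmp(struct node *a, struct node *b)
{
	return (a->key < b->key ? -1 : a->key > b->key);
}

RB_HEAD(node_tree, node);
RB_PROTOTYPE_STATIC(node_tree, node, entry, node_cmp);
RB_PROTOTYPE_SCAN_STATIC(node_tree, node, entry);
RB_GENERATE_STATIC(node_tree, node, entry, node_cmp);
RB_GENERATE_SCAN_STATIC(node_tree, node, entry);

struct range { int lo, hi; };

/* < 0 below the range, 0 inside it, > 0 beyond it; drives the walk. */
static int
range_cmp(struct node *n, void *data)
{
	struct range *r = data;

	if (n->key < r->lo)
		return (-1);
	return (n->key > r->hi);
}

static int
print_cb(struct node *n, void *data __unused)
{
	printf("%d\n", n->key);
	return (0);	/* >= 0 keeps the scan going */
}

int
main(void)
{
	struct node_tree head = RB_INITIALIZER(&head);
	struct range r = { 10, 20 };
	struct node *n;
	int i;

	for (i = 0; i < 32; ++i) {
		n = calloc(1, sizeof(*n));
		n->key = i;
		RB_INSERT(node_tree, &head, n);
	}
	RB_SCAN(node_tree, &head, range_cmp, print_cb, &r);	/* 10..20 */
	return (0);
}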
diff --git a/sys/fs/hammer2/hammer2_strategy.c b/sys/fs/hammer2/hammer2_strategy.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_strategy.c
@@ -0,0 +1,273 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+
+#include <vm/uma.h>
+
+#include "hammer2.h"
+#include "hammer2_lz4.h"
+
+#include "zlib/hammer2_zlib.h"
+
+static int hammer2_strategy_read(struct vop_strategy_args *);
+static void hammer2_strategy_read_completion(hammer2_chain_t *,
+ const char *, struct buf *);
+
+int
+hammer2_strategy(struct vop_strategy_args *ap)
+{
+ struct buf *bp = ap->a_bp;
+
+ switch (bp->b_iocmd) {
+ case BIO_READ:
+ hammer2_strategy_read(ap);
+ break;
+ default:
+ bp->b_error = EOPNOTSUPP;
+ bp->b_ioflags |= BIO_ERROR;
+ bufdone(bp);
+ break;
+ }
+
+ return (0);
+}
+
+/*
+ * Callback used in read path in case that a block is compressed with LZ4.
+ */
+static void
+hammer2_decompress_LZ4_callback(const char *data, unsigned int bytes,
+ struct buf *bp)
+{
+ char *compressed_buffer;
+ int compressed_size;
+ int result;
+
+ KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
+ compressed_size = *(const int *)data;
+ KKASSERT((uint32_t)compressed_size <= bytes - sizeof(int));
+
+ compressed_buffer = uma_zalloc(zone_buffer_read, M_WAITOK);
+ result = LZ4_decompress_safe(__DECONST(char *, &data[sizeof(int)]),
+ compressed_buffer, compressed_size, bp->b_bufsize);
+ if (result < 0) {
+ hprintf("error during decompression: buf %016jx/%d\n",
+ (intmax_t)bp->b_offset, bytes);
+ /* Make sure it isn't random garbage. */
+ bzero(compressed_buffer, bp->b_bufsize);
+ }
+
+ KKASSERT(result <= bp->b_bufsize);
+ bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
+ if (result < bp->b_bufsize)
+ bzero(bp->b_data + result, bp->b_bufsize - result);
+ uma_zfree(zone_buffer_read, compressed_buffer);
+ bp->b_resid = 0;
+}
+
+/*
+ * Callback used in read path in case that a block is compressed with ZLIB.
+ */
+static void
+hammer2_decompress_ZLIB_callback(const char *data, unsigned int bytes,
+ struct buf *bp)
+{
+ char *compressed_buffer;
+ z_stream strm_decompress;
+ int result;
+
+ KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
+ strm_decompress.avail_in = 0;
+ strm_decompress.next_in = Z_NULL;
+
+ result = inflateInit(&strm_decompress);
+ if (result != Z_OK)
+ hprintf("fatal error in inflateInit\n");
+
+ compressed_buffer = uma_zalloc(zone_buffer_read, M_WAITOK);
+ strm_decompress.next_in = __DECONST(char *, data);
+
+ /* XXX Supply proper size, subset of device bp. */
+ strm_decompress.avail_in = bytes;
+ strm_decompress.next_out = compressed_buffer;
+ strm_decompress.avail_out = bp->b_bufsize;
+
+ result = inflate(&strm_decompress, Z_FINISH);
+ if (result != Z_STREAM_END) {
+ hprintf("fatal error during decompression\n");
+ bzero(compressed_buffer, bp->b_bufsize);
+ }
+ bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
+ result = bp->b_bufsize - strm_decompress.avail_out;
+ if (result < bp->b_bufsize)
+ bzero(bp->b_data + result, strm_decompress.avail_out);
+ uma_zfree(zone_buffer_read, compressed_buffer);
+ inflateEnd(&strm_decompress);
+
+ bp->b_resid = 0;
+}
+
+/*
+ * Logical buffer I/O.
+ */
+static int
+hammer2_strategy_read(struct vop_strategy_args *ap)
+{
+ hammer2_xop_strategy_t *xop;
+ hammer2_inode_t *ip = VTOI(ap->a_vp);
+ struct buf *bp = ap->a_bp;
+ hammer2_key_t lbase;
+
+ lbase = bp->b_offset;
+ KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);
+
+ xop = hammer2_xop_alloc(ip);
+ xop->bp = bp;
+ xop->lbase = lbase;
+ hammer2_xop_start(&xop->head, &hammer2_strategy_read_desc);
+
+ hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
+
+ return (0);
+}
+
+/*
+ * Backend for hammer2_strategy_read().
+ * Do a synchronous lookup of the chain and its data.
+ */
+void
+hammer2_xop_strategy_read(hammer2_xop_t *arg, int clindex)
+{
+ hammer2_xop_strategy_t *xop = &arg->xop_strategy;
+ hammer2_chain_t *chain, *parent;
+ hammer2_key_t lbase, key_dummy;
+ struct buf *bp;
+ const char *data;
+ int error;
+
+ lbase = xop->lbase;
+
+ parent = hammer2_inode_chain(xop->head.ip1, clindex,
+ HAMMER2_RESOLVE_ALWAYS | HAMMER2_RESOLVE_SHARED);
+ if (parent) {
+ chain = hammer2_chain_lookup(&parent, &key_dummy, lbase, lbase,
+ &error, HAMMER2_LOOKUP_ALWAYS | HAMMER2_LOOKUP_SHARED);
+ if (chain)
+ error = chain->error;
+ } else {
+ error = HAMMER2_ERROR_EIO;
+ chain = NULL;
+ }
+ error = hammer2_xop_feed(&xop->head, chain, clindex, error);
+ if (chain) {
+ hammer2_chain_unlock(chain);
+ hammer2_chain_drop(chain);
+ }
+ if (parent) {
+ hammer2_chain_unlock(parent);
+ hammer2_chain_drop(parent);
+ }
+
+ bp = xop->bp;
+ error = hammer2_xop_collect(&xop->head, 0);
+ switch (error) {
+ case 0:
+ data = hammer2_xop_gdata(&xop->head)->buf;
+ hammer2_strategy_read_completion(xop->head.cluster.focus,
+ data, xop->bp);
+ hammer2_xop_pdata(&xop->head);
+ bufdone(bp);
+ break;
+ case HAMMER2_ERROR_ENOENT:
+ bp->b_resid = 0;
+ bp->b_error = 0;
+ bzero(bp->b_data, bp->b_bcount);
+ bufdone(bp);
+ break;
+ default:
+ hprintf("error %08x at b_offset %016jx\n", error, bp->b_offset);
+ bp->b_error = EIO;
+ bp->b_ioflags |= BIO_ERROR;
+ bufdone(bp);
+ break;
+ }
+}
+
+static void
+hammer2_strategy_read_completion(hammer2_chain_t *focus, const char *data,
+ struct buf *bp)
+{
+ if (focus->bref.type == HAMMER2_BREF_TYPE_INODE) {
+ /* Copy from in-memory inode structure. */
+ bcopy(((const hammer2_inode_data_t *)data)->u.data, bp->b_data,
+ HAMMER2_EMBEDDED_BYTES);
+ bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES,
+ bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
+ bp->b_resid = 0;
+ bp->b_error = 0;
+ } else if (focus->bref.type == HAMMER2_BREF_TYPE_DATA) {
+ atomic_set_int(&focus->flags, HAMMER2_CHAIN_RELEASE);
+ /* Decompression and copy. */
+ switch (HAMMER2_DEC_COMP(focus->bref.methods)) {
+ case HAMMER2_COMP_LZ4:
+ hammer2_decompress_LZ4_callback(data, focus->bytes, bp);
+ /* b_resid set by call */
+ break;
+ case HAMMER2_COMP_ZLIB:
+ hammer2_decompress_ZLIB_callback(data, focus->bytes, bp);
+ /* b_resid set by call */
+ break;
+ case HAMMER2_COMP_NONE:
+ KKASSERT(focus->bytes <= bp->b_bcount);
+ bcopy(data, bp->b_data, focus->bytes);
+ if (focus->bytes < bp->b_bcount)
+ bzero(bp->b_data + focus->bytes,
+ bp->b_bcount - focus->bytes);
+ bp->b_resid = 0;
+ bp->b_error = 0;
+ break;
+ default:
+ hpanic("unknown compression type");
+ }
+ } else {
+ hpanic("unknown blockref type %d", focus->bref.type);
+ }
+}
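
The LZ4 read path above depends on a small framing convention: the first
native-endian int of the on-media block is the compressed payload size, and
short results are zero-filled up to the logical buffer size. A hedged
userland equivalent built on liblz4 (the function name and error convention
are invented):

#include <string.h>
#include <lz4.h>	/* liblz4; link with -llz4 */

static int
h2_lz4_unpack(const char *data, unsigned int bytes, char *out, int outsize)
{
	int csize, res;

	memcpy(&csize, data, sizeof(csize));	/* framing: payload size */
	if (csize < 0 || (unsigned int)csize > bytes - sizeof(csize))
		return (-1);			/* corrupt frame */
	res = LZ4_decompress_safe(data + sizeof(csize), out, csize, outsize);
	if (res < 0) {
		memset(out, 0, outsize);	/* don't leak stale bytes */
		return (-1);
	}
	if (res < outsize)			/* short logical block */
		memset(out + res, 0, outsize - res);
	return (0);
}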
diff --git a/sys/fs/hammer2/hammer2_subr.c b/sys/fs/hammer2/hammer2_subr.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_subr.c
@@ -0,0 +1,231 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/dirent.h>
+#include <sys/uuid.h>
+
+#include "hammer2.h"
+
+int
+hammer2_get_dtype(uint8_t type)
+{
+ switch (type) {
+ case HAMMER2_OBJTYPE_UNKNOWN:
+ return (DT_UNKNOWN);
+ case HAMMER2_OBJTYPE_DIRECTORY:
+ return (DT_DIR);
+ case HAMMER2_OBJTYPE_REGFILE:
+ return (DT_REG);
+ case HAMMER2_OBJTYPE_FIFO:
+ return (DT_FIFO);
+ case HAMMER2_OBJTYPE_CDEV:
+ return (DT_CHR);
+ case HAMMER2_OBJTYPE_BDEV:
+ return (DT_BLK);
+ case HAMMER2_OBJTYPE_SOFTLINK:
+ return (DT_LNK);
+ case HAMMER2_OBJTYPE_SOCKET:
+ return (DT_SOCK);
+ case HAMMER2_OBJTYPE_WHITEOUT:
+ return (DT_UNKNOWN);
+ default:
+ return (DT_UNKNOWN);
+ }
+ /* not reached */
+}
+
+int
+hammer2_get_vtype(uint8_t type)
+{
+ switch (type) {
+ case HAMMER2_OBJTYPE_UNKNOWN:
+ return (VBAD);
+ case HAMMER2_OBJTYPE_DIRECTORY:
+ return (VDIR);
+ case HAMMER2_OBJTYPE_REGFILE:
+ return (VREG);
+ case HAMMER2_OBJTYPE_FIFO:
+ return (VFIFO);
+ case HAMMER2_OBJTYPE_CDEV:
+ return (VCHR);
+ case HAMMER2_OBJTYPE_BDEV:
+ return (VBLK);
+ case HAMMER2_OBJTYPE_SOFTLINK:
+ return (VLNK);
+ case HAMMER2_OBJTYPE_SOCKET:
+ return (VSOCK);
+ case HAMMER2_OBJTYPE_WHITEOUT:
+ return (VBAD);
+ default:
+ return (VBAD);
+ }
+ /* not reached */
+}
+
+/*
+ * Convert a HAMMER2 64-bit time to a timespec.
+ */
+void
+hammer2_time_to_timespec(uint64_t xtime, struct timespec *ts)
+{
+ ts->tv_sec = (unsigned long)(xtime / 1000000);
+ ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
+}
+
+/*
+ * Convert a uuid to a unix uid or gid.
+ */
+uint32_t
+hammer2_to_unix_xid(const struct uuid *uuid)
+{
+ return (*(const uint32_t *)&uuid->node[2]);
+}
+
+/*
+ * Borrow HAMMER1's directory hash algorithm #1 with a few modifications.
+ * The filename is split into fields which are hashed separately and then
+ * added together.
+ *
+ * Differences include: bit 63 must be set to 1 for HAMMER2 (HAMMER1 sets
+ * it to 0) because bit 63 == 0 is used for hidden hardlinked inodes.
+ * (This means we do not need to do a 0-check/or-with-0x100000000 either).
+ *
+ * Also, the iSCSI CRC code is used instead of the old crc32 code.
+ */
+hammer2_key_t
+hammer2_dirhash(const unsigned char *name, size_t len)
+{
+ const unsigned char *aname = name;
+ uint32_t crcx;
+ uint64_t key;
+ size_t i, j;
+
+ key = 0;
+
+ /* m32 */
+ crcx = 0;
+ for (i = j = 0; i < len; ++i) {
+ if (aname[i] == '.' ||
+ aname[i] == '-' ||
+ aname[i] == '_' ||
+ aname[i] == '~') {
+ if (i != j)
+ crcx += hammer2_icrc32(aname + j, i - j);
+ j = i + 1;
+ }
+ }
+ if (i != j)
+ crcx += hammer2_icrc32(aname + j, i - j);
+
+ /*
+ * The directory hash utilizes the top 32 bits of the 64-bit key.
+ * Bit 63 must be set to 1.
+ */
+ crcx |= 0x80000000U;
+ key |= (uint64_t)crcx << 32;
+
+ /*
+ * l16 - crc of entire filename
+ * This crc reduces degenerate hash collision conditions.
+ */
+ crcx = hammer2_icrc32(aname, len);
+ crcx = crcx ^ (crcx << 16);
+ key |= crcx & 0xFFFF0000U;
+
+ /*
+ * Set bit 15. This allows readdir to strip bit 63 so a positive
+ * 64-bit cookie/offset can always be returned, and still guarantee
+ * that the values 0x0000-0x7FFF are available for artificial entries
+ * ('.' and '..').
+ */
+ key |= 0x8000U;
+
+ return (key);
+}
+
+/*
+ * The logical block size is currently always PBUFSIZE.
+ */
+int
+hammer2_calc_logical(hammer2_inode_t *ip, hammer2_off_t uoff,
+ hammer2_key_t *lbasep, hammer2_key_t *leofp)
+{
+ if (lbasep)
+ *lbasep = uoff & ~HAMMER2_PBUFMASK64;
+ if (leofp)
+ *leofp = (ip->meta.size + HAMMER2_PBUFMASK64) &
+ ~HAMMER2_PBUFMASK64;
+
+ return (HAMMER2_PBUFSIZE);
+}
+
+int
+hammer2_get_logical(void)
+{
+ return (hammer2_calc_logical(NULL, 0, NULL, NULL));
+}
+
+const char *
+hammer2_breftype_to_str(uint8_t type)
+{
+ switch (type) {
+ case HAMMER2_BREF_TYPE_EMPTY:
+ return ("empty");
+ case HAMMER2_BREF_TYPE_INODE:
+ return ("inode");
+ case HAMMER2_BREF_TYPE_INDIRECT:
+ return ("indirect");
+ case HAMMER2_BREF_TYPE_DATA:
+ return ("data");
+ case HAMMER2_BREF_TYPE_DIRENT:
+ return ("dirent");
+ case HAMMER2_BREF_TYPE_FREEMAP_NODE:
+ return ("freemap_node");
+ case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
+ return ("freemap_leaf");
+ case HAMMER2_BREF_TYPE_INVALID:
+ return ("invalid");
+ case HAMMER2_BREF_TYPE_FREEMAP:
+ return ("freemap");
+ case HAMMER2_BREF_TYPE_VOLUME:
+ return ("volume");
+ default:
+ return ("unknown");
+ }
+}
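
As a concrete example of the m32 split above, "os-release.txt" hashes the
fields "os", "release" and "txt" separately and sums the results. A
userland rendering of the same key construction, with zlib's crc32()
standing in for hammer2_icrc32() (so it will not produce on-disk-compatible
keys):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <zlib.h>	/* link with -lz */

static uint64_t
dirhash_sketch(const unsigned char *name, size_t len)
{
	uint32_t crcx = 0;
	uint64_t key = 0;
	size_t i, j;

	/* m32: hash the fields between separators and sum them. */
	for (i = j = 0; i < len; ++i)
		if (name[i] == '.' || name[i] == '-' ||
		    name[i] == '_' || name[i] == '~') {
			if (i != j)
				crcx += crc32(0, name + j, (unsigned)(i - j));
			j = i + 1;
		}
	if (i != j)
		crcx += crc32(0, name + j, (unsigned)(i - j));
	key |= ((uint64_t)(crcx | 0x80000000U)) << 32;	/* bit 63 on */

	/* l16: whole-name crc folded into bits 16-31. */
	crcx = crc32(0, name, (unsigned)len);
	key |= (crcx ^ (crcx << 16)) & 0xFFFF0000U;

	key |= 0x8000U;		/* bit 15, reserves cookie space below */
	return (key);
}

int
main(void)
{
	const char *n = "os-release.txt";	/* fields: os, release, txt */

	printf("%016jx\n", (uintmax_t)dirhash_sketch(
	    (const unsigned char *)n, strlen(n)));
	return (0);
}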
diff --git a/sys/fs/hammer2/hammer2_vfsops.c b/sys/fs/hammer2/hammer2_vfsops.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_vfsops.c
@@ -0,0 +1,1195 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+#include <sys/tree.h>
+#include <sys/vnode.h>
+
+#include <vm/uma.h>
+
+#include "hammer2.h"
+#include "hammer2_mount.h"
+
+static int hammer2_unmount(struct mount *, int);
+static int hammer2_statfs(struct mount *, struct statfs *);
+static void hammer2_update_pmps(hammer2_dev_t *);
+static void hammer2_mount_helper(struct mount *, hammer2_pfs_t *);
+static void hammer2_unmount_helper(struct mount *, hammer2_pfs_t *,
+ hammer2_dev_t *);
+
+MALLOC_DEFINE(M_HAMMER2, "hammer2_mount", "HAMMER2 mount structure");
+uma_zone_t zone_buffer_read;
+uma_zone_t zone_xops;
+
+/* Global list of HAMMER2 devices. */
+TAILQ_HEAD(hammer2_mntlist, hammer2_dev); /* <-> hammer2_dev::mntentry */
+typedef struct hammer2_mntlist hammer2_mntlist_t;
+static hammer2_mntlist_t hammer2_mntlist;
+
+/* Global list of PFSs. */
+TAILQ_HEAD(hammer2_pfslist, hammer2_pfs); /* <-> hammer2_pfs::mntentry */
+typedef struct hammer2_pfslist hammer2_pfslist_t;
+static hammer2_pfslist_t hammer2_pfslist;
+static hammer2_pfslist_t hammer2_spmplist;
+
+static struct lock hammer2_mntlk;
+
+static int hammer2_supported_version = HAMMER2_VOL_VERSION_DEFAULT;
+int hammer2_cluster_meta_read = 1; /* for physical read-ahead */
+int hammer2_cluster_data_read = 4; /* for physical read-ahead */
+long hammer2_inode_allocs;
+long hammer2_chain_allocs;
+long hammer2_dio_allocs;
+int hammer2_dio_limit = 256;
+
+SYSCTL_NODE(_vfs, OID_AUTO, hammer2, CTLFLAG_RW, 0, "HAMMER2 filesystem");
+SYSCTL_INT(_vfs_hammer2, OID_AUTO, supported_version, CTLFLAG_RD,
+ &hammer2_supported_version, 0, "Highest supported HAMMER2 version");
+SYSCTL_INT(_vfs_hammer2, OID_AUTO, cluster_meta_read, CTLFLAG_RW,
+ &hammer2_cluster_meta_read, 0, "Cluster read count for metadata");
+SYSCTL_INT(_vfs_hammer2, OID_AUTO, cluster_data_read, CTLFLAG_RW,
+ &hammer2_cluster_data_read, 0, "Cluster read count for user data");
+SYSCTL_LONG(_vfs_hammer2, OID_AUTO, inode_allocs, CTLFLAG_RD,
+ &hammer2_inode_allocs, 0, "Number of inodes allocated");
+SYSCTL_LONG(_vfs_hammer2, OID_AUTO, chain_allocs, CTLFLAG_RD,
+ &hammer2_chain_allocs, 0, "Number of chains allocated");
+SYSCTL_LONG(_vfs_hammer2, OID_AUTO, dio_allocs, CTLFLAG_RD,
+ &hammer2_dio_allocs, 0, "Number of dios allocated");
+SYSCTL_INT(_vfs_hammer2, OID_AUTO, dio_limit, CTLFLAG_RW,
+ &hammer2_dio_limit, 0, "Number of dios to keep for reuse");
+
+static const char *hammer2_opts[] = {
+ "export", "from", "hflags", NULL,
+};
+
+static int
+hammer2_assert_clean(void)
+{
+ int error = 0;
+
+ KKASSERT(hammer2_inode_allocs == 0);
+ if (hammer2_inode_allocs > 0) {
+ hprintf("%ld inode left\n", hammer2_inode_allocs);
+ error = EINVAL;
+ }
+ KKASSERT(hammer2_chain_allocs == 0);
+ if (hammer2_chain_allocs > 0) {
+ hprintf("%ld chain left\n", hammer2_chain_allocs);
+ error = EINVAL;
+ }
+ KKASSERT(hammer2_dio_allocs == 0);
+ if (hammer2_dio_allocs > 0) {
+ hprintf("%ld dio left\n", hammer2_dio_allocs);
+ error = EINVAL;
+ }
+
+ return (error);
+}
+
+static int
+hammer2_init(struct vfsconf *vfsp)
+{
+ hammer2_assert_clean();
+
+ hammer2_dio_limit = nbuf * 2;
+ if (hammer2_dio_limit > 100000)
+ hammer2_dio_limit = 100000;
+
+ zone_buffer_read = uma_zcreate("hammer2_buffer_read", 65536,
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ if (zone_buffer_read == NULL) {
+ hprintf("failed to create zone_buffer_read\n");
+ return (ENOMEM);
+ }
+
+ zone_xops = uma_zcreate("hammer2_xops", sizeof(hammer2_xop_t),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ if (zone_xops == NULL) {
+ uma_zdestroy(zone_buffer_read);
+ zone_buffer_read = NULL;
+ hprintf("failed to create zone_xops\n");
+ return (ENOMEM);
+ }
+
+ lockinit(&hammer2_mntlk, PVFS, "mntlk", 0, 0);
+
+ TAILQ_INIT(&hammer2_mntlist);
+ TAILQ_INIT(&hammer2_pfslist);
+ TAILQ_INIT(&hammer2_spmplist);
+
+ return (0);
+}
+
+static int
+hammer2_uninit(struct vfsconf *vfsp)
+{
+ lockdestroy(&hammer2_mntlk);
+
+ if (zone_buffer_read) {
+ uma_zdestroy(zone_buffer_read);
+ zone_buffer_read = NULL;
+ }
+ if (zone_xops) {
+ uma_zdestroy(zone_xops);
+ zone_xops = NULL;
+ }
+
+ hammer2_assert_clean();
+
+ KKASSERT(TAILQ_EMPTY(&hammer2_mntlist));
+ KKASSERT(TAILQ_EMPTY(&hammer2_pfslist));
+ KKASSERT(TAILQ_EMPTY(&hammer2_spmplist));
+
+ return (0);
+}
+
+/*
+ * Core PFS allocator. Used to allocate or reference the pmp structure
+ * for PFS cluster mounts and the spmp structure for media (hmp) structures.
+ */
+static hammer2_pfs_t *
+hammer2_pfsalloc(hammer2_chain_t *chain, const hammer2_inode_data_t *ripdata,
+ hammer2_dev_t *force_local)
+{
+ hammer2_pfs_t *pmp = NULL;
+ hammer2_inode_t *iroot;
+ int j;
+
+ KASSERT(force_local, ("only local mount allowed"));
+
+ /*
+ * Locate or create the PFS based on the cluster id. If ripdata
+ * is NULL this is a spmp which is unique and is always allocated.
+ *
+ * If the device is mounted in local mode all PFSs are considered
+ * independent and not part of any cluster.
+ */
+ if (ripdata) {
+ TAILQ_FOREACH(pmp, &hammer2_pfslist, mntentry) {
+ if (force_local != pmp->force_local)
+ continue;
+ if (force_local == NULL &&
+ bcmp(&pmp->pfs_clid, &ripdata->meta.pfs_clid,
+ sizeof(pmp->pfs_clid)) == 0) {
+ break;
+ } else if (force_local && pmp->pfs_names[0] &&
+ strcmp(pmp->pfs_names[0], ripdata->filename) == 0) {
+ break;
+ }
+ }
+ }
+
+ if (pmp == NULL) {
+ pmp = malloc(sizeof(*pmp), M_HAMMER2, M_WAITOK | M_ZERO);
+ pmp->force_local = force_local;
+ hammer2_spin_init(&pmp->inum_spin, "h2pmp_inosp");
+ hammer2_spin_init(&pmp->lru_spin, "h2pmp_lrusp");
+ hammer2_mtx_init(&pmp->xop_lock, "h2pmp_xoplk");
+ RB_INIT(&pmp->inum_tree);
+ TAILQ_INIT(&pmp->lru_list);
+
+ KKASSERT((HAMMER2_IHASH_SIZE & (HAMMER2_IHASH_SIZE - 1)) == 0);
+ pmp->ipdep_lists = hashinit(HAMMER2_IHASH_SIZE, M_HAMMER2,
+ &pmp->ipdep_mask);
+ KKASSERT(HAMMER2_IHASH_SIZE == pmp->ipdep_mask + 1);
+
+ if (ripdata) {
+ pmp->pfs_clid = ripdata->meta.pfs_clid;
+ TAILQ_INSERT_TAIL(&hammer2_pfslist, pmp, mntentry);
+ } else {
+ pmp->flags |= HAMMER2_PMPF_SPMP;
+ TAILQ_INSERT_TAIL(&hammer2_spmplist, pmp, mntentry);
+ }
+ }
+
+ /* Create the PFS's root inode. */
+ if ((iroot = pmp->iroot) == NULL) {
+ iroot = hammer2_inode_get(pmp, NULL, 1, -1);
+ if (ripdata)
+ iroot->meta = ripdata->meta;
+ pmp->iroot = iroot;
+ hammer2_inode_ref(iroot);
+ hammer2_inode_unlock(iroot);
+ }
+
+ /* Stop here if no chain is passed in. */
+ if (chain == NULL)
+ goto done;
+
+ /*
+ * When a chain is passed in we must add it to the PFS's root
+ * inode, update pmp->pfs_types[].
+ * When forcing local mode, mark the PFS as a MASTER regardless.
+ */
+ hammer2_inode_ref(iroot);
+ hammer2_mtx_ex(&iroot->lock);
+
+ j = iroot->cluster.nchains; /* Currently always 0. */
+ KASSERT(j == 0, ("nchains %d not 0", j));
+
+ KKASSERT(chain->pmp == NULL);
+ chain->pmp = pmp;
+ hammer2_chain_ref(chain);
+ iroot->cluster.array[j].chain = chain;
+ if (force_local)
+ pmp->pfs_types[j] = HAMMER2_PFSTYPE_MASTER;
+ else
+ pmp->pfs_types[j] = ripdata->meta.pfs_type;
+ pmp->pfs_names[j] = strdup(ripdata->filename, M_HAMMER2);
+ pmp->pfs_hmps[j] = chain->hmp;
+
+ /*
+ * If the PFS is already mounted we must account
+ * for the mount_count here.
+ */
+ if (pmp->mp)
+ ++chain->hmp->mount_count;
+ ++j;
+
+ iroot->cluster.nchains = j;
+ hammer2_assert_cluster(&iroot->cluster);
+
+ hammer2_mtx_unlock(&iroot->lock);
+ hammer2_inode_drop(iroot);
+done:
+ return (pmp);
+}
+
+/*
+ * Destroy a PFS. This typically occurs only after the last mount on a
+ * device has gone away.
+ */
+static void
+hammer2_pfsfree(hammer2_pfs_t *pmp)
+{
+ hammer2_inode_t *iroot;
+ hammer2_chain_t *chain;
+ int i, chains_still_present = 0;
+
+ KKASSERT(!(pmp->flags & HAMMER2_PMPF_WAITING));
+
+ /* Cleanup our reference on iroot. */
+ if (pmp->flags & HAMMER2_PMPF_SPMP)
+ TAILQ_REMOVE(&hammer2_spmplist, pmp, mntentry);
+ else
+ TAILQ_REMOVE(&hammer2_pfslist, pmp, mntentry);
+
+ /* Cleanup chains remaining on LRU list. */
+ hammer2_spin_ex(&pmp->lru_spin);
+ while ((chain = TAILQ_FIRST(&pmp->lru_list)) != NULL) {
+ KKASSERT(chain->flags & HAMMER2_CHAIN_ONLRU);
+ atomic_add_int(&pmp->lru_count, -1);
+ atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU);
+ TAILQ_REMOVE(&pmp->lru_list, chain, entry);
+ hammer2_chain_ref(chain);
+ hammer2_spin_unex(&pmp->lru_spin);
+ atomic_set_int(&chain->flags, HAMMER2_CHAIN_RELEASE);
+ hammer2_chain_drop(chain);
+ hammer2_spin_ex(&pmp->lru_spin);
+ }
+ hammer2_spin_unex(&pmp->lru_spin);
+
+ /* Clean up iroot. */
+ iroot = pmp->iroot;
+ if (iroot) {
+ for (i = 0; i < iroot->cluster.nchains; ++i) {
+ chain = iroot->cluster.array[i].chain;
+ if (chain && !RB_EMPTY(&chain->core.rbtree)) {
+ hprintf("PFS at %s has active chains\n",
+ pmp->mntpt);
+ chains_still_present = 1;
+ }
+ }
+ KASSERT(iroot->refs == 1,
+ ("iroot %p refs %d not 1", iroot, iroot->refs));
+
+ hammer2_inode_drop(iroot);
+ pmp->iroot = NULL;
+ }
+
+ /* Free remaining pmp resources. */
+ if (chains_still_present) {
+ hprintf("PFS at %s still in use\n", pmp->mntpt);
+ } else {
+ hammer2_spin_destroy(&pmp->inum_spin);
+ hammer2_spin_destroy(&pmp->lru_spin);
+ hammer2_mtx_destroy(&pmp->xop_lock);
+ hashdestroy(pmp->ipdep_lists, M_HAMMER2, pmp->ipdep_mask);
+ free(pmp, M_HAMMER2);
+ }
+}
+
+/*
+ * Remove all references to hmp from the pfs list. Any PFS which becomes
+ * empty is terminated and freed.
+ */
+static void
+hammer2_pfsfree_scan(hammer2_dev_t *hmp, int which)
+{
+ hammer2_pfs_t *pmp;
+ hammer2_inode_t *iroot;
+ hammer2_chain_t *rchain;
+ struct hammer2_pfslist *wlist;
+ int i;
+
+ if (which == 0)
+ wlist = &hammer2_pfslist;
+ else
+ wlist = &hammer2_spmplist;
+again:
+ TAILQ_FOREACH(pmp, wlist, mntentry) {
+ if ((iroot = pmp->iroot) == NULL)
+ continue;
+
+ /* Determine if this PFS is affected. */
+ for (i = 0; i < HAMMER2_MAXCLUSTER; ++i)
+ if (pmp->pfs_hmps[i] == hmp)
+ break;
+ if (i == HAMMER2_MAXCLUSTER)
+ continue;
+
+ /*
+ * Lock the inode and clean out matching chains.
+ * Note that we cannot use hammer2_inode_lock_*()
+ * here because that would attempt to validate the
+ * cluster that we are in the middle of ripping
+ * apart.
+ */
+ hammer2_mtx_ex(&iroot->lock);
+
+ /* Remove the chain from matching elements of the PFS. */
+ for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
+ if (pmp->pfs_hmps[i] != hmp)
+ continue;
+ rchain = iroot->cluster.array[i].chain;
+ iroot->cluster.array[i].chain = NULL;
+ pmp->pfs_types[i] = HAMMER2_PFSTYPE_NONE;
+ if (pmp->pfs_names[i]) {
+ free(pmp->pfs_names[i], M_HAMMER2);
+ pmp->pfs_names[i] = NULL;
+ }
+ if (rchain) {
+ hammer2_chain_drop(rchain);
+ /* focus hint */
+ if (iroot->cluster.focus == rchain)
+ iroot->cluster.focus = NULL;
+ }
+ pmp->pfs_hmps[i] = NULL;
+ }
+ hammer2_mtx_unlock(&iroot->lock);
+
+ /* Cleanup trailing chains. Gaps may remain. */
+ for (i = HAMMER2_MAXCLUSTER - 1; i >= 0; --i)
+ if (pmp->pfs_hmps[i])
+ break;
+ iroot->cluster.nchains = i + 1;
+
+ /* If the PMP has no elements remaining we can destroy it. */
+ if (iroot->cluster.nchains == 0) {
+ /*
+ * If this was the hmp's spmp, we need to clean
+ * a little more stuff out.
+ */
+ if (hmp->spmp == pmp) {
+ hmp->spmp = NULL;
+ hmp->vchain.pmp = NULL;
+ }
+
+ /* Free the pmp and restart the loop. */
+ hammer2_pfsfree(pmp);
+ goto again;
+ }
+ }
+}
+
+/*
+ * Mount or remount a HAMMER2 filesystem from physical media.
+ */
+static int
+hammer2_mount(struct mount *mp)
+{
+ struct vfsoptlist *opts = mp->mnt_optnew;
+ struct cdev *dev;
+ hammer2_dev_t *hmp = NULL, *hmp_tmp, *force_local;
+ hammer2_pfs_t *pmp = NULL, *spmp;
+ hammer2_key_t key_next, key_dummy, lhc;
+ hammer2_chain_t *chain, *parent;
+ const hammer2_inode_data_t *ripdata;
+ hammer2_devvp_list_t devvpl;
+ hammer2_devvp_t *e, *e_tmp;
+ hammer2_chain_t *schain;
+ hammer2_xop_head_t *xop;
+ char devstr[MNAMELEN] = {0};
+ char *fspec = NULL, *mntpt = NULL, *label = NULL;
+ int rdonly = (mp->mnt_flag & MNT_RDONLY) != 0;
+ int i, hflags, len, error, devvp_found;
+ int *hflagsp = NULL;
+
+ if (!rdonly) {
+ hprintf("write unsupported\n");
+ return (EINVAL);
+ }
+
+ /* Retrieve options first. */
+ if (vfs_filteropt(opts, hammer2_opts))
+ return (EINVAL);
+
+ error = vfs_getopt(opts, "from", (void **)&fspec, &len);
+ if (error)
+ return (EINVAL);
+ if (!fspec || fspec[len - 1] != '\0')
+ return (EINVAL);
+
+ error = vfs_getopt(opts, "fspath", (void **)&mntpt, NULL);
+ if (error)
+ return (EINVAL);
+
+ error = vfs_getopt(opts, "hflags", (void **)&hflagsp, NULL);
+ if (error)
+ return (EINVAL);
+ hflags = *hflagsp;
+
+ if (mp->mnt_flag & MNT_UPDATE)
+ return (0);
+
+ bcopy(fspec, devstr, MNAMELEN - 1);
+ debug_hprintf("devstr=\"%s\" mntpt=\"%s\"\n", devstr, mntpt);
+
+ /*
+	 * Extract the device and label; automatically mount @DATA if no label
+	 * is specified. Error out if no label or device is specified. This is
+	 * a convenience to match the default label created by newfs_hammer2;
+	 * our preference is that a label always be specified.
+ *
+ * NOTE: We allow 'mount @LABEL <blah>'... that is, a mount command
+ * that does not specify a device, as long as some HAMMER2 label
+ * has already been mounted from that device. This makes
+ * mounting snapshots a lot easier.
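+	 *
+	 *	 For example (with hypothetical names):
+	 *	   "/dev/ada1s1a@ROOT" -> device "/dev/ada1s1a", label "ROOT"
+	 *	   "/dev/ada1s1a"      -> device "/dev/ada1s1a", label "DATA"
+	 *	   "@SNAP1"            -> an already-mounted device, label "SNAP1"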
+ */
+ label = strchr(devstr, '@');
+ if (label == NULL || label[1] == 0) {
+ /*
+ * DragonFly uses either "BOOT", "ROOT" or "DATA" based
+ * on label[-1]. In FreeBSD, simply use "DATA" by default.
+ */
+ label = "DATA";
+ } else {
+ *label = '\0';
+ label++;
+ }
+
+ debug_hprintf("device=\"%s\" label=\"%s\" rdonly=%d\n",
+ devstr, label, rdonly);
+
+ /* Initialize all device vnodes. */
+ TAILQ_INIT(&devvpl);
+ error = hammer2_init_devvp(mp, devstr, &devvpl);
+ if (error) {
+ hprintf("failed to initialize devvp in %s\n", devstr);
+ hammer2_cleanup_devvp(&devvpl);
+ return (error);
+ }
+
+ /*
+	 * Determine if the device has already been mounted. After this
+	 * check, hmp will be non-NULL if this is the second or a later
+	 * HAMMER2 mount from the same device.
+ */
+ lockmgr(&hammer2_mntlk, LK_EXCLUSIVE, NULL);
+ if (!TAILQ_EMPTY(&devvpl)) {
+ /*
+ * Match the device. Due to the way devfs works,
+ * we may not be able to directly match the vnode pointer,
+ * so also check to see if the underlying device matches.
+ */
+ TAILQ_FOREACH(hmp_tmp, &hammer2_mntlist, mntentry) {
+ TAILQ_FOREACH(e_tmp, &hmp_tmp->devvp_list, entry) {
+ devvp_found = 0;
+ TAILQ_FOREACH(e, &devvpl, entry) {
+ KKASSERT(e->devvp);
+ if (e_tmp->devvp == e->devvp)
+ devvp_found = 1;
+ if (e_tmp->devvp->v_rdev &&
+ e_tmp->devvp->v_rdev == e->devvp->v_rdev)
+ devvp_found = 1;
+ }
+ if (!devvp_found)
+ goto next_hmp;
+ }
+ hmp = hmp_tmp;
+ debug_hprintf("hmp=%p matched\n", hmp);
+ break;
+next_hmp:
+ continue;
+ }
+ } else {
+ /* Match the label to a pmp already probed. */
+ TAILQ_FOREACH(pmp, &hammer2_pfslist, mntentry) {
+ for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
+ if (pmp->pfs_names[i] &&
+ strcmp(pmp->pfs_names[i], label) == 0) {
+ hmp = pmp->pfs_hmps[i];
+ break;
+ }
+ }
+ if (hmp)
+ break;
+ }
+ if (hmp == NULL) {
+ hprintf("PFS label \"%s\" not found\n", label);
+ hammer2_cleanup_devvp(&devvpl);
+ lockmgr(&hammer2_mntlk, LK_RELEASE, NULL);
+ return (ENOENT);
+ }
+ }
+
+ /*
+ * Open the device if this isn't a secondary mount and construct the
+ * HAMMER2 device mount (hmp).
+ */
+ if (hmp == NULL) {
+ /* Now open the device(s). */
+ KKASSERT(!TAILQ_EMPTY(&devvpl));
+ error = hammer2_open_devvp(mp, &devvpl);
+ if (error) {
+ hammer2_close_devvp(&devvpl);
+ hammer2_cleanup_devvp(&devvpl);
+ lockmgr(&hammer2_mntlk, LK_RELEASE, NULL);
+ return (error);
+ }
+
+ /* Construct volumes and link with device vnodes. */
+ hmp = malloc(sizeof(*hmp), M_HAMMER2, M_WAITOK | M_ZERO);
+ hmp->devvp = NULL;
+ error = hammer2_init_volumes(&devvpl, hmp->volumes,
+ &hmp->voldata, &hmp->devvp);
+ if (error) {
+ hammer2_close_devvp(&devvpl);
+ hammer2_cleanup_devvp(&devvpl);
+ lockmgr(&hammer2_mntlk, LK_RELEASE, NULL);
+ free(hmp, M_HAMMER2);
+ return (error);
+ }
+ if (!hmp->devvp) {
+ hprintf("failed to initialize root volume\n");
+ hammer2_unmount_helper(mp, NULL, hmp);
+ lockmgr(&hammer2_mntlk, LK_RELEASE, NULL);
+ hammer2_unmount(mp, MNT_FORCE);
+ return (EINVAL);
+ }
+
+ hmp->hflags = hflags & HMNT2_DEVFLAGS;
+ KKASSERT(hmp->hflags & HMNT2_LOCAL);
+
+ TAILQ_INSERT_TAIL(&hammer2_mntlist, hmp, mntentry);
+ RB_INIT(&hmp->iotree);
+ hammer2_mtx_init(&hmp->iotree_lock, "h2hmp_iotlk");
+
+ /*
+ * vchain setup. vchain.data is embedded.
+ * vchain.refs is initialized and will never drop to 0.
+ */
+ hmp->vchain.hmp = hmp;
+ hmp->vchain.refs = 1;
+ hmp->vchain.data = (void *)&hmp->voldata;
+ hmp->vchain.bref.type = HAMMER2_BREF_TYPE_VOLUME;
+ hmp->vchain.bref.data_off = 0 | HAMMER2_PBUFRADIX;
+ hmp->vchain.bref.mirror_tid = hmp->voldata.mirror_tid;
+ hammer2_chain_init(&hmp->vchain);
+
+ /* Initialize volume header related fields. */
+ KKASSERT(hmp->voldata.magic == HAMMER2_VOLUME_ID_HBO ||
+ hmp->voldata.magic == HAMMER2_VOLUME_ID_ABO);
+ /*
+ * Must use hmp instead of volume header for these two
+ * in order to handle volume versions transparently.
+ */
+ if (hmp->voldata.version >= HAMMER2_VOL_VERSION_MULTI_VOLUMES) {
+ hmp->nvolumes = hmp->voldata.nvolumes;
+ hmp->total_size = hmp->voldata.total_size;
+ } else {
+ hmp->nvolumes = 1;
+ hmp->total_size = hmp->voldata.volu_size;
+ }
+ KKASSERT(hmp->nvolumes > 0);
+
+ /* Move devvpl entries to hmp. */
+ TAILQ_INIT(&hmp->devvp_list);
+ while ((e = TAILQ_FIRST(&devvpl)) != NULL) {
+ TAILQ_REMOVE(&devvpl, e, entry);
+ TAILQ_INSERT_TAIL(&hmp->devvp_list, e, entry);
+ }
+ KKASSERT(TAILQ_EMPTY(&devvpl));
+ KKASSERT(!TAILQ_EMPTY(&hmp->devvp_list));
+
+ /*
+ * Really important to get these right or teardown code
+ * will get confused.
+ */
+ hmp->spmp = hammer2_pfsalloc(NULL, NULL, hmp);
+ spmp = hmp->spmp;
+ spmp->pfs_hmps[0] = hmp;
+
+ /*
+ * Dummy-up vchain's modify_tid.
+ * mirror_tid is inherited from the volume header.
+ */
+ hmp->vchain.bref.mirror_tid = hmp->voldata.mirror_tid;
+ hmp->vchain.bref.modify_tid = hmp->vchain.bref.mirror_tid;
+ hmp->vchain.pmp = spmp;
+
+ /*
+ * First locate the super-root inode, which is key 0
+ * relative to the volume header's blockset.
+ *
+ * Then locate the root inode by scanning the directory keyspace
+ * represented by the label.
+ */
+ parent = hammer2_chain_lookup_init(&hmp->vchain, 0);
+ schain = hammer2_chain_lookup(&parent, &key_dummy,
+ HAMMER2_SROOT_KEY, HAMMER2_SROOT_KEY, &error, 0);
+ hammer2_chain_lookup_done(parent);
+ if (schain == NULL) {
+ hprintf("invalid super-root\n");
+ hammer2_unmount_helper(mp, NULL, hmp);
+ lockmgr(&hammer2_mntlk, LK_RELEASE, NULL);
+ hammer2_unmount(mp, MNT_FORCE);
+ return (EINVAL);
+ }
+ if (schain->error) {
+ hprintf("chain error %08x reading super-root\n",
+ schain->error);
+ hammer2_chain_unlock(schain);
+ hammer2_chain_drop(schain);
+ schain = NULL;
+ hammer2_unmount_helper(mp, NULL, hmp);
+ lockmgr(&hammer2_mntlk, LK_RELEASE, NULL);
+ hammer2_unmount(mp, MNT_FORCE);
+ return (EINVAL);
+ }
+
+ /*
+ * Sanity-check schain's pmp and finish initialization.
+ * Any chain belonging to the super-root topology should
+ * have a NULL pmp (not even set to spmp).
+ */
+ ripdata = &schain->data->ipdata;
+ KKASSERT(schain->pmp == NULL);
+ spmp->pfs_clid = ripdata->meta.pfs_clid;
+
+ /*
+ * Replace the dummy spmp->iroot with a real one. It's
+ * easier to just do a wholesale replacement than to try
+	 * to update the chain and fix up the iroot fields.
+ *
+ * The returned inode is locked with the supplied cluster.
+ */
+ xop = uma_zalloc(zone_xops, M_WAITOK | M_ZERO);
+ hammer2_dummy_xop_from_chain(xop, schain);
+ hammer2_inode_drop(spmp->iroot);
+ spmp->iroot = hammer2_inode_get(spmp, xop, -1, -1);
+ spmp->spmp_hmp = hmp;
+ spmp->pfs_types[0] = ripdata->meta.pfs_type;
+ spmp->pfs_hmps[0] = hmp;
+ hammer2_inode_ref(spmp->iroot);
+ hammer2_inode_unlock(spmp->iroot);
+ hammer2_chain_unlock(schain);
+ hammer2_chain_drop(schain);
+ schain = NULL;
+ uma_zfree(zone_xops, xop);
+ /* Leave spmp->iroot with one ref. */
+
+ /*
+ * A false-positive lock order reversal may be detected.
+ * There are 2 directions of locking, which is a bad design.
+ * chain is locked -> hammer2_inode_get() -> lock inode
+ * inode is locked -> hammer2_inode_chain() -> lock chain
+ */
+ hammer2_update_pmps(hmp);
+ } else {
+ spmp = hmp->spmp;
+ if (hflags & HMNT2_DEVFLAGS)
+ hprintf("Warning: mount flags pertaining to the whole "
+ "device may only be specified on the first mount "
+ "of the device: %08x\n",
+ hflags & HMNT2_DEVFLAGS);
+ }
+
+ /*
+ * Force local mount (disassociate all PFSs from their clusters)
+ * if HMNT2_LOCAL.
+ */
+ force_local = (hmp->hflags & HMNT2_LOCAL) ? hmp : NULL;
+
+ /*
+ * Lookup the mount point under the media-localized super-root.
+ * Scanning hammer2_pfslist doesn't help us because it represents
+	 * PFS cluster ids, which can aggregate several named PFSs together.
+ */
+ hammer2_inode_lock(spmp->iroot, 0);
+ parent = hammer2_inode_chain(spmp->iroot, 0, HAMMER2_RESOLVE_ALWAYS);
+ lhc = hammer2_dirhash(label, strlen(label));
+ chain = hammer2_chain_lookup(&parent, &key_next, lhc,
+ lhc + HAMMER2_DIRHASH_LOMASK, &error, 0);
+ while (chain) {
+ if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
+ strcmp(label, chain->data->ipdata.filename) == 0)
+ break;
+ chain = hammer2_chain_next(&parent, chain, &key_next, key_next,
+ lhc + HAMMER2_DIRHASH_LOMASK, &error, 0);
+ }
+ if (parent) {
+ hammer2_chain_unlock(parent);
+ hammer2_chain_drop(parent);
+ }
+ hammer2_inode_unlock(spmp->iroot);
+
+ /* PFS could not be found? */
+ if (chain == NULL) {
+ hammer2_unmount_helper(mp, NULL, hmp);
+ lockmgr(&hammer2_mntlk, LK_RELEASE, NULL);
+ hammer2_unmount(mp, MNT_FORCE);
+
+ if (error) {
+ hprintf("PFS label \"%s\" error %08x\n", label, error);
+ return (EINVAL);
+ } else {
+ hprintf("PFS label \"%s\" not found\n", label);
+ return (ENOENT);
+ }
+ }
+
+ /* Acquire the pmp structure. */
+ if (chain->error) {
+ hprintf("PFS label \"%s\" chain error %08x\n",
+ label, chain->error);
+ } else {
+ ripdata = &chain->data->ipdata;
+ pmp = hammer2_pfsalloc(NULL, ripdata, force_local);
+ }
+ hammer2_chain_unlock(chain);
+ hammer2_chain_drop(chain);
+
+ /* PFS to mount must exist at this point. */
+ if (pmp == NULL) {
+ hprintf("failed to acquire PFS structure\n");
+ hammer2_unmount_helper(mp, NULL, hmp);
+ lockmgr(&hammer2_mntlk, LK_RELEASE, NULL);
+ hammer2_unmount(mp, MNT_FORCE);
+ return (EINVAL);
+ }
+
+ /* Finish the mount. */
+ debug_hprintf("hmp=%p pmp=%p\n", hmp, pmp);
+
+ if (pmp->mp) {
+ hprintf("PFS already mounted!\n");
+ hammer2_unmount_helper(mp, NULL, hmp);
+ lockmgr(&hammer2_mntlk, LK_RELEASE, NULL);
+ hammer2_unmount(mp, MNT_FORCE);
+ return (EBUSY);
+ }
+
+ /*
+	 * dev2udev(dev) alone isn't unique per PFS, and pfs_clid alone
+	 * isn't unique across multiple mounts of the same image, so
+	 * combine both to form f_fsid.
+ */
+ KKASSERT(!TAILQ_EMPTY(&hmp->devvp_list));
+ dev = TAILQ_FIRST(&hmp->devvp_list)->devvp->v_rdev;
+ KKASSERT(dev);
+ mp->mnt_stat.f_fsid.val[0] = ((int32_t)dev2udev(dev)) ^
+ ((int32_t)pmp->pfs_clid.time_low);
+ mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
+
+ MNT_ILOCK(mp);
+ mp->mnt_flag |= MNT_LOCAL;
+ mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
+ MNTK_USES_BCACHE;
+ MNT_IUNLOCK(mp);
+
+ /* Required mount structure initializations. */
+ mp->mnt_stat.f_iosize = HAMMER2_PBUFSIZE;
+ mp->mnt_stat.f_bsize = HAMMER2_PBUFSIZE;
+
+ /* Connect up mount pointers. */
+ hammer2_mount_helper(mp, pmp);
+ lockmgr(&hammer2_mntlk, LK_RELEASE, NULL);
+
+ /* Initial statfs to prime mnt_stat. */
+ hammer2_statfs(mp, &mp->mnt_stat);
+
+ strlcpy(pmp->mntpt, mntpt, sizeof(pmp->mntpt));
+ vfs_mountedfrom(mp, fspec);
+
+ return (0);
+}
+
+/*
+ * Scan PFSs under the super-root and create hammer2_pfs structures.
+ */
+static void
+hammer2_update_pmps(hammer2_dev_t *hmp)
+{
+ hammer2_dev_t *force_local;
+ hammer2_pfs_t *spmp;
+ const hammer2_inode_data_t *ripdata;
+ hammer2_chain_t *parent;
+ hammer2_chain_t *chain;
+ hammer2_key_t key_next;
+ int error;
+
+ /*
+ * Force local mount (disassociate all PFSs from their clusters)
+ * if HMNT2_LOCAL.
+ */
+ force_local = (hmp->hflags & HMNT2_LOCAL) ? hmp : NULL;
+
+ /* Lookup mount point under the media-localized super-root. */
+ spmp = hmp->spmp;
+ hammer2_inode_lock(spmp->iroot, 0);
+ parent = hammer2_inode_chain(spmp->iroot, 0, HAMMER2_RESOLVE_ALWAYS);
+ chain = hammer2_chain_lookup(&parent, &key_next, HAMMER2_KEY_MIN,
+ HAMMER2_KEY_MAX, &error, 0);
+ while (chain) {
+ if (chain->error) {
+ hprintf("chain error %08x reading PFS root\n",
+ chain->error);
+ } else if (chain->bref.type != HAMMER2_BREF_TYPE_INODE) {
+ hprintf("non inode chain type %d under super-root\n",
+ chain->bref.type);
+ } else {
+ ripdata = &chain->data->ipdata;
+ hammer2_pfsalloc(chain, ripdata, force_local);
+ }
+ chain = hammer2_chain_next(&parent, chain, &key_next, key_next,
+ HAMMER2_KEY_MAX, &error, 0);
+ }
+ if (parent) {
+ hammer2_chain_unlock(parent);
+ hammer2_chain_drop(parent);
+ }
+ hammer2_inode_unlock(spmp->iroot);
+}
+
+static int
+hammer2_unmount(struct mount *mp, int mntflags)
+{
+ hammer2_pfs_t *pmp = MPTOPMP(mp);
+ int flags = 0, error = 0;
+
+ /* Still NULL during mount before hammer2_mount_helper() called. */
+ if (pmp == NULL)
+		return (0);
+
+ KKASSERT(pmp->mp);
+ KKASSERT(pmp->iroot);
+
+ lockmgr(&hammer2_mntlk, LK_EXCLUSIVE, NULL);
+
+ if (mntflags & MNT_FORCE)
+ flags |= FORCECLOSE;
+ error = vflush(mp, 0, flags, curthread);
+ if (error) {
+ hprintf("vflush failed %d\n", error);
+ goto failed;
+ }
+
+ hammer2_unmount_helper(mp, pmp, NULL);
+failed:
+ lockmgr(&hammer2_mntlk, LK_RELEASE, NULL);
+
+ if (TAILQ_EMPTY(&hammer2_mntlist))
+ hammer2_assert_clean();
+
+ return (error);
+}
+
+/*
+ * Mount helper, hook the system mount into our PFS.
+ * The mount lock is held.
+ *
+ * We must bump the mount_count on related devices for any mounted PFSs.
+ */
+static void
+hammer2_mount_helper(struct mount *mp, hammer2_pfs_t *pmp)
+{
+ hammer2_cluster_t *cluster;
+ hammer2_chain_t *rchain;
+ int i;
+
+ mp->mnt_data = (qaddr_t)pmp;
+ pmp->mp = mp;
+
+ /* After pmp->mp is set adjust hmp->mount_count. */
+ cluster = &pmp->iroot->cluster;
+ for (i = 0; i < cluster->nchains; ++i) {
+ rchain = cluster->array[i].chain;
+ if (rchain == NULL)
+ continue;
+ ++rchain->hmp->mount_count;
+ }
+}
+
+/*
+ * Unmount helper, unhook the system mount from our PFS.
+ * The mount lock is held.
+ *
+ * If hmp is supplied, a mount responsible for being the first to open
+ * the block device failed, and the block device and all PFSs using the
+ * block device must be cleaned up.
+ *
+ * If pmp is supplied, multiple devices might be backing the PFS and each
+ * must be disconnected. This might not be the last PFS using some of the
+ * underlying devices. Also, we have to adjust our hmp->mount_count
+ * accounting for the devices backing the pmp, which is now undergoing an
+ * unmount.
+ */
+static void
+hammer2_unmount_helper(struct mount *mp, hammer2_pfs_t *pmp, hammer2_dev_t *hmp)
+{
+ hammer2_cluster_t *cluster;
+ hammer2_chain_t *rchain;
+ int i, dumpcnt __diagused;
+
+ /*
+	 * If no device is supplied this is a high-level unmount and we
+	 * have to disconnect the mount, adjust mount_count, and locate
+	 * devices that might now have no mounts.
+ */
+ if (pmp) {
+ KKASSERT(hmp == NULL);
+ KKASSERT(MPTOPMP(mp) == pmp);
+ pmp->mp = NULL;
+ mp->mnt_data = NULL;
+
+ /*
+ * After pmp->mp is cleared we have to account for
+ * mount_count.
+ */
+ cluster = &pmp->iroot->cluster;
+ for (i = 0; i < cluster->nchains; ++i) {
+ rchain = cluster->array[i].chain;
+ if (rchain == NULL)
+ continue;
+ --rchain->hmp->mount_count;
+ /* Scrapping hmp now may invalidate the pmp. */
+ }
+again:
+ TAILQ_FOREACH(hmp, &hammer2_mntlist, mntentry) {
+ if (hmp->mount_count == 0) {
+ hammer2_unmount_helper(NULL, NULL, hmp);
+ goto again;
+ }
+ }
+ return;
+ }
+
+ /*
+ * Try to terminate the block device. We can't terminate it if
+ * there are still PFSs referencing it.
+ */
+ if (hmp->mount_count) {
+ hprintf("%d PFS mounts still exist\n", hmp->mount_count);
+ return;
+ }
+
+ hammer2_pfsfree_scan(hmp, 0);
+ hammer2_pfsfree_scan(hmp, 1);
+ KKASSERT(hmp->spmp == NULL);
+
+ /* Finish up with the device vnode. */
+ if (!TAILQ_EMPTY(&hmp->devvp_list)) {
+ hammer2_close_devvp(&hmp->devvp_list);
+ hammer2_cleanup_devvp(&hmp->devvp_list);
+ }
+ KKASSERT(TAILQ_EMPTY(&hmp->devvp_list));
+#ifdef INVARIANTS
+ /*
+ * Final drop of embedded volume root chain to clean up
+ * vchain.core (vchain structure is not flagged ALLOCATED
+ * so it is cleaned out and then left to rot).
+ */
+ dumpcnt = 50;
+ hammer2_dump_chain(&hmp->vchain, 0, 0, &dumpcnt, 'v', (unsigned int)-1);
+ hammer2_chain_drop(&hmp->vchain);
+#endif
+ hammer2_mtx_ex(&hmp->iotree_lock);
+ hammer2_io_cleanup(hmp, &hmp->iotree);
+ if (hmp->iofree_count)
+ debug_hprintf("%d I/O's left hanging\n", hmp->iofree_count);
+ hammer2_mtx_unlock(&hmp->iotree_lock);
+
+ TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry);
+ hammer2_mtx_destroy(&hmp->iotree_lock);
+
+ free(hmp, M_HAMMER2);
+}
+
+static int
+hammer2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
+{
+ hammer2_pfs_t *pmp = MPTOPMP(mp);
+ hammer2_inode_t *ip;
+ hammer2_xop_lookup_t *xop;
+ hammer2_tid_t inum;
+ int error;
+
+ inum = (hammer2_tid_t)ino & HAMMER2_DIRHASH_USERMSK;
+
+ /* Easy if we already have it cached. */
+ ip = hammer2_inode_lookup(pmp, inum);
+ if (ip) {
+ hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
+ error = hammer2_igetv(ip, flags, vpp);
+ hammer2_inode_unlock(ip);
+ hammer2_inode_drop(ip); /* from lookup */
+ return (error);
+ }
+
+ /* Otherwise we have to find the inode. */
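+	/*
+	 * XOP lifecycle: allocate against the PFS root, set the request
+	 * key, start the backend, collect the result, then retire to
+	 * release the cluster references.
+	 */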
+ xop = hammer2_xop_alloc(pmp->iroot);
+ xop->lhc = inum;
+ hammer2_xop_start(&xop->head, &hammer2_lookup_desc);
+ error = hammer2_xop_collect(&xop->head, 0);
+
+ if (error == 0)
+ ip = hammer2_inode_get(pmp, &xop->head, -1, -1);
+ hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
+
+ if (ip) {
+ error = hammer2_igetv(ip, flags, vpp);
+ hammer2_inode_unlock(ip);
+ } else {
+ *vpp = NULL;
+ error = ENOENT;
+ }
+
+ return (error);
+}
+
+static int
+hammer2_root(struct mount *mp, int flags, struct vnode **vpp)
+{
+ hammer2_pfs_t *pmp = MPTOPMP(mp);
+ int error;
+
+ if (pmp->iroot == NULL) {
+ hprintf("%s has no root inode\n", mp->mnt_stat.f_mntfromname);
+ *vpp = NULL;
+ return (EINVAL);
+ }
+
+ hammer2_inode_lock(pmp->iroot, HAMMER2_RESOLVE_SHARED);
+ error = hammer2_igetv(pmp->iroot, LK_EXCLUSIVE, vpp);
+ hammer2_inode_unlock(pmp->iroot);
+
+ return (error);
+}
+
+static int
+hammer2_statfs(struct mount *mp, struct statfs *sbp)
+{
+ hammer2_pfs_t *pmp = MPTOPMP(mp);
+ hammer2_dev_t *hmp;
+ hammer2_cluster_t *cluster;
+ hammer2_chain_t *chain;
+
+ KKASSERT(mp->mnt_stat.f_iosize > 0);
+ KKASSERT(mp->mnt_stat.f_bsize > 0);
+
+ hmp = pmp->pfs_hmps[0];
+ if (hmp == NULL)
+ return (EINVAL);
+
+ cluster = &pmp->iroot->cluster;
+ hammer2_assert_cluster(cluster);
+
+ chain = cluster->array[0].chain;
+
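+	/*
+	 * Block totals come from the volume header's allocator fields;
+	 * f_files comes from the PFS root chain's embedded inode stats.
+	 * Inodes are allocated dynamically, so f_ffree is reported as 0.
+	 */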
+ sbp->f_bsize = mp->mnt_stat.f_bsize;
+ sbp->f_iosize = mp->mnt_stat.f_iosize;
+ sbp->f_blocks = hmp->voldata.allocator_size / mp->mnt_stat.f_bsize;
+ sbp->f_bfree = hmp->voldata.allocator_free / mp->mnt_stat.f_bsize;
+ sbp->f_bavail = sbp->f_bfree;
+ sbp->f_files = chain ? chain->bref.embed.stats.inode_count : 0;
+ sbp->f_ffree = 0;
+
+ return (0);
+}
+
+static int
+hammer2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
+{
+ hammer2_inode_t *ip;
+ hammer2_tid_t inum;
+ int error;
+
+ inum = ((hammer2_tid_t *)fhp->fid_data)[0] & HAMMER2_DIRHASH_USERMSK;
+ if (vpp) {
+ if (inum == 1)
+ error = hammer2_root(mp, LK_EXCLUSIVE, vpp);
+ else
+ error = hammer2_vget(mp, inum, LK_EXCLUSIVE, vpp);
+ } else {
+ error = 0;
+ }
+
+	if (error == 0 && vpp && *vpp) {
+		ip = VTOI(*vpp);
+		vnode_create_vobject(*vpp, ip->meta.size, curthread);
+	}
+
+ return (error);
+}
+
+static struct vfsops hammer2_vfsops = {
+ .vfs_init = hammer2_init,
+ .vfs_uninit = hammer2_uninit,
+ .vfs_mount = hammer2_mount,
+ .vfs_unmount = hammer2_unmount,
+ .vfs_vget = hammer2_vget,
+ .vfs_root = hammer2_root,
+ .vfs_statfs = hammer2_statfs,
+ .vfs_fhtovp = hammer2_fhtovp,
+};
+
+VFS_SET(hammer2_vfsops, hammer2, VFCF_READONLY);
+MODULE_VERSION(hammer2, 1);
diff --git a/sys/fs/hammer2/hammer2_vnops.c b/sys/fs/hammer2/hammer2_vnops.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_vnops.c
@@ -0,0 +1,726 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/dirent.h>
+#include <sys/limits.h>
+#include <sys/malloc.h>
+#include <sys/namei.h>
+#include <sys/uio.h>
+#include <sys/unistd.h>
+#include <sys/vnode.h>
+
+#include <vm/vnode_pager.h>
+
+#include "hammer2.h"
+
+static int
+hammer2_inactive(struct vop_inactive_args *ap)
+{
+ struct vnode *vp = ap->a_vp;
+ hammer2_inode_t *ip = VTOI(vp);
+
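+	/*
+	 * A zeroed mode suggests the inode was deleted or never fully
+	 * initialized; recycle the vnode immediately.
+	 */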
+ if (ip->meta.mode == 0)
+ vrecycle(vp);
+
+ return (0);
+}
+
+static int
+hammer2_reclaim(struct vop_reclaim_args *ap)
+{
+ struct vnode *vp = ap->a_vp;
+ hammer2_inode_t *ip = VTOI(vp);
+
+ vfs_hash_remove(vp);
+
+ vp->v_data = NULL;
+ ip->vp = NULL;
+
+ hammer2_inode_drop(ip);
+
+ return (0);
+}
+
+static int
+hammer2_access(struct vop_access_args *ap)
+{
+ struct vnode *vp = ap->a_vp;
+ hammer2_inode_t *ip = VTOI(vp);
+ uid_t uid;
+ gid_t gid;
+ mode_t mode;
+
+ if (vp->v_type == VCHR || vp->v_type == VBLK)
+ return (EOPNOTSUPP);
+
+ /*
+	 * Disallow write attempts on a read-only filesystem unless
+	 * the file is a socket or fifo resident on the filesystem.
+ */
+ if (ap->a_accmode & VWRITE) {
+ switch (vp->v_type) {
+ case VDIR:
+ case VLNK:
+ case VREG:
+ return (EROFS);
+ /* NOT REACHED */
+ default:
+ break;
+ }
+ }
+
+ uid = hammer2_to_unix_xid(&ip->meta.uid);
+ gid = hammer2_to_unix_xid(&ip->meta.gid);
+ mode = ip->meta.mode;
+
+ return (vaccess(vp->v_type, mode, uid, gid, ap->a_accmode, ap->a_cred));
+}
+
+static int
+hammer2_getattr(struct vop_getattr_args *ap)
+{
+ struct vnode *vp = ap->a_vp;
+ struct vattr *vap = ap->a_vap;
+ hammer2_inode_t *ip = VTOI(vp);
+ hammer2_pfs_t *pmp = ip->pmp;
+
+ vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
+ vap->va_fileid = ip->meta.inum;
+ vap->va_mode = ip->meta.mode;
+ vap->va_nlink = ip->meta.nlinks;
+ vap->va_uid = hammer2_to_unix_xid(&ip->meta.uid);
+ vap->va_gid = hammer2_to_unix_xid(&ip->meta.gid);
+ vap->va_rdev = NODEV;
+ vap->va_size = ip->meta.size;
+ vap->va_flags = ip->meta.uflags;
+ hammer2_time_to_timespec(ip->meta.ctime, &vap->va_ctime);
+ hammer2_time_to_timespec(ip->meta.mtime, &vap->va_mtime);
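+	/* atime is not maintained; report mtime in its place. */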
+ hammer2_time_to_timespec(ip->meta.mtime, &vap->va_atime);
+ vap->va_gen = 1;
+ vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
+ if (ip->meta.type == HAMMER2_OBJTYPE_DIRECTORY) {
+ /*
+ * Can't really calculate directory use sans the files under
+ * it, just assume one block for now.
+ */
+ vap->va_bytes = HAMMER2_INODE_BYTES;
+ } else {
+ vap->va_bytes = hammer2_inode_data_count(ip);
+ }
+ vap->va_type = hammer2_get_vtype(ip->meta.type);
+ vap->va_filerev = 0;
+
+ return (0);
+}
+
+static int
+hammer2_setattr(struct vop_setattr_args *ap)
+{
+ struct vnode *vp = ap->a_vp;
+ struct vattr *vap = ap->a_vap;
+
+ if (vap->va_flags != (u_long)VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
+ vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
+ vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL)
+ return (EROFS);
+
+ if (vap->va_size != (u_quad_t)VNOVAL) {
+ switch (vp->v_type) {
+ case VDIR:
+ return (EISDIR);
+ case VLNK:
+ case VREG:
+ return (EROFS);
+ case VCHR:
+ case VBLK:
+ case VSOCK:
+ case VFIFO:
+ case VNON:
+ case VBAD:
+ case VMARKER:
+ return (0);
+ }
+ }
+ return (0);
+}
+
+static int
+hammer2_write_dirent(struct uio *uio, ino_t d_fileno, uint8_t d_type,
+ uint16_t d_namlen, const char *d_name, int *errorp)
+{
+ struct dirent dirent;
+ size_t reclen;
+
+ reclen = _GENERIC_DIRLEN(d_namlen);
+ if (reclen > uio->uio_resid)
+ return (1); /* uio has no space left, end this readdir */
+
+ dirent.d_fileno = d_fileno;
+ dirent.d_off = uio->uio_offset + reclen;
+ dirent.d_reclen = reclen;
+ dirent.d_type = d_type;
+ dirent.d_namlen = d_namlen;
+ bcopy(d_name, dirent.d_name, d_namlen);
+ dirent_terminate(&dirent);
+
+ *errorp = uiomove(&dirent, reclen, uio);
+
+ return (0); /* uio has space left */
+}
+
+static int
+hammer2_readdir(struct vop_readdir_args *ap)
+{
+ hammer2_xop_readdir_t *xop;
+ hammer2_inode_t *ip = VTOI(ap->a_vp);
+ const hammer2_inode_data_t *ripdata;
+ hammer2_blockref_t bref;
+ hammer2_tid_t inum;
+ hammer2_key_t lkey;
+ struct uio *uio = ap->a_uio;
+ off_t saveoff = uio->uio_offset;
+ off_t *cookies;
+ int ncookies, r, dtype;
+ int cookie_index = 0, eofflag = 0, error = 0;
+ uint16_t namlen;
+ const char *dname;
+
+	/* Set up directory entry cookies if requested. */
+ if (ap->a_ncookies) {
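+		/*
+		 * Heuristic: assume roughly 16 bytes per directory entry
+		 * when sizing the cookie array, capped at 1024 cookies.
+		 */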
+ ncookies = uio->uio_resid / 16 + 1;
+ if (ncookies > 1024)
+ ncookies = 1024;
+ cookies = malloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
+ } else {
+ ncookies = -1;
+ cookies = NULL;
+ }
+
+ hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
+
+ /*
+ * Handle artificial entries. To ensure that only positive 64 bit
+ * quantities are returned to userland we always strip off bit 63.
+ * The hash code is designed such that codes 0x0000-0x7FFF are not
+	 * used, allowing us to use these codes for artificial entries.
+ *
+ * Entry 0 is used for '.' and entry 1 is used for '..'. Do not
+ * allow '..' to cross the mount point into (e.g.) the super-root.
+ */
+ if (saveoff == 0) {
+ inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK;
+ r = hammer2_write_dirent(uio, inum, DT_DIR, 1, ".", &error);
+ if (r)
+ goto done;
+ if (cookies)
+ cookies[cookie_index] = saveoff;
+ ++saveoff;
+ ++cookie_index;
+ if (cookie_index == ncookies)
+ goto done;
+ }
+
+ if (saveoff == 1) {
+ inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK;
+ if (ip != ip->pmp->iroot)
+ inum = ip->meta.iparent & HAMMER2_DIRHASH_USERMSK;
+ r = hammer2_write_dirent(uio, inum, DT_DIR, 2, "..", &error);
+ if (r)
+ goto done;
+ if (cookies)
+ cookies[cookie_index] = saveoff;
+ ++saveoff;
+ ++cookie_index;
+ if (cookie_index == ncookies)
+ goto done;
+ }
+
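+	/*
+	 * Resume the media scan at the saved offset. HAMMER2_DIRHASH_VISIBLE
+	 * restores bit 63, which is stripped from offsets returned to
+	 * userland, selecting the visible directory keyspace.
+	 */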
+ lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
+ if (error)
+ goto done;
+
+ /* Use XOP for remaining entries. */
+ xop = hammer2_xop_alloc(ip);
+ xop->lkey = lkey;
+ hammer2_xop_start(&xop->head, &hammer2_readdir_desc);
+
+ for (;;) {
+ error = hammer2_xop_collect(&xop->head, 0);
+ error = hammer2_error_to_errno(error);
+ if (error)
+ break;
+ if (cookie_index == ncookies)
+ break;
+ hammer2_cluster_bref(&xop->head.cluster, &bref);
+
+ if (bref.type == HAMMER2_BREF_TYPE_INODE) {
+ ripdata = &hammer2_xop_gdata(&xop->head)->ipdata;
+ dtype = hammer2_get_dtype(ripdata->meta.type);
+ saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
+ r = hammer2_write_dirent(uio,
+ ripdata->meta.inum & HAMMER2_DIRHASH_USERMSK,
+ dtype, ripdata->meta.name_len, ripdata->filename,
+ &error);
+ hammer2_xop_pdata(&xop->head);
+ if (r)
+ break;
+ if (cookies)
+ cookies[cookie_index] = saveoff;
+ ++cookie_index;
+ } else if (bref.type == HAMMER2_BREF_TYPE_DIRENT) {
+ dtype = hammer2_get_dtype(bref.embed.dirent.type);
+ saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
+ namlen = bref.embed.dirent.namlen;
+ if (namlen <= sizeof(bref.check.buf))
+ dname = bref.check.buf;
+ else
+ dname = hammer2_xop_gdata(&xop->head)->buf;
+ r = hammer2_write_dirent(uio, bref.embed.dirent.inum,
+ dtype, namlen, dname, &error);
+ if (namlen > sizeof(bref.check.buf))
+ hammer2_xop_pdata(&xop->head);
+ if (r)
+ break;
+ if (cookies)
+ cookies[cookie_index] = saveoff;
+ ++cookie_index;
+ } else {
+ /* XXX chain error */
+ hprintf("bad blockref type %d\n", bref.type);
+ }
+ }
+ hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
+ if (error == ENOENT) {
+ error = 0;
+ eofflag = 1;
+ saveoff = (hammer2_key_t)-1;
+ } else {
+ saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
+ }
+done:
+ hammer2_inode_unlock(ip);
+
+ if (ap->a_eofflag)
+ *ap->a_eofflag = eofflag;
+ uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
+
+ if (error && cookie_index == 0) {
+ if (cookies) {
+ free(cookies, M_TEMP);
+ *ap->a_ncookies = 0;
+ *ap->a_cookies = NULL;
+ }
+ } else {
+ if (cookies) {
+ *ap->a_ncookies = cookie_index;
+ *ap->a_cookies = cookies;
+ }
+ }
+
+ return (error);
+}
+
+/*
+ * Perform read operations on a file or symlink given an unlocked
+ * inode and uio.
+ */
+static int
+hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int ioflag)
+{
+ struct vnode *vp = ip->vp;
+ struct buf *bp;
+ hammer2_off_t isize = ip->meta.size;
+ hammer2_key_t lbase;
+ daddr_t lbn;
+ int lblksize, loff, n, seqcount = 0, error = 0;
+
+ if (ioflag)
+ seqcount = ioflag >> IO_SEQSHIFT;
+
+ while (uio->uio_resid > 0 && uio->uio_offset < isize) {
+ lblksize = hammer2_calc_logical(ip, uio->uio_offset, &lbase,
+ NULL);
+ lbn = lbase / lblksize;
+ bp = NULL;
+
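+		/*
+		 * Use plain bread() for the final logical block or when
+		 * read clustering is disabled; otherwise let cluster_read()
+		 * perform read-ahead.
+		 */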
+ if ((lbn + 1) * lblksize >= isize)
+ error = bread(ip->vp, lbn, lblksize, NOCRED, &bp);
+ else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0)
+ error = cluster_read(vp, isize, lbn, lblksize, NOCRED,
+ uio->uio_resid, seqcount, 0, &bp);
+ else
+ error = bread(ip->vp, lbn, lblksize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ bp = NULL;
+ break;
+ }
+
+ loff = (int)(uio->uio_offset - lbase);
+ n = lblksize - loff;
+ if (n > uio->uio_resid)
+ n = uio->uio_resid;
+ if (n > isize - uio->uio_offset)
+ n = (int)(isize - uio->uio_offset);
+ error = uiomove((char *)bp->b_data + loff, n, uio);
+ if (error) {
+ brelse(bp);
+ bp = NULL;
+ break;
+ }
+ vfs_bio_brelse(bp, ioflag);
+ }
+
+ return (error);
+}
+
+static int
+hammer2_readlink(struct vop_readlink_args *ap)
+{
+ struct vnode *vp = ap->a_vp;
+ hammer2_inode_t *ip = VTOI(vp);
+
+ if (vp->v_type != VLNK)
+ return (EINVAL);
+
+ return (hammer2_read_file(ip, ap->a_uio, 0));
+}
+
+static int
+hammer2_read(struct vop_read_args *ap)
+{
+ struct vnode *vp = ap->a_vp;
+ hammer2_inode_t *ip = VTOI(vp);
+
+ if (vp->v_type == VDIR)
+ return (EISDIR);
+ if (vp->v_type != VREG)
+ return (EINVAL);
+
+ return (hammer2_read_file(ip, ap->a_uio, ap->a_ioflag));
+}
+
+static int
+hammer2_bmap(struct vop_bmap_args *ap)
+{
+ hammer2_xop_bmap_t *xop;
+ hammer2_dev_t *hmp;
+ hammer2_inode_t *ip = VTOI(ap->a_vp);
+ int error;
+
+ hmp = ip->pmp->pfs_hmps[0];
+ if (ap->a_bop != NULL)
+ *ap->a_bop = &hmp->devvp->v_bufobj;
+ if (ap->a_bnp == NULL)
+ return (0);
+ if (ap->a_runp != NULL)
+ *ap->a_runp = 0; /* unsupported */
+ if (ap->a_runb != NULL)
+ *ap->a_runb = 0; /* unsupported */
+
+ xop = hammer2_xop_alloc(ip);
+ xop->lbn = ap->a_bn;
+ hammer2_xop_start(&xop->head, &hammer2_bmap_desc);
+
+ error = hammer2_xop_collect(&xop->head, 0);
+ error = hammer2_error_to_errno(error);
+ if (error) {
+ /* No physical block assigned. */
+ if (error == ENOENT) {
+ error = 0;
+ if (ap->a_bnp)
+ *ap->a_bnp = -1;
+ }
+ goto done;
+ }
+ if (ap->a_bnp)
+ *ap->a_bnp = xop->pbn;
+done:
+ hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
+
+ return (error);
+}
+
+static int
+hammer2_nresolve(struct vop_cachedlookup_args *ap)
+{
+ hammer2_xop_nresolve_t *xop;
+ hammer2_inode_t *ip, *dip;
+ struct vnode *vp, *dvp;
+ struct componentname *cnp = ap->a_cnp;
+ int nameiop = cnp->cn_nameiop;
+ int error;
+ u_int64_t flags = cnp->cn_flags;
+
+ KKASSERT(ap->a_vpp);
+ *ap->a_vpp = NULL;
+
+ dvp = ap->a_dvp;
+ dip = VTOI(dvp);
+ xop = hammer2_xop_alloc(dip);
+
+ hammer2_xop_setname(&xop->head, cnp->cn_nameptr, cnp->cn_namelen);
+
+ hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
+ hammer2_xop_start(&xop->head, &hammer2_nresolve_desc);
+
+ error = hammer2_xop_collect(&xop->head, 0);
+ error = hammer2_error_to_errno(error);
+ if (error)
+ ip = NULL;
+ else
+ ip = hammer2_inode_get(dip->pmp, &xop->head, -1, -1);
+ hammer2_inode_unlock(dip);
+
+ if (ip) {
+ error = hammer2_igetv(ip, LK_EXCLUSIVE, &vp);
+ if (error == 0) {
+ *ap->a_vpp = vp;
+ if (flags & MAKEENTRY)
+ cache_enter(dvp, vp, cnp);
+ } else if (error == ENOENT) {
+ if (flags & MAKEENTRY)
+ cache_enter(dvp, NULL, cnp);
+ }
+ hammer2_inode_unlock(ip);
+ } else {
+ if (flags & MAKEENTRY)
+ cache_enter(dvp, NULL, cnp);
+ if ((flags & ISLASTCN) &&
+ (nameiop == CREATE || nameiop == RENAME))
+ error = EROFS;
+ else
+ error = ENOENT;
+ }
+ hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
+
+ return (error);
+}
+
+static int
+hammer2_open(struct vop_open_args *ap)
+{
+ struct vnode *vp = ap->a_vp;
+ hammer2_inode_t *ip = VTOI(vp);
+
+ if (vp->v_type == VCHR || vp->v_type == VBLK)
+ return (EOPNOTSUPP);
+
+ vnode_create_vobject(vp, ip->meta.size, ap->a_td);
+
+ return (0);
+}
+
+static int
+hammer2_ioctl(struct vop_ioctl_args *ap)
+{
+ hammer2_inode_t *ip = VTOI(ap->a_vp);
+
+ return (hammer2_ioctl_impl(ip, ap->a_command, ap->a_data, ap->a_fflag,
+ ap->a_cred));
+}
+
+static int
+hammer2_print(struct vop_print_args *ap)
+{
+ struct vnode *vp = ap->a_vp;
+ hammer2_inode_t *ip = VTOI(vp);
+ hammer2_dev_t *hmp = ip->pmp->pfs_hmps[0];
+
+ vn_printf(hmp->devvp, "\tino %ju", (uintmax_t)ip->meta.inum);
+ if (vp->v_type == VFIFO)
+ fifo_printinfo(vp);
+ printf("\n");
+
+ return (0);
+}
+
+static int
+hammer2_pathconf(struct vop_pathconf_args *ap)
+{
+ int error = 0;
+
+ switch (ap->a_name) {
+ case _PC_LINK_MAX:
+ *ap->a_retval = INT_MAX;
+ break;
+ case _PC_NAME_MAX:
+ *ap->a_retval = HAMMER2_INODE_MAXNAME;
+ break;
+ case _PC_PIPE_BUF:
+ if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO)
+ *ap->a_retval = PIPE_BUF;
+ else
+ error = EINVAL;
+ break;
+ case _PC_CHOWN_RESTRICTED:
+ *ap->a_retval = 1;
+ break;
+ case _PC_NO_TRUNC:
+ *ap->a_retval = 0;
+ break;
+ case _PC_MIN_HOLE_SIZE:
+ *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
+ break;
+ case _PC_PRIO_IO:
+ *ap->a_retval = 0;
+ break;
+ case _PC_SYNC_IO:
+ *ap->a_retval = 0;
+ break;
+ case _PC_ALLOC_SIZE_MIN:
+ *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
+ break;
+ case _PC_FILESIZEBITS:
+ *ap->a_retval = 64;
+ break;
+ case _PC_REC_INCR_XFER_SIZE:
+ *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
+ break;
+ case _PC_REC_MAX_XFER_SIZE:
+ *ap->a_retval = -1; /* means ``unlimited'' */
+ break;
+ case _PC_REC_MIN_XFER_SIZE:
+ *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
+ break;
+ case _PC_REC_XFER_ALIGN:
+ *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
+ break;
+ case _PC_SYMLINK_MAX:
+ *ap->a_retval = HAMMER2_INODE_MAXNAME;
+ break;
+ default:
+ error = vop_stdpathconf(ap);
+ break;
+ }
+
+ return (error);
+}
+
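+/*
+ * Produce an NFS file handle: fid_data[0..7] carries the inode number
+ * (hammer2_tid_t) and fid_data[8..15] is reserved as zero, matching the
+ * 16 byte payload decoded by hammer2_fhtovp().
+ */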
+static int
+hammer2_vptofh(struct vop_vptofh_args *ap)
+{
+ hammer2_inode_t *ip = VTOI(ap->a_vp);
+ struct fid *fhp;
+
+ KKASSERT(MAXFIDSZ >= 16);
+
+ fhp = (struct fid *)ap->a_fhp;
+ fhp->fid_len = offsetof(struct fid, fid_data[16]);
+ ((hammer2_tid_t *)fhp->fid_data)[0] = ip->meta.inum;
+ ((hammer2_tid_t *)fhp->fid_data)[1] = 0;
+
+ return (0);
+}
+
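+/*
+ * Buf-pager helpers: translate a VM object offset into a logical block
+ * number and report the fixed logical block size for vfs_bio_getpages().
+ */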
+static daddr_t
+hammer2_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
+{
+ int lblksize = hammer2_get_logical();
+
+ return (off / lblksize);
+}
+
+static int
+hammer2_gbp_getblksz(struct vnode *vp, daddr_t lbn, long *sz)
+{
+ int lblksize = hammer2_get_logical();
+
+ *sz = lblksize;
+
+ return (0);
+}
+
+static int use_buf_pager = 1;
+
+static int
+hammer2_getpages(struct vop_getpages_args *ap)
+{
+ struct vnode *vp = ap->a_vp;
+
+ if (vp->v_type == VCHR || vp->v_type == VBLK)
+ return (EOPNOTSUPP);
+
+ if (use_buf_pager)
+ return (vfs_bio_getpages(vp, ap->a_m, ap->a_count,
+ ap->a_rbehind, ap->a_rahead, hammer2_gbp_getblkno,
+ hammer2_gbp_getblksz));
+
+ KKASSERT(0);
+ /* panic: vnode_pager_generic_getpages: sector size 65536 too large */
+ return (vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
+ ap->a_rbehind, ap->a_rahead, NULL, NULL));
+}
+
+struct vop_vector hammer2_vnodeops = {
+ .vop_default = &default_vnodeops,
+ .vop_inactive = hammer2_inactive,
+ .vop_reclaim = hammer2_reclaim,
+ .vop_access = hammer2_access,
+ .vop_getattr = hammer2_getattr,
+ .vop_setattr = hammer2_setattr,
+ .vop_readdir = hammer2_readdir,
+ .vop_readlink = hammer2_readlink,
+ .vop_read = hammer2_read,
+ .vop_bmap = hammer2_bmap,
+ .vop_cachedlookup = hammer2_nresolve,
+ .vop_lookup = vfs_cache_lookup,
+ .vop_open = hammer2_open,
+ .vop_ioctl = hammer2_ioctl,
+ .vop_print = hammer2_print,
+ .vop_pathconf = hammer2_pathconf,
+ .vop_vptofh = hammer2_vptofh,
+ .vop_getpages = hammer2_getpages,
+ .vop_strategy = hammer2_strategy,
+};
+VFS_VOP_VECTOR_REGISTER(hammer2_vnodeops);
+
+struct vop_vector hammer2_fifoops = {
+ .vop_default = &fifo_specops,
+ .vop_inactive = hammer2_inactive,
+ .vop_reclaim = hammer2_reclaim,
+ .vop_access = hammer2_access,
+ .vop_getattr = hammer2_getattr,
+ .vop_setattr = hammer2_setattr,
+ .vop_print = hammer2_print,
+ .vop_pathconf = hammer2_pathconf,
+ .vop_vptofh = hammer2_vptofh,
+};
+VFS_VOP_VECTOR_REGISTER(hammer2_fifoops);
diff --git a/sys/fs/hammer2/hammer2_xops.c b/sys/fs/hammer2/hammer2_xops.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_xops.c
@@ -0,0 +1,248 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include "hammer2.h"
+
+/*
+ * Backend for hammer2_readdir().
+ */
+void
+hammer2_xop_readdir(hammer2_xop_t *arg, int clindex)
+{
+ hammer2_xop_readdir_t *xop = &arg->xop_readdir;
+ hammer2_chain_t *chain, *parent;
+ hammer2_key_t lkey, key_next;
+ int error = 0;
+
+ lkey = xop->lkey;
+
+ /*
+	 * The inode's chain is the iterator. If we cannot acquire it,
+	 * our contribution ends here.
+ */
+ parent = hammer2_inode_chain(xop->head.ip1, clindex,
+ HAMMER2_RESOLVE_ALWAYS | HAMMER2_RESOLVE_SHARED);
+ if (parent == NULL) {
+ hprintf("NULL parent\n");
+ goto done;
+ }
+
+ /*
+	 * Directory scan [re]start and loop. The feed inherits the
+	 * chain's lock, so do not unlock it during the iteration.
+ */
+ chain = hammer2_chain_lookup(&parent, &key_next, lkey, lkey, &error,
+ HAMMER2_LOOKUP_SHARED);
+ if (chain == NULL)
+ chain = hammer2_chain_lookup(&parent, &key_next, lkey,
+ HAMMER2_KEY_MAX, &error, HAMMER2_LOOKUP_SHARED);
+ while (chain) {
+ error = hammer2_xop_feed(&xop->head, chain, clindex, 0);
+ if (error)
+ goto break2;
+ chain = hammer2_chain_next(&parent, chain, &key_next, key_next,
+ HAMMER2_KEY_MAX, &error, HAMMER2_LOOKUP_SHARED);
+ }
+break2:
+ if (chain) {
+ hammer2_chain_unlock(chain);
+ hammer2_chain_drop(chain);
+ }
+ hammer2_chain_unlock(parent);
+ hammer2_chain_drop(parent);
+done:
+ hammer2_xop_feed(&xop->head, NULL, clindex, error);
+}
+
+/*
+ * Backend for hammer2_nresolve().
+ */
+void
+hammer2_xop_nresolve(hammer2_xop_t *arg, int clindex)
+{
+ hammer2_xop_nresolve_t *xop = &arg->xop_nresolve;
+ hammer2_chain_t *chain, *parent;
+ hammer2_key_t lhc, key_next;
+ const char *name;
+ size_t name_len;
+ int error;
+
+ chain = NULL;
+ parent = hammer2_inode_chain(xop->head.ip1, clindex,
+ HAMMER2_RESOLVE_ALWAYS | HAMMER2_RESOLVE_SHARED);
+ if (parent == NULL) {
+ hprintf("NULL parent\n");
+ error = HAMMER2_ERROR_EIO;
+ goto done;
+ }
+ name = xop->head.name1;
+ name_len = xop->head.name1_len;
+
+	/* Look up the directory entry. */
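+	/*
+	 * The directory hash only narrows the search to a collision range
+	 * (lhc through lhc + HAMMER2_DIRHASH_LOMASK); each candidate is
+	 * still compared by name in hammer2_chain_dirent_test().
+	 */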
+ lhc = hammer2_dirhash(name, name_len);
+ chain = hammer2_chain_lookup(&parent, &key_next, lhc,
+ lhc + HAMMER2_DIRHASH_LOMASK, &error,
+ HAMMER2_LOOKUP_ALWAYS | HAMMER2_LOOKUP_SHARED);
+ while (chain) {
+ if (hammer2_chain_dirent_test(chain, name, name_len))
+ break;
+ chain = hammer2_chain_next(&parent, chain, &key_next, key_next,
+ lhc + HAMMER2_DIRHASH_LOMASK, &error,
+ HAMMER2_LOOKUP_ALWAYS | HAMMER2_LOOKUP_SHARED);
+ }
+
+ /* Locate the target inode for a directory entry. */
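+	/*
+	 * A separate DIRENT bref embeds only the target inode number and
+	 * must be resolved to the inode chain; an embedded INODE chain
+	 * can be fed back directly.
+	 */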
+ if (chain && chain->error == 0) {
+ if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) {
+ lhc = chain->bref.embed.dirent.inum;
+ error = hammer2_chain_inode_find(chain->pmp, lhc,
+ clindex, HAMMER2_LOOKUP_SHARED, &parent, &chain);
+ }
+ } else if (chain && error == 0) {
+ error = chain->error;
+ }
+done:
+ error = hammer2_xop_feed(&xop->head, chain, clindex, error);
+ if (chain) {
+ hammer2_chain_unlock(chain);
+ hammer2_chain_drop(chain);
+ }
+ if (parent) {
+ hammer2_chain_unlock(parent);
+ hammer2_chain_drop(parent);
+ }
+}
+
+/*
+ * Generic lookup of a specific key.
+ */
+void
+hammer2_xop_lookup(hammer2_xop_t *arg, int clindex)
+{
+ hammer2_xop_lookup_t *xop = &arg->xop_lookup;
+ hammer2_chain_t *chain, *parent;
+ hammer2_key_t key_next;
+ int error = 0;
+
+ chain = NULL;
+ parent = hammer2_inode_chain(xop->head.ip1, clindex,
+ HAMMER2_RESOLVE_ALWAYS | HAMMER2_RESOLVE_SHARED);
+ if (parent == NULL) {
+ hprintf("NULL parent\n");
+ error = HAMMER2_ERROR_EIO;
+ goto done;
+ }
+
+ /*
+	 * Look up all possibly conflicting directory entries. The feed
+	 * inherits the chain's lock, so do not unlock it during the
+	 * iteration.
+ */
+ chain = hammer2_chain_lookup(&parent, &key_next, xop->lhc, xop->lhc,
+ &error, HAMMER2_LOOKUP_ALWAYS | HAMMER2_LOOKUP_SHARED);
+ if (error == 0) {
+ if (chain)
+ error = chain->error;
+ else
+ error = HAMMER2_ERROR_ENOENT;
+ }
+ hammer2_xop_feed(&xop->head, chain, clindex, error);
+done:
+ if (chain) {
+ hammer2_chain_unlock(chain);
+ hammer2_chain_drop(chain);
+ }
+ if (parent) {
+ hammer2_chain_unlock(parent);
+ hammer2_chain_drop(parent);
+ }
+}
+
+/*
+ * Backend for hammer2_bmap().
+ */
+void
+hammer2_xop_bmap(hammer2_xop_t *arg, int clindex)
+{
+ hammer2_xop_bmap_t *xop = &arg->xop_bmap;
+ hammer2_inode_t *ip = xop->head.ip1;
+ hammer2_chain_t *chain, *parent;
+ hammer2_key_t lbase, key_dummy;
+ int lblksize, error = 0;
+
+ lblksize = hammer2_get_logical();
+ lbase = (hammer2_key_t)xop->lbn * lblksize;
+ KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);
+
+ chain = NULL;
+ parent = hammer2_inode_chain(ip, clindex,
+ HAMMER2_RESOLVE_ALWAYS | HAMMER2_RESOLVE_SHARED);
+ if (parent == NULL) {
+ hprintf("NULL parent\n");
+ error = HAMMER2_ERROR_EIO;
+ goto done;
+ }
+
+ /*
+	 * A NULL chain isn't necessarily an error; it could be
+	 * zero-filled data with no physical block assigned.
+ */
+ chain = hammer2_chain_lookup(&parent, &key_dummy, lbase, lbase,
+ &error, HAMMER2_LOOKUP_ALWAYS | HAMMER2_LOOKUP_SHARED);
+ if (error == 0) {
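+		/*
+		 * The low bits of bref.data_off encode the block radix;
+		 * mask them off to get the device byte offset and convert
+		 * it to a logical block number.
+		 */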
+ if (chain) {
+ error = chain->error;
+ if (error == 0)
+ xop->pbn = (chain->bref.data_off &
+ ~HAMMER2_OFF_MASK_RADIX) / lblksize;
+ } else {
+ error = HAMMER2_ERROR_ENOENT;
+ }
+ }
+done:
+ error = hammer2_xop_feed(&xop->head, chain, clindex, error);
+ if (chain) {
+ hammer2_chain_unlock(chain);
+ hammer2_chain_drop(chain);
+ }
+ if (parent) {
+ hammer2_chain_unlock(parent);
+ hammer2_chain_drop(parent);
+ }
+}
diff --git a/sys/fs/hammer2/hammer2_xxhash.h b/sys/fs/hammer2/hammer2_xxhash.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/hammer2_xxhash.h
@@ -0,0 +1,45 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _FS_HAMMER2_XXHASH_H_
+#define _FS_HAMMER2_XXHASH_H_
+
+#include "xxhash/xxhash.h"
+
+#define XXH_HAMMER2_SEED 0x4d617474446c6c6eLLU
+
+#endif /* !_FS_HAMMER2_XXHASH_H_ */
diff --git a/sys/fs/hammer2/xxhash/xxhash.h b/sys/fs/hammer2/xxhash/xxhash.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/xxhash/xxhash.h
@@ -0,0 +1,278 @@
+/*
+ xxHash - Extremely Fast Hash algorithm
+ Header File
+ Copyright (C) 2012-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* DRAGONFLY ADDITION - allows inclusion in conf/files */
+#define XXH_NAMESPACE h2_
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name Speed Q.Score Author
+xxHash 5.4 GB/s 10
+CrapWow 3.2 GB/s 2 Andrew
+MumurHash 3a 2.7 GB/s 10 Austin Appleby
+SpookyHash 2.0 GB/s 10 Bob Jenkins
+SBox 1.4 GB/s 9 Bret Mulvey
+Lookup3 1.2 GB/s 9 Bob Jenkins
+SuperFastHash 1.2 GB/s 1 Paul Hsieh
+CityHash64 1.05 GB/s 10 Pike & Alakuijala
+FNV 0.55 GB/s 5 Fowler, Noll, Vo
+CRC32 0.43 GB/s 9
+MD5-32 0.33 GB/s 10 Ronald L. Rivest
+SHA1-32 0.28 GB/s 10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+A 64-bits version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bits applications only.
+Name Speed on 64 bits Speed on 32 bits
+XXH64 13.8 GB/s 1.9 GB/s
+XXH32 6.8 GB/s 6.0 GB/s
+*/
+
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************
+* Definitions
+******************************/
+#if !defined(_KERNEL)
+#include <stddef.h> /* size_t */
+#endif
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/* ****************************
+* API modifier
+******************************/
+/*!XXH_PRIVATE_API
+* Transforms all publics symbols within `xxhash.c` into private ones.
+* Methodology :
+* instead of : #include "xxhash.h"
+* do :
+* #define XXH_PRIVATE_API
+* #include "xxhash.c" // note the .c , instead of .h
+* also : don't compile and link xxhash.c separately
+*/
+#ifdef XXH_PRIVATE_API
+# if defined(__GNUC__)
+# define XXH_PUBLIC_API static __attribute__((unused))
+# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define XXH_PUBLIC_API static inline
+# elif defined(_MSC_VER)
+# define XXH_PUBLIC_API static __inline
+# else
+# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */
+# endif
+#else
+# define XXH_PUBLIC_API /* do nothing */
+#endif
+
+/*!XXH_NAMESPACE, aka Namespace Emulation :
+
+If you want to include _and expose_ xxHash functions from within your own library,
+but also want to avoid symbol collisions with another library which also includes xxHash,
+
+you can use XXH_NAMESPACE, to automatically prefix any public symbol from `xxhash.c`
+with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values).
+
+Note that no change is required within the calling program as long as it also includes `xxhash.h` :
+regular symbol name will be automatically translated by this header.
+*/
+#ifdef XXH_NAMESPACE
+# define XXH_CAT(A,B) A##B
+# define XXH_NAME2(A,B) XXH_CAT(A,B)
+# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+#endif
+
+
+/* *************************************
+* Version
+***************************************/
+#define XXH_VERSION_MAJOR 0
+#define XXH_VERSION_MINOR 6
+#define XXH_VERSION_RELEASE 0
+#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+
+
+/* ****************************
+* Simple Hash Functions
+******************************/
+typedef unsigned int XXH32_hash_t;
+typedef unsigned long long XXH64_hash_t;
+
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*!
+XXH32() :
+ Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
+ The memory between input & input+length must be valid (allocated and read-accessible).
+ "seed" can be used to alter the result predictably.
+ Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+XXH64() :
+ Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
+ "seed" can be used to alter the result predictably.
+ This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark).
+*/
+
+
+/* ****************************
+* Streaming Hash Functions
+******************************/
+typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
+typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
+
+/*! Dynamic allocation of states
+ Compatible with dynamic libraries */
+
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
+
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
+
+
+/* hash streaming */
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
+
+/*!
+These functions generate the xxHash of an input provided in multiple segments,
+as opposed to provided as a single block.
+
+XXH state must first be allocated, using either static or dynamic method provided above.
+
+Start a new hash by initializing state with a seed, using XXHnn_reset().
+
+Then, feed the hash state by calling XXHnn_update() as many times as necessary.
+Obviously, input must be valid, hence allocated and read accessible.
+The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+
+Finally, a hash value can be produced anytime, by using XXHnn_digest().
+This function returns the nn-bits hash as an int or long long.
+
+It's still possible to continue inserting input into the hash state after a digest,
+and later on generate some new hashes, by calling again XXHnn_digest().
+
+When done, free XXH state space if it was allocated dynamically.
+*/
+
+
+/* **************************
+* Canonical representation
+****************************/
+typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
+typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
+
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
+
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
+
+/*! Default result types for XXH functions are primitive unsigned 32- and 64-bit integers.
+* The canonical representation uses the human-readable write convention, aka big-endian (large digits first).
+* These functions allow transformation of a hash result into and from its canonical format.
+* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
+*/
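For example, a round trip through the canonical form (a sketch; the canonical bytes are what would be stored on disk or sent over the wire):

#include <stdio.h>
#include "xxhash.h"

int
main(void)
{
	unsigned long long h = XXH64("hammer2", 7, 0);
	XXH64_canonical_t canon;

	XXH64_canonicalFromHash(&canon, h);	/* native int -> big-endian bytes */
	printf("round trip %s\n",
	    XXH64_hashFromCanonical(&canon) == h ? "ok" : "broken");
	return (0);
}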
+
+
+#ifdef XXH_STATIC_LINKING_ONLY
+
+/* This part contains definitions which shall only be used with static linking.
+ The prototypes / types defined here are not guaranteed to remain stable.
+ They could change in a future version, becoming incompatible with a different version of the library. */
+
+ struct XXH32_state_s {
+ unsigned long long total_len;
+ unsigned seed;
+ unsigned v1;
+ unsigned v2;
+ unsigned v3;
+ unsigned v4;
+ unsigned mem32[4]; /* buffer defined as U32 for alignment */
+ unsigned memsize;
+ }; /* typedef'd to XXH32_state_t */
+
+ struct XXH64_state_s {
+ unsigned long long total_len;
+ unsigned long long seed;
+ unsigned long long v1;
+ unsigned long long v2;
+ unsigned long long v3;
+ unsigned long long v4;
+ unsigned long long mem64[4]; /* buffer defined as U64 for alignment */
+ unsigned memsize;
+ }; /* typedef'd to XXH64_state_t */
+
+
+#endif
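With XXH_STATIC_LINKING_ONLY defined before inclusion, the state structs above become complete types, so a caller can place them on the stack instead of going through XXHnn_createState()/XXHnn_freeState(). A sketch, subject to the caveat in the comment above (the layout is not a stable ABI):

#define XXH_STATIC_LINKING_ONLY
#include "xxhash.h"
#include <stdio.h>

int
main(void)
{
	XXH64_state_t st;		/* automatic storage, no allocation */

	XXH64_reset(&st, 0);
	XXH64_update(&st, "hammer2", 7);
	printf("%016llx\n", XXH64_digest(&st));
	return (0);
}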
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* XXHASH_H_5627135585666179 */
diff --git a/sys/fs/hammer2/xxhash/xxhash.c b/sys/fs/hammer2/xxhash/xxhash.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/xxhash/xxhash.c
@@ -0,0 +1,863 @@
+/*
+* xxHash - Fast Hash algorithm
+* Copyright (C) 2012-2016, Yann Collet
+*
+* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following disclaimer
+* in the documentation and/or other materials provided with the
+* distribution.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+* You can contact the author at :
+* - xxHash homepage: http://www.xxhash.com
+* - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+
+/* *************************************
+* Tuning parameters
+***************************************/
+/*!XXH_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method doesn't depend on the compiler but violates the C standard.
+ * It can generate buggy code on targets which do not support unaligned memory accesses.
+ * But in some circumstances, it's the only known way to get the best performance (e.g., GCC + ARMv6).
+ * See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2).
+ */
+#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+# define XXH_FORCE_MEMORY_ACCESS 2
+# elif defined(__INTEL_COMPILER) || \
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+# define XXH_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+/*!XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
+ * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
+ * By default, this option is disabled. To enable it, uncomment the define below :
+ */
+/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
+
+/*!XXH_FORCE_NATIVE_FORMAT :
+ * By default, the xxHash library provides endian-independent hash values, based on the little-endian convention.
+ * Results are therefore identical for little-endian and big-endian CPUs.
+ * This comes at a performance cost for big-endian CPUs, since some swapping is required to emulate the little-endian format.
+ * Should endian-independence be of no importance for your application, you may set the #define below to 1,
+ * to improve speed on big-endian CPUs.
+ * This option has no impact on little-endian CPUs.
+ */
+#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */
+# define XXH_FORCE_NATIVE_FORMAT 0
+#endif
+
+/*!XXH_FORCE_ALIGN_CHECK :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means : check for aligned/unaligned input.
+ * The check costs one initial branch per hash; set to 0 when the input data
+ * is guaranteed to be aligned.
+ */
+#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
+# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+# define XXH_FORCE_ALIGN_CHECK 0
+# else
+# define XXH_FORCE_ALIGN_CHECK 1
+# endif
+#endif
+
+#if defined(_KERNEL)
+#include <sys/types.h>
+#include <sys/systm.h>
+#else
+
+/* *************************************
+* Includes & Memory related functions
+***************************************/
+/* Modify the local functions below should you wish to use some other memory routines */
+/* for malloc(), free() */
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void XXH_free (void* p) { free(p); }
+/* for memcpy() */
+#include <string.h>
+#endif
+
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
+
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash.h"
+
+
+/* *************************************
+* Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# define FORCE_INLINE static __forceinline
+#else
+# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+# else
+# define FORCE_INLINE static
+# endif /* __STDC_VERSION__ */
+#endif
+
+
+/* *************************************
+* Basic Types
+***************************************/
+#ifndef MEM_MODULE
+# define MEM_MODULE
+# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# include <sys/stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+# else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+# endif
+#endif
+
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
+static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __packed statements are safer, but compiler-specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+
+static U32 XXH_read32(const void* memPtr)
+{
+ U32 val;
+ memcpy(&val, memPtr, sizeof(val));
+ return val;
+}
+
+static U64 XXH_read64(const void* memPtr)
+{
+ U64 val;
+ memcpy(&val, memPtr, sizeof(val));
+ return val;
+}
+
+#endif /* XXH_FORCE_MEMORY_ACCESS */
+
+
+/* ****************************************
+* Compiler-specific Functions and Macros
+******************************************/
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#if defined(_MSC_VER)
+# define XXH_rotl32(x,r) _rotl(x,r)
+# define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
+#endif
+
+#if defined(_MSC_VER) /* Visual Studio */
+# define XXH_swap32 _byteswap_ulong
+# define XXH_swap64 _byteswap_uint64
+#elif GCC_VERSION >= 403
+# define XXH_swap32 __builtin_bswap32
+# define XXH_swap64 __builtin_bswap64
+#else
+static U32 XXH_swap32 (U32 x)
+{
+ return ((x << 24) & 0xff000000 ) |
+ ((x << 8) & 0x00ff0000 ) |
+ ((x >> 8) & 0x0000ff00 ) |
+ ((x >> 24) & 0x000000ff );
+}
+static U64 XXH_swap64 (U64 x)
+{
+ return ((x << 56) & 0xff00000000000000ULL) |
+ ((x << 40) & 0x00ff000000000000ULL) |
+ ((x << 24) & 0x0000ff0000000000ULL) |
+ ((x << 8) & 0x000000ff00000000ULL) |
+ ((x >> 8) & 0x00000000ff000000ULL) |
+ ((x >> 24) & 0x0000000000ff0000ULL) |
+ ((x >> 40) & 0x000000000000ff00ULL) |
+ ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
+
+
+/* *************************************
+* Architecture Macros
+***************************************/
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+ static const int g_one = 1;
+# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one))
+#endif
+
+
+/* ***************************
+* Memory reads
+*****************************/
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+ if (align==XXH_unaligned)
+ return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
+ else
+ return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
+}
+
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+{
+ return XXH_readLE32_align(ptr, endian, XXH_unaligned);
+}
+
+static U32 XXH_readBE32(const void* ptr)
+{
+ return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
+}
+
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+ if (align==XXH_unaligned)
+ return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
+ else
+ return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
+}
+
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+{
+ return XXH_readLE64_align(ptr, endian, XXH_unaligned);
+}
+
+static U64 XXH_readBE64(const void* ptr)
+{
+ return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
+}
+
+
+/* *************************************
+* Macros
+***************************************/
+#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+
+
+/* *************************************
+* Constants
+***************************************/
+static const U32 PRIME32_1 = 2654435761U;
+static const U32 PRIME32_2 = 2246822519U;
+static const U32 PRIME32_3 = 3266489917U;
+static const U32 PRIME32_4 = 668265263U;
+static const U32 PRIME32_5 = 374761393U;
+
+static const U64 PRIME64_1 = 11400714785074694791ULL;
+static const U64 PRIME64_2 = 14029467366897019727ULL;
+static const U64 PRIME64_3 = 1609587929392839161ULL;
+static const U64 PRIME64_4 = 9650029242287828579ULL;
+static const U64 PRIME64_5 = 2870177450012600261ULL;
+
+XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
+
+
+/* ***************************
+* Simple Hash Functions
+*****************************/
+
+static U32 XXH32_round(U32 seed, U32 input)
+{
+ seed += input * PRIME32_2;
+ seed = XXH_rotl32(seed, 13);
+ seed *= PRIME32_1;
+ return seed;
+}
+
+FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* bEnd = p + len;
+ U32 h32;
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+ if (p==NULL) {
+ len=0;
+ bEnd=p=(const BYTE*)(size_t)16;
+ }
+#endif
+
+ if (len>=16) {
+ const BYTE* const limit = bEnd - 16;
+ U32 v1 = seed + PRIME32_1 + PRIME32_2;
+ U32 v2 = seed + PRIME32_2;
+ U32 v3 = seed + 0;
+ U32 v4 = seed - PRIME32_1;
+
+ do {
+ v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
+ v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
+ v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
+ v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
+ } while (p<=limit);
+
+ h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+ } else {
+ h32 = seed + PRIME32_5;
+ }
+
+ h32 += (U32) len;
+
+ while (p+4<=bEnd) {
+ h32 += XXH_get32bits(p) * PRIME32_3;
+ h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
+ p+=4;
+ }
+
+ while (p<bEnd) {
+ h32 += (*p) * PRIME32_5;
+ h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+ p++;
+ }
+
+ h32 ^= h32 >> 15;
+ h32 *= PRIME32_2;
+ h32 ^= h32 >> 13;
+ h32 *= PRIME32_3;
+ h32 ^= h32 >> 16;
+
+ return h32;
+}
+
+
+XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
+{
+#if 0
+ /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+ XXH32_CREATESTATE_STATIC(state);
+ XXH32_reset(state, seed);
+ XXH32_update(state, input, len);
+ return XXH32_digest(state);
+#else
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if (XXH_FORCE_ALIGN_CHECK) {
+ if ((((size_t)input) & 3) == 0) { /* Input is 4-byte aligned, leverage the speed benefit */
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+ else
+ return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+ } }
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+ else
+ return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+
+static U64 XXH64_round(U64 acc, U64 input)
+{
+ acc += input * PRIME64_2;
+ acc = XXH_rotl64(acc, 31);
+ acc *= PRIME64_1;
+ return acc;
+}
+
+static U64 XXH64_mergeRound(U64 acc, U64 val)
+{
+ val = XXH64_round(0, val);
+ acc ^= val;
+ acc = acc * PRIME64_1 + PRIME64_4;
+ return acc;
+}
+
+FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* const bEnd = p + len;
+ U64 h64;
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+ if (p==NULL) {
+ len=0;
+ bEnd=p=(const BYTE*)(size_t)32;
+ }
+#endif
+
+ if (len>=32) {
+ const BYTE* const limit = bEnd - 32;
+ U64 v1 = seed + PRIME64_1 + PRIME64_2;
+ U64 v2 = seed + PRIME64_2;
+ U64 v3 = seed + 0;
+ U64 v4 = seed - PRIME64_1;
+
+ do {
+ v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
+ v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
+ v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
+ v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
+ } while (p<=limit);
+
+ h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+ h64 = XXH64_mergeRound(h64, v1);
+ h64 = XXH64_mergeRound(h64, v2);
+ h64 = XXH64_mergeRound(h64, v3);
+ h64 = XXH64_mergeRound(h64, v4);
+
+ } else {
+ h64 = seed + PRIME64_5;
+ }
+
+ h64 += (U64) len;
+
+ while (p+8<=bEnd) {
+ U64 const k1 = XXH64_round(0, XXH_get64bits(p));
+ h64 ^= k1;
+ h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+ p+=8;
+ }
+
+ if (p+4<=bEnd) {
+ h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
+ h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+ p+=4;
+ }
+
+ while (p<bEnd) {
+ h64 ^= (*p) * PRIME64_5;
+ h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+ p++;
+ }
+
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 >> 29;
+ h64 *= PRIME64_3;
+ h64 ^= h64 >> 32;
+
+ return h64;
+}
+
+
+XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+{
+#if 0
+ /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+ XXH64_CREATESTATE_STATIC(state);
+ XXH64_reset(state, seed);
+ XXH64_update(state, input, len);
+ return XXH64_digest(state);
+#else
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if (XXH_FORCE_ALIGN_CHECK) {
+ if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+ else
+ return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+ } }
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+ else
+ return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+
+/* **************************************************
+* Advanced Hash Functions
+****************************************************/
+
+#if !defined(_KERNEL)
+
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
+{
+ return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+ XXH_free(statePtr);
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
+{
+ return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+ XXH_free(statePtr);
+ return XXH_OK;
+}
+
+#endif
+
+/*** Hash feed ***/
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
+{
+ XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+ memset(&state, 0, sizeof(state));
+ state.seed = seed;
+ state.v1 = seed + PRIME32_1 + PRIME32_2;
+ state.v2 = seed + PRIME32_2;
+ state.v3 = seed + 0;
+ state.v4 = seed - PRIME32_1;
+ memcpy(statePtr, &state, sizeof(state));
+ return XXH_OK;
+}
+
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
+{
+ XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+ memset(&state, 0, sizeof(state));
+ state.seed = seed;
+ state.v1 = seed + PRIME64_1 + PRIME64_2;
+ state.v2 = seed + PRIME64_2;
+ state.v3 = seed + 0;
+ state.v4 = seed - PRIME64_1;
+ memcpy(statePtr, &state, sizeof(state));
+ return XXH_OK;
+}
+
+
+FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+ if (input==NULL) return XXH_ERROR;
+#endif
+
+ state->total_len += len;
+
+ if (state->memsize + len < 16) { /* fill in tmp buffer */
+ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+ state->memsize += (U32)len;
+ return XXH_OK;
+ }
+
+ if (state->memsize) { /* some data left from previous update */
+ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
+ { const U32* p32 = state->mem32;
+ state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
+ state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
+ state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
+ state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++;
+ }
+ p += 16-state->memsize;
+ state->memsize = 0;
+ }
+
+ if (p <= bEnd-16) {
+ const BYTE* const limit = bEnd - 16;
+ U32 v1 = state->v1;
+ U32 v2 = state->v2;
+ U32 v3 = state->v3;
+ U32 v4 = state->v4;
+
+ do {
+ v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
+ v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
+ v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
+ v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
+ } while (p<=limit);
+
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < bEnd) {
+ XXH_memcpy(state->mem32, p, bEnd-p);
+ state->memsize = (int)(bEnd-p);
+ }
+
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+ else
+ return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
+{
+ const BYTE * p = (const BYTE*)state->mem32;
+ const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
+ U32 h32;
+
+ if (state->total_len >= 16) {
+ h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
+ } else {
+ h32 = state->seed + PRIME32_5;
+ }
+
+ h32 += (U32) state->total_len;
+
+ while (p+4<=bEnd) {
+ h32 += XXH_readLE32(p, endian) * PRIME32_3;
+ h32 = XXH_rotl32(h32, 17) * PRIME32_4;
+ p+=4;
+ }
+
+ while (p<bEnd) {
+ h32 += (*p) * PRIME32_5;
+ h32 = XXH_rotl32(h32, 11) * PRIME32_1;
+ p++;
+ }
+
+ h32 ^= h32 >> 15;
+ h32 *= PRIME32_2;
+ h32 ^= h32 >> 13;
+ h32 *= PRIME32_3;
+ h32 ^= h32 >> 16;
+
+ return h32;
+}
+
+
+XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_digest_endian(state_in, XXH_littleEndian);
+ else
+ return XXH32_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+
+/* **** XXH64 **** */
+
+FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+ if (input==NULL) return XXH_ERROR;
+#endif
+
+ state->total_len += len;
+
+ if (state->memsize + len < 32) { /* fill in tmp buffer */
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+ state->memsize += (U32)len;
+ return XXH_OK;
+ }
+
+ if (state->memsize) { /* tmp buffer is full */
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+ state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
+ state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
+ state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
+ state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
+ p += 32-state->memsize;
+ state->memsize = 0;
+ }
+
+ if (p+32 <= bEnd) {
+ const BYTE* const limit = bEnd - 32;
+ U64 v1 = state->v1;
+ U64 v2 = state->v2;
+ U64 v3 = state->v3;
+ U64 v4 = state->v4;
+
+ do {
+ v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
+ v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
+ v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
+ v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
+ } while (p<=limit);
+
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < bEnd) {
+ XXH_memcpy(state->mem64, p, bEnd-p);
+ state->memsize = (int)(bEnd-p);
+ }
+
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+ else
+ return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
+{
+ const BYTE * p = (const BYTE*)state->mem64;
+ const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
+ U64 h64;
+
+ if (state->total_len >= 32) {
+ U64 const v1 = state->v1;
+ U64 const v2 = state->v2;
+ U64 const v3 = state->v3;
+ U64 const v4 = state->v4;
+
+ h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+ h64 = XXH64_mergeRound(h64, v1);
+ h64 = XXH64_mergeRound(h64, v2);
+ h64 = XXH64_mergeRound(h64, v3);
+ h64 = XXH64_mergeRound(h64, v4);
+ } else {
+ h64 = state->seed + PRIME64_5;
+ }
+
+ h64 += (U64) state->total_len;
+
+ while (p+8<=bEnd) {
+ U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
+ h64 ^= k1;
+ h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+ p+=8;
+ }
+
+ if (p+4<=bEnd) {
+ h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
+ h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+ p+=4;
+ }
+
+ while (p<bEnd) {
+ h64 ^= (*p) * PRIME64_5;
+ h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+ p++;
+ }
+
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 >> 29;
+ h64 *= PRIME64_3;
+ h64 ^= h64 >> 32;
+
+ return h64;
+}
+
+
+XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_digest_endian(state_in, XXH_littleEndian);
+ else
+ return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+/* **************************
+* Canonical representation
+****************************/
+
+/*! Default XXH result types are basic unsigned 32- and 64-bit integers.
+* The canonical representation follows the human-readable write convention, aka big-endian (large digits first).
+* These functions allow transformation of a hash result into and from its canonical format.
+* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs.
+*/
+
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
+{
+ XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
+ if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
+ memcpy(dst, &hash, sizeof(*dst));
+}
+
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
+{
+ XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
+ if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
+ memcpy(dst, &hash, sizeof(*dst));
+}
+
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
+{
+ return XXH_readBE32(src);
+}
+
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
+{
+ return XXH_readBE64(src);
+}
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib.h b/sys/fs/hammer2/zlib/hammer2_zlib.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib.h
@@ -0,0 +1,551 @@
+/* zlib.h -- interface of the 'zlib' general purpose compression library
+ version 1.2.8, April 28th, 2013
+
+ Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ Jean-loup Gailly Mark Adler
+ jloup@gzip.org madler@alumni.caltech.edu
+
+
+ The data format used by the zlib library is described by RFCs (Request for
+ Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
+ (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format).
+*/
+
+#ifndef ZLIB_H
+#define ZLIB_H
+
+//#include "zconf.h"
+
+#include "hammer2_zlib_zconf.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ZLIB_VERSION "1.2.8"
+#define ZLIB_VERNUM 0x1280
+#define ZLIB_VER_MAJOR 1
+#define ZLIB_VER_MINOR 2
+#define ZLIB_VER_REVISION 8
+#define ZLIB_VER_SUBREVISION 0
+
+/*
+ The 'zlib' compression library provides in-memory compression and
+ decompression functions, including integrity checks of the uncompressed data.
+ This version of the library supports only one compression method (deflation)
+ but other algorithms will be added later and will have the same stream
+ interface.
+
+ Compression can be done in a single step if the buffers are large enough,
+ or can be done by repeated calls of the compression function. In the latter
+ case, the application must provide more input and/or consume the output
+ (providing more output space) before each call.
+
+ The compressed data format used by default by the in-memory functions is
+ the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped
+ around a deflate stream, which is itself documented in RFC 1951.
+
+ The library also supports reading and writing files in gzip (.gz) format
+ with an interface similar to that of stdio using the functions that start
+ with "gz". The gzip format is different from the zlib format. gzip is a
+ gzip wrapper, documented in RFC 1952, wrapped around a deflate stream.
+
+ This library can optionally read and write gzip streams in memory as well.
+
+ The zlib format was designed to be compact and fast for use in memory
+ and on communications channels. The gzip format was designed for single-
+ file compression on file systems, has a larger header than zlib to maintain
+ directory information, and uses a different, slower check method than zlib.
+
+ The library does not install any signal handler. The decoder checks
+ the consistency of the compressed data, so the library should never crash
+ even in case of corrupted input.
+*/
+
+struct internal_state;
+
+typedef struct z_stream_s {
+ z_const Bytef *next_in; /* next input byte */
+ uInt avail_in; /* number of bytes available at next_in */
+ uLong total_in; /* total number of input bytes read so far */
+
+ Bytef *next_out; /* next output byte should be put there */
+ uInt avail_out; /* remaining free space at next_out */
+ uLong total_out; /* total number of bytes output so far */
+
+ z_const char *msg; /* last error message, NULL if no error */
+ struct internal_state FAR *state; /* not visible by applications */
+
+ int data_type; /* best guess about the data type: binary or text */
+ uLong adler; /* adler32 value of the uncompressed data */
+ uLong reserved; /* reserved for future use */
+} z_stream;
+
+typedef z_stream FAR *z_streamp;
+
+/*
+ The application must update next_in and avail_in when avail_in has dropped
+ to zero. It must update next_out and avail_out when avail_out has dropped
+ to zero. All other fields are set by the compression library and must not
+ be updated by the application. (Unlike stock zlib, this stripped-down copy
+ omits the zalloc/zfree/opaque callback fields from z_stream; mentions of
+ them in the comments below are inherited from the upstream documentation.)
+
+ The opaque value provided by the application will be passed as the first
+ parameter for calls of zalloc and zfree. This can be useful for custom
+ memory management. The compression library attaches no meaning to the
+ opaque value.
+
+ zalloc must return Z_NULL if there is not enough memory for the object.
+ If zlib is used in a multi-threaded application, zalloc and zfree must be
+ thread safe.
+
+ On 16-bit systems, the functions zalloc and zfree must be able to allocate
+ exactly 65536 bytes, but will not be required to allocate more than this if
+ the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers
+ returned by zalloc for objects of exactly 65536 bytes *must* have their
+ offset normalized to zero. The default allocation function provided by this
+ library ensures this (see zutil.c). To reduce memory requirements and avoid
+ any allocation of 64K objects, at the expense of compression ratio, compile
+ the library with -DMAX_WBITS=14 (see zconf.h).
+
+ The fields total_in and total_out can be used for statistics or progress
+ reports. After compression, total_in holds the total size of the
+ uncompressed data and may be saved for use in the decompressor (particularly
+ if the decompressor wants to decompress everything in a single step).
+*/
+
+ /* constants */
+
+#define Z_NO_FLUSH 0
+#define Z_PARTIAL_FLUSH 1
+#define Z_SYNC_FLUSH 2
+#define Z_FULL_FLUSH 3
+#define Z_FINISH 4
+#define Z_BLOCK 5
+#define Z_TREES 6
+/* Allowed flush values; see deflate() and inflate() below for details */
+
+#define Z_OK 0
+#define Z_STREAM_END 1
+#define Z_NEED_DICT 2
+#define Z_ERRNO (-1)
+#define Z_STREAM_ERROR (-2)
+#define Z_DATA_ERROR (-3)
+#define Z_MEM_ERROR (-4)
+#define Z_BUF_ERROR (-5)
+#define Z_VERSION_ERROR (-6)
+/* Return codes for the compression/decompression functions. Negative values
+ * are errors, positive values are used for special but normal events.
+ */
+
+#define Z_NO_COMPRESSION 0
+#define Z_BEST_SPEED 1
+#define Z_BEST_COMPRESSION 9
+#define Z_DEFAULT_COMPRESSION (-1)
+/* compression levels */
+
+#define Z_FILTERED 1
+#define Z_HUFFMAN_ONLY 2
+#define Z_RLE 3
+#define Z_FIXED 4
+#define Z_DEFAULT_STRATEGY 0
+/* compression strategy; see deflateInit2() below for details */
+
+#define Z_BINARY 0
+#define Z_TEXT 1
+#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */
+#define Z_UNKNOWN 2
+/* Possible values of the data_type field (though see inflate()) */
+
+#define Z_DEFLATED 8
+/* The deflate compression method (the only one supported in this version) */
+
+#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */
+
+#define zlib_version zlibVersion()
+/* for compatibility with versions < 1.0.2 */
+
+
+ /* basic functions */
+
+//ZEXTERN const char * ZEXPORT zlibVersion OF((void));
+/* The application can compare zlibVersion and ZLIB_VERSION for consistency.
+ If the first character differs, the library code actually used is not
+ compatible with the zlib.h header file used by the application. This check
+ is automatically made by deflateInit and inflateInit.
+ */
+
+int deflateInit(z_streamp strm, int level);
+
+/*
+ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
+
+ Initializes the internal stream state for compression. The fields
+ zalloc, zfree and opaque must be initialized before by the caller. If
+ zalloc and zfree are set to Z_NULL, deflateInit updates them to use default
+ allocation functions.
+
+ The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
+ 1 gives best speed, 9 gives best compression, 0 gives no compression at all
+ (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION
+ requests a default compromise between speed and compression (currently
+ equivalent to level 6).
+
+ deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
+ memory, Z_STREAM_ERROR if level is not a valid compression level, or
+ Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
+ with the version assumed by the caller (ZLIB_VERSION). msg is set to null
+ if there is no error message. deflateInit does not perform any compression:
+ this will be done by deflate().
+*/
+
+
+int deflate(z_streamp strm, int flush);
+/*
+ deflate compresses as much data as possible, and stops when the input
+ buffer becomes empty or the output buffer becomes full. It may introduce
+ some output latency (reading input without producing any output) except when
+ forced to flush.
+
+ The detailed semantics are as follows. deflate performs one or both of the
+ following actions:
+
+ - Compress more input starting at next_in and update next_in and avail_in
+ accordingly. If not all input can be processed (because there is not
+ enough room in the output buffer), next_in and avail_in are updated and
+ processing will resume at this point for the next call of deflate().
+
+ - Provide more output starting at next_out and update next_out and avail_out
+ accordingly. This action is forced if the parameter flush is non zero.
+ Forcing flush frequently degrades the compression ratio, so this parameter
+ should be set only when necessary (in interactive applications). Some
+ output may be provided even if flush is not set.
+
+ Before the call of deflate(), the application should ensure that at least
+ one of the actions is possible, by providing more input and/or consuming more
+ output, and updating avail_in or avail_out accordingly; avail_out should
+ never be zero before the call. The application can consume the compressed
+ output when it wants, for example when the output buffer is full (avail_out
+ == 0), or after each call of deflate(). If deflate returns Z_OK and with
+ zero avail_out, it must be called again after making room in the output
+ buffer because there might be more output pending.
+
+ Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to
+ decide how much data to accumulate before producing output, in order to
+ maximize compression.
+
+ If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
+ flushed to the output buffer and the output is aligned on a byte boundary, so
+ that the decompressor can get all input data available so far. (In
+ particular avail_in is zero after the call if enough output space has been
+ provided before the call.) Flushing may degrade compression for some
+ compression algorithms and so it should be used only when necessary. This
+ completes the current deflate block and follows it with an empty stored block
+ that is three bits plus filler bits to the next byte, followed by four bytes
+ (00 00 ff ff).
+
+ If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the
+ output buffer, but the output is not aligned to a byte boundary. All of the
+ input data so far will be available to the decompressor, as for Z_SYNC_FLUSH.
+ This completes the current deflate block and follows it with an empty fixed
+ codes block that is 10 bits long. This assures that enough bytes are output
+ in order for the decompressor to finish the block before the empty fixed code
+ block.
+
+ If flush is set to Z_BLOCK, a deflate block is completed and emitted, as
+ for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to
+ seven bits of the current block are held to be written as the next byte after
+ the next deflate block is completed. In this case, the decompressor may not
+ be provided enough bits at this point in order to complete decompression of
+ the data provided so far to the compressor. It may need to wait for the next
+ block to be emitted. This is for advanced applications that need to control
+ the emission of deflate blocks.
+
+ If flush is set to Z_FULL_FLUSH, all output is flushed as with
+ Z_SYNC_FLUSH, and the compression state is reset so that decompression can
+ restart from this point if previous compressed data has been damaged or if
+ random access is desired. Using Z_FULL_FLUSH too often can seriously degrade
+ compression.
+
+ If deflate returns with avail_out == 0, this function must be called again
+ with the same value of the flush parameter and more output space (updated
+ avail_out), until the flush is complete (deflate returns with non-zero
+ avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that
+ avail_out is greater than six to avoid repeated flush markers due to
+ avail_out == 0 on return.
+
+ If the parameter flush is set to Z_FINISH, pending input is processed,
+ pending output is flushed and deflate returns with Z_STREAM_END if there was
+ enough output space; if deflate returns with Z_OK, this function must be
+ called again with Z_FINISH and more output space (updated avail_out) but no
+ more input data, until it returns with Z_STREAM_END or an error. After
+ deflate has returned Z_STREAM_END, the only possible operations on the stream
+ are deflateReset or deflateEnd.
+
+ Z_FINISH can be used immediately after deflateInit if all the compression
+ is to be done in a single step. In this case, avail_out must be at least the
+ value returned by deflateBound (see below). Then deflate is guaranteed to
+ return Z_STREAM_END. If not enough output space is provided, deflate will
+ not return Z_STREAM_END, and it must be called again as described above.
+
+ deflate() sets strm->adler to the adler32 checksum of all input read
+ so far (that is, total_in bytes).
+
+ deflate() may update strm->data_type if it can make a good guess about
+ the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered
+ binary. This field is only for information purposes and does not affect the
+ compression algorithm in any manner.
+
+ deflate() returns Z_OK if some progress has been made (more input
+ processed or more output produced), Z_STREAM_END if all input has been
+ consumed and all output has been produced (only when flush is set to
+ Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
+ if next_in or next_out was Z_NULL), Z_BUF_ERROR if no progress is possible
+ (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not
+ fatal, and deflate() can be called again with more input and more output
+ space to continue compressing.
+*/
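The Z_FINISH single-step pattern described above reduces to a short helper. This is a hedged sketch against this stripped copy (compress_block is a hypothetical name, the header is assumed includable as "hammer2_zlib.h", and dst must be pre-sized large enough to hold all of the compressed output):

#include <string.h>
#include "hammer2_zlib.h"

/*
 * Compress src into dst in one Z_FINISH step; *dstlenp holds the dst
 * capacity on entry and the compressed size on success.
 */
int
compress_block(Bytef *dst, uLong *dstlenp, const Bytef *src, uLong srclen)
{
	z_stream strm;
	int ret;

	memset(&strm, 0, sizeof(strm));
	ret = deflateInit(&strm, Z_DEFAULT_COMPRESSION);
	if (ret != Z_OK)
		return (ret);
	strm.next_in = (z_const Bytef *)src;
	strm.avail_in = (uInt)srclen;
	strm.next_out = dst;
	strm.avail_out = (uInt)*dstlenp;
	ret = deflate(&strm, Z_FINISH);	/* Z_STREAM_END if dst was big enough */
	*dstlenp = strm.total_out;
	(void)deflateEnd(&strm);
	return (ret == Z_STREAM_END ? Z_OK : Z_BUF_ERROR);
}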
+
+
+int deflateEnd(z_streamp strm);
+/*
+ All dynamically allocated data structures for this stream are freed.
+ This function discards any unprocessed input and does not flush any pending
+ output.
+
+ deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
+ stream state was inconsistent, Z_DATA_ERROR if the stream was freed
+ prematurely (some input or output was discarded). In the error case, msg
+ may be set but then points to a static string (which must not be
+ deallocated).
+*/
+
+int inflateInit(z_streamp strm);
+
+/*
+ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
+
+ Initializes the internal stream state for decompression. The fields
+ next_in, avail_in, zalloc, zfree and opaque must be initialized before by
+ the caller. If next_in is not Z_NULL and avail_in is large enough (the
+ exact value depends on the compression method), inflateInit determines the
+ compression method from the zlib header and allocates all data structures
+ accordingly; otherwise the allocation will be deferred to the first call of
+ inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to
+ use default allocation functions.
+
+ inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
+ memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
+ version assumed by the caller, or Z_STREAM_ERROR if the parameters are
+ invalid, such as a null pointer to the structure. msg is set to null if
+ there is no error message. inflateInit does not perform any decompression
+ apart from possibly reading the zlib header if present: actual decompression
+ will be done by inflate(). (So next_in and avail_in may be modified, but
+ next_out and avail_out are unused and unchanged.) The current implementation
+ of inflateInit() does not process any header information -- that is deferred
+ until inflate() is called.
+*/
+
+
+int inflate(z_streamp strm, int flush);
+/*
+ inflate decompresses as much data as possible, and stops when the input
+ buffer becomes empty or the output buffer becomes full. It may introduce
+ some output latency (reading input without producing any output) except when
+ forced to flush.
+
+ The detailed semantics are as follows. inflate performs one or both of the
+ following actions:
+
+ - Decompress more input starting at next_in and update next_in and avail_in
+ accordingly. If not all input can be processed (because there is not
+ enough room in the output buffer), next_in is updated and processing will
+ resume at this point for the next call of inflate().
+
+ - Provide more output starting at next_out and update next_out and avail_out
+ accordingly. inflate() provides as much output as possible, until there is
+ no more input data or no more space in the output buffer (see below about
+ the flush parameter).
+
+ Before the call of inflate(), the application should ensure that at least
+ one of the actions is possible, by providing more input and/or consuming more
+ output, and updating the next_* and avail_* values accordingly. The
+ application can consume the uncompressed output when it wants, for example
+ when the output buffer is full (avail_out == 0), or after each call of
+ inflate(). If inflate returns Z_OK and with zero avail_out, it must be
+ called again after making room in the output buffer because there might be
+ more output pending.
+
+ The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH,
+ Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much
+ output as possible to the output buffer. Z_BLOCK requests that inflate()
+ stop if and when it gets to the next deflate block boundary. When decoding
+ the zlib or gzip format, this will cause inflate() to return immediately
+ after the header and before the first block. When doing a raw inflate,
+ inflate() will go ahead and process the first block, and will return when it
+ gets to the end of that block, or when it runs out of data.
+
+ The Z_BLOCK option assists in appending to or combining deflate streams.
+ Also to assist in this, on return inflate() will set strm->data_type to the
+ number of unused bits in the last byte taken from strm->next_in, plus 64 if
+ inflate() is currently decoding the last block in the deflate stream, plus
+ 128 if inflate() returned immediately after decoding an end-of-block code or
+ decoding the complete header up to just before the first byte of the deflate
+ stream. The end-of-block will not be indicated until all of the uncompressed
+ data from that block has been written to strm->next_out. The number of
+ unused bits may in general be greater than seven, except when bit 7 of
+ data_type is set, in which case the number of unused bits will be less than
+ eight. data_type is set as noted here every time inflate() returns for all
+ flush options, and so can be used to determine the amount of currently
+ consumed input in bits.
+
+ The Z_TREES option behaves as Z_BLOCK does, but it also returns when the
+ end of each deflate block header is reached, before any actual data in that
+ block is decoded. This allows the caller to determine the length of the
+ deflate block header for later use in random access within a deflate block.
+ 256 is added to the value of strm->data_type when inflate() returns
+ immediately after reaching the end of the deflate block header.
+
+ inflate() should normally be called until it returns Z_STREAM_END or an
+ error. However if all decompression is to be performed in a single step (a
+ single call of inflate), the parameter flush should be set to Z_FINISH. In
+ this case all pending input is processed and all pending output is flushed;
+ avail_out must be large enough to hold all of the uncompressed data for the
+ operation to complete. (The size of the uncompressed data may have been
+ saved by the compressor for this purpose.) The use of Z_FINISH is not
+ required to perform an inflation in one step. However it may be used to
+ inform inflate that a faster approach can be used for the single inflate()
+ call. Z_FINISH also informs inflate to not maintain a sliding window if the
+ stream completes, which reduces inflate's memory footprint. If the stream
+ does not complete, either because not all of the stream is provided or not
+ enough output space is provided, then a sliding window will be allocated and
+ inflate() can be called again to continue the operation as if Z_NO_FLUSH had
+ been used.
+
+ In this implementation, inflate() always flushes as much output as
+ possible to the output buffer, and always uses the faster approach on the
+ first call. So the effects of the flush parameter in this implementation are
+ on the return value of inflate() as noted below, when inflate() returns early
+ when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of
+ memory for a sliding window when Z_FINISH is used.
+
+ If a preset dictionary is needed after this call (see inflateSetDictionary
+ below), inflate sets strm->adler to the Adler-32 checksum of the dictionary
+ chosen by the compressor and returns Z_NEED_DICT; otherwise it sets
+ strm->adler to the Adler-32 checksum of all output produced so far (that is,
+ total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described
+ below. At the end of the stream, inflate() checks that its computed adler32
+ checksum is equal to that saved by the compressor and returns Z_STREAM_END
+ only if the checksum is correct.
+
+ inflate() can decompress and check either zlib-wrapped or gzip-wrapped
+ deflate data. The header type is detected automatically, if requested when
+ initializing with inflateInit2(). Any information contained in the gzip
+ header is not retained, so applications that need that information should
+ instead use raw inflate, see inflateInit2() below, or inflateBack() and
+ perform their own processing of the gzip header and trailer. When processing
+ gzip-wrapped deflate data, strm->adler is set to the CRC-32 of the output
+ produced so far. The CRC-32 is checked against the gzip trailer.
+
+ inflate() returns Z_OK if some progress has been made (more input processed
+ or more output produced), Z_STREAM_END if the end of the compressed data has
+ been reached and all uncompressed output has been produced, Z_NEED_DICT if a
+ preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
+ corrupted (input stream not conforming to the zlib format or incorrect check
+ value), Z_STREAM_ERROR if the stream structure was inconsistent (for example
+ next_in or next_out was Z_NULL), Z_MEM_ERROR if there was not enough memory,
+ Z_BUF_ERROR if no progress is possible or if there was not enough room in the
+ output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and
+ inflate() can be called again with more input and more output space to
+ continue decompressing. If Z_DATA_ERROR is returned, the application may
+ then call inflateSync() to look for a good compression block if a partial
+ recovery of the data is desired.
+*/
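And the mirror image for decompression, following the Z_FINISH contract above (again a sketch: decompress_block is a hypothetical name, and dst must be able to hold all of the uncompressed data):

#include <string.h>
#include "hammer2_zlib.h"

/*
 * Inflate a complete zlib stream in one call; *dstlenp holds the dst
 * capacity on entry and the decompressed size on success.
 */
int
decompress_block(Bytef *dst, uLong *dstlenp, const Bytef *src, uLong srclen)
{
	z_stream strm;
	int ret;

	memset(&strm, 0, sizeof(strm));
	strm.next_in = (z_const Bytef *)src;
	strm.avail_in = (uInt)srclen;
	ret = inflateInit(&strm);
	if (ret != Z_OK)
		return (ret);
	strm.next_out = dst;
	strm.avail_out = (uInt)*dstlenp;
	ret = inflate(&strm, Z_FINISH);	/* Z_STREAM_END at end of stream */
	*dstlenp = strm.total_out;
	(void)inflateEnd(&strm);
	return (ret == Z_STREAM_END ? Z_OK : Z_BUF_ERROR);
}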
+
+
+int inflateEnd(z_streamp strm);
+/*
+ All dynamically allocated data structures for this stream are freed.
+ This function discards any unprocessed input and does not flush any pending
+ output.
+
+ inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state
+ was inconsistent. In the error case, msg may be set but then points to a
+ static string (which must not be deallocated).
+*/
+
+ /* checksum functions */
+
+/*
+ These functions are not related to compression but are exported
+ anyway because they might be useful in applications using the compression
+ library.
+*/
+
+uLong adler32(uLong adler, const Bytef *buf, uInt len);
+/*
+ Update a running Adler-32 checksum with the bytes buf[0..len-1] and
+ return the updated checksum. If buf is Z_NULL, this function returns the
+ required initial value for the checksum.
+
+ An Adler-32 checksum is almost as reliable as crc32_zlib but can be computed
+ much faster.
+
+ Usage example:
+
+ uLong adler = adler32(0L, Z_NULL, 0);
+
+ while (read_buffer(buffer, length) != EOF) {
+ adler = adler32(adler, buffer, length);
+ }
+ if (adler != original_adler) error();
+*/
+
+ /* various hacks, don't look :) */
+
+/* deflateInit and inflateInit are macros to allow checking the zlib version
+ * and the compiler's view of z_stream:
+ */
+int deflateInit_(z_streamp strm, int level,
+ const char *version, int stream_size);
+int inflateInit_(z_streamp strm,
+ const char *version, int stream_size);
+
+#define deflateInit(strm, level) \
+ deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
+#define inflateInit(strm) \
+ inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
+#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+ deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+ (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
+#define inflateInit2(strm, windowBits) \
+ inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
+ (int)sizeof(z_stream))
+
+/* hack for buggy compilers */
+#if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL)
+ struct internal_state {int dummy;};
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ZLIB_H */
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib_adler32.c b/sys/fs/hammer2/zlib/hammer2_zlib_adler32.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib_adler32.c
@@ -0,0 +1,175 @@
+/* adler32.c -- compute the Adler-32 checksum of a data stream
+ * Copyright (C) 1995-2011 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#include "hammer2_zlib_zutil.h"
+
+#define local static
+
+//local uLong adler32_combine_ (uLong adler1, uLong adler2, z_off64_t len2);
+
+#define BASE 65521 /* largest prime smaller than 65536 */
+#define NMAX 5552
+/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1:
+ starting from a worst-case adler of BASE-1 and adding at most 255 per byte,
+ that expression bounds sum2 after n bytes, so the running sums cannot
+ overflow 32 bits within an NMAX-byte block and one modulo per block
+ suffices */
+
+#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;}
+#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
+#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
+#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
+#define DO16(buf) DO8(buf,0); DO8(buf,8);
+
+/* use NO_DIVIDE if your processor does not do division in hardware --
+ try it both ways to see which is faster */
+#ifdef NO_DIVIDE
+/* note that this assumes BASE is 65521, where 65536 % 65521 == 15
+ (thank you to John Reiser for pointing this out) */
+# define CHOP(a) \
+ do { \
+ unsigned long tmp = a >> 16; \
+ a &= 0xffffUL; \
+ a += (tmp << 4) - tmp; \
+ } while (0)
+# define MOD28(a) \
+ do { \
+ CHOP(a); \
+ if (a >= BASE) a -= BASE; \
+ } while (0)
+# define MOD(a) \
+ do { \
+ CHOP(a); \
+ MOD28(a); \
+ } while (0)
+# define MOD63(a) \
+ do { /* this assumes a is not negative */ \
+ z_off64_t tmp = a >> 32; \
+ a &= 0xffffffffL; \
+ a += (tmp << 8) - (tmp << 5) + tmp; \
+ tmp = a >> 16; \
+ a &= 0xffffL; \
+ a += (tmp << 4) - tmp; \
+ tmp = a >> 16; \
+ a &= 0xffffL; \
+ a += (tmp << 4) - tmp; \
+ if (a >= BASE) a -= BASE; \
+ } while (0)
+#else
+# define MOD(a) a %= BASE
+# define MOD28(a) a %= BASE
+# define MOD63(a) a %= BASE
+#endif
+
+local uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2);
+uLong adler32_combine(uLong adler1, uLong adler2, z_off_t len2);
+
+/* ========================================================================= */
+uLong
+adler32(uLong adler, const Bytef *buf, uInt len)
+{
+ unsigned long sum2;
+ unsigned n;
+
+ /* split Adler-32 into component sums */
+ sum2 = (adler >> 16) & 0xffff;
+ adler &= 0xffff;
+
+ /* in case user likes doing a byte at a time, keep it fast */
+ if (len == 1) {
+ adler += buf[0];
+ if (adler >= BASE)
+ adler -= BASE;
+ sum2 += adler;
+ if (sum2 >= BASE)
+ sum2 -= BASE;
+ return adler | (sum2 << 16);
+ }
+
+ /* initial Adler-32 value (deferred check for len == 1 speed) */
+ if (buf == Z_NULL)
+ return 1L;
+
+ /* in case short lengths are provided, keep it somewhat fast */
+ if (len < 16) {
+ while (len--) {
+ adler += *buf++;
+ sum2 += adler;
+ }
+ if (adler >= BASE)
+ adler -= BASE;
+ MOD28(sum2); /* only added so many BASE's */
+ return adler | (sum2 << 16);
+ }
+
+ /* do length NMAX blocks -- requires just one modulo operation */
+ while (len >= NMAX) {
+ len -= NMAX;
+ n = NMAX / 16; /* NMAX is divisible by 16 */
+ do {
+ DO16(buf); /* 16 sums unrolled */
+ buf += 16;
+ } while (--n);
+ MOD(adler);
+ MOD(sum2);
+ }
+
+ /* do remaining bytes (less than NMAX, still just one modulo) */
+ if (len) { /* avoid modulos if none remaining */
+ while (len >= 16) {
+ len -= 16;
+ DO16(buf);
+ buf += 16;
+ }
+ while (len--) {
+ adler += *buf++;
+ sum2 += adler;
+ }
+ MOD(adler);
+ MOD(sum2);
+ }
+
+ /* return recombined sums */
+ return adler | (sum2 << 16);
+}
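For orientation, the checksum is designed to be fed incrementally; a minimal caller-side sketch (buf1/buf2 and their lengths are hypothetical, the calling convention is the standard zlib one visible in this file):

    uLong check = adler32(0L, Z_NULL, 0);   /* canonical initial value (1) */
    check = adler32(check, buf1, len1);     /* first chunk */
    check = adler32(check, buf2, len2);     /* continuation */
    /* 'check' now equals the Adler-32 of buf1 and buf2 concatenated */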
+
+/* ========================================================================= */
+local
+uLong
+adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2)
+{
+ unsigned long sum1;
+ unsigned long sum2;
+ unsigned rem;
+
+ /* for negative len, return invalid adler32 as a clue for debugging */
+ if (len2 < 0)
+ return 0xffffffffUL;
+
+ /* the derivation of this formula is left as an exercise for the reader */
+ MOD63(len2); /* assumes len2 >= 0 */
+ rem = (unsigned)len2;
+ sum1 = adler1 & 0xffff;
+ sum2 = rem * sum1;
+ MOD(sum2);
+ sum1 += (adler2 & 0xffff) + BASE - 1;
+ sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
+ if (sum1 >= BASE) sum1 -= BASE;
+ if (sum1 >= BASE) sum1 -= BASE;
+ if (sum2 >= (BASE << 1)) sum2 -= (BASE << 1);
+ if (sum2 >= BASE) sum2 -= BASE;
+ return sum1 | (sum2 << 16);
+}
+
+/* ========================================================================= */
+uLong
+adler32_combine(uLong adler1, uLong adler2, z_off_t len2)
+{
+ return adler32_combine_(adler1, adler2, len2);
+}
+
+uLong
+adler32_combine64(uLong adler1, uLong adler2, z_off64_t len2)
+{
+ return adler32_combine_(adler1, adler2, len2);
+}
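adler32_combine() lets two checksums computed independently be joined without touching the data again; only the second stream's checksum and its length are needed. A hedged usage sketch (stream_a/stream_b and their lengths are placeholders):

    uLong ad_a = adler32(adler32(0L, Z_NULL, 0), stream_a, len_a);
    uLong ad_b = adler32(adler32(0L, Z_NULL, 0), stream_b, len_b);
    /* equals adler32 over the concatenation of stream_a and stream_b */
    uLong ad_ab = adler32_combine(ad_a, ad_b, (z_off_t)len_b);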
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib_deflate.h b/sys/fs/hammer2/zlib/hammer2_zlib_deflate.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib_deflate.h
@@ -0,0 +1,337 @@
+/* deflate.h -- internal compression state
+ * Copyright (C) 1995-2012 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+/* @(#) $Id$ */
+
+#ifndef DEFLATE_H
+#define DEFLATE_H
+
+#include "hammer2_zlib_zutil.h"
+
+/* ===========================================================================
+ * Internal compression state.
+ */
+
+#define LENGTH_CODES 29
+/* number of length codes, not counting the special END_BLOCK code */
+
+#define LITERALS 256
+/* number of literal bytes 0..255 */
+
+#define L_CODES (LITERALS+1+LENGTH_CODES)
+/* number of Literal or Length codes, including the END_BLOCK code */
+
+#define D_CODES 30
+/* number of distance codes */
+
+#define BL_CODES 19
+/* number of codes used to transfer the bit lengths */
+
+#define HEAP_SIZE (2*L_CODES+1)
+/* maximum heap size */
+
+#define MAX_BITS 15
+/* No code may exceed MAX_BITS bits */
+
+#define Buf_size 16
+/* size of bit buffer in bi_buf */
+
+#define INIT_STATE 42
+#define EXTRA_STATE 69
+#define NAME_STATE 73
+#define COMMENT_STATE 91
+#define HCRC_STATE 103
+#define BUSY_STATE 113
+#define FINISH_STATE 666
+/* Stream status */
+
+
+/* Data structure describing a single value and its code string. */
+typedef struct ct_data_s {
+ union {
+ ush freq; /* frequency count */
+ ush code; /* bit string */
+ } fc;
+ union {
+ ush dad; /* father node in Huffman tree */
+ ush len; /* length of bit string */
+ } dl;
+} FAR ct_data;
+
+#define Freq fc.freq
+#define Code fc.code
+#define Dad dl.dad
+#define Len dl.len
+
+typedef struct static_tree_desc_s static_tree_desc;
+
+typedef struct tree_desc_s {
+ ct_data *dyn_tree; /* the dynamic tree */
+ int max_code; /* largest code with non zero frequency */
+ static_tree_desc *stat_desc; /* the corresponding static tree */
+} FAR tree_desc;
+
+typedef ush Pos;
+typedef Pos FAR Posf;
+typedef unsigned IPos;
+
+/* A Pos is an index in the character window. We use short instead of int to
+ * save space in the various tables. IPos is used only for parameter passing.
+ */
+
+typedef struct internal_state {
+ z_streamp strm; /* pointer back to this zlib stream */
+ int status; /* as the name implies */
+ Bytef *pending_buf; /* output still pending */
+ ulg pending_buf_size; /* size of pending_buf */
+ Bytef *pending_out; /* next pending byte to output to the stream */
+ uInt pending; /* number of bytes in the pending buffer */
+ int wrap; /* bit 0 true for zlib, bit 1 true for gzip */
+ uInt gzindex; /* where in extra, name, or comment */
+ Byte method; /* can only be DEFLATED */
+ int last_flush; /* value of flush param for previous deflate call */
+
+ /* used by deflate.c: */
+
+ uInt w_size; /* LZ77 window size (32K by default) */
+ uInt w_bits; /* log2(w_size) (8..16) */
+ uInt w_mask; /* w_size - 1 */
+
+ Bytef *window;
+ /* Sliding window. Input bytes are read into the second half of the window,
+ * and move to the first half later to keep a dictionary of at least wSize
+ * bytes. With this organization, matches are limited to a distance of
+ * wSize-MAX_MATCH bytes, but this ensures that IO is always
+ * performed with a length multiple of the block size. Also, it limits
+ * the window size to 64K, which is quite useful on MSDOS.
+ * To do: use the user input buffer as sliding window.
+ */
+
+ ulg window_size;
+ /* Actual size of window: 2*wSize, except when the user input buffer
+ * is directly used as sliding window.
+ */
+
+ Posf *prev;
+ /* Link to older string with same hash index. To limit the size of this
+ * array to 64K, this link is maintained only for the last 32K strings.
+ * An index in this array is thus a window index modulo 32K.
+ */
+
+ Posf *head; /* Heads of the hash chains or NIL. */
+
+ uInt ins_h; /* hash index of string to be inserted */
+ uInt hash_size; /* number of elements in hash table */
+ uInt hash_bits; /* log2(hash_size) */
+ uInt hash_mask; /* hash_size-1 */
+
+ uInt hash_shift;
+ /* Number of bits by which ins_h must be shifted at each input
+ * step. It must be such that after MIN_MATCH steps, the oldest
+ * byte no longer takes part in the hash key, that is:
+ * hash_shift * MIN_MATCH >= hash_bits
+ */
+
+ long block_start;
+ /* Window position at the beginning of the current output block. Gets
+ * negative when the window is moved backwards.
+ */
+
+ uInt match_length; /* length of best match */
+ IPos prev_match; /* previous match */
+ int match_available; /* set if previous match exists */
+ uInt strstart; /* start of string to insert */
+ uInt match_start; /* start of matching string */
+ uInt lookahead; /* number of valid bytes ahead in window */
+
+ uInt prev_length;
+ /* Length of the best match at previous step. Matches not greater than this
+ * are discarded. This is used in the lazy match evaluation.
+ */
+
+ uInt max_chain_length;
+ /* To speed up deflation, hash chains are never searched beyond this
+ * length. A higher limit improves compression ratio but degrades the
+ * speed.
+ */
+
+ uInt max_lazy_match;
+ /* Attempt to find a better match only when the current match is strictly
+ * smaller than this value. This mechanism is used only for compression
+ * levels >= 4.
+ */
+# define max_insert_length max_lazy_match
+ /* Insert new strings in the hash table only if the match length is not
+ * greater than this length. This saves time but degrades compression.
+ * max_insert_length is used only for compression levels <= 3.
+ */
+
+ int level; /* compression level (1..9) */
+ int strategy; /* favor or force Huffman coding */
+
+ uInt good_match;
+ /* Use a faster search when the previous match is longer than this */
+
+ int nice_match; /* Stop searching when current match exceeds this */
+
+ /* used by trees.c: */
+ /* Didn't use ct_data typedef below to suppress compiler warning */
+ struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
+ struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
+ struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */
+
+ struct tree_desc_s l_desc; /* desc. for literal tree */
+ struct tree_desc_s d_desc; /* desc. for distance tree */
+ struct tree_desc_s bl_desc; /* desc. for bit length tree */
+
+ ush bl_count[MAX_BITS+1];
+ /* number of codes at each bit length for an optimal tree */
+
+ int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */
+ int heap_len; /* number of elements in the heap */
+ int heap_max; /* element of largest frequency */
+ /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
+ * The same heap array is used to build all trees.
+ */
+
+ uch depth[2*L_CODES+1];
+ /* Depth of each subtree used as tie breaker for trees of equal frequency
+ */
+
+ uchf *l_buf; /* buffer for literals or lengths */
+
+ uInt lit_bufsize;
+ /* Size of match buffer for literals/lengths. There are 4 reasons for
+ * limiting lit_bufsize to 64K:
+ * - frequencies can be kept in 16 bit counters
+ * - if compression is not successful for the first block, all input
+ * data is still in the window so we can still emit a stored block even
+ * when input comes from standard input. (This can also be done for
+ * all blocks if lit_bufsize is not greater than 32K.)
+ * - if compression is not successful for a file smaller than 64K, we can
+ * even emit a stored file instead of a stored block (saving 5 bytes).
+ * This is applicable only for zip (not gzip or zlib).
+ * - creating new Huffman trees less frequently may not provide fast
+ * adaptation to changes in the input data statistics. (Take for
+ * example a binary file with poorly compressible code followed by
+ * a highly compressible string table.) Smaller buffer sizes give
+ * fast adaptation but have of course the overhead of transmitting
+ * trees more frequently.
+ * - I can't count above 4
+ */
+
+ uInt last_lit; /* running index in l_buf */
+
+ ushf *d_buf;
+ /* Buffer for distances. To simplify the code, d_buf and l_buf have
+ * the same number of elements. To use different lengths, an extra flag
+ * array would be necessary.
+ */
+
+ ulg opt_len; /* bit length of current block with optimal trees */
+ ulg static_len; /* bit length of current block with static trees */
+ uInt matches; /* number of string matches in current block */
+ uInt insert; /* bytes at end of window left to insert */
+
+#ifdef H2_ZLIB_DEBUG
+ ulg compressed_len; /* total bit length of compressed file mod 2^32 */
+ ulg bits_sent; /* bit length of compressed data sent mod 2^32 */
+#endif
+
+ ush bi_buf;
+ /* Output buffer. bits are inserted starting at the bottom (least
+ * significant bits).
+ */
+ int bi_valid;
+ /* Number of valid bits in bi_buf. All bits above the last valid bit
+ * are always zero.
+ */
+
+ ulg high_water;
+ /* High water mark offset in window for initialized bytes -- bytes above
+ * this are set to zero in order to avoid memory check warnings when
+ * longest match routines access bytes past the input. This is then
+ * updated to the new high water mark.
+ */
+
+} FAR deflate_state;
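All of the buffer sizes above derive from two tuning knobs, windowBits and memLevel; for the defaults (windowBits == 15, memLevel == 8, the usual zutil.h DEF_MEM_LEVEL) the expressions in deflateInit2_() below work out as in this illustrative sketch:

    /* Illustrative only, assuming windowBits == 15 and memLevel == 8: */
    unsigned w_size      = 1u << 15;        /* 32K-byte sliding window   */
    unsigned hash_size   = 1u << (8 + 7);   /* 32K hash-chain heads      */
    unsigned lit_bufsize = 1u << (8 + 6);   /* 16K literal/length slots  */
    /* window 64KB, prev 64KB, head 64KB (Pos is 2 bytes), and the
       pending/overlay buffer lit_bufsize*(sizeof(ush)+2) == 64KB */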
+
+/* Output a byte on the stream.
+ * IN assertion: there is enough room in pending_buf.
+ */
+#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);}
+
+
+#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
+/* Minimum amount of lookahead, except at the end of the input file.
+ * See deflate.c for comments about the MIN_MATCH+1.
+ */
+
+#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD)
+/* In order to simplify the code, particularly on 16 bit machines, match
+ * distances are limited to MAX_DIST instead of WSIZE.
+ */
+
+#define WIN_INIT MAX_MATCH
+/* Number of bytes after end of data in window to initialize in order to avoid
+ memory checker errors from longest match routines */
+
+ /* in trees.c */
+void ZLIB_INTERNAL _tr_init(deflate_state *s);
+int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc);
+void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf,
+ ulg stored_len, int last);
+void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s);
+void ZLIB_INTERNAL _tr_align(deflate_state *s);
+void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,
+ ulg stored_len, int last);
+
+#define d_code(dist) \
+ ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
+/* Mapping from a distance to a distance code. dist is the distance - 1 and
+ * must not have side effects. _dist_code[256] and _dist_code[257] are never
+ * used.
+ */
+
+#ifndef H2_ZLIB_DEBUG
+/* Inline versions of _tr_tally for speed: */
+
+#if defined(GEN_TREES_H) || !defined(STDC)
+ extern uch ZLIB_INTERNAL _length_code[];
+ extern uch ZLIB_INTERNAL _dist_code[];
+#else
+ extern const uch ZLIB_INTERNAL _length_code[];
+ extern const uch ZLIB_INTERNAL _dist_code[];
+#endif
+
+# define _tr_tally_lit(s, c, flush) \
+ { uch cc = (c); \
+ s->d_buf[s->last_lit] = 0; \
+ s->l_buf[s->last_lit++] = cc; \
+ s->dyn_ltree[cc].Freq++; \
+ flush = (s->last_lit == s->lit_bufsize-1); \
+ }
+# define _tr_tally_dist(s, distance, length, flush) \
+ { uch len = (length); \
+ ush dist = (distance); \
+ s->d_buf[s->last_lit] = dist; \
+ s->l_buf[s->last_lit++] = len; \
+ dist--; \
+ s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
+ s->dyn_dtree[d_code(dist)].Freq++; \
+ flush = (s->last_lit == s->lit_bufsize-1); \
+ }
+#else
+# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
+# define _tr_tally_dist(s, distance, length, flush) \
+ flush = _tr_tally(s, distance, length)
+#endif
+
+#endif /* DEFLATE_H */
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib_deflate.c b/sys/fs/hammer2/zlib/hammer2_zlib_deflate.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib_deflate.c
@@ -0,0 +1,1210 @@
+/* deflate.c -- compress data using the deflation algorithm
+ * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ * ALGORITHM
+ *
+ * The "deflation" process depends on being able to identify portions
+ * of the input text which are identical to earlier input (within a
+ * sliding window trailing behind the input currently being processed).
+ *
+ * The most straightforward technique turns out to be the fastest for
+ * most input files: try all possible matches and select the longest.
+ * The key feature of this algorithm is that insertions into the string
+ * dictionary are very simple and thus fast, and deletions are avoided
+ * completely. Insertions are performed at each input character, whereas
+ * string matches are performed only when the previous match ends. So it
+ * is preferable to spend more time in matches to allow very fast string
+ * insertions and avoid deletions. The matching algorithm for small
+ * strings is inspired by that of Rabin & Karp. A brute force approach
+ * is used to find longer strings when a small match has been found.
+ * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
+ * (by Leonid Broukhis).
+ * A previous version of this file used a more sophisticated algorithm
+ * (by Fiala and Greene) which is guaranteed to run in linear amortized
+ * time, but has a larger average cost, uses more memory and is patented.
+ * However the F&G algorithm may be faster for some highly redundant
+ * files if the parameter max_chain_length (described below) is too large.
+ *
+ * ACKNOWLEDGEMENTS
+ *
+ * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
+ * I found it in 'freeze' written by Leonid Broukhis.
+ * Thanks to many people for bug reports and testing.
+ *
+ * REFERENCES
+ *
+ * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
+ * Available at http://tools.ietf.org/html/rfc1951
+ *
+ * A description of the Rabin and Karp algorithm is given in the book
+ * "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
+ *
+ * Fiala,E.R., and Greene,D.H.
+ * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-505
+ *
+ */
+
+/* @(#) $Id$ */
+
+#include "hammer2_zlib_deflate.h"
+#include "../hammer2.h"
+#include <sys/malloc.h> /* for malloc macros */
+
+MALLOC_DECLARE(C_ZLIB_BUFFER_DEFLATE);
+MALLOC_DEFINE(C_ZLIB_BUFFER_DEFLATE, "compzlibbufferdeflate",
+ "A private buffer used by zlib library for deflate function.");
+
+const char deflate_copyright[] =
+ " deflate 1.2.8 Copyright 1995-2013 Jean-loup Gailly and Mark Adler ";
+/*
+ If you use the zlib library in a product, an acknowledgment is welcome
+ in the documentation of your product. If for some reason you cannot
+ include such an acknowledgment, I would appreciate that you keep this
+ copyright string in the executable of your product.
+ */
+
+/* ===========================================================================
+ * Function prototypes.
+ */
+typedef enum {
+ need_more, /* block not completed, need more input or more output */
+ block_done, /* block flush performed */
+ finish_started, /* finish started, need only more output at next deflate */
+ finish_done /* finish done, accept no more input or output */
+} block_state;
+
+typedef block_state (*compress_func)(deflate_state *s, int flush);
+/* Compression function. Returns the block state after the call. */
+
+local void fill_window (deflate_state *s);
+#ifndef FASTEST
+local block_state deflate_slow(deflate_state *s, int flush);
+#endif
+local block_state deflate_rle(deflate_state *s, int flush);
+local block_state deflate_huff(deflate_state *s, int flush);
+local void lm_init(deflate_state *s);
+local void putShortMSB(deflate_state *s, uInt b);
+local void flush_pending(z_streamp strm);
+local int read_buf(z_streamp strm, Bytef *buf, unsigned size);
+#ifdef ASMV
+ void match_init(void); /* asm code initialization */
+ uInt longest_match(deflate_state *s, IPos cur_match);
+#else
+local uInt longest_match(deflate_state *s, IPos cur_match);
+#endif
+
+#ifdef H2_ZLIB_DEBUG
+local void check_match(deflate_state *s, IPos start, IPos match,
+ int length);
+#endif
+
+int deflateInit2_(z_streamp strm, int level, int method, int windowBits,
+ int memLevel, int strategy, const char *version,
+ int stream_size);
+int deflateReset (z_streamp strm);
+int deflateResetKeep (z_streamp strm);
+
+/* ===========================================================================
+ * Local data
+ */
+
+#define NIL 0
+/* Tail of hash chains */
+
+#ifndef TOO_FAR
+# define TOO_FAR 4096
+#endif
+/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
+
+/* Values for max_lazy_match, good_match and max_chain_length, depending on
+ * the desired pack level (0..9). The values given below have been tuned to
+ * exclude worst case performance for pathological files. Better values may be
+ * found for specific files.
+ */
+typedef struct config_s {
+ ush good_length; /* reduce lazy search above this match length */
+ ush max_lazy; /* do not perform lazy search above this match length */
+ ush nice_length; /* quit search above this match length */
+ ush max_chain;
+ compress_func func;
+} config;
+
+local const config configuration_table[10] = {
+/* good lazy nice chain */
+/* 0 */ {0, 0, 0, 0, deflate_slow/*deflate_stored*/}, /* store only */
+/* 1 */ {4, 4, 8, 4, deflate_slow/*deflate_fast*/}, /* max speed, no lazy matches */
+/* 2 */ {4, 5, 16, 8, deflate_slow/*deflate_fast*/},
+/* 3 */ {4, 6, 32, 32, deflate_slow/*deflate_fast*/},
+
+/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */
+/* 5 */ {8, 16, 32, 32, deflate_slow},
+/* 6 */ {8, 16, 128, 128, deflate_slow},
+/* 7 */ {8, 32, 128, 256, deflate_slow},
+/* 8 */ {32, 128, 258, 1024, deflate_slow},
+/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */
+
+/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
+ * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
+ * meaning.
+ */
+
+#define EQUAL 0
+/* result of memcmp for equal strings */
+
+#ifndef NO_DUMMY_DECL
+struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
+#endif
+
+/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */
+#define RANK(f) (((f) << 1) - ((f) > 4 ? 9 : 0))
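Plugging the flush constants into the macro shows the reordering it performs (values assumed from the standard zlib.h: Z_NO_FLUSH 0 through Z_FINISH 4, Z_BLOCK 5):

    /* RANK(f) == (f << 1) - (f > 4 ? 9 : 0):
     *   Z_NO_FLUSH      0 -> 0
     *   Z_BLOCK         5 -> 1   (slotted between NO_FLUSH and PARTIAL_FLUSH)
     *   Z_PARTIAL_FLUSH 1 -> 2
     *   Z_SYNC_FLUSH    2 -> 4
     *   Z_FULL_FLUSH    3 -> 6
     *   Z_FINISH        4 -> 8
     */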
+
+/* ===========================================================================
+ * Update a hash value with the given input byte
+ * IN assertion: all calls to UPDATE_HASH are made with consecutive
+ * input characters, so that a running hash key can be computed from the
+ * previous key instead of complete recalculation each time.
+ */
+#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
+
+
+/* ===========================================================================
+ * Insert string str in the dictionary and set match_head to the previous head
+ * of the hash chain (the most recent string with same hash key). Return
+ * the previous length of the hash chain.
+ * If this file is compiled with -DFASTEST, the compression level is forced
+ * to 1, and no hash chains are maintained.
+ * IN assertion: all calls to INSERT_STRING are made with consecutive
+ * input characters and the first MIN_MATCH bytes of str are valid
+ * (except for the last MIN_MATCH-1 bytes of the input file).
+ */
+#define INSERT_STRING(s, str, match_head) \
+ (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
+ match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \
+ s->head[s->ins_h] = (Pos)(str))
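The rolling-hash contract hash_shift * MIN_MATCH >= hash_bits can be traced by hand. A sketch assuming the defaults hash_bits == 15 and MIN_MATCH == 3, so hash_shift == (15+3-1)/3 == 5 (c0..c3 stand for consecutive input bytes): three updates after a byte is mixed in, it has been shifted entirely above the mask.

    unsigned h = 0, mask = (1u << 15) - 1;  /* hash_mask for hash_bits == 15 */
    h = ((h << 5) ^ c0) & mask;   /* c0 occupies bits 0..7               */
    h = ((h << 5) ^ c1) & mask;   /* c0 shifted to bits 5..12            */
    h = ((h << 5) ^ c2) & mask;   /* c0 at bits 10..17; top bits cut off */
    h = ((h << 5) ^ c3) & mask;   /* c0 entirely above bit 14: dropped   */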
+
+/* ===========================================================================
+ * Initialize the hash table (avoiding 64K overflow for 16 bit systems).
+ * prev[] will be initialized on the fly.
+ */
+#define CLEAR_HASH(s) \
+ s->head[s->hash_size-1] = NIL; \
+ zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
+
+/* ========================================================================= */
+int
+deflateInit_(z_streamp strm, int level, const char *version, int stream_size)
+{
+ return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL,
+ Z_DEFAULT_STRATEGY, version, stream_size);
+ /* To do: ignore strm->next_in if we use it as window */
+}
+
+/* ========================================================================= */
+int
+deflateInit2_(z_streamp strm, int level, int method, int windowBits,
+ int memLevel, int strategy, const char *version, int stream_size)
+{
+ deflate_state *s;
+ int wrap = 1;
+ static const char my_version[] = ZLIB_VERSION;
+
+ ushf *overlay;
+ /* We overlay pending_buf and d_buf+l_buf. This works since the average
+ * output size for (length,distance) codes is <= 24 bits.
+ */
+
+ if (version == Z_NULL || version[0] != my_version[0] ||
+ stream_size != sizeof(z_stream)) {
+ return Z_VERSION_ERROR;
+ }
+ if (strm == Z_NULL) return Z_STREAM_ERROR;
+
+ strm->msg = Z_NULL;
+
+ if (level == Z_DEFAULT_COMPRESSION) level = 6;
+
+ if (windowBits < 0) { /* suppress zlib wrapper */
+ wrap = 0;
+ windowBits = -windowBits;
+ }
+ if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
+ windowBits < 8 || windowBits > 15 || level < 0 || level > 9 ||
+ strategy < 0 || strategy > Z_FIXED) {
+ return Z_STREAM_ERROR;
+ }
+ if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */
+ s = (deflate_state *) malloc(sizeof(*s), C_ZLIB_BUFFER_DEFLATE, M_WAITOK);
+ if (s == Z_NULL) return Z_MEM_ERROR;
+ strm->state = (struct internal_state FAR *)s;
+ s->strm = strm;
+
+ s->wrap = wrap;
+ s->w_bits = windowBits;
+ s->w_size = 1 << s->w_bits;
+ s->w_mask = s->w_size - 1;
+
+ s->hash_bits = memLevel + 7;
+ s->hash_size = 1 << s->hash_bits;
+ s->hash_mask = s->hash_size - 1;
+ s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
+
+ s->window = (Bytef *) malloc((s->w_size)*2*sizeof(Byte), C_ZLIB_BUFFER_DEFLATE, M_WAITOK);
+ s->prev = (Posf *) malloc((s->w_size)*sizeof(Pos), C_ZLIB_BUFFER_DEFLATE, M_WAITOK);
+ s->head = (Posf *) malloc((s->hash_size)*sizeof(Pos), C_ZLIB_BUFFER_DEFLATE, M_WAITOK);
+
+ s->high_water = 0; /* nothing written to s->window yet */
+
+ s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
+
+ overlay = (ushf *) malloc((s->lit_bufsize)*(sizeof(ush)+2), C_ZLIB_BUFFER_DEFLATE, M_WAITOK);
+ s->pending_buf = (uchf *) overlay;
+ s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
+
+ if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
+ s->pending_buf == Z_NULL) {
+ s->status = FINISH_STATE;
+ strm->msg = ERR_MSG(Z_MEM_ERROR);
+ deflateEnd (strm);
+ return Z_MEM_ERROR;
+ }
+ s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
+ s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
+
+ s->level = level;
+ s->strategy = strategy;
+ s->method = (Byte)method;
+
+ return deflateReset(strm);
+}
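For orientation, a caller-side sketch of how these entry points compose for a one-shot buffer (src/dst and their lengths are hypothetical, error handling is elided; in this port the allocations happen inside via the kernel malloc macros above):

    z_stream strm;
    memset(&strm, 0, sizeof(strm));
    if (deflateInit(&strm, Z_DEFAULT_COMPRESSION) != Z_OK)
            return;                         /* bad params or version skew */
    strm.next_in   = src;
    strm.avail_in  = srclen;
    strm.next_out  = dst;
    strm.avail_out = dstlen;
    int ret = deflate(&strm, Z_FINISH);     /* Z_STREAM_END when fully consumed */
    deflateEnd(&strm);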
+
+/* ========================================================================= */
+int
+deflateResetKeep (z_streamp strm)
+{
+ deflate_state *s;
+
+ if (strm == Z_NULL || strm->state == Z_NULL) {
+ return Z_STREAM_ERROR;
+ }
+
+ strm->total_in = strm->total_out = 0;
+ strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */
+ strm->data_type = Z_UNKNOWN;
+
+ s = (deflate_state *)strm->state;
+ s->pending = 0;
+ s->pending_out = s->pending_buf;
+
+ if (s->wrap < 0) {
+ s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */
+ }
+ s->status = s->wrap ? INIT_STATE : BUSY_STATE;
+ strm->adler = adler32(0L, Z_NULL, 0);
+ s->last_flush = Z_NO_FLUSH;
+
+ _tr_init(s);
+
+ return Z_OK;
+}
+
+/* ========================================================================= */
+int
+deflateReset (z_streamp strm)
+{
+ int ret;
+
+ ret = deflateResetKeep(strm);
+ if (ret == Z_OK)
+ lm_init(strm->state);
+ return ret;
+}
+
+/* =========================================================================
+ * Put a short in the pending buffer. The 16-bit value is put in MSB order.
+ * IN assertion: the stream state is correct and there is enough room in
+ * pending_buf.
+ */
+local
+void
+putShortMSB (deflate_state *s, uInt b)
+{
+ put_byte(s, (Byte)(b >> 8));
+ put_byte(s, (Byte)(b & 0xff));
+}
+
+/* =========================================================================
+ * Flush as much pending output as possible. All deflate() output goes
+ * through this function so some applications may wish to modify it
+ * to avoid allocating a large strm->next_out buffer and copying into it.
+ * (See also read_buf()).
+ */
+local
+void
+flush_pending(z_streamp strm)
+{
+ unsigned len;
+ deflate_state *s = strm->state;
+
+ _tr_flush_bits(s);
+ len = s->pending;
+ if (len > strm->avail_out) len = strm->avail_out;
+ if (len == 0) return;
+
+ zmemcpy(strm->next_out, s->pending_out, len);
+ strm->next_out += len;
+ s->pending_out += len;
+ strm->total_out += len;
+ strm->avail_out -= len;
+ s->pending -= len;
+ if (s->pending == 0) {
+ s->pending_out = s->pending_buf;
+ }
+}
+
+/* ========================================================================= */
+int
+deflate (z_streamp strm, int flush)
+{
+ int old_flush; /* value of flush param for previous deflate call */
+ deflate_state *s;
+
+ if (strm == Z_NULL || strm->state == Z_NULL ||
+ flush > Z_BLOCK || flush < 0) {
+ return Z_STREAM_ERROR;
+ }
+ s = strm->state;
+
+ if (strm->next_out == Z_NULL ||
+ (strm->next_in == Z_NULL && strm->avail_in != 0) ||
+ (s->status == FINISH_STATE && flush != Z_FINISH)) {
+ ERR_RETURN(strm, Z_STREAM_ERROR);
+ }
+ if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR);
+
+ s->strm = strm; /* just in case */
+ old_flush = s->last_flush;
+ s->last_flush = flush;
+
+ /* Write the header */
+ uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
+ uInt level_flags;
+
+ if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2)
+ level_flags = 0;
+ else if (s->level < 6)
+ level_flags = 1;
+ else if (s->level == 6)
+ level_flags = 2;
+ else
+ level_flags = 3;
+ header |= (level_flags << 6);
+ if (s->strstart != 0) header |= PRESET_DICT;
+ header += 31 - (header % 31);
+
+ s->status = BUSY_STATE;
+ putShortMSB(s, header);
+
+ /* Save the adler32 of the preset dictionary: */
+ if (s->strstart != 0) {
+ putShortMSB(s, (uInt)(strm->adler >> 16));
+ putShortMSB(s, (uInt)(strm->adler & 0xffff));
+ }
+ strm->adler = adler32(0L, Z_NULL, 0);
+
+ /* Flush as much pending output as possible */
+ if (s->pending != 0) {
+ flush_pending(strm);
+ if (strm->avail_out == 0) {
+ /* Since avail_out is 0, deflate will be called again with
+ * more output space, but possibly with both pending and
+ * avail_in equal to zero. There won't be anything to do,
+ * but this is not an error situation so make sure we
+ * return OK instead of BUF_ERROR at next call of deflate:
+ */
+ s->last_flush = -1;
+ return Z_OK;
+ }
+
+ /* Make sure there is something to do and avoid duplicate consecutive
+ * flushes. For repeated and useless calls with Z_FINISH, we keep
+ * returning Z_STREAM_END instead of Z_BUF_ERROR.
+ */
+ } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) &&
+ flush != Z_FINISH) {
+ ERR_RETURN(strm, Z_BUF_ERROR);
+ }
+
+ /* User must not provide more input after the first FINISH: */
+ if (s->status == FINISH_STATE && strm->avail_in != 0) {
+ ERR_RETURN(strm, Z_BUF_ERROR);
+ }
+
+ /* Start a new block or continue the current one.
+ */
+ if (strm->avail_in != 0 || s->lookahead != 0 ||
+ (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
+ block_state bstate;
+
+ bstate = s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) :
+ (s->strategy == Z_RLE ? deflate_rle(s, flush) :
+ (*(configuration_table[s->level].func))(s, flush));
+
+ if (bstate == finish_started || bstate == finish_done) {
+ s->status = FINISH_STATE;
+ }
+ if (bstate == need_more || bstate == finish_started) {
+ if (strm->avail_out == 0) {
+ s->last_flush = -1; /* avoid BUF_ERROR next call, see above */
+ }
+ return Z_OK;
+ /* If flush != Z_NO_FLUSH && avail_out == 0, the next call
+ * of deflate should use the same flush parameter to make sure
+ * that the flush is complete. So we don't have to output an
+ * empty block here, this will be done at next call. This also
+ * ensures that for a very small output buffer, we emit at most
+ * one empty block.
+ */
+ }
+ if (bstate == block_done) {
+ if (flush == Z_PARTIAL_FLUSH) {
+ _tr_align(s);
+ } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */
+ _tr_stored_block(s, (char*)0, 0L, 0);
+ /* For a full flush, this empty block will be recognized
+ * as a special marker by inflate_sync().
+ */
+ if (flush == Z_FULL_FLUSH) {
+ CLEAR_HASH(s); /* forget history */
+ if (s->lookahead == 0) {
+ s->strstart = 0;
+ s->block_start = 0L;
+ s->insert = 0;
+ }
+ }
+ }
+ flush_pending(strm);
+ if (strm->avail_out == 0) {
+ s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */
+ return Z_OK;
+ }
+ }
+ }
+ Assert(strm->avail_out > 0, "bug2");
+
+ if (flush != Z_FINISH) return Z_OK;
+ if (s->wrap <= 0) return Z_STREAM_END;
+
+ /* Write the trailer */
+ putShortMSB(s, (uInt)(strm->adler >> 16));
+ putShortMSB(s, (uInt)(strm->adler & 0xffff));
+
+ flush_pending(strm);
+ /* If avail_out is zero, the application will call deflate again
+ * to flush the rest.
+ */
+ if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */
+ return s->pending != 0 ? Z_OK : Z_STREAM_END;
+}
+
+/* ========================================================================= */
+int
+deflateEnd (z_streamp strm)
+{
+ int status;
+
+ if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+
+ status = strm->state->status;
+ if (status != INIT_STATE &&
+ status != EXTRA_STATE &&
+ status != NAME_STATE &&
+ status != COMMENT_STATE &&
+ status != HCRC_STATE &&
+ status != BUSY_STATE &&
+ status != FINISH_STATE) {
+ return Z_STREAM_ERROR;
+ }
+
+ /* Deallocate in reverse order of allocations: */
+ free(strm->state->pending_buf, C_ZLIB_BUFFER_DEFLATE);
+ free(strm->state->head, C_ZLIB_BUFFER_DEFLATE);
+ free(strm->state->prev, C_ZLIB_BUFFER_DEFLATE);
+ free(strm->state->window, C_ZLIB_BUFFER_DEFLATE);
+
+ free(strm->state, C_ZLIB_BUFFER_DEFLATE);
+ strm->state = Z_NULL;
+
+ return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
+}
+
+/* ===========================================================================
+ * Read a new buffer from the current input stream, update the adler32
+ * and total number of bytes read. All deflate() input goes through
+ * this function so some applications may wish to modify it to avoid
+ * allocating a large strm->next_in buffer and copying from it.
+ * (See also flush_pending()).
+ */
+local
+int
+read_buf(z_streamp strm, Bytef *buf, unsigned size)
+{
+ unsigned len = strm->avail_in;
+
+ if (len > size) len = size;
+ if (len == 0) return 0;
+
+ strm->avail_in -= len;
+
+ zmemcpy(buf, strm->next_in, len);
+ if (strm->state->wrap == 1) {
+ strm->adler = adler32(strm->adler, buf, len);
+ }
+
+ strm->next_in += len;
+ strm->total_in += len;
+
+ return (int)len;
+}
+
+/* ===========================================================================
+ * Initialize the "longest match" routines for a new zlib stream
+ */
+local
+void
+lm_init (deflate_state *s)
+{
+ s->window_size = (ulg)2L*s->w_size;
+
+ CLEAR_HASH(s);
+
+ /* Set the default configuration parameters:
+ */
+ s->max_lazy_match = configuration_table[s->level].max_lazy;
+ s->good_match = configuration_table[s->level].good_length;
+ s->nice_match = configuration_table[s->level].nice_length;
+ s->max_chain_length = configuration_table[s->level].max_chain;
+
+ s->strstart = 0;
+ s->block_start = 0L;
+ s->lookahead = 0;
+ s->insert = 0;
+ s->match_length = s->prev_length = MIN_MATCH-1;
+ s->match_available = 0;
+ s->ins_h = 0;
+#ifndef FASTEST
+#ifdef ASMV
+ match_init(); /* initialize the asm code */
+#endif
+#endif
+}
+
+#ifndef FASTEST
+/* ===========================================================================
+ * Set match_start to the longest match starting at the given string and
+ * return its length. Matches shorter or equal to prev_length are discarded,
+ * in which case the result is equal to prev_length and match_start is
+ * garbage.
+ * IN assertions: cur_match is the head of the hash chain for the current
+ * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
+ * OUT assertion: the match length is not greater than s->lookahead.
+ */
+#ifndef ASMV
+/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
+ * match.S. The code will be functionally equivalent.
+ */
+local
+uInt
+longest_match(deflate_state *s, IPos cur_match) /* cur_match = current match */
+{
+ unsigned chain_length = s->max_chain_length;/* max hash chain length */
+ register Bytef *scan = s->window + s->strstart; /* current string */
+ register Bytef *match; /* matched string */
+ register int len; /* length of current match */
+ int best_len = s->prev_length; /* best match length so far */
+ int nice_match = s->nice_match; /* stop if match long enough */
+ IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+ s->strstart - (IPos)MAX_DIST(s) : NIL;
+ /* Stop when cur_match becomes <= limit. To simplify the code,
+ * we prevent matches with the string of window index 0.
+ */
+ Posf *prev = s->prev;
+ uInt wmask = s->w_mask;
+
+#ifdef UNALIGNED_OK
+ /* Compare two bytes at a time. Note: this is not always beneficial.
+ * Try with and without -DUNALIGNED_OK to check.
+ */
+ register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1;
+ register ush scan_start = *(ushf*)scan;
+ register ush scan_end = *(ushf*)(scan+best_len-1);
+#else
+ register Bytef *strend = s->window + s->strstart + MAX_MATCH;
+ register Byte scan_end1 = scan[best_len-1];
+ register Byte scan_end = scan[best_len];
+#endif
+
+ /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+ * It is easy to get rid of this optimization if necessary.
+ */
+ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+ /* Do not waste too much time if we already have a good match: */
+ if (s->prev_length >= s->good_match) {
+ chain_length >>= 2;
+ }
+ /* Do not look for matches beyond the end of the input. This is necessary
+ * to make deflate deterministic.
+ */
+ if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
+
+ Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+ do {
+ Assert(cur_match < s->strstart, "no future");
+ match = s->window + cur_match;
+
+ /* Skip to next match if the match length cannot increase
+ * or if the match length is less than 2. Note that the checks below
+ * for insufficient lookahead only occur occasionally for performance
+ * reasons. Therefore uninitialized memory will be accessed, and
+ * conditional jumps will be made that depend on those values.
+ * However the length of the match is limited to the lookahead, so
+ * the output of deflate is not affected by the uninitialized values.
+ */
+#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
+ /* This code assumes sizeof(unsigned short) == 2. Do not use
+ * UNALIGNED_OK if your compiler uses a different size.
+ */
+ if (*(ushf*)(match+best_len-1) != scan_end ||
+ *(ushf*)match != scan_start) continue;
+
+ /* It is not necessary to compare scan[2] and match[2] since they are
+ * always equal when the other bytes match, given that the hash keys
+ * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
+ * strstart+3, +5, ... up to strstart+257. We check for insufficient
+ * lookahead only every 4th comparison; the 128th check will be made
+ * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
+ * necessary to put more guard bytes at the end of the window, or
+ * to check more often for insufficient lookahead.
+ */
+ Assert(scan[2] == match[2], "scan[2]?");
+ scan++, match++;
+ do {
+ } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+ *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+ *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+ *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+ scan < strend);
+ /* The funny "do {}" generates better code on most compilers */
+
+ /* Here, scan <= window+strstart+257 */
+ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+ if (*scan == *match) scan++;
+
+ len = (MAX_MATCH - 1) - (int)(strend-scan);
+ scan = strend - (MAX_MATCH-1);
+
+#else /* UNALIGNED_OK */
+
+ if (match[best_len] != scan_end ||
+ match[best_len-1] != scan_end1 ||
+ *match != *scan ||
+ *++match != scan[1]) continue;
+
+ /* The check at best_len-1 can be removed because it will be made
+ * again later. (This heuristic is not always a win.)
+ * It is not necessary to compare scan[2] and match[2] since they
+ * are always equal when the other bytes match, given that
+ * the hash keys are equal and that HASH_BITS >= 8.
+ */
+ scan += 2, match++;
+ Assert(*scan == *match, "match[2]?");
+
+ /* We check for insufficient lookahead only every 8th comparison;
+ * the 256th check will be made at strstart+258.
+ */
+ do {
+ } while (*++scan == *++match && *++scan == *++match &&
+ *++scan == *++match && *++scan == *++match &&
+ *++scan == *++match && *++scan == *++match &&
+ *++scan == *++match && *++scan == *++match &&
+ scan < strend);
+
+ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+
+ len = MAX_MATCH - (int)(strend - scan);
+ scan = strend - MAX_MATCH;
+
+#endif /* UNALIGNED_OK */
+
+ if (len > best_len) {
+ s->match_start = cur_match;
+ best_len = len;
+ if (len >= nice_match) break;
+#ifdef UNALIGNED_OK
+ scan_end = *(ushf*)(scan+best_len-1);
+#else
+ scan_end1 = scan[best_len-1];
+ scan_end = scan[best_len];
+#endif
+ }
+ } while ((cur_match = prev[cur_match & wmask]) > limit
+ && --chain_length != 0);
+
+ if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
+ return s->lookahead;
+}
+#endif /* ASMV */
+
+#endif /* FASTEST */
+
+#ifdef H2_ZLIB_DEBUG
+/* ===========================================================================
+ * Check that the match at match_start is indeed a match.
+ */
+local
+void
+check_match(deflate_state *s, IPos start, IPos match, int length)
+{
+ /* check that the match is indeed a match */
+ if (zmemcmp(s->window + match,
+ s->window + start, length) != EQUAL) {
+ fprintf(stderr, " start %u, match %u, length %d\n",
+ start, match, length);
+ do {
+ fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
+ } while (--length != 0);
+ z_error("invalid match");
+ }
+ if (z_verbose > 1) {
+ fprintf(stderr,"\\[%d,%d]", start-match, length);
+ do { putc(s->window[start++], stderr); } while (--length != 0);
+ }
+}
+#else
+# define check_match(s, start, match, length)
+#endif /* H2_ZLIB_DEBUG */
+
+/* ===========================================================================
+ * Fill the window when the lookahead becomes insufficient.
+ * Updates strstart and lookahead.
+ *
+ * IN assertion: lookahead < MIN_LOOKAHEAD
+ * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
+ * At least one byte has been read, or avail_in == 0; reads are
+ * performed for at least two bytes (required for the zip translate_eol
+ * option -- not supported here).
+ */
+local
+void
+fill_window(deflate_state *s)
+{
+ register unsigned n, m;
+ register Posf *p;
+ unsigned more; /* Amount of free space at the end of the window. */
+ uInt wsize = s->w_size;
+
+ Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
+
+ do {
+ more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
+
+ /* Deal with !@#$% 64K limit: */
+ if (sizeof(int) <= 2) {
+ if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
+ more = wsize;
+
+ } else if (more == (unsigned)(-1)) {
+ /* Very unlikely, but possible on 16 bit machine if
+ * strstart == 0 && lookahead == 1 (input done a byte at time)
+ */
+ more--;
+ }
+ }
+
+ /* If the window is almost full and there is insufficient lookahead,
+ * move the upper half to the lower one to make room in the upper half.
+ */
+ if (s->strstart >= wsize+MAX_DIST(s)) {
+
+ zmemcpy(s->window, s->window+wsize, (unsigned)wsize);
+ s->match_start -= wsize;
+ s->strstart -= wsize; /* we now have strstart >= MAX_DIST */
+ s->block_start -= (long) wsize;
+
+ /* Slide the hash table (could be avoided with 32 bit values
+ at the expense of memory usage). We slide even when level == 0
+ to keep the hash table consistent if we switch back to level > 0
+ later. (Using level 0 permanently is not an optimal usage of
+ zlib, so we don't care about this pathological case.)
+ */
+ n = s->hash_size;
+ p = &s->head[n];
+ do {
+ m = *--p;
+ *p = (Pos)(m >= wsize ? m-wsize : NIL);
+ } while (--n);
+
+ n = wsize;
+#ifndef FASTEST
+ p = &s->prev[n];
+ do {
+ m = *--p;
+ *p = (Pos)(m >= wsize ? m-wsize : NIL);
+ /* If n is not on any hash chain, prev[n] is garbage but
+ * its value will never be used.
+ */
+ } while (--n);
+#endif
+ more += wsize;
+ }
+ if (s->strm->avail_in == 0) break;
+
+ /* If there was no sliding:
+ * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
+ * more == window_size - lookahead - strstart
+ * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
+ * => more >= window_size - 2*WSIZE + 2
+ * In the BIG_MEM or MMAP case (not yet supported),
+ * window_size == input_size + MIN_LOOKAHEAD &&
+ * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
+ * Otherwise, window_size == 2*WSIZE so more >= 2.
+ * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
+ */
+ Assert(more >= 2, "more < 2");
+
+ n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
+ s->lookahead += n;
+
+ /* Initialize the hash value now that we have some input: */
+ if (s->lookahead + s->insert >= MIN_MATCH) {
+ uInt str = s->strstart - s->insert;
+ s->ins_h = s->window[str];
+ UPDATE_HASH(s, s->ins_h, s->window[str + 1]);
+#if MIN_MATCH != 3
+ Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+ while (s->insert) {
+ UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]);
+#ifndef FASTEST
+ s->prev[str & s->w_mask] = s->head[s->ins_h];
+#endif
+ s->head[s->ins_h] = (Pos)str;
+ str++;
+ s->insert--;
+ if (s->lookahead + s->insert < MIN_MATCH)
+ break;
+ }
+ }
+ /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
+ * but this is not important since only literal bytes will be emitted.
+ */
+
+ } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
+
+ /* If the WIN_INIT bytes after the end of the current data have never been
+ * written, then zero those bytes in order to avoid memory check reports of
+ * the use of uninitialized (or uninitialised as Julian writes) bytes by
+ * the longest match routines. Update the high water mark for the next
+ * time through here. WIN_INIT is set to MAX_MATCH since the longest match
+ * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
+ */
+ if (s->high_water < s->window_size) {
+ ulg curr = s->strstart + (ulg)(s->lookahead);
+ ulg init;
+
+ if (s->high_water < curr) {
+ /* Previous high water mark below current data -- zero WIN_INIT
+ * bytes or up to end of window, whichever is less.
+ */
+ init = s->window_size - curr;
+ if (init > WIN_INIT)
+ init = WIN_INIT;
+ zmemzero(s->window + curr, (unsigned)init);
+ s->high_water = curr + init;
+ }
+ else if (s->high_water < (ulg)curr + WIN_INIT) {
+ /* High water mark at or above current data, but below current data
+ * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
+ * to end of window, whichever is less.
+ */
+ init = (ulg)curr + WIN_INIT - s->high_water;
+ if (init > s->window_size - s->high_water)
+ init = s->window_size - s->high_water;
+ zmemzero(s->window + s->high_water, (unsigned)init);
+ s->high_water += init;
+ }
+ }
+
+ Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD,
+ "not enough room for search");
+}
+
+/* ===========================================================================
+ * Flush the current block, with given end-of-file flag.
+ * IN assertion: strstart is set to the end of the current match.
+ */
+#define FLUSH_BLOCK_ONLY(s, last) { \
+ _tr_flush_block(s, (s->block_start >= 0L ? \
+ (charf *)&s->window[(unsigned)s->block_start] : \
+ (charf *)Z_NULL), \
+ (ulg)((long)s->strstart - s->block_start), \
+ (last)); \
+ s->block_start = s->strstart; \
+ flush_pending(s->strm); \
+ Tracev((stderr,"[FLUSH]")); \
+}
+
+/* Same but force premature exit if necessary. */
+#define FLUSH_BLOCK(s, last) { \
+ FLUSH_BLOCK_ONLY(s, last); \
+ if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \
+}
+
+#ifndef FASTEST
+/* ===========================================================================
+ * Same as above, but achieves better compression. We use a lazy
+ * evaluation for matches: a match is finally adopted only if there is
+ * no better match at the next window position.
+ */
+local
+block_state
+deflate_slow(deflate_state *s, int flush)
+{
+ IPos hash_head; /* head of hash chain */
+ int bflush; /* set if current block must be flushed */
+
+ /* Process the input block. */
+ for (;;) {
+ /* Make sure that we always have enough lookahead, except
+ * at the end of the input file. We need MAX_MATCH bytes
+ * for the next match, plus MIN_MATCH bytes to insert the
+ * string following the next match.
+ */
+ if (s->lookahead < MIN_LOOKAHEAD) {
+ fill_window(s);
+ if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+ return need_more;
+ }
+ if (s->lookahead == 0) break; /* flush the current block */
+ }
+
+ /* Insert the string window[strstart .. strstart+2] in the
+ * dictionary, and set hash_head to the head of the hash chain:
+ */
+ hash_head = NIL;
+ if (s->lookahead >= MIN_MATCH) {
+ INSERT_STRING(s, s->strstart, hash_head);
+ }
+
+ /* Find the longest match, discarding those <= prev_length.
+ */
+ s->prev_length = s->match_length, s->prev_match = s->match_start;
+ s->match_length = MIN_MATCH-1;
+
+ if (hash_head != NIL && s->prev_length < s->max_lazy_match &&
+ s->strstart - hash_head <= MAX_DIST(s)) {
+ /* To simplify the code, we prevent matches with the string
+ * of window index 0 (in particular we have to avoid a match
+ * of the string with itself at the start of the input file).
+ */
+ s->match_length = longest_match (s, hash_head);
+ /* longest_match() sets match_start */
+
+ if (s->match_length <= 5 && (s->strategy == Z_FILTERED
+#if TOO_FAR <= 32767
+ || (s->match_length == MIN_MATCH &&
+ s->strstart - s->match_start > TOO_FAR)
+#endif
+ )) {
+
+ /* If prev_match is also MIN_MATCH, match_start is garbage
+ * but we will ignore the current match anyway.
+ */
+ s->match_length = MIN_MATCH-1;
+ }
+ }
+ /* If there was a match at the previous step and the current
+ * match is not better, output the previous match:
+ */
+ if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
+ uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
+ /* Do not insert strings in hash table beyond this. */
+
+ check_match(s, s->strstart-1, s->prev_match, s->prev_length);
+
+ _tr_tally_dist(s, s->strstart -1 - s->prev_match,
+ s->prev_length - MIN_MATCH, bflush);
+
+ /* Insert in hash table all strings up to the end of the match.
+ * strstart-1 and strstart are already inserted. If there is not
+ * enough lookahead, the last two strings are not inserted in
+ * the hash table.
+ */
+ s->lookahead -= s->prev_length-1;
+ s->prev_length -= 2;
+ do {
+ if (++s->strstart <= max_insert) {
+ INSERT_STRING(s, s->strstart, hash_head);
+ }
+ } while (--s->prev_length != 0);
+ s->match_available = 0;
+ s->match_length = MIN_MATCH-1;
+ s->strstart++;
+
+ if (bflush) FLUSH_BLOCK(s, 0);
+
+ } else if (s->match_available) {
+ /* If there was no match at the previous position, output a
+ * single literal. If there was a match but the current match
+ * is longer, truncate the previous match to a single literal.
+ */
+ Tracevv((stderr,"%c", s->window[s->strstart-1]));
+ _tr_tally_lit(s, s->window[s->strstart-1], bflush);
+ if (bflush) {
+ FLUSH_BLOCK_ONLY(s, 0);
+ }
+ s->strstart++;
+ s->lookahead--;
+ if (s->strm->avail_out == 0) return need_more;
+ } else {
+ /* There is no previous match to compare with, wait for
+ * the next step to decide.
+ */
+ s->match_available = 1;
+ s->strstart++;
+ s->lookahead--;
+ }
+ }
+ Assert (flush != Z_NO_FLUSH, "no flush?");
+ if (s->match_available) {
+ Tracevv((stderr,"%c", s->window[s->strstart-1]));
+ _tr_tally_lit(s, s->window[s->strstart-1], bflush);
+ s->match_available = 0;
+ }
+ s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
+ if (flush == Z_FINISH) {
+ FLUSH_BLOCK(s, 1);
+ return finish_done;
+ }
+ if (s->last_lit)
+ FLUSH_BLOCK(s, 0);
+ return block_done;
+}
+#endif /* FASTEST */
+
+/* ===========================================================================
+ * For Z_RLE, simply look for runs of bytes, generate matches only of distance
+ * one. Do not maintain a hash table. (It will be regenerated if this run of
+ * deflate switches away from Z_RLE.)
+ */
+local
+block_state
+deflate_rle(deflate_state *s, int flush)
+{
+ int bflush; /* set if current block must be flushed */
+ uInt prev; /* byte at distance one to match */
+ Bytef *scan, *strend; /* scan goes up to strend for length of run */
+
+ for (;;) {
+ /* Make sure that we always have enough lookahead, except
+ * at the end of the input file. We need MAX_MATCH bytes
+ * for the longest run, plus one for the unrolled loop.
+ */
+ if (s->lookahead <= MAX_MATCH) {
+ fill_window(s);
+ if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) {
+ return need_more;
+ }
+ if (s->lookahead == 0) break; /* flush the current block */
+ }
+
+ /* See how many times the previous byte repeats */
+ s->match_length = 0;
+ if (s->lookahead >= MIN_MATCH && s->strstart > 0) {
+ scan = s->window + s->strstart - 1;
+ prev = *scan;
+ if (prev == *++scan && prev == *++scan && prev == *++scan) {
+ strend = s->window + s->strstart + MAX_MATCH;
+ do {
+ } while (prev == *++scan && prev == *++scan &&
+ prev == *++scan && prev == *++scan &&
+ prev == *++scan && prev == *++scan &&
+ prev == *++scan && prev == *++scan &&
+ scan < strend);
+ s->match_length = MAX_MATCH - (int)(strend - scan);
+ if (s->match_length > s->lookahead)
+ s->match_length = s->lookahead;
+ }
+ Assert(scan <= s->window+(uInt)(s->window_size-1), "wild scan");
+ }
+
+ /* Emit match if have run of MIN_MATCH or longer, else emit literal */
+ if (s->match_length >= MIN_MATCH) {
+ check_match(s, s->strstart, s->strstart - 1, s->match_length);
+
+ _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush);
+
+ s->lookahead -= s->match_length;
+ s->strstart += s->match_length;
+ s->match_length = 0;
+ } else {
+ /* No match, output a literal byte */
+ Tracevv((stderr,"%c", s->window[s->strstart]));
+ _tr_tally_lit (s, s->window[s->strstart], bflush);
+ s->lookahead--;
+ s->strstart++;
+ }
+ if (bflush) FLUSH_BLOCK(s, 0);
+ }
+ s->insert = 0;
+ if (flush == Z_FINISH) {
+ FLUSH_BLOCK(s, 1);
+ return finish_done;
+ }
+ if (s->last_lit)
+ FLUSH_BLOCK(s, 0);
+ return block_done;
+}
+
+/* ===========================================================================
+ * For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table.
+ * (It will be regenerated if this run of deflate switches away from Huffman.)
+ */
+local
+block_state
+deflate_huff(deflate_state *s, int flush)
+{
+ int bflush; /* set if current block must be flushed */
+
+ for (;;) {
+ /* Make sure that we have a literal to write. */
+ if (s->lookahead == 0) {
+ fill_window(s);
+ if (s->lookahead == 0) {
+ if (flush == Z_NO_FLUSH)
+ return need_more;
+ break; /* flush the current block */
+ }
+ }
+
+ /* Output a literal byte */
+ s->match_length = 0;
+ Tracevv((stderr,"%c", s->window[s->strstart]));
+ _tr_tally_lit (s, s->window[s->strstart], bflush);
+ s->lookahead--;
+ s->strstart++;
+ if (bflush) FLUSH_BLOCK(s, 0);
+ }
+ s->insert = 0;
+ if (flush == Z_FINISH) {
+ FLUSH_BLOCK(s, 1);
+ return finish_done;
+ }
+ if (s->last_lit)
+ FLUSH_BLOCK(s, 0);
+ return block_done;
+}
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib_inffast.h b/sys/fs/hammer2/zlib/hammer2_zlib_inffast.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib_inffast.h
@@ -0,0 +1,11 @@
+/* inffast.h -- header to use inffast.c
+ * Copyright (C) 1995-2003, 2010 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start);
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib_inffast.c b/sys/fs/hammer2/zlib/hammer2_zlib_inffast.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib_inffast.c
@@ -0,0 +1,340 @@
+/* inffast.c -- fast decoding
+ * Copyright (C) 1995-2008, 2010, 2013 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "hammer2_zlib_zutil.h"
+#include "hammer2_zlib_inftrees.h"
+#include "hammer2_zlib_inflate.h"
+#include "hammer2_zlib_inffast.h"
+
+#ifndef ASMINF
+
+/* Allow machine dependent optimization for post-increment or pre-increment.
+ Based on testing to date,
+ Pre-increment preferred for:
+ - PowerPC G3 (Adler)
+ - MIPS R5000 (Randers-Pehrson)
+ Post-increment preferred for:
+ - none
+ No measurable difference:
+ - Pentium III (Anderson)
+ - M68060 (Nikl)
+ */
+#ifdef POSTINC
+# define OFF 0
+# define PUP(a) *(a)++
+#else
+# define OFF 1
+# define PUP(a) *++(a)
+#endif
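The two expansions walk a buffer identically; what differs is the initial bias, which is why the function below starts its local copies at next_in - OFF and next_out - OFF. A minimal illustration (buf is hypothetical):

    unsigned char buf[4] = { 1, 2, 3, 4 };
    unsigned char *p = buf - OFF;   /* mirrors 'in = strm->next_in - OFF' */
    unsigned char a = PUP(p);       /* 1, whichever variant is compiled   */
    unsigned char b = PUP(p);       /* 2 */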
+
+/*
+ Decode literal, length, and distance codes and write out the resulting
+ literal and match bytes until either not enough input or output is
+ available, an end-of-block is encountered, or a data error is encountered.
+ When large enough input and output buffers are supplied to inflate(), for
+ example, a 16K input buffer and a 64K output buffer, more than 95% of the
+ inflate execution time is spent in this routine.
+
+ Entry assumptions:
+
+ state->mode == LEN
+ strm->avail_in >= 6
+ strm->avail_out >= 258
+ start >= strm->avail_out
+ state->bits < 8
+
+ On return, state->mode is one of:
+
+ LEN -- ran out of output space or available input
+ TYPE -- reached end of block code, inflate() to interpret next block
+ BAD -- error in block data
+
+ Notes:
+
+ - The maximum input bits used by a length/distance pair is 15 bits for the
+ length code, 5 bits for the length extra, 15 bits for the distance code,
+ and 13 bits for the distance extra. This totals 48 bits, or six bytes.
+ Therefore if strm->avail_in >= 6, then there is enough input to avoid
+ checking for available input while decoding.
+
+ - The maximum bytes that a single length/distance pair can output is 258
+ bytes, which is the maximum length that can be coded. inflate_fast()
+ requires strm->avail_out >= 258 for each loop to avoid checking for
+ output space.
+ */
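As a reading aid for the decode dispatch below, a summary of the 'op' byte carried by the table entries this loop indexes (grounded in the standard inftrees.h table format, which this port vendors):

    /* op == 0        emit here.val as a literal byte
     * op & 16        here.val is a length/distance base; the low four
     *                bits of op count the extra bits pulled from 'hold'
     * (op & 64) == 0 here.val points at a second-level table; reindex
     * op & 32        end-of-block marker (length table only)
     * anything else  invalid code; the stream is flagged BAD
     */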
+void
+ZLIB_INTERNAL
+inflate_fast(z_streamp strm, unsigned start) /* inflate()'s starting value for strm->avail_out */
+{
+ struct inflate_state FAR *state;
+ z_const unsigned char FAR *in; /* local strm->next_in */
+ z_const unsigned char FAR *last; /* have enough input while in < last */
+ unsigned char FAR *out; /* local strm->next_out */
+ unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
+ unsigned char FAR *end; /* while out < end, enough space available */
+#ifdef INFLATE_STRICT
+ unsigned dmax; /* maximum distance from zlib header */
+#endif
+ unsigned wsize; /* window size or zero if not using window */
+ unsigned whave; /* valid bytes in the window */
+ unsigned wnext; /* window write index */
+ unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
+ unsigned long hold; /* local strm->hold */
+ unsigned bits; /* local strm->bits */
+ code const FAR *lcode; /* local strm->lencode */
+ code const FAR *dcode; /* local strm->distcode */
+ unsigned lmask; /* mask for first level of length codes */
+ unsigned dmask; /* mask for first level of distance codes */
+ code here; /* retrieved table entry */
+ unsigned op; /* code bits, operation, extra bits, or */
+ /* window position, window bytes to copy */
+ unsigned len; /* match length, unused bytes */
+ unsigned dist; /* match distance */
+ unsigned char FAR *from; /* where to copy match from */
+
+ /* copy state to local variables */
+ state = (struct inflate_state FAR *)strm->state;
+ in = strm->next_in - OFF;
+ last = in + (strm->avail_in - 5);
+ out = strm->next_out - OFF;
+ beg = out - (start - strm->avail_out);
+ end = out + (strm->avail_out - 257);
+#ifdef INFLATE_STRICT
+ dmax = state->dmax;
+#endif
+ wsize = state->wsize;
+ whave = state->whave;
+ wnext = state->wnext;
+ window = state->window;
+ hold = state->hold;
+ bits = state->bits;
+ lcode = state->lencode;
+ dcode = state->distcode;
+ lmask = (1U << state->lenbits) - 1;
+ dmask = (1U << state->distbits) - 1;
+
+ /* decode literals and length/distances until end-of-block or not enough
+ input data or output space */
+ do {
+ if (bits < 15) {
+ hold += (unsigned long)(PUP(in)) << bits;
+ bits += 8;
+ hold += (unsigned long)(PUP(in)) << bits;
+ bits += 8;
+ }
+ here = lcode[hold & lmask];
+ dolen:
+ op = (unsigned)(here.bits);
+ hold >>= op;
+ bits -= op;
+ op = (unsigned)(here.op);
+ if (op == 0) { /* literal */
+ Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
+ "inflate: literal '%c'\n" :
+ "inflate: literal 0x%02x\n", here.val));
+ PUP(out) = (unsigned char)(here.val);
+ }
+ else if (op & 16) { /* length base */
+ len = (unsigned)(here.val);
+ op &= 15; /* number of extra bits */
+ if (op) {
+ if (bits < op) {
+ hold += (unsigned long)(PUP(in)) << bits;
+ bits += 8;
+ }
+ len += (unsigned)hold & ((1U << op) - 1);
+ hold >>= op;
+ bits -= op;
+ }
+ Tracevv((stderr, "inflate: length %u\n", len));
+ if (bits < 15) {
+ hold += (unsigned long)(PUP(in)) << bits;
+ bits += 8;
+ hold += (unsigned long)(PUP(in)) << bits;
+ bits += 8;
+ }
+ here = dcode[hold & dmask];
+ dodist:
+ op = (unsigned)(here.bits);
+ hold >>= op;
+ bits -= op;
+ op = (unsigned)(here.op);
+ if (op & 16) { /* distance base */
+ dist = (unsigned)(here.val);
+ op &= 15; /* number of extra bits */
+ if (bits < op) {
+ hold += (unsigned long)(PUP(in)) << bits;
+ bits += 8;
+ if (bits < op) {
+ hold += (unsigned long)(PUP(in)) << bits;
+ bits += 8;
+ }
+ }
+ dist += (unsigned)hold & ((1U << op) - 1);
+#ifdef INFLATE_STRICT
+ if (dist > dmax) {
+ strm->msg = (char *)"invalid distance too far back";
+ state->mode = BAD;
+ break;
+ }
+#endif
+ hold >>= op;
+ bits -= op;
+ Tracevv((stderr, "inflate: distance %u\n", dist));
+ op = (unsigned)(out - beg); /* max distance in output */
+ if (dist > op) { /* see if copy from window */
+ op = dist - op; /* distance back in window */
+ if (op > whave) {
+ if (state->sane) {
+ strm->msg =
+ (char *)"invalid distance too far back";
+ state->mode = BAD;
+ break;
+ }
+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+ if (len <= op - whave) {
+ do {
+ PUP(out) = 0;
+ } while (--len);
+ continue;
+ }
+ len -= op - whave;
+ do {
+ PUP(out) = 0;
+ } while (--op > whave);
+ if (op == 0) {
+ from = out - dist;
+ do {
+ PUP(out) = PUP(from);
+ } while (--len);
+ continue;
+ }
+#endif
+ }
+ from = window - OFF;
+ if (wnext == 0) { /* very common case */
+ from += wsize - op;
+ if (op < len) { /* some from window */
+ len -= op;
+ do {
+ PUP(out) = PUP(from);
+ } while (--op);
+ from = out - dist; /* rest from output */
+ }
+ }
+ else if (wnext < op) { /* wrap around window */
+ from += wsize + wnext - op;
+ op -= wnext;
+ if (op < len) { /* some from end of window */
+ len -= op;
+ do {
+ PUP(out) = PUP(from);
+ } while (--op);
+ from = window - OFF;
+ if (wnext < len) { /* some from start of window */
+ op = wnext;
+ len -= op;
+ do {
+ PUP(out) = PUP(from);
+ } while (--op);
+ from = out - dist; /* rest from output */
+ }
+ }
+ }
+ else { /* contiguous in window */
+ from += wnext - op;
+ if (op < len) { /* some from window */
+ len -= op;
+ do {
+ PUP(out) = PUP(from);
+ } while (--op);
+ from = out - dist; /* rest from output */
+ }
+ }
+ while (len > 2) {
+ PUP(out) = PUP(from);
+ PUP(out) = PUP(from);
+ PUP(out) = PUP(from);
+ len -= 3;
+ }
+ if (len) {
+ PUP(out) = PUP(from);
+ if (len > 1)
+ PUP(out) = PUP(from);
+ }
+ }
+ else {
+ from = out - dist; /* copy direct from output */
+ do { /* minimum length is three */
+ PUP(out) = PUP(from);
+ PUP(out) = PUP(from);
+ PUP(out) = PUP(from);
+ len -= 3;
+ } while (len > 2);
+ if (len) {
+ PUP(out) = PUP(from);
+ if (len > 1)
+ PUP(out) = PUP(from);
+ }
+ }
+ }
+ else if ((op & 64) == 0) { /* 2nd level distance code */
+ here = dcode[here.val + (hold & ((1U << op) - 1))];
+ goto dodist;
+ }
+ else {
+ strm->msg = (char *)"invalid distance code";
+ state->mode = BAD;
+ break;
+ }
+ }
+ else if ((op & 64) == 0) { /* 2nd level length code */
+ here = lcode[here.val + (hold & ((1U << op) - 1))];
+ goto dolen;
+ }
+ else if (op & 32) { /* end-of-block */
+ Tracevv((stderr, "inflate: end of block\n"));
+ state->mode = TYPE;
+ break;
+ }
+ else {
+ strm->msg = (char *)"invalid literal/length code";
+ state->mode = BAD;
+ break;
+ }
+ } while (in < last && out < end);
+
+ /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
+ len = bits >> 3;
+ in -= len;
+ bits -= len << 3;
+ hold &= (1U << bits) - 1;
+
+ /* update state and return */
+ strm->next_in = in + OFF;
+ strm->next_out = out + OFF;
+ strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
+ strm->avail_out = (unsigned)(out < end ?
+ 257 + (end - out) : 257 - (out - end));
+ state->hold = hold;
+ state->bits = bits;
+ return;
+}
+
+/*
+ inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
+ - Using bit fields for code structure
+ - Different op definition to avoid & for extra bits (do & for table bits)
+ - Three separate decoding do-loops for direct, window, and wnext == 0
+ - Special case for distance > 1 copies to do overlapped load and store copy
+ - Explicit branch predictions (based on measured branch probabilities)
+ - Deferring match copy and interspersing it with decoding subsequent codes
+ - Swapping literal/length else
+ - Swapping window/direct else
+ - Larger unrolled copy loops (three is about right)
+ - Moving len -= 3 statement into middle of loop
+ */
+
+#endif /* !ASMINF */
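
inflate_fast() leans on the bit-accumulator idiom implied by its entry assumptions: input bytes are shifted into hold above the bits already present, values are read off the low end with a mask, and consumed bits are shifted out, so at most two refills cover any 15-bit code. A standalone sketch of that idiom, with illustrative names (the real code pulls from strm->next_in via the PUP() macro):

#include <stdio.h>

int
main(void)
{
	const unsigned char in[] = { 0xb5, 0x03 };	/* sample input */
	unsigned long hold = 0;	/* bit accumulator, oldest bits lowest */
	unsigned bits = 0;	/* valid bits in hold */
	unsigned i = 0, val;

	/* refill: mirrors the "if (bits < 15) pull two bytes" pattern */
	while (bits < 15 && i < sizeof(in)) {
		hold += (unsigned long)in[i++] << bits;
		bits += 8;
	}

	/* take 5 bits, then drop them, as the mask/shift pairs above do */
	val = (unsigned)hold & ((1U << 5) - 1);
	hold >>= 5;
	bits -= 5;
	printf("read 0x%x, %u bits left\n", val, bits);
	return 0;
}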
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib_inffixed.h b/sys/fs/hammer2/zlib/hammer2_zlib_inffixed.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib_inffixed.h
@@ -0,0 +1,94 @@
+ /* inffixed.h -- table for decoding fixed codes
+ * Generated automatically by makefixed().
+ */
+
+ /* WARNING: this file should *not* be used by applications.
+ It is part of the implementation of this library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+ static const code lenfix[512] = {
+ {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48},
+ {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128},
+ {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59},
+ {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176},
+ {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20},
+ {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100},
+ {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8},
+ {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216},
+ {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76},
+ {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114},
+ {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2},
+ {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148},
+ {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42},
+ {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86},
+ {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15},
+ {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236},
+ {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62},
+ {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142},
+ {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31},
+ {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162},
+ {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25},
+ {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105},
+ {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4},
+ {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202},
+ {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69},
+ {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125},
+ {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13},
+ {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195},
+ {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35},
+ {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91},
+ {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19},
+ {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246},
+ {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55},
+ {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135},
+ {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99},
+ {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190},
+ {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16},
+ {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96},
+ {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6},
+ {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209},
+ {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72},
+ {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116},
+ {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4},
+ {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153},
+ {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44},
+ {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82},
+ {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11},
+ {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229},
+ {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58},
+ {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138},
+ {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51},
+ {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173},
+ {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30},
+ {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110},
+ {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0},
+ {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195},
+ {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65},
+ {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121},
+ {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9},
+ {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258},
+ {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37},
+ {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93},
+ {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23},
+ {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251},
+ {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51},
+ {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131},
+ {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67},
+ {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183},
+ {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23},
+ {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103},
+ {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9},
+ {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223},
+ {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79},
+ {0,9,255}
+ };
+
+ static const code distfix[32] = {
+ {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025},
+ {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193},
+ {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385},
+ {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577},
+ {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073},
+ {22,5,193},{64,5,0}
+ };
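
These tables decode the fixed Huffman codes of RFC 1951: literal/length symbols 0-143 get 8-bit codes, 144-255 get 9 bits, 256-279 get 7 bits, and 280-287 get 8 bits. That code is complete (its Kraft sum is exactly 1), which is why lenfix[] can be a dense 9-bit root table with no missing entries. A quick standalone check of the arithmetic:

#include <stdio.h>

int
main(void)
{
	/* fixed literal/length codes per bit length, per RFC 1951 */
	struct { unsigned count, bits; } c[] = {
		{ 144, 8 }, { 112, 9 }, { 24, 7 }, { 8, 8 },
	};
	unsigned long sum = 0;	/* Kraft sum scaled by 2^9 */
	unsigned i;

	for (i = 0; i < 4; i++)
		sum += (unsigned long)c[i].count << (9 - c[i].bits);
	printf("Kraft sum x 512 = %lu (complete iff 512)\n", sum);
	return 0;
}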
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib_inflate.h b/sys/fs/hammer2/zlib/hammer2_zlib_inflate.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib_inflate.h
@@ -0,0 +1,113 @@
+/* inflate.h -- internal inflate state definition
+ * Copyright (C) 1995-2009 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+/* Possible inflate modes between inflate() calls */
+typedef enum {
+ HEAD, /* i: waiting for magic header */
+ FLAGS, /* i: waiting for method and flags (gzip) */
+ TIME, /* i: waiting for modification time (gzip) */
+ OS, /* i: waiting for extra flags and operating system (gzip) */
+ EXLEN, /* i: waiting for extra length (gzip) */
+ EXTRA, /* i: waiting for extra bytes (gzip) */
+ NAME, /* i: waiting for end of file name (gzip) */
+ COMMENT, /* i: waiting for end of comment (gzip) */
+ HCRC, /* i: waiting for header crc (gzip) */
+ DICTID, /* i: waiting for dictionary check value */
+ DICT, /* waiting for inflateSetDictionary() call */
+ TYPE, /* i: waiting for type bits, including last-flag bit */
+ TYPEDO, /* i: same, but skip check to exit inflate on new block */
+ STORED, /* i: waiting for stored size (length and complement) */
+ COPY_, /* i/o: same as COPY below, but only first time in */
+ COPY, /* i/o: waiting for input or output to copy stored block */
+ TABLE, /* i: waiting for dynamic block table lengths */
+ LENLENS, /* i: waiting for code length code lengths */
+ CODELENS, /* i: waiting for length/lit and distance code lengths */
+ LEN_, /* i: same as LEN below, but only first time in */
+ LEN, /* i: waiting for length/lit/eob code */
+ LENEXT, /* i: waiting for length extra bits */
+ DIST, /* i: waiting for distance code */
+ DISTEXT, /* i: waiting for distance extra bits */
+ MATCH, /* o: waiting for output space to copy string */
+ LIT, /* o: waiting for output space to write literal */
+ CHECK, /* i: waiting for 32-bit check value */
+ LENGTH, /* i: waiting for 32-bit length (gzip) */
+ DONE, /* finished check, done -- remain here until reset */
+ BAD, /* got a data error -- remain here until reset */
+ MEM, /* got an inflate() memory error -- remain here until reset */
+ SYNC /* looking for synchronization bytes to restart inflate() */
+} inflate_mode;
+
+/*
+ State transitions between above modes -
+
+ (most modes can go to BAD or MEM on error -- not shown for clarity)
+
+ Process header:
+ HEAD -> (gzip) or (zlib) or (raw)
+ (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT ->
+ HCRC -> TYPE
+ (zlib) -> DICTID or TYPE
+ DICTID -> DICT -> TYPE
+ (raw) -> TYPEDO
+ Read deflate blocks:
+ TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK
+ STORED -> COPY_ -> COPY -> TYPE
+ TABLE -> LENLENS -> CODELENS -> LEN_
+ LEN_ -> LEN
+ Read deflate codes in fixed or dynamic block:
+ LEN -> LENEXT or LIT or TYPE
+ LENEXT -> DIST -> DISTEXT -> MATCH -> LEN
+ LIT -> LEN
+ Process trailer:
+ CHECK -> LENGTH -> DONE
+ */
+
+/* state maintained between inflate() calls. Approximately 10K bytes. */
+struct inflate_state {
+ inflate_mode mode; /* current inflate mode */
+ int last; /* true if processing last block */
+ int wrap; /* bit 0 true for zlib, bit 1 true for gzip */
+ int havedict; /* true if dictionary provided */
+ int flags; /* gzip header method and flags (0 if zlib) */
+ unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */
+ unsigned long check; /* protected copy of check value */
+ unsigned long total; /* protected copy of output count */
+ /* sliding window */
+ unsigned wbits; /* log base 2 of requested window size */
+ unsigned wsize; /* window size or zero if not using window */
+ unsigned whave; /* valid bytes in the window */
+ unsigned wnext; /* window write index */
+ unsigned char FAR *window; /* allocated sliding window, if needed */
+ /* bit accumulator */
+ unsigned long hold; /* input bit accumulator */
+ unsigned bits; /* number of bits in "in" */
+ /* for string and stored block copying */
+ unsigned length; /* literal or length of data to copy */
+ unsigned offset; /* distance back to copy string from */
+ /* for table and code decoding */
+ unsigned extra; /* extra bits needed */
+ /* fixed and dynamic code tables */
+ code const FAR *lencode; /* starting table for length/literal codes */
+ code const FAR *distcode; /* starting table for distance codes */
+ unsigned lenbits; /* index bits for lencode */
+ unsigned distbits; /* index bits for distcode */
+ /* dynamic table building */
+ unsigned ncode; /* number of code length code lengths */
+ unsigned nlen; /* number of length code lengths */
+ unsigned ndist; /* number of distance code lengths */
+ unsigned have; /* number of code lengths in lens[] */
+ code FAR *next; /* next available space in codes[] */
+ unsigned short lens[320]; /* temporary storage for code lengths */
+ unsigned short work[288]; /* work area for code table building */
+ code codes[ENOUGH]; /* space for code tables */
+ int sane; /* if false, allow invalid distance too far */
+ int back; /* bits back of last unprocessed length/lit */
+ unsigned was; /* initial length of match */
+};
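
The point of the mode enum is that inflate() can return in the middle of a stream and later resume at exactly the same step: each state checks for resources, makes progress, records the next mode, and bails out early when input or output runs dry. A minimal standalone sketch of that resumable for(;;)/switch shape, with illustrative states (not the real inflate modes):

#include <stdio.h>

enum mode { WANT_LEN, WANT_BODY, FINISHED };

struct sm {
	enum mode mode;
	unsigned need;	/* bytes still wanted in WANT_BODY */
};

/* consume up to n bytes; returns bytes left unprocessed */
static unsigned
run(struct sm *s, unsigned n)
{
	for (;;)
		switch (s->mode) {
		case WANT_LEN:
			if (n < 1) return n;	/* resume here next call */
			n -= 1;
			s->need = 4;		/* pretend header said 4 */
			s->mode = WANT_BODY;
			break;
		case WANT_BODY:
			if (n == 0) return n;	/* resume here next call */
			if (n >= s->need) {
				n -= s->need;
				s->mode = FINISHED;
			} else {
				s->need -= n;
				return 0;
			}
			break;
		case FINISHED:
		default:
			return n;
		}
}

int
main(void)
{
	struct sm s = { WANT_LEN, 0 };

	run(&s, 3);	/* header byte + 2 of 4 body bytes */
	run(&s, 2);	/* remaining body; resumes in WANT_BODY */
	printf("done: %d\n", s.mode == FINISHED);
	return 0;
}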
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib_inflate.c b/sys/fs/hammer2/zlib/hammer2_zlib_inflate.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib_inflate.c
@@ -0,0 +1,1052 @@
+/* inflate.c -- zlib decompression
+ * Copyright (C) 1995-2012 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ * Change history:
+ *
+ * 1.2.beta0 24 Nov 2002
+ * - First version -- complete rewrite of inflate to simplify code, avoid
+ * creation of window when not needed, minimize use of window when it is
+ * needed, make inffast.c even faster, implement gzip decoding, and to
+ * improve code readability and style over the previous zlib inflate code
+ *
+ * 1.2.beta1 25 Nov 2002
+ * - Use pointers for available input and output checking in inffast.c
+ * - Remove input and output counters in inffast.c
+ * - Change inffast.c entry and loop from avail_in >= 7 to >= 6
+ * - Remove unnecessary second byte pull from length extra in inffast.c
+ * - Unroll direct copy to three copies per loop in inffast.c
+ *
+ * 1.2.beta2 4 Dec 2002
+ * - Change external routine names to reduce potential conflicts
+ * - Correct filename to inffixed.h for fixed tables in inflate.c
+ * - Make hbuf[] unsigned char to match parameter type in inflate.c
+ * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset)
+ * to avoid negation problem on Alphas (64 bit) in inflate.c
+ *
+ * 1.2.beta3 22 Dec 2002
+ * - Add comments on state->bits assertion in inffast.c
+ * - Add comments on op field in inftrees.h
+ * - Fix bug in reuse of allocated window after inflateReset()
+ * - Remove bit fields--back to byte structure for speed
+ * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths
+ * - Change post-increments to pre-increments in inflate_fast(), PPC biased?
+ * - Add compile time option, POSTINC, to use post-increments instead (Intel?)
+ * - Make MATCH copy in inflate() much faster for when inflate_fast() not used
+ * - Use local copies of stream next and avail values, as well as local bit
+ * buffer and bit count in inflate()--for speed when inflate_fast() not used
+ *
+ * 1.2.beta4 1 Jan 2003
+ * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings
+ * - Move a comment on output buffer sizes from inffast.c to inflate.c
+ * - Add comments in inffast.c to introduce the inflate_fast() routine
+ * - Rearrange window copies in inflate_fast() for speed and simplification
+ * - Unroll last copy for window match in inflate_fast()
+ * - Use local copies of window variables in inflate_fast() for speed
+ * - Pull out common wnext == 0 case for speed in inflate_fast()
+ * - Make op and len in inflate_fast() unsigned for consistency
+ * - Add FAR to lcode and dcode declarations in inflate_fast()
+ * - Simplified bad distance check in inflate_fast()
+ * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new
+ * source file infback.c to provide a call-back interface to inflate for
+ * programs like gzip and unzip -- uses window as output buffer to avoid
+ * window copying
+ *
+ * 1.2.beta5 1 Jan 2003
+ * - Improved inflateBack() interface to allow the caller to provide initial
+ * input in strm.
+ * - Fixed stored blocks bug in inflateBack()
+ *
+ * 1.2.beta6 4 Jan 2003
+ * - Added comments in inffast.c on effectiveness of POSTINC
+ * - Typecasting all around to reduce compiler warnings
+ * - Changed loops from while (1) or do {} while (1) to for (;;), again to
+ * make compilers happy
+ * - Changed type of window in inflateBackInit() to unsigned char *
+ *
+ * 1.2.beta7 27 Jan 2003
+ * - Changed many types to unsigned or unsigned short to avoid warnings
+ * - Added inflateCopy() function
+ *
+ * 1.2.0 9 Mar 2003
+ * - Changed inflateBack() interface to provide separate opaque descriptors
+ * for the in() and out() functions
+ * - Changed inflateBack() argument and in_func typedef to swap the length
+ * and buffer address return values for the input function
+ * - Check next_in and next_out for Z_NULL on entry to inflate()
+ *
+ * The history for versions after 1.2.0 is in the ChangeLog in the zlib distribution.
+ */
+
+#include "hammer2_zlib_zutil.h"
+#include "hammer2_zlib_inftrees.h"
+#include "hammer2_zlib_inflate.h"
+#include "hammer2_zlib_inffast.h"
+#include "../hammer2.h"
+#include <sys/malloc.h> /* for malloc macros */
+
+MALLOC_DECLARE(C_ZLIB_BUFFER_INFLATE);
+MALLOC_DEFINE(C_ZLIB_BUFFER_INFLATE, "compzlibbufferinflate",
+ "A private buffer used by zlib library for inflate function.");
+
+#ifdef MAKEFIXED
+# ifndef BUILDFIXED
+# define BUILDFIXED
+# endif
+#endif
+
+/* function prototypes */
+int inflateResetKeep(z_streamp strm);
+int inflateReset(z_streamp strm);
+int inflateReset2(z_streamp strm, int windowBits);
+int inflateInit2_(z_streamp strm, int windowBits, const char *version,
+ int stream_size);
+int inflatePrime(z_streamp strm, int bits, int value);
+local void fixedtables(struct inflate_state FAR *state);
+local int updatewindow(z_streamp strm, const unsigned char FAR *end,
+ unsigned copy);
+#ifdef BUILDFIXED
+ void makefixed(void);
+#endif
+
+int
+inflateResetKeep(z_streamp strm)
+{
+ struct inflate_state FAR *state;
+
+ if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+ state = (struct inflate_state FAR *)strm->state;
+ strm->total_in = strm->total_out = state->total = 0;
+ strm->msg = Z_NULL;
+ if (state->wrap) /* to support ill-conceived Java test suite */
+ strm->adler = state->wrap & 1;
+ state->mode = HEAD;
+ state->last = 0;
+ state->havedict = 0;
+ state->dmax = 32768U;
+ state->hold = 0;
+ state->bits = 0;
+ state->lencode = state->distcode = state->next = state->codes;
+ state->sane = 1;
+ state->back = -1;
+ Tracev((stderr, "inflate: reset\n"));
+ return Z_OK;
+}
+
+int
+inflateReset(z_streamp strm)
+{
+ struct inflate_state FAR *state;
+
+ if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+ state = (struct inflate_state FAR *)strm->state;
+ state->wsize = 0;
+ state->whave = 0;
+ state->wnext = 0;
+ return inflateResetKeep(strm);
+}
+
+int
+inflateReset2(z_streamp strm, int windowBits)
+{
+ int wrap;
+ struct inflate_state FAR *state;
+
+ /* get the state */
+ if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+ state = (struct inflate_state FAR *)strm->state;
+
+ /* extract wrap request from windowBits parameter */
+ if (windowBits < 0) {
+ wrap = 0;
+ windowBits = -windowBits;
+ }
+ else {
+ wrap = (windowBits >> 4) + 1;
+ }
+
+ /* set number of window bits, free window if different */
+ if (windowBits && (windowBits < 8 || windowBits > 15))
+ return Z_STREAM_ERROR;
+ if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) {
+ free(state->window, C_ZLIB_BUFFER_INFLATE);
+ state->window = Z_NULL;
+ }
+
+ /* update state and reset the rest of it */
+ state->wrap = wrap;
+ state->wbits = (unsigned)windowBits;
+ return inflateReset(strm);
+}
+
+int
+inflateInit2_(z_streamp strm, int windowBits, const char *version,
+ int stream_size)
+{
+ int ret;
+ struct inflate_state FAR *state;
+
+ if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
+ stream_size != (int)(sizeof(z_stream)))
+ return Z_VERSION_ERROR;
+ if (strm == Z_NULL) return Z_STREAM_ERROR;
+ strm->msg = Z_NULL; /* in case we return an error */
+ state = (struct inflate_state FAR *) malloc(sizeof(struct inflate_state), C_ZLIB_BUFFER_INFLATE, M_WAITOK);
+ if (state == Z_NULL) return Z_MEM_ERROR;
+ Tracev((stderr, "inflate: allocated\n"));
+ strm->state = (struct internal_state FAR *)state;
+ state->window = Z_NULL;
+ ret = inflateReset2(strm, windowBits);
+ if (ret != Z_OK) {
+ free(state, C_ZLIB_BUFFER_INFLATE);
+ strm->state = Z_NULL;
+ }
+ return ret;
+}
+
+int
+inflateInit_(z_streamp strm, const char *version, int stream_size)
+{
+ return inflateInit2_(strm, DEF_WBITS, version, stream_size);
+}
+
+int
+inflatePrime(z_streamp strm, int bits, int value)
+{
+ struct inflate_state FAR *state;
+
+ if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+ state = (struct inflate_state FAR *)strm->state;
+ if (bits < 0) {
+ state->hold = 0;
+ state->bits = 0;
+ return Z_OK;
+ }
+ if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
+ value &= (1L << bits) - 1;
+ state->hold += value << state->bits;
+ state->bits += bits;
+ return Z_OK;
+}
+
+/*
+ Return state with length and distance decoding tables and index sizes set to
+ fixed code decoding. Normally this returns fixed tables from inffixed.h.
+ If BUILDFIXED is defined, then instead this routine builds the tables the
+ first time it's called, and returns those tables the first time and
+ thereafter. This reduces the size of the code by about 2K bytes, in
+ exchange for a little execution time. However, BUILDFIXED should not be
+ used for threaded applications, since the rewriting of the tables and the
+ virgin flag may not be thread-safe.
+ */
+local
+void
+fixedtables(struct inflate_state FAR *state)
+{
+#ifdef BUILDFIXED
+ static int virgin = 1;
+ static code *lenfix, *distfix;
+ static code fixed[544];
+
+ /* build fixed huffman tables if first call (may not be thread safe) */
+ if (virgin) {
+ unsigned sym, bits;
+ static code *next;
+
+ /* literal/length table */
+ sym = 0;
+ while (sym < 144) state->lens[sym++] = 8;
+ while (sym < 256) state->lens[sym++] = 9;
+ while (sym < 280) state->lens[sym++] = 7;
+ while (sym < 288) state->lens[sym++] = 8;
+ next = fixed;
+ lenfix = next;
+ bits = 9;
+ inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
+
+ /* distance table */
+ sym = 0;
+ while (sym < 32) state->lens[sym++] = 5;
+ distfix = next;
+ bits = 5;
+ inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
+
+ /* do this just once */
+ virgin = 0;
+ }
+#else /* !BUILDFIXED */
+# include "hammer2_zlib_inffixed.h"
+#endif /* BUILDFIXED */
+ state->lencode = lenfix;
+ state->lenbits = 9;
+ state->distcode = distfix;
+ state->distbits = 5;
+}
+
+#ifdef MAKEFIXED
+#include <stdio.h>
+
+/*
+ Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also
+ defines BUILDFIXED, so the tables are built on the fly. makefixed() writes
+ those tables to stdout, which would be piped to inffixed.h. A small program
+ can simply call makefixed to do this:
+
+ void makefixed(void);
+
+ int main(void)
+ {
+ makefixed();
+ return 0;
+ }
+
+ Then that can be linked with zlib built with MAKEFIXED defined and run:
+
+ a.out > inffixed.h
+ */
+void
+makefixed(void)
+{
+ unsigned low, size;
+ struct inflate_state state;
+
+ fixedtables(&state);
+ puts(" /* inffixed.h -- table for decoding fixed codes");
+ puts(" * Generated automatically by makefixed().");
+ puts(" */");
+ puts("");
+ puts(" /* WARNING: this file should *not* be used by applications.");
+ puts(" It is part of the implementation of this library and is");
+ puts(" subject to change. Applications should only use zlib.h.");
+ puts(" */");
+ puts("");
+ size = 1U << 9;
+ printf(" static const code lenfix[%u] = {", size);
+ low = 0;
+ for (;;) {
+ if ((low % 7) == 0) printf("\n ");
+ printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op,
+ state.lencode[low].bits, state.lencode[low].val);
+ if (++low == size) break;
+ putchar(',');
+ }
+ puts("\n };");
+ size = 1U << 5;
+ printf("\n static const code distfix[%u] = {", size);
+ low = 0;
+ for (;;) {
+ if ((low % 6) == 0) printf("\n ");
+ printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits,
+ state.distcode[low].val);
+ if (++low == size) break;
+ putchar(',');
+ }
+ puts("\n };");
+}
+#endif /* MAKEFIXED */
+
+/*
+ Update the window with the last wsize (normally 32K) bytes written before
+ returning. If window does not exist yet, create it. This is only called
+ when a window is already in use, or when output has been written during this
+ inflate call, but the end of the deflate stream has not been reached yet.
+ It is also called to create a window for dictionary data when a dictionary
+ is loaded.
+
+ Providing output buffers larger than 32K to inflate() should provide a speed
+ advantage, since only the last 32K of output is copied to the sliding window
+ upon return from inflate(), and since all distances after the first 32K of
+ output will fall in the output data, making match copies simpler and faster.
+ The advantage may be dependent on the size of the processor's data caches.
+ */
+local
+int
+updatewindow(z_streamp strm, const Bytef *end, unsigned copy)
+{
+ struct inflate_state FAR *state;
+ unsigned dist;
+
+ state = (struct inflate_state FAR *)strm->state;
+
+ /* if it hasn't been done already, allocate space for the window */
+ if (state->window == Z_NULL) {
+ state->window = (unsigned char FAR *)
+ malloc(1U << state->wbits, C_ZLIB_BUFFER_INFLATE, M_WAITOK);
+ if (state->window == Z_NULL) return 1;
+ }
+
+ /* if window not in use yet, initialize */
+ if (state->wsize == 0) {
+ state->wsize = 1U << state->wbits;
+ state->wnext = 0;
+ state->whave = 0;
+ }
+
+ /* copy state->wsize or less output bytes into the circular window */
+ if (copy >= state->wsize) {
+ zmemcpy(state->window, end - state->wsize, state->wsize);
+ state->wnext = 0;
+ state->whave = state->wsize;
+ }
+ else {
+ dist = state->wsize - state->wnext;
+ if (dist > copy) dist = copy;
+ zmemcpy(state->window + state->wnext, end - copy, dist);
+ copy -= dist;
+ if (copy) {
+ zmemcpy(state->window, end - copy, copy);
+ state->wnext = copy;
+ state->whave = state->wsize;
+ }
+ else {
+ state->wnext += dist;
+ if (state->wnext == state->wsize) state->wnext = 0;
+ if (state->whave < state->wsize) state->whave += dist;
+ }
+ }
+ return 0;
+}
+
+/* Macros for inflate(): */
+#define UPDATE(check, buf, len) adler32(check, buf, len)
+
+/* Load registers with state in inflate() for speed */
+#define LOAD() \
+ do { \
+ put = strm->next_out; \
+ left = strm->avail_out; \
+ next = strm->next_in; \
+ have = strm->avail_in; \
+ hold = state->hold; \
+ bits = state->bits; \
+ } while (0)
+
+/* Restore state from registers in inflate() */
+#define RESTORE() \
+ do { \
+ strm->next_out = put; \
+ strm->avail_out = left; \
+ strm->next_in = next; \
+ strm->avail_in = have; \
+ state->hold = hold; \
+ state->bits = bits; \
+ } while (0)
+
+/* Clear the input bit accumulator */
+#define INITBITS() \
+ do { \
+ hold = 0; \
+ bits = 0; \
+ } while (0)
+
+/* Get a byte of input into the bit accumulator, or return from inflate()
+ if there is no input available. */
+#define PULLBYTE() \
+ do { \
+ if (have == 0) goto inf_leave; \
+ have--; \
+ hold += (unsigned long)(*next++) << bits; \
+ bits += 8; \
+ } while (0)
+
+/* Assure that there are at least n bits in the bit accumulator. If there is
+ not enough available input to do that, then return from inflate(). */
+#define NEEDBITS(n) \
+ do { \
+ while (bits < (unsigned)(n)) \
+ PULLBYTE(); \
+ } while (0)
+
+/* Return the low n bits of the bit accumulator (n < 16) */
+#define BITS(n) \
+ ((unsigned)hold & ((1U << (n)) - 1))
+
+/* Remove n bits from the bit accumulator */
+#define DROPBITS(n) \
+ do { \
+ hold >>= (n); \
+ bits -= (unsigned)(n); \
+ } while (0)
+
+/* Remove zero to seven bits as needed to go to a byte boundary */
+#define BYTEBITS() \
+ do { \
+ hold >>= bits & 7; \
+ bits -= bits & 7; \
+ } while (0)
+
+/*
+ inflate() uses a state machine to process as much input data and generate as
+ much output data as possible before returning. The state machine is
+ structured roughly as follows:
+
+ for (;;) switch (state) {
+ ...
+ case STATEn:
+ if (not enough input data or output space to make progress)
+ return;
+ ... make progress ...
+ state = STATEm;
+ break;
+ ...
+ }
+
+ so when inflate() is called again, the same case is attempted again, and
+ if the appropriate resources are provided, the machine proceeds to the
+ next state. The NEEDBITS() macro is usually the way the state evaluates
+ whether it can proceed or should return. NEEDBITS() does the return if
+ the requested bits are not available. The typical use of the BITS macros
+ is:
+
+ NEEDBITS(n);
+ ... do something with BITS(n) ...
+ DROPBITS(n);
+
+ where NEEDBITS(n) either returns from inflate() if there isn't enough
+ input left to load n bits into the accumulator, or it continues. BITS(n)
+ gives the low n bits in the accumulator. When done, DROPBITS(n) drops
+ the low n bits off the accumulator. INITBITS() clears the accumulator
+ and sets the number of available bits to zero. BYTEBITS() discards just
+ enough bits to put the accumulator on a byte boundary. After BYTEBITS()
+ and a NEEDBITS(8), then BITS(8) would return the next byte in the stream.
+
+ NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return
+ if there is no input available. The decoding of variable length codes uses
+ PULLBYTE() directly in order to pull just enough bytes to decode the next
+ code, and no more.
+
+ Some states loop until they get enough input, making sure that enough
+ state information is maintained to continue the loop where it left off
+ if NEEDBITS() returns in the loop. For example, want, need, and keep
+ would all have to actually be part of the saved state in case NEEDBITS()
+ returns:
+
+ case STATEw:
+ while (want < need) {
+ NEEDBITS(n);
+ keep[want++] = BITS(n);
+ DROPBITS(n);
+ }
+ state = STATEx;
+ case STATEx:
+
+ As shown above, if the next state is also the next case, then the break
+ is omitted.
+
+ A state may also return if there is not enough output space available to
+ complete that state. Those states are copying stored data, writing a
+ literal byte, and copying a matching string.
+
+ When returning, a "goto inf_leave" is used to update the total counters,
+ update the check value, and determine whether any progress has been made
+ during that inflate() call in order to return the proper return code.
+ Progress is defined as a change in either strm->avail_in or strm->avail_out.
+ When there is a window, goto inf_leave will update the window with the last
+ output written. If a goto inf_leave occurs in the middle of decompression
+ and there is no window currently, goto inf_leave will create one and copy
+ output to the window for the next call of inflate().
+
+ In this implementation, the flush parameter of inflate() only affects the
+ return code (per zlib.h). inflate() always writes as much as possible to
+ strm->next_out, given the space available and the provided input--the effect
+ documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers
+ the allocation of and copying into a sliding window until necessary, which
+ provides the effect documented in zlib.h for Z_FINISH when the entire input
+ stream is available. So the only thing the flush parameter actually does is:
+ when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it
+ will return Z_BUF_ERROR if it has not reached the end of the stream.
+ */
+
+int
+inflate(z_streamp strm, int flush)
+{
+ struct inflate_state FAR *state;
+ z_const unsigned char FAR *next; /* next input */
+ unsigned char FAR *put; /* next output */
+ unsigned have, left; /* available input and output */
+ unsigned long hold; /* bit buffer */
+ unsigned bits; /* bits in bit buffer */
+ unsigned in, out; /* save starting available input and output */
+ unsigned copy; /* number of stored or match bytes to copy */
+ unsigned char FAR *from; /* where to copy match bytes from */
+ code here; /* current decoding table entry */
+ code last; /* parent table entry */
+ unsigned len; /* length to copy for repeats, bits to drop */
+ int ret; /* return code */
+
+ static const unsigned short order[19] = /* permutation of code lengths */
+ {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+ if (strm == Z_NULL || strm->state == Z_NULL || strm->next_out == Z_NULL ||
+ (strm->next_in == Z_NULL && strm->avail_in != 0))
+ return Z_STREAM_ERROR;
+
+ state = (struct inflate_state FAR *)strm->state;
+ if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */
+ LOAD();
+ in = have;
+ out = left;
+ ret = Z_OK;
+ for (;;)
+ switch (state->mode) {
+ case HEAD:
+ if (state->wrap == 0) {
+ state->mode = TYPEDO;
+ break;
+ }
+ NEEDBITS(16);
+ if (((BITS(8) << 8) + (hold >> 8)) % 31) {
+ strm->msg = (char *)"incorrect header check";
+ state->mode = BAD;
+ break;
+ }
+ if (BITS(4) != Z_DEFLATED) {
+ strm->msg = (char *)"unknown compression method";
+ state->mode = BAD;
+ break;
+ }
+ DROPBITS(4);
+ len = BITS(4) + 8;
+ if (state->wbits == 0)
+ state->wbits = len;
+ else if (len > state->wbits) {
+ strm->msg = (char *)"invalid window size";
+ state->mode = BAD;
+ break;
+ }
+ state->dmax = 1U << len;
+ Tracev((stderr, "inflate: zlib header ok\n"));
+ strm->adler = state->check = adler32(0L, Z_NULL, 0);
+ state->mode = hold & 0x200 ? DICTID : TYPE;
+ INITBITS();
+ break;
+ case DICTID:
+ NEEDBITS(32);
+ strm->adler = state->check = ZSWAP32(hold);
+ INITBITS();
+ state->mode = DICT;
+ case DICT:
+ if (state->havedict == 0) {
+ RESTORE();
+ return Z_NEED_DICT;
+ }
+ strm->adler = state->check = adler32(0L, Z_NULL, 0);
+ state->mode = TYPE;
+ case TYPE:
+ if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave;
+ case TYPEDO:
+ if (state->last) {
+ BYTEBITS();
+ state->mode = CHECK;
+ break;
+ }
+ NEEDBITS(3);
+ state->last = BITS(1);
+ DROPBITS(1);
+ switch (BITS(2)) {
+ case 0: /* stored block */
+ Tracev((stderr, "inflate: stored block%s\n",
+ state->last ? " (last)" : ""));
+ state->mode = STORED;
+ break;
+ case 1: /* fixed block */
+ fixedtables(state);
+ Tracev((stderr, "inflate: fixed codes block%s\n",
+ state->last ? " (last)" : ""));
+ state->mode = LEN_; /* decode codes */
+ if (flush == Z_TREES) {
+ DROPBITS(2);
+ goto inf_leave;
+ }
+ break;
+ case 2: /* dynamic block */
+ Tracev((stderr, "inflate: dynamic codes block%s\n",
+ state->last ? " (last)" : ""));
+ state->mode = TABLE;
+ break;
+ case 3:
+ strm->msg = (char *)"invalid block type";
+ state->mode = BAD;
+ }
+ DROPBITS(2);
+ break;
+ case STORED:
+ BYTEBITS(); /* go to byte boundary */
+ NEEDBITS(32);
+ if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
+ strm->msg = (char *)"invalid stored block lengths";
+ state->mode = BAD;
+ break;
+ }
+ state->length = (unsigned)hold & 0xffff;
+ Tracev((stderr, "inflate: stored length %u\n",
+ state->length));
+ INITBITS();
+ state->mode = COPY_;
+ if (flush == Z_TREES) goto inf_leave;
+ case COPY_:
+ state->mode = COPY;
+ case COPY:
+ copy = state->length;
+ if (copy) {
+ if (copy > have) copy = have;
+ if (copy > left) copy = left;
+ if (copy == 0) goto inf_leave;
+ zmemcpy(put, next, copy);
+ have -= copy;
+ next += copy;
+ left -= copy;
+ put += copy;
+ state->length -= copy;
+ break;
+ }
+ Tracev((stderr, "inflate: stored end\n"));
+ state->mode = TYPE;
+ break;
+ case TABLE:
+ NEEDBITS(14);
+ state->nlen = BITS(5) + 257;
+ DROPBITS(5);
+ state->ndist = BITS(5) + 1;
+ DROPBITS(5);
+ state->ncode = BITS(4) + 4;
+ DROPBITS(4);
+#ifndef PKZIP_BUG_WORKAROUND
+ if (state->nlen > 286 || state->ndist > 30) {
+ strm->msg = (char *)"too many length or distance symbols";
+ state->mode = BAD;
+ break;
+ }
+#endif
+ Tracev((stderr, "inflate: table sizes ok\n"));
+ state->have = 0;
+ state->mode = LENLENS;
+ case LENLENS:
+ while (state->have < state->ncode) {
+ NEEDBITS(3);
+ state->lens[order[state->have++]] = (unsigned short)BITS(3);
+ DROPBITS(3);
+ }
+ while (state->have < 19)
+ state->lens[order[state->have++]] = 0;
+ state->next = state->codes;
+ state->lencode = (const code FAR *)(state->next);
+ state->lenbits = 7;
+ ret = inflate_table(CODES, state->lens, 19, &(state->next),
+ &(state->lenbits), state->work);
+ if (ret) {
+ strm->msg = (char *)"invalid code lengths set";
+ state->mode = BAD;
+ break;
+ }
+ Tracev((stderr, "inflate: code lengths ok\n"));
+ state->have = 0;
+ state->mode = CODELENS;
+ case CODELENS:
+ while (state->have < state->nlen + state->ndist) {
+ for (;;) {
+ here = state->lencode[BITS(state->lenbits)];
+ if ((unsigned)(here.bits) <= bits) break;
+ PULLBYTE();
+ }
+ if (here.val < 16) {
+ DROPBITS(here.bits);
+ state->lens[state->have++] = here.val;
+ }
+ else {
+ if (here.val == 16) {
+ NEEDBITS(here.bits + 2);
+ DROPBITS(here.bits);
+ if (state->have == 0) {
+ strm->msg = (char *)"invalid bit length repeat";
+ state->mode = BAD;
+ break;
+ }
+ len = state->lens[state->have - 1];
+ copy = 3 + BITS(2);
+ DROPBITS(2);
+ }
+ else if (here.val == 17) {
+ NEEDBITS(here.bits + 3);
+ DROPBITS(here.bits);
+ len = 0;
+ copy = 3 + BITS(3);
+ DROPBITS(3);
+ }
+ else {
+ NEEDBITS(here.bits + 7);
+ DROPBITS(here.bits);
+ len = 0;
+ copy = 11 + BITS(7);
+ DROPBITS(7);
+ }
+ if (state->have + copy > state->nlen + state->ndist) {
+ strm->msg = (char *)"invalid bit length repeat";
+ state->mode = BAD;
+ break;
+ }
+ while (copy--)
+ state->lens[state->have++] = (unsigned short)len;
+ }
+ }
+
+ /* handle error breaks in while */
+ if (state->mode == BAD) break;
+
+ /* check for end-of-block code (better have one) */
+ if (state->lens[256] == 0) {
+ strm->msg = (char *)"invalid code -- missing end-of-block";
+ state->mode = BAD;
+ break;
+ }
+
+ /* build code tables -- note: do not change the lenbits or distbits
+ values here (9 and 6) without reading the comments in inftrees.h
+ concerning the ENOUGH constants, which depend on those values */
+ state->next = state->codes;
+ state->lencode = (const code FAR *)(state->next);
+ state->lenbits = 9;
+ ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
+ &(state->lenbits), state->work);
+ if (ret) {
+ strm->msg = (char *)"invalid literal/lengths set";
+ state->mode = BAD;
+ break;
+ }
+ state->distcode = (const code FAR *)(state->next);
+ state->distbits = 6;
+ ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
+ &(state->next), &(state->distbits), state->work);
+ if (ret) {
+ strm->msg = (char *)"invalid distances set";
+ state->mode = BAD;
+ break;
+ }
+ Tracev((stderr, "inflate: codes ok\n"));
+ state->mode = LEN_;
+ if (flush == Z_TREES) goto inf_leave;
+ case LEN_:
+ state->mode = LEN;
+ case LEN:
+ if (have >= 6 && left >= 258) {
+ RESTORE();
+ inflate_fast(strm, out);
+ LOAD();
+ if (state->mode == TYPE)
+ state->back = -1;
+ break;
+ }
+ state->back = 0;
+ for (;;) {
+ here = state->lencode[BITS(state->lenbits)];
+ if ((unsigned)(here.bits) <= bits) break;
+ PULLBYTE();
+ }
+ if (here.op && (here.op & 0xf0) == 0) {
+ last = here;
+ for (;;) {
+ here = state->lencode[last.val +
+ (BITS(last.bits + last.op) >> last.bits)];
+ if ((unsigned)(last.bits + here.bits) <= bits) break;
+ PULLBYTE();
+ }
+ DROPBITS(last.bits);
+ state->back += last.bits;
+ }
+ DROPBITS(here.bits);
+ state->back += here.bits;
+ state->length = (unsigned)here.val;
+ if ((int)(here.op) == 0) {
+ Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
+ "inflate: literal '%c'\n" :
+ "inflate: literal 0x%02x\n", here.val));
+ state->mode = LIT;
+ break;
+ }
+ if (here.op & 32) {
+ Tracevv((stderr, "inflate: end of block\n"));
+ state->back = -1;
+ state->mode = TYPE;
+ break;
+ }
+ if (here.op & 64) {
+ strm->msg = (char *)"invalid literal/length code";
+ state->mode = BAD;
+ break;
+ }
+ state->extra = (unsigned)(here.op) & 15;
+ state->mode = LENEXT;
+ case LENEXT:
+ if (state->extra) {
+ NEEDBITS(state->extra);
+ state->length += BITS(state->extra);
+ DROPBITS(state->extra);
+ state->back += state->extra;
+ }
+ Tracevv((stderr, "inflate: length %u\n", state->length));
+ state->was = state->length;
+ state->mode = DIST;
+ case DIST:
+ for (;;) {
+ here = state->distcode[BITS(state->distbits)];
+ if ((unsigned)(here.bits) <= bits) break;
+ PULLBYTE();
+ }
+ if ((here.op & 0xf0) == 0) {
+ last = here;
+ for (;;) {
+ here = state->distcode[last.val +
+ (BITS(last.bits + last.op) >> last.bits)];
+ if ((unsigned)(last.bits + here.bits) <= bits) break;
+ PULLBYTE();
+ }
+ DROPBITS(last.bits);
+ state->back += last.bits;
+ }
+ DROPBITS(here.bits);
+ state->back += here.bits;
+ if (here.op & 64) {
+ strm->msg = (char *)"invalid distance code";
+ state->mode = BAD;
+ break;
+ }
+ state->offset = (unsigned)here.val;
+ state->extra = (unsigned)(here.op) & 15;
+ state->mode = DISTEXT;
+ case DISTEXT:
+ if (state->extra) {
+ NEEDBITS(state->extra);
+ state->offset += BITS(state->extra);
+ DROPBITS(state->extra);
+ state->back += state->extra;
+ }
+#ifdef INFLATE_STRICT
+ if (state->offset > state->dmax) {
+ strm->msg = (char *)"invalid distance too far back";
+ state->mode = BAD;
+ break;
+ }
+#endif
+ Tracevv((stderr, "inflate: distance %u\n", state->offset));
+ state->mode = MATCH;
+ case MATCH:
+ if (left == 0) goto inf_leave;
+ copy = out - left;
+ if (state->offset > copy) { /* copy from window */
+ copy = state->offset - copy;
+ if (copy > state->whave) {
+ if (state->sane) {
+ strm->msg = (char *)"invalid distance too far back";
+ state->mode = BAD;
+ break;
+ }
+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+ Trace((stderr, "inflate.c too far\n"));
+ copy -= state->whave;
+ if (copy > state->length) copy = state->length;
+ if (copy > left) copy = left;
+ left -= copy;
+ state->length -= copy;
+ do {
+ *put++ = 0;
+ } while (--copy);
+ if (state->length == 0) state->mode = LEN;
+ break;
+#endif
+ }
+ if (copy > state->wnext) {
+ copy -= state->wnext;
+ from = state->window + (state->wsize - copy);
+ }
+ else
+ from = state->window + (state->wnext - copy);
+ if (copy > state->length) copy = state->length;
+ }
+ else { /* copy from output */
+ from = put - state->offset;
+ copy = state->length;
+ }
+ if (copy > left) copy = left;
+ left -= copy;
+ state->length -= copy;
+ do {
+ *put++ = *from++;
+ } while (--copy);
+ if (state->length == 0) state->mode = LEN;
+ break;
+ case LIT:
+ if (left == 0) goto inf_leave;
+ *put++ = (unsigned char)(state->length);
+ left--;
+ state->mode = LEN;
+ break;
+ case CHECK:
+ if (state->wrap) {
+ NEEDBITS(32);
+ out -= left;
+ strm->total_out += out;
+ state->total += out;
+ if (out)
+ strm->adler = state->check =
+ UPDATE(state->check, put - out, out);
+ out = left;
+ if ((ZSWAP32(hold)) != state->check) {
+ strm->msg = (char *)"incorrect data check";
+ state->mode = BAD;
+ break;
+ }
+ INITBITS();
+ Tracev((stderr, "inflate: check matches trailer\n"));
+ }
+ state->mode = DONE;
+ case DONE:
+ ret = Z_STREAM_END;
+ goto inf_leave;
+ case BAD:
+ ret = Z_DATA_ERROR;
+ goto inf_leave;
+ case MEM:
+ return Z_MEM_ERROR;
+ case SYNC:
+ default:
+ return Z_STREAM_ERROR;
+ }
+
+ /*
+ Return from inflate(), updating the total counts and the check value.
+ If there was no progress during the inflate() call, return a buffer
+ error. Call updatewindow() to create and/or update the window state.
+ Note: a memory error from inflate() is non-recoverable.
+ */
+ inf_leave:
+ RESTORE();
+ if (state->wsize || (out != strm->avail_out && state->mode < BAD &&
+ (state->mode < CHECK || flush != Z_FINISH)))
+ if (updatewindow(strm, strm->next_out, out - strm->avail_out)) {
+ state->mode = MEM;
+ return Z_MEM_ERROR;
+ }
+ in -= strm->avail_in;
+ out -= strm->avail_out;
+ strm->total_in += in;
+ strm->total_out += out;
+ state->total += out;
+ if (state->wrap && out)
+ strm->adler = state->check =
+ UPDATE(state->check, strm->next_out - out, out);
+ strm->data_type = state->bits + (state->last ? 64 : 0) +
+ (state->mode == TYPE ? 128 : 0) +
+ (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0);
+ if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)
+ ret = Z_BUF_ERROR;
+ return ret;
+}
+
+int
+inflateEnd(z_streamp strm)
+{
+ struct inflate_state FAR *state;
+ if (strm == Z_NULL || strm->state == Z_NULL)
+ return Z_STREAM_ERROR;
+ state = (struct inflate_state FAR *)strm->state;
+ if (state->window != Z_NULL) free(state->window, C_ZLIB_BUFFER_INFLATE);
+ free(strm->state, C_ZLIB_BUFFER_INFLATE);
+ strm->state = Z_NULL;
+ Tracev((stderr, "inflate: end\n"));
+ return Z_OK;
+}
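
The long comment before inflate() pins down the flush semantics; in practice a caller points next_in/next_out at its buffers and calls inflate() until Z_STREAM_END. A userland sketch of that calling contract, written against stock <zlib.h> so it can run as-is (the kernel port above keeps the same interface but allocates through malloc(9)):

#include <stdio.h>
#include <string.h>
#include <zlib.h>

int
main(void)
{
	/* "hello" deflated at default settings (zlib-wrapped) */
	static const unsigned char in[] = {
		0x78, 0x9c, 0xcb, 0x48, 0xcd, 0xc9, 0xc9, 0x07,
		0x00, 0x06, 0x2c, 0x02, 0x15,
	};
	unsigned char out[32];
	z_stream strm;
	int ret;

	memset(&strm, 0, sizeof(strm));
	if (inflateInit(&strm) != Z_OK)
		return 1;
	strm.next_in = (Bytef *)in;
	strm.avail_in = sizeof(in);
	strm.next_out = out;
	strm.avail_out = sizeof(out);

	/* whole input is present, so Z_FINISH must yield Z_STREAM_END */
	ret = inflate(&strm, Z_FINISH);
	if (ret == Z_STREAM_END)
		printf("%.*s\n", (int)(sizeof(out) - strm.avail_out),
		    (char *)out);
	inflateEnd(&strm);
	return ret == Z_STREAM_END ? 0 : 1;
}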
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib_inftrees.h b/sys/fs/hammer2/zlib/hammer2_zlib_inftrees.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib_inftrees.h
@@ -0,0 +1,62 @@
+/* inftrees.h -- header to use inftrees.c
+ * Copyright (C) 1995-2005, 2010 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+/* Structure for decoding tables. Each entry provides either the
+ information needed to do the operation requested by the code that
+ indexed that table entry, or it provides a pointer to another
+ table that indexes more bits of the code. op indicates whether
+ the entry is a pointer to another table, a literal, a length or
+ distance, an end-of-block, or an invalid code. For a table
+ pointer, the low four bits of op is the number of index bits of
+ that table. For a length or distance, the low four bits of op
+ is the number of extra bits to get after the code. bits is
+ the number of bits in this code or part of the code to drop off
+ of the bit buffer. val is the actual byte to output in the case
+ of a literal, the base length or distance, or the offset from
+ the current table to the next table. Each entry is four bytes. */
+typedef struct {
+ unsigned char op; /* operation, extra bits, table bits */
+ unsigned char bits; /* bits in this part of the code */
+ unsigned short val; /* offset in table or code value */
+} code;
+
+/* op values as set by inflate_table():
+ 00000000 - literal
+ 0000tttt - table link, tttt != 0 is the number of table index bits
+ 0001eeee - length or distance, eeee is the number of extra bits
+ 01100000 - end of block
+ 01000000 - invalid code
+ */
+
+/* Maximum size of the dynamic table. The maximum number of code structures is
+ 1444, which is the sum of 852 for literal/length codes and 592 for distance
+ codes. These values were found by exhaustive searches using the program
+ examples/enough.c found in the zlib distribution. The arguments to that
+ program are the number of symbols, the initial root table size, and the
+ maximum bit length of a code. "enough 286 9 15" for literal/length codes
+ returns 852, and "enough 30 6 15" for distance codes returns 592.
+ The initial root table size (9 or 6) is found in the fifth argument of the
+ inflate_table() calls in inflate.c and infback.c. If the root table size is
+ changed, then these maximum sizes would need to be recalculated and
+ updated. */
+#define ENOUGH_LENS 852
+#define ENOUGH_DISTS 592
+#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS)
+
+/* Type of code to build for inflate_table() */
+typedef enum {
+ CODES,
+ LENS,
+ DISTS
+} codetype;
+
+int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens,
+ unsigned codes, code FAR * FAR *table,
+ unsigned FAR *bits, unsigned short FAR *work);
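
The op encoding above is what inflate_fast() and inflate() branch on after each table lookup. A standalone sketch that classifies entries by that encoding, using a local copy of the code struct and sample entries taken from the fixed tables earlier in this diff:

#include <stdio.h>

typedef struct {
	unsigned char op;	/* operation, extra bits, table bits */
	unsigned char bits;	/* bits in this part of the code */
	unsigned short val;	/* offset in table or code value */
} code;

static void
classify(code c)
{
	/* same test order as the decoder: literal, base, link, eob, bad */
	if (c.op == 0)
		printf("literal 0x%02x, %u-bit code\n", c.val, c.bits);
	else if (c.op & 16)
		printf("length/dist base %u, %u extra bits\n",
		    c.val, c.op & 15);
	else if ((c.op & 64) == 0)
		printf("link to sub-table at +%u, %u index bits\n",
		    c.val, c.op & 15);
	else if (c.op & 32)
		printf("end-of-block, %u-bit code\n", c.bits);
	else
		printf("invalid code\n");
}

int
main(void)
{
	classify((code){ 0, 8, 80 });	  /* lenfix: literal 'P' */
	classify((code){ 96, 7, 0 });	  /* lenfix: end-of-block */
	classify((code){ 29, 5, 24577 }); /* distfix: base 24577, 13 extra */
	classify((code){ 64, 5, 0 });	  /* distfix: invalid code */
	return 0;
}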
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib_inftrees.c b/sys/fs/hammer2/zlib/hammer2_zlib_inftrees.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib_inftrees.c
@@ -0,0 +1,304 @@
+/* inftrees.c -- generate Huffman trees for efficient decoding
+ * Copyright (C) 1995-2013 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "hammer2_zlib_zutil.h"
+#include "hammer2_zlib_inftrees.h"
+
+#define MAXBITS 15
+
+const char inflate_copyright[] =
+ " inflate 1.2.8 Copyright 1995-2013 Mark Adler ";
+/*
+ If you use the zlib library in a product, an acknowledgment is welcome
+ in the documentation of your product. If for some reason you cannot
+ include such an acknowledgment, I would appreciate that you keep this
+ copyright string in the executable of your product.
+ */
+
+/*
+ Build a set of tables to decode the provided canonical Huffman code.
+ The code lengths are lens[0..codes-1]. The result starts at *table,
+ whose indices are 0..2^bits-1. work is a writable array of at least
+ lens shorts, which is used as a work area. type is the type of code
+ to be generated, CODES, LENS, or DISTS. On return, zero is success,
+ -1 is an invalid code, and +1 means that ENOUGH isn't enough. table
+ on return points to the next available entry's address. bits is the
+ requested root table index bits, and on return it is the actual root
+ table index bits. It will differ if the request is greater than the
+ longest code or if it is less than the shortest code.
+ */
+int
+ZLIB_INTERNAL
+inflate_table(codetype type, unsigned short FAR *lens, unsigned codes,
+ code FAR * FAR *table, unsigned FAR *bits,
+ unsigned short FAR *work)
+{
+ unsigned len; /* a code's length in bits */
+ unsigned sym; /* index of code symbols */
+ unsigned min, max; /* minimum and maximum code lengths */
+ unsigned root; /* number of index bits for root table */
+ unsigned curr; /* number of index bits for current table */
+ unsigned drop; /* code bits to drop for sub-table */
+ int left; /* number of prefix codes available */
+ unsigned used; /* code entries in table used */
+ unsigned huff; /* Huffman code */
+ unsigned incr; /* for incrementing code, index */
+ unsigned fill; /* index for replicating entries */
+ unsigned low; /* low bits for current root entry */
+ unsigned mask; /* mask for low root bits */
+ code here; /* table entry for duplication */
+ code FAR *next; /* next available space in table */
+ const unsigned short FAR *base; /* base value table to use */
+ const unsigned short FAR *extra; /* extra bits table to use */
+ int end; /* use base and extra for symbol > end */
+ unsigned short count[MAXBITS+1]; /* number of codes of each length */
+ unsigned short offs[MAXBITS+1]; /* offsets in table for each length */
+ static const unsigned short lbase[31] = { /* Length codes 257..285 base */
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+ 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
+ static const unsigned short lext[31] = { /* Length codes 257..285 extra */
+ 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
+ 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 72, 78};
+ static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+ 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+ 8193, 12289, 16385, 24577, 0, 0};
+ static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
+ 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
+ 23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
+ 28, 28, 29, 29, 64, 64};
+
+ /*
+ Process a set of code lengths to create a canonical Huffman code. The
+ code lengths are lens[0..codes-1]. Each length corresponds to the
+ symbols 0..codes-1. The Huffman code is generated by first sorting the
+ symbols by length from short to long, and retaining the symbol order
+ for codes with equal lengths. Then the code starts with all zero bits
+ for the first code of the shortest length, and the codes are integer
+ increments for the same length, and zeros are appended as the length
+ increases. For the deflate format, these bits are stored backwards
+ from their more natural integer increment ordering, and so when the
+ decoding tables are built in the large loop below, the integer codes
+ are incremented backwards.
+
+ This routine assumes, but does not check, that all of the entries in
+ lens[] are in the range 0..MAXBITS. The caller must assure this.
+ 1..MAXBITS is interpreted as that code length. A zero means that the
+ symbol does not occur in this code.
+
+ The codes are sorted by computing a count of codes for each length,
+ creating from that a table of starting indices for each length in the
+ sorted table, and then entering the symbols in order in the sorted
+ table. The sorted table is work[], with that space being provided by
+ the caller.
+
+ The length counts are used for other purposes as well, i.e. finding
+ the minimum and maximum length codes, determining if there are any
+ codes at all, checking for a valid set of lengths, and looking ahead
+ at length counts to determine sub-table sizes when building the
+ decoding tables.
+ */
+
+ /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
+ for (len = 0; len <= MAXBITS; len++)
+ count[len] = 0;
+ for (sym = 0; sym < codes; sym++)
+ count[lens[sym]]++;
+
+ /* bound code lengths, force root to be within code lengths */
+ root = *bits;
+ for (max = MAXBITS; max >= 1; max--)
+ if (count[max] != 0) break;
+ if (root > max) root = max;
+ if (max == 0) { /* no symbols to code at all */
+ here.op = (unsigned char)64; /* invalid code marker */
+ here.bits = (unsigned char)1;
+ here.val = (unsigned short)0;
+ *(*table)++ = here; /* make a table to force an error */
+ *(*table)++ = here;
+ *bits = 1;
+ return 0; /* no symbols, but wait for decoding to report error */
+ }
+ for (min = 1; min < max; min++)
+ if (count[min] != 0) break;
+ if (root < min) root = min;
+
+ /* check for an over-subscribed or incomplete set of lengths */
+ left = 1;
+ for (len = 1; len <= MAXBITS; len++) {
+ left <<= 1;
+ left -= count[len];
+ if (left < 0) return -1; /* over-subscribed */
+ }
+ if (left > 0 && (type == CODES || max != 1))
+ return -1; /* incomplete set */
+
+ /* generate offsets into symbol table for each length for sorting */
+ offs[1] = 0;
+ for (len = 1; len < MAXBITS; len++)
+ offs[len + 1] = offs[len] + count[len];
+
+ /* sort symbols by length, by symbol order within each length */
+ for (sym = 0; sym < codes; sym++)
+ if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
+
+ /*
+ Create and fill in decoding tables. In this loop, the table being
+ filled is at next and has curr index bits. The code being used is huff
+ with length len. That code is converted to an index by dropping drop
+ bits off of the bottom. For codes where len is less than drop + curr,
+ those top drop + curr - len bits are incremented through all values to
+ fill the table with replicated entries.
+
+ root is the number of index bits for the root table. When len exceeds
+ root, sub-tables are created pointed to by the root entry with an index
+ of the low root bits of huff. This is saved in low to check for when a
+ new sub-table should be started. drop is zero when the root table is
+ being filled, and drop is root when sub-tables are being filled.
+
+ When a new sub-table is needed, it is necessary to look ahead in the
+ code lengths to determine what size sub-table is needed. The length
+ counts are used for this, and so count[] is decremented as codes are
+ entered in the tables.
+
+ used keeps track of how many table entries have been allocated from the
+ provided *table space. It is checked for LENS and DISTS tables against
+ the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
+ the initial root table size constants. See the comments in inftrees.h
+ for more information.
+
+ sym increments through all symbols, and the loop terminates when
+ all codes of length max, i.e. all codes, have been processed. This
+ routine permits incomplete codes, so another loop after this one fills
+ in the rest of the decoding tables with invalid code markers.
+ */
+
+ /* set up for code type */
+ switch (type) {
+ case CODES:
+ base = extra = work; /* dummy value--not used */
+ end = 19;
+ break;
+ case LENS:
+ base = lbase;
+ base -= 257;
+ extra = lext;
+ extra -= 257;
+ end = 256;
+ break;
+ default: /* DISTS */
+ base = dbase;
+ extra = dext;
+ end = -1;
+ }
+
+ /* initialize state for loop */
+ huff = 0; /* starting code */
+ sym = 0; /* starting code symbol */
+ len = min; /* starting code length */
+ next = *table; /* current table to fill in */
+ curr = root; /* current table index bits */
+ drop = 0; /* current bits to drop from code for index */
+ low = (unsigned)(-1); /* trigger new sub-table when len > root */
+ used = 1U << root; /* use root table entries */
+ mask = used - 1; /* mask for comparing low */
+
+ /* check available table space */
+ if ((type == LENS && used > ENOUGH_LENS) ||
+ (type == DISTS && used > ENOUGH_DISTS))
+ return 1;
+
+ /* process all codes and make table entries */
+ for (;;) {
+ /* create table entry */
+ here.bits = (unsigned char)(len - drop);
+ if ((int)(work[sym]) < end) {
+ here.op = (unsigned char)0;
+ here.val = work[sym];
+ }
+ else if ((int)(work[sym]) > end) {
+ here.op = (unsigned char)(extra[work[sym]]);
+ here.val = base[work[sym]];
+ }
+ else {
+ here.op = (unsigned char)(32 + 64); /* end of block */
+ here.val = 0;
+ }
+
+ /* replicate for those indices with low len bits equal to huff */
+ incr = 1U << (len - drop);
+ fill = 1U << curr;
+ min = fill; /* save offset to next table */
+ do {
+ fill -= incr;
+ next[(huff >> drop) + fill] = here;
+ } while (fill != 0);
+
+ /* backwards increment the len-bit code huff */
+ incr = 1U << (len - 1);
+ while (huff & incr)
+ incr >>= 1;
+ if (incr != 0) {
+ huff &= incr - 1;
+ huff += incr;
+ }
+ else
+ huff = 0;
+
+ /* go to next symbol, update count, len */
+ sym++;
+ if (--(count[len]) == 0) {
+ if (len == max) break;
+ len = lens[work[sym]];
+ }
+
+ /* create new sub-table if needed */
+ if (len > root && (huff & mask) != low) {
+ /* if first time, transition to sub-tables */
+ if (drop == 0)
+ drop = root;
+
+ /* increment past last table */
+ next += min; /* here min is 1 << curr */
+
+ /* determine length of next table */
+ curr = len - drop;
+ left = (int)(1 << curr);
+ while (curr + drop < max) {
+ left -= count[curr + drop];
+ if (left <= 0) break;
+ curr++;
+ left <<= 1;
+ }
+
+ /* check for enough space */
+ used += 1U << curr;
+ if ((type == LENS && used > ENOUGH_LENS) ||
+ (type == DISTS && used > ENOUGH_DISTS))
+ return 1;
+
+ /* point entry in root table to sub-table */
+ low = huff & mask;
+ (*table)[low].op = (unsigned char)curr;
+ (*table)[low].bits = (unsigned char)root;
+ (*table)[low].val = (unsigned short)(next - *table);
+ }
+ }
+
+ /* fill in remaining table entry if code is incomplete (guaranteed to have
+ at most one remaining entry, since if the code is incomplete, the
+ maximum code length that was allowed to get this far is one bit) */
+ if (huff != 0) {
+ here.op = (unsigned char)64; /* invalid code marker */
+ here.bits = (unsigned char)(len - drop);
+ here.val = (unsigned short)0;
+ next[huff] = here;
+ }
+
+ /* set return parameters */
+ *table += used;
+ *bits = root;
+ return 0;
+}
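The counting-sort phase at the top of inflate_table() (accumulate count[],
derive offs[], scatter symbols into work[]) is the part most easily studied in
isolation. Below is a minimal standalone sketch of just that phase, separate
from the sources being added; the sample lengths are the example alphabet from
RFC 1951 section 3.2.2, and everything else is illustrative.

    #include <stdio.h>

    #define MAXBITS 15

    int main(void)
    {
        /* example code lengths for symbols 0..7 (0 = symbol unused) */
        unsigned short lens[] = {3, 3, 3, 3, 3, 2, 4, 4};
        unsigned codes = 8;
        unsigned short count[MAXBITS + 1] = {0};
        unsigned short offs[MAXBITS + 1];
        unsigned short work[8];
        unsigned len, sym;

        /* count codes of each length */
        for (sym = 0; sym < codes; sym++)
            count[lens[sym]]++;

        /* starting index in work[] for each length */
        offs[1] = 0;
        for (len = 1; len < MAXBITS; len++)
            offs[len + 1] = offs[len] + count[len];

        /* scatter: work[] ends up sorted by length, stable in symbol order */
        for (sym = 0; sym < codes; sym++)
            if (lens[sym] != 0)
                work[offs[lens[sym]]++] = (unsigned short)sym;

        for (sym = 0; sym < codes; sym++)
            printf("work[%u] = symbol %u (len %u)\n", sym,
                (unsigned)work[sym], (unsigned)lens[work[sym]]);
        return 0;
    }

Running it prints work[] = {5, 0, 1, 2, 3, 4, 6, 7}: the single length-2
symbol first, then the five length-3 symbols in symbol order, then the two
length-4 symbols, which is exactly the order the table-filling loop above
consumes.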
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib_trees.h b/sys/fs/hammer2/zlib/hammer2_zlib_trees.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib_trees.h
@@ -0,0 +1,128 @@
+/* header created automatically with -DGEN_TREES_H */
+
+local const ct_data static_ltree[L_CODES+2] = {
+{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}},
+{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}},
+{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}},
+{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}},
+{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}},
+{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}},
+{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}},
+{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}},
+{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}},
+{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}},
+{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}},
+{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}},
+{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}},
+{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}},
+{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}},
+{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}},
+{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}},
+{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}},
+{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}},
+{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}},
+{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}},
+{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}},
+{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}},
+{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}},
+{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}},
+{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}},
+{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}},
+{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}},
+{{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}},
+{{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}},
+{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}},
+{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}},
+{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}},
+{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}},
+{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}},
+{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}},
+{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}},
+{{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}},
+{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}},
+{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}},
+{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}},
+{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}},
+{{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}},
+{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}},
+{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}},
+{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}},
+{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}},
+{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}},
+{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}},
+{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}},
+{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}},
+{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}},
+{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}},
+{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}},
+{{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}},
+{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}},
+{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}},
+{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}}
+};
+
+local const ct_data static_dtree[D_CODES] = {
+{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}},
+{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}},
+{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}},
+{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}},
+{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}},
+{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}}
+};
+
+const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {
+ 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8,
+ 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10,
+10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13,
+13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17,
+18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
+23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29
+};
+
+const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
+13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
+19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
+22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28
+};
+
+local const int base_length[LENGTH_CODES] = {
+0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
+64, 80, 96, 112, 128, 160, 192, 224, 0
+};
+
+local const int base_dist[D_CODES] = {
+ 0, 1, 2, 3, 4, 6, 8, 12, 16, 24,
+ 32, 48, 64, 96, 128, 192, 256, 384, 512, 768,
+ 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576
+};
+
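The Code column of static_dtree above is each distance-code index with its 5
bits reversed; tr_static_init() in trees.c below builds it that way through
bi_reverse(). A standalone sketch that regenerates the column for comparison,
with the reversal helper copied in so it compiles on its own (D_CODES is 30,
matching the 30 entries listed above):

    #include <stdio.h>

    /* same motion as bi_reverse() in hammer2_zlib_trees.c */
    static unsigned bit_reverse(unsigned code, int len)
    {
        unsigned res = 0;
        do {
            res |= code & 1;
            code >>= 1;
            res <<= 1;
        } while (--len > 0);
        return res >> 1;
    }

    int main(void)
    {
        int n;

        for (n = 0; n < 30; n++)        /* D_CODES entries */
            printf("{{%2u},{ 5}}%s", bit_reverse((unsigned)n, 5),
                n == 29 ? "\n" : ", ");
        return 0;
    }

The first few printed entries are 0, 16, 8, 24, 4, matching the table.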
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib_trees.c b/sys/fs/hammer2/zlib/hammer2_zlib_trees.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib_trees.c
@@ -0,0 +1,1232 @@
+/* trees.c -- output deflated data using Huffman coding
+ * Copyright (C) 1995-2012 Jean-loup Gailly
+ * detect_data_type() function provided freely by Cosmin Truta, 2006
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ * ALGORITHM
+ *
+ * The "deflation" process uses several Huffman trees. The more
+ * common source values are represented by shorter bit sequences.
+ *
+ * Each code tree is stored in a compressed form which is itself
+ * a Huffman encoding of the lengths of all the code strings (in
+ * ascending order by source values). The actual code strings are
+ * reconstructed from the lengths in the inflate process, as described
+ * in the deflate specification.
+ *
+ * REFERENCES
+ *
+ * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
+ * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
+ *
+ * Storer, James A.
+ * Data Compression: Methods and Theory, pp. 49-50.
+ * Computer Science Press, 1988. ISBN 0-7167-8156-5.
+ *
+ * Sedgewick, R.
+ * Algorithms, p290.
+ * Addison-Wesley, 1983. ISBN 0-201-06672-6.
+ */
+
+/* @(#) $Id$ */
+
+/* #define GEN_TREES_H */
+
+#include "hammer2_zlib_deflate.h"
+
+#ifdef H2_ZLIB_DEBUG
+# include <ctype.h>
+#endif
+
+/* ===========================================================================
+ * Constants
+ */
+
+#define MAX_BL_BITS 7
+/* Bit length codes must not exceed MAX_BL_BITS bits */
+
+#define END_BLOCK 256
+/* end of block literal code */
+
+#define REP_3_6 16
+/* repeat previous bit length 3-6 times (2 bits of repeat count) */
+
+#define REPZ_3_10 17
+/* repeat a zero length 3-10 times (3 bits of repeat count) */
+
+#define REPZ_11_138 18
+/* repeat a zero length 11-138 times (7 bits of repeat count) */
+
+local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
+ = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
+
+local const int extra_dbits[D_CODES] /* extra bits for each distance code */
+ = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
+ = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
+
+local const uch bl_order[BL_CODES]
+ = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
+/* The lengths of the bit length codes are sent in order of decreasing
+ * probability, to avoid transmitting the lengths for unused bit length codes.
+ */
+
+/* ===========================================================================
+ * Local data. These are initialized only once.
+ */
+
+#define DIST_CODE_LEN 512 /* see definition of array dist_code below */
+
+#if defined(GEN_TREES_H) || !defined(STDC)
+/* non ANSI compilers may not accept trees.h */
+
+local ct_data static_ltree[L_CODES+2];
+/* The static literal tree. Since the bit lengths are imposed, there is no
+ * need for the L_CODES extra codes used during heap construction. However
+ * The codes 286 and 287 are needed to build a canonical tree (see _tr_init
+ * below).
+ */
+
+local ct_data static_dtree[D_CODES];
+/* The static distance tree. (Actually a trivial tree since all codes use
+ * 5 bits.)
+ */
+
+uch _dist_code[DIST_CODE_LEN];
+/* Distance codes. The first 256 values correspond to the distances
+ * 3 .. 258, the last 256 values correspond to the top 8 bits of
+ * the 15 bit distances.
+ */
+
+uch _length_code[MAX_MATCH-MIN_MATCH+1];
+/* length code for each normalized match length (0 == MIN_MATCH) */
+
+local int base_length[LENGTH_CODES];
+/* First normalized length for each code (0 = MIN_MATCH) */
+
+local int base_dist[D_CODES];
+/* First normalized distance for each code (0 = distance of 1) */
+
+#else
+# include "hammer2_zlib_trees.h"
+#endif /* GEN_TREES_H */
+
+struct static_tree_desc_s {
+ const ct_data *static_tree; /* static tree or NULL */
+ const intf *extra_bits; /* extra bits for each code or NULL */
+ int extra_base; /* base index for extra_bits */
+ int elems; /* max number of elements in the tree */
+ int max_length; /* max bit length for the codes */
+};
+
+local static_tree_desc static_l_desc =
+{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
+
+local static_tree_desc static_d_desc =
+{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS};
+
+local static_tree_desc static_bl_desc =
+{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS};
+
+/* ===========================================================================
+ * Local (static) routines in this file.
+ */
+
+local void tr_static_init (void);
+local void init_block (deflate_state *s);
+local void pqdownheap (deflate_state *s, ct_data *tree, int k);
+local void gen_bitlen (deflate_state *s, tree_desc *desc);
+local void gen_codes (ct_data *tree, int max_code, ushf *bl_count);
+local void build_tree (deflate_state *s, tree_desc *desc);
+local void scan_tree (deflate_state *s, ct_data *tree, int max_code);
+local void send_tree (deflate_state *s, ct_data *tree, int max_code);
+local int build_bl_tree (deflate_state *s);
+local void send_all_trees (deflate_state *s, int lcodes, int dcodes,
+ int blcodes);
+local void compress_block (deflate_state *s, const ct_data *ltree,
+ const ct_data *dtree);
+local int detect_data_type (deflate_state *s);
+local unsigned bi_reverse (unsigned value, int length);
+local void bi_windup (deflate_state *s);
+local void bi_flush (deflate_state *s);
+local void copy_block (deflate_state *s, charf *buf, unsigned len,
+ int header);
+
+#ifdef GEN_TREES_H
+local void gen_trees_header (void);
+#endif
+
+#ifndef H2_ZLIB_DEBUG
+# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
+ /* Send a code of the given tree. c and tree must not have side effects */
+
+#else /* H2_ZLIB_DEBUG */
+# define send_code(s, c, tree) \
+ { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
+ send_bits(s, tree[c].Code, tree[c].Len); }
+#endif
+
+/* ===========================================================================
+ * Output a short LSB first on the stream.
+ * IN assertion: there is enough room in pendingBuf.
+ */
+#define put_short(s, w) { \
+ put_byte(s, (uch)((w) & 0xff)); \
+ put_byte(s, (uch)((ush)(w) >> 8)); \
+}
+
+/* ===========================================================================
+ * Send a value on a given number of bits.
+ * IN assertion: length <= 16 and value fits in length bits.
+ */
+#ifdef H2_ZLIB_DEBUG
+local void send_bits (deflate_state *s, int value, int length);
+
+local
+void
+send_bits(deflate_state *s, int value, int length)
+{
+ Tracevv((stderr," l %2d v %4x ", length, value));
+ Assert(length > 0 && length <= 15, "invalid length");
+ s->bits_sent += (ulg)length;
+
+ /* If not enough room in bi_buf, use (valid) bits from bi_buf and
+ * (16 - bi_valid) bits from value, leaving (length - (16-bi_valid))
+ * unused bits in value.
+ */
+ if (s->bi_valid > (int)Buf_size - length) {
+ s->bi_buf |= (ush)value << s->bi_valid;
+ put_short(s, s->bi_buf);
+ s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
+ s->bi_valid += length - Buf_size;
+ } else {
+ s->bi_buf |= (ush)value << s->bi_valid;
+ s->bi_valid += length;
+ }
+}
+#else /* !H2_ZLIB_DEBUG */
+
+#define send_bits(s, value, length) \
+{ int len = length;\
+ if (s->bi_valid > (int)Buf_size - len) {\
+ int val = value;\
+ s->bi_buf |= (ush)val << s->bi_valid;\
+ put_short(s, s->bi_buf);\
+ s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
+ s->bi_valid += len - Buf_size;\
+ } else {\
+ s->bi_buf |= (ush)(value) << s->bi_valid;\
+ s->bi_valid += len;\
+ }\
+}
+#endif /* H2_ZLIB_DEBUG */
+
+
+/* the arguments must not have side effects */
+
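The send_bits()/put_short() pair implements an LSB-first bit accumulator: new
bits are OR-ed in above the bi_valid bits already buffered, and whole 16-bit
units are flushed as two bytes, low byte first. A standalone sketch of the
same arithmetic, with output redirected to stdout instead of pending_buf;
Buf_size is assumed to be 16 (the two-byte bit buffer, as in zlib's
deflate.h):

    #include <stdio.h>

    #define BUF_SIZE 16

    static unsigned short bi_buf;
    static int bi_valid;

    static void emit_short(unsigned short w)
    {
        printf("flush bytes: %02x %02x\n", w & 0xff, (w >> 8) & 0xff);
    }

    static void send_bits_sketch(int value, int length)
    {
        if (bi_valid > BUF_SIZE - length) {
            /* buffer overflows: flush 16 bits, carry the remainder */
            bi_buf |= (unsigned short)value << bi_valid;
            emit_short(bi_buf);
            bi_buf = (unsigned short)value >> (BUF_SIZE - bi_valid);
            bi_valid += length - BUF_SIZE;
        } else {
            bi_buf |= (unsigned short)value << bi_valid;
            bi_valid += length;
        }
    }

    int main(void)
    {
        send_bits_sketch(0x5, 3);    /* three bits */
        send_bits_sketch(0x3f, 6);   /* six bits */
        send_bits_sketch(0x1ff, 9);  /* nine bits: forces a flush */
        printf("leftover: %d bits, value %#x\n", bi_valid,
            (unsigned)bi_buf);
        return 0;
    }

After 3 + 6 + 9 = 18 bits, one 16-bit flush has happened and 2 bits (value
0x3) remain buffered, which is what the carry arithmetic above guarantees.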
+/* ===========================================================================
+ * Initialize the various 'constant' tables.
+ */
+local
+void
+tr_static_init(void)
+{
+#if defined(GEN_TREES_H) || !defined(STDC)
+ static int static_init_done = 0;
+ int n; /* iterates over tree elements */
+ int bits; /* bit counter */
+ int length; /* length value */
+ int code; /* code value */
+ int dist; /* distance index */
+ ush bl_count[MAX_BITS+1];
+ /* number of codes at each bit length for an optimal tree */
+
+ if (static_init_done) return;
+
+ /* For some embedded targets, global variables are not initialized: */
+#ifdef NO_INIT_GLOBAL_POINTERS
+ static_l_desc.static_tree = static_ltree;
+ static_l_desc.extra_bits = extra_lbits;
+ static_d_desc.static_tree = static_dtree;
+ static_d_desc.extra_bits = extra_dbits;
+ static_bl_desc.extra_bits = extra_blbits;
+#endif
+
+ /* Initialize the mapping length (0..255) -> length code (0..28) */
+ length = 0;
+ for (code = 0; code < LENGTH_CODES-1; code++) {
+ base_length[code] = length;
+ for (n = 0; n < (1<<extra_lbits[code]); n++) {
+ _length_code[length++] = (uch)code;
+ }
+ }
+ Assert (length == 256, "tr_static_init: length != 256");
+ /* Note that the length 255 (match length 258) can be represented
+ * in two different ways: code 284 + 5 bits or code 285, so we
+ * overwrite length_code[255] to use the best encoding:
+ */
+ _length_code[length-1] = (uch)code;
+
+ /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
+ dist = 0;
+ for (code = 0 ; code < 16; code++) {
+ base_dist[code] = dist;
+ for (n = 0; n < (1<<extra_dbits[code]); n++) {
+ _dist_code[dist++] = (uch)code;
+ }
+ }
+ Assert (dist == 256, "tr_static_init: dist != 256");
+ dist >>= 7; /* from now on, all distances are divided by 128 */
+ for ( ; code < D_CODES; code++) {
+ base_dist[code] = dist << 7;
+ for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
+ _dist_code[256 + dist++] = (uch)code;
+ }
+ }
+ Assert (dist == 256, "tr_static_init: 256+dist != 512");
+
+ /* Construct the codes of the static literal tree */
+ for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
+ n = 0;
+ while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
+ while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
+ while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
+ while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
+ /* Codes 286 and 287 do not exist, but we must include them in the
+ * tree construction to get a canonical Huffman tree (longest code
+ * all ones)
+ */
+ gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
+
+ /* The static distance tree is trivial: */
+ for (n = 0; n < D_CODES; n++) {
+ static_dtree[n].Len = 5;
+ static_dtree[n].Code = bi_reverse((unsigned)n, 5);
+ }
+ static_init_done = 1;
+
+# ifdef GEN_TREES_H
+ gen_trees_header();
+# endif
+#endif /* defined(GEN_TREES_H) || !defined(STDC) */
+}
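The length map that tr_static_init() builds is easy to re-derive: each length
code covers 1 << extra_lbits[code] normalized match lengths, and the final
slot is overwritten so match length 258 uses code 285 rather than code 284
plus five extra bits. A standalone sketch (LENGTH_CODES taken as 29, matching
the extra_lbits table above) whose output can be checked against
_length_code[] and base_length[] in hammer2_zlib_trees.h:

    #include <stdio.h>

    #define LENGTH_CODES 29

    static const int extra_lbits[LENGTH_CODES] =
        {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};

    int main(void)
    {
        unsigned char length_code[256];
        int base_length[LENGTH_CODES];
        int code, n, length = 0;

        for (code = 0; code < LENGTH_CODES - 1; code++) {
            base_length[code] = length;
            for (n = 0; n < (1 << extra_lbits[code]); n++)
                length_code[length++] = (unsigned char)code;
        }
        /* match length 258 (normalized 255) gets the shorter code 285 */
        length_code[255] = LENGTH_CODES - 1;

        printf("base_length[8] = %d (expect 8)\n", base_length[8]);
        printf("length_code[8] = %d, length_code[9] = %d (both code 8)\n",
            length_code[8], length_code[9]);
        printf("length_code[255] = %d (code 28, symbol 285 in the ltree)\n",
            length_code[255]);
        return 0;
    }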
+
+/* ===========================================================================
+ * Generate the file trees.h describing the static trees.
+ */
+#ifdef GEN_TREES_H
+# ifndef H2_ZLIB_DEBUG
+# include <stdio.h>
+# endif
+
+# define SEPARATOR(i, last, width) \
+ ((i) == (last)? "\n};\n\n" : \
+ ((i) % (width) == (width)-1 ? ",\n" : ", "))
+
+void
+gen_trees_header()
+{
+ FILE *header = fopen("trees.h", "w");
+ int i;
+
+ Assert (header != NULL, "Can't open trees.h");
+ fprintf(header,
+ "/* header created automatically with -DGEN_TREES_H */\n\n");
+
+ fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
+ for (i = 0; i < L_CODES+2; i++) {
+ fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
+ static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
+ }
+
+ fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
+ for (i = 0; i < D_CODES; i++) {
+ fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
+ static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
+ }
+
+ fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n");
+ for (i = 0; i < DIST_CODE_LEN; i++) {
+ fprintf(header, "%2u%s", _dist_code[i],
+ SEPARATOR(i, DIST_CODE_LEN-1, 20));
+ }
+
+ fprintf(header,
+ "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
+ for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
+ fprintf(header, "%2u%s", _length_code[i],
+ SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
+ }
+
+ fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
+ for (i = 0; i < LENGTH_CODES; i++) {
+ fprintf(header, "%1u%s", base_length[i],
+ SEPARATOR(i, LENGTH_CODES-1, 20));
+ }
+
+ fprintf(header, "local const int base_dist[D_CODES] = {\n");
+ for (i = 0; i < D_CODES; i++) {
+ fprintf(header, "%5u%s", base_dist[i],
+ SEPARATOR(i, D_CODES-1, 10));
+ }
+
+ fclose(header);
+}
+#endif /* GEN_TREES_H */
+
+/* ===========================================================================
+ * Initialize the tree data structures for a new zlib stream.
+ */
+void
+ZLIB_INTERNAL
+_tr_init(deflate_state *s)
+{
+ tr_static_init();
+
+ s->l_desc.dyn_tree = s->dyn_ltree;
+ s->l_desc.stat_desc = &static_l_desc;
+
+ s->d_desc.dyn_tree = s->dyn_dtree;
+ s->d_desc.stat_desc = &static_d_desc;
+
+ s->bl_desc.dyn_tree = s->bl_tree;
+ s->bl_desc.stat_desc = &static_bl_desc;
+
+ s->bi_buf = 0;
+ s->bi_valid = 0;
+#ifdef H2_ZLIB_DEBUG
+ s->compressed_len = 0L;
+ s->bits_sent = 0L;
+#endif
+
+ /* Initialize the first block of the first file: */
+ init_block(s);
+}
+
+/* ===========================================================================
+ * Initialize a new block.
+ */
+local
+void
+init_block(deflate_state *s)
+{
+ int n; /* iterates over tree elements */
+
+ /* Initialize the trees. */
+ for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0;
+ for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0;
+ for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
+
+ s->dyn_ltree[END_BLOCK].Freq = 1;
+ s->opt_len = s->static_len = 0L;
+ s->last_lit = s->matches = 0;
+}
+
+#define SMALLEST 1
+/* Index within the heap array of least frequent node in the Huffman tree */
+
+
+/* ===========================================================================
+ * Remove the smallest element from the heap and recreate the heap with
+ * one less element. Updates heap and heap_len.
+ */
+#define pqremove(s, tree, top) \
+{\
+ top = s->heap[SMALLEST]; \
+ s->heap[SMALLEST] = s->heap[s->heap_len--]; \
+ pqdownheap(s, tree, SMALLEST); \
+}
+
+/* ===========================================================================
+ * Compares two subtrees, using the tree depth as tie breaker when
+ * the subtrees have equal frequency. This minimizes the worst case length.
+ */
+#define smaller(tree, n, m, depth) \
+ (tree[n].Freq < tree[m].Freq || \
+ (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
+
+/* ===========================================================================
+ * Restore the heap property by moving down the tree starting at node k,
+ * exchanging a node with the smallest of its two sons if necessary, stopping
+ * when the heap property is re-established (each father smaller than its
+ * two sons).
+ */
+local
+void
+pqdownheap(deflate_state *s, ct_data *tree, int k) /* the tree to restore, node to move down */
+{
+ int v = s->heap[k];
+ int j = k << 1; /* left son of k */
+ while (j <= s->heap_len) {
+ /* Set j to the smallest of the two sons: */
+ if (j < s->heap_len &&
+ smaller(tree, s->heap[j+1], s->heap[j], s->depth)) {
+ j++;
+ }
+ /* Exit if v is smaller than both sons */
+ if (smaller(tree, v, s->heap[j], s->depth)) break;
+
+ /* Exchange v with the smallest son */
+ s->heap[k] = s->heap[j]; k = j;
+
+ /* And continue down the tree, setting j to the left son of k */
+ j <<= 1;
+ }
+ s->heap[k] = v;
+}
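pqdownheap() is a textbook sift-down over the 1-based heap[] layout described
in build_tree() below, plus the depth tie-breaker from smaller(). A standalone
sketch of the same motion on a plain array of frequencies, without the
tie-breaker, to make the index arithmetic visible:

    #include <stdio.h>

    static void sift_down(int *heap, int heap_len, int k)
    {
        int v = heap[k];
        int j = k << 1;                 /* left son of k */

        while (j <= heap_len) {
            if (j < heap_len && heap[j + 1] < heap[j])
                j++;                    /* pick the smaller son */
            if (v <= heap[j])
                break;                  /* heap property restored */
            heap[k] = heap[j];
            k = j;
            j <<= 1;                    /* continue with left son of k */
        }
        heap[k] = v;
    }

    int main(void)
    {
        /* heap[0] unused; 9 replaces the old root and must sink */
        int heap[] = {0, 9, 2, 3, 7, 4, 6, 5};
        int i;

        sift_down(heap, 7, 1);
        for (i = 1; i <= 7; i++)
            printf("%d ", heap[i]);     /* prints: 2 4 3 7 9 6 5 */
        printf("\n");
        return 0;
    }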
+
+/* ===========================================================================
+ * Compute the optimal bit lengths for a tree and update the total bit length
+ * for the current block.
+ * IN assertion: the fields freq and dad are set, heap[heap_max] and
+ * above are the tree nodes sorted by increasing frequency.
+ * OUT assertions: the field len is set to the optimal bit length, the
+ * array bl_count contains the frequencies for each bit length.
+ * The length opt_len is updated; static_len is also updated if stree is
+ * not null.
+ */
+local
+void
+gen_bitlen(deflate_state *s, tree_desc *desc)
+{
+ ct_data *tree = desc->dyn_tree;
+ int max_code = desc->max_code;
+ const ct_data *stree = desc->stat_desc->static_tree;
+ const intf *extra = desc->stat_desc->extra_bits;
+ int base = desc->stat_desc->extra_base;
+ int max_length = desc->stat_desc->max_length;
+ int h; /* heap index */
+ int n, m; /* iterate over the tree elements */
+ int bits; /* bit length */
+ int xbits; /* extra bits */
+ ush f; /* frequency */
+ int overflow = 0; /* number of elements with bit length too large */
+
+ for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
+
+ /* In a first pass, compute the optimal bit lengths (which may
+ * overflow in the case of the bit length tree).
+ */
+ tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
+
+ for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
+ n = s->heap[h];
+ bits = tree[tree[n].Dad].Len + 1;
+ if (bits > max_length) bits = max_length, overflow++;
+ tree[n].Len = (ush)bits;
+ /* We overwrite tree[n].Dad which is no longer needed */
+
+ if (n > max_code) continue; /* not a leaf node */
+
+ s->bl_count[bits]++;
+ xbits = 0;
+ if (n >= base) xbits = extra[n-base];
+ f = tree[n].Freq;
+ s->opt_len += (ulg)f * (bits + xbits);
+ if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits);
+ }
+ if (overflow == 0) return;
+
+ Trace((stderr,"\nbit length overflow\n"));
+ /* This happens for example on obj2 and pic of the Calgary corpus */
+
+ /* Find the first bit length which could increase: */
+ do {
+ bits = max_length-1;
+ while (s->bl_count[bits] == 0) bits--;
+ s->bl_count[bits]--; /* move one leaf down the tree */
+ s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
+ s->bl_count[max_length]--;
+ /* The brother of the overflow item also moves one step up,
+ * but this does not affect bl_count[max_length]
+ */
+ overflow -= 2;
+ } while (overflow > 0);
+
+ /* Now recompute all bit lengths, scanning in increasing frequency.
+ * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
+ * lengths instead of fixing only the wrong ones. This idea is taken
+ * from 'ar' written by Haruhiko Okumura.)
+ */
+ for (bits = max_length; bits != 0; bits--) {
+ n = s->bl_count[bits];
+ while (n != 0) {
+ m = s->heap[--h];
+ if (m > max_code) continue;
+ if ((unsigned) tree[m].Len != (unsigned) bits) {
+ Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
+ s->opt_len += ((long)bits - (long)tree[m].Len)
+ *(long)tree[m].Freq;
+ tree[m].Len = (ush)bits;
+ }
+ n--;
+ }
+ }
+}
+
+/* ===========================================================================
+ * Generate the codes for a given tree and bit counts (which need not be
+ * optimal).
+ * IN assertion: the array bl_count contains the bit length statistics for
+ * the given tree and the field len is set for all tree elements.
+ * OUT assertion: the field code is set for all tree elements of non
+ * zero code length.
+ */
+local
+void
+gen_codes (ct_data *tree, int max_code, ushf *bl_count)
+ /* the tree to decorate */
+ /* max_code = largest code with non zero frequency */
+ /* *bl_count = number of codes at each bit length */
+{
+ ush next_code[MAX_BITS+1]; /* next code value for each bit length */
+ ush code = 0; /* running code value */
+ int bits; /* bit index */
+ int n; /* code index */
+
+ /* The distribution counts are first used to generate the code values
+ * without bit reversal.
+ */
+ for (bits = 1; bits <= MAX_BITS; bits++) {
+ next_code[bits] = code = (code + bl_count[bits-1]) << 1;
+ }
+ /* Check that the bit counts in bl_count are consistent. The last code
+ * must be all ones.
+ */
+ Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
+ "inconsistent bit counts");
+ Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
+
+ for (n = 0; n <= max_code; n++) {
+ int len = tree[n].Len;
+ if (len == 0) continue;
+ /* Now reverse the bits */
+ tree[n].Code = bi_reverse(next_code[len]++, len);
+
+ Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
+ n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
+ }
+}
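The next_code[] recurrence in gen_codes() is the canonical-code construction
of RFC 1951 section 3.2.2. A standalone sketch with the RFC's example lengths;
note that the real gen_codes() additionally bit-reverses each code with
bi_reverse(), because deflate transmits Huffman codes most-significant bit
first into an LSB-first bit stream:

    #include <stdio.h>

    #define MAX_BITS 15

    int main(void)
    {
        /* RFC 1951 section 3.2.2 example: code lengths for symbols A..H */
        int len[] = {3, 3, 3, 3, 3, 2, 4, 4};
        int bl_count[MAX_BITS + 1] = {0};
        unsigned next_code[MAX_BITS + 1];
        unsigned code = 0;
        int bits, n, b;

        for (n = 0; n < 8; n++)
            bl_count[len[n]]++;

        /* same recurrence as gen_codes(): first code of each bit length */
        for (bits = 1; bits <= MAX_BITS; bits++) {
            code = (code + bl_count[bits - 1]) << 1;
            next_code[bits] = code;
        }

        /* assign in symbol order; expect A=010 ... F=00 G=1110 H=1111 */
        for (n = 0; n < 8; n++) {
            unsigned c = next_code[len[n]]++;
            printf("%c: len %d code ", 'A' + n, len[n]);
            for (b = len[n] - 1; b >= 0; b--)
                putchar('0' + (int)((c >> b) & 1));
            putchar('\n');
        }
        return 0;
    }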
+
+/* ===========================================================================
+ * Construct one Huffman tree and assign the code bit strings and lengths.
+ * Update the total bit length for the current block.
+ * IN assertion: the field freq is set for all tree elements.
+ * OUT assertions: the fields len and code are set to the optimal bit length
+ * and corresponding code. The length opt_len is updated; static_len is
+ * also updated if stree is not null. The field max_code is set.
+ */
+local
+void
+build_tree(deflate_state *s, tree_desc *desc) /* the tree descriptor */
+{
+ ct_data *tree = desc->dyn_tree;
+ const ct_data *stree = desc->stat_desc->static_tree;
+ int elems = desc->stat_desc->elems;
+ int n, m; /* iterate over heap elements */
+ int max_code = -1; /* largest code with non zero frequency */
+ int node; /* new node being created */
+
+ /* Construct the initial heap, with least frequent element in
+ * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
+ * heap[0] is not used.
+ */
+ s->heap_len = 0, s->heap_max = HEAP_SIZE;
+
+ for (n = 0; n < elems; n++) {
+ if (tree[n].Freq != 0) {
+ s->heap[++(s->heap_len)] = max_code = n;
+ s->depth[n] = 0;
+ } else {
+ tree[n].Len = 0;
+ }
+ }
+
+ /* The pkzip format requires that at least one distance code exists,
+ * and that at least one bit should be sent even if there is only one
+ * possible code. So to avoid special checks later on we force at least
+ * two codes of non zero frequency.
+ */
+ while (s->heap_len < 2) {
+ node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
+ tree[node].Freq = 1;
+ s->depth[node] = 0;
+ s->opt_len--; if (stree) s->static_len -= stree[node].Len;
+ /* node is 0 or 1 so it does not have extra bits */
+ }
+ desc->max_code = max_code;
+
+ /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
+ * establish sub-heaps of increasing lengths:
+ */
+ for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
+
+ /* Construct the Huffman tree by repeatedly combining the least two
+ * frequent nodes.
+ */
+ node = elems; /* next internal node of the tree */
+ do {
+ pqremove(s, tree, n); /* n = node of least frequency */
+ m = s->heap[SMALLEST]; /* m = node of next least frequency */
+
+ s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
+ s->heap[--(s->heap_max)] = m;
+
+ /* Create a new node father of n and m */
+ tree[node].Freq = tree[n].Freq + tree[m].Freq;
+ s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ?
+ s->depth[n] : s->depth[m]) + 1);
+ tree[n].Dad = tree[m].Dad = (ush)node;
+#ifdef DUMP_BL_TREE
+ if (tree == s->bl_tree) {
+ fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
+ node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
+ }
+#endif
+ /* and insert the new node in the heap */
+ s->heap[SMALLEST] = node++;
+ pqdownheap(s, tree, SMALLEST);
+
+ } while (s->heap_len >= 2);
+
+ s->heap[--(s->heap_max)] = s->heap[SMALLEST];
+
+ /* At this point, the fields freq and dad are set. We can now
+ * generate the bit lengths.
+ */
+ gen_bitlen(s, (tree_desc *)desc);
+
+ /* The field len is now set, we can generate the bit codes */
+ gen_codes ((ct_data *)tree, max_code, s->bl_count);
+}
+
+/* ===========================================================================
+ * Scan a literal or distance tree to determine the frequencies of the codes
+ * in the bit length tree.
+ */
+local
+void
+scan_tree (deflate_state *s, ct_data *tree, int max_code)
+ /* the tree to be scanned */
+ /* and its largest code of non zero frequency */
+{
+ int n; /* iterates over all tree elements */
+ int prevlen = -1; /* last emitted length */
+ int curlen; /* length of current code */
+ int nextlen = tree[0].Len; /* length of next code */
+ int count = 0; /* repeat count of the current code */
+ int max_count = 7; /* max repeat count */
+ int min_count = 4; /* min repeat count */
+
+ if (nextlen == 0) max_count = 138, min_count = 3;
+ tree[max_code+1].Len = (ush)0xffff; /* guard */
+
+ for (n = 0; n <= max_code; n++) {
+ curlen = nextlen; nextlen = tree[n+1].Len;
+ if (++count < max_count && curlen == nextlen) {
+ continue;
+ } else if (count < min_count) {
+ s->bl_tree[curlen].Freq += count;
+ } else if (curlen != 0) {
+ if (curlen != prevlen) s->bl_tree[curlen].Freq++;
+ s->bl_tree[REP_3_6].Freq++;
+ } else if (count <= 10) {
+ s->bl_tree[REPZ_3_10].Freq++;
+ } else {
+ s->bl_tree[REPZ_11_138].Freq++;
+ }
+ count = 0; prevlen = curlen;
+ if (nextlen == 0) {
+ max_count = 138, min_count = 3;
+ } else if (curlen == nextlen) {
+ max_count = 6, min_count = 3;
+ } else {
+ max_count = 7, min_count = 4;
+ }
+ }
+}
+
+/* ===========================================================================
+ * Send a literal or distance tree in compressed form, using the codes in
+ * bl_tree.
+ */
+local
+void
+send_tree (deflate_state *s, ct_data *tree, int max_code) /* same as above */
+{
+ int n; /* iterates over all tree elements */
+ int prevlen = -1; /* last emitted length */
+ int curlen; /* length of current code */
+ int nextlen = tree[0].Len; /* length of next code */
+ int count = 0; /* repeat count of the current code */
+ int max_count = 7; /* max repeat count */
+ int min_count = 4; /* min repeat count */
+
+ /* tree[max_code+1].Len = -1; */ /* guard already set */
+ if (nextlen == 0) max_count = 138, min_count = 3;
+
+ for (n = 0; n <= max_code; n++) {
+ curlen = nextlen; nextlen = tree[n+1].Len;
+ if (++count < max_count && curlen == nextlen) {
+ continue;
+ } else if (count < min_count) {
+ do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
+
+ } else if (curlen != 0) {
+ if (curlen != prevlen) {
+ send_code(s, curlen, s->bl_tree); count--;
+ }
+ Assert(count >= 3 && count <= 6, " 3_6?");
+ send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
+
+ } else if (count <= 10) {
+ send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
+
+ } else {
+ send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
+ }
+ count = 0; prevlen = curlen;
+ if (nextlen == 0) {
+ max_count = 138, min_count = 3;
+ } else if (curlen == nextlen) {
+ max_count = 6, min_count = 3;
+ } else {
+ max_count = 7, min_count = 4;
+ }
+ }
+}
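scan_tree() and send_tree() share one run-length grammar over code lengths:
literal lengths 0..15, symbol 16 (repeat the previous length 3-6 times, 2
extra bits), symbol 17 (3-10 zeros, 3 extra bits) and symbol 18 (11-138 zeros,
7 extra bits). A standalone sketch that prints the symbol stream for a small
length vector instead of sending codes; the guard that scan_tree() plants at
tree[max_code+1] is emulated inline, and the sample lengths are illustrative:

    #include <stdio.h>

    #define REP_3_6     16
    #define REPZ_3_10   17
    #define REPZ_11_138 18

    static void rle_lengths(const int *len, int max_code)
    {
        int prevlen = -1, curlen;
        int nextlen = len[0];
        int count = 0;
        int max_count = 7, min_count = 4;
        int n;

        if (nextlen == 0) { max_count = 138; min_count = 3; }

        for (n = 0; n <= max_code; n++) {
            curlen = nextlen;
            nextlen = (n == max_code) ? 0xffff : len[n + 1]; /* guard */
            if (++count < max_count && curlen == nextlen) {
                continue;                       /* run still growing */
            } else if (count < min_count) {
                do { printf("len %d\n", curlen); } while (--count);
            } else if (curlen != 0) {
                if (curlen != prevlen) { printf("len %d\n", curlen); count--; }
                printf("sym %d (repeat x%d)\n", REP_3_6, count);
            } else if (count <= 10) {
                printf("sym %d (%d zeros)\n", REPZ_3_10, count);
            } else {
                printf("sym %d (%d zeros)\n", REPZ_11_138, count);
            }
            count = 0; prevlen = curlen;
            if (nextlen == 0)           { max_count = 138; min_count = 3; }
            else if (curlen == nextlen) { max_count = 6;   min_count = 3; }
            else                        { max_count = 7;   min_count = 4; }
        }
    }

    int main(void)
    {
        /* five 3s, then twelve zeros, then a lone 5 */
        int len[] = {3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5};
        rle_lengths(len, 17);
        return 0;
    }

The output is one literal 3, a symbol-16 repeat of 4, a symbol-18 run of 12
zeros, and a literal 5: 18 lengths collapsed to four emissions.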
+
+/* ===========================================================================
+ * Construct the Huffman tree for the bit lengths and return the index in
+ * bl_order of the last bit length code to send.
+ */
+local
+int
+build_bl_tree(deflate_state *s)
+{
+ int max_blindex; /* index of last bit length code of non zero freq */
+
+ /* Determine the bit length frequencies for literal and distance trees */
+ scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
+ scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
+
+ /* Build the bit length tree: */
+ build_tree(s, (tree_desc *)(&(s->bl_desc)));
+ /* opt_len now includes the length of the tree representations, except
+ * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
+ */
+
+ /* Determine the number of bit length codes to send. The pkzip format
+ * requires that at least 4 bit length codes be sent. (appnote.txt says
+ * 3 but the actual value used is 4.)
+ */
+ for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
+ if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
+ }
+ /* Update opt_len to include the bit length tree and counts */
+ s->opt_len += 3*(max_blindex+1) + 5+5+4;
+ Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
+ s->opt_len, s->static_len));
+
+ return max_blindex;
+}
+
+/* ===========================================================================
+ * Send the header for a block using dynamic Huffman trees: the counts, the
+ * lengths of the bit length codes, the literal tree and the distance tree.
+ * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
+ */
+local
+void
+send_all_trees(deflate_state *s, int lcodes, int dcodes, int blcodes)
+/* number of codes for each tree */
+{
+ int rank; /* index in bl_order */
+
+ Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
+ Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
+ "too many codes");
+ Tracev((stderr, "\nbl counts: "));
+ send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
+ send_bits(s, dcodes-1, 5);
+ send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */
+ for (rank = 0; rank < blcodes; rank++) {
+ Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
+ send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
+ }
+ Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
+
+ send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
+ Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
+
+ send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
+ Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
+}
+
+/* ===========================================================================
+ * Send a stored block
+ */
+void
+ZLIB_INTERNAL
+_tr_stored_block(deflate_state *s, charf *buf,
+ ulg stored_len, int last) /* one if this is the last block for a file */
+{
+ send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */
+#ifdef H2_ZLIB_DEBUG
+ s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
+ s->compressed_len += (stored_len + 4) << 3;
+#endif
+ copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
+}
+
+/* ===========================================================================
+ * Flush the bits in the bit buffer to pending output (leaves at most 7 bits)
+ */
+void
+ZLIB_INTERNAL
+_tr_flush_bits(deflate_state *s)
+{
+ bi_flush(s);
+}
+
+/* ===========================================================================
+ * Send one empty static block to give enough lookahead for inflate.
+ * This takes 10 bits, of which 7 may remain in the bit buffer.
+ */
+void
+ZLIB_INTERNAL
+_tr_align(deflate_state *s)
+{
+ send_bits(s, STATIC_TREES<<1, 3);
+ send_code(s, END_BLOCK, static_ltree);
+#ifdef H2_ZLIB_DEBUG
+ s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
+#endif
+ bi_flush(s);
+}
+
+/* ===========================================================================
+ * Determine the best encoding for the current block: dynamic trees, static
+ * trees or store, and output the encoded block to the zip file.
+ */
+void
+ZLIB_INTERNAL
+_tr_flush_block(deflate_state *s, charf *buf,
+ ulg stored_len, int last)
+{
+ ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
+ int max_blindex = 0; /* index of last bit length code of non zero freq */
+
+ /* Build the Huffman trees unless a stored block is forced */
+ if (s->level > 0) {
+
+ /* Check if the file is binary or text */
+ if (s->strm->data_type == Z_UNKNOWN)
+ s->strm->data_type = detect_data_type(s);
+
+ /* Construct the literal and distance trees */
+ build_tree(s, (tree_desc *)(&(s->l_desc)));
+ Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
+ s->static_len));
+
+ build_tree(s, (tree_desc *)(&(s->d_desc)));
+ Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
+ s->static_len));
+ /* At this point, opt_len and static_len are the total bit lengths of
+ * the compressed block data, excluding the tree representations.
+ */
+
+ /* Build the bit length tree for the above two trees, and get the index
+ * in bl_order of the last bit length code to send.
+ */
+ max_blindex = build_bl_tree(s);
+
+ /* Determine the best encoding. Compute the block lengths in bytes. */
+ opt_lenb = (s->opt_len+3+7)>>3;
+ static_lenb = (s->static_len+3+7)>>3;
+
+ Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
+ opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
+ s->last_lit));
+
+ if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
+
+ } else {
+ Assert(buf != (char*)0, "lost buf");
+ opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
+ }
+
+#ifdef FORCE_STORED
+ if (buf != (char*)0) { /* force stored block */
+#else
+ if (stored_len+4 <= opt_lenb && buf != (char*)0) {
+ /* 4: two words for the lengths */
+#endif
+ /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
+ * Otherwise we can't have processed more than WSIZE input bytes since
+ * the last block flush, because compression would have been
+ * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
+ * transform a block into a stored block.
+ */
+ _tr_stored_block(s, buf, stored_len, last);
+
+#ifdef FORCE_STATIC
+ } else if (static_lenb >= 0) { /* force static trees */
+#else
+ } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
+#endif
+ send_bits(s, (STATIC_TREES<<1)+last, 3);
+ compress_block(s, (const ct_data *)static_ltree,
+ (const ct_data *)static_dtree);
+#ifdef H2_ZLIB_DEBUG
+ s->compressed_len += 3 + s->static_len;
+#endif
+ } else {
+ send_bits(s, (DYN_TREES<<1)+last, 3);
+ send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
+ max_blindex+1);
+ compress_block(s, (const ct_data *)s->dyn_ltree,
+ (const ct_data *)s->dyn_dtree);
+#ifdef H2_ZLIB_DEBUG
+ s->compressed_len += 3 + s->opt_len;
+#endif
+ }
+ Assert (s->compressed_len == s->bits_sent, "bad compressed size");
+ /* The above check is made mod 2^32, for files larger than 512 MB
+ * and uLong implemented on 32 bits.
+ */
+ init_block(s);
+
+ if (last) {
+ bi_windup(s);
+#ifdef H2_ZLIB_DEBUG
+ s->compressed_len += 7; /* align on byte boundary */
+#endif
+ }
+ Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
+ s->compressed_len-7*last));
+}
+
+/* ===========================================================================
+ * Save the match info and tally the frequency counts. Return true if
+ * the current block must be flushed.
+ */
+int
+ZLIB_INTERNAL
+_tr_tally (deflate_state *s, unsigned dist, unsigned lc)
+{
+ s->d_buf[s->last_lit] = (ush)dist;
+ s->l_buf[s->last_lit++] = (uch)lc;
+ if (dist == 0) {
+ /* lc is the unmatched char */
+ s->dyn_ltree[lc].Freq++;
+ } else {
+ s->matches++;
+ /* Here, lc is the match length - MIN_MATCH */
+ dist--; /* dist = match distance - 1 */
+ Assert((ush)dist < (ush)MAX_DIST(s) &&
+ (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
+ (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match");
+
+ s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
+ s->dyn_dtree[d_code(dist)].Freq++;
+ }
+
+#ifdef TRUNCATE_BLOCK
+ /* Try to guess if it is profitable to stop the current block here */
+ if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
+ /* Compute an upper bound for the compressed length */
+ ulg out_length = (ulg)s->last_lit*8L;
+ ulg in_length = (ulg)((long)s->strstart - s->block_start);
+ int dcode;
+ for (dcode = 0; dcode < D_CODES; dcode++) {
+ out_length += (ulg)s->dyn_dtree[dcode].Freq *
+ (5L+extra_dbits[dcode]);
+ }
+ out_length >>= 3;
+ Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
+ s->last_lit, in_length, out_length,
+ 100L - out_length*100L/in_length));
+ if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
+ }
+#endif
+ return (s->last_lit == s->lit_bufsize-1);
+ /* We avoid equality with lit_bufsize because of wraparound at 64K
+ * on 16 bit machines and because stored blocks are restricted to
+ * 64K-1 bytes.
+ */
+}
+
+/* ===========================================================================
+ * Send the block data compressed using the given Huffman trees
+ */
+local
+void
+compress_block(deflate_state *s, const ct_data *ltree, const ct_data *dtree)
+{
+ unsigned dist; /* distance of matched string */
+ int lc; /* match length or unmatched char (if dist == 0) */
+ unsigned lx = 0; /* running index in l_buf */
+ unsigned code; /* the code to send */
+ int extra; /* number of extra bits to send */
+
+ if (s->last_lit != 0) do {
+ dist = s->d_buf[lx];
+ lc = s->l_buf[lx++];
+ if (dist == 0) {
+ send_code(s, lc, ltree); /* send a literal byte */
+ Tracecv(isgraph(lc), (stderr," '%c' ", lc));
+ } else {
+ /* Here, lc is the match length - MIN_MATCH */
+ code = _length_code[lc];
+ send_code(s, code+LITERALS+1, ltree); /* send the length code */
+ extra = extra_lbits[code];
+ if (extra != 0) {
+ lc -= base_length[code];
+ send_bits(s, lc, extra); /* send the extra length bits */
+ }
+ dist--; /* dist is now the match distance - 1 */
+ code = d_code(dist);
+ Assert (code < D_CODES, "bad d_code");
+
+ send_code(s, code, dtree); /* send the distance code */
+ extra = extra_dbits[code];
+ if (extra != 0) {
+ dist -= base_dist[code];
+ send_bits(s, dist, extra); /* send the extra distance bits */
+ }
+ } /* literal or match pair ? */
+
+ /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
+ Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
+ "pendingBuf overflow");
+
+ } while (lx < s->last_lit);
+
+ send_code(s, END_BLOCK, ltree);
+}
+
+/* ===========================================================================
+ * Check if the data type is TEXT or BINARY, using the following algorithm:
+ * - TEXT if the two conditions below are satisfied:
+ * a) There are no non-portable control characters belonging to the
+ * "black list" (0..6, 14..25, 28..31).
+ * b) There is at least one printable character belonging to the
+ * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
+ * - BINARY otherwise.
+ * - The following partially-portable control characters form a
+ * "gray list" that is ignored in this detection algorithm:
+ * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
+ * IN assertion: the fields Freq of dyn_ltree are set.
+ */
+local
+int
+detect_data_type(deflate_state *s)
+{
+ /* black_mask is the bit mask of black-listed bytes
+ * set bits 0..6, 14..25, and 28..31
+ * 0xf3ffc07f = binary 11110011111111111100000001111111
+ */
+ unsigned long black_mask = 0xf3ffc07fUL;
+ int n;
+
+ /* Check for non-textual ("black-listed") bytes. */
+ for (n = 0; n <= 31; n++, black_mask >>= 1)
+ if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0))
+ return Z_BINARY;
+
+ /* Check for textual ("white-listed") bytes. */
+ if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
+ || s->dyn_ltree[13].Freq != 0)
+ return Z_TEXT;
+ for (n = 32; n < LITERALS; n++)
+ if (s->dyn_ltree[n].Freq != 0)
+ return Z_TEXT;
+
+ /* There are no "black-listed" or "white-listed" bytes:
+ * this stream either is empty or has tolerated ("gray-listed") bytes only.
+ */
+ return Z_BINARY;
+}
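The black_mask constant is just the bit ranges from the comment packed into
one word. A standalone sketch that rebuilds it from those ranges as a sanity
check:

    #include <stdio.h>

    int main(void)
    {
        unsigned long mask = 0;
        int n;

        for (n = 0; n <= 6; n++)        /* control chars 0..6 */
            mask |= 1UL << n;
        for (n = 14; n <= 25; n++)      /* control chars 14..25 */
            mask |= 1UL << n;
        for (n = 28; n <= 31; n++)      /* control chars 28..31 */
            mask |= 1UL << n;

        printf("mask = 0x%lx (expect 0xf3ffc07f)\n", mask);
        return 0;
    }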
+
+/* ===========================================================================
+ * Reverse the first len bits of a code, using straightforward code (a faster
+ * method would use a table)
+ * IN assertion: 1 <= len <= 15
+ */
+local
+unsigned
+bi_reverse(unsigned code, int len)
+{
+ register unsigned res = 0;
+ do {
+ res |= code & 1;
+ code >>= 1, res <<= 1;
+ } while (--len > 0);
+ return res >> 1;
+}
+
+/* ===========================================================================
+ * Flush the bit buffer, keeping at most 7 bits in it.
+ */
+local
+void
+bi_flush(deflate_state *s)
+{
+ if (s->bi_valid == 16) {
+ put_short(s, s->bi_buf);
+ s->bi_buf = 0;
+ s->bi_valid = 0;
+ } else if (s->bi_valid >= 8) {
+ put_byte(s, (Byte)s->bi_buf);
+ s->bi_buf >>= 8;
+ s->bi_valid -= 8;
+ }
+}
+
+/* ===========================================================================
+ * Flush the bit buffer and align the output on a byte boundary
+ */
+local
+void
+bi_windup(deflate_state *s)
+{
+ if (s->bi_valid > 8) {
+ put_short(s, s->bi_buf);
+ } else if (s->bi_valid > 0) {
+ put_byte(s, (Byte)s->bi_buf);
+ }
+ s->bi_buf = 0;
+ s->bi_valid = 0;
+#ifdef H2_ZLIB_DEBUG
+ s->bits_sent = (s->bits_sent+7) & ~7;
+#endif
+}
+
+/* ===========================================================================
+ * Copy a stored block, storing first the length and its
+ * one's complement if requested.
+ */
+local
+void
+copy_block(deflate_state *s, charf *buf, unsigned len, int header)
+{
+ bi_windup(s); /* align on byte boundary */
+
+ if (header) {
+ put_short(s, (ush)len);
+ put_short(s, (ush)~len);
+#ifdef H2_ZLIB_DEBUG
+ s->bits_sent += 2*16;
+#endif
+ }
+#ifdef H2_ZLIB_DEBUG
+ s->bits_sent += (ulg)len<<3;
+#endif
+ while (len--) {
+ put_byte(s, *buf++);
+ }
+}
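copy_block()'s optional header is the stored-block framing of RFC 1951
section 3.2.4: LEN and its one's complement NLEN, both LSB-first, so the
inflater can verify LEN ^ NLEN == 0xffff. A standalone sketch of that framing
for an arbitrary example length:

    #include <stdio.h>

    int main(void)
    {
        unsigned len = 1000;                    /* example stored length */
        unsigned short w = (unsigned short)len;
        unsigned short nw = (unsigned short)~len;

        /* same byte order as put_short(): low byte first */
        printf("LEN  bytes: %02x %02x\n", w & 0xff, (w >> 8) & 0xff);
        printf("NLEN bytes: %02x %02x\n", nw & 0xff, (nw >> 8) & 0xff);
        printf("check: %s\n",
            (unsigned short)(w ^ nw) == 0xffff ? "ok" : "bad");
        return 0;
    }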
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib_zconf.h b/sys/fs/hammer2/zlib/hammer2_zlib_zconf.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib_zconf.h
@@ -0,0 +1,292 @@
+/* zconf.h -- configuration of the zlib compression library
+ * Copyright (C) 1995-2013 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#ifndef ZCONF_H
+#define ZCONF_H
+
+/* DRAGONFLY ADDITION - Allows inclusion in conf/files */
+#define Z_PREFIX
+
+/*
+ * If you *really* need a unique prefix for all types and library functions,
+ * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
+ * Even better than compiling with -DZ_PREFIX would be to use configure to set
+ * this permanently in zconf.h using "./configure --zprefix".
+ */
+#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */
+# define Z_PREFIX_SET
+
+/* all linked symbols */
+# define _dist_code z__dist_code
+# define _length_code z__length_code
+# define _tr_align z__tr_align
+# define _tr_flush_bits z__tr_flush_bits
+# define _tr_flush_block z__tr_flush_block
+# define _tr_init z__tr_init
+# define _tr_stored_block z__tr_stored_block
+# define _tr_tally z__tr_tally
+# define adler32 z_adler32
+# define adler32_combine z_adler32_combine
+# define adler32_combine64 z_adler32_combine64
+# define deflate z_deflate
+# define deflateBound z_deflateBound
+# define deflateCopy z_deflateCopy
+# define deflateEnd z_deflateEnd
+# define deflateInit2_ z_deflateInit2_
+# define deflateInit_ z_deflateInit_
+# define deflateParams z_deflateParams
+# define deflatePending z_deflatePending
+# define deflatePrime z_deflatePrime
+# define deflateReset z_deflateReset
+# define deflateResetKeep z_deflateResetKeep
+# define deflateSetDictionary z_deflateSetDictionary
+# define deflateSetHeader z_deflateSetHeader
+# define deflateTune z_deflateTune
+# define deflate_copyright z_deflate_copyright
+# define inflate z_inflate
+# define inflateCopy z_inflateCopy
+# define inflateEnd z_inflateEnd
+# define inflateGetHeader z_inflateGetHeader
+# define inflateInit2_ z_inflateInit2_
+# define inflateInit_ z_inflateInit_
+# define inflateMark z_inflateMark
+# define inflatePrime z_inflatePrime
+# define inflateReset z_inflateReset
+# define inflateReset2 z_inflateReset2
+# define inflateSetDictionary z_inflateSetDictionary
+# define inflateGetDictionary z_inflateGetDictionary
+# define inflateSync z_inflateSync
+# define inflateSyncPoint z_inflateSyncPoint
+# define inflateUndermine z_inflateUndermine
+# define inflateResetKeep z_inflateResetKeep
+# define inflate_copyright z_inflate_copyright
+# define inflate_fast z_inflate_fast
+# define inflate_table z_inflate_table
+# define zError z_zError
+# define zlibCompileFlags z_zlibCompileFlags
+# define zlibVersion z_zlibVersion
+
+/* all zlib typedefs in zlib.h and zconf.h */
+# define Byte z_Byte
+# define Bytef z_Bytef
+# define alloc_func z_alloc_func
+# define charf z_charf
+# define free_func z_free_func
+# define in_func z_in_func
+# define intf z_intf
+# define out_func z_out_func
+# define uInt z_uInt
+# define uIntf z_uIntf
+# define uLong z_uLong
+# define uLongf z_uLongf
+# define voidp z_voidp
+# define voidpc z_voidpc
+# define voidpf z_voidpf
+
+/* all zlib structs in zlib.h and zconf.h */
+# define internal_state z_internal_state
+
+#endif
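
The effect of the prefix table is purely textual: source code keeps using the stock zlib names while every linked symbol comes out prefixed. A hypothetical caller (h2_zlib_version_demo() is not in this diff) shows the idea:

#include "hammer2_zlib.h"

/* The preprocessor rewrites this call to z_zlibVersion(), so the
 * module's private zlib never collides with the base system's libz. */
const char *
h2_zlib_version_demo(void)
{
	return zlibVersion();
}
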
+
+#ifdef __STDC_VERSION__
+# ifndef STDC
+# define STDC
+# endif
+# if __STDC_VERSION__ >= 199901L
+# ifndef STDC99
+# define STDC99
+# endif
+# endif
+#endif
+#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus))
+# define STDC
+#endif
+#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__))
+# define STDC
+#endif
+
+#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */
+# define STDC
+#endif
+
+#ifndef STDC
+# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
+# define const /* note: need a more gentle solution here */
+# endif
+#endif
+
+#if defined(ZLIB_CONST) && !defined(z_const)
+# define z_const const
+#else
+# define z_const
+#endif
+
+/* Maximum value for memLevel in deflateInit2 */
+#ifndef MAX_MEM_LEVEL
+# ifdef MAXSEG_64K
+# define MAX_MEM_LEVEL 8
+# else
+# define MAX_MEM_LEVEL 9
+# endif
+#endif
+
+/* Maximum value for windowBits in deflateInit2 and inflateInit2.
+ * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
+ * created by gzip. (Files created by minigzip can still be extracted by
+ * gzip.)
+ */
+#ifndef MAX_WBITS
+# define MAX_WBITS 15 /* 32K LZ77 window */
+#endif
+
+/* The memory requirements for deflate are (in bytes):
+ (1 << (windowBits+2)) + (1 << (memLevel+9))
+ that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values)
+ plus a few kilobytes for small objects. For example, if you want to reduce
+ the default memory requirements from 256K to 128K, compile with
+ make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
+ Of course this will generally degrade compression (there's no free lunch).
+
+ The memory requirements for inflate are (in bytes) 1 << windowBits
+ that is, 32K for windowBits=15 (default value) plus a few kilobytes
+ for small objects.
+*/
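
The arithmetic is easy to check; a one-line sketch of the deflate side (deflate_mem_estimate() is a hypothetical name):

static unsigned long
deflate_mem_estimate(int windowBits, int memLevel)
{
	return (1UL << (windowBits + 2)) + (1UL << (memLevel + 9));
}

/* deflate_mem_estimate(15, 8) == 131072 + 131072 == 262144 (256K);
 * deflate_mem_estimate(14, 7) ==  65536 +  65536 == 131072 (128K),
 * matching the -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7 example above. */
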
+
+ /* Type declarations */
+
+#ifndef Z_ARG /* function prototypes for stdarg */
+# if defined(STDC) || defined(Z_HAVE_STDARG_H)
+# define Z_ARG(args) args
+# else
+# define Z_ARG(args) ()
+# endif
+#endif
+
+#ifndef ZEXTERN
+# define ZEXTERN extern
+#endif
+#ifndef ZEXPORT
+# define ZEXPORT
+#endif
+#ifndef ZEXPORTVA
+# define ZEXPORTVA
+#endif
+
+#ifndef FAR
+# define FAR
+#endif
+
+#if !defined(__MACTYPES__)
+typedef unsigned char Byte; /* 8 bits */
+#endif
+typedef unsigned int uInt; /* 16 bits or more */
+typedef unsigned long uLong; /* 32 bits or more */
+
+#ifdef SMALL_MEDIUM
+ /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
+# define Bytef Byte FAR
+#else
+ typedef Byte FAR Bytef;
+#endif
+typedef char FAR charf;
+typedef int FAR intf;
+typedef uInt FAR uIntf;
+typedef uLong FAR uLongf;
+
+#ifdef STDC
+ typedef void const *voidpc;
+ typedef void FAR *voidpf;
+ typedef void *voidp;
+#else
+ typedef Byte const *voidpc;
+ typedef Byte FAR *voidpf;
+ typedef Byte *voidp;
+#endif
+
+#if !defined(Z_U4) && defined(STDC)
+# include <sys/limits.h>
+# if (UINT_MAX == 0xffffffffUL)
+# define Z_U4 unsigned
+# elif (ULONG_MAX == 0xffffffffUL)
+# define Z_U4 unsigned long
+# elif (USHRT_MAX == 0xffffffffUL)
+# define Z_U4 unsigned short
+# endif
+#endif
+
+#ifdef Z_U4
+ typedef Z_U4 z_crc_t;
+#else
+ typedef unsigned long z_crc_t;
+#endif
+
+#if 1 /* was set to #if 1 by ./configure */
+# define Z_HAVE_UNISTD_H
+#endif
+
+#if 1 /* was set to #if 1 by ./configure */
+# define Z_HAVE_STDARG_H
+#endif
+
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+//# include <sys/stdarg.h> /* for va_list */
+#endif
+
+/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and
+ * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even
+ * though the former does not conform to the LFS document), but considering
+ * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as
+ * equivalently requesting no 64-bit operations
+ */
+#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1
+# undef _LARGEFILE64_SOURCE
+#endif
+
+#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H)
+# define Z_HAVE_UNISTD_H
+#endif
+
+#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0
+# define Z_LFS64
+#endif
+
+#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64)
+# define Z_LARGE64
+#endif
+
+#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64)
+# define Z_WANT64
+#endif
+
+#ifndef z_off_t
+# define z_off_t long
+#endif
+
+#if !defined(_WIN32) && defined(Z_LARGE64)
+# define z_off64_t off64_t
+#else
+# define z_off64_t z_off_t
+#endif
+
+/* MVS linker does not support external names larger than 8 bytes */
+#if defined(__MVS__)
+ #pragma map(deflateInit_,"DEIN")
+ #pragma map(deflateInit2_,"DEIN2")
+ #pragma map(deflateEnd,"DEEND")
+ #pragma map(deflateBound,"DEBND")
+ #pragma map(inflateInit_,"ININ")
+ #pragma map(inflateInit2_,"ININ2")
+ #pragma map(inflateEnd,"INEND")
+ #pragma map(inflateSync,"INSY")
+ #pragma map(inflateSetDictionary,"INSEDI")
+ //#pragma map(compressBound,"CMBND")
+ #pragma map(inflate_table,"INTABL")
+ #pragma map(inflate_fast,"INFA")
+ #pragma map(inflate_copyright,"INCOPY")
+#endif
+
+#endif /* ZCONF_H */
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib_zutil.h b/sys/fs/hammer2/zlib/hammer2_zlib_zutil.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib_zutil.h
@@ -0,0 +1,149 @@
+/* zutil.h -- internal interface and configuration of the compression library
+ * Copyright (C) 1995-2013 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+/* @(#) $Id$ */
+
+#ifndef ZUTIL_H
+#define ZUTIL_H
+
+#ifdef HAVE_HIDDEN
+# define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
+#else
+# define ZLIB_INTERNAL
+#endif
+
+#include <sys/param.h> /* for panic() */
+#include "hammer2_zlib.h"
+
+#ifndef local
+# define local static
+#endif
+/* compile with -Dlocal if your debugger can't find static symbols */
+
+typedef unsigned char uch;
+typedef uch FAR uchf;
+typedef unsigned short ush;
+typedef ush FAR ushf;
+typedef unsigned long ulg;
+
+extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
+/* (size given to avoid silly warnings with Visual C++) */
+
+#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)]
+
+#define ERR_RETURN(strm,err) \
+ return (strm->msg = ERR_MSG(err), (err))
+/* To be used only when the state is known to be valid */
+
+ /* common constants */
+
+#ifndef DEF_WBITS
+# define DEF_WBITS MAX_WBITS
+#endif
+/* default windowBits for decompression. MAX_WBITS is for compression only */
+
+#if MAX_MEM_LEVEL >= 8
+# define DEF_MEM_LEVEL 8
+#else
+# define DEF_MEM_LEVEL MAX_MEM_LEVEL
+#endif
+/* default memLevel */
+
+#define STORED_BLOCK 0
+#define STATIC_TREES 1
+#define DYN_TREES 2
+/* The three kinds of block type */
+
+#define MIN_MATCH 3
+#define MAX_MATCH 258
+/* The minimum and maximum match lengths */
+
+#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
+
+ /* target dependencies */
+
+#if defined(__BORLANDC__) && !defined(MSDOS)
+ #pragma warn -8004
+ #pragma warn -8008
+ #pragma warn -8066
+#endif
+
+/* provide prototypes for these when building zlib without LFS */
+#if !defined(_WIN32) && \
+ (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0)
+ uLong adler32_combine64(uLong, uLong, z_off_t);
+#endif
+
+ /* common defaults */
+
+#ifndef OS_CODE
+# define OS_CODE 0x03 /* assume Unix */
+#endif
+
+#ifndef F_OPEN
+# define F_OPEN(name, mode) fopen((name), (mode))
+#endif
+
+ /* functions */
+
+#if defined(pyr) || defined(Z_SOLO)
+# define NO_MEMCPY
+#endif
+#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__)
+ /* Use our own functions for small and medium model with MSC <= 5.0.
+ * You may have to use the same strategy for Borland C (untested).
+ * The __SC__ check is for Symantec.
+ */
+# define NO_MEMCPY
+#endif
+#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY)
+# define HAVE_MEMCPY
+#endif
+#ifdef HAVE_MEMCPY
+# ifdef SMALL_MEDIUM /* MSDOS small or medium model */
+# define zmemcpy _fmemcpy
+# define zmemcmp _fmemcmp
+# define zmemzero(dest, len) _fmemset(dest, 0, len)
+# else
+# define zmemcpy memcpy
+# define zmemcmp memcmp
+# define zmemzero(dest, len) memset(dest, 0, len)
+# endif
+#else
+ void ZLIB_INTERNAL zmemcpy(Bytef* dest, const Bytef* source, uInt len);
+ int ZLIB_INTERNAL zmemcmp(const Bytef* s1, const Bytef* s2, uInt len);
+ void ZLIB_INTERNAL zmemzero(Bytef* dest, uInt len);
+#endif
+
+/* Diagnostic functions */
+#ifdef H2_ZLIB_DEBUG
+# include <stdio.h>
+ extern int ZLIB_INTERNAL z_verbose;
+ extern void ZLIB_INTERNAL z_error(char *m);
+# define Assert(cond,msg) {if(!(cond)) z_error(msg);}
+# define Trace(x) {if (z_verbose>=0) fprintf x ;}
+# define Tracev(x) {if (z_verbose>0) fprintf x ;}
+# define Tracevv(x) {if (z_verbose>1) fprintf x ;}
+# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;}
+# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;}
+#else
+# define Assert(cond,msg)
+# define Trace(x)
+# define Tracev(x)
+# define Tracevv(x)
+# define Tracec(c,x)
+# define Tracecv(c,x)
+#endif
+
+/* Reverse the bytes in a 32-bit value */
+#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \
+ (((q) & 0xff00) << 8) + (((q) & 0xff) << 24))
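
Two hand-checked values for the macro, as a userland self-test sketch (zswap32_selftest() is hypothetical):

#include <assert.h>

static void
zswap32_selftest(void)
{
	assert(ZSWAP32(0x12345678UL) == 0x78563412UL);
	assert(ZSWAP32(0x000000ffUL) == 0xff000000UL);
}
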
+
+#endif /* ZUTIL_H */
diff --git a/sys/fs/hammer2/zlib/hammer2_zlib_zutil.c b/sys/fs/hammer2/zlib/hammer2_zlib_zutil.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/hammer2/zlib/hammer2_zlib_zutil.c
@@ -0,0 +1,182 @@
+/* zutil.c -- target dependent utility functions for the compression library
+ * Copyright (C) 1995-2005, 2010, 2011, 2012 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#include "hammer2_zlib_zutil.h"
+
+#ifndef NO_DUMMY_DECL
+struct internal_state {int dummy;}; /* for buggy compilers */
+#endif
+
+z_const char * const z_errmsg[10] = {
+"need dictionary", /* Z_NEED_DICT 2 */
+"stream end", /* Z_STREAM_END 1 */
+"", /* Z_OK 0 */
+"file error", /* Z_ERRNO (-1) */
+"stream error", /* Z_STREAM_ERROR (-2) */
+"data error", /* Z_DATA_ERROR (-3) */
+"insufficient memory", /* Z_MEM_ERROR (-4) */
+"buffer error", /* Z_BUF_ERROR (-5) */
+"incompatible version",/* Z_VERSION_ERROR (-6) */
+""};
+
+const char * zlibVersion(void);
+uLong zlibCompileFlags(void);
+const char * zError(int err);
+
+const
+char*
+zlibVersion(void)
+{
+ return ZLIB_VERSION;
+}
+
+uLong
+zlibCompileFlags(void)
+{
+ uLong flags;
+
+ flags = 0;
+ switch ((int)(sizeof(uInt))) {
+ case 2: break;
+ case 4: flags += 1; break;
+ case 8: flags += 2; break;
+ default: flags += 3;
+ }
+ switch ((int)(sizeof(uLong))) {
+ case 2: break;
+ case 4: flags += 1 << 2; break;
+ case 8: flags += 2 << 2; break;
+ default: flags += 3 << 2;
+ }
+ switch ((int)(sizeof(voidpf))) {
+ case 2: break;
+ case 4: flags += 1 << 4; break;
+ case 8: flags += 2 << 4; break;
+ default: flags += 3 << 4;
+ }
+ switch ((int)(sizeof(z_off_t))) {
+ case 2: break;
+ case 4: flags += 1 << 6; break;
+ case 8: flags += 2 << 6; break;
+ default: flags += 3 << 6;
+ }
+#ifdef H2_ZLIB_DEBUG
+ flags += 1 << 8;
+#endif
+#if defined(ASMV) || defined(ASMINF)
+ flags += 1 << 9;
+#endif
+#ifdef ZLIB_WINAPI
+ flags += 1 << 10;
+#endif
+#ifdef BUILDFIXED
+ flags += 1 << 12;
+#endif
+#ifdef DYNAMIC_CRC_TABLE
+ flags += 1 << 13;
+#endif
+#ifdef NO_GZCOMPRESS
+ flags += 1L << 16;
+#endif
+#ifdef NO_GZIP
+ flags += 1L << 17;
+#endif
+#ifdef PKZIP_BUG_WORKAROUND
+ flags += 1L << 20;
+#endif
+#ifdef FASTEST
+ flags += 1L << 21;
+#endif
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+# ifdef NO_vsnprintf
+ flags += 1L << 25;
+# ifdef HAS_vsprintf_void
+ flags += 1L << 26;
+# endif
+# else
+# ifdef HAS_vsnprintf_void
+ flags += 1L << 26;
+# endif
+# endif
+#else
+ flags += 1L << 24;
+# ifdef NO_snprintf
+ flags += 1L << 25;
+# ifdef HAS_sprintf_void
+ flags += 1L << 26;
+# endif
+# else
+# ifdef HAS_snprintf_void
+ flags += 1L << 26;
+# endif
+# endif
+#endif
+ return flags;
+}
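
The low byte of the result packs four 2-bit size codes (uInt, uLong, voidpf, z_off_t, in that order). A hypothetical decoder, with the value one would typically expect on LP64 FreeBSD noted in the trailing comment:

#include <stdio.h>

/* Hypothetical decoder for bits 0..7 of the flags word:
 * code 0 means sizeof == 2, 1 means 4, 2 means 8, 3 means other. */
static void
decode_size_flags(uLong flags)
{
	static const int sz[4] = { 2, 4, 8, 0 };

	printf("uInt=%d uLong=%d voidpf=%d z_off_t=%d\n",
	    sz[flags & 3], sz[(flags >> 2) & 3],
	    sz[(flags >> 4) & 3], sz[(flags >> 6) & 3]);
}

/* On LP64 FreeBSD one would typically expect
 * (flags & 0xff) == 1 + (2<<2) + (2<<4) + (2<<6) == 0xa9. */
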
+
+#ifdef H2_ZLIB_DEBUG
+
+# ifndef verbose
+# define verbose 0
+# endif
+int ZLIB_INTERNAL z_verbose = verbose;
+
+void ZLIB_INTERNAL z_error (char *m)
+{
+#if defined(_KERNEL)
+ panic("h2 %s: %s", __func__, m);
+#else
+ fprintf(stderr, "%s\n", m);
+ exit(1);
+#endif
+}
+#endif
+
+/* exported to allow conversion of error code to string for compress() and
+ * uncompress()
+ */
+const
+char*
+zError(int err)
+{
+ return ERR_MSG(err);
+}
+
+#ifndef HAVE_MEMCPY
+
+void
+ZLIB_INTERNAL
+zmemcpy(Bytef* dest, const Bytef* source, uInt len)
+{
+ if (len == 0) return;
+ do {
+ *dest++ = *source++; /* ??? to be unrolled */
+ } while (--len != 0);
+}
+
+int
+ZLIB_INTERNAL
+zmemcmp(const Bytef* s1, const Bytef* s2, uInt len)
+{
+ uInt j;
+
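+ /* map the first differing byte to -1 or +1 without branching */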
+ for (j = 0; j < len; j++) {
+ if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1;
+ }
+ return 0;
+}
+
+void
+ZLIB_INTERNAL
+zmemzero(Bytef* dest, uInt len)
+{
+ if (len == 0) return;
+ do {
+ *dest++ = 0; /* ??? to be unrolled */
+ } while (--len != 0);
+}
+#endif
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -131,6 +131,7 @@
${_glxiic} \
${_glxsb} \
gpio \
+ hammer2 \
hid \
hifn \
${_hpt27xx} \
diff --git a/sys/modules/hammer2/Makefile b/sys/modules/hammer2/Makefile
new file mode 100644
--- /dev/null
+++ b/sys/modules/hammer2/Makefile
@@ -0,0 +1,16 @@
+# $FreeBSD$
+
+.PATH: ${SRCTOP}/sys/fs/hammer2
+
+KMOD= hammer2
+SRCS= vnode_if.h \
+ hammer2_admin.c hammer2_chain.c hammer2_cluster.c hammer2_inode.c \
+ hammer2_io.c hammer2_ioctl.c hammer2_lz4.c hammer2_ondisk.c \
+ hammer2_strategy.c hammer2_subr.c hammer2_vfsops.c hammer2_vnops.c \
+ hammer2_xops.c \
+ zlib/hammer2_zlib_adler32.c zlib/hammer2_zlib_deflate.c \
+ zlib/hammer2_zlib_inffast.c zlib/hammer2_zlib_inflate.c \
+ zlib/hammer2_zlib_inftrees.c zlib/hammer2_zlib_trees.c \
+ zlib/hammer2_zlib_zutil.c xxhash/xxhash.c
+
+.include <bsd.kmod.mk>
