Index: head/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c =================================================================== --- head/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c (revision 322167) +++ head/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c (revision 322168) @@ -1,1948 +1,1948 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #define ELF_TARGET_ALL #include #include #ifdef illumos #include #else #define P2ROUNDUP(x, align) (-(-(x) & -(align))) #endif #include #include #ifdef illumos #include #endif #include #include #include #include #include #include #ifdef illumos #include #else #include #include #include #include #endif #include #include #include #include #include #include #define ESHDR_NULL 0 #define ESHDR_SHSTRTAB 1 #define ESHDR_DOF 2 #define ESHDR_STRTAB 3 #define ESHDR_SYMTAB 4 #define ESHDR_REL 5 #define ESHDR_NUM 6 #define PWRITE_SCN(index, data) \ (lseek64(fd, (off64_t)elf_file.shdr[(index)].sh_offset, SEEK_SET) != \ (off64_t)elf_file.shdr[(index)].sh_offset || \ dt_write(dtp, fd, (data), elf_file.shdr[(index)].sh_size) != \ elf_file.shdr[(index)].sh_size) static const char DTRACE_SHSTRTAB32[] = "\0" ".shstrtab\0" /* 1 */ ".SUNW_dof\0" /* 11 */ ".strtab\0" /* 21 */ ".symtab\0" /* 29 */ #ifdef __sparc ".rela.SUNW_dof"; /* 37 */ #else ".rel.SUNW_dof"; /* 37 */ #endif static const char DTRACE_SHSTRTAB64[] = "\0" ".shstrtab\0" /* 1 */ ".SUNW_dof\0" /* 11 */ ".strtab\0" /* 21 */ ".symtab\0" /* 29 */ ".rela.SUNW_dof"; /* 37 */ static const char DOFSTR[] = "__SUNW_dof"; static const char DOFLAZYSTR[] = "___SUNW_dof"; typedef struct dt_link_pair { struct dt_link_pair *dlp_next; /* next pair in linked list */ void *dlp_str; /* buffer for string table */ void *dlp_sym; /* buffer for symbol table */ } dt_link_pair_t; typedef struct dof_elf32 { uint32_t de_nrel; /* relocation count */ #ifdef __sparc Elf32_Rela *de_rel; /* array of relocations for sparc */ #else Elf32_Rel *de_rel; /* array of relocations for x86 */ #endif uint32_t de_nsym; /* symbol count */ Elf32_Sym *de_sym; /* array of symbols */ uint32_t de_strlen; /* size of of string table */ char *de_strtab; /* string table */ uint32_t de_global; /* index of the first global symbol */ } dof_elf32_t; static int prepare_elf32(dtrace_hdl_t *dtp, const dof_hdr_t *dof, dof_elf32_t *dep) { dof_sec_t *dofs, *s; dof_relohdr_t *dofrh; dof_relodesc_t *dofr; char *strtab; int i, j, nrel; size_t strtabsz = 1; uint32_t count = 0; size_t base; Elf32_Sym *sym; #ifdef __sparc Elf32_Rela *rel; #else Elf32_Rel *rel; #endif /*LINTED*/ dofs = (dof_sec_t *)((char *)dof + dof->dofh_secoff); /* * First compute the size of the string table 
and the number of * relocations present in the DOF. */ for (i = 0; i < dof->dofh_secnum; i++) { if (dofs[i].dofs_type != DOF_SECT_URELHDR) continue; /*LINTED*/ dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); s = &dofs[dofrh->dofr_strtab]; strtab = (char *)dof + s->dofs_offset; assert(strtab[0] == '\0'); strtabsz += s->dofs_size - 1; s = &dofs[dofrh->dofr_relsec]; /*LINTED*/ dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); count += s->dofs_size / s->dofs_entsize; } dep->de_strlen = strtabsz; dep->de_nrel = count; dep->de_nsym = count + 1; /* the first symbol is always null */ if (dtp->dt_lazyload) { dep->de_strlen += sizeof (DOFLAZYSTR); dep->de_nsym++; } else { dep->de_strlen += sizeof (DOFSTR); dep->de_nsym++; } if ((dep->de_rel = calloc(dep->de_nrel, sizeof (dep->de_rel[0]))) == NULL) { return (dt_set_errno(dtp, EDT_NOMEM)); } if ((dep->de_sym = calloc(dep->de_nsym, sizeof (Elf32_Sym))) == NULL) { free(dep->de_rel); return (dt_set_errno(dtp, EDT_NOMEM)); } if ((dep->de_strtab = calloc(dep->de_strlen, 1)) == NULL) { free(dep->de_rel); free(dep->de_sym); return (dt_set_errno(dtp, EDT_NOMEM)); } count = 0; strtabsz = 1; dep->de_strtab[0] = '\0'; rel = dep->de_rel; sym = dep->de_sym; dep->de_global = 1; /* * The first symbol table entry must be zeroed and is always ignored. */ bzero(sym, sizeof (Elf32_Sym)); sym++; /* * Take a second pass through the DOF sections filling in the * memory we allocated. */ for (i = 0; i < dof->dofh_secnum; i++) { if (dofs[i].dofs_type != DOF_SECT_URELHDR) continue; /*LINTED*/ dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); s = &dofs[dofrh->dofr_strtab]; strtab = (char *)dof + s->dofs_offset; bcopy(strtab + 1, dep->de_strtab + strtabsz, s->dofs_size); base = strtabsz; strtabsz += s->dofs_size - 1; s = &dofs[dofrh->dofr_relsec]; /*LINTED*/ dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); nrel = s->dofs_size / s->dofs_entsize; s = &dofs[dofrh->dofr_tgtsec]; for (j = 0; j < nrel; j++) { #if defined(__aarch64__) /* XXX */ printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); #elif defined(__arm__) /* XXX */ printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); #elif defined(__i386) || defined(__amd64) rel->r_offset = s->dofs_offset + dofr[j].dofr_offset; rel->r_info = ELF32_R_INFO(count + dep->de_global, R_386_PC32); #elif defined(__mips__) /* XXX */ printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); #elif defined(__powerpc__) /* * Add 4 bytes to hit the low half of this 64-bit * big-endian address. */ rel->r_offset = s->dofs_offset + dofr[j].dofr_offset + 4; rel->r_info = ELF32_R_INFO(count + dep->de_global, R_PPC_REL32); -#elif defined(__riscv__) +#elif defined(__riscv) /* XXX */ printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); #else #error unknown ISA #endif sym->st_name = base + dofr[j].dofr_name - 1; sym->st_value = 0; sym->st_size = 0; sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_FUNC); sym->st_other = 0; sym->st_shndx = SHN_UNDEF; rel++; sym++; count++; } } /* * Add a symbol for the DOF itself. We use a different symbol for * lazily and actively loaded DOF to make them easy to distinguish. 
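* (The two names in question are the DOFSTR ("__SUNW_dof") and DOFLAZYSTR
* ("___SUNW_dof") strings defined near the top of this file; dt_lazyload
* selects which one is copied into the string table just below.)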
*/ sym->st_name = strtabsz; sym->st_value = 0; sym->st_size = dof->dofh_filesz; sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_OBJECT); sym->st_other = ELF32_ST_VISIBILITY(STV_HIDDEN); sym->st_shndx = ESHDR_DOF; sym++; if (dtp->dt_lazyload) { bcopy(DOFLAZYSTR, dep->de_strtab + strtabsz, sizeof (DOFLAZYSTR)); strtabsz += sizeof (DOFLAZYSTR); } else { bcopy(DOFSTR, dep->de_strtab + strtabsz, sizeof (DOFSTR)); strtabsz += sizeof (DOFSTR); } assert(count == dep->de_nrel); assert(strtabsz == dep->de_strlen); return (0); } typedef struct dof_elf64 { uint32_t de_nrel; Elf64_Rela *de_rel; uint32_t de_nsym; Elf64_Sym *de_sym; uint32_t de_strlen; char *de_strtab; uint32_t de_global; } dof_elf64_t; static int prepare_elf64(dtrace_hdl_t *dtp, const dof_hdr_t *dof, dof_elf64_t *dep) { dof_sec_t *dofs, *s; dof_relohdr_t *dofrh; dof_relodesc_t *dofr; char *strtab; int i, j, nrel; size_t strtabsz = 1; #ifdef illumos uint32_t count = 0; #else uint64_t count = 0; #endif size_t base; Elf64_Sym *sym; Elf64_Rela *rel; /*LINTED*/ dofs = (dof_sec_t *)((char *)dof + dof->dofh_secoff); /* * First compute the size of the string table and the number of * relocations present in the DOF. */ for (i = 0; i < dof->dofh_secnum; i++) { if (dofs[i].dofs_type != DOF_SECT_URELHDR) continue; /*LINTED*/ dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); s = &dofs[dofrh->dofr_strtab]; strtab = (char *)dof + s->dofs_offset; assert(strtab[0] == '\0'); strtabsz += s->dofs_size - 1; s = &dofs[dofrh->dofr_relsec]; /*LINTED*/ dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); count += s->dofs_size / s->dofs_entsize; } dep->de_strlen = strtabsz; dep->de_nrel = count; dep->de_nsym = count + 1; /* the first symbol is always null */ if (dtp->dt_lazyload) { dep->de_strlen += sizeof (DOFLAZYSTR); dep->de_nsym++; } else { dep->de_strlen += sizeof (DOFSTR); dep->de_nsym++; } if ((dep->de_rel = calloc(dep->de_nrel, sizeof (dep->de_rel[0]))) == NULL) { return (dt_set_errno(dtp, EDT_NOMEM)); } if ((dep->de_sym = calloc(dep->de_nsym, sizeof (Elf64_Sym))) == NULL) { free(dep->de_rel); return (dt_set_errno(dtp, EDT_NOMEM)); } if ((dep->de_strtab = calloc(dep->de_strlen, 1)) == NULL) { free(dep->de_rel); free(dep->de_sym); return (dt_set_errno(dtp, EDT_NOMEM)); } count = 0; strtabsz = 1; dep->de_strtab[0] = '\0'; rel = dep->de_rel; sym = dep->de_sym; dep->de_global = 1; /* * The first symbol table entry must be zeroed and is always ignored. */ bzero(sym, sizeof (Elf64_Sym)); sym++; /* * Take a second pass through the DOF sections filling in the * memory we allocated. 
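* (The first pass above only counted the relocations and sized the string
* table; this pass writes out the actual Elf64_Sym and Elf64_Rela entries.)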
*/ for (i = 0; i < dof->dofh_secnum; i++) { if (dofs[i].dofs_type != DOF_SECT_URELHDR) continue; /*LINTED*/ dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); s = &dofs[dofrh->dofr_strtab]; strtab = (char *)dof + s->dofs_offset; bcopy(strtab + 1, dep->de_strtab + strtabsz, s->dofs_size); base = strtabsz; strtabsz += s->dofs_size - 1; s = &dofs[dofrh->dofr_relsec]; /*LINTED*/ dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); nrel = s->dofs_size / s->dofs_entsize; s = &dofs[dofrh->dofr_tgtsec]; for (j = 0; j < nrel; j++) { #if defined(__aarch64__) /* XXX */ #elif defined(__arm__) /* XXX */ #elif defined(__mips__) /* XXX */ #elif defined(__powerpc__) rel->r_offset = s->dofs_offset + dofr[j].dofr_offset; rel->r_info = ELF64_R_INFO(count + dep->de_global, R_PPC64_REL64); -#elif defined(__riscv__) +#elif defined(__riscv) /* XXX */ #elif defined(__i386) || defined(__amd64) rel->r_offset = s->dofs_offset + dofr[j].dofr_offset; rel->r_info = ELF64_R_INFO(count + dep->de_global, R_X86_64_PC64); #else #error unknown ISA #endif sym->st_name = base + dofr[j].dofr_name - 1; sym->st_value = 0; sym->st_size = 0; sym->st_info = GELF_ST_INFO(STB_GLOBAL, STT_FUNC); sym->st_other = 0; sym->st_shndx = SHN_UNDEF; rel++; sym++; count++; } } /* * Add a symbol for the DOF itself. We use a different symbol for * lazily and actively loaded DOF to make them easy to distinguish. */ sym->st_name = strtabsz; sym->st_value = 0; sym->st_size = dof->dofh_filesz; sym->st_info = GELF_ST_INFO(STB_GLOBAL, STT_OBJECT); sym->st_other = ELF64_ST_VISIBILITY(STV_HIDDEN); sym->st_shndx = ESHDR_DOF; sym++; if (dtp->dt_lazyload) { bcopy(DOFLAZYSTR, dep->de_strtab + strtabsz, sizeof (DOFLAZYSTR)); strtabsz += sizeof (DOFLAZYSTR); } else { bcopy(DOFSTR, dep->de_strtab + strtabsz, sizeof (DOFSTR)); strtabsz += sizeof (DOFSTR); } assert(count == dep->de_nrel); assert(strtabsz == dep->de_strlen); return (0); } /* * Write out an ELF32 file prologue consisting of a header, section headers, * and a section header string table. The DOF data will follow this prologue * and complete the contents of the given ELF file. */ static int dump_elf32(dtrace_hdl_t *dtp, const dof_hdr_t *dof, int fd) { struct { Elf32_Ehdr ehdr; Elf32_Shdr shdr[ESHDR_NUM]; } elf_file; Elf32_Shdr *shp; Elf32_Off off; dof_elf32_t de; int ret = 0; uint_t nshdr; if (prepare_elf32(dtp, dof, &de) != 0) return (-1); /* errno is set for us */ /* * If there are no relocations, we only need enough sections for * the shstrtab and the DOF. */ nshdr = de.de_nrel == 0 ? 
ESHDR_SYMTAB + 1 : ESHDR_NUM; bzero(&elf_file, sizeof (elf_file)); elf_file.ehdr.e_ident[EI_MAG0] = ELFMAG0; elf_file.ehdr.e_ident[EI_MAG1] = ELFMAG1; elf_file.ehdr.e_ident[EI_MAG2] = ELFMAG2; elf_file.ehdr.e_ident[EI_MAG3] = ELFMAG3; elf_file.ehdr.e_ident[EI_VERSION] = EV_CURRENT; elf_file.ehdr.e_ident[EI_CLASS] = ELFCLASS32; #if BYTE_ORDER == _BIG_ENDIAN elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2MSB; #else elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2LSB; #endif #if defined(__FreeBSD__) elf_file.ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD; #endif elf_file.ehdr.e_type = ET_REL; #if defined(__arm__) elf_file.ehdr.e_machine = EM_ARM; #elif defined(__mips__) elf_file.ehdr.e_machine = EM_MIPS; #elif defined(__powerpc__) elf_file.ehdr.e_machine = EM_PPC; #elif defined(__sparc) elf_file.ehdr.e_machine = EM_SPARC; #elif defined(__i386) || defined(__amd64) elf_file.ehdr.e_machine = EM_386; #endif elf_file.ehdr.e_version = EV_CURRENT; elf_file.ehdr.e_shoff = sizeof (Elf32_Ehdr); elf_file.ehdr.e_ehsize = sizeof (Elf32_Ehdr); elf_file.ehdr.e_phentsize = sizeof (Elf32_Phdr); elf_file.ehdr.e_shentsize = sizeof (Elf32_Shdr); elf_file.ehdr.e_shnum = nshdr; elf_file.ehdr.e_shstrndx = ESHDR_SHSTRTAB; off = sizeof (elf_file) + nshdr * sizeof (Elf32_Shdr); shp = &elf_file.shdr[ESHDR_SHSTRTAB]; shp->sh_name = 1; /* DTRACE_SHSTRTAB32[1] = ".shstrtab" */ shp->sh_type = SHT_STRTAB; shp->sh_offset = off; shp->sh_size = sizeof (DTRACE_SHSTRTAB32); shp->sh_addralign = sizeof (char); off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); shp = &elf_file.shdr[ESHDR_DOF]; shp->sh_name = 11; /* DTRACE_SHSTRTAB32[11] = ".SUNW_dof" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_SUNW_dof; shp->sh_offset = off; shp->sh_size = dof->dofh_filesz; shp->sh_addralign = 8; off = shp->sh_offset + shp->sh_size; shp = &elf_file.shdr[ESHDR_STRTAB]; shp->sh_name = 21; /* DTRACE_SHSTRTAB32[21] = ".strtab" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_STRTAB; shp->sh_offset = off; shp->sh_size = de.de_strlen; shp->sh_addralign = sizeof (char); off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 4); shp = &elf_file.shdr[ESHDR_SYMTAB]; shp->sh_name = 29; /* DTRACE_SHSTRTAB32[29] = ".symtab" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_SYMTAB; shp->sh_entsize = sizeof (Elf32_Sym); shp->sh_link = ESHDR_STRTAB; shp->sh_offset = off; shp->sh_info = de.de_global; shp->sh_size = de.de_nsym * sizeof (Elf32_Sym); shp->sh_addralign = 4; off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 4); if (de.de_nrel == 0) { if (dt_write(dtp, fd, &elf_file, sizeof (elf_file)) != sizeof (elf_file) || PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB32) || PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || PWRITE_SCN(ESHDR_DOF, dof)) { ret = dt_set_errno(dtp, errno); } } else { shp = &elf_file.shdr[ESHDR_REL]; shp->sh_name = 37; /* DTRACE_SHSTRTAB32[37] = ".rel.SUNW_dof" */ shp->sh_flags = SHF_ALLOC; #ifdef __sparc shp->sh_type = SHT_RELA; #else shp->sh_type = SHT_REL; #endif shp->sh_entsize = sizeof (de.de_rel[0]); shp->sh_link = ESHDR_SYMTAB; shp->sh_info = ESHDR_DOF; shp->sh_offset = off; shp->sh_size = de.de_nrel * sizeof (de.de_rel[0]); shp->sh_addralign = 4; if (dt_write(dtp, fd, &elf_file, sizeof (elf_file)) != sizeof (elf_file) || PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB32) || PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || PWRITE_SCN(ESHDR_REL, de.de_rel) || PWRITE_SCN(ESHDR_DOF, dof)) { ret = dt_set_errno(dtp, errno); } } free(de.de_strtab); free(de.de_sym); free(de.de_rel); return (ret); } /* * Write 
out an ELF64 file prologue consisting of a header, section headers, * and a section header string table. The DOF data will follow this prologue * and complete the contents of the given ELF file. */ static int dump_elf64(dtrace_hdl_t *dtp, const dof_hdr_t *dof, int fd) { struct { Elf64_Ehdr ehdr; Elf64_Shdr shdr[ESHDR_NUM]; } elf_file; Elf64_Shdr *shp; Elf64_Off off; dof_elf64_t de; int ret = 0; uint_t nshdr; if (prepare_elf64(dtp, dof, &de) != 0) return (-1); /* errno is set for us */ /* * If there are no relocations, we only need enough sections for * the shstrtab and the DOF. */ nshdr = de.de_nrel == 0 ? ESHDR_SYMTAB + 1 : ESHDR_NUM; bzero(&elf_file, sizeof (elf_file)); elf_file.ehdr.e_ident[EI_MAG0] = ELFMAG0; elf_file.ehdr.e_ident[EI_MAG1] = ELFMAG1; elf_file.ehdr.e_ident[EI_MAG2] = ELFMAG2; elf_file.ehdr.e_ident[EI_MAG3] = ELFMAG3; elf_file.ehdr.e_ident[EI_VERSION] = EV_CURRENT; elf_file.ehdr.e_ident[EI_CLASS] = ELFCLASS64; #if BYTE_ORDER == _BIG_ENDIAN elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2MSB; #else elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2LSB; #endif #if defined(__FreeBSD__) elf_file.ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD; #endif elf_file.ehdr.e_type = ET_REL; #if defined(__arm__) elf_file.ehdr.e_machine = EM_ARM; #elif defined(__mips__) elf_file.ehdr.e_machine = EM_MIPS; #elif defined(__powerpc64__) elf_file.ehdr.e_machine = EM_PPC64; #elif defined(__sparc) elf_file.ehdr.e_machine = EM_SPARCV9; #elif defined(__i386) || defined(__amd64) elf_file.ehdr.e_machine = EM_AMD64; #endif elf_file.ehdr.e_version = EV_CURRENT; elf_file.ehdr.e_shoff = sizeof (Elf64_Ehdr); elf_file.ehdr.e_ehsize = sizeof (Elf64_Ehdr); elf_file.ehdr.e_phentsize = sizeof (Elf64_Phdr); elf_file.ehdr.e_shentsize = sizeof (Elf64_Shdr); elf_file.ehdr.e_shnum = nshdr; elf_file.ehdr.e_shstrndx = ESHDR_SHSTRTAB; off = sizeof (elf_file) + nshdr * sizeof (Elf64_Shdr); shp = &elf_file.shdr[ESHDR_SHSTRTAB]; shp->sh_name = 1; /* DTRACE_SHSTRTAB64[1] = ".shstrtab" */ shp->sh_type = SHT_STRTAB; shp->sh_offset = off; shp->sh_size = sizeof (DTRACE_SHSTRTAB64); shp->sh_addralign = sizeof (char); off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); shp = &elf_file.shdr[ESHDR_DOF]; shp->sh_name = 11; /* DTRACE_SHSTRTAB64[11] = ".SUNW_dof" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_SUNW_dof; shp->sh_offset = off; shp->sh_size = dof->dofh_filesz; shp->sh_addralign = 8; off = shp->sh_offset + shp->sh_size; shp = &elf_file.shdr[ESHDR_STRTAB]; shp->sh_name = 21; /* DTRACE_SHSTRTAB64[21] = ".strtab" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_STRTAB; shp->sh_offset = off; shp->sh_size = de.de_strlen; shp->sh_addralign = sizeof (char); off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); shp = &elf_file.shdr[ESHDR_SYMTAB]; shp->sh_name = 29; /* DTRACE_SHSTRTAB64[29] = ".symtab" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_SYMTAB; shp->sh_entsize = sizeof (Elf64_Sym); shp->sh_link = ESHDR_STRTAB; shp->sh_offset = off; shp->sh_info = de.de_global; shp->sh_size = de.de_nsym * sizeof (Elf64_Sym); shp->sh_addralign = 8; off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); if (de.de_nrel == 0) { if (dt_write(dtp, fd, &elf_file, sizeof (elf_file)) != sizeof (elf_file) || PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB64) || PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || PWRITE_SCN(ESHDR_DOF, dof)) { ret = dt_set_errno(dtp, errno); } } else { shp = &elf_file.shdr[ESHDR_REL]; shp->sh_name = 37; /* DTRACE_SHSTRTAB64[37] = ".rel.SUNW_dof" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_RELA; 
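/*
 * The 64-bit DOF object always carries Elf64_Rela relocations, so this
 * section is typed SHT_RELA unconditionally; the 32-bit path above uses
 * SHT_REL except on sparc, matching the de_rel element type chosen in
 * prepare_elf32()/prepare_elf64().
 */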
shp->sh_entsize = sizeof (de.de_rel[0]); shp->sh_link = ESHDR_SYMTAB; shp->sh_info = ESHDR_DOF; shp->sh_offset = off; shp->sh_size = de.de_nrel * sizeof (de.de_rel[0]); shp->sh_addralign = 8; if (dt_write(dtp, fd, &elf_file, sizeof (elf_file)) != sizeof (elf_file) || PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB64) || PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || PWRITE_SCN(ESHDR_REL, de.de_rel) || PWRITE_SCN(ESHDR_DOF, dof)) { ret = dt_set_errno(dtp, errno); } } free(de.de_strtab); free(de.de_sym); free(de.de_rel); return (ret); } static int dt_symtab_lookup(Elf_Data *data_sym, int start, int end, uintptr_t addr, uint_t shn, GElf_Sym *sym, int uses_funcdesc, Elf *elf) { Elf64_Addr symval; Elf_Scn *opd_scn; Elf_Data *opd_desc; int i; for (i = start; i < end && gelf_getsym(data_sym, i, sym) != NULL; i++) { if (GELF_ST_TYPE(sym->st_info) == STT_FUNC) { symval = sym->st_value; if (uses_funcdesc) { opd_scn = elf_getscn(elf, sym->st_shndx); opd_desc = elf_rawdata(opd_scn, NULL); symval = *(uint64_t*)((char *)opd_desc->d_buf + symval); } if ((uses_funcdesc || shn == sym->st_shndx) && symval <= addr && addr < symval + sym->st_size) return (0); } } return (-1); } #if defined(__aarch64__) /* XXX */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); return (0); } #elif defined(__arm__) /* XXX */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); return (0); } #elif defined(__mips__) /* XXX */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); return (0); } #elif defined(__powerpc__) /* The sentinel is 'xor r3,r3,r3'. */ #define DT_OP_XOR_R3 0x7c631a78 #define DT_OP_NOP 0x60000000 #define DT_OP_BLR 0x4e800020 /* This captures all forms of branching to address. */ #define DT_IS_BRANCH(inst) ((inst & 0xfc000000) == 0x48000000) #define DT_IS_BL(inst) (DT_IS_BRANCH(inst) && (inst & 0x01)) /* XXX */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { uint32_t *ip; if ((rela->r_offset & (sizeof (uint32_t) - 1)) != 0) return (-1); /*LINTED*/ ip = (uint32_t *)(p + rela->r_offset); /* * We only know about some specific relocation types. */ if (GELF_R_TYPE(rela->r_info) != R_PPC_REL24 && GELF_R_TYPE(rela->r_info) != R_PPC_PLTREL24) return (-1); /* * We may have already processed this object file in an earlier linker * invocation. Check to see if the present instruction sequence matches * the one we would install below. */ if (isenabled) { if (ip[0] == DT_OP_XOR_R3) { (*off) += sizeof (ip[0]); return (0); } } else { if (ip[0] == DT_OP_NOP) { (*off) += sizeof (ip[0]); return (0); } } /* * We only expect branch to address instructions. */ if (!DT_IS_BRANCH(ip[0])) { dt_dprintf("found %x instead of a branch instruction at %llx\n", ip[0], (u_longlong_t)rela->r_offset); return (-1); } if (isenabled) { /* * It would necessarily indicate incorrect usage if an is- * enabled probe were tail-called so flag that as an error. * It's also potentially (very) tricky to handle gracefully, * but could be done if this were a desired use scenario. 
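* For reference, the rewrites just below are: bl -> 'xor r3,r3,r3'
* (DT_OP_XOR_R3) for an is-enabled probe, and bl -> nop or tail-call
* b -> blr (DT_OP_NOP / DT_OP_BLR) for a regular probe.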
*/ if (!DT_IS_BL(ip[0])) { dt_dprintf("tail call to is-enabled probe at %llx\n", (u_longlong_t)rela->r_offset); return (-1); } ip[0] = DT_OP_XOR_R3; (*off) += sizeof (ip[0]); } else { if (DT_IS_BL(ip[0])) ip[0] = DT_OP_NOP; else ip[0] = DT_OP_BLR; } return (0); } -#elif defined(__riscv__) +#elif defined(__riscv) /* XXX */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); return (0); } #elif defined(__sparc) #define DT_OP_RET 0x81c7e008 #define DT_OP_NOP 0x01000000 #define DT_OP_CALL 0x40000000 #define DT_OP_CLR_O0 0x90102000 #define DT_IS_MOV_O7(inst) (((inst) & 0xffffe000) == 0x9e100000) #define DT_IS_RESTORE(inst) (((inst) & 0xc1f80000) == 0x81e80000) #define DT_IS_RETL(inst) (((inst) & 0xfff83fff) == 0x81c02008) #define DT_RS2(inst) ((inst) & 0x1f) #define DT_MAKE_RETL(reg) (0x81c02008 | ((reg) << 14)) /*ARGSUSED*/ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { uint32_t *ip; if ((rela->r_offset & (sizeof (uint32_t) - 1)) != 0) return (-1); /*LINTED*/ ip = (uint32_t *)(p + rela->r_offset); /* * We only know about some specific relocation types. */ if (GELF_R_TYPE(rela->r_info) != R_SPARC_WDISP30 && GELF_R_TYPE(rela->r_info) != R_SPARC_WPLT30) return (-1); /* * We may have already processed this object file in an earlier linker * invocation. Check to see if the present instruction sequence matches * the one we would install below. */ if (isenabled) { if (ip[0] == DT_OP_NOP) { (*off) += sizeof (ip[0]); return (0); } } else { if (DT_IS_RESTORE(ip[1])) { if (ip[0] == DT_OP_RET) { (*off) += sizeof (ip[0]); return (0); } } else if (DT_IS_MOV_O7(ip[1])) { if (DT_IS_RETL(ip[0])) return (0); } else { if (ip[0] == DT_OP_NOP) { (*off) += sizeof (ip[0]); return (0); } } } /* * We only expect call instructions with a displacement of 0. */ if (ip[0] != DT_OP_CALL) { dt_dprintf("found %x instead of a call instruction at %llx\n", ip[0], (u_longlong_t)rela->r_offset); return (-1); } if (isenabled) { /* * It would necessarily indicate incorrect usage if an is- * enabled probe were tail-called so flag that as an error. * It's also potentially (very) tricky to handle gracefully, * but could be done if this were a desired use scenario. */ if (DT_IS_RESTORE(ip[1]) || DT_IS_MOV_O7(ip[1])) { dt_dprintf("tail call to is-enabled probe at %llx\n", (u_longlong_t)rela->r_offset); return (-1); } /* * On SPARC, we take advantage of the fact that the first * argument shares the same register as for the return value. * The macro handles the work of zeroing that register so we * don't need to do anything special here. We instrument the * instruction in the delay slot as we'll need to modify the * return register after that instruction has been emulated. */ ip[0] = DT_OP_NOP; (*off) += sizeof (ip[0]); } else { /* * If the call is followed by a restore, it's a tail call so * change the call to a ret. If the call if followed by a mov * of a register into %o7, it's a tail call in leaf context * so change the call to a retl-like instruction that returns * to that register value + 8 (rather than the typical %o7 + * 8); the delay slot instruction is left, but should have no * effect. Otherwise we change the call to be a nop. We * identify the subsequent instruction as the probe point in * all but the leaf tail-call case to ensure that arguments to * the probe are complete and consistent. 
An astute, though * largely hypothetical, observer would note that there is the * possibility of a false-positive probe firing if the function * contained a branch to the instruction in the delay slot of * the call. Fixing this would require significant in-kernel * modifications, and isn't worth doing until we see it in the * wild. */ if (DT_IS_RESTORE(ip[1])) { ip[0] = DT_OP_RET; (*off) += sizeof (ip[0]); } else if (DT_IS_MOV_O7(ip[1])) { ip[0] = DT_MAKE_RETL(DT_RS2(ip[1])); } else { ip[0] = DT_OP_NOP; (*off) += sizeof (ip[0]); } } return (0); } #elif defined(__i386) || defined(__amd64) #define DT_OP_NOP 0x90 #define DT_OP_RET 0xc3 #define DT_OP_CALL 0xe8 #define DT_OP_JMP32 0xe9 #define DT_OP_REX_RAX 0x48 #define DT_OP_XOR_EAX_0 0x33 #define DT_OP_XOR_EAX_1 0xc0 static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { uint8_t *ip = (uint8_t *)(p + rela->r_offset - 1); uint8_t ret; /* * On x86, the first byte of the instruction is the call opcode and * the next four bytes are the 32-bit address; the relocation is for * the address operand. We back up the offset to the first byte of * the instruction. For is-enabled probes, we later advance the offset * so that it hits the first nop in the instruction sequence. */ (*off) -= 1; /* * We only know about some specific relocation types. Luckily * these types have the same values on both 32-bit and 64-bit * x86 architectures. */ if (GELF_R_TYPE(rela->r_info) != R_386_PC32 && GELF_R_TYPE(rela->r_info) != R_386_PLT32) return (-1); /* * We may have already processed this object file in an earlier linker * invocation. Check to see if the present instruction sequence matches * the one we would install. For is-enabled probes, we advance the * offset to the first nop instruction in the sequence to match the * text modification code below. */ if (!isenabled) { if ((ip[0] == DT_OP_NOP || ip[0] == DT_OP_RET) && ip[1] == DT_OP_NOP && ip[2] == DT_OP_NOP && ip[3] == DT_OP_NOP && ip[4] == DT_OP_NOP) return (0); } else if (dtp->dt_oflags & DTRACE_O_LP64) { if (ip[0] == DT_OP_REX_RAX && ip[1] == DT_OP_XOR_EAX_0 && ip[2] == DT_OP_XOR_EAX_1 && (ip[3] == DT_OP_NOP || ip[3] == DT_OP_RET) && ip[4] == DT_OP_NOP) { (*off) += 3; return (0); } } else { if (ip[0] == DT_OP_XOR_EAX_0 && ip[1] == DT_OP_XOR_EAX_1 && (ip[2] == DT_OP_NOP || ip[2] == DT_OP_RET) && ip[3] == DT_OP_NOP && ip[4] == DT_OP_NOP) { (*off) += 2; return (0); } } /* * We expect either a call instrution with a 32-bit displacement or a * jmp instruction with a 32-bit displacement acting as a tail-call. */ if (ip[0] != DT_OP_CALL && ip[0] != DT_OP_JMP32) { dt_dprintf("found %x instead of a call or jmp instruction at " "%llx\n", ip[0], (u_longlong_t)rela->r_offset); return (-1); } ret = (ip[0] == DT_OP_JMP32) ? DT_OP_RET : DT_OP_NOP; /* * Establish the instruction sequence -- all nops for probes, and an * instruction to clear the return value register (%eax/%rax) followed * by nops for is-enabled probes. For is-enabled probes, we advance * the offset to the first nop. This isn't stricly necessary but makes * for more readable disassembly when the probe is enabled. 
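* Concretely, the 5-byte call/jmp site becomes 90 90 90 90 90 (or
* c3 90 90 90 90 when the original instruction was a tail-call jmp) for
* a regular probe, 48 33 c0 <90|c3> 90 (xor %rax,%rax) for an LP64
* is-enabled probe, and 33 c0 <90|c3> 90 90 (xor %eax,%eax) for a
* 32-bit one.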
*/ if (!isenabled) { ip[0] = ret; ip[1] = DT_OP_NOP; ip[2] = DT_OP_NOP; ip[3] = DT_OP_NOP; ip[4] = DT_OP_NOP; } else if (dtp->dt_oflags & DTRACE_O_LP64) { ip[0] = DT_OP_REX_RAX; ip[1] = DT_OP_XOR_EAX_0; ip[2] = DT_OP_XOR_EAX_1; ip[3] = ret; ip[4] = DT_OP_NOP; (*off) += 3; } else { ip[0] = DT_OP_XOR_EAX_0; ip[1] = DT_OP_XOR_EAX_1; ip[2] = ret; ip[3] = DT_OP_NOP; ip[4] = DT_OP_NOP; (*off) += 2; } return (0); } #else #error unknown ISA #endif /*PRINTFLIKE5*/ static int dt_link_error(dtrace_hdl_t *dtp, Elf *elf, int fd, dt_link_pair_t *bufs, const char *format, ...) { va_list ap; dt_link_pair_t *pair; va_start(ap, format); dt_set_errmsg(dtp, NULL, NULL, NULL, 0, format, ap); va_end(ap); if (elf != NULL) (void) elf_end(elf); if (fd >= 0) (void) close(fd); while ((pair = bufs) != NULL) { bufs = pair->dlp_next; dt_free(dtp, pair->dlp_str); dt_free(dtp, pair->dlp_sym); dt_free(dtp, pair); } return (dt_set_errno(dtp, EDT_COMPILER)); } static int process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp) { static const char dt_prefix[] = "__dtrace"; static const char dt_enabled[] = "enabled"; static const char dt_symprefix[] = "$dtrace"; static const char dt_symfmt[] = "%s%ld.%s"; static const char dt_weaksymfmt[] = "%s.%s"; char probename[DTRACE_NAMELEN]; int fd, i, ndx, eprobe, mod = 0; Elf *elf = NULL; GElf_Ehdr ehdr; Elf_Scn *scn_rel, *scn_sym, *scn_str, *scn_tgt; Elf_Data *data_rel, *data_sym, *data_str, *data_tgt; GElf_Shdr shdr_rel, shdr_sym, shdr_str, shdr_tgt; GElf_Sym rsym, fsym, dsym; GElf_Rela rela; char *s, *p, *r; char pname[DTRACE_PROVNAMELEN]; dt_provider_t *pvp; dt_probe_t *prp; uint32_t off, eclass, emachine1, emachine2; size_t symsize, osym, nsym, isym, istr, len; key_t objkey; dt_link_pair_t *pair, *bufs = NULL; dt_strtab_t *strtab; void *tmp; if ((fd = open64(obj, O_RDWR)) == -1) { return (dt_link_error(dtp, elf, fd, bufs, "failed to open %s: %s", obj, strerror(errno))); } if ((elf = elf_begin(fd, ELF_C_RDWR, NULL)) == NULL) { return (dt_link_error(dtp, elf, fd, bufs, "failed to process %s: %s", obj, elf_errmsg(elf_errno()))); } switch (elf_kind(elf)) { case ELF_K_ELF: break; case ELF_K_AR: return (dt_link_error(dtp, elf, fd, bufs, "archives are not " "permitted; use the contents of the archive instead: %s", obj)); default: return (dt_link_error(dtp, elf, fd, bufs, "invalid file type: %s", obj)); } if (gelf_getehdr(elf, &ehdr) == NULL) { return (dt_link_error(dtp, elf, fd, bufs, "corrupt file: %s", obj)); } if (dtp->dt_oflags & DTRACE_O_LP64) { eclass = ELFCLASS64; #if defined(__mips__) emachine1 = emachine2 = EM_MIPS; #elif defined(__powerpc__) emachine1 = emachine2 = EM_PPC64; #elif defined(__sparc) emachine1 = emachine2 = EM_SPARCV9; #elif defined(__i386) || defined(__amd64) emachine1 = emachine2 = EM_AMD64; #endif symsize = sizeof (Elf64_Sym); } else { eclass = ELFCLASS32; #if defined(__arm__) emachine1 = emachine2 = EM_ARM; #elif defined(__mips__) emachine1 = emachine2 = EM_MIPS; #elif defined(__powerpc__) emachine1 = emachine2 = EM_PPC; #elif defined(__sparc) emachine1 = EM_SPARC; emachine2 = EM_SPARC32PLUS; #elif defined(__i386) || defined(__amd64) emachine1 = emachine2 = EM_386; #endif symsize = sizeof (Elf32_Sym); } if (ehdr.e_ident[EI_CLASS] != eclass) { return (dt_link_error(dtp, elf, fd, bufs, "incorrect ELF class for object file: %s", obj)); } if (ehdr.e_machine != emachine1 && ehdr.e_machine != emachine2) { return (dt_link_error(dtp, elf, fd, bufs, "incorrect ELF machine type for object file: %s", obj)); } /* * We use this token as a relatively unique handle 
for this file on the * system in order to disambiguate potential conflicts between files of * the same name which contain identially named local symbols. */ if ((objkey = ftok(obj, 0)) == (key_t)-1) { return (dt_link_error(dtp, elf, fd, bufs, "failed to generate unique key for object file: %s", obj)); } scn_rel = NULL; while ((scn_rel = elf_nextscn(elf, scn_rel)) != NULL) { if (gelf_getshdr(scn_rel, &shdr_rel) == NULL) goto err; /* * Skip any non-relocation sections. */ if (shdr_rel.sh_type != SHT_RELA && shdr_rel.sh_type != SHT_REL) continue; if ((data_rel = elf_getdata(scn_rel, NULL)) == NULL) goto err; /* * Grab the section, section header and section data for the * symbol table that this relocation section references. */ if ((scn_sym = elf_getscn(elf, shdr_rel.sh_link)) == NULL || gelf_getshdr(scn_sym, &shdr_sym) == NULL || (data_sym = elf_getdata(scn_sym, NULL)) == NULL) goto err; /* * Ditto for that symbol table's string table. */ if ((scn_str = elf_getscn(elf, shdr_sym.sh_link)) == NULL || gelf_getshdr(scn_str, &shdr_str) == NULL || (data_str = elf_getdata(scn_str, NULL)) == NULL) goto err; /* * Grab the section, section header and section data for the * target section for the relocations. For the relocations * we're looking for -- this will typically be the text of the * object file. */ if ((scn_tgt = elf_getscn(elf, shdr_rel.sh_info)) == NULL || gelf_getshdr(scn_tgt, &shdr_tgt) == NULL || (data_tgt = elf_getdata(scn_tgt, NULL)) == NULL) goto err; /* * We're looking for relocations to symbols matching this form: * * __dtrace[enabled]____ * * For the generated object, we need to record the location * identified by the relocation, and create a new relocation * in the generated object that will be resolved at link time * to the location of the function in which the probe is * embedded. In the target object, we change the matched symbol * so that it will be ignored at link time, and we modify the * target (text) section to replace the call instruction with * one or more nops. * * To avoid runtime overhead, the relocations added to the * generated object should be resolved at static link time. We * therefore create aliases for the functions that contain * probes. An alias is global (so that the relocation from the * generated object can be resolved), and hidden (so that its * address is known at static link time). Such aliases have this * form: * * $dtrace. * * We take a first pass through all the relocations to * populate our string table and count the number of extra * symbols we'll require. 
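* The alias names are deduplicated with dt_strtab_index() before
* dt_strtab_insert(), so nsym counts each function's $dtrace alias only
* once even when the function contains several probe sites.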
*/ strtab = dt_strtab_create(1); nsym = 0; isym = data_sym->d_size / symsize; istr = data_str->d_size; for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) { if (shdr_rel.sh_type == SHT_RELA) { if (gelf_getrela(data_rel, i, &rela) == NULL) continue; } else { GElf_Rel rel; if (gelf_getrel(data_rel, i, &rel) == NULL) continue; rela.r_offset = rel.r_offset; rela.r_info = rel.r_info; rela.r_addend = 0; } if (gelf_getsym(data_sym, GELF_R_SYM(rela.r_info), &rsym) == NULL) { dt_strtab_destroy(strtab); goto err; } s = (char *)data_str->d_buf + rsym.st_name; if (strncmp(s, dt_prefix, sizeof (dt_prefix) - 1) != 0) continue; if (dt_symtab_lookup(data_sym, 0, isym, rela.r_offset, shdr_rel.sh_info, &fsym, (emachine1 == EM_PPC64), elf) != 0) { dt_strtab_destroy(strtab); goto err; } if (fsym.st_name > data_str->d_size) { dt_strtab_destroy(strtab); goto err; } s = (char *)data_str->d_buf + fsym.st_name; /* * If this symbol isn't of type function, we've really * driven off the rails or the object file is corrupt. */ if (GELF_ST_TYPE(fsym.st_info) != STT_FUNC) { dt_strtab_destroy(strtab); return (dt_link_error(dtp, elf, fd, bufs, "expected %s to be of type function", s)); } len = snprintf(NULL, 0, dt_symfmt, dt_symprefix, objkey, s) + 1; if ((p = dt_alloc(dtp, len)) == NULL) { dt_strtab_destroy(strtab); goto err; } (void) snprintf(p, len, dt_symfmt, dt_symprefix, objkey, s); if (dt_strtab_index(strtab, p) == -1) { nsym++; (void) dt_strtab_insert(strtab, p); } dt_free(dtp, p); } /* * If any probes were found, allocate the additional space for * the symbol table and string table, copying the old data into * the new buffers, and marking the buffers as dirty. We inject * those newly allocated buffers into the libelf data * structures, but are still responsible for freeing them once * we're done with the elf handle. */ if (nsym > 0) { /* * The first byte of the string table is reserved for * the \0 entry. */ len = dt_strtab_size(strtab) - 1; assert(len > 0); assert(dt_strtab_index(strtab, "") == 0); dt_strtab_destroy(strtab); if ((pair = dt_alloc(dtp, sizeof (*pair))) == NULL) goto err; if ((pair->dlp_str = dt_alloc(dtp, data_str->d_size + len)) == NULL) { dt_free(dtp, pair); goto err; } if ((pair->dlp_sym = dt_alloc(dtp, data_sym->d_size + nsym * symsize)) == NULL) { dt_free(dtp, pair->dlp_str); dt_free(dtp, pair); goto err; } pair->dlp_next = bufs; bufs = pair; bcopy(data_str->d_buf, pair->dlp_str, data_str->d_size); tmp = data_str->d_buf; data_str->d_buf = pair->dlp_str; pair->dlp_str = tmp; data_str->d_size += len; (void) elf_flagdata(data_str, ELF_C_SET, ELF_F_DIRTY); shdr_str.sh_size += len; (void) gelf_update_shdr(scn_str, &shdr_str); bcopy(data_sym->d_buf, pair->dlp_sym, data_sym->d_size); tmp = data_sym->d_buf; data_sym->d_buf = pair->dlp_sym; pair->dlp_sym = tmp; data_sym->d_size += nsym * symsize; (void) elf_flagdata(data_sym, ELF_C_SET, ELF_F_DIRTY); shdr_sym.sh_size += nsym * symsize; (void) gelf_update_shdr(scn_sym, &shdr_sym); osym = isym; nsym += isym; } else { dt_strtab_destroy(strtab); continue; } /* * Now that the tables have been allocated, perform the * modifications described above. 
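* Concretely, for each __dtrace* relocation the loop below parses out the
* provider and probe names, finds or emits a hidden $dtrace alias for the
* containing function, rewrites the call site with dt_modtext(), and
* records the probe offset with dt_probe_define().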
*/ for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) { if (shdr_rel.sh_type == SHT_RELA) { if (gelf_getrela(data_rel, i, &rela) == NULL) continue; } else { GElf_Rel rel; if (gelf_getrel(data_rel, i, &rel) == NULL) continue; rela.r_offset = rel.r_offset; rela.r_info = rel.r_info; rela.r_addend = 0; } ndx = GELF_R_SYM(rela.r_info); if (gelf_getsym(data_sym, ndx, &rsym) == NULL || rsym.st_name > data_str->d_size) goto err; s = (char *)data_str->d_buf + rsym.st_name; if (strncmp(s, dt_prefix, sizeof (dt_prefix) - 1) != 0) continue; s += sizeof (dt_prefix) - 1; /* * Check to see if this is an 'is-enabled' check as * opposed to a normal probe. */ if (strncmp(s, dt_enabled, sizeof (dt_enabled) - 1) == 0) { s += sizeof (dt_enabled) - 1; eprobe = 1; *eprobesp = 1; dt_dprintf("is-enabled probe\n"); } else { eprobe = 0; dt_dprintf("normal probe\n"); } if (*s++ != '_') goto err; if ((p = strstr(s, "___")) == NULL || p - s >= sizeof (pname)) goto err; bcopy(s, pname, p - s); pname[p - s] = '\0'; if (dt_symtab_lookup(data_sym, osym, isym, rela.r_offset, shdr_rel.sh_info, &fsym, (emachine1 == EM_PPC64), elf) == 0) { if (fsym.st_name > data_str->d_size) goto err; r = s = (char *) data_str->d_buf + fsym.st_name; assert(strstr(s, dt_symprefix) == s); s = strchr(s, '.') + 1; } else if (dt_symtab_lookup(data_sym, 0, osym, rela.r_offset, shdr_rel.sh_info, &fsym, (emachine1 == EM_PPC64), elf) == 0) { u_int bind; bind = GELF_ST_BIND(fsym.st_info) == STB_WEAK ? STB_WEAK : STB_GLOBAL; /* * Emit an alias for the symbol. It needs to be * non-preemptible so that .SUNW_dof relocations * may be resolved at static link time. Aliases * of weak symbols are given a non-unique name * so that they may be merged by the linker. */ dsym = fsym; dsym.st_name = istr; dsym.st_info = GELF_ST_INFO(bind, STT_FUNC); dsym.st_other = GELF_ST_VISIBILITY(STV_HIDDEN); (void) gelf_update_sym(data_sym, isym, &dsym); r = (char *) data_str->d_buf + istr; s = (char *) data_str->d_buf + fsym.st_name; if (bind == STB_WEAK) istr += sprintf(r, dt_weaksymfmt, dt_symprefix, s); else istr += sprintf(r, dt_symfmt, dt_symprefix, objkey, s); istr++; isym++; assert(isym <= nsym); } else goto err; if ((pvp = dt_provider_lookup(dtp, pname)) == NULL) { return (dt_link_error(dtp, elf, fd, bufs, "no such provider %s", pname)); } if (strlcpy(probename, p + 3, sizeof (probename)) >= sizeof (probename)) return (dt_link_error(dtp, elf, fd, bufs, "invalid probe name %s", probename)); (void) strhyphenate(probename); if ((prp = dt_probe_lookup(pvp, probename)) == NULL) return (dt_link_error(dtp, elf, fd, bufs, "no such probe %s", probename)); assert(fsym.st_value <= rela.r_offset); off = rela.r_offset - fsym.st_value; if (dt_modtext(dtp, data_tgt->d_buf, eprobe, &rela, &off) != 0) goto err; if (dt_probe_define(pvp, prp, s, r, off, eprobe) != 0) { return (dt_link_error(dtp, elf, fd, bufs, "failed to allocate space for probe")); } #ifndef illumos /* * Our linker doesn't understand the SUNW_IGNORE ndx and * will try to use this relocation when we build the * final executable. Since we are done processing this * relocation, mark it as inexistant and let libelf * remove it from the file. * If this wasn't done, we would have garbage added to * the executable file as the symbol is going to be * change from UND to ABS. 
*/ if (shdr_rel.sh_type == SHT_RELA) { rela.r_offset = 0; rela.r_info = 0; rela.r_addend = 0; (void) gelf_update_rela(data_rel, i, &rela); } else { GElf_Rel rel; rel.r_offset = 0; rel.r_info = 0; (void) gelf_update_rel(data_rel, i, &rel); } #endif mod = 1; (void) elf_flagdata(data_tgt, ELF_C_SET, ELF_F_DIRTY); /* * This symbol may already have been marked to * be ignored by another relocation referencing * the same symbol or if this object file has * already been processed by an earlier link * invocation. */ #ifndef illumos #define SHN_SUNW_IGNORE SHN_ABS #endif if (rsym.st_shndx != SHN_SUNW_IGNORE) { rsym.st_shndx = SHN_SUNW_IGNORE; (void) gelf_update_sym(data_sym, ndx, &rsym); } } } if (mod && elf_update(elf, ELF_C_WRITE) == -1) goto err; (void) elf_end(elf); (void) close(fd); while ((pair = bufs) != NULL) { bufs = pair->dlp_next; dt_free(dtp, pair->dlp_str); dt_free(dtp, pair->dlp_sym); dt_free(dtp, pair); } return (0); err: return (dt_link_error(dtp, elf, fd, bufs, "an error was encountered while processing %s", obj)); } int dtrace_program_link(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, uint_t dflags, const char *file, int objc, char *const objv[]) { #ifndef illumos char tfile[PATH_MAX]; #endif char drti[PATH_MAX]; dof_hdr_t *dof; int fd, status, i, cur; char *cmd, tmp; size_t len; int eprobes = 0, ret = 0; #ifndef illumos if (access(file, R_OK) == 0) { fprintf(stderr, "dtrace: target object (%s) already exists. " "Please remove the target\ndtrace: object and rebuild all " "the source objects if you wish to run the DTrace\n" "dtrace: linking process again\n", file); /* * Several build infrastructures run DTrace twice (e.g. * postgres) and we don't want the build to fail. Return * 0 here since this isn't really a fatal error. */ return (0); } #endif /* * A NULL program indicates a special use in which we just link * together a bunch of object files specified in objv and then * unlink(2) those object files. */ if (pgp == NULL) { const char *fmt = "%s -o %s -r"; len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file) + 1; for (i = 0; i < objc; i++) len += strlen(objv[i]) + 1; cmd = alloca(len); cur = snprintf(cmd, len, fmt, dtp->dt_ld_path, file); for (i = 0; i < objc; i++) cur += snprintf(cmd + cur, len - cur, " %s", objv[i]); if ((status = system(cmd)) == -1) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to run %s: %s", dtp->dt_ld_path, strerror(errno))); } if (WIFSIGNALED(status)) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to link %s: %s failed due to signal %d", file, dtp->dt_ld_path, WTERMSIG(status))); } if (WEXITSTATUS(status) != 0) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to link %s: %s exited with status %d\n", file, dtp->dt_ld_path, WEXITSTATUS(status))); } for (i = 0; i < objc; i++) { if (strcmp(objv[i], file) != 0) (void) unlink(objv[i]); } return (0); } for (i = 0; i < objc; i++) { if (process_obj(dtp, objv[i], &eprobes) != 0) return (-1); /* errno is set for us */ } /* * If there are is-enabled probes then we need to force use of DOF * version 2. */ if (eprobes && pgp->dp_dofversion < DOF_VERSION_2) pgp->dp_dofversion = DOF_VERSION_2; if ((dof = dtrace_dof_create(dtp, pgp, dflags)) == NULL) return (-1); /* errno is set for us */ #ifdef illumos /* * Create a temporary file and then unlink it if we're going to * combine it with drti.o later. We can still refer to it in child * processes as /dev/fd/. 
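* On FreeBSD (the #else branch just below) mkostemp() creates a named
* temporary file instead; it is either fed to the linker and unlinked in
* the done: path, or renamed over the target file when lazy loading is
* in effect.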
*/ if ((fd = open64(file, O_RDWR | O_CREAT | O_TRUNC, 0666)) == -1) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to open %s: %s", file, strerror(errno))); } #else snprintf(tfile, sizeof(tfile), "%s.XXXXXX", file); if ((fd = mkostemp(tfile, O_CLOEXEC)) == -1) return (dt_link_error(dtp, NULL, -1, NULL, "failed to create temporary file %s: %s", tfile, strerror(errno))); #endif /* * If -xlinktype=DOF has been selected, just write out the DOF. * Otherwise proceed to the default of generating and linking ELF. */ switch (dtp->dt_linktype) { case DT_LTYP_DOF: if (dt_write(dtp, fd, dof, dof->dofh_filesz) < dof->dofh_filesz) ret = errno; if (close(fd) != 0 && ret == 0) ret = errno; if (ret != 0) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to write %s: %s", file, strerror(ret))); } return (0); case DT_LTYP_ELF: break; /* fall through to the rest of dtrace_program_link() */ default: return (dt_link_error(dtp, NULL, -1, NULL, "invalid link type %u\n", dtp->dt_linktype)); } #ifdef illumos if (!dtp->dt_lazyload) (void) unlink(file); #endif if (dtp->dt_oflags & DTRACE_O_LP64) status = dump_elf64(dtp, dof, fd); else status = dump_elf32(dtp, dof, fd); #ifdef illumos if (status != 0 || lseek(fd, 0, SEEK_SET) != 0) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to write %s: %s", file, strerror(errno))); } #else if (status != 0) return (dt_link_error(dtp, NULL, -1, NULL, "failed to write %s: %s", tfile, strerror(dtrace_errno(dtp)))); #endif if (!dtp->dt_lazyload) { #ifdef illumos const char *fmt = "%s -o %s -r -Blocal -Breduce /dev/fd/%d %s"; if (dtp->dt_oflags & DTRACE_O_LP64) { (void) snprintf(drti, sizeof (drti), "%s/64/drti.o", _dtrace_libdir); } else { (void) snprintf(drti, sizeof (drti), "%s/drti.o", _dtrace_libdir); } len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file, fd, drti) + 1; cmd = alloca(len); (void) snprintf(cmd, len, fmt, dtp->dt_ld_path, file, fd, drti); #else const char *fmt = "%s -o %s -r %s %s"; dt_dirpath_t *dp = dt_list_next(&dtp->dt_lib_path); (void) snprintf(drti, sizeof (drti), "%s/drti.o", dp->dir_path); len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file, tfile, drti) + 1; cmd = alloca(len); (void) snprintf(cmd, len, fmt, dtp->dt_ld_path, file, tfile, drti); #endif if ((status = system(cmd)) == -1) { ret = dt_link_error(dtp, NULL, fd, NULL, "failed to run %s: %s", dtp->dt_ld_path, strerror(errno)); goto done; } if (WIFSIGNALED(status)) { ret = dt_link_error(dtp, NULL, fd, NULL, "failed to link %s: %s failed due to signal %d", file, dtp->dt_ld_path, WTERMSIG(status)); goto done; } if (WEXITSTATUS(status) != 0) { ret = dt_link_error(dtp, NULL, fd, NULL, "failed to link %s: %s exited with status %d\n", file, dtp->dt_ld_path, WEXITSTATUS(status)); goto done; } (void) close(fd); /* release temporary file */ #ifdef __FreeBSD__ /* * Now that we've linked drti.o, reduce the global __SUNW_dof * symbol to a local symbol. This is needed to so that multiple * generated object files (for different providers, for * instance) can be linked together. This is accomplished using * the -Blocal flag with Sun's linker, but GNU ld doesn't appear * to have an equivalent option. 
*/ asprintf(&cmd, "%s --localize-hidden %s", dtp->dt_objcopy_path, file); if ((status = system(cmd)) == -1) { ret = dt_link_error(dtp, NULL, -1, NULL, "failed to run %s: %s", dtp->dt_objcopy_path, strerror(errno)); free(cmd); goto done; } free(cmd); if (WIFSIGNALED(status)) { ret = dt_link_error(dtp, NULL, -1, NULL, "failed to link %s: %s failed due to signal %d", file, dtp->dt_objcopy_path, WTERMSIG(status)); goto done; } if (WEXITSTATUS(status) != 0) { ret = dt_link_error(dtp, NULL, -1, NULL, "failed to link %s: %s exited with status %d\n", file, dtp->dt_objcopy_path, WEXITSTATUS(status)); goto done; } #endif } else { #ifdef __FreeBSD__ if (rename(tfile, file) != 0) { ret = dt_link_error(dtp, NULL, fd, NULL, "failed to rename %s to %s: %s", tfile, file, strerror(errno)); goto done; } #endif (void) close(fd); } done: dtrace_dof_destroy(dtp, dof); #ifdef __FreeBSD__ if (!dtp->dt_lazyload) (void) unlink(tfile); #endif return (ret); } Index: head/cddl/contrib/opensolaris/lib/libdtrace/common/dt_printf.c =================================================================== --- head/cddl/contrib/opensolaris/lib/libdtrace/common/dt_printf.c (revision 322167) +++ head/cddl/contrib/opensolaris/lib/libdtrace/common/dt_printf.c (revision 322168) @@ -1,2083 +1,2083 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. */ #ifdef illumos #include #else #define ABS(a) ((a) < 0 ? 
-(a) : (a)) #endif #include #include #include #ifdef illumos #include #endif #include #include #include #include #include #include #include #include #include #include #include #include /*ARGSUSED*/ static int pfcheck_addr(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) { return (dt_node_is_pointer(dnp) || dt_node_is_integer(dnp)); } /*ARGSUSED*/ static int pfcheck_kaddr(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) { return (dt_node_is_pointer(dnp) || dt_node_is_integer(dnp) || dt_node_is_symaddr(dnp)); } /*ARGSUSED*/ static int pfcheck_uaddr(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) { dtrace_hdl_t *dtp = pfv->pfv_dtp; dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target"); if (dt_node_is_usymaddr(dnp)) return (1); if (idp == NULL || idp->di_id == 0) return (0); return (dt_node_is_pointer(dnp) || dt_node_is_integer(dnp)); } /*ARGSUSED*/ static int pfcheck_stack(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) { return (dt_node_is_stack(dnp)); } /*ARGSUSED*/ static int pfcheck_time(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) { return (dt_node_is_integer(dnp) && dt_node_type_size(dnp) == sizeof (uint64_t)); } /*ARGSUSED*/ static int pfcheck_str(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) { ctf_file_t *ctfp; ctf_encoding_t e; ctf_arinfo_t r; ctf_id_t base; uint_t kind; if (dt_node_is_string(dnp)) return (1); ctfp = dnp->dn_ctfp; base = ctf_type_resolve(ctfp, dnp->dn_type); kind = ctf_type_kind(ctfp, base); return (kind == CTF_K_ARRAY && ctf_array_info(ctfp, base, &r) == 0 && (base = ctf_type_resolve(ctfp, r.ctr_contents)) != CTF_ERR && ctf_type_encoding(ctfp, base, &e) == 0 && IS_CHAR(e)); } /*ARGSUSED*/ static int pfcheck_wstr(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) { ctf_file_t *ctfp = dnp->dn_ctfp; ctf_id_t base = ctf_type_resolve(ctfp, dnp->dn_type); uint_t kind = ctf_type_kind(ctfp, base); ctf_encoding_t e; ctf_arinfo_t r; return (kind == CTF_K_ARRAY && ctf_array_info(ctfp, base, &r) == 0 && (base = ctf_type_resolve(ctfp, r.ctr_contents)) != CTF_ERR && ctf_type_kind(ctfp, base) == CTF_K_INTEGER && ctf_type_encoding(ctfp, base, &e) == 0 && e.cte_bits == 32); } /*ARGSUSED*/ static int pfcheck_csi(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) { return (dt_node_is_integer(dnp) && dt_node_type_size(dnp) <= sizeof (int)); } /*ARGSUSED*/ static int pfcheck_fp(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) { return (dt_node_is_float(dnp)); } /*ARGSUSED*/ static int pfcheck_xint(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) { return (dt_node_is_integer(dnp)); } /*ARGSUSED*/ static int pfcheck_dint(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) { if (dnp->dn_flags & DT_NF_SIGNED) pfd->pfd_fmt[strlen(pfd->pfd_fmt) - 1] = 'i'; else pfd->pfd_fmt[strlen(pfd->pfd_fmt) - 1] = 'u'; return (dt_node_is_integer(dnp)); } /*ARGSUSED*/ static int pfcheck_xshort(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) { ctf_file_t *ctfp = dnp->dn_ctfp; ctf_id_t type = ctf_type_resolve(ctfp, dnp->dn_type); char n[DT_TYPE_NAMELEN]; return (ctf_type_name(ctfp, type, n, sizeof (n)) != NULL && ( strcmp(n, "short") == 0 || strcmp(n, "signed short") == 0 || strcmp(n, "unsigned short") == 0)); } /*ARGSUSED*/ static int pfcheck_xlong(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) { ctf_file_t *ctfp = dnp->dn_ctfp; ctf_id_t type = ctf_type_resolve(ctfp, dnp->dn_type); char n[DT_TYPE_NAMELEN]; return (ctf_type_name(ctfp, type, n, sizeof (n)) != NULL && ( strcmp(n, "long") == 0 || strcmp(n, "signed long") == 0 || strcmp(n, "unsigned long") == 
0)); } /*ARGSUSED*/ static int pfcheck_xlonglong(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) { ctf_file_t *ctfp = dnp->dn_ctfp; ctf_id_t type = dnp->dn_type; char n[DT_TYPE_NAMELEN]; if (ctf_type_name(ctfp, ctf_type_resolve(ctfp, type), n, sizeof (n)) != NULL && (strcmp(n, "long long") == 0 || strcmp(n, "signed long long") == 0 || strcmp(n, "unsigned long long") == 0)) return (1); /* * If the type used for %llx or %llX is not an [unsigned] long long, we * also permit it to be a [u]int64_t or any typedef thereof. We know * that these typedefs are guaranteed to work with %ll[xX] in either * compilation environment even though they alias to "long" in LP64. */ while (ctf_type_kind(ctfp, type) == CTF_K_TYPEDEF) { if (ctf_type_name(ctfp, type, n, sizeof (n)) != NULL && (strcmp(n, "int64_t") == 0 || strcmp(n, "uint64_t") == 0)) return (1); type = ctf_type_reference(ctfp, type); } return (0); } /*ARGSUSED*/ static int pfcheck_type(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) { return (ctf_type_compat(dnp->dn_ctfp, ctf_type_resolve(dnp->dn_ctfp, dnp->dn_type), pfd->pfd_conv->pfc_dctfp, pfd->pfd_conv->pfc_dtype)); } /*ARGSUSED*/ static int pfprint_sint(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t unormal) { int64_t normal = (int64_t)unormal; int32_t n = (int32_t)normal; switch (size) { case sizeof (int8_t): return (dt_printf(dtp, fp, format, (int32_t)*((int8_t *)addr) / n)); case sizeof (int16_t): return (dt_printf(dtp, fp, format, (int32_t)*((int16_t *)addr) / n)); case sizeof (int32_t): return (dt_printf(dtp, fp, format, *((int32_t *)addr) / n)); case sizeof (int64_t): return (dt_printf(dtp, fp, format, *((int64_t *)addr) / normal)); default: return (dt_set_errno(dtp, EDT_DMISMATCH)); } } /*ARGSUSED*/ static int pfprint_uint(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { uint32_t n = (uint32_t)normal; switch (size) { case sizeof (uint8_t): return (dt_printf(dtp, fp, format, (uint32_t)*((uint8_t *)addr) / n)); case sizeof (uint16_t): return (dt_printf(dtp, fp, format, (uint32_t)*((uint16_t *)addr) / n)); case sizeof (uint32_t): return (dt_printf(dtp, fp, format, *((uint32_t *)addr) / n)); case sizeof (uint64_t): return (dt_printf(dtp, fp, format, *((uint64_t *)addr) / normal)); default: return (dt_set_errno(dtp, EDT_DMISMATCH)); } } static int pfprint_dint(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { if (pfd->pfd_flags & DT_PFCONV_SIGNED) return (pfprint_sint(dtp, fp, format, pfd, addr, size, normal)); else return (pfprint_uint(dtp, fp, format, pfd, addr, size, normal)); } /*ARGSUSED*/ static int pfprint_fp(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { double n = (double)normal; long double ldn = (long double)normal; switch (size) { case sizeof (float): return (dt_printf(dtp, fp, format, (double)*((float *)addr) / n)); case sizeof (double): return (dt_printf(dtp, fp, format, *((double *)addr) / n)); #if !defined(__arm__) && !defined(__powerpc__) && \ - !defined(__mips__) && !defined(__riscv__) + !defined(__mips__) && !defined(__riscv) case sizeof (long double): return (dt_printf(dtp, fp, format, *((long double *)addr) / ldn)); #endif default: return (dt_set_errno(dtp, EDT_DMISMATCH)); } } /*ARGSUSED*/ static int pfprint_addr(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t 
*pfd, const void *addr, size_t size, uint64_t normal) { char *s; int n, len = 256; uint64_t val; switch (size) { case sizeof (uint32_t): val = *((uint32_t *)addr); break; case sizeof (uint64_t): val = *((uint64_t *)addr); break; default: return (dt_set_errno(dtp, EDT_DMISMATCH)); } do { n = len; s = alloca(n); } while ((len = dtrace_addr2str(dtp, val, s, n)) > n); return (dt_printf(dtp, fp, format, s)); } /*ARGSUSED*/ static int pfprint_mod(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { return (dt_print_mod(dtp, fp, format, (caddr_t)addr)); } /*ARGSUSED*/ static int pfprint_umod(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { return (dt_print_umod(dtp, fp, format, (caddr_t)addr)); } /*ARGSUSED*/ static int pfprint_uaddr(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { char *s; int n, len = 256; uint64_t val, pid = 0; dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target"); switch (size) { case sizeof (uint32_t): val = (u_longlong_t)*((uint32_t *)addr); break; case sizeof (uint64_t): val = (u_longlong_t)*((uint64_t *)addr); break; case sizeof (uint64_t) * 2: pid = ((uint64_t *)(uintptr_t)addr)[0]; val = ((uint64_t *)(uintptr_t)addr)[1]; break; default: return (dt_set_errno(dtp, EDT_DMISMATCH)); } if (pid == 0 && dtp->dt_vector == NULL && idp != NULL) pid = idp->di_id; do { n = len; s = alloca(n); } while ((len = dtrace_uaddr2str(dtp, pid, val, s, n)) > n); return (dt_printf(dtp, fp, format, s)); } /*ARGSUSED*/ static int pfprint_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *vaddr, size_t size, uint64_t normal) { int width; dtrace_optval_t saved = dtp->dt_options[DTRACEOPT_STACKINDENT]; const dtrace_recdesc_t *rec = pfd->pfd_rec; caddr_t addr = (caddr_t)vaddr; int err = 0; /* * We have stashed the value of the STACKINDENT option, and we will * now override it for the purposes of formatting the stack. If the * field has been specified as left-aligned (i.e. (%-#), we set the * indentation to be the width. This is a slightly odd semantic, but * it's useful functionality -- and it's slightly odd to begin with to * be using a single format specifier to be formatting multiple lines * of text... */ if (pfd->pfd_dynwidth < 0) { assert(pfd->pfd_flags & DT_PFCONV_DYNWIDTH); width = -pfd->pfd_dynwidth; } else if (pfd->pfd_flags & DT_PFCONV_LEFT) { width = pfd->pfd_dynwidth ? pfd->pfd_dynwidth : pfd->pfd_width; } else { width = 0; } dtp->dt_options[DTRACEOPT_STACKINDENT] = width; switch (rec->dtrd_action) { case DTRACEACT_USTACK: case DTRACEACT_JSTACK: err = dt_print_ustack(dtp, fp, format, addr, rec->dtrd_arg); break; case DTRACEACT_STACK: err = dt_print_stack(dtp, fp, format, addr, rec->dtrd_arg, rec->dtrd_size / rec->dtrd_arg); break; default: assert(0); } dtp->dt_options[DTRACEOPT_STACKINDENT] = saved; return (err); } /*ARGSUSED*/ static int pfprint_time(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { char src[32], buf[32], *dst = buf; hrtime_t time = *((uint64_t *)addr); time_t sec = (time_t)(time / NANOSEC); int i; /* * ctime(3C) returns a string of the form "Dec 3 17:20:00 1973\n\0". * Below, we turn this into the canonical adb/mdb /[yY] format, * "1973 Dec 3 17:20:00". 
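 *
 * For example, the full ctime(3C) result is "Sun Dec  3 17:20:00 1973\n":
 * the four-digit year lives at src[20..23] and the month/day/time text at
 * src[3..18] (including the space that follows the weekday), which is
 * exactly what the two copy loops below pick out.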
*/ #ifdef illumos (void) ctime_r(&sec, src, sizeof (src)); #else (void) ctime_r(&sec, src); #endif /* * Place the 4-digit year at the head of the string... */ for (i = 20; i < 24; i++) *dst++ = src[i]; /* * ...and follow it with the remainder (month, day, hh:mm:ss). */ for (i = 3; i < 19; i++) *dst++ = src[i]; *dst = '\0'; return (dt_printf(dtp, fp, format, buf)); } /* * This prints the time in RFC 822 standard form. This is useful for emitting * notions of time that are consumed by standard tools (e.g., as part of an * RSS feed). */ /*ARGSUSED*/ static int pfprint_time822(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { hrtime_t time = *((uint64_t *)addr); time_t sec = (time_t)(time / NANOSEC); struct tm tm; char buf[64]; (void) localtime_r(&sec, &tm); (void) strftime(buf, sizeof (buf), "%a, %d %b %G %T %Z", &tm); return (dt_printf(dtp, fp, format, buf)); } /*ARGSUSED*/ static int pfprint_port(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { uint16_t port = htons(*((uint16_t *)addr)); char buf[256]; struct servent *sv, res; #ifdef illumos if ((sv = getservbyport_r(port, NULL, &res, buf, sizeof (buf))) != NULL) #else if (getservbyport_r(port, NULL, &res, buf, sizeof (buf), &sv) > 0) #endif return (dt_printf(dtp, fp, format, sv->s_name)); (void) snprintf(buf, sizeof (buf), "%d", *((uint16_t *)addr)); return (dt_printf(dtp, fp, format, buf)); } /*ARGSUSED*/ static int pfprint_inetaddr(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { char *s = alloca(size + 1); struct hostent *host, res; char inetaddr[NS_IN6ADDRSZ]; char buf[1024]; int e; bcopy(addr, s, size); s[size] = '\0'; if (strchr(s, ':') == NULL && inet_pton(AF_INET, s, inetaddr) != -1) { #ifdef illumos if ((host = gethostbyaddr_r(inetaddr, NS_INADDRSZ, AF_INET, &res, buf, sizeof (buf), &e)) != NULL) #else if (gethostbyaddr_r(inetaddr, NS_INADDRSZ, AF_INET, &res, buf, sizeof (buf), &host, &e) > 0) #endif return (dt_printf(dtp, fp, format, host->h_name)); } else if (inet_pton(AF_INET6, s, inetaddr) != -1) { if ((host = getipnodebyaddr(inetaddr, NS_IN6ADDRSZ, AF_INET6, &e)) != NULL) return (dt_printf(dtp, fp, format, host->h_name)); } return (dt_printf(dtp, fp, format, s)); } /*ARGSUSED*/ static int pfprint_cstr(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { char *s = alloca(size + 1); bcopy(addr, s, size); s[size] = '\0'; return (dt_printf(dtp, fp, format, s)); } /*ARGSUSED*/ static int pfprint_wstr(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { wchar_t *ws = alloca(size + sizeof (wchar_t)); bcopy(addr, ws, size); ws[size / sizeof (wchar_t)] = L'\0'; return (dt_printf(dtp, fp, format, ws)); } /*ARGSUSED*/ static int pfprint_estr(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { char *s; int n; if ((s = strchr2esc(addr, size)) == NULL) return (dt_set_errno(dtp, EDT_NOMEM)); n = dt_printf(dtp, fp, format, s); free(s); return (n); } static int pfprint_echr(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { char c; switch (size) { case sizeof (int8_t): c = *(int8_t *)addr; break; case sizeof (int16_t): c = *(int16_t *)addr; break; 
case sizeof (int32_t): c = *(int32_t *)addr; break; default: return (dt_set_errno(dtp, EDT_DMISMATCH)); } return (pfprint_estr(dtp, fp, format, pfd, &c, 1, normal)); } /*ARGSUSED*/ static int pfprint_pct(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { return (dt_printf(dtp, fp, "%%")); } static const char pfproto_xint[] = "char, short, int, long, or long long"; static const char pfproto_csi[] = "char, short, or int"; static const char pfproto_fp[] = "float, double, or long double"; static const char pfproto_addr[] = "pointer or integer"; static const char pfproto_uaddr[] = "pointer or integer (with -p/-c) or _usymaddr (without -p/-c)"; static const char pfproto_cstr[] = "char [] or string (or use stringof)"; static const char pfproto_wstr[] = "wchar_t []"; /* * Printf format conversion dictionary. This table should match the set of * conversions offered by printf(3C), as well as some additional extensions. * The second parameter is an ASCII string which is either an actual type * name we should look up (if pfcheck_type is specified), or just a descriptive * string of the types expected for use in error messages. */ static const dt_pfconv_t _dtrace_conversions[] = { { "a", "s", pfproto_addr, pfcheck_kaddr, pfprint_addr }, { "A", "s", pfproto_uaddr, pfcheck_uaddr, pfprint_uaddr }, { "c", "c", pfproto_csi, pfcheck_csi, pfprint_sint }, { "C", "s", pfproto_csi, pfcheck_csi, pfprint_echr }, { "d", "d", pfproto_xint, pfcheck_dint, pfprint_dint }, { "e", "e", pfproto_fp, pfcheck_fp, pfprint_fp }, { "E", "E", pfproto_fp, pfcheck_fp, pfprint_fp }, { "f", "f", pfproto_fp, pfcheck_fp, pfprint_fp }, { "g", "g", pfproto_fp, pfcheck_fp, pfprint_fp }, { "G", "G", pfproto_fp, pfcheck_fp, pfprint_fp }, { "hd", "d", "short", pfcheck_type, pfprint_sint }, { "hi", "i", "short", pfcheck_type, pfprint_sint }, { "ho", "o", "unsigned short", pfcheck_type, pfprint_uint }, { "hu", "u", "unsigned short", pfcheck_type, pfprint_uint }, { "hx", "x", "short", pfcheck_xshort, pfprint_uint }, { "hX", "X", "short", pfcheck_xshort, pfprint_uint }, { "i", "i", pfproto_xint, pfcheck_xint, pfprint_sint }, { "I", "s", pfproto_cstr, pfcheck_str, pfprint_inetaddr }, { "k", "s", "stack", pfcheck_stack, pfprint_stack }, { "lc", "lc", "int", pfcheck_type, pfprint_sint }, /* a.k.a. 
wint_t */ { "ld", "d", "long", pfcheck_type, pfprint_sint }, { "li", "i", "long", pfcheck_type, pfprint_sint }, { "lo", "o", "unsigned long", pfcheck_type, pfprint_uint }, { "lu", "u", "unsigned long", pfcheck_type, pfprint_uint }, { "ls", "ls", pfproto_wstr, pfcheck_wstr, pfprint_wstr }, { "lx", "x", "long", pfcheck_xlong, pfprint_uint }, { "lX", "X", "long", pfcheck_xlong, pfprint_uint }, { "lld", "d", "long long", pfcheck_type, pfprint_sint }, { "lli", "i", "long long", pfcheck_type, pfprint_sint }, { "llo", "o", "unsigned long long", pfcheck_type, pfprint_uint }, { "llu", "u", "unsigned long long", pfcheck_type, pfprint_uint }, { "llx", "x", "long long", pfcheck_xlonglong, pfprint_uint }, { "llX", "X", "long long", pfcheck_xlonglong, pfprint_uint }, { "Le", "e", "long double", pfcheck_type, pfprint_fp }, { "LE", "E", "long double", pfcheck_type, pfprint_fp }, { "Lf", "f", "long double", pfcheck_type, pfprint_fp }, { "Lg", "g", "long double", pfcheck_type, pfprint_fp }, { "LG", "G", "long double", pfcheck_type, pfprint_fp }, { "o", "o", pfproto_xint, pfcheck_xint, pfprint_uint }, { "p", "x", pfproto_addr, pfcheck_addr, pfprint_uint }, { "P", "s", "uint16_t", pfcheck_type, pfprint_port }, { "s", "s", "char [] or string (or use stringof)", pfcheck_str, pfprint_cstr }, { "S", "s", pfproto_cstr, pfcheck_str, pfprint_estr }, { "T", "s", "int64_t", pfcheck_time, pfprint_time822 }, { "u", "u", pfproto_xint, pfcheck_xint, pfprint_uint }, #ifdef illumos { "wc", "wc", "int", pfcheck_type, pfprint_sint }, /* a.k.a. wchar_t */ { "ws", "ws", pfproto_wstr, pfcheck_wstr, pfprint_wstr }, #else { "wc", "lc", "int", pfcheck_type, pfprint_sint }, /* a.k.a. wchar_t */ { "ws", "ls", pfproto_wstr, pfcheck_wstr, pfprint_wstr }, #endif { "x", "x", pfproto_xint, pfcheck_xint, pfprint_uint }, { "X", "X", pfproto_xint, pfcheck_xint, pfprint_uint }, { "Y", "s", "int64_t", pfcheck_time, pfprint_time }, { "%", "%", "void", pfcheck_type, pfprint_pct }, { NULL, NULL, NULL, NULL, NULL } }; int dt_pfdict_create(dtrace_hdl_t *dtp) { uint_t n = _dtrace_strbuckets; const dt_pfconv_t *pfd; dt_pfdict_t *pdi; if ((pdi = malloc(sizeof (dt_pfdict_t))) == NULL || (pdi->pdi_buckets = malloc(sizeof (dt_pfconv_t *) * n)) == NULL) { free(pdi); return (dt_set_errno(dtp, EDT_NOMEM)); } dtp->dt_pfdict = pdi; bzero(pdi->pdi_buckets, sizeof (dt_pfconv_t *) * n); pdi->pdi_nbuckets = n; for (pfd = _dtrace_conversions; pfd->pfc_name != NULL; pfd++) { dtrace_typeinfo_t dtt; dt_pfconv_t *pfc; uint_t h; if ((pfc = malloc(sizeof (dt_pfconv_t))) == NULL) { dt_pfdict_destroy(dtp); return (dt_set_errno(dtp, EDT_NOMEM)); } bcopy(pfd, pfc, sizeof (dt_pfconv_t)); h = dt_strtab_hash(pfc->pfc_name, NULL) % n; pfc->pfc_next = pdi->pdi_buckets[h]; pdi->pdi_buckets[h] = pfc; dtt.dtt_ctfp = NULL; dtt.dtt_type = CTF_ERR; /* * The "D" container or its parent must contain a definition of * any type referenced by a printf conversion. If none can be * found, we fail to initialize the printf dictionary. */ if (pfc->pfc_check == &pfcheck_type && dtrace_lookup_by_type( dtp, DTRACE_OBJ_DDEFS, pfc->pfc_tstr, &dtt) != 0) { dt_pfdict_destroy(dtp); return (dt_set_errno(dtp, EDT_NOCONV)); } pfc->pfc_dctfp = dtt.dtt_ctfp; pfc->pfc_dtype = dtt.dtt_type; /* * The "C" container may contain an alternate definition of an * explicit conversion type. If it does, use it; otherwise * just set pfc_ctype to pfc_dtype so it is always valid. 
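 *
 * For example, the "%hd" conversion names the explicit type "short": the
 * "D" definition looked up above always supplies pfc_dctfp/pfc_dtype,
 * while a definition found in the user's "C" compilation environment, if
 * any, supplies pfc_cctfp/pfc_ctype below; otherwise pfc_ctype simply
 * falls back to pfc_dtype.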
*/ if (pfc->pfc_check == &pfcheck_type && dtrace_lookup_by_type( dtp, DTRACE_OBJ_CDEFS, pfc->pfc_tstr, &dtt) == 0) { pfc->pfc_cctfp = dtt.dtt_ctfp; pfc->pfc_ctype = dtt.dtt_type; } else { pfc->pfc_cctfp = pfc->pfc_dctfp; pfc->pfc_ctype = pfc->pfc_dtype; } if (pfc->pfc_check == NULL || pfc->pfc_print == NULL || pfc->pfc_ofmt == NULL || pfc->pfc_tstr == NULL) { dt_pfdict_destroy(dtp); return (dt_set_errno(dtp, EDT_BADCONV)); } dt_dprintf("loaded printf conversion %%%s\n", pfc->pfc_name); } return (0); } void dt_pfdict_destroy(dtrace_hdl_t *dtp) { dt_pfdict_t *pdi = dtp->dt_pfdict; dt_pfconv_t *pfc, *nfc; uint_t i; if (pdi == NULL) return; for (i = 0; i < pdi->pdi_nbuckets; i++) { for (pfc = pdi->pdi_buckets[i]; pfc != NULL; pfc = nfc) { nfc = pfc->pfc_next; free(pfc); } } free(pdi->pdi_buckets); free(pdi); dtp->dt_pfdict = NULL; } static const dt_pfconv_t * dt_pfdict_lookup(dtrace_hdl_t *dtp, const char *name) { dt_pfdict_t *pdi = dtp->dt_pfdict; uint_t h = dt_strtab_hash(name, NULL) % pdi->pdi_nbuckets; const dt_pfconv_t *pfc; for (pfc = pdi->pdi_buckets[h]; pfc != NULL; pfc = pfc->pfc_next) { if (strcmp(pfc->pfc_name, name) == 0) break; } return (pfc); } static dt_pfargv_t * dt_printf_error(dtrace_hdl_t *dtp, int err) { if (yypcb != NULL) longjmp(yypcb->pcb_jmpbuf, err); (void) dt_set_errno(dtp, err); return (NULL); } dt_pfargv_t * dt_printf_create(dtrace_hdl_t *dtp, const char *s) { dt_pfargd_t *pfd, *nfd = NULL; dt_pfargv_t *pfv; const char *p, *q; char *format; if ((pfv = malloc(sizeof (dt_pfargv_t))) == NULL || (format = strdup(s)) == NULL) { free(pfv); return (dt_printf_error(dtp, EDT_NOMEM)); } pfv->pfv_format = format; pfv->pfv_argv = NULL; pfv->pfv_argc = 0; pfv->pfv_flags = 0; pfv->pfv_dtp = dtp; for (q = format; (p = strchr(q, '%')) != NULL; q = *p ? p + 1 : p) { uint_t namelen = 0; int digits = 0; int dot = 0; char name[8]; char c; int n; if ((pfd = malloc(sizeof (dt_pfargd_t))) == NULL) { dt_printf_destroy(pfv); return (dt_printf_error(dtp, EDT_NOMEM)); } if (pfv->pfv_argv != NULL) nfd->pfd_next = pfd; else pfv->pfv_argv = pfd; bzero(pfd, sizeof (dt_pfargd_t)); pfv->pfv_argc++; nfd = pfd; if (p > q) { pfd->pfd_preflen = (size_t)(p - q); pfd->pfd_prefix = q; } fmt_switch: switch (c = *++p) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (dot == 0 && digits == 0 && c == '0') { pfd->pfd_flags |= DT_PFCONV_ZPAD; pfd->pfd_flags &= ~DT_PFCONV_LEFT; goto fmt_switch; } for (n = 0; isdigit(c); c = *++p) n = n * 10 + c - '0'; if (dot) pfd->pfd_prec = n; else pfd->pfd_width = n; p--; digits++; goto fmt_switch; case '#': pfd->pfd_flags |= DT_PFCONV_ALT; goto fmt_switch; case '*': n = dot ? DT_PFCONV_DYNPREC : DT_PFCONV_DYNWIDTH; if (pfd->pfd_flags & n) { yywarn("format conversion #%u has more than " "one '*' specified for the output %s\n", pfv->pfv_argc, n ? "precision" : "width"); dt_printf_destroy(pfv); return (dt_printf_error(dtp, EDT_COMPILER)); } pfd->pfd_flags |= n; goto fmt_switch; case '+': pfd->pfd_flags |= DT_PFCONV_SPOS; goto fmt_switch; case '-': pfd->pfd_flags |= DT_PFCONV_LEFT; pfd->pfd_flags &= ~DT_PFCONV_ZPAD; goto fmt_switch; case '.': if (dot++ != 0) { yywarn("format conversion #%u has more than " "one '.' 
specified\n", pfv->pfv_argc); dt_printf_destroy(pfv); return (dt_printf_error(dtp, EDT_COMPILER)); } digits = 0; goto fmt_switch; case '?': if (dtp->dt_conf.dtc_ctfmodel == CTF_MODEL_LP64) pfd->pfd_width = 16; else pfd->pfd_width = 8; goto fmt_switch; case '@': pfd->pfd_flags |= DT_PFCONV_AGG; goto fmt_switch; case '\'': pfd->pfd_flags |= DT_PFCONV_GROUP; goto fmt_switch; case ' ': pfd->pfd_flags |= DT_PFCONV_SPACE; goto fmt_switch; case '$': yywarn("format conversion #%u uses unsupported " "positional format (%%n$)\n", pfv->pfv_argc); dt_printf_destroy(pfv); return (dt_printf_error(dtp, EDT_COMPILER)); case '%': if (p[-1] == '%') goto default_lbl; /* if %% then use "%" conv */ yywarn("format conversion #%u cannot be combined " "with other format flags: %%%%\n", pfv->pfv_argc); dt_printf_destroy(pfv); return (dt_printf_error(dtp, EDT_COMPILER)); case '\0': yywarn("format conversion #%u name expected before " "end of format string\n", pfv->pfv_argc); dt_printf_destroy(pfv); return (dt_printf_error(dtp, EDT_COMPILER)); case 'h': case 'l': case 'L': case 'w': if (namelen < sizeof (name) - 2) name[namelen++] = c; goto fmt_switch; default_lbl: default: name[namelen++] = c; name[namelen] = '\0'; } pfd->pfd_conv = dt_pfdict_lookup(dtp, name); if (pfd->pfd_conv == NULL) { yywarn("format conversion #%u is undefined: %%%s\n", pfv->pfv_argc, name); dt_printf_destroy(pfv); return (dt_printf_error(dtp, EDT_COMPILER)); } } if (*q != '\0' || *format == '\0') { if ((pfd = malloc(sizeof (dt_pfargd_t))) == NULL) { dt_printf_destroy(pfv); return (dt_printf_error(dtp, EDT_NOMEM)); } if (pfv->pfv_argv != NULL) nfd->pfd_next = pfd; else pfv->pfv_argv = pfd; bzero(pfd, sizeof (dt_pfargd_t)); pfv->pfv_argc++; pfd->pfd_prefix = q; pfd->pfd_preflen = strlen(q); } return (pfv); } void dt_printf_destroy(dt_pfargv_t *pfv) { dt_pfargd_t *pfd, *nfd; for (pfd = pfv->pfv_argv; pfd != NULL; pfd = nfd) { nfd = pfd->pfd_next; free(pfd); } free(pfv->pfv_format); free(pfv); } void dt_printf_validate(dt_pfargv_t *pfv, uint_t flags, dt_ident_t *idp, int foff, dtrace_actkind_t kind, dt_node_t *dnp) { dt_pfargd_t *pfd = pfv->pfv_argv; const char *func = idp->di_name; char n[DT_TYPE_NAMELEN]; dtrace_typeinfo_t dtt; const char *aggtype; dt_node_t aggnode; int i, j; if (pfv->pfv_format[0] == '\0') { xyerror(D_PRINTF_FMT_EMPTY, "%s( ) format string is empty\n", func); } pfv->pfv_flags = flags; /* * We fake up a parse node representing the type that can be used with * an aggregation result conversion, which -- for all but count() -- * is a signed quantity. 
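 *
 * For example, printa("%@d\n", @a) validates the %@d conversion against
 * a fake int64_t node when @a aggregates with sum() or avg(), but against
 * uint64_t when @a is a count() aggregation.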
*/ if (kind != DTRACEAGG_COUNT) aggtype = "int64_t"; else aggtype = "uint64_t"; if (dt_type_lookup(aggtype, &dtt) != 0) xyerror(D_TYPE_ERR, "failed to lookup agg type %s\n", aggtype); bzero(&aggnode, sizeof (aggnode)); dt_node_type_assign(&aggnode, dtt.dtt_ctfp, dtt.dtt_type, B_FALSE); for (i = 0, j = 0; i < pfv->pfv_argc; i++, pfd = pfd->pfd_next) { const dt_pfconv_t *pfc = pfd->pfd_conv; const char *dyns[2]; int dync = 0; char vname[64]; dt_node_t *vnp; if (pfc == NULL) continue; /* no checking if argd is just a prefix */ if (pfc->pfc_print == &pfprint_pct) { (void) strcat(pfd->pfd_fmt, pfc->pfc_ofmt); continue; } if (pfd->pfd_flags & DT_PFCONV_DYNPREC) dyns[dync++] = ".*"; if (pfd->pfd_flags & DT_PFCONV_DYNWIDTH) dyns[dync++] = "*"; for (; dync != 0; dync--) { if (dnp == NULL) { xyerror(D_PRINTF_DYN_PROTO, "%s( ) prototype mismatch: conversion " "#%d (%%%s) is missing a corresponding " "\"%s\" argument\n", func, i + 1, pfc->pfc_name, dyns[dync - 1]); } if (dt_node_is_integer(dnp) == 0) { xyerror(D_PRINTF_DYN_TYPE, "%s( ) argument #%d is incompatible " "with conversion #%d prototype:\n" "\tconversion: %% %s %s\n" "\t prototype: int\n\t argument: %s\n", func, j + foff + 1, i + 1, dyns[dync - 1], pfc->pfc_name, dt_node_type_name(dnp, n, sizeof (n))); } dnp = dnp->dn_list; j++; } /* * If this conversion is consuming the aggregation data, set * the value node pointer (vnp) to a fake node based on the * aggregating function result type. Otherwise assign vnp to * the next parse node in the argument list, if there is one. */ if (pfd->pfd_flags & DT_PFCONV_AGG) { if (!(flags & DT_PRINTF_AGGREGATION)) { xyerror(D_PRINTF_AGG_CONV, "%%@ conversion requires an aggregation" " and is not for use with %s( )\n", func); } (void) strlcpy(vname, "aggregating action", sizeof (vname)); vnp = &aggnode; } else if (dnp == NULL) { xyerror(D_PRINTF_ARG_PROTO, "%s( ) prototype mismatch: conversion #%d (%%" "%s) is missing a corresponding value argument\n", func, i + 1, pfc->pfc_name); } else { (void) snprintf(vname, sizeof (vname), "argument #%d", j + foff + 1); vnp = dnp; dnp = dnp->dn_list; j++; } /* * Fill in the proposed final format string by prepending any * size-related prefixes to the pfconv's format string. The * pfc_check() function below may optionally modify the format * as part of validating the type of the input argument. */ if (pfc->pfc_print == &pfprint_sint || pfc->pfc_print == &pfprint_uint || pfc->pfc_print == &pfprint_dint) { if (dt_node_type_size(vnp) == sizeof (uint64_t)) (void) strcpy(pfd->pfd_fmt, "ll"); } else if (pfc->pfc_print == &pfprint_fp) { if (dt_node_type_size(vnp) == sizeof (long double)) (void) strcpy(pfd->pfd_fmt, "L"); } (void) strcat(pfd->pfd_fmt, pfc->pfc_ofmt); /* * Validate the format conversion against the value node type. * If the conversion is good, create the descriptor format * string by concatenating together any required printf(3C) * size prefixes with the conversion's native format string. 
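 *
 * For example, a %d conversion whose value argument is 64 bits wide picks
 * up the "ll" prefix above, so pfd_fmt becomes "lld" and the conversion is
 * ultimately rendered with a "%lld" format.
 */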
*/ if (pfc->pfc_check(pfv, pfd, vnp) == 0) { xyerror(D_PRINTF_ARG_TYPE, "%s( ) %s is incompatible with " "conversion #%d prototype:\n\tconversion: %%%s\n" "\t prototype: %s\n\t argument: %s\n", func, vname, i + 1, pfc->pfc_name, pfc->pfc_tstr, dt_node_type_name(vnp, n, sizeof (n))); } } if ((flags & DT_PRINTF_EXACTLEN) && dnp != NULL) { xyerror(D_PRINTF_ARG_EXTRA, "%s( ) prototype mismatch: only %d arguments " "required by this format string\n", func, j); } } void dt_printa_validate(dt_node_t *lhs, dt_node_t *rhs) { dt_ident_t *lid, *rid; dt_node_t *lproto, *rproto; int largc, rargc, argn; char n1[DT_TYPE_NAMELEN]; char n2[DT_TYPE_NAMELEN]; assert(lhs->dn_kind == DT_NODE_AGG); assert(rhs->dn_kind == DT_NODE_AGG); lid = lhs->dn_ident; rid = rhs->dn_ident; lproto = ((dt_idsig_t *)lid->di_data)->dis_args; rproto = ((dt_idsig_t *)rid->di_data)->dis_args; /* * First, get an argument count on each side. These must match. */ for (largc = 0; lproto != NULL; lproto = lproto->dn_list) largc++; for (rargc = 0; rproto != NULL; rproto = rproto->dn_list) rargc++; if (largc != rargc) { xyerror(D_PRINTA_AGGKEY, "printa( ): @%s and @%s do not have " "matching key signatures: @%s has %d key%s, @%s has %d " "key%s", lid->di_name, rid->di_name, lid->di_name, largc, largc == 1 ? "" : "s", rid->di_name, rargc, rargc == 1 ? "" : "s"); } /* * Now iterate over the keys to verify that each type matches. */ lproto = ((dt_idsig_t *)lid->di_data)->dis_args; rproto = ((dt_idsig_t *)rid->di_data)->dis_args; for (argn = 1; lproto != NULL; argn++, lproto = lproto->dn_list, rproto = rproto->dn_list) { assert(rproto != NULL); if (dt_node_is_argcompat(lproto, rproto)) continue; xyerror(D_PRINTA_AGGPROTO, "printa( ): @%s[ ] key #%d is " "incompatible with @%s:\n%9s key #%d: %s\n" "%9s key #%d: %s\n", rid->di_name, argn, lid->di_name, lid->di_name, argn, dt_node_type_name(lproto, n1, sizeof (n1)), rid->di_name, argn, dt_node_type_name(rproto, n2, sizeof (n2))); } } static int dt_printf_getint(dtrace_hdl_t *dtp, const dtrace_recdesc_t *recp, uint_t nrecs, const void *buf, size_t len, int *ip) { uintptr_t addr; if (nrecs == 0) return (dt_set_errno(dtp, EDT_DMISMATCH)); addr = (uintptr_t)buf + recp->dtrd_offset; if (addr + sizeof (int) > (uintptr_t)buf + len) return (dt_set_errno(dtp, EDT_DOFFSET)); if (addr & (recp->dtrd_alignment - 1)) return (dt_set_errno(dtp, EDT_DALIGN)); switch (recp->dtrd_size) { case sizeof (int8_t): *ip = (int)*((int8_t *)addr); break; case sizeof (int16_t): *ip = (int)*((int16_t *)addr); break; case sizeof (int32_t): *ip = (int)*((int32_t *)addr); break; case sizeof (int64_t): *ip = (int)*((int64_t *)addr); break; default: return (dt_set_errno(dtp, EDT_DMISMATCH)); } return (0); } /*ARGSUSED*/ static int pfprint_average(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { const uint64_t *data = addr; if (size != sizeof (uint64_t) * 2) return (dt_set_errno(dtp, EDT_DMISMATCH)); return (dt_printf(dtp, fp, format, data[0] ? 
data[1] / normal / data[0] : 0)); } /*ARGSUSED*/ static int pfprint_stddev(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { const uint64_t *data = addr; if (size != sizeof (uint64_t) * 4) return (dt_set_errno(dtp, EDT_DMISMATCH)); return (dt_printf(dtp, fp, format, dt_stddev((uint64_t *)data, normal))); } /*ARGSUSED*/ static int pfprint_quantize(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { return (dt_print_quantize(dtp, fp, addr, size, normal)); } /*ARGSUSED*/ static int pfprint_lquantize(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { return (dt_print_lquantize(dtp, fp, addr, size, normal)); } /*ARGSUSED*/ static int pfprint_llquantize(dtrace_hdl_t *dtp, FILE *fp, const char *format, const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) { return (dt_print_llquantize(dtp, fp, addr, size, normal)); } static int dt_printf_format(dtrace_hdl_t *dtp, FILE *fp, const dt_pfargv_t *pfv, const dtrace_recdesc_t *recs, uint_t nrecs, const void *buf, size_t len, const dtrace_aggdata_t **aggsdata, int naggvars) { dt_pfargd_t *pfd = pfv->pfv_argv; const dtrace_recdesc_t *recp = recs; const dtrace_aggdata_t *aggdata; dtrace_aggdesc_t *agg; caddr_t lim = (caddr_t)buf + len, limit; char format[64] = "%"; size_t ret; int i, aggrec, curagg = -1; uint64_t normal; /* * If we are formatting an aggregation, set 'aggrec' to the index of * the final record description (the aggregation result) so we can use * this record index with any conversion where DT_PFCONV_AGG is set. * (The actual aggregation used will vary as we increment through the * aggregation variables that we have been passed.) Finally, we * decrement nrecs to prevent this record from being used with any * other conversion. */ if (pfv->pfv_flags & DT_PRINTF_AGGREGATION) { assert(aggsdata != NULL); assert(naggvars > 0); if (nrecs == 0) return (dt_set_errno(dtp, EDT_DMISMATCH)); curagg = naggvars > 1 ? 1 : 0; aggdata = aggsdata[0]; aggrec = aggdata->dtada_desc->dtagd_nrecs - 1; nrecs--; } for (i = 0; i < pfv->pfv_argc; i++, pfd = pfd->pfd_next) { const dt_pfconv_t *pfc = pfd->pfd_conv; int width = pfd->pfd_width; int prec = pfd->pfd_prec; int rval; const char *start; char *f = format + 1; /* skip initial '%' */ size_t fmtsz = sizeof(format) - 1; const dtrace_recdesc_t *rec; dt_pfprint_f *func; caddr_t addr; size_t size; uint32_t flags; if (pfd->pfd_preflen != 0) { char *tmp = alloca(pfd->pfd_preflen + 1); bcopy(pfd->pfd_prefix, tmp, pfd->pfd_preflen); tmp[pfd->pfd_preflen] = '\0'; if ((rval = dt_printf(dtp, fp, tmp)) < 0) return (rval); if (pfv->pfv_flags & DT_PRINTF_AGGREGATION) { /* * For printa(), we flush the buffer after each * prefix, setting the flags to indicate that * this is part of the printa() format string. */ flags = DTRACE_BUFDATA_AGGFORMAT; if (pfc == NULL && i == pfv->pfv_argc - 1) flags |= DTRACE_BUFDATA_AGGLAST; if (dt_buffered_flush(dtp, NULL, NULL, aggdata, flags) < 0) return (-1); } } if (pfc == NULL) { if (pfv->pfv_argc == 1) return (nrecs != 0); continue; } /* * If the conversion is %%, just invoke the print callback * with no data record and continue; it consumes no record. 
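 *
 * For example, in the format string "%d%%\n" the %d conversion consumes
 * one data record below, while the trailing %% consumes none.
 */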
*/ if (pfc->pfc_print == &pfprint_pct) { if (pfc->pfc_print(dtp, fp, NULL, pfd, NULL, 0, 1) >= 0) continue; return (-1); /* errno is set for us */ } if (pfd->pfd_flags & DT_PFCONV_DYNWIDTH) { if (dt_printf_getint(dtp, recp++, nrecs--, buf, len, &width) == -1) return (-1); /* errno is set for us */ pfd->pfd_dynwidth = width; } else { pfd->pfd_dynwidth = 0; } if ((pfd->pfd_flags & DT_PFCONV_DYNPREC) && dt_printf_getint( dtp, recp++, nrecs--, buf, len, &prec) == -1) return (-1); /* errno is set for us */ if (pfd->pfd_flags & DT_PFCONV_AGG) { /* * This should be impossible -- the compiler shouldn't * create a DT_PFCONV_AGG conversion without an * aggregation present. Still, we'd rather fail * gracefully than blow up... */ if (aggsdata == NULL) return (dt_set_errno(dtp, EDT_DMISMATCH)); aggdata = aggsdata[curagg]; agg = aggdata->dtada_desc; /* * We increment the current aggregation variable, but * not beyond the number of aggregation variables that * we're printing. This has the (desired) effect that * DT_PFCONV_AGG conversions beyond the number of * aggregation variables (re-)convert the aggregation * value of the last aggregation variable. */ if (curagg < naggvars - 1) curagg++; rec = &agg->dtagd_rec[aggrec]; addr = aggdata->dtada_data + rec->dtrd_offset; limit = addr + aggdata->dtada_size; normal = aggdata->dtada_normal; flags = DTRACE_BUFDATA_AGGVAL; } else { if (nrecs == 0) return (dt_set_errno(dtp, EDT_DMISMATCH)); if (pfv->pfv_flags & DT_PRINTF_AGGREGATION) { /* * When printing aggregation keys, we always * set the aggdata to be the representative * (zeroth) aggregation. The aggdata isn't * actually used here in this case, but it is * passed to the buffer handler and must * therefore still be correct. */ aggdata = aggsdata[0]; flags = DTRACE_BUFDATA_AGGKEY; } rec = recp++; nrecs--; addr = (caddr_t)buf + rec->dtrd_offset; limit = lim; normal = 1; } size = rec->dtrd_size; if (addr + size > limit) { dt_dprintf("bad size: addr=%p size=0x%x lim=%p\n", (void *)addr, rec->dtrd_size, (void *)lim); return (dt_set_errno(dtp, EDT_DOFFSET)); } if (rec->dtrd_alignment != 0 && ((uintptr_t)addr & (rec->dtrd_alignment - 1)) != 0) { dt_dprintf("bad align: addr=%p size=0x%x align=0x%x\n", (void *)addr, rec->dtrd_size, rec->dtrd_alignment); return (dt_set_errno(dtp, EDT_DALIGN)); } switch (rec->dtrd_action) { case DTRACEAGG_AVG: func = pfprint_average; break; case DTRACEAGG_STDDEV: func = pfprint_stddev; break; case DTRACEAGG_QUANTIZE: func = pfprint_quantize; break; case DTRACEAGG_LQUANTIZE: func = pfprint_lquantize; break; case DTRACEAGG_LLQUANTIZE: func = pfprint_llquantize; break; case DTRACEACT_MOD: func = pfprint_mod; break; case DTRACEACT_UMOD: func = pfprint_umod; break; default: func = pfc->pfc_print; break; } start = f; if (pfd->pfd_flags & DT_PFCONV_ALT) *f++ = '#'; if (pfd->pfd_flags & DT_PFCONV_ZPAD) *f++ = '0'; if (width < 0 || (pfd->pfd_flags & DT_PFCONV_LEFT)) *f++ = '-'; if (pfd->pfd_flags & DT_PFCONV_SPOS) *f++ = '+'; if (pfd->pfd_flags & DT_PFCONV_GROUP) *f++ = '\''; if (pfd->pfd_flags & DT_PFCONV_SPACE) *f++ = ' '; fmtsz -= f - start; /* * If we're printing a stack and DT_PFCONV_LEFT is set, we * don't add the width to the format string. See the block * comment in pfprint_stack() for a description of the * behavior in this case. 
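 *
 * For example, with "%-40k" the 40 is not emitted as a field width here;
 * pfprint_stack() instead uses it as the stack indentation.
 */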
*/ if (func == pfprint_stack && (pfd->pfd_flags & DT_PFCONV_LEFT)) width = 0; if (width != 0) { ret = snprintf(f, fmtsz, "%d", ABS(width)); f += ret; fmtsz = MAX(0, fmtsz - ret); } if (prec > 0) { ret = snprintf(f, fmtsz, ".%d", prec); f += ret; fmtsz = MAX(0, fmtsz - ret); } if (strlcpy(f, pfd->pfd_fmt, fmtsz) >= fmtsz) return (dt_set_errno(dtp, EDT_COMPILER)); pfd->pfd_rec = rec; if (func(dtp, fp, format, pfd, addr, size, normal) < 0) return (-1); /* errno is set for us */ if (pfv->pfv_flags & DT_PRINTF_AGGREGATION) { /* * For printa(), we flush the buffer after each tuple * element, inidicating that this is the last record * as appropriate. */ if (i == pfv->pfv_argc - 1) flags |= DTRACE_BUFDATA_AGGLAST; if (dt_buffered_flush(dtp, NULL, rec, aggdata, flags) < 0) return (-1); } } return ((int)(recp - recs)); } int dtrace_sprintf(dtrace_hdl_t *dtp, FILE *fp, void *fmtdata, const dtrace_recdesc_t *recp, uint_t nrecs, const void *buf, size_t len) { dtrace_optval_t size; int rval; rval = dtrace_getopt(dtp, "strsize", &size); assert(rval == 0); assert(dtp->dt_sprintf_buflen == 0); if (dtp->dt_sprintf_buf != NULL) free(dtp->dt_sprintf_buf); if ((dtp->dt_sprintf_buf = malloc(size)) == NULL) return (dt_set_errno(dtp, EDT_NOMEM)); bzero(dtp->dt_sprintf_buf, size); dtp->dt_sprintf_buflen = size; rval = dt_printf_format(dtp, fp, fmtdata, recp, nrecs, buf, len, NULL, 0); dtp->dt_sprintf_buflen = 0; if (rval == -1) free(dtp->dt_sprintf_buf); return (rval); } /*ARGSUSED*/ int dtrace_system(dtrace_hdl_t *dtp, FILE *fp, void *fmtdata, const dtrace_probedata_t *data, const dtrace_recdesc_t *recp, uint_t nrecs, const void *buf, size_t len) { int rval = dtrace_sprintf(dtp, fp, fmtdata, recp, nrecs, buf, len); if (rval == -1) return (rval); /* * Before we execute the specified command, flush fp to assure that * any prior dt_printf()'s appear before the output of the command * not after it. */ (void) fflush(fp); if (system(dtp->dt_sprintf_buf) == -1) return (dt_set_errno(dtp, errno)); return (rval); } int dtrace_freopen(dtrace_hdl_t *dtp, FILE *fp, void *fmtdata, const dtrace_probedata_t *data, const dtrace_recdesc_t *recp, uint_t nrecs, const void *buf, size_t len) { char selfbuf[40], restorebuf[40], *filename; FILE *nfp; int rval, errval; dt_pfargv_t *pfv = fmtdata; dt_pfargd_t *pfd = pfv->pfv_argv; rval = dtrace_sprintf(dtp, fp, fmtdata, recp, nrecs, buf, len); if (rval == -1 || fp == NULL) return (rval); #ifdef illumos if (pfd->pfd_preflen != 0 && strcmp(pfd->pfd_prefix, DT_FREOPEN_RESTORE) == 0) { /* * The only way to have the format string set to the value * DT_FREOPEN_RESTORE is via the empty freopen() string -- * denoting that we should restore the old stdout. */ assert(strcmp(dtp->dt_sprintf_buf, DT_FREOPEN_RESTORE) == 0); if (dtp->dt_stdout_fd == -1) { /* * We could complain here by generating an error, * but it seems like overkill: it seems that calling * freopen() to restore stdout when freopen() has * never before been called should just be a no-op, * so we just return in this case. */ return (rval); } (void) snprintf(restorebuf, sizeof (restorebuf), "/dev/fd/%d", dtp->dt_stdout_fd); filename = restorebuf; } else { filename = dtp->dt_sprintf_buf; } /* * freopen(3C) will always close the specified stream and underlying * file descriptor -- even if the specified file can't be opened. 
* Even for the semantic cesspool that is standard I/O, this is * surprisingly brain-dead behavior: it means that any failure to * open the specified file destroys the specified stream in the * process -- which is particularly relevant when the specified stream * happens (or rather, happened) to be stdout. This could be resolved * were there an "fdreopen()" equivalent of freopen() that allowed one * to pass a file descriptor instead of the name of a file, but there * is no such thing. However, we can effect this ourselves by first * fopen()'ing the desired file, and then (assuming that that works), * freopen()'ing "/dev/fd/[fileno]", where [fileno] is the underlying * file descriptor for the fopen()'d file. This way, if the fopen() * fails, we can fail the operation without destroying stdout. */ if ((nfp = fopen(filename, "aF")) == NULL) { char *msg = strerror(errno); char *faultstr; int len = 80; len += strlen(msg) + strlen(filename); faultstr = alloca(len); (void) snprintf(faultstr, len, "couldn't freopen() \"%s\": %s", filename, strerror(errno)); if ((errval = dt_handle_liberr(dtp, data, faultstr)) == 0) return (rval); return (errval); } (void) snprintf(selfbuf, sizeof (selfbuf), "/dev/fd/%d", fileno(nfp)); if (dtp->dt_stdout_fd == -1) { /* * If this is the first time that we're calling freopen(), * we're going to stash away the file descriptor for stdout. * We don't expect the dup(2) to fail, so if it does we must * return failure. */ if ((dtp->dt_stdout_fd = dup(fileno(fp))) == -1) { (void) fclose(nfp); return (dt_set_errno(dtp, errno)); } } if (freopen(selfbuf, "aF", fp) == NULL) { (void) fclose(nfp); return (dt_set_errno(dtp, errno)); } (void) fclose(nfp); #else /* !illumos */ /* * The 'standard output' (which is not necessarily stdout) * treatment on FreeBSD is implemented differently than on * Solaris because FreeBSD's freopen() will attempt to re-use * the current file descriptor, causing the previous file to * be closed and thereby preventing it from be re-activated * later. * * For FreeBSD we use the concept of setting an output file * pointer in the DTrace handle if a dtrace_freopen() has * enabled another output file and we leave the caller's * file pointer untouched. If it was actually stdout, then * stdout remains open. If it was another file, then that * file remains open. While a dtrace_freopen() has activated * another file, we keep a pointer to that which we use in * the output functions by preference and only use the caller's * file pointer if no dtrace_freopen() call has been made. * * The check to see if we're re-activating the caller's * output file is much the same as on Solaris. */ if (pfd->pfd_preflen != 0 && strcmp(pfd->pfd_prefix, DT_FREOPEN_RESTORE) == 0) { /* * The only way to have the format string set to the value * DT_FREOPEN_RESTORE is via the empty freopen() string -- * denoting that we should restore the old stdout. */ assert(strcmp(dtp->dt_sprintf_buf, DT_FREOPEN_RESTORE) == 0); if (dtp->dt_freopen_fp == NULL) { /* * We could complain here by generating an error, * but it seems like overkill: it seems that calling * freopen() to restore stdout when freopen() has * never before been called should just be a no-op, * so we just return in this case. */ return (rval); } /* * At this point, to re-active the original output file, * on FreeBSD we only code the current file that this * function opened previously. 
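 *
 * That is, the file opened by the prior dtrace_freopen() is simply closed
 * and dt_freopen_fp reset to NULL, so subsequent output falls back to the
 * caller's file pointer.
 */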
*/ (void) fclose(dtp->dt_freopen_fp); dtp->dt_freopen_fp = NULL; return (rval); } if ((nfp = fopen(dtp->dt_sprintf_buf, "a")) == NULL) { char *msg = strerror(errno); char *faultstr; int len = 80; len += strlen(msg) + strlen(dtp->dt_sprintf_buf); faultstr = alloca(len); (void) snprintf(faultstr, len, "couldn't freopen() \"%s\": %s", dtp->dt_sprintf_buf, strerror(errno)); if ((errval = dt_handle_liberr(dtp, data, faultstr)) == 0) return (rval); return (errval); } if (dtp->dt_freopen_fp != NULL) (void) fclose(dtp->dt_freopen_fp); /* Remember that the output has been redirected to the new file. */ dtp->dt_freopen_fp = nfp; #endif /* illumos */ return (rval); } /*ARGSUSED*/ int dtrace_fprintf(dtrace_hdl_t *dtp, FILE *fp, void *fmtdata, const dtrace_probedata_t *data, const dtrace_recdesc_t *recp, uint_t nrecs, const void *buf, size_t len) { return (dt_printf_format(dtp, fp, fmtdata, recp, nrecs, buf, len, NULL, 0)); } void * dtrace_printf_create(dtrace_hdl_t *dtp, const char *s) { dt_pfargv_t *pfv = dt_printf_create(dtp, s); dt_pfargd_t *pfd; int i; if (pfv == NULL) return (NULL); /* errno has been set for us */ pfd = pfv->pfv_argv; for (i = 0; i < pfv->pfv_argc; i++, pfd = pfd->pfd_next) { const dt_pfconv_t *pfc = pfd->pfd_conv; if (pfc == NULL) continue; /* * If the output format is not %s then we assume that we have * been given a correctly-sized format string, so we copy the * true format name including the size modifier. If the output * format is %s, then either the input format is %s as well or * it is one of our custom formats (e.g. pfprint_addr), so we * must set pfd_fmt to be the output format conversion "s". */ if (strcmp(pfc->pfc_ofmt, "s") != 0) (void) strcat(pfd->pfd_fmt, pfc->pfc_name); else (void) strcat(pfd->pfd_fmt, pfc->pfc_ofmt); } return (pfv); } void * dtrace_printa_create(dtrace_hdl_t *dtp, const char *s) { dt_pfargv_t *pfv = dtrace_printf_create(dtp, s); if (pfv == NULL) return (NULL); /* errno has been set for us */ pfv->pfv_flags |= DT_PRINTF_AGGREGATION; return (pfv); } /*ARGSUSED*/ size_t dtrace_printf_format(dtrace_hdl_t *dtp, void *fmtdata, char *s, size_t len) { dt_pfargv_t *pfv = fmtdata; dt_pfargd_t *pfd = pfv->pfv_argv; /* * An upper bound on the string length is the length of the original * format string, plus three times the number of conversions (each * conversion could add up an additional "ll" and/or pfd_width digit * in the case of converting %? to %16) plus one for a terminating \0. 
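 *
 * For instance, "%?x" can expand to "%16llx": the '?' (one character)
 * becomes "16" (two characters) and an "ll" size prefix may be added, a
 * net growth of three characters for that conversion.
 */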
*/ size_t formatlen = strlen(pfv->pfv_format) + 3 * pfv->pfv_argc + 1; char *format = alloca(formatlen); char *f = format; int i, j; for (i = 0; i < pfv->pfv_argc; i++, pfd = pfd->pfd_next) { const dt_pfconv_t *pfc = pfd->pfd_conv; const char *str; int width = pfd->pfd_width; int prec = pfd->pfd_prec; if (pfd->pfd_preflen != 0) { for (j = 0; j < pfd->pfd_preflen; j++) *f++ = pfd->pfd_prefix[j]; } if (pfc == NULL) continue; *f++ = '%'; if (pfd->pfd_flags & DT_PFCONV_ALT) *f++ = '#'; if (pfd->pfd_flags & DT_PFCONV_ZPAD) *f++ = '0'; if (pfd->pfd_flags & DT_PFCONV_LEFT) *f++ = '-'; if (pfd->pfd_flags & DT_PFCONV_SPOS) *f++ = '+'; if (pfd->pfd_flags & DT_PFCONV_DYNWIDTH) *f++ = '*'; if (pfd->pfd_flags & DT_PFCONV_DYNPREC) { *f++ = '.'; *f++ = '*'; } if (pfd->pfd_flags & DT_PFCONV_GROUP) *f++ = '\''; if (pfd->pfd_flags & DT_PFCONV_SPACE) *f++ = ' '; if (pfd->pfd_flags & DT_PFCONV_AGG) *f++ = '@'; if (width != 0) f += snprintf(f, sizeof (format), "%d", width); if (prec != 0) f += snprintf(f, sizeof (format), ".%d", prec); /* * If the output format is %s, then either %s is the underlying * conversion or the conversion is one of our customized ones, * e.g. pfprint_addr. In these cases, put the original string * name of the conversion (pfc_name) into the pickled format * string rather than the derived conversion (pfd_fmt). */ if (strcmp(pfc->pfc_ofmt, "s") == 0) str = pfc->pfc_name; else str = pfd->pfd_fmt; for (j = 0; str[j] != '\0'; j++) *f++ = str[j]; } *f = '\0'; /* insert nul byte; do not count in return value */ assert(f < format + formatlen); (void) strncpy(s, format, len); return ((size_t)(f - format)); } static int dt_fprinta(const dtrace_aggdata_t *adp, void *arg) { const dtrace_aggdesc_t *agg = adp->dtada_desc; const dtrace_recdesc_t *recp = &agg->dtagd_rec[0]; uint_t nrecs = agg->dtagd_nrecs; dt_pfwalk_t *pfw = arg; dtrace_hdl_t *dtp = pfw->pfw_argv->pfv_dtp; int id; if (dt_printf_getint(dtp, recp++, nrecs--, adp->dtada_data, adp->dtada_size, &id) != 0 || pfw->pfw_aid != id) return (0); /* no aggregation id or id does not match */ if (dt_printf_format(dtp, pfw->pfw_fp, pfw->pfw_argv, recp, nrecs, adp->dtada_data, adp->dtada_size, &adp, 1) == -1) return (pfw->pfw_err = dtp->dt_errno); /* * Cast away the const to set the bit indicating that this aggregation * has been printed. */ ((dtrace_aggdesc_t *)agg)->dtagd_flags |= DTRACE_AGD_PRINTED; return (0); } static int dt_fprintas(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg) { const dtrace_aggdata_t *aggdata = aggsdata[0]; const dtrace_aggdesc_t *agg = aggdata->dtada_desc; const dtrace_recdesc_t *rec = &agg->dtagd_rec[1]; uint_t nrecs = agg->dtagd_nrecs - 1; dt_pfwalk_t *pfw = arg; dtrace_hdl_t *dtp = pfw->pfw_argv->pfv_dtp; int i; if (dt_printf_format(dtp, pfw->pfw_fp, pfw->pfw_argv, rec, nrecs, aggdata->dtada_data, aggdata->dtada_size, aggsdata, naggvars) == -1) return (pfw->pfw_err = dtp->dt_errno); /* * For each aggregation, indicate that it has been printed, casting * away the const as necessary. */ for (i = 1; i < naggvars; i++) { agg = aggsdata[i]->dtada_desc; ((dtrace_aggdesc_t *)agg)->dtagd_flags |= DTRACE_AGD_PRINTED; } return (0); } /*ARGSUSED*/ int dtrace_fprinta(dtrace_hdl_t *dtp, FILE *fp, void *fmtdata, const dtrace_probedata_t *data, const dtrace_recdesc_t *recs, uint_t nrecs, const void *buf, size_t len) { dt_pfwalk_t pfw; int i, naggvars = 0; dtrace_aggvarid_t *aggvars; aggvars = alloca(nrecs * sizeof (dtrace_aggvarid_t)); /* * This might be a printa() with multiple aggregation variables. 
We * need to scan forward through the records until we find a record from * a different statement. */ for (i = 0; i < nrecs; i++) { const dtrace_recdesc_t *nrec = &recs[i]; if (nrec->dtrd_uarg != recs->dtrd_uarg) break; if (nrec->dtrd_action != recs->dtrd_action) return (dt_set_errno(dtp, EDT_BADAGG)); aggvars[naggvars++] = /* LINTED - alignment */ *((dtrace_aggvarid_t *)((caddr_t)buf + nrec->dtrd_offset)); } if (naggvars == 0) return (dt_set_errno(dtp, EDT_BADAGG)); pfw.pfw_argv = fmtdata; pfw.pfw_fp = fp; pfw.pfw_err = 0; if (naggvars == 1) { pfw.pfw_aid = aggvars[0]; if (dtrace_aggregate_walk_sorted(dtp, dt_fprinta, &pfw) == -1 || pfw.pfw_err != 0) return (-1); /* errno is set for us */ } else { if (dtrace_aggregate_walk_joined(dtp, aggvars, naggvars, dt_fprintas, &pfw) == -1 || pfw.pfw_err != 0) return (-1); /* errno is set for us */ } return (i); } Index: head/contrib/compiler-rt/lib/builtins/int_lib.h =================================================================== --- head/contrib/compiler-rt/lib/builtins/int_lib.h (revision 322167) +++ head/contrib/compiler-rt/lib/builtins/int_lib.h (revision 322168) @@ -1,156 +1,156 @@ /* ===-- int_lib.h - configuration header for compiler-rt -----------------=== * * The LLVM Compiler Infrastructure * * This file is dual licensed under the MIT and the University of Illinois Open * Source Licenses. See LICENSE.TXT for details. * * ===----------------------------------------------------------------------=== * * This file is a configuration header for compiler-rt. * This file is not part of the interface of this library. * * ===----------------------------------------------------------------------=== */ #ifndef INT_LIB_H #define INT_LIB_H /* Assumption: Signed integral is 2's complement. */ /* Assumption: Right shift of signed negative is arithmetic shift. */ /* Assumption: Endianness is little or big (not mixed). */ #if defined(__ELF__) #define FNALIAS(alias_name, original_name) \ void alias_name() __attribute__((alias(#original_name))) #else #define FNALIAS(alias, name) _Pragma("GCC error(\"alias unsupported on this file format\")") #endif /* ABI macro definitions */ #if __ARM_EABI__ # if defined(COMPILER_RT_ARMHF_TARGET) || (!defined(__clang__) && \ defined(__GNUC__) && (__GNUC__ < 4 || __GNUC__ == 4 && __GNUC_MINOR__ < 5)) /* The pcs attribute was introduced in GCC 4.5.0 */ # define COMPILER_RT_ABI # else # define COMPILER_RT_ABI __attribute__((__pcs__("aapcs"))) # endif #else # define COMPILER_RT_ABI #endif #define AEABI_RTABI __attribute__((__pcs__("aapcs"))) #ifdef _MSC_VER #define ALWAYS_INLINE __forceinline #define NOINLINE __declspec(noinline) #define NORETURN __declspec(noreturn) #define UNUSED #else #define ALWAYS_INLINE __attribute__((always_inline)) #define NOINLINE __attribute__((noinline)) #define NORETURN __attribute__((noreturn)) #define UNUSED __attribute__((unused)) #endif #if defined(__NetBSD__) && (defined(_KERNEL) || defined(_STANDALONE)) /* * Kernel and boot environment can't use normal headers, * so use the equivalent system headers. */ # include # include # include #else /* Include the standard compiler builtin headers we use functionality from. */ # include # include # include # include #endif /* Include the commonly used internal type definitions. */ #include "int_types.h" /* Include internal utility function declarations. */ #include "int_util.h" /* * Workaround for LLVM bug 11663. Prevent endless recursion in * __c?zdi2(), where calls to __builtin_c?z() are expanded to * __c?zdi2() instead of __c?zsi2(). 
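 *
 * With the defines below, the library's own uses of __builtin_clz() and
 * __builtin_ctz() call __clzsi2() and __ctzsi2() directly, so the 64-bit
 * routines can no longer recurse into themselves.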
* * Instead of placing this workaround in c?zdi2.c, put it in this * global header to prevent other C files from making the detour * through __c?zdi2() as well. * * This problem has been observed on FreeBSD for sparc64 and * mips64 with GCC 4.2.1, and for riscv with GCC 5.2.0. * Presumably it's any version of GCC, and targeting an arch that * does not have dedicated bit counting instructions. */ #if defined(__FreeBSD__) && (defined(__sparc64__) || \ - defined(__mips_n64) || defined(__mips_o64) || defined(__riscv__)) + defined(__mips_n64) || defined(__mips_o64) || defined(__riscv)) si_int __clzsi2(si_int); si_int __ctzsi2(si_int); #define __builtin_clz __clzsi2 #define __builtin_ctz __ctzsi2 #endif /* FreeBSD && (sparc64 || mips_n64 || mips_o64) */ COMPILER_RT_ABI si_int __paritysi2(si_int a); COMPILER_RT_ABI si_int __paritydi2(di_int a); COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b); COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b); COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d); COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int* rem); COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int* rem); #ifdef CRT_HAS_128BIT COMPILER_RT_ABI si_int __clzti2(ti_int a); COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); #endif /* Definitions for builtins unavailable on MSVC */ #if defined(_MSC_VER) && !defined(__clang__) #include uint32_t __inline __builtin_ctz(uint32_t value) { unsigned long trailing_zero = 0; if (_BitScanForward(&trailing_zero, value)) return trailing_zero; return 32; } uint32_t __inline __builtin_clz(uint32_t value) { unsigned long leading_zero = 0; if (_BitScanReverse(&leading_zero, value)) return 31 - leading_zero; return 32; } #if defined(_M_ARM) || defined(_M_X64) uint32_t __inline __builtin_clzll(uint64_t value) { unsigned long leading_zero = 0; if (_BitScanReverse64(&leading_zero, value)) return 63 - leading_zero; return 64; } #else uint32_t __inline __builtin_clzll(uint64_t value) { if (value == 0) return 64; uint32_t msh = (uint32_t)(value >> 32); uint32_t lsh = (uint32_t)(value & 0xFFFFFFFF); if (msh != 0) return __builtin_clz(msh); return 32 + __builtin_clz(lsh); } #endif #define __builtin_clzl __builtin_clzll #endif /* defined(_MSC_VER) && !defined(__clang__) */ #endif /* INT_LIB_H */ Index: head/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h =================================================================== --- head/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h (revision 322167) +++ head/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h (revision 322168) @@ -1,1492 +1,1492 @@ //===-- sanitizer_platform_limits_posix.h ---------------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file is a part of Sanitizer common code. // // Sizes and layouts of platform-specific POSIX data structures. //===----------------------------------------------------------------------===// #ifndef SANITIZER_PLATFORM_LIMITS_POSIX_H #define SANITIZER_PLATFORM_LIMITS_POSIX_H #include "sanitizer_internal_defs.h" #include "sanitizer_platform.h" #if SANITIZER_FREEBSD // FreeBSD's dlopen() returns a pointer to an Obj_Entry structure that // incorporates the map structure. 
# define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) \ ((link_map*)((handle) == nullptr ? nullptr : ((char*)(handle) + 544))) // Get sys/_types.h, because that tells us whether 64-bit inodes are // used in struct dirent below. #include #else # define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) ((link_map*)(handle)) #endif // !SANITIZER_FREEBSD #ifndef __GLIBC_PREREQ #define __GLIBC_PREREQ(x, y) 0 #endif namespace __sanitizer { extern unsigned struct_utsname_sz; extern unsigned struct_stat_sz; #if !SANITIZER_FREEBSD && !SANITIZER_IOS extern unsigned struct_stat64_sz; #endif extern unsigned struct_rusage_sz; extern unsigned siginfo_t_sz; extern unsigned struct_itimerval_sz; extern unsigned pthread_t_sz; extern unsigned pthread_cond_t_sz; extern unsigned pid_t_sz; extern unsigned timeval_sz; extern unsigned uid_t_sz; extern unsigned gid_t_sz; extern unsigned mbstate_t_sz; extern unsigned struct_timezone_sz; extern unsigned struct_tms_sz; extern unsigned struct_itimerspec_sz; extern unsigned struct_sigevent_sz; extern unsigned struct_sched_param_sz; extern unsigned struct_statfs64_sz; #if !SANITIZER_ANDROID extern unsigned struct_statfs_sz; extern unsigned struct_sockaddr_sz; extern unsigned ucontext_t_sz; #endif // !SANITIZER_ANDROID #if SANITIZER_LINUX #if defined(__x86_64__) const unsigned struct_kernel_stat_sz = 144; const unsigned struct_kernel_stat64_sz = 0; #elif defined(__i386__) const unsigned struct_kernel_stat_sz = 64; const unsigned struct_kernel_stat64_sz = 96; #elif defined(__arm__) const unsigned struct_kernel_stat_sz = 64; const unsigned struct_kernel_stat64_sz = 104; #elif defined(__aarch64__) const unsigned struct_kernel_stat_sz = 128; const unsigned struct_kernel_stat64_sz = 104; #elif defined(__powerpc__) && !defined(__powerpc64__) const unsigned struct_kernel_stat_sz = 72; const unsigned struct_kernel_stat64_sz = 104; #elif defined(__powerpc64__) const unsigned struct_kernel_stat_sz = 144; const unsigned struct_kernel_stat64_sz = 104; -#elif defined(__riscv__) +#elif defined(__riscv) /* RISCVTODO: check that these values are correct */ const unsigned struct_kernel_stat_sz = 128; const unsigned struct_kernel_stat64_sz = 128; #elif defined(__mips__) const unsigned struct_kernel_stat_sz = SANITIZER_ANDROID ? FIRST_32_SECOND_64(104, 128) : FIRST_32_SECOND_64(160, 216); const unsigned struct_kernel_stat64_sz = 104; #elif defined(__s390__) && !defined(__s390x__) const unsigned struct_kernel_stat_sz = 64; const unsigned struct_kernel_stat64_sz = 104; #elif defined(__s390x__) const unsigned struct_kernel_stat_sz = 144; const unsigned struct_kernel_stat64_sz = 0; #elif defined(__sparc__) && defined(__arch64__) const unsigned struct___old_kernel_stat_sz = 0; const unsigned struct_kernel_stat_sz = 104; const unsigned struct_kernel_stat64_sz = 144; #elif defined(__sparc__) && !defined(__arch64__) const unsigned struct___old_kernel_stat_sz = 0; const unsigned struct_kernel_stat_sz = 64; const unsigned struct_kernel_stat64_sz = 104; #endif struct __sanitizer_perf_event_attr { unsigned type; unsigned size; // More fields that vary with the kernel version. 
}; extern unsigned struct_epoll_event_sz; extern unsigned struct_sysinfo_sz; extern unsigned __user_cap_header_struct_sz; extern unsigned __user_cap_data_struct_sz; extern unsigned struct_new_utsname_sz; extern unsigned struct_old_utsname_sz; extern unsigned struct_oldold_utsname_sz; const unsigned struct_kexec_segment_sz = 4 * sizeof(unsigned long); #endif // SANITIZER_LINUX #if SANITIZER_LINUX || SANITIZER_FREEBSD -#if defined(__powerpc64__) || defined(__riscv__) || defined(__s390__) +#if defined(__powerpc64__) || defined(__riscv) || defined(__s390__) const unsigned struct___old_kernel_stat_sz = 0; #elif !defined(__sparc__) const unsigned struct___old_kernel_stat_sz = 32; #endif extern unsigned struct_rlimit_sz; extern unsigned struct_utimbuf_sz; extern unsigned struct_timespec_sz; struct __sanitizer_iocb { u64 aio_data; u32 aio_key_or_aio_reserved1; // Simply crazy. u32 aio_reserved1_or_aio_key; // Luckily, we don't need these. u16 aio_lio_opcode; s16 aio_reqprio; u32 aio_fildes; u64 aio_buf; u64 aio_nbytes; s64 aio_offset; u64 aio_reserved2; u64 aio_reserved3; }; struct __sanitizer_io_event { u64 data; u64 obj; u64 res; u64 res2; }; const unsigned iocb_cmd_pread = 0; const unsigned iocb_cmd_pwrite = 1; const unsigned iocb_cmd_preadv = 7; const unsigned iocb_cmd_pwritev = 8; struct __sanitizer___sysctl_args { int *name; int nlen; void *oldval; uptr *oldlenp; void *newval; uptr newlen; unsigned long ___unused[4]; }; const unsigned old_sigset_t_sz = sizeof(unsigned long); struct __sanitizer_sem_t { #if SANITIZER_ANDROID && defined(_LP64) int data[4]; #elif SANITIZER_ANDROID && !defined(_LP64) int data; #elif SANITIZER_LINUX uptr data[4]; #elif SANITIZER_FREEBSD u32 data[4]; #endif }; #endif // SANITIZER_LINUX || SANITIZER_FREEBSD #if SANITIZER_ANDROID struct __sanitizer_mallinfo { uptr v[10]; }; #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer_mallinfo { int v[10]; }; extern unsigned struct_ustat_sz; extern unsigned struct_rlimit64_sz; extern unsigned struct_statvfs64_sz; struct __sanitizer_ipc_perm { int __key; int uid; int gid; int cuid; int cgid; #ifdef __powerpc__ unsigned mode; unsigned __seq; u64 __unused1; u64 __unused2; #elif defined(__sparc__) #if defined(__arch64__) unsigned mode; unsigned short __pad1; #else unsigned short __pad1; unsigned short mode; unsigned short __pad2; #endif unsigned short __seq; unsigned long long __unused1; unsigned long long __unused2; #elif defined(__mips__) || defined(__aarch64__) || defined(__s390x__) unsigned int mode; unsigned short __seq; unsigned short __pad1; unsigned long __unused1; unsigned long __unused2; #else unsigned short mode; unsigned short __pad1; unsigned short __seq; unsigned short __pad2; #if defined(__x86_64__) && !defined(_LP64) u64 __unused1; u64 __unused2; #else unsigned long __unused1; unsigned long __unused2; #endif #endif }; struct __sanitizer_shmid_ds { __sanitizer_ipc_perm shm_perm; #if defined(__sparc__) #if !defined(__arch64__) u32 __pad1; #endif long shm_atime; #if !defined(__arch64__) u32 __pad2; #endif long shm_dtime; #if !defined(__arch64__) u32 __pad3; #endif long shm_ctime; uptr shm_segsz; int shm_cpid; int shm_lpid; unsigned long shm_nattch; unsigned long __glibc_reserved1; unsigned long __glibc_reserved2; #else #ifndef __powerpc__ uptr shm_segsz; #elif !defined(__powerpc64__) uptr __unused0; #endif #if defined(__x86_64__) && !defined(_LP64) u64 shm_atime; u64 shm_dtime; u64 shm_ctime; #else uptr shm_atime; #if !defined(_LP64) && !defined(__mips__) uptr __unused1; #endif uptr shm_dtime; #if 
!defined(_LP64) && !defined(__mips__) uptr __unused2; #endif uptr shm_ctime; #if !defined(_LP64) && !defined(__mips__) uptr __unused3; #endif #endif #ifdef __powerpc__ uptr shm_segsz; #endif int shm_cpid; int shm_lpid; #if defined(__x86_64__) && !defined(_LP64) u64 shm_nattch; u64 __unused4; u64 __unused5; #else uptr shm_nattch; uptr __unused4; uptr __unused5; #endif #endif }; #elif SANITIZER_FREEBSD struct __sanitizer_ipc_perm { unsigned int cuid; unsigned int cgid; unsigned int uid; unsigned int gid; unsigned short mode; unsigned short seq; long key; }; struct __sanitizer_shmid_ds { __sanitizer_ipc_perm shm_perm; unsigned long shm_segsz; unsigned int shm_lpid; unsigned int shm_cpid; int shm_nattch; unsigned long shm_atime; unsigned long shm_dtime; unsigned long shm_ctime; }; #endif #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID extern unsigned struct_msqid_ds_sz; extern unsigned struct_mq_attr_sz; extern unsigned struct_timex_sz; extern unsigned struct_statvfs_sz; #endif // (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID struct __sanitizer_iovec { void *iov_base; uptr iov_len; }; #if !SANITIZER_ANDROID struct __sanitizer_ifaddrs { struct __sanitizer_ifaddrs *ifa_next; char *ifa_name; unsigned int ifa_flags; void *ifa_addr; // (struct sockaddr *) void *ifa_netmask; // (struct sockaddr *) // This is a union on Linux. # ifdef ifa_dstaddr # undef ifa_dstaddr # endif void *ifa_dstaddr; // (struct sockaddr *) void *ifa_data; }; #endif // !SANITIZER_ANDROID #if SANITIZER_MAC typedef unsigned long __sanitizer_pthread_key_t; #else typedef unsigned __sanitizer_pthread_key_t; #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer_XDR { int x_op; void *x_ops; uptr x_public; uptr x_private; uptr x_base; unsigned x_handy; }; const int __sanitizer_XDR_ENCODE = 0; const int __sanitizer_XDR_DECODE = 1; const int __sanitizer_XDR_FREE = 2; #endif struct __sanitizer_passwd { char *pw_name; char *pw_passwd; int pw_uid; int pw_gid; #if SANITIZER_MAC || SANITIZER_FREEBSD long pw_change; char *pw_class; #endif #if !(SANITIZER_ANDROID && (SANITIZER_WORDSIZE == 32)) char *pw_gecos; #endif char *pw_dir; char *pw_shell; #if SANITIZER_MAC || SANITIZER_FREEBSD long pw_expire; #endif #if SANITIZER_FREEBSD int pw_fields; #endif }; struct __sanitizer_group { char *gr_name; char *gr_passwd; int gr_gid; char **gr_mem; }; #if defined(__x86_64__) && !defined(_LP64) typedef long long __sanitizer_time_t; #else typedef long __sanitizer_time_t; #endif struct __sanitizer_timeb { __sanitizer_time_t time; unsigned short millitm; short timezone; short dstflag; }; struct __sanitizer_ether_addr { u8 octet[6]; }; struct __sanitizer_tm { int tm_sec; int tm_min; int tm_hour; int tm_mday; int tm_mon; int tm_year; int tm_wday; int tm_yday; int tm_isdst; long int tm_gmtoff; const char *tm_zone; }; #if SANITIZER_LINUX struct __sanitizer_mntent { char *mnt_fsname; char *mnt_dir; char *mnt_type; char *mnt_opts; int mnt_freq; int mnt_passno; }; #endif #if SANITIZER_MAC || SANITIZER_FREEBSD struct __sanitizer_msghdr { void *msg_name; unsigned msg_namelen; struct __sanitizer_iovec *msg_iov; unsigned msg_iovlen; void *msg_control; unsigned msg_controllen; int msg_flags; }; struct __sanitizer_cmsghdr { unsigned cmsg_len; int cmsg_level; int cmsg_type; }; #else struct __sanitizer_msghdr { void *msg_name; unsigned msg_namelen; struct __sanitizer_iovec *msg_iov; uptr msg_iovlen; void *msg_control; uptr msg_controllen; int msg_flags; }; struct __sanitizer_cmsghdr { uptr cmsg_len; int cmsg_level; int 
cmsg_type; }; #endif #if SANITIZER_MAC struct __sanitizer_dirent { unsigned long long d_ino; unsigned long long d_seekoff; unsigned short d_reclen; // more fields that we don't care about }; #elif SANITIZER_FREEBSD struct __sanitizer_dirent { #if defined(__INO64) unsigned long long d_fileno; unsigned long long d_off; #else unsigned int d_fileno; #endif unsigned short d_reclen; // more fields that we don't care about }; #elif SANITIZER_ANDROID || defined(__x86_64__) struct __sanitizer_dirent { unsigned long long d_ino; unsigned long long d_off; unsigned short d_reclen; // more fields that we don't care about }; #else struct __sanitizer_dirent { uptr d_ino; uptr d_off; unsigned short d_reclen; // more fields that we don't care about }; #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer_dirent64 { unsigned long long d_ino; unsigned long long d_off; unsigned short d_reclen; // more fields that we don't care about }; #endif // 'clock_t' is 32 bits wide on x64 FreeBSD #if SANITIZER_FREEBSD typedef int __sanitizer_clock_t; #elif defined(__x86_64__) && !defined(_LP64) typedef long long __sanitizer_clock_t; #else typedef long __sanitizer_clock_t; #endif #if SANITIZER_LINUX typedef int __sanitizer_clockid_t; #endif #if SANITIZER_LINUX || SANITIZER_FREEBSD #if defined(_LP64) || defined(__x86_64__) || defined(__powerpc__)\ || defined(__mips__) typedef unsigned __sanitizer___kernel_uid_t; typedef unsigned __sanitizer___kernel_gid_t; #else typedef unsigned short __sanitizer___kernel_uid_t; typedef unsigned short __sanitizer___kernel_gid_t; #endif #if defined(__x86_64__) && !defined(_LP64) typedef long long __sanitizer___kernel_off_t; #else typedef long __sanitizer___kernel_off_t; #endif -#if defined(__powerpc__) || defined(__mips__) || defined(__riscv__) +#if defined(__powerpc__) || defined(__mips__) || defined(__riscv) typedef unsigned int __sanitizer___kernel_old_uid_t; typedef unsigned int __sanitizer___kernel_old_gid_t; #else typedef unsigned short __sanitizer___kernel_old_uid_t; typedef unsigned short __sanitizer___kernel_old_gid_t; #endif typedef long long __sanitizer___kernel_loff_t; typedef struct { unsigned long fds_bits[1024 / (8 * sizeof(long))]; } __sanitizer___kernel_fd_set; #endif // This thing depends on the platform. We are only interested in the upper // limit. Verified with a compiler assert in .cc. const int pthread_attr_t_max_sz = 128; union __sanitizer_pthread_attr_t { char size[pthread_attr_t_max_sz]; // NOLINT void *align; }; #if SANITIZER_ANDROID # if SANITIZER_MIPS typedef unsigned long __sanitizer_sigset_t[16/sizeof(unsigned long)]; # else typedef unsigned long __sanitizer_sigset_t; # endif #elif SANITIZER_MAC typedef unsigned __sanitizer_sigset_t; #elif SANITIZER_LINUX struct __sanitizer_sigset_t { // The size is determined by looking at sizeof of real sigset_t on linux. uptr val[128 / sizeof(uptr)]; }; #elif SANITIZER_FREEBSD struct __sanitizer_sigset_t { // uint32_t * 4 unsigned int __bits[4]; }; #endif // Linux system headers define the 'sa_handler' and 'sa_sigaction' macros. 
#if SANITIZER_ANDROID && (SANITIZER_WORDSIZE == 64) struct __sanitizer_sigaction { unsigned sa_flags; union { void (*sigaction)(int sig, void *siginfo, void *uctx); void (*handler)(int sig); }; __sanitizer_sigset_t sa_mask; void (*sa_restorer)(); }; #elif SANITIZER_ANDROID && SANITIZER_MIPS32 // check this before WORDSIZE == 32 struct __sanitizer_sigaction { unsigned sa_flags; union { void (*sigaction)(int sig, void *siginfo, void *uctx); void (*handler)(int sig); }; __sanitizer_sigset_t sa_mask; }; #elif SANITIZER_ANDROID && (SANITIZER_WORDSIZE == 32) struct __sanitizer_sigaction { union { void (*sigaction)(int sig, void *siginfo, void *uctx); void (*handler)(int sig); }; __sanitizer_sigset_t sa_mask; uptr sa_flags; void (*sa_restorer)(); }; #else // !SANITIZER_ANDROID struct __sanitizer_sigaction { #if defined(__mips__) && !SANITIZER_FREEBSD unsigned int sa_flags; #endif union { void (*sigaction)(int sig, void *siginfo, void *uctx); void (*handler)(int sig); }; #if SANITIZER_FREEBSD int sa_flags; __sanitizer_sigset_t sa_mask; #else #if defined(__s390x__) int sa_resv; #else __sanitizer_sigset_t sa_mask; #endif #ifndef __mips__ #if defined(__sparc__) #if __GLIBC_PREREQ (2, 20) // On sparc glibc 2.19 and earlier sa_flags was unsigned long. #if defined(__arch64__) // To maintain ABI compatibility on sparc64 when switching to an int, // __glibc_reserved0 was added. int __glibc_reserved0; #endif int sa_flags; #else unsigned long sa_flags; #endif #else int sa_flags; #endif #endif #endif #if SANITIZER_LINUX void (*sa_restorer)(); #endif #if defined(__mips__) && (SANITIZER_WORDSIZE == 32) int sa_resv[1]; #endif #if defined(__s390x__) __sanitizer_sigset_t sa_mask; #endif }; #endif // !SANITIZER_ANDROID #if SANITIZER_FREEBSD typedef __sanitizer_sigset_t __sanitizer_kernel_sigset_t; #elif defined(__mips__) struct __sanitizer_kernel_sigset_t { uptr sig[2]; }; #else struct __sanitizer_kernel_sigset_t { u8 sig[8]; }; #endif // Linux system headers define the 'sa_handler' and 'sa_sigaction' macros. 
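// A minimal usage sketch (not part of this header) of how the sigaction
// shadow layouts above and below are kept in sync with the real system
// types: a .cc translation unit that may include the system headers applies
// the CHECK_STRUCT_SIZE_AND_OFFSET helper defined at the end of this file
// (the explicit "struct" spelling is needed because sigaction is both a
// function and a struct):
//
//   CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_flags);
//   CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_mask);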
#if SANITIZER_MIPS struct __sanitizer_kernel_sigaction_t { unsigned int sa_flags; union { void (*handler)(int signo); void (*sigaction)(int signo, void *info, void *ctx); }; __sanitizer_kernel_sigset_t sa_mask; void (*sa_restorer)(void); }; #else struct __sanitizer_kernel_sigaction_t { union { void (*handler)(int signo); void (*sigaction)(int signo, void *info, void *ctx); }; unsigned long sa_flags; void (*sa_restorer)(void); __sanitizer_kernel_sigset_t sa_mask; }; #endif extern uptr sig_ign; extern uptr sig_dfl; extern uptr sa_siginfo; #if SANITIZER_LINUX extern int e_tabsz; #endif extern int af_inet; extern int af_inet6; uptr __sanitizer_in_addr_sz(int af); #if SANITIZER_LINUX || SANITIZER_FREEBSD struct __sanitizer_dl_phdr_info { uptr dlpi_addr; const char *dlpi_name; const void *dlpi_phdr; short dlpi_phnum; }; extern unsigned struct_ElfW_Phdr_sz; #endif struct __sanitizer_addrinfo { int ai_flags; int ai_family; int ai_socktype; int ai_protocol; #if SANITIZER_ANDROID || SANITIZER_MAC || SANITIZER_FREEBSD unsigned ai_addrlen; char *ai_canonname; void *ai_addr; #else // LINUX unsigned ai_addrlen; void *ai_addr; char *ai_canonname; #endif struct __sanitizer_addrinfo *ai_next; }; struct __sanitizer_hostent { char *h_name; char **h_aliases; int h_addrtype; int h_length; char **h_addr_list; }; struct __sanitizer_pollfd { int fd; short events; short revents; }; #if SANITIZER_ANDROID || SANITIZER_MAC || SANITIZER_FREEBSD typedef unsigned __sanitizer_nfds_t; #else typedef unsigned long __sanitizer_nfds_t; #endif #if !SANITIZER_ANDROID # if SANITIZER_LINUX struct __sanitizer_glob_t { uptr gl_pathc; char **gl_pathv; uptr gl_offs; int gl_flags; void (*gl_closedir)(void *dirp); void *(*gl_readdir)(void *dirp); void *(*gl_opendir)(const char *); int (*gl_lstat)(const char *, void *); int (*gl_stat)(const char *, void *); }; # elif SANITIZER_FREEBSD struct __sanitizer_glob_t { uptr gl_pathc; uptr gl_matchc; uptr gl_offs; int gl_flags; char **gl_pathv; int (*gl_errfunc)(const char*, int); void (*gl_closedir)(void *dirp); struct dirent *(*gl_readdir)(void *dirp); void *(*gl_opendir)(const char*); int (*gl_lstat)(const char*, void* /* struct stat* */); int (*gl_stat)(const char*, void* /* struct stat* */); }; # endif // SANITIZER_FREEBSD # if SANITIZER_LINUX || SANITIZER_FREEBSD extern int glob_nomatch; extern int glob_altdirfunc; # endif #endif // !SANITIZER_ANDROID extern unsigned path_max; struct __sanitizer_wordexp_t { uptr we_wordc; char **we_wordv; uptr we_offs; #if SANITIZER_FREEBSD char *we_strings; uptr we_nbytes; #endif }; #if SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer_FILE { int _flags; char *_IO_read_ptr; char *_IO_read_end; char *_IO_read_base; char *_IO_write_base; char *_IO_write_ptr; char *_IO_write_end; char *_IO_buf_base; char *_IO_buf_end; char *_IO_save_base; char *_IO_backup_base; char *_IO_save_end; void *_markers; __sanitizer_FILE *_chain; int _fileno; }; # define SANITIZER_HAS_STRUCT_FILE 1 #else typedef void __sanitizer_FILE; # define SANITIZER_HAS_STRUCT_FILE 0 #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ defined(__s390__)) extern unsigned struct_user_regs_struct_sz; extern unsigned struct_user_fpregs_struct_sz; extern unsigned struct_user_fpxregs_struct_sz; extern unsigned struct_user_vfpregs_struct_sz; extern int ptrace_peektext; extern int ptrace_peekdata; extern int ptrace_peekuser; extern int ptrace_getregs; extern 
int ptrace_setregs; extern int ptrace_getfpregs; extern int ptrace_setfpregs; extern int ptrace_getfpxregs; extern int ptrace_setfpxregs; extern int ptrace_getvfpregs; extern int ptrace_setvfpregs; extern int ptrace_getsiginfo; extern int ptrace_setsiginfo; extern int ptrace_getregset; extern int ptrace_setregset; extern int ptrace_geteventmsg; #endif #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID extern unsigned struct_shminfo_sz; extern unsigned struct_shm_info_sz; extern int shmctl_ipc_stat; extern int shmctl_ipc_info; extern int shmctl_shm_info; extern int shmctl_shm_stat; #endif #if !SANITIZER_MAC && !SANITIZER_FREEBSD extern unsigned struct_utmp_sz; #endif #if !SANITIZER_ANDROID extern unsigned struct_utmpx_sz; #endif extern int map_fixed; // ioctl arguments struct __sanitizer_ifconf { int ifc_len; union { void *ifcu_req; } ifc_ifcu; #if SANITIZER_MAC } __attribute__((packed)); #else }; #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer__obstack_chunk { char *limit; struct __sanitizer__obstack_chunk *prev; }; struct __sanitizer_obstack { long chunk_size; struct __sanitizer__obstack_chunk *chunk; char *object_base; char *next_free; uptr more_fields[7]; }; typedef uptr (*__sanitizer_cookie_io_read)(void *cookie, char *buf, uptr size); typedef uptr (*__sanitizer_cookie_io_write)(void *cookie, const char *buf, uptr size); typedef int (*__sanitizer_cookie_io_seek)(void *cookie, u64 *offset, int whence); typedef int (*__sanitizer_cookie_io_close)(void *cookie); struct __sanitizer_cookie_io_functions_t { __sanitizer_cookie_io_read read; __sanitizer_cookie_io_write write; __sanitizer_cookie_io_seek seek; __sanitizer_cookie_io_close close; }; #endif #define IOC_NRBITS 8 #define IOC_TYPEBITS 8 #if defined(__powerpc__) || defined(__powerpc64__) || defined(__mips__) || \ defined(__sparc__) #define IOC_SIZEBITS 13 #define IOC_DIRBITS 3 #define IOC_NONE 1U #define IOC_WRITE 4U #define IOC_READ 2U #else #define IOC_SIZEBITS 14 #define IOC_DIRBITS 2 #define IOC_NONE 0U #define IOC_WRITE 1U #define IOC_READ 2U #endif #define IOC_NRMASK ((1 << IOC_NRBITS) - 1) #define IOC_TYPEMASK ((1 << IOC_TYPEBITS) - 1) #define IOC_SIZEMASK ((1 << IOC_SIZEBITS) - 1) #if defined(IOC_DIRMASK) #undef IOC_DIRMASK #endif #define IOC_DIRMASK ((1 << IOC_DIRBITS) - 1) #define IOC_NRSHIFT 0 #define IOC_TYPESHIFT (IOC_NRSHIFT + IOC_NRBITS) #define IOC_SIZESHIFT (IOC_TYPESHIFT + IOC_TYPEBITS) #define IOC_DIRSHIFT (IOC_SIZESHIFT + IOC_SIZEBITS) #define EVIOC_EV_MAX 0x1f #define EVIOC_ABS_MAX 0x3f #define IOC_DIR(nr) (((nr) >> IOC_DIRSHIFT) & IOC_DIRMASK) #define IOC_TYPE(nr) (((nr) >> IOC_TYPESHIFT) & IOC_TYPEMASK) #define IOC_NR(nr) (((nr) >> IOC_NRSHIFT) & IOC_NRMASK) #if defined(__sparc__) // In sparc the 14 bits SIZE field overlaps with the // least significant bit of DIR, so either IOC_READ or // IOC_WRITE shall be 1 in order to get a non-zero SIZE. #define IOC_SIZE(nr) \ ((((((nr) >> 29) & 0x7) & (4U | 2U)) == 0) ? 
0 : (((nr) >> 16) & 0x3fff)) #else #define IOC_SIZE(nr) (((nr) >> IOC_SIZESHIFT) & IOC_SIZEMASK) #endif extern unsigned struct_ifreq_sz; extern unsigned struct_termios_sz; extern unsigned struct_winsize_sz; #if SANITIZER_LINUX extern unsigned struct_arpreq_sz; extern unsigned struct_cdrom_msf_sz; extern unsigned struct_cdrom_multisession_sz; extern unsigned struct_cdrom_read_audio_sz; extern unsigned struct_cdrom_subchnl_sz; extern unsigned struct_cdrom_ti_sz; extern unsigned struct_cdrom_tocentry_sz; extern unsigned struct_cdrom_tochdr_sz; extern unsigned struct_cdrom_volctrl_sz; extern unsigned struct_ff_effect_sz; extern unsigned struct_floppy_drive_params_sz; extern unsigned struct_floppy_drive_struct_sz; extern unsigned struct_floppy_fdc_state_sz; extern unsigned struct_floppy_max_errors_sz; extern unsigned struct_floppy_raw_cmd_sz; extern unsigned struct_floppy_struct_sz; extern unsigned struct_floppy_write_errors_sz; extern unsigned struct_format_descr_sz; extern unsigned struct_hd_driveid_sz; extern unsigned struct_hd_geometry_sz; extern unsigned struct_input_absinfo_sz; extern unsigned struct_input_id_sz; extern unsigned struct_mtpos_sz; extern unsigned struct_termio_sz; extern unsigned struct_vt_consize_sz; extern unsigned struct_vt_sizes_sz; extern unsigned struct_vt_stat_sz; #endif // SANITIZER_LINUX #if SANITIZER_LINUX || SANITIZER_FREEBSD extern unsigned struct_copr_buffer_sz; extern unsigned struct_copr_debug_buf_sz; extern unsigned struct_copr_msg_sz; extern unsigned struct_midi_info_sz; extern unsigned struct_mtget_sz; extern unsigned struct_mtop_sz; extern unsigned struct_rtentry_sz; extern unsigned struct_sbi_instrument_sz; extern unsigned struct_seq_event_rec_sz; extern unsigned struct_synth_info_sz; extern unsigned struct_vt_mode_sz; #endif // SANITIZER_LINUX || SANITIZER_FREEBSD #if SANITIZER_LINUX && !SANITIZER_ANDROID extern unsigned struct_ax25_parms_struct_sz; extern unsigned struct_cyclades_monitor_sz; extern unsigned struct_input_keymap_entry_sz; extern unsigned struct_ipx_config_data_sz; extern unsigned struct_kbdiacrs_sz; extern unsigned struct_kbentry_sz; extern unsigned struct_kbkeycode_sz; extern unsigned struct_kbsentry_sz; extern unsigned struct_mtconfiginfo_sz; extern unsigned struct_nr_parms_struct_sz; extern unsigned struct_scc_modem_sz; extern unsigned struct_scc_stat_sz; extern unsigned struct_serial_multiport_struct_sz; extern unsigned struct_serial_struct_sz; extern unsigned struct_sockaddr_ax25_sz; extern unsigned struct_unimapdesc_sz; extern unsigned struct_unimapinit_sz; #endif // SANITIZER_LINUX && !SANITIZER_ANDROID #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID extern unsigned struct_audio_buf_info_sz; extern unsigned struct_ppp_stats_sz; #endif // (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID #if !SANITIZER_ANDROID && !SANITIZER_MAC extern unsigned struct_sioc_sg_req_sz; extern unsigned struct_sioc_vif_req_sz; #endif // ioctl request identifiers // A special value to mark ioctls that are not present on the target platform, // when it can not be determined without including any system headers. 
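// As a worked example of the IOC_* encoding above (assuming the generic
// Linux layout rather than the powerpc/sparc/mips variant): for a request
// built with _IOR('t', 19, int), IOC_DIR(nr) == IOC_READ,
// IOC_TYPE(nr) == 't', IOC_NR(nr) == 19 and IOC_SIZE(nr) == sizeof(int).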
extern const unsigned IOCTL_NOT_PRESENT; extern unsigned IOCTL_FIOASYNC; extern unsigned IOCTL_FIOCLEX; extern unsigned IOCTL_FIOGETOWN; extern unsigned IOCTL_FIONBIO; extern unsigned IOCTL_FIONCLEX; extern unsigned IOCTL_FIOSETOWN; extern unsigned IOCTL_SIOCADDMULTI; extern unsigned IOCTL_SIOCATMARK; extern unsigned IOCTL_SIOCDELMULTI; extern unsigned IOCTL_SIOCGIFADDR; extern unsigned IOCTL_SIOCGIFBRDADDR; extern unsigned IOCTL_SIOCGIFCONF; extern unsigned IOCTL_SIOCGIFDSTADDR; extern unsigned IOCTL_SIOCGIFFLAGS; extern unsigned IOCTL_SIOCGIFMETRIC; extern unsigned IOCTL_SIOCGIFMTU; extern unsigned IOCTL_SIOCGIFNETMASK; extern unsigned IOCTL_SIOCGPGRP; extern unsigned IOCTL_SIOCSIFADDR; extern unsigned IOCTL_SIOCSIFBRDADDR; extern unsigned IOCTL_SIOCSIFDSTADDR; extern unsigned IOCTL_SIOCSIFFLAGS; extern unsigned IOCTL_SIOCSIFMETRIC; extern unsigned IOCTL_SIOCSIFMTU; extern unsigned IOCTL_SIOCSIFNETMASK; extern unsigned IOCTL_SIOCSPGRP; extern unsigned IOCTL_TIOCCONS; extern unsigned IOCTL_TIOCEXCL; extern unsigned IOCTL_TIOCGETD; extern unsigned IOCTL_TIOCGPGRP; extern unsigned IOCTL_TIOCGWINSZ; extern unsigned IOCTL_TIOCMBIC; extern unsigned IOCTL_TIOCMBIS; extern unsigned IOCTL_TIOCMGET; extern unsigned IOCTL_TIOCMSET; extern unsigned IOCTL_TIOCNOTTY; extern unsigned IOCTL_TIOCNXCL; extern unsigned IOCTL_TIOCOUTQ; extern unsigned IOCTL_TIOCPKT; extern unsigned IOCTL_TIOCSCTTY; extern unsigned IOCTL_TIOCSETD; extern unsigned IOCTL_TIOCSPGRP; extern unsigned IOCTL_TIOCSTI; extern unsigned IOCTL_TIOCSWINSZ; #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID extern unsigned IOCTL_SIOCGETSGCNT; extern unsigned IOCTL_SIOCGETVIFCNT; #endif #if SANITIZER_LINUX extern unsigned IOCTL_EVIOCGABS; extern unsigned IOCTL_EVIOCGBIT; extern unsigned IOCTL_EVIOCGEFFECTS; extern unsigned IOCTL_EVIOCGID; extern unsigned IOCTL_EVIOCGKEY; extern unsigned IOCTL_EVIOCGKEYCODE; extern unsigned IOCTL_EVIOCGLED; extern unsigned IOCTL_EVIOCGNAME; extern unsigned IOCTL_EVIOCGPHYS; extern unsigned IOCTL_EVIOCGRAB; extern unsigned IOCTL_EVIOCGREP; extern unsigned IOCTL_EVIOCGSND; extern unsigned IOCTL_EVIOCGSW; extern unsigned IOCTL_EVIOCGUNIQ; extern unsigned IOCTL_EVIOCGVERSION; extern unsigned IOCTL_EVIOCRMFF; extern unsigned IOCTL_EVIOCSABS; extern unsigned IOCTL_EVIOCSFF; extern unsigned IOCTL_EVIOCSKEYCODE; extern unsigned IOCTL_EVIOCSREP; extern unsigned IOCTL_BLKFLSBUF; extern unsigned IOCTL_BLKGETSIZE; extern unsigned IOCTL_BLKRAGET; extern unsigned IOCTL_BLKRASET; extern unsigned IOCTL_BLKROGET; extern unsigned IOCTL_BLKROSET; extern unsigned IOCTL_BLKRRPART; extern unsigned IOCTL_CDROMAUDIOBUFSIZ; extern unsigned IOCTL_CDROMEJECT; extern unsigned IOCTL_CDROMEJECT_SW; extern unsigned IOCTL_CDROMMULTISESSION; extern unsigned IOCTL_CDROMPAUSE; extern unsigned IOCTL_CDROMPLAYMSF; extern unsigned IOCTL_CDROMPLAYTRKIND; extern unsigned IOCTL_CDROMREADAUDIO; extern unsigned IOCTL_CDROMREADCOOKED; extern unsigned IOCTL_CDROMREADMODE1; extern unsigned IOCTL_CDROMREADMODE2; extern unsigned IOCTL_CDROMREADRAW; extern unsigned IOCTL_CDROMREADTOCENTRY; extern unsigned IOCTL_CDROMREADTOCHDR; extern unsigned IOCTL_CDROMRESET; extern unsigned IOCTL_CDROMRESUME; extern unsigned IOCTL_CDROMSEEK; extern unsigned IOCTL_CDROMSTART; extern unsigned IOCTL_CDROMSTOP; extern unsigned IOCTL_CDROMSUBCHNL; extern unsigned IOCTL_CDROMVOLCTRL; extern unsigned IOCTL_CDROMVOLREAD; extern unsigned IOCTL_CDROM_GET_UPC; extern unsigned IOCTL_FDCLRPRM; extern unsigned IOCTL_FDDEFPRM; extern unsigned IOCTL_FDFLUSH; extern 
unsigned IOCTL_FDFMTBEG; extern unsigned IOCTL_FDFMTEND; extern unsigned IOCTL_FDFMTTRK; extern unsigned IOCTL_FDGETDRVPRM; extern unsigned IOCTL_FDGETDRVSTAT; extern unsigned IOCTL_FDGETDRVTYP; extern unsigned IOCTL_FDGETFDCSTAT; extern unsigned IOCTL_FDGETMAXERRS; extern unsigned IOCTL_FDGETPRM; extern unsigned IOCTL_FDMSGOFF; extern unsigned IOCTL_FDMSGON; extern unsigned IOCTL_FDPOLLDRVSTAT; extern unsigned IOCTL_FDRAWCMD; extern unsigned IOCTL_FDRESET; extern unsigned IOCTL_FDSETDRVPRM; extern unsigned IOCTL_FDSETEMSGTRESH; extern unsigned IOCTL_FDSETMAXERRS; extern unsigned IOCTL_FDSETPRM; extern unsigned IOCTL_FDTWADDLE; extern unsigned IOCTL_FDWERRORCLR; extern unsigned IOCTL_FDWERRORGET; extern unsigned IOCTL_HDIO_DRIVE_CMD; extern unsigned IOCTL_HDIO_GETGEO; extern unsigned IOCTL_HDIO_GET_32BIT; extern unsigned IOCTL_HDIO_GET_DMA; extern unsigned IOCTL_HDIO_GET_IDENTITY; extern unsigned IOCTL_HDIO_GET_KEEPSETTINGS; extern unsigned IOCTL_HDIO_GET_MULTCOUNT; extern unsigned IOCTL_HDIO_GET_NOWERR; extern unsigned IOCTL_HDIO_GET_UNMASKINTR; extern unsigned IOCTL_HDIO_SET_32BIT; extern unsigned IOCTL_HDIO_SET_DMA; extern unsigned IOCTL_HDIO_SET_KEEPSETTINGS; extern unsigned IOCTL_HDIO_SET_MULTCOUNT; extern unsigned IOCTL_HDIO_SET_NOWERR; extern unsigned IOCTL_HDIO_SET_UNMASKINTR; extern unsigned IOCTL_MTIOCPOS; extern unsigned IOCTL_PPPIOCGASYNCMAP; extern unsigned IOCTL_PPPIOCGDEBUG; extern unsigned IOCTL_PPPIOCGFLAGS; extern unsigned IOCTL_PPPIOCGUNIT; extern unsigned IOCTL_PPPIOCGXASYNCMAP; extern unsigned IOCTL_PPPIOCSASYNCMAP; extern unsigned IOCTL_PPPIOCSDEBUG; extern unsigned IOCTL_PPPIOCSFLAGS; extern unsigned IOCTL_PPPIOCSMAXCID; extern unsigned IOCTL_PPPIOCSMRU; extern unsigned IOCTL_PPPIOCSXASYNCMAP; extern unsigned IOCTL_SIOCDARP; extern unsigned IOCTL_SIOCDRARP; extern unsigned IOCTL_SIOCGARP; extern unsigned IOCTL_SIOCGIFENCAP; extern unsigned IOCTL_SIOCGIFHWADDR; extern unsigned IOCTL_SIOCGIFMAP; extern unsigned IOCTL_SIOCGIFMEM; extern unsigned IOCTL_SIOCGIFNAME; extern unsigned IOCTL_SIOCGIFSLAVE; extern unsigned IOCTL_SIOCGRARP; extern unsigned IOCTL_SIOCGSTAMP; extern unsigned IOCTL_SIOCSARP; extern unsigned IOCTL_SIOCSIFENCAP; extern unsigned IOCTL_SIOCSIFHWADDR; extern unsigned IOCTL_SIOCSIFLINK; extern unsigned IOCTL_SIOCSIFMAP; extern unsigned IOCTL_SIOCSIFMEM; extern unsigned IOCTL_SIOCSIFSLAVE; extern unsigned IOCTL_SIOCSRARP; extern unsigned IOCTL_SNDCTL_COPR_HALT; extern unsigned IOCTL_SNDCTL_COPR_LOAD; extern unsigned IOCTL_SNDCTL_COPR_RCODE; extern unsigned IOCTL_SNDCTL_COPR_RCVMSG; extern unsigned IOCTL_SNDCTL_COPR_RDATA; extern unsigned IOCTL_SNDCTL_COPR_RESET; extern unsigned IOCTL_SNDCTL_COPR_RUN; extern unsigned IOCTL_SNDCTL_COPR_SENDMSG; extern unsigned IOCTL_SNDCTL_COPR_WCODE; extern unsigned IOCTL_SNDCTL_COPR_WDATA; extern unsigned IOCTL_TCFLSH; extern unsigned IOCTL_TCGETA; extern unsigned IOCTL_TCGETS; extern unsigned IOCTL_TCSBRK; extern unsigned IOCTL_TCSBRKP; extern unsigned IOCTL_TCSETA; extern unsigned IOCTL_TCSETAF; extern unsigned IOCTL_TCSETAW; extern unsigned IOCTL_TCSETS; extern unsigned IOCTL_TCSETSF; extern unsigned IOCTL_TCSETSW; extern unsigned IOCTL_TCXONC; extern unsigned IOCTL_TIOCGLCKTRMIOS; extern unsigned IOCTL_TIOCGSOFTCAR; extern unsigned IOCTL_TIOCINQ; extern unsigned IOCTL_TIOCLINUX; extern unsigned IOCTL_TIOCSERCONFIG; extern unsigned IOCTL_TIOCSERGETLSR; extern unsigned IOCTL_TIOCSERGWILD; extern unsigned IOCTL_TIOCSERSWILD; extern unsigned IOCTL_TIOCSLCKTRMIOS; extern unsigned IOCTL_TIOCSSOFTCAR; extern unsigned 
IOCTL_VT_DISALLOCATE; extern unsigned IOCTL_VT_GETSTATE; extern unsigned IOCTL_VT_RESIZE; extern unsigned IOCTL_VT_RESIZEX; extern unsigned IOCTL_VT_SENDSIG; #endif // SANITIZER_LINUX #if SANITIZER_LINUX || SANITIZER_FREEBSD extern unsigned IOCTL_MTIOCGET; extern unsigned IOCTL_MTIOCTOP; extern unsigned IOCTL_SIOCADDRT; extern unsigned IOCTL_SIOCDELRT; extern unsigned IOCTL_SNDCTL_DSP_GETBLKSIZE; extern unsigned IOCTL_SNDCTL_DSP_GETFMTS; extern unsigned IOCTL_SNDCTL_DSP_NONBLOCK; extern unsigned IOCTL_SNDCTL_DSP_POST; extern unsigned IOCTL_SNDCTL_DSP_RESET; extern unsigned IOCTL_SNDCTL_DSP_SETFMT; extern unsigned IOCTL_SNDCTL_DSP_SETFRAGMENT; extern unsigned IOCTL_SNDCTL_DSP_SPEED; extern unsigned IOCTL_SNDCTL_DSP_STEREO; extern unsigned IOCTL_SNDCTL_DSP_SUBDIVIDE; extern unsigned IOCTL_SNDCTL_DSP_SYNC; extern unsigned IOCTL_SNDCTL_FM_4OP_ENABLE; extern unsigned IOCTL_SNDCTL_FM_LOAD_INSTR; extern unsigned IOCTL_SNDCTL_MIDI_INFO; extern unsigned IOCTL_SNDCTL_MIDI_PRETIME; extern unsigned IOCTL_SNDCTL_SEQ_CTRLRATE; extern unsigned IOCTL_SNDCTL_SEQ_GETINCOUNT; extern unsigned IOCTL_SNDCTL_SEQ_GETOUTCOUNT; extern unsigned IOCTL_SNDCTL_SEQ_NRMIDIS; extern unsigned IOCTL_SNDCTL_SEQ_NRSYNTHS; extern unsigned IOCTL_SNDCTL_SEQ_OUTOFBAND; extern unsigned IOCTL_SNDCTL_SEQ_PANIC; extern unsigned IOCTL_SNDCTL_SEQ_PERCMODE; extern unsigned IOCTL_SNDCTL_SEQ_RESET; extern unsigned IOCTL_SNDCTL_SEQ_RESETSAMPLES; extern unsigned IOCTL_SNDCTL_SEQ_SYNC; extern unsigned IOCTL_SNDCTL_SEQ_TESTMIDI; extern unsigned IOCTL_SNDCTL_SEQ_THRESHOLD; extern unsigned IOCTL_SNDCTL_SYNTH_INFO; extern unsigned IOCTL_SNDCTL_SYNTH_MEMAVL; extern unsigned IOCTL_SNDCTL_TMR_CONTINUE; extern unsigned IOCTL_SNDCTL_TMR_METRONOME; extern unsigned IOCTL_SNDCTL_TMR_SELECT; extern unsigned IOCTL_SNDCTL_TMR_SOURCE; extern unsigned IOCTL_SNDCTL_TMR_START; extern unsigned IOCTL_SNDCTL_TMR_STOP; extern unsigned IOCTL_SNDCTL_TMR_TEMPO; extern unsigned IOCTL_SNDCTL_TMR_TIMEBASE; extern unsigned IOCTL_SOUND_MIXER_READ_ALTPCM; extern unsigned IOCTL_SOUND_MIXER_READ_BASS; extern unsigned IOCTL_SOUND_MIXER_READ_CAPS; extern unsigned IOCTL_SOUND_MIXER_READ_CD; extern unsigned IOCTL_SOUND_MIXER_READ_DEVMASK; extern unsigned IOCTL_SOUND_MIXER_READ_ENHANCE; extern unsigned IOCTL_SOUND_MIXER_READ_IGAIN; extern unsigned IOCTL_SOUND_MIXER_READ_IMIX; extern unsigned IOCTL_SOUND_MIXER_READ_LINE1; extern unsigned IOCTL_SOUND_MIXER_READ_LINE2; extern unsigned IOCTL_SOUND_MIXER_READ_LINE3; extern unsigned IOCTL_SOUND_MIXER_READ_LINE; extern unsigned IOCTL_SOUND_MIXER_READ_LOUD; extern unsigned IOCTL_SOUND_MIXER_READ_MIC; extern unsigned IOCTL_SOUND_MIXER_READ_MUTE; extern unsigned IOCTL_SOUND_MIXER_READ_OGAIN; extern unsigned IOCTL_SOUND_MIXER_READ_PCM; extern unsigned IOCTL_SOUND_MIXER_READ_RECLEV; extern unsigned IOCTL_SOUND_MIXER_READ_RECMASK; extern unsigned IOCTL_SOUND_MIXER_READ_RECSRC; extern unsigned IOCTL_SOUND_MIXER_READ_SPEAKER; extern unsigned IOCTL_SOUND_MIXER_READ_STEREODEVS; extern unsigned IOCTL_SOUND_MIXER_READ_SYNTH; extern unsigned IOCTL_SOUND_MIXER_READ_TREBLE; extern unsigned IOCTL_SOUND_MIXER_READ_VOLUME; extern unsigned IOCTL_SOUND_MIXER_WRITE_ALTPCM; extern unsigned IOCTL_SOUND_MIXER_WRITE_BASS; extern unsigned IOCTL_SOUND_MIXER_WRITE_CD; extern unsigned IOCTL_SOUND_MIXER_WRITE_ENHANCE; extern unsigned IOCTL_SOUND_MIXER_WRITE_IGAIN; extern unsigned IOCTL_SOUND_MIXER_WRITE_IMIX; extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE1; extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE2; extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE3; extern unsigned 
IOCTL_SOUND_MIXER_WRITE_LINE; extern unsigned IOCTL_SOUND_MIXER_WRITE_LOUD; extern unsigned IOCTL_SOUND_MIXER_WRITE_MIC; extern unsigned IOCTL_SOUND_MIXER_WRITE_MUTE; extern unsigned IOCTL_SOUND_MIXER_WRITE_OGAIN; extern unsigned IOCTL_SOUND_MIXER_WRITE_PCM; extern unsigned IOCTL_SOUND_MIXER_WRITE_RECLEV; extern unsigned IOCTL_SOUND_MIXER_WRITE_RECSRC; extern unsigned IOCTL_SOUND_MIXER_WRITE_SPEAKER; extern unsigned IOCTL_SOUND_MIXER_WRITE_SYNTH; extern unsigned IOCTL_SOUND_MIXER_WRITE_TREBLE; extern unsigned IOCTL_SOUND_MIXER_WRITE_VOLUME; extern unsigned IOCTL_SOUND_PCM_READ_BITS; extern unsigned IOCTL_SOUND_PCM_READ_CHANNELS; extern unsigned IOCTL_SOUND_PCM_READ_FILTER; extern unsigned IOCTL_SOUND_PCM_READ_RATE; extern unsigned IOCTL_SOUND_PCM_WRITE_CHANNELS; extern unsigned IOCTL_SOUND_PCM_WRITE_FILTER; extern unsigned IOCTL_VT_ACTIVATE; extern unsigned IOCTL_VT_GETMODE; extern unsigned IOCTL_VT_OPENQRY; extern unsigned IOCTL_VT_RELDISP; extern unsigned IOCTL_VT_SETMODE; extern unsigned IOCTL_VT_WAITACTIVE; #endif // SANITIZER_LINUX || SANITIZER_FREEBSD #if SANITIZER_LINUX && !SANITIZER_ANDROID extern unsigned IOCTL_CYGETDEFTHRESH; extern unsigned IOCTL_CYGETDEFTIMEOUT; extern unsigned IOCTL_CYGETMON; extern unsigned IOCTL_CYGETTHRESH; extern unsigned IOCTL_CYGETTIMEOUT; extern unsigned IOCTL_CYSETDEFTHRESH; extern unsigned IOCTL_CYSETDEFTIMEOUT; extern unsigned IOCTL_CYSETTHRESH; extern unsigned IOCTL_CYSETTIMEOUT; extern unsigned IOCTL_EQL_EMANCIPATE; extern unsigned IOCTL_EQL_ENSLAVE; extern unsigned IOCTL_EQL_GETMASTRCFG; extern unsigned IOCTL_EQL_GETSLAVECFG; extern unsigned IOCTL_EQL_SETMASTRCFG; extern unsigned IOCTL_EQL_SETSLAVECFG; extern unsigned IOCTL_EVIOCGKEYCODE_V2; extern unsigned IOCTL_EVIOCGPROP; extern unsigned IOCTL_EVIOCSKEYCODE_V2; extern unsigned IOCTL_FS_IOC_GETFLAGS; extern unsigned IOCTL_FS_IOC_GETVERSION; extern unsigned IOCTL_FS_IOC_SETFLAGS; extern unsigned IOCTL_FS_IOC_SETVERSION; extern unsigned IOCTL_GIO_CMAP; extern unsigned IOCTL_GIO_FONT; extern unsigned IOCTL_GIO_UNIMAP; extern unsigned IOCTL_GIO_UNISCRNMAP; extern unsigned IOCTL_KDADDIO; extern unsigned IOCTL_KDDELIO; extern unsigned IOCTL_KDGETKEYCODE; extern unsigned IOCTL_KDGKBDIACR; extern unsigned IOCTL_KDGKBENT; extern unsigned IOCTL_KDGKBLED; extern unsigned IOCTL_KDGKBMETA; extern unsigned IOCTL_KDGKBSENT; extern unsigned IOCTL_KDMAPDISP; extern unsigned IOCTL_KDSETKEYCODE; extern unsigned IOCTL_KDSIGACCEPT; extern unsigned IOCTL_KDSKBDIACR; extern unsigned IOCTL_KDSKBENT; extern unsigned IOCTL_KDSKBLED; extern unsigned IOCTL_KDSKBMETA; extern unsigned IOCTL_KDSKBSENT; extern unsigned IOCTL_KDUNMAPDISP; extern unsigned IOCTL_LPABORT; extern unsigned IOCTL_LPABORTOPEN; extern unsigned IOCTL_LPCAREFUL; extern unsigned IOCTL_LPCHAR; extern unsigned IOCTL_LPGETIRQ; extern unsigned IOCTL_LPGETSTATUS; extern unsigned IOCTL_LPRESET; extern unsigned IOCTL_LPSETIRQ; extern unsigned IOCTL_LPTIME; extern unsigned IOCTL_LPWAIT; extern unsigned IOCTL_MTIOCGETCONFIG; extern unsigned IOCTL_MTIOCSETCONFIG; extern unsigned IOCTL_PIO_CMAP; extern unsigned IOCTL_PIO_FONT; extern unsigned IOCTL_PIO_UNIMAP; extern unsigned IOCTL_PIO_UNIMAPCLR; extern unsigned IOCTL_PIO_UNISCRNMAP; extern unsigned IOCTL_SCSI_IOCTL_GET_IDLUN; extern unsigned IOCTL_SCSI_IOCTL_PROBE_HOST; extern unsigned IOCTL_SCSI_IOCTL_TAGGED_DISABLE; extern unsigned IOCTL_SCSI_IOCTL_TAGGED_ENABLE; extern unsigned IOCTL_SIOCAIPXITFCRT; extern unsigned IOCTL_SIOCAIPXPRISLT; extern unsigned IOCTL_SIOCAX25ADDUID; extern unsigned IOCTL_SIOCAX25DELUID; 
extern unsigned IOCTL_SIOCAX25GETPARMS; extern unsigned IOCTL_SIOCAX25GETUID; extern unsigned IOCTL_SIOCAX25NOUID; extern unsigned IOCTL_SIOCAX25SETPARMS; extern unsigned IOCTL_SIOCDEVPLIP; extern unsigned IOCTL_SIOCIPXCFGDATA; extern unsigned IOCTL_SIOCNRDECOBS; extern unsigned IOCTL_SIOCNRGETPARMS; extern unsigned IOCTL_SIOCNRRTCTL; extern unsigned IOCTL_SIOCNRSETPARMS; extern unsigned IOCTL_SNDCTL_DSP_GETISPACE; extern unsigned IOCTL_SNDCTL_DSP_GETOSPACE; extern unsigned IOCTL_TIOCGSERIAL; extern unsigned IOCTL_TIOCSERGETMULTI; extern unsigned IOCTL_TIOCSERSETMULTI; extern unsigned IOCTL_TIOCSSERIAL; #endif // SANITIZER_LINUX && !SANITIZER_ANDROID #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID extern unsigned IOCTL_GIO_SCRNMAP; extern unsigned IOCTL_KDDISABIO; extern unsigned IOCTL_KDENABIO; extern unsigned IOCTL_KDGETLED; extern unsigned IOCTL_KDGETMODE; extern unsigned IOCTL_KDGKBMODE; extern unsigned IOCTL_KDGKBTYPE; extern unsigned IOCTL_KDMKTONE; extern unsigned IOCTL_KDSETLED; extern unsigned IOCTL_KDSETMODE; extern unsigned IOCTL_KDSKBMODE; extern unsigned IOCTL_KIOCSOUND; extern unsigned IOCTL_PIO_SCRNMAP; #endif extern const int si_SEGV_MAPERR; extern const int si_SEGV_ACCERR; } // namespace __sanitizer #define CHECK_TYPE_SIZE(TYPE) \ COMPILER_CHECK(sizeof(__sanitizer_##TYPE) == sizeof(TYPE)) #define CHECK_SIZE_AND_OFFSET(CLASS, MEMBER) \ COMPILER_CHECK(sizeof(((__sanitizer_##CLASS *) NULL)->MEMBER) == \ sizeof(((CLASS *) NULL)->MEMBER)); \ COMPILER_CHECK(offsetof(__sanitizer_##CLASS, MEMBER) == \ offsetof(CLASS, MEMBER)) // For sigaction, which is a function and struct at the same time, // and thus requires explicit "struct" in sizeof() expression. #define CHECK_STRUCT_SIZE_AND_OFFSET(CLASS, MEMBER) \ COMPILER_CHECK(sizeof(((struct __sanitizer_##CLASS *) NULL)->MEMBER) == \ sizeof(((struct CLASS *) NULL)->MEMBER)); \ COMPILER_CHECK(offsetof(struct __sanitizer_##CLASS, MEMBER) == \ offsetof(struct CLASS, MEMBER)) #endif Index: head/contrib/elftoolchain/libelf/_libelf_config.h =================================================================== --- head/contrib/elftoolchain/libelf/_libelf_config.h (revision 322167) +++ head/contrib/elftoolchain/libelf/_libelf_config.h (revision 322168) @@ -1,189 +1,189 @@ /*- * Copyright (c) 2008-2011 Joseph Koshy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $Id: _libelf_config.h 3400 2016-02-12 18:38:49Z emaste $ */ #if defined(__APPLE__) || defined(__DragonFly__) #if defined(__amd64__) #define LIBELF_ARCH EM_X86_64 #define LIBELF_BYTEORDER ELFDATA2LSB #define LIBELF_CLASS ELFCLASS64 #elif defined(__i386__) #define LIBELF_ARCH EM_386 #define LIBELF_BYTEORDER ELFDATA2LSB #define LIBELF_CLASS ELFCLASS32 #endif #endif /* __DragonFly__ */ #ifdef __FreeBSD__ /* * Define LIBELF_{ARCH,BYTEORDER,CLASS} based on the machine architecture. * See also: . */ #if defined(__amd64__) #define LIBELF_ARCH EM_X86_64 #define LIBELF_BYTEORDER ELFDATA2LSB #define LIBELF_CLASS ELFCLASS64 #elif defined(__aarch64__) #define LIBELF_ARCH EM_AARCH64 #define LIBELF_BYTEORDER ELFDATA2LSB #define LIBELF_CLASS ELFCLASS64 #elif defined(__arm__) #define LIBELF_ARCH EM_ARM #if defined(__ARMEB__) /* Big-endian ARM. */ #define LIBELF_BYTEORDER ELFDATA2MSB #else #define LIBELF_BYTEORDER ELFDATA2LSB #endif #define LIBELF_CLASS ELFCLASS32 #elif defined(__i386__) #define LIBELF_ARCH EM_386 #define LIBELF_BYTEORDER ELFDATA2LSB #define LIBELF_CLASS ELFCLASS32 #elif defined(__ia64__) #define LIBELF_ARCH EM_IA_64 #define LIBELF_BYTEORDER ELFDATA2LSB #define LIBELF_CLASS ELFCLASS64 #elif defined(__mips__) #define LIBELF_ARCH EM_MIPS #if defined(__MIPSEB__) #define LIBELF_BYTEORDER ELFDATA2MSB #else #define LIBELF_BYTEORDER ELFDATA2LSB #endif #define LIBELF_CLASS ELFCLASS32 #elif defined(__powerpc__) #define LIBELF_ARCH EM_PPC #define LIBELF_BYTEORDER ELFDATA2MSB #define LIBELF_CLASS ELFCLASS32 -#elif defined(__riscv64) +#elif defined(__riscv) && (__riscv_xlen == 64) #define LIBELF_ARCH EM_RISCV #define LIBELF_BYTEORDER ELFDATA2LSB #define LIBELF_CLASS ELFCLASS64 #elif defined(__sparc__) #define LIBELF_ARCH EM_SPARCV9 #define LIBELF_BYTEORDER ELFDATA2MSB #define LIBELF_CLASS ELFCLASS64 #else #error Unknown FreeBSD architecture. #endif #endif /* __FreeBSD__ */ /* * Definitions for Minix3. */ #ifdef __minix #define LIBELF_ARCH EM_386 #define LIBELF_BYTEORDER ELFDATA2LSB #define LIBELF_CLASS ELFCLASS32 #endif /* __minix */ #ifdef __NetBSD__ #include #if !defined(ARCH_ELFSIZE) #error ARCH_ELFSIZE is not defined. #endif #if ARCH_ELFSIZE == 32 #define LIBELF_ARCH ELF32_MACHDEP_ID #define LIBELF_BYTEORDER ELF32_MACHDEP_ENDIANNESS #define LIBELF_CLASS ELFCLASS32 #define Elf_Note Elf32_Nhdr #else #define LIBELF_ARCH ELF64_MACHDEP_ID #define LIBELF_BYTEORDER ELF64_MACHDEP_ENDIANNESS #define LIBELF_CLASS ELFCLASS64 #define Elf_Note Elf64_Nhdr #endif #endif /* __NetBSD__ */ #if defined(__OpenBSD__) #include #define LIBELF_ARCH ELF_TARG_MACH #define LIBELF_BYTEORDER ELF_TARG_DATA #define LIBELF_CLASS ELF_TARG_CLASS #endif /* * GNU & Linux compatibility. * * `__linux__' is defined in an environment runs the Linux kernel and glibc. * `__GNU__' is defined in an environment runs a GNU kernel (Hurd) and glibc. * `__GLIBC__' is defined for an environment that runs glibc over a non-GNU * kernel such as GNU/kFreeBSD. 
*/ #if defined(__linux__) || defined(__GNU__) || defined(__GLIBC__) #if defined(__linux__) #include "native-elf-format.h" #define LIBELF_CLASS ELFTC_CLASS #define LIBELF_ARCH ELFTC_ARCH #define LIBELF_BYTEORDER ELFTC_BYTEORDER #endif /* defined(__linux__) */ #if LIBELF_CLASS == ELFCLASS32 #define Elf_Note Elf32_Nhdr #elif LIBELF_CLASS == ELFCLASS64 #define Elf_Note Elf64_Nhdr #else #error LIBELF_CLASS needs to be one of ELFCLASS32 or ELFCLASS64 #endif #endif /* defined(__linux__) || defined(__GNU__) || defined(__GLIBC__) */ Index: head/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h =================================================================== --- head/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h (revision 322167) +++ head/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h (revision 322168) @@ -1,178 +1,178 @@ #ifndef JEMALLOC_INTERNAL_TYPES_H #define JEMALLOC_INTERNAL_TYPES_H /* Page size index type. */ typedef unsigned pszind_t; /* Size class index type. */ typedef unsigned szind_t; /* Processor / core id type. */ typedef int malloc_cpuid_t; /* * Flags bits: * * a: arena * t: tcache * 0: unused * z: zero * n: alignment * * aaaaaaaa aaaatttt tttttttt 0znnnnnn */ #define MALLOCX_ARENA_BITS 12 #define MALLOCX_TCACHE_BITS 12 #define MALLOCX_LG_ALIGN_BITS 6 #define MALLOCX_ARENA_SHIFT 20 #define MALLOCX_TCACHE_SHIFT 8 #define MALLOCX_ARENA_MASK \ (((1 << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT) /* NB: Arena index bias decreases the maximum number of arenas by 1. */ #define MALLOCX_ARENA_LIMIT ((1 << MALLOCX_ARENA_BITS) - 1) #define MALLOCX_TCACHE_MASK \ (((1 << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT) #define MALLOCX_TCACHE_MAX ((1 << MALLOCX_TCACHE_BITS) - 3) #define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1) /* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ #define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) #define MALLOCX_ALIGN_GET(flags) \ (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) #define MALLOCX_ZERO_GET(flags) \ ((bool)(flags & MALLOCX_ZERO)) #define MALLOCX_TCACHE_GET(flags) \ (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> MALLOCX_TCACHE_SHIFT)) - 2) #define MALLOCX_ARENA_GET(flags) \ (((unsigned)(((unsigned)flags) >> MALLOCX_ARENA_SHIFT)) - 1) /* Smallest size class to support. */ #define TINY_MIN (1U << LG_TINY_MIN) /* * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size * classes). 
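 *
 * For example, with LG_QUANTUM == 4 (the value used for __riscv below) the
 * quantum is 16 bytes, and QUANTUM_CEILING(20) evaluates to
 * (20 + 15) & ~15 == 32.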
*/ #ifndef LG_QUANTUM # if (defined(__i386__) || defined(_M_IX86)) # define LG_QUANTUM 4 # endif # ifdef __ia64__ # define LG_QUANTUM 4 # endif # ifdef __alpha__ # define LG_QUANTUM 4 # endif # if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) # define LG_QUANTUM 4 # endif # if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) # define LG_QUANTUM 4 # endif # ifdef __arm__ # define LG_QUANTUM 3 # endif # ifdef __aarch64__ # define LG_QUANTUM 4 # endif # ifdef __hppa__ # define LG_QUANTUM 4 # endif # ifdef __mips__ # define LG_QUANTUM 3 # endif # ifdef __or1k__ # define LG_QUANTUM 3 # endif # ifdef __powerpc__ # define LG_QUANTUM 4 # endif -# ifdef __riscv__ +# ifdef __riscv # define LG_QUANTUM 4 # endif # ifdef __s390__ # define LG_QUANTUM 4 # endif # ifdef __SH4__ # define LG_QUANTUM 4 # endif # ifdef __tile__ # define LG_QUANTUM 4 # endif # ifdef __le32__ # define LG_QUANTUM 4 # endif # ifndef LG_QUANTUM # error "Unknown minimum alignment for architecture; specify via " "--with-lg-quantum" # endif #endif #define QUANTUM ((size_t)(1U << LG_QUANTUM)) #define QUANTUM_MASK (QUANTUM - 1) /* Return the smallest quantum multiple that is >= a. */ #define QUANTUM_CEILING(a) \ (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) #define LONG ((size_t)(1U << LG_SIZEOF_LONG)) #define LONG_MASK (LONG - 1) /* Return the smallest long multiple that is >= a. */ #define LONG_CEILING(a) \ (((a) + LONG_MASK) & ~LONG_MASK) #define SIZEOF_PTR (1U << LG_SIZEOF_PTR) #define PTR_MASK (SIZEOF_PTR - 1) /* Return the smallest (void *) multiple that is >= a. */ #define PTR_CEILING(a) \ (((a) + PTR_MASK) & ~PTR_MASK) /* * Maximum size of L1 cache line. This is used to avoid cache line aliasing. * In addition, this controls the spacing of cacheline-spaced size classes. * * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can * only handle raw constants. */ #define LG_CACHELINE 6 #define CACHELINE 64 #define CACHELINE_MASK (CACHELINE - 1) /* Return the smallest cacheline multiple that is >= s. */ #define CACHELINE_CEILING(s) \ (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) /* Return the nearest aligned address at or below a. */ #define ALIGNMENT_ADDR2BASE(a, alignment) \ ((void *)((uintptr_t)(a) & ((~(alignment)) + 1))) /* Return the offset between a and the nearest aligned address at or below a. */ #define ALIGNMENT_ADDR2OFFSET(a, alignment) \ ((size_t)((uintptr_t)(a) & (alignment - 1))) /* Return the smallest alignment multiple that is >= s. */ #define ALIGNMENT_CEILING(s, alignment) \ (((s) + (alignment - 1)) & ((~(alignment)) + 1)) /* Declare a variable-length array. */ #if __STDC_VERSION__ < 199901L # ifdef _MSC_VER # include # define alloca _alloca # else # ifdef JEMALLOC_HAS_ALLOCA_H # include # else # include # endif # endif # define VARIABLE_ARRAY(type, name, count) \ type *name = alloca(sizeof(type) * (count)) #else # define VARIABLE_ARRAY(type, name, count) type name[(count)] #endif #endif /* JEMALLOC_INTERNAL_TYPES_H */ Index: head/contrib/jemalloc/include/jemalloc/jemalloc_FreeBSD.h =================================================================== --- head/contrib/jemalloc/include/jemalloc/jemalloc_FreeBSD.h (revision 322167) +++ head/contrib/jemalloc/include/jemalloc/jemalloc_FreeBSD.h (revision 322168) @@ -1,185 +1,185 @@ /* * Override settings that were generated in jemalloc_defs.h as necessary. 
*/ #undef JEMALLOC_OVERRIDE_VALLOC #ifndef MALLOC_PRODUCTION #define JEMALLOC_DEBUG #endif #undef JEMALLOC_DSS #undef JEMALLOC_BACKGROUND_THREAD /* * The following are architecture-dependent, so conditionally define them for * each supported architecture. */ #undef JEMALLOC_TLS_MODEL #undef STATIC_PAGE_SHIFT #undef LG_VADDR #undef LG_SIZEOF_PTR #undef LG_SIZEOF_INT #undef LG_SIZEOF_LONG #undef LG_SIZEOF_INTMAX_T #ifdef __i386__ # define LG_VADDR 32 # define LG_SIZEOF_PTR 2 # define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) #endif #ifdef __ia64__ # define LG_VADDR 64 # define LG_SIZEOF_PTR 3 #endif #ifdef __sparc64__ # define LG_VADDR 64 # define LG_SIZEOF_PTR 3 # define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) #endif #ifdef __amd64__ # define LG_VADDR 48 # define LG_SIZEOF_PTR 3 # define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) #endif #ifdef __arm__ # define LG_VADDR 32 # define LG_SIZEOF_PTR 2 #endif #ifdef __aarch64__ # define LG_VADDR 48 # define LG_SIZEOF_PTR 3 #endif #ifdef __mips__ #ifdef __mips_n64 # define LG_VADDR 64 # define LG_SIZEOF_PTR 3 #else # define LG_VADDR 32 # define LG_SIZEOF_PTR 2 #endif #endif #ifdef __powerpc64__ # define LG_VADDR 64 # define LG_SIZEOF_PTR 3 #elif defined(__powerpc__) # define LG_VADDR 32 # define LG_SIZEOF_PTR 2 #endif -#ifdef __riscv__ +#ifdef __riscv # define LG_VADDR 64 # define LG_SIZEOF_PTR 3 #endif #ifndef JEMALLOC_TLS_MODEL # define JEMALLOC_TLS_MODEL /* Default. */ #endif #define STATIC_PAGE_SHIFT PAGE_SHIFT #define LG_SIZEOF_INT 2 #define LG_SIZEOF_LONG LG_SIZEOF_PTR #define LG_SIZEOF_INTMAX_T 3 #undef CPU_SPINWAIT #include #include #define CPU_SPINWAIT cpu_spinwait() /* Disable lazy-lock machinery, mangle isthreaded, and adjust its type. */ #undef JEMALLOC_LAZY_LOCK extern int __isthreaded; #define isthreaded ((bool)__isthreaded) /* Mangle. 
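 * jemalloc's public je_* entry points are redirected to libc-private __*
 * names here; the __weak_reference() block at the bottom of this header
 * then exposes them again under the standard names, so an application can
 * still interpose its own malloc/free.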
*/ #undef je_malloc #undef je_calloc #undef je_posix_memalign #undef je_aligned_alloc #undef je_realloc #undef je_free #undef je_malloc_usable_size #undef je_mallocx #undef je_rallocx #undef je_xallocx #undef je_sallocx #undef je_dallocx #undef je_sdallocx #undef je_nallocx #undef je_mallctl #undef je_mallctlnametomib #undef je_mallctlbymib #undef je_malloc_stats_print #undef je_allocm #undef je_rallocm #undef je_sallocm #undef je_dallocm #undef je_nallocm #define je_malloc __malloc #define je_calloc __calloc #define je_posix_memalign __posix_memalign #define je_aligned_alloc __aligned_alloc #define je_realloc __realloc #define je_free __free #define je_malloc_usable_size __malloc_usable_size #define je_mallocx __mallocx #define je_rallocx __rallocx #define je_xallocx __xallocx #define je_sallocx __sallocx #define je_dallocx __dallocx #define je_sdallocx __sdallocx #define je_nallocx __nallocx #define je_mallctl __mallctl #define je_mallctlnametomib __mallctlnametomib #define je_mallctlbymib __mallctlbymib #define je_malloc_stats_print __malloc_stats_print #define je_allocm __allocm #define je_rallocm __rallocm #define je_sallocm __sallocm #define je_dallocm __dallocm #define je_nallocm __nallocm #define open _open #define read _read #define write _write #define close _close #define pthread_join _pthread_join #define pthread_once _pthread_once #define pthread_self _pthread_self #define pthread_equal _pthread_equal #define pthread_mutex_lock _pthread_mutex_lock #define pthread_mutex_trylock _pthread_mutex_trylock #define pthread_mutex_unlock _pthread_mutex_unlock #define pthread_cond_init _pthread_cond_init #define pthread_cond_wait _pthread_cond_wait #define pthread_cond_timedwait _pthread_cond_timedwait #define pthread_cond_signal _pthread_cond_signal #ifdef JEMALLOC_C_ /* * Define 'weak' symbols so that an application can have its own versions * of malloc, calloc, realloc, free, et al. */ __weak_reference(__malloc, malloc); __weak_reference(__calloc, calloc); __weak_reference(__posix_memalign, posix_memalign); __weak_reference(__aligned_alloc, aligned_alloc); __weak_reference(__realloc, realloc); __weak_reference(__free, free); __weak_reference(__malloc_usable_size, malloc_usable_size); __weak_reference(__mallocx, mallocx); __weak_reference(__rallocx, rallocx); __weak_reference(__xallocx, xallocx); __weak_reference(__sallocx, sallocx); __weak_reference(__dallocx, dallocx); __weak_reference(__sdallocx, sdallocx); __weak_reference(__nallocx, nallocx); __weak_reference(__mallctl, mallctl); __weak_reference(__mallctlnametomib, mallctlnametomib); __weak_reference(__mallctlbymib, mallctlbymib); __weak_reference(__malloc_stats_print, malloc_stats_print); __weak_reference(__allocm, allocm); __weak_reference(__rallocm, rallocm); __weak_reference(__sallocm, sallocm); __weak_reference(__dallocm, dallocm); __weak_reference(__nallocm, nallocm); #endif Index: head/contrib/llvm/projects/libunwind/include/__libunwind_config.h =================================================================== --- head/contrib/llvm/projects/libunwind/include/__libunwind_config.h (revision 322167) +++ head/contrib/llvm/projects/libunwind/include/__libunwind_config.h (revision 322168) @@ -1,71 +1,71 @@ //===------------------------- __libunwind_config.h -----------------------===// // // The LLVM Compiler Infrastructure // // This file is dual licensed under the MIT and the University of Illinois Open // Source Licenses. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// #ifndef ____LIBUNWIND_CONFIG_H__ #define ____LIBUNWIND_CONFIG_H__ #if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \ !defined(__ARM_DWARF_EH__) #define _LIBUNWIND_ARM_EHABI 1 #else #define _LIBUNWIND_ARM_EHABI 0 #endif #if defined(_LIBUNWIND_IS_NATIVE_ONLY) # if defined(__i386__) # define _LIBUNWIND_TARGET_I386 1 # define _LIBUNWIND_CONTEXT_SIZE 8 # define _LIBUNWIND_CURSOR_SIZE 19 # define _LIBUNWIND_MAX_REGISTER 9 # elif defined(__x86_64__) # define _LIBUNWIND_TARGET_X86_64 1 # define _LIBUNWIND_CONTEXT_SIZE 21 # define _LIBUNWIND_CURSOR_SIZE 33 # define _LIBUNWIND_MAX_REGISTER 17 # elif defined(__ppc__) # define _LIBUNWIND_TARGET_PPC 1 # define _LIBUNWIND_CONTEXT_SIZE 117 # define _LIBUNWIND_CURSOR_SIZE 128 # define _LIBUNWIND_MAX_REGISTER 113 # elif defined(__aarch64__) # define _LIBUNWIND_TARGET_AARCH64 1 # define _LIBUNWIND_CONTEXT_SIZE 66 # define _LIBUNWIND_CURSOR_SIZE 78 # define _LIBUNWIND_MAX_REGISTER 96 # elif defined(__arm__) # define _LIBUNWIND_TARGET_ARM 1 # define _LIBUNWIND_CONTEXT_SIZE 60 # define _LIBUNWIND_CURSOR_SIZE 67 # define _LIBUNWIND_MAX_REGISTER 96 # elif defined(__or1k__) # define _LIBUNWIND_TARGET_OR1K 1 # define _LIBUNWIND_CONTEXT_SIZE 16 # define _LIBUNWIND_CURSOR_SIZE 28 # define _LIBUNWIND_MAX_REGISTER 32 -# elif defined(__riscv__) +# elif defined(__riscv) # define _LIBUNWIND_TARGET_RISCV 1 # define _LIBUNWIND_CONTEXT_SIZE 64 # define _LIBUNWIND_CURSOR_SIZE 76 # define _LIBUNWIND_MAX_REGISTER 96 # else # error "Unsupported architecture." # endif #else // !_LIBUNWIND_IS_NATIVE_ONLY # define _LIBUNWIND_TARGET_I386 1 # define _LIBUNWIND_TARGET_X86_64 1 # define _LIBUNWIND_TARGET_PPC 1 # define _LIBUNWIND_TARGET_AARCH64 1 # define _LIBUNWIND_TARGET_ARM 1 # define _LIBUNWIND_TARGET_OR1K 1 # define _LIBUNWIND_CONTEXT_SIZE 128 # define _LIBUNWIND_CURSOR_SIZE 140 # define _LIBUNWIND_MAX_REGISTER 120 #endif // _LIBUNWIND_IS_NATIVE_ONLY #endif // ____LIBUNWIND_CONFIG_H__ Index: head/contrib/llvm/projects/libunwind/src/UnwindRegistersRestore.S =================================================================== --- head/contrib/llvm/projects/libunwind/src/UnwindRegistersRestore.S (revision 322167) +++ head/contrib/llvm/projects/libunwind/src/UnwindRegistersRestore.S (revision 322168) @@ -1,531 +1,531 @@ //===-------------------- UnwindRegistersRestore.S ------------------------===// // // The LLVM Compiler Infrastructure // // This file is dual licensed under the MIT and the University of Illinois Open // Source Licenses. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// #include "assembly.h" .text #if defined(__i386__) DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_x866jumptoEv) # # void libunwind::Registers_x86::jumpto() # # On entry: # + + # +-----------------------+ # + thread_state pointer + # +-----------------------+ # + return address + # +-----------------------+ <-- SP # + + movl 4(%esp), %eax # set up eax and ret on new stack location movl 28(%eax), %edx # edx holds new stack pointer subl $8,%edx movl %edx, 28(%eax) movl 0(%eax), %ebx movl %ebx, 0(%edx) movl 40(%eax), %ebx movl %ebx, 4(%edx) # we now have ret and eax pushed onto where new stack will be # restore all registers movl 4(%eax), %ebx movl 8(%eax), %ecx movl 12(%eax), %edx movl 16(%eax), %edi movl 20(%eax), %esi movl 24(%eax), %ebp movl 28(%eax), %esp # skip ss # skip eflags pop %eax # eax was already pushed on new stack ret # eip was already pushed on new stack # skip cs # skip ds # skip es # skip fs # skip gs #elif defined(__x86_64__) DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind16Registers_x86_646jumptoEv) # # void libunwind::Registers_x86_64::jumpto() # # On entry, thread_state pointer is in rdi movq 56(%rdi), %rax # rax holds new stack pointer subq $16, %rax movq %rax, 56(%rdi) movq 32(%rdi), %rbx # store new rdi on new stack movq %rbx, 0(%rax) movq 128(%rdi), %rbx # store new rip on new stack movq %rbx, 8(%rax) # restore all registers movq 0(%rdi), %rax movq 8(%rdi), %rbx movq 16(%rdi), %rcx movq 24(%rdi), %rdx # restore rdi later movq 40(%rdi), %rsi movq 48(%rdi), %rbp # restore rsp later movq 64(%rdi), %r8 movq 72(%rdi), %r9 movq 80(%rdi), %r10 movq 88(%rdi), %r11 movq 96(%rdi), %r12 movq 104(%rdi), %r13 movq 112(%rdi), %r14 movq 120(%rdi), %r15 # skip rflags # skip cs # skip fs # skip gs movq 56(%rdi), %rsp # cut back rsp to new location pop %rdi # rdi was saved here earlier ret # rip was saved here #elif defined(__ppc__) DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv) ; ; void libunwind::Registers_ppc::jumpto() ; ; On entry: ; thread_state pointer is in r3 ; ; restore integral registerrs ; skip r0 for now ; skip r1 for now lwz r2, 16(r3) ; skip r3 for now ; skip r4 for now ; skip r5 for now lwz r6, 32(r3) lwz r7, 36(r3) lwz r8, 40(r3) lwz r9, 44(r3) lwz r10, 48(r3) lwz r11, 52(r3) lwz r12, 56(r3) lwz r13, 60(r3) lwz r14, 64(r3) lwz r15, 68(r3) lwz r16, 72(r3) lwz r17, 76(r3) lwz r18, 80(r3) lwz r19, 84(r3) lwz r20, 88(r3) lwz r21, 92(r3) lwz r22, 96(r3) lwz r23,100(r3) lwz r24,104(r3) lwz r25,108(r3) lwz r26,112(r3) lwz r27,116(r3) lwz r28,120(r3) lwz r29,124(r3) lwz r30,128(r3) lwz r31,132(r3) ; restore float registers lfd f0, 160(r3) lfd f1, 168(r3) lfd f2, 176(r3) lfd f3, 184(r3) lfd f4, 192(r3) lfd f5, 200(r3) lfd f6, 208(r3) lfd f7, 216(r3) lfd f8, 224(r3) lfd f9, 232(r3) lfd f10,240(r3) lfd f11,248(r3) lfd f12,256(r3) lfd f13,264(r3) lfd f14,272(r3) lfd f15,280(r3) lfd f16,288(r3) lfd f17,296(r3) lfd f18,304(r3) lfd f19,312(r3) lfd f20,320(r3) lfd f21,328(r3) lfd f22,336(r3) lfd f23,344(r3) lfd f24,352(r3) lfd f25,360(r3) lfd f26,368(r3) lfd f27,376(r3) lfd f28,384(r3) lfd f29,392(r3) lfd f30,400(r3) lfd f31,408(r3) ; restore vector registers if any are in use lwz r5,156(r3) ; test VRsave cmpwi r5,0 beq Lnovec subi r4,r1,16 rlwinm r4,r4,0,0,27 ; mask low 4-bits ; r4 is now a 16-byte aligned pointer into the red zone ; the _vectorRegisters may not be 16-byte aligned so copy via red zone temp buffer #define LOAD_VECTOR_UNALIGNEDl(_index) \ andis. 
r0,r5,(1<<(15-_index)) @\ beq Ldone ## _index @\ lwz r0, 424+_index*16(r3) @\ stw r0, 0(r4) @\ lwz r0, 424+_index*16+4(r3) @\ stw r0, 4(r4) @\ lwz r0, 424+_index*16+8(r3) @\ stw r0, 8(r4) @\ lwz r0, 424+_index*16+12(r3)@\ stw r0, 12(r4) @\ lvx v ## _index,0,r4 @\ Ldone ## _index: #define LOAD_VECTOR_UNALIGNEDh(_index) \ andi. r0,r5,(1<<(31-_index)) @\ beq Ldone ## _index @\ lwz r0, 424+_index*16(r3) @\ stw r0, 0(r4) @\ lwz r0, 424+_index*16+4(r3) @\ stw r0, 4(r4) @\ lwz r0, 424+_index*16+8(r3) @\ stw r0, 8(r4) @\ lwz r0, 424+_index*16+12(r3)@\ stw r0, 12(r4) @\ lvx v ## _index,0,r4 @\ Ldone ## _index: LOAD_VECTOR_UNALIGNEDl(0) LOAD_VECTOR_UNALIGNEDl(1) LOAD_VECTOR_UNALIGNEDl(2) LOAD_VECTOR_UNALIGNEDl(3) LOAD_VECTOR_UNALIGNEDl(4) LOAD_VECTOR_UNALIGNEDl(5) LOAD_VECTOR_UNALIGNEDl(6) LOAD_VECTOR_UNALIGNEDl(7) LOAD_VECTOR_UNALIGNEDl(8) LOAD_VECTOR_UNALIGNEDl(9) LOAD_VECTOR_UNALIGNEDl(10) LOAD_VECTOR_UNALIGNEDl(11) LOAD_VECTOR_UNALIGNEDl(12) LOAD_VECTOR_UNALIGNEDl(13) LOAD_VECTOR_UNALIGNEDl(14) LOAD_VECTOR_UNALIGNEDl(15) LOAD_VECTOR_UNALIGNEDh(16) LOAD_VECTOR_UNALIGNEDh(17) LOAD_VECTOR_UNALIGNEDh(18) LOAD_VECTOR_UNALIGNEDh(19) LOAD_VECTOR_UNALIGNEDh(20) LOAD_VECTOR_UNALIGNEDh(21) LOAD_VECTOR_UNALIGNEDh(22) LOAD_VECTOR_UNALIGNEDh(23) LOAD_VECTOR_UNALIGNEDh(24) LOAD_VECTOR_UNALIGNEDh(25) LOAD_VECTOR_UNALIGNEDh(26) LOAD_VECTOR_UNALIGNEDh(27) LOAD_VECTOR_UNALIGNEDh(28) LOAD_VECTOR_UNALIGNEDh(29) LOAD_VECTOR_UNALIGNEDh(30) LOAD_VECTOR_UNALIGNEDh(31) Lnovec: lwz r0, 136(r3) ; __cr mtocrf 255,r0 lwz r0, 148(r3) ; __ctr mtctr r0 lwz r0, 0(r3) ; __ssr0 mtctr r0 lwz r0, 8(r3) ; do r0 now lwz r5,28(r3) ; do r5 now lwz r4,24(r3) ; do r4 now lwz r1,12(r3) ; do sp now lwz r3,20(r3) ; do r3 last bctr #elif defined(__arm64__) || defined(__aarch64__) // // void libunwind::Registers_arm64::jumpto() // // On entry: // thread_state pointer is in x0 // .p2align 2 DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind15Registers_arm646jumptoEv) // skip restore of x0,x1 for now ldp x2, x3, [x0, #0x010] ldp x4, x5, [x0, #0x020] ldp x6, x7, [x0, #0x030] ldp x8, x9, [x0, #0x040] ldp x10,x11, [x0, #0x050] ldp x12,x13, [x0, #0x060] ldp x14,x15, [x0, #0x070] ldp x16,x17, [x0, #0x080] ldp x18,x19, [x0, #0x090] ldp x20,x21, [x0, #0x0A0] ldp x22,x23, [x0, #0x0B0] ldp x24,x25, [x0, #0x0C0] ldp x26,x27, [x0, #0x0D0] ldp x28,x29, [x0, #0x0E0] ldr x30, [x0, #0x100] // restore pc into lr ldr x1, [x0, #0x0F8] mov sp,x1 // restore sp ldp d0, d1, [x0, #0x110] ldp d2, d3, [x0, #0x120] ldp d4, d5, [x0, #0x130] ldp d6, d7, [x0, #0x140] ldp d8, d9, [x0, #0x150] ldp d10,d11, [x0, #0x160] ldp d12,d13, [x0, #0x170] ldp d14,d15, [x0, #0x180] ldp d16,d17, [x0, #0x190] ldp d18,d19, [x0, #0x1A0] ldp d20,d21, [x0, #0x1B0] ldp d22,d23, [x0, #0x1C0] ldp d24,d25, [x0, #0x1D0] ldp d26,d27, [x0, #0x1E0] ldp d28,d29, [x0, #0x1F0] ldr d30, [x0, #0x200] ldr d31, [x0, #0x208] ldp x0, x1, [x0, #0x000] // restore x0,x1 ret x30 // jump to pc #elif defined(__arm__) && !defined(__APPLE__) #if !defined(__ARM_ARCH_ISA_ARM) .thumb #endif @ @ void libunwind::Registers_arm::restoreCoreAndJumpTo() @ @ On entry: @ thread_state pointer is in r0 @ .p2align 2 DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm20restoreCoreAndJumpToEv) #if !defined(__ARM_ARCH_ISA_ARM) ldr r2, [r0, #52] ldr r3, [r0, #60] mov sp, r2 mov lr, r3 @ restore pc into lr ldm r0, {r0-r7} #else @ Use lr as base so that r0 can be restored. mov lr, r0 @ 32bit thumb-2 restrictions for ldm: @ . the sp (r13) cannot be in the list @ . 
the pc (r15) and lr (r14) cannot both be in the list in an LDM instruction ldm lr, {r0-r12} ldr sp, [lr, #52] ldr lr, [lr, #60] @ restore pc into lr #endif JMP(lr) @ @ static void libunwind::Registers_arm::restoreVFPWithFLDMD(unw_fpreg_t* values) @ @ On entry: @ values pointer is in r0 @ .p2align 2 .fpu vfpv3-d16 DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFLDMDEPy) @ VFP and iwMMX instructions are only available when compiling with the flags @ that enable them. We do not want to do that in the library (because we do not @ want the compiler to generate instructions that access those) but this is @ only accessed if the personality routine needs these registers. Use of @ these registers implies they are, actually, available on the target, so @ it's ok to execute. @ So, generate the instruction using the corresponding coprocessor mnemonic. vldmia r0, {d0-d15} JMP(lr) @ @ static void libunwind::Registers_arm::restoreVFPWithFLDMX(unw_fpreg_t* values) @ @ On entry: @ values pointer is in r0 @ .p2align 2 .fpu vfpv3-d16 DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFLDMXEPy) vldmia r0, {d0-d15} @ fldmiax is deprecated in ARMv7+ and now behaves like vldmia JMP(lr) @ @ static void libunwind::Registers_arm::restoreVFPv3(unw_fpreg_t* values) @ @ On entry: @ values pointer is in r0 @ .p2align 2 .fpu vfpv3 DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm12restoreVFPv3EPy) vldmia r0, {d16-d31} JMP(lr) @ @ static void libunwind::Registers_arm::restoreiWMMX(unw_fpreg_t* values) @ @ On entry: @ values pointer is in r0 @ .p2align 2 DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm12restoreiWMMXEPy) #if (!defined(__ARM_ARCH_6M__) && !defined(__ARM_ARCH_6SM__)) || defined(__ARM_WMMX) ldcl p1, cr0, [r0], #8 @ wldrd wR0, [r0], #8 ldcl p1, cr1, [r0], #8 @ wldrd wR1, [r0], #8 ldcl p1, cr2, [r0], #8 @ wldrd wR2, [r0], #8 ldcl p1, cr3, [r0], #8 @ wldrd wR3, [r0], #8 ldcl p1, cr4, [r0], #8 @ wldrd wR4, [r0], #8 ldcl p1, cr5, [r0], #8 @ wldrd wR5, [r0], #8 ldcl p1, cr6, [r0], #8 @ wldrd wR6, [r0], #8 ldcl p1, cr7, [r0], #8 @ wldrd wR7, [r0], #8 ldcl p1, cr8, [r0], #8 @ wldrd wR8, [r0], #8 ldcl p1, cr9, [r0], #8 @ wldrd wR9, [r0], #8 ldcl p1, cr10, [r0], #8 @ wldrd wR10, [r0], #8 ldcl p1, cr11, [r0], #8 @ wldrd wR11, [r0], #8 ldcl p1, cr12, [r0], #8 @ wldrd wR12, [r0], #8 ldcl p1, cr13, [r0], #8 @ wldrd wR13, [r0], #8 ldcl p1, cr14, [r0], #8 @ wldrd wR14, [r0], #8 ldcl p1, cr15, [r0], #8 @ wldrd wR15, [r0], #8 #endif JMP(lr) @ @ static void libunwind::Registers_arm::restoreiWMMXControl(unw_uint32_t* values) @ @ On entry: @ values pointer is in r0 @ .p2align 2 DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm19restoreiWMMXControlEPj) #if (!defined(__ARM_ARCH_6M__) && !defined(__ARM_ARCH_6SM__)) || defined(__ARM_WMMX) ldc2 p1, cr8, [r0], #4 @ wldrw wCGR0, [r0], #4 ldc2 p1, cr9, [r0], #4 @ wldrw wCGR1, [r0], #4 ldc2 p1, cr10, [r0], #4 @ wldrw wCGR2, [r0], #4 ldc2 p1, cr11, [r0], #4 @ wldrw wCGR3, [r0], #4 #endif JMP(lr) #elif defined(__or1k__) DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind14Registers_or1k6jumptoEv) # # void libunwind::Registers_or1k::jumpto() # # On entry: # thread_state pointer is in r3 # # restore integral registerrs l.lwz r0, 0(r3) l.lwz r1, 4(r3) l.lwz r2, 8(r3) # skip r3 for now l.lwz r4, 16(r3) l.lwz r5, 20(r3) l.lwz r6, 24(r3) l.lwz r7, 28(r3) l.lwz r8, 32(r3) l.lwz r9, 36(r3) l.lwz r10, 40(r3) l.lwz r11, 44(r3) l.lwz r12, 48(r3) l.lwz r13, 52(r3) l.lwz r14, 56(r3) l.lwz r15, 60(r3) l.lwz r16, 
64(r3) l.lwz r17, 68(r3) l.lwz r18, 72(r3) l.lwz r19, 76(r3) l.lwz r20, 80(r3) l.lwz r21, 84(r3) l.lwz r22, 88(r3) l.lwz r23, 92(r3) l.lwz r24, 96(r3) l.lwz r25,100(r3) l.lwz r26,104(r3) l.lwz r27,108(r3) l.lwz r28,112(r3) l.lwz r29,116(r3) l.lwz r30,120(r3) l.lwz r31,124(r3) # at last, restore r3 l.lwz r3, 12(r3) # jump to pc l.jr r9 l.nop -#elif defined(__riscv__) +#elif defined(__riscv) // // void libunwind::Registers_riscv::jumpto() // // On entry: // thread_state pointer is in a0 // .p2align 2 DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind15Registers_riscv6jumptoEv) // x0 is zero ld x1, (8 * 1)(a0) ld x2, (8 * 2)(a0) ld x3, (8 * 3)(a0) ld x4, (8 * 4)(a0) ld x5, (8 * 5)(a0) ld x6, (8 * 6)(a0) ld x7, (8 * 7)(a0) ld x8, (8 * 8)(a0) ld x9, (8 * 9)(a0) // skip a0 for now ld x11, (8 * 11)(a0) ld x12, (8 * 12)(a0) ld x13, (8 * 13)(a0) ld x14, (8 * 14)(a0) ld x15, (8 * 15)(a0) ld x16, (8 * 16)(a0) ld x17, (8 * 17)(a0) ld x18, (8 * 18)(a0) ld x19, (8 * 19)(a0) ld x20, (8 * 20)(a0) ld x21, (8 * 21)(a0) ld x22, (8 * 22)(a0) ld x23, (8 * 23)(a0) ld x24, (8 * 24)(a0) ld x25, (8 * 25)(a0) ld x26, (8 * 26)(a0) ld x27, (8 * 27)(a0) ld x28, (8 * 28)(a0) ld x29, (8 * 29)(a0) ld x30, (8 * 30)(a0) ld x31, (8 * 31)(a0) ld x10, (8 * 10)(a0) // restore a0 /* RISCVTODO: restore FPU registers */ ret // jump to ra #endif .section .note.GNU-stack,"",@progbits Index: head/contrib/llvm/projects/libunwind/src/UnwindRegistersSave.S =================================================================== --- head/contrib/llvm/projects/libunwind/src/UnwindRegistersSave.S (revision 322167) +++ head/contrib/llvm/projects/libunwind/src/UnwindRegistersSave.S (revision 322168) @@ -1,473 +1,473 @@ //===------------------------ UnwindRegistersSave.S -----------------------===// // // The LLVM Compiler Infrastructure // // This file is dual licensed under the MIT and the University of Illinois Open // Source Licenses. See LICENSE.TXT for details. 
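//
// A minimal usage sketch (illustrative only; the helper below is not part of
// this file): unw_getcontext(), implemented in the assembly that follows,
// snapshots the caller's registers into a caller-supplied unw_context_t,
// which unw_init_local() then wraps in a cursor that unw_step() walks one
// frame at a time.  Error handling is omitted for brevity.
//
#if 0
#include <libunwind.h>
#include <stdio.h>

static void backtrace_here(void)
{
    unw_context_t uc;
    unw_cursor_t cursor;
    unw_word_t ip, off;
    char name[256];

    unw_getcontext(&uc);              /* save current register state */
    unw_init_local(&cursor, &uc);     /* build a cursor for this thread */
    while (unw_step(&cursor) > 0) {   /* > 0 while frames remain */
        unw_get_reg(&cursor, UNW_REG_IP, &ip);
        if (unw_get_proc_name(&cursor, name, sizeof(name), &off) != UNW_ESUCCESS)
            name[0] = '\0';
        printf("ip=%#lx %s+%#lx\n", (unsigned long)ip, name, (unsigned long)off);
    }
}
#endif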
// //===----------------------------------------------------------------------===// #include "assembly.h" .text #if defined(__i386__) # # extern int unw_getcontext(unw_context_t* thread_state) # # On entry: # + + # +-----------------------+ # + thread_state pointer + # +-----------------------+ # + return address + # +-----------------------+ <-- SP # + + # DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) push %eax movl 8(%esp), %eax movl %ebx, 4(%eax) movl %ecx, 8(%eax) movl %edx, 12(%eax) movl %edi, 16(%eax) movl %esi, 20(%eax) movl %ebp, 24(%eax) movl %esp, %edx addl $8, %edx movl %edx, 28(%eax) # store what sp was at call site as esp # skip ss # skip eflags movl 4(%esp), %edx movl %edx, 40(%eax) # store return address as eip # skip cs # skip ds # skip es # skip fs # skip gs movl (%esp), %edx movl %edx, (%eax) # store original eax popl %eax xorl %eax, %eax # return UNW_ESUCCESS ret #elif defined(__x86_64__) # # extern int unw_getcontext(unw_context_t* thread_state) # # On entry: # thread_state pointer is in rdi # DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) movq %rax, (%rdi) movq %rbx, 8(%rdi) movq %rcx, 16(%rdi) movq %rdx, 24(%rdi) movq %rdi, 32(%rdi) movq %rsi, 40(%rdi) movq %rbp, 48(%rdi) movq %rsp, 56(%rdi) addq $8, 56(%rdi) movq %r8, 64(%rdi) movq %r9, 72(%rdi) movq %r10, 80(%rdi) movq %r11, 88(%rdi) movq %r12, 96(%rdi) movq %r13,104(%rdi) movq %r14,112(%rdi) movq %r15,120(%rdi) movq (%rsp),%rsi movq %rsi,128(%rdi) # store return address as rip # skip rflags # skip cs # skip fs # skip gs xorl %eax, %eax # return UNW_ESUCCESS ret # elif defined(__mips__) # # extern int unw_getcontext(unw_context_t* thread_state) # # Just trap for the time being. DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) teq $0, $0 #elif defined(__ppc__) ; ; extern int unw_getcontext(unw_context_t* thread_state) ; ; On entry: ; thread_state pointer is in r3 ; DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) stw r0, 8(r3) mflr r0 stw r0, 0(r3) ; store lr as ssr0 stw r1, 12(r3) stw r2, 16(r3) stw r3, 20(r3) stw r4, 24(r3) stw r5, 28(r3) stw r6, 32(r3) stw r7, 36(r3) stw r8, 40(r3) stw r9, 44(r3) stw r10, 48(r3) stw r11, 52(r3) stw r12, 56(r3) stw r13, 60(r3) stw r14, 64(r3) stw r15, 68(r3) stw r16, 72(r3) stw r17, 76(r3) stw r18, 80(r3) stw r19, 84(r3) stw r20, 88(r3) stw r21, 92(r3) stw r22, 96(r3) stw r23,100(r3) stw r24,104(r3) stw r25,108(r3) stw r26,112(r3) stw r27,116(r3) stw r28,120(r3) stw r29,124(r3) stw r30,128(r3) stw r31,132(r3) ; save VRSave register mfspr r0,256 stw r0,156(r3) ; save CR registers mfcr r0 stw r0,136(r3) ; save CTR register mfctr r0 stw r0,148(r3) ; save float registers stfd f0, 160(r3) stfd f1, 168(r3) stfd f2, 176(r3) stfd f3, 184(r3) stfd f4, 192(r3) stfd f5, 200(r3) stfd f6, 208(r3) stfd f7, 216(r3) stfd f8, 224(r3) stfd f9, 232(r3) stfd f10,240(r3) stfd f11,248(r3) stfd f12,256(r3) stfd f13,264(r3) stfd f14,272(r3) stfd f15,280(r3) stfd f16,288(r3) stfd f17,296(r3) stfd f18,304(r3) stfd f19,312(r3) stfd f20,320(r3) stfd f21,328(r3) stfd f22,336(r3) stfd f23,344(r3) stfd f24,352(r3) stfd f25,360(r3) stfd f26,368(r3) stfd f27,376(r3) stfd f28,384(r3) stfd f29,392(r3) stfd f30,400(r3) stfd f31,408(r3) ; save vector registers subi r4,r1,16 rlwinm r4,r4,0,0,27 ; mask low 4-bits ; r4 is now a 16-byte aligned pointer into the red zone #define SAVE_VECTOR_UNALIGNED(_vec, _offset) \ stvx _vec,0,r4 @\ lwz r5, 0(r4) @\ stw r5, _offset(r3) @\ lwz r5, 4(r4) @\ stw r5, _offset+4(r3) @\ lwz r5, 8(r4) @\ stw r5, _offset+8(r3) @\ lwz r5, 12(r4) @\ stw r5, _offset+12(r3) SAVE_VECTOR_UNALIGNED( v0, 424+0x000) 
SAVE_VECTOR_UNALIGNED( v1, 424+0x010) SAVE_VECTOR_UNALIGNED( v2, 424+0x020) SAVE_VECTOR_UNALIGNED( v3, 424+0x030) SAVE_VECTOR_UNALIGNED( v4, 424+0x040) SAVE_VECTOR_UNALIGNED( v5, 424+0x050) SAVE_VECTOR_UNALIGNED( v6, 424+0x060) SAVE_VECTOR_UNALIGNED( v7, 424+0x070) SAVE_VECTOR_UNALIGNED( v8, 424+0x080) SAVE_VECTOR_UNALIGNED( v9, 424+0x090) SAVE_VECTOR_UNALIGNED(v10, 424+0x0A0) SAVE_VECTOR_UNALIGNED(v11, 424+0x0B0) SAVE_VECTOR_UNALIGNED(v12, 424+0x0C0) SAVE_VECTOR_UNALIGNED(v13, 424+0x0D0) SAVE_VECTOR_UNALIGNED(v14, 424+0x0E0) SAVE_VECTOR_UNALIGNED(v15, 424+0x0F0) SAVE_VECTOR_UNALIGNED(v16, 424+0x100) SAVE_VECTOR_UNALIGNED(v17, 424+0x110) SAVE_VECTOR_UNALIGNED(v18, 424+0x120) SAVE_VECTOR_UNALIGNED(v19, 424+0x130) SAVE_VECTOR_UNALIGNED(v20, 424+0x140) SAVE_VECTOR_UNALIGNED(v21, 424+0x150) SAVE_VECTOR_UNALIGNED(v22, 424+0x160) SAVE_VECTOR_UNALIGNED(v23, 424+0x170) SAVE_VECTOR_UNALIGNED(v24, 424+0x180) SAVE_VECTOR_UNALIGNED(v25, 424+0x190) SAVE_VECTOR_UNALIGNED(v26, 424+0x1A0) SAVE_VECTOR_UNALIGNED(v27, 424+0x1B0) SAVE_VECTOR_UNALIGNED(v28, 424+0x1C0) SAVE_VECTOR_UNALIGNED(v29, 424+0x1D0) SAVE_VECTOR_UNALIGNED(v30, 424+0x1E0) SAVE_VECTOR_UNALIGNED(v31, 424+0x1F0) li r3, 0 ; return UNW_ESUCCESS blr #elif defined(__arm64__) || defined(__aarch64__) // // extern int unw_getcontext(unw_context_t* thread_state) // // On entry: // thread_state pointer is in x0 // .p2align 2 DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) stp x0, x1, [x0, #0x000] stp x2, x3, [x0, #0x010] stp x4, x5, [x0, #0x020] stp x6, x7, [x0, #0x030] stp x8, x9, [x0, #0x040] stp x10,x11, [x0, #0x050] stp x12,x13, [x0, #0x060] stp x14,x15, [x0, #0x070] stp x16,x17, [x0, #0x080] stp x18,x19, [x0, #0x090] stp x20,x21, [x0, #0x0A0] stp x22,x23, [x0, #0x0B0] stp x24,x25, [x0, #0x0C0] stp x26,x27, [x0, #0x0D0] stp x28,x29, [x0, #0x0E0] str x30, [x0, #0x0F0] mov x1,sp str x1, [x0, #0x0F8] str x30, [x0, #0x100] // store return address as pc // skip cpsr stp d0, d1, [x0, #0x110] stp d2, d3, [x0, #0x120] stp d4, d5, [x0, #0x130] stp d6, d7, [x0, #0x140] stp d8, d9, [x0, #0x150] stp d10,d11, [x0, #0x160] stp d12,d13, [x0, #0x170] stp d14,d15, [x0, #0x180] stp d16,d17, [x0, #0x190] stp d18,d19, [x0, #0x1A0] stp d20,d21, [x0, #0x1B0] stp d22,d23, [x0, #0x1C0] stp d24,d25, [x0, #0x1D0] stp d26,d27, [x0, #0x1E0] stp d28,d29, [x0, #0x1F0] str d30, [x0, #0x200] str d31, [x0, #0x208] mov x0, #0 // return UNW_ESUCCESS ret #elif defined(__arm__) && !defined(__APPLE__) #if !defined(__ARM_ARCH_ISA_ARM) .thumb #endif @ @ extern int unw_getcontext(unw_context_t* thread_state) @ @ On entry: @ thread_state pointer is in r0 @ @ Per EHABI #4.7 this only saves the core integer registers. @ EHABI #7.4.5 notes that in general all VRS registers should be restored @ however this is very hard to do for VFP registers because it is unknown @ to the library how many registers are implemented by the architecture. @ Instead, VFP registers are demand saved by logic external to unw_getcontext. @ .p2align 2 DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) #if !defined(__ARM_ARCH_ISA_ARM) stm r0, {r0-r7} mov r2, sp mov r3, lr str r2, [r0, #52] str r3, [r0, #56] str r3, [r0, #60] @ store return address as pc #else @ 32bit thumb-2 restrictions for stm: @ . the sp (r13) cannot be in the list @ . the pc (r15) cannot be in the list in an STM instruction stm r0, {r0-r12} str sp, [r0, #52] str lr, [r0, #56] str lr, [r0, #60] @ store return address as pc #endif #if __ARM_ARCH_ISA_THUMB == 1 @ T1 does not have a non-cpsr-clobbering register-zeroing instruction. 
@ It is safe to use here though because we are about to return, and cpsr is @ not expected to be preserved. movs r0, #0 @ return UNW_ESUCCESS #else mov r0, #0 @ return UNW_ESUCCESS #endif JMP(lr) @ @ static void libunwind::Registers_arm::saveVFPWithFSTMD(unw_fpreg_t* values) @ @ On entry: @ values pointer is in r0 @ .p2align 2 .fpu vfpv3-d16 DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMDEPy) vstmia r0, {d0-d15} JMP(lr) @ @ static void libunwind::Registers_arm::saveVFPWithFSTMX(unw_fpreg_t* values) @ @ On entry: @ values pointer is in r0 @ .p2align 2 .fpu vfpv3-d16 DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMXEPy) vstmia r0, {d0-d15} @ fstmiax is deprecated in ARMv7+ and now behaves like vstmia JMP(lr) @ @ static void libunwind::Registers_arm::saveVFPv3(unw_fpreg_t* values) @ @ On entry: @ values pointer is in r0 @ .p2align 2 .fpu vfpv3 DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm9saveVFPv3EPy) @ VFP and iwMMX instructions are only available when compiling with the flags @ that enable them. We do not want to do that in the library (because we do not @ want the compiler to generate instructions that access those) but this is @ only accessed if the personality routine needs these registers. Use of @ these registers implies they are, actually, available on the target, so @ it's ok to execute. @ So, generate the instructions using the corresponding coprocessor mnemonic. vstmia r0, {d16-d31} JMP(lr) @ @ static void libunwind::Registers_arm::saveiWMMX(unw_fpreg_t* values) @ @ On entry: @ values pointer is in r0 @ .p2align 2 DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm9saveiWMMXEPy) #if (!defined(__ARM_ARCH_6M__) && !defined(__ARM_ARCH_6SM__)) || defined(__ARM_WMMX) stcl p1, cr0, [r0], #8 @ wstrd wR0, [r0], #8 stcl p1, cr1, [r0], #8 @ wstrd wR1, [r0], #8 stcl p1, cr2, [r0], #8 @ wstrd wR2, [r0], #8 stcl p1, cr3, [r0], #8 @ wstrd wR3, [r0], #8 stcl p1, cr4, [r0], #8 @ wstrd wR4, [r0], #8 stcl p1, cr5, [r0], #8 @ wstrd wR5, [r0], #8 stcl p1, cr6, [r0], #8 @ wstrd wR6, [r0], #8 stcl p1, cr7, [r0], #8 @ wstrd wR7, [r0], #8 stcl p1, cr8, [r0], #8 @ wstrd wR8, [r0], #8 stcl p1, cr9, [r0], #8 @ wstrd wR9, [r0], #8 stcl p1, cr10, [r0], #8 @ wstrd wR10, [r0], #8 stcl p1, cr11, [r0], #8 @ wstrd wR11, [r0], #8 stcl p1, cr12, [r0], #8 @ wstrd wR12, [r0], #8 stcl p1, cr13, [r0], #8 @ wstrd wR13, [r0], #8 stcl p1, cr14, [r0], #8 @ wstrd wR14, [r0], #8 stcl p1, cr15, [r0], #8 @ wstrd wR15, [r0], #8 #endif JMP(lr) @ @ static void libunwind::Registers_arm::saveiWMMXControl(unw_uint32_t* values) @ @ On entry: @ values pointer is in r0 @ .p2align 2 DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm16saveiWMMXControlEPj) #if (!defined(__ARM_ARCH_6M__) && !defined(__ARM_ARCH_6SM__)) || defined(__ARM_WMMX) stc2 p1, cr8, [r0], #4 @ wstrw wCGR0, [r0], #4 stc2 p1, cr9, [r0], #4 @ wstrw wCGR1, [r0], #4 stc2 p1, cr10, [r0], #4 @ wstrw wCGR2, [r0], #4 stc2 p1, cr11, [r0], #4 @ wstrw wCGR3, [r0], #4 #endif JMP(lr) #elif defined(__or1k__) # # extern int unw_getcontext(unw_context_t* thread_state) # # On entry: # thread_state pointer is in r3 # DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) l.sw 0(r3), r0 l.sw 4(r3), r1 l.sw 8(r3), r2 l.sw 12(r3), r3 l.sw 16(r3), r4 l.sw 20(r3), r5 l.sw 24(r3), r6 l.sw 28(r3), r7 l.sw 32(r3), r8 l.sw 36(r3), r9 l.sw 40(r3), r10 l.sw 44(r3), r11 l.sw 48(r3), r12 l.sw 52(r3), r13 l.sw 56(r3), r14 l.sw 60(r3), r15 l.sw 64(r3), r16 l.sw 68(r3), r17 l.sw 72(r3), r18 l.sw 76(r3), r19 l.sw 
80(r3), r20 l.sw 84(r3), r21 l.sw 88(r3), r22 l.sw 92(r3), r23 l.sw 96(r3), r24 l.sw 100(r3), r25 l.sw 104(r3), r26 l.sw 108(r3), r27 l.sw 112(r3), r28 l.sw 116(r3), r29 l.sw 120(r3), r30 l.sw 124(r3), r31 -#elif defined(__riscv__) +#elif defined(__riscv) /* RISCVTODO */ #endif .section .note.GNU-stack,"",@progbits Index: head/contrib/llvm/projects/libunwind/src/config.h =================================================================== --- head/contrib/llvm/projects/libunwind/src/config.h (revision 322167) +++ head/contrib/llvm/projects/libunwind/src/config.h (revision 322168) @@ -1,144 +1,144 @@ //===----------------------------- config.h -------------------------------===// // // The LLVM Compiler Infrastructure // // This file is dual licensed under the MIT and the University of Illinois Open // Source Licenses. See LICENSE.TXT for details. // // // Defines macros used within libunwind project. // //===----------------------------------------------------------------------===// #ifndef LIBUNWIND_CONFIG_H #define LIBUNWIND_CONFIG_H #include #include #include // Define static_assert() unless already defined by compiler. #ifndef __has_feature #define __has_feature(__x) 0 #endif #if !(__has_feature(cxx_static_assert)) && !defined(static_assert) #define static_assert(__b, __m) \ extern int compile_time_assert_failed[ ( __b ) ? 1 : -1 ] \ __attribute__( ( unused ) ); #endif // Platform specific configuration defines. #ifdef __APPLE__ #if defined(FOR_DYLD) #define _LIBUNWIND_SUPPORT_COMPACT_UNWIND 1 #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 0 #define _LIBUNWIND_SUPPORT_DWARF_INDEX 0 #else #define _LIBUNWIND_SUPPORT_COMPACT_UNWIND 1 #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 #define _LIBUNWIND_SUPPORT_DWARF_INDEX 0 #endif #else #if defined(__ARM_DWARF_EH__) || !defined(__arm__) #define _LIBUNWIND_SUPPORT_COMPACT_UNWIND 0 #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 #define _LIBUNWIND_SUPPORT_DWARF_INDEX 1 #else #define _LIBUNWIND_SUPPORT_COMPACT_UNWIND 0 #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 0 #define _LIBUNWIND_SUPPORT_DWARF_INDEX 0 #endif #endif // FIXME: these macros are not correct for COFF targets #define _LIBUNWIND_EXPORT __attribute__((visibility("default"))) #define _LIBUNWIND_HIDDEN __attribute__((visibility("hidden"))) #if (defined(__APPLE__) && defined(__arm__)) || defined(__USING_SJLJ_EXCEPTIONS__) #define _LIBUNWIND_BUILD_SJLJ_APIS 1 #else #define _LIBUNWIND_BUILD_SJLJ_APIS 0 #endif #if defined(__i386__) || defined(__x86_64__) #define _LIBUNWIND_SUPPORT_FRAME_APIS 1 #else #define _LIBUNWIND_SUPPORT_FRAME_APIS 0 #endif #if defined(__i386__) || defined(__x86_64__) || \ (!defined(__APPLE__) && defined(__arm__)) || \ (defined(__arm64__) || defined(__aarch64__)) || \ (defined(__APPLE__) && defined(__mips__)) || \ - defined(__riscv__) + defined(__riscv) #define _LIBUNWIND_BUILD_ZERO_COST_APIS 1 #else #define _LIBUNWIND_BUILD_ZERO_COST_APIS 0 #endif #define _LIBUNWIND_ABORT(msg) \ do { \ fprintf(stderr, "libunwind: %s %s:%d - %s\n", __func__, __FILE__, \ __LINE__, msg); \ fflush(stderr); \ abort(); \ } while (0) #define _LIBUNWIND_LOG(msg, ...) fprintf(stderr, "libunwind: " msg "\n", __VA_ARGS__) // Macros that define away in non-Debug builds #ifdef NDEBUG #define _LIBUNWIND_DEBUG_LOG(msg, ...) #define _LIBUNWIND_TRACE_API(msg, ...) #define _LIBUNWIND_TRACING_UNWINDING 0 #define _LIBUNWIND_TRACE_UNWINDING(msg, ...) 
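/*
 * Note on the _LIBUNWIND_BUILD_ZERO_COST_APIS condition above: GCC and Clang
 * predefine __riscv (one pair of underscores) for RISC-V targets, along with
 * __riscv_xlen giving the register width, while the __riscv__ spelling tested
 * before this revision is not predefined, so those paths never compiled in.
 * A minimal, illustrative check (TARGET_IS_RV64/RV32 are made-up names, not
 * used by this header):
 */
#if 0
#if defined(__riscv)
# if defined(__riscv_xlen) && __riscv_xlen == 64
#  define TARGET_IS_RV64 1
# else
#  define TARGET_IS_RV32 1
# endif
#endif
#endif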
#define _LIBUNWIND_LOG_NON_ZERO(x) x #else #ifdef __cplusplus extern "C" { #endif extern bool logAPIs(); extern bool logUnwinding(); #ifdef __cplusplus } #endif #define _LIBUNWIND_DEBUG_LOG(msg, ...) _LIBUNWIND_LOG(msg, __VA_ARGS__) #define _LIBUNWIND_LOG_NON_ZERO(x) \ do { \ int _err = x; \ if ( _err != 0 ) \ _LIBUNWIND_LOG("" #x "=%d in %s", _err, __FUNCTION__); \ } while (0) #define _LIBUNWIND_TRACE_API(msg, ...) \ do { \ if ( logAPIs() ) _LIBUNWIND_LOG(msg, __VA_ARGS__); \ } while(0) #define _LIBUNWIND_TRACE_UNWINDING(msg, ...) \ do { \ if ( logUnwinding() ) _LIBUNWIND_LOG(msg, __VA_ARGS__); \ } while(0) #define _LIBUNWIND_TRACING_UNWINDING logUnwinding() #endif #ifdef __cplusplus // Used to fit UnwindCursor and Registers_xxx types against unw_context_t / // unw_cursor_t sized memory blocks. #if defined(_LIBUNWIND_IS_NATIVE_ONLY) # define COMP_OP == #else # define COMP_OP < #endif template struct check_fit { template struct blk_count { static const size_t count = (sizeof(T) + sizeof(uint64_t) - 1) / sizeof(uint64_t); }; static const bool does_fit = (blk_count<_Type>::count COMP_OP blk_count<_Mem>::count); }; #undef COMP_OP #endif // __cplusplus #endif // LIBUNWIND_CONFIG_H Index: head/contrib/llvm/projects/libunwind/src/libunwind.cpp =================================================================== --- head/contrib/llvm/projects/libunwind/src/libunwind.cpp (revision 322167) +++ head/contrib/llvm/projects/libunwind/src/libunwind.cpp (revision 322168) @@ -1,377 +1,377 @@ //===--------------------------- libunwind.cpp ----------------------------===// // // The LLVM Compiler Infrastructure // // This file is dual licensed under the MIT and the University of Illinois Open // Source Licenses. See LICENSE.TXT for details. // // // Implements unw_* functions from // //===----------------------------------------------------------------------===// #include #ifndef NDEBUG #include // getenv #endif #include #include #include "libunwind_ext.h" #include "config.h" #include #include "UnwindCursor.hpp" using namespace libunwind; /// internal object to represent this processes address space LocalAddressSpace LocalAddressSpace::sThisAddressSpace; _LIBUNWIND_EXPORT unw_addr_space_t unw_local_addr_space = (unw_addr_space_t)&LocalAddressSpace::sThisAddressSpace; /// record the registers and stack position of the caller extern int unw_getcontext(unw_context_t *); // note: unw_getcontext() implemented in assembly /// Create a cursor of a thread in this process given 'context' recorded by /// unw_getcontext(). _LIBUNWIND_EXPORT int unw_init_local(unw_cursor_t *cursor, unw_context_t *context) { _LIBUNWIND_TRACE_API("unw_init_local(cursor=%p, context=%p)", static_cast(cursor), static_cast(context)); #if defined(__i386__) # define REGISTER_KIND Registers_x86 #elif defined(__x86_64__) # define REGISTER_KIND Registers_x86_64 #elif defined(__ppc__) # define REGISTER_KIND Registers_ppc #elif defined(__aarch64__) # define REGISTER_KIND Registers_arm64 #elif _LIBUNWIND_ARM_EHABI # define REGISTER_KIND Registers_arm #elif defined(__or1k__) # define REGISTER_KIND Registers_or1k -#elif defined(__riscv__) +#elif defined(__riscv) # define REGISTER_KIND Registers_riscv #elif defined(__mips__) # warning The MIPS architecture is not supported. #else # error Architecture not supported #endif // Use "placement new" to allocate UnwindCursor in the cursor buffer. 
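// A generic illustration of that pattern (the names below are invented for
// the sketch and are not libunwind types): the implementation object is
// constructed directly inside the opaque, fixed-size buffer the caller owns,
// so no heap allocation is needed, provided the object fits -- which is what
// the check_fit<> helper in config.h verifies in terms of uint64_t blocks.
#if 0
#include <new>        // placement operator new
#include <stdint.h>

struct Opaque { uint64_t space[16]; };          // caller-visible blob
struct Impl   { int x; Impl(int v) : x(v) {} }; // real implementation

static_assert(sizeof(Impl) <= sizeof(Opaque), "Impl must fit in Opaque");

void construct_into(Opaque *buf) {
  new (static_cast<void *>(buf)) Impl(42);      // no heap allocation
}
#endif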
new ((void *)cursor) UnwindCursor( context, LocalAddressSpace::sThisAddressSpace); #undef REGISTER_KIND AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; co->setInfoBasedOnIPRegister(); return UNW_ESUCCESS; } #ifdef UNW_REMOTE /// Create a cursor into a thread in another process. _LIBUNWIND_EXPORT int unw_init_remote_thread(unw_cursor_t *cursor, unw_addr_space_t as, void *arg) { // special case: unw_init_remote(xx, unw_local_addr_space, xx) if (as == (unw_addr_space_t)&LocalAddressSpace::sThisAddressSpace) return unw_init_local(cursor, NULL); //FIXME // use "placement new" to allocate UnwindCursor in the cursor buffer switch (as->cpuType) { case CPU_TYPE_I386: new ((void *)cursor) UnwindCursor >, Registers_x86>(((unw_addr_space_i386 *)as)->oas, arg); break; case CPU_TYPE_X86_64: new ((void *)cursor) UnwindCursor< OtherAddressSpace >, Registers_x86_64>( ((unw_addr_space_x86_64 *)as)->oas, arg); break; case CPU_TYPE_POWERPC: new ((void *)cursor) UnwindCursor >, Registers_ppc>( ((unw_addr_space_ppc *)as)->oas, arg); break; default: return UNW_EUNSPEC; } return UNW_ESUCCESS; } static bool is64bit(task_t task) { return false; // FIXME } /// Create an address_space object for use in examining another task. _LIBUNWIND_EXPORT unw_addr_space_t unw_create_addr_space_for_task(task_t task) { #if __i386__ if (is64bit(task)) { unw_addr_space_x86_64 *as = new unw_addr_space_x86_64(task); as->taskPort = task; as->cpuType = CPU_TYPE_X86_64; //as->oas } else { unw_addr_space_i386 *as = new unw_addr_space_i386(task); as->taskPort = task; as->cpuType = CPU_TYPE_I386; //as->oas } #else // FIXME #endif } /// Delete an address_space object. _LIBUNWIND_EXPORT void unw_destroy_addr_space(unw_addr_space_t asp) { switch (asp->cpuType) { #if __i386__ || __x86_64__ case CPU_TYPE_I386: { unw_addr_space_i386 *as = (unw_addr_space_i386 *)asp; delete as; } break; case CPU_TYPE_X86_64: { unw_addr_space_x86_64 *as = (unw_addr_space_x86_64 *)asp; delete as; } break; #endif case CPU_TYPE_POWERPC: { unw_addr_space_ppc *as = (unw_addr_space_ppc *)asp; delete as; } break; } } #endif // UNW_REMOTE /// Get value of specified register at cursor position in stack frame. _LIBUNWIND_EXPORT int unw_get_reg(unw_cursor_t *cursor, unw_regnum_t regNum, unw_word_t *value) { _LIBUNWIND_TRACE_API("unw_get_reg(cursor=%p, regNum=%d, &value=%p)", static_cast(cursor), regNum, static_cast(value)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; if (co->validReg(regNum)) { *value = co->getReg(regNum); return UNW_ESUCCESS; } return UNW_EBADREG; } /// Set value of specified register at cursor position in stack frame. _LIBUNWIND_EXPORT int unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum, unw_word_t value) { _LIBUNWIND_TRACE_API("unw_set_reg(cursor=%p, regNum=%d, value=0x%llX)", static_cast(cursor), regNum, (long long)value); typedef LocalAddressSpace::pint_t pint_t; AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; if (co->validReg(regNum)) { co->setReg(regNum, (pint_t)value); // specical case altering IP to re-find info (being called by personality // function) if (regNum == UNW_REG_IP) co->setInfoBasedOnIPRegister(false); return UNW_ESUCCESS; } return UNW_EBADREG; } /// Get value of specified float register at cursor position in stack frame. 
_LIBUNWIND_EXPORT int unw_get_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum, unw_fpreg_t *value) { _LIBUNWIND_TRACE_API("unw_get_fpreg(cursor=%p, regNum=%d, &value=%p)", static_cast(cursor), regNum, static_cast(value)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; if (co->validFloatReg(regNum)) { *value = co->getFloatReg(regNum); return UNW_ESUCCESS; } return UNW_EBADREG; } /// Set value of specified float register at cursor position in stack frame. _LIBUNWIND_EXPORT int unw_set_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum, unw_fpreg_t value) { #if _LIBUNWIND_ARM_EHABI _LIBUNWIND_TRACE_API("unw_set_fpreg(cursor=%p, regNum=%d, value=%llX)", static_cast(cursor), regNum, value); #else _LIBUNWIND_TRACE_API("unw_set_fpreg(cursor=%p, regNum=%d, value=%g)", static_cast(cursor), regNum, value); #endif AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; if (co->validFloatReg(regNum)) { co->setFloatReg(regNum, value); return UNW_ESUCCESS; } return UNW_EBADREG; } /// Move cursor to next frame. _LIBUNWIND_EXPORT int unw_step(unw_cursor_t *cursor) { _LIBUNWIND_TRACE_API("unw_step(cursor=%p)", static_cast(cursor)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; return co->step(); } /// Get unwind info at cursor position in stack frame. _LIBUNWIND_EXPORT int unw_get_proc_info(unw_cursor_t *cursor, unw_proc_info_t *info) { _LIBUNWIND_TRACE_API("unw_get_proc_info(cursor=%p, &info=%p)", static_cast(cursor), static_cast(info)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; co->getInfo(info); if (info->end_ip == 0) return UNW_ENOINFO; else return UNW_ESUCCESS; } /// Resume execution at cursor position (aka longjump). _LIBUNWIND_EXPORT int unw_resume(unw_cursor_t *cursor) { _LIBUNWIND_TRACE_API("unw_resume(cursor=%p)", static_cast(cursor)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; co->jumpto(); return UNW_EUNSPEC; } /// Get name of function at cursor position in stack frame. _LIBUNWIND_EXPORT int unw_get_proc_name(unw_cursor_t *cursor, char *buf, size_t bufLen, unw_word_t *offset) { _LIBUNWIND_TRACE_API("unw_get_proc_name(cursor=%p, &buf=%p, bufLen=%lu)", static_cast(cursor), static_cast(buf), static_cast(bufLen)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; if (co->getFunctionName(buf, bufLen, offset)) return UNW_ESUCCESS; else return UNW_EUNSPEC; } /// Checks if a register is a floating-point register. _LIBUNWIND_EXPORT int unw_is_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum) { _LIBUNWIND_TRACE_API("unw_is_fpreg(cursor=%p, regNum=%d)", static_cast(cursor), regNum); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; return co->validFloatReg(regNum); } /// Checks if a register is a floating-point register. _LIBUNWIND_EXPORT const char *unw_regname(unw_cursor_t *cursor, unw_regnum_t regNum) { _LIBUNWIND_TRACE_API("unw_regname(cursor=%p, regNum=%d)", static_cast(cursor), regNum); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; return co->getRegisterName(regNum); } /// Checks if current frame is signal trampoline. 
_LIBUNWIND_EXPORT int unw_is_signal_frame(unw_cursor_t *cursor) { _LIBUNWIND_TRACE_API("unw_is_signal_frame(cursor=%p)", static_cast(cursor)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; return co->isSignalFrame(); } #ifdef __arm__ // Save VFP registers d0-d15 using FSTMIADX instead of FSTMIADD _LIBUNWIND_EXPORT void unw_save_vfp_as_X(unw_cursor_t *cursor) { _LIBUNWIND_TRACE_API("unw_fpreg_save_vfp_as_X(cursor=%p)", static_cast(cursor)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; return co->saveVFPAsX(); } #endif #if _LIBUNWIND_SUPPORT_DWARF_UNWIND /// SPI: walks cached dwarf entries _LIBUNWIND_EXPORT void unw_iterate_dwarf_unwind_cache(void (*func)( unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)) { _LIBUNWIND_TRACE_API("unw_iterate_dwarf_unwind_cache(func=%p)", reinterpret_cast(func)); DwarfFDECache::iterateCacheEntries(func); } /// IPI: for __register_frame() void _unw_add_dynamic_fde(unw_word_t fde) { CFI_Parser::FDE_Info fdeInfo; CFI_Parser::CIE_Info cieInfo; const char *message = CFI_Parser::decodeFDE( LocalAddressSpace::sThisAddressSpace, (LocalAddressSpace::pint_t) fde, &fdeInfo, &cieInfo); if (message == NULL) { // dynamically registered FDEs don't have a mach_header group they are in. // Use fde as mh_group unw_word_t mh_group = fdeInfo.fdeStart; DwarfFDECache::add((LocalAddressSpace::pint_t)mh_group, fdeInfo.pcStart, fdeInfo.pcEnd, fdeInfo.fdeStart); } else { _LIBUNWIND_DEBUG_LOG("_unw_add_dynamic_fde: bad fde: %s", message); } } /// IPI: for __deregister_frame() void _unw_remove_dynamic_fde(unw_word_t fde) { // fde is own mh_group DwarfFDECache::removeAllIn((LocalAddressSpace::pint_t)fde); } #endif // _LIBUNWIND_SUPPORT_DWARF_UNWIND // Add logging hooks in Debug builds only #ifndef NDEBUG #include _LIBUNWIND_HIDDEN bool logAPIs() { // do manual lock to avoid use of _cxa_guard_acquire or initializers static bool checked = false; static bool log = false; if (!checked) { log = (getenv("LIBUNWIND_PRINT_APIS") != NULL); checked = true; } return log; } _LIBUNWIND_HIDDEN bool logUnwinding() { // do manual lock to avoid use of _cxa_guard_acquire or initializers static bool checked = false; static bool log = false; if (!checked) { log = (getenv("LIBUNWIND_PRINT_UNWINDING") != NULL); checked = true; } return log; } #endif // NDEBUG Index: head/contrib/netbsd-tests/lib/libc/gen/t_dir.c =================================================================== --- head/contrib/netbsd-tests/lib/libc/gen/t_dir.c (revision 322167) +++ head/contrib/netbsd-tests/lib/libc/gen/t_dir.c (revision 322168) @@ -1,193 +1,193 @@ /* $NetBSD: t_dir.c,v 1.10 2017/01/11 18:15:02 christos Exp $ */ /*- * Copyright (c) 2010 The NetBSD Foundation, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include ATF_TC(seekdir_basic); ATF_TC_HEAD(seekdir_basic, tc) { atf_tc_set_md_var(tc, "descr", "Check telldir(3) and seekdir(3) " "for correct behavior (PR lib/24324)"); } ATF_TC_BODY(seekdir_basic, tc) { DIR *dp; char *wasname; struct dirent *entry; long here; #define CREAT(x, m) do { \ int _creat_fd; \ ATF_REQUIRE_MSG((_creat_fd = creat((x), (m))) != -1, \ "creat(%s, %x) failed: %s", (x), (m), \ strerror(errno)); \ (void)close(_creat_fd); \ } while(0); ATF_REQUIRE_MSG(mkdir("t", 0755) == 0, "mkdir failed: %s", strerror(errno)); CREAT("t/a", 0600); CREAT("t/b", 0600); CREAT("t/c", 0600); dp = opendir("t"); if ( dp == NULL) atf_tc_fail("Could not open temp directory."); /* skip two for . and .. */ entry = readdir(dp); ATF_REQUIRE_MSG(entry != NULL, "readdir[%s] failed: %s", ".", strerror(errno)); entry = readdir(dp); ATF_REQUIRE_MSG(entry != NULL, "readdir[%s] failed: %s", "..", strerror(errno)); /* get first entry */ entry = readdir(dp); ATF_REQUIRE_MSG(entry != NULL, "readdir[%s] failed: %s", "first", strerror(errno)); here = telldir(dp); ATF_REQUIRE_MSG(here != -1, "telldir failed: %s", strerror(errno)); /* get second entry */ entry = readdir(dp); ATF_REQUIRE_MSG(entry != NULL, "readdir[%s] failed: %s", "second", strerror(errno)); wasname = strdup(entry->d_name); if (wasname == NULL) atf_tc_fail("cannot allocate memory"); /* get third entry */ entry = readdir(dp); ATF_REQUIRE_MSG(entry != NULL, "readdir[%s] failed: %s", "third", strerror(errno)); /* try to return to the position after the first entry */ seekdir(dp, here); entry = readdir(dp); ATF_REQUIRE_MSG(entry != NULL, "readdir[%s] failed: %s", "first[1]", strerror(errno)); if (strcmp(entry->d_name, wasname) != 0) atf_tc_fail("1st seekdir found wrong name"); /* try again, and throw in a telldir() for good measure */ seekdir(dp, here); here = telldir(dp); entry = readdir(dp); ATF_REQUIRE_MSG(entry != NULL, "readdir[%s] failed: %s", "second[1]", strerror(errno)); if (strcmp(entry->d_name, wasname) != 0) atf_tc_fail("2nd seekdir found wrong name"); /* One more time, to make sure that telldir() doesn't affect result */ seekdir(dp, here); entry = readdir(dp); ATF_REQUIRE_MSG(entry != NULL, "readdir[%s] failed: %s", "third[1]", strerror(errno)); if (strcmp(entry->d_name, wasname) != 0) atf_tc_fail("3rd seekdir found wrong name"); closedir(dp); free(wasname); } /* There is no sbrk on AArch64 and RISC-V */ -#if !defined(__aarch64__) && !defined(__riscv__) +#if !defined(__aarch64__) && !defined(__riscv) ATF_TC(telldir_leak); ATF_TC_HEAD(telldir_leak, tc) { atf_tc_set_md_var(tc, "descr", "Check telldir(3) for memory leakage (PR lib/24324)"); } ATF_TC_BODY(telldir_leak, tc) { DIR *dp; char *memused; int i; int oktouse = 4096; dp = opendir("."); if (dp == NULL) atf_tc_fail("Could not open current directory"); (void)telldir(dp); memused = sbrk(0); closedir(dp); for (i = 0; i < 1000; i++) { dp = opendir("."); if (dp == NULL) 
atf_tc_fail("Could not open current directory"); (void)telldir(dp); closedir(dp); if ((char *)sbrk(0) - memused > oktouse) { (void)printf("Used %td extra bytes for %d telldir " "calls", ((char *)sbrk(0) - memused), i); oktouse = (char *)sbrk(0) - memused; } } if (oktouse > 4096) { atf_tc_fail("Failure: leaked %d bytes", oktouse); } else { (void)printf("OK: used %td bytes\n", (char *)(sbrk(0))-memused); } } #endif ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, seekdir_basic); -#if !defined(__aarch64__) && !defined(__riscv__) +#if !defined(__aarch64__) && !defined(__riscv) ATF_TP_ADD_TC(tp, telldir_leak); #endif return atf_no_error(); } Index: head/contrib/netbsd-tests/lib/libc/sys/t_mlock.c =================================================================== --- head/contrib/netbsd-tests/lib/libc/sys/t_mlock.c (revision 322167) +++ head/contrib/netbsd-tests/lib/libc/sys/t_mlock.c (revision 322168) @@ -1,376 +1,376 @@ /* $NetBSD: t_mlock.c,v 1.6 2016/08/09 12:02:44 kre Exp $ */ /*- * Copyright (c) 2012 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jukka Ruohonen. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include __RCSID("$NetBSD: t_mlock.c,v 1.6 2016/08/09 12:02:44 kre Exp $"); #ifdef __FreeBSD__ #include /* NetBSD requires sys/param.h for sysctl(3), unlike FreeBSD */ #endif #include #include #include #include #include #include #include #include #include #include #ifdef __FreeBSD__ #include #define _KMEMUSER #include void set_vm_max_wired(int); void restore_vm_max_wired(void); #endif static long page = 0; ATF_TC(mlock_clip); ATF_TC_HEAD(mlock_clip, tc) { atf_tc_set_md_var(tc, "descr", "Test with mlock(2) that UVM only " "clips if the clip address is within the entry (PR kern/44788)"); } ATF_TC_BODY(mlock_clip, tc) { void *buf; buf = malloc(page); ATF_REQUIRE(buf != NULL); if (page < 1024) atf_tc_skip("page size too small"); for (size_t i = page; i >= 1; i = i - 1024) { (void)mlock(buf, page - i); (void)munlock(buf, page - i); } free(buf); } #ifdef __FreeBSD__ ATF_TC_WITH_CLEANUP(mlock_err); #else ATF_TC(mlock_err); #endif ATF_TC_HEAD(mlock_err, tc) { atf_tc_set_md_var(tc, "descr", "Test error conditions in mlock(2) and munlock(2)"); #ifdef __FreeBSD__ atf_tc_set_md_var(tc, "require.config", "allow_sysctl_side_effects"); atf_tc_set_md_var(tc, "require.user", "root"); #endif } ATF_TC_BODY(mlock_err, tc) { #ifdef __NetBSD__ unsigned long vmin = 0; size_t len = sizeof(vmin); #endif -#if !defined(__aarch64__) && !defined(__riscv__) +#if !defined(__aarch64__) && !defined(__riscv) void *invalid_ptr; #endif int null_errno = ENOMEM; /* error expected for NULL */ void *buf; #ifdef __FreeBSD__ #ifdef VM_MIN_ADDRESS if ((uintptr_t)VM_MIN_ADDRESS > 0) null_errno = EINVAL; /* NULL is not inside user VM */ #endif /* Set max_wired really really high to avoid EAGAIN */ set_vm_max_wired(INT_MAX); #else if (sysctlbyname("vm.minaddress", &vmin, &len, NULL, 0) != 0) atf_tc_fail("failed to read vm.minaddress"); /* * Any bad address must return ENOMEM (for lock & unlock) */ errno = 0; ATF_REQUIRE_ERRNO(ENOMEM, mlock(NULL, page) == -1); if (vmin > 0) null_errno = EINVAL; /* NULL is not inside user VM */ #endif errno = 0; ATF_REQUIRE_ERRNO(ENOMEM, mlock((char *)0, page) == -1); errno = 0; ATF_REQUIRE_ERRNO(ENOMEM, mlock((char *)-1, page) == -1); errno = 0; ATF_REQUIRE_ERRNO(ENOMEM, munlock(NULL, page) == -1); errno = 0; ATF_REQUIRE_ERRNO(ENOMEM, munlock((char *)0, page) == -1); errno = 0; ATF_REQUIRE_ERRNO(ENOMEM, munlock((char *)-1, page) == -1); buf = malloc(page); ATF_REQUIRE(buf != NULL); /* * unlocking memory that is not locked is an error... */ errno = 0; ATF_REQUIRE_ERRNO(ENOMEM, munlock(buf, page) == -1); /* There is no sbrk on AArch64 and RISC-V */ -#if !defined(__aarch64__) && !defined(__riscv__) +#if !defined(__aarch64__) && !defined(__riscv) /* * These are permitted to fail (EINVAL) but do not on NetBSD */ ATF_REQUIRE(mlock((void *)(((uintptr_t)buf) + page/3), page/5) == 0); ATF_REQUIRE(munlock((void *)(((uintptr_t)buf) + page/3), page/5) == 0); (void)free(buf); /* * Try to create a pointer to an unmapped page - first after current * brk will likely do. 
*/ invalid_ptr = (void*)(((uintptr_t)sbrk(0)+page) & ~(page-1)); printf("testing with (hopefully) invalid pointer %p\n", invalid_ptr); errno = 0; ATF_REQUIRE_ERRNO(ENOMEM, mlock(invalid_ptr, page) == -1); errno = 0; ATF_REQUIRE_ERRNO(ENOMEM, munlock(invalid_ptr, page) == -1); #endif } #ifdef __FreeBSD__ ATF_TC_CLEANUP(mlock_err, tc) { restore_vm_max_wired(); } #endif ATF_TC(mlock_limits); ATF_TC_HEAD(mlock_limits, tc) { atf_tc_set_md_var(tc, "descr", "Test system limits with mlock(2)"); } ATF_TC_BODY(mlock_limits, tc) { struct rlimit res; void *buf; pid_t pid; int sta; buf = malloc(page); ATF_REQUIRE(buf != NULL); pid = fork(); ATF_REQUIRE(pid >= 0); if (pid == 0) { for (ssize_t i = page; i >= 2; i -= 100) { res.rlim_cur = i - 1; res.rlim_max = i - 1; (void)fprintf(stderr, "trying to lock %zd bytes " "with %zu byte limit\n", i, (size_t)res.rlim_cur); if (setrlimit(RLIMIT_MEMLOCK, &res) != 0) _exit(EXIT_FAILURE); errno = 0; #ifdef __FreeBSD__ /* * NetBSD doesn't conform to POSIX with ENOMEM requirement; * FreeBSD does. * * See: NetBSD PR # kern/48962 for more details. */ if (mlock(buf, i) != -1 || errno != ENOMEM) { #else if (mlock(buf, i) != -1 || errno != EAGAIN) { #endif (void)munlock(buf, i); _exit(EXIT_FAILURE); } } _exit(EXIT_SUCCESS); } (void)wait(&sta); if (WIFEXITED(sta) == 0 || WEXITSTATUS(sta) != EXIT_SUCCESS) atf_tc_fail("mlock(2) locked beyond system limits"); free(buf); } #ifdef __FreeBSD__ ATF_TC_WITH_CLEANUP(mlock_mmap); #else ATF_TC(mlock_mmap); #endif ATF_TC_HEAD(mlock_mmap, tc) { atf_tc_set_md_var(tc, "descr", "Test mlock(2)-mmap(2) interaction"); #ifdef __FreeBSD__ atf_tc_set_md_var(tc, "require.config", "allow_sysctl_side_effects"); atf_tc_set_md_var(tc, "require.user", "root"); #endif } ATF_TC_BODY(mlock_mmap, tc) { #ifdef __NetBSD__ static const int flags = MAP_ANON | MAP_PRIVATE | MAP_WIRED; #else static const int flags = MAP_ANON | MAP_PRIVATE; #endif void *buf; #ifdef __FreeBSD__ /* Set max_wired really really high to avoid EAGAIN */ set_vm_max_wired(INT_MAX); #endif /* * Make a wired RW mapping and check that mlock(2) * does not fail for the (already locked) mapping. */ buf = mmap(NULL, page, PROT_READ | PROT_WRITE, flags, -1, 0); ATF_REQUIRE(buf != MAP_FAILED); #ifdef __FreeBSD__ /* * The duplicate mlock call is added to ensure that the call works * as described above without MAP_WIRED support. */ ATF_REQUIRE(mlock(buf, page) == 0); #endif ATF_REQUIRE(mlock(buf, page) == 0); ATF_REQUIRE(munlock(buf, page) == 0); ATF_REQUIRE(munmap(buf, page) == 0); ATF_REQUIRE(munlock(buf, page) != 0); /* * But it should be impossible to mlock(2) a PROT_NONE mapping. 
*/ buf = mmap(NULL, page, PROT_NONE, flags, -1, 0); ATF_REQUIRE(buf != MAP_FAILED); #ifdef __FreeBSD__ ATF_REQUIRE_ERRNO(ENOMEM, mlock(buf, page) != 0); #else ATF_REQUIRE(mlock(buf, page) != 0); #endif ATF_REQUIRE(munmap(buf, page) == 0); } #ifdef __FreeBSD__ ATF_TC_CLEANUP(mlock_mmap, tc) { restore_vm_max_wired(); } #endif #ifdef __FreeBSD__ ATF_TC_WITH_CLEANUP(mlock_nested); #else ATF_TC(mlock_nested); #endif ATF_TC_HEAD(mlock_nested, tc) { atf_tc_set_md_var(tc, "descr", "Test that consecutive mlock(2) calls succeed"); #ifdef __FreeBSD__ atf_tc_set_md_var(tc, "require.config", "allow_sysctl_side_effects"); atf_tc_set_md_var(tc, "require.user", "root"); #endif } ATF_TC_BODY(mlock_nested, tc) { const size_t maxiter = 100; void *buf; #ifdef __FreeBSD__ /* Set max_wired really really high to avoid EAGAIN */ set_vm_max_wired(INT_MAX); #endif buf = malloc(page); ATF_REQUIRE(buf != NULL); for (size_t i = 0; i < maxiter; i++) ATF_REQUIRE(mlock(buf, page) == 0); ATF_REQUIRE(munlock(buf, page) == 0); free(buf); } #ifdef __FreeBSD__ ATF_TC_CLEANUP(mlock_nested, tc) { restore_vm_max_wired(); } #endif ATF_TP_ADD_TCS(tp) { page = sysconf(_SC_PAGESIZE); ATF_REQUIRE(page >= 0); ATF_TP_ADD_TC(tp, mlock_clip); ATF_TP_ADD_TC(tp, mlock_err); ATF_TP_ADD_TC(tp, mlock_limits); ATF_TP_ADD_TC(tp, mlock_mmap); ATF_TP_ADD_TC(tp, mlock_nested); return atf_no_error(); } Index: head/contrib/zstd/lib/common/xxhash.c =================================================================== --- head/contrib/zstd/lib/common/xxhash.c (revision 322167) +++ head/contrib/zstd/lib/common/xxhash.c (revision 322168) @@ -1,869 +1,869 @@ /* * xxHash - Fast Hash algorithm * Copyright (C) 2012-2016, Yann Collet * * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * You can contact the author at : * - xxHash homepage: http://www.xxhash.com * - xxHash source repository : https://github.com/Cyan4973/xxHash */ /* ************************************* * Tuning parameters ***************************************/ /*!XXH_FORCE_MEMORY_ACCESS : * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. 
* The below switch allow to select different access method for improved performance. * Method 0 (default) : use `memcpy()`. Safe and portable. * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. * It can generate buggy code on targets which do not support unaligned memory accesses. * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) * See http://stackoverflow.com/a/32095106/646947 for details. * Prefer these methods in priority order (0 > 1 > 2) */ #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) # define XXH_FORCE_MEMORY_ACCESS 2 # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) # define XXH_FORCE_MEMORY_ACCESS 1 # endif #endif /*!XXH_ACCEPT_NULL_INPUT_POINTER : * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. * By default, this option is disabled. To enable it, uncomment below define : */ /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ /*!XXH_FORCE_NATIVE_FORMAT : * By default, xxHash library provides endian-independant Hash values, based on little-endian convention. * Results are therefore identical for little-endian and big-endian CPU. * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. * Should endian-independance be of no importance for your application, you may set the #define below to 1, * to improve speed for Big-endian CPU. * This option has no impact on Little_Endian CPU. */ #ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ # define XXH_FORCE_NATIVE_FORMAT 0 #endif /*!XXH_FORCE_ALIGN_CHECK : * This is a minor performance trick, only useful with lots of very small keys. * It means : check for aligned/unaligned input. * The check costs one initial branch per hash; set to 0 when the input data * is guaranteed to be aligned. 
*/ #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ # if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) # define XXH_FORCE_ALIGN_CHECK 0 # else # define XXH_FORCE_ALIGN_CHECK 1 # endif #endif /* ************************************* * Includes & Memory related functions ***************************************/ /* Modify the local functions below should you wish to use some other memory routines */ /* for malloc(), free() */ #include static void* XXH_malloc(size_t s) { return malloc(s); } static void XXH_free (void* p) { free(p); } /* for memcpy() */ #include static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } #ifndef XXH_STATIC_LINKING_ONLY # define XXH_STATIC_LINKING_ONLY #endif #include "xxhash.h" /* ************************************* * Compiler Specific Options ***************************************/ #ifdef _MSC_VER /* Visual Studio */ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ # define FORCE_INLINE static __forceinline #else # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ # ifdef __GNUC__ # define FORCE_INLINE static inline __attribute__((always_inline)) # else # define FORCE_INLINE static inline # endif # else # define FORCE_INLINE static # endif /* __STDC_VERSION__ */ #endif /* ************************************* * Basic Types ***************************************/ #ifndef MEM_MODULE # define MEM_MODULE # if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include typedef uint8_t BYTE; typedef uint16_t U16; typedef uint32_t U32; typedef int32_t S32; typedef uint64_t U64; # else typedef unsigned char BYTE; typedef unsigned short U16; typedef unsigned int U32; typedef signed int S32; typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */ # endif #endif #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ /* currently only defined for gcc and icc */ typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } #else /* portable and safe solution. Generally efficient. 
* see : http://stackoverflow.com/a/32095106/646947 */ static U32 XXH_read32(const void* memPtr) { U32 val; memcpy(&val, memPtr, sizeof(val)); return val; } static U64 XXH_read64(const void* memPtr) { U64 val; memcpy(&val, memPtr, sizeof(val)); return val; } #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ /* **************************************** * Compiler-specific Functions and Macros ******************************************/ #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ #if defined(_MSC_VER) # define XXH_rotl32(x,r) _rotl(x,r) # define XXH_rotl64(x,r) _rotl64(x,r) #else # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) # define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) #endif #if defined(_MSC_VER) /* Visual Studio */ # define XXH_swap32 _byteswap_ulong # define XXH_swap64 _byteswap_uint64 -#elif (GCC_VERSION >= 403 && !defined(__riscv__)) +#elif (GCC_VERSION >= 403 && !defined(__riscv)) # define XXH_swap32 __builtin_bswap32 # define XXH_swap64 __builtin_bswap64 #else static U32 XXH_swap32 (U32 x) { return ((x << 24) & 0xff000000 ) | ((x << 8) & 0x00ff0000 ) | ((x >> 8) & 0x0000ff00 ) | ((x >> 24) & 0x000000ff ); } static U64 XXH_swap64 (U64 x) { return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) | ((x << 24) & 0x0000ff0000000000ULL) | ((x << 8) & 0x000000ff00000000ULL) | ((x >> 8) & 0x00000000ff000000ULL) | ((x >> 24) & 0x0000000000ff0000ULL) | ((x >> 40) & 0x000000000000ff00ULL) | ((x >> 56) & 0x00000000000000ffULL); } #endif /* ************************************* * Architecture Macros ***************************************/ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ #ifndef XXH_CPU_LITTLE_ENDIAN static const int g_one = 1; # define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) #endif /* *************************** * Memory reads *****************************/ typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); else return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); } FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } static U32 XXH_readBE32(const void* ptr) { return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); } FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); else return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); } FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) { return XXH_readLE64_align(ptr, endian, XXH_unaligned); } static U64 XXH_readBE64(const void* ptr) { return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); } /* ************************************* * Macros ***************************************/ #define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ /* ************************************* * Constants ***************************************/ static const U32 PRIME32_1 = 2654435761U; static const U32 PRIME32_2 = 2246822519U; static const U32 PRIME32_3 = 3266489917U; static const U32 PRIME32_4 = 668265263U; static const U32 PRIME32_5 = 374761393U; static const U64 PRIME64_1 = 11400714785074694791ULL; static const U64 PRIME64_2 = 14029467366897019727ULL; static const U64 PRIME64_3 = 1609587929392839161ULL; static const U64 PRIME64_4 = 9650029242287828579ULL; static const U64 PRIME64_5 = 2870177450012600261ULL; XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } /* ************************** * Utils ****************************/ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) { memcpy(dstState, srcState, sizeof(*dstState)); } XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) { memcpy(dstState, srcState, sizeof(*dstState)); } /* *************************** * Simple Hash Functions *****************************/ static U32 XXH32_round(U32 seed, U32 input) { seed += input * PRIME32_2; seed = XXH_rotl32(seed, 13); seed *= PRIME32_1; return seed; } FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; U32 h32; #define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) #ifdef XXH_ACCEPT_NULL_INPUT_POINTER if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)16; } #endif if (len>=16) { const BYTE* const limit = bEnd - 16; U32 v1 = seed + PRIME32_1 + PRIME32_2; U32 v2 = seed + PRIME32_2; U32 v3 = seed + 0; U32 v4 = seed - PRIME32_1; do { v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; } while (p<=limit); h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); } else { h32 = seed + PRIME32_5; } h32 += (U32) len; while (p+4<=bEnd) { h32 += XXH_get32bits(p) * PRIME32_3; h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; p+=4; } while (p> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; h32 *= PRIME32_3; h32 ^= h32 >> 16; return h32; } XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) { #if 0 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ XXH32_CREATESTATE_STATIC(state); XXH32_reset(state, seed); XXH32_update(state, input, len); return XXH32_digest(state); #else XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if (XXH_FORCE_ALIGN_CHECK) { if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); else return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); } } if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); else return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); 
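/*
 * Minimal usage sketch (illustrative only; example_main is not part of this
 * file): hashing a buffer with the one-shot XXH32() entry point.  The seed
 * value 0 is an arbitrary choice for the example.
 */
#if 0
#include <stdio.h>
#include <string.h>
#include "xxhash.h"

int example_main(void)
{
    const char msg[] = "hello, world";
    unsigned int h = XXH32(msg, strlen(msg), 0 /* seed */);
    printf("XXH32 = 0x%08x\n", h);
    return 0;
}
#endif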
#endif } static U64 XXH64_round(U64 acc, U64 input) { acc += input * PRIME64_2; acc = XXH_rotl64(acc, 31); acc *= PRIME64_1; return acc; } static U64 XXH64_mergeRound(U64 acc, U64 val) { val = XXH64_round(0, val); acc ^= val; acc = acc * PRIME64_1 + PRIME64_4; return acc; } FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; U64 h64; #define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) #ifdef XXH_ACCEPT_NULL_INPUT_POINTER if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)32; } #endif if (len>=32) { const BYTE* const limit = bEnd - 32; U64 v1 = seed + PRIME64_1 + PRIME64_2; U64 v2 = seed + PRIME64_2; U64 v3 = seed + 0; U64 v4 = seed - PRIME64_1; do { v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; } while (p<=limit); h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); h64 = XXH64_mergeRound(h64, v1); h64 = XXH64_mergeRound(h64, v2); h64 = XXH64_mergeRound(h64, v3); h64 = XXH64_mergeRound(h64, v4); } else { h64 = seed + PRIME64_5; } h64 += (U64) len; while (p+8<=bEnd) { U64 const k1 = XXH64_round(0, XXH_get64bits(p)); h64 ^= k1; h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; p+=8; } if (p+4<=bEnd) { h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p+=4; } while (p<bEnd) { h64 ^= (*p) * PRIME64_5; h64 = XXH_rotl64(h64, 11) * PRIME64_1; p++; } h64 ^= h64 >> 33; h64 *= PRIME64_2; h64 ^= h64 >> 29; h64 *= PRIME64_3; h64 ^= h64 >> 32; return h64; } XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) { #if 0 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ XXH64_CREATESTATE_STATIC(state); XXH64_reset(state, seed); XXH64_update(state, input, len); return XXH64_digest(state); #else XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if (XXH_FORCE_ALIGN_CHECK) { if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); else return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); } } if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); else return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); #endif } /* ************************************************** * Advanced Hash Functions ****************************************************/ XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) { return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); } XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) { XXH_free(statePtr); return XXH_OK; } XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) { return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); } XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) { XXH_free(statePtr); return XXH_OK; } /*** Hash feed ***/ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) { XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ state.v1 = seed + PRIME32_1 + PRIME32_2; state.v2 = seed +
PRIME32_2; state.v3 = seed + 0; state.v4 = seed - PRIME32_1; memcpy(statePtr, &state, sizeof(state)); return XXH_OK; } XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) { XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ state.v1 = seed + PRIME64_1 + PRIME64_2; state.v2 = seed + PRIME64_2; state.v3 = seed + 0; state.v4 = seed - PRIME64_1; memcpy(statePtr, &state, sizeof(state)); return XXH_OK; } FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; #ifdef XXH_ACCEPT_NULL_INPUT_POINTER if (input==NULL) return XXH_ERROR; #endif state->total_len_32 += (unsigned)len; state->large_len |= (len>=16) | (state->total_len_32>=16); if (state->memsize + len < 16) { /* fill in tmp buffer */ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); state->memsize += (unsigned)len; return XXH_OK; } if (state->memsize) { /* some data left from previous update */ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); { const U32* p32 = state->mem32; state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; } p += 16-state->memsize; state->memsize = 0; } if (p <= bEnd-16) { const BYTE* const limit = bEnd - 16; U32 v1 = state->v1; U32 v2 = state->v2; U32 v3 = state->v3; U32 v4 = state->v4; do { v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; } while (p<=limit); state->v1 = v1; state->v2 = v2; state->v3 = v3; state->v4 = v4; } if (p < bEnd) { XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); state->memsize = (unsigned)(bEnd-p); } return XXH_OK; } XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_update_endian(state_in, input, len, XXH_littleEndian); else return XXH32_update_endian(state_in, input, len, XXH_bigEndian); } FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) { const BYTE * p = (const BYTE*)state->mem32; const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; U32 h32; if (state->large_len) { h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); } else { h32 = state->v3 /* == seed */ + PRIME32_5; } h32 += state->total_len_32; while (p+4<=bEnd) { h32 += XXH_readLE32(p, endian) * PRIME32_3; h32 = XXH_rotl32(h32, 17) * PRIME32_4; p+=4; } while (p<bEnd) { h32 += (*p) * PRIME32_5; h32 = XXH_rotl32(h32, 11) * PRIME32_1; p++; } h32 ^= h32 >> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; h32 *= PRIME32_3; h32 ^= h32 >> 16; return h32; } XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_digest_endian(state_in, XXH_littleEndian); else return XXH32_digest_endian(state_in, XXH_bigEndian); } /* **** XXH64
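 *
 * A streaming usage sketch for the 64-bit functions below (assumptions:
 * <stdio.h> is included and f is a caller-supplied FILE *):
 *
 *	XXH64_state_t *st = XXH64_createState();
 *	char buf[4096];
 *	size_t n;
 *	unsigned long long hash;
 *
 *	XXH64_reset(st, 0);
 *	while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
 *		XXH64_update(st, buf, n);
 *	hash = XXH64_digest(st);
 *	XXH64_freeState(st);
 *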
**** */ FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; #ifdef XXH_ACCEPT_NULL_INPUT_POINTER if (input==NULL) return XXH_ERROR; #endif state->total_len += len; if (state->memsize + len < 32) { /* fill in tmp buffer */ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); state->memsize += (U32)len; return XXH_OK; } if (state->memsize) { /* tmp buffer is full */ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); p += 32-state->memsize; state->memsize = 0; } if (p+32 <= bEnd) { const BYTE* const limit = bEnd - 32; U64 v1 = state->v1; U64 v2 = state->v2; U64 v3 = state->v3; U64 v4 = state->v4; do { v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; } while (p<=limit); state->v1 = v1; state->v2 = v2; state->v3 = v3; state->v4 = v4; } if (p < bEnd) { XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); state->memsize = (unsigned)(bEnd-p); } return XXH_OK; } XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_update_endian(state_in, input, len, XXH_littleEndian); else return XXH64_update_endian(state_in, input, len, XXH_bigEndian); } FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) { const BYTE * p = (const BYTE*)state->mem64; const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; U64 h64; if (state->total_len >= 32) { U64 const v1 = state->v1; U64 const v2 = state->v2; U64 const v3 = state->v3; U64 const v4 = state->v4; h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); h64 = XXH64_mergeRound(h64, v1); h64 = XXH64_mergeRound(h64, v2); h64 = XXH64_mergeRound(h64, v3); h64 = XXH64_mergeRound(h64, v4); } else { h64 = state->v3 + PRIME64_5; } h64 += (U64) state->total_len; while (p+8<=bEnd) { U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); h64 ^= k1; h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; p+=8; } if (p+4<=bEnd) { h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p+=4; } while (p<bEnd) { h64 ^= (*p) * PRIME64_5; h64 = XXH_rotl64(h64, 11) * PRIME64_1; p++; } h64 ^= h64 >> 33; h64 *= PRIME64_2; h64 ^= h64 >> 29; h64 *= PRIME64_3; h64 ^= h64 >> 32; return h64; } XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_digest_endian(state_in, XXH_littleEndian); else return XXH64_digest_endian(state_in, XXH_bigEndian); } /* ************************** * Canonical representation ****************************/ /*! Default XXH result types are basic unsigned 32 and 64 bits. * The canonical representation follows human-readable write convention, aka big-endian (large digits first).
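 * For example (a sketch assuming a caller-supplied FILE *f), a hash can be
 * written to disk in canonical form and read back on a host of either
 * endianness:
 *
 *	XXH64_canonical_t c;
 *	XXH64_canonicalFromHash(&c, XXH64(buf, len, 0));
 *	fwrite(&c, sizeof(c), 1, f);
 *	...
 *	XXH64_hash_t h = XXH64_hashFromCanonical(&c);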
* These functions allow transformation of hash result into and from its canonical format. * This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. */ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) { XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); memcpy(dst, &hash, sizeof(*dst)); } XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) { XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); memcpy(dst, &hash, sizeof(*dst)); } XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) { return XXH_readBE32(src); } XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) { return XXH_readBE64(src); } Index: head/lib/libc/gen/tls.c =================================================================== --- head/lib/libc/gen/tls.c (revision 322167) +++ head/lib/libc/gen/tls.c (revision 322168) @@ -1,326 +1,326 @@ /*- * Copyright (c) 2004 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Define stubs for TLS internals so that programs and libraries can * link. These functions will be replaced by functional versions at * runtime from ld-elf.so.1. */ #include #include #include #include #include #include "libc_private.h" /* Provided by jemalloc to avoid bootstrapping issues. 
*/ void *__je_bootstrap_malloc(size_t size); void *__je_bootstrap_calloc(size_t num, size_t size); void __je_bootstrap_free(void *ptr); __weak_reference(__libc_allocate_tls, _rtld_allocate_tls); __weak_reference(__libc_free_tls, _rtld_free_tls); #ifdef __i386__ __weak_reference(___libc_tls_get_addr, ___tls_get_addr); __attribute__((__regparm__(1))) void * ___libc_tls_get_addr(void *); #endif void * __libc_tls_get_addr(void *); __weak_reference(__libc_tls_get_addr, __tls_get_addr); void *_rtld_allocate_tls(void *oldtls, size_t tcbsize, size_t tcbalign); void _rtld_free_tls(void *tls, size_t tcbsize, size_t tcbalign); void *__libc_allocate_tls(void *oldtls, size_t tcbsize, size_t tcbalign); void __libc_free_tls(void *tls, size_t tcbsize, size_t tcbalign); #if defined(__amd64__) #define TLS_TCB_ALIGN 16 #elif defined(__aarch64__) || defined(__arm__) || defined(__i386__) || \ - defined(__mips__) || defined(__powerpc__) || defined(__riscv__) || \ + defined(__mips__) || defined(__powerpc__) || defined(__riscv) || \ defined(__sparc64__) #define TLS_TCB_ALIGN sizeof(void *) #else #error TLS_TCB_ALIGN undefined for target architecture #endif #if defined(__aarch64__) || defined(__arm__) || defined(__mips__) || \ - defined(__powerpc__) || defined(__riscv__) + defined(__powerpc__) || defined(__riscv) #define TLS_VARIANT_I #endif #if defined(__i386__) || defined(__amd64__) || defined(__sparc64__) #define TLS_VARIANT_II #endif #ifndef PIC static size_t tls_static_space; static size_t tls_init_size; static void *tls_init; #endif #ifdef __i386__ /* GNU ABI */ __attribute__((__regparm__(1))) void * ___libc_tls_get_addr(void *ti __unused) { return (0); } #endif void * __libc_tls_get_addr(void *ti __unused) { return (0); } #ifndef PIC #ifdef TLS_VARIANT_I #define TLS_TCB_SIZE (2 * sizeof(void *)) /* * Free Static TLS using the Variant I method. */ void __libc_free_tls(void *tcb, size_t tcbsize, size_t tcbalign __unused) { Elf_Addr *dtv; Elf_Addr **tls; tls = (Elf_Addr **)((Elf_Addr)tcb + tcbsize - TLS_TCB_SIZE); dtv = tls[0]; __je_bootstrap_free(dtv); __je_bootstrap_free(tcb); } /* * Allocate Static TLS using the Variant I method. */ void * __libc_allocate_tls(void *oldtcb, size_t tcbsize, size_t tcbalign __unused) { Elf_Addr *dtv; Elf_Addr **tls; char *tcb; if (oldtcb != NULL && tcbsize == TLS_TCB_SIZE) return (oldtcb); tcb = __je_bootstrap_calloc(1, tls_static_space + tcbsize - TLS_TCB_SIZE); tls = (Elf_Addr **)(tcb + tcbsize - TLS_TCB_SIZE); if (oldtcb != NULL) { memcpy(tls, oldtcb, tls_static_space); __je_bootstrap_free(oldtcb); /* Adjust the DTV. */ dtv = tls[0]; dtv[2] = (Elf_Addr)tls + TLS_TCB_SIZE; } else { dtv = __je_bootstrap_malloc(3 * sizeof(Elf_Addr)); tls[0] = dtv; dtv[0] = 1; dtv[1] = 1; dtv[2] = (Elf_Addr)tls + TLS_TCB_SIZE; if (tls_init_size > 0) memcpy((void*)dtv[2], tls_init, tls_init_size); if (tls_static_space > tls_init_size) memset((void*)(dtv[2] + tls_init_size), 0, tls_static_space - tls_init_size); } return(tcb); } #endif #ifdef TLS_VARIANT_II #define TLS_TCB_SIZE (3 * sizeof(Elf_Addr)) /* * Free Static TLS using the Variant II method. */ void __libc_free_tls(void *tcb, size_t tcbsize __unused, size_t tcbalign) { size_t size; Elf_Addr* dtv; Elf_Addr tlsstart, tlsend; /* * Figure out the size of the initial TLS block so that we can * find stuff which ___tls_get_addr() allocated dynamically. 
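 * In the Variant II layout the static TLS block sits immediately below the
 * TCB, so the start of the original allocation is recovered as
 *
 *	tlsstart = (Elf_Addr)tcb - roundup2(tls_static_space, tcbalign);
 *
 * mirroring the way __libc_allocate_tls() laid the block out.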
*/ size = roundup2(tls_static_space, tcbalign); dtv = ((Elf_Addr**)tcb)[1]; tlsend = (Elf_Addr) tcb; tlsstart = tlsend - size; __je_bootstrap_free((void*) tlsstart); __je_bootstrap_free(dtv); } /* * Allocate Static TLS using the Variant II method. */ void * __libc_allocate_tls(void *oldtls, size_t tcbsize, size_t tcbalign) { size_t size; char *tls; Elf_Addr *dtv; Elf_Addr segbase, oldsegbase; size = roundup2(tls_static_space, tcbalign); if (tcbsize < 2 * sizeof(Elf_Addr)) tcbsize = 2 * sizeof(Elf_Addr); tls = __je_bootstrap_calloc(1, size + tcbsize); dtv = __je_bootstrap_malloc(3 * sizeof(Elf_Addr)); segbase = (Elf_Addr)(tls + size); ((Elf_Addr*)segbase)[0] = segbase; ((Elf_Addr*)segbase)[1] = (Elf_Addr) dtv; dtv[0] = 1; dtv[1] = 1; dtv[2] = segbase - tls_static_space; if (oldtls) { /* * Copy the static TLS block over whole. */ oldsegbase = (Elf_Addr) oldtls; memcpy((void *)(segbase - tls_static_space), (const void *)(oldsegbase - tls_static_space), tls_static_space); /* * We assume that this block was the one we created with * allocate_initial_tls(). */ _rtld_free_tls(oldtls, 2*sizeof(Elf_Addr), sizeof(Elf_Addr)); } else { memcpy((void *)(segbase - tls_static_space), tls_init, tls_init_size); memset((void *)(segbase - tls_static_space + tls_init_size), 0, tls_static_space - tls_init_size); } return (void*) segbase; } #endif /* TLS_VARIANT_II */ #else void * __libc_allocate_tls(void *oldtls __unused, size_t tcbsize __unused, size_t tcbalign __unused) { return (0); } void __libc_free_tls(void *tcb __unused, size_t tcbsize __unused, size_t tcbalign __unused) { } #endif /* PIC */ extern char **environ; void _init_tls(void) { #ifndef PIC Elf_Addr *sp; Elf_Auxinfo *aux, *auxp; Elf_Phdr *phdr; size_t phent, phnum; int i; void *tls; sp = (Elf_Addr *) environ; while (*sp++ != 0) ; aux = (Elf_Auxinfo *) sp; phdr = NULL; phent = phnum = 0; for (auxp = aux; auxp->a_type != AT_NULL; auxp++) { switch (auxp->a_type) { case AT_PHDR: phdr = auxp->a_un.a_ptr; break; case AT_PHENT: phent = auxp->a_un.a_val; break; case AT_PHNUM: phnum = auxp->a_un.a_val; break; } } if (phdr == NULL || phent != sizeof(Elf_Phdr) || phnum == 0) return; for (i = 0; (unsigned) i < phnum; i++) { if (phdr[i].p_type == PT_TLS) { tls_static_space = roundup2(phdr[i].p_memsz, phdr[i].p_align); tls_init_size = phdr[i].p_filesz; tls_init = (void*) phdr[i].p_vaddr; } } #ifdef TLS_VARIANT_I /* * tls_static_space should include space for TLS structure */ tls_static_space += TLS_TCB_SIZE; #endif tls = _rtld_allocate_tls(NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN); _set_tp(tls); #endif } Index: head/lib/libproc/proc_bkpt.c =================================================================== --- head/lib/libproc/proc_bkpt.c (revision 322167) +++ head/lib/libproc/proc_bkpt.c (revision 322168) @@ -1,272 +1,272 @@ /*- * Copyright (c) 2010 The FreeBSD Foundation * All rights reserved. * * This software was developed by Rui Paulo under sponsorship from the * FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "_libproc.h" #if defined(__aarch64__) #define AARCH64_BRK 0xd4200000 #define AARCH64_BRK_IMM16_SHIFT 5 #define AARCH64_BRK_IMM16_VAL (0xd << AARCH64_BRK_IMM16_SHIFT) #define BREAKPOINT_INSTR (AARCH64_BRK | AARCH64_BRK_IMM16_VAL) #define BREAKPOINT_INSTR_SZ 4 #elif defined(__amd64__) || defined(__i386__) #define BREAKPOINT_INSTR 0xcc /* int 0x3 */ #define BREAKPOINT_INSTR_SZ 1 #define BREAKPOINT_ADJUST_SZ BREAKPOINT_INSTR_SZ #elif defined(__arm__) #define BREAKPOINT_INSTR 0xe7ffffff /* bkpt */ #define BREAKPOINT_INSTR_SZ 4 #elif defined(__mips__) #define BREAKPOINT_INSTR 0xd /* break */ #define BREAKPOINT_INSTR_SZ 4 #elif defined(__powerpc__) #define BREAKPOINT_INSTR 0x7fe00008 /* trap */ #define BREAKPOINT_INSTR_SZ 4 -#elif defined(__riscv__) +#elif defined(__riscv) #define BREAKPOINT_INSTR 0x00100073 /* sbreak */ #define BREAKPOINT_INSTR_SZ 4 #else #error "Add support for your architecture" #endif /* * Use 4-bytes holder for breakpoint instruction on all the platforms. * Works for x86 as well until it is endian-little platform. * (We are coping one byte only on x86 from this 4-bytes piece of * memory). */ typedef uint32_t instr_t; static int proc_stop(struct proc_handle *phdl) { int status; if (kill(proc_getpid(phdl), SIGSTOP) == -1) { DPRINTF("kill %d", proc_getpid(phdl)); return (-1); } else if (waitpid(proc_getpid(phdl), &status, WSTOPPED) == -1) { DPRINTF("waitpid %d", proc_getpid(phdl)); return (-1); } else if (!WIFSTOPPED(status)) { DPRINTFX("waitpid: unexpected status 0x%x", status); return (-1); } return (0); } int proc_bkptset(struct proc_handle *phdl, uintptr_t address, unsigned long *saved) { struct ptrace_io_desc piod; unsigned long caddr; int ret = 0, stopped; instr_t instr; *saved = 0; if (phdl->status == PS_DEAD || phdl->status == PS_UNDEAD || phdl->status == PS_IDLE) { errno = ENOENT; return (-1); } DPRINTFX("adding breakpoint at 0x%lx", address); stopped = 0; if (phdl->status != PS_STOP) { if (proc_stop(phdl) != 0) return (-1); stopped = 1; } /* * Read the original instruction. */ caddr = address; instr = 0; piod.piod_op = PIOD_READ_I; piod.piod_offs = (void *)caddr; piod.piod_addr = &instr; piod.piod_len = BREAKPOINT_INSTR_SZ; if (ptrace(PT_IO, proc_getpid(phdl), (caddr_t)&piod, 0) < 0) { DPRINTF("ERROR: couldn't read instruction at address 0x%jx", (uintmax_t)address); ret = -1; goto done; } *saved = instr; /* * Write a breakpoint instruction to that address. 
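 * Only BREAKPOINT_INSTR_SZ bytes of the 4-byte instr holder are transferred
 * by PT_IO, so on x86 a single 0xcc byte overwrites the first byte of the
 * original instruction and the remaining bytes are left in place.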
*/ caddr = address; instr = BREAKPOINT_INSTR; piod.piod_op = PIOD_WRITE_I; piod.piod_offs = (void *)caddr; piod.piod_addr = &instr; piod.piod_len = BREAKPOINT_INSTR_SZ; if (ptrace(PT_IO, proc_getpid(phdl), (caddr_t)&piod, 0) < 0) { DPRINTF("ERROR: couldn't write instruction at address 0x%jx", (uintmax_t)address); ret = -1; goto done; } done: if (stopped) /* Restart the process if we had to stop it. */ proc_continue(phdl); return (ret); } int proc_bkptdel(struct proc_handle *phdl, uintptr_t address, unsigned long saved) { struct ptrace_io_desc piod; unsigned long caddr; int ret = 0, stopped; instr_t instr; if (phdl->status == PS_DEAD || phdl->status == PS_UNDEAD || phdl->status == PS_IDLE) { errno = ENOENT; return (-1); } DPRINTFX("removing breakpoint at 0x%lx", address); stopped = 0; if (phdl->status != PS_STOP) { if (proc_stop(phdl) != 0) return (-1); stopped = 1; } /* * Overwrite the breakpoint instruction that we setup previously. */ caddr = address; instr = saved; piod.piod_op = PIOD_WRITE_I; piod.piod_offs = (void *)caddr; piod.piod_addr = &instr; piod.piod_len = BREAKPOINT_INSTR_SZ; if (ptrace(PT_IO, proc_getpid(phdl), (caddr_t)&piod, 0) < 0) { DPRINTF("ERROR: couldn't write instruction at address 0x%jx", (uintmax_t)address); ret = -1; } if (stopped) /* Restart the process if we had to stop it. */ proc_continue(phdl); return (ret); } /* * Decrement pc so that we delete the breakpoint at the correct * address, i.e. at the BREAKPOINT_INSTR address. * * This is only needed on some architectures where the pc value * when reading registers points at the instruction after the * breakpoint, e.g. x86. */ void proc_bkptregadj(unsigned long *pc) { (void)pc; #ifdef BREAKPOINT_ADJUST_SZ *pc = *pc - BREAKPOINT_ADJUST_SZ; #endif } /* * Step over the breakpoint. */ int proc_bkptexec(struct proc_handle *phdl, unsigned long saved) { unsigned long pc; unsigned long samesaved; int status; if (proc_regget(phdl, REG_PC, &pc) < 0) { DPRINTFX("ERROR: couldn't get PC register"); return (-1); } proc_bkptregadj(&pc); if (proc_bkptdel(phdl, pc, saved) < 0) { DPRINTFX("ERROR: couldn't delete breakpoint"); return (-1); } /* * Go back in time and step over the new instruction just * set up by proc_bkptdel(). */ proc_regset(phdl, REG_PC, pc); if (ptrace(PT_STEP, proc_getpid(phdl), (caddr_t)1, 0) < 0) { DPRINTFX("ERROR: ptrace step failed"); return (-1); } proc_wstatus(phdl); status = proc_getwstat(phdl); if (!WIFSTOPPED(status)) { DPRINTFX("ERROR: don't know why process stopped"); return (-1); } /* * Restore the breakpoint. The saved instruction should be * the same as the one that we were passed in. */ if (proc_bkptset(phdl, pc, &samesaved) < 0) { DPRINTFX("ERROR: couldn't restore breakpoint"); return (-1); } assert(samesaved == saved); return (0); } Index: head/lib/libproc/proc_regs.c =================================================================== --- head/lib/libproc/proc_regs.c (revision 322167) +++ head/lib/libproc/proc_regs.c (revision 322168) @@ -1,154 +1,154 @@ /*- * Copyright (c) 2010 The FreeBSD Foundation * All rights reserved. * * This software was developed by Rui Paulo under sponsorship from the * FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "_libproc.h" int proc_regget(struct proc_handle *phdl, proc_reg_t reg, unsigned long *regvalue) { struct reg regs; if (phdl->status == PS_DEAD || phdl->status == PS_UNDEAD || phdl->status == PS_IDLE) { errno = ENOENT; return (-1); } memset(®s, 0, sizeof(regs)); if (ptrace(PT_GETREGS, proc_getpid(phdl), (caddr_t)®s, 0) < 0) return (-1); switch (reg) { case REG_PC: #if defined(__aarch64__) *regvalue = regs.elr; #elif defined(__amd64__) *regvalue = regs.r_rip; #elif defined(__arm__) *regvalue = regs.r_pc; #elif defined(__i386__) *regvalue = regs.r_eip; #elif defined(__mips__) *regvalue = regs.r_regs[PC]; #elif defined(__powerpc__) *regvalue = regs.pc; -#elif defined(__riscv__) +#elif defined(__riscv) *regvalue = regs.sepc; #endif break; case REG_SP: #if defined(__aarch64__) *regvalue = regs.sp; #elif defined(__amd64__) *regvalue = regs.r_rsp; #elif defined(__arm__) *regvalue = regs.r_sp; #elif defined(__i386__) *regvalue = regs.r_esp; #elif defined(__mips__) *regvalue = regs.r_regs[SP]; #elif defined(__powerpc__) *regvalue = regs.fixreg[1]; -#elif defined(__riscv__) +#elif defined(__riscv) *regvalue = regs.sp; #endif break; default: DPRINTFX("ERROR: no support for reg number %d", reg); return (-1); } return (0); } int proc_regset(struct proc_handle *phdl, proc_reg_t reg, unsigned long regvalue) { struct reg regs; if (phdl->status == PS_DEAD || phdl->status == PS_UNDEAD || phdl->status == PS_IDLE) { errno = ENOENT; return (-1); } if (ptrace(PT_GETREGS, proc_getpid(phdl), (caddr_t)®s, 0) < 0) return (-1); switch (reg) { case REG_PC: #if defined(__aarch64__) regs.elr = regvalue; #elif defined(__amd64__) regs.r_rip = regvalue; #elif defined(__arm__) regs.r_pc = regvalue; #elif defined(__i386__) regs.r_eip = regvalue; #elif defined(__mips__) regs.r_regs[PC] = regvalue; #elif defined(__powerpc__) regs.pc = regvalue; -#elif defined(__riscv__) +#elif defined(__riscv) regs.sepc = regvalue; #endif break; case REG_SP: #if defined(__aarch64__) regs.sp = regvalue; #elif defined(__amd64__) regs.r_rsp = regvalue; #elif defined(__arm__) regs.r_sp = regvalue; #elif defined(__i386__) regs.r_esp = regvalue; #elif defined(__mips__) regs.r_regs[PC] = regvalue; #elif defined(__powerpc__) regs.fixreg[1] = regvalue; -#elif defined(__riscv__) +#elif defined(__riscv) regs.sp = regvalue; #endif break; default: DPRINTFX("ERROR: no support for reg number %d", reg); return (-1); } if (ptrace(PT_SETREGS, proc_getpid(phdl), (caddr_t)®s, 0) < 0) return (-1); return (0); } Index: 
head/libexec/rtld-elf/rtld.c =================================================================== --- head/libexec/rtld-elf/rtld.c (revision 322167) +++ head/libexec/rtld-elf/rtld.c (revision 322168) @@ -1,5513 +1,5513 @@ /*- * Copyright 1996, 1997, 1998, 1999, 2000 John D. Polstra. * Copyright 2003 Alexander Kabaev . * Copyright 2009-2013 Konstantin Belousov . * Copyright 2012 John Marino . * Copyright 2014-2017 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Dynamic linker for ELF. * * John Polstra . */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "debug.h" #include "rtld.h" #include "libmap.h" #include "paths.h" #include "rtld_tls.h" #include "rtld_printf.h" #include "rtld_utrace.h" #include "notes.h" /* Types. */ typedef void (*func_ptr_type)(); typedef void * (*path_enum_proc) (const char *path, size_t len, void *arg); /* * Function declarations. 
*/ static const char *basename(const char *); static void digest_dynamic1(Obj_Entry *, int, const Elf_Dyn **, const Elf_Dyn **, const Elf_Dyn **); static void digest_dynamic2(Obj_Entry *, const Elf_Dyn *, const Elf_Dyn *, const Elf_Dyn *); static void digest_dynamic(Obj_Entry *, int); static Obj_Entry *digest_phdr(const Elf_Phdr *, int, caddr_t, const char *); static Obj_Entry *dlcheck(void *); static int dlclose_locked(void *, RtldLockState *); static Obj_Entry *dlopen_object(const char *name, int fd, Obj_Entry *refobj, int lo_flags, int mode, RtldLockState *lockstate); static Obj_Entry *do_load_object(int, const char *, char *, struct stat *, int); static int do_search_info(const Obj_Entry *obj, int, struct dl_serinfo *); static bool donelist_check(DoneList *, const Obj_Entry *); static void errmsg_restore(char *); static char *errmsg_save(void); static void *fill_search_info(const char *, size_t, void *); static char *find_library(const char *, const Obj_Entry *, int *); static const char *gethints(bool); static void hold_object(Obj_Entry *); static void unhold_object(Obj_Entry *); static void init_dag(Obj_Entry *); static void init_marker(Obj_Entry *); static void init_pagesizes(Elf_Auxinfo **aux_info); static void init_rtld(caddr_t, Elf_Auxinfo **); static void initlist_add_neededs(Needed_Entry *, Objlist *); static void initlist_add_objects(Obj_Entry *, Obj_Entry *, Objlist *); static void linkmap_add(Obj_Entry *); static void linkmap_delete(Obj_Entry *); static void load_filtees(Obj_Entry *, int flags, RtldLockState *); static void unload_filtees(Obj_Entry *, RtldLockState *); static int load_needed_objects(Obj_Entry *, int); static int load_preload_objects(void); static Obj_Entry *load_object(const char *, int fd, const Obj_Entry *, int); static void map_stacks_exec(RtldLockState *); static int obj_enforce_relro(Obj_Entry *); static Obj_Entry *obj_from_addr(const void *); static void objlist_call_fini(Objlist *, Obj_Entry *, RtldLockState *); static void objlist_call_init(Objlist *, RtldLockState *); static void objlist_clear(Objlist *); static Objlist_Entry *objlist_find(Objlist *, const Obj_Entry *); static void objlist_init(Objlist *); static void objlist_push_head(Objlist *, Obj_Entry *); static void objlist_push_tail(Objlist *, Obj_Entry *); static void objlist_put_after(Objlist *, Obj_Entry *, Obj_Entry *); static void objlist_remove(Objlist *, Obj_Entry *); static int open_binary_fd(const char *argv0, bool search_in_path); static int parse_args(char* argv[], int argc, bool *use_pathp, int *fdp); static int parse_integer(const char *); static void *path_enumerate(const char *, path_enum_proc, void *); static void print_usage(const char *argv0); static void release_object(Obj_Entry *); static int relocate_object_dag(Obj_Entry *root, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate); static int relocate_object(Obj_Entry *obj, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate); static int relocate_objects(Obj_Entry *, bool, Obj_Entry *, int, RtldLockState *); static int resolve_objects_ifunc(Obj_Entry *first, bool bind_now, int flags, RtldLockState *lockstate); static int rtld_dirname(const char *, char *); static int rtld_dirname_abs(const char *, char *); static void *rtld_dlopen(const char *name, int fd, int mode); static void rtld_exit(void); static char *search_library_path(const char *, const char *); static char *search_library_pathfds(const char *, const char *, int *); static const void **get_program_var_addr(const char *, 
RtldLockState *); static void set_program_var(const char *, const void *); static int symlook_default(SymLook *, const Obj_Entry *refobj); static int symlook_global(SymLook *, DoneList *); static void symlook_init_from_req(SymLook *, const SymLook *); static int symlook_list(SymLook *, const Objlist *, DoneList *); static int symlook_needed(SymLook *, const Needed_Entry *, DoneList *); static int symlook_obj1_sysv(SymLook *, const Obj_Entry *); static int symlook_obj1_gnu(SymLook *, const Obj_Entry *); static void trace_loaded_objects(Obj_Entry *); static void unlink_object(Obj_Entry *); static void unload_object(Obj_Entry *, RtldLockState *lockstate); static void unref_dag(Obj_Entry *); static void ref_dag(Obj_Entry *); static char *origin_subst_one(Obj_Entry *, char *, const char *, const char *, bool); static char *origin_subst(Obj_Entry *, char *); static bool obj_resolve_origin(Obj_Entry *obj); static void preinit_main(void); static int rtld_verify_versions(const Objlist *); static int rtld_verify_object_versions(Obj_Entry *); static void object_add_name(Obj_Entry *, const char *); static int object_match_name(const Obj_Entry *, const char *); static void ld_utrace_log(int, void *, void *, size_t, int, const char *); static void rtld_fill_dl_phdr_info(const Obj_Entry *obj, struct dl_phdr_info *phdr_info); static uint32_t gnu_hash(const char *); static bool matched_symbol(SymLook *, const Obj_Entry *, Sym_Match_Result *, const unsigned long); void r_debug_state(struct r_debug *, struct link_map *) __noinline __exported; void _r_debug_postinit(struct link_map *) __noinline __exported; int __sys_openat(int, const char *, int, ...); /* * Data declarations. */ static char *error_message; /* Message for dlerror(), or NULL */ struct r_debug r_debug __exported; /* for GDB; */ static bool libmap_disable; /* Disable libmap */ static bool ld_loadfltr; /* Immediate filters processing */ static char *libmap_override; /* Maps to use in addition to libmap.conf */ static bool trust; /* False for setuid and setgid programs */ static bool dangerous_ld_env; /* True if environment variables have been used to affect the libraries loaded */ bool ld_bind_not; /* Disable PLT update */ static char *ld_bind_now; /* Environment variable for immediate binding */ static char *ld_debug; /* Environment variable for debugging */ static char *ld_library_path; /* Environment variable for search path */ static char *ld_library_dirs; /* Environment variable for library descriptors */ static char *ld_preload; /* Environment variable for libraries to load first */ static char *ld_elf_hints_path; /* Environment variable for alternative hints path */ static char *ld_tracing; /* Called from ldd to print libs */ static char *ld_utrace; /* Use utrace() to log events. */ static struct obj_entry_q obj_list; /* Queue of all loaded objects */ static Obj_Entry *obj_main; /* The main program shared object */ static Obj_Entry obj_rtld; /* The dynamic linker shared object */ static unsigned int obj_count; /* Number of objects in obj_list */ static unsigned int obj_loads; /* Number of loads of objects (gen count) */ static Objlist list_global = /* Objects dlopened with RTLD_GLOBAL */ STAILQ_HEAD_INITIALIZER(list_global); static Objlist list_main = /* Objects loaded at program startup */ STAILQ_HEAD_INITIALIZER(list_main); static Objlist list_fini = /* Objects needing fini() calls */ STAILQ_HEAD_INITIALIZER(list_fini); Elf_Sym sym_zero; /* For resolving undefined weak refs. 
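 * Its st_value is set in _rtld() to the negated relocbase of the main
 * object, so relocbase + st_value evaluates to a NULL address when the
 * symbol is used to satisfy an undefined weak reference.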
*/ #define GDB_STATE(s,m) r_debug.r_state = s; r_debug_state(&r_debug,m); extern Elf_Dyn _DYNAMIC; #pragma weak _DYNAMIC int dlclose(void *) __exported; char *dlerror(void) __exported; void *dlopen(const char *, int) __exported; void *fdlopen(int, int) __exported; void *dlsym(void *, const char *) __exported; dlfunc_t dlfunc(void *, const char *) __exported; void *dlvsym(void *, const char *, const char *) __exported; int dladdr(const void *, Dl_info *) __exported; void dllockinit(void *, void *(*)(void *), void (*)(void *), void (*)(void *), void (*)(void *), void (*)(void *), void (*)(void *)) __exported; int dlinfo(void *, int , void *) __exported; int dl_iterate_phdr(__dl_iterate_hdr_callback, void *) __exported; int _rtld_addr_phdr(const void *, struct dl_phdr_info *) __exported; int _rtld_get_stack_prot(void) __exported; int _rtld_is_dlopened(void *) __exported; void _rtld_error(const char *, ...) __exported; int npagesizes, osreldate; size_t *pagesizes; long __stack_chk_guard[8] = {0, 0, 0, 0, 0, 0, 0, 0}; static int stack_prot = PROT_READ | PROT_WRITE | RTLD_DEFAULT_STACK_EXEC; static int max_stack_flags; /* * Global declarations normally provided by crt1. The dynamic linker is * not built with crt1, so we have to provide them ourselves. */ char *__progname; char **environ; /* * Used to pass argc, argv to init functions. */ int main_argc; char **main_argv; /* * Globals to control TLS allocation. */ size_t tls_last_offset; /* Static TLS offset of last module */ size_t tls_last_size; /* Static TLS size of last module */ size_t tls_static_space; /* Static TLS space allocated */ size_t tls_static_max_align; int tls_dtv_generation = 1; /* Used to detect when dtv size changes */ int tls_max_index = 1; /* Largest module index allocated */ bool ld_library_path_rpath = false; /* * Globals for path names, and such */ char *ld_elf_hints_default = _PATH_ELF_HINTS; char *ld_path_libmap_conf = _PATH_LIBMAP_CONF; char *ld_path_rtld = _PATH_RTLD; char *ld_standard_library_path = STANDARD_LIBRARY_PATH; char *ld_env_prefix = LD_; /* * Fill in a DoneList with an allocation large enough to hold all of * the currently-loaded objects. Keep this as a macro since it calls * alloca and we want that to occur within the scope of the caller. */ #define donelist_init(dlp) \ ((dlp)->objs = alloca(obj_count * sizeof (dlp)->objs[0]), \ assert((dlp)->objs != NULL), \ (dlp)->num_alloc = obj_count, \ (dlp)->num_used = 0) #define LD_UTRACE(e, h, mb, ms, r, n) do { \ if (ld_utrace != NULL) \ ld_utrace_log(e, h, mb, ms, r, n); \ } while (0) static void ld_utrace_log(int event, void *handle, void *mapbase, size_t mapsize, int refcnt, const char *name) { struct utrace_rtld ut; static const char rtld_utrace_sig[RTLD_UTRACE_SIG_SZ] = RTLD_UTRACE_SIG; memcpy(ut.sig, rtld_utrace_sig, sizeof(ut.sig)); ut.event = event; ut.handle = handle; ut.mapbase = mapbase; ut.mapsize = mapsize; ut.refcnt = refcnt; bzero(ut.name, sizeof(ut.name)); if (name) strlcpy(ut.name, name, sizeof(ut.name)); utrace(&ut, sizeof(ut)); } #ifdef RTLD_VARIANT_ENV_NAMES /* * construct the env variable based on the type of binary that's * running. */ static inline const char * _LD(const char *var) { static char buffer[128]; strlcpy(buffer, ld_env_prefix, sizeof(buffer)); strlcat(buffer, var, sizeof(buffer)); return (buffer); } #else #define _LD(x) LD_ x #endif /* * Main entry point for dynamic linking. The first argument is the * stack pointer. The stack is expected to be laid out as described * in the SVR4 ABI specification, Intel 386 Processor Supplement. 
* Specifically, the stack pointer points to a word containing * ARGC. Following that in the stack is a null-terminated sequence * of pointers to argument strings. Then comes a null-terminated * sequence of pointers to environment strings. Finally, there is a * sequence of "auxiliary vector" entries. * * The second argument points to a place to store the dynamic linker's * exit procedure pointer and the third to a place to store the main * program's object. * * The return value is the main program's entry point. */ func_ptr_type _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp) { Elf_Auxinfo *aux, *auxp, *auxpf, *aux_info[AT_COUNT]; Objlist_Entry *entry; Obj_Entry *last_interposer, *obj, *preload_tail; const Elf_Phdr *phdr; Objlist initlist; RtldLockState lockstate; struct stat st; Elf_Addr *argcp; char **argv, *argv0, **env, **envp, *kexecpath, *library_path_rpath; caddr_t imgentry; char buf[MAXPATHLEN]; int argc, fd, i, mib[2], phnum, rtld_argc; size_t len; bool dir_enable, explicit_fd, search_in_path; /* * On entry, the dynamic linker itself has not been relocated yet. * Be very careful not to reference any global data until after * init_rtld has returned. It is OK to reference file-scope statics * and string constants, and to call static and global functions. */ /* Find the auxiliary vector on the stack. */ argcp = sp; argc = *sp++; argv = (char **) sp; sp += argc + 1; /* Skip over arguments and NULL terminator */ env = (char **) sp; while (*sp++ != 0) /* Skip over environment, and NULL terminator */ ; aux = (Elf_Auxinfo *) sp; /* Digest the auxiliary vector. */ for (i = 0; i < AT_COUNT; i++) aux_info[i] = NULL; for (auxp = aux; auxp->a_type != AT_NULL; auxp++) { if (auxp->a_type < AT_COUNT) aux_info[auxp->a_type] = auxp; } /* Initialize and relocate ourselves. */ assert(aux_info[AT_BASE] != NULL); init_rtld((caddr_t) aux_info[AT_BASE]->a_un.a_ptr, aux_info); __progname = obj_rtld.path; argv0 = argv[0] != NULL ? argv[0] : "(null)"; environ = env; main_argc = argc; main_argv = argv; if (aux_info[AT_CANARY] != NULL && aux_info[AT_CANARY]->a_un.a_ptr != NULL) { i = aux_info[AT_CANARYLEN]->a_un.a_val; if (i > sizeof(__stack_chk_guard)) i = sizeof(__stack_chk_guard); memcpy(__stack_chk_guard, aux_info[AT_CANARY]->a_un.a_ptr, i); } else { mib[0] = CTL_KERN; mib[1] = KERN_ARND; len = sizeof(__stack_chk_guard); if (sysctl(mib, 2, __stack_chk_guard, &len, NULL, 0) == -1 || len != sizeof(__stack_chk_guard)) { /* If sysctl was unsuccessful, use the "terminator canary". */ ((unsigned char *)(void *)__stack_chk_guard)[0] = 0; ((unsigned char *)(void *)__stack_chk_guard)[1] = 0; ((unsigned char *)(void *)__stack_chk_guard)[2] = '\n'; ((unsigned char *)(void *)__stack_chk_guard)[3] = 255; } } trust = !issetugid(); md_abi_variant_hook(aux_info); fd = -1; if (aux_info[AT_EXECFD] != NULL) { fd = aux_info[AT_EXECFD]->a_un.a_val; } else { assert(aux_info[AT_PHDR] != NULL); phdr = (const Elf_Phdr *)aux_info[AT_PHDR]->a_un.a_ptr; if (phdr == obj_rtld.phdr) { if (!trust) { rtld_printf("Tainted process refusing to run binary %s\n", argv0); rtld_die(); } dbg("opening main program in direct exec mode"); if (argc >= 2) { rtld_argc = parse_args(argv, argc, &search_in_path, &fd); argv0 = argv[rtld_argc]; explicit_fd = (fd != -1); if (!explicit_fd) fd = open_binary_fd(argv0, search_in_path); if (fstat(fd, &st) == -1) { _rtld_error("failed to fstat FD %d (%s): %s", fd, explicit_fd ? 
"user-provided descriptor" : argv0, rtld_strerror(errno)); rtld_die(); } /* * Rough emulation of the permission checks done by * execve(2), only Unix DACs are checked, ACLs are * ignored. Preserve the semantic of disabling owner * to execute if owner x bit is cleared, even if * others x bit is enabled. * mmap(2) does not allow to mmap with PROT_EXEC if * binary' file comes from noexec mount. We cannot * set VV_TEXT on the binary. */ dir_enable = false; if (st.st_uid == geteuid()) { if ((st.st_mode & S_IXUSR) != 0) dir_enable = true; } else if (st.st_gid == getegid()) { if ((st.st_mode & S_IXGRP) != 0) dir_enable = true; } else if ((st.st_mode & S_IXOTH) != 0) { dir_enable = true; } if (!dir_enable) { rtld_printf("No execute permission for binary %s\n", argv0); rtld_die(); } /* * For direct exec mode, argv[0] is the interpreter * name, we must remove it and shift arguments left * before invoking binary main. Since stack layout * places environment pointers and aux vectors right * after the terminating NULL, we must shift * environment and aux as well. */ main_argc = argc - rtld_argc; for (i = 0; i <= main_argc; i++) argv[i] = argv[i + rtld_argc]; *argcp -= rtld_argc; environ = env = envp = argv + main_argc + 1; do { *envp = *(envp + rtld_argc); envp++; } while (*envp != NULL); aux = auxp = (Elf_Auxinfo *)envp; auxpf = (Elf_Auxinfo *)(envp + rtld_argc); for (;; auxp++, auxpf++) { *auxp = *auxpf; if (auxp->a_type == AT_NULL) break; } } else { rtld_printf("no binary\n"); rtld_die(); } } } ld_bind_now = getenv(_LD("BIND_NOW")); /* * If the process is tainted, then we un-set the dangerous environment * variables. The process will be marked as tainted until setuid(2) * is called. If any child process calls setuid(2) we do not want any * future processes to honor the potentially un-safe variables. 
*/ if (!trust) { if (unsetenv(_LD("PRELOAD")) || unsetenv(_LD("LIBMAP")) || unsetenv(_LD("LIBRARY_PATH")) || unsetenv(_LD("LIBRARY_PATH_FDS")) || unsetenv(_LD("LIBMAP_DISABLE")) || unsetenv(_LD("BIND_NOT")) || unsetenv(_LD("DEBUG")) || unsetenv(_LD("ELF_HINTS_PATH")) || unsetenv(_LD("LOADFLTR")) || unsetenv(_LD("LIBRARY_PATH_RPATH"))) { _rtld_error("environment corrupt; aborting"); rtld_die(); } } ld_debug = getenv(_LD("DEBUG")); if (ld_bind_now == NULL) ld_bind_not = getenv(_LD("BIND_NOT")) != NULL; libmap_disable = getenv(_LD("LIBMAP_DISABLE")) != NULL; libmap_override = getenv(_LD("LIBMAP")); ld_library_path = getenv(_LD("LIBRARY_PATH")); ld_library_dirs = getenv(_LD("LIBRARY_PATH_FDS")); ld_preload = getenv(_LD("PRELOAD")); ld_elf_hints_path = getenv(_LD("ELF_HINTS_PATH")); ld_loadfltr = getenv(_LD("LOADFLTR")) != NULL; library_path_rpath = getenv(_LD("LIBRARY_PATH_RPATH")); if (library_path_rpath != NULL) { if (library_path_rpath[0] == 'y' || library_path_rpath[0] == 'Y' || library_path_rpath[0] == '1') ld_library_path_rpath = true; else ld_library_path_rpath = false; } dangerous_ld_env = libmap_disable || (libmap_override != NULL) || (ld_library_path != NULL) || (ld_preload != NULL) || (ld_elf_hints_path != NULL) || ld_loadfltr; ld_tracing = getenv(_LD("TRACE_LOADED_OBJECTS")); ld_utrace = getenv(_LD("UTRACE")); if ((ld_elf_hints_path == NULL) || strlen(ld_elf_hints_path) == 0) ld_elf_hints_path = ld_elf_hints_default; if (ld_debug != NULL && *ld_debug != '\0') debug = 1; dbg("%s is initialized, base address = %p", __progname, (caddr_t) aux_info[AT_BASE]->a_un.a_ptr); dbg("RTLD dynamic = %p", obj_rtld.dynamic); dbg("RTLD pltgot = %p", obj_rtld.pltgot); dbg("initializing thread locks"); lockdflt_init(); /* * Load the main program, or process its program header if it is * already loaded. */ if (fd != -1) { /* Load the main program. */ dbg("loading main program"); obj_main = map_object(fd, argv0, NULL); close(fd); if (obj_main == NULL) rtld_die(); max_stack_flags = obj->stack_flags; } else { /* Main program already loaded. */ dbg("processing main program's program header"); assert(aux_info[AT_PHDR] != NULL); phdr = (const Elf_Phdr *) aux_info[AT_PHDR]->a_un.a_ptr; assert(aux_info[AT_PHNUM] != NULL); phnum = aux_info[AT_PHNUM]->a_un.a_val; assert(aux_info[AT_PHENT] != NULL); assert(aux_info[AT_PHENT]->a_un.a_val == sizeof(Elf_Phdr)); assert(aux_info[AT_ENTRY] != NULL); imgentry = (caddr_t) aux_info[AT_ENTRY]->a_un.a_ptr; if ((obj_main = digest_phdr(phdr, phnum, imgentry, argv0)) == NULL) rtld_die(); } if (aux_info[AT_EXECPATH] != NULL && fd == -1) { kexecpath = aux_info[AT_EXECPATH]->a_un.a_ptr; dbg("AT_EXECPATH %p %s", kexecpath, kexecpath); if (kexecpath[0] == '/') obj_main->path = kexecpath; else if (getcwd(buf, sizeof(buf)) == NULL || strlcat(buf, "/", sizeof(buf)) >= sizeof(buf) || strlcat(buf, kexecpath, sizeof(buf)) >= sizeof(buf)) obj_main->path = xstrdup(argv0); else obj_main->path = xstrdup(buf); } else { dbg("No AT_EXECPATH or direct exec"); obj_main->path = xstrdup(argv0); } dbg("obj_main path %s", obj_main->path); obj_main->mainprog = true; if (aux_info[AT_STACKPROT] != NULL && aux_info[AT_STACKPROT]->a_un.a_val != 0) stack_prot = aux_info[AT_STACKPROT]->a_un.a_val; #ifndef COMPAT_32BIT /* * Get the actual dynamic linker pathname from the executable if * possible. (It should always be possible.) That ensures that * gdb will find the right dynamic linker even if a non-standard * one is being used. 
*/ if (obj_main->interp != NULL && strcmp(obj_main->interp, obj_rtld.path) != 0) { free(obj_rtld.path); obj_rtld.path = xstrdup(obj_main->interp); __progname = obj_rtld.path; } #endif digest_dynamic(obj_main, 0); dbg("%s valid_hash_sysv %d valid_hash_gnu %d dynsymcount %d", obj_main->path, obj_main->valid_hash_sysv, obj_main->valid_hash_gnu, obj_main->dynsymcount); linkmap_add(obj_main); linkmap_add(&obj_rtld); /* Link the main program into the list of objects. */ TAILQ_INSERT_HEAD(&obj_list, obj_main, next); obj_count++; obj_loads++; /* Initialize a fake symbol for resolving undefined weak references. */ sym_zero.st_info = ELF_ST_INFO(STB_GLOBAL, STT_NOTYPE); sym_zero.st_shndx = SHN_UNDEF; sym_zero.st_value = -(uintptr_t)obj_main->relocbase; if (!libmap_disable) libmap_disable = (bool)lm_init(libmap_override); dbg("loading LD_PRELOAD libraries"); if (load_preload_objects() == -1) rtld_die(); preload_tail = globallist_curr(TAILQ_LAST(&obj_list, obj_entry_q)); dbg("loading needed objects"); if (load_needed_objects(obj_main, 0) == -1) rtld_die(); /* Make a list of all objects loaded at startup. */ last_interposer = obj_main; TAILQ_FOREACH(obj, &obj_list, next) { if (obj->marker) continue; if (obj->z_interpose && obj != obj_main) { objlist_put_after(&list_main, last_interposer, obj); last_interposer = obj; } else { objlist_push_tail(&list_main, obj); } obj->refcount++; } dbg("checking for required versions"); if (rtld_verify_versions(&list_main) == -1 && !ld_tracing) rtld_die(); if (ld_tracing) { /* We're done */ trace_loaded_objects(obj_main); exit(0); } if (getenv(_LD("DUMP_REL_PRE")) != NULL) { dump_relocations(obj_main); exit (0); } /* * Processing tls relocations requires having the tls offsets * initialized. Prepare offsets before starting initial * relocation processing. */ dbg("initializing initial thread local storage offsets"); STAILQ_FOREACH(entry, &list_main, link) { /* * Allocate all the initial objects out of the static TLS * block even if they didn't ask for it. */ allocate_tls_offset(entry->obj); } if (relocate_objects(obj_main, ld_bind_now != NULL && *ld_bind_now != '\0', &obj_rtld, SYMLOOK_EARLY, NULL) == -1) rtld_die(); dbg("doing copy relocations"); if (do_copy_relocations(obj_main) == -1) rtld_die(); dbg("enforcing main obj relro"); if (obj_enforce_relro(obj_main) == -1) rtld_die(); if (getenv(_LD("DUMP_REL_POST")) != NULL) { dump_relocations(obj_main); exit (0); } /* * Setup TLS for main thread. This must be done after the * relocations are processed, since tls initialization section * might be the subject for relocations. */ dbg("initializing initial thread local storage"); allocate_initial_tls(globallist_curr(TAILQ_FIRST(&obj_list))); dbg("initializing key program variables"); set_program_var("__progname", argv[0] != NULL ? basename(argv[0]) : ""); set_program_var("environ", env); set_program_var("__elf_aux_vector", aux); /* Make a list of init functions to call. */ objlist_init(&initlist); initlist_add_objects(globallist_curr(TAILQ_FIRST(&obj_list)), preload_tail, &initlist); r_debug_state(NULL, &obj_main->linkmap); /* say hello to gdb! */ map_stacks_exec(NULL); ifunc_init(aux); dbg("resolving ifuncs"); if (resolve_objects_ifunc(obj_main, ld_bind_now != NULL && *ld_bind_now != '\0', SYMLOOK_EARLY, NULL) == -1) rtld_die(); if (!obj_main->crt_no_init) { /* * Make sure we don't call the main program's init and fini * functions for binaries linked with old crt1 which calls * _init itself. 
*/ obj_main->init = obj_main->fini = (Elf_Addr)NULL; obj_main->preinit_array = obj_main->init_array = obj_main->fini_array = (Elf_Addr)NULL; } wlock_acquire(rtld_bind_lock, &lockstate); if (obj_main->crt_no_init) preinit_main(); objlist_call_init(&initlist, &lockstate); _r_debug_postinit(&obj_main->linkmap); objlist_clear(&initlist); dbg("loading filtees"); TAILQ_FOREACH(obj, &obj_list, next) { if (obj->marker) continue; if (ld_loadfltr || obj->z_loadfltr) load_filtees(obj, 0, &lockstate); } lock_release(rtld_bind_lock, &lockstate); dbg("transferring control to program entry point = %p", obj_main->entry); /* Return the exit procedure and the program entry point. */ *exit_proc = rtld_exit; *objp = obj_main; return (func_ptr_type) obj_main->entry; } void * rtld_resolve_ifunc(const Obj_Entry *obj, const Elf_Sym *def) { void *ptr; Elf_Addr target; ptr = (void *)make_function_pointer(def, obj); target = call_ifunc_resolver(ptr); return ((void *)target); } /* * NB: MIPS uses a private version of this function (_mips_rtld_bind). * Changes to this function should be applied there as well. */ Elf_Addr _rtld_bind(Obj_Entry *obj, Elf_Size reloff) { const Elf_Rel *rel; const Elf_Sym *def; const Obj_Entry *defobj; Elf_Addr *where; Elf_Addr target; RtldLockState lockstate; rlock_acquire(rtld_bind_lock, &lockstate); if (sigsetjmp(lockstate.env, 0) != 0) lock_upgrade(rtld_bind_lock, &lockstate); if (obj->pltrel) rel = (const Elf_Rel *) ((caddr_t) obj->pltrel + reloff); else rel = (const Elf_Rel *) ((caddr_t) obj->pltrela + reloff); where = (Elf_Addr *) (obj->relocbase + rel->r_offset); def = find_symdef(ELF_R_SYM(rel->r_info), obj, &defobj, SYMLOOK_IN_PLT, NULL, &lockstate); if (def == NULL) rtld_die(); if (ELF_ST_TYPE(def->st_info) == STT_GNU_IFUNC) target = (Elf_Addr)rtld_resolve_ifunc(defobj, def); else target = (Elf_Addr)(defobj->relocbase + def->st_value); dbg("\"%s\" in \"%s\" ==> %p in \"%s\"", defobj->strtab + def->st_name, basename(obj->path), (void *)target, basename(defobj->path)); /* * Write the new contents for the jmpslot. Note that depending on * architecture, the value which we need to return back to the * lazy binding trampoline may or may not be the target * address. The value returned from reloc_jmpslot() is the value * that the trampoline needs. */ target = reloc_jmpslot(where, target, defobj, obj, rel); lock_release(rtld_bind_lock, &lockstate); return target; } /* * Error reporting function. Use it like printf. If formats the message * into a buffer, and sets things up so that the next call to dlerror() * will return the message. */ void _rtld_error(const char *fmt, ...) { static char buf[512]; va_list ap; va_start(ap, fmt); rtld_vsnprintf(buf, sizeof buf, fmt, ap); error_message = buf; va_end(ap); LD_UTRACE(UTRACE_RTLD_ERROR, NULL, NULL, 0, 0, error_message); } /* * Return a dynamically-allocated copy of the current error message, if any. */ static char * errmsg_save(void) { return error_message == NULL ? NULL : xstrdup(error_message); } /* * Restore the current error message from a copy which was previously saved * by errmsg_save(). The copy is freed. */ static void errmsg_restore(char *saved_msg) { if (saved_msg == NULL) error_message = NULL; else { _rtld_error("%s", saved_msg); free(saved_msg); } } static const char * basename(const char *name) { const char *p = strrchr(name, '/'); return p != NULL ? 
p + 1 : name; } static struct utsname uts; static char * origin_subst_one(Obj_Entry *obj, char *real, const char *kw, const char *subst, bool may_free) { char *p, *p1, *res, *resp; int subst_len, kw_len, subst_count, old_len, new_len; kw_len = strlen(kw); /* * First, count the number of the keyword occurrences, to * preallocate the final string. */ for (p = real, subst_count = 0;; p = p1 + kw_len, subst_count++) { p1 = strstr(p, kw); if (p1 == NULL) break; } /* * If the keyword is not found, just return. * * Return non-substituted string if resolution failed. We * cannot do anything more reasonable, the failure mode of the * caller is unresolved library anyway. */ if (subst_count == 0 || (obj != NULL && !obj_resolve_origin(obj))) return (may_free ? real : xstrdup(real)); if (obj != NULL) subst = obj->origin_path; /* * There is indeed something to substitute. Calculate the * length of the resulting string, and allocate it. */ subst_len = strlen(subst); old_len = strlen(real); new_len = old_len + (subst_len - kw_len) * subst_count; res = xmalloc(new_len + 1); /* * Now, execute the substitution loop. */ for (p = real, resp = res, *resp = '\0';;) { p1 = strstr(p, kw); if (p1 != NULL) { /* Copy the prefix before keyword. */ memcpy(resp, p, p1 - p); resp += p1 - p; /* Keyword replacement. */ memcpy(resp, subst, subst_len); resp += subst_len; *resp = '\0'; p = p1 + kw_len; } else break; } /* Copy to the end of string and finish. */ strcat(resp, p); if (may_free) free(real); return (res); } static char * origin_subst(Obj_Entry *obj, char *real) { char *res1, *res2, *res3, *res4; if (obj == NULL || !trust) return (xstrdup(real)); if (uts.sysname[0] == '\0') { if (uname(&uts) != 0) { _rtld_error("utsname failed: %d", errno); return (NULL); } } res1 = origin_subst_one(obj, real, "$ORIGIN", NULL, false); res2 = origin_subst_one(NULL, res1, "$OSNAME", uts.sysname, true); res3 = origin_subst_one(NULL, res2, "$OSREL", uts.release, true); res4 = origin_subst_one(NULL, res3, "$PLATFORM", uts.machine, true); return (res4); } void rtld_die(void) { const char *msg = dlerror(); if (msg == NULL) msg = "Fatal error"; rtld_fdputstr(STDERR_FILENO, msg); rtld_fdputchar(STDERR_FILENO, '\n'); _exit(1); } /* * Process a shared object's DYNAMIC section, and save the important * information in its Obj_Entry structure. 
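 *
 * The _DYNAMIC data this routine digests is just an array of Elf_Dyn
 * entries terminated by DT_NULL, so the heart of the function below
 * is a loop of the following shape (a condensed sketch of the code
 * that follows, not additional behaviour):
 *
 *	for (dynp = obj->dynamic; dynp->d_tag != DT_NULL; dynp++) {
 *		switch (dynp->d_tag) {
 *		case DT_STRTAB:
 *			obj->strtab = (const char *)(obj->relocbase +
 *			    dynp->d_un.d_ptr);
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 *
 * Address-valued tags are biased by obj->relocbase; count- and
 * size-valued tags are stored as-is.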
*/ static void digest_dynamic1(Obj_Entry *obj, int early, const Elf_Dyn **dyn_rpath, const Elf_Dyn **dyn_soname, const Elf_Dyn **dyn_runpath) { const Elf_Dyn *dynp; Needed_Entry **needed_tail = &obj->needed; Needed_Entry **needed_filtees_tail = &obj->needed_filtees; Needed_Entry **needed_aux_filtees_tail = &obj->needed_aux_filtees; const Elf_Hashelt *hashtab; const Elf32_Word *hashval; Elf32_Word bkt, nmaskwords; int bloom_size32; int plttype = DT_REL; *dyn_rpath = NULL; *dyn_soname = NULL; *dyn_runpath = NULL; obj->bind_now = false; for (dynp = obj->dynamic; dynp->d_tag != DT_NULL; dynp++) { switch (dynp->d_tag) { case DT_REL: obj->rel = (const Elf_Rel *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_RELSZ: obj->relsize = dynp->d_un.d_val; break; case DT_RELENT: assert(dynp->d_un.d_val == sizeof(Elf_Rel)); break; case DT_JMPREL: obj->pltrel = (const Elf_Rel *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_PLTRELSZ: obj->pltrelsize = dynp->d_un.d_val; break; case DT_RELA: obj->rela = (const Elf_Rela *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_RELASZ: obj->relasize = dynp->d_un.d_val; break; case DT_RELAENT: assert(dynp->d_un.d_val == sizeof(Elf_Rela)); break; case DT_PLTREL: plttype = dynp->d_un.d_val; assert(dynp->d_un.d_val == DT_REL || plttype == DT_RELA); break; case DT_SYMTAB: obj->symtab = (const Elf_Sym *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_SYMENT: assert(dynp->d_un.d_val == sizeof(Elf_Sym)); break; case DT_STRTAB: obj->strtab = (const char *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_STRSZ: obj->strsize = dynp->d_un.d_val; break; case DT_VERNEED: obj->verneed = (const Elf_Verneed *) (obj->relocbase + dynp->d_un.d_val); break; case DT_VERNEEDNUM: obj->verneednum = dynp->d_un.d_val; break; case DT_VERDEF: obj->verdef = (const Elf_Verdef *) (obj->relocbase + dynp->d_un.d_val); break; case DT_VERDEFNUM: obj->verdefnum = dynp->d_un.d_val; break; case DT_VERSYM: obj->versyms = (const Elf_Versym *)(obj->relocbase + dynp->d_un.d_val); break; case DT_HASH: { hashtab = (const Elf_Hashelt *)(obj->relocbase + dynp->d_un.d_ptr); obj->nbuckets = hashtab[0]; obj->nchains = hashtab[1]; obj->buckets = hashtab + 2; obj->chains = obj->buckets + obj->nbuckets; obj->valid_hash_sysv = obj->nbuckets > 0 && obj->nchains > 0 && obj->buckets != NULL; } break; case DT_GNU_HASH: { hashtab = (const Elf_Hashelt *)(obj->relocbase + dynp->d_un.d_ptr); obj->nbuckets_gnu = hashtab[0]; obj->symndx_gnu = hashtab[1]; nmaskwords = hashtab[2]; bloom_size32 = (__ELF_WORD_SIZE / 32) * nmaskwords; obj->maskwords_bm_gnu = nmaskwords - 1; obj->shift2_gnu = hashtab[3]; obj->bloom_gnu = (Elf_Addr *) (hashtab + 4); obj->buckets_gnu = hashtab + 4 + bloom_size32; obj->chain_zero_gnu = obj->buckets_gnu + obj->nbuckets_gnu - obj->symndx_gnu; /* Number of bitmask words is required to be power of 2 */ obj->valid_hash_gnu = powerof2(nmaskwords) && obj->nbuckets_gnu > 0 && obj->buckets_gnu != NULL; } break; case DT_NEEDED: if (!obj->rtld) { Needed_Entry *nep = NEW(Needed_Entry); nep->name = dynp->d_un.d_val; nep->obj = NULL; nep->next = NULL; *needed_tail = nep; needed_tail = &nep->next; } break; case DT_FILTER: if (!obj->rtld) { Needed_Entry *nep = NEW(Needed_Entry); nep->name = dynp->d_un.d_val; nep->obj = NULL; nep->next = NULL; *needed_filtees_tail = nep; needed_filtees_tail = &nep->next; } break; case DT_AUXILIARY: if (!obj->rtld) { Needed_Entry *nep = NEW(Needed_Entry); nep->name = dynp->d_un.d_val; nep->obj = NULL; nep->next = NULL; *needed_aux_filtees_tail = nep; 
needed_aux_filtees_tail = &nep->next; } break; case DT_PLTGOT: obj->pltgot = (Elf_Addr *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_TEXTREL: obj->textrel = true; break; case DT_SYMBOLIC: obj->symbolic = true; break; case DT_RPATH: /* * We have to wait until later to process this, because we * might not have gotten the address of the string table yet. */ *dyn_rpath = dynp; break; case DT_SONAME: *dyn_soname = dynp; break; case DT_RUNPATH: *dyn_runpath = dynp; break; case DT_INIT: obj->init = (Elf_Addr) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_PREINIT_ARRAY: obj->preinit_array = (Elf_Addr)(obj->relocbase + dynp->d_un.d_ptr); break; case DT_PREINIT_ARRAYSZ: obj->preinit_array_num = dynp->d_un.d_val / sizeof(Elf_Addr); break; case DT_INIT_ARRAY: obj->init_array = (Elf_Addr)(obj->relocbase + dynp->d_un.d_ptr); break; case DT_INIT_ARRAYSZ: obj->init_array_num = dynp->d_un.d_val / sizeof(Elf_Addr); break; case DT_FINI: obj->fini = (Elf_Addr) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_FINI_ARRAY: obj->fini_array = (Elf_Addr)(obj->relocbase + dynp->d_un.d_ptr); break; case DT_FINI_ARRAYSZ: obj->fini_array_num = dynp->d_un.d_val / sizeof(Elf_Addr); break; /* * Don't process DT_DEBUG on MIPS as the dynamic section * is mapped read-only. DT_MIPS_RLD_MAP is used instead. */ #ifndef __mips__ case DT_DEBUG: if (!early) dbg("Filling in DT_DEBUG entry"); ((Elf_Dyn*)dynp)->d_un.d_ptr = (Elf_Addr) &r_debug; break; #endif case DT_FLAGS: if (dynp->d_un.d_val & DF_ORIGIN) obj->z_origin = true; if (dynp->d_un.d_val & DF_SYMBOLIC) obj->symbolic = true; if (dynp->d_un.d_val & DF_TEXTREL) obj->textrel = true; if (dynp->d_un.d_val & DF_BIND_NOW) obj->bind_now = true; /*if (dynp->d_un.d_val & DF_STATIC_TLS) ;*/ break; #ifdef __mips__ case DT_MIPS_LOCAL_GOTNO: obj->local_gotno = dynp->d_un.d_val; break; case DT_MIPS_SYMTABNO: obj->symtabno = dynp->d_un.d_val; break; case DT_MIPS_GOTSYM: obj->gotsym = dynp->d_un.d_val; break; case DT_MIPS_RLD_MAP: *((Elf_Addr *)(dynp->d_un.d_ptr)) = (Elf_Addr) &r_debug; break; #endif #ifdef __powerpc64__ case DT_PPC64_GLINK: obj->glink = (Elf_Addr) (obj->relocbase + dynp->d_un.d_ptr); break; #endif case DT_FLAGS_1: if (dynp->d_un.d_val & DF_1_NOOPEN) obj->z_noopen = true; if (dynp->d_un.d_val & DF_1_ORIGIN) obj->z_origin = true; if (dynp->d_un.d_val & DF_1_GLOBAL) obj->z_global = true; if (dynp->d_un.d_val & DF_1_BIND_NOW) obj->bind_now = true; if (dynp->d_un.d_val & DF_1_NODELETE) obj->z_nodelete = true; if (dynp->d_un.d_val & DF_1_LOADFLTR) obj->z_loadfltr = true; if (dynp->d_un.d_val & DF_1_INTERPOSE) obj->z_interpose = true; if (dynp->d_un.d_val & DF_1_NODEFLIB) obj->z_nodeflib = true; break; default: if (!early) { dbg("Ignoring d_tag %ld = %#lx", (long)dynp->d_tag, (long)dynp->d_tag); } break; } } obj->traced = false; if (plttype == DT_RELA) { obj->pltrela = (const Elf_Rela *) obj->pltrel; obj->pltrel = NULL; obj->pltrelasize = obj->pltrelsize; obj->pltrelsize = 0; } /* Determine size of dynsym table (equal to nchains of sysv hash) */ if (obj->valid_hash_sysv) obj->dynsymcount = obj->nchains; else if (obj->valid_hash_gnu) { obj->dynsymcount = 0; for (bkt = 0; bkt < obj->nbuckets_gnu; bkt++) { if (obj->buckets_gnu[bkt] == 0) continue; hashval = &obj->chain_zero_gnu[obj->buckets_gnu[bkt]]; do obj->dynsymcount++; while ((*hashval++ & 1u) == 0); } obj->dynsymcount += obj->symndx_gnu; } } static bool obj_resolve_origin(Obj_Entry *obj) { if (obj->origin_path != NULL) return (true); obj->origin_path = xmalloc(PATH_MAX); return (rtld_dirname_abs(obj->path, 
obj->origin_path) != -1); } static void digest_dynamic2(Obj_Entry *obj, const Elf_Dyn *dyn_rpath, const Elf_Dyn *dyn_soname, const Elf_Dyn *dyn_runpath) { if (obj->z_origin && !obj_resolve_origin(obj)) rtld_die(); if (dyn_runpath != NULL) { obj->runpath = (char *)obj->strtab + dyn_runpath->d_un.d_val; obj->runpath = origin_subst(obj, obj->runpath); } else if (dyn_rpath != NULL) { obj->rpath = (char *)obj->strtab + dyn_rpath->d_un.d_val; obj->rpath = origin_subst(obj, obj->rpath); } if (dyn_soname != NULL) object_add_name(obj, obj->strtab + dyn_soname->d_un.d_val); } static void digest_dynamic(Obj_Entry *obj, int early) { const Elf_Dyn *dyn_rpath; const Elf_Dyn *dyn_soname; const Elf_Dyn *dyn_runpath; digest_dynamic1(obj, early, &dyn_rpath, &dyn_soname, &dyn_runpath); digest_dynamic2(obj, dyn_rpath, dyn_soname, dyn_runpath); } /* * Process a shared object's program header. This is used only for the * main program, when the kernel has already loaded the main program * into memory before calling the dynamic linker. It creates and * returns an Obj_Entry structure. */ static Obj_Entry * digest_phdr(const Elf_Phdr *phdr, int phnum, caddr_t entry, const char *path) { Obj_Entry *obj; const Elf_Phdr *phlimit = phdr + phnum; const Elf_Phdr *ph; Elf_Addr note_start, note_end; int nsegs = 0; obj = obj_new(); for (ph = phdr; ph < phlimit; ph++) { if (ph->p_type != PT_PHDR) continue; obj->phdr = phdr; obj->phsize = ph->p_memsz; obj->relocbase = (caddr_t)phdr - ph->p_vaddr; break; } obj->stack_flags = PF_X | PF_R | PF_W; for (ph = phdr; ph < phlimit; ph++) { switch (ph->p_type) { case PT_INTERP: obj->interp = (const char *)(ph->p_vaddr + obj->relocbase); break; case PT_LOAD: if (nsegs == 0) { /* First load segment */ obj->vaddrbase = trunc_page(ph->p_vaddr); obj->mapbase = obj->vaddrbase + obj->relocbase; obj->textsize = round_page(ph->p_vaddr + ph->p_memsz) - obj->vaddrbase; } else { /* Last load segment */ obj->mapsize = round_page(ph->p_vaddr + ph->p_memsz) - obj->vaddrbase; } nsegs++; break; case PT_DYNAMIC: obj->dynamic = (const Elf_Dyn *)(ph->p_vaddr + obj->relocbase); break; case PT_TLS: obj->tlsindex = 1; obj->tlssize = ph->p_memsz; obj->tlsalign = ph->p_align; obj->tlsinitsize = ph->p_filesz; obj->tlsinit = (void*)(ph->p_vaddr + obj->relocbase); break; case PT_GNU_STACK: obj->stack_flags = ph->p_flags; break; case PT_GNU_RELRO: obj->relro_page = obj->relocbase + trunc_page(ph->p_vaddr); obj->relro_size = round_page(ph->p_memsz); break; case PT_NOTE: note_start = (Elf_Addr)obj->relocbase + ph->p_vaddr; note_end = note_start + ph->p_filesz; digest_notes(obj, note_start, note_end); break; } } if (nsegs < 1) { _rtld_error("%s: too few PT_LOAD segments", path); return NULL; } obj->entry = entry; return obj; } void digest_notes(Obj_Entry *obj, Elf_Addr note_start, Elf_Addr note_end) { const Elf_Note *note; const char *note_name; uintptr_t p; for (note = (const Elf_Note *)note_start; (Elf_Addr)note < note_end; note = (const Elf_Note *)((const char *)(note + 1) + roundup2(note->n_namesz, sizeof(Elf32_Addr)) + roundup2(note->n_descsz, sizeof(Elf32_Addr)))) { if (note->n_namesz != sizeof(NOTE_FREEBSD_VENDOR) || note->n_descsz != sizeof(int32_t)) continue; if (note->n_type != NT_FREEBSD_ABI_TAG && note->n_type != NT_FREEBSD_NOINIT_TAG) continue; note_name = (const char *)(note + 1); if (strncmp(NOTE_FREEBSD_VENDOR, note_name, sizeof(NOTE_FREEBSD_VENDOR)) != 0) continue; switch (note->n_type) { case NT_FREEBSD_ABI_TAG: /* FreeBSD osrel note */ p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, 
sizeof(Elf32_Addr)); obj->osrel = *(const int32_t *)(p); dbg("note osrel %d", obj->osrel); break; case NT_FREEBSD_NOINIT_TAG: /* FreeBSD 'crt does not call init' note */ obj->crt_no_init = true; dbg("note crt_no_init"); break; } } } static Obj_Entry * dlcheck(void *handle) { Obj_Entry *obj; TAILQ_FOREACH(obj, &obj_list, next) { if (obj == (Obj_Entry *) handle) break; } if (obj == NULL || obj->refcount == 0 || obj->dl_refcount == 0) { _rtld_error("Invalid shared object handle %p", handle); return NULL; } return obj; } /* * If the given object is already in the donelist, return true. Otherwise * add the object to the list and return false. */ static bool donelist_check(DoneList *dlp, const Obj_Entry *obj) { unsigned int i; for (i = 0; i < dlp->num_used; i++) if (dlp->objs[i] == obj) return true; /* * Our donelist allocation should always be sufficient. But if * our threads locking isn't working properly, more shared objects * could have been loaded since we allocated the list. That should * never happen, but we'll handle it properly just in case it does. */ if (dlp->num_used < dlp->num_alloc) dlp->objs[dlp->num_used++] = obj; return false; } /* * Hash function for symbol table lookup. Don't even think about changing * this. It is specified by the System V ABI. */ unsigned long elf_hash(const char *name) { const unsigned char *p = (const unsigned char *) name; unsigned long h = 0; unsigned long g; while (*p != '\0') { h = (h << 4) + *p++; if ((g = h & 0xf0000000) != 0) h ^= g >> 24; h &= ~g; } return h; } /* * The GNU hash function is the Daniel J. Bernstein hash clipped to 32 bits * unsigned in case it's implemented with a wider type. */ static uint32_t gnu_hash(const char *s) { uint32_t h; unsigned char c; h = 5381; for (c = *s; c != '\0'; c = *++s) h = h * 33 + c; return (h & 0xffffffff); } /* * Find the library with the given name, and return its full pathname. * The returned string is dynamically allocated. Generates an error * message and returns NULL if the library cannot be found. * * If the second argument is non-NULL, then it refers to an already- * loaded shared object, whose library search path will be searched. * * If a library is successfully located via LD_LIBRARY_PATH_FDS, its * descriptor (which is close-on-exec) will be passed out via the third * argument. * * The search order is: * DT_RPATH in the referencing file _unless_ DT_RUNPATH is present (1) * DT_RPATH of the main object if DSO without defined DT_RUNPATH (1) * LD_LIBRARY_PATH * DT_RUNPATH in the referencing file * ldconfig hints (if -z nodefaultlib, filter out default library directories * from list) * /lib:/usr/lib _unless_ the referencing file is linked with -z nodefaultlib * * (1) Handled in digest_dynamic2 - rpath left NULL if runpath defined. */ static char * find_library(const char *xname, const Obj_Entry *refobj, int *fdp) { char *pathname; char *name; bool nodeflib, objgiven; objgiven = refobj != NULL; if (libmap_disable || !objgiven || (name = lm_find(refobj->path, xname)) == NULL) name = (char *)xname; if (strchr(name, '/') != NULL) { /* Hard coded pathname */ if (name[0] != '/' && !trust) { _rtld_error("Absolute pathname required for shared object \"%s\"", name); return (NULL); } return (origin_subst(__DECONST(Obj_Entry *, refobj), __DECONST(char *, name))); } dbg(" Searching for \"%s\"", name); /* * If refobj->rpath != NULL, then refobj->runpath is NULL. Fall * back to pre-conforming behaviour if user requested so with * LD_LIBRARY_PATH_RPATH environment variable and ignore -z * nodeflib. 
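 *
 * As an illustration of the two branches below (hypothetical paths,
 * not additional behaviour): a request for "libfoo.so.1" made by an
 * object that carries DT_RUNPATH=/opt/lib and no DT_RPATH takes the
 * else branch and is searched for in LD_LIBRARY_PATH, then /opt/lib,
 * then the LD_LIBRARY_PATH_FDS descriptors, then the ldconfig hints,
 * and finally /lib:/usr/lib unless the object was linked with
 * -z nodefaultlib.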
*/ if (objgiven && refobj->rpath != NULL && ld_library_path_rpath) { if ((pathname = search_library_path(name, ld_library_path)) != NULL || (refobj != NULL && (pathname = search_library_path(name, refobj->rpath)) != NULL) || (pathname = search_library_pathfds(name, ld_library_dirs, fdp)) != NULL || (pathname = search_library_path(name, gethints(false))) != NULL || (pathname = search_library_path(name, ld_standard_library_path)) != NULL) return (pathname); } else { nodeflib = objgiven ? refobj->z_nodeflib : false; if ((objgiven && (pathname = search_library_path(name, refobj->rpath)) != NULL) || (objgiven && refobj->runpath == NULL && refobj != obj_main && (pathname = search_library_path(name, obj_main->rpath)) != NULL) || (pathname = search_library_path(name, ld_library_path)) != NULL || (objgiven && (pathname = search_library_path(name, refobj->runpath)) != NULL) || (pathname = search_library_pathfds(name, ld_library_dirs, fdp)) != NULL || (pathname = search_library_path(name, gethints(nodeflib))) != NULL || (objgiven && !nodeflib && (pathname = search_library_path(name, ld_standard_library_path)) != NULL)) return (pathname); } if (objgiven && refobj->path != NULL) { _rtld_error("Shared object \"%s\" not found, required by \"%s\"", name, basename(refobj->path)); } else { _rtld_error("Shared object \"%s\" not found", name); } return NULL; } /* * Given a symbol number in a referencing object, find the corresponding * definition of the symbol. Returns a pointer to the symbol, or NULL if * no definition was found. Returns a pointer to the Obj_Entry of the * defining object via the reference parameter DEFOBJ_OUT. */ const Elf_Sym * find_symdef(unsigned long symnum, const Obj_Entry *refobj, const Obj_Entry **defobj_out, int flags, SymCache *cache, RtldLockState *lockstate) { const Elf_Sym *ref; const Elf_Sym *def; const Obj_Entry *defobj; const Ver_Entry *ve; SymLook req; const char *name; int res; /* * If we have already found this symbol, get the information from * the cache. */ if (symnum >= refobj->dynsymcount) return NULL; /* Bad object */ if (cache != NULL && cache[symnum].sym != NULL) { *defobj_out = cache[symnum].obj; return cache[symnum].sym; } ref = refobj->symtab + symnum; name = refobj->strtab + ref->st_name; def = NULL; defobj = NULL; ve = NULL; /* * We don't have to do a full scale lookup if the symbol is local. * We know it will bind to the instance in this load module; to * which we already have a pointer (ie ref). By not doing a lookup, * we not only improve performance, but it also avoids unresolvable * symbols when local symbols are not in the hash table. This has * been seen with the ia64 toolchain. */ if (ELF_ST_BIND(ref->st_info) != STB_LOCAL) { if (ELF_ST_TYPE(ref->st_info) == STT_SECTION) { _rtld_error("%s: Bogus symbol table entry %lu", refobj->path, symnum); } symlook_init(&req, name); req.flags = flags; ve = req.ventry = fetch_ventry(refobj, symnum); req.lockstate = lockstate; res = symlook_default(&req, refobj); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } else { def = ref; defobj = refobj; } /* * If we found no definition and the reference is weak, treat the * symbol as having the value zero. */ if (def == NULL && ELF_ST_BIND(ref->st_info) == STB_WEAK) { def = &sym_zero; defobj = obj_main; } if (def != NULL) { *defobj_out = defobj; /* Record the information in the cache to avoid subsequent lookups. 
*/ if (cache != NULL) { cache[symnum].sym = def; cache[symnum].obj = defobj; } } else { if (refobj != &obj_rtld) _rtld_error("%s: Undefined symbol \"%s%s%s\"", refobj->path, name, ve != NULL ? "@" : "", ve != NULL ? ve->name : ""); } return def; } /* * Return the search path from the ldconfig hints file, reading it if * necessary. If nostdlib is true, then the default search paths are * not added to result. * * Returns NULL if there are problems with the hints file, * or if the search path there is empty. */ static const char * gethints(bool nostdlib) { static char *hints, *filtered_path; static struct elfhints_hdr hdr; struct fill_search_info_args sargs, hargs; struct dl_serinfo smeta, hmeta, *SLPinfo, *hintinfo; struct dl_serpath *SLPpath, *hintpath; char *p; struct stat hint_stat; unsigned int SLPndx, hintndx, fndx, fcount; int fd; size_t flen; uint32_t dl; bool skip; /* First call, read the hints file */ if (hints == NULL) { /* Keep from trying again in case the hints file is bad. */ hints = ""; if ((fd = open(ld_elf_hints_path, O_RDONLY | O_CLOEXEC)) == -1) return (NULL); /* * Check of hdr.dirlistlen value against type limit * intends to pacify static analyzers. Further * paranoia leads to checks that dirlist is fully * contained in the file range. */ if (read(fd, &hdr, sizeof hdr) != sizeof hdr || hdr.magic != ELFHINTS_MAGIC || hdr.version != 1 || hdr.dirlistlen > UINT_MAX / 2 || fstat(fd, &hint_stat) == -1) { cleanup1: close(fd); hdr.dirlistlen = 0; return (NULL); } dl = hdr.strtab; if (dl + hdr.dirlist < dl) goto cleanup1; dl += hdr.dirlist; if (dl + hdr.dirlistlen < dl) goto cleanup1; dl += hdr.dirlistlen; if (dl > hint_stat.st_size) goto cleanup1; p = xmalloc(hdr.dirlistlen + 1); if (lseek(fd, hdr.strtab + hdr.dirlist, SEEK_SET) == -1 || read(fd, p, hdr.dirlistlen + 1) != (ssize_t)hdr.dirlistlen + 1 || p[hdr.dirlistlen] != '\0') { free(p); goto cleanup1; } hints = p; close(fd); } /* * If caller agreed to receive list which includes the default * paths, we are done. Otherwise, if we still did not * calculated filtered result, do it now. */ if (!nostdlib) return (hints[0] != '\0' ? hints : NULL); if (filtered_path != NULL) goto filt_ret; /* * Obtain the list of all configured search paths, and the * list of the default paths. * * First estimate the size of the results. */ smeta.dls_size = __offsetof(struct dl_serinfo, dls_serpath); smeta.dls_cnt = 0; hmeta.dls_size = __offsetof(struct dl_serinfo, dls_serpath); hmeta.dls_cnt = 0; sargs.request = RTLD_DI_SERINFOSIZE; sargs.serinfo = &smeta; hargs.request = RTLD_DI_SERINFOSIZE; hargs.serinfo = &hmeta; path_enumerate(ld_standard_library_path, fill_search_info, &sargs); path_enumerate(hints, fill_search_info, &hargs); SLPinfo = xmalloc(smeta.dls_size); hintinfo = xmalloc(hmeta.dls_size); /* * Next fetch both sets of paths. */ sargs.request = RTLD_DI_SERINFO; sargs.serinfo = SLPinfo; sargs.serpath = &SLPinfo->dls_serpath[0]; sargs.strspace = (char *)&SLPinfo->dls_serpath[smeta.dls_cnt]; hargs.request = RTLD_DI_SERINFO; hargs.serinfo = hintinfo; hargs.serpath = &hintinfo->dls_serpath[0]; hargs.strspace = (char *)&hintinfo->dls_serpath[hmeta.dls_cnt]; path_enumerate(ld_standard_library_path, fill_search_info, &sargs); path_enumerate(hints, fill_search_info, &hargs); /* * Now calculate the difference between two sets, by excluding * standard paths from the full set. 
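 *
 * For example (hypothetical directory lists, not additional
 * behaviour): if the hints file yields
 * "/lib:/usr/lib:/usr/local/lib" and the standard search path is
 * "/lib:/usr/lib", the loop below produces the filtered result
 * "/usr/local/lib".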
*/ fndx = 0; fcount = 0; filtered_path = xmalloc(hdr.dirlistlen + 1); hintpath = &hintinfo->dls_serpath[0]; for (hintndx = 0; hintndx < hmeta.dls_cnt; hintndx++, hintpath++) { skip = false; SLPpath = &SLPinfo->dls_serpath[0]; /* * Check each standard path against current. */ for (SLPndx = 0; SLPndx < smeta.dls_cnt; SLPndx++, SLPpath++) { /* matched, skip the path */ if (!strcmp(hintpath->dls_name, SLPpath->dls_name)) { skip = true; break; } } if (skip) continue; /* * Not matched against any standard path, add the path * to result. Separate consequtive paths with ':'. */ if (fcount > 0) { filtered_path[fndx] = ':'; fndx++; } fcount++; flen = strlen(hintpath->dls_name); strncpy((filtered_path + fndx), hintpath->dls_name, flen); fndx += flen; } filtered_path[fndx] = '\0'; free(SLPinfo); free(hintinfo); filt_ret: return (filtered_path[0] != '\0' ? filtered_path : NULL); } static void init_dag(Obj_Entry *root) { const Needed_Entry *needed; const Objlist_Entry *elm; DoneList donelist; if (root->dag_inited) return; donelist_init(&donelist); /* Root object belongs to own DAG. */ objlist_push_tail(&root->dldags, root); objlist_push_tail(&root->dagmembers, root); donelist_check(&donelist, root); /* * Add dependencies of root object to DAG in breadth order * by exploiting the fact that each new object get added * to the tail of the dagmembers list. */ STAILQ_FOREACH(elm, &root->dagmembers, link) { for (needed = elm->obj->needed; needed != NULL; needed = needed->next) { if (needed->obj == NULL || donelist_check(&donelist, needed->obj)) continue; objlist_push_tail(&needed->obj->dldags, root); objlist_push_tail(&root->dagmembers, needed->obj); } } root->dag_inited = true; } static void init_marker(Obj_Entry *marker) { bzero(marker, sizeof(*marker)); marker->marker = true; } Obj_Entry * globallist_curr(const Obj_Entry *obj) { for (;;) { if (obj == NULL) return (NULL); if (!obj->marker) return (__DECONST(Obj_Entry *, obj)); obj = TAILQ_PREV(obj, obj_entry_q, next); } } Obj_Entry * globallist_next(const Obj_Entry *obj) { for (;;) { obj = TAILQ_NEXT(obj, next); if (obj == NULL) return (NULL); if (!obj->marker) return (__DECONST(Obj_Entry *, obj)); } } /* Prevent the object from being unmapped while the bind lock is dropped. */ static void hold_object(Obj_Entry *obj) { obj->holdcount++; } static void unhold_object(Obj_Entry *obj) { assert(obj->holdcount > 0); if (--obj->holdcount == 0 && obj->unholdfree) release_object(obj); } static void process_z(Obj_Entry *root) { const Objlist_Entry *elm; Obj_Entry *obj; /* * Walk over object DAG and process every dependent object * that is marked as DF_1_NODELETE or DF_1_GLOBAL. They need * to grow their own DAG. * * For DF_1_GLOBAL, DAG is required for symbol lookups in * symlook_global() to work. * * For DF_1_NODELETE, the DAG should have its reference upped. */ STAILQ_FOREACH(elm, &root->dagmembers, link) { obj = elm->obj; if (obj == NULL) continue; if (obj->z_nodelete && !obj->ref_nodel) { dbg("obj %s -z nodelete", obj->path); init_dag(obj); ref_dag(obj); obj->ref_nodel = true; } if (obj->z_global && objlist_find(&list_global, obj) == NULL) { dbg("obj %s -z global", obj->path); objlist_push_tail(&list_global, obj); init_dag(obj); } } } /* * Initialize the dynamic linker. The argument is the address at which * the dynamic linker has been mapped into memory. The primary task of * this function is to relocate the dynamic linker. 
*/ static void init_rtld(caddr_t mapbase, Elf_Auxinfo **aux_info) { Obj_Entry objtmp; /* Temporary rtld object */ const Elf_Ehdr *ehdr; const Elf_Dyn *dyn_rpath; const Elf_Dyn *dyn_soname; const Elf_Dyn *dyn_runpath; #ifdef RTLD_INIT_PAGESIZES_EARLY /* The page size is required by the dynamic memory allocator. */ init_pagesizes(aux_info); #endif /* * Conjure up an Obj_Entry structure for the dynamic linker. * * The "path" member can't be initialized yet because string constants * cannot yet be accessed. Below we will set it correctly. */ memset(&objtmp, 0, sizeof(objtmp)); objtmp.path = NULL; objtmp.rtld = true; objtmp.mapbase = mapbase; #ifdef PIC objtmp.relocbase = mapbase; #endif objtmp.dynamic = rtld_dynamic(&objtmp); digest_dynamic1(&objtmp, 1, &dyn_rpath, &dyn_soname, &dyn_runpath); assert(objtmp.needed == NULL); #if !defined(__mips__) /* MIPS has a bogus DT_TEXTREL. */ assert(!objtmp.textrel); #endif /* * Temporarily put the dynamic linker entry into the object list, so * that symbols can be found. */ relocate_objects(&objtmp, true, &objtmp, 0, NULL); ehdr = (Elf_Ehdr *)mapbase; objtmp.phdr = (Elf_Phdr *)((char *)mapbase + ehdr->e_phoff); objtmp.phsize = ehdr->e_phnum * sizeof(objtmp.phdr[0]); /* Initialize the object list. */ TAILQ_INIT(&obj_list); /* Now that non-local variables can be accesses, copy out obj_rtld. */ memcpy(&obj_rtld, &objtmp, sizeof(obj_rtld)); #ifndef RTLD_INIT_PAGESIZES_EARLY /* The page size is required by the dynamic memory allocator. */ init_pagesizes(aux_info); #endif if (aux_info[AT_OSRELDATE] != NULL) osreldate = aux_info[AT_OSRELDATE]->a_un.a_val; digest_dynamic2(&obj_rtld, dyn_rpath, dyn_soname, dyn_runpath); /* Replace the path with a dynamically allocated copy. */ obj_rtld.path = xstrdup(ld_path_rtld); r_debug.r_brk = r_debug_state; r_debug.r_state = RT_CONSISTENT; } /* * Retrieve the array of supported page sizes. The kernel provides the page * sizes in increasing order. */ static void init_pagesizes(Elf_Auxinfo **aux_info) { static size_t psa[MAXPAGESIZES]; int mib[2]; size_t len, size; if (aux_info[AT_PAGESIZES] != NULL && aux_info[AT_PAGESIZESLEN] != NULL) { size = aux_info[AT_PAGESIZESLEN]->a_un.a_val; pagesizes = aux_info[AT_PAGESIZES]->a_un.a_ptr; } else { len = 2; if (sysctlnametomib("hw.pagesizes", mib, &len) == 0) size = sizeof(psa); else { /* As a fallback, retrieve the base page size. */ size = sizeof(psa[0]); if (aux_info[AT_PAGESZ] != NULL) { psa[0] = aux_info[AT_PAGESZ]->a_un.a_val; goto psa_filled; } else { mib[0] = CTL_HW; mib[1] = HW_PAGESIZE; len = 2; } } if (sysctl(mib, len, psa, &size, NULL, 0) == -1) { _rtld_error("sysctl for hw.pagesize(s) failed"); rtld_die(); } psa_filled: pagesizes = psa; } npagesizes = size / sizeof(pagesizes[0]); /* Discard any invalid entries at the end of the array. */ while (npagesizes > 0 && pagesizes[npagesizes - 1] == 0) npagesizes--; } /* * Add the init functions from a needed object list (and its recursive * needed objects) to "list". This is not used directly; it is a helper * function for initlist_add_objects(). The write lock must be held * when this function is called. */ static void initlist_add_neededs(Needed_Entry *needed, Objlist *list) { /* Recursively process the successor needed objects. */ if (needed->next != NULL) initlist_add_neededs(needed->next, list); /* Process the current needed object. 
*/ if (needed->obj != NULL) initlist_add_objects(needed->obj, needed->obj, list); } /* * Scan all of the DAGs rooted in the range of objects from "obj" to * "tail" and add their init functions to "list". This recurses over * the DAGs and ensure the proper init ordering such that each object's * needed libraries are initialized before the object itself. At the * same time, this function adds the objects to the global finalization * list "list_fini" in the opposite order. The write lock must be * held when this function is called. */ static void initlist_add_objects(Obj_Entry *obj, Obj_Entry *tail, Objlist *list) { Obj_Entry *nobj; if (obj->init_scanned || obj->init_done) return; obj->init_scanned = true; /* Recursively process the successor objects. */ nobj = globallist_next(obj); if (nobj != NULL && obj != tail) initlist_add_objects(nobj, tail, list); /* Recursively process the needed objects. */ if (obj->needed != NULL) initlist_add_neededs(obj->needed, list); if (obj->needed_filtees != NULL) initlist_add_neededs(obj->needed_filtees, list); if (obj->needed_aux_filtees != NULL) initlist_add_neededs(obj->needed_aux_filtees, list); /* Add the object to the init list. */ if (obj->preinit_array != (Elf_Addr)NULL || obj->init != (Elf_Addr)NULL || obj->init_array != (Elf_Addr)NULL) objlist_push_tail(list, obj); /* Add the object to the global fini list in the reverse order. */ if ((obj->fini != (Elf_Addr)NULL || obj->fini_array != (Elf_Addr)NULL) && !obj->on_fini_list) { objlist_push_head(&list_fini, obj); obj->on_fini_list = true; } } #ifndef FPTR_TARGET #define FPTR_TARGET(f) ((Elf_Addr) (f)) #endif static void free_needed_filtees(Needed_Entry *n, RtldLockState *lockstate) { Needed_Entry *needed, *needed1; for (needed = n; needed != NULL; needed = needed->next) { if (needed->obj != NULL) { dlclose_locked(needed->obj, lockstate); needed->obj = NULL; } } for (needed = n; needed != NULL; needed = needed1) { needed1 = needed->next; free(needed); } } static void unload_filtees(Obj_Entry *obj, RtldLockState *lockstate) { free_needed_filtees(obj->needed_filtees, lockstate); obj->needed_filtees = NULL; free_needed_filtees(obj->needed_aux_filtees, lockstate); obj->needed_aux_filtees = NULL; obj->filtees_loaded = false; } static void load_filtee1(Obj_Entry *obj, Needed_Entry *needed, int flags, RtldLockState *lockstate) { for (; needed != NULL; needed = needed->next) { needed->obj = dlopen_object(obj->strtab + needed->name, -1, obj, flags, ((ld_loadfltr || obj->z_loadfltr) ? RTLD_NOW : RTLD_LAZY) | RTLD_LOCAL, lockstate); } } static void load_filtees(Obj_Entry *obj, int flags, RtldLockState *lockstate) { lock_restart_for_upgrade(lockstate); if (!obj->filtees_loaded) { load_filtee1(obj, obj->needed_filtees, flags, lockstate); load_filtee1(obj, obj->needed_aux_filtees, flags, lockstate); obj->filtees_loaded = true; } } static int process_needed(Obj_Entry *obj, Needed_Entry *needed, int flags) { Obj_Entry *obj1; for (; needed != NULL; needed = needed->next) { obj1 = needed->obj = load_object(obj->strtab + needed->name, -1, obj, flags & ~RTLD_LO_NOLOAD); if (obj1 == NULL && !ld_tracing && (flags & RTLD_LO_FILTEES) == 0) return (-1); } return (0); } /* * Given a shared object, traverse its list of needed objects, and load * each of them. Returns 0 on success. Generates an error message and * returns -1 on failure. 
*/ static int load_needed_objects(Obj_Entry *first, int flags) { Obj_Entry *obj; for (obj = first; obj != NULL; obj = TAILQ_NEXT(obj, next)) { if (obj->marker) continue; if (process_needed(obj, obj->needed, flags) == -1) return (-1); } return (0); } static int load_preload_objects(void) { char *p = ld_preload; Obj_Entry *obj; static const char delim[] = " \t:;"; if (p == NULL) return 0; p += strspn(p, delim); while (*p != '\0') { size_t len = strcspn(p, delim); char savech; savech = p[len]; p[len] = '\0'; obj = load_object(p, -1, NULL, 0); if (obj == NULL) return -1; /* XXX - cleanup */ obj->z_interpose = true; p[len] = savech; p += len; p += strspn(p, delim); } LD_UTRACE(UTRACE_PRELOAD_FINISHED, NULL, NULL, 0, 0, NULL); return 0; } static const char * printable_path(const char *path) { return (path == NULL ? "" : path); } /* * Load a shared object into memory, if it is not already loaded. The * object may be specified by name or by user-supplied file descriptor * fd_u. In the later case, the fd_u descriptor is not closed, but its * duplicate is. * * Returns a pointer to the Obj_Entry for the object. Returns NULL * on failure. */ static Obj_Entry * load_object(const char *name, int fd_u, const Obj_Entry *refobj, int flags) { Obj_Entry *obj; int fd; struct stat sb; char *path; fd = -1; if (name != NULL) { TAILQ_FOREACH(obj, &obj_list, next) { if (obj->marker || obj->doomed) continue; if (object_match_name(obj, name)) return (obj); } path = find_library(name, refobj, &fd); if (path == NULL) return (NULL); } else path = NULL; if (fd >= 0) { /* * search_library_pathfds() opens a fresh file descriptor for the * library, so there is no need to dup(). */ } else if (fd_u == -1) { /* * If we didn't find a match by pathname, or the name is not * supplied, open the file and check again by device and inode. * This avoids false mismatches caused by multiple links or ".." * in pathnames. * * To avoid a race, we open the file and use fstat() rather than * using stat(). */ if ((fd = open(path, O_RDONLY | O_CLOEXEC | O_VERIFY)) == -1) { _rtld_error("Cannot open \"%s\"", path); free(path); return (NULL); } } else { fd = fcntl(fd_u, F_DUPFD_CLOEXEC, 0); if (fd == -1) { _rtld_error("Cannot dup fd"); free(path); return (NULL); } } if (fstat(fd, &sb) == -1) { _rtld_error("Cannot fstat \"%s\"", printable_path(path)); close(fd); free(path); return NULL; } TAILQ_FOREACH(obj, &obj_list, next) { if (obj->marker || obj->doomed) continue; if (obj->ino == sb.st_ino && obj->dev == sb.st_dev) break; } if (obj != NULL && name != NULL) { object_add_name(obj, name); free(path); close(fd); return obj; } if (flags & RTLD_LO_NOLOAD) { free(path); close(fd); return (NULL); } /* First use of this object, so we must map it in */ obj = do_load_object(fd, name, path, &sb, flags); if (obj == NULL) free(path); close(fd); return obj; } static Obj_Entry * do_load_object(int fd, const char *name, char *path, struct stat *sbp, int flags) { Obj_Entry *obj; struct statfs fs; /* * but first, make sure that environment variables haven't been * used to circumvent the noexec flag on a filesystem. 
*/ if (dangerous_ld_env) { if (fstatfs(fd, &fs) != 0) { _rtld_error("Cannot fstatfs \"%s\"", printable_path(path)); return NULL; } if (fs.f_flags & MNT_NOEXEC) { _rtld_error("Cannot execute objects on %s\n", fs.f_mntonname); return NULL; } } dbg("loading \"%s\"", printable_path(path)); obj = map_object(fd, printable_path(path), sbp); if (obj == NULL) return NULL; /* * If DT_SONAME is present in the object, digest_dynamic2 already * added it to the object names. */ if (name != NULL) object_add_name(obj, name); obj->path = path; digest_dynamic(obj, 0); dbg("%s valid_hash_sysv %d valid_hash_gnu %d dynsymcount %d", obj->path, obj->valid_hash_sysv, obj->valid_hash_gnu, obj->dynsymcount); if (obj->z_noopen && (flags & (RTLD_LO_DLOPEN | RTLD_LO_TRACE)) == RTLD_LO_DLOPEN) { dbg("refusing to load non-loadable \"%s\"", obj->path); _rtld_error("Cannot dlopen non-loadable %s", obj->path); munmap(obj->mapbase, obj->mapsize); obj_free(obj); return (NULL); } obj->dlopened = (flags & RTLD_LO_DLOPEN) != 0; TAILQ_INSERT_TAIL(&obj_list, obj, next); obj_count++; obj_loads++; linkmap_add(obj); /* for GDB & dlinfo() */ max_stack_flags |= obj->stack_flags; dbg(" %p .. %p: %s", obj->mapbase, obj->mapbase + obj->mapsize - 1, obj->path); if (obj->textrel) dbg(" WARNING: %s has impure text", obj->path); LD_UTRACE(UTRACE_LOAD_OBJECT, obj, obj->mapbase, obj->mapsize, 0, obj->path); return obj; } static Obj_Entry * obj_from_addr(const void *addr) { Obj_Entry *obj; TAILQ_FOREACH(obj, &obj_list, next) { if (obj->marker) continue; if (addr < (void *) obj->mapbase) continue; if (addr < (void *) (obj->mapbase + obj->mapsize)) return obj; } return NULL; } static void preinit_main(void) { Elf_Addr *preinit_addr; int index; preinit_addr = (Elf_Addr *)obj_main->preinit_array; if (preinit_addr == NULL) return; for (index = 0; index < obj_main->preinit_array_num; index++) { if (preinit_addr[index] != 0 && preinit_addr[index] != 1) { dbg("calling preinit function for %s at %p", obj_main->path, (void *)preinit_addr[index]); LD_UTRACE(UTRACE_INIT_CALL, obj_main, (void *)preinit_addr[index], 0, 0, obj_main->path); call_init_pointer(obj_main, preinit_addr[index]); } } } /* * Call the finalization functions for each of the objects in "list" * belonging to the DAG of "root" and referenced once. If NULL "root" * is specified, every finalization function will be called regardless * of the reference count and the list elements won't be freed. All of * the objects are expected to have non-NULL fini functions. */ static void objlist_call_fini(Objlist *list, Obj_Entry *root, RtldLockState *lockstate) { Objlist_Entry *elm; char *saved_msg; Elf_Addr *fini_addr; int index; assert(root == NULL || root->refcount == 1); if (root != NULL) root->doomed = true; /* * Preserve the current error message since a fini function might * call into the dynamic linker and overwrite it. */ saved_msg = errmsg_save(); do { STAILQ_FOREACH(elm, list, link) { if (root != NULL && (elm->obj->refcount != 1 || objlist_find(&root->dagmembers, elm->obj) == NULL)) continue; /* Remove object from fini list to prevent recursive invocation. */ STAILQ_REMOVE(list, elm, Struct_Objlist_Entry, link); /* Ensure that new references cannot be acquired. */ elm->obj->doomed = true; hold_object(elm->obj); lock_release(rtld_bind_lock, lockstate); /* * It is legal to have both DT_FINI and DT_FINI_ARRAY defined. * When this happens, DT_FINI_ARRAY is processed first. 
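 *
 * For example (illustrative names): with .fini_array = { f0, f1, f2 }
 * and a DT_FINI function f, the code below runs f2(), f1(), f0() and
 * then f(), i.e. the array is walked backwards and DT_FINI comes
 * last.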
*/ fini_addr = (Elf_Addr *)elm->obj->fini_array; if (fini_addr != NULL && elm->obj->fini_array_num > 0) { for (index = elm->obj->fini_array_num - 1; index >= 0; index--) { if (fini_addr[index] != 0 && fini_addr[index] != 1) { dbg("calling fini function for %s at %p", elm->obj->path, (void *)fini_addr[index]); LD_UTRACE(UTRACE_FINI_CALL, elm->obj, (void *)fini_addr[index], 0, 0, elm->obj->path); call_initfini_pointer(elm->obj, fini_addr[index]); } } } if (elm->obj->fini != (Elf_Addr)NULL) { dbg("calling fini function for %s at %p", elm->obj->path, (void *)elm->obj->fini); LD_UTRACE(UTRACE_FINI_CALL, elm->obj, (void *)elm->obj->fini, 0, 0, elm->obj->path); call_initfini_pointer(elm->obj, elm->obj->fini); } wlock_acquire(rtld_bind_lock, lockstate); unhold_object(elm->obj); /* No need to free anything if process is going down. */ if (root != NULL) free(elm); /* * We must restart the list traversal after every fini call * because a dlclose() call from the fini function or from * another thread might have modified the reference counts. */ break; } } while (elm != NULL); errmsg_restore(saved_msg); } /* * Call the initialization functions for each of the objects in * "list". All of the objects are expected to have non-NULL init * functions. */ static void objlist_call_init(Objlist *list, RtldLockState *lockstate) { Objlist_Entry *elm; Obj_Entry *obj; char *saved_msg; Elf_Addr *init_addr; int index; /* * Clean init_scanned flag so that objects can be rechecked and * possibly initialized earlier if any of vectors called below * cause the change by using dlopen. */ TAILQ_FOREACH(obj, &obj_list, next) { if (obj->marker) continue; obj->init_scanned = false; } /* * Preserve the current error message since an init function might * call into the dynamic linker and overwrite it. */ saved_msg = errmsg_save(); STAILQ_FOREACH(elm, list, link) { if (elm->obj->init_done) /* Initialized early. */ continue; /* * Race: other thread might try to use this object before current * one completes the initialization. Not much can be done here * without better locking. */ elm->obj->init_done = true; hold_object(elm->obj); lock_release(rtld_bind_lock, lockstate); /* * It is legal to have both DT_INIT and DT_INIT_ARRAY defined. * When this happens, DT_INIT is processed first. 
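 *
 * For example (illustrative names): with a DT_INIT function g and
 * .init_array = { g0, g1 }, the code below runs g(), then g0(), then
 * g1(), which mirrors the fini ordering above.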
*/ if (elm->obj->init != (Elf_Addr)NULL) { dbg("calling init function for %s at %p", elm->obj->path, (void *)elm->obj->init); LD_UTRACE(UTRACE_INIT_CALL, elm->obj, (void *)elm->obj->init, 0, 0, elm->obj->path); call_initfini_pointer(elm->obj, elm->obj->init); } init_addr = (Elf_Addr *)elm->obj->init_array; if (init_addr != NULL) { for (index = 0; index < elm->obj->init_array_num; index++) { if (init_addr[index] != 0 && init_addr[index] != 1) { dbg("calling init function for %s at %p", elm->obj->path, (void *)init_addr[index]); LD_UTRACE(UTRACE_INIT_CALL, elm->obj, (void *)init_addr[index], 0, 0, elm->obj->path); call_init_pointer(elm->obj, init_addr[index]); } } } wlock_acquire(rtld_bind_lock, lockstate); unhold_object(elm->obj); } errmsg_restore(saved_msg); } static void objlist_clear(Objlist *list) { Objlist_Entry *elm; while (!STAILQ_EMPTY(list)) { elm = STAILQ_FIRST(list); STAILQ_REMOVE_HEAD(list, link); free(elm); } } static Objlist_Entry * objlist_find(Objlist *list, const Obj_Entry *obj) { Objlist_Entry *elm; STAILQ_FOREACH(elm, list, link) if (elm->obj == obj) return elm; return NULL; } static void objlist_init(Objlist *list) { STAILQ_INIT(list); } static void objlist_push_head(Objlist *list, Obj_Entry *obj) { Objlist_Entry *elm; elm = NEW(Objlist_Entry); elm->obj = obj; STAILQ_INSERT_HEAD(list, elm, link); } static void objlist_push_tail(Objlist *list, Obj_Entry *obj) { Objlist_Entry *elm; elm = NEW(Objlist_Entry); elm->obj = obj; STAILQ_INSERT_TAIL(list, elm, link); } static void objlist_put_after(Objlist *list, Obj_Entry *listobj, Obj_Entry *obj) { Objlist_Entry *elm, *listelm; STAILQ_FOREACH(listelm, list, link) { if (listelm->obj == listobj) break; } elm = NEW(Objlist_Entry); elm->obj = obj; if (listelm != NULL) STAILQ_INSERT_AFTER(list, listelm, elm, link); else STAILQ_INSERT_TAIL(list, elm, link); } static void objlist_remove(Objlist *list, Obj_Entry *obj) { Objlist_Entry *elm; if ((elm = objlist_find(list, obj)) != NULL) { STAILQ_REMOVE(list, elm, Struct_Objlist_Entry, link); free(elm); } } /* * Relocate dag rooted in the specified object. * Returns 0 on success, or -1 on failure. */ static int relocate_object_dag(Obj_Entry *root, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate) { Objlist_Entry *elm; int error; error = 0; STAILQ_FOREACH(elm, &root->dagmembers, link) { error = relocate_object(elm->obj, bind_now, rtldobj, flags, lockstate); if (error == -1) break; } return (error); } /* * Prepare for, or clean after, relocating an object marked with * DT_TEXTREL or DF_TEXTREL. Before relocating, all read-only * segments are remapped read-write. After relocations are done, the * segment's permissions are returned back to the modes specified in * the phdrs. If any relocation happened, or always for wired * program, COW is triggered. */ static int reloc_textrel_prot(Obj_Entry *obj, bool before) { const Elf_Phdr *ph; void *base; size_t l, sz; int prot; for (l = obj->phsize / sizeof(*ph), ph = obj->phdr; l > 0; l--, ph++) { if (ph->p_type != PT_LOAD || (ph->p_flags & PF_W) != 0) continue; base = obj->relocbase + trunc_page(ph->p_vaddr); sz = round_page(ph->p_vaddr + ph->p_filesz) - trunc_page(ph->p_vaddr); prot = convert_prot(ph->p_flags) | (before ? PROT_WRITE : 0); if (mprotect(base, sz, prot) == -1) { _rtld_error("%s: Cannot write-%sable text segment: %s", obj->path, before ? "en" : "dis", rtld_strerror(errno)); return (-1); } } return (0); } /* * Relocate single object. * Returns 0 on success, or -1 on failure. 
*/ static int relocate_object(Obj_Entry *obj, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate) { if (obj->relocated) return (0); obj->relocated = true; if (obj != rtldobj) dbg("relocating \"%s\"", obj->path); if (obj->symtab == NULL || obj->strtab == NULL || !(obj->valid_hash_sysv || obj->valid_hash_gnu)) { _rtld_error("%s: Shared object has no run-time symbol table", obj->path); return (-1); } /* There are relocations to the write-protected text segment. */ if (obj->textrel && reloc_textrel_prot(obj, true) != 0) return (-1); /* Process the non-PLT non-IFUNC relocations. */ if (reloc_non_plt(obj, rtldobj, flags, lockstate)) return (-1); /* Re-protected the text segment. */ if (obj->textrel && reloc_textrel_prot(obj, false) != 0) return (-1); /* Set the special PLT or GOT entries. */ init_pltgot(obj); /* Process the PLT relocations. */ if (reloc_plt(obj) == -1) return (-1); /* Relocate the jump slots if we are doing immediate binding. */ if (obj->bind_now || bind_now) if (reloc_jmpslots(obj, flags, lockstate) == -1) return (-1); /* * Process the non-PLT IFUNC relocations. The relocations are * processed in two phases, because IFUNC resolvers may * reference other symbols, which must be readily processed * before resolvers are called. */ if (obj->non_plt_gnu_ifunc && reloc_non_plt(obj, rtldobj, flags | SYMLOOK_IFUNC, lockstate)) return (-1); if (!obj->mainprog && obj_enforce_relro(obj) == -1) return (-1); /* * Set up the magic number and version in the Obj_Entry. These * were checked in the crt1.o from the original ElfKit, so we * set them for backward compatibility. */ obj->magic = RTLD_MAGIC; obj->version = RTLD_VERSION; return (0); } /* * Relocate newly-loaded shared objects. The argument is a pointer to * the Obj_Entry for the first such object. All objects from the first * to the end of the list of objects are relocated. Returns 0 on success, * or -1 on failure. */ static int relocate_objects(Obj_Entry *first, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate) { Obj_Entry *obj; int error; for (error = 0, obj = first; obj != NULL; obj = TAILQ_NEXT(obj, next)) { if (obj->marker) continue; error = relocate_object(obj, bind_now, rtldobj, flags, lockstate); if (error == -1) break; } return (error); } /* * The handling of R_MACHINE_IRELATIVE relocations and jumpslots * referencing STT_GNU_IFUNC symbols is postponed till the other * relocations are done. The indirect functions specified as * ifunc are allowed to call other symbols, so we need to have * objects relocated before asking for resolution from indirects. * * The R_MACHINE_IRELATIVE slots are resolved in greedy fashion, * instead of the usual lazy handling of PLT slots. It is * consistent with how GNU does it. 
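 *
 * For reference, an STT_GNU_IFUNC symbol is what the toolchain emits
 * for a declaration like the sketch below (hypothetical names, using
 * the gcc/clang "ifunc" attribute):
 *
 *	static int foo_generic(void) { return (0); }
 *	static int foo_fast(void) { return (1); }
 *	static int (*resolve_foo(void))(void)
 *	{
 *		return (cpu_feature_present ? foo_fast : foo_generic);
 *	}
 *	int foo(void) __attribute__((ifunc("resolve_foo")));
 *
 * The resolver (resolve_foo here) is what ends up being called from
 * reloc_iresolve() and reloc_gnu_ifunc() once the object's ordinary
 * relocations are in place.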
*/ static int resolve_object_ifunc(Obj_Entry *obj, bool bind_now, int flags, RtldLockState *lockstate) { if (obj->irelative && reloc_iresolve(obj, lockstate) == -1) return (-1); if ((obj->bind_now || bind_now) && obj->gnu_ifunc && reloc_gnu_ifunc(obj, flags, lockstate) == -1) return (-1); return (0); } static int resolve_objects_ifunc(Obj_Entry *first, bool bind_now, int flags, RtldLockState *lockstate) { Obj_Entry *obj; for (obj = first; obj != NULL; obj = TAILQ_NEXT(obj, next)) { if (obj->marker) continue; if (resolve_object_ifunc(obj, bind_now, flags, lockstate) == -1) return (-1); } return (0); } static int initlist_objects_ifunc(Objlist *list, bool bind_now, int flags, RtldLockState *lockstate) { Objlist_Entry *elm; STAILQ_FOREACH(elm, list, link) { if (resolve_object_ifunc(elm->obj, bind_now, flags, lockstate) == -1) return (-1); } return (0); } /* * Cleanup procedure. It will be called (by the atexit mechanism) just * before the process exits. */ static void rtld_exit(void) { RtldLockState lockstate; wlock_acquire(rtld_bind_lock, &lockstate); dbg("rtld_exit()"); objlist_call_fini(&list_fini, NULL, &lockstate); /* No need to remove the items from the list, since we are exiting. */ if (!libmap_disable) lm_fini(); lock_release(rtld_bind_lock, &lockstate); } /* * Iterate over a search path, translate each element, and invoke the * callback on the result. */ static void * path_enumerate(const char *path, path_enum_proc callback, void *arg) { const char *trans; if (path == NULL) return (NULL); path += strspn(path, ":;"); while (*path != '\0') { size_t len; char *res; len = strcspn(path, ":;"); trans = lm_findn(NULL, path, len); if (trans) res = callback(trans, strlen(trans), arg); else res = callback(path, len, arg); if (res != NULL) return (res); path += len; path += strspn(path, ":;"); } return (NULL); } struct try_library_args { const char *name; size_t namelen; char *buffer; size_t buflen; }; static void * try_library_path(const char *dir, size_t dirlen, void *param) { struct try_library_args *arg; arg = param; if (*dir == '/' || trust) { char *pathname; if (dirlen + 1 + arg->namelen + 1 > arg->buflen) return (NULL); pathname = arg->buffer; strncpy(pathname, dir, dirlen); pathname[dirlen] = '/'; strcpy(pathname + dirlen + 1, arg->name); dbg(" Trying \"%s\"", pathname); if (access(pathname, F_OK) == 0) { /* We found it */ pathname = xmalloc(dirlen + 1 + arg->namelen + 1); strcpy(pathname, arg->buffer); return (pathname); } } return (NULL); } static char * search_library_path(const char *name, const char *path) { char *p; struct try_library_args arg; if (path == NULL) return NULL; arg.name = name; arg.namelen = strlen(name); arg.buffer = xmalloc(PATH_MAX); arg.buflen = PATH_MAX; p = path_enumerate(path, try_library_path, &arg); free(arg.buffer); return (p); } /* * Finds the library with the given name using the directory descriptors * listed in the LD_LIBRARY_PATH_FDS environment variable. * * Returns a freshly-opened close-on-exec file descriptor for the library, * or -1 if the library cannot be found. */ static char * search_library_pathfds(const char *name, const char *path, int *fdp) { char *envcopy, *fdstr, *found, *last_token; size_t len; int dirfd, fd; dbg("%s('%s', '%s', fdp)", __func__, name, path); /* Don't load from user-specified libdirs into setuid binaries. */ if (!trust) return (NULL); /* We can't do anything if LD_LIBRARY_PATH_FDS isn't set. */ if (path == NULL) return (NULL); /* LD_LIBRARY_PATH_FDS only works with relative paths. 
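 *
 * For example (hypothetical values): with LD_LIBRARY_PATH_FDS="3:4",
 * a request for "libfoo.so.1" is attempted as
 * openat(3, "libfoo.so.1") and then openat(4, "libfoo.so.1"); the
 * first descriptor that succeeds is handed back through fdp and the
 * "path" reported for the object becomes "#3/libfoo.so.1".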
*/ if (name[0] == '/') { dbg("Absolute path (%s) passed to %s", name, __func__); return (NULL); } /* * Use strtok_r() to walk the FD:FD:FD list. This requires a local * copy of the path, as strtok_r rewrites separator tokens * with '\0'. */ found = NULL; envcopy = xstrdup(path); for (fdstr = strtok_r(envcopy, ":", &last_token); fdstr != NULL; fdstr = strtok_r(NULL, ":", &last_token)) { dirfd = parse_integer(fdstr); if (dirfd < 0) { _rtld_error("failed to parse directory FD: '%s'", fdstr); break; } fd = __sys_openat(dirfd, name, O_RDONLY | O_CLOEXEC | O_VERIFY); if (fd >= 0) { *fdp = fd; len = strlen(fdstr) + strlen(name) + 3; found = xmalloc(len); if (rtld_snprintf(found, len, "#%d/%s", dirfd, name) < 0) { _rtld_error("error generating '%d/%s'", dirfd, name); rtld_die(); } dbg("open('%s') => %d", found, fd); break; } } free(envcopy); return (found); } int dlclose(void *handle) { RtldLockState lockstate; int error; wlock_acquire(rtld_bind_lock, &lockstate); error = dlclose_locked(handle, &lockstate); lock_release(rtld_bind_lock, &lockstate); return (error); } static int dlclose_locked(void *handle, RtldLockState *lockstate) { Obj_Entry *root; root = dlcheck(handle); if (root == NULL) return -1; LD_UTRACE(UTRACE_DLCLOSE_START, handle, NULL, 0, root->dl_refcount, root->path); /* Unreference the object and its dependencies. */ root->dl_refcount--; if (root->refcount == 1) { /* * The object will be no longer referenced, so we must unload it. * First, call the fini functions. */ objlist_call_fini(&list_fini, root, lockstate); unref_dag(root); /* Finish cleaning up the newly-unreferenced objects. */ GDB_STATE(RT_DELETE,&root->linkmap); unload_object(root, lockstate); GDB_STATE(RT_CONSISTENT,NULL); } else unref_dag(root); LD_UTRACE(UTRACE_DLCLOSE_STOP, handle, NULL, 0, 0, NULL); return 0; } char * dlerror(void) { char *msg = error_message; error_message = NULL; return msg; } /* * This function is deprecated and has no effect. */ void dllockinit(void *context, void *(*lock_create)(void *context), void (*rlock_acquire)(void *lock), void (*wlock_acquire)(void *lock), void (*lock_release)(void *lock), void (*lock_destroy)(void *lock), void (*context_destroy)(void *context)) { static void *cur_context; static void (*cur_context_destroy)(void *); /* Just destroy the context from the previous call, if necessary. */ if (cur_context_destroy != NULL) cur_context_destroy(cur_context); cur_context = context; cur_context_destroy = context_destroy; } void * dlopen(const char *name, int mode) { return (rtld_dlopen(name, -1, mode)); } void * fdlopen(int fd, int mode) { return (rtld_dlopen(NULL, fd, mode)); } static void * rtld_dlopen(const char *name, int fd, int mode) { RtldLockState lockstate; int lo_flags; LD_UTRACE(UTRACE_DLOPEN_START, NULL, NULL, 0, mode, name); ld_tracing = (mode & RTLD_TRACE) == 0 ? 
NULL : "1"; if (ld_tracing != NULL) { rlock_acquire(rtld_bind_lock, &lockstate); if (sigsetjmp(lockstate.env, 0) != 0) lock_upgrade(rtld_bind_lock, &lockstate); environ = (char **)*get_program_var_addr("environ", &lockstate); lock_release(rtld_bind_lock, &lockstate); } lo_flags = RTLD_LO_DLOPEN; if (mode & RTLD_NODELETE) lo_flags |= RTLD_LO_NODELETE; if (mode & RTLD_NOLOAD) lo_flags |= RTLD_LO_NOLOAD; if (ld_tracing != NULL) lo_flags |= RTLD_LO_TRACE; return (dlopen_object(name, fd, obj_main, lo_flags, mode & (RTLD_MODEMASK | RTLD_GLOBAL), NULL)); } static void dlopen_cleanup(Obj_Entry *obj, RtldLockState *lockstate) { obj->dl_refcount--; unref_dag(obj); if (obj->refcount == 0) unload_object(obj, lockstate); } static Obj_Entry * dlopen_object(const char *name, int fd, Obj_Entry *refobj, int lo_flags, int mode, RtldLockState *lockstate) { Obj_Entry *old_obj_tail; Obj_Entry *obj; Objlist initlist; RtldLockState mlockstate; int result; objlist_init(&initlist); if (lockstate == NULL && !(lo_flags & RTLD_LO_EARLY)) { wlock_acquire(rtld_bind_lock, &mlockstate); lockstate = &mlockstate; } GDB_STATE(RT_ADD,NULL); old_obj_tail = globallist_curr(TAILQ_LAST(&obj_list, obj_entry_q)); obj = NULL; if (name == NULL && fd == -1) { obj = obj_main; obj->refcount++; } else { obj = load_object(name, fd, refobj, lo_flags); } if (obj) { obj->dl_refcount++; if (mode & RTLD_GLOBAL && objlist_find(&list_global, obj) == NULL) objlist_push_tail(&list_global, obj); if (globallist_next(old_obj_tail) != NULL) { /* We loaded something new. */ assert(globallist_next(old_obj_tail) == obj); result = load_needed_objects(obj, lo_flags & (RTLD_LO_DLOPEN | RTLD_LO_EARLY)); init_dag(obj); ref_dag(obj); if (result != -1) result = rtld_verify_versions(&obj->dagmembers); if (result != -1 && ld_tracing) goto trace; if (result == -1 || relocate_object_dag(obj, (mode & RTLD_MODEMASK) == RTLD_NOW, &obj_rtld, (lo_flags & RTLD_LO_EARLY) ? SYMLOOK_EARLY : 0, lockstate) == -1) { dlopen_cleanup(obj, lockstate); obj = NULL; } else if (lo_flags & RTLD_LO_EARLY) { /* * Do not call the init functions for early loaded * filtees. The image is still not initialized enough * for them to work. * * Our object is found by the global object list and * will be ordered among all init calls done right * before transferring control to main. */ } else { /* Make list of init functions to call. */ initlist_add_objects(obj, obj, &initlist); } /* * Process all no_delete or global objects here, given * them own DAGs to prevent their dependencies from being * unloaded. This has to be done after we have loaded all * of the dependencies, so that we do not miss any. */ if (obj != NULL) process_z(obj); } else { /* * Bump the reference counts for objects on this DAG. If * this is the first dlopen() call for the object that was * already loaded as a dependency, initialize the dag * starting at it. */ init_dag(obj); ref_dag(obj); if ((lo_flags & RTLD_LO_TRACE) != 0) goto trace; } if (obj != NULL && ((lo_flags & RTLD_LO_NODELETE) != 0 || obj->z_nodelete) && !obj->ref_nodel) { dbg("obj %s nodelete", obj->path); ref_dag(obj); obj->z_nodelete = obj->ref_nodel = true; } } LD_UTRACE(UTRACE_DLOPEN_STOP, obj, NULL, 0, obj ? obj->dl_refcount : 0, name); GDB_STATE(RT_CONSISTENT,obj ? &obj->linkmap : NULL); if (!(lo_flags & RTLD_LO_EARLY)) { map_stacks_exec(lockstate); } if (initlist_objects_ifunc(&initlist, (mode & RTLD_MODEMASK) == RTLD_NOW, (lo_flags & RTLD_LO_EARLY) ? 
SYMLOOK_EARLY : 0, lockstate) == -1) { objlist_clear(&initlist); dlopen_cleanup(obj, lockstate); if (lockstate == &mlockstate) lock_release(rtld_bind_lock, lockstate); return (NULL); } if (!(lo_flags & RTLD_LO_EARLY)) { /* Call the init functions. */ objlist_call_init(&initlist, lockstate); } objlist_clear(&initlist); if (lockstate == &mlockstate) lock_release(rtld_bind_lock, lockstate); return obj; trace: trace_loaded_objects(obj); if (lockstate == &mlockstate) lock_release(rtld_bind_lock, lockstate); exit(0); } static void * do_dlsym(void *handle, const char *name, void *retaddr, const Ver_Entry *ve, int flags) { DoneList donelist; const Obj_Entry *obj, *defobj; const Elf_Sym *def; SymLook req; RtldLockState lockstate; tls_index ti; void *sym; int res; def = NULL; defobj = NULL; symlook_init(&req, name); req.ventry = ve; req.flags = flags | SYMLOOK_IN_PLT; req.lockstate = &lockstate; LD_UTRACE(UTRACE_DLSYM_START, handle, NULL, 0, 0, name); rlock_acquire(rtld_bind_lock, &lockstate); if (sigsetjmp(lockstate.env, 0) != 0) lock_upgrade(rtld_bind_lock, &lockstate); if (handle == NULL || handle == RTLD_NEXT || handle == RTLD_DEFAULT || handle == RTLD_SELF) { if ((obj = obj_from_addr(retaddr)) == NULL) { _rtld_error("Cannot determine caller's shared object"); lock_release(rtld_bind_lock, &lockstate); LD_UTRACE(UTRACE_DLSYM_STOP, handle, NULL, 0, 0, name); return NULL; } if (handle == NULL) { /* Just the caller's shared object. */ res = symlook_obj(&req, obj); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } else if (handle == RTLD_NEXT || /* Objects after caller's */ handle == RTLD_SELF) { /* ... caller included */ if (handle == RTLD_NEXT) obj = globallist_next(obj); for (; obj != NULL; obj = TAILQ_NEXT(obj, next)) { if (obj->marker) continue; res = symlook_obj(&req, obj); if (res == 0) { if (def == NULL || ELF_ST_BIND(req.sym_out->st_info) != STB_WEAK) { def = req.sym_out; defobj = req.defobj_out; if (ELF_ST_BIND(def->st_info) != STB_WEAK) break; } } } /* * Search the dynamic linker itself, and possibly resolve the * symbol from there. This is how the application links to * dynamic linker services such as dlopen. */ if (def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) { res = symlook_obj(&req, &obj_rtld); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } } else { assert(handle == RTLD_DEFAULT); res = symlook_default(&req, obj); if (res == 0) { defobj = req.defobj_out; def = req.sym_out; } } } else { if ((obj = dlcheck(handle)) == NULL) { lock_release(rtld_bind_lock, &lockstate); LD_UTRACE(UTRACE_DLSYM_STOP, handle, NULL, 0, 0, name); return NULL; } donelist_init(&donelist); if (obj->mainprog) { /* Handle obtained by dlopen(NULL, ...) implies global scope. */ res = symlook_global(&req, &donelist); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } /* * Search the dynamic linker itself, and possibly resolve the * symbol from there. This is how the application links to * dynamic linker services such as dlopen. */ if (def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) { res = symlook_obj(&req, &obj_rtld); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } } else { /* Search the whole DAG rooted at the given object. */ res = symlook_list(&req, &obj->dagmembers, &donelist); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } } if (def != NULL) { lock_release(rtld_bind_lock, &lockstate); /* * The value required by the caller is derived from the value * of the symbol. this is simply the relocated value of the * symbol. 
*/ if (ELF_ST_TYPE(def->st_info) == STT_FUNC) sym = make_function_pointer(def, defobj); else if (ELF_ST_TYPE(def->st_info) == STT_GNU_IFUNC) sym = rtld_resolve_ifunc(defobj, def); else if (ELF_ST_TYPE(def->st_info) == STT_TLS) { ti.ti_module = defobj->tlsindex; ti.ti_offset = def->st_value; sym = __tls_get_addr(&ti); } else sym = defobj->relocbase + def->st_value; LD_UTRACE(UTRACE_DLSYM_STOP, handle, sym, 0, 0, name); return (sym); } _rtld_error("Undefined symbol \"%s%s%s\"", name, ve != NULL ? "@" : "", ve != NULL ? ve->name : ""); lock_release(rtld_bind_lock, &lockstate); LD_UTRACE(UTRACE_DLSYM_STOP, handle, NULL, 0, 0, name); return NULL; } void * dlsym(void *handle, const char *name) { return do_dlsym(handle, name, __builtin_return_address(0), NULL, SYMLOOK_DLSYM); } dlfunc_t dlfunc(void *handle, const char *name) { union { void *d; dlfunc_t f; } rv; rv.d = do_dlsym(handle, name, __builtin_return_address(0), NULL, SYMLOOK_DLSYM); return (rv.f); } void * dlvsym(void *handle, const char *name, const char *version) { Ver_Entry ventry; ventry.name = version; ventry.file = NULL; ventry.hash = elf_hash(version); ventry.flags= 0; return do_dlsym(handle, name, __builtin_return_address(0), &ventry, SYMLOOK_DLSYM); } int _rtld_addr_phdr(const void *addr, struct dl_phdr_info *phdr_info) { const Obj_Entry *obj; RtldLockState lockstate; rlock_acquire(rtld_bind_lock, &lockstate); obj = obj_from_addr(addr); if (obj == NULL) { _rtld_error("No shared object contains address"); lock_release(rtld_bind_lock, &lockstate); return (0); } rtld_fill_dl_phdr_info(obj, phdr_info); lock_release(rtld_bind_lock, &lockstate); return (1); } int dladdr(const void *addr, Dl_info *info) { const Obj_Entry *obj; const Elf_Sym *def; void *symbol_addr; unsigned long symoffset; RtldLockState lockstate; rlock_acquire(rtld_bind_lock, &lockstate); obj = obj_from_addr(addr); if (obj == NULL) { _rtld_error("No shared object contains address"); lock_release(rtld_bind_lock, &lockstate); return 0; } info->dli_fname = obj->path; info->dli_fbase = obj->mapbase; info->dli_saddr = (void *)0; info->dli_sname = NULL; /* * Walk the symbol list looking for the symbol whose address is * closest to the address sent in. */ for (symoffset = 0; symoffset < obj->dynsymcount; symoffset++) { def = obj->symtab + symoffset; /* * For skip the symbol if st_shndx is either SHN_UNDEF or * SHN_COMMON. */ if (def->st_shndx == SHN_UNDEF || def->st_shndx == SHN_COMMON) continue; /* * If the symbol is greater than the specified address, or if it * is further away from addr than the current nearest symbol, * then reject it. */ symbol_addr = obj->relocbase + def->st_value; if (symbol_addr > addr || symbol_addr < info->dli_saddr) continue; /* Update our idea of the nearest symbol. */ info->dli_sname = obj->strtab + def->st_name; info->dli_saddr = symbol_addr; /* Exact match? 
*/ if (info->dli_saddr == addr) break; } lock_release(rtld_bind_lock, &lockstate); return 1; } int dlinfo(void *handle, int request, void *p) { const Obj_Entry *obj; RtldLockState lockstate; int error; rlock_acquire(rtld_bind_lock, &lockstate); if (handle == NULL || handle == RTLD_SELF) { void *retaddr; retaddr = __builtin_return_address(0); /* __GNUC__ only */ if ((obj = obj_from_addr(retaddr)) == NULL) _rtld_error("Cannot determine caller's shared object"); } else obj = dlcheck(handle); if (obj == NULL) { lock_release(rtld_bind_lock, &lockstate); return (-1); } error = 0; switch (request) { case RTLD_DI_LINKMAP: *((struct link_map const **)p) = &obj->linkmap; break; case RTLD_DI_ORIGIN: error = rtld_dirname(obj->path, p); break; case RTLD_DI_SERINFOSIZE: case RTLD_DI_SERINFO: error = do_search_info(obj, request, (struct dl_serinfo *)p); break; default: _rtld_error("Invalid request %d passed to dlinfo()", request); error = -1; } lock_release(rtld_bind_lock, &lockstate); return (error); } static void rtld_fill_dl_phdr_info(const Obj_Entry *obj, struct dl_phdr_info *phdr_info) { phdr_info->dlpi_addr = (Elf_Addr)obj->relocbase; phdr_info->dlpi_name = obj->path; phdr_info->dlpi_phdr = obj->phdr; phdr_info->dlpi_phnum = obj->phsize / sizeof(obj->phdr[0]); phdr_info->dlpi_tls_modid = obj->tlsindex; phdr_info->dlpi_tls_data = obj->tlsinit; phdr_info->dlpi_adds = obj_loads; phdr_info->dlpi_subs = obj_loads - obj_count; } int dl_iterate_phdr(__dl_iterate_hdr_callback callback, void *param) { struct dl_phdr_info phdr_info; Obj_Entry *obj, marker; RtldLockState bind_lockstate, phdr_lockstate; int error; init_marker(&marker); error = 0; wlock_acquire(rtld_phdr_lock, &phdr_lockstate); wlock_acquire(rtld_bind_lock, &bind_lockstate); for (obj = globallist_curr(TAILQ_FIRST(&obj_list)); obj != NULL;) { TAILQ_INSERT_AFTER(&obj_list, obj, &marker, next); rtld_fill_dl_phdr_info(obj, &phdr_info); hold_object(obj); lock_release(rtld_bind_lock, &bind_lockstate); error = callback(&phdr_info, sizeof phdr_info, param); wlock_acquire(rtld_bind_lock, &bind_lockstate); unhold_object(obj); obj = globallist_next(&marker); TAILQ_REMOVE(&obj_list, &marker, next); if (error != 0) { lock_release(rtld_bind_lock, &bind_lockstate); lock_release(rtld_phdr_lock, &phdr_lockstate); return (error); } } if (error == 0) { rtld_fill_dl_phdr_info(&obj_rtld, &phdr_info); lock_release(rtld_bind_lock, &bind_lockstate); error = callback(&phdr_info, sizeof(phdr_info), param); } lock_release(rtld_phdr_lock, &phdr_lockstate); return (error); } static void * fill_search_info(const char *dir, size_t dirlen, void *param) { struct fill_search_info_args *arg; arg = param; if (arg->request == RTLD_DI_SERINFOSIZE) { arg->serinfo->dls_cnt ++; arg->serinfo->dls_size += sizeof(struct dl_serpath) + dirlen + 1; } else { struct dl_serpath *s_entry; s_entry = arg->serpath; s_entry->dls_name = arg->strspace; s_entry->dls_flags = arg->flags; strncpy(arg->strspace, dir, dirlen); arg->strspace[dirlen] = '\0'; arg->strspace += dirlen + 1; arg->serpath++; } return (NULL); } static int do_search_info(const Obj_Entry *obj, int request, struct dl_serinfo *info) { struct dl_serinfo _info; struct fill_search_info_args args; args.request = RTLD_DI_SERINFOSIZE; args.serinfo = &_info; _info.dls_size = __offsetof(struct dl_serinfo, dls_serpath); _info.dls_cnt = 0; path_enumerate(obj->rpath, fill_search_info, &args); path_enumerate(ld_library_path, fill_search_info, &args); path_enumerate(obj->runpath, fill_search_info, &args); path_enumerate(gethints(obj->z_nodeflib), 
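/*
 * Editor's sketch (not part of the original source): dl_iterate_phdr()
 * above fills in a struct dl_phdr_info for every loaded object and hands
 * it to a user callback.  A typical caller looks roughly like this; the
 * callback name print_obj is an illustrative assumption.
 *
 *	#include <link.h>
 *	#include <stdio.h>
 *
 *	static int
 *	print_obj(struct dl_phdr_info *info, size_t size, void *data)
 *	{
 *		printf("%s mapped at %p, %d program headers\n",
 *		    info->dlpi_name, (void *)info->dlpi_addr,
 *		    (int)info->dlpi_phnum);
 *		return (0);	returning nonzero stops the iteration
 *	}
 *
 *	...
 *	dl_iterate_phdr(print_obj, NULL);
 */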
fill_search_info, &args); if (!obj->z_nodeflib) path_enumerate(ld_standard_library_path, fill_search_info, &args); if (request == RTLD_DI_SERINFOSIZE) { info->dls_size = _info.dls_size; info->dls_cnt = _info.dls_cnt; return (0); } if (info->dls_cnt != _info.dls_cnt || info->dls_size != _info.dls_size) { _rtld_error("Uninitialized Dl_serinfo struct passed to dlinfo()"); return (-1); } args.request = RTLD_DI_SERINFO; args.serinfo = info; args.serpath = &info->dls_serpath[0]; args.strspace = (char *)&info->dls_serpath[_info.dls_cnt]; args.flags = LA_SER_RUNPATH; if (path_enumerate(obj->rpath, fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_LIBPATH; if (path_enumerate(ld_library_path, fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_RUNPATH; if (path_enumerate(obj->runpath, fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_CONFIG; if (path_enumerate(gethints(obj->z_nodeflib), fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_DEFAULT; if (!obj->z_nodeflib && path_enumerate(ld_standard_library_path, fill_search_info, &args) != NULL) return (-1); return (0); } static int rtld_dirname(const char *path, char *bname) { const char *endp; /* Empty or NULL string gets treated as "." */ if (path == NULL || *path == '\0') { bname[0] = '.'; bname[1] = '\0'; return (0); } /* Strip trailing slashes */ endp = path + strlen(path) - 1; while (endp > path && *endp == '/') endp--; /* Find the start of the dir */ while (endp > path && *endp != '/') endp--; /* Either the dir is "/" or there are no slashes */ if (endp == path) { bname[0] = *endp == '/' ? '/' : '.'; bname[1] = '\0'; return (0); } else { do { endp--; } while (endp > path && *endp == '/'); } if (endp - path + 2 > PATH_MAX) { _rtld_error("Filename is too long: %s", path); return(-1); } strncpy(bname, path, endp - path + 1); bname[endp - path + 1] = '\0'; return (0); } static int rtld_dirname_abs(const char *path, char *base) { char *last; if (realpath(path, base) == NULL) return (-1); dbg("%s -> %s", path, base); last = strrchr(base, '/'); if (last == NULL) return (-1); if (last != base) *last = '\0'; return (0); } static void linkmap_add(Obj_Entry *obj) { struct link_map *l = &obj->linkmap; struct link_map *prev; obj->linkmap.l_name = obj->path; obj->linkmap.l_addr = obj->mapbase; obj->linkmap.l_ld = obj->dynamic; #ifdef __mips__ /* GDB needs load offset on MIPS to use the symbols */ obj->linkmap.l_offs = obj->relocbase; #endif if (r_debug.r_map == NULL) { r_debug.r_map = l; return; } /* * Scan to the end of the list, but not past the entry for the * dynamic linker, which we want to keep at the very end. */ for (prev = r_debug.r_map; prev->l_next != NULL && prev->l_next != &obj_rtld.linkmap; prev = prev->l_next) ; /* Link in the new entry. */ l->l_prev = prev; l->l_next = prev->l_next; if (l->l_next != NULL) l->l_next->l_prev = l; prev->l_next = l; } static void linkmap_delete(Obj_Entry *obj) { struct link_map *l = &obj->linkmap; if (l->l_prev == NULL) { if ((r_debug.r_map = l->l_next) != NULL) l->l_next->l_prev = NULL; return; } if ((l->l_prev->l_next = l->l_next) != NULL) l->l_next->l_prev = l->l_prev; } /* * Function for the debugger to set a breakpoint on to gain control. * * The two parameters allow the debugger to easily find and determine * what the runtime loader is doing and to whom it is doing it. 
* * When the loadhook trap is hit (r_debug_state, set at program * initialization), the arguments can be found on the stack: * * +8 struct link_map *m * +4 struct r_debug *rd * +0 RetAddr */ void r_debug_state(struct r_debug* rd, struct link_map *m) { /* * The following is a hack to force the compiler to emit calls to * this function, even when optimizing. If the function is empty, * the compiler is not obliged to emit any code for calls to it, * even when marked __noinline. However, gdb depends on those * calls being made. */ __compiler_membar(); } /* * A function called after init routines have completed. This can be used to * break before a program's entry routine is called, and can be used when * main is not available in the symbol table. */ void _r_debug_postinit(struct link_map *m) { /* See r_debug_state(). */ __compiler_membar(); } static void release_object(Obj_Entry *obj) { if (obj->holdcount > 0) { obj->unholdfree = true; return; } munmap(obj->mapbase, obj->mapsize); linkmap_delete(obj); obj_free(obj); } /* * Get address of the pointer variable in the main program. * Prefer non-weak symbol over the weak one. */ static const void ** get_program_var_addr(const char *name, RtldLockState *lockstate) { SymLook req; DoneList donelist; symlook_init(&req, name); req.lockstate = lockstate; donelist_init(&donelist); if (symlook_global(&req, &donelist) != 0) return (NULL); if (ELF_ST_TYPE(req.sym_out->st_info) == STT_FUNC) return ((const void **)make_function_pointer(req.sym_out, req.defobj_out)); else if (ELF_ST_TYPE(req.sym_out->st_info) == STT_GNU_IFUNC) return ((const void **)rtld_resolve_ifunc(req.defobj_out, req.sym_out)); else return ((const void **)(req.defobj_out->relocbase + req.sym_out->st_value)); } /* * Set a pointer variable in the main program to the given value. This * is used to set key variables such as "environ" before any of the * init functions are called. */ static void set_program_var(const char *name, const void *value) { const void **addr; if ((addr = get_program_var_addr(name, NULL)) != NULL) { dbg("\"%s\": *%p <-- %p", name, addr, value); *addr = value; } } /* * Search the global objects, including dependencies and main object, * for the given symbol. */ static int symlook_global(SymLook *req, DoneList *donelist) { SymLook req1; const Objlist_Entry *elm; int res; symlook_init_from_req(&req1, req); /* Search all objects loaded at program start up. */ if (req->defobj_out == NULL || ELF_ST_BIND(req->sym_out->st_info) == STB_WEAK) { res = symlook_list(&req1, &list_main, donelist); if (res == 0 && (req->defobj_out == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK)) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } /* Search all DAGs whose roots are RTLD_GLOBAL objects. */ STAILQ_FOREACH(elm, &list_global, link) { if (req->defobj_out != NULL && ELF_ST_BIND(req->sym_out->st_info) != STB_WEAK) break; res = symlook_list(&req1, &elm->obj->dagmembers, donelist); if (res == 0 && (req->defobj_out == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK)) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } return (req->sym_out != NULL ? 0 : ESRCH); } /* * Given a symbol name in a referencing object, find the corresponding * definition of the symbol. Returns a pointer to the symbol, or NULL if * no definition was found. Returns a pointer to the Obj_Entry of the * defining object via the reference parameter DEFOBJ_OUT. 
*/ static int symlook_default(SymLook *req, const Obj_Entry *refobj) { DoneList donelist; const Objlist_Entry *elm; SymLook req1; int res; donelist_init(&donelist); symlook_init_from_req(&req1, req); /* * Look first in the referencing object if linked symbolically, * and similarly handle protected symbols. */ res = symlook_obj(&req1, refobj); if (res == 0 && (refobj->symbolic || ELF_ST_VISIBILITY(req1.sym_out->st_other) == STV_PROTECTED)) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } if (refobj->symbolic || req->defobj_out != NULL) donelist_check(&donelist, refobj); symlook_global(req, &donelist); /* Search all dlopened DAGs containing the referencing object. */ STAILQ_FOREACH(elm, &refobj->dldags, link) { if (req->sym_out != NULL && ELF_ST_BIND(req->sym_out->st_info) != STB_WEAK) break; res = symlook_list(&req1, &elm->obj->dagmembers, &donelist); if (res == 0 && (req->sym_out == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK)) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } /* * Search the dynamic linker itself, and possibly resolve the * symbol from there. This is how the application links to * dynamic linker services such as dlopen. */ if (req->sym_out == NULL || ELF_ST_BIND(req->sym_out->st_info) == STB_WEAK) { res = symlook_obj(&req1, &obj_rtld); if (res == 0) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } return (req->sym_out != NULL ? 0 : ESRCH); } static int symlook_list(SymLook *req, const Objlist *objlist, DoneList *dlp) { const Elf_Sym *def; const Obj_Entry *defobj; const Objlist_Entry *elm; SymLook req1; int res; def = NULL; defobj = NULL; STAILQ_FOREACH(elm, objlist, link) { if (donelist_check(dlp, elm->obj)) continue; symlook_init_from_req(&req1, req); if ((res = symlook_obj(&req1, elm->obj)) == 0) { if (def == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK) { def = req1.sym_out; defobj = req1.defobj_out; if (ELF_ST_BIND(def->st_info) != STB_WEAK) break; } } } if (def != NULL) { req->sym_out = def; req->defobj_out = defobj; return (0); } return (ESRCH); } /* * Search the chain of DAGS cointed to by the given Needed_Entry * for a symbol of the given name. Each DAG is scanned completely * before advancing to the next one. Returns a pointer to the symbol, * or NULL if no definition was found. */ static int symlook_needed(SymLook *req, const Needed_Entry *needed, DoneList *dlp) { const Elf_Sym *def; const Needed_Entry *n; const Obj_Entry *defobj; SymLook req1; int res; def = NULL; defobj = NULL; symlook_init_from_req(&req1, req); for (n = needed; n != NULL; n = n->next) { if (n->obj == NULL || (res = symlook_list(&req1, &n->obj->dagmembers, dlp)) != 0) continue; if (def == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK) { def = req1.sym_out; defobj = req1.defobj_out; if (ELF_ST_BIND(def->st_info) != STB_WEAK) break; } } if (def != NULL) { req->sym_out = def; req->defobj_out = defobj; return (0); } return (ESRCH); } /* * Search the symbol table of a single shared object for a symbol of * the given name and version, if requested. Returns a pointer to the * symbol, or NULL if no definition was found. If the object is * filter, return filtered symbol from filtee. * * The symbol's hash value is passed in for efficiency reasons; that * eliminates many recomputations of the hash value. 
*/ int symlook_obj(SymLook *req, const Obj_Entry *obj) { DoneList donelist; SymLook req1; int flags, res, mres; /* * If there is at least one valid hash at this point, we prefer to * use the faster GNU version if available. */ if (obj->valid_hash_gnu) mres = symlook_obj1_gnu(req, obj); else if (obj->valid_hash_sysv) mres = symlook_obj1_sysv(req, obj); else return (EINVAL); if (mres == 0) { if (obj->needed_filtees != NULL) { flags = (req->flags & SYMLOOK_EARLY) ? RTLD_LO_EARLY : 0; load_filtees(__DECONST(Obj_Entry *, obj), flags, req->lockstate); donelist_init(&donelist); symlook_init_from_req(&req1, req); res = symlook_needed(&req1, obj->needed_filtees, &donelist); if (res == 0) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; } return (res); } if (obj->needed_aux_filtees != NULL) { flags = (req->flags & SYMLOOK_EARLY) ? RTLD_LO_EARLY : 0; load_filtees(__DECONST(Obj_Entry *, obj), flags, req->lockstate); donelist_init(&donelist); symlook_init_from_req(&req1, req); res = symlook_needed(&req1, obj->needed_aux_filtees, &donelist); if (res == 0) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; return (res); } } } return (mres); } /* Symbol match routine common to both hash functions */ static bool matched_symbol(SymLook *req, const Obj_Entry *obj, Sym_Match_Result *result, const unsigned long symnum) { Elf_Versym verndx; const Elf_Sym *symp; const char *strp; symp = obj->symtab + symnum; strp = obj->strtab + symp->st_name; switch (ELF_ST_TYPE(symp->st_info)) { case STT_FUNC: case STT_NOTYPE: case STT_OBJECT: case STT_COMMON: case STT_GNU_IFUNC: if (symp->st_value == 0) return (false); /* fallthrough */ case STT_TLS: if (symp->st_shndx != SHN_UNDEF) break; #ifndef __mips__ else if (((req->flags & SYMLOOK_IN_PLT) == 0) && (ELF_ST_TYPE(symp->st_info) == STT_FUNC)) break; /* fallthrough */ #endif default: return (false); } if (req->name[0] != strp[0] || strcmp(req->name, strp) != 0) return (false); if (req->ventry == NULL) { if (obj->versyms != NULL) { verndx = VER_NDX(obj->versyms[symnum]); if (verndx > obj->vernum) { _rtld_error( "%s: symbol %s references wrong version %d", obj->path, obj->strtab + symnum, verndx); return (false); } /* * If we are not called from dlsym (i.e. this * is a normal relocation from unversioned * binary), accept the symbol immediately if * it happens to have first version after this * shared object became versioned. Otherwise, * if symbol is versioned and not hidden, * remember it. If it is the only symbol with * this name exported by the shared object, it * will be returned as a match by the calling * function. If symbol is global (verndx < 2) * accept it unconditionally. 
*/ if ((req->flags & SYMLOOK_DLSYM) == 0 && verndx == VER_NDX_GIVEN) { result->sym_out = symp; return (true); } else if (verndx >= VER_NDX_GIVEN) { if ((obj->versyms[symnum] & VER_NDX_HIDDEN) == 0) { if (result->vsymp == NULL) result->vsymp = symp; result->vcount++; } return (false); } } result->sym_out = symp; return (true); } if (obj->versyms == NULL) { if (object_match_name(obj, req->ventry->name)) { _rtld_error("%s: object %s should provide version %s " "for symbol %s", obj_rtld.path, obj->path, req->ventry->name, obj->strtab + symnum); return (false); } } else { verndx = VER_NDX(obj->versyms[symnum]); if (verndx > obj->vernum) { _rtld_error("%s: symbol %s references wrong version %d", obj->path, obj->strtab + symnum, verndx); return (false); } if (obj->vertab[verndx].hash != req->ventry->hash || strcmp(obj->vertab[verndx].name, req->ventry->name)) { /* * Version does not match. Look if this is a * global symbol and if it is not hidden. If * global symbol (verndx < 2) is available, * use it. Do not return symbol if we are * called by dlvsym, because dlvsym looks for * a specific version and default one is not * what dlvsym wants. */ if ((req->flags & SYMLOOK_DLSYM) || (verndx >= VER_NDX_GIVEN) || (obj->versyms[symnum] & VER_NDX_HIDDEN)) return (false); } } result->sym_out = symp; return (true); } /* * Search for symbol using SysV hash function. * obj->buckets is known not to be NULL at this point; the test for this was * performed with the obj->valid_hash_sysv assignment. */ static int symlook_obj1_sysv(SymLook *req, const Obj_Entry *obj) { unsigned long symnum; Sym_Match_Result matchres; matchres.sym_out = NULL; matchres.vsymp = NULL; matchres.vcount = 0; for (symnum = obj->buckets[req->hash % obj->nbuckets]; symnum != STN_UNDEF; symnum = obj->chains[symnum]) { if (symnum >= obj->nchains) return (ESRCH); /* Bad object */ if (matched_symbol(req, obj, &matchres, symnum)) { req->sym_out = matchres.sym_out; req->defobj_out = obj; return (0); } } if (matchres.vcount == 1) { req->sym_out = matchres.vsymp; req->defobj_out = obj; return (0); } return (ESRCH); } /* Search for symbol using GNU hash function */ static int symlook_obj1_gnu(SymLook *req, const Obj_Entry *obj) { Elf_Addr bloom_word; const Elf32_Word *hashval; Elf32_Word bucket; Sym_Match_Result matchres; unsigned int h1, h2; unsigned long symnum; matchres.sym_out = NULL; matchres.vsymp = NULL; matchres.vcount = 0; /* Pick right bitmask word from Bloom filter array */ bloom_word = obj->bloom_gnu[(req->hash_gnu / __ELF_WORD_SIZE) & obj->maskwords_bm_gnu]; /* Calculate modulus word size of gnu hash and its derivative */ h1 = req->hash_gnu & (__ELF_WORD_SIZE - 1); h2 = ((req->hash_gnu >> obj->shift2_gnu) & (__ELF_WORD_SIZE - 1)); /* Filter out the "definitely not in set" queries */ if (((bloom_word >> h1) & (bloom_word >> h2) & 1) == 0) return (ESRCH); /* Locate hash chain and corresponding value element*/ bucket = obj->buckets_gnu[req->hash_gnu % obj->nbuckets_gnu]; if (bucket == 0) return (ESRCH); hashval = &obj->chain_zero_gnu[bucket]; do { if (((*hashval ^ req->hash_gnu) >> 1) == 0) { symnum = hashval - obj->chain_zero_gnu; if (matched_symbol(req, obj, &matchres, symnum)) { req->sym_out = matchres.sym_out; req->defobj_out = obj; return (0); } } } while ((*hashval++ & 1) == 0); if (matchres.vcount == 1) { req->sym_out = matchres.vsymp; req->defobj_out = obj; return (0); } return (ESRCH); } static void trace_loaded_objects(Obj_Entry *obj) { char *fmt1, *fmt2, *fmt, *main_local, *list_containers; int c; if ((main_local = 
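/*
 * Editor's sketch (not part of the original source): the hash_gnu value
 * consumed by symlook_obj1_gnu() above is the de-facto standard GNU symbol
 * hash.  The helper below shows that computation for reference; the name
 * gnu_hash_example is an illustrative assumption and is not the gnu_hash()
 * used by symlook_init().
 *
 *	#include <stdint.h>
 *
 *	static uint32_t
 *	gnu_hash_example(const char *s)
 *	{
 *		uint32_t h = 5381;
 *		unsigned char c;
 *
 *		for (c = *s; c != '\0'; c = *++s)
 *			h = h * 33 + c;		same as (h << 5) + h + c
 *		return (h);
 *	}
 */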
getenv(_LD("TRACE_LOADED_OBJECTS_PROGNAME"))) == NULL) main_local = ""; if ((fmt1 = getenv(_LD("TRACE_LOADED_OBJECTS_FMT1"))) == NULL) fmt1 = "\t%o => %p (%x)\n"; if ((fmt2 = getenv(_LD("TRACE_LOADED_OBJECTS_FMT2"))) == NULL) fmt2 = "\t%o (%x)\n"; list_containers = getenv(_LD("TRACE_LOADED_OBJECTS_ALL")); for (; obj != NULL; obj = TAILQ_NEXT(obj, next)) { Needed_Entry *needed; char *name, *path; bool is_lib; if (obj->marker) continue; if (list_containers && obj->needed != NULL) rtld_printf("%s:\n", obj->path); for (needed = obj->needed; needed; needed = needed->next) { if (needed->obj != NULL) { if (needed->obj->traced && !list_containers) continue; needed->obj->traced = true; path = needed->obj->path; } else path = "not found"; name = (char *)obj->strtab + needed->name; is_lib = strncmp(name, "lib", 3) == 0; /* XXX - bogus */ fmt = is_lib ? fmt1 : fmt2; while ((c = *fmt++) != '\0') { switch (c) { default: rtld_putchar(c); continue; case '\\': switch (c = *fmt) { case '\0': continue; case 'n': rtld_putchar('\n'); break; case 't': rtld_putchar('\t'); break; } break; case '%': switch (c = *fmt) { case '\0': continue; case '%': default: rtld_putchar(c); break; case 'A': rtld_putstr(main_local); break; case 'a': rtld_putstr(obj_main->path); break; case 'o': rtld_putstr(name); break; #if 0 case 'm': rtld_printf("%d", sodp->sod_major); break; case 'n': rtld_printf("%d", sodp->sod_minor); break; #endif case 'p': rtld_putstr(path); break; case 'x': rtld_printf("%p", needed->obj ? needed->obj->mapbase : 0); break; } break; } ++fmt; } } } } /* * Unload a dlopened object and its dependencies from memory and from * our data structures. It is assumed that the DAG rooted in the * object has already been unreferenced, and that the object has a * reference count of 0. */ static void unload_object(Obj_Entry *root, RtldLockState *lockstate) { Obj_Entry marker, *obj, *next; assert(root->refcount == 0); /* * Pass over the DAG removing unreferenced objects from * appropriate lists. */ unlink_object(root); /* Unmap all objects that are no longer referenced. */ for (obj = TAILQ_FIRST(&obj_list); obj != NULL; obj = next) { next = TAILQ_NEXT(obj, next); if (obj->marker || obj->refcount != 0) continue; LD_UTRACE(UTRACE_UNLOAD_OBJECT, obj, obj->mapbase, obj->mapsize, 0, obj->path); dbg("unloading \"%s\"", obj->path); /* * Unlink the object now to prevent new references from * being acquired while the bind lock is dropped in * recursive dlclose() invocations. */ TAILQ_REMOVE(&obj_list, obj, next); obj_count--; if (obj->filtees_loaded) { if (next != NULL) { init_marker(&marker); TAILQ_INSERT_BEFORE(next, &marker, next); unload_filtees(obj, lockstate); next = TAILQ_NEXT(&marker, next); TAILQ_REMOVE(&obj_list, &marker, next); } else unload_filtees(obj, lockstate); } release_object(obj); } } static void unlink_object(Obj_Entry *root) { Objlist_Entry *elm; if (root->refcount == 0) { /* Remove the object from the RTLD_GLOBAL list. */ objlist_remove(&list_global, root); /* Remove the object from all objects' DAG lists. */ STAILQ_FOREACH(elm, &root->dagmembers, link) { objlist_remove(&elm->obj->dldags, root); if (elm->obj != root) unlink_object(elm->obj); } } } static void ref_dag(Obj_Entry *root) { Objlist_Entry *elm; assert(root->dag_inited); STAILQ_FOREACH(elm, &root->dagmembers, link) elm->obj->refcount++; } static void unref_dag(Obj_Entry *root) { Objlist_Entry *elm; assert(root->dag_inited); STAILQ_FOREACH(elm, &root->dagmembers, link) elm->obj->refcount--; } /* * Common code for MD __tls_get_addr(). 
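 *
 * Editor's note (hedged, not part of the original comment): callers reach
 * this code through the MD __tls_get_addr() entry point with a tls_index
 * descriptor, conceptually:
 *
 *	typedef struct {
 *		unsigned long ti_module;	DTV index of the module
 *		unsigned long ti_offset;	offset within its TLS block
 *	} tls_index;
 *
 *	void *p = __tls_get_addr(&ti);		yields block + ti.ti_offset
 *
 * The exact field types are architecture-defined; the layout above is
 * illustrative only (compare the STT_TLS case in do_dlsym()).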
*/ static void *tls_get_addr_slow(Elf_Addr **, int, size_t) __noinline; static void * tls_get_addr_slow(Elf_Addr **dtvp, int index, size_t offset) { Elf_Addr *newdtv, *dtv; RtldLockState lockstate; int to_copy; dtv = *dtvp; /* Check dtv generation in case new modules have arrived */ if (dtv[0] != tls_dtv_generation) { wlock_acquire(rtld_bind_lock, &lockstate); newdtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); to_copy = dtv[1]; if (to_copy > tls_max_index) to_copy = tls_max_index; memcpy(&newdtv[2], &dtv[2], to_copy * sizeof(Elf_Addr)); newdtv[0] = tls_dtv_generation; newdtv[1] = tls_max_index; free(dtv); lock_release(rtld_bind_lock, &lockstate); dtv = *dtvp = newdtv; } /* Dynamically allocate module TLS if necessary */ if (dtv[index + 1] == 0) { /* Signal safe, wlock will block out signals. */ wlock_acquire(rtld_bind_lock, &lockstate); if (!dtv[index + 1]) dtv[index + 1] = (Elf_Addr)allocate_module_tls(index); lock_release(rtld_bind_lock, &lockstate); } return ((void *)(dtv[index + 1] + offset)); } void * tls_get_addr_common(Elf_Addr **dtvp, int index, size_t offset) { Elf_Addr *dtv; dtv = *dtvp; /* Check dtv generation in case new modules have arrived */ if (__predict_true(dtv[0] == tls_dtv_generation && dtv[index + 1] != 0)) return ((void *)(dtv[index + 1] + offset)); return (tls_get_addr_slow(dtvp, index, offset)); } #if defined(__aarch64__) || defined(__arm__) || defined(__mips__) || \ - defined(__powerpc__) || defined(__riscv__) + defined(__powerpc__) || defined(__riscv) /* * Allocate Static TLS using the Variant I method. */ void * allocate_tls(Obj_Entry *objs, void *oldtcb, size_t tcbsize, size_t tcbalign) { Obj_Entry *obj; char *tcb; Elf_Addr **tls; Elf_Addr *dtv; Elf_Addr addr; int i; if (oldtcb != NULL && tcbsize == TLS_TCB_SIZE) return (oldtcb); assert(tcbsize >= TLS_TCB_SIZE); tcb = xcalloc(1, tls_static_space - TLS_TCB_SIZE + tcbsize); tls = (Elf_Addr **)(tcb + tcbsize - TLS_TCB_SIZE); if (oldtcb != NULL) { memcpy(tls, oldtcb, tls_static_space); free(oldtcb); /* Adjust the DTV. */ dtv = tls[0]; for (i = 0; i < dtv[1]; i++) { if (dtv[i+2] >= (Elf_Addr)oldtcb && dtv[i+2] < (Elf_Addr)oldtcb + tls_static_space) { dtv[i+2] = dtv[i+2] - (Elf_Addr)oldtcb + (Elf_Addr)tls; } } } else { dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); tls[0] = dtv; dtv[0] = tls_dtv_generation; dtv[1] = tls_max_index; for (obj = globallist_curr(objs); obj != NULL; obj = globallist_next(obj)) { if (obj->tlsoffset > 0) { addr = (Elf_Addr)tls + obj->tlsoffset; if (obj->tlsinitsize > 0) memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize); if (obj->tlssize > obj->tlsinitsize) memset((void*) (addr + obj->tlsinitsize), 0, obj->tlssize - obj->tlsinitsize); dtv[obj->tlsindex + 1] = addr; } } } return (tcb); } void free_tls(void *tcb, size_t tcbsize, size_t tcbalign) { Elf_Addr *dtv; Elf_Addr tlsstart, tlsend; int dtvsize, i; assert(tcbsize >= TLS_TCB_SIZE); tlsstart = (Elf_Addr)tcb + tcbsize - TLS_TCB_SIZE; tlsend = tlsstart + tls_static_space; dtv = *(Elf_Addr **)tlsstart; dtvsize = dtv[1]; for (i = 0; i < dtvsize; i++) { if (dtv[i+2] && (dtv[i+2] < tlsstart || dtv[i+2] >= tlsend)) { free((void*)dtv[i+2]); } } free(dtv); free(tcb); } #endif #if defined(__i386__) || defined(__amd64__) || defined(__sparc64__) /* * Allocate Static TLS using the Variant II method. 
*/ void * allocate_tls(Obj_Entry *objs, void *oldtls, size_t tcbsize, size_t tcbalign) { Obj_Entry *obj; size_t size, ralign; char *tls; Elf_Addr *dtv, *olddtv; Elf_Addr segbase, oldsegbase, addr; int i; ralign = tcbalign; if (tls_static_max_align > ralign) ralign = tls_static_max_align; size = round(tls_static_space, ralign) + round(tcbsize, ralign); assert(tcbsize >= 2*sizeof(Elf_Addr)); tls = malloc_aligned(size, ralign); dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); segbase = (Elf_Addr)(tls + round(tls_static_space, ralign)); ((Elf_Addr*)segbase)[0] = segbase; ((Elf_Addr*)segbase)[1] = (Elf_Addr) dtv; dtv[0] = tls_dtv_generation; dtv[1] = tls_max_index; if (oldtls) { /* * Copy the static TLS block over whole. */ oldsegbase = (Elf_Addr) oldtls; memcpy((void *)(segbase - tls_static_space), (const void *)(oldsegbase - tls_static_space), tls_static_space); /* * If any dynamic TLS blocks have been created tls_get_addr(), * move them over. */ olddtv = ((Elf_Addr**)oldsegbase)[1]; for (i = 0; i < olddtv[1]; i++) { if (olddtv[i+2] < oldsegbase - size || olddtv[i+2] > oldsegbase) { dtv[i+2] = olddtv[i+2]; olddtv[i+2] = 0; } } /* * We assume that this block was the one we created with * allocate_initial_tls(). */ free_tls(oldtls, 2*sizeof(Elf_Addr), sizeof(Elf_Addr)); } else { for (obj = objs; obj != NULL; obj = TAILQ_NEXT(obj, next)) { if (obj->marker || obj->tlsoffset == 0) continue; addr = segbase - obj->tlsoffset; memset((void*) (addr + obj->tlsinitsize), 0, obj->tlssize - obj->tlsinitsize); if (obj->tlsinit) memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize); dtv[obj->tlsindex + 1] = addr; } } return (void*) segbase; } void free_tls(void *tls, size_t tcbsize, size_t tcbalign) { Elf_Addr* dtv; size_t size, ralign; int dtvsize, i; Elf_Addr tlsstart, tlsend; /* * Figure out the size of the initial TLS block so that we can * find stuff which ___tls_get_addr() allocated dynamically. */ ralign = tcbalign; if (tls_static_max_align > ralign) ralign = tls_static_max_align; size = round(tls_static_space, ralign); dtv = ((Elf_Addr**)tls)[1]; dtvsize = dtv[1]; tlsend = (Elf_Addr) tls; tlsstart = tlsend - size; for (i = 0; i < dtvsize; i++) { if (dtv[i + 2] != 0 && (dtv[i + 2] < tlsstart || dtv[i + 2] > tlsend)) { free_aligned((void *)dtv[i + 2]); } } free_aligned((void *)tlsstart); free((void*) dtv); } #endif /* * Allocate TLS block for module with given index. */ void * allocate_module_tls(int index) { Obj_Entry* obj; char* p; TAILQ_FOREACH(obj, &obj_list, next) { if (obj->marker) continue; if (obj->tlsindex == index) break; } if (!obj) { _rtld_error("Can't find module with TLS index %d", index); rtld_die(); } p = malloc_aligned(obj->tlssize, obj->tlsalign); memcpy(p, obj->tlsinit, obj->tlsinitsize); memset(p + obj->tlsinitsize, 0, obj->tlssize - obj->tlsinitsize); return p; } bool allocate_tls_offset(Obj_Entry *obj) { size_t off; if (obj->tls_done) return true; if (obj->tlssize == 0) { obj->tls_done = true; return true; } if (tls_last_offset == 0) off = calculate_first_tls_offset(obj->tlssize, obj->tlsalign); else off = calculate_tls_offset(tls_last_offset, tls_last_size, obj->tlssize, obj->tlsalign); /* * If we have already fixed the size of the static TLS block, we * must stay within that size. When allocating the static TLS, we * leave a small amount of space spare to be used for dynamically * loading modules which use static TLS. 
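 *
 * Editor's note (hedged, not part of the original comment): the MD helpers
 * calculate_first_tls_offset() and calculate_tls_offset() essentially place
 * each module's block at the next suitably aligned position.  A generic
 * round-up of that kind looks like the sketch below; the real helpers are
 * architecture-specific and may lay blocks out above or below the TCB.
 *
 *	static size_t
 *	roundup_offset(size_t prev_end, size_t align)
 *	{
 *		return ((prev_end + align - 1) & ~(align - 1));
 *	}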
*/ if (tls_static_space != 0) { if (calculate_tls_end(off, obj->tlssize) > tls_static_space) return false; } else if (obj->tlsalign > tls_static_max_align) { tls_static_max_align = obj->tlsalign; } tls_last_offset = obj->tlsoffset = off; tls_last_size = obj->tlssize; obj->tls_done = true; return true; } void free_tls_offset(Obj_Entry *obj) { /* * If we were the last thing to allocate out of the static TLS * block, we give our space back to the 'allocator'. This is a * simplistic workaround to allow libGL.so.1 to be loaded and * unloaded multiple times. */ if (calculate_tls_end(obj->tlsoffset, obj->tlssize) == calculate_tls_end(tls_last_offset, tls_last_size)) { tls_last_offset -= obj->tlssize; tls_last_size = 0; } } void * _rtld_allocate_tls(void *oldtls, size_t tcbsize, size_t tcbalign) { void *ret; RtldLockState lockstate; wlock_acquire(rtld_bind_lock, &lockstate); ret = allocate_tls(globallist_curr(TAILQ_FIRST(&obj_list)), oldtls, tcbsize, tcbalign); lock_release(rtld_bind_lock, &lockstate); return (ret); } void _rtld_free_tls(void *tcb, size_t tcbsize, size_t tcbalign) { RtldLockState lockstate; wlock_acquire(rtld_bind_lock, &lockstate); free_tls(tcb, tcbsize, tcbalign); lock_release(rtld_bind_lock, &lockstate); } static void object_add_name(Obj_Entry *obj, const char *name) { Name_Entry *entry; size_t len; len = strlen(name); entry = malloc(sizeof(Name_Entry) + len); if (entry != NULL) { strcpy(entry->name, name); STAILQ_INSERT_TAIL(&obj->names, entry, link); } } static int object_match_name(const Obj_Entry *obj, const char *name) { Name_Entry *entry; STAILQ_FOREACH(entry, &obj->names, link) { if (strcmp(name, entry->name) == 0) return (1); } return (0); } static Obj_Entry * locate_dependency(const Obj_Entry *obj, const char *name) { const Objlist_Entry *entry; const Needed_Entry *needed; STAILQ_FOREACH(entry, &list_main, link) { if (object_match_name(entry->obj, name)) return entry->obj; } for (needed = obj->needed; needed != NULL; needed = needed->next) { if (strcmp(obj->strtab + needed->name, name) == 0 || (needed->obj != NULL && object_match_name(needed->obj, name))) { /* * If there is DT_NEEDED for the name we are looking for, * we are all set. Note that object might not be found if * dependency was not loaded yet, so the function can * return NULL here. This is expected and handled * properly by the caller. 
*/ return (needed->obj); } } _rtld_error("%s: Unexpected inconsistency: dependency %s not found", obj->path, name); rtld_die(); } static int check_object_provided_version(Obj_Entry *refobj, const Obj_Entry *depobj, const Elf_Vernaux *vna) { const Elf_Verdef *vd; const char *vername; vername = refobj->strtab + vna->vna_name; vd = depobj->verdef; if (vd == NULL) { _rtld_error("%s: version %s required by %s not defined", depobj->path, vername, refobj->path); return (-1); } for (;;) { if (vd->vd_version != VER_DEF_CURRENT) { _rtld_error("%s: Unsupported version %d of Elf_Verdef entry", depobj->path, vd->vd_version); return (-1); } if (vna->vna_hash == vd->vd_hash) { const Elf_Verdaux *aux = (const Elf_Verdaux *) ((char *)vd + vd->vd_aux); if (strcmp(vername, depobj->strtab + aux->vda_name) == 0) return (0); } if (vd->vd_next == 0) break; vd = (const Elf_Verdef *) ((char *)vd + vd->vd_next); } if (vna->vna_flags & VER_FLG_WEAK) return (0); _rtld_error("%s: version %s required by %s not found", depobj->path, vername, refobj->path); return (-1); } static int rtld_verify_object_versions(Obj_Entry *obj) { const Elf_Verneed *vn; const Elf_Verdef *vd; const Elf_Verdaux *vda; const Elf_Vernaux *vna; const Obj_Entry *depobj; int maxvernum, vernum; if (obj->ver_checked) return (0); obj->ver_checked = true; maxvernum = 0; /* * Walk over defined and required version records and figure out * max index used by any of them. Do very basic sanity checking * while there. */ vn = obj->verneed; while (vn != NULL) { if (vn->vn_version != VER_NEED_CURRENT) { _rtld_error("%s: Unsupported version %d of Elf_Verneed entry", obj->path, vn->vn_version); return (-1); } vna = (const Elf_Vernaux *) ((char *)vn + vn->vn_aux); for (;;) { vernum = VER_NEED_IDX(vna->vna_other); if (vernum > maxvernum) maxvernum = vernum; if (vna->vna_next == 0) break; vna = (const Elf_Vernaux *) ((char *)vna + vna->vna_next); } if (vn->vn_next == 0) break; vn = (const Elf_Verneed *) ((char *)vn + vn->vn_next); } vd = obj->verdef; while (vd != NULL) { if (vd->vd_version != VER_DEF_CURRENT) { _rtld_error("%s: Unsupported version %d of Elf_Verdef entry", obj->path, vd->vd_version); return (-1); } vernum = VER_DEF_IDX(vd->vd_ndx); if (vernum > maxvernum) maxvernum = vernum; if (vd->vd_next == 0) break; vd = (const Elf_Verdef *) ((char *)vd + vd->vd_next); } if (maxvernum == 0) return (0); /* * Store version information in array indexable by version index. * Verify that object version requirements are satisfied along the * way. 
*/ obj->vernum = maxvernum + 1; obj->vertab = xcalloc(obj->vernum, sizeof(Ver_Entry)); vd = obj->verdef; while (vd != NULL) { if ((vd->vd_flags & VER_FLG_BASE) == 0) { vernum = VER_DEF_IDX(vd->vd_ndx); assert(vernum <= maxvernum); vda = (const Elf_Verdaux *)((char *)vd + vd->vd_aux); obj->vertab[vernum].hash = vd->vd_hash; obj->vertab[vernum].name = obj->strtab + vda->vda_name; obj->vertab[vernum].file = NULL; obj->vertab[vernum].flags = 0; } if (vd->vd_next == 0) break; vd = (const Elf_Verdef *) ((char *)vd + vd->vd_next); } vn = obj->verneed; while (vn != NULL) { depobj = locate_dependency(obj, obj->strtab + vn->vn_file); if (depobj == NULL) return (-1); vna = (const Elf_Vernaux *) ((char *)vn + vn->vn_aux); for (;;) { if (check_object_provided_version(obj, depobj, vna)) return (-1); vernum = VER_NEED_IDX(vna->vna_other); assert(vernum <= maxvernum); obj->vertab[vernum].hash = vna->vna_hash; obj->vertab[vernum].name = obj->strtab + vna->vna_name; obj->vertab[vernum].file = obj->strtab + vn->vn_file; obj->vertab[vernum].flags = (vna->vna_other & VER_NEED_HIDDEN) ? VER_INFO_HIDDEN : 0; if (vna->vna_next == 0) break; vna = (const Elf_Vernaux *) ((char *)vna + vna->vna_next); } if (vn->vn_next == 0) break; vn = (const Elf_Verneed *) ((char *)vn + vn->vn_next); } return 0; } static int rtld_verify_versions(const Objlist *objlist) { Objlist_Entry *entry; int rc; rc = 0; STAILQ_FOREACH(entry, objlist, link) { /* * Skip dummy objects or objects that have their version requirements * already checked. */ if (entry->obj->strtab == NULL || entry->obj->vertab != NULL) continue; if (rtld_verify_object_versions(entry->obj) == -1) { rc = -1; if (ld_tracing == NULL) break; } } if (rc == 0 || ld_tracing != NULL) rc = rtld_verify_object_versions(&obj_rtld); return rc; } const Ver_Entry * fetch_ventry(const Obj_Entry *obj, unsigned long symnum) { Elf_Versym vernum; if (obj->vertab) { vernum = VER_NDX(obj->versyms[symnum]); if (vernum >= obj->vernum) { _rtld_error("%s: symbol %s has wrong verneed value %d", obj->path, obj->strtab + symnum, vernum); } else if (obj->vertab[vernum].hash != 0) { return &obj->vertab[vernum]; } } return NULL; } int _rtld_get_stack_prot(void) { return (stack_prot); } int _rtld_is_dlopened(void *arg) { Obj_Entry *obj; RtldLockState lockstate; int res; rlock_acquire(rtld_bind_lock, &lockstate); obj = dlcheck(arg); if (obj == NULL) obj = obj_from_addr(arg); if (obj == NULL) { _rtld_error("No shared object contains address"); lock_release(rtld_bind_lock, &lockstate); return (-1); } res = obj->dlopened ? 
1 : 0; lock_release(rtld_bind_lock, &lockstate); return (res); } int obj_enforce_relro(Obj_Entry *obj) { if (obj->relro_size > 0 && mprotect(obj->relro_page, obj->relro_size, PROT_READ) == -1) { _rtld_error("%s: Cannot enforce relro protection: %s", obj->path, rtld_strerror(errno)); return (-1); } return (0); } static void map_stacks_exec(RtldLockState *lockstate) { void (*thr_map_stacks_exec)(void); if ((max_stack_flags & PF_X) == 0 || (stack_prot & PROT_EXEC) != 0) return; thr_map_stacks_exec = (void (*)(void))(uintptr_t) get_program_var_addr("__pthread_map_stacks_exec", lockstate); if (thr_map_stacks_exec != NULL) { stack_prot |= PROT_EXEC; thr_map_stacks_exec(); } } void symlook_init(SymLook *dst, const char *name) { bzero(dst, sizeof(*dst)); dst->name = name; dst->hash = elf_hash(name); dst->hash_gnu = gnu_hash(name); } static void symlook_init_from_req(SymLook *dst, const SymLook *src) { dst->name = src->name; dst->hash = src->hash; dst->hash_gnu = src->hash_gnu; dst->ventry = src->ventry; dst->flags = src->flags; dst->defobj_out = NULL; dst->sym_out = NULL; dst->lockstate = src->lockstate; } static int open_binary_fd(const char *argv0, bool search_in_path) { char *pathenv, *pe, binpath[PATH_MAX]; int fd; if (search_in_path && strchr(argv0, '/') == NULL) { pathenv = getenv("PATH"); if (pathenv == NULL) { rtld_printf("-p and no PATH environment variable\n"); rtld_die(); } pathenv = strdup(pathenv); if (pathenv == NULL) { rtld_printf("Cannot allocate memory\n"); rtld_die(); } fd = -1; errno = ENOENT; while ((pe = strsep(&pathenv, ":")) != NULL) { if (strlcpy(binpath, pe, sizeof(binpath)) >= sizeof(binpath)) continue; if (binpath[0] != '\0' && strlcat(binpath, "/", sizeof(binpath)) >= sizeof(binpath)) continue; if (strlcat(binpath, argv0, sizeof(binpath)) >= sizeof(binpath)) continue; fd = open(binpath, O_RDONLY | O_CLOEXEC | O_VERIFY); if (fd != -1 || errno != ENOENT) break; } free(pathenv); } else { fd = open(argv0, O_RDONLY | O_CLOEXEC | O_VERIFY); } if (fd == -1) { rtld_printf("Opening %s: %s\n", argv0, rtld_strerror(errno)); rtld_die(); } return (fd); } /* * Parse a set of command-line arguments. */ static int parse_args(char* argv[], int argc, bool *use_pathp, int *fdp) { const char *arg; int fd, i, j, arglen; char opt; dbg("Parsing command-line arguments"); *use_pathp = false; *fdp = -1; for (i = 1; i < argc; i++ ) { arg = argv[i]; dbg("argv[%d]: '%s'", i, arg); /* * rtld arguments end with an explicit "--" or with the first * non-prefixed argument. */ if (strcmp(arg, "--") == 0) { i++; break; } if (arg[0] != '-') break; /* * All other arguments are single-character options that can * be combined, so we need to search through `arg` for them. */ arglen = strlen(arg); for (j = 1; j < arglen; j++) { opt = arg[j]; if (opt == 'h') { print_usage(argv[0]); rtld_die(); } else if (opt == 'f') { /* * -f XX can be used to specify a descriptor for the * binary named at the command line (i.e., the later * argument will specify the process name but the * descriptor is what will actually be executed) */ if (j != arglen - 1) { /* -f must be the last option in, e.g., -abcf */ _rtld_error("invalid options: %s", arg); rtld_die(); } i++; fd = parse_integer(argv[i]); if (fd == -1) { _rtld_error("invalid file descriptor: '%s'", argv[i]); rtld_die(); } *fdp = fd; break; } else if (opt == 'p') { *use_pathp = true; } else { rtld_printf("invalid argument: '%s'\n", arg); print_usage(argv[0]); rtld_die(); } } } return (i); } /* * Parse a file descriptor number without pulling in more of libc (e.g. atoi). 
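 *
 * Editor's note (hedged, not part of the original comment): the options
 * handled by parse_args() above correspond to running the dynamic linker
 * directly, along the lines of the illustrative commands below (the
 * /libexec/ld-elf.so.1 path is an assumption about the installed name):
 *
 *	/libexec/ld-elf.so.1 /bin/ls -l		run a named binary
 *	/libexec/ld-elf.so.1 -p ls -l		search PATH for "ls"
 *	/libexec/ld-elf.so.1 -f 5 -- ls -l	execute an already-open fd,
 *						using "ls" as the process name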
*/ static int parse_integer(const char *str) { static const int RADIX = 10; /* XXXJA: possibly support hex? */ const char *orig; int n; char c; orig = str; n = 0; for (c = *str; c != '\0'; c = *++str) { if (c < '0' || c > '9') return (-1); n *= RADIX; n += c - '0'; } /* Make sure we actually parsed something. */ if (str == orig) return (-1); return (n); } static void print_usage(const char *argv0) { rtld_printf("Usage: %s [-h] [-f ] [--] []\n" "\n" "Options:\n" " -h Display this help message\n" " -p Search in PATH for named binary\n" " -f Execute instead of searching for \n" " -- End of RTLD options\n" " Name of process to execute\n" " Arguments to the executed process\n", argv0); } /* * Overrides for libc_pic-provided functions. */ int __getosreldate(void) { size_t len; int oid[2]; int error, osrel; if (osreldate != 0) return (osreldate); oid[0] = CTL_KERN; oid[1] = KERN_OSRELDATE; osrel = 0; len = sizeof(osrel); error = sysctl(oid, 2, &osrel, &len, NULL, 0); if (error == 0 && osrel > 0 && len == sizeof(osrel)) osreldate = osrel; return (osreldate); } void exit(int status) { _exit(status); } void (*__cleanup)(void); int __isthreaded = 0; int _thread_autoinit_dummy_decl = 1; /* * No unresolved symbols for rtld. */ void __pthread_cxa_finalize(struct dl_phdr_info *a) { } void __stack_chk_fail(void) { _rtld_error("stack overflow detected; terminated"); rtld_die(); } __weak_reference(__stack_chk_fail, __stack_chk_fail_local); void __chk_fail(void) { _rtld_error("buffer overflow detected; terminated"); rtld_die(); } const char * rtld_strerror(int errnum) { if (errnum < 0 || errnum >= sys_nerr) return ("Unknown error"); return (sys_errlist[errnum]); } Index: head/share/man/man7/arch.7 =================================================================== --- head/share/man/man7/arch.7 (revision 322167) +++ head/share/man/man7/arch.7 (revision 322168) @@ -1,351 +1,351 @@ .\" Copyright (c) 2016-2017 The FreeBSD Foundation. All rights reserved. .\" .\" This documentation was created by Ed Maste under sponsorship of .\" The FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd May 16, 2017 .Dt ARCH 7 .Os .Sh NAME .Nm arch .Nd Architecture-specific details .Sh DESCRIPTION Differences between CPU architectures and platforms supported by .Fx . 
.Ss Introduction This document is a quick reference of key ABI details of .Fx architecture ports. For full details consult the processor-specific ABI supplement documentation. .Pp If not explicitly mentioned, sizes are in bytes. The architecture details in this document apply to .Fx 10.0 and later, unless otherwise noted. .Pp .Fx uses a flat address space. Variables of types .Vt unsigned long , .Vt uintptr_t , and .Vt size_t and pointers all have the same representation. .Pp In order to maximize compatibility with future pointer integrity mechanisms, manipulations of pointers as integers should be performed via .Vt uintptr_t or .Vt intptr_t and no other types. In particular, .Vt long and .Vt ptrdiff_t should be avoided. .Pp On some architectures, e.g. .Dv sparc64 , .Dv powerpc and AIM variants of .Dv powerpc64 , the kernel uses a separate address space. On other architectures, kernel and a user mode process share a single address space. The kernel is located at the highest addresses. .Pp On each architecture, the main user mode thread's stack starts near the highest user address and grows down. .Pp .Fx architecture support varies by release. This table shows the first .Fx release to support each architecture, and, for discontinued architectures, the final release. .Pp .Bl -column -offset indent "Sy Architecture" "Sy Initial Release" "Sy Final Release" .It Sy Architecture Ta Sy Initial Release Ta Sy Final Release .It alpha Ta 3.2 Ta 6.4 .It amd64 Ta 5.1 .It arm Ta 6.0 .It armeb Ta 8.0 .It armv6 Ta 10.0 .It arm64 Ta 11.0 .It ia64 Ta 5.0 Ta 10.x .It i386 Ta 1.0 .It mips Ta 8.0 .It mipsel Ta 9.0 .It mipselhf Ta 12.0 .It mipshf Ta 12.0 .It mipsn32 Ta 9.0 .It mips64 Ta 9.0 .It mips64el Ta 9.0 .It mips64elhf Ta 12.0 .It mips64hf Ta 12.0 .It pc98 Ta 2.2 Ta 11.x .It powerpc Ta 6.0 .It powerpcspe Ta 12.0 .It powerpc64 Ta 6.0 .It riscv64 Ta 12.0 .It riscv64sf Ta 12.0 .It sparc64 Ta 5.0 .El .Ss Type sizes All .Fx architectures use some variant of the ELF (see .Xr elf 5 ) .Sy Application Binary Interface (ABI) for the machine processor. All supported ABIs can be divided into two groups: .Bl -tag -width "Dv ILP32" .It Dv ILP32 .Vt int , .Vt long , .Vt void * types machine representations all have 4-byte size. .It Dv LP64 .Vt int type machine representation uses 4 bytes, while .Vt long and .Vt void * are 8 bytes. .El Compilers define the .Dv _LP64 symbol when compiling for an .Dv LP64 ABI. .Pp Some machines support more that one .Fx ABI. Typically these are 64-bit machines, where the .Dq native .Dv LP64 execution environment is accompanied by the .Dq legacy .Dv ILP32 environment, which was historical 32-bit predecessor for 64-bit evolution. Examples are: .Bl -column -offset indent "Dv powerpc64" "Sy ILP32 counterpart" .It Sy LP64 Ta Sy ILP32 counterpart .It Dv amd64 Ta Dv i386 .It Dv powerpc64 Ta Dv powerpc .It Dv mips64* Ta Dv mips* .El .Dv arm64 currently does not support execution of .Dv armv6 binaries, even if the CPU implements .Dv AArch32 execution state. .Pp On all supported architectures: .Bl -column -offset -indent "long long" "Size" .It Sy Type Ta Sy Size .It short Ta 2 .It int Ta 4 .It long Ta sizeof(void*) .It long long Ta 8 .It float Ta 4 .It double Ta 8 .El Integers are represented in two's complement. Alignment of integer and pointer types is natural, that is, the address of the variable must be congruent to zero modulo the type size. Most ILP32 ABIs, except .Dv arm , require only 4-byte alignment for 64-bit integers. 
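.Pp
Code that must adapt to the data model can test the
.Dv _LP64
macro directly, as in the illustrative fragment below
(the
.Vt word_t
name is arbitrary):
.Bd -literal -offset indent
#include <stdint.h>

#ifdef _LP64
typedef uint64_t word_t;	/* long and pointers are 8 bytes */
#else
typedef uint32_t word_t;	/* ILP32: long and pointers are 4 bytes */
#endif

_Static_assert(sizeof(long) == sizeof(void *), "flat address space");
.Ed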
.Pp Machine-dependent type sizes: .Bl -column -offset indent "Sy Architecture" "Sy void *" "Sy long double" "Sy time_t" .It Sy Architecture Ta Sy void * Ta Sy long double Ta Sy time_t .It amd64 Ta 8 Ta 16 Ta 8 .It arm Ta 4 Ta 8 Ta 8 .It armeb Ta 4 Ta 8 Ta 8 .It armv6 Ta 4 Ta 8 Ta 8 .It arm64 Ta 8 Ta 16 Ta 8 .It i386 Ta 4 Ta 12 Ta 4 .It mips Ta 4 Ta 8 Ta 8 .It mipsel Ta 4 Ta 8 Ta 8 .It mipselhf Ta 4 Ta 8 Ta 8 .It mipshf Ta 4 Ta 8 Ta 8 .It mipsn32 Ta 4 Ta 8 Ta 8 .It mips64 Ta 8 Ta 8 Ta 8 .It mips64el Ta 8 Ta 8 Ta 8 .It mips64elhf Ta 8 Ta 8 Ta 8 .It mips64hf Ta 8 Ta 8 Ta 8 .It powerpc Ta 4 Ta 8 Ta 8 .It powerpcspe Ta 4 Ta 8 Ta 8 .It powerpc64 Ta 8 Ta 8 Ta 8 .It riscv64 Ta 8 Ta 16 Ta 8 .It riscv64sf Ta 8 Ta 16 Ta 8 .It sparc64 Ta 8 Ta 16 Ta 8 .El .Pp .Sy time_t is 8 bytes on all supported architectures except i386 and 32-bit variants of powerpc. .Ss Endianness and Char Signedness .Bl -column -offset indent "Sy Architecture" "Sy Endianness" "Sy char Signedness" .It Sy Architecture Ta Sy Endianness Ta Sy char Signedness .It amd64 Ta little Ta signed .It arm Ta little Ta unsigned .It armeb Ta big Ta unsigned .It armv6 Ta little Ta unsigned .It arm64 Ta little Ta unsigned .It i386 Ta little Ta signed .It mips Ta big Ta signed .It mipsel Ta little Ta signed .It mipselhf Ta little Ta signed .It mipshf Ta big Ta signed .It mipsn32 Ta big Ta signed .It mips64 Ta big Ta signed .It mips64el Ta little Ta signed .It mips64elhf Ta little Ta signed .It mips64hf Ta big Ta signed .It powerpc Ta big Ta unsigned .It powerpcspe Ta big Ta unsigned .It powerpc64 Ta big Ta unsigned .It riscv64 Ta little Ta signed .It riscv64sf Ta little Ta signed .It sparc64 Ta big Ta signed .El .Ss Page Size .Bl -column -offset indent "Sy Architecture" "Sy Page Sizes" .It Sy Architecture Ta Sy Page Sizes .It amd64 Ta 4K, 2M, 1G .It arm Ta 4K .It armeb Ta 4K .It armv6 Ta 4K, 1M .It arm64 Ta 4K, 2M, 1G .It i386 Ta 4K, 2M (PAE), 4M .It mips Ta 4K .It mipsel Ta 4K .It mipselhf Ta 4K .It mipshf Ta 4K .It mipsn32 Ta 4K .It mips64 Ta 4K .It mips64el Ta 4K .It mips64elhf Ta 4K .It mips64hf Ta 4K .It powerpc Ta 4K .It powerpcspe Ta 4K .It powerpc64 Ta 4K .It riscv64 Ta 4K .It riscv64sf Ta 4K .It sparc64 Ta 8K .El .Ss Floating Point .Bl -column -offset indent "Sy Architecture" "Sy float, double" "Sy long double" .It Sy Architecture Ta Sy float, double Ta Sy long double .It amd64 Ta hard Ta hard, 80 bit .It arm Ta soft Ta soft, double precision .It armeb Ta soft Ta soft, double precision .It armv6 Ta hard(1) Ta hard, double precision .It arm64 Ta hard Ta soft, quad precision .It i386 Ta hard Ta hard, 80 bit .It mips Ta soft Ta identical to double .It mipsel Ta soft Ta identical to double .It mipselhf Ta hard Ta identical to double .It mipshf Ta hard Ta identical to double .It mipsn32 Ta soft Ta identical to double .It mips64 Ta soft Ta identical to double .It mips64el Ta soft Ta identical to double .It mips64elhf Ta hard Ta identical to double .It mips64hf Ta hard Ta identical to double .It powerpc Ta hard Ta hard, double precision .It powerpcspe Ta hard Ta hard, double precision .It powerpc64 Ta hard Ta hard, double precision .It riscv64 Ta hard Ta hard, double precision .It riscv64sf Ta soft Ta soft, double precision .It sparc64 Ta hard Ta hard, quad precision .El .Pp (1) Prior to .Fx 11.0 , armv6 used the softfp ABI even though it supported only processors with a floating point unit. .Ss Predefined Macros The compiler provides a number of predefined macros. Some of these provide architecture-specific details and are explained below. 
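.Pp
For example, a source file that needs riscv64-specific code can test the
architecture macros from the table below; this fragment is illustrative
only:
.Bd -literal -offset indent
#if defined(__riscv) && __riscv_xlen == 64
/* riscv64 and riscv64sf */
#elif defined(__aarch64__)
/* arm64 */
#else
/* all other architectures */
#endif
.Ed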
Other macros, including those required by the language standard, are not included here. .Pp The full set of predefined macros can be obtained with this command: .Bd -literal -offset indent cc -x c -dM -E /dev/null .Ed .Pp Common type size and endianness macros: .Bl -column -offset indent "BYTE_ORDER" "Sy Meaning" .It Sy Macro Ta Sy Meaning .It Dv __LP64__ Ta 64-bit (8-byte) long and pointer, 32-bit (4-byte) int .It Dv __ILP32__ Ta 32-bit (4-byte) int, long and pointer .It Dv BYTE_ORDER Ta Either Dv BIG_ENDIAN or Dv LITTLE_ENDIAN . .Dv PDP11_ENDIAN is not used on .Fx . .El .Pp Architecture-specific macros: .Bl -column -offset indent "Sy Architecture" "Sy Predefined macros" .It Sy Architecture Ta Sy Predefined macros .It amd64 Ta Dv __amd64__, Dv __x86_64__ .It arm Ta Dv __arm__ .It armeb Ta Dv __arm__ .It armv6 Ta Dv __arm__, Dv __ARM_ARCH >= 6 .It arm64 Ta Dv __aarch64__ .It i386 Ta Dv __i386__ .It mips Ta Dv __mips__, Dv __MIPSEB__, Dv __mips_o32 .It mipsel Ta Dv __mips__, Dv __mips_o32 .It mipselhf Ta Dv __mips__, Dv __mips_o32 .It mipshf Ta Dv __mips__, Dv __MIPSEB__, Dv __mips_o32 .It mipsn32 Ta Dv __mips__, Dv __MIPSEB__, Dv __mips_n32 .It mips64 Ta Dv __mips__, Dv __MIPSEB__, Dv __mips_n64 .It mips64el Ta Dv __mips__, Dv __mips_n64 .It mips64elhf Ta Dv __mips__, Dv __mips_n64 .It mips64hf Ta Dv __mips__, Dv __MIPSEB__, Dv __mips_n64 .It powerpc Ta Dv __powerpc__ .It powerpcspe Ta Dv __powerpc__, Dv __SPE__ .It powerpc64 Ta Dv __powerpc__, Dv __powerpc64__ -.It riscv64 Ta Dv __riscv__, Dv __riscv64 -.It riscv64sf Ta Dv __riscv__, Dv __riscv64 +.It riscv64 Ta Dv __riscv, Dv __riscv_xlen == 64 +.It riscv64sf Ta Dv __riscv, Dv __riscv_xlen == 64 .It sparc64 Ta Dv __sparc64__ .El .Sh SEE ALSO .Xr src.conf 5 , .Xr build 7 .Sh HISTORY An .Nm manual page appeared in .Fx 12 . Index: head/sys/cddl/compat/opensolaris/sys/atomic.h =================================================================== --- head/sys/cddl/compat/opensolaris/sys/atomic.h (revision 322167) +++ head/sys/cddl/compat/opensolaris/sys/atomic.h (revision 322168) @@ -1,142 +1,142 @@ /*- * Copyright (c) 2007 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _OPENSOLARIS_SYS_ATOMIC_H_ #define _OPENSOLARIS_SYS_ATOMIC_H_ #include #include #define casptr(_a, _b, _c) \ atomic_cmpset_ptr((volatile uintptr_t *)(_a), (uintptr_t)(_b), (uintptr_t) (_c)) #define cas32 atomic_cmpset_32 #if !defined(__LP64__) && !defined(__mips_n32) && !defined(ARM_HAVE_ATOMIC64) extern void atomic_add_64(volatile uint64_t *target, int64_t delta); extern void atomic_dec_64(volatile uint64_t *target); #endif #ifndef __sparc64__ extern uint32_t atomic_cas_32(volatile uint32_t *target, uint32_t cmp, uint32_t newval); extern uint64_t atomic_cas_64(volatile uint64_t *target, uint64_t cmp, uint64_t newval); #endif extern uint64_t atomic_add_64_nv(volatile uint64_t *target, int64_t delta); extern uint8_t atomic_or_8_nv(volatile uint8_t *target, uint8_t value); extern void membar_producer(void); #if defined(__sparc64__) || defined(__powerpc__) || defined(__arm__) || \ - defined(__mips__) || defined(__aarch64__) || defined(__riscv__) + defined(__mips__) || defined(__aarch64__) || defined(__riscv) extern void atomic_or_8(volatile uint8_t *target, uint8_t value); #else static __inline void atomic_or_8(volatile uint8_t *target, uint8_t value) { atomic_set_8(target, value); } #endif static __inline uint32_t atomic_add_32_nv(volatile uint32_t *target, int32_t delta) { return (atomic_fetchadd_32(target, delta) + delta); } static __inline u_int atomic_add_int_nv(volatile u_int *target, int delta) { return (atomic_add_32_nv(target, delta)); } static __inline void atomic_dec_32(volatile uint32_t *target) { atomic_subtract_32(target, 1); } static __inline uint32_t atomic_dec_32_nv(volatile uint32_t *target) { return (atomic_fetchadd_32(target, -1) - 1); } #if defined(__LP64__) || defined(__mips_n32) || defined(ARM_HAVE_ATOMIC64) static __inline void atomic_dec_64(volatile uint64_t *target) { atomic_subtract_64(target, 1); } #endif static __inline void atomic_inc_32(volatile uint32_t *target) { atomic_add_32(target, 1); } static __inline uint32_t atomic_inc_32_nv(volatile uint32_t *target) { return (atomic_add_32_nv(target, 1)); } static __inline void atomic_inc_64(volatile uint64_t *target) { atomic_add_64(target, 1); } static __inline uint64_t atomic_inc_64_nv(volatile uint64_t *target) { return (atomic_add_64_nv(target, 1)); } static __inline uint64_t atomic_dec_64_nv(volatile uint64_t *target) { return (atomic_add_64_nv(target, -1)); } #if !defined(COMPAT_32BIT) && defined(__LP64__) static __inline void * atomic_cas_ptr(volatile void *target, void *cmp, void *newval) { return ((void *)atomic_cas_64((volatile uint64_t *)target, (uint64_t)cmp, (uint64_t)newval)); } #else static __inline void * atomic_cas_ptr(volatile void *target, void *cmp, void *newval) { return ((void *)atomic_cas_32((volatile uint32_t *)target, (uint32_t)cmp, (uint32_t)newval)); } #endif /* !defined(COMPAT_32BIT) && defined(__LP64__) */ #endif /* !_OPENSOLARIS_SYS_ATOMIC_H_ */ Index: head/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c (revision 322167) +++ head/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c (revision 322168) @@ -1,18361 +1,18361 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * $FreeBSD$ */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, Joyent, Inc. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ /* * DTrace - Dynamic Tracing for Solaris * * This is the implementation of the Solaris Dynamic Tracing framework * (DTrace). The user-visible interface to DTrace is described at length in * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace * library, the in-kernel DTrace framework, and the DTrace providers are * described in the block comments in the header file. The * internal architecture of DTrace is described in the block comments in the * header file. The comments contained within the DTrace * implementation very much assume mastery of all of these sources; if one has * an unanswered question about the implementation, one should consult them * first. * * The functions here are ordered roughly as follows: * * - Probe context functions * - Probe hashing functions * - Non-probe context utility functions * - Matching functions * - Provider-to-Framework API functions * - Probe management functions * - DIF object functions * - Format functions * - Predicate functions * - ECB functions * - Buffer functions * - Enabling functions * - DOF functions * - Anonymous enabling functions * - Consumer state functions * - Helper functions * - Hook functions * - Driver cookbook functions * * Each group of functions begins with a block comment labelled the "DTrace * [Group] Functions", allowing one to find each block by searching forward * on capital-f functions. */ #include #ifndef illumos #include #endif #include #include #include #include #ifdef illumos #include #include #endif #include #include #ifdef illumos #include #endif #include #include #include #include #ifdef illumos #include #include #endif #include #ifdef illumos #include #include #endif #include #ifdef illumos #include #include #endif #include #ifdef illumos #include #include #endif #include #include #include #include "strtolctype.h" /* FreeBSD includes: */ #ifndef illumos #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "dtrace_cddl.h" #include "dtrace_debug.c" #endif #include "dtrace_xoroshiro128_plus.h" /* * DTrace Tunable Variables * * The following variables may be tuned by adding a line to /etc/system that * includes both the name of the DTrace module ("dtrace") and the name of the * variable. For example: * * set dtrace:dtrace_destructive_disallow = 1 * * In general, the only variables that one should be tuning this way are those * that affect system-wide DTrace behavior, and for which the default behavior * is undesirable. Most of these variables are tunable on a per-consumer * basis using DTrace options, and need not be tuned on a system-wide basis. 
* When tuning these variables, avoid pathological values; while some attempt * is made to verify the integrity of these variables, they are not considered * part of the supported interface to DTrace, and they are therefore not * checked comprehensively. Further, these variables should not be tuned * dynamically via "mdb -kw" or other means; they should only be tuned via * /etc/system. */ int dtrace_destructive_disallow = 0; #ifndef illumos /* Positive logic version of dtrace_destructive_disallow for loader tunable */ int dtrace_allow_destructive = 1; #endif dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024); size_t dtrace_difo_maxsize = (256 * 1024); dtrace_optval_t dtrace_dof_maxsize = (8 * 1024 * 1024); size_t dtrace_statvar_maxsize = (16 * 1024); size_t dtrace_actions_max = (16 * 1024); size_t dtrace_retain_max = 1024; dtrace_optval_t dtrace_helper_actions_max = 128; dtrace_optval_t dtrace_helper_providers_max = 32; dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024); size_t dtrace_strsize_default = 256; dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */ dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */ dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */ dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */ dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */ dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC; /* 6/minute */ dtrace_optval_t dtrace_switchrate_default = NANOSEC; /* 1 hz */ dtrace_optval_t dtrace_nspec_default = 1; dtrace_optval_t dtrace_specsize_default = 32 * 1024; dtrace_optval_t dtrace_stackframes_default = 20; dtrace_optval_t dtrace_ustackframes_default = 20; dtrace_optval_t dtrace_jstackframes_default = 50; dtrace_optval_t dtrace_jstackstrsize_default = 512; int dtrace_msgdsize_max = 128; hrtime_t dtrace_chill_max = MSEC2NSEC(500); /* 500 ms */ hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */ int dtrace_devdepth_max = 32; int dtrace_err_verbose; hrtime_t dtrace_deadman_interval = NANOSEC; hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC; hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC; hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC; #ifndef illumos int dtrace_memstr_max = 4096; #endif /* * DTrace External Variables * * As dtrace(7D) is a kernel module, any DTrace variables are obviously * available to DTrace consumers via the backtick (`) syntax. One of these, * dtrace_zero, is made deliberately so: it is provided as a source of * well-known, zero-filled memory. While this variable is not documented, * it is used by some translators as an implementation detail. */ const char dtrace_zero[256] = { 0 }; /* zero-filled memory */ /* * DTrace Internal Variables */ #ifdef illumos static dev_info_t *dtrace_devi; /* device info */ #endif #ifdef illumos static vmem_t *dtrace_arena; /* probe ID arena */ static vmem_t *dtrace_minor; /* minor number arena */ #else static taskq_t *dtrace_taskq; /* task queue */ static struct unrhdr *dtrace_arena; /* Probe ID number. 
*/ #endif static dtrace_probe_t **dtrace_probes; /* array of all probes */ static int dtrace_nprobes; /* number of probes */ static dtrace_provider_t *dtrace_provider; /* provider list */ static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */ static int dtrace_opens; /* number of opens */ static int dtrace_helpers; /* number of helpers */ static int dtrace_getf; /* number of unpriv getf()s */ #ifdef illumos static void *dtrace_softstate; /* softstate pointer */ #endif static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */ static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */ static dtrace_hash_t *dtrace_byname; /* probes hashed by name */ static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */ static int dtrace_toxranges; /* number of toxic ranges */ static int dtrace_toxranges_max; /* size of toxic range array */ static dtrace_anon_t dtrace_anon; /* anonymous enabling */ static kmem_cache_t *dtrace_state_cache; /* cache for dynamic state */ static uint64_t dtrace_vtime_references; /* number of vtimestamp refs */ static kthread_t *dtrace_panicked; /* panicking thread */ static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */ static dtrace_genid_t dtrace_probegen; /* current probe generation */ static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */ static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */ static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */ static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */ static int dtrace_dynvar_failclean; /* dynvars failed to clean */ #ifndef illumos static struct mtx dtrace_unr_mtx; MTX_SYSINIT(dtrace_unr_mtx, &dtrace_unr_mtx, "Unique resource identifier", MTX_DEF); static eventhandler_tag dtrace_kld_load_tag; static eventhandler_tag dtrace_kld_unload_try_tag; #endif /* * DTrace Locking * DTrace is protected by three (relatively coarse-grained) locks: * * (1) dtrace_lock is required to manipulate essentially any DTrace state, * including enabling state, probes, ECBs, consumer state, helper state, * etc. Importantly, dtrace_lock is _not_ required when in probe context; * probe context is lock-free -- synchronization is handled via the * dtrace_sync() cross call mechanism. * * (2) dtrace_provider_lock is required when manipulating provider state, or * when provider state must be held constant. * * (3) dtrace_meta_lock is required when manipulating meta provider state, or * when meta provider state must be held constant. * * The lock ordering between these three locks is dtrace_meta_lock before * dtrace_provider_lock before dtrace_lock. (In particular, there are * several places where dtrace_provider_lock is held by the framework as it * calls into the providers -- which then call back into the framework, * grabbing dtrace_lock.) * * There are two other locks in the mix: mod_lock and cpu_lock. With respect * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical * role as a coarse-grained lock; it is acquired before both of these locks. * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must * be acquired _between_ dtrace_meta_lock and any other DTrace locks. * mod_lock is similar with respect to dtrace_provider_lock in that it must be * acquired _between_ dtrace_provider_lock and dtrace_lock. 
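 *
 * For example (an illustrative sketch), a path that needs all three
 * framework locks acquires and releases them in the documented order:
 *
 *	mutex_enter(&dtrace_meta_lock);
 *	mutex_enter(&dtrace_provider_lock);
 *	mutex_enter(&dtrace_lock);
 *	...
 *	mutex_exit(&dtrace_lock);
 *	mutex_exit(&dtrace_provider_lock);
 *	mutex_exit(&dtrace_meta_lock);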
*/ static kmutex_t dtrace_lock; /* probe state lock */ static kmutex_t dtrace_provider_lock; /* provider state lock */ static kmutex_t dtrace_meta_lock; /* meta-provider state lock */ #ifndef illumos /* XXX FreeBSD hacks. */ #define cr_suid cr_svuid #define cr_sgid cr_svgid #define ipaddr_t in_addr_t #define mod_modname pathname #define vuprintf vprintf #define ttoproc(_a) ((_a)->td_proc) #define crgetzoneid(_a) 0 #define SNOCD 0 #define CPU_ON_INTR(_a) 0 #define PRIV_EFFECTIVE (1 << 0) #define PRIV_DTRACE_KERNEL (1 << 1) #define PRIV_DTRACE_PROC (1 << 2) #define PRIV_DTRACE_USER (1 << 3) #define PRIV_PROC_OWNER (1 << 4) #define PRIV_PROC_ZONE (1 << 5) #define PRIV_ALL ~0 SYSCTL_DECL(_debug_dtrace); SYSCTL_DECL(_kern_dtrace); #endif #ifdef illumos #define curcpu CPU->cpu_id #endif /* * DTrace Provider Variables * * These are the variables relating to DTrace as a provider (that is, the * provider of the BEGIN, END, and ERROR probes). */ static dtrace_pattr_t dtrace_provider_attr = { { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, }; static void dtrace_nullop(void) {} static dtrace_pops_t dtrace_provider_ops = { (void (*)(void *, dtrace_probedesc_t *))dtrace_nullop, (void (*)(void *, modctl_t *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, NULL, NULL, NULL, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop }; static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */ static dtrace_id_t dtrace_probeid_end; /* special END probe */ dtrace_id_t dtrace_probeid_error; /* special ERROR probe */ /* * DTrace Helper Tracing Variables * * These variables should be set dynamically to enable helper tracing. The * only variables that should be set are dtrace_helptrace_enable (which should * be set to a non-zero value to allocate helper tracing buffers on the next * open of /dev/dtrace) and dtrace_helptrace_disable (which should be set to a * non-zero value to deallocate helper tracing buffers on the next close of * /dev/dtrace). When (and only when) helper tracing is disabled, the * buffer size may also be set via dtrace_helptrace_bufsize. */ int dtrace_helptrace_enable = 0; int dtrace_helptrace_disable = 0; int dtrace_helptrace_bufsize = 16 * 1024 * 1024; uint32_t dtrace_helptrace_nlocals; static dtrace_helptrace_t *dtrace_helptrace_buffer; static uint32_t dtrace_helptrace_next = 0; static int dtrace_helptrace_wrapped = 0; /* * DTrace Error Hashing * * On DEBUG kernels, DTrace will track the errors that has seen in a hash * table. This is very useful for checking coverage of tests that are * expected to induce DIF or DOF processing errors, and may be useful for * debugging problems in the DIF code generator or in DOF generation . The * error hash may be examined with the ::dtrace_errhash MDB dcmd. 
*/ #ifdef DEBUG static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ]; static const char *dtrace_errlast; static kthread_t *dtrace_errthread; static kmutex_t dtrace_errlock; #endif /* * DTrace Macros and Constants * * These are various macros that are useful in various spots in the * implementation, along with a few random constants that have no meaning * outside of the implementation. There is no real structure to this cpp * mishmash -- but is there ever? */ #define DTRACE_HASHSTR(hash, probe) \ dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs))) #define DTRACE_HASHNEXT(hash, probe) \ (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs) #define DTRACE_HASHPREV(hash, probe) \ (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs) #define DTRACE_HASHEQ(hash, lhs, rhs) \ (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \ *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0) #define DTRACE_AGGHASHSIZE_SLEW 17 #define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3) /* * The key for a thread-local variable consists of the lower 61 bits of the * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL. * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never * equal to a variable identifier. This is necessary (but not sufficient) to * assure that global associative arrays never collide with thread-local * variables. To guarantee that they cannot collide, we must also define the * order for keying dynamic variables. That order is: * * [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ] * * Because the variable-key and the tls-key are in orthogonal spaces, there is * no way for a global variable key signature to match a thread-local key * signature. */ #ifdef illumos #define DTRACE_TLS_THRKEY(where) { \ uint_t intr = 0; \ uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \ for (; actv; actv >>= 1) \ intr++; \ ASSERT(intr < (1 << 3)); \ (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \ (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ } #else #define DTRACE_TLS_THRKEY(where) { \ solaris_cpu_t *_c = &solaris_cpu[curcpu]; \ uint_t intr = 0; \ uint_t actv = _c->cpu_intr_actv; \ for (; actv; actv >>= 1) \ intr++; \ ASSERT(intr < (1 << 3)); \ (where) = ((curthread->td_tid + DIF_VARIABLE_MAX) & \ (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ } #endif #define DT_BSWAP_8(x) ((x) & 0xff) #define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8)) #define DT_BSWAP_32(x) ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16)) #define DT_BSWAP_64(x) ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32)) #define DT_MASK_LO 0x00000000FFFFFFFFULL #define DTRACE_STORE(type, tomax, offset, what) \ *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what); #ifndef __x86 #define DTRACE_ALIGNCHECK(addr, size, flags) \ if (addr & (size - 1)) { \ *flags |= CPU_DTRACE_BADALIGN; \ cpu_core[curcpu].cpuc_dtrace_illval = addr; \ return (0); \ } #else #define DTRACE_ALIGNCHECK(addr, size, flags) #endif /* * Test whether a range of memory starting at testaddr of size testsz falls * within the range of memory described by addr, sz. We take care to avoid * problems with overflow and underflow of the unsigned quantities, and * disallow all negative sizes. Ranges of size 0 are allowed. 
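 *
 * To see why the offset-based form below matters, consider (with
 * illustrative numbers) a 32-bit kernel, baseaddr = 0x1000,
 * basesz = 0x100, and a request with testaddr = 0xfffffff0,
 * testsz = 0x1020.  A naive "testaddr >= baseaddr &&
 * testaddr + testsz <= baseaddr + basesz" accepts it, because
 * testaddr + testsz wraps around to 0x1010.  The macro rejects it
 * twice over: testaddr - baseaddr is not less than basesz, and
 * testaddr + testsz is less than testaddr, so the wrap is detected
 * explicitly.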
*/ #define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \ ((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \ (testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \ (testaddr) + (testsz) >= (testaddr)) #define DTRACE_RANGE_REMAIN(remp, addr, baseaddr, basesz) \ do { \ if ((remp) != NULL) { \ *(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr); \ } \ _NOTE(CONSTCOND) } while (0) /* * Test whether alloc_sz bytes will fit in the scratch region. We isolate * alloc_sz on the righthand side of the comparison in order to avoid overflow * or underflow in the comparison with it. This is simpler than the INRANGE * check above, because we know that the dtms_scratch_ptr is valid in the * range. Allocations of size zero are allowed. */ #define DTRACE_INSCRATCH(mstate, alloc_sz) \ ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \ (mstate)->dtms_scratch_ptr >= (alloc_sz)) #define DTRACE_LOADFUNC(bits) \ /*CSTYLED*/ \ uint##bits##_t \ dtrace_load##bits(uintptr_t addr) \ { \ size_t size = bits / NBBY; \ /*CSTYLED*/ \ uint##bits##_t rval; \ int i; \ volatile uint16_t *flags = (volatile uint16_t *) \ &cpu_core[curcpu].cpuc_dtrace_flags; \ \ DTRACE_ALIGNCHECK(addr, size, flags); \ \ for (i = 0; i < dtrace_toxranges; i++) { \ if (addr >= dtrace_toxrange[i].dtt_limit) \ continue; \ \ if (addr + size <= dtrace_toxrange[i].dtt_base) \ continue; \ \ /* \ * This address falls within a toxic region; return 0. \ */ \ *flags |= CPU_DTRACE_BADADDR; \ cpu_core[curcpu].cpuc_dtrace_illval = addr; \ return (0); \ } \ \ *flags |= CPU_DTRACE_NOFAULT; \ /*CSTYLED*/ \ rval = *((volatile uint##bits##_t *)addr); \ *flags &= ~CPU_DTRACE_NOFAULT; \ \ return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0); \ } #ifdef _LP64 #define dtrace_loadptr dtrace_load64 #else #define dtrace_loadptr dtrace_load32 #endif #define DTRACE_DYNHASH_FREE 0 #define DTRACE_DYNHASH_SINK 1 #define DTRACE_DYNHASH_VALID 2 #define DTRACE_MATCH_NEXT 0 #define DTRACE_MATCH_DONE 1 #define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0') #define DTRACE_STATE_ALIGN 64 #define DTRACE_FLAGS2FLT(flags) \ (((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \ ((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \ ((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \ ((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \ ((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \ ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \ ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \ ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \ ((flags) & CPU_DTRACE_BADSTACK) ? 
DTRACEFLT_BADSTACK : \ DTRACEFLT_UNKNOWN) #define DTRACEACT_ISSTRING(act) \ ((act)->dta_kind == DTRACEACT_DIFEXPR && \ (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) /* Function prototype definitions: */ static size_t dtrace_strlen(const char *, size_t); static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id); static void dtrace_enabling_provide(dtrace_provider_t *); static int dtrace_enabling_match(dtrace_enabling_t *, int *); static void dtrace_enabling_matchall(void); static void dtrace_enabling_reap(void); static dtrace_state_t *dtrace_anon_grab(void); static uint64_t dtrace_helper(int, dtrace_mstate_t *, dtrace_state_t *, uint64_t, uint64_t); static dtrace_helpers_t *dtrace_helpers_create(proc_t *); static void dtrace_buffer_drop(dtrace_buffer_t *); static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when); static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t, dtrace_state_t *, dtrace_mstate_t *); static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t, dtrace_optval_t); static int dtrace_ecb_create_enable(dtrace_probe_t *, void *); static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *); uint16_t dtrace_load16(uintptr_t); uint32_t dtrace_load32(uintptr_t); uint64_t dtrace_load64(uintptr_t); uint8_t dtrace_load8(uintptr_t); void dtrace_dynvar_clean(dtrace_dstate_t *); dtrace_dynvar_t *dtrace_dynvar(dtrace_dstate_t *, uint_t, dtrace_key_t *, size_t, dtrace_dynvar_op_t, dtrace_mstate_t *, dtrace_vstate_t *); uintptr_t dtrace_dif_varstr(uintptr_t, dtrace_state_t *, dtrace_mstate_t *); static int dtrace_priv_proc(dtrace_state_t *); static void dtrace_getf_barrier(void); static int dtrace_canload_remains(uint64_t, size_t, size_t *, dtrace_mstate_t *, dtrace_vstate_t *); static int dtrace_canstore_remains(uint64_t, size_t, size_t *, dtrace_mstate_t *, dtrace_vstate_t *); /* * DTrace Probe Context Functions * * These functions are called from probe context. Because probe context is * any context in which C may be called, arbitrarily locks may be held, * interrupts may be disabled, we may be in arbitrary dispatched state, etc. * As a result, functions called from probe context may only call other DTrace * support functions -- they may not interact at all with the system at large. * (Note that the ASSERT macro is made probe-context safe by redefining it in * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary * loads are to be performed from probe context, they _must_ be in terms of * the safe dtrace_load*() variants. * * Some functions in this block are not actually called from probe context; * for these functions, there will be a comment above the function reading * "Note: not called from probe context." */ void dtrace_panic(const char *format, ...) { va_list alist; va_start(alist, format); #ifdef __FreeBSD__ vpanic(format, alist); #else dtrace_vpanic(format, alist); #endif va_end(alist); } int dtrace_assfail(const char *a, const char *f, int l) { dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l); /* * We just need something here that even the most clever compiler * cannot optimize away. */ return (a[(uintptr_t)f]); } /* * Atomically increment a specified error counter from probe context. */ static void dtrace_error(uint32_t *counter) { /* * Most counters stored to in probe context are per-CPU counters. * However, there are some error conditions that are sufficiently * arcane that they don't merit per-CPU storage. 
If these counters * are incremented concurrently on different CPUs, scalability will be * adversely affected -- but we don't expect them to be white-hot in a * correctly constructed enabling... */ uint32_t oval, nval; do { oval = *counter; if ((nval = oval + 1) == 0) { /* * If the counter would wrap, set it to 1 -- assuring * that the counter is never zero when we have seen * errors. (The counter must be 32-bits because we * aren't guaranteed a 64-bit compare&swap operation.) * To save this code both the infamy of being fingered * by a priggish news story and the indignity of being * the target of a neo-puritan witch trial, we're * carefully avoiding any colorful description of the * likelihood of this condition -- but suffice it to * say that it is only slightly more likely than the * overflow of predicate cache IDs, as discussed in * dtrace_predicate_create(). */ nval = 1; } } while (dtrace_cas32(counter, oval, nval) != oval); } /* * Use the DTRACE_LOADFUNC macro to define functions for each of loading a * uint8_t, a uint16_t, a uint32_t and a uint64_t. */ /* BEGIN CSTYLED */ DTRACE_LOADFUNC(8) DTRACE_LOADFUNC(16) DTRACE_LOADFUNC(32) DTRACE_LOADFUNC(64) /* END CSTYLED */ static int dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate) { if (dest < mstate->dtms_scratch_base) return (0); if (dest + size < dest) return (0); if (dest + size > mstate->dtms_scratch_ptr) return (0); return (1); } static int dtrace_canstore_statvar(uint64_t addr, size_t sz, size_t *remain, dtrace_statvar_t **svars, int nsvars) { int i; size_t maxglobalsize, maxlocalsize; if (nsvars == 0) return (0); maxglobalsize = dtrace_statvar_maxsize + sizeof (uint64_t); maxlocalsize = maxglobalsize * NCPU; for (i = 0; i < nsvars; i++) { dtrace_statvar_t *svar = svars[i]; uint8_t scope; size_t size; if (svar == NULL || (size = svar->dtsv_size) == 0) continue; scope = svar->dtsv_var.dtdv_scope; /* * We verify that our size is valid in the spirit of providing * defense in depth: we want to prevent attackers from using * DTrace to escalate an orthogonal kernel heap corruption bug * into the ability to store to arbitrary locations in memory. */ VERIFY((scope == DIFV_SCOPE_GLOBAL && size <= maxglobalsize) || (scope == DIFV_SCOPE_LOCAL && size <= maxlocalsize)); if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) { DTRACE_RANGE_REMAIN(remain, addr, svar->dtsv_data, svar->dtsv_size); return (1); } } return (0); } /* * Check to see if the address is within a memory region to which a store may * be issued. This includes the DTrace scratch areas, and any DTrace variable * region. The caller of dtrace_canstore() is responsible for performing any * alignment checks that are needed before stores are actually executed. */ static int dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { return (dtrace_canstore_remains(addr, sz, NULL, mstate, vstate)); } /* * Implementation of dtrace_canstore which communicates the upper bound of the * allowed memory region. */ static int dtrace_canstore_remains(uint64_t addr, size_t sz, size_t *remain, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { /* * First, check to see if the address is in scratch space... */ if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base, mstate->dtms_scratch_size)) { DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_scratch_base, mstate->dtms_scratch_size); return (1); } /* * Now check to see if it's a dynamic variable. 
This check will pick * up both thread-local variables and any global dynamically-allocated * variables. */ if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base, vstate->dtvs_dynvars.dtds_size)) { dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; uintptr_t base = (uintptr_t)dstate->dtds_base + (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t)); uintptr_t chunkoffs; dtrace_dynvar_t *dvar; /* * Before we assume that we can store here, we need to make * sure that it isn't in our metadata -- storing to our * dynamic variable metadata would corrupt our state. For * the range to not include any dynamic variable metadata, * it must: * * (1) Start above the hash table that is at the base of * the dynamic variable space * * (2) Have a starting chunk offset that is beyond the * dtrace_dynvar_t that is at the base of every chunk * * (3) Not span a chunk boundary * * (4) Not be in the tuple space of a dynamic variable * */ if (addr < base) return (0); chunkoffs = (addr - base) % dstate->dtds_chunksize; if (chunkoffs < sizeof (dtrace_dynvar_t)) return (0); if (chunkoffs + sz > dstate->dtds_chunksize) return (0); dvar = (dtrace_dynvar_t *)((uintptr_t)addr - chunkoffs); if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) return (0); if (chunkoffs < sizeof (dtrace_dynvar_t) + ((dvar->dtdv_tuple.dtt_nkeys - 1) * sizeof (dtrace_key_t))) return (0); DTRACE_RANGE_REMAIN(remain, addr, dvar, dstate->dtds_chunksize); return (1); } /* * Finally, check the static local and global variables. These checks * take the longest, so we perform them last. */ if (dtrace_canstore_statvar(addr, sz, remain, vstate->dtvs_locals, vstate->dtvs_nlocals)) return (1); if (dtrace_canstore_statvar(addr, sz, remain, vstate->dtvs_globals, vstate->dtvs_nglobals)) return (1); return (0); } /* * Convenience routine to check to see if the address is within a memory * region in which a load may be issued given the user's privilege level; * if not, it sets the appropriate error flags and loads 'addr' into the * illegal value slot. * * DTrace subroutines (DIF_SUBR_*) should use this helper to implement * appropriate memory access protection. */ static int dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { return (dtrace_canload_remains(addr, sz, NULL, mstate, vstate)); } /* * Implementation of dtrace_canload which communicates the uppoer bound of the * allowed memory region. */ static int dtrace_canload_remains(uint64_t addr, size_t sz, size_t *remain, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval; file_t *fp; /* * If we hold the privilege to read from kernel memory, then * everything is readable. */ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { DTRACE_RANGE_REMAIN(remain, addr, addr, sz); return (1); } /* * You can obviously read that which you can store. */ if (dtrace_canstore_remains(addr, sz, remain, mstate, vstate)) return (1); /* * We're allowed to read from our own string table. */ if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab, mstate->dtms_difo->dtdo_strlen)) { DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_difo->dtdo_strtab, mstate->dtms_difo->dtdo_strlen); return (1); } if (vstate->dtvs_state != NULL && dtrace_priv_proc(vstate->dtvs_state)) { proc_t *p; /* * When we have privileges to the current process, there are * several context-related kernel structures that are safe to * read, even absent the privilege to read from kernel memory. 
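 * Concretely, the range checks that follow whitelist the current
 * kthread_t, its proc_t (curthread->t_procp) and its cred_t
 * (curthread->t_cred), among the other structures noted below.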
* These reads are safe because these structures contain only * state that (1) we're permitted to read, (2) is harmless or * (3) contains pointers to additional kernel state that we're * not permitted to read (and as such, do not present an * opportunity for privilege escalation). Finally (and * critically), because of the nature of their relation with * the current thread context, the memory associated with these * structures cannot change over the duration of probe context, * and it is therefore impossible for this memory to be * deallocated and reallocated as something else while it's * being operated upon. */ if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t))) { DTRACE_RANGE_REMAIN(remain, addr, curthread, sizeof (kthread_t)); return (1); } if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr, sz, curthread->t_procp, sizeof (proc_t))) { DTRACE_RANGE_REMAIN(remain, addr, curthread->t_procp, sizeof (proc_t)); return (1); } if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz, curthread->t_cred, sizeof (cred_t))) { DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cred, sizeof (cred_t)); return (1); } #ifdef illumos if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz, &(p->p_pidp->pid_id), sizeof (pid_t))) { DTRACE_RANGE_REMAIN(remain, addr, &(p->p_pidp->pid_id), sizeof (pid_t)); return (1); } if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz, curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) { DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread)); return (1); } #endif } if ((fp = mstate->dtms_getf) != NULL) { uintptr_t psz = sizeof (void *); vnode_t *vp; vnodeops_t *op; /* * When getf() returns a file_t, the enabling is implicitly * granted the (transient) right to read the returned file_t * as well as the v_path and v_op->vnop_name of the underlying * vnode. These accesses are allowed after a successful * getf() because the members that they refer to cannot change * once set -- and the barrier logic in the kernel's closef() * path assures that the file_t and its referenced vode_t * cannot themselves be stale (that is, it impossible for * either dtms_getf itself or its f_vnode member to reference * freed memory). */ if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t))) { DTRACE_RANGE_REMAIN(remain, addr, fp, sizeof (file_t)); return (1); } if ((vp = fp->f_vnode) != NULL) { size_t slen; #ifdef illumos if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz)) { DTRACE_RANGE_REMAIN(remain, addr, &vp->v_path, psz); return (1); } slen = strlen(vp->v_path) + 1; if (DTRACE_INRANGE(addr, sz, vp->v_path, slen)) { DTRACE_RANGE_REMAIN(remain, addr, vp->v_path, slen); return (1); } #endif if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz)) { DTRACE_RANGE_REMAIN(remain, addr, &vp->v_op, psz); return (1); } #ifdef illumos if ((op = vp->v_op) != NULL && DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) { DTRACE_RANGE_REMAIN(remain, addr, &op->vnop_name, psz); return (1); } if (op != NULL && op->vnop_name != NULL && DTRACE_INRANGE(addr, sz, op->vnop_name, (slen = strlen(op->vnop_name) + 1))) { DTRACE_RANGE_REMAIN(remain, addr, op->vnop_name, slen); return (1); } #endif } } DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); *illval = addr; return (0); } /* * Convenience routine to check to see if a given string is within a memory * region in which a load may be issued given the user's privilege level; * this exists so that we don't need to issue unnecessary dtrace_strlen() * calls in the event that the user has all privileges. 
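 *
 * In outline (a sketch of the code below, kept here for clarity), the
 * check runs in two steps: first the accessible length of the region
 * is established, then the string scan is bounded by that length, so
 * the search for a NUL never reads, and never measurably times,
 * memory the caller is not allowed to touch:
 *
 *	if (dtrace_canload_remains(addr, 0, &remain, mstate, vstate)) {
 *		strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr,
 *		    MIN(sz, remain));
 *		ok = (strsz <= remain);
 *	}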
*/ static int dtrace_strcanload(uint64_t addr, size_t sz, size_t *remain, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { size_t rsize; /* * If we hold the privilege to read from kernel memory, then * everything is readable. */ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { DTRACE_RANGE_REMAIN(remain, addr, addr, sz); return (1); } /* * Even if the caller is uninterested in querying the remaining valid * range, it is required to ensure that the access is allowed. */ if (remain == NULL) { remain = &rsize; } if (dtrace_canload_remains(addr, 0, remain, mstate, vstate)) { size_t strsz; /* * Perform the strlen after determining the length of the * memory region which is accessible. This prevents timing * information from being used to find NULs in memory which is * not accessible to the caller. */ strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, MIN(sz, *remain)); if (strsz <= *remain) { return (1); } } return (0); } /* * Convenience routine to check to see if a given variable is within a memory * region in which a load may be issued given the user's privilege level. */ static int dtrace_vcanload(void *src, dtrace_diftype_t *type, size_t *remain, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { size_t sz; ASSERT(type->dtdt_flags & DIF_TF_BYREF); /* * Calculate the max size before performing any checks since even * DTRACE_ACCESS_KERNEL-credentialed callers expect that this function * return the max length via 'remain'. */ if (type->dtdt_kind == DIF_TYPE_STRING) { dtrace_state_t *state = vstate->dtvs_state; if (state != NULL) { sz = state->dts_options[DTRACEOPT_STRSIZE]; } else { /* * In helper context, we have a NULL state; fall back * to using the system-wide default for the string size * in this case. */ sz = dtrace_strsize_default; } } else { sz = type->dtdt_size; } /* * If we hold the privilege to read from kernel memory, then * everything is readable. */ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { DTRACE_RANGE_REMAIN(remain, (uintptr_t)src, src, sz); return (1); } if (type->dtdt_kind == DIF_TYPE_STRING) { return (dtrace_strcanload((uintptr_t)src, sz, remain, mstate, vstate)); } return (dtrace_canload_remains((uintptr_t)src, sz, remain, mstate, vstate)); } /* * Convert a string to a signed integer using safe loads. * * NOTE: This function uses various macros from strtolctype.h to manipulate * digit values, etc -- these have all been checked to ensure they make * no additional function calls. */ static int64_t dtrace_strtoll(char *input, int base, size_t limit) { uintptr_t pos = (uintptr_t)input; int64_t val = 0; int x; boolean_t neg = B_FALSE; char c, cc, ccc; uintptr_t end = pos + limit; /* * Consume any whitespace preceding digits. */ while ((c = dtrace_load8(pos)) == ' ' || c == '\t') pos++; /* * Handle an explicit sign if one is present. */ if (c == '-' || c == '+') { if (c == '-') neg = B_TRUE; c = dtrace_load8(++pos); } /* * Check for an explicit hexadecimal prefix ("0x" or "0X") and skip it * if present. */ if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' || cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) { pos += 2; c = ccc; } /* * Read in contiguous digits until the first non-digit character. */ for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base; c = dtrace_load8(++pos)) val = val * base + x; return (neg ? -val : val); } /* * Compare two strings using safe loads. 
*/ static int dtrace_strncmp(char *s1, char *s2, size_t limit) { uint8_t c1, c2; volatile uint16_t *flags; if (s1 == s2 || limit == 0) return (0); flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; do { if (s1 == NULL) { c1 = '\0'; } else { c1 = dtrace_load8((uintptr_t)s1++); } if (s2 == NULL) { c2 = '\0'; } else { c2 = dtrace_load8((uintptr_t)s2++); } if (c1 != c2) return (c1 - c2); } while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT)); return (0); } /* * Compute strlen(s) for a string using safe memory accesses. The additional * len parameter is used to specify a maximum length to ensure completion. */ static size_t dtrace_strlen(const char *s, size_t lim) { uint_t len; for (len = 0; len != lim; len++) { if (dtrace_load8((uintptr_t)s++) == '\0') break; } return (len); } /* * Check if an address falls within a toxic region. */ static int dtrace_istoxic(uintptr_t kaddr, size_t size) { uintptr_t taddr, tsize; int i; for (i = 0; i < dtrace_toxranges; i++) { taddr = dtrace_toxrange[i].dtt_base; tsize = dtrace_toxrange[i].dtt_limit - taddr; if (kaddr - taddr < tsize) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = kaddr; return (1); } if (taddr - kaddr < size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = taddr; return (1); } } return (0); } /* * Copy src to dst using safe memory accesses. The src is assumed to be unsafe * memory specified by the DIF program. The dst is assumed to be safe memory * that we can store to directly because it is managed by DTrace. As with * standard bcopy, overlapping copies are handled properly. */ static void dtrace_bcopy(const void *src, void *dst, size_t len) { if (len != 0) { uint8_t *s1 = dst; const uint8_t *s2 = src; if (s1 <= s2) { do { *s1++ = dtrace_load8((uintptr_t)s2++); } while (--len != 0); } else { s2 += len; s1 += len; do { *--s1 = dtrace_load8((uintptr_t)--s2); } while (--len != 0); } } } /* * Copy src to dst using safe memory accesses, up to either the specified * length, or the point that a nul byte is encountered. The src is assumed to * be unsafe memory specified by the DIF program. The dst is assumed to be * safe memory that we can store to directly because it is managed by DTrace. * Unlike dtrace_bcopy(), overlapping regions are not handled. */ static void dtrace_strcpy(const void *src, void *dst, size_t len) { if (len != 0) { uint8_t *s1 = dst, c; const uint8_t *s2 = src; do { *s1++ = c = dtrace_load8((uintptr_t)s2++); } while (--len != 0 && c != '\0'); } } /* * Copy src to dst, deriving the size and type from the specified (BYREF) * variable type. The src is assumed to be unsafe memory specified by the DIF * program. The dst is assumed to be DTrace variable memory that is of the * specified type; we assume that we can store to directly. */ static void dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type, size_t limit) { ASSERT(type->dtdt_flags & DIF_TF_BYREF); if (type->dtdt_kind == DIF_TYPE_STRING) { dtrace_strcpy(src, dst, MIN(type->dtdt_size, limit)); } else { dtrace_bcopy(src, dst, MIN(type->dtdt_size, limit)); } } /* * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be * unsafe memory specified by the DIF program. The s2 data is assumed to be * safe memory that we can access directly because it is managed by DTrace. 
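 * As with dtrace_strncmp() above, the comparison loop below also bails
 * out once a safe load has raised CPU_DTRACE_FAULT, rather than
 * continuing to walk possibly unreadable memory.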
*/ static int dtrace_bcmp(const void *s1, const void *s2, size_t len) { volatile uint16_t *flags; flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; if (s1 == s2) return (0); if (s1 == NULL || s2 == NULL) return (1); if (s1 != s2 && len != 0) { const uint8_t *ps1 = s1; const uint8_t *ps2 = s2; do { if (dtrace_load8((uintptr_t)ps1++) != *ps2++) return (1); } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT)); } return (0); } /* * Zero the specified region using a simple byte-by-byte loop. Note that this * is for safe DTrace-managed memory only. */ static void dtrace_bzero(void *dst, size_t len) { uchar_t *cp; for (cp = dst; len != 0; len--) *cp++ = 0; } static void dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum) { uint64_t result[2]; result[0] = addend1[0] + addend2[0]; result[1] = addend1[1] + addend2[1] + (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0); sum[0] = result[0]; sum[1] = result[1]; } /* * Shift the 128-bit value in a by b. If b is positive, shift left. * If b is negative, shift right. */ static void dtrace_shift_128(uint64_t *a, int b) { uint64_t mask; if (b == 0) return; if (b < 0) { b = -b; if (b >= 64) { a[0] = a[1] >> (b - 64); a[1] = 0; } else { a[0] >>= b; mask = 1LL << (64 - b); mask -= 1; a[0] |= ((a[1] & mask) << (64 - b)); a[1] >>= b; } } else { if (b >= 64) { a[1] = a[0] << (b - 64); a[0] = 0; } else { a[1] <<= b; mask = a[0] >> (64 - b); a[1] |= mask; a[0] <<= b; } } } /* * The basic idea is to break the 2 64-bit values into 4 32-bit values, * use native multiplication on those, and then re-combine into the * resulting 128-bit value. * * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) = * hi1 * hi2 << 64 + * hi1 * lo2 << 32 + * hi2 * lo1 << 32 + * lo1 * lo2 */ static void dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product) { uint64_t hi1, hi2, lo1, lo2; uint64_t tmp[2]; hi1 = factor1 >> 32; hi2 = factor2 >> 32; lo1 = factor1 & DT_MASK_LO; lo2 = factor2 & DT_MASK_LO; product[0] = lo1 * lo2; product[1] = hi1 * hi2; tmp[0] = hi1 * lo2; tmp[1] = 0; dtrace_shift_128(tmp, 32); dtrace_add_128(product, tmp, product); tmp[0] = hi2 * lo1; tmp[1] = 0; dtrace_shift_128(tmp, 32); dtrace_add_128(product, tmp, product); } /* * This privilege check should be used by actions and subroutines to * verify that the user credentials of the process that enabled the * invoking ECB match the target credentials */ static int dtrace_priv_proc_common_user(dtrace_state_t *state) { cred_t *cr, *s_cr = state->dts_cred.dcr_cred; /* * We should always have a non-NULL state cred here, since if cred * is null (anonymous tracing), we fast-path bypass this routine. */ ASSERT(s_cr != NULL); if ((cr = CRED()) != NULL && s_cr->cr_uid == cr->cr_uid && s_cr->cr_uid == cr->cr_ruid && s_cr->cr_uid == cr->cr_suid && s_cr->cr_gid == cr->cr_gid && s_cr->cr_gid == cr->cr_rgid && s_cr->cr_gid == cr->cr_sgid) return (1); return (0); } /* * This privilege check should be used by actions and subroutines to * verify that the zone of the process that enabled the invoking ECB * matches the target credentials */ static int dtrace_priv_proc_common_zone(dtrace_state_t *state) { #ifdef illumos cred_t *cr, *s_cr = state->dts_cred.dcr_cred; /* * We should always have a non-NULL state cred here, since if cred * is null (anonymous tracing), we fast-path bypass this routine. 
*/ ASSERT(s_cr != NULL); if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone) return (1); return (0); #else return (1); #endif } /* * This privilege check should be used by actions and subroutines to * verify that the process has not setuid or changed credentials. */ static int dtrace_priv_proc_common_nocd(void) { proc_t *proc; if ((proc = ttoproc(curthread)) != NULL && !(proc->p_flag & SNOCD)) return (1); return (0); } static int dtrace_priv_proc_destructive(dtrace_state_t *state) { int action = state->dts_cred.dcr_action; if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) && dtrace_priv_proc_common_zone(state) == 0) goto bad; if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) && dtrace_priv_proc_common_user(state) == 0) goto bad; if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) && dtrace_priv_proc_common_nocd() == 0) goto bad; return (1); bad: cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; return (0); } static int dtrace_priv_proc_control(dtrace_state_t *state) { if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) return (1); if (dtrace_priv_proc_common_zone(state) && dtrace_priv_proc_common_user(state) && dtrace_priv_proc_common_nocd()) return (1); cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; return (0); } static int dtrace_priv_proc(dtrace_state_t *state) { if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) return (1); cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; return (0); } static int dtrace_priv_kernel(dtrace_state_t *state) { if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL) return (1); cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV; return (0); } static int dtrace_priv_kernel_destructive(dtrace_state_t *state) { if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE) return (1); cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV; return (0); } /* * Determine if the dte_cond of the specified ECB allows for processing of * the current probe to continue. Note that this routine may allow continued * processing, but with access(es) stripped from the mstate's dtms_access * field. */ static int dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate, dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; dtrace_provider_t *prov = probe->dtpr_provider; dtrace_pops_t *pops = &prov->dtpv_pops; int mode = DTRACE_MODE_NOPRIV_DROP; ASSERT(ecb->dte_cond); #ifdef illumos if (pops->dtps_mode != NULL) { mode = pops->dtps_mode(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); ASSERT((mode & DTRACE_MODE_USER) || (mode & DTRACE_MODE_KERNEL)); ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) || (mode & DTRACE_MODE_NOPRIV_DROP)); } /* * If the dte_cond bits indicate that this consumer is only allowed to * see user-mode firings of this probe, call the provider's dtps_mode() * entry point to check that the probe was fired while in a user * context. If that's not the case, use the policy specified by the * provider to determine if we drop the probe or merely restrict * operation. */ if (ecb->dte_cond & DTRACE_COND_USERMODE) { ASSERT(mode != DTRACE_MODE_NOPRIV_DROP); if (!(mode & DTRACE_MODE_USER)) { if (mode & DTRACE_MODE_NOPRIV_DROP) return (0); mstate->dtms_access &= ~DTRACE_ACCESS_ARGS; } } #endif /* * This is more subtle than it looks. We have to be absolutely certain * that CRED() isn't going to change out from under us so it's only * legit to examine that structure if we're in constrained situations. 
* Currently, the only times we'll this check is if a non-super-user * has enabled the profile or syscall providers -- providers that * allow visibility of all processes. For the profile case, the check * above will ensure that we're examining a user context. */ if (ecb->dte_cond & DTRACE_COND_OWNER) { cred_t *cr; cred_t *s_cr = state->dts_cred.dcr_cred; proc_t *proc; ASSERT(s_cr != NULL); if ((cr = CRED()) == NULL || s_cr->cr_uid != cr->cr_uid || s_cr->cr_uid != cr->cr_ruid || s_cr->cr_uid != cr->cr_suid || s_cr->cr_gid != cr->cr_gid || s_cr->cr_gid != cr->cr_rgid || s_cr->cr_gid != cr->cr_sgid || (proc = ttoproc(curthread)) == NULL || (proc->p_flag & SNOCD)) { if (mode & DTRACE_MODE_NOPRIV_DROP) return (0); #ifdef illumos mstate->dtms_access &= ~DTRACE_ACCESS_PROC; #endif } } #ifdef illumos /* * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not * in our zone, check to see if our mode policy is to restrict rather * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC * and DTRACE_ACCESS_ARGS */ if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { cred_t *cr; cred_t *s_cr = state->dts_cred.dcr_cred; ASSERT(s_cr != NULL); if ((cr = CRED()) == NULL || s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) { if (mode & DTRACE_MODE_NOPRIV_DROP) return (0); mstate->dtms_access &= ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS); } } #endif return (1); } /* * Note: not called from probe context. This function is called * asynchronously (and at a regular interval) from outside of probe context to * clean the dirty dynamic variable lists on all CPUs. Dynamic variable * cleaning is explained in detail in . */ void dtrace_dynvar_clean(dtrace_dstate_t *dstate) { dtrace_dynvar_t *dirty; dtrace_dstate_percpu_t *dcpu; dtrace_dynvar_t **rinsep; int i, j, work = 0; for (i = 0; i < NCPU; i++) { dcpu = &dstate->dtds_percpu[i]; rinsep = &dcpu->dtdsc_rinsing; /* * If the dirty list is NULL, there is no dirty work to do. */ if (dcpu->dtdsc_dirty == NULL) continue; if (dcpu->dtdsc_rinsing != NULL) { /* * If the rinsing list is non-NULL, then it is because * this CPU was selected to accept another CPU's * dirty list -- and since that time, dirty buffers * have accumulated. This is a highly unlikely * condition, but we choose to ignore the dirty * buffers -- they'll be picked up a future cleanse. */ continue; } if (dcpu->dtdsc_clean != NULL) { /* * If the clean list is non-NULL, then we're in a * situation where a CPU has done deallocations (we * have a non-NULL dirty list) but no allocations (we * also have a non-NULL clean list). We can't simply * move the dirty list into the clean list on this * CPU, yet we also don't want to allow this condition * to persist, lest a short clean list prevent a * massive dirty list from being cleaned (which in * turn could lead to otherwise avoidable dynamic * drops). To deal with this, we look for some CPU * with a NULL clean list, NULL dirty list, and NULL * rinsing list -- and then we borrow this CPU to * rinse our dirty list. */ for (j = 0; j < NCPU; j++) { dtrace_dstate_percpu_t *rinser; rinser = &dstate->dtds_percpu[j]; if (rinser->dtdsc_rinsing != NULL) continue; if (rinser->dtdsc_dirty != NULL) continue; if (rinser->dtdsc_clean != NULL) continue; rinsep = &rinser->dtdsc_rinsing; break; } if (j == NCPU) { /* * We were unable to find another CPU that * could accept this dirty list -- we are * therefore unable to clean it now. */ dtrace_dynvar_failclean++; continue; } } work = 1; /* * Atomically move the dirty list aside. 
*/ do { dirty = dcpu->dtdsc_dirty; /* * Before we zap the dirty list, set the rinsing list. * (This allows for a potential assertion in * dtrace_dynvar(): if a free dynamic variable appears * on a hash chain, either the dirty list or the * rinsing list for some CPU must be non-NULL.) */ *rinsep = dirty; dtrace_membar_producer(); } while (dtrace_casptr(&dcpu->dtdsc_dirty, dirty, NULL) != dirty); } if (!work) { /* * We have no work to do; we can simply return. */ return; } dtrace_sync(); for (i = 0; i < NCPU; i++) { dcpu = &dstate->dtds_percpu[i]; if (dcpu->dtdsc_rinsing == NULL) continue; /* * We are now guaranteed that no hash chain contains a pointer * into this dirty list; we can make it clean. */ ASSERT(dcpu->dtdsc_clean == NULL); dcpu->dtdsc_clean = dcpu->dtdsc_rinsing; dcpu->dtdsc_rinsing = NULL; } /* * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make * sure that all CPUs have seen all of the dtdsc_clean pointers. * This prevents a race whereby a CPU incorrectly decides that * the state should be something other than DTRACE_DSTATE_CLEAN * after dtrace_dynvar_clean() has completed. */ dtrace_sync(); dstate->dtds_state = DTRACE_DSTATE_CLEAN; } /* * Depending on the value of the op parameter, this function looks-up, * allocates or deallocates an arbitrarily-keyed dynamic variable. If an * allocation is requested, this function will return a pointer to a * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no * variable can be allocated. If NULL is returned, the appropriate counter * will be incremented. */ dtrace_dynvar_t * dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { uint64_t hashval = DTRACE_DYNHASH_VALID; dtrace_dynhash_t *hash = dstate->dtds_hash; dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL; processorid_t me = curcpu, cpu = me; dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me]; size_t bucket, ksize; size_t chunksize = dstate->dtds_chunksize; uintptr_t kdata, lock, nstate; uint_t i; ASSERT(nkeys != 0); /* * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time" * algorithm. For the by-value portions, we perform the algorithm in * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a * bit, and seems to have only a minute effect on distribution. For * the by-reference data, we perform "One-at-a-time" iterating (safely) * over each referenced byte. It's painful to do this, but it's much * better than pathological hash distribution. The efficacy of the * hashing algorithm (and a comparison with other algorithms) may be * found by running the ::dtrace_dynstat MDB dcmd. */ for (i = 0; i < nkeys; i++) { if (key[i].dttk_size == 0) { uint64_t val = key[i].dttk_value; hashval += (val >> 48) & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); hashval += (val >> 32) & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); hashval += (val >> 16) & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); hashval += val & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); } else { /* * This is incredibly painful, but it beats the hell * out of the alternative. 
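 * (For reference, the classic byte-at-a-time form of the "One-at-a-time" hash
 * used for this by-reference data looks roughly like the following in ordinary
 * user-space C; the name and signature are purely illustrative, and the code
 * below differs only in that it must use dtrace_load8() for safe loads and
 * folds the result together with the by-value chunks hashed above.)
 *
 *	uint32_t
 *	oat_hash(const uint8_t *data, size_t len)
 *	{
 *		uint32_t h = 0;
 *		size_t i;
 *
 *		for (i = 0; i < len; i++) {
 *			h += data[i];
 *			h += (h << 10);
 *			h ^= (h >> 6);
 *		}
 *		h += (h << 3);
 *		h ^= (h >> 11);
 *		h += (h << 15);
 *		return (h);
 *	}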
*/ uint64_t j, size = key[i].dttk_size; uintptr_t base = (uintptr_t)key[i].dttk_value; if (!dtrace_canload(base, size, mstate, vstate)) break; for (j = 0; j < size; j++) { hashval += dtrace_load8(base + j); hashval += (hashval << 10); hashval ^= (hashval >> 6); } } } if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) return (NULL); hashval += (hashval << 3); hashval ^= (hashval >> 11); hashval += (hashval << 15); /* * There is a remote chance (ideally, 1 in 2^31) that our hashval * comes out to be one of our two sentinel hash values. If this * actually happens, we set the hashval to be a value known to be a * non-sentinel value. */ if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK) hashval = DTRACE_DYNHASH_VALID; /* * Yes, it's painful to do a divide here. If the cycle count becomes * important here, tricks can be pulled to reduce it. (However, it's * critical that hash collisions be kept to an absolute minimum; * they're much more painful than a divide.) It's better to have a * solution that generates few collisions and still keeps things * relatively simple. */ bucket = hashval % dstate->dtds_hashsize; if (op == DTRACE_DYNVAR_DEALLOC) { volatile uintptr_t *lockp = &hash[bucket].dtdh_lock; for (;;) { while ((lock = *lockp) & 1) continue; if (dtrace_casptr((volatile void *)lockp, (volatile void *)lock, (volatile void *)(lock + 1)) == (void *)lock) break; } dtrace_membar_producer(); } top: prev = NULL; lock = hash[bucket].dtdh_lock; dtrace_membar_consumer(); start = hash[bucket].dtdh_chain; ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK || start->dtdv_hashval != DTRACE_DYNHASH_FREE || op != DTRACE_DYNVAR_DEALLOC)); for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) { dtrace_tuple_t *dtuple = &dvar->dtdv_tuple; dtrace_key_t *dkey = &dtuple->dtt_key[0]; if (dvar->dtdv_hashval != hashval) { if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) { /* * We've reached the sink, and therefore the * end of the hash chain; we can kick out of * the loop knowing that we have seen a valid * snapshot of state. */ ASSERT(dvar->dtdv_next == NULL); ASSERT(dvar == &dtrace_dynhash_sink); break; } if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) { /* * We've gone off the rails: somewhere along * the line, one of the members of this hash * chain was deleted. Note that we could also * detect this by simply letting this loop run * to completion, as we would eventually hit * the end of the dirty list. However, we * want to avoid running the length of the * dirty list unnecessarily (it might be quite * long), so we catch this as early as * possible by detecting the hash marker. In * this case, we simply set dvar to NULL and * break; the conditional after the loop will * send us back to top. 
*/ dvar = NULL; break; } goto next; } if (dtuple->dtt_nkeys != nkeys) goto next; for (i = 0; i < nkeys; i++, dkey++) { if (dkey->dttk_size != key[i].dttk_size) goto next; /* size or type mismatch */ if (dkey->dttk_size != 0) { if (dtrace_bcmp( (void *)(uintptr_t)key[i].dttk_value, (void *)(uintptr_t)dkey->dttk_value, dkey->dttk_size)) goto next; } else { if (dkey->dttk_value != key[i].dttk_value) goto next; } } if (op != DTRACE_DYNVAR_DEALLOC) return (dvar); ASSERT(dvar->dtdv_next == NULL || dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE); if (prev != NULL) { ASSERT(hash[bucket].dtdh_chain != dvar); ASSERT(start != dvar); ASSERT(prev->dtdv_next == dvar); prev->dtdv_next = dvar->dtdv_next; } else { if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar->dtdv_next) != start) { /* * We have failed to atomically swing the * hash table head pointer, presumably because * of a conflicting allocation on another CPU. * We need to reread the hash chain and try * again. */ goto top; } } dtrace_membar_producer(); /* * Now set the hash value to indicate that it's free. */ ASSERT(hash[bucket].dtdh_chain != dvar); dvar->dtdv_hashval = DTRACE_DYNHASH_FREE; dtrace_membar_producer(); /* * Set the next pointer to point at the dirty list, and * atomically swing the dirty pointer to the newly freed dvar. */ do { next = dcpu->dtdsc_dirty; dvar->dtdv_next = next; } while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next); /* * Finally, unlock this hash bucket. */ ASSERT(hash[bucket].dtdh_lock == lock); ASSERT(lock & 1); hash[bucket].dtdh_lock++; return (NULL); next: prev = dvar; continue; } if (dvar == NULL) { /* * If dvar is NULL, it is because we went off the rails: * one of the elements that we traversed in the hash chain * was deleted while we were traversing it. In this case, * we assert that we aren't doing a dealloc (deallocs lock * the hash bucket to prevent themselves from racing with * one another), and retry the hash chain traversal. */ ASSERT(op != DTRACE_DYNVAR_DEALLOC); goto top; } if (op != DTRACE_DYNVAR_ALLOC) { /* * If we are not to allocate a new variable, we want to * return NULL now. Before we return, check that the value * of the lock word hasn't changed. If it has, we may have * seen an inconsistent snapshot. */ if (op == DTRACE_DYNVAR_NOALLOC) { if (hash[bucket].dtdh_lock != lock) goto top; } else { ASSERT(op == DTRACE_DYNVAR_DEALLOC); ASSERT(hash[bucket].dtdh_lock == lock); ASSERT(lock & 1); hash[bucket].dtdh_lock++; } return (NULL); } /* * We need to allocate a new dynamic variable. The size we need is the * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the * size of any auxiliary key data (rounded up to 8-byte alignment) plus * the size of any referred-to data (dsize). We then round the final * size up to the chunksize for allocation. */ for (ksize = 0, i = 0; i < nkeys; i++) ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t)); /* * This should be pretty much impossible, but could happen if, say, * strange DIF specified the tuple. Ideally, this should be an * assertion and not an error condition -- but that requires that the * chunksize calculation in dtrace_difo_chunksize() be absolutely * bullet-proof. (That is, it must not be able to be fooled by * malicious DIF.) Given the lack of backwards branches in DIF, * solving this would presumably not amount to solving the Halting * Problem -- but it still seems awfully hard. 
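 * As a concrete illustration of the check below (the key widths are made up
 * for the example): for a two-key tuple with one by-value key (dttk_size 0)
 * and one 5-byte by-reference key,
 *
 *	ksize = P2ROUNDUP(0, 8) + P2ROUNDUP(5, 8) = 0 + 8 = 8
 *
 * and the allocation fits only if
 *
 *	sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * 1 + 8 + dsize
 *
 * is no greater than the chunksize computed by dtrace_difo_chunksize().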
*/ if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) + ksize + dsize > chunksize) { dcpu->dtdsc_drops++; return (NULL); } nstate = DTRACE_DSTATE_EMPTY; do { retry: free = dcpu->dtdsc_free; if (free == NULL) { dtrace_dynvar_t *clean = dcpu->dtdsc_clean; void *rval; if (clean == NULL) { /* * We're out of dynamic variable space on * this CPU. Unless we have tried all CPUs, * we'll try to allocate from a different * CPU. */ switch (dstate->dtds_state) { case DTRACE_DSTATE_CLEAN: { void *sp = &dstate->dtds_state; if (++cpu >= NCPU) cpu = 0; if (dcpu->dtdsc_dirty != NULL && nstate == DTRACE_DSTATE_EMPTY) nstate = DTRACE_DSTATE_DIRTY; if (dcpu->dtdsc_rinsing != NULL) nstate = DTRACE_DSTATE_RINSING; dcpu = &dstate->dtds_percpu[cpu]; if (cpu != me) goto retry; (void) dtrace_cas32(sp, DTRACE_DSTATE_CLEAN, nstate); /* * To increment the correct bean * counter, take another lap. */ goto retry; } case DTRACE_DSTATE_DIRTY: dcpu->dtdsc_dirty_drops++; break; case DTRACE_DSTATE_RINSING: dcpu->dtdsc_rinsing_drops++; break; case DTRACE_DSTATE_EMPTY: dcpu->dtdsc_drops++; break; } DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP); return (NULL); } /* * The clean list appears to be non-empty. We want to * move the clean list to the free list; we start by * moving the clean pointer aside. */ if (dtrace_casptr(&dcpu->dtdsc_clean, clean, NULL) != clean) { /* * We are in one of two situations: * * (a) The clean list was switched to the * free list by another CPU. * * (b) The clean list was added to by the * cleansing cyclic. * * In either of these situations, we can * just reattempt the free list allocation. */ goto retry; } ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE); /* * Now we'll move the clean list to our free list. * It's impossible for this to fail: the only way * the free list can be updated is through this * code path, and only one CPU can own the clean list. * Thus, it would only be possible for this to fail if * this code were racing with dtrace_dynvar_clean(). * (That is, if dtrace_dynvar_clean() updated the clean * list, and we ended up racing to update the free * list.) This race is prevented by the dtrace_sync() * in dtrace_dynvar_clean() -- which flushes the * owners of the clean lists out before resetting * the clean lists. */ dcpu = &dstate->dtds_percpu[me]; rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean); ASSERT(rval == NULL); goto retry; } dvar = free; new_free = dvar->dtdv_next; } while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free); /* * We have now allocated a new chunk. We copy the tuple keys into the * tuple array and copy any referenced key data into the data space * following the tuple array. As we do this, we relocate dttk_value * in the final tuple to point to the key data address in the chunk. */ kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys]; dvar->dtdv_data = (void *)(kdata + ksize); dvar->dtdv_tuple.dtt_nkeys = nkeys; for (i = 0; i < nkeys; i++) { dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i]; size_t kesize = key[i].dttk_size; if (kesize != 0) { dtrace_bcopy( (const void *)(uintptr_t)key[i].dttk_value, (void *)kdata, kesize); dkey->dttk_value = kdata; kdata += P2ROUNDUP(kesize, sizeof (uint64_t)); } else { dkey->dttk_value = key[i].dttk_value; } dkey->dttk_size = kesize; } ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE); dvar->dtdv_hashval = hashval; dvar->dtdv_next = start; if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start) return (dvar); /* * The cas has failed. 
Either another CPU is adding an element to * this hash chain, or another CPU is deleting an element from this * hash chain. The simplest way to deal with both of these cases * (though not necessarily the most efficient) is to free our * allocated block and re-attempt it all. Note that the free is * to the dirty list and _not_ to the free list. This is to prevent * races with allocators, above. */ dvar->dtdv_hashval = DTRACE_DYNHASH_FREE; dtrace_membar_producer(); do { free = dcpu->dtdsc_dirty; dvar->dtdv_next = free; } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free); goto top; } /*ARGSUSED*/ static void dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg) { if ((int64_t)nval < (int64_t)*oval) *oval = nval; } /*ARGSUSED*/ static void dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg) { if ((int64_t)nval > (int64_t)*oval) *oval = nval; } static void dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr) { int i, zero = DTRACE_QUANTIZE_ZEROBUCKET; int64_t val = (int64_t)nval; if (val < 0) { for (i = 0; i < zero; i++) { if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) { quanta[i] += incr; return; } } } else { for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) { if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) { quanta[i - 1] += incr; return; } } quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr; return; } ASSERT(0); } static void dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr) { uint64_t arg = *lquanta++; int32_t base = DTRACE_LQUANTIZE_BASE(arg); uint16_t step = DTRACE_LQUANTIZE_STEP(arg); uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg); int32_t val = (int32_t)nval, level; ASSERT(step != 0); ASSERT(levels != 0); if (val < base) { /* * This is an underflow. */ lquanta[0] += incr; return; } level = (val - base) / step; if (level < levels) { lquanta[level + 1] += incr; return; } /* * This is an overflow. */ lquanta[levels + 1] += incr; } static int dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low, uint16_t high, uint16_t nsteps, int64_t value) { int64_t this = 1, last, next; int base = 1, order; ASSERT(factor <= nsteps); ASSERT(nsteps % factor == 0); for (order = 0; order < low; order++) this *= factor; /* * If our value is less than our factor taken to the power of the * low order of magnitude, it goes into the zeroth bucket. */ if (value < (last = this)) return (0); for (this *= factor; order <= high; order++) { int nbuckets = this > nsteps ? nsteps : this; if ((next = this * factor) < this) { /* * We should not generally get log/linear quantizations * with a high magnitude that allows 64-bits to * overflow, but we nonetheless protect against this * by explicitly checking for overflow, and clamping * our value accordingly. */ value = this - 1; } if (value < this) { /* * If our value lies within this order of magnitude, * determine its position by taking the offset within * the order of magnitude, dividing by the bucket * width, and adding to our (accumulated) base. */ return (base + (value - last) / (this / nbuckets)); } base += nbuckets - (nbuckets / factor); last = this; this = next; } /* * Our value is greater than or equal to our factor taken to the * power of one plus the high magnitude -- return the top bucket. 
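 * A worked example (illustrative parameters only): with factor 10, low
 * magnitude 0, high magnitude 2 and 10 steps per order of magnitude, values
 * below 10^0 = 1 land in bucket 0; 1 through 9 land in buckets 1 through 9
 * (width 1); 10 through 99 land in buckets 10 through 18 (width 10); 100
 * through 999 land in buckets 19 through 27 (width 100); and anything at or
 * above 10^3 = 1000 falls into the top bucket, 28, returned as 'base' below.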
*/ return (base); } static void dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr) { uint64_t arg = *llquanta++; uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); llquanta[dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, nval)] += incr; } /*ARGSUSED*/ static void dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) { data[0]++; data[1] += nval; } /*ARGSUSED*/ static void dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg) { int64_t snval = (int64_t)nval; uint64_t tmp[2]; data[0]++; data[1] += nval; /* * What we want to say here is: * * data[2] += nval * nval; * * But given that nval is 64-bit, we could easily overflow, so * we do this as 128-bit arithmetic. */ if (snval < 0) snval = -snval; dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp); dtrace_add_128(data + 2, tmp, data + 2); } /*ARGSUSED*/ static void dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg) { *oval = *oval + 1; } /*ARGSUSED*/ static void dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg) { *oval += nval; } /* * Aggregate given the tuple in the principal data buffer, and the aggregating * action denoted by the specified dtrace_aggregation_t. The aggregation * buffer is specified as the buf parameter. This routine does not return * failure; if there is no space in the aggregation buffer, the data will be * dropped, and a corresponding counter incremented. */ static void dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf, intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg) { dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec; uint32_t i, ndx, size, fsize; uint32_t align = sizeof (uint64_t) - 1; dtrace_aggbuffer_t *agb; dtrace_aggkey_t *key; uint32_t hashval = 0, limit, isstr; caddr_t tomax, data, kdata; dtrace_actkind_t action; dtrace_action_t *act; uintptr_t offs; if (buf == NULL) return; if (!agg->dtag_hasarg) { /* * Currently, only quantize() and lquantize() take additional * arguments, and they have the same semantics: an increment * value that defaults to 1 when not present. If additional * aggregating actions take arguments, the setting of the * default argument value will presumably have to become more * sophisticated... */ arg = 1; } action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION; size = rec->dtrd_offset - agg->dtag_base; fsize = size + rec->dtrd_size; ASSERT(dbuf->dtb_tomax != NULL); data = dbuf->dtb_tomax + offset + agg->dtag_base; if ((tomax = buf->dtb_tomax) == NULL) { dtrace_buffer_drop(buf); return; } /* * The metastructure is always at the bottom of the buffer. */ agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size - sizeof (dtrace_aggbuffer_t)); if (buf->dtb_offset == 0) { /* * We just kludge up approximately 1/8th of the size to be * buckets. If this guess ends up being routinely * off-the-mark, we may need to dynamically readjust this * based on past performance. */ uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t); if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) < (uintptr_t)tomax || hashsize == 0) { /* * We've been given a ludicrously small buffer; * increment our drop count and leave. */ dtrace_buffer_drop(buf); return; } /* * And now, a pathetic attempt to try to get a an odd (or * perchance, a prime) hash size for better hash distribution. 
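 * For a sense of scale (numbers are purely illustrative): with a 4 MB per-CPU
 * aggregation buffer and 8-byte pointers, the initial guess works out to
 * (4194304 >> 3) / 8 = 65536 buckets, i.e. 512 KB of bucket pointers sitting
 * just below the dtrace_aggbuffer_t metastructure at the top of the buffer;
 * key and value data then grow upward from dtb_offset while dtrace_aggkey_t
 * structures are carved downward from dtagb_free.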
*/ if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3)) hashsize -= DTRACE_AGGHASHSIZE_SLEW; agb->dtagb_hashsize = hashsize; agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb - agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *)); agb->dtagb_free = (uintptr_t)agb->dtagb_hash; for (i = 0; i < agb->dtagb_hashsize; i++) agb->dtagb_hash[i] = NULL; } ASSERT(agg->dtag_first != NULL); ASSERT(agg->dtag_first->dta_intuple); /* * Calculate the hash value based on the key. Note that we _don't_ * include the aggid in the hashing (but we will store it as part of * the key). The hashing algorithm is Bob Jenkins' "One-at-a-time" * algorithm: a simple, quick algorithm that has no known funnels, and * gets good distribution in practice. The efficacy of the hashing * algorithm (and a comparison with other algorithms) may be found by * running the ::dtrace_aggstat MDB dcmd. */ for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) { i = act->dta_rec.dtrd_offset - agg->dtag_base; limit = i + act->dta_rec.dtrd_size; ASSERT(limit <= size); isstr = DTRACEACT_ISSTRING(act); for (; i < limit; i++) { hashval += data[i]; hashval += (hashval << 10); hashval ^= (hashval >> 6); if (isstr && data[i] == '\0') break; } } hashval += (hashval << 3); hashval ^= (hashval >> 11); hashval += (hashval << 15); /* * Yes, the divide here is expensive -- but it's generally the least * of the performance issues given the amount of data that we iterate * over to compute hash values, compare data, etc. */ ndx = hashval % agb->dtagb_hashsize; for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) { ASSERT((caddr_t)key >= tomax); ASSERT((caddr_t)key < tomax + buf->dtb_size); if (hashval != key->dtak_hashval || key->dtak_size != size) continue; kdata = key->dtak_data; ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size); for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) { i = act->dta_rec.dtrd_offset - agg->dtag_base; limit = i + act->dta_rec.dtrd_size; ASSERT(limit <= size); isstr = DTRACEACT_ISSTRING(act); for (; i < limit; i++) { if (kdata[i] != data[i]) goto next; if (isstr && data[i] == '\0') break; } } if (action != key->dtak_action) { /* * We are aggregating on the same value in the same * aggregation with two different aggregating actions. * (This should have been picked up in the compiler, * so we may be dealing with errant or devious DIF.) * This is an error condition; we indicate as much, * and return. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return; } /* * This is a hit: we need to apply the aggregator to * the value at this key. */ agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg); return; next: continue; } /* * We didn't find it. We need to allocate some zero-filled space, * link it into the hash table appropriately, and apply the aggregator * to the (zero-filled) value. */ offs = buf->dtb_offset; while (offs & (align - 1)) offs += sizeof (uint32_t); /* * If we don't have enough room to both allocate a new key _and_ * its associated data, increment the drop count and return. */ if ((uintptr_t)tomax + offs + fsize > agb->dtagb_free - sizeof (dtrace_aggkey_t)) { dtrace_buffer_drop(buf); return; } /*CONSTCOND*/ ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1))); key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t)); agb->dtagb_free -= sizeof (dtrace_aggkey_t); key->dtak_data = kdata = tomax + offs; buf->dtb_offset = offs + fsize; /* * Now copy the data across. 
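 * The record laid down at kdata occupies fsize bytes in all: the aggregation
 * ID (a dtrace_aggid_t) comes first, then the tuple key data copied from the
 * principal buffer out to 'size' bytes, then rec->dtrd_size bytes of
 * aggregated value, whose first uint64_t is seeded from dtag_initial further
 * below before the aggregator is applied.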
*/ *((dtrace_aggid_t *)kdata) = agg->dtag_id; for (i = sizeof (dtrace_aggid_t); i < size; i++) kdata[i] = data[i]; /* * Because strings are not zeroed out by default, we need to iterate * looking for actions that store strings, and we need to explicitly * pad these strings out with zeroes. */ for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) { int nul; if (!DTRACEACT_ISSTRING(act)) continue; i = act->dta_rec.dtrd_offset - agg->dtag_base; limit = i + act->dta_rec.dtrd_size; ASSERT(limit <= size); for (nul = 0; i < limit; i++) { if (nul) { kdata[i] = '\0'; continue; } if (data[i] != '\0') continue; nul = 1; } } for (i = size; i < fsize; i++) kdata[i] = 0; key->dtak_hashval = hashval; key->dtak_size = size; key->dtak_action = action; key->dtak_next = agb->dtagb_hash[ndx]; agb->dtagb_hash[ndx] = key; /* * Finally, apply the aggregator. */ *((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial; agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg); } /* * Given consumer state, this routine finds a speculation in the INACTIVE * state and transitions it into the ACTIVE state. If there is no speculation * in the INACTIVE state, 0 is returned. In this case, no error counter is * incremented -- it is up to the caller to take appropriate action. */ static int dtrace_speculation(dtrace_state_t *state) { int i = 0; dtrace_speculation_state_t current; uint32_t *stat = &state->dts_speculations_unavail, count; while (i < state->dts_nspeculations) { dtrace_speculation_t *spec = &state->dts_speculations[i]; current = spec->dtsp_state; if (current != DTRACESPEC_INACTIVE) { if (current == DTRACESPEC_COMMITTINGMANY || current == DTRACESPEC_COMMITTING || current == DTRACESPEC_DISCARDING) stat = &state->dts_speculations_busy; i++; continue; } if (dtrace_cas32((uint32_t *)&spec->dtsp_state, current, DTRACESPEC_ACTIVE) == current) return (i + 1); } /* * We couldn't find a speculation. If we found as much as a single * busy speculation buffer, we'll attribute this failure as "busy" * instead of "unavail". */ do { count = *stat; } while (dtrace_cas32(stat, count, count + 1) != count); return (0); } /* * This routine commits an active speculation. If the specified speculation * is not in a valid state to perform a commit(), this routine will silently do * nothing. The state of the specified speculation is transitioned according * to the state transition diagram outlined in */ static void dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu, dtrace_specid_t which) { dtrace_speculation_t *spec; dtrace_buffer_t *src, *dest; uintptr_t daddr, saddr, dlimit, slimit; dtrace_speculation_state_t current, new = 0; intptr_t offs; uint64_t timestamp; if (which == 0) return; if (which > state->dts_nspeculations) { cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return; } spec = &state->dts_speculations[which - 1]; src = &spec->dtsp_buffer[cpu]; dest = &state->dts_buffer[cpu]; do { current = spec->dtsp_state; if (current == DTRACESPEC_COMMITTINGMANY) break; switch (current) { case DTRACESPEC_INACTIVE: case DTRACESPEC_DISCARDING: return; case DTRACESPEC_COMMITTING: /* * This is only possible if we are (a) commit()'ing * without having done a prior speculate() on this CPU * and (b) racing with another commit() on a different * CPU. There's nothing to do -- we just assert that * our offset is 0. */ ASSERT(src->dtb_offset == 0); return; case DTRACESPEC_ACTIVE: new = DTRACESPEC_COMMITTING; break; case DTRACESPEC_ACTIVEONE: /* * This speculation is active on one CPU. 
If our * buffer offset is non-zero, we know that the one CPU * must be us. Otherwise, we are committing on a * different CPU from the speculate(), and we must * rely on being asynchronously cleaned. */ if (src->dtb_offset != 0) { new = DTRACESPEC_COMMITTING; break; } /*FALLTHROUGH*/ case DTRACESPEC_ACTIVEMANY: new = DTRACESPEC_COMMITTINGMANY; break; default: ASSERT(0); } } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new) != current); /* * We have set the state to indicate that we are committing this * speculation. Now reserve the necessary space in the destination * buffer. */ if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset, sizeof (uint64_t), state, NULL)) < 0) { dtrace_buffer_drop(dest); goto out; } /* * We have sufficient space to copy the speculative buffer into the * primary buffer. First, modify the speculative buffer, filling * in the timestamp of all entries with the current time. The data * must have the commit() time rather than the time it was traced, * so that all entries in the primary buffer are in timestamp order. */ timestamp = dtrace_gethrtime(); saddr = (uintptr_t)src->dtb_tomax; slimit = saddr + src->dtb_offset; while (saddr < slimit) { size_t size; dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr; if (dtrh->dtrh_epid == DTRACE_EPIDNONE) { saddr += sizeof (dtrace_epid_t); continue; } ASSERT3U(dtrh->dtrh_epid, <=, state->dts_necbs); size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size; ASSERT3U(saddr + size, <=, slimit); ASSERT3U(size, >=, sizeof (dtrace_rechdr_t)); ASSERT3U(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh), ==, UINT64_MAX); DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp); saddr += size; } /* * Copy the buffer across. (Note that this is a * highly subobtimal bcopy(); in the unlikely event that this becomes * a serious performance issue, a high-performance DTrace-specific * bcopy() should obviously be invented.) */ daddr = (uintptr_t)dest->dtb_tomax + offs; dlimit = daddr + src->dtb_offset; saddr = (uintptr_t)src->dtb_tomax; /* * First, the aligned portion. */ while (dlimit - daddr >= sizeof (uint64_t)) { *((uint64_t *)daddr) = *((uint64_t *)saddr); daddr += sizeof (uint64_t); saddr += sizeof (uint64_t); } /* * Now any left-over bit... */ while (dlimit - daddr) *((uint8_t *)daddr++) = *((uint8_t *)saddr++); /* * Finally, commit the reserved space in the destination buffer. */ dest->dtb_offset = offs + src->dtb_offset; out: /* * If we're lucky enough to be the only active CPU on this speculation * buffer, we can just set the state back to DTRACESPEC_INACTIVE. */ if (current == DTRACESPEC_ACTIVE || (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) { uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state, DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE); ASSERT(rval == DTRACESPEC_COMMITTING); } src->dtb_offset = 0; src->dtb_xamot_drops += src->dtb_drops; src->dtb_drops = 0; } /* * This routine discards an active speculation. If the specified speculation * is not in a valid state to perform a discard(), this routine will silently * do nothing. 
The state of the specified speculation is transitioned * according to the state transition diagram outlined in */ static void dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu, dtrace_specid_t which) { dtrace_speculation_t *spec; dtrace_speculation_state_t current, new = 0; dtrace_buffer_t *buf; if (which == 0) return; if (which > state->dts_nspeculations) { cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return; } spec = &state->dts_speculations[which - 1]; buf = &spec->dtsp_buffer[cpu]; do { current = spec->dtsp_state; switch (current) { case DTRACESPEC_INACTIVE: case DTRACESPEC_COMMITTINGMANY: case DTRACESPEC_COMMITTING: case DTRACESPEC_DISCARDING: return; case DTRACESPEC_ACTIVE: case DTRACESPEC_ACTIVEMANY: new = DTRACESPEC_DISCARDING; break; case DTRACESPEC_ACTIVEONE: if (buf->dtb_offset != 0) { new = DTRACESPEC_INACTIVE; } else { new = DTRACESPEC_DISCARDING; } break; default: ASSERT(0); } } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new) != current); buf->dtb_offset = 0; buf->dtb_drops = 0; } /* * Note: not called from probe context. This function is called * asynchronously from cross call context to clean any speculations that are * in the COMMITTINGMANY or DISCARDING states. These speculations may not be * transitioned back to the INACTIVE state until all CPUs have cleaned the * speculation. */ static void dtrace_speculation_clean_here(dtrace_state_t *state) { dtrace_icookie_t cookie; processorid_t cpu = curcpu; dtrace_buffer_t *dest = &state->dts_buffer[cpu]; dtrace_specid_t i; cookie = dtrace_interrupt_disable(); if (dest->dtb_tomax == NULL) { dtrace_interrupt_enable(cookie); return; } for (i = 0; i < state->dts_nspeculations; i++) { dtrace_speculation_t *spec = &state->dts_speculations[i]; dtrace_buffer_t *src = &spec->dtsp_buffer[cpu]; if (src->dtb_tomax == NULL) continue; if (spec->dtsp_state == DTRACESPEC_DISCARDING) { src->dtb_offset = 0; continue; } if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) continue; if (src->dtb_offset == 0) continue; dtrace_speculation_commit(state, cpu, i + 1); } dtrace_interrupt_enable(cookie); } /* * Note: not called from probe context. This function is called * asynchronously (and at a regular interval) to clean any speculations that * are in the COMMITTINGMANY or DISCARDING states. If it discovers that there * is work to be done, it cross calls all CPUs to perform that work; * COMMITMANY and DISCARDING speculations may not be transitioned back to the * INACTIVE state until they have been cleaned by all CPUs. */ static void dtrace_speculation_clean(dtrace_state_t *state) { int work = 0, rv; dtrace_specid_t i; for (i = 0; i < state->dts_nspeculations; i++) { dtrace_speculation_t *spec = &state->dts_speculations[i]; ASSERT(!spec->dtsp_cleaning); if (spec->dtsp_state != DTRACESPEC_DISCARDING && spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) continue; work++; spec->dtsp_cleaning = 1; } if (!work) return; dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_speculation_clean_here, state); /* * We now know that all CPUs have committed or discarded their * speculation buffers, as appropriate. We can now set the state * to inactive. 
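 * (Pulling together the transitions implemented in this file, a speculation
 * moves roughly as follows:
 *
 *	INACTIVE -> ACTIVE			dtrace_speculation()
 *	ACTIVE -> ACTIVEONE			first CPU to write to the buffer
 *	ACTIVEONE -> ACTIVEMANY			a second CPU writes to the buffer
 *	ACTIVE, ACTIVEONE -> COMMITTING		commit() on the speculating CPU
 *	ACTIVEONE, ACTIVEMANY -> COMMITTINGMANY	commit() from another CPU
 *	ACTIVE, ACTIVEONE, ACTIVEMANY -> DISCARDING	discard()
 *	COMMITTING -> INACTIVE			once the copy completes
 *	COMMITTINGMANY, DISCARDING -> INACTIVE	here, after every CPU cleans
 *
 * with the one special case that an ACTIVEONE speculation discarded on the
 * CPU that wrote it goes straight back to INACTIVE.)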
*/ for (i = 0; i < state->dts_nspeculations; i++) { dtrace_speculation_t *spec = &state->dts_speculations[i]; dtrace_speculation_state_t current, new; if (!spec->dtsp_cleaning) continue; current = spec->dtsp_state; ASSERT(current == DTRACESPEC_DISCARDING || current == DTRACESPEC_COMMITTINGMANY); new = DTRACESPEC_INACTIVE; rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new); ASSERT(rv == current); spec->dtsp_cleaning = 0; } } /* * Called as part of a speculate() to get the speculative buffer associated * with a given speculation. Returns NULL if the specified speculation is not * in an ACTIVE state. If the speculation is in the ACTIVEONE state -- and * the active CPU is not the specified CPU -- the speculation will be * atomically transitioned into the ACTIVEMANY state. */ static dtrace_buffer_t * dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, dtrace_specid_t which) { dtrace_speculation_t *spec; dtrace_speculation_state_t current, new = 0; dtrace_buffer_t *buf; if (which == 0) return (NULL); if (which > state->dts_nspeculations) { cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return (NULL); } spec = &state->dts_speculations[which - 1]; buf = &spec->dtsp_buffer[cpuid]; do { current = spec->dtsp_state; switch (current) { case DTRACESPEC_INACTIVE: case DTRACESPEC_COMMITTINGMANY: case DTRACESPEC_DISCARDING: return (NULL); case DTRACESPEC_COMMITTING: ASSERT(buf->dtb_offset == 0); return (NULL); case DTRACESPEC_ACTIVEONE: /* * This speculation is currently active on one CPU. * Check the offset in the buffer; if it's non-zero, * that CPU must be us (and we leave the state alone). * If it's zero, assume that we're starting on a new * CPU -- and change the state to indicate that the * speculation is active on more than one CPU. */ if (buf->dtb_offset != 0) return (buf); new = DTRACESPEC_ACTIVEMANY; break; case DTRACESPEC_ACTIVEMANY: return (buf); case DTRACESPEC_ACTIVE: new = DTRACESPEC_ACTIVEONE; break; default: ASSERT(0); } } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new) != current); ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY); return (buf); } /* * Return a string. In the event that the user lacks the privilege to access * arbitrary kernel memory, we copy the string out to scratch memory so that we * don't fail access checking. * * dtrace_dif_variable() uses this routine as a helper for various * builtin values such as 'execname' and 'probefunc.' */ uintptr_t dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state, dtrace_mstate_t *mstate) { uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t ret; size_t strsz; /* * The easy case: this probe is allowed to read all of memory, so * we can just return this as a vanilla pointer. */ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) return (addr); /* * This is the tougher case: we copy the string in question from * kernel memory into scratch memory and return it that way: this * ensures that we won't trip up when access checking tests the * BYREF return value. */ strsz = dtrace_strlen((char *)addr, size) + 1; if (mstate->dtms_scratch_ptr + strsz > mstate->dtms_scratch_base + mstate->dtms_scratch_size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); return (0); } dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr, strsz); ret = mstate->dtms_scratch_ptr; mstate->dtms_scratch_ptr += strsz; return (ret); } /* * Return a string from a memoy address which is known to have one or * more concatenated, individually zero terminated, sub-strings. 
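 * For example, an argument vector stored as the packed bytes
 * "ls" NUL "-l" NUL "/tmp" NUL comes back to the consumer as the single
 * string "ls -l /tmp", since the interior terminators are rewritten as
 * spaces below.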
* In the event that the user lacks the privilege to access * arbitrary kernel memory, we copy the string out to scratch memory so that we * don't fail access checking. * * dtrace_dif_variable() uses this routine as a helper for various * builtin values such as 'execargs'. */ static uintptr_t dtrace_dif_varstrz(uintptr_t addr, size_t strsz, dtrace_state_t *state, dtrace_mstate_t *mstate) { char *p; size_t i; uintptr_t ret; if (mstate->dtms_scratch_ptr + strsz > mstate->dtms_scratch_base + mstate->dtms_scratch_size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); return (0); } dtrace_bcopy((const void *)addr, (void *)mstate->dtms_scratch_ptr, strsz); /* Replace sub-string termination characters with a space. */ for (p = (char *) mstate->dtms_scratch_ptr, i = 0; i < strsz - 1; p++, i++) if (*p == '\0') *p = ' '; ret = mstate->dtms_scratch_ptr; mstate->dtms_scratch_ptr += strsz; return (ret); } /* * This function implements the DIF emulator's variable lookups. The emulator * passes a reserved variable identifier and optional built-in array index. */ static uint64_t dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, uint64_t ndx) { /* * If we're accessing one of the uncached arguments, we'll turn this * into a reference in the args array. */ if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) { ndx = v - DIF_VAR_ARG0; v = DIF_VAR_ARGS; } switch (v) { case DIF_VAR_ARGS: ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); if (ndx >= sizeof (mstate->dtms_arg) / sizeof (mstate->dtms_arg[0])) { int aframes = mstate->dtms_probe->dtpr_aframes + 2; dtrace_provider_t *pv; uint64_t val; pv = mstate->dtms_probe->dtpr_provider; if (pv->dtpv_pops.dtps_getargval != NULL) val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg, mstate->dtms_probe->dtpr_id, mstate->dtms_probe->dtpr_arg, ndx, aframes); else val = dtrace_getarg(ndx, aframes); /* * This is regrettably required to keep the compiler * from tail-optimizing the call to dtrace_getarg(). * The condition always evaluates to true, but the * compiler has no way of figuring that out a priori. * (None of this would be necessary if the compiler * could be relied upon to _always_ tail-optimize * the call to dtrace_getarg() -- but it can't.) 
*/ if (mstate->dtms_probe != NULL) return (val); ASSERT(0); } return (mstate->dtms_arg[ndx]); #ifdef illumos case DIF_VAR_UREGS: { klwp_t *lwp; if (!dtrace_priv_proc(state)) return (0); if ((lwp = curthread->t_lwp) == NULL) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = NULL; return (0); } return (dtrace_getreg(lwp->lwp_regs, ndx)); return (0); } #else case DIF_VAR_UREGS: { struct trapframe *tframe; if (!dtrace_priv_proc(state)) return (0); if ((tframe = curthread->td_frame) == NULL) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = 0; return (0); } return (dtrace_getreg(tframe, ndx)); } #endif case DIF_VAR_CURTHREAD: if (!dtrace_priv_proc(state)) return (0); return ((uint64_t)(uintptr_t)curthread); case DIF_VAR_TIMESTAMP: if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) { mstate->dtms_timestamp = dtrace_gethrtime(); mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP; } return (mstate->dtms_timestamp); case DIF_VAR_VTIMESTAMP: ASSERT(dtrace_vtime_references != 0); return (curthread->t_dtrace_vtime); case DIF_VAR_WALLTIMESTAMP: if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) { mstate->dtms_walltimestamp = dtrace_gethrestime(); mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP; } return (mstate->dtms_walltimestamp); #ifdef illumos case DIF_VAR_IPL: if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) { mstate->dtms_ipl = dtrace_getipl(); mstate->dtms_present |= DTRACE_MSTATE_IPL; } return (mstate->dtms_ipl); #endif case DIF_VAR_EPID: ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID); return (mstate->dtms_epid); case DIF_VAR_ID: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); return (mstate->dtms_probe->dtpr_id); case DIF_VAR_STACKDEPTH: if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) { int aframes = mstate->dtms_probe->dtpr_aframes + 2; mstate->dtms_stackdepth = dtrace_getstackdepth(aframes); mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH; } return (mstate->dtms_stackdepth); case DIF_VAR_USTACKDEPTH: if (!dtrace_priv_proc(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) { /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) { mstate->dtms_ustackdepth = 0; } else { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); mstate->dtms_ustackdepth = dtrace_getustackdepth(); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH; } return (mstate->dtms_ustackdepth); case DIF_VAR_CALLER: if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) { int aframes = mstate->dtms_probe->dtpr_aframes + 2; if (!DTRACE_ANCHORED(mstate->dtms_probe)) { /* * If this is an unanchored probe, we are * required to go through the slow path: * dtrace_caller() only guarantees correct * results for anchored probes. */ pc_t caller[2] = {0, 0}; dtrace_getpcstack(caller, 2, aframes, (uint32_t *)(uintptr_t)mstate->dtms_arg[0]); mstate->dtms_caller = caller[1]; } else if ((mstate->dtms_caller = dtrace_caller(aframes)) == -1) { /* * We have failed to do this the quick way; * we must resort to the slower approach of * calling dtrace_getpcstack(). 
*/ pc_t caller = 0; dtrace_getpcstack(&caller, 1, aframes, NULL); mstate->dtms_caller = caller; } mstate->dtms_present |= DTRACE_MSTATE_CALLER; } return (mstate->dtms_caller); case DIF_VAR_UCALLER: if (!dtrace_priv_proc(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) { uint64_t ustack[3]; /* * dtrace_getupcstack() fills in the first uint64_t * with the current PID. The second uint64_t will * be the program counter at user-level. The third * uint64_t will contain the caller, which is what * we're after. */ ustack[2] = 0; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getupcstack(ustack, 3); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); mstate->dtms_ucaller = ustack[2]; mstate->dtms_present |= DTRACE_MSTATE_UCALLER; } return (mstate->dtms_ucaller); case DIF_VAR_PROBEPROV: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); return (dtrace_dif_varstr( (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name, state, mstate)); case DIF_VAR_PROBEMOD: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); return (dtrace_dif_varstr( (uintptr_t)mstate->dtms_probe->dtpr_mod, state, mstate)); case DIF_VAR_PROBEFUNC: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); return (dtrace_dif_varstr( (uintptr_t)mstate->dtms_probe->dtpr_func, state, mstate)); case DIF_VAR_PROBENAME: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); return (dtrace_dif_varstr( (uintptr_t)mstate->dtms_probe->dtpr_name, state, mstate)); case DIF_VAR_PID: if (!dtrace_priv_proc(state)) return (0); #ifdef illumos /* * Note that we are assuming that an unanchored probe is * always due to a high-level interrupt. (And we're assuming * that there is only a single high level interrupt.) */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (pid0.pid_id); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. * Further, it is always safe to dereference the p_pidp member * of one's own proc structure. (These are truisms becuase * threads and processes don't clean up their own state -- * they leave that task to whomever reaps them.) */ return ((uint64_t)curthread->t_procp->p_pidp->pid_id); #else return ((uint64_t)curproc->p_pid); #endif case DIF_VAR_PPID: if (!dtrace_priv_proc(state)) return (0); #ifdef illumos /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (pid0.pid_id); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. * (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) */ return ((uint64_t)curthread->t_procp->p_ppid); #else if (curproc->p_pid == proc0.p_pid) return (curproc->p_pid); else return (curproc->p_pptr->p_pid); #endif case DIF_VAR_TID: #ifdef illumos /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (0); #endif return ((uint64_t)curthread->t_tid); case DIF_VAR_EXECARGS: { struct pargs *p_args = curthread->td_proc->p_args; if (p_args == NULL) return(0); return (dtrace_dif_varstrz( (uintptr_t) p_args->ar_args, p_args->ar_length, state, mstate)); } case DIF_VAR_EXECNAME: #ifdef illumos if (!dtrace_priv_proc(state)) return (0); /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return ((uint64_t)(uintptr_t)p0.p_user.u_comm); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. 
* (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) */ return (dtrace_dif_varstr( (uintptr_t)curthread->t_procp->p_user.u_comm, state, mstate)); #else return (dtrace_dif_varstr( (uintptr_t) curthread->td_proc->p_comm, state, mstate)); #endif case DIF_VAR_ZONENAME: #ifdef illumos if (!dtrace_priv_proc(state)) return (0); /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return ((uint64_t)(uintptr_t)p0.p_zone->zone_name); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. * (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) */ return (dtrace_dif_varstr( (uintptr_t)curthread->t_procp->p_zone->zone_name, state, mstate)); #else return (0); #endif case DIF_VAR_UID: if (!dtrace_priv_proc(state)) return (0); #ifdef illumos /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return ((uint64_t)p0.p_cred->cr_uid); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. * (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) * * Additionally, it is safe to dereference one's own process * credential, since this is never NULL after process birth. */ return ((uint64_t)curthread->t_procp->p_cred->cr_uid); #else return ((uint64_t)curthread->td_ucred->cr_uid); #endif case DIF_VAR_GID: if (!dtrace_priv_proc(state)) return (0); #ifdef illumos /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return ((uint64_t)p0.p_cred->cr_gid); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. * (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) * * Additionally, it is safe to dereference one's own process * credential, since this is never NULL after process birth. */ return ((uint64_t)curthread->t_procp->p_cred->cr_gid); #else return ((uint64_t)curthread->td_ucred->cr_gid); #endif case DIF_VAR_ERRNO: { #ifdef illumos klwp_t *lwp; if (!dtrace_priv_proc(state)) return (0); /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (0); /* * It is always safe to dereference one's own t_lwp pointer in * the event that this pointer is non-NULL. (This is true * because threads and lwps don't clean up their own state -- * they leave that task to whomever reaps them.) */ if ((lwp = curthread->t_lwp) == NULL) return (0); return ((uint64_t)lwp->lwp_errno); #else return (curthread->td_errno); #endif } #ifndef illumos case DIF_VAR_CPU: { return curcpu; } #endif default: DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } } typedef enum dtrace_json_state { DTRACE_JSON_REST = 1, DTRACE_JSON_OBJECT, DTRACE_JSON_STRING, DTRACE_JSON_STRING_ESCAPE, DTRACE_JSON_STRING_ESCAPE_UNICODE, DTRACE_JSON_COLON, DTRACE_JSON_COMMA, DTRACE_JSON_VALUE, DTRACE_JSON_IDENTIFIER, DTRACE_JSON_NUMBER, DTRACE_JSON_NUMBER_FRAC, DTRACE_JSON_NUMBER_EXP, DTRACE_JSON_COLLECT_OBJECT } dtrace_json_state_t; /* * This function possesses just enough knowledge about JSON to extract a single * value from a JSON string and store it in the scratch buffer. It is able * to extract nested object values, and members of arrays by index. 
* * elemlist is a list of JSON keys, stored as packed NUL-terminated strings, to * be looked up as we descend into the object tree. e.g. * * foo[0].bar.baz[32] --> "foo" NUL "0" NUL "bar" NUL "baz" NUL "32" NUL * with nelems = 5. * * The run time of this function must be bounded above by strsize to limit the * amount of work done in probe context. As such, it is implemented as a * simple state machine, reading one character at a time using safe loads * until we find the requested element, hit a parsing error or run off the * end of the object or string. * * As there is no way for a subroutine to return an error without interrupting * clause execution, we simply return NULL in the event of a missing key or any * other error condition. Each NULL return in this function is commented with * the error condition it represents -- parsing or otherwise. * * The set of states for the state machine closely matches the JSON * specification (http://json.org/). Briefly: * * DTRACE_JSON_REST: * Skip whitespace until we find either a top-level Object, moving * to DTRACE_JSON_OBJECT; or an Array, moving to DTRACE_JSON_VALUE. * * DTRACE_JSON_OBJECT: * Locate the next key String in an Object. Sets a flag to denote * the next String as a key string and moves to DTRACE_JSON_STRING. * * DTRACE_JSON_COLON: * Skip whitespace until we find the colon that separates key Strings * from their values. Once found, move to DTRACE_JSON_VALUE. * * DTRACE_JSON_VALUE: * Detects the type of the next value (String, Number, Identifier, Object * or Array) and routes to the states that process that type. Here we also * deal with the element selector list if we are requested to traverse down * into the object tree. * * DTRACE_JSON_COMMA: * Skip whitespace until we find the comma that separates key-value pairs * in Objects (returning to DTRACE_JSON_OBJECT) or values in Arrays * (similarly DTRACE_JSON_VALUE). All following literal value processing * states return to this state at the end of their value, unless otherwise * noted. * * DTRACE_JSON_NUMBER, DTRACE_JSON_NUMBER_FRAC, DTRACE_JSON_NUMBER_EXP: * Processes a Number literal from the JSON, including any exponent * component that may be present. Numbers are returned as strings, which * may be passed to strtoll() if an integer is required. * * DTRACE_JSON_IDENTIFIER: * Processes a "true", "false" or "null" literal in the JSON. * * DTRACE_JSON_STRING, DTRACE_JSON_STRING_ESCAPE, * DTRACE_JSON_STRING_ESCAPE_UNICODE: * Processes a String literal from the JSON, whether the String denotes * a key, a value or part of a larger Object. Handles all escape sequences * present in the specification, including four-digit unicode characters, * but merely includes the escape sequence without converting it to the * actual escaped character. If the String is flagged as a key, we * move to DTRACE_JSON_COLON rather than DTRACE_JSON_COMMA. * * DTRACE_JSON_COLLECT_OBJECT: * This state collects an entire Object (or Array), correctly handling * embedded strings. If the full element selector list matches this nested * object, we return the Object in full as a string. If not, we use this * state to skip to the next value at this level and continue processing. * * NOTE: This function uses various macros from strtolctype.h to manipulate * digit values, etc -- these have all been checked to ensure they make * no additional function calls. 
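 * As a concrete (purely illustrative) example: given the JSON string
 * {"pets": {"cat": "meow"}} and an elemlist of "pets" NUL "cat" NUL with
 * nelems = 2, the state machine descends into the "pets" object, matches the
 * "cat" key and copies its value into dest, so the function returns the
 * string "meow"; a missing key at any level simply returns NULL.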
*/ static char * dtrace_json(uint64_t size, uintptr_t json, char *elemlist, int nelems, char *dest) { dtrace_json_state_t state = DTRACE_JSON_REST; int64_t array_elem = INT64_MIN; int64_t array_pos = 0; uint8_t escape_unicount = 0; boolean_t string_is_key = B_FALSE; boolean_t collect_object = B_FALSE; boolean_t found_key = B_FALSE; boolean_t in_array = B_FALSE; uint32_t braces = 0, brackets = 0; char *elem = elemlist; char *dd = dest; uintptr_t cur; for (cur = json; cur < json + size; cur++) { char cc = dtrace_load8(cur); if (cc == '\0') return (NULL); switch (state) { case DTRACE_JSON_REST: if (isspace(cc)) break; if (cc == '{') { state = DTRACE_JSON_OBJECT; break; } if (cc == '[') { in_array = B_TRUE; array_pos = 0; array_elem = dtrace_strtoll(elem, 10, size); found_key = array_elem == 0 ? B_TRUE : B_FALSE; state = DTRACE_JSON_VALUE; break; } /* * ERROR: expected to find a top-level object or array. */ return (NULL); case DTRACE_JSON_OBJECT: if (isspace(cc)) break; if (cc == '"') { state = DTRACE_JSON_STRING; string_is_key = B_TRUE; break; } /* * ERROR: either the object did not start with a key * string, or we've run off the end of the object * without finding the requested key. */ return (NULL); case DTRACE_JSON_STRING: if (cc == '\\') { *dd++ = '\\'; state = DTRACE_JSON_STRING_ESCAPE; break; } if (cc == '"') { if (collect_object) { /* * We don't reset the dest here, as * the string is part of a larger * object being collected. */ *dd++ = cc; collect_object = B_FALSE; state = DTRACE_JSON_COLLECT_OBJECT; break; } *dd = '\0'; dd = dest; /* reset string buffer */ if (string_is_key) { if (dtrace_strncmp(dest, elem, size) == 0) found_key = B_TRUE; } else if (found_key) { if (nelems > 1) { /* * We expected an object, not * this string. */ return (NULL); } return (dest); } state = string_is_key ? DTRACE_JSON_COLON : DTRACE_JSON_COMMA; string_is_key = B_FALSE; break; } *dd++ = cc; break; case DTRACE_JSON_STRING_ESCAPE: *dd++ = cc; if (cc == 'u') { escape_unicount = 0; state = DTRACE_JSON_STRING_ESCAPE_UNICODE; } else { state = DTRACE_JSON_STRING; } break; case DTRACE_JSON_STRING_ESCAPE_UNICODE: if (!isxdigit(cc)) { /* * ERROR: invalid unicode escape, expected * four valid hexidecimal digits. */ return (NULL); } *dd++ = cc; if (++escape_unicount == 4) state = DTRACE_JSON_STRING; break; case DTRACE_JSON_COLON: if (isspace(cc)) break; if (cc == ':') { state = DTRACE_JSON_VALUE; break; } /* * ERROR: expected a colon. */ return (NULL); case DTRACE_JSON_COMMA: if (isspace(cc)) break; if (cc == ',') { if (in_array) { state = DTRACE_JSON_VALUE; if (++array_pos == array_elem) found_key = B_TRUE; } else { state = DTRACE_JSON_OBJECT; } break; } /* * ERROR: either we hit an unexpected character, or * we reached the end of the object or array without * finding the requested key. */ return (NULL); case DTRACE_JSON_IDENTIFIER: if (islower(cc)) { *dd++ = cc; break; } *dd = '\0'; dd = dest; /* reset string buffer */ if (dtrace_strncmp(dest, "true", 5) == 0 || dtrace_strncmp(dest, "false", 6) == 0 || dtrace_strncmp(dest, "null", 5) == 0) { if (found_key) { if (nelems > 1) { /* * ERROR: We expected an object, * not this identifier. */ return (NULL); } return (dest); } else { cur--; state = DTRACE_JSON_COMMA; break; } } /* * ERROR: we did not recognise the identifier as one * of those in the JSON specification. 
*/ return (NULL); case DTRACE_JSON_NUMBER: if (cc == '.') { *dd++ = cc; state = DTRACE_JSON_NUMBER_FRAC; break; } if (cc == 'x' || cc == 'X') { /* * ERROR: specification explicitly excludes * hexidecimal or octal numbers. */ return (NULL); } /* FALLTHRU */ case DTRACE_JSON_NUMBER_FRAC: if (cc == 'e' || cc == 'E') { *dd++ = cc; state = DTRACE_JSON_NUMBER_EXP; break; } if (cc == '+' || cc == '-') { /* * ERROR: expect sign as part of exponent only. */ return (NULL); } /* FALLTHRU */ case DTRACE_JSON_NUMBER_EXP: if (isdigit(cc) || cc == '+' || cc == '-') { *dd++ = cc; break; } *dd = '\0'; dd = dest; /* reset string buffer */ if (found_key) { if (nelems > 1) { /* * ERROR: We expected an object, not * this number. */ return (NULL); } return (dest); } cur--; state = DTRACE_JSON_COMMA; break; case DTRACE_JSON_VALUE: if (isspace(cc)) break; if (cc == '{' || cc == '[') { if (nelems > 1 && found_key) { in_array = cc == '[' ? B_TRUE : B_FALSE; /* * If our element selector directs us * to descend into this nested object, * then move to the next selector * element in the list and restart the * state machine. */ while (*elem != '\0') elem++; elem++; /* skip the inter-element NUL */ nelems--; dd = dest; if (in_array) { state = DTRACE_JSON_VALUE; array_pos = 0; array_elem = dtrace_strtoll( elem, 10, size); found_key = array_elem == 0 ? B_TRUE : B_FALSE; } else { found_key = B_FALSE; state = DTRACE_JSON_OBJECT; } break; } /* * Otherwise, we wish to either skip this * nested object or return it in full. */ if (cc == '[') brackets = 1; else braces = 1; *dd++ = cc; state = DTRACE_JSON_COLLECT_OBJECT; break; } if (cc == '"') { state = DTRACE_JSON_STRING; break; } if (islower(cc)) { /* * Here we deal with true, false and null. */ *dd++ = cc; state = DTRACE_JSON_IDENTIFIER; break; } if (cc == '-' || isdigit(cc)) { *dd++ = cc; state = DTRACE_JSON_NUMBER; break; } /* * ERROR: unexpected character at start of value. */ return (NULL); case DTRACE_JSON_COLLECT_OBJECT: if (cc == '\0') /* * ERROR: unexpected end of input. */ return (NULL); *dd++ = cc; if (cc == '"') { collect_object = B_TRUE; state = DTRACE_JSON_STRING; break; } if (cc == ']') { if (brackets-- == 0) { /* * ERROR: unbalanced brackets. */ return (NULL); } } else if (cc == '}') { if (braces-- == 0) { /* * ERROR: unbalanced braces. */ return (NULL); } } else if (cc == '{') { braces++; } else if (cc == '[') { brackets++; } if (brackets == 0 && braces == 0) { if (found_key) { *dd = '\0'; return (dest); } dd = dest; /* reset string buffer */ state = DTRACE_JSON_COMMA; } break; } } return (NULL); } /* * Emulate the execution of DTrace ID subroutines invoked by the call opcode. * Notice that we don't bother validating the proper number of arguments or * their types in the tuple stack. This isn't needed because all argument * interpretation is safe because of our load safety -- the worst that can * happen is that a bogus program can obtain bogus results. 
*/ static void dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, dtrace_key_t *tupregs, int nargs, dtrace_mstate_t *mstate, dtrace_state_t *state) { volatile uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval; dtrace_vstate_t *vstate = &state->dts_vstate; #ifdef illumos union { mutex_impl_t mi; uint64_t mx; } m; union { krwlock_t ri; uintptr_t rw; } r; #else struct thread *lowner; union { struct lock_object *li; uintptr_t lx; } l; #endif switch (subr) { case DIF_SUBR_RAND: regs[rd] = dtrace_xoroshiro128_plus_next( state->dts_rstate[curcpu]); break; #ifdef illumos case DIF_SUBR_MUTEX_OWNED: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { regs[rd] = 0; break; } m.mx = dtrace_load64(tupregs[0].dttk_value); if (MUTEX_TYPE_ADAPTIVE(&m.mi)) regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER; else regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock); break; case DIF_SUBR_MUTEX_OWNER: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { regs[rd] = 0; break; } m.mx = dtrace_load64(tupregs[0].dttk_value); if (MUTEX_TYPE_ADAPTIVE(&m.mi) && MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER) regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi); else regs[rd] = 0; break; case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { regs[rd] = 0; break; } m.mx = dtrace_load64(tupregs[0].dttk_value); regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi); break; case DIF_SUBR_MUTEX_TYPE_SPIN: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { regs[rd] = 0; break; } m.mx = dtrace_load64(tupregs[0].dttk_value); regs[rd] = MUTEX_TYPE_SPIN(&m.mi); break; case DIF_SUBR_RW_READ_HELD: { uintptr_t tmp; if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), mstate, vstate)) { regs[rd] = 0; break; } r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_READ_HELD(&r.ri, tmp); break; } case DIF_SUBR_RW_WRITE_HELD: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), mstate, vstate)) { regs[rd] = 0; break; } r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_WRITE_HELD(&r.ri); break; case DIF_SUBR_RW_ISWRITER: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), mstate, vstate)) { regs[rd] = 0; break; } r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_ISWRITER(&r.ri); break; #else /* !illumos */ case DIF_SUBR_MUTEX_OWNED: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct lock_object), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_SUBR_MUTEX_OWNER: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct lock_object), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); LOCK_CLASS(l.li)->lc_owner(l.li, &lowner); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); regs[rd] = (uintptr_t)lowner; break; case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct mtx), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = (LOCK_CLASS(l.li)->lc_flags & LC_SLEEPLOCK) != 0; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_SUBR_MUTEX_TYPE_SPIN: if 
(!dtrace_canload(tupregs[0].dttk_value, sizeof (struct mtx), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = (LOCK_CLASS(l.li)->lc_flags & LC_SPINLOCK) != 0; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_SUBR_RW_READ_HELD: case DIF_SUBR_SX_SHARED_HELD: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner) && lowner == NULL; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_SUBR_RW_WRITE_HELD: case DIF_SUBR_SX_EXCLUSIVE_HELD: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr(tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner) && lowner != NULL; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_SUBR_RW_ISWRITER: case DIF_SUBR_SX_ISEXCLUSIVE: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr(tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); LOCK_CLASS(l.li)->lc_owner(l.li, &lowner); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); regs[rd] = (lowner == curthread); break; #endif /* illumos */ case DIF_SUBR_BCOPY: { /* * We need to be sure that the destination is in the scratch * region -- no other region is allowed. */ uintptr_t src = tupregs[0].dttk_value; uintptr_t dest = tupregs[1].dttk_value; size_t size = tupregs[2].dttk_value; if (!dtrace_inscratch(dest, size, mstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } if (!dtrace_canload(src, size, mstate, vstate)) { regs[rd] = 0; break; } dtrace_bcopy((void *)src, (void *)dest, size); break; } case DIF_SUBR_ALLOCA: case DIF_SUBR_COPYIN: { uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); uint64_t size = tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value; size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size; /* * This action doesn't require any credential checks since * probes will not activate in user contexts to which the * enabling user does not have permissions. */ /* * Rounding up the user allocation size could have overflowed * a large, bogus allocation (like -1ULL) to 0. */ if (scratch_size < size || !DTRACE_INSCRATCH(mstate, scratch_size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } if (subr == DIF_SUBR_COPYIN) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } mstate->dtms_scratch_ptr += scratch_size; regs[rd] = dest; break; } case DIF_SUBR_COPYINTO: { uint64_t size = tupregs[1].dttk_value; uintptr_t dest = tupregs[2].dttk_value; /* * This action doesn't require any credential checks since * probes will not activate in user contexts to which the * enabling user does not have permissions. 
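 * A representative (purely illustrative) D pairing of alloca() and
 * copyinto() would be:
 *
 *	this->buf = alloca(this->sz);
 *	copyinto(arg1, this->sz, this->buf);
 *
 * where arg1 is assumed to hold a user address and this->sz a byte
 * count small enough to fit in scratch.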
*/ if (!dtrace_inscratch(dest, size, mstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; } case DIF_SUBR_COPYINSTR: { uintptr_t dest = mstate->dtms_scratch_ptr; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; if (nargs > 1 && tupregs[1].dttk_value < size) size = tupregs[1].dttk_value + 1; /* * This action doesn't require any credential checks since * probes will not activate in user contexts to which the * enabling user does not have permissions. */ if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); ((char *)dest)[size - 1] = '\0'; mstate->dtms_scratch_ptr += size; regs[rd] = dest; break; } #ifdef illumos case DIF_SUBR_MSGSIZE: case DIF_SUBR_MSGDSIZE: { uintptr_t baddr = tupregs[0].dttk_value, daddr; uintptr_t wptr, rptr; size_t count = 0; int cont = 0; while (baddr != 0 && !(*flags & CPU_DTRACE_FAULT)) { if (!dtrace_canload(baddr, sizeof (mblk_t), mstate, vstate)) { regs[rd] = 0; break; } wptr = dtrace_loadptr(baddr + offsetof(mblk_t, b_wptr)); rptr = dtrace_loadptr(baddr + offsetof(mblk_t, b_rptr)); if (wptr < rptr) { *flags |= CPU_DTRACE_BADADDR; *illval = tupregs[0].dttk_value; break; } daddr = dtrace_loadptr(baddr + offsetof(mblk_t, b_datap)); baddr = dtrace_loadptr(baddr + offsetof(mblk_t, b_cont)); /* * We want to prevent against denial-of-service here, * so we're only going to search the list for * dtrace_msgdsize_max mblks. */ if (cont++ > dtrace_msgdsize_max) { *flags |= CPU_DTRACE_ILLOP; break; } if (subr == DIF_SUBR_MSGDSIZE) { if (dtrace_load8(daddr + offsetof(dblk_t, db_type)) != M_DATA) continue; } count += wptr - rptr; } if (!(*flags & CPU_DTRACE_FAULT)) regs[rd] = count; break; } #endif case DIF_SUBR_PROGENYOF: { pid_t pid = tupregs[0].dttk_value; proc_t *p; int rval = 0; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); for (p = curthread->t_procp; p != NULL; p = p->p_parent) { #ifdef illumos if (p->p_pidp->pid_id == pid) { #else if (p->p_pid == pid) { #endif rval = 1; break; } } DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); regs[rd] = rval; break; } case DIF_SUBR_SPECULATION: regs[rd] = dtrace_speculation(state); break; case DIF_SUBR_COPYOUT: { uintptr_t kaddr = tupregs[0].dttk_value; uintptr_t uaddr = tupregs[1].dttk_value; uint64_t size = tupregs[2].dttk_value; if (!dtrace_destructive_disallow && dtrace_priv_proc_control(state) && !dtrace_istoxic(kaddr, size) && dtrace_canload(kaddr, size, mstate, vstate)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyout(kaddr, uaddr, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; } case DIF_SUBR_COPYOUTSTR: { uintptr_t kaddr = tupregs[0].dttk_value; uintptr_t uaddr = tupregs[1].dttk_value; uint64_t size = tupregs[2].dttk_value; size_t lim; if (!dtrace_destructive_disallow && dtrace_priv_proc_control(state) && !dtrace_istoxic(kaddr, size) && dtrace_strcanload(kaddr, size, &lim, mstate, vstate)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyoutstr(kaddr, uaddr, lim, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; } case DIF_SUBR_STRLEN: { size_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t addr = (uintptr_t)tupregs[0].dttk_value; size_t lim; if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { regs[rd] = 0; break; } regs[rd] = 
dtrace_strlen((char *)addr, lim); break; } case DIF_SUBR_STRCHR: case DIF_SUBR_STRRCHR: { /* * We're going to iterate over the string looking for the * specified character. We will iterate until we have reached * the string length or we have found the character. If this * is DIF_SUBR_STRRCHR, we will look for the last occurrence * of the specified character instead of the first. */ uintptr_t addr = tupregs[0].dttk_value; uintptr_t addr_limit; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; size_t lim; char c, target = (char)tupregs[1].dttk_value; if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { regs[rd] = 0; break; } addr_limit = addr + lim; for (regs[rd] = 0; addr < addr_limit; addr++) { if ((c = dtrace_load8(addr)) == target) { regs[rd] = addr; if (subr == DIF_SUBR_STRCHR) break; } if (c == '\0') break; } break; } case DIF_SUBR_STRSTR: case DIF_SUBR_INDEX: case DIF_SUBR_RINDEX: { /* * We're going to iterate over the string looking for the * specified string. We will iterate until we have reached * the string length or we have found the string. (Yes, this * is done in the most naive way possible -- but considering * that the string we're searching for is likely to be * relatively short, the complexity of Rabin-Karp or similar * hardly seems merited.) */ char *addr = (char *)(uintptr_t)tupregs[0].dttk_value; char *substr = (char *)(uintptr_t)tupregs[1].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; size_t len = dtrace_strlen(addr, size); size_t sublen = dtrace_strlen(substr, size); char *limit = addr + len, *orig = addr; int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1; int inc = 1; regs[rd] = notfound; if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) { regs[rd] = 0; break; } if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate, vstate)) { regs[rd] = 0; break; } /* * strstr() and index()/rindex() have similar semantics if * both strings are the empty string: strstr() returns a * pointer to the (empty) string, and index() and rindex() * both return index 0 (regardless of any position argument). */ if (sublen == 0 && len == 0) { if (subr == DIF_SUBR_STRSTR) regs[rd] = (uintptr_t)addr; else regs[rd] = 0; break; } if (subr != DIF_SUBR_STRSTR) { if (subr == DIF_SUBR_RINDEX) { limit = orig - 1; addr += len; inc = -1; } /* * Both index() and rindex() take an optional position * argument that denotes the starting position. */ if (nargs == 3) { int64_t pos = (int64_t)tupregs[2].dttk_value; /* * If the position argument to index() is * negative, Perl implicitly clamps it at * zero. This semantic is a little surprising * given the special meaning of negative * positions to similar Perl functions like * substr(), but it appears to reflect a * notion that index() can start from a * negative index and increment its way up to * the string. Given this notion, Perl's * rindex() is at least self-consistent in * that it implicitly clamps positions greater * than the string length to be the string * length. Where Perl completely loses * coherence, however, is when the specified * substring is the empty string (""). In * this case, even if the position is * negative, rindex() returns 0 -- and even if * the position is greater than the length, * index() returns the string length. These * semantics violate the notion that index() * should never return a value less than the * specified position and that rindex() should * never return a value greater than the * specified position. 
(One assumes that * these semantics are artifacts of Perl's * implementation and not the results of * deliberate design -- it beggars belief that * even Larry Wall could desire such oddness.) * While in the abstract one would wish for * consistent position semantics across * substr(), index() and rindex() -- or at the * very least self-consistent position * semantics for index() and rindex() -- we * instead opt to keep with the extant Perl * semantics, in all their broken glory. (Do * we have more desire to maintain Perl's * semantics than Perl does? Probably.) */ if (subr == DIF_SUBR_RINDEX) { if (pos < 0) { if (sublen == 0) regs[rd] = 0; break; } if (pos > len) pos = len; } else { if (pos < 0) pos = 0; if (pos >= len) { if (sublen == 0) regs[rd] = len; break; } } addr = orig + pos; } } for (regs[rd] = notfound; addr != limit; addr += inc) { if (dtrace_strncmp(addr, substr, sublen) == 0) { if (subr != DIF_SUBR_STRSTR) { /* * As D index() and rindex() are * modeled on Perl (and not on awk), * we return a zero-based (and not a * one-based) index. (For you Perl * weenies: no, we're not going to add * $[ -- and shouldn't you be at a con * or something?) */ regs[rd] = (uintptr_t)(addr - orig); break; } ASSERT(subr == DIF_SUBR_STRSTR); regs[rd] = (uintptr_t)addr; break; } } break; } case DIF_SUBR_STRTOK: { uintptr_t addr = tupregs[0].dttk_value; uintptr_t tokaddr = tupregs[1].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t limit, toklimit; size_t clim; uint8_t c = 0, tokmap[32]; /* 256 / 8 */ char *dest = (char *)mstate->dtms_scratch_ptr; int i; /* * Check both the token buffer and (later) the input buffer, * since both could be non-scratch addresses. */ if (!dtrace_strcanload(tokaddr, size, &clim, mstate, vstate)) { regs[rd] = 0; break; } toklimit = tokaddr + clim; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } if (addr == 0) { /* * If the address specified is NULL, we use our saved * strtok pointer from the mstate. Note that this * means that the saved strtok pointer is _only_ * valid within multiple enablings of the same probe -- * it behaves like an implicit clause-local variable. */ addr = mstate->dtms_strtok; limit = mstate->dtms_strtok_limit; } else { /* * If the user-specified address is non-NULL we must * access check it. This is the only time we have * a chance to do so, since this address may reside * in the string table of this clause-- future calls * (when we fetch addr from mstate->dtms_strtok) * would fail this access check. */ if (!dtrace_strcanload(addr, size, &clim, mstate, vstate)) { regs[rd] = 0; break; } limit = addr + clim; } /* * First, zero the token map, and then process the token * string -- setting a bit in the map for every character * found in the token string. */ for (i = 0; i < sizeof (tokmap); i++) tokmap[i] = 0; for (; tokaddr < toklimit; tokaddr++) { if ((c = dtrace_load8(tokaddr)) == '\0') break; ASSERT((c >> 3) < sizeof (tokmap)); tokmap[c >> 3] |= (1 << (c & 0x7)); } for (; addr < limit; addr++) { /* * We're looking for a character that is _not_ * contained in the token string. */ if ((c = dtrace_load8(addr)) == '\0') break; if (!(tokmap[c >> 3] & (1 << (c & 0x7)))) break; } if (c == '\0') { /* * We reached the end of the string without finding * any character that was not in the token string. * We return NULL in this case, and we set the saved * address to NULL as well. 
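 * For example, strtok("/a//b", "/") returns "a"; a following
 * strtok(NULL, "/") skips the repeated delimiters and returns "b"; and
 * the next such call takes this path and returns NULL.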
*/ regs[rd] = 0; mstate->dtms_strtok = 0; mstate->dtms_strtok_limit = 0; break; } /* * From here on, we're copying into the destination string. */ for (i = 0; addr < limit && i < size - 1; addr++) { if ((c = dtrace_load8(addr)) == '\0') break; if (tokmap[c >> 3] & (1 << (c & 0x7))) break; ASSERT(i < size); dest[i++] = c; } ASSERT(i < size); dest[i] = '\0'; regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; mstate->dtms_strtok = addr; mstate->dtms_strtok_limit = limit; break; } case DIF_SUBR_SUBSTR: { uintptr_t s = tupregs[0].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; char *d = (char *)mstate->dtms_scratch_ptr; int64_t index = (int64_t)tupregs[1].dttk_value; int64_t remaining = (int64_t)tupregs[2].dttk_value; size_t len = dtrace_strlen((char *)s, size); int64_t i; if (!dtrace_canload(s, len + 1, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } if (nargs <= 2) remaining = (int64_t)size; if (index < 0) { index += len; if (index < 0 && index + remaining > 0) { remaining += index; index = 0; } } if (index >= len || index < 0) { remaining = 0; } else if (remaining < 0) { remaining += len - index; } else if (index + remaining > size) { remaining = size - index; } for (i = 0; i < remaining; i++) { if ((d[i] = dtrace_load8(s + index + i)) == '\0') break; } d[i] = '\0'; mstate->dtms_scratch_ptr += size; regs[rd] = (uintptr_t)d; break; } case DIF_SUBR_JSON: { uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t json = tupregs[0].dttk_value; size_t jsonlen = dtrace_strlen((char *)json, size); uintptr_t elem = tupregs[1].dttk_value; size_t elemlen = dtrace_strlen((char *)elem, size); char *dest = (char *)mstate->dtms_scratch_ptr; char *elemlist = (char *)mstate->dtms_scratch_ptr + jsonlen + 1; char *ee = elemlist; int nelems = 1; uintptr_t cur; if (!dtrace_canload(json, jsonlen + 1, mstate, vstate) || !dtrace_canload(elem, elemlen + 1, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, jsonlen + 1 + elemlen + 1)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } /* * Read the element selector and split it up into a packed list * of strings. */ for (cur = elem; cur < elem + elemlen; cur++) { char cc = dtrace_load8(cur); if (cur == elem && cc == '[') { /* * If the first element selector key is * actually an array index then ignore the * bracket. */ continue; } if (cc == ']') continue; if (cc == '.' 
|| cc == '[') { nelems++; cc = '\0'; } *ee++ = cc; } *ee++ = '\0'; if ((regs[rd] = (uintptr_t)dtrace_json(size, json, elemlist, nelems, dest)) != 0) mstate->dtms_scratch_ptr += jsonlen + 1; break; } case DIF_SUBR_TOUPPER: case DIF_SUBR_TOLOWER: { uintptr_t s = tupregs[0].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; char *dest = (char *)mstate->dtms_scratch_ptr, c; size_t len = dtrace_strlen((char *)s, size); char lower, upper, convert; int64_t i; if (subr == DIF_SUBR_TOUPPER) { lower = 'a'; upper = 'z'; convert = 'A'; } else { lower = 'A'; upper = 'Z'; convert = 'a'; } if (!dtrace_canload(s, len + 1, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } for (i = 0; i < size - 1; i++) { if ((c = dtrace_load8(s + i)) == '\0') break; if (c >= lower && c <= upper) c = convert + (c - lower); dest[i] = c; } ASSERT(i < size); dest[i] = '\0'; regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; break; } #ifdef illumos case DIF_SUBR_GETMAJOR: #ifdef _LP64 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64; #else regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ; #endif break; case DIF_SUBR_GETMINOR: #ifdef _LP64 regs[rd] = tupregs[0].dttk_value & MAXMIN64; #else regs[rd] = tupregs[0].dttk_value & MAXMIN; #endif break; case DIF_SUBR_DDI_PATHNAME: { /* * This one is a galactic mess. We are going to roughly * emulate ddi_pathname(), but it's made more complicated * by the fact that we (a) want to include the minor name and * (b) must proceed iteratively instead of recursively. */ uintptr_t dest = mstate->dtms_scratch_ptr; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; char *start = (char *)dest, *end = start + size - 1; uintptr_t daddr = tupregs[0].dttk_value; int64_t minor = (int64_t)tupregs[1].dttk_value; char *s; int i, len, depth = 0; /* * Due to all the pointer jumping we do and context we must * rely upon, we just mandate that the user must have kernel * read privileges to use this routine. */ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) { *flags |= CPU_DTRACE_KPRIV; *illval = daddr; regs[rd] = 0; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } *end = '\0'; /* * We want to have a name for the minor. In order to do this, * we need to walk the minor list from the devinfo. We want * to be sure that we don't infinitely walk a circular list, * so we check for circularity by sending a scout pointer * ahead two elements for every element that we iterate over; * if the list is circular, these will ultimately point to the * same element. You may recognize this little trick as the * answer to a stupid interview question -- one that always * seems to be asked by those who had to have it laboriously * explained to them, and who can't even concisely describe * the conditions under which one would be forced to resort to * this technique. Needless to say, those conditions are * found here -- and probably only here. Is this the only use * of this infamous trick in shipping, production code? If it * isn't, it probably should be... 
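 * (Concretely: the scout pointer below advances two ddi_minor_data
 * links for every single link that maddr advances, so on a circular
 * list the two must eventually coincide, at which point we set
 * CPU_DTRACE_ILLOP instead of walking forever.)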
*/ if (minor != -1) { uintptr_t maddr = dtrace_loadptr(daddr + offsetof(struct dev_info, devi_minor)); uintptr_t next = offsetof(struct ddi_minor_data, next); uintptr_t name = offsetof(struct ddi_minor_data, d_minor) + offsetof(struct ddi_minor, name); uintptr_t dev = offsetof(struct ddi_minor_data, d_minor) + offsetof(struct ddi_minor, dev); uintptr_t scout; if (maddr != NULL) scout = dtrace_loadptr(maddr + next); while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { uint64_t m; #ifdef _LP64 m = dtrace_load64(maddr + dev) & MAXMIN64; #else m = dtrace_load32(maddr + dev) & MAXMIN; #endif if (m != minor) { maddr = dtrace_loadptr(maddr + next); if (scout == NULL) continue; scout = dtrace_loadptr(scout + next); if (scout == NULL) continue; scout = dtrace_loadptr(scout + next); if (scout == NULL) continue; if (scout == maddr) { *flags |= CPU_DTRACE_ILLOP; break; } continue; } /* * We have the minor data. Now we need to * copy the minor's name into the end of the * pathname. */ s = (char *)dtrace_loadptr(maddr + name); len = dtrace_strlen(s, size); if (*flags & CPU_DTRACE_FAULT) break; if (len != 0) { if ((end -= (len + 1)) < start) break; *end = ':'; } for (i = 1; i <= len; i++) end[i] = dtrace_load8((uintptr_t)s++); break; } } while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { ddi_node_state_t devi_state; devi_state = dtrace_load32(daddr + offsetof(struct dev_info, devi_node_state)); if (*flags & CPU_DTRACE_FAULT) break; if (devi_state >= DS_INITIALIZED) { s = (char *)dtrace_loadptr(daddr + offsetof(struct dev_info, devi_addr)); len = dtrace_strlen(s, size); if (*flags & CPU_DTRACE_FAULT) break; if (len != 0) { if ((end -= (len + 1)) < start) break; *end = '@'; } for (i = 1; i <= len; i++) end[i] = dtrace_load8((uintptr_t)s++); } /* * Now for the node name... */ s = (char *)dtrace_loadptr(daddr + offsetof(struct dev_info, devi_node_name)); daddr = dtrace_loadptr(daddr + offsetof(struct dev_info, devi_parent)); /* * If our parent is NULL (that is, if we're the root * node), we're going to use the special path * "devices". */ if (daddr == 0) s = "devices"; len = dtrace_strlen(s, size); if (*flags & CPU_DTRACE_FAULT) break; if ((end -= (len + 1)) < start) break; for (i = 1; i <= len; i++) end[i] = dtrace_load8((uintptr_t)s++); *end = '/'; if (depth++ > dtrace_devdepth_max) { *flags |= CPU_DTRACE_ILLOP; break; } } if (end < start) DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); if (daddr == 0) { regs[rd] = (uintptr_t)end; mstate->dtms_scratch_ptr += size; } break; } #endif case DIF_SUBR_STRJOIN: { char *d = (char *)mstate->dtms_scratch_ptr; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t s1 = tupregs[0].dttk_value; uintptr_t s2 = tupregs[1].dttk_value; int i = 0, j = 0; size_t lim1, lim2; char c; if (!dtrace_strcanload(s1, size, &lim1, mstate, vstate) || !dtrace_strcanload(s2, size, &lim2, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } for (;;) { if (i >= size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } c = (i >= lim1) ? '\0' : dtrace_load8(s1++); if ((d[i++] = c) == '\0') { i--; break; } } for (;;) { if (i >= size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } c = (j++ >= lim2) ? 
'\0' : dtrace_load8(s2++); if ((d[i++] = c) == '\0') break; } if (i < size) { mstate->dtms_scratch_ptr += i; regs[rd] = (uintptr_t)d; } break; } case DIF_SUBR_STRTOLL: { uintptr_t s = tupregs[0].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; size_t lim; int base = 10; if (nargs > 1) { if ((base = tupregs[1].dttk_value) <= 1 || base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { *flags |= CPU_DTRACE_ILLOP; break; } } if (!dtrace_strcanload(s, size, &lim, mstate, vstate)) { regs[rd] = INT64_MIN; break; } regs[rd] = dtrace_strtoll((char *)s, base, lim); break; } case DIF_SUBR_LLTOSTR: { int64_t i = (int64_t)tupregs[0].dttk_value; uint64_t val, digit; uint64_t size = 65; /* enough room for 2^64 in binary */ char *end = (char *)mstate->dtms_scratch_ptr + size - 1; int base = 10; if (nargs > 1) { if ((base = tupregs[1].dttk_value) <= 1 || base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { *flags |= CPU_DTRACE_ILLOP; break; } } val = (base == 10 && i < 0) ? i * -1 : i; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } for (*end-- = '\0'; val; val /= base) { if ((digit = val % base) <= '9' - '0') { *end-- = '0' + digit; } else { *end-- = 'a' + (digit - ('9' - '0') - 1); } } if (i == 0 && base == 16) *end-- = '0'; if (base == 16) *end-- = 'x'; if (i == 0 || base == 8 || base == 16) *end-- = '0'; if (i < 0 && base == 10) *end-- = '-'; regs[rd] = (uintptr_t)end + 1; mstate->dtms_scratch_ptr += size; break; } case DIF_SUBR_HTONS: case DIF_SUBR_NTOHS: #if BYTE_ORDER == BIG_ENDIAN regs[rd] = (uint16_t)tupregs[0].dttk_value; #else regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value); #endif break; case DIF_SUBR_HTONL: case DIF_SUBR_NTOHL: #if BYTE_ORDER == BIG_ENDIAN regs[rd] = (uint32_t)tupregs[0].dttk_value; #else regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value); #endif break; case DIF_SUBR_HTONLL: case DIF_SUBR_NTOHLL: #if BYTE_ORDER == BIG_ENDIAN regs[rd] = (uint64_t)tupregs[0].dttk_value; #else regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value); #endif break; case DIF_SUBR_DIRNAME: case DIF_SUBR_BASENAME: { char *dest = (char *)mstate->dtms_scratch_ptr; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t src = tupregs[0].dttk_value; int i, j, len = dtrace_strlen((char *)src, size); int lastbase = -1, firstbase = -1, lastdir = -1; int start, end; if (!dtrace_canload(src, len + 1, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } /* * The basename and dirname for a zero-length string is * defined to be "." */ if (len == 0) { len = 1; src = (uintptr_t)"."; } /* * Start from the back of the string, moving back toward the * front until we see a character that isn't a slash. That * character is the last character in the basename. */ for (i = len - 1; i >= 0; i--) { if (dtrace_load8(src + i) != '/') break; } if (i >= 0) lastbase = i; /* * Starting from the last character in the basename, move * towards the front until we find a slash. The character * that we processed immediately before that is the first * character in the basename. */ for (; i >= 0; i--) { if (dtrace_load8(src + i) == '/') break; } if (i >= 0) firstbase = i + 1; /* * Now keep going until we find a non-slash character. That * character is the last character in the dirname. 
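 * For example, with src = "/usr//bin/": the first scan stops at the 'n'
 * (lastbase = 8), the second at the slash before "bin" (firstbase = 6),
 * and this third scan at the 'r' of "usr" (lastdir = 3), yielding a
 * basename of "bin" and a dirname of "/usr".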
*/ for (; i >= 0; i--) { if (dtrace_load8(src + i) != '/') break; } if (i >= 0) lastdir = i; ASSERT(!(lastbase == -1 && firstbase != -1)); ASSERT(!(firstbase == -1 && lastdir != -1)); if (lastbase == -1) { /* * We didn't find a non-slash character. We know that * the length is non-zero, so the whole string must be * slashes. In either the dirname or the basename * case, we return '/'. */ ASSERT(firstbase == -1); firstbase = lastbase = lastdir = 0; } if (firstbase == -1) { /* * The entire string consists only of a basename * component. If we're looking for dirname, we need * to change our string to be just "."; if we're * looking for a basename, we'll just set the first * character of the basename to be 0. */ if (subr == DIF_SUBR_DIRNAME) { ASSERT(lastdir == -1); src = (uintptr_t)"."; lastdir = 0; } else { firstbase = 0; } } if (subr == DIF_SUBR_DIRNAME) { if (lastdir == -1) { /* * We know that we have a slash in the name -- * or lastdir would be set to 0, above. And * because lastdir is -1, we know that this * slash must be the first character. (That * is, the full string must be of the form * "/basename".) In this case, the last * character of the directory name is 0. */ lastdir = 0; } start = 0; end = lastdir; } else { ASSERT(subr == DIF_SUBR_BASENAME); ASSERT(firstbase != -1 && lastbase != -1); start = firstbase; end = lastbase; } for (i = start, j = 0; i <= end && j < size - 1; i++, j++) dest[j] = dtrace_load8(src + i); dest[j] = '\0'; regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; break; } case DIF_SUBR_GETF: { uintptr_t fd = tupregs[0].dttk_value; struct filedesc *fdp; file_t *fp; if (!dtrace_priv_proc(state)) { regs[rd] = 0; break; } fdp = curproc->p_fd; FILEDESC_SLOCK(fdp); fp = fget_locked(fdp, fd); mstate->dtms_getf = fp; regs[rd] = (uintptr_t)fp; FILEDESC_SUNLOCK(fdp); break; } case DIF_SUBR_CLEANPATH: { char *dest = (char *)mstate->dtms_scratch_ptr, c; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t src = tupregs[0].dttk_value; size_t lim; int i = 0, j = 0; #ifdef illumos zone_t *z; #endif if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } /* * Move forward, loading each character. */ do { c = (i >= lim) ? '\0' : dtrace_load8(src + i++); next: if (j + 5 >= size) /* 5 = strlen("/..c\0") */ break; if (c != '/') { dest[j++] = c; continue; } c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c == '/') { /* * We have two slashes -- we can just advance * to the next character. */ goto next; } if (c != '.') { /* * This is not "." and it's not ".." -- we can * just store the "/" and this character and * drive on. */ dest[j++] = '/'; dest[j++] = c; continue; } c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c == '/') { /* * This is a "/./" component. We're not going * to store anything in the destination buffer; * we're just going to go to the next component. */ goto next; } if (c != '.') { /* * This is not ".." -- we can just store the * "/." and this character and continue * processing. */ dest[j++] = '/'; dest[j++] = '.'; dest[j++] = c; continue; } c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c != '/' && c != '\0') { /* * This is not ".." -- it's "..[mumble]". * We'll store the "/.." and this character * and continue processing. */ dest[j++] = '/'; dest[j++] = '.'; dest[j++] = '.'; dest[j++] = c; continue; } /* * This is "/../" or "/..\0". We need to back up * our destination pointer until we find a "/". 
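 * The net effect is a purely lexical cleanup; for example,
 * cleanpath("/foo/../bar//./x") yields "/bar/x", with "//" and "/./"
 * dropped and each "/../" consuming the component before it.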
*/ i--; while (j != 0 && dest[--j] != '/') continue; if (c == '\0') dest[++j] = '/'; } while (c != '\0'); dest[j] = '\0'; #ifdef illumos if (mstate->dtms_getf != NULL && !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) && (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) { /* * If we've done a getf() as a part of this ECB and we * don't have kernel access (and we're not in the global * zone), check if the path we cleaned up begins with * the zone's root path, and trim it off if so. Note * that this is an output cleanliness issue, not a * security issue: knowing one's zone root path does * not enable privilege escalation. */ if (strstr(dest, z->zone_rootpath) == dest) dest += strlen(z->zone_rootpath) - 1; } #endif regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; break; } case DIF_SUBR_INET_NTOA: case DIF_SUBR_INET_NTOA6: case DIF_SUBR_INET_NTOP: { size_t size; int af, argi, i; char *base, *end; if (subr == DIF_SUBR_INET_NTOP) { af = (int)tupregs[0].dttk_value; argi = 1; } else { af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6; argi = 0; } if (af == AF_INET) { ipaddr_t ip4; uint8_t *ptr8, val; if (!dtrace_canload(tupregs[argi].dttk_value, sizeof (ipaddr_t), mstate, vstate)) { regs[rd] = 0; break; } /* * Safely load the IPv4 address. */ ip4 = dtrace_load32(tupregs[argi].dttk_value); /* * Check an IPv4 string will fit in scratch. */ size = INET_ADDRSTRLEN; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } base = (char *)mstate->dtms_scratch_ptr; end = (char *)mstate->dtms_scratch_ptr + size - 1; /* * Stringify as a dotted decimal quad. */ *end-- = '\0'; ptr8 = (uint8_t *)&ip4; for (i = 3; i >= 0; i--) { val = ptr8[i]; if (val == 0) { *end-- = '0'; } else { for (; val; val /= 10) { *end-- = '0' + (val % 10); } } if (i > 0) *end-- = '.'; } ASSERT(end + 1 >= base); } else if (af == AF_INET6) { struct in6_addr ip6; int firstzero, tryzero, numzero, v6end; uint16_t val; const char digits[] = "0123456789abcdef"; /* * Stringify using RFC 1884 convention 2 - 16 bit * hexadecimal values with a zero-run compression. * Lower case hexadecimal digits are used. * eg, fe80::214:4fff:fe0b:76c8. * The IPv4 embedded form is returned for inet_ntop, * just the IPv4 string is returned for inet_ntoa6. */ if (!dtrace_canload(tupregs[argi].dttk_value, sizeof (struct in6_addr), mstate, vstate)) { regs[rd] = 0; break; } /* * Safely load the IPv6 address. */ dtrace_bcopy( (void *)(uintptr_t)tupregs[argi].dttk_value, (void *)(uintptr_t)&ip6, sizeof (struct in6_addr)); /* * Check an IPv6 string will fit in scratch. */ size = INET6_ADDRSTRLEN; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } base = (char *)mstate->dtms_scratch_ptr; end = (char *)mstate->dtms_scratch_ptr + size - 1; *end-- = '\0'; /* * Find the longest run of 16 bit zero values * for the single allowed zero compression - "::". 
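 * From D this is reached via inet_ntop(AF_INET6, ...) or inet_ntoa6();
 * only one run may be compressed, so e.g. 2001:db8:0:1:0:0:0:1 renders
 * as 2001:db8:0:1::1, with the longest zero run taking the "::".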
*/ firstzero = -1; tryzero = -1; numzero = 1; for (i = 0; i < sizeof (struct in6_addr); i++) { #ifdef illumos if (ip6._S6_un._S6_u8[i] == 0 && #else if (ip6.__u6_addr.__u6_addr8[i] == 0 && #endif tryzero == -1 && i % 2 == 0) { tryzero = i; continue; } if (tryzero != -1 && #ifdef illumos (ip6._S6_un._S6_u8[i] != 0 || #else (ip6.__u6_addr.__u6_addr8[i] != 0 || #endif i == sizeof (struct in6_addr) - 1)) { if (i - tryzero <= numzero) { tryzero = -1; continue; } firstzero = tryzero; numzero = i - i % 2 - tryzero; tryzero = -1; #ifdef illumos if (ip6._S6_un._S6_u8[i] == 0 && #else if (ip6.__u6_addr.__u6_addr8[i] == 0 && #endif i == sizeof (struct in6_addr) - 1) numzero += 2; } } ASSERT(firstzero + numzero <= sizeof (struct in6_addr)); /* * Check for an IPv4 embedded address. */ v6end = sizeof (struct in6_addr) - 2; if (IN6_IS_ADDR_V4MAPPED(&ip6) || IN6_IS_ADDR_V4COMPAT(&ip6)) { for (i = sizeof (struct in6_addr) - 1; i >= DTRACE_V4MAPPED_OFFSET; i--) { ASSERT(end >= base); #ifdef illumos val = ip6._S6_un._S6_u8[i]; #else val = ip6.__u6_addr.__u6_addr8[i]; #endif if (val == 0) { *end-- = '0'; } else { for (; val; val /= 10) { *end-- = '0' + val % 10; } } if (i > DTRACE_V4MAPPED_OFFSET) *end-- = '.'; } if (subr == DIF_SUBR_INET_NTOA6) goto inetout; /* * Set v6end to skip the IPv4 address that * we have already stringified. */ v6end = 10; } /* * Build the IPv6 string by working through the * address in reverse. */ for (i = v6end; i >= 0; i -= 2) { ASSERT(end >= base); if (i == firstzero + numzero - 2) { *end-- = ':'; *end-- = ':'; i -= numzero - 2; continue; } if (i < 14 && i != firstzero - 2) *end-- = ':'; #ifdef illumos val = (ip6._S6_un._S6_u8[i] << 8) + ip6._S6_un._S6_u8[i + 1]; #else val = (ip6.__u6_addr.__u6_addr8[i] << 8) + ip6.__u6_addr.__u6_addr8[i + 1]; #endif if (val == 0) { *end-- = '0'; } else { for (; val; val /= 16) { *end-- = digits[val % 16]; } } } ASSERT(end + 1 >= base); } else { /* * The user didn't use AH_INET or AH_INET6. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); regs[rd] = 0; break; } inetout: regs[rd] = (uintptr_t)end + 1; mstate->dtms_scratch_ptr += size; break; } case DIF_SUBR_MEMREF: { uintptr_t size = 2 * sizeof(uintptr_t); uintptr_t *memref = (uintptr_t *) P2ROUNDUP(mstate->dtms_scratch_ptr, sizeof(uintptr_t)); size_t scratch_size = ((uintptr_t) memref - mstate->dtms_scratch_ptr) + size; /* address and length */ memref[0] = tupregs[0].dttk_value; memref[1] = tupregs[1].dttk_value; regs[rd] = (uintptr_t) memref; mstate->dtms_scratch_ptr += scratch_size; break; } #ifndef illumos case DIF_SUBR_MEMSTR: { char *str = (char *)mstate->dtms_scratch_ptr; uintptr_t mem = tupregs[0].dttk_value; char c = tupregs[1].dttk_value; size_t size = tupregs[2].dttk_value; uint8_t n; int i; regs[rd] = 0; if (size == 0) break; if (!dtrace_canload(mem, size - 1, mstate, vstate)) break; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); break; } if (dtrace_memstr_max != 0 && size > dtrace_memstr_max) { *flags |= CPU_DTRACE_ILLOP; break; } for (i = 0; i < size - 1; i++) { n = dtrace_load8(mem++); str[i] = (n == 0) ? c : n; } str[size - 1] = 0; regs[rd] = (uintptr_t)str; mstate->dtms_scratch_ptr += size; break; } #endif } } /* * Emulate the execution of DTrace IR instructions specified by the given * DIF object. This function is deliberately void of assertions as all of * the necessary checks are handled by a call to dtrace_difo_validate(). 
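 * Each DIF instruction is a fixed-width 32-bit word encoding an opcode
 * and up to three register operands (r1, r2, rd); DIF_OP_ADD below, for
 * example, simply computes regs[rd] = regs[r1] + regs[r2], and the
 * branch opcodes test condition codes set by DIF_OP_CMP and DIF_OP_TST.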
*/ static uint64_t dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, dtrace_state_t *state) { const dif_instr_t *text = difo->dtdo_buf; const uint_t textlen = difo->dtdo_len; const char *strtab = difo->dtdo_strtab; const uint64_t *inttab = difo->dtdo_inttab; uint64_t rval = 0; dtrace_statvar_t *svar; dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; dtrace_difv_t *v; volatile uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval; dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ uint64_t regs[DIF_DIR_NREGS]; uint64_t *tmp; uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0; int64_t cc_r; uint_t pc = 0, id, opc = 0; uint8_t ttop = 0; dif_instr_t instr; uint_t r1, r2, rd; /* * We stash the current DIF object into the machine state: we need it * for subsequent access checking. */ mstate->dtms_difo = difo; regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */ while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) { opc = pc; instr = text[pc++]; r1 = DIF_INSTR_R1(instr); r2 = DIF_INSTR_R2(instr); rd = DIF_INSTR_RD(instr); switch (DIF_INSTR_OP(instr)) { case DIF_OP_OR: regs[rd] = regs[r1] | regs[r2]; break; case DIF_OP_XOR: regs[rd] = regs[r1] ^ regs[r2]; break; case DIF_OP_AND: regs[rd] = regs[r1] & regs[r2]; break; case DIF_OP_SLL: regs[rd] = regs[r1] << regs[r2]; break; case DIF_OP_SRL: regs[rd] = regs[r1] >> regs[r2]; break; case DIF_OP_SUB: regs[rd] = regs[r1] - regs[r2]; break; case DIF_OP_ADD: regs[rd] = regs[r1] + regs[r2]; break; case DIF_OP_MUL: regs[rd] = regs[r1] * regs[r2]; break; case DIF_OP_SDIV: if (regs[r2] == 0) { regs[rd] = 0; *flags |= CPU_DTRACE_DIVZERO; } else { regs[rd] = (int64_t)regs[r1] / (int64_t)regs[r2]; } break; case DIF_OP_UDIV: if (regs[r2] == 0) { regs[rd] = 0; *flags |= CPU_DTRACE_DIVZERO; } else { regs[rd] = regs[r1] / regs[r2]; } break; case DIF_OP_SREM: if (regs[r2] == 0) { regs[rd] = 0; *flags |= CPU_DTRACE_DIVZERO; } else { regs[rd] = (int64_t)regs[r1] % (int64_t)regs[r2]; } break; case DIF_OP_UREM: if (regs[r2] == 0) { regs[rd] = 0; *flags |= CPU_DTRACE_DIVZERO; } else { regs[rd] = regs[r1] % regs[r2]; } break; case DIF_OP_NOT: regs[rd] = ~regs[r1]; break; case DIF_OP_MOV: regs[rd] = regs[r1]; break; case DIF_OP_CMP: cc_r = regs[r1] - regs[r2]; cc_n = cc_r < 0; cc_z = cc_r == 0; cc_v = 0; cc_c = regs[r1] < regs[r2]; break; case DIF_OP_TST: cc_n = cc_v = cc_c = 0; cc_z = regs[r1] == 0; break; case DIF_OP_BA: pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BE: if (cc_z) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BNE: if (cc_z == 0) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BG: if ((cc_z | (cc_n ^ cc_v)) == 0) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BGU: if ((cc_c | cc_z) == 0) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BGE: if ((cc_n ^ cc_v) == 0) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BGEU: if (cc_c == 0) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BL: if (cc_n ^ cc_v) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BLU: if (cc_c) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BLE: if (cc_z | (cc_n ^ cc_v)) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BLEU: if (cc_c | cc_z) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_RLDSB: if (!dtrace_canload(regs[r1], 1, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDSB: regs[rd] = (int8_t)dtrace_load8(regs[r1]); break; case DIF_OP_RLDSH: if (!dtrace_canload(regs[r1], 2, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDSH: regs[rd] = 
(int16_t)dtrace_load16(regs[r1]); break; case DIF_OP_RLDSW: if (!dtrace_canload(regs[r1], 4, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDSW: regs[rd] = (int32_t)dtrace_load32(regs[r1]); break; case DIF_OP_RLDUB: if (!dtrace_canload(regs[r1], 1, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDUB: regs[rd] = dtrace_load8(regs[r1]); break; case DIF_OP_RLDUH: if (!dtrace_canload(regs[r1], 2, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDUH: regs[rd] = dtrace_load16(regs[r1]); break; case DIF_OP_RLDUW: if (!dtrace_canload(regs[r1], 4, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDUW: regs[rd] = dtrace_load32(regs[r1]); break; case DIF_OP_RLDX: if (!dtrace_canload(regs[r1], 8, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDX: regs[rd] = dtrace_load64(regs[r1]); break; case DIF_OP_ULDSB: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = (int8_t) dtrace_fuword8((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDSH: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = (int16_t) dtrace_fuword16((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDSW: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = (int32_t) dtrace_fuword32((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDUB: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = dtrace_fuword8((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDUH: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = dtrace_fuword16((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDUW: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = dtrace_fuword32((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDX: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = dtrace_fuword64((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_RET: rval = regs[rd]; pc = textlen; break; case DIF_OP_NOP: break; case DIF_OP_SETX: regs[rd] = inttab[DIF_INSTR_INTEGER(instr)]; break; case DIF_OP_SETS: regs[rd] = (uint64_t)(uintptr_t) (strtab + DIF_INSTR_STRING(instr)); break; case DIF_OP_SCMP: { size_t sz = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t s1 = regs[r1]; uintptr_t s2 = regs[r2]; size_t lim1, lim2; if (s1 != 0 && !dtrace_strcanload(s1, sz, &lim1, mstate, vstate)) break; if (s2 != 0 && !dtrace_strcanload(s2, sz, &lim2, mstate, vstate)) break; cc_r = dtrace_strncmp((char *)s1, (char *)s2, MIN(lim1, lim2)); cc_n = cc_r < 0; cc_z = cc_r == 0; cc_v = cc_c = 0; break; } case DIF_OP_LDGA: regs[rd] = dtrace_dif_variable(mstate, state, r1, regs[r2]); break; case DIF_OP_LDGS: id = DIF_INSTR_VAR(instr); if (id >= DIF_VAR_OTHER_UBASE) { uintptr_t a; id -= DIF_VAR_OTHER_UBASE; svar = vstate->dtvs_globals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) { regs[rd] = svar->dtsv_data; break; } a = (uintptr_t)svar->dtsv_data; if (*(uint8_t *)a == UINT8_MAX) { /* * If the 0th byte is set to UINT8_MAX * then this is to be treated as a * reference to a NULL variable. 
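 * (By-ref globals are laid out with a leading uint64_t whose first byte
 * serves as this NULL flag; the payload starts sizeof (uint64_t) bytes
 * in, which is why the non-NULL case below returns a + sizeof
 * (uint64_t), and why DIF_OP_STGS writes the flag before copying.)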
*/ regs[rd] = 0; } else { regs[rd] = a + sizeof (uint64_t); } break; } regs[rd] = dtrace_dif_variable(mstate, state, id, 0); break; case DIF_OP_STGS: id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; VERIFY(id < vstate->dtvs_nglobals); svar = vstate->dtvs_globals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; size_t lim; ASSERT(a != 0); ASSERT(svar->dtsv_size != 0); if (regs[rd] == 0) { *(uint8_t *)a = UINT8_MAX; break; } else { *(uint8_t *)a = 0; a += sizeof (uint64_t); } if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], (void *)a, &v->dtdv_type, lim); break; } svar->dtsv_data = regs[rd]; break; case DIF_OP_LDTA: /* * There are no DTrace built-in thread-local arrays at * present. This opcode is saved for future work. */ *flags |= CPU_DTRACE_ILLOP; regs[rd] = 0; break; case DIF_OP_LDLS: id = DIF_INSTR_VAR(instr); if (id < DIF_VAR_OTHER_UBASE) { /* * For now, this has no meaning. */ regs[rd] = 0; break; } id -= DIF_VAR_OTHER_UBASE; ASSERT(id < vstate->dtvs_nlocals); ASSERT(vstate->dtvs_locals != NULL); svar = vstate->dtvs_locals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; size_t sz = v->dtdv_type.dtdt_size; size_t lim; sz += sizeof (uint64_t); ASSERT(svar->dtsv_size == NCPU * sz); a += curcpu * sz; if (*(uint8_t *)a == UINT8_MAX) { /* * If the 0th byte is set to UINT8_MAX * then this is to be treated as a * reference to a NULL variable. */ regs[rd] = 0; } else { regs[rd] = a + sizeof (uint64_t); } break; } ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t)); tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; regs[rd] = tmp[curcpu]; break; case DIF_OP_STLS: id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; VERIFY(id < vstate->dtvs_nlocals); ASSERT(vstate->dtvs_locals != NULL); svar = vstate->dtvs_locals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; size_t sz = v->dtdv_type.dtdt_size; size_t lim; sz += sizeof (uint64_t); ASSERT(svar->dtsv_size == NCPU * sz); a += curcpu * sz; if (regs[rd] == 0) { *(uint8_t *)a = UINT8_MAX; break; } else { *(uint8_t *)a = 0; a += sizeof (uint64_t); } if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], (void *)a, &v->dtdv_type, lim); break; } ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t)); tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; tmp[curcpu] = regs[rd]; break; case DIF_OP_LDTS: { dtrace_dynvar_t *dvar; dtrace_key_t *key; id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; v = &vstate->dtvs_tlocals[id]; key = &tupregs[DIF_DTR_NREGS]; key[0].dttk_value = (uint64_t)id; key[0].dttk_size = 0; DTRACE_TLS_THRKEY(key[1].dttk_value); key[1].dttk_size = 0; dvar = dtrace_dynvar(dstate, 2, key, sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC, mstate, vstate); if (dvar == NULL) { regs[rd] = 0; break; } if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; } else { regs[rd] = *((uint64_t *)dvar->dtdv_data); } break; } case DIF_OP_STTS: { dtrace_dynvar_t *dvar; dtrace_key_t *key; id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; VERIFY(id < 
vstate->dtvs_ntlocals); key = &tupregs[DIF_DTR_NREGS]; key[0].dttk_value = (uint64_t)id; key[0].dttk_size = 0; DTRACE_TLS_THRKEY(key[1].dttk_value); key[1].dttk_size = 0; v = &vstate->dtvs_tlocals[id]; dvar = dtrace_dynvar(dstate, 2, key, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), regs[rd] ? DTRACE_DYNVAR_ALLOC : DTRACE_DYNVAR_DEALLOC, mstate, vstate); /* * Given that we're storing to thread-local data, * we need to flush our predicate cache. */ curthread->t_predcache = 0; if (dvar == NULL) break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { size_t lim; if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], dvar->dtdv_data, &v->dtdv_type, lim); } else { *((uint64_t *)dvar->dtdv_data) = regs[rd]; } break; } case DIF_OP_SRA: regs[rd] = (int64_t)regs[r1] >> regs[r2]; break; case DIF_OP_CALL: dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd, regs, tupregs, ttop, mstate, state); break; case DIF_OP_PUSHTR: if (ttop == DIF_DTR_NREGS) { *flags |= CPU_DTRACE_TUPOFLOW; break; } if (r1 == DIF_TYPE_STRING) { /* * If this is a string type and the size is 0, * we'll use the system-wide default string * size. Note that we are _not_ looking at * the value of the DTRACEOPT_STRSIZE option; * had this been set, we would expect to have * a non-zero size value in the "pushtr". */ tupregs[ttop].dttk_size = dtrace_strlen((char *)(uintptr_t)regs[rd], regs[r2] ? regs[r2] : dtrace_strsize_default) + 1; } else { if (regs[r2] > LONG_MAX) { *flags |= CPU_DTRACE_ILLOP; break; } tupregs[ttop].dttk_size = regs[r2]; } tupregs[ttop++].dttk_value = regs[rd]; break; case DIF_OP_PUSHTV: if (ttop == DIF_DTR_NREGS) { *flags |= CPU_DTRACE_TUPOFLOW; break; } tupregs[ttop].dttk_value = regs[rd]; tupregs[ttop++].dttk_size = 0; break; case DIF_OP_POPTS: if (ttop != 0) ttop--; break; case DIF_OP_FLUSHTS: ttop = 0; break; case DIF_OP_LDGAA: case DIF_OP_LDTAA: { dtrace_dynvar_t *dvar; dtrace_key_t *key = tupregs; uint_t nkeys = ttop; id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; key[nkeys].dttk_value = (uint64_t)id; key[nkeys++].dttk_size = 0; if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) { DTRACE_TLS_THRKEY(key[nkeys].dttk_value); key[nkeys++].dttk_size = 0; VERIFY(id < vstate->dtvs_ntlocals); v = &vstate->dtvs_tlocals[id]; } else { VERIFY(id < vstate->dtvs_nglobals); v = &vstate->dtvs_globals[id]->dtsv_var; } dvar = dtrace_dynvar(dstate, nkeys, key, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC, mstate, vstate); if (dvar == NULL) { regs[rd] = 0; break; } if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; } else { regs[rd] = *((uint64_t *)dvar->dtdv_data); } break; } case DIF_OP_STGAA: case DIF_OP_STTAA: { dtrace_dynvar_t *dvar; dtrace_key_t *key = tupregs; uint_t nkeys = ttop; id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; key[nkeys].dttk_value = (uint64_t)id; key[nkeys++].dttk_size = 0; if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) { DTRACE_TLS_THRKEY(key[nkeys].dttk_value); key[nkeys++].dttk_size = 0; VERIFY(id < vstate->dtvs_ntlocals); v = &vstate->dtvs_tlocals[id]; } else { VERIFY(id < vstate->dtvs_nglobals); v = &vstate->dtvs_globals[id]->dtsv_var; } dvar = dtrace_dynvar(dstate, nkeys, key, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), regs[rd] ? 
DTRACE_DYNVAR_ALLOC : DTRACE_DYNVAR_DEALLOC, mstate, vstate); if (dvar == NULL) break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { size_t lim; if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], dvar->dtdv_data, &v->dtdv_type, lim); } else { *((uint64_t *)dvar->dtdv_data) = regs[rd]; } break; } case DIF_OP_ALLOCS: { uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1]; /* * Rounding up the user allocation size could have * overflowed large, bogus allocations (like -1ULL) to * 0. */ if (size < regs[r1] || !DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } dtrace_bzero((void *) mstate->dtms_scratch_ptr, size); mstate->dtms_scratch_ptr += size; regs[rd] = ptr; break; } case DIF_OP_COPYS: if (!dtrace_canstore(regs[rd], regs[r2], mstate, vstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate)) break; dtrace_bcopy((void *)(uintptr_t)regs[r1], (void *)(uintptr_t)regs[rd], (size_t)regs[r2]); break; case DIF_OP_STB: if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1]; break; case DIF_OP_STH: if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } if (regs[rd] & 1) { *flags |= CPU_DTRACE_BADALIGN; *illval = regs[rd]; break; } *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1]; break; case DIF_OP_STW: if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } if (regs[rd] & 3) { *flags |= CPU_DTRACE_BADALIGN; *illval = regs[rd]; break; } *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1]; break; case DIF_OP_STX: if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } if (regs[rd] & 7) { *flags |= CPU_DTRACE_BADALIGN; *illval = regs[rd]; break; } *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1]; break; } } if (!(*flags & CPU_DTRACE_FAULT)) return (rval); mstate->dtms_fltoffs = opc * sizeof (dif_instr_t); mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS; return (0); } static void dtrace_action_breakpoint(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; dtrace_provider_t *prov = probe->dtpr_provider; char c[DTRACE_FULLNAMELEN + 80], *str; char *msg = "dtrace: breakpoint action at probe "; char *ecbmsg = " (ecb "; uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4)); uintptr_t val = (uintptr_t)ecb; int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0; if (dtrace_destructive_disallow) return; /* * It's impossible to be taking action on the NULL probe. */ ASSERT(probe != NULL); /* * This is a poor man's (destitute man's?) sprintf(): we want to * print the provider name, module name, function name and name of * the probe, along with the hex address of the ECB with the breakpoint * action -- all of which we must place in the character buffer by * hand. 
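 * The assembled message takes the form (probe and address purely
 * illustrative):
 *
 *	dtrace: breakpoint action at probe fbt:kernel:foo:entry (ecb fffff80002a4c000)
 *
 * with leading zero nibbles of the ECB address suppressed.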
*/ while (*msg != '\0') c[i++] = *msg++; for (str = prov->dtpv_name; *str != '\0'; str++) c[i++] = *str; c[i++] = ':'; for (str = probe->dtpr_mod; *str != '\0'; str++) c[i++] = *str; c[i++] = ':'; for (str = probe->dtpr_func; *str != '\0'; str++) c[i++] = *str; c[i++] = ':'; for (str = probe->dtpr_name; *str != '\0'; str++) c[i++] = *str; while (*ecbmsg != '\0') c[i++] = *ecbmsg++; while (shift >= 0) { mask = (uintptr_t)0xf << shift; if (val >= ((uintptr_t)1 << shift)) c[i++] = "0123456789abcdef"[(val & mask) >> shift]; shift -= 4; } c[i++] = ')'; c[i] = '\0'; #ifdef illumos debug_enter(c); #else kdb_enter(KDB_WHY_DTRACE, "breakpoint action"); #endif } static void dtrace_action_panic(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; /* * It's impossible to be taking action on the NULL probe. */ ASSERT(probe != NULL); if (dtrace_destructive_disallow) return; if (dtrace_panicked != NULL) return; if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL) return; /* * We won the right to panic. (We want to be sure that only one * thread calls panic() from dtrace_probe(), and that panic() is * called exactly once.) */ dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)", probe->dtpr_provider->dtpv_name, probe->dtpr_mod, probe->dtpr_func, probe->dtpr_name, (void *)ecb); } static void dtrace_action_raise(uint64_t sig) { if (dtrace_destructive_disallow) return; if (sig >= NSIG) { DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return; } #ifdef illumos /* * raise() has a queue depth of 1 -- we ignore all subsequent * invocations of the raise() action. */ if (curthread->t_dtrace_sig == 0) curthread->t_dtrace_sig = (uint8_t)sig; curthread->t_sig_check = 1; aston(curthread); #else struct proc *p = curproc; PROC_LOCK(p); kern_psignal(p, sig); PROC_UNLOCK(p); #endif } static void dtrace_action_stop(void) { if (dtrace_destructive_disallow) return; #ifdef illumos if (!curthread->t_dtrace_stop) { curthread->t_dtrace_stop = 1; curthread->t_sig_check = 1; aston(curthread); } #else struct proc *p = curproc; PROC_LOCK(p); kern_psignal(p, SIGSTOP); PROC_UNLOCK(p); #endif } static void dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val) { hrtime_t now; volatile uint16_t *flags; #ifdef illumos cpu_t *cpu = CPU; #else cpu_t *cpu = &solaris_cpu[curcpu]; #endif if (dtrace_destructive_disallow) return; flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; now = dtrace_gethrtime(); if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) { /* * We need to advance the mark to the current time. */ cpu->cpu_dtrace_chillmark = now; cpu->cpu_dtrace_chilled = 0; } /* * Now check to see if the requested chill time would take us over * the maximum amount of time allowed in the chill interval. (Or * worse, if the calculation itself induces overflow.) */ if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max || cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) { *flags |= CPU_DTRACE_ILLOP; return; } while (dtrace_gethrtime() - now < val) continue; /* * Normally, we assure that the value of the variable "timestamp" does * not change within an ECB. The presence of chill() represents an * exception to this rule, however. 
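 * (A destructive clause such as fbt::foo:entry { chill(500000); } --
 * probe name illustrative -- busy-waits for 500 microseconds, since
 * chill() takes nanoseconds; clearing the TIMESTAMP bit below forces
 * any later reference to re-read the clock.)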
*/ mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP; cpu->cpu_dtrace_chilled += val; } static void dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t *buf, uint64_t arg) { int nframes = DTRACE_USTACK_NFRAMES(arg); int strsize = DTRACE_USTACK_STRSIZE(arg); uint64_t *pcs = &buf[1], *fps; char *str = (char *)&pcs[nframes]; int size, offs = 0, i, j; size_t rem; uintptr_t old = mstate->dtms_scratch_ptr, saved; uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; char *sym; /* * Should be taking a faster path if string space has not been * allocated. */ ASSERT(strsize != 0); /* * We will first allocate some temporary space for the frame pointers. */ fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8); size = (uintptr_t)fps - mstate->dtms_scratch_ptr + (nframes * sizeof (uint64_t)); if (!DTRACE_INSCRATCH(mstate, size)) { /* * Not enough room for our frame pointers -- need to indicate * that we ran out of scratch space. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); return; } mstate->dtms_scratch_ptr += size; saved = mstate->dtms_scratch_ptr; /* * Now get a stack with both program counters and frame pointers. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getufpstack(buf, fps, nframes + 1); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); /* * If that faulted, we're cooked. */ if (*flags & CPU_DTRACE_FAULT) goto out; /* * Now we want to walk up the stack, calling the USTACK helper. For * each iteration, we restore the scratch pointer. */ for (i = 0; i < nframes; i++) { mstate->dtms_scratch_ptr = saved; if (offs >= strsize) break; sym = (char *)(uintptr_t)dtrace_helper( DTRACE_HELPER_ACTION_USTACK, mstate, state, pcs[i], fps[i]); /* * If we faulted while running the helper, we're going to * clear the fault and null out the corresponding string. */ if (*flags & CPU_DTRACE_FAULT) { *flags &= ~CPU_DTRACE_FAULT; str[offs++] = '\0'; continue; } if (sym == NULL) { str[offs++] = '\0'; continue; } if (!dtrace_strcanload((uintptr_t)sym, strsize, &rem, mstate, &(state->dts_vstate))) { str[offs++] = '\0'; continue; } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); /* * Now copy in the string that the helper returned to us. */ for (j = 0; offs + j < strsize && j < rem; j++) { if ((str[offs + j] = sym[j]) == '\0') break; } DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); offs += j + 1; } if (offs >= strsize) { /* * If we didn't have room for all of the strings, we don't * abort processing -- this needn't be a fatal error -- but we * still want to increment a counter (dts_stkstroverflows) to * allow this condition to be warned about. (If this is from * a jstack() action, it is easily tuned via jstackstrsize.) */ dtrace_error(&state->dts_stkstroverflows); } while (offs < strsize) str[offs++] = '\0'; out: mstate->dtms_scratch_ptr = old; } static void dtrace_store_by_ref(dtrace_difo_t *dp, caddr_t tomax, size_t size, size_t *valoffsp, uint64_t *valp, uint64_t end, int intuple, int dtkind) { volatile uint16_t *flags; uint64_t val = *valp; size_t valoffs = *valoffsp; flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; ASSERT(dtkind == DIF_TF_BYREF || dtkind == DIF_TF_BYUREF); /* * If this is a string, we're going to only load until we find the zero * byte -- after which we'll store zero bytes. 
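 *
 * ('c' starts out as the non-zero value '\0' + 1 so that the first
 * iteration always performs a load; once a terminating NUL has been
 * loaded, 'c' stays '\0' and the remainder of the record is padded
 * with zero bytes -- unless this value is a tuple member (intuple),
 * in which case we stop right after storing the NUL.)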
*/ if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { char c = '\0' + 1; size_t s; for (s = 0; s < size; s++) { if (c != '\0' && dtkind == DIF_TF_BYREF) { c = dtrace_load8(val++); } else if (c != '\0' && dtkind == DIF_TF_BYUREF) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); c = dtrace_fuword8((void *)(uintptr_t)val++); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); if (*flags & CPU_DTRACE_FAULT) break; } DTRACE_STORE(uint8_t, tomax, valoffs++, c); if (c == '\0' && intuple) break; } } else { uint8_t c; while (valoffs < end) { if (dtkind == DIF_TF_BYREF) { c = dtrace_load8(val++); } else if (dtkind == DIF_TF_BYUREF) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); c = dtrace_fuword8((void *)(uintptr_t)val++); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); if (*flags & CPU_DTRACE_FAULT) break; } DTRACE_STORE(uint8_t, tomax, valoffs++, c); } } *valp = val; *valoffsp = valoffs; } /* * If you're looking for the epicenter of DTrace, you just found it. This * is the function called by the provider to fire a probe -- from which all * subsequent probe-context DTrace activity emanates. */ void dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) { processorid_t cpuid; dtrace_icookie_t cookie; dtrace_probe_t *probe; dtrace_mstate_t mstate; dtrace_ecb_t *ecb; dtrace_action_t *act; intptr_t offs; size_t size; int vtime, onintr; volatile uint16_t *flags; hrtime_t now; if (panicstr != NULL) return; #ifdef illumos /* * Kick out immediately if this CPU is still being born (in which case * curthread will be set to -1) or the current thread can't allow * probes in its current context. */ if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE)) return; #endif cookie = dtrace_interrupt_disable(); probe = dtrace_probes[id - 1]; cpuid = curcpu; onintr = CPU_ON_INTR(CPU); if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE && probe->dtpr_predcache == curthread->t_predcache) { /* * We have hit in the predicate cache; we know that * this predicate would evaluate to be false. */ dtrace_interrupt_enable(cookie); return; } #ifdef illumos if (panic_quiesce) { #else if (panicstr != NULL) { #endif /* * We don't trace anything if we're panicking. */ dtrace_interrupt_enable(cookie); return; } now = mstate.dtms_timestamp = dtrace_gethrtime(); mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP; vtime = dtrace_vtime_references != 0; if (vtime && curthread->t_dtrace_start) curthread->t_dtrace_vtime += now - curthread->t_dtrace_start; mstate.dtms_difo = NULL; mstate.dtms_probe = probe; mstate.dtms_strtok = 0; mstate.dtms_arg[0] = arg0; mstate.dtms_arg[1] = arg1; mstate.dtms_arg[2] = arg2; mstate.dtms_arg[3] = arg3; mstate.dtms_arg[4] = arg4; flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags; for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) { dtrace_predicate_t *pred = ecb->dte_predicate; dtrace_state_t *state = ecb->dte_state; dtrace_buffer_t *buf = &state->dts_buffer[cpuid]; dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid]; dtrace_vstate_t *vstate = &state->dts_vstate; dtrace_provider_t *prov = probe->dtpr_provider; uint64_t tracememsize = 0; int committed = 0; caddr_t tomax; /* * A little subtlety with the following (seemingly innocuous) * declaration of the automatic 'val': by looking at the * code, you might think that it could be declared in the * action processing loop, below. (That is, it's only used in * the action processing loop.) 
However, it must be declared * out of that scope because in the case of DIF expression * arguments to aggregating actions, one iteration of the * action loop will use the last iteration's value. */ uint64_t val = 0; mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE; mstate.dtms_getf = NULL; *flags &= ~CPU_DTRACE_ERROR; if (prov == dtrace_provider) { /* * If dtrace itself is the provider of this probe, * we're only going to continue processing the ECB if * arg0 (the dtrace_state_t) is equal to the ECB's * creating state. (This prevents disjoint consumers * from seeing one another's metaprobes.) */ if (arg0 != (uint64_t)(uintptr_t)state) continue; } if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) { /* * We're not currently active. If our provider isn't * the dtrace pseudo provider, we're not interested. */ if (prov != dtrace_provider) continue; /* * Now we must further check if we are in the BEGIN * probe. If we are, we will only continue processing * if we're still in WARMUP -- if one BEGIN enabling * has invoked the exit() action, we don't want to * evaluate subsequent BEGIN enablings. */ if (probe->dtpr_id == dtrace_probeid_begin && state->dts_activity != DTRACE_ACTIVITY_WARMUP) { ASSERT(state->dts_activity == DTRACE_ACTIVITY_DRAINING); continue; } } if (ecb->dte_cond) { /* * If the dte_cond bits indicate that this * consumer is only allowed to see user-mode firings * of this probe, call the provider's dtps_usermode() * entry point to check that the probe was fired * while in a user context. Skip this ECB if that's * not the case. */ if ((ecb->dte_cond & DTRACE_COND_USERMODE) && prov->dtpv_pops.dtps_usermode(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg) == 0) continue; #ifdef illumos /* * This is more subtle than it looks. We have to be * absolutely certain that CRED() isn't going to * change out from under us so it's only legit to * examine that structure if we're in constrained * situations. Currently, the only times we'll this * check is if a non-super-user has enabled the * profile or syscall providers -- providers that * allow visibility of all processes. For the * profile case, the check above will ensure that * we're examining a user context. */ if (ecb->dte_cond & DTRACE_COND_OWNER) { cred_t *cr; cred_t *s_cr = ecb->dte_state->dts_cred.dcr_cred; proc_t *proc; ASSERT(s_cr != NULL); if ((cr = CRED()) == NULL || s_cr->cr_uid != cr->cr_uid || s_cr->cr_uid != cr->cr_ruid || s_cr->cr_uid != cr->cr_suid || s_cr->cr_gid != cr->cr_gid || s_cr->cr_gid != cr->cr_rgid || s_cr->cr_gid != cr->cr_sgid || (proc = ttoproc(curthread)) == NULL || (proc->p_flag & SNOCD)) continue; } if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { cred_t *cr; cred_t *s_cr = ecb->dte_state->dts_cred.dcr_cred; ASSERT(s_cr != NULL); if ((cr = CRED()) == NULL || s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) continue; } #endif } if (now - state->dts_alive > dtrace_deadman_timeout) { /* * We seem to be dead. Unless we (a) have kernel * destructive permissions (b) have explicitly enabled * destructive actions and (c) destructive actions have * not been disabled, we're going to transition into * the KILLED state, from which no further processing * on this state will be performed. 
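 *
 * (The transition below is performed with dtrace_cas32() in a loop so
 * that it is atomic with respect to any other CPU that may be updating
 * dts_activity at the same time.)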
*/ if (!dtrace_priv_kernel_destructive(state) || !state->dts_cred.dcr_destructive || dtrace_destructive_disallow) { void *activity = &state->dts_activity; dtrace_activity_t current; do { current = state->dts_activity; } while (dtrace_cas32(activity, current, DTRACE_ACTIVITY_KILLED) != current); continue; } } if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed, ecb->dte_alignment, state, &mstate)) < 0) continue; tomax = buf->dtb_tomax; ASSERT(tomax != NULL); if (ecb->dte_size != 0) { dtrace_rechdr_t dtrh; if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) { mstate.dtms_timestamp = dtrace_gethrtime(); mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP; } ASSERT3U(ecb->dte_size, >=, sizeof (dtrace_rechdr_t)); dtrh.dtrh_epid = ecb->dte_epid; DTRACE_RECORD_STORE_TIMESTAMP(&dtrh, mstate.dtms_timestamp); *((dtrace_rechdr_t *)(tomax + offs)) = dtrh; } mstate.dtms_epid = ecb->dte_epid; mstate.dtms_present |= DTRACE_MSTATE_EPID; if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) mstate.dtms_access = DTRACE_ACCESS_KERNEL; else mstate.dtms_access = 0; if (pred != NULL) { dtrace_difo_t *dp = pred->dtp_difo; uint64_t rval; rval = dtrace_dif_emulate(dp, &mstate, vstate, state); if (!(*flags & CPU_DTRACE_ERROR) && !rval) { dtrace_cacheid_t cid = probe->dtpr_predcache; if (cid != DTRACE_CACHEIDNONE && !onintr) { /* * Update the predicate cache... */ ASSERT(cid == pred->dtp_cacheid); curthread->t_predcache = cid; } continue; } } for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) && act != NULL; act = act->dta_next) { size_t valoffs; dtrace_difo_t *dp; dtrace_recdesc_t *rec = &act->dta_rec; size = rec->dtrd_size; valoffs = offs + rec->dtrd_offset; if (DTRACEACT_ISAGG(act->dta_kind)) { uint64_t v = 0xbad; dtrace_aggregation_t *agg; agg = (dtrace_aggregation_t *)act; if ((dp = act->dta_difo) != NULL) v = dtrace_dif_emulate(dp, &mstate, vstate, state); if (*flags & CPU_DTRACE_ERROR) continue; /* * Note that we always pass the expression * value from the previous iteration of the * action loop. This value will only be used * if there is an expression argument to the * aggregating action, denoted by the * dtag_hasarg field. */ dtrace_aggregate(agg, buf, offs, aggbuf, v, val); continue; } switch (act->dta_kind) { case DTRACEACT_STOP: if (dtrace_priv_proc_destructive(state)) dtrace_action_stop(); continue; case DTRACEACT_BREAKPOINT: if (dtrace_priv_kernel_destructive(state)) dtrace_action_breakpoint(ecb); continue; case DTRACEACT_PANIC: if (dtrace_priv_kernel_destructive(state)) dtrace_action_panic(ecb); continue; case DTRACEACT_STACK: if (!dtrace_priv_kernel(state)) continue; dtrace_getpcstack((pc_t *)(tomax + valoffs), size / sizeof (pc_t), probe->dtpr_aframes, DTRACE_ANCHORED(probe) ? NULL : (uint32_t *)arg0); continue; case DTRACEACT_JSTACK: case DTRACEACT_USTACK: if (!dtrace_priv_proc(state)) continue; /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate.dtms_probe) && CPU_ON_INTR(CPU)) { int depth = DTRACE_USTACK_NFRAMES( rec->dtrd_arg) + 1; dtrace_bzero((void *)(tomax + valoffs), DTRACE_USTACK_STRSIZE(rec->dtrd_arg) + depth * sizeof (uint64_t)); continue; } if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 && curproc->p_dtrace_helpers != NULL) { /* * This is the slow path -- we have * allocated string space, and we're * getting the stack of a process that * has helpers. Call into a separate * routine to perform this processing. 
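 *
 * (This is the path typically taken by jstack(), which both requests
 * string space and relies on the target process having registered a
 * ustack helper.  An illustrative enabling would be:
 *
 *	profile-97 /execname == "java"/ { jstack(40, 8192); }
 *
 * where the second argument sizes the string space consumed here.)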
*/ dtrace_action_ustack(&mstate, state, (uint64_t *)(tomax + valoffs), rec->dtrd_arg); continue; } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getupcstack((uint64_t *) (tomax + valoffs), DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); continue; default: break; } dp = act->dta_difo; ASSERT(dp != NULL); val = dtrace_dif_emulate(dp, &mstate, vstate, state); if (*flags & CPU_DTRACE_ERROR) continue; switch (act->dta_kind) { case DTRACEACT_SPECULATE: { dtrace_rechdr_t *dtrh; ASSERT(buf == &state->dts_buffer[cpuid]); buf = dtrace_speculation_buffer(state, cpuid, val); if (buf == NULL) { *flags |= CPU_DTRACE_DROP; continue; } offs = dtrace_buffer_reserve(buf, ecb->dte_needed, ecb->dte_alignment, state, NULL); if (offs < 0) { *flags |= CPU_DTRACE_DROP; continue; } tomax = buf->dtb_tomax; ASSERT(tomax != NULL); if (ecb->dte_size == 0) continue; ASSERT3U(ecb->dte_size, >=, sizeof (dtrace_rechdr_t)); dtrh = ((void *)(tomax + offs)); dtrh->dtrh_epid = ecb->dte_epid; /* * When the speculation is committed, all of * the records in the speculative buffer will * have their timestamps set to the commit * time. Until then, it is set to a sentinel * value, for debugability. */ DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX); continue; } case DTRACEACT_PRINTM: { /* The DIF returns a 'memref'. */ uintptr_t *memref = (uintptr_t *)(uintptr_t) val; /* Get the size from the memref. */ size = memref[1]; /* * Check if the size exceeds the allocated * buffer size. */ if (size + sizeof(uintptr_t) > dp->dtdo_rtype.dtdt_size) { /* Flag a drop! */ *flags |= CPU_DTRACE_DROP; continue; } /* Store the size in the buffer first. */ DTRACE_STORE(uintptr_t, tomax, valoffs, size); /* * Offset the buffer address to the start * of the data. */ valoffs += sizeof(uintptr_t); /* * Reset to the memory address rather than * the memref array, then let the BYREF * code below do the work to store the * memory data in the buffer. */ val = memref[0]; break; } case DTRACEACT_CHILL: if (dtrace_priv_kernel_destructive(state)) dtrace_action_chill(&mstate, val); continue; case DTRACEACT_RAISE: if (dtrace_priv_proc_destructive(state)) dtrace_action_raise(val); continue; case DTRACEACT_COMMIT: ASSERT(!committed); /* * We need to commit our buffer state. */ if (ecb->dte_size) buf->dtb_offset = offs + ecb->dte_size; buf = &state->dts_buffer[cpuid]; dtrace_speculation_commit(state, cpuid, val); committed = 1; continue; case DTRACEACT_DISCARD: dtrace_speculation_discard(state, cpuid, val); continue; case DTRACEACT_DIFEXPR: case DTRACEACT_LIBACT: case DTRACEACT_PRINTF: case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: case DTRACEACT_TRACEMEM: break; case DTRACEACT_TRACEMEM_DYNSIZE: tracememsize = val; break; case DTRACEACT_SYM: case DTRACEACT_MOD: if (!dtrace_priv_kernel(state)) continue; break; case DTRACEACT_USYM: case DTRACEACT_UMOD: case DTRACEACT_UADDR: { #ifdef illumos struct pid *pid = curthread->t_procp->p_pidp; #endif if (!dtrace_priv_proc(state)) continue; DTRACE_STORE(uint64_t, tomax, #ifdef illumos valoffs, (uint64_t)pid->pid_id); #else valoffs, (uint64_t) curproc->p_pid); #endif DTRACE_STORE(uint64_t, tomax, valoffs + sizeof (uint64_t), val); continue; } case DTRACEACT_EXIT: { /* * For the exit action, we are going to attempt * to atomically set our activity to be * draining. If this fails (either because * another CPU has beat us to the exit action, * or because our current activity is something * other than ACTIVE or WARMUP), we will * continue. 
This assures that the exit action * can be successfully recorded at most once * when we're in the ACTIVE state. If we're * encountering the exit() action while in * COOLDOWN, however, we want to honor the new * status code. (We know that we're the only * thread in COOLDOWN, so there is no race.) */ void *activity = &state->dts_activity; dtrace_activity_t current = state->dts_activity; if (current == DTRACE_ACTIVITY_COOLDOWN) break; if (current != DTRACE_ACTIVITY_WARMUP) current = DTRACE_ACTIVITY_ACTIVE; if (dtrace_cas32(activity, current, DTRACE_ACTIVITY_DRAINING) != current) { *flags |= CPU_DTRACE_DROP; continue; } break; } default: ASSERT(0); } if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF || dp->dtdo_rtype.dtdt_flags & DIF_TF_BYUREF) { uintptr_t end = valoffs + size; if (tracememsize != 0 && valoffs + tracememsize < end) { end = valoffs + tracememsize; tracememsize = 0; } if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF && !dtrace_vcanload((void *)(uintptr_t)val, &dp->dtdo_rtype, NULL, &mstate, vstate)) continue; dtrace_store_by_ref(dp, tomax, size, &valoffs, &val, end, act->dta_intuple, dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ? DIF_TF_BYREF: DIF_TF_BYUREF); continue; } switch (size) { case 0: break; case sizeof (uint8_t): DTRACE_STORE(uint8_t, tomax, valoffs, val); break; case sizeof (uint16_t): DTRACE_STORE(uint16_t, tomax, valoffs, val); break; case sizeof (uint32_t): DTRACE_STORE(uint32_t, tomax, valoffs, val); break; case sizeof (uint64_t): DTRACE_STORE(uint64_t, tomax, valoffs, val); break; default: /* * Any other size should have been returned by * reference, not by value. */ ASSERT(0); break; } } if (*flags & CPU_DTRACE_DROP) continue; if (*flags & CPU_DTRACE_FAULT) { int ndx; dtrace_action_t *err; buf->dtb_errors++; if (probe->dtpr_id == dtrace_probeid_error) { /* * There's nothing we can do -- we had an * error on the error probe. We bump an * error counter to at least indicate that * this condition happened. */ dtrace_error(&state->dts_dblerrors); continue; } if (vtime) { /* * Before recursing on dtrace_probe(), we * need to explicitly clear out our start * time to prevent it from being accumulated * into t_dtrace_vtime. */ curthread->t_dtrace_start = 0; } /* * Iterate over the actions to figure out which action * we were processing when we experienced the error. * Note that act points _past_ the faulting action; if * act is ecb->dte_action, the fault was in the * predicate, if it's ecb->dte_action->dta_next it's * in action #1, and so on. */ for (err = ecb->dte_action, ndx = 0; err != act; err = err->dta_next, ndx++) continue; dtrace_probe_error(state, ecb->dte_epid, ndx, (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ? mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags), cpu_core[cpuid].cpuc_dtrace_illval); continue; } if (!committed) buf->dtb_offset = offs + ecb->dte_size; } if (vtime) curthread->t_dtrace_start = dtrace_gethrtime(); dtrace_interrupt_enable(cookie); } /* * DTrace Probe Hashing Functions * * The functions in this section (and indeed, the functions in remaining * sections) are not _called_ from probe context. (Any exceptions to this are * marked with a "Note:".) Rather, they are called from elsewhere in the * DTrace framework to look-up probes in, add probes to and remove probes from * the DTrace probe hashes. (Each probe is hashed by each element of the * probe tuple -- allowing for fast lookups, regardless of what was * specified.) 
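 *
 * (Three such hashes are maintained -- dtrace_bymod, dtrace_byfunc and
 * dtrace_byname -- keyed on the module, function and probe name
 * respectively.  The string hash below is essentially the classic
 * ELF-style hash; as a worked example, hashing "fbt" yields
 * ((('f' << 4) + 'b') << 4) + 't' = 0x6c94, the top-nibble folding
 * never triggering for so short a string.)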
*/ static uint_t dtrace_hash_str(const char *p) { unsigned int g; uint_t hval = 0; while (*p) { hval = (hval << 4) + *p++; if ((g = (hval & 0xf0000000)) != 0) hval ^= g >> 24; hval &= ~g; } return (hval); } static dtrace_hash_t * dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs) { dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP); hash->dth_stroffs = stroffs; hash->dth_nextoffs = nextoffs; hash->dth_prevoffs = prevoffs; hash->dth_size = 1; hash->dth_mask = hash->dth_size - 1; hash->dth_tab = kmem_zalloc(hash->dth_size * sizeof (dtrace_hashbucket_t *), KM_SLEEP); return (hash); } static void dtrace_hash_destroy(dtrace_hash_t *hash) { #ifdef DEBUG int i; for (i = 0; i < hash->dth_size; i++) ASSERT(hash->dth_tab[i] == NULL); #endif kmem_free(hash->dth_tab, hash->dth_size * sizeof (dtrace_hashbucket_t *)); kmem_free(hash, sizeof (dtrace_hash_t)); } static void dtrace_hash_resize(dtrace_hash_t *hash) { int size = hash->dth_size, i, ndx; int new_size = hash->dth_size << 1; int new_mask = new_size - 1; dtrace_hashbucket_t **new_tab, *bucket, *next; ASSERT((new_size & new_mask) == 0); new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP); for (i = 0; i < size; i++) { for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) { dtrace_probe_t *probe = bucket->dthb_chain; ASSERT(probe != NULL); ndx = DTRACE_HASHSTR(hash, probe) & new_mask; next = bucket->dthb_next; bucket->dthb_next = new_tab[ndx]; new_tab[ndx] = bucket; } } kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *)); hash->dth_tab = new_tab; hash->dth_size = new_size; hash->dth_mask = new_mask; } static void dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new) { int hashval = DTRACE_HASHSTR(hash, new); int ndx = hashval & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; dtrace_probe_t **nextp, **prevp; for (; bucket != NULL; bucket = bucket->dthb_next) { if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new)) goto add; } if ((hash->dth_nbuckets >> 1) > hash->dth_size) { dtrace_hash_resize(hash); dtrace_hash_add(hash, new); return; } bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP); bucket->dthb_next = hash->dth_tab[ndx]; hash->dth_tab[ndx] = bucket; hash->dth_nbuckets++; add: nextp = DTRACE_HASHNEXT(hash, new); ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL); *nextp = bucket->dthb_chain; if (bucket->dthb_chain != NULL) { prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain); ASSERT(*prevp == NULL); *prevp = new; } bucket->dthb_chain = new; bucket->dthb_len++; } static dtrace_probe_t * dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template) { int hashval = DTRACE_HASHSTR(hash, template); int ndx = hashval & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; for (; bucket != NULL; bucket = bucket->dthb_next) { if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template)) return (bucket->dthb_chain); } return (NULL); } static int dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template) { int hashval = DTRACE_HASHSTR(hash, template); int ndx = hashval & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; for (; bucket != NULL; bucket = bucket->dthb_next) { if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template)) return (bucket->dthb_len); } return (0); } static void dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe) { int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe); 
dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe); /* * Find the bucket that we're removing this probe from. */ for (; bucket != NULL; bucket = bucket->dthb_next) { if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe)) break; } ASSERT(bucket != NULL); if (*prevp == NULL) { if (*nextp == NULL) { /* * The removed probe was the only probe on this * bucket; we need to remove the bucket. */ dtrace_hashbucket_t *b = hash->dth_tab[ndx]; ASSERT(bucket->dthb_chain == probe); ASSERT(b != NULL); if (b == bucket) { hash->dth_tab[ndx] = bucket->dthb_next; } else { while (b->dthb_next != bucket) b = b->dthb_next; b->dthb_next = bucket->dthb_next; } ASSERT(hash->dth_nbuckets > 0); hash->dth_nbuckets--; kmem_free(bucket, sizeof (dtrace_hashbucket_t)); return; } bucket->dthb_chain = *nextp; } else { *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp; } if (*nextp != NULL) *(DTRACE_HASHPREV(hash, *nextp)) = *prevp; } /* * DTrace Utility Functions * * These are random utility functions that are _not_ called from probe context. */ static int dtrace_badattr(const dtrace_attribute_t *a) { return (a->dtat_name > DTRACE_STABILITY_MAX || a->dtat_data > DTRACE_STABILITY_MAX || a->dtat_class > DTRACE_CLASS_MAX); } /* * Return a duplicate copy of a string. If the specified string is NULL, * this function returns a zero-length string. */ static char * dtrace_strdup(const char *str) { char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP); if (str != NULL) (void) strcpy(new, str); return (new); } #define DTRACE_ISALPHA(c) \ (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z')) static int dtrace_badname(const char *s) { char c; if (s == NULL || (c = *s++) == '\0') return (0); if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.') return (1); while ((c = *s++) != '\0') { if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') && c != '-' && c != '_' && c != '.' && c != '`') return (1); } return (0); } static void dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp) { uint32_t priv; #ifdef illumos if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { /* * For DTRACE_PRIV_ALL, the uid and zoneid don't matter. */ priv = DTRACE_PRIV_ALL; } else { *uidp = crgetuid(cr); *zoneidp = crgetzoneid(cr); priv = 0; if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER; else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) priv |= DTRACE_PRIV_USER; if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) priv |= DTRACE_PRIV_PROC; if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) priv |= DTRACE_PRIV_OWNER; if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) priv |= DTRACE_PRIV_ZONEOWNER; } #else priv = DTRACE_PRIV_ALL; #endif *privp = priv; } #ifdef DTRACE_ERRDEBUG static void dtrace_errdebug(const char *str) { int hval = dtrace_hash_str(str) % DTRACE_ERRHASHSZ; int occupied = 0; mutex_enter(&dtrace_errlock); dtrace_errlast = str; dtrace_errthread = curthread; while (occupied++ < DTRACE_ERRHASHSZ) { if (dtrace_errhash[hval].dter_msg == str) { dtrace_errhash[hval].dter_count++; goto out; } if (dtrace_errhash[hval].dter_msg != NULL) { hval = (hval + 1) % DTRACE_ERRHASHSZ; continue; } dtrace_errhash[hval].dter_msg = str; dtrace_errhash[hval].dter_count = 1; goto out; } panic("dtrace: undersized error hash"); out: mutex_exit(&dtrace_errlock); } #endif /* * DTrace Matching Functions * * These functions are used to match groups of probes, given some elements of * a probe tuple, or some globbed expressions for elements of a probe tuple. 
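 *
 * (For example, the description syscall::read*:entry supplies the
 * elements provider = "syscall", module = "", function = "read*" and
 * name = "entry"; the empty module matches every probe, while "read*"
 * is evaluated as a glob pattern by dtrace_match_glob() below.)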
*/ static int dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid, zoneid_t zoneid) { if (priv != DTRACE_PRIV_ALL) { uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags; uint32_t match = priv & ppriv; /* * No PRIV_DTRACE_* privileges... */ if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER | DTRACE_PRIV_KERNEL)) == 0) return (0); /* * No matching bits, but there were bits to match... */ if (match == 0 && ppriv != 0) return (0); /* * Need to have permissions to the process, but don't... */ if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 && uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) { return (0); } /* * Need to be in the same zone unless we possess the * privilege to examine all zones. */ if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 && zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) { return (0); } } return (1); } /* * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which * consists of input pattern strings and an ops-vector to evaluate them. * This function returns >0 for match, 0 for no match, and <0 for error. */ static int dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, zoneid_t zoneid) { dtrace_provider_t *pvp = prp->dtpr_provider; int rv; if (pvp->dtpv_defunct) return (0); if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0) return (rv); if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0) return (rv); if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0) return (rv); if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0) return (rv); if (dtrace_match_priv(prp, priv, uid, zoneid) == 0) return (0); return (rv); } /* * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN) * interface for matching a glob pattern 'p' to an input string 's'. Unlike * libc's version, the kernel version only applies to 8-bit ASCII strings. * In addition, all of the recursion cases except for '*' matching have been * unwound. For '*', we still implement recursive evaluation, but a depth * counter is maintained and matching is aborted if we recurse too deep. * The function returns 0 if no match, >0 if match, and <0 if recursion error. 
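 *
 * (Thus "read*" matches "read", "readv" and "readlink" but not
 * "pread"; "[!_]*" matches any string that does not begin with an
 * underscore; and a backslash forces the next pattern character to be
 * matched literally.)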
*/ static int dtrace_match_glob(const char *s, const char *p, int depth) { const char *olds; char s1, c; int gs; if (depth > DTRACE_PROBEKEY_MAXDEPTH) return (-1); if (s == NULL) s = ""; /* treat NULL as empty string */ top: olds = s; s1 = *s++; if (p == NULL) return (0); if ((c = *p++) == '\0') return (s1 == '\0'); switch (c) { case '[': { int ok = 0, notflag = 0; char lc = '\0'; if (s1 == '\0') return (0); if (*p == '!') { notflag = 1; p++; } if ((c = *p++) == '\0') return (0); do { if (c == '-' && lc != '\0' && *p != ']') { if ((c = *p++) == '\0') return (0); if (c == '\\' && (c = *p++) == '\0') return (0); if (notflag) { if (s1 < lc || s1 > c) ok++; else return (0); } else if (lc <= s1 && s1 <= c) ok++; } else if (c == '\\' && (c = *p++) == '\0') return (0); lc = c; /* save left-hand 'c' for next iteration */ if (notflag) { if (s1 != c) ok++; else return (0); } else if (s1 == c) ok++; if ((c = *p++) == '\0') return (0); } while (c != ']'); if (ok) goto top; return (0); } case '\\': if ((c = *p++) == '\0') return (0); /*FALLTHRU*/ default: if (c != s1) return (0); /*FALLTHRU*/ case '?': if (s1 != '\0') goto top; return (0); case '*': while (*p == '*') p++; /* consecutive *'s are identical to a single one */ if (*p == '\0') return (1); for (s = olds; *s != '\0'; s++) { if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0) return (gs); } return (0); } } /*ARGSUSED*/ static int dtrace_match_string(const char *s, const char *p, int depth) { return (s != NULL && strcmp(s, p) == 0); } /*ARGSUSED*/ static int dtrace_match_nul(const char *s, const char *p, int depth) { return (1); /* always match the empty pattern */ } /*ARGSUSED*/ static int dtrace_match_nonzero(const char *s, const char *p, int depth) { return (s != NULL && s[0] != '\0'); } static int dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg) { dtrace_probe_t template, *probe; dtrace_hash_t *hash = NULL; int len, best = INT_MAX, nmatched = 0; dtrace_id_t i; ASSERT(MUTEX_HELD(&dtrace_lock)); /* * If the probe ID is specified in the key, just lookup by ID and * invoke the match callback once if a matching probe is found. */ if (pkp->dtpk_id != DTRACE_IDNONE) { if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL && dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) { (void) (*matched)(probe, arg); nmatched++; } return (nmatched); } template.dtpr_mod = (char *)pkp->dtpk_mod; template.dtpr_func = (char *)pkp->dtpk_func; template.dtpr_name = (char *)pkp->dtpk_name; /* * We want to find the most distinct of the module name, function * name, and name. So for each one that is not a glob pattern or * empty string, we perform a lookup in the corresponding hash and * use the hash table with the fewest collisions to do our search. */ if (pkp->dtpk_mmatch == &dtrace_match_string && (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) { best = len; hash = dtrace_bymod; } if (pkp->dtpk_fmatch == &dtrace_match_string && (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) { best = len; hash = dtrace_byfunc; } if (pkp->dtpk_nmatch == &dtrace_match_string && (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) { best = len; hash = dtrace_byname; } /* * If we did not select a hash table, iterate over every probe and * invoke our callback for each one that matches our input probe key. 
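 *
 * (This linear walk visits every entry in dtrace_probes, and is only
 * taken when none of the module, function and name components is an
 * exact string -- i.e. when each is either empty or a glob pattern --
 * since an exact component would have selected one of the hashes
 * above.)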
*/ if (hash == NULL) { for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL || dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0) continue; nmatched++; if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) break; } return (nmatched); } /* * If we selected a hash table, iterate over each probe of the same key * name and invoke the callback for every probe that matches the other * attributes of our input probe key. */ for (probe = dtrace_hash_lookup(hash, &template); probe != NULL; probe = *(DTRACE_HASHNEXT(hash, probe))) { if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0) continue; nmatched++; if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) break; } return (nmatched); } /* * Return the function pointer dtrace_probecmp() should use to compare the * specified pattern with a string. For NULL or empty patterns, we select * dtrace_match_nul(). For glob pattern strings, we use dtrace_match_glob(). * For non-empty non-glob strings, we use dtrace_match_string(). */ static dtrace_probekey_f * dtrace_probekey_func(const char *p) { char c; if (p == NULL || *p == '\0') return (&dtrace_match_nul); while ((c = *p++) != '\0') { if (c == '[' || c == '?' || c == '*' || c == '\\') return (&dtrace_match_glob); } return (&dtrace_match_string); } /* * Build a probe comparison key for use with dtrace_match_probe() from the * given probe description. By convention, a null key only matches anchored * probes: if each field is the empty string, reset dtpk_fmatch to * dtrace_match_nonzero(). */ static void dtrace_probekey(dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp) { pkp->dtpk_prov = pdp->dtpd_provider; pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider); pkp->dtpk_mod = pdp->dtpd_mod; pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod); pkp->dtpk_func = pdp->dtpd_func; pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func); pkp->dtpk_name = pdp->dtpd_name; pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name); pkp->dtpk_id = pdp->dtpd_id; if (pkp->dtpk_id == DTRACE_IDNONE && pkp->dtpk_pmatch == &dtrace_match_nul && pkp->dtpk_mmatch == &dtrace_match_nul && pkp->dtpk_fmatch == &dtrace_match_nul && pkp->dtpk_nmatch == &dtrace_match_nul) pkp->dtpk_fmatch = &dtrace_match_nonzero; } /* * DTrace Provider-to-Framework API Functions * * These functions implement much of the Provider-to-Framework API, as * described in . The parts of the API not in this section are * the functions in the API for probe management (found below), and * dtrace_probe() itself (found above). */ /* * Register the calling provider with the DTrace framework. This should * generally be called by DTrace providers in their attach(9E) entry point. */ int dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp) { dtrace_provider_t *provider; if (name == NULL || pap == NULL || pops == NULL || idp == NULL) { cmn_err(CE_WARN, "failed to register provider '%s': invalid " "arguments", name ? 
name : ""); return (EINVAL); } if (name[0] == '\0' || dtrace_badname(name)) { cmn_err(CE_WARN, "failed to register provider '%s': invalid " "provider name", name); return (EINVAL); } if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) || pops->dtps_enable == NULL || pops->dtps_disable == NULL || pops->dtps_destroy == NULL || ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) { cmn_err(CE_WARN, "failed to register provider '%s': invalid " "provider ops", name); return (EINVAL); } if (dtrace_badattr(&pap->dtpa_provider) || dtrace_badattr(&pap->dtpa_mod) || dtrace_badattr(&pap->dtpa_func) || dtrace_badattr(&pap->dtpa_name) || dtrace_badattr(&pap->dtpa_args)) { cmn_err(CE_WARN, "failed to register provider '%s': invalid " "provider attributes", name); return (EINVAL); } if (priv & ~DTRACE_PRIV_ALL) { cmn_err(CE_WARN, "failed to register provider '%s': invalid " "privilege attributes", name); return (EINVAL); } if ((priv & DTRACE_PRIV_KERNEL) && (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) && pops->dtps_usermode == NULL) { cmn_err(CE_WARN, "failed to register provider '%s': need " "dtps_usermode() op for given privilege attributes", name); return (EINVAL); } provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP); provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); (void) strcpy(provider->dtpv_name, name); provider->dtpv_attr = *pap; provider->dtpv_priv.dtpp_flags = priv; if (cr != NULL) { provider->dtpv_priv.dtpp_uid = crgetuid(cr); provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr); } provider->dtpv_pops = *pops; if (pops->dtps_provide == NULL) { ASSERT(pops->dtps_provide_module != NULL); provider->dtpv_pops.dtps_provide = (void (*)(void *, dtrace_probedesc_t *))dtrace_nullop; } if (pops->dtps_provide_module == NULL) { ASSERT(pops->dtps_provide != NULL); provider->dtpv_pops.dtps_provide_module = (void (*)(void *, modctl_t *))dtrace_nullop; } if (pops->dtps_suspend == NULL) { ASSERT(pops->dtps_resume == NULL); provider->dtpv_pops.dtps_suspend = (void (*)(void *, dtrace_id_t, void *))dtrace_nullop; provider->dtpv_pops.dtps_resume = (void (*)(void *, dtrace_id_t, void *))dtrace_nullop; } provider->dtpv_arg = arg; *idp = (dtrace_provider_id_t)provider; if (pops == &dtrace_provider_ops) { ASSERT(MUTEX_HELD(&dtrace_provider_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dtrace_anon.dta_enabling == NULL); /* * We make sure that the DTrace provider is at the head of * the provider chain. */ provider->dtpv_next = dtrace_provider; dtrace_provider = provider; return (0); } mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); /* * If there is at least one provider registered, we'll add this * provider after the first provider. */ if (dtrace_provider != NULL) { provider->dtpv_next = dtrace_provider->dtpv_next; dtrace_provider->dtpv_next = provider; } else { dtrace_provider = provider; } if (dtrace_retained != NULL) { dtrace_enabling_provide(provider); /* * Now we need to call dtrace_enabling_matchall() -- which * will acquire cpu_lock and dtrace_lock. We therefore need * to drop all of our locks before calling into it... */ mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); dtrace_enabling_matchall(); return (0); } mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); return (0); } /* * Unregister the specified provider from the DTrace framework. This should * generally be called by DTrace providers in their detach(9E) entry point. 
*/ int dtrace_unregister(dtrace_provider_id_t id) { dtrace_provider_t *old = (dtrace_provider_t *)id; dtrace_provider_t *prev = NULL; int i, self = 0, noreap = 0; dtrace_probe_t *probe, *first = NULL; if (old->dtpv_pops.dtps_enable == (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) { /* * If DTrace itself is the provider, we're called with locks * already held. */ ASSERT(old == dtrace_provider); #ifdef illumos ASSERT(dtrace_devi != NULL); #endif ASSERT(MUTEX_HELD(&dtrace_provider_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); self = 1; if (dtrace_provider->dtpv_next != NULL) { /* * There's another provider here; return failure. */ return (EBUSY); } } else { mutex_enter(&dtrace_provider_lock); #ifdef illumos mutex_enter(&mod_lock); #endif mutex_enter(&dtrace_lock); } /* * If anyone has /dev/dtrace open, or if there are anonymous enabled * probes, we refuse to let providers slither away, unless this * provider has already been explicitly invalidated. */ if (!old->dtpv_defunct && (dtrace_opens || (dtrace_anon.dta_state != NULL && dtrace_anon.dta_state->dts_necbs > 0))) { if (!self) { mutex_exit(&dtrace_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); } return (EBUSY); } /* * Attempt to destroy the probes associated with this provider. */ for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; if (probe->dtpr_provider != old) continue; if (probe->dtpr_ecb == NULL) continue; /* * If we are trying to unregister a defunct provider, and the * provider was made defunct within the interval dictated by * dtrace_unregister_defunct_reap, we'll (asynchronously) * attempt to reap our enablings. To denote that the provider * should reattempt to unregister itself at some point in the * future, we will return a differentiable error code (EAGAIN * instead of EBUSY) in this case. */ if (dtrace_gethrtime() - old->dtpv_defunct > dtrace_unregister_defunct_reap) noreap = 1; if (!self) { mutex_exit(&dtrace_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); } if (noreap) return (EBUSY); (void) taskq_dispatch(dtrace_taskq, (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP); return (EAGAIN); } /* * All of the probes for this provider are disabled; we can safely * remove all of them from their hash chains and from the probe array. */ for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; if (probe->dtpr_provider != old) continue; dtrace_probes[i] = NULL; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); dtrace_hash_remove(dtrace_byname, probe); if (first == NULL) { first = probe; probe->dtpr_nextmod = NULL; } else { probe->dtpr_nextmod = first; first = probe; } } /* * The provider's probes have been removed from the hash chains and * from the probe array. Now issue a dtrace_sync() to be sure that * everyone has cleared out from any probe array processing. 
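 *
 * (dtrace_sync() does not return until every CPU has been seen outside
 * of probe context, so once it returns no CPU can still hold a
 * reference to the probes that were just unhooked; only then is it
 * safe to call dtps_destroy() and free them below.)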
*/ dtrace_sync(); for (probe = first; probe != NULL; probe = first) { first = probe->dtpr_nextmod; old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); #ifdef illumos vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1); #else free_unr(dtrace_arena, probe->dtpr_id); #endif kmem_free(probe, sizeof (dtrace_probe_t)); } if ((prev = dtrace_provider) == old) { #ifdef illumos ASSERT(self || dtrace_devi == NULL); ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL); #endif dtrace_provider = old->dtpv_next; } else { while (prev != NULL && prev->dtpv_next != old) prev = prev->dtpv_next; if (prev == NULL) { panic("attempt to unregister non-existent " "dtrace provider %p\n", (void *)id); } prev->dtpv_next = old->dtpv_next; } if (!self) { mutex_exit(&dtrace_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); } kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1); kmem_free(old, sizeof (dtrace_provider_t)); return (0); } /* * Invalidate the specified provider. All subsequent probe lookups for the * specified provider will fail, but its probes will not be removed. */ void dtrace_invalidate(dtrace_provider_id_t id) { dtrace_provider_t *pvp = (dtrace_provider_t *)id; ASSERT(pvp->dtpv_pops.dtps_enable != (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); pvp->dtpv_defunct = dtrace_gethrtime(); mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); } /* * Indicate whether or not DTrace has attached. */ int dtrace_attached(void) { /* * dtrace_provider will be non-NULL iff the DTrace driver has * attached. (It's non-NULL because DTrace is always itself a * provider.) */ return (dtrace_provider != NULL); } /* * Remove all the unenabled probes for the given provider. This function is * not unlike dtrace_unregister(), except that it doesn't remove the provider * -- just as many of its associated probes as it can. */ int dtrace_condense(dtrace_provider_id_t id) { dtrace_provider_t *prov = (dtrace_provider_t *)id; int i; dtrace_probe_t *probe; /* * Make sure this isn't the dtrace provider itself. */ ASSERT(prov->dtpv_pops.dtps_enable != (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); /* * Attempt to destroy the probes associated with this provider. 
*/ for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; if (probe->dtpr_provider != prov) continue; if (probe->dtpr_ecb != NULL) continue; dtrace_probes[i] = NULL; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); dtrace_hash_remove(dtrace_byname, probe); prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1, probe->dtpr_arg); kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); kmem_free(probe, sizeof (dtrace_probe_t)); #ifdef illumos vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1); #else free_unr(dtrace_arena, i + 1); #endif } mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); return (0); } /* * DTrace Probe Management Functions * * The functions in this section perform the DTrace probe management, * including functions to create probes, look-up probes, and call into the * providers to request that probes be provided. Some of these functions are * in the Provider-to-Framework API; these functions can be identified by the * fact that they are not declared "static". */ /* * Create a probe with the specified module name, function name, and name. */ dtrace_id_t dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, const char *func, const char *name, int aframes, void *arg) { dtrace_probe_t *probe, **probes; dtrace_provider_t *provider = (dtrace_provider_t *)prov; dtrace_id_t id; if (provider == dtrace_provider) { ASSERT(MUTEX_HELD(&dtrace_lock)); } else { mutex_enter(&dtrace_lock); } #ifdef illumos id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1, VM_BESTFIT | VM_SLEEP); #else id = alloc_unr(dtrace_arena); #endif probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP); probe->dtpr_id = id; probe->dtpr_gen = dtrace_probegen++; probe->dtpr_mod = dtrace_strdup(mod); probe->dtpr_func = dtrace_strdup(func); probe->dtpr_name = dtrace_strdup(name); probe->dtpr_arg = arg; probe->dtpr_aframes = aframes; probe->dtpr_provider = provider; dtrace_hash_add(dtrace_bymod, probe); dtrace_hash_add(dtrace_byfunc, probe); dtrace_hash_add(dtrace_byname, probe); if (id - 1 >= dtrace_nprobes) { size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *); size_t nsize = osize << 1; if (nsize == 0) { ASSERT(osize == 0); ASSERT(dtrace_probes == NULL); nsize = sizeof (dtrace_probe_t *); } probes = kmem_zalloc(nsize, KM_SLEEP); if (dtrace_probes == NULL) { ASSERT(osize == 0); dtrace_probes = probes; dtrace_nprobes = 1; } else { dtrace_probe_t **oprobes = dtrace_probes; bcopy(oprobes, probes, osize); dtrace_membar_producer(); dtrace_probes = probes; dtrace_sync(); /* * All CPUs are now seeing the new probes array; we can * safely free the old array. */ kmem_free(oprobes, osize); dtrace_nprobes <<= 1; } ASSERT(id - 1 < dtrace_nprobes); } ASSERT(dtrace_probes[id - 1] == NULL); dtrace_probes[id - 1] = probe; if (provider != dtrace_provider) mutex_exit(&dtrace_lock); return (id); } static dtrace_probe_t * dtrace_probe_lookup_id(dtrace_id_t id) { ASSERT(MUTEX_HELD(&dtrace_lock)); if (id == 0 || id > dtrace_nprobes) return (NULL); return (dtrace_probes[id - 1]); } static int dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg) { *((dtrace_id_t *)arg) = probe->dtpr_id; return (DTRACE_MATCH_DONE); } /* * Look up a probe based on provider and one or more of module name, function * name and probe name. 
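 *
 * An illustrative use (the identifiers "foo_id" and "func" are
 * hypothetical) is a provider's dtps_provide() entry point checking
 * for an existing probe before creating one:
 *
 *	if (dtrace_probe_lookup(foo_id, NULL, func, "entry") == 0)
 *		(void) dtrace_probe_create(foo_id, NULL, func,
 *		    "entry", 0, NULL);
 *
 * The function returns the matching probe identifier, or 0 if no such
 * probe exists.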
*/ dtrace_id_t dtrace_probe_lookup(dtrace_provider_id_t prid, char *mod, char *func, char *name) { dtrace_probekey_t pkey; dtrace_id_t id; int match; pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name; pkey.dtpk_pmatch = &dtrace_match_string; pkey.dtpk_mod = mod; pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul; pkey.dtpk_func = func; pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul; pkey.dtpk_name = name; pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul; pkey.dtpk_id = DTRACE_IDNONE; mutex_enter(&dtrace_lock); match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0, dtrace_probe_lookup_match, &id); mutex_exit(&dtrace_lock); ASSERT(match == 1 || match == 0); return (match ? id : 0); } /* * Returns the probe argument associated with the specified probe. */ void * dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid) { dtrace_probe_t *probe; void *rval = NULL; mutex_enter(&dtrace_lock); if ((probe = dtrace_probe_lookup_id(pid)) != NULL && probe->dtpr_provider == (dtrace_provider_t *)id) rval = probe->dtpr_arg; mutex_exit(&dtrace_lock); return (rval); } /* * Copy a probe into a probe description. */ static void dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp) { bzero(pdp, sizeof (dtrace_probedesc_t)); pdp->dtpd_id = prp->dtpr_id; (void) strncpy(pdp->dtpd_provider, prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1); (void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1); (void) strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 1); (void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1); } /* * Called to indicate that a probe -- or probes -- should be provided by a * specfied provider. If the specified description is NULL, the provider will * be told to provide all of its probes. (This is done whenever a new * consumer comes along, or whenever a retained enabling is to be matched.) If * the specified description is non-NULL, the provider is given the * opportunity to dynamically provide the specified probe, allowing providers * to support the creation of probes on-the-fly. (So-called _autocreated_ * probes.) If the provider is NULL, the operations will be applied to all * providers; if the provider is non-NULL the operations will only be applied * to the specified provider. The dtrace_provider_lock must be held, and the * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation * will need to grab the dtrace_lock when it reenters the framework through * dtrace_probe_lookup(), dtrace_probe_create(), etc. */ static void dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv) { #ifdef illumos modctl_t *ctl; #endif int all = 0; ASSERT(MUTEX_HELD(&dtrace_provider_lock)); if (prv == NULL) { all = 1; prv = dtrace_provider; } do { /* * First, call the blanket provide operation. */ prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc); #ifdef illumos /* * Now call the per-module provide operation. We will grab * mod_lock to prevent the list from being modified. Note * that this also prevents the mod_busy bits from changing. * (mod_busy can only be changed with mod_lock held.) 
*/ mutex_enter(&mod_lock); ctl = &modules; do { if (ctl->mod_busy || ctl->mod_mp == NULL) continue; prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); } while ((ctl = ctl->mod_next) != &modules); mutex_exit(&mod_lock); #endif } while (all && (prv = prv->dtpv_next) != NULL); } #ifdef illumos /* * Iterate over each probe, and call the Framework-to-Provider API function * denoted by offs. */ static void dtrace_probe_foreach(uintptr_t offs) { dtrace_provider_t *prov; void (*func)(void *, dtrace_id_t, void *); dtrace_probe_t *probe; dtrace_icookie_t cookie; int i; /* * We disable interrupts to walk through the probe array. This is * safe -- the dtrace_sync() in dtrace_unregister() assures that we * won't see stale data. */ cookie = dtrace_interrupt_disable(); for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; if (probe->dtpr_ecb == NULL) { /* * This probe isn't enabled -- don't call the function. */ continue; } prov = probe->dtpr_provider; func = *((void(**)(void *, dtrace_id_t, void *)) ((uintptr_t)&prov->dtpv_pops + offs)); func(prov->dtpv_arg, i + 1, probe->dtpr_arg); } dtrace_interrupt_enable(cookie); } #endif static int dtrace_probe_enable(dtrace_probedesc_t *desc, dtrace_enabling_t *enab) { dtrace_probekey_t pkey; uint32_t priv; uid_t uid; zoneid_t zoneid; ASSERT(MUTEX_HELD(&dtrace_lock)); dtrace_ecb_create_cache = NULL; if (desc == NULL) { /* * If we're passed a NULL description, we're being asked to * create an ECB with a NULL probe. */ (void) dtrace_ecb_create_enable(NULL, enab); return (0); } dtrace_probekey(desc, &pkey); dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred, &priv, &uid, &zoneid); return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable, enab)); } /* * DTrace Helper Provider Functions */ static void dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr) { attr->dtat_name = DOF_ATTR_NAME(dofattr); attr->dtat_data = DOF_ATTR_DATA(dofattr); attr->dtat_class = DOF_ATTR_CLASS(dofattr); } static void dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov, const dof_provider_t *dofprov, char *strtab) { hprov->dthpv_provname = strtab + dofprov->dofpv_name; dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider, dofprov->dofpv_provattr); dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod, dofprov->dofpv_modattr); dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func, dofprov->dofpv_funcattr); dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name, dofprov->dofpv_nameattr); dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args, dofprov->dofpv_argsattr); } static void dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec; dof_provider_t *provider; dof_probe_t *probe; uint32_t *off, *enoff; uint8_t *arg; char *strtab; uint_t i, nprobes; dtrace_helper_provdesc_t dhpv; dtrace_helper_probedesc_t dhpb; dtrace_meta_t *meta = dtrace_meta_pid; dtrace_mops_t *mops = &meta->dtm_mops; void *parg; provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_strtab * dof->dofh_secsize); prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_probes * dof->dofh_secsize); arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_prargs * dof->dofh_secsize); off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_proffs 
* dof->dofh_secsize); strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset); arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset); enoff = NULL; /* * See dtrace_helper_provider_validate(). */ if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && provider->dofpv_prenoffs != DOF_SECT_NONE) { enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_prenoffs * dof->dofh_secsize); enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset); } nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize; /* * Create the provider. */ dtrace_dofprov2hprov(&dhpv, provider, strtab); if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL) return; meta->dtm_count++; /* * Create the probes. */ for (i = 0; i < nprobes; i++) { probe = (dof_probe_t *)(uintptr_t)(daddr + prb_sec->dofs_offset + i * prb_sec->dofs_entsize); /* See the check in dtrace_helper_provider_validate(). */ if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) continue; dhpb.dthpb_mod = dhp->dofhp_mod; dhpb.dthpb_func = strtab + probe->dofpr_func; dhpb.dthpb_name = strtab + probe->dofpr_name; dhpb.dthpb_base = probe->dofpr_addr; dhpb.dthpb_offs = off + probe->dofpr_offidx; dhpb.dthpb_noffs = probe->dofpr_noffs; if (enoff != NULL) { dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx; dhpb.dthpb_nenoffs = probe->dofpr_nenoffs; } else { dhpb.dthpb_enoffs = NULL; dhpb.dthpb_nenoffs = 0; } dhpb.dthpb_args = arg + probe->dofpr_argidx; dhpb.dthpb_nargc = probe->dofpr_nargc; dhpb.dthpb_xargc = probe->dofpr_xargc; dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv; dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv; mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb); } } static void dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; int i; ASSERT(MUTEX_HELD(&dtrace_meta_lock)); for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + i * dof->dofh_secsize); if (sec->dofs_type != DOF_SECT_PROVIDER) continue; dtrace_helper_provide_one(dhp, sec, pid); } /* * We may have just created probes, so we must now rematch against * any retained enablings. Note that this call will acquire both * cpu_lock and dtrace_lock; the fact that we are holding * dtrace_meta_lock now is what defines the ordering with respect to * these three locks. */ dtrace_enabling_matchall(); } static void dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; dof_sec_t *str_sec; dof_provider_t *provider; char *strtab; dtrace_helper_provdesc_t dhpv; dtrace_meta_t *meta = dtrace_meta_pid; dtrace_mops_t *mops = &meta->dtm_mops; provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_strtab * dof->dofh_secsize); strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); /* * Create the provider. 
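 *
 * (More precisely, the provider description is reconstructed from the
 * DOF here only so that it can be handed to the meta-provider's
 * dtms_remove_pid() entry point below.)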
*/ dtrace_dofprov2hprov(&dhpv, provider, strtab); mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid); meta->dtm_count--; } static void dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; int i; ASSERT(MUTEX_HELD(&dtrace_meta_lock)); for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + i * dof->dofh_secsize); if (sec->dofs_type != DOF_SECT_PROVIDER) continue; dtrace_helper_provider_remove_one(dhp, sec, pid); } } /* * DTrace Meta Provider-to-Framework API Functions * * These functions implement the Meta Provider-to-Framework API, as described * in . */ int dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, dtrace_meta_provider_id_t *idp) { dtrace_meta_t *meta; dtrace_helpers_t *help, *next; int i; *idp = DTRACE_METAPROVNONE; /* * We strictly don't need the name, but we hold onto it for * debuggability. All hail error queues! */ if (name == NULL) { cmn_err(CE_WARN, "failed to register meta-provider: " "invalid name"); return (EINVAL); } if (mops == NULL || mops->dtms_create_probe == NULL || mops->dtms_provide_pid == NULL || mops->dtms_remove_pid == NULL) { cmn_err(CE_WARN, "failed to register meta-register %s: " "invalid ops", name); return (EINVAL); } meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP); meta->dtm_mops = *mops; meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); (void) strcpy(meta->dtm_name, name); meta->dtm_arg = arg; mutex_enter(&dtrace_meta_lock); mutex_enter(&dtrace_lock); if (dtrace_meta_pid != NULL) { mutex_exit(&dtrace_lock); mutex_exit(&dtrace_meta_lock); cmn_err(CE_WARN, "failed to register meta-register %s: " "user-land meta-provider exists", name); kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1); kmem_free(meta, sizeof (dtrace_meta_t)); return (EINVAL); } dtrace_meta_pid = meta; *idp = (dtrace_meta_provider_id_t)meta; /* * If there are providers and probes ready to go, pass them * off to the new meta provider now. */ help = dtrace_deferred_pid; dtrace_deferred_pid = NULL; mutex_exit(&dtrace_lock); while (help != NULL) { for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, help->dthps_pid); } next = help->dthps_next; help->dthps_next = NULL; help->dthps_prev = NULL; help->dthps_deferred = 0; help = next; } mutex_exit(&dtrace_meta_lock); return (0); } int dtrace_meta_unregister(dtrace_meta_provider_id_t id) { dtrace_meta_t **pp, *old = (dtrace_meta_t *)id; mutex_enter(&dtrace_meta_lock); mutex_enter(&dtrace_lock); if (old == dtrace_meta_pid) { pp = &dtrace_meta_pid; } else { panic("attempt to unregister non-existent " "dtrace meta-provider %p\n", (void *)old); } if (old->dtm_count != 0) { mutex_exit(&dtrace_lock); mutex_exit(&dtrace_meta_lock); return (EBUSY); } *pp = NULL; mutex_exit(&dtrace_lock); mutex_exit(&dtrace_meta_lock); kmem_free(old->dtm_name, strlen(old->dtm_name) + 1); kmem_free(old, sizeof (dtrace_meta_t)); return (0); } /* * DTrace DIF Object Functions */ static int dtrace_difo_err(uint_t pc, const char *format, ...) { if (dtrace_err_verbose) { va_list alist; (void) uprintf("dtrace DIF object error: [%u]: ", pc); va_start(alist, format); (void) vuprintf(format, alist); va_end(alist); } #ifdef DTRACE_ERRDEBUG dtrace_errdebug(format); #endif return (1); } /* * Validate a DTrace DIF object by checking the IR instructions. The following * rules are currently enforced by dtrace_difo_validate(): * * 1. 
Each instruction must have a valid opcode * 2. Each register, string, variable, or subroutine reference must be valid * 3. No instruction can modify register %r0 (must be zero) * 4. All instruction reserved bits must be set to zero * 5. The last instruction must be a "ret" instruction * 6. All branch targets must reference a valid instruction _after_ the branch */ static int dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, cred_t *cr) { int err = 0, i; int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err; int kcheckload; uint_t pc; int maxglobal = -1, maxlocal = -1, maxtlocal = -1; kcheckload = cr == NULL || (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0; dp->dtdo_destructive = 0; for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { dif_instr_t instr = dp->dtdo_buf[pc]; uint_t r1 = DIF_INSTR_R1(instr); uint_t r2 = DIF_INSTR_R2(instr); uint_t rd = DIF_INSTR_RD(instr); uint_t rs = DIF_INSTR_RS(instr); uint_t label = DIF_INSTR_LABEL(instr); uint_t v = DIF_INSTR_VAR(instr); uint_t subr = DIF_INSTR_SUBR(instr); uint_t type = DIF_INSTR_TYPE(instr); uint_t op = DIF_INSTR_OP(instr); switch (op) { case DIF_OP_OR: case DIF_OP_XOR: case DIF_OP_AND: case DIF_OP_SLL: case DIF_OP_SRL: case DIF_OP_SRA: case DIF_OP_SUB: case DIF_OP_ADD: case DIF_OP_MUL: case DIF_OP_SDIV: case DIF_OP_UDIV: case DIF_OP_SREM: case DIF_OP_UREM: case DIF_OP_COPYS: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 >= nregs) err += efunc(pc, "invalid register %u\n", r2); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_NOT: case DIF_OP_MOV: case DIF_OP_ALLOCS: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_LDSB: case DIF_OP_LDSH: case DIF_OP_LDSW: case DIF_OP_LDUB: case DIF_OP_LDUH: case DIF_OP_LDUW: case DIF_OP_LDX: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); if (kcheckload) dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op + DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd); break; case DIF_OP_RLDSB: case DIF_OP_RLDSH: case DIF_OP_RLDSW: case DIF_OP_RLDUB: case DIF_OP_RLDUH: case DIF_OP_RLDUW: case DIF_OP_RLDX: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_ULDSB: case DIF_OP_ULDSH: case DIF_OP_ULDSW: case DIF_OP_ULDUB: case DIF_OP_ULDUH: case DIF_OP_ULDUW: case DIF_OP_ULDX: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_STB: case DIF_OP_STH: case DIF_OP_STW: case DIF_OP_STX: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to 0 address\n"); break; case DIF_OP_CMP: case 
DIF_OP_SCMP: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 >= nregs) err += efunc(pc, "invalid register %u\n", r2); if (rd != 0) err += efunc(pc, "non-zero reserved bits\n"); break; case DIF_OP_TST: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0 || rd != 0) err += efunc(pc, "non-zero reserved bits\n"); break; case DIF_OP_BA: case DIF_OP_BE: case DIF_OP_BNE: case DIF_OP_BG: case DIF_OP_BGU: case DIF_OP_BGE: case DIF_OP_BGEU: case DIF_OP_BL: case DIF_OP_BLU: case DIF_OP_BLE: case DIF_OP_BLEU: if (label >= dp->dtdo_len) { err += efunc(pc, "invalid branch target %u\n", label); } if (label <= pc) { err += efunc(pc, "backward branch to %u\n", label); } break; case DIF_OP_RET: if (r1 != 0 || r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); break; case DIF_OP_NOP: case DIF_OP_POPTS: case DIF_OP_FLUSHTS: if (r1 != 0 || r2 != 0 || rd != 0) err += efunc(pc, "non-zero reserved bits\n"); break; case DIF_OP_SETX: if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) { err += efunc(pc, "invalid integer ref %u\n", DIF_INSTR_INTEGER(instr)); } if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_SETS: if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) { err += efunc(pc, "invalid string ref %u\n", DIF_INSTR_STRING(instr)); } if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_LDGA: case DIF_OP_LDTA: if (r1 > DIF_VAR_ARRAY_MAX) err += efunc(pc, "invalid array %u\n", r1); if (r2 >= nregs) err += efunc(pc, "invalid register %u\n", r2); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_LDGS: case DIF_OP_LDTS: case DIF_OP_LDLS: case DIF_OP_LDGAA: case DIF_OP_LDTAA: if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX) err += efunc(pc, "invalid variable %u\n", v); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_STGS: case DIF_OP_STTS: case DIF_OP_STLS: case DIF_OP_STGAA: case DIF_OP_STTAA: if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX) err += efunc(pc, "invalid variable %u\n", v); if (rs >= nregs) err += efunc(pc, "invalid register %u\n", rd); break; case DIF_OP_CALL: if (subr > DIF_SUBR_MAX) err += efunc(pc, "invalid subr %u\n", subr); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); if (subr == DIF_SUBR_COPYOUT || subr == DIF_SUBR_COPYOUTSTR) { dp->dtdo_destructive = 1; } if (subr == DIF_SUBR_GETF) { /* * If we have a getf() we need to record that * in our state. Note that our state can be * NULL if this is a helper -- but in that * case, the call to getf() is itself illegal, * and will be caught (slightly later) when * the helper is validated. 
*/ if (vstate->dtvs_state != NULL) vstate->dtvs_state->dts_getf++; } break; case DIF_OP_PUSHTR: if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF) err += efunc(pc, "invalid ref type %u\n", type); if (r2 >= nregs) err += efunc(pc, "invalid register %u\n", r2); if (rs >= nregs) err += efunc(pc, "invalid register %u\n", rs); break; case DIF_OP_PUSHTV: if (type != DIF_TYPE_CTF) err += efunc(pc, "invalid val type %u\n", type); if (r2 >= nregs) err += efunc(pc, "invalid register %u\n", r2); if (rs >= nregs) err += efunc(pc, "invalid register %u\n", rs); break; default: err += efunc(pc, "invalid opcode %u\n", DIF_INSTR_OP(instr)); } } if (dp->dtdo_len != 0 && DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) { err += efunc(dp->dtdo_len - 1, "expected 'ret' as last DIF instruction\n"); } if (!(dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF))) { /* * If we're not returning by reference, the size must be either * 0 or the size of one of the base types. */ switch (dp->dtdo_rtype.dtdt_size) { case 0: case sizeof (uint8_t): case sizeof (uint16_t): case sizeof (uint32_t): case sizeof (uint64_t): break; default: err += efunc(dp->dtdo_len - 1, "bad return size\n"); } } for (i = 0; i < dp->dtdo_varlen && err == 0; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL; dtrace_diftype_t *vt, *et; uint_t id, ndx; if (v->dtdv_scope != DIFV_SCOPE_GLOBAL && v->dtdv_scope != DIFV_SCOPE_THREAD && v->dtdv_scope != DIFV_SCOPE_LOCAL) { err += efunc(i, "unrecognized variable scope %d\n", v->dtdv_scope); break; } if (v->dtdv_kind != DIFV_KIND_ARRAY && v->dtdv_kind != DIFV_KIND_SCALAR) { err += efunc(i, "unrecognized variable type %d\n", v->dtdv_kind); break; } if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) { err += efunc(i, "%d exceeds variable id limit\n", id); break; } if (id < DIF_VAR_OTHER_UBASE) continue; /* * For user-defined variables, we need to check that this * definition is identical to any previous definition that we * encountered. 
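 * For example, if one clause has been using a global x as an integer
 * scalar and a later clause uses x as a string, the string's by-ref
 * type flag will differ from the earlier definition and the second
 * DIFO fails validation with "changed variable type flags".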
*/ ndx = id - DIF_VAR_OTHER_UBASE; switch (v->dtdv_scope) { case DIFV_SCOPE_GLOBAL: if (maxglobal == -1 || ndx > maxglobal) maxglobal = ndx; if (ndx < vstate->dtvs_nglobals) { dtrace_statvar_t *svar; if ((svar = vstate->dtvs_globals[ndx]) != NULL) existing = &svar->dtsv_var; } break; case DIFV_SCOPE_THREAD: if (maxtlocal == -1 || ndx > maxtlocal) maxtlocal = ndx; if (ndx < vstate->dtvs_ntlocals) existing = &vstate->dtvs_tlocals[ndx]; break; case DIFV_SCOPE_LOCAL: if (maxlocal == -1 || ndx > maxlocal) maxlocal = ndx; if (ndx < vstate->dtvs_nlocals) { dtrace_statvar_t *svar; if ((svar = vstate->dtvs_locals[ndx]) != NULL) existing = &svar->dtsv_var; } break; } vt = &v->dtdv_type; if (vt->dtdt_flags & DIF_TF_BYREF) { if (vt->dtdt_size == 0) { err += efunc(i, "zero-sized variable\n"); break; } if ((v->dtdv_scope == DIFV_SCOPE_GLOBAL || v->dtdv_scope == DIFV_SCOPE_LOCAL) && vt->dtdt_size > dtrace_statvar_maxsize) { err += efunc(i, "oversized by-ref static\n"); break; } } if (existing == NULL || existing->dtdv_id == 0) continue; ASSERT(existing->dtdv_id == v->dtdv_id); ASSERT(existing->dtdv_scope == v->dtdv_scope); if (existing->dtdv_kind != v->dtdv_kind) err += efunc(i, "%d changed variable kind\n", id); et = &existing->dtdv_type; if (vt->dtdt_flags != et->dtdt_flags) { err += efunc(i, "%d changed variable type flags\n", id); break; } if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) { err += efunc(i, "%d changed variable type size\n", id); break; } } for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { dif_instr_t instr = dp->dtdo_buf[pc]; uint_t v = DIF_INSTR_VAR(instr); uint_t op = DIF_INSTR_OP(instr); switch (op) { case DIF_OP_LDGS: case DIF_OP_LDGAA: case DIF_OP_STGS: case DIF_OP_STGAA: if (v > DIF_VAR_OTHER_UBASE + maxglobal) err += efunc(pc, "invalid variable %u\n", v); break; case DIF_OP_LDTS: case DIF_OP_LDTAA: case DIF_OP_STTS: case DIF_OP_STTAA: if (v > DIF_VAR_OTHER_UBASE + maxtlocal) err += efunc(pc, "invalid variable %u\n", v); break; case DIF_OP_LDLS: case DIF_OP_STLS: if (v > DIF_VAR_OTHER_UBASE + maxlocal) err += efunc(pc, "invalid variable %u\n", v); break; default: break; } } return (err); } /* * Validate a DTrace DIF object that it is to be used as a helper. Helpers * are much more constrained than normal DIFOs. Specifically, they may * not: * * 1. Make calls to subroutines other than copyin(), copyinstr() or * miscellaneous string routines * 2. Access DTrace variables other than the args[] array, and the * curthread, pid, ppid, tid, execname, zonename, uid and gid variables. * 3. Have thread-local variables. * 4. Have dynamic variables. */ static int dtrace_difo_validate_helper(dtrace_difo_t *dp) { int (*efunc)(uint_t pc, const char *, ...) 
= dtrace_difo_err; int err = 0; uint_t pc; for (pc = 0; pc < dp->dtdo_len; pc++) { dif_instr_t instr = dp->dtdo_buf[pc]; uint_t v = DIF_INSTR_VAR(instr); uint_t subr = DIF_INSTR_SUBR(instr); uint_t op = DIF_INSTR_OP(instr); switch (op) { case DIF_OP_OR: case DIF_OP_XOR: case DIF_OP_AND: case DIF_OP_SLL: case DIF_OP_SRL: case DIF_OP_SRA: case DIF_OP_SUB: case DIF_OP_ADD: case DIF_OP_MUL: case DIF_OP_SDIV: case DIF_OP_UDIV: case DIF_OP_SREM: case DIF_OP_UREM: case DIF_OP_COPYS: case DIF_OP_NOT: case DIF_OP_MOV: case DIF_OP_RLDSB: case DIF_OP_RLDSH: case DIF_OP_RLDSW: case DIF_OP_RLDUB: case DIF_OP_RLDUH: case DIF_OP_RLDUW: case DIF_OP_RLDX: case DIF_OP_ULDSB: case DIF_OP_ULDSH: case DIF_OP_ULDSW: case DIF_OP_ULDUB: case DIF_OP_ULDUH: case DIF_OP_ULDUW: case DIF_OP_ULDX: case DIF_OP_STB: case DIF_OP_STH: case DIF_OP_STW: case DIF_OP_STX: case DIF_OP_ALLOCS: case DIF_OP_CMP: case DIF_OP_SCMP: case DIF_OP_TST: case DIF_OP_BA: case DIF_OP_BE: case DIF_OP_BNE: case DIF_OP_BG: case DIF_OP_BGU: case DIF_OP_BGE: case DIF_OP_BGEU: case DIF_OP_BL: case DIF_OP_BLU: case DIF_OP_BLE: case DIF_OP_BLEU: case DIF_OP_RET: case DIF_OP_NOP: case DIF_OP_POPTS: case DIF_OP_FLUSHTS: case DIF_OP_SETX: case DIF_OP_SETS: case DIF_OP_LDGA: case DIF_OP_LDLS: case DIF_OP_STGS: case DIF_OP_STLS: case DIF_OP_PUSHTR: case DIF_OP_PUSHTV: break; case DIF_OP_LDGS: if (v >= DIF_VAR_OTHER_UBASE) break; if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) break; if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID || v == DIF_VAR_PPID || v == DIF_VAR_TID || v == DIF_VAR_EXECARGS || v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME || v == DIF_VAR_UID || v == DIF_VAR_GID) break; err += efunc(pc, "illegal variable %u\n", v); break; case DIF_OP_LDTA: case DIF_OP_LDTS: case DIF_OP_LDGAA: case DIF_OP_LDTAA: err += efunc(pc, "illegal dynamic variable load\n"); break; case DIF_OP_STTS: case DIF_OP_STGAA: case DIF_OP_STTAA: err += efunc(pc, "illegal dynamic variable store\n"); break; case DIF_OP_CALL: if (subr == DIF_SUBR_ALLOCA || subr == DIF_SUBR_BCOPY || subr == DIF_SUBR_COPYIN || subr == DIF_SUBR_COPYINTO || subr == DIF_SUBR_COPYINSTR || subr == DIF_SUBR_INDEX || subr == DIF_SUBR_INET_NTOA || subr == DIF_SUBR_INET_NTOA6 || subr == DIF_SUBR_INET_NTOP || subr == DIF_SUBR_JSON || subr == DIF_SUBR_LLTOSTR || subr == DIF_SUBR_STRTOLL || subr == DIF_SUBR_RINDEX || subr == DIF_SUBR_STRCHR || subr == DIF_SUBR_STRJOIN || subr == DIF_SUBR_STRRCHR || subr == DIF_SUBR_STRSTR || subr == DIF_SUBR_HTONS || subr == DIF_SUBR_HTONL || subr == DIF_SUBR_HTONLL || subr == DIF_SUBR_NTOHS || subr == DIF_SUBR_NTOHL || subr == DIF_SUBR_NTOHLL || subr == DIF_SUBR_MEMREF) break; #ifdef __FreeBSD__ if (subr == DIF_SUBR_MEMSTR) break; #endif err += efunc(pc, "invalid subr %u\n", subr); break; default: err += efunc(pc, "invalid opcode %u\n", DIF_INSTR_OP(instr)); } } return (err); } /* * Returns 1 if the expression in the DIF object can be cached on a per-thread * basis; 0 if not. */ static int dtrace_difo_cacheable(dtrace_difo_t *dp) { int i; if (dp == NULL) return (0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; if (v->dtdv_scope != DIFV_SCOPE_GLOBAL) continue; switch (v->dtdv_id) { case DIF_VAR_CURTHREAD: case DIF_VAR_PID: case DIF_VAR_TID: case DIF_VAR_EXECARGS: case DIF_VAR_EXECNAME: case DIF_VAR_ZONENAME: break; default: return (0); } } /* * This DIF object may be cacheable. Now we need to look for any * array loading instructions, any memory loading instructions, or * any stores to thread-local variables. 
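 * A predicate such as /pid == 1234/, for instance, compiles to a ldgs
 * of DIF_VAR_PID, a setx and a compare -- none of the operations
 * checked for below -- and so remains cacheable; a predicate that has
 * to chase a pointer needs a load instruction and is not cacheable.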
*/ for (i = 0; i < dp->dtdo_len; i++) { uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]); if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) || (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) || (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) || op == DIF_OP_LDGA || op == DIF_OP_STTS) return (0); } return (1); } static void dtrace_difo_hold(dtrace_difo_t *dp) { int i; ASSERT(MUTEX_HELD(&dtrace_lock)); dp->dtdo_refcnt++; ASSERT(dp->dtdo_refcnt != 0); /* * We need to check this DIF object for references to the variable * DIF_VAR_VTIMESTAMP. */ for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; if (v->dtdv_id != DIF_VAR_VTIMESTAMP) continue; if (dtrace_vtime_references++ == 0) dtrace_vtime_enable(); } } /* * This routine calculates the dynamic variable chunksize for a given DIF * object. The calculation is not fool-proof, and can probably be tricked by * malicious DIF -- but it works for all compiler-generated DIF. Because this * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail * if a dynamic variable size exceeds the chunksize. */ static void dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { uint64_t sval = 0; dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ const dif_instr_t *text = dp->dtdo_buf; uint_t pc, srd = 0; uint_t ttop = 0; size_t size, ksize; uint_t id, i; for (pc = 0; pc < dp->dtdo_len; pc++) { dif_instr_t instr = text[pc]; uint_t op = DIF_INSTR_OP(instr); uint_t rd = DIF_INSTR_RD(instr); uint_t r1 = DIF_INSTR_R1(instr); uint_t nkeys = 0; uchar_t scope = 0; dtrace_key_t *key = tupregs; switch (op) { case DIF_OP_SETX: sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)]; srd = rd; continue; case DIF_OP_STTS: key = &tupregs[DIF_DTR_NREGS]; key[0].dttk_size = 0; key[1].dttk_size = 0; nkeys = 2; scope = DIFV_SCOPE_THREAD; break; case DIF_OP_STGAA: case DIF_OP_STTAA: nkeys = ttop; if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) key[nkeys++].dttk_size = 0; key[nkeys++].dttk_size = 0; if (op == DIF_OP_STTAA) { scope = DIFV_SCOPE_THREAD; } else { scope = DIFV_SCOPE_GLOBAL; } break; case DIF_OP_PUSHTR: if (ttop == DIF_DTR_NREGS) return; if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) { /* * If the register for the size of the "pushtr" * is %r0 (or the value is 0) and the type is * a string, we'll use the system-wide default * string size. */ tupregs[ttop++].dttk_size = dtrace_strsize_default; } else { if (srd == 0) return; if (sval > LONG_MAX) return; tupregs[ttop++].dttk_size = sval; } break; case DIF_OP_PUSHTV: if (ttop == DIF_DTR_NREGS) return; tupregs[ttop++].dttk_size = 0; break; case DIF_OP_FLUSHTS: ttop = 0; break; case DIF_OP_POPTS: if (ttop != 0) ttop--; break; } sval = 0; srd = 0; if (nkeys == 0) continue; /* * We have a dynamic variable allocation; calculate its size. */ for (ksize = 0, i = 0; i < nkeys; i++) ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t)); size = sizeof (dtrace_dynvar_t); size += sizeof (dtrace_key_t) * (nkeys - 1); size += ksize; /* * Now we need to determine the size of the stored data. */ id = DIF_INSTR_VAR(instr); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; if (v->dtdv_id == id && v->dtdv_scope == scope) { size += v->dtdv_type.dtdt_size; break; } } if (i == dp->dtdo_varlen) return; /* * We have the size. If this is larger than the chunk size * for our dynamic variable state, reset the chunk size. */ size = P2ROUNDUP(size, sizeof (uint64_t)); /* * Before setting the chunk size, check that we're not going * to set it to a negative value... 
*/ if (size > LONG_MAX) return; /* * ...and make certain that we didn't badly overflow. */ if (size < ksize || size < sizeof (dtrace_dynvar_t)) return; if (size > vstate->dtvs_dynvars.dtds_chunksize) vstate->dtvs_dynvars.dtds_chunksize = size; } } static void dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { int i, oldsvars, osz, nsz, otlocals, ntlocals; uint_t id; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; dtrace_statvar_t *svar, ***svarp = NULL; size_t dsize = 0; uint8_t scope = v->dtdv_scope; int *np = NULL; if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) continue; id -= DIF_VAR_OTHER_UBASE; switch (scope) { case DIFV_SCOPE_THREAD: while (id >= (otlocals = vstate->dtvs_ntlocals)) { dtrace_difv_t *tlocals; if ((ntlocals = (otlocals << 1)) == 0) ntlocals = 1; osz = otlocals * sizeof (dtrace_difv_t); nsz = ntlocals * sizeof (dtrace_difv_t); tlocals = kmem_zalloc(nsz, KM_SLEEP); if (osz != 0) { bcopy(vstate->dtvs_tlocals, tlocals, osz); kmem_free(vstate->dtvs_tlocals, osz); } vstate->dtvs_tlocals = tlocals; vstate->dtvs_ntlocals = ntlocals; } vstate->dtvs_tlocals[id] = *v; continue; case DIFV_SCOPE_LOCAL: np = &vstate->dtvs_nlocals; svarp = &vstate->dtvs_locals; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) dsize = NCPU * (v->dtdv_type.dtdt_size + sizeof (uint64_t)); else dsize = NCPU * sizeof (uint64_t); break; case DIFV_SCOPE_GLOBAL: np = &vstate->dtvs_nglobals; svarp = &vstate->dtvs_globals; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) dsize = v->dtdv_type.dtdt_size + sizeof (uint64_t); break; default: ASSERT(0); } while (id >= (oldsvars = *np)) { dtrace_statvar_t **statics; int newsvars, oldsize, newsize; if ((newsvars = (oldsvars << 1)) == 0) newsvars = 1; oldsize = oldsvars * sizeof (dtrace_statvar_t *); newsize = newsvars * sizeof (dtrace_statvar_t *); statics = kmem_zalloc(newsize, KM_SLEEP); if (oldsize != 0) { bcopy(*svarp, statics, oldsize); kmem_free(*svarp, oldsize); } *svarp = statics; *np = newsvars; } if ((svar = (*svarp)[id]) == NULL) { svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP); svar->dtsv_var = *v; if ((svar->dtsv_size = dsize) != 0) { svar->dtsv_data = (uint64_t)(uintptr_t) kmem_zalloc(dsize, KM_SLEEP); } (*svarp)[id] = svar; } svar->dtsv_refcnt++; } dtrace_difo_chunksize(dp, vstate); dtrace_difo_hold(dp); } static dtrace_difo_t * dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { dtrace_difo_t *new; size_t sz; ASSERT(dp->dtdo_buf != NULL); ASSERT(dp->dtdo_refcnt != 0); new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP); ASSERT(dp->dtdo_buf != NULL); sz = dp->dtdo_len * sizeof (dif_instr_t); new->dtdo_buf = kmem_alloc(sz, KM_SLEEP); bcopy(dp->dtdo_buf, new->dtdo_buf, sz); new->dtdo_len = dp->dtdo_len; if (dp->dtdo_strtab != NULL) { ASSERT(dp->dtdo_strlen != 0); new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP); bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen); new->dtdo_strlen = dp->dtdo_strlen; } if (dp->dtdo_inttab != NULL) { ASSERT(dp->dtdo_intlen != 0); sz = dp->dtdo_intlen * sizeof (uint64_t); new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP); bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz); new->dtdo_intlen = dp->dtdo_intlen; } if (dp->dtdo_vartab != NULL) { ASSERT(dp->dtdo_varlen != 0); sz = dp->dtdo_varlen * sizeof (dtrace_difv_t); new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP); bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz); new->dtdo_varlen = dp->dtdo_varlen; } dtrace_difo_init(new, vstate); return 
(new); } static void dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { int i; ASSERT(dp->dtdo_refcnt == 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; dtrace_statvar_t *svar, **svarp = NULL; uint_t id; uint8_t scope = v->dtdv_scope; int *np = NULL; switch (scope) { case DIFV_SCOPE_THREAD: continue; case DIFV_SCOPE_LOCAL: np = &vstate->dtvs_nlocals; svarp = vstate->dtvs_locals; break; case DIFV_SCOPE_GLOBAL: np = &vstate->dtvs_nglobals; svarp = vstate->dtvs_globals; break; default: ASSERT(0); } if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) continue; id -= DIF_VAR_OTHER_UBASE; ASSERT(id < *np); svar = svarp[id]; ASSERT(svar != NULL); ASSERT(svar->dtsv_refcnt > 0); if (--svar->dtsv_refcnt > 0) continue; if (svar->dtsv_size != 0) { ASSERT(svar->dtsv_data != 0); kmem_free((void *)(uintptr_t)svar->dtsv_data, svar->dtsv_size); } kmem_free(svar, sizeof (dtrace_statvar_t)); svarp[id] = NULL; } if (dp->dtdo_buf != NULL) kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t)); if (dp->dtdo_inttab != NULL) kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t)); if (dp->dtdo_strtab != NULL) kmem_free(dp->dtdo_strtab, dp->dtdo_strlen); if (dp->dtdo_vartab != NULL) kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t)); kmem_free(dp, sizeof (dtrace_difo_t)); } static void dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { int i; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dp->dtdo_refcnt != 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; if (v->dtdv_id != DIF_VAR_VTIMESTAMP) continue; ASSERT(dtrace_vtime_references > 0); if (--dtrace_vtime_references == 0) dtrace_vtime_disable(); } if (--dp->dtdo_refcnt == 0) dtrace_difo_destroy(dp, vstate); } /* * DTrace Format Functions */ static uint16_t dtrace_format_add(dtrace_state_t *state, char *str) { char *fmt, **new; uint16_t ndx, len = strlen(str) + 1; fmt = kmem_zalloc(len, KM_SLEEP); bcopy(str, fmt, len); for (ndx = 0; ndx < state->dts_nformats; ndx++) { if (state->dts_formats[ndx] == NULL) { state->dts_formats[ndx] = fmt; return (ndx + 1); } } if (state->dts_nformats == USHRT_MAX) { /* * This is only likely if a denial-of-service attack is being * attempted. As such, it's okay to fail silently here. */ kmem_free(fmt, len); return (0); } /* * For simplicity, we always resize the formats array to be exactly the * number of formats. 
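 * (Registering a third format, for instance, allocates a three-pointer
 * array, copies the two existing entries into it and frees the old
 * two-pointer array.)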
*/ ndx = state->dts_nformats++; new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP); if (state->dts_formats != NULL) { ASSERT(ndx != 0); bcopy(state->dts_formats, new, ndx * sizeof (char *)); kmem_free(state->dts_formats, ndx * sizeof (char *)); } state->dts_formats = new; state->dts_formats[ndx] = fmt; return (ndx + 1); } static void dtrace_format_remove(dtrace_state_t *state, uint16_t format) { char *fmt; ASSERT(state->dts_formats != NULL); ASSERT(format <= state->dts_nformats); ASSERT(state->dts_formats[format - 1] != NULL); fmt = state->dts_formats[format - 1]; kmem_free(fmt, strlen(fmt) + 1); state->dts_formats[format - 1] = NULL; } static void dtrace_format_destroy(dtrace_state_t *state) { int i; if (state->dts_nformats == 0) { ASSERT(state->dts_formats == NULL); return; } ASSERT(state->dts_formats != NULL); for (i = 0; i < state->dts_nformats; i++) { char *fmt = state->dts_formats[i]; if (fmt == NULL) continue; kmem_free(fmt, strlen(fmt) + 1); } kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *)); state->dts_nformats = 0; state->dts_formats = NULL; } /* * DTrace Predicate Functions */ static dtrace_predicate_t * dtrace_predicate_create(dtrace_difo_t *dp) { dtrace_predicate_t *pred; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dp->dtdo_refcnt != 0); pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP); pred->dtp_difo = dp; pred->dtp_refcnt = 1; if (!dtrace_difo_cacheable(dp)) return (pred); if (dtrace_predcache_id == DTRACE_CACHEIDNONE) { /* * This is only theoretically possible -- we have had 2^32 * cacheable predicates on this machine. We cannot allow any * more predicates to become cacheable: as unlikely as it is, * there may be a thread caching a (now stale) predicate cache * ID. (N.B.: the temptation is being successfully resisted to * have this cmn_err() "Holy shit -- we executed this code!") */ return (pred); } pred->dtp_cacheid = dtrace_predcache_id++; return (pred); } static void dtrace_predicate_hold(dtrace_predicate_t *pred) { ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0); ASSERT(pred->dtp_refcnt > 0); pred->dtp_refcnt++; } static void dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate) { dtrace_difo_t *dp = pred->dtp_difo; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dp != NULL && dp->dtdo_refcnt != 0); ASSERT(pred->dtp_refcnt > 0); if (--pred->dtp_refcnt == 0) { dtrace_difo_release(pred->dtp_difo, vstate); kmem_free(pred, sizeof (dtrace_predicate_t)); } } /* * DTrace Action Description Functions */ static dtrace_actdesc_t * dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple, uint64_t uarg, uint64_t arg) { dtrace_actdesc_t *act; #ifdef illumos ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL && arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA)); #endif act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP); act->dtad_kind = kind; act->dtad_ntuple = ntuple; act->dtad_uarg = uarg; act->dtad_arg = arg; act->dtad_refcnt = 1; return (act); } static void dtrace_actdesc_hold(dtrace_actdesc_t *act) { ASSERT(act->dtad_refcnt >= 1); act->dtad_refcnt++; } static void dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate) { dtrace_actkind_t kind = act->dtad_kind; dtrace_difo_t *dp; ASSERT(act->dtad_refcnt >= 1); if (--act->dtad_refcnt != 0) return; if ((dp = act->dtad_difo) != NULL) dtrace_difo_release(dp, vstate); if (DTRACEACT_ISPRINTFLIKE(kind)) { char *str = (char *)(uintptr_t)act->dtad_arg; #ifdef illumos ASSERT((str != NULL && 
(uintptr_t)str >= KERNELBASE) || (str == NULL && act->dtad_kind == DTRACEACT_PRINTA)); #endif if (str != NULL) kmem_free(str, strlen(str) + 1); } kmem_free(act, sizeof (dtrace_actdesc_t)); } /* * DTrace ECB Functions */ static dtrace_ecb_t * dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) { dtrace_ecb_t *ecb; dtrace_epid_t epid; ASSERT(MUTEX_HELD(&dtrace_lock)); ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP); ecb->dte_predicate = NULL; ecb->dte_probe = probe; /* * The default size is the size of the default action: recording * the header. */ ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t); ecb->dte_alignment = sizeof (dtrace_epid_t); epid = state->dts_epid++; if (epid - 1 >= state->dts_necbs) { dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs; int necbs = state->dts_necbs << 1; ASSERT(epid == state->dts_necbs + 1); if (necbs == 0) { ASSERT(oecbs == NULL); necbs = 1; } ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP); if (oecbs != NULL) bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs)); dtrace_membar_producer(); state->dts_ecbs = ecbs; if (oecbs != NULL) { /* * If this state is active, we must dtrace_sync() * before we can free the old dts_ecbs array: we're * coming in hot, and there may be active ring * buffer processing (which indexes into the dts_ecbs * array) on another CPU. */ if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) dtrace_sync(); kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs)); } dtrace_membar_producer(); state->dts_necbs = necbs; } ecb->dte_state = state; ASSERT(state->dts_ecbs[epid - 1] == NULL); dtrace_membar_producer(); state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb; return (ecb); } static void dtrace_ecb_enable(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(ecb->dte_next == NULL); if (probe == NULL) { /* * This is the NULL probe -- there's nothing to do. */ return; } if (probe->dtpr_ecb == NULL) { dtrace_provider_t *prov = probe->dtpr_provider; /* * We're the first ECB on this probe. */ probe->dtpr_ecb = probe->dtpr_ecb_last = ecb; if (ecb->dte_predicate != NULL) probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid; prov->dtpv_pops.dtps_enable(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); } else { /* * This probe is already active. Swing the last pointer to * point to the new ECB, and issue a dtrace_sync() to assure * that all CPUs have seen the change. */ ASSERT(probe->dtpr_ecb_last != NULL); probe->dtpr_ecb_last->dte_next = ecb; probe->dtpr_ecb_last = ecb; probe->dtpr_predcache = 0; dtrace_sync(); } } static int dtrace_ecb_resize(dtrace_ecb_t *ecb) { dtrace_action_t *act; uint32_t curneeded = UINT32_MAX; uint32_t aggbase = UINT32_MAX; /* * If we record anything, we always record the dtrace_rechdr_t. (And * we always record it first.) 
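 * For example, an ECB whose only action is trace(timestamp) records
 * the header followed by a single 8-byte value placed at the first
 * 8-byte-aligned offset past the header; dte_size, dte_needed and
 * dte_alignment are sized below for exactly that layout.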
*/ ecb->dte_size = sizeof (dtrace_rechdr_t); ecb->dte_alignment = sizeof (dtrace_epid_t); for (act = ecb->dte_action; act != NULL; act = act->dta_next) { dtrace_recdesc_t *rec = &act->dta_rec; ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1); ecb->dte_alignment = MAX(ecb->dte_alignment, rec->dtrd_alignment); if (DTRACEACT_ISAGG(act->dta_kind)) { dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; ASSERT(rec->dtrd_size != 0); ASSERT(agg->dtag_first != NULL); ASSERT(act->dta_prev->dta_intuple); ASSERT(aggbase != UINT32_MAX); ASSERT(curneeded != UINT32_MAX); agg->dtag_base = aggbase; curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); rec->dtrd_offset = curneeded; if (curneeded + rec->dtrd_size < curneeded) return (EINVAL); curneeded += rec->dtrd_size; ecb->dte_needed = MAX(ecb->dte_needed, curneeded); aggbase = UINT32_MAX; curneeded = UINT32_MAX; } else if (act->dta_intuple) { if (curneeded == UINT32_MAX) { /* * This is the first record in a tuple. Align * curneeded to be at offset 4 in an 8-byte * aligned block. */ ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple); ASSERT3U(aggbase, ==, UINT32_MAX); curneeded = P2PHASEUP(ecb->dte_size, sizeof (uint64_t), sizeof (dtrace_aggid_t)); aggbase = curneeded - sizeof (dtrace_aggid_t); ASSERT(IS_P2ALIGNED(aggbase, sizeof (uint64_t))); } curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); rec->dtrd_offset = curneeded; if (curneeded + rec->dtrd_size < curneeded) return (EINVAL); curneeded += rec->dtrd_size; } else { /* tuples must be followed by an aggregation */ ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple); ecb->dte_size = P2ROUNDUP(ecb->dte_size, rec->dtrd_alignment); rec->dtrd_offset = ecb->dte_size; if (ecb->dte_size + rec->dtrd_size < ecb->dte_size) return (EINVAL); ecb->dte_size += rec->dtrd_size; ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size); } } if ((act = ecb->dte_action) != NULL && !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) && ecb->dte_size == sizeof (dtrace_rechdr_t)) { /* * If the size is still sizeof (dtrace_rechdr_t), then all * actions store no data; set the size to 0. 
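 * (This is the case for a clause containing only an aggregation, such
 * as @ = count(): the aggregated data lives in the aggregation buffer,
 * so the ECB needs to record nothing in the principal buffer.)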
*/ ecb->dte_size = 0; } ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t)); ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t))); ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed, ecb->dte_needed); return (0); } static dtrace_action_t * dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) { dtrace_aggregation_t *agg; size_t size = sizeof (uint64_t); int ntuple = desc->dtad_ntuple; dtrace_action_t *act; dtrace_recdesc_t *frec; dtrace_aggid_t aggid; dtrace_state_t *state = ecb->dte_state; agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP); agg->dtag_ecb = ecb; ASSERT(DTRACEACT_ISAGG(desc->dtad_kind)); switch (desc->dtad_kind) { case DTRACEAGG_MIN: agg->dtag_initial = INT64_MAX; agg->dtag_aggregate = dtrace_aggregate_min; break; case DTRACEAGG_MAX: agg->dtag_initial = INT64_MIN; agg->dtag_aggregate = dtrace_aggregate_max; break; case DTRACEAGG_COUNT: agg->dtag_aggregate = dtrace_aggregate_count; break; case DTRACEAGG_QUANTIZE: agg->dtag_aggregate = dtrace_aggregate_quantize; size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) * sizeof (uint64_t); break; case DTRACEAGG_LQUANTIZE: { uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg); uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg); agg->dtag_initial = desc->dtad_arg; agg->dtag_aggregate = dtrace_aggregate_lquantize; if (step == 0 || levels == 0) goto err; size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t); break; } case DTRACEAGG_LLQUANTIZE: { uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg); uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg); uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg); int64_t v; agg->dtag_initial = desc->dtad_arg; agg->dtag_aggregate = dtrace_aggregate_llquantize; if (factor < 2 || low >= high || nsteps < factor) goto err; /* * Now check that the number of steps evenly divides a power * of the factor. (This assures both integer bucket size and * linearity within each magnitude.) */ for (v = factor; v < nsteps; v *= factor) continue; if ((v % nsteps) || (nsteps % factor)) goto err; size = (dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t); break; } case DTRACEAGG_AVG: agg->dtag_aggregate = dtrace_aggregate_avg; size = sizeof (uint64_t) * 2; break; case DTRACEAGG_STDDEV: agg->dtag_aggregate = dtrace_aggregate_stddev; size = sizeof (uint64_t) * 4; break; case DTRACEAGG_SUM: agg->dtag_aggregate = dtrace_aggregate_sum; break; default: goto err; } agg->dtag_action.dta_rec.dtrd_size = size; if (ntuple == 0) goto err; /* * We must make sure that we have enough actions for the n-tuple. */ for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) { if (DTRACEACT_ISAGG(act->dta_kind)) break; if (--ntuple == 0) { /* * This is the action with which our n-tuple begins. */ agg->dtag_first = act; goto success; } } /* * This n-tuple is short by ntuple elements. Return failure. */ ASSERT(ntuple != 0); err: kmem_free(agg, sizeof (dtrace_aggregation_t)); return (NULL); success: /* * If the last action in the tuple has a size of zero, it's actually * an expression argument for the aggregating action. */ ASSERT(ecb->dte_action_last != NULL); act = ecb->dte_action_last; if (act->dta_kind == DTRACEACT_DIFEXPR) { ASSERT(act->dta_difo != NULL); if (act->dta_difo->dtdo_rtype.dtdt_size == 0) agg->dtag_hasarg = 1; } /* * We need to allocate an id for this aggregation. 
*/ #ifdef illumos aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1, VM_BESTFIT | VM_SLEEP); #else aggid = alloc_unr(state->dts_aggid_arena); #endif if (aggid - 1 >= state->dts_naggregations) { dtrace_aggregation_t **oaggs = state->dts_aggregations; dtrace_aggregation_t **aggs; int naggs = state->dts_naggregations << 1; int onaggs = state->dts_naggregations; ASSERT(aggid == state->dts_naggregations + 1); if (naggs == 0) { ASSERT(oaggs == NULL); naggs = 1; } aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP); if (oaggs != NULL) { bcopy(oaggs, aggs, onaggs * sizeof (*aggs)); kmem_free(oaggs, onaggs * sizeof (*aggs)); } state->dts_aggregations = aggs; state->dts_naggregations = naggs; } ASSERT(state->dts_aggregations[aggid - 1] == NULL); state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg; frec = &agg->dtag_first->dta_rec; if (frec->dtrd_alignment < sizeof (dtrace_aggid_t)) frec->dtrd_alignment = sizeof (dtrace_aggid_t); for (act = agg->dtag_first; act != NULL; act = act->dta_next) { ASSERT(!act->dta_intuple); act->dta_intuple = 1; } return (&agg->dtag_action); } static void dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act) { dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; dtrace_state_t *state = ecb->dte_state; dtrace_aggid_t aggid = agg->dtag_id; ASSERT(DTRACEACT_ISAGG(act->dta_kind)); #ifdef illumos vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1); #else free_unr(state->dts_aggid_arena, aggid); #endif ASSERT(state->dts_aggregations[aggid - 1] == agg); state->dts_aggregations[aggid - 1] = NULL; kmem_free(agg, sizeof (dtrace_aggregation_t)); } static int dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) { dtrace_action_t *action, *last; dtrace_difo_t *dp = desc->dtad_difo; uint32_t size = 0, align = sizeof (uint8_t), mask; uint16_t format = 0; dtrace_recdesc_t *rec; dtrace_state_t *state = ecb->dte_state; dtrace_optval_t *opt = state->dts_options, nframes = 0, strsize; uint64_t arg = desc->dtad_arg; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1); if (DTRACEACT_ISAGG(desc->dtad_kind)) { /* * If this is an aggregating action, there must be neither * a speculate nor a commit on the action chain. */ dtrace_action_t *act; for (act = ecb->dte_action; act != NULL; act = act->dta_next) { if (act->dta_kind == DTRACEACT_COMMIT) return (EINVAL); if (act->dta_kind == DTRACEACT_SPECULATE) return (EINVAL); } action = dtrace_ecb_aggregation_create(ecb, desc); if (action == NULL) return (EINVAL); } else { if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) || (desc->dtad_kind == DTRACEACT_DIFEXPR && dp != NULL && dp->dtdo_destructive)) { state->dts_destructive = 1; } switch (desc->dtad_kind) { case DTRACEACT_PRINTF: case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: case DTRACEACT_DIFEXPR: /* * We know that our arg is a string -- turn it into a * format. 
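 * For printf("%d bytes", sz), say, arg points at the format string;
 * dtrace_format_add() below copies it into dts_formats and the
 * resulting 1-based index is later stored in dtrd_format.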
*/ if (arg == 0) { ASSERT(desc->dtad_kind == DTRACEACT_PRINTA || desc->dtad_kind == DTRACEACT_DIFEXPR); format = 0; } else { ASSERT(arg != 0); #ifdef illumos ASSERT(arg > KERNELBASE); #endif format = dtrace_format_add(state, (char *)(uintptr_t)arg); } /*FALLTHROUGH*/ case DTRACEACT_LIBACT: case DTRACEACT_TRACEMEM: case DTRACEACT_TRACEMEM_DYNSIZE: if (dp == NULL) return (EINVAL); if ((size = dp->dtdo_rtype.dtdt_size) != 0) break; if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) return (EINVAL); size = opt[DTRACEOPT_STRSIZE]; } break; case DTRACEACT_STACK: if ((nframes = arg) == 0) { nframes = opt[DTRACEOPT_STACKFRAMES]; ASSERT(nframes > 0); arg = nframes; } size = nframes * sizeof (pc_t); break; case DTRACEACT_JSTACK: if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0) strsize = opt[DTRACEOPT_JSTACKSTRSIZE]; if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) nframes = opt[DTRACEOPT_JSTACKFRAMES]; arg = DTRACE_USTACK_ARG(nframes, strsize); /*FALLTHROUGH*/ case DTRACEACT_USTACK: if (desc->dtad_kind != DTRACEACT_JSTACK && (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) { strsize = DTRACE_USTACK_STRSIZE(arg); nframes = opt[DTRACEOPT_USTACKFRAMES]; ASSERT(nframes > 0); arg = DTRACE_USTACK_ARG(nframes, strsize); } /* * Save a slot for the pid. */ size = (nframes + 1) * sizeof (uint64_t); size += DTRACE_USTACK_STRSIZE(arg); size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t))); break; case DTRACEACT_SYM: case DTRACEACT_MOD: if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) != sizeof (uint64_t)) || (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) return (EINVAL); break; case DTRACEACT_USYM: case DTRACEACT_UMOD: case DTRACEACT_UADDR: if (dp == NULL || (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) || (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) return (EINVAL); /* * We have a slot for the pid, plus a slot for the * argument. To keep things simple (aligned with * bitness-neutral sizing), we store each as a 64-bit * quantity. */ size = 2 * sizeof (uint64_t); break; case DTRACEACT_STOP: case DTRACEACT_BREAKPOINT: case DTRACEACT_PANIC: break; case DTRACEACT_CHILL: case DTRACEACT_DISCARD: case DTRACEACT_RAISE: if (dp == NULL) return (EINVAL); break; case DTRACEACT_EXIT: if (dp == NULL || (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) || (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) return (EINVAL); break; case DTRACEACT_SPECULATE: if (ecb->dte_size > sizeof (dtrace_rechdr_t)) return (EINVAL); if (dp == NULL) return (EINVAL); state->dts_speculates = 1; break; case DTRACEACT_PRINTM: size = dp->dtdo_rtype.dtdt_size; break; case DTRACEACT_COMMIT: { dtrace_action_t *act = ecb->dte_action; for (; act != NULL; act = act->dta_next) { if (act->dta_kind == DTRACEACT_COMMIT) return (EINVAL); } if (dp == NULL) return (EINVAL); break; } default: return (EINVAL); } if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) { /* * If this is a data-storing action or a speculate, * we must be sure that there isn't a commit on the * action chain. 
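 * A clause along the lines of { commit(id); trace(x); } is therefore
 * rejected with EINVAL: once a commit is present on the chain, no
 * further data-storing action may be added to the ECB.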
*/ dtrace_action_t *act = ecb->dte_action; for (; act != NULL; act = act->dta_next) { if (act->dta_kind == DTRACEACT_COMMIT) return (EINVAL); } } action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP); action->dta_rec.dtrd_size = size; } action->dta_refcnt = 1; rec = &action->dta_rec; size = rec->dtrd_size; for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) { if (!(size & mask)) { align = mask + 1; break; } } action->dta_kind = desc->dtad_kind; if ((action->dta_difo = dp) != NULL) dtrace_difo_hold(dp); rec->dtrd_action = action->dta_kind; rec->dtrd_arg = arg; rec->dtrd_uarg = desc->dtad_uarg; rec->dtrd_alignment = (uint16_t)align; rec->dtrd_format = format; if ((last = ecb->dte_action_last) != NULL) { ASSERT(ecb->dte_action != NULL); action->dta_prev = last; last->dta_next = action; } else { ASSERT(ecb->dte_action == NULL); ecb->dte_action = action; } ecb->dte_action_last = action; return (0); } static void dtrace_ecb_action_remove(dtrace_ecb_t *ecb) { dtrace_action_t *act = ecb->dte_action, *next; dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate; dtrace_difo_t *dp; uint16_t format; if (act != NULL && act->dta_refcnt > 1) { ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1); act->dta_refcnt--; } else { for (; act != NULL; act = next) { next = act->dta_next; ASSERT(next != NULL || act == ecb->dte_action_last); ASSERT(act->dta_refcnt == 1); if ((format = act->dta_rec.dtrd_format) != 0) dtrace_format_remove(ecb->dte_state, format); if ((dp = act->dta_difo) != NULL) dtrace_difo_release(dp, vstate); if (DTRACEACT_ISAGG(act->dta_kind)) { dtrace_ecb_aggregation_destroy(ecb, act); } else { kmem_free(act, sizeof (dtrace_action_t)); } } } ecb->dte_action = NULL; ecb->dte_action_last = NULL; ecb->dte_size = 0; } static void dtrace_ecb_disable(dtrace_ecb_t *ecb) { /* * We disable the ECB by removing it from its probe. */ dtrace_ecb_t *pecb, *prev = NULL; dtrace_probe_t *probe = ecb->dte_probe; ASSERT(MUTEX_HELD(&dtrace_lock)); if (probe == NULL) { /* * This is the NULL probe; there is nothing to disable. */ return; } for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) { if (pecb == ecb) break; prev = pecb; } ASSERT(pecb != NULL); if (prev == NULL) { probe->dtpr_ecb = ecb->dte_next; } else { prev->dte_next = ecb->dte_next; } if (ecb == probe->dtpr_ecb_last) { ASSERT(ecb->dte_next == NULL); probe->dtpr_ecb_last = prev; } /* * The ECB has been disconnected from the probe; now sync to assure * that all CPUs have seen the change before returning. */ dtrace_sync(); if (probe->dtpr_ecb == NULL) { /* * That was the last ECB on the probe; clear the predicate * cache ID for the probe, disable it and sync one more time * to assure that we'll never hit it again. */ dtrace_provider_t *prov = probe->dtpr_provider; ASSERT(ecb->dte_next == NULL); ASSERT(probe->dtpr_ecb_last == NULL); probe->dtpr_predcache = DTRACE_CACHEIDNONE; prov->dtpv_pops.dtps_disable(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); dtrace_sync(); } else { /* * There is at least one ECB remaining on the probe. If there * is _exactly_ one, set the probe's predicate cache ID to be * the predicate cache ID of the remaining ECB. 
*/ ASSERT(probe->dtpr_ecb_last != NULL); ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE); if (probe->dtpr_ecb == probe->dtpr_ecb_last) { dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate; ASSERT(probe->dtpr_ecb->dte_next == NULL); if (p != NULL) probe->dtpr_predcache = p->dtp_cacheid; } ecb->dte_next = NULL; } } static void dtrace_ecb_destroy(dtrace_ecb_t *ecb) { dtrace_state_t *state = ecb->dte_state; dtrace_vstate_t *vstate = &state->dts_vstate; dtrace_predicate_t *pred; dtrace_epid_t epid = ecb->dte_epid; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(ecb->dte_next == NULL); ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb); if ((pred = ecb->dte_predicate) != NULL) dtrace_predicate_release(pred, vstate); dtrace_ecb_action_remove(ecb); ASSERT(state->dts_ecbs[epid - 1] == ecb); state->dts_ecbs[epid - 1] = NULL; kmem_free(ecb, sizeof (dtrace_ecb_t)); } static dtrace_ecb_t * dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe, dtrace_enabling_t *enab) { dtrace_ecb_t *ecb; dtrace_predicate_t *pred; dtrace_actdesc_t *act; dtrace_provider_t *prov; dtrace_ecbdesc_t *desc = enab->dten_current; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(state != NULL); ecb = dtrace_ecb_add(state, probe); ecb->dte_uarg = desc->dted_uarg; if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) { dtrace_predicate_hold(pred); ecb->dte_predicate = pred; } if (probe != NULL) { /* * If the provider shows more leg than the consumer is old * enough to see, we need to enable the appropriate implicit * predicate bits to prevent the ecb from activating at * revealing times. * * Providers specifying DTRACE_PRIV_USER at register time * are stating that they need the /proc-style privilege * model to be enforced, and this is what DTRACE_COND_OWNER * and DTRACE_COND_ZONEOWNER will then do at probe time. */ prov = probe->dtpr_provider; if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) && (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER)) ecb->dte_cond |= DTRACE_COND_OWNER; if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) && (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER)) ecb->dte_cond |= DTRACE_COND_ZONEOWNER; /* * If the provider shows us kernel innards and the user * is lacking sufficient privilege, enable the * DTRACE_COND_USERMODE implicit predicate. */ if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) && (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL)) ecb->dte_cond |= DTRACE_COND_USERMODE; } if (dtrace_ecb_create_cache != NULL) { /* * If we have a cached ecb, we'll use its action list instead * of creating our own (saving both time and space). 
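 * A single enabling that matches thousands of fbt probes, for example,
 * builds its action list only once; each subsequent ECB created for
 * that enabling bumps dta_refcnt and points at the same list.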
*/ dtrace_ecb_t *cached = dtrace_ecb_create_cache; dtrace_action_t *act = cached->dte_action; if (act != NULL) { ASSERT(act->dta_refcnt > 0); act->dta_refcnt++; ecb->dte_action = act; ecb->dte_action_last = cached->dte_action_last; ecb->dte_needed = cached->dte_needed; ecb->dte_size = cached->dte_size; ecb->dte_alignment = cached->dte_alignment; } return (ecb); } for (act = desc->dted_action; act != NULL; act = act->dtad_next) { if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) { dtrace_ecb_destroy(ecb); return (NULL); } } if ((enab->dten_error = dtrace_ecb_resize(ecb)) != 0) { dtrace_ecb_destroy(ecb); return (NULL); } return (dtrace_ecb_create_cache = ecb); } static int dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg) { dtrace_ecb_t *ecb; dtrace_enabling_t *enab = arg; dtrace_state_t *state = enab->dten_vstate->dtvs_state; ASSERT(state != NULL); if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) { /* * This probe was created in a generation for which this * enabling has previously created ECBs; we don't want to * enable it again, so just kick out. */ return (DTRACE_MATCH_NEXT); } if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL) return (DTRACE_MATCH_DONE); dtrace_ecb_enable(ecb); return (DTRACE_MATCH_NEXT); } static dtrace_ecb_t * dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id) { dtrace_ecb_t *ecb; ASSERT(MUTEX_HELD(&dtrace_lock)); if (id == 0 || id > state->dts_necbs) return (NULL); ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL); ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id); return (state->dts_ecbs[id - 1]); } static dtrace_aggregation_t * dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id) { dtrace_aggregation_t *agg; ASSERT(MUTEX_HELD(&dtrace_lock)); if (id == 0 || id > state->dts_naggregations) return (NULL); ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL); ASSERT((agg = state->dts_aggregations[id - 1]) == NULL || agg->dtag_id == id); return (state->dts_aggregations[id - 1]); } /* * DTrace Buffer Functions * * The following functions manipulate DTrace buffers. Most of these functions * are called in the context of establishing or processing consumer state; * exceptions are explicitly noted. */ /* * Note: called from cross call context. This function switches the two * buffers on a given CPU. The atomicity of this operation is assured by * disabling interrupts while the actual switch takes place; the disabling of * interrupts serializes the execution with any execution of dtrace_probe() on * the same CPU. */ static void dtrace_buffer_switch(dtrace_buffer_t *buf) { caddr_t tomax = buf->dtb_tomax; caddr_t xamot = buf->dtb_xamot; dtrace_icookie_t cookie; hrtime_t now; ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); ASSERT(!(buf->dtb_flags & DTRACEBUF_RING)); cookie = dtrace_interrupt_disable(); now = dtrace_gethrtime(); buf->dtb_tomax = xamot; buf->dtb_xamot = tomax; buf->dtb_xamot_drops = buf->dtb_drops; buf->dtb_xamot_offset = buf->dtb_offset; buf->dtb_xamot_errors = buf->dtb_errors; buf->dtb_xamot_flags = buf->dtb_flags; buf->dtb_offset = 0; buf->dtb_drops = 0; buf->dtb_errors = 0; buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED); buf->dtb_interval = now - buf->dtb_switched; buf->dtb_switched = now; dtrace_interrupt_enable(cookie); } /* * Note: called from cross call context. This function activates a buffer * on a CPU. As with dtrace_buffer_switch(), the atomicity of the operation * is guaranteed by the disabling of interrupts. 
*/ static void dtrace_buffer_activate(dtrace_state_t *state) { dtrace_buffer_t *buf; dtrace_icookie_t cookie = dtrace_interrupt_disable(); buf = &state->dts_buffer[curcpu]; if (buf->dtb_tomax != NULL) { /* * We might like to assert that the buffer is marked inactive, * but this isn't necessarily true: the buffer for the CPU * that processes the BEGIN probe has its buffer activated * manually. In this case, we take the (harmless) action * re-clearing the bit INACTIVE bit. */ buf->dtb_flags &= ~DTRACEBUF_INACTIVE; } dtrace_interrupt_enable(cookie); } #ifdef __FreeBSD__ /* * Activate the specified per-CPU buffer. This is used instead of * dtrace_buffer_activate() when APs have not yet started, i.e. when * activating anonymous state. */ static void dtrace_buffer_activate_cpu(dtrace_state_t *state, int cpu) { if (state->dts_buffer[cpu].dtb_tomax != NULL) state->dts_buffer[cpu].dtb_flags &= ~DTRACEBUF_INACTIVE; } #endif static int dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, processorid_t cpu, int *factor) { #ifdef illumos cpu_t *cp; #endif dtrace_buffer_t *buf; int allocated = 0, desired = 0; #ifdef illumos ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); *factor = 1; if (size > dtrace_nonroot_maxsize && !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) return (EFBIG); cp = cpu_list; do { if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id) continue; buf = &bufs[cp->cpu_id]; /* * If there is already a buffer allocated for this CPU, it * is only possible that this is a DR event. In this case, */ if (buf->dtb_tomax != NULL) { ASSERT(buf->dtb_size == size); continue; } ASSERT(buf->dtb_xamot == NULL); if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; buf->dtb_size = size; buf->dtb_flags = flags; buf->dtb_offset = 0; buf->dtb_drops = 0; if (flags & DTRACEBUF_NOSWITCH) continue; if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; } while ((cp = cp->cpu_next) != cpu_list); return (0); err: cp = cpu_list; do { if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id) continue; buf = &bufs[cp->cpu_id]; desired += 2; if (buf->dtb_xamot != NULL) { ASSERT(buf->dtb_tomax != NULL); ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_xamot, size); allocated++; } if (buf->dtb_tomax != NULL) { ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_tomax, size); allocated++; } buf->dtb_tomax = NULL; buf->dtb_xamot = NULL; buf->dtb_size = 0; } while ((cp = cp->cpu_next) != cpu_list); #else int i; *factor = 1; #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ - defined(__mips__) || defined(__powerpc__) || defined(__riscv__) + defined(__mips__) || defined(__powerpc__) || defined(__riscv) /* * FreeBSD isn't good at limiting the amount of memory we * ask to malloc, so let's place a limit here before trying * to do something that might well end in tears at bedtime. */ if (size > physmem * PAGE_SIZE / (128 * (mp_maxid + 1))) return (ENOMEM); #endif ASSERT(MUTEX_HELD(&dtrace_lock)); CPU_FOREACH(i) { if (cpu != DTRACE_CPUALL && cpu != i) continue; buf = &bufs[i]; /* * If there is already a buffer allocated for this CPU, it * is only possible that this is a DR event. In this case, * the buffer size must match our specified size. 
*/ if (buf->dtb_tomax != NULL) { ASSERT(buf->dtb_size == size); continue; } ASSERT(buf->dtb_xamot == NULL); if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; buf->dtb_size = size; buf->dtb_flags = flags; buf->dtb_offset = 0; buf->dtb_drops = 0; if (flags & DTRACEBUF_NOSWITCH) continue; if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; } return (0); err: /* * Error allocating memory, so free the buffers that were * allocated before the failed allocation. */ CPU_FOREACH(i) { if (cpu != DTRACE_CPUALL && cpu != i) continue; buf = &bufs[i]; desired += 2; if (buf->dtb_xamot != NULL) { ASSERT(buf->dtb_tomax != NULL); ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_xamot, size); allocated++; } if (buf->dtb_tomax != NULL) { ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_tomax, size); allocated++; } buf->dtb_tomax = NULL; buf->dtb_xamot = NULL; buf->dtb_size = 0; } #endif *factor = desired / (allocated > 0 ? allocated : 1); return (ENOMEM); } /* * Note: called from probe context. This function just increments the drop * count on a buffer. It has been made a function to allow for the * possibility of understanding the source of mysterious drop counts. (A * problem for which one may be particularly disappointed that DTrace cannot * be used to understand DTrace.) */ static void dtrace_buffer_drop(dtrace_buffer_t *buf) { buf->dtb_drops++; } /* * Note: called from probe context. This function is called to reserve space * in a buffer. If mstate is non-NULL, sets the scratch base and size in the * mstate. Returns the new offset in the buffer, or a negative value if an * error has occurred. */ static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, dtrace_state_t *state, dtrace_mstate_t *mstate) { intptr_t offs = buf->dtb_offset, soffs; intptr_t woffs; caddr_t tomax; size_t total; if (buf->dtb_flags & DTRACEBUF_INACTIVE) return (-1); if ((tomax = buf->dtb_tomax) == NULL) { dtrace_buffer_drop(buf); return (-1); } if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) { while (offs & (align - 1)) { /* * Assert that our alignment is off by a number which * is itself sizeof (uint32_t) aligned. */ ASSERT(!((align - (offs & (align - 1))) & (sizeof (uint32_t) - 1))); DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE); offs += sizeof (uint32_t); } if ((soffs = offs + needed) > buf->dtb_size) { dtrace_buffer_drop(buf); return (-1); } if (mstate == NULL) return (offs); mstate->dtms_scratch_base = (uintptr_t)tomax + soffs; mstate->dtms_scratch_size = buf->dtb_size - soffs; mstate->dtms_scratch_ptr = mstate->dtms_scratch_base; return (offs); } if (buf->dtb_flags & DTRACEBUF_FILL) { if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN && (buf->dtb_flags & DTRACEBUF_FULL)) return (-1); goto out; } total = needed + (offs & (align - 1)); /* * For a ring buffer, life is quite a bit more complicated. Before * we can store any padding, we need to adjust our wrapping offset. * (If we've never before wrapped or we're not about to, no adjustment * is required.) */ if ((buf->dtb_flags & DTRACEBUF_WRAPPED) || offs + total > buf->dtb_size) { woffs = buf->dtb_xamot_offset; if (offs + total > buf->dtb_size) { /* * We can't fit in the end of the buffer. First, a * sanity check that we can fit in the buffer at all. */ if (total > buf->dtb_size) { dtrace_buffer_drop(buf); return (-1); } /* * We're going to be storing at the top of the buffer, * so now we need to deal with the wrapped offset. 
We * only reset our wrapped offset to 0 if it is * currently greater than the current offset. If it * is less than the current offset, it is because a * previous allocation induced a wrap -- but the * allocation didn't subsequently take the space due * to an error or false predicate evaluation. In this * case, we'll just leave the wrapped offset alone: if * the wrapped offset hasn't been advanced far enough * for this allocation, it will be adjusted in the * lower loop. */ if (buf->dtb_flags & DTRACEBUF_WRAPPED) { if (woffs >= offs) woffs = 0; } else { woffs = 0; } /* * Now we know that we're going to be storing to the * top of the buffer and that there is room for us * there. We need to clear the buffer from the current * offset to the end (there may be old gunk there). */ while (offs < buf->dtb_size) tomax[offs++] = 0; /* * We need to set our offset to zero. And because we * are wrapping, we need to set the bit indicating as * much. We can also adjust our needed space back * down to the space required by the ECB -- we know * that the top of the buffer is aligned. */ offs = 0; total = needed; buf->dtb_flags |= DTRACEBUF_WRAPPED; } else { /* * There is room for us in the buffer, so we simply * need to check the wrapped offset. */ if (woffs < offs) { /* * The wrapped offset is less than the offset. * This can happen if we allocated buffer space * that induced a wrap, but then we didn't * subsequently take the space due to an error * or false predicate evaluation. This is * okay; we know that _this_ allocation isn't * going to induce a wrap. We still can't * reset the wrapped offset to be zero, * however: the space may have been trashed in * the previous failed probe attempt. But at * least the wrapped offset doesn't need to * be adjusted at all... */ goto out; } } while (offs + total > woffs) { dtrace_epid_t epid = *(uint32_t *)(tomax + woffs); size_t size; if (epid == DTRACE_EPIDNONE) { size = sizeof (uint32_t); } else { ASSERT3U(epid, <=, state->dts_necbs); ASSERT(state->dts_ecbs[epid - 1] != NULL); size = state->dts_ecbs[epid - 1]->dte_size; } ASSERT(woffs + size <= buf->dtb_size); ASSERT(size != 0); if (woffs + size == buf->dtb_size) { /* * We've reached the end of the buffer; we want * to set the wrapped offset to 0 and break * out. However, if the offs is 0, then we're * in a strange edge-condition: the amount of * space that we want to reserve plus the size * of the record that we're overwriting is * greater than the size of the buffer. This * is problematic because if we reserve the * space but subsequently don't consume it (due * to a failed predicate or error) the wrapped * offset will be 0 -- yet the EPID at offset 0 * will not be committed. This situation is * relatively easy to deal with: if we're in * this case, the buffer is indistinguishable * from one that hasn't wrapped; we need only * finish the job by clearing the wrapped bit, * explicitly setting the offset to be 0, and * zero'ing out the old data in the buffer. */ if (offs == 0) { buf->dtb_flags &= ~DTRACEBUF_WRAPPED; buf->dtb_offset = 0; woffs = total; while (woffs < buf->dtb_size) tomax[woffs++] = 0; } woffs = 0; break; } woffs += size; } /* * We have a wrapped offset. It may be that the wrapped offset * has become zero -- that's okay. */ buf->dtb_xamot_offset = woffs; } out: /* * Now we can plow the buffer with any necessary padding. */ while (offs & (align - 1)) { /* * Assert that our alignment is off by a number which * is itself sizeof (uint32_t) aligned. 
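 *
 * A worked example of the padding arithmetic (made-up values): with
 * align == 8 and offs == 12, the gap to the next aligned offset is
 * 8 - (12 & 7) == 4 bytes.  That gap is a multiple of
 * sizeof (uint32_t), so the loop below stores exactly one
 * DTRACE_EPIDNONE word and leaves offs at 16.  A gap that was not a
 * multiple of four could not be filled with 4-byte padding words at
 * all, which is precisely what the assertion guards against.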
*/ ASSERT(!((align - (offs & (align - 1))) & (sizeof (uint32_t) - 1))); DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE); offs += sizeof (uint32_t); } if (buf->dtb_flags & DTRACEBUF_FILL) { if (offs + needed > buf->dtb_size - state->dts_reserve) { buf->dtb_flags |= DTRACEBUF_FULL; return (-1); } } if (mstate == NULL) return (offs); /* * For ring buffers and fill buffers, the scratch space is always * the inactive buffer. */ mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot; mstate->dtms_scratch_size = buf->dtb_size; mstate->dtms_scratch_ptr = mstate->dtms_scratch_base; return (offs); } static void dtrace_buffer_polish(dtrace_buffer_t *buf) { ASSERT(buf->dtb_flags & DTRACEBUF_RING); ASSERT(MUTEX_HELD(&dtrace_lock)); if (!(buf->dtb_flags & DTRACEBUF_WRAPPED)) return; /* * We need to polish the ring buffer. There are three cases: * * - The first (and presumably most common) is that there is no gap * between the buffer offset and the wrapped offset. In this case, * there is nothing in the buffer that isn't valid data; we can * mark the buffer as polished and return. * * - The second (less common than the first but still more common * than the third) is that there is a gap between the buffer offset * and the wrapped offset, and the wrapped offset is larger than the * buffer offset. This can happen because of an alignment issue, or * can happen because of a call to dtrace_buffer_reserve() that * didn't subsequently consume the buffer space. In this case, * we need to zero the data from the buffer offset to the wrapped * offset. * * - The third (and least common) is that there is a gap between the * buffer offset and the wrapped offset, but the wrapped offset is * _less_ than the buffer offset. This can only happen because a * call to dtrace_buffer_reserve() induced a wrap, but the space * was not subsequently consumed. In this case, we need to zero the * space from the offset to the end of the buffer _and_ from the * top of the buffer to the wrapped offset. */ if (buf->dtb_offset < buf->dtb_xamot_offset) { bzero(buf->dtb_tomax + buf->dtb_offset, buf->dtb_xamot_offset - buf->dtb_offset); } if (buf->dtb_offset > buf->dtb_xamot_offset) { bzero(buf->dtb_tomax + buf->dtb_offset, buf->dtb_size - buf->dtb_offset); bzero(buf->dtb_tomax, buf->dtb_xamot_offset); } } /* * This routine determines if data generated at the specified time has likely * been entirely consumed at user-level. This routine is called to determine * if an ECB on a defunct probe (but for an active enabling) can be safely * disabled and destroyed. 
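 *
 * A concrete illustration (made-up timestamps): if a switching buffer
 * was last switched at t = 150 and the interval between its last two
 * switches was 30, the previous switch occurred at t = 120
 * (dtb_switched - dtb_interval).  For when = 100, both switches
 * happened after the time of interest, so anything generated before
 * 'when' has been through a full switch cycle and is presumed
 * consumed.  If the previous switch predated 'when', or if an
 * unswitched buffer still holds data, or if the buffer is a ring
 * buffer, we must report the data as potentially unconsumed.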
*/ static int dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when) { int i; for (i = 0; i < NCPU; i++) { dtrace_buffer_t *buf = &bufs[i]; if (buf->dtb_size == 0) continue; if (buf->dtb_flags & DTRACEBUF_RING) return (0); if (!buf->dtb_switched && buf->dtb_offset != 0) return (0); if (buf->dtb_switched - buf->dtb_interval < when) return (0); } return (1); } static void dtrace_buffer_free(dtrace_buffer_t *bufs) { int i; for (i = 0; i < NCPU; i++) { dtrace_buffer_t *buf = &bufs[i]; if (buf->dtb_tomax == NULL) { ASSERT(buf->dtb_xamot == NULL); ASSERT(buf->dtb_size == 0); continue; } if (buf->dtb_xamot != NULL) { ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); kmem_free(buf->dtb_xamot, buf->dtb_size); } kmem_free(buf->dtb_tomax, buf->dtb_size); buf->dtb_size = 0; buf->dtb_tomax = NULL; buf->dtb_xamot = NULL; } } /* * DTrace Enabling Functions */ static dtrace_enabling_t * dtrace_enabling_create(dtrace_vstate_t *vstate) { dtrace_enabling_t *enab; enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP); enab->dten_vstate = vstate; return (enab); } static void dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb) { dtrace_ecbdesc_t **ndesc; size_t osize, nsize; /* * We can't add to enablings after we've enabled them, or after we've * retained them. */ ASSERT(enab->dten_probegen == 0); ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); if (enab->dten_ndesc < enab->dten_maxdesc) { enab->dten_desc[enab->dten_ndesc++] = ecb; return; } osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *); if (enab->dten_maxdesc == 0) { enab->dten_maxdesc = 1; } else { enab->dten_maxdesc <<= 1; } ASSERT(enab->dten_ndesc < enab->dten_maxdesc); nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *); ndesc = kmem_zalloc(nsize, KM_SLEEP); bcopy(enab->dten_desc, ndesc, osize); if (enab->dten_desc != NULL) kmem_free(enab->dten_desc, osize); enab->dten_desc = ndesc; enab->dten_desc[enab->dten_ndesc++] = ecb; } static void dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb, dtrace_probedesc_t *pd) { dtrace_ecbdesc_t *new; dtrace_predicate_t *pred; dtrace_actdesc_t *act; /* * We're going to create a new ECB description that matches the * specified ECB in every way, but has the specified probe description. 
*/ new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP); if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL) dtrace_predicate_hold(pred); for (act = ecb->dted_action; act != NULL; act = act->dtad_next) dtrace_actdesc_hold(act); new->dted_action = ecb->dted_action; new->dted_pred = ecb->dted_pred; new->dted_probe = *pd; new->dted_uarg = ecb->dted_uarg; dtrace_enabling_add(enab, new); } static void dtrace_enabling_dump(dtrace_enabling_t *enab) { int i; for (i = 0; i < enab->dten_ndesc; i++) { dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe; #ifdef __FreeBSD__ printf("dtrace: enabling probe %d (%s:%s:%s:%s)\n", i, desc->dtpd_provider, desc->dtpd_mod, desc->dtpd_func, desc->dtpd_name); #else cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i, desc->dtpd_provider, desc->dtpd_mod, desc->dtpd_func, desc->dtpd_name); #endif } } static void dtrace_enabling_destroy(dtrace_enabling_t *enab) { int i; dtrace_ecbdesc_t *ep; dtrace_vstate_t *vstate = enab->dten_vstate; ASSERT(MUTEX_HELD(&dtrace_lock)); for (i = 0; i < enab->dten_ndesc; i++) { dtrace_actdesc_t *act, *next; dtrace_predicate_t *pred; ep = enab->dten_desc[i]; if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) dtrace_predicate_release(pred, vstate); for (act = ep->dted_action; act != NULL; act = next) { next = act->dtad_next; dtrace_actdesc_release(act, vstate); } kmem_free(ep, sizeof (dtrace_ecbdesc_t)); } if (enab->dten_desc != NULL) kmem_free(enab->dten_desc, enab->dten_maxdesc * sizeof (dtrace_enabling_t *)); /* * If this was a retained enabling, decrement the dts_nretained count * and take it off of the dtrace_retained list. */ if (enab->dten_prev != NULL || enab->dten_next != NULL || dtrace_retained == enab) { ASSERT(enab->dten_vstate->dtvs_state != NULL); ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0); enab->dten_vstate->dtvs_state->dts_nretained--; dtrace_retained_gen++; } if (enab->dten_prev == NULL) { if (dtrace_retained == enab) { dtrace_retained = enab->dten_next; if (dtrace_retained != NULL) dtrace_retained->dten_prev = NULL; } } else { ASSERT(enab != dtrace_retained); ASSERT(dtrace_retained != NULL); enab->dten_prev->dten_next = enab->dten_next; } if (enab->dten_next != NULL) { ASSERT(dtrace_retained != NULL); enab->dten_next->dten_prev = enab->dten_prev; } kmem_free(enab, sizeof (dtrace_enabling_t)); } static int dtrace_enabling_retain(dtrace_enabling_t *enab) { dtrace_state_t *state; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); ASSERT(enab->dten_vstate != NULL); state = enab->dten_vstate->dtvs_state; ASSERT(state != NULL); /* * We only allow each state to retain dtrace_retain_max enablings. */ if (state->dts_nretained >= dtrace_retain_max) return (ENOSPC); state->dts_nretained++; dtrace_retained_gen++; if (dtrace_retained == NULL) { dtrace_retained = enab; return (0); } enab->dten_next = dtrace_retained; dtrace_retained->dten_prev = enab; dtrace_retained = enab; return (0); } static int dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match, dtrace_probedesc_t *create) { dtrace_enabling_t *new, *enab; int found = 0, err = ENOENT; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN); ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN); ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN); ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN); new = dtrace_enabling_create(&state->dts_vstate); /* * Iterate over all retained enablings, looking for enablings that * match the specified state. 
*/ for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { int i; /* * dtvs_state can only be NULL for helper enablings -- and * helper enablings can't be retained. */ ASSERT(enab->dten_vstate->dtvs_state != NULL); if (enab->dten_vstate->dtvs_state != state) continue; /* * Now iterate over each probe description; we're looking for * an exact match to the specified probe description. */ for (i = 0; i < enab->dten_ndesc; i++) { dtrace_ecbdesc_t *ep = enab->dten_desc[i]; dtrace_probedesc_t *pd = &ep->dted_probe; if (strcmp(pd->dtpd_provider, match->dtpd_provider)) continue; if (strcmp(pd->dtpd_mod, match->dtpd_mod)) continue; if (strcmp(pd->dtpd_func, match->dtpd_func)) continue; if (strcmp(pd->dtpd_name, match->dtpd_name)) continue; /* * We have a winning probe! Add it to our growing * enabling. */ found = 1; dtrace_enabling_addlike(new, ep, create); } } if (!found || (err = dtrace_enabling_retain(new)) != 0) { dtrace_enabling_destroy(new); return (err); } return (0); } static void dtrace_enabling_retract(dtrace_state_t *state) { dtrace_enabling_t *enab, *next; ASSERT(MUTEX_HELD(&dtrace_lock)); /* * Iterate over all retained enablings, destroy the enablings retained * for the specified state. */ for (enab = dtrace_retained; enab != NULL; enab = next) { next = enab->dten_next; /* * dtvs_state can only be NULL for helper enablings -- and * helper enablings can't be retained. */ ASSERT(enab->dten_vstate->dtvs_state != NULL); if (enab->dten_vstate->dtvs_state == state) { ASSERT(state->dts_nretained > 0); dtrace_enabling_destroy(enab); } } ASSERT(state->dts_nretained == 0); } static int dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) { int i = 0; int matched = 0; ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); for (i = 0; i < enab->dten_ndesc; i++) { dtrace_ecbdesc_t *ep = enab->dten_desc[i]; enab->dten_current = ep; enab->dten_error = 0; matched += dtrace_probe_enable(&ep->dted_probe, enab); if (enab->dten_error != 0) { /* * If we get an error half-way through enabling the * probes, we kick out -- perhaps with some number of * them enabled. Leaving enabled probes enabled may * be slightly confusing for user-level, but we expect * that no one will attempt to actually drive on in * the face of such errors. If this is an anonymous * enabling (indicated with a NULL nmatched pointer), * we cmn_err() a message. We aren't expecting to * get such an error -- such as it can exist at all, * it would be a result of corrupted DOF in the driver * properties. */ if (nmatched == NULL) { cmn_err(CE_WARN, "dtrace_enabling_match() " "error on %p: %d", (void *)ep, enab->dten_error); } return (enab->dten_error); } } enab->dten_probegen = dtrace_probegen; if (nmatched != NULL) *nmatched = matched; return (0); } static void dtrace_enabling_matchall(void) { dtrace_enabling_t *enab; mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); /* * Iterate over all retained enablings to see if any probes match * against them. We only perform this operation on enablings for which * we have sufficient permissions by virtue of being in the global zone * or in the same zone as the DTrace client. Because we can be called * after dtrace_detach() has been called, we cannot assert that there * are retained enablings. We can safely load from dtrace_retained, * however: the taskq_destroy() at the end of dtrace_detach() will * block pending our completion. 
*/ for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { #ifdef illumos cred_t *cr = enab->dten_vstate->dtvs_state->dts_cred.dcr_cred; if (INGLOBALZONE(curproc) || cr != NULL && getzoneid() == crgetzoneid(cr)) #endif (void) dtrace_enabling_match(enab, NULL); } mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); } /* * If an enabling is to be enabled without having matched probes (that is, if * dtrace_state_go() is to be called on the underlying dtrace_state_t), the * enabling must be _primed_ by creating an ECB for every ECB description. * This must be done to assure that we know the number of speculations, the * number of aggregations, the minimum buffer size needed, etc. before we * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually * enabling any probes, we create ECBs for every ECB decription, but with a * NULL probe -- which is exactly what this function does. */ static void dtrace_enabling_prime(dtrace_state_t *state) { dtrace_enabling_t *enab; int i; for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { ASSERT(enab->dten_vstate->dtvs_state != NULL); if (enab->dten_vstate->dtvs_state != state) continue; /* * We don't want to prime an enabling more than once, lest * we allow a malicious user to induce resource exhaustion. * (The ECBs that result from priming an enabling aren't * leaked -- but they also aren't deallocated until the * consumer state is destroyed.) */ if (enab->dten_primed) continue; for (i = 0; i < enab->dten_ndesc; i++) { enab->dten_current = enab->dten_desc[i]; (void) dtrace_probe_enable(NULL, enab); } enab->dten_primed = 1; } } /* * Called to indicate that probes should be provided due to retained * enablings. This is implemented in terms of dtrace_probe_provide(), but it * must take an initial lap through the enabling calling the dtps_provide() * entry point explicitly to allow for autocreated probes. */ static void dtrace_enabling_provide(dtrace_provider_t *prv) { int i, all = 0; dtrace_probedesc_t desc; dtrace_genid_t gen; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&dtrace_provider_lock)); if (prv == NULL) { all = 1; prv = dtrace_provider; } do { dtrace_enabling_t *enab; void *parg = prv->dtpv_arg; retry: gen = dtrace_retained_gen; for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { for (i = 0; i < enab->dten_ndesc; i++) { desc = enab->dten_desc[i]->dted_probe; mutex_exit(&dtrace_lock); prv->dtpv_pops.dtps_provide(parg, &desc); mutex_enter(&dtrace_lock); /* * Process the retained enablings again if * they have changed while we weren't holding * dtrace_lock. */ if (gen != dtrace_retained_gen) goto retry; } } } while (all && (prv = prv->dtpv_next) != NULL); mutex_exit(&dtrace_lock); dtrace_probe_provide(NULL, all ? NULL : prv); mutex_enter(&dtrace_lock); } /* * Called to reap ECBs that are attached to probes from defunct providers. */ static void dtrace_enabling_reap(void) { dtrace_provider_t *prov; dtrace_probe_t *probe; dtrace_ecb_t *ecb; hrtime_t when; int i; mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; if (probe->dtpr_ecb == NULL) continue; prov = probe->dtpr_provider; if ((when = prov->dtpv_defunct) == 0) continue; /* * We have ECBs on a defunct provider: we want to reap these * ECBs to allow the provider to unregister. 
The destruction * of these ECBs must be done carefully: if we destroy the ECB * and the consumer later wishes to consume an EPID that * corresponds to the destroyed ECB (and if the EPID metadata * has not been previously consumed), the consumer will abort * processing on the unknown EPID. To reduce (but not, sadly, * eliminate) the possibility of this, we will only destroy an * ECB for a defunct provider if, for the state that * corresponds to the ECB: * * (a) There is no speculative tracing (which can effectively * cache an EPID for an arbitrary amount of time). * * (b) The principal buffers have been switched twice since the * provider became defunct. * * (c) The aggregation buffers are of zero size or have been * switched twice since the provider became defunct. * * We use dts_speculates to determine (a) and call a function * (dtrace_buffer_consumed()) to determine (b) and (c). Note * that as soon as we've been unable to destroy one of the ECBs * associated with the probe, we quit trying -- reaping is only * fruitful in as much as we can destroy all ECBs associated * with the defunct provider's probes. */ while ((ecb = probe->dtpr_ecb) != NULL) { dtrace_state_t *state = ecb->dte_state; dtrace_buffer_t *buf = state->dts_buffer; dtrace_buffer_t *aggbuf = state->dts_aggbuffer; if (state->dts_speculates) break; if (!dtrace_buffer_consumed(buf, when)) break; if (!dtrace_buffer_consumed(aggbuf, when)) break; dtrace_ecb_disable(ecb); ASSERT(probe->dtpr_ecb != ecb); dtrace_ecb_destroy(ecb); } } mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); } /* * DTrace DOF Functions */ /*ARGSUSED*/ static void dtrace_dof_error(dof_hdr_t *dof, const char *str) { if (dtrace_err_verbose) cmn_err(CE_WARN, "failed to process DOF: %s", str); #ifdef DTRACE_ERRDEBUG dtrace_errdebug(str); #endif } /* * Create DOF out of a currently enabled state. Right now, we only create * DOF containing the run-time options -- but this could be expanded to create * complete DOF representing the enabled state. */ static dof_hdr_t * dtrace_dof_create(dtrace_state_t *state) { dof_hdr_t *dof; dof_sec_t *sec; dof_optdesc_t *opt; int i, len = sizeof (dof_hdr_t) + roundup(sizeof (dof_sec_t), sizeof (uint64_t)) + sizeof (dof_optdesc_t) * DTRACEOPT_MAX; ASSERT(MUTEX_HELD(&dtrace_lock)); dof = kmem_zalloc(len, KM_SLEEP); dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0; dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1; dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2; dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3; dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE; dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE; dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION; dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION; dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS; dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS; dof->dofh_flags = 0; dof->dofh_hdrsize = sizeof (dof_hdr_t); dof->dofh_secsize = sizeof (dof_sec_t); dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */ dof->dofh_secoff = sizeof (dof_hdr_t); dof->dofh_loadsz = len; dof->dofh_filesz = len; dof->dofh_pad = 0; /* * Fill in the option section header... 
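 *
 * A rough sketch of the layout being built here (all offsets within
 * the single allocation of 'len' bytes computed above):
 *
 *	0				dof_hdr_t; dofh_secoff names the
 *					offset of the section header
 *	sizeof (dof_hdr_t)		one dof_sec_t of type
 *					DOF_SECT_OPTDESC
 *	sizeof (dof_hdr_t) +
 *	    roundup(sizeof (dof_sec_t),
 *	    sizeof (uint64_t))		DTRACEOPT_MAX dof_optdesc_t
 *					entries; dofs_offset and
 *					dofs_size describe this array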
*/ sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t)); sec->dofs_type = DOF_SECT_OPTDESC; sec->dofs_align = sizeof (uint64_t); sec->dofs_flags = DOF_SECF_LOAD; sec->dofs_entsize = sizeof (dof_optdesc_t); opt = (dof_optdesc_t *)((uintptr_t)sec + roundup(sizeof (dof_sec_t), sizeof (uint64_t))); sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof; sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX; for (i = 0; i < DTRACEOPT_MAX; i++) { opt[i].dofo_option = i; opt[i].dofo_strtab = DOF_SECIDX_NONE; opt[i].dofo_value = state->dts_options[i]; } return (dof); } static dof_hdr_t * dtrace_dof_copyin(uintptr_t uarg, int *errp) { dof_hdr_t hdr, *dof; ASSERT(!MUTEX_HELD(&dtrace_lock)); /* * First, we're going to copyin() the sizeof (dof_hdr_t). */ if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) { dtrace_dof_error(NULL, "failed to copyin DOF header"); *errp = EFAULT; return (NULL); } /* * Now we'll allocate the entire DOF and copy it in -- provided * that the length isn't outrageous. */ if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { dtrace_dof_error(&hdr, "load size exceeds maximum"); *errp = E2BIG; return (NULL); } if (hdr.dofh_loadsz < sizeof (hdr)) { dtrace_dof_error(&hdr, "invalid load size"); *errp = EINVAL; return (NULL); } dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP); if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 || dof->dofh_loadsz != hdr.dofh_loadsz) { kmem_free(dof, hdr.dofh_loadsz); *errp = EFAULT; return (NULL); } return (dof); } #ifdef __FreeBSD__ static dof_hdr_t * dtrace_dof_copyin_proc(struct proc *p, uintptr_t uarg, int *errp) { dof_hdr_t hdr, *dof; struct thread *td; size_t loadsz; ASSERT(!MUTEX_HELD(&dtrace_lock)); td = curthread; /* * First, we're going to copyin() the sizeof (dof_hdr_t). */ if (proc_readmem(td, p, uarg, &hdr, sizeof(hdr)) != sizeof(hdr)) { dtrace_dof_error(NULL, "failed to copyin DOF header"); *errp = EFAULT; return (NULL); } /* * Now we'll allocate the entire DOF and copy it in -- provided * that the length isn't outrageous. */ if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { dtrace_dof_error(&hdr, "load size exceeds maximum"); *errp = E2BIG; return (NULL); } loadsz = (size_t)hdr.dofh_loadsz; if (loadsz < sizeof (hdr)) { dtrace_dof_error(&hdr, "invalid load size"); *errp = EINVAL; return (NULL); } dof = kmem_alloc(loadsz, KM_SLEEP); if (proc_readmem(td, p, uarg, dof, loadsz) != loadsz || dof->dofh_loadsz != loadsz) { kmem_free(dof, hdr.dofh_loadsz); *errp = EFAULT; return (NULL); } return (dof); } static __inline uchar_t dtrace_dof_char(char c) { switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return (c - '0'); case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': return (c - 'A' + 10); case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': return (c - 'a' + 10); } /* Should not reach here. */ return (UCHAR_MAX); } #endif /* __FreeBSD__ */ static dof_hdr_t * dtrace_dof_property(const char *name) { #ifdef __FreeBSD__ uint8_t *dofbuf; u_char *data, *eol; caddr_t doffile; size_t bytes, len, i; dof_hdr_t *dof; u_char c1, c2; dof = NULL; doffile = preload_search_by_type("dtrace_dof"); if (doffile == NULL) return (NULL); data = preload_fetch_addr(doffile); len = preload_fetch_size(doffile); for (;;) { /* Look for the end of the line. All lines end in a newline. 
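 *
 * To illustrate the format being parsed (a sketch; the key name below
 * is only an example of the sort of name the loader provides): each
 * line has the shape
 *
 *	dof-data-0=7f444f46...\n
 *
 * that is, a property name, an '=', and the DOF image with every byte
 * spelled as two hexadecimal characters.  A byte such as 0x7f is
 * recovered further down as
 * dtrace_dof_char('7') * 16 + dtrace_dof_char('f').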
*/ eol = memchr(data, '\n', len); if (eol == NULL) return (NULL); if (strncmp(name, data, strlen(name)) == 0) break; eol++; /* skip past the newline */ len -= eol - data; data = eol; } /* We've found the data corresponding to the specified key. */ data += strlen(name) + 1; /* skip past the '=' */ len = eol - data; if (len % 2 != 0) { dtrace_dof_error(NULL, "invalid DOF encoding length"); goto doferr; } bytes = len / 2; if (bytes < sizeof(dof_hdr_t)) { dtrace_dof_error(NULL, "truncated header"); goto doferr; } /* * Each byte is represented by the two ASCII characters in its hex * representation. */ dofbuf = malloc(bytes, M_SOLARIS, M_WAITOK); for (i = 0; i < bytes; i++) { c1 = dtrace_dof_char(data[i * 2]); c2 = dtrace_dof_char(data[i * 2 + 1]); if (c1 == UCHAR_MAX || c2 == UCHAR_MAX) { dtrace_dof_error(NULL, "invalid hex char in DOF"); goto doferr; } dofbuf[i] = c1 * 16 + c2; } dof = (dof_hdr_t *)dofbuf; if (bytes < dof->dofh_loadsz) { dtrace_dof_error(NULL, "truncated DOF"); goto doferr; } if (dof->dofh_loadsz >= dtrace_dof_maxsize) { dtrace_dof_error(NULL, "oversized DOF"); goto doferr; } return (dof); doferr: free(dof, M_SOLARIS); return (NULL); #else /* __FreeBSD__ */ uchar_t *buf; uint64_t loadsz; unsigned int len, i; dof_hdr_t *dof; /* * Unfortunately, array of values in .conf files are always (and * only) interpreted to be integer arrays. We must read our DOF * as an integer array, and then squeeze it into a byte array. */ if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS) return (NULL); for (i = 0; i < len; i++) buf[i] = (uchar_t)(((int *)buf)[i]); if (len < sizeof (dof_hdr_t)) { ddi_prop_free(buf); dtrace_dof_error(NULL, "truncated header"); return (NULL); } if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) { ddi_prop_free(buf); dtrace_dof_error(NULL, "truncated DOF"); return (NULL); } if (loadsz >= dtrace_dof_maxsize) { ddi_prop_free(buf); dtrace_dof_error(NULL, "oversized DOF"); return (NULL); } dof = kmem_alloc(loadsz, KM_SLEEP); bcopy(buf, dof, loadsz); ddi_prop_free(buf); return (dof); #endif /* !__FreeBSD__ */ } static void dtrace_dof_destroy(dof_hdr_t *dof) { kmem_free(dof, dof->dofh_loadsz); } /* * Return the dof_sec_t pointer corresponding to a given section index. If the * index is not valid, dtrace_dof_error() is called and NULL is returned. If * a type other than DOF_SECT_NONE is specified, the header is checked against * this type and NULL is returned if the types do not match. 
*/ static dof_sec_t * dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t) ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize); if (i >= dof->dofh_secnum) { dtrace_dof_error(dof, "referenced section index is invalid"); return (NULL); } if (!(sec->dofs_flags & DOF_SECF_LOAD)) { dtrace_dof_error(dof, "referenced section is not loadable"); return (NULL); } if (type != DOF_SECT_NONE && type != sec->dofs_type) { dtrace_dof_error(dof, "referenced section is the wrong type"); return (NULL); } return (sec); } static dtrace_probedesc_t * dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) { dof_probedesc_t *probe; dof_sec_t *strtab; uintptr_t daddr = (uintptr_t)dof; uintptr_t str; size_t size; if (sec->dofs_type != DOF_SECT_PROBEDESC) { dtrace_dof_error(dof, "invalid probe section"); return (NULL); } if (sec->dofs_align != sizeof (dof_secidx_t)) { dtrace_dof_error(dof, "bad alignment in probe description"); return (NULL); } if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) { dtrace_dof_error(dof, "truncated probe description"); return (NULL); } probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset); strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab); if (strtab == NULL) return (NULL); str = daddr + strtab->dofs_offset; size = strtab->dofs_size; if (probe->dofp_provider >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe provider"); return (NULL); } (void) strncpy(desc->dtpd_provider, (char *)(str + probe->dofp_provider), MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider)); if (probe->dofp_mod >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe module"); return (NULL); } (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod), MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod)); if (probe->dofp_func >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe function"); return (NULL); } (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func), MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func)); if (probe->dofp_name >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe name"); return (NULL); } (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name), MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name)); return (desc); } static dtrace_difo_t * dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, cred_t *cr) { dtrace_difo_t *dp; size_t ttl = 0; dof_difohdr_t *dofd; uintptr_t daddr = (uintptr_t)dof; size_t max = dtrace_difo_maxsize; int i, l, n; static const struct { int section; int bufoffs; int lenoffs; int entsize; int align; const char *msg; } difo[] = { { DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf), offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t), sizeof (dif_instr_t), "multiple DIF sections" }, { DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab), offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t), sizeof (uint64_t), "multiple integer tables" }, { DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab), offsetof(dtrace_difo_t, dtdo_strlen), 0, sizeof (char), "multiple string tables" }, { DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab), offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t), sizeof (uint_t), "multiple variable tables" }, { DOF_SECT_NONE, 0, 0, 0, 0, NULL } }; if (sec->dofs_type != DOF_SECT_DIFOHDR) { dtrace_dof_error(dof, "invalid DIFO header section"); return (NULL); } if (sec->dofs_align != sizeof (dof_secidx_t)) { dtrace_dof_error(dof, "bad 
alignment in DIFO header"); return (NULL); } if (sec->dofs_size < sizeof (dof_difohdr_t) || sec->dofs_size % sizeof (dof_secidx_t)) { dtrace_dof_error(dof, "bad size in DIFO header"); return (NULL); } dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1; dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP); dp->dtdo_rtype = dofd->dofd_rtype; for (l = 0; l < n; l++) { dof_sec_t *subsec; void **bufp; uint32_t *lenp; if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE, dofd->dofd_links[l])) == NULL) goto err; /* invalid section link */ if (ttl + subsec->dofs_size > max) { dtrace_dof_error(dof, "exceeds maximum size"); goto err; } ttl += subsec->dofs_size; for (i = 0; difo[i].section != DOF_SECT_NONE; i++) { if (subsec->dofs_type != difo[i].section) continue; if (!(subsec->dofs_flags & DOF_SECF_LOAD)) { dtrace_dof_error(dof, "section not loaded"); goto err; } if (subsec->dofs_align != difo[i].align) { dtrace_dof_error(dof, "bad alignment"); goto err; } bufp = (void **)((uintptr_t)dp + difo[i].bufoffs); lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs); if (*bufp != NULL) { dtrace_dof_error(dof, difo[i].msg); goto err; } if (difo[i].entsize != subsec->dofs_entsize) { dtrace_dof_error(dof, "entry size mismatch"); goto err; } if (subsec->dofs_entsize != 0 && (subsec->dofs_size % subsec->dofs_entsize) != 0) { dtrace_dof_error(dof, "corrupt entry size"); goto err; } *lenp = subsec->dofs_size; *bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP); bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset), *bufp, subsec->dofs_size); if (subsec->dofs_entsize != 0) *lenp /= subsec->dofs_entsize; break; } /* * If we encounter a loadable DIFO sub-section that is not * known to us, assume this is a broken program and fail. */ if (difo[i].section == DOF_SECT_NONE && (subsec->dofs_flags & DOF_SECF_LOAD)) { dtrace_dof_error(dof, "unrecognized DIFO subsection"); goto err; } } if (dp->dtdo_buf == NULL) { /* * We can't have a DIF object without DIF text. */ dtrace_dof_error(dof, "missing DIF text"); goto err; } /* * Before we validate the DIF object, run through the variable table * looking for the strings -- if any of their size are under, we'll set * their size to be the system-wide default string size. Note that * this should _not_ happen if the "strsize" option has been set -- * in this case, the compiler should have set the size to reflect the * setting of the option. 
*/ for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; dtrace_diftype_t *t = &v->dtdv_type; if (v->dtdv_id < DIF_VAR_OTHER_UBASE) continue; if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0) t->dtdt_size = dtrace_strsize_default; } if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0) goto err; dtrace_difo_init(dp, vstate); return (dp); err: kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t)); kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t)); kmem_free(dp->dtdo_strtab, dp->dtdo_strlen); kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t)); kmem_free(dp, sizeof (dtrace_difo_t)); return (NULL); } static dtrace_predicate_t * dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, cred_t *cr) { dtrace_difo_t *dp; if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL) return (NULL); return (dtrace_predicate_create(dp)); } static dtrace_actdesc_t * dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, cred_t *cr) { dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next; dof_actdesc_t *desc; dof_sec_t *difosec; size_t offs; uintptr_t daddr = (uintptr_t)dof; uint64_t arg; dtrace_actkind_t kind; if (sec->dofs_type != DOF_SECT_ACTDESC) { dtrace_dof_error(dof, "invalid action section"); return (NULL); } if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) { dtrace_dof_error(dof, "truncated action description"); return (NULL); } if (sec->dofs_align != sizeof (uint64_t)) { dtrace_dof_error(dof, "bad alignment in action description"); return (NULL); } if (sec->dofs_size < sec->dofs_entsize) { dtrace_dof_error(dof, "section entry size exceeds total size"); return (NULL); } if (sec->dofs_entsize != sizeof (dof_actdesc_t)) { dtrace_dof_error(dof, "bad entry size in action description"); return (NULL); } if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) { dtrace_dof_error(dof, "actions exceed dtrace_actions_max"); return (NULL); } for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) { desc = (dof_actdesc_t *)(daddr + (uintptr_t)sec->dofs_offset + offs); kind = (dtrace_actkind_t)desc->dofa_kind; if ((DTRACEACT_ISPRINTFLIKE(kind) && (kind != DTRACEACT_PRINTA || desc->dofa_strtab != DOF_SECIDX_NONE)) || (kind == DTRACEACT_DIFEXPR && desc->dofa_strtab != DOF_SECIDX_NONE)) { dof_sec_t *strtab; char *str, *fmt; uint64_t i; /* * The argument to these actions is an index into the * DOF string table. For printf()-like actions, this * is the format string. For print(), this is the * CTF type of the expression result. 
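 *
 * For instance (a sketch with a made-up string table): given
 *
 *	strtab:   "\0bytes read: %d\n\0..."
 *	dofa_arg: 1
 *
 * the scan below finds the terminating NUL, rejects strings that are
 * unterminated or empty, and copies "bytes read: %d\n" into 'fmt';
 * the address of that copy then becomes the action's argument.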
*/ if ((strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL) goto err; str = (char *)((uintptr_t)dof + (uintptr_t)strtab->dofs_offset); for (i = desc->dofa_arg; i < strtab->dofs_size; i++) { if (str[i] == '\0') break; } if (i >= strtab->dofs_size) { dtrace_dof_error(dof, "bogus format string"); goto err; } if (i == desc->dofa_arg) { dtrace_dof_error(dof, "empty format string"); goto err; } i -= desc->dofa_arg; fmt = kmem_alloc(i + 1, KM_SLEEP); bcopy(&str[desc->dofa_arg], fmt, i + 1); arg = (uint64_t)(uintptr_t)fmt; } else { if (kind == DTRACEACT_PRINTA) { ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE); arg = 0; } else { arg = desc->dofa_arg; } } act = dtrace_actdesc_create(kind, desc->dofa_ntuple, desc->dofa_uarg, arg); if (last != NULL) { last->dtad_next = act; } else { first = act; } last = act; if (desc->dofa_difo == DOF_SECIDX_NONE) continue; if ((difosec = dtrace_dof_sect(dof, DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL) goto err; act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr); if (act->dtad_difo == NULL) goto err; } ASSERT(first != NULL); return (first); err: for (act = first; act != NULL; act = next) { next = act->dtad_next; dtrace_actdesc_release(act, vstate); } return (NULL); } static dtrace_ecbdesc_t * dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, cred_t *cr) { dtrace_ecbdesc_t *ep; dof_ecbdesc_t *ecb; dtrace_probedesc_t *desc; dtrace_predicate_t *pred = NULL; if (sec->dofs_size < sizeof (dof_ecbdesc_t)) { dtrace_dof_error(dof, "truncated ECB description"); return (NULL); } if (sec->dofs_align != sizeof (uint64_t)) { dtrace_dof_error(dof, "bad alignment in ECB description"); return (NULL); } ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset); sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes); if (sec == NULL) return (NULL); ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP); ep->dted_uarg = ecb->dofe_uarg; desc = &ep->dted_probe; if (dtrace_dof_probedesc(dof, sec, desc) == NULL) goto err; if (ecb->dofe_pred != DOF_SECIDX_NONE) { if ((sec = dtrace_dof_sect(dof, DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL) goto err; if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL) goto err; ep->dted_pred.dtpdd_predicate = pred; } if (ecb->dofe_actions != DOF_SECIDX_NONE) { if ((sec = dtrace_dof_sect(dof, DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL) goto err; ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr); if (ep->dted_action == NULL) goto err; } return (ep); err: if (pred != NULL) dtrace_predicate_release(pred, vstate); kmem_free(ep, sizeof (dtrace_ecbdesc_t)); return (NULL); } /* * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the * specified DOF. SETX relocations are computed using 'ubase', the base load * address of the object containing the DOF, and DOFREL relocations are relative * to the relocation offset within the DOF. 
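 *
 * A worked example (made-up numbers): if the containing object is
 * loaded at ubase = 0x800000000 and a relocated slot currently holds
 * the link-time value 0x1000, a DOF_RELO_SETX rewrites the slot to
 * 0x800001000.  A DOF_RELO_DOFREL instead adds
 * udaddr + ts->dofs_offset + r->dofr_offset -- the user address of the
 * slot itself -- turning a value that was stored relative to the slot
 * into an absolute user address.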
*/ static int dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase, uint64_t udaddr) { uintptr_t daddr = (uintptr_t)dof; dof_relohdr_t *dofr = (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); dof_sec_t *ss, *rs, *ts; dof_relodesc_t *r; uint_t i, n; if (sec->dofs_size < sizeof (dof_relohdr_t) || sec->dofs_align != sizeof (dof_secidx_t)) { dtrace_dof_error(dof, "invalid relocation header"); return (-1); } ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab); rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec); ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec); if (ss == NULL || rs == NULL || ts == NULL) return (-1); /* dtrace_dof_error() has been called already */ if (rs->dofs_entsize < sizeof (dof_relodesc_t) || rs->dofs_align != sizeof (uint64_t)) { dtrace_dof_error(dof, "invalid relocation section"); return (-1); } r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset); n = rs->dofs_size / rs->dofs_entsize; for (i = 0; i < n; i++) { uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset; switch (r->dofr_type) { case DOF_RELO_NONE: break; case DOF_RELO_SETX: case DOF_RELO_DOFREL: if (r->dofr_offset >= ts->dofs_size || r->dofr_offset + sizeof (uint64_t) > ts->dofs_size) { dtrace_dof_error(dof, "bad relocation offset"); return (-1); } if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) { dtrace_dof_error(dof, "misaligned setx relo"); return (-1); } if (r->dofr_type == DOF_RELO_SETX) *(uint64_t *)taddr += ubase; else *(uint64_t *)taddr += udaddr + ts->dofs_offset + r->dofr_offset; break; default: dtrace_dof_error(dof, "invalid relocation type"); return (-1); } r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize); } return (0); } /* * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated * header: it should be at the front of a memory region that is at least * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in * size. It need not be validated in any other way. */ static int dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, dtrace_enabling_t **enabp, uint64_t ubase, uint64_t udaddr, int noprobes) { uint64_t len = dof->dofh_loadsz, seclen; uintptr_t daddr = (uintptr_t)dof; dtrace_ecbdesc_t *ep; dtrace_enabling_t *enab; uint_t i; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t)); /* * Check the DOF header identification bytes. In addition to checking * valid settings, we also verify that unused bits/bytes are zeroed so * we can use them later without fear of regressing existing binaries. 
*/ if (bcmp(&dof->dofh_ident[DOF_ID_MAG0], DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) { dtrace_dof_error(dof, "DOF magic string mismatch"); return (-1); } if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 && dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) { dtrace_dof_error(dof, "DOF has invalid data model"); return (-1); } if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) { dtrace_dof_error(dof, "DOF encoding mismatch"); return (-1); } if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) { dtrace_dof_error(dof, "DOF version mismatch"); return (-1); } if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) { dtrace_dof_error(dof, "DOF uses unsupported instruction set"); return (-1); } if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) { dtrace_dof_error(dof, "DOF uses too many integer registers"); return (-1); } if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) { dtrace_dof_error(dof, "DOF uses too many tuple registers"); return (-1); } for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) { if (dof->dofh_ident[i] != 0) { dtrace_dof_error(dof, "DOF has invalid ident byte set"); return (-1); } } if (dof->dofh_flags & ~DOF_FL_VALID) { dtrace_dof_error(dof, "DOF has invalid flag bits set"); return (-1); } if (dof->dofh_secsize == 0) { dtrace_dof_error(dof, "zero section header size"); return (-1); } /* * Check that the section headers don't exceed the amount of DOF * data. Note that we cast the section size and number of sections * to uint64_t's to prevent possible overflow in the multiplication. */ seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize; if (dof->dofh_secoff > len || seclen > len || dof->dofh_secoff + seclen > len) { dtrace_dof_error(dof, "truncated section headers"); return (-1); } if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) { dtrace_dof_error(dof, "misaligned section headers"); return (-1); } if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) { dtrace_dof_error(dof, "misaligned section size"); return (-1); } /* * Take an initial pass through the section headers to be sure that * the headers don't have stray offsets. If the 'noprobes' flag is * set, do not permit sections relating to providers, probes, or args. */ for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(daddr + (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); if (noprobes) { switch (sec->dofs_type) { case DOF_SECT_PROVIDER: case DOF_SECT_PROBES: case DOF_SECT_PRARGS: case DOF_SECT_PROFFS: dtrace_dof_error(dof, "illegal sections " "for enabling"); return (-1); } } if (DOF_SEC_ISLOADABLE(sec->dofs_type) && !(sec->dofs_flags & DOF_SECF_LOAD)) { dtrace_dof_error(dof, "loadable section with load " "flag unset"); return (-1); } if (!(sec->dofs_flags & DOF_SECF_LOAD)) continue; /* just ignore non-loadable sections */ if (!ISP2(sec->dofs_align)) { dtrace_dof_error(dof, "bad section alignment"); return (-1); } if (sec->dofs_offset & (sec->dofs_align - 1)) { dtrace_dof_error(dof, "misaligned section"); return (-1); } if (sec->dofs_offset > len || sec->dofs_size > len || sec->dofs_offset + sec->dofs_size > len) { dtrace_dof_error(dof, "corrupt section header"); return (-1); } if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr + sec->dofs_offset + sec->dofs_size - 1) != '\0') { dtrace_dof_error(dof, "non-terminating string table"); return (-1); } } /* * Take a second pass through the sections and locate and perform any * relocations that are present. 
We do this after the first pass to * be sure that all sections have had their headers validated. */ for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(daddr + (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); if (!(sec->dofs_flags & DOF_SECF_LOAD)) continue; /* skip sections that are not loadable */ switch (sec->dofs_type) { case DOF_SECT_URELHDR: if (dtrace_dof_relocate(dof, sec, ubase, udaddr) != 0) return (-1); break; } } if ((enab = *enabp) == NULL) enab = *enabp = dtrace_enabling_create(vstate); for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(daddr + (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); if (sec->dofs_type != DOF_SECT_ECBDESC) continue; if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) { dtrace_enabling_destroy(enab); *enabp = NULL; return (-1); } dtrace_enabling_add(enab, ep); } return (0); } /* * Process DOF for any options. This routine assumes that the DOF has been * at least processed by dtrace_dof_slurp(). */ static int dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) { int i, rval; uint32_t entsize; size_t offs; dof_optdesc_t *desc; for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof + (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); if (sec->dofs_type != DOF_SECT_OPTDESC) continue; if (sec->dofs_align != sizeof (uint64_t)) { dtrace_dof_error(dof, "bad alignment in " "option description"); return (EINVAL); } if ((entsize = sec->dofs_entsize) == 0) { dtrace_dof_error(dof, "zeroed option entry size"); return (EINVAL); } if (entsize < sizeof (dof_optdesc_t)) { dtrace_dof_error(dof, "bad option entry size"); return (EINVAL); } for (offs = 0; offs < sec->dofs_size; offs += entsize) { desc = (dof_optdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset + offs); if (desc->dofo_strtab != DOF_SECIDX_NONE) { dtrace_dof_error(dof, "non-zero option string"); return (EINVAL); } if (desc->dofo_value == DTRACEOPT_UNSET) { dtrace_dof_error(dof, "unset option"); return (EINVAL); } if ((rval = dtrace_state_option(state, desc->dofo_option, desc->dofo_value)) != 0) { dtrace_dof_error(dof, "rejected option"); return (rval); } } } return (0); } /* * DTrace Consumer State Functions */ static int dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) { size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize; void *base; uintptr_t limit; dtrace_dynvar_t *dvar, *next, *start; int i; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL); bzero(dstate, sizeof (dtrace_dstate_t)); if ((dstate->dtds_chunksize = chunksize) == 0) dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE; VERIFY(dstate->dtds_chunksize < LONG_MAX); if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t))) size = min; if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) return (ENOMEM); dstate->dtds_size = size; dstate->dtds_base = base; dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP); bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t)); hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)); if (hashsize != 1 && (hashsize & 1)) hashsize--; dstate->dtds_hashsize = hashsize; dstate->dtds_hash = dstate->dtds_base; /* * Set all of our hash buckets to point to the single sink, and (if * it hasn't already been set), set the sink's hash value to be the * sink sentinel value. 
The sink is needed for dynamic variable * lookups to know that they have iterated over an entire, valid hash * chain. */ for (i = 0; i < hashsize; i++) dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink; if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK) dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK; /* * Determine number of active CPUs. Divide free list evenly among * active CPUs. */ start = (dtrace_dynvar_t *) ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t)); limit = (uintptr_t)base + size; VERIFY((uintptr_t)start < limit); VERIFY((uintptr_t)start >= (uintptr_t)base); maxper = (limit - (uintptr_t)start) / NCPU; maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize; #ifndef illumos CPU_FOREACH(i) { #else for (i = 0; i < NCPU; i++) { #endif dstate->dtds_percpu[i].dtdsc_free = dvar = start; /* * If we don't even have enough chunks to make it once through * NCPUs, we're just going to allocate everything to the first * CPU. And if we're on the last CPU, we're going to allocate * whatever is left over. In either case, we set the limit to * be the limit of the dynamic variable space. */ if (maxper == 0 || i == NCPU - 1) { limit = (uintptr_t)base + size; start = NULL; } else { limit = (uintptr_t)start + maxper; start = (dtrace_dynvar_t *)limit; } VERIFY(limit <= (uintptr_t)base + size); for (;;) { next = (dtrace_dynvar_t *)((uintptr_t)dvar + dstate->dtds_chunksize); if ((uintptr_t)next + dstate->dtds_chunksize >= limit) break; VERIFY((uintptr_t)dvar >= (uintptr_t)base && (uintptr_t)dvar <= (uintptr_t)base + size); dvar->dtdv_next = next; dvar = next; } if (maxper == 0) break; } return (0); } static void dtrace_dstate_fini(dtrace_dstate_t *dstate) { ASSERT(MUTEX_HELD(&cpu_lock)); if (dstate->dtds_base == NULL) return; kmem_free(dstate->dtds_base, dstate->dtds_size); kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu); } static void dtrace_vstate_fini(dtrace_vstate_t *vstate) { /* * Logical XOR, where are you? */ ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL)); if (vstate->dtvs_nglobals > 0) { kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals * sizeof (dtrace_statvar_t *)); } if (vstate->dtvs_ntlocals > 0) { kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals * sizeof (dtrace_difv_t)); } ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL)); if (vstate->dtvs_nlocals > 0) { kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals * sizeof (dtrace_statvar_t *)); } } #ifdef illumos static void dtrace_state_clean(dtrace_state_t *state) { if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) return; dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars); dtrace_speculation_clean(state); } static void dtrace_state_deadman(dtrace_state_t *state) { hrtime_t now; dtrace_sync(); now = dtrace_gethrtime(); if (state != dtrace_anon.dta_state && now - state->dts_laststatus >= dtrace_deadman_user) return; /* * We must be sure that dts_alive never appears to be less than the * value upon entry to dtrace_state_deadman(), and because we lack a * dtrace_cas64(), we cannot store to it atomically. We thus instead * store INT64_MAX to it, followed by a memory barrier, followed by * the new value. This assures that dts_alive never appears to be * less than its true value, regardless of the order in which the * stores to the underlying storage are issued. 
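 *
 * To see why the transient INT64_MAX is harmless (an informal sketch
 * of the reasoning): the liveness test made elsewhere is, roughly,
 *
 *	if (now - state->dts_alive > dtrace_deadman_timeout)
 *		(treat the consumer as dead)
 *
 * An observer that happens to read the intermediate INT64_MAX computes
 * a negative difference, so the timeout can never spuriously appear to
 * have expired; the state is only ever seen as at least as alive as it
 * truly is.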
*/ state->dts_alive = INT64_MAX; dtrace_membar_producer(); state->dts_alive = now; } #else /* !illumos */ static void dtrace_state_clean(void *arg) { dtrace_state_t *state = arg; dtrace_optval_t *opt = state->dts_options; if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) return; dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars); dtrace_speculation_clean(state); callout_reset(&state->dts_cleaner, hz * opt[DTRACEOPT_CLEANRATE] / NANOSEC, dtrace_state_clean, state); } static void dtrace_state_deadman(void *arg) { dtrace_state_t *state = arg; hrtime_t now; dtrace_sync(); dtrace_debug_output(); now = dtrace_gethrtime(); if (state != dtrace_anon.dta_state && now - state->dts_laststatus >= dtrace_deadman_user) return; /* * We must be sure that dts_alive never appears to be less than the * value upon entry to dtrace_state_deadman(), and because we lack a * dtrace_cas64(), we cannot store to it atomically. We thus instead * store INT64_MAX to it, followed by a memory barrier, followed by * the new value. This assures that dts_alive never appears to be * less than its true value, regardless of the order in which the * stores to the underlying storage are issued. */ state->dts_alive = INT64_MAX; dtrace_membar_producer(); state->dts_alive = now; callout_reset(&state->dts_deadman, hz * dtrace_deadman_interval / NANOSEC, dtrace_state_deadman, state); } #endif /* illumos */ static dtrace_state_t * #ifdef illumos dtrace_state_create(dev_t *devp, cred_t *cr) #else dtrace_state_create(struct cdev *dev, struct ucred *cred __unused) #endif { #ifdef illumos minor_t minor; major_t major; #else cred_t *cr = NULL; int m = 0; #endif char c[30]; dtrace_state_t *state; dtrace_optval_t *opt; int bufsize = NCPU * sizeof (dtrace_buffer_t), i; int cpu_it; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&cpu_lock)); #ifdef illumos minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP); if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) { vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); return (NULL); } state = ddi_get_soft_state(dtrace_softstate, minor); #else if (dev != NULL) { cr = dev->si_cred; m = dev2unit(dev); } /* Allocate memory for the state. */ state = kmem_zalloc(sizeof(dtrace_state_t), KM_SLEEP); #endif state->dts_epid = DTRACE_EPIDNONE + 1; (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", m); #ifdef illumos state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); if (devp != NULL) { major = getemajor(*devp); } else { major = ddi_driver_major(dtrace_devi); } state->dts_dev = makedevice(major, minor); if (devp != NULL) *devp = state->dts_dev; #else state->dts_aggid_arena = new_unrhdr(1, INT_MAX, &dtrace_unr_mtx); state->dts_dev = dev; #endif /* * We allocate NCPU buffers. On the one hand, this can be quite * a bit of memory per instance (nearly 36K on a Starcat). On the * other hand, it saves an additional memory reference in the probe * path. */ state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP); state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP); /* * Allocate and initialise the per-process per-CPU random state. * SI_SUB_RANDOM < SI_SUB_DTRACE_ANON therefore entropy device is * assumed to be seeded at this point (if from Fortuna seed file). */ (void) read_random(&state->dts_rstate[0], 2 * sizeof(uint64_t)); for (cpu_it = 1; cpu_it < NCPU; cpu_it++) { /* * Each CPU is assigned a 2^64 period, non-overlapping * subsequence. 
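 *
 * Informally (a sketch of the intent rather than of any particular
 * caller): the jump function advances a xoroshiro128+ state by the
 * equivalent of 2^64 next() calls, so seeding CPU n from CPU n-1's
 * state places every CPU at the start of its own 2^64-long window of
 * the generator's sequence.  Probe context may then draw from
 * dts_rstate[curcpu] without locks and without any two CPUs ever
 * consuming the same portion of the stream.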
*/ dtrace_xoroshiro128_plus_jump(state->dts_rstate[cpu_it-1], state->dts_rstate[cpu_it]); } #ifdef illumos state->dts_cleaner = CYCLIC_NONE; state->dts_deadman = CYCLIC_NONE; #else callout_init(&state->dts_cleaner, 1); callout_init(&state->dts_deadman, 1); #endif state->dts_vstate.dtvs_state = state; for (i = 0; i < DTRACEOPT_MAX; i++) state->dts_options[i] = DTRACEOPT_UNSET; /* * Set the default options. */ opt = state->dts_options; opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH; opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO; opt[DTRACEOPT_NSPEC] = dtrace_nspec_default; opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default; opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL; opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default; opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default; opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default; opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default; opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default; opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default; opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default; opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default; opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default; state->dts_activity = DTRACE_ACTIVITY_INACTIVE; /* * Depending on the user credentials, we set flag bits which alter probe * visibility or the amount of destructiveness allowed. In the case of * actual anonymous tracing, or the possession of all privileges, all of * the normal checks are bypassed. */ if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { state->dts_cred.dcr_visible = DTRACE_CRV_ALL; state->dts_cred.dcr_action = DTRACE_CRA_ALL; } else { /* * Set up the credentials for this instantiation. We take a * hold on the credential to prevent it from disappearing on * us; this in turn prevents the zone_t referenced by this * credential from disappearing. This means that we can * examine the credential and the zone from probe context. */ crhold(cr); state->dts_cred.dcr_cred = cr; /* * CRA_PROC means "we have *some* privilege for dtrace" and * unlocks the use of variables like pid, zonename, etc. */ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) || PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) { state->dts_cred.dcr_action |= DTRACE_CRA_PROC; } /* * dtrace_user allows use of syscall and profile providers. * If the user also has proc_owner and/or proc_zone, we * extend the scope to include additional visibility and * destructive power. */ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) { if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) { state->dts_cred.dcr_visible |= DTRACE_CRV_ALLPROC; state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; } if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) { state->dts_cred.dcr_visible |= DTRACE_CRV_ALLZONE; state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; } /* * If we have all privs in whatever zone this is, * we can do destructive things to processes which * have altered credentials. */ #ifdef illumos if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), cr->cr_zone->zone_privset)) { state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; } #endif } /* * Holding the dtrace_kernel privilege also implies that * the user has the dtrace_user privilege from a visibility * perspective. But without further privileges, some * destructive actions are not available. */ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) { /* * Make all probes in all zones visible. 
However, * this doesn't mean that all actions become available * to all zones. */ state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL | DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE; state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL | DTRACE_CRA_PROC; /* * Holding proc_owner means that destructive actions * for *this* zone are allowed. */ if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; /* * Holding proc_zone means that destructive actions * for this user/group ID in all zones is allowed. */ if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; #ifdef illumos /* * If we have all privs in whatever zone this is, * we can do destructive things to processes which * have altered credentials. */ if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), cr->cr_zone->zone_privset)) { state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; } #endif } /* * Holding the dtrace_proc privilege gives control over fasttrap * and pid providers. We need to grant wider destructive * privileges in the event that the user has proc_owner and/or * proc_zone. */ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) { if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; } } return (state); } static int dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) { dtrace_optval_t *opt = state->dts_options, size; processorid_t cpu = 0;; int flags = 0, rval, factor, divisor = 1; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(which < DTRACEOPT_MAX); ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE || (state == dtrace_anon.dta_state && state->dts_activity == DTRACE_ACTIVITY_ACTIVE)); if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0) return (0); if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET) cpu = opt[DTRACEOPT_CPU]; if (which == DTRACEOPT_SPECSIZE) flags |= DTRACEBUF_NOSWITCH; if (which == DTRACEOPT_BUFSIZE) { if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING) flags |= DTRACEBUF_RING; if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL) flags |= DTRACEBUF_FILL; if (state != dtrace_anon.dta_state || state->dts_activity != DTRACE_ACTIVITY_ACTIVE) flags |= DTRACEBUF_INACTIVE; } for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) { /* * The size must be 8-byte aligned. If the size is not 8-byte * aligned, drop it down by the difference. */ if (size & (sizeof (uint64_t) - 1)) size -= size & (sizeof (uint64_t) - 1); if (size < state->dts_reserve) { /* * Buffers always must be large enough to accommodate * their prereserved space. We return E2BIG instead * of ENOMEM in this case to allow for user-level * software to differentiate the cases. 
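 * (For example, a 4m request that fails with ENOMEM and an allocation
 * factor of 4 is retried at 1m, then 256k, and so on -- unless the resize
 * policy is "manual" -- whereas a request below dts_reserve fails
 * immediately with E2BIG, telling the consumer to ask for more space,
 * not less.)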
*/ return (E2BIG); } rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor); if (rval != ENOMEM) { opt[which] = size; return (rval); } if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) return (rval); for (divisor = 2; divisor < factor; divisor <<= 1) continue; } return (ENOMEM); } static int dtrace_state_buffers(dtrace_state_t *state) { dtrace_speculation_t *spec = state->dts_speculations; int rval, i; if ((rval = dtrace_state_buffer(state, state->dts_buffer, DTRACEOPT_BUFSIZE)) != 0) return (rval); if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer, DTRACEOPT_AGGSIZE)) != 0) return (rval); for (i = 0; i < state->dts_nspeculations; i++) { if ((rval = dtrace_state_buffer(state, spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0) return (rval); } return (0); } static void dtrace_state_prereserve(dtrace_state_t *state) { dtrace_ecb_t *ecb; dtrace_probe_t *probe; state->dts_reserve = 0; if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL) return; /* * If our buffer policy is a "fill" buffer policy, we need to set the * prereserved space to be the space required by the END probes. */ probe = dtrace_probes[dtrace_probeid_end - 1]; ASSERT(probe != NULL); for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) { if (ecb->dte_state != state) continue; state->dts_reserve += ecb->dte_needed + ecb->dte_alignment; } } static int dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) { dtrace_optval_t *opt = state->dts_options, sz, nspec; dtrace_speculation_t *spec; dtrace_buffer_t *buf; #ifdef illumos cyc_handler_t hdlr; cyc_time_t when; #endif int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t); dtrace_icookie_t cookie; mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { rval = EBUSY; goto out; } /* * Before we can perform any checks, we must prime all of the * retained enablings that correspond to this state. */ dtrace_enabling_prime(state); if (state->dts_destructive && !state->dts_cred.dcr_destructive) { rval = EACCES; goto out; } dtrace_state_prereserve(state); /* * Now we want to try to allocate our speculations. * We do not automatically resize the number of speculations; if * this fails, we will fail the operation. */ nspec = opt[DTRACEOPT_NSPEC]; ASSERT(nspec != DTRACEOPT_UNSET); if (nspec > INT_MAX) { rval = ENOMEM; goto out; } spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP | KM_NORMALPRI); if (spec == NULL) { rval = ENOMEM; goto out; } state->dts_speculations = spec; state->dts_nspeculations = (int)nspec; for (i = 0; i < nspec; i++) { if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP | KM_NORMALPRI)) == NULL) { rval = ENOMEM; goto err; } spec[i].dtsp_buffer = buf; } if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) { if (dtrace_anon.dta_state == NULL) { rval = ENOENT; goto out; } if (state->dts_necbs != 0) { rval = EALREADY; goto out; } state->dts_anon = dtrace_anon_grab(); ASSERT(state->dts_anon != NULL); state = state->dts_anon; /* * We want "grabanon" to be set in the grabbed state, so we'll * copy that option value from the grabbing state into the * grabbed state. */ state->dts_options[DTRACEOPT_GRABANON] = opt[DTRACEOPT_GRABANON]; *cpu = dtrace_anon.dta_beganon; /* * If the anonymous state is active (as it almost certainly * is if the anonymous enabling ultimately matched anything), * we don't allow any further option processing -- but we * don't return failure.
*/ if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) goto out; } if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET && opt[DTRACEOPT_AGGSIZE] != 0) { if (state->dts_aggregations == NULL) { /* * We're not going to create an aggregation buffer * because we don't have any ECBs that contain * aggregations -- set this option to 0. */ opt[DTRACEOPT_AGGSIZE] = 0; } else { /* * If we have an aggregation buffer, we must also have * a buffer to use as scratch. */ if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET || opt[DTRACEOPT_BUFSIZE] < state->dts_needed) { opt[DTRACEOPT_BUFSIZE] = state->dts_needed; } } } if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET && opt[DTRACEOPT_SPECSIZE] != 0) { if (!state->dts_speculates) { /* * We're not going to create speculation buffers * because we don't have any ECBs that actually * speculate -- set the speculation size to 0. */ opt[DTRACEOPT_SPECSIZE] = 0; } } /* * The bare minimum size for any buffer that we're actually going to * do anything to is sizeof (uint64_t). */ sz = sizeof (uint64_t); if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) || (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) || (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) { /* * A buffer size has been explicitly set to 0 (or to a size * that will be adjusted to 0) and we need the space -- we * need to return failure. We return ENOSPC to differentiate * it from failing to allocate a buffer due to failure to meet * the reserve (for which we return E2BIG). */ rval = ENOSPC; goto out; } if ((rval = dtrace_state_buffers(state)) != 0) goto err; if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET) sz = dtrace_dstate_defsize; do { rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz); if (rval == 0) break; if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) goto err; } while (sz >>= 1); opt[DTRACEOPT_DYNVARSIZE] = sz; if (rval != 0) goto err; if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max) opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max; if (opt[DTRACEOPT_CLEANRATE] == 0) opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min) opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min; if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max) opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; state->dts_alive = state->dts_laststatus = dtrace_gethrtime(); #ifdef illumos hdlr.cyh_func = (cyc_func_t)dtrace_state_clean; hdlr.cyh_arg = state; hdlr.cyh_level = CY_LOW_LEVEL; when.cyt_when = 0; when.cyt_interval = opt[DTRACEOPT_CLEANRATE]; state->dts_cleaner = cyclic_add(&hdlr, &when); hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman; hdlr.cyh_arg = state; hdlr.cyh_level = CY_LOW_LEVEL; when.cyt_when = 0; when.cyt_interval = dtrace_deadman_interval; state->dts_deadman = cyclic_add(&hdlr, &when); #else callout_reset(&state->dts_cleaner, hz * opt[DTRACEOPT_CLEANRATE] / NANOSEC, dtrace_state_clean, state); callout_reset(&state->dts_deadman, hz * dtrace_deadman_interval / NANOSEC, dtrace_state_deadman, state); #endif state->dts_activity = DTRACE_ACTIVITY_WARMUP; #ifdef illumos if (state->dts_getf != 0 && !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) { /* * We don't have kernel privs but we have at least one call * to getf(); we need to bump our zone's count, and (if * this is the first enabling to have an unprivileged call * to getf()) we need to hook into closef(). 
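 * (dtrace_getf counts such enablings across all consumers; the matching
 * decrement in dtrace_state_stop() clears the closef() hook again once
 * the last of them goes away.)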
*/ state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++; if (dtrace_getf++ == 0) { ASSERT(dtrace_closef == NULL); dtrace_closef = dtrace_getf_barrier; } } #endif /* * Now it's time to actually fire the BEGIN probe. We need to disable * interrupts here both to record the CPU on which we fired the BEGIN * probe (the data from this CPU will be processed first at user * level) and to manually activate the buffer for this CPU. */ cookie = dtrace_interrupt_disable(); *cpu = curcpu; ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE); state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE; dtrace_probe(dtrace_probeid_begin, (uint64_t)(uintptr_t)state, 0, 0, 0, 0); dtrace_interrupt_enable(cookie); /* * We may have had an exit action from a BEGIN probe; only change our * state to ACTIVE if we're still in WARMUP. */ ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP || state->dts_activity == DTRACE_ACTIVITY_DRAINING); if (state->dts_activity == DTRACE_ACTIVITY_WARMUP) state->dts_activity = DTRACE_ACTIVITY_ACTIVE; #ifdef __FreeBSD__ /* * We enable anonymous tracing before APs are started, so we must * activate buffers using the current CPU. */ if (state == dtrace_anon.dta_state) for (int i = 0; i < NCPU; i++) dtrace_buffer_activate_cpu(state, i); else dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_buffer_activate, state); #else /* * Regardless of whether or not now we're in ACTIVE or DRAINING, we * want each CPU to transition its principal buffer out of the * INACTIVE state. Doing this assures that no CPU will suddenly begin * processing an ECB halfway down a probe's ECB chain; all CPUs will * atomically transition from processing none of a state's ECBs to * processing all of them. */ dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_buffer_activate, state); #endif goto out; err: dtrace_buffer_free(state->dts_buffer); dtrace_buffer_free(state->dts_aggbuffer); if ((nspec = state->dts_nspeculations) == 0) { ASSERT(state->dts_speculations == NULL); goto out; } spec = state->dts_speculations; ASSERT(spec != NULL); for (i = 0; i < state->dts_nspeculations; i++) { if ((buf = spec[i].dtsp_buffer) == NULL) break; dtrace_buffer_free(buf); kmem_free(buf, bufsize); } kmem_free(spec, nspec * sizeof (dtrace_speculation_t)); state->dts_nspeculations = 0; state->dts_speculations = NULL; out: mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); return (rval); } static int dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu) { dtrace_icookie_t cookie; ASSERT(MUTEX_HELD(&dtrace_lock)); if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE && state->dts_activity != DTRACE_ACTIVITY_DRAINING) return (EINVAL); /* * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync * to be sure that every CPU has seen it. See below for the details * on why this is done. */ state->dts_activity = DTRACE_ACTIVITY_DRAINING; dtrace_sync(); /* * By this point, it is impossible for any CPU to be still processing * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe() * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN * iff we're in the END probe. */ state->dts_activity = DTRACE_ACTIVITY_COOLDOWN; dtrace_sync(); ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN); /* * Finally, we can release the reserve and call the END probe. 
We * disable interrupts across calling the END probe to allow us to * return the CPU on which we actually called the END probe. This * allows user-land to be sure that this CPU's principal buffer is * processed last. */ state->dts_reserve = 0; cookie = dtrace_interrupt_disable(); *cpu = curcpu; dtrace_probe(dtrace_probeid_end, (uint64_t)(uintptr_t)state, 0, 0, 0, 0); dtrace_interrupt_enable(cookie); state->dts_activity = DTRACE_ACTIVITY_STOPPED; dtrace_sync(); #ifdef illumos if (state->dts_getf != 0 && !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) { /* * We don't have kernel privs but we have at least one call * to getf(); we need to lower our zone's count, and (if * this is the last enabling to have an unprivileged call * to getf()) we need to clear the closef() hook. */ ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0); ASSERT(dtrace_closef == dtrace_getf_barrier); ASSERT(dtrace_getf > 0); state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--; if (--dtrace_getf == 0) dtrace_closef = NULL; } #endif return (0); } static int dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option, dtrace_optval_t val) { ASSERT(MUTEX_HELD(&dtrace_lock)); if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) return (EBUSY); if (option >= DTRACEOPT_MAX) return (EINVAL); if (option != DTRACEOPT_CPU && val < 0) return (EINVAL); switch (option) { case DTRACEOPT_DESTRUCTIVE: if (dtrace_destructive_disallow) return (EACCES); state->dts_cred.dcr_destructive = 1; break; case DTRACEOPT_BUFSIZE: case DTRACEOPT_DYNVARSIZE: case DTRACEOPT_AGGSIZE: case DTRACEOPT_SPECSIZE: case DTRACEOPT_STRSIZE: if (val < 0) return (EINVAL); if (val >= LONG_MAX) { /* * If this is an otherwise negative value, set it to * the highest multiple of 128m less than LONG_MAX. * Technically, we're adjusting the size without * regard to the buffer resizing policy, but in fact, * this has no effect -- if we set the buffer size to * ~LONG_MAX and the buffer policy is ultimately set to * be "manual", the buffer allocation is guaranteed to * fail, if only because the allocation requires two * buffers. (We set the size to the highest * multiple of 128m because it ensures that the size * will remain a multiple of a megabyte when * repeatedly halved -- all the way down to 15m.) */ val = LONG_MAX - (1 << 27) + 1; } } state->dts_options[option] = val; return (0); } static void dtrace_state_destroy(dtrace_state_t *state) { dtrace_ecb_t *ecb; dtrace_vstate_t *vstate = &state->dts_vstate; #ifdef illumos minor_t minor = getminor(state->dts_dev); #endif int i, bufsize = NCPU * sizeof (dtrace_buffer_t); dtrace_speculation_t *spec = state->dts_speculations; int nspec = state->dts_nspeculations; uint32_t match; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&cpu_lock)); /* * First, retract any retained enablings for this state. */ dtrace_enabling_retract(state); ASSERT(state->dts_nretained == 0); if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE || state->dts_activity == DTRACE_ACTIVITY_DRAINING) { /* * We have managed to come into dtrace_state_destroy() on a * hot enabling -- almost certainly because of a disorderly * shutdown of a consumer. (That is, a consumer that is * exiting without having called dtrace_stop().) In this case, * we're going to set our activity to be KILLED, and then * issue a sync to be sure that everyone is out of probe * context before we start blowing away ECBs. */ state->dts_activity = DTRACE_ACTIVITY_KILLED; dtrace_sync(); } /* * Release the credential hold we took in dtrace_state_create().
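 * (The hold was taken with crhold() in dtrace_state_create() for any
 * consumer whose credential does not carry all privileges; dropping it
 * here also drops the implicit reference that kept the credential's
 * zone_t valid for examination from probe context.)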
*/ if (state->dts_cred.dcr_cred != NULL) crfree(state->dts_cred.dcr_cred); /* * Now we can safely disable and destroy any enabled probes. Because * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress * (especially if they're all enabled), we take two passes through the * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and * in the second we disable whatever is left over. */ for (match = DTRACE_PRIV_KERNEL; ; match = 0) { for (i = 0; i < state->dts_necbs; i++) { if ((ecb = state->dts_ecbs[i]) == NULL) continue; if (match && ecb->dte_probe != NULL) { dtrace_probe_t *probe = ecb->dte_probe; dtrace_provider_t *prov = probe->dtpr_provider; if (!(prov->dtpv_priv.dtpp_flags & match)) continue; } dtrace_ecb_disable(ecb); dtrace_ecb_destroy(ecb); } if (!match) break; } /* * Before we free the buffers, perform one more sync to assure that * every CPU is out of probe context. */ dtrace_sync(); dtrace_buffer_free(state->dts_buffer); dtrace_buffer_free(state->dts_aggbuffer); for (i = 0; i < nspec; i++) dtrace_buffer_free(spec[i].dtsp_buffer); #ifdef illumos if (state->dts_cleaner != CYCLIC_NONE) cyclic_remove(state->dts_cleaner); if (state->dts_deadman != CYCLIC_NONE) cyclic_remove(state->dts_deadman); #else callout_stop(&state->dts_cleaner); callout_drain(&state->dts_cleaner); callout_stop(&state->dts_deadman); callout_drain(&state->dts_deadman); #endif dtrace_dstate_fini(&vstate->dtvs_dynvars); dtrace_vstate_fini(vstate); if (state->dts_ecbs != NULL) kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *)); if (state->dts_aggregations != NULL) { #ifdef DEBUG for (i = 0; i < state->dts_naggregations; i++) ASSERT(state->dts_aggregations[i] == NULL); #endif ASSERT(state->dts_naggregations > 0); kmem_free(state->dts_aggregations, state->dts_naggregations * sizeof (dtrace_aggregation_t *)); } kmem_free(state->dts_buffer, bufsize); kmem_free(state->dts_aggbuffer, bufsize); for (i = 0; i < nspec; i++) kmem_free(spec[i].dtsp_buffer, bufsize); if (spec != NULL) kmem_free(spec, nspec * sizeof (dtrace_speculation_t)); dtrace_format_destroy(state); if (state->dts_aggid_arena != NULL) { #ifdef illumos vmem_destroy(state->dts_aggid_arena); #else delete_unrhdr(state->dts_aggid_arena); #endif state->dts_aggid_arena = NULL; } #ifdef illumos ddi_soft_state_free(dtrace_softstate, minor); vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); #endif } /* * DTrace Anonymous Enabling Functions */ static dtrace_state_t * dtrace_anon_grab(void) { dtrace_state_t *state; ASSERT(MUTEX_HELD(&dtrace_lock)); if ((state = dtrace_anon.dta_state) == NULL) { ASSERT(dtrace_anon.dta_enabling == NULL); return (NULL); } ASSERT(dtrace_anon.dta_enabling != NULL); ASSERT(dtrace_retained != NULL); dtrace_enabling_destroy(dtrace_anon.dta_enabling); dtrace_anon.dta_enabling = NULL; dtrace_anon.dta_state = NULL; return (state); } static void dtrace_anon_property(void) { int i, rv; dtrace_state_t *state; dof_hdr_t *dof; char c[32]; /* enough for "dof-data-" + digits */ ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&cpu_lock)); for (i = 0; ; i++) { (void) snprintf(c, sizeof (c), "dof-data-%d", i); dtrace_err_verbose = 1; if ((dof = dtrace_dof_property(c)) == NULL) { dtrace_err_verbose = 0; break; } #ifdef illumos /* * We want to create anonymous state, so we need to transition * the kernel debugger to indicate that DTrace is active. If * this fails (e.g. because the debugger has modified text in * some way), we won't continue with the processing. 
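 * (This is the same KDI_DTSET_DTRACE_ACTIVATE handshake that
 * dtrace_open() performs for ordinary consumers; if activation fails,
 * the DOF is destroyed and anonymous tracing is simply skipped.)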
*/ if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) { cmn_err(CE_NOTE, "kernel debugger active; anonymous " "enabling ignored."); dtrace_dof_destroy(dof); break; } #endif /* * If we haven't allocated an anonymous state, we'll do so now. */ if ((state = dtrace_anon.dta_state) == NULL) { state = dtrace_state_create(NULL, NULL); dtrace_anon.dta_state = state; if (state == NULL) { /* * This basically shouldn't happen: the only * failure mode from dtrace_state_create() is a * failure of ddi_soft_state_zalloc() that * itself should never happen. Still, the * interface allows for a failure mode, and * we want to fail as gracefully as possible: * we'll emit an error message and cease * processing anonymous state in this case. */ cmn_err(CE_WARN, "failed to create " "anonymous state"); dtrace_dof_destroy(dof); break; } } rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(), &dtrace_anon.dta_enabling, 0, 0, B_TRUE); if (rv == 0) rv = dtrace_dof_options(dof, state); dtrace_err_verbose = 0; dtrace_dof_destroy(dof); if (rv != 0) { /* * This is malformed DOF; chuck any anonymous state * that we created. */ ASSERT(dtrace_anon.dta_enabling == NULL); dtrace_state_destroy(state); dtrace_anon.dta_state = NULL; break; } ASSERT(dtrace_anon.dta_enabling != NULL); } if (dtrace_anon.dta_enabling != NULL) { int rval; /* * dtrace_enabling_retain() can only fail because we are * trying to retain more enablings than are allowed -- but * we only have one anonymous enabling, and we are guaranteed * to be allowed at least one retained enabling; we assert * that dtrace_enabling_retain() returns success. */ rval = dtrace_enabling_retain(dtrace_anon.dta_enabling); ASSERT(rval == 0); dtrace_enabling_dump(dtrace_anon.dta_enabling); } } /* * DTrace Helper Functions */ static void dtrace_helper_trace(dtrace_helper_action_t *helper, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where) { uint32_t size, next, nnext, i; dtrace_helptrace_t *ent, *buffer; uint16_t flags = cpu_core[curcpu].cpuc_dtrace_flags; if ((buffer = dtrace_helptrace_buffer) == NULL) return; ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals); /* * What would a tracing framework be without its own tracing * framework? (Well, a hell of a lot simpler, for starters...) */ size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals * sizeof (uint64_t) - sizeof (uint64_t); /* * Iterate until we can allocate a slot in the trace buffer. */ do { next = dtrace_helptrace_next; if (next + size < dtrace_helptrace_bufsize) { nnext = next + size; } else { nnext = size; } } while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next); /* * We have our slot; fill it in. */ if (nnext == size) { dtrace_helptrace_wrapped++; next = 0; } ent = (dtrace_helptrace_t *)((uintptr_t)buffer + next); ent->dtht_helper = helper; ent->dtht_where = where; ent->dtht_nlocals = vstate->dtvs_nlocals; ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ? 
mstate->dtms_fltoffs : -1; ent->dtht_fault = DTRACE_FLAGS2FLT(flags); ent->dtht_illval = cpu_core[curcpu].cpuc_dtrace_illval; for (i = 0; i < vstate->dtvs_nlocals; i++) { dtrace_statvar_t *svar; if ((svar = vstate->dtvs_locals[i]) == NULL) continue; ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t)); ent->dtht_locals[i] = ((uint64_t *)(uintptr_t)svar->dtsv_data)[curcpu]; } } static uint64_t dtrace_helper(int which, dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t arg0, uint64_t arg1) { uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; uint64_t sarg0 = mstate->dtms_arg[0]; uint64_t sarg1 = mstate->dtms_arg[1]; uint64_t rval = 0; dtrace_helpers_t *helpers = curproc->p_dtrace_helpers; dtrace_helper_action_t *helper; dtrace_vstate_t *vstate; dtrace_difo_t *pred; int i, trace = dtrace_helptrace_buffer != NULL; ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS); if (helpers == NULL) return (0); if ((helper = helpers->dthps_actions[which]) == NULL) return (0); vstate = &helpers->dthps_vstate; mstate->dtms_arg[0] = arg0; mstate->dtms_arg[1] = arg1; /* * Now iterate over each helper. If its predicate evaluates to 'true', * we'll call the corresponding actions. Note that the below calls * to dtrace_dif_emulate() may set faults in machine state. This is * okay: our caller (the outer dtrace_dif_emulate()) will simply plow * the stored DIF offset with its own (which is the desired behavior). * Also, note the calls to dtrace_dif_emulate() may allocate scratch * from machine state; this is okay, too. */ for (; helper != NULL; helper = helper->dtha_next) { if ((pred = helper->dtha_predicate) != NULL) { if (trace) dtrace_helper_trace(helper, mstate, vstate, 0); if (!dtrace_dif_emulate(pred, mstate, vstate, state)) goto next; if (*flags & CPU_DTRACE_FAULT) goto err; } for (i = 0; i < helper->dtha_nactions; i++) { if (trace) dtrace_helper_trace(helper, mstate, vstate, i + 1); rval = dtrace_dif_emulate(helper->dtha_actions[i], mstate, vstate, state); if (*flags & CPU_DTRACE_FAULT) goto err; } next: if (trace) dtrace_helper_trace(helper, mstate, vstate, DTRACE_HELPTRACE_NEXT); } if (trace) dtrace_helper_trace(helper, mstate, vstate, DTRACE_HELPTRACE_DONE); /* * Restore the arg0 that we saved upon entry. */ mstate->dtms_arg[0] = sarg0; mstate->dtms_arg[1] = sarg1; return (rval); err: if (trace) dtrace_helper_trace(helper, mstate, vstate, DTRACE_HELPTRACE_ERR); /* * Restore the arg0 that we saved upon entry. 
*/ mstate->dtms_arg[0] = sarg0; mstate->dtms_arg[1] = sarg1; return (0); } static void dtrace_helper_action_destroy(dtrace_helper_action_t *helper, dtrace_vstate_t *vstate) { int i; if (helper->dtha_predicate != NULL) dtrace_difo_release(helper->dtha_predicate, vstate); for (i = 0; i < helper->dtha_nactions; i++) { ASSERT(helper->dtha_actions[i] != NULL); dtrace_difo_release(helper->dtha_actions[i], vstate); } kmem_free(helper->dtha_actions, helper->dtha_nactions * sizeof (dtrace_difo_t *)); kmem_free(helper, sizeof (dtrace_helper_action_t)); } static int dtrace_helper_destroygen(dtrace_helpers_t *help, int gen) { proc_t *p = curproc; dtrace_vstate_t *vstate; int i; if (help == NULL) help = p->p_dtrace_helpers; ASSERT(MUTEX_HELD(&dtrace_lock)); if (help == NULL || gen > help->dthps_generation) return (EINVAL); vstate = &help->dthps_vstate; for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { dtrace_helper_action_t *last = NULL, *h, *next; for (h = help->dthps_actions[i]; h != NULL; h = next) { next = h->dtha_next; if (h->dtha_generation == gen) { if (last != NULL) { last->dtha_next = next; } else { help->dthps_actions[i] = next; } dtrace_helper_action_destroy(h, vstate); } else { last = h; } } } /* * Iterate until we've cleared out all helper providers with the * given generation number. */ for (;;) { dtrace_helper_provider_t *prov; /* * Look for a helper provider with the right generation. We * have to start back at the beginning of the list each time * because we drop dtrace_lock. It's unlikely that we'll make * more than two passes. */ for (i = 0; i < help->dthps_nprovs; i++) { prov = help->dthps_provs[i]; if (prov->dthp_generation == gen) break; } /* * If there were no matches, we're done. */ if (i == help->dthps_nprovs) break; /* * Move the last helper provider into this slot. */ help->dthps_nprovs--; help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs]; help->dthps_provs[help->dthps_nprovs] = NULL; mutex_exit(&dtrace_lock); /* * If we have a meta provider, remove this helper provider. */ mutex_enter(&dtrace_meta_lock); if (dtrace_meta_pid != NULL) { ASSERT(dtrace_deferred_pid == NULL); dtrace_helper_provider_remove(&prov->dthp_prov, p->p_pid); } mutex_exit(&dtrace_meta_lock); dtrace_helper_provider_destroy(prov); mutex_enter(&dtrace_lock); } return (0); } static int dtrace_helper_validate(dtrace_helper_action_t *helper) { int err = 0, i; dtrace_difo_t *dp; if ((dp = helper->dtha_predicate) != NULL) err += dtrace_difo_validate_helper(dp); for (i = 0; i < helper->dtha_nactions; i++) err += dtrace_difo_validate_helper(helper->dtha_actions[i]); return (err == 0); } static int dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep, dtrace_helpers_t *help) { dtrace_helper_action_t *helper, *last; dtrace_actdesc_t *act; dtrace_vstate_t *vstate; dtrace_predicate_t *pred; int count = 0, nactions = 0, i; if (which < 0 || which >= DTRACE_NHELPER_ACTIONS) return (EINVAL); last = help->dthps_actions[which]; vstate = &help->dthps_vstate; for (count = 0; last != NULL; last = last->dtha_next) { count++; if (last->dtha_next == NULL) break; } /* * If we already have dtrace_helper_actions_max helper actions for this * helper action type, we'll refuse to add a new one.
*/ if (count >= dtrace_helper_actions_max) return (ENOSPC); helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP); helper->dtha_generation = help->dthps_generation; if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) { ASSERT(pred->dtp_difo != NULL); dtrace_difo_hold(pred->dtp_difo); helper->dtha_predicate = pred->dtp_difo; } for (act = ep->dted_action; act != NULL; act = act->dtad_next) { if (act->dtad_kind != DTRACEACT_DIFEXPR) goto err; if (act->dtad_difo == NULL) goto err; nactions++; } helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) * (helper->dtha_nactions = nactions), KM_SLEEP); for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) { dtrace_difo_hold(act->dtad_difo); helper->dtha_actions[i++] = act->dtad_difo; } if (!dtrace_helper_validate(helper)) goto err; if (last == NULL) { help->dthps_actions[which] = helper; } else { last->dtha_next = helper; } if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) { dtrace_helptrace_nlocals = vstate->dtvs_nlocals; dtrace_helptrace_next = 0; } return (0); err: dtrace_helper_action_destroy(helper, vstate); return (EINVAL); } static void dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, dof_helper_t *dofhp) { ASSERT(MUTEX_NOT_HELD(&dtrace_lock)); mutex_enter(&dtrace_meta_lock); mutex_enter(&dtrace_lock); if (!dtrace_attached() || dtrace_meta_pid == NULL) { /* * If the dtrace module is loaded but not attached, or if * there isn't a meta provider registered to deal with * these provider descriptions, we need to postpone creating * the actual providers until later. */ if (help->dthps_next == NULL && help->dthps_prev == NULL && dtrace_deferred_pid != help) { help->dthps_deferred = 1; help->dthps_pid = p->p_pid; help->dthps_next = dtrace_deferred_pid; help->dthps_prev = NULL; if (dtrace_deferred_pid != NULL) dtrace_deferred_pid->dthps_prev = help; dtrace_deferred_pid = help; } mutex_exit(&dtrace_lock); } else if (dofhp != NULL) { /* * If the dtrace module is loaded and we have a particular * helper provider description, pass that off to the * meta provider. */ mutex_exit(&dtrace_lock); dtrace_helper_provide(dofhp, p->p_pid); } else { /* * Otherwise, just pass all the helper provider descriptions * off to the meta provider. */ int i; mutex_exit(&dtrace_lock); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, p->p_pid); } } mutex_exit(&dtrace_meta_lock); } static int dtrace_helper_provider_add(dof_helper_t *dofhp, dtrace_helpers_t *help, int gen) { dtrace_helper_provider_t *hprov, **tmp_provs; uint_t tmp_maxprovs, i; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(help != NULL); /* * If we already have dtrace_helper_providers_max helper providers, * we'll refuse to add a new one. */ if (help->dthps_nprovs >= dtrace_helper_providers_max) return (ENOSPC); /* * Check to make sure this isn't a duplicate. */ for (i = 0; i < help->dthps_nprovs; i++) { if (dofhp->dofhp_addr == help->dthps_provs[i]->dthp_prov.dofhp_addr) return (EALREADY); } hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP); hprov->dthp_prov = *dofhp; hprov->dthp_ref = 1; hprov->dthp_generation = gen; /* * Allocate a bigger table for helper providers if it's already full.
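 * (The table grows geometrically -- 2, 4, 8, ... entries -- and is
 * clamped at dtrace_helper_providers_max, so the copy below happens at
 * most O(log n) times over the life of the process.)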
*/ if (help->dthps_maxprovs == help->dthps_nprovs) { tmp_maxprovs = help->dthps_maxprovs; tmp_provs = help->dthps_provs; if (help->dthps_maxprovs == 0) help->dthps_maxprovs = 2; else help->dthps_maxprovs *= 2; if (help->dthps_maxprovs > dtrace_helper_providers_max) help->dthps_maxprovs = dtrace_helper_providers_max; ASSERT(tmp_maxprovs < help->dthps_maxprovs); help->dthps_provs = kmem_zalloc(help->dthps_maxprovs * sizeof (dtrace_helper_provider_t *), KM_SLEEP); if (tmp_provs != NULL) { bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs * sizeof (dtrace_helper_provider_t *)); kmem_free(tmp_provs, tmp_maxprovs * sizeof (dtrace_helper_provider_t *)); } } help->dthps_provs[help->dthps_nprovs] = hprov; help->dthps_nprovs++; return (0); } static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov) { mutex_enter(&dtrace_lock); if (--hprov->dthp_ref == 0) { dof_hdr_t *dof; mutex_exit(&dtrace_lock); dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof; dtrace_dof_destroy(dof); kmem_free(hprov, sizeof (dtrace_helper_provider_t)); } else { mutex_exit(&dtrace_lock); } } static int dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec) { uintptr_t daddr = (uintptr_t)dof; dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec; dof_provider_t *provider; dof_probe_t *probe; uint8_t *arg; char *strtab, *typestr; dof_stridx_t typeidx; size_t typesz; uint_t nprobes, j, k; ASSERT(sec->dofs_type == DOF_SECT_PROVIDER); if (sec->dofs_offset & (sizeof (uint_t) - 1)) { dtrace_dof_error(dof, "misaligned section offset"); return (-1); } /* * The section needs to be large enough to contain the DOF provider * structure appropriate for the given version. */ if (sec->dofs_size < ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ? offsetof(dof_provider_t, dofpv_prenoffs) : sizeof (dof_provider_t))) { dtrace_dof_error(dof, "provider section too small"); return (-1); } provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab); prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes); arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs); off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs); if (str_sec == NULL || prb_sec == NULL || arg_sec == NULL || off_sec == NULL) return (-1); enoff_sec = NULL; if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && provider->dofpv_prenoffs != DOF_SECT_NONE && (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS, provider->dofpv_prenoffs)) == NULL) return (-1); strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); if (provider->dofpv_name >= str_sec->dofs_size || strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) { dtrace_dof_error(dof, "invalid provider name"); return (-1); } if (prb_sec->dofs_entsize == 0 || prb_sec->dofs_entsize > prb_sec->dofs_size) { dtrace_dof_error(dof, "invalid entry size"); return (-1); } if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) { dtrace_dof_error(dof, "misaligned entry size"); return (-1); } if (off_sec->dofs_entsize != sizeof (uint32_t)) { dtrace_dof_error(dof, "invalid entry size"); return (-1); } if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) { dtrace_dof_error(dof, "misaligned section offset"); return (-1); } if (arg_sec->dofs_entsize != sizeof (uint8_t)) { dtrace_dof_error(dof, "invalid entry size"); return (-1); } arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset); nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize; /* * Take a pass through the 
probes to check for errors. */ for (j = 0; j < nprobes; j++) { probe = (dof_probe_t *)(uintptr_t)(daddr + prb_sec->dofs_offset + j * prb_sec->dofs_entsize); if (probe->dofpr_func >= str_sec->dofs_size) { dtrace_dof_error(dof, "invalid function name"); return (-1); } if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) { dtrace_dof_error(dof, "function name too long"); /* * Keep going if the function name is too long. * Unlike provider and probe names, we cannot reasonably * impose restrictions on function names, since they're * a property of the code being instrumented. We will * skip this probe in dtrace_helper_provide_one(). */ } if (probe->dofpr_name >= str_sec->dofs_size || strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) { dtrace_dof_error(dof, "invalid probe name"); return (-1); } /* * The offset count must not wrap the index, and the offsets * must also not overflow the section's data. */ if (probe->dofpr_offidx + probe->dofpr_noffs < probe->dofpr_offidx || (probe->dofpr_offidx + probe->dofpr_noffs) * off_sec->dofs_entsize > off_sec->dofs_size) { dtrace_dof_error(dof, "invalid probe offset"); return (-1); } if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) { /* * If there's no is-enabled offset section, make sure * there aren't any is-enabled offsets. Otherwise * perform the same checks as for probe offsets * (immediately above). */ if (enoff_sec == NULL) { if (probe->dofpr_enoffidx != 0 || probe->dofpr_nenoffs != 0) { dtrace_dof_error(dof, "is-enabled " "offsets with null section"); return (-1); } } else if (probe->dofpr_enoffidx + probe->dofpr_nenoffs < probe->dofpr_enoffidx || (probe->dofpr_enoffidx + probe->dofpr_nenoffs) * enoff_sec->dofs_entsize > enoff_sec->dofs_size) { dtrace_dof_error(dof, "invalid is-enabled " "offset"); return (-1); } if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) { dtrace_dof_error(dof, "zero probe and " "is-enabled offsets"); return (-1); } } else if (probe->dofpr_noffs == 0) { dtrace_dof_error(dof, "zero probe offsets"); return (-1); } if (probe->dofpr_argidx + probe->dofpr_xargc < probe->dofpr_argidx || (probe->dofpr_argidx + probe->dofpr_xargc) * arg_sec->dofs_entsize > arg_sec->dofs_size) { dtrace_dof_error(dof, "invalid args"); return (-1); } typeidx = probe->dofpr_nargv; typestr = strtab + probe->dofpr_nargv; for (k = 0; k < probe->dofpr_nargc; k++) { if (typeidx >= str_sec->dofs_size) { dtrace_dof_error(dof, "bad " "native argument type"); return (-1); } typesz = strlen(typestr) + 1; if (typesz > DTRACE_ARGTYPELEN) { dtrace_dof_error(dof, "native " "argument type too long"); return (-1); } typeidx += typesz; typestr += typesz; } typeidx = probe->dofpr_xargv; typestr = strtab + probe->dofpr_xargv; for (k = 0; k < probe->dofpr_xargc; k++) { if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) { dtrace_dof_error(dof, "bad " "native argument index"); return (-1); } if (typeidx >= str_sec->dofs_size) { dtrace_dof_error(dof, "bad " "translated argument type"); return (-1); } typesz = strlen(typestr) + 1; if (typesz > DTRACE_ARGTYPELEN) { dtrace_dof_error(dof, "translated argument " "type too long"); return (-1); } typeidx += typesz; typestr += typesz; } } return (0); } static int dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp, struct proc *p) { dtrace_helpers_t *help; dtrace_vstate_t *vstate; dtrace_enabling_t *enab = NULL; int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1; uintptr_t daddr = (uintptr_t)dof; ASSERT(MUTEX_HELD(&dtrace_lock)); if ((help = p->p_dtrace_helpers) == NULL) help = dtrace_helpers_create(p); 
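/*
 * Slurp the DOF into an enabling so that we can examine it: any provider
 * sections are validated below, and each ECB description that names
 * dtrace:helper:ustack: becomes a helper action.
 */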
vstate = &help->dthps_vstate; if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab, dhp->dofhp_addr, dhp->dofhp_dof, B_FALSE)) != 0) { dtrace_dof_destroy(dof); return (rv); } /* * Look for helper providers and validate their descriptions. */ for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + i * dof->dofh_secsize); if (sec->dofs_type != DOF_SECT_PROVIDER) continue; if (dtrace_helper_provider_validate(dof, sec) != 0) { dtrace_enabling_destroy(enab); dtrace_dof_destroy(dof); return (-1); } nprovs++; } /* * Now we need to walk through the ECB descriptions in the enabling. */ for (i = 0; i < enab->dten_ndesc; i++) { dtrace_ecbdesc_t *ep = enab->dten_desc[i]; dtrace_probedesc_t *desc = &ep->dted_probe; if (strcmp(desc->dtpd_provider, "dtrace") != 0) continue; if (strcmp(desc->dtpd_mod, "helper") != 0) continue; if (strcmp(desc->dtpd_func, "ustack") != 0) continue; if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, ep, help)) != 0) { /* * Adding this helper action failed -- we are now going * to rip out the entire generation and return failure. */ (void) dtrace_helper_destroygen(help, help->dthps_generation); dtrace_enabling_destroy(enab); dtrace_dof_destroy(dof); return (-1); } nhelpers++; } if (nhelpers < enab->dten_ndesc) dtrace_dof_error(dof, "unmatched helpers"); gen = help->dthps_generation++; dtrace_enabling_destroy(enab); if (nprovs > 0) { /* * Now that this is in-kernel, we change the sense of the * members: dofhp_dof denotes the in-kernel copy of the DOF * and dofhp_addr denotes the address at user-level. */ dhp->dofhp_addr = dhp->dofhp_dof; dhp->dofhp_dof = (uint64_t)(uintptr_t)dof; if (dtrace_helper_provider_add(dhp, help, gen) == 0) { mutex_exit(&dtrace_lock); dtrace_helper_provider_register(p, help, dhp); mutex_enter(&dtrace_lock); destroy = 0; } } if (destroy) dtrace_dof_destroy(dof); return (gen); } static dtrace_helpers_t * dtrace_helpers_create(proc_t *p) { dtrace_helpers_t *help; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(p->p_dtrace_helpers == NULL); help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP); help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS, KM_SLEEP); p->p_dtrace_helpers = help; dtrace_helpers++; return (help); } #ifdef illumos static #endif void dtrace_helpers_destroy(proc_t *p) { dtrace_helpers_t *help; dtrace_vstate_t *vstate; #ifdef illumos proc_t *p = curproc; #endif int i; mutex_enter(&dtrace_lock); ASSERT(p->p_dtrace_helpers != NULL); ASSERT(dtrace_helpers > 0); help = p->p_dtrace_helpers; vstate = &help->dthps_vstate; /* * We're now going to lose the help from this process. */ p->p_dtrace_helpers = NULL; dtrace_sync(); /* * Destroy the helper actions. */ for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { dtrace_helper_action_t *h, *next; for (h = help->dthps_actions[i]; h != NULL; h = next) { next = h->dtha_next; dtrace_helper_action_destroy(h, vstate); h = next; } } mutex_exit(&dtrace_lock); /* * Destroy the helper providers. */ if (help->dthps_maxprovs > 0) { mutex_enter(&dtrace_meta_lock); if (dtrace_meta_pid != NULL) { ASSERT(dtrace_deferred_pid == NULL); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provider_remove( &help->dthps_provs[i]->dthp_prov, p->p_pid); } } else { mutex_enter(&dtrace_lock); ASSERT(help->dthps_deferred == 0 || help->dthps_next != NULL || help->dthps_prev != NULL || help == dtrace_deferred_pid); /* * Remove the helper from the deferred list.
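 * (dtrace_deferred_pid is a doubly-linked list threaded through
 * dthps_next/dthps_prev; we unlink this process's entry and, if it was
 * at the head, advance dtrace_deferred_pid past it.)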
*/ if (help->dthps_next != NULL) help->dthps_next->dthps_prev = help->dthps_prev; if (help->dthps_prev != NULL) help->dthps_prev->dthps_next = help->dthps_next; if (dtrace_deferred_pid == help) { dtrace_deferred_pid = help->dthps_next; ASSERT(help->dthps_prev == NULL); } mutex_exit(&dtrace_lock); } mutex_exit(&dtrace_meta_lock); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provider_destroy(help->dthps_provs[i]); } kmem_free(help->dthps_provs, help->dthps_maxprovs * sizeof (dtrace_helper_provider_t *)); } mutex_enter(&dtrace_lock); dtrace_vstate_fini(&help->dthps_vstate); kmem_free(help->dthps_actions, sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS); kmem_free(help, sizeof (dtrace_helpers_t)); --dtrace_helpers; mutex_exit(&dtrace_lock); } #ifdef illumos static #endif void dtrace_helpers_duplicate(proc_t *from, proc_t *to) { dtrace_helpers_t *help, *newhelp; dtrace_helper_action_t *helper, *new, *last; dtrace_difo_t *dp; dtrace_vstate_t *vstate; int i, j, sz, hasprovs = 0; mutex_enter(&dtrace_lock); ASSERT(from->p_dtrace_helpers != NULL); ASSERT(dtrace_helpers > 0); help = from->p_dtrace_helpers; newhelp = dtrace_helpers_create(to); ASSERT(to->p_dtrace_helpers != NULL); newhelp->dthps_generation = help->dthps_generation; vstate = &newhelp->dthps_vstate; /* * Duplicate the helper actions. */ for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { if ((helper = help->dthps_actions[i]) == NULL) continue; for (last = NULL; helper != NULL; helper = helper->dtha_next) { new = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP); new->dtha_generation = helper->dtha_generation; if ((dp = helper->dtha_predicate) != NULL) { dp = dtrace_difo_duplicate(dp, vstate); new->dtha_predicate = dp; } new->dtha_nactions = helper->dtha_nactions; sz = sizeof (dtrace_difo_t *) * new->dtha_nactions; new->dtha_actions = kmem_alloc(sz, KM_SLEEP); for (j = 0; j < new->dtha_nactions; j++) { dtrace_difo_t *dp = helper->dtha_actions[j]; ASSERT(dp != NULL); dp = dtrace_difo_duplicate(dp, vstate); new->dtha_actions[j] = dp; } if (last != NULL) { last->dtha_next = new; } else { newhelp->dthps_actions[i] = new; } last = new; } } /* * Duplicate the helper providers and register them with the * DTrace framework. */ if (help->dthps_nprovs > 0) { newhelp->dthps_nprovs = help->dthps_nprovs; newhelp->dthps_maxprovs = help->dthps_nprovs; newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs * sizeof (dtrace_helper_provider_t *), KM_SLEEP); for (i = 0; i < newhelp->dthps_nprovs; i++) { newhelp->dthps_provs[i] = help->dthps_provs[i]; newhelp->dthps_provs[i]->dthp_ref++; } hasprovs = 1; } mutex_exit(&dtrace_lock); if (hasprovs) dtrace_helper_provider_register(to, newhelp, NULL); } /* * DTrace Hook Functions */ static void dtrace_module_loaded(modctl_t *ctl) { dtrace_provider_t *prv; mutex_enter(&dtrace_provider_lock); #ifdef illumos mutex_enter(&mod_lock); #endif #ifdef illumos ASSERT(ctl->mod_busy); #endif /* * We're going to call each providers per-module provide operation * specifying only this module. */ for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); /* * If we have any retained enablings, we need to match against them. * Enabling probes requires that cpu_lock be held, and we cannot hold * cpu_lock here -- it is legal for cpu_lock to be held when loading a * module. (In particular, this happens when loading scheduling * classes.) 
So if we have any retained enablings, we need to dispatch * our task queue to do the match for us. */ mutex_enter(&dtrace_lock); if (dtrace_retained == NULL) { mutex_exit(&dtrace_lock); return; } (void) taskq_dispatch(dtrace_taskq, (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP); mutex_exit(&dtrace_lock); /* * And now, for a little heuristic sleaze: in general, we want to * match modules as soon as they load. However, we cannot guarantee * this, because it would lead us to the lock ordering violation * outlined above. The common case, of course, is that cpu_lock is * _not_ held -- so we delay here for a clock tick, hoping that that's * long enough for the task queue to do its work. If it's not, it's * not a serious problem -- it just means that the module that we * just loaded may not be immediately instrumentable. */ delay(1); } static void #ifdef illumos dtrace_module_unloaded(modctl_t *ctl) #else dtrace_module_unloaded(modctl_t *ctl, int *error) #endif { dtrace_probe_t template, *probe, *first, *next; dtrace_provider_t *prov; #ifndef illumos char modname[DTRACE_MODNAMELEN]; size_t len; #endif #ifdef illumos template.dtpr_mod = ctl->mod_modname; #else /* Handle the fact that ctl->filename may end in ".ko". */ strlcpy(modname, ctl->filename, sizeof(modname)); len = strlen(ctl->filename); if (len > 3 && strcmp(modname + len - 3, ".ko") == 0) modname[len - 3] = '\0'; template.dtpr_mod = modname; #endif mutex_enter(&dtrace_provider_lock); #ifdef illumos mutex_enter(&mod_lock); #endif mutex_enter(&dtrace_lock); #ifndef illumos if (ctl->nenabled > 0) { /* Don't allow unloads if a probe is enabled. */ mutex_exit(&dtrace_provider_lock); mutex_exit(&dtrace_lock); *error = -1; printf( "kldunload: attempt to unload module that has DTrace probes enabled\n"); return; } #endif if (dtrace_bymod == NULL) { /* * The DTrace module is loaded (obviously) but not attached; * we don't have any work to do. */ mutex_exit(&dtrace_provider_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_lock); return; } for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template); probe != NULL; probe = probe->dtpr_nextmod) { if (probe->dtpr_ecb != NULL) { mutex_exit(&dtrace_provider_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_lock); /* * This shouldn't _actually_ be possible -- we're * unloading a module that has an enabled probe in it. * (It's normally up to the provider to make sure that * this can't happen.) However, because dtps_enable() * doesn't have a failure mode, there can be an * enable/unload race. Upshot: we don't want to * assert, but we're not going to disable the * probe, either. */ if (dtrace_err_verbose) { #ifdef illumos cmn_err(CE_WARN, "unloaded module '%s' had " "enabled probes", ctl->mod_modname); #else cmn_err(CE_WARN, "unloaded module '%s' had " "enabled probes", modname); #endif } return; } } probe = first; for (first = NULL; probe != NULL; probe = next) { ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe); dtrace_probes[probe->dtpr_id - 1] = NULL; next = probe->dtpr_nextmod; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); dtrace_hash_remove(dtrace_byname, probe); if (first == NULL) { first = probe; probe->dtpr_nextmod = NULL; } else { probe->dtpr_nextmod = first; first = probe; } } /* * We've removed all of the module's probes from the hash chains and * from the probe array. Now issue a dtrace_sync() to be sure that * everyone has cleared out from any probe array processing. 
*/ dtrace_sync(); for (probe = first; probe != NULL; probe = first) { first = probe->dtpr_nextmod; prov = probe->dtpr_provider; prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); #ifdef illumos vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1); #else free_unr(dtrace_arena, probe->dtpr_id); #endif kmem_free(probe, sizeof (dtrace_probe_t)); } mutex_exit(&dtrace_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); } #ifndef illumos static void dtrace_kld_load(void *arg __unused, linker_file_t lf) { dtrace_module_loaded(lf); } static void dtrace_kld_unload_try(void *arg __unused, linker_file_t lf, int *error) { if (*error != 0) /* We already have an error, so don't do anything. */ return; dtrace_module_unloaded(lf, error); } #endif #ifdef illumos static void dtrace_suspend(void) { dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend)); } static void dtrace_resume(void) { dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume)); } #endif static int dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu) { ASSERT(MUTEX_HELD(&cpu_lock)); mutex_enter(&dtrace_lock); switch (what) { case CPU_CONFIG: { dtrace_state_t *state; dtrace_optval_t *opt, rs, c; /* * For now, we only allocate a new buffer for anonymous state. */ if ((state = dtrace_anon.dta_state) == NULL) break; if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) break; opt = state->dts_options; c = opt[DTRACEOPT_CPU]; if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu) break; /* * Regardless of what the actual policy is, we're going to * temporarily set our resize policy to be manual. We're * also going to temporarily set our CPU option to denote * the newly configured CPU. */ rs = opt[DTRACEOPT_BUFRESIZE]; opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL; opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu; (void) dtrace_state_buffers(state); opt[DTRACEOPT_BUFRESIZE] = rs; opt[DTRACEOPT_CPU] = c; break; } case CPU_UNCONFIG: /* * We don't free the buffer in the CPU_UNCONFIG case. (The * buffer will be freed when the consumer exits.) 
*/ break; default: break; } mutex_exit(&dtrace_lock); return (0); } #ifdef illumos static void dtrace_cpu_setup_initial(processorid_t cpu) { (void) dtrace_cpu_setup(CPU_CONFIG, cpu); } #endif static void dtrace_toxrange_add(uintptr_t base, uintptr_t limit) { if (dtrace_toxranges >= dtrace_toxranges_max) { int osize, nsize; dtrace_toxrange_t *range; osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t); if (osize == 0) { ASSERT(dtrace_toxrange == NULL); ASSERT(dtrace_toxranges_max == 0); dtrace_toxranges_max = 1; } else { dtrace_toxranges_max <<= 1; } nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t); range = kmem_zalloc(nsize, KM_SLEEP); if (dtrace_toxrange != NULL) { ASSERT(osize != 0); bcopy(dtrace_toxrange, range, osize); kmem_free(dtrace_toxrange, osize); } dtrace_toxrange = range; } ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == 0); ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == 0); dtrace_toxrange[dtrace_toxranges].dtt_base = base; dtrace_toxrange[dtrace_toxranges].dtt_limit = limit; dtrace_toxranges++; } static void dtrace_getf_barrier() { #ifdef illumos /* * When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings * that contain calls to getf(), this routine will be called on every * closef() before either the underlying vnode is released or the * file_t itself is freed. By the time we are here, it is essential * that the file_t can no longer be accessed from a call to getf() * in probe context -- that assures that a dtrace_sync() can be used * to clear out any enablings referring to the old structures. */ if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 || kcred->cr_zone->zone_dtrace_getf != 0) dtrace_sync(); #endif } /* * DTrace Driver Cookbook Functions */ #ifdef illumos /*ARGSUSED*/ static int dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) { dtrace_provider_id_t id; dtrace_state_t *state = NULL; dtrace_enabling_t *enab; mutex_enter(&cpu_lock); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); if (ddi_soft_state_init(&dtrace_softstate, sizeof (dtrace_state_t), 0) != 0) { cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state"); mutex_exit(&cpu_lock); mutex_exit(&dtrace_provider_lock); mutex_exit(&dtrace_lock); return (DDI_FAILURE); } if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR, DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE || ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR, DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) { cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes"); ddi_remove_minor_node(devi, NULL); ddi_soft_state_fini(&dtrace_softstate); mutex_exit(&cpu_lock); mutex_exit(&dtrace_provider_lock); mutex_exit(&dtrace_lock); return (DDI_FAILURE); } ddi_report_dev(devi); dtrace_devi = devi; dtrace_modload = dtrace_module_loaded; dtrace_modunload = dtrace_module_unloaded; dtrace_cpu_init = dtrace_cpu_setup_initial; dtrace_helpers_cleanup = dtrace_helpers_destroy; dtrace_helpers_fork = dtrace_helpers_duplicate; dtrace_cpustart_init = dtrace_suspend; dtrace_cpustart_fini = dtrace_resume; dtrace_debugger_init = dtrace_suspend; dtrace_debugger_fini = dtrace_resume; register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); ASSERT(MUTEX_HELD(&cpu_lock)); dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE, UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); dtrace_taskq = taskq_create("dtrace_taskq", 1, 
maxclsyspri, 1, INT_MAX, 0); dtrace_state_cache = kmem_cache_create("dtrace_state_cache", sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); ASSERT(MUTEX_HELD(&cpu_lock)); dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod), offsetof(dtrace_probe_t, dtpr_nextmod), offsetof(dtrace_probe_t, dtpr_prevmod)); dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func), offsetof(dtrace_probe_t, dtpr_nextfunc), offsetof(dtrace_probe_t, dtpr_prevfunc)); dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name), offsetof(dtrace_probe_t, dtpr_nextname), offsetof(dtrace_probe_t, dtpr_prevname)); if (dtrace_retain_max < 1) { cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; " "setting to 1", dtrace_retain_max); dtrace_retain_max = 1; } /* * Now discover our toxic ranges. */ dtrace_toxic_ranges(dtrace_toxrange_add); /* * Before we register ourselves as a provider to our own framework, * we would like to assert that dtrace_provider is NULL -- but that's * not true if we were loaded as a dependency of a DTrace provider. * Once we've registered, we can assert that dtrace_provider is our * pseudo provider. */ (void) dtrace_register("dtrace", &dtrace_provider_attr, DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id); ASSERT(dtrace_provider != NULL); ASSERT((dtrace_provider_id_t)dtrace_provider == id); dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "BEGIN", 0, NULL); dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "END", 0, NULL); dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "ERROR", 1, NULL); dtrace_anon_property(); mutex_exit(&cpu_lock); /* * If there are already providers, we must ask them to provide their * probes, and then match any anonymous enabling against them. Note * that there should be no other retained enablings at this time: * the only retained enablings at this time should be the anonymous * enabling. */ if (dtrace_anon.dta_enabling != NULL) { ASSERT(dtrace_retained == dtrace_anon.dta_enabling); dtrace_enabling_provide(NULL); state = dtrace_anon.dta_state; /* * We couldn't hold cpu_lock across the above call to * dtrace_enabling_provide(), but we must hold it to actually * enable the probes. We have to drop all of our locks, pick * up cpu_lock, and regain our locks before matching the * retained anonymous enabling. */ mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); mutex_enter(&cpu_lock); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); if ((enab = dtrace_anon.dta_enabling) != NULL) (void) dtrace_enabling_match(enab, NULL); mutex_exit(&cpu_lock); } mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); if (state != NULL) { /* * If we created any anonymous state, set it going now. */ (void) dtrace_state_go(state, &dtrace_anon.dta_beganon); } return (DDI_SUCCESS); } #endif /* illumos */ #ifndef illumos static void dtrace_dtr(void *); #endif /*ARGSUSED*/ static int #ifdef illumos dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) #else dtrace_open(struct cdev *dev, int oflags, int devtype, struct thread *td) #endif { dtrace_state_t *state; uint32_t priv; uid_t uid; zoneid_t zoneid; #ifdef illumos if (getminor(*devp) == DTRACEMNRN_HELPER) return (0); /* * If this wasn't an open with the "helper" minor, then it must be * the "dtrace" minor. 
*/ if (getminor(*devp) == DTRACEMNRN_DTRACE) return (ENXIO); #else cred_t *cred_p = NULL; cred_p = dev->si_cred; /* * If no DTRACE_PRIV_* bits are set in the credential, then the * caller lacks sufficient permission to do anything with DTrace. */ dtrace_cred2priv(cred_p, &priv, &uid, &zoneid); if (priv == DTRACE_PRIV_NONE) { #endif return (EACCES); } /* * Ask all providers to provide all their probes. */ mutex_enter(&dtrace_provider_lock); dtrace_probe_provide(NULL, NULL); mutex_exit(&dtrace_provider_lock); mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); dtrace_opens++; dtrace_membar_producer(); #ifdef illumos /* * If the kernel debugger is active (that is, if the kernel debugger * modified text in some way), we won't allow the open. */ if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) { dtrace_opens--; mutex_exit(&cpu_lock); mutex_exit(&dtrace_lock); return (EBUSY); } if (dtrace_helptrace_enable && dtrace_helptrace_buffer == NULL) { /* * If DTrace helper tracing is enabled, we need to allocate the * trace buffer and initialize the values. */ dtrace_helptrace_buffer = kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP); dtrace_helptrace_next = 0; dtrace_helptrace_wrapped = 0; dtrace_helptrace_enable = 0; } state = dtrace_state_create(devp, cred_p); #else state = dtrace_state_create(dev, NULL); devfs_set_cdevpriv(state, dtrace_dtr); #endif mutex_exit(&cpu_lock); if (state == NULL) { #ifdef illumos if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); #else --dtrace_opens; #endif mutex_exit(&dtrace_lock); return (EAGAIN); } mutex_exit(&dtrace_lock); return (0); } /*ARGSUSED*/ #ifdef illumos static int dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) #else static void dtrace_dtr(void *data) #endif { #ifdef illumos minor_t minor = getminor(dev); dtrace_state_t *state; #endif dtrace_helptrace_t *buf = NULL; #ifdef illumos if (minor == DTRACEMNRN_HELPER) return (0); state = ddi_get_soft_state(dtrace_softstate, minor); #else dtrace_state_t *state = data; #endif mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); #ifdef illumos if (state->dts_anon) #else if (state != NULL && state->dts_anon) #endif { /* * There is anonymous state. Destroy that first. */ ASSERT(dtrace_anon.dta_state == NULL); dtrace_state_destroy(state->dts_anon); } if (dtrace_helptrace_disable) { /* * If we have been told to disable helper tracing, set the * buffer to NULL before calling into dtrace_state_destroy(); * we take advantage of its dtrace_sync() to know that no * CPU is in probe context with enabled helper tracing * after it returns. */ buf = dtrace_helptrace_buffer; dtrace_helptrace_buffer = NULL; } #ifdef illumos dtrace_state_destroy(state); #else if (state != NULL) { dtrace_state_destroy(state); kmem_free(state, 0); } #endif ASSERT(dtrace_opens > 0); #ifdef illumos /* * Only relinquish control of the kernel debugger interface when there * are no consumers and no anonymous enablings. 
*/ if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); #else --dtrace_opens; #endif if (buf != NULL) { kmem_free(buf, dtrace_helptrace_bufsize); dtrace_helptrace_disable = 0; } mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); #ifdef illumos return (0); #endif } #ifdef illumos /*ARGSUSED*/ static int dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv) { int rval; dof_helper_t help, *dhp = NULL; switch (cmd) { case DTRACEHIOC_ADDDOF: if (copyin((void *)arg, &help, sizeof (help)) != 0) { dtrace_dof_error(NULL, "failed to copyin DOF helper"); return (EFAULT); } dhp = &help; arg = (intptr_t)help.dofhp_dof; /*FALLTHROUGH*/ case DTRACEHIOC_ADD: { dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval); if (dof == NULL) return (rval); mutex_enter(&dtrace_lock); /* * dtrace_helper_slurp() takes responsibility for the dof -- * it may free it now or it may save it and free it later. */ if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) { *rv = rval; rval = 0; } else { rval = EINVAL; } mutex_exit(&dtrace_lock); return (rval); } case DTRACEHIOC_REMOVE: { mutex_enter(&dtrace_lock); rval = dtrace_helper_destroygen(NULL, arg); mutex_exit(&dtrace_lock); return (rval); } default: break; } return (ENOTTY); } /*ARGSUSED*/ static int dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) { minor_t minor = getminor(dev); dtrace_state_t *state; int rval; if (minor == DTRACEMNRN_HELPER) return (dtrace_ioctl_helper(cmd, arg, rv)); state = ddi_get_soft_state(dtrace_softstate, minor); if (state->dts_anon) { ASSERT(dtrace_anon.dta_state == NULL); state = state->dts_anon; } switch (cmd) { case DTRACEIOC_PROVIDER: { dtrace_providerdesc_t pvd; dtrace_provider_t *pvp; if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0) return (EFAULT); pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0'; mutex_enter(&dtrace_provider_lock); for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) { if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0) break; } mutex_exit(&dtrace_provider_lock); if (pvp == NULL) return (ESRCH); bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t)); bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t)); if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0) return (EFAULT); return (0); } case DTRACEIOC_EPROBE: { dtrace_eprobedesc_t epdesc; dtrace_ecb_t *ecb; dtrace_action_t *act; void *buf; size_t size; uintptr_t dest; int nrecs; if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) { mutex_exit(&dtrace_lock); return (EINVAL); } if (ecb->dte_probe == NULL) { mutex_exit(&dtrace_lock); return (EINVAL); } epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id; epdesc.dtepd_uarg = ecb->dte_uarg; epdesc.dtepd_size = ecb->dte_size; nrecs = epdesc.dtepd_nrecs; epdesc.dtepd_nrecs = 0; for (act = ecb->dte_action; act != NULL; act = act->dta_next) { if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) continue; epdesc.dtepd_nrecs++; } /* * Now that we have the size, we need to allocate a temporary * buffer in which to store the complete description. We need * the temporary buffer to be able to drop dtrace_lock() * across the copyout(), below. 
*/ size = sizeof (dtrace_eprobedesc_t) + (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); buf = kmem_alloc(size, KM_SLEEP); dest = (uintptr_t)buf; bcopy(&epdesc, (void *)dest, sizeof (epdesc)); dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]); for (act = ecb->dte_action; act != NULL; act = act->dta_next) { if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) continue; if (nrecs-- == 0) break; bcopy(&act->dta_rec, (void *)dest, sizeof (dtrace_recdesc_t)); dest += sizeof (dtrace_recdesc_t); } mutex_exit(&dtrace_lock); if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { kmem_free(buf, size); return (EFAULT); } kmem_free(buf, size); return (0); } case DTRACEIOC_AGGDESC: { dtrace_aggdesc_t aggdesc; dtrace_action_t *act; dtrace_aggregation_t *agg; int nrecs; uint32_t offs; dtrace_recdesc_t *lrec; void *buf; size_t size; uintptr_t dest; if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) { mutex_exit(&dtrace_lock); return (EINVAL); } aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid; nrecs = aggdesc.dtagd_nrecs; aggdesc.dtagd_nrecs = 0; offs = agg->dtag_base; lrec = &agg->dtag_action.dta_rec; aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs; for (act = agg->dtag_first; ; act = act->dta_next) { ASSERT(act->dta_intuple || DTRACEACT_ISAGG(act->dta_kind)); /* * If this action has a record size of zero, it * denotes an argument to the aggregating action. * Because the presence of this record doesn't (or * shouldn't) affect the way the data is interpreted, * we don't copy it out to save user-level the * confusion of dealing with a zero-length record. */ if (act->dta_rec.dtrd_size == 0) { ASSERT(agg->dtag_hasarg); continue; } aggdesc.dtagd_nrecs++; if (act == &agg->dtag_action) break; } /* * Now that we have the size, we need to allocate a temporary * buffer in which to store the complete description. We need * the temporary buffer to be able to drop dtrace_lock() * across the copyout(), below. */ size = sizeof (dtrace_aggdesc_t) + (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); buf = kmem_alloc(size, KM_SLEEP); dest = (uintptr_t)buf; bcopy(&aggdesc, (void *)dest, sizeof (aggdesc)); dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]); for (act = agg->dtag_first; ; act = act->dta_next) { dtrace_recdesc_t rec = act->dta_rec; /* * See the comment in the above loop for why we pass * over zero-length records. */ if (rec.dtrd_size == 0) { ASSERT(agg->dtag_hasarg); continue; } if (nrecs-- == 0) break; rec.dtrd_offset -= offs; bcopy(&rec, (void *)dest, sizeof (rec)); dest += sizeof (dtrace_recdesc_t); if (act == &agg->dtag_action) break; } mutex_exit(&dtrace_lock); if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { kmem_free(buf, size); return (EFAULT); } kmem_free(buf, size); return (0); } case DTRACEIOC_ENABLE: { dof_hdr_t *dof; dtrace_enabling_t *enab = NULL; dtrace_vstate_t *vstate; int err = 0; *rv = 0; /* * If a NULL argument has been passed, we take this as our * cue to reevaluate our enablings. 
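(An illustrative aside, not part of this source: a minimal user-space sketch of driving the DTRACEIOC_ENABLE path, assuming the illumos-style convention in which the ioctl argument is a pointer to a complete DOF image, or NULL to ask the framework to re-evaluate every retained enabling; FreeBSD wraps these commands differently, so the call shape below is an assumption.)

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/dtrace.h>
#include <stdio.h>
#include <unistd.h>

int
enable_dof(int fd, dof_hdr_t *dof)
{
        /*
         * dof == NULL corresponds to the arg == NULL branch below and simply
         * re-matches the retained enablings.  On success the ioctl returns
         * the number of probes matched (illumos convention).
         */
        if (ioctl(fd, DTRACEIOC_ENABLE, dof) == -1) {
                perror("DTRACEIOC_ENABLE");
                return (-1);
        }
        return (0);
}
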
*/ if (arg == NULL) { dtrace_enabling_matchall(); return (0); } if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL) return (rval); mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); vstate = &state->dts_vstate; if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); return (EBUSY); } if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) { mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); return (EINVAL); } if ((rval = dtrace_dof_options(dof, state)) != 0) { dtrace_enabling_destroy(enab); mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); return (rval); } if ((err = dtrace_enabling_match(enab, rv)) == 0) { err = dtrace_enabling_retain(enab); } else { dtrace_enabling_destroy(enab); } mutex_exit(&cpu_lock); mutex_exit(&dtrace_lock); dtrace_dof_destroy(dof); return (err); } case DTRACEIOC_REPLICATE: { dtrace_repldesc_t desc; dtrace_probedesc_t *match = &desc.dtrpd_match; dtrace_probedesc_t *create = &desc.dtrpd_create; int err; if (copyin((void *)arg, &desc, sizeof (desc)) != 0) return (EFAULT); match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; match->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; create->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; mutex_enter(&dtrace_lock); err = dtrace_enabling_replicate(state, match, create); mutex_exit(&dtrace_lock); return (err); } case DTRACEIOC_PROBEMATCH: case DTRACEIOC_PROBES: { dtrace_probe_t *probe = NULL; dtrace_probedesc_t desc; dtrace_probekey_t pkey; dtrace_id_t i; int m = 0; uint32_t priv; uid_t uid; zoneid_t zoneid; if (copyin((void *)arg, &desc, sizeof (desc)) != 0) return (EFAULT); desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0'; /* * Before we attempt to match this probe, we want to give * all providers the opportunity to provide it. 
*/ if (desc.dtpd_id == DTRACE_IDNONE) { mutex_enter(&dtrace_provider_lock); dtrace_probe_provide(&desc, NULL); mutex_exit(&dtrace_provider_lock); desc.dtpd_id++; } if (cmd == DTRACEIOC_PROBEMATCH) { dtrace_probekey(&desc, &pkey); pkey.dtpk_id = DTRACE_IDNONE; } dtrace_cred2priv(cr, &priv, &uid, &zoneid); mutex_enter(&dtrace_lock); if (cmd == DTRACEIOC_PROBEMATCH) { for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && (m = dtrace_match_probe(probe, &pkey, priv, uid, zoneid)) != 0) break; } if (m < 0) { mutex_exit(&dtrace_lock); return (EINVAL); } } else { for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && dtrace_match_priv(probe, priv, uid, zoneid)) break; } } if (probe == NULL) { mutex_exit(&dtrace_lock); return (ESRCH); } dtrace_probe_description(probe, &desc); mutex_exit(&dtrace_lock); if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) return (EFAULT); return (0); } case DTRACEIOC_PROBEARG: { dtrace_argdesc_t desc; dtrace_probe_t *probe; dtrace_provider_t *prov; if (copyin((void *)arg, &desc, sizeof (desc)) != 0) return (EFAULT); if (desc.dtargd_id == DTRACE_IDNONE) return (EINVAL); if (desc.dtargd_ndx == DTRACE_ARGNONE) return (EINVAL); mutex_enter(&dtrace_provider_lock); mutex_enter(&mod_lock); mutex_enter(&dtrace_lock); if (desc.dtargd_id > dtrace_nprobes) { mutex_exit(&dtrace_lock); mutex_exit(&mod_lock); mutex_exit(&dtrace_provider_lock); return (EINVAL); } if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) { mutex_exit(&dtrace_lock); mutex_exit(&mod_lock); mutex_exit(&dtrace_provider_lock); return (EINVAL); } mutex_exit(&dtrace_lock); prov = probe->dtpr_provider; if (prov->dtpv_pops.dtps_getargdesc == NULL) { /* * There isn't any typed information for this probe. * Set the argument number to DTRACE_ARGNONE. */ desc.dtargd_ndx = DTRACE_ARGNONE; } else { desc.dtargd_native[0] = '\0'; desc.dtargd_xlate[0] = '\0'; desc.dtargd_mapping = desc.dtargd_ndx; prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg, &desc); } mutex_exit(&mod_lock); mutex_exit(&dtrace_provider_lock); if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) return (EFAULT); return (0); } case DTRACEIOC_GO: { processorid_t cpuid; rval = dtrace_state_go(state, &cpuid); if (rval != 0) return (rval); if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) return (EFAULT); return (0); } case DTRACEIOC_STOP: { processorid_t cpuid; mutex_enter(&dtrace_lock); rval = dtrace_state_stop(state, &cpuid); mutex_exit(&dtrace_lock); if (rval != 0) return (rval); if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) return (EFAULT); return (0); } case DTRACEIOC_DOFGET: { dof_hdr_t hdr, *dof; uint64_t len; if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); dof = dtrace_dof_create(state); mutex_exit(&dtrace_lock); len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); rval = copyout(dof, (void *)arg, len); dtrace_dof_destroy(dof); return (rval == 0 ? 
0 : EFAULT); } case DTRACEIOC_AGGSNAP: case DTRACEIOC_BUFSNAP: { dtrace_bufdesc_t desc; caddr_t cached; dtrace_buffer_t *buf; if (copyin((void *)arg, &desc, sizeof (desc)) != 0) return (EFAULT); if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU) return (EINVAL); mutex_enter(&dtrace_lock); if (cmd == DTRACEIOC_BUFSNAP) { buf = &state->dts_buffer[desc.dtbd_cpu]; } else { buf = &state->dts_aggbuffer[desc.dtbd_cpu]; } if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) { size_t sz = buf->dtb_offset; if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) { mutex_exit(&dtrace_lock); return (EBUSY); } /* * If this buffer has already been consumed, we're * going to indicate that there's nothing left here * to consume. */ if (buf->dtb_flags & DTRACEBUF_CONSUMED) { mutex_exit(&dtrace_lock); desc.dtbd_size = 0; desc.dtbd_drops = 0; desc.dtbd_errors = 0; desc.dtbd_oldest = 0; sz = sizeof (desc); if (copyout(&desc, (void *)arg, sz) != 0) return (EFAULT); return (0); } /* * If this is a ring buffer that has wrapped, we want * to copy the whole thing out. */ if (buf->dtb_flags & DTRACEBUF_WRAPPED) { dtrace_buffer_polish(buf); sz = buf->dtb_size; } if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) { mutex_exit(&dtrace_lock); return (EFAULT); } desc.dtbd_size = sz; desc.dtbd_drops = buf->dtb_drops; desc.dtbd_errors = buf->dtb_errors; desc.dtbd_oldest = buf->dtb_xamot_offset; desc.dtbd_timestamp = dtrace_gethrtime(); mutex_exit(&dtrace_lock); if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) return (EFAULT); buf->dtb_flags |= DTRACEBUF_CONSUMED; return (0); } if (buf->dtb_tomax == NULL) { ASSERT(buf->dtb_xamot == NULL); mutex_exit(&dtrace_lock); return (ENOENT); } cached = buf->dtb_tomax; ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); dtrace_xcall(desc.dtbd_cpu, (dtrace_xcall_t)dtrace_buffer_switch, buf); state->dts_errors += buf->dtb_xamot_errors; /* * If the buffers did not actually switch, then the cross call * did not take place -- presumably because the given CPU is * not in the ready set. If this is the case, we'll return * ENOENT. */ if (buf->dtb_tomax == cached) { ASSERT(buf->dtb_xamot != cached); mutex_exit(&dtrace_lock); return (ENOENT); } ASSERT(cached == buf->dtb_xamot); /* * We have our snapshot; now copy it out. */ if (copyout(buf->dtb_xamot, desc.dtbd_data, buf->dtb_xamot_offset) != 0) { mutex_exit(&dtrace_lock); return (EFAULT); } desc.dtbd_size = buf->dtb_xamot_offset; desc.dtbd_drops = buf->dtb_xamot_drops; desc.dtbd_errors = buf->dtb_xamot_errors; desc.dtbd_oldest = 0; desc.dtbd_timestamp = buf->dtb_switched; mutex_exit(&dtrace_lock); /* * Finally, copy out the buffer description. */ if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) return (EFAULT); return (0); } case DTRACEIOC_CONF: { dtrace_conf_t conf; bzero(&conf, sizeof (conf)); conf.dtc_difversion = DIF_VERSION; conf.dtc_difintregs = DIF_DIR_NREGS; conf.dtc_diftupregs = DIF_DTR_NREGS; conf.dtc_ctfmodel = CTF_MODEL_NATIVE; if (copyout(&conf, (void *)arg, sizeof (conf)) != 0) return (EFAULT); return (0); } case DTRACEIOC_STATUS: { dtrace_status_t stat; dtrace_dstate_t *dstate; int i, j; uint64_t nerrs; /* * See the comment in dtrace_state_deadman() for the reason * for setting dts_laststatus to INT64_MAX before setting * it to the correct value. 
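(Another illustrative aside, not part of this source, on the DTRACEIOC_BUFSNAP case above: a hedged consumer-side sketch in which the caller names a CPU in dtbd_cpu and points dtbd_data at local storage, and the kernel fills in the snapshot size and drop counts. The destination must be at least as large as the in-kernel buffer the session was configured with, and illumos-style ioctl argument passing is assumed.)

#include <sys/dtrace.h>
#include <sys/ioctl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
snapshot_cpu_buffer(int fd, int cpu, size_t bufsize)
{
        dtrace_bufdesc_t desc;
        char *data;

        if ((data = malloc(bufsize)) == NULL)
                return (-1);

        memset(&desc, 0, sizeof (desc));
        desc.dtbd_cpu = cpu;
        desc.dtbd_data = data;          /* kernel copies the snapshot here */

        if (ioctl(fd, DTRACEIOC_BUFSNAP, &desc) == -1) {
                perror("DTRACEIOC_BUFSNAP");
                free(data);
                return (-1);
        }

        /* desc.dtbd_size bytes of trace records are now in data[]. */
        printf("cpu %d: %llu bytes, %llu drops\n", cpu,
            (unsigned long long)desc.dtbd_size,
            (unsigned long long)desc.dtbd_drops);
        free(data);
        return (0);
}
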
*/ state->dts_laststatus = INT64_MAX; dtrace_membar_producer(); state->dts_laststatus = dtrace_gethrtime(); bzero(&stat, sizeof (stat)); mutex_enter(&dtrace_lock); if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) { mutex_exit(&dtrace_lock); return (ENOENT); } if (state->dts_activity == DTRACE_ACTIVITY_DRAINING) stat.dtst_exiting = 1; nerrs = state->dts_errors; dstate = &state->dts_vstate.dtvs_dynvars; for (i = 0; i < NCPU; i++) { dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i]; stat.dtst_dyndrops += dcpu->dtdsc_drops; stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops; stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops; if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL) stat.dtst_filled++; nerrs += state->dts_buffer[i].dtb_errors; for (j = 0; j < state->dts_nspeculations; j++) { dtrace_speculation_t *spec; dtrace_buffer_t *buf; spec = &state->dts_speculations[j]; buf = &spec->dtsp_buffer[i]; stat.dtst_specdrops += buf->dtb_xamot_drops; } } stat.dtst_specdrops_busy = state->dts_speculations_busy; stat.dtst_specdrops_unavail = state->dts_speculations_unavail; stat.dtst_stkstroverflows = state->dts_stkstroverflows; stat.dtst_dblerrors = state->dts_dblerrors; stat.dtst_killed = (state->dts_activity == DTRACE_ACTIVITY_KILLED); stat.dtst_errors = nerrs; mutex_exit(&dtrace_lock); if (copyout(&stat, (void *)arg, sizeof (stat)) != 0) return (EFAULT); return (0); } case DTRACEIOC_FORMAT: { dtrace_fmtdesc_t fmt; char *str; int len; if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); if (fmt.dtfd_format == 0 || fmt.dtfd_format > state->dts_nformats) { mutex_exit(&dtrace_lock); return (EINVAL); } /* * Format strings are allocated contiguously and they are * never freed; if a format index is less than the number * of formats, we can assert that the format map is non-NULL * and that the format for the specified index is non-NULL. */ ASSERT(state->dts_formats != NULL); str = state->dts_formats[fmt.dtfd_format - 1]; ASSERT(str != NULL); len = strlen(str) + 1; if (len > fmt.dtfd_length) { fmt.dtfd_length = len; if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) { mutex_exit(&dtrace_lock); return (EINVAL); } } else { if (copyout(str, fmt.dtfd_string, len) != 0) { mutex_exit(&dtrace_lock); return (EINVAL); } } mutex_exit(&dtrace_lock); return (0); } default: break; } return (ENOTTY); } /*ARGSUSED*/ static int dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { dtrace_state_t *state; switch (cmd) { case DDI_DETACH: break; case DDI_SUSPEND: return (DDI_SUCCESS); default: return (DDI_FAILURE); } mutex_enter(&cpu_lock); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); ASSERT(dtrace_opens == 0); if (dtrace_helpers > 0) { mutex_exit(&dtrace_provider_lock); mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); return (DDI_FAILURE); } if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) { mutex_exit(&dtrace_provider_lock); mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); return (DDI_FAILURE); } dtrace_provider = NULL; if ((state = dtrace_anon_grab()) != NULL) { /* * If there were ECBs on this state, the provider should * have not been allowed to detach; assert that there is * none. */ ASSERT(state->dts_necbs == 0); dtrace_state_destroy(state); /* * If we're being detached with anonymous state, we need to * indicate to the kernel debugger that DTrace is now inactive. 
*/ (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); } bzero(&dtrace_anon, sizeof (dtrace_anon_t)); unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); dtrace_cpu_init = NULL; dtrace_helpers_cleanup = NULL; dtrace_helpers_fork = NULL; dtrace_cpustart_init = NULL; dtrace_cpustart_fini = NULL; dtrace_debugger_init = NULL; dtrace_debugger_fini = NULL; dtrace_modload = NULL; dtrace_modunload = NULL; ASSERT(dtrace_getf == 0); ASSERT(dtrace_closef == NULL); mutex_exit(&cpu_lock); kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *)); dtrace_probes = NULL; dtrace_nprobes = 0; dtrace_hash_destroy(dtrace_bymod); dtrace_hash_destroy(dtrace_byfunc); dtrace_hash_destroy(dtrace_byname); dtrace_bymod = NULL; dtrace_byfunc = NULL; dtrace_byname = NULL; kmem_cache_destroy(dtrace_state_cache); vmem_destroy(dtrace_minor); vmem_destroy(dtrace_arena); if (dtrace_toxrange != NULL) { kmem_free(dtrace_toxrange, dtrace_toxranges_max * sizeof (dtrace_toxrange_t)); dtrace_toxrange = NULL; dtrace_toxranges = 0; dtrace_toxranges_max = 0; } ddi_remove_minor_node(dtrace_devi, NULL); dtrace_devi = NULL; ddi_soft_state_fini(&dtrace_softstate); ASSERT(dtrace_vtime_references == 0); ASSERT(dtrace_opens == 0); ASSERT(dtrace_retained == NULL); mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); /* * We don't destroy the task queue until after we have dropped our * locks (taskq_destroy() may block on running tasks). To prevent * attempting to do work after we have effectively detached but before * the task queue has been destroyed, all tasks dispatched via the * task queue must check that DTrace is still attached before * performing any operation. */ taskq_destroy(dtrace_taskq); dtrace_taskq = NULL; return (DDI_SUCCESS); } #endif #ifdef illumos /*ARGSUSED*/ static int dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) { int error; switch (infocmd) { case DDI_INFO_DEVT2DEVINFO: *result = (void *)dtrace_devi; error = DDI_SUCCESS; break; case DDI_INFO_DEVT2INSTANCE: *result = (void *)0; error = DDI_SUCCESS; break; default: error = DDI_FAILURE; } return (error); } #endif #ifdef illumos static struct cb_ops dtrace_cb_ops = { dtrace_open, /* open */ dtrace_close, /* close */ nulldev, /* strategy */ nulldev, /* print */ nodev, /* dump */ nodev, /* read */ nodev, /* write */ dtrace_ioctl, /* ioctl */ nodev, /* devmap */ nodev, /* mmap */ nodev, /* segmap */ nochpoll, /* poll */ ddi_prop_op, /* cb_prop_op */ 0, /* streamtab */ D_NEW | D_MP /* Driver compatibility flag */ }; static struct dev_ops dtrace_ops = { DEVO_REV, /* devo_rev */ 0, /* refcnt */ dtrace_info, /* get_dev_info */ nulldev, /* identify */ nulldev, /* probe */ dtrace_attach, /* attach */ dtrace_detach, /* detach */ nodev, /* reset */ &dtrace_cb_ops, /* driver operations */ NULL, /* bus operations */ nodev /* dev power */ }; static struct modldrv modldrv = { &mod_driverops, /* module type (this is a pseudo driver) */ "Dynamic Tracing", /* name of module */ &dtrace_ops, /* driver ops */ }; static struct modlinkage modlinkage = { MODREV_1, (void *)&modldrv, NULL }; int _init(void) { return (mod_install(&modlinkage)); } int _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); } int _fini(void) { return (mod_remove(&modlinkage)); } #else static d_ioctl_t dtrace_ioctl; static d_ioctl_t dtrace_ioctl_helper; static void dtrace_load(void *); static int dtrace_unload(void); static struct cdev *dtrace_dev; static struct cdev *helper_dev; void dtrace_invop_init(void); void 
dtrace_invop_uninit(void); static struct cdevsw dtrace_cdevsw = { .d_version = D_VERSION, .d_ioctl = dtrace_ioctl, .d_open = dtrace_open, .d_name = "dtrace", }; static struct cdevsw helper_cdevsw = { .d_version = D_VERSION, .d_ioctl = dtrace_ioctl_helper, .d_name = "helper", }; #include #include #include #include #include #include #include #include #include SYSINIT(dtrace_load, SI_SUB_DTRACE, SI_ORDER_FIRST, dtrace_load, NULL); SYSUNINIT(dtrace_unload, SI_SUB_DTRACE, SI_ORDER_FIRST, dtrace_unload, NULL); SYSINIT(dtrace_anon_init, SI_SUB_DTRACE_ANON, SI_ORDER_FIRST, dtrace_anon_init, NULL); DEV_MODULE(dtrace, dtrace_modevent, NULL); MODULE_VERSION(dtrace, 1); MODULE_DEPEND(dtrace, opensolaris, 1, 1, 1); #endif Index: head/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h (revision 322167) +++ head/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h (revision 322168) @@ -1,2510 +1,2510 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. */ #ifndef _SYS_DTRACE_H #define _SYS_DTRACE_H #ifdef __cplusplus extern "C" { #endif /* * DTrace Dynamic Tracing Software: Kernel Interfaces * * Note: The contents of this file are private to the implementation of the * Solaris system and DTrace subsystem and are subject to change at any time * without notice. Applications and drivers using these interfaces will fail * to run on future releases. These interfaces should not be used for any * purpose except those expressly outlined in dtrace(7D) and libdtrace(3LIB). * Please refer to the "Solaris Dynamic Tracing Guide" for more information. 
*/ #ifndef _ASM #include #include #include #ifdef illumos #include #else #include #include #include #include #include typedef int model_t; #endif #include #ifdef illumos #include #include #else #include #endif /* * DTrace Universal Constants and Typedefs */ #define DTRACE_CPUALL -1 /* all CPUs */ #define DTRACE_IDNONE 0 /* invalid probe identifier */ #define DTRACE_EPIDNONE 0 /* invalid enabled probe identifier */ #define DTRACE_AGGIDNONE 0 /* invalid aggregation identifier */ #define DTRACE_AGGVARIDNONE 0 /* invalid aggregation variable ID */ #define DTRACE_CACHEIDNONE 0 /* invalid predicate cache */ #define DTRACE_PROVNONE 0 /* invalid provider identifier */ #define DTRACE_METAPROVNONE 0 /* invalid meta-provider identifier */ #define DTRACE_ARGNONE -1 /* invalid argument index */ #define DTRACE_PROVNAMELEN 64 #define DTRACE_MODNAMELEN 64 #define DTRACE_FUNCNAMELEN 192 #define DTRACE_NAMELEN 64 #define DTRACE_FULLNAMELEN (DTRACE_PROVNAMELEN + DTRACE_MODNAMELEN + \ DTRACE_FUNCNAMELEN + DTRACE_NAMELEN + 4) #define DTRACE_ARGTYPELEN 128 typedef uint32_t dtrace_id_t; /* probe identifier */ typedef uint32_t dtrace_epid_t; /* enabled probe identifier */ typedef uint32_t dtrace_aggid_t; /* aggregation identifier */ typedef int64_t dtrace_aggvarid_t; /* aggregation variable identifier */ typedef uint16_t dtrace_actkind_t; /* action kind */ typedef int64_t dtrace_optval_t; /* option value */ typedef uint32_t dtrace_cacheid_t; /* predicate cache identifier */ typedef enum dtrace_probespec { DTRACE_PROBESPEC_NONE = -1, DTRACE_PROBESPEC_PROVIDER = 0, DTRACE_PROBESPEC_MOD, DTRACE_PROBESPEC_FUNC, DTRACE_PROBESPEC_NAME } dtrace_probespec_t; /* * DTrace Intermediate Format (DIF) * * The following definitions describe the DTrace Intermediate Format (DIF), a * a RISC-like instruction set and program encoding used to represent * predicates and actions that can be bound to DTrace probes. The constants * below defining the number of available registers are suggested minimums; the * compiler should use DTRACEIOC_CONF to dynamically obtain the number of * registers provided by the current DTrace implementation. 
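(Illustrative aside, not part of this header: a minimal sketch of obtaining those limits at run time with DTRACEIOC_CONF instead of hard-coding the minimums below; the /dev/dtrace path and illumos-style ioctl argument passing are assumptions, and FreeBSD exposes the node and commands slightly differently.)

#include <sys/dtrace.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <stdio.h>

int
main(void)
{
        dtrace_conf_t conf;
        int fd;

        if ((fd = open("/dev/dtrace", O_RDONLY)) == -1) {
                perror("open");
                return (1);
        }
        if (ioctl(fd, DTRACEIOC_CONF, &conf) == -1) {
                perror("DTRACEIOC_CONF");
                return (1);
        }
        printf("DIF version %u, %u integer regs, %u tuple regs\n",
            conf.dtc_difversion, conf.dtc_difintregs, conf.dtc_diftupregs);
        return (0);
}
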
*/ #define DIF_VERSION_1 1 /* DIF version 1: Solaris 10 Beta */ #define DIF_VERSION_2 2 /* DIF version 2: Solaris 10 FCS */ #define DIF_VERSION DIF_VERSION_2 /* latest DIF instruction set version */ #define DIF_DIR_NREGS 8 /* number of DIF integer registers */ #define DIF_DTR_NREGS 8 /* number of DIF tuple registers */ #define DIF_OP_OR 1 /* or r1, r2, rd */ #define DIF_OP_XOR 2 /* xor r1, r2, rd */ #define DIF_OP_AND 3 /* and r1, r2, rd */ #define DIF_OP_SLL 4 /* sll r1, r2, rd */ #define DIF_OP_SRL 5 /* srl r1, r2, rd */ #define DIF_OP_SUB 6 /* sub r1, r2, rd */ #define DIF_OP_ADD 7 /* add r1, r2, rd */ #define DIF_OP_MUL 8 /* mul r1, r2, rd */ #define DIF_OP_SDIV 9 /* sdiv r1, r2, rd */ #define DIF_OP_UDIV 10 /* udiv r1, r2, rd */ #define DIF_OP_SREM 11 /* srem r1, r2, rd */ #define DIF_OP_UREM 12 /* urem r1, r2, rd */ #define DIF_OP_NOT 13 /* not r1, rd */ #define DIF_OP_MOV 14 /* mov r1, rd */ #define DIF_OP_CMP 15 /* cmp r1, r2 */ #define DIF_OP_TST 16 /* tst r1 */ #define DIF_OP_BA 17 /* ba label */ #define DIF_OP_BE 18 /* be label */ #define DIF_OP_BNE 19 /* bne label */ #define DIF_OP_BG 20 /* bg label */ #define DIF_OP_BGU 21 /* bgu label */ #define DIF_OP_BGE 22 /* bge label */ #define DIF_OP_BGEU 23 /* bgeu label */ #define DIF_OP_BL 24 /* bl label */ #define DIF_OP_BLU 25 /* blu label */ #define DIF_OP_BLE 26 /* ble label */ #define DIF_OP_BLEU 27 /* bleu label */ #define DIF_OP_LDSB 28 /* ldsb [r1], rd */ #define DIF_OP_LDSH 29 /* ldsh [r1], rd */ #define DIF_OP_LDSW 30 /* ldsw [r1], rd */ #define DIF_OP_LDUB 31 /* ldub [r1], rd */ #define DIF_OP_LDUH 32 /* lduh [r1], rd */ #define DIF_OP_LDUW 33 /* lduw [r1], rd */ #define DIF_OP_LDX 34 /* ldx [r1], rd */ #define DIF_OP_RET 35 /* ret rd */ #define DIF_OP_NOP 36 /* nop */ #define DIF_OP_SETX 37 /* setx intindex, rd */ #define DIF_OP_SETS 38 /* sets strindex, rd */ #define DIF_OP_SCMP 39 /* scmp r1, r2 */ #define DIF_OP_LDGA 40 /* ldga var, ri, rd */ #define DIF_OP_LDGS 41 /* ldgs var, rd */ #define DIF_OP_STGS 42 /* stgs var, rs */ #define DIF_OP_LDTA 43 /* ldta var, ri, rd */ #define DIF_OP_LDTS 44 /* ldts var, rd */ #define DIF_OP_STTS 45 /* stts var, rs */ #define DIF_OP_SRA 46 /* sra r1, r2, rd */ #define DIF_OP_CALL 47 /* call subr, rd */ #define DIF_OP_PUSHTR 48 /* pushtr type, rs, rr */ #define DIF_OP_PUSHTV 49 /* pushtv type, rs, rv */ #define DIF_OP_POPTS 50 /* popts */ #define DIF_OP_FLUSHTS 51 /* flushts */ #define DIF_OP_LDGAA 52 /* ldgaa var, rd */ #define DIF_OP_LDTAA 53 /* ldtaa var, rd */ #define DIF_OP_STGAA 54 /* stgaa var, rs */ #define DIF_OP_STTAA 55 /* sttaa var, rs */ #define DIF_OP_LDLS 56 /* ldls var, rd */ #define DIF_OP_STLS 57 /* stls var, rs */ #define DIF_OP_ALLOCS 58 /* allocs r1, rd */ #define DIF_OP_COPYS 59 /* copys r1, r2, rd */ #define DIF_OP_STB 60 /* stb r1, [rd] */ #define DIF_OP_STH 61 /* sth r1, [rd] */ #define DIF_OP_STW 62 /* stw r1, [rd] */ #define DIF_OP_STX 63 /* stx r1, [rd] */ #define DIF_OP_ULDSB 64 /* uldsb [r1], rd */ #define DIF_OP_ULDSH 65 /* uldsh [r1], rd */ #define DIF_OP_ULDSW 66 /* uldsw [r1], rd */ #define DIF_OP_ULDUB 67 /* uldub [r1], rd */ #define DIF_OP_ULDUH 68 /* ulduh [r1], rd */ #define DIF_OP_ULDUW 69 /* ulduw [r1], rd */ #define DIF_OP_ULDX 70 /* uldx [r1], rd */ #define DIF_OP_RLDSB 71 /* rldsb [r1], rd */ #define DIF_OP_RLDSH 72 /* rldsh [r1], rd */ #define DIF_OP_RLDSW 73 /* rldsw [r1], rd */ #define DIF_OP_RLDUB 74 /* rldub [r1], rd */ #define DIF_OP_RLDUH 75 /* rlduh [r1], rd */ #define DIF_OP_RLDUW 76 /* rlduw [r1], rd */ #define DIF_OP_RLDX 77 /* rldx 
[r1], rd */ #define DIF_OP_XLATE 78 /* xlate xlrindex, rd */ #define DIF_OP_XLARG 79 /* xlarg xlrindex, rd */ #define DIF_INTOFF_MAX 0xffff /* highest integer table offset */ #define DIF_STROFF_MAX 0xffff /* highest string table offset */ #define DIF_REGISTER_MAX 0xff /* highest register number */ #define DIF_VARIABLE_MAX 0xffff /* highest variable identifier */ #define DIF_SUBROUTINE_MAX 0xffff /* highest subroutine code */ #define DIF_VAR_ARRAY_MIN 0x0000 /* lowest numbered array variable */ #define DIF_VAR_ARRAY_UBASE 0x0080 /* lowest user-defined array */ #define DIF_VAR_ARRAY_MAX 0x00ff /* highest numbered array variable */ #define DIF_VAR_OTHER_MIN 0x0100 /* lowest numbered scalar or assc */ #define DIF_VAR_OTHER_UBASE 0x0500 /* lowest user-defined scalar or assc */ #define DIF_VAR_OTHER_MAX 0xffff /* highest numbered scalar or assc */ #define DIF_VAR_ARGS 0x0000 /* arguments array */ #define DIF_VAR_REGS 0x0001 /* registers array */ #define DIF_VAR_UREGS 0x0002 /* user registers array */ #define DIF_VAR_CURTHREAD 0x0100 /* thread pointer */ #define DIF_VAR_TIMESTAMP 0x0101 /* timestamp */ #define DIF_VAR_VTIMESTAMP 0x0102 /* virtual timestamp */ #define DIF_VAR_IPL 0x0103 /* interrupt priority level */ #define DIF_VAR_EPID 0x0104 /* enabled probe ID */ #define DIF_VAR_ID 0x0105 /* probe ID */ #define DIF_VAR_ARG0 0x0106 /* first argument */ #define DIF_VAR_ARG1 0x0107 /* second argument */ #define DIF_VAR_ARG2 0x0108 /* third argument */ #define DIF_VAR_ARG3 0x0109 /* fourth argument */ #define DIF_VAR_ARG4 0x010a /* fifth argument */ #define DIF_VAR_ARG5 0x010b /* sixth argument */ #define DIF_VAR_ARG6 0x010c /* seventh argument */ #define DIF_VAR_ARG7 0x010d /* eighth argument */ #define DIF_VAR_ARG8 0x010e /* ninth argument */ #define DIF_VAR_ARG9 0x010f /* tenth argument */ #define DIF_VAR_STACKDEPTH 0x0110 /* stack depth */ #define DIF_VAR_CALLER 0x0111 /* caller */ #define DIF_VAR_PROBEPROV 0x0112 /* probe provider */ #define DIF_VAR_PROBEMOD 0x0113 /* probe module */ #define DIF_VAR_PROBEFUNC 0x0114 /* probe function */ #define DIF_VAR_PROBENAME 0x0115 /* probe name */ #define DIF_VAR_PID 0x0116 /* process ID */ #define DIF_VAR_TID 0x0117 /* (per-process) thread ID */ #define DIF_VAR_EXECNAME 0x0118 /* name of executable */ #define DIF_VAR_ZONENAME 0x0119 /* zone name associated with process */ #define DIF_VAR_WALLTIMESTAMP 0x011a /* wall-clock timestamp */ #define DIF_VAR_USTACKDEPTH 0x011b /* user-land stack depth */ #define DIF_VAR_UCALLER 0x011c /* user-level caller */ #define DIF_VAR_PPID 0x011d /* parent process ID */ #define DIF_VAR_UID 0x011e /* process user ID */ #define DIF_VAR_GID 0x011f /* process group ID */ #define DIF_VAR_ERRNO 0x0120 /* thread errno */ #define DIF_VAR_EXECARGS 0x0121 /* process arguments */ #ifndef illumos #define DIF_VAR_CPU 0x0200 #endif #define DIF_SUBR_RAND 0 #define DIF_SUBR_MUTEX_OWNED 1 #define DIF_SUBR_MUTEX_OWNER 2 #define DIF_SUBR_MUTEX_TYPE_ADAPTIVE 3 #define DIF_SUBR_MUTEX_TYPE_SPIN 4 #define DIF_SUBR_RW_READ_HELD 5 #define DIF_SUBR_RW_WRITE_HELD 6 #define DIF_SUBR_RW_ISWRITER 7 #define DIF_SUBR_COPYIN 8 #define DIF_SUBR_COPYINSTR 9 #define DIF_SUBR_SPECULATION 10 #define DIF_SUBR_PROGENYOF 11 #define DIF_SUBR_STRLEN 12 #define DIF_SUBR_COPYOUT 13 #define DIF_SUBR_COPYOUTSTR 14 #define DIF_SUBR_ALLOCA 15 #define DIF_SUBR_BCOPY 16 #define DIF_SUBR_COPYINTO 17 #define DIF_SUBR_MSGDSIZE 18 #define DIF_SUBR_MSGSIZE 19 #define DIF_SUBR_GETMAJOR 20 #define DIF_SUBR_GETMINOR 21 #define DIF_SUBR_DDI_PATHNAME 22 #define DIF_SUBR_STRJOIN 23 
#define DIF_SUBR_LLTOSTR 24 #define DIF_SUBR_BASENAME 25 #define DIF_SUBR_DIRNAME 26 #define DIF_SUBR_CLEANPATH 27 #define DIF_SUBR_STRCHR 28 #define DIF_SUBR_STRRCHR 29 #define DIF_SUBR_STRSTR 30 #define DIF_SUBR_STRTOK 31 #define DIF_SUBR_SUBSTR 32 #define DIF_SUBR_INDEX 33 #define DIF_SUBR_RINDEX 34 #define DIF_SUBR_HTONS 35 #define DIF_SUBR_HTONL 36 #define DIF_SUBR_HTONLL 37 #define DIF_SUBR_NTOHS 38 #define DIF_SUBR_NTOHL 39 #define DIF_SUBR_NTOHLL 40 #define DIF_SUBR_INET_NTOP 41 #define DIF_SUBR_INET_NTOA 42 #define DIF_SUBR_INET_NTOA6 43 #define DIF_SUBR_TOUPPER 44 #define DIF_SUBR_TOLOWER 45 #define DIF_SUBR_MEMREF 46 #define DIF_SUBR_SX_SHARED_HELD 47 #define DIF_SUBR_SX_EXCLUSIVE_HELD 48 #define DIF_SUBR_SX_ISEXCLUSIVE 49 #define DIF_SUBR_MEMSTR 50 #define DIF_SUBR_GETF 51 #define DIF_SUBR_JSON 52 #define DIF_SUBR_STRTOLL 53 #define DIF_SUBR_MAX 53 /* max subroutine value */ typedef uint32_t dif_instr_t; #define DIF_INSTR_OP(i) (((i) >> 24) & 0xff) #define DIF_INSTR_R1(i) (((i) >> 16) & 0xff) #define DIF_INSTR_R2(i) (((i) >> 8) & 0xff) #define DIF_INSTR_RD(i) ((i) & 0xff) #define DIF_INSTR_RS(i) ((i) & 0xff) #define DIF_INSTR_LABEL(i) ((i) & 0xffffff) #define DIF_INSTR_VAR(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_INTEGER(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_STRING(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_SUBR(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_TYPE(i) (((i) >> 16) & 0xff) #define DIF_INSTR_XLREF(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_FMT(op, r1, r2, d) \ (((op) << 24) | ((r1) << 16) | ((r2) << 8) | (d)) #define DIF_INSTR_NOT(r1, d) (DIF_INSTR_FMT(DIF_OP_NOT, r1, 0, d)) #define DIF_INSTR_MOV(r1, d) (DIF_INSTR_FMT(DIF_OP_MOV, r1, 0, d)) #define DIF_INSTR_CMP(op, r1, r2) (DIF_INSTR_FMT(op, r1, r2, 0)) #define DIF_INSTR_TST(r1) (DIF_INSTR_FMT(DIF_OP_TST, r1, 0, 0)) #define DIF_INSTR_BRANCH(op, label) (((op) << 24) | (label)) #define DIF_INSTR_LOAD(op, r1, d) (DIF_INSTR_FMT(op, r1, 0, d)) #define DIF_INSTR_STORE(op, r1, d) (DIF_INSTR_FMT(op, r1, 0, d)) #define DIF_INSTR_SETX(i, d) ((DIF_OP_SETX << 24) | ((i) << 8) | (d)) #define DIF_INSTR_SETS(s, d) ((DIF_OP_SETS << 24) | ((s) << 8) | (d)) #define DIF_INSTR_RET(d) (DIF_INSTR_FMT(DIF_OP_RET, 0, 0, d)) #define DIF_INSTR_NOP (DIF_OP_NOP << 24) #define DIF_INSTR_LDA(op, v, r, d) (DIF_INSTR_FMT(op, v, r, d)) #define DIF_INSTR_LDV(op, v, d) (((op) << 24) | ((v) << 8) | (d)) #define DIF_INSTR_STV(op, v, rs) (((op) << 24) | ((v) << 8) | (rs)) #define DIF_INSTR_CALL(s, d) ((DIF_OP_CALL << 24) | ((s) << 8) | (d)) #define DIF_INSTR_PUSHTS(op, t, r2, rs) (DIF_INSTR_FMT(op, t, r2, rs)) #define DIF_INSTR_POPTS (DIF_OP_POPTS << 24) #define DIF_INSTR_FLUSHTS (DIF_OP_FLUSHTS << 24) #define DIF_INSTR_ALLOCS(r1, d) (DIF_INSTR_FMT(DIF_OP_ALLOCS, r1, 0, d)) #define DIF_INSTR_COPYS(r1, r2, d) (DIF_INSTR_FMT(DIF_OP_COPYS, r1, r2, d)) #define DIF_INSTR_XLATE(op, r, d) (((op) << 24) | ((r) << 8) | (d)) #define DIF_REG_R0 0 /* %r0 is always set to zero */ /* * A DTrace Intermediate Format Type (DIF Type) is used to represent the types * of variables, function and associative array arguments, and the return type * for each DIF object (shown below). It contains a description of the type, * its size in bytes, and a module identifier. 
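(Illustrative aside, not part of this header: a worked example of the instruction-word layout defined by the DIF_INSTR_* macros above, using only constants from this file. "add %r1, %r2 -> %r3" packs the opcode into the top byte and the three register fields beneath it.)

#include <sys/dtrace.h>
#include <stdio.h>

int
main(void)
{
        dif_instr_t instr = DIF_INSTR_FMT(DIF_OP_ADD, 1, 2, 3);

        /* Prints 0x07010203: op 7 (add), r1 = 1, r2 = 2, rd = 3. */
        printf("instr = 0x%08x\n", instr);
        printf("op=%u r1=%u r2=%u rd=%u\n", DIF_INSTR_OP(instr),
            DIF_INSTR_R1(instr), DIF_INSTR_R2(instr), DIF_INSTR_RD(instr));
        return (0);
}
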
*/ typedef struct dtrace_diftype { uint8_t dtdt_kind; /* type kind (see below) */ uint8_t dtdt_ckind; /* type kind in CTF */ uint8_t dtdt_flags; /* type flags (see below) */ uint8_t dtdt_pad; /* reserved for future use */ uint32_t dtdt_size; /* type size in bytes (unless string) */ } dtrace_diftype_t; #define DIF_TYPE_CTF 0 /* type is a CTF type */ #define DIF_TYPE_STRING 1 /* type is a D string */ #define DIF_TF_BYREF 0x1 /* type is passed by reference */ #define DIF_TF_BYUREF 0x2 /* user type is passed by reference */ /* * A DTrace Intermediate Format variable record is used to describe each of the * variables referenced by a given DIF object. It contains an integer variable * identifier along with variable scope and properties, as shown below. The * size of this structure must be sizeof (int) aligned. */ typedef struct dtrace_difv { uint32_t dtdv_name; /* variable name index in dtdo_strtab */ uint32_t dtdv_id; /* variable reference identifier */ uint8_t dtdv_kind; /* variable kind (see below) */ uint8_t dtdv_scope; /* variable scope (see below) */ uint16_t dtdv_flags; /* variable flags (see below) */ dtrace_diftype_t dtdv_type; /* variable type (see above) */ } dtrace_difv_t; #define DIFV_KIND_ARRAY 0 /* variable is an array of quantities */ #define DIFV_KIND_SCALAR 1 /* variable is a scalar quantity */ #define DIFV_SCOPE_GLOBAL 0 /* variable has global scope */ #define DIFV_SCOPE_THREAD 1 /* variable has thread scope */ #define DIFV_SCOPE_LOCAL 2 /* variable has local scope */ #define DIFV_F_REF 0x1 /* variable is referenced by DIFO */ #define DIFV_F_MOD 0x2 /* variable is written by DIFO */ /* * DTrace Actions * * The upper byte determines the class of the action; the low bytes determines * the specific action within that class. The classes of actions are as * follows: * * [ no class ] <= May record process- or kernel-related data * DTRACEACT_PROC <= Only records process-related data * DTRACEACT_PROC_DESTRUCTIVE <= Potentially destructive to processes * DTRACEACT_KERNEL <= Only records kernel-related data * DTRACEACT_KERNEL_DESTRUCTIVE <= Potentially destructive to the kernel * DTRACEACT_SPECULATIVE <= Speculation-related action * DTRACEACT_AGGREGATION <= Aggregating action */ #define DTRACEACT_NONE 0 /* no action */ #define DTRACEACT_DIFEXPR 1 /* action is DIF expression */ #define DTRACEACT_EXIT 2 /* exit() action */ #define DTRACEACT_PRINTF 3 /* printf() action */ #define DTRACEACT_PRINTA 4 /* printa() action */ #define DTRACEACT_LIBACT 5 /* library-controlled action */ #define DTRACEACT_TRACEMEM 6 /* tracemem() action */ #define DTRACEACT_TRACEMEM_DYNSIZE 7 /* dynamic tracemem() size */ #define DTRACEACT_PRINTM 8 /* printm() action (BSD) */ #define DTRACEACT_PROC 0x0100 #define DTRACEACT_USTACK (DTRACEACT_PROC + 1) #define DTRACEACT_JSTACK (DTRACEACT_PROC + 2) #define DTRACEACT_USYM (DTRACEACT_PROC + 3) #define DTRACEACT_UMOD (DTRACEACT_PROC + 4) #define DTRACEACT_UADDR (DTRACEACT_PROC + 5) #define DTRACEACT_PROC_DESTRUCTIVE 0x0200 #define DTRACEACT_STOP (DTRACEACT_PROC_DESTRUCTIVE + 1) #define DTRACEACT_RAISE (DTRACEACT_PROC_DESTRUCTIVE + 2) #define DTRACEACT_SYSTEM (DTRACEACT_PROC_DESTRUCTIVE + 3) #define DTRACEACT_FREOPEN (DTRACEACT_PROC_DESTRUCTIVE + 4) #define DTRACEACT_PROC_CONTROL 0x0300 #define DTRACEACT_KERNEL 0x0400 #define DTRACEACT_STACK (DTRACEACT_KERNEL + 1) #define DTRACEACT_SYM (DTRACEACT_KERNEL + 2) #define DTRACEACT_MOD (DTRACEACT_KERNEL + 3) #define DTRACEACT_KERNEL_DESTRUCTIVE 0x0500 #define DTRACEACT_BREAKPOINT (DTRACEACT_KERNEL_DESTRUCTIVE + 1) #define 
DTRACEACT_PANIC (DTRACEACT_KERNEL_DESTRUCTIVE + 2) #define DTRACEACT_CHILL (DTRACEACT_KERNEL_DESTRUCTIVE + 3) #define DTRACEACT_SPECULATIVE 0x0600 #define DTRACEACT_SPECULATE (DTRACEACT_SPECULATIVE + 1) #define DTRACEACT_COMMIT (DTRACEACT_SPECULATIVE + 2) #define DTRACEACT_DISCARD (DTRACEACT_SPECULATIVE + 3) #define DTRACEACT_CLASS(x) ((x) & 0xff00) #define DTRACEACT_ISDESTRUCTIVE(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_PROC_DESTRUCTIVE || \ DTRACEACT_CLASS(x) == DTRACEACT_KERNEL_DESTRUCTIVE) #define DTRACEACT_ISSPECULATIVE(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_SPECULATIVE) #define DTRACEACT_ISPRINTFLIKE(x) \ ((x) == DTRACEACT_PRINTF || (x) == DTRACEACT_PRINTA || \ (x) == DTRACEACT_SYSTEM || (x) == DTRACEACT_FREOPEN) /* * DTrace Aggregating Actions * * These are functions f(x) for which the following is true: * * f(f(x_0) U f(x_1) U ... U f(x_n)) = f(x_0 U x_1 U ... U x_n) * * where x_n is a set of arbitrary data. Aggregating actions are in their own * DTrace action class, DTTRACEACT_AGGREGATION. The macros provided here allow * for easier processing of the aggregation argument and data payload for a few * aggregating actions (notably: quantize(), lquantize(), and ustack()). */ #define DTRACEACT_AGGREGATION 0x0700 #define DTRACEAGG_COUNT (DTRACEACT_AGGREGATION + 1) #define DTRACEAGG_MIN (DTRACEACT_AGGREGATION + 2) #define DTRACEAGG_MAX (DTRACEACT_AGGREGATION + 3) #define DTRACEAGG_AVG (DTRACEACT_AGGREGATION + 4) #define DTRACEAGG_SUM (DTRACEACT_AGGREGATION + 5) #define DTRACEAGG_STDDEV (DTRACEACT_AGGREGATION + 6) #define DTRACEAGG_QUANTIZE (DTRACEACT_AGGREGATION + 7) #define DTRACEAGG_LQUANTIZE (DTRACEACT_AGGREGATION + 8) #define DTRACEAGG_LLQUANTIZE (DTRACEACT_AGGREGATION + 9) #define DTRACEACT_ISAGG(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_AGGREGATION) #define DTRACE_QUANTIZE_NBUCKETS \ (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) #define DTRACE_QUANTIZE_ZEROBUCKET ((sizeof (uint64_t) * NBBY) - 1) #define DTRACE_QUANTIZE_BUCKETVAL(buck) \ (int64_t)((buck) < DTRACE_QUANTIZE_ZEROBUCKET ? \ -(1LL << (DTRACE_QUANTIZE_ZEROBUCKET - 1 - (buck))) : \ (buck) == DTRACE_QUANTIZE_ZEROBUCKET ? 
0 : \ 1LL << ((buck) - DTRACE_QUANTIZE_ZEROBUCKET - 1)) #define DTRACE_LQUANTIZE_STEPSHIFT 48 #define DTRACE_LQUANTIZE_STEPMASK ((uint64_t)UINT16_MAX << 48) #define DTRACE_LQUANTIZE_LEVELSHIFT 32 #define DTRACE_LQUANTIZE_LEVELMASK ((uint64_t)UINT16_MAX << 32) #define DTRACE_LQUANTIZE_BASESHIFT 0 #define DTRACE_LQUANTIZE_BASEMASK UINT32_MAX #define DTRACE_LQUANTIZE_STEP(x) \ (uint16_t)(((x) & DTRACE_LQUANTIZE_STEPMASK) >> \ DTRACE_LQUANTIZE_STEPSHIFT) #define DTRACE_LQUANTIZE_LEVELS(x) \ (uint16_t)(((x) & DTRACE_LQUANTIZE_LEVELMASK) >> \ DTRACE_LQUANTIZE_LEVELSHIFT) #define DTRACE_LQUANTIZE_BASE(x) \ (int32_t)(((x) & DTRACE_LQUANTIZE_BASEMASK) >> \ DTRACE_LQUANTIZE_BASESHIFT) #define DTRACE_LLQUANTIZE_FACTORSHIFT 48 #define DTRACE_LLQUANTIZE_FACTORMASK ((uint64_t)UINT16_MAX << 48) #define DTRACE_LLQUANTIZE_LOWSHIFT 32 #define DTRACE_LLQUANTIZE_LOWMASK ((uint64_t)UINT16_MAX << 32) #define DTRACE_LLQUANTIZE_HIGHSHIFT 16 #define DTRACE_LLQUANTIZE_HIGHMASK ((uint64_t)UINT16_MAX << 16) #define DTRACE_LLQUANTIZE_NSTEPSHIFT 0 #define DTRACE_LLQUANTIZE_NSTEPMASK UINT16_MAX #define DTRACE_LLQUANTIZE_FACTOR(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_FACTORMASK) >> \ DTRACE_LLQUANTIZE_FACTORSHIFT) #define DTRACE_LLQUANTIZE_LOW(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_LOWMASK) >> \ DTRACE_LLQUANTIZE_LOWSHIFT) #define DTRACE_LLQUANTIZE_HIGH(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_HIGHMASK) >> \ DTRACE_LLQUANTIZE_HIGHSHIFT) #define DTRACE_LLQUANTIZE_NSTEP(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_NSTEPMASK) >> \ DTRACE_LLQUANTIZE_NSTEPSHIFT) #define DTRACE_USTACK_NFRAMES(x) (uint32_t)((x) & UINT32_MAX) #define DTRACE_USTACK_STRSIZE(x) (uint32_t)((x) >> 32) #define DTRACE_USTACK_ARG(x, y) \ ((((uint64_t)(y)) << 32) | ((x) & UINT32_MAX)) #ifndef _LP64 #if BYTE_ORDER == _BIG_ENDIAN #define DTRACE_PTR(type, name) uint32_t name##pad; type *name #else #define DTRACE_PTR(type, name) type *name; uint32_t name##pad #endif #else #define DTRACE_PTR(type, name) type *name #endif /* * DTrace Object Format (DOF) * * DTrace programs can be persistently encoded in the DOF format so that they * may be embedded in other programs (for example, in an ELF file) or in the * dtrace driver configuration file for use in anonymous tracing. The DOF * format is versioned and extensible so that it can be revised and so that * internal data structures can be modified or extended compatibly. All DOF * structures use fixed-size types, so the 32-bit and 64-bit representations * are identical and consumers can use either data model transparently. * * The file layout is structured as follows: * * +---------------+-------------------+----- ... ----+---- ... ------+ * | dof_hdr_t | dof_sec_t[ ... ] | loadable | non-loadable | * | (file header) | (section headers) | section data | section data | * +---------------+-------------------+----- ... ----+---- ... ------+ * |<------------ dof_hdr.dofh_loadsz --------------->| | * |<------------ dof_hdr.dofh_filesz ------------------------------->| * * The file header stores meta-data including a magic number, data model for * the instrumentation, data encoding, and properties of the DIF code within. * The header describes its own size and the size of the section headers. By * convention, an array of section headers follows the file header, and then * the data for all loadable sections and unloadable sections. This permits * consumer code to easily download the headers and all loadable data into the * DTrace driver in one contiguous chunk, omitting other extraneous sections. 
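(Illustrative aside, not part of this header: a small sketch of the sanity checks this layout implies for a consumer handed a candidate DOF buffer, using the dof_hdr_t structure and DOF_* identification constants defined just below.)

#include <sys/dtrace.h>
#include <string.h>

int
dof_header_ok(const dof_hdr_t *h)
{
        if (memcmp(h->dofh_ident, DOF_MAG_STRING, DOF_MAG_STRLEN) != 0)
                return (0);     /* not a DOF file */
        if (h->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_NATIVE)
                return (0);     /* built for the other data model */
        if (h->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE)
                return (0);     /* wrong byte order for this host */
        if (h->dofh_loadsz > h->dofh_filesz)
                return (0);     /* loadable portion cannot exceed the file */
        return (1);
}
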
* * The section headers describe the size, offset, alignment, and section type * for each section. Sections are described using a set of #defines that tell * the consumer what kind of data is expected. Sections can contain links to * other sections by storing a dof_secidx_t, an index into the section header * array, inside of the section data structures. The section header includes * an entry size so that sections with data arrays can grow their structures. * * The DOF data itself can contain many snippets of DIF (i.e. >1 DIFOs), which * are represented themselves as a collection of related DOF sections. This * permits us to change the set of sections associated with a DIFO over time, * and also permits us to encode DIFOs that contain different sets of sections. * When a DOF section wants to refer to a DIFO, it stores the dof_secidx_t of a * section of type DOF_SECT_DIFOHDR. This section's data is then an array of * dof_secidx_t's which in turn denote the sections associated with this DIFO. * * This loose coupling of the file structure (header and sections) to the * structure of the DTrace program itself (ECB descriptions, action * descriptions, and DIFOs) permits activities such as relocation processing * to occur in a single pass without having to understand D program structure. * * Finally, strings are always stored in ELF-style string tables along with a * string table section index and string table offset. Therefore strings in * DOF are always arbitrary-length and not bound to the current implementation. */ #define DOF_ID_SIZE 16 /* total size of dofh_ident[] in bytes */ typedef struct dof_hdr { uint8_t dofh_ident[DOF_ID_SIZE]; /* identification bytes (see below) */ uint32_t dofh_flags; /* file attribute flags (if any) */ uint32_t dofh_hdrsize; /* size of file header in bytes */ uint32_t dofh_secsize; /* size of section header in bytes */ uint32_t dofh_secnum; /* number of section headers */ uint64_t dofh_secoff; /* file offset of section headers */ uint64_t dofh_loadsz; /* file size of loadable portion */ uint64_t dofh_filesz; /* file size of entire DOF file */ uint64_t dofh_pad; /* reserved for future use */ } dof_hdr_t; #define DOF_ID_MAG0 0 /* first byte of magic number */ #define DOF_ID_MAG1 1 /* second byte of magic number */ #define DOF_ID_MAG2 2 /* third byte of magic number */ #define DOF_ID_MAG3 3 /* fourth byte of magic number */ #define DOF_ID_MODEL 4 /* DOF data model (see below) */ #define DOF_ID_ENCODING 5 /* DOF data encoding (see below) */ #define DOF_ID_VERSION 6 /* DOF file format major version (see below) */ #define DOF_ID_DIFVERS 7 /* DIF instruction set version */ #define DOF_ID_DIFIREG 8 /* DIF integer registers used by compiler */ #define DOF_ID_DIFTREG 9 /* DIF tuple registers used by compiler */ #define DOF_ID_PAD 10 /* start of padding bytes (all zeroes) */ #define DOF_MAG_MAG0 0x7F /* DOF_ID_MAG[0-3] */ #define DOF_MAG_MAG1 'D' #define DOF_MAG_MAG2 'O' #define DOF_MAG_MAG3 'F' #define DOF_MAG_STRING "\177DOF" #define DOF_MAG_STRLEN 4 #define DOF_MODEL_NONE 0 /* DOF_ID_MODEL */ #define DOF_MODEL_ILP32 1 #define DOF_MODEL_LP64 2 #ifdef _LP64 #define DOF_MODEL_NATIVE DOF_MODEL_LP64 #else #define DOF_MODEL_NATIVE DOF_MODEL_ILP32 #endif #define DOF_ENCODE_NONE 0 /* DOF_ID_ENCODING */ #define DOF_ENCODE_LSB 1 #define DOF_ENCODE_MSB 2 #if BYTE_ORDER == _BIG_ENDIAN #define DOF_ENCODE_NATIVE DOF_ENCODE_MSB #else #define DOF_ENCODE_NATIVE DOF_ENCODE_LSB #endif #define DOF_VERSION_1 1 /* DOF version 1: Solaris 10 FCS */ #define DOF_VERSION_2 2 /* DOF version 2: 
Solaris Express 6/06 */ #define DOF_VERSION DOF_VERSION_2 /* Latest DOF version */ #define DOF_FL_VALID 0 /* mask of all valid dofh_flags bits */ typedef uint32_t dof_secidx_t; /* section header table index type */ typedef uint32_t dof_stridx_t; /* string table index type */ #define DOF_SECIDX_NONE (-1U) /* null value for section indices */ #define DOF_STRIDX_NONE (-1U) /* null value for string indices */ typedef struct dof_sec { uint32_t dofs_type; /* section type (see below) */ uint32_t dofs_align; /* section data memory alignment */ uint32_t dofs_flags; /* section flags (if any) */ uint32_t dofs_entsize; /* size of section entry (if table) */ uint64_t dofs_offset; /* offset of section data within file */ uint64_t dofs_size; /* size of section data in bytes */ } dof_sec_t; #define DOF_SECT_NONE 0 /* null section */ #define DOF_SECT_COMMENTS 1 /* compiler comments */ #define DOF_SECT_SOURCE 2 /* D program source code */ #define DOF_SECT_ECBDESC 3 /* dof_ecbdesc_t */ #define DOF_SECT_PROBEDESC 4 /* dof_probedesc_t */ #define DOF_SECT_ACTDESC 5 /* dof_actdesc_t array */ #define DOF_SECT_DIFOHDR 6 /* dof_difohdr_t (variable length) */ #define DOF_SECT_DIF 7 /* uint32_t array of byte code */ #define DOF_SECT_STRTAB 8 /* string table */ #define DOF_SECT_VARTAB 9 /* dtrace_difv_t array */ #define DOF_SECT_RELTAB 10 /* dof_relodesc_t array */ #define DOF_SECT_TYPTAB 11 /* dtrace_diftype_t array */ #define DOF_SECT_URELHDR 12 /* dof_relohdr_t (user relocations) */ #define DOF_SECT_KRELHDR 13 /* dof_relohdr_t (kernel relocations) */ #define DOF_SECT_OPTDESC 14 /* dof_optdesc_t array */ #define DOF_SECT_PROVIDER 15 /* dof_provider_t */ #define DOF_SECT_PROBES 16 /* dof_probe_t array */ #define DOF_SECT_PRARGS 17 /* uint8_t array (probe arg mappings) */ #define DOF_SECT_PROFFS 18 /* uint32_t array (probe arg offsets) */ #define DOF_SECT_INTTAB 19 /* uint64_t array */ #define DOF_SECT_UTSNAME 20 /* struct utsname */ #define DOF_SECT_XLTAB 21 /* dof_xlref_t array */ #define DOF_SECT_XLMEMBERS 22 /* dof_xlmember_t array */ #define DOF_SECT_XLIMPORT 23 /* dof_xlator_t */ #define DOF_SECT_XLEXPORT 24 /* dof_xlator_t */ #define DOF_SECT_PREXPORT 25 /* dof_secidx_t array (exported objs) */ #define DOF_SECT_PRENOFFS 26 /* uint32_t array (enabled offsets) */ #define DOF_SECF_LOAD 1 /* section should be loaded */ #define DOF_SEC_ISLOADABLE(x) \ (((x) == DOF_SECT_ECBDESC) || ((x) == DOF_SECT_PROBEDESC) || \ ((x) == DOF_SECT_ACTDESC) || ((x) == DOF_SECT_DIFOHDR) || \ ((x) == DOF_SECT_DIF) || ((x) == DOF_SECT_STRTAB) || \ ((x) == DOF_SECT_VARTAB) || ((x) == DOF_SECT_RELTAB) || \ ((x) == DOF_SECT_TYPTAB) || ((x) == DOF_SECT_URELHDR) || \ ((x) == DOF_SECT_KRELHDR) || ((x) == DOF_SECT_OPTDESC) || \ ((x) == DOF_SECT_PROVIDER) || ((x) == DOF_SECT_PROBES) || \ ((x) == DOF_SECT_PRARGS) || ((x) == DOF_SECT_PROFFS) || \ ((x) == DOF_SECT_INTTAB) || ((x) == DOF_SECT_XLTAB) || \ ((x) == DOF_SECT_XLMEMBERS) || ((x) == DOF_SECT_XLIMPORT) || \ ((x) == DOF_SECT_XLEXPORT) || ((x) == DOF_SECT_PREXPORT) || \ ((x) == DOF_SECT_PRENOFFS)) typedef struct dof_ecbdesc { dof_secidx_t dofe_probes; /* link to DOF_SECT_PROBEDESC */ dof_secidx_t dofe_pred; /* link to DOF_SECT_DIFOHDR */ dof_secidx_t dofe_actions; /* link to DOF_SECT_ACTDESC */ uint32_t dofe_pad; /* reserved for future use */ uint64_t dofe_uarg; /* user-supplied library argument */ } dof_ecbdesc_t; typedef struct dof_probedesc { dof_secidx_t dofp_strtab; /* link to DOF_SECT_STRTAB section */ dof_stridx_t dofp_provider; /* provider string */ dof_stridx_t dofp_mod; /* module 
string */ dof_stridx_t dofp_func; /* function string */ dof_stridx_t dofp_name; /* name string */ uint32_t dofp_id; /* probe identifier (or zero) */ } dof_probedesc_t; typedef struct dof_actdesc { dof_secidx_t dofa_difo; /* link to DOF_SECT_DIFOHDR */ dof_secidx_t dofa_strtab; /* link to DOF_SECT_STRTAB section */ uint32_t dofa_kind; /* action kind (DTRACEACT_* constant) */ uint32_t dofa_ntuple; /* number of subsequent tuple actions */ uint64_t dofa_arg; /* kind-specific argument */ uint64_t dofa_uarg; /* user-supplied argument */ } dof_actdesc_t; typedef struct dof_difohdr { dtrace_diftype_t dofd_rtype; /* return type for this fragment */ dof_secidx_t dofd_links[1]; /* variable length array of indices */ } dof_difohdr_t; typedef struct dof_relohdr { dof_secidx_t dofr_strtab; /* link to DOF_SECT_STRTAB for names */ dof_secidx_t dofr_relsec; /* link to DOF_SECT_RELTAB for relos */ dof_secidx_t dofr_tgtsec; /* link to section we are relocating */ } dof_relohdr_t; typedef struct dof_relodesc { dof_stridx_t dofr_name; /* string name of relocation symbol */ uint32_t dofr_type; /* relo type (DOF_RELO_* constant) */ uint64_t dofr_offset; /* byte offset for relocation */ uint64_t dofr_data; /* additional type-specific data */ } dof_relodesc_t; #define DOF_RELO_NONE 0 /* empty relocation entry */ #define DOF_RELO_SETX 1 /* relocate setx value */ #define DOF_RELO_DOFREL 2 /* relocate DOF-relative value */ typedef struct dof_optdesc { uint32_t dofo_option; /* option identifier */ dof_secidx_t dofo_strtab; /* string table, if string option */ uint64_t dofo_value; /* option value or string index */ } dof_optdesc_t; typedef uint32_t dof_attr_t; /* encoded stability attributes */ #define DOF_ATTR(n, d, c) (((n) << 24) | ((d) << 16) | ((c) << 8)) #define DOF_ATTR_NAME(a) (((a) >> 24) & 0xff) #define DOF_ATTR_DATA(a) (((a) >> 16) & 0xff) #define DOF_ATTR_CLASS(a) (((a) >> 8) & 0xff) typedef struct dof_provider { dof_secidx_t dofpv_strtab; /* link to DOF_SECT_STRTAB section */ dof_secidx_t dofpv_probes; /* link to DOF_SECT_PROBES section */ dof_secidx_t dofpv_prargs; /* link to DOF_SECT_PRARGS section */ dof_secidx_t dofpv_proffs; /* link to DOF_SECT_PROFFS section */ dof_stridx_t dofpv_name; /* provider name string */ dof_attr_t dofpv_provattr; /* provider attributes */ dof_attr_t dofpv_modattr; /* module attributes */ dof_attr_t dofpv_funcattr; /* function attributes */ dof_attr_t dofpv_nameattr; /* name attributes */ dof_attr_t dofpv_argsattr; /* args attributes */ dof_secidx_t dofpv_prenoffs; /* link to DOF_SECT_PRENOFFS section */ } dof_provider_t; typedef struct dof_probe { uint64_t dofpr_addr; /* probe base address or offset */ dof_stridx_t dofpr_func; /* probe function string */ dof_stridx_t dofpr_name; /* probe name string */ dof_stridx_t dofpr_nargv; /* native argument type strings */ dof_stridx_t dofpr_xargv; /* translated argument type strings */ uint32_t dofpr_argidx; /* index of first argument mapping */ uint32_t dofpr_offidx; /* index of first offset entry */ uint8_t dofpr_nargc; /* native argument count */ uint8_t dofpr_xargc; /* translated argument count */ uint16_t dofpr_noffs; /* number of offset entries for probe */ uint32_t dofpr_enoffidx; /* index of first is-enabled offset */ uint16_t dofpr_nenoffs; /* number of is-enabled offsets */ uint16_t dofpr_pad1; /* reserved for future use */ uint32_t dofpr_pad2; /* reserved for future use */ } dof_probe_t; typedef struct dof_xlator { dof_secidx_t dofxl_members; /* link to DOF_SECT_XLMEMBERS section */ dof_secidx_t dofxl_strtab; /* link to 
DOF_SECT_STRTAB section */ dof_stridx_t dofxl_argv; /* input parameter type strings */ uint32_t dofxl_argc; /* input parameter list length */ dof_stridx_t dofxl_type; /* output type string name */ dof_attr_t dofxl_attr; /* output stability attributes */ } dof_xlator_t; typedef struct dof_xlmember { dof_secidx_t dofxm_difo; /* member link to DOF_SECT_DIFOHDR */ dof_stridx_t dofxm_name; /* member name */ dtrace_diftype_t dofxm_type; /* member type */ } dof_xlmember_t; typedef struct dof_xlref { dof_secidx_t dofxr_xlator; /* link to DOF_SECT_XLATORS section */ uint32_t dofxr_member; /* index of referenced dof_xlmember */ uint32_t dofxr_argn; /* index of argument for DIF_OP_XLARG */ } dof_xlref_t; /* * DTrace Intermediate Format Object (DIFO) * * A DIFO is used to store the compiled DIF for a D expression, its return * type, and its string and variable tables. The string table is a single * buffer of character data into which sets instructions and variable * references can reference strings using a byte offset. The variable table * is an array of dtrace_difv_t structures that describe the name and type of * each variable and the id used in the DIF code. This structure is described * above in the DIF section of this header file. The DIFO is used at both * user-level (in the library) and in the kernel, but the structure is never * passed between the two: the DOF structures form the only interface. As a * result, the definition can change depending on the presence of _KERNEL. */ typedef struct dtrace_difo { dif_instr_t *dtdo_buf; /* instruction buffer */ uint64_t *dtdo_inttab; /* integer table (optional) */ char *dtdo_strtab; /* string table (optional) */ dtrace_difv_t *dtdo_vartab; /* variable table (optional) */ uint_t dtdo_len; /* length of instruction buffer */ uint_t dtdo_intlen; /* length of integer table */ uint_t dtdo_strlen; /* length of string table */ uint_t dtdo_varlen; /* length of variable table */ dtrace_diftype_t dtdo_rtype; /* return type */ uint_t dtdo_refcnt; /* owner reference count */ uint_t dtdo_destructive; /* invokes destructive subroutines */ #ifndef _KERNEL dof_relodesc_t *dtdo_kreltab; /* kernel relocations */ dof_relodesc_t *dtdo_ureltab; /* user relocations */ struct dt_node **dtdo_xlmtab; /* translator references */ uint_t dtdo_krelen; /* length of krelo table */ uint_t dtdo_urelen; /* length of urelo table */ uint_t dtdo_xlmlen; /* length of translator table */ #endif } dtrace_difo_t; /* * DTrace Enabling Description Structures * * When DTrace is tracking the description of a DTrace enabling entity (probe, * predicate, action, ECB, record, etc.), it does so in a description * structure. These structures all end in "desc", and are used at both * user-level and in the kernel -- but (with the exception of * dtrace_probedesc_t) they are never passed between them. Typically, * user-level will use the description structures when assembling an enabling. * It will then distill those description structures into a DOF object (see * above), and send it into the kernel. The kernel will again use the * description structures to create a description of the enabling as it reads * the DOF. When the description is complete, the enabling will be actually * created -- turning it into the structures that represent the enabling * instead of merely describing it. Not surprisingly, the description * structures bear a strong resemblance to the DOF structures that act as their * conduit. 
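 *
 * As a purely illustrative sketch (not part of the original interface
 * description), a user-level consumer describing the probe
 * syscall::read:entry might fill in a dtrace_probedesc_t -- defined
 * immediately below -- as follows; in practice libdtrace performs this
 * work when it parses a D probe description:
 *
 *        dtrace_probedesc_t pd;
 *
 *        bzero(&pd, sizeof (pd));
 *        pd.dtpd_id = DTRACE_IDNONE;
 *        (void) strlcpy(pd.dtpd_provider, "syscall", sizeof (pd.dtpd_provider));
 *        (void) strlcpy(pd.dtpd_func, "read", sizeof (pd.dtpd_func));
 *        (void) strlcpy(pd.dtpd_name, "entry", sizeof (pd.dtpd_name));
 *
 * The dtpd_mod field is left zeroed here; an empty component in a probe
 * description in effect matches any value for that component.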
*/ struct dtrace_predicate; typedef struct dtrace_probedesc { dtrace_id_t dtpd_id; /* probe identifier */ char dtpd_provider[DTRACE_PROVNAMELEN]; /* probe provider name */ char dtpd_mod[DTRACE_MODNAMELEN]; /* probe module name */ char dtpd_func[DTRACE_FUNCNAMELEN]; /* probe function name */ char dtpd_name[DTRACE_NAMELEN]; /* probe name */ } dtrace_probedesc_t; typedef struct dtrace_repldesc { dtrace_probedesc_t dtrpd_match; /* probe descr. to match */ dtrace_probedesc_t dtrpd_create; /* probe descr. to create */ } dtrace_repldesc_t; typedef struct dtrace_preddesc { dtrace_difo_t *dtpdd_difo; /* pointer to DIF object */ struct dtrace_predicate *dtpdd_predicate; /* pointer to predicate */ } dtrace_preddesc_t; typedef struct dtrace_actdesc { dtrace_difo_t *dtad_difo; /* pointer to DIF object */ struct dtrace_actdesc *dtad_next; /* next action */ dtrace_actkind_t dtad_kind; /* kind of action */ uint32_t dtad_ntuple; /* number in tuple */ uint64_t dtad_arg; /* action argument */ uint64_t dtad_uarg; /* user argument */ int dtad_refcnt; /* reference count */ } dtrace_actdesc_t; typedef struct dtrace_ecbdesc { dtrace_actdesc_t *dted_action; /* action description(s) */ dtrace_preddesc_t dted_pred; /* predicate description */ dtrace_probedesc_t dted_probe; /* probe description */ uint64_t dted_uarg; /* library argument */ int dted_refcnt; /* reference count */ } dtrace_ecbdesc_t; /* * DTrace Metadata Description Structures * * DTrace separates the trace data stream from the metadata stream. The only * metadata tokens placed in the data stream are the dtrace_rechdr_t (EPID + * timestamp) or (in the case of aggregations) aggregation identifiers. To * determine the structure of the data, DTrace consumers pass the token to the * kernel, and receive in return a corresponding description of the enabled * probe (via the dtrace_eprobedesc structure) or the aggregation (via the * dtrace_aggdesc structure). Both of these structures are expressed in terms * of record descriptions (via the dtrace_recdesc structure) that describe the * exact structure of the data. Some record descriptions may also contain a * format identifier; this additional bit of metadata can be retrieved from the * kernel, for which a format description is returned via the dtrace_fmtdesc * structure. Note that all four of these structures must be bitness-neutral * to allow for a 32-bit DTrace consumer on a 64-bit kernel. 
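 *
 * For illustration only (this sketch is not part of the original interface
 * description; the consume() callback is hypothetical): given the
 * dtrace_eprobedesc_t fetched for a record's EPID and a pointer to the
 * start of that probe's data within the snapshot buffer (the position of
 * its record header), a consumer could locate each datum via the record
 * descriptions:
 *
 *        void
 *        walk_records(const dtrace_eprobedesc_t *epd, const char *base)
 *        {
 *                int i;
 *
 *                for (i = 0; i < epd->dtepd_nrecs; i++) {
 *                        const dtrace_recdesc_t *rec = &epd->dtepd_rec[i];
 *
 *                        consume(rec->dtrd_action, base + rec->dtrd_offset,
 *                            rec->dtrd_size);
 *                }
 *        }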
*/ typedef struct dtrace_recdesc { dtrace_actkind_t dtrd_action; /* kind of action */ uint32_t dtrd_size; /* size of record */ uint32_t dtrd_offset; /* offset in ECB's data */ uint16_t dtrd_alignment; /* required alignment */ uint16_t dtrd_format; /* format, if any */ uint64_t dtrd_arg; /* action argument */ uint64_t dtrd_uarg; /* user argument */ } dtrace_recdesc_t; typedef struct dtrace_eprobedesc { dtrace_epid_t dtepd_epid; /* enabled probe ID */ dtrace_id_t dtepd_probeid; /* probe ID */ uint64_t dtepd_uarg; /* library argument */ uint32_t dtepd_size; /* total size */ int dtepd_nrecs; /* number of records */ dtrace_recdesc_t dtepd_rec[1]; /* records themselves */ } dtrace_eprobedesc_t; typedef struct dtrace_aggdesc { DTRACE_PTR(char, dtagd_name); /* not filled in by kernel */ dtrace_aggvarid_t dtagd_varid; /* not filled in by kernel */ int dtagd_flags; /* not filled in by kernel */ dtrace_aggid_t dtagd_id; /* aggregation ID */ dtrace_epid_t dtagd_epid; /* enabled probe ID */ uint32_t dtagd_size; /* size in bytes */ int dtagd_nrecs; /* number of records */ uint32_t dtagd_pad; /* explicit padding */ dtrace_recdesc_t dtagd_rec[1]; /* record descriptions */ } dtrace_aggdesc_t; typedef struct dtrace_fmtdesc { DTRACE_PTR(char, dtfd_string); /* format string */ int dtfd_length; /* length of format string */ uint16_t dtfd_format; /* format identifier */ } dtrace_fmtdesc_t; #define DTRACE_SIZEOF_EPROBEDESC(desc) \ (sizeof (dtrace_eprobedesc_t) + ((desc)->dtepd_nrecs ? \ (((desc)->dtepd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0)) #define DTRACE_SIZEOF_AGGDESC(desc) \ (sizeof (dtrace_aggdesc_t) + ((desc)->dtagd_nrecs ? \ (((desc)->dtagd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0)) /* * DTrace Option Interface * * Run-time DTrace options are set and retrieved via DOF_SECT_OPTDESC sections * in a DOF image. The dof_optdesc structure contains an option identifier and * an option value. The valid option identifiers are found below; the mapping * between option identifiers and option identifying strings is maintained at * user-level. Note that the value of DTRACEOPT_UNSET is such that all of the * following are potentially valid option values: all positive integers, zero * and negative one. Some options (notably "bufpolicy" and "bufresize") take * predefined tokens as their values; these are defined with * DTRACEOPT_{option}_{token}. 
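 *
 * As an illustrative sketch (not part of the original interface
 * description), the dof_optdesc_t for a 4MB principal buffer -- a section
 * that libdtrace normally generates on the consumer's behalf -- could be
 * initialized as follows, using the option identifiers defined below:
 *
 *        dof_optdesc_t opt;
 *
 *        opt.dofo_option = DTRACEOPT_BUFSIZE;
 *        opt.dofo_strtab = DOF_SECIDX_NONE;
 *        opt.dofo_value = 4 * 1024 * 1024;
 *
 * A string-valued option would instead point dofo_strtab at a
 * DOF_SECT_STRTAB section and store the string's offset in dofo_value.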
*/ #define DTRACEOPT_BUFSIZE 0 /* buffer size */ #define DTRACEOPT_BUFPOLICY 1 /* buffer policy */ #define DTRACEOPT_DYNVARSIZE 2 /* dynamic variable size */ #define DTRACEOPT_AGGSIZE 3 /* aggregation size */ #define DTRACEOPT_SPECSIZE 4 /* speculation size */ #define DTRACEOPT_NSPEC 5 /* number of speculations */ #define DTRACEOPT_STRSIZE 6 /* string size */ #define DTRACEOPT_CLEANRATE 7 /* dynvar cleaning rate */ #define DTRACEOPT_CPU 8 /* CPU to trace */ #define DTRACEOPT_BUFRESIZE 9 /* buffer resizing policy */ #define DTRACEOPT_GRABANON 10 /* grab anonymous state, if any */ #define DTRACEOPT_FLOWINDENT 11 /* indent function entry/return */ #define DTRACEOPT_QUIET 12 /* only output explicitly traced data */ #define DTRACEOPT_STACKFRAMES 13 /* number of stack frames */ #define DTRACEOPT_USTACKFRAMES 14 /* number of user stack frames */ #define DTRACEOPT_AGGRATE 15 /* aggregation snapshot rate */ #define DTRACEOPT_SWITCHRATE 16 /* buffer switching rate */ #define DTRACEOPT_STATUSRATE 17 /* status rate */ #define DTRACEOPT_DESTRUCTIVE 18 /* destructive actions allowed */ #define DTRACEOPT_STACKINDENT 19 /* output indent for stack traces */ #define DTRACEOPT_RAWBYTES 20 /* always print bytes in raw form */ #define DTRACEOPT_JSTACKFRAMES 21 /* number of jstack() frames */ #define DTRACEOPT_JSTACKSTRSIZE 22 /* size of jstack() string table */ #define DTRACEOPT_AGGSORTKEY 23 /* sort aggregations by key */ #define DTRACEOPT_AGGSORTREV 24 /* reverse-sort aggregations */ #define DTRACEOPT_AGGSORTPOS 25 /* agg. position to sort on */ #define DTRACEOPT_AGGSORTKEYPOS 26 /* agg. key position to sort on */ #define DTRACEOPT_TEMPORAL 27 /* temporally ordered output */ #define DTRACEOPT_AGGHIST 28 /* histogram aggregation output */ #define DTRACEOPT_AGGPACK 29 /* packed aggregation output */ #define DTRACEOPT_AGGZOOM 30 /* zoomed aggregation scaling */ #define DTRACEOPT_ZONE 31 /* zone in which to enable probes */ #define DTRACEOPT_MAX 32 /* number of options */ #define DTRACEOPT_UNSET (dtrace_optval_t)-2 /* unset option */ #define DTRACEOPT_BUFPOLICY_RING 0 /* ring buffer */ #define DTRACEOPT_BUFPOLICY_FILL 1 /* fill buffer, then stop */ #define DTRACEOPT_BUFPOLICY_SWITCH 2 /* switch buffers */ #define DTRACEOPT_BUFRESIZE_AUTO 0 /* automatic resizing */ #define DTRACEOPT_BUFRESIZE_MANUAL 1 /* manual resizing */ /* * DTrace Buffer Interface * * In order to get a snapshot of the principal or aggregation buffer, * user-level passes a buffer description to the kernel with the dtrace_bufdesc * structure. This describes which CPU user-level is interested in, and * where user-level wishes the kernel to snapshot the buffer to (the * dtbd_data field). The kernel uses the same structure to pass back some * information regarding the buffer: the size of data actually copied out, the * number of drops, the number of errors, the offset of the oldest record, * and the time of the snapshot. * * If the buffer policy is a "switch" policy, taking a snapshot of the * principal buffer has the additional effect of switching the active and * inactive buffers. Taking a snapshot of the aggregation buffer _always_ has * the additional effect of switching the active and inactive buffers. 
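 *
 * A minimal sketch of a principal-buffer snapshot (illustrative only; real
 * consumers go through libdtrace, and dtfd, cpu, bufsize and process() are
 * assumptions of this example). This mirrors the illumos-style ioctl; note
 * that the FreeBSD definition of DTRACEIOC_BUFSNAP below takes a pointer to
 * the descriptor pointer instead:
 *
 *        dtrace_bufdesc_t desc;
 *
 *        bzero(&desc, sizeof (desc));
 *        desc.dtbd_size = bufsize;
 *        desc.dtbd_cpu = cpu;
 *        desc.dtbd_data = malloc(bufsize);
 *
 *        if (ioctl(dtfd, DTRACEIOC_BUFSNAP, &desc) == 0)
 *                process(desc.dtbd_data, desc.dtbd_size, desc.dtbd_drops);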
*/ typedef struct dtrace_bufdesc { uint64_t dtbd_size; /* size of buffer */ uint32_t dtbd_cpu; /* CPU or DTRACE_CPUALL */ uint32_t dtbd_errors; /* number of errors */ uint64_t dtbd_drops; /* number of drops */ DTRACE_PTR(char, dtbd_data); /* data */ uint64_t dtbd_oldest; /* offset of oldest record */ uint64_t dtbd_timestamp; /* hrtime of snapshot */ } dtrace_bufdesc_t; /* * Each record in the buffer (dtbd_data) begins with a header that includes * the epid and a timestamp. The timestamp is split into two 4-byte parts * so that we do not require 8-byte alignment. */ typedef struct dtrace_rechdr { dtrace_epid_t dtrh_epid; /* enabled probe id */ uint32_t dtrh_timestamp_hi; /* high bits of hrtime_t */ uint32_t dtrh_timestamp_lo; /* low bits of hrtime_t */ } dtrace_rechdr_t; #define DTRACE_RECORD_LOAD_TIMESTAMP(dtrh) \ ((dtrh)->dtrh_timestamp_lo + \ ((uint64_t)(dtrh)->dtrh_timestamp_hi << 32)) #define DTRACE_RECORD_STORE_TIMESTAMP(dtrh, hrtime) { \ (dtrh)->dtrh_timestamp_lo = (uint32_t)hrtime; \ (dtrh)->dtrh_timestamp_hi = hrtime >> 32; \ } /* * DTrace Status * * The status of DTrace is relayed via the dtrace_status structure. This * structure contains members to count drops other than the capacity drops * available via the buffer interface (see above). This consists of dynamic * drops (including capacity dynamic drops, rinsing drops and dirty drops), and * speculative drops (including capacity speculative drops, drops due to busy * speculative buffers and drops due to unavailable speculative buffers). * Additionally, the status structure contains a field to indicate the number * of "fill"-policy buffers have been filled and a boolean field to indicate * that exit() has been called. If the dtst_exiting field is non-zero, no * further data will be generated until tracing is stopped (at which time any * enablings of the END action will be processed); if user-level sees that * this field is non-zero, tracing should be stopped as soon as possible. */ typedef struct dtrace_status { uint64_t dtst_dyndrops; /* dynamic drops */ uint64_t dtst_dyndrops_rinsing; /* dyn drops due to rinsing */ uint64_t dtst_dyndrops_dirty; /* dyn drops due to dirty */ uint64_t dtst_specdrops; /* speculative drops */ uint64_t dtst_specdrops_busy; /* spec drops due to busy */ uint64_t dtst_specdrops_unavail; /* spec drops due to unavail */ uint64_t dtst_errors; /* total errors */ uint64_t dtst_filled; /* number of filled bufs */ uint64_t dtst_stkstroverflows; /* stack string tab overflows */ uint64_t dtst_dblerrors; /* errors in ERROR probes */ char dtst_killed; /* non-zero if killed */ char dtst_exiting; /* non-zero if exit() called */ char dtst_pad[6]; /* pad out to 64-bit align */ } dtrace_status_t; /* * DTrace Configuration * * User-level may need to understand some elements of the kernel DTrace * configuration in order to generate correct DIF. This information is * conveyed via the dtrace_conf structure. */ typedef struct dtrace_conf { uint_t dtc_difversion; /* supported DIF version */ uint_t dtc_difintregs; /* # of DIF integer registers */ uint_t dtc_diftupregs; /* # of DIF tuple registers */ uint_t dtc_ctfmodel; /* CTF data model */ uint_t dtc_pad[8]; /* reserved for future use */ } dtrace_conf_t; /* * DTrace Faults * * The constants below DTRACEFLT_LIBRARY indicate probe processing faults; * constants at or above DTRACEFLT_LIBRARY indicate faults in probe * postprocessing at user-level. 
Probe processing faults induce an ERROR * probe and are replicated in unistd.d to allow users' ERROR probes to decode * the error condition using these symbolic labels. */ #define DTRACEFLT_UNKNOWN 0 /* Unknown fault */ #define DTRACEFLT_BADADDR 1 /* Bad address */ #define DTRACEFLT_BADALIGN 2 /* Bad alignment */ #define DTRACEFLT_ILLOP 3 /* Illegal operation */ #define DTRACEFLT_DIVZERO 4 /* Divide-by-zero */ #define DTRACEFLT_NOSCRATCH 5 /* Out of scratch space */ #define DTRACEFLT_KPRIV 6 /* Illegal kernel access */ #define DTRACEFLT_UPRIV 7 /* Illegal user access */ #define DTRACEFLT_TUPOFLOW 8 /* Tuple stack overflow */ #define DTRACEFLT_BADSTACK 9 /* Bad stack */ #define DTRACEFLT_LIBRARY 1000 /* Library-level fault */ /* * DTrace Argument Types * * Because it would waste both space and time, argument types do not reside * with the probe. In order to determine argument types for args[X] * variables, the D compiler queries for argument types on a probe-by-probe * basis. (This optimizes for the common case that arguments are either not * used or used in an untyped fashion.) Typed arguments are specified with a * string of the type name in the dtargd_native member of the argument * description structure. Typed arguments may be further translated to types * of greater stability; the provider indicates such a translated argument by * filling in the dtargd_xlate member with the string of the translated type. * Finally, the provider may indicate which argument value a given argument * maps to by setting the dtargd_mapping member -- allowing a single argument * to map to multiple args[X] variables. */ typedef struct dtrace_argdesc { dtrace_id_t dtargd_id; /* probe identifier */ int dtargd_ndx; /* arg number (-1 iff none) */ int dtargd_mapping; /* value mapping */ char dtargd_native[DTRACE_ARGTYPELEN]; /* native type name */ char dtargd_xlate[DTRACE_ARGTYPELEN]; /* translated type name */ } dtrace_argdesc_t; /* * DTrace Stability Attributes * * Each DTrace provider advertises the name and data stability of each of its * probe description components, as well as its architectural dependencies. * The D compiler can query the provider attributes (dtrace_pattr_t below) in * order to compute the properties of an input program and report them.
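 *
 * For example (an illustrative sketch, not part of the original interface
 * description; "foo" is a hypothetical provider), a provider might advertise
 * its stability as follows, using the constants and structures defined
 * below; the rows correspond to the provider, module, function, name and
 * args[] components, respectively:
 *
 *        static dtrace_pattr_t foo_attr = {
 *        { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
 *        { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
 *        { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
 *        { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
 *        { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
 *        };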
*/ typedef uint8_t dtrace_stability_t; /* stability code (see attributes(5)) */ typedef uint8_t dtrace_class_t; /* architectural dependency class */ #define DTRACE_STABILITY_INTERNAL 0 /* private to DTrace itself */ #define DTRACE_STABILITY_PRIVATE 1 /* private to Sun (see docs) */ #define DTRACE_STABILITY_OBSOLETE 2 /* scheduled for removal */ #define DTRACE_STABILITY_EXTERNAL 3 /* not controlled by Sun */ #define DTRACE_STABILITY_UNSTABLE 4 /* new or rapidly changing */ #define DTRACE_STABILITY_EVOLVING 5 /* less rapidly changing */ #define DTRACE_STABILITY_STABLE 6 /* mature interface from Sun */ #define DTRACE_STABILITY_STANDARD 7 /* industry standard */ #define DTRACE_STABILITY_MAX 7 /* maximum valid stability */ #define DTRACE_CLASS_UNKNOWN 0 /* unknown architectural dependency */ #define DTRACE_CLASS_CPU 1 /* CPU-module-specific */ #define DTRACE_CLASS_PLATFORM 2 /* platform-specific (uname -i) */ #define DTRACE_CLASS_GROUP 3 /* hardware-group-specific (uname -m) */ #define DTRACE_CLASS_ISA 4 /* ISA-specific (uname -p) */ #define DTRACE_CLASS_COMMON 5 /* common to all systems */ #define DTRACE_CLASS_MAX 5 /* maximum valid class */ #define DTRACE_PRIV_NONE 0x0000 #define DTRACE_PRIV_KERNEL 0x0001 #define DTRACE_PRIV_USER 0x0002 #define DTRACE_PRIV_PROC 0x0004 #define DTRACE_PRIV_OWNER 0x0008 #define DTRACE_PRIV_ZONEOWNER 0x0010 #define DTRACE_PRIV_ALL \ (DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER | \ DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER | DTRACE_PRIV_ZONEOWNER) typedef struct dtrace_ppriv { uint32_t dtpp_flags; /* privilege flags */ uid_t dtpp_uid; /* user ID */ zoneid_t dtpp_zoneid; /* zone ID */ } dtrace_ppriv_t; typedef struct dtrace_attribute { dtrace_stability_t dtat_name; /* entity name stability */ dtrace_stability_t dtat_data; /* entity data stability */ dtrace_class_t dtat_class; /* entity data dependency */ } dtrace_attribute_t; typedef struct dtrace_pattr { dtrace_attribute_t dtpa_provider; /* provider attributes */ dtrace_attribute_t dtpa_mod; /* module attributes */ dtrace_attribute_t dtpa_func; /* function attributes */ dtrace_attribute_t dtpa_name; /* name attributes */ dtrace_attribute_t dtpa_args; /* args[] attributes */ } dtrace_pattr_t; typedef struct dtrace_providerdesc { char dtvd_name[DTRACE_PROVNAMELEN]; /* provider name */ dtrace_pattr_t dtvd_attr; /* stability attributes */ dtrace_ppriv_t dtvd_priv; /* privileges required */ } dtrace_providerdesc_t; /* * DTrace Pseudodevice Interface * * DTrace is controlled through ioctl(2)'s to the in-kernel dtrace:dtrace * pseudodevice driver. These ioctls comprise the user-kernel interface to * DTrace. */ #ifdef illumos #define DTRACEIOC (('d' << 24) | ('t' << 16) | ('r' << 8)) #define DTRACEIOC_PROVIDER (DTRACEIOC | 1) /* provider query */ #define DTRACEIOC_PROBES (DTRACEIOC | 2) /* probe query */ #define DTRACEIOC_BUFSNAP (DTRACEIOC | 4) /* snapshot buffer */ #define DTRACEIOC_PROBEMATCH (DTRACEIOC | 5) /* match probes */ #define DTRACEIOC_ENABLE (DTRACEIOC | 6) /* enable probes */ #define DTRACEIOC_AGGSNAP (DTRACEIOC | 7) /* snapshot agg. */ #define DTRACEIOC_EPROBE (DTRACEIOC | 8) /* get eprobe desc. */ #define DTRACEIOC_PROBEARG (DTRACEIOC | 9) /* get probe arg */ #define DTRACEIOC_CONF (DTRACEIOC | 10) /* get config. */ #define DTRACEIOC_STATUS (DTRACEIOC | 11) /* get status */ #define DTRACEIOC_GO (DTRACEIOC | 12) /* start tracing */ #define DTRACEIOC_STOP (DTRACEIOC | 13) /* stop tracing */ #define DTRACEIOC_AGGDESC (DTRACEIOC | 15) /* get agg. desc. 
*/ #define DTRACEIOC_FORMAT (DTRACEIOC | 16) /* get format str */ #define DTRACEIOC_DOFGET (DTRACEIOC | 17) /* get DOF */ #define DTRACEIOC_REPLICATE (DTRACEIOC | 18) /* replicate enab */ #else #define DTRACEIOC_PROVIDER _IOWR('x',1,dtrace_providerdesc_t) /* provider query */ #define DTRACEIOC_PROBES _IOWR('x',2,dtrace_probedesc_t) /* probe query */ #define DTRACEIOC_BUFSNAP _IOW('x',4,dtrace_bufdesc_t *) /* snapshot buffer */ #define DTRACEIOC_PROBEMATCH _IOWR('x',5,dtrace_probedesc_t) /* match probes */ typedef struct { void *dof; /* DOF userland address written to driver. */ int n_matched; /* # matches returned by driver. */ } dtrace_enable_io_t; #define DTRACEIOC_ENABLE _IOWR('x',6,dtrace_enable_io_t) /* enable probes */ #define DTRACEIOC_AGGSNAP _IOW('x',7,dtrace_bufdesc_t *) /* snapshot agg. */ #define DTRACEIOC_EPROBE _IOW('x',8,dtrace_eprobedesc_t) /* get eprobe desc. */ #define DTRACEIOC_PROBEARG _IOWR('x',9,dtrace_argdesc_t) /* get probe arg */ #define DTRACEIOC_CONF _IOR('x',10,dtrace_conf_t) /* get config. */ #define DTRACEIOC_STATUS _IOR('x',11,dtrace_status_t) /* get status */ #define DTRACEIOC_GO _IOR('x',12,processorid_t) /* start tracing */ #define DTRACEIOC_STOP _IOWR('x',13,processorid_t) /* stop tracing */ #define DTRACEIOC_AGGDESC _IOW('x',15,dtrace_aggdesc_t *) /* get agg. desc. */ #define DTRACEIOC_FORMAT _IOWR('x',16,dtrace_fmtdesc_t) /* get format str */ #define DTRACEIOC_DOFGET _IOW('x',17,dof_hdr_t *) /* get DOF */ #define DTRACEIOC_REPLICATE _IOW('x',18,dtrace_repldesc_t) /* replicate enab */ #endif /* * DTrace Helpers * * In general, DTrace establishes probes in processes and takes actions on * processes without knowing their specific user-level structures. Instead of * existing in the framework, process-specific knowledge is contained by the * enabling D program -- which can apply process-specific knowledge by making * appropriate use of DTrace primitives like copyin() and copyinstr() to * operate on user-level data. However, there may exist some specific probes * of particular semantic relevance that the application developer may wish to * explicitly export. For example, an application may wish to export a probe * at the point that it begins and ends certain well-defined transactions. In * addition to providing probes, programs may wish to offer assistance for * certain actions. For example, in highly dynamic environments (e.g., Java), * it may be difficult to obtain a stack trace in terms of meaningful symbol * names (the translation from instruction addresses to corresponding symbol * names may only be possible in situ); these environments may wish to define * a series of actions to be applied in situ to obtain a meaningful stack * trace. * * These two mechanisms -- user-level statically defined tracing and assisting * DTrace actions -- are provided via DTrace _helpers_. Helpers are specified * via DOF, but unlike enabling DOF, helper DOF may contain definitions of * providers, probes and their arguments. If a helper wishes to provide * action assistance, probe descriptions and corresponding DIF actions may be * specified in the helper DOF. For such helper actions, however, the probe * description describes the specific helper: all DTrace helpers have the * provider name "dtrace" and the module name "helper", and the name of the * helper is contained in the function name (for example, the ustack() helper * is named "ustack"). 
Any helper-specific name may be contained in the name * (for example, if a helper were to have a constructor, it might be named * "dtrace:helper:<helper name>:init"). Helper actions are only called when the * action that they are helping is taken. Helper actions may only return DIF * expressions, and may only call the following subroutines: * * alloca() <= Allocates memory out of the consumer's scratch space * bcopy() <= Copies memory to scratch space * copyin() <= Copies memory from user-level into consumer's scratch * copyinto() <= Copies memory into a specific location in scratch * copyinstr() <= Copies a string into a specific location in scratch * * Helper actions may only access the following built-in variables: * * curthread <= Current kthread_t pointer * tid <= Current thread identifier * pid <= Current process identifier * ppid <= Parent process identifier * uid <= Current user ID * gid <= Current group ID * execname <= Current executable name * zonename <= Current zone name * * Helper actions may not manipulate or allocate dynamic variables, but they * may have clause-local and statically-allocated global variables. The * helper action variable state is specific to the helper action -- variables * used by the helper action may not be accessed outside of the helper * action, and the helper action may not access variables that live outside * of it. Helper actions may not load from kernel memory at-large; they are * restricted to loading current user state (via copyin() and variants) and * scratch space. As with probe enablings, helper actions are executed in * program order. The result of the helper action is the result of the last * executing helper expression. * * Helpers -- composed of either providers/probes or probes/actions (or both) * -- are added by opening the "helper" minor node, and issuing an ioctl(2) * (DTRACEHIOC_ADDDOF) that specifies the dof_helper_t structure. This * encapsulates the name and base address of the user-level library or * executable publishing the helpers and probes as well as the DOF that * contains the definitions of those helpers and probes. * * The DTRACEHIOC_ADD and DTRACEHIOC_REMOVE are left in place for legacy * helpers and should no longer be used. No other ioctls are valid on the * helper minor node. */ #ifdef illumos #define DTRACEHIOC (('d' << 24) | ('t' << 16) | ('h' << 8)) #define DTRACEHIOC_ADD (DTRACEHIOC | 1) /* add helper */ #define DTRACEHIOC_REMOVE (DTRACEHIOC | 2) /* remove helper */ #define DTRACEHIOC_ADDDOF (DTRACEHIOC | 3) /* add helper DOF */ #else #define DTRACEHIOC_REMOVE _IOW('z', 2, int) /* remove helper */ #define DTRACEHIOC_ADDDOF _IOWR('z', 3, dof_helper_t)/* add helper DOF */ #endif typedef struct dof_helper { char dofhp_mod[DTRACE_MODNAMELEN]; /* executable or library name */ uint64_t dofhp_addr; /* base address of object */ uint64_t dofhp_dof; /* address of helper DOF */ #ifdef __FreeBSD__ pid_t dofhp_pid; /* target process ID */ int dofhp_gen; #endif } dof_helper_t; #define DTRACEMNR_DTRACE "dtrace" /* node for DTrace ops */ #define DTRACEMNR_HELPER "helper" /* node for helpers */ #define DTRACEMNRN_DTRACE 0 /* minor for DTrace ops */ #define DTRACEMNRN_HELPER 1 /* minor for helpers */ #define DTRACEMNRN_CLONE 2 /* first clone minor */ #ifdef _KERNEL /* * DTrace Provider API * * The following functions are implemented by the DTrace framework and are * used to implement separate in-kernel DTrace providers. Common functions * are provided in uts/common/os/dtrace.c.
ISA-dependent subroutines are * defined in uts/<isa>/dtrace/dtrace_asm.s or uts/<isa>/dtrace/dtrace_isa.c. * * The provider API has two halves: the API that the providers consume from * DTrace, and the API that providers make available to DTrace. * * 1 Framework-to-Provider API * * 1.1 Overview * * The Framework-to-Provider API is represented by the dtrace_pops structure * that the provider passes to the framework when registering itself. This * structure consists of the following members: * * dtps_provide() <-- Provide all probes, all modules * dtps_provide_module() <-- Provide all probes in specified module * dtps_enable() <-- Enable specified probe * dtps_disable() <-- Disable specified probe * dtps_suspend() <-- Suspend specified probe * dtps_resume() <-- Resume specified probe * dtps_getargdesc() <-- Get the argument description for args[X] * dtps_getargval() <-- Get the value for an argX or args[X] variable * dtps_usermode() <-- Find out if the probe was fired in user mode * dtps_destroy() <-- Destroy all state associated with this probe * * 1.2 void dtps_provide(void *arg, const dtrace_probedesc_t *spec) * * 1.2.1 Overview * * Called to indicate that the provider should provide all probes. If the * specified description is non-NULL, dtps_provide() is being called because * no probe matched a specified probe -- if the provider has the ability to * create custom probes, it may wish to create a probe that matches the * specified description. * * 1.2.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is a pointer to a probe description that the provider may * wish to consider when creating custom probes. The provider is expected to * call back into the DTrace framework via dtrace_probe_create() to create * any necessary probes. dtps_provide() may be called even if the provider * has made available all probes; the provider should check the return value * of dtrace_probe_create() to handle this case. Note that the provider need * not implement both dtps_provide() and dtps_provide_module(); see * "Arguments and Notes" for dtrace_register(), below. * * 1.2.3 Return value * * None. * * 1.2.4 Caller's context * * dtps_provide() is typically called from open() or ioctl() context, but may * be called from other contexts as well. The DTrace framework is locked in * such a way that providers may not register or unregister. This means that * the provider may not call any DTrace API that affects its registration with * the framework, including dtrace_register(), dtrace_unregister(), * dtrace_invalidate(), and dtrace_condense(). However, the context is such * that the provider may (and indeed, is expected to) call probe-related * DTrace routines, including dtrace_probe_create(), dtrace_probe_lookup(), * and dtrace_probe_arg(). * * 1.3 void dtps_provide_module(void *arg, modctl_t *mp) * * 1.3.1 Overview * * Called to indicate that the provider should provide all probes in the * specified module. * * 1.3.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is a pointer to a modctl structure that indicates the * module for which probes should be created. * * 1.3.3 Return value * * None. * * 1.3.4 Caller's context * * dtps_provide_module() may be called from open() or ioctl() context, but * may also be called from a module loading context. mod_lock is held, and * the DTrace framework is locked in such a way that providers may not * register or unregister.
This means that the provider may not call any * DTrace API that affects its registration with the framework, including * dtrace_register(), dtrace_unregister(), dtrace_invalidate(), and * dtrace_condense(). However, the context is such that the provider may (and * indeed, is expected to) call probe-related DTrace routines, including * dtrace_probe_create(), dtrace_probe_lookup(), and dtrace_probe_arg(). Note * that the provider need not implement both dtps_provide() and * dtps_provide_module(); see "Arguments and Notes" for dtrace_register(), * below. * * 1.4 void dtps_enable(void *arg, dtrace_id_t id, void *parg) * * 1.4.1 Overview * * Called to enable the specified probe. * * 1.4.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be enabled. The third * argument is the probe argument as passed to dtrace_probe_create(). * dtps_enable() will be called when a probe transitions from not being * enabled at all to having one or more ECBs. The number of ECBs associated * with the probe may change without subsequent calls into the provider. * When the number of ECBs drops to zero, the provider will be explicitly * told to disable the probe via dtps_disable(). dtrace_probe() should never * be called for a probe identifier that hasn't been explicitly enabled via * dtps_enable(). * * 1.4.3 Return value * * None. * * 1.4.4 Caller's context * * The DTrace framework is locked in such a way that it may not be called * back into at all. cpu_lock is held. mod_lock is not held and may not * be acquired. * * 1.5 void dtps_disable(void *arg, dtrace_id_t id, void *parg) * * 1.5.1 Overview * * Called to disable the specified probe. * * 1.5.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be disabled. The third * argument is the probe argument as passed to dtrace_probe_create(). * dtps_disable() will be called when a probe transitions from being enabled * to having zero ECBs. dtrace_probe() should never be called for a probe * identifier that has been explicitly disabled via dtps_disable(). * * 1.5.3 Return value * * None. * * 1.5.4 Caller's context * * The DTrace framework is locked in such a way that it may not be called * back into at all. cpu_lock is held. mod_lock is not held and may not * be acquired. * * 1.6 void dtps_suspend(void *arg, dtrace_id_t id, void *parg) * * 1.6.1 Overview * * Called to suspend the specified enabled probe. This entry point is for * providers that may need to suspend some or all of their probes when CPUs * are being powered on or when the boot monitor is being entered for a * prolonged period of time. * * 1.6.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be suspended. The * third argument is the probe argument as passed to dtrace_probe_create(). * dtps_suspend will only be called on an enabled probe. Providers that * provide a dtps_suspend entry point will want to take roughly the action * that it takes for dtps_disable. * * 1.6.3 Return value * * None. * * 1.6.4 Caller's context * * Interrupts are disabled. The DTrace framework is in a state such that the * specified probe cannot be disabled or destroyed for the duration of * dtps_suspend(). As interrupts are disabled, the provider is afforded * little latitude; the provider is expected to do no more than a store to * memory.
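 *
 * As an illustrative sketch (not part of the original interface
 * description), a provider whose probe argument is a private foo_probe_t --
 * both the type and its fp_enabled member are hypothetical -- might
 * implement dtps_enable() and dtps_disable() as simple bookkeeping:
 *
 *        static void
 *        foo_enable(void *arg, dtrace_id_t id, void *parg)
 *        {
 *                foo_probe_t *fp = parg;
 *
 *                fp->fp_enabled = 1;
 *        }
 *
 *        static void
 *        foo_disable(void *arg, dtrace_id_t id, void *parg)
 *        {
 *                foo_probe_t *fp = parg;
 *
 *                fp->fp_enabled = 0;
 *        }
 *
 * The probe site would then call dtrace_probe() only while fp_enabled is
 * set.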
* * 1.7 void dtps_resume(void *arg, dtrace_id_t id, void *parg) * * 1.7.1 Overview * * Called to resume the specified enabled probe. This entry point is for * providers that may need to resume some or all of their probes after the * completion of an event that induced a call to dtps_suspend(). * * 1.7.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be resumed. The * third argument is the probe argument as passed to dtrace_probe_create(). * dtps_resume will only be called on an enabled probe. Providers that * provide a dtps_resume entry point will want to take roughly the action * that it takes for dtps_enable. * * 1.7.3 Return value * * None. * * 1.7.4 Caller's context * * Interrupts are disabled. The DTrace framework is in a state such that the * specified probe cannot be disabled or destroyed for the duration of * dtps_resume(). As interrupts are disabled, the provider is afforded * little latitude; the provider is expected to do no more than a store to * memory. * * 1.8 void dtps_getargdesc(void *arg, dtrace_id_t id, void *parg, * dtrace_argdesc_t *desc) * * 1.8.1 Overview * * Called to retrieve the argument description for an args[X] variable. * * 1.8.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the current probe. The third * argument is the probe argument as passed to dtrace_probe_create(). The * fourth argument is a pointer to the argument description. This * description is both an input and output parameter: it contains the * index of the desired argument in the dtargd_ndx field, and expects * the other fields to be filled in upon return. If there is no argument * corresponding to the specified index, the dtargd_ndx field should be set * to DTRACE_ARGNONE. * * 1.8.3 Return value * * None. The dtargd_ndx, dtargd_native, dtargd_xlate and dtargd_mapping * members of the dtrace_argdesc_t structure are all output values. * * 1.8.4 Caller's context * * dtps_getargdesc() is called from ioctl() context. mod_lock is held, and * the DTrace framework is locked in such a way that providers may not * register or unregister. This means that the provider may not call any * DTrace API that affects its registration with the framework, including * dtrace_register(), dtrace_unregister(), dtrace_invalidate(), and * dtrace_condense(). * * 1.9 uint64_t dtps_getargval(void *arg, dtrace_id_t id, void *parg, * int argno, int aframes) * * 1.9.1 Overview * * Called to retrieve a value for an argX or args[X] variable. * * 1.9.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the current probe. The third * argument is the probe argument as passed to dtrace_probe_create(). The * fourth argument is the number of the argument (the X in the example in * 1.9.1). The fifth argument is the number of stack frames that were used * to get from the actual place in the code that fired the probe to * dtrace_probe() itself, the so-called artificial frames. This argument may * be used to descend an appropriate number of frames to find the correct * values. If this entry point is left NULL, the dtrace_getarg() built-in * function is used. * * 1.9.3 Return value * * The value of the argument. * * 1.9.4 Caller's context * * This is called from within dtrace_probe() meaning that interrupts * are disabled. No locks should be taken within this entry point. 
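 *
 * For illustration only (not part of the original interface description;
 * "foo" is hypothetical), a provider whose probes each take a single int
 * argument might implement the dtps_getargdesc() protocol described in 1.8
 * as follows:
 *
 *        static void
 *        foo_getargdesc(void *arg, dtrace_id_t id, void *parg,
 *            dtrace_argdesc_t *desc)
 *        {
 *                if (desc->dtargd_ndx == 0)
 *                        (void) strlcpy(desc->dtargd_native, "int",
 *                            sizeof (desc->dtargd_native));
 *                else
 *                        desc->dtargd_ndx = DTRACE_ARGNONE;
 *        }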
* * 1.10 int dtps_usermode(void *arg, dtrace_id_t id, void *parg) * * 1.10.1 Overview * * Called to determine if the probe was fired in a user context. * * 1.10.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the current probe. The third * argument is the probe argument as passed to dtrace_probe_create(). This * entry point must not be left NULL for providers whose probes allow for * mixed mode tracing, that is to say those probes that can fire during * kernel- _or_ user-mode execution. * * 1.10.3 Return value * * A bitwise OR that encapsulates both the mode (either DTRACE_MODE_KERNEL * or DTRACE_MODE_USER) and the policy when the privilege of the enabling * is insufficient for that mode (a combination of DTRACE_MODE_NOPRIV_DROP, * DTRACE_MODE_NOPRIV_RESTRICT, and DTRACE_MODE_LIMITEDPRIV_RESTRICT). If the * DTRACE_MODE_NOPRIV_DROP bit is set, insufficient privilege will result * in the probe firing being silently ignored for the enabling; if the * DTRACE_MODE_NOPRIV_RESTRICT bit is set, insufficient privilege will not * prevent probe processing for the enabling, but restrictions will be in * place that induce a UPRIV fault upon attempt to examine probe arguments * or current process state. If the DTRACE_MODE_LIMITEDPRIV_RESTRICT bit * is set, similar restrictions will be placed upon operation if the * privilege is sufficient to process the enabling, but does not otherwise * entitle the enabling to all zones. The DTRACE_MODE_NOPRIV_DROP and * DTRACE_MODE_NOPRIV_RESTRICT are mutually exclusive (and one of these * two policies must be specified), but either may be combined (or not) * with DTRACE_MODE_LIMITEDPRIV_RESTRICT. * * 1.10.4 Caller's context * * This is called from within dtrace_probe() meaning that interrupts * are disabled. No locks should be taken within this entry point. * * 1.11 void dtps_destroy(void *arg, dtrace_id_t id, void *parg) * * 1.11.1 Overview * * Called to destroy the specified probe. * * 1.11.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be destroyed. The third * argument is the probe argument as passed to dtrace_probe_create(). The * provider should free all state associated with the probe. The framework * guarantees that dtps_destroy() is only called for probes that have either * been disabled via dtps_disable() or were never enabled via dtps_enable(). * Once dtps_destroy() has been called for a probe, no further call will be * made specifying the probe. * * 1.11.3 Return value * * None. * * 1.11.4 Caller's context * * The DTrace framework is locked in such a way that it may not be called * back into at all. mod_lock is held. cpu_lock is not held, and may not be * acquired. * * * 2 Provider-to-Framework API * * 2.1 Overview * * The Provider-to-Framework API provides the mechanism for the provider to * register itself with the DTrace framework, to create probes, to lookup * probes and (most importantly) to fire probes.
The Provider-to-Framework * consists of: * * dtrace_register() <-- Register a provider with the DTrace framework * dtrace_unregister() <-- Remove a provider's DTrace registration * dtrace_invalidate() <-- Invalidate the specified provider * dtrace_condense() <-- Remove a provider's unenabled probes * dtrace_attached() <-- Indicates whether or not DTrace has attached * dtrace_probe_create() <-- Create a DTrace probe * dtrace_probe_lookup() <-- Lookup a DTrace probe based on its name * dtrace_probe_arg() <-- Return the probe argument for a specific probe * dtrace_probe() <-- Fire the specified probe * * 2.2 int dtrace_register(const char *name, const dtrace_pattr_t *pap, * uint32_t priv, cred_t *cr, const dtrace_pops_t *pops, void *arg, * dtrace_provider_id_t *idp) * * 2.2.1 Overview * * dtrace_register() registers the calling provider with the DTrace * framework. It should generally be called by DTrace providers in their * attach(9E) entry point. * * 2.2.2 Arguments and Notes * * The first argument is the name of the provider. The second argument is a * pointer to the stability attributes for the provider. The third argument * is the privilege flags for the provider, and must be some combination of: * * DTRACE_PRIV_NONE <= All users may enable probes from this provider * * DTRACE_PRIV_PROC <= Any user with privilege of PRIV_DTRACE_PROC may * enable probes from this provider * * DTRACE_PRIV_USER <= Any user with privilege of PRIV_DTRACE_USER may * enable probes from this provider * * DTRACE_PRIV_KERNEL <= Any user with privilege of PRIV_DTRACE_KERNEL * may enable probes from this provider * * DTRACE_PRIV_OWNER <= This flag places an additional constraint on * the privilege requirements above. These probes * require either (a) a user ID matching the user * ID of the cred passed in the fourth argument * or (b) the PRIV_PROC_OWNER privilege. * * DTRACE_PRIV_ZONEOWNER<= This flag places an additional constraint on * the privilege requirements above. These probes * require either (a) a zone ID matching the zone * ID of the cred passed in the fourth argument * or (b) the PRIV_PROC_ZONE privilege. * * Note that these flags designate the _visibility_ of the probes, not * the conditions under which they may or may not fire. * * The fourth argument is the credential that is associated with the * provider. This argument should be NULL if the privilege flags don't * include DTRACE_PRIV_OWNER or DTRACE_PRIV_ZONEOWNER. If non-NULL, the * framework stashes the uid and zoneid represented by this credential * for use at probe-time, in implicit predicates. These limit visibility * of the probes to users and/or zones which have sufficient privilege to * access them. * * The fifth argument is a DTrace provider operations vector, which provides * the implementation for the Framework-to-Provider API. (See Section 1, * above.) This must be non-NULL, and each member must be non-NULL. The * exceptions to this are (1) the dtps_provide() and dtps_provide_module() * members (if the provider so desires, _one_ of these members may be left * NULL -- denoting that the provider only implements the other) and (2) * the dtps_suspend() and dtps_resume() members, which must either both be * NULL or both be non-NULL. * * The sixth argument is a cookie to be specified as the first argument for * each function in the Framework-to-Provider API. This argument may have * any value. * * The final argument is a pointer to dtrace_provider_id_t. 
If * dtrace_register() successfully completes, the provider identifier will be * stored in the memory pointed to by this argument. This argument must be * non-NULL. * * 2.2.3 Return value * * On success, dtrace_register() returns 0 and stores the new provider's * identifier into the memory pointed to by the idp argument. On failure, * dtrace_register() returns an errno: * * EINVAL The arguments passed to dtrace_register() were somehow invalid. * This may be because a parameter that must be non-NULL was NULL, * because the name was invalid (either empty or an illegal * provider name) or because the attributes were invalid. * * No other failure code is returned. * * 2.2.4 Caller's context * * dtrace_register() may induce calls to dtrace_provide(); the provider must * hold no locks across dtrace_register() that may also be acquired by * dtrace_provide(). cpu_lock and mod_lock must not be held. * * 2.3 int dtrace_unregister(dtrace_provider_t id) * * 2.3.1 Overview * * Unregisters the specified provider from the DTrace framework. It should * generally be called by DTrace providers in their detach(9E) entry point. * * 2.3.2 Arguments and Notes * * The only argument is the provider identifier, as returned from a * successful call to dtrace_register(). As a result of calling * dtrace_unregister(), the DTrace framework will call back into the provider * via the dtps_destroy() entry point. Once dtrace_unregister() successfully * completes, however, the DTrace framework will no longer make calls through * the Framework-to-Provider API. * * 2.3.3 Return value * * On success, dtrace_unregister() returns 0. On failure, dtrace_unregister() * returns an errno: * * EBUSY There are currently processes that have the DTrace pseudodevice * open, or there exists an anonymous enabling that hasn't yet * been claimed. * * No other failure code is returned. * * 2.3.4 Caller's context * * Because a call to dtrace_unregister() may induce calls through the * Framework-to-Provider API, the caller may not hold any lock across * dtrace_register() that is also acquired in any of the Framework-to- * Provider API functions. Additionally, mod_lock may not be held. * * 2.4 void dtrace_invalidate(dtrace_provider_id_t id) * * 2.4.1 Overview * * Invalidates the specified provider. All subsequent probe lookups for the * specified provider will fail, but its probes will not be removed. * * 2.4.2 Arguments and notes * * The only argument is the provider identifier, as returned from a * successful call to dtrace_register(). In general, a provider's probes * always remain valid; dtrace_invalidate() is a mechanism for invalidating * an entire provider, regardless of whether or not its probes are enabled. * Note that dtrace_invalidate() will _not_ prevent already enabled * probes from firing -- it will merely prevent any new enablings of the * provider's probes. * * 2.5 int dtrace_condense(dtrace_provider_id_t id) * * 2.5.1 Overview * * Removes all the unenabled probes for the given provider. This function is * not unlike dtrace_unregister(), except that it doesn't remove the * provider, just as many of its associated probes as it can. * * 2.5.2 Arguments and Notes * * As with dtrace_unregister(), the sole argument is the provider identifier * as returned from a successful call to dtrace_register(). As a result of * calling dtrace_condense(), the DTrace framework will call back into the * given provider's dtps_destroy() entry point for each of the provider's * unenabled probes.
* * 2.5.3 Return value * * Currently, dtrace_condense() always returns 0. However, consumers of this * function should check the return value as appropriate; its behavior may * change in the future. * * 2.5.4 Caller's context * * As with dtrace_unregister(), the caller may not hold any lock across * dtrace_condense() that is also acquired in the provider's entry points. * Also, mod_lock may not be held. * * 2.6 int dtrace_attached() * * 2.6.1 Overview * * Indicates whether or not DTrace has attached. * * 2.6.2 Arguments and Notes * * For most providers, DTrace makes initial contact beyond registration. * That is, once a provider has registered with DTrace, it waits to hear * from DTrace to create probes. However, some providers may wish to * proactively create probes without first being told by DTrace to do so. * If providers wish to do this, they must first call dtrace_attached() to * determine if DTrace itself has attached. If dtrace_attached() returns 0, * the provider must not make any other Provider-to-Framework API call. * * 2.6.3 Return value * * dtrace_attached() returns 1 if DTrace has attached, 0 otherwise. * * 2.7 int dtrace_probe_create(dtrace_provider_t id, const char *mod, * const char *func, const char *name, int aframes, void *arg) * * 2.7.1 Overview * * Creates a probe with the specified module name, function name, and probe name. * * 2.7.2 Arguments and Notes * * The first argument is the provider identifier, as returned from a * successful call to dtrace_register(). The second, third, and fourth * arguments are the module name, function name, and probe name, * respectively. Of these, module name and function name may both be NULL * (in which case the probe is considered to be unanchored), or they may both * be non-NULL. The name must be non-NULL, and must point to a non-empty * string. * * The fifth argument is the number of artificial stack frames that will be * found on the stack when dtrace_probe() is called for the new probe. These * artificial frames will automatically be pruned should the stack() or * stackdepth() functions be called as part of one of the probe's ECBs. If * the provider doesn't add an artificial frame, this parameter should be * zero. * * The final argument is a probe argument that will be passed back to the * provider when a probe-specific operation is called. (e.g., via * dtps_enable(), dtps_disable(), etc.) * * Note that it is up to the provider to be sure that the probe that it * creates does not already exist -- if the provider is unsure of the probe's * existence, it should assure its absence with dtrace_probe_lookup() before * calling dtrace_probe_create(). * * 2.7.3 Return value * * dtrace_probe_create() always succeeds, and always returns the identifier * of the newly-created probe. * * 2.7.4 Caller's context * * While dtrace_probe_create() is generally expected to be called from * dtps_provide() and/or dtps_provide_module(), it may be called from other * non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. * * 2.8 dtrace_id_t dtrace_probe_lookup(dtrace_provider_t id, const char *mod, * const char *func, const char *name) * * 2.8.1 Overview * * Looks up a probe based on provider and one or more of module name, * function name and probe name. * * 2.8.2 Arguments and Notes * * The first argument is the provider identifier, as returned from a * successful call to dtrace_register(). The second, third, and fourth * arguments are the module name, function name, and probe name, * respectively.
Any of these may be NULL; dtrace_probe_lookup() will return * the identifier of the first probe that is provided by the specified * provider and matches all of the non-NULL matching criteria. * dtrace_probe_lookup() is generally used by a provider to check the * existence of a probe before creating it with dtrace_probe_create(). * * 2.8.3 Return value * * If the probe exists, returns its identifier. If the probe does not exist, * returns DTRACE_IDNONE. * * 2.8.4 Caller's context * * While dtrace_probe_lookup() is generally expected to be called from * dtps_provide() and/or dtps_provide_module(), it may also be called from * other non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. * * 2.9 void *dtrace_probe_arg(dtrace_provider_t id, dtrace_id_t probe) * * 2.9.1 Overview * * Returns the probe argument associated with the specified probe. * * 2.9.2 Arguments and Notes * * The first argument is the provider identifier, as returned from a * successful call to dtrace_register(). The second argument is a probe * identifier, as returned from dtrace_probe_lookup() or * dtrace_probe_create(). This is useful if a probe has multiple * provider-specific components to it: the provider can create the probe * once with provider-specific state, and then add to the state by looking * up the probe based on probe identifier. * * 2.9.3 Return value * * Returns the argument associated with the specified probe. If the * specified probe does not exist, or if the specified probe is not provided * by the specified provider, NULL is returned. * * 2.9.4 Caller's context * * While dtrace_probe_arg() is generally expected to be called from * dtps_provide() and/or dtps_provide_module(), it may also be called from * other non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. * * 2.10 void dtrace_probe(dtrace_id_t probe, uintptr_t arg0, uintptr_t arg1, * uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) * * 2.10.1 Overview * * The epicenter of DTrace: fires the specified probe with the specified * arguments. * * 2.10.2 Arguments and Notes * * The first argument is a probe identifier as returned by * dtrace_probe_create() or dtrace_probe_lookup(). The second through sixth * arguments are the values to which the D variables "arg0" through "arg4" * will be mapped. * * dtrace_probe() should be called whenever the specified probe has fired -- * however the provider defines it. * * 2.10.3 Return value * * None. * * 2.10.4 Caller's context * * dtrace_probe() may be called in virtually any context: kernel, user, * interrupt, high-level interrupt, with arbitrary adaptive locks held, with * dispatcher locks held, with interrupts disabled, etc. The only latitude * that must be afforded to DTrace is the ability to make calls within * itself (and to its in-kernel subroutines) and the ability to access * arbitrary (but mapped) memory. On some platforms, this constrains * context. For example, on UltraSPARC, dtrace_probe() cannot be called * from any context in which TL is greater than zero. dtrace_probe() may * also not be called from any routine which may be called by dtrace_probe() * -- which includes functions in the DTrace framework and some in-kernel * DTrace subroutines. All such functions begin with "dtrace_"; providers that * instrument the kernel arbitrarily should be sure to not instrument these * routines.
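 *
 * To tie the two halves together, an illustrative sketch (not part of the
 * original interface description; every foo_* name is hypothetical, foo_attr
 * is assumed to be a dtrace_pattr_t as in the earlier stability example, and
 * foo_pops is assumed to be a dtrace_pops_t -- the structure defined
 * immediately below -- populated with the provider's entry points) of a
 * provider registering itself and providing a single unanchored probe:
 *
 *        static dtrace_provider_id_t foo_id;
 *
 *        static void
 *        foo_provide(void *arg, dtrace_probedesc_t *desc)
 *        {
 *                if (dtrace_probe_lookup(foo_id, NULL, NULL, "tick") != 0)
 *                        return;
 *
 *                (void) dtrace_probe_create(foo_id, NULL, NULL, "tick", 0, NULL);
 *        }
 *
 *        static int
 *        foo_attach(void)
 *        {
 *                if (dtrace_register("foo", &foo_attr, DTRACE_PRIV_KERNEL, NULL,
 *                    &foo_pops, NULL, &foo_id) != 0)
 *                        return (-1);
 *
 *                return (0);
 *        }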
*/ typedef struct dtrace_pops { void (*dtps_provide)(void *arg, dtrace_probedesc_t *spec); void (*dtps_provide_module)(void *arg, modctl_t *mp); void (*dtps_enable)(void *arg, dtrace_id_t id, void *parg); void (*dtps_disable)(void *arg, dtrace_id_t id, void *parg); void (*dtps_suspend)(void *arg, dtrace_id_t id, void *parg); void (*dtps_resume)(void *arg, dtrace_id_t id, void *parg); void (*dtps_getargdesc)(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc); uint64_t (*dtps_getargval)(void *arg, dtrace_id_t id, void *parg, int argno, int aframes); int (*dtps_usermode)(void *arg, dtrace_id_t id, void *parg); void (*dtps_destroy)(void *arg, dtrace_id_t id, void *parg); } dtrace_pops_t; #define DTRACE_MODE_KERNEL 0x01 #define DTRACE_MODE_USER 0x02 #define DTRACE_MODE_NOPRIV_DROP 0x10 #define DTRACE_MODE_NOPRIV_RESTRICT 0x20 #define DTRACE_MODE_LIMITEDPRIV_RESTRICT 0x40 typedef uintptr_t dtrace_provider_id_t; extern int dtrace_register(const char *, const dtrace_pattr_t *, uint32_t, cred_t *, const dtrace_pops_t *, void *, dtrace_provider_id_t *); extern int dtrace_unregister(dtrace_provider_id_t); extern int dtrace_condense(dtrace_provider_id_t); extern void dtrace_invalidate(dtrace_provider_id_t); extern dtrace_id_t dtrace_probe_lookup(dtrace_provider_id_t, char *, char *, char *); extern dtrace_id_t dtrace_probe_create(dtrace_provider_id_t, const char *, const char *, const char *, int, void *); extern void *dtrace_probe_arg(dtrace_provider_id_t, dtrace_id_t); extern void dtrace_probe(dtrace_id_t, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4); /* * DTrace Meta Provider API * * The following functions are implemented by the DTrace framework and are * used to implement meta providers. Meta providers plug into the DTrace * framework and are used to instantiate new providers on the fly. At * present, there is only one type of meta provider and only one meta * provider may be registered with the DTrace framework at a time. The * sole meta provider type provides user-land static tracing facilities * by taking meta probe descriptions and adding a corresponding provider * into the DTrace framework. * * 1 Framework-to-Provider * * 1.1 Overview * * The Framework-to-Provider API is represented by the dtrace_mops structure * that the meta provider passes to the framework when registering itself as * a meta provider. This structure consists of the following members: * * dtms_create_probe() <-- Add a new probe to a created provider * dtms_provide_pid() <-- Create a new provider for a given process * dtms_remove_pid() <-- Remove a previously created provider * * 1.2 void dtms_create_probe(void *arg, void *parg, * dtrace_helper_probedesc_t *probedesc); * * 1.2.1 Overview * * Called by the DTrace framework to create a new probe in a provider * created by this meta provider. * * 1.2.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_meta_register(). * The second argument is the provider cookie for the associated provider; * this is obtained from the return value of dtms_provide_pid(). The third * argument is the helper probe description. * * 1.2.3 Return value * * None * * 1.2.4 Caller's context * * dtms_create_probe() is called from either ioctl() or module load context * in the context of a newly-created provider (that is, a provider that * is a result of a call to dtms_provide_pid()). 
The DTrace framework is * locked in such a way that meta providers may not register or unregister, * such that no other thread can call into a meta provider operation and that * atomicity is assured with respect to meta provider operations across * dtms_provide_pid() and subsequent calls to dtms_create_probe(). * The context is thus effectively single-threaded with respect to the meta * provider, and the meta provider cannot call dtrace_meta_register() * or dtrace_meta_unregister(). However, the context is such that the * provider may (and is expected to) call provider-related DTrace provider * APIs including dtrace_probe_create(). * * 1.3 void *dtms_provide_pid(void *arg, dtrace_meta_provider_t *mprov, * pid_t pid) * * 1.3.1 Overview * * Called by the DTrace framework to instantiate a new provider given the * description of the provider and probes in the mprov argument. The * meta provider should call dtrace_register() to insert the new provider * into the DTrace framework. * * 1.3.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_meta_register(). * The second argument is a pointer to a structure describing the new * helper provider. The third argument is the process identifier for the * process associated with this new provider. Note that the name of the * provider as passed to dtrace_register() should be the concatenation of * the dthpv_provname member of the mprov argument and the process * identifier as a string. * * 1.3.3 Return value * * The cookie for the provider that the meta provider creates. This is * the same value that it passed to dtrace_register(). * * 1.3.4 Caller's context * * dtms_provide_pid() is called from either ioctl() or module load context. * The DTrace framework is locked in such a way that meta providers may not * register or unregister. This means that the meta provider cannot call * dtrace_meta_register() or dtrace_meta_unregister(). However, the context * is such that the provider may -- and is expected to -- call * provider-related DTrace provider APIs including dtrace_register(). * * 1.4 void dtms_remove_pid(void *arg, dtrace_meta_provider_t *mprov, * pid_t pid) * * 1.4.1 Overview * * Called by the DTrace framework to remove a provider that had previously * been instantiated via the dtms_provide_pid() entry point. The meta * provider need not remove the provider immediately, but this entry * point indicates that the provider should be removed as soon as possible * using the dtrace_unregister() API. * * 1.4.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_meta_register(). * The second argument is a pointer to a structure describing the helper * provider. The third argument is the process identifier for the process * associated with this new provider. * * 1.4.3 Return value * * None * * 1.4.4 Caller's context * * dtms_remove_pid() is called from either ioctl() or exit() context. * The DTrace framework is locked in such a way that meta providers may not * register or unregister. This means that the meta provider cannot call * dtrace_meta_register() or dtrace_meta_unregister(). However, the context * is such that the provider may -- and is expected to -- call * provider-related DTrace provider APIs including dtrace_unregister().
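 *
 * As a minimal, hypothetical sketch of 1.3 (example_provide_pid(),
 * example_pid_pops and struct example_pid_state are illustrative only),
 * a meta provider builds the provider name from the helper description
 * and the pid, registers it, and returns the same cookie it handed to
 * dtrace_register():
 */

#if 0	/* illustrative sketch only; not compiled */
static struct example_pid_state {
	dtrace_provider_id_t eps_provid;
} example_state;

static void *
example_provide_pid(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid)
{
	char name[64];

	/* Provider name: helper provider name with the pid appended. */
	(void) snprintf(name, sizeof (name), "%s%d", dhpv->dthpv_provname,
	    (int)pid);

	/* example_pid_pops would be the new provider's dtrace_pops_t. */
	if (dtrace_register(name, &dhpv->dthpv_pattr, DTRACE_PRIV_USER,
	    NULL, &example_pid_pops, &example_state,
	    &example_state.eps_provid) != 0)
		return (NULL);

	/* Return the same cookie that was passed to dtrace_register(). */
	return (&example_state);
}
#endif

/*
 * The structures below describe the helper probes and helper providers
 * that the framework hands to the meta provider entry points: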
*/ typedef struct dtrace_helper_probedesc { char *dthpb_mod; /* probe module */ char *dthpb_func; /* probe function */ char *dthpb_name; /* probe name */ uint64_t dthpb_base; /* base address */ uint32_t *dthpb_offs; /* offsets array */ uint32_t *dthpb_enoffs; /* is-enabled offsets array */ uint32_t dthpb_noffs; /* offsets count */ uint32_t dthpb_nenoffs; /* is-enabled offsets count */ uint8_t *dthpb_args; /* argument mapping array */ uint8_t dthpb_xargc; /* translated argument count */ uint8_t dthpb_nargc; /* native argument count */ char *dthpb_xtypes; /* translated types strings */ char *dthpb_ntypes; /* native types strings */ } dtrace_helper_probedesc_t; typedef struct dtrace_helper_provdesc { char *dthpv_provname; /* provider name */ dtrace_pattr_t dthpv_pattr; /* stability attributes */ } dtrace_helper_provdesc_t; typedef struct dtrace_mops { void (*dtms_create_probe)(void *, void *, dtrace_helper_probedesc_t *); void *(*dtms_provide_pid)(void *, dtrace_helper_provdesc_t *, pid_t); void (*dtms_remove_pid)(void *, dtrace_helper_provdesc_t *, pid_t); } dtrace_mops_t; typedef uintptr_t dtrace_meta_provider_id_t; extern int dtrace_meta_register(const char *, const dtrace_mops_t *, void *, dtrace_meta_provider_id_t *); extern int dtrace_meta_unregister(dtrace_meta_provider_id_t); /* * DTrace Kernel Hooks * * The following functions are implemented by the base kernel and form a set of * hooks used by the DTrace framework. DTrace hooks are implemented in either * uts/common/os/dtrace_subr.c, an ISA-specific assembly file, or in a * uts//os/dtrace_subr.c corresponding to each hardware platform. */ typedef enum dtrace_vtime_state { DTRACE_VTIME_INACTIVE = 0, /* No DTrace, no TNF */ DTRACE_VTIME_ACTIVE, /* DTrace virtual time, no TNF */ DTRACE_VTIME_INACTIVE_TNF, /* No DTrace, TNF active */ DTRACE_VTIME_ACTIVE_TNF /* DTrace virtual time _and_ TNF */ } dtrace_vtime_state_t; #ifdef illumos extern dtrace_vtime_state_t dtrace_vtime_active; #endif extern void dtrace_vtime_switch(kthread_t *next); extern void dtrace_vtime_enable_tnf(void); extern void dtrace_vtime_disable_tnf(void); extern void dtrace_vtime_enable(void); extern void dtrace_vtime_disable(void); struct regs; struct reg; #ifdef illumos extern int (*dtrace_pid_probe_ptr)(struct reg *); extern int (*dtrace_return_probe_ptr)(struct reg *); extern void (*dtrace_fasttrap_fork_ptr)(proc_t *, proc_t *); extern void (*dtrace_fasttrap_exec_ptr)(proc_t *); extern void (*dtrace_fasttrap_exit_ptr)(proc_t *); extern void dtrace_fasttrap_fork(proc_t *, proc_t *); #endif typedef uintptr_t dtrace_icookie_t; typedef void (*dtrace_xcall_t)(void *); extern dtrace_icookie_t dtrace_interrupt_disable(void); extern void dtrace_interrupt_enable(dtrace_icookie_t); extern void dtrace_membar_producer(void); extern void dtrace_membar_consumer(void); extern void (*dtrace_cpu_init)(processorid_t); #ifdef illumos extern void (*dtrace_modload)(modctl_t *); extern void (*dtrace_modunload)(modctl_t *); #endif extern void (*dtrace_helpers_cleanup)(void); extern void (*dtrace_helpers_fork)(proc_t *parent, proc_t *child); extern void (*dtrace_cpustart_init)(void); extern void (*dtrace_cpustart_fini)(void); extern void (*dtrace_closef)(void); extern void (*dtrace_debugger_init)(void); extern void (*dtrace_debugger_fini)(void); extern dtrace_cacheid_t dtrace_predcache_id; #ifdef illumos extern hrtime_t dtrace_gethrtime(void); #else void dtrace_debug_printf(const char *, ...) 
__printflike(1, 2); #endif extern void dtrace_sync(void); extern void dtrace_toxic_ranges(void (*)(uintptr_t, uintptr_t)); extern void dtrace_xcall(processorid_t, dtrace_xcall_t, void *); extern void dtrace_vpanic(const char *, __va_list); extern void dtrace_panic(const char *, ...); extern int dtrace_safe_defer_signal(void); extern void dtrace_safe_synchronous_signal(void); extern int dtrace_mach_aframes(void); #if defined(__i386) || defined(__amd64) extern int dtrace_instr_size(uchar_t *instr); extern int dtrace_instr_size_isa(uchar_t *, model_t, int *); extern void dtrace_invop_callsite(void); #endif extern void dtrace_invop_add(int (*)(uintptr_t, struct trapframe *, uintptr_t)); extern void dtrace_invop_remove(int (*)(uintptr_t, struct trapframe *, uintptr_t)); #ifdef __sparc extern int dtrace_blksuword32(uintptr_t, uint32_t *, int); extern void dtrace_getfsr(uint64_t *); #endif #ifndef illumos extern void dtrace_helpers_duplicate(proc_t *, proc_t *); extern void dtrace_helpers_destroy(proc_t *); #endif #define DTRACE_CPUFLAG_ISSET(flag) \ (cpu_core[curcpu].cpuc_dtrace_flags & (flag)) #define DTRACE_CPUFLAG_SET(flag) \ (cpu_core[curcpu].cpuc_dtrace_flags |= (flag)) #define DTRACE_CPUFLAG_CLEAR(flag) \ (cpu_core[curcpu].cpuc_dtrace_flags &= ~(flag)) #endif /* _KERNEL */ #endif /* _ASM */ #if defined(__i386) || defined(__amd64) #define DTRACE_INVOP_PUSHL_EBP 1 #define DTRACE_INVOP_PUSHQ_RBP DTRACE_INVOP_PUSHL_EBP #define DTRACE_INVOP_POPL_EBP 2 #define DTRACE_INVOP_POPQ_RBP DTRACE_INVOP_POPL_EBP #define DTRACE_INVOP_LEAVE 3 #define DTRACE_INVOP_NOP 4 #define DTRACE_INVOP_RET 5 #elif defined(__powerpc__) #define DTRACE_INVOP_RET 1 #define DTRACE_INVOP_BCTR 2 #define DTRACE_INVOP_BLR 3 #define DTRACE_INVOP_JUMP 4 #define DTRACE_INVOP_MFLR_R0 5 #define DTRACE_INVOP_NOP 6 #elif defined(__arm__) #define DTRACE_INVOP_SHIFT 4 #define DTRACE_INVOP_MASK ((1 << DTRACE_INVOP_SHIFT) - 1) #define DTRACE_INVOP_DATA(x) ((x) >> DTRACE_INVOP_SHIFT) #define DTRACE_INVOP_PUSHM 1 #define DTRACE_INVOP_POPM 2 #define DTRACE_INVOP_B 3 #elif defined(__aarch64__) #define INSN_SIZE 4 #define B_MASK 0xff000000 #define B_DATA_MASK 0x00ffffff #define B_INSTR 0x14000000 #define RET_INSTR 0xd65f03c0 #define LDP_STP_MASK 0xffc00000 #define STP_32 0x29800000 #define STP_64 0xa9800000 #define LDP_32 0x28c00000 #define LDP_64 0xa8c00000 #define LDP_STP_PREIND (1 << 24) #define LDP_STP_DIR (1 << 22) /* Load instruction */ #define ARG1_SHIFT 0 #define ARG1_MASK 0x1f #define ARG2_SHIFT 10 #define ARG2_MASK 0x1f #define OFFSET_SHIFT 15 #define OFFSET_SIZE 7 #define OFFSET_MASK ((1 << OFFSET_SIZE) - 1) #define DTRACE_INVOP_PUSHM 1 #define DTRACE_INVOP_RET 2 #define DTRACE_INVOP_B 3 #elif defined(__mips__) #define INSN_SIZE 4 /* Load/Store double RA to/from SP */ #define LDSD_RA_SP_MASK 0xffff0000 #define LDSD_DATA_MASK 0x0000ffff #define SD_RA_SP 0xffbf0000 #define LD_RA_SP 0xdfbf0000 #define DTRACE_INVOP_SD 1 #define DTRACE_INVOP_LD 2 -#elif defined(__riscv__) +#elif defined(__riscv) #define SD_RA_SP_MASK 0x01fff07f #define SD_RA_SP 0x00113023 #define DTRACE_INVOP_SD 1 #define DTRACE_INVOP_RET 2 #define DTRACE_INVOP_NOP 3 #endif #ifdef __cplusplus } #endif #endif /* _SYS_DTRACE_H */ Index: head/sys/cddl/contrib/opensolaris/uts/common/sys/isa_defs.h =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/sys/isa_defs.h (revision 322167) +++ head/sys/cddl/contrib/opensolaris/uts/common/sys/isa_defs.h (revision 322168) @@ -1,695 +1,695 @@ /* * CDDL HEADER START * * The 
contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_ISA_DEFS_H #define _SYS_ISA_DEFS_H /* * This header file serves to group a set of well known defines and to * set these for each instruction set architecture. These defines may * be divided into two groups; characteristics of the processor and * implementation choices for Solaris on a processor. * * Processor Characteristics: * * _LITTLE_ENDIAN / _BIG_ENDIAN: * The natural byte order of the processor. A pointer to an int points * to the least/most significant byte of that int. * * _STACK_GROWS_UPWARD / _STACK_GROWS_DOWNWARD: * The processor specific direction of stack growth. A push onto the * stack increases/decreases the stack pointer, so it stores data at * successively higher/lower addresses. (Stackless machines ignored * without regrets). * * _LONG_LONG_HTOL / _LONG_LONG_LTOH: * A pointer to a long long points to the most/least significant long * within that long long. * * _BIT_FIELDS_HTOL / _BIT_FIELDS_LTOH: * The C compiler assigns bit fields from the high/low to the low/high end * of an int (most to least significant vs. least to most significant). * * _IEEE_754: * The processor (or supported implementations of the processor) * supports the ieee-754 floating point standard. No other floating * point standards are supported (or significant). Any other supported * floating point formats are expected to be cased on the ISA processor * symbol. * * _CHAR_IS_UNSIGNED / _CHAR_IS_SIGNED: * The C Compiler implements objects of type `char' as `unsigned' or * `signed' respectively. This is really an implementation choice of * the compiler writer, but it is specified in the ABI and tends to * be uniform across compilers for an instruction set architecture. * Hence, it has the properties of a processor characteristic. * * _CHAR_ALIGNMENT / _SHORT_ALIGNMENT / _INT_ALIGNMENT / _LONG_ALIGNMENT / * _LONG_LONG_ALIGNMENT / _DOUBLE_ALIGNMENT / _LONG_DOUBLE_ALIGNMENT / * _POINTER_ALIGNMENT / _FLOAT_ALIGNMENT: * The ABI defines alignment requirements of each of the primitive * object types. Some, if not all, may be hardware requirements as * well. The values are expressed in "byte-alignment" units. * * _MAX_ALIGNMENT: * The most stringent alignment requirement as specified by the ABI. * Equal to the maximum of all the above _XXX_ALIGNMENT values. * * _ALIGNMENT_REQUIRED: * True or false (1 or 0) whether or not the hardware requires the ABI * alignment. * * _LONG_LONG_ALIGNMENT_32 * The 32-bit ABI supported by a 64-bit kernel may have different * alignment requirements for primitive object types. The value of this * identifier is expressed in "byte-alignment" units. 
* * _HAVE_CPUID_INSN * This indicates that the architecture supports the 'cpuid' * instruction as defined by Intel. (Intel allows other vendors * to extend the instruction for their own purposes.) * * * Implementation Choices: * * _ILP32 / _LP64: * This specifies the compiler data type implementation as specified in * the relevant ABI. The choice between these is strongly influenced * by the underlying hardware, but is not absolutely tied to it. * Currently only two data type models are supported: * * _ILP32: * Int/Long/Pointer are 32 bits. This is the historical UNIX * and Solaris implementation. Due to its historical standing, * this is the default case. * * _LP64: * Long/Pointer are 64 bits, Int is 32 bits. This is the chosen * implementation for 64-bit ABIs such as SPARC V9. * * _I32LPx: * A compilation environment where 'int' is 32-bit, and * longs and pointers are simply the same size. * * In all cases, Char is 8 bits and Short is 16 bits. * * _SUNOS_VTOC_8 / _SUNOS_VTOC_16 / _SVR4_VTOC_16: * This specifies the form of the disk VTOC (or label): * * _SUNOS_VTOC_8: * This is a VTOC form which is upwardly compatible with the * SunOS 4.x disk label and allows 8 partitions per disk. * * _SUNOS_VTOC_16: * In this format the incore vtoc image matches the ondisk * version. It allows 16 slices per disk, and is not * compatible with the SunOS 4.x disk label. * * Note that these are not the only two VTOC forms possible and * additional forms may be added. One possible form would be the * SVr4 VTOC form. The symbol for that is reserved now, although * it is not implemented. * * _SVR4_VTOC_16: * This VTOC form is compatible with the System V Release 4 * VTOC (as implemented on the SVr4 Intel and 3b ports) with * 16 partitions per disk. * * * _DMA_USES_PHYSADDR / _DMA_USES_VIRTADDR * This describes the type of addresses used by system DMA: * * _DMA_USES_PHYSADDR: * This type of DMA, used in the x86 implementation, * requires physical addresses for DMA buffers. The 24-bit * addresses used by some legacy boards is the source of the * "low-memory" (<16MB) requirement for some devices using DMA. * * _DMA_USES_VIRTADDR: * This method of DMA allows the use of virtual addresses for * DMA transfers. * * _FIRMWARE_NEEDS_FDISK / _NO_FDISK_PRESENT * This indicates the presence/absence of an fdisk table. * * _FIRMWARE_NEEDS_FDISK * The fdisk table is required by system firmware. If present, * it allows a disk to be subdivided into multiple fdisk * partitions, each of which is equivalent to a separate, * virtual disk. This enables the co-existence of multiple * operating systems on a shared hard disk. * * _NO_FDISK_PRESENT * If the fdisk table is absent, it is assumed that the entire * media is allocated for a single operating system. * * _HAVE_TEM_FIRMWARE * Defined if this architecture has the (fallback) option of * using prom_* calls for doing I/O if a suitable kernel driver * is not available to do it. * * _DONT_USE_1275_GENERIC_NAMES * Controls whether or not device tree node names should * comply with the IEEE 1275 "Generic Names" Recommended * Practice. With _DONT_USE_GENERIC_NAMES, device-specific * names identifying the particular device will be used. * * __i386_COMPAT * This indicates whether the i386 ABI is supported as a *non-native* * mode for the platform. 
When this symbol is defined: * - 32-bit xstat-style system calls are enabled * - 32-bit xmknod-style system calls are enabled * - 32-bit system calls use i386 sizes -and- alignments * * Note that this is NOT defined for the i386 native environment! * * __x86 * This is ONLY a synonym for defined(__i386) || defined(__amd64) * which is useful only insofar as these two architectures share * common attributes. Analogous to __sparc. * * _PSM_MODULES * This indicates whether or not the implementation uses PSM * modules for processor support, reading /etc/mach from inside * the kernel to extract a list. * * _RTC_CONFIG * This indicates whether or not the implementation uses /etc/rtc_config * to configure the real-time clock in the kernel. * * _UNIX_KRTLD * This indicates that the implementation uses a dynamically * linked unix + krtld to form the core kernel image at boot * time, or (in the absence of this symbol) a prelinked kernel image. * * _OBP * This indicates the firmware interface is OBP. * * _SOFT_HOSTID * This indicates that the implementation obtains the hostid * from the file /etc/hostid, rather than from hardware. */ #ifdef __cplusplus extern "C" { #endif /* * The following set of definitions characterize Solaris on AMD's * 64-bit systems. */ #if defined(__x86_64) || defined(__amd64) #if !defined(__amd64) #define __amd64 /* preferred guard */ #endif #if !defined(__x86) #define __x86 #endif /* * Define the appropriate "processor characteristics" */ #ifdef illumos #define _LITTLE_ENDIAN #endif #define _STACK_GROWS_DOWNWARD #define _LONG_LONG_LTOH #define _BIT_FIELDS_LTOH #define _IEEE_754 #define _CHAR_IS_SIGNED #define _BOOL_ALIGNMENT 1 #define _CHAR_ALIGNMENT 1 #define _SHORT_ALIGNMENT 2 #define _INT_ALIGNMENT 4 #define _FLOAT_ALIGNMENT 4 #define _FLOAT_COMPLEX_ALIGNMENT 4 #define _LONG_ALIGNMENT 8 #define _LONG_LONG_ALIGNMENT 8 #define _DOUBLE_ALIGNMENT 8 #define _DOUBLE_COMPLEX_ALIGNMENT 8 #define _LONG_DOUBLE_ALIGNMENT 16 #define _LONG_DOUBLE_COMPLEX_ALIGNMENT 16 #define _POINTER_ALIGNMENT 8 #define _MAX_ALIGNMENT 16 #define _ALIGNMENT_REQUIRED 1 /* * Different alignment constraints for the i386 ABI in compatibility mode */ #define _LONG_LONG_ALIGNMENT_32 4 /* * Define the appropriate "implementation choices". */ #if !defined(_LP64) #define _LP64 #endif #if !defined(_I32LPx) && defined(_KERNEL) #define _I32LPx #endif #define _MULTI_DATAMODEL #define _SUNOS_VTOC_16 #define _DMA_USES_PHYSADDR #define _FIRMWARE_NEEDS_FDISK #define __i386_COMPAT #define _PSM_MODULES #define _RTC_CONFIG #define _SOFT_HOSTID #define _DONT_USE_1275_GENERIC_NAMES #define _HAVE_CPUID_INSN /* * The feature test macro __i386 is generic for all processors implementing * the Intel 386 instruction set or a superset of it. Specifically, this * includes all members of the 386, 486, and Pentium family of processors. 
*/ #elif defined(__i386) || defined(__i386__) #if !defined(__i386) #define __i386 #endif #if !defined(__x86) #define __x86 #endif /* * Define the appropriate "processor characteristics" */ #ifdef illumos #define _LITTLE_ENDIAN #endif #define _STACK_GROWS_DOWNWARD #define _LONG_LONG_LTOH #define _BIT_FIELDS_LTOH #define _IEEE_754 #define _CHAR_IS_SIGNED #define _BOOL_ALIGNMENT 1 #define _CHAR_ALIGNMENT 1 #define _SHORT_ALIGNMENT 2 #define _INT_ALIGNMENT 4 #define _FLOAT_ALIGNMENT 4 #define _FLOAT_COMPLEX_ALIGNMENT 4 #define _LONG_ALIGNMENT 4 #define _LONG_LONG_ALIGNMENT 4 #define _DOUBLE_ALIGNMENT 4 #define _DOUBLE_COMPLEX_ALIGNMENT 4 #define _LONG_DOUBLE_ALIGNMENT 4 #define _LONG_DOUBLE_COMPLEX_ALIGNMENT 4 #define _POINTER_ALIGNMENT 4 #define _MAX_ALIGNMENT 4 #define _ALIGNMENT_REQUIRED 0 #define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT /* * Define the appropriate "implementation choices". */ #if !defined(_ILP32) #define _ILP32 #endif #if !defined(_I32LPx) && defined(_KERNEL) #define _I32LPx #endif #define _SUNOS_VTOC_16 #define _DMA_USES_PHYSADDR #define _FIRMWARE_NEEDS_FDISK #define _PSM_MODULES #define _RTC_CONFIG #define _SOFT_HOSTID #define _DONT_USE_1275_GENERIC_NAMES #define _HAVE_CPUID_INSN #elif defined(__aarch64__) /* * Define the appropriate "processor characteristics" */ #define _STACK_GROWS_DOWNWARD #define _LONG_LONG_LTOH #define _BIT_FIELDS_LTOH #define _IEEE_754 #define _CHAR_IS_UNSIGNED #define _BOOL_ALIGNMENT 1 #define _CHAR_ALIGNMENT 1 #define _SHORT_ALIGNMENT 2 #define _INT_ALIGNMENT 4 #define _FLOAT_ALIGNMENT 4 #define _FLOAT_COMPLEX_ALIGNMENT 4 #define _LONG_ALIGNMENT 8 #define _LONG_LONG_ALIGNMENT 8 #define _DOUBLE_ALIGNMENT 8 #define _DOUBLE_COMPLEX_ALIGNMENT 8 #define _LONG_DOUBLE_ALIGNMENT 16 #define _LONG_DOUBLE_COMPLEX_ALIGNMENT 16 #define _POINTER_ALIGNMENT 8 #define _MAX_ALIGNMENT 16 #define _ALIGNMENT_REQUIRED 1 #define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT /* * Define the appropriate "implementation choices" */ #if !defined(_LP64) #define _LP64 #endif #define _SUNOS_VTOC_16 #define _DMA_USES_PHYSADDR #define _FIRMWARE_NEEDS_FDISK #define _PSM_MODULES #define _RTC_CONFIG #define _DONT_USE_1275_GENERIC_NAMES #define _HAVE_CPUID_INSN -#elif defined(__riscv__) +#elif defined(__riscv) /* * Define the appropriate "processor characteristics" */ #define _STACK_GROWS_DOWNWARD #define _LONG_LONG_LTOH #define _BIT_FIELDS_LTOH #define _IEEE_754 #define _CHAR_IS_UNSIGNED #define _BOOL_ALIGNMENT 1 #define _CHAR_ALIGNMENT 1 #define _SHORT_ALIGNMENT 2 #define _INT_ALIGNMENT 4 #define _FLOAT_ALIGNMENT 4 #define _FLOAT_COMPLEX_ALIGNMENT 4 #define _LONG_ALIGNMENT 8 #define _LONG_LONG_ALIGNMENT 8 #define _DOUBLE_ALIGNMENT 8 #define _DOUBLE_COMPLEX_ALIGNMENT 8 #define _LONG_DOUBLE_ALIGNMENT 16 #define _LONG_DOUBLE_COMPLEX_ALIGNMENT 16 #define _POINTER_ALIGNMENT 8 #define _MAX_ALIGNMENT 16 #define _ALIGNMENT_REQUIRED 1 #define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT /* * Define the appropriate "implementation choices" */ #if !defined(_LP64) #define _LP64 #endif #define _SUNOS_VTOC_16 #define _DMA_USES_PHYSADDR #define _FIRMWARE_NEEDS_FDISK #define _PSM_MODULES #define _RTC_CONFIG #define _DONT_USE_1275_GENERIC_NAMES #define _HAVE_CPUID_INSN #elif defined(__arm__) /* * Define the appropriate "processor characteristics" */ #define _STACK_GROWS_DOWNWARD #define _LONG_LONG_LTOH #define _BIT_FIELDS_LTOH #define _IEEE_754 #define _CHAR_IS_SIGNED #define _BOOL_ALIGNMENT 1 #define _CHAR_ALIGNMENT 1 #define _SHORT_ALIGNMENT 2 #define _INT_ALIGNMENT 4 #define 
_FLOAT_ALIGNMENT 4 #define _FLOAT_COMPLEX_ALIGNMENT 4 #define _LONG_ALIGNMENT 4 #define _LONG_LONG_ALIGNMENT 4 #define _DOUBLE_ALIGNMENT 4 #define _DOUBLE_COMPLEX_ALIGNMENT 4 #define _LONG_DOUBLE_ALIGNMENT 4 #define _LONG_DOUBLE_COMPLEX_ALIGNMENT 4 #define _POINTER_ALIGNMENT 4 #define _MAX_ALIGNMENT 4 #define _ALIGNMENT_REQUIRED 0 #define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT /* * Define the appropriate "implementation choices". */ #if !defined(_ILP32) #define _ILP32 #endif #if !defined(_I32LPx) && defined(_KERNEL) #define _I32LPx #endif #define _SUNOS_VTOC_16 #define _DMA_USES_PHYSADDR #define _FIRMWARE_NEEDS_FDISK #define _PSM_MODULES #define _RTC_CONFIG #define _DONT_USE_1275_GENERIC_NAMES #define _HAVE_CPUID_INSN #elif defined(__mips__) /* * Define the appropriate "processor characteristics" */ #define _STACK_GROWS_DOWNWARD #define _LONG_LONG_LTOH #define _BIT_FIELDS_LTOH #define _IEEE_754 #define _CHAR_IS_SIGNED #define _BOOL_ALIGNMENT 1 #define _CHAR_ALIGNMENT 1 #define _SHORT_ALIGNMENT 2 #define _INT_ALIGNMENT 4 #define _FLOAT_ALIGNMENT 4 #define _FLOAT_COMPLEX_ALIGNMENT 4 #if defined(__mips_n64) #define _LONG_ALIGNMENT 8 #define _LONG_LONG_ALIGNMENT 8 #define _DOUBLE_ALIGNMENT 8 #define _DOUBLE_COMPLEX_ALIGNMENT 8 #define _LONG_DOUBLE_ALIGNMENT 8 #define _LONG_DOUBLE_COMPLEX_ALIGNMENT 8 #define _POINTER_ALIGNMENT 8 #define _MAX_ALIGNMENT 8 #define _ALIGNMENT_REQUIRED 0 #define _LONG_LONG_ALIGNMENT_32 _INT_ALIGNMENT /* * Define the appropriate "implementation choices". */ #if !defined(_LP64) #define _LP64 #endif #else #define _LONG_ALIGNMENT 4 #define _LONG_LONG_ALIGNMENT 4 #define _DOUBLE_ALIGNMENT 4 #define _DOUBLE_COMPLEX_ALIGNMENT 4 #define _LONG_DOUBLE_ALIGNMENT 4 #define _LONG_DOUBLE_COMPLEX_ALIGNMENT 4 #define _POINTER_ALIGNMENT 4 #define _MAX_ALIGNMENT 4 #define _ALIGNMENT_REQUIRED 0 #define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT /* * Define the appropriate "implementation choices". */ #define _ILP32 #if !defined(_I32LPx) && defined(_KERNEL) #define _I32LPx #endif #endif #define _SUNOS_VTOC_16 #define _DMA_USES_PHYSADDR #define _FIRMWARE_NEEDS_FDISK #define _PSM_MODULES #define _RTC_CONFIG #define _DONT_USE_1275_GENERIC_NAMES #define _HAVE_CPUID_INSN #elif defined(__powerpc__) #if defined(__BIG_ENDIAN__) #define _BIT_FIELDS_HTOL #else #define _BIT_FIELDS_LTOH #endif /* * The following set of definitions characterize the Solaris on SPARC systems. * * The symbol __sparc indicates any of the SPARC family of processor * architectures. This includes SPARC V7, SPARC V8 and SPARC V9. * * The symbol __sparcv8 indicates the 32-bit SPARC V8 architecture as defined * by Version 8 of the SPARC Architecture Manual. (SPARC V7 is close enough * to SPARC V8 for the former to be subsumed into the latter definition.) * * The symbol __sparcv9 indicates the 64-bit SPARC V9 architecture as defined * by Version 9 of the SPARC Architecture Manual. * * The symbols __sparcv8 and __sparcv9 are mutually exclusive, and are only * relevant when the symbol __sparc is defined. */ /* * XXX Due to the existence of 5110166, "defined(__sparcv9)" needs to be added * to support backwards builds. This workaround should be removed in s10_71. */ #elif defined(__sparc) || defined(__sparcv9) || defined(__sparc__) #if !defined(__sparc) #define __sparc #endif /* * You can be 32-bit or 64-bit, but not both at the same time. */ #if defined(__sparcv8) && defined(__sparcv9) #error "SPARC Versions 8 and 9 are mutually exclusive choices" #endif /* * Existing compilers do not set __sparcv8. 
Years will transpire before * the compilers can be depended on to set the feature test macro. In * the interim, we'll set it here on the basis of historical behaviour; * if you haven't asked for SPARC V9, then you must've meant SPARC V8. */ #if !defined(__sparcv9) && !defined(__sparcv8) #define __sparcv8 #endif /* * Define the appropriate "processor characteristics" shared between * all Solaris on SPARC systems. */ #ifdef illumos #define _BIG_ENDIAN #endif #define _STACK_GROWS_DOWNWARD #define _LONG_LONG_HTOL #define _BIT_FIELDS_HTOL #define _IEEE_754 #define _CHAR_IS_SIGNED #define _BOOL_ALIGNMENT 1 #define _CHAR_ALIGNMENT 1 #define _SHORT_ALIGNMENT 2 #define _INT_ALIGNMENT 4 #define _FLOAT_ALIGNMENT 4 #define _FLOAT_COMPLEX_ALIGNMENT 4 #define _LONG_LONG_ALIGNMENT 8 #define _DOUBLE_ALIGNMENT 8 #define _DOUBLE_COMPLEX_ALIGNMENT 8 #define _ALIGNMENT_REQUIRED 1 /* * Define the appropriate "implementation choices" shared between versions. */ #define _SUNOS_VTOC_8 #define _DMA_USES_VIRTADDR #define _NO_FDISK_PRESENT #define _HAVE_TEM_FIRMWARE #define _OBP /* * The following set of definitions characterize the implementation of * 32-bit Solaris on SPARC V8 systems. */ #if defined(__sparcv8) /* * Define the appropriate "processor characteristics" */ #define _LONG_ALIGNMENT 4 #define _LONG_DOUBLE_ALIGNMENT 8 #define _LONG_DOUBLE_COMPLEX_ALIGNMENT 8 #define _POINTER_ALIGNMENT 4 #define _MAX_ALIGNMENT 8 #define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT /* * Define the appropriate "implementation choices" */ #define _ILP32 #if !defined(_I32LPx) && defined(_KERNEL) #define _I32LPx #endif /* * The following set of definitions characterize the implementation of * 64-bit Solaris on SPARC V9 systems. */ #elif defined(__sparcv9) /* * Define the appropriate "processor characteristics" */ #define _LONG_ALIGNMENT 8 #define _LONG_DOUBLE_ALIGNMENT 16 #define _LONG_DOUBLE_COMPLEX_ALIGNMENT 16 #define _POINTER_ALIGNMENT 8 #define _MAX_ALIGNMENT 16 #define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT /* * Define the appropriate "implementation choices" */ #if !defined(_LP64) #define _LP64 #endif #if !defined(_I32LPx) #define _I32LPx #endif #define _MULTI_DATAMODEL #else #error "unknown SPARC version" #endif /* * #error is strictly ansi-C, but works as well as anything for K&R systems. */ #else #error "ISA not supported" #endif #if defined(_ILP32) && defined(_LP64) #error "Both _ILP32 and _LP64 are defined" #endif #ifdef __cplusplus } #endif #endif /* _SYS_ISA_DEFS_H */ Index: head/sys/cddl/dev/profile/profile.c =================================================================== --- head/sys/cddl/dev/profile/profile.c (revision 322167) +++ head/sys/cddl/dev/profile/profile.c (revision 322168) @@ -1,717 +1,717 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2006-2008 John Birrell jb@freebsd.org * * $FreeBSD$ * */ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define PROF_NAMELEN 15 #define PROF_PROFILE 0 #define PROF_TICK 1 #define PROF_PREFIX_PROFILE "profile-" #define PROF_PREFIX_TICK "tick-" /* * Regardless of platform, there are five artificial frames in the case of the * profile provider: * * profile_fire * cyclic_expire * cyclic_fire * [ cbe ] * [ locore ] * * On amd64, there are two frames associated with locore: one in locore, and * another in common interrupt dispatch code. (i386 has not been modified to * use this common layer.) Further, on i386, the interrupted instruction * appears as its own stack frame. All of this means that we need to add one * frame for amd64, and then take one away for both amd64 and i386. * * On SPARC, the picture is further complicated because the compiler * optimizes away tail-calls -- so the following frames are optimized away: * * profile_fire * cyclic_expire * * This gives three frames. However, on DEBUG kernels, the cyclic_expire * frame cannot be tail-call eliminated, yielding four frames in this case. * * All of the above constraints lead to the mess below. Yes, the profile * provider should ideally figure this out on-the-fly by hitting one of its own * probes and then walking its own stack trace. This is complicated, however, * and the static definition doesn't seem to be overly brittle. Still, we * allow for a manual override in case we get it completely wrong.
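 *
 * On FreeBSD that manual override is the kern.dtrace.profile.aframes sysctl
 * declared below: its backing variable, profile_aframes, is what this
 * provider passes to dtrace_probe_create() for every probe it creates.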
*/ #ifdef __amd64 #define PROF_ARTIFICIAL_FRAMES 10 #else #ifdef __i386 #define PROF_ARTIFICIAL_FRAMES 6 #else #ifdef __sparc #ifdef DEBUG #define PROF_ARTIFICIAL_FRAMES 4 #else #define PROF_ARTIFICIAL_FRAMES 3 #endif #endif #endif #endif #ifdef __mips /* * This value is bogus just to make module compilable on mips */ #define PROF_ARTIFICIAL_FRAMES 3 #endif #ifdef __powerpc__ /* * This value is bogus just to make module compilable on powerpc */ #define PROF_ARTIFICIAL_FRAMES 3 #endif struct profile_probe_percpu; #ifdef __mips /* bogus */ #define PROF_ARTIFICIAL_FRAMES 3 #endif #ifdef __arm__ #define PROF_ARTIFICIAL_FRAMES 3 #endif #ifdef __aarch64__ /* TODO: verify */ #define PROF_ARTIFICIAL_FRAMES 10 #endif -#ifdef __riscv__ +#ifdef __riscv /* TODO: verify */ #define PROF_ARTIFICIAL_FRAMES 10 #endif typedef struct profile_probe { char prof_name[PROF_NAMELEN]; dtrace_id_t prof_id; int prof_kind; #ifdef illumos hrtime_t prof_interval; cyclic_id_t prof_cyclic; #else sbintime_t prof_interval; struct callout prof_cyclic; sbintime_t prof_expected; struct profile_probe_percpu **prof_pcpus; #endif } profile_probe_t; typedef struct profile_probe_percpu { hrtime_t profc_expected; hrtime_t profc_interval; profile_probe_t *profc_probe; #ifdef __FreeBSD__ struct callout profc_cyclic; #endif } profile_probe_percpu_t; static d_open_t profile_open; static int profile_unload(void); static void profile_create(hrtime_t, char *, int); static void profile_destroy(void *, dtrace_id_t, void *); static void profile_enable(void *, dtrace_id_t, void *); static void profile_disable(void *, dtrace_id_t, void *); static void profile_load(void *); static void profile_provide(void *, dtrace_probedesc_t *); static int profile_rates[] = { 97, 199, 499, 997, 1999, 4001, 4999, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; static int profile_ticks[] = { 1, 10, 100, 500, 1000, 5000, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; /* * profile_max defines the upper bound on the number of profile probes that * can exist (this is to prevent malicious or clumsy users from exhausing * system resources by creating a slew of profile probes). At mod load time, * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's * present in the profile.conf file. */ #define PROFILE_MAX_DEFAULT 1000 /* default max. 
number of probes */ static uint32_t profile_max = PROFILE_MAX_DEFAULT; /* maximum number of profile probes */ static uint32_t profile_total; /* current number of profile probes */ static struct cdevsw profile_cdevsw = { .d_version = D_VERSION, .d_open = profile_open, .d_name = "profile", }; static dtrace_pattr_t profile_attr = { { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, }; static dtrace_pops_t profile_pops = { profile_provide, NULL, profile_enable, profile_disable, NULL, NULL, NULL, NULL, NULL, profile_destroy }; static struct cdev *profile_cdev; static dtrace_provider_id_t profile_id; static hrtime_t profile_interval_min = NANOSEC / 5000; /* 5000 hz */ static int profile_aframes = PROF_ARTIFICIAL_FRAMES; SYSCTL_DECL(_kern_dtrace); SYSCTL_NODE(_kern_dtrace, OID_AUTO, profile, CTLFLAG_RD, 0, "DTrace profile parameters"); SYSCTL_INT(_kern_dtrace_profile, OID_AUTO, aframes, CTLFLAG_RW, &profile_aframes, 0, "Skipped frames for profile provider"); static sbintime_t nsec_to_sbt(hrtime_t nsec) { time_t sec; /* * We need to calculate nsec * 2^32 / 10^9 * Seconds and nanoseconds are split to avoid overflow. */ sec = nsec / NANOSEC; nsec = nsec % NANOSEC; return (((sbintime_t)sec << 32) | ((sbintime_t)nsec << 32) / NANOSEC); } static hrtime_t sbt_to_nsec(sbintime_t sbt) { return ((sbt >> 32) * NANOSEC + (((uint32_t)sbt * (hrtime_t)NANOSEC) >> 32)); } static void profile_probe(profile_probe_t *prof, hrtime_t late) { struct thread *td; struct trapframe *frame; uintfptr_t pc, upc; td = curthread; pc = upc = 0; /* * td_intr_frame can be unset if this is a catch-up event upon waking up * from idle sleep. This can only happen on a CPU idle thread. Use a * representative arg0 value in this case so that one of the probe * arguments is non-zero. 
*/ frame = td->td_intr_frame; if (frame != NULL) { if (TRAPF_USERMODE(frame)) upc = TRAPF_PC(frame); else pc = TRAPF_PC(frame); } else if (TD_IS_IDLETHREAD(td)) pc = (uintfptr_t)&cpu_idle; dtrace_probe(prof->prof_id, pc, upc, late, 0, 0); } static void profile_fire(void *arg) { profile_probe_percpu_t *pcpu = arg; profile_probe_t *prof = pcpu->profc_probe; hrtime_t late; late = sbt_to_nsec(sbinuptime() - pcpu->profc_expected); profile_probe(prof, late); pcpu->profc_expected += pcpu->profc_interval; callout_schedule_sbt_curcpu(&pcpu->profc_cyclic, pcpu->profc_expected, 0, C_DIRECT_EXEC | C_ABSOLUTE); } static void profile_tick(void *arg) { profile_probe_t *prof = arg; profile_probe(prof, 0); prof->prof_expected += prof->prof_interval; callout_schedule_sbt(&prof->prof_cyclic, prof->prof_expected, 0, C_DIRECT_EXEC | C_ABSOLUTE); } static void profile_create(hrtime_t interval, char *name, int kind) { profile_probe_t *prof; if (interval < profile_interval_min) return; if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0) return; atomic_add_32(&profile_total, 1); if (profile_total > profile_max) { atomic_add_32(&profile_total, -1); return; } prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP); (void) strcpy(prof->prof_name, name); #ifdef illumos prof->prof_interval = interval; prof->prof_cyclic = CYCLIC_NONE; #else prof->prof_interval = nsec_to_sbt(interval); callout_init(&prof->prof_cyclic, 1); #endif prof->prof_kind = kind; prof->prof_id = dtrace_probe_create(profile_id, NULL, NULL, name, profile_aframes, prof); } /*ARGSUSED*/ static void profile_provide(void *arg, dtrace_probedesc_t *desc) { int i, j, rate, kind; hrtime_t val = 0, mult = 1, len = 0; char *name, *suffix = NULL; const struct { char *prefix; int kind; } types[] = { { PROF_PREFIX_PROFILE, PROF_PROFILE }, { PROF_PREFIX_TICK, PROF_TICK }, { 0, 0 } }; const struct { char *name; hrtime_t mult; } suffixes[] = { { "ns", NANOSEC / NANOSEC }, { "nsec", NANOSEC / NANOSEC }, { "us", NANOSEC / MICROSEC }, { "usec", NANOSEC / MICROSEC }, { "ms", NANOSEC / MILLISEC }, { "msec", NANOSEC / MILLISEC }, { "s", NANOSEC / SEC }, { "sec", NANOSEC / SEC }, { "m", NANOSEC * (hrtime_t)60 }, { "min", NANOSEC * (hrtime_t)60 }, { "h", NANOSEC * (hrtime_t)(60 * 60) }, { "hour", NANOSEC * (hrtime_t)(60 * 60) }, { "d", NANOSEC * (hrtime_t)(24 * 60 * 60) }, { "day", NANOSEC * (hrtime_t)(24 * 60 * 60) }, { "hz", 0 }, { NULL } }; if (desc == NULL) { char n[PROF_NAMELEN]; /* * If no description was provided, provide all of our probes. */ for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) { if ((rate = profile_rates[i]) == 0) continue; (void) snprintf(n, PROF_NAMELEN, "%s%d", PROF_PREFIX_PROFILE, rate); profile_create(NANOSEC / rate, n, PROF_PROFILE); } for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) { if ((rate = profile_ticks[i]) == 0) continue; (void) snprintf(n, PROF_NAMELEN, "%s%d", PROF_PREFIX_TICK, rate); profile_create(NANOSEC / rate, n, PROF_TICK); } return; } name = desc->dtpd_name; for (i = 0; types[i].prefix != NULL; i++) { len = strlen(types[i].prefix); if (strncmp(name, types[i].prefix, len) != 0) continue; break; } if (types[i].prefix == NULL) return; kind = types[i].kind; j = strlen(name) - len; /* * We need to start before any time suffix. */ for (j = strlen(name); j >= len; j--) { if (name[j] >= '0' && name[j] <= '9') break; suffix = &name[j]; } ASSERT(suffix != NULL); /* * Now determine the numerical value present in the probe name. 
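 * For example, "tick-10ms" has prefix "tick-", digits "10" and suffix "ms",
 * so val becomes 10 and is then scaled by NANOSEC / MILLISEC to a
 * 10,000,000ns interval; "profile-997" has no suffix, so mult stays 0 and
 * val is converted below from a frequency to an interval of NANOSEC / 997
 * nanoseconds (997 firings per second).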
*/ for (; j >= len; j--) { if (name[j] < '0' || name[j] > '9') return; val += (name[j] - '0') * mult; mult *= (hrtime_t)10; } if (val == 0) return; /* * Look-up the suffix to determine the multiplier. */ for (i = 0, mult = 0; suffixes[i].name != NULL; i++) { if (strcasecmp(suffixes[i].name, suffix) == 0) { mult = suffixes[i].mult; break; } } if (suffixes[i].name == NULL && *suffix != '\0') return; if (mult == 0) { /* * The default is frequency-per-second. */ val = NANOSEC / val; } else { val *= mult; } profile_create(val, name, kind); } /* ARGSUSED */ static void profile_destroy(void *arg, dtrace_id_t id, void *parg) { profile_probe_t *prof = parg; #ifdef illumos ASSERT(prof->prof_cyclic == CYCLIC_NONE); #else ASSERT(!callout_active(&prof->prof_cyclic) && prof->prof_pcpus == NULL); #endif kmem_free(prof, sizeof (profile_probe_t)); ASSERT(profile_total >= 1); atomic_add_32(&profile_total, -1); } #ifdef illumos /*ARGSUSED*/ static void profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when) { profile_probe_t *prof = arg; profile_probe_percpu_t *pcpu; pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP); pcpu->profc_probe = prof; hdlr->cyh_func = profile_fire; hdlr->cyh_arg = pcpu; when->cyt_interval = prof->prof_interval; when->cyt_when = gethrtime() + when->cyt_interval; pcpu->profc_expected = when->cyt_when; pcpu->profc_interval = when->cyt_interval; } /*ARGSUSED*/ static void profile_offline(void *arg, cpu_t *cpu, void *oarg) { profile_probe_percpu_t *pcpu = oarg; ASSERT(pcpu->profc_probe == arg); kmem_free(pcpu, sizeof (profile_probe_percpu_t)); } /* ARGSUSED */ static void profile_enable(void *arg, dtrace_id_t id, void *parg) { profile_probe_t *prof = parg; cyc_omni_handler_t omni; cyc_handler_t hdlr; cyc_time_t when; ASSERT(prof->prof_interval != 0); ASSERT(MUTEX_HELD(&cpu_lock)); if (prof->prof_kind == PROF_TICK) { hdlr.cyh_func = profile_tick; hdlr.cyh_arg = prof; when.cyt_interval = prof->prof_interval; when.cyt_when = gethrtime() + when.cyt_interval; } else { ASSERT(prof->prof_kind == PROF_PROFILE); omni.cyo_online = profile_online; omni.cyo_offline = profile_offline; omni.cyo_arg = prof; } if (prof->prof_kind == PROF_TICK) { prof->prof_cyclic = cyclic_add(&hdlr, &when); } else { prof->prof_cyclic = cyclic_add_omni(&omni); } } /* ARGSUSED */ static void profile_disable(void *arg, dtrace_id_t id, void *parg) { profile_probe_t *prof = parg; ASSERT(prof->prof_cyclic != CYCLIC_NONE); ASSERT(MUTEX_HELD(&cpu_lock)); cyclic_remove(prof->prof_cyclic); prof->prof_cyclic = CYCLIC_NONE; } #else static void profile_enable_omni(profile_probe_t *prof) { profile_probe_percpu_t *pcpu; int cpu; prof->prof_pcpus = kmem_zalloc((mp_maxid + 1) * sizeof(pcpu), KM_SLEEP); CPU_FOREACH(cpu) { pcpu = kmem_zalloc(sizeof(profile_probe_percpu_t), KM_SLEEP); prof->prof_pcpus[cpu] = pcpu; pcpu->profc_probe = prof; pcpu->profc_expected = sbinuptime() + prof->prof_interval; pcpu->profc_interval = prof->prof_interval; callout_init(&pcpu->profc_cyclic, 1); callout_reset_sbt_on(&pcpu->profc_cyclic, pcpu->profc_expected, 0, profile_fire, pcpu, cpu, C_DIRECT_EXEC | C_ABSOLUTE); } } static void profile_disable_omni(profile_probe_t *prof) { profile_probe_percpu_t *pcpu; int cpu; ASSERT(prof->prof_pcpus != NULL); CPU_FOREACH(cpu) { pcpu = prof->prof_pcpus[cpu]; ASSERT(pcpu->profc_probe == prof); ASSERT(callout_active(&pcpu->profc_cyclic)); callout_stop(&pcpu->profc_cyclic); callout_drain(&pcpu->profc_cyclic); kmem_free(pcpu, sizeof(profile_probe_percpu_t)); } kmem_free(prof->prof_pcpus, 
(mp_maxid + 1) * sizeof(pcpu)); prof->prof_pcpus = NULL; } /* ARGSUSED */ static void profile_enable(void *arg, dtrace_id_t id, void *parg) { profile_probe_t *prof = parg; if (prof->prof_kind == PROF_TICK) { prof->prof_expected = sbinuptime() + prof->prof_interval; callout_reset_sbt(&prof->prof_cyclic, prof->prof_expected, 0, profile_tick, prof, C_DIRECT_EXEC | C_ABSOLUTE); } else { ASSERT(prof->prof_kind == PROF_PROFILE); profile_enable_omni(prof); } } /* ARGSUSED */ static void profile_disable(void *arg, dtrace_id_t id, void *parg) { profile_probe_t *prof = parg; if (prof->prof_kind == PROF_TICK) { ASSERT(callout_active(&prof->prof_cyclic)); callout_stop(&prof->prof_cyclic); callout_drain(&prof->prof_cyclic); } else { ASSERT(prof->prof_kind == PROF_PROFILE); profile_disable_omni(prof); } } #endif static void profile_load(void *dummy) { /* Create the /dev/dtrace/profile entry. */ profile_cdev = make_dev(&profile_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "dtrace/profile"); if (dtrace_register("profile", &profile_attr, DTRACE_PRIV_USER, NULL, &profile_pops, NULL, &profile_id) != 0) return; } static int profile_unload() { int error = 0; if ((error = dtrace_unregister(profile_id)) != 0) return (error); destroy_dev(profile_cdev); return (error); } /* ARGSUSED */ static int profile_modevent(module_t mod __unused, int type, void *data __unused) { int error = 0; switch (type) { case MOD_LOAD: break; case MOD_UNLOAD: break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } /* ARGSUSED */ static int profile_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) { return (0); } SYSINIT(profile_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_load, NULL); SYSUNINIT(profile_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_unload, NULL); DEV_MODULE(profile, profile_modevent, NULL); MODULE_VERSION(profile, 1); MODULE_DEPEND(profile, dtrace, 1, 1, 1); MODULE_DEPEND(profile, opensolaris, 1, 1, 1); Index: head/sys/compat/linuxkpi/common/src/linux_page.c =================================================================== --- head/sys/compat/linuxkpi/common/src/linux_page.c (revision 322167) +++ head/sys/compat/linuxkpi/common/src/linux_page.c (revision 322168) @@ -1,291 +1,291 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2016 Matt Macy (mmacy@nextbsd.org) * Copyright (c) 2017 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#if defined(__amd64__) || defined(__aarch64__) || defined(__riscv__) +#if defined(__amd64__) || defined(__aarch64__) || defined(__riscv) #define LINUXKPI_HAVE_DMAP #else #undef LINUXKPI_HAVE_DMAP #endif void * linux_page_address(struct page *page) { if (page->object != kmem_object && page->object != kernel_object) { #ifdef LINUXKPI_HAVE_DMAP return ((void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(page))); #else return (NULL); #endif } return ((void *)(uintptr_t)(VM_MIN_KERNEL_ADDRESS + IDX_TO_OFF(page->pindex))); } vm_page_t linux_alloc_pages(gfp_t flags, unsigned int order) { #ifdef LINUXKPI_HAVE_DMAP unsigned long npages = 1UL << order; int req = (flags & M_ZERO) ? (VM_ALLOC_ZERO | VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL) : (VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL); vm_page_t page; if (order == 0 && (flags & GFP_DMA32) == 0) { page = vm_page_alloc(NULL, 0, req); if (page == NULL) return (NULL); } else { vm_paddr_t pmax = (flags & GFP_DMA32) ? BUS_SPACE_MAXADDR_32BIT : BUS_SPACE_MAXADDR; retry: page = vm_page_alloc_contig(NULL, 0, req, npages, 0, pmax, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); if (page == NULL) { if (flags & M_WAITOK) { if (!vm_page_reclaim_contig(req, npages, 0, pmax, PAGE_SIZE, 0)) { VM_WAIT; } flags &= ~M_WAITOK; goto retry; } return (NULL); } } if (flags & M_ZERO) { unsigned long x; for (x = 0; x != npages; x++) { vm_page_t pgo = page + x; if ((pgo->flags & PG_ZERO) == 0) pmap_zero_page(pgo); } } #else vm_offset_t vaddr; vm_page_t page; vaddr = linux_alloc_kmem(flags, order); if (vaddr == 0) return (NULL); page = PHYS_TO_VM_PAGE(vtophys((void *)vaddr)); KASSERT(vaddr == (vm_offset_t)page_address(page), ("Page address mismatch")); #endif return (page); } void linux_free_pages(vm_page_t page, unsigned int order) { #ifdef LINUXKPI_HAVE_DMAP unsigned long npages = 1UL << order; unsigned long x; for (x = 0; x != npages; x++) { vm_page_t pgo = page + x; vm_page_lock(pgo); vm_page_free(pgo); vm_page_unlock(pgo); } #else vm_offset_t vaddr; vaddr = (vm_offset_t)page_address(page); linux_free_kmem(vaddr, order); #endif } vm_offset_t linux_alloc_kmem(gfp_t flags, unsigned int order) { size_t size = ((size_t)PAGE_SIZE) << order; vm_offset_t addr; if ((flags & GFP_DMA32) == 0) { addr = kmem_malloc(kmem_arena, size, flags & GFP_NATIVE_MASK); } else { addr = kmem_alloc_contig(kmem_arena, size, flags & GFP_NATIVE_MASK, 0, BUS_SPACE_MAXADDR_32BIT, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); } return (addr); } void linux_free_kmem(vm_offset_t addr, unsigned int order) { size_t size = ((size_t)PAGE_SIZE) << order; kmem_free(kmem_arena, addr, size); } static int linux_get_user_pages_internal(vm_map_t map, unsigned long start, int nr_pages, int write, struct page **pages) { vm_prot_t prot; size_t len; int count; int i; prot = write ? 
(VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ; len = ((size_t)nr_pages) << PAGE_SHIFT; count = vm_fault_quick_hold_pages(map, start, len, prot, pages, nr_pages); if (count == -1) return (-EFAULT); for (i = 0; i != nr_pages; i++) { struct page *pg = pages[i]; vm_page_lock(pg); vm_page_wire(pg); vm_page_unlock(pg); } return (nr_pages); } int __get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { vm_map_t map; vm_page_t *mp; vm_offset_t va; vm_offset_t end; vm_prot_t prot; int count; if (nr_pages == 0 || in_interrupt()) return (0); MPASS(pages != NULL); va = start; map = &curthread->td_proc->p_vmspace->vm_map; end = start + (((size_t)nr_pages) << PAGE_SHIFT); if (start < vm_map_min(map) || end > vm_map_max(map)) return (-EINVAL); prot = write ? (VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ; for (count = 0, mp = pages, va = start; va < end; mp++, va += PAGE_SIZE, count++) { *mp = pmap_extract_and_hold(map->pmap, va, prot); if (*mp == NULL) break; vm_page_lock(*mp); vm_page_wire(*mp); vm_page_unlock(*mp); if ((prot & VM_PROT_WRITE) != 0 && (*mp)->dirty != VM_PAGE_BITS_ALL) { /* * Explicitly dirty the physical page. Otherwise, the * caller's changes may go unnoticed because they are * performed through an unmanaged mapping or by a DMA * operation. * * The object lock is not held here. * See vm_page_clear_dirty_mask(). */ vm_page_dirty(*mp); } } return (count); } long get_user_pages_remote(struct task_struct *task, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int gup_flags, struct page **pages, struct vm_area_struct **vmas) { vm_map_t map; map = &task->task_thread->td_proc->p_vmspace->vm_map; return (linux_get_user_pages_internal(map, start, nr_pages, !!(gup_flags & FOLL_WRITE), pages)); } long get_user_pages(unsigned long start, unsigned long nr_pages, int gup_flags, struct page **pages, struct vm_area_struct **vmas) { vm_map_t map; map = &curthread->td_proc->p_vmspace->vm_map; return (linux_get_user_pages_internal(map, start, nr_pages, !!(gup_flags & FOLL_WRITE), pages)); } int is_vmalloc_addr(const void *addr) { return (vtoslab((vm_offset_t)addr & ~UMA_SLAB_MASK) != NULL); } Index: head/sys/dev/sym/sym_hipd.c =================================================================== --- head/sys/dev/sym/sym_hipd.c (revision 322167) +++ head/sys/dev/sym/sym_hipd.c (revision 322168) @@ -1,9620 +1,9620 @@ /*- * Device driver optimized for the Symbios/LSI 53C896/53C895A/53C1010 * PCI-SCSI controllers. * * Copyright (C) 1999-2001 Gerard Roudier * * This driver also supports the following Symbios/LSI PCI-SCSI chips: * 53C810A, 53C825A, 53C860, 53C875, 53C876, 53C885, 53C895, * 53C810, 53C815, 53C825 and the 53C1510D is 53C8XX mode. * * * This driver for FreeBSD-CAM is derived from the Linux sym53c8xx driver. * Copyright (C) 1998-1999 Gerard Roudier * * The sym53c8xx driver is derived from the ncr53c8xx driver that had been * a port of the FreeBSD ncr driver to Linux-1.2.13. * * The original ncr driver has been written for 386bsd and FreeBSD by * Wolfgang Stanglmeier * Stefan Esser * Copyright (C) 1994 Wolfgang Stanglmeier * * The initialisation code, and part of the code that addresses * FreeBSD-CAM services is based on the aic7xxx driver for FreeBSD-CAM * written by Justin T. Gibbs. * * Other major contributions: * * NVRAM detection and reading. 
* Copyright (C) 1997 Richard Waltham * *----------------------------------------------------------------------------- * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define SYM_DRIVER_NAME "sym-1.6.5-20000902" /* #define SYM_DEBUG_GENERIC_SUPPORT */ #include /* * Driver configuration options. */ #include "opt_sym.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __sparc64__ #include #include #endif #include #include #include #include #include #include #include #include /* Short and quite clear integer types */ typedef int8_t s8; typedef int16_t s16; typedef int32_t s32; typedef u_int8_t u8; typedef u_int16_t u16; typedef u_int32_t u32; /* * Driver definitions. */ #include #include /* * IA32 architecture does not reorder STORES and prevents * LOADS from passing STORES. It is called `program order' * by Intel and allows device drivers to deal with memory * ordering by only ensuring that the code is not reordered * by the compiler when ordering is required. * Other architectures implement a weaker ordering that * requires memory barriers (and also IO barriers when they * make sense) to be used. */ #if defined __i386__ || defined __amd64__ #define MEMORY_BARRIER() do { ; } while(0) #elif defined __powerpc__ #define MEMORY_BARRIER() __asm__ volatile("eieio; sync" : : : "memory") #elif defined __sparc64__ #define MEMORY_BARRIER() __asm__ volatile("membar #Sync" : : : "memory") #elif defined __arm__ #define MEMORY_BARRIER() dmb() #elif defined __aarch64__ #define MEMORY_BARRIER() dmb(sy) -#elif defined __riscv__ +#elif defined __riscv #define MEMORY_BARRIER() fence() #else #error "Not supported platform" #endif /* * A la VMS/CAM-3 queue management. 
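/*
 * [Editor's illustrative sketch, not part of this revision.]  The pattern
 * the per-architecture MEMORY_BARRIER() above (including the __riscv
 * spelling fixed by this revision) exists for: data stored to host memory
 * must be visible before the chip is told to consume it.  The driver's
 * real users are the OUTL_DSP()/OUTONB_STD() macros further down;
 * publish_then_kick() and its arguments are hypothetical.
 */
static __inline void
publish_then_kick(volatile u32 *queue_slot, u32 ccb_bus_addr)
{
        *queue_slot = cpu_to_scr(ccb_bus_addr); /* 1. publish the work item        */
        MEMORY_BARRIER();                       /* 2. order it before the doorbell */
        /* 3. only now touch the chip, e.g. via OUTL_DSP() or OUTONB_STD(). */
}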
*/ typedef struct sym_quehead { struct sym_quehead *flink; /* Forward pointer */ struct sym_quehead *blink; /* Backward pointer */ } SYM_QUEHEAD; #define sym_que_init(ptr) do { \ (ptr)->flink = (ptr); (ptr)->blink = (ptr); \ } while (0) static __inline void __sym_que_add(struct sym_quehead * new, struct sym_quehead * blink, struct sym_quehead * flink) { flink->blink = new; new->flink = flink; new->blink = blink; blink->flink = new; } static __inline void __sym_que_del(struct sym_quehead * blink, struct sym_quehead * flink) { flink->blink = blink; blink->flink = flink; } static __inline int sym_que_empty(struct sym_quehead *head) { return head->flink == head; } static __inline void sym_que_splice(struct sym_quehead *list, struct sym_quehead *head) { struct sym_quehead *first = list->flink; if (first != list) { struct sym_quehead *last = list->blink; struct sym_quehead *at = head->flink; first->blink = head; head->flink = first; last->flink = at; at->blink = last; } } #define sym_que_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(size_t)(&((type *)0)->member))) #define sym_insque(new, pos) __sym_que_add(new, pos, (pos)->flink) #define sym_remque(el) __sym_que_del((el)->blink, (el)->flink) #define sym_insque_head(new, head) __sym_que_add(new, head, (head)->flink) static __inline struct sym_quehead *sym_remque_head(struct sym_quehead *head) { struct sym_quehead *elem = head->flink; if (elem != head) __sym_que_del(head, elem->flink); else elem = NULL; return elem; } #define sym_insque_tail(new, head) __sym_que_add(new, (head)->blink, head) /* * This one may be useful. */ #define FOR_EACH_QUEUED_ELEMENT(head, qp) \ for (qp = (head)->flink; qp != (head); qp = qp->flink) /* * FreeBSD does not offer our kind of queue in the CAM CCB. * So, we have to cast. */ #define sym_qptr(p) ((struct sym_quehead *) (p)) /* * Simple bitmap operations. */ #define sym_set_bit(p, n) (((u32 *)(p))[(n)>>5] |= (1<<((n)&0x1f))) #define sym_clr_bit(p, n) (((u32 *)(p))[(n)>>5] &= ~(1<<((n)&0x1f))) #define sym_is_bit(p, n) (((u32 *)(p))[(n)>>5] & (1<<((n)&0x1f))) /* * Number of tasks per device we want to handle. */ #if SYM_CONF_MAX_TAG_ORDER > 8 #error "more than 256 tags per logical unit not allowed." #endif #define SYM_CONF_MAX_TASK (1< SYM_CONF_MAX_TASK #undef SYM_CONF_MAX_TAG #define SYM_CONF_MAX_TAG SYM_CONF_MAX_TASK #endif /* * This one means 'NO TAG for this job' */ #define NO_TAG (256) /* * Number of SCSI targets. */ #if SYM_CONF_MAX_TARGET > 16 #error "more than 16 targets not allowed." #endif /* * Number of logical units per target. */ #if SYM_CONF_MAX_LUN > 64 #error "more than 64 logical units per target not allowed." #endif /* * Asynchronous pre-scaler (ns). Shall be 40 for * the SCSI timings to be compliant. */ #define SYM_CONF_MIN_ASYNC (40) /* * Number of entries in the START and DONE queues. * * We limit to 1 PAGE in order to succeed allocation of * these queues. Each entry is 8 bytes long (2 DWORDS). */ #ifdef SYM_CONF_MAX_START #define SYM_CONF_MAX_QUEUE (SYM_CONF_MAX_START+2) #else #define SYM_CONF_MAX_QUEUE (7*SYM_CONF_MAX_TASK+2) #define SYM_CONF_MAX_START (SYM_CONF_MAX_QUEUE-2) #endif #if SYM_CONF_MAX_QUEUE > PAGE_SIZE/8 #undef SYM_CONF_MAX_QUEUE #define SYM_CONF_MAX_QUEUE PAGE_SIZE/8 #undef SYM_CONF_MAX_START #define SYM_CONF_MAX_START (SYM_CONF_MAX_QUEUE-2) #endif /* * For this one, we want a short name :-) */ #define MAX_QUEUE SYM_CONF_MAX_QUEUE /* * Active debugging tags and verbosity. 
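/*
 * [Editor's illustrative sketch, not part of this revision.]  Minimal use
 * of the VMS-style queue primitives defined above, in the same shape the
 * driver uses for its free/busy CCB queues.  struct demo_item, demo_pop()
 * and demo_push() are hypothetical.
 */
struct demo_item {
        SYM_QUEHEAD link;               /* the link is embedded in the element */
        int payload;
};

static struct demo_item *
demo_pop(SYM_QUEHEAD *list)
{
        SYM_QUEHEAD *qp = sym_remque_head(list);        /* NULL when the list is empty */

        if (qp == NULL)
                return (NULL);
        /* Recover the containing element from its embedded link. */
        return (sym_que_entry(qp, struct demo_item, link));
}

static void
demo_push(SYM_QUEHEAD *list, struct demo_item *it)
{
        sym_insque_tail(&it->link, list);
}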
*/ #define DEBUG_ALLOC (0x0001) #define DEBUG_PHASE (0x0002) #define DEBUG_POLL (0x0004) #define DEBUG_QUEUE (0x0008) #define DEBUG_RESULT (0x0010) #define DEBUG_SCATTER (0x0020) #define DEBUG_SCRIPT (0x0040) #define DEBUG_TINY (0x0080) #define DEBUG_TIMING (0x0100) #define DEBUG_NEGO (0x0200) #define DEBUG_TAGS (0x0400) #define DEBUG_POINTER (0x0800) #if 0 static int sym_debug = 0; #define DEBUG_FLAGS sym_debug #else /* #define DEBUG_FLAGS (0x0631) */ #define DEBUG_FLAGS (0x0000) #endif #define sym_verbose (np->verbose) /* * Insert a delay in micro-seconds and milli-seconds. */ static void UDELAY(int us) { DELAY(us); } static void MDELAY(int ms) { while (ms--) UDELAY(1000); } /* * Simple power of two buddy-like allocator. * * This simple code is not intended to be fast, but to * provide power of 2 aligned memory allocations. * Since the SCRIPTS processor only supplies 8 bit arithmetic, * this allocator allows simple and fast address calculations * from the SCRIPTS code. In addition, cache line alignment * is guaranteed for power of 2 cache line size. * * This allocator has been developed for the Linux sym53c8xx * driver, since this O/S does not provide naturally aligned * allocations. * It has the advantage of allowing the driver to use private * pages of memory that will be useful if we ever need to deal * with IO MMUs for PCI. */ #define MEMO_SHIFT 4 /* 16 bytes minimum memory chunk */ #define MEMO_PAGE_ORDER 0 /* 1 PAGE maximum */ #if 0 #define MEMO_FREE_UNUSED /* Free unused pages immediately */ #endif #define MEMO_WARN 1 #define MEMO_CLUSTER_SHIFT (PAGE_SHIFT+MEMO_PAGE_ORDER) #define MEMO_CLUSTER_SIZE (1UL << MEMO_CLUSTER_SHIFT) #define MEMO_CLUSTER_MASK (MEMO_CLUSTER_SIZE-1) #define get_pages() malloc(MEMO_CLUSTER_SIZE, M_DEVBUF, M_NOWAIT) #define free_pages(p) free((p), M_DEVBUF) typedef u_long m_addr_t; /* Enough bits to bit-hack addresses */ typedef struct m_link { /* Link between free memory chunks */ struct m_link *next; } m_link_s; typedef struct m_vtob { /* Virtual to Bus address translation */ struct m_vtob *next; bus_dmamap_t dmamap; /* Map for this chunk */ m_addr_t vaddr; /* Virtual address */ m_addr_t baddr; /* Bus physical address */ } m_vtob_s; /* Hash this stuff a bit to speed up translations */ #define VTOB_HASH_SHIFT 5 #define VTOB_HASH_SIZE (1UL << VTOB_HASH_SHIFT) #define VTOB_HASH_MASK (VTOB_HASH_SIZE-1) #define VTOB_HASH_CODE(m) \ ((((m_addr_t) (m)) >> MEMO_CLUSTER_SHIFT) & VTOB_HASH_MASK) typedef struct m_pool { /* Memory pool of a given kind */ bus_dma_tag_t dev_dmat; /* Identifies the pool */ bus_dma_tag_t dmat; /* Tag for our fixed allocations */ m_addr_t (*getp)(struct m_pool *); #ifdef MEMO_FREE_UNUSED void (*freep)(struct m_pool *, m_addr_t); #endif #define M_GETP() mp->getp(mp) #define M_FREEP(p) mp->freep(mp, p) int nump; m_vtob_s *(vtob[VTOB_HASH_SIZE]); struct m_pool *next; struct m_link h[MEMO_CLUSTER_SHIFT - MEMO_SHIFT + 1]; } m_pool_s; static void *___sym_malloc(m_pool_s *mp, int size) { int i = 0; int s = (1 << MEMO_SHIFT); int j; m_addr_t a; m_link_s *h = mp->h; if (size > MEMO_CLUSTER_SIZE) return NULL; while (size > s) { s <<= 1; ++i; } j = i; while (!h[j].next) { if (s == MEMO_CLUSTER_SIZE) { h[j].next = (m_link_s *) M_GETP(); if (h[j].next) h[j].next->next = NULL; break; } ++j; s <<= 1; } a = (m_addr_t) h[j].next; if (a) { h[j].next = h[j].next->next; while (j > i) { j -= 1; s >>= 1; h[j].next = (m_link_s *) (a+s); h[j].next->next = NULL; } } #ifdef DEBUG printf("___sym_malloc(%d) = %p\n", size, (void *) a); #endif return (void *) a; } static 
void ___sym_mfree(m_pool_s *mp, void *ptr, int size) { int i = 0; int s = (1 << MEMO_SHIFT); m_link_s *q; m_addr_t a, b; m_link_s *h = mp->h; #ifdef DEBUG printf("___sym_mfree(%p, %d)\n", ptr, size); #endif if (size > MEMO_CLUSTER_SIZE) return; while (size > s) { s <<= 1; ++i; } a = (m_addr_t) ptr; while (1) { #ifdef MEMO_FREE_UNUSED if (s == MEMO_CLUSTER_SIZE) { M_FREEP(a); break; } #endif b = a ^ s; q = &h[i]; while (q->next && q->next != (m_link_s *) b) { q = q->next; } if (!q->next) { ((m_link_s *) a)->next = h[i].next; h[i].next = (m_link_s *) a; break; } q->next = q->next->next; a = a & b; s <<= 1; ++i; } } static void *__sym_calloc2(m_pool_s *mp, int size, char *name, int uflags) { void *p; p = ___sym_malloc(mp, size); if (DEBUG_FLAGS & DEBUG_ALLOC) printf ("new %-10s[%4d] @%p.\n", name, size, p); if (p) bzero(p, size); else if (uflags & MEMO_WARN) printf ("__sym_calloc2: failed to allocate %s[%d]\n", name, size); return p; } #define __sym_calloc(mp, s, n) __sym_calloc2(mp, s, n, MEMO_WARN) static void __sym_mfree(m_pool_s *mp, void *ptr, int size, char *name) { if (DEBUG_FLAGS & DEBUG_ALLOC) printf ("freeing %-10s[%4d] @%p.\n", name, size, ptr); ___sym_mfree(mp, ptr, size); } /* * Default memory pool we donnot need to involve in DMA. */ /* * With the `bus dma abstraction', we use a separate pool for * memory we donnot need to involve in DMA. */ static m_addr_t ___mp0_getp(m_pool_s *mp) { m_addr_t m = (m_addr_t) get_pages(); if (m) ++mp->nump; return m; } #ifdef MEMO_FREE_UNUSED static void ___mp0_freep(m_pool_s *mp, m_addr_t m) { free_pages(m); --mp->nump; } #endif #ifdef MEMO_FREE_UNUSED static m_pool_s mp0 = {0, 0, ___mp0_getp, ___mp0_freep}; #else static m_pool_s mp0 = {0, 0, ___mp0_getp}; #endif /* * Actual memory allocation routine for non-DMAed memory. */ static void *sym_calloc(int size, char *name) { void *m; /* Lock */ m = __sym_calloc(&mp0, size, name); /* Unlock */ return m; } /* * Actual memory allocation routine for non-DMAed memory. */ static void sym_mfree(void *ptr, int size, char *name) { /* Lock */ __sym_mfree(&mp0, ptr, size, name); /* Unlock */ } /* * DMAable pools. */ /* * With `bus dma abstraction', we use a separate pool per parent * BUS handle. A reverse table (hashed) is maintained for virtual * to BUS address translation. 
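/*
 * [Editor's illustrative sketch, not part of this revision.]  The buddy
 * allocator above serves power-of-two chunks of at least 16 bytes
 * (1 << MEMO_SHIFT) carved out of MEMO_CLUSTER_SIZE pages, and the caller
 * must pass the same size to the free routine that it passed to the
 * allocator.  demo_alloc_free() and the "DEMO" pool name are hypothetical.
 */
static void
demo_alloc_free(int nbytes)
{
        u_char *buf;

        /* The request is rounded up to the next power of two internally. */
        buf = sym_calloc(nbytes, "DEMO");
        if (buf == NULL)
                return;                 /* the allocator already printed a warning */
        /* ... use the zeroed buffer ... */
        sym_mfree(buf, nbytes, "DEMO"); /* the size must match the allocation */
}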
*/ static void getbaddrcb(void *arg, bus_dma_segment_t *segs, int nseg __unused, int error) { bus_addr_t *baddr; KASSERT(nseg == 1, ("%s: too many DMA segments (%d)", __func__, nseg)); baddr = (bus_addr_t *)arg; if (error) *baddr = 0; else *baddr = segs->ds_addr; } static m_addr_t ___dma_getp(m_pool_s *mp) { m_vtob_s *vbp; void *vaddr = NULL; bus_addr_t baddr = 0; vbp = __sym_calloc(&mp0, sizeof(*vbp), "VTOB"); if (!vbp) goto out_err; if (bus_dmamem_alloc(mp->dmat, &vaddr, BUS_DMA_COHERENT | BUS_DMA_WAITOK, &vbp->dmamap)) goto out_err; bus_dmamap_load(mp->dmat, vbp->dmamap, vaddr, MEMO_CLUSTER_SIZE, getbaddrcb, &baddr, BUS_DMA_NOWAIT); if (baddr) { int hc = VTOB_HASH_CODE(vaddr); vbp->vaddr = (m_addr_t) vaddr; vbp->baddr = (m_addr_t) baddr; vbp->next = mp->vtob[hc]; mp->vtob[hc] = vbp; ++mp->nump; return (m_addr_t) vaddr; } out_err: if (baddr) bus_dmamap_unload(mp->dmat, vbp->dmamap); if (vaddr) bus_dmamem_free(mp->dmat, vaddr, vbp->dmamap); if (vbp) __sym_mfree(&mp0, vbp, sizeof(*vbp), "VTOB"); return 0; } #ifdef MEMO_FREE_UNUSED static void ___dma_freep(m_pool_s *mp, m_addr_t m) { m_vtob_s **vbpp, *vbp; int hc = VTOB_HASH_CODE(m); vbpp = &mp->vtob[hc]; while (*vbpp && (*vbpp)->vaddr != m) vbpp = &(*vbpp)->next; if (*vbpp) { vbp = *vbpp; *vbpp = (*vbpp)->next; bus_dmamap_unload(mp->dmat, vbp->dmamap); bus_dmamem_free(mp->dmat, (void *) vbp->vaddr, vbp->dmamap); __sym_mfree(&mp0, vbp, sizeof(*vbp), "VTOB"); --mp->nump; } } #endif static __inline m_pool_s *___get_dma_pool(bus_dma_tag_t dev_dmat) { m_pool_s *mp; for (mp = mp0.next; mp && mp->dev_dmat != dev_dmat; mp = mp->next); return mp; } static m_pool_s *___cre_dma_pool(bus_dma_tag_t dev_dmat) { m_pool_s *mp = NULL; mp = __sym_calloc(&mp0, sizeof(*mp), "MPOOL"); if (mp) { mp->dev_dmat = dev_dmat; if (!bus_dma_tag_create(dev_dmat, 1, MEMO_CLUSTER_SIZE, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MEMO_CLUSTER_SIZE, 1, MEMO_CLUSTER_SIZE, 0, NULL, NULL, &mp->dmat)) { mp->getp = ___dma_getp; #ifdef MEMO_FREE_UNUSED mp->freep = ___dma_freep; #endif mp->next = mp0.next; mp0.next = mp; return mp; } } if (mp) __sym_mfree(&mp0, mp, sizeof(*mp), "MPOOL"); return NULL; } #ifdef MEMO_FREE_UNUSED static void ___del_dma_pool(m_pool_s *p) { struct m_pool **pp = &mp0.next; while (*pp && *pp != p) pp = &(*pp)->next; if (*pp) { *pp = (*pp)->next; bus_dma_tag_destroy(p->dmat); __sym_mfree(&mp0, p, sizeof(*p), "MPOOL"); } } #endif static void *__sym_calloc_dma(bus_dma_tag_t dev_dmat, int size, char *name) { struct m_pool *mp; void *m = NULL; /* Lock */ mp = ___get_dma_pool(dev_dmat); if (!mp) mp = ___cre_dma_pool(dev_dmat); if (mp) m = __sym_calloc(mp, size, name); #ifdef MEMO_FREE_UNUSED if (mp && !mp->nump) ___del_dma_pool(mp); #endif /* Unlock */ return m; } static void __sym_mfree_dma(bus_dma_tag_t dev_dmat, void *m, int size, char *name) { struct m_pool *mp; /* Lock */ mp = ___get_dma_pool(dev_dmat); if (mp) __sym_mfree(mp, m, size, name); #ifdef MEMO_FREE_UNUSED if (mp && !mp->nump) ___del_dma_pool(mp); #endif /* Unlock */ } static m_addr_t __vtobus(bus_dma_tag_t dev_dmat, void *m) { m_pool_s *mp; int hc = VTOB_HASH_CODE(m); m_vtob_s *vp = NULL; m_addr_t a = ((m_addr_t) m) & ~MEMO_CLUSTER_MASK; /* Lock */ mp = ___get_dma_pool(dev_dmat); if (mp) { vp = mp->vtob[hc]; while (vp && (m_addr_t) vp->vaddr != a) vp = vp->next; } /* Unlock */ if (!vp) panic("sym: VTOBUS FAILED!\n"); return vp ? vp->baddr + (((m_addr_t) m) - a) : 0; } /* * Verbs for DMAable memory handling. 
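/*
 * [Editor's illustrative sketch, not part of this revision.]  What
 * __vtobus() above boils down to for one cluster: the lookup key is the
 * MEMO_CLUSTER_SIZE-aligned virtual address, found through the
 * VTOB_HASH_CODE() buckets, and the bus address is the cluster's baddr
 * plus the offset of the pointer inside the cluster.  demo_vtob() is a
 * hypothetical stand-alone rendering of that arithmetic.
 */
static m_addr_t
demo_vtob(const m_vtob_s *vp, void *m)
{
        m_addr_t va = (m_addr_t)m;
        m_addr_t cluster_base = va & ~MEMO_CLUSTER_MASK;

        /* vp is the bucket entry whose vp->vaddr equals cluster_base. */
        return (vp->baddr + (va - cluster_base));
}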
* The _uvptv_ macro avoids a nasty warning about pointer to volatile * being discarded. */ #define _uvptv_(p) ((void *)((vm_offset_t)(p))) #define _sym_calloc_dma(np, s, n) __sym_calloc_dma(np->bus_dmat, s, n) #define _sym_mfree_dma(np, p, s, n) \ __sym_mfree_dma(np->bus_dmat, _uvptv_(p), s, n) #define sym_calloc_dma(s, n) _sym_calloc_dma(np, s, n) #define sym_mfree_dma(p, s, n) _sym_mfree_dma(np, p, s, n) #define _vtobus(np, p) __vtobus(np->bus_dmat, _uvptv_(p)) #define vtobus(p) _vtobus(np, p) /* * Print a buffer in hexadecimal format. */ static void sym_printb_hex (u_char *p, int n) { while (n-- > 0) printf (" %x", *p++); } /* * Same with a label at beginning and .\n at end. */ static void sym_printl_hex (char *label, u_char *p, int n) { printf ("%s", label); sym_printb_hex (p, n); printf (".\n"); } /* * Return a string for SCSI BUS mode. */ static const char *sym_scsi_bus_mode(int mode) { switch(mode) { case SMODE_HVD: return "HVD"; case SMODE_SE: return "SE"; case SMODE_LVD: return "LVD"; } return "??"; } /* * Some poor and bogus sync table that refers to Tekram NVRAM layout. */ #ifdef SYM_CONF_NVRAM_SUPPORT static const u_char Tekram_sync[16] = {25,31,37,43, 50,62,75,125, 12,15,18,21, 6,7,9,10}; #endif /* * Union of supported NVRAM formats. */ struct sym_nvram { int type; #define SYM_SYMBIOS_NVRAM (1) #define SYM_TEKRAM_NVRAM (2) #ifdef SYM_CONF_NVRAM_SUPPORT union { Symbios_nvram Symbios; Tekram_nvram Tekram; } data; #endif }; /* * This one is hopefully useless, but actually useful. :-) */ #ifndef assert #define assert(expression) { \ if (!(expression)) { \ (void)panic( \ "assertion \"%s\" failed: file \"%s\", line %d\n", \ #expression, \ __FILE__, __LINE__); \ } \ } #endif /* * Some provision for a possible big endian mode supported by * Symbios chips (never seen, by the way). * For now, this stuff does not deserve any comments. :) */ #define sym_offb(o) (o) #define sym_offw(o) (o) /* * Some provision for support for BIG ENDIAN CPU. */ #define cpu_to_scr(dw) htole32(dw) #define scr_to_cpu(dw) le32toh(dw) /* * Access to the chip IO registers and on-chip RAM. * We use the `bus space' interface under FreeBSD-4 and * later kernel versions. */ #if defined(SYM_CONF_IOMAPPED) #define INB_OFF(o) bus_read_1(np->io_res, (o)) #define INW_OFF(o) bus_read_2(np->io_res, (o)) #define INL_OFF(o) bus_read_4(np->io_res, (o)) #define OUTB_OFF(o, v) bus_write_1(np->io_res, (o), (v)) #define OUTW_OFF(o, v) bus_write_2(np->io_res, (o), (v)) #define OUTL_OFF(o, v) bus_write_4(np->io_res, (o), (v)) #else /* Memory mapped IO */ #define INB_OFF(o) bus_read_1(np->mmio_res, (o)) #define INW_OFF(o) bus_read_2(np->mmio_res, (o)) #define INL_OFF(o) bus_read_4(np->mmio_res, (o)) #define OUTB_OFF(o, v) bus_write_1(np->mmio_res, (o), (v)) #define OUTW_OFF(o, v) bus_write_2(np->mmio_res, (o), (v)) #define OUTL_OFF(o, v) bus_write_4(np->mmio_res, (o), (v)) #endif /* SYM_CONF_IOMAPPED */ #define OUTRAM_OFF(o, a, l) \ bus_write_region_1(np->ram_res, (o), (a), (l)) /* * Common definitions for both bus space and legacy IO methods. 
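/*
 * [Editor's illustrative sketch, not part of this revision.]  Every 32-bit
 * value shared with the SCRIPTS processor goes through the
 * cpu_to_scr()/scr_to_cpu() pair defined above, so in-memory structures
 * stay little-endian whatever the host byte order.  demo_slot and
 * demo_scr_store() are hypothetical.
 */
static void
demo_scr_store(volatile u32 *demo_slot, u32 ccb_bus_addr)
{
        *demo_slot = cpu_to_scr(ccb_bus_addr);  /* host -> SCRIPTS byte order */
        KASSERT(scr_to_cpu(*demo_slot) == ccb_bus_addr,
            ("SCRIPTS byte-order round trip failed"));
}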
*/ #define INB(r) INB_OFF(offsetof(struct sym_reg,r)) #define INW(r) INW_OFF(offsetof(struct sym_reg,r)) #define INL(r) INL_OFF(offsetof(struct sym_reg,r)) #define OUTB(r, v) OUTB_OFF(offsetof(struct sym_reg,r), (v)) #define OUTW(r, v) OUTW_OFF(offsetof(struct sym_reg,r), (v)) #define OUTL(r, v) OUTL_OFF(offsetof(struct sym_reg,r), (v)) #define OUTONB(r, m) OUTB(r, INB(r) | (m)) #define OUTOFFB(r, m) OUTB(r, INB(r) & ~(m)) #define OUTONW(r, m) OUTW(r, INW(r) | (m)) #define OUTOFFW(r, m) OUTW(r, INW(r) & ~(m)) #define OUTONL(r, m) OUTL(r, INL(r) | (m)) #define OUTOFFL(r, m) OUTL(r, INL(r) & ~(m)) /* * We normally want the chip to have a consistent view * of driver internal data structures when we restart it. * Thus these macros. */ #define OUTL_DSP(v) \ do { \ MEMORY_BARRIER(); \ OUTL (nc_dsp, (v)); \ } while (0) #define OUTONB_STD() \ do { \ MEMORY_BARRIER(); \ OUTONB (nc_dcntl, (STD|NOCOM)); \ } while (0) /* * Command control block states. */ #define HS_IDLE (0) #define HS_BUSY (1) #define HS_NEGOTIATE (2) /* sync/wide data transfer*/ #define HS_DISCONNECT (3) /* Disconnected by target */ #define HS_WAIT (4) /* waiting for resource */ #define HS_DONEMASK (0x80) #define HS_COMPLETE (4|HS_DONEMASK) #define HS_SEL_TIMEOUT (5|HS_DONEMASK) /* Selection timeout */ #define HS_UNEXPECTED (6|HS_DONEMASK) /* Unexpected disconnect */ #define HS_COMP_ERR (7|HS_DONEMASK) /* Completed with error */ /* * Software Interrupt Codes */ #define SIR_BAD_SCSI_STATUS (1) #define SIR_SEL_ATN_NO_MSG_OUT (2) #define SIR_MSG_RECEIVED (3) #define SIR_MSG_WEIRD (4) #define SIR_NEGO_FAILED (5) #define SIR_NEGO_PROTO (6) #define SIR_SCRIPT_STOPPED (7) #define SIR_REJECT_TO_SEND (8) #define SIR_SWIDE_OVERRUN (9) #define SIR_SODL_UNDERRUN (10) #define SIR_RESEL_NO_MSG_IN (11) #define SIR_RESEL_NO_IDENTIFY (12) #define SIR_RESEL_BAD_LUN (13) #define SIR_TARGET_SELECTED (14) #define SIR_RESEL_BAD_I_T_L (15) #define SIR_RESEL_BAD_I_T_L_Q (16) #define SIR_ABORT_SENT (17) #define SIR_RESEL_ABORTED (18) #define SIR_MSG_OUT_DONE (19) #define SIR_COMPLETE_ERROR (20) #define SIR_DATA_OVERRUN (21) #define SIR_BAD_PHASE (22) #define SIR_MAX (22) /* * Extended error bit codes. * xerr_status field of struct sym_ccb. */ #define XE_EXTRA_DATA (1) /* unexpected data phase */ #define XE_BAD_PHASE (1<<1) /* illegal phase (4/5) */ #define XE_PARITY_ERR (1<<2) /* unrecovered SCSI parity error */ #define XE_SODL_UNRUN (1<<3) /* ODD transfer in DATA OUT phase */ #define XE_SWIDE_OVRUN (1<<4) /* ODD transfer in DATA IN phase */ /* * Negotiation status. * nego_status field of struct sym_ccb. */ #define NS_SYNC (1) #define NS_WIDE (2) #define NS_PPR (3) /* * A CCB hashed table is used to retrieve CCB address * from DSA value. */ #define CCB_HASH_SHIFT 8 #define CCB_HASH_SIZE (1UL << CCB_HASH_SHIFT) #define CCB_HASH_MASK (CCB_HASH_SIZE-1) #define CCB_HASH_CODE(dsa) (((dsa) >> 9) & CCB_HASH_MASK) /* * Device flags. */ #define SYM_DISC_ENABLED (1) #define SYM_TAGS_ENABLED (1<<1) #define SYM_SCAN_BOOT_DISABLED (1<<2) #define SYM_SCAN_LUNS_DISABLED (1<<3) /* * Host adapter miscellaneous flags. */ #define SYM_AVOID_BUS_RESET (1) #define SYM_SCAN_TARGETS_HILO (1<<1) /* * Device quirks. * Some devices, for example the CHEETAH 2 LVD, disconnects without * saving the DATA POINTER then reselects and terminates the IO. * On reselection, the automatic RESTORE DATA POINTER makes the * CURRENT DATA POINTER not point at the end of the IO. * This behaviour just breaks our calculation of the residual. 
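/*
 * [Editor's illustrative sketch, not part of this revision.]  How the
 * CCB_HASH_CODE() macro above is used: the DSA value reported by the chip
 * is hashed (low 9 bits dropped) into np->ccbh[] and the per-bucket chain
 * is walked until the CCB whose bus address matches.  This mirrors the
 * driver's sym_ccb_from_dsa(); demo_find_ccb() itself is a hypothetical
 * rendering.
 */
static ccb_p
demo_find_ccb(hcb_p np, u32 dsa)
{
        ccb_p cp = np->ccbh[CCB_HASH_CODE(dsa)];

        while (cp && cp->ccb_ba != dsa)
                cp = cp->link_ccbh;     /* follow the per-bucket chain */
        return (cp);                    /* NULL if the DSA is unknown */
}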
* For now, we just force an AUTO SAVE on disconnection and will * fix that in a further driver version. */ #define SYM_QUIRK_AUTOSAVE 1 /* * Misc. */ #define SYM_LOCK() mtx_lock(&np->mtx) #define SYM_LOCK_ASSERT(_what) mtx_assert(&np->mtx, (_what)) #define SYM_LOCK_DESTROY() mtx_destroy(&np->mtx) #define SYM_LOCK_INIT() mtx_init(&np->mtx, "sym_lock", NULL, MTX_DEF) #define SYM_LOCK_INITIALIZED() mtx_initialized(&np->mtx) #define SYM_UNLOCK() mtx_unlock(&np->mtx) #define SYM_SNOOP_TIMEOUT (10000000) #define SYM_PCI_IO PCIR_BAR(0) #define SYM_PCI_MMIO PCIR_BAR(1) #define SYM_PCI_RAM PCIR_BAR(2) #define SYM_PCI_RAM64 PCIR_BAR(3) /* * Back-pointer from the CAM CCB to our data structures. */ #define sym_hcb_ptr spriv_ptr0 /* #define sym_ccb_ptr spriv_ptr1 */ /* * We mostly have to deal with pointers. * Thus these typedef's. */ typedef struct sym_tcb *tcb_p; typedef struct sym_lcb *lcb_p; typedef struct sym_ccb *ccb_p; typedef struct sym_hcb *hcb_p; /* * Gather negotiable parameters value */ struct sym_trans { u8 scsi_version; u8 spi_version; u8 period; u8 offset; u8 width; u8 options; /* PPR options */ }; struct sym_tinfo { struct sym_trans current; struct sym_trans goal; struct sym_trans user; }; #define BUS_8_BIT MSG_EXT_WDTR_BUS_8_BIT #define BUS_16_BIT MSG_EXT_WDTR_BUS_16_BIT /* * Global TCB HEADER. * * Due to lack of indirect addressing on earlier NCR chips, * this substructure is copied from the TCB to a global * address after selection. * For SYMBIOS chips that support LOAD/STORE this copy is * not needed and thus not performed. */ struct sym_tcbh { /* * Scripts bus addresses of LUN table accessed from scripts. * LUN #0 is a special case, since multi-lun devices are rare, * and we we want to speed-up the general case and not waste * resources. */ u32 luntbl_sa; /* bus address of this table */ u32 lun0_sa; /* bus address of LCB #0 */ /* * Actual SYNC/WIDE IO registers value for this target. * 'sval', 'wval' and 'uval' are read from SCRIPTS and * so have alignment constraints. */ /*0*/ u_char uval; /* -> SCNTL4 register */ /*1*/ u_char sval; /* -> SXFER io register */ /*2*/ u_char filler1; /*3*/ u_char wval; /* -> SCNTL3 io register */ }; /* * Target Control Block */ struct sym_tcb { /* * TCB header. * Assumed at offset 0. */ /*0*/ struct sym_tcbh head; /* * LUN table used by the SCRIPTS processor. * An array of bus addresses is used on reselection. */ u32 *luntbl; /* LCBs bus address table */ /* * LUN table used by the C code. */ lcb_p lun0p; /* LCB of LUN #0 (usual case) */ #if SYM_CONF_MAX_LUN > 1 lcb_p *lunmp; /* Other LCBs [1..MAX_LUN] */ #endif /* * Bitmap that tells about LUNs that succeeded at least * 1 IO and therefore assumed to be a real device. * Avoid useless allocation of the LCB structure. */ u32 lun_map[(SYM_CONF_MAX_LUN+31)/32]; /* * Bitmap that tells about LUNs that haven't yet an LCB * allocated (not discovered or LCB allocation failed). */ u32 busy0_map[(SYM_CONF_MAX_LUN+31)/32]; /* * Transfer capabilities (SIP) */ struct sym_tinfo tinfo; /* * Keep track of the CCB used for the negotiation in order * to ensure that only 1 negotiation is queued at a time. */ ccb_p nego_cp; /* CCB used for the nego */ /* * Set when we want to reset the device. */ u_char to_reset; /* * Other user settable limits and options. * These limits are read from the NVRAM if present. */ u_char usrflags; u_short usrtags; }; /* * Assert some alignments required by the chip. 
*/ CTASSERT(((offsetof(struct sym_reg, nc_sxfer) ^ offsetof(struct sym_tcb, head.sval)) &3) == 0); CTASSERT(((offsetof(struct sym_reg, nc_scntl3) ^ offsetof(struct sym_tcb, head.wval)) &3) == 0); /* * Global LCB HEADER. * * Due to lack of indirect addressing on earlier NCR chips, * this substructure is copied from the LCB to a global * address after selection. * For SYMBIOS chips that support LOAD/STORE this copy is * not needed and thus not performed. */ struct sym_lcbh { /* * SCRIPTS address jumped by SCRIPTS on reselection. * For not probed logical units, this address points to * SCRIPTS that deal with bad LU handling (must be at * offset zero of the LCB for that reason). */ /*0*/ u32 resel_sa; /* * Task (bus address of a CCB) read from SCRIPTS that points * to the unique ITL nexus allowed to be disconnected. */ u32 itl_task_sa; /* * Task table bus address (read from SCRIPTS). */ u32 itlq_tbl_sa; }; /* * Logical Unit Control Block */ struct sym_lcb { /* * TCB header. * Assumed at offset 0. */ /*0*/ struct sym_lcbh head; /* * Task table read from SCRIPTS that contains pointers to * ITLQ nexuses. The bus address read from SCRIPTS is * inside the header. */ u32 *itlq_tbl; /* Kernel virtual address */ /* * Busy CCBs management. */ u_short busy_itlq; /* Number of busy tagged CCBs */ u_short busy_itl; /* Number of busy untagged CCBs */ /* * Circular tag allocation buffer. */ u_short ia_tag; /* Tag allocation index */ u_short if_tag; /* Tag release index */ u_char *cb_tags; /* Circular tags buffer */ /* * Set when we want to clear all tasks. */ u_char to_clear; /* * Capabilities. */ u_char user_flags; u_char current_flags; }; /* * Action from SCRIPTS on a task. * Is part of the CCB, but is also used separately to plug * error handling action to perform from SCRIPTS. */ struct sym_actscr { u32 start; /* Jumped by SCRIPTS after selection */ u32 restart; /* Jumped by SCRIPTS on relection */ }; /* * Phase mismatch context. * * It is part of the CCB and is used as parameters for the * DATA pointer. We need two contexts to handle correctly the * SAVED DATA POINTER. */ struct sym_pmc { struct sym_tblmove sg; /* Updated interrupted SG block */ u32 ret; /* SCRIPT return address */ }; /* * LUN control block lookup. * We use a direct pointer for LUN #0, and a table of * pointers which is only allocated for devices that support * LUN(s) > 0. */ #if SYM_CONF_MAX_LUN <= 1 #define sym_lp(tp, lun) (!lun) ? (tp)->lun0p : 0 #else #define sym_lp(tp, lun) \ (!lun) ? (tp)->lun0p : (tp)->lunmp ? (tp)->lunmp[(lun)] : 0 #endif /* * Status are used by the host and the script processor. * * The last four bytes (status[4]) are copied to the * scratchb register (declared as scr0..scr3) just after the * select/reselect, and copied back just after disconnecting. * Inside the script the XX_REG are used. */ /* * Last four bytes (script) */ #define QU_REG scr0 #define HS_REG scr1 #define HS_PRT nc_scr1 #define SS_REG scr2 #define SS_PRT nc_scr2 #define HF_REG scr3 #define HF_PRT nc_scr3 /* * Last four bytes (host) */ #define actualquirks phys.head.status[0] #define host_status phys.head.status[1] #define ssss_status phys.head.status[2] #define host_flags phys.head.status[3] /* * Host flags */ #define HF_IN_PM0 1u #define HF_IN_PM1 (1u<<1) #define HF_ACT_PM (1u<<2) #define HF_DP_SAVED (1u<<3) #define HF_SENSE (1u<<4) #define HF_EXT_ERR (1u<<5) #define HF_DATA_IN (1u<<6) #ifdef SYM_CONF_IARB_SUPPORT #define HF_HINT_IARB (1u<<7) #endif /* * Global CCB HEADER. 
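/*
 * [Editor's illustrative sketch, not part of this revision.]  Typical use
 * of the sym_lp() lookup defined above: LUN 0 has its own pointer in the
 * TCB, other LUNs go through the optional lunmp[] table, and NULL means no
 * LCB has been allocated for that LUN yet.  demo_lcb_for() is hypothetical.
 */
static lcb_p
demo_lcb_for(hcb_p np, u_char target, u_char lun)
{
        tcb_p tp = &np->target[target];

        return (sym_lp(tp, lun));       /* may be NULL for an undiscovered LUN */
}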
* * Due to lack of indirect addressing on earlier NCR chips, * this substructure is copied from the ccb to a global * address after selection (or reselection) and copied back * before disconnect. * For SYMBIOS chips that support LOAD/STORE this copy is * not needed and thus not performed. */ struct sym_ccbh { /* * Start and restart SCRIPTS addresses (must be at 0). */ /*0*/ struct sym_actscr go; /* * SCRIPTS jump address that deal with data pointers. * 'savep' points to the position in the script responsible * for the actual transfer of data. * It's written on reception of a SAVE_DATA_POINTER message. */ u32 savep; /* Jump address to saved data pointer */ u32 lastp; /* SCRIPTS address at end of data */ u32 goalp; /* Not accessed for now from SCRIPTS */ /* * Status fields. */ u8 status[4]; }; /* * Data Structure Block * * During execution of a ccb by the script processor, the * DSA (data structure address) register points to this * substructure of the ccb. */ struct sym_dsb { /* * CCB header. * Also assumed at offset 0 of the sym_ccb structure. */ /*0*/ struct sym_ccbh head; /* * Phase mismatch contexts. * We need two to handle correctly the SAVED DATA POINTER. * MUST BOTH BE AT OFFSET < 256, due to using 8 bit arithmetic * for address calculation from SCRIPTS. */ struct sym_pmc pm0; struct sym_pmc pm1; /* * Table data for Script */ struct sym_tblsel select; struct sym_tblmove smsg; struct sym_tblmove smsg_ext; struct sym_tblmove cmd; struct sym_tblmove sense; struct sym_tblmove wresid; struct sym_tblmove data [SYM_CONF_MAX_SG]; }; /* * Our Command Control Block */ struct sym_ccb { /* * This is the data structure which is pointed by the DSA * register when it is executed by the script processor. * It must be the first entry. */ struct sym_dsb phys; /* * Pointer to CAM ccb and related stuff. */ struct callout ch; /* callout handle */ union ccb *cam_ccb; /* CAM scsiio ccb */ u8 cdb_buf[16]; /* Copy of CDB */ u8 *sns_bbuf; /* Bounce buffer for sense data */ #define SYM_SNS_BBUF_LEN sizeof(struct scsi_sense_data) int data_len; /* Total data length */ int segments; /* Number of SG segments */ /* * Miscellaneous status'. */ u_char nego_status; /* Negotiation status */ u_char xerr_status; /* Extended error flags */ u32 extra_bytes; /* Extraneous bytes transferred */ /* * Message areas. * We prepare a message to be sent after selection. * We may use a second one if the command is rescheduled * due to CHECK_CONDITION or COMMAND TERMINATED. * Contents are IDENTIFY and SIMPLE_TAG. * While negotiating sync or wide transfer, * a SDTR or WDTR message is appended. */ u_char scsi_smsg [12]; u_char scsi_smsg2[12]; /* * Auto request sense related fields. */ u_char sensecmd[6]; /* Request Sense command */ u_char sv_scsi_status; /* Saved SCSI status */ u_char sv_xerr_status; /* Saved extended status */ int sv_resid; /* Saved residual */ /* * Map for the DMA of user data. */ void *arg; /* Argument for some callback */ bus_dmamap_t dmamap; /* DMA map for user data */ u_char dmamapped; #define SYM_DMA_NONE 0 #define SYM_DMA_READ 1 #define SYM_DMA_WRITE 2 /* * Other fields. */ u32 ccb_ba; /* BUS address of this CCB */ u_short tag; /* Tag for this transfer */ /* NO_TAG means no tag */ u_char target; u_char lun; ccb_p link_ccbh; /* Host adapter CCB hash chain */ SYM_QUEHEAD link_ccbq; /* Link to free/busy CCB queue */ u32 startp; /* Initial data pointer */ int ext_sg; /* Extreme data pointer, used */ int ext_ofs; /* to calculate the residual. 
*/ u_char to_abort; /* Want this IO to be aborted */ }; #define CCB_BA(cp,lbl) (cp->ccb_ba + offsetof(struct sym_ccb, lbl)) /* * Host Control Block */ struct sym_hcb { struct mtx mtx; /* * Global headers. * Due to poorness of addressing capabilities, earlier * chips (810, 815, 825) copy part of the data structures * (CCB, TCB and LCB) in fixed areas. */ #ifdef SYM_CONF_GENERIC_SUPPORT struct sym_ccbh ccb_head; struct sym_tcbh tcb_head; struct sym_lcbh lcb_head; #endif /* * Idle task and invalid task actions and * their bus addresses. */ struct sym_actscr idletask, notask, bad_itl, bad_itlq; vm_offset_t idletask_ba, notask_ba, bad_itl_ba, bad_itlq_ba; /* * Dummy lun table to protect us against target * returning bad lun number on reselection. */ u32 *badluntbl; /* Table physical address */ u32 badlun_sa; /* SCRIPT handler BUS address */ /* * Bus address of this host control block. */ u32 hcb_ba; /* * Bit 32-63 of the on-chip RAM bus address in LE format. * The START_RAM64 script loads the MMRS and MMWS from this * field. */ u32 scr_ram_seg; /* * Chip and controller indentification. */ device_t device; /* * Initial value of some IO register bits. * These values are assumed to have been set by BIOS, and may * be used to probe adapter implementation differences. */ u_char sv_scntl0, sv_scntl3, sv_dmode, sv_dcntl, sv_ctest3, sv_ctest4, sv_ctest5, sv_gpcntl, sv_stest2, sv_stest4, sv_scntl4, sv_stest1; /* * Actual initial value of IO register bits used by the * driver. They are loaded at initialisation according to * features that are to be enabled/disabled. */ u_char rv_scntl0, rv_scntl3, rv_dmode, rv_dcntl, rv_ctest3, rv_ctest4, rv_ctest5, rv_stest2, rv_ccntl0, rv_ccntl1, rv_scntl4; /* * Target data. */ #ifdef __amd64__ struct sym_tcb *target; #else struct sym_tcb target[SYM_CONF_MAX_TARGET]; #endif /* * Target control block bus address array used by the SCRIPT * on reselection. */ u32 *targtbl; u32 targtbl_ba; /* * CAM SIM information for this instance. */ struct cam_sim *sim; struct cam_path *path; /* * Allocated hardware resources. */ struct resource *irq_res; struct resource *io_res; struct resource *mmio_res; struct resource *ram_res; int ram_id; void *intr; /* * Bus stuff. * * My understanding of PCI is that all agents must share the * same addressing range and model. * But some hardware architecture guys provide complex and * brain-deaded stuff that makes shit. * This driver only support PCI compliant implementations and * deals with part of the BUS stuff complexity only to fit O/S * requirements. */ /* * DMA stuff. */ bus_dma_tag_t bus_dmat; /* DMA tag from parent BUS */ bus_dma_tag_t data_dmat; /* DMA tag for user data */ /* * BUS addresses of the chip */ vm_offset_t mmio_ba; /* MMIO BUS address */ int mmio_ws; /* MMIO Window size */ vm_offset_t ram_ba; /* RAM BUS address */ int ram_ws; /* RAM window size */ /* * SCRIPTS virtual and physical bus addresses. * 'script' is loaded in the on-chip RAM if present. * 'scripth' stays in main memory for all chips except the * 53C895A, 53C896 and 53C1010 that provide 8K on-chip RAM. */ u_char *scripta0; /* Copies of script and scripth */ u_char *scriptb0; /* Copies of script and scripth */ vm_offset_t scripta_ba; /* Actual script and scripth */ vm_offset_t scriptb_ba; /* bus addresses. */ vm_offset_t scriptb0_ba; u_short scripta_sz; /* Actual size of script A */ u_short scriptb_sz; /* Actual size of script B */ /* * Bus addresses, setup and patch methods for * the selected firmware. 
*/ struct sym_fwa_ba fwa_bas; /* Useful SCRIPTA bus addresses */ struct sym_fwb_ba fwb_bas; /* Useful SCRIPTB bus addresses */ void (*fw_setup)(hcb_p np, const struct sym_fw *fw); void (*fw_patch)(hcb_p np); const char *fw_name; /* * General controller parameters and configuration. */ u_short device_id; /* PCI device id */ u_char revision_id; /* PCI device revision id */ u_int features; /* Chip features map */ u_char myaddr; /* SCSI id of the adapter */ u_char maxburst; /* log base 2 of dwords burst */ u_char maxwide; /* Maximum transfer width */ u_char minsync; /* Min sync period factor (ST) */ u_char maxsync; /* Max sync period factor (ST) */ u_char maxoffs; /* Max scsi offset (ST) */ u_char minsync_dt; /* Min sync period factor (DT) */ u_char maxsync_dt; /* Max sync period factor (DT) */ u_char maxoffs_dt; /* Max scsi offset (DT) */ u_char multiplier; /* Clock multiplier (1,2,4) */ u_char clock_divn; /* Number of clock divisors */ u32 clock_khz; /* SCSI clock frequency in KHz */ u32 pciclk_khz; /* Estimated PCI clock in KHz */ /* * Start queue management. * It is filled up by the host processor and accessed by the * SCRIPTS processor in order to start SCSI commands. */ volatile /* Prevent code optimizations */ u32 *squeue; /* Start queue virtual address */ u32 squeue_ba; /* Start queue BUS address */ u_short squeueput; /* Next free slot of the queue */ u_short actccbs; /* Number of allocated CCBs */ /* * Command completion queue. * It is the same size as the start queue to avoid overflow. */ u_short dqueueget; /* Next position to scan */ volatile /* Prevent code optimizations */ u32 *dqueue; /* Completion (done) queue */ u32 dqueue_ba; /* Done queue BUS address */ /* * Miscellaneous buffers accessed by the scripts-processor. * They shall be DWORD aligned, because they may be read or * written with a script command. */ u_char msgout[8]; /* Buffer for MESSAGE OUT */ u_char msgin [8]; /* Buffer for MESSAGE IN */ u32 lastmsg; /* Last SCSI message sent */ u_char scratch; /* Scratch for SCSI receive */ /* * Miscellaneous configuration and status parameters. */ u_char usrflags; /* Miscellaneous user flags */ u_char scsi_mode; /* Current SCSI BUS mode */ u_char verbose; /* Verbosity for this controller*/ u32 cache; /* Used for cache test at init. */ /* * CCB lists and queue. */ ccb_p ccbh[CCB_HASH_SIZE]; /* CCB hashed by DSA value */ SYM_QUEHEAD free_ccbq; /* Queue of available CCBs */ SYM_QUEHEAD busy_ccbq; /* Queue of busy CCBs */ /* * During error handling and/or recovery, * active CCBs that are to be completed with * error or requeued are moved from the busy_ccbq * to the comp_ccbq prior to completion. */ SYM_QUEHEAD comp_ccbq; /* * CAM CCB pending queue. */ SYM_QUEHEAD cam_ccbq; /* * IMMEDIATE ARBITRATION (IARB) control. * * We keep track in 'last_cp' of the last CCB that has been * queued to the SCRIPTS processor and clear 'last_cp' when * this CCB completes. If last_cp is not zero at the moment * we queue a new CCB, we set a flag in 'last_cp' that is * used by the SCRIPTS as a hint for setting IARB. * We donnot set more than 'iarb_max' consecutive hints for * IARB in order to leave devices a chance to reselect. * By the way, any non zero value of 'iarb_max' is unfair. :) */ #ifdef SYM_CONF_IARB_SUPPORT u_short iarb_max; /* Max. # consecutive IARB hints*/ u_short iarb_count; /* Actual # of these hints */ ccb_p last_cp; #endif /* * Command abort handling. * We need to synchronize tightly with the SCRIPTS * processor in order to handle things correctly. 
*/ u_char abrt_msg[4]; /* Message to send buffer */ struct sym_tblmove abrt_tbl; /* Table for the MOV of it */ struct sym_tblsel abrt_sel; /* Sync params for selection */ u_char istat_sem; /* Tells the chip to stop (SEM) */ }; #define HCB_BA(np, lbl) (np->hcb_ba + offsetof(struct sym_hcb, lbl)) /* * Return the name of the controller. */ static __inline const char *sym_name(hcb_p np) { return device_get_nameunit(np->device); } /*--------------------------------------------------------------------------*/ /*------------------------------ FIRMWARES ---------------------------------*/ /*--------------------------------------------------------------------------*/ /* * This stuff will be moved to a separate source file when * the driver will be broken into several source modules. */ /* * Macros used for all firmwares. */ #define SYM_GEN_A(s, label) ((short) offsetof(s, label)), #define SYM_GEN_B(s, label) ((short) offsetof(s, label)), #define PADDR_A(label) SYM_GEN_PADDR_A(struct SYM_FWA_SCR, label) #define PADDR_B(label) SYM_GEN_PADDR_B(struct SYM_FWB_SCR, label) #ifdef SYM_CONF_GENERIC_SUPPORT /* * Allocate firmware #1 script area. */ #define SYM_FWA_SCR sym_fw1a_scr #define SYM_FWB_SCR sym_fw1b_scr #include static const struct sym_fwa_ofs sym_fw1a_ofs = { SYM_GEN_FW_A(struct SYM_FWA_SCR) }; static const struct sym_fwb_ofs sym_fw1b_ofs = { SYM_GEN_FW_B(struct SYM_FWB_SCR) }; #undef SYM_FWA_SCR #undef SYM_FWB_SCR #endif /* SYM_CONF_GENERIC_SUPPORT */ /* * Allocate firmware #2 script area. */ #define SYM_FWA_SCR sym_fw2a_scr #define SYM_FWB_SCR sym_fw2b_scr #include static const struct sym_fwa_ofs sym_fw2a_ofs = { SYM_GEN_FW_A(struct SYM_FWA_SCR) }; static const struct sym_fwb_ofs sym_fw2b_ofs = { SYM_GEN_FW_B(struct SYM_FWB_SCR) SYM_GEN_B(struct SYM_FWB_SCR, start64) SYM_GEN_B(struct SYM_FWB_SCR, pm_handle) }; #undef SYM_FWA_SCR #undef SYM_FWB_SCR #undef SYM_GEN_A #undef SYM_GEN_B #undef PADDR_A #undef PADDR_B #ifdef SYM_CONF_GENERIC_SUPPORT /* * Patch routine for firmware #1. */ static void sym_fw1_patch(hcb_p np) { struct sym_fw1a_scr *scripta0; struct sym_fw1b_scr *scriptb0; scripta0 = (struct sym_fw1a_scr *) np->scripta0; scriptb0 = (struct sym_fw1b_scr *) np->scriptb0; /* * Remove LED support if not needed. */ if (!(np->features & FE_LED0)) { scripta0->idle[0] = cpu_to_scr(SCR_NO_OP); scripta0->reselected[0] = cpu_to_scr(SCR_NO_OP); scripta0->start[0] = cpu_to_scr(SCR_NO_OP); } #ifdef SYM_CONF_IARB_SUPPORT /* * If user does not want to use IMMEDIATE ARBITRATION * when we are reselected while attempting to arbitrate, * patch the SCRIPTS accordingly with a SCRIPT NO_OP. */ if (!SYM_CONF_SET_IARB_ON_ARB_LOST) scripta0->ungetjob[0] = cpu_to_scr(SCR_NO_OP); #endif /* * Patch some data in SCRIPTS. * - start and done queue initial bus address. * - target bus address table bus address. */ scriptb0->startpos[0] = cpu_to_scr(np->squeue_ba); scriptb0->done_pos[0] = cpu_to_scr(np->dqueue_ba); scriptb0->targtbl[0] = cpu_to_scr(np->targtbl_ba); } #endif /* SYM_CONF_GENERIC_SUPPORT */ /* * Patch routine for firmware #2. */ static void sym_fw2_patch(hcb_p np) { struct sym_fw2a_scr *scripta0; struct sym_fw2b_scr *scriptb0; scripta0 = (struct sym_fw2a_scr *) np->scripta0; scriptb0 = (struct sym_fw2b_scr *) np->scriptb0; /* * Remove LED support if not needed. 
*/ if (!(np->features & FE_LED0)) { scripta0->idle[0] = cpu_to_scr(SCR_NO_OP); scripta0->reselected[0] = cpu_to_scr(SCR_NO_OP); scripta0->start[0] = cpu_to_scr(SCR_NO_OP); } #ifdef SYM_CONF_IARB_SUPPORT /* * If user does not want to use IMMEDIATE ARBITRATION * when we are reselected while attempting to arbitrate, * patch the SCRIPTS accordingly with a SCRIPT NO_OP. */ if (!SYM_CONF_SET_IARB_ON_ARB_LOST) scripta0->ungetjob[0] = cpu_to_scr(SCR_NO_OP); #endif /* * Patch some variable in SCRIPTS. * - start and done queue initial bus address. * - target bus address table bus address. */ scriptb0->startpos[0] = cpu_to_scr(np->squeue_ba); scriptb0->done_pos[0] = cpu_to_scr(np->dqueue_ba); scriptb0->targtbl[0] = cpu_to_scr(np->targtbl_ba); /* * Remove the load of SCNTL4 on reselection if not a C10. */ if (!(np->features & FE_C10)) { scripta0->resel_scntl4[0] = cpu_to_scr(SCR_NO_OP); scripta0->resel_scntl4[1] = cpu_to_scr(0); } /* * Remove a couple of work-arounds specific to C1010 if * they are not desirable. See `sym_fw2.h' for more details. */ if (!(np->device_id == PCI_ID_LSI53C1010_2 && np->revision_id < 0x1 && np->pciclk_khz < 60000)) { scripta0->datao_phase[0] = cpu_to_scr(SCR_NO_OP); scripta0->datao_phase[1] = cpu_to_scr(0); } if (!(np->device_id == PCI_ID_LSI53C1010 && /* np->revision_id < 0xff */ 1)) { scripta0->sel_done[0] = cpu_to_scr(SCR_NO_OP); scripta0->sel_done[1] = cpu_to_scr(0); } /* * Patch some other variables in SCRIPTS. * These ones are loaded by the SCRIPTS processor. */ scriptb0->pm0_data_addr[0] = cpu_to_scr(np->scripta_ba + offsetof(struct sym_fw2a_scr, pm0_data)); scriptb0->pm1_data_addr[0] = cpu_to_scr(np->scripta_ba + offsetof(struct sym_fw2a_scr, pm1_data)); } /* * Fill the data area in scripts. * To be done for all firmwares. */ static void sym_fw_fill_data (u32 *in, u32 *out) { int i; for (i = 0; i < SYM_CONF_MAX_SG; i++) { *in++ = SCR_CHMOV_TBL ^ SCR_DATA_IN; *in++ = offsetof (struct sym_dsb, data[i]); *out++ = SCR_CHMOV_TBL ^ SCR_DATA_OUT; *out++ = offsetof (struct sym_dsb, data[i]); } } /* * Setup useful script bus addresses. * To be done for all firmwares. */ static void sym_fw_setup_bus_addresses(hcb_p np, const struct sym_fw *fw) { u32 *pa; const u_short *po; int i; /* * Build the bus address table for script A * from the script A offset table. */ po = (const u_short *) fw->a_ofs; pa = (u32 *) &np->fwa_bas; for (i = 0 ; i < sizeof(np->fwa_bas)/sizeof(u32) ; i++) pa[i] = np->scripta_ba + po[i]; /* * Same for script B. */ po = (const u_short *) fw->b_ofs; pa = (u32 *) &np->fwb_bas; for (i = 0 ; i < sizeof(np->fwb_bas)/sizeof(u32) ; i++) pa[i] = np->scriptb_ba + po[i]; } #ifdef SYM_CONF_GENERIC_SUPPORT /* * Setup routine for firmware #1. */ static void sym_fw1_setup(hcb_p np, const struct sym_fw *fw) { struct sym_fw1a_scr *scripta0; scripta0 = (struct sym_fw1a_scr *) np->scripta0; /* * Fill variable parts in scripts. */ sym_fw_fill_data(scripta0->data_in, scripta0->data_out); /* * Setup bus addresses used from the C code.. */ sym_fw_setup_bus_addresses(np, fw); } #endif /* SYM_CONF_GENERIC_SUPPORT */ /* * Setup routine for firmware #2. */ static void sym_fw2_setup(hcb_p np, const struct sym_fw *fw) { struct sym_fw2a_scr *scripta0; scripta0 = (struct sym_fw2a_scr *) np->scripta0; /* * Fill variable parts in scripts. */ sym_fw_fill_data(scripta0->data_in, scripta0->data_out); /* * Setup bus addresses used from the C code.. */ sym_fw_setup_bus_addresses(np, fw); } /* * Allocate firmware descriptors. 
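/*
 * [Editor's illustrative sketch, not part of this revision.]  What the
 * offset tables built by the SYM_GEN_* macros and consumed by
 * sym_fw_setup_bus_addresses() above buy us: a script label is stored as a
 * 16-bit offsetof() into the script image, and the usable SCRIPTS bus
 * address is simply that offset added to the base the script was bound to.
 * demo_jump_to_label() is hypothetical.
 */
static void
demo_jump_to_label(hcb_p np, u_short label_offset)
{
        /* label_offset would be offsetof(struct sym_fw2a_scr, <some label>). */
        u32 label_ba = np->scripta_ba + label_offset;

        OUTL_DSP(label_ba);     /* hand that address to the SCRIPTS processor */
}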
*/ #ifdef SYM_CONF_GENERIC_SUPPORT static const struct sym_fw sym_fw1 = SYM_FW_ENTRY(sym_fw1, "NCR-generic"); #endif /* SYM_CONF_GENERIC_SUPPORT */ static const struct sym_fw sym_fw2 = SYM_FW_ENTRY(sym_fw2, "LOAD/STORE-based"); /* * Find the most appropriate firmware for a chip. */ static const struct sym_fw * sym_find_firmware(const struct sym_pci_chip *chip) { if (chip->features & FE_LDSTR) return &sym_fw2; #ifdef SYM_CONF_GENERIC_SUPPORT else if (!(chip->features & (FE_PFEN|FE_NOPM|FE_DAC))) return &sym_fw1; #endif else return NULL; } /* * Bind a script to physical addresses. */ static void sym_fw_bind_script (hcb_p np, u32 *start, int len) { u32 opcode, new, old, tmp1, tmp2; u32 *end, *cur; int relocs; cur = start; end = start + len/4; while (cur < end) { opcode = *cur; /* * If we forget to change the length * in scripts, a field will be * padded with 0. This is an illegal * command. */ if (opcode == 0) { printf ("%s: ERROR0 IN SCRIPT at %d.\n", sym_name(np), (int) (cur-start)); MDELAY (10000); ++cur; continue; } /* * We use the bogus value 0xf00ff00f ;-) * to reserve data area in SCRIPTS. */ if (opcode == SCR_DATA_ZERO) { *cur++ = 0; continue; } if (DEBUG_FLAGS & DEBUG_SCRIPT) printf ("%d: <%x>\n", (int) (cur-start), (unsigned)opcode); /* * We don't have to decode ALL commands */ switch (opcode >> 28) { case 0xf: /* * LOAD / STORE DSA relative, don't relocate. */ relocs = 0; break; case 0xe: /* * LOAD / STORE absolute. */ relocs = 1; break; case 0xc: /* * COPY has TWO arguments. */ relocs = 2; tmp1 = cur[1]; tmp2 = cur[2]; if ((tmp1 ^ tmp2) & 3) { printf ("%s: ERROR1 IN SCRIPT at %d.\n", sym_name(np), (int) (cur-start)); MDELAY (10000); } /* * If PREFETCH feature not enabled, remove * the NO FLUSH bit if present. */ if ((opcode & SCR_NO_FLUSH) && !(np->features & FE_PFEN)) { opcode = (opcode & ~SCR_NO_FLUSH); } break; case 0x0: /* * MOVE/CHMOV (absolute address) */ if (!(np->features & FE_WIDE)) opcode = (opcode | OPC_MOVE); relocs = 1; break; case 0x1: /* * MOVE/CHMOV (table indirect) */ if (!(np->features & FE_WIDE)) opcode = (opcode | OPC_MOVE); relocs = 0; break; case 0x8: /* * JUMP / CALL * dont't relocate if relative :-) */ if (opcode & 0x00800000) relocs = 0; else if ((opcode & 0xf8400000) == 0x80400000)/*JUMP64*/ relocs = 2; else relocs = 1; break; case 0x4: case 0x5: case 0x6: case 0x7: relocs = 1; break; default: relocs = 0; break; } /* * Scriptify:) the opcode. */ *cur++ = cpu_to_scr(opcode); /* * If no relocation, assume 1 argument * and just scriptize:) it. */ if (!relocs) { *cur = cpu_to_scr(*cur); ++cur; continue; } /* * Otherwise performs all needed relocations. */ while (relocs--) { old = *cur; switch (old & RELOC_MASK) { case RELOC_REGISTER: new = (old & ~RELOC_MASK) + np->mmio_ba; break; case RELOC_LABEL_A: new = (old & ~RELOC_MASK) + np->scripta_ba; break; case RELOC_LABEL_B: new = (old & ~RELOC_MASK) + np->scriptb_ba; break; case RELOC_SOFTC: new = (old & ~RELOC_MASK) + np->hcb_ba; break; case 0: /* * Don't relocate a 0 address. * They are mostly used for patched or * script self-modified areas. */ if (old == 0) { new = old; break; } /* fall through */ default: new = 0; panic("sym_fw_bind_script: " "weird relocation %x\n", old); break; } *cur++ = cpu_to_scr(new); } } } /*---------------------------------------------------------------------------*/ /*--------------------------- END OF FIRMWARES -----------------------------*/ /*---------------------------------------------------------------------------*/ /* * Function prototypes. 
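/*
 * [Editor's illustrative sketch, not part of this revision.]  The heart of
 * the relocation loop in sym_fw_bind_script() above, restated for a single
 * argument word: the tag in the top bits selects which base address turns
 * the script-relative value into an absolute one.  demo_relocate_word() is
 * a hypothetical rendering; the real code additionally panics on unknown
 * tags and keeps untagged zero words as-is.
 */
static u32
demo_relocate_word(hcb_p np, u32 word)
{
        switch (word & RELOC_MASK) {
        case RELOC_REGISTER:    /* chip register window */
                return (cpu_to_scr((word & ~RELOC_MASK) + np->mmio_ba));
        case RELOC_LABEL_A:     /* label inside script A */
                return (cpu_to_scr((word & ~RELOC_MASK) + np->scripta_ba));
        case RELOC_LABEL_B:     /* label inside script B */
                return (cpu_to_scr((word & ~RELOC_MASK) + np->scriptb_ba));
        case RELOC_SOFTC:       /* field of the host control block */
                return (cpu_to_scr((word & ~RELOC_MASK) + np->hcb_ba));
        default:                /* not a relocation */
                return (cpu_to_scr(word));
        }
}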
*/ static void sym_save_initial_setting (hcb_p np); static int sym_prepare_setting (hcb_p np, struct sym_nvram *nvram); static int sym_prepare_nego (hcb_p np, ccb_p cp, int nego, u_char *msgptr); static void sym_put_start_queue (hcb_p np, ccb_p cp); static void sym_chip_reset (hcb_p np); static void sym_soft_reset (hcb_p np); static void sym_start_reset (hcb_p np); static int sym_reset_scsi_bus (hcb_p np, int enab_int); static int sym_wakeup_done (hcb_p np); static void sym_flush_busy_queue (hcb_p np, int cam_status); static void sym_flush_comp_queue (hcb_p np, int cam_status); static void sym_init (hcb_p np, int reason); static int sym_getsync(hcb_p np, u_char dt, u_char sfac, u_char *divp, u_char *fakp); static void sym_setsync (hcb_p np, ccb_p cp, u_char ofs, u_char per, u_char div, u_char fak); static void sym_setwide (hcb_p np, ccb_p cp, u_char wide); static void sym_setpprot(hcb_p np, ccb_p cp, u_char dt, u_char ofs, u_char per, u_char wide, u_char div, u_char fak); static void sym_settrans(hcb_p np, ccb_p cp, u_char dt, u_char ofs, u_char per, u_char wide, u_char div, u_char fak); static void sym_log_hard_error (hcb_p np, u_short sist, u_char dstat); static void sym_intr (void *arg); static void sym_poll (struct cam_sim *sim); static void sym_recover_scsi_int (hcb_p np, u_char hsts); static void sym_int_sto (hcb_p np); static void sym_int_udc (hcb_p np); static void sym_int_sbmc (hcb_p np); static void sym_int_par (hcb_p np, u_short sist); static void sym_int_ma (hcb_p np); static int sym_dequeue_from_squeue(hcb_p np, int i, int target, int lun, int task); static void sym_sir_bad_scsi_status (hcb_p np, ccb_p cp); static int sym_clear_tasks (hcb_p np, int status, int targ, int lun, int task); static void sym_sir_task_recovery (hcb_p np, int num); static int sym_evaluate_dp (hcb_p np, ccb_p cp, u32 scr, int *ofs); static void sym_modify_dp(hcb_p np, ccb_p cp, int ofs); static int sym_compute_residual (hcb_p np, ccb_p cp); static int sym_show_msg (u_char * msg); static void sym_print_msg (ccb_p cp, char *label, u_char *msg); static void sym_sync_nego (hcb_p np, tcb_p tp, ccb_p cp); static void sym_ppr_nego (hcb_p np, tcb_p tp, ccb_p cp); static void sym_wide_nego (hcb_p np, tcb_p tp, ccb_p cp); static void sym_nego_default (hcb_p np, tcb_p tp, ccb_p cp); static void sym_nego_rejected (hcb_p np, tcb_p tp, ccb_p cp); static void sym_int_sir (hcb_p np); static void sym_free_ccb (hcb_p np, ccb_p cp); static ccb_p sym_get_ccb (hcb_p np, u_char tn, u_char ln, u_char tag_order); static ccb_p sym_alloc_ccb (hcb_p np); static ccb_p sym_ccb_from_dsa (hcb_p np, u32 dsa); static lcb_p sym_alloc_lcb (hcb_p np, u_char tn, u_char ln); static void sym_alloc_lcb_tags (hcb_p np, u_char tn, u_char ln); static int sym_snooptest (hcb_p np); static void sym_selectclock(hcb_p np, u_char scntl3); static void sym_getclock (hcb_p np, int mult); static int sym_getpciclock (hcb_p np); static void sym_complete_ok (hcb_p np, ccb_p cp); static void sym_complete_error (hcb_p np, ccb_p cp); static void sym_callout (void *arg); static int sym_abort_scsiio (hcb_p np, union ccb *ccb, int timed_out); static void sym_reset_dev (hcb_p np, union ccb *ccb); static void sym_action (struct cam_sim *sim, union ccb *ccb); static int sym_setup_cdb (hcb_p np, struct ccb_scsiio *csio, ccb_p cp); static void sym_setup_data_and_start (hcb_p np, struct ccb_scsiio *csio, ccb_p cp); static int sym_fast_scatter_sg_physical(hcb_p np, ccb_p cp, bus_dma_segment_t *psegs, int nsegs); static int sym_scatter_sg_physical (hcb_p np, ccb_p cp, 
bus_dma_segment_t *psegs, int nsegs); static void sym_action2 (struct cam_sim *sim, union ccb *ccb); static void sym_update_trans(hcb_p np, struct sym_trans *tip, struct ccb_trans_settings *cts); static void sym_update_dflags(hcb_p np, u_char *flags, struct ccb_trans_settings *cts); static const struct sym_pci_chip *sym_find_pci_chip (device_t dev); static int sym_pci_probe (device_t dev); static int sym_pci_attach (device_t dev); static void sym_pci_free (hcb_p np); static int sym_cam_attach (hcb_p np); static void sym_cam_free (hcb_p np); static void sym_nvram_setup_host (hcb_p np, struct sym_nvram *nvram); static void sym_nvram_setup_target (hcb_p np, int targ, struct sym_nvram *nvp); static int sym_read_nvram (hcb_p np, struct sym_nvram *nvp); /* * Print something which allows to retrieve the controller type, * unit, target, lun concerned by a kernel message. */ static void PRINT_TARGET (hcb_p np, int target) { printf ("%s:%d:", sym_name(np), target); } static void PRINT_LUN(hcb_p np, int target, int lun) { printf ("%s:%d:%d:", sym_name(np), target, lun); } static void PRINT_ADDR (ccb_p cp) { if (cp && cp->cam_ccb) xpt_print_path(cp->cam_ccb->ccb_h.path); } /* * Take into account this ccb in the freeze count. */ static void sym_freeze_cam_ccb(union ccb *ccb) { if (!(ccb->ccb_h.flags & CAM_DEV_QFRZDIS)) { if (!(ccb->ccb_h.status & CAM_DEV_QFRZN)) { ccb->ccb_h.status |= CAM_DEV_QFRZN; xpt_freeze_devq(ccb->ccb_h.path, 1); } } } /* * Set the status field of a CAM CCB. */ static __inline void sym_set_cam_status(union ccb *ccb, cam_status status) { ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= status; } /* * Get the status field of a CAM CCB. */ static __inline int sym_get_cam_status(union ccb *ccb) { return ccb->ccb_h.status & CAM_STATUS_MASK; } /* * Enqueue a CAM CCB. */ static void sym_enqueue_cam_ccb(ccb_p cp) { hcb_p np; union ccb *ccb; ccb = cp->cam_ccb; np = (hcb_p) cp->arg; assert(!(ccb->ccb_h.status & CAM_SIM_QUEUED)); ccb->ccb_h.status = CAM_REQ_INPROG; callout_reset_sbt(&cp->ch, SBT_1MS * ccb->ccb_h.timeout, 0, sym_callout, (caddr_t)ccb, 0); ccb->ccb_h.status |= CAM_SIM_QUEUED; ccb->ccb_h.sym_hcb_ptr = np; sym_insque_tail(sym_qptr(&ccb->ccb_h.sim_links), &np->cam_ccbq); } /* * Complete a pending CAM CCB. */ static void sym_xpt_done(hcb_p np, union ccb *ccb, ccb_p cp) { SYM_LOCK_ASSERT(MA_OWNED); if (ccb->ccb_h.status & CAM_SIM_QUEUED) { callout_stop(&cp->ch); sym_remque(sym_qptr(&ccb->ccb_h.sim_links)); ccb->ccb_h.status &= ~CAM_SIM_QUEUED; ccb->ccb_h.sym_hcb_ptr = NULL; } xpt_done(ccb); } static void sym_xpt_done2(hcb_p np, union ccb *ccb, int cam_status) { SYM_LOCK_ASSERT(MA_OWNED); sym_set_cam_status(ccb, cam_status); xpt_done(ccb); } /* * SYMBIOS chip clock divisor table. * * Divisors are multiplied by 10,000,000 in order to make * calculations more simple. */ #define _5M 5000000 static const u32 div_10M[] = {2*_5M, 3*_5M, 4*_5M, 6*_5M, 8*_5M, 12*_5M, 16*_5M}; /* * SYMBIOS chips allow burst lengths of 2, 4, 8, 16, 32, 64, * 128 transfers. All chips support at least 16 transfers * bursts. The 825A, 875 and 895 chips support bursts of up * to 128 transfers and the 895A and 896 support bursts of up * to 64 transfers. All other chips support up to 16 * transfers bursts. * * For PCI 32 bit data transfers each transfer is a DWORD. * It is a QUADWORD (8 bytes) for PCI 64 bit data transfers. * * We use log base 2 (burst length) as internal code, with * value 0 meaning "burst disabled". */ /* * Burst length from burst code. */ #define burst_length(bc) (!(bc))? 
0 : 1 << (bc) /* * Burst code from io register bits. */ #define burst_code(dmode, ctest4, ctest5) \ (ctest4) & 0x80? 0 : (((dmode) & 0xc0) >> 6) + ((ctest5) & 0x04) + 1 /* * Set initial io register bits from burst code. */ static __inline void sym_init_burst(hcb_p np, u_char bc) { np->rv_ctest4 &= ~0x80; np->rv_dmode &= ~(0x3 << 6); np->rv_ctest5 &= ~0x4; if (!bc) { np->rv_ctest4 |= 0x80; } else { --bc; np->rv_dmode |= ((bc & 0x3) << 6); np->rv_ctest5 |= (bc & 0x4); } } /* * Print out the list of targets that have some flag disabled by user. */ static void sym_print_targets_flag(hcb_p np, int mask, char *msg) { int cnt; int i; for (cnt = 0, i = 0 ; i < SYM_CONF_MAX_TARGET ; i++) { if (i == np->myaddr) continue; if (np->target[i].usrflags & mask) { if (!cnt++) printf("%s: %s disabled for targets", sym_name(np), msg); printf(" %d", i); } } if (cnt) printf(".\n"); } /* * Save initial settings of some IO registers. * Assumed to have been set by BIOS. * We cannot reset the chip prior to reading the * IO registers, since informations will be lost. * Since the SCRIPTS processor may be running, this * is not safe on paper, but it seems to work quite * well. :) */ static void sym_save_initial_setting (hcb_p np) { np->sv_scntl0 = INB(nc_scntl0) & 0x0a; np->sv_scntl3 = INB(nc_scntl3) & 0x07; np->sv_dmode = INB(nc_dmode) & 0xce; np->sv_dcntl = INB(nc_dcntl) & 0xa8; np->sv_ctest3 = INB(nc_ctest3) & 0x01; np->sv_ctest4 = INB(nc_ctest4) & 0x80; np->sv_gpcntl = INB(nc_gpcntl); np->sv_stest1 = INB(nc_stest1); np->sv_stest2 = INB(nc_stest2) & 0x20; np->sv_stest4 = INB(nc_stest4); if (np->features & FE_C10) { /* Always large DMA fifo + ultra3 */ np->sv_scntl4 = INB(nc_scntl4); np->sv_ctest5 = INB(nc_ctest5) & 0x04; } else np->sv_ctest5 = INB(nc_ctest5) & 0x24; } /* * Prepare io register values used by sym_init() according * to selected and supported features. */ static int sym_prepare_setting(hcb_p np, struct sym_nvram *nvram) { u_char burst_max; u32 period; int i; /* * Wide ? */ np->maxwide = (np->features & FE_WIDE)? 1 : 0; /* * Get the frequency of the chip's clock. */ if (np->features & FE_QUAD) np->multiplier = 4; else if (np->features & FE_DBLR) np->multiplier = 2; else np->multiplier = 1; np->clock_khz = (np->features & FE_CLK80)? 80000 : 40000; np->clock_khz *= np->multiplier; if (np->clock_khz != 40000) sym_getclock(np, np->multiplier); /* * Divisor to be used for async (timer pre-scaler). */ i = np->clock_divn - 1; while (--i >= 0) { if (10ul * SYM_CONF_MIN_ASYNC * np->clock_khz > div_10M[i]) { ++i; break; } } np->rv_scntl3 = i+1; /* * The C1010 uses hardwired divisors for async. * So, we just throw away, the async. divisor.:-) */ if (np->features & FE_C10) np->rv_scntl3 = 0; /* * Minimum synchronous period factor supported by the chip. * Btw, 'period' is in tenths of nanoseconds. */ period = howmany(4 * div_10M[0], np->clock_khz); if (period <= 250) np->minsync = 10; else if (period <= 303) np->minsync = 11; else if (period <= 500) np->minsync = 12; else np->minsync = howmany(period, 40); /* * Check against chip SCSI standard support (SCSI-2,ULTRA,ULTRA2). */ if (np->minsync < 25 && !(np->features & (FE_ULTRA|FE_ULTRA2|FE_ULTRA3))) np->minsync = 25; else if (np->minsync < 12 && !(np->features & (FE_ULTRA2|FE_ULTRA3))) np->minsync = 12; /* * Maximum synchronous period factor supported by the chip. */ period = (11 * div_10M[np->clock_divn - 1]) / (4 * np->clock_khz); np->maxsync = period > 2540 ? 254 : period / 10; /* * If chip is a C1010, guess the sync limits in DT mode. 
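 *
 * With the C1010's 160000 kHz clock, the ST minimum computed above is
 * howmany(4 * div_10M[0], 160000) = 250 tenths of ns, i.e. a factor of
 * 10 (25 ns).  In DT mode the chip can additionally do a factor of 9
 * (12.5 ns, Fast-80 on a wide bus) with offsets up to 62, which is what
 * the hardwired values below express.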
*/ if ((np->features & (FE_C10|FE_ULTRA3)) == (FE_C10|FE_ULTRA3)) { if (np->clock_khz == 160000) { np->minsync_dt = 9; np->maxsync_dt = 50; np->maxoffs_dt = 62; } } /* * 64 bit addressing (895A/896/1010) ? */ if (np->features & FE_DAC) #ifdef __LP64__ np->rv_ccntl1 |= (XTIMOD | EXTIBMV); #else np->rv_ccntl1 |= (DDAC); #endif /* * Phase mismatch handled by SCRIPTS (895A/896/1010) ? */ if (np->features & FE_NOPM) np->rv_ccntl0 |= (ENPMJ); /* * C1010 Errata. * In dual channel mode, contention occurs if internal cycles * are used. Disable internal cycles. */ if (np->device_id == PCI_ID_LSI53C1010 && np->revision_id < 0x2) np->rv_ccntl0 |= DILS; /* * Select burst length (dwords) */ burst_max = SYM_SETUP_BURST_ORDER; if (burst_max == 255) burst_max = burst_code(np->sv_dmode, np->sv_ctest4, np->sv_ctest5); if (burst_max > 7) burst_max = 7; if (burst_max > np->maxburst) burst_max = np->maxburst; /* * DEL 352 - 53C810 Rev x11 - Part Number 609-0392140 - ITEM 2. * This chip and the 860 Rev 1 may wrongly use PCI cache line * based transactions on LOAD/STORE instructions. So we have * to prevent these chips from using such PCI transactions in * this driver. The generic ncr driver that does not use * LOAD/STORE instructions does not need this work-around. */ if ((np->device_id == PCI_ID_SYM53C810 && np->revision_id >= 0x10 && np->revision_id <= 0x11) || (np->device_id == PCI_ID_SYM53C860 && np->revision_id <= 0x1)) np->features &= ~(FE_WRIE|FE_ERL|FE_ERMP); /* * Select all supported special features. * If we are using on-board RAM for scripts, prefetch (PFEN) * does not help, but burst op fetch (BOF) does. * Disabling PFEN makes sure BOF will be used. */ if (np->features & FE_ERL) np->rv_dmode |= ERL; /* Enable Read Line */ if (np->features & FE_BOF) np->rv_dmode |= BOF; /* Burst Opcode Fetch */ if (np->features & FE_ERMP) np->rv_dmode |= ERMP; /* Enable Read Multiple */ #if 1 if ((np->features & FE_PFEN) && !np->ram_ba) #else if (np->features & FE_PFEN) #endif np->rv_dcntl |= PFEN; /* Prefetch Enable */ if (np->features & FE_CLSE) np->rv_dcntl |= CLSE; /* Cache Line Size Enable */ if (np->features & FE_WRIE) np->rv_ctest3 |= WRIE; /* Write and Invalidate */ if (np->features & FE_DFS) np->rv_ctest5 |= DFS; /* Dma Fifo Size */ /* * Select some other */ if (SYM_SETUP_PCI_PARITY) np->rv_ctest4 |= MPEE; /* Master parity checking */ if (SYM_SETUP_SCSI_PARITY) np->rv_scntl0 |= 0x0a; /* full arb., ena parity, par->ATN */ /* * Get parity checking, host ID and verbose mode from NVRAM */ np->myaddr = 255; sym_nvram_setup_host (np, nvram); #ifdef __sparc64__ np->myaddr = OF_getscsinitid(np->device); #endif /* * Get SCSI addr of host adapter (set by bios?). */ if (np->myaddr == 255) { np->myaddr = INB(nc_scid) & 0x07; if (!np->myaddr) np->myaddr = SYM_SETUP_HOST_ID; } /* * Prepare initial io register bits for burst length */ sym_init_burst(np, burst_max); /* * Set SCSI BUS mode. * - LVD capable chips (895/895A/896/1010) report the * current BUS mode through the STEST4 IO register. * - For previous generation chips (825/825A/875), * user has to tell us how to check against HVD, * since a 100% safe algorithm is not possible. 
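 *
 * The code below implements both user choices: SYM_SETUP_SCSI_DIFF=1
 * guesses HVD from the STEST2 setting left by the BIOS (or, failing
 * that, from GPREG on boards with a Symbios NVRAM), while
 * SYM_SETUP_SCSI_DIFF=2 assumes HVD unconditionally.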
*/ np->scsi_mode = SMODE_SE; if (np->features & (FE_ULTRA2|FE_ULTRA3)) np->scsi_mode = (np->sv_stest4 & SMODE); else if (np->features & FE_DIFF) { if (SYM_SETUP_SCSI_DIFF == 1) { if (np->sv_scntl3) { if (np->sv_stest2 & 0x20) np->scsi_mode = SMODE_HVD; } else if (nvram->type == SYM_SYMBIOS_NVRAM) { if (!(INB(nc_gpreg) & 0x08)) np->scsi_mode = SMODE_HVD; } } else if (SYM_SETUP_SCSI_DIFF == 2) np->scsi_mode = SMODE_HVD; } if (np->scsi_mode == SMODE_HVD) np->rv_stest2 |= 0x20; /* * Set LED support from SCRIPTS. * Ignore this feature for boards known to use a * specific GPIO wiring and for the 895A, 896 * and 1010 that drive the LED directly. */ if ((SYM_SETUP_SCSI_LED || (nvram->type == SYM_SYMBIOS_NVRAM || (nvram->type == SYM_TEKRAM_NVRAM && np->device_id == PCI_ID_SYM53C895))) && !(np->features & FE_LEDC) && !(np->sv_gpcntl & 0x01)) np->features |= FE_LED0; /* * Set irq mode. */ switch(SYM_SETUP_IRQ_MODE & 3) { case 2: np->rv_dcntl |= IRQM; break; case 1: np->rv_dcntl |= (np->sv_dcntl & IRQM); break; default: break; } /* * Configure targets according to driver setup. * If NVRAM present get targets setup from NVRAM. */ for (i = 0 ; i < SYM_CONF_MAX_TARGET ; i++) { tcb_p tp = &np->target[i]; tp->tinfo.user.scsi_version = tp->tinfo.current.scsi_version= 2; tp->tinfo.user.spi_version = tp->tinfo.current.spi_version = 2; tp->tinfo.user.period = np->minsync; if (np->features & FE_ULTRA3) tp->tinfo.user.period = np->minsync_dt; tp->tinfo.user.offset = np->maxoffs; tp->tinfo.user.width = np->maxwide ? BUS_16_BIT : BUS_8_BIT; tp->usrflags |= (SYM_DISC_ENABLED | SYM_TAGS_ENABLED); tp->usrtags = SYM_SETUP_MAX_TAG; sym_nvram_setup_target (np, i, nvram); /* * For now, guess PPR/DT support from the period * and BUS width. */ if (np->features & FE_ULTRA3) { if (tp->tinfo.user.period <= 9 && tp->tinfo.user.width == BUS_16_BIT) { tp->tinfo.user.options |= PPR_OPT_DT; tp->tinfo.user.offset = np->maxoffs_dt; tp->tinfo.user.spi_version = 3; } } if (!tp->usrtags) tp->usrflags &= ~SYM_TAGS_ENABLED; } /* * Let user know about the settings. */ i = nvram->type; printf("%s: %s NVRAM, ID %d, Fast-%d, %s, %s\n", sym_name(np), i == SYM_SYMBIOS_NVRAM ? "Symbios" : (i == SYM_TEKRAM_NVRAM ? "Tekram" : "No"), np->myaddr, (np->features & FE_ULTRA3) ? 80 : (np->features & FE_ULTRA2) ? 40 : (np->features & FE_ULTRA) ? 20 : 10, sym_scsi_bus_mode(np->scsi_mode), (np->rv_scntl0 & 0xa) ? "parity checking" : "NO parity"); /* * Tell him more on demand. */ if (sym_verbose) { printf("%s: %s IRQ line driver%s\n", sym_name(np), np->rv_dcntl & IRQM ? "totem pole" : "open drain", np->ram_ba ? ", using on-chip SRAM" : ""); printf("%s: using %s firmware.\n", sym_name(np), np->fw_name); if (np->features & FE_NOPM) printf("%s: handling phase mismatch from SCRIPTS.\n", sym_name(np)); } /* * And still more. */ if (sym_verbose > 1) { printf ("%s: initial SCNTL3/DMODE/DCNTL/CTEST3/4/5 = " "(hex) %02x/%02x/%02x/%02x/%02x/%02x\n", sym_name(np), np->sv_scntl3, np->sv_dmode, np->sv_dcntl, np->sv_ctest3, np->sv_ctest4, np->sv_ctest5); printf ("%s: final SCNTL3/DMODE/DCNTL/CTEST3/4/5 = " "(hex) %02x/%02x/%02x/%02x/%02x/%02x\n", sym_name(np), np->rv_scntl3, np->rv_dmode, np->rv_dcntl, np->rv_ctest3, np->rv_ctest4, np->rv_ctest5); } /* * Let user be aware of targets that have some disable flags set. */ sym_print_targets_flag(np, SYM_SCAN_BOOT_DISABLED, "SCAN AT BOOT"); if (sym_verbose) sym_print_targets_flag(np, SYM_SCAN_LUNS_DISABLED, "SCAN FOR LUNS"); return 0; } /* * Prepare the next negotiation message if needed. 
* * Fill in the part of message buffer that contains the * negotiation and the nego_status field of the CCB. * Returns the size of the message in bytes. */ static int sym_prepare_nego(hcb_p np, ccb_p cp, int nego, u_char *msgptr) { tcb_p tp = &np->target[cp->target]; int msglen = 0; /* * Early C1010 chips need a work-around for DT * data transfer to work. */ if (!(np->features & FE_U3EN)) tp->tinfo.goal.options = 0; /* * negotiate using PPR ? */ if (tp->tinfo.goal.options & PPR_OPT_MASK) nego = NS_PPR; /* * negotiate wide transfers ? */ else if (tp->tinfo.current.width != tp->tinfo.goal.width) nego = NS_WIDE; /* * negotiate synchronous transfers? */ else if (tp->tinfo.current.period != tp->tinfo.goal.period || tp->tinfo.current.offset != tp->tinfo.goal.offset) nego = NS_SYNC; switch (nego) { case NS_SYNC: msgptr[msglen++] = M_EXTENDED; msgptr[msglen++] = 3; msgptr[msglen++] = M_X_SYNC_REQ; msgptr[msglen++] = tp->tinfo.goal.period; msgptr[msglen++] = tp->tinfo.goal.offset; break; case NS_WIDE: msgptr[msglen++] = M_EXTENDED; msgptr[msglen++] = 2; msgptr[msglen++] = M_X_WIDE_REQ; msgptr[msglen++] = tp->tinfo.goal.width; break; case NS_PPR: msgptr[msglen++] = M_EXTENDED; msgptr[msglen++] = 6; msgptr[msglen++] = M_X_PPR_REQ; msgptr[msglen++] = tp->tinfo.goal.period; msgptr[msglen++] = 0; msgptr[msglen++] = tp->tinfo.goal.offset; msgptr[msglen++] = tp->tinfo.goal.width; msgptr[msglen++] = tp->tinfo.goal.options & PPR_OPT_DT; break; } cp->nego_status = nego; if (nego) { tp->nego_cp = cp; /* Keep track a nego will be performed */ if (DEBUG_FLAGS & DEBUG_NEGO) { sym_print_msg(cp, nego == NS_SYNC ? "sync msgout" : nego == NS_WIDE ? "wide msgout" : "ppr msgout", msgptr); } } return msglen; } /* * Insert a job into the start queue. */ static void sym_put_start_queue(hcb_p np, ccb_p cp) { u_short qidx; #ifdef SYM_CONF_IARB_SUPPORT /* * If the previously queued CCB is not yet done, * set the IARB hint. The SCRIPTS will go with IARB * for this job when starting the previous one. * We leave devices a chance to win arbitration by * not using more than 'iarb_max' consecutive * immediate arbitrations. */ if (np->last_cp && np->iarb_count < np->iarb_max) { np->last_cp->host_flags |= HF_HINT_IARB; ++np->iarb_count; } else np->iarb_count = 0; np->last_cp = cp; #endif /* * Insert first the idle task and then our job. * The MB should ensure proper ordering. */ qidx = np->squeueput + 2; if (qidx >= MAX_QUEUE*2) qidx = 0; np->squeue [qidx] = cpu_to_scr(np->idletask_ba); MEMORY_BARRIER(); np->squeue [np->squeueput] = cpu_to_scr(cp->ccb_ba); np->squeueput = qidx; if (DEBUG_FLAGS & DEBUG_QUEUE) printf ("%s: queuepos=%d.\n", sym_name (np), np->squeueput); /* * Script processor may be waiting for reselect. * Wake it up. */ MEMORY_BARRIER(); OUTB (nc_istat, SIGP|np->istat_sem); } /* * Soft reset the chip. * * Raising SRST when the chip is running may cause * problems on dual function chips (see below). * On the other hand, LVD devices need some delay * to settle and report actual BUS mode in STEST4. */ static void sym_chip_reset (hcb_p np) { OUTB (nc_istat, SRST); UDELAY (10); OUTB (nc_istat, 0); UDELAY(2000); /* For BUS MODE to settle */ } /* * Soft reset the chip. * * Some 896 and 876 chip revisions may hang-up if we set * the SRST (soft reset) bit at the wrong time when SCRIPTS * are running. * So, we need to abort the current operation prior to * soft resetting the chip. 
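 *
 * The abort is requested by setting CABRT in ISTAT; we then poll ISTAT
 * and drain SIST/DSTAT until the chip raises DIP, and only then pulse
 * SRST via sym_chip_reset().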
*/ static void sym_soft_reset (hcb_p np) { u_char istat; int i; OUTB (nc_istat, CABRT); for (i = 1000000 ; i ; --i) { istat = INB (nc_istat); if (istat & SIP) { INW (nc_sist); continue; } if (istat & DIP) { OUTB (nc_istat, 0); INB (nc_dstat); break; } } if (!i) printf("%s: unable to abort current chip operation.\n", sym_name(np)); sym_chip_reset (np); } /* * Start reset process. * * The interrupt handler will reinitialize the chip. */ static void sym_start_reset(hcb_p np) { (void) sym_reset_scsi_bus(np, 1); } static int sym_reset_scsi_bus(hcb_p np, int enab_int) { u32 term; int retv = 0; sym_soft_reset(np); /* Soft reset the chip */ if (enab_int) OUTW (nc_sien, RST); /* * Enable Tolerant, reset IRQD if present and * properly set IRQ mode, prior to resetting the bus. */ OUTB (nc_stest3, TE); OUTB (nc_dcntl, (np->rv_dcntl & IRQM)); OUTB (nc_scntl1, CRST); UDELAY (200); if (!SYM_SETUP_SCSI_BUS_CHECK) goto out; /* * Check for no terminators or SCSI bus shorts to ground. * Read SCSI data bus, data parity bits and control signals. * We are expecting RESET to be TRUE and other signals to be * FALSE. */ term = INB(nc_sstat0); term = ((term & 2) << 7) + ((term & 1) << 17); /* rst sdp0 */ term |= ((INB(nc_sstat2) & 0x01) << 26) | /* sdp1 */ ((INW(nc_sbdl) & 0xff) << 9) | /* d7-0 */ ((INW(nc_sbdl) & 0xff00) << 10) | /* d15-8 */ INB(nc_sbcl); /* req ack bsy sel atn msg cd io */ if (!(np->features & FE_WIDE)) term &= 0x3ffff; if (term != (2<<7)) { printf("%s: suspicious SCSI data while resetting the BUS.\n", sym_name(np)); printf("%s: %sdp0,d7-0,rst,req,ack,bsy,sel,atn,msg,c/d,i/o = " "0x%lx, expecting 0x%lx\n", sym_name(np), (np->features & FE_WIDE) ? "dp1,d15-8," : "", (u_long)term, (u_long)(2<<7)); if (SYM_SETUP_SCSI_BUS_CHECK == 1) retv = 1; } out: OUTB (nc_scntl1, 0); /* MDELAY(100); */ return retv; } /* * The chip may have completed jobs. Look at the DONE QUEUE. * * On architectures that may reorder LOAD/STORE operations, * a memory barrier may be needed after the reading of the * so-called `flag' and prior to dealing with the data. */ static int sym_wakeup_done (hcb_p np) { ccb_p cp; int i, n; u32 dsa; SYM_LOCK_ASSERT(MA_OWNED); n = 0; i = np->dqueueget; while (1) { dsa = scr_to_cpu(np->dqueue[i]); if (!dsa) break; np->dqueue[i] = 0; if ((i = i+2) >= MAX_QUEUE*2) i = 0; cp = sym_ccb_from_dsa(np, dsa); if (cp) { MEMORY_BARRIER(); sym_complete_ok (np, cp); ++n; } else printf ("%s: bad DSA (%x) in done queue.\n", sym_name(np), (u_int) dsa); } np->dqueueget = i; return n; } /* * Complete all active CCBs with error. * Used on CHIP/SCSI RESET. */ static void sym_flush_busy_queue (hcb_p np, int cam_status) { /* * Move all active CCBs to the COMP queue * and flush this queue. */ sym_que_splice(&np->busy_ccbq, &np->comp_ccbq); sym_que_init(&np->busy_ccbq); sym_flush_comp_queue(np, cam_status); } /* * Start chip. * * 'reason' means: * 0: initialisation. * 1: SCSI BUS RESET delivered or received. * 2: SCSI BUS MODE changed. */ static void sym_init (hcb_p np, int reason) { int i; u32 phys; SYM_LOCK_ASSERT(MA_OWNED); /* * Reset chip if asked, otherwise just clear fifos. */ if (reason == 1) sym_soft_reset(np); else { OUTB (nc_stest3, TE|CSF); OUTONB (nc_ctest3, CLF); } /* * Clear Start Queue */ phys = np->squeue_ba; for (i = 0; i < MAX_QUEUE*2; i += 2) { np->squeue[i] = cpu_to_scr(np->idletask_ba); np->squeue[i+1] = cpu_to_scr(phys + (i+2)*4); } np->squeue[MAX_QUEUE*2-1] = cpu_to_scr(phys); /* * Start at first entry. 
*/ np->squeueput = 0; /* * Clear Done Queue */ phys = np->dqueue_ba; for (i = 0; i < MAX_QUEUE*2; i += 2) { np->dqueue[i] = 0; np->dqueue[i+1] = cpu_to_scr(phys + (i+2)*4); } np->dqueue[MAX_QUEUE*2-1] = cpu_to_scr(phys); /* * Start at first entry. */ np->dqueueget = 0; /* * Install patches in scripts. * This also let point to first position the start * and done queue pointers used from SCRIPTS. */ np->fw_patch(np); /* * Wakeup all pending jobs. */ sym_flush_busy_queue(np, CAM_SCSI_BUS_RESET); /* * Init chip. */ OUTB (nc_istat, 0x00 ); /* Remove Reset, abort */ UDELAY (2000); /* The 895 needs time for the bus mode to settle */ OUTB (nc_scntl0, np->rv_scntl0 | 0xc0); /* full arb., ena parity, par->ATN */ OUTB (nc_scntl1, 0x00); /* odd parity, and remove CRST!! */ sym_selectclock(np, np->rv_scntl3); /* Select SCSI clock */ OUTB (nc_scid , RRE|np->myaddr); /* Adapter SCSI address */ OUTW (nc_respid, 1ul<myaddr); /* Id to respond to */ OUTB (nc_istat , SIGP ); /* Signal Process */ OUTB (nc_dmode , np->rv_dmode); /* Burst length, dma mode */ OUTB (nc_ctest5, np->rv_ctest5); /* Large fifo + large burst */ OUTB (nc_dcntl , NOCOM|np->rv_dcntl); /* Protect SFBR */ OUTB (nc_ctest3, np->rv_ctest3); /* Write and invalidate */ OUTB (nc_ctest4, np->rv_ctest4); /* Master parity checking */ /* Extended Sreq/Sack filtering not supported on the C10 */ if (np->features & FE_C10) OUTB (nc_stest2, np->rv_stest2); else OUTB (nc_stest2, EXT|np->rv_stest2); OUTB (nc_stest3, TE); /* TolerANT enable */ OUTB (nc_stime0, 0x0c); /* HTH disabled STO 0.25 sec */ /* * For now, disable AIP generation on C1010-66. */ if (np->device_id == PCI_ID_LSI53C1010_2) OUTB (nc_aipcntl1, DISAIP); /* * C10101 Errata. * Errant SGE's when in narrow. Write bits 4 & 5 of * STEST1 register to disable SGE. We probably should do * that from SCRIPTS for each selection/reselection, but * I just don't want. :) */ if (np->device_id == PCI_ID_LSI53C1010 && /* np->revision_id < 0xff */ 1) OUTB (nc_stest1, INB(nc_stest1) | 0x30); /* * DEL 441 - 53C876 Rev 5 - Part Number 609-0392787/2788 - ITEM 2. * Disable overlapped arbitration for some dual function devices, * regardless revision id (kind of post-chip-design feature. ;-)) */ if (np->device_id == PCI_ID_SYM53C875) OUTB (nc_ctest0, (1<<5)); else if (np->device_id == PCI_ID_SYM53C896) np->rv_ccntl0 |= DPR; /* * Write CCNTL0/CCNTL1 for chips capable of 64 bit addressing * and/or hardware phase mismatch, since only such chips * seem to support those IO registers. */ if (np->features & (FE_DAC|FE_NOPM)) { OUTB (nc_ccntl0, np->rv_ccntl0); OUTB (nc_ccntl1, np->rv_ccntl1); } /* * If phase mismatch handled by scripts (895A/896/1010), * set PM jump addresses. */ if (np->features & FE_NOPM) { OUTL (nc_pmjad1, SCRIPTB_BA (np, pm_handle)); OUTL (nc_pmjad2, SCRIPTB_BA (np, pm_handle)); } /* * Enable GPIO0 pin for writing if LED support from SCRIPTS. * Also set GPIO5 and clear GPIO6 if hardware LED control. */ if (np->features & FE_LED0) OUTB(nc_gpcntl, INB(nc_gpcntl) & ~0x01); else if (np->features & FE_LEDC) OUTB(nc_gpcntl, (INB(nc_gpcntl) & ~0x41) | 0x20); /* * enable ints */ OUTW (nc_sien , STO|HTH|MA|SGE|UDC|RST|PAR); OUTB (nc_dien , MDPE|BF|SSI|SIR|IID); /* * For 895/6 enable SBMC interrupt and save current SCSI bus mode. * Try to eat the spurious SBMC interrupt that may occur when * we reset the chip but not the SCSI BUS (at initialization). 
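 *
 * This is done by enabling SBMC in SIEN and, at initialization only,
 * waiting 100 ms and reading (and discarding) SIST before latching the
 * current bus mode from STEST4.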
*/ if (np->features & (FE_ULTRA2|FE_ULTRA3)) { OUTONW (nc_sien, SBMC); if (reason == 0) { MDELAY(100); INW (nc_sist); } np->scsi_mode = INB (nc_stest4) & SMODE; } /* * Fill in target structure. * Reinitialize usrsync. * Reinitialize usrwide. * Prepare sync negotiation according to actual SCSI bus mode. */ for (i=0;itarget[i]; tp->to_reset = 0; tp->head.sval = 0; tp->head.wval = np->rv_scntl3; tp->head.uval = 0; tp->tinfo.current.period = 0; tp->tinfo.current.offset = 0; tp->tinfo.current.width = BUS_8_BIT; tp->tinfo.current.options = 0; } /* * Download SCSI SCRIPTS to on-chip RAM if present, * and start script processor. */ if (np->ram_ba) { if (sym_verbose > 1) printf ("%s: Downloading SCSI SCRIPTS.\n", sym_name(np)); if (np->ram_ws == 8192) { OUTRAM_OFF(4096, np->scriptb0, np->scriptb_sz); OUTL (nc_mmws, np->scr_ram_seg); OUTL (nc_mmrs, np->scr_ram_seg); OUTL (nc_sfs, np->scr_ram_seg); phys = SCRIPTB_BA (np, start64); } else phys = SCRIPTA_BA (np, init); OUTRAM_OFF(0, np->scripta0, np->scripta_sz); } else phys = SCRIPTA_BA (np, init); np->istat_sem = 0; OUTL (nc_dsa, np->hcb_ba); OUTL_DSP (phys); /* * Notify the XPT about the RESET condition. */ if (reason != 0) xpt_async(AC_BUS_RESET, np->path, NULL); } /* * Get clock factor and sync divisor for a given * synchronous factor period. */ static int sym_getsync(hcb_p np, u_char dt, u_char sfac, u_char *divp, u_char *fakp) { u32 clk = np->clock_khz; /* SCSI clock frequency in kHz */ int div = np->clock_divn; /* Number of divisors supported */ u32 fak; /* Sync factor in sxfer */ u32 per; /* Period in tenths of ns */ u32 kpc; /* (per * clk) */ int ret; /* * Compute the synchronous period in tenths of nano-seconds */ if (dt && sfac <= 9) per = 125; else if (sfac <= 10) per = 250; else if (sfac == 11) per = 303; else if (sfac == 12) per = 500; else per = 40 * sfac; ret = per; kpc = per * clk; if (dt) kpc <<= 1; /* * For earliest C10 revision 0, we cannot use extra * clocks for the setting of the SCSI clocking. * Note that this limits the lowest sync data transfer * to 5 Mega-transfers per second and may result in * using higher clock divisors. */ #if 1 if ((np->features & (FE_C10|FE_U3EN)) == FE_C10) { /* * Look for the lowest clock divisor that allows an * output speed not faster than the period. */ while (div > 0) { --div; if (kpc > (div_10M[div] << 2)) { ++div; break; } } fak = 0; /* No extra clocks */ if (div == np->clock_divn) { /* Are we too fast ? */ ret = -1; } *divp = div; *fakp = fak; return ret; } #endif /* * Look for the greatest clock divisor that allows an * input speed faster than the period. */ while (div-- > 0) if (kpc >= (div_10M[div] << 2)) break; /* * Calculate the lowest clock factor that allows an output * speed not faster than the period, and the max output speed. * If fak >= 1 we will set both XCLKH_ST and XCLKH_DT. * If fak >= 2 we will also set XCLKS_ST and XCLKS_DT. */ if (dt) { fak = (kpc - 1) / (div_10M[div] << 1) + 1 - 2; /* ret = ((2+fak)*div_10M[div])/np->clock_khz; */ } else { fak = (kpc - 1) / div_10M[div] + 1 - 4; /* ret = ((4+fak)*div_10M[div])/np->clock_khz; */ } /* * Check against our hardware limits, or bugs :). */ if (fak > 2) {fak = 2; ret = -1;} /* * Compute and return sync parameters. */ *divp = div; *fakp = fak; return ret; } /* * Tell the SCSI layer about the new transfer parameters. 
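 *
 * This is done by filling a ccb_trans_settings with the fields selected
 * by the spi_valid mask and posting an AC_TRANSFER_NEG async event on a
 * path built for the target.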
*/ static void sym_xpt_async_transfer_neg(hcb_p np, int target, u_int spi_valid) { struct ccb_trans_settings cts; struct cam_path *path; int sts; tcb_p tp = &np->target[target]; sts = xpt_create_path(&path, NULL, cam_sim_path(np->sim), target, CAM_LUN_WILDCARD); if (sts != CAM_REQ_CMP) return; bzero(&cts, sizeof(cts)); #define cts__scsi (cts.proto_specific.scsi) #define cts__spi (cts.xport_specific.spi) cts.type = CTS_TYPE_CURRENT_SETTINGS; cts.protocol = PROTO_SCSI; cts.transport = XPORT_SPI; cts.protocol_version = tp->tinfo.current.scsi_version; cts.transport_version = tp->tinfo.current.spi_version; cts__spi.valid = spi_valid; if (spi_valid & CTS_SPI_VALID_SYNC_RATE) cts__spi.sync_period = tp->tinfo.current.period; if (spi_valid & CTS_SPI_VALID_SYNC_OFFSET) cts__spi.sync_offset = tp->tinfo.current.offset; if (spi_valid & CTS_SPI_VALID_BUS_WIDTH) cts__spi.bus_width = tp->tinfo.current.width; if (spi_valid & CTS_SPI_VALID_PPR_OPTIONS) cts__spi.ppr_options = tp->tinfo.current.options; #undef cts__spi #undef cts__scsi xpt_setup_ccb(&cts.ccb_h, path, /*priority*/1); xpt_async(AC_TRANSFER_NEG, path, &cts); xpt_free_path(path); } #define SYM_SPI_VALID_WDTR \ CTS_SPI_VALID_BUS_WIDTH | \ CTS_SPI_VALID_SYNC_RATE | \ CTS_SPI_VALID_SYNC_OFFSET #define SYM_SPI_VALID_SDTR \ CTS_SPI_VALID_SYNC_RATE | \ CTS_SPI_VALID_SYNC_OFFSET #define SYM_SPI_VALID_PPR \ CTS_SPI_VALID_PPR_OPTIONS | \ CTS_SPI_VALID_BUS_WIDTH | \ CTS_SPI_VALID_SYNC_RATE | \ CTS_SPI_VALID_SYNC_OFFSET /* * We received a WDTR. * Let everything be aware of the changes. */ static void sym_setwide(hcb_p np, ccb_p cp, u_char wide) { tcb_p tp = &np->target[cp->target]; sym_settrans(np, cp, 0, 0, 0, wide, 0, 0); /* * Tell the SCSI layer about the new transfer parameters. */ tp->tinfo.goal.width = tp->tinfo.current.width = wide; tp->tinfo.current.offset = 0; tp->tinfo.current.period = 0; tp->tinfo.current.options = 0; sym_xpt_async_transfer_neg(np, cp->target, SYM_SPI_VALID_WDTR); } /* * We received a SDTR. * Let everything be aware of the changes. */ static void sym_setsync(hcb_p np, ccb_p cp, u_char ofs, u_char per, u_char div, u_char fak) { tcb_p tp = &np->target[cp->target]; u_char wide = (cp->phys.select.sel_scntl3 & EWS) ? 1 : 0; sym_settrans(np, cp, 0, ofs, per, wide, div, fak); /* * Tell the SCSI layer about the new transfer parameters. */ tp->tinfo.goal.period = tp->tinfo.current.period = per; tp->tinfo.goal.offset = tp->tinfo.current.offset = ofs; tp->tinfo.goal.options = tp->tinfo.current.options = 0; sym_xpt_async_transfer_neg(np, cp->target, SYM_SPI_VALID_SDTR); } /* * We received a PPR. * Let everything be aware of the changes. */ static void sym_setpprot(hcb_p np, ccb_p cp, u_char dt, u_char ofs, u_char per, u_char wide, u_char div, u_char fak) { tcb_p tp = &np->target[cp->target]; sym_settrans(np, cp, dt, ofs, per, wide, div, fak); /* * Tell the SCSI layer about the new transfer parameters. */ tp->tinfo.goal.width = tp->tinfo.current.width = wide; tp->tinfo.goal.period = tp->tinfo.current.period = per; tp->tinfo.goal.offset = tp->tinfo.current.offset = ofs; tp->tinfo.goal.options = tp->tinfo.current.options = dt; sym_xpt_async_transfer_neg(np, cp->target, SYM_SPI_VALID_PPR); } /* * Switch trans mode for current job and it's target. 
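 *
 * Besides reprogramming SXFER/SCNTL3 (and SCNTL4 on C10 chips), the new
 * values are patched into the selection context of every busy CCB for
 * this target so that subsequent selections use them.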
*/ static void sym_settrans(hcb_p np, ccb_p cp, u_char dt, u_char ofs, u_char per, u_char wide, u_char div, u_char fak) { SYM_QUEHEAD *qp; union ccb *ccb; tcb_p tp; u_char target = INB (nc_sdid) & 0x0f; u_char sval, wval, uval; assert (cp); if (!cp) return; ccb = cp->cam_ccb; assert (ccb); if (!ccb) return; assert (target == (cp->target & 0xf)); tp = &np->target[target]; sval = tp->head.sval; wval = tp->head.wval; uval = tp->head.uval; #if 0 printf("XXXX sval=%x wval=%x uval=%x (%x)\n", sval, wval, uval, np->rv_scntl3); #endif /* * Set the offset. */ if (!(np->features & FE_C10)) sval = (sval & ~0x1f) | ofs; else sval = (sval & ~0x3f) | ofs; /* * Set the sync divisor and extra clock factor. */ if (ofs != 0) { wval = (wval & ~0x70) | ((div+1) << 4); if (!(np->features & FE_C10)) sval = (sval & ~0xe0) | (fak << 5); else { uval = uval & ~(XCLKH_ST|XCLKH_DT|XCLKS_ST|XCLKS_DT); if (fak >= 1) uval |= (XCLKH_ST|XCLKH_DT); if (fak >= 2) uval |= (XCLKS_ST|XCLKS_DT); } } /* * Set the bus width. */ wval = wval & ~EWS; if (wide != 0) wval |= EWS; /* * Set misc. ultra enable bits. */ if (np->features & FE_C10) { uval = uval & ~(U3EN|AIPCKEN); if (dt) { assert(np->features & FE_U3EN); uval |= U3EN; } } else { wval = wval & ~ULTRA; if (per <= 12) wval |= ULTRA; } /* * Stop there if sync parameters are unchanged. */ if (tp->head.sval == sval && tp->head.wval == wval && tp->head.uval == uval) return; tp->head.sval = sval; tp->head.wval = wval; tp->head.uval = uval; /* * Disable extended Sreq/Sack filtering if per < 50. * Not supported on the C1010. */ if (per < 50 && !(np->features & FE_C10)) OUTOFFB (nc_stest2, EXT); /* * set actual value and sync_status */ OUTB (nc_sxfer, tp->head.sval); OUTB (nc_scntl3, tp->head.wval); if (np->features & FE_C10) { OUTB (nc_scntl4, tp->head.uval); } /* * patch ALL busy ccbs of this target. */ FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) { cp = sym_que_entry(qp, struct sym_ccb, link_ccbq); if (cp->target != target) continue; cp->phys.select.sel_scntl3 = tp->head.wval; cp->phys.select.sel_sxfer = tp->head.sval; if (np->features & FE_C10) { cp->phys.select.sel_scntl4 = tp->head.uval; } } } /* * log message for real hard errors * * sym0 targ 0?: ERROR (ds:si) (so-si-sd) (sxfer/scntl3) @ name (dsp:dbc). * reg: r0 r1 r2 r3 r4 r5 r6 ..... rf. * * exception register: * ds: dstat * si: sist * * SCSI bus lines: * so: control lines as driven by chip. * si: control lines as seen by chip. * sd: scsi data lines as seen by chip. * * wide/fastmode: * sxfer: (see the manual) * scntl3: (see the manual) * * current script command: * dsp: script address (relative to start of script). * dbc: first word of script command. 
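 * name: SCRIPTS area the dsp value falls into ("scripta", "scriptb",
 *       or "mem" when it points outside both).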
* * First 24 register of the chip: * r0..rf */ static void sym_log_hard_error(hcb_p np, u_short sist, u_char dstat) { u32 dsp; int script_ofs; int script_size; char *script_name; u_char *script_base; int i; dsp = INL (nc_dsp); if (dsp > np->scripta_ba && dsp <= np->scripta_ba + np->scripta_sz) { script_ofs = dsp - np->scripta_ba; script_size = np->scripta_sz; script_base = (u_char *) np->scripta0; script_name = "scripta"; } else if (np->scriptb_ba < dsp && dsp <= np->scriptb_ba + np->scriptb_sz) { script_ofs = dsp - np->scriptb_ba; script_size = np->scriptb_sz; script_base = (u_char *) np->scriptb0; script_name = "scriptb"; } else { script_ofs = dsp; script_size = 0; script_base = NULL; script_name = "mem"; } printf ("%s:%d: ERROR (%x:%x) (%x-%x-%x) (%x/%x) @ (%s %x:%08x).\n", sym_name (np), (unsigned)INB (nc_sdid)&0x0f, dstat, sist, (unsigned)INB (nc_socl), (unsigned)INB (nc_sbcl), (unsigned)INB (nc_sbdl), (unsigned)INB (nc_sxfer), (unsigned)INB (nc_scntl3), script_name, script_ofs, (unsigned)INL (nc_dbc)); if (((script_ofs & 3) == 0) && (unsigned)script_ofs < script_size) { printf ("%s: script cmd = %08x\n", sym_name(np), scr_to_cpu((int) *(u32 *)(script_base + script_ofs))); } printf ("%s: regdump:", sym_name(np)); for (i=0; i<24;i++) printf (" %02x", (unsigned)INB_OFF(i)); printf (".\n"); /* * PCI BUS error, read the PCI ststus register. */ if (dstat & (MDPE|BF)) { u_short pci_sts; pci_sts = pci_read_config(np->device, PCIR_STATUS, 2); if (pci_sts & 0xf900) { pci_write_config(np->device, PCIR_STATUS, pci_sts, 2); printf("%s: PCI STATUS = 0x%04x\n", sym_name(np), pci_sts & 0xf900); } } } /* * chip interrupt handler * * In normal situations, interrupt conditions occur one at * a time. But when something bad happens on the SCSI BUS, * the chip may raise several interrupt flags before * stopping and interrupting the CPU. The additionnal * interrupt flags are stacked in some extra registers * after the SIP and/or DIP flag has been raised in the * ISTAT. After the CPU has read the interrupt condition * flag from SIST or DSTAT, the chip unstacks the other * interrupt flags and sets the corresponding bits in * SIST or DSTAT. Since the chip starts stacking once the * SIP or DIP flag is set, there is a small window of time * where the stacking does not occur. * * Typically, multiple interrupt conditions may happen in * the following situations: * * - SCSI parity error + Phase mismatch (PAR|MA) * When a parity error is detected in input phase * and the device switches to msg-in phase inside a * block MOV. * - SCSI parity error + Unexpected disconnect (PAR|UDC) * When a stupid device does not want to handle the * recovery of an SCSI parity error. * - Some combinations of STO, PAR, UDC, ... * When using non compliant SCSI stuff, when user is * doing non compliant hot tampering on the BUS, when * something really bad happens to a device, etc ... * * The heuristic suggested by SYMBIOS to handle * multiple interrupts is to try unstacking all * interrupts conditions and to handle them on some * priority based on error severity. * This will work when the unstacking has been * successful, but we cannot be 100 % sure of that, * since the CPU may have been faster to unstack than * the chip is able to stack. Hmmm ... But it seems that * such a situation is very unlikely to happen. 
* * If this happen, for example STO caught by the CPU * then UDC happenning before the CPU have restarted * the SCRIPTS, the driver may wrongly complete the * same command on UDC, since the SCRIPTS didn't restart * and the DSA still points to the same command. * We avoid this situation by setting the DSA to an * invalid value when the CCB is completed and before * restarting the SCRIPTS. * * Another issue is that we need some section of our * recovery procedures to be somehow uninterruptible but * the SCRIPTS processor does not provides such a * feature. For this reason, we handle recovery preferently * from the C code and check against some SCRIPTS critical * sections from the C code. * * Hopefully, the interrupt handling of the driver is now * able to resist to weird BUS error conditions, but donnot * ask me for any guarantee that it will never fail. :-) * Use at your own decision and risk. */ static void sym_intr1 (hcb_p np) { u_char istat, istatc; u_char dstat; u_short sist; SYM_LOCK_ASSERT(MA_OWNED); /* * interrupt on the fly ? * * A `dummy read' is needed to ensure that the * clear of the INTF flag reaches the device * before the scanning of the DONE queue. */ istat = INB (nc_istat); if (istat & INTF) { OUTB (nc_istat, (istat & SIGP) | INTF | np->istat_sem); istat = INB (nc_istat); /* DUMMY READ */ if (DEBUG_FLAGS & DEBUG_TINY) printf ("F "); (void)sym_wakeup_done (np); } if (!(istat & (SIP|DIP))) return; #if 0 /* We should never get this one */ if (istat & CABRT) OUTB (nc_istat, CABRT); #endif /* * PAR and MA interrupts may occur at the same time, * and we need to know of both in order to handle * this situation properly. We try to unstack SCSI * interrupts for that reason. BTW, I dislike a LOT * such a loop inside the interrupt routine. * Even if DMA interrupt stacking is very unlikely to * happen, we also try unstacking these ones, since * this has no performance impact. */ sist = 0; dstat = 0; istatc = istat; do { if (istatc & SIP) sist |= INW (nc_sist); if (istatc & DIP) dstat |= INB (nc_dstat); istatc = INB (nc_istat); istat |= istatc; } while (istatc & (SIP|DIP)); if (DEBUG_FLAGS & DEBUG_TINY) printf ("<%d|%x:%x|%x:%x>", (int)INB(nc_scr0), dstat,sist, (unsigned)INL(nc_dsp), (unsigned)INL(nc_dbc)); /* * On paper, a memory barrier may be needed here. * And since we are paranoid ... :) */ MEMORY_BARRIER(); /* * First, interrupts we want to service cleanly. * * Phase mismatch (MA) is the most frequent interrupt * for chip earlier than the 896 and so we have to service * it as quickly as possible. * A SCSI parity error (PAR) may be combined with a phase * mismatch condition (MA). * Programmed interrupts (SIR) are used to call the C code * from SCRIPTS. * The single step interrupt (SSI) is not used in this * driver. */ if (!(sist & (STO|GEN|HTH|SGE|UDC|SBMC|RST)) && !(dstat & (MDPE|BF|ABRT|IID))) { if (sist & PAR) sym_int_par (np, sist); else if (sist & MA) sym_int_ma (np); else if (dstat & SIR) sym_int_sir (np); else if (dstat & SSI) OUTONB_STD (); else goto unknown_int; return; } /* * Now, interrupts that donnot happen in normal * situations and that we may need to recover from. * * On SCSI RESET (RST), we reset everything. * On SCSI BUS MODE CHANGE (SBMC), we complete all * active CCBs with RESET status, prepare all devices * for negotiating again and restart the SCRIPTS. * On STO and UDC, we complete the CCB with the corres- * ponding status and restart the SCRIPTS. 
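 *
 * RST is tested first since it makes any other pending condition
 * meaningless; the DMA and SCSI fifos are then cleared before SBMC,
 * STO and UDC are serviced, and any remaining combination falls
 * through to the hard error path that resets everything.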
*/ if (sist & RST) { xpt_print_path(np->path); printf("SCSI BUS reset detected.\n"); sym_init (np, 1); return; } OUTB (nc_ctest3, np->rv_ctest3 | CLF); /* clear dma fifo */ OUTB (nc_stest3, TE|CSF); /* clear scsi fifo */ if (!(sist & (GEN|HTH|SGE)) && !(dstat & (MDPE|BF|ABRT|IID))) { if (sist & SBMC) sym_int_sbmc (np); else if (sist & STO) sym_int_sto (np); else if (sist & UDC) sym_int_udc (np); else goto unknown_int; return; } /* * Now, interrupts we are not able to recover cleanly. * * Log message for hard errors. * Reset everything. */ sym_log_hard_error(np, sist, dstat); if ((sist & (GEN|HTH|SGE)) || (dstat & (MDPE|BF|ABRT|IID))) { sym_start_reset(np); return; } unknown_int: /* * We just miss the cause of the interrupt. :( * Print a message. The timeout will do the real work. */ printf( "%s: unknown interrupt(s) ignored, " "ISTAT=0x%x DSTAT=0x%x SIST=0x%x\n", sym_name(np), istat, dstat, sist); } static void sym_intr(void *arg) { hcb_p np = arg; SYM_LOCK(); if (DEBUG_FLAGS & DEBUG_TINY) printf ("["); sym_intr1((hcb_p) arg); if (DEBUG_FLAGS & DEBUG_TINY) printf ("]"); SYM_UNLOCK(); } static void sym_poll(struct cam_sim *sim) { sym_intr1(cam_sim_softc(sim)); } /* * generic recovery from scsi interrupt * * The doc says that when the chip gets an SCSI interrupt, * it tries to stop in an orderly fashion, by completing * an instruction fetch that had started or by flushing * the DMA fifo for a write to memory that was executing. * Such a fashion is not enough to know if the instruction * that was just before the current DSP value has been * executed or not. * * There are some small SCRIPTS sections that deal with * the start queue and the done queue that may break any * assomption from the C code if we are interrupted * inside, so we reset if this happens. Btw, since these * SCRIPTS sections are executed while the SCRIPTS hasn't * started SCSI operations, it is very unlikely to happen. * * All the driver data structures are supposed to be * allocated from the same 4 GB memory window, so there * is a 1 to 1 relationship between DSA and driver data * structures. Since we are careful :) to invalidate the * DSA when we complete a command or when the SCRIPTS * pushes a DSA into a queue, we can trust it when it * points to a CCB. */ static void sym_recover_scsi_int (hcb_p np, u_char hsts) { u32 dsp = INL (nc_dsp); u32 dsa = INL (nc_dsa); ccb_p cp = sym_ccb_from_dsa(np, dsa); /* * If we haven't been interrupted inside the SCRIPTS * critical paths, we can safely restart the SCRIPTS * and trust the DSA value if it matches a CCB. */ if ((!(dsp > SCRIPTA_BA (np, getjob_begin) && dsp < SCRIPTA_BA (np, getjob_end) + 1)) && (!(dsp > SCRIPTA_BA (np, ungetjob) && dsp < SCRIPTA_BA (np, reselect) + 1)) && (!(dsp > SCRIPTB_BA (np, sel_for_abort) && dsp < SCRIPTB_BA (np, sel_for_abort_1) + 1)) && (!(dsp > SCRIPTA_BA (np, done) && dsp < SCRIPTA_BA (np, done_end) + 1))) { OUTB (nc_ctest3, np->rv_ctest3 | CLF); /* clear dma fifo */ OUTB (nc_stest3, TE|CSF); /* clear scsi fifo */ /* * If we have a CCB, let the SCRIPTS call us back for * the handling of the error with SCRATCHA filled with * STARTPOS. This way, we will be able to freeze the * device queue and requeue awaiting IOs. */ if (cp) { cp->host_status = hsts; OUTL_DSP (SCRIPTA_BA (np, complete_error)); } /* * Otherwise just restart the SCRIPTS. 
*/ else { OUTL (nc_dsa, 0xffffff); OUTL_DSP (SCRIPTA_BA (np, start)); } } else goto reset_all; return; reset_all: sym_start_reset(np); } /* * chip exception handler for selection timeout */ static void sym_int_sto (hcb_p np) { u32 dsp = INL (nc_dsp); if (DEBUG_FLAGS & DEBUG_TINY) printf ("T"); if (dsp == SCRIPTA_BA (np, wf_sel_done) + 8) sym_recover_scsi_int(np, HS_SEL_TIMEOUT); else sym_start_reset(np); } /* * chip exception handler for unexpected disconnect */ static void sym_int_udc (hcb_p np) { printf ("%s: unexpected disconnect\n", sym_name(np)); sym_recover_scsi_int(np, HS_UNEXPECTED); } /* * chip exception handler for SCSI bus mode change * * spi2-r12 11.2.3 says a transceiver mode change must * generate a reset event and a device that detects a reset * event shall initiate a hard reset. It says also that a * device that detects a mode change shall set data transfer * mode to eight bit asynchronous, etc... * So, just reinitializing all except chip should be enough. */ static void sym_int_sbmc (hcb_p np) { u_char scsi_mode = INB (nc_stest4) & SMODE; /* * Notify user. */ xpt_print_path(np->path); printf("SCSI BUS mode change from %s to %s.\n", sym_scsi_bus_mode(np->scsi_mode), sym_scsi_bus_mode(scsi_mode)); /* * Should suspend command processing for a few seconds and * reinitialize all except the chip. */ sym_init (np, 2); } /* * chip exception handler for SCSI parity error. * * When the chip detects a SCSI parity error and is * currently executing a (CH)MOV instruction, it does * not interrupt immediately, but tries to finish the * transfer of the current scatter entry before * interrupting. The following situations may occur: * * - The complete scatter entry has been transferred * without the device having changed phase. * The chip will then interrupt with the DSP pointing * to the instruction that follows the MOV. * * - A phase mismatch occurs before the MOV finished * and phase errors are to be handled by the C code. * The chip will then interrupt with both PAR and MA * conditions set. * * - A phase mismatch occurs before the MOV finished and * phase errors are to be handled by SCRIPTS. * The chip will load the DSP with the phase mismatch * JUMP address and interrupt the host processor. */ static void sym_int_par (hcb_p np, u_short sist) { u_char hsts = INB (HS_PRT); u32 dsp = INL (nc_dsp); u32 dbc = INL (nc_dbc); u32 dsa = INL (nc_dsa); u_char sbcl = INB (nc_sbcl); u_char cmd = dbc >> 24; int phase = cmd & 7; ccb_p cp = sym_ccb_from_dsa(np, dsa); printf("%s: SCSI parity error detected: SCR1=%d DBC=%x SBCL=%x\n", sym_name(np), hsts, dbc, sbcl); /* * Check that the chip is connected to the SCSI BUS. */ if (!(INB (nc_scntl1) & ISCON)) { sym_recover_scsi_int(np, HS_UNEXPECTED); return; } /* * If the nexus is not clearly identified, reset the bus. * We will try to do better later. */ if (!cp) goto reset_all; /* * Check instruction was a MOV, direction was INPUT and * ATN is asserted. */ if ((cmd & 0xc0) || !(phase & 1) || !(sbcl & 0x8)) goto reset_all; /* * Keep track of the parity error. */ OUTONB (HF_PRT, HF_EXT_ERR); cp->xerr_status |= XE_PARITY_ERR; /* * Prepare the message to send to the device. */ np->msgout[0] = (phase == 7) ? M_PARITY : M_ID_ERROR; /* * If the old phase was DATA IN phase, we have to deal with * the 3 situations described above. * For other input phases (MSG IN and STATUS), the device * must resend the whole thing that failed parity checking * or signal error. So, jumping to dispatcher should be OK. 
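 *
 * In the test below, phase 1 is plain DATA IN and phase 5 is the
 * DT DATA IN phase code used by Ultra3 chips.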
*/ if (phase == 1 || phase == 5) { /* Phase mismatch handled by SCRIPTS */ if (dsp == SCRIPTB_BA (np, pm_handle)) OUTL_DSP (dsp); /* Phase mismatch handled by the C code */ else if (sist & MA) sym_int_ma (np); /* No phase mismatch occurred */ else { OUTL (nc_temp, dsp); OUTL_DSP (SCRIPTA_BA (np, dispatch)); } } else OUTL_DSP (SCRIPTA_BA (np, clrack)); return; reset_all: sym_start_reset(np); } /* * chip exception handler for phase errors. * * We have to construct a new transfer descriptor, * to transfer the rest of the current block. */ static void sym_int_ma (hcb_p np) { u32 dbc; u32 rest; u32 dsp; u32 dsa; u32 nxtdsp; u32 *vdsp; u32 oadr, olen; u32 *tblp; u32 newcmd; u_int delta; u_char cmd; u_char hflags, hflags0; struct sym_pmc *pm; ccb_p cp; dsp = INL (nc_dsp); dbc = INL (nc_dbc); dsa = INL (nc_dsa); cmd = dbc >> 24; rest = dbc & 0xffffff; delta = 0; /* * locate matching cp if any. */ cp = sym_ccb_from_dsa(np, dsa); /* * Donnot take into account dma fifo and various buffers in * INPUT phase since the chip flushes everything before * raising the MA interrupt for interrupted INPUT phases. * For DATA IN phase, we will check for the SWIDE later. */ if ((cmd & 7) != 1 && (cmd & 7) != 5) { u_char ss0, ss2; if (np->features & FE_DFBC) delta = INW (nc_dfbc); else { u32 dfifo; /* * Read DFIFO, CTEST[4-6] using 1 PCI bus ownership. */ dfifo = INL(nc_dfifo); /* * Calculate remaining bytes in DMA fifo. * (CTEST5 = dfifo >> 16) */ if (dfifo & (DFS << 16)) delta = ((((dfifo >> 8) & 0x300) | (dfifo & 0xff)) - rest) & 0x3ff; else delta = ((dfifo & 0xff) - rest) & 0x7f; } /* * The data in the dma fifo has not been transferred to * the target -> add the amount to the rest * and clear the data. * Check the sstat2 register in case of wide transfer. */ rest += delta; ss0 = INB (nc_sstat0); if (ss0 & OLF) rest++; if (!(np->features & FE_C10)) if (ss0 & ORF) rest++; if (cp && (cp->phys.select.sel_scntl3 & EWS)) { ss2 = INB (nc_sstat2); if (ss2 & OLF1) rest++; if (!(np->features & FE_C10)) if (ss2 & ORF1) rest++; } /* * Clear fifos. */ OUTB (nc_ctest3, np->rv_ctest3 | CLF); /* dma fifo */ OUTB (nc_stest3, TE|CSF); /* scsi fifo */ } /* * log the information */ if (DEBUG_FLAGS & (DEBUG_TINY|DEBUG_PHASE)) printf ("P%x%x RL=%d D=%d ", cmd&7, INB(nc_sbcl)&7, (unsigned) rest, (unsigned) delta); /* * try to find the interrupted script command, * and the address at which to continue. */ vdsp = NULL; nxtdsp = 0; if (dsp > np->scripta_ba && dsp <= np->scripta_ba + np->scripta_sz) { vdsp = (u32 *)((char*)np->scripta0 + (dsp-np->scripta_ba-8)); nxtdsp = dsp; } else if (dsp > np->scriptb_ba && dsp <= np->scriptb_ba + np->scriptb_sz) { vdsp = (u32 *)((char*)np->scriptb0 + (dsp-np->scriptb_ba-8)); nxtdsp = dsp; } /* * log the information */ if (DEBUG_FLAGS & DEBUG_PHASE) { printf ("\nCP=%p DSP=%x NXT=%x VDSP=%p CMD=%x ", cp, (unsigned)dsp, (unsigned)nxtdsp, vdsp, cmd); } if (!vdsp) { printf ("%s: interrupted SCRIPT address not found.\n", sym_name (np)); goto reset_all; } if (!cp) { printf ("%s: SCSI phase error fixup: CCB already dequeued.\n", sym_name (np)); goto reset_all; } /* * get old startaddress and old length. 
*/ oadr = scr_to_cpu(vdsp[1]); if (cmd & 0x10) { /* Table indirect */ tblp = (u32 *) ((char*) &cp->phys + oadr); olen = scr_to_cpu(tblp[0]); oadr = scr_to_cpu(tblp[1]); } else { tblp = (u32 *) 0; olen = scr_to_cpu(vdsp[0]) & 0xffffff; } if (DEBUG_FLAGS & DEBUG_PHASE) { printf ("OCMD=%x\nTBLP=%p OLEN=%x OADR=%x\n", (unsigned) (scr_to_cpu(vdsp[0]) >> 24), tblp, (unsigned) olen, (unsigned) oadr); } /* * check cmd against assumed interrupted script command. * If dt data phase, the MOVE instruction hasn't bit 4 of * the phase. */ if (((cmd & 2) ? cmd : (cmd & ~4)) != (scr_to_cpu(vdsp[0]) >> 24)) { PRINT_ADDR(cp); printf ("internal error: cmd=%02x != %02x=(vdsp[0] >> 24)\n", (unsigned)cmd, (unsigned)scr_to_cpu(vdsp[0]) >> 24); goto reset_all; } /* * if old phase not dataphase, leave here. */ if (cmd & 2) { PRINT_ADDR(cp); printf ("phase change %x-%x %d@%08x resid=%d.\n", cmd&7, INB(nc_sbcl)&7, (unsigned)olen, (unsigned)oadr, (unsigned)rest); goto unexpected_phase; } /* * Choose the correct PM save area. * * Look at the PM_SAVE SCRIPT if you want to understand * this stuff. The equivalent code is implemented in * SCRIPTS for the 895A, 896 and 1010 that are able to * handle PM from the SCRIPTS processor. */ hflags0 = INB (HF_PRT); hflags = hflags0; if (hflags & (HF_IN_PM0 | HF_IN_PM1 | HF_DP_SAVED)) { if (hflags & HF_IN_PM0) nxtdsp = scr_to_cpu(cp->phys.pm0.ret); else if (hflags & HF_IN_PM1) nxtdsp = scr_to_cpu(cp->phys.pm1.ret); if (hflags & HF_DP_SAVED) hflags ^= HF_ACT_PM; } if (!(hflags & HF_ACT_PM)) { pm = &cp->phys.pm0; newcmd = SCRIPTA_BA (np, pm0_data); } else { pm = &cp->phys.pm1; newcmd = SCRIPTA_BA (np, pm1_data); } hflags &= ~(HF_IN_PM0 | HF_IN_PM1 | HF_DP_SAVED); if (hflags != hflags0) OUTB (HF_PRT, hflags); /* * fillin the phase mismatch context */ pm->sg.addr = cpu_to_scr(oadr + olen - rest); pm->sg.size = cpu_to_scr(rest); pm->ret = cpu_to_scr(nxtdsp); /* * If we have a SWIDE, * - prepare the address to write the SWIDE from SCRIPTS, * - compute the SCRIPTS address to restart from, * - move current data pointer context by one byte. */ nxtdsp = SCRIPTA_BA (np, dispatch); if ((cmd & 7) == 1 && cp && (cp->phys.select.sel_scntl3 & EWS) && (INB (nc_scntl2) & WSR)) { u32 tmp; /* * Set up the table indirect for the MOVE * of the residual byte and adjust the data * pointer context. */ tmp = scr_to_cpu(pm->sg.addr); cp->phys.wresid.addr = cpu_to_scr(tmp); pm->sg.addr = cpu_to_scr(tmp + 1); tmp = scr_to_cpu(pm->sg.size); cp->phys.wresid.size = cpu_to_scr((tmp&0xff000000) | 1); pm->sg.size = cpu_to_scr(tmp - 1); /* * If only the residual byte is to be moved, * no PM context is needed. */ if ((tmp&0xffffff) == 1) newcmd = pm->ret; /* * Prepare the address of SCRIPTS that will * move the residual byte to memory. */ nxtdsp = SCRIPTB_BA (np, wsr_ma_helper); } if (DEBUG_FLAGS & DEBUG_PHASE) { PRINT_ADDR(cp); printf ("PM %x %x %x / %x %x %x.\n", hflags0, hflags, newcmd, (unsigned)scr_to_cpu(pm->sg.addr), (unsigned)scr_to_cpu(pm->sg.size), (unsigned)scr_to_cpu(pm->ret)); } /* * Restart the SCRIPTS processor. */ OUTL (nc_temp, newcmd); OUTL_DSP (nxtdsp); return; /* * Unexpected phase changes that occurs when the current phase * is not a DATA IN or DATA OUT phase are due to error conditions. * Such event may only happen when the SCRIPTS is using a * multibyte SCSI MOVE. * * Phase change Some possible cause * * COMMAND --> MSG IN SCSI parity error detected by target. * COMMAND --> STATUS Bad command or refused by target. * MSG OUT --> MSG IN Message rejected by target. 
* MSG OUT --> COMMAND Bogus target that discards extended * negotiation messages. * * The code below does not care of the new phase and so * trusts the target. Why to annoy it ? * If the interrupted phase is COMMAND phase, we restart at * dispatcher. * If a target does not get all the messages after selection, * the code assumes blindly that the target discards extended * messages and clears the negotiation status. * If the target does not want all our response to negotiation, * we force a SIR_NEGO_PROTO interrupt (it is a hack that avoids * bloat for such a should_not_happen situation). * In all other situation, we reset the BUS. * Are these assumptions reasonnable ? (Wait and see ...) */ unexpected_phase: dsp -= 8; nxtdsp = 0; switch (cmd & 7) { case 2: /* COMMAND phase */ nxtdsp = SCRIPTA_BA (np, dispatch); break; #if 0 case 3: /* STATUS phase */ nxtdsp = SCRIPTA_BA (np, dispatch); break; #endif case 6: /* MSG OUT phase */ /* * If the device may want to use untagged when we want * tagged, we prepare an IDENTIFY without disc. granted, * since we will not be able to handle reselect. * Otherwise, we just don't care. */ if (dsp == SCRIPTA_BA (np, send_ident)) { if (cp->tag != NO_TAG && olen - rest <= 3) { cp->host_status = HS_BUSY; np->msgout[0] = M_IDENTIFY | cp->lun; nxtdsp = SCRIPTB_BA (np, ident_break_atn); } else nxtdsp = SCRIPTB_BA (np, ident_break); } else if (dsp == SCRIPTB_BA (np, send_wdtr) || dsp == SCRIPTB_BA (np, send_sdtr) || dsp == SCRIPTB_BA (np, send_ppr)) { nxtdsp = SCRIPTB_BA (np, nego_bad_phase); } break; #if 0 case 7: /* MSG IN phase */ nxtdsp = SCRIPTA_BA (np, clrack); break; #endif } if (nxtdsp) { OUTL_DSP (nxtdsp); return; } reset_all: sym_start_reset(np); } /* * Dequeue from the START queue all CCBs that match * a given target/lun/task condition (-1 means all), * and move them from the BUSY queue to the COMP queue * with CAM_REQUEUE_REQ status condition. * This function is used during error handling/recovery. * It is called with SCRIPTS not running. */ static int sym_dequeue_from_squeue(hcb_p np, int i, int target, int lun, int task) { int j; ccb_p cp; /* * Make sure the starting index is within range. */ assert((i >= 0) && (i < 2*MAX_QUEUE)); /* * Walk until end of START queue and dequeue every job * that matches the target/lun/task condition. */ j = i; while (i != np->squeueput) { cp = sym_ccb_from_dsa(np, scr_to_cpu(np->squeue[i])); assert(cp); #ifdef SYM_CONF_IARB_SUPPORT /* Forget hints for IARB, they may be no longer relevant */ cp->host_flags &= ~HF_HINT_IARB; #endif if ((target == -1 || cp->target == target) && (lun == -1 || cp->lun == lun) && (task == -1 || cp->tag == task)) { sym_set_cam_status(cp->cam_ccb, CAM_REQUEUE_REQ); sym_remque(&cp->link_ccbq); sym_insque_tail(&cp->link_ccbq, &np->comp_ccbq); } else { if (i != j) np->squeue[j] = np->squeue[i]; if ((j += 2) >= MAX_QUEUE*2) j = 0; } if ((i += 2) >= MAX_QUEUE*2) i = 0; } if (i != j) /* Copy back the idle task if needed */ np->squeue[j] = np->squeue[i]; np->squeueput = j; /* Update our current start queue pointer */ return (i - j) / 2; } /* * Complete all CCBs queued to the COMP queue. * * These CCBs are assumed: * - Not to be referenced either by devices or * SCRIPTS-related queues and datas. * - To have to be completed with an error condition * or requeued. * * The device queue freeze count is incremented * for each CCB that does not prevent this. * This function is called when all CCBs involved * in error handling/recovery have been reaped. 
*/ static void sym_flush_comp_queue(hcb_p np, int cam_status) { SYM_QUEHEAD *qp; ccb_p cp; while ((qp = sym_remque_head(&np->comp_ccbq)) != NULL) { union ccb *ccb; cp = sym_que_entry(qp, struct sym_ccb, link_ccbq); sym_insque_tail(&cp->link_ccbq, &np->busy_ccbq); /* Leave quiet CCBs waiting for resources */ if (cp->host_status == HS_WAIT) continue; ccb = cp->cam_ccb; if (cam_status) sym_set_cam_status(ccb, cam_status); sym_freeze_cam_ccb(ccb); sym_xpt_done(np, ccb, cp); sym_free_ccb(np, cp); } } /* * chip handler for bad SCSI status condition * * In case of bad SCSI status, we unqueue all the tasks * currently queued to the controller but not yet started * and then restart the SCRIPTS processor immediately. * * QUEUE FULL and BUSY conditions are handled the same way. * Basically all the not yet started tasks are requeued in * device queue and the queue is frozen until a completion. * * For CHECK CONDITION and COMMAND TERMINATED status, we use * the CCB of the failed command to prepare a REQUEST SENSE * SCSI command and queue it to the controller queue. * * SCRATCHA is assumed to have been loaded with STARTPOS * before the SCRIPTS called the C code. */ static void sym_sir_bad_scsi_status(hcb_p np, ccb_p cp) { tcb_p tp = &np->target[cp->target]; u32 startp; u_char s_status = cp->ssss_status; u_char h_flags = cp->host_flags; int msglen; int nego; int i; SYM_LOCK_ASSERT(MA_OWNED); /* * Compute the index of the next job to start from SCRIPTS. */ i = (INL (nc_scratcha) - np->squeue_ba) / 4; /* * The last CCB queued used for IARB hint may be * no longer relevant. Forget it. */ #ifdef SYM_CONF_IARB_SUPPORT if (np->last_cp) np->last_cp = NULL; #endif /* * Now deal with the SCSI status. */ switch(s_status) { case S_BUSY: case S_QUEUE_FULL: if (sym_verbose >= 2) { PRINT_ADDR(cp); printf (s_status == S_BUSY ? "BUSY" : "QUEUE FULL\n"); } default: /* S_INT, S_INT_COND_MET, S_CONFLICT */ sym_complete_error (np, cp); break; case S_TERMINATED: case S_CHECK_COND: /* * If we get an SCSI error when requesting sense, give up. */ if (h_flags & HF_SENSE) { sym_complete_error (np, cp); break; } /* * Dequeue all queued CCBs for that device not yet started, * and restart the SCRIPTS processor immediately. */ (void) sym_dequeue_from_squeue(np, i, cp->target, cp->lun, -1); OUTL_DSP (SCRIPTA_BA (np, start)); /* * Save some info of the actual IO. * Compute the data residual. */ cp->sv_scsi_status = cp->ssss_status; cp->sv_xerr_status = cp->xerr_status; cp->sv_resid = sym_compute_residual(np, cp); /* * Prepare all needed data structures for * requesting sense data. */ /* * identify message */ cp->scsi_smsg2[0] = M_IDENTIFY | cp->lun; msglen = 1; /* * If we are currently using anything different from * async. 8 bit data transfers with that target, * start a negotiation, since the device may want * to report us a UNIT ATTENTION condition due to * a cause we currently ignore, and we donnot want * to be stuck with WIDE and/or SYNC data transfer. * * cp->nego_status is filled by sym_prepare_nego(). */ cp->nego_status = 0; nego = 0; if (tp->tinfo.current.options & PPR_OPT_MASK) nego = NS_PPR; else if (tp->tinfo.current.width != BUS_8_BIT) nego = NS_WIDE; else if (tp->tinfo.current.offset != 0) nego = NS_SYNC; if (nego) msglen += sym_prepare_nego (np,cp, nego, &cp->scsi_smsg2[msglen]); /* * Message table indirect structure. 
*/ cp->phys.smsg.addr = cpu_to_scr(CCB_BA (cp, scsi_smsg2)); cp->phys.smsg.size = cpu_to_scr(msglen); /* * sense command */ cp->phys.cmd.addr = cpu_to_scr(CCB_BA (cp, sensecmd)); cp->phys.cmd.size = cpu_to_scr(6); /* * patch requested size into sense command */ cp->sensecmd[0] = 0x03; cp->sensecmd[1] = cp->lun << 5; if (tp->tinfo.current.scsi_version > 2 || cp->lun > 7) cp->sensecmd[1] = 0; cp->sensecmd[4] = SYM_SNS_BBUF_LEN; cp->data_len = SYM_SNS_BBUF_LEN; /* * sense data */ bzero(cp->sns_bbuf, SYM_SNS_BBUF_LEN); cp->phys.sense.addr = cpu_to_scr(vtobus(cp->sns_bbuf)); cp->phys.sense.size = cpu_to_scr(SYM_SNS_BBUF_LEN); /* * requeue the command. */ startp = SCRIPTB_BA (np, sdata_in); cp->phys.head.savep = cpu_to_scr(startp); cp->phys.head.goalp = cpu_to_scr(startp + 16); cp->phys.head.lastp = cpu_to_scr(startp); cp->startp = cpu_to_scr(startp); cp->actualquirks = SYM_QUIRK_AUTOSAVE; cp->host_status = cp->nego_status ? HS_NEGOTIATE : HS_BUSY; cp->ssss_status = S_ILLEGAL; cp->host_flags = (HF_SENSE|HF_DATA_IN); cp->xerr_status = 0; cp->extra_bytes = 0; cp->phys.head.go.start = cpu_to_scr(SCRIPTA_BA (np, select)); /* * Requeue the command. */ sym_put_start_queue(np, cp); /* * Give back to upper layer everything we have dequeued. */ sym_flush_comp_queue(np, 0); break; } } /* * After a device has accepted some management message * as BUS DEVICE RESET, ABORT TASK, etc ..., or when * a device signals a UNIT ATTENTION condition, some * tasks are thrown away by the device. We are required * to reflect that on our tasks list since the device * will never complete these tasks. * * This function move from the BUSY queue to the COMP * queue all disconnected CCBs for a given target that * match the following criteria: * - lun=-1 means any logical UNIT otherwise a given one. * - task=-1 means any task, otherwise a given one. */ static int sym_clear_tasks(hcb_p np, int cam_status, int target, int lun, int task) { SYM_QUEHEAD qtmp, *qp; int i = 0; ccb_p cp; /* * Move the entire BUSY queue to our temporary queue. */ sym_que_init(&qtmp); sym_que_splice(&np->busy_ccbq, &qtmp); sym_que_init(&np->busy_ccbq); /* * Put all CCBs that matches our criteria into * the COMP queue and put back other ones into * the BUSY queue. */ while ((qp = sym_remque_head(&qtmp)) != NULL) { union ccb *ccb; cp = sym_que_entry(qp, struct sym_ccb, link_ccbq); ccb = cp->cam_ccb; if (cp->host_status != HS_DISCONNECT || cp->target != target || (lun != -1 && cp->lun != lun) || (task != -1 && (cp->tag != NO_TAG && cp->scsi_smsg[2] != task))) { sym_insque_tail(&cp->link_ccbq, &np->busy_ccbq); continue; } sym_insque_tail(&cp->link_ccbq, &np->comp_ccbq); /* Preserve the software timeout condition */ if (sym_get_cam_status(ccb) != CAM_CMD_TIMEOUT) sym_set_cam_status(ccb, cam_status); ++i; #if 0 printf("XXXX TASK @%p CLEARED\n", cp); #endif } return i; } /* * chip handler for TASKS recovery * * We cannot safely abort a command, while the SCRIPTS * processor is running, since we just would be in race * with it. * * As long as we have tasks to abort, we keep the SEM * bit set in the ISTAT. When this bit is set, the * SCRIPTS processor interrupts (SIR_SCRIPT_STOPPED) * each time it enters the scheduler. * * If we have to reset a target, clear tasks of a unit, * or to perform the abort of a disconnected job, we * restart the SCRIPTS for selecting the target. Once * selected, the SCRIPTS interrupts (SIR_TARGET_SELECTED). * If it loses arbitration, the SCRIPTS will interrupt again * the next time it will enter its scheduler, and so on ... 
* * On SIR_TARGET_SELECTED, we scan for the more * appropriate thing to do: * * - If nothing, we just sent a M_ABORT message to the * target to get rid of the useless SCSI bus ownership. * According to the specs, no tasks shall be affected. * - If the target is to be reset, we send it a M_RESET * message. * - If a logical UNIT is to be cleared , we send the * IDENTIFY(lun) + M_ABORT. * - If an untagged task is to be aborted, we send the * IDENTIFY(lun) + M_ABORT. * - If a tagged task is to be aborted, we send the * IDENTIFY(lun) + task attributes + M_ABORT_TAG. * * Once our 'kiss of death' :) message has been accepted * by the target, the SCRIPTS interrupts again * (SIR_ABORT_SENT). On this interrupt, we complete * all the CCBs that should have been aborted by the * target according to our message. */ static void sym_sir_task_recovery(hcb_p np, int num) { SYM_QUEHEAD *qp; ccb_p cp; tcb_p tp; int target=-1, lun=-1, task; int i, k; switch(num) { /* * The SCRIPTS processor stopped before starting * the next command in order to allow us to perform * some task recovery. */ case SIR_SCRIPT_STOPPED: /* * Do we have any target to reset or unit to clear ? */ for (i = 0 ; i < SYM_CONF_MAX_TARGET ; i++) { tp = &np->target[i]; if (tp->to_reset || (tp->lun0p && tp->lun0p->to_clear)) { target = i; break; } if (!tp->lunmp) continue; for (k = 1 ; k < SYM_CONF_MAX_LUN ; k++) { if (tp->lunmp[k] && tp->lunmp[k]->to_clear) { target = i; break; } } if (target != -1) break; } /* * If not, walk the busy queue for any * disconnected CCB to be aborted. */ if (target == -1) { FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) { cp = sym_que_entry(qp,struct sym_ccb,link_ccbq); if (cp->host_status != HS_DISCONNECT) continue; if (cp->to_abort) { target = cp->target; break; } } } /* * If some target is to be selected, * prepare and start the selection. */ if (target != -1) { tp = &np->target[target]; np->abrt_sel.sel_id = target; np->abrt_sel.sel_scntl3 = tp->head.wval; np->abrt_sel.sel_sxfer = tp->head.sval; OUTL(nc_dsa, np->hcb_ba); OUTL_DSP (SCRIPTB_BA (np, sel_for_abort)); return; } /* * Now look for a CCB to abort that haven't started yet. * Btw, the SCRIPTS processor is still stopped, so * we are not in race. */ i = 0; cp = NULL; FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) { cp = sym_que_entry(qp, struct sym_ccb, link_ccbq); if (cp->host_status != HS_BUSY && cp->host_status != HS_NEGOTIATE) continue; if (!cp->to_abort) continue; #ifdef SYM_CONF_IARB_SUPPORT /* * If we are using IMMEDIATE ARBITRATION, we donnot * want to cancel the last queued CCB, since the * SCRIPTS may have anticipated the selection. */ if (cp == np->last_cp) { cp->to_abort = 0; continue; } #endif i = 1; /* Means we have found some */ break; } if (!i) { /* * We are done, so we donnot need * to synchronize with the SCRIPTS anylonger. * Remove the SEM flag from the ISTAT. */ np->istat_sem = 0; OUTB (nc_istat, SIGP); break; } /* * Compute index of next position in the start * queue the SCRIPTS intends to start and dequeue * all CCBs for that device that haven't been started. */ i = (INL (nc_scratcha) - np->squeue_ba) / 4; i = sym_dequeue_from_squeue(np, i, cp->target, cp->lun, -1); /* * Make sure at least our IO to abort has been dequeued. */ assert(i && sym_get_cam_status(cp->cam_ccb) == CAM_REQUEUE_REQ); /* * Keep track in cam status of the reason of the abort. */ if (cp->to_abort == 2) sym_set_cam_status(cp->cam_ccb, CAM_CMD_TIMEOUT); else sym_set_cam_status(cp->cam_ccb, CAM_REQ_ABORTED); /* * Complete with error everything that we have dequeued. 
*/ sym_flush_comp_queue(np, 0); break; /* * The SCRIPTS processor has selected a target * we may have some manual recovery to perform for. */ case SIR_TARGET_SELECTED: target = (INB (nc_sdid) & 0xf); tp = &np->target[target]; np->abrt_tbl.addr = cpu_to_scr(vtobus(np->abrt_msg)); /* * If the target is to be reset, prepare a * M_RESET message and clear the to_reset flag * since we donnot expect this operation to fail. */ if (tp->to_reset) { np->abrt_msg[0] = M_RESET; np->abrt_tbl.size = 1; tp->to_reset = 0; break; } /* * Otherwise, look for some logical unit to be cleared. */ if (tp->lun0p && tp->lun0p->to_clear) lun = 0; else if (tp->lunmp) { for (k = 1 ; k < SYM_CONF_MAX_LUN ; k++) { if (tp->lunmp[k] && tp->lunmp[k]->to_clear) { lun = k; break; } } } /* * If a logical unit is to be cleared, prepare * an IDENTIFY(lun) + ABORT MESSAGE. */ if (lun != -1) { lcb_p lp = sym_lp(tp, lun); lp->to_clear = 0; /* We donnot expect to fail here */ np->abrt_msg[0] = M_IDENTIFY | lun; np->abrt_msg[1] = M_ABORT; np->abrt_tbl.size = 2; break; } /* * Otherwise, look for some disconnected job to * abort for this target. */ i = 0; cp = NULL; FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) { cp = sym_que_entry(qp, struct sym_ccb, link_ccbq); if (cp->host_status != HS_DISCONNECT) continue; if (cp->target != target) continue; if (!cp->to_abort) continue; i = 1; /* Means we have some */ break; } /* * If we have none, probably since the device has * completed the command before we won abitration, * send a M_ABORT message without IDENTIFY. * According to the specs, the device must just * disconnect the BUS and not abort any task. */ if (!i) { np->abrt_msg[0] = M_ABORT; np->abrt_tbl.size = 1; break; } /* * We have some task to abort. * Set the IDENTIFY(lun) */ np->abrt_msg[0] = M_IDENTIFY | cp->lun; /* * If we want to abort an untagged command, we * will send an IDENTIFY + M_ABORT. * Otherwise (tagged command), we will send * an IDENTIFY + task attributes + ABORT TAG. */ if (cp->tag == NO_TAG) { np->abrt_msg[1] = M_ABORT; np->abrt_tbl.size = 2; } else { np->abrt_msg[1] = cp->scsi_smsg[1]; np->abrt_msg[2] = cp->scsi_smsg[2]; np->abrt_msg[3] = M_ABORT_TAG; np->abrt_tbl.size = 4; } /* * Keep track of software timeout condition, since the * peripheral driver may not count retries on abort * conditions not due to timeout. */ if (cp->to_abort == 2) sym_set_cam_status(cp->cam_ccb, CAM_CMD_TIMEOUT); cp->to_abort = 0; /* We donnot expect to fail here */ break; /* * The target has accepted our message and switched * to BUS FREE phase as we expected. */ case SIR_ABORT_SENT: target = (INB (nc_sdid) & 0xf); tp = &np->target[target]; /* ** If we didn't abort anything, leave here. */ if (np->abrt_msg[0] == M_ABORT) break; /* * If we sent a M_RESET, then a hardware reset has * been performed by the target. * - Reset everything to async 8 bit * - Tell ourself to negotiate next time :-) * - Prepare to clear all disconnected CCBs for * this target from our task list (lun=task=-1) */ lun = -1; task = -1; if (np->abrt_msg[0] == M_RESET) { tp->head.sval = 0; tp->head.wval = np->rv_scntl3; tp->head.uval = 0; tp->tinfo.current.period = 0; tp->tinfo.current.offset = 0; tp->tinfo.current.width = BUS_8_BIT; tp->tinfo.current.options = 0; } /* * Otherwise, check for the LUN and TASK(s) * concerned by the cancellation. 
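 * (The LUN comes from the IDENTIFY byte we sent, abrt_msg[0] & 0x3f.
 * For illustration, aborting simple-queue tag 5 on LUN 3 sent the
 * bytes 0x83 0x20 0x05 0x0d, i.e. IDENTIFY|3, SIMPLE QUEUE TAG, the
 * tag value, then ABORT TAG.)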
* If it is not ABORT_TAG then it is CLEAR_QUEUE * or an ABORT message :-) */ else { lun = np->abrt_msg[0] & 0x3f; if (np->abrt_msg[1] == M_ABORT_TAG) task = np->abrt_msg[2]; } /* * Complete all the CCBs the device should have * aborted due to our 'kiss of death' message. */ i = (INL (nc_scratcha) - np->squeue_ba) / 4; (void) sym_dequeue_from_squeue(np, i, target, lun, -1); (void) sym_clear_tasks(np, CAM_REQ_ABORTED, target, lun, task); sym_flush_comp_queue(np, 0); /* * If we sent a BDR, make uper layer aware of that. */ if (np->abrt_msg[0] == M_RESET) xpt_async(AC_SENT_BDR, np->path, NULL); break; } /* * Print to the log the message we intend to send. */ if (num == SIR_TARGET_SELECTED) { PRINT_TARGET(np, target); sym_printl_hex("control msgout:", np->abrt_msg, np->abrt_tbl.size); np->abrt_tbl.size = cpu_to_scr(np->abrt_tbl.size); } /* * Let the SCRIPTS processor continue. */ OUTONB_STD (); } /* * Gerard's alchemy:) that deals with with the data * pointer for both MDP and the residual calculation. * * I didn't want to bloat the code by more than 200 * lignes for the handling of both MDP and the residual. * This has been achieved by using a data pointer * representation consisting in an index in the data * array (dp_sg) and a negative offset (dp_ofs) that * have the following meaning: * * - dp_sg = SYM_CONF_MAX_SG * we are at the end of the data script. * - dp_sg < SYM_CONF_MAX_SG * dp_sg points to the next entry of the scatter array * we want to transfer. * - dp_ofs < 0 * dp_ofs represents the residual of bytes of the * previous entry scatter entry we will send first. * - dp_ofs = 0 * no residual to send first. * * The function sym_evaluate_dp() accepts an arbitray * offset (basically from the MDP message) and returns * the corresponding values of dp_sg and dp_ofs. */ static int sym_evaluate_dp(hcb_p np, ccb_p cp, u32 scr, int *ofs) { u32 dp_scr; int dp_ofs, dp_sg, dp_sgmin; int tmp; struct sym_pmc *pm; /* * Compute the resulted data pointer in term of a script * address within some DATA script and a signed byte offset. */ dp_scr = scr; dp_ofs = *ofs; if (dp_scr == SCRIPTA_BA (np, pm0_data)) pm = &cp->phys.pm0; else if (dp_scr == SCRIPTA_BA (np, pm1_data)) pm = &cp->phys.pm1; else pm = NULL; if (pm) { dp_scr = scr_to_cpu(pm->ret); dp_ofs -= scr_to_cpu(pm->sg.size); } /* * If we are auto-sensing, then we are done. */ if (cp->host_flags & HF_SENSE) { *ofs = dp_ofs; return 0; } /* * Deduce the index of the sg entry. * Keep track of the index of the first valid entry. * If result is dp_sg = SYM_CONF_MAX_SG, then we are at the * end of the data. */ tmp = scr_to_cpu(cp->phys.head.goalp); dp_sg = SYM_CONF_MAX_SG; if (dp_scr != tmp) dp_sg -= (tmp - 8 - (int)dp_scr) / (2*4); dp_sgmin = SYM_CONF_MAX_SG - cp->segments; /* * Move to the sg entry the data pointer belongs to. * * If we are inside the data area, we expect result to be: * * Either, * dp_ofs = 0 and dp_sg is the index of the sg entry * the data pointer belongs to (or the end of the data) * Or, * dp_ofs < 0 and dp_sg is the index of the sg entry * the data pointer belongs to + 1. */ if (dp_ofs < 0) { int n; while (dp_sg > dp_sgmin) { --dp_sg; tmp = scr_to_cpu(cp->phys.data[dp_sg].size); n = dp_ofs + (tmp & 0xffffff); if (n > 0) { ++dp_sg; break; } dp_ofs = n; } } else if (dp_ofs > 0) { while (dp_sg < SYM_CONF_MAX_SG) { tmp = scr_to_cpu(cp->phys.data[dp_sg].size); dp_ofs -= (tmp & 0xffffff); ++dp_sg; if (dp_ofs <= 0) break; } } /* * Make sure the data pointer is inside the data area. * If not, return some error. 
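 * As a worked example (using an illustrative SYM_CONF_MAX_SG of 96,
 * the actual value is a build-time constant): for a transfer using
 * 3 segments of 512 bytes each, dp_sgmin is 93; a data pointer
 * sitting 100 bytes into the second segment evaluates to
 * dp_sg == 95 and dp_ofs == -412, meaning 412 bytes of entry 94
 * are still to be sent before resuming at entry 95.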
*/ if (dp_sg < dp_sgmin || (dp_sg == dp_sgmin && dp_ofs < 0)) goto out_err; else if (dp_sg > SYM_CONF_MAX_SG || (dp_sg == SYM_CONF_MAX_SG && dp_ofs > 0)) goto out_err; /* * Save the extreme pointer if needed. */ if (dp_sg > cp->ext_sg || (dp_sg == cp->ext_sg && dp_ofs > cp->ext_ofs)) { cp->ext_sg = dp_sg; cp->ext_ofs = dp_ofs; } /* * Return data. */ *ofs = dp_ofs; return dp_sg; out_err: return -1; } /* * chip handler for MODIFY DATA POINTER MESSAGE * * We also call this function on IGNORE WIDE RESIDUE * messages that do not match a SWIDE full condition. * Btw, we assume in that situation that such a message * is equivalent to a MODIFY DATA POINTER (offset=-1). */ static void sym_modify_dp(hcb_p np, ccb_p cp, int ofs) { int dp_ofs = ofs; u32 dp_scr = INL (nc_temp); u32 dp_ret; u32 tmp; u_char hflags; int dp_sg; struct sym_pmc *pm; /* * Not supported for auto-sense. */ if (cp->host_flags & HF_SENSE) goto out_reject; /* * Apply our alchemy:) (see comments in sym_evaluate_dp()), * to the resulted data pointer. */ dp_sg = sym_evaluate_dp(np, cp, dp_scr, &dp_ofs); if (dp_sg < 0) goto out_reject; /* * And our alchemy:) allows to easily calculate the data * script address we want to return for the next data phase. */ dp_ret = cpu_to_scr(cp->phys.head.goalp); dp_ret = dp_ret - 8 - (SYM_CONF_MAX_SG - dp_sg) * (2*4); /* * If offset / scatter entry is zero we donnot need * a context for the new current data pointer. */ if (dp_ofs == 0) { dp_scr = dp_ret; goto out_ok; } /* * Get a context for the new current data pointer. */ hflags = INB (HF_PRT); if (hflags & HF_DP_SAVED) hflags ^= HF_ACT_PM; if (!(hflags & HF_ACT_PM)) { pm = &cp->phys.pm0; dp_scr = SCRIPTA_BA (np, pm0_data); } else { pm = &cp->phys.pm1; dp_scr = SCRIPTA_BA (np, pm1_data); } hflags &= ~(HF_DP_SAVED); OUTB (HF_PRT, hflags); /* * Set up the new current data pointer. * ofs < 0 there, and for the next data phase, we * want to transfer part of the data of the sg entry * corresponding to index dp_sg-1 prior to returning * to the main data script. */ pm->ret = cpu_to_scr(dp_ret); tmp = scr_to_cpu(cp->phys.data[dp_sg-1].addr); tmp += scr_to_cpu(cp->phys.data[dp_sg-1].size) + dp_ofs; pm->sg.addr = cpu_to_scr(tmp); pm->sg.size = cpu_to_scr(-dp_ofs); out_ok: OUTL (nc_temp, dp_scr); OUTL_DSP (SCRIPTA_BA (np, clrack)); return; out_reject: OUTL_DSP (SCRIPTB_BA (np, msg_bad)); } /* * chip calculation of the data residual. * * As I used to say, the requirement of data residual * in SCSI is broken, useless and cannot be achieved * without huge complexity. * But most OSes and even the official CAM require it. * When stupidity happens to be so widely spread inside * a community, it gets hard to convince. * * Anyway, I don't care, since I am not going to use * any software that considers this data residual as * a relevant information. :) */ static int sym_compute_residual(hcb_p np, ccb_p cp) { int dp_sg, dp_sgmin, resid = 0; int dp_ofs = 0; /* * Check for some data lost or just thrown away. * We are not required to be quite accurate in this * situation. Btw, if we are odd for output and the * device claims some more data, it may well happen * than our residual be zero. :-) */ if (cp->xerr_status & (XE_EXTRA_DATA|XE_SODL_UNRUN|XE_SWIDE_OVRUN)) { if (cp->xerr_status & XE_EXTRA_DATA) resid -= cp->extra_bytes; if (cp->xerr_status & XE_SODL_UNRUN) ++resid; if (cp->xerr_status & XE_SWIDE_OVRUN) --resid; } /* * If all data has been transferred, * there is no residual. 
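 * Otherwise the residual is rebuilt from the extreme pointer saved by
 * sym_evaluate_dp(): -ext_ofs plus the sizes of all scatter entries
 * from ext_sg to the end. Continuing the worked example above,
 * ext_sg == 95 and ext_ofs == -412 give a residual of
 * 412 + 512 = 924 bytes.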
*/ if (cp->phys.head.lastp == cp->phys.head.goalp) return resid; /* * If no data transfer occurs, or if the data * pointer is weird, return full residual. */ if (cp->startp == cp->phys.head.lastp || sym_evaluate_dp(np, cp, scr_to_cpu(cp->phys.head.lastp), &dp_ofs) < 0) { return cp->data_len; } /* * If we were auto-sensing, then we are done. */ if (cp->host_flags & HF_SENSE) { return -dp_ofs; } /* * We are now full comfortable in the computation * of the data residual (2's complement). */ dp_sgmin = SYM_CONF_MAX_SG - cp->segments; resid = -cp->ext_ofs; for (dp_sg = cp->ext_sg; dp_sg < SYM_CONF_MAX_SG; ++dp_sg) { u_int tmp = scr_to_cpu(cp->phys.data[dp_sg].size); resid += (tmp & 0xffffff); } /* * Hopefully, the result is not too wrong. */ return resid; } /* * Print out the content of a SCSI message. */ static int sym_show_msg (u_char * msg) { u_char i; printf ("%x",*msg); if (*msg==M_EXTENDED) { for (i=1;i<8;i++) { if (i-1>msg[1]) break; printf ("-%x",msg[i]); } return (i+1); } else if ((*msg & 0xf0) == 0x20) { printf ("-%x",msg[1]); return (2); } return (1); } static void sym_print_msg (ccb_p cp, char *label, u_char *msg) { PRINT_ADDR(cp); if (label) printf ("%s: ", label); (void) sym_show_msg (msg); printf (".\n"); } /* * Negotiation for WIDE and SYNCHRONOUS DATA TRANSFER. * * When we try to negotiate, we append the negotiation message * to the identify and (maybe) simple tag message. * The host status field is set to HS_NEGOTIATE to mark this * situation. * * If the target doesn't answer this message immediately * (as required by the standard), the SIR_NEGO_FAILED interrupt * will be raised eventually. * The handler removes the HS_NEGOTIATE status, and sets the * negotiated value to the default (async / nowide). * * If we receive a matching answer immediately, we check it * for validity, and set the values. * * If we receive a Reject message immediately, we assume the * negotiation has failed, and fall back to standard values. * * If we receive a negotiation message while not in HS_NEGOTIATE * state, it's a target initiated negotiation. We prepare a * (hopefully) valid answer, set our parameters, and send back * this answer to the target. * * If the target doesn't fetch the answer (no message out phase), * we assume the negotiation has failed, and fall back to default * settings (SIR_NEGO_PROTO interrupt). * * When we set the values, we adjust them in all ccbs belonging * to this target, in the controller's register, and in the "phys" * field of the controller's struct sym_hcb. */ /* * chip handler for SYNCHRONOUS DATA TRANSFER REQUEST (SDTR) message. */ static void sym_sync_nego(hcb_p np, tcb_p tp, ccb_p cp) { u_char chg, ofs, per, fak, div; int req = 1; /* * Synchronous request message received. */ if (DEBUG_FLAGS & DEBUG_NEGO) { sym_print_msg(cp, "sync msgin", np->msgin); } /* * request or answer ? */ if (INB (HS_PRT) == HS_NEGOTIATE) { OUTB (HS_PRT, HS_BUSY); if (cp->nego_status && cp->nego_status != NS_SYNC) goto reject_it; req = 0; } /* * get requested values. */ chg = 0; per = np->msgin[3]; ofs = np->msgin[4]; /* * check values against our limits. 
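 * (per and ofs are bytes 3 and 4 of the SDTR extended message, i.e.
 * the transfer period factor and the REQ/ACK offset; an offset of 0
 * means asynchronous transfer. For instance, a target requesting
 * offset 16 at period factor 12 sends 01 03 01 0c 10.)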
*/ if (ofs) { if (ofs > np->maxoffs) {chg = 1; ofs = np->maxoffs;} if (req) { if (ofs > tp->tinfo.user.offset) {chg = 1; ofs = tp->tinfo.user.offset;} } } if (ofs) { if (per < np->minsync) {chg = 1; per = np->minsync;} if (req) { if (per < tp->tinfo.user.period) {chg = 1; per = tp->tinfo.user.period;} } } div = fak = 0; if (ofs && sym_getsync(np, 0, per, &div, &fak) < 0) goto reject_it; if (DEBUG_FLAGS & DEBUG_NEGO) { PRINT_ADDR(cp); printf ("sdtr: ofs=%d per=%d div=%d fak=%d chg=%d.\n", ofs, per, div, fak, chg); } /* * This was an answer message */ if (req == 0) { if (chg) /* Answer wasn't acceptable. */ goto reject_it; sym_setsync (np, cp, ofs, per, div, fak); OUTL_DSP (SCRIPTA_BA (np, clrack)); return; } /* * It was a request. Set value and * prepare an answer message */ sym_setsync (np, cp, ofs, per, div, fak); np->msgout[0] = M_EXTENDED; np->msgout[1] = 3; np->msgout[2] = M_X_SYNC_REQ; np->msgout[3] = per; np->msgout[4] = ofs; cp->nego_status = NS_SYNC; if (DEBUG_FLAGS & DEBUG_NEGO) { sym_print_msg(cp, "sync msgout", np->msgout); } np->msgin [0] = M_NOOP; OUTL_DSP (SCRIPTB_BA (np, sdtr_resp)); return; reject_it: sym_setsync (np, cp, 0, 0, 0, 0); OUTL_DSP (SCRIPTB_BA (np, msg_bad)); } /* * chip handler for PARALLEL PROTOCOL REQUEST (PPR) message. */ static void sym_ppr_nego(hcb_p np, tcb_p tp, ccb_p cp) { u_char chg, ofs, per, fak, dt, div, wide; int req = 1; /* * Synchronous request message received. */ if (DEBUG_FLAGS & DEBUG_NEGO) { sym_print_msg(cp, "ppr msgin", np->msgin); } /* * get requested values. */ chg = 0; per = np->msgin[3]; ofs = np->msgin[5]; wide = np->msgin[6]; dt = np->msgin[7] & PPR_OPT_DT; /* * request or answer ? */ if (INB (HS_PRT) == HS_NEGOTIATE) { OUTB (HS_PRT, HS_BUSY); if (cp->nego_status && cp->nego_status != NS_PPR) goto reject_it; req = 0; } /* * check values against our limits. */ if (wide > np->maxwide) {chg = 1; wide = np->maxwide;} if (!wide || !(np->features & FE_ULTRA3)) dt &= ~PPR_OPT_DT; if (req) { if (wide > tp->tinfo.user.width) {chg = 1; wide = tp->tinfo.user.width;} } if (!(np->features & FE_U3EN)) /* Broken U3EN bit not supported */ dt &= ~PPR_OPT_DT; if (dt != (np->msgin[7] & PPR_OPT_MASK)) chg = 1; if (ofs) { if (dt) { if (ofs > np->maxoffs_dt) {chg = 1; ofs = np->maxoffs_dt;} } else if (ofs > np->maxoffs) {chg = 1; ofs = np->maxoffs;} if (req) { if (ofs > tp->tinfo.user.offset) {chg = 1; ofs = tp->tinfo.user.offset;} } } if (ofs) { if (dt) { if (per < np->minsync_dt) {chg = 1; per = np->minsync_dt;} } else if (per < np->minsync) {chg = 1; per = np->minsync;} if (req) { if (per < tp->tinfo.user.period) {chg = 1; per = tp->tinfo.user.period;} } } div = fak = 0; if (ofs && sym_getsync(np, dt, per, &div, &fak) < 0) goto reject_it; if (DEBUG_FLAGS & DEBUG_NEGO) { PRINT_ADDR(cp); printf ("ppr: " "dt=%x ofs=%d per=%d wide=%d div=%d fak=%d chg=%d.\n", dt, ofs, per, wide, div, fak, chg); } /* * It was an answer. */ if (req == 0) { if (chg) /* Answer wasn't acceptable */ goto reject_it; sym_setpprot (np, cp, dt, ofs, per, wide, div, fak); OUTL_DSP (SCRIPTA_BA (np, clrack)); return; } /* * It was a request. 
Set value and * prepare an answer message */ sym_setpprot (np, cp, dt, ofs, per, wide, div, fak); np->msgout[0] = M_EXTENDED; np->msgout[1] = 6; np->msgout[2] = M_X_PPR_REQ; np->msgout[3] = per; np->msgout[4] = 0; np->msgout[5] = ofs; np->msgout[6] = wide; np->msgout[7] = dt; cp->nego_status = NS_PPR; if (DEBUG_FLAGS & DEBUG_NEGO) { sym_print_msg(cp, "ppr msgout", np->msgout); } np->msgin [0] = M_NOOP; OUTL_DSP (SCRIPTB_BA (np, ppr_resp)); return; reject_it: sym_setpprot (np, cp, 0, 0, 0, 0, 0, 0); OUTL_DSP (SCRIPTB_BA (np, msg_bad)); /* * If it was a device response that should result in * ST, we may want to try a legacy negotiation later. */ if (!req && !dt) { tp->tinfo.goal.options = 0; tp->tinfo.goal.width = wide; tp->tinfo.goal.period = per; tp->tinfo.goal.offset = ofs; } } /* * chip handler for WIDE DATA TRANSFER REQUEST (WDTR) message. */ static void sym_wide_nego(hcb_p np, tcb_p tp, ccb_p cp) { u_char chg, wide; int req = 1; /* * Wide request message received. */ if (DEBUG_FLAGS & DEBUG_NEGO) { sym_print_msg(cp, "wide msgin", np->msgin); } /* * Is it a request from the device? */ if (INB (HS_PRT) == HS_NEGOTIATE) { OUTB (HS_PRT, HS_BUSY); if (cp->nego_status && cp->nego_status != NS_WIDE) goto reject_it; req = 0; } /* * get requested values. */ chg = 0; wide = np->msgin[3]; /* * check values against driver limits. */ if (wide > np->maxwide) {chg = 1; wide = np->maxwide;} if (req) { if (wide > tp->tinfo.user.width) {chg = 1; wide = tp->tinfo.user.width;} } if (DEBUG_FLAGS & DEBUG_NEGO) { PRINT_ADDR(cp); printf ("wdtr: wide=%d chg=%d.\n", wide, chg); } /* * This was an answer message */ if (req == 0) { if (chg) /* Answer wasn't acceptable. */ goto reject_it; sym_setwide (np, cp, wide); /* * Negotiate for SYNC immediately after WIDE response. * This allows to negotiate for both WIDE and SYNC on * a single SCSI command (Suggested by Justin Gibbs). */ if (tp->tinfo.goal.offset) { np->msgout[0] = M_EXTENDED; np->msgout[1] = 3; np->msgout[2] = M_X_SYNC_REQ; np->msgout[3] = tp->tinfo.goal.period; np->msgout[4] = tp->tinfo.goal.offset; if (DEBUG_FLAGS & DEBUG_NEGO) { sym_print_msg(cp, "sync msgout", np->msgout); } cp->nego_status = NS_SYNC; OUTB (HS_PRT, HS_NEGOTIATE); OUTL_DSP (SCRIPTB_BA (np, sdtr_resp)); return; } OUTL_DSP (SCRIPTA_BA (np, clrack)); return; } /* * It was a request, set value and * prepare an answer message */ sym_setwide (np, cp, wide); np->msgout[0] = M_EXTENDED; np->msgout[1] = 2; np->msgout[2] = M_X_WIDE_REQ; np->msgout[3] = wide; np->msgin [0] = M_NOOP; cp->nego_status = NS_WIDE; if (DEBUG_FLAGS & DEBUG_NEGO) { sym_print_msg(cp, "wide msgout", np->msgout); } OUTL_DSP (SCRIPTB_BA (np, wdtr_resp)); return; reject_it: OUTL_DSP (SCRIPTB_BA (np, msg_bad)); } /* * Reset SYNC or WIDE to default settings. * * Called when a negotiation does not succeed either * on rejection or on protocol error. * * If it was a PPR that made problems, we may want to * try a legacy negotiation later. */ static void sym_nego_default(hcb_p np, tcb_p tp, ccb_p cp) { /* * any error in negotiation: * fall back to default mode. 
*/ switch (cp->nego_status) { case NS_PPR: #if 0 sym_setpprot (np, cp, 0, 0, 0, 0, 0, 0); #else tp->tinfo.goal.options = 0; if (tp->tinfo.goal.period < np->minsync) tp->tinfo.goal.period = np->minsync; if (tp->tinfo.goal.offset > np->maxoffs) tp->tinfo.goal.offset = np->maxoffs; #endif break; case NS_SYNC: sym_setsync (np, cp, 0, 0, 0, 0); break; case NS_WIDE: sym_setwide (np, cp, 0); break; } np->msgin [0] = M_NOOP; np->msgout[0] = M_NOOP; cp->nego_status = 0; } /* * chip handler for MESSAGE REJECT received in response to * a WIDE or SYNCHRONOUS negotiation. */ static void sym_nego_rejected(hcb_p np, tcb_p tp, ccb_p cp) { sym_nego_default(np, tp, cp); OUTB (HS_PRT, HS_BUSY); } /* * chip exception handler for programmed interrupts. */ static void sym_int_sir (hcb_p np) { u_char num = INB (nc_dsps); u32 dsa = INL (nc_dsa); ccb_p cp = sym_ccb_from_dsa(np, dsa); u_char target = INB (nc_sdid) & 0x0f; tcb_p tp = &np->target[target]; int tmp; SYM_LOCK_ASSERT(MA_OWNED); if (DEBUG_FLAGS & DEBUG_TINY) printf ("I#%d", num); switch (num) { /* * Command has been completed with error condition * or has been auto-sensed. */ case SIR_COMPLETE_ERROR: sym_complete_error(np, cp); return; /* * The C code is currently trying to recover from something. * Typically, user want to abort some command. */ case SIR_SCRIPT_STOPPED: case SIR_TARGET_SELECTED: case SIR_ABORT_SENT: sym_sir_task_recovery(np, num); return; /* * The device didn't go to MSG OUT phase after having * been selected with ATN. We donnot want to handle * that. */ case SIR_SEL_ATN_NO_MSG_OUT: printf ("%s:%d: No MSG OUT phase after selection with ATN.\n", sym_name (np), target); goto out_stuck; /* * The device didn't switch to MSG IN phase after * having reseleted the initiator. */ case SIR_RESEL_NO_MSG_IN: printf ("%s:%d: No MSG IN phase after reselection.\n", sym_name (np), target); goto out_stuck; /* * After reselection, the device sent a message that wasn't * an IDENTIFY. */ case SIR_RESEL_NO_IDENTIFY: printf ("%s:%d: No IDENTIFY after reselection.\n", sym_name (np), target); goto out_stuck; /* * The device reselected a LUN we donnot know about. */ case SIR_RESEL_BAD_LUN: np->msgout[0] = M_RESET; goto out; /* * The device reselected for an untagged nexus and we * haven't any. */ case SIR_RESEL_BAD_I_T_L: np->msgout[0] = M_ABORT; goto out; /* * The device reselected for a tagged nexus that we donnot * have. */ case SIR_RESEL_BAD_I_T_L_Q: np->msgout[0] = M_ABORT_TAG; goto out; /* * The SCRIPTS let us know that the device has grabbed * our message and will abort the job. */ case SIR_RESEL_ABORTED: np->lastmsg = np->msgout[0]; np->msgout[0] = M_NOOP; printf ("%s:%d: message %x sent on bad reselection.\n", sym_name (np), target, np->lastmsg); goto out; /* * The SCRIPTS let us know that a message has been * successfully sent to the device. */ case SIR_MSG_OUT_DONE: np->lastmsg = np->msgout[0]; np->msgout[0] = M_NOOP; /* Should we really care of that */ if (np->lastmsg == M_PARITY || np->lastmsg == M_ID_ERROR) { if (cp) { cp->xerr_status &= ~XE_PARITY_ERR; if (!cp->xerr_status) OUTOFFB (HF_PRT, HF_EXT_ERR); } } goto out; /* * The device didn't send a GOOD SCSI status. * We may have some work to do prior to allow * the SCRIPTS processor to continue. */ case SIR_BAD_SCSI_STATUS: if (!cp) goto out; sym_sir_bad_scsi_status(np, cp); return; /* * We are asked by the SCRIPTS to prepare a * REJECT message. 
*/ case SIR_REJECT_TO_SEND: sym_print_msg(cp, "M_REJECT to send for ", np->msgin); np->msgout[0] = M_REJECT; goto out; /* * We have been ODD at the end of a DATA IN * transfer and the device didn't send a * IGNORE WIDE RESIDUE message. * It is a data overrun condition. */ case SIR_SWIDE_OVERRUN: if (cp) { OUTONB (HF_PRT, HF_EXT_ERR); cp->xerr_status |= XE_SWIDE_OVRUN; } goto out; /* * We have been ODD at the end of a DATA OUT * transfer. * It is a data underrun condition. */ case SIR_SODL_UNDERRUN: if (cp) { OUTONB (HF_PRT, HF_EXT_ERR); cp->xerr_status |= XE_SODL_UNRUN; } goto out; /* * The device wants us to transfer more data than * expected or in the wrong direction. * The number of extra bytes is in scratcha. * It is a data overrun condition. */ case SIR_DATA_OVERRUN: if (cp) { OUTONB (HF_PRT, HF_EXT_ERR); cp->xerr_status |= XE_EXTRA_DATA; cp->extra_bytes += INL (nc_scratcha); } goto out; /* * The device switched to an illegal phase (4/5). */ case SIR_BAD_PHASE: if (cp) { OUTONB (HF_PRT, HF_EXT_ERR); cp->xerr_status |= XE_BAD_PHASE; } goto out; /* * We received a message. */ case SIR_MSG_RECEIVED: if (!cp) goto out_stuck; switch (np->msgin [0]) { /* * We received an extended message. * We handle MODIFY DATA POINTER, SDTR, WDTR * and reject all other extended messages. */ case M_EXTENDED: switch (np->msgin [2]) { case M_X_MODIFY_DP: if (DEBUG_FLAGS & DEBUG_POINTER) sym_print_msg(cp,"modify DP",np->msgin); tmp = (np->msgin[3]<<24) + (np->msgin[4]<<16) + (np->msgin[5]<<8) + (np->msgin[6]); sym_modify_dp(np, cp, tmp); return; case M_X_SYNC_REQ: sym_sync_nego(np, tp, cp); return; case M_X_PPR_REQ: sym_ppr_nego(np, tp, cp); return; case M_X_WIDE_REQ: sym_wide_nego(np, tp, cp); return; default: goto out_reject; } break; /* * We received a 1/2 byte message not handled from SCRIPTS. * We are only expecting MESSAGE REJECT and IGNORE WIDE * RESIDUE messages that haven't been anticipated by * SCRIPTS on SWIDE full condition. Unanticipated IGNORE * WIDE RESIDUE messages are aliased as MODIFY DP (-1). */ case M_IGN_RESIDUE: if (DEBUG_FLAGS & DEBUG_POINTER) sym_print_msg(cp,"ign wide residue", np->msgin); sym_modify_dp(np, cp, -1); return; case M_REJECT: if (INB (HS_PRT) == HS_NEGOTIATE) sym_nego_rejected(np, tp, cp); else { PRINT_ADDR(cp); printf ("M_REJECT received (%x:%x).\n", scr_to_cpu(np->lastmsg), np->msgout[0]); } goto out_clrack; break; default: goto out_reject; } break; /* * We received an unknown message. * Ignore all MSG IN phases and reject it. */ case SIR_MSG_WEIRD: sym_print_msg(cp, "WEIRD message received", np->msgin); OUTL_DSP (SCRIPTB_BA (np, msg_weird)); return; /* * Negotiation failed. * Target does not send us the reply. * Remove the HS_NEGOTIATE status. */ case SIR_NEGO_FAILED: OUTB (HS_PRT, HS_BUSY); /* * Negotiation failed. * Target does not want answer message. 
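 * (SIR_NEGO_FAILED deliberately falls through to this case, so both
 * paths end up in sym_nego_default().)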
*/ case SIR_NEGO_PROTO: sym_nego_default(np, tp, cp); goto out; } out: OUTONB_STD (); return; out_reject: OUTL_DSP (SCRIPTB_BA (np, msg_bad)); return; out_clrack: OUTL_DSP (SCRIPTA_BA (np, clrack)); return; out_stuck: return; } /* * Acquire a control block */ static ccb_p sym_get_ccb (hcb_p np, u_char tn, u_char ln, u_char tag_order) { tcb_p tp = &np->target[tn]; lcb_p lp = sym_lp(tp, ln); u_short tag = NO_TAG; SYM_QUEHEAD *qp; ccb_p cp = (ccb_p) NULL; /* * Look for a free CCB */ if (sym_que_empty(&np->free_ccbq)) goto out; qp = sym_remque_head(&np->free_ccbq); if (!qp) goto out; cp = sym_que_entry(qp, struct sym_ccb, link_ccbq); /* * If the LCB is not yet available and the LUN * has been probed ok, try to allocate the LCB. */ if (!lp && sym_is_bit(tp->lun_map, ln)) { lp = sym_alloc_lcb(np, tn, ln); if (!lp) goto out_free; } /* * If the LCB is not available here, then the * logical unit is not yet discovered. For those * ones only accept 1 SCSI IO per logical unit, * since we cannot allow disconnections. */ if (!lp) { if (!sym_is_bit(tp->busy0_map, ln)) sym_set_bit(tp->busy0_map, ln); else goto out_free; } else { /* * If we have been asked for a tagged command. */ if (tag_order) { /* * Debugging purpose. */ assert(lp->busy_itl == 0); /* * Allocate resources for tags if not yet. */ if (!lp->cb_tags) { sym_alloc_lcb_tags(np, tn, ln); if (!lp->cb_tags) goto out_free; } /* * Get a tag for this SCSI IO and set up * the CCB bus address for reselection, * and count it for this LUN. * Toggle reselect path to tagged. */ if (lp->busy_itlq < SYM_CONF_MAX_TASK) { tag = lp->cb_tags[lp->ia_tag]; if (++lp->ia_tag == SYM_CONF_MAX_TASK) lp->ia_tag = 0; lp->itlq_tbl[tag] = cpu_to_scr(cp->ccb_ba); ++lp->busy_itlq; lp->head.resel_sa = cpu_to_scr(SCRIPTA_BA (np, resel_tag)); } else goto out_free; } /* * This command will not be tagged. * If we already have either a tagged or untagged * one, refuse to overlap this untagged one. */ else { /* * Debugging purpose. */ assert(lp->busy_itl == 0 && lp->busy_itlq == 0); /* * Count this nexus for this LUN. * Set up the CCB bus address for reselection. * Toggle reselect path to untagged. */ if (++lp->busy_itl == 1) { lp->head.itl_task_sa = cpu_to_scr(cp->ccb_ba); lp->head.resel_sa = cpu_to_scr(SCRIPTA_BA (np, resel_no_tag)); } else goto out_free; } } /* * Put the CCB into the busy queue. */ sym_insque_tail(&cp->link_ccbq, &np->busy_ccbq); /* * Remember all informations needed to free this CCB. */ cp->to_abort = 0; cp->tag = tag; cp->target = tn; cp->lun = ln; if (DEBUG_FLAGS & DEBUG_TAGS) { PRINT_LUN(np, tn, ln); printf ("ccb @%p using tag %d.\n", cp, tag); } out: return cp; out_free: sym_insque_head(&cp->link_ccbq, &np->free_ccbq); return NULL; } /* * Release one control block */ static void sym_free_ccb(hcb_p np, ccb_p cp) { tcb_p tp = &np->target[cp->target]; lcb_p lp = sym_lp(tp, cp->lun); if (DEBUG_FLAGS & DEBUG_TAGS) { PRINT_LUN(np, cp->target, cp->lun); printf ("ccb @%p freeing tag %d.\n", cp, cp->tag); } /* * If LCB available, */ if (lp) { /* * If tagged, release the tag, set the relect path */ if (cp->tag != NO_TAG) { /* * Free the tag value. */ lp->cb_tags[lp->if_tag] = cp->tag; if (++lp->if_tag == SYM_CONF_MAX_TASK) lp->if_tag = 0; /* * Make the reselect path invalid, * and uncount this CCB. */ lp->itlq_tbl[cp->tag] = cpu_to_scr(np->bad_itlq_ba); --lp->busy_itlq; } else { /* Untagged */ /* * Make the reselect path invalid, * and uncount this CCB. 
*/ lp->head.itl_task_sa = cpu_to_scr(np->bad_itl_ba); --lp->busy_itl; } /* * If no JOB active, make the LUN reselect path invalid. */ if (lp->busy_itlq == 0 && lp->busy_itl == 0) lp->head.resel_sa = cpu_to_scr(SCRIPTB_BA (np, resel_bad_lun)); } /* * Otherwise, we only accept 1 IO per LUN. * Clear the bit that keeps track of this IO. */ else sym_clr_bit(tp->busy0_map, cp->lun); /* * We donnot queue more than 1 ccb per target * with negotiation at any time. If this ccb was * used for negotiation, clear this info in the tcb. */ if (cp == tp->nego_cp) tp->nego_cp = NULL; #ifdef SYM_CONF_IARB_SUPPORT /* * If we just complete the last queued CCB, * clear this info that is no longer relevant. */ if (cp == np->last_cp) np->last_cp = NULL; #endif /* * Unmap user data from DMA map if needed. */ if (cp->dmamapped) { bus_dmamap_unload(np->data_dmat, cp->dmamap); cp->dmamapped = 0; } /* * Make this CCB available. */ cp->cam_ccb = NULL; cp->host_status = HS_IDLE; sym_remque(&cp->link_ccbq); sym_insque_head(&cp->link_ccbq, &np->free_ccbq); } /* * Allocate a CCB from memory and initialize its fixed part. */ static ccb_p sym_alloc_ccb(hcb_p np) { ccb_p cp = NULL; int hcode; SYM_LOCK_ASSERT(MA_NOTOWNED); /* * Prevent from allocating more CCBs than we can * queue to the controller. */ if (np->actccbs >= SYM_CONF_MAX_START) return NULL; /* * Allocate memory for this CCB. */ cp = sym_calloc_dma(sizeof(struct sym_ccb), "CCB"); if (!cp) return NULL; /* * Allocate a bounce buffer for sense data. */ cp->sns_bbuf = sym_calloc_dma(SYM_SNS_BBUF_LEN, "SNS_BBUF"); if (!cp->sns_bbuf) goto out_free; /* * Allocate a map for the DMA of user data. */ if (bus_dmamap_create(np->data_dmat, 0, &cp->dmamap)) goto out_free; /* * Count it. */ np->actccbs++; /* * Initialize the callout. */ callout_init(&cp->ch, 1); /* * Compute the bus address of this ccb. */ cp->ccb_ba = vtobus(cp); /* * Insert this ccb into the hashed list. */ hcode = CCB_HASH_CODE(cp->ccb_ba); cp->link_ccbh = np->ccbh[hcode]; np->ccbh[hcode] = cp; /* * Initialize the start and restart actions. */ cp->phys.head.go.start = cpu_to_scr(SCRIPTA_BA (np, idle)); cp->phys.head.go.restart = cpu_to_scr(SCRIPTB_BA (np, bad_i_t_l)); /* * Initilialyze some other fields. */ cp->phys.smsg_ext.addr = cpu_to_scr(HCB_BA(np, msgin[2])); /* * Chain into free ccb queue. */ sym_insque_head(&cp->link_ccbq, &np->free_ccbq); return cp; out_free: if (cp->sns_bbuf) sym_mfree_dma(cp->sns_bbuf, SYM_SNS_BBUF_LEN, "SNS_BBUF"); sym_mfree_dma(cp, sizeof(*cp), "CCB"); return NULL; } /* * Look up a CCB from a DSA value. */ static ccb_p sym_ccb_from_dsa(hcb_p np, u32 dsa) { int hcode; ccb_p cp; hcode = CCB_HASH_CODE(dsa); cp = np->ccbh[hcode]; while (cp) { if (cp->ccb_ba == dsa) break; cp = cp->link_ccbh; } return cp; } /* * Lun control block allocation and initialization. */ static lcb_p sym_alloc_lcb (hcb_p np, u_char tn, u_char ln) { tcb_p tp = &np->target[tn]; lcb_p lp = sym_lp(tp, ln); /* * Already done, just return. */ if (lp) return lp; /* * Check against some race. */ assert(!sym_is_bit(tp->busy0_map, ln)); /* * Allocate the LCB bus address array. * Compute the bus address of this table. */ if (ln && !tp->luntbl) { int i; tp->luntbl = sym_calloc_dma(256, "LUNTBL"); if (!tp->luntbl) goto fail; for (i = 0 ; i < 64 ; i++) tp->luntbl[i] = cpu_to_scr(vtobus(&np->badlun_sa)); tp->head.luntbl_sa = cpu_to_scr(vtobus(tp->luntbl)); } /* * Allocate the table of pointers for LUN(s) > 0, if needed. 
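 * (tp->lunmp holds plain host pointers to the per-LUN lcb's, while
 * tp->luntbl, allocated just above from DMA'able memory, holds the
 * scr-encoded bus addresses the SCRIPTS dereference to locate the
 * LCB on reselection.)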
 */
	if (ln && !tp->lunmp) {
		tp->lunmp = sym_calloc(SYM_CONF_MAX_LUN * sizeof(lcb_p),
				"LUNMP");
		if (!tp->lunmp)
			goto fail;
	}

	/*
	 * Allocate the lcb.
	 * Make it available to the chip.
	 */
	lp = sym_calloc_dma(sizeof(struct sym_lcb), "LCB");
	if (!lp)
		goto fail;
	if (ln) {
		tp->lunmp[ln] = lp;
		tp->luntbl[ln] = cpu_to_scr(vtobus(lp));
	}
	else {
		tp->lun0p = lp;
		tp->head.lun0_sa = cpu_to_scr(vtobus(lp));
	}

	/*
	 * Let the itl task point to error handling.
	 */
	lp->head.itl_task_sa = cpu_to_scr(np->bad_itl_ba);

	/*
	 * Set the reselect pattern to our default. :)
	 */
	lp->head.resel_sa = cpu_to_scr(SCRIPTB_BA (np, resel_bad_lun));

	/*
	 * Set user capabilities.
	 */
	lp->user_flags = tp->usrflags & (SYM_DISC_ENABLED | SYM_TAGS_ENABLED);

fail:
	return lp;
}

/*
 * Allocate LCB resources for tagged command queuing.
 */
static void sym_alloc_lcb_tags (hcb_p np, u_char tn, u_char ln)
{
	tcb_p tp = &np->target[tn];
	lcb_p lp = sym_lp(tp, ln);
	int i;

	/*
	 * If LCB not available, try to allocate it.
	 */
	if (!lp && !(lp = sym_alloc_lcb(np, tn, ln)))
		return;

	/*
	 * Allocate the task table and the tag allocation
	 * circular buffer. We want both or none.
	 */
	lp->itlq_tbl = sym_calloc_dma(SYM_CONF_MAX_TASK*4, "ITLQ_TBL");
	if (!lp->itlq_tbl)
		return;
	lp->cb_tags = sym_calloc(SYM_CONF_MAX_TASK, "CB_TAGS");
	if (!lp->cb_tags) {
		sym_mfree_dma(lp->itlq_tbl, SYM_CONF_MAX_TASK*4, "ITLQ_TBL");
		lp->itlq_tbl = NULL;
		return;
	}

	/*
	 * Initialize the task table with invalid entries.
	 */
	for (i = 0 ; i < SYM_CONF_MAX_TASK ; i++)
		lp->itlq_tbl[i] = cpu_to_scr(np->notask_ba);

	/*
	 * Fill up the tag buffer with tag numbers.
	 */
	for (i = 0 ; i < SYM_CONF_MAX_TASK ; i++)
		lp->cb_tags[i] = i;

	/*
	 * Make the task table available to SCRIPTS,
	 * And accept tagged commands now.
	 */
	lp->head.itlq_tbl_sa = cpu_to_scr(vtobus(lp->itlq_tbl));
}

/*
 * Test the pci bus snoop logic :-(
 *
 * Has to be called with interrupts disabled.
 */
#ifndef SYM_CONF_IOMAPPED
static int sym_regtest (hcb_p np)
{
	register volatile u32 data;
	/*
	 * chip registers may NOT be cached.
	 * write 0xffffffff to a read only register area,
	 * and try to read it back.
	 */
	data = 0xffffffff;
	OUTL_OFF(offsetof(struct sym_reg, nc_dstat), data);
	data = INL_OFF(offsetof(struct sym_reg, nc_dstat));
#if 1
	if (data == 0xffffffff) {
#else
	if ((data & 0xe2f0fffd) != 0x02000080) {
#endif
		printf ("CACHE TEST FAILED: reg dstat-sstat2 readback %x.\n",
			(unsigned) data);
		return (0x10);
	}
	return (0);
}
#endif

static int sym_snooptest (hcb_p np)
{
	u32 sym_rd, sym_wr, sym_bk, host_rd, host_wr, pc, dstat;
	int i, err=0;
#ifndef SYM_CONF_IOMAPPED
	err |= sym_regtest (np);
	if (err)
		return (err);
#endif
restart_test:
	/*
	 * Enable Master Parity Checking as we intend
	 * to enable it for normal operations.
	 */
	OUTB (nc_ctest4, (np->rv_ctest4 & MPEE));
	/*
	 * init
	 */
	pc = SCRIPTB0_BA (np, snooptest);
	host_wr = 1;
	sym_wr  = 2;
	/*
	 * Set memory and register.
	 */
	np->cache = cpu_to_scr(host_wr);
	OUTL (nc_temp, sym_wr);
	/*
	 * Start script (exchange values)
	 */
	OUTL (nc_dsa, np->hcb_ba);
	OUTL_DSP (pc);
	/*
	 * Wait 'til done (with timeout)
	 */
	for (i=0; i<SYM_SNOOP_TIMEOUT; i++)
		if (INB (nc_istat) & (INTF|SIP|DIP))
			break;
	if (i>=SYM_SNOOP_TIMEOUT) {
		printf ("CACHE TEST FAILED: timeout.\n");
		return (0x20);
	}
	/*
	 * Check for fatal DMA errors.
*/ dstat = INB (nc_dstat); #if 1 /* Band aiding for broken hardwares that fail PCI parity */ if ((dstat & MDPE) && (np->rv_ctest4 & MPEE)) { printf ("%s: PCI DATA PARITY ERROR DETECTED - " "DISABLING MASTER DATA PARITY CHECKING.\n", sym_name(np)); np->rv_ctest4 &= ~MPEE; goto restart_test; } #endif if (dstat & (MDPE|BF|IID)) { printf ("CACHE TEST FAILED: DMA error (dstat=0x%02x).", dstat); return (0x80); } /* * Save termination position. */ pc = INL (nc_dsp); /* * Read memory and register. */ host_rd = scr_to_cpu(np->cache); sym_rd = INL (nc_scratcha); sym_bk = INL (nc_temp); /* * Check termination position. */ if (pc != SCRIPTB0_BA (np, snoopend)+8) { printf ("CACHE TEST FAILED: script execution failed.\n"); printf ("start=%08lx, pc=%08lx, end=%08lx\n", (u_long) SCRIPTB0_BA (np, snooptest), (u_long) pc, (u_long) SCRIPTB0_BA (np, snoopend) +8); return (0x40); } /* * Show results. */ if (host_wr != sym_rd) { printf ("CACHE TEST FAILED: host wrote %d, chip read %d.\n", (int) host_wr, (int) sym_rd); err |= 1; } if (host_rd != sym_wr) { printf ("CACHE TEST FAILED: chip wrote %d, host read %d.\n", (int) sym_wr, (int) host_rd); err |= 2; } if (sym_bk != sym_wr) { printf ("CACHE TEST FAILED: chip wrote %d, read back %d.\n", (int) sym_wr, (int) sym_bk); err |= 4; } return (err); } /* * Determine the chip's clock frequency. * * This is essential for the negotiation of the synchronous * transfer rate. * * Note: we have to return the correct value. * THERE IS NO SAFE DEFAULT VALUE. * * Most NCR/SYMBIOS boards are delivered with a 40 Mhz clock. * 53C860 and 53C875 rev. 1 support fast20 transfers but * do not have a clock doubler and so are provided with a * 80 MHz clock. All other fast20 boards incorporate a doubler * and so should be delivered with a 40 MHz clock. * The recent fast40 chips (895/896/895A/1010) use a 40 Mhz base * clock and provide a clock quadrupler (160 Mhz). */ /* * Select SCSI clock frequency */ static void sym_selectclock(hcb_p np, u_char scntl3) { /* * If multiplier not present or not selected, leave here. */ if (np->multiplier <= 1) { OUTB(nc_scntl3, scntl3); return; } if (sym_verbose >= 2) printf ("%s: enabling clock multiplier\n", sym_name(np)); OUTB(nc_stest1, DBLEN); /* Enable clock multiplier */ /* * Wait for the LCKFRQ bit to be set if supported by the chip. * Otherwise wait 20 micro-seconds. */ if (np->features & FE_LCKFRQ) { int i = 20; while (!(INB(nc_stest4) & LCKFRQ) && --i > 0) UDELAY (20); if (!i) printf("%s: the chip cannot lock the frequency\n", sym_name(np)); } else UDELAY (20); OUTB(nc_stest3, HSC); /* Halt the scsi clock */ OUTB(nc_scntl3, scntl3); OUTB(nc_stest1, (DBLEN|DBLSEL));/* Select clock multiplier */ OUTB(nc_stest3, 0x00); /* Restart scsi clock */ } /* * calculate SCSI clock frequency (in KHz) */ static unsigned getfreq (hcb_p np, int gen) { unsigned int ms = 0; unsigned int f; /* * Measure GEN timer delay in order * to calculate SCSI clock frequency * * This code will never execute too * many loop iterations (if DELAY is * reasonably correct). It could get * too low a delay (too high a freq.) * if the CPU is slow executing the * loop for some reason (an NMI, for * example). For this reason we will * if multiple measurements are to be * performed trust the higher delay * (lower frequency returned). 
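 * In short: the general purpose timer is started for a nominal
 * 1<<gen * 125us delay and the loop below counts how many
 * milliseconds actually elapse before it fires; the longer the
 * measured delay, the lower the real SCSI clock, and the result is
 * converted to KHz right after the loop.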
 */
	OUTW (nc_sien , 0);	/* mask all scsi interrupts */
	(void) INW (nc_sist);	/* clear pending scsi interrupt */
	OUTB (nc_dien , 0);	/* mask all dma interrupts */
	(void) INW (nc_sist);	/* another one, just to be sure :) */
	OUTB (nc_scntl3, 4);	/* set pre-scaler to divide by 3 */
	OUTB (nc_stime1, 0);	/* disable general purpose timer */
	OUTB (nc_stime1, gen);	/* set to nominal delay of 1<<gen * 125us */
	while (!(INW (nc_sist) & GEN) && ms++ < 100000)
		UDELAY (1000);	/* count in 1 ms steps */
	OUTB (nc_stime1, 0);	/* disable general purpose timer */
	OUTB (nc_scntl3, 0);	/* set pre-scaler back to 0 */

	/*
	 * Adjust for prescaler, and convert into KHz.
	 */
	f = ms ? ((1 << gen) * 4340) / ms : 0;

	if (sym_verbose >= 2)
		printf ("%s: Delay (GEN=%d): %u msec, %u KHz\n",
			sym_name(np), gen, ms, f);

	return f;
}

static unsigned sym_getfreq (hcb_p np)
{
	u_int f1, f2;
	int gen = 11;

	(void) getfreq (np, gen);	/* throw away first result */
	f1 = getfreq (np, gen);
	f2 = getfreq (np, gen);
	if (f1 > f2) f1 = f2;		/* trust lower result */
	return f1;
}

/*
 * Get/probe chip SCSI clock frequency
 */
static void sym_getclock (hcb_p np, int mult)
{
	unsigned char scntl3 = np->sv_scntl3;
	unsigned char stest1 = np->sv_stest1;
	unsigned f1;

	/*
	 * For the C10 core, assume 40 MHz.
	 */
	if (np->features & FE_C10) {
		np->multiplier = mult;
		np->clock_khz = 40000 * mult;
		return;
	}

	np->multiplier = 1;
	f1 = 40000;
	/*
	 * True with 875/895/896/895A with clock multiplier selected
	 */
	if (mult > 1 && (stest1 & (DBLEN+DBLSEL)) == DBLEN+DBLSEL) {
		if (sym_verbose >= 2)
			printf ("%s: clock multiplier found\n", sym_name(np));
		np->multiplier = mult;
	}

	/*
	 * If multiplier not found or scntl3 not 7,5,3,
	 * reset chip and get frequency from general purpose timer.
	 * Otherwise trust scntl3 BIOS setting.
	 */
	if (np->multiplier != mult || (scntl3 & 7) < 3 || !(scntl3 & 1)) {
		OUTB (nc_stest1, 0);	/* make sure doubler is OFF */
		f1 = sym_getfreq (np);

		if (sym_verbose)
			printf ("%s: chip clock is %uKHz\n", sym_name(np), f1);

		if (f1 < 45000)
			f1 = 40000;
		else if (f1 < 55000)
			f1 = 50000;
		else
			f1 = 80000;

		if (f1 < 80000 && mult > 1) {
			if (sym_verbose >= 2)
				printf ("%s: clock multiplier assumed\n",
					sym_name(np));
			np->multiplier = mult;
		}
	} else {
		if ((scntl3 & 7) == 3)
			f1 = 40000;
		else if ((scntl3 & 7) == 5)
			f1 = 80000;
		else
			f1 = 160000;

		f1 /= np->multiplier;
	}

	/*
	 * Compute controller synchronous parameters.
	 */
	f1 *= np->multiplier;
	np->clock_khz = f1;
}

/*
 * Get/probe PCI clock frequency
 */
static int sym_getpciclock (hcb_p np)
{
	int f = 0;

	/*
	 * For the C1010-33, this doesn't work.
	 * For the C1010-66, this will be tested when I'll have
	 * such a beast to play with.
	 */
	if (!(np->features & FE_C10)) {
		OUTB (nc_stest1, SCLK);	/* Use the PCI clock as SCSI clock */
		f = (int) sym_getfreq (np);
		OUTB (nc_stest1, 0);
	}
	np->pciclk_khz = f;

	return f;
}

/*============= DRIVER ACTION/COMPLETION ====================*/

/*
 * Print something that tells about extended errors.
 */
static void sym_print_xerr(ccb_p cp, int x_status)
{
	if (x_status & XE_PARITY_ERR) {
		PRINT_ADDR(cp);
		printf ("unrecovered SCSI parity error.\n");
	}
	if (x_status & XE_EXTRA_DATA) {
		PRINT_ADDR(cp);
		printf ("extraneous data discarded.\n");
	}
	if (x_status & XE_BAD_PHASE) {
		PRINT_ADDR(cp);
		printf ("illegal scsi phase (4/5).\n");
	}
	if (x_status & XE_SODL_UNRUN) {
		PRINT_ADDR(cp);
		printf ("ODD transfer in DATA OUT phase.\n");
	}
	if (x_status & XE_SWIDE_OVRUN) {
		PRINT_ADDR(cp);
		printf ("ODD transfer in DATA IN phase.\n");
	}
}

/*
 * Choose the most appropriate CAM status if
 * the IO encountered an extended error.
*/ static int sym_xerr_cam_status(int cam_status, int x_status) { if (x_status) { if (x_status & XE_PARITY_ERR) cam_status = CAM_UNCOR_PARITY; else if (x_status &(XE_EXTRA_DATA|XE_SODL_UNRUN|XE_SWIDE_OVRUN)) cam_status = CAM_DATA_RUN_ERR; else if (x_status & XE_BAD_PHASE) cam_status = CAM_REQ_CMP_ERR; else cam_status = CAM_REQ_CMP_ERR; } return cam_status; } /* * Complete execution of a SCSI command with extented * error, SCSI status error, or having been auto-sensed. * * The SCRIPTS processor is not running there, so we * can safely access IO registers and remove JOBs from * the START queue. * SCRATCHA is assumed to have been loaded with STARTPOS * before the SCRIPTS called the C code. */ static void sym_complete_error (hcb_p np, ccb_p cp) { struct ccb_scsiio *csio; u_int cam_status; int i, sense_returned; SYM_LOCK_ASSERT(MA_OWNED); /* * Paranoid check. :) */ if (!cp || !cp->cam_ccb) return; if (DEBUG_FLAGS & (DEBUG_TINY|DEBUG_RESULT)) { printf ("CCB=%lx STAT=%x/%x/%x DEV=%d/%d\n", (unsigned long)cp, cp->host_status, cp->ssss_status, cp->host_flags, cp->target, cp->lun); MDELAY(100); } /* * Get CAM command pointer. */ csio = &cp->cam_ccb->csio; /* * Check for extended errors. */ if (cp->xerr_status) { if (sym_verbose) sym_print_xerr(cp, cp->xerr_status); if (cp->host_status == HS_COMPLETE) cp->host_status = HS_COMP_ERR; } /* * Calculate the residual. */ csio->sense_resid = 0; csio->resid = sym_compute_residual(np, cp); if (!SYM_CONF_RESIDUAL_SUPPORT) {/* If user does not want residuals */ csio->resid = 0; /* throw them away. :) */ cp->sv_resid = 0; } if (cp->host_flags & HF_SENSE) { /* Auto sense */ csio->scsi_status = cp->sv_scsi_status; /* Restore status */ csio->sense_resid = csio->resid; /* Swap residuals */ csio->resid = cp->sv_resid; cp->sv_resid = 0; if (sym_verbose && cp->sv_xerr_status) sym_print_xerr(cp, cp->sv_xerr_status); if (cp->host_status == HS_COMPLETE && cp->ssss_status == S_GOOD && cp->xerr_status == 0) { cam_status = sym_xerr_cam_status(CAM_SCSI_STATUS_ERROR, cp->sv_xerr_status); cam_status |= CAM_AUTOSNS_VALID; /* * Bounce back the sense data to user and * fix the residual. */ bzero(&csio->sense_data, sizeof(csio->sense_data)); sense_returned = SYM_SNS_BBUF_LEN - csio->sense_resid; if (sense_returned < csio->sense_len) csio->sense_resid = csio->sense_len - sense_returned; else csio->sense_resid = 0; bcopy(cp->sns_bbuf, &csio->sense_data, MIN(csio->sense_len, sense_returned)); #if 0 /* * If the device reports a UNIT ATTENTION condition * due to a RESET condition, we should consider all * disconnect CCBs for this unit as aborted. */ if (1) { u_char *p; p = (u_char *) csio->sense_data; if (p[0]==0x70 && p[2]==0x6 && p[12]==0x29) sym_clear_tasks(np, CAM_REQ_ABORTED, cp->target,cp->lun, -1); } #endif } else cam_status = CAM_AUTOSENSE_FAIL; } else if (cp->host_status == HS_COMPLETE) { /* Bad SCSI status */ csio->scsi_status = cp->ssss_status; cam_status = CAM_SCSI_STATUS_ERROR; } else if (cp->host_status == HS_SEL_TIMEOUT) /* Selection timeout */ cam_status = CAM_SEL_TIMEOUT; else if (cp->host_status == HS_UNEXPECTED) /* Unexpected BUS FREE*/ cam_status = CAM_UNEXP_BUSFREE; else { /* Extended error */ if (sym_verbose) { PRINT_ADDR(cp); printf ("COMMAND FAILED (%x %x %x).\n", cp->host_status, cp->ssss_status, cp->xerr_status); } csio->scsi_status = cp->ssss_status; /* * Set the most appropriate value for CAM status. */ cam_status = sym_xerr_cam_status(CAM_REQ_CMP_ERR, cp->xerr_status); } /* * Dequeue all queued CCBs for that device * not yet started by SCRIPTS. 
*/ i = (INL (nc_scratcha) - np->squeue_ba) / 4; (void) sym_dequeue_from_squeue(np, i, cp->target, cp->lun, -1); /* * Restart the SCRIPTS processor. */ OUTL_DSP (SCRIPTA_BA (np, start)); /* * Synchronize DMA map if needed. */ if (cp->dmamapped) { bus_dmamap_sync(np->data_dmat, cp->dmamap, (cp->dmamapped == SYM_DMA_READ ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE)); } /* * Add this one to the COMP queue. * Complete all those commands with either error * or requeue condition. */ sym_set_cam_status((union ccb *) csio, cam_status); sym_remque(&cp->link_ccbq); sym_insque_head(&cp->link_ccbq, &np->comp_ccbq); sym_flush_comp_queue(np, 0); } /* * Complete execution of a successful SCSI command. * * Only successful commands go to the DONE queue, * since we need to have the SCRIPTS processor * stopped on any error condition. * The SCRIPTS processor is running while we are * completing successful commands. */ static void sym_complete_ok (hcb_p np, ccb_p cp) { struct ccb_scsiio *csio; tcb_p tp; lcb_p lp; SYM_LOCK_ASSERT(MA_OWNED); /* * Paranoid check. :) */ if (!cp || !cp->cam_ccb) return; assert (cp->host_status == HS_COMPLETE); /* * Get command, target and lun pointers. */ csio = &cp->cam_ccb->csio; tp = &np->target[cp->target]; lp = sym_lp(tp, cp->lun); /* * Assume device discovered on first success. */ if (!lp) sym_set_bit(tp->lun_map, cp->lun); /* * If all data have been transferred, given than no * extended error did occur, there is no residual. */ csio->resid = 0; if (cp->phys.head.lastp != cp->phys.head.goalp) csio->resid = sym_compute_residual(np, cp); /* * Wrong transfer residuals may be worse than just always * returning zero. User can disable this feature from * sym_conf.h. Residual support is enabled by default. */ if (!SYM_CONF_RESIDUAL_SUPPORT) csio->resid = 0; /* * Synchronize DMA map if needed. */ if (cp->dmamapped) { bus_dmamap_sync(np->data_dmat, cp->dmamap, (cp->dmamapped == SYM_DMA_READ ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE)); } /* * Set status and complete the command. */ csio->scsi_status = cp->ssss_status; sym_set_cam_status((union ccb *) csio, CAM_REQ_CMP); sym_xpt_done(np, (union ccb *) csio, cp); sym_free_ccb(np, cp); } /* * Our callout handler */ static void sym_callout(void *arg) { union ccb *ccb = (union ccb *) arg; hcb_p np = ccb->ccb_h.sym_hcb_ptr; /* * Check that the CAM CCB is still queued. */ if (!np) return; SYM_LOCK(); switch(ccb->ccb_h.func_code) { case XPT_SCSI_IO: (void) sym_abort_scsiio(np, ccb, 1); break; default: break; } SYM_UNLOCK(); } /* * Abort an SCSI IO. */ static int sym_abort_scsiio(hcb_p np, union ccb *ccb, int timed_out) { ccb_p cp; SYM_QUEHEAD *qp; SYM_LOCK_ASSERT(MA_OWNED); /* * Look up our CCB control block. */ cp = NULL; FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) { ccb_p cp2 = sym_que_entry(qp, struct sym_ccb, link_ccbq); if (cp2->cam_ccb == ccb) { cp = cp2; break; } } if (!cp || cp->host_status == HS_WAIT) return -1; /* * If a previous abort didn't succeed in time, * perform a BUS reset. */ if (cp->to_abort) { sym_reset_scsi_bus(np, 1); return 0; } /* * Mark the CCB for abort and allow time for. */ cp->to_abort = timed_out ? 2 : 1; callout_reset(&cp->ch, 10 * hz, sym_callout, (caddr_t) ccb); /* * Tell the SCRIPTS processor to stop and synchronize with us. */ np->istat_sem = SEM; OUTB (nc_istat, SIGP|SEM); return 0; } /* * Reset a SCSI device (all LUNs of a target). 
*/ static void sym_reset_dev(hcb_p np, union ccb *ccb) { tcb_p tp; struct ccb_hdr *ccb_h = &ccb->ccb_h; SYM_LOCK_ASSERT(MA_OWNED); if (ccb_h->target_id == np->myaddr || ccb_h->target_id >= SYM_CONF_MAX_TARGET || ccb_h->target_lun >= SYM_CONF_MAX_LUN) { sym_xpt_done2(np, ccb, CAM_DEV_NOT_THERE); return; } tp = &np->target[ccb_h->target_id]; tp->to_reset = 1; sym_xpt_done2(np, ccb, CAM_REQ_CMP); np->istat_sem = SEM; OUTB (nc_istat, SIGP|SEM); } /* * SIM action entry point. */ static void sym_action(struct cam_sim *sim, union ccb *ccb) { hcb_p np; tcb_p tp; lcb_p lp; ccb_p cp; int tmp; u_char idmsg, *msgptr; u_int msglen; struct ccb_scsiio *csio; struct ccb_hdr *ccb_h; CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE, ("sym_action\n")); /* * Retrieve our controller data structure. */ np = (hcb_p) cam_sim_softc(sim); SYM_LOCK_ASSERT(MA_OWNED); /* * The common case is SCSI IO. * We deal with other ones elsewhere. */ if (ccb->ccb_h.func_code != XPT_SCSI_IO) { sym_action2(sim, ccb); return; } csio = &ccb->csio; ccb_h = &csio->ccb_h; /* * Work around races. */ if ((ccb_h->status & CAM_STATUS_MASK) != CAM_REQ_INPROG) { xpt_done(ccb); return; } /* * Minimal checkings, so that we will not * go outside our tables. */ if (ccb_h->target_id == np->myaddr || ccb_h->target_id >= SYM_CONF_MAX_TARGET || ccb_h->target_lun >= SYM_CONF_MAX_LUN) { sym_xpt_done2(np, ccb, CAM_DEV_NOT_THERE); return; } /* * Retrieve the target and lun descriptors. */ tp = &np->target[ccb_h->target_id]; lp = sym_lp(tp, ccb_h->target_lun); /* * Complete the 1st INQUIRY command with error * condition if the device is flagged NOSCAN * at BOOT in the NVRAM. This may speed up * the boot and maintain coherency with BIOS * device numbering. Clearing the flag allows * user to rescan skipped devices later. * We also return error for devices not flagged * for SCAN LUNS in the NVRAM since some mono-lun * devices behave badly when asked for some non * zero LUN. Btw, this is an absolute hack.:-) */ if (!(ccb_h->flags & CAM_CDB_PHYS) && (0x12 == ((ccb_h->flags & CAM_CDB_POINTER) ? csio->cdb_io.cdb_ptr[0] : csio->cdb_io.cdb_bytes[0]))) { if ((tp->usrflags & SYM_SCAN_BOOT_DISABLED) || ((tp->usrflags & SYM_SCAN_LUNS_DISABLED) && ccb_h->target_lun != 0)) { tp->usrflags &= ~SYM_SCAN_BOOT_DISABLED; sym_xpt_done2(np, ccb, CAM_DEV_NOT_THERE); return; } } /* * Get a control block for this IO. */ tmp = ((ccb_h->flags & CAM_TAG_ACTION_VALID) != 0); cp = sym_get_ccb(np, ccb_h->target_id, ccb_h->target_lun, tmp); if (!cp) { sym_xpt_done2(np, ccb, CAM_RESRC_UNAVAIL); return; } /* * Keep track of the IO in our CCB. */ cp->cam_ccb = ccb; /* * Build the IDENTIFY message. */ idmsg = M_IDENTIFY | cp->lun; if (cp->tag != NO_TAG || (lp && (lp->current_flags & SYM_DISC_ENABLED))) idmsg |= 0x40; msgptr = cp->scsi_smsg; msglen = 0; msgptr[msglen++] = idmsg; /* * Build the tag message if present. */ if (cp->tag != NO_TAG) { u_char order = csio->tag_action; switch(order) { case M_ORDERED_TAG: break; case M_HEAD_TAG: break; default: order = M_SIMPLE_TAG; } msgptr[msglen++] = order; /* * For less than 128 tags, actual tags are numbered * 1,3,5,..2*MAXTAGS+1,since we may have to deal * with devices that have problems with #TAG 0 or too * great #TAG numbers. For more tags (up to 256), * we use directly our tag number. */ #if SYM_CONF_MAX_TASK > (512/4) msgptr[msglen++] = cp->tag; #else msgptr[msglen++] = (cp->tag << 1) + 1; #endif } /* * Build a negotiation message if needed. 
* (nego_status is filled by sym_prepare_nego()) */ cp->nego_status = 0; if (tp->tinfo.current.width != tp->tinfo.goal.width || tp->tinfo.current.period != tp->tinfo.goal.period || tp->tinfo.current.offset != tp->tinfo.goal.offset || tp->tinfo.current.options != tp->tinfo.goal.options) { if (!tp->nego_cp && lp) msglen += sym_prepare_nego(np, cp, 0, msgptr + msglen); } /* * Fill in our ccb */ /* * Startqueue */ cp->phys.head.go.start = cpu_to_scr(SCRIPTA_BA (np, select)); cp->phys.head.go.restart = cpu_to_scr(SCRIPTA_BA (np, resel_dsa)); /* * select */ cp->phys.select.sel_id = cp->target; cp->phys.select.sel_scntl3 = tp->head.wval; cp->phys.select.sel_sxfer = tp->head.sval; cp->phys.select.sel_scntl4 = tp->head.uval; /* * message */ cp->phys.smsg.addr = cpu_to_scr(CCB_BA (cp, scsi_smsg)); cp->phys.smsg.size = cpu_to_scr(msglen); /* * command */ if (sym_setup_cdb(np, csio, cp) < 0) { sym_xpt_done(np, ccb, cp); sym_free_ccb(np, cp); return; } /* * status */ #if 0 /* Provision */ cp->actualquirks = tp->quirks; #endif cp->actualquirks = SYM_QUIRK_AUTOSAVE; cp->host_status = cp->nego_status ? HS_NEGOTIATE : HS_BUSY; cp->ssss_status = S_ILLEGAL; cp->xerr_status = 0; cp->host_flags = 0; cp->extra_bytes = 0; /* * extreme data pointer. * shall be positive, so -1 is lower than lowest.:) */ cp->ext_sg = -1; cp->ext_ofs = 0; /* * Build the data descriptor block * and start the IO. */ sym_setup_data_and_start(np, csio, cp); } /* * Setup buffers and pointers that address the CDB. * I bet, physical CDBs will never be used on the planet, * since they can be bounced without significant overhead. */ static int sym_setup_cdb(hcb_p np, struct ccb_scsiio *csio, ccb_p cp) { struct ccb_hdr *ccb_h; u32 cmd_ba; int cmd_len; SYM_LOCK_ASSERT(MA_OWNED); ccb_h = &csio->ccb_h; /* * CDB is 16 bytes max. */ if (csio->cdb_len > sizeof(cp->cdb_buf)) { sym_set_cam_status(cp->cam_ccb, CAM_REQ_INVALID); return -1; } cmd_len = csio->cdb_len; if (ccb_h->flags & CAM_CDB_POINTER) { /* CDB is a pointer */ if (!(ccb_h->flags & CAM_CDB_PHYS)) { /* CDB pointer is virtual */ bcopy(csio->cdb_io.cdb_ptr, cp->cdb_buf, cmd_len); cmd_ba = CCB_BA (cp, cdb_buf[0]); } else { /* CDB pointer is physical */ #if 0 cmd_ba = ((u32)csio->cdb_io.cdb_ptr) & 0xffffffff; #else sym_set_cam_status(cp->cam_ccb, CAM_REQ_INVALID); return -1; #endif } } else { /* CDB is in the CAM ccb (buffer) */ bcopy(csio->cdb_io.cdb_bytes, cp->cdb_buf, cmd_len); cmd_ba = CCB_BA (cp, cdb_buf[0]); } cp->phys.cmd.addr = cpu_to_scr(cmd_ba); cp->phys.cmd.size = cpu_to_scr(cmd_len); return 0; } /* * Set up data pointers used by SCRIPTS. */ static void __inline sym_setup_data_pointers(hcb_p np, ccb_p cp, int dir) { u32 lastp, goalp; SYM_LOCK_ASSERT(MA_OWNED); /* * No segments means no data. */ if (!cp->segments) dir = CAM_DIR_NONE; /* * Set the data pointer. */ switch(dir) { case CAM_DIR_OUT: goalp = SCRIPTA_BA (np, data_out2) + 8; lastp = goalp - 8 - (cp->segments * (2*4)); break; case CAM_DIR_IN: cp->host_flags |= HF_DATA_IN; goalp = SCRIPTA_BA (np, data_in2) + 8; lastp = goalp - 8 - (cp->segments * (2*4)); break; case CAM_DIR_NONE: default: lastp = goalp = SCRIPTB_BA (np, no_data); break; } cp->phys.head.lastp = cpu_to_scr(lastp); cp->phys.head.goalp = cpu_to_scr(goalp); cp->phys.head.savep = cpu_to_scr(lastp); cp->startp = cp->phys.head.savep; } /* * Call back routine for the DMA map service. * If bounce buffers are used (why ?), we may sleep and then * be called there in another context. 
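 * Note that sym_setup_data_and_start() also calls this routine
 * directly with nsegs == 0 for transfers that carry no data
 * (CAM_DIR_NONE), so the nsegs == 0 case below is a normal path.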
*/ static void sym_execute_ccb(void *arg, bus_dma_segment_t *psegs, int nsegs, int error) { ccb_p cp; hcb_p np; union ccb *ccb; cp = (ccb_p) arg; ccb = cp->cam_ccb; np = (hcb_p) cp->arg; SYM_LOCK_ASSERT(MA_OWNED); /* * Deal with weird races. */ if (sym_get_cam_status(ccb) != CAM_REQ_INPROG) goto out_abort; /* * Deal with weird errors. */ if (error) { cp->dmamapped = 0; sym_set_cam_status(cp->cam_ccb, CAM_REQ_ABORTED); goto out_abort; } /* * Build the data descriptor for the chip. */ if (nsegs) { int retv; /* 896 rev 1 requires to be careful about boundaries */ if (np->device_id == PCI_ID_SYM53C896 && np->revision_id <= 1) retv = sym_scatter_sg_physical(np, cp, psegs, nsegs); else retv = sym_fast_scatter_sg_physical(np,cp, psegs,nsegs); if (retv < 0) { sym_set_cam_status(cp->cam_ccb, CAM_REQ_TOO_BIG); goto out_abort; } } /* * Synchronize the DMA map only if we have * actually mapped the data. */ if (cp->dmamapped) { bus_dmamap_sync(np->data_dmat, cp->dmamap, (cp->dmamapped == SYM_DMA_READ ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE)); } /* * Set host status to busy state. * May have been set back to HS_WAIT to avoid a race. */ cp->host_status = cp->nego_status ? HS_NEGOTIATE : HS_BUSY; /* * Set data pointers. */ sym_setup_data_pointers(np, cp, (ccb->ccb_h.flags & CAM_DIR_MASK)); /* * Enqueue this IO in our pending queue. */ sym_enqueue_cam_ccb(cp); /* * When `#ifed 1', the code below makes the driver * panic on the first attempt to write to a SCSI device. * It is the first test we want to do after a driver * change that does not seem obviously safe. :) */ #if 0 switch (cp->cdb_buf[0]) { case 0x0A: case 0x2A: case 0xAA: panic("XXXXXXXXXXXXX WRITE NOT YET ALLOWED XXXXXXXXXXXXXX\n"); MDELAY(10000); break; default: break; } #endif /* * Activate this job. */ sym_put_start_queue(np, cp); return; out_abort: sym_xpt_done(np, ccb, cp); sym_free_ccb(np, cp); } /* * How complex it gets to deal with the data in CAM. * The Bus Dma stuff makes things still more complex. */ static void sym_setup_data_and_start(hcb_p np, struct ccb_scsiio *csio, ccb_p cp) { struct ccb_hdr *ccb_h; int dir, retv; SYM_LOCK_ASSERT(MA_OWNED); ccb_h = &csio->ccb_h; /* * Now deal with the data. */ cp->data_len = csio->dxfer_len; cp->arg = np; /* * No direction means no data. */ dir = (ccb_h->flags & CAM_DIR_MASK); if (dir == CAM_DIR_NONE) { sym_execute_ccb(cp, NULL, 0, 0); return; } cp->dmamapped = (dir == CAM_DIR_IN) ? SYM_DMA_READ : SYM_DMA_WRITE; retv = bus_dmamap_load_ccb(np->data_dmat, cp->dmamap, (union ccb *)csio, sym_execute_ccb, cp, 0); if (retv == EINPROGRESS) { cp->host_status = HS_WAIT; xpt_freeze_simq(np->sim, 1); csio->ccb_h.status |= CAM_RELEASE_SIMQ; } } /* * Move the scatter list to our data block. */ static int sym_fast_scatter_sg_physical(hcb_p np, ccb_p cp, bus_dma_segment_t *psegs, int nsegs) { struct sym_tblmove *data; bus_dma_segment_t *psegs2; SYM_LOCK_ASSERT(MA_OWNED); if (nsegs > SYM_CONF_MAX_SG) return -1; data = &cp->phys.data[SYM_CONF_MAX_SG-1]; psegs2 = &psegs[nsegs-1]; cp->segments = nsegs; while (1) { data->addr = cpu_to_scr(psegs2->ds_addr); data->size = cpu_to_scr(psegs2->ds_len); if (DEBUG_FLAGS & DEBUG_SCATTER) { printf ("%s scatter: paddr=%lx len=%ld\n", sym_name(np), (long) psegs2->ds_addr, (long) psegs2->ds_len); } if (psegs2 != psegs) { --data; --psegs2; continue; } break; } return 0; } /* * Scatter a SG list with physical addresses into bus addressable chunks. 
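 *
 * Chunks are generated from the tail of the segment list backwards: each
 * step emits the bytes between rounddown2(pe - 1, SYM_CONF_DMA_BOUNDARY)
 * and pe, so no entry of cp->phys.data[] ever crosses a DMA boundary.
 * A segment that straddles one boundary, for instance, simply ends up
 * as two table entries.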
*/ static int sym_scatter_sg_physical(hcb_p np, ccb_p cp, bus_dma_segment_t *psegs, int nsegs) { u_long ps, pe, pn; u_long k; int s, t; SYM_LOCK_ASSERT(MA_OWNED); s = SYM_CONF_MAX_SG - 1; t = nsegs - 1; ps = psegs[t].ds_addr; pe = ps + psegs[t].ds_len; while (s >= 0) { pn = rounddown2(pe - 1, SYM_CONF_DMA_BOUNDARY); if (pn <= ps) pn = ps; k = pe - pn; if (DEBUG_FLAGS & DEBUG_SCATTER) { printf ("%s scatter: paddr=%lx len=%ld\n", sym_name(np), pn, k); } cp->phys.data[s].addr = cpu_to_scr(pn); cp->phys.data[s].size = cpu_to_scr(k); --s; if (pn == ps) { if (--t < 0) break; ps = psegs[t].ds_addr; pe = ps + psegs[t].ds_len; } else pe = pn; } cp->segments = SYM_CONF_MAX_SG - 1 - s; return t >= 0 ? -1 : 0; } /* * SIM action for non performance critical stuff. */ static void sym_action2(struct cam_sim *sim, union ccb *ccb) { union ccb *abort_ccb; struct ccb_hdr *ccb_h; struct ccb_pathinq *cpi; struct ccb_trans_settings *cts; struct sym_trans *tip; hcb_p np; tcb_p tp; lcb_p lp; u_char dflags; /* * Retrieve our controller data structure. */ np = (hcb_p) cam_sim_softc(sim); SYM_LOCK_ASSERT(MA_OWNED); ccb_h = &ccb->ccb_h; switch (ccb_h->func_code) { case XPT_SET_TRAN_SETTINGS: cts = &ccb->cts; tp = &np->target[ccb_h->target_id]; /* * Update SPI transport settings in TARGET control block. * Update SCSI device settings in LUN control block. */ lp = sym_lp(tp, ccb_h->target_lun); if (cts->type == CTS_TYPE_CURRENT_SETTINGS) { sym_update_trans(np, &tp->tinfo.goal, cts); if (lp) sym_update_dflags(np, &lp->current_flags, cts); } if (cts->type == CTS_TYPE_USER_SETTINGS) { sym_update_trans(np, &tp->tinfo.user, cts); if (lp) sym_update_dflags(np, &lp->user_flags, cts); } sym_xpt_done2(np, ccb, CAM_REQ_CMP); break; case XPT_GET_TRAN_SETTINGS: cts = &ccb->cts; tp = &np->target[ccb_h->target_id]; lp = sym_lp(tp, ccb_h->target_lun); #define cts__scsi (&cts->proto_specific.scsi) #define cts__spi (&cts->xport_specific.spi) if (cts->type == CTS_TYPE_CURRENT_SETTINGS) { tip = &tp->tinfo.current; dflags = lp ? lp->current_flags : 0; } else { tip = &tp->tinfo.user; dflags = lp ? lp->user_flags : tp->usrflags; } cts->protocol = PROTO_SCSI; cts->transport = XPORT_SPI; cts->protocol_version = tip->scsi_version; cts->transport_version = tip->spi_version; cts__spi->sync_period = tip->period; cts__spi->sync_offset = tip->offset; cts__spi->bus_width = tip->width; cts__spi->ppr_options = tip->options; cts__spi->valid = CTS_SPI_VALID_SYNC_RATE | CTS_SPI_VALID_SYNC_OFFSET | CTS_SPI_VALID_BUS_WIDTH | CTS_SPI_VALID_PPR_OPTIONS; cts__spi->flags &= ~CTS_SPI_FLAGS_DISC_ENB; if (dflags & SYM_DISC_ENABLED) cts__spi->flags |= CTS_SPI_FLAGS_DISC_ENB; cts__spi->valid |= CTS_SPI_VALID_DISC; cts__scsi->flags &= ~CTS_SCSI_FLAGS_TAG_ENB; if (dflags & SYM_TAGS_ENABLED) cts__scsi->flags |= CTS_SCSI_FLAGS_TAG_ENB; cts__scsi->valid |= CTS_SCSI_VALID_TQ; #undef cts__spi #undef cts__scsi sym_xpt_done2(np, ccb, CAM_REQ_CMP); break; case XPT_CALC_GEOMETRY: cam_calc_geometry(&ccb->ccg, /*extended*/1); sym_xpt_done2(np, ccb, CAM_REQ_CMP); break; case XPT_PATH_INQ: cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_MDP_ABLE|PI_SDTR_ABLE|PI_TAG_ABLE; if ((np->features & FE_WIDE) != 0) cpi->hba_inquiry |= PI_WIDE_16; cpi->target_sprt = 0; cpi->hba_misc = PIM_UNMAPPED; if (np->usrflags & SYM_SCAN_TARGETS_HILO) cpi->hba_misc |= PIM_SCANHILO; if (np->usrflags & SYM_AVOID_BUS_RESET) cpi->hba_misc |= PIM_NOBUSRESET; cpi->hba_eng_cnt = 0; cpi->max_target = (np->features & FE_WIDE) ? 
15 : 7; /* Semantic problem:)LUN number max = max number of LUNs - 1 */ cpi->max_lun = SYM_CONF_MAX_LUN-1; if (SYM_SETUP_MAX_LUN < SYM_CONF_MAX_LUN) cpi->max_lun = SYM_SETUP_MAX_LUN-1; cpi->bus_id = cam_sim_bus(sim); cpi->initiator_id = np->myaddr; cpi->base_transfer_speed = 3300; strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strlcpy(cpi->hba_vid, "Symbios", HBA_IDLEN); strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_2; cpi->transport = XPORT_SPI; cpi->transport_version = 2; cpi->xport_specific.spi.ppr_options = SID_SPI_CLOCK_ST; if (np->features & FE_ULTRA3) { cpi->transport_version = 3; cpi->xport_specific.spi.ppr_options = SID_SPI_CLOCK_DT_ST; } cpi->maxio = SYM_CONF_MAX_SG * PAGE_SIZE; sym_xpt_done2(np, ccb, CAM_REQ_CMP); break; case XPT_ABORT: abort_ccb = ccb->cab.abort_ccb; switch(abort_ccb->ccb_h.func_code) { case XPT_SCSI_IO: if (sym_abort_scsiio(np, abort_ccb, 0) == 0) { sym_xpt_done2(np, ccb, CAM_REQ_CMP); break; } default: sym_xpt_done2(np, ccb, CAM_UA_ABORT); break; } break; case XPT_RESET_DEV: sym_reset_dev(np, ccb); break; case XPT_RESET_BUS: sym_reset_scsi_bus(np, 0); if (sym_verbose) { xpt_print_path(np->path); printf("SCSI BUS reset delivered.\n"); } sym_init (np, 1); sym_xpt_done2(np, ccb, CAM_REQ_CMP); break; case XPT_TERM_IO: default: sym_xpt_done2(np, ccb, CAM_REQ_INVALID); break; } } /* * Asynchronous notification handler. */ static void sym_async(void *cb_arg, u32 code, struct cam_path *path, void *args __unused) { hcb_p np; struct cam_sim *sim; u_int tn; tcb_p tp; sim = (struct cam_sim *) cb_arg; np = (hcb_p) cam_sim_softc(sim); SYM_LOCK_ASSERT(MA_OWNED); switch (code) { case AC_LOST_DEVICE: tn = xpt_path_target_id(path); if (tn >= SYM_CONF_MAX_TARGET) break; tp = &np->target[tn]; tp->to_reset = 0; tp->head.sval = 0; tp->head.wval = np->rv_scntl3; tp->head.uval = 0; tp->tinfo.current.period = tp->tinfo.goal.period = 0; tp->tinfo.current.offset = tp->tinfo.goal.offset = 0; tp->tinfo.current.width = tp->tinfo.goal.width = BUS_8_BIT; tp->tinfo.current.options = tp->tinfo.goal.options = 0; break; default: break; } } /* * Update transfer settings of a target. */ static void sym_update_trans(hcb_p np, struct sym_trans *tip, struct ccb_trans_settings *cts) { SYM_LOCK_ASSERT(MA_OWNED); /* * Update the infos. */ #define cts__spi (&cts->xport_specific.spi) if ((cts__spi->valid & CTS_SPI_VALID_BUS_WIDTH) != 0) tip->width = cts__spi->bus_width; if ((cts__spi->valid & CTS_SPI_VALID_SYNC_OFFSET) != 0) tip->offset = cts__spi->sync_offset; if ((cts__spi->valid & CTS_SPI_VALID_SYNC_RATE) != 0) tip->period = cts__spi->sync_period; if ((cts__spi->valid & CTS_SPI_VALID_PPR_OPTIONS) != 0) tip->options = (cts__spi->ppr_options & PPR_OPT_DT); if (cts->protocol_version != PROTO_VERSION_UNSPECIFIED && cts->protocol_version != PROTO_VERSION_UNKNOWN) tip->scsi_version = cts->protocol_version; if (cts->transport_version != XPORT_VERSION_UNSPECIFIED && cts->transport_version != XPORT_VERSION_UNKNOWN) tip->spi_version = cts->transport_version; #undef cts__spi /* * Scale against driver configuration limits. */ if (tip->width > SYM_SETUP_MAX_WIDE) tip->width = SYM_SETUP_MAX_WIDE; if (tip->period && tip->offset) { if (tip->offset > SYM_SETUP_MAX_OFFS) tip->offset = SYM_SETUP_MAX_OFFS; if (tip->period < SYM_SETUP_MIN_SYNC) tip->period = SYM_SETUP_MIN_SYNC; } else { tip->offset = 0; tip->period = 0; } /* * Scale against actual controller BUS width. 
*/ if (tip->width > np->maxwide) tip->width = np->maxwide; /* * Only accept DT if controller supports and SYNC/WIDE asked. */ if (!((np->features & (FE_C10|FE_ULTRA3)) == (FE_C10|FE_ULTRA3)) || !(tip->width == BUS_16_BIT && tip->offset)) { tip->options &= ~PPR_OPT_DT; } /* * Scale period factor and offset against controller limits. */ if (tip->offset && tip->period) { if (tip->options & PPR_OPT_DT) { if (tip->period < np->minsync_dt) tip->period = np->minsync_dt; if (tip->period > np->maxsync_dt) tip->period = np->maxsync_dt; if (tip->offset > np->maxoffs_dt) tip->offset = np->maxoffs_dt; } else { if (tip->period < np->minsync) tip->period = np->minsync; if (tip->period > np->maxsync) tip->period = np->maxsync; if (tip->offset > np->maxoffs) tip->offset = np->maxoffs; } } } /* * Update flags for a device (logical unit). */ static void sym_update_dflags(hcb_p np, u_char *flags, struct ccb_trans_settings *cts) { SYM_LOCK_ASSERT(MA_OWNED); #define cts__scsi (&cts->proto_specific.scsi) #define cts__spi (&cts->xport_specific.spi) if ((cts__spi->valid & CTS_SPI_VALID_DISC) != 0) { if ((cts__spi->flags & CTS_SPI_FLAGS_DISC_ENB) != 0) *flags |= SYM_DISC_ENABLED; else *flags &= ~SYM_DISC_ENABLED; } if ((cts__scsi->valid & CTS_SCSI_VALID_TQ) != 0) { if ((cts__scsi->flags & CTS_SCSI_FLAGS_TAG_ENB) != 0) *flags |= SYM_TAGS_ENABLED; else *flags &= ~SYM_TAGS_ENABLED; } #undef cts__spi #undef cts__scsi } /*============= DRIVER INITIALISATION ==================*/ static device_method_t sym_pci_methods[] = { DEVMETHOD(device_probe, sym_pci_probe), DEVMETHOD(device_attach, sym_pci_attach), DEVMETHOD_END }; static driver_t sym_pci_driver = { "sym", sym_pci_methods, 1 /* no softc */ }; static devclass_t sym_devclass; DRIVER_MODULE(sym, pci, sym_pci_driver, sym_devclass, NULL, NULL); MODULE_DEPEND(sym, cam, 1, 1, 1); MODULE_DEPEND(sym, pci, 1, 1, 1); static const struct sym_pci_chip sym_pci_dev_table[] = { {PCI_ID_SYM53C810, 0x0f, "810", 4, 8, 4, 64, FE_ERL} , #ifdef SYM_DEBUG_GENERIC_SUPPORT {PCI_ID_SYM53C810, 0xff, "810a", 4, 8, 4, 1, FE_BOF} , #else {PCI_ID_SYM53C810, 0xff, "810a", 4, 8, 4, 1, FE_CACHE_SET|FE_LDSTR|FE_PFEN|FE_BOF} , #endif {PCI_ID_SYM53C815, 0xff, "815", 4, 8, 4, 64, FE_BOF|FE_ERL} , {PCI_ID_SYM53C825, 0x0f, "825", 6, 8, 4, 64, FE_WIDE|FE_BOF|FE_ERL|FE_DIFF} , {PCI_ID_SYM53C825, 0xff, "825a", 6, 8, 4, 2, FE_WIDE|FE_CACHE0_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN|FE_RAM|FE_DIFF} , {PCI_ID_SYM53C860, 0xff, "860", 4, 8, 5, 1, FE_ULTRA|FE_CLK80|FE_CACHE_SET|FE_BOF|FE_LDSTR|FE_PFEN} , {PCI_ID_SYM53C875, 0x01, "875", 6, 16, 5, 2, FE_WIDE|FE_ULTRA|FE_CLK80|FE_CACHE0_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| FE_RAM|FE_DIFF} , {PCI_ID_SYM53C875, 0xff, "875", 6, 16, 5, 2, FE_WIDE|FE_ULTRA|FE_DBLR|FE_CACHE0_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| FE_RAM|FE_DIFF} , {PCI_ID_SYM53C875_2, 0xff, "875", 6, 16, 5, 2, FE_WIDE|FE_ULTRA|FE_DBLR|FE_CACHE0_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| FE_RAM|FE_DIFF} , {PCI_ID_SYM53C885, 0xff, "885", 6, 16, 5, 2, FE_WIDE|FE_ULTRA|FE_DBLR|FE_CACHE0_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| FE_RAM|FE_DIFF} , #ifdef SYM_DEBUG_GENERIC_SUPPORT {PCI_ID_SYM53C895, 0xff, "895", 6, 31, 7, 2, FE_WIDE|FE_ULTRA2|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFS| FE_RAM|FE_LCKFRQ} , #else {PCI_ID_SYM53C895, 0xff, "895", 6, 31, 7, 2, FE_WIDE|FE_ULTRA2|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| FE_RAM|FE_LCKFRQ} , #endif {PCI_ID_SYM53C896, 0xff, "896", 6, 31, 7, 4, FE_WIDE|FE_ULTRA2|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| FE_RAM|FE_RAM8K|FE_64BIT|FE_DAC|FE_IO256|FE_NOPM|FE_LEDC|FE_LCKFRQ} , 
{PCI_ID_SYM53C895A, 0xff, "895a", 6, 31, 7, 4, FE_WIDE|FE_ULTRA2|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| FE_RAM|FE_RAM8K|FE_DAC|FE_IO256|FE_NOPM|FE_LEDC|FE_LCKFRQ} , {PCI_ID_LSI53C1010, 0x00, "1010-33", 6, 31, 7, 8, FE_WIDE|FE_ULTRA3|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFBC|FE_LDSTR|FE_PFEN| FE_RAM|FE_RAM8K|FE_64BIT|FE_DAC|FE_IO256|FE_NOPM|FE_LEDC|FE_CRC| FE_C10} , {PCI_ID_LSI53C1010, 0xff, "1010-33", 6, 31, 7, 8, FE_WIDE|FE_ULTRA3|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFBC|FE_LDSTR|FE_PFEN| FE_RAM|FE_RAM8K|FE_64BIT|FE_DAC|FE_IO256|FE_NOPM|FE_LEDC|FE_CRC| FE_C10|FE_U3EN} , {PCI_ID_LSI53C1010_2, 0xff, "1010-66", 6, 31, 7, 8, FE_WIDE|FE_ULTRA3|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFBC|FE_LDSTR|FE_PFEN| FE_RAM|FE_RAM8K|FE_64BIT|FE_DAC|FE_IO256|FE_NOPM|FE_LEDC|FE_66MHZ|FE_CRC| FE_C10|FE_U3EN} , {PCI_ID_LSI53C1510D, 0xff, "1510d", 6, 31, 7, 4, FE_WIDE|FE_ULTRA2|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| FE_RAM|FE_IO256|FE_LEDC} }; /* * Look up the chip table. * * Return a pointer to the chip entry if found, * zero otherwise. */ static const struct sym_pci_chip * sym_find_pci_chip(device_t dev) { const struct sym_pci_chip *chip; int i; u_short device_id; u_char revision; if (pci_get_vendor(dev) != PCI_VENDOR_NCR) return NULL; device_id = pci_get_device(dev); revision = pci_get_revid(dev); for (i = 0; i < nitems(sym_pci_dev_table); i++) { chip = &sym_pci_dev_table[i]; if (device_id != chip->device_id) continue; if (revision > chip->revision_id) continue; return chip; } return NULL; } /* * Tell upper layer if the chip is supported. */ static int sym_pci_probe(device_t dev) { const struct sym_pci_chip *chip; chip = sym_find_pci_chip(dev); if (chip && sym_find_firmware(chip)) { device_set_desc(dev, chip->name); return (chip->lp_probe_bit & SYM_SETUP_LP_PROBE_MAP)? BUS_PROBE_LOW_PRIORITY : BUS_PROBE_DEFAULT; } return ENXIO; } /* * Attach a sym53c8xx device. */ static int sym_pci_attach(device_t dev) { const struct sym_pci_chip *chip; u_short command; u_char cachelnsz; struct sym_hcb *np = NULL; struct sym_nvram nvram; const struct sym_fw *fw = NULL; int i; bus_dma_tag_t bus_dmat; bus_dmat = bus_get_dma_tag(dev); /* * Only probed devices should be attached. * We just enjoy being paranoid. :) */ chip = sym_find_pci_chip(dev); if (chip == NULL || (fw = sym_find_firmware(chip)) == NULL) return (ENXIO); /* * Allocate immediately the host control block, * since we are only expecting to succeed. :) * We keep track in the HCB of all the resources that * are to be released on error. */ np = __sym_calloc_dma(bus_dmat, sizeof(*np), "HCB"); if (np) np->bus_dmat = bus_dmat; else return (ENXIO); device_set_softc(dev, np); SYM_LOCK_INIT(); /* * Copy some useful infos to the HCB. */ np->hcb_ba = vtobus(np); np->verbose = bootverbose; np->device = dev; np->device_id = pci_get_device(dev); np->revision_id = pci_get_revid(dev); np->features = chip->features; np->clock_divn = chip->nr_divisor; np->maxoffs = chip->offset_max; np->maxburst = chip->burst_max; np->scripta_sz = fw->a_size; np->scriptb_sz = fw->b_size; np->fw_setup = fw->setup; np->fw_patch = fw->patch; np->fw_name = fw->name; #ifdef __amd64__ np->target = sym_calloc_dma(SYM_CONF_MAX_TARGET * sizeof(*(np->target)), "TARGET"); if (!np->target) goto attach_failed; #endif /* * Initialize the CCB free and busy queues. */ sym_que_init(&np->free_ccbq); sym_que_init(&np->busy_ccbq); sym_que_init(&np->comp_ccbq); sym_que_init(&np->cam_ccbq); /* * Allocate a tag for the DMA of user data. 
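 *
 * The tag below restricts data mappings to 32-bit bus addresses, allows
 * at most SYM_CONF_MAX_SG segments per transfer with no segment larger
 * than (or crossing) SYM_CONF_DMA_BOUNDARY, and serializes deferred
 * busdma callbacks on the HCB mutex.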
*/ if (bus_dma_tag_create(np->bus_dmat, 1, SYM_CONF_DMA_BOUNDARY, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, SYM_CONF_MAX_SG, SYM_CONF_DMA_BOUNDARY, 0, busdma_lock_mutex, &np->mtx, &np->data_dmat)) { device_printf(dev, "failed to create DMA tag.\n"); goto attach_failed; } /* * Read and apply some fix-ups to the PCI COMMAND * register. We want the chip to be enabled for: * - BUS mastering * - PCI parity checking (reporting would also be fine) * - Write And Invalidate. */ command = pci_read_config(dev, PCIR_COMMAND, 2); command |= PCIM_CMD_BUSMASTEREN | PCIM_CMD_PERRESPEN | PCIM_CMD_MWRICEN; pci_write_config(dev, PCIR_COMMAND, command, 2); /* * Let the device know about the cache line size, * if it doesn't yet. */ cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1); if (!cachelnsz) { cachelnsz = 8; pci_write_config(dev, PCIR_CACHELNSZ, cachelnsz, 1); } /* * Alloc/get/map/retrieve everything that deals with MMIO. */ i = SYM_PCI_MMIO; np->mmio_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &i, RF_ACTIVE); if (!np->mmio_res) { device_printf(dev, "failed to allocate MMIO resources\n"); goto attach_failed; } np->mmio_ba = rman_get_start(np->mmio_res); /* * Allocate the IRQ. */ i = 0; np->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &i, RF_ACTIVE | RF_SHAREABLE); if (!np->irq_res) { device_printf(dev, "failed to allocate IRQ resource\n"); goto attach_failed; } #ifdef SYM_CONF_IOMAPPED /* * User want us to use normal IO with PCI. * Alloc/get/map/retrieve everything that deals with IO. */ i = SYM_PCI_IO; np->io_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &i, RF_ACTIVE); if (!np->io_res) { device_printf(dev, "failed to allocate IO resources\n"); goto attach_failed; } #endif /* SYM_CONF_IOMAPPED */ /* * If the chip has RAM. * Alloc/get/map/retrieve the corresponding resources. */ if (np->features & (FE_RAM|FE_RAM8K)) { int regs_id = SYM_PCI_RAM; if (np->features & FE_64BIT) regs_id = SYM_PCI_RAM64; np->ram_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, ®s_id, RF_ACTIVE); if (!np->ram_res) { device_printf(dev,"failed to allocate RAM resources\n"); goto attach_failed; } np->ram_id = regs_id; np->ram_ba = rman_get_start(np->ram_res); } /* * Save setting of some IO registers, so we will * be able to probe specific implementations. */ sym_save_initial_setting (np); /* * Reset the chip now, since it has been reported * that SCSI clock calibration may not work properly * if the chip is currently active. */ sym_chip_reset (np); /* * Try to read the user set-up. */ (void) sym_read_nvram(np, &nvram); /* * Prepare controller and devices settings, according * to chip features, user set-up and driver set-up. */ (void) sym_prepare_setting(np, &nvram); /* * Check the PCI clock frequency. * Must be performed after prepare_setting since it destroys * STEST1 that is used to probe for the clock doubler. */ i = sym_getpciclock(np); if (i > 37000) device_printf(dev, "PCI BUS clock seems too high: %u KHz.\n",i); /* * Allocate the start queue. */ np->squeue = (u32 *) sym_calloc_dma(sizeof(u32)*(MAX_QUEUE*2),"SQUEUE"); if (!np->squeue) goto attach_failed; np->squeue_ba = vtobus(np->squeue); /* * Allocate the done queue. */ np->dqueue = (u32 *) sym_calloc_dma(sizeof(u32)*(MAX_QUEUE*2),"DQUEUE"); if (!np->dqueue) goto attach_failed; np->dqueue_ba = vtobus(np->dqueue); /* * Allocate the target bus address array. */ np->targtbl = (u32 *) sym_calloc_dma(256, "TARGTBL"); if (!np->targtbl) goto attach_failed; np->targtbl_ba = vtobus(np->targtbl); /* * Allocate SCRIPTS areas. 
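 *
 * SCRIPTA/SCRIPTB hold the microcode run by the chip's SCRIPTS
 * processor.  They are copied into DMA-able host memory here; when the
 * chip has internal RAM (FE_RAM/FE_RAM8K) the SCRIPTS bus addresses are
 * later pointed at that RAM instead.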
*/ np->scripta0 = sym_calloc_dma(np->scripta_sz, "SCRIPTA0"); np->scriptb0 = sym_calloc_dma(np->scriptb_sz, "SCRIPTB0"); if (!np->scripta0 || !np->scriptb0) goto attach_failed; /* * Allocate the CCBs. We need at least ONE. */ for (i = 0; sym_alloc_ccb(np) != NULL; i++) ; if (i < 1) goto attach_failed; /* * Calculate BUS addresses where we are going * to load the SCRIPTS. */ np->scripta_ba = vtobus(np->scripta0); np->scriptb_ba = vtobus(np->scriptb0); np->scriptb0_ba = np->scriptb_ba; if (np->ram_ba) { np->scripta_ba = np->ram_ba; if (np->features & FE_RAM8K) { np->ram_ws = 8192; np->scriptb_ba = np->scripta_ba + 4096; #ifdef __LP64__ np->scr_ram_seg = cpu_to_scr(np->scripta_ba >> 32); #endif } else np->ram_ws = 4096; } /* * Copy scripts to controller instance. */ bcopy(fw->a_base, np->scripta0, np->scripta_sz); bcopy(fw->b_base, np->scriptb0, np->scriptb_sz); /* * Setup variable parts in scripts and compute * scripts bus addresses used from the C code. */ np->fw_setup(np, fw); /* * Bind SCRIPTS with physical addresses usable by the * SCRIPTS processor (as seen from the BUS = BUS addresses). */ sym_fw_bind_script(np, (u32 *) np->scripta0, np->scripta_sz); sym_fw_bind_script(np, (u32 *) np->scriptb0, np->scriptb_sz); #ifdef SYM_CONF_IARB_SUPPORT /* * If user wants IARB to be set when we win arbitration * and have other jobs, compute the max number of consecutive * settings of IARB hints before we leave devices a chance to * arbitrate for reselection. */ #ifdef SYM_SETUP_IARB_MAX np->iarb_max = SYM_SETUP_IARB_MAX; #else np->iarb_max = 4; #endif #endif /* * Prepare the idle and invalid task actions. */ np->idletask.start = cpu_to_scr(SCRIPTA_BA (np, idle)); np->idletask.restart = cpu_to_scr(SCRIPTB_BA (np, bad_i_t_l)); np->idletask_ba = vtobus(&np->idletask); np->notask.start = cpu_to_scr(SCRIPTA_BA (np, idle)); np->notask.restart = cpu_to_scr(SCRIPTB_BA (np, bad_i_t_l)); np->notask_ba = vtobus(&np->notask); np->bad_itl.start = cpu_to_scr(SCRIPTA_BA (np, idle)); np->bad_itl.restart = cpu_to_scr(SCRIPTB_BA (np, bad_i_t_l)); np->bad_itl_ba = vtobus(&np->bad_itl); np->bad_itlq.start = cpu_to_scr(SCRIPTA_BA (np, idle)); np->bad_itlq.restart = cpu_to_scr(SCRIPTB_BA (np,bad_i_t_l_q)); np->bad_itlq_ba = vtobus(&np->bad_itlq); /* * Allocate and prepare the lun JUMP table that is used * for a target prior the probing of devices (bad lun table). * A private table will be allocated for the target on the * first INQUIRY response received. */ np->badluntbl = sym_calloc_dma(256, "BADLUNTBL"); if (!np->badluntbl) goto attach_failed; np->badlun_sa = cpu_to_scr(SCRIPTB_BA (np, resel_bad_lun)); for (i = 0 ; i < 64 ; i++) /* 64 luns/target, no less */ np->badluntbl[i] = cpu_to_scr(vtobus(&np->badlun_sa)); /* * Prepare the bus address array that contains the bus * address of each target control block. * For now, assume all logical units are wrong. :) */ for (i = 0 ; i < SYM_CONF_MAX_TARGET ; i++) { np->targtbl[i] = cpu_to_scr(vtobus(&np->target[i])); np->target[i].head.luntbl_sa = cpu_to_scr(vtobus(np->badluntbl)); np->target[i].head.lun0_sa = cpu_to_scr(vtobus(&np->badlun_sa)); } /* * Now check the cache handling of the pci chipset. */ if (sym_snooptest (np)) { device_printf(dev, "CACHE INCORRECTLY CONFIGURED.\n"); goto attach_failed; } /* * Now deal with CAM. * Hopefully, we will succeed with that one.:) */ if (!sym_cam_attach(np)) goto attach_failed; /* * Sigh! we are done. */ return 0; /* * We have failed. 
* We will try to free all the resources we have * allocated, but if we are a boot device, this * will not help that much.;) */ attach_failed: if (np) sym_pci_free(np); return ENXIO; } /* * Free everything that have been allocated for this device. */ static void sym_pci_free(hcb_p np) { SYM_QUEHEAD *qp; ccb_p cp; tcb_p tp; lcb_p lp; int target, lun; /* * First free CAM resources. */ sym_cam_free(np); /* * Now every should be quiet for us to * free other resources. */ if (np->ram_res) bus_release_resource(np->device, SYS_RES_MEMORY, np->ram_id, np->ram_res); if (np->mmio_res) bus_release_resource(np->device, SYS_RES_MEMORY, SYM_PCI_MMIO, np->mmio_res); if (np->io_res) bus_release_resource(np->device, SYS_RES_IOPORT, SYM_PCI_IO, np->io_res); if (np->irq_res) bus_release_resource(np->device, SYS_RES_IRQ, 0, np->irq_res); if (np->scriptb0) sym_mfree_dma(np->scriptb0, np->scriptb_sz, "SCRIPTB0"); if (np->scripta0) sym_mfree_dma(np->scripta0, np->scripta_sz, "SCRIPTA0"); if (np->squeue) sym_mfree_dma(np->squeue, sizeof(u32)*(MAX_QUEUE*2), "SQUEUE"); if (np->dqueue) sym_mfree_dma(np->dqueue, sizeof(u32)*(MAX_QUEUE*2), "DQUEUE"); while ((qp = sym_remque_head(&np->free_ccbq)) != NULL) { cp = sym_que_entry(qp, struct sym_ccb, link_ccbq); bus_dmamap_destroy(np->data_dmat, cp->dmamap); sym_mfree_dma(cp->sns_bbuf, SYM_SNS_BBUF_LEN, "SNS_BBUF"); sym_mfree_dma(cp, sizeof(*cp), "CCB"); } if (np->badluntbl) sym_mfree_dma(np->badluntbl, 256,"BADLUNTBL"); for (target = 0; target < SYM_CONF_MAX_TARGET ; target++) { tp = &np->target[target]; for (lun = 0 ; lun < SYM_CONF_MAX_LUN ; lun++) { lp = sym_lp(tp, lun); if (!lp) continue; if (lp->itlq_tbl) sym_mfree_dma(lp->itlq_tbl, SYM_CONF_MAX_TASK*4, "ITLQ_TBL"); if (lp->cb_tags) sym_mfree(lp->cb_tags, SYM_CONF_MAX_TASK, "CB_TAGS"); sym_mfree_dma(lp, sizeof(*lp), "LCB"); } #if SYM_CONF_MAX_LUN > 1 if (tp->lunmp) sym_mfree(tp->lunmp, SYM_CONF_MAX_LUN*sizeof(lcb_p), "LUNMP"); #endif } #ifdef __amd64__ if (np->target) sym_mfree_dma(np->target, SYM_CONF_MAX_TARGET * sizeof(*(np->target)), "TARGET"); #endif if (np->targtbl) sym_mfree_dma(np->targtbl, 256, "TARGTBL"); if (np->data_dmat) bus_dma_tag_destroy(np->data_dmat); if (SYM_LOCK_INITIALIZED() != 0) SYM_LOCK_DESTROY(); device_set_softc(np->device, NULL); sym_mfree_dma(np, sizeof(*np), "HCB"); } /* * Allocate CAM resources and register a bus to CAM. */ static int sym_cam_attach(hcb_p np) { struct cam_devq *devq = NULL; struct cam_sim *sim = NULL; struct cam_path *path = NULL; int err; /* * Establish our interrupt handler. */ err = bus_setup_intr(np->device, np->irq_res, INTR_ENTROPY | INTR_MPSAFE | INTR_TYPE_CAM, NULL, sym_intr, np, &np->intr); if (err) { device_printf(np->device, "bus_setup_intr() failed: %d\n", err); goto fail; } /* * Create the device queue for our sym SIM. */ devq = cam_simq_alloc(SYM_CONF_MAX_START); if (!devq) goto fail; /* * Construct our SIM entry. */ sim = cam_sim_alloc(sym_action, sym_poll, "sym", np, device_get_unit(np->device), &np->mtx, 1, SYM_SETUP_MAX_TAG, devq); if (!sim) goto fail; SYM_LOCK(); if (xpt_bus_register(sim, np->device, 0) != CAM_SUCCESS) goto fail; np->sim = sim; sim = NULL; if (xpt_create_path(&path, NULL, cam_sim_path(np->sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { goto fail; } np->path = path; /* * Establish our async notification handler. */ if (xpt_register_async(AC_LOST_DEVICE, sym_async, np->sim, path) != CAM_REQ_CMP) goto fail; /* * Start the chip now, without resetting the BUS, since * it seems that this must stay under control of CAM. 
* With LVD/SE capable chips and BUS in SE mode, we may * get a spurious SMBC interrupt. */ sym_init (np, 0); SYM_UNLOCK(); return 1; fail: if (sim) cam_sim_free(sim, FALSE); if (devq) cam_simq_free(devq); SYM_UNLOCK(); sym_cam_free(np); return 0; } /* * Free everything that deals with CAM. */ static void sym_cam_free(hcb_p np) { SYM_LOCK_ASSERT(MA_NOTOWNED); if (np->intr) { bus_teardown_intr(np->device, np->irq_res, np->intr); np->intr = NULL; } SYM_LOCK(); if (np->sim) { xpt_bus_deregister(cam_sim_path(np->sim)); cam_sim_free(np->sim, /*free_devq*/ TRUE); np->sim = NULL; } if (np->path) { xpt_free_path(np->path); np->path = NULL; } SYM_UNLOCK(); } /*============ OPTIONNAL NVRAM SUPPORT =================*/ /* * Get host setup from NVRAM. */ static void sym_nvram_setup_host (hcb_p np, struct sym_nvram *nvram) { #ifdef SYM_CONF_NVRAM_SUPPORT /* * Get parity checking, host ID, verbose mode * and miscellaneous host flags from NVRAM. */ switch(nvram->type) { case SYM_SYMBIOS_NVRAM: if (!(nvram->data.Symbios.flags & SYMBIOS_PARITY_ENABLE)) np->rv_scntl0 &= ~0x0a; np->myaddr = nvram->data.Symbios.host_id & 0x0f; if (nvram->data.Symbios.flags & SYMBIOS_VERBOSE_MSGS) np->verbose += 1; if (nvram->data.Symbios.flags1 & SYMBIOS_SCAN_HI_LO) np->usrflags |= SYM_SCAN_TARGETS_HILO; if (nvram->data.Symbios.flags2 & SYMBIOS_AVOID_BUS_RESET) np->usrflags |= SYM_AVOID_BUS_RESET; break; case SYM_TEKRAM_NVRAM: np->myaddr = nvram->data.Tekram.host_id & 0x0f; break; default: break; } #endif } /* * Get target setup from NVRAM. */ #ifdef SYM_CONF_NVRAM_SUPPORT static void sym_Symbios_setup_target(hcb_p np,int target, Symbios_nvram *nvram); static void sym_Tekram_setup_target(hcb_p np,int target, Tekram_nvram *nvram); #endif static void sym_nvram_setup_target (hcb_p np, int target, struct sym_nvram *nvp) { #ifdef SYM_CONF_NVRAM_SUPPORT switch(nvp->type) { case SYM_SYMBIOS_NVRAM: sym_Symbios_setup_target (np, target, &nvp->data.Symbios); break; case SYM_TEKRAM_NVRAM: sym_Tekram_setup_target (np, target, &nvp->data.Tekram); break; default: break; } #endif } #ifdef SYM_CONF_NVRAM_SUPPORT /* * Get target set-up from Symbios format NVRAM. */ static void sym_Symbios_setup_target(hcb_p np, int target, Symbios_nvram *nvram) { tcb_p tp = &np->target[target]; Symbios_target *tn = &nvram->target[target]; tp->tinfo.user.period = tn->sync_period ? (tn->sync_period + 3) / 4 : 0; tp->tinfo.user.width = tn->bus_width == 0x10 ? BUS_16_BIT : BUS_8_BIT; tp->usrtags = (tn->flags & SYMBIOS_QUEUE_TAGS_ENABLED)? SYM_SETUP_MAX_TAG : 0; if (!(tn->flags & SYMBIOS_DISCONNECT_ENABLE)) tp->usrflags &= ~SYM_DISC_ENABLED; if (!(tn->flags & SYMBIOS_SCAN_AT_BOOT_TIME)) tp->usrflags |= SYM_SCAN_BOOT_DISABLED; if (!(tn->flags & SYMBIOS_SCAN_LUNS)) tp->usrflags |= SYM_SCAN_LUNS_DISABLED; } /* * Get target set-up from Tekram format NVRAM. */ static void sym_Tekram_setup_target(hcb_p np, int target, Tekram_nvram *nvram) { tcb_p tp = &np->target[target]; struct Tekram_target *tn = &nvram->target[target]; int i; if (tn->flags & TEKRAM_SYNC_NEGO) { i = tn->sync_index & 0xf; tp->tinfo.user.period = Tekram_sync[i]; } tp->tinfo.user.width = (tn->flags & TEKRAM_WIDE_NEGO) ? 
BUS_16_BIT : BUS_8_BIT; if (tn->flags & TEKRAM_TAGGED_COMMANDS) { tp->usrtags = 2 << nvram->max_tags_index; } if (tn->flags & TEKRAM_DISCONNECT_ENABLE) tp->usrflags |= SYM_DISC_ENABLED; /* If any device does not support parity, we will not use this option */ if (!(tn->flags & TEKRAM_PARITY_CHECK)) np->rv_scntl0 &= ~0x0a; /* SCSI parity checking disabled */ } #ifdef SYM_CONF_DEBUG_NVRAM /* * Dump Symbios format NVRAM for debugging purpose. */ static void sym_display_Symbios_nvram(hcb_p np, Symbios_nvram *nvram) { int i; /* display Symbios nvram host data */ printf("%s: HOST ID=%d%s%s%s%s%s%s\n", sym_name(np), nvram->host_id & 0x0f, (nvram->flags & SYMBIOS_SCAM_ENABLE) ? " SCAM" :"", (nvram->flags & SYMBIOS_PARITY_ENABLE) ? " PARITY" :"", (nvram->flags & SYMBIOS_VERBOSE_MSGS) ? " VERBOSE" :"", (nvram->flags & SYMBIOS_CHS_MAPPING) ? " CHS_ALT" :"", (nvram->flags2 & SYMBIOS_AVOID_BUS_RESET)?" NO_RESET" :"", (nvram->flags1 & SYMBIOS_SCAN_HI_LO) ? " HI_LO" :""); /* display Symbios nvram drive data */ for (i = 0 ; i < 15 ; i++) { struct Symbios_target *tn = &nvram->target[i]; printf("%s-%d:%s%s%s%s WIDTH=%d SYNC=%d TMO=%d\n", sym_name(np), i, (tn->flags & SYMBIOS_DISCONNECT_ENABLE) ? " DISC" : "", (tn->flags & SYMBIOS_SCAN_AT_BOOT_TIME) ? " SCAN_BOOT" : "", (tn->flags & SYMBIOS_SCAN_LUNS) ? " SCAN_LUNS" : "", (tn->flags & SYMBIOS_QUEUE_TAGS_ENABLED)? " TCQ" : "", tn->bus_width, tn->sync_period / 4, tn->timeout); } } /* * Dump TEKRAM format NVRAM for debugging purpose. */ static const u_char Tekram_boot_delay[7] = {3, 5, 10, 20, 30, 60, 120}; static void sym_display_Tekram_nvram(hcb_p np, Tekram_nvram *nvram) { int i, tags, boot_delay; char *rem; /* display Tekram nvram host data */ tags = 2 << nvram->max_tags_index; boot_delay = 0; if (nvram->boot_delay_index < 6) boot_delay = Tekram_boot_delay[nvram->boot_delay_index]; switch((nvram->flags & TEKRAM_REMOVABLE_FLAGS) >> 6) { default: case 0: rem = ""; break; case 1: rem = " REMOVABLE=boot device"; break; case 2: rem = " REMOVABLE=all"; break; } printf("%s: HOST ID=%d%s%s%s%s%s%s%s%s%s BOOT DELAY=%d tags=%d\n", sym_name(np), nvram->host_id & 0x0f, (nvram->flags1 & SYMBIOS_SCAM_ENABLE) ? " SCAM" :"", (nvram->flags & TEKRAM_MORE_THAN_2_DRIVES) ? " >2DRIVES" :"", (nvram->flags & TEKRAM_DRIVES_SUP_1GB) ? " >1GB" :"", (nvram->flags & TEKRAM_RESET_ON_POWER_ON) ? " RESET" :"", (nvram->flags & TEKRAM_ACTIVE_NEGATION) ? " ACT_NEG" :"", (nvram->flags & TEKRAM_IMMEDIATE_SEEK) ? " IMM_SEEK" :"", (nvram->flags & TEKRAM_SCAN_LUNS) ? " SCAN_LUNS" :"", (nvram->flags1 & TEKRAM_F2_F6_ENABLED) ? " F2_F6" :"", rem, boot_delay, tags); /* display Tekram nvram drive data */ for (i = 0; i <= 15; i++) { int sync, j; struct Tekram_target *tn = &nvram->target[i]; j = tn->sync_index & 0xf; sync = Tekram_sync[j]; printf("%s-%d:%s%s%s%s%s%s PERIOD=%d\n", sym_name(np), i, (tn->flags & TEKRAM_PARITY_CHECK) ? " PARITY" : "", (tn->flags & TEKRAM_SYNC_NEGO) ? " SYNC" : "", (tn->flags & TEKRAM_DISCONNECT_ENABLE) ? " DISC" : "", (tn->flags & TEKRAM_START_CMD) ? " START" : "", (tn->flags & TEKRAM_TAGGED_COMMANDS) ? " TCQ" : "", (tn->flags & TEKRAM_WIDE_NEGO) ? " WIDE" : "", sync); } } #endif /* SYM_CONF_DEBUG_NVRAM */ #endif /* SYM_CONF_NVRAM_SUPPORT */ /* * Try reading Symbios or Tekram NVRAM */ #ifdef SYM_CONF_NVRAM_SUPPORT static int sym_read_Symbios_nvram (hcb_p np, Symbios_nvram *nvram); static int sym_read_Tekram_nvram (hcb_p np, Tekram_nvram *nvram); #endif static int sym_read_nvram(hcb_p np, struct sym_nvram *nvp) { #ifdef SYM_CONF_NVRAM_SUPPORT /* * Try to read SYMBIOS nvram. 
* Try to read TEKRAM nvram if Symbios nvram not found. */ if (SYM_SETUP_SYMBIOS_NVRAM && !sym_read_Symbios_nvram (np, &nvp->data.Symbios)) { nvp->type = SYM_SYMBIOS_NVRAM; #ifdef SYM_CONF_DEBUG_NVRAM sym_display_Symbios_nvram(np, &nvp->data.Symbios); #endif } else if (SYM_SETUP_TEKRAM_NVRAM && !sym_read_Tekram_nvram (np, &nvp->data.Tekram)) { nvp->type = SYM_TEKRAM_NVRAM; #ifdef SYM_CONF_DEBUG_NVRAM sym_display_Tekram_nvram(np, &nvp->data.Tekram); #endif } else nvp->type = 0; #else nvp->type = 0; #endif return nvp->type; } #ifdef SYM_CONF_NVRAM_SUPPORT /* * 24C16 EEPROM reading. * * GPOI0 - data in/data out * GPIO1 - clock * Symbios NVRAM wiring now also used by Tekram. */ #define SET_BIT 0 #define CLR_BIT 1 #define SET_CLK 2 #define CLR_CLK 3 /* * Set/clear data/clock bit in GPIO0 */ static void S24C16_set_bit(hcb_p np, u_char write_bit, u_char *gpreg, int bit_mode) { UDELAY (5); switch (bit_mode){ case SET_BIT: *gpreg |= write_bit; break; case CLR_BIT: *gpreg &= 0xfe; break; case SET_CLK: *gpreg |= 0x02; break; case CLR_CLK: *gpreg &= 0xfd; break; } OUTB (nc_gpreg, *gpreg); UDELAY (5); } /* * Send START condition to NVRAM to wake it up. */ static void S24C16_start(hcb_p np, u_char *gpreg) { S24C16_set_bit(np, 1, gpreg, SET_BIT); S24C16_set_bit(np, 0, gpreg, SET_CLK); S24C16_set_bit(np, 0, gpreg, CLR_BIT); S24C16_set_bit(np, 0, gpreg, CLR_CLK); } /* * Send STOP condition to NVRAM - puts NVRAM to sleep... ZZzzzz!! */ static void S24C16_stop(hcb_p np, u_char *gpreg) { S24C16_set_bit(np, 0, gpreg, SET_CLK); S24C16_set_bit(np, 1, gpreg, SET_BIT); } /* * Read or write a bit to the NVRAM, * read if GPIO0 input else write if GPIO0 output */ static void S24C16_do_bit(hcb_p np, u_char *read_bit, u_char write_bit, u_char *gpreg) { S24C16_set_bit(np, write_bit, gpreg, SET_BIT); S24C16_set_bit(np, 0, gpreg, SET_CLK); if (read_bit) *read_bit = INB (nc_gpreg); S24C16_set_bit(np, 0, gpreg, CLR_CLK); S24C16_set_bit(np, 0, gpreg, CLR_BIT); } /* * Output an ACK to the NVRAM after reading, * change GPIO0 to output and when done back to an input */ static void S24C16_write_ack(hcb_p np, u_char write_bit, u_char *gpreg, u_char *gpcntl) { OUTB (nc_gpcntl, *gpcntl & 0xfe); S24C16_do_bit(np, 0, write_bit, gpreg); OUTB (nc_gpcntl, *gpcntl); } /* * Input an ACK from NVRAM after writing, * change GPIO0 to input and when done back to an output */ static void S24C16_read_ack(hcb_p np, u_char *read_bit, u_char *gpreg, u_char *gpcntl) { OUTB (nc_gpcntl, *gpcntl | 0x01); S24C16_do_bit(np, read_bit, 1, gpreg); OUTB (nc_gpcntl, *gpcntl); } /* * WRITE a byte to the NVRAM and then get an ACK to see it was accepted OK, * GPIO0 must already be set as an output */ static void S24C16_write_byte(hcb_p np, u_char *ack_data, u_char write_data, u_char *gpreg, u_char *gpcntl) { int x; for (x = 0; x < 8; x++) S24C16_do_bit(np, 0, (write_data >> (7 - x)) & 0x01, gpreg); S24C16_read_ack(np, ack_data, gpreg, gpcntl); } /* * READ a byte from the NVRAM and then send an ACK to say we have got it, * GPIO0 must already be set as an input */ static void S24C16_read_byte(hcb_p np, u_char *read_data, u_char ack_data, u_char *gpreg, u_char *gpcntl) { int x; u_char read_bit; *read_data = 0; for (x = 0; x < 8; x++) { S24C16_do_bit(np, &read_bit, 1, gpreg); *read_data |= ((read_bit & 0x01) << (7 - x)); } S24C16_write_ack(np, ack_data, gpreg, gpcntl); } /* * Read 'len' bytes starting at 'offset'. 
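 *
 * The 24c16 is addressed with an I2C-style device byte of 0xa0 plus the
 * top three offset bits folded into bits 3..1, then the low eight offset
 * bits; offset 0x100, for example, yields the device byte 0xa2.  A
 * repeated START with the read bit set (0xa1 | ...) switches to
 * sequential reads, and the last byte is NACKed to end the transfer.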
*/ static int sym_read_S24C16_nvram (hcb_p np, int offset, u_char *data, int len) { u_char gpcntl, gpreg; u_char old_gpcntl, old_gpreg; u_char ack_data; int retv = 1; int x; /* save current state of GPCNTL and GPREG */ old_gpreg = INB (nc_gpreg); old_gpcntl = INB (nc_gpcntl); gpcntl = old_gpcntl & 0x1c; /* set up GPREG & GPCNTL to set GPIO0 and GPIO1 in to known state */ OUTB (nc_gpreg, old_gpreg); OUTB (nc_gpcntl, gpcntl); /* this is to set NVRAM into a known state with GPIO0/1 both low */ gpreg = old_gpreg; S24C16_set_bit(np, 0, &gpreg, CLR_CLK); S24C16_set_bit(np, 0, &gpreg, CLR_BIT); /* now set NVRAM inactive with GPIO0/1 both high */ S24C16_stop(np, &gpreg); /* activate NVRAM */ S24C16_start(np, &gpreg); /* write device code and random address MSB */ S24C16_write_byte(np, &ack_data, 0xa0 | ((offset >> 7) & 0x0e), &gpreg, &gpcntl); if (ack_data & 0x01) goto out; /* write random address LSB */ S24C16_write_byte(np, &ack_data, offset & 0xff, &gpreg, &gpcntl); if (ack_data & 0x01) goto out; /* regenerate START state to set up for reading */ S24C16_start(np, &gpreg); /* rewrite device code and address MSB with read bit set (lsb = 0x01) */ S24C16_write_byte(np, &ack_data, 0xa1 | ((offset >> 7) & 0x0e), &gpreg, &gpcntl); if (ack_data & 0x01) goto out; /* now set up GPIO0 for inputting data */ gpcntl |= 0x01; OUTB (nc_gpcntl, gpcntl); /* input all requested data - only part of total NVRAM */ for (x = 0; x < len; x++) S24C16_read_byte(np, &data[x], (x == (len-1)), &gpreg, &gpcntl); /* finally put NVRAM back in inactive mode */ gpcntl &= 0xfe; OUTB (nc_gpcntl, gpcntl); S24C16_stop(np, &gpreg); retv = 0; out: /* return GPIO0/1 to original states after having accessed NVRAM */ OUTB (nc_gpcntl, old_gpcntl); OUTB (nc_gpreg, old_gpreg); return retv; } #undef SET_BIT /* 0 */ #undef CLR_BIT /* 1 */ #undef SET_CLK /* 2 */ #undef CLR_CLK /* 3 */ /* * Try reading Symbios NVRAM. * Return 0 if OK. */ static int sym_read_Symbios_nvram (hcb_p np, Symbios_nvram *nvram) { static u_char Symbios_trailer[6] = {0xfe, 0xfe, 0, 0, 0, 0}; u_char *data = (u_char *) nvram; int len = sizeof(*nvram); u_short csum; int x; /* probe the 24c16 and read the SYMBIOS 24c16 area */ if (sym_read_S24C16_nvram (np, SYMBIOS_NVRAM_ADDRESS, data, len)) return 1; /* check valid NVRAM signature, verify byte count and checksum */ if (nvram->type != 0 || bcmp(nvram->trailer, Symbios_trailer, 6) || nvram->byte_count != len - 12) return 1; /* verify checksum */ for (x = 6, csum = 0; x < len - 6; x++) csum += data[x]; if (csum != nvram->checksum) return 1; return 0; } /* * 93C46 EEPROM reading. * * GPOI0 - data in * GPIO1 - data out * GPIO2 - clock * GPIO4 - chip select * * Used by Tekram. */ /* * Pulse clock bit in GPIO0 */ static void T93C46_Clk(hcb_p np, u_char *gpreg) { OUTB (nc_gpreg, *gpreg | 0x04); UDELAY (2); OUTB (nc_gpreg, *gpreg); } /* * Read bit from NVRAM */ static void T93C46_Read_Bit(hcb_p np, u_char *read_bit, u_char *gpreg) { UDELAY (2); T93C46_Clk(np, gpreg); *read_bit = INB (nc_gpreg); } /* * Write bit to GPIO0 */ static void T93C46_Write_Bit(hcb_p np, u_char write_bit, u_char *gpreg) { if (write_bit & 0x01) *gpreg |= 0x02; else *gpreg &= 0xfd; *gpreg |= 0x10; OUTB (nc_gpreg, *gpreg); UDELAY (2); T93C46_Clk(np, gpreg); } /* * Send STOP condition to NVRAM - puts NVRAM to sleep... ZZZzzz!! 
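 *
 * "STOP" here just means dropping the chip select line (GPIO4, bit 0x10
 * of GPREG) and issuing one more clock pulse; the next command then
 * starts over with the 9-bit sequence (start bit, opcode, address),
 * e.g. 0x180 | x for "READ word x".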
*/ static void T93C46_Stop(hcb_p np, u_char *gpreg) { *gpreg &= 0xef; OUTB (nc_gpreg, *gpreg); UDELAY (2); T93C46_Clk(np, gpreg); } /* * Send read command and address to NVRAM */ static void T93C46_Send_Command(hcb_p np, u_short write_data, u_char *read_bit, u_char *gpreg) { int x; /* send 9 bits, start bit (1), command (2), address (6) */ for (x = 0; x < 9; x++) T93C46_Write_Bit(np, (u_char) (write_data >> (8 - x)), gpreg); *read_bit = INB (nc_gpreg); } /* * READ 2 bytes from the NVRAM */ static void T93C46_Read_Word(hcb_p np, u_short *nvram_data, u_char *gpreg) { int x; u_char read_bit; *nvram_data = 0; for (x = 0; x < 16; x++) { T93C46_Read_Bit(np, &read_bit, gpreg); if (read_bit & 0x01) *nvram_data |= (0x01 << (15 - x)); else *nvram_data &= ~(0x01 << (15 - x)); } } /* * Read Tekram NvRAM data. */ static int T93C46_Read_Data(hcb_p np, u_short *data,int len,u_char *gpreg) { u_char read_bit; int x; for (x = 0; x < len; x++) { /* output read command and address */ T93C46_Send_Command(np, 0x180 | x, &read_bit, gpreg); if (read_bit & 0x01) return 1; /* Bad */ T93C46_Read_Word(np, &data[x], gpreg); T93C46_Stop(np, gpreg); } return 0; } /* * Try reading 93C46 Tekram NVRAM. */ static int sym_read_T93C46_nvram (hcb_p np, Tekram_nvram *nvram) { u_char gpcntl, gpreg; u_char old_gpcntl, old_gpreg; int retv = 1; /* save current state of GPCNTL and GPREG */ old_gpreg = INB (nc_gpreg); old_gpcntl = INB (nc_gpcntl); /* set up GPREG & GPCNTL to set GPIO0/1/2/4 in to known state, 0 in, 1/2/4 out */ gpreg = old_gpreg & 0xe9; OUTB (nc_gpreg, gpreg); gpcntl = (old_gpcntl & 0xe9) | 0x09; OUTB (nc_gpcntl, gpcntl); /* input all of NVRAM, 64 words */ retv = T93C46_Read_Data(np, (u_short *) nvram, sizeof(*nvram) / sizeof(short), &gpreg); /* return GPIO0/1/2/4 to original states after having accessed NVRAM */ OUTB (nc_gpcntl, old_gpcntl); OUTB (nc_gpreg, old_gpreg); return retv; } /* * Try reading Tekram NVRAM. * Return 0 if OK. */ static int sym_read_Tekram_nvram (hcb_p np, Tekram_nvram *nvram) { u_char *data = (u_char *) nvram; int len = sizeof(*nvram); u_short csum; int x; switch (np->device_id) { case PCI_ID_SYM53C885: case PCI_ID_SYM53C895: case PCI_ID_SYM53C896: x = sym_read_S24C16_nvram(np, TEKRAM_24C16_NVRAM_ADDRESS, data, len); break; case PCI_ID_SYM53C875: x = sym_read_S24C16_nvram(np, TEKRAM_24C16_NVRAM_ADDRESS, data, len); if (!x) break; default: x = sym_read_T93C46_nvram(np, nvram); break; } if (x) return 1; /* verify checksum */ for (x = 0, csum = 0; x < len - 1; x += 2) csum += data[x] + (data[x+1] << 8); if (csum != 0x1234) return 1; return 0; } #endif /* SYM_CONF_NVRAM_SUPPORT */ Index: head/sys/kern/subr_devmap.c =================================================================== --- head/sys/kern/subr_devmap.c (revision 322167) +++ head/sys/kern/subr_devmap.c (revision 322168) @@ -1,319 +1,319 @@ /*- * Copyright (c) 2013 Ian Lepore * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* Routines for mapping device memory. */ #include "opt_ddb.h" #include #include #include #include #include #include #include static const struct devmap_entry *devmap_table; static boolean_t devmap_bootstrap_done = false; /* * The allocated-kva (akva) devmap table and metadata. Platforms can call * devmap_add_entry() to add static device mappings to this table using * automatically allocated virtual addresses carved out of the top of kva space. * Allocation begins immediately below the ARM_VECTORS_HIGH address. */ #define AKVA_DEVMAP_MAX_ENTRIES 32 static struct devmap_entry akva_devmap_entries[AKVA_DEVMAP_MAX_ENTRIES]; static u_int akva_devmap_idx; static vm_offset_t akva_devmap_vaddr = DEVMAP_MAX_VADDR; -#if defined(__aarch64__) || defined(__riscv__) +#if defined(__aarch64__) || defined(__riscv) extern int early_boot; #endif /* * Print the contents of the static mapping table using the provided printf-like * output function (which will be either printf or db_printf). */ static void devmap_dump_table(int (*prfunc)(const char *, ...)) { const struct devmap_entry *pd; if (devmap_table == NULL || devmap_table[0].pd_size == 0) { prfunc("No static device mappings.\n"); return; } prfunc("Static device mappings:\n"); for (pd = devmap_table; pd->pd_size != 0; ++pd) { prfunc(" 0x%08x - 0x%08x mapped at VA 0x%08x\n", pd->pd_pa, pd->pd_pa + pd->pd_size - 1, pd->pd_va); } } /* * Print the contents of the static mapping table. Used for bootverbose. */ void devmap_print_table() { devmap_dump_table(printf); } /* * Return the "last" kva address used by the registered devmap table. It's * actually the lowest address used by the static mappings, i.e., the address of * the first unusable byte of KVA. */ vm_offset_t devmap_lastaddr() { const struct devmap_entry *pd; vm_offset_t lowaddr; if (akva_devmap_idx > 0) return (akva_devmap_vaddr); lowaddr = DEVMAP_MAX_VADDR; for (pd = devmap_table; pd != NULL && pd->pd_size != 0; ++pd) { if (lowaddr > pd->pd_va) lowaddr = pd->pd_va; } return (lowaddr); } /* * Add an entry to the internal "akva" static devmap table using the given * physical address and size and a virtual address allocated from the top of * kva. This automatically registers the akva table on the first call, so all a * platform has to do is call this routine to install as many mappings as it * needs and when initarm() calls devmap_bootstrap() it will pick up all the * entries in the akva table automatically. 
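 *
 * Virtual addresses are handed out from DEVMAP_MAX_VADDR downwards; on
 * arm, a request whose physical address and size are both 1MB aligned is
 * also aligned down to a 1MB virtual boundary so it can be mapped with a
 * single section entry.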
*/ void devmap_add_entry(vm_paddr_t pa, vm_size_t sz) { struct devmap_entry *m; if (devmap_bootstrap_done) panic("devmap_add_entry() after devmap_bootstrap()"); if (akva_devmap_idx == (AKVA_DEVMAP_MAX_ENTRIES - 1)) panic("AKVA_DEVMAP_MAX_ENTRIES is too small"); if (akva_devmap_idx == 0) devmap_register_table(akva_devmap_entries); /* * Allocate virtual address space from the top of kva downwards. If the * range being mapped is aligned and sized to 1MB boundaries then also * align the virtual address to the next-lower 1MB boundary so that we * end up with a nice efficient section mapping. */ #ifdef __arm__ if ((pa & 0x000fffff) == 0 && (sz & 0x000fffff) == 0) { akva_devmap_vaddr = trunc_1mpage(akva_devmap_vaddr - sz); } else #endif { akva_devmap_vaddr = trunc_page(akva_devmap_vaddr - sz); } m = &akva_devmap_entries[akva_devmap_idx++]; m->pd_va = akva_devmap_vaddr; m->pd_pa = pa; m->pd_size = sz; } /* * Register the given table as the one to use in devmap_bootstrap(). */ void devmap_register_table(const struct devmap_entry *table) { devmap_table = table; } /* * Map all of the static regions in the devmap table, and remember the devmap * table so the mapdev, ptov, and vtop functions can do lookups later. * * If a non-NULL table pointer is given it is used unconditionally, otherwise * the previously-registered table is used. This smooths transition from legacy * code that fills in a local table then calls this function passing that table, * and newer code that uses devmap_register_table() in platform-specific * code, then lets the common initarm() call this function with a NULL pointer. */ void devmap_bootstrap(vm_offset_t l1pt, const struct devmap_entry *table) { const struct devmap_entry *pd; devmap_bootstrap_done = true; /* * If given a table pointer, use it. Otherwise, if a table was * previously registered, use it. Otherwise, no work to do. */ if (table != NULL) devmap_table = table; else if (devmap_table == NULL) return; for (pd = devmap_table; pd->pd_size != 0; ++pd) { #if defined(__arm__) #if __ARM_ARCH >= 6 pmap_preboot_map_attr(pd->pd_pa, pd->pd_va, pd->pd_size, VM_PROT_READ | VM_PROT_WRITE, VM_MEMATTR_DEVICE); #else pmap_map_chunk(l1pt, pd->pd_va, pd->pd_pa, pd->pd_size, VM_PROT_READ | VM_PROT_WRITE, PTE_DEVICE); #endif -#elif defined(__aarch64__) || defined(__riscv__) +#elif defined(__aarch64__) || defined(__riscv) pmap_kenter_device(pd->pd_va, pd->pd_size, pd->pd_pa); #endif } } /* * Look up the given physical address in the static mapping data and return the * corresponding virtual address, or NULL if not found. */ void * devmap_ptov(vm_paddr_t pa, vm_size_t size) { const struct devmap_entry *pd; if (devmap_table == NULL) return (NULL); for (pd = devmap_table; pd->pd_size != 0; ++pd) { if (pa >= pd->pd_pa && pa + size <= pd->pd_pa + pd->pd_size) return ((void *)(pd->pd_va + (pa - pd->pd_pa))); } return (NULL); } /* * Look up the given virtual address in the static mapping data and return the * corresponding physical address, or DEVMAP_PADDR_NOTFOUND if not found. */ vm_paddr_t devmap_vtop(void * vpva, vm_size_t size) { const struct devmap_entry *pd; vm_offset_t va; if (devmap_table == NULL) return (DEVMAP_PADDR_NOTFOUND); va = (vm_offset_t)vpva; for (pd = devmap_table; pd->pd_size != 0; ++pd) { if (va >= pd->pd_va && va + size <= pd->pd_va + pd->pd_size) return ((vm_paddr_t)(pd->pd_pa + (va - pd->pd_va))); } return (DEVMAP_PADDR_NOTFOUND); } /* * Map a set of physical memory pages into the kernel virtual address space. * Return a pointer to where it is mapped. 
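 *
 * The returned pointer preserves the sub-page offset of 'pa'.  During
 * early boot on arm64 and riscv (the test uses the compiler-predefined
 * __riscv macro, not the older __riscv__ spelling) the virtual range is
 * carved out of the devmap region rather than allocated with kva_alloc().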
* * This uses a pre-established static mapping if one exists for the requested * range, otherwise it allocates kva space and maps the physical pages into it. * * This routine is intended to be used for mapping device memory, NOT real * memory; the mapping type is inherently VM_MEMATTR_DEVICE in * pmap_kenter_device(). */ void * pmap_mapdev(vm_offset_t pa, vm_size_t size) { vm_offset_t va, offset; void * rva; /* First look in the static mapping table. */ if ((rva = devmap_ptov(pa, size)) != NULL) return (rva); offset = pa & PAGE_MASK; pa = trunc_page(pa); size = round_page(size + offset); -#if defined(__aarch64__) || defined(__riscv__) +#if defined(__aarch64__) || defined(__riscv) if (early_boot) { akva_devmap_vaddr = trunc_page(akva_devmap_vaddr - size); va = akva_devmap_vaddr; KASSERT(va >= VM_MAX_KERNEL_ADDRESS - L2_SIZE, ("Too many early devmap mappings")); } else #endif va = kva_alloc(size); if (!va) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); pmap_kenter_device(va, size, pa); return ((void *)(va + offset)); } /* * Unmap device memory and free the kva space. */ void pmap_unmapdev(vm_offset_t va, vm_size_t size) { vm_offset_t offset; /* Nothing to do if we find the mapping in the static table. */ if (devmap_vtop((void*)va, size) != DEVMAP_PADDR_NOTFOUND) return; offset = va & PAGE_MASK; va = trunc_page(va); size = round_page(size + offset); pmap_kremove_device(va, size); kva_free(va, size); } #ifdef DDB #include DB_SHOW_COMMAND(devmap, db_show_devmap) { devmap_dump_table(db_printf); } #endif /* DDB */ Index: head/sys/modules/dtrace/dtraceall/dtraceall.c =================================================================== --- head/sys/modules/dtrace/dtraceall/dtraceall.c (revision 322167) +++ head/sys/modules/dtrace/dtraceall/dtraceall.c (revision 322168) @@ -1,84 +1,84 @@ /* * Copyright (C) 2008 John Birrell * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include #include #include #include #include #include #include #include "opt_compat.h" #include "opt_nfs.h" static int dtraceall_modevent(module_t mod __unused, int type, void *data __unused) { int error = 0; switch (type) { case MOD_LOAD: break; case MOD_UNLOAD: break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } DEV_MODULE(dtraceall, dtraceall_modevent, NULL); MODULE_VERSION(dtraceall, 1); /* All the DTrace modules should be dependencies here: */ MODULE_DEPEND(dtraceall, opensolaris, 1, 1, 1); MODULE_DEPEND(dtraceall, dtrace, 1, 1, 1); MODULE_DEPEND(dtraceall, dtmalloc, 1, 1, 1); #if defined(NFSCL) MODULE_DEPEND(dtraceall, dtnfscl, 1, 1, 1); #endif #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ - defined(__i386__) || defined(__powerpc__) || defined(__riscv__) + defined(__i386__) || defined(__powerpc__) || defined(__riscv) MODULE_DEPEND(dtraceall, fbt, 1, 1, 1); #endif #if defined(__amd64__) || defined(__i386__) MODULE_DEPEND(dtraceall, fasttrap, 1, 1, 1); #endif MODULE_DEPEND(dtraceall, sdt, 1, 1, 1); MODULE_DEPEND(dtraceall, systrace, 1, 1, 1); #if defined(COMPAT_FREEBSD32) MODULE_DEPEND(dtraceall, systrace_freebsd32, 1, 1, 1); #endif MODULE_DEPEND(dtraceall, profile, 1, 1, 1); Index: head/sys/sys/cdefs.h =================================================================== --- head/sys/sys/cdefs.h (revision 322167) +++ head/sys/sys/cdefs.h (revision 322168) @@ -1,887 +1,887 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Berkeley Software Design, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)cdefs.h 8.8 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _SYS_CDEFS_H_ #define _SYS_CDEFS_H_ /* * Testing against Clang-specific extensions. 
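 *
 * Compilers that do not implement __has_attribute()/__has_feature() and
 * friends get harmless fallbacks that evaluate to 0, so feature tests
 * such as "#if __has_attribute(__alloc_size__)" remain valid everywhere.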
*/ #ifndef __has_attribute #define __has_attribute(x) 0 #endif #ifndef __has_extension #define __has_extension __has_feature #endif #ifndef __has_feature #define __has_feature(x) 0 #endif #ifndef __has_include #define __has_include(x) 0 #endif #ifndef __has_builtin #define __has_builtin(x) 0 #endif #if defined(__cplusplus) #define __BEGIN_DECLS extern "C" { #define __END_DECLS } #else #define __BEGIN_DECLS #define __END_DECLS #endif /* * This code has been put in place to help reduce the addition of * compiler specific defines in FreeBSD code. It helps to aid in * having a compiler-agnostic source tree. */ #if defined(__GNUC__) || defined(__INTEL_COMPILER) #if __GNUC__ >= 3 || defined(__INTEL_COMPILER) #define __GNUCLIKE_ASM 3 #define __GNUCLIKE_MATH_BUILTIN_CONSTANTS #else #define __GNUCLIKE_ASM 2 #endif #define __GNUCLIKE___TYPEOF 1 #define __GNUCLIKE___OFFSETOF 1 #define __GNUCLIKE___SECTION 1 #ifndef __INTEL_COMPILER #define __GNUCLIKE_CTOR_SECTION_HANDLING 1 #endif #define __GNUCLIKE_BUILTIN_CONSTANT_P 1 #if defined(__INTEL_COMPILER) && defined(__cplusplus) && \ __INTEL_COMPILER < 800 #undef __GNUCLIKE_BUILTIN_CONSTANT_P #endif #if (__GNUC_MINOR__ > 95 || __GNUC__ >= 3) #define __GNUCLIKE_BUILTIN_VARARGS 1 #define __GNUCLIKE_BUILTIN_STDARG 1 #define __GNUCLIKE_BUILTIN_VAALIST 1 #endif #if defined(__GNUC__) #define __GNUC_VA_LIST_COMPATIBILITY 1 #endif /* * Compiler memory barriers, specific to gcc and clang. */ #if defined(__GNUC__) #define __compiler_membar() __asm __volatile(" " : : : "memory") #endif #ifndef __INTEL_COMPILER #define __GNUCLIKE_BUILTIN_NEXT_ARG 1 #define __GNUCLIKE_MATH_BUILTIN_RELOPS #endif #define __GNUCLIKE_BUILTIN_MEMCPY 1 /* XXX: if __GNUC__ >= 2: not tested everywhere originally, where replaced */ #define __CC_SUPPORTS_INLINE 1 #define __CC_SUPPORTS___INLINE 1 #define __CC_SUPPORTS___INLINE__ 1 #define __CC_SUPPORTS___FUNC__ 1 #define __CC_SUPPORTS_WARNING 1 #define __CC_SUPPORTS_VARADIC_XXX 1 /* see varargs.h */ #define __CC_SUPPORTS_DYNAMIC_ARRAY_INIT 1 #endif /* __GNUC__ || __INTEL_COMPILER */ /* * Macro to test if we're using a specific version of gcc or later. */ #if defined(__GNUC__) && !defined(__INTEL_COMPILER) #define __GNUC_PREREQ__(ma, mi) \ (__GNUC__ > (ma) || __GNUC__ == (ma) && __GNUC_MINOR__ >= (mi)) #else #define __GNUC_PREREQ__(ma, mi) 0 #endif /* * The __CONCAT macro is used to concatenate parts of symbol names, e.g. * with "#define OLD(foo) __CONCAT(old,foo)", OLD(foo) produces oldfoo. * The __CONCAT macro is a bit tricky to use if it must work in non-ANSI * mode -- there must be no spaces between its arguments, and for nested * __CONCAT's, all the __CONCAT's must be at the left. __CONCAT can also * concatenate double-quoted strings produced by the __STRING macro, but * this only works with ANSI C. * * __XSTRING is like __STRING, but it expands any macros in its argument * first. It is only available with ANSI C. */ #if defined(__STDC__) || defined(__cplusplus) #define __P(protos) protos /* full-blown ANSI C */ #define __CONCAT1(x,y) x ## y #define __CONCAT(x,y) __CONCAT1(x,y) #define __STRING(x) #x /* stringify without expanding x */ #define __XSTRING(x) __STRING(x) /* expand x, then stringify */ #define __const const /* define reserved names to standard */ #define __signed signed #define __volatile volatile #if defined(__cplusplus) #define __inline inline /* convert to C++ keyword */ #else #if !(defined(__CC_SUPPORTS___INLINE)) #define __inline /* delete GCC keyword */ #endif /* ! 
__CC_SUPPORTS___INLINE */ #endif /* !__cplusplus */ #else /* !(__STDC__ || __cplusplus) */ #define __P(protos) () /* traditional C preprocessor */ #define __CONCAT(x,y) x/**/y #define __STRING(x) "x" #if !defined(__CC_SUPPORTS___INLINE) #define __const /* delete pseudo-ANSI C keywords */ #define __inline #define __signed #define __volatile /* * In non-ANSI C environments, new programs will want ANSI-only C keywords * deleted from the program and old programs will want them left alone. * When using a compiler other than gcc, programs using the ANSI C keywords * const, inline etc. as normal identifiers should define -DNO_ANSI_KEYWORDS. * When using "gcc -traditional", we assume that this is the intent; if * __GNUC__ is defined but __STDC__ is not, we leave the new keywords alone. */ #ifndef NO_ANSI_KEYWORDS #define const /* delete ANSI C keywords */ #define inline #define signed #define volatile #endif /* !NO_ANSI_KEYWORDS */ #endif /* !__CC_SUPPORTS___INLINE */ #endif /* !(__STDC__ || __cplusplus) */ /* * Compiler-dependent macros to help declare dead (non-returning) and * pure (no side effects) functions, and unused variables. They are * null except for versions of gcc that are known to support the features * properly (old versions of gcc-2 supported the dead and pure features * in a different (wrong) way). If we do not provide an implementation * for a given compiler, let the compile fail if it is told to use * a feature that we cannot live without. */ #ifdef lint #define __dead2 #define __pure2 #define __unused #define __packed #define __aligned(x) #define __alloc_align(x) #define __alloc_size(x) #define __section(x) #define __weak_symbol #else #define __weak_symbol __attribute__((__weak__)) #if !__GNUC_PREREQ__(2, 5) && !defined(__INTEL_COMPILER) #define __dead2 #define __pure2 #define __unused #endif #if __GNUC__ == 2 && __GNUC_MINOR__ >= 5 && __GNUC_MINOR__ < 7 && !defined(__INTEL_COMPILER) #define __dead2 __attribute__((__noreturn__)) #define __pure2 __attribute__((__const__)) #define __unused /* XXX Find out what to do for __packed, __aligned and __section */ #endif #if __GNUC_PREREQ__(2, 7) || defined(__INTEL_COMPILER) #define __dead2 __attribute__((__noreturn__)) #define __pure2 __attribute__((__const__)) #define __unused __attribute__((__unused__)) #define __used __attribute__((__used__)) #define __packed __attribute__((__packed__)) #define __aligned(x) __attribute__((__aligned__(x))) #define __section(x) __attribute__((__section__(x))) #endif #if __GNUC_PREREQ__(4, 3) || __has_attribute(__alloc_size__) #define __alloc_size(x) __attribute__((__alloc_size__(x))) #else #define __alloc_size(x) #endif #if __GNUC_PREREQ__(4, 9) || __has_attribute(__alloc_align__) #define __alloc_align(x) __attribute__((__alloc_align__(x))) #else #define __alloc_align(x) #endif #endif /* lint */ #if !__GNUC_PREREQ__(2, 95) #define __alignof(x) __offsetof(struct { char __a; x __b; }, __b) #endif /* * Keywords added in C11. */ #if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L || defined(lint) #if !__has_extension(c_alignas) #if (defined(__cplusplus) && __cplusplus >= 201103L) || \ __has_extension(cxx_alignas) #define _Alignas(x) alignas(x) #else /* XXX: Only emulates _Alignas(constant-expression); not _Alignas(type-name). 
*/ #define _Alignas(x) __aligned(x) #endif #endif #if defined(__cplusplus) && __cplusplus >= 201103L #define _Alignof(x) alignof(x) #else #define _Alignof(x) __alignof(x) #endif #if !defined(__cplusplus) && !__has_extension(c_atomic) && \ !__has_extension(cxx_atomic) /* * No native support for _Atomic(). Place object in structure to prevent * most forms of direct non-atomic access. */ #define _Atomic(T) struct { T volatile __val; } #endif #if defined(__cplusplus) && __cplusplus >= 201103L #define _Noreturn [[noreturn]] #else #define _Noreturn __dead2 #endif #if !__has_extension(c_static_assert) #if (defined(__cplusplus) && __cplusplus >= 201103L) || \ __has_extension(cxx_static_assert) #define _Static_assert(x, y) static_assert(x, y) #elif __GNUC_PREREQ__(4,6) /* Nothing, gcc 4.6 and higher has _Static_assert built-in */ #elif defined(__COUNTER__) #define _Static_assert(x, y) __Static_assert(x, __COUNTER__) #define __Static_assert(x, y) ___Static_assert(x, y) #define ___Static_assert(x, y) typedef char __assert_ ## y[(x) ? 1 : -1] \ __unused #else #define _Static_assert(x, y) struct __hack #endif #endif #if !__has_extension(c_thread_local) /* * XXX: Some compilers (Clang 3.3, GCC 4.7) falsely announce C++11 mode * without actually supporting the thread_local keyword. Don't check for * the presence of C++11 when defining _Thread_local. */ #if /* (defined(__cplusplus) && __cplusplus >= 201103L) || */ \ __has_extension(cxx_thread_local) #define _Thread_local thread_local #else #define _Thread_local __thread #endif #endif #endif /* __STDC_VERSION__ || __STDC_VERSION__ < 201112L */ /* * Emulation of C11 _Generic(). Unlike the previously defined C11 * keywords, it is not possible to implement this using exactly the same * syntax. Therefore implement something similar under the name * __generic(). Unlike _Generic(), this macro can only distinguish * between a single type, so it requires nested invocations to * distinguish multiple cases. */ #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \ __has_extension(c_generic_selections) #define __generic(expr, t, yes, no) \ _Generic(expr, t: yes, default: no) #elif __GNUC_PREREQ__(3, 1) && !defined(__cplusplus) #define __generic(expr, t, yes, no) \ __builtin_choose_expr( \ __builtin_types_compatible_p(__typeof(expr), t), yes, no) #endif /* * C99 Static array indices in function parameter declarations. Syntax such as: * void bar(int myArray[static 10]); * is allowed in C99 but not in C++. Define __min_size appropriately so * headers using it can be compiled in either language. 
Use like this: * void bar(int myArray[__min_size(10)]); */ #if !defined(__cplusplus) && \ (defined(__clang__) || __GNUC_PREREQ__(4, 6)) && \ (!defined(__STDC_VERSION__) || (__STDC_VERSION__ >= 199901)) #define __min_size(x) static (x) #else #define __min_size(x) (x) #endif #if __GNUC_PREREQ__(2, 96) #define __malloc_like __attribute__((__malloc__)) #define __pure __attribute__((__pure__)) #else #define __malloc_like #define __pure #endif #if __GNUC_PREREQ__(3, 1) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 800) #define __always_inline __attribute__((__always_inline__)) #else #define __always_inline #endif #if __GNUC_PREREQ__(3, 1) #define __noinline __attribute__ ((__noinline__)) #else #define __noinline #endif #if __GNUC_PREREQ__(3, 4) #define __fastcall __attribute__((__fastcall__)) #define __result_use_check __attribute__((__warn_unused_result__)) #else #define __fastcall #define __result_use_check #endif #if __GNUC_PREREQ__(4, 1) #define __returns_twice __attribute__((__returns_twice__)) #else #define __returns_twice #endif #if __GNUC_PREREQ__(4, 6) || __has_builtin(__builtin_unreachable) #define __unreachable() __builtin_unreachable() #else #define __unreachable() ((void)0) #endif /* XXX: should use `#if __STDC_VERSION__ < 199901'. */ #if !__GNUC_PREREQ__(2, 7) && !defined(__INTEL_COMPILER) #define __func__ NULL #endif #if (defined(__INTEL_COMPILER) || (defined(__GNUC__) && __GNUC__ >= 2)) && !defined(__STRICT_ANSI__) || __STDC_VERSION__ >= 199901 #define __LONG_LONG_SUPPORTED #endif /* C++11 exposes a load of C99 stuff */ #if defined(__cplusplus) && __cplusplus >= 201103L #define __LONG_LONG_SUPPORTED #ifndef __STDC_LIMIT_MACROS #define __STDC_LIMIT_MACROS #endif #ifndef __STDC_CONSTANT_MACROS #define __STDC_CONSTANT_MACROS #endif #endif /* * GCC 2.95 provides `__restrict' as an extension to C90 to support the * C99-specific `restrict' type qualifier. We happen to use `__restrict' as * a way to define the `restrict' type qualifier without disturbing older * software that is unaware of C99 keywords. */ #if !(__GNUC__ == 2 && __GNUC_MINOR__ == 95) #if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901 || defined(lint) #define __restrict #else #define __restrict restrict #endif #endif /* * GNU C version 2.96 adds explicit branch prediction so that * the CPU back-end can hint the processor and also so that * code blocks can be reordered such that the predicted path * sees a more linear flow, thus improving cache behavior, etc. * * The following two macros provide us with a way to utilize this * compiler feature. Use __predict_true() if you expect the expression * to evaluate to true, and __predict_false() if you expect the * expression to evaluate to false. * * A few notes about usage: * * * Generally, __predict_false() error condition checks (unless * you have some _strong_ reason to do otherwise, in which case * document it), and/or __predict_true() `no-error' condition * checks, assuming you want to optimize for the no-error case. * * * Other than that, if you don't know the likelihood of a test * succeeding from empirical or other `hard' evidence, don't * make predictions. * * * These are meant to be used in places that are run `a lot'. * It is wasteful to make predictions in code that is run * seldomly (e.g. at subsystem initialization time) as the * basic block reordering that this affects can often generate * larger code. 
*/ #if __GNUC_PREREQ__(2, 96) #define __predict_true(exp) __builtin_expect((exp), 1) #define __predict_false(exp) __builtin_expect((exp), 0) #else #define __predict_true(exp) (exp) #define __predict_false(exp) (exp) #endif #if __GNUC_PREREQ__(4, 0) #define __null_sentinel __attribute__((__sentinel__)) #define __exported __attribute__((__visibility__("default"))) #define __hidden __attribute__((__visibility__("hidden"))) #else #define __null_sentinel #define __exported #define __hidden #endif /* * We define this here since , , and * require it. */ #if __GNUC_PREREQ__(4, 1) #define __offsetof(type, field) __builtin_offsetof(type, field) #else #ifndef __cplusplus #define __offsetof(type, field) \ ((__size_t)(__uintptr_t)((const volatile void *)&((type *)0)->field)) #else #define __offsetof(type, field) \ (__offsetof__ (reinterpret_cast <__size_t> \ (&reinterpret_cast \ (static_cast (0)->field)))) #endif #endif #define __rangeof(type, start, end) \ (__offsetof(type, end) - __offsetof(type, start)) /* * Given the pointer x to the member m of the struct s, return * a pointer to the containing structure. When using GCC, we first * assign pointer x to a local variable, to check that its type is * compatible with member m. */ #if __GNUC_PREREQ__(3, 1) #define __containerof(x, s, m) ({ \ const volatile __typeof(((s *)0)->m) *__x = (x); \ __DEQUALIFY(s *, (const volatile char *)__x - __offsetof(s, m));\ }) #else #define __containerof(x, s, m) \ __DEQUALIFY(s *, (const volatile char *)(x) - __offsetof(s, m)) #endif /* * Compiler-dependent macros to declare that functions take printf-like * or scanf-like arguments. They are null except for versions of gcc * that are known to support the features properly (old versions of gcc-2 * didn't permit keeping the keywords out of the application namespace). */ #if !__GNUC_PREREQ__(2, 7) && !defined(__INTEL_COMPILER) #define __printflike(fmtarg, firstvararg) #define __scanflike(fmtarg, firstvararg) #define __format_arg(fmtarg) #define __strfmonlike(fmtarg, firstvararg) #define __strftimelike(fmtarg, firstvararg) #else #define __printflike(fmtarg, firstvararg) \ __attribute__((__format__ (__printf__, fmtarg, firstvararg))) #define __scanflike(fmtarg, firstvararg) \ __attribute__((__format__ (__scanf__, fmtarg, firstvararg))) #define __format_arg(fmtarg) __attribute__((__format_arg__ (fmtarg))) #define __strfmonlike(fmtarg, firstvararg) \ __attribute__((__format__ (__strfmon__, fmtarg, firstvararg))) #define __strftimelike(fmtarg, firstvararg) \ __attribute__((__format__ (__strftime__, fmtarg, firstvararg))) #endif /* Compiler-dependent macros that rely on FreeBSD-specific extensions. */ #if defined(__FreeBSD_cc_version) && __FreeBSD_cc_version >= 300001 && \ defined(__GNUC__) && !defined(__INTEL_COMPILER) #define __printf0like(fmtarg, firstvararg) \ __attribute__((__format__ (__printf0__, fmtarg, firstvararg))) #else #define __printf0like(fmtarg, firstvararg) #endif #if defined(__GNUC__) || defined(__INTEL_COMPILER) #ifndef __INTEL_COMPILER #define __strong_reference(sym,aliassym) \ extern __typeof (sym) aliassym __attribute__ ((__alias__ (#sym))) #endif #ifdef __STDC__ #define __weak_reference(sym,alias) \ __asm__(".weak " #alias); \ __asm__(".equ " #alias ", " #sym) #define __warn_references(sym,msg) \ __asm__(".section .gnu.warning." 
#sym); \ __asm__(".asciz \"" msg "\""); \ __asm__(".previous") #define __sym_compat(sym,impl,verid) \ __asm__(".symver " #impl ", " #sym "@" #verid) #define __sym_default(sym,impl,verid) \ __asm__(".symver " #impl ", " #sym "@@" #verid) #else #define __weak_reference(sym,alias) \ __asm__(".weak alias"); \ __asm__(".equ alias, sym") #define __warn_references(sym,msg) \ __asm__(".section .gnu.warning.sym"); \ __asm__(".asciz \"msg\""); \ __asm__(".previous") #define __sym_compat(sym,impl,verid) \ __asm__(".symver impl, sym@verid") #define __sym_default(impl,sym,verid) \ __asm__(".symver impl, sym@@verid") #endif /* __STDC__ */ #endif /* __GNUC__ || __INTEL_COMPILER */ #define __GLOBL1(sym) __asm__(".globl " #sym) #define __GLOBL(sym) __GLOBL1(sym) #if defined(__GNUC__) || defined(__INTEL_COMPILER) #define __IDSTRING(name,string) __asm__(".ident\t\"" string "\"") #else /* * The following definition might not work well if used in header files, * but it should be better than nothing. If you want a "do nothing" * version, then it should generate some harmless declaration, such as: * #define __IDSTRING(name,string) struct __hack */ #define __IDSTRING(name,string) static const char name[] __unused = string #endif /* * Embed the rcs id of a source file in the resulting library. Note that in * more recent ELF binutils, we use .ident allowing the ID to be stripped. * Usage: * __FBSDID("$FreeBSD$"); */ #ifndef __FBSDID #if !defined(lint) && !defined(STRIP_FBSDID) #define __FBSDID(s) __IDSTRING(__CONCAT(__rcsid_,__LINE__),s) #else #define __FBSDID(s) struct __hack #endif #endif #ifndef __RCSID #ifndef NO__RCSID #define __RCSID(s) __IDSTRING(__CONCAT(__rcsid_,__LINE__),s) #else #define __RCSID(s) struct __hack #endif #endif #ifndef __RCSID_SOURCE #ifndef NO__RCSID_SOURCE #define __RCSID_SOURCE(s) __IDSTRING(__CONCAT(__rcsid_source_,__LINE__),s) #else #define __RCSID_SOURCE(s) struct __hack #endif #endif #ifndef __SCCSID #ifndef NO__SCCSID #define __SCCSID(s) __IDSTRING(__CONCAT(__sccsid_,__LINE__),s) #else #define __SCCSID(s) struct __hack #endif #endif #ifndef __COPYRIGHT #ifndef NO__COPYRIGHT #define __COPYRIGHT(s) __IDSTRING(__CONCAT(__copyright_,__LINE__),s) #else #define __COPYRIGHT(s) struct __hack #endif #endif #ifndef __DECONST #define __DECONST(type, var) ((type)(__uintptr_t)(const void *)(var)) #endif #ifndef __DEVOLATILE #define __DEVOLATILE(type, var) ((type)(__uintptr_t)(volatile void *)(var)) #endif #ifndef __DEQUALIFY #define __DEQUALIFY(type, var) ((type)(__uintptr_t)(const volatile void *)(var)) #endif /*- * The following definitions are an extension of the behavior originally * implemented in , but with a different level of granularity. * POSIX.1 requires that the macros we test be defined before any standard * header file is included. * * Here's a quick run-down of the versions: * defined(_POSIX_SOURCE) 1003.1-1988 * _POSIX_C_SOURCE == 1 1003.1-1990 * _POSIX_C_SOURCE == 2 1003.2-1992 C Language Binding Option * _POSIX_C_SOURCE == 199309 1003.1b-1993 * _POSIX_C_SOURCE == 199506 1003.1c-1995, 1003.1i-1995, * and the omnibus ISO/IEC 9945-1: 1996 * _POSIX_C_SOURCE == 200112 1003.1-2001 * _POSIX_C_SOURCE == 200809 1003.1-2008 * * In addition, the X/Open Portability Guide, which is now the Single UNIX * Specification, defines a feature-test macro which indicates the version of * that specification, and which subsumes _POSIX_C_SOURCE. * * Our macros begin with two underscores to avoid namespace screwage. */ /* Deal with IEEE Std. 1003.1-1990, in which _POSIX_C_SOURCE == 1. 
*/ #if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE == 1 #undef _POSIX_C_SOURCE /* Probably illegal, but beyond caring now. */ #define _POSIX_C_SOURCE 199009 #endif /* Deal with IEEE Std. 1003.2-1992, in which _POSIX_C_SOURCE == 2. */ #if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE == 2 #undef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 199209 #endif /* Deal with various X/Open Portability Guides and Single UNIX Spec. */ #ifdef _XOPEN_SOURCE #if _XOPEN_SOURCE - 0 >= 700 #define __XSI_VISIBLE 700 #undef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 200809 #elif _XOPEN_SOURCE - 0 >= 600 #define __XSI_VISIBLE 600 #undef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 200112 #elif _XOPEN_SOURCE - 0 >= 500 #define __XSI_VISIBLE 500 #undef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 199506 #endif #endif /* * Deal with all versions of POSIX. The ordering relative to the tests above is * important. */ #if defined(_POSIX_SOURCE) && !defined(_POSIX_C_SOURCE) #define _POSIX_C_SOURCE 198808 #endif #ifdef _POSIX_C_SOURCE #if _POSIX_C_SOURCE >= 200809 #define __POSIX_VISIBLE 200809 #define __ISO_C_VISIBLE 1999 #elif _POSIX_C_SOURCE >= 200112 #define __POSIX_VISIBLE 200112 #define __ISO_C_VISIBLE 1999 #elif _POSIX_C_SOURCE >= 199506 #define __POSIX_VISIBLE 199506 #define __ISO_C_VISIBLE 1990 #elif _POSIX_C_SOURCE >= 199309 #define __POSIX_VISIBLE 199309 #define __ISO_C_VISIBLE 1990 #elif _POSIX_C_SOURCE >= 199209 #define __POSIX_VISIBLE 199209 #define __ISO_C_VISIBLE 1990 #elif _POSIX_C_SOURCE >= 199009 #define __POSIX_VISIBLE 199009 #define __ISO_C_VISIBLE 1990 #else #define __POSIX_VISIBLE 198808 #define __ISO_C_VISIBLE 0 #endif /* _POSIX_C_SOURCE */ #else /*- * Deal with _ANSI_SOURCE: * If it is defined, and no other compilation environment is explicitly * requested, then define our internal feature-test macros to zero. This * makes no difference to the preprocessor (undefined symbols in preprocessing * expressions are defined to have value zero), but makes it more convenient for * a test program to print out the values. * * If a program mistakenly defines _ANSI_SOURCE and some other macro such as * _POSIX_C_SOURCE, we will assume that it wants the broader compilation * environment (and in fact we will never get here). */ #if defined(_ANSI_SOURCE) /* Hide almost everything. */ #define __POSIX_VISIBLE 0 #define __XSI_VISIBLE 0 #define __BSD_VISIBLE 0 #define __ISO_C_VISIBLE 1990 #define __EXT1_VISIBLE 0 #elif defined(_C99_SOURCE) /* Localism to specify strict C99 env. */ #define __POSIX_VISIBLE 0 #define __XSI_VISIBLE 0 #define __BSD_VISIBLE 0 #define __ISO_C_VISIBLE 1999 #define __EXT1_VISIBLE 0 #elif defined(_C11_SOURCE) /* Localism to specify strict C11 env. */ #define __POSIX_VISIBLE 0 #define __XSI_VISIBLE 0 #define __BSD_VISIBLE 0 #define __ISO_C_VISIBLE 2011 #define __EXT1_VISIBLE 0 #else /* Default environment: show everything. */ #define __POSIX_VISIBLE 200809 #define __XSI_VISIBLE 700 #define __BSD_VISIBLE 1 #define __ISO_C_VISIBLE 2011 #define __EXT1_VISIBLE 1 #endif #endif /* User override __EXT1_VISIBLE */ #if defined(__STDC_WANT_LIB_EXT1__) #undef __EXT1_VISIBLE #if __STDC_WANT_LIB_EXT1__ #define __EXT1_VISIBLE 1 #else #define __EXT1_VISIBLE 0 #endif #endif /* __STDC_WANT_LIB_EXT1__ */ -#if defined(__mips) || defined(__powerpc64__) || defined(__riscv__) +#if defined(__mips) || defined(__powerpc64__) || defined(__riscv) #define __NO_TLS 1 #endif /* * Old versions of GCC use non-standard ARM arch symbols; acle-compat.h * translates them to __ARM_ARCH and the modern feature symbols defined by ARM. 
*/ #if defined(__arm__) && !defined(__ARM_ARCH) #include #endif /* * Nullability qualifiers: currently only supported by Clang. */ #if !(defined(__clang__) && __has_feature(nullability)) #define _Nonnull #define _Nullable #define _Null_unspecified #define __NULLABILITY_PRAGMA_PUSH #define __NULLABILITY_PRAGMA_POP #else #define __NULLABILITY_PRAGMA_PUSH _Pragma("clang diagnostic push") \ _Pragma("clang diagnostic ignored \"-Wnullability-completeness\"") #define __NULLABILITY_PRAGMA_POP _Pragma("clang diagnostic pop") #endif /* * Type Safety Checking * * Clang provides additional attributes to enable checking type safety * properties that cannot be enforced by the C type system. */ #if __has_attribute(__argument_with_type_tag__) && \ __has_attribute(__type_tag_for_datatype__) && !defined(lint) #define __arg_type_tag(arg_kind, arg_idx, type_tag_idx) \ __attribute__((__argument_with_type_tag__(arg_kind, arg_idx, type_tag_idx))) #define __datatype_type_tag(kind, type) \ __attribute__((__type_tag_for_datatype__(kind, type))) #else #define __arg_type_tag(arg_kind, arg_idx, type_tag_idx) #define __datatype_type_tag(kind, type) #endif /* * Lock annotations. * * Clang provides support for doing basic thread-safety tests at * compile-time, by marking which locks will/should be held when * entering/leaving a functions. * * Furthermore, it is also possible to annotate variables and structure * members to enforce that they are only accessed when certain locks are * held. */ #if __has_extension(c_thread_safety_attributes) #define __lock_annotate(x) __attribute__((x)) #else #define __lock_annotate(x) #endif /* Structure implements a lock. */ #define __lockable __lock_annotate(lockable) /* Function acquires an exclusive or shared lock. */ #define __locks_exclusive(...) \ __lock_annotate(exclusive_lock_function(__VA_ARGS__)) #define __locks_shared(...) \ __lock_annotate(shared_lock_function(__VA_ARGS__)) /* Function attempts to acquire an exclusive or shared lock. */ #define __trylocks_exclusive(...) \ __lock_annotate(exclusive_trylock_function(__VA_ARGS__)) #define __trylocks_shared(...) \ __lock_annotate(shared_trylock_function(__VA_ARGS__)) /* Function releases a lock. */ #define __unlocks(...) __lock_annotate(unlock_function(__VA_ARGS__)) /* Function asserts that an exclusive or shared lock is held. */ #define __asserts_exclusive(...) \ __lock_annotate(assert_exclusive_lock(__VA_ARGS__)) #define __asserts_shared(...) \ __lock_annotate(assert_shared_lock(__VA_ARGS__)) /* Function requires that an exclusive or shared lock is or is not held. */ #define __requires_exclusive(...) \ __lock_annotate(exclusive_locks_required(__VA_ARGS__)) #define __requires_shared(...) \ __lock_annotate(shared_locks_required(__VA_ARGS__)) #define __requires_unlocked(...) \ __lock_annotate(locks_excluded(__VA_ARGS__)) /* Function should not be analyzed. */ #define __no_lock_analysis __lock_annotate(no_thread_safety_analysis) /* Guard variables and structure members by lock. */ #define __guarded_by(x) __lock_annotate(guarded_by(x)) #define __pt_guarded_by(x) __lock_annotate(pt_guarded_by(x)) #endif /* !_SYS_CDEFS_H_ */ Index: head/usr.bin/ldd/ldd.c =================================================================== --- head/usr.bin/ldd/ldd.c (revision 322167) +++ head/usr.bin/ldd/ldd.c (revision 322168) @@ -1,413 +1,413 @@ /* * Copyright (c) 1993 Paul Kranenburg * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Paul Kranenburg. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include "extern.h" /* We don't support a.out executables on arm64 and riscv */ -#if !defined(__aarch64__) && !defined(__riscv__) +#if !defined(__aarch64__) && !defined(__riscv) #include #define AOUT_SUPPORTED #endif /* * 32-bit ELF data structures can only be used if the system header[s] declare * them. There is no official macro for determining whether they are declared, * so check for the existence of one of the 32-macros defined in elf(5). 
*/ #ifdef ELF32_R_TYPE #define ELF32_SUPPORTED #endif #define LDD_SETENV(name, value, overwrite) do { \ setenv("LD_" name, value, overwrite); \ setenv("LD_32_" name, value, overwrite); \ } while (0) #define LDD_UNSETENV(name) do { \ unsetenv("LD_" name); \ unsetenv("LD_32_" name); \ } while (0) static int is_executable(const char *fname, int fd, int *is_shlib, int *type); static void usage(void); #define TYPE_UNKNOWN 0 #define TYPE_AOUT 1 #define TYPE_ELF 2 /* Architecture default */ #if __ELF_WORD_SIZE > 32 && defined(ELF32_SUPPORTED) #define TYPE_ELF32 3 /* Explicit 32 bits on architectures >32 bits */ #define _PATH_LDD32 "/usr/bin/ldd32" static int execldd32(char *file, char *fmt1, char *fmt2, int aflag, int vflag) { char *argv[9]; int i, rval, status; LDD_UNSETENV("TRACE_LOADED_OBJECTS"); rval = 0; i = 0; argv[i++] = strdup(_PATH_LDD32); if (aflag) argv[i++] = strdup("-a"); if (vflag) argv[i++] = strdup("-v"); if (fmt1 != NULL) { argv[i++] = strdup("-f"); argv[i++] = strdup(fmt1); } if (fmt2 != NULL) { argv[i++] = strdup("-f"); argv[i++] = strdup(fmt2); } argv[i++] = strdup(file); argv[i++] = NULL; switch (fork()) { case -1: err(1, "fork"); break; case 0: execv(_PATH_LDD32, argv); warn("%s", _PATH_LDD32); _exit(127); break; default: if (wait(&status) < 0) rval = 1; else if (WIFSIGNALED(status)) rval = 1; else if (WIFEXITED(status) && WEXITSTATUS(status) != 0) rval = 1; break; } while (i--) free(argv[i]); LDD_SETENV("TRACE_LOADED_OBJECTS", "yes", 1); return (rval); } #endif int main(int argc, char *argv[]) { char *fmt1, *fmt2; int rval, c, aflag, vflag; aflag = vflag = 0; fmt1 = fmt2 = NULL; while ((c = getopt(argc, argv, "af:v")) != -1) { switch (c) { case 'a': aflag++; break; case 'f': if (fmt1 != NULL) { if (fmt2 != NULL) errx(1, "too many formats"); fmt2 = optarg; } else fmt1 = optarg; break; case 'v': vflag++; break; default: usage(); /* NOTREACHED */ } } argc -= optind; argv += optind; if (vflag && fmt1 != NULL) errx(1, "-v may not be used with -f"); if (argc <= 0) { usage(); /* NOTREACHED */ } #ifdef __i386__ if (vflag) { for (c = 0; c < argc; c++) dump_file(argv[c]); exit(error_count == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } #endif rval = 0; for (; argc > 0; argc--, argv++) { int fd, status, is_shlib, rv, type; if ((fd = open(*argv, O_RDONLY, 0)) < 0) { warn("%s", *argv); rval |= 1; continue; } rv = is_executable(*argv, fd, &is_shlib, &type); close(fd); if (rv == 0) { rval |= 1; continue; } switch (type) { case TYPE_ELF: case TYPE_AOUT: break; #if __ELF_WORD_SIZE > 32 && defined(ELF32_SUPPORTED) case TYPE_ELF32: rval |= execldd32(*argv, fmt1, fmt2, aflag, vflag); continue; #endif case TYPE_UNKNOWN: default: /* * This shouldn't happen unless is_executable() * is broken. 
*/ errx(EDOOFUS, "unknown executable type"); } /* ld.so magic */ LDD_SETENV("TRACE_LOADED_OBJECTS", "yes", 1); if (fmt1 != NULL) LDD_SETENV("TRACE_LOADED_OBJECTS_FMT1", fmt1, 1); if (fmt2 != NULL) LDD_SETENV("TRACE_LOADED_OBJECTS_FMT2", fmt2, 1); LDD_SETENV("TRACE_LOADED_OBJECTS_PROGNAME", *argv, 1); if (aflag) LDD_SETENV("TRACE_LOADED_OBJECTS_ALL", "1", 1); else if (fmt1 == NULL && fmt2 == NULL) /* Default formats */ printf("%s:\n", *argv); fflush(stdout); switch (fork()) { case -1: err(1, "fork"); break; default: if (wait(&status) < 0) { warn("wait"); rval |= 1; } else if (WIFSIGNALED(status)) { fprintf(stderr, "%s: signal %d\n", *argv, WTERMSIG(status)); rval |= 1; } else if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { fprintf(stderr, "%s: exit status %d\n", *argv, WEXITSTATUS(status)); rval |= 1; } break; case 0: if (is_shlib == 0) { execl(*argv, *argv, (char *)NULL); warn("%s", *argv); } else { dlopen(*argv, RTLD_TRACE); warnx("%s: %s", *argv, dlerror()); } _exit(1); } } return rval; } static void usage(void) { fprintf(stderr, "usage: ldd [-a] [-v] [-f format] program ...\n"); exit(1); } static int is_executable(const char *fname, int fd, int *is_shlib, int *type) { union { #ifdef AOUT_SUPPORTED struct exec aout; #endif #if __ELF_WORD_SIZE > 32 && defined(ELF32_SUPPORTED) Elf32_Ehdr elf32; #endif Elf_Ehdr elf; } hdr; int n; *is_shlib = 0; *type = TYPE_UNKNOWN; if ((n = read(fd, &hdr, sizeof(hdr))) == -1) { warn("%s: can't read program header", fname); return (0); } #ifdef AOUT_SUPPORTED if ((size_t)n >= sizeof(hdr.aout) && !N_BADMAG(hdr.aout)) { /* a.out file */ if ((N_GETFLAG(hdr.aout) & EX_DPMASK) != EX_DYNAMIC #if 1 /* Compatibility */ || hdr.aout.a_entry < __LDPGSZ #endif ) { warnx("%s: not a dynamic executable", fname); return (0); } *type = TYPE_AOUT; return (1); } #endif #if __ELF_WORD_SIZE > 32 && defined(ELF32_SUPPORTED) if ((size_t)n >= sizeof(hdr.elf32) && IS_ELF(hdr.elf32) && hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) { /* Handle 32 bit ELF objects */ Elf32_Phdr phdr; int dynamic, i; dynamic = 0; *type = TYPE_ELF32; if (lseek(fd, hdr.elf32.e_phoff, SEEK_SET) == -1) { warnx("%s: header too short", fname); return (0); } for (i = 0; i < hdr.elf32.e_phnum; i++) { if (read(fd, &phdr, hdr.elf32.e_phentsize) != sizeof(phdr)) { warnx("%s: can't read program header", fname); return (0); } if (phdr.p_type == PT_DYNAMIC) { dynamic = 1; break; } } if (!dynamic) { warnx("%s: not a dynamic ELF executable", fname); return (0); } if (hdr.elf32.e_type == ET_DYN) { if (hdr.elf32.e_ident[EI_OSABI] == ELFOSABI_FREEBSD) { *is_shlib = 1; return (1); } warnx("%s: not a FreeBSD ELF shared object", fname); return (0); } return (1); } #endif if ((size_t)n >= sizeof(hdr.elf) && IS_ELF(hdr.elf) && hdr.elf.e_ident[EI_CLASS] == ELF_TARG_CLASS) { /* Handle default ELF objects on this architecture */ Elf_Phdr phdr; int dynamic, i; dynamic = 0; *type = TYPE_ELF; if (lseek(fd, hdr.elf.e_phoff, SEEK_SET) == -1) { warnx("%s: header too short", fname); return (0); } for (i = 0; i < hdr.elf.e_phnum; i++) { if (read(fd, &phdr, hdr.elf.e_phentsize) != sizeof(phdr)) { warnx("%s: can't read program header", fname); return (0); } if (phdr.p_type == PT_DYNAMIC) { dynamic = 1; break; } } if (!dynamic) { warnx("%s: not a dynamic ELF executable", fname); return (0); } if (hdr.elf.e_type == ET_DYN) { switch (hdr.elf.e_ident[EI_OSABI]) { case ELFOSABI_FREEBSD: *is_shlib = 1; return (1); #ifdef __ARM_EABI__ case ELFOSABI_NONE: if (hdr.elf.e_machine != EM_ARM) break; if (EF_ARM_EABI_VERSION(hdr.elf.e_flags) < 
EF_ARM_EABI_FREEBSD_MIN) break; *is_shlib = 1; return (1); #endif } warnx("%s: not a FreeBSD ELF shared object", fname); return (0); } return (1); } warnx("%s: not a dynamic executable", fname); return (0); } Index: head/usr.bin/xlint/lint1/param.h =================================================================== --- head/usr.bin/xlint/lint1/param.h (revision 322167) +++ head/usr.bin/xlint/lint1/param.h (revision 322168) @@ -1,143 +1,143 @@ /* $NetBSD: param.h,v 1.4 1995/07/23 18:14:41 ragge Exp $ */ /* * Copyright (c) 1994, 1995 Jochen Pohl * All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Jochen Pohl for * The NetBSD Project. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * Minimun size of string buffer. If this is not enough, the buffer * is enlarged in steps of STRBLEN bytes. */ #define STRBLEN 256 /* * This defines the size of memory blocks which are used to allocate * memory in larger chunks. */ #define MBLKSIZ ((size_t)0x4000) /* * Sizes of hash tables * Should be a prime. Possible primes are * 307, 401, 503, 601, 701, 809, 907, 1009, 1103, 1201, 1301, 1409, 1511. * * HSHSIZ1 symbol table 1st pass * HSHSIZ2 symbol table 2nd pass * THSHSIZ2 type table 2nd pass */ #define HSHSIZ1 503 #define HSHSIZ2 1009 #define THSHSIZ2 1009 /* * Should be set to 1 if the difference of two pointers is of type long * or the value of sizeof is of type unsigned long. 
*/ #if __amd64__ #define PTRDIFF_IS_LONG 1 #define SIZEOF_IS_ULONG 1 #elif __alpha__ #define PTRDIFF_IS_LONG 1 #define SIZEOF_IS_ULONG 1 #elif __i386__ #define PTRDIFF_IS_LONG 0 #define SIZEOF_IS_ULONG 0 #elif __m68k__ #define PTRDIFF_IS_LONG 0 #define SIZEOF_IS_ULONG 0 #elif __ns32k__ #define PTRDIFF_IS_LONG 0 #define SIZEOF_IS_ULONG 0 #elif __powerpc__ #define PTRDIFF_IS_LONG 0 #define SIZEOF_IS_ULONG 0 -#elif __riscv__ +#elif __riscv #define PTRDIFF_IS_LONG 1 #define SIZEOF_IS_ULONG 1 #elif __sparc__ #define PTRDIFF_IS_LONG 0 #define SIZEOF_IS_ULONG 0 #elif __sparc64__ #define PTRDIFF_IS_LONG 1 #define SIZEOF_IS_ULONG 1 #elif __vax__ #define PTRDIFF_IS_LONG 0 #define SIZEOF_IS_ULONG 0 #elif __arm__ #define PTRDIFF_IS_LONG 0 #define SIZEOF_IS_ULONG 0 #elif __mips__ #define PTRDIFF_IS_LONG 0 #define SIZEOF_IS_ULONG 0 #elif __aarch64__ #define PTRDIFF_IS_LONG 1 #define SIZEOF_IS_ULONG 1 #else #error unknown machine type #endif /* * Make sure this matches wchar_t. */ #define WCHAR SHORT #ifndef __GNUC__ #ifndef lint #ifndef QUAD_MAX /* necessary for mkdep */ #define QUAD_MAX LONG_MAX #define QUAD_MIN LONG_MIN #define UQUAD_MAX ULONG_MAX #endif typedef long quad_t; typedef u_long u_quad_t; #endif #endif /* * long double only in ANSI C. */ #ifdef __STDC__ typedef long double ldbl_t; #else typedef double ldbl_t; #endif /* * Some traditional compilers are not able to assign structures. */ #ifdef __STDC__ #define STRUCT_ASSIGN(dest, src) (dest) = (src) #else #define STRUCT_ASSIGN(dest, src) (void)memcpy(&(dest), &(src), \ sizeof (dest)); #endif
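Editor's note (not part of the diff): every hunk in this revision makes the same one-token change, replacing the nonstandard __riscv__ spelling with __riscv, which is the macro the RISC-V GCC and Clang ports actually predefine. Below is a minimal, hypothetical sketch of the corrected check, written in the same spirit as the param.h block above; the __riscv_xlen macro used for the word-size test is an assumption based on current RISC-V toolchains and does not appear anywhere in this patch.

	/*
	 * Hypothetical example: detect a RISC-V target the way the patched
	 * headers now do.  Only __riscv (no trailing underscores) is
	 * predefined by RISC-V compilers; __riscv_xlen, assumed here,
	 * reports the register width (32 or 64).
	 */
	#include <stdio.h>

	int
	main(void)
	{
	#if defined(__riscv)
	#if defined(__riscv_xlen) && __riscv_xlen == 64
		printf("RISC-V, 64-bit registers\n");
	#else
		printf("RISC-V, 32-bit registers\n");
	#endif
	#else
		printf("not a RISC-V target\n");
	#endif
		return (0);
	}

With the old spelling, #if defined(__riscv__) was never true on a RISC-V build, so the affected conditionals in cdefs.h, ldd(1), the lint param.h table, and the DTrace module dependencies silently fell through to the non-RISC-V branch; that is the practical effect this revision fixes.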