Index: head/sys/ddb/db_variables.c
===================================================================
--- head/sys/ddb/db_variables.c	(revision 298353)
+++ head/sys/ddb/db_variables.c	(revision 298354)
@@ -1,159 +1,158 @@
 /*-
  * Mach Operating System
  * Copyright (c) 1991,1990 Carnegie Mellon University
  * All Rights Reserved.
  *
  * Permission to use, copy, modify and distribute this software and its
  * documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 /*
  * 	Author: David B. Golub, Carnegie Mellon University
  *	Date:	7/90
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 
 #include <ddb/ddb.h>
 #include <ddb/db_lex.h>
 #include <ddb/db_variables.h>
 
 static int	db_find_variable(struct db_variable **varp);
 
 /*
  * Variables provided by this file.  Entries that list FCN_NULL are read
  * and written directly through their value pointer; the remaining entries
  * are accessed through the listed function (see db_read_variable() and
  * db_write_variable()).
  */
 static struct db_variable db_vars[] = {
 	{ "radix",	&db_radix, FCN_NULL },
 	{ "maxoff",	&db_maxoff, FCN_NULL },
 	{ "maxwidth",	&db_max_width, FCN_NULL },
 	{ "tabstops",	&db_tab_stop_width, FCN_NULL },
 	{ "lines",	&db_lines_per_page, FCN_NULL },
 	{ "curcpu",	NULL, db_var_curcpu },
 	{ "db_cpu",	NULL, db_var_db_cpu },
 #ifdef VIMAGE
 	{ "curvnet",	NULL, db_var_curvnet },
 	{ "db_vnet",	NULL, db_var_db_vnet },
 #endif
 };
 /* One past the last element of db_vars; loop bound in db_find_variable(). */
-static struct db_variable *db_evars =
-	db_vars + nitems(db_vars);
+static struct db_variable *db_evars = db_vars + nitems(db_vars);
 
 /*
  * Read the next token and look it up as a variable name, searching db_vars
  * first and db_regs second.  On a match the entry is stored through varp
  * and 1 is returned.  If the token is not an identifier or no entry has
  * that name, db_error() is called and 0 is returned.
  */
 static int
 db_find_variable(struct db_variable **varp)
 {
 	struct db_variable *cur;
 
 	if (db_read_token() != tIDENT)
 		goto unknown;
 
 	for (cur = db_vars; cur < db_evars; cur++) {
 		if (strcmp(db_tok_string, cur->name) == 0)
 			goto found;
 	}
 	for (cur = db_regs; cur < db_eregs; cur++) {
 		if (strcmp(db_tok_string, cur->name) == 0)
 			goto found;
 	}
 
 unknown:
 	db_error("Unknown variable\n");
 	return (0);
 
 found:
 	*varp = cur;
 	return (1);
 }
 
 /*
  * Parse a variable name from the input and fetch its value into *valuep.
  * Returns 0 when the lookup fails, otherwise whatever db_read_variable()
  * returns.
  */
 int
 db_get_variable(db_expr_t *valuep)
 {
 	struct db_variable *var;
 
 	if (db_find_variable(&var))
 		return (db_read_variable(var, valuep));
 	return (0);
 }
 
 /*
  * Parse a variable name from the input and store value into it.
  * Returns 0 when the lookup fails, otherwise whatever db_write_variable()
  * returns.
  */
 int
 db_set_variable(db_expr_t value)
 {
 	struct db_variable *var;
 
 	if (db_find_variable(&var))
 		return (db_write_variable(var, value));
 	return (0);
 }
 
 /*
  * Fetch the value of vp into *valuep.  A variable with an accessor
  * function is read by calling it with DB_VAR_GET and the accessor's result
  * is returned; otherwise the value is copied from vp->valuep and 1 is
  * returned.
  */
 int
 db_read_variable(struct db_variable *vp, db_expr_t *valuep)
 {
 
 	if (vp->fcn != FCN_NULL)
 		return ((*vp->fcn)(vp, valuep, DB_VAR_GET));
 
 	*valuep = *vp->valuep;
 	return (1);
 }
 
 /*
  * Store value into vp.  A variable with an accessor function is written by
  * calling it with DB_VAR_SET and the accessor's result is returned;
  * otherwise the value is stored through vp->valuep and 1 is returned.
  */
 int
 db_write_variable(struct db_variable *vp, db_expr_t value)
 {
 
 	if (vp->fcn != FCN_NULL)
 		return ((*vp->fcn)(vp, &value, DB_VAR_SET));
 
 	*vp->valuep = value;
 	return (1);
 }
 
 /*
  * Parse "$<variable> [=] <expression>" followed by end of line from the
  * debugger input and write the expression's value to the variable.  The
  * four parameters are not used.
  *
  * NOTE(review): the final db_write_variable() is reached after the
  * db_error("?\n") call unless db_error() does not return; its behaviour is
  * not visible in this file.
  */
 void
 db_set_cmd(db_expr_t dummy1, bool dummy2, db_expr_t dummy3, char *dummy4)
 {
 	struct db_variable *var;
 	db_expr_t newval;
 	int tok;
 
 	/* A variable reference is a '$' followed by a known name. */
 	if (db_read_token() != tDOLLAR || !db_find_variable(&var)) {
 		db_error("Unknown variable\n");
 		return;
 	}
 
 	/* The '=' between the name and the value is optional. */
 	tok = db_read_token();
 	if (tok != tEQ)
 		db_unread_token(tok);
 
 	if (db_expression(&newval) == 0) {
 		db_error("No value\n");
 		return;
 	}
 	if (db_read_token() != tEOL)
 		db_error("?\n");
 
 	db_write_variable(var, newval);
 }
Index: head/sys/geom/part/g_part_bsd.c
===================================================================
--- head/sys/geom/part/g_part_bsd.c	(revision 298353)
+++ head/sys/geom/part/g_part_bsd.c	(revision 298354)
@@ -1,540 +1,539 @@
 /*-
  * Copyright (c) 2007 Marcel Moolenaar
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/disklabel.h>
 #include <sys/endian.h>
 #include <sys/kernel.h>
 #include <sys/kobj.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/queue.h>
 #include <sys/sbuf.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <geom/geom.h>
 #include <geom/part/g_part.h>
 
 #include "g_part_if.h"
 
 /*
  * Layout of the BBSIZE-byte boot area as used by g_part_bsd_bootcode():
  * the first BOOT1_SIZE bytes and the BOOT2_SIZE bytes starting at BOOT2_OFF
  * are taken from the supplied boot code; the LABEL_SIZE bytes in between
  * are never overwritten by it.
  */
 #define	BOOT1_SIZE	512
 #define	LABEL_SIZE	512
 #define	BOOT2_OFF	(BOOT1_SIZE + LABEL_SIZE)
 #define	BOOT2_SIZE	(BBSIZE - BOOT2_OFF)
 
 FEATURE(geom_part_bsd, "GEOM partitioning class for BSD disklabels");
 
 /* Per-table state for the BSD scheme; extends the generic g_part_table. */
 struct g_part_bsd_table {
 	struct g_part_table	base;
 	/* BBSIZE-byte boot area (g_part_bsd_create/g_part_bsd_read). */
 	u_char			*bbarea;
 	/* p_offset of the RAW_PART entry, as loaded by g_part_bsd_read(). */
 	uint32_t		offset;
 };
 
 /* Per-entry state for the BSD scheme; extends the generic g_part_entry. */
 struct g_part_bsd_entry {
 	struct g_part_entry	base;
 	/* Decoded partition record for this entry. */
 	struct partition	part;
 };
 
 static int g_part_bsd_add(struct g_part_table *, struct g_part_entry *,
     struct g_part_parms *);
 static int g_part_bsd_bootcode(struct g_part_table *, struct g_part_parms *);
 static int g_part_bsd_create(struct g_part_table *, struct g_part_parms *);
 static int g_part_bsd_destroy(struct g_part_table *, struct g_part_parms *);
 static void g_part_bsd_dumpconf(struct g_part_table *, struct g_part_entry *,
     struct sbuf *, const char *);
 static int g_part_bsd_dumpto(struct g_part_table *, struct g_part_entry *);
 static int g_part_bsd_modify(struct g_part_table *, struct g_part_entry *,  
     struct g_part_parms *);
 static const char *g_part_bsd_name(struct g_part_table *, struct g_part_entry *,
     char *, size_t);
 static int g_part_bsd_probe(struct g_part_table *, struct g_consumer *);
 static int g_part_bsd_read(struct g_part_table *, struct g_consumer *);
 static const char *g_part_bsd_type(struct g_part_table *, struct g_part_entry *,
     char *, size_t);
 static int g_part_bsd_write(struct g_part_table *, struct g_consumer *);
 static int g_part_bsd_resize(struct g_part_table *, struct g_part_entry *,
     struct g_part_parms *);
 
 /*
  * Method table binding the g_part interface methods to the BSD scheme
  * implementations in this file.  The final entry is all zeroes.
  */
 static kobj_method_t g_part_bsd_methods[] = {
 	KOBJMETHOD(g_part_add,		g_part_bsd_add),
 	KOBJMETHOD(g_part_bootcode,	g_part_bsd_bootcode),
 	KOBJMETHOD(g_part_create,	g_part_bsd_create),
 	KOBJMETHOD(g_part_destroy,	g_part_bsd_destroy),
 	KOBJMETHOD(g_part_dumpconf,	g_part_bsd_dumpconf),
 	KOBJMETHOD(g_part_dumpto,	g_part_bsd_dumpto),
 	KOBJMETHOD(g_part_modify,	g_part_bsd_modify),
 	KOBJMETHOD(g_part_resize,	g_part_bsd_resize),
 	KOBJMETHOD(g_part_name,		g_part_bsd_name),
 	KOBJMETHOD(g_part_probe,	g_part_bsd_probe),
 	KOBJMETHOD(g_part_read,		g_part_bsd_read),
 	KOBJMETHOD(g_part_type,		g_part_bsd_type),
 	KOBJMETHOD(g_part_write,	g_part_bsd_write),
 	{ 0, 0 }
 };
 
 /*
  * Scheme descriptor for "BSD": the method table above, the size of the
  * per-table and per-entry structures, the allowed range of entry counts
  * (checked by g_part_bsd_read()) and the boot code size.
  *
  * NOTE(review): the comment on gps_maxent says 22 entries fit while the
  * limit is set to 20; the reason for the lower cap is not visible here.
  */
 static struct g_part_scheme g_part_bsd_scheme = {
 	"BSD",
 	g_part_bsd_methods,
 	sizeof(struct g_part_bsd_table),
 	.gps_entrysz = sizeof(struct g_part_bsd_entry),
 	.gps_minent = 8,
 	.gps_maxent = 20,	/* Only 22 entries fit in 512 byte sectors */
 	.gps_bootcodesz = BBSIZE,
 };
 G_PART_SCHEME_DECLARE(g_part_bsd);
 
 /*
  * Mapping between disklabel fstype values and g_part type aliases.
  * bsd_parse_type() searches it by alias name.
  */
 static struct g_part_bsd_alias {
 	uint8_t		type;	/* disklabel fstype (FS_*) */
 	int		alias;	/* g_part alias (G_PART_ALIAS_*) */
 } bsd_alias_match[] = {
 	{ FS_BSDFFS,	G_PART_ALIAS_FREEBSD_UFS },
 	{ FS_SWAP,	G_PART_ALIAS_FREEBSD_SWAP },
 	{ FS_ZFS,	G_PART_ALIAS_FREEBSD_ZFS },
 	{ FS_VINUM,	G_PART_ALIAS_FREEBSD_VINUM },
 	{ FS_NANDFS,	G_PART_ALIAS_FREEBSD_NANDFS },
 	{ FS_HAMMER,	G_PART_ALIAS_DFBSD_HAMMER },
 	{ FS_HAMMER2,	G_PART_ALIAS_DFBSD_HAMMER2 },
 };
 
 /*
  * Translate a partition type string into a disklabel fstype.
  *
  * A string of the form "!<number>" is parsed with strtol() (base 0) and
  * accepted when the whole remainder is a number in the range 1..255.
  * Any other string is compared case-insensitively with the alias name of
  * each bsd_alias_match entry.
  *
  * Returns 0 and stores the result in *fstype on success, EINVAL otherwise.
  */
 static int
 bsd_parse_type(const char *type, uint8_t *fstype)
 {
 	const char *alias;
 	char *endp;
 	long lt;
 	int i;
 
 	if (type[0] == '!') {
 		lt = strtol(type + 1, &endp, 0);
 		/* Reject an empty number, trailing junk and out-of-range. */
 		if (type[1] == '\0' || *endp != '\0' || lt <= 0 || lt >= 256)
 			return (EINVAL);
 		*fstype = (u_int)lt;
 		return (0);
 	}
-	for (i = 0;
-	    i < nitems(bsd_alias_match); i++) {
+	for (i = 0; i < nitems(bsd_alias_match); i++) {
 		alias = g_part_alias_name(bsd_alias_match[i].alias);
 		if (strcasecmp(type, alias) == 0) {
 			*fstype = bsd_alias_match[i].type;
 			return (0);
 		}
 	}
 	return (EINVAL);
 }
 
 static int
 g_part_bsd_add(struct g_part_table *basetable, struct g_part_entry *baseentry,
     struct g_part_parms *gpp)
 {
 	struct g_part_bsd_entry *entry;
 	struct g_part_bsd_table *table;
 
 	if (gpp->gpp_parms & G_PART_PARM_LABEL)
 		return (EINVAL);
 
 	entry = (struct g_part_bsd_entry *)baseentry;
 	table = (struct g_part_bsd_table *)basetable;
 
 	entry->part.p_size = gpp->gpp_size;
 	entry->part.p_offset = gpp->gpp_start + table->offset;
 	entry->part.p_fsize = 0;
 	entry->part.p_frag = 0;
 	entry->part.p_cpg = 0;
 	return (bsd_parse_type(gpp->gpp_type, &entry->part.p_fstype));
 }
 
 static int
 g_part_bsd_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp)
 {
 	struct g_part_bsd_table *table;
 	const u_char *codeptr;
 
 	if (gpp->gpp_codesize != BOOT1_SIZE && gpp->gpp_codesize != BBSIZE)
 		return (ENODEV);
 
 	table = (struct g_part_bsd_table *)basetable;
 	codeptr = gpp->gpp_codeptr;
 	bcopy(codeptr, table->bbarea, BOOT1_SIZE);
 	if (gpp->gpp_codesize == BBSIZE)
 		bcopy(codeptr + BOOT2_OFF, table->bbarea + BOOT2_OFF,
 		    BOOT2_SIZE);
 	return (0);
 }
 
 /*
  * Create an empty BSD disklabel for the provider named in gpp.
  *
  * Returns ENOSPC if a sector is smaller than struct disklabel and ENOTBLK
  * if BBSIZE is not a multiple of the sector size.  Otherwise a zeroed
  * BBSIZE boot area is allocated, the label fields are encoded one sector
  * into it, the table bounds are set and an internal entry with index
  * RAW_PART + 1 covering the whole table is created.  Returns 0.
  */
 static int
 g_part_bsd_create(struct g_part_table *basetable, struct g_part_parms *gpp)
 {
 	struct g_provider *pp;
 	struct g_part_entry *baseentry;
 	struct g_part_bsd_entry *entry;
 	struct g_part_bsd_table *table;
 	u_char *ptr;
 	uint32_t msize, ncyls, secpercyl;
 
 	pp = gpp->gpp_provider;
 
 	if (pp->sectorsize < sizeof(struct disklabel))
 		return (ENOSPC);
 	if (BBSIZE % pp->sectorsize)
 		return (ENOTBLK);
 
 	/* Media size in sectors, clamped to what fits in 32 bits. */
 	msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX);
 	secpercyl = basetable->gpt_sectors * basetable->gpt_heads;
 	ncyls = msize / secpercyl;
 
 	table = (struct g_part_bsd_table *)basetable;
 	table->bbarea = g_malloc(BBSIZE, M_WAITOK | M_ZERO);
 	/* The label is placed in the second sector of the boot area. */
 	ptr = table->bbarea + pp->sectorsize;
 
 	le32enc(ptr + 0, DISKMAGIC);			/* d_magic */
 	le32enc(ptr + 40, pp->sectorsize);		/* d_secsize */
 	le32enc(ptr + 44, basetable->gpt_sectors);	/* d_nsectors */
 	le32enc(ptr + 48, basetable->gpt_heads);	/* d_ntracks */
 	le32enc(ptr + 52, ncyls);			/* d_ncylinders */
 	le32enc(ptr + 56, secpercyl);			/* d_secpercyl */
 	le32enc(ptr + 60, msize);			/* d_secperunit */
 	le16enc(ptr + 72, 3600);			/* d_rpm */
 	le32enc(ptr + 132, DISKMAGIC);			/* d_magic2 */
 	le16enc(ptr + 138, basetable->gpt_entries);	/* d_npartitions */
 	le32enc(ptr + 140, BBSIZE);			/* d_bbsize */
 
 	basetable->gpt_first = 0;
 	basetable->gpt_last = msize - 1;
 	basetable->gpt_isleaf = 1;
 
 	/* Internal entry spanning the whole table (the raw partition). */
 	baseentry = g_part_new_entry(basetable, RAW_PART + 1,
 	    basetable->gpt_first, basetable->gpt_last);
 	baseentry->gpe_internal = 1;
 	entry = (struct g_part_bsd_entry *)baseentry;
 	entry->part.p_size = basetable->gpt_last + 1;
 	entry->part.p_offset = table->offset;
 
 	return (0);
 }
 
 static int
 g_part_bsd_destroy(struct g_part_table *basetable, struct g_part_parms *gpp)
 {
 	struct g_part_bsd_table *table;
 
 	table = (struct g_part_bsd_table *)basetable;
 	if (table->bbarea != NULL)
 		g_free(table->bbarea);
 	table->bbarea = NULL;
 
 	/* Wipe the second sector to clear the partitioning. */
 	basetable->gpt_smhead |= 2;
 	return (0);
 }
 
 /*
  * Emit scheme-specific configuration text into sb.  With a NULL indent the
  * conftxt form is produced; otherwise, when an entry is given, its raw
  * fstype is emitted as XML.  Nothing is emitted for the scheme itself.
  */
 static void
 g_part_bsd_dumpconf(struct g_part_table *table, struct g_part_entry *baseentry,
     struct sbuf *sb, const char *indent)
 {
 	struct g_part_bsd_entry *bsdent;
 
 	bsdent = (struct g_part_bsd_entry *)baseentry;
 	if (indent == NULL) {
 		/* conftxt: libdisk compatibility */
 		sbuf_printf(sb, " xs BSD xt %u", bsdent->part.p_fstype);
 		return;
 	}
 	if (bsdent == NULL) {
 		/* confxml: scheme information */
 		return;
 	}
 	/* confxml: partition entry information */
 	sbuf_printf(sb, "%s<rawtype>%u</rawtype>\n", indent,
 	    bsdent->part.p_fstype);
 }
 
 /*
  * Report whether the entry may be used as a dump target: 1 for a swap or
  * an unused partition, 0 for anything else.
  */
 static int
 g_part_bsd_dumpto(struct g_part_table *table, struct g_part_entry *baseentry)
 {
 	struct g_part_bsd_entry *bsdent;
 
 	bsdent = (struct g_part_bsd_entry *)baseentry;
 	if (bsdent->part.p_fstype == FS_UNUSED)
 		return (1);
 	if (bsdent->part.p_fstype == FS_SWAP)
 		return (1);
 	return (0);
 }
 
 /*
  * Apply a modify request to an entry.  Labels are not supported (EINVAL).
  * When a type is supplied it is parsed into the entry's fstype and the
  * parse result is returned; otherwise nothing changes and 0 is returned.
  */
 static int
 g_part_bsd_modify(struct g_part_table *basetable,
     struct g_part_entry *baseentry, struct g_part_parms *gpp)
 {
 	struct g_part_bsd_entry *bsdent;
 
 	if ((gpp->gpp_parms & G_PART_PARM_LABEL) != 0)
 		return (EINVAL);
 	if ((gpp->gpp_parms & G_PART_PARM_TYPE) == 0)
 		return (0);
 
 	bsdent = (struct g_part_bsd_entry *)baseentry;
 	return (bsd_parse_type(gpp->gpp_type, &bsdent->part.p_fstype));
 }
 
 static void
 bsd_set_rawsize(struct g_part_table *basetable, struct g_provider *pp)
 {
 	struct g_part_bsd_table *table;
 	struct g_part_bsd_entry *entry;
 	struct g_part_entry *baseentry;
 	uint32_t msize;
 
 	table = (struct g_part_bsd_table *)basetable;
 	msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX);
 	le32enc(table->bbarea + pp->sectorsize + 60, msize); /* d_secperunit */
 	basetable->gpt_last = msize - 1;
 	LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) {
 		if (baseentry->gpe_index != RAW_PART + 1)
 			continue;
 		baseentry->gpe_end = basetable->gpt_last;
 		entry = (struct g_part_bsd_entry *)baseentry;
 		entry->part.p_size = msize;
 		return;
 	}
 }
 
 /*
  * Resize either a single entry or, when baseentry is NULL, the table
  * itself.  For the table the size is re-derived from the provider of the
  * geom's first consumer.  Always returns 0.
  */
 static int
 g_part_bsd_resize(struct g_part_table *basetable,
     struct g_part_entry *baseentry, struct g_part_parms *gpp)
 {
 
 	if (baseentry != NULL) {
 		baseentry->gpe_end = baseentry->gpe_start + gpp->gpp_size - 1;
 		((struct g_part_bsd_entry *)baseentry)->part.p_size =
 		    gpp->gpp_size;
 	} else {
 		bsd_set_rawsize(basetable,
 		    LIST_FIRST(&basetable->gpt_gp->consumer)->provider);
 	}
 	return (0);
 }
 
 /*
  * Format the name of an entry into buf: a single letter, 'a' for index 1,
  * 'b' for index 2 and so on.  Returns buf.
  */
 static const char *
 g_part_bsd_name(struct g_part_table *table, struct g_part_entry *baseentry,
     char *buf, size_t bufsz)
 {
 
 	(void)snprintf(buf, bufsz, "%c", 'a' + baseentry->gpe_index - 1);
 	return (buf);
 }
 
 static int
 g_part_bsd_probe(struct g_part_table *table, struct g_consumer *cp)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	uint32_t magic1, magic2;
 	int error;
 
 	pp = cp->provider;
 
 	/* Sanity-check the provider. */
 	if (pp->sectorsize < sizeof(struct disklabel) ||
 	    pp->mediasize < BBSIZE)
 		return (ENOSPC);
 	if (BBSIZE % pp->sectorsize)
 		return (ENOTBLK);
 
 	/* Check that there's a disklabel. */
 	buf = g_read_data(cp, pp->sectorsize, pp->sectorsize, &error);
 	if (buf == NULL)
 		return (error);
 	magic1 = le32dec(buf + 0);
 	magic2 = le32dec(buf + 132);
 	g_free(buf);
 	return ((magic1 == DISKMAGIC && magic2 == DISKMAGIC)
 	    ? G_PART_PROBE_PRI_HIGH : ENXIO);
 }
 
 /*
  * Read the boot area from the consumer and build the in-memory table from
  * the disklabel found one sector into it.
  *
  * The label is rejected (message printed, boot area freed, EINVAL) when
  * its sector size differs from the provider's, when the sectors or heads
  * count is outside 1..255, when d_secperunit is zero, when the entry count
  * is outside the scheme's min/max, or when a partition starts beyond the
  * end of the table.  A read failure returns the error from g_read_data().
  * Returns 0 on success.
  */
 static int
 g_part_bsd_read(struct g_part_table *basetable, struct g_consumer *cp)
 {
 	struct g_provider *pp;
 	struct g_part_bsd_table *table;
 	struct g_part_entry *baseentry;
 	struct g_part_bsd_entry *entry;
 	struct partition part;
 	u_char *buf, *p;
 	off_t chs, msize;
 	u_int sectors, heads;
 	int error, index;
 
 	pp = cp->provider;
 	table = (struct g_part_bsd_table *)basetable;
 	/* Media size in sectors, clamped to what fits in 32 bits. */
 	msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX);
 
 	table->bbarea = g_read_data(cp, 0, BBSIZE, &error);
 	if (table->bbarea == NULL)
 		return (error);
 
 	/* The label lives in the second sector of the boot area. */
 	buf = table->bbarea + pp->sectorsize;
 
 	if (le32dec(buf + 40) != pp->sectorsize)
 		goto invalid_label;
 	sectors = le32dec(buf + 44);
 	if (sectors < 1 || sectors > 255)
 		goto invalid_label;
 	/* Adopt the label's geometry unless the table's geometry is fixed. */
 	if (sectors != basetable->gpt_sectors && !basetable->gpt_fixgeom) {
 		g_part_geometry_heads(msize, sectors, &chs, &heads);
 		if (chs != 0) {
 			basetable->gpt_sectors = sectors;
 			basetable->gpt_heads = heads;
 		}
 	}
 	heads = le32dec(buf + 48);
 	if (heads < 1 || heads > 255)
 		goto invalid_label;
 	if (heads != basetable->gpt_heads && !basetable->gpt_fixgeom)
 		basetable->gpt_heads = heads;
 
 	/* From here on chs holds d_secperunit from the label. */
 	chs = le32dec(buf + 60);
 	if (chs < 1)
 		goto invalid_label;
 	/* Fix-up a sysinstall bug. */
 	if (chs > msize) {
 		chs = msize;
 		le32enc(buf + 60, msize);
 	}
 
 	basetable->gpt_first = 0;
 	basetable->gpt_last = msize - 1;
 	basetable->gpt_isleaf = 1;
 
 	basetable->gpt_entries = le16dec(buf + 138);
 	if (basetable->gpt_entries < g_part_bsd_scheme.gps_minent ||
 	    basetable->gpt_entries > g_part_bsd_scheme.gps_maxent)
 		goto invalid_label;
 
 	/*
 	 * Partition records are 16 bytes each and start at offset 148.
 	 * Offsets of all entries are taken relative to the p_offset of the
 	 * RAW_PART record.
 	 */
 	table->offset = le32dec(buf + 148 + RAW_PART * 16 + 4);
 	for (index = basetable->gpt_entries - 1; index >= 0; index--) {
 		p = buf + 148 + index * 16;
 		part.p_size = le32dec(p + 0);
 		part.p_offset = le32dec(p + 4);
 		part.p_fsize = le32dec(p + 8);
 		part.p_fstype = p[12];
 		part.p_frag = p[13];
 		part.p_cpg = le16dec(p + 14);
 		/* Skip empty records and ones that start before the table. */
 		if (part.p_size == 0)
 			continue;
 		if (part.p_offset < table->offset)
 			continue;
 		if (part.p_offset - table->offset > basetable->gpt_last)
 			goto invalid_label;
 		baseentry = g_part_new_entry(basetable, index + 1,
 		    part.p_offset - table->offset,
 		    part.p_offset - table->offset + part.p_size - 1);
 		entry = (struct g_part_bsd_entry *)baseentry;
 		entry->part = part;
 		if (index == RAW_PART)
 			baseentry->gpe_internal = 1;
 	}
 
 	return (0);
 
  invalid_label:
 	printf("GEOM: %s: invalid disklabel.\n", pp->name);
 	g_free(table->bbarea);
 	table->bbarea = NULL;
 	return (EINVAL);
 }
 
 static const char *
 g_part_bsd_type(struct g_part_table *basetable, struct g_part_entry *baseentry, 
     char *buf, size_t bufsz)
 {
 	struct g_part_bsd_entry *entry;
 	int type;
 
 	entry = (struct g_part_bsd_entry *)baseentry;
 	type = entry->part.p_fstype;
 	if (type == FS_NANDFS)
 		return (g_part_alias_name(G_PART_ALIAS_FREEBSD_NANDFS));
 	if (type == FS_SWAP)
 		return (g_part_alias_name(G_PART_ALIAS_FREEBSD_SWAP));
 	if (type == FS_BSDFFS)
 		return (g_part_alias_name(G_PART_ALIAS_FREEBSD_UFS));
 	if (type == FS_VINUM)
 		return (g_part_alias_name(G_PART_ALIAS_FREEBSD_VINUM));
 	if (type == FS_ZFS)
 		return (g_part_alias_name(G_PART_ALIAS_FREEBSD_ZFS));
 	snprintf(buf, bufsz, "!%d", type);
 	return (buf);
 }
 
 /*
  * Encode the in-memory entries into the label held in the boot area,
  * recompute the label checksum and write the whole boot area (BBSIZE
  * bytes) at offset 0 of the consumer.  Returns the result of
  * g_write_data().
  */
 static int
 g_part_bsd_write(struct g_part_table *basetable, struct g_consumer *cp)
 {
 	struct g_provider *pp;
 	struct g_part_entry *baseentry;
 	struct g_part_bsd_entry *entry;
 	struct g_part_bsd_table *table;
 	uint16_t sum;
 	u_char *label, *p, *pe;
 	int error, index;
 
 	pp = cp->provider;
 	table = (struct g_part_bsd_table *)basetable;
 	baseentry = LIST_FIRST(&basetable->gpt_entry);
 	label = table->bbarea + pp->sectorsize;
 	/*
 	 * Walk the record slots and the entry list in lockstep: the list
 	 * only advances when its head matches the current index.
 	 * NOTE(review): this presumably relies on the list being sorted by
 	 * gpe_index; that is not established in this file.
 	 */
 	for (index = 1; index <= basetable->gpt_entries; index++) {
 		p = label + 148 + (index - 1) * 16;
 		entry = (baseentry != NULL && index == baseentry->gpe_index)
 		    ? (struct g_part_bsd_entry *)baseentry : NULL;
 		if (entry != NULL && !baseentry->gpe_deleted) {
 			le32enc(p + 0, entry->part.p_size);
 			le32enc(p + 4, entry->part.p_offset);
 			le32enc(p + 8, entry->part.p_fsize);
 			p[12] = entry->part.p_fstype;
 			p[13] = entry->part.p_frag;
 			le16enc(p + 14, entry->part.p_cpg);
 		} else
 			bzero(p, 16);	/* No entry or deleted: clear slot. */
 
 		if (entry != NULL)
 			baseentry = LIST_NEXT(baseentry, gpe_entry);
 	}
 
 	/*
 	 * Calculate checksum: XOR of the little-endian 16-bit words from
 	 * the start of the label to the end of the last record, computed
 	 * with the checksum field (offset 136) set to zero.
 	 */
 	le16enc(label + 136, 0);
 	pe = label + 148 + basetable->gpt_entries * 16;
 	sum = 0;
 	for (p = label; p < pe; p += 2)
 		sum ^= le16dec(p);
 	le16enc(label + 136, sum);
 
 	error = g_write_data(cp, 0, table->bbarea, BBSIZE);
 	return (error);
 }
Index: head/sys/geom/part/g_part_ebr.c
===================================================================
--- head/sys/geom/part/g_part_ebr.c	(revision 298353)
+++ head/sys/geom/part/g_part_ebr.c	(revision 298354)
@@ -1,696 +1,694 @@
 /*-
  * Copyright (c) 2007-2009 Marcel Moolenaar
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include "opt_geom.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/diskmbr.h>
 #include <sys/endian.h>
 #include <sys/kernel.h>
 #include <sys/kobj.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/queue.h>
 #include <sys/sbuf.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <geom/geom.h>
 #include <geom/part/g_part.h>
 
 #include "g_part_if.h"
 
 FEATURE(geom_part_ebr,
     "GEOM partitioning class for extended boot records support");
 #if defined(GEOM_PART_EBR_COMPAT)
 FEATURE(geom_part_ebr_compat,
     "GEOM EBR partitioning class: backward-compatible partition names");
 #endif
 
 /* Minimum sector size accepted by g_part_ebr_create(), in bytes. */
 #define	EBRSIZE		512
 
 /* Per-table state for the EBR scheme; extends the generic g_part_table. */
 struct g_part_ebr_table {
 	struct g_part_table	base;
 #ifndef GEOM_PART_EBR_COMPAT
 	/* EBRSIZE-byte buffer; its use is not shown in this part of the file. */
 	u_char		ebr[EBRSIZE];
 #endif
 };
 
 /* Per-entry state for the EBR scheme; extends the generic g_part_entry. */
 struct g_part_ebr_entry {
 	struct g_part_entry	base;
 	/* Decoded partition record for this entry. */
 	struct dos_partition	ent;
 };
 
 static int g_part_ebr_add(struct g_part_table *, struct g_part_entry *,
     struct g_part_parms *);
 static int g_part_ebr_create(struct g_part_table *, struct g_part_parms *);
 static int g_part_ebr_destroy(struct g_part_table *, struct g_part_parms *);
 static void g_part_ebr_dumpconf(struct g_part_table *, struct g_part_entry *,
     struct sbuf *, const char *);
 static int g_part_ebr_dumpto(struct g_part_table *, struct g_part_entry *);
 #if defined(GEOM_PART_EBR_COMPAT)
 static void g_part_ebr_fullname(struct g_part_table *, struct g_part_entry *,
     struct sbuf *, const char *);
 #endif
 static int g_part_ebr_modify(struct g_part_table *, struct g_part_entry *,  
     struct g_part_parms *);
 static const char *g_part_ebr_name(struct g_part_table *, struct g_part_entry *,
     char *, size_t);
 static int g_part_ebr_precheck(struct g_part_table *, enum g_part_ctl,
     struct g_part_parms *);
 static int g_part_ebr_probe(struct g_part_table *, struct g_consumer *);
 static int g_part_ebr_read(struct g_part_table *, struct g_consumer *);
 static int g_part_ebr_setunset(struct g_part_table *, struct g_part_entry *,
     const char *, unsigned int);
 static const char *g_part_ebr_type(struct g_part_table *, struct g_part_entry *,
     char *, size_t);
 static int g_part_ebr_write(struct g_part_table *, struct g_consumer *);
 static int g_part_ebr_resize(struct g_part_table *, struct g_part_entry *,
     struct g_part_parms *);
 
 /*
  * Method table binding the g_part interface methods to the EBR scheme
  * implementations in this file.  g_part_fullname is only provided when
  * GEOM_PART_EBR_COMPAT is defined.  The final entry is all zeroes.
  */
 static kobj_method_t g_part_ebr_methods[] = {
 	KOBJMETHOD(g_part_add,		g_part_ebr_add),
 	KOBJMETHOD(g_part_create,	g_part_ebr_create),
 	KOBJMETHOD(g_part_destroy,	g_part_ebr_destroy),
 	KOBJMETHOD(g_part_dumpconf,	g_part_ebr_dumpconf),
 	KOBJMETHOD(g_part_dumpto,	g_part_ebr_dumpto),
 #if defined(GEOM_PART_EBR_COMPAT)
 	KOBJMETHOD(g_part_fullname,	g_part_ebr_fullname),
 #endif
 	KOBJMETHOD(g_part_modify,	g_part_ebr_modify),
 	KOBJMETHOD(g_part_name,		g_part_ebr_name),
 	KOBJMETHOD(g_part_precheck,	g_part_ebr_precheck),
 	KOBJMETHOD(g_part_probe,	g_part_ebr_probe),
 	KOBJMETHOD(g_part_read,		g_part_ebr_read),
 	KOBJMETHOD(g_part_resize,	g_part_ebr_resize),
 	KOBJMETHOD(g_part_setunset,	g_part_ebr_setunset),
 	KOBJMETHOD(g_part_type,		g_part_ebr_type),
 	KOBJMETHOD(g_part_write,	g_part_ebr_write),
 	{ 0, 0 }
 };
 
 /*
  * Scheme descriptor for "EBR": the method table above, the size of the
  * per-table and per-entry structures and the allowed range of entry
  * counts (1 up to INT_MAX).
  */
 static struct g_part_scheme g_part_ebr_scheme = {
 	"EBR",
 	g_part_ebr_methods,
 	sizeof(struct g_part_ebr_table),
 	.gps_entrysz = sizeof(struct g_part_ebr_entry),
 	.gps_minent = 1,
 	.gps_maxent = INT_MAX,
 };
 G_PART_SCHEME_DECLARE(g_part_ebr);
 
 /*
  * Mapping between partition type bytes and g_part type aliases.
  * ebr_parse_type() searches it by alias name.
  */
 static struct g_part_ebr_alias {
 	u_char		typ;	/* partition type byte (DOSPTYP_*) */
 	int		alias;	/* g_part alias (G_PART_ALIAS_*) */
 } ebr_alias_match[] = {
 	{ DOSPTYP_386BSD,	G_PART_ALIAS_FREEBSD },
 	{ DOSPTYP_NTFS,		G_PART_ALIAS_MS_NTFS },
 	{ DOSPTYP_FAT32,	G_PART_ALIAS_MS_FAT32 },
 	{ DOSPTYP_LINSWP,	G_PART_ALIAS_LINUX_SWAP },
 	{ DOSPTYP_LINUX,	G_PART_ALIAS_LINUX_DATA },
 	{ DOSPTYP_LINLVM,	G_PART_ALIAS_LINUX_LVM },
 	{ DOSPTYP_LINRAID,	G_PART_ALIAS_LINUX_RAID },
 };
 
 static void ebr_set_chs(struct g_part_table *, uint32_t, u_char *, u_char *,
     u_char *);
 
 /*
  * Decode the 16-byte on-disk partition record at p into *ent.  The first
  * eight bytes map one-to-one onto the flag, CHS and type fields; the start
  * and size are little-endian 32-bit values at offsets 8 and 12.
  */
 static void
 ebr_entry_decode(const char *p, struct dos_partition *ent)
 {
 
 	/* Flag and type bytes. */
 	ent->dp_flag = p[0];
 	ent->dp_typ = p[4];
 
 	/* CHS address of the first sector. */
 	ent->dp_shd = p[1];
 	ent->dp_ssect = p[2];
 	ent->dp_scyl = p[3];
 
 	/* CHS address of the last sector. */
 	ent->dp_ehd = p[5];
 	ent->dp_esect = p[6];
 	ent->dp_ecyl = p[7];
 
 	/* LBA start and length. */
 	ent->dp_start = le32dec(p + 8);
 	ent->dp_size = le32dec(p + 12);
 }
 
 /*
  * Encode into buf a 16-byte partition record with flag 0 and type 5 that
  * covers sectors start..end inclusive: CHS values come from ebr_set_chs(),
  * the start and the length (end - start + 1) are stored little-endian at
  * offsets 8 and 12.
  */
 static void
 ebr_entry_link(struct g_part_table *table, uint32_t start, uint32_t end,
    u_char *buf)
 {
 
 	buf[0] = 0;	/* dp_flag */
 	buf[4] = 5;	/* dp_typ */
 
 	/* dp_scyl, dp_shd, dp_ssect */
 	ebr_set_chs(table, start, buf + 3, buf + 1, buf + 2);
 	/* dp_ecyl, dp_ehd, dp_esect */
 	ebr_set_chs(table, end, buf + 7, buf + 5, buf + 6);
 
 	le32enc(buf + 8, start);
 	le32enc(buf + 12, end - start + 1);
 }
 
 /*
  * Translate a partition type string into a partition type byte.
  *
  * A string of the form "!<number>" is parsed with strtol() (base 0) and
  * accepted when the whole remainder is a number in the range 1..255.
  * Any other string is compared case-insensitively with the alias name of
  * each ebr_alias_match entry.
  *
  * Returns 0 and stores the result in *dp_typ on success, EINVAL otherwise.
  */
 static int
 ebr_parse_type(const char *type, u_char *dp_typ)
 {
 	const char *alias;
 	char *endp;
 	long lt;
 	int i;
 
 	if (type[0] == '!') {
 		lt = strtol(type + 1, &endp, 0);
 		/* Reject an empty number, trailing junk and out-of-range. */
 		if (type[1] == '\0' || *endp != '\0' || lt <= 0 || lt >= 256)
 			return (EINVAL);
 		*dp_typ = (u_char)lt;
 		return (0);
 	}
-	for (i = 0;
-	    i < nitems(ebr_alias_match); i++) {
+	for (i = 0; i < nitems(ebr_alias_match); i++) {
 		alias = g_part_alias_name(ebr_alias_match[i].alias);
 		if (strcasecmp(type, alias) == 0) {
 			*dp_typ = ebr_alias_match[i].typ;
 			return (0);
 		}
 	}
 	return (EINVAL);
 }
 
 
 /*
  * Convert a sector number into the three packed CHS bytes of a partition
  * record, using the table's sectors and heads.  A cylinder above 1023
  * cannot be represented; in that case all three output bytes end up 0xff.
  * The sector byte carries the sector in its low six bits and bits 8-9 of
  * the cylinder in its top two bits.
  */
 static void
 ebr_set_chs(struct g_part_table *table, uint32_t lba, u_char *cylp, u_char *hdp,
     u_char *secp)
 {
 	uint32_t cyl, hd, sec, track;
 
 	track = lba / table->gpt_sectors;
 	sec = lba % table->gpt_sectors + 1;
 	hd = track % table->gpt_heads;
 	cyl = track / table->gpt_heads;
 
 	if (cyl > 1023) {
 		cyl = ~0;
 		hd = cyl;
 		sec = cyl;
 	}
 
 	*secp = (sec & 0x3f) | ((cyl >> 2) & 0xc0);
 	*hdp = hd & 0xff;
 	*cylp = cyl & 0xff;
 }
 
 /*
  * Align a start/size pair to multiples of the table's sectors-per-track:
  * the start is rounded up (shrinking the size by the same amount) and the
  * size is rounded down.  Returns EINVAL if the size is below two tracks
  * before or after the adjustment, 0 otherwise.
  */
 static int
 ebr_align(struct g_part_table *basetable, uint32_t *start, uint32_t *size)
 {
 	uint32_t rem, sectors;
 
 	sectors = basetable->gpt_sectors;
 	if (*size < 2 * sectors)
 		return (EINVAL);
 
 	rem = *start % sectors;
 	if (rem != 0) {
 		/* Unsigned wrap-around: subtracts/adds (sectors - rem). */
 		*size += rem - sectors;
 		*start -= rem - sectors;
 	}
 
 	rem = *size % sectors;
 	if (rem != 0)
 		*size -= rem;
 
 	return ((*size < 2 * sectors) ? EINVAL : 0);
 }
 
 
 /*
  * Fill in a newly added entry from the request.
  *
  * Labels are not supported (EINVAL).  The requested start and size are
  * aligned with ebr_align(); EINVAL is returned if that fails.  The entry
  * index is derived from the aligned start, the first track of the range
  * is left out of the partition itself (dp_start is one track, dp_size is
  * the aligned size minus one track) and the CHS fields are computed.
  * Returns the result of parsing the requested type.
  */
 static int
 g_part_ebr_add(struct g_part_table *basetable, struct g_part_entry *baseentry,
     struct g_part_parms *gpp)
 {
 	struct g_provider *pp;
 	struct g_part_ebr_entry *entry;
 	uint32_t start, size;
 
 	if (gpp->gpp_parms & G_PART_PARM_LABEL)
 		return (EINVAL);
 
 	pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider;
 	entry = (struct g_part_ebr_entry *)baseentry;
 	start = gpp->gpp_start;
 	size = gpp->gpp_size;
 	if (ebr_align(basetable, &start, &size) != 0)
 		return (EINVAL);
 	/* A reused (deleted) entry starts from a clean record. */
 	if (baseentry->gpe_deleted)
 		bzero(&entry->ent, sizeof(entry->ent));
 
 	/* The aligned range must lie within the range the entry came with. */
 	KASSERT(baseentry->gpe_start <= start, ("%s", __func__));
 	KASSERT(baseentry->gpe_end >= start + size - 1, ("%s", __func__));
 	baseentry->gpe_index = (start / basetable->gpt_sectors) + 1;
 	/* Byte offset of the data, one track past the aligned start. */
 	baseentry->gpe_offset =
 	    (off_t)(start + basetable->gpt_sectors) * pp->sectorsize;
 	baseentry->gpe_start = start;
 	baseentry->gpe_end = start + size - 1;
 	entry->ent.dp_start = basetable->gpt_sectors;
 	entry->ent.dp_size = size - basetable->gpt_sectors;
 	ebr_set_chs(basetable, entry->ent.dp_start, &entry->ent.dp_scyl,
 	    &entry->ent.dp_shd, &entry->ent.dp_ssect);
 	ebr_set_chs(basetable, baseentry->gpe_end, &entry->ent.dp_ecyl,
 	    &entry->ent.dp_ehd, &entry->ent.dp_esect);
 	return (ebr_parse_type(gpp->gpp_type, &entry->ent.dp_typ));
 }
 
 static int
 g_part_ebr_create(struct g_part_table *basetable, struct g_part_parms *gpp)
 {
 	char type[64];
 	struct g_consumer *cp;
 	struct g_provider *pp;
 	uint32_t msize;
 	int error;
 
 	pp = gpp->gpp_provider;
 
 	if (pp->sectorsize < EBRSIZE)
 		return (ENOSPC);
 	if (pp->sectorsize > 4096)
 		return (ENXIO);
 
 	/* Check that we have a parent and that it's a MBR. */
 	if (basetable->gpt_depth == 0)
 		return (ENXIO);
 	cp = LIST_FIRST(&pp->consumers);
 	error = g_getattr("PART::scheme", cp, &type);
 	if (error != 0)
 		return (error);
 	if (strcmp(type, "MBR") != 0)
 		return (ENXIO);
 	error = g_getattr("PART::type", cp, &type);
 	if (error != 0)
 		return (error);
 	if (strcmp(type, "ebr") != 0)
 		return (ENXIO);
 
 	msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX);
 	basetable->gpt_first = 0;
 	basetable->gpt_last = msize - 1;
 	basetable->gpt_entries = msize / basetable->gpt_sectors;
 	return (0);
 }
 
 /*
  * Destroy the table: request that the first sector be wiped so that the
  * partitioning is no longer recognised.  Always returns 0.
  */
 static int
 g_part_ebr_destroy(struct g_part_table *basetable, struct g_part_parms *gpp)
 {
 
 	/* Bit 0 of gpt_smhead selects the first sector. */
 	basetable->gpt_smhead = basetable->gpt_smhead | 1;
 	return (0);
 }
 
 /*
  * Emit scheme-specific configuration text into sb.  With a NULL indent the
  * conftxt form is produced; otherwise, when an entry is given, its raw
  * type and (if bit 0x80 of the flag byte is set) the "active" attribute
  * are emitted as XML.  Nothing is emitted for the scheme itself.
  */
 static void
 g_part_ebr_dumpconf(struct g_part_table *table, struct g_part_entry *baseentry,
     struct sbuf *sb, const char *indent)
 {
 	struct g_part_ebr_entry *ebrent;
 
 	ebrent = (struct g_part_ebr_entry *)baseentry;
 	if (indent == NULL) {
 		/* conftxt: libdisk compatibility */
 		sbuf_printf(sb, " xs MBREXT xt %u", ebrent->ent.dp_typ);
 		return;
 	}
 	if (ebrent == NULL) {
 		/* confxml: scheme information */
 		return;
 	}
 	/* confxml: partition entry information */
 	sbuf_printf(sb, "%s<rawtype>%u</rawtype>\n", indent,
 	    ebrent->ent.dp_typ);
 	if ((ebrent->ent.dp_flag & 0x80) != 0)
 		sbuf_printf(sb, "%s<attrib>active</attrib>\n", indent);
 }
 
 /*
  * Report whether the entry may be used as a dump target: 1 for a FreeBSD
  * partition or a Linux swap partition, 0 for anything else.
  */
 static int
 g_part_ebr_dumpto(struct g_part_table *table, struct g_part_entry *baseentry)
 {
 	struct g_part_ebr_entry *ebrent;
 
 	ebrent = (struct g_part_ebr_entry *)baseentry;
 	if (ebrent->ent.dp_typ == DOSPTYP_386BSD)
 		return (1);
 	if (ebrent->ent.dp_typ == DOSPTYP_LINSWP)
 		return (1);
 	return (0);
 }
 
 #if defined(GEOM_PART_EBR_COMPAT)
 /*
  * Compose the compat-mode name of an entry into sb: pfx with its last
  * character dropped, followed by a number.  The number is 5 for the
  * first entry on the table's list and grows by one for every entry
  * that precedes the requested one.
  */
 static void
 g_part_ebr_fullname(struct g_part_table *table, struct g_part_entry *entry,
     struct sbuf *sb, const char *pfx)
 {
 	struct g_part_entry *cur;
 	u_int num;
 
 	num = 5;
 	for (cur = LIST_FIRST(&table->gpt_entry);
 	    cur != NULL && cur != entry;
 	    cur = LIST_NEXT(cur, gpe_entry))
 		num++;
 	sbuf_printf(sb, "%.*s%u", (int)strlen(pfx) - 1, pfx, num);
 }
 #endif
 
 /*
  * Apply a modify request to an entry.
  *
  * A label parameter is rejected with EINVAL.  When a type parameter is
  * present it is handed to ebr_parse_type(), which stores into the
  * entry's dp_typ, and that function's result is returned.  With neither
  * parameter the call is a no-op returning 0.
  */
 static int
 g_part_ebr_modify(struct g_part_table *basetable,
     struct g_part_entry *baseentry, struct g_part_parms *gpp)
 {
 	struct g_part_ebr_entry *ebr;
 
 	if ((gpp->gpp_parms & G_PART_PARM_LABEL) != 0)
 		return (EINVAL);
 	if ((gpp->gpp_parms & G_PART_PARM_TYPE) == 0)
 		return (0);
 
 	ebr = (struct g_part_ebr_entry *)baseentry;
 	return (ebr_parse_type(gpp->gpp_type, &ebr->ent.dp_typ));
 }
 
 static int
 g_part_ebr_resize(struct g_part_table *basetable,
     struct g_part_entry *baseentry, struct g_part_parms *gpp)
 {
 	struct g_provider *pp;
 
 	if (baseentry != NULL)
 		return (EOPNOTSUPP);
 	pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider;
 	basetable->gpt_last = MIN(pp->mediasize / pp->sectorsize,
 	    UINT32_MAX) - 1;
 	return (0);
 }
 
 /*
  * Format the entry's name into buf ('+' followed by gpe_index printed
  * as at least eight zero-padded digits) and return buf.
  */
 static const char *
 g_part_ebr_name(struct g_part_table *table, struct g_part_entry *entry,
     char *buf, size_t bufsz)
 {
 
 	(void)snprintf(buf, bufsz, "+%08u", entry->gpe_index);
 	return (buf);
 }
 
 /*
  * Pre-check hook for control requests.
  *
  * With GEOM_PART_EBR_COMPAT only a destroy request is let through (0);
  * any other request returns ECANCELED.  Without it every request is
  * accepted, and for an add request gpp_index is overwritten with the
  * index computed from the requested start.
  */
 static int
 g_part_ebr_precheck(struct g_part_table *table, enum g_part_ctl req,
     struct g_part_parms *gpp)
 {
 #if defined(GEOM_PART_EBR_COMPAT)
 	return ((req == G_PART_CTL_DESTROY) ? 0 : ECANCELED);
 #else
 	if (req != G_PART_CTL_ADD)
 		return (0);
 
 	/*
 	 * The index is a function of the start of the partition.
 	 * This is not something the user can override, nor is it
 	 * something the common code will do right. We can set the
 	 * index now so that we get what we need.
 	 */
 	gpp->gpp_index = (gpp->gpp_start / table->gpt_sectors) + 1;
 	return (0);
 #endif
 }
 
 static int
 g_part_ebr_probe(struct g_part_table *table, struct g_consumer *cp)
 {
 	char type[64];
 	struct g_provider *pp;
 	u_char *buf, *p;
 	int error, index, res;
 	uint16_t magic;
 
 	pp = cp->provider;
 
 	/* Sanity-check the provider. */
 	if (pp->sectorsize < EBRSIZE || pp->mediasize < pp->sectorsize)
 		return (ENOSPC);
 	if (pp->sectorsize > 4096)
 		return (ENXIO);
 
 	/* Check that we have a parent and that it's a MBR. */
 	if (table->gpt_depth == 0)
 		return (ENXIO);
 	error = g_getattr("PART::scheme", cp, &type);
 	if (error != 0)
 		return (error);
 	if (strcmp(type, "MBR") != 0)
 		return (ENXIO);
 	/* Check that partition has type DOSPTYP_EBR. */
 	error = g_getattr("PART::type", cp, &type);
 	if (error != 0)
 		return (error);
 	if (strcmp(type, "ebr") != 0)
 		return (ENXIO);
 
 	/* Check that there's a EBR. */
 	buf = g_read_data(cp, 0L, pp->sectorsize, &error);
 	if (buf == NULL)
 		return (error);
 
 	/* We goto out on mismatch. */
 	res = ENXIO;
 
 	magic = le16dec(buf + DOSMAGICOFFSET);
 	if (magic != DOSMAGIC)
 		goto out;
 
 	for (index = 0; index < 2; index++) {
 		p = buf + DOSPARTOFF + index * DOSPARTSIZE;
 		if (p[0] != 0 && p[0] != 0x80)
 			goto out;
 	}
 	res = G_PART_PROBE_PRI_NORM;
 
  out:
 	g_free(buf);
 	return (res);
 }
 
 /*
  * Read the chain of EBRs from the consumer and create a partition entry
  * for every logical partition found.
  *
  * The walk starts at LBA 0.  In each EBR slot 0 describes a partition
  * and slot 1, when its type is non-zero, gives the LBA of the next EBR.
  * A slot 0 of type 5 with an empty slot 1 is followed as a pure link.
  * The walk stops when slot 0 is empty or when there is no next EBR.
  * Finally the table limits are derived from the provider size, capped
  * at UINT32_MAX sectors.
  *
  * Returns 0, or the g_read_data() error when a sector cannot be read.
  *
  * NOTE(review): the links are taken from disk as-is; nothing in this
  * function stops a chain that loops back on itself -- confirm whether
  * that is guarded elsewhere.
  */
 static int
 g_part_ebr_read(struct g_part_table *basetable, struct g_consumer *cp)
 {
 	struct dos_partition ent[2];
 	struct g_provider *pp;
 	struct g_part_entry *baseentry;
 	struct g_part_ebr_table *table;
 	struct g_part_ebr_entry *entry;
 	u_char *buf;
 	off_t ofs, msize;
 	u_int lba;
 	int error, index;
 
 	pp = cp->provider;
 	table = (struct g_part_ebr_table *)basetable;
 	msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX);
 
 	lba = 0;
 	while (1) {
 		ofs = (off_t)lba * pp->sectorsize;
 		buf = g_read_data(cp, ofs, pp->sectorsize, &error);
 		if (buf == NULL)
 			return (error);
 
 		ebr_entry_decode(buf + DOSPARTOFF + 0 * DOSPARTSIZE, ent + 0);
 		ebr_entry_decode(buf + DOSPARTOFF + 1 * DOSPARTSIZE, ent + 1);
 
 		/* The 3rd & 4th entries should be zeroes. */
 		if (le64dec(buf + DOSPARTOFF + 2 * DOSPARTSIZE) +
 		    le64dec(buf + DOSPARTOFF + 3 * DOSPARTSIZE) != 0) {
 			basetable->gpt_corrupt = 1;
 			printf("GEOM: %s: invalid entries in the EBR ignored.\n",
 			    pp->name);
 		}
 #ifndef GEOM_PART_EBR_COMPAT
 		/* Save the first EBR, it can contain a boot code */
 		if (lba == 0)
 			bcopy(buf, table->ebr, sizeof(table->ebr));
 #endif
 		g_free(buf);
 
 		/* An empty first slot ends the chain. */
 		if (ent[0].dp_typ == 0)
 			break;
 
 		/* A link-only EBR: follow it without creating an entry. */
 		if (ent[0].dp_typ == 5 && ent[1].dp_typ == 0) {
 			lba = ent[0].dp_start;
 			continue;
 		}
 
 		/*
 		 * The index is derived from the LBA of the EBR itself.  The
 		 * entry runs from the EBR sector to the last data sector,
 		 * while gpe_offset is the byte offset where the data starts.
 		 */
 		index = (lba / basetable->gpt_sectors) + 1;
 		baseentry = (struct g_part_entry *)g_part_new_entry(basetable,
 		    index, lba, lba + ent[0].dp_start + ent[0].dp_size - 1);
 		baseentry->gpe_offset = (off_t)(lba + ent[0].dp_start) *
 		    pp->sectorsize;
 		entry = (struct g_part_ebr_entry *)baseentry;
 		entry->ent = ent[0];
 
 		/* No second slot means this was the last EBR. */
 		if (ent[1].dp_typ == 0)
 			break;
 
 		lba = ent[1].dp_start;
 	}
 
 	basetable->gpt_entries = msize / basetable->gpt_sectors;
 	basetable->gpt_first = 0;
 	basetable->gpt_last = msize - 1;
 	return (0);
 }
 
 static int
 g_part_ebr_setunset(struct g_part_table *table, struct g_part_entry *baseentry,
     const char *attrib, unsigned int set)
 {
 	struct g_part_entry *iter;
 	struct g_part_ebr_entry *entry;
 	int changed;
 
 	if (baseentry == NULL)
 		return (ENODEV);
 	if (strcasecmp(attrib, "active") != 0)
 		return (EINVAL);
 
 	/* Only one entry can have the active attribute. */
 	LIST_FOREACH(iter, &table->gpt_entry, gpe_entry) {
 		if (iter->gpe_deleted)
 			continue;
 		changed = 0;
 		entry = (struct g_part_ebr_entry *)iter;
 		if (iter == baseentry) {
 			if (set && (entry->ent.dp_flag & 0x80) == 0) {
 				entry->ent.dp_flag |= 0x80;
 				changed = 1;
 			} else if (!set && (entry->ent.dp_flag & 0x80)) {
 				entry->ent.dp_flag &= ~0x80;
 				changed = 1;
 			}
 		} else {
 			if (set && (entry->ent.dp_flag & 0x80)) {
 				entry->ent.dp_flag &= ~0x80;
 				changed = 1;
 			}
 		}
 		if (changed && !iter->gpe_created)
 			iter->gpe_modified = 1;
 	}
 	return (0);
 }
 
 /*
  * Return a printable type for the entry.
  *
  * When dp_typ matches a row of ebr_alias_match[] the alias name from
  * g_part_alias_name() is returned.  Otherwise "!" followed by the
  * decimal type is formatted into buf (at most bufsz bytes) and buf is
  * returned.
  */
 static const char *
 g_part_ebr_type(struct g_part_table *basetable, struct g_part_entry *baseentry, 
     char *buf, size_t bufsz)
 {
 	struct g_part_ebr_entry *entry;
 	int i;
 
 	entry = (struct g_part_ebr_entry *)baseentry;
-	for (i = 0;
-	    i < nitems(ebr_alias_match); i++) {
+	for (i = 0; i < nitems(ebr_alias_match); i++) {
 		if (ebr_alias_match[i].typ == entry->ent.dp_typ)
 			return (g_part_alias_name(ebr_alias_match[i].alias));
 	}
 	snprintf(buf, bufsz, "!%d", entry->ent.dp_typ);
 	return (buf);
 }
 
 /*
  * Write the EBR chain back to the consumer.
  *
  * A single zeroed, sector-sized buffer is reused for every EBR.  For
  * each non-deleted entry, slot 0 receives the entry's own descriptor
  * and slot 1 a link to the next non-deleted entry (zeroes for the last
  * one); the sector is written at the entry's gpe_start.  When the first
  * live entry does not start at LBA 0, a link EBR pointing at it is
  * written to LBA 0 first.  With no live entries only the magic (plus,
  * in non-compat builds, the saved boot code) is written to LBA 0.
  *
  * Returns 0 or the first g_write_data() error.
  */
 static int
 g_part_ebr_write(struct g_part_table *basetable, struct g_consumer *cp)
 {
 #ifndef GEOM_PART_EBR_COMPAT
 	struct g_part_ebr_table *table;
 #endif
 	struct g_provider *pp;
 	struct g_part_entry *baseentry, *next;
 	struct g_part_ebr_entry *entry;
 	u_char *buf;
 	u_char *p;
 	int error;
 
 	pp = cp->provider;
 	buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO);
 #ifndef GEOM_PART_EBR_COMPAT
 	/* Start from the area preceding the partition slots saved at read time. */
 	table = (struct g_part_ebr_table *)basetable;
 	bcopy(table->ebr, buf, DOSPARTOFF);
 #endif
 	le16enc(buf + DOSMAGICOFFSET, DOSMAGIC);
 
 	/* Find the first entry that is not deleted. */
 	baseentry = LIST_FIRST(&basetable->gpt_entry);
 	while (baseentry != NULL && baseentry->gpe_deleted)
 		baseentry = LIST_NEXT(baseentry, gpe_entry);
 
 	/* Wipe-out the first EBR when there are no slices. */
 	if (baseentry == NULL) {
 		error = g_write_data(cp, 0, buf, pp->sectorsize);
 		goto out;
 	}
 
 	/*
 	 * If the first partition is not in LBA 0, we need to
 	 * put a "link" EBR in LBA 0.
 	 */
 	if (baseentry->gpe_start != 0) {
 		ebr_entry_link(basetable, (uint32_t)baseentry->gpe_start,
 		    (uint32_t)baseentry->gpe_end, buf + DOSPARTOFF);
 		error = g_write_data(cp, 0, buf, pp->sectorsize);
 		if (error)
 			goto out;
 	}
 
 	do {
 		entry = (struct g_part_ebr_entry *)baseentry;
 
 		/* Slot 0: encode this entry's descriptor field by field. */
 		p = buf + DOSPARTOFF;
 		p[0] = entry->ent.dp_flag;
 		p[1] = entry->ent.dp_shd;
 		p[2] = entry->ent.dp_ssect;
 		p[3] = entry->ent.dp_scyl;
 		p[4] = entry->ent.dp_typ;
 		p[5] = entry->ent.dp_ehd;
 		p[6] = entry->ent.dp_esect;
 		p[7] = entry->ent.dp_ecyl;
 		le32enc(p + 8, entry->ent.dp_start);
 		le32enc(p + 12, entry->ent.dp_size);
  
 		/* Skip deleted entries when looking for the successor. */
 		next = LIST_NEXT(baseentry, gpe_entry);
 		while (next != NULL && next->gpe_deleted)
 			next = LIST_NEXT(next, gpe_entry);
 
 		/* Slot 1: link to the successor, or zeroes at the end. */
 		p += DOSPARTSIZE;
 		if (next != NULL)
 			ebr_entry_link(basetable, (uint32_t)next->gpe_start,
 			    (uint32_t)next->gpe_end, p);
 		else
 			bzero(p, DOSPARTSIZE);
 
 		error = g_write_data(cp, baseentry->gpe_start * pp->sectorsize,
 		    buf, pp->sectorsize);
 #ifndef GEOM_PART_EBR_COMPAT
 		/*
 		 * Once the EBR at LBA 0 has been written, clear the leading
 		 * area so that the following EBRs do not carry it.
 		 */
 		if (baseentry->gpe_start == 0)
 			bzero(buf, DOSPARTOFF);
 #endif
 		baseentry = next;
 	} while (!error && baseentry != NULL);
 
  out:
 	g_free(buf);
 	return (error);
 }
Index: head/sys/geom/part/g_part_ldm.c
===================================================================
--- head/sys/geom/part/g_part_ldm.c	(revision 298353)
+++ head/sys/geom/part/g_part_ldm.c	(revision 298354)
@@ -1,1485 +1,1483 @@
 /*-
  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/diskmbr.h>
 #include <sys/endian.h>
 #include <sys/gpt.h>
 #include <sys/kernel.h>
 #include <sys/kobj.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/queue.h>
 #include <sys/sbuf.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/uuid.h>
 #include <geom/geom.h>
 #include <geom/part/g_part.h>
 
 #include "g_part_if.h"
 
 /* Feature declaration for the LDM partitioning class. */
 FEATURE(geom_part_ldm, "GEOM partitioning class for LDM support");
 
 /* Sysctl node "ldm" under the existing _kern_geom_part node. */
 SYSCTL_DECL(_kern_geom_part);
 static SYSCTL_NODE(_kern_geom_part, OID_AUTO, ldm, CTLFLAG_RW, 0,
     "GEOM_PART_LDM Logical Disk Manager");
 
 /* Verbosity threshold consulted by LDM_DEBUG() and LDM_DUMP(). */
 static u_int ldm_debug = 0;
 SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, debug,
     CTLFLAG_RWTUN, &ldm_debug, 0, "Debug level");
 
 /*
  * This allows access to mirrored LDM volumes. Since we do not
  * do mirroring here, it is not enabled by default.
  */
 static u_int show_mirrors = 0;
 SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, show_mirrors,
     CTLFLAG_RWTUN, &show_mirrors, 0, "Show mirrored volumes");
 
 /*
  * Print a "GEOM_PART: "-prefixed, newline-terminated message when
  * ldm_debug is at least lvl.  At least one argument must follow fmt.
  */
 #define	LDM_DEBUG(lvl, fmt, ...)	do {				\
 	if (ldm_debug >= (lvl)) {					\
 		printf("GEOM_PART: " fmt "\n", __VA_ARGS__);		\
 	}								\
 } while (0)
 /* Hex-dump size bytes of buf when ldm_debug is greater than 1. */
 #define	LDM_DUMP(buf, size)	do {					\
 	if (ldm_debug > 1) {						\
 		hexdump(buf, size, NULL, 0);				\
 	}								\
 } while (0)
 
 /*
  * These are the internal representations of LDM structures.
  *
  * We do not keep all fields of the on-disk structures, only the most
  * useful ones. All numbers in the on-disk structures are in big-endian
  * format.
  */
 
 /*
  * Private header is 512 bytes long. There are three copies on each disk.
  * Offset and sizes are in sectors. Location of each copy:
  * - the first offset is relative to the disk start;
  * - the second and third offset are relative to the LDM database start.
  *
  * On a disk partitioned with GPT, the LDM has no first private header.
  */
 /* Indexes into ldm_ph_off[]. */
 #define	LDM_PH_MBRINDEX		0
 #define	LDM_PH_GPTINDEX		2
 /* Sector offsets of the three private header copies. */
 static const uint64_t	ldm_ph_off[] = {6, 1856, 2047};
 /* Supported values of the 32-bit version field (major in the high half). */
 #define	LDM_VERSION_2K		0x2000b
 #define	LDM_VERSION_VISTA	0x2000c
 /* Byte offsets of the fields inside the on-disk private header. */
 #define	LDM_PH_VERSION_OFF	0x00c
 #define	LDM_PH_DISKGUID_OFF	0x030
 #define	LDM_PH_DGGUID_OFF	0x0b0
 #define	LDM_PH_DGNAME_OFF	0x0f0
 #define	LDM_PH_START_OFF	0x11b
 #define	LDM_PH_SIZE_OFF		0x123
 #define	LDM_PH_DB_OFF		0x12b
 #define	LDM_PH_DBSIZE_OFF	0x133
 #define	LDM_PH_TH1_OFF		0x13b
 #define	LDM_PH_TH2_OFF		0x143
 #define	LDM_PH_CONFSIZE_OFF	0x153
 #define	LDM_PH_LOGSIZE_OFF	0x15b
 /* Signature expected at the very start of a private header. */
 #define	LDM_PH_SIGN		"PRIVHEAD"
 struct ldm_privhdr {
 	struct uuid	disk_guid;	/* disk guid */
 	struct uuid	dg_guid;	/* disk group guid */
 	u_char		dg_name[32];	/* disk group name */
 	uint64_t	start;		/* logical disk start */
 	uint64_t	size;		/* logical disk size */
 	uint64_t	db_offset;	/* LDM database start */
 #define	LDM_DB_SIZE		2048
 	uint64_t	db_size;	/* LDM database size */
 #define	LDM_TH_COUNT		2
 	uint64_t	th_offset[LDM_TH_COUNT]; /* TOC header offsets */
 	uint64_t	conf_size;	/* configuration size */
 	uint64_t	log_size;	/* size of log */
 };
 
 /*
  * Table of contents header is 512 bytes long.
  * There are two identical copies at offsets from the private header.
  * Offsets are relative to the LDM database start.
  */
 /* Signature and section names that a valid TOC header must contain. */
 #define	LDM_TH_SIGN		"TOCBLOCK"
 #define	LDM_TH_NAME1		"config"
 #define	LDM_TH_NAME2		"log"
 /* Byte offsets of the fields inside the on-disk TOC header. */
 #define	LDM_TH_NAME1_OFF	0x024
 #define	LDM_TH_CONF_OFF		0x02e
 #define	LDM_TH_CONFSIZE_OFF	0x036
 #define	LDM_TH_NAME2_OFF	0x046
 #define	LDM_TH_LOG_OFF		0x050
 #define	LDM_TH_LOGSIZE_OFF	0x058
 struct ldm_tochdr {
 	uint64_t	conf_offset;	/* configuration offset */
 	uint64_t	log_offset;	/* log offset */
 };
 
 /*
  * LDM database header is 512 bytes long.
  */
 /* Signature expected at the start of the VMDB header. */
 #define	LDM_VMDB_SIGN		"VMDB"
 /* Byte offsets of the fields inside the on-disk VMDB header. */
 #define	LDM_DB_LASTSEQ_OFF	0x004
 #define	LDM_DB_SIZE_OFF		0x008
 #define	LDM_DB_STATUS_OFF	0x010
 #define	LDM_DB_VERSION_OFF	0x012
 #define	LDM_DB_DGNAME_OFF	0x016
 #define	LDM_DB_DGGUID_OFF	0x035
 struct ldm_vmdbhdr {
 	uint32_t	last_seq;	/* sequence number of last VBLK */
 	uint32_t	size;		/* size of VBLK */
 };
 
 /*
  * The LDM database configuration section contains VMDB header and
  * many VBLKs. Each VBLK represents a disk group, disk partition,
  * component or volume.
  *
  * The most interesting for us are volumes; they represent
  * partitions in the GEOM_PART meaning. But a volume VBLK does not
  * contain all the information needed to create a GEOM provider, so we
  * have to get this information from the related VBLKs. This is how
  * VBLKs are related:
  *	Volumes <- Components <- Partitions -> Disks
  *
  * One volume can contain several components. In this case LDM
  * does mirroring of volume data to each component.
  *
  * Also each component can contain several partitions (spanned or
  * striped volumes).
  */
 
 /* In-memory form of a component VBLK. */
 struct ldm_component {
 	uint64_t	id;		/* object id */
 	uint64_t	vol_id;		/* parent volume object id */
 
 	int		count;		/* presumably the number of partitions -- TODO confirm */
 	LIST_HEAD(, ldm_partition) partitions;	/* partitions of this component */
 	LIST_ENTRY(ldm_component) entry;	/* linkage in a list of components */
 };
 
 /* In-memory form of a volume VBLK. */
 struct ldm_volume {
 	uint64_t	id;		/* object id */
 	uint64_t	size;		/* volume size */
 	uint8_t		number;		/* used for ordering */
 	uint8_t		part_type;	/* partition type */
 
 	int		count;		/* presumably the number of components -- TODO confirm */
 	LIST_HEAD(, ldm_component) components;	/* components of this volume */
 	LIST_ENTRY(ldm_volume)	entry;	/* linkage in a list of volumes */
 };
 
 /* In-memory form of a disk VBLK. */
 struct ldm_disk {
 	uint64_t	id;		/* object id */
 	struct uuid	guid;		/* disk guid */
 
 	LIST_ENTRY(ldm_disk) entry;	/* linkage in a list of disks */
 };
 
 /* Compiled out: disk group records are not kept. */
 #if 0
 struct ldm_disk_group {
 	uint64_t	id;		/* object id */
 	struct uuid	guid;		/* disk group guid */
 	u_char		name[32];	/* disk group name */
 
 	LIST_ENTRY(ldm_disk_group) entry;
 };
 #endif
 
 /* In-memory form of a partition VBLK. */
 struct ldm_partition {
 	uint64_t	id;		/* object id */
 	uint64_t	disk_id;	/* disk object id */
 	uint64_t	comp_id;	/* parent component object id */
 	uint64_t	start;		/* offset relative to disk start */
 	uint64_t	offset;		/* offset for spanned volumes */
 	uint64_t	size;		/* partition size */
 
 	LIST_ENTRY(ldm_partition) entry;	/* linkage in a component's partition list */
 };
 
 /*
  * Each VBLK is 128 bytes long and has a standard 16-byte header.
  * Some of a VBLK's fields have a fixed size, but others have a variable
  * size. Variable-size fields are prefixed with a one-byte length marker.
  * Some fields are strings, which can also be of fixed or variable size.
  * Fixed-size strings are NUL-terminated, the others are not.
  * All VBLKs have same several first fields:
  *	Offset		Size		Description
  *	---------------+---------------+--------------------------
  *	0x00		16		standard VBLK header
  *	0x10		2		update status
  *	0x13		1		VBLK type
  *	0x18		PS		object id
  *	0x18+		PN		object name
  *
  *  o Offset 0x18+ means '0x18 + length of all variable-width fields'
  *  o 'P' in size column means 'prefixed' (variable-width),
  *    'S' - string, 'N' - number.
  */
 /* VBLK signature, followed by byte offsets of fields inside a VBLK. */
 #define	LDM_VBLK_SIGN		"VBLK"
 #define	LDM_VBLK_SEQ_OFF	0x04
 #define	LDM_VBLK_GROUP_OFF	0x08
 #define	LDM_VBLK_INDEX_OFF	0x0c
 #define	LDM_VBLK_COUNT_OFF	0x0e
 #define	LDM_VBLK_TYPE_OFF	0x13
 #define	LDM_VBLK_OID_OFF	0x18
 /* Decoded fields of the standard VBLK header. */
 struct ldm_vblkhdr {
 	uint32_t	seq;		/* sequence number */
 	uint32_t	group;		/* group number */
 	uint16_t	index;		/* index in the group */
 	uint16_t	count;		/* number of entries in the group */
 };
 
 /* Values of the VBLK type byte (found at LDM_VBLK_TYPE_OFF). */
 #define	LDM_VBLK_T_COMPONENT	0x32
 #define	LDM_VBLK_T_PARTITION	0x33
 #define	LDM_VBLK_T_DISK		0x34
 #define	LDM_VBLK_T_DISKGROUP	0x35
 #define	LDM_VBLK_T_DISK4	0x44
 #define	LDM_VBLK_T_DISKGROUP4	0x45
 #define	LDM_VBLK_T_VOLUME	0x51
 /* A parsed VBLK; the type selects which member of the union is used. */
 struct ldm_vblk {
 	uint8_t		type;		/* VBLK type */
 	union {
 		/* Object id; every member below starts with its own id. */
 		uint64_t		id;
 		struct ldm_volume	vol;
 		struct ldm_component	comp;
 		struct ldm_disk		disk;
 		struct ldm_partition	part;
 #if 0
 		struct ldm_disk_group	disk_group;
 #endif
 	} u;
 	LIST_ENTRY(ldm_vblk) entry;
 };
 
 /*
  * Some VBLKs contain a bit more data than can fit into 128 bytes. These
  * VBLKs are called eXtended VBLKs. Before parsing, the data from these
  * VBLKs should be placed into a contiguous memory buffer. We can recognize
  * an xVBLK by the count field in the standard VBLK header (count > 1).
  */
 struct ldm_xvblk {
 	uint32_t	group;		/* xVBLK group number */
 	uint32_t	size;		/* the total size of xVBLK */
 	uint8_t		map;		/* bitmask of currently saved VBLKs */
 	u_char		*data;		/* xVBLK data */
 
 	LIST_ENTRY(ldm_xvblk)	entry;
 };
 
 /* The internal representation of LDM database. */
 struct ldm_db {
 	struct ldm_privhdr		ph;	/* private header */
 	struct ldm_tochdr		th;	/* TOC header */
 	struct ldm_vmdbhdr		dh;	/* VMDB header */
 
 	LIST_HEAD(, ldm_volume)		volumes;	/* list of volumes */
 	LIST_HEAD(, ldm_disk)		disks;		/* list of disks */
 	LIST_HEAD(, ldm_vblk)		vblks;		/* list of parsed VBLKs */
 	LIST_HEAD(, ldm_xvblk)		xvblks;		/* xVBLKs being assembled */
 };
 
 /* GPT partition type UUID of the LDM metadata partition. */
 static struct uuid gpt_uuid_ms_ldm_metadata = GPT_ENT_TYPE_MS_LDM_METADATA;
 
 /* Scheme-private table: the generic table plus LDM specifics. */
 struct g_part_ldm_table {
 	struct g_part_table	base;
 	uint64_t		db_offset;	/* presumably the LDM database start -- TODO confirm */
 	int			is_gpt;		/* presumably non-zero for LDM on GPT -- TODO confirm */
 };
 /* Scheme-private entry: the generic entry plus a one-byte type. */
 struct g_part_ldm_entry {
 	struct g_part_entry	base;
 	uint8_t			type;
 };
 
 /* Forward declarations of the functions listed in g_part_ldm_methods[]. */
 static int g_part_ldm_add(struct g_part_table *, struct g_part_entry *,
     struct g_part_parms *);
 static int g_part_ldm_bootcode(struct g_part_table *, struct g_part_parms *);
 static int g_part_ldm_create(struct g_part_table *, struct g_part_parms *);
 static int g_part_ldm_destroy(struct g_part_table *, struct g_part_parms *);
 static void g_part_ldm_dumpconf(struct g_part_table *, struct g_part_entry *,
     struct sbuf *, const char *);
 static int g_part_ldm_dumpto(struct g_part_table *, struct g_part_entry *);
 static int g_part_ldm_modify(struct g_part_table *, struct g_part_entry *,
     struct g_part_parms *);
 static const char *g_part_ldm_name(struct g_part_table *, struct g_part_entry *,
     char *, size_t);
 static int g_part_ldm_probe(struct g_part_table *, struct g_consumer *);
 static int g_part_ldm_read(struct g_part_table *, struct g_consumer *);
 static const char *g_part_ldm_type(struct g_part_table *, struct g_part_entry *,
     char *, size_t);
 static int g_part_ldm_write(struct g_part_table *, struct g_consumer *);
 
 /* kobj method table binding the g_part interface to the LDM functions. */
 static kobj_method_t g_part_ldm_methods[] = {
 	KOBJMETHOD(g_part_add,		g_part_ldm_add),
 	KOBJMETHOD(g_part_bootcode,	g_part_ldm_bootcode),
 	KOBJMETHOD(g_part_create,	g_part_ldm_create),
 	KOBJMETHOD(g_part_destroy,	g_part_ldm_destroy),
 	KOBJMETHOD(g_part_dumpconf,	g_part_ldm_dumpconf),
 	KOBJMETHOD(g_part_dumpto,	g_part_ldm_dumpto),
 	KOBJMETHOD(g_part_modify,	g_part_ldm_modify),
 	KOBJMETHOD(g_part_name,		g_part_ldm_name),
 	KOBJMETHOD(g_part_probe,	g_part_ldm_probe),
 	KOBJMETHOD(g_part_read,		g_part_ldm_read),
 	KOBJMETHOD(g_part_type,		g_part_ldm_type),
 	KOBJMETHOD(g_part_write,	g_part_ldm_write),
 	{ 0, 0 }	/* terminating entry */
 };
 
 /*
  * Scheme descriptor: name "LDM", the method table above and the sizes
  * of the scheme-private table and entry structures.
  */
 static struct g_part_scheme g_part_ldm_scheme = {
 	"LDM",
 	g_part_ldm_methods,
 	sizeof(struct g_part_ldm_table),
 	.gps_entrysz = sizeof(struct g_part_ldm_entry)
 };
 G_PART_SCHEME_DECLARE(g_part_ldm);
 
 /* Pairs each known one-byte partition type with its G_PART alias. */
 static struct g_part_ldm_alias {
 	u_char		typ;	/* DOSPTYP_* partition type */
 	int		alias;	/* G_PART_ALIAS_* identifier */
 } ldm_alias_match[] = {
 	{ DOSPTYP_NTFS,		G_PART_ALIAS_MS_NTFS },
 	{ DOSPTYP_FAT32,	G_PART_ALIAS_MS_FAT32 },
 	{ DOSPTYP_386BSD,	G_PART_ALIAS_FREEBSD },
 	{ DOSPTYP_LDM,		G_PART_ALIAS_MS_LDM_DATA },
 	{ DOSPTYP_LINSWP,	G_PART_ALIAS_LINUX_SWAP },
 	{ DOSPTYP_LINUX,	G_PART_ALIAS_LINUX_DATA },
 	{ DOSPTYP_LINLVM,	G_PART_ALIAS_LINUX_LVM },
 	{ DOSPTYP_LINRAID,	G_PART_ALIAS_LINUX_RAID },
 };
 
 /*
  * Read one sector at byte offset off and check that it starts with the
  * private header signature.
  *
  * Returns the buffer from g_read_data(), which the caller must release
  * with g_free().  Returns NULL on failure: *error is then whatever
  * g_read_data() stored, or EINVAL for a signature mismatch.
  */
 static u_char*
 ldm_privhdr_read(struct g_consumer *cp, uint64_t off, int *error)
 {
 	struct g_provider *pp;
 	u_char *sector;
 
 	pp = cp->provider;
 	sector = g_read_data(cp, off, pp->sectorsize, error);
 	if (sector == NULL)
 		return (NULL);
 	if (memcmp(sector, LDM_PH_SIGN, strlen(LDM_PH_SIGN)) == 0)
 		return (sector);
 
 	/* Wrong signature: drop the sector and report it. */
 	LDM_DEBUG(1, "%s: invalid LDM private header signature",
 	    pp->name);
 	g_free(sector);
 	*error = EINVAL;
 	return (NULL);
 }
 
 /*
  * Decode an on-disk private header from buf into *hdr.
  *
  * *hdr is zeroed first.  Returns ENXIO for a version other than
  * LDM_VERSION_2K or LDM_VERSION_VISTA, the parse_uuid() error when
  * either GUID cannot be parsed, and 0 on success.  The numeric fields
  * are stored big-endian on disk.
  */
 static int
 ldm_privhdr_parse(struct g_consumer *cp, struct ldm_privhdr *hdr,
     const u_char *buf)
 {
 	uint32_t ver;
 	int rc;
 
 	memset(hdr, 0, sizeof(*hdr));
 
 	ver = be32dec(buf + LDM_PH_VERSION_OFF);
 	switch (ver) {
 	case LDM_VERSION_2K:
 	case LDM_VERSION_VISTA:
 		break;
 	default:
 		LDM_DEBUG(0, "%s: unsupported LDM version %u.%u",
 		    cp->provider->name, ver >> 16,
 		    ver & 0xFFFF);
 		return (ENXIO);
 	}
 
 	rc = parse_uuid(buf + LDM_PH_DISKGUID_OFF, &hdr->disk_guid);
 	if (rc == 0)
 		rc = parse_uuid(buf + LDM_PH_DGGUID_OFF, &hdr->dg_guid);
 	if (rc != 0)
 		return (rc);
 
 	/* Copies at most the full field; a 32 byte name stays unterminated. */
 	strncpy(hdr->dg_name, buf + LDM_PH_DGNAME_OFF, sizeof(hdr->dg_name));
 
 	hdr->start = be64dec(buf + LDM_PH_START_OFF);
 	hdr->size = be64dec(buf + LDM_PH_SIZE_OFF);
 	hdr->db_offset = be64dec(buf + LDM_PH_DB_OFF);
 	hdr->db_size = be64dec(buf + LDM_PH_DBSIZE_OFF);
 	hdr->th_offset[0] = be64dec(buf + LDM_PH_TH1_OFF);
 	hdr->th_offset[1] = be64dec(buf + LDM_PH_TH2_OFF);
 	hdr->conf_size = be64dec(buf + LDM_PH_CONFSIZE_OFF);
 	hdr->log_size = be64dec(buf + LDM_PH_LOGSIZE_OFF);
 	return (0);
 }
 
 /*
  * Locate and validate the LDM private header and store the accepted
  * copy in db->ph.
  *
  * The locations in ldm_ph_off[] are tried starting at index is_gpt.
  * A copy that cannot be read or parsed, or that holds out-of-range
  * values, is skipped.  Returns 0 when at least one valid copy was
  * found, ENXIO when none was, and EINVAL when a copy at index > 1
  * differs from the one already saved.
  */
 static int
 ldm_privhdr_check(struct ldm_db *db, struct g_consumer *cp, int is_gpt)
 {
 	struct g_consumer *cp2;
 	struct g_provider *pp;
 	struct ldm_privhdr hdr;
 	uint64_t offset, last;
 	int error, found, i;
 	u_char *buf;
 
 	pp = cp->provider;
 	if (is_gpt) {
 		/*
 		 * The last LBA is used in several checks below, for the
 		 * GPT case it should be calculated relative to the whole
 		 * disk.
 		 */
 		cp2 = LIST_FIRST(&pp->geom->consumer);
 		last =
 		    cp2->provider->mediasize / cp2->provider->sectorsize - 1;
 	} else
 		last = pp->mediasize / pp->sectorsize - 1;
-	for (found = 0, i = is_gpt;
-	    i < nitems(ldm_ph_off); i++) {
+	for (found = 0, i = is_gpt; i < nitems(ldm_ph_off); i++) {
 		offset = ldm_ph_off[i];
 		/*
 		 * In the GPT case consumer is attached to the LDM metadata
 		 * partition and we don't need add db_offset.
 		 */
 		if (!is_gpt)
 			offset += db->ph.db_offset;
 		if (i == LDM_PH_MBRINDEX) {
 			/*
 			 * Prepare to errors and setup new base offset
 			 * to read backup private headers. Assume that LDM
 			 * database is in the last 1Mbyte area.
 			 */
 			db->ph.db_offset = last - LDM_DB_SIZE;
 		}
 		buf = ldm_privhdr_read(cp, offset * pp->sectorsize, &error);
 		if (buf == NULL) {
 			LDM_DEBUG(1, "%s: failed to read private header "
 			    "%d at LBA %ju", pp->name, i, (uintmax_t)offset);
 			continue;
 		}
 		error = ldm_privhdr_parse(cp, &hdr, buf);
 		if (error != 0) {
 			LDM_DEBUG(1, "%s: failed to parse private "
 			    "header %d", pp->name, i);
 			LDM_DUMP(buf, pp->sectorsize);
 			g_free(buf);
 			continue;
 		}
 		g_free(buf);
 		/* Reject a header whose values do not fit the disk or the database. */
 		if (hdr.start > last ||
 		    hdr.start + hdr.size - 1 > last ||
 		    (hdr.start + hdr.size - 1 > hdr.db_offset && !is_gpt) ||
 		    hdr.db_size != LDM_DB_SIZE ||
 		    hdr.db_offset + LDM_DB_SIZE - 1 > last ||
 		    hdr.th_offset[0] >= LDM_DB_SIZE ||
 		    hdr.th_offset[1] >= LDM_DB_SIZE ||
 		    hdr.conf_size + hdr.log_size >= LDM_DB_SIZE) {
 			LDM_DEBUG(1, "%s: invalid values in the "
 			    "private header %d", pp->name, i);
 			LDM_DEBUG(2, "%s: start: %jd, size: %jd, "
 			    "db_offset: %jd, db_size: %jd, th_offset0: %jd, "
 			    "th_offset1: %jd, conf_size: %jd, log_size: %jd, "
 			    "last: %jd", pp->name, hdr.start, hdr.size,
 			    hdr.db_offset, hdr.db_size, hdr.th_offset[0],
 			    hdr.th_offset[1], hdr.conf_size, hdr.log_size,
 			    last);
 			continue;
 		}
 		if (found != 0 && memcmp(&db->ph, &hdr, sizeof(hdr)) != 0) {
 			LDM_DEBUG(0, "%s: private headers are not equal",
 			    pp->name);
 			if (i > 1) {
 				/*
 				 * We have different headers in the LDM.
 				 * We can not trust this metadata.
 				 */
 				LDM_DEBUG(0, "%s: refuse LDM metadata",
 				    pp->name);
 				return (EINVAL);
 			}
 			/*
 			 * We already have read primary private header
 			 * and it differs from this backup one.
 			 * Prefer the backup header and save it.
 			 */
 			found = 0;
 		}
 		/* Keep the first valid copy, or the backup preferred just above. */
 		if (found == 0)
 			memcpy(&db->ph, &hdr, sizeof(hdr));
 		found = 1;
 	}
 	if (found == 0) {
 		LDM_DEBUG(1, "%s: valid LDM private header not found",
 		    pp->name);
 		return (ENXIO);
 	}
 	return (0);
 }
 
 /*
  * Cross-check the private header against the enclosing partition table.
  *
  * With the topology lock held, walks the entries of the table found in
  * cp's provider's geom.  The entry backing cp's provider must match the
  * LDM database boundaries, and the entry backing the provider of the
  * consumer that follows cp must match the logical disk boundaries.
  * Returns ENXIO on the first mismatch, 0 otherwise.
  */
 static int
 ldm_gpt_check(struct ldm_db *db, struct g_consumer *cp)
 {
 	struct g_part_table *gpt;
 	struct g_part_entry *e;
 	struct g_consumer *data_cp;
 	int mismatch, rc;
 
 	data_cp = LIST_NEXT(cp, consumer);
 	g_topology_lock();
 	gpt = cp->provider->geom->softc;
 	rc = 0;
 	LIST_FOREACH(e, &gpt->gpt_entry, gpe_entry) {
 		mismatch = 0;
 		if (cp->provider == e->gpe_pp) {
 			/* ms-ldm-metadata partition */
 			mismatch = (e->gpe_start != db->ph.db_offset ||
 			    e->gpe_end != db->ph.db_offset + LDM_DB_SIZE - 1);
 		} else if (data_cp->provider == e->gpe_pp) {
 			/* ms-ldm-data partition */
 			mismatch = (e->gpe_start != db->ph.start ||
 			    e->gpe_end != db->ph.start + db->ph.size - 1);
 		}
 		if (!mismatch)
 			continue;
 
 		LDM_DEBUG(0, "%s: GPT partition %d boundaries "
 		    "do not match with the LDM metadata",
 		    e->gpe_pp->name, e->gpe_index);
 		rc = ENXIO;
 		break;
 	}
 	g_topology_unlock();
 	return (rc);
 }
 
 /*
  * Read and validate the TOC header copies named by the private header.
  *
  * Each of the LDM_TH_COUNT copies is read from db_offset + th_offset[i].
  * A copy is accepted when it carries the expected signature and section
  * names, its sizes equal those of the private header and both sections
  * end inside the LDM database.  The first accepted copy is stored in
  * db->th.  Returns 0 when a copy was accepted, ENXIO otherwise.
  */
 static int
 ldm_tochdr_check(struct ldm_db *db, struct g_consumer *cp)
 {
 	struct g_provider *pp;
 	struct ldm_tochdr hdr;
 	uint64_t offset, conf_size, log_size;
 	int error, found, i;
 	u_char *buf;
 
 	pp = cp->provider;
 	for (i = 0, found = 0; i < LDM_TH_COUNT; i++) {
 		offset = db->ph.db_offset + db->ph.th_offset[i];
 		buf = g_read_data(cp,
 		    offset * pp->sectorsize, pp->sectorsize, &error);
 		if (buf == NULL) {
 			LDM_DEBUG(1, "%s: failed to read TOC header "
 			    "at LBA %ju", pp->name, (uintmax_t)offset);
 			continue;
 		}
 		/* Signature and both section names have to be present. */
 		if (memcmp(buf, LDM_TH_SIGN, strlen(LDM_TH_SIGN)) != 0 ||
 		    memcmp(buf + LDM_TH_NAME1_OFF, LDM_TH_NAME1,
 		    strlen(LDM_TH_NAME1)) != 0 ||
 		    memcmp(buf + LDM_TH_NAME2_OFF, LDM_TH_NAME2,
 		    strlen(LDM_TH_NAME2)) != 0) {
 			LDM_DEBUG(1, "%s: failed to parse TOC header "
 			    "at LBA %ju", pp->name, (uintmax_t)offset);
 			LDM_DUMP(buf, pp->sectorsize);
 			g_free(buf);
 			continue;
 		}
 		hdr.conf_offset = be64dec(buf + LDM_TH_CONF_OFF);
 		hdr.log_offset = be64dec(buf + LDM_TH_LOG_OFF);
 		conf_size = be64dec(buf + LDM_TH_CONFSIZE_OFF);
 		log_size = be64dec(buf + LDM_TH_LOGSIZE_OFF);
 		/* Sizes must agree with the private header and fit the database. */
 		if (conf_size != db->ph.conf_size ||
 		    hdr.conf_offset + conf_size >= LDM_DB_SIZE ||
 		    log_size != db->ph.log_size ||
 		    hdr.log_offset + log_size >= LDM_DB_SIZE) {
 			LDM_DEBUG(1, "%s: invalid values in the "
 			    "TOC header at LBA %ju", pp->name,
 			    (uintmax_t)offset);
 			LDM_DUMP(buf, pp->sectorsize);
 			g_free(buf);
 			continue;
 		}
 		g_free(buf);
 		/* Only the first valid copy is kept. */
 		if (found == 0)
 			memcpy(&db->th, &hdr, sizeof(hdr));
 		found = 1;
 	}
 	if (found == 0) {
 		LDM_DEBUG(0, "%s: valid LDM TOC header not found.",
 		    pp->name);
 		return (ENXIO);
 	}
 	return (0);
 }
 
 /*
  * Read and validate the VMDB header located at the start of the
  * configuration section (db_offset + conf_offset).
  *
  * On success db->dh.last_seq and db->dh.size are filled in and 0 is
  * returned.  Returns the g_read_data() error when the sector cannot be
  * read, ENXIO for a bad signature, an unsupported version or a VMDB
  * that is not in a consistent state, and EINVAL when the header values
  * are inconsistent with the private header or the sector size.
  */
 static int
 ldm_vmdbhdr_check(struct ldm_db *db, struct g_consumer *cp)
 {
 	struct g_provider *pp;
 	struct uuid dg_guid;
 	uint64_t offset;
 	uint32_t version;
 	int error;
 	u_char *buf;
 
 	pp = cp->provider;
 	offset = db->ph.db_offset + db->th.conf_offset;
 	buf = g_read_data(cp, offset * pp->sectorsize, pp->sectorsize,
 	    &error);
 	if (buf == NULL) {
 		LDM_DEBUG(0, "%s: failed to read VMDB header at "
 		    "LBA %ju", pp->name, (uintmax_t)offset);
 		return (error);
 	}
 	if (memcmp(buf, LDM_VMDB_SIGN, strlen(LDM_VMDB_SIGN)) != 0) {
 		g_free(buf);
 		LDM_DEBUG(0, "%s: failed to parse VMDB header at "
 		    "LBA %ju", pp->name, (uintmax_t)offset);
 		return (ENXIO);
 	}
 	/* Check version. */
 	version = be32dec(buf + LDM_DB_VERSION_OFF);
 	if (version != 0x4000A) {
 		g_free(buf);
 		LDM_DEBUG(0, "%s: unsupported VMDB version %u.%u",
 		    pp->name, version >> 16, version & 0xFFFF);
 		return (ENXIO);
 	}
 	/*
 	 * Check VMDB update status:
 	 *	1 - in a consistent state;
 	 *	2 - in a creation phase;
 	 *	3 - in a deletion phase;
 	 */
 	if (be16dec(buf + LDM_DB_STATUS_OFF) != 1) {
 		g_free(buf);
 		LDM_DEBUG(0, "%s: VMDB is not in a consistent state",
 		    pp->name);
 		return (ENXIO);
 	}
 	db->dh.last_seq = be32dec(buf + LDM_DB_LASTSEQ_OFF);
 	db->dh.size = be32dec(buf + LDM_DB_SIZE_OFF);
 	error = parse_uuid(buf + LDM_DB_DGGUID_OFF, &dg_guid);
 	/* Compare disk group name and guid from VMDB and private headers */
 	/*
 	 * Also require a non-zero VBLK size that divides the sector size,
 	 * and that last_seq VBLKs fit into the configuration section.
 	 */
 	if (error != 0 || db->dh.size == 0 ||
 	    pp->sectorsize % db->dh.size != 0 ||
 	    strncmp(buf + LDM_DB_DGNAME_OFF, db->ph.dg_name, 31) != 0 ||
 	    memcmp(&dg_guid, &db->ph.dg_guid, sizeof(dg_guid)) != 0 ||
 	    db->dh.size * db->dh.last_seq >
 	    db->ph.conf_size * pp->sectorsize) {
 		LDM_DEBUG(0, "%s: invalid values in the VMDB header",
 		    pp->name);
 		LDM_DUMP(buf, pp->sectorsize);
 		g_free(buf);
 		return (EINVAL);
 	}
 	g_free(buf);
 	return (0);
 }
 
 /*
  * Store one fragment of an extended VBLK.
  *
  * Fragments are collected per group in a buffer that has room for
  * vh->count payloads (VBLK size minus the 16 byte header) after a 16
  * byte header area.  blk->map has a bit set for every slot that is
  * either already filled or does not exist.
  *
  * Returns 0 when the fragment at p was copied into its slot.  Returns
  * EINVAL when the VBLK size is smaller than the header, when the group
  * would need more slots than the 8-bit map can track, when vh->index
  * does not address a slot inside the buffer, or when that slot has
  * already been filled.
  */
 static int
 ldm_xvblk_handle(struct ldm_db *db, struct ldm_vblkhdr *vh, const u_char *p)
 {
 	struct ldm_xvblk *blk;
 	size_t size;
 
 	/* Guard the subtraction below against wrapping around. */
 	if (db->dh.size < 16)
 		return (EINVAL);
 	size = db->dh.size - 16;
 	LIST_FOREACH(blk, &db->xvblks, entry)
 		if (blk->group == vh->group)
 			break;
 	if (blk == NULL) {
 		/* The bitmap can describe at most 8 fragments. */
 		if (vh->count == 0 || vh->count > 8)
 			return (EINVAL);
 		blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
 		blk->group = vh->group;
 		blk->size = size * vh->count + 16;
 		blk->data = g_malloc(blk->size, M_WAITOK | M_ZERO);
 		/* Pre-mark the slots beyond count as unavailable. */
 		blk->map = 0xFF << vh->count;
 		LIST_INSERT_HEAD(&db->xvblks, blk, entry);
 	}
 	/* The slot has to exist both in the map and in the buffer. */
 	if (vh->index >= 8 ||
 	    ((size_t)vh->index + 1) * size + 16 > blk->size)
 		return (EINVAL);
 	if ((blk->map & (1 << vh->index)) != 0) {
 		/* Block with given index has been already saved. */
 		return (EINVAL);
 	}
 	/* Copy the data block to the place related to index. */
 	memcpy(blk->data + size * vh->index + 16, p + 16, size);
 	blk->map |= 1 << vh->index;
 	return (0);
 }
 
 /*
  * Read the variable-width numeric field and return new offset.
  *
  * The field at buf[offset] is a one byte length followed by that many
  * bytes holding the number, most significant byte first.  'range' is
  * the number of bytes of buf that may be accessed.
  *
  * On success the value is stored in *result and the offset just past
  * the field is returned.  -1 is returned, and *result left untouched,
  * when offset is outside the range, when the number is wider than
  * 64 bits, or when the field does not end strictly inside the range.
  */
 static int
 ldm_vnum_get(const u_char *buf, int offset, uint64_t *result, size_t range)
 {
 	uint64_t num;
 	uint8_t len;
 
 	/* The length byte itself has to lie inside the buffer. */
 	if (offset < 0 || (size_t)offset >= range)
 		return (-1);
 	len = buf[offset++];
 	if (len > sizeof(uint64_t) || len + offset >= range)
 		return (-1);
 	for (num = 0; len > 0; len--)
 		num = (num << 8) | buf[offset++];
 	*result = num;
 	return (offset);
 }
 
 /*
  * Read a variable-width string and return the new offset.
  *
  * The field is one length byte followed by that many characters; the
  * copy placed in result is NUL-terminated.
  *
  * buf    - buffer holding the field.
  * offset - position of the length byte inside buf.
  * result - destination buffer of maxlen bytes.
  * maxlen - size of result; the string must be shorter than this.
  * range  - number of valid bytes in buf.
  *
  * Returns the offset just past the field, or -1 when the offset is
  * outside the buffer, the string does not fit into result, or the field
  * does not end before the last byte of the buffer.
  */
 static int
 ldm_vstr_get(const u_char *buf, int offset, u_char *result,
     size_t maxlen, size_t range)
 {
 	uint8_t len;
 
 	/* Validate the offset before reading the length byte. */
 	if (offset < 0 || (size_t)offset >= range)
 		return (-1);
 	len = buf[offset++];
 	if (len >= maxlen || len + offset >= range)
 		return (-1);
 	memcpy(result, buf + offset, len);
 	result[len] = '\0';
 	return (offset + len);
 }
 
 /*
  * Skip a variable-width field (one length byte plus that many data
  * bytes) and return the new offset.
  *
  * buf    - buffer holding the field.
  * offset - position of the length byte inside buf.
  * range  - number of valid bytes in buf.
  *
  * Returns the offset just past the field, or -1 when the offset is
  * outside the buffer or the field does not end before the last byte of
  * the buffer.
  */
 static int
 ldm_vparm_skip(const u_char *buf, int offset, size_t range)
 {
 	uint8_t len;
 
 	/* Validate the offset before reading the length byte. */
 	if (offset < 0 || (size_t)offset >= range)
 		return (-1);
 	len = buf[offset++];
 	if (offset + len >= range)
 		return (-1);
 
 	return (offset + len);
 }
 
 static int
 ldm_vblk_handle(struct ldm_db *db, const u_char *p, size_t size)
 {
 	struct ldm_vblk *blk;
 	struct ldm_volume *volume, *last;
 	const char *errstr;
 	u_char vstr[64];
 	int error, offset;
 
 	blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
 	blk->type = p[LDM_VBLK_TYPE_OFF];
 	offset = ldm_vnum_get(p, LDM_VBLK_OID_OFF, &blk->u.id, size);
 	if (offset < 0) {
 		errstr = "object id";
 		goto fail;
 	}
 	offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
 	if (offset < 0) {
 		errstr = "object name";
 		goto fail;
 	}
 	switch (blk->type) {
 	/*
 	 * Component VBLK fields:
 	 * Offset	Size	Description
 	 * ------------+-------+------------------------
 	 *  0x18+	PS	volume state
 	 *  0x18+5	PN	component children count
 	 *  0x1D+16	PN	parent's volume object id
 	 *  0x2D+1	PN	stripe size
 	 */
 	case LDM_VBLK_T_COMPONENT:
 		offset = ldm_vparm_skip(p, offset, size);
 		if (offset < 0) {
 			errstr = "volume state";
 			goto fail;
 		}
 		offset = ldm_vparm_skip(p, offset + 5, size);
 		if (offset < 0) {
 			errstr = "children count";
 			goto fail;
 		}
 		offset = ldm_vnum_get(p, offset + 16,
 		    &blk->u.comp.vol_id, size);
 		if (offset < 0) {
 			errstr = "volume id";
 			goto fail;
 		}
 		break;
 	/*
 	 * Partition VBLK fields:
 	 * Offset	Size	Description
 	 * ------------+-------+------------------------
 	 *  0x18+12	8	partition start offset
 	 *  0x18+20	8	volume offset
 	 *  0x18+28	PN	partition size
 	 *  0x34+	PN	parent's component object id
 	 *  0x34+	PN	disk's object id
 	 */
 	case LDM_VBLK_T_PARTITION:
 		if (offset + 28 >= size) {
 			errstr = "too small buffer";
 			goto fail;
 		}
 		blk->u.part.start = be64dec(p + offset + 12);
 		blk->u.part.offset = be64dec(p + offset + 20);
 		offset = ldm_vnum_get(p, offset + 28, &blk->u.part.size, size);
 		if (offset < 0) {
 			errstr = "partition size";
 			goto fail;
 		}
 		offset = ldm_vnum_get(p, offset, &blk->u.part.comp_id, size);
 		if (offset < 0) {
 			errstr = "component id";
 			goto fail;
 		}
 		offset = ldm_vnum_get(p, offset, &blk->u.part.disk_id, size);
 		if (offset < 0) {
 			errstr = "disk id";
 			goto fail;
 		}
 		break;
 	/*
 	 * Disk VBLK fields:
 	 * Offset	Size	Description
 	 * ------------+-------+------------------------
 	 *  0x18+	PS	disk GUID
 	 */
 	case LDM_VBLK_T_DISK:
 		errstr = "disk guid";
 		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
 		if (offset < 0)
 			goto fail;
 		error = parse_uuid(vstr, &blk->u.disk.guid);
 		if (error != 0)
 			goto fail;
 		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
 		break;
 	/*
 	 * Disk group VBLK fields:
 	 * Offset	Size	Description
 	 * ------------+-------+------------------------
 	 *  0x18+	PS	disk group GUID
 	 */
 	case LDM_VBLK_T_DISKGROUP:
 #if 0
 		strncpy(blk->u.disk_group.name, vstr,
 		    sizeof(blk->u.disk_group.name));
 		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
 		if (offset < 0) {
 			errstr = "disk group guid";
 			goto fail;
 		}
 		error = parse_uuid(name, &blk->u.disk_group.guid);
 		if (error != 0) {
 			errstr = "disk group guid";
 			goto fail;
 		}
 		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
 #endif
 		break;
 	/*
 	 * Disk VBLK fields:
 	 * Offset	Size	Description
 	 * ------------+-------+------------------------
 	 *  0x18+	16	disk GUID
 	 */
 	case LDM_VBLK_T_DISK4:
 		be_uuid_dec(p + offset, &blk->u.disk.guid);
 		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
 		break;
 	/*
 	 * Disk group VBLK fields:
 	 * Offset	Size	Description
 	 * ------------+-------+------------------------
 	 *  0x18+	16	disk GUID
 	 */
 	case LDM_VBLK_T_DISKGROUP4:
 #if 0
 		strncpy(blk->u.disk_group.name, vstr,
 		    sizeof(blk->u.disk_group.name));
 		be_uuid_dec(p + offset, &blk->u.disk.guid);
 		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
 #endif
 		break;
 	/*
 	 * Volume VBLK fields:
 	 * Offset	Size	Description
 	 * ------------+-------+------------------------
 	 *  0x18+	PS	volume type
 	 *  0x18+	PS	unknown
 	 *  0x18+	14(S)	volume state
 	 *  0x18+16	1	volume number
 	 *  0x18+21	PN	volume children count
 	 *  0x2D+16	PN	volume size
 	 *  0x3D+4	1	partition type
 	 */
 	case LDM_VBLK_T_VOLUME:
 		offset = ldm_vparm_skip(p, offset, size);
 		if (offset < 0) {
 			errstr = "volume type";
 			goto fail;
 		}
 		offset = ldm_vparm_skip(p, offset, size);
 		if (offset < 0) {
 			errstr = "unknown param";
 			goto fail;
 		}
 		if (offset + 21 >= size) {
 			errstr = "too small buffer";
 			goto fail;
 		}
 		blk->u.vol.number = p[offset + 16];
 		offset = ldm_vparm_skip(p, offset + 21, size);
 		if (offset < 0) {
 			errstr = "children count";
 			goto fail;
 		}
 		offset = ldm_vnum_get(p, offset + 16, &blk->u.vol.size, size);
 		if (offset < 0) {
 			errstr = "volume size";
 			goto fail;
 		}
 		if (offset + 4 >= size) {
 			errstr = "too small buffer";
 			goto fail;
 		}
 		blk->u.vol.part_type = p[offset + 4];
 		/* keep volumes ordered by volume number */
 		last = NULL;
 		LIST_FOREACH(volume, &db->volumes, entry) {
 			if (volume->number > blk->u.vol.number)
 				break;
 			last = volume;
 		}
 		if (last != NULL)
 			LIST_INSERT_AFTER(last, &blk->u.vol, entry);
 		else
 			LIST_INSERT_HEAD(&db->volumes, &blk->u.vol, entry);
 		break;
 	default:
 		LDM_DEBUG(1, "unknown VBLK type 0x%02x\n", blk->type);
 		LDM_DUMP(p, size);
 	}
 	LIST_INSERT_HEAD(&db->vblks, blk, entry);
 	return (0);
 fail:
 	LDM_DEBUG(0, "failed to parse '%s' in VBLK of type 0x%02x\n",
 	    errstr, blk->type);
 	LDM_DUMP(p, size);
 	g_free(blk);
 	return (EINVAL);
 }
 
 /*
  * Release every record hanging off db->xvblks and db->vblks.
  * Extended VBLKs own a separate data buffer that is freed with them.
  */
 static void
 ldm_vmdb_free(struct ldm_db *db)
 {
 	struct ldm_xvblk *xb;
 	struct ldm_vblk *vb;
 
 	/* Partially assembled extended VBLKs and their payload buffers. */
 	while ((xb = LIST_FIRST(&db->xvblks)) != NULL) {
 		LIST_REMOVE(xb, entry);
 		g_free(xb->data);
 		g_free(xb);
 	}
 	/* Parsed VBLK records. */
 	while ((vb = LIST_FIRST(&db->vblks)) != NULL) {
 		LIST_REMOVE(vb, entry);
 		g_free(vb);
 	}
 }
 
 /*
  * Read all VBLKs of the VMDB through consumer cp and build the
  * in-memory database.
  *
  * The VBLK area is read in chunks of at most MAXPHYS bytes.  Single
  * block VBLKs are parsed right away; fragments of extended VBLKs are
  * collected first and parsed once all chunks have been read.  Finally
  * components are linked to their volumes and partitions to their
  * components.
  *
  * Returns 0 on success.  On any failure everything collected so far is
  * released with ldm_vmdb_free() and ENXIO is returned.
  */
 static int
 ldm_vmdb_parse(struct ldm_db *db, struct g_consumer *cp)
 {
 	struct g_provider *pp;
 	struct ldm_vblk *vblk;
 	struct ldm_xvblk *xvblk;
 	struct ldm_volume *volume;
 	struct ldm_component *comp;
 	struct ldm_vblkhdr vh;
 	u_char *buf, *p;
 	size_t size, n, sectors;
 	uint64_t offset;
 	int error;
 
 	pp = cp->provider;
 	/* The fail path frees buf; make sure it is always defined. */
 	buf = NULL;
 	size = (db->dh.last_seq * db->dh.size +
 	    pp->sectorsize - 1) / pp->sectorsize;
 	/*
 	 * A zero sector count would wrap around in the subtraction below
 	 * and turn the read loop into a scan of the whole provider.
 	 */
 	if (size == 0) {
 		LDM_DEBUG(0, "%s: VMDB has no VBLKs\n", pp->name);
 		goto fail;
 	}
 	size -= 1; /* one sector takes vmdb header */
 	for (n = 0; n < size; n += MAXPHYS / pp->sectorsize) {
 		offset = db->ph.db_offset + db->th.conf_offset + n + 1;
 		sectors = (size - n) > (MAXPHYS / pp->sectorsize) ?
 		    MAXPHYS / pp->sectorsize: size - n;
 		/* read VBLKs */
 		buf = g_read_data(cp, offset * pp->sectorsize,
 		    sectors * pp->sectorsize, &error);
 		if (buf == NULL) {
 			LDM_DEBUG(0, "%s: failed to read VBLK\n",
 			    pp->name);
 			goto fail;
 		}
 		for (p = buf; p < buf + sectors * pp->sectorsize;
 		    p += db->dh.size) {
 			if (memcmp(p, LDM_VBLK_SIGN,
 			    strlen(LDM_VBLK_SIGN)) != 0) {
 				LDM_DEBUG(0, "%s: no VBLK signature\n",
 				    pp->name);
 				LDM_DUMP(p, db->dh.size);
 				goto fail;
 			}
 			vh.seq = be32dec(p + LDM_VBLK_SEQ_OFF);
 			vh.group = be32dec(p + LDM_VBLK_GROUP_OFF);
 			/* skip empty blocks */
 			if (vh.seq == 0 || vh.group == 0)
 				continue;
 			vh.index = be16dec(p + LDM_VBLK_INDEX_OFF);
 			vh.count = be16dec(p + LDM_VBLK_COUNT_OFF);
 			if (vh.count == 0 || vh.count > 4 ||
 			    vh.seq > db->dh.last_seq) {
 				LDM_DEBUG(0, "%s: invalid values "
 				    "in the VBLK header\n", pp->name);
 				LDM_DUMP(p, db->dh.size);
 				goto fail;
 			}
 			/* Fragments of extended VBLKs are only collected. */
 			if (vh.count > 1) {
 				error = ldm_xvblk_handle(db, &vh, p);
 				if (error != 0) {
 					LDM_DEBUG(0, "%s: xVBLK "
 					    "is corrupted\n", pp->name);
 					LDM_DUMP(p, db->dh.size);
 					goto fail;
 				}
 				continue;
 			}
 			if (be16dec(p + 16) != 0)
 				LDM_DEBUG(1, "%s: VBLK update"
 				    " status is %u\n", pp->name,
 				    be16dec(p + 16));
 			error = ldm_vblk_handle(db, p, db->dh.size);
 			if (error != 0)
 				goto fail;
 		}
 		g_free(buf);
 		buf = NULL;
 	}
 	/* Parse xVBLKs */
 	while (!LIST_EMPTY(&db->xvblks)) {
 		xvblk = LIST_FIRST(&db->xvblks);
 		/* A map of 0xFF means that every fragment has arrived. */
 		if (xvblk->map == 0xFF) {
 			error = ldm_vblk_handle(db, xvblk->data, xvblk->size);
 			if (error != 0)
 				goto fail;
 		} else {
 			LDM_DEBUG(0, "%s: incomplete or corrupt "
 			    "xVBLK found\n", pp->name);
 			goto fail;
 		}
 		LIST_REMOVE(xvblk, entry);
 		g_free(xvblk->data);
 		g_free(xvblk);
 	}
 	/* construct all VBLKs relations */
 	LIST_FOREACH(volume, &db->volumes, entry) {
 		LIST_FOREACH(vblk, &db->vblks, entry)
 			if (vblk->type == LDM_VBLK_T_COMPONENT &&
 			    vblk->u.comp.vol_id == volume->id) {
 				LIST_INSERT_HEAD(&volume->components,
 				    &vblk->u.comp, entry);
 				volume->count++;
 			}
 		LIST_FOREACH(comp, &volume->components, entry)
 			LIST_FOREACH(vblk, &db->vblks, entry)
 				if (vblk->type == LDM_VBLK_T_PARTITION &&
 				    vblk->u.part.comp_id == comp->id) {
 					LIST_INSERT_HEAD(&comp->partitions,
 					    &vblk->u.part, entry);
 					comp->count++;
 				}
 	}
 	return (0);
 fail:
 	ldm_vmdb_free(db);
 	g_free(buf);
 	return (ENXIO);
 }
 
 /*
  * Adding an entry is not implemented for this scheme: the arguments are
  * ignored and ENOSYS is always returned.
  */
 static int
 g_part_ldm_add(struct g_part_table *basetable, struct g_part_entry *baseentry,
     struct g_part_parms *gpp)
 {
 
 	return (ENOSYS);
 }
 
 /*
  * Installing bootcode is not implemented for this scheme: the arguments
  * are ignored and ENOSYS is always returned.
  */
 static int
 g_part_ldm_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp)
 {
 
 	return (ENOSYS);
 }
 
 /*
  * Creating a new table is not implemented for this scheme: the
  * arguments are ignored and ENOSYS is always returned.
  */
 static int
 g_part_ldm_create(struct g_part_table *basetable, struct g_part_parms *gpp)
 {
 
 	return (ENOSYS);
 }
 
 /*
  * Prepare destruction of the LDM metadata by recording which sectors at
  * the head and the tail of the provider have to be wiped.
  *
  * Returns ENOSYS for LDM on top of GPT, 0 otherwise.
  */
 static int
 g_part_ldm_destroy(struct g_part_table *basetable, struct g_part_parms *gpp)
 {
 	struct g_part_ldm_table *ldm;
 	struct g_provider *prov;
 
 	ldm = (struct g_part_ldm_table *)basetable;
 	if (ldm->is_gpt) {
 		/*
 		 * On a GPT disk the ms-ldm-metadata partition would have
 		 * to be deleted, which the standard GEOM_PART method
 		 * cannot do.
 		 */
 		return (ENOSYS);
 	}
 	/*
 	 * Head of the disk: wipe the MBR (sector 0) and the sector of the
 	 * first private header.
 	 */
 	basetable->gpt_smhead = (1 << ldm_ph_off[0]) | 1;
 	/*
 	 * Tail of the disk: the last backup private header is wiped only
 	 * when the LDM database occupies the final LDM_DB_SIZE sectors.
 	 * XXX: can't remove all blocks.
 	 */
 	prov = LIST_FIRST(&basetable->gpt_gp->consumer)->provider;
 	if (ldm->db_offset + LDM_DB_SIZE ==
 	    prov->mediasize / prov->sectorsize)
 		basetable->gpt_smtail = 1;
 	return (0);
 }
 
 /*
  * Append scheme specific configuration text to sb.
  *
  * indent == NULL selects the conftxt form, otherwise confxml is
  * produced; in confxml mode a NULL entry means scheme-level output, for
  * which nothing is emitted.
  */
 static void
 g_part_ldm_dumpconf(struct g_part_table *basetable,
     struct g_part_entry *baseentry, struct sbuf *sb, const char *indent)
 {
 	struct g_part_ldm_entry *ldment;
 
 	ldment = (struct g_part_ldm_entry *)baseentry;
 	if (indent == NULL) {
 		/* conftxt: libdisk compatibility */
 		sbuf_printf(sb, " xs LDM xt %u", ldment->type);
 		return;
 	}
 	/* confxml: scheme information has nothing to report */
 	if (ldment == NULL)
 		return;
 	/* confxml: partition entry information */
 	sbuf_printf(sb, "%s<rawtype>%u</rawtype>\n", indent, ldment->type);
 }
 
 /*
  * Always returns 0, whatever entry is passed in.
  * NOTE(review): presumably 0 means "not usable as a dump device", as in
  * g_part_mbr_dumpto() -- confirm against the g_part interface.
  */
 static int
 g_part_ldm_dumpto(struct g_part_table *table, struct g_part_entry *baseentry)
 {
 
 	return (0);
 }
 
 /*
  * Modifying an entry is not implemented for this scheme: the arguments
  * are ignored and ENOSYS is always returned.
  */
 static int
 g_part_ldm_modify(struct g_part_table *basetable,
     struct g_part_entry *baseentry, struct g_part_parms *gpp)
 {
 
 	return (ENOSYS);
 }
 
 /*
  * Format the name suffix of an entry as "s<index>" into buf (at most
  * bufsz bytes, as bounded by snprintf) and return buf.
  */
 static const char *
 g_part_ldm_name(struct g_part_table *table, struct g_part_entry *baseentry,
     char *buf, size_t bufsz)
 {
 
 	snprintf(buf, bufsz, "s%d", baseentry->gpe_index);
 	return (buf);
 }
 
 /*
  * Probe for LDM on a GPT-partitioned disk.
  *
  * Walks the entries of the GPT table owning cp's provider, looking for
  * an ms-ldm-metadata partition of at least LDM_DB_SIZE sectors.  A
  * second consumer is attached to that partition and the LDM private
  * header is read through it.
  *
  * Returns G_PART_PROBE_PRI_HIGH when the private header could be read;
  * in that case table->is_gpt is set and the second consumer is left
  * attached and opened (g_part_ldm_read() in this file releases it).
  * Returns ENXIO otherwise, with the second consumer destroyed.
  *
  * The topology lock is taken here and dropped before any I/O is done.
  */
 static int
 ldm_gpt_probe(struct g_part_table *basetable, struct g_consumer *cp)
 {
 	struct g_part_ldm_table *table;
 	struct g_part_table *gpt;
 	struct g_part_entry *entry;
 	struct g_consumer *cp2;
 	struct gpt_ent *part;
 	u_char *buf;
 	int error;
 
 	/*
 	 * XXX: We use some knowledge about GEOM_PART_GPT internal
 	 * structures, but it is easier than parsing the GPT ourselves.
 	 */
 	g_topology_lock();
 	gpt = cp->provider->geom->softc;
 	LIST_FOREACH(entry, &gpt->gpt_entry, gpe_entry) {
 		/* The GPT entry is read from right behind the generic one. */
 		part = (struct gpt_ent *)(entry + 1);
 		/* Search ms-ldm-metadata partition */
 		if (memcmp(&part->ent_type,
 		    &gpt_uuid_ms_ldm_metadata, sizeof(struct uuid)) != 0 ||
 		    entry->gpe_end - entry->gpe_start < LDM_DB_SIZE - 1)
 			continue;
 
 		/* Create new consumer and attach it to metadata partition */
 		cp2 = g_new_consumer(cp->geom);
 		error = g_attach(cp2, entry->gpe_pp);
 		if (error != 0) {
 			g_destroy_consumer(cp2);
 			g_topology_unlock();
 			return (ENXIO);
 		}
 		error = g_access(cp2, 1, 0, 0);
 		if (error != 0) {
 			g_detach(cp2);
 			g_destroy_consumer(cp2);
 			g_topology_unlock();
 			return (ENXIO);
 		}
 		/* Drop the topology lock before reading from the disk. */
 		g_topology_unlock();
 
 		LDM_DEBUG(2, "%s: LDM metadata partition %s found in the GPT",
 		    cp->provider->name, cp2->provider->name);
 		/* Read the LDM private header */
 		buf = ldm_privhdr_read(cp2,
 		    ldm_ph_off[LDM_PH_GPTINDEX] * cp2->provider->sectorsize,
 		    &error);
 		if (buf != NULL) {
 			/* Success: cp2 deliberately stays attached. */
 			table = (struct g_part_ldm_table *)basetable;
 			table->is_gpt = 1;
 			g_free(buf);
 			return (G_PART_PROBE_PRI_HIGH);
 		}
 
 		/* second consumer is no longer needed. */
 		g_topology_lock();
 		g_access(cp2, -1, 0, 0);
 		g_detach(cp2);
 		g_destroy_consumer(cp2);
 		break;
 	}
 	/* Reached with the topology lock held on every path. */
 	g_topology_unlock();
 	return (ENXIO);
 }
 
 static int
 g_part_ldm_probe(struct g_part_table *basetable, struct g_consumer *cp)
 {
 	struct g_provider *pp;
 	u_char *buf, type[64];
 	int error, idx;
 
 
 	pp = cp->provider;
 	if (pp->sectorsize != 512)
 		return (ENXIO);
 
 	error = g_getattr("PART::scheme", cp, &type);
 	if (error == 0 && strcmp(type, "GPT") == 0) {
 		if (g_getattr("PART::type", cp, &type) != 0 ||
 		    strcmp(type, "ms-ldm-data") != 0)
 			return (ENXIO);
 		error = ldm_gpt_probe(basetable, cp);
 		return (error);
 	}
 
 	if (basetable->gpt_depth != 0)
 		return (ENXIO);
 
 	/* LDM has 1M metadata area */
 	if (pp->mediasize <= 1024 * 1024)
 		return (ENOSPC);
 
 	/* Check that there's a MBR */
 	buf = g_read_data(cp, 0, pp->sectorsize, &error);
 	if (buf == NULL)
 		return (error);
 
 	if (le16dec(buf + DOSMAGICOFFSET) != DOSMAGIC) {
 		g_free(buf);
 		return (ENXIO);
 	}
 	error = ENXIO;
 	/* Check that we have LDM partitions in the MBR */
 	for (idx = 0; idx < NDOSPART && error != 0; idx++) {
 		if (buf[DOSPARTOFF + idx * DOSPARTSIZE + 4] == DOSPTYP_LDM)
 			error = 0;
 	}
 	g_free(buf);
 	if (error == 0) {
 		LDM_DEBUG(2, "%s: LDM data partitions found in MBR",
 		    pp->name);
 		/* Read the LDM private header */
 		buf = ldm_privhdr_read(cp,
 		    ldm_ph_off[LDM_PH_MBRINDEX] * pp->sectorsize, &error);
 		if (buf == NULL)
 			return (error);
 		g_free(buf);
 		return (G_PART_PROBE_PRI_HIGH);
 	}
 	return (error);
 }
 
 /*
  * Read the LDM metadata and create one partition entry per volume
  * component that lives on this disk.
  *
  * For LDM on GPT the metadata is read through the second consumer left
  * attached by ldm_gpt_probe(); that consumer is closed, detached and
  * destroyed here before returning, on success as well as on failure.
  *
  * Spanned/striped components and (unless show_mirrors is set) mirrored
  * volumes are still reported, but with the generic DOSPTYP_LDM type
  * instead of the volume's own partition type.
  *
  * Returns 0 on success, the error of the failing check/parse step, or
  * ENXIO when the current disk is not listed in the database.
  */
 static int
 g_part_ldm_read(struct g_part_table *basetable, struct g_consumer *cp)
 {
 	struct g_part_ldm_table *table;
 	struct g_part_ldm_entry *entry;
 	struct g_consumer *cp2;
 	struct ldm_component *comp;
 	struct ldm_partition *part;
 	struct ldm_volume *vol;
 	struct ldm_disk *disk;
 	struct ldm_db db;
 	int error, index, skipped;
 
 	table = (struct g_part_ldm_table *)basetable;
 	memset(&db, 0, sizeof(db));
 	cp2 = cp;					/* ms-ldm-data */
 	if (table->is_gpt)
 		cp = LIST_FIRST(&cp->geom->consumer);	/* ms-ldm-metadata */
 	/* Read and parse LDM private headers. */
 	error = ldm_privhdr_check(&db, cp, table->is_gpt);
 	if (error != 0)
 		goto gpt_cleanup;
 	basetable->gpt_first = table->is_gpt ? 0: db.ph.start;
 	basetable->gpt_last = basetable->gpt_first + db.ph.size - 1;
 	table->db_offset = db.ph.db_offset;
 	/* Make additional checks for GPT */
 	if (table->is_gpt) {
 		error = ldm_gpt_check(&db, cp);
 		if (error != 0)
 			goto gpt_cleanup;
 		/*
 		 * Now we should reset database offset to zero, because our
 		 * consumer cp is attached to the ms-ldm-metadata partition
 		 * and we don't need add db_offset to read from it.
 		 */
 		db.ph.db_offset = 0;
 	}
 	/* Read and parse LDM TOC headers. */
 	error = ldm_tochdr_check(&db, cp);
 	if (error != 0)
 		goto gpt_cleanup;
 	/* Read and parse LDM VMDB header. */
 	error = ldm_vmdbhdr_check(&db, cp);
 	if (error != 0)
 		goto gpt_cleanup;
 	error = ldm_vmdb_parse(&db, cp);
 	/*
 	 * For the GPT case we must detach and destroy
 	 * second consumer before return.
 	 */
 gpt_cleanup:
 	if (table->is_gpt) {
 		g_topology_lock();
 		g_access(cp, -1, 0, 0);
 		g_detach(cp);
 		g_destroy_consumer(cp);
 		g_topology_unlock();
 		cp = cp2;
 	}
 	if (error != 0)
 		return (error);
 	/* Search current disk in the disk list. */
 	LIST_FOREACH(disk, &db.disks, entry)
 		if (memcmp(&disk->guid, &db.ph.disk_guid,
 		    sizeof(struct uuid)) == 0)
 			break;
 	if (disk == NULL) {
 		LDM_DEBUG(1, "%s: no LDM volumes on this disk",
 		    cp->provider->name);
 		ldm_vmdb_free(&db);
 		return (ENXIO);
 	}
 	index = 1;
 	LIST_FOREACH(vol, &db.volumes, entry) {
 		LIST_FOREACH(comp, &vol->components, entry) {
 			/*
 			 * Skip volumes from different disks.  A component
 			 * without any partition (possible with a damaged
 			 * database) has nothing to show either and must
 			 * not be dereferenced.
 			 */
 			part = LIST_FIRST(&comp->partitions);
 			if (part == NULL || part->disk_id != disk->id)
 				continue;
 			skipped = 0;
 			/* We don't support spanned and striped volumes. */
 			if (comp->count > 1 || part->offset != 0) {
 				LDM_DEBUG(1, "%s: LDM volume component "
 				    "%ju has %u partitions. Skipped",
 				    cp->provider->name, (uintmax_t)comp->id,
 				    comp->count);
 				skipped = 1;
 			}
 			/*
 			 * Allow mirrored volumes only when they are explicitly
 			 * allowed with kern.geom.part.ldm.show_mirrors=1.
 			 */
 			if (vol->count > 1 && show_mirrors == 0) {
 				LDM_DEBUG(1, "%s: LDM volume %ju has %u "
 				    "components. Skipped",
 				    cp->provider->name, (uintmax_t)vol->id,
 				    vol->count);
 				skipped = 1;
 			}
 			entry = (struct g_part_ldm_entry *)g_part_new_entry(
 			    basetable, index++,
 			    basetable->gpt_first + part->start,
 			    basetable->gpt_first + part->start +
 			    part->size - 1);
 			/*
 			 * Mark skipped partition as ms-ldm-data partition.
 			 * We do not support them, but it is better to show
 			 * that we have something there, than just show
 			 * free space.
 			 */
 			if (skipped == 0)
 				entry->type = vol->part_type;
 			else
 				entry->type = DOSPTYP_LDM;
 			LDM_DEBUG(1, "%s: new volume id: %ju, start: %ju,"
 			    " end: %ju, type: 0x%02x\n", cp->provider->name,
 			    (uintmax_t)part->id,(uintmax_t)part->start +
 			    basetable->gpt_first, (uintmax_t)part->start +
 			    part->size + basetable->gpt_first - 1,
 			    vol->part_type);
 		}
 	}
 	ldm_vmdb_free(&db);
 	return (error);
 }
 
 /*
  * Return a textual partition type for an entry.
  *
  * The raw type is looked up in ldm_alias_match; on a hit the alias name
  * from g_part_alias_name() is returned.  Otherwise the type is formatted
  * as "!<number>" into buf (at most bufsz bytes) and buf is returned.
  */
 static const char *
 g_part_ldm_type(struct g_part_table *basetable, struct g_part_entry *baseentry,
     char *buf, size_t bufsz)
 {
 	struct g_part_ldm_entry *entry;
 	int i;
 
 	entry = (struct g_part_ldm_entry *)baseentry;
-	for (i = 0;
-	    i < nitems(ldm_alias_match); i++) {
+	for (i = 0; i < nitems(ldm_alias_match); i++) {
 		if (ldm_alias_match[i].typ == entry->type)
 			return (g_part_alias_name(ldm_alias_match[i].alias));
 	}
 	/* No alias known: fall back to the numeric form. */
 	snprintf(buf, bufsz, "!%d", entry->type);
 	return (buf);
 }
 
 /*
  * Writing the metadata back is not implemented for this scheme: the
  * arguments are ignored and ENOSYS is always returned.
  */
 static int
 g_part_ldm_write(struct g_part_table *basetable, struct g_consumer *cp)
 {
 
 	return (ENOSYS);
 }
Index: head/sys/geom/part/g_part_mbr.c
===================================================================
--- head/sys/geom/part/g_part_mbr.c	(revision 298353)
+++ head/sys/geom/part/g_part_mbr.c	(revision 298354)
@@ -1,607 +1,605 @@
 /*-
  * Copyright (c) 2007, 2008 Marcel Moolenaar
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/diskmbr.h>
 #include <sys/endian.h>
 #include <sys/kernel.h>
 #include <sys/kobj.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/queue.h>
 #include <sys/sbuf.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <geom/geom.h>
 #include <geom/geom_int.h>
 #include <geom/part/g_part.h>
 
 #include "g_part_if.h"
 
 FEATURE(geom_part_mbr, "GEOM partitioning class for MBR support");
 
 SYSCTL_DECL(_kern_geom_part);
 static SYSCTL_NODE(_kern_geom_part, OID_AUTO, mbr, CTLFLAG_RW, 0,
     "GEOM_PART_MBR Master Boot Record");
 
 static u_int enforce_chs = 0;
 SYSCTL_UINT(_kern_geom_part_mbr, OID_AUTO, enforce_chs,
     CTLFLAG_RWTUN, &enforce_chs, 0, "Enforce alignment to CHS addressing");
 
 /* Size in bytes of the in-memory MBR image and of the scheme's bootcode. */
 #define	MBRSIZE		512
 
 /* Scheme-private table: the generic table followed by a raw MBR image. */
 struct g_part_mbr_table {
 	struct g_part_table	base;
 	u_char		mbr[MBRSIZE];
 };
 
 /* Scheme-private entry: the generic entry followed by its MBR slot. */
 struct g_part_mbr_entry {
 	struct g_part_entry	base;
 	struct dos_partition ent;
 };
 
 static int g_part_mbr_add(struct g_part_table *, struct g_part_entry *,
     struct g_part_parms *);
 static int g_part_mbr_bootcode(struct g_part_table *, struct g_part_parms *);
 static int g_part_mbr_create(struct g_part_table *, struct g_part_parms *);
 static int g_part_mbr_destroy(struct g_part_table *, struct g_part_parms *);
 static void g_part_mbr_dumpconf(struct g_part_table *, struct g_part_entry *,
     struct sbuf *, const char *);
 static int g_part_mbr_dumpto(struct g_part_table *, struct g_part_entry *);
 static int g_part_mbr_modify(struct g_part_table *, struct g_part_entry *,  
     struct g_part_parms *);
 static const char *g_part_mbr_name(struct g_part_table *, struct g_part_entry *,
     char *, size_t);
 static int g_part_mbr_probe(struct g_part_table *, struct g_consumer *);
 static int g_part_mbr_read(struct g_part_table *, struct g_consumer *);
 static int g_part_mbr_setunset(struct g_part_table *, struct g_part_entry *,
     const char *, unsigned int);
 static const char *g_part_mbr_type(struct g_part_table *, struct g_part_entry *,
     char *, size_t);
 static int g_part_mbr_write(struct g_part_table *, struct g_consumer *);
 static int g_part_mbr_resize(struct g_part_table *, struct g_part_entry *,
     struct g_part_parms *);
 
 /*
  * kobj method table binding each g_part interface method to its MBR
  * implementation in this file; the list ends with a zero entry.
  */
 static kobj_method_t g_part_mbr_methods[] = {
 	KOBJMETHOD(g_part_add,		g_part_mbr_add),
 	KOBJMETHOD(g_part_bootcode,	g_part_mbr_bootcode),
 	KOBJMETHOD(g_part_create,	g_part_mbr_create),
 	KOBJMETHOD(g_part_destroy,	g_part_mbr_destroy),
 	KOBJMETHOD(g_part_dumpconf,	g_part_mbr_dumpconf),
 	KOBJMETHOD(g_part_dumpto,	g_part_mbr_dumpto),
 	KOBJMETHOD(g_part_modify,	g_part_mbr_modify),
 	KOBJMETHOD(g_part_resize,	g_part_mbr_resize),
 	KOBJMETHOD(g_part_name,		g_part_mbr_name),
 	KOBJMETHOD(g_part_probe,	g_part_mbr_probe),
 	KOBJMETHOD(g_part_read,		g_part_mbr_read),
 	KOBJMETHOD(g_part_setunset,	g_part_mbr_setunset),
 	KOBJMETHOD(g_part_type,		g_part_mbr_type),
 	KOBJMETHOD(g_part_write,	g_part_mbr_write),
 	{ 0, 0 }
 };
 
 /*
  * Scheme descriptor for "MBR": method table, size of the private table
  * structure, size of a private entry, the fixed number of entries
  * (NDOSPART for both minimum and maximum) and the bootcode size.
  */
 static struct g_part_scheme g_part_mbr_scheme = {
 	"MBR",
 	g_part_mbr_methods,
 	sizeof(struct g_part_mbr_table),
 	.gps_entrysz = sizeof(struct g_part_mbr_entry),
 	.gps_minent = NDOSPART,
 	.gps_maxent = NDOSPART,
 	.gps_bootcodesz = MBRSIZE,
 };
 G_PART_SCHEME_DECLARE(g_part_mbr);
 
 /*
  * Mapping between raw MBR partition type bytes and GEOM_PART aliases.
  * Both DOSPTYP_EXT and DOSPTYP_EXTLBA map to G_PART_ALIAS_EBR; since
  * mbr_parse_type() returns the first match, the "ebr" alias is parsed
  * back to DOSPTYP_EXT.
  */
 static struct g_part_mbr_alias {
 	u_char		typ;
 	int		alias;
 } mbr_alias_match[] = {
 	{ DOSPTYP_386BSD,	G_PART_ALIAS_FREEBSD },
 	{ DOSPTYP_EXT,		G_PART_ALIAS_EBR },
 	{ DOSPTYP_NTFS,		G_PART_ALIAS_MS_NTFS },
 	{ DOSPTYP_FAT16,	G_PART_ALIAS_MS_FAT16 },
 	{ DOSPTYP_FAT32,	G_PART_ALIAS_MS_FAT32 },
 	{ DOSPTYP_EXTLBA,	G_PART_ALIAS_EBR },
 	{ DOSPTYP_LDM,		G_PART_ALIAS_MS_LDM_DATA },
 	{ DOSPTYP_LINSWP,	G_PART_ALIAS_LINUX_SWAP },
 	{ DOSPTYP_LINUX,	G_PART_ALIAS_LINUX_DATA },
 	{ DOSPTYP_LINLVM,	G_PART_ALIAS_LINUX_LVM },
 	{ DOSPTYP_LINRAID,	G_PART_ALIAS_LINUX_RAID },
 	{ DOSPTYP_PPCBOOT,	G_PART_ALIAS_PREP_BOOT },
 	{ DOSPTYP_VMFS,		G_PART_ALIAS_VMFS },
 	{ DOSPTYP_VMKDIAG,	G_PART_ALIAS_VMKDIAG },
 	{ DOSPTYP_APPLE_UFS,	G_PART_ALIAS_APPLE_UFS },
 	{ DOSPTYP_APPLE_BOOT,	G_PART_ALIAS_APPLE_BOOT },
 	{ DOSPTYP_HFS,		G_PART_ALIAS_APPLE_HFS },
 };
 
 /*
  * Convert a textual partition type into an MBR type byte.
  *
  * type   - either "!<number>" (parsed by strtol with base 0, accepted
  *          range 1..255) or an alias name, compared case-insensitively
  *          against the names of the aliases in mbr_alias_match.
  * dp_typ - receives the type byte on success.
  *
  * Returns 0 on success, EINVAL when the number is malformed or out of
  * range, or when no alias matches.
  */
 static int
 mbr_parse_type(const char *type, u_char *dp_typ)
 {
 	const char *alias;
 	char *endp;
 	long lt;
 	int i;
 
 	if (type[0] == '!') {
 		lt = strtol(type + 1, &endp, 0);
 		/* Reject an empty number, trailing junk and values outside 1..255. */
 		if (type[1] == '\0' || *endp != '\0' || lt <= 0 || lt >= 256)
 			return (EINVAL);
 		*dp_typ = (u_char)lt;
 		return (0);
 	}
-	for (i = 0;
-	    i < nitems(mbr_alias_match); i++) {
+	for (i = 0; i < nitems(mbr_alias_match); i++) {
 		alias = g_part_alias_name(mbr_alias_match[i].alias);
 		if (strcasecmp(type, alias) == 0) {
 			*dp_typ = mbr_alias_match[i].typ;
 			return (0);
 		}
 	}
 	return (EINVAL);
 }
 
 /*
  * Decide whether bpb looks like a BIOS Parameter Block.
  *
  * The little-endian 16-bit value at offset 0 (sector size) must be a
  * power of two in 512..4096, and the byte at offset 2 (cluster size) a
  * power of two in 1..128.
  *
  * Returns 1 when both checks pass, 0 otherwise.
  */
 static int
 mbr_probe_bpb(u_char *bpb)
 {
 	uint16_t sector_bytes;
 	uint8_t cluster_sectors;
 
 	sector_bytes = le16dec(bpb);
 	if (sector_bytes < 512 || sector_bytes > 4096)
 		return (0);
 	/* A power of two has exactly one bit set. */
 	if ((sector_bytes & (sector_bytes - 1)) != 0)
 		return (0);
 
 	cluster_sectors = bpb[2];
 	if (cluster_sectors < 1 || cluster_sectors > 128)
 		return (0);
 	if ((cluster_sectors & (cluster_sectors - 1)) != 0)
 		return (0);
 
 	return (1);
 }
 
 /*
  * Convert a logical block address into the packed cylinder/head/sector
  * bytes of an MBR entry, using the geometry stored in table
  * (gpt_sectors per track, gpt_heads).
  *
  * When the cylinder does not fit (above 1023) all three values are set
  * to all-ones before packing.  The sector byte carries the sector in
  * its low six bits and cylinder bits 8-9 in its top two bits.
  */
 static void
 mbr_set_chs(struct g_part_table *table, uint32_t lba, u_char *cylp, u_char *hdp,
     u_char *secp)
 {
 	uint32_t c, h, s;
 
 	s = lba % table->gpt_sectors + 1;
 	lba /= table->gpt_sectors;
 	h = lba % table->gpt_heads;
 	c = lba / table->gpt_heads;
 	if (c > 1023) {
 		/* Not addressable via CHS. */
 		c = ~0;
 		h = c;
 		s = c;
 	}
 
 	*secp = (s & 0x3f) | ((c >> 2) & 0xc0);
 	*hdp = h & 0xff;
 	*cylp = c & 0xff;
 }
 
 /*
  * Align a partition to track boundaries when the enforce_chs sysctl is
  * set; without it the values are left untouched.
  *
  * start - optional (may be NULL); rounded up to the next multiple of
  *         gpt_sectors, with *size reduced by the same amount.
  * size  - rounded down to a multiple of gpt_sectors.
  *
  * Returns 0 on success, EINVAL when the size is smaller than one track
  * before or after the adjustment.
  */
 static int
 mbr_align(struct g_part_table *basetable, uint32_t *start, uint32_t *size)
 {
 	uint32_t track, gap;
 
 	if (enforce_chs == 0)
 		return (0);
 	track = basetable->gpt_sectors;
 	if (*size < track)
 		return (EINVAL);
 	if (start != NULL && (*start % track) != 0) {
 		/* Distance from *start up to the next track boundary. */
 		gap = track - (*start % track);
 		*size -= gap;
 		*start += gap;
 	}
 	/* Drop the partial track at the end, if any. */
 	*size -= *size % track;
 	if (*size < track)
 		return (EINVAL);
 	return (0);
 }
 
 /*
  * Fill in a new MBR entry from the request in gpp.
  *
  * Labels are not supported (EINVAL).  Start and size go through
  * mbr_align() and are then stored in the generic entry and in the MBR
  * slot, together with the CHS values of the first and last sector.
  *
  * Returns the result of mbr_parse_type() for the requested type, or
  * EINVAL when a label was given or the alignment failed.
  */
 static int
 g_part_mbr_add(struct g_part_table *basetable, struct g_part_entry *baseentry,
     struct g_part_parms *gpp)
 {
 	struct g_part_mbr_entry *mbrent;
 	uint32_t first, nsect, last;
 
 	if (gpp->gpp_parms & G_PART_PARM_LABEL)
 		return (EINVAL);
 
 	first = gpp->gpp_start;
 	nsect = gpp->gpp_size;
 	if (mbr_align(basetable, &first, &nsect) != 0)
 		return (EINVAL);
 
 	mbrent = (struct g_part_mbr_entry *)baseentry;
 	/* A reused, previously deleted entry starts from a clean slot. */
 	if (baseentry->gpe_deleted)
 		bzero(&mbrent->ent, sizeof(mbrent->ent));
 
 	last = first + nsect - 1;
 	KASSERT(baseentry->gpe_start <= first, ("%s", __func__));
 	KASSERT(baseentry->gpe_end >= last, ("%s", __func__));
 	baseentry->gpe_start = first;
 	baseentry->gpe_end = last;
 	mbrent->ent.dp_start = first;
 	mbrent->ent.dp_size = nsect;
 	mbr_set_chs(basetable, baseentry->gpe_start, &mbrent->ent.dp_scyl,
 	    &mbrent->ent.dp_shd, &mbrent->ent.dp_ssect);
 	mbr_set_chs(basetable, baseentry->gpe_end, &mbrent->ent.dp_ecyl,
 	    &mbrent->ent.dp_ehd, &mbrent->ent.dp_esect);
 	return (mbr_parse_type(gpp->gpp_type, &mbrent->ent.dp_typ));
 }
 
 /*
  * Install new bootcode into the in-memory MBR image.
  *
  * Only the first DOSPARTOFF bytes of the supplied code are copied, so
  * the partition table and everything after it stay as they are.  A
  * non-zero 32-bit value found at DOSDSNOFF before the copy is put back
  * afterwards.
  *
  * Returns ENODEV when the supplied code is not exactly MBRSIZE bytes,
  * 0 otherwise.
  */
 static int
 g_part_mbr_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp)
 {
 	struct g_part_mbr_table *table;
 	uint32_t dsn;
 
 	if (gpp->gpp_codesize != MBRSIZE)
 		return (ENODEV);
 
 	table = (struct g_part_mbr_table *)basetable;
 	/*
 	 * Move the value with memcpy(): table->mbr is a byte array, so
 	 * reading it through a uint32_t pointer assumes an alignment the
 	 * array does not guarantee.
 	 */
 	memcpy(&dsn, table->mbr + DOSDSNOFF, sizeof(dsn));
 	bcopy(gpp->gpp_codeptr, table->mbr, DOSPARTOFF);
 	if (dsn != 0)
 		memcpy(table->mbr + DOSDSNOFF, &dsn, sizeof(dsn));
 	return (0);
 }
 
 /*
  * Initialise an empty MBR table on the provider named in gpp.
  *
  * The usable area starts at gpt_sectors and ends at the last sector of
  * the provider, capped so that it fits into 32 bits.  The DOS magic is
  * written into the in-memory MBR image.
  *
  * Returns ENOSPC when the sector size is smaller than MBRSIZE, 0
  * otherwise.
  */
 static int
 g_part_mbr_create(struct g_part_table *basetable, struct g_part_parms *gpp)
 {
 	struct g_part_mbr_table *mbrtab;
 	struct g_provider *prov;
 
 	prov = gpp->gpp_provider;
 	if (prov->sectorsize < MBRSIZE)
 		return (ENOSPC);
 
 	/* Stamp the boot signature into the sector image. */
 	mbrtab = (struct g_part_mbr_table *)basetable;
 	le16enc(mbrtab->mbr + DOSMAGICOFFSET, DOSMAGIC);
 
 	basetable->gpt_first = basetable->gpt_sectors;
 	basetable->gpt_last = MIN(prov->mediasize / prov->sectorsize,
 	    UINT32_MAX) - 1;
 	return (0);
 }
 
 /*
  * Request destruction of the MBR: sets bit 0 of gpt_smhead so that the
  * first sector gets wiped.  Always returns 0.
  */
 static int
 g_part_mbr_destroy(struct g_part_table *basetable, struct g_part_parms *gpp)
 {
 
 	/* Wipe the first sector to clear the partitioning. */
 	basetable->gpt_smhead |= 1;
 	return (0);
 }
 
 /*
  * Append scheme specific configuration text to sb.
  *
  * indent == NULL selects the conftxt form, otherwise confxml is
  * produced; in confxml mode a NULL entry means scheme-level output, for
  * which nothing is emitted.  Entries with the 0x80 flag set are also
  * reported as "active".
  */
 static void
 g_part_mbr_dumpconf(struct g_part_table *table, struct g_part_entry *baseentry,
     struct sbuf *sb, const char *indent)
 {
 	struct g_part_mbr_entry *mbrent;
 
 	mbrent = (struct g_part_mbr_entry *)baseentry;
 	if (indent == NULL) {
 		/* conftxt: libdisk compatibility */
 		sbuf_printf(sb, " xs MBR xt %u", mbrent->ent.dp_typ);
 		return;
 	}
 	/* confxml: scheme information has nothing to report */
 	if (mbrent == NULL)
 		return;
 	/* confxml: partition entry information */
 	sbuf_printf(sb, "%s<rawtype>%u</rawtype>\n", indent,
 	    mbrent->ent.dp_typ);
 	if (mbrent->ent.dp_flag & 0x80)
 		sbuf_printf(sb, "%s<attrib>active</attrib>\n", indent);
 }
 
 /*
  * Tell whether the entry may be used as a dump target: returns 1 for
  * FreeBSD and Linux swap partitions, 0 for every other type.
  */
 static int
 g_part_mbr_dumpto(struct g_part_table *table, struct g_part_entry *baseentry)
 {
 	struct g_part_mbr_entry *mbrent;
 
 	mbrent = (struct g_part_mbr_entry *)baseentry;
 	switch (mbrent->ent.dp_typ) {
 	case DOSPTYP_386BSD:
 	case DOSPTYP_LINSWP:
 		return (1);
 	default:
 		return (0);
 	}
 }
 
 /*
  * Modify an existing entry.  Only the partition type can be changed;
  * a label request is rejected with EINVAL and a request without a type
  * succeeds without touching the entry.
  */
 static int
 g_part_mbr_modify(struct g_part_table *basetable,
     struct g_part_entry *baseentry, struct g_part_parms *gpp)
 {
 	struct g_part_mbr_entry *mbrent;
 
 	if (gpp->gpp_parms & G_PART_PARM_LABEL)
 		return (EINVAL);
 	if ((gpp->gpp_parms & G_PART_PARM_TYPE) == 0)
 		return (0);
 
 	mbrent = (struct g_part_mbr_entry *)baseentry;
 	return (mbr_parse_type(gpp->gpp_type, &mbrent->ent.dp_typ));
 }
 
 /*
  * Resize the table (baseentry == NULL) or a single entry.
  *
  * For the table only gpt_last is recomputed from the current provider
  * size, capped to 32 bits.  For an entry the requested size is aligned
  * with mbr_align() and the end sector, the slot size and the ending CHS
  * values are updated.
  *
  * Returns 0 on success, EINVAL when the alignment fails, or EBUSY when
  * alignment shrank the request below the current provider size and bit
  * 0x10 of g_debugflags is not set.
  */
 static int
 g_part_mbr_resize(struct g_part_table *basetable,
     struct g_part_entry *baseentry, struct g_part_parms *gpp)
 {
 	struct g_part_mbr_entry *mbrent;
 	struct g_provider *prov;
 	uint32_t nsect;
 
 	if (baseentry == NULL) {
 		/* Table resize: follow the size of the underlying provider. */
 		prov = LIST_FIRST(&basetable->gpt_gp->consumer)->provider;
 		basetable->gpt_last = MIN(prov->mediasize / prov->sectorsize,
 		    UINT32_MAX) - 1;
 		return (0);
 	}
 
 	nsect = gpp->gpp_size;
 	if (mbr_align(basetable, NULL, &nsect) != 0)
 		return (EINVAL);
 
 	/* XXX: prevent unexpected shrinking. */
 	prov = baseentry->gpe_pp;
 	if ((g_debugflags & 0x10) == 0 && nsect < gpp->gpp_size &&
 	    prov->mediasize / prov->sectorsize > nsect)
 		return (EBUSY);
 
 	mbrent = (struct g_part_mbr_entry *)baseentry;
 	mbrent->ent.dp_size = nsect;
 	baseentry->gpe_end = baseentry->gpe_start + nsect - 1;
 	mbr_set_chs(basetable, baseentry->gpe_end, &mbrent->ent.dp_ecyl,
 	    &mbrent->ent.dp_ehd, &mbrent->ent.dp_esect);
 	return (0);
 }
 
 /*
  * Format the name of an entry as "s<index>" into the caller's buffer
  * (at most bufsz bytes, as bounded by snprintf()) and return that buffer.
  */
 static const char *
 g_part_mbr_name(struct g_part_table *table, struct g_part_entry *baseentry,
     char *buf, size_t bufsz)
 {
 
 	(void)snprintf(buf, bufsz, "s%d", baseentry->gpe_index);
 	return (buf);
 }
 
 /*
  * Probe the provider attached to consumer cp for an MBR.
  *
  * Returns:
  *   G_PART_PROBE_PRI_NORM  sector 0 looks like an MBR;
  *   ENOSPC                 sector size below MBRSIZE, or media smaller than
  *                          one sector;
  *   ENXIO                  sector size above 4096, or sector 0 does not look
  *                          like an MBR;
  *   ELOOP                  the "PART::scheme" attribute read through cp
  *                          already names this scheme;
  *   other                  the error reported by g_read_data().
  */
 static int
 g_part_mbr_probe(struct g_part_table *table, struct g_consumer *cp)
 {
 	char psn[8];
 	struct g_provider *pp;
 	u_char *buf, *p;
 	int error, index, res, sum;
 	uint16_t magic;
 
 	pp = cp->provider;
 
 	/* Sanity-check the provider. */
 	if (pp->sectorsize < MBRSIZE || pp->mediasize < pp->sectorsize)
 		return (ENOSPC);
 	if (pp->sectorsize > 4096)
 		return (ENXIO);
 
 	/* We don't nest under an MBR (see EBR instead). */
 	error = g_getattr("PART::scheme", cp, &psn);
 	if (error == 0 && strcmp(psn, g_part_mbr_scheme.name) == 0)
 		return (ELOOP);
 
 	/* Check that there's a MBR. */
 	buf = g_read_data(cp, 0L, pp->sectorsize, &error);
 	if (buf == NULL)
 		return (error);
 
 	/* We goto out on mismatch. */
 	res = ENXIO;
 
 	/* The little-endian signature at DOSMAGICOFFSET must match. */
 	magic = le16dec(buf + DOSMAGICOFFSET);
 	if (magic != DOSMAGIC)
 		goto out;
 
 	/*
 	 * The first byte of every table slot (the flag byte) may only be
 	 * 0 or 0x80; anything else disqualifies the sector.
 	 */
 	for (index = 0; index < NDOSPART; index++) {
 		p = buf + DOSPARTOFF + index * DOSPARTSIZE;
 		if (p[0] != 0 && p[0] != 0x80)
 			goto out;
 	}
 
 	/*
 	 * If the partition table does not consist of all zeroes,
 	 * assume we have a MBR. If it's all zeroes, we could have
 	 * a boot sector. For example, a boot sector that doesn't
 	 * have boot code -- common on non-i386 hardware. In that
 	 * case we check if we have a possible BPB. If so, then we
 	 * assume we have a boot sector instead.
 	 */
 	sum = 0;
 	for (index = 0; index < NDOSPART * DOSPARTSIZE; index++)
 		sum += buf[DOSPARTOFF + index];
 	if (sum != 0 || !mbr_probe_bpb(buf + 0x0b))
 		res = G_PART_PROBE_PRI_NORM;
 
  out:
 	/* Single exit once the sector buffer exists, so it is always freed. */
 	g_free(buf);
 	return (res);
 }
 
 /*
  * Read sector 0 through cp, cache it in the table and create an in-core
  * entry for every usable slot of the partition table.
  *
  * Returns 0 on success or the error reported by g_read_data().
  */
 static int
 g_part_mbr_read(struct g_part_table *basetable, struct g_consumer *cp)
 {
 	struct dos_partition ent;
 	struct g_provider *pp;
 	struct g_part_mbr_table *table;
 	struct g_part_mbr_entry *entry;
 	u_char *buf, *p;
 	off_t chs, msize, first;
 	u_int sectors, heads;
 	int error, index;
 
 	pp = cp->provider;
 	table = (struct g_part_mbr_table *)basetable;
 	/* 'first' tracks the lowest start sector seen among the entries. */
 	first = basetable->gpt_sectors;
 	/* Media size in sectors, capped at UINT32_MAX. */
 	msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX);
 
 	buf = g_read_data(cp, 0L, pp->sectorsize, &error);
 	if (buf == NULL)
 		return (error);
 
 	/* Keep a copy of the leading bytes of the sector in the table. */
 	bcopy(buf, table->mbr, sizeof(table->mbr));
 	/* Slots are visited from the last one down to the first. */
 	for (index = NDOSPART - 1; index >= 0; index--) {
 		/* Decode the on-disk slot field by field. */
 		p = buf + DOSPARTOFF + index * DOSPARTSIZE;
 		ent.dp_flag = p[0];
 		ent.dp_shd = p[1];
 		ent.dp_ssect = p[2];
 		ent.dp_scyl = p[3];
 		ent.dp_typ = p[4];
 		ent.dp_ehd = p[5];
 		ent.dp_esect = p[6];
 		ent.dp_ecyl = p[7];
 		ent.dp_start = le32dec(p + 8);
 		ent.dp_size = le32dec(p + 12);
 		/* Skip slots with type 0 or DOSPTYP_PMBR, or a zero start/size. */
 		if (ent.dp_typ == 0 || ent.dp_typ == DOSPTYP_PMBR)
 			continue;
 		if (ent.dp_start == 0 || ent.dp_size == 0)
 			continue;
 		/*
 		 * Unless the geometry is fixed, adopt a larger sectors value
 		 * taken from the low six bits of the ending sector field,
 		 * provided g_part_geometry_heads() reports a non-zero chs.
 		 */
 		sectors = ent.dp_esect & 0x3f;
 		if (sectors > basetable->gpt_sectors &&
 		    !basetable->gpt_fixgeom) {
 			g_part_geometry_heads(msize, sectors, &chs, &heads);
 			if (chs != 0) {
 				basetable->gpt_sectors = sectors;
 				basetable->gpt_heads = heads;
 			}
 		}
 		if (ent.dp_start < first)
 			first = ent.dp_start;
 		/* Entry indices are 1-based; the end sector is inclusive. */
 		entry = (struct g_part_mbr_entry *)g_part_new_entry(basetable,
 		    index + 1, ent.dp_start, ent.dp_start + ent.dp_size - 1);
 		entry->ent = ent;
 	}
 
 	basetable->gpt_entries = NDOSPART;
 	basetable->gpt_first = basetable->gpt_sectors;
 	basetable->gpt_last = msize - 1;
 
 	/* An entry starts below the default first sector: allow sector 1. */
 	if (first < basetable->gpt_first)
 		basetable->gpt_first = 1;
 
 	g_free(buf);
 	return (0);
 }
 
 static int
 g_part_mbr_setunset(struct g_part_table *table, struct g_part_entry *baseentry,
     const char *attrib, unsigned int set)
 {
 	struct g_part_entry *iter;
 	struct g_part_mbr_entry *entry;
 	int changed;
 
 	if (baseentry == NULL)
 		return (ENODEV);
 	if (strcasecmp(attrib, "active") != 0)
 		return (EINVAL);
 
 	/* Only one entry can have the active attribute. */
 	LIST_FOREACH(iter, &table->gpt_entry, gpe_entry) {
 		if (iter->gpe_deleted)
 			continue;
 		changed = 0;
 		entry = (struct g_part_mbr_entry *)iter;
 		if (iter == baseentry) {
 			if (set && (entry->ent.dp_flag & 0x80) == 0) {
 				entry->ent.dp_flag |= 0x80;
 				changed = 1;
 			} else if (!set && (entry->ent.dp_flag & 0x80)) {
 				entry->ent.dp_flag &= ~0x80;
 				changed = 1;
 			}
 		} else {
 			if (set && (entry->ent.dp_flag & 0x80)) {
 				entry->ent.dp_flag &= ~0x80;
 				changed = 1;
 			}
 		}
 		if (changed && !iter->gpe_created)
 			iter->gpe_modified = 1;
 	}
 	return (0);
 }
 
 /*
  * Return a printable type name for an entry.  When the entry's dp_typ is
  * listed in mbr_alias_match the matching alias name is returned; otherwise
  * the numeric type is formatted as "!<decimal>" into buf, which is returned.
  */
 static const char *
 g_part_mbr_type(struct g_part_table *basetable, struct g_part_entry *baseentry, 
     char *buf, size_t bufsz)
 {
 	struct g_part_mbr_entry *entry;
 	int i;
 
 	entry = (struct g_part_mbr_entry *)baseentry;
-	for (i = 0;
-	    i < nitems(mbr_alias_match); i++) {
+	for (i = 0; i < nitems(mbr_alias_match); i++) {
 		if (mbr_alias_match[i].typ == entry->ent.dp_typ)
 			return (g_part_alias_name(mbr_alias_match[i].alias));
 	}
 	/* No alias known for this type: fall back to the raw number. */
 	snprintf(buf, bufsz, "!%d", entry->ent.dp_typ);
 	return (buf);
 }
 
 static int
 g_part_mbr_write(struct g_part_table *basetable, struct g_consumer *cp)
 {
 	struct g_part_entry *baseentry;
 	struct g_part_mbr_entry *entry;
 	struct g_part_mbr_table *table;
 	u_char *p;
 	int error, index;
 
 	table = (struct g_part_mbr_table *)basetable;
 	baseentry = LIST_FIRST(&basetable->gpt_entry);
 	for (index = 1; index <= basetable->gpt_entries; index++) {
 		p = table->mbr + DOSPARTOFF + (index - 1) * DOSPARTSIZE;
 		entry = (baseentry != NULL && index == baseentry->gpe_index)
 		    ? (struct g_part_mbr_entry *)baseentry : NULL;
 		if (entry != NULL && !baseentry->gpe_deleted) {
 			p[0] = entry->ent.dp_flag;
 			p[1] = entry->ent.dp_shd;
 			p[2] = entry->ent.dp_ssect;
 			p[3] = entry->ent.dp_scyl;
 			p[4] = entry->ent.dp_typ;
 			p[5] = entry->ent.dp_ehd;
 			p[6] = entry->ent.dp_esect;
 			p[7] = entry->ent.dp_ecyl;
 			le32enc(p + 8, entry->ent.dp_start);
 			le32enc(p + 12, entry->ent.dp_size);
 		} else
 			bzero(p, DOSPARTSIZE);
 
 		if (entry != NULL)
 			baseentry = LIST_NEXT(baseentry, gpe_entry);
 	}
 
 	error = g_write_data(cp, 0, table->mbr, cp->provider->sectorsize);
 	return (error);
 }
Index: head/sys/kern/sysv_msg.c
===================================================================
--- head/sys/kern/sysv_msg.c	(revision 298353)
+++ head/sys/kern/sysv_msg.c	(revision 298354)
@@ -1,1590 +1,1589 @@
 /*-
  * Implementation of SVID messages
  *
  * Author:  Daniel Boulet
  *
  * Copyright 1993 Daniel Boulet and RTMX Inc.
  *
  * This system call was implemented by Daniel Boulet under contract from RTMX.
  *
  * Redistribution and use in source forms, with and without modification,
  * are permitted provided that this entire comment appears intact.
  *
  * Redistribution in binary form may occur without any restrictions.
  * Obviously, it would be nice if you gave credit where credit is due
  * but requiring it would be too onerous.
  *
  * This software is provided ``AS IS'' without any warranties of any kind.
  */
 /*-
  * Copyright (c) 2003-2005 McAfee, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project in part by McAfee
  * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
  * program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_sysvipc.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/kernel.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/module.h>
 #include <sys/msg.h>
 #include <sys/racct.h>
 #include <sys/syscall.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/jail.h>
 
 #include <security/mac/mac_framework.h>
 
 FEATURE(sysv_msg, "System V message queues support");
 
 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
 
 static int msginit(void);
 static int msgunload(void);
 static int sysvmsg_modload(struct module *, int, void *);
 
 
 #ifdef MSG_DEBUG
 #define DPRINTF(a)	printf a
 #else
 #define DPRINTF(a)	(void)0
 #endif
 
 static void msg_freehdr(struct msg *msghdr);
 
 #ifndef MSGSSZ
 #define MSGSSZ	8		/* Each segment must be 2^N long */
 #endif
 #ifndef MSGSEG
 #define MSGSEG	2048		/* must be less than 32767 */
 #endif
 #define MSGMAX	(MSGSSZ*MSGSEG)
 #ifndef MSGMNB
 #define MSGMNB	2048		/* max # of bytes in a queue */
 #endif
 #ifndef MSGMNI
 #define MSGMNI	40
 #endif
 #ifndef MSGTQL
 #define MSGTQL	40
 #endif
 
 /*
  * Based on the configuration parameters described in an SVR2 (yes, two)
  * config(1m) man page.
  *
  * Each message is broken up and stored in segments that are msgssz bytes
  * long.  For efficiency reasons, this should be a power of two.  Also,
  * it doesn't make sense if it is less than 8 or greater than about 256.
  * Consequently, msginit() in kern/sysv_msg.c checks that msgssz is a power
  * of two between 8 and 1024 inclusive (and panics if it isn't).
  */
 /*
  * System-wide message queue limits, seeded from the MSG* defaults above.
  * msginit() overwrites the first member with msgseg * msgssz.
  */
 struct msginfo msginfo = {
                 MSGMAX,         /* max chars in a message */
                 MSGMNI,         /* # of message queue identifiers */
                 MSGMNB,         /* max chars in a queue */
                 MSGTQL,         /* max messages in system */
                 MSGSSZ,         /* size of a message segment */
                 		/* (must be small power of 2 greater than 4) */
                 MSGSEG          /* number of message segments */
 };
 
 /*
  * Implementation-specific packing of a message queue identifier:
  * the low 16 bits hold the index into the queue table and the next
  * 16 bits hold the sequence number taken from the queue's msg_perm.
  */
 #define MSQID(ix,ds)	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
 #define MSQID_IX(id)	((id) & 0xffff)
 #define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
 
 /*
  * The rest of this file is specific to this particular implementation.
  */
 
 /*
  * One map entry per message segment.  Entries are chained through 'next',
  * both on the free list headed by free_msgmaps and on the per-message
  * chain that starts at a message header's msg_spot.
  */
 struct msgmap {
 	short	next;		/* next segment in buffer */
     				/* -1 -> available */
     				/* 0..(MSGSEG-1) -> index of next segment */
 };
 
 /*
  * Module-wide state.  msgpool, msgmaps, msghdrs and msqids are allocated in
  * msginit() and released in msgunload(); msq_mtx is initialised and
  * destroyed by the same two functions.
  */
 #define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
 
 static int nfree_msgmaps;	/* # of free map entries */
 static short free_msgmaps;	/* head of linked list of free map entries */
 static struct msg *free_msghdrs;/* list of free msg headers */
 static char *msgpool;		/* MSGMAX byte long msg buffer pool */
 static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
 static struct msg *msghdrs;	/* MSGTQL msg headers */
 static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
 static struct mtx msq_mtx;	/* global mutex for message queues. */
 
 /*
  * Native system call table for this module.  It is handed to
  * syscall_helper_register() by msginit() and to
  * syscall_helper_unregister() by msgunload().  The msgsys and
  * freebsd7_msgctl entries exist only when one of the COMPAT_FREEBSD4..7
  * options is defined.
  */
 static struct syscall_helper_data msg_syscalls[] = {
 	SYSCALL_INIT_HELPER(msgctl),
 	SYSCALL_INIT_HELPER(msgget),
 	SYSCALL_INIT_HELPER(msgsnd),
 	SYSCALL_INIT_HELPER(msgrcv),
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
 	SYSCALL_INIT_HELPER(msgsys),
 	SYSCALL_INIT_HELPER_COMPAT(freebsd7_msgctl),
 #endif
 	SYSCALL_INIT_LAST
 };
 
 #ifdef COMPAT_FREEBSD32
 #include <compat/freebsd32/freebsd32.h>
 #include <compat/freebsd32/freebsd32_ipc.h>
 #include <compat/freebsd32/freebsd32_proto.h>
 #include <compat/freebsd32/freebsd32_signal.h>
 #include <compat/freebsd32/freebsd32_syscall.h>
 #include <compat/freebsd32/freebsd32_util.h>
 
 /*
  * 32-bit compatibility system call table (COMPAT_FREEBSD32 only).  It is
  * handed to syscall32_helper_register() by msginit() and to
  * syscall32_helper_unregister() by msgunload().
  */
 static struct syscall_helper_data msg32_syscalls[] = {
 	SYSCALL32_INIT_HELPER(freebsd32_msgctl),
 	SYSCALL32_INIT_HELPER(freebsd32_msgsnd),
 	SYSCALL32_INIT_HELPER(freebsd32_msgrcv),
 	SYSCALL32_INIT_HELPER_COMPAT(msgget),
 	SYSCALL32_INIT_HELPER(freebsd32_msgsys),
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
 	SYSCALL32_INIT_HELPER(freebsd7_freebsd32_msgctl),
 #endif
 	SYSCALL_INIT_LAST
 };
 #endif
 
 /*
  * Set up the message queue subsystem: allocate the segment pool, segment
  * map, message headers and queue descriptors, validate the configured
  * limits (panicking on an unusable msgssz or msgseg), build the free
  * lists, initialise the global mutex and register the system calls.
  *
  * Returns 0 on success or the error from system call registration.
  */
 static int
 msginit(void)
 {
 	int error, idx, ssz;
 
 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
 	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
 	msgmaps = malloc(sizeof(*msgmaps) * msginfo.msgseg, M_MSG, M_WAITOK);
 	msghdrs = malloc(sizeof(*msghdrs) * msginfo.msgtql, M_MSG, M_WAITOK);
 	msqids = malloc(sizeof(*msqids) * msginfo.msgmni, M_MSG, M_WAITOK);
 
 	/*
 	 * The segment size must be one of the powers of two from 8 up to
 	 * and including 1024; step through them looking for a match.
 	 */
 	for (ssz = 8; ssz < 1024; ssz <<= 1) {
 		if (ssz == msginfo.msgssz)
 			break;
 	}
 	if (ssz != msginfo.msgssz) {
 		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
 		    msginfo.msgssz));
 		panic("msginfo.msgssz not a small power of 2");
 	}
 
 	/* Segment indices are kept in shorts. */
 	if (msginfo.msgseg > 32767) {
 		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
 		panic("msginfo.msgseg > 32767");
 	}
 
 	/* Chain every segment onto the free list; -1 ends the chain. */
 	for (idx = 0; idx < msginfo.msgseg; idx++) {
 		if (idx + 1 < msginfo.msgseg)
 			msgmaps[idx].next = idx + 1;
 		else
 			msgmaps[idx].next = -1;
 	}
 	free_msgmaps = 0;
 	nfree_msgmaps = msginfo.msgseg;
 
 	/* Chain every message header onto the free header list. */
 	for (idx = 0; idx < msginfo.msgtql; idx++) {
 		msghdrs[idx].msg_type = 0;
 		if (idx != 0)
 			msghdrs[idx - 1].msg_next = &msghdrs[idx];
 		msghdrs[idx].msg_next = NULL;
 #ifdef MAC
 		mac_sysvmsg_init(&msghdrs[idx]);
 #endif
 	}
 	free_msghdrs = &msghdrs[0];
 
 	/* A zero msg_qbytes marks a queue descriptor as available. */
 	for (idx = 0; idx < msginfo.msgmni; idx++) {
 		msqids[idx].u.msg_qbytes = 0;
 		msqids[idx].u.msg_perm.seq = 0;	/* reset to a known value */
 		msqids[idx].u.msg_perm.mode = 0;
 #ifdef MAC
 		mac_sysvmsq_init(&msqids[idx]);
 #endif
 	}
 	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
 
 	error = syscall_helper_register(msg_syscalls, SY_THR_STATIC_KLD);
 #ifdef COMPAT_FREEBSD32
 	if (error == 0)
 		error = syscall32_helper_register(msg32_syscalls,
 		    SY_THR_STATIC_KLD);
 #endif
 	return (error);
 }
 
 static int
 msgunload()
 {
 	struct msqid_kernel *msqkptr;
 	int msqid;
 #ifdef MAC
 	int i;
 #endif
 
 	syscall_helper_unregister(msg_syscalls);
 #ifdef COMPAT_FREEBSD32
 	syscall32_helper_unregister(msg32_syscalls);
 #endif
 
 	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
 		/*
 		 * Look for an unallocated and unlocked msqid_ds.
 		 * msqid_ds's can be locked by msgsnd or msgrcv while
 		 * they are copying the message in/out.  We can't
 		 * re-use the entry until they release it.
 		 */
 		msqkptr = &msqids[msqid];
 		if (msqkptr->u.msg_qbytes != 0 ||
 		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
 			break;
 	}
 	if (msqid != msginfo.msgmni)
 		return (EBUSY);
 
 #ifdef MAC
 	for (i = 0; i < msginfo.msgtql; i++)
 		mac_sysvmsg_destroy(&msghdrs[i]);
 	for (msqid = 0; msqid < msginfo.msgmni; msqid++)
 		mac_sysvmsq_destroy(&msqids[msqid]);
 #endif
 	free(msgpool, M_MSG);
 	free(msgmaps, M_MSG);
 	free(msghdrs, M_MSG);
 	free(msqids, M_MSG);
 	mtx_destroy(&msq_mtx);
 	return (0);
 }
 
 
 /*
  * Module event handler.  MOD_LOAD runs msginit() and, when that fails,
  * msgunload() before reporting the msginit() error.  MOD_UNLOAD reports the
  * msgunload() result, MOD_SHUTDOWN is accepted without action, and any
  * other event yields EINVAL.
  */
 static int
 sysvmsg_modload(struct module *module, int cmd, void *arg)
 {
 	int error;
 
 	switch (cmd) {
 	case MOD_LOAD:
 		error = msginit();
 		if (error != 0)
 			msgunload();
 		return (error);
 	case MOD_UNLOAD:
 		return (msgunload());
 	case MOD_SHUTDOWN:
 		return (0);
 	default:
 		return (EINVAL);
 	}
 }
 
 /*
  * Module description: the module is named "sysvmsg" and its events are
  * delivered to sysvmsg_modload(); no extra argument is passed.
  */
 static moduledata_t sysvmsg_mod = {
 	"sysvmsg",
 	&sysvmsg_modload,
 	NULL
 };
 
 DECLARE_MODULE(sysvmsg, sysvmsg_mod, SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
 MODULE_VERSION(sysvmsg, 1);
 
 /*
  * Release a message: push each of its segments back onto the free segment
  * list and then push the header itself onto the free header list.
  * Panics if the segment chain is inconsistent with msg_ts.
  */
 static void
 msg_freehdr(struct msg *msghdr)
 {
 	short seg, following;
 
 	while (msghdr->msg_ts > 0) {
 		seg = msghdr->msg_spot;
 		if (seg < 0 || seg >= msginfo.msgseg)
 			panic("msghdr->msg_spot out of range");
 
 		/* Unlink the head segment and put it on the free list. */
 		following = msgmaps[seg].next;
 		msgmaps[seg].next = free_msgmaps;
 		free_msgmaps = seg;
 		nfree_msgmaps++;
 		msghdr->msg_spot = following;
 
 		/* Account for one segment's worth of bytes, never below 0. */
 		if (msghdr->msg_ts < msginfo.msgssz)
 			msghdr->msg_ts = 0;
 		else
 			msghdr->msg_ts -= msginfo.msgssz;
 	}
 	/* All bytes accounted for: the chain must be exhausted too. */
 	if (msghdr->msg_spot != -1)
 		panic("msghdr->msg_spot != -1");
 
 	msghdr->msg_next = free_msghdrs;
 	free_msghdrs = msghdr;
 #ifdef MAC
 	mac_sysvmsg_cleanup(msghdr);
 #endif
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct msgctl_args {
 	int	msqid;
 	int	cmd;
 	struct	msqid_ds *buf;
 };
 #endif
 int
 sys_msgctl(td, uap)
 	struct thread *td;
 	register struct msgctl_args *uap;
 {
 	int msqid = uap->msqid;
 	int cmd = uap->cmd;
 	struct msqid_ds msqbuf;
 	int error;
 
 	DPRINTF(("call to msgctl(%d, %d, %p)\n", msqid, cmd, uap->buf));
 	if (cmd == IPC_SET &&
 	    (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0)
 		return (error);
 	error = kern_msgctl(td, msqid, cmd, &msqbuf);
 	if (cmd == IPC_STAT && error == 0)
 		error = copyout(&msqbuf, uap->buf, sizeof(struct msqid_ds));
 	return (error);
 }
 
 /*
  * Perform a msgctl() operation on the message queue named by msqid.
  *
  * td      calling thread; its credential is used for the jail, permission,
  *         privilege and MAC checks, and td_retval[0] is set on success.
  * msqid   queue identifier (index plus sequence number).
  * cmd     IPC_RMID, IPC_SET or IPC_STAT.
  * msqbuf  kernel-space buffer: input for IPC_SET (its msg_qbytes may be
  *         clamped to msginfo.msgmnb in place), output for IPC_STAT.
  *
  * Returns 0 on success, ENOSYS when the jail does not allow SysV IPC,
  * EINVAL for a bad identifier, stale sequence number, unknown command or an
  * IPC_SET with msg_qbytes of 0, or the error from ipcperm(), priv_check()
  * or the MAC framework.
  *
  * The whole operation after the index range check runs under msq_mtx.
  */
 int
 kern_msgctl(struct thread *td, int msqid, int cmd, struct msqid_ds *msqbuf)
 {
 	int rval, error, msqix;
 	struct msqid_kernel *msqkptr;
 
 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
 		return (ENOSYS);
 
 	msqix = IPCID_TO_IX(msqid);
 
 	if (msqix < 0 || msqix >= msginfo.msgmni) {
 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
 		    msginfo.msgmni));
 		return (EINVAL);
 	}
 
 	msqkptr = &msqids[msqix];
 
 	mtx_lock(&msq_mtx);
 	/* A zero msg_qbytes marks the descriptor as unallocated. */
 	if (msqkptr->u.msg_qbytes == 0) {
 		DPRINTF(("no such msqid\n"));
 		error = EINVAL;
 		goto done2;
 	}
 	/* Reject identifiers that refer to an earlier use of this slot. */
 	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
 		DPRINTF(("wrong sequence number\n"));
 		error = EINVAL;
 		goto done2;
 	}
 #ifdef MAC
 	error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd);
 	if (error != 0)
 		goto done2;
 #endif
 
 	error = 0;
 	rval = 0;
 
 	switch (cmd) {
 
 	case IPC_RMID:
 	{
 		struct msg *msghdr;
 		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
 			goto done2;
 
 #ifdef MAC
 		/*
 		 * Check that the thread has MAC access permissions to
 		 * individual msghdrs.  Note: We need to do this in a
 		 * separate loop because the actual loop alters the
 		 * msq/msghdr info as it progresses, and there is no going
 		 * back if half the way through we discover that the
 		 * thread cannot free a certain msghdr.  The msq will get
 		 * into an inconsistent state.
 		 */
 		for (msghdr = msqkptr->u.msg_first; msghdr != NULL;
 		    msghdr = msghdr->msg_next) {
 			error = mac_sysvmsq_check_msgrmid(td->td_ucred, msghdr);
 			if (error != 0)
 				goto done2;
 		}
 #endif
 
 		/* Return the queue's resource accounting to its creator. */
 		racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
 		racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
 		racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
 		crfree(msqkptr->cred);
 		msqkptr->cred = NULL;
 
 		/* Free the message headers */
 		msghdr = msqkptr->u.msg_first;
 		while (msghdr != NULL) {
 			struct msg *msghdr_tmp;
 
 			/* Free the segments of each message */
 			msqkptr->u.msg_cbytes -= msghdr->msg_ts;
 			msqkptr->u.msg_qnum--;
 			msghdr_tmp = msghdr;
 			msghdr = msghdr->msg_next;
 			msg_freehdr(msghdr_tmp);
 		}
 
 		if (msqkptr->u.msg_cbytes != 0)
 			panic("msg_cbytes is screwed up");
 		if (msqkptr->u.msg_qnum != 0)
 			panic("msg_qnum is screwed up");
 
 		msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
 
 #ifdef MAC
 		mac_sysvmsq_cleanup(msqkptr);
 #endif
 
 		/* Let threads sleeping on this queue notice the removal. */
 		wakeup(msqkptr);
 	}
 
 		break;
 
 	case IPC_SET:
 		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
 			goto done2;
 		/* Raising the queue size limit requires privilege. */
 		if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) {
 			error = priv_check(td, PRIV_IPC_MSGSIZE);
 			if (error)
 				goto done2;
 		}
 		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
 			DPRINTF(("can't increase msg_qbytes beyond %d"
 			    "(truncating)\n", msginfo.msgmnb));
 			msqbuf->msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
 		}
 		if (msqbuf->msg_qbytes == 0) {
 			DPRINTF(("can't reduce msg_qbytes to 0\n"));
 			error = EINVAL;		/* non-standard errno! */
 			goto done2;
 		}
 		msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid;	/* change the owner */
 		msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid;	/* change the owner */
 		/* Only the nine permission bits may be changed. */
 		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
 		    (msqbuf->msg_perm.mode & 0777);
 		msqkptr->u.msg_qbytes = msqbuf->msg_qbytes;
 		msqkptr->u.msg_ctime = time_second;
 		break;
 
 	case IPC_STAT:
 		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
 			DPRINTF(("requester doesn't have read access\n"));
 			goto done2;
 		}
 		*msqbuf = msqkptr->u;
 		/*
 		 * The copy is handed back to userland by the system call
 		 * layer; do not disclose the kernel addresses of the queue's
 		 * message list.
 		 */
 		msqbuf->msg_first = NULL;
 		msqbuf->msg_last = NULL;
 		break;
 
 	default:
 		DPRINTF(("invalid command %d\n", cmd));
 		error = EINVAL;
 		goto done2;
 	}
 
 	if (error == 0)
 		td->td_retval[0] = rval;
 done2:
 	mtx_unlock(&msq_mtx);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct msgget_args {
 	key_t	key;
 	int	msgflg;
 };
 #endif
 
 /*
  * msgget() system call: look up the queue with the given key or create a
  * new one, and return its identifier in td_retval[0].
  *
  * Returns 0 on success, ENOSYS when the jail does not allow SysV IPC,
  * EEXIST when the key exists and both IPC_CREAT and IPC_EXCL were given,
  * ENOENT when the key does not exist and IPC_CREAT was not given, ENOSPC
  * when no descriptor is free or resource accounting refuses the queue, or
  * the error from ipcperm() or the MAC framework.
  *
  * Everything after the jail check runs under msq_mtx.
  */
 int
 sys_msgget(td, uap)
 	struct thread *td;
 	register struct msgget_args *uap;
 {
 	int msqid, error = 0;
 	int key = uap->key;
 	int msgflg = uap->msgflg;
 	struct ucred *cred = td->td_ucred;
 	register struct msqid_kernel *msqkptr = NULL;
 
 	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
 
 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
 		return (ENOSYS);
 
 	mtx_lock(&msq_mtx);
 	/* IPC_PRIVATE never matches an existing queue. */
 	if (key != IPC_PRIVATE) {
 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
 			msqkptr = &msqids[msqid];
 			if (msqkptr->u.msg_qbytes != 0 &&
 			    msqkptr->u.msg_perm.key == key)
 				break;
 		}
 		if (msqid < msginfo.msgmni) {
 			DPRINTF(("found public key\n"));
 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
 				DPRINTF(("not exclusive\n"));
 				error = EEXIST;
 				goto done2;
 			}
 			/* Only the owner permission bits of msgflg are checked. */
 			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
 			    msgflg & 0700))) {
 				DPRINTF(("requester doesn't have 0%o access\n",
 				    msgflg & 0700));
 				goto done2;
 			}
 #ifdef MAC
 			error = mac_sysvmsq_check_msqget(cred, msqkptr);
 			if (error != 0)
 				goto done2;
 #endif
 			goto found;
 		}
 	}
 
 	DPRINTF(("need to allocate the msqid_ds\n"));
 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
 			/*
 			 * Look for an unallocated and unlocked msqid_ds.
 			 * msqid_ds's can be locked by msgsnd or msgrcv while
 			 * they are copying the message in/out.  We can't
 			 * re-use the entry until they release it.
 			 */
 			msqkptr = &msqids[msqid];
 			if (msqkptr->u.msg_qbytes == 0 &&
 			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
 				break;
 		}
 		if (msqid == msginfo.msgmni) {
 			DPRINTF(("no more msqid_ds's available\n"));
 			error = ENOSPC;
 			goto done2;
 		}
 #ifdef RACCT
 		if (racct_enable) {
 			PROC_LOCK(td->td_proc);
 			error = racct_add(td->td_proc, RACCT_NMSGQ, 1);
 			PROC_UNLOCK(td->td_proc);
 			/* Any accounting failure is reported as ENOSPC. */
 			if (error != 0) {
 				error = ENOSPC;
 				goto done2;
 			}
 		}
 #endif
 		DPRINTF(("msqid %d is available\n", msqid));
 		msqkptr->u.msg_perm.key = key;
 		msqkptr->u.msg_perm.cuid = cred->cr_uid;
 		msqkptr->u.msg_perm.uid = cred->cr_uid;
 		msqkptr->u.msg_perm.cgid = cred->cr_gid;
 		msqkptr->u.msg_perm.gid = cred->cr_gid;
 		msqkptr->u.msg_perm.mode = (msgflg & 0777);
 		/* Hold a credential reference for as long as the queue exists. */
 		msqkptr->cred = crhold(cred);
 		/* Make sure that the returned msqid is unique */
 		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
 		msqkptr->u.msg_first = NULL;
 		msqkptr->u.msg_last = NULL;
 		msqkptr->u.msg_cbytes = 0;
 		msqkptr->u.msg_qnum = 0;
 		/* A non-zero msg_qbytes marks the descriptor as allocated. */
 		msqkptr->u.msg_qbytes = msginfo.msgmnb;
 		msqkptr->u.msg_lspid = 0;
 		msqkptr->u.msg_lrpid = 0;
 		msqkptr->u.msg_stime = 0;
 		msqkptr->u.msg_rtime = 0;
 		msqkptr->u.msg_ctime = time_second;
 #ifdef MAC
 		mac_sysvmsq_create(cred, msqkptr);
 #endif
 	} else {
 		DPRINTF(("didn't find it and wasn't asked to create it\n"));
 		error = ENOENT;
 		goto done2;
 	}
 
 found:
 	/* Construct the unique msqid */
 	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
 done2:
 	mtx_unlock(&msq_mtx);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct msgsnd_args {
 	int	msqid;
 	const void	*msgp;
 	size_t	msgsz;
 	int	msgflg;
 };
 #endif
 /*
  * Enqueue a message of msgsz bytes and type mtype on the queue msqid.
  *
  * msgp points at the message text; it is read with copyin(), one segment
  * at a time, with msq_mtx dropped around each copy.  While the copy is in
  * progress the queue is flagged MSG_LOCKED.
  *
  * If the queue is locked, lacks room, or no free segments/headers are
  * available, the call sleeps (waking at least once per hz ticks to
  * re-check) unless IPC_NOWAIT is set in msgflg.
  *
  * Returns 0 on success, ENOSYS when the jail does not allow SysV IPC,
  * EINVAL for a bad identifier, stale sequence number, msgsz larger than
  * the queue's msg_qbytes or mtype < 1, EAGAIN when resource accounting
  * refuses the message or resources are short and IPC_NOWAIT is set, EINTR
  * when the sleep is interrupted, EIDRM when the queue is removed meanwhile,
  * or the error from ipcperm(), the MAC framework or copyin().
  */
 int
 kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
 	struct thread *td;
 	int msqid;
 	const void *msgp;	/* XXX msgp is actually mtext. */
 	size_t msgsz;
 	int msgflg;
 	long mtype;
 {
 	int msqix, segs_needed, error = 0;
 	register struct msqid_kernel *msqkptr;
 	register struct msg *msghdr;
 	short next;
 #ifdef RACCT
 	size_t saved_msgsz;
 #endif
 
 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
 		return (ENOSYS);
 
 	mtx_lock(&msq_mtx);
 	msqix = IPCID_TO_IX(msqid);
 
 	if (msqix < 0 || msqix >= msginfo.msgmni) {
 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
 		    msginfo.msgmni));
 		error = EINVAL;
 		goto done2;
 	}
 
 	msqkptr = &msqids[msqix];
 	if (msqkptr->u.msg_qbytes == 0) {
 		DPRINTF(("no such message queue id\n"));
 		error = EINVAL;
 		goto done2;
 	}
 	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
 		DPRINTF(("wrong sequence number\n"));
 		error = EINVAL;
 		goto done2;
 	}
 
 	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
 		DPRINTF(("requester doesn't have write access\n"));
 		goto done2;
 	}
 
 #ifdef MAC
 	error = mac_sysvmsq_check_msqsnd(td->td_ucred, msqkptr);
 	if (error != 0)
 		goto done2;
 #endif
 
 #ifdef RACCT
 	/*
 	 * Charge the message to the process up front.  Failures here leave
 	 * via done2, which skips the refund performed at done3.
 	 */
 	if (racct_enable) {
 		PROC_LOCK(td->td_proc);
 		if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) {
 			PROC_UNLOCK(td->td_proc);
 			error = EAGAIN;
 			goto done2;
 		}
 		/* msgsz is consumed by the copy loop below; keep the original. */
 		saved_msgsz = msgsz;
 		if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) {
 			racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
 			PROC_UNLOCK(td->td_proc);
 			error = EAGAIN;
 			goto done2;
 		}
 		PROC_UNLOCK(td->td_proc);
 	}
 #endif
 
 	/* Number of msgssz-sized segments needed, rounded up. */
 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
 	DPRINTF(("msgsz=%zu, msgssz=%d, segs_needed=%d\n", msgsz,
 	    msginfo.msgssz, segs_needed));
 	for (;;) {
 		int need_more_resources = 0;
 
 		/*
 		 * check msgsz
 		 * (inside this loop in case msg_qbytes changes while we sleep)
 		 */
 
 		if (msgsz > msqkptr->u.msg_qbytes) {
 			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
 			error = EINVAL;
 			goto done3;
 		}
 
 		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
 			DPRINTF(("msqid is locked\n"));
 			need_more_resources = 1;
 		}
 		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
 			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
 			need_more_resources = 1;
 		}
 		if (segs_needed > nfree_msgmaps) {
 			DPRINTF(("segs_needed > nfree_msgmaps\n"));
 			need_more_resources = 1;
 		}
 		if (free_msghdrs == NULL) {
 			DPRINTF(("no more msghdrs\n"));
 			need_more_resources = 1;
 		}
 
 		if (need_more_resources) {
 			int we_own_it;
 
 			if ((msgflg & IPC_NOWAIT) != 0) {
 				DPRINTF(("need more resources but caller "
 				    "doesn't want to wait\n"));
 				error = EAGAIN;
 				goto done3;
 			}
 
 			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
 				DPRINTF(("we don't own the msqid_ds\n"));
 				we_own_it = 0;
 			} else {
 				/* Force later arrivals to wait for our
 				   request */
 				DPRINTF(("we own the msqid_ds\n"));
 				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
 				we_own_it = 1;
 			}
 			DPRINTF(("msgsnd:  goodnight\n"));
 			/* msleep() releases msq_mtx while asleep; timeout is hz ticks. */
 			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
 			    "msgsnd", hz);
 			DPRINTF(("msgsnd:  good morning, error=%d\n", error));
 			if (we_own_it)
 				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
 			/* A timeout is not an error: go round and re-check. */
 			if (error == EWOULDBLOCK) {
 				DPRINTF(("msgsnd:  timed out\n"));
 				continue;
 			}
 			if (error != 0) {
 				DPRINTF(("msgsnd:  interrupted system call\n"));
 				error = EINTR;
 				goto done3;
 			}
 
 			/*
 			 * Make sure that the msq queue still exists
 			 */
 
 			if (msqkptr->u.msg_qbytes == 0) {
 				DPRINTF(("msqid deleted\n"));
 				error = EIDRM;
 				goto done3;
 			}
 
 		} else {
 			DPRINTF(("got all the resources that we need\n"));
 			break;
 		}
 	}
 
 	/*
 	 * We have the resources that we need.
 	 * Make sure!
 	 */
 
 	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
 		panic("msg_perm.mode & MSG_LOCKED");
 	if (segs_needed > nfree_msgmaps)
 		panic("segs_needed > nfree_msgmaps");
 	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
 		panic("msgsz + msg_cbytes > msg_qbytes");
 	if (free_msghdrs == NULL)
 		panic("no more msghdrs");
 
 	/*
 	 * Re-lock the msqid_ds in case we page-fault when copying in the
 	 * message
 	 */
 
 	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
 		panic("msqid_ds is already locked");
 	msqkptr->u.msg_perm.mode |= MSG_LOCKED;
 
 	/*
 	 * Allocate a message header
 	 */
 
 	msghdr = free_msghdrs;
 	free_msghdrs = msghdr->msg_next;
 	msghdr->msg_spot = -1;
 	msghdr->msg_ts = msgsz;
 	msghdr->msg_type = mtype;
 #ifdef MAC
 	/*
 	 * XXXMAC: Should the mac_sysvmsq_check_msgmsq check follow here
 	 * immediately?  Or, should it be checked just before the msg is
 	 * enqueued in the msgq (as it is done now)?
 	 */
 	mac_sysvmsg_create(td->td_ucred, msqkptr, msghdr);
 #endif
 
 	/*
 	 * Allocate space for the message
 	 */
 
 	/* Each segment taken from the free list is pushed onto the message's chain. */
 	while (segs_needed > 0) {
 		if (nfree_msgmaps <= 0)
 			panic("not enough msgmaps");
 		if (free_msgmaps == -1)
 			panic("nil free_msgmaps");
 		next = free_msgmaps;
 		if (next <= -1)
 			panic("next too low #1");
 		if (next >= msginfo.msgseg)
 			panic("next out of range #1");
 		DPRINTF(("allocating segment %d to message\n", next));
 		free_msgmaps = msgmaps[next].next;
 		nfree_msgmaps--;
 		msgmaps[next].next = msghdr->msg_spot;
 		msghdr->msg_spot = next;
 		segs_needed--;
 	}
 
 	/*
 	 * Validate the message type
 	 */
 
 	if (msghdr->msg_type < 1) {
 		msg_freehdr(msghdr);
 		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
 		wakeup(msqkptr);
 		DPRINTF(("mtype (%ld) < 1\n", msghdr->msg_type));
 		error = EINVAL;
 		goto done3;
 	}
 
 	/*
 	 * Copy in the message body
 	 */
 
 	next = msghdr->msg_spot;
 	while (msgsz > 0) {
 		size_t tlen;
 		if (msgsz > msginfo.msgssz)
 			tlen = msginfo.msgssz;
 		else
 			tlen = msgsz;
 		if (next <= -1)
 			panic("next too low #2");
 		if (next >= msginfo.msgseg)
 			panic("next out of range #2");
 		/* Drop the mutex around copyin(); MSG_LOCKED keeps the queue ours. */
 		mtx_unlock(&msq_mtx);
 		if ((error = copyin(msgp, &msgpool[next * msginfo.msgssz],
 		    tlen)) != 0) {
 			mtx_lock(&msq_mtx);
 			DPRINTF(("error %d copying in message segment\n",
 			    error));
 			msg_freehdr(msghdr);
 			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
 			wakeup(msqkptr);
 			goto done3;
 		}
 		mtx_lock(&msq_mtx);
 		msgsz -= tlen;
 		msgp = (const char *)msgp + tlen;
 		next = msgmaps[next].next;
 	}
 	if (next != -1)
 		panic("didn't use all the msg segments");
 
 	/*
 	 * We've got the message.  Unlock the msqid_ds.
 	 */
 
 	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
 
 	/*
 	 * Make sure that the msqid_ds is still allocated.
 	 */
 
 	if (msqkptr->u.msg_qbytes == 0) {
 		msg_freehdr(msghdr);
 		wakeup(msqkptr);
 		error = EIDRM;
 		goto done3;
 	}
 
 #ifdef MAC
 	/*
 	 * Note: Since the task/thread allocates the msghdr and usually
 	 * primes it with its own MAC label, for a majority of policies, it
 	 * won't be necessary to check whether the msghdr has access
 	 * permissions to the msgq.  The mac_sysvmsq_check_msqsnd check would
 	 * suffice in that case.  However, this hook may be required where
 	 * individual policies derive a non-identical label for the msghdr
 	 * from the current thread label and may want to check the msghdr
 	 * enqueue permissions, along with read/write permissions to the
 	 * msgq.
 	 */
 	error = mac_sysvmsq_check_msgmsq(td->td_ucred, msghdr, msqkptr);
 	if (error != 0) {
 		msg_freehdr(msghdr);
 		wakeup(msqkptr);
 		goto done3;
 	}
 #endif
 
 	/*
 	 * Put the message into the queue
 	 */
 	if (msqkptr->u.msg_first == NULL) {
 		msqkptr->u.msg_first = msghdr;
 		msqkptr->u.msg_last = msghdr;
 	} else {
 		msqkptr->u.msg_last->msg_next = msghdr;
 		msqkptr->u.msg_last = msghdr;
 	}
 	msqkptr->u.msg_last->msg_next = NULL;
 
 	msqkptr->u.msg_cbytes += msghdr->msg_ts;
 	msqkptr->u.msg_qnum++;
 	msqkptr->u.msg_lspid = td->td_proc->p_pid;
 	msqkptr->u.msg_stime = time_second;
 
 	wakeup(msqkptr);
 	td->td_retval[0] = 0;
 done3:
 #ifdef RACCT
 	/* On failure, refund what was charged before the wait loop. */
 	if (racct_enable && error != 0) {
 		PROC_LOCK(td->td_proc);
 		racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
 		racct_sub(td->td_proc, RACCT_MSGQSIZE, saved_msgsz);
 		PROC_UNLOCK(td->td_proc);
 	}
 #endif
 done2:
 	mtx_unlock(&msq_mtx);
 	return (error);
 }
 
 /*
  * msgsnd() system call entry point.
  *
  * The user buffer at uap->msgp begins with a long message type; that type
  * is copied in here and the bytes following it are handed to kern_msgsnd()
  * as the message text.
  *
  * Returns the copyin() error if the type cannot be read, otherwise whatever
  * kern_msgsnd() returns.
  */
 int
 sys_msgsnd(struct thread *td, struct msgsnd_args *uap)
 {
 	int error;
 	long mtype;
 
 	DPRINTF(("call to msgsnd(%d, %p, %zu, %d)\n", uap->msqid, uap->msgp,
 	    uap->msgsz, uap->msgflg));
 
 	/* Fetch the leading message type from the user buffer. */
 	if ((error = copyin(uap->msgp, &mtype, sizeof(mtype))) != 0) {
 		DPRINTF(("error %d copying the message type\n", error));
 		return (error);
 	}
 	/* The message text starts right after the type. */
 	return (kern_msgsnd(td, uap->msqid,
 	    (const char *)uap->msgp + sizeof(mtype),
 	    uap->msgsz, uap->msgflg, mtype));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 /*
  * Argument block for msgrcv(); declared here only when _SYS_SYSPROTO_H_
  * is not defined.
  */
 struct msgrcv_args {
 	int	msqid;		/* message queue identifier */
 	void	*msgp;		/* user buffer: a long type followed by the text */
 	size_t	msgsz;		/* capacity of the text part, in bytes */
 	long	msgtyp;		/* requested type selector */
 	int	msgflg;		/* flags (IPC_NOWAIT, MSG_NOERROR) */
 };
 #endif
 /*
  * Receive one message from the queue identified by msqid.
  *
  * td     - calling thread; its credentials are checked and the number of
  *          bytes delivered is stored in td->td_retval[0].
  * msqid  - IPC identifier, split by IPCID_TO_IX()/IPCID_TO_SEQ() into the
  *          msqids[] index and the expected sequence number.
  * msgp   - user address that receives the message text
  *          (XXX msgp is actually mtext).
  * msgsz  - capacity of the user buffer in bytes.
  * msgtyp - 0 selects the first queued message, a positive value the first
  *          message of exactly that type, a negative value the first
  *          message whose type is <= -msgtyp.
  * msgflg - MSG_NOERROR permits truncation instead of E2BIG; IPC_NOWAIT
  *          yields ENOMSG instead of sleeping.
  * mtype  - out: type of the message that was dequeued.
  *
  * Returns 0 on success, otherwise ENOSYS, EINVAL, E2BIG, ENOMSG, EINTR,
  * EIDRM, or the error reported by ipcperm(), a MAC hook or copyout().
  * msq_mtx is taken here and released before returning; it is dropped
  * temporarily around each copyout().
  */
 int
 kern_msgrcv(struct thread *td, int msqid, void *msgp, size_t msgsz,
     long msgtyp, int msgflg, long *mtype)
 {
 	size_t len;
 	struct msqid_kernel *msqkptr;
 	struct msg *msghdr;
 	int msqix, error = 0;
 	short next;
 
 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
 		return (ENOSYS);
 
 	msqix = IPCID_TO_IX(msqid);
 
 	if (msqix < 0 || msqix >= msginfo.msgmni) {
 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
 		    msginfo.msgmni));
 		return (EINVAL);
 	}
 
 	msqkptr = &msqids[msqix];
 	mtx_lock(&msq_mtx);
 	/* A zero msg_qbytes marks an unallocated queue slot. */
 	if (msqkptr->u.msg_qbytes == 0) {
 		DPRINTF(("no such message queue id\n"));
 		error = EINVAL;
 		goto done2;
 	}
 	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
 		DPRINTF(("wrong sequence number\n"));
 		error = EINVAL;
 		goto done2;
 	}
 
 	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
 		DPRINTF(("requester doesn't have read access\n"));
 		goto done2;
 	}
 
 #ifdef MAC
 	error = mac_sysvmsq_check_msqrcv(td->td_ucred, msqkptr);
 	if (error != 0)
 		goto done2;
 #endif
 
 	/*
 	 * Look for a suitable message; sleep and retry until one shows up
 	 * unless the caller asked not to wait.
 	 */
 	msghdr = NULL;
 	while (msghdr == NULL) {
 		if (msgtyp == 0) {
 			/* Any type will do: take the head of the queue. */
 			msghdr = msqkptr->u.msg_first;
 			if (msghdr != NULL) {
 				if (msgsz < msghdr->msg_ts &&
 				    (msgflg & MSG_NOERROR) == 0) {
 					DPRINTF(("first message on the queue "
 					    "is too big (want %zu, got %hu)\n",
 					    msgsz, msghdr->msg_ts));
 					error = E2BIG;
 					goto done2;
 				}
 #ifdef MAC
 				error = mac_sysvmsq_check_msgrcv(td->td_ucred,
 				    msghdr);
 				if (error != 0)
 					goto done2;
 #endif
 				/* Unlink the head from the queue. */
 				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
 					msqkptr->u.msg_first = NULL;
 					msqkptr->u.msg_last = NULL;
 				} else {
 					msqkptr->u.msg_first = msghdr->msg_next;
 					if (msqkptr->u.msg_first == NULL)
 						panic("msg_first/last screwed up #1");
 				}
 			}
 		} else {
 			struct msg *previous;
 			struct msg **prev;
 
 			/*
 			 * Walk the list keeping both the previous element
 			 * and the address of the link that points at the
 			 * current one, so the match can be unlinked in place.
 			 */
 			previous = NULL;
 			prev = &(msqkptr->u.msg_first);
 			while ((msghdr = *prev) != NULL) {
 				/*
 				 * Is this message's type an exact match or is
 				 * this message's type less than or equal to
 				 * the absolute value of a negative msgtyp?
 				 * Note that the second half of this test can
 				 * NEVER be true if msgtyp is positive since
 				 * msg_type is always positive!
 				 */
 
 				if (msgtyp == msghdr->msg_type ||
 				    msghdr->msg_type <= -msgtyp) {
 					DPRINTF(("found message type %ld, "
 					    "requested %ld\n",
 					    msghdr->msg_type, msgtyp));
 					if (msgsz < msghdr->msg_ts &&
 					    (msgflg & MSG_NOERROR) == 0) {
 						DPRINTF(("requested message "
 						    "on the queue is too big "
 						    "(want %zu, got %hu)\n",
 						    msgsz, msghdr->msg_ts));
 						error = E2BIG;
 						goto done2;
 					}
 #ifdef MAC
 					error = mac_sysvmsq_check_msgrcv(
 					    td->td_ucred, msghdr);
 					if (error != 0)
 						goto done2;
 #endif
 					*prev = msghdr->msg_next;
 					/* Removing the tail: repair msg_last. */
 					if (msghdr == msqkptr->u.msg_last) {
 						if (previous == NULL) {
 							if (prev !=
 							    &msqkptr->u.msg_first)
 								panic("msg_first/last screwed up #2");
 							msqkptr->u.msg_first =
 							    NULL;
 							msqkptr->u.msg_last =
 							    NULL;
 						} else {
 							if (prev ==
 							    &msqkptr->u.msg_first)
 								panic("msg_first/last screwed up #3");
 							msqkptr->u.msg_last =
 							    previous;
 						}
 					}
 					break;
 				}
 				previous = msghdr;
 				prev = &(msghdr->msg_next);
 			}
 		}
 
 		/*
 		 * We've either extracted the msghdr for the appropriate
 		 * message or there isn't one.
 		 * If there is one then bail out of this loop.
 		 */
 
 		if (msghdr != NULL)
 			break;
 
 		/*
 		 * Hmph!  No message found.  Does the user want to wait?
 		 */
 
 		if ((msgflg & IPC_NOWAIT) != 0) {
 			DPRINTF(("no appropriate message found (msgtyp=%ld)\n",
 			    msgtyp));
 			/* The SVID says to return ENOMSG. */
 			error = ENOMSG;
 			goto done2;
 		}
 
 		/*
 		 * Wait for something to happen
 		 */
 
 		DPRINTF(("msgrcv:  goodnight\n"));
 		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
 		    "msgrcv", 0);
 		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
 
 		/* Any msleep() failure is reported to the caller as EINTR. */
 		if (error != 0) {
 			DPRINTF(("msgrcv:  interrupted system call\n"));
 			error = EINTR;
 			goto done2;
 		}
 
 		/*
 		 * Make sure that the msq queue still exists
 		 */
 
 		if (msqkptr->u.msg_qbytes == 0 ||
 		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
 			DPRINTF(("msqid deleted\n"));
 			error = EIDRM;
 			goto done2;
 		}
 	}
 
 	/*
 	 * Return the message to the user.
 	 *
 	 * First, do the bookkeeping (before we risk being interrupted).
 	 */
 
 	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
 	msqkptr->u.msg_qnum--;
 	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
 	msqkptr->u.msg_rtime = time_second;
 
 	racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, 1);
 	racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msghdr->msg_ts);
 
 	/*
 	 * Make msgsz the actual amount that we'll be returning.
 	 * Note that this effectively truncates the message if it is too long
 	 * (since msgsz is never increased).
 	 */
 
 	DPRINTF(("found a message, msgsz=%zu, msg_ts=%hu\n", msgsz,
 	    msghdr->msg_ts));
 	if (msgsz > msghdr->msg_ts)
 		msgsz = msghdr->msg_ts;
 	*mtype = msghdr->msg_type;
 
 	/*
 	 * Return the segments to the user
 	 */
 
 	next = msghdr->msg_spot;
 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
 		size_t tlen;
 
 		/* Copy a full segment, or whatever remains of the message. */
 		if (msgsz - len > msginfo.msgssz)
 			tlen = msginfo.msgssz;
 		else
 			tlen = msgsz - len;
 		if (next <= -1)
 			panic("next too low #3");
 		if (next >= msginfo.msgseg)
 			panic("next out of range #3");
 		/* The mutex is not held across the copy to user space. */
 		mtx_unlock(&msq_mtx);
 		error = copyout(&msgpool[next * msginfo.msgssz], msgp, tlen);
 		mtx_lock(&msq_mtx);
 		if (error != 0) {
 			DPRINTF(("error (%d) copying out message segment\n",
 			    error));
 			/* The message is already dequeued; it is dropped. */
 			msg_freehdr(msghdr);
 			wakeup(msqkptr);
 			goto done2;
 		}
 		msgp = (char *)msgp + tlen;
 		next = msgmaps[next].next;
 	}
 
 	/*
 	 * Done, return the actual number of bytes copied out.
 	 */
 
 	msg_freehdr(msghdr);
 	wakeup(msqkptr);
 	td->td_retval[0] = msgsz;
 done2:
 	mtx_unlock(&msq_mtx);
 	return (error);
 }
 
 /*
  * msgrcv() system call entry point.
  *
  * Asks kern_msgrcv() to place the message text in the user buffer just
  * after the leading long, then stores the received message type into that
  * leading long.
  *
  * Returns the kern_msgrcv() error, or the copyout() error if the type
  * cannot be written back (by then the message has already been consumed).
  */
 int
 sys_msgrcv(struct thread *td, struct msgrcv_args *uap)
 {
 	int error;
 	long mtype;
 
 	DPRINTF(("call to msgrcv(%d, %p, %zu, %ld, %d)\n", uap->msqid,
 	    uap->msgp, uap->msgsz, uap->msgtyp, uap->msgflg));
 
 	if ((error = kern_msgrcv(td, uap->msqid,
 	    (char *)uap->msgp + sizeof(mtype), uap->msgsz,
 	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
 		return (error);
 	/* Fill in the type field at the start of the user buffer. */
 	if ((error = copyout(&mtype, uap->msgp, sizeof(mtype))) != 0)
 		DPRINTF(("error %d copying the message type\n", error));
 	return (error);
 }
 
 static int
 sysctl_msqids(SYSCTL_HANDLER_ARGS)
 {
 
 	return (SYSCTL_OUT(req, msqids,
 	    sizeof(struct msqid_kernel) * msginfo.msgmni));
 }
 
 /*
  * Sysctl nodes under the _kern_ipc parent that publish the msginfo limits
  * (all declared read-only, most as CTLFLAG_RDTUN) and the msqids table
  * through the sysctl_msqids() handler.
  */
 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
     "Maximum message size");
 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0,
     "Number of message queue identifiers");
 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0,
     "Maximum number of bytes in a queue");
 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0,
     "Maximum number of messages in the system");
 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0,
     "Size of a message segment");
 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
     "Number of message segments");
 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLTYPE_OPAQUE | CTLFLAG_RD,
     NULL, 0, sysctl_msqids, "", "Message queue IDs");
 
 #ifdef COMPAT_FREEBSD32
 /*
  * 32-bit msgsys() multiplexer.  Sub-calls 0, 2 and 3 are routed to the
  * 32-bit aware msgctl/msgsnd/msgrcv handlers, with the arguments starting
  * at uap->a2; every other selector is passed on to sys_msgsys().  Without
  * any of the COMPAT_FREEBSD4..7 options the call is answered by nosys().
  */
 int
 freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap)
 {
 
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
 	if (uap->which == 0) {
 		return (freebsd7_freebsd32_msgctl(td,
 		    (struct freebsd7_freebsd32_msgctl_args *)&uap->a2));
 	} else if (uap->which == 2) {
 		return (freebsd32_msgsnd(td,
 		    (struct freebsd32_msgsnd_args *)&uap->a2));
 	} else if (uap->which == 3) {
 		return (freebsd32_msgrcv(td,
 		    (struct freebsd32_msgrcv_args *)&uap->a2));
 	}
 	return (sys_msgsys(td, (struct msgsys_args *)uap));
 #else
 	return (nosys(td, NULL));
 #endif
 }
 
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
 int
 freebsd7_freebsd32_msgctl(struct thread *td,
     struct freebsd7_freebsd32_msgctl_args *uap)
 {
 	struct msqid_ds msqbuf;
 	struct msqid_ds32_old msqbuf32;
 	int error;
 
 	if (uap->cmd == IPC_SET) {
 		error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32));
 		if (error)
 			return (error);
 		freebsd32_ipcperm_old_in(&msqbuf32.msg_perm, &msqbuf.msg_perm);
 		PTRIN_CP(msqbuf32, msqbuf, msg_first);
 		PTRIN_CP(msqbuf32, msqbuf, msg_last);
 		CP(msqbuf32, msqbuf, msg_cbytes);
 		CP(msqbuf32, msqbuf, msg_qnum);
 		CP(msqbuf32, msqbuf, msg_qbytes);
 		CP(msqbuf32, msqbuf, msg_lspid);
 		CP(msqbuf32, msqbuf, msg_lrpid);
 		CP(msqbuf32, msqbuf, msg_stime);
 		CP(msqbuf32, msqbuf, msg_rtime);
 		CP(msqbuf32, msqbuf, msg_ctime);
 	}
 	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
 	if (error)
 		return (error);
 	if (uap->cmd == IPC_STAT) {
 		bzero(&msqbuf32, sizeof(msqbuf32));
 		freebsd32_ipcperm_old_out(&msqbuf.msg_perm, &msqbuf32.msg_perm);
 		PTROUT_CP(msqbuf, msqbuf32, msg_first);
 		PTROUT_CP(msqbuf, msqbuf32, msg_last);
 		CP(msqbuf, msqbuf32, msg_cbytes);
 		CP(msqbuf, msqbuf32, msg_qnum);
 		CP(msqbuf, msqbuf32, msg_qbytes);
 		CP(msqbuf, msqbuf32, msg_lspid);
 		CP(msqbuf, msqbuf32, msg_lrpid);
 		CP(msqbuf, msqbuf32, msg_stime);
 		CP(msqbuf, msqbuf32, msg_rtime);
 		CP(msqbuf, msqbuf32, msg_ctime);
 		error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32));
 	}
 	return (error);
 }
 #endif
 
 int
 freebsd32_msgctl(struct thread *td, struct freebsd32_msgctl_args *uap)
 {
 	struct msqid_ds msqbuf;
 	struct msqid_ds32 msqbuf32;
 	int error;
 
 	if (uap->cmd == IPC_SET) {
 		error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32));
 		if (error)
 			return (error);
 		freebsd32_ipcperm_in(&msqbuf32.msg_perm, &msqbuf.msg_perm);
 		PTRIN_CP(msqbuf32, msqbuf, msg_first);
 		PTRIN_CP(msqbuf32, msqbuf, msg_last);
 		CP(msqbuf32, msqbuf, msg_cbytes);
 		CP(msqbuf32, msqbuf, msg_qnum);
 		CP(msqbuf32, msqbuf, msg_qbytes);
 		CP(msqbuf32, msqbuf, msg_lspid);
 		CP(msqbuf32, msqbuf, msg_lrpid);
 		CP(msqbuf32, msqbuf, msg_stime);
 		CP(msqbuf32, msqbuf, msg_rtime);
 		CP(msqbuf32, msqbuf, msg_ctime);
 	}
 	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
 	if (error)
 		return (error);
 	if (uap->cmd == IPC_STAT) {
 		freebsd32_ipcperm_out(&msqbuf.msg_perm, &msqbuf32.msg_perm);
 		PTROUT_CP(msqbuf, msqbuf32, msg_first);
 		PTROUT_CP(msqbuf, msqbuf32, msg_last);
 		CP(msqbuf, msqbuf32, msg_cbytes);
 		CP(msqbuf, msqbuf32, msg_qnum);
 		CP(msqbuf, msqbuf32, msg_qbytes);
 		CP(msqbuf, msqbuf32, msg_lspid);
 		CP(msqbuf, msqbuf32, msg_lrpid);
 		CP(msqbuf, msqbuf32, msg_stime);
 		CP(msqbuf, msqbuf32, msg_rtime);
 		CP(msqbuf, msqbuf32, msg_ctime);
 		error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32));
 	}
 	return (error);
 }
 
 int
 freebsd32_msgsnd(struct thread *td, struct freebsd32_msgsnd_args *uap)
 {
 	const void *msgp;
 	long mtype;
 	int32_t mtype32;
 	int error;
 
 	msgp = PTRIN(uap->msgp);
 	if ((error = copyin(msgp, &mtype32, sizeof(mtype32))) != 0)
 		return (error);
 	mtype = mtype32;
 	return (kern_msgsnd(td, uap->msqid,
 	    (const char *)msgp + sizeof(mtype32),
 	    uap->msgsz, uap->msgflg, mtype));
 }
 
 int
 freebsd32_msgrcv(struct thread *td, struct freebsd32_msgrcv_args *uap)
 {
 	void *msgp;
 	long mtype;
 	int32_t mtype32;
 	int error;
 
 	msgp = PTRIN(uap->msgp);
 	if ((error = kern_msgrcv(td, uap->msqid,
 	    (char *)msgp + sizeof(mtype32), uap->msgsz,
 	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
 		return (error);
 	mtype32 = (int32_t)mtype;
 	return (copyout(&mtype32, msgp, sizeof(mtype32)));
 }
 #endif
 
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
 
 /*
  * Dispatch table for sys_msgsys(), indexed by the "which" selector:
  * 0 = freebsd7_msgctl, 1 = sys_msgget, 2 = sys_msgsnd, 3 = sys_msgrcv.
  */
 /* XXX casting to (sy_call_t *) is bogus, as usual. */
 static sy_call_t *msgcalls[] = {
 	(sy_call_t *)freebsd7_msgctl, (sy_call_t *)sys_msgget,
 	(sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv
 };
 
 /*
  * Entry point for all MSG calls.
  *
  * uap->which selects an entry of msgcalls[]; the selected handler is
  * invoked with the argument block that starts at uap->a2.  Returns ENOSYS
  * when prison_allow() denies PR_ALLOW_SYSVIPC, EINVAL for a selector
  * outside the table, otherwise the handler's return value.
  */
 int
 sys_msgsys(td, uap)
 	struct thread *td;
 	/* XXX actually varargs. */
 	struct msgsys_args /* {
 		int	which;
 		int	a2;
 		int	a3;
 		int	a4;
 		int	a5;
 		int	a6;
 	} */ *uap;
 {
 	int error;
 
 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
 		return (ENOSYS);
 	/* Reject selectors that do not index msgcalls[]. */
-	if (uap->which < 0 ||
-	    uap->which >= nitems(msgcalls))
+	if (uap->which < 0 || uap->which >= nitems(msgcalls))
 		return (EINVAL);
 	/* The sub-call's own arguments begin at a2. */
 	error = (*msgcalls[uap->which])(td, &uap->a2);
 	return (error);
 }
 
 #ifndef CP
 /* Copy the member named fld from structure src to structure dst. */
 #define CP(src, dst, fld)	do { (dst).fld = (src).fld; } while (0)
 #endif
 
 #ifndef _SYS_SYSPROTO_H_
 /*
  * Argument block for freebsd7_msgctl(); declared here only when
  * _SYS_SYSPROTO_H_ is not defined.
  */
 struct freebsd7_msgctl_args {
 	int	msqid;			/* message queue identifier */
 	int	cmd;			/* control command (e.g. IPC_SET, IPC_STAT) */
 	struct	msqid_ds_old *buf;	/* user buffer in the old layout */
 };
 #endif
 int
 freebsd7_msgctl(td, uap)
 	struct thread *td;
 	struct freebsd7_msgctl_args *uap;
 {
 	struct msqid_ds_old msqold;
 	struct msqid_ds msqbuf;
 	int error;
 
 	DPRINTF(("call to freebsd7_msgctl(%d, %d, %p)\n", uap->msqid, uap->cmd,
 	    uap->buf));
 	if (uap->cmd == IPC_SET) {
 		error = copyin(uap->buf, &msqold, sizeof(msqold));
 		if (error)
 			return (error);
 		ipcperm_old2new(&msqold.msg_perm, &msqbuf.msg_perm);
 		CP(msqold, msqbuf, msg_first);
 		CP(msqold, msqbuf, msg_last);
 		CP(msqold, msqbuf, msg_cbytes);
 		CP(msqold, msqbuf, msg_qnum);
 		CP(msqold, msqbuf, msg_qbytes);
 		CP(msqold, msqbuf, msg_lspid);
 		CP(msqold, msqbuf, msg_lrpid);
 		CP(msqold, msqbuf, msg_stime);
 		CP(msqold, msqbuf, msg_rtime);
 		CP(msqold, msqbuf, msg_ctime);
 	}
 	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
 	if (error)
 		return (error);
 	if (uap->cmd == IPC_STAT) {
 		bzero(&msqold, sizeof(msqold));
 		ipcperm_new2old(&msqbuf.msg_perm, &msqold.msg_perm);
 		CP(msqbuf, msqold, msg_first);
 		CP(msqbuf, msqold, msg_last);
 		CP(msqbuf, msqold, msg_cbytes);
 		CP(msqbuf, msqold, msg_qnum);
 		CP(msqbuf, msqold, msg_qbytes);
 		CP(msqbuf, msqold, msg_lspid);
 		CP(msqbuf, msqold, msg_lrpid);
 		CP(msqbuf, msqold, msg_stime);
 		CP(msqbuf, msqold, msg_rtime);
 		CP(msqbuf, msqold, msg_ctime);
 		error = copyout(&msqold, uap->buf, sizeof(struct msqid_ds_old));
 	}
 	return (error);
 }
 
 #undef CP
 
 #endif	/* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 ||
 	   COMPAT_FREEBSD7 */
Index: head/sys/kern/sysv_sem.c
===================================================================
--- head/sys/kern/sysv_sem.c	(revision 298353)
+++ head/sys/kern/sysv_sem.c	(revision 298354)
@@ -1,1661 +1,1660 @@
 /*-
  * Implementation of SVID semaphores
  *
  * Author:  Daniel Boulet
  *
  * This software is provided ``AS IS'' without any warranties of any kind.
  */
 /*-
  * Copyright (c) 2003-2005 McAfee, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project in part by McAfee
  * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
  * program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_sysvipc.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/racct.h>
 #include <sys/sem.h>
 #include <sys/syscall.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 #include <sys/malloc.h>
 #include <sys/jail.h>
 
 #include <security/mac/mac_framework.h>
 
 FEATURE(sysv_sem, "System V semaphores support");
 
 static MALLOC_DEFINE(M_SEM, "sem", "SVID compatible semaphores");
 
 /*
  * Debug tracing: DPRINTF((fmt, ...)) expands to printf(fmt, ...) when
  * SEM_DEBUG is defined and to nothing otherwise.  The argument must be a
  * fully parenthesised printf argument list.
  */
 #ifdef SEM_DEBUG
 #define DPRINTF(a)	printf a
 #else
 #define DPRINTF(a)
 #endif
 
 static int seminit(void);
 static int sysvsem_modload(struct module *, int, void *);
 static int semunload(void);
 static void semexit_myhook(void *arg, struct proc *p);
 static int sysctl_sema(SYSCTL_HANDLER_ARGS);
 static int semvalid(int semid, struct semid_kernel *semakptr);
 
 #ifndef _SYS_SYSPROTO_H_
 struct __semctl_args;
 int __semctl(struct thread *td, struct __semctl_args *uap);
 struct semget_args;
 int semget(struct thread *td, struct semget_args *uap);
 struct semop_args;
 int semop(struct thread *td, struct semop_args *uap);
 #endif
 
 static struct sem_undo *semu_alloc(struct thread *td);
 static int semundo_adjust(struct thread *td, struct sem_undo **supptr,
     int semid, int semseq, int semnum, int adjval);
 static void semundo_clear(int semid, int semnum);
 
 /* Module-wide state; the pools are allocated in seminit(). */
 static struct mtx	sem_mtx;	/* semaphore global lock */
 static struct mtx sem_undo_mtx;	/* lock behind the SEMUNDO_* macros */
 static int	semtot = 0;	/* semunload() refuses to unload while non-zero */
 static struct semid_kernel *sema;	/* semaphore id pool */
 static struct mtx *sema_mtx;	/* semaphore id pool mutexes */
 static struct sem *sem;		/* semaphore pool */
 LIST_HEAD(, sem_undo) semu_list;	/* list of active undo structures */
 LIST_HEAD(, sem_undo) semu_free_list;	/* list of free undo structures */
 static int	*semu;		/* undo structure pool, indexed through SEMU() */
 static eventhandler_tag semexit_tag;	/* process_exit hook registration */
 
 /*
  * Wrappers around the mutex that protects the undo lists.  The expansions
  * deliberately carry no trailing semicolon so that each macro behaves like
  * an ordinary function call, including inside an unbraced if/else.
  */
 #define SEMUNDO_MTX		sem_undo_mtx
 #define SEMUNDO_LOCK()		mtx_lock(&SEMUNDO_MTX)
 #define SEMUNDO_UNLOCK()	mtx_unlock(&SEMUNDO_MTX)
 #define SEMUNDO_LOCKASSERT(how)	mtx_assert(&SEMUNDO_MTX, (how))
 
 /* State of a single semaphore; seminit() allocates seminfo.semmns of them. */
 struct sem {
 	u_short	semval;		/* semaphore value */
 	pid_t	sempid;		/* pid of last operation */
 	u_short	semncnt;	/* # awaiting semval > cval */
 	u_short	semzcnt;	/* # awaiting semval = 0 */
 };
 
 /*
  * Undo structure (one per process)
  *
  * un_ent is declared with a single element but is used as an array of up
  * to seminfo.semume entries; the real per-structure size is
  * seminfo.semusz (see SEMUSZ and SEMU()).
  */
 struct sem_undo {
 	LIST_ENTRY(sem_undo) un_next;	/* ptr to next active undo structure */
 	struct	proc *un_proc;		/* owner of this structure */
 	short	un_cnt;			/* # of active entries */
 	struct undo {
 		short	un_adjval;	/* adjust on exit values */
 		short	un_num;		/* semaphore # */
 		int	un_id;		/* semid */
 		unsigned short un_seq;	/* semseq recorded by semundo_adjust() */
 	} un_ent[1];			/* undo entries */
 };
 
 /*
  * Configuration parameters
  *
  * Compile-time defaults; each may be overridden by defining the macro
  * before this point.  They seed the seminfo structure below.
  */
 #ifndef SEMMNI
 #define SEMMNI	50		/* # of semaphore identifiers */
 #endif
 #ifndef SEMMNS
 #define SEMMNS	340		/* # of semaphores in system */
 #endif
 #ifndef SEMUME
 #define SEMUME	50		/* max # of undo entries per process */
 #endif
 #ifndef SEMMNU
 #define SEMMNU	150		/* # of undo structures in system */
 #endif
 
 /* shouldn't need tuning */
 #ifndef SEMMSL
 #define SEMMSL	SEMMNS		/* max # of semaphores per id */
 #endif
 #ifndef SEMOPM
 #define SEMOPM	100		/* max # of operations per semop call */
 #endif
 
 #define SEMVMX	32767		/* semaphore maximum value */
 #define SEMAEM	16384		/* adjust on exit max value */
 
 /*
  * Due to the way semaphore memory is allocated, we have to ensure that
  * SEMUSZ is properly aligned.
  */
 
 /* Round bytes up to the next multiple of sizeof(long). */
 #define SEM_ALIGN(bytes) (((bytes) + (sizeof(long) - 1)) & ~(sizeof(long) - 1))
 
 /* actual size of an undo structure */
 /* NOTE: computed from the compile-time SEMUME, not from seminfo.semume. */
 #define SEMUSZ	SEM_ALIGN(offsetof(struct sem_undo, un_ent[SEMUME]))
 
 /*
  * Macro to find a particular sem_undo vector
  *
  * Yields the address of undo structure number ix inside the semu pool,
  * whose elements are seminfo.semusz bytes apart.  The index is
  * parenthesised so that an expression argument such as SEMU(a + b) is
  * scaled as a whole.
  */
 #define SEMU(ix) \
 	((struct sem_undo *)(((intptr_t)semu) + (ix) * seminfo.semusz))
 
 /*
  * semaphore info struct
  */
 struct seminfo seminfo = {
                 SEMMNI,         /* # of semaphore identifiers */
                 SEMMNS,         /* # of semaphores in system */
                 SEMMNU,         /* # of undo structures in system */
                 SEMMSL,         /* max # of semaphores per id */
                 SEMOPM,         /* max # of operations per semop call */
                 SEMUME,         /* max # of undo entries per process */
                 SEMUSZ,         /* size in bytes of undo structure */
                 SEMVMX,         /* semaphore maximum value */
                 SEMAEM          /* adjust on exit max value */
 };
 
 /*
  * Sysctl nodes under the _kern_ipc parent for the seminfo limits.  Most
  * are CTLFLAG_RDTUN; semmsl, semvmx and semaem are CTLFLAG_RWTUN.  The
  * "sema" node is served by the sysctl_sema() handler.
  */
 SYSCTL_INT(_kern_ipc, OID_AUTO, semmni, CTLFLAG_RDTUN, &seminfo.semmni, 0,
     "Number of semaphore identifiers");
 SYSCTL_INT(_kern_ipc, OID_AUTO, semmns, CTLFLAG_RDTUN, &seminfo.semmns, 0,
     "Maximum number of semaphores in the system");
 SYSCTL_INT(_kern_ipc, OID_AUTO, semmnu, CTLFLAG_RDTUN, &seminfo.semmnu, 0,
     "Maximum number of undo structures in the system");
 SYSCTL_INT(_kern_ipc, OID_AUTO, semmsl, CTLFLAG_RWTUN, &seminfo.semmsl, 0,
     "Max semaphores per id");
 SYSCTL_INT(_kern_ipc, OID_AUTO, semopm, CTLFLAG_RDTUN, &seminfo.semopm, 0,
     "Max operations per semop call");
 SYSCTL_INT(_kern_ipc, OID_AUTO, semume, CTLFLAG_RDTUN, &seminfo.semume, 0,
     "Max undo entries per process");
 SYSCTL_INT(_kern_ipc, OID_AUTO, semusz, CTLFLAG_RDTUN, &seminfo.semusz, 0,
     "Size in bytes of undo structure");
 SYSCTL_INT(_kern_ipc, OID_AUTO, semvmx, CTLFLAG_RWTUN, &seminfo.semvmx, 0,
     "Semaphore maximum value");
 SYSCTL_INT(_kern_ipc, OID_AUTO, semaem, CTLFLAG_RWTUN, &seminfo.semaem, 0,
     "Adjust on exit max value");
 SYSCTL_PROC(_kern_ipc, OID_AUTO, sema, CTLTYPE_OPAQUE | CTLFLAG_RD,
     NULL, 0, sysctl_sema, "", "Semaphore id pool");
 
 /*
  * System calls provided by this module; the table is passed to
  * syscall_helper_register() in seminit() and to
  * syscall_helper_unregister() in semunload().  The semsys and
  * freebsd7___semctl entries exist only with a COMPAT_FREEBSD4..7 option.
  */
 static struct syscall_helper_data sem_syscalls[] = {
 	SYSCALL_INIT_HELPER(__semctl),
 	SYSCALL_INIT_HELPER(semget),
 	SYSCALL_INIT_HELPER(semop),
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
 	SYSCALL_INIT_HELPER(semsys),
 	SYSCALL_INIT_HELPER_COMPAT(freebsd7___semctl),
 #endif
 	SYSCALL_INIT_LAST
 };
 
 #ifdef COMPAT_FREEBSD32
 #include <compat/freebsd32/freebsd32.h>
 #include <compat/freebsd32/freebsd32_ipc.h>
 #include <compat/freebsd32/freebsd32_proto.h>
 #include <compat/freebsd32/freebsd32_signal.h>
 #include <compat/freebsd32/freebsd32_syscall.h>
 #include <compat/freebsd32/freebsd32_util.h>
 
 /*
  * 32-bit compat counterpart of sem_syscalls; registered with
  * syscall32_helper_register() in seminit() and removed with
  * syscall32_helper_unregister() in semunload().
  */
 static struct syscall_helper_data sem32_syscalls[] = {
 	SYSCALL32_INIT_HELPER(freebsd32_semctl),
 	SYSCALL32_INIT_HELPER_COMPAT(semget),
 	SYSCALL32_INIT_HELPER_COMPAT(semop),
 	SYSCALL32_INIT_HELPER(freebsd32_semsys),
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
 	SYSCALL32_INIT_HELPER(freebsd7_freebsd32_semctl),
 #endif
 	SYSCALL_INIT_LAST
 };
 #endif
 
 static int
 seminit(void)
 {
 	int i, error;
 
 	sem = malloc(sizeof(struct sem) * seminfo.semmns, M_SEM, M_WAITOK);
 	sema = malloc(sizeof(struct semid_kernel) * seminfo.semmni, M_SEM,
 	    M_WAITOK);
 	sema_mtx = malloc(sizeof(struct mtx) * seminfo.semmni, M_SEM,
 	    M_WAITOK | M_ZERO);
 	semu = malloc(seminfo.semmnu * seminfo.semusz, M_SEM, M_WAITOK);
 
 	for (i = 0; i < seminfo.semmni; i++) {
 		sema[i].u.sem_base = 0;
 		sema[i].u.sem_perm.mode = 0;
 		sema[i].u.sem_perm.seq = 0;
 #ifdef MAC
 		mac_sysvsem_init(&sema[i]);
 #endif
 	}
 	for (i = 0; i < seminfo.semmni; i++)
 		mtx_init(&sema_mtx[i], "semid", NULL, MTX_DEF);
 	LIST_INIT(&semu_free_list);
 	for (i = 0; i < seminfo.semmnu; i++) {
 		struct sem_undo *suptr = SEMU(i);
 		suptr->un_proc = NULL;
 		LIST_INSERT_HEAD(&semu_free_list, suptr, un_next);
 	}
 	LIST_INIT(&semu_list);
 	mtx_init(&sem_mtx, "sem", NULL, MTX_DEF);
 	mtx_init(&sem_undo_mtx, "semu", NULL, MTX_DEF);
 	semexit_tag = EVENTHANDLER_REGISTER(process_exit, semexit_myhook, NULL,
 	    EVENTHANDLER_PRI_ANY);
 
 	error = syscall_helper_register(sem_syscalls, SY_THR_STATIC_KLD);
 	if (error != 0)
 		return (error);
 #ifdef COMPAT_FREEBSD32
 	error = syscall32_helper_register(sem32_syscalls, SY_THR_STATIC_KLD);
 	if (error != 0)
 		return (error);
 #endif
 	return (0);
 }
 
 /*
  * Module teardown: undo everything seminit() set up.
  *
  * Returns EBUSY without touching anything while semtot is non-zero,
  * otherwise 0.  System calls and the exit hook are removed before the
  * pools and locks they use are released.
  */
 static int
 semunload(void)
 {
 	int i;
 
 	/* XXXKIB */
 	if (semtot != 0)
 		return (EBUSY);
 
 #ifdef COMPAT_FREEBSD32
 	syscall32_helper_unregister(sem32_syscalls);
 #endif
 	syscall_helper_unregister(sem_syscalls);
 	EVENTHANDLER_DEREGISTER(process_exit, semexit_tag);
 #ifdef MAC
 	for (i = 0; i < seminfo.semmni; i++)
 		mac_sysvsem_destroy(&sema[i]);
 #endif
 	free(sem, M_SEM);
 	free(sema, M_SEM);
 	free(semu, M_SEM);
 	/* Destroy the per-identifier mutexes before freeing their array. */
 	for (i = 0; i < seminfo.semmni; i++)
 		mtx_destroy(&sema_mtx[i]);
 	free(sema_mtx, M_SEM);
 	mtx_destroy(&sem_mtx);
 	mtx_destroy(&sem_undo_mtx);
 	return (0);
 }
 
 /*
  * Module event handler.  MOD_LOAD runs seminit() and, if that fails,
  * semunload(); MOD_UNLOAD runs semunload(); MOD_SHUTDOWN is accepted as a
  * no-op; any other event yields EINVAL.
  */
 static int
 sysvsem_modload(struct module *module, int cmd, void *arg)
 {
 	int error;
 
 	if (cmd == MOD_LOAD) {
 		error = seminit();
 		if (error != 0)
 			semunload();
 	} else if (cmd == MOD_UNLOAD) {
 		error = semunload();
 	} else if (cmd == MOD_SHUTDOWN) {
 		error = 0;
 	} else {
 		error = EINVAL;
 	}
 	return (error);
 }
 
 /* Module descriptor: name, event handler, and a NULL third member. */
 static moduledata_t sysvsem_mod = {
 	"sysvsem",
 	&sysvsem_modload,
 	NULL
 };
 
 /* Declare the module at SI_SUB_SYSV_SEM / SI_ORDER_FIRST, version 1. */
 DECLARE_MODULE(sysvsem, sysvsem_mod, SI_SUB_SYSV_SEM, SI_ORDER_FIRST);
 MODULE_VERSION(sysvsem, 1);
 
 /*
  * Take an undo structure off the free list for the calling process.
  *
  * The structure is moved to the active list with no entries and with
  * td->td_proc as its owner.  Returns NULL when the free list is empty.
  * The undo mutex must be held by the caller.
  */
 
 static struct sem_undo *
 semu_alloc(struct thread *td)
 {
 	struct sem_undo *fresh;
 
 	SEMUNDO_LOCKASSERT(MA_OWNED);
 	fresh = LIST_FIRST(&semu_free_list);
 	if (fresh != NULL) {
 		LIST_REMOVE(fresh, un_next);
 		LIST_INSERT_HEAD(&semu_list, fresh, un_next);
 		fresh->un_cnt = 0;
 		fresh->un_proc = td->td_proc;
 	}
 	return (fresh);
 }
 
 /*
  * Move an undo structure from the active list back to the free list if
  * it no longer holds any entries.  Returns 1 when it was moved and 0 when
  * it is still in use.  The undo mutex must be held by the caller.
  */
 static int
 semu_try_free(struct sem_undo *suptr)
 {
 
 	SEMUNDO_LOCKASSERT(MA_OWNED);
 
 	if (suptr->un_cnt == 0) {
 		LIST_REMOVE(suptr, un_next);
 		LIST_INSERT_HEAD(&semu_free_list, suptr, un_next);
 		return (1);
 	}
 	return (0);
 }
 
 /*
  * Record an adjust-on-exit value for (semid, semnum) on behalf of the
  * calling process.
  *
  * *supptr caches the process's undo structure: when it is NULL the
  * active list is searched and, if needed and adjval is non-zero, a new
  * structure is allocated; the pointer found is stored back.  A non-zero
  * adjval is added to an existing entry, a zero adjval clears it, and an
  * entry whose value becomes zero is removed.
  *
  * Returns 0, ENOSPC (no free undo structure), ERANGE (value outside
  * +/- seminfo.semaem) or EINVAL (seminfo.semume entries already in use).
  * The undo mutex must be held by the caller.
  */
 
 static int
 semundo_adjust(struct thread *td, struct sem_undo **supptr, int semid,
     int semseq, int semnum, int adjval)
 {
 	struct proc *owner = td->td_proc;
 	struct sem_undo *su;
 	struct undo *ent;
 	int idx;
 
 	SEMUNDO_LOCKASSERT(MA_OWNED);
 
 	/* Locate (or create) the undo structure if the caller has none. */
 	su = *supptr;
 	if (su == NULL) {
 		LIST_FOREACH(su, &semu_list, un_next) {
 			if (su->un_proc == owner)
 				break;
 		}
 		if (su == NULL) {
 			/* Nothing recorded and nothing to record. */
 			if (adjval == 0)
 				return (0);
 			su = semu_alloc(td);
 			if (su == NULL)
 				return (ENOSPC);
 		}
 		*supptr = su;
 	}
 
 	/* Search for an existing entry for this semaphore. */
 	ent = NULL;
 	for (idx = 0; idx < su->un_cnt; idx++) {
 		if (su->un_ent[idx].un_id == semid &&
 		    su->un_ent[idx].un_num == semnum) {
 			ent = &su->un_ent[idx];
 			break;
 		}
 	}
 
 	if (ent != NULL) {
 		if (adjval != 0) {
 			adjval += ent->un_adjval;
 			if (adjval > seminfo.semaem ||
 			    adjval < -seminfo.semaem)
 				return (ERANGE);
 		}
 		ent->un_adjval = adjval;
 		if (ent->un_adjval == 0) {
 			/* Drop the entry, filling the hole with the tail. */
 			su->un_cnt--;
 			if (idx < su->un_cnt)
 				su->un_ent[idx] = su->un_ent[su->un_cnt];
 			if (su->un_cnt == 0)
 				semu_try_free(su);
 		}
 		return (0);
 	}
 
 	/* No entry yet: append one unless there is nothing to remember. */
 	if (adjval == 0)
 		return (0);
 	if (adjval > seminfo.semaem || adjval < -seminfo.semaem)
 		return (ERANGE);
 	if (su->un_cnt == seminfo.semume)
 		return (EINVAL);
 	ent = &su->un_ent[su->un_cnt];
 	su->un_cnt++;
 	ent->un_adjval = adjval;
 	ent->un_id = semid;
 	ent->un_num = semnum;
 	ent->un_seq = semseq;
 	return (0);
 }
 
 /*
  * Remove undo entries that refer to semaphore set semid from every active
  * undo structure.  With semnum == -1 all entries for the set are removed;
  * otherwise only the entry for that semaphore number.
  *
  * An entry is deleted by moving the last entry into its slot, so the slot
  * is examined again rather than skipped.  For a specific semnum the scan
  * of a structure stops only once the matching entry has been removed;
  * entries for other semaphores of the same set do not end it.
  *
  * The undo mutex must be held by the caller.
  */
 static void
 semundo_clear(int semid, int semnum)
 {
 	struct sem_undo *suptr, *suptr1;
 	struct undo *sunptr;
 	int i;
 
 	SEMUNDO_LOCKASSERT(MA_OWNED);
 	LIST_FOREACH_SAFE(suptr, &semu_list, un_next, suptr1) {
 		i = 0;
 		while (i < suptr->un_cnt) {
 			sunptr = &suptr->un_ent[i];
 			if (sunptr->un_id != semid ||
 			    (semnum != -1 && sunptr->un_num != semnum)) {
 				i++;
 				continue;
 			}
 			suptr->un_cnt--;
 			if (i < suptr->un_cnt) {
 				/* Slot i now holds an unexamined entry. */
 				suptr->un_ent[i] =
 				    suptr->un_ent[suptr->un_cnt];
 			} else {
 				/* Removed the tail; release if now empty. */
 				semu_try_free(suptr);
 			}
 			/* semundo_adjust() keeps one entry per (id, num). */
 			if (semnum != -1)
 				break;
 		}
 	}
 }
 
 /*
  * Check that semid still names the semaphore set in *semakptr: the set
  * must have SEM_ALLOC set in its mode and its sequence number must equal
  * the one encoded in semid.  Returns 0 if so, EINVAL otherwise.
  */
 static int
 semvalid(int semid, struct semid_kernel *semakptr)
 {
 
 	if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0)
 		return (EINVAL);
 	if (semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid))
 		return (EINVAL);
 	return (0);
 }
 
 /*
  * Note that the user-mode half of this passes a union, not a pointer.
  *
  * Argument block for __semctl(); declared here only when
  * _SYS_SYSPROTO_H_ is not defined.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct __semctl_args {
 	int	semid;		/* semaphore set identifier */
 	int	semnum;		/* semaphore number within the set */
 	int	cmd;		/* control command */
 	union	semun *arg;	/* user address of the command argument */
 };
 #endif
 int
 sys___semctl(struct thread *td, struct __semctl_args *uap)
 {
 	struct semid_ds dsbuf;
 	union semun arg, semun;
 	register_t rval;
 	int error;
 
 	switch (uap->cmd) {
 	case SEM_STAT:
 	case IPC_SET:
 	case IPC_STAT:
 	case GETALL:
 	case SETVAL:
 	case SETALL:
 		error = copyin(uap->arg, &arg, sizeof(arg));
 		if (error)
 			return (error);
 		break;
 	}
 
 	switch (uap->cmd) {
 	case SEM_STAT:
 	case IPC_STAT:
 		semun.buf = &dsbuf;
 		break;
 	case IPC_SET:
 		error = copyin(arg.buf, &dsbuf, sizeof(dsbuf));
 		if (error)
 			return (error);
 		semun.buf = &dsbuf;
 		break;
 	case GETALL:
 	case SETALL:
 		semun.array = arg.array;
 		break;
 	case SETVAL:
 		semun.val = arg.val;
 		break;		
 	}
 
 	error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun,
 	    &rval);
 	if (error)
 		return (error);
 
 	switch (uap->cmd) {
 	case SEM_STAT:
 	case IPC_STAT:
 		error = copyout(&dsbuf, arg.buf, sizeof(dsbuf));
 		break;
 	}
 
 	if (error == 0)
 		td->td_retval[0] = rval;
 	return (error);
 }
 
 /*
  * Kernel implementation of the semctl commands.
  *
  * 'semid' is an IPC id, except for SEM_STAT where it is an index into
  * sema[].  'arg->buf' is accessed directly (bcopy), so it must be a kernel
  * pointer; 'arg->array' is accessed with copyin()/copyout(), so it must be
  * a user pointer.  On success the command's result is stored in *rval.
  * Returns 0 or an errno value.
  */
 int
 kern_semctl(struct thread *td, int semid, int semnum, int cmd,
     union semun *arg, register_t *rval)
 {
 	u_short *array;
 	struct ucred *cred = td->td_ucred;
 	int i, error;
 	struct semid_ds *sbuf;
 	struct semid_kernel *semakptr;
 	struct mtx *sema_mtxp;
 	u_short usval, count;
 	int semidx;
 
 	DPRINTF(("call to semctl(%d, %d, %d, 0x%p)\n",
 	    semid, semnum, cmd, arg));
 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
 		return (ENOSYS);
 
 	array = NULL;
 
 	switch(cmd) {
 	case SEM_STAT:
 		/*
 		 * For this command we assume semid is an array index
 		 * rather than an IPC id.
 		 */
 		if (semid < 0 || semid >= seminfo.semmni)
 			return (EINVAL);
 		semakptr = &sema[semid];
 		sema_mtxp = &sema_mtx[semid];
 		mtx_lock(sema_mtxp);
 		if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0) {
 			error = EINVAL;
 			goto done2;
 		}
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 #ifdef MAC
 		error = mac_sysvsem_check_semctl(cred, semakptr, cmd);
 		if (error != 0)
 			goto done2;
 #endif
 		bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds));
 		*rval = IXSEQ_TO_IPCID(semid, semakptr->u.sem_perm);
 		mtx_unlock(sema_mtxp);
 		return (0);
 	}
 
 	semidx = IPCID_TO_IX(semid);
 	if (semidx < 0 || semidx >= seminfo.semmni)
 		return (EINVAL);
 
 	semakptr = &sema[semidx];
 	sema_mtxp = &sema_mtx[semidx];
 	/* IPC_RMID rearranges sem[], so it also takes the global sem_mtx. */
 	if (cmd == IPC_RMID)
 		mtx_lock(&sem_mtx);
 	mtx_lock(sema_mtxp);
 #ifdef MAC
 	error = mac_sysvsem_check_semctl(cred, semakptr, cmd);
 	if (error != 0)
 		goto done2;
 #endif
 
 	error = 0;
 	*rval = 0;
 
 	switch (cmd) {
 	case IPC_RMID:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
 			goto done2;
 		semakptr->u.sem_perm.cuid = cred->cr_uid;
 		semakptr->u.sem_perm.uid = cred->cr_uid;
 		semakptr->u.sem_perm.mode = 0;
 		racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems);
 		crfree(semakptr->cred);
 		semakptr->cred = NULL;
 		SEMUNDO_LOCK();
 		semundo_clear(semidx, -1);
 		SEMUNDO_UNLOCK();
 #ifdef MAC
 		mac_sysvsem_cleanup(semakptr);
 #endif
 		wakeup(semakptr);
 		/* Lock every set whose semaphores are about to move. */
 		for (i = 0; i < seminfo.semmni; i++) {
 			if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
 			    sema[i].u.sem_base > semakptr->u.sem_base)
 				mtx_lock_flags(&sema_mtx[i], LOP_DUPOK);
 		}
 		/*
 		 * Close the gap left by this set: slide every semaphore
 		 * stored above it down by sem_nsems.  Only indices below
 		 * semtot are read, so the copy never reaches past the
 		 * in-use part of sem[].
 		 */
 		for (i = semakptr->u.sem_base - sem + semakptr->u.sem_nsems;
 		    i < semtot; i++)
 			sem[i - semakptr->u.sem_nsems] = sem[i];
 		for (i = 0; i < seminfo.semmni; i++) {
 			if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
 			    sema[i].u.sem_base > semakptr->u.sem_base) {
 				sema[i].u.sem_base -= semakptr->u.sem_nsems;
 				mtx_unlock(&sema_mtx[i]);
 			}
 		}
 		semtot -= semakptr->u.sem_nsems;
 		break;
 
 	case IPC_SET:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
 			goto done2;
 		sbuf = arg->buf;
 		semakptr->u.sem_perm.uid = sbuf->sem_perm.uid;
 		semakptr->u.sem_perm.gid = sbuf->sem_perm.gid;
 		/* Only the low nine permission bits may be changed. */
 		semakptr->u.sem_perm.mode = (semakptr->u.sem_perm.mode &
 		    ~0777) | (sbuf->sem_perm.mode & 0777);
 		semakptr->u.sem_ctime = time_second;
 		break;
 
 	case IPC_STAT:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 		bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds));
 		break;
 
 	case GETNCNT:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 		if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
 			error = EINVAL;
 			goto done2;
 		}
 		*rval = semakptr->u.sem_base[semnum].semncnt;
 		break;
 
 	case GETPID:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 		if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
 			error = EINVAL;
 			goto done2;
 		}
 		*rval = semakptr->u.sem_base[semnum].sempid;
 		break;
 
 	case GETVAL:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 		if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
 			error = EINVAL;
 			goto done2;
 		}
 		*rval = semakptr->u.sem_base[semnum].semval;
 		break;
 
 	case GETALL:
 		/*
 		 * Unfortunately, callers of this function don't know
 		 * in advance how many semaphores are in this set.
 		 * While we could just allocate the maximum size array
 		 * and pass the actual size back to the caller, that
 		 * won't work for SETALL since we can't copyin() more
 		 * data than the user specified as we may return a
 		 * spurious EFAULT.
 		 *
 		 * Note that the number of semaphores in a set is
 		 * fixed for the life of that set.  The only way that
 		 * the 'count' could change while we are blocked in
 		 * malloc() is if this semaphore set were destroyed
 		 * and a new one created with the same index.
 		 * However, semvalid() will catch that due to the
 		 * sequence number unless exactly 0x8000 (or a
 		 * multiple thereof) semaphore sets for the same index
 		 * are created and destroyed while we are in malloc!
 		 *
 		 */
 		count = semakptr->u.sem_nsems;
 		/* The set's mutex is dropped around the M_WAITOK malloc(). */
 		mtx_unlock(sema_mtxp);
 		array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK);
 		mtx_lock(sema_mtxp);
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
 		KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 		for (i = 0; i < semakptr->u.sem_nsems; i++)
 			array[i] = semakptr->u.sem_base[i].semval;
 		/* Copy the snapshot out without holding the set's mutex. */
 		mtx_unlock(sema_mtxp);
 		error = copyout(array, arg->array, count * sizeof(*array));
 		mtx_lock(sema_mtxp);
 		break;
 
 	case GETZCNT:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 		if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
 			error = EINVAL;
 			goto done2;
 		}
 		*rval = semakptr->u.sem_base[semnum].semzcnt;
 		break;
 
 	case SETVAL:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
 			goto done2;
 		if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
 			error = EINVAL;
 			goto done2;
 		}
 		if (arg->val < 0 || arg->val > seminfo.semvmx) {
 			error = ERANGE;
 			goto done2;
 		}
 		semakptr->u.sem_base[semnum].semval = arg->val;
 		/* Pending undo adjustments for this semaphore are dropped. */
 		SEMUNDO_LOCK();
 		semundo_clear(semidx, semnum);
 		SEMUNDO_UNLOCK();
 		wakeup(semakptr);
 		break;
 
 	case SETALL:
 		/*
 		 * See comment on GETALL for why 'count' shouldn't change
 		 * and why we require a userland buffer.
 		 */
 		count = semakptr->u.sem_nsems;
 		mtx_unlock(sema_mtxp);
 		array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK);
 		error = copyin(arg->array, array, count * sizeof(*array));
 		mtx_lock(sema_mtxp);
 		if (error)
 			break;
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
 		KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
 			goto done2;
 		/*
 		 * NOTE(review): an out-of-range value stops the loop with
 		 * ERANGE, but values already stored are kept and the undo
 		 * clear and wakeup below still run.
 		 */
 		for (i = 0; i < semakptr->u.sem_nsems; i++) {
 			usval = array[i];
 			if (usval > seminfo.semvmx) {
 				error = ERANGE;
 				break;
 			}
 			semakptr->u.sem_base[i].semval = usval;
 		}
 		SEMUNDO_LOCK();
 		semundo_clear(semidx, -1);
 		SEMUNDO_UNLOCK();
 		wakeup(semakptr);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 
 done2:
 	/* Common exit: release the locks taken above and the temp array. */
 	mtx_unlock(sema_mtxp);
 	if (cmd == IPC_RMID)
 		mtx_unlock(&sem_mtx);
 	if (array != NULL)
 		free(array, M_TEMP);
 	return(error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct semget_args {
 	key_t	key;
 	int	nsems;
 	int	semflg;
 };
 #endif
 /*
  * semget system call: look up the semaphore set with the given key, or
  * create a new one when the key is IPC_PRIVATE or IPC_CREAT is set and no
  * set with that key exists.  On success the IPC id of the set is stored in
  * td->td_retval[0].  Returns 0 or an errno value.
  */
 int
 sys_semget(struct thread *td, struct semget_args *uap)
 {
 	int semid, error = 0;
 	int key = uap->key;
 	int nsems = uap->nsems;
 	int semflg = uap->semflg;
 	struct ucred *cred = td->td_ucred;
 
 	DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg));
 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
 		return (ENOSYS);
 
 	/* sem_mtx is held for the whole lookup/creation. */
 	mtx_lock(&sem_mtx);
 	if (key != IPC_PRIVATE) {
 		/* Search the allocated sets for one with a matching key. */
 		for (semid = 0; semid < seminfo.semmni; semid++) {
 			if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) &&
 			    sema[semid].u.sem_perm.key == key)
 				break;
 		}
 		if (semid < seminfo.semmni) {
 			DPRINTF(("found public key\n"));
 			if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
 				DPRINTF(("not exclusive\n"));
 				error = EEXIST;
 				goto done2;
 			}
 			if ((error = ipcperm(td, &sema[semid].u.sem_perm,
 			    semflg & 0700))) {
 				goto done2;
 			}
 			/* The existing set must hold at least nsems semaphores. */
 			if (nsems > 0 && sema[semid].u.sem_nsems < nsems) {
 				DPRINTF(("too small\n"));
 				error = EINVAL;
 				goto done2;
 			}
 #ifdef MAC
 			error = mac_sysvsem_check_semget(cred, &sema[semid]);
 			if (error != 0)
 				goto done2;
 #endif
 			goto found;
 		}
 	}
 
 	DPRINTF(("need to allocate the semid_kernel\n"));
 	if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) {
 		if (nsems <= 0 || nsems > seminfo.semmsl) {
 			DPRINTF(("nsems out of range (0<%d<=%d)\n", nsems,
 			    seminfo.semmsl));
 			error = EINVAL;
 			goto done2;
 		}
 		/* Is there room left in sem[] for nsems more semaphores? */
 		if (nsems > seminfo.semmns - semtot) {
 			DPRINTF((
 			    "not enough semaphores left (need %d, got %d)\n",
 			    nsems, seminfo.semmns - semtot));
 			error = ENOSPC;
 			goto done2;
 		}
 		/* Find the first unallocated slot in sema[]. */
 		for (semid = 0; semid < seminfo.semmni; semid++) {
 			if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) == 0)
 				break;
 		}
 		if (semid == seminfo.semmni) {
 			DPRINTF(("no more semid_kernel's available\n"));
 			error = ENOSPC;
 			goto done2;
 		}
 #ifdef RACCT
 		if (racct_enable) {
 			PROC_LOCK(td->td_proc);
 			error = racct_add(td->td_proc, RACCT_NSEM, nsems);
 			PROC_UNLOCK(td->td_proc);
 			/* Any racct_add() failure is reported as ENOSPC. */
 			if (error != 0) {
 				error = ENOSPC;
 				goto done2;
 			}
 		}
 #endif
 		DPRINTF(("semid %d is available\n", semid));
 		mtx_lock(&sema_mtx[semid]);
 		KASSERT((sema[semid].u.sem_perm.mode & SEM_ALLOC) == 0,
 		    ("Lost semaphore %d", semid));
 		sema[semid].u.sem_perm.key = key;
 		sema[semid].u.sem_perm.cuid = cred->cr_uid;
 		sema[semid].u.sem_perm.uid = cred->cr_uid;
 		sema[semid].u.sem_perm.cgid = cred->cr_gid;
 		sema[semid].u.sem_perm.gid = cred->cr_gid;
 		sema[semid].u.sem_perm.mode = (semflg & 0777) | SEM_ALLOC;
 		sema[semid].cred = crhold(cred);
 		/* Bump the slot's sequence number, wrapping at 15 bits. */
 		sema[semid].u.sem_perm.seq =
 		    (sema[semid].u.sem_perm.seq + 1) & 0x7fff;
 		sema[semid].u.sem_nsems = nsems;
 		sema[semid].u.sem_otime = 0;
 		sema[semid].u.sem_ctime = time_second;
 		/* The new set's semaphores go at the current end of sem[]. */
 		sema[semid].u.sem_base = &sem[semtot];
 		semtot += nsems;
 		bzero(sema[semid].u.sem_base,
 		    sizeof(sema[semid].u.sem_base[0])*nsems);
 #ifdef MAC
 		mac_sysvsem_create(cred, &sema[semid]);
 #endif
 		mtx_unlock(&sema_mtx[semid]);
 		DPRINTF(("sembase = %p, next = %p\n",
 		    sema[semid].u.sem_base, &sem[semtot]));
 	} else {
 		DPRINTF(("didn't find it and wasn't asked to create it\n"));
 		error = ENOENT;
 		goto done2;
 	}
 
 found:
 	/* Return the IPC id built from the slot index and its sequence. */
 	td->td_retval[0] = IXSEQ_TO_IPCID(semid, sema[semid].u.sem_perm);
 done2:
 	mtx_unlock(&sem_mtx);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct semop_args {
 	int	semid;
 	struct	sembuf *sops;
 	size_t	nsops;
 };
 #endif
 /*
  * semop system call: apply the vector of 'nsops' operations at 'sops' to
  * the semaphore set 'semid' as a unit.  If an operation cannot be
  * performed, the operations already applied are rolled back and the thread
  * either fails with EAGAIN (IPC_NOWAIT) or sleeps and retries the whole
  * vector.  SEM_UNDO operations are recorded through semundo_adjust().
  * Returns 0 or an errno value.
  */
 int
 sys_semop(struct thread *td, struct semop_args *uap)
 {
 #define SMALL_SOPS	8
 	struct sembuf small_sops[SMALL_SOPS];
 	int semid = uap->semid;
 	size_t nsops = uap->nsops;
 	struct sembuf *sops;
 	struct semid_kernel *semakptr;
 	struct sembuf *sopptr = NULL;
 	struct sem *semptr = NULL;
 	struct sem_undo *suptr;
 	struct mtx *sema_mtxp;
 	size_t i, j, k;
 	int error;
 	int do_wakeup, do_undos;
 	unsigned short seq;
 
 #ifdef SEM_DEBUG
 	sops = NULL;
 #endif
 	DPRINTF(("call to semop(%d, %p, %u)\n", semid, sops, nsops));
 
 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
 		return (ENOSYS);
 
 	semid = IPCID_TO_IX(semid);	/* Convert back to zero origin */
 
 	if (semid < 0 || semid >= seminfo.semmni)
 		return (EINVAL);
 
 	/* Allocate memory for sem_ops */
 	if (nsops <= SMALL_SOPS)
 		sops = small_sops;
 	else if (nsops > seminfo.semopm) {
 		DPRINTF(("too many sops (max=%d, nsops=%d)\n", seminfo.semopm,
 		    nsops));
 		return (E2BIG);
 	} else {
 #ifdef RACCT
 		if (racct_enable) {
 			PROC_LOCK(td->td_proc);
 			if (nsops >
 			    racct_get_available(td->td_proc, RACCT_NSEMOP)) {
 				PROC_UNLOCK(td->td_proc);
 				return (E2BIG);
 			}
 			PROC_UNLOCK(td->td_proc);
 		}
 #endif
 
 		sops = malloc(nsops * sizeof(*sops), M_TEMP, M_WAITOK);
 	}
 	if ((error = copyin(uap->sops, sops, nsops * sizeof(sops[0]))) != 0) {
 		DPRINTF(("error = %d from copyin(%p, %p, %d)\n", error,
 		    uap->sops, sops, nsops * sizeof(sops[0])));
 		/* The buffer came from M_TEMP, so free it with that type. */
 		if (sops != small_sops)
 			free(sops, M_TEMP);
 		return (error);
 	}
 
 	semakptr = &sema[semid];
 	sema_mtxp = &sema_mtx[semid];
 	mtx_lock(sema_mtxp);
 	if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0) {
 		error = EINVAL;
 		goto done2;
 	}
 	seq = semakptr->u.sem_perm.seq;
 	if (seq != IPCID_TO_SEQ(uap->semid)) {
 		error = EINVAL;
 		goto done2;
 	}
 	/*
 	 * Initial pass thru sops to see what permissions are needed.
 	 * Also perform any checks that don't need repeating on each
 	 * attempt to satisfy the request vector.
 	 */
 	j = 0;		/* permission needed */
 	do_undos = 0;
 	for (i = 0; i < nsops; i++) {
 		sopptr = &sops[i];
 		if (sopptr->sem_num >= semakptr->u.sem_nsems) {
 			error = EFBIG;
 			goto done2;
 		}
 		if (sopptr->sem_flg & SEM_UNDO && sopptr->sem_op != 0)
 			do_undos = 1;
 		j |= (sopptr->sem_op == 0) ? SEM_R : SEM_A;
 	}
 
 	if ((error = ipcperm(td, &semakptr->u.sem_perm, j))) {
 		DPRINTF(("error = %d from ipaccess\n", error));
 		goto done2;
 	}
 #ifdef MAC
 	error = mac_sysvsem_check_semop(td->td_ucred, semakptr, j);
 	if (error != 0)
 		goto done2;
 #endif
 
 	/*
 	 * Loop trying to satisfy the vector of requests.
 	 * If we reach a point where we must wait, any requests already
 	 * performed are rolled back and we go to sleep until some other
 	 * process wakes us up.  At this point, we start all over again.
 	 *
 	 * This ensures that from the perspective of other tasks, a set
 	 * of requests is atomic (never partially satisfied).
 	 */
 	for (;;) {
 		do_wakeup = 0;
 		error = 0;	/* error return if necessary */
 
 		for (i = 0; i < nsops; i++) {
 			sopptr = &sops[i];
 			semptr = &semakptr->u.sem_base[sopptr->sem_num];
 
 			DPRINTF((
 			    "semop:  semakptr=%p, sem_base=%p, "
 			    "semptr=%p, sem[%d]=%d : op=%d, flag=%s\n",
 			    semakptr, semakptr->u.sem_base, semptr,
 			    sopptr->sem_num, semptr->semval, sopptr->sem_op,
 			    (sopptr->sem_flg & IPC_NOWAIT) ?
 			    "nowait" : "wait"));
 
 			if (sopptr->sem_op < 0) {
 				if (semptr->semval + sopptr->sem_op < 0) {
 					DPRINTF(("semop:  can't do it now\n"));
 					break;
 				} else {
 					semptr->semval += sopptr->sem_op;
 					if (semptr->semval == 0 &&
 					    semptr->semzcnt > 0)
 						do_wakeup = 1;
 				}
 			} else if (sopptr->sem_op == 0) {
 				if (semptr->semval != 0) {
 					DPRINTF(("semop:  not zero now\n"));
 					break;
 				}
 			} else if (semptr->semval + sopptr->sem_op >
 			    seminfo.semvmx) {
 				error = ERANGE;
 				break;
 			} else {
 				if (semptr->semncnt > 0)
 					do_wakeup = 1;
 				semptr->semval += sopptr->sem_op;
 			}
 		}
 
 		/*
 		 * Did we get through the entire vector?
 		 */
 		if (i >= nsops)
 			goto done;
 
 		/*
 		 * No ... rollback anything that we've already done
 		 */
 		DPRINTF(("semop:  rollback 0 through %d\n", i-1));
 		for (j = 0; j < i; j++)
 			semakptr->u.sem_base[sops[j].sem_num].semval -=
 			    sops[j].sem_op;
 
 		/* If we detected an error, return it */
 		if (error != 0)
 			goto done2;
 
 		/*
 		 * If the request that we couldn't satisfy has the
 		 * NOWAIT flag set then return with EAGAIN.
 		 */
 		if (sopptr->sem_flg & IPC_NOWAIT) {
 			error = EAGAIN;
 			goto done2;
 		}
 
 		if (sopptr->sem_op == 0)
 			semptr->semzcnt++;
 		else
 			semptr->semncnt++;
 
 		DPRINTF(("semop:  good night!\n"));
 		error = msleep(semakptr, sema_mtxp, (PZERO - 4) | PCATCH,
 		    "semwait", 0);
 		DPRINTF(("semop:  good morning (error=%d)!\n", error));
 		/* return code is checked below, after sem[nz]cnt-- */
 
 		/*
 		 * Make sure that the semaphore still exists
 		 */
 		seq = semakptr->u.sem_perm.seq;
 		if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
 		    seq != IPCID_TO_SEQ(uap->semid)) {
 			error = EIDRM;
 			goto done2;
 		}
 
 		/*
 		 * Renew the semaphore's pointer after wakeup since
 		 * during msleep sem_base may have been modified and semptr
 		 * is not valid any more
 		 */
 		semptr = &semakptr->u.sem_base[sopptr->sem_num];
 
 		/*
 		 * The semaphore is still alive.  Readjust the count of
 		 * waiting processes.
 		 */
 		if (sopptr->sem_op == 0)
 			semptr->semzcnt--;
 		else
 			semptr->semncnt--;
 
 		/*
 		 * Is it really morning, or was our sleep interrupted?
 		 * (Delayed check of msleep() return code because we
 		 * need to decrement sem[nz]cnt either way.)
 		 */
 		if (error != 0) {
 			error = EINTR;
 			goto done2;
 		}
 		DPRINTF(("semop:  good morning!\n"));
 	}
 
 done:
 	/*
 	 * Process any SEM_UNDO requests.
 	 */
 	if (do_undos) {
 		SEMUNDO_LOCK();
 		suptr = NULL;
 		for (i = 0; i < nsops; i++) {
 			/*
 			 * We only need to deal with SEM_UNDO's for non-zero
 			 * op's.
 			 */
 			int adjval;
 
 			if ((sops[i].sem_flg & SEM_UNDO) == 0)
 				continue;
 			adjval = sops[i].sem_op;
 			if (adjval == 0)
 				continue;
 			error = semundo_adjust(td, &suptr, semid, seq,
 			    sops[i].sem_num, -adjval);
 			if (error == 0)
 				continue;
 
 			/*
 			 * Oh-Oh!  We ran out of either sem_undo's or undo's.
 			 * Rollback the adjustments to this point and then
 			 * rollback the semaphore ups and down so we can return
 			 * with an error with all structures restored.  We
 			 * rollback the undo's in the exact reverse order that
 			 * we applied them.  This guarantees that we won't run
 			 * out of space as we roll things back out.
 			 */
 			for (j = 0; j < i; j++) {
 				k = i - j - 1;
 				if ((sops[k].sem_flg & SEM_UNDO) == 0)
 					continue;
 				adjval = sops[k].sem_op;
 				if (adjval == 0)
 					continue;
 				if (semundo_adjust(td, &suptr, semid, seq,
 				    sops[k].sem_num, adjval) != 0)
 					panic("semop - can't undo undos");
 			}
 
 			for (j = 0; j < nsops; j++)
 				semakptr->u.sem_base[sops[j].sem_num].semval -=
 				    sops[j].sem_op;
 
 			DPRINTF(("error = %d from semundo_adjust\n", error));
 			SEMUNDO_UNLOCK();
 			goto done2;
 		} /* loop through the sops */
 		SEMUNDO_UNLOCK();
 	} /* if (do_undos) */
 
 	/* We're definitely done - set the sempid's and time */
 	for (i = 0; i < nsops; i++) {
 		sopptr = &sops[i];
 		semptr = &semakptr->u.sem_base[sopptr->sem_num];
 		semptr->sempid = td->td_proc->p_pid;
 	}
 	semakptr->u.sem_otime = time_second;
 
 	/*
 	 * Do a wakeup if any semaphore was up'd whilst something was
 	 * sleeping on it.
 	 */
 	if (do_wakeup) {
 		DPRINTF(("semop:  doing wakeup\n"));
 		wakeup(semakptr);
 		DPRINTF(("semop:  back from wakeup\n"));
 	}
 	DPRINTF(("semop:  done\n"));
 	td->td_retval[0] = 0;
 done2:
 	mtx_unlock(sema_mtxp);
 	/* Release the operation vector with the type it was allocated with. */
 	if (sops != small_sops)
 		free(sops, M_TEMP);
 	return (error);
 }
 
 /*
  * Go through the undo structures for this process and apply the adjustments to
  * semaphores.
  *
  * The undo vector belonging to 'p' (if any) is taken off semu_list, each of
  * its recorded adjustments is applied to the semaphore it names, and the
  * vector is then put on semu_free_list.  'arg' is not used.
  */
 static void
 semexit_myhook(void *arg, struct proc *p)
 {
 	struct sem_undo *suptr;
 	struct semid_kernel *semakptr;
 	struct mtx *sema_mtxp;
 	int semid, semnum, adjval, ix;
 	unsigned short seq;
 
 	/*
 	 * Go through the chain of undo vectors looking for one
 	 * associated with this process.
 	 */
 	SEMUNDO_LOCK();
 	LIST_FOREACH(suptr, &semu_list, un_next) {
 		if (suptr->un_proc == p)
 			break;
 	}
 	if (suptr == NULL) {
 		SEMUNDO_UNLOCK();
 		return;
 	}
 	/* Unlink the vector before the undo lock is dropped below. */
 	LIST_REMOVE(suptr, un_next);
 
 	DPRINTF(("proc @%p has undo structure with %d entries\n", p,
 	    suptr->un_cnt));
 
 	/*
 	 * If there are any active undo elements then process them.
 	 */
 	if (suptr->un_cnt > 0) {
 		/* The undo lock is not held while the set mutexes are taken. */
 		SEMUNDO_UNLOCK();
 		for (ix = 0; ix < suptr->un_cnt; ix++) {
 			semid = suptr->un_ent[ix].un_id;
 			semnum = suptr->un_ent[ix].un_num;
 			adjval = suptr->un_ent[ix].un_adjval;
 			seq = suptr->un_ent[ix].un_seq;
 			semakptr = &sema[semid];
 			sema_mtxp = &sema_mtx[semid];
 
 			mtx_lock(sema_mtxp);
 			/*
 			 * Skip the entry if the set is no longer allocated or
 			 * its sequence number differs from the recorded one.
 			 */
 			if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
 			    (semakptr->u.sem_perm.seq != seq)) {
 				mtx_unlock(sema_mtxp);
 				continue;
 			}
 			if (semnum >= semakptr->u.sem_nsems)
 				panic("semexit - semnum out of range");
 
 			DPRINTF((
 			    "semexit:  %p id=%d num=%d(adj=%d) ; sem=%d\n",
 			    suptr->un_proc, suptr->un_ent[ix].un_id,
 			    suptr->un_ent[ix].un_num,
 			    suptr->un_ent[ix].un_adjval,
 			    semakptr->u.sem_base[semnum].semval));
 
 			/*
 			 * Apply the adjustment, clamping at zero when a
 			 * negative adjustment exceeds the current value.
 			 */
 			if (adjval < 0 && semakptr->u.sem_base[semnum].semval <
 			    -adjval)
 				semakptr->u.sem_base[semnum].semval = 0;
 			else
 				semakptr->u.sem_base[semnum].semval += adjval;
 
 			wakeup(semakptr);
 			DPRINTF(("semexit:  back from wakeup\n"));
 			mtx_unlock(sema_mtxp);
 		}
 		SEMUNDO_LOCK();
 	}
 
 	/*
 	 * Deallocate the undo vector.
 	 */
 	DPRINTF(("removing vector\n"));
 	suptr->un_proc = NULL;
 	suptr->un_cnt = 0;
 	LIST_INSERT_HEAD(&semu_free_list, suptr, un_next);
 	SEMUNDO_UNLOCK();
 }
 
 static int
 sysctl_sema(SYSCTL_HANDLER_ARGS)
 {
 
 	return (SYSCTL_OUT(req, sema,
 	    sizeof(struct semid_kernel) * seminfo.semmni));
 }
 
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
 
 /*
  * Dispatch table used by sys_semsys(), indexed by its 'which' argument:
  * 0 = semctl (FreeBSD 7 compat version), 1 = semget, 2 = semop.
  */
 /* XXX casting to (sy_call_t *) is bogus, as usual. */
 static sy_call_t *semcalls[] = {
 	(sy_call_t *)freebsd7___semctl, (sy_call_t *)sys_semget,
 	(sy_call_t *)sys_semop
 };
 
 /*
  * Entry point for all SEM calls.
  *
  * Dispatches through semcalls[] using uap->which as the index and passes
  * the remaining arguments (starting at uap->a2) to the selected handler.
  * Returns ENOSYS when SysV IPC is not allowed in the caller's prison,
  * EINVAL when 'which' is outside the table, otherwise the handler's result.
  */
 int
 sys_semsys(td, uap)
 	struct thread *td;
 	/* XXX actually varargs. */
 	struct semsys_args /* {
 		int	which;
 		int	a2;
 		int	a3;
 		int	a4;
 		int	a5;
 	} */ *uap;
 {
 	int error;
 
 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
 		return (ENOSYS);
 	/* Reject indices outside the dispatch table. */
-	if (uap->which < 0 ||
-	    uap->which >= nitems(semcalls))
+	if (uap->which < 0 || uap->which >= nitems(semcalls))
 		return (EINVAL);
 	error = (*semcalls[uap->which])(td, &uap->a2);
 	return (error);
 }
 
 /* Copy field 'fld' from structure 'src' to structure 'dst'. */
 #ifndef CP
 #define CP(src, dst, fld)	do { (dst).fld = (src).fld; } while (0)
 #endif
 
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd7___semctl_args {
 	int	semid;
 	int	semnum;
 	int	cmd;
 	union	semun_old *arg;
 };
 #endif
 int
 freebsd7___semctl(struct thread *td, struct freebsd7___semctl_args *uap)
 {
 	struct semid_ds_old dsold;
 	struct semid_ds dsbuf;
 	union semun_old arg;
 	union semun semun;
 	register_t rval;
 	int error;
 
 	switch (uap->cmd) {
 	case SEM_STAT:
 	case IPC_SET:
 	case IPC_STAT:
 	case GETALL:
 	case SETVAL:
 	case SETALL:
 		error = copyin(uap->arg, &arg, sizeof(arg));
 		if (error)
 			return (error);
 		break;
 	}
 
 	switch (uap->cmd) {
 	case SEM_STAT:
 	case IPC_STAT:
 		semun.buf = &dsbuf;
 		break;
 	case IPC_SET:
 		error = copyin(arg.buf, &dsold, sizeof(dsold));
 		if (error)
 			return (error);
 		ipcperm_old2new(&dsold.sem_perm, &dsbuf.sem_perm);
 		CP(dsold, dsbuf, sem_base);
 		CP(dsold, dsbuf, sem_nsems);
 		CP(dsold, dsbuf, sem_otime);
 		CP(dsold, dsbuf, sem_ctime);
 		semun.buf = &dsbuf;
 		break;
 	case GETALL:
 	case SETALL:
 		semun.array = arg.array;
 		break;
 	case SETVAL:
 		semun.val = arg.val;
 		break;		
 	}
 
 	error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun,
 	    &rval);
 	if (error)
 		return (error);
 
 	switch (uap->cmd) {
 	case SEM_STAT:
 	case IPC_STAT:
 		bzero(&dsold, sizeof(dsold));
 		ipcperm_new2old(&dsbuf.sem_perm, &dsold.sem_perm);
 		CP(dsbuf, dsold, sem_base);
 		CP(dsbuf, dsold, sem_nsems);
 		CP(dsbuf, dsold, sem_otime);
 		CP(dsbuf, dsold, sem_ctime);
 		error = copyout(&dsold, arg.buf, sizeof(dsold));
 		break;
 	}
 
 	if (error == 0)
 		td->td_retval[0] = rval;
 	return (error);
 }
 
 #endif /* COMPAT_FREEBSD{4,5,6,7} */
 
 #ifdef COMPAT_FREEBSD32
 
 /*
  * 32-bit compat entry point for semsys.  Sub-call 0 goes to the 32-bit
  * FreeBSD 7 semctl; every other sub-call is handed to sys_semsys().
  * Without any of the COMPAT_FREEBSD4..7 options the call is nosys().
  */
 int
 freebsd32_semsys(struct thread *td, struct freebsd32_semsys_args *uap)
 {
 
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
 	if (uap->which == 0)
 		return (freebsd7_freebsd32_semctl(td,
 		    (struct freebsd7_freebsd32_semctl_args *)&uap->a2));
 	return (sys_semsys(td, (struct semsys_args *)uap));
 #else
 	return (nosys(td, NULL));
 #endif
 }
 
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
 int
 freebsd7_freebsd32_semctl(struct thread *td,
     struct freebsd7_freebsd32_semctl_args *uap)
 {
 	struct semid_ds32_old dsbuf32;
 	struct semid_ds dsbuf;
 	union semun semun;
 	union semun32 arg;
 	register_t rval;
 	int error;
 
 	switch (uap->cmd) {
 	case SEM_STAT:
 	case IPC_SET:
 	case IPC_STAT:
 	case GETALL:
 	case SETVAL:
 	case SETALL:
 		error = copyin(uap->arg, &arg, sizeof(arg));
 		if (error)
 			return (error);		
 		break;
 	}
 
 	switch (uap->cmd) {
 	case SEM_STAT:
 	case IPC_STAT:
 		semun.buf = &dsbuf;
 		break;
 	case IPC_SET:
 		error = copyin(PTRIN(arg.buf), &dsbuf32, sizeof(dsbuf32));
 		if (error)
 			return (error);
 		freebsd32_ipcperm_old_in(&dsbuf32.sem_perm, &dsbuf.sem_perm);
 		PTRIN_CP(dsbuf32, dsbuf, sem_base);
 		CP(dsbuf32, dsbuf, sem_nsems);
 		CP(dsbuf32, dsbuf, sem_otime);
 		CP(dsbuf32, dsbuf, sem_ctime);
 		semun.buf = &dsbuf;
 		break;
 	case GETALL:
 	case SETALL:
 		semun.array = PTRIN(arg.array);
 		break;
 	case SETVAL:
 		semun.val = arg.val;
 		break;
 	}
 
 	error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun,
 	    &rval);
 	if (error)
 		return (error);
 
 	switch (uap->cmd) {
 	case SEM_STAT:
 	case IPC_STAT:
 		bzero(&dsbuf32, sizeof(dsbuf32));
 		freebsd32_ipcperm_old_out(&dsbuf.sem_perm, &dsbuf32.sem_perm);
 		PTROUT_CP(dsbuf, dsbuf32, sem_base);
 		CP(dsbuf, dsbuf32, sem_nsems);
 		CP(dsbuf, dsbuf32, sem_otime);
 		CP(dsbuf, dsbuf32, sem_ctime);
 		error = copyout(&dsbuf32, PTRIN(arg.buf), sizeof(dsbuf32));
 		break;
 	}
 
 	if (error == 0)
 		td->td_retval[0] = rval;
 	return (error);
 }
 #endif
 
 int
 freebsd32_semctl(struct thread *td, struct freebsd32_semctl_args *uap)
 {
 	struct semid_ds32 dsbuf32;
 	struct semid_ds dsbuf;
 	union semun semun;
 	union semun32 arg;
 	register_t rval;
 	int error;
 
 	switch (uap->cmd) {
 	case SEM_STAT:
 	case IPC_SET:
 	case IPC_STAT:
 	case GETALL:
 	case SETVAL:
 	case SETALL:
 		error = copyin(uap->arg, &arg, sizeof(arg));
 		if (error)
 			return (error);		
 		break;
 	}
 
 	switch (uap->cmd) {
 	case SEM_STAT:
 	case IPC_STAT:
 		semun.buf = &dsbuf;
 		break;
 	case IPC_SET:
 		error = copyin(PTRIN(arg.buf), &dsbuf32, sizeof(dsbuf32));
 		if (error)
 			return (error);
 		freebsd32_ipcperm_in(&dsbuf32.sem_perm, &dsbuf.sem_perm);
 		PTRIN_CP(dsbuf32, dsbuf, sem_base);
 		CP(dsbuf32, dsbuf, sem_nsems);
 		CP(dsbuf32, dsbuf, sem_otime);
 		CP(dsbuf32, dsbuf, sem_ctime);
 		semun.buf = &dsbuf;
 		break;
 	case GETALL:
 	case SETALL:
 		semun.array = PTRIN(arg.array);
 		break;
 	case SETVAL:
 		semun.val = arg.val;
 		break;		
 	}
 
 	error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun,
 	    &rval);
 	if (error)
 		return (error);
 
 	switch (uap->cmd) {
 	case SEM_STAT:
 	case IPC_STAT:
 		bzero(&dsbuf32, sizeof(dsbuf32));
 		freebsd32_ipcperm_out(&dsbuf.sem_perm, &dsbuf32.sem_perm);
 		PTROUT_CP(dsbuf, dsbuf32, sem_base);
 		CP(dsbuf, dsbuf32, sem_nsems);
 		CP(dsbuf, dsbuf32, sem_otime);
 		CP(dsbuf, dsbuf32, sem_ctime);
 		error = copyout(&dsbuf32, PTRIN(arg.buf), sizeof(dsbuf32));
 		break;
 	}
 
 	if (error == 0)
 		td->td_retval[0] = rval;
 	return (error);
 }
 
 #endif /* COMPAT_FREEBSD32 */
Index: head/sys/netinet/tcp_syncache.c
===================================================================
--- head/sys/netinet/tcp_syncache.c	(revision 298353)
+++ head/sys/netinet/tcp_syncache.c	(revision 298354)
@@ -1,2158 +1,2157 @@
 /*-
  * Copyright (c) 2001 McAfee, Inc.
  * Copyright (c) 2006,2013 Andre Oppermann, Internet Business Solutions AG
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Jonathan Lemon
  * and McAfee Research, the Security Research Division of McAfee, Inc. under
  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
  * DARPA CHATS research program. [2001 McAfee, Inc.]
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_pcbgroup.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/hash.h>
 #include <sys/refcount.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>		/* for proc0 declaration */
 #include <sys/random.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/syslog.h>
 #include <sys/ucred.h>
 
 #include <sys/md5.h>
 #include <crypto/siphash/siphash.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/nd6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_pcb.h>
 #endif
 #include <netinet/tcp.h>
 #ifdef TCP_RFC7413
 #include <netinet/tcp_fastopen.h>
 #endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
 #ifdef TCP_OFFLOAD
 #include <netinet/toecore.h>
 #endif
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #ifdef INET6
 #include <netipsec/ipsec6.h>
 #endif
 #include <netipsec/key.h>
 #endif /*IPSEC*/
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * net.inet.tcp.syncookies: use SYN cookies when the syncache overflows.
  * Defaults to enabled (1).
  */
 static VNET_DEFINE(int, tcp_syncookies) = 1;
 #define	V_tcp_syncookies		VNET(tcp_syncookies)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_syncookies), 0,
     "Use TCP SYN cookies if the syncache overflows");
 
 /*
  * net.inet.tcp.syncookies_only: use only SYN cookies.
  * Defaults to disabled (0).
  */
 static VNET_DEFINE(int, tcp_syncookiesonly) = 0;
 #define	V_tcp_syncookiesonly		VNET(tcp_syncookiesonly)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_syncookiesonly), 0,
     "Use only TCP SYN cookies");
 
 #ifdef TCP_OFFLOAD
 /* True when the syncache entry has a non-NULL sc_tod (offload device). */
 #define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL)
 #endif
 
 static void	 syncache_drop(struct syncache *, struct syncache_head *);
 static void	 syncache_free(struct syncache *);
 static void	 syncache_insert(struct syncache *, struct syncache_head *);
 static int	 syncache_respond(struct syncache *, struct syncache_head *, int);
 static struct	 socket *syncache_socket(struct syncache *, struct socket *,
 		    struct mbuf *m);
 static void	 syncache_timeout(struct syncache *sc, struct syncache_head *sch,
 		    int docallout);
 static void	 syncache_timer(void *);
 
 static uint32_t	 syncookie_mac(struct in_conninfo *, tcp_seq, uint8_t,
 		    uint8_t *, uintptr_t);
 static tcp_seq	 syncookie_generate(struct syncache_head *, struct syncache *);
 static struct syncache
 		*syncookie_lookup(struct in_conninfo *, struct syncache_head *,
 		    struct syncache *, struct tcphdr *, struct tcpopt *,
 		    struct socket *);
 static void	 syncookie_reseed(void *);
 #ifdef INVARIANTS
 static int	 syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
 		    struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
 		    struct socket *lso);
 #endif
 
 /*
  * Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies.
  * 3 retransmits correspond to a timeout of 3 * (1 + 2 + 4 + 8) == 45 seconds;
  * the odds are that the user has given up attempting to connect by then.
  */
 #define SYNCACHE_MAXREXMTS		3
 
 /* Arbitrary values */
 #define TCP_SYNCACHE_HASHSIZE		512
 #define TCP_SYNCACHE_BUCKETLIMIT	30
 
 /* The syncache state itself; accessed through V_tcp_syncache. */
 static VNET_DEFINE(struct tcp_syncache, tcp_syncache);
 #define	V_tcp_syncache			VNET(tcp_syncache)
 
 /* Everything below hangs off the net.inet.tcp.syncache sysctl node. */
 static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0,
     "TCP SYN cache");
 
 SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_VNET | CTLFLAG_RDTUN,
     &VNET_NAME(tcp_syncache.bucket_limit), 0,
     "Per-bucket hash limit for syncache");
 
 SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_VNET | CTLFLAG_RDTUN,
     &VNET_NAME(tcp_syncache.cache_limit), 0,
     "Overall entry limit for syncache");
 
 SYSCTL_UMA_CUR(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_VNET,
     &VNET_NAME(tcp_syncache.zone), "Current number of entries in syncache");
 
 SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
     &VNET_NAME(tcp_syncache.hashsize), 0,
     "Size of TCP syncache hashtable");
 
 /* Unlike the limits above, the retransmit limit is writable at run time. */
 SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_syncache.rexmt_limit), 0,
     "Limit on SYN/ACK retransmissions");
 
 VNET_DEFINE(int, tcp_sc_rst_sock_fail) = 1;
 SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rst_on_sock_fail,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_sc_rst_sock_fail), 0,
     "Send reset on socket allocation failure");
 
 /* Malloc type used for the hash table allocation. */
 static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache");
 
 /* Per-bucket-row mutex helpers; each syncache_head carries its own sch_mtx. */
 #define	SCH_LOCK(sch)		mtx_lock(&(sch)->sch_mtx)
 #define	SCH_UNLOCK(sch)		mtx_unlock(&(sch)->sch_mtx)
 #define	SCH_LOCK_ASSERT(sch)	mtx_assert(&(sch)->sch_mtx, MA_OWNED)
 
 /*
  * Requires the syncache entry to be already removed from the bucket list.
  */
 static void
 syncache_free(struct syncache *sc)
 {
 
 	if (sc->sc_ipopts)
 		(void) m_free(sc->sc_ipopts);
 	if (sc->sc_cred)
 		crfree(sc->sc_cred);
 #ifdef MAC
 	mac_syncache_destroy(&sc->sc_label);
 #endif
 
 	uma_zfree(V_tcp_syncache.zone, sc);
 }
 
 void
 syncache_init(void)
 {
 	int i;
 
 	V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
 	V_tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
 	V_tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
 	V_tcp_syncache.hash_secret = arc4random();
 
 	TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize",
 	    &V_tcp_syncache.hashsize);
 	TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit",
 	    &V_tcp_syncache.bucket_limit);
 	if (!powerof2(V_tcp_syncache.hashsize) ||
 	    V_tcp_syncache.hashsize == 0) {
 		printf("WARNING: syncache hash size is not a power of 2.\n");
 		V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
 	}
 	V_tcp_syncache.hashmask = V_tcp_syncache.hashsize - 1;
 
 	/* Set limits. */
 	V_tcp_syncache.cache_limit =
 	    V_tcp_syncache.hashsize * V_tcp_syncache.bucket_limit;
 	TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit",
 	    &V_tcp_syncache.cache_limit);
 
 	/* Allocate the hash table. */
 	V_tcp_syncache.hashbase = malloc(V_tcp_syncache.hashsize *
 	    sizeof(struct syncache_head), M_SYNCACHE, M_WAITOK | M_ZERO);
 
 #ifdef VIMAGE
 	V_tcp_syncache.vnet = curvnet;
 #endif
 
 	/* Initialize the hash buckets. */
 	for (i = 0; i < V_tcp_syncache.hashsize; i++) {
 		TAILQ_INIT(&V_tcp_syncache.hashbase[i].sch_bucket);
 		mtx_init(&V_tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
 			 NULL, MTX_DEF);
 		callout_init_mtx(&V_tcp_syncache.hashbase[i].sch_timer,
 			 &V_tcp_syncache.hashbase[i].sch_mtx, 0);
 		V_tcp_syncache.hashbase[i].sch_length = 0;
 		V_tcp_syncache.hashbase[i].sch_sc = &V_tcp_syncache;
 	}
 
 	/* Create the syncache entry zone. */
 	V_tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	V_tcp_syncache.cache_limit = uma_zone_set_max(V_tcp_syncache.zone,
 	    V_tcp_syncache.cache_limit);
 
 	/* Start the SYN cookie reseeder callout. */
 	callout_init(&V_tcp_syncache.secret.reseed, 1);
 	arc4rand(V_tcp_syncache.secret.key[0], SYNCOOKIE_SECRET_SIZE, 0);
 	arc4rand(V_tcp_syncache.secret.key[1], SYNCOOKIE_SECRET_SIZE, 0);
 	callout_reset(&V_tcp_syncache.secret.reseed, SYNCOOKIE_LIFETIME * hz,
 	    syncookie_reseed, &V_tcp_syncache);
 }
 
 #ifdef VIMAGE
 void
 syncache_destroy(void)
 {
 	struct syncache_head *sch;
 	struct syncache *sc, *nsc;
 	int i;
 
 	/*
 	 * Stop the re-seed timer before freeing resources.  No need to
 	 * possibly schedule it another time.
 	 */
 	callout_drain(&V_tcp_syncache.secret.reseed);
 
 	/* Cleanup hash buckets: stop timers, free entries, destroy locks. */
 	for (i = 0; i < V_tcp_syncache.hashsize; i++) {
 
 		sch = &V_tcp_syncache.hashbase[i];
 		callout_drain(&sch->sch_timer);
 
 		SCH_LOCK(sch);
 		TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc)
 			syncache_drop(sc, sch);
 		SCH_UNLOCK(sch);
 		KASSERT(TAILQ_EMPTY(&sch->sch_bucket),
 		    ("%s: sch->sch_bucket not empty", __func__));
 		KASSERT(sch->sch_length == 0, ("%s: sch->sch_length %d not 0",
 		    __func__, sch->sch_length));
 		mtx_destroy(&sch->sch_mtx);
 	}
 
 	KASSERT(uma_zone_get_cur(V_tcp_syncache.zone) == 0,
 	    ("%s: cache_count not 0", __func__));
 
 	/* Free the allocated global resources. */
 	uma_zdestroy(V_tcp_syncache.zone);
 	free(V_tcp_syncache.hashbase, M_SYNCACHE);
 }
 #endif
 
 /*
  * Link a syncache entry at the head of the given bucket row, first evicting
  * the entry at the tail if the row has already reached its limit.
  * Takes and releases the syncache_head lock itself.
  */
 static void
 syncache_insert(struct syncache *sc, struct syncache_head *sch)
 {
 	struct syncache *victim;
 
 	SCH_LOCK(sch);
 
 	/* A full row makes room by tossing its tail (the oldest element). */
 	if (sch->sch_length >= V_tcp_syncache.bucket_limit) {
 		KASSERT(!TAILQ_EMPTY(&sch->sch_bucket),
 		    ("sch->sch_length incorrect"));
 		victim = TAILQ_LAST(&sch->sch_bucket, sch_head);
 		syncache_drop(victim, sch);
 		TCPSTAT_INC(tcps_sc_bucketoverflow);
 	}
 
 	TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
 	sch->sch_length++;
 
 #ifdef TCP_OFFLOAD
 	/* Tell the offload device attached to this entry about the insert. */
 	if (ADDED_BY_TOE(sc))
 		sc->sc_tod->tod_syncache_added(sc->sc_tod, sc->sc_todctx);
 #endif
 
 	/*
 	 * For the first entry in a row, push the row's next deadline far
 	 * into the future so the new entry's deadline below replaces it.
 	 */
 	if (sch->sch_length == 1)
 		sch->sch_nextc = ticks + INT_MAX;
 	syncache_timeout(sc, sch, 1);
 
 	SCH_UNLOCK(sch);
 
 	TCPSTATES_INC(TCPS_SYN_RECEIVED);
 	TCPSTAT_INC(tcps_sc_added);
 }
 
 /*
  * Unlink an entry from its bucket row and free it.
  * The caller must hold the syncache_head lock.
  */
 static void
 syncache_drop(struct syncache *sc, struct syncache_head *sch)
 {
 
 	SCH_LOCK_ASSERT(sch);
 
 	/* Account for the entry leaving SYN_RECEIVED, then unlink it. */
 	TCPSTATES_DEC(TCPS_SYN_RECEIVED);
 	TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
 	sch->sch_length--;
 
 #ifdef TCP_OFFLOAD
 	/* Tell the offload device attached to this entry about the removal. */
 	if (ADDED_BY_TOE(sc))
 		sc->sc_tod->tod_syncache_removed(sc->sc_tod, sc->sc_todctx);
 #endif
 
 	syncache_free(sc);
 }
 
 /*
  * Engage/reengage timer on bucket row.
  *
  * Computes the entry's next retransmit time from the backoff table, counts
  * the transmission, and if that time is earlier than the row's current
  * deadline makes it the new deadline.  The row's callout is only re-armed
  * when docallout is non-zero.
  *
  * NOTE(review): sc_rxmits indexes tcp_syn_backoff[] without a bound check
  * here; presumably callers keep it within the table -- confirm against the
  * rexmtlimit sysctl.
  */
 static void
 syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout)
 {
 
 	sc->sc_rxttime = ticks +
 	    TCPTV_RTOBASE * (tcp_syn_backoff[sc->sc_rxmits]);
 	sc->sc_rxmits++;
 
 	/* Nothing to do unless this entry is now the earliest in the row. */
 	if (!TSTMP_LT(sc->sc_rxttime, sch->sch_nextc))
 		return;
 
 	sch->sch_nextc = sc->sc_rxttime;
 	if (docallout) {
 		callout_reset(&sch->sch_timer, sch->sch_nextc - ticks,
 		    syncache_timer, (void *)sch);
 	}
 }
 
 /*
  * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
  * If we have retransmitted an entry the maximum number of times, expire it.
  * One separate timer for each bucket row.
  *
  * xsch is the bucket row (struct syncache_head *) the callout was armed
  * with.  The callout is re-armed before returning as long as the row still
  * holds entries.
  */
 static void
 syncache_timer(void *xsch)
 {
 	struct syncache_head *sch = (struct syncache_head *)xsch;
 	struct syncache *sc, *nsc;
 	int tick = ticks;	/* one snapshot used for every comparison below */
 	char *s;
 
 	CURVNET_SET(sch->sch_sc->vnet);
 
 	/* NB: syncache_head has already been locked by the callout. */
 	SCH_LOCK_ASSERT(sch);
 
 	/*
 	 * In the following cycle we may remove some entries and/or
 	 * advance some timeouts, so re-initialize the bucket timer.
 	 */
 	sch->sch_nextc = tick + INT_MAX;
 
 	TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc) {
 		/*
 		 * We do not check if the listen socket still exists
 		 * and accept the case where the listen socket may be
 		 * gone by the time we resend the SYN/ACK.  We do
 		 * not expect this to happen often. If it does,
 		 * then the RST will be sent by the time the remote
 		 * host does the SYN/ACK->ACK.
 		 */
 		/* Not due yet: only track the earliest pending deadline. */
 		if (TSTMP_GT(sc->sc_rxttime, tick)) {
 			if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc))
 				sch->sch_nextc = sc->sc_rxttime;
 			continue;
 		}
 		/* Due, but out of retransmits: expire the entry. */
 		if (sc->sc_rxmits > V_tcp_syncache.rexmt_limit) {
 			if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 				log(LOG_DEBUG, "%s; %s: Retransmits exhausted, "
 				    "giving up and removing syncache entry\n",
 				    s, __func__);
 				free(s, M_TCPLOG);
 			}
 			syncache_drop(sc, sch);
 			TCPSTAT_INC(tcps_sc_stale);
 			continue;
 		}
 		if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Response timeout, "
 			    "retransmitting (%u) SYN|ACK\n",
 			    s, __func__, sc->sc_rxmits);
 			free(s, M_TCPLOG);
 		}
 
 		/*
 		 * Resend the SYN|ACK and compute the entry's next deadline;
 		 * docallout is 0 because the callout is re-armed once below.
 		 */
 		syncache_respond(sc, sch, 1);
 		TCPSTAT_INC(tcps_sc_retransmitted);
 		syncache_timeout(sc, sch, 0);
 	}
 	if (!TAILQ_EMPTY(&(sch)->sch_bucket))
 		callout_reset(&(sch)->sch_timer, (sch)->sch_nextc - tick,
 			syncache_timer, (void *)(sch));
 	CURVNET_RESTORE();
 }
 
 /*
  * Find an entry in the syncache.
  * Returns always with locked syncache_head plus a matching entry or NULL.
  */
 static struct syncache *
 syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
 {
 	struct syncache *sc;
 	struct syncache_head *sch;
 	uint32_t hash;
 
 	/*
 	 * The hash is built on foreign port + local port + foreign address.
 	 * We rely on the fact that struct in_conninfo starts with 16 bits
 	 * of foreign port, then 16 bits of local port then followed by 128
 	 * bits of foreign address.  In case of IPv4 address, the first 3
 	 * 32-bit words of the address always are zeroes.
 	 */
 	hash = jenkins_hash32((uint32_t *)&inc->inc_ie, 5,
 	    V_tcp_syncache.hash_secret) & V_tcp_syncache.hashmask;
 
 	sch = &V_tcp_syncache.hashbase[hash];
 	*schp = sch;
 	SCH_LOCK(sch);
 
 	/* Circle through bucket row to find matching entry. */
 	TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash)
 		if (bcmp(&inc->inc_ie, &sc->sc_inc.inc_ie,
 		    sizeof(struct in_endpoints)) == 0)
 			break;
 
 	return (sc);	/* Always returns with locked sch. */
 }
 
 /*
  * Called when a RST arrives for a connection that does not exist, to see
  * whether it refers to an entry in the syn cache.  A RST that passes the
  * flag and sequence-number checks removes the entry; anything else is
  * counted as a bad RST and ignored.
  */
 void
 syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th)
 {
 	struct syncache_head *sch;
 	struct syncache *sc;
 	char *logbuf;
 
 	logbuf = NULL;
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
 
 	/*
 	 * Any RST to our SYN|ACK must not carry ACK, SYN or FIN flags.
 	 * See RFC 793 page 65, section SEGMENT ARRIVES.
 	 */
 	if ((th->th_flags & (TH_ACK|TH_SYN|TH_FIN)) != 0) {
 		logbuf = tcp_log_addrs(inc, th, NULL, NULL);
 		if (logbuf != NULL)
 			log(LOG_DEBUG, "%s; %s: Spurious RST with ACK, SYN or "
 			    "FIN flag set, segment ignored\n", logbuf, __func__);
 		TCPSTAT_INC(tcps_badrst);
 		goto done;
 	}
 
 	/*
 	 * No corresponding connection was found in syncache.
 	 * If syncookies are enabled and possibly exclusively
 	 * used, or we are under memory pressure, a valid RST
 	 * may not find a syncache entry.  In that case we're
 	 * done and no SYN|ACK retransmissions will happen.
 	 * Otherwise the RST was misdirected or spoofed.
 	 */
 	if (sc == NULL) {
 		logbuf = tcp_log_addrs(inc, th, NULL, NULL);
 		if (logbuf != NULL)
 			log(LOG_DEBUG, "%s; %s: Spurious RST without matching "
 			    "syncache entry (possibly syncookie only), "
 			    "segment ignored\n", logbuf, __func__);
 		TCPSTAT_INC(tcps_badrst);
 		goto done;
 	}
 
 	/*
 	 * If the RST bit is set, check the sequence number to see
 	 * if this is a valid reset segment.
 	 * RFC 793 page 37:
 	 *   In all states except SYN-SENT, all reset (RST) segments
 	 *   are validated by checking their SEQ-fields.  A reset is
 	 *   valid if its sequence number is in the window.
 	 *
 	 *   The sequence number in the reset segment is normally an
 	 *   echo of our outgoing acknowledgement numbers, but some hosts
 	 *   send a reset with the sequence number at the rightmost edge
 	 *   of our receive window, and we have to handle this case.
 	 */
 	if (!SEQ_GEQ(th->th_seq, sc->sc_irs) ||
 	    !SEQ_LEQ(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
 		/* Outside [IRS, IRS + WND]: leave the entry alone. */
 		logbuf = tcp_log_addrs(inc, th, NULL, NULL);
 		if (logbuf != NULL)
 			log(LOG_DEBUG, "%s; %s: RST with invalid SEQ %u != "
 			    "IRS %u (+WND %u), segment ignored\n",
 			    logbuf, __func__, th->th_seq, sc->sc_irs,
 			    sc->sc_wnd);
 		TCPSTAT_INC(tcps_badrst);
 		goto done;
 	}
 
 	/* In-window RST: the peer rejected our SYN|ACK, so zap the entry. */
 	syncache_drop(sc, sch);
 	logbuf = tcp_log_addrs(inc, th, NULL, NULL);
 	if (logbuf != NULL)
 		log(LOG_DEBUG, "%s; %s: Our SYN|ACK was rejected, "
 		    "connection attempt aborted by remote endpoint\n",
 		    logbuf, __func__);
 	TCPSTAT_INC(tcps_sc_reset);
 
 done:
 	if (logbuf != NULL)
 		free(logbuf, M_TCPLOG);
 	SCH_UNLOCK(sch);
 }
 
 /*
  * Remove the syncache entry matching inc, if there is one, and count it as
  * dropped because of a bad ACK.
  */
 void
 syncache_badack(struct in_conninfo *inc)
 {
 	struct syncache_head *sch;
 	struct syncache *sc;
 
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
 
 	if (sc != NULL) {
 		syncache_drop(sc, sch);
 		TCPSTAT_INC(tcps_sc_badack);
 	}
 
 	SCH_UNLOCK(sch);
 }
 
 /*
  * Handle an unreachable indication for the connection described by inc.
  * th is the TCP header the error refers to; its sequence number must match
  * the ISS of our SYN|ACK for the error to be believed.
  */
 void
 syncache_unreach(struct in_conninfo *inc, struct tcphdr *th)
 {
 	struct syncache_head *sch;
 	struct syncache *sc;
 
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
 
 	/* If the sequence number != sc_iss, then it's a bogus ICMP msg */
 	if (sc != NULL && ntohl(th->th_seq) == sc->sc_iss) {
 		/*
 		 * If we've retransmitted 3 times and this is our second
 		 * error, we remove the entry.  Otherwise, we allow it to
 		 * continue on.  This prevents us from incorrectly nuking an
 		 * entry during a spurious network outage.
 		 *
 		 * See tcp_notify().
 		 */
 		if ((sc->sc_flags & SCF_UNREACH) != 0 &&
 		    sc->sc_rxmits >= 3 + 1) {
 			syncache_drop(sc, sch);
 			TCPSTAT_INC(tcps_sc_unreach);
 		} else {
 			/* Remember the error; a later one may remove it. */
 			sc->sc_flags |= SCF_UNREACH;
 		}
 	}
 
 	SCH_UNLOCK(sch);
 }
 
 /*
  * Build a new TCP socket structure from a syncache entry.
  *
  * sc is the syncache entry describing the connection, lso the listening
  * socket the new socket is derived from, and m the mbuf of the segment
  * being processed (tested against NULL before some uses below).
  *
  * On success return the newly created socket with its underlying inp locked.
  * On failure the partially built socket, if any, is aborted and NULL is
  * returned.
  */
 static struct socket *
 syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
 {
 	struct tcp_function_block *blk;
 	struct inpcb *inp = NULL;
 	struct socket *so;
 	struct tcpcb *tp;
 	int error;
 	char *s;
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 	/*
 	 * Ok, create the full blown connection, and set things up
 	 * as they would have been set up if we had created the
 	 * connection when the SYN arrived.  If we can't create
 	 * the connection, abort it.
 	 */
 	so = sonewconn(lso, 0);
 	if (so == NULL) {
 		/*
 		 * Drop the connection; we will either send a RST or
 		 * have the peer retransmit its SYN again after its
 		 * RTO and try again.
 		 */
 		TCPSTAT_INC(tcps_listendrop);
 		if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Socket create failed "
 			    "due to limits or memory shortage\n",
 			    s, __func__);
 			free(s, M_TCPLOG);
 		}
 		goto abort2;
 	}
 #ifdef MAC
 	mac_socketpeer_set_from_mbuf(m, so);
 #endif
 
 	inp = sotoinpcb(so);
 	inp->inp_inc.inc_fibnum = so->so_fibnum;
 	INP_WLOCK(inp);
 	/*
 	 * Exclusive pcbinfo lock is not required in syncache socket case even
 	 * if two inpcb locks can be acquired simultaneously:
 	 *  - the inpcb in LISTEN state,
 	 *  - the newly created inp.
 	 *
 	 * In this case, an inp cannot be at same time in LISTEN state and
 	 * just created by an accept() call.
 	 */
 	INP_HASH_WLOCK(&V_tcbinfo);
 
 	/* Insert new socket into PCB hash list. */
 	inp->inp_inc.inc_flags = sc->sc_inc.inc_flags;
 #ifdef INET6
 	if (sc->sc_inc.inc_flags & INC_ISIPV6) {
 		inp->in6p_laddr = sc->sc_inc.inc6_laddr;
 	} else {
 		inp->inp_vflag &= ~INP_IPV6;
 		inp->inp_vflag |= INP_IPV4;
 #endif
 		inp->inp_laddr = sc->sc_inc.inc_laddr;
 #ifdef INET6
 	}
 #endif
 
 	/*
 	 * If there's an mbuf and it has a flowid, then let's initialise the
 	 * inp with that particular flowid.
 	 */
 	if (m != NULL && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
 		inp->inp_flowid = m->m_pkthdr.flowid;
 		inp->inp_flowtype = M_HASHTYPE_GET(m);
 	}
 
 	/*
 	 * Install in the reservation hash table for now, but don't yet
 	 * install a connection group since the full 4-tuple isn't yet
 	 * configured.
 	 */
 	inp->inp_lport = sc->sc_inc.inc_lport;
 	if ((error = in_pcbinshash_nopcbgroup(inp)) != 0) {
 		/*
 		 * Undo the assignments above if we failed to
 		 * put the PCB on the hash lists.
 		 */
 #ifdef INET6
 		if (sc->sc_inc.inc_flags & INC_ISIPV6)
 			inp->in6p_laddr = in6addr_any;
 		else
 #endif
 			inp->inp_laddr.s_addr = INADDR_ANY;
 		inp->inp_lport = 0;
 		if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: in_pcbinshash failed "
 			    "with error %i\n",
 			    s, __func__, error);
 			free(s, M_TCPLOG);
 		}
 		INP_HASH_WUNLOCK(&V_tcbinfo);
 		goto abort;
 	}
 #ifdef IPSEC
 	/* Copy old policy into new socket's. */
 	if (ipsec_copy_policy(sotoinpcb(lso)->inp_sp, inp->inp_sp))
 		printf("syncache_socket: could not copy policy\n");
 #endif
 #ifdef INET6
 	if (sc->sc_inc.inc_flags & INC_ISIPV6) {
 		struct inpcb *oinp = sotoinpcb(lso);
 		struct in6_addr laddr6;
 		struct sockaddr_in6 sin6;
 		/*
 		 * Inherit socket options from the listening socket.
 		 * Note that in6p_inputopts are not (and should not be)
 		 * copied, since it stores previously received options and is
 		 * used to detect if each new option is different than the
 		 * previous one and hence should be passed to a user.
 		 * If we copied in6p_inputopts, a user would not be able to
 		 * receive options just after calling the accept system call.
 		 */
 		inp->inp_flags |= oinp->inp_flags & INP_CONTROLOPTS;
 		if (oinp->in6p_outputopts)
 			inp->in6p_outputopts =
 			    ip6_copypktopts(oinp->in6p_outputopts, M_NOWAIT);
 
 		/* Connect the new inp to the peer recorded in the entry. */
 		sin6.sin6_family = AF_INET6;
 		sin6.sin6_len = sizeof(sin6);
 		sin6.sin6_addr = sc->sc_inc.inc6_faddr;
 		sin6.sin6_port = sc->sc_inc.inc_fport;
 		sin6.sin6_flowinfo = sin6.sin6_scope_id = 0;
 		/* Saved so the local address can be restored on failure. */
 		laddr6 = inp->in6p_laddr;
 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
 			inp->in6p_laddr = sc->sc_inc.inc6_laddr;
 		if ((error = in6_pcbconnect_mbuf(inp, (struct sockaddr *)&sin6,
 		    thread0.td_ucred, m)) != 0) {
 			inp->in6p_laddr = laddr6;
 			if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 				log(LOG_DEBUG, "%s; %s: in6_pcbconnect failed "
 				    "with error %i\n",
 				    s, __func__, error);
 				free(s, M_TCPLOG);
 			}
 			INP_HASH_WUNLOCK(&V_tcbinfo);
 			goto abort;
 		}
 		/* Override flowlabel from in6_pcbconnect. */
 		inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
 		inp->inp_flow |= sc->sc_flowlabel;
 	}
 #endif /* INET6 */
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		struct in_addr laddr;
 		struct sockaddr_in sin;
 
 		/*
 		 * Prefer a source route taken from the mbuf; otherwise hand
 		 * the IP options saved in the syncache entry over to the inp
 		 * (clearing sc_ipopts so the entry no longer owns them).
 		 */
 		inp->inp_options = (m) ? ip_srcroute(m) : NULL;
 
 		if (inp->inp_options == NULL) {
 			inp->inp_options = sc->sc_ipopts;
 			sc->sc_ipopts = NULL;
 		}
 
 		/* Connect the new inp to the peer recorded in the entry. */
 		sin.sin_family = AF_INET;
 		sin.sin_len = sizeof(sin);
 		sin.sin_addr = sc->sc_inc.inc_faddr;
 		sin.sin_port = sc->sc_inc.inc_fport;
 		bzero((caddr_t)sin.sin_zero, sizeof(sin.sin_zero));
 		/* Saved so the local address can be restored on failure. */
 		laddr = inp->inp_laddr;
 		if (inp->inp_laddr.s_addr == INADDR_ANY)
 			inp->inp_laddr = sc->sc_inc.inc_laddr;
 		if ((error = in_pcbconnect_mbuf(inp, (struct sockaddr *)&sin,
 		    thread0.td_ucred, m)) != 0) {
 			inp->inp_laddr = laddr;
 			if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 				log(LOG_DEBUG, "%s; %s: in_pcbconnect failed "
 				    "with error %i\n",
 				    s, __func__, error);
 				free(s, M_TCPLOG);
 			}
 			INP_HASH_WUNLOCK(&V_tcbinfo);
 			goto abort;
 		}
 	}
 #endif /* INET */
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 	/* Seed the new tcpcb's sequence state from the syncache entry. */
 	tp = intotcpcb(inp);
 	tcp_state_change(tp, TCPS_SYN_RECEIVED);
 	tp->iss = sc->sc_iss;
 	tp->irs = sc->sc_irs;
 	tcp_rcvseqinit(tp);
 	tcp_sendseqinit(tp);
 	blk = sototcpcb(lso)->t_fb;
 	if (blk != tp->t_fb) {
 		/*
 		 * Our parent's t_fb was not the default,
 		 * we need to release our ref on tp->t_fb and
 		 * pickup one on the new entry.
 		 */
 		struct tcp_function_block *rblk;
 
 		rblk = find_and_ref_tcp_fb(blk);
 		KASSERT(rblk != NULL,
 		    ("cannot find blk %p out of syncache?", blk));
 		if (tp->t_fb->tfb_tcp_fb_fini)
 			(*tp->t_fb->tfb_tcp_fb_fini)(tp);
 		refcount_release(&tp->t_fb->tfb_refcnt);
 		tp->t_fb = rblk;
 		if (tp->t_fb->tfb_tcp_fb_init) {
 			(*tp->t_fb->tfb_tcp_fb_init)(tp);
 		}
 	}
 	tp->snd_wl1 = sc->sc_irs;
 	tp->snd_max = tp->iss + 1;
 	tp->snd_nxt = tp->iss + 1;
 	tp->rcv_up = sc->sc_irs + 1;
 	tp->rcv_wnd = sc->sc_wnd;
 	tp->rcv_adv += tp->rcv_wnd;
 	tp->last_ack_sent = tp->rcv_nxt;
 
 	/*
 	 * Start from the listener's NOPUSH/NODELAY flags, then add the
 	 * options recorded in the syncache entry's flags.
 	 */
 	tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH|TF_NODELAY);
 	if (sc->sc_flags & SCF_NOOPT)
 		tp->t_flags |= TF_NOOPT;
 	else {
 		if (sc->sc_flags & SCF_WINSCALE) {
 			tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
 			tp->snd_scale = sc->sc_requested_s_scale;
 			tp->request_r_scale = sc->sc_requested_r_scale;
 		}
 		if (sc->sc_flags & SCF_TIMESTAMP) {
 			tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
 			tp->ts_recent = sc->sc_tsreflect;
 			tp->ts_recent_age = tcp_ts_getticks();
 			tp->ts_offset = sc->sc_tsoff;
 		}
 #ifdef TCP_SIGNATURE
 		if (sc->sc_flags & SCF_SIGNATURE)
 			tp->t_flags |= TF_SIGNATURE;
 #endif
 		if (sc->sc_flags & SCF_SACK)
 			tp->t_flags |= TF_SACK_PERMIT;
 	}
 
 	if (sc->sc_flags & SCF_ECN)
 		tp->t_flags |= TF_ECN_PERMIT;
 
 	/*
 	 * Set up MSS and get cached values from tcp_hostcache.
 	 * This might overwrite some of the defaults we just set.
 	 */
 	tcp_mss(tp, sc->sc_peer_mss);
 
 	/*
 	 * If the SYN,ACK was retransmitted, indicate that CWND to be
 	 * limited to one segment in cc_conn_init().
 	 * NB: sc_rxmits counts all SYN,ACK transmits, not just retransmits.
 	 */
 	if (sc->sc_rxmits > 1)
 		tp->snd_cwnd = 1;
 
 #ifdef TCP_OFFLOAD
 	/*
 	 * Allow a TOE driver to install its hooks.  Note that we hold the
 	 * pcbinfo lock too and that prevents tcp_usr_accept from accepting a
 	 * new connection before the TOE driver has done its thing.
 	 */
 	if (ADDED_BY_TOE(sc)) {
 		struct toedev *tod = sc->sc_tod;
 
 		tod->tod_offload_socket(tod, sc->sc_todctx, so);
 	}
 #endif
 	/*
 	 * Copy and activate timers.
 	 */
 	tp->t_keepinit = sototcpcb(lso)->t_keepinit;
 	tp->t_keepidle = sototcpcb(lso)->t_keepidle;
 	tp->t_keepintvl = sototcpcb(lso)->t_keepintvl;
 	tp->t_keepcnt = sototcpcb(lso)->t_keepcnt;
 	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
 
 	soisconnected(so);
 
 	TCPSTAT_INC(tcps_accepts);
 	/* Success: inp is still write-locked, as promised to the caller. */
 	return (so);
 
 	/* abort: inp is locked and must be released; abort2: no inp lock held. */
 abort:
 	INP_WUNLOCK(inp);
 abort2:
 	if (so != NULL)
 		soabort(so);
 	return (NULL);
 }
 
 /*
  * This function gets called when we receive an ACK for a
  * socket in the LISTEN state.  We look up the connection
  * in the syncache, and if it's there, we pull it out of
  * the cache and turn it into a full-blown connection in
  * the SYN-RECEIVED state.  If there is no entry, the ACK is
  * checked as a possible returning SYN cookie instead.
  *
  * inc describes the connection, to and th are the parsed TCP options and
  * header of the ACK, and m is the mbuf handed on to syncache_socket().
  * *lsop is the listening socket on entry.
  *
  * Returns 1 when the segment passed validation; *lsop then holds the new
  * socket, or NULL if creating it failed.  Returns 0 with *lsop set to NULL
  * when the segment was rejected.
  *
  * On syncache_socket() success the newly created socket
  * has its underlying inp locked.
  */
 int
 syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
     struct socket **lsop, struct mbuf *m)
 {
 	struct syncache *sc;
 	struct syncache_head *sch;
 	struct syncache scs;
 	/* The failed: label tests s, so it must never be indeterminate. */
 	char *s = NULL;
 
 	/*
 	 * Global TCP locks are held because we manipulate the PCB lists
 	 * and create a new socket.
 	 */
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
 	    ("%s: can handle only ACK", __func__));
 
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
 
 #ifdef INVARIANTS
 	/*
 	 * Test code for syncookies comparing the syncache stored
 	 * values with the reconstructed values from the cookie.
 	 */
 	if (sc != NULL)
 		syncookie_cmp(inc, sch, sc, th, to, *lsop);
 #endif
 
 	if (sc == NULL) {
 		/*
 		 * There is no syncache entry, so see if this ACK is
 		 * a returning syncookie.  To do this, first:
 		 *  A. See if this socket has had a syncache entry dropped in
 		 *     the past.  We don't want to accept a bogus syncookie
 		 *     if we've never received a SYN.
 		 *  B. check that the syncookie is valid.  If it is, then
 		 *     cobble up a fake syncache entry, and return.
 		 */
 		if (!V_tcp_syncookies) {
 			SCH_UNLOCK(sch);
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Spurious ACK, "
 				    "segment rejected (syncookies disabled)\n",
 				    s, __func__);
 			goto failed;
 		}
 		/* scs is the on-stack entry the cookie is decoded into. */
 		bzero(&scs, sizeof(scs));
 		sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop);
 		SCH_UNLOCK(sch);
 		if (sc == NULL) {
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Segment failed "
 				    "SYNCOOKIE authentication, segment rejected "
 				    "(probably spoofed)\n", s, __func__);
 			goto failed;
 		}
 	} else {
 		/*
 		 * Pull out the entry to unlock the bucket row.
 		 *
 		 * NOTE: We must decrease TCPS_SYN_RECEIVED count here, not
 		 * tcp_state_change().  The tcpcb is not existent at this
 		 * moment.  A new one will be allocated via syncache_socket->
 		 * sonewconn->tcp_usr_attach in TCPS_CLOSED state, then
 		 * syncache_socket() will change it to TCPS_SYN_RECEIVED.
 		 */
 		TCPSTATES_DEC(TCPS_SYN_RECEIVED);
 		TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
 		sch->sch_length--;
 #ifdef TCP_OFFLOAD
 		if (ADDED_BY_TOE(sc)) {
 			struct toedev *tod = sc->sc_tod;
 
 			tod->tod_syncache_removed(tod, sc->sc_todctx);
 		}
 #endif
 		SCH_UNLOCK(sch);
 	}
 
 	/*
 	 * Segment validation:
 	 * ACK must match our initial sequence number + 1 (the SYN|ACK).
 	 * The log line prints the expected value, i.e. ISS + 1.
 	 */
 	if (th->th_ack != sc->sc_iss + 1) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
 			    "rejected\n", s, __func__, th->th_ack,
 			    sc->sc_iss + 1);
 		goto failed;
 	}
 
 	/*
 	 * The SEQ must fall in the window starting at the received
 	 * initial receive sequence number + 1 (the SYN).
 	 * The log line prints the start of that window, i.e. IRS + 1.
 	 */
 	if (SEQ_LEQ(th->th_seq, sc->sc_irs) ||
 	    SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
 			    "rejected\n", s, __func__, th->th_seq,
 			    sc->sc_irs + 1);
 		goto failed;
 	}
 
 	/*
 	 * If timestamps were not negotiated during SYN/ACK they
 	 * must not appear on any segment during this session.
 	 */
 	if (!(sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
 			    "segment rejected\n", s, __func__);
 		goto failed;
 	}
 
 	/*
 	 * If timestamps were negotiated during SYN/ACK they should
 	 * appear on every segment during this session.
 	 * XXXAO: This is only informal as there have been unverified
 	 * reports of non-compliant stacks.
 	 */
 	if ((sc->sc_flags & SCF_TIMESTAMP) && !(to->to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Timestamp missing, "
 			    "no action\n", s, __func__);
 			/* Not a failure: release the buffer right away. */
 			free(s, M_TCPLOG);
 			s = NULL;
 		}
 	}
 
 	/*
 	 * If timestamps were negotiated the reflected timestamp
 	 * must be equal to what we actually sent in the SYN|ACK.
 	 */
 	if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
 			    "segment rejected\n",
 			    s, __func__, to->to_tsecr, sc->sc_ts);
 		goto failed;
 	}
 
 	*lsop = syncache_socket(sc, *lsop, m);
 
 	if (*lsop == NULL)
 		TCPSTAT_INC(tcps_sc_aborted);
 	else
 		TCPSTAT_INC(tcps_sc_completed);
 
 /* how do we find the inp for the new socket? */
 	/* The on-stack cookie entry must not be handed to syncache_free(). */
 	if (sc != &scs)
 		syncache_free(sc);
 	return (1);
 failed:
 	if (sc != NULL && sc != &scs)
 		syncache_free(sc);
 	if (s != NULL)
 		free(s, M_TCPLOG);
 	*lsop = NULL;
 	return (0);
 }
 
 #ifdef TCP_RFC7413
 /*
  * Create the connection socket for a SYN that carried a valid TCP Fast
  * Open cookie.
  *
  * sc              - syncache entry describing the connection; passed to
  *                   syncache_socket().
  * lsop            - in: the listen socket; out: whatever syncache_socket()
  *                   returned (NULL on failure).
  * m               - the SYN segment, passed through to syncache_socket().
  * response_cookie - TFO cookie value stored in the new tcpcb.
  *
  * The listener's pending-TFO counter pointer is read before *lsop is
  * overwritten.  On failure the counter is decremented here; on success
  * the pointer is handed to the new tcpcb.
  */
 static void
 syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m,
     uint64_t response_cookie)
 {
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	unsigned int *pending_counter;
 
 	/*
 	 * Global TCP locks are held because we manipulate the PCB lists
 	 * and create a new socket.
 	 */
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 	/* Must be fetched now: *lsop stops naming the listener below. */
 	pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
 	*lsop = syncache_socket(sc, *lsop, m);
 	if (*lsop == NULL) {
 		/* No socket was created; give the pending slot back. */
 		TCPSTAT_INC(tcps_sc_aborted);
 		atomic_subtract_int(pending_counter, 1);
 	} else {
 		inp = sotoinpcb(*lsop);
 		tp = intotcpcb(inp);
 		tp->t_flags |= TF_FASTOPEN;
 		tp->t_tfo_cookie = response_cookie;
 		/* Rewind the send sequence state to the initial value. */
 		tp->snd_max = tp->iss;
 		tp->snd_nxt = tp->iss;
 		tp->t_tfo_pending = pending_counter;
 		TCPSTAT_INC(tcps_sc_completed);
 	}
 }
 #endif /* TCP_RFC7413 */
 
 /*
  * Given a LISTEN socket and an inbound SYN request, add
  * this to the syn cache, and send back a segment:
  *	<SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
  * to the source.
  *
  * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN.
  * Doing so would require that we hold onto the data and deliver it
  * to the application.  However, if we are the target of a SYN-flood
  * DoS attack, an attacker could send data which would eventually
  * consume all available buffer space if it were ACKed.  By not ACKing
  * the data, we avoid this DoS scenario.
  *
  * The exception to the above is when a SYN with a valid TCP Fast Open (TFO)
  * cookie is processed, V_tcp_fastopen_enabled set to true, and the
  * TCP_FASTOPEN socket option is set.  In this case, a new socket is created
  * and returned via lsop, the mbuf is not freed so that tcp_input() can
  * queue its data to the socket, and 1 is returned to indicate the
  * TFO-socket-creation path was taken.
  *
  * Parameters:
  *	inc	connection 4-tuple; copied into the new entry.
  *	to	TCP options parsed from the SYN.
  *	th	TCP header of the SYN (must have SYN set, RST/ACK clear).
  *	inp	listening inpcb; must be write-locked on entry.  It is
  *		unlocked in this function on every path except the one
  *		that creates a TFO socket.
  *	lsop	in: the listen socket.  out: NULL whenever m was freed
  *		here, or the result of syncache_tfo_expand() on the TFO
  *		path.
  *	m	the SYN segment; freed here (if non-NULL) on all paths
  *		except the TFO socket-creation path.
  *	tod, todctx
  *		stored in the entry when TCP_OFFLOAD is configured.
  *
  * Returns 1 only when the TFO socket-creation path was taken, 0 otherwise
  * (including all failure paths).
  */
 int
 syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
     struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
     void *todctx)
 {
 	struct tcpcb *tp;
 	struct socket *so;
 	struct syncache *sc = NULL;
 	struct syncache_head *sch;
 	struct mbuf *ipopts = NULL;
 	u_int ltflags;
 	int win, sb_hiwat, ip_ttl, ip_tos;
 	char *s;
 	int rv = 0;
 #ifdef INET6
 	int autoflowlabel = 0;
 #endif
 #ifdef MAC
 	struct label *maclabel;
 #endif
 	struct syncache scs;
 	struct ucred *cred;
 #ifdef TCP_RFC7413
 	uint64_t tfo_response_cookie;
 	int tfo_cookie_valid = 0;
 	int tfo_response_cookie_valid = 0;
 #endif
 
 	INP_WLOCK_ASSERT(inp);			/* listen socket */
 	KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
 	    ("%s: unexpected tcp flags", __func__));
 
 	/*
 	 * Combine all so/tp operations very early to drop the INP lock as
 	 * soon as possible.
 	 */
 	so = *lsop;
 	tp = sototcpcb(so);
 	cred = crhold(so->so_cred);
 
 #ifdef INET6
 	if ((inc->inc_flags & INC_ISIPV6) &&
 	    (inp->inp_flags & IN6P_AUTOFLOWLABEL))
 		autoflowlabel = 1;
 #endif
 	ip_ttl = inp->inp_ip_ttl;
 	ip_tos = inp->inp_ip_tos;
 	win = sbspace(&so->so_rcv);
 	sb_hiwat = so->so_rcv.sb_hiwat;
 	ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
 
 #ifdef TCP_RFC7413
 	if (V_tcp_fastopen_enabled && (tp->t_flags & TF_FASTOPEN) &&
 	    (tp->t_tfo_pending != NULL) && (to->to_flags & TOF_FASTOPEN)) {
 		/*
 		 * Limit the number of pending TFO connections to
 		 * approximately half of the queue limit.  This prevents TFO
 		 * SYN floods from starving the service by filling the
 		 * listen queue with bogus TFO connections.
 		 */
 		/*
 		 * NOTE(review): the increment below is undone in this
 		 * function only when the limit is exceeded, and in
 		 * syncache_tfo_expand() when socket creation fails.  No
 		 * decrement is visible here for the cases where the cookie
 		 * check does not yield a valid cookie, or where a valid
 		 * cookie arrives for an already-cached (duplicate) SYN --
 		 * confirm whether the counter is released elsewhere.
 		 */
 		if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=
 		    (so->so_qlimit / 2)) {
 			int result;
 
 			result = tcp_fastopen_check_cookie(inc,
 			    to->to_tfo_cookie, to->to_tfo_len,
 			    &tfo_response_cookie);
 			/* > 0: cookie accepted; >= 0: a response cookie is available. */
 			tfo_cookie_valid = (result > 0);
 			tfo_response_cookie_valid = (result >= 0);
 		} else
 			atomic_subtract_int(tp->t_tfo_pending, 1);
 	}
 #endif
 
 	/* By the time we drop the lock these should no longer be used. */
 	so = NULL;
 	tp = NULL;
 
 #ifdef MAC
 	if (mac_syncache_init(&maclabel) != 0) {
 		INP_WUNLOCK(inp);
 		goto done;
 	} else
 		mac_syncache_create(maclabel, inp);
 #endif
 	/*
 	 * With a valid TFO cookie the listen inp stays locked, since a
 	 * socket is about to be created from it; otherwise drop it now.
 	 */
 #ifdef TCP_RFC7413
 	if (!tfo_cookie_valid)
 #endif
 		INP_WUNLOCK(inp);
 
 	/*
 	 * Remember the IP options, if any.
 	 */
 #ifdef INET6
 	if (!(inc->inc_flags & INC_ISIPV6))
 #endif
 #ifdef INET
 		ipopts = (m) ? ip_srcroute(m) : NULL;
 #else
 		ipopts = NULL;
 #endif
 
 	/*
 	 * See if we already have an entry for this connection.
 	 * If we do, resend the SYN,ACK, and reset the retransmit timer.
 	 *
 	 * XXX: should the syncache be re-initialized with the contents
 	 * of the new SYN here (which may have different options?)
 	 *
 	 * XXX: We do not check the sequence number to see if this is a
 	 * real retransmit or a new connection attempt.  The question is
 	 * how to handle such a case; either ignore it as spoofed, or
 	 * drop the current entry and create a new one?
 	 */
 	sc = syncache_lookup(inc, &sch);	/* returns locked entry */
 	SCH_LOCK_ASSERT(sch);
 	if (sc != NULL) {
 #ifdef TCP_RFC7413
 		/* Duplicate SYN: no TFO socket will be created after all. */
 		if (tfo_cookie_valid)
 			INP_WUNLOCK(inp);
 #endif
 		TCPSTAT_INC(tcps_sc_dupsyn);
 		if (ipopts) {
 			/*
 			 * If we were remembering a previous source route,
 			 * forget it and use the new one we've been given.
 			 */
 			if (sc->sc_ipopts)
 				(void) m_free(sc->sc_ipopts);
 			sc->sc_ipopts = ipopts;
 		}
 		/*
 		 * Update timestamp if present.
 		 */
 		if ((sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS))
 			sc->sc_tsreflect = to->to_tsval;
 		else
 			sc->sc_flags &= ~SCF_TIMESTAMP;
 #ifdef MAC
 		/*
 		 * Since we have already unconditionally allocated label
 		 * storage, free it up.  The syncache entry will already
 		 * have an initialized label we can use.
 		 */
 		mac_syncache_destroy(&maclabel);
 #endif
 		/* Retransmit SYN|ACK and reset retransmit count. */
 		if ((s = tcp_log_addrs(&sc->sc_inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Received duplicate SYN, "
 			    "resetting timer and retransmitting SYN|ACK\n",
 			    s, __func__);
 			free(s, M_TCPLOG);
 		}
 		if (syncache_respond(sc, sch, 1) == 0) {
 			sc->sc_rxmits = 0;
 			syncache_timeout(sc, sch, 1);
 			TCPSTAT_INC(tcps_sndacks);
 			TCPSTAT_INC(tcps_sndtotal);
 		}
 		SCH_UNLOCK(sch);
 		goto done;
 	}
 
 #ifdef TCP_RFC7413
 	/*
 	 * Valid TFO cookie: nothing is inserted into the cache, so the
 	 * on-stack entry is used as scratch space instead of allocating.
 	 */
 	if (tfo_cookie_valid) {
 		bzero(&scs, sizeof(scs));
 		sc = &scs;
 		goto skip_alloc;
 	}
 #endif
 
 	sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
 	if (sc == NULL) {
 		/*
 		 * The zone allocator couldn't provide more entries.
 		 * Treat this as if the cache was full; drop the oldest
 		 * entry and insert the new one.
 		 */
 		TCPSTAT_INC(tcps_sc_zonefail);
 		if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL)
 			syncache_drop(sc, sch);
 		sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
 		if (sc == NULL) {
 			/*
 			 * Still nothing: with syncookies enabled fall back
 			 * to the on-stack entry (it is used only to build
 			 * the reply and is never inserted); otherwise give up.
 			 */
 			if (V_tcp_syncookies) {
 				bzero(&scs, sizeof(scs));
 				sc = &scs;
 			} else {
 				SCH_UNLOCK(sch);
 				if (ipopts)
 					(void) m_free(ipopts);
 				goto done;
 			}
 		}
 	}
 
 #ifdef TCP_RFC7413
 skip_alloc:
 	/*
 	 * No valid cookie was presented but a response cookie is available:
 	 * have syncache_respond() include it in the SYN|ACK.
 	 */
 	if (!tfo_cookie_valid && tfo_response_cookie_valid)
 		sc->sc_tfo_cookie = &tfo_response_cookie;
 #endif
 
 	/*
 	 * Fill in the syncache values.
 	 */
 #ifdef MAC
 	sc->sc_label = maclabel;
 #endif
 	/* The entry now owns the credential reference taken above. */
 	sc->sc_cred = cred;
 	cred = NULL;
 	sc->sc_ipopts = ipopts;
 	bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
 #ifdef INET6
 	if (!(inc->inc_flags & INC_ISIPV6))
 #endif
 	{
 		sc->sc_ip_tos = ip_tos;
 		sc->sc_ip_ttl = ip_ttl;
 	}
 #ifdef TCP_OFFLOAD
 	sc->sc_tod = tod;
 	sc->sc_todctx = todctx;
 #endif
 	sc->sc_irs = th->th_seq;
 	/* Replaced below by a syncookie when V_tcp_syncookies is set. */
 	sc->sc_iss = arc4random();
 	sc->sc_flags = 0;
 	sc->sc_flowlabel = 0;
 
 	/*
 	 * Initial receive window: clip sbspace to [0 .. TCP_MAXWIN].
 	 * win was derived from socket earlier in the function.
 	 */
 	win = imax(win, 0);
 	win = imin(win, TCP_MAXWIN);
 	sc->sc_wnd = win;
 
 	if (V_tcp_do_rfc1323) {
 		/*
 		 * A timestamp received in a SYN makes
 		 * it ok to send timestamp requests and replies.
 		 */
 		if (to->to_flags & TOF_TS) {
 			sc->sc_tsreflect = to->to_tsval;
 			sc->sc_ts = tcp_ts_getticks();
 			sc->sc_flags |= SCF_TIMESTAMP;
 		}
 		if (to->to_flags & TOF_SCALE) {
 			int wscale = 0;
 
 			/*
 			 * Pick the smallest possible scaling factor that
 			 * will still allow us to scale up to sb_max, aka
 			 * kern.ipc.maxsockbuf.
 			 *
 			 * We do this because there are broken firewalls that
 			 * will corrupt the window scale option, leading to
 			 * the other endpoint believing that our advertised
 			 * window is unscaled.  At scale factors larger than
 			 * 5 the unscaled window will drop below 1500 bytes,
 			 * leading to serious problems when traversing these
 			 * broken firewalls.
 			 *
 			 * With the default maxsockbuf of 256K, a scale factor
 			 * of 3 will be chosen by this algorithm.  Those who
 			 * choose a larger maxsockbuf should watch out
 			 * for the compatibility problems mentioned above.
 			 *
 			 * RFC1323: The Window field in a SYN (i.e., a <SYN>
 			 * or <SYN,ACK>) segment itself is never scaled.
 			 */
 			while (wscale < TCP_MAX_WINSHIFT &&
 			    (TCP_MAXWIN << wscale) < sb_max)
 				wscale++;
 			sc->sc_requested_r_scale = wscale;
 			sc->sc_requested_s_scale = to->to_wscale;
 			sc->sc_flags |= SCF_WINSCALE;
 		}
 	}
 #ifdef TCP_SIGNATURE
 	/*
 	 * If listening socket requested TCP digests, OR received SYN
 	 * contains the option, flag this in the syncache so that
 	 * syncache_respond() will do the right thing with the SYN+ACK.
 	 */
 	if (to->to_flags & TOF_SIGNATURE || ltflags & TF_SIGNATURE)
 		sc->sc_flags |= SCF_SIGNATURE;
 #endif
 	if (to->to_flags & TOF_SACKPERM)
 		sc->sc_flags |= SCF_SACK;
 	if (to->to_flags & TOF_MSS)
 		sc->sc_peer_mss = to->to_mss;	/* peer mss may be zero */
 	if (ltflags & TF_NOOPT)
 		sc->sc_flags |= SCF_NOOPT;
 	if ((th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn)
 		sc->sc_flags |= SCF_ECN;
 
 	/*
 	 * The cookie is generated only now because it encodes the flags
 	 * and MSS filled in above; the bucket lock is still held here.
 	 */
 	if (V_tcp_syncookies)
 		sc->sc_iss = syncookie_generate(sch, sc);
 #ifdef INET6
 	if (autoflowlabel) {
 		/*
 		 * With syncookies the flow label is derived from the ISS
 		 * rather than drawn at random.
 		 */
 		if (V_tcp_syncookies)
 			sc->sc_flowlabel = sc->sc_iss;
 		else
 			sc->sc_flowlabel = ip6_randomflowlabel();
 		sc->sc_flowlabel = htonl(sc->sc_flowlabel) & IPV6_FLOWLABEL_MASK;
 	}
 #endif
 	SCH_UNLOCK(sch);
 
 #ifdef TCP_RFC7413
 	if (tfo_cookie_valid) {
 		syncache_tfo_expand(sc, lsop, m, tfo_response_cookie);
 		/* INP_WUNLOCK(inp) will be performed by the caller */
 		rv = 1;
 		/* Skip "done": m must not be freed on this path. */
 		goto tfo_done;
 	}
 #endif
 
 	/*
 	 * Do a standard 3-way handshake.
 	 */
 	if (syncache_respond(sc, sch, 0) == 0) {
 		/*
 		 * In syncookies-only mode the allocated entry is not kept.
 		 * The on-stack entry (sc == &scs) is never inserted or freed.
 		 */
 		if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
 			syncache_free(sc);
 		else if (sc != &scs)
 			syncache_insert(sc, sch);   /* locks and unlocks sch */
 		TCPSTAT_INC(tcps_sndacks);
 		TCPSTAT_INC(tcps_sndtotal);
 	} else {
 		if (sc != &scs)
 			syncache_free(sc);
 		TCPSTAT_INC(tcps_sc_dropped);
 	}
 
 done:
 	/* Non-TFO exit: the SYN is consumed and no socket is returned. */
 	if (m) {
 		*lsop = NULL;
 		m_freem(m);
 	}
 #ifdef TCP_RFC7413
 tfo_done:
 #endif
 	/* Still non-NULL only if the reference was never given to an entry. */
 	if (cred != NULL)
 		crfree(cred);
 #ifdef MAC
 	/*
 	 * The on-stack entry never goes through syncache_free(), so its
 	 * label is released here.  (Presumably syncache_free() releases
 	 * the label of allocated entries -- confirm.)
 	 */
 	if (sc == &scs)
 		mac_syncache_destroy(&maclabel);
 #endif
 	return (rv);
 }
 
 /*
  * Build a SYN|ACK for the given syncache entry from scratch and send it.
  *
  * sc     - entry supplying addresses, ports, sequence numbers, window
  *          and negotiated option flags.
  * sch    - hash bucket of the entry; only used to take the bucket lock
  *          when the SCF_SIGNATURE flag has to be cleared.
  * locked - non-zero if the caller already holds the bucket lock.
  *
  * Returns ENOBUFS if no mbuf could be allocated, otherwise the result of
  * ip_output()/ip6_output() (or of the TOE device's tod_syncache_respond
  * method for entries added by a TOE).
  *
  * Side effects on sc: sc_tfo_cookie is cleared once it has been copied
  * into the options, and SCF_SIGNATURE may be cleared when no matching
  * security association is found.
  */
 static int
 syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked)
 {
 	struct ip *ip = NULL;
 	struct mbuf *m;
 	struct tcphdr *th = NULL;
 	int optlen, error = 0;	/* Make compiler happy */
 	u_int16_t hlen, tlen, mssopt;
 	struct tcpopt to;
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 #endif
 #ifdef TCP_SIGNATURE
 	struct secasvar *sav;
 #endif
 
 	/* hlen: IP header length; tlen: IP + TCP header, without options. */
 	hlen =
 #ifdef INET6
 	       (sc->sc_inc.inc_flags & INC_ISIPV6) ? sizeof(struct ip6_hdr) :
 #endif
 		sizeof(struct ip);
 	tlen = hlen + sizeof(struct tcphdr);
 
 	/* Determine MSS we advertize to other end of connection. */
 	mssopt = tcp_mssopt(&sc->sc_inc);
 	if (sc->sc_peer_mss)
 		mssopt = max( min(sc->sc_peer_mss, mssopt), V_tcp_minmss);
 
 	/* XXX: Assume that the entire packet will fit in a header mbuf. */
 	KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN,
 	    ("syncache: mbuf too small"));
 
 	/* Create the IP+TCP header from scratch. */
 	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (ENOBUFS);
 #ifdef MAC
 	mac_syncache_create_mbuf(sc->sc_label, m);
 #endif
 	/* Leave room in front of the packet for a link-layer header. */
 	m->m_data += max_linkhdr;
 	m->m_len = tlen;
 	m->m_pkthdr.len = tlen;
 	m->m_pkthdr.rcvif = NULL;
 
 #ifdef INET6
 	if (sc->sc_inc.inc_flags & INC_ISIPV6) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		ip6->ip6_vfc = IPV6_VERSION;
 		ip6->ip6_nxt = IPPROTO_TCP;
 		ip6->ip6_src = sc->sc_inc.inc6_laddr;
 		ip6->ip6_dst = sc->sc_inc.inc6_faddr;
 		ip6->ip6_plen = htons(tlen - hlen);
 		/* ip6_hlim is set after checksum */
 		ip6->ip6_flow &= ~IPV6_FLOWLABEL_MASK;
 		ip6->ip6_flow |= sc->sc_flowlabel;
 
 		th = (struct tcphdr *)(ip6 + 1);
 	}
 #endif
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		ip = mtod(m, struct ip *);
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = sizeof(struct ip) >> 2;
 		ip->ip_len = htons(tlen);
 		ip->ip_id = 0;
 		ip->ip_off = 0;
 		ip->ip_sum = 0;
 		ip->ip_p = IPPROTO_TCP;
 		ip->ip_src = sc->sc_inc.inc_laddr;
 		ip->ip_dst = sc->sc_inc.inc_faddr;
 		ip->ip_ttl = sc->sc_ip_ttl;
 		ip->ip_tos = sc->sc_ip_tos;
 
 		/*
 		 * See if we should do MTU discovery.  Route lookups are
 		 * expensive, so we will only unset the DF bit if:
 		 *
 		 *	1) path_mtu_discovery is disabled
 		 *	2) the SCF_UNREACH flag has been set
 		 */
 		if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0))
 		       ip->ip_off |= htons(IP_DF);
 
 		th = (struct tcphdr *)(ip + 1);
 	}
 #endif /* INET */
 	/* Ports in sc_inc are copied as-is (no byte swapping here). */
 	th->th_sport = sc->sc_inc.inc_lport;
 	th->th_dport = sc->sc_inc.inc_fport;
 
 	th->th_seq = htonl(sc->sc_iss);
 	th->th_ack = htonl(sc->sc_irs + 1);
 	th->th_off = sizeof(struct tcphdr) >> 2;
 	th->th_x2 = 0;
 	th->th_flags = TH_SYN|TH_ACK;
 	th->th_win = htons(sc->sc_wnd);
 	th->th_urp = 0;
 
 	if (sc->sc_flags & SCF_ECN) {
 		th->th_flags |= TH_ECE;
 		TCPSTAT_INC(tcps_ecn_shs);
 	}
 
 	/* Tack on the TCP options. */
 	if ((sc->sc_flags & SCF_NOOPT) == 0) {
 		to.to_flags = 0;
 
 		/* MSS is always sent; the remaining options depend on sc_flags. */
 		to.to_mss = mssopt;
 		to.to_flags = TOF_MSS;
 		if (sc->sc_flags & SCF_WINSCALE) {
 			to.to_wscale = sc->sc_requested_r_scale;
 			to.to_flags |= TOF_SCALE;
 		}
 		if (sc->sc_flags & SCF_TIMESTAMP) {
 			/* Virgin timestamp or TCP cookie enhanced one. */
 			to.to_tsval = sc->sc_ts;
 			to.to_tsecr = sc->sc_tsreflect;
 			to.to_flags |= TOF_TS;
 		}
 		if (sc->sc_flags & SCF_SACK)
 			to.to_flags |= TOF_SACKPERM;
 #ifdef TCP_SIGNATURE
 		sav = NULL;
 		if (sc->sc_flags & SCF_SIGNATURE) {
 			sav = tcp_get_sav(m, IPSEC_DIR_OUTBOUND);
 			if (sav != NULL)
 				to.to_flags |= TOF_SIGNATURE;
 			else {
 
 				/*
 				 * We've got SCF_SIGNATURE flag
 				 * inherited from listening socket,
 				 * but no SADB key for given source
 				 * address. Assume signature is not
 				 * required and remove signature flag
 				 * instead of silently dropping
 				 * connection.
 				 */
 				/* sc_flags is modified under the bucket lock. */
 				if (locked == 0)
 					SCH_LOCK(sch);
 				sc->sc_flags &= ~SCF_SIGNATURE;
 				if (locked == 0)
 					SCH_UNLOCK(sch);
 			}
 		}
 #endif
 
 #ifdef TCP_RFC7413
 		if (sc->sc_tfo_cookie) {
 			to.to_flags |= TOF_FASTOPEN;
 			to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
 			to.to_tfo_cookie = sc->sc_tfo_cookie;
 			/* don't send cookie again when retransmitting response */
 			sc->sc_tfo_cookie = NULL;
 		}
 #endif
 		/* Options are written directly behind the fixed TCP header. */
 		optlen = tcp_addoptions(&to, (u_char *)(th + 1));
 
 		/* Adjust headers by option size. */
 		th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
 		m->m_len += optlen;
 		m->m_pkthdr.len += optlen;
 
 #ifdef TCP_SIGNATURE
 		/* Only reached with the flag set if a key (sav) was found above. */
 		if (sc->sc_flags & SCF_SIGNATURE)
 			tcp_signature_do_compute(m, 0, optlen,
 			    to.to_signature, sav);
 #endif
 #ifdef INET6
 		if (sc->sc_inc.inc_flags & INC_ISIPV6)
 			ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen);
 		else
 #endif
 			ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
 	} else
 		optlen = 0;
 
 	M_SETFIB(m, sc->sc_inc.inc_fibnum);
 	/*
 	 * Only the pseudo-header sum is stored in th_sum below; csum_flags
 	 * and csum_data request completion of the TCP checksum later on.
 	 */
 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 #ifdef INET6
 	if (sc->sc_inc.inc_flags & INC_ISIPV6) {
 		m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
 		th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
 		    IPPROTO_TCP, 0);
 		ip6->ip6_hlim = in6_selecthlim(NULL, NULL);
 #ifdef TCP_OFFLOAD
 		/* Entries created by a TOE are answered through the device. */
 		if (ADDED_BY_TOE(sc)) {
 			struct toedev *tod = sc->sc_tod;
 
 			error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
 
 			return (error);
 		}
 #endif
 		error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
 	}
 #endif
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		m->m_pkthdr.csum_flags = CSUM_TCP;
 		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 		    htons(tlen + optlen - hlen + IPPROTO_TCP));
 #ifdef TCP_OFFLOAD
 		/* Entries created by a TOE are answered through the device. */
 		if (ADDED_BY_TOE(sc)) {
 			struct toedev *tod = sc->sc_tod;
 
 			error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
 
 			return (error);
 		}
 #endif
 		error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
 	}
 #endif
 	return (error);
 }
 
 /*
  * The purpose of syncookies is to handle spoofed SYN flooding DoS attacks
  * that exceed the capacity of the syncache by avoiding the storage of any
  * of the SYNs we receive.  Syncookies defend against blind SYN flooding
  * attacks where the attacker does not have access to our responses.
  *
  * Syncookies encode and include all necessary information about the
  * connection setup within the SYN|ACK that we send back.  That way we
  * can avoid keeping any local state until the ACK to our SYN|ACK returns
  * (if ever).  Normally the syncache and syncookies are running in parallel
  * with the latter taking over when the former is exhausted.  When matching
  * syncache entry is found the syncookie is ignored.
  *
  * The only reliable information persisting the 3WHS is our initial sequence
  * number ISS of 32 bits.  Syncookies embed a cryptographically sufficient
  * strong hash (MAC) value and a few bits of TCP SYN options in the ISS
  * of our SYN|ACK.  The MAC can be recomputed when the ACK to our SYN|ACK
  * returns and signifies a legitimate connection if it matches the ACK.
  *
  * The available space of 32 bits to store the hash and to encode the SYN
  * option information is very tight and we should have at least 24 bits for
  * the MAC to keep the number of guesses by blind spoofing reasonably high.
  *
  * SYN option information we have to encode to fully restore a connection:
  * MSS: is important to choose an optimal segment size to avoid IP level
  *   fragmentation along the path.  The common MSS values can be encoded
  *   in a 3-bit table.  Uncommon values are captured by the next lower value
  *   in the table leading to a slight increase in packetization overhead.
  * WSCALE: is necessary to allow large windows to be used for high delay-
  *   bandwidth product links.  Not scaling the window when it was initially
  *   negotiated is bad for performance as lack of scaling further decreases
  *   the apparent available send window.  We only need to encode the WSCALE
  *   we received from the remote end.  Our end can be recalculated at any
  *   time.  The common WSCALE values can be encoded in a 3-bit table.
  *   Uncommon values are captured by the next lower value in the table
  *   making us under-estimate the available window size halving our
  *   theoretically possible maximum throughput for that connection.
  * SACK: Greatly assists in packet loss recovery and requires 1 bit.
  * TIMESTAMP and SIGNATURE is not encoded because they are permanent options
  *   that are included in all segments on a connection.  We enable them when
  *   the ACK has them.
  *
  * Security of syncookies and attack vectors:
  *
  * The MAC is computed over (faddr||laddr||fport||lport||irs||flags||secmod)
  * together with the global secret to make it unique per connection attempt.
  * Thus any change of any of those parameters results in a different MAC output
  * in an unpredictable way unless a collision is encountered.  24 bits of the
  * MAC are embedded into the ISS.
  *
  * To prevent replay attacks two rotating global secrets are updated with a
  * new random value every 15 seconds.  The life-time of a syncookie is thus
  * 15-30 seconds.
  *
  * Vector 1: Attacking the secret.  This requires finding a weakness in the
  * MAC itself or the way it is used here.  The attacker can do a chosen plain
  * text attack by varying and testing the all parameters under his control.
  * The strength depends on the size and randomness of the secret, and the
  * cryptographic security of the MAC function.  Due to the constant updating
  * of the secret the attacker has at most 29.999 seconds to find the secret
  * and launch spoofed connections.  After that he has to start all over again.
  *
  * Vector 2: Collision attack on the MAC of a single ACK.  With a 24 bit MAC
  * size an average of 4,823 attempts are required for a 50% chance of success
  * to spoof a single syncookie (birthday collision paradox).  However the
  * attacker is blind and doesn't know if one of his attempts succeeded unless
  * he has a side channel to infer success from.  A single connection setup
  * success average of 90% requires 8,790 packets, 99.99% requires 17,578 packets.
  * This many attempts are required for each one blind spoofed connection.  For
  * every additional spoofed connection he has to launch another N attempts.
  * Thus for a sustained rate 100 spoofed connections per second approximately
  * 1,800,000 packets per second would have to be sent.
  *
  * NB: The MAC function should be fast so that it doesn't become a CPU
  * exhaustion attack vector itself.
  *
  * References:
  *  RFC4987 TCP SYN Flooding Attacks and Common Mitigations
  *  SYN cookies were first proposed by cryptographer Dan J. Bernstein in 1996
  *   http://cr.yp.to/syncookies.html    (overview)
  *   http://cr.yp.to/syncookies/archive (details)
  *
  *
  * Schematic construction of a syncookie enabled Initial Sequence Number:
  *  0        1         2         3
  *  12345678901234567890123456789012
  * |xxxxxxxxxxxxxxxxxxxxxxxxWWWMMMSP|
  *
  *  x 24 MAC (truncated)
  *  W  3 Send Window Scale index
  *  M  3 MSS index
  *  S  1 SACK permitted
  *  P  1 Odd/even secret
  */
 
 /*
  * Distribution and probability of certain MSS values.  Those in between are
  * rounded down to the next lower one.
  * [An Analysis of TCP Maximum Segment Sizes, S. Alcock and R. Nelson, 2011]
  *                            .2%  .3%   5%    7%    7%    20%   15%   45%
  */
 static int tcp_sc_msstab[] = { 216, 536, 1200, 1360, 1400, 1440, 1452, 1460 };
 
 /*
  * Distribution and probability of certain WSCALE values.  We have to map the
  * (send) window scale (shift) option with a range of 0-14 from 4 bits into 3
  * bits based on prevalence of certain values.  Values for which we don't have
  * an exact match are rounded down to the next lower one letting us under-estimate
  * the true available window.  At the moment this would happen only for the
  * very uncommon values 3, 5 and those above 8 (more than 16MB socket buffer
  * and window size).  The absence of the WSCALE option (no scaling in either
  * direction) is encoded with index zero.
  * [WSCALE values histograms, Allman, 2012]
  *                            X 10 10 35  5  6 14 10%   by host
  *                            X 11  4  5  5 18 49  3%   by connections
  */
 static int tcp_sc_wstab[] = { 0, 0, 1, 2, 4, 6, 7, 8 };
 
 /*
  * Compute the MAC for the SYN cookie.  SIPHASH-2-4 is chosen for its speed
  * and good cryptographic properties.
  *
  * inc     - connection info; foreign/local address and port are hashed.
  * irs     - the peer's initial sequence number.
  * flags   - the 8 cookie flag bits that go into the ISS.
  * secbits - the secret used as the SipHash key.
  * secmod  - additional value mixed into the hash (callers in this file
  *           pass the address of the hash bucket).
  *
  * Returns the two 32-bit halves of the SipHash output XORed together.
  */
 static uint32_t
 syncookie_mac(struct in_conninfo *inc, tcp_seq irs, uint8_t flags,
     uint8_t *secbits, uintptr_t secmod)
 {
 	SIPHASH_CTX ctx;
 	uint32_t siphash[2];
 
 	SipHash24_Init(&ctx);
 	SipHash_SetKey(&ctx, secbits);
 	/* Hash the address pair of whichever family this connection uses. */
 	switch (inc->inc_flags & INC_ISIPV6) {
 #ifdef INET
 	case 0:
 		SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr));
 		SipHash_Update(&ctx, &inc->inc_laddr, sizeof(inc->inc_laddr));
 		break;
 #endif
 #ifdef INET6
 	case INC_ISIPV6:
 		SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
 		SipHash_Update(&ctx, &inc->inc6_laddr, sizeof(inc->inc6_laddr));
 		break;
 #endif
 	}
 	SipHash_Update(&ctx, &inc->inc_fport, sizeof(inc->inc_fport));
 	SipHash_Update(&ctx, &inc->inc_lport, sizeof(inc->inc_lport));
 	SipHash_Update(&ctx, &irs, sizeof(irs));
 	SipHash_Update(&ctx, &flags, sizeof(flags));
 	SipHash_Update(&ctx, &secmod, sizeof(secmod));
 	SipHash_Final((u_int8_t *)&siphash, &ctx);
 
 	/* Fold the 64-bit digest into 32 bits. */
 	return (siphash[0] ^ siphash[1]);
 }
 
 /*
  * Build the initial sequence number that carries the syncookie for sc.
  *
  * The MSS index, send window scale index, SACK bit and odd/even secret
  * selector are packed into an 8-bit cookie, a MAC is computed over the
  * connection and that cookie, and both are combined into the returned
  * 32-bit ISS.  The bucket lock must be held.
  *
  * Side effect: if the entry has SCF_TIMESTAMP set, sc_ts is replaced by
  * a random value and sc_tsoff records its offset from the current
  * timestamp clock.
  */
 static tcp_seq
 syncookie_generate(struct syncache_head *sch, struct syncache *sc)
 {
 	u_int i, mss, secbit, wscale;
 	uint32_t iss, hash;
 	uint8_t *secbits;
 	union syncookie cookie;
 
 	SCH_LOCK_ASSERT(sch);
 
 	cookie.cookie = 0;
 
 	/* Map our computed MSS into the 3-bit index. */
 	mss = min(tcp_mssopt(&sc->sc_inc), max(sc->sc_peer_mss, V_tcp_minmss));
 	/* Scan down for the largest table entry <= mss; stops at index 0. */
-	for (i = nitems(tcp_sc_msstab) - 1;
-	     tcp_sc_msstab[i] > mss && i > 0;
+	for (i = nitems(tcp_sc_msstab) - 1; tcp_sc_msstab[i] > mss && i > 0;
 	     i--)
 		;
 	cookie.flags.mss_idx = i;
 
 	/*
 	 * Map the send window scale into the 3-bit index but only if
 	 * the wscale option was received.
 	 */
 	if (sc->sc_flags & SCF_WINSCALE) {
 		wscale = sc->sc_requested_s_scale;
 		/* Same downward scan as for the MSS table. */
 		for (i = nitems(tcp_sc_wstab) - 1;
-		     tcp_sc_wstab[i] > wscale && i > 0;
+		    tcp_sc_wstab[i] > wscale && i > 0;
 		     i--)
 			;
 		cookie.flags.wscale_idx = i;
 	}
 
 	/* Can we do SACK? */
 	if (sc->sc_flags & SCF_SACK)
 		cookie.flags.sack_ok = 1;
 
 	/* Which of the two secrets to use. */
 	secbit = sch->sch_sc->secret.oddeven & 0x1;
 	cookie.flags.odd_even = secbit;
 
 	secbits = sch->sch_sc->secret.key[secbit];
 	hash = syncookie_mac(&sc->sc_inc, sc->sc_irs, cookie.cookie, secbits,
 	    (uintptr_t)sch);
 
 	/*
 	 * Put the flags into the hash and XOR them to get better ISS number
 	 * variance.  This doesn't enhance the cryptographic strength and is
 	 * done to prevent the 8 cookie bits from showing up directly on the
 	 * wire.
 	 */
 	/* Upper 24 bits: MAC.  Low 8 bits: cookie XOR top byte of the MAC. */
 	iss = hash & ~0xff;
 	iss |= cookie.cookie ^ (hash >> 24);
 
 	/* Randomize the timestamp. */
 	if (sc->sc_flags & SCF_TIMESTAMP) {
 		sc->sc_ts = arc4random();
 		sc->sc_tsoff = sc->sc_ts - tcp_ts_getticks();
 	}
 
 	TCPSTAT_INC(tcps_sc_sendcookie);
 	return (iss);
 }
 
 /*
  * Validate the syncookie carried in the ACK of a returning segment and,
  * if it is genuine, reconstruct the connection state in the caller's
  * entry.
  *
  * inc - connection info of the segment.
  * sch - hash bucket for the connection; its lock must be held.
  * sc  - caller-provided entry to fill in.
  * th  - TCP header of the segment; th_ack - 1 is taken as our ISS and
  *       th_seq - 1 as the peer's IRS.
  * to  - TCP options of the segment (timestamp and signature are taken
  *       from here since they are not encoded in the cookie).
  * lso - the listen socket, used for TTL/TOS, the auto flow label setting
  *       and the receive window.
  *
  * Returns sc on success, or NULL if the recomputed MAC does not match the
  * upper 24 bits of the acknowledged ISS.
  */
 static struct syncache *
 syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
     struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
     struct socket *lso)
 {
 	uint32_t hash;
 	uint8_t *secbits;
 	tcp_seq ack, seq;
 	int wnd, wscale = 0;
 	union syncookie cookie;
 
 	SCH_LOCK_ASSERT(sch);
 
 	/*
 	 * Pull information out of SYN-ACK/ACK and revert sequence number
 	 * advances.
 	 */
 	ack = th->th_ack - 1;
 	seq = th->th_seq - 1;
 
 	/*
 	 * Unpack the flags containing enough information to restore the
 	 * connection.  This undoes the XOR with the top byte of the hash
 	 * applied by syncookie_generate().
 	 */
 	cookie.cookie = (ack & 0xff) ^ (ack >> 24);
 
 	/* Which of the two secrets to use. */
 	secbits = sch->sch_sc->secret.key[cookie.flags.odd_even];
 
 	hash = syncookie_mac(inc, seq, cookie.cookie, secbits, (uintptr_t)sch);
 
 	/* The recomputed hash matches the ACK if this was a genuine cookie. */
 	if ((ack & ~0xff) != (hash & ~0xff))
 		return (NULL);
 
 	/* Fill in the syncache values. */
 	sc->sc_flags = 0;
 	bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
 	sc->sc_ipopts = NULL;
 
 	sc->sc_irs = seq;
 	sc->sc_iss = ack;
 
 	switch (inc->inc_flags & INC_ISIPV6) {
 #ifdef INET
 	case 0:
 		sc->sc_ip_ttl = sotoinpcb(lso)->inp_ip_ttl;
 		sc->sc_ip_tos = sotoinpcb(lso)->inp_ip_tos;
 		break;
 #endif
 #ifdef INET6
 	case INC_ISIPV6:
 		/*
 		 * Must reproduce the label that went out with the SYN|ACK:
 		 * syncache_add() derives it as htonl(iss) & mask, so the
 		 * same byte order conversion is required here.
 		 */
 		if (sotoinpcb(lso)->inp_flags & IN6P_AUTOFLOWLABEL)
 			sc->sc_flowlabel =
 			    htonl(sc->sc_iss) & IPV6_FLOWLABEL_MASK;
 		break;
 #endif
 	}
 
 	/* Only the table value is recoverable, not the exact MSS received. */
 	sc->sc_peer_mss = tcp_sc_msstab[cookie.flags.mss_idx];
 
 	/* We can simply recompute receive window scale we sent earlier. */
 	while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < sb_max)
 		wscale++;
 
 	/* Only use wscale if it was enabled in the original SYN. */
 	if (cookie.flags.wscale_idx > 0) {
 		sc->sc_requested_r_scale = wscale;
 		sc->sc_requested_s_scale = tcp_sc_wstab[cookie.flags.wscale_idx];
 		sc->sc_flags |= SCF_WINSCALE;
 	}
 
 	/* Receive window: listen socket buffer space clipped to [0 .. TCP_MAXWIN]. */
 	wnd = sbspace(&lso->so_rcv);
 	wnd = imax(wnd, 0);
 	wnd = imin(wnd, TCP_MAXWIN);
 	sc->sc_wnd = wnd;
 
 	if (cookie.flags.sack_ok)
 		sc->sc_flags |= SCF_SACK;
 
 	/*
 	 * Timestamps are not encoded in the cookie; they are enabled when
 	 * the ACK carries them.  The echoed value is what we sent as sc_ts.
 	 */
 	if (to->to_flags & TOF_TS) {
 		sc->sc_flags |= SCF_TIMESTAMP;
 		sc->sc_tsreflect = to->to_tsval;
 		sc->sc_ts = to->to_tsecr;
 		sc->sc_tsoff = to->to_tsecr - tcp_ts_getticks();
 	}
 
 	if (to->to_flags & TOF_SIGNATURE)
 		sc->sc_flags |= SCF_SIGNATURE;
 
 	sc->sc_rxmits = 0;
 
 	TCPSTAT_INC(tcps_sc_recvcookie);
 	return (sc);
 }
 
 #ifdef INVARIANTS
 /*
  * Debugging aid: decode the syncookie of the given segment into a scratch
  * entry and log every field (peer MSS, both window scales, SACK) that
  * differs from the supplied syncache entry sc.
  *
  * Nothing is logged when the cookie does not validate or when no log
  * address string could be obtained.  Always returns 0.
  */
 static int
 syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
     struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
     struct socket *lso)
 {
 	struct syncache scs, *scx;
 	char *s;
 
 	/* Decode into an on-stack entry so that sc itself is untouched. */
 	bzero(&scs, sizeof(scs));
 	scx = syncookie_lookup(inc, sch, &scs, th, to, lso);
 
 	if ((s = tcp_log_addrs(inc, th, NULL, NULL)) == NULL)
 		return (0);
 
 	if (scx != NULL) {
 		if (sc->sc_peer_mss != scx->sc_peer_mss)
 			log(LOG_DEBUG, "%s; %s: mss different %i vs %i\n",
 			    s, __func__, sc->sc_peer_mss, scx->sc_peer_mss);
 
 		if (sc->sc_requested_r_scale != scx->sc_requested_r_scale)
 			log(LOG_DEBUG, "%s; %s: rwscale different %i vs %i\n",
 			    s, __func__, sc->sc_requested_r_scale,
 			    scx->sc_requested_r_scale);
 
 		if (sc->sc_requested_s_scale != scx->sc_requested_s_scale)
 			log(LOG_DEBUG, "%s; %s: swscale different %i vs %i\n",
 			    s, __func__, sc->sc_requested_s_scale,
 			    scx->sc_requested_s_scale);
 
 		if ((sc->sc_flags & SCF_SACK) != (scx->sc_flags & SCF_SACK))
 			log(LOG_DEBUG, "%s; %s: SACK different\n", s, __func__);
 	}
 
 	/* s cannot be NULL here (checked above); the test is redundant. */
 	if (s != NULL)
 		free(s, M_TCPLOG);
 	return (0);
 }
 #endif /* INVARIANTS */
 
 /*
  * Rotate the syncookie secrets.
  *
  * arg is the struct tcp_syncache whose secrets are rotated.  The key slot
  * that is NOT currently selected by the low bit of secret.oddeven is
  * refilled with random bytes, then oddeven is incremented so that this
  * slot becomes the selected one.  The function finally re-arms its own
  * callout for SYNCOOKIE_LIFETIME * hz ticks later.
  */
 static void
 syncookie_reseed(void *arg)
 {
 	struct tcp_syncache *sc = arg;
 	uint8_t *secbits;
 	int secbit;
 
 	/*
 	 * Reseeding the secret doesn't have to be protected by a lock.
 	 * It only must be ensured that the new random values are visible
 	 * to all CPUs in a SMP environment.  The atomic with release
 	 * semantics ensures that.
 	 */
 	/* Pick the slot opposite to the one currently in use. */
 	secbit = (sc->secret.oddeven & 0x1) ? 0 : 1;
 	secbits = sc->secret.key[secbit];
 	arc4rand(secbits, SYNCOOKIE_SECRET_SIZE, 0);
 	/* Flipping the low bit publishes the freshly filled slot. */
 	atomic_add_rel_int(&sc->secret.oddeven, 1);
 
 	/* Reschedule ourself. */
 	callout_schedule(&sc->secret.reseed, SYNCOOKIE_LIFETIME * hz);
 }
 
 /*
  * Exports the syncache entries to userland so that netstat can display
  * them alongside the other sockets.  This function is intended to be
  * called only from tcp_pcblist.
  *
  * Due to concurrency on an active system, the number of pcbs exported
  * may have no relation to max_pcbs.  max_pcbs merely indicates the
  * amount of space the caller allocated for this function to use.
  *
  * req           - sysctl request the records are written to; its thread
  *                 credential decides which entries are visible.
  * max_pcbs      - upper bound on the number of records to export.
  * pcbs_exported - out: number of records actually written.
  *
  * Returns 0, or the error from SYSCTL_OUT() that stopped the export.
  * *pcbs_exported is set in either case.
  */
 int
 syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported)
 {
 	struct xtcpcb xt;
 	struct syncache *sc;
 	struct syncache_head *sch;
 	int count, error, i;
 
 	/* Walk every hash bucket, holding its lock while it is scanned. */
 	for (count = 0, error = 0, i = 0; i < V_tcp_syncache.hashsize; i++) {
 		sch = &V_tcp_syncache.hashbase[i];
 		SCH_LOCK(sch);
 		TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
 			if (count >= max_pcbs) {
 				SCH_UNLOCK(sch);
 				goto exit;
 			}
 			/* Skip entries the requesting credential may not see. */
 			if (cr_cansee(req->td->td_ucred, sc->sc_cred) != 0)
 				continue;
 			/*
 			 * Synthesize a record from the entry: only the
 			 * connection info comes from the syncache, the
 			 * rest is filled with fixed values.
 			 */
 			bzero(&xt, sizeof(xt));
 			xt.xt_len = sizeof(xt);
 			if (sc->sc_inc.inc_flags & INC_ISIPV6)
 				xt.xt_inp.inp_vflag = INP_IPV6;
 			else
 				xt.xt_inp.inp_vflag = INP_IPV4;
 			bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc, sizeof (struct in_conninfo));
 			xt.xt_tp.t_inpcb = &xt.xt_inp;
 			xt.xt_tp.t_state = TCPS_SYN_RECEIVED;
 			xt.xt_socket.xso_protocol = IPPROTO_TCP;
 			xt.xt_socket.xso_len = sizeof (struct xsocket);
 			xt.xt_socket.so_type = SOCK_STREAM;
 			xt.xt_socket.so_state = SS_ISCONNECTING;
 			error = SYSCTL_OUT(req, &xt, sizeof xt);
 			if (error) {
 				SCH_UNLOCK(sch);
 				goto exit;
 			}
 			count++;
 		}
 		SCH_UNLOCK(sch);
 	}
 exit:
 	*pcbs_exported = count;
 	return error;
 }
Index: head/sys/netinet6/in6_proto.c
===================================================================
--- head/sys/netinet6/in6_proto.c	(revision 298353)
+++ head/sys/netinet6/in6_proto.c	(revision 298354)
@@ -1,628 +1,627 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: in6_proto.c,v 1.91 2001/05/27 13:28:35 itojun Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in_proto.c	8.1 (Berkeley) 6/10/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_ipstealth.h"
 #include "opt_sctp.h"
 #include "opt_mpath.h"
 #include "opt_route.h"
 
 #include <sys/param.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/domain.h>
 #include <sys/mbuf.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/radix.h>
 #include <net/route.h>
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_encap.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/icmp6.h>
 
 #include <netinet/tcp.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <netinet6/tcp6_var.h>
 #include <netinet6/raw_ip6.h>
 #include <netinet6/udp6_var.h>
 #include <netinet6/pim6_var.h>
 #include <netinet6/nd6.h>
 
 #ifdef SCTP
 #include <netinet/in_pcb.h>
 #include <netinet/sctp_pcb.h>
 #include <netinet/sctp.h>
 #include <netinet/sctp_var.h>
 #include <netinet6/sctp6_var.h>
 #endif /* SCTP */
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/ipsec6.h>
 #endif /* IPSEC */
 
 #include <netinet6/ip6protosw.h>
 
 /*
  * TCP/IP protocol family: IP6, ICMP6, UDP, TCP.
  */
 FEATURE(inet6, "Internet Protocol version 6");
 
 /*
  * inet6domain is defined further down, after inet6sw[]; it is declared
  * here because every table entry stores its address in .pr_domain.
  */
 extern	struct domain inet6domain;
 /* No initializer here; used by entries without their own usrreqs. */
 static	struct pr_usrreqs nousrreqs;
 
 /* Both expand to 0 here, so OR-ing them into .pr_flags changes nothing. */
 #define PR_LISTEN	0
 #define PR_ABRTACPTDIS	0
 
 /* Spacer for loadable protocols. */
 #define IP6PROTOSPACER   			\
 {						\
 	.pr_domain =		&inet6domain,	\
 	.pr_protocol =		PROTO_SPACER,	\
 	.pr_usrreqs =		&nousrreqs	\
 }
 
 /*
  * Protocol switch table for the inet6 domain.  inet6domain (below) points
  * .dom_protosw at the first entry and .dom_protoswNPROTOSW one past the
  * last.  Entries guarded by "#ifndef INET" set .pr_init (and, for TCP,
  * .pr_slowtimo) only when INET is not configured, so that those routines
  * are not called twice.  The SCTP and IPSEC entries exist only when the
  * corresponding option is defined.
  */
 struct protosw inet6sw[] = {
 {
 	.pr_type =		0,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_IPV6,
 	.pr_init =		ip6_init,
 #ifdef VIMAGE
 	.pr_destroy =		ip6_destroy,
 #endif
 	.pr_slowtimo =		frag6_slowtimo,
 	.pr_drain =		frag6_drain,
 	.pr_usrreqs =		&nousrreqs,
 },
 {
 	.pr_type =		SOCK_DGRAM,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_UDP,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		udp6_input,
 	.pr_ctlinput =		udp6_ctlinput,
 	.pr_ctloutput =		ip6_ctloutput,
 #ifndef INET	/* Do not call initialization twice. */
 	.pr_init =		udp_init,
 #endif
 	.pr_usrreqs =		&udp6_usrreqs,
 },
 {
 	.pr_type =		SOCK_STREAM,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_TCP,
 	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN,
 	.pr_input =		tcp6_input,
 	.pr_ctlinput =		tcp6_ctlinput,
 	.pr_ctloutput =		tcp_ctloutput,
 #ifndef INET	/* don't call initialization and timeout routines twice */
 	.pr_init =		tcp_init,
 	.pr_slowtimo =		tcp_slowtimo,
 #endif
 	.pr_drain =		tcp_drain,
 	.pr_usrreqs =		&tcp6_usrreqs,
 },
 #ifdef SCTP
 {
 	.pr_type =		SOCK_SEQPACKET,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_SCTP,
 	.pr_flags =		PR_WANTRCVD,
 	.pr_input =		sctp6_input,
 	.pr_ctlinput =		sctp6_ctlinput,
 	.pr_ctloutput =	sctp_ctloutput,
 	.pr_drain =		sctp_drain,
 #ifndef INET	/* Do not call initialization twice. */
 	.pr_init =		sctp_init,
 #endif
 	.pr_usrreqs =		&sctp6_usrreqs
 },
 {
 	.pr_type =		SOCK_STREAM,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_SCTP,
 	.pr_flags =		PR_WANTRCVD,
 	.pr_input =		sctp6_input,
 	.pr_ctlinput =		sctp6_ctlinput,
 	.pr_ctloutput =		sctp_ctloutput,
 	.pr_drain =		sctp_drain,
 	.pr_usrreqs =		&sctp6_usrreqs
 },
 #endif /* SCTP */
 {
 	.pr_type =		SOCK_DGRAM,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_UDPLITE,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		udp6_input,
 	.pr_ctlinput =		udplite6_ctlinput,
 	.pr_ctloutput =		udp_ctloutput,
 #ifndef INET	/* Do not call initialization twice. */
 	.pr_init =		udplite_init,
 #endif
 	.pr_usrreqs =		&udp6_usrreqs,
 },
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_RAW,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		rip6_input,
 	.pr_output =		rip6_output,
 	.pr_ctlinput =		rip6_ctlinput,
 	.pr_ctloutput =		rip6_ctloutput,
 #ifndef INET	/* Do not call initialization twice. */
 	.pr_init =		rip_init,
 #endif
 	.pr_usrreqs =		&rip6_usrreqs
 },
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_ICMPV6,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_LASTHDR,
 	.pr_input =		icmp6_input,
 	.pr_output =		rip6_output,
 	.pr_ctlinput =		rip6_ctlinput,
 	.pr_ctloutput =		rip6_ctloutput,
 	.pr_fasttimo =		icmp6_fasttimo,
 	.pr_slowtimo =		icmp6_slowtimo,
 	.pr_usrreqs =		&rip6_usrreqs
 },
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_DSTOPTS,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		dest6_input,
 	.pr_usrreqs =		&nousrreqs
 },
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_ROUTING,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		route6_input,
 	.pr_usrreqs =		&nousrreqs
 },
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_FRAGMENT,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		frag6_input,
 	.pr_usrreqs =		&nousrreqs
 },
 #ifdef IPSEC
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_AH,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		ipsec6_common_input,
 	.pr_usrreqs =		&nousrreqs,
 },
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_ESP,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
         .pr_input =		ipsec6_common_input,
 	.pr_ctlinput =		esp6_ctlinput,
 	.pr_usrreqs =		&nousrreqs,
 },
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_IPCOMP,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
         .pr_input =		ipsec6_common_input,
 	.pr_usrreqs =		&nousrreqs,
 },
 #endif /* IPSEC */
 #ifdef INET
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_IPV4,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_LASTHDR,
 	.pr_input =		encap6_input,
 	.pr_output =		rip6_output,
 	.pr_ctloutput =		rip6_ctloutput,
 	.pr_init =		encap_init,
 	.pr_usrreqs =		&rip6_usrreqs
 },
 #endif /* INET */
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_IPV6,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_LASTHDR,
 	.pr_input =		encap6_input,
 	.pr_output =		rip6_output,
 	.pr_ctloutput =		rip6_ctloutput,
 	.pr_init =		encap_init,
 	.pr_usrreqs =		&rip6_usrreqs
 },
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_GRE,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_LASTHDR,
 	.pr_input =		encap6_input,
 	.pr_output =		rip6_output,
 	.pr_ctloutput =		rip6_ctloutput,
 	.pr_init =		encap_init,
 	.pr_usrreqs =		&rip6_usrreqs
 },
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_PIM,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_LASTHDR,
 	.pr_input =		encap6_input,
 	.pr_output =		rip6_output,
 	.pr_ctloutput =		rip6_ctloutput,
 	.pr_usrreqs =		&rip6_usrreqs
 },
 /* Spacer n-times for loadable protocols. */
 IP6PROTOSPACER,
 IP6PROTOSPACER,
 IP6PROTOSPACER,
 IP6PROTOSPACER,
 IP6PROTOSPACER,
 IP6PROTOSPACER,
 IP6PROTOSPACER,
 IP6PROTOSPACER,
 /* raw wildcard */
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		rip6_input,
 	.pr_output =		rip6_output,
 	.pr_ctloutput =		rip6_ctloutput,
 	.pr_usrreqs =		&rip6_usrreqs
 },
 };
 
 /* Routing-table attach/detach hooks referenced by inet6domain below. */
 extern int in6_inithead(void **, int);
 #ifdef VIMAGE
 extern int in6_detachhead(void **, int);
 #endif
 
 /*
  * Domain descriptor for AF_INET6 ("internet6").  It spans the inet6sw[]
  * table (first entry through one past the last) and supplies the
  * routing-table and per-interface attach/detach hooks.  With RADIX_MPATH
  * the rtattach hook is rn6_mpath_inithead, otherwise in6_inithead; the
  * rtdetach hook is set only under VIMAGE.
  */
 struct domain inet6domain = {
 	.dom_family =		AF_INET6,
 	.dom_name =		"internet6",
 	.dom_protosw =		(struct protosw *)inet6sw,
-	.dom_protoswNPROTOSW =	(struct protosw *)
-				&inet6sw[nitems(inet6sw)],
+	.dom_protoswNPROTOSW =	(struct protosw *)&inet6sw[nitems(inet6sw)],
 #ifdef RADIX_MPATH
 	.dom_rtattach =		rn6_mpath_inithead,
 #else
 	.dom_rtattach =		in6_inithead,
 #endif
 #ifdef VIMAGE
 	.dom_rtdetach =		in6_detachhead,
 #endif
 	.dom_ifattach =		in6_domifattach,
 	.dom_ifdetach =		in6_domifdetach,
 	.dom_ifmtu    =		in6_domifmtu
 };
 
 VNET_DOMAIN_SET(inet6);
 
 /*
  * Internet configuration info
  */
 /*
  * Compile-time default for ip6_forwarding: 1 when GATEWAY6 is defined,
  * 0 otherwise, unless IPV6FORWARDING was already defined.
  */
 #ifndef	IPV6FORWARDING
 #ifdef GATEWAY6
 #define	IPV6FORWARDING	1	/* forward IP6 packets not for us */
 #else
 #define	IPV6FORWARDING	0	/* don't forward IP6 packets not for us */
 #endif /* GATEWAY6 */
 #endif /* !IPV6FORWARDING */
 
 /* Default for ip6_sendredirects unless overridden at build time. */
 #ifndef	IPV6_SENDREDIRECTS
 #define	IPV6_SENDREDIRECTS	1
 #endif
 
 VNET_DEFINE(int, ip6_forwarding) = IPV6FORWARDING;	/* act as router? */
 VNET_DEFINE(int, ip6_sendredirects) = IPV6_SENDREDIRECTS;
 VNET_DEFINE(int, ip6_defhlim) = IPV6_DEFHLIM;
 VNET_DEFINE(int, ip6_defmcasthlim) = IPV6_DEFAULT_MULTICAST_HOPS;
 /*
  * The next four tune Router Advertisement handling; their meaning is
  * spelled out in the descriptions of the matching net.inet6.ip6 sysctls
  * further down in this file.
  */
 VNET_DEFINE(int, ip6_accept_rtadv) = 0;
 VNET_DEFINE(int, ip6_no_radr) = 0;
 VNET_DEFINE(int, ip6_norbit_raif) = 0;
 VNET_DEFINE(int, ip6_rfc6204w3) = 0;
 VNET_DEFINE(int, ip6_maxfragpackets);	/* initialized in frag6.c:frag6_init() */
 VNET_DEFINE(int, ip6_maxfrags);		/* initialized in frag6.c:frag6_init() */
 VNET_DEFINE(int, ip6_log_interval) = 5;
 VNET_DEFINE(int, ip6_hdrnestlimit) = 15;/* How many header options will we
 					 * process? */
 VNET_DEFINE(int, ip6_dad_count) = 1;	/* DupAddrDetectionTransmits */
 VNET_DEFINE(int, ip6_auto_flowlabel) = 1;
 VNET_DEFINE(int, ip6_use_deprecated) = 1;/* allow deprecated addr
 					 * (RFC2462 5.5.4) */
 VNET_DEFINE(int, ip6_rr_prune) = 5;	/* router renumbering prefix
 					 * walk list every 5 sec. */
 VNET_DEFINE(int, ip6_mcast_pmtu) = 0;	/* enable pMTU discovery for multicast? */
 VNET_DEFINE(int, ip6_v6only) = 1;
 
 VNET_DEFINE(time_t, ip6_log_time) = (time_t)0L;
 #ifdef IPSTEALTH
 VNET_DEFINE(int, ip6stealth) = 0;
 #endif
 VNET_DEFINE(int, nd6_onlink_ns_rfc4861) = 0;/* allow 'on-link' nd6 NS
 					     * (RFC 4861) */
 
 /* icmp6 */
 /*
  * BSDI4 defines these variables in in_proto.c...
  * XXX: what if we don't define INET? Should we define pmtu6_expire
  * or so? (jinmei@kame.net 19990310)
  */
 VNET_DEFINE(int, pmtu_expire) = 60*10;
 VNET_DEFINE(int, pmtu_probe) = 60*2;
 
 /* ICMPV6 parameters */
 VNET_DEFINE(int, icmp6_rediraccept) = 1;/* accept and process redirects */
 VNET_DEFINE(int, icmp6_redirtimeout) = 10 * 60;	/* 10 minutes */
 VNET_DEFINE(int, icmp6errppslim) = 100;		/* 100pps */
 /* control how to respond to NI queries */
 VNET_DEFINE(int, icmp6_nodeinfo) =
     (ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK);
 VNET_DEFINE(int, icmp6_nodeinfo_oldmcprefix) = 1;
 
 /*
  * sysctl related items.
  */
 /* Family node "inet6", registered under _net with number PF_INET6. */
 SYSCTL_NODE(_net,	PF_INET6,	inet6,	CTLFLAG_RW,	0,
 	"Internet6 Family");
 
 /* net.inet6: one child node per protocol, numbered by IPPROTO_* value. */
 SYSCTL_NODE(_net_inet6,	IPPROTO_IPV6,	ip6,	CTLFLAG_RW, 0,	"IP6");
 SYSCTL_NODE(_net_inet6,	IPPROTO_ICMPV6,	icmp6,	CTLFLAG_RW, 0,	"ICMP6");
 SYSCTL_NODE(_net_inet6,	IPPROTO_UDP,	udp6,	CTLFLAG_RW, 0,	"UDP6");
 SYSCTL_NODE(_net_inet6,	IPPROTO_TCP,	tcp6,	CTLFLAG_RW, 0,	"TCP6");
 #ifdef SCTP
 SYSCTL_NODE(_net_inet6,	IPPROTO_SCTP,	sctp6,	CTLFLAG_RW, 0,	"SCTP6");
 #endif
 #ifdef IPSEC
 /* Note: the ipsec6 node is registered under the IPPROTO_ESP number. */
 SYSCTL_NODE(_net_inet6,	IPPROTO_ESP,	ipsec6,	CTLFLAG_RW, 0,	"IPSEC6");
 #endif /* IPSEC */
 
 /* net.inet6.ip6 */
 /*
  * Handler for the temppltime sysctl (registered below with arg1 pointing
  * at ip6_temp_preferred_lifetime).
  *
  * Reports the current value through SYSCTL_OUT and, when a new value is
  * supplied, copies it into arg1.  The new preferred lifetime is rejected
  * with EINVAL, and the previous value restored, if it is smaller than
  * V_ip6_desync_factor + V_ip6_temp_regen_advance.  If SYSCTL_IN itself
  * fails, the previous value is restored and the copy-in error returned.
  */
 static int
 sysctl_ip6_temppltime(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	int old;
 
 	error = SYSCTL_OUT(req, arg1, sizeof(int));
 	if (error || !req->newptr)
 		return (error);
 	old = V_ip6_temp_preferred_lifetime;
 	error = SYSCTL_IN(req, arg1, sizeof(int));
 	if (error) {
 		/* Never validate or keep a value that was not fully read. */
 		V_ip6_temp_preferred_lifetime = old;
 		return (error);
 	}
 	if (V_ip6_temp_preferred_lifetime <
 	    V_ip6_desync_factor + V_ip6_temp_regen_advance) {
 		V_ip6_temp_preferred_lifetime = old;
 		return (EINVAL);
 	}
 	return (0);
 }
 
 /*
  * Handler for the tempvltime sysctl (registered below with arg1 pointing
  * at ip6_temp_valid_lifetime).
  *
  * Reports the current value through SYSCTL_OUT and, when a new value is
  * supplied, copies it into arg1.  A new valid lifetime smaller than
  * V_ip6_temp_preferred_lifetime is rejected with EINVAL and the previous
  * valid lifetime is put back.  If SYSCTL_IN itself fails, the previous
  * value is restored and the copy-in error returned.
  */
 static int
 sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	int old;
 
 	error = SYSCTL_OUT(req, arg1, sizeof(int));
 	if (error || !req->newptr)
 		return (error);
 	old = V_ip6_temp_valid_lifetime;
 	error = SYSCTL_IN(req, arg1, sizeof(int));
 	if (error) {
 		/* Never validate or keep a value that was not fully read. */
 		V_ip6_temp_valid_lifetime = old;
 		return (error);
 	}
 	if (V_ip6_temp_valid_lifetime < V_ip6_temp_preferred_lifetime) {
 		/*
 		 * Roll back the variable that was just written.  "old"
 		 * holds the previous *valid* lifetime, so it must not be
 		 * stored into the preferred lifetime.
 		 */
 		V_ip6_temp_valid_lifetime = old;
 		return (EINVAL);
 	}
 	return (0);
 }
 
 /*
  * Per-vnet (CTLFLAG_VNET) read/write integers under _net_inet6_ip6, plus
  * the export of the ip6stat statistics structure.
  */
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_forwarding), 0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_sendredirects), 0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(ip6_defhlim), 0, "");
 SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat,
 	ip6stat, "IP6 statistics (struct ip6stat, netinet6/ip6_var.h)");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0, "");
 /*
  * The description is built from adjacent string literals, so the first
  * piece must end in a space to keep "Router Advertisement" as two words.
  */
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0,
 	"Default value of per-interface flag for accepting ICMPv6 Router "
 	"Advertisement messages");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NO_RADR, no_radr,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_no_radr), 0,
 	"Default value of per-interface flag to control whether routers "
 	"sending ICMPv6 RA messages on that interface are added into the "
 	"default router list.");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NORBIT_RAIF, norbit_raif,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_norbit_raif), 0,
 	"Always set 0 to R flag in ICMPv6 NA messages when accepting RA"
 	" on the interface.");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RFC6204W3, rfc6204w3,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rfc6204w3), 0,
 	"Accept the default router list from ICMPv6 RA messages even "
 	"when packet forwarding enabled.");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_log_interval), 0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT, hdrnestlimit,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_hdrnestlimit), 0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DAD_COUNT, dad_count,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_dad_count), 0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_FLOWLABEL, auto_flowlabel,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_auto_flowlabel), 0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFMCASTHLIM, defmcasthlim,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_defmcasthlim), 0, "");
 /* Read-only (CTLFLAG_RD) string taken from __KAME_VERSION. */
 SYSCTL_STRING(_net_inet6_ip6, IPV6CTL_KAME_VERSION, kame_version,
 	CTLFLAG_RD, __KAME_VERSION, 0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEPRECATED, use_deprecated,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_deprecated), 0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RR_PRUNE, rr_prune,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rr_prune), 0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USETEMPADDR, use_tempaddr,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_tempaddr), 0, "");
 /*
  * The two temporary-address lifetimes go through the handlers
  * sysctl_ip6_temppltime() and sysctl_ip6_tempvltime(), which check a new
  * value before accepting it, instead of plain SYSCTL_INT.
  */
 SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_TEMPPLTIME, temppltime,
 	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
 	&VNET_NAME(ip6_temp_preferred_lifetime), 0,
    	sysctl_ip6_temppltime, "I", "");
 SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_TEMPVLTIME, tempvltime,
 	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
 	&VNET_NAME(ip6_temp_valid_lifetime), 0,
    	sysctl_ip6_tempvltime, "I", "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_V6ONLY, v6only,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_v6only), 0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL, auto_linklocal,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_auto_linklocal), 0,
 	"Default value of per-interface flag for automatically adding an IPv6"
 	" link-local address to interfaces when attached");
 SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats,
 	struct rip6stat, rip6stat,
 	"Raw IP6 statistics (struct rip6stat, netinet6/raw_ip6.h)");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_prefer_tempaddr), 0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0,"");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfrags), 0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_mcast_pmtu), 0, "");
 #ifdef IPSTEALTH
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(ip6stealth), 0, "");
 #endif
 
 /* net.inet6.icmp6 */
 /*
  * Redirect handling and the icmp6stat statistics export, followed by
  * nd6_* tunables whose variables are not defined in the visible part of
  * this file, and the node-information policy word.
  */
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0, "");
 SYSCTL_VNET_PCPUSTAT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats,
 	struct icmp6stat, icmp6stat,
 	"ICMPv6 statistics (struct icmp6stat, netinet/icmp6.h)");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_prune), 0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_delay), 0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_UMAXTRIES, nd6_umaxtries,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_umaxtries), 0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MMAXTRIES, nd6_mmaxtries,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_mmaxtries), 0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_USELOOPBACK, nd6_useloopback,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_useloopback), 0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_nodeinfo), 0, "");
 /* Exposes icmp6_nodeinfo_oldmcprefix (initialised to 1 above). */
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO_OLDMCPREFIX,
 	nodeinfo_oldmcprefix, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(icmp6_nodeinfo_oldmcprefix), 0,
 	"Join old IPv6 NI group address in draft-ietf-ipngwg-icmp-name-lookup"
 	" for compatibility with KAME implementation.");
 /*
  * ICMPv6 error rate limit (icmp6errppslim, initialised to 100 above) and
  * the remaining nd6 knobs.
  */
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, errppslimit,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6errppslim), 0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, nd6_maxnudhint,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxnudhint), 0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG, nd6_debug,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_debug), 0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861,
 	nd6_onlink_ns_rfc4861, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(nd6_onlink_ns_rfc4861), 0,
 	"Accept 'on-link' nd6 NS in compliance with RFC 4861.");