diff --git a/sys/compat/linuxkpi/common/include/linux/module.h b/sys/compat/linuxkpi/common/include/linux/module.h
index 2a4fdc5a11a9..25d775dd8df9 100644
--- a/sys/compat/linuxkpi/common/include/linux/module.h
+++ b/sys/compat/linuxkpi/common/include/linux/module.h
@@ -1,107 +1,108 @@
 /*-
  * Copyright (c) 2010 Isilon Systems, Inc.
  * Copyright (c) 2010 iX Systems, Inc.
  * Copyright (c) 2010 Panasas, Inc.
  * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 #ifndef	_LINUX_MODULE_H_
 #define	_LINUX_MODULE_H_

 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include

 #define MODULE_AUTHOR(name)
 #define MODULE_DESCRIPTION(name)
 #define MODULE_LICENSE(name)
 #define MODULE_INFO(tag, info)
 #define MODULE_FIRMWARE(firmware)
 #define MODULE_SUPPORTED_DEVICE(name)

 #define THIS_MODULE	((struct module *)0)

 #define	__MODULE_STRING(x)	__stringify(x)

 /* OFED pre-module initialization */
 #define	SI_SUB_OFED_PREINIT	(SI_SUB_ROOT_CONF - 2)
 /* OFED default module initialization */
 #define	SI_SUB_OFED_MODINIT	(SI_SUB_ROOT_CONF - 1)

 #include

 static inline void
 _module_run(void *arg)
 {
 	void (*fn)(void);
 #ifdef OFED_DEBUG_INIT
 	char name[1024];
 	caddr_t pc;
 	long offset;

 	pc = (caddr_t)arg;
 	if (linker_search_symbol_name(pc, name, sizeof(name), &offset) != 0)
 		printf("Running ??? (%p)\n", pc);
 	else
 		printf("Running %s (%p)\n", name, pc);
 #endif
 	fn = arg;
 	fn();
 }

 #define	module_init(fn)							\
 	SYSINIT(fn, SI_SUB_OFED_MODINIT, SI_ORDER_FIRST, _module_run, (fn))

 #define	module_exit(fn)							\
 	SYSUNINIT(fn, SI_SUB_OFED_MODINIT, SI_ORDER_SECOND, _module_run, (fn))

 /*
  * The following two macros are a workaround for not having a module
  * load and unload order resolver:
  */
 #define	module_init_order(fn, order)					\
 	SYSINIT(fn, SI_SUB_OFED_MODINIT, (order), _module_run, (fn))

 #define	module_exit_order(fn, order)					\
 	SYSUNINIT(fn, SI_SUB_OFED_MODINIT, (order), _module_run, (fn))

 #define	module_get(module)
 #define	module_put(module)
 #define	try_module_get(module)	1

 #define	postcore_initcall(fn)	module_init(fn)

 #endif	/* _LINUX_MODULE_H_ */
diff --git a/sys/contrib/rdma/krping/krping_dev.c b/sys/contrib/rdma/krping/krping_dev.c
index 01927419fe33..eea3c772ea4f 100644
--- a/sys/contrib/rdma/krping/krping_dev.c
+++ b/sys/contrib/rdma/krping/krping_dev.c
@@ -1,231 +1,231 @@
 /*
  * This code lifted from:
  *	Simple `echo' pseudo-device KLD
  *	Murray Stokely
  *	Converted to 5.X by Søren (Xride) Straarup
  */

 /*
  * /bin/echo "server,port=9999,addr=192.168.69.142,validate" > /dev/krping
  * /bin/echo "client,port=9999,addr=192.168.69.142,validate" > /dev/krping
  */

 #include
 __FBSDID("$FreeBSD$");

 #include
+#include	/* defines used in kernel.h and module.h */
 #include
 #include	/* uprintf */
 #include
-#include	/* defines used in kernel.h */
 #include	/* types used in module initialization */
 #include	/* cdevsw struct */
 #include	/* uio struct */
 #include
 #include
 #include
 #include

 #include "krping.h"

 #define BUFFERSIZE 512

 SYSCTL_NODE(_dev, OID_AUTO, krping, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "kernel rping module");

 int krping_debug = 0;
 SYSCTL_INT(_dev_krping, OID_AUTO, debug, CTLFLAG_RW, &krping_debug, 0 , "");

 /* Function prototypes */
 static d_open_t      krping_open;
 static d_close_t     krping_close;
 static d_read_t      krping_read;
 static d_write_t     krping_write;
 static d_purge_t     krping_purge;

 /* Character device entry points */
 static struct cdevsw krping_cdevsw = {
 	.d_version = D_VERSION,
 	.d_open = krping_open,
 	.d_close = krping_close,
 	.d_read = krping_read,
 	.d_write = krping_write,
 	.d_purge = krping_purge,
 	.d_name = "krping",
 };

 typedef struct s_krping {
 	char msg[BUFFERSIZE];
 	int len;
 } krping_t;

 struct stats_list_entry {
 	STAILQ_ENTRY(stats_list_entry) link;
 	struct krping_stats *stats;
 };
 STAILQ_HEAD(stats_list, stats_list_entry);

 /* vars */
 static struct cdev *krping_dev;

 static int
 krping_loader(struct module *m, int what, void *arg)
 {
 	int err = 0;

 	switch (what) {
 	case MOD_LOAD:                /* kldload */
 		krping_dev = make_dev(&krping_cdevsw, 0, UID_ROOT, GID_WHEEL,
 		    0600, "krping");
 		printf("Krping device loaded.\n");
 		break;
 	case MOD_UNLOAD:
 		destroy_dev(krping_dev);
 		printf("Krping device unloaded.\n");
 		break;
 	default:
 		err = EOPNOTSUPP;
 		break;
 	}
 	return (err);
 }

 static int
 krping_open(struct cdev *dev, int oflags, int devtype, struct thread *p)
 {
 	return (0);
 }

 static int
 krping_close(struct cdev *dev, int fflag, int devtype, struct thread *p)
 {
 	return 0;
 }

 static void
 krping_copy_stats(struct krping_stats *stats, void *arg)
 {
 	struct stats_list_entry *s;
 	struct stats_list *list = arg;

 	s = malloc(sizeof(*s), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (s == NULL)
 		return;
 	if (stats != NULL) {
 		s->stats = malloc(sizeof(*stats), M_DEVBUF, M_NOWAIT | M_ZERO);
 		if (s->stats == NULL) {
 			free(s, M_DEVBUF);
 			return;
 		}
 		*s->stats = *stats;
 	}
 	STAILQ_INSERT_TAIL(list, s, link);
 }

 static int
 krping_read(struct cdev *dev, struct uio *uio, int ioflag)
 {
 	int num = 1;
 	struct stats_list list;
 	struct stats_list_entry *e;

 	STAILQ_INIT(&list);
 	krping_walk_cb_list(krping_copy_stats, &list);

 	if (STAILQ_EMPTY(&list))
 		return (0);

 	uprintf("krping: %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n",
 	    "num", "device", "snd bytes", "snd msgs", "rcv bytes", "rcv msgs",
 	    "wr bytes", "wr msgs", "rd bytes", "rd msgs");
 	while (!STAILQ_EMPTY(&list)) {
 		e = STAILQ_FIRST(&list);
 		STAILQ_REMOVE_HEAD(&list, link);
 		if (e->stats == NULL)
 			uprintf("krping: %d listen\n", num);
 		else {
 			struct krping_stats *stats = e->stats;

 			uprintf("krping: %4d %10s %10llu %10llu %10llu %10llu "
 			    "%10llu %10llu %10llu %10llu\n", num, stats->name,
 			    stats->send_bytes, stats->send_msgs,
 			    stats->recv_bytes, stats->recv_msgs,
 			    stats->write_bytes, stats->write_msgs,
 			    stats->read_bytes, stats->read_msgs);
 			free(stats, M_DEVBUF);
 		}
 		num++;
 		free(e, M_DEVBUF);
 	}
 	return (0);
 }

 static int
 krping_write(struct cdev *dev, struct uio *uio, int ioflag)
 {
 	int err = 0;
 	int amt;
 	int remain = BUFFERSIZE;
 	char *cp;
 	krping_t *krpingmsg;

 	krpingmsg = malloc(sizeof *krpingmsg, M_DEVBUF, M_WAITOK|M_ZERO);
 	if (!krpingmsg) {
 		uprintf("Could not malloc mem!\n");
 		return ENOMEM;
 	}

 	cp = krpingmsg->msg;
 	while (uio->uio_resid) {
 		amt = MIN(uio->uio_resid, remain);
 		if (amt == 0)
 			break;

 		/* Copy the string in from user memory to kernel memory */
 		err = uiomove(cp, amt, uio);
 		if (err) {
 			uprintf("Write failed: bad address!\n");
 			goto done;
 		}
 		cp += amt;
 		remain -= amt;
 	}

 	if (uio->uio_resid != 0) {
 		uprintf("Message too big. max size is %d!\n", BUFFERSIZE);
 		err = EMSGSIZE;
 		goto done;
 	}

 	/* null terminate and remove the \n */
 	cp--;
 	*cp = 0;
 	krpingmsg->len = (unsigned long)(cp - krpingmsg->msg);
 	uprintf("krping: write string = |%s|\n", krpingmsg->msg);

 	err = krping_doit(krpingmsg->msg);
 done:
 	free(krpingmsg, M_DEVBUF);
 	return(err);
 }

 static void
 krping_purge(struct cdev *dev __unused)
 {
 	krping_cancel_all();
 }

 int
 krping_sigpending(void)
 {
 	return (SIGPENDING(curthread));
 }

 DEV_MODULE(krping, krping_loader, NULL);
 MODULE_DEPEND(krping, ibcore, 1, 1, 1);
diff --git a/sys/dev/videomode/videomode.c b/sys/dev/videomode/videomode.c
index a1c7f0a82290..aa576d21b623 100644
--- a/sys/dev/videomode/videomode.c
+++ b/sys/dev/videomode/videomode.c
@@ -1,130 +1,131 @@
 /*	$FreeBSD$	*/

 /*
  * THIS FILE AUTOMATICALLY GENERATED.  DO NOT EDIT.
  *
  * generated from:
  *	NetBSD: modelines,v 1.9 2011/03/30 18:45:04 jdc Exp
  */

 #include
 __FBSDID("$FreeBSD$");

 #include
+#include
 #include
 #include

 MODULE_VERSION(videomode, 1);

 /*
  * These macros help the modelines below fit on one line.
*/ #define HP VID_PHSYNC #define HN VID_NHSYNC #define VP VID_PVSYNC #define VN VID_NVSYNC #define I VID_INTERLACE #define DS VID_DBLSCAN #define M(nm,hr,vr,clk,hs,he,ht,vs,ve,vt,f) \ { clk, hr, hs, he, ht, vr, vs, ve, vt, f, nm } const struct videomode videomode_list[] = { M("640x350x85",640,350,31500,672,736,832,382,385,445,HP|VN), M("640x400x85",640,400,31500,672,736,832,401,404,445,HN|VP), M("720x400x70",720,400,28320,738,846,900,412,414,449,HN|VP), M("720x400x85",720,400,35500,756,828,936,401,404,446,HN|VP), M("720x400x87",720,400,35500,738,846,900,421,423,449,HN|VN), M("640x480x60",640,480,25175,656,752,800,490,492,525,HN|VN), M("640x480x72",640,480,31500,664,704,832,489,492,520,HN|VN), M("640x480x75",640,480,31500,656,720,840,481,484,500,HN|VN), M("640x480x85",640,480,36000,696,752,832,481,484,509,HN|VN), M("800x600x56",800,600,36000,824,896,1024,601,603,625,HP|VP), M("800x600x60",800,600,40000,840,968,1056,601,605,628,HP|VP), M("800x600x72",800,600,50000,856,976,1040,637,643,666,HP|VP), M("800x600x75",800,600,49500,816,896,1056,601,604,625,HP|VP), M("800x600x85",800,600,56250,832,896,1048,601,604,631,HP|VP), M("1024x768x87i",1024,768,44900,1032,1208,1264,768,776,817,HP|VP|I), M("1024x768x60",1024,768,65000,1048,1184,1344,771,777,806,HN|VN), M("1024x768x70",1024,768,75000,1048,1184,1328,771,777,806,HN|VN), M("1024x768x75",1024,768,78750,1040,1136,1312,769,772,800,HP|VP), M("1024x768x85",1024,768,94500,1072,1168,1376,769,772,808,HP|VP), M("1024x768x89",1024,768,100000,1108,1280,1408,768,780,796,HP|VP), M("1152x864x75",1152,864,108000,1216,1344,1600,865,868,900,HP|VP), M("1280x768x75",1280,768,105640,1312,1712,1744,782,792,807,HN|VP), M("1280x960x60",1280,960,108000,1376,1488,1800,961,964,1000,HP|VP), M("1280x960x85",1280,960,148500,1344,1504,1728,961,964,1011,HP|VP), M("1280x1024x60",1280,1024,108000,1328,1440,1688,1025,1028,1066,HP|VP), M("1280x1024x70",1280,1024,126000,1328,1440,1688,1025,1028,1066,HP|VP), M("1280x1024x75",1280,1024,135000,1296,1440,1688,1025,1028,1066,HP|VP), M("1280x1024x85",1280,1024,157500,1344,1504,1728,1025,1028,1072,HP|VP), M("1600x1200x60",1600,1200,162000,1664,1856,2160,1201,1204,1250,HP|VP), M("1600x1200x65",1600,1200,175500,1664,1856,2160,1201,1204,1250,HP|VP), M("1600x1200x70",1600,1200,189000,1664,1856,2160,1201,1204,1250,HP|VP), M("1600x1200x75",1600,1200,202500,1664,1856,2160,1201,1204,1250,HP|VP), M("1600x1200x85",1600,1200,229500,1664,1856,2160,1201,1204,1250,HP|VP), M("1680x1050x60",1680,1050,147140,1784,1968,2256,1051,1054,1087,HP|VP), M("1792x1344x60",1792,1344,204800,1920,2120,2448,1345,1348,1394,HN|VP), M("1792x1344x75",1792,1344,261000,1888,2104,2456,1345,1348,1417,HN|VP), M("1856x1392x60",1856,1392,218300,1952,2176,2528,1393,1396,1439,HN|VP), M("1856x1392x75",1856,1392,288000,1984,2208,2560,1393,1396,1500,HN|VP), M("1920x1440x60",1920,1440,234000,2048,2256,2600,1441,1444,1500,HN|VP), M("1920x1440x75",1920,1440,297000,2064,2288,2640,1441,1444,1500,HN|VP), M("832x624x74",832,624,57284,864,928,1152,625,628,667,HN|VN), M("1152x768x54",1152,768,64995,1178,1314,1472,771,777,806,HP|VP), M("1400x1050x60",1400,1050,122000,1488,1640,1880,1052,1064,1082,HP|VP), M("1400x1050x74",1400,1050,155800,1464,1784,1912,1052,1064,1090,HP|VP), M("1152x900x66",1152,900,94500,1192,1320,1528,902,906,937,HN|VN), M("1152x900x76",1152,900,105560,1168,1280,1472,902,906,943,HN|VN), /* Derived Double Scan Modes */ M("320x175x85",320,175,15750,336,368,416,191,192,222,HP|VN|DS), M("320x200x85",320,200,15750,336,368,416,200,202,222,HN|VP|DS), 
M("360x200x70",360,200,14160,369,423,450,206,207,224,HN|VP|DS), M("360x200x85",360,200,17750,378,414,468,200,202,223,HN|VP|DS), M("360x200x87",360,200,17750,369,423,450,210,211,224,HN|VN|DS), M("320x240x60",320,240,12587,328,376,400,245,246,262,HN|VN|DS), M("320x240x72",320,240,15750,332,352,416,244,246,260,HN|VN|DS), M("320x240x75",320,240,15750,328,360,420,240,242,250,HN|VN|DS), M("320x240x85",320,240,18000,348,376,416,240,242,254,HN|VN|DS), M("400x300x56",400,300,18000,412,448,512,300,301,312,HP|VP|DS), M("400x300x60",400,300,20000,420,484,528,300,302,314,HP|VP|DS), M("400x300x72",400,300,25000,428,488,520,318,321,333,HP|VP|DS), M("400x300x75",400,300,24750,408,448,528,300,302,312,HP|VP|DS), M("400x300x85",400,300,28125,416,448,524,300,302,315,HP|VP|DS), M("512x384x87i",512,384,22450,516,604,632,384,388,408,HP|VP|DS|I), M("512x384x60",512,384,32500,524,592,672,385,388,403,HN|VN|DS), M("512x384x70",512,384,37500,524,592,664,385,388,403,HN|VN|DS), M("512x384x75",512,384,39375,520,568,656,384,386,400,HP|VP|DS), M("512x384x85",512,384,47250,536,584,688,384,386,404,HP|VP|DS), M("512x384x89",512,384,50000,554,640,704,384,390,398,HP|VP|DS), M("576x432x75",576,432,54000,608,672,800,432,434,450,HP|VP|DS), M("640x384x75",640,384,52820,656,856,872,391,396,403,HN|VP|DS), M("640x480x60",640,480,54000,688,744,900,480,482,500,HP|VP|DS), M("640x480x85",640,480,74250,672,752,864,480,482,505,HP|VP|DS), M("640x512x60",640,512,54000,664,720,844,512,514,533,HP|VP|DS), M("640x512x70",640,512,63000,664,720,844,512,514,533,HP|VP|DS), M("640x512x75",640,512,67500,648,720,844,512,514,533,HP|VP|DS), M("640x512x85",640,512,78750,672,752,864,512,514,536,HP|VP|DS), M("800x600x60",800,600,81000,832,928,1080,600,602,625,HP|VP|DS), M("800x600x65",800,600,87750,832,928,1080,600,602,625,HP|VP|DS), M("800x600x70",800,600,94500,832,928,1080,600,602,625,HP|VP|DS), M("800x600x75",800,600,101250,832,928,1080,600,602,625,HP|VP|DS), M("800x600x85",800,600,114750,832,928,1080,600,602,625,HP|VP|DS), M("840x525x60",840,525,73570,892,984,1128,525,527,543,HP|VP|DS), M("896x672x60",896,672,102400,960,1060,1224,672,674,697,HN|VP|DS), M("896x672x75",896,672,130500,944,1052,1228,672,674,708,HN|VP|DS), M("928x696x60",928,696,109150,976,1088,1264,696,698,719,HN|VP|DS), M("928x696x75",928,696,144000,992,1104,1280,696,698,750,HN|VP|DS), M("960x720x60",960,720,117000,1024,1128,1300,720,722,750,HN|VP|DS), M("960x720x75",960,720,148500,1032,1144,1320,720,722,750,HN|VP|DS), M("416x312x74",416,312,28642,432,464,576,312,314,333,HN|VN|DS), M("576x384x54",576,384,32497,589,657,736,385,388,403,HP|VP|DS), M("700x525x60",700,525,61000,744,820,940,526,532,541,HP|VP|DS), M("700x525x74",700,525,77900,732,892,956,526,532,545,HP|VP|DS), M("576x450x66",576,450,47250,596,660,764,451,453,468,HN|VN|DS), M("576x450x76",576,450,52780,584,640,736,451,453,471,HN|VN|DS), }; const int videomode_count = 46; diff --git a/sys/fs/fuse/fuse_device.c b/sys/fs/fuse/fuse_device.c index f8807d6d1c26..157c3802ec7e 100644 --- a/sys/fs/fuse/fuse_device.c +++ b/sys/fs/fuse/fuse_device.c @@ -1,601 +1,602 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 
* * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_internal.h" #include "fuse_ipc.h" #include #include SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. 
Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , device, trace, "int", "char*"); static struct cdev *fuse_dev; static d_kqfilter_t fuse_device_filter; static d_open_t fuse_device_open; static d_poll_t fuse_device_poll; static d_read_t fuse_device_read; static d_write_t fuse_device_write; static struct cdevsw fuse_device_cdevsw = { .d_kqfilter = fuse_device_filter, .d_open = fuse_device_open, .d_name = "fuse", .d_poll = fuse_device_poll, .d_read = fuse_device_read, .d_write = fuse_device_write, .d_version = D_VERSION, }; static int fuse_device_filt_read(struct knote *kn, long hint); static int fuse_device_filt_write(struct knote *kn, long hint); static void fuse_device_filt_detach(struct knote *kn); struct filterops fuse_device_rfiltops = { .f_isfd = 1, .f_detach = fuse_device_filt_detach, .f_event = fuse_device_filt_read, }; struct filterops fuse_device_wfiltops = { .f_isfd = 1, .f_event = fuse_device_filt_write, }; /**************************** * * >>> Fuse device op defs * ****************************/ static void fdata_dtor(void *arg) { struct fuse_data *fdata; struct fuse_ticket *tick; fdata = arg; if (fdata == NULL) return; fdata_set_dead(fdata); FUSE_LOCK(); fuse_lck_mtx_lock(fdata->aw_mtx); /* wakup poll()ers */ selwakeuppri(&fdata->ks_rsel, PZERO + 1); /* Don't let syscall handlers wait in vain */ while ((tick = fuse_aw_pop(fdata))) { fuse_lck_mtx_lock(tick->tk_aw_mtx); fticket_set_answered(tick); tick->tk_aw_errno = ENOTCONN; wakeup(tick); fuse_lck_mtx_unlock(tick->tk_aw_mtx); FUSE_ASSERT_AW_DONE(tick); fuse_ticket_drop(tick); } fuse_lck_mtx_unlock(fdata->aw_mtx); /* Cleanup unsent operations */ fuse_lck_mtx_lock(fdata->ms_mtx); while ((tick = fuse_ms_pop(fdata))) { fuse_ticket_drop(tick); } fuse_lck_mtx_unlock(fdata->ms_mtx); FUSE_UNLOCK(); fdata_trydestroy(fdata); } static int fuse_device_filter(struct cdev *dev, struct knote *kn) { struct fuse_data *data; int error; error = devfs_get_cdevpriv((void **)&data); if (error == 0 && kn->kn_filter == EVFILT_READ) { kn->kn_fop = &fuse_device_rfiltops; kn->kn_hook = data; knlist_add(&data->ks_rsel.si_note, kn, 0); error = 0; } else if (error == 0 && kn->kn_filter == EVFILT_WRITE) { kn->kn_fop = &fuse_device_wfiltops; error = 0; } else if (error == 0) { error = EINVAL; kn->kn_data = error; } return (error); } static void fuse_device_filt_detach(struct knote *kn) { struct fuse_data *data; data = (struct fuse_data*)kn->kn_hook; MPASS(data != NULL); knlist_remove(&data->ks_rsel.si_note, kn, 0); kn->kn_hook = NULL; } static int fuse_device_filt_read(struct knote *kn, long hint) { struct fuse_data *data; int ready; data = (struct fuse_data*)kn->kn_hook; MPASS(data != NULL); mtx_assert(&data->ms_mtx, MA_OWNED); if (fdata_get_dead(data)) { kn->kn_flags |= EV_EOF; kn->kn_fflags = ENODEV; kn->kn_data = 1; ready = 1; } else if (STAILQ_FIRST(&data->ms_head)) { MPASS(data->ms_count >= 1); kn->kn_data = data->ms_count; ready = 1; } else { ready = 0; } return (ready); } static int fuse_device_filt_write(struct knote *kn, long hint) { kn->kn_data = 0; /* The device is always ready to write, so we return 1*/ return (1); } /* * Resources are set up on a per-open basis */ static int fuse_device_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { struct fuse_data *fdata; int error; SDT_PROBE2(fusefs, , device, trace, 1, "device open"); fdata = fdata_alloc(dev, td->td_ucred); error = devfs_set_cdevpriv(fdata, fdata_dtor); if (error != 0) fdata_trydestroy(fdata); else SDT_PROBE2(fusefs, , 
device, trace, 1, "device open success"); return (error); } int fuse_device_poll(struct cdev *dev, int events, struct thread *td) { struct fuse_data *data; int error, revents = 0; error = devfs_get_cdevpriv((void **)&data); if (error != 0) return (events & (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM)); if (events & (POLLIN | POLLRDNORM)) { fuse_lck_mtx_lock(data->ms_mtx); if (fdata_get_dead(data) || STAILQ_FIRST(&data->ms_head)) revents |= events & (POLLIN | POLLRDNORM); else selrecord(td, &data->ks_rsel); fuse_lck_mtx_unlock(data->ms_mtx); } if (events & (POLLOUT | POLLWRNORM)) { revents |= events & (POLLOUT | POLLWRNORM); } return (revents); } /* * fuse_device_read hangs on the queue of VFS messages. * When it's notified that there is a new one, it picks that and * passes up to the daemon */ int fuse_device_read(struct cdev *dev, struct uio *uio, int ioflag) { int err; struct fuse_data *data; struct fuse_ticket *tick; void *buf; int buflen; SDT_PROBE2(fusefs, , device, trace, 1, "fuse device read"); err = devfs_get_cdevpriv((void **)&data); if (err != 0) return (err); fuse_lck_mtx_lock(data->ms_mtx); again: if (fdata_get_dead(data)) { SDT_PROBE2(fusefs, , device, trace, 2, "we know early on that reader should be kicked so we " "don't wait for news"); fuse_lck_mtx_unlock(data->ms_mtx); return (ENODEV); } if (!(tick = fuse_ms_pop(data))) { /* check if we may block */ if (ioflag & O_NONBLOCK) { /* get outa here soon */ fuse_lck_mtx_unlock(data->ms_mtx); return (EAGAIN); } else { err = msleep(data, &data->ms_mtx, PCATCH, "fu_msg", 0); if (err != 0) { fuse_lck_mtx_unlock(data->ms_mtx); return (fdata_get_dead(data) ? ENODEV : err); } tick = fuse_ms_pop(data); } } if (!tick) { /* * We can get here if fuse daemon suddenly terminates, * eg, by being hit by a SIGKILL * -- and some other cases, too, tho not totally clear, when * (cv_signal/wakeup_one signals the whole process ?) */ SDT_PROBE2(fusefs, , device, trace, 1, "no message on thread"); goto again; } fuse_lck_mtx_unlock(data->ms_mtx); if (fdata_get_dead(data)) { /* * somebody somewhere -- eg., umount routine -- * wants this liaison finished off */ SDT_PROBE2(fusefs, , device, trace, 2, "reader is to be sacked"); if (tick) { SDT_PROBE2(fusefs, , device, trace, 2, "weird -- " "\"kick\" is set tho there is message"); FUSE_ASSERT_MS_DONE(tick); fuse_ticket_drop(tick); } return (ENODEV); /* This should make the daemon get off * of us */ } SDT_PROBE2(fusefs, , device, trace, 1, "fuse device read message successfully"); buf = tick->tk_ms_fiov.base; buflen = tick->tk_ms_fiov.len; /* * Why not ban mercilessly stupid daemons who can't keep up * with us? (There is no much use of a partial read here...) */ /* * XXX note that in such cases Linux FUSE throws EIO at the * syscall invoker and stands back to the message queue. The * rationale should be made clear (and possibly adopt that * behaviour). Keeping the current scheme at least makes * fallacy as loud as possible... 
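Stepping back from this comment to the shape of the whole function: fuse_device_read() is a blocking message pump. The daemon's read(2) sleeps in msleep(9) until the filesystem enqueues a ticket (or returns EAGAIN under O_NONBLOCK), then copies exactly one whole request out with uiomove(9); a buffer too small for the message kills the session via fdata_set_dead(), as the check below enforces. A condensed, hypothetical sketch of that pump follows, reusing the driver's own names (fuse_ms_pop(), ms_mtx, tk_ms_fiov) but omitting the dead-session and requeue subtleties; it is illustrative, not the driver's literal control flow.

/* Condensed, hypothetical sketch of the /dev/fuse read pump. */
static int
fuse_read_pump_sketch(struct fuse_data *data, struct uio *uio)
{
	struct fuse_ticket *tick;
	int err;

	fuse_lck_mtx_lock(data->ms_mtx);
	while ((tick = fuse_ms_pop(data)) == NULL) {
		/* Sleep until a request is queued for the daemon. */
		err = msleep(data, &data->ms_mtx, PCATCH, "fu_msg", 0);
		if (err != 0) {
			fuse_lck_mtx_unlock(data->ms_mtx);
			return (err);
		}
	}
	fuse_lck_mtx_unlock(data->ms_mtx);

	/* No partial reads: the buffer must fit the whole message. */
	if (uio->uio_resid < tick->tk_ms_fiov.len)
		return (ENODEV);
	return (uiomove(tick->tk_ms_fiov.base, tick->tk_ms_fiov.len, uio));
}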
*/ if (uio->uio_resid < buflen) { fdata_set_dead(data); SDT_PROBE2(fusefs, , device, trace, 2, "daemon is stupid, kick it off..."); err = ENODEV; } else { err = uiomove(buf, buflen, uio); } FUSE_ASSERT_MS_DONE(tick); fuse_ticket_drop(tick); return (err); } static inline int fuse_ohead_audit(struct fuse_out_header *ohead, struct uio *uio) { if (uio->uio_resid + sizeof(struct fuse_out_header) != ohead->len) { SDT_PROBE2(fusefs, , device, trace, 1, "Format error: body size " "differs from size claimed by header"); return (EINVAL); } if (uio->uio_resid && ohead->unique != 0 && ohead->error) { SDT_PROBE2(fusefs, , device, trace, 1, "Format error: non zero error but message had a body"); return (EINVAL); } return (0); } SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_notify, "struct fuse_out_header*"); SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_missing_ticket, "uint64_t"); SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_found, "struct fuse_ticket*"); /* * fuse_device_write first reads the header sent by the daemon. * If that's OK, looks up ticket/callback node by the unique id seen in header. * If the callback node contains a handler function, the uio is passed over * that. */ static int fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag) { struct fuse_out_header ohead; int err = 0; struct fuse_data *data; struct mount *mp; struct fuse_ticket *tick, *itick, *x_tick; int found = 0; err = devfs_get_cdevpriv((void **)&data); if (err != 0) return (err); mp = data->mp; if (uio->uio_resid < sizeof(struct fuse_out_header)) { SDT_PROBE2(fusefs, , device, trace, 1, "fuse_device_write got less than a header!"); fdata_set_dead(data); return (EINVAL); } if ((err = uiomove(&ohead, sizeof(struct fuse_out_header), uio)) != 0) return (err); if (data->linux_errnos != 0 && ohead.error != 0) { err = -ohead.error; if (err < 0 || err >= nitems(linux_to_bsd_errtbl)) return (EINVAL); /* '-', because it will get flipped again below */ ohead.error = -linux_to_bsd_errtbl[err]; } /* * We check header information (which is redundant) and compare it * with what we see. If we see some inconsistency we discard the * whole answer and proceed on as if it had never existed. In * particular, no pretender will be woken up, regardless the * "unique" value in the header. */ if ((err = fuse_ohead_audit(&ohead, uio))) { fdata_set_dead(data); return (err); } /* Pass stuff over to callback if there is one installed */ /* Looking for ticket with the unique id of header */ fuse_lck_mtx_lock(data->aw_mtx); TAILQ_FOREACH_SAFE(tick, &data->aw_head, tk_aw_link, x_tick) { if (tick->tk_unique == ohead.unique) { SDT_PROBE1(fusefs, , device, fuse_device_write_found, tick); found = 1; fuse_aw_remove(tick); break; } } if (found && tick->irq_unique > 0) { /* * Discard the FUSE_INTERRUPT ticket that tried to interrupt * this operation */ TAILQ_FOREACH_SAFE(itick, &data->aw_head, tk_aw_link, x_tick) { if (itick->tk_unique == tick->irq_unique) { fuse_aw_remove(itick); fuse_ticket_drop(itick); break; } } tick->irq_unique = 0; } fuse_lck_mtx_unlock(data->aw_mtx); if (found) { if (tick->tk_aw_handler) { /* * We found a callback with proper handler. In this * case the out header will be 0wnd by the callback, * so the fun of freeing that is left for her. * (Then, by all chance, she'll just get that's done * via ticket_drop(), so no manual mucking * around...) 
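Summing up the matching logic above: every request the kernel sends carries a unique id, and the daemon's reply echoes it back, so fuse_device_write() correlates a reply with its pending ticket by scanning data->aw_head for a matching tk_unique. A self-contained sketch of that correlation idiom, with hypothetical struct and function names:

#include <sys/queue.h>
#include <stdint.h>
#include <stddef.h>

struct pending {
	TAILQ_ENTRY(pending) link;
	uint64_t unique;
};
TAILQ_HEAD(pending_head, pending);

static struct pending *
take_pending(struct pending_head *head, uint64_t unique)
{
	struct pending *p, *tmp;

	/* Safe traversal: we may unlink the element we are visiting. */
	TAILQ_FOREACH_SAFE(p, head, link, tmp) {
		if (p->unique == unique) {
			TAILQ_REMOVE(head, p, link);
			return (p);
		}
	}
	return (NULL);	/* reply for an unknown or expired request */
}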
*/ SDT_PROBE2(fusefs, , device, trace, 1, "pass ticket to a callback"); /* Sanitize the linuxism of negative errnos */ ohead.error *= -1; memcpy(&tick->tk_aw_ohead, &ohead, sizeof(ohead)); err = tick->tk_aw_handler(tick, uio); } else { /* pretender doesn't wanna do anything with answer */ SDT_PROBE2(fusefs, , device, trace, 1, "stuff devalidated, so we drop it"); } /* * As aw_mtx was not held during the callback execution the * ticket may have been inserted again. However, this is safe * because fuse_ticket_drop() will deal with refcount anyway. */ fuse_ticket_drop(tick); } else if (ohead.unique == 0){ /* unique == 0 means asynchronous notification */ SDT_PROBE1(fusefs, , device, fuse_device_write_notify, &ohead); switch (ohead.error) { case FUSE_NOTIFY_INVAL_ENTRY: err = fuse_internal_invalidate_entry(mp, uio); break; case FUSE_NOTIFY_INVAL_INODE: err = fuse_internal_invalidate_inode(mp, uio); break; case FUSE_NOTIFY_RETRIEVE: case FUSE_NOTIFY_STORE: /* * Unimplemented. I don't know of any file systems * that use them, and the protocol isn't sound anyway, * since the notification messages don't include the * inode's generation number. Without that, it's * possible to manipulate the cache of the wrong vnode. * Finally, it's not defined what this message should * do for a file with dirty cache. */ case FUSE_NOTIFY_POLL: /* Unimplemented. See comments in fuse_vnops */ default: /* Not implemented */ err = ENOSYS; } } else { /* no callback at all! */ SDT_PROBE1(fusefs, , device, fuse_device_write_missing_ticket, ohead.unique); if (ohead.error == -EAGAIN) { /* * This was probably a response to a FUSE_INTERRUPT * operation whose original operation is already * complete. We can't store FUSE_INTERRUPT tickets * indefinitely because their responses are optional. * So we delete them when the original operation * completes. And sadly the fuse_header_out doesn't * identify the opcode, so we have to guess. */ err = 0; } else { err = EINVAL; } } return (err); } int fuse_device_init(void) { fuse_dev = make_dev(&fuse_device_cdevsw, 0, UID_ROOT, GID_OPERATOR, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH, "fuse"); if (fuse_dev == NULL) return (ENOMEM); return (0); } void fuse_device_destroy(void) { MPASS(fuse_dev != NULL); destroy_dev(fuse_dev); } diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c index 78398a990d7d..f85d17517ee0 100644 --- a/sys/fs/fuse/fuse_io.c +++ b/sys/fs/fuse/fuse_io.c @@ -1,1132 +1,1133 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_file.h" #include "fuse_node.h" #include "fuse_internal.h" #include "fuse_ipc.h" #include "fuse_io.h" /* * Set in a struct buf to indicate that the write came from the buffer cache * and the originating cred and pid are no longer known. */ #define B_FUSEFS_WRITE_CACHE B_FS_FLAG1 SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. 
Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , io, trace, "int", "char*"); static int fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end); static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh); static int fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid); static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, off_t filesize, int ioflag, bool pages); static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid); /* Invalidate a range of cached data, whether dirty of not */ static int fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end) { struct buf *bp; daddr_t left_lbn, end_lbn, right_lbn; off_t new_filesize; int iosize, left_on, right_on, right_blksize; iosize = fuse_iosize(vp); left_lbn = start / iosize; end_lbn = howmany(end, iosize); left_on = start & (iosize - 1); if (left_on != 0) { bp = getblk(vp, left_lbn, iosize, PCATCH, 0, 0); if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyend >= left_on) { /* * Flush the dirty buffer, because we don't have a * byte-granular way to record which parts of the * buffer are valid. */ bwrite(bp); if (bp->b_error) return (bp->b_error); } else { brelse(bp); } } right_on = end & (iosize - 1); if (right_on != 0) { right_lbn = end / iosize; new_filesize = MAX(filesize, end); right_blksize = MIN(iosize, new_filesize - iosize * right_lbn); bp = getblk(vp, right_lbn, right_blksize, PCATCH, 0, 0); if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyoff < right_on) { /* * Flush the dirty buffer, because we don't have a * byte-granular way to record which parts of the * buffer are valid. */ bwrite(bp); if (bp->b_error) return (bp->b_error); } else { brelse(bp); } } v_inval_buf_range(vp, left_lbn, end_lbn, iosize); return (0); } SDT_PROBE_DEFINE5(fusefs, , io, io_dispatch, "struct vnode*", "struct uio*", "int", "struct ucred*", "struct fuse_filehandle*"); SDT_PROBE_DEFINE4(fusefs, , io, io_dispatch_filehandles_closed, "struct vnode*", "struct uio*", "int", "struct ucred*"); int fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred, pid_t pid) { struct fuse_filehandle *fufh; int err, directio; int fflag; bool closefufh = false; MPASS(vp->v_type == VREG || vp->v_type == VDIR); fflag = (uio->uio_rw == UIO_READ) ? FREAD : FWRITE; err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { /* * nfsd will do I/O without first doing VOP_OPEN. We * must implicitly open the file here */ err = fuse_filehandle_open(vp, fflag, &fufh, curthread, cred); closefufh = true; } else if (err) { SDT_PROBE4(fusefs, , io, io_dispatch_filehandles_closed, vp, uio, ioflag, cred); printf("FUSE: io dispatch: filehandles are closed\n"); return err; } if (err) goto out; SDT_PROBE5(fusefs, , io, io_dispatch, vp, uio, ioflag, cred, fufh); /* * Ideally, when the daemon asks for direct io at open time, the * standard file flag should be set according to this, so that would * just change the default mode, which later on could be changed via * fcntl(2). * But this doesn't work, the O_DIRECT flag gets cleared at some point * (don't know where). 
So to make any use of the Fuse direct_io option, * we hardwire it into the file's private data (similarly to Linux, * btw.). */ directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)); switch (uio->uio_rw) { case UIO_READ: fuse_vnode_update(vp, FN_ATIMECHANGE); if (directio) { SDT_PROBE2(fusefs, , io, trace, 1, "direct read of vnode"); err = fuse_read_directbackend(vp, uio, cred, fufh); } else { SDT_PROBE2(fusefs, , io, trace, 1, "buffered read of vnode"); err = fuse_read_biobackend(vp, uio, ioflag, cred, fufh, pid); } break; case UIO_WRITE: fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE); if (directio) { off_t start, end, filesize; bool pages = (ioflag & IO_VMIO) != 0; SDT_PROBE2(fusefs, , io, trace, 1, "direct write of vnode"); err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) goto out; start = uio->uio_offset; end = start + uio->uio_resid; if (!pages) { err = fuse_inval_buf_range(vp, filesize, start, end); if (err) return (err); } err = fuse_write_directbackend(vp, uio, cred, fufh, filesize, ioflag, pages); } else { SDT_PROBE2(fusefs, , io, trace, 1, "buffered write of vnode"); if (!fsess_opt_writeback(vnode_mount(vp))) ioflag |= IO_SYNC; err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag, pid); } fuse_internal_clear_suid_on_write(vp, cred, uio->uio_td); break; default: panic("uninterpreted mode passed to fuse_io_dispatch"); } out: if (closefufh) fuse_filehandle_close(vp, fufh, curthread, cred); return (err); } SDT_PROBE_DEFINE4(fusefs, , io, read_bio_backend_start, "int", "int", "int", "int"); SDT_PROBE_DEFINE2(fusefs, , io, read_bio_backend_feed, "int", "struct buf*"); SDT_PROBE_DEFINE4(fusefs, , io, read_bio_backend_end, "int", "ssize_t", "int", "struct buf*"); static int fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid) { struct buf *bp; struct mount *mp; struct fuse_data *data; daddr_t lbn, nextlbn; int bcount, nextsize; int err, n = 0, on = 0, seqcount; off_t filesize; const int biosize = fuse_iosize(vp); mp = vnode_mount(vp); data = fuse_get_mpdata(mp); if (uio->uio_offset < 0) return (EINVAL); seqcount = ioflag >> IO_SEQSHIFT; err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) return err; for (err = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if (fuse_isdeadfs(vp)) { err = ENXIO; break; } if (filesize - uio->uio_offset <= 0) break; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); if ((off_t)lbn * biosize >= filesize) { bcount = 0; } else if ((off_t)(lbn + 1) * biosize > filesize) { bcount = filesize - (off_t)lbn *biosize; } else { bcount = biosize; } nextlbn = lbn + 1; nextsize = MIN(biosize, filesize - nextlbn * biosize); SDT_PROBE4(fusefs, , io, read_bio_backend_start, biosize, (int)lbn, on, bcount); if (bcount < biosize) { /* If near EOF, don't do readahead */ err = bread(vp, lbn, bcount, NOCRED, &bp); } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { /* Try clustered read */ long totread = uio->uio_resid + on; seqcount = MIN(seqcount, data->max_readahead_blocks + 1); err = cluster_read(vp, filesize, lbn, bcount, NOCRED, totread, seqcount, 0, &bp); } else if (seqcount > 1 && data->max_readahead_blocks >= 1) { /* Try non-clustered readahead */ err = breadn(vp, lbn, bcount, &nextlbn, &nextsize, 1, NOCRED, &bp); } else { /* Just read what was requested */ err = bread(vp, lbn, bcount, NOCRED, &bp); } if (err) { brelse(bp); bp = NULL; break; } /* * on is the offset into the current bp. 
Figure out how many * bytes we can copy out of the bp. Note that bcount is * NOT DEV_BSIZE aligned. * * Then figure out how many bytes we can copy into the uio. */ n = 0; if (on < bcount - bp->b_resid) n = MIN((unsigned)(bcount - bp->b_resid - on), uio->uio_resid); if (n > 0) { SDT_PROBE2(fusefs, , io, read_bio_backend_feed, n, bp); err = uiomove(bp->b_data + on, n, uio); } vfs_bio_brelse(bp, ioflag); SDT_PROBE4(fusefs, , io, read_bio_backend_end, err, uio->uio_resid, n, bp); if (bp->b_resid > 0) { /* Short read indicates EOF */ break; } } return (err); } SDT_PROBE_DEFINE1(fusefs, , io, read_directbackend_start, "struct fuse_read_in*"); SDT_PROBE_DEFINE3(fusefs, , io, read_directbackend_complete, "struct fuse_dispatcher*", "struct fuse_read_in*", "struct uio*"); static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh) { struct fuse_data *data; struct fuse_dispatcher fdi; struct fuse_read_in *fri; int err = 0; data = fuse_get_mpdata(vp->v_mount); if (uio->uio_resid == 0) return (0); fdisp_init(&fdi, 0); /* * XXX In "normal" case we use an intermediate kernel buffer for * transmitting data from daemon's context to ours. Eventually, we should * get rid of this. Anyway, if the target uio lives in sysspace (we are * called from pageops), and the input data doesn't need kernel-side * processing (we are not called from readdir) we can already invoke * an optimized, "peer-to-peer" I/O routine. */ while (uio->uio_resid > 0) { fdi.iosize = sizeof(*fri); fdisp_make_vp(&fdi, FUSE_READ, vp, uio->uio_td, cred); fri = fdi.indata; fri->fh = fufh->fh_id; fri->offset = uio->uio_offset; fri->size = MIN(uio->uio_resid, fuse_get_mpdata(vp->v_mount)->max_read); if (fuse_libabi_geq(data, 7, 9)) { /* See comment regarding FUSE_WRITE_LOCKOWNER */ fri->read_flags = 0; fri->flags = fufh_type_2_fflags(fufh->fufh_type); } SDT_PROBE1(fusefs, , io, read_directbackend_start, fri); if ((err = fdisp_wait_answ(&fdi))) goto out; SDT_PROBE3(fusefs, , io, read_directbackend_complete, &fdi, fri, uio); if ((err = uiomove(fdi.answ, MIN(fri->size, fdi.iosize), uio))) break; if (fdi.iosize < fri->size) { /* * Short read. Should only happen at EOF or with * direct io. */ break; } } out: fdisp_destroy(&fdi); return (err); } static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, off_t filesize, int ioflag, bool pages) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_data *data; struct fuse_write_in *fwi; struct fuse_write_out *fwo; struct fuse_dispatcher fdi; size_t chunksize; void *fwi_data; off_t as_written_offset; int diff; int err = 0; bool direct_io = fufh->fuse_open_flags & FOPEN_DIRECT_IO; bool wrote_anything = false; uint32_t write_flags; data = fuse_get_mpdata(vp->v_mount); /* * Don't set FUSE_WRITE_LOCKOWNER in write_flags. It can't be set * accurately when using POSIX AIO, libfuse doesn't use it, and I'm not * aware of any file systems that do. It was an attempt to add * Linux-style mandatory locking to the FUSE protocol, but mandatory * locking is deprecated even on Linux. See Linux commit * f33321141b273d60cbb3a8f56a5489baad82ba5e . */ /* * Set FUSE_WRITE_CACHE whenever we don't know the uid, gid, and/or pid * that originated a write. For example when writing from the * writeback cache. I don't know of a single file system that cares, * but the protocol says we're supposed to do this. 
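Past the flag selection discussed in this comment, the body of fuse_write_directbackend() below is essentially a chunking loop: the uio is carved into requests of at most data->max_write bytes, and a short reply from the daemon causes the unwritten tail to be shifted down and resent (the retry: label). A userspace analogue of that strategy, assuming an ordinary file descriptor rather than the FUSE transport:

#include <stddef.h>
#include <unistd.h>

/* Hypothetical analogue of the max_write chunking in the loop below. */
ssize_t
chunked_write(int fd, const char *buf, size_t len, size_t max_write)
{
	size_t done = 0;

	while (done < len) {
		size_t chunk = len - done;
		if (chunk > max_write)
			chunk = max_write;
		ssize_t n = write(fd, buf + done, chunk);
		if (n < 0)
			return (-1);	/* caller inspects errno */
		/*
		 * A short write just advances the cursor; the remainder
		 * is resent on the next iteration, like the retry: path.
		 */
		done += (size_t)n;
	}
	return ((ssize_t)done);
}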
*/ write_flags = !pages && ( (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)) || !fsess_opt_writeback(vnode_mount(vp))) ? 0 : FUSE_WRITE_CACHE; if (uio->uio_resid == 0) return (0); if (ioflag & IO_APPEND) uio_setoffset(uio, filesize); if (vn_rlimit_fsize(vp, uio, uio->uio_td)) return (EFBIG); fdisp_init(&fdi, 0); while (uio->uio_resid > 0) { size_t sizeof_fwi; if (fuse_libabi_geq(data, 7, 9)) { sizeof_fwi = sizeof(*fwi); } else { sizeof_fwi = FUSE_COMPAT_WRITE_IN_SIZE; } chunksize = MIN(uio->uio_resid, data->max_write); fdi.iosize = sizeof_fwi + chunksize; fdisp_make_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred); fwi = fdi.indata; fwi->fh = fufh->fh_id; fwi->offset = uio->uio_offset; fwi->size = chunksize; fwi->write_flags = write_flags; if (fuse_libabi_geq(data, 7, 9)) { fwi->flags = fufh_type_2_fflags(fufh->fufh_type); } fwi_data = (char *)fdi.indata + sizeof_fwi; if ((err = uiomove(fwi_data, chunksize, uio))) break; retry: err = fdisp_wait_answ(&fdi); if (err == ERESTART || err == EINTR || err == EWOULDBLOCK) { /* * Rewind the uio so dofilewrite will know it's * incomplete */ uio->uio_resid += fwi->size; uio->uio_offset -= fwi->size; /* * Change ERESTART into EINTR because we can't rewind * uio->uio_iov. Basically, once uiomove(9) has been * called, it's impossible to restart a syscall. */ if (err == ERESTART) err = EINTR; break; } else if (err) { break; } else { wrote_anything = true; } fwo = ((struct fuse_write_out *)fdi.answ); /* Adjust the uio in the case of short writes */ diff = fwi->size - fwo->size; as_written_offset = uio->uio_offset - diff; if (as_written_offset - diff > filesize) fuse_vnode_setsize(vp, as_written_offset, false); if (as_written_offset - diff >= filesize) fvdat->flag &= ~FN_SIZECHANGE; if (diff < 0) { fuse_warn(data, FSESS_WARN_WROTE_LONG, "wrote more data than we provided it."); err = EINVAL; break; } else if (diff > 0) { /* Short write */ if (!direct_io) { fuse_warn(data, FSESS_WARN_SHORT_WRITE, "short writes are only allowed with " "direct_io."); } if (ioflag & IO_DIRECT) { /* Return early */ uio->uio_resid += diff; uio->uio_offset -= diff; break; } else { /* Resend the unwritten portion of data */ fdi.iosize = sizeof_fwi + diff; /* Refresh fdi without clearing data buffer */ fdisp_refresh_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred); fwi = fdi.indata; MPASS2(fwi == fdi.indata, "FUSE dispatcher " "reallocated despite no increase in " "size?"); void *src = (char*)fwi_data + fwo->size; memmove(fwi_data, src, diff); fwi->fh = fufh->fh_id; fwi->offset = as_written_offset; fwi->size = diff; fwi->write_flags = write_flags; goto retry; } } } fdisp_destroy(&fdi); if (wrote_anything) fuse_vnode_undirty_cached_timestamps(vp, false); return (err); } SDT_PROBE_DEFINE6(fusefs, , io, write_biobackend_start, "int64_t", "int", "int", "struct uio*", "int", "bool"); SDT_PROBE_DEFINE2(fusefs, , io, write_biobackend_append_race, "long", "int"); SDT_PROBE_DEFINE2(fusefs, , io, write_biobackend_issue, "int", "struct buf*"); static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct buf *bp; daddr_t lbn; off_t filesize; int bcount; int n, on, seqcount, err = 0; bool last_page; const int biosize = fuse_iosize(vp); seqcount = ioflag >> IO_SEQSHIFT; KASSERT(uio->uio_rw == UIO_WRITE, ("fuse_write_biobackend mode")); if (vp->v_type != VREG) return (EIO); if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); err = 
fuse_vnode_size(vp, &filesize, cred, curthread); if (err) return err; if (ioflag & IO_APPEND) uio_setoffset(uio, filesize); if (vn_rlimit_fsize(vp, uio, uio->uio_td)) return (EFBIG); do { bool direct_append, extending; if (fuse_isdeadfs(vp)) { err = ENXIO; break; } lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); n = MIN((unsigned)(biosize - on), uio->uio_resid); again: /* Get or create a buffer for the write */ direct_append = uio->uio_offset == filesize && n; if (uio->uio_offset + n < filesize) { extending = false; if ((off_t)(lbn + 1) * biosize < filesize) { /* Not the file's last block */ bcount = biosize; } else { /* The file's last block */ bcount = filesize - (off_t)lbn * biosize; } } else { extending = true; bcount = on + n; } if (howmany(((off_t)lbn * biosize + on + n - 1), PAGE_SIZE) >= howmany(filesize, PAGE_SIZE)) last_page = true; else last_page = false; if (direct_append) { /* * Take care to preserve the buffer's B_CACHE state so * as not to cause an unnecessary read. */ bp = getblk(vp, lbn, on, PCATCH, 0, 0); if (bp != NULL) { uint32_t save = bp->b_flags & B_CACHE; allocbuf(bp, bcount); bp->b_flags |= save; } } else { bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); } if (!bp) { err = EINTR; break; } if (extending) { /* * Extend file _after_ locking buffer so we won't race * with other readers */ err = fuse_vnode_setsize(vp, uio->uio_offset + n, false); filesize = uio->uio_offset + n; fvdat->flag |= FN_SIZECHANGE; if (err) { brelse(bp); break; } } SDT_PROBE6(fusefs, , io, write_biobackend_start, lbn, on, n, uio, bcount, direct_append); /* * Issue a READ if B_CACHE is not set. In special-append * mode, B_CACHE is based on the buffer prior to the write * op and is typically set, avoiding the read. If a read * is required in special append mode, the server will * probably send us a short-read since we extended the file * on our end, resulting in b_resid == 0 and, thusly, * B_CACHE getting set. * * We can also avoid issuing the read if the write covers * the entire buffer. We have to make sure the buffer state * is reasonable in this case since we will not be initiating * I/O. See the comments in kern/vfs_bio.c's getblk() for * more information. * * B_CACHE may also be set due to the buffer being cached * normally. */ if (on == 0 && n == bcount) { bp->b_flags |= B_CACHE; bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; } if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); fuse_io_strategy(vp, bp); if ((err = bp->b_error)) { brelse(bp); break; } if (bp->b_resid > 0) { /* * Short read indicates EOF. Update file size * from the server and try again. */ SDT_PROBE2(fusefs, , io, trace, 1, "Short read during a RMW"); brelse(bp); err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) break; else goto again; } } if (bp->b_wcred == NOCRED) bp->b_wcred = crhold(cred); /* * If dirtyend exceeds file size, chop it down. This should * not normally occur but there is an append race where it * might occur XXX, so we log it. * * If the chopping creates a reverse-indexed or degenerate * situation with dirtyoff/end, we 0 both of them. */ if (bp->b_dirtyend > bcount) { SDT_PROBE2(fusefs, , io, write_biobackend_append_race, (long)bp->b_blkno * biosize, bp->b_dirtyend - bcount); bp->b_dirtyend = bcount; } if (bp->b_dirtyoff >= bp->b_dirtyend) bp->b_dirtyoff = bp->b_dirtyend = 0; /* * If the new write will leave a contiguous dirty * area, just update the b_dirtyoff and b_dirtyend, * otherwise force a write rpc of the old dirty area. 
* * While it is possible to merge discontiguous writes due to * our having a B_CACHE buffer ( and thus valid read data * for the hole), we don't because it could lead to * significant cache coherency problems with multiple clients, * especially if locking is implemented later on. * * as an optimization we could theoretically maintain * a linked list of discontinuous areas, but we would still * have to commit them separately so there isn't much * advantage to it except perhaps a bit of asynchronization. */ if (bp->b_dirtyend > 0 && (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { /* * Yes, we mean it. Write out everything to "storage" * immediately, without hesitation. (Apart from other * reasons: the only way to know if a write is valid * if its actually written out.) */ SDT_PROBE2(fusefs, , io, write_biobackend_issue, 0, bp); bwrite(bp); if (bp->b_error == EINTR) { err = EINTR; break; } goto again; } err = uiomove((char *)bp->b_data + on, n, uio); if (err) { bp->b_ioflags |= BIO_ERROR; bp->b_error = err; brelse(bp); break; /* TODO: vfs_bio_clrbuf like ffs_write does? */ } /* * Only update dirtyoff/dirtyend if not a degenerate * condition. */ if (n) { if (bp->b_dirtyend > 0) { bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); bp->b_dirtyend = MAX((on + n), bp->b_dirtyend); } else { bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } vfs_bio_set_valid(bp, on, n); } vfs_bio_set_flags(bp, ioflag); bp->b_flags |= B_FUSEFS_WRITE_CACHE; if (ioflag & IO_SYNC) { SDT_PROBE2(fusefs, , io, write_biobackend_issue, 2, bp); if (!(ioflag & IO_VMIO)) bp->b_flags &= ~B_FUSEFS_WRITE_CACHE; err = bwrite(bp); } else if (vm_page_count_severe() || buf_dirty_count_severe() || (ioflag & IO_ASYNC)) { bp->b_flags |= B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 3, bp); bawrite(bp); } else if (on == 0 && n == bcount) { if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { bp->b_flags |= B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 4, bp); cluster_write(vp, bp, filesize, seqcount, 0); } else { SDT_PROBE2(fusefs, , io, write_biobackend_issue, 5, bp); bawrite(bp); } } else if (ioflag & IO_DIRECT) { bp->b_flags |= B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 6, bp); bawrite(bp); } else { bp->b_flags &= ~B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 7, bp); bdwrite(bp); } if (err) break; } while (uio->uio_resid > 0 && n > 0); return (err); } int fuse_io_strategy(struct vnode *vp, struct buf *bp) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh; struct ucred *cred; struct uio *uiop; struct uio uio; struct iovec io; off_t filesize; int error = 0; int fflag; /* We don't know the true pid when we're dealing with the cache */ pid_t pid = 0; const int biosize = fuse_iosize(vp); MPASS(vp->v_type == VREG || vp->v_type == VDIR); MPASS(bp->b_iocmd == BIO_READ || bp->b_iocmd == BIO_WRITE); fflag = bp->b_iocmd == BIO_READ ? FREAD : FWRITE; cred = bp->b_iocmd == BIO_READ ? bp->b_rcred : bp->b_wcred; error = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); if (bp->b_iocmd == BIO_READ && error == EBADF) { /* * This may be a read-modify-write operation on a cached file * opened O_WRONLY. The FUSE protocol allows this. 
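Looking back at the buffered-write loop in fuse_write_biobackend() just above: a struct buf records only one contiguous dirty byte range, [b_dirtyoff, b_dirtyend), so a write that would leave a hole relative to the existing range forces the old range out first, exactly as that comment explains. A small standalone sketch of the merge-or-flush decision, with a hypothetical struct mirroring the on/n arithmetic:

#include <stdbool.h>

struct dirty_range {
	int off;	/* b_dirtyoff analogue */
	int end;	/* b_dirtyend analogue; 0 when the buffer is clean */
};

/* Returns true if [on, on + n) merged; false means caller must flush. */
static bool
dirty_merge(struct dirty_range *d, int on, int n)
{
	if (d->end > 0 && (on > d->end || on + n < d->off))
		return (false);		/* discontiguous: flush old range */
	if (d->end == 0) {
		d->off = on;
		d->end = on + n;
	} else {
		if (on < d->off)
			d->off = on;
		if (on + n > d->end)
			d->end = on + n;
	}
	return (true);
}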
*/ error = fuse_filehandle_get(vp, FWRITE, &fufh, cred, pid); } if (error) { printf("FUSE: strategy: filehandles are closed\n"); bp->b_ioflags |= BIO_ERROR; bp->b_error = error; bufdone(bp); return (error); } uiop = &uio; uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = curthread; /* * clear BIO_ERROR and B_INVAL state prior to initiating the I/O. We * do this here so we do not have to do it in all the code that * calls us. */ bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; KASSERT(!(bp->b_flags & B_DONE), ("fuse_io_strategy: bp %p already marked done", bp)); if (bp->b_iocmd == BIO_READ) { ssize_t left; io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; uiop->uio_offset = ((off_t)bp->b_lblkno) * biosize; error = fuse_read_directbackend(vp, uiop, cred, fufh); /* * Store the amount we failed to read in the buffer's private * field, so callers can truncate the file if necessary' */ if (!error && uiop->uio_resid) { int nread = bp->b_bcount - uiop->uio_resid; left = uiop->uio_resid; bzero((char *)bp->b_data + nread, left); if ((fvdat->flag & FN_SIZECHANGE) == 0) { /* * A short read with no error, when not using * direct io, and when no writes are cached, * indicates EOF caused by a server-side * truncation. Clear the attr cache so we'll * pick up the new file size and timestamps. * * We must still bzero the remaining buffer so * uninitialized data doesn't get exposed by a * future truncate that extends the file. * * To prevent lock order problems, we must * truncate the file upstack, not here. */ SDT_PROBE2(fusefs, , io, trace, 1, "Short read of a clean file"); fuse_vnode_clear_attr_cache(vp); } else { /* * If dirty writes _are_ cached beyond EOF, * that indicates a newly created hole that the * server doesn't know about. Those don't pose * any problem. * XXX: we don't currently track whether dirty * writes are cached beyond EOF, before EOF, or * both. */ SDT_PROBE2(fusefs, , io, trace, 1, "Short read of a dirty file"); uiop->uio_resid = 0; } } if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_error = error; } } else { /* * Setup for actual write */ /* * If the file's size is cached, use that value, even if the * cache is expired. At this point we're already committed to * writing something. If the FUSE server has changed the * file's size behind our back, it's too late for us to do * anything about it. In particular, we can't invalidate any * part of the file's buffers because VOP_STRATEGY is called * with them already locked. */ filesize = fvdat->cached_attrs.va_size; /* filesize must've been cached by fuse_vnop_open. 
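The write branch just below converts the buffer's dirty range into a file-relative transfer: the offset is b_lblkno * biosize + b_dirtyoff and the length is b_dirtyend - b_dirtyoff. Plugging in hypothetical example numbers makes the mapping concrete:

#include <stdio.h>

int
main(void)
{
	const long long biosize = 65536;	/* e.g. fuse_iosize() result */
	const long long lblkno = 3;		/* logical block number */
	const int dirtyoff = 512, dirtyend = 4096;

	long long file_off = lblkno * biosize + dirtyoff;
	int len = dirtyend - dirtyoff;

	printf("write %d bytes at file offset %lld\n", len, file_off);
	/* prints: write 3584 bytes at file offset 197120 */
	return (0);
}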
*/ KASSERT(filesize != VNOVAL, ("filesize should've been cached")); if ((off_t)bp->b_lblkno * biosize + bp->b_dirtyend > filesize) bp->b_dirtyend = filesize - (off_t)bp->b_lblkno * biosize; if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiop->uio_offset = (off_t)bp->b_lblkno * biosize + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; bool pages = bp->b_flags & B_FUSEFS_WRITE_CACHE; error = fuse_write_directbackend(vp, uiop, cred, fufh, filesize, 0, pages); if (error == EINTR || error == ETIMEDOUT) { bp->b_flags &= ~(B_INVAL | B_NOCACHE); if ((bp->b_flags & B_PAGING) == 0) { bdirty(bp); bp->b_flags &= ~B_DONE; } if ((error == EINTR || error == ETIMEDOUT) && (bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; } else { if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_flags |= B_INVAL; bp->b_error = error; } bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; bufdone(bp); return (0); } } bp->b_resid = uiop->uio_resid; bufdone(bp); return (error); } int fuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td) { return (vn_fsync_buf(vp, waitfor)); } /* * Flush and invalidate all dirty buffers. If another process is already * doing the flush, just wait for completion. */ int fuse_io_invalbuf(struct vnode *vp, struct thread *td) { struct fuse_vnode_data *fvdat = VTOFUD(vp); int error = 0; if (VN_IS_DOOMED(vp)) return 0; ASSERT_VOP_ELOCKED(vp, "fuse_io_invalbuf"); while (fvdat->flag & FN_FLUSHINPROG) { struct proc *p = td->td_proc; if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) return EIO; fvdat->flag |= FN_FLUSHWANT; tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz); error = 0; if (p != NULL) { PROC_LOCK(p); if (SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) error = EINTR; PROC_UNLOCK(p); } if (error == EINTR) return EINTR; } fvdat->flag |= FN_FLUSHINPROG; if (vp->v_bufobj.bo_object != NULL) { VM_OBJECT_WLOCK(vp->v_bufobj.bo_object); vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object); } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); while (error) { if (error == ERESTART || error == EINTR) { fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return EINTR; } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); } fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return (error); } diff --git a/sys/fs/fuse/fuse_main.c b/sys/fs/fuse/fuse_main.c index ac15ad960725..824458db72cb 100644 --- a/sys/fs/fuse/fuse_main.c +++ b/sys/fs/fuse/fuse_main.c @@ -1,181 +1,182 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_file.h" #include "fuse_ipc.h" #include "fuse_internal.h" #include "fuse_node.h" static void fuse_bringdown(eventhandler_tag eh_tag); static int fuse_loader(struct module *m, int what, void *arg); struct mtx fuse_mtx; extern struct vfsops fuse_vfsops; extern struct cdevsw fuse_cdevsw; extern struct vop_vector fuse_fifonops; extern uma_zone_t fuse_pbuf_zone; static struct vfsconf fuse_vfsconf = { .vfc_version = VFS_VERSION, .vfc_name = "fusefs", .vfc_vfsops = &fuse_vfsops, .vfc_typenum = -1, .vfc_flags = VFCF_JAIL | VFCF_SYNTHETIC }; SYSCTL_NODE(_vfs, OID_AUTO, fusefs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "FUSE tunables"); SYSCTL_NODE(_vfs_fusefs, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "FUSE statistics"); SYSCTL_INT(_vfs_fusefs, OID_AUTO, kernelabi_major, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, FUSE_KERNEL_VERSION, "FUSE kernel abi major version"); SYSCTL_INT(_vfs_fusefs, OID_AUTO, kernelabi_minor, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, FUSE_KERNEL_MINOR_VERSION, "FUSE kernel abi minor version"); SDT_PROVIDER_DEFINE(fusefs); /****************************** * * >>> Module management stuff * ******************************/ static void fuse_bringdown(eventhandler_tag eh_tag) { fuse_node_destroy(); fuse_internal_destroy(); fuse_file_destroy(); fuse_ipc_destroy(); fuse_device_destroy(); mtx_destroy(&fuse_mtx); } static int fuse_loader(struct module *m, int what, void *arg) { static eventhandler_tag eh_tag = NULL; int err = 0; switch (what) { case MOD_LOAD: /* kldload */ mtx_init(&fuse_mtx, "fuse_mtx", NULL, MTX_DEF); err = fuse_device_init(); if (err) { mtx_destroy(&fuse_mtx); return (err); } fuse_ipc_init(); fuse_file_init(); fuse_internal_init(); fuse_node_init(); fuse_pbuf_zone = pbuf_zsecond_create("fusepbuf", nswbuf / 2); /* vfs_modevent ignores its first arg */ if ((err = vfs_modevent(NULL, what, &fuse_vfsconf))) fuse_bringdown(eh_tag); break; case MOD_UNLOAD: if ((err = vfs_modevent(NULL, what, &fuse_vfsconf))) return (err); fuse_bringdown(eh_tag); uma_zdestroy(fuse_pbuf_zone); break; default: return (EINVAL); } return (err); } /* Registering the module */ static moduledata_t fuse_moddata = { "fusefs", fuse_loader, &fuse_vfsconf }; DECLARE_MODULE(fusefs, fuse_moddata, SI_SUB_VFS, SI_ORDER_MIDDLE); MODULE_VERSION(fusefs, 1); diff --git a/sys/kern/kern_module.c b/sys/kern/kern_module.c index 97dca7185319..1798a834dcec 100644 --- a/sys/kern/kern_module.c +++ b/sys/kern/kern_module.c @@ -1,519 +1,573 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_MODULE, "module", "module data structures"); struct module { TAILQ_ENTRY(module) link; /* chain together all modules */ TAILQ_ENTRY(module) flink; /* all modules in a file */ struct linker_file *file; /* file which contains this module */ int refs; /* reference count */ int id; /* unique id number */ char *name; /* module name */ modeventhand_t handler; /* event handler */ void *arg; /* argument for handler */ modspecific_t data; /* module specific data */ }; #define MOD_EVENT(mod, type) (mod)->handler((mod), (type), (mod)->arg) static TAILQ_HEAD(modulelist, module) modules; struct sx modules_sx; static int nextid = 1; static void module_shutdown(void *, int); static int modevent_nop(module_t mod, int what, void *arg) { switch(what) { case MOD_LOAD: return (0); case MOD_UNLOAD: return (EBUSY); default: return (EOPNOTSUPP); } } static void module_init(void *arg) { sx_init(&modules_sx, "module subsystem sx lock"); TAILQ_INIT(&modules); EVENTHANDLER_REGISTER(shutdown_final, module_shutdown, NULL, SHUTDOWN_PRI_DEFAULT); } SYSINIT(module, SI_SUB_KLD, SI_ORDER_FIRST, module_init, NULL); static void module_shutdown(void *arg1, int arg2) { module_t mod; if (arg2 & RB_NOSYNC) return; mtx_lock(&Giant); MOD_SLOCK; TAILQ_FOREACH_REVERSE(mod, &modules, modulelist, link) MOD_EVENT(mod, MOD_SHUTDOWN); MOD_SUNLOCK; mtx_unlock(&Giant); } void module_register_init(const void *arg) { const moduledata_t *data = (const moduledata_t *)arg; int error; module_t mod; mtx_lock(&Giant); MOD_SLOCK; mod = module_lookupbyname(data->name); if (mod == NULL) panic("module_register_init: module named %s not found\n", data->name); MOD_SUNLOCK; error = MOD_EVENT(mod, MOD_LOAD); if (error) { MOD_EVENT(mod, MOD_UNLOAD); MOD_XLOCK; module_release(mod); MOD_XUNLOCK; printf("module_register_init: MOD_LOAD (%s, %p, %p) error" " %d\n", data->name, (void *)data->evhand, data->priv, error); } else { MOD_XLOCK; if (mod->file) { /* * Once a module is successfully loaded, move * it to the head of the module list for this * linker file. This resorts the list so that * when the kernel linker iterates over the * modules to unload them, it will unload them * in the reverse order they were loaded. 
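 *
 * Illustrative example (hypothetical modules A and B, loaded in
 * that order): once B's MOD_LOAD succeeds, the per-file list is
 * reordered from (A, B) to (B, A), so a front-to-back unload walk
 * visits B first, i.e. reverse load order.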
*/ TAILQ_REMOVE(&mod->file->modules, mod, flink); TAILQ_INSERT_HEAD(&mod->file->modules, mod, flink); } MOD_XUNLOCK; } mtx_unlock(&Giant); } int module_register(const moduledata_t *data, linker_file_t container) { size_t namelen; module_t newmod; MOD_XLOCK; newmod = module_lookupbyname(data->name); if (newmod != NULL) { MOD_XUNLOCK; printf("%s: cannot register %s from %s; already loaded from %s\n", __func__, data->name, container->filename, newmod->file->filename); return (EEXIST); } namelen = strlen(data->name) + 1; newmod = malloc(sizeof(struct module) + namelen, M_MODULE, M_WAITOK); newmod->refs = 1; newmod->id = nextid++; newmod->name = (char *)(newmod + 1); strcpy(newmod->name, data->name); newmod->handler = data->evhand ? data->evhand : modevent_nop; newmod->arg = data->priv; bzero(&newmod->data, sizeof(newmod->data)); TAILQ_INSERT_TAIL(&modules, newmod, link); if (container) TAILQ_INSERT_TAIL(&container->modules, newmod, flink); newmod->file = container; MOD_XUNLOCK; return (0); } void module_reference(module_t mod) { MOD_XLOCK_ASSERT; MOD_DPF(REFS, ("module_reference: before, refs=%d\n", mod->refs)); mod->refs++; } void module_release(module_t mod) { MOD_XLOCK_ASSERT; if (mod->refs <= 0) panic("module_release: bad reference count"); MOD_DPF(REFS, ("module_release: before, refs=%d\n", mod->refs)); mod->refs--; if (mod->refs == 0) { TAILQ_REMOVE(&modules, mod, link); if (mod->file) TAILQ_REMOVE(&mod->file->modules, mod, flink); free(mod, M_MODULE); } } module_t module_lookupbyname(const char *name) { module_t mod; int err; MOD_LOCK_ASSERT; TAILQ_FOREACH(mod, &modules, link) { err = strcmp(mod->name, name); if (err == 0) return (mod); } return (NULL); } module_t module_lookupbyid(int modid) { module_t mod; MOD_LOCK_ASSERT; TAILQ_FOREACH(mod, &modules, link) if (mod->id == modid) return(mod); return (NULL); } int module_quiesce(module_t mod) { int error; mtx_lock(&Giant); error = MOD_EVENT(mod, MOD_QUIESCE); mtx_unlock(&Giant); if (error == EOPNOTSUPP || error == EINVAL) error = 0; return (error); } int module_unload(module_t mod) { int error; mtx_lock(&Giant); error = MOD_EVENT(mod, MOD_UNLOAD); mtx_unlock(&Giant); return (error); } int module_getid(module_t mod) { MOD_LOCK_ASSERT; return (mod->id); } module_t module_getfnext(module_t mod) { MOD_LOCK_ASSERT; return (TAILQ_NEXT(mod, flink)); } const char * module_getname(module_t mod) { MOD_LOCK_ASSERT; return (mod->name); } void module_setspecific(module_t mod, modspecific_t *datap) { MOD_XLOCK_ASSERT; mod->data = *datap; } linker_file_t module_file(module_t mod) { return (mod->file); } /* * Syscalls. 
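 *
 * Userland walks the module list with these, e.g. (illustrative
 * sketch):
 *
 *	for (int id = modnext(0); id > 0; id = modnext(id)) {
 *		struct module_stat ms = { .version = sizeof(ms) };
 *
 *		if (modstat(id, &ms) == 0)
 *			printf("%s\n", ms.name);
 *	}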
*/ int sys_modnext(struct thread *td, struct modnext_args *uap) { module_t mod; int error = 0; td->td_retval[0] = -1; MOD_SLOCK; if (uap->modid == 0) { mod = TAILQ_FIRST(&modules); if (mod) td->td_retval[0] = mod->id; else error = ENOENT; goto done2; } mod = module_lookupbyid(uap->modid); if (mod == NULL) { error = ENOENT; goto done2; } if (TAILQ_NEXT(mod, link)) td->td_retval[0] = TAILQ_NEXT(mod, link)->id; else td->td_retval[0] = 0; done2: MOD_SUNLOCK; return (error); } int sys_modfnext(struct thread *td, struct modfnext_args *uap) { module_t mod; int error; td->td_retval[0] = -1; MOD_SLOCK; mod = module_lookupbyid(uap->modid); if (mod == NULL) { error = ENOENT; } else { error = 0; if (TAILQ_NEXT(mod, flink)) td->td_retval[0] = TAILQ_NEXT(mod, flink)->id; else td->td_retval[0] = 0; } MOD_SUNLOCK; return (error); } struct module_stat_v1 { int version; /* set to sizeof(struct module_stat_v1) */ - char name[MAXMODNAME]; + char name[MAXMODNAMEV1V2]; int refs; int id; }; +struct module_stat_v2 { + int version; /* set to sizeof(struct module_stat_v2) */ + char name[MAXMODNAMEV1V2]; + int refs; + int id; + modspecific_t data; +}; + int sys_modstat(struct thread *td, struct modstat_args *uap) { module_t mod; modspecific_t data; int error = 0; int id, namelen, refs, version; struct module_stat *stat; + struct module_stat_v2 *stat_v2; char *name; + bool is_v1v2; MOD_SLOCK; mod = module_lookupbyid(uap->modid); if (mod == NULL) { MOD_SUNLOCK; return (ENOENT); } id = mod->id; refs = mod->refs; name = mod->name; data = mod->data; MOD_SUNLOCK; stat = uap->stat; /* * Check the version of the user's structure. */ if ((error = copyin(&stat->version, &version, sizeof(version))) != 0) return (error); - if (version != sizeof(struct module_stat_v1) - && version != sizeof(struct module_stat)) + is_v1v2 = (version == sizeof(struct module_stat_v1) || + version == sizeof(struct module_stat_v2)); + if (!is_v1v2 && version != sizeof(struct module_stat)) return (EINVAL); namelen = strlen(mod->name) + 1; - if (namelen > MAXMODNAME) - namelen = MAXMODNAME; + if (is_v1v2 && namelen > MAXMODNAMEV1V2) + namelen = MAXMODNAMEV1V2; + else if (namelen > MAXMODNAMEV3) + namelen = MAXMODNAMEV3; if ((error = copyout(name, &stat->name[0], namelen)) != 0) return (error); - if ((error = copyout(&refs, &stat->refs, sizeof(int))) != 0) - return (error); - if ((error = copyout(&id, &stat->id, sizeof(int))) != 0) - return (error); + /* Extending MAXMODNAME gives an offset change for v3. */ + if (is_v1v2) { + stat_v2 = (struct module_stat_v2 *)stat; + if ((error = copyout(&refs, &stat_v2->refs, sizeof(int))) != 0) + return (error); + if ((error = copyout(&id, &stat_v2->id, sizeof(int))) != 0) + return (error); + } else { + if ((error = copyout(&refs, &stat->refs, sizeof(int))) != 0) + return (error); + if ((error = copyout(&id, &stat->id, sizeof(int))) != 0) + return (error); + } /* * >v1 stat includes module data.
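 *
 * Dispatch is purely by structure size: a binary built against the
 * old ABI stores sizeof(struct module_stat_v2) in the version field
 * and gets the name truncated to MAXMODNAMEV1V2 plus refs/id/data at
 * the v2 offsets, while a binary passing sizeof(struct module_stat)
 * receives the full MAXPATHLEN-sized name at the v3 offsets.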
*/ - if (version == sizeof(struct module_stat)) + if (version == sizeof(struct module_stat_v2)) { + if ((error = copyout(&data, &stat_v2->data, + sizeof(data))) != 0) + return (error); + } else if (version == sizeof(struct module_stat)) { if ((error = copyout(&data, &stat->data, sizeof(data))) != 0) return (error); + } td->td_retval[0] = 0; return (error); } int sys_modfind(struct thread *td, struct modfind_args *uap) { int error = 0; - char name[MAXMODNAME]; + char name[MAXMODNAMEV3]; module_t mod; if ((error = copyinstr(uap->name, name, sizeof name, 0)) != 0) return (error); MOD_SLOCK; mod = module_lookupbyname(name); if (mod == NULL) error = ENOENT; else td->td_retval[0] = module_getid(mod); MOD_SUNLOCK; return (error); } MODULE_VERSION(kernel, __FreeBSD_version); #ifdef COMPAT_FREEBSD32 #include #include #include #include #include typedef union modspecific32 { int intval; uint32_t uintval; int longval; uint32_t ulongval; } modspecific32_t; +struct module_stat32_v2 { + int version; + char name[MAXMODNAMEV1V2]; + int refs; + int id; + modspecific32_t data; +}; + struct module_stat32 { int version; char name[MAXMODNAME]; int refs; int id; modspecific32_t data; }; int freebsd32_modstat(struct thread *td, struct freebsd32_modstat_args *uap) { module_t mod; modspecific32_t data32; int error = 0; int id, namelen, refs, version; struct module_stat32 *stat32; + struct module_stat32_v2 *stat32_v2; char *name; + bool is_v1v2; MOD_SLOCK; mod = module_lookupbyid(uap->modid); if (mod == NULL) { MOD_SUNLOCK; return (ENOENT); } id = mod->id; refs = mod->refs; name = mod->name; CP(mod->data, data32, intval); CP(mod->data, data32, uintval); CP(mod->data, data32, longval); CP(mod->data, data32, ulongval); MOD_SUNLOCK; stat32 = uap->stat; if ((error = copyin(&stat32->version, &version, sizeof(version))) != 0) return (error); - if (version != sizeof(struct module_stat_v1) - && version != sizeof(struct module_stat32)) + is_v1v2 = (version == sizeof(struct module_stat_v1) || + version == sizeof(struct module_stat32_v2)); + if (!is_v1v2 && version != sizeof(struct module_stat32)) return (EINVAL); namelen = strlen(mod->name) + 1; - if (namelen > MAXMODNAME) - namelen = MAXMODNAME; + if (is_v1v2 && namelen > MAXMODNAMEV1V2) + namelen = MAXMODNAMEV1V2; + else if (namelen > MAXMODNAMEV3) + namelen = MAXMODNAMEV3; if ((error = copyout(name, &stat32->name[0], namelen)) != 0) return (error); - if ((error = copyout(&refs, &stat32->refs, sizeof(int))) != 0) - return (error); - if ((error = copyout(&id, &stat32->id, sizeof(int))) != 0) - return (error); + /* Extending MAXMODNAME gives an offset change for v3. */ + if (is_v1v2) { + stat32_v2 = (struct module_stat32_v2 *)stat32; + if ((error = copyout(&refs, &stat32_v2->refs, sizeof(int))) != 0) + return (error); + if ((error = copyout(&id, &stat32_v2->id, sizeof(int))) != 0) + return (error); + } else { + if ((error = copyout(&refs, &stat32->refs, sizeof(int))) != 0) + return (error); + if ((error = copyout(&id, &stat32->id, sizeof(int))) != 0) + return (error); + } /* * >v1 stat includes module data. 
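 *
 * Note that CP() narrows the kernel's modspecific_t to the 32-bit
 * fields of modspecific32_t; e.g. a longval of 0x100000001 on an
 * LP64 kernel reads back as 0x1 in a 32-bit process.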
*/ - if (version == sizeof(struct module_stat32)) + if (version == sizeof(struct module_stat32_v2)) { + if ((error = copyout(&data32, &stat32_v2->data, + sizeof(data32))) != 0) + return (error); + } else if (version == sizeof(struct module_stat32)) { if ((error = copyout(&data32, &stat32->data, sizeof(data32))) != 0) return (error); + } td->td_retval[0] = 0; return (error); } #endif diff --git a/sys/sys/module.h b/sys/sys/module.h index 89377df401a8..efefaf4cb513 100644 --- a/sys/sys/module.h +++ b/sys/sys/module.h @@ -1,279 +1,281 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_MODULE_H_ #define _SYS_MODULE_H_ /* * Module metadata types */ #define MDT_DEPEND 1 /* argument is a module name */ #define MDT_MODULE 2 /* module declaration */ #define MDT_VERSION 3 /* module version(s) */ #define MDT_PNP_INFO 4 /* Plug and play hints record */ #define MDT_STRUCT_VERSION 1 /* version of metadata structure */ #define MDT_SETNAME "modmetadata_set" typedef enum modeventtype { MOD_LOAD, MOD_UNLOAD, MOD_SHUTDOWN, MOD_QUIESCE } modeventtype_t; typedef struct module *module_t; typedef int (*modeventhand_t)(module_t, int /* modeventtype_t */, void *); /* * Struct for registering modules statically via SYSINIT. */ typedef struct moduledata { const char *name; /* module name */ modeventhand_t evhand; /* event handler */ void *priv; /* extra data */ } moduledata_t; /* * A module can use this to report module specific data to the user via * kldstat(2). 
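 *
 * A producer might fill it in from kernel context (illustrative
 * sketch; "foo" is a hypothetical module):
 *
 *	module_t mod;
 *	modspecific_t ms = { .intval = 42 };
 *
 *	MOD_XLOCK;
 *	if ((mod = module_lookupbyname("foo")) != NULL)
 *		module_setspecific(mod, &ms);
 *	MOD_XUNLOCK;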
*/ typedef union modspecific { int intval; u_int uintval; long longval; u_long ulongval; } modspecific_t; /* * Module dependency declaration */ struct mod_depend { int md_ver_minimum; int md_ver_preferred; int md_ver_maximum; }; /* * Module version declaration */ struct mod_version { int mv_version; }; struct mod_metadata { int md_version; /* structure version MDTV_* */ int md_type; /* type of entry MDT_* */ const void *md_data; /* specific data */ const char *md_cval; /* common string label */ }; struct mod_pnp_match_info { const char *descr; /* Description of the table */ const char *bus; /* Name of the bus for this table */ const void *table; /* Pointer to pnp table */ int entry_len; /* Length of each entry in the table (may be */ /* longer than descr describes). */ int num_entry; /* Number of entries in the table */ }; #ifdef _KERNEL #include #define MODULE_METADATA_CONCAT(uniquifier) _mod_metadata##uniquifier #define MODULE_METADATA(uniquifier, type, data, cval) \ static struct mod_metadata MODULE_METADATA_CONCAT(uniquifier) = { \ MDT_STRUCT_VERSION, \ type, \ data, \ cval \ }; \ DATA_SET(modmetadata_set, MODULE_METADATA_CONCAT(uniquifier)) #define MODULE_DEPEND(module, mdepend, vmin, vpref, vmax) \ static struct mod_depend _##module##_depend_on_##mdepend \ __section(".data") = { \ vmin, \ vpref, \ vmax \ }; \ MODULE_METADATA(_md_##module##_on_##mdepend, MDT_DEPEND, \ &_##module##_depend_on_##mdepend, #mdepend) /* * Every kernel has a 'kernel' module with the version set to * __FreeBSD_version. We embed a MODULE_DEPEND() inside every module * that depends on the 'kernel' module. It uses the current value of * __FreeBSD_version as the minimum and preferred versions. For the * maximum version it rounds the version up to the end of its branch * (i.e. M99999 for M.x). This allows a module built on M.x to work * on M.y systems where y >= x, but fail on M.z systems where z < x. */ #define MODULE_KERNEL_MAXVER (roundup(__FreeBSD_version, 100000) - 1) #define DECLARE_MODULE_WITH_MAXVER(name, data, sub, order, maxver) \ MODULE_DEPEND(name, kernel, __FreeBSD_version, \ __FreeBSD_version, maxver); \ MODULE_METADATA(_md_##name, MDT_MODULE, &data, __XSTRING(name));\ SYSINIT(name##module, sub, order, module_register_init, &data); \ struct __hack #ifdef KLD_TIED #define DECLARE_MODULE(name, data, sub, order) \ DECLARE_MODULE_WITH_MAXVER(name, data, sub, order, __FreeBSD_version) #else #define DECLARE_MODULE(name, data, sub, order) \ DECLARE_MODULE_WITH_MAXVER(name, data, sub, order, MODULE_KERNEL_MAXVER) #endif /* * The module declared with DECLARE_MODULE_TIED can only be loaded * into the kernel with exactly the same __FreeBSD_version. * * Use it for modules that use kernel interfaces that are not stable * even on STABLE/X branches. */ #define DECLARE_MODULE_TIED(name, data, sub, order) \ DECLARE_MODULE_WITH_MAXVER(name, data, sub, order, __FreeBSD_version) #define MODULE_VERSION_CONCAT(module, version) _##module##_version #define MODULE_VERSION(module, version) \ static struct mod_version MODULE_VERSION_CONCAT(module, version)\ __section(".data") = { \ version \ }; \ MODULE_METADATA(MODULE_VERSION_CONCAT(module, version), MDT_VERSION,\ &MODULE_VERSION_CONCAT(module, version), __XSTRING(module)) /** * Generic macros to create pnp info hints that modules may export * to allow external tools to parse their internal device tables * to make an informed guess about what driver(s) to load. 
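 *
 * Typical use (illustrative sketch; "foo" and foo_devs are
 * hypothetical):
 *
 *	MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, foo,
 *	    foo_devs, nitems(foo_devs));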
*/ #define MODULE_PNP_INFO(d, b, unique, t, n) \ static const struct mod_pnp_match_info _module_pnp_##b##_##unique = { \ .descr = d, \ .bus = #b, \ .table = t, \ .entry_len = sizeof((t)[0]), \ .num_entry = n \ }; \ MODULE_METADATA(_md_##b##_pnpinfo_##unique, MDT_PNP_INFO, \ &_module_pnp_##b##_##unique, #b); /** * descr is a string that describes each entry in the table. The general * form is the grammar (TYPE:pnp_name[/pnp_name];)* * where TYPE is one of the following: * U8 uint8_t element * V8 like U8 and 0xff means match any * G16 uint16_t element, any value >= matches * L16 uint16_t element, any value <= matches * M16 uint16_t element, mask of which of the following fields to use. * U16 uint16_t element * V16 like U16 and 0xffff means match any * U32 uint32_t element * V32 like U32 and 0xffffffff means match any * W32 Two 16-bit values with first pnp_name in LSW and second in MSW. * Z pointer to a string to match exactly * D pointer to a string to human readable description for device * P A pointer that should be ignored * E EISA PNP Identifier (in binary, but bus publishes string) * T Key for whole table. pnp_name=value. must be last, if present. * * The pnp_name "#" is reserved for other fields that should be ignored. * Otherwise pnp_name must match the name from the parent device's pnpinfo * output. The second pnp_name is used for the W32 type. */ extern struct sx modules_sx; #define MOD_XLOCK sx_xlock(&modules_sx) #define MOD_SLOCK sx_slock(&modules_sx) #define MOD_XUNLOCK sx_xunlock(&modules_sx) #define MOD_SUNLOCK sx_sunlock(&modules_sx) #define MOD_LOCK_ASSERT sx_assert(&modules_sx, SX_LOCKED) #define MOD_XLOCK_ASSERT sx_assert(&modules_sx, SX_XLOCKED) struct linker_file; void module_register_init(const void *); int module_register(const struct moduledata *, struct linker_file *); module_t module_lookupbyname(const char *); module_t module_lookupbyid(int); int module_quiesce(module_t); void module_reference(module_t); void module_release(module_t); int module_unload(module_t); int module_getid(module_t); module_t module_getfnext(module_t); const char * module_getname(module_t); void module_setspecific(module_t, modspecific_t *); struct linker_file *module_file(module_t); #ifdef MOD_DEBUG extern int mod_debug; #define MOD_DEBUG_REFS 1 #define MOD_DPF(cat, args) do { \ if (mod_debug & MOD_DEBUG_##cat) \ printf args; \ } while (0) #else /* !MOD_DEBUG */ #define MOD_DPF(cat, args) #endif #endif /* _KERNEL */ -#define MAXMODNAME 32 +#define MAXMODNAMEV1V2 32 +#define MAXMODNAMEV3 MAXPATHLEN +#define MAXMODNAME MAXMODNAMEV3 struct module_stat { int version; /* set to sizeof(struct module_stat) */ char name[MAXMODNAME]; int refs; int id; modspecific_t data; }; #ifndef _KERNEL #include __BEGIN_DECLS int modnext(int _modid); int modfnext(int _modid); int modstat(int _modid, struct module_stat *_stat); int modfind(const char *_name); __END_DECLS #endif #endif /* !_SYS_MODULE_H_ */ diff --git a/sys/x86/cpufreq/hwpstate_intel.c b/sys/x86/cpufreq/hwpstate_intel.c index f6e63fdeb854..e3d17aa0bd1c 100644 --- a/sys/x86/cpufreq/hwpstate_intel.c +++ b/sys/x86/cpufreq/hwpstate_intel.c @@ -1,639 +1,640 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018 Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted providing that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "acpi_if.h" #include "cpufreq_if.h" extern uint64_t tsc_freq; static int intel_hwpstate_probe(device_t dev); static int intel_hwpstate_attach(device_t dev); static int intel_hwpstate_detach(device_t dev); static int intel_hwpstate_suspend(device_t dev); static int intel_hwpstate_resume(device_t dev); static int intel_hwpstate_get(device_t dev, struct cf_setting *cf); static int intel_hwpstate_type(device_t dev, int *type); static device_method_t intel_hwpstate_methods[] = { /* Device interface */ DEVMETHOD(device_identify, intel_hwpstate_identify), DEVMETHOD(device_probe, intel_hwpstate_probe), DEVMETHOD(device_attach, intel_hwpstate_attach), DEVMETHOD(device_detach, intel_hwpstate_detach), DEVMETHOD(device_suspend, intel_hwpstate_suspend), DEVMETHOD(device_resume, intel_hwpstate_resume), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_get, intel_hwpstate_get), DEVMETHOD(cpufreq_drv_type, intel_hwpstate_type), DEVMETHOD_END }; struct hwp_softc { device_t dev; bool hwp_notifications; bool hwp_activity_window; bool hwp_pref_ctrl; bool hwp_pkg_ctrl; bool hwp_pkg_ctrl_en; bool hwp_perf_bias; bool hwp_perf_bias_cached; uint64_t req; /* Cached copy of HWP_REQUEST */ uint64_t hwp_energy_perf_bias; /* Cache PERF_BIAS */ uint8_t high; uint8_t guaranteed; uint8_t efficient; uint8_t low; }; static devclass_t hwpstate_intel_devclass; static driver_t hwpstate_intel_driver = { "hwpstate_intel", intel_hwpstate_methods, sizeof(struct hwp_softc), }; DRIVER_MODULE(hwpstate_intel, cpu, hwpstate_intel_driver, hwpstate_intel_devclass, NULL, NULL); MODULE_VERSION(hwpstate_intel, 1); static bool hwpstate_pkg_ctrl_enable = true; SYSCTL_BOOL(_machdep, OID_AUTO, hwpstate_pkg_ctrl, CTLFLAG_RDTUN, &hwpstate_pkg_ctrl_enable, 0, "Set 1 (default) to enable package-level control, 0 to disable"); static int intel_hwp_dump_sysctl_handler(SYSCTL_HANDLER_ARGS) { device_t dev; struct pcpu *pc; struct sbuf *sb; struct hwp_softc *sc; uint64_t data, data2; int ret; sc = (struct hwp_softc *)arg1; dev = sc->dev; pc = cpu_get_pcpu(dev); if (pc == NULL) return (ENXIO); sb = sbuf_new(NULL, NULL, 1024, SBUF_FIXEDLEN | SBUF_INCLUDENUL); sbuf_putc(sb, '\n'); thread_lock(curthread); sched_bind(curthread, pc->pc_cpuid); thread_unlock(curthread); rdmsr_safe(MSR_IA32_PM_ENABLE, &data); sbuf_printf(sb, "CPU%d: HWP %sabled\n", pc->pc_cpuid, ((data & 1) ?
"En" : "Dis")); if (data == 0) { ret = 0; goto out; } rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &data); sbuf_printf(sb, "\tHighest Performance: %03ju\n", data & 0xff); sbuf_printf(sb, "\tGuaranteed Performance: %03ju\n", (data >> 8) & 0xff); sbuf_printf(sb, "\tEfficient Performance: %03ju\n", (data >> 16) & 0xff); sbuf_printf(sb, "\tLowest Performance: %03ju\n", (data >> 24) & 0xff); rdmsr_safe(MSR_IA32_HWP_REQUEST, &data); data2 = 0; if (sc->hwp_pkg_ctrl && (data & IA32_HWP_REQUEST_PACKAGE_CONTROL)) rdmsr_safe(MSR_IA32_HWP_REQUEST_PKG, &data2); sbuf_putc(sb, '\n'); #define pkg_print(x, name, offset) do { \ if (!sc->hwp_pkg_ctrl || (data & x) != 0) \ sbuf_printf(sb, "\t%s: %03u\n", name, \ (unsigned)(data >> offset) & 0xff); \ else \ sbuf_printf(sb, "\t%s: %03u\n", name, \ (unsigned)(data2 >> offset) & 0xff); \ } while (0) pkg_print(IA32_HWP_REQUEST_EPP_VALID, "Requested Efficiency Performance Preference", 24); pkg_print(IA32_HWP_REQUEST_DESIRED_VALID, "Requested Desired Performance", 16); pkg_print(IA32_HWP_REQUEST_MAXIMUM_VALID, "Requested Maximum Performance", 8); pkg_print(IA32_HWP_REQUEST_MINIMUM_VALID, "Requested Minimum Performance", 0); #undef pkg_print sbuf_putc(sb, '\n'); out: thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); ret = sbuf_finish(sb); if (ret == 0) ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb)); sbuf_delete(sb); return (ret); } static inline int percent_to_raw(int x) { MPASS(x <= 100 && x >= 0); return (0xff * x / 100); } /* * Given x * 10 in [0, 1000], round to the integer nearest x. * * This allows round-tripping nice human readable numbers through this * interface. Otherwise, user-provided percentages such as 25, 50, 75 get * rounded down to 24, 49, and 74, which is a bit ugly. */ static inline int round10(int xtimes10) { return ((xtimes10 + 5) / 10); } static inline int raw_to_percent(int x) { MPASS(x <= 0xff && x >= 0); return (round10(x * 1000 / 0xff)); } /* Range of MSR_IA32_ENERGY_PERF_BIAS is more limited: 0-0xf. */ static inline int percent_to_raw_perf_bias(int x) { /* * Round up so that raw values present as nice round human numbers and * also round-trip to the same raw value. */ MPASS(x <= 100 && x >= 0); return (((0xf * x) + 50) / 100); } static inline int raw_to_percent_perf_bias(int x) { /* Rounding to nice human numbers despite a step interval of 6.67%. */ MPASS(x <= 0xf && x >= 0); return (((x * 20) / 0xf) * 5); } static int sysctl_epp_select(SYSCTL_HANDLER_ARGS) { struct hwp_softc *sc; device_t dev; struct pcpu *pc; uint64_t epb; uint32_t val; int ret; dev = oidp->oid_arg1; sc = device_get_softc(dev); if (!sc->hwp_pref_ctrl && !sc->hwp_perf_bias) return (ENODEV); pc = cpu_get_pcpu(dev); if (pc == NULL) return (ENXIO); thread_lock(curthread); sched_bind(curthread, pc->pc_cpuid); thread_unlock(curthread); if (sc->hwp_pref_ctrl) { val = (sc->req & IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE) >> 24; val = raw_to_percent(val); } else { /* * If cpuid indicates EPP is not supported, the HWP controller * uses MSR_IA32_ENERGY_PERF_BIAS instead (Intel SDM §14.4.4). * This register is per-core (but not HT). 
*/ if (!sc->hwp_perf_bias_cached) { ret = rdmsr_safe(MSR_IA32_ENERGY_PERF_BIAS, &epb); if (ret) goto out; sc->hwp_energy_perf_bias = epb; sc->hwp_perf_bias_cached = true; } val = sc->hwp_energy_perf_bias & IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK; val = raw_to_percent_perf_bias(val); } MPASS(val >= 0 && val <= 100); ret = sysctl_handle_int(oidp, &val, 0, req); if (ret || req->newptr == NULL) goto out; if (val > 100) { ret = EINVAL; goto out; } if (sc->hwp_pref_ctrl) { val = percent_to_raw(val); sc->req = ((sc->req & ~IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE) | (val << 24u)); if (sc->hwp_pkg_ctrl_en) ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req); else ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req); } else { val = percent_to_raw_perf_bias(val); MPASS((val & ~IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK) == 0); sc->hwp_energy_perf_bias = ((sc->hwp_energy_perf_bias & ~IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK) | val); ret = wrmsr_safe(MSR_IA32_ENERGY_PERF_BIAS, sc->hwp_energy_perf_bias); } out: thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); return (ret); } void intel_hwpstate_identify(driver_t *driver, device_t parent) { if (device_find_child(parent, "hwpstate_intel", -1) != NULL) return; if (cpu_vendor_id != CPU_VENDOR_INTEL) return; if (resource_disabled("hwpstate_intel", 0)) return; /* * Intel SDM 14.4.1 (HWP Programming Interfaces): * Availability of HWP baseline resource and capability, * CPUID.06H:EAX[bit 7]: If this bit is set, HWP provides several new * architectural MSRs: IA32_PM_ENABLE, IA32_HWP_CAPABILITIES, * IA32_HWP_REQUEST, IA32_HWP_STATUS. */ if ((cpu_power_eax & CPUTPM1_HWP) == 0) return; if (BUS_ADD_CHILD(parent, 10, "hwpstate_intel", -1) == NULL) return; if (bootverbose) device_printf(parent, "hwpstate registered\n"); } static int intel_hwpstate_probe(device_t dev) { device_set_desc(dev, "Intel Speed Shift"); return (BUS_PROBE_NOWILDCARD); } static int set_autonomous_hwp(struct hwp_softc *sc) { struct pcpu *pc; device_t dev; uint64_t caps; int ret; dev = sc->dev; pc = cpu_get_pcpu(dev); if (pc == NULL) return (ENXIO); thread_lock(curthread); sched_bind(curthread, pc->pc_cpuid); thread_unlock(curthread); /* XXX: Many MSRs aren't readable until feature is enabled */ ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1); if (ret) { /* * This is actually a package-level MSR, and only the first * write is not ignored. So it is harmless to enable it across * all devices, and this allows us not to care especially in * which order cores (and packages) are probed. This error * condition should not happen given we gate on the HWP CPUID * feature flag, if the Intel SDM is correct. */ device_printf(dev, "Failed to enable HWP for cpu%d (%d)\n", pc->pc_cpuid, ret); goto out; } ret = rdmsr_safe(MSR_IA32_HWP_REQUEST, &sc->req); if (ret) { device_printf(dev, "Failed to read HWP request MSR for cpu%d (%d)\n", pc->pc_cpuid, ret); goto out; } ret = rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &caps); if (ret) { device_printf(dev, "Failed to read HWP capabilities MSR for cpu%d (%d)\n", pc->pc_cpuid, ret); goto out; } /* * High and low are static; "guaranteed" is dynamic; and efficient is * also dynamic. 
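 *
 * Example decode (hypothetical value): caps == 0x0c1a2830 yields
 * highest 0x30 (bits 7:0), guaranteed 0x28 (15:8), most efficient
 * 0x1a (23:16) and lowest 0x0c (31:24).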
*/ sc->high = IA32_HWP_CAPABILITIES_HIGHEST_PERFORMANCE(caps); sc->guaranteed = IA32_HWP_CAPABILITIES_GUARANTEED_PERFORMANCE(caps); sc->efficient = IA32_HWP_CAPABILITIES_EFFICIENT_PERFORMANCE(caps); sc->low = IA32_HWP_CAPABILITIES_LOWEST_PERFORMANCE(caps); /* hardware autonomous selection determines the performance target */ sc->req &= ~IA32_HWP_DESIRED_PERFORMANCE; /* enable HW dynamic selection of window size */ sc->req &= ~IA32_HWP_ACTIVITY_WINDOW; /* IA32_HWP_REQUEST.Minimum_Performance = IA32_HWP_CAPABILITIES.Lowest_Performance */ sc->req &= ~IA32_HWP_MINIMUM_PERFORMANCE; sc->req |= sc->low; /* IA32_HWP_REQUEST.Maximum_Performance = IA32_HWP_CAPABILITIES.Highest_Performance. */ sc->req &= ~IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE; sc->req |= sc->high << 8; /* If supported, request package-level control for this CPU. */ if (sc->hwp_pkg_ctrl_en) ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req | IA32_HWP_REQUEST_PACKAGE_CONTROL); else ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req); if (ret) { device_printf(dev, "Failed to setup%s autonomous HWP for cpu%d\n", sc->hwp_pkg_ctrl_en ? " PKG" : "", pc->pc_cpuid); goto out; } /* If supported, write the PKG-wide control MSR. */ if (sc->hwp_pkg_ctrl_en) { /* * "The structure of the IA32_HWP_REQUEST_PKG MSR * (package-level) is identical to the IA32_HWP_REQUEST MSR * with the exception of the Package Control field, which does * not exist." (Intel SDM §14.4.4) */ ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req); if (ret) { device_printf(dev, "Failed to set autonomous HWP for package\n"); } } out: thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); return (ret); } static int intel_hwpstate_attach(device_t dev) { struct hwp_softc *sc; int ret; sc = device_get_softc(dev); sc->dev = dev; /* eax */ if (cpu_power_eax & CPUTPM1_HWP_NOTIFICATION) sc->hwp_notifications = true; if (cpu_power_eax & CPUTPM1_HWP_ACTIVITY_WINDOW) sc->hwp_activity_window = true; if (cpu_power_eax & CPUTPM1_HWP_PERF_PREF) sc->hwp_pref_ctrl = true; if (cpu_power_eax & CPUTPM1_HWP_PKG) sc->hwp_pkg_ctrl = true; /* Allow administrators to disable pkg-level control. 
*/ sc->hwp_pkg_ctrl_en = (sc->hwp_pkg_ctrl && hwpstate_pkg_ctrl_enable); /* ecx */ if (cpu_power_ecx & CPUID_PERF_BIAS) sc->hwp_perf_bias = true; ret = set_autonomous_hwp(sc); if (ret) return (ret); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO, device_get_nameunit(dev), CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_NEEDGIANT, sc, 0, intel_hwp_dump_sysctl_handler, "A", ""); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "epp", CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, dev, 0, sysctl_epp_select, "I", "Efficiency/Performance Preference " "(range from 0, most performant, through 100, most efficient)"); return (cpufreq_register(dev)); } static int intel_hwpstate_detach(device_t dev) { return (cpufreq_unregister(dev)); } static int intel_hwpstate_get(device_t dev, struct cf_setting *set) { struct pcpu *pc; uint64_t rate; int ret; if (set == NULL) return (EINVAL); pc = cpu_get_pcpu(dev); if (pc == NULL) return (ENXIO); memset(set, CPUFREQ_VAL_UNKNOWN, sizeof(*set)); set->dev = dev; ret = cpu_est_clockrate(pc->pc_cpuid, &rate); if (ret == 0) set->freq = rate / 1000000; set->volts = CPUFREQ_VAL_UNKNOWN; set->power = CPUFREQ_VAL_UNKNOWN; set->lat = CPUFREQ_VAL_UNKNOWN; return (0); } static int intel_hwpstate_type(device_t dev, int *type) { if (type == NULL) return (EINVAL); *type = CPUFREQ_TYPE_ABSOLUTE | CPUFREQ_FLAG_INFO_ONLY | CPUFREQ_FLAG_UNCACHED; return (0); } static int intel_hwpstate_suspend(device_t dev) { return (0); } /* * Redo a subset of set_autonomous_hwp on resume; untested. Without this, * testers observed that on resume MSR_IA32_HWP_REQUEST was bogus. */ static int intel_hwpstate_resume(device_t dev) { struct hwp_softc *sc; struct pcpu *pc; int ret; sc = device_get_softc(dev); pc = cpu_get_pcpu(dev); if (pc == NULL) return (ENXIO); thread_lock(curthread); sched_bind(curthread, pc->pc_cpuid); thread_unlock(curthread); ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1); if (ret) { device_printf(dev, "Failed to enable HWP for cpu%d after suspend (%d)\n", pc->pc_cpuid, ret); goto out; } if (sc->hwp_pkg_ctrl_en) ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req | IA32_HWP_REQUEST_PACKAGE_CONTROL); else ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req); if (ret) { device_printf(dev, "Failed to set%s autonomous HWP for cpu%d after suspend\n", sc->hwp_pkg_ctrl_en ? " PKG" : "", pc->pc_cpuid); goto out; } if (sc->hwp_pkg_ctrl_en) { ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req); if (ret) { device_printf(dev, "Failed to set autonomous HWP for package after " "suspend\n"); goto out; } } if (!sc->hwp_pref_ctrl && sc->hwp_perf_bias_cached) { ret = wrmsr_safe(MSR_IA32_ENERGY_PERF_BIAS, sc->hwp_energy_perf_bias); if (ret) { device_printf(dev, "Failed to set energy perf bias for cpu%d after " "suspend\n", pc->pc_cpuid); } } out: thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); return (ret); }