diff --git a/sys/compat/linuxkpi/common/include/linux/gpf.h b/sys/compat/linuxkpi/common/include/linux/gpf.h
new file mode 100644
index 000000000000..01e883a94728
--- /dev/null
+++ b/sys/compat/linuxkpi/common/include/linux/gpf.h
@@ -0,0 +1,33 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Serenity Cyber Security, LLC.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _LINUXKPI_LINUX_GPF_H_
+#define _LINUXKPI_LINUX_GPF_H_
+
+#include
+
+#endif /* _LINUXKPI_LINUX_GPF_H_ */
diff --git a/sys/compat/linuxkpi/common/include/linux/idr.h b/sys/compat/linuxkpi/common/include/linux/idr.h
index ca3f8171ff44..7f55b8e57c7e 100644
--- a/sys/compat/linuxkpi/common/include/linux/idr.h
+++ b/sys/compat/linuxkpi/common/include/linux/idr.h
@@ -1,160 +1,161 @@
 /*-
  * Copyright (c) 2010 Isilon Systems, Inc.
  * Copyright (c) 2010 iX Systems, Inc.
  * Copyright (c) 2010 Panasas, Inc.
  * Copyright (c) 2013-2016 Mellanox Technologies, Ltd.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  * notice unmodified, this list of conditions, and the following
  * disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  * notice, this list of conditions and the following disclaimer in the
  * documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ #ifndef _LINUXKPI_LINUX_IDR_H_ #define _LINUXKPI_LINUX_IDR_H_ #include #include #include #include +#include #include #define IDR_BITS 5 #define IDR_SIZE (1 << IDR_BITS) #define IDR_MASK (IDR_SIZE - 1) #define MAX_ID_SHIFT ((sizeof(int) * NBBY) - 1) #define MAX_ID_BIT (1U << MAX_ID_SHIFT) #define MAX_ID_MASK (MAX_ID_BIT - 1) #define MAX_LEVEL (MAX_ID_SHIFT + IDR_BITS - 1) / IDR_BITS #define MAX_IDR_SHIFT (sizeof(int)*8 - 1) #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) #define MAX_IDR_MASK (MAX_IDR_BIT - 1) struct idr_layer { unsigned long bitmap; struct idr_layer *ary[IDR_SIZE]; }; struct idr { struct mtx lock; struct idr_layer *top; struct idr_layer *free; int layers; int next_cyclic_id; }; /* NOTE: It is the applications responsibility to destroy the IDR */ #define DEFINE_IDR(name) \ struct idr name; \ SYSINIT(name##_idr_sysinit, SI_SUB_DRIVERS, SI_ORDER_FIRST, \ idr_init, &(name)) /* NOTE: It is the applications responsibility to destroy the IDA */ #define DEFINE_IDA(name) \ struct ida name; \ SYSINIT(name##_ida_sysinit, SI_SUB_DRIVERS, SI_ORDER_FIRST, \ ida_init, &(name)) void idr_preload(gfp_t gfp_mask); void idr_preload_end(void); void *idr_find(struct idr *idp, int id); void *idr_get_next(struct idr *idp, int *nextid); bool idr_is_empty(struct idr *idp); int idr_pre_get(struct idr *idp, gfp_t gfp_mask); int idr_get_new(struct idr *idp, void *ptr, int *id); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); void *idr_replace(struct idr *idp, void *ptr, int id); void *idr_remove(struct idr *idp, int id); void idr_remove_all(struct idr *idp); void idr_destroy(struct idr *idp); void idr_init(struct idr *idp); int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t); int idr_alloc_cyclic(struct idr *idp, void *ptr, int start, int end, gfp_t); int idr_for_each(struct idr *idp, int (*fn)(int id, void *p, void *data), void *data); #define idr_for_each_entry(idp, entry, id) \ for ((id) = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++(id)) #define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ #define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long) - 1) #define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8) struct ida_bitmap { long nr_busy; unsigned long bitmap[IDA_BITMAP_LONGS]; }; struct ida { struct idr idr; struct ida_bitmap *free_bitmap; }; int ida_pre_get(struct ida *ida, gfp_t gfp_mask); int ida_get_new_above(struct ida *ida, int starting_id, int *p_id); void ida_remove(struct ida *ida, int id); void ida_destroy(struct ida *ida); void ida_init(struct ida *ida); int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, gfp_t gfp_mask); void ida_simple_remove(struct ida *ida, unsigned int id); static inline void ida_free(struct ida *ida, int id) { ida_remove(ida, id); } static inline int ida_get_new(struct ida *ida, int *p_id) { return (ida_get_new_above(ida, 0, p_id)); } static inline int ida_alloc_min(struct ida *ida, unsigned int min, gfp_t gfp) { return (ida_simple_get(ida, min, UINT_MAX, gfp)); } static inline int ida_alloc_max(struct ida *ida, unsigned int max, gfp_t gfp) { return (ida_simple_get(ida, 0, max, gfp)); } static inline int ida_alloc(struct ida *ida, gfp_t gfp) { return (ida_alloc_max(ida, ~0u, gfp)); } static inline bool ida_is_empty(struct ida *ida) { return (idr_is_empty(&ida->idr)); } #endif /* _LINUXKPI_LINUX_IDR_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/kernel.h b/sys/compat/linuxkpi/common/include/linux/kernel.h index fd5f42fbe36c..43938cdf73de 100644 --- 
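A minimal usage sketch of the IDR/IDA interfaces declared above, written the way a LinuxKPI consumer (for example, a driver ported from Linux) would call them. The example_* names are invented for illustration, GFP_KERNEL and the negative errno values are assumed to come from the usual LinuxKPI gfp/errno headers, and the "end" bound is assumed to follow the Linux convention of being exclusive:

#include <linux/idr.h>

static DEFINE_IDR(example_obj_idr);	/* id -> object map */
static DEFINE_IDA(example_unit_ida);	/* plain unit-number allocator */

static int
example_register(void *obj)
{
	/*
	 * Store "obj" and hand back the lowest free id in [1, 256),
	 * or a negative errno on failure.
	 */
	return (idr_alloc(&example_obj_idr, obj, 1, 256, GFP_KERNEL));
}

static void
example_unregister(int id)
{
	idr_remove(&example_obj_idr, id);
}

static unsigned int
example_count(void)
{
	void *obj;
	unsigned int n;
	int id;

	n = 0;
	idr_for_each_entry(&example_obj_idr, obj, id) {
		if (obj != NULL)
			n++;
	}
	return (n);
}

static int
example_alloc_unit(void)
{
	/* Smallest free unit number; release it again with ida_free(). */
	return (ida_alloc(&example_unit_ida, GFP_KERNEL));
}
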
a/sys/compat/linuxkpi/common/include/linux/kernel.h +++ b/sys/compat/linuxkpi/common/include/linux/kernel.h @@ -1,386 +1,385 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. * Copyright (c) 2014-2015 François Tigeot * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUXKPI_LINUX_KERNEL_H_ #define _LINUXKPI_LINUX_KERNEL_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #define KERN_CONT "" #define KERN_EMERG "<0>" #define KERN_ALERT "<1>" #define KERN_CRIT "<2>" #define KERN_ERR "<3>" #define KERN_WARNING "<4>" #define KERN_NOTICE "<5>" #define KERN_INFO "<6>" #define KERN_DEBUG "<7>" #define S8_C(x) x #define U8_C(x) x ## U #define S16_C(x) x #define U16_C(x) x ## U #define S32_C(x) x #define U32_C(x) x ## U #define S64_C(x) x ## LL #define U64_C(x) x ## ULL #define BUG() panic("BUG at %s:%d", __FILE__, __LINE__) #define BUG_ON(cond) do { \ if (cond) { \ panic("BUG ON %s failed at %s:%d", \ __stringify(cond), __FILE__, __LINE__); \ } \ } while (0) extern int linuxkpi_warn_dump_stack; #define WARN_ON(cond) ({ \ bool __ret = (cond); \ if (__ret) { \ printf("WARNING %s failed at %s:%d\n", \ __stringify(cond), __FILE__, __LINE__); \ if (linuxkpi_warn_dump_stack) \ linux_dump_stack(); \ } \ unlikely(__ret); \ }) #define WARN_ON_SMP(cond) WARN_ON(cond) #define WARN_ON_ONCE(cond) ({ \ static bool __warn_on_once; \ bool __ret = (cond); \ if (__ret && !__warn_on_once) { \ __warn_on_once = 1; \ printf("WARNING %s failed at %s:%d\n", \ __stringify(cond), __FILE__, __LINE__); \ if (linuxkpi_warn_dump_stack) \ linux_dump_stack(); \ } \ unlikely(__ret); \ }) #define oops_in_progress SCHEDULER_STOPPED() #undef ALIGN #define ALIGN(x, y) roundup2((x), (y)) #define ALIGN_DOWN(x, y) rounddown2(x, y) #undef PTR_ALIGN #define PTR_ALIGN(p, a) ((__typeof(p))ALIGN((uintptr_t)(p), (a))) #define IS_ALIGNED(x, a) (((x) & ((__typeof(x))(a) - 1)) == 0) #define __KERNEL_DIV_ROUND_UP(x, n) howmany(x, n) #define FIELD_SIZEOF(t, f) sizeof(((t *)0)->f) #define printk(...) 
printf(__VA_ARGS__) #define vprintk(f, a) vprintf(f, a) #define PTR_IF(x, p) ((x) ? (p) : NULL) #define asm __asm extern void linux_dump_stack(void); #define dump_stack() linux_dump_stack() struct va_format { const char *fmt; va_list *va; }; static inline int vscnprintf(char *buf, size_t size, const char *fmt, va_list args) { ssize_t ssize = size; int i; i = vsnprintf(buf, size, fmt, args); return ((i >= ssize) ? (ssize - 1) : i); } static inline int scnprintf(char *buf, size_t size, const char *fmt, ...) { va_list args; int i; va_start(args, fmt); i = vscnprintf(buf, size, fmt, args); va_end(args); return (i); } /* * The "pr_debug()" and "pr_devel()" macros should produce zero code * unless DEBUG is defined: */ #ifdef DEBUG extern int linuxkpi_debug; #define pr_debug(fmt, ...) \ do { \ if (linuxkpi_debug) \ log(LOG_DEBUG, fmt, ##__VA_ARGS__); \ } while (0) #define pr_devel(fmt, ...) \ log(LOG_DEBUG, pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug(fmt, ...) \ ({ if (0) log(LOG_DEBUG, fmt, ##__VA_ARGS__); 0; }) #define pr_devel(fmt, ...) \ ({ if (0) log(LOG_DEBUG, pr_fmt(fmt), ##__VA_ARGS__); 0; }) #endif #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif /* * Print a one-time message (analogous to WARN_ONCE() et al): */ #define printk_once(...) do { \ static bool __print_once; \ \ if (!__print_once) { \ __print_once = true; \ printk(__VA_ARGS__); \ } \ } while (0) /* * Log a one-time message (analogous to WARN_ONCE() et al): */ #define log_once(level,...) do { \ static bool __log_once; \ \ if (unlikely(!__log_once)) { \ __log_once = true; \ log(level, __VA_ARGS__); \ } \ } while (0) #define pr_emerg(fmt, ...) \ log(LOG_EMERG, pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert(fmt, ...) \ log(LOG_ALERT, pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit(fmt, ...) \ log(LOG_CRIT, pr_fmt(fmt), ##__VA_ARGS__) #define pr_err(fmt, ...) \ log(LOG_ERR, pr_fmt(fmt), ##__VA_ARGS__) #define pr_err_once(fmt, ...) \ log_once(LOG_ERR, pr_fmt(fmt), ##__VA_ARGS__) #define pr_warning(fmt, ...) \ log(LOG_WARNING, pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn(...) \ pr_warning(__VA_ARGS__) #define pr_warn_once(fmt, ...) \ log_once(LOG_WARNING, pr_fmt(fmt), ##__VA_ARGS__) #define pr_notice(fmt, ...) \ log(LOG_NOTICE, pr_fmt(fmt), ##__VA_ARGS__) #define pr_info(fmt, ...) \ log(LOG_INFO, pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_once(fmt, ...) \ log_once(LOG_INFO, pr_fmt(fmt), ##__VA_ARGS__) #define pr_cont(fmt, ...) \ printk(KERN_CONT fmt, ##__VA_ARGS__) #define pr_warn_ratelimited(...) do { \ static linux_ratelimit_t __ratelimited; \ if (linux_ratelimited(&__ratelimited)) \ pr_warning(__VA_ARGS__); \ } while (0) #ifndef WARN #define WARN(condition, ...) ({ \ bool __ret_warn_on = (condition); \ if (unlikely(__ret_warn_on)) \ pr_warning(__VA_ARGS__); \ unlikely(__ret_warn_on); \ }) #endif #ifndef WARN_ONCE #define WARN_ONCE(condition, ...) 
({ \ bool __ret_warn_on = (condition); \ if (unlikely(__ret_warn_on)) \ pr_warn_once(__VA_ARGS__); \ unlikely(__ret_warn_on); \ }) #endif #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define u64_to_user_ptr(val) ((void *)(uintptr_t)(val)) #define _RET_IP_ __builtin_return_address(0) #define offsetofend(t, m) \ (offsetof(t, m) + sizeof((((t *)0)->m))) #define smp_processor_id() PCPU_GET(cpuid) #define num_possible_cpus() mp_ncpus #define num_online_cpus() mp_ncpus #if defined(__i386__) || defined(__amd64__) extern bool linux_cpu_has_clflush; #define cpu_has_clflush linux_cpu_has_clflush #endif typedef struct linux_ratelimit { struct timeval lasttime; int counter; } linux_ratelimit_t; static inline bool linux_ratelimited(linux_ratelimit_t *rl) { return (ppsratecheck(&rl->lasttime, &rl->counter, 1)); } #define __is_constexpr(x) \ __builtin_constant_p(x) /* * The is_signed() macro below returns true if the passed data type is * signed. Else false is returned. */ #define is_signed(datatype) (((datatype)-1 / (datatype)2) == (datatype)0) #define TAINT_WARN 0 #define test_taint(x) (0) #define add_taint(x,y) do { \ } while (0) static inline int _h2b(const char c) { if (c >= '0' && c <= '9') return (c - '0'); if (c >= 'a' && c <= 'f') return (10 + c - 'a'); if (c >= 'A' && c <= 'F') return (10 + c - 'A'); return (-EINVAL); } static inline int hex2bin(uint8_t *bindst, const char *hexsrc, size_t binlen) { int hi4, lo4; while (binlen > 0) { hi4 = _h2b(*hexsrc++); lo4 = _h2b(*hexsrc++); if (hi4 < 0 || lo4 < 0) return (-EINVAL); *bindst++ = (hi4 << 4) | lo4; binlen--; } return (0); } static inline bool mac_pton(const char *macin, uint8_t *macout) { const char *s, *d; uint8_t mac[6], hx, lx;; int i; if (strlen(macin) < (3 * 6 - 1)) return (false); i = 0; s = macin; do { /* Should we also support '-'-delimiters? */ d = strchrnul(s, ':'); hx = lx = 0; while (s < d) { /* Fail on abc:123:xxx:... */ if ((d - s) > 2) return (false); /* We do support non-well-formed strings: 3:45:6:... */ if ((d - s) > 1) { hx = _h2b(*s); if (hx < 0) return (false); s++; } lx = _h2b(*s); if (lx < 0) return (false); s++; } mac[i] = (hx << 4) | lx; i++; if (i >= 6) return (false); } while (d != NULL && *d != '\0'); memcpy(macout, mac, 6); return (true); } #define DECLARE_FLEX_ARRAY(_t, _n) \ struct { struct { } __dummy_ ## _n; _t _n[0]; } #endif /* _LINUXKPI_LINUX_KERNEL_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/scatterlist.h b/sys/compat/linuxkpi/common/include/linux/scatterlist.h index e462d5c649f1..51ced19e6b5b 100644 --- a/sys/compat/linuxkpi/common/include/linux/scatterlist.h +++ b/sys/compat/linuxkpi/common/include/linux/scatterlist.h @@ -1,674 +1,677 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013-2017 Mellanox Technologies, Ltd. * Copyright (c) 2015 Matthew Dillon * Copyright (c) 2016 Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUXKPI_LINUX_SCATTERLIST_H_ #define _LINUXKPI_LINUX_SCATTERLIST_H_ #include +#include +#include #include +#include #include #include #include struct bus_dmamap; struct scatterlist { unsigned long page_link; #define SG_PAGE_LINK_CHAIN 0x1UL #define SG_PAGE_LINK_LAST 0x2UL #define SG_PAGE_LINK_MASK 0x3UL unsigned int offset; unsigned int length; dma_addr_t dma_address; struct bus_dmamap *dma_map; /* FreeBSD specific */ }; CTASSERT((sizeof(struct scatterlist) & SG_PAGE_LINK_MASK) == 0); struct sg_table { struct scatterlist *sgl; unsigned int nents; unsigned int orig_nents; }; struct sg_page_iter { struct scatterlist *sg; unsigned int sg_pgoffset; unsigned int maxents; struct { unsigned int nents; int pg_advance; } internal; }; struct sg_dma_page_iter { struct sg_page_iter base; }; #define SCATTERLIST_MAX_SEGMENT (-1U & ~(PAGE_SIZE - 1)) #define SG_MAX_SINGLE_ALLOC (PAGE_SIZE / sizeof(struct scatterlist)) #define SG_MAGIC 0x87654321UL #define SG_CHAIN SG_PAGE_LINK_CHAIN #define SG_END SG_PAGE_LINK_LAST #define sg_is_chain(sg) ((sg)->page_link & SG_PAGE_LINK_CHAIN) #define sg_is_last(sg) ((sg)->page_link & SG_PAGE_LINK_LAST) #define sg_chain_ptr(sg) \ ((struct scatterlist *) ((sg)->page_link & ~SG_PAGE_LINK_MASK)) #define sg_dma_address(sg) (sg)->dma_address #define sg_dma_len(sg) (sg)->length #define for_each_sg_page(sgl, iter, nents, pgoffset) \ for (_sg_iter_init(sgl, iter, nents, pgoffset); \ (iter)->sg; _sg_iter_next(iter)) #define for_each_sg_dma_page(sgl, iter, nents, pgoffset) \ for_each_sg_page(sgl, &(iter)->base, nents, pgoffset) #define for_each_sg(sglist, sg, sgmax, iter) \ for (iter = 0, sg = (sglist); iter < (sgmax); iter++, sg = sg_next(sg)) #define for_each_sgtable_sg(sgt, sg, i) \ for_each_sg((sgt)->sgl, sg, (sgt)->orig_nents, i) #define for_each_sgtable_page(sgt, iter, pgoffset) \ for_each_sg_page((sgt)->sgl, iter, (sgt)->orig_nents, pgoffset) #define for_each_sgtable_dma_sg(sgt, sg, iter) \ for_each_sg((sgt)->sgl, sg, (sgt)->nents, iter) #define for_each_sgtable_dma_page(sgt, iter, pgoffset) \ for_each_sg_dma_page((sgt)->sgl, iter, (sgt)->nents, pgoffset) typedef struct scatterlist *(sg_alloc_fn) (unsigned int, gfp_t); typedef void (sg_free_fn) (struct scatterlist *, unsigned int); static inline void sg_assign_page(struct scatterlist *sg, struct page *page) { unsigned long page_link = sg->page_link & SG_PAGE_LINK_MASK; sg->page_link = page_link | (unsigned long)page; } static inline void sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len, unsigned int offset) { sg_assign_page(sg, page); sg->offset = offset; sg->length = len; } static inline struct page * sg_page(struct scatterlist *sg) { return ((struct page *)((sg)->page_link & ~SG_PAGE_LINK_MASK)); } static inline void sg_set_buf(struct scatterlist *sg, const void *buf, 
unsigned int buflen) { sg_set_page(sg, virt_to_page(buf), buflen, ((uintptr_t)buf) & (PAGE_SIZE - 1)); } static inline struct scatterlist * sg_next(struct scatterlist *sg) { if (sg_is_last(sg)) return (NULL); sg++; if (sg_is_chain(sg)) sg = sg_chain_ptr(sg); return (sg); } static inline vm_paddr_t sg_phys(struct scatterlist *sg) { return (page_to_phys(sg_page(sg)) + sg->offset); } static inline void * sg_virt(struct scatterlist *sg) { return ((void *)((unsigned long)page_address(sg_page(sg)) + sg->offset)); } static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, struct scatterlist *sgl) { struct scatterlist *sg = &prv[prv_nents - 1]; sg->offset = 0; sg->length = 0; sg->page_link = ((unsigned long)sgl | SG_PAGE_LINK_CHAIN) & ~SG_PAGE_LINK_LAST; } static inline void sg_mark_end(struct scatterlist *sg) { sg->page_link |= SG_PAGE_LINK_LAST; sg->page_link &= ~SG_PAGE_LINK_CHAIN; } static inline void sg_init_table(struct scatterlist *sg, unsigned int nents) { bzero(sg, sizeof(*sg) * nents); sg_mark_end(&sg[nents - 1]); } static inline void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen) { sg_init_table(sg, 1); sg_set_buf(sg, buf, buflen); } static struct scatterlist * sg_kmalloc(unsigned int nents, gfp_t gfp_mask) { if (nents == SG_MAX_SINGLE_ALLOC) { return ((void *)__get_free_page(gfp_mask)); } else return (kmalloc(nents * sizeof(struct scatterlist), gfp_mask)); } static inline void sg_kfree(struct scatterlist *sg, unsigned int nents) { if (nents == SG_MAX_SINGLE_ALLOC) { free_page((unsigned long)sg); } else kfree(sg); } static inline void __sg_free_table(struct sg_table *table, unsigned int max_ents, bool skip_first_chunk, sg_free_fn * free_fn) { struct scatterlist *sgl, *next; if (unlikely(!table->sgl)) return; sgl = table->sgl; while (table->orig_nents) { unsigned int alloc_size = table->orig_nents; unsigned int sg_size; if (alloc_size > max_ents) { next = sg_chain_ptr(&sgl[max_ents - 1]); alloc_size = max_ents; sg_size = alloc_size - 1; } else { sg_size = alloc_size; next = NULL; } table->orig_nents -= sg_size; if (skip_first_chunk) skip_first_chunk = 0; else free_fn(sgl, alloc_size); sgl = next; } table->sgl = NULL; } static inline void sg_free_table(struct sg_table *table) { __sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree); } static inline int __sg_alloc_table(struct sg_table *table, unsigned int nents, unsigned int max_ents, struct scatterlist *first_chunk, gfp_t gfp_mask, sg_alloc_fn *alloc_fn) { struct scatterlist *sg, *prv; unsigned int left; memset(table, 0, sizeof(*table)); if (nents == 0) return (-EINVAL); left = nents; prv = NULL; do { unsigned int sg_size; unsigned int alloc_size = left; if (alloc_size > max_ents) { alloc_size = max_ents; sg_size = alloc_size - 1; } else sg_size = alloc_size; left -= sg_size; if (first_chunk) { sg = first_chunk; first_chunk = NULL; } else { sg = alloc_fn(alloc_size, gfp_mask); } if (unlikely(!sg)) { if (prv) table->nents = ++table->orig_nents; return (-ENOMEM); } sg_init_table(sg, alloc_size); table->nents = table->orig_nents += sg_size; if (prv) sg_chain(prv, max_ents, sg); else table->sgl = sg; if (!left) sg_mark_end(&sg[sg_size - 1]); prv = sg; } while (left); return (0); } static inline int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask) { int ret; ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC, NULL, gfp_mask, sg_kmalloc); if (unlikely(ret)) __sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree); return (ret); } #if defined(LINUXKPI_VERSION) && 
LINUXKPI_VERSION >= 51300 static inline struct scatterlist * __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int count, unsigned long off, unsigned long size, unsigned int max_segment, struct scatterlist *prv, unsigned int left_pages, gfp_t gfp_mask) #else static inline int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int count, unsigned long off, unsigned long size, unsigned int max_segment, gfp_t gfp_mask) #endif { unsigned int i, segs, cur, len; int rc; struct scatterlist *s, *sg_iter; #if defined(LINUXKPI_VERSION) && LINUXKPI_VERSION >= 51300 if (prv != NULL) { panic( "Support for prv != NULL not implemented in " "__sg_alloc_table_from_pages()"); } #endif if (__predict_false(!max_segment || offset_in_page(max_segment))) #if defined(LINUXKPI_VERSION) && LINUXKPI_VERSION >= 51300 return (ERR_PTR(-EINVAL)); #else return (-EINVAL); #endif len = 0; for (segs = i = 1; i < count; ++i) { len += PAGE_SIZE; if (len >= max_segment || page_to_pfn(pages[i]) != page_to_pfn(pages[i - 1]) + 1) { ++segs; len = 0; } } if (__predict_false((rc = sg_alloc_table(sgt, segs, gfp_mask)))) #if defined(LINUXKPI_VERSION) && LINUXKPI_VERSION >= 51300 return (ERR_PTR(rc)); #else return (rc); #endif cur = 0; for_each_sg(sgt->sgl, sg_iter, sgt->orig_nents, i) { unsigned long seg_size; unsigned int j; /* * We need to make sure that when we exit this loop "s" has the * last sg in the chain so we can call sg_mark_end() on it. * Only set this inside the loop since sg_iter will be iterated * until it is NULL. */ s = sg_iter; len = 0; for (j = cur + 1; j < count; ++j) { len += PAGE_SIZE; if (len >= max_segment || page_to_pfn(pages[j]) != page_to_pfn(pages[j - 1]) + 1) break; } seg_size = ((j - cur) << PAGE_SHIFT) - off; sg_set_page(s, pages[cur], MIN(size, seg_size), off); size -= seg_size; off = 0; cur = j; } KASSERT(s != NULL, ("s is NULL after loop in __sg_alloc_table_from_pages()")); #if defined(LINUXKPI_VERSION) && LINUXKPI_VERSION >= 51300 if (left_pages == 0) sg_mark_end(s); return (s); #else return (0); #endif } static inline int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int count, unsigned long off, unsigned long size, gfp_t gfp_mask) { #if defined(LINUXKPI_VERSION) && LINUXKPI_VERSION >= 51300 return (PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, count, off, size, SCATTERLIST_MAX_SEGMENT, NULL, 0, gfp_mask))); #else return (__sg_alloc_table_from_pages(sgt, pages, count, off, size, SCATTERLIST_MAX_SEGMENT, gfp_mask)); #endif } static inline int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages, unsigned int count, unsigned int off, unsigned long size, unsigned int max_segment, gfp_t gfp_mask) { #if defined(LINUXKPI_VERSION) && LINUXKPI_VERSION >= 51300 return (PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, count, off, size, max_segment, NULL, 0, gfp_mask))); #else return (__sg_alloc_table_from_pages(sgt, pages, count, off, size, max_segment, gfp_mask)); #endif } static inline int sg_nents(struct scatterlist *sg) { int nents; for (nents = 0; sg; sg = sg_next(sg)) nents++; return (nents); } static inline void __sg_page_iter_start(struct sg_page_iter *piter, struct scatterlist *sglist, unsigned int nents, unsigned long pgoffset) { piter->internal.pg_advance = 0; piter->internal.nents = nents; piter->sg = sglist; piter->sg_pgoffset = pgoffset; } static inline void _sg_iter_next(struct sg_page_iter *iter) { struct scatterlist *sg; unsigned int pgcount; sg = iter->sg; 
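Stepping back from the iterator internals for a moment, here is a short sketch of how the constructors and iterators above are typically combined by a LinuxKPI consumer. It assumes the caller already holds an array of struct page pointers covering "size" bytes; GFP_KERNEL and EINVAL are assumed to come from the usual LinuxKPI headers, and the example_ name is invented:

#include <linux/scatterlist.h>

static int
example_map_pages(struct page **pages, unsigned int npages, unsigned long size)
{
	struct sg_table sgt;
	struct scatterlist *sg;
	unsigned long total;
	unsigned int i;
	int error;

	/* Physically contiguous pages are coalesced into single entries. */
	error = sg_alloc_table_from_pages(&sgt, pages, npages, 0, size,
	    GFP_KERNEL);
	if (error != 0)
		return (error);

	/* Walk the resulting CPU-side entries and add up their lengths. */
	total = 0;
	for_each_sgtable_sg(&sgt, sg, i)
		total += sg->length;

	sg_free_table(&sgt);
	return (total == size ? 0 : -EINVAL);
}
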
pgcount = (sg->offset + sg->length + PAGE_SIZE - 1) >> PAGE_SHIFT; ++iter->sg_pgoffset; while (iter->sg_pgoffset >= pgcount) { iter->sg_pgoffset -= pgcount; sg = sg_next(sg); --iter->maxents; if (sg == NULL || iter->maxents == 0) break; pgcount = (sg->offset + sg->length + PAGE_SIZE - 1) >> PAGE_SHIFT; } iter->sg = sg; } static inline int sg_page_count(struct scatterlist *sg) { return (PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT); } #define sg_dma_page_count(sg) \ sg_page_count(sg) static inline bool __sg_page_iter_next(struct sg_page_iter *piter) { unsigned int pgcount; if (piter->internal.nents == 0) return (0); if (piter->sg == NULL) return (0); piter->sg_pgoffset += piter->internal.pg_advance; piter->internal.pg_advance = 1; while (1) { pgcount = sg_page_count(piter->sg); if (likely(piter->sg_pgoffset < pgcount)) break; piter->sg_pgoffset -= pgcount; piter->sg = sg_next(piter->sg); if (--piter->internal.nents == 0) return (0); if (piter->sg == NULL) return (0); } return (1); } #define __sg_page_iter_dma_next(itr) \ __sg_page_iter_next(&(itr)->base) static inline void _sg_iter_init(struct scatterlist *sgl, struct sg_page_iter *iter, unsigned int nents, unsigned long pgoffset) { if (nents) { iter->sg = sgl; iter->sg_pgoffset = pgoffset - 1; iter->maxents = nents; _sg_iter_next(iter); } else { iter->sg = NULL; iter->sg_pgoffset = 0; iter->maxents = 0; } } /* * sg_page_iter_dma_address() is implemented as a macro because it * needs to accept two different and identical structure types. This * allows both old and new code to co-exist. The compile time assert * adds some safety, that the structure sizes match. */ #define sg_page_iter_dma_address(spi) ({ \ struct sg_page_iter *__spi = (void *)(spi); \ dma_addr_t __dma_address; \ CTASSERT(sizeof(*(spi)) == sizeof(*__spi)); \ __dma_address = __spi->sg->dma_address + \ (__spi->sg_pgoffset << PAGE_SHIFT); \ __dma_address; \ }) static inline struct page * sg_page_iter_page(struct sg_page_iter *piter) { return (nth_page(sg_page(piter->sg), piter->sg_pgoffset)); } static __inline size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents, const void *buf, size_t buflen, off_t skip) { struct sg_page_iter piter; struct page *page; struct sf_buf *sf; size_t len, copied; char *p, *b; if (buflen == 0) return (0); b = __DECONST(char *, buf); copied = 0; sched_pin(); for_each_sg_page(sgl, &piter, nents, 0) { /* Skip to the start. */ if (piter.sg->length <= skip) { skip -= piter.sg->length; continue; } /* See how much to copy. */ KASSERT(((piter.sg->length - skip) != 0 && (buflen != 0)), ("%s: sg len %u - skip %ju || buflen %zu is 0\n", __func__, piter.sg->length, (uintmax_t)skip, buflen)); len = min(piter.sg->length - skip, buflen); page = sg_page_iter_page(&piter); sf = sf_buf_alloc(page, SFB_CPUPRIVATE | SFB_NOWAIT); if (sf == NULL) break; p = (char *)sf_buf_kva(sf) + piter.sg_pgoffset + skip; memcpy(p, b, len); sf_buf_free(sf); /* We copied so nothing more to skip. */ skip = 0; copied += len; /* Either we exactly filled the page, or we are done. 
*/ buflen -= len; if (buflen == 0) break; b += len; } sched_unpin(); return (copied); } static inline size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents, const void *buf, size_t buflen) { return (sg_pcopy_from_buffer(sgl, nents, buf, buflen, 0)); } static inline size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen, off_t offset) { struct sg_page_iter iter; struct scatterlist *sg; struct page *page; struct sf_buf *sf; char *vaddr; size_t total = 0; size_t len; if (!PMAP_HAS_DMAP) sched_pin(); for_each_sg_page(sgl, &iter, nents, 0) { sg = iter.sg; if (offset >= sg->length) { offset -= sg->length; continue; } len = ulmin(buflen, sg->length - offset); if (len == 0) break; page = sg_page_iter_page(&iter); if (!PMAP_HAS_DMAP) { sf = sf_buf_alloc(page, SFB_CPUPRIVATE | SFB_NOWAIT); if (sf == NULL) break; vaddr = (char *)sf_buf_kva(sf); } else vaddr = (char *)PHYS_TO_DMAP(page_to_phys(page)); memcpy(buf, vaddr + sg->offset + offset, len); if (!PMAP_HAS_DMAP) sf_buf_free(sf); /* start at beginning of next page */ offset = 0; /* advance buffer */ buf = (char *)buf + len; buflen -= len; total += len; } if (!PMAP_HAS_DMAP) sched_unpin(); return (total); } #endif /* _LINUXKPI_LINUX_SCATTERLIST_H_ */ diff --git a/sys/compat/linuxkpi/common/src/linux_rcu.c b/sys/compat/linuxkpi/common/src/linux_rcu.c index 335708b6747f..4879c30164e3 100644 --- a/sys/compat/linuxkpi/common/src/linux_rcu.c +++ b/sys/compat/linuxkpi/common/src/linux_rcu.c @@ -1,429 +1,430 @@ /*- * Copyright (c) 2016 Matthew Macy (mmacy@mattmacy.io) * Copyright (c) 2017-2021 Hans Petter Selasky (hselasky@freebsd.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include /* * By defining CONFIG_NO_RCU_SKIP LinuxKPI RCU locks and asserts will * not be skipped during panic(). 
*/ #ifdef CONFIG_NO_RCU_SKIP #define RCU_SKIP(void) 0 #else #define RCU_SKIP(void) unlikely(SCHEDULER_STOPPED() || kdb_active) #endif struct callback_head { union { STAILQ_ENTRY(callback_head) entry; struct llist_node node; }; rcu_callback_t func; }; struct linux_epoch_head { struct llist_head cb_head; struct task task; } __aligned(CACHE_LINE_SIZE); struct linux_epoch_record { ck_epoch_record_t epoch_record; TAILQ_HEAD(, task_struct) ts_head; int cpuid; int type; } __aligned(CACHE_LINE_SIZE); /* * Verify that "struct rcu_head" is big enough to hold "struct * callback_head". This has been done to avoid having to add special * compile flags for including ck_epoch.h to all clients of the * LinuxKPI. */ CTASSERT(sizeof(struct rcu_head) == sizeof(struct callback_head)); /* * Verify that "rcu_section[0]" has the same size as * "ck_epoch_section_t". This has been done to avoid having to add * special compile flags for including ck_epoch.h to all clients of * the LinuxKPI. */ CTASSERT(sizeof(((struct task_struct *)0)->rcu_section[0] == sizeof(ck_epoch_section_t))); /* * Verify that "epoch_record" is at beginning of "struct * linux_epoch_record": */ CTASSERT(offsetof(struct linux_epoch_record, epoch_record) == 0); CTASSERT(TS_RCU_TYPE_MAX == RCU_TYPE_MAX); static ck_epoch_t linux_epoch[RCU_TYPE_MAX]; static struct linux_epoch_head linux_epoch_head[RCU_TYPE_MAX]; DPCPU_DEFINE_STATIC(struct linux_epoch_record, linux_epoch_record[RCU_TYPE_MAX]); static void linux_rcu_cleaner_func(void *, int); static void linux_rcu_runtime_init(void *arg __unused) { struct linux_epoch_head *head; int i; int j; for (j = 0; j != RCU_TYPE_MAX; j++) { ck_epoch_init(&linux_epoch[j]); head = &linux_epoch_head[j]; TASK_INIT(&head->task, 0, linux_rcu_cleaner_func, head); init_llist_head(&head->cb_head); CPU_FOREACH(i) { struct linux_epoch_record *record; record = &DPCPU_ID_GET(i, linux_epoch_record[j]); record->cpuid = i; record->type = j; ck_epoch_register(&linux_epoch[j], &record->epoch_record, NULL); TAILQ_INIT(&record->ts_head); } } } SYSINIT(linux_rcu_runtime, SI_SUB_CPU, SI_ORDER_ANY, linux_rcu_runtime_init, NULL); static void linux_rcu_cleaner_func(void *context, int pending __unused) { struct linux_epoch_head *head = context; struct callback_head *rcu; STAILQ_HEAD(, callback_head) tmp_head; struct llist_node *node, *next; uintptr_t offset; /* move current callbacks into own queue */ STAILQ_INIT(&tmp_head); llist_for_each_safe(node, next, llist_del_all(&head->cb_head)) { rcu = container_of(node, struct callback_head, node); /* re-reverse list to restore chronological order */ STAILQ_INSERT_HEAD(&tmp_head, rcu, entry); } /* synchronize */ linux_synchronize_rcu(head - linux_epoch_head); /* dispatch all callbacks, if any */ while ((rcu = STAILQ_FIRST(&tmp_head)) != NULL) { STAILQ_REMOVE_HEAD(&tmp_head, entry); offset = (uintptr_t)rcu->func; if (offset < LINUX_KFREE_RCU_OFFSET_MAX) kfree((char *)rcu - offset); else rcu->func((struct rcu_head *)rcu); } } void linux_rcu_read_lock(unsigned type) { struct linux_epoch_record *record; struct task_struct *ts; MPASS(type < RCU_TYPE_MAX); if (RCU_SKIP()) return; ts = current; /* assert valid refcount */ MPASS(ts->rcu_recurse[type] != INT_MAX); if (++(ts->rcu_recurse[type]) != 1) return; /* * Pin thread to current CPU so that the unlock code gets the * same per-CPU epoch record: */ sched_pin(); record = &DPCPU_GET(linux_epoch_record[type]); /* * Use a critical section to prevent recursion inside * ck_epoch_begin(). Else this function supports recursion. 
*/ critical_enter(); ck_epoch_begin(&record->epoch_record, (ck_epoch_section_t *)&ts->rcu_section[type]); TAILQ_INSERT_TAIL(&record->ts_head, ts, rcu_entry[type]); critical_exit(); } void linux_rcu_read_unlock(unsigned type) { struct linux_epoch_record *record; struct task_struct *ts; MPASS(type < RCU_TYPE_MAX); if (RCU_SKIP()) return; ts = current; /* assert valid refcount */ MPASS(ts->rcu_recurse[type] > 0); if (--(ts->rcu_recurse[type]) != 0) return; record = &DPCPU_GET(linux_epoch_record[type]); /* * Use a critical section to prevent recursion inside * ck_epoch_end(). Else this function supports recursion. */ critical_enter(); ck_epoch_end(&record->epoch_record, (ck_epoch_section_t *)&ts->rcu_section[type]); TAILQ_REMOVE(&record->ts_head, ts, rcu_entry[type]); critical_exit(); sched_unpin(); } static void linux_synchronize_rcu_cb(ck_epoch_t *epoch __unused, ck_epoch_record_t *epoch_record, void *arg __unused) { struct linux_epoch_record *record = container_of(epoch_record, struct linux_epoch_record, epoch_record); struct thread *td = curthread; struct task_struct *ts; /* check if blocked on the current CPU */ if (record->cpuid == PCPU_GET(cpuid)) { bool is_sleeping = 0; u_char prio = 0; /* * Find the lowest priority or sleeping thread which * is blocking synchronization on this CPU core. All * the threads in the queue are CPU-pinned and cannot * go anywhere while the current thread is locked. */ TAILQ_FOREACH(ts, &record->ts_head, rcu_entry[record->type]) { if (ts->task_thread->td_priority > prio) prio = ts->task_thread->td_priority; is_sleeping |= (ts->task_thread->td_inhibitors != 0); } if (is_sleeping) { thread_unlock(td); pause("W", 1); thread_lock(td); } else { /* set new thread priority */ sched_prio(td, prio); /* task switch */ mi_switch(SW_VOL | SWT_RELINQUISH); /* * It is important the thread lock is dropped * while yielding to allow other threads to * acquire the lock pointed to by * TDQ_LOCKPTR(td). Currently mi_switch() will * unlock the thread lock before * returning. Else a deadlock like situation * might happen. */ thread_lock(td); } } else { /* * To avoid spinning move execution to the other CPU * which is blocking synchronization. Set highest * thread priority so that code gets run. The thread * priority will be restored later. */ sched_prio(td, 0); sched_bind(td, record->cpuid); } } void linux_synchronize_rcu(unsigned type) { struct thread *td; int was_bound; int old_cpu; int old_pinned; u_char old_prio; MPASS(type < RCU_TYPE_MAX); if (RCU_SKIP()) return; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "linux_synchronize_rcu() can sleep"); td = curthread; DROP_GIANT(); /* * Synchronizing RCU might change the CPU core this function * is running on. Save current values: */ thread_lock(td); old_cpu = PCPU_GET(cpuid); old_pinned = td->td_pinned; old_prio = td->td_priority; was_bound = sched_is_bound(td); sched_unbind(td); td->td_pinned = 0; sched_bind(td, old_cpu); ck_epoch_synchronize_wait(&linux_epoch[type], &linux_synchronize_rcu_cb, NULL); /* restore CPU binding, if any */ if (was_bound != 0) { sched_bind(td, old_cpu); } else { /* get thread back to initial CPU, if any */ if (old_pinned != 0) sched_bind(td, old_cpu); sched_unbind(td); } /* restore pinned after bind */ td->td_pinned = old_pinned; /* restore thread priority */ sched_prio(td, old_prio); thread_unlock(td); PICKUP_GIANT(); } void linux_rcu_barrier(unsigned type) { struct linux_epoch_head *head; MPASS(type < RCU_TYPE_MAX); /* * This function is not obligated to wait for a grace period. 
* It only waits for RCU callbacks that have already been posted. * If there are no RCU callbacks posted, rcu_barrier() can return * immediately. */ head = &linux_epoch_head[type]; /* wait for callbacks to complete */ taskqueue_drain(linux_irq_work_tq, &head->task); } void linux_call_rcu(unsigned type, struct rcu_head *context, rcu_callback_t func) { struct callback_head *rcu; struct linux_epoch_head *head; MPASS(type < RCU_TYPE_MAX); rcu = (struct callback_head *)context; head = &linux_epoch_head[type]; rcu->func = func; llist_add(&rcu->node, &head->cb_head); taskqueue_enqueue(linux_irq_work_tq, &head->task); } int init_srcu_struct(struct srcu_struct *srcu) { return (0); } void cleanup_srcu_struct(struct srcu_struct *srcu) { } int srcu_read_lock(struct srcu_struct *srcu) { linux_rcu_read_lock(RCU_TYPE_SLEEPABLE); return (0); } void srcu_read_unlock(struct srcu_struct *srcu, int key __unused) { linux_rcu_read_unlock(RCU_TYPE_SLEEPABLE); } void synchronize_srcu(struct srcu_struct *srcu) { linux_synchronize_rcu(RCU_TYPE_SLEEPABLE); } void srcu_barrier(struct srcu_struct *srcu) { linux_rcu_barrier(RCU_TYPE_SLEEPABLE); }
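
For context, here is the classic read-mostly pattern that the functions above exist to support, written as a LinuxKPI consumer would write it. It assumes the usual wrappers from linux/rcupdate.h and linux/slab.h (rcu_read_lock(), rcu_dereference(), rcu_dereference_protected(), rcu_assign_pointer(), call_rcu(), kzalloc()/kfree()), which ultimately resolve to the linux_rcu_*() functions in this file; the example_* names and the external serialization of updaters are assumptions of the sketch:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_cfg {
	struct rcu_head rcu;
	int value;
};

static struct example_cfg __rcu *example_cfg_p;

static int
example_read_value(void)
{
	struct example_cfg *cfg;
	int v;

	v = 0;
	rcu_read_lock();			/* linux_rcu_read_lock() */
	cfg = rcu_dereference(example_cfg_p);
	if (cfg != NULL)
		v = cfg->value;
	rcu_read_unlock();			/* linux_rcu_read_unlock() */
	return (v);
}

static void
example_cfg_free_cb(struct rcu_head *head)
{
	kfree(container_of(head, struct example_cfg, rcu));
}

/* Callers are assumed to serialize updates among themselves. */
static int
example_update_value(int value)
{
	struct example_cfg *new_cfg, *old_cfg;

	new_cfg = kzalloc(sizeof(*new_cfg), GFP_KERNEL);
	if (new_cfg == NULL)
		return (-ENOMEM);
	new_cfg->value = value;

	old_cfg = rcu_dereference_protected(example_cfg_p, true);
	rcu_assign_pointer(example_cfg_p, new_cfg);

	/* Defer the free until all current readers have left. */
	if (old_cfg != NULL)
		call_rcu(&old_cfg->rcu, example_cfg_free_cb);
	return (0);
}

If the header's kfree_rcu() helper is used instead of the explicit callback, the stored function pointer becomes a small structure offset, and linux_rcu_cleaner_func() above takes the LINUX_KFREE_RCU_OFFSET_MAX branch and calls kfree() on the enclosing object directly.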