diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 8720d9f71c1c..e9a16cd0b36e 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -1,389 +1,393 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 2007, 2022 The FreeBSD Foundation * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_hwpmc_hooks.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VIMAGE #include #endif #ifdef HWPMC_HOOKS #include #endif #ifdef EPOCH_TRACE #include #endif +void (*tcp_hpts_softclock)(void); + /* * Define the code needed before returning to user mode, for trap and * syscall. */ void userret(struct thread *td, struct trapframe *frame) { struct proc *p = td->td_proc; CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid, td->td_name); KASSERT((p->p_flag & P_WEXIT) == 0, ("Exiting process returns to usermode")); #ifdef DIAGNOSTIC /* * Check that we called signotify() enough. For * multi-threaded processes, where signal distribution might * change due to other threads changing sigmask, the check is * racy and cannot be performed reliably. * If current process is vfork child, indicated by P_PPWAIT, then * issignal() ignores stops, so we block the check to avoid * classifying pending signals. 
*/ if (p->p_numthreads == 1) { PROC_LOCK(p); thread_lock(td); if ((p->p_flag & P_PPWAIT) == 0 && (td->td_pflags & TDP_SIGFASTBLOCK) == 0 && SIGPENDING(td) && !td_ast_pending(td, TDA_AST) && !td_ast_pending(td, TDA_SIG)) { thread_unlock(td); panic( "failed to set signal flags for ast p %p " "td %p td_ast %#x fl %#x", p, td, td->td_ast, td->td_flags); } thread_unlock(td); PROC_UNLOCK(p); } #endif /* * Charge system time if profiling. */ if (__predict_false(p->p_flag & P_PROFIL)) addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio); #ifdef HWPMC_HOOKS if (PMC_THREAD_HAS_SAMPLES(td)) PMC_CALL_HOOK(td, PMC_FN_THR_USERRET, NULL); #endif -#ifdef TCPHPTS /* - * @gallatin is adament that this needs to go here, I - * am not so sure. Running hpts is a lot like - * a lro_flush() that happens while a user process - * is running. But he may know best so I will go - * with his view of accounting. :-) + * Calling tcp_hpts_softclock() here allows us to avoid frequent, + * expensive callouts that trash the cache and lead to a much higher + * number of interrupts and context switches. Testing on busy web + * servers at Netflix has shown that this improves CPU use by 7% over + * relying only on callouts to drive HPTS, and also results in idle + * power savings on mostly idle servers. + * This was inspired by the paper "Soft Timers: Efficient Microsecond + * Software Timer Support for Network Processing" + * by Mohit Aron and Peter Druschel. */ - tcp_run_hpts(); -#endif + tcp_hpts_softclock(); /* * Let the scheduler adjust our priority etc. */ sched_userret(td); /* * Check for misbehavior. * * In case there is a callchain tracing ongoing because of * hwpmc(4), skip the scheduler pinning check. * hwpmc(4) subsystem, infact, will collect callchain informations * at ast() checkpoint, which is past userret(). */ WITNESS_WARN(WARN_PANIC, NULL, "userret: returning"); KASSERT(td->td_critnest == 0, ("userret: Returning in a critical section")); KASSERT(td->td_locks == 0, ("userret: Returning with %d locks held", td->td_locks)); KASSERT(td->td_rw_rlocks == 0, ("userret: Returning with %d rwlocks held in read mode", td->td_rw_rlocks)); KASSERT(td->td_sx_slocks == 0, ("userret: Returning with %d sx locks held in shared mode", td->td_sx_slocks)); KASSERT(td->td_lk_slocks == 0, ("userret: Returning with %d lockmanager locks held in shared mode", td->td_lk_slocks)); KASSERT((td->td_pflags & TDP_NOFAULTING) == 0, ("userret: Returning with pagefaults disabled")); if (__predict_false(!THREAD_CAN_SLEEP())) { #ifdef EPOCH_TRACE epoch_trace_list(curthread); #endif KASSERT(0, ("userret: Returning with sleep disabled")); } KASSERT(td->td_pinned == 0 || (td->td_pflags & TDP_CALLCHAIN) != 0, ("userret: Returning with pinned thread")); KASSERT(td->td_vp_reserved == NULL, ("userret: Returning with preallocated vnode")); KASSERT((td->td_flags & (TDF_SBDRY | TDF_SEINTR | TDF_SERESTART)) == 0, ("userret: Returning with stop signals deferred")); KASSERT(td->td_vslock_sz == 0, ("userret: Returning with vslock-wired space")); #ifdef VIMAGE /* Unfortunately td_vnet_lpush needs VNET_DEBUG. */ VNET_ASSERT(curvnet == NULL, ("%s: Returning on td %p (pid %d, %s) with vnet %p set in %s", __func__, td, p->p_pid, td->td_name, curvnet, (td->td_vnet_lpush != NULL) ? 
td->td_vnet_lpush : "N/A")); #endif } static void ast_prep(struct thread *td, int tda __unused) { VM_CNT_INC(v_trap); td->td_pticks = 0; if (td->td_cowgen != atomic_load_int(&td->td_proc->p_cowgen)) thread_cow_update(td); } struct ast_entry { int ae_flags; int ae_tdp; void (*ae_f)(struct thread *td, int ast); }; _Static_assert(TDAI(TDA_MAX) <= UINT_MAX, "Too many ASTs"); static struct ast_entry ast_entries[TDA_MAX] __read_mostly = { [TDA_AST] = { .ae_f = ast_prep, .ae_flags = ASTR_UNCOND}, }; void ast_register(int ast, int flags, int tdp, void (*f)(struct thread *, int asts)) { struct ast_entry *ae; MPASS(ast < TDA_MAX); MPASS((flags & ASTR_TDP) == 0 || ((flags & ASTR_ASTF_REQUIRED) != 0 && __bitcount(tdp) == 1)); ae = &ast_entries[ast]; MPASS(ae->ae_f == NULL); ae->ae_flags = flags; ae->ae_tdp = tdp; atomic_interrupt_fence(); ae->ae_f = f; } /* * XXXKIB Note that the deregistration of an AST handler does not * drain threads possibly executing it, which affects unloadable * modules. The issue is either handled by the subsystem using * handlers, or simply ignored. Fixing the problem is considered not * worth the overhead. */ void ast_deregister(int ast) { struct ast_entry *ae; MPASS(ast < TDA_MAX); ae = &ast_entries[ast]; MPASS(ae->ae_f != NULL); ae->ae_f = NULL; atomic_interrupt_fence(); ae->ae_flags = 0; ae->ae_tdp = 0; } void ast_sched_locked(struct thread *td, int tda) { THREAD_LOCK_ASSERT(td, MA_OWNED); MPASS(tda < TDA_MAX); td->td_ast |= TDAI(tda); } void ast_unsched_locked(struct thread *td, int tda) { THREAD_LOCK_ASSERT(td, MA_OWNED); MPASS(tda < TDA_MAX); td->td_ast &= ~TDAI(tda); } void ast_sched(struct thread *td, int tda) { thread_lock(td); ast_sched_locked(td, tda); thread_unlock(td); } void ast_sched_mask(struct thread *td, int ast) { thread_lock(td); td->td_ast |= ast; thread_unlock(td); } static bool ast_handler_calc_tdp_run(struct thread *td, const struct ast_entry *ae) { return ((ae->ae_flags & ASTR_TDP) == 0 || (td->td_pflags & ae->ae_tdp) != 0); } /* * Process an asynchronous software trap. */ static void ast_handler(struct thread *td, struct trapframe *framep, bool dtor) { struct ast_entry *ae; void (*f)(struct thread *td, int asts); int a, td_ast; bool run; if (framep != NULL) { kmsan_mark(framep, sizeof(*framep), KMSAN_STATE_INITED); td->td_frame = framep; } if (__predict_true(!dtor)) { WITNESS_WARN(WARN_PANIC, NULL, "Returning to user mode"); mtx_assert(&Giant, MA_NOTOWNED); THREAD_LOCK_ASSERT(td, MA_NOTOWNED); /* * This updates the td_ast for the checks below in one * atomic operation with turning off all scheduled AST's. * If another AST is triggered while we are handling the * AST's saved in td_ast, the td_ast is again non-zero and * ast() will be called again. */ thread_lock(td); td_ast = td->td_ast; td->td_ast = 0; thread_unlock(td); } else { /* * The td thread's td_lock is not guaranteed to exist, * the thread might be not initialized enough when it's * destructor is called. It is safe to read and * update td_ast without locking since the thread is * not runnable or visible to other threads. 
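The unconditional tcp_hpts_softclock() call added to userret() above relies on the pointer never being NULL; this diff does not show how the default is provided (that lives in the HPTS code proper). A minimal sketch of how such a hook can be wired safely, using hypothetical example_* names rather than the real symbols:

static void
example_softclock_stub(void)
{
	/* Nothing to do until the real worker registers itself. */
}

void (*example_softclock_hook)(void) = example_softclock_stub;

void
example_softclock_register(void (*fn)(void))
{
	/* Called once from the pacing subsystem's initialization path. */
	example_softclock_hook = fn;
}

void
example_return_to_user(void)
{
	/* The fast path may now call the hook without a NULL check. */
	example_softclock_hook();
}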
*/ td_ast = td->td_ast; td->td_ast = 0; } CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, td->td_proc->p_pid, td->td_proc->p_comm); KASSERT(framep == NULL || TRAPF_USERMODE(framep), ("ast in kernel mode")); for (a = 0; a < nitems(ast_entries); a++) { ae = &ast_entries[a]; f = ae->ae_f; if (f == NULL) continue; atomic_interrupt_fence(); run = false; if (__predict_false(framep == NULL)) { if ((ae->ae_flags & ASTR_KCLEAR) != 0) run = ast_handler_calc_tdp_run(td, ae); } else { if ((ae->ae_flags & ASTR_UNCOND) != 0) run = true; else if ((ae->ae_flags & ASTR_ASTF_REQUIRED) != 0 && (td_ast & TDAI(a)) != 0) run = ast_handler_calc_tdp_run(td, ae); } if (run) f(td, td_ast); } } void ast(struct trapframe *framep) { struct thread *td; td = curthread; ast_handler(td, framep, false); userret(td, framep); } void ast_kclear(struct thread *td) { ast_handler(td, NULL, td != curthread); } const char * syscallname(struct proc *p, u_int code) { static const char unknown[] = "unknown"; struct sysentvec *sv; sv = p->p_sysent; if (sv->sv_syscallnames == NULL || code >= sv->sv_size) return (unknown); return (sv->sv_syscallnames[code]); } diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h index 8ca21daf60de..7eb1b2e08cb4 100644 --- a/sys/netinet/tcp_hpts.h +++ b/sys/netinet/tcp_hpts.h @@ -1,223 +1,222 @@ /*- * Copyright (c) 2016-2018 Netflix, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
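The ast_register()/ast_sched() interface above is how subsystems hook the return-to-user path. A hedged sketch of a consumer, where TDA_EXAMPLE stands in for one of the real TDA_* slots from sys/proc.h and ASTR_ASTF_REQUIRED makes the handler run only when it was explicitly scheduled:

static void
example_ast_handler(struct thread *td, int tda __unused)
{
	/* Runs from ast_handler() on td's way back to user mode. */
}

static void
example_subsystem_init(void)
{
	ast_register(TDA_EXAMPLE, ASTR_ASTF_REQUIRED, 0, example_ast_handler);
}

static void
example_subsystem_poke(struct thread *td)
{
	/* Request the handler on this thread's next return to user mode. */
	ast_sched(td, TDA_EXAMPLE);
}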
*/ #ifndef __tcp_hpts_h__ #define __tcp_hpts_h__ /* Number of useconds in a hpts tick */ #define HPTS_TICKS_PER_SLOT 10 #define HPTS_MS_TO_SLOTS(x) ((x * 100) + 1) #define HPTS_USEC_TO_SLOTS(x) ((x+9) /10) #define HPTS_USEC_IN_SEC 1000000 #define HPTS_MSEC_IN_SEC 1000 #define HPTS_USEC_IN_MSEC 1000 struct hpts_diag { uint32_t p_hpts_active; /* bbr->flex7 x */ uint32_t p_nxt_slot; /* bbr->flex1 x */ uint32_t p_cur_slot; /* bbr->flex2 x */ uint32_t p_prev_slot; /* bbr->delivered */ uint32_t p_runningslot; /* bbr->inflight */ uint32_t slot_req; /* bbr->flex3 x */ uint32_t inp_hptsslot; /* bbr->flex4 x */ uint32_t slot_remaining; /* bbr->flex5 x */ uint32_t have_slept; /* bbr->epoch x */ uint32_t hpts_sleep_time; /* bbr->applimited x */ uint32_t yet_to_sleep; /* bbr->lt_epoch x */ uint32_t need_new_to; /* bbr->flex6 x */ uint32_t wheel_slot; /* bbr->bw_inuse x */ uint32_t maxslots; /* bbr->delRate x */ uint32_t wheel_cts; /* bbr->rttProp x */ int32_t co_ret; /* bbr->pkts_out x */ uint32_t p_curtick; /* upper bbr->cur_del_rate */ uint32_t p_lasttick; /* lower bbr->cur_del_rate */ uint8_t p_on_min_sleep; /* bbr->flex8 x */ }; /* Magic flags to tell whats cooking on the pacing wheel */ #define PACE_TMR_DELACK 0x01 /* Delayed ack timer running */ #define PACE_TMR_RACK 0x02 /* RACK timer running */ #define PACE_TMR_TLP 0x04 /* TLP timer running */ #define PACE_TMR_RXT 0x08 /* Retransmit timer running */ #define PACE_TMR_PERSIT 0x10 /* Persists timer running */ #define PACE_TMR_KEEP 0x20 /* Keep alive timer running */ #define PACE_PKT_OUTPUT 0x40 /* Output Packets being paced */ #define PACE_TMR_MASK (PACE_TMR_KEEP|PACE_TMR_PERSIT|PACE_TMR_RXT|PACE_TMR_TLP|PACE_TMR_RACK|PACE_TMR_DELACK) #define DEFAULT_CONNECTION_THESHOLD 100 /* * When using the hpts, a TCP stack must make sure * that once a INP_DROPPED flag is applied to a INP * that it does not expect tcp_output() to ever be * called by the hpts. The hpts will *not* call * any output (or input) functions on a TCB that * is in the DROPPED state. * * This implies final ACK's and RST's that might * be sent when a TCB is still around must be * sent from a routine like tcp_respond(). */ #define LOWEST_SLEEP_ALLOWED 50 #define DEFAULT_MIN_SLEEP 250 /* How many usec's is default for hpts sleep * this determines min granularity of the * hpts. If 1, granularity is 10useconds at * the cost of more CPU (context switching). * Note do not set this to 0. */ #define DYNAMIC_MIN_SLEEP DEFAULT_MIN_SLEEP #define DYNAMIC_MAX_SLEEP 5000 /* 5ms */ /* Thresholds for raising/lowering sleep */ #define TICKS_INDICATE_MORE_SLEEP 100 /* This would be 1ms */ #define TICKS_INDICATE_LESS_SLEEP 1000 /* This would indicate 10ms */ /** * * Dynamic adjustment of sleeping times is done in "new" mode * where we are depending on syscall returns and lro returns * to push hpts forward mainly and the timer is only a backstop. * * When we are in the "new" mode i.e. conn_cnt > conn_cnt_thresh * then we do a dynamic adjustment on the time we sleep. * Our threshold is if the lateness of the first client served (in ticks) is * greater than or equal too ticks_indicate_more_sleep (10ms * or 10000 ticks). If we were that late, the actual sleep time * is adjusted down by 50%. If the ticks_ran is less than * ticks_indicate_more_sleep (100 ticks or 1000usecs). 
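A quick worked check of the slot conversions defined above (one slot is HPTS_TICKS_PER_SLOT, i.e. 10 usec), in the _Static_assert style already used in subr_trap.c; the values follow directly from the macro definitions:

_Static_assert(HPTS_MS_TO_SLOTS(2) == 201, "2 ms is (2 * 100) + 1 slots");
_Static_assert(HPTS_USEC_TO_SLOTS(25) == 3, "25 us rounds up to 3 slots");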
* */ #ifdef _KERNEL void tcp_hpts_init(struct tcpcb *); void tcp_hpts_remove(struct tcpcb *); static inline bool tcp_in_hpts(struct tcpcb *tp) { return (tp->t_in_hpts == IHPTS_ONQUEUE); } /* * To insert a TCB on the hpts you *must* be holding the * INP_WLOCK(). The hpts insert code will then acqurire * the hpts's lock and insert the TCB on the requested * slot possibly waking up the hpts if you are requesting * a time earlier than what the hpts is sleeping to (if * the hpts is sleeping). You may check the inp->inp_in_hpts * flag without the hpts lock. The hpts is the only one * that will clear this flag holding only the hpts lock. This * means that in your tcp_output() routine when you test for * it to be 1 (so you wont call output) it may be transitioning * to 0 (by the hpts). That will be fine since that will just * mean an extra call to tcp_output that most likely will find * the call you executed (when the mis-match occurred) will have * put the TCB back on the hpts and it will return. If your * call did not add it back to the hpts then you will either * over-send or the cwnd will block you from sending more. * * Note you should also be holding the INP_WLOCK() when you * call the remove from the hpts as well. Thoug usually * you are either doing this from a timer, where you need * that INP_WLOCK() or from destroying your TCB where again * you should already have the INP_WLOCK(). */ uint32_t tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_diag *diag); #define tcp_hpts_insert(inp, slot) \ tcp_hpts_insert_diag((inp), (slot), __LINE__, NULL) void __tcp_set_hpts(struct tcpcb *tp, int32_t line); #define tcp_set_hpts(a) __tcp_set_hpts(a, __LINE__) void tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason); -extern void (*tcp_hpts_softclock)(void); void tcp_lro_hpts_init(void); extern int32_t tcp_min_hptsi_time; #endif /* _KERNEL */ /* * The following functions should also be available * to userspace as well. */ static __inline uint32_t tcp_tv_to_hptstick(const struct timeval *sv) { return ((sv->tv_sec * 100000) + (sv->tv_usec / HPTS_TICKS_PER_SLOT)); } static __inline uint32_t tcp_tv_to_usectick(const struct timeval *sv) { return ((uint32_t) ((sv->tv_sec * HPTS_USEC_IN_SEC) + sv->tv_usec)); } static __inline uint32_t tcp_tv_to_mssectick(const struct timeval *sv) { return ((uint32_t) ((sv->tv_sec * HPTS_MSEC_IN_SEC) + (sv->tv_usec/HPTS_USEC_IN_MSEC))); } static __inline uint64_t tcp_tv_to_lusectick(const struct timeval *sv) { return ((uint64_t)((sv->tv_sec * HPTS_USEC_IN_SEC) + sv->tv_usec)); } #ifdef _KERNEL extern int32_t tcp_min_hptsi_time; static inline int32_t get_hpts_min_sleep_time(void) { return (tcp_min_hptsi_time + HPTS_TICKS_PER_SLOT); } static __inline uint32_t tcp_gethptstick(struct timeval *sv) { struct timeval tv; if (sv == NULL) sv = &tv; microuptime(sv); return (tcp_tv_to_hptstick(sv)); } static __inline uint32_t tcp_get_usecs(struct timeval *tv) { struct timeval tvd; if (tv == NULL) tv = &tvd; microuptime(tv); return (tcp_tv_to_usectick(tv)); } #endif /* _KERNEL */ #endif /* __tcp_hpts_h__ */ diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c index 255e543ae21d..921d28f82517 100644 --- a/sys/netinet/tcp_lro.c +++ b/sys/netinet/tcp_lro.c @@ -1,1507 +1,1505 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2007, Myricom Inc. * Copyright (c) 2008, Intel Corporation. * Copyright (c) 2012 The FreeBSD Foundation * Copyright (c) 2016-2021 Mellanox Technologies. * All rights reserved. 
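A hedged usage sketch for the timeval conversion helpers above: sample the uptime once with tcp_get_usecs() and derive both the microsecond count and the 10 usec HPTS slot value from the same instant. The function name example_sample_hpts_time() is hypothetical.

static void
example_sample_hpts_time(void)
{
	struct timeval tv;
	uint32_t usecs, slot;

	usecs = tcp_get_usecs(&tv);	/* fills tv via microuptime() */
	slot = tcp_tv_to_hptstick(&tv);	/* same instant, in 10 us slots */
	(void)usecs;
	(void)slot;
}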
* * Portions of this software were developed by Bjoern Zeeb * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_LRO, "LRO", "LRO control structures"); static void tcp_lro_rx_done(struct lro_ctrl *lc); static int tcp_lro_rx_common(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, bool use_hash); SYSCTL_NODE(_net_inet_tcp, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "TCP LRO"); long tcplro_stacks_wanting_mbufq; int (*tcp_lro_flush_tcphpts)(struct lro_ctrl *lc, struct lro_entry *le); -void (*tcp_hpts_softclock)(void); counter_u64_t tcp_inp_lro_direct_queue; counter_u64_t tcp_inp_lro_wokeup_queue; counter_u64_t tcp_inp_lro_compressed; counter_u64_t tcp_inp_lro_locks_taken; counter_u64_t tcp_extra_mbuf; counter_u64_t tcp_would_have_but; counter_u64_t tcp_comp_total; counter_u64_t tcp_uncomp_total; counter_u64_t tcp_bad_csums; static unsigned tcp_lro_entries = TCP_LRO_ENTRIES; SYSCTL_UINT(_net_inet_tcp_lro, OID_AUTO, entries, CTLFLAG_RDTUN | CTLFLAG_MPSAFE, &tcp_lro_entries, 0, "default number of LRO entries"); static uint32_t tcp_lro_cpu_set_thresh = TCP_LRO_CPU_DECLARATION_THRESH; SYSCTL_UINT(_net_inet_tcp_lro, OID_AUTO, lro_cpu_threshold, CTLFLAG_RDTUN | CTLFLAG_MPSAFE, &tcp_lro_cpu_set_thresh, 0, "Number of interrupts in a row on the same CPU that will make us declare an 'affinity' cpu?"); static uint32_t tcp_less_accurate_lro_ts = 0; SYSCTL_UINT(_net_inet_tcp_lro, OID_AUTO, lro_less_accurate, CTLFLAG_MPSAFE, &tcp_less_accurate_lro_ts, 0, "Do we trade off efficency by doing less timestamp operations for time accuracy?"); SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, fullqueue, CTLFLAG_RD, &tcp_inp_lro_direct_queue, "Number of lro's fully queued to transport"); SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, wokeup, CTLFLAG_RD, &tcp_inp_lro_wokeup_queue, "Number of lro's where we woke up transport via hpts"); 
SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, compressed, CTLFLAG_RD, &tcp_inp_lro_compressed, "Number of lro's compressed and sent to transport"); SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, lockcnt, CTLFLAG_RD, &tcp_inp_lro_locks_taken, "Number of lro's inp_wlocks taken"); SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, extra_mbuf, CTLFLAG_RD, &tcp_extra_mbuf, "Number of times we had an extra compressed ack dropped into the tp"); SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, would_have_but, CTLFLAG_RD, &tcp_would_have_but, "Number of times we would have had an extra compressed, but mget failed"); SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, with_m_ackcmp, CTLFLAG_RD, &tcp_comp_total, "Number of mbufs queued with M_ACKCMP flags set"); SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, without_m_ackcmp, CTLFLAG_RD, &tcp_uncomp_total, "Number of mbufs queued without M_ACKCMP"); SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, lro_badcsum, CTLFLAG_RD, &tcp_bad_csums, "Number of packets that the common code saw with bad csums"); void tcp_lro_reg_mbufq(void) { atomic_fetchadd_long(&tcplro_stacks_wanting_mbufq, 1); } void tcp_lro_dereg_mbufq(void) { atomic_fetchadd_long(&tcplro_stacks_wanting_mbufq, -1); } static __inline void tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_head *bucket, struct lro_entry *le) { LIST_INSERT_HEAD(&lc->lro_active, le, next); LIST_INSERT_HEAD(bucket, le, hash_next); } static __inline void tcp_lro_active_remove(struct lro_entry *le) { LIST_REMOVE(le, next); /* active list */ LIST_REMOVE(le, hash_next); /* hash bucket */ } int tcp_lro_init(struct lro_ctrl *lc) { return (tcp_lro_init_args(lc, NULL, tcp_lro_entries, 0)); } int tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp, unsigned lro_entries, unsigned lro_mbufs) { struct lro_entry *le; size_t size; unsigned i, elements; lc->lro_bad_csum = 0; lc->lro_queued = 0; lc->lro_flushed = 0; lc->lro_mbuf_count = 0; lc->lro_mbuf_max = lro_mbufs; lc->lro_cnt = lro_entries; lc->lro_ackcnt_lim = TCP_LRO_ACKCNT_MAX; lc->lro_length_lim = TCP_LRO_LENGTH_MAX; lc->ifp = ifp; LIST_INIT(&lc->lro_free); LIST_INIT(&lc->lro_active); /* create hash table to accelerate entry lookup */ if (lro_entries > lro_mbufs) elements = lro_entries; else elements = lro_mbufs; lc->lro_hash = phashinit_flags(elements, M_LRO, &lc->lro_hashsz, HASH_NOWAIT); if (lc->lro_hash == NULL) { memset(lc, 0, sizeof(*lc)); return (ENOMEM); } /* compute size to allocate */ size = (lro_mbufs * sizeof(struct lro_mbuf_sort)) + (lro_entries * sizeof(*le)); lc->lro_mbuf_data = (struct lro_mbuf_sort *) malloc(size, M_LRO, M_NOWAIT | M_ZERO); /* check for out of memory */ if (lc->lro_mbuf_data == NULL) { free(lc->lro_hash, M_LRO); memset(lc, 0, sizeof(*lc)); return (ENOMEM); } /* compute offset for LRO entries */ le = (struct lro_entry *) (lc->lro_mbuf_data + lro_mbufs); /* setup linked list */ for (i = 0; i != lro_entries; i++) LIST_INSERT_HEAD(&lc->lro_free, le + i, next); return (0); } struct vxlan_header { uint32_t vxlh_flags; uint32_t vxlh_vni; }; static inline void * tcp_lro_low_level_parser(void *ptr, struct lro_parser *parser, bool update_data, bool is_vxlan, int mlen) { const struct ether_vlan_header *eh; void *old; uint16_t eth_type; if (update_data) memset(parser, 0, sizeof(*parser)); old = ptr; if (is_vxlan) { const struct vxlan_header *vxh; vxh = ptr; ptr = (uint8_t *)ptr + sizeof(*vxh); if (update_data) { parser->data.vxlan_vni = vxh->vxlh_vni & htonl(0xffffff00); } } eh = ptr; if (__predict_false(eh->evl_encap_proto == 
htons(ETHERTYPE_VLAN))) { eth_type = eh->evl_proto; if (update_data) { /* strip priority and keep VLAN ID only */ parser->data.vlan_id = eh->evl_tag & htons(EVL_VLID_MASK); } /* advance to next header */ ptr = (uint8_t *)ptr + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; mlen -= (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); } else { eth_type = eh->evl_encap_proto; /* advance to next header */ mlen -= ETHER_HDR_LEN; ptr = (uint8_t *)ptr + ETHER_HDR_LEN; } if (__predict_false(mlen <= 0)) return (NULL); switch (eth_type) { #ifdef INET case htons(ETHERTYPE_IP): parser->ip4 = ptr; if (__predict_false(mlen < sizeof(struct ip))) return (NULL); /* Ensure there are no IPv4 options. */ if ((parser->ip4->ip_hl << 2) != sizeof (*parser->ip4)) break; /* .. and the packet is not fragmented. */ if (parser->ip4->ip_off & htons(IP_MF|IP_OFFMASK)) break; /* .. and the packet has valid src/dst addrs */ if (__predict_false(parser->ip4->ip_src.s_addr == INADDR_ANY || parser->ip4->ip_dst.s_addr == INADDR_ANY)) break; ptr = (uint8_t *)ptr + (parser->ip4->ip_hl << 2); mlen -= sizeof(struct ip); if (update_data) { parser->data.s_addr.v4 = parser->ip4->ip_src; parser->data.d_addr.v4 = parser->ip4->ip_dst; } switch (parser->ip4->ip_p) { case IPPROTO_UDP: if (__predict_false(mlen < sizeof(struct udphdr))) return (NULL); parser->udp = ptr; if (update_data) { parser->data.lro_type = LRO_TYPE_IPV4_UDP; parser->data.s_port = parser->udp->uh_sport; parser->data.d_port = parser->udp->uh_dport; } else { MPASS(parser->data.lro_type == LRO_TYPE_IPV4_UDP); } ptr = ((uint8_t *)ptr + sizeof(*parser->udp)); parser->total_hdr_len = (uint8_t *)ptr - (uint8_t *)old; return (ptr); case IPPROTO_TCP: parser->tcp = ptr; if (__predict_false(mlen < sizeof(struct tcphdr))) return (NULL); if (update_data) { parser->data.lro_type = LRO_TYPE_IPV4_TCP; parser->data.s_port = parser->tcp->th_sport; parser->data.d_port = parser->tcp->th_dport; } else { MPASS(parser->data.lro_type == LRO_TYPE_IPV4_TCP); } if (__predict_false(mlen < (parser->tcp->th_off << 2))) return (NULL); ptr = (uint8_t *)ptr + (parser->tcp->th_off << 2); parser->total_hdr_len = (uint8_t *)ptr - (uint8_t *)old; return (ptr); default: break; } break; #endif #ifdef INET6 case htons(ETHERTYPE_IPV6): parser->ip6 = ptr; if (__predict_false(mlen < sizeof(struct ip6_hdr))) return (NULL); /* Ensure the packet has valid src/dst addrs */ if (__predict_false(IN6_IS_ADDR_UNSPECIFIED(&parser->ip6->ip6_src) || IN6_IS_ADDR_UNSPECIFIED(&parser->ip6->ip6_dst))) return (NULL); ptr = (uint8_t *)ptr + sizeof(*parser->ip6); if (update_data) { parser->data.s_addr.v6 = parser->ip6->ip6_src; parser->data.d_addr.v6 = parser->ip6->ip6_dst; } mlen -= sizeof(struct ip6_hdr); switch (parser->ip6->ip6_nxt) { case IPPROTO_UDP: if (__predict_false(mlen < sizeof(struct udphdr))) return (NULL); parser->udp = ptr; if (update_data) { parser->data.lro_type = LRO_TYPE_IPV6_UDP; parser->data.s_port = parser->udp->uh_sport; parser->data.d_port = parser->udp->uh_dport; } else { MPASS(parser->data.lro_type == LRO_TYPE_IPV6_UDP); } ptr = (uint8_t *)ptr + sizeof(*parser->udp); parser->total_hdr_len = (uint8_t *)ptr - (uint8_t *)old; return (ptr); case IPPROTO_TCP: if (__predict_false(mlen < sizeof(struct tcphdr))) return (NULL); parser->tcp = ptr; if (update_data) { parser->data.lro_type = LRO_TYPE_IPV6_TCP; parser->data.s_port = parser->tcp->th_sport; parser->data.d_port = parser->tcp->th_dport; } else { MPASS(parser->data.lro_type == LRO_TYPE_IPV6_TCP); } if (__predict_false(mlen < (parser->tcp->th_off << 2))) return (NULL); ptr = 
(uint8_t *)ptr + (parser->tcp->th_off << 2); parser->total_hdr_len = (uint8_t *)ptr - (uint8_t *)old; return (ptr); default: break; } break; #endif default: break; } /* Invalid packet - cannot parse */ return (NULL); } static const int vxlan_csum = CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID | CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID; static inline struct lro_parser * tcp_lro_parser(struct mbuf *m, struct lro_parser *po, struct lro_parser *pi, bool update_data) { void *data_ptr; /* Try to parse outer headers first. */ data_ptr = tcp_lro_low_level_parser(m->m_data, po, update_data, false, m->m_len); if (data_ptr == NULL || po->total_hdr_len > m->m_len) return (NULL); if (update_data) { /* Store VLAN ID, if any. */ if (__predict_false(m->m_flags & M_VLANTAG)) { po->data.vlan_id = htons(m->m_pkthdr.ether_vtag) & htons(EVL_VLID_MASK); } /* Store decrypted flag, if any. */ if (__predict_false((m->m_pkthdr.csum_flags & CSUM_TLS_MASK) == CSUM_TLS_DECRYPTED)) po->data.lro_flags |= LRO_FLAG_DECRYPTED; } switch (po->data.lro_type) { case LRO_TYPE_IPV4_UDP: case LRO_TYPE_IPV6_UDP: /* Check for VXLAN headers. */ if ((m->m_pkthdr.csum_flags & vxlan_csum) != vxlan_csum) break; /* Try to parse inner headers. */ data_ptr = tcp_lro_low_level_parser(data_ptr, pi, update_data, true, (m->m_len - ((caddr_t)data_ptr - m->m_data))); if (data_ptr == NULL || (pi->total_hdr_len + po->total_hdr_len) > m->m_len) break; /* Verify supported header types. */ switch (pi->data.lro_type) { case LRO_TYPE_IPV4_TCP: case LRO_TYPE_IPV6_TCP: return (pi); default: break; } break; case LRO_TYPE_IPV4_TCP: case LRO_TYPE_IPV6_TCP: if (update_data) memset(pi, 0, sizeof(*pi)); return (po); default: break; } return (NULL); } static inline int tcp_lro_trim_mbuf_chain(struct mbuf *m, const struct lro_parser *po) { int len; switch (po->data.lro_type) { #ifdef INET case LRO_TYPE_IPV4_TCP: len = ((uint8_t *)po->ip4 - (uint8_t *)m->m_data) + ntohs(po->ip4->ip_len); break; #endif #ifdef INET6 case LRO_TYPE_IPV6_TCP: len = ((uint8_t *)po->ip6 - (uint8_t *)m->m_data) + ntohs(po->ip6->ip6_plen) + sizeof(*po->ip6); break; #endif default: return (TCP_LRO_CANNOT); } /* * If the frame is padded beyond the end of the IP packet, * then trim the extra bytes off: */ if (__predict_true(m->m_pkthdr.len == len)) { return (0); } else if (m->m_pkthdr.len > len) { m_adj(m, len - m->m_pkthdr.len); return (0); } return (TCP_LRO_CANNOT); } static void lro_free_mbuf_chain(struct mbuf *m) { struct mbuf *save; while (m) { save = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); m = save; } } void tcp_lro_free(struct lro_ctrl *lc) { struct lro_entry *le; unsigned x; /* reset LRO free list */ LIST_INIT(&lc->lro_free); /* free active mbufs, if any */ while ((le = LIST_FIRST(&lc->lro_active)) != NULL) { tcp_lro_active_remove(le); lro_free_mbuf_chain(le->m_head); } /* free hash table */ free(lc->lro_hash, M_LRO); lc->lro_hash = NULL; lc->lro_hashsz = 0; /* free mbuf array, if any */ for (x = 0; x != lc->lro_mbuf_count; x++) m_freem(lc->lro_mbuf_data[x].mb); lc->lro_mbuf_count = 0; /* free allocated memory, if any */ free(lc->lro_mbuf_data, M_LRO); lc->lro_mbuf_data = NULL; } static uint16_t tcp_lro_rx_csum_tcphdr(const struct tcphdr *th) { const uint16_t *ptr; uint32_t csum; uint16_t len; csum = -th->th_sum; /* exclude checksum field */ len = th->th_off; ptr = (const uint16_t *)th; while (len--) { csum += *ptr; ptr++; csum += *ptr; ptr++; } while (csum > 0xffff) csum = (csum >> 16) + (csum & 0xffff); return (csum); } static uint16_t tcp_lro_rx_csum_data(const struct 
lro_parser *pa, uint16_t tcp_csum) { uint32_t c; uint16_t cs; c = tcp_csum; switch (pa->data.lro_type) { #ifdef INET6 case LRO_TYPE_IPV6_TCP: /* Compute full pseudo IPv6 header checksum. */ cs = in6_cksum_pseudo(pa->ip6, ntohs(pa->ip6->ip6_plen), pa->ip6->ip6_nxt, 0); break; #endif #ifdef INET case LRO_TYPE_IPV4_TCP: /* Compute full pseudo IPv4 header checsum. */ cs = in_addword(ntohs(pa->ip4->ip_len) - sizeof(*pa->ip4), IPPROTO_TCP); cs = in_pseudo(pa->ip4->ip_src.s_addr, pa->ip4->ip_dst.s_addr, htons(cs)); break; #endif default: cs = 0; /* Keep compiler happy. */ break; } /* Complement checksum. */ cs = ~cs; c += cs; /* Remove TCP header checksum. */ cs = ~tcp_lro_rx_csum_tcphdr(pa->tcp); c += cs; /* Compute checksum remainder. */ while (c > 0xffff) c = (c >> 16) + (c & 0xffff); return (c); } static void tcp_lro_rx_done(struct lro_ctrl *lc) { struct lro_entry *le; while ((le = LIST_FIRST(&lc->lro_active)) != NULL) { tcp_lro_active_remove(le); tcp_lro_flush(lc, le); } } static void tcp_lro_flush_active(struct lro_ctrl *lc) { struct lro_entry *le; /* * Walk through the list of le entries, and * any one that does have packets flush. This * is called because we have an inbound packet * (e.g. SYN) that has to have all others flushed * in front of it. Note we have to do the remove * because tcp_lro_flush() assumes that the entry * is being freed. This is ok it will just get * reallocated again like it was new. */ LIST_FOREACH(le, &lc->lro_active, next) { if (le->m_head != NULL) { tcp_lro_active_remove(le); tcp_lro_flush(lc, le); } } } void tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout) { struct lro_entry *le, *le_tmp; uint64_t now, tov; struct bintime bt; NET_EPOCH_ASSERT(); if (LIST_EMPTY(&lc->lro_active)) return; /* get timeout time and current time in ns */ binuptime(&bt); now = bintime2ns(&bt); tov = ((timeout->tv_sec * 1000000000) + (timeout->tv_usec * 1000)); LIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) { if (now >= (bintime2ns(&le->alloc_time) + tov)) { tcp_lro_active_remove(le); tcp_lro_flush(lc, le); } } } #ifdef INET static int tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4) { uint16_t csum; /* Legacy IP has a header checksum that needs to be correct. */ if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { if (__predict_false((m->m_pkthdr.csum_flags & CSUM_IP_VALID) == 0)) { lc->lro_bad_csum++; return (TCP_LRO_CANNOT); } } else { csum = in_cksum_hdr(ip4); if (__predict_false(csum != 0)) { lc->lro_bad_csum++; return (TCP_LRO_CANNOT); } } return (0); } #endif static inline void tcp_lro_assign_and_checksum_16(uint16_t *ptr, uint16_t value, uint16_t *psum) { uint32_t csum; csum = 0xffff - *ptr + value; while (csum > 0xffff) csum = (csum >> 16) + (csum & 0xffff); *ptr = value; *psum = csum; } static uint16_t tcp_lro_update_checksum(const struct lro_parser *pa, const struct lro_entry *le, uint16_t payload_len, uint16_t delta_sum) { uint32_t csum; uint16_t tlen; uint16_t temp[5] = {}; switch (pa->data.lro_type) { case LRO_TYPE_IPV4_TCP: /* Compute new IPv4 length. */ tlen = (pa->ip4->ip_hl << 2) + (pa->tcp->th_off << 2) + payload_len; tcp_lro_assign_and_checksum_16(&pa->ip4->ip_len, htons(tlen), &temp[0]); /* Subtract delta from current IPv4 checksum. */ csum = pa->ip4->ip_sum + 0xffff - temp[0]; while (csum > 0xffff) csum = (csum >> 16) + (csum & 0xffff); tcp_lro_assign_and_checksum_16(&pa->ip4->ip_sum, csum, &temp[1]); goto update_tcp_header; case LRO_TYPE_IPV6_TCP: /* Compute new IPv6 length. 
*/ tlen = (pa->tcp->th_off << 2) + payload_len; tcp_lro_assign_and_checksum_16(&pa->ip6->ip6_plen, htons(tlen), &temp[0]); goto update_tcp_header; case LRO_TYPE_IPV4_UDP: /* Compute new IPv4 length. */ tlen = (pa->ip4->ip_hl << 2) + sizeof(*pa->udp) + payload_len; tcp_lro_assign_and_checksum_16(&pa->ip4->ip_len, htons(tlen), &temp[0]); /* Subtract delta from current IPv4 checksum. */ csum = pa->ip4->ip_sum + 0xffff - temp[0]; while (csum > 0xffff) csum = (csum >> 16) + (csum & 0xffff); tcp_lro_assign_and_checksum_16(&pa->ip4->ip_sum, csum, &temp[1]); goto update_udp_header; case LRO_TYPE_IPV6_UDP: /* Compute new IPv6 length. */ tlen = sizeof(*pa->udp) + payload_len; tcp_lro_assign_and_checksum_16(&pa->ip6->ip6_plen, htons(tlen), &temp[0]); goto update_udp_header; default: return (0); } update_tcp_header: /* Compute current TCP header checksum. */ temp[2] = tcp_lro_rx_csum_tcphdr(pa->tcp); /* Incorporate the latest ACK into the TCP header. */ pa->tcp->th_ack = le->ack_seq; pa->tcp->th_win = le->window; /* Incorporate latest timestamp into the TCP header. */ if (le->timestamp != 0) { uint32_t *ts_ptr; ts_ptr = (uint32_t *)(pa->tcp + 1); ts_ptr[1] = htonl(le->tsval); ts_ptr[2] = le->tsecr; } /* Compute new TCP header checksum. */ temp[3] = tcp_lro_rx_csum_tcphdr(pa->tcp); /* Compute new TCP checksum. */ csum = pa->tcp->th_sum + 0xffff - delta_sum + 0xffff - temp[0] + 0xffff - temp[3] + temp[2]; while (csum > 0xffff) csum = (csum >> 16) + (csum & 0xffff); /* Assign new TCP checksum. */ tcp_lro_assign_and_checksum_16(&pa->tcp->th_sum, csum, &temp[4]); /* Compute all modififications affecting next checksum. */ csum = temp[0] + temp[1] + 0xffff - temp[2] + temp[3] + temp[4] + delta_sum; while (csum > 0xffff) csum = (csum >> 16) + (csum & 0xffff); /* Return delta checksum to next stage, if any. */ return (csum); update_udp_header: tlen = sizeof(*pa->udp) + payload_len; /* Assign new UDP length and compute checksum delta. */ tcp_lro_assign_and_checksum_16(&pa->udp->uh_ulen, htons(tlen), &temp[2]); /* Check if there is a UDP checksum. */ if (__predict_false(pa->udp->uh_sum != 0)) { /* Compute new UDP checksum. */ csum = pa->udp->uh_sum + 0xffff - delta_sum + 0xffff - temp[0] + 0xffff - temp[2]; while (csum > 0xffff) csum = (csum >> 16) + (csum & 0xffff); /* Assign new UDP checksum. */ tcp_lro_assign_and_checksum_16(&pa->udp->uh_sum, csum, &temp[3]); } /* Compute all modififications affecting next checksum. */ csum = temp[0] + temp[1] + temp[2] + temp[3] + delta_sum; while (csum > 0xffff) csum = (csum >> 16) + (csum & 0xffff); /* Return delta checksum to next stage, if any. */ return (csum); } static void tcp_flush_out_entry(struct lro_ctrl *lc, struct lro_entry *le) { /* Check if we need to recompute any checksums. 
*/ if (le->needs_merge) { uint16_t csum; switch (le->inner.data.lro_type) { case LRO_TYPE_IPV4_TCP: csum = tcp_lro_update_checksum(&le->inner, le, le->m_head->m_pkthdr.lro_tcp_d_len, le->m_head->m_pkthdr.lro_tcp_d_csum); csum = tcp_lro_update_checksum(&le->outer, NULL, le->m_head->m_pkthdr.lro_tcp_d_len + le->inner.total_hdr_len, csum); le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID; le->m_head->m_pkthdr.csum_data = 0xffff; if (__predict_false(le->outer.data.lro_flags & LRO_FLAG_DECRYPTED)) le->m_head->m_pkthdr.csum_flags |= CSUM_TLS_DECRYPTED; break; case LRO_TYPE_IPV6_TCP: csum = tcp_lro_update_checksum(&le->inner, le, le->m_head->m_pkthdr.lro_tcp_d_len, le->m_head->m_pkthdr.lro_tcp_d_csum); csum = tcp_lro_update_checksum(&le->outer, NULL, le->m_head->m_pkthdr.lro_tcp_d_len + le->inner.total_hdr_len, csum); le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR; le->m_head->m_pkthdr.csum_data = 0xffff; if (__predict_false(le->outer.data.lro_flags & LRO_FLAG_DECRYPTED)) le->m_head->m_pkthdr.csum_flags |= CSUM_TLS_DECRYPTED; break; case LRO_TYPE_NONE: switch (le->outer.data.lro_type) { case LRO_TYPE_IPV4_TCP: csum = tcp_lro_update_checksum(&le->outer, le, le->m_head->m_pkthdr.lro_tcp_d_len, le->m_head->m_pkthdr.lro_tcp_d_csum); le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID; le->m_head->m_pkthdr.csum_data = 0xffff; if (__predict_false(le->outer.data.lro_flags & LRO_FLAG_DECRYPTED)) le->m_head->m_pkthdr.csum_flags |= CSUM_TLS_DECRYPTED; break; case LRO_TYPE_IPV6_TCP: csum = tcp_lro_update_checksum(&le->outer, le, le->m_head->m_pkthdr.lro_tcp_d_len, le->m_head->m_pkthdr.lro_tcp_d_csum); le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR; le->m_head->m_pkthdr.csum_data = 0xffff; if (__predict_false(le->outer.data.lro_flags & LRO_FLAG_DECRYPTED)) le->m_head->m_pkthdr.csum_flags |= CSUM_TLS_DECRYPTED; break; default: break; } break; default: break; } } /* * Break any chain, this is not set to NULL on the singleton * case m_nextpkt points to m_head. Other case set them * m_nextpkt to NULL in push_and_replace. */ le->m_head->m_nextpkt = NULL; lc->lro_queued += le->m_head->m_pkthdr.lro_nsegs; (*lc->ifp->if_input)(lc->ifp, le->m_head); } static void tcp_set_entry_to_mbuf(struct lro_ctrl *lc, struct lro_entry *le, struct mbuf *m, struct tcphdr *th) { uint32_t *ts_ptr; uint16_t tcp_data_len; uint16_t tcp_opt_len; ts_ptr = (uint32_t *)(th + 1); tcp_opt_len = (th->th_off << 2); tcp_opt_len -= sizeof(*th); /* Check if there is a timestamp option. */ if (tcp_opt_len == 0 || __predict_false(tcp_opt_len != TCPOLEN_TSTAMP_APPA || *ts_ptr != TCP_LRO_TS_OPTION)) { /* We failed to find the timestamp option. */ le->timestamp = 0; } else { le->timestamp = 1; le->tsval = ntohl(*(ts_ptr + 1)); le->tsecr = *(ts_ptr + 2); } tcp_data_len = m->m_pkthdr.lro_tcp_d_len; /* Pull out TCP sequence numbers and window size. */ le->next_seq = ntohl(th->th_seq) + tcp_data_len; le->ack_seq = th->th_ack; le->window = th->th_win; le->flags = tcp_get_flags(th); le->needs_merge = 0; /* Setup new data pointers. */ le->m_head = m; le->m_tail = m_last(m); } static void tcp_push_and_replace(struct lro_ctrl *lc, struct lro_entry *le, struct mbuf *m) { struct lro_parser *pa; /* * Push up the stack of the current entry * and replace it with "m". 
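The checksum updates above repeatedly fold 32-bit partial sums back into a 16-bit one's-complement result. As a stand-alone illustration of that step (a sketch, not code from this change):

static inline uint16_t
example_csum_fold(uint32_t csum)
{
	/* Fold the carries until the sum fits in 16 bits again. */
	while (csum > 0xffff)
		csum = (csum >> 16) + (csum & 0xffff);
	return (csum);
}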
*/ struct mbuf *msave; /* Grab off the next and save it */ msave = le->m_head->m_nextpkt; le->m_head->m_nextpkt = NULL; /* Now push out the old entry */ tcp_flush_out_entry(lc, le); /* Re-parse new header, should not fail. */ pa = tcp_lro_parser(m, &le->outer, &le->inner, false); KASSERT(pa != NULL, ("tcp_push_and_replace: LRO parser failed on m=%p\n", m)); /* * Now to replace the data properly in the entry * we have to reset the TCP header and * other fields. */ tcp_set_entry_to_mbuf(lc, le, m, pa->tcp); /* Restore the next list */ m->m_nextpkt = msave; } static void tcp_lro_mbuf_append_pkthdr(struct lro_entry *le, const struct mbuf *p) { struct mbuf *m; uint32_t csum; m = le->m_head; if (m->m_pkthdr.lro_nsegs == 1) { /* Compute relative checksum. */ csum = p->m_pkthdr.lro_tcp_d_csum; } else { /* Merge TCP data checksums. */ csum = (uint32_t)m->m_pkthdr.lro_tcp_d_csum + (uint32_t)p->m_pkthdr.lro_tcp_d_csum; while (csum > 0xffff) csum = (csum >> 16) + (csum & 0xffff); } /* Update various counters. */ m->m_pkthdr.len += p->m_pkthdr.lro_tcp_d_len; m->m_pkthdr.lro_tcp_d_csum = csum; m->m_pkthdr.lro_tcp_d_len += p->m_pkthdr.lro_tcp_d_len; m->m_pkthdr.lro_nsegs += p->m_pkthdr.lro_nsegs; le->needs_merge = 1; } static void tcp_lro_condense(struct lro_ctrl *lc, struct lro_entry *le) { /* * Walk through the mbuf chain we * have on tap and compress/condense * as required. */ uint32_t *ts_ptr; struct mbuf *m; struct tcphdr *th; uint32_t tcp_data_len_total; uint32_t tcp_data_seg_total; uint16_t tcp_data_len; uint16_t tcp_opt_len; /* * First we must check the lead (m_head) * we must make sure that it is *not* * something that should be sent up * right away (sack etc). */ again: m = le->m_head->m_nextpkt; if (m == NULL) { /* Just one left. */ return; } th = tcp_lro_get_th(m); tcp_opt_len = (th->th_off << 2); tcp_opt_len -= sizeof(*th); ts_ptr = (uint32_t *)(th + 1); if (tcp_opt_len != 0 && __predict_false(tcp_opt_len != TCPOLEN_TSTAMP_APPA || *ts_ptr != TCP_LRO_TS_OPTION)) { /* * Its not the timestamp. We can't * use this guy as the head. */ le->m_head->m_nextpkt = m->m_nextpkt; tcp_push_and_replace(lc, le, m); goto again; } if ((tcp_get_flags(th) & ~(TH_ACK | TH_PUSH)) != 0) { /* * Make sure that previously seen segments/ACKs are delivered * before this segment, e.g. FIN. */ le->m_head->m_nextpkt = m->m_nextpkt; tcp_push_and_replace(lc, le, m); goto again; } while((m = le->m_head->m_nextpkt) != NULL) { /* * condense m into le, first * pull m out of the list. */ le->m_head->m_nextpkt = m->m_nextpkt; m->m_nextpkt = NULL; /* Setup my data */ tcp_data_len = m->m_pkthdr.lro_tcp_d_len; th = tcp_lro_get_th(m); ts_ptr = (uint32_t *)(th + 1); tcp_opt_len = (th->th_off << 2); tcp_opt_len -= sizeof(*th); tcp_data_len_total = le->m_head->m_pkthdr.lro_tcp_d_len + tcp_data_len; tcp_data_seg_total = le->m_head->m_pkthdr.lro_nsegs + m->m_pkthdr.lro_nsegs; if (tcp_data_seg_total >= lc->lro_ackcnt_lim || tcp_data_len_total >= lc->lro_length_lim) { /* Flush now if appending will result in overflow. */ tcp_push_and_replace(lc, le, m); goto again; } if (tcp_opt_len != 0 && __predict_false(tcp_opt_len != TCPOLEN_TSTAMP_APPA || *ts_ptr != TCP_LRO_TS_OPTION)) { /* * Maybe a sack in the new one? We need to * start all over after flushing the * current le. We will go up to the beginning * and flush it (calling the replace again possibly * or just returning). 
*/ tcp_push_and_replace(lc, le, m); goto again; } if ((tcp_get_flags(th) & ~(TH_ACK | TH_PUSH)) != 0) { tcp_push_and_replace(lc, le, m); goto again; } if (tcp_opt_len != 0) { uint32_t tsval = ntohl(*(ts_ptr + 1)); /* Make sure timestamp values are increasing. */ if (TSTMP_GT(le->tsval, tsval)) { tcp_push_and_replace(lc, le, m); goto again; } le->tsval = tsval; le->tsecr = *(ts_ptr + 2); } /* Try to append the new segment. */ if (__predict_false(ntohl(th->th_seq) != le->next_seq || ((tcp_get_flags(th) & TH_ACK) != (le->flags & TH_ACK)) || (tcp_data_len == 0 && le->ack_seq == th->th_ack && le->window == th->th_win))) { /* Out of order packet, non-ACK + ACK or dup ACK. */ tcp_push_and_replace(lc, le, m); goto again; } if (tcp_data_len != 0 || SEQ_GT(ntohl(th->th_ack), ntohl(le->ack_seq))) { le->next_seq += tcp_data_len; le->ack_seq = th->th_ack; le->window = th->th_win; le->needs_merge = 1; } else if (th->th_ack == le->ack_seq) { if (WIN_GT(th->th_win, le->window)) { le->window = th->th_win; le->needs_merge = 1; } } if (tcp_data_len == 0) { m_freem(m); continue; } /* Merge TCP data checksum and length to head mbuf. */ tcp_lro_mbuf_append_pkthdr(le, m); /* * Adjust the mbuf so that m_data points to the first byte of * the ULP payload. Adjust the mbuf to avoid complications and * append new segment to existing mbuf chain. */ m_adj(m, m->m_pkthdr.len - tcp_data_len); m_demote_pkthdr(m); le->m_tail->m_next = m; le->m_tail = m_last(m); } } void tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le) { /* Only optimise if there are multiple packets waiting. */ NET_EPOCH_ASSERT(); if (tcp_lro_flush_tcphpts == NULL || tcp_lro_flush_tcphpts(lc, le) != 0) { tcp_lro_condense(lc, le); tcp_flush_out_entry(lc, le); } lc->lro_flushed++; bzero(le, sizeof(*le)); LIST_INSERT_HEAD(&lc->lro_free, le, next); } #define tcp_lro_msb_64(x) (1ULL << (flsll(x) - 1)) /* * The tcp_lro_sort() routine is comparable to qsort(), except it has * a worst case complexity limit of O(MIN(N,64)*N), where N is the * number of elements to sort and 64 is the number of sequence bits * available. The algorithm is bit-slicing the 64-bit sequence number, * sorting one bit at a time from the most significant bit until the * least significant one, skipping the constant bits. This is * typically called a radix sort. 
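To make the "skip the constant bits" step of the radix sort described above concrete, here is a hedged stand-alone sketch of how the varying bits are identified; it mirrors the ones/zeros computation in tcp_lro_sort() that follows, under hypothetical names:

static uint64_t
example_varying_bits(const uint64_t *keys, uint32_t n)
{
	uint64_t ones = 0, zeros = 0;
	uint32_t i;

	for (i = 0; i != n; i++) {
		ones |= keys[i];
		zeros |= ~keys[i];
	}
	/* A bit carries ordering information only if it was seen both
	 * set and clear across the keys. */
	return (ones & zeros);
}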
*/ static void tcp_lro_sort(struct lro_mbuf_sort *parray, uint32_t size) { struct lro_mbuf_sort temp; uint64_t ones; uint64_t zeros; uint32_t x; uint32_t y; repeat: /* for small arrays insertion sort is faster */ if (size <= 12) { for (x = 1; x < size; x++) { temp = parray[x]; for (y = x; y > 0 && temp.seq < parray[y - 1].seq; y--) parray[y] = parray[y - 1]; parray[y] = temp; } return; } /* compute sequence bits which are constant */ ones = 0; zeros = 0; for (x = 0; x != size; x++) { ones |= parray[x].seq; zeros |= ~parray[x].seq; } /* compute bits which are not constant into "ones" */ ones &= zeros; if (ones == 0) return; /* pick the most significant bit which is not constant */ ones = tcp_lro_msb_64(ones); /* * Move entries having cleared sequence bits to the beginning * of the array: */ for (x = y = 0; y != size; y++) { /* skip set bits */ if (parray[y].seq & ones) continue; /* swap entries */ temp = parray[x]; parray[x] = parray[y]; parray[y] = temp; x++; } KASSERT(x != 0 && x != size, ("Memory is corrupted\n")); /* sort zeros */ tcp_lro_sort(parray, x); /* sort ones */ parray += x; size -= x; goto repeat; } void tcp_lro_flush_all(struct lro_ctrl *lc) { uint64_t seq; uint64_t nseq; unsigned x; NET_EPOCH_ASSERT(); /* check if no mbufs to flush */ if (lc->lro_mbuf_count == 0) goto done; if (lc->lro_cpu_is_set == 0) { if (lc->lro_last_cpu == curcpu) { lc->lro_cnt_of_same_cpu++; /* Have we reached the threshold to declare a cpu? */ if (lc->lro_cnt_of_same_cpu > tcp_lro_cpu_set_thresh) lc->lro_cpu_is_set = 1; } else { lc->lro_last_cpu = curcpu; lc->lro_cnt_of_same_cpu = 0; } } CURVNET_SET(lc->ifp->if_vnet); /* get current time */ binuptime(&lc->lro_last_queue_time); /* sort all mbufs according to stream */ tcp_lro_sort(lc->lro_mbuf_data, lc->lro_mbuf_count); /* input data into LRO engine, stream by stream */ seq = 0; for (x = 0; x != lc->lro_mbuf_count; x++) { struct mbuf *mb; /* get mbuf */ mb = lc->lro_mbuf_data[x].mb; /* get sequence number, masking away the packet index */ nseq = lc->lro_mbuf_data[x].seq & (-1ULL << 24); /* check for new stream */ if (seq != nseq) { seq = nseq; /* flush active streams */ tcp_lro_rx_done(lc); } /* add packet to LRO engine */ if (tcp_lro_rx_common(lc, mb, 0, false) != 0) { /* Flush anything we have acummulated */ tcp_lro_flush_active(lc); /* input packet to network layer */ (*lc->ifp->if_input)(lc->ifp, mb); lc->lro_queued++; lc->lro_flushed++; } } CURVNET_RESTORE(); done: /* flush active streams */ tcp_lro_rx_done(lc); - if (tcp_hpts_softclock != NULL) - tcp_hpts_softclock(); + tcp_hpts_softclock(); lc->lro_mbuf_count = 0; } static struct lro_head * tcp_lro_rx_get_bucket(struct lro_ctrl *lc, struct mbuf *m, struct lro_parser *parser) { u_long hash; if (M_HASHTYPE_ISHASH(m)) { hash = m->m_pkthdr.flowid; } else { for (unsigned i = hash = 0; i != LRO_RAW_ADDRESS_MAX; i++) hash += parser->data.raw[i]; } return (&lc->lro_hash[hash % lc->lro_hashsz]); } static int tcp_lro_rx_common(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, bool use_hash) { struct lro_parser pi; /* inner address data */ struct lro_parser po; /* outer address data */ struct lro_parser *pa; /* current parser for TCP stream */ struct lro_entry *le; struct lro_head *bucket; struct tcphdr *th; int tcp_data_len; int tcp_opt_len; int error; uint16_t tcp_data_sum; #ifdef INET /* Quickly decide if packet cannot be LRO'ed */ if (__predict_false(V_ipforwarding != 0)) return (TCP_LRO_CANNOT); #endif #ifdef INET6 /* Quickly decide if packet cannot be LRO'ed */ if (__predict_false(V_ip6_forwarding 
!= 0)) return (TCP_LRO_CANNOT); #endif if (((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) != ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || (m->m_pkthdr.csum_data != 0xffff)) { /* * The checksum either did not have hardware offload * or it was a bad checksum. We can't LRO such * a packet. */ counter_u64_add(tcp_bad_csums, 1); return (TCP_LRO_CANNOT); } /* We expect a contiguous header [eh, ip, tcp]. */ pa = tcp_lro_parser(m, &po, &pi, true); if (__predict_false(pa == NULL)) return (TCP_LRO_NOT_SUPPORTED); /* We don't expect any padding. */ error = tcp_lro_trim_mbuf_chain(m, pa); if (__predict_false(error != 0)) return (error); #ifdef INET switch (pa->data.lro_type) { case LRO_TYPE_IPV4_TCP: error = tcp_lro_rx_ipv4(lc, m, pa->ip4); if (__predict_false(error != 0)) return (error); break; default: break; } #endif /* If no hardware or arrival stamp on the packet add timestamp */ if ((m->m_flags & (M_TSTMP_LRO | M_TSTMP)) == 0) { m->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time); m->m_flags |= M_TSTMP_LRO; } /* Get pointer to TCP header. */ th = pa->tcp; /* Don't process SYN packets. */ if (__predict_false(tcp_get_flags(th) & TH_SYN)) return (TCP_LRO_CANNOT); /* Get total TCP header length and compute payload length. */ tcp_opt_len = (th->th_off << 2); tcp_data_len = m->m_pkthdr.len - ((uint8_t *)th - (uint8_t *)m->m_data) - tcp_opt_len; tcp_opt_len -= sizeof(*th); /* Don't process invalid TCP headers. */ if (__predict_false(tcp_opt_len < 0 || tcp_data_len < 0)) return (TCP_LRO_CANNOT); /* Compute TCP data only checksum. */ if (tcp_data_len == 0) tcp_data_sum = 0; /* no data, no checksum */ else if (__predict_false(csum != 0)) tcp_data_sum = tcp_lro_rx_csum_data(pa, ~csum); else tcp_data_sum = tcp_lro_rx_csum_data(pa, ~th->th_sum); /* Save TCP info in mbuf. */ m->m_nextpkt = NULL; m->m_pkthdr.rcvif = lc->ifp; m->m_pkthdr.lro_tcp_d_csum = tcp_data_sum; m->m_pkthdr.lro_tcp_d_len = tcp_data_len; m->m_pkthdr.lro_tcp_h_off = ((uint8_t *)th - (uint8_t *)m->m_data); m->m_pkthdr.lro_nsegs = 1; /* Get hash bucket. */ if (!use_hash) { bucket = &lc->lro_hash[0]; } else { bucket = tcp_lro_rx_get_bucket(lc, m, pa); } /* Try to find a matching previous segment. */ LIST_FOREACH(le, bucket, hash_next) { /* Compare addresses and ports. */ if (lro_address_compare(&po.data, &le->outer.data) == false || lro_address_compare(&pi.data, &le->inner.data) == false) continue; /* Check if no data and old ACK. */ if (tcp_data_len == 0 && SEQ_LT(ntohl(th->th_ack), ntohl(le->ack_seq))) { m_freem(m); return (0); } /* Mark "m" in the last spot. */ le->m_last_mbuf->m_nextpkt = m; /* Now set the tail to "m". */ le->m_last_mbuf = m; return (0); } /* Try to find an empty slot. */ if (LIST_EMPTY(&lc->lro_free)) return (TCP_LRO_NO_ENTRIES); /* Start a new segment chain. */ le = LIST_FIRST(&lc->lro_free); LIST_REMOVE(le, next); tcp_lro_active_insert(lc, bucket, le); /* Make sure the headers are set. */ le->inner = pi; le->outer = po; /* Store time this entry was allocated. */ le->alloc_time = lc->lro_last_queue_time; tcp_set_entry_to_mbuf(lc, le, m, th); /* Now set the tail to "m". */ le->m_last_mbuf = m; return (0); } int tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) { int error; if (((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) != ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || (m->m_pkthdr.csum_data != 0xffff)) { /* * The checksum either did not have hardware offload * or it was a bad checksum. We can't LRO such * a packet. 
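Both tcp_lro_rx_common() and tcp_lro_rx() above gate on the same hardware checksum precondition. Written as a stand-alone helper with a hypothetical name, a packet is LRO-eligible only when the NIC reported a fully validated pseudo-header checksum:

static inline bool
example_lro_csum_ok(const struct mbuf *m)
{
	return ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR) &&
	    m->m_pkthdr.csum_data == 0xffff);
}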
*/ counter_u64_add(tcp_bad_csums, 1); return (TCP_LRO_CANNOT); } /* get current time */ binuptime(&lc->lro_last_queue_time); CURVNET_SET(lc->ifp->if_vnet); error = tcp_lro_rx_common(lc, m, csum, true); if (__predict_false(error != 0)) { /* * Flush anything we have acummulated * ahead of this packet that can't * be LRO'd. This preserves order. */ tcp_lro_flush_active(lc); } CURVNET_RESTORE(); return (error); } void tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb) { NET_EPOCH_ASSERT(); /* sanity checks */ if (__predict_false(lc->ifp == NULL || lc->lro_mbuf_data == NULL || lc->lro_mbuf_max == 0)) { /* packet drop */ m_freem(mb); return; } /* check if packet is not LRO capable */ if (__predict_false((lc->ifp->if_capenable & IFCAP_LRO) == 0)) { /* input packet to network layer */ (*lc->ifp->if_input) (lc->ifp, mb); return; } /* If no hardware or arrival stamp on the packet add timestamp */ if ((tcplro_stacks_wanting_mbufq > 0) && (tcp_less_accurate_lro_ts == 0) && ((mb->m_flags & M_TSTMP) == 0)) { /* Add in an LRO time since no hardware */ binuptime(&lc->lro_last_queue_time); mb->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time); mb->m_flags |= M_TSTMP_LRO; } /* create sequence number */ lc->lro_mbuf_data[lc->lro_mbuf_count].seq = (((uint64_t)M_HASHTYPE_GET(mb)) << 56) | (((uint64_t)mb->m_pkthdr.flowid) << 24) | ((uint64_t)lc->lro_mbuf_count); /* enter mbuf */ lc->lro_mbuf_data[lc->lro_mbuf_count].mb = mb; /* flush if array is full */ if (__predict_false(++lc->lro_mbuf_count == lc->lro_mbuf_max)) tcp_lro_flush_all(lc); } /* end */ diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 2532bc3d9926..06d40481375f 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -1,565 +1,571 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
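For the queued path above (tcp_lro_queue_mbuf() followed by tcp_lro_flush_all()), the expected driver usage is to queue each received packet and flush once per RX batch while inside the network epoch. A hedged sketch, where struct example_softc and example_next_rx_mbuf() are hypothetical driver-side names:

struct example_softc {
	struct lro_ctrl lro;	/* set up earlier via tcp_lro_init_args() */
};

static struct mbuf *example_next_rx_mbuf(struct example_softc *);

static void
example_rx_batch(struct example_softc *sc)
{
	struct epoch_tracker et;
	struct mbuf *m;

	NET_EPOCH_ENTER(et);
	while ((m = example_next_rx_mbuf(sc)) != NULL) {
		/* Queues the mbuf; flushes on its own if the array fills. */
		tcp_lro_queue_mbuf(&sc->lro, m);
	}
	/* Sort by flow, aggregate, and hand everything to the stack. */
	tcp_lro_flush_all(&sc->lro);
	NET_EPOCH_EXIT(et);
}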
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _SYS_SYSTM_H_ #define _SYS_SYSTM_H_ #include #include #include #include #include /* for people using printf mainly */ #include #include __NULLABILITY_PRAGMA_PUSH #ifdef _KERNEL extern int cold; /* nonzero if we are doing a cold boot */ extern int suspend_blocked; /* block suspend due to pending shutdown */ extern int rebooting; /* kern_reboot() has been called. */ extern char version[]; /* system version */ extern char compiler_version[]; /* compiler version */ extern char copyright[]; /* system copyright */ extern int kstack_pages; /* number of kernel stack pages */ extern u_long pagesizes[]; /* supported page sizes */ extern long physmem; /* physical memory */ extern long realmem; /* 'real' memory */ extern char *rootdevnames[2]; /* names of possible root devices */ extern int boothowto; /* reboot flags, from console subsystem */ extern int bootverbose; /* nonzero to print verbose messages */ extern int maxusers; /* system tune hint */ extern int ngroups_max; /* max # of supplemental groups */ extern int vm_guest; /* Running as virtual machine guest? */ extern u_long maxphys; /* max raw I/O transfer size */ /* * Detected virtual machine guest types. The intention is to expand * and/or add to the VM_GUEST_VM type if specific VM functionality is * ever implemented (e.g. vendor-specific paravirtualization features). * Keep in sync with vm_guest_sysctl_names[]. */ enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN, VM_GUEST_HV, VM_GUEST_VMWARE, VM_GUEST_KVM, VM_GUEST_BHYVE, VM_GUEST_VBOX, VM_GUEST_PARALLELS, VM_LAST }; #endif /* KERNEL */ /* * Align variables. */ #define __read_mostly __section(".data.read_mostly") #define __read_frequently __section(".data.read_frequently") #define __exclusive_cache_line __aligned(CACHE_LINE_SIZE) \ __section(".data.exclusive_cache_line") #if defined(_STANDALONE) struct ucred; #endif #ifdef _KERNEL #include /* MAXCPU */ #include /* curthread */ #include extern int osreldate; extern const void *zero_region; /* address space maps to a zeroed page */ extern int unmapped_buf_allowed; #ifdef __LP64__ #define IOSIZE_MAX iosize_max() #define DEVFS_IOSIZE_MAX devfs_iosize_max() #else #define IOSIZE_MAX SSIZE_MAX #define DEVFS_IOSIZE_MAX SSIZE_MAX #endif /* * General function declarations. 
*/ struct inpcb; struct lock_object; struct malloc_type; struct mtx; struct proc; struct socket; struct thread; struct tty; struct ucred; struct uio; struct _jmp_buf; struct trapframe; struct eventtimer; int setjmp(struct _jmp_buf *) __returns_twice; void longjmp(struct _jmp_buf *, int) __dead2; int dumpstatus(vm_offset_t addr, off_t count); int nullop(void); int eopnotsupp(void); int ureadc(int, struct uio *); void hashdestroy(void *, struct malloc_type *, u_long); void *hashinit(int count, struct malloc_type *type, u_long *hashmask); void *hashinit_flags(int count, struct malloc_type *type, u_long *hashmask, int flags); #define HASH_NOWAIT 0x00000001 #define HASH_WAITOK 0x00000002 void *phashinit(int count, struct malloc_type *type, u_long *nentries); void *phashinit_flags(int count, struct malloc_type *type, u_long *nentries, int flags); void cpu_flush_dcache(void *, size_t); void cpu_rootconf(void); void critical_enter_KBI(void); void critical_exit_KBI(void); void critical_exit_preempt(void); void init_param1(void); void init_param2(long physpages); void init_static_kenv(char *, size_t); void tablefull(const char *); /* * Allocate per-thread "current" state in the linuxkpi */ extern int (*lkpi_alloc_current)(struct thread *, int); int linux_alloc_current_noop(struct thread *, int); #if (defined(KLD_MODULE) && !defined(KLD_TIED)) || defined(KTR_CRITICAL) || !defined(_KERNEL) || defined(GENOFFSET) #define critical_enter() critical_enter_KBI() #define critical_exit() critical_exit_KBI() #else static __inline void critical_enter(void) { struct thread_lite *td; td = (struct thread_lite *)curthread; td->td_critnest++; atomic_interrupt_fence(); } static __inline void critical_exit(void) { struct thread_lite *td; td = (struct thread_lite *)curthread; KASSERT(td->td_critnest != 0, ("critical_exit: td_critnest == 0")); atomic_interrupt_fence(); td->td_critnest--; atomic_interrupt_fence(); if (__predict_false(td->td_owepreempt)) critical_exit_preempt(); } #endif #ifdef EARLY_PRINTF typedef void early_putc_t(int ch); extern early_putc_t *early_putc; #endif int kvprintf(char const *, void (*)(int, void*), void *, int, __va_list) __printflike(1, 0); void log(int, const char *, ...) __printflike(2, 3); void log_console(struct uio *); void vlog(int, const char *, __va_list) __printflike(2, 0); int asprintf(char **ret, struct malloc_type *mtp, const char *format, ...) __printflike(3, 4); int printf(const char *, ...) __printflike(1, 2); int snprintf(char *, size_t, const char *, ...) __printflike(3, 4); int sprintf(char *buf, const char *, ...) __printflike(2, 3); int uprintf(const char *, ...) __printflike(1, 2); int vprintf(const char *, __va_list) __printflike(1, 0); int vasprintf(char **ret, struct malloc_type *mtp, const char *format, __va_list ap) __printflike(3, 0); int vsnprintf(char *, size_t, const char *, __va_list) __printflike(3, 0); int vsnrprintf(char *, size_t, int, const char *, __va_list) __printflike(4, 0); int vsprintf(char *buf, const char *, __va_list) __printflike(2, 0); int sscanf(const char *, char const * _Nonnull, ...) __scanflike(2, 3); int vsscanf(const char * _Nonnull, char const * _Nonnull, __va_list) __scanflike(2, 0); long strtol(const char *, char **, int); u_long strtoul(const char *, char **, int); quad_t strtoq(const char *, char **, int); u_quad_t strtouq(const char *, char **, int); void tprintf(struct proc *p, int pri, const char *, ...) 
__printflike(3, 4); void vtprintf(struct proc *, int, const char *, __va_list) __printflike(3, 0); void hexdump(const void *ptr, int length, const char *hdr, int flags); #define HD_COLUMN_MASK 0xff #define HD_DELIM_MASK 0xff00 #define HD_OMIT_COUNT (1 << 16) #define HD_OMIT_HEX (1 << 17) #define HD_OMIT_CHARS (1 << 18) #define ovbcopy(f, t, l) bcopy((f), (t), (l)) void explicit_bzero(void * _Nonnull, size_t); void *memset(void * _Nonnull buf, int c, size_t len); void *memcpy(void * _Nonnull to, const void * _Nonnull from, size_t len); void *memmove(void * _Nonnull dest, const void * _Nonnull src, size_t n); int memcmp(const void *b1, const void *b2, size_t len); #ifdef SAN_NEEDS_INTERCEPTORS #define SAN_INTERCEPTOR(func) \ __CONCAT(SAN_INTERCEPTOR_PREFIX, __CONCAT(_, func)) void *SAN_INTERCEPTOR(memset)(void *, int, size_t); void *SAN_INTERCEPTOR(memcpy)(void *, const void *, size_t); void *SAN_INTERCEPTOR(memmove)(void *, const void *, size_t); int SAN_INTERCEPTOR(memcmp)(const void *, const void *, size_t); #ifndef SAN_RUNTIME #define bcopy(from, to, len) SAN_INTERCEPTOR(memmove)((to), (from), (len)) #define bzero(buf, len) SAN_INTERCEPTOR(memset)((buf), 0, (len)) #define bcmp(b1, b2, len) SAN_INTERCEPTOR(memcmp)((b1), (b2), (len)) #define memset(buf, c, len) SAN_INTERCEPTOR(memset)((buf), (c), (len)) #define memcpy(to, from, len) SAN_INTERCEPTOR(memcpy)((to), (from), (len)) #define memmove(dest, src, n) SAN_INTERCEPTOR(memmove)((dest), (src), (n)) #define memcmp(b1, b2, len) SAN_INTERCEPTOR(memcmp)((b1), (b2), (len)) #endif /* !SAN_RUNTIME */ #else /* !SAN_NEEDS_INTERCEPTORS */ #define bcopy(from, to, len) __builtin_memmove((to), (from), (len)) #define bzero(buf, len) __builtin_memset((buf), 0, (len)) #define bcmp(b1, b2, len) __builtin_memcmp((b1), (b2), (len)) #define memset(buf, c, len) __builtin_memset((buf), (c), (len)) #define memcpy(to, from, len) __builtin_memcpy((to), (from), (len)) #define memmove(dest, src, n) __builtin_memmove((dest), (src), (n)) #define memcmp(b1, b2, len) __builtin_memcmp((b1), (b2), (len)) #endif /* SAN_NEEDS_INTERCEPTORS */ void *memset_early(void * _Nonnull buf, int c, size_t len); #define bzero_early(buf, len) memset_early((buf), 0, (len)) void *memcpy_early(void * _Nonnull to, const void * _Nonnull from, size_t len); void *memmove_early(void * _Nonnull dest, const void * _Nonnull src, size_t n); #define bcopy_early(from, to, len) memmove_early((to), (from), (len)) #define copystr(src, dst, len, outlen) ({ \ size_t __r, __len, *__outlen; \ \ __len = (len); \ __outlen = (outlen); \ __r = strlcpy((dst), (src), __len); \ if (__outlen != NULL) \ *__outlen = ((__r >= __len) ? __len : __r + 1); \ ((__r >= __len) ? 
ENAMETOOLONG : 0); \ }) int copyinstr(const void * __restrict udaddr, void * _Nonnull __restrict kaddr, size_t len, size_t * __restrict lencopied); int copyin(const void * __restrict udaddr, void * _Nonnull __restrict kaddr, size_t len); int copyin_nofault(const void * __restrict udaddr, void * _Nonnull __restrict kaddr, size_t len); int copyout(const void * _Nonnull __restrict kaddr, void * __restrict udaddr, size_t len); int copyout_nofault(const void * _Nonnull __restrict kaddr, void * __restrict udaddr, size_t len); #ifdef SAN_NEEDS_INTERCEPTORS int SAN_INTERCEPTOR(copyin)(const void *, void *, size_t); int SAN_INTERCEPTOR(copyinstr)(const void *, void *, size_t, size_t *); int SAN_INTERCEPTOR(copyout)(const void *, void *, size_t); #ifndef SAN_RUNTIME #define copyin(u, k, l) SAN_INTERCEPTOR(copyin)((u), (k), (l)) #define copyinstr(u, k, l, lc) SAN_INTERCEPTOR(copyinstr)((u), (k), (l), (lc)) #define copyout(k, u, l) SAN_INTERCEPTOR(copyout)((k), (u), (l)) #endif /* !SAN_RUNTIME */ #endif /* SAN_NEEDS_INTERCEPTORS */ int fubyte(volatile const void *base); long fuword(volatile const void *base); int fuword16(volatile const void *base); int32_t fuword32(volatile const void *base); int64_t fuword64(volatile const void *base); int fueword(volatile const void *base, long *val); int fueword32(volatile const void *base, int32_t *val); int fueword64(volatile const void *base, int64_t *val); int subyte(volatile void *base, int byte); int suword(volatile void *base, long word); int suword16(volatile void *base, int word); int suword32(volatile void *base, int32_t word); int suword64(volatile void *base, int64_t word); uint32_t casuword32(volatile uint32_t *base, uint32_t oldval, uint32_t newval); u_long casuword(volatile u_long *p, u_long oldval, u_long newval); int casueword32(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp, uint32_t newval); int casueword(volatile u_long *p, u_long oldval, u_long *oldvalp, u_long newval); #if defined(SAN_NEEDS_INTERCEPTORS) && !defined(KCSAN) int SAN_INTERCEPTOR(fubyte)(volatile const void *base); int SAN_INTERCEPTOR(fuword16)(volatile const void *base); int SAN_INTERCEPTOR(fueword)(volatile const void *base, long *val); int SAN_INTERCEPTOR(fueword32)(volatile const void *base, int32_t *val); int SAN_INTERCEPTOR(fueword64)(volatile const void *base, int64_t *val); int SAN_INTERCEPTOR(subyte)(volatile void *base, int byte); int SAN_INTERCEPTOR(suword)(volatile void *base, long word); int SAN_INTERCEPTOR(suword16)(volatile void *base, int word); int SAN_INTERCEPTOR(suword32)(volatile void *base, int32_t word); int SAN_INTERCEPTOR(suword64)(volatile void *base, int64_t word); int SAN_INTERCEPTOR(casueword32)(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp, uint32_t newval); int SAN_INTERCEPTOR(casueword)(volatile u_long *p, u_long oldval, u_long *oldvalp, u_long newval); #ifndef SAN_RUNTIME #define fubyte(b) SAN_INTERCEPTOR(fubyte)((b)) #define fuword16(b) SAN_INTERCEPTOR(fuword16)((b)) #define fueword(b, v) SAN_INTERCEPTOR(fueword)((b), (v)) #define fueword32(b, v) SAN_INTERCEPTOR(fueword32)((b), (v)) #define fueword64(b, v) SAN_INTERCEPTOR(fueword64)((b), (v)) #define subyte(b, w) SAN_INTERCEPTOR(subyte)((b), (w)) #define suword(b, w) SAN_INTERCEPTOR(suword)((b), (w)) #define suword16(b, w) SAN_INTERCEPTOR(suword16)((b), (w)) #define suword32(b, w) SAN_INTERCEPTOR(suword32)((b), (w)) #define suword64(b, w) SAN_INTERCEPTOR(suword64)((b), (w)) #define casueword32(b, o, p, n) SAN_INTERCEPTOR(casueword32)((b), (o), (p), (n)) #define 
casueword(b, o, p, n) SAN_INTERCEPTOR(casueword)((b), (o), (p), (n)) #endif /* !SAN_RUNTIME */ #endif /* SAN_NEEDS_INTERCEPTORS && !KCSAN */ int sysbeep(int hertz, sbintime_t duration); void hardclock(int cnt, int usermode); void hardclock_sync(int cpu); void statclock(int cnt, int usermode); void profclock(int cnt, int usermode, uintfptr_t pc); int hardclockintr(void); void startprofclock(struct proc *); void stopprofclock(struct proc *); void cpu_startprofclock(void); void cpu_stopprofclock(void); void suspendclock(void); void resumeclock(void); sbintime_t cpu_idleclock(void); void cpu_activeclock(void); void cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt); void cpu_et_frequency(struct eventtimer *et, uint64_t newfreq); extern int cpu_disable_c2_sleep; extern int cpu_disable_c3_sleep; +extern void (*tcp_hpts_softclock)(void); +#define tcp_hpts_softclock() do { \ + if (tcp_hpts_softclock != NULL) \ + tcp_hpts_softclock(); \ +} while (0) + char *kern_getenv(const char *name); void freeenv(char *env); int getenv_int(const char *name, int *data); int getenv_uint(const char *name, unsigned int *data); int getenv_long(const char *name, long *data); int getenv_ulong(const char *name, unsigned long *data); int getenv_string(const char *name, char *data, int size); int getenv_int64(const char *name, int64_t *data); int getenv_uint64(const char *name, uint64_t *data); int getenv_quad(const char *name, quad_t *data); int getenv_bool(const char *name, bool *data); bool getenv_is_true(const char *name); bool getenv_is_false(const char *name); int kern_setenv(const char *name, const char *value); int kern_unsetenv(const char *name); int testenv(const char *name); int getenv_array(const char *name, void *data, int size, int *psize, int type_size, bool allow_signed); #define GETENV_UNSIGNED false /* negative numbers not allowed */ #define GETENV_SIGNED true /* negative numbers allowed */ typedef uint64_t (cpu_tick_f)(void); void set_cputicker(cpu_tick_f *func, uint64_t freq, bool isvariable); extern cpu_tick_f *cpu_ticks; uint64_t cpu_tickrate(void); uint64_t cputick2usec(uint64_t tick); #include /* Initialize the world */ void consinit(void); void cpu_initclocks(void); void cpu_initclocks_bsp(void); void cpu_initclocks_ap(void); void usrinfoinit(void); /* Finalize the world */ void kern_reboot(int) __dead2; void shutdown_nice(int); /* Stubs for obsolete functions that used to be for interrupt management */ static __inline intrmask_t splhigh(void) { return 0; } static __inline intrmask_t splimp(void) { return 0; } static __inline intrmask_t splnet(void) { return 0; } static __inline intrmask_t spltty(void) { return 0; } static __inline void splx(intrmask_t ipl __unused) { return; } /* * Common `proc' functions are declared here so that proc.h can be included * less often. 
*/ int _sleep(const void * _Nonnull chan, struct lock_object *lock, int pri, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags); #define msleep(chan, mtx, pri, wmesg, timo) \ _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), \ tick_sbt * (timo), 0, C_HARDCLOCK) #define msleep_sbt(chan, mtx, pri, wmesg, bt, pr, flags) \ _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (bt), (pr), \ (flags)) int msleep_spin_sbt(const void * _Nonnull chan, struct mtx *mtx, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags); #define msleep_spin(chan, mtx, wmesg, timo) \ msleep_spin_sbt((chan), (mtx), (wmesg), tick_sbt * (timo), \ 0, C_HARDCLOCK) int pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags); static __inline int pause(const char *wmesg, int timo) { return (pause_sbt(wmesg, tick_sbt * timo, 0, C_HARDCLOCK)); } #define pause_sig(wmesg, timo) \ pause_sbt((wmesg), tick_sbt * (timo), 0, C_HARDCLOCK | C_CATCH) #define tsleep(chan, pri, wmesg, timo) \ _sleep((chan), NULL, (pri), (wmesg), tick_sbt * (timo), \ 0, C_HARDCLOCK) #define tsleep_sbt(chan, pri, wmesg, bt, pr, flags) \ _sleep((chan), NULL, (pri), (wmesg), (bt), (pr), (flags)) void wakeup(const void *chan); void wakeup_one(const void *chan); void wakeup_any(const void *chan); /* * Common `struct cdev *' stuff are declared here to avoid #include poisoning */ struct cdev; dev_t dev2udev(struct cdev *x); const char *devtoname(struct cdev *cdev); #ifdef __LP64__ size_t devfs_iosize_max(void); size_t iosize_max(void); #endif int poll_no_poll(int events); /* XXX: Should be void nanodelay(u_int nsec); */ void DELAY(int usec); /* Root mount holdback API */ struct root_hold_token { int flags; const char *who; TAILQ_ENTRY(root_hold_token) list; }; struct root_hold_token *root_mount_hold(const char *identifier); void root_mount_hold_token(const char *identifier, struct root_hold_token *h); void root_mount_rel(struct root_hold_token *h); int root_mounted(void); /* * Unit number allocation API. (kern/subr_unit.c) */ struct unrhdr; #define UNR_NO_MTX ((void *)(uintptr_t)-1) struct unrhdr *new_unrhdr(int low, int high, struct mtx *mutex); void init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex); void delete_unrhdr(struct unrhdr *uh); void clear_unrhdr(struct unrhdr *uh); void clean_unrhdr(struct unrhdr *uh); void clean_unrhdrl(struct unrhdr *uh); int alloc_unr(struct unrhdr *uh); int alloc_unr_specific(struct unrhdr *uh, u_int item); int alloc_unrl(struct unrhdr *uh); void free_unr(struct unrhdr *uh, u_int item); void *create_iter_unr(struct unrhdr *uh); int next_iter_unr(void *handle); void free_iter_unr(void *handle); struct unrhdr64 { uint64_t counter; }; static __inline void new_unrhdr64(struct unrhdr64 *unr64, uint64_t low) { unr64->counter = low; } static __inline uint64_t alloc_unr64(struct unrhdr64 *unr64) { return (atomic_fetchadd_64(&unr64->counter, 1)); } void intr_prof_stack_use(struct thread *td, struct trapframe *frame); void counted_warning(unsigned *counter, const char *msg); /* * APIs to manage deprecation and obsolescence. 
*/ void _gone_in(int major, const char *msg); void _gone_in_dev(device_t dev, int major, const char *msg); #ifdef NO_OBSOLETE_CODE #define __gone_ok(m, msg) \ _Static_assert(m < P_OSREL_MAJOR(__FreeBSD_version), \ "Obsolete code: " msg); #else #define __gone_ok(m, msg) #endif #define gone_in(major, msg) __gone_ok(major, msg) _gone_in(major, msg) #define gone_in_dev(dev, major, msg) __gone_ok(major, msg) _gone_in_dev(dev, major, msg) #ifdef INVARIANTS #define __diagused #else #define __diagused __unused #endif #ifdef WITNESS #define __witness_used #else #define __witness_used __unused #endif #endif /* _KERNEL */ __NULLABILITY_PRAGMA_POP #endif /* !_SYS_SYSTM_H_ */
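/*
 * Illustrative sketch, not part of the patch: how an optional consumer such
 * as the HPTS module might hook the tcp_hpts_softclock function pointer
 * declared in sys/sys/systm.h above.  Because the wrapper macro of the same
 * name only expands when the identifier is followed by "()", plain
 * assignments to the identifier still reach the pointer, and the name is not
 * re-expanded inside the macro body, so the guarded call resolves to an
 * indirect call through the pointer.  The handler and helper names below are
 * hypothetical; the real module installs its own function.
 */
static void
example_hpts_softclock(void)
{
        /* Run any expired HPTS work on behalf of the calling thread. */
}

static void
example_hpts_attach(void)
{
        /*
         * Publish the handler; callers elsewhere in the kernel reach it
         * through the NULL-checked tcp_hpts_softclock() macro.
         */
        tcp_hpts_softclock = example_hpts_softclock;
}

static void
example_hpts_detach(void)
{
        /* Unhook before the handler's code can go away. */
        tcp_hpts_softclock = NULL;
}
/*
 * Keeping the indirection in systm.h means kernels without the module pay
 * only a predictable pointer test at each call site, while modules can
 * register and deregister the handler at load and unload time.
 */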
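/*
 * Illustrative sketch, not part of the patch: how a NIC driver's receive
 * path might hand segments to the LRO code shown earlier.  It assumes a
 * struct lro_ctrl already prepared with tcp_lro_init() at attach time and
 * the usual <sys/mbuf.h>, <net/if_var.h> and <netinet/tcp_lro.h> headers;
 * example_rx_one() is a hypothetical helper name.
 */
static void
example_rx_one(struct ifnet *ifp, struct lro_ctrl *lro, struct mbuf *m)
{
        /*
         * Only segments whose TCP checksum was verified in hardware are
         * aggregated, so mark the mbuf the way the checks above expect
         * (and only when the NIC really did verify the checksum).
         */
        m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
        m->m_pkthdr.csum_data = 0xffff;

        if (tcp_lro_rx(lro, m, 0) != 0) {
                /* Not LRO-able (bad csum, SYN, no free entry, ...). */
                (*ifp->if_input)(ifp, m);
        }
}
/*
 * After the receive burst the driver calls tcp_lro_flush_all(lro) so no
 * partially built chain is held beyond the interrupt.  Drivers that want the
 * sorted batching path instead queue with tcp_lro_queue_mbuf(), which, as
 * shown above, flushes on its own once its mbuf array fills up.
 */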
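/*
 * Illustrative sketch, not part of the patch: what the copystr() wrapper
 * defined above boils down to.  strlcpy() reports the length it tried to
 * create, so a result >= the destination size means the copy was truncated
 * and the macro yields ENAMETOOLONG; otherwise it yields 0 and stores the
 * copied length, terminating NUL included, through the optional out pointer.
 * example_copy_name() is a hypothetical caller; MAXPATHLEN comes from
 * <sys/param.h>.
 */
static int
example_copy_name(const char *src)
{
        char buf[MAXPATHLEN];
        size_t done;
        int error;

        error = copystr(src, buf, sizeof(buf), &done);
        if (error != 0)
                return (error);   /* ENAMETOOLONG: "src" did not fit. */
        /* "done" now counts the bytes copied, including the NUL. */
        return (0);
}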