diff --git a/lib/libkvm/kvm_proc.c b/lib/libkvm/kvm_proc.c
index 5058e86a645b..1af4ce38615f 100644
--- a/lib/libkvm/kvm_proc.c
+++ b/lib/libkvm/kvm_proc.c
@@ -1,788 +1,783 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software developed by the Computer Systems
  * Engineering group at Lawrence Berkeley Laboratory under DARPA contract
  * BG 91-66 and contributed to Berkeley.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 __SCCSID("@(#)kvm_proc.c	8.3 (Berkeley) 9/23/93");
 
 /*
  * Proc traversal interface for kvm.  ps and w are (probably) the exclusive
  * users of this code, so we've factored it out into a separate module.
  * Thus, we keep this grunge out of the other kvm applications (i.e.,
  * most other applications are interested only in open/close/read/nlist).
  */
 
 #include <sys/param.h>
 #define	_WANT_UCRED	/* make ucred.h give us 'struct ucred' */
 #include <sys/ucred.h>
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/_task.h>
 #include <sys/cpuset.h>
 #include <sys/user.h>
 #include <sys/proc.h>
 #define	_WANT_PRISON	/* make jail.h give us 'struct prison' */
 #include <sys/jail.h>
 #include <sys/exec.h>
 #include <sys/stat.h>
 #include <sys/sysent.h>
 #include <sys/ioctl.h>
 #include <sys/tty.h>
 #include <sys/file.h>
 #include <sys/conf.h>
 #define	_WANT_KW_EXITCODE
 #include <sys/wait.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdbool.h>
 #include <unistd.h>
 #include <nlist.h>
 #include <kvm.h>
 
 #include <sys/sysctl.h>
 
 #include <limits.h>
 #include <memory.h>
 #include <paths.h>
 
 #include "kvm_private.h"
 
 #define KREAD(kd, addr, obj) \
 	(kvm_read(kd, addr, (char *)(obj), sizeof(*obj)) != sizeof(*obj))
 
 static int ticks;
 static int hz;
 static uint64_t cpu_tick_frequency;
 
 /*
  * From sys/kern/kern_tc.c. Depends on cpu_tick_frequency, which is
  * read/initialized before this function is ever called.
  */
 static uint64_t
 cputick2usec(uint64_t tick)
 {
-
 	if (cpu_tick_frequency == 0)
 		return (0);
-	if (tick > 18446744073709551)		/* floor(2^64 / 1000) */
-		return (tick / (cpu_tick_frequency / 1000000));
-	else if (tick > 18446744073709)	/* floor(2^64 / 1000000) */
-		return ((tick * 1000) / (cpu_tick_frequency / 1000));
-	else
-		return ((tick * 1000000) / cpu_tick_frequency);
+	return ((tick / cpu_tick_frequency) * 1000000ULL) +
+	    ((tick % cpu_tick_frequency) * 1000000ULL) / cpu_tick_frequency;
 }
 
 /*
  * Read proc's from memory file into buffer bp, which has space to hold
  * at most maxcnt procs.
  */
 static int
 kvm_proclist(kvm_t *kd, int what, int arg, struct proc *p,
     struct kinfo_proc *bp, int maxcnt)
 {
 	int cnt = 0;
 	struct kinfo_proc kinfo_proc, *kp;
 	struct pgrp pgrp;
 	struct session sess;
 	struct cdev t_cdev;
 	struct tty tty;
 	struct vmspace vmspace;
 	struct sigacts sigacts;
 #if 0
 	struct pstats pstats;
 #endif
 	struct ucred ucred;
 	struct prison pr;
 	struct thread mtd;
 	struct proc proc;
 	struct proc pproc;
 	struct sysentvec sysent;
 	char svname[KI_EMULNAMELEN];
 	struct thread *td = NULL;
 	bool first_thread;
 
 	kp = &kinfo_proc;
 	kp->ki_structsize = sizeof(kinfo_proc);
 	/*
 	 * Loop on the processes, then threads within the process if requested.
 	 */
 	if (what == KERN_PROC_ALL)
 		what |= KERN_PROC_INC_THREAD;
 	for (; cnt < maxcnt && p != NULL; p = LIST_NEXT(&proc, p_list)) {
 		memset(kp, 0, sizeof *kp);
 		if (KREAD(kd, (u_long)p, &proc)) {
 			_kvm_err(kd, kd->program, "can't read proc at %p", p);
 			return (-1);
 		}
 		if (proc.p_state == PRS_NEW)
 			continue;
 		if (KREAD(kd, (u_long)proc.p_ucred, &ucred) == 0) {
 			kp->ki_ruid = ucred.cr_ruid;
 			kp->ki_svuid = ucred.cr_svuid;
 			kp->ki_rgid = ucred.cr_rgid;
 			kp->ki_svgid = ucred.cr_svgid;
 			kp->ki_cr_flags = ucred.cr_flags;
 			if (ucred.cr_ngroups > KI_NGROUPS) {
 				kp->ki_ngroups = KI_NGROUPS;
 				kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW;
 			} else
 				kp->ki_ngroups = ucred.cr_ngroups;
 			kvm_read(kd, (u_long)ucred.cr_groups, kp->ki_groups,
 			    kp->ki_ngroups * sizeof(gid_t));
 			kp->ki_uid = ucred.cr_uid;
 			if (ucred.cr_prison != NULL) {
 				if (KREAD(kd, (u_long)ucred.cr_prison, &pr)) {
 					_kvm_err(kd, kd->program,
 					    "can't read prison at %p",
 					    ucred.cr_prison);
 					return (-1);
 				}
 				kp->ki_jid = pr.pr_id;
 			}
 		}
 
 		switch(what & ~KERN_PROC_INC_THREAD) {
 
 		case KERN_PROC_GID:
 			if (kp->ki_groups[0] != (gid_t)arg)
 				continue;
 			break;
 
 		case KERN_PROC_PID:
 			if (proc.p_pid != (pid_t)arg)
 				continue;
 			break;
 
 		case KERN_PROC_RGID:
 			if (kp->ki_rgid != (gid_t)arg)
 				continue;
 			break;
 
 		case KERN_PROC_UID:
 			if (kp->ki_uid != (uid_t)arg)
 				continue;
 			break;
 
 		case KERN_PROC_RUID:
 			if (kp->ki_ruid != (uid_t)arg)
 				continue;
 			break;
 		}
 		/*
 		 * We're going to add another proc to the set.  If this
 		 * will overflow the buffer, assume the reason is because
 		 * nprocs (or the proc list) is corrupt and declare an error.
 		 */
 		if (cnt >= maxcnt) {
 			_kvm_err(kd, kd->program, "nprocs corrupt");
 			return (-1);
 		}
 		/*
 		 * gather kinfo_proc
 		 */
 		kp->ki_paddr = p;
 		kp->ki_addr = 0;	/* XXX uarea */
 		/* kp->ki_kstack = proc.p_thread.td_kstack; XXXKSE */
 		kp->ki_args = proc.p_args;
 		kp->ki_numthreads = proc.p_numthreads;
 		kp->ki_tracep = NULL;	/* XXXKIB do not expose ktr_io_params */
 		kp->ki_textvp = proc.p_textvp;
 		kp->ki_fd = proc.p_fd;
 		kp->ki_pd = proc.p_pd;
 		kp->ki_vmspace = proc.p_vmspace;
 		if (proc.p_sigacts != NULL) {
 			if (KREAD(kd, (u_long)proc.p_sigacts, &sigacts)) {
 				_kvm_err(kd, kd->program,
 				    "can't read sigacts at %p", proc.p_sigacts);
 				return (-1);
 			}
 			kp->ki_sigignore = sigacts.ps_sigignore;
 			kp->ki_sigcatch = sigacts.ps_sigcatch;
 		}
 #if 0
 		if ((proc.p_flag & P_INMEM) && proc.p_stats != NULL) {
 			if (KREAD(kd, (u_long)proc.p_stats, &pstats)) {
 				_kvm_err(kd, kd->program,
 				    "can't read stats at %x", proc.p_stats);
 				return (-1);
 			}
 			kp->ki_start = pstats.p_start;
 
 			/*
 			 * XXX: The times here are probably zero and need
 			 * to be calculated from the raw data in p_rux and
 			 * p_crux.
 			 */
 			kp->ki_rusage = pstats.p_ru;
 			kp->ki_childstime = pstats.p_cru.ru_stime;
 			kp->ki_childutime = pstats.p_cru.ru_utime;
 			/* Some callers want child-times in a single value */
 			timeradd(&kp->ki_childstime, &kp->ki_childutime,
 			    &kp->ki_childtime);
 		}
 #endif
 		if (proc.p_oppid)
 			kp->ki_ppid = proc.p_oppid;
 		else if (proc.p_pptr) {
 			if (KREAD(kd, (u_long)proc.p_pptr, &pproc)) {
 				_kvm_err(kd, kd->program,
 				    "can't read pproc at %p", proc.p_pptr);
 				return (-1);
 			}
 			kp->ki_ppid = pproc.p_pid;
 		} else
 			kp->ki_ppid = 0;
 		if (proc.p_pgrp == NULL)
 			goto nopgrp;
 		if (KREAD(kd, (u_long)proc.p_pgrp, &pgrp)) {
 			_kvm_err(kd, kd->program, "can't read pgrp at %p",
 				 proc.p_pgrp);
 			return (-1);
 		}
 		kp->ki_pgid = pgrp.pg_id;
 		kp->ki_jobc = -1;	/* Or calculate?  Arguably not. */
 		if (KREAD(kd, (u_long)pgrp.pg_session, &sess)) {
 			_kvm_err(kd, kd->program, "can't read session at %p",
 				pgrp.pg_session);
 			return (-1);
 		}
 		kp->ki_sid = sess.s_sid;
 		(void)memcpy(kp->ki_login, sess.s_login,
 						sizeof(kp->ki_login));
 		if ((proc.p_flag & P_CONTROLT) && sess.s_ttyp != NULL) {
 			if (KREAD(kd, (u_long)sess.s_ttyp, &tty)) {
 				_kvm_err(kd, kd->program,
 					 "can't read tty at %p", sess.s_ttyp);
 				return (-1);
 			}
 			if (tty.t_dev != NULL) {
 				if (KREAD(kd, (u_long)tty.t_dev, &t_cdev)) {
 					_kvm_err(kd, kd->program,
 						 "can't read cdev at %p",
 						tty.t_dev);
 					return (-1);
 				}
 #if 0
 				kp->ki_tdev = t_cdev.si_udev;
 #else
 				kp->ki_tdev = NODEV;
 #endif
 			}
 			if (tty.t_pgrp != NULL) {
 				if (KREAD(kd, (u_long)tty.t_pgrp, &pgrp)) {
 					_kvm_err(kd, kd->program,
 						 "can't read tpgrp at %p",
 						tty.t_pgrp);
 					return (-1);
 				}
 				kp->ki_tpgid = pgrp.pg_id;
 			} else
 				kp->ki_tpgid = -1;
 			if (tty.t_session != NULL) {
 				if (KREAD(kd, (u_long)tty.t_session, &sess)) {
 					_kvm_err(kd, kd->program,
 					    "can't read session at %p",
 					    tty.t_session);
 					return (-1);
 				}
 				kp->ki_tsid = sess.s_sid;
 			}
 		} else {
 nopgrp:
 			kp->ki_tdev = NODEV;
 		}
 
 		(void)kvm_read(kd, (u_long)proc.p_vmspace,
 		    (char *)&vmspace, sizeof(vmspace));
 		kp->ki_size = vmspace.vm_map.size;
 		/*
 		 * Approximate the kernel's method of calculating
 		 * this field.
 		 */
 #define		pmap_resident_count(pm) ((pm)->pm_stats.resident_count)
 		kp->ki_rssize = pmap_resident_count(&vmspace.vm_pmap);
 		kp->ki_swrss = vmspace.vm_swrss;
 		kp->ki_tsize = vmspace.vm_tsize;
 		kp->ki_dsize = vmspace.vm_dsize;
 		kp->ki_ssize = vmspace.vm_ssize;
 
 		switch (what & ~KERN_PROC_INC_THREAD) {
 
 		case KERN_PROC_PGRP:
 			if (kp->ki_pgid != (pid_t)arg)
 				continue;
 			break;
 
 		case KERN_PROC_SESSION:
 			if (kp->ki_sid != (pid_t)arg)
 				continue;
 			break;
 
 		case KERN_PROC_TTY:
 			if ((proc.p_flag & P_CONTROLT) == 0 ||
 			     kp->ki_tdev != (dev_t)arg)
 				continue;
 			break;
 		}
 		if (proc.p_comm[0] != 0)
 			strlcpy(kp->ki_comm, proc.p_comm, MAXCOMLEN);
 		(void)kvm_read(kd, (u_long)proc.p_sysent, (char *)&sysent,
 		    sizeof(sysent));
 		(void)kvm_read(kd, (u_long)sysent.sv_name, (char *)&svname,
 		    sizeof(svname));
 		if (svname[0] != 0)
 			strlcpy(kp->ki_emul, svname, KI_EMULNAMELEN);
 		kp->ki_runtime = cputick2usec(proc.p_rux.rux_runtime);
 		kp->ki_pid = proc.p_pid;
 		kp->ki_xstat = KW_EXITCODE(proc.p_xexit, proc.p_xsig);
 		kp->ki_acflag = proc.p_acflag;
 		kp->ki_lock = proc.p_lock;
 		kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */
 
 		/* Per-thread items; iterate as appropriate. */
 		td = TAILQ_FIRST(&proc.p_threads);
 		for (first_thread = true; cnt < maxcnt && td != NULL &&
 		    (first_thread || (what & KERN_PROC_INC_THREAD));
 		    first_thread = false) {
 			if (proc.p_state != PRS_ZOMBIE) {
 				if (KREAD(kd, (u_long)td, &mtd)) {
 					_kvm_err(kd, kd->program,
 					    "can't read thread at %p", td);
 					return (-1);
 				}
 				if (what & KERN_PROC_INC_THREAD)
 					td = TAILQ_NEXT(&mtd, td_plist);
 			} else
 				td = NULL;
 			if ((proc.p_state != PRS_ZOMBIE) && mtd.td_wmesg)
 				(void)kvm_read(kd, (u_long)mtd.td_wmesg,
 				    kp->ki_wmesg, WMESGLEN);
 			else
 				memset(kp->ki_wmesg, 0, WMESGLEN);
 			if (proc.p_pgrp == NULL) {
 				kp->ki_kiflag = 0;
 			} else {
 				kp->ki_kiflag = sess.s_ttyvp ? KI_CTTY : 0;
 				if (sess.s_leader == p)
 					kp->ki_kiflag |= KI_SLEADER;
 			}
 			if ((proc.p_state != PRS_ZOMBIE) &&
 			    (mtd.td_blocked != 0)) {
 				kp->ki_kiflag |= KI_LOCKBLOCK;
 				if (mtd.td_lockname)
 					(void)kvm_read(kd,
 					    (u_long)mtd.td_lockname,
 					    kp->ki_lockname, LOCKNAMELEN);
 				else
 					memset(kp->ki_lockname, 0,
 					    LOCKNAMELEN);
 				kp->ki_lockname[LOCKNAMELEN] = 0;
 			} else
 				kp->ki_kiflag &= ~KI_LOCKBLOCK;
 			kp->ki_siglist = proc.p_siglist;
 			if (proc.p_state != PRS_ZOMBIE) {
 				SIGSETOR(kp->ki_siglist, mtd.td_siglist);
 				kp->ki_sigmask = mtd.td_sigmask;
 				kp->ki_swtime = (ticks - proc.p_swtick) / hz;
 				kp->ki_flag = proc.p_flag;
 				kp->ki_sflag = 0;
 				kp->ki_nice = proc.p_nice;
 				kp->ki_traceflag = proc.p_traceflag;
 				if (proc.p_state == PRS_NORMAL) {
 					if (TD_ON_RUNQ(&mtd) ||
 					    TD_CAN_RUN(&mtd) ||
 					    TD_IS_RUNNING(&mtd)) {
 						kp->ki_stat = SRUN;
 					} else if (TD_GET_STATE(&mtd) ==
 					    TDS_INHIBITED) {
 						if (P_SHOULDSTOP(&proc)) {
 							kp->ki_stat = SSTOP;
 						} else if (
 						    TD_IS_SLEEPING(&mtd)) {
 							kp->ki_stat = SSLEEP;
 						} else if (TD_ON_LOCK(&mtd)) {
 							kp->ki_stat = SLOCK;
 						} else {
 							kp->ki_stat = SWAIT;
 						}
 					}
 				} else {
 					kp->ki_stat = SIDL;
 				}
 				/* Stuff from the thread */
 				kp->ki_pri.pri_level = mtd.td_priority;
 				kp->ki_pri.pri_native = mtd.td_base_pri;
 				kp->ki_lastcpu = mtd.td_lastcpu;
 				kp->ki_wchan = mtd.td_wchan;
 				kp->ki_oncpu = mtd.td_oncpu;
 				if (mtd.td_name[0] != '\0')
 					strlcpy(kp->ki_tdname, mtd.td_name,
 					    sizeof(kp->ki_tdname));
 				else
 					memset(kp->ki_tdname, 0,
 					    sizeof(kp->ki_tdname));
 				kp->ki_pctcpu = 0;
 				kp->ki_rqindex = 0;
 
 				/*
 				 * Note: legacy fields; wraps at NO_CPU_OLD
 				 * or the old max CPU value as appropriate
 				 */
 				if (mtd.td_lastcpu == NOCPU)
 					kp->ki_lastcpu_old = NOCPU_OLD;
 				else if (mtd.td_lastcpu > MAXCPU_OLD)
 					kp->ki_lastcpu_old = MAXCPU_OLD;
 				else
 					kp->ki_lastcpu_old = mtd.td_lastcpu;
 
 				if (mtd.td_oncpu == NOCPU)
 					kp->ki_oncpu_old = NOCPU_OLD;
 				else if (mtd.td_oncpu > MAXCPU_OLD)
 					kp->ki_oncpu_old = MAXCPU_OLD;
 				else
 					kp->ki_oncpu_old = mtd.td_oncpu;
 				kp->ki_tid = mtd.td_tid;
 			} else {
 				memset(&kp->ki_sigmask, 0,
 				    sizeof(kp->ki_sigmask));
 				kp->ki_stat = SZOMB;
 				kp->ki_tid = 0;
 			}
 
 			bcopy(&kinfo_proc, bp, sizeof(kinfo_proc));
 			++bp;
 			++cnt;
 		}
 	}
 	return (cnt);
 }
 
 /*
  * Build proc info array by reading in proc list from a crash dump.
  * Return number of procs read.  maxcnt is the max we will read.
  */
 static int
 kvm_deadprocs(kvm_t *kd, int what, int arg, u_long a_allproc,
     u_long a_zombproc, int maxcnt)
 {
 	struct kinfo_proc *bp = kd->procbase;
 	int acnt, zcnt = 0;
 	struct proc *p;
 
 	if (KREAD(kd, a_allproc, &p)) {
 		_kvm_err(kd, kd->program, "cannot read allproc");
 		return (-1);
 	}
 	acnt = kvm_proclist(kd, what, arg, p, bp, maxcnt);
 	if (acnt < 0)
 		return (acnt);
 
 	if (a_zombproc != 0) {
 		if (KREAD(kd, a_zombproc, &p)) {
 			_kvm_err(kd, kd->program, "cannot read zombproc");
 			return (-1);
 		}
 		zcnt = kvm_proclist(kd, what, arg, p, bp + acnt, maxcnt - acnt);
 		if (zcnt < 0)
 			zcnt = 0;
 	}
 
 	return (acnt + zcnt);
 }
 
 struct kinfo_proc *
 kvm_getprocs(kvm_t *kd, int op, int arg, int *cnt)
 {
 	int mib[4], st, nprocs;
 	size_t size, osize;
 	int temp_op;
 
 	if (kd->procbase != 0) {
 		free((void *)kd->procbase);
 		/*
 		 * Clear this pointer in case this call fails.  Otherwise,
 		 * kvm_close() will free it again.
 		 */
 		kd->procbase = 0;
 	}
 	if (ISALIVE(kd)) {
 		size = 0;
 		mib[0] = CTL_KERN;
 		mib[1] = KERN_PROC;
 		mib[2] = op;
 		mib[3] = arg;
 		temp_op = op & ~KERN_PROC_INC_THREAD;
 		st = sysctl(mib,
 		    temp_op == KERN_PROC_ALL || temp_op == KERN_PROC_PROC ?
 		    3 : 4, NULL, &size, NULL, 0);
 		if (st == -1) {
 			_kvm_syserr(kd, kd->program, "kvm_getprocs");
 			return (0);
 		}
 		/*
 		 * We can't continue with a size of 0 because we pass
 		 * it to realloc() (via _kvm_realloc()), and passing 0
 		 * to realloc() results in undefined behavior.
 		 */
 		if (size == 0) {
 			/*
 			 * XXX: We should probably return an invalid,
 			 * but non-NULL, pointer here so any client
 			 * program trying to dereference it will
 			 * crash.  However, _kvm_freeprocs() calls
 			 * free() on kd->procbase if it isn't NULL,
 			 * and free()'ing a junk pointer isn't good.
 			 * Then again, _kvm_freeprocs() isn't used
 			 * anywhere . . .
 			 */
 			kd->procbase = _kvm_malloc(kd, 1);
 			goto liveout;
 		}
 		do {
 			size += size / 10;
 			kd->procbase = (struct kinfo_proc *)
 			    _kvm_realloc(kd, kd->procbase, size);
 			if (kd->procbase == NULL)
 				return (0);
 			osize = size;
 			st = sysctl(mib, temp_op == KERN_PROC_ALL ||
 			    temp_op == KERN_PROC_PROC ? 3 : 4,
 			    kd->procbase, &size, NULL, 0);
 		} while (st == -1 && errno == ENOMEM && size == osize);
 		if (st == -1) {
 			_kvm_syserr(kd, kd->program, "kvm_getprocs");
 			return (0);
 		}
 		/*
 		 * We have to check the size again because sysctl()
 		 * may "round up" oldlenp if oldp is NULL; hence it
 		 * might've told us that there was data to get when
 		 * there really isn't any.
 		 */
 		if (size > 0 &&
 		    kd->procbase->ki_structsize != sizeof(struct kinfo_proc)) {
 			_kvm_err(kd, kd->program,
 			    "kinfo_proc size mismatch (expected %zu, got %d)",
 			    sizeof(struct kinfo_proc),
 			    kd->procbase->ki_structsize);
 			return (0);
 		}
 liveout:
 		nprocs = size == 0 ? 0 : size / kd->procbase->ki_structsize;
 	} else {
 		struct nlist nl[6], *p;
 		struct nlist nlz[2];
 
 		nl[0].n_name = "_nprocs";
 		nl[1].n_name = "_allproc";
 		nl[2].n_name = "_ticks";
 		nl[3].n_name = "_hz";
 		nl[4].n_name = "_cpu_tick_frequency";
 		nl[5].n_name = 0;
 
 		nlz[0].n_name = "_zombproc";
 		nlz[1].n_name = 0;
 
 		if (!kd->arch->ka_native(kd)) {
 			_kvm_err(kd, kd->program,
 			    "cannot read procs from non-native core");
 			return (0);
 		}
 
 		if (kvm_nlist(kd, nl) != 0) {
 			for (p = nl; p->n_type != 0; ++p)
 				;
 			_kvm_err(kd, kd->program,
 				 "%s: no such symbol", p->n_name);
 			return (0);
 		}
 		(void) kvm_nlist(kd, nlz);	/* attempt to get zombproc */
 		if (KREAD(kd, nl[0].n_value, &nprocs)) {
 			_kvm_err(kd, kd->program, "can't read nprocs");
 			return (0);
 		}
 		/*
 		 * If returning all threads, we don't know how many that
 		 * might be.  Presume that there are, on average, no more
 		 * than 10 threads per process.
 		 */
 		if (op == KERN_PROC_ALL || (op & KERN_PROC_INC_THREAD))
 			nprocs *= 10;		/* XXX */
 		if (KREAD(kd, nl[2].n_value, &ticks)) {
 			_kvm_err(kd, kd->program, "can't read ticks");
 			return (0);
 		}
 		if (KREAD(kd, nl[3].n_value, &hz)) {
 			_kvm_err(kd, kd->program, "can't read hz");
 			return (0);
 		}
 		if (KREAD(kd, nl[4].n_value, &cpu_tick_frequency)) {
 			_kvm_err(kd, kd->program,
 			    "can't read cpu_tick_frequency");
 			return (0);
 		}
 		size = nprocs * sizeof(struct kinfo_proc);
 		kd->procbase = (struct kinfo_proc *)_kvm_malloc(kd, size);
 		if (kd->procbase == NULL)
 			return (0);
 
 		nprocs = kvm_deadprocs(kd, op, arg, nl[1].n_value,
 				      nlz[0].n_value, nprocs);
 		if (nprocs <= 0) {
 			_kvm_freeprocs(kd);
 			nprocs = 0;
 		}
 #ifdef notdef
 		else {
 			size = nprocs * sizeof(struct kinfo_proc);
 			kd->procbase = realloc(kd->procbase, size);
 		}
 #endif
 	}
 	*cnt = nprocs;
 	return (kd->procbase);
 }
 
 void
 _kvm_freeprocs(kvm_t *kd)
 {
 
 	free(kd->procbase);
 	kd->procbase = NULL;
 }
 
 void *
 _kvm_realloc(kvm_t *kd, void *p, size_t n)
 {
 	void *np;
 
 	np = reallocf(p, n);
 	if (np == NULL)
 		_kvm_err(kd, kd->program, "out of memory");
 	return (np);
 }
 
 /*
  * Get the command args or environment.
  */
 static char **
 kvm_argv(kvm_t *kd, const struct kinfo_proc *kp, int env, int nchr)
 {
 	int oid[4];
 	int i;
 	size_t bufsz;
 	static int buflen;
 	static char *buf, *p;
 	static char **bufp;
 	static int argc;
 	char **nbufp;
 
 	if (!ISALIVE(kd)) {
 		_kvm_err(kd, kd->program,
 		    "cannot read user space from dead kernel");
 		return (NULL);
 	}
 
 	if (nchr == 0 || nchr > ARG_MAX)
 		nchr = ARG_MAX;
 	if (buflen == 0) {
 		buf = malloc(nchr);
 		if (buf == NULL) {
 			_kvm_err(kd, kd->program, "cannot allocate memory");
 			return (NULL);
 		}
 		argc = 32;
 		bufp = malloc(sizeof(char *) * argc);
 		if (bufp == NULL) {
 			free(buf);
 			buf = NULL;
 			_kvm_err(kd, kd->program, "cannot allocate memory");
 			return (NULL);
 		}
 		buflen = nchr;
 	} else if (nchr > buflen) {
 		p = realloc(buf, nchr);
 		if (p != NULL) {
 			buf = p;
 			buflen = nchr;
 		}
 	}
 	oid[0] = CTL_KERN;
 	oid[1] = KERN_PROC;
 	oid[2] = env ? KERN_PROC_ENV : KERN_PROC_ARGS;
 	oid[3] = kp->ki_pid;
 	bufsz = buflen;
 	if (sysctl(oid, 4, buf, &bufsz, 0, 0) == -1) {
 		/*
 		 * If the supplied buf is too short to hold the requested
 		 * value the sysctl returns with ENOMEM. The buf is filled
 		 * with the truncated value and the returned bufsz is equal
 		 * to the requested len.
 		 */
 		if (errno != ENOMEM || bufsz != (size_t)buflen)
 			return (NULL);
 		buf[bufsz - 1] = '\0';
 		errno = 0;
 	} else if (bufsz == 0)
 		return (NULL);
 	i = 0;
 	p = buf;
 	do {
 		bufp[i++] = p;
 		p += strlen(p) + 1;
 		if (i >= argc) {
 			argc += argc;
 			nbufp = realloc(bufp, sizeof(char *) * argc);
 			if (nbufp == NULL)
 				return (NULL);
 			bufp = nbufp;
 		}
 	} while (p < buf + bufsz);
 	bufp[i++] = 0;
 	return (bufp);
 }
 
 char **
 kvm_getargv(kvm_t *kd, const struct kinfo_proc *kp, int nchr)
 {
 	return (kvm_argv(kd, kp, 0, nchr));
 }
 
 char **
 kvm_getenvv(kvm_t *kd, const struct kinfo_proc *kp, int nchr)
 {
 	return (kvm_argv(kd, kp, 1, nchr));
 }
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index 01b4e8656090..fcdec7a58200 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -1,2269 +1,2263 @@
 /*-
  * SPDX-License-Identifier: Beerware
  *
  * ----------------------------------------------------------------------------
  * "THE BEER-WARE LICENSE" (Revision 42):
  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
  * can do whatever you want with this stuff. If we meet some day, and you think
  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
  * ----------------------------------------------------------------------------
  *
  * Copyright (c) 2011, 2015, 2016 The FreeBSD Foundation
  *
  * Portions of this software were developed by Julien Ridoux at the University
  * of Melbourne under sponsorship from the FreeBSD Foundation.
  *
  * Portions of this software were developed by Konstantin Belousov
  * under sponsorship from the FreeBSD Foundation.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ntp.h"
 #include "opt_ffclock.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sbuf.h>
 #include <sys/sleepqueue.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/timeffc.h>
 #include <sys/timepps.h>
 #include <sys/timetc.h>
 #include <sys/timex.h>
 #include <sys/vdso.h>
 
 /*
  * A large step happens on boot.  This constant detects such steps.
  * It is relatively small so that ntp_update_second gets called enough
  * in the typical 'missed a couple of seconds' case, but doesn't loop
  * forever when the time step is large.
  */
 #define LARGE_STEP	200
 
 /*
  * Implement a dummy timecounter which we can use until we get a real one
  * in the air.  This allows the console and other early stuff to use
  * time services.
  */
 
 static u_int
 dummy_get_timecount(struct timecounter *tc)
 {
 	static u_int now;
 
 	return (++now);
 }
 
 static struct timecounter dummy_timecounter = {
 	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
 };
 
 struct timehands {
 	/* These fields must be initialized by the driver. */
 	struct timecounter	*th_counter;
 	int64_t			th_adjustment;
 	uint64_t		th_scale;
 	u_int			th_large_delta;
 	u_int	 		th_offset_count;
 	struct bintime		th_offset;
 	struct bintime		th_bintime;
 	struct timeval		th_microtime;
 	struct timespec		th_nanotime;
 	struct bintime		th_boottime;
 	/* Fields not to be copied in tc_windup start with th_generation. */
 	u_int			th_generation;
 	struct timehands	*th_next;
 };
 
 static struct timehands ths[16] = {
     [0] =  {
 	.th_counter = &dummy_timecounter,
 	.th_scale = (uint64_t)-1 / 1000000,
 	.th_large_delta = 1000000,
 	.th_offset = { .sec = 1 },
 	.th_generation = 1,
     },
 };
 
 static struct timehands *volatile timehands = &ths[0];
 struct timecounter *timecounter = &dummy_timecounter;
 static struct timecounter *timecounters = &dummy_timecounter;
 
 /* Mutex to protect the timecounter list. */
 static struct mtx tc_lock;
 
 int tc_min_ticktock_freq = 1;
 
 volatile time_t time_second = 1;
 volatile time_t time_uptime = 1;
 
 /*
  * The system time is always computed by summing the estimated boot time and the
  * system uptime. The timehands track boot time, but it changes when the system
  * time is set by the user, stepped by ntpd or adjusted when resuming. It
  * is set to new_time - uptime.
  */
 static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS);
 SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime,
     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
     sysctl_kern_boottime, "S,timeval",
     "Estimated system boottime");
 
 SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "");
 static SYSCTL_NODE(_kern_timecounter, OID_AUTO, tc,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "");
 
 static int timestepwarnings;
 SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RWTUN,
     &timestepwarnings, 0, "Log time steps");
 
 static int timehands_count = 2;
 SYSCTL_INT(_kern_timecounter, OID_AUTO, timehands_count,
     CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
     &timehands_count, 0, "Count of timehands in rotation");
 
 struct bintime bt_timethreshold;
 struct bintime bt_tickthreshold;
 sbintime_t sbt_timethreshold;
 sbintime_t sbt_tickthreshold;
 struct bintime tc_tick_bt;
 sbintime_t tc_tick_sbt;
 int tc_precexp;
 int tc_timepercentage = TC_DEFAULTPERC;
 static int sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS);
 SYSCTL_PROC(_kern_timecounter, OID_AUTO, alloweddeviation,
     CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
     sysctl_kern_timecounter_adjprecision, "I",
     "Allowed time interval deviation in percents");
 
 volatile int rtc_generation = 1;
 
 static int tc_chosen;	/* Non-zero if a specific tc was chosen via sysctl. */
 static char tc_from_tunable[16];
 
 static void tc_windup(struct bintime *new_boottimebin);
 static void cpu_tick_calibrate(int);
 
 void dtrace_getnanotime(struct timespec *tsp);
 void dtrace_getnanouptime(struct timespec *tsp);
 
 static int
 sysctl_kern_boottime(SYSCTL_HANDLER_ARGS)
 {
 	struct timeval boottime;
 
 	getboottime(&boottime);
 
 /* i386 is the only arch which uses a 32bits time_t */
 #ifdef __amd64__
 #ifdef SCTL_MASK32
 	int tv[2];
 
 	if (req->flags & SCTL_MASK32) {
 		tv[0] = boottime.tv_sec;
 		tv[1] = boottime.tv_usec;
 		return (SYSCTL_OUT(req, tv, sizeof(tv)));
 	}
 #endif
 #endif
 	return (SYSCTL_OUT(req, &boottime, sizeof(boottime)));
 }
 
 static int
 sysctl_kern_timecounter_get(SYSCTL_HANDLER_ARGS)
 {
 	u_int ncount;
 	struct timecounter *tc = arg1;
 
 	ncount = tc->tc_get_timecount(tc);
 	return (sysctl_handle_int(oidp, &ncount, 0, req));
 }
 
 static int
 sysctl_kern_timecounter_freq(SYSCTL_HANDLER_ARGS)
 {
 	uint64_t freq;
 	struct timecounter *tc = arg1;
 
 	freq = tc->tc_frequency;
 	return (sysctl_handle_64(oidp, &freq, 0, req));
 }
 
 /*
  * Return the difference between the timehands' counter value now and what
  * was when we copied it to the timehands' offset_count.
  */
 static __inline u_int
 tc_delta(struct timehands *th)
 {
 	struct timecounter *tc;
 
 	tc = th->th_counter;
 	return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
 	    tc->tc_counter_mask);
 }
 
 static __inline void
 bintime_add_tc_delta(struct bintime *bt, uint64_t scale,
     uint64_t large_delta, uint64_t delta)
 {
 	uint64_t x;
 
 	if (__predict_false(delta >= large_delta)) {
 		/* Avoid overflow for scale * delta. */
 		x = (scale >> 32) * delta;
 		bt->sec += x >> 32;
 		bintime_addx(bt, x << 32);
 		bintime_addx(bt, (scale & 0xffffffff) * delta);
 	} else {
 		bintime_addx(bt, scale * delta);
 	}
 }
 
 /*
  * Functions for reading the time.  We have to loop until we are sure that
  * the timehands that we operated on was not updated under our feet.  See
  * the comment in <sys/time.h> for a description of these 12 functions.
  */
 
 static __inline void
 bintime_off(struct bintime *bt, u_int off)
 {
 	struct timehands *th;
 	struct bintime *btp;
 	uint64_t scale;
 	u_int delta, gen, large_delta;
 
 	do {
 		th = timehands;
 		gen = atomic_load_acq_int(&th->th_generation);
 		btp = (struct bintime *)((vm_offset_t)th + off);
 		*bt = *btp;
 		scale = th->th_scale;
 		delta = tc_delta(th);
 		large_delta = th->th_large_delta;
 		atomic_thread_fence_acq();
 	} while (gen == 0 || gen != th->th_generation);
 
 	bintime_add_tc_delta(bt, scale, large_delta, delta);
 }
 #define	GETTHBINTIME(dst, member)					\
 do {									\
 	_Static_assert(_Generic(((struct timehands *)NULL)->member,	\
 	    struct bintime: 1, default: 0) == 1,			\
 	    "struct timehands member is not of struct bintime type");	\
 	bintime_off(dst, __offsetof(struct timehands, member));		\
 } while (0)
 
 static __inline void
 getthmember(void *out, size_t out_size, u_int off)
 {
 	struct timehands *th;
 	u_int gen;
 
 	do {
 		th = timehands;
 		gen = atomic_load_acq_int(&th->th_generation);
 		memcpy(out, (char *)th + off, out_size);
 		atomic_thread_fence_acq();
 	} while (gen == 0 || gen != th->th_generation);
 }
 #define	GETTHMEMBER(dst, member)					\
 do {									\
 	_Static_assert(_Generic(*dst,					\
 	    __typeof(((struct timehands *)NULL)->member): 1,		\
 	    default: 0) == 1,						\
 	    "*dst and struct timehands member have different types");	\
 	getthmember(dst, sizeof(*dst), __offsetof(struct timehands,	\
 	    member));							\
 } while (0)
 
 #ifdef FFCLOCK
 void
 fbclock_binuptime(struct bintime *bt)
 {
 
 	GETTHBINTIME(bt, th_offset);
 }
 
 void
 fbclock_nanouptime(struct timespec *tsp)
 {
 	struct bintime bt;
 
 	fbclock_binuptime(&bt);
 	bintime2timespec(&bt, tsp);
 }
 
 void
 fbclock_microuptime(struct timeval *tvp)
 {
 	struct bintime bt;
 
 	fbclock_binuptime(&bt);
 	bintime2timeval(&bt, tvp);
 }
 
 void
 fbclock_bintime(struct bintime *bt)
 {
 
 	GETTHBINTIME(bt, th_bintime);
 }
 
 void
 fbclock_nanotime(struct timespec *tsp)
 {
 	struct bintime bt;
 
 	fbclock_bintime(&bt);
 	bintime2timespec(&bt, tsp);
 }
 
 void
 fbclock_microtime(struct timeval *tvp)
 {
 	struct bintime bt;
 
 	fbclock_bintime(&bt);
 	bintime2timeval(&bt, tvp);
 }
 
 void
 fbclock_getbinuptime(struct bintime *bt)
 {
 
 	GETTHMEMBER(bt, th_offset);
 }
 
 void
 fbclock_getnanouptime(struct timespec *tsp)
 {
 	struct bintime bt;
 
 	GETTHMEMBER(&bt, th_offset);
 	bintime2timespec(&bt, tsp);
 }
 
 void
 fbclock_getmicrouptime(struct timeval *tvp)
 {
 	struct bintime bt;
 
 	GETTHMEMBER(&bt, th_offset);
 	bintime2timeval(&bt, tvp);
 }
 
 void
 fbclock_getbintime(struct bintime *bt)
 {
 
 	GETTHMEMBER(bt, th_bintime);
 }
 
 void
 fbclock_getnanotime(struct timespec *tsp)
 {
 
 	GETTHMEMBER(tsp, th_nanotime);
 }
 
 void
 fbclock_getmicrotime(struct timeval *tvp)
 {
 
 	GETTHMEMBER(tvp, th_microtime);
 }
 #else /* !FFCLOCK */
 
 void
 binuptime(struct bintime *bt)
 {
 
 	GETTHBINTIME(bt, th_offset);
 }
 
 void
 nanouptime(struct timespec *tsp)
 {
 	struct bintime bt;
 
 	binuptime(&bt);
 	bintime2timespec(&bt, tsp);
 }
 
 void
 microuptime(struct timeval *tvp)
 {
 	struct bintime bt;
 
 	binuptime(&bt);
 	bintime2timeval(&bt, tvp);
 }
 
 void
 bintime(struct bintime *bt)
 {
 
 	GETTHBINTIME(bt, th_bintime);
 }
 
 void
 nanotime(struct timespec *tsp)
 {
 	struct bintime bt;
 
 	bintime(&bt);
 	bintime2timespec(&bt, tsp);
 }
 
 void
 microtime(struct timeval *tvp)
 {
 	struct bintime bt;
 
 	bintime(&bt);
 	bintime2timeval(&bt, tvp);
 }
 
 void
 getbinuptime(struct bintime *bt)
 {
 
 	GETTHMEMBER(bt, th_offset);
 }
 
 void
 getnanouptime(struct timespec *tsp)
 {
 	struct bintime bt;
 
 	GETTHMEMBER(&bt, th_offset);
 	bintime2timespec(&bt, tsp);
 }
 
 void
 getmicrouptime(struct timeval *tvp)
 {
 	struct bintime bt;
 
 	GETTHMEMBER(&bt, th_offset);
 	bintime2timeval(&bt, tvp);
 }
 
 void
 getbintime(struct bintime *bt)
 {
 
 	GETTHMEMBER(bt, th_bintime);
 }
 
 void
 getnanotime(struct timespec *tsp)
 {
 
 	GETTHMEMBER(tsp, th_nanotime);
 }
 
 void
 getmicrotime(struct timeval *tvp)
 {
 
 	GETTHMEMBER(tvp, th_microtime);
 }
 #endif /* FFCLOCK */
 
 void
 getboottime(struct timeval *boottime)
 {
 	struct bintime boottimebin;
 
 	getboottimebin(&boottimebin);
 	bintime2timeval(&boottimebin, boottime);
 }
 
 void
 getboottimebin(struct bintime *boottimebin)
 {
 
 	GETTHMEMBER(boottimebin, th_boottime);
 }
 
 #ifdef FFCLOCK
 /*
  * Support for feed-forward synchronization algorithms. This is heavily inspired
  * by the timehands mechanism but kept independent from it. *_windup() functions
  * have some connection to avoid accessing the timecounter hardware more than
  * necessary.
  */
 
 /* Feed-forward clock estimates kept updated by the synchronization daemon. */
 struct ffclock_estimate ffclock_estimate;
 struct bintime ffclock_boottime;	/* Feed-forward boot time estimate. */
 uint32_t ffclock_status;		/* Feed-forward clock status. */
 int8_t ffclock_updated;			/* New estimates are available. */
 struct mtx ffclock_mtx;			/* Mutex on ffclock_estimate. */
 
 struct fftimehands {
 	struct ffclock_estimate	cest;
 	struct bintime		tick_time;
 	struct bintime		tick_time_lerp;
 	ffcounter		tick_ffcount;
 	uint64_t		period_lerp;
 	volatile uint8_t	gen;
 	struct fftimehands	*next;
 };
 
 #define	NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x))
 
 static struct fftimehands ffth[10];
 static struct fftimehands *volatile fftimehands = ffth;
 
 static void
 ffclock_init(void)
 {
 	struct fftimehands *cur;
 	struct fftimehands *last;
 
 	memset(ffth, 0, sizeof(ffth));
 
 	last = ffth + NUM_ELEMENTS(ffth) - 1;
 	for (cur = ffth; cur < last; cur++)
 		cur->next = cur + 1;
 	last->next = ffth;
 
 	ffclock_updated = 0;
 	ffclock_status = FFCLOCK_STA_UNSYNC;
 	mtx_init(&ffclock_mtx, "ffclock lock", NULL, MTX_DEF);
 }
 
 /*
  * Reset the feed-forward clock estimates. Called from inittodr() to get things
  * kick started and uses the timecounter nominal frequency as a first period
  * estimate. Note: this function may be called several time just after boot.
  * Note: this is the only function that sets the value of boot time for the
  * monotonic (i.e. uptime) version of the feed-forward clock.
  */
 void
 ffclock_reset_clock(struct timespec *ts)
 {
 	struct timecounter *tc;
 	struct ffclock_estimate cest;
 
 	tc = timehands->th_counter;
 	memset(&cest, 0, sizeof(struct ffclock_estimate));
 
 	timespec2bintime(ts, &ffclock_boottime);
 	timespec2bintime(ts, &(cest.update_time));
 	ffclock_read_counter(&cest.update_ffcount);
 	cest.leapsec_next = 0;
 	cest.period = ((1ULL << 63) / tc->tc_frequency) << 1;
 	cest.errb_abs = 0;
 	cest.errb_rate = 0;
 	cest.status = FFCLOCK_STA_UNSYNC;
 	cest.leapsec_total = 0;
 	cest.leapsec = 0;
 
 	mtx_lock(&ffclock_mtx);
 	bcopy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate));
 	ffclock_updated = INT8_MAX;
 	mtx_unlock(&ffclock_mtx);
 
 	printf("ffclock reset: %s (%llu Hz), time = %ld.%09lu\n", tc->tc_name,
 	    (unsigned long long)tc->tc_frequency, (long)ts->tv_sec,
 	    (unsigned long)ts->tv_nsec);
 }
 
 /*
  * Sub-routine to convert a time interval measured in RAW counter units to time
  * in seconds stored in bintime format.
  * NOTE: bintime_mul requires u_int, but the value of the ffcounter may be
  * larger than the max value of u_int (on 32 bit architecture). Loop to consume
  * extra cycles.
  */
 static void
 ffclock_convert_delta(ffcounter ffdelta, uint64_t period, struct bintime *bt)
 {
 	struct bintime bt2;
 	ffcounter delta, delta_max;
 
 	delta_max = (1ULL << (8 * sizeof(unsigned int))) - 1;
 	bintime_clear(bt);
 	do {
 		if (ffdelta > delta_max)
 			delta = delta_max;
 		else
 			delta = ffdelta;
 		bt2.sec = 0;
 		bt2.frac = period;
 		bintime_mul(&bt2, (unsigned int)delta);
 		bintime_add(bt, &bt2);
 		ffdelta -= delta;
 	} while (ffdelta > 0);
 }
 
 /*
  * Update the fftimehands.
  * Push the tick ffcount and time(s) forward based on current clock estimate.
  * The conversion from ffcounter to bintime relies on the difference clock
  * principle, whose accuracy relies on computing small time intervals. If a new
  * clock estimate has been passed by the synchronisation daemon, make it
  * current, and compute the linear interpolation for monotonic time if needed.
  */
 static void
 ffclock_windup(unsigned int delta)
 {
 	struct ffclock_estimate *cest;
 	struct fftimehands *ffth;
 	struct bintime bt, gap_lerp;
 	ffcounter ffdelta;
 	uint64_t frac;
 	unsigned int polling;
 	uint8_t forward_jump, ogen;
 
 	/*
 	 * Pick the next timehand, copy current ffclock estimates and move tick
 	 * times and counter forward.
 	 */
 	forward_jump = 0;
 	ffth = fftimehands->next;
 	ogen = ffth->gen;
 	ffth->gen = 0;
 	cest = &ffth->cest;
 	bcopy(&fftimehands->cest, cest, sizeof(struct ffclock_estimate));
 	ffdelta = (ffcounter)delta;
 	ffth->period_lerp = fftimehands->period_lerp;
 
 	ffth->tick_time = fftimehands->tick_time;
 	ffclock_convert_delta(ffdelta, cest->period, &bt);
 	bintime_add(&ffth->tick_time, &bt);
 
 	ffth->tick_time_lerp = fftimehands->tick_time_lerp;
 	ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt);
 	bintime_add(&ffth->tick_time_lerp, &bt);
 
 	ffth->tick_ffcount = fftimehands->tick_ffcount + ffdelta;
 
 	/*
 	 * Assess the status of the clock, if the last update is too old, it is
 	 * likely the synchronisation daemon is dead and the clock is free
 	 * running.
 	 */
 	if (ffclock_updated == 0) {
 		ffdelta = ffth->tick_ffcount - cest->update_ffcount;
 		ffclock_convert_delta(ffdelta, cest->period, &bt);
 		if (bt.sec > 2 * FFCLOCK_SKM_SCALE)
 			ffclock_status |= FFCLOCK_STA_UNSYNC;
 	}
 
 	/*
 	 * If available, grab updated clock estimates and make them current.
 	 * Recompute time at this tick using the updated estimates. The clock
 	 * estimates passed the feed-forward synchronisation daemon may result
 	 * in time conversion that is not monotonically increasing (just after
 	 * the update). time_lerp is a particular linear interpolation over the
 	 * synchronisation algo polling period that ensures monotonicity for the
 	 * clock ids requesting it.
 	 */
 	if (ffclock_updated > 0) {
 		bcopy(&ffclock_estimate, cest, sizeof(struct ffclock_estimate));
 		ffdelta = ffth->tick_ffcount - cest->update_ffcount;
 		ffth->tick_time = cest->update_time;
 		ffclock_convert_delta(ffdelta, cest->period, &bt);
 		bintime_add(&ffth->tick_time, &bt);
 
 		/* ffclock_reset sets ffclock_updated to INT8_MAX */
 		if (ffclock_updated == INT8_MAX)
 			ffth->tick_time_lerp = ffth->tick_time;
 
 		if (bintime_cmp(&ffth->tick_time, &ffth->tick_time_lerp, >))
 			forward_jump = 1;
 		else
 			forward_jump = 0;
 
 		bintime_clear(&gap_lerp);
 		if (forward_jump) {
 			gap_lerp = ffth->tick_time;
 			bintime_sub(&gap_lerp, &ffth->tick_time_lerp);
 		} else {
 			gap_lerp = ffth->tick_time_lerp;
 			bintime_sub(&gap_lerp, &ffth->tick_time);
 		}
 
 		/*
 		 * The reset from the RTC clock may be far from accurate, and
 		 * reducing the gap between real time and interpolated time
 		 * could take a very long time if the interpolated clock insists
 		 * on strict monotonicity. The clock is reset under very strict
 		 * conditions (kernel time is known to be wrong and
 		 * synchronization daemon has been restarted recently.
 		 * ffclock_boottime absorbs the jump to ensure boot time is
 		 * correct and uptime functions stay consistent.
 		 */
 		if (((ffclock_status & FFCLOCK_STA_UNSYNC) == FFCLOCK_STA_UNSYNC) &&
 		    ((cest->status & FFCLOCK_STA_UNSYNC) == 0) &&
 		    ((cest->status & FFCLOCK_STA_WARMUP) == FFCLOCK_STA_WARMUP)) {
 			if (forward_jump)
 				bintime_add(&ffclock_boottime, &gap_lerp);
 			else
 				bintime_sub(&ffclock_boottime, &gap_lerp);
 			ffth->tick_time_lerp = ffth->tick_time;
 			bintime_clear(&gap_lerp);
 		}
 
 		ffclock_status = cest->status;
 		ffth->period_lerp = cest->period;
 
 		/*
 		 * Compute corrected period used for the linear interpolation of
 		 * time. The rate of linear interpolation is capped to 5000PPM
 		 * (5ms/s).
 		 */
 		if (bintime_isset(&gap_lerp)) {
 			ffdelta = cest->update_ffcount;
 			ffdelta -= fftimehands->cest.update_ffcount;
 			ffclock_convert_delta(ffdelta, cest->period, &bt);
 			polling = bt.sec;
 			bt.sec = 0;
 			bt.frac = 5000000 * (uint64_t)18446744073LL;
 			bintime_mul(&bt, polling);
 			if (bintime_cmp(&gap_lerp, &bt, >))
 				gap_lerp = bt;
 
 			/* Approximate 1 sec by 1-(1/2^64) to ease arithmetic */
 			frac = 0;
 			if (gap_lerp.sec > 0) {
 				frac -= 1;
 				frac /= ffdelta / gap_lerp.sec;
 			}
 			frac += gap_lerp.frac / ffdelta;
 
 			if (forward_jump)
 				ffth->period_lerp += frac;
 			else
 				ffth->period_lerp -= frac;
 		}
 
 		ffclock_updated = 0;
 	}
 	if (++ogen == 0)
 		ogen = 1;
 	ffth->gen = ogen;
 	fftimehands = ffth;
 }
 
 /*
  * Adjust the fftimehands when the timecounter is changed. Stating the obvious,
  * the old and new hardware counter cannot be read simultaneously. tc_windup()
  * does read the two counters 'back to back', but a few cycles are effectively
  * lost, and not accumulated in tick_ffcount. This is a fairly radical
  * operation for a feed-forward synchronization daemon, and it is its job to not
  * pushing irrelevant data to the kernel. Because there is no locking here,
  * simply force to ignore pending or next update to give daemon a chance to
  * realize the counter has changed.
  */
 static void
 ffclock_change_tc(struct timehands *th)
 {
 	struct fftimehands *ffth;
 	struct ffclock_estimate *cest;
 	struct timecounter *tc;
 	uint8_t ogen;
 
 	tc = th->th_counter;
 	ffth = fftimehands->next;
 	ogen = ffth->gen;
 	ffth->gen = 0;
 
 	cest = &ffth->cest;
 	bcopy(&(fftimehands->cest), cest, sizeof(struct ffclock_estimate));
 	cest->period = ((1ULL << 63) / tc->tc_frequency ) << 1;
 	cest->errb_abs = 0;
 	cest->errb_rate = 0;
 	cest->status |= FFCLOCK_STA_UNSYNC;
 
 	ffth->tick_ffcount = fftimehands->tick_ffcount;
 	ffth->tick_time_lerp = fftimehands->tick_time_lerp;
 	ffth->tick_time = fftimehands->tick_time;
 	ffth->period_lerp = cest->period;
 
 	/* Do not lock but ignore next update from synchronization daemon. */
 	ffclock_updated--;
 
 	if (++ogen == 0)
 		ogen = 1;
 	ffth->gen = ogen;
 	fftimehands = ffth;
 }
 
 /*
  * Retrieve feed-forward counter and time of last kernel tick.
  */
 void
 ffclock_last_tick(ffcounter *ffcount, struct bintime *bt, uint32_t flags)
 {
 	struct fftimehands *ffth;
 	uint8_t gen;
 
 	/*
 	 * No locking but check generation has not changed. Also need to make
 	 * sure ffdelta is positive, i.e. ffcount > tick_ffcount.
 	 */
 	do {
 		ffth = fftimehands;
 		gen = ffth->gen;
 		if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP)
 			*bt = ffth->tick_time_lerp;
 		else
 			*bt = ffth->tick_time;
 		*ffcount = ffth->tick_ffcount;
 	} while (gen == 0 || gen != ffth->gen);
 }
 
 /*
  * Absolute clock conversion. Low level function to convert ffcounter to
  * bintime. The ffcounter is converted using the current ffclock period estimate
  * or the "interpolated period" to ensure monotonicity.
  * NOTE: this conversion may have been deferred, and the clock updated since the
  * hardware counter has been read.
  */
 void
 ffclock_convert_abs(ffcounter ffcount, struct bintime *bt, uint32_t flags)
 {
 	struct fftimehands *ffth;
 	struct bintime bt2;
 	ffcounter ffdelta;
 	uint8_t gen;
 
 	/*
 	 * No locking but check generation has not changed. Also need to make
 	 * sure ffdelta is positive, i.e. ffcount > tick_ffcount.
 	 */
 	do {
 		ffth = fftimehands;
 		gen = ffth->gen;
 		if (ffcount > ffth->tick_ffcount)
 			ffdelta = ffcount - ffth->tick_ffcount;
 		else
 			ffdelta = ffth->tick_ffcount - ffcount;
 
 		if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP) {
 			*bt = ffth->tick_time_lerp;
 			ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt2);
 		} else {
 			*bt = ffth->tick_time;
 			ffclock_convert_delta(ffdelta, ffth->cest.period, &bt2);
 		}
 
 		if (ffcount > ffth->tick_ffcount)
 			bintime_add(bt, &bt2);
 		else
 			bintime_sub(bt, &bt2);
 	} while (gen == 0 || gen != ffth->gen);
 }
 
 /*
  * Difference clock conversion.
  * Low level function to Convert a time interval measured in RAW counter units
  * into bintime. The difference clock allows measuring small intervals much more
  * reliably than the absolute clock.
  */
 void
 ffclock_convert_diff(ffcounter ffdelta, struct bintime *bt)
 {
 	struct fftimehands *ffth;
 	uint8_t gen;
 
 	/* No locking but check generation has not changed. */
 	do {
 		ffth = fftimehands;
 		gen = ffth->gen;
 		ffclock_convert_delta(ffdelta, ffth->cest.period, bt);
 	} while (gen == 0 || gen != ffth->gen);
 }
 
 /*
  * Access to current ffcounter value.
  */
 void
 ffclock_read_counter(ffcounter *ffcount)
 {
 	struct timehands *th;
 	struct fftimehands *ffth;
 	unsigned int gen, delta;
 
 	/*
 	 * ffclock_windup() called from tc_windup(), safe to rely on
 	 * th->th_generation only, for correct delta and ffcounter.
 	 */
 	do {
 		th = timehands;
 		gen = atomic_load_acq_int(&th->th_generation);
 		ffth = fftimehands;
 		delta = tc_delta(th);
 		*ffcount = ffth->tick_ffcount;
 		atomic_thread_fence_acq();
 	} while (gen == 0 || gen != th->th_generation);
 
 	*ffcount += delta;
 }
 
 void
 binuptime(struct bintime *bt)
 {
 
 	binuptime_fromclock(bt, sysclock_active);
 }
 
 void
 nanouptime(struct timespec *tsp)
 {
 
 	nanouptime_fromclock(tsp, sysclock_active);
 }
 
 void
 microuptime(struct timeval *tvp)
 {
 
 	microuptime_fromclock(tvp, sysclock_active);
 }
 
 void
 bintime(struct bintime *bt)
 {
 
 	bintime_fromclock(bt, sysclock_active);
 }
 
 void
 nanotime(struct timespec *tsp)
 {
 
 	nanotime_fromclock(tsp, sysclock_active);
 }
 
 void
 microtime(struct timeval *tvp)
 {
 
 	microtime_fromclock(tvp, sysclock_active);
 }
 
 void
 getbinuptime(struct bintime *bt)
 {
 
 	getbinuptime_fromclock(bt, sysclock_active);
 }
 
 void
 getnanouptime(struct timespec *tsp)
 {
 
 	getnanouptime_fromclock(tsp, sysclock_active);
 }
 
 void
 getmicrouptime(struct timeval *tvp)
 {
 
 	getmicrouptime_fromclock(tvp, sysclock_active);
 }
 
 void
 getbintime(struct bintime *bt)
 {
 
 	getbintime_fromclock(bt, sysclock_active);
 }
 
 void
 getnanotime(struct timespec *tsp)
 {
 
 	getnanotime_fromclock(tsp, sysclock_active);
 }
 
 void
 getmicrotime(struct timeval *tvp)
 {
 
 	getmicrouptime_fromclock(tvp, sysclock_active);
 }
 
 #endif /* FFCLOCK */
 
 /*
  * This is a clone of getnanotime and used for walltimestamps.
  * The dtrace_ prefix prevents fbt from creating probes for
  * it so walltimestamp can be safely used in all fbt probes.
  */
 void
 dtrace_getnanotime(struct timespec *tsp)
 {
 
 	GETTHMEMBER(tsp, th_nanotime);
 }
 
 /*
  * This is a clone of getnanouptime used for time since boot.
  * The dtrace_ prefix prevents fbt from creating probes for
  * it so an uptime that can be safely used in all fbt probes.
  */
 void
 dtrace_getnanouptime(struct timespec *tsp)
 {
 	struct bintime bt;
 
 	GETTHMEMBER(&bt, th_offset);
 	bintime2timespec(&bt, tsp);
 }
 
 /*
  * System clock currently providing time to the system. Modifiable via sysctl
  * when the FFCLOCK option is defined.
  */
 int sysclock_active = SYSCLOCK_FBCK;
 
 /* Internal NTP status and error estimates. */
 extern int time_status;
 extern long time_esterror;
 
 /*
  * Take a snapshot of sysclock data which can be used to compare system clocks
  * and generate timestamps after the fact.
  */
 void
 sysclock_getsnapshot(struct sysclock_snap *clock_snap, int fast)
 {
 	struct fbclock_info *fbi;
 	struct timehands *th;
 	struct bintime bt;
 	unsigned int delta, gen;
 #ifdef FFCLOCK
 	ffcounter ffcount;
 	struct fftimehands *ffth;
 	struct ffclock_info *ffi;
 	struct ffclock_estimate cest;
 
 	ffi = &clock_snap->ff_info;
 #endif
 
 	fbi = &clock_snap->fb_info;
 	delta = 0;
 
 	do {
 		th = timehands;
 		gen = atomic_load_acq_int(&th->th_generation);
 		fbi->th_scale = th->th_scale;
 		fbi->tick_time = th->th_offset;
 #ifdef FFCLOCK
 		ffth = fftimehands;
 		ffi->tick_time = ffth->tick_time_lerp;
 		ffi->tick_time_lerp = ffth->tick_time_lerp;
 		ffi->period = ffth->cest.period;
 		ffi->period_lerp = ffth->period_lerp;
 		clock_snap->ffcount = ffth->tick_ffcount;
 		cest = ffth->cest;
 #endif
 		if (!fast)
 			delta = tc_delta(th);
 		atomic_thread_fence_acq();
 	} while (gen == 0 || gen != th->th_generation);
 
 	clock_snap->delta = delta;
 	clock_snap->sysclock_active = sysclock_active;
 
 	/* Record feedback clock status and error. */
 	clock_snap->fb_info.status = time_status;
 	/* XXX: Very crude estimate of feedback clock error. */
 	bt.sec = time_esterror / 1000000;
 	bt.frac = ((time_esterror - bt.sec) * 1000000) *
 	    (uint64_t)18446744073709ULL;
 	clock_snap->fb_info.error = bt;
 
 #ifdef FFCLOCK
 	if (!fast)
 		clock_snap->ffcount += delta;
 
 	/* Record feed-forward clock leap second adjustment. */
 	ffi->leapsec_adjustment = cest.leapsec_total;
 	if (clock_snap->ffcount > cest.leapsec_next)
 		ffi->leapsec_adjustment -= cest.leapsec;
 
 	/* Record feed-forward clock status and error. */
 	clock_snap->ff_info.status = cest.status;
 	ffcount = clock_snap->ffcount - cest.update_ffcount;
 	ffclock_convert_delta(ffcount, cest.period, &bt);
 	/* 18446744073709 = int(2^64/1e12), err_bound_rate in [ps/s]. */
 	bintime_mul(&bt, cest.errb_rate * (uint64_t)18446744073709ULL);
 	/* 18446744073 = int(2^64 / 1e9), since err_abs in [ns]. */
 	bintime_addx(&bt, cest.errb_abs * (uint64_t)18446744073ULL);
 	clock_snap->ff_info.error = bt;
 #endif
 }
 
 /*
  * Convert a sysclock snapshot into a struct bintime based on the specified
  * clock source and flags.
  */
 int
 sysclock_snap2bintime(struct sysclock_snap *cs, struct bintime *bt,
     int whichclock, uint32_t flags)
 {
 	struct bintime boottimebin;
 #ifdef FFCLOCK
 	struct bintime bt2;
 	uint64_t period;
 #endif
 
 	switch (whichclock) {
 	case SYSCLOCK_FBCK:
 		*bt = cs->fb_info.tick_time;
 
 		/* If snapshot was created with !fast, delta will be >0. */
 		if (cs->delta > 0)
 			bintime_addx(bt, cs->fb_info.th_scale * cs->delta);
 
 		if ((flags & FBCLOCK_UPTIME) == 0) {
 			getboottimebin(&boottimebin);
 			bintime_add(bt, &boottimebin);
 		}
 		break;
 #ifdef FFCLOCK
 	case SYSCLOCK_FFWD:
 		if (flags & FFCLOCK_LERP) {
 			*bt = cs->ff_info.tick_time_lerp;
 			period = cs->ff_info.period_lerp;
 		} else {
 			*bt = cs->ff_info.tick_time;
 			period = cs->ff_info.period;
 		}
 
 		/* If snapshot was created with !fast, delta will be >0. */
 		if (cs->delta > 0) {
 			ffclock_convert_delta(cs->delta, period, &bt2);
 			bintime_add(bt, &bt2);
 		}
 
 		/* Leap second adjustment. */
 		if (flags & FFCLOCK_LEAPSEC)
 			bt->sec -= cs->ff_info.leapsec_adjustment;
 
 		/* Boot time adjustment, for uptime/monotonic clocks. */
 		if (flags & FFCLOCK_UPTIME)
 			bintime_sub(bt, &ffclock_boottime);
 		break;
 #endif
 	default:
 		return (EINVAL);
 		break;
 	}
 
 	return (0);
 }
 
 /*
  * Initialize a new timecounter and possibly use it.
  */
 void
 tc_init(struct timecounter *tc)
 {
 	u_int u;
 	struct sysctl_oid *tc_root;
 
 	u = tc->tc_frequency / tc->tc_counter_mask;
 	/* XXX: We need some margin here, 10% is a guess */
 	u *= 11;
 	u /= 10;
 	if (u > hz && tc->tc_quality >= 0) {
 		tc->tc_quality = -2000;
 		if (bootverbose) {
 			printf("Timecounter \"%s\" frequency %ju Hz",
 			    tc->tc_name, (uintmax_t)tc->tc_frequency);
 			printf(" -- Insufficient hz, needs at least %u\n", u);
 		}
 	} else if (tc->tc_quality >= 0 || bootverbose) {
 		printf("Timecounter \"%s\" frequency %ju Hz quality %d\n",
 		    tc->tc_name, (uintmax_t)tc->tc_frequency,
 		    tc->tc_quality);
 	}
 
 	/*
 	 * Set up sysctl tree for this counter.
 	 */
 	tc_root = SYSCTL_ADD_NODE_WITH_LABEL(NULL,
 	    SYSCTL_STATIC_CHILDREN(_kern_timecounter_tc), OID_AUTO, tc->tc_name,
 	    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
 	    "timecounter description", "timecounter");
 	SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
 	    "mask", CTLFLAG_RD, &(tc->tc_counter_mask), 0,
 	    "mask for implemented bits");
 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
 	    "counter", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, tc,
 	    sizeof(*tc), sysctl_kern_timecounter_get, "IU",
 	    "current timecounter value");
 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
 	    "frequency", CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, tc,
 	    sizeof(*tc), sysctl_kern_timecounter_freq, "QU",
 	    "timecounter frequency");
 	SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
 	    "quality", CTLFLAG_RD, &(tc->tc_quality), 0,
 	    "goodness of time counter");
 
 	mtx_lock(&tc_lock);
 	tc->tc_next = timecounters;
 	timecounters = tc;
 
 	/*
 	 * Do not automatically switch if the current tc was specifically
 	 * chosen.  Never automatically use a timecounter with negative quality.
 	 * Even though we run on the dummy counter, switching here may be
 	 * worse since this timecounter may not be monotonic.
 	 */
 	if (tc_chosen)
 		goto unlock;
 	if (tc->tc_quality < 0)
 		goto unlock;
 	if (tc_from_tunable[0] != '\0' &&
 	    strcmp(tc->tc_name, tc_from_tunable) == 0) {
 		tc_chosen = 1;
 		tc_from_tunable[0] = '\0';
 	} else {
 		if (tc->tc_quality < timecounter->tc_quality)
 			goto unlock;
 		if (tc->tc_quality == timecounter->tc_quality &&
 		    tc->tc_frequency < timecounter->tc_frequency)
 			goto unlock;
 	}
 	(void)tc->tc_get_timecount(tc);
 	timecounter = tc;
 unlock:
 	mtx_unlock(&tc_lock);
 }
 
 /* Report the frequency of the current timecounter. */
 uint64_t
 tc_getfrequency(void)
 {
 
 	return (timehands->th_counter->tc_frequency);
 }
 
 static bool
 sleeping_on_old_rtc(struct thread *td)
 {
 
 	/*
 	 * td_rtcgen is modified by curthread when it is running,
 	 * and by other threads in this function.  By finding the thread
 	 * on a sleepqueue and holding the lock on the sleepqueue
 	 * chain, we guarantee that the thread is not running and that
 	 * modifying td_rtcgen is safe.  Setting td_rtcgen to zero informs
 	 * the thread that it was woken due to a real-time clock adjustment.
 	 * (The declaration of td_rtcgen refers to this comment.)
 	 */
 	if (td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation) {
 		td->td_rtcgen = 0;
 		return (true);
 	}
 	return (false);
 }
 
 static struct mtx tc_setclock_mtx;
 MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN);
 
 /*
  * Step our concept of UTC.  This is done by modifying our estimate of
  * when we booted.
  */
 void
 tc_setclock(struct timespec *ts)
 {
 	struct timespec tbef, taft;
 	struct bintime bt, bt2;
 
 	timespec2bintime(ts, &bt);
 	nanotime(&tbef);
 	mtx_lock_spin(&tc_setclock_mtx);
 	cpu_tick_calibrate(1);
 	binuptime(&bt2);
 	bintime_sub(&bt, &bt2);
 
 	/* XXX fiddle all the little crinkly bits around the fiords... */
 	tc_windup(&bt);
 	mtx_unlock_spin(&tc_setclock_mtx);
 
 	/* Avoid rtc_generation == 0, since td_rtcgen == 0 is special. */
 	atomic_add_rel_int(&rtc_generation, 2);
 	sleepq_chains_remove_matching(sleeping_on_old_rtc);
 	if (timestepwarnings) {
 		nanotime(&taft);
 		log(LOG_INFO,
 		    "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n",
 		    (intmax_t)tbef.tv_sec, tbef.tv_nsec,
 		    (intmax_t)taft.tv_sec, taft.tv_nsec,
 		    (intmax_t)ts->tv_sec, ts->tv_nsec);
 	}
 }
 
 /*
  * Recalculate the scaling factor.  We want the number of 1/2^64
  * fractions of a second per period of the hardware counter, taking
  * into account the th_adjustment factor which the NTP PLL/adjtime(2)
  * processing provides us with.
  *
  * The th_adjustment is nanoseconds per second with 32 bit binary
  * fraction and we want 64 bit binary fraction of second:
  *
  *	 x = a * 2^32 / 10^9 = a * 4.294967296
  *
  * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
  * we can only multiply by about 850 without overflowing, that
  * leaves no suitably precise fractions for multiply before divide.
  *
  * Divide before multiply with a fraction of 2199/512 results in a
  * systematic undercompensation of 10PPM of th_adjustment.  On a
  * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
  *
  * We happily sacrifice the lowest of the 64 bits of our result
  * to the goddess of code clarity.
  */
 static void
 recalculate_scaling_factor_and_large_delta(struct timehands *th)
 {
 	uint64_t scale;
 
 	scale = (uint64_t)1 << 63;
 	scale += (th->th_adjustment / 1024) * 2199;
 	scale /= th->th_counter->tc_frequency;
 	th->th_scale = scale * 2;
 	th->th_large_delta = MIN(((uint64_t)1 << 63) / scale, UINT_MAX);
 }
 
 /*
  * Initialize the next struct timehands in the ring and make
  * it the active timehands.  Along the way we might switch to a different
  * timecounter and/or do seconds processing in NTP.  Slightly magic.
  */
 static void
 tc_windup(struct bintime *new_boottimebin)
 {
 	struct bintime bt;
 	struct timecounter *tc;
 	struct timehands *th, *tho;
 	u_int delta, ncount, ogen;
 	int i;
 	time_t t;
 
 	/*
 	 * Make the next timehands a copy of the current one, but do
 	 * not overwrite the generation or next pointer.  While we
 	 * update the contents, the generation must be zero.  We need
 	 * to ensure that the zero generation is visible before the
 	 * data updates become visible, which requires release fence.
 	 * For similar reasons, re-reading of the generation after the
 	 * data is read should use acquire fence.
 	 */
 	tho = timehands;
 	th = tho->th_next;
 	ogen = th->th_generation;
 	th->th_generation = 0;
 	atomic_thread_fence_rel();
 	memcpy(th, tho, offsetof(struct timehands, th_generation));
 	if (new_boottimebin != NULL)
 		th->th_boottime = *new_boottimebin;
 
 	/*
 	 * Capture a timecounter delta on the current timecounter and if
 	 * changing timecounters, a counter value from the new timecounter.
 	 * Update the offset fields accordingly.
 	 */
 	tc = atomic_load_ptr(&timecounter);
 	delta = tc_delta(th);
 	if (th->th_counter != tc)
 		ncount = tc->tc_get_timecount(tc);
 	else
 		ncount = 0;
 #ifdef FFCLOCK
 	ffclock_windup(delta);
 #endif
 	th->th_offset_count += delta;
 	th->th_offset_count &= th->th_counter->tc_counter_mask;
 	bintime_add_tc_delta(&th->th_offset, th->th_scale,
 	    th->th_large_delta, delta);
 
 	/*
 	 * Hardware latching timecounters may not generate interrupts on
 	 * PPS events, so instead we poll them.  There is a finite risk that
 	 * the hardware might capture a count which is later than the one we
 	 * got above, and therefore possibly in the next NTP second which might
 	 * have a different rate than the current NTP second.  It doesn't
 	 * matter in practice.
 	 */
 	if (tho->th_counter->tc_poll_pps)
 		tho->th_counter->tc_poll_pps(tho->th_counter);
 
 	/*
 	 * Deal with NTP second processing.  The loop normally
 	 * iterates at most once, but in extreme situations it might
 	 * keep NTP sane if timeouts are not run for several seconds.
 	 * At boot, the time step can be large when the TOD hardware
 	 * has been read, so on really large steps, we call
 	 * ntp_update_second only twice.  We need to call it twice in
 	 * case we missed a leap second.
 	 */
 	bt = th->th_offset;
 	bintime_add(&bt, &th->th_boottime);
 	i = bt.sec - tho->th_microtime.tv_sec;
 	if (i > 0) {
 		if (i > LARGE_STEP)
 			i = 2;
 
 		do {
 			t = bt.sec;
 			ntp_update_second(&th->th_adjustment, &bt.sec);
 			if (bt.sec != t)
 				th->th_boottime.sec += bt.sec - t;
 			--i;
 		} while (i > 0);
 
 		recalculate_scaling_factor_and_large_delta(th);
 	}
 
 	/* Update the UTC timestamps used by the get*() functions. */
 	th->th_bintime = bt;
 	bintime2timeval(&bt, &th->th_microtime);
 	bintime2timespec(&bt, &th->th_nanotime);
 
 	/* Now is a good time to change timecounters. */
 	if (th->th_counter != tc) {
 #ifndef __arm__
 		if ((tc->tc_flags & TC_FLAGS_C2STOP) != 0)
 			cpu_disable_c2_sleep++;
 		if ((th->th_counter->tc_flags & TC_FLAGS_C2STOP) != 0)
 			cpu_disable_c2_sleep--;
 #endif
 		th->th_counter = tc;
 		th->th_offset_count = ncount;
 		tc_min_ticktock_freq = max(1, tc->tc_frequency /
 		    (((uint64_t)tc->tc_counter_mask + 1) / 3));
 		recalculate_scaling_factor_and_large_delta(th);
 #ifdef FFCLOCK
 		ffclock_change_tc(th);
 #endif
 	}
 
 	/*
 	 * Now that the struct timehands is again consistent, set the new
 	 * generation number, making sure to not make it zero.
 	 */
 	if (++ogen == 0)
 		ogen = 1;
 	atomic_store_rel_int(&th->th_generation, ogen);
 
 	/* Go live with the new struct timehands. */
 #ifdef FFCLOCK
 	switch (sysclock_active) {
 	case SYSCLOCK_FBCK:
 #endif
 		time_second = th->th_microtime.tv_sec;
 		time_uptime = th->th_offset.sec;
 #ifdef FFCLOCK
 		break;
 	case SYSCLOCK_FFWD:
 		time_second = fftimehands->tick_time_lerp.sec;
 		time_uptime = fftimehands->tick_time_lerp.sec - ffclock_boottime.sec;
 		break;
 	}
 #endif
 
 	timehands = th;
 	timekeep_push_vdso();
 }
 
 /* Report or change the active timecounter hardware. */
 static int
 sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS)
 {
 	char newname[32];
 	struct timecounter *newtc, *tc;
 	int error;
 
 	mtx_lock(&tc_lock);
 	tc = timecounter;
 	strlcpy(newname, tc->tc_name, sizeof(newname));
 	mtx_unlock(&tc_lock);
 
 	error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	mtx_lock(&tc_lock);
 	/* Record that the tc in use now was specifically chosen. */
 	tc_chosen = 1;
 	if (strcmp(newname, tc->tc_name) == 0) {
 		mtx_unlock(&tc_lock);
 		return (0);
 	}
 	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
 		if (strcmp(newname, newtc->tc_name) != 0)
 			continue;
 
 		/* Warm up new timecounter. */
 		(void)newtc->tc_get_timecount(newtc);
 
 		timecounter = newtc;
 
 		/*
 		 * The vdso timehands update is deferred until the next
 		 * 'tc_windup()'.
 		 *
 		 * This is prudent given that 'timekeep_push_vdso()' does not
 		 * use any locking and that it can be called in hard interrupt
 		 * context via 'tc_windup()'.
 		 */
 		break;
 	}
 	mtx_unlock(&tc_lock);
 	return (newtc != NULL ? 0 : EINVAL);
 }
 SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware,
     CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, 0, 0,
     sysctl_kern_timecounter_hardware, "A",
     "Timecounter hardware selected");
 
 /* Report the available timecounter hardware. */
 static int
 sysctl_kern_timecounter_choice(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sb;
 	struct timecounter *tc;
 	int error;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sb, NULL, 0, req);
 	mtx_lock(&tc_lock);
 	for (tc = timecounters; tc != NULL; tc = tc->tc_next) {
 		if (tc != timecounters)
 			sbuf_putc(&sb, ' ');
 		sbuf_printf(&sb, "%s(%d)", tc->tc_name, tc->tc_quality);
 	}
 	mtx_unlock(&tc_lock);
 	error = sbuf_finish(&sb);
 	sbuf_delete(&sb);
 	return (error);
 }
 
 SYSCTL_PROC(_kern_timecounter, OID_AUTO, choice,
     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
     sysctl_kern_timecounter_choice, "A",
     "Timecounter hardware detected");
 
 /*
  * RFC 2783 PPS-API implementation.
  */
 
 /*
  *  Return true if the driver is aware of the abi version extensions in the
  *  pps_state structure, and it supports at least the given abi version number.
  */
 static inline int
 abi_aware(struct pps_state *pps, int vers)
 {
 
 	return ((pps->kcmode & KCMODE_ABIFLAG) && pps->driver_abi >= vers);
 }
 
 static int
 pps_fetch(struct pps_fetch_args *fapi, struct pps_state *pps)
 {
 	int err, timo;
 	pps_seq_t aseq, cseq;
 	struct timeval tv;
 
 	if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
 		return (EINVAL);
 
 	/*
 	 * If no timeout is requested, immediately return whatever values were
 	 * most recently captured.  If timeout seconds is -1, that's a request
 	 * to block without a timeout.  WITNESS won't let us sleep forever
 	 * without a lock (we really don't need a lock), so just repeatedly
 	 * sleep a long time.
 	 */
 	if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) {
 		if (fapi->timeout.tv_sec == -1)
 			timo = 0x7fffffff;
 		else {
 			tv.tv_sec = fapi->timeout.tv_sec;
 			tv.tv_usec = fapi->timeout.tv_nsec / 1000;
 			timo = tvtohz(&tv);
 		}
 		aseq = atomic_load_int(&pps->ppsinfo.assert_sequence);
 		cseq = atomic_load_int(&pps->ppsinfo.clear_sequence);
 		while (aseq == atomic_load_int(&pps->ppsinfo.assert_sequence) &&
 		    cseq == atomic_load_int(&pps->ppsinfo.clear_sequence)) {
 			if (abi_aware(pps, 1) && pps->driver_mtx != NULL) {
 				if (pps->flags & PPSFLAG_MTX_SPIN) {
 					err = msleep_spin(pps, pps->driver_mtx,
 					    "ppsfch", timo);
 				} else {
 					err = msleep(pps, pps->driver_mtx, PCATCH,
 					    "ppsfch", timo);
 				}
 			} else {
 				err = tsleep(pps, PCATCH, "ppsfch", timo);
 			}
 			if (err == EWOULDBLOCK) {
 				if (fapi->timeout.tv_sec == -1) {
 					continue;
 				} else {
 					return (ETIMEDOUT);
 				}
 			} else if (err != 0) {
 				return (err);
 			}
 		}
 	}
 
 	pps->ppsinfo.current_mode = pps->ppsparam.mode;
 	fapi->pps_info_buf = pps->ppsinfo;
 
 	return (0);
 }
 
 int
 pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
 {
 	pps_params_t *app;
 	struct pps_fetch_args *fapi;
 #ifdef FFCLOCK
 	struct pps_fetch_ffc_args *fapi_ffc;
 #endif
 #ifdef PPS_SYNC
 	struct pps_kcbind_args *kapi;
 #endif
 
 	KASSERT(pps != NULL, ("NULL pps pointer in pps_ioctl"));
 	switch (cmd) {
 	case PPS_IOC_CREATE:
 		return (0);
 	case PPS_IOC_DESTROY:
 		return (0);
 	case PPS_IOC_SETPARAMS:
 		app = (pps_params_t *)data;
 		if (app->mode & ~pps->ppscap)
 			return (EINVAL);
 #ifdef FFCLOCK
 		/* Ensure only a single clock is selected for ffc timestamp. */
 		if ((app->mode & PPS_TSCLK_MASK) == PPS_TSCLK_MASK)
 			return (EINVAL);
 #endif
 		pps->ppsparam = *app;
 		return (0);
 	case PPS_IOC_GETPARAMS:
 		app = (pps_params_t *)data;
 		*app = pps->ppsparam;
 		app->api_version = PPS_API_VERS_1;
 		return (0);
 	case PPS_IOC_GETCAP:
 		*(int*)data = pps->ppscap;
 		return (0);
 	case PPS_IOC_FETCH:
 		fapi = (struct pps_fetch_args *)data;
 		return (pps_fetch(fapi, pps));
 #ifdef FFCLOCK
 	case PPS_IOC_FETCH_FFCOUNTER:
 		fapi_ffc = (struct pps_fetch_ffc_args *)data;
 		if (fapi_ffc->tsformat && fapi_ffc->tsformat !=
 		    PPS_TSFMT_TSPEC)
 			return (EINVAL);
 		if (fapi_ffc->timeout.tv_sec || fapi_ffc->timeout.tv_nsec)
 			return (EOPNOTSUPP);
 		pps->ppsinfo_ffc.current_mode = pps->ppsparam.mode;
 		fapi_ffc->pps_info_buf_ffc = pps->ppsinfo_ffc;
 		/* Overwrite timestamps if feedback clock selected. */
 		switch (pps->ppsparam.mode & PPS_TSCLK_MASK) {
 		case PPS_TSCLK_FBCK:
 			fapi_ffc->pps_info_buf_ffc.assert_timestamp =
 			    pps->ppsinfo.assert_timestamp;
 			fapi_ffc->pps_info_buf_ffc.clear_timestamp =
 			    pps->ppsinfo.clear_timestamp;
 			break;
 		case PPS_TSCLK_FFWD:
 			break;
 		default:
 			break;
 		}
 		return (0);
 #endif /* FFCLOCK */
 	case PPS_IOC_KCBIND:
 #ifdef PPS_SYNC
 		kapi = (struct pps_kcbind_args *)data;
 		/* XXX Only root should be able to do this */
 		if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
 			return (EINVAL);
 		if (kapi->kernel_consumer != PPS_KC_HARDPPS)
 			return (EINVAL);
 		if (kapi->edge & ~pps->ppscap)
 			return (EINVAL);
 		pps->kcmode = (kapi->edge & KCMODE_EDGEMASK) |
 		    (pps->kcmode & KCMODE_ABIFLAG);
 		return (0);
 #else
 		return (EOPNOTSUPP);
 #endif
 	default:
 		return (ENOIOCTL);
 	}
 }
 
 void
 pps_init(struct pps_state *pps)
 {
 	pps->ppscap |= PPS_TSFMT_TSPEC | PPS_CANWAIT;
 	if (pps->ppscap & PPS_CAPTUREASSERT)
 		pps->ppscap |= PPS_OFFSETASSERT;
 	if (pps->ppscap & PPS_CAPTURECLEAR)
 		pps->ppscap |= PPS_OFFSETCLEAR;
 #ifdef FFCLOCK
 	pps->ppscap |= PPS_TSCLK_MASK;
 #endif
 	pps->kcmode &= ~KCMODE_ABIFLAG;
 }
 
 void
 pps_init_abi(struct pps_state *pps)
 {
 
 	pps_init(pps);
 	if (pps->driver_abi > 0) {
 		pps->kcmode |= KCMODE_ABIFLAG;
 		pps->kernel_abi = PPS_ABI_VERSION;
 	}
 }
 
 void
 pps_capture(struct pps_state *pps)
 {
 	struct timehands *th;
 
 	KASSERT(pps != NULL, ("NULL pps pointer in pps_capture"));
 	th = timehands;
 	pps->capgen = atomic_load_acq_int(&th->th_generation);
 	pps->capth = th;
 #ifdef FFCLOCK
 	pps->capffth = fftimehands;
 #endif
 	pps->capcount = th->th_counter->tc_get_timecount(th->th_counter);
 	atomic_thread_fence_acq();
 	if (pps->capgen != th->th_generation)
 		pps->capgen = 0;
 }
 
 void
 pps_event(struct pps_state *pps, int event)
 {
 	struct bintime bt;
 	struct timespec ts, *tsp, *osp;
 	u_int tcount, *pcount;
 	int foff;
 	pps_seq_t *pseq;
 #ifdef FFCLOCK
 	struct timespec *tsp_ffc;
 	pps_seq_t *pseq_ffc;
 	ffcounter *ffcount;
 #endif
 #ifdef PPS_SYNC
 	int fhard;
 #endif
 
 	KASSERT(pps != NULL, ("NULL pps pointer in pps_event"));
 	/* Nothing to do if not currently set to capture this event type. */
 	if ((event & pps->ppsparam.mode) == 0)
 		return;
 	/* If the timecounter was wound up underneath us, bail out. */
 	if (pps->capgen == 0 || pps->capgen !=
 	    atomic_load_acq_int(&pps->capth->th_generation))
 		return;
 
 	/* Things would be easier with arrays. */
 	if (event == PPS_CAPTUREASSERT) {
 		tsp = &pps->ppsinfo.assert_timestamp;
 		osp = &pps->ppsparam.assert_offset;
 		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
 #ifdef PPS_SYNC
 		fhard = pps->kcmode & PPS_CAPTUREASSERT;
 #endif
 		pcount = &pps->ppscount[0];
 		pseq = &pps->ppsinfo.assert_sequence;
 #ifdef FFCLOCK
 		ffcount = &pps->ppsinfo_ffc.assert_ffcount;
 		tsp_ffc = &pps->ppsinfo_ffc.assert_timestamp;
 		pseq_ffc = &pps->ppsinfo_ffc.assert_sequence;
 #endif
 	} else {
 		tsp = &pps->ppsinfo.clear_timestamp;
 		osp = &pps->ppsparam.clear_offset;
 		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
 #ifdef PPS_SYNC
 		fhard = pps->kcmode & PPS_CAPTURECLEAR;
 #endif
 		pcount = &pps->ppscount[1];
 		pseq = &pps->ppsinfo.clear_sequence;
 #ifdef FFCLOCK
 		ffcount = &pps->ppsinfo_ffc.clear_ffcount;
 		tsp_ffc = &pps->ppsinfo_ffc.clear_timestamp;
 		pseq_ffc = &pps->ppsinfo_ffc.clear_sequence;
 #endif
 	}
 
 	/*
 	 * If the timecounter changed, we cannot compare the count values, so
 	 * we have to drop the rest of the PPS-stuff until the next event.
 	 */
 	if (pps->ppstc != pps->capth->th_counter) {
 		pps->ppstc = pps->capth->th_counter;
 		*pcount = pps->capcount;
 		pps->ppscount[2] = pps->capcount;
 		return;
 	}
 
 	/* Convert the count to a timespec. */
 	tcount = pps->capcount - pps->capth->th_offset_count;
 	tcount &= pps->capth->th_counter->tc_counter_mask;
 	bt = pps->capth->th_bintime;
 	bintime_addx(&bt, pps->capth->th_scale * tcount);
 	bintime2timespec(&bt, &ts);
 
 	/* If the timecounter was wound up underneath us, bail out. */
 	atomic_thread_fence_acq();
 	if (pps->capgen != pps->capth->th_generation)
 		return;
 
 	*pcount = pps->capcount;
 	(*pseq)++;
 	*tsp = ts;
 
 	if (foff) {
 		timespecadd(tsp, osp, tsp);
 		if (tsp->tv_nsec < 0) {
 			tsp->tv_nsec += 1000000000;
 			tsp->tv_sec -= 1;
 		}
 	}
 
 #ifdef FFCLOCK
 	*ffcount = pps->capffth->tick_ffcount + tcount;
 	bt = pps->capffth->tick_time;
 	ffclock_convert_delta(tcount, pps->capffth->cest.period, &bt);
 	bintime_add(&bt, &pps->capffth->tick_time);
 	bintime2timespec(&bt, &ts);
 	(*pseq_ffc)++;
 	*tsp_ffc = ts;
 #endif
 
 #ifdef PPS_SYNC
 	if (fhard) {
 		uint64_t scale;
 
 		/*
 		 * Feed the NTP PLL/FLL.
 		 * The FLL wants to know how many (hardware) nanoseconds
 		 * elapsed since the previous event.
 		 */
 		tcount = pps->capcount - pps->ppscount[2];
 		pps->ppscount[2] = pps->capcount;
 		tcount &= pps->capth->th_counter->tc_counter_mask;
 		scale = (uint64_t)1 << 63;
 		scale /= pps->capth->th_counter->tc_frequency;
 		scale *= 2;
 		bt.sec = 0;
 		bt.frac = 0;
 		bintime_addx(&bt, scale * tcount);
 		bintime2timespec(&bt, &ts);
 		hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
 	}
 #endif
 
 	/* Wakeup anyone sleeping in pps_fetch().  */
 	wakeup(pps);
 }
 
 /*
  * Timecounters need to be updated every so often to prevent the hardware
  * counter from overflowing.  Updating also recalculates the cached values
  * used by the get*() family of functions, so their precision depends on
  * the update frequency.
  */
 
 static int tc_tick;
 SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tc_tick, 0,
     "Approximate number of hardclock ticks in a millisecond");
 
 void
 tc_ticktock(int cnt)
 {
 	static int count;
 
 	if (mtx_trylock_spin(&tc_setclock_mtx)) {
 		count += cnt;
 		if (count >= tc_tick) {
 			count = 0;
 			tc_windup(NULL);
 		}
 		mtx_unlock_spin(&tc_setclock_mtx);
 	}
 }
 
 static void __inline
 tc_adjprecision(void)
 {
 	int t;
 
 	if (tc_timepercentage > 0) {
 		t = (99 + tc_timepercentage) / tc_timepercentage;
 		tc_precexp = fls(t + (t >> 1)) - 1;
 		FREQ2BT(hz / tc_tick, &bt_timethreshold);
 		FREQ2BT(hz, &bt_tickthreshold);
 		bintime_shift(&bt_timethreshold, tc_precexp);
 		bintime_shift(&bt_tickthreshold, tc_precexp);
 	} else {
 		tc_precexp = 31;
 		bt_timethreshold.sec = INT_MAX;
 		bt_timethreshold.frac = ~(uint64_t)0;
 		bt_tickthreshold = bt_timethreshold;
 	}
 	sbt_timethreshold = bttosbt(bt_timethreshold);
 	sbt_tickthreshold = bttosbt(bt_tickthreshold);
 }
 
 static int
 sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS)
 {
 	int error, val;
 
 	val = tc_timepercentage;
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	tc_timepercentage = val;
 	if (cold)
 		goto done;
 	tc_adjprecision();
 done:
 	return (0);
 }
 
 /* Set up the requested number of timehands. */
 static void
 inittimehands(void *dummy)
 {
 	struct timehands *thp;
 	int i;
 
 	TUNABLE_INT_FETCH("kern.timecounter.timehands_count",
 	    &timehands_count);
 	if (timehands_count < 1)
 		timehands_count = 1;
 	if (timehands_count > nitems(ths))
 		timehands_count = nitems(ths);
 	for (i = 1, thp = &ths[0]; i < timehands_count;  thp = &ths[i++])
 		thp->th_next = &ths[i];
 	thp->th_next = &ths[0];
 
 	TUNABLE_STR_FETCH("kern.timecounter.hardware", tc_from_tunable,
 	    sizeof(tc_from_tunable));
 
 	mtx_init(&tc_lock, "tc", NULL, MTX_DEF);
 }
 SYSINIT(timehands, SI_SUB_TUNABLES, SI_ORDER_ANY, inittimehands, NULL);
 
 static void
 inittimecounter(void *dummy)
 {
 	u_int p;
 	int tick_rate;
 
 	/*
 	 * Set the initial timeout to
 	 * max(1, <approx. number of hardclock ticks in a millisecond>).
 	 * People should probably not use the sysctl to set the timeout
 	 * to smaller than its initial value, since that value is the
 	 * smallest reasonable one.  If they want better timestamps they
 	 * should use the non-"get"* functions.
 	 */
 	if (hz > 1000)
 		tc_tick = (hz + 500) / 1000;
 	else
 		tc_tick = 1;
 	tc_adjprecision();
 	FREQ2BT(hz, &tick_bt);
 	tick_sbt = bttosbt(tick_bt);
 	tick_rate = hz / tc_tick;
 	FREQ2BT(tick_rate, &tc_tick_bt);
 	tc_tick_sbt = bttosbt(tc_tick_bt);
 	p = (tc_tick * 1000000) / hz;
 	printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
 
 #ifdef FFCLOCK
 	ffclock_init();
 #endif
 
 	/* warm up new timecounter (again) and get rolling. */
 	(void)timecounter->tc_get_timecount(timecounter);
 	mtx_lock_spin(&tc_setclock_mtx);
 	tc_windup(NULL);
 	mtx_unlock_spin(&tc_setclock_mtx);
 }
 
 SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL);
 
 /* Cpu tick handling -------------------------------------------------*/
 
 static int cpu_tick_variable;
 static uint64_t	cpu_tick_frequency;
 
 DPCPU_DEFINE_STATIC(uint64_t, tc_cpu_ticks_base);
 DPCPU_DEFINE_STATIC(unsigned, tc_cpu_ticks_last);
 
 static uint64_t
 tc_cpu_ticks(void)
 {
 	struct timecounter *tc;
 	uint64_t res, *base;
 	unsigned u, *last;
 
 	critical_enter();
 	base = DPCPU_PTR(tc_cpu_ticks_base);
 	last = DPCPU_PTR(tc_cpu_ticks_last);
 	tc = timehands->th_counter;
 	u = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
 	if (u < *last)
 		*base += (uint64_t)tc->tc_counter_mask + 1;
 	*last = u;
 	res = u + *base;
 	critical_exit();
 	return (res);
 }
 
 void
 cpu_tick_calibration(void)
 {
 	static time_t last_calib;
 
 	if (time_uptime != last_calib && !(time_uptime & 0xf)) {
 		cpu_tick_calibrate(0);
 		last_calib = time_uptime;
 	}
 }
 
 /*
  * This function gets called every 16 seconds on only one designated
  * CPU in the system from hardclock() via cpu_tick_calibration()().
  *
  * Whenever the real time clock is stepped we get called with reset=1
  * to make sure we handle suspend/resume and similar events correctly.
  */
 
 static void
 cpu_tick_calibrate(int reset)
 {
 	static uint64_t c_last;
 	uint64_t c_this, c_delta;
 	static struct bintime  t_last;
 	struct bintime t_this, t_delta;
 	uint32_t divi;
 
 	if (reset) {
 		/* The clock was stepped, abort & reset */
 		t_last.sec = 0;
 		return;
 	}
 
 	/* we don't calibrate fixed rate cputicks */
 	if (!cpu_tick_variable)
 		return;
 
 	getbinuptime(&t_this);
 	c_this = cpu_ticks();
 	if (t_last.sec != 0) {
 		c_delta = c_this - c_last;
 		t_delta = t_this;
 		bintime_sub(&t_delta, &t_last);
 		/*
 		 * Headroom:
 		 * 	2^(64-20) / 16[s] =
 		 * 	2^(44) / 16[s] =
 		 * 	17.592.186.044.416 / 16 =
 		 * 	1.099.511.627.776 [Hz]
 		 */
 		divi = t_delta.sec << 20;
 		divi |= t_delta.frac >> (64 - 20);
 		c_delta <<= 20;
 		c_delta /= divi;
 		if (c_delta > cpu_tick_frequency) {
 			if (0 && bootverbose)
 				printf("cpu_tick increased to %ju Hz\n",
 				    c_delta);
 			cpu_tick_frequency = c_delta;
 		}
 	}
 	c_last = c_this;
 	t_last = t_this;
 }
 
 void
 set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var)
 {
 
 	if (func == NULL) {
 		cpu_ticks = tc_cpu_ticks;
 	} else {
 		cpu_tick_frequency = freq;
 		cpu_tick_variable = var;
 		cpu_ticks = func;
 	}
 }
 
 uint64_t
 cpu_tickrate(void)
 {
 
 	if (cpu_ticks == tc_cpu_ticks) 
 		return (tc_getfrequency());
 	return (cpu_tick_frequency);
 }
 
 /*
  * We need to be slightly careful converting cputicks to microseconds.
  * There is plenty of margin in 64 bits of microseconds (half a million
  * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply
  * before divide conversion (to retain precision) we find that the
  * margin shrinks to 1.5 hours (one millionth of 146y).
- * With a three prong approach we never lose significant bits, no
- * matter what the cputick rate and length of timeinterval is.
  */
 
 uint64_t
 cputick2usec(uint64_t tick)
 {
-
-	if (tick > 18446744073709551LL)		/* floor(2^64 / 1000) */
-		return (tick / (cpu_tickrate() / 1000000LL));
-	else if (tick > 18446744073709LL)	/* floor(2^64 / 1000000) */
-		return ((tick * 1000LL) / (cpu_tickrate() / 1000LL));
-	else
-		return ((tick * 1000000LL) / cpu_tickrate());
+	uint64_t tr;
+	tr = cpu_tickrate();
+	return ((tick / tr) * 1000000ULL) + ((tick % tr) * 1000000ULL) / tr;
 }
 
 cpu_tick_f	*cpu_ticks = tc_cpu_ticks;
 
 static int vdso_th_enable = 1;
 static int
 sysctl_fast_gettime(SYSCTL_HANDLER_ARGS)
 {
 	int old_vdso_th_enable, error;
 
 	old_vdso_th_enable = vdso_th_enable;
 	error = sysctl_handle_int(oidp, &old_vdso_th_enable, 0, req);
 	if (error != 0)
 		return (error);
 	vdso_th_enable = old_vdso_th_enable;
 	return (0);
 }
 SYSCTL_PROC(_kern_timecounter, OID_AUTO, fast_gettime,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
     NULL, 0, sysctl_fast_gettime, "I", "Enable fast time of day");
 
 uint32_t
 tc_fill_vdso_timehands(struct vdso_timehands *vdso_th)
 {
 	struct timehands *th;
 	uint32_t enabled;
 
 	th = timehands;
 	vdso_th->th_scale = th->th_scale;
 	vdso_th->th_offset_count = th->th_offset_count;
 	vdso_th->th_counter_mask = th->th_counter->tc_counter_mask;
 	vdso_th->th_offset = th->th_offset;
 	vdso_th->th_boottime = th->th_boottime;
 	if (th->th_counter->tc_fill_vdso_timehands != NULL) {
 		enabled = th->th_counter->tc_fill_vdso_timehands(vdso_th,
 		    th->th_counter);
 	} else
 		enabled = 0;
 	if (!vdso_th_enable)
 		enabled = 0;
 	return (enabled);
 }
 
 #ifdef COMPAT_FREEBSD32
 uint32_t
 tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32)
 {
 	struct timehands *th;
 	uint32_t enabled;
 
 	th = timehands;
 	*(uint64_t *)&vdso_th32->th_scale[0] = th->th_scale;
 	vdso_th32->th_offset_count = th->th_offset_count;
 	vdso_th32->th_counter_mask = th->th_counter->tc_counter_mask;
 	vdso_th32->th_offset.sec = th->th_offset.sec;
 	*(uint64_t *)&vdso_th32->th_offset.frac[0] = th->th_offset.frac;
 	vdso_th32->th_boottime.sec = th->th_boottime.sec;
 	*(uint64_t *)&vdso_th32->th_boottime.frac[0] = th->th_boottime.frac;
 	if (th->th_counter->tc_fill_vdso_timehands32 != NULL) {
 		enabled = th->th_counter->tc_fill_vdso_timehands32(vdso_th32,
 		    th->th_counter);
 	} else
 		enabled = 0;
 	if (!vdso_th_enable)
 		enabled = 0;
 	return (enabled);
 }
 #endif
 
 #include "opt_ddb.h"
 #ifdef DDB
 #include <ddb/ddb.h>
 
 DB_SHOW_COMMAND(timecounter, db_show_timecounter)
 {
 	struct timehands *th;
 	struct timecounter *tc;
 	u_int val1, val2;
 
 	th = timehands;
 	tc = th->th_counter;
 	val1 = tc->tc_get_timecount(tc);
 	__compiler_membar();
 	val2 = tc->tc_get_timecount(tc);
 
 	db_printf("timecounter %p %s\n", tc, tc->tc_name);
 	db_printf("  mask %#x freq %ju qual %d flags %#x priv %p\n",
 	    tc->tc_counter_mask, (uintmax_t)tc->tc_frequency, tc->tc_quality,
 	    tc->tc_flags, tc->tc_priv);
 	db_printf("  val %#x %#x\n", val1, val2);
 	db_printf("timehands adj %#jx scale %#jx ldelta %d off_cnt %d gen %d\n",
 	    (uintmax_t)th->th_adjustment, (uintmax_t)th->th_scale,
 	    th->th_large_delta, th->th_offset_count, th->th_generation);
 	db_printf("  offset %jd %jd boottime %jd %jd\n",
 	    (intmax_t)th->th_offset.sec, (uintmax_t)th->th_offset.frac,
 	    (intmax_t)th->th_boottime.sec, (uintmax_t)th->th_boottime.frac);
 }
 #endif
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
index 0bab05c65ffc..194a23fdc9e8 100644
--- a/sys/kern/kern_time.c
+++ b/sys/kern/kern_time.c
@@ -1,1838 +1,1836 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_time.c	8.1 (Berkeley) 6/10/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/limits.h>
 #include <sys/clock.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sysproto.h>
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/kernel.h>
 #include <sys/sleepqueue.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/posix4.h>
 #include <sys/time.h>
 #include <sys/timers.h>
 #include <sys/timetc.h>
 #include <sys/vnode.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 #define MAX_CLOCKS 	(CLOCK_MONOTONIC+1)
 #define CPUCLOCK_BIT		0x80000000
 #define CPUCLOCK_PROCESS_BIT	0x40000000
 #define CPUCLOCK_ID_MASK	(~(CPUCLOCK_BIT|CPUCLOCK_PROCESS_BIT))
 #define MAKE_THREAD_CPUCLOCK(tid)	(CPUCLOCK_BIT|(tid))
 #define MAKE_PROCESS_CPUCLOCK(pid)	\
 	(CPUCLOCK_BIT|CPUCLOCK_PROCESS_BIT|(pid))
 
 #define NS_PER_SEC	1000000000
 
 static struct kclock	posix_clocks[MAX_CLOCKS];
 static uma_zone_t	itimer_zone = NULL;
 
 /*
  * Time of day and interval timer support.
  *
  * These routines provide the kernel entry points to get and set
  * the time-of-day and per-process interval timers.  Subroutines
  * here provide support for adding and subtracting timeval structures
  * and decrementing interval timers, optionally reloading the interval
  * timers when they expire.
  */
 
 static int	settime(struct thread *, struct timeval *);
 static void	timevalfix(struct timeval *);
 static int	user_clock_nanosleep(struct thread *td, clockid_t clock_id,
 		    int flags, const struct timespec *ua_rqtp,
 		    struct timespec *ua_rmtp);
 
 static void	itimer_start(void);
 static int	itimer_init(void *, int, int);
 static void	itimer_fini(void *, int);
 static void	itimer_enter(struct itimer *);
 static void	itimer_leave(struct itimer *);
 static struct itimer *itimer_find(struct proc *, int);
 static void	itimers_alloc(struct proc *);
 static int	realtimer_create(struct itimer *);
 static int	realtimer_gettime(struct itimer *, struct itimerspec *);
 static int	realtimer_settime(struct itimer *, int,
 			struct itimerspec *, struct itimerspec *);
 static int	realtimer_delete(struct itimer *);
 static void	realtimer_clocktime(clockid_t, struct timespec *);
 static void	realtimer_expire(void *);
 static void	realtimer_expire_l(struct itimer *it, bool proc_locked);
 
 static int	register_posix_clock(int, const struct kclock *);
 static void	itimer_fire(struct itimer *it);
 static int	itimespecfix(struct timespec *ts);
 
 #define CLOCK_CALL(clock, call, arglist)		\
 	((*posix_clocks[clock].call) arglist)
 
 SYSINIT(posix_timer, SI_SUB_P1003_1B, SI_ORDER_FIRST+4, itimer_start, NULL);
 
 static int
 settime(struct thread *td, struct timeval *tv)
 {
 	struct timeval delta, tv1, tv2;
 	static struct timeval maxtime, laststep;
 	struct timespec ts;
 
 	microtime(&tv1);
 	delta = *tv;
 	timevalsub(&delta, &tv1);
 
 	/*
 	 * If the system is secure, we do not allow the time to be 
 	 * set to a value earlier than 1 second less than the highest
 	 * time we have yet seen. The worst a miscreant can do in
 	 * this circumstance is "freeze" time. He couldn't go
 	 * back to the past.
 	 *
 	 * We similarly do not allow the clock to be stepped more
 	 * than one second, nor more than once per second. This allows
 	 * a miscreant to make the clock march double-time, but no worse.
 	 */
 	if (securelevel_gt(td->td_ucred, 1) != 0) {
 		if (delta.tv_sec < 0 || delta.tv_usec < 0) {
 			/*
 			 * Update maxtime to latest time we've seen.
 			 */
 			if (tv1.tv_sec > maxtime.tv_sec)
 				maxtime = tv1;
 			tv2 = *tv;
 			timevalsub(&tv2, &maxtime);
 			if (tv2.tv_sec < -1) {
 				tv->tv_sec = maxtime.tv_sec - 1;
 				printf("Time adjustment clamped to -1 second\n");
 			}
 		} else {
 			if (tv1.tv_sec == laststep.tv_sec)
 				return (EPERM);
 			if (delta.tv_sec > 1) {
 				tv->tv_sec = tv1.tv_sec + 1;
 				printf("Time adjustment clamped to +1 second\n");
 			}
 			laststep = *tv;
 		}
 	}
 
 	ts.tv_sec = tv->tv_sec;
 	ts.tv_nsec = tv->tv_usec * 1000;
 	tc_setclock(&ts);
 	resettodr();
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct clock_getcpuclockid2_args {
 	id_t id;
 	int which,
 	clockid_t *clock_id;
 };
 #endif
 /* ARGSUSED */
 int
 sys_clock_getcpuclockid2(struct thread *td, struct clock_getcpuclockid2_args *uap)
 {
 	clockid_t clk_id;
 	int error;
 
 	error = kern_clock_getcpuclockid2(td, uap->id, uap->which, &clk_id);
 	if (error == 0)
 		error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t));
 	return (error);
 }
 
 int
 kern_clock_getcpuclockid2(struct thread *td, id_t id, int which,
     clockid_t *clk_id)
 {
 	struct proc *p;
 	pid_t pid;
 	lwpid_t tid;
 	int error;
 
 	switch (which) {
 	case CPUCLOCK_WHICH_PID:
 		if (id != 0) {
 			error = pget(id, PGET_CANSEE | PGET_NOTID, &p);
 			if (error != 0)
 				return (error);
 			PROC_UNLOCK(p);
 			pid = id;
 		} else {
 			pid = td->td_proc->p_pid;
 		}
 		*clk_id = MAKE_PROCESS_CPUCLOCK(pid);
 		return (0);
 	case CPUCLOCK_WHICH_TID:
 		tid = id == 0 ? td->td_tid : id;
 		*clk_id = MAKE_THREAD_CPUCLOCK(tid);
 		return (0);
 	default:
 		return (EINVAL);
 	}
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct clock_gettime_args {
 	clockid_t clock_id;
 	struct	timespec *tp;
 };
 #endif
 /* ARGSUSED */
 int
 sys_clock_gettime(struct thread *td, struct clock_gettime_args *uap)
 {
 	struct timespec ats;
 	int error;
 
 	error = kern_clock_gettime(td, uap->clock_id, &ats);
 	if (error == 0)
 		error = copyout(&ats, uap->tp, sizeof(ats));
 
 	return (error);
 }
 
 static inline void
 cputick2timespec(uint64_t runtime, struct timespec *ats)
 {
-	runtime = cputick2usec(runtime);
-	ats->tv_sec = runtime / 1000000;
-	ats->tv_nsec = runtime % 1000000 * 1000;
+	uint64_t tr;
+	tr = cpu_tickrate();
+	ats->tv_sec = runtime / tr;
+	ats->tv_nsec = ((runtime % tr) * 1000000000ULL) / tr;
 }
 
 void
 kern_thread_cputime(struct thread *targettd, struct timespec *ats)
 {
 	uint64_t runtime, curtime, switchtime;
 
 	if (targettd == NULL) { /* current thread */
 		spinlock_enter();
 		switchtime = PCPU_GET(switchtime);
 		curtime = cpu_ticks();
 		runtime = curthread->td_runtime;
 		spinlock_exit();
 		runtime += curtime - switchtime;
 	} else {
 		PROC_LOCK_ASSERT(targettd->td_proc, MA_OWNED);
 		thread_lock(targettd);
 		runtime = targettd->td_runtime;
 		thread_unlock(targettd);
 	}
 	cputick2timespec(runtime, ats);
 }
 
 void
 kern_process_cputime(struct proc *targetp, struct timespec *ats)
 {
 	uint64_t runtime;
 	struct rusage ru;
 
 	PROC_LOCK_ASSERT(targetp, MA_OWNED);
 	PROC_STATLOCK(targetp);
 	rufetch(targetp, &ru);
 	runtime = targetp->p_rux.rux_runtime;
 	if (curthread->td_proc == targetp)
 		runtime += cpu_ticks() - PCPU_GET(switchtime);
 	PROC_STATUNLOCK(targetp);
 	cputick2timespec(runtime, ats);
 }
 
 static int
 get_cputime(struct thread *td, clockid_t clock_id, struct timespec *ats)
 {
 	struct proc *p, *p2;
 	struct thread *td2;
 	lwpid_t tid;
 	pid_t pid;
 	int error;
 
 	p = td->td_proc;
 	if ((clock_id & CPUCLOCK_PROCESS_BIT) == 0) {
 		tid = clock_id & CPUCLOCK_ID_MASK;
 		td2 = tdfind(tid, p->p_pid);
 		if (td2 == NULL)
 			return (EINVAL);
 		kern_thread_cputime(td2, ats);
 		PROC_UNLOCK(td2->td_proc);
 	} else {
 		pid = clock_id & CPUCLOCK_ID_MASK;
 		error = pget(pid, PGET_CANSEE, &p2);
 		if (error != 0)
 			return (EINVAL);
 		kern_process_cputime(p2, ats);
 		PROC_UNLOCK(p2);
 	}
 	return (0);
 }
 
 int
 kern_clock_gettime(struct thread *td, clockid_t clock_id, struct timespec *ats)
 {
 	struct timeval sys, user;
 	struct proc *p;
 
 	p = td->td_proc;
 	switch (clock_id) {
 	case CLOCK_REALTIME:		/* Default to precise. */
 	case CLOCK_REALTIME_PRECISE:
 		nanotime(ats);
 		break;
 	case CLOCK_REALTIME_FAST:
 		getnanotime(ats);
 		break;
 	case CLOCK_VIRTUAL:
 		PROC_LOCK(p);
 		PROC_STATLOCK(p);
 		calcru(p, &user, &sys);
 		PROC_STATUNLOCK(p);
 		PROC_UNLOCK(p);
 		TIMEVAL_TO_TIMESPEC(&user, ats);
 		break;
 	case CLOCK_PROF:
 		PROC_LOCK(p);
 		PROC_STATLOCK(p);
 		calcru(p, &user, &sys);
 		PROC_STATUNLOCK(p);
 		PROC_UNLOCK(p);
 		timevaladd(&user, &sys);
 		TIMEVAL_TO_TIMESPEC(&user, ats);
 		break;
 	case CLOCK_MONOTONIC:		/* Default to precise. */
 	case CLOCK_MONOTONIC_PRECISE:
 	case CLOCK_UPTIME:
 	case CLOCK_UPTIME_PRECISE:
 		nanouptime(ats);
 		break;
 	case CLOCK_UPTIME_FAST:
 	case CLOCK_MONOTONIC_FAST:
 		getnanouptime(ats);
 		break;
 	case CLOCK_SECOND:
 		ats->tv_sec = time_second;
 		ats->tv_nsec = 0;
 		break;
 	case CLOCK_THREAD_CPUTIME_ID:
 		kern_thread_cputime(NULL, ats);
 		break;
 	case CLOCK_PROCESS_CPUTIME_ID:
 		PROC_LOCK(p);
 		kern_process_cputime(p, ats);
 		PROC_UNLOCK(p);
 		break;
 	default:
 		if ((int)clock_id >= 0)
 			return (EINVAL);
 		return (get_cputime(td, clock_id, ats));
 	}
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct clock_settime_args {
 	clockid_t clock_id;
 	const struct	timespec *tp;
 };
 #endif
 /* ARGSUSED */
 int
 sys_clock_settime(struct thread *td, struct clock_settime_args *uap)
 {
 	struct timespec ats;
 	int error;
 
 	if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0)
 		return (error);
 	return (kern_clock_settime(td, uap->clock_id, &ats));
 }
 
 static int allow_insane_settime = 0;
 SYSCTL_INT(_debug, OID_AUTO, allow_insane_settime, CTLFLAG_RWTUN,
     &allow_insane_settime, 0,
     "do not perform possibly restrictive checks on settime(2) args");
 
 int
 kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats)
 {
 	struct timeval atv;
 	int error;
 
 	if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0)
 		return (error);
 	if (clock_id != CLOCK_REALTIME)
 		return (EINVAL);
 	if (ats->tv_nsec < 0 || ats->tv_nsec >= NS_PER_SEC || ats->tv_sec < 0)
 		return (EINVAL);
 	if (!allow_insane_settime &&
 	    (ats->tv_sec > 8000ULL * 365 * 24 * 60 * 60 ||
 	    ats->tv_sec < utc_offset()))
 		return (EINVAL);
 	/* XXX Don't convert nsec->usec and back */
 	TIMESPEC_TO_TIMEVAL(&atv, ats);
 	error = settime(td, &atv);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct clock_getres_args {
 	clockid_t clock_id;
 	struct	timespec *tp;
 };
 #endif
 int
 sys_clock_getres(struct thread *td, struct clock_getres_args *uap)
 {
 	struct timespec ts;
 	int error;
 
 	if (uap->tp == NULL)
 		return (0);
 
 	error = kern_clock_getres(td, uap->clock_id, &ts);
 	if (error == 0)
 		error = copyout(&ts, uap->tp, sizeof(ts));
 	return (error);
 }
 
 int
 kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts)
 {
 
 	ts->tv_sec = 0;
 	switch (clock_id) {
 	case CLOCK_REALTIME:
 	case CLOCK_REALTIME_FAST:
 	case CLOCK_REALTIME_PRECISE:
 	case CLOCK_MONOTONIC:
 	case CLOCK_MONOTONIC_FAST:
 	case CLOCK_MONOTONIC_PRECISE:
 	case CLOCK_UPTIME:
 	case CLOCK_UPTIME_FAST:
 	case CLOCK_UPTIME_PRECISE:
 		/*
 		 * Round up the result of the division cheaply by adding 1.
 		 * Rounding up is especially important if rounding down
 		 * would give 0.  Perfect rounding is unimportant.
 		 */
 		ts->tv_nsec = NS_PER_SEC / tc_getfrequency() + 1;
 		break;
 	case CLOCK_VIRTUAL:
 	case CLOCK_PROF:
 		/* Accurately round up here because we can do so cheaply. */
 		ts->tv_nsec = howmany(NS_PER_SEC, hz);
 		break;
 	case CLOCK_SECOND:
 		ts->tv_sec = 1;
 		ts->tv_nsec = 0;
 		break;
 	case CLOCK_THREAD_CPUTIME_ID:
 	case CLOCK_PROCESS_CPUTIME_ID:
 	cputime:
-		/* sync with cputick2usec */
-		ts->tv_nsec = 1000000 / cpu_tickrate();
-		if (ts->tv_nsec == 0)
-			ts->tv_nsec = 1000;
+		ts->tv_nsec = 1000000000 / cpu_tickrate() + 1;
 		break;
 	default:
 		if ((int)clock_id < 0)
 			goto cputime;
 		return (EINVAL);
 	}
 	return (0);
 }
 
 int
 kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt)
 {
 
 	return (kern_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, rqt,
 	    rmt));
 }
 
 static uint8_t nanowait[MAXCPU];
 
 int
 kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags,
     const struct timespec *rqt, struct timespec *rmt)
 {
 	struct timespec ts, now;
 	sbintime_t sbt, sbtt, prec, tmp;
 	time_t over;
 	int error;
 	bool is_abs_real;
 
 	if (rqt->tv_nsec < 0 || rqt->tv_nsec >= NS_PER_SEC)
 		return (EINVAL);
 	if ((flags & ~TIMER_ABSTIME) != 0)
 		return (EINVAL);
 	switch (clock_id) {
 	case CLOCK_REALTIME:
 	case CLOCK_REALTIME_PRECISE:
 	case CLOCK_REALTIME_FAST:
 	case CLOCK_SECOND:
 		is_abs_real = (flags & TIMER_ABSTIME) != 0;
 		break;
 	case CLOCK_MONOTONIC:
 	case CLOCK_MONOTONIC_PRECISE:
 	case CLOCK_MONOTONIC_FAST:
 	case CLOCK_UPTIME:
 	case CLOCK_UPTIME_PRECISE:
 	case CLOCK_UPTIME_FAST:
 		is_abs_real = false;
 		break;
 	case CLOCK_VIRTUAL:
 	case CLOCK_PROF:
 	case CLOCK_PROCESS_CPUTIME_ID:
 		return (ENOTSUP);
 	case CLOCK_THREAD_CPUTIME_ID:
 	default:
 		return (EINVAL);
 	}
 	do {
 		ts = *rqt;
 		if ((flags & TIMER_ABSTIME) != 0) {
 			if (is_abs_real)
 				td->td_rtcgen =
 				    atomic_load_acq_int(&rtc_generation);
 			error = kern_clock_gettime(td, clock_id, &now);
 			KASSERT(error == 0, ("kern_clock_gettime: %d", error));
 			timespecsub(&ts, &now, &ts);
 		}
 		if (ts.tv_sec < 0 || (ts.tv_sec == 0 && ts.tv_nsec == 0)) {
 			error = EWOULDBLOCK;
 			break;
 		}
 		if (ts.tv_sec > INT32_MAX / 2) {
 			over = ts.tv_sec - INT32_MAX / 2;
 			ts.tv_sec -= over;
 		} else
 			over = 0;
 		tmp = tstosbt(ts);
 		prec = tmp;
 		prec >>= tc_precexp;
 		if (TIMESEL(&sbt, tmp))
 			sbt += tc_tick_sbt;
 		sbt += tmp;
 		error = tsleep_sbt(&nanowait[curcpu], PWAIT | PCATCH, "nanslp",
 		    sbt, prec, C_ABSOLUTE);
 	} while (error == 0 && is_abs_real && td->td_rtcgen == 0);
 	td->td_rtcgen = 0;
 	if (error != EWOULDBLOCK) {
 		if (TIMESEL(&sbtt, tmp))
 			sbtt += tc_tick_sbt;
 		if (sbtt >= sbt)
 			return (0);
 		if (error == ERESTART)
 			error = EINTR;
 		if ((flags & TIMER_ABSTIME) == 0 && rmt != NULL) {
 			ts = sbttots(sbt - sbtt);
 			ts.tv_sec += over;
 			if (ts.tv_sec < 0)
 				timespecclear(&ts);
 			*rmt = ts;
 		}
 		return (error);
 	}
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct nanosleep_args {
 	struct	timespec *rqtp;
 	struct	timespec *rmtp;
 };
 #endif
 /* ARGSUSED */
 int
 sys_nanosleep(struct thread *td, struct nanosleep_args *uap)
 {
 
 	return (user_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME,
 	    uap->rqtp, uap->rmtp));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct clock_nanosleep_args {
 	clockid_t clock_id;
 	int 	  flags;
 	struct	timespec *rqtp;
 	struct	timespec *rmtp;
 };
 #endif
 /* ARGSUSED */
 int
 sys_clock_nanosleep(struct thread *td, struct clock_nanosleep_args *uap)
 {
 	int error;
 
 	error = user_clock_nanosleep(td, uap->clock_id, uap->flags, uap->rqtp,
 	    uap->rmtp);
 	return (kern_posix_error(td, error));
 }
 
 static int
 user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags,
     const struct timespec *ua_rqtp, struct timespec *ua_rmtp)
 {
 	struct timespec rmt, rqt;
 	int error, error2;
 
 	error = copyin(ua_rqtp, &rqt, sizeof(rqt));
 	if (error)
 		return (error);
 	error = kern_clock_nanosleep(td, clock_id, flags, &rqt, &rmt);
 	if (error == EINTR && ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0) {
 		error2 = copyout(&rmt, ua_rmtp, sizeof(rmt));
 		if (error2 != 0)
 			error = error2;
 	}
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct gettimeofday_args {
 	struct	timeval *tp;
 	struct	timezone *tzp;
 };
 #endif
 /* ARGSUSED */
 int
 sys_gettimeofday(struct thread *td, struct gettimeofday_args *uap)
 {
 	struct timeval atv;
 	struct timezone rtz;
 	int error = 0;
 
 	if (uap->tp) {
 		microtime(&atv);
 		error = copyout(&atv, uap->tp, sizeof (atv));
 	}
 	if (error == 0 && uap->tzp != NULL) {
 		rtz.tz_minuteswest = 0;
 		rtz.tz_dsttime = 0;
 		error = copyout(&rtz, uap->tzp, sizeof (rtz));
 	}
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct settimeofday_args {
 	struct	timeval *tv;
 	struct	timezone *tzp;
 };
 #endif
 /* ARGSUSED */
 int
 sys_settimeofday(struct thread *td, struct settimeofday_args *uap)
 {
 	struct timeval atv, *tvp;
 	struct timezone atz, *tzp;
 	int error;
 
 	if (uap->tv) {
 		error = copyin(uap->tv, &atv, sizeof(atv));
 		if (error)
 			return (error);
 		tvp = &atv;
 	} else
 		tvp = NULL;
 	if (uap->tzp) {
 		error = copyin(uap->tzp, &atz, sizeof(atz));
 		if (error)
 			return (error);
 		tzp = &atz;
 	} else
 		tzp = NULL;
 	return (kern_settimeofday(td, tvp, tzp));
 }
 
 int
 kern_settimeofday(struct thread *td, struct timeval *tv, struct timezone *tzp)
 {
 	int error;
 
 	error = priv_check(td, PRIV_SETTIMEOFDAY);
 	if (error)
 		return (error);
 	/* Verify all parameters before changing time. */
 	if (tv) {
 		if (tv->tv_usec < 0 || tv->tv_usec >= 1000000 ||
 		    tv->tv_sec < 0)
 			return (EINVAL);
 		error = settime(td, tv);
 	}
 	return (error);
 }
 
 /*
  * Get value of an interval timer.  The process virtual and profiling virtual
  * time timers are kept in the p_stats area, since they can be swapped out.
  * These are kept internally in the way they are specified externally: in
  * time until they expire.
  *
  * The real time interval timer is kept in the process table slot for the
  * process, and its value (it_value) is kept as an absolute time rather than
  * as a delta, so that it is easy to keep periodic real-time signals from
  * drifting.
  *
  * Virtual time timers are processed in the hardclock() routine of
  * kern_clock.c.  The real time timer is processed by a timeout routine,
  * called from the softclock() routine.  Since a callout may be delayed in
  * real time due to interrupt processing in the system, it is possible for
  * the real time timeout routine (realitexpire, given below), to be delayed
  * in real time past when it is supposed to occur.  It does not suffice,
  * therefore, to reload the real timer .it_value from the real time timers
  * .it_interval.  Rather, we compute the next time in absolute time the timer
  * should go off.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct getitimer_args {
 	u_int	which;
 	struct	itimerval *itv;
 };
 #endif
 int
 sys_getitimer(struct thread *td, struct getitimer_args *uap)
 {
 	struct itimerval aitv;
 	int error;
 
 	error = kern_getitimer(td, uap->which, &aitv);
 	if (error != 0)
 		return (error);
 	return (copyout(&aitv, uap->itv, sizeof (struct itimerval)));
 }
 
 int
 kern_getitimer(struct thread *td, u_int which, struct itimerval *aitv)
 {
 	struct proc *p = td->td_proc;
 	struct timeval ctv;
 
 	if (which > ITIMER_PROF)
 		return (EINVAL);
 
 	if (which == ITIMER_REAL) {
 		/*
 		 * Convert from absolute to relative time in .it_value
 		 * part of real time timer.  If time for real time timer
 		 * has passed return 0, else return difference between
 		 * current time and time for the timer to go off.
 		 */
 		PROC_LOCK(p);
 		*aitv = p->p_realtimer;
 		PROC_UNLOCK(p);
 		if (timevalisset(&aitv->it_value)) {
 			microuptime(&ctv);
 			if (timevalcmp(&aitv->it_value, &ctv, <))
 				timevalclear(&aitv->it_value);
 			else
 				timevalsub(&aitv->it_value, &ctv);
 		}
 	} else {
 		PROC_ITIMLOCK(p);
 		*aitv = p->p_stats->p_timer[which];
 		PROC_ITIMUNLOCK(p);
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktritimerval(aitv);
 #endif
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct setitimer_args {
 	u_int	which;
 	struct	itimerval *itv, *oitv;
 };
 #endif
 int
 sys_setitimer(struct thread *td, struct setitimer_args *uap)
 {
 	struct itimerval aitv, oitv;
 	int error;
 
 	if (uap->itv == NULL) {
 		uap->itv = uap->oitv;
 		return (sys_getitimer(td, (struct getitimer_args *)uap));
 	}
 
 	if ((error = copyin(uap->itv, &aitv, sizeof(struct itimerval))))
 		return (error);
 	error = kern_setitimer(td, uap->which, &aitv, &oitv);
 	if (error != 0 || uap->oitv == NULL)
 		return (error);
 	return (copyout(&oitv, uap->oitv, sizeof(struct itimerval)));
 }
 
 int
 kern_setitimer(struct thread *td, u_int which, struct itimerval *aitv,
     struct itimerval *oitv)
 {
 	struct proc *p = td->td_proc;
 	struct timeval ctv;
 	sbintime_t sbt, pr;
 
 	if (aitv == NULL)
 		return (kern_getitimer(td, which, oitv));
 
 	if (which > ITIMER_PROF)
 		return (EINVAL);
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktritimerval(aitv);
 #endif
 	if (itimerfix(&aitv->it_value) ||
 	    aitv->it_value.tv_sec > INT32_MAX / 2)
 		return (EINVAL);
 	if (!timevalisset(&aitv->it_value))
 		timevalclear(&aitv->it_interval);
 	else if (itimerfix(&aitv->it_interval) ||
 	    aitv->it_interval.tv_sec > INT32_MAX / 2)
 		return (EINVAL);
 
 	if (which == ITIMER_REAL) {
 		PROC_LOCK(p);
 		if (timevalisset(&p->p_realtimer.it_value))
 			callout_stop(&p->p_itcallout);
 		microuptime(&ctv);
 		if (timevalisset(&aitv->it_value)) {
 			pr = tvtosbt(aitv->it_value) >> tc_precexp;
 			timevaladd(&aitv->it_value, &ctv);
 			sbt = tvtosbt(aitv->it_value);
 			callout_reset_sbt(&p->p_itcallout, sbt, pr,
 			    realitexpire, p, C_ABSOLUTE);
 		}
 		*oitv = p->p_realtimer;
 		p->p_realtimer = *aitv;
 		PROC_UNLOCK(p);
 		if (timevalisset(&oitv->it_value)) {
 			if (timevalcmp(&oitv->it_value, &ctv, <))
 				timevalclear(&oitv->it_value);
 			else
 				timevalsub(&oitv->it_value, &ctv);
 		}
 	} else {
 		if (aitv->it_interval.tv_sec == 0 &&
 		    aitv->it_interval.tv_usec != 0 &&
 		    aitv->it_interval.tv_usec < tick)
 			aitv->it_interval.tv_usec = tick;
 		if (aitv->it_value.tv_sec == 0 &&
 		    aitv->it_value.tv_usec != 0 &&
 		    aitv->it_value.tv_usec < tick)
 			aitv->it_value.tv_usec = tick;
 		PROC_ITIMLOCK(p);
 		*oitv = p->p_stats->p_timer[which];
 		p->p_stats->p_timer[which] = *aitv;
 		PROC_ITIMUNLOCK(p);
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktritimerval(oitv);
 #endif
 	return (0);
 }
 
 static void
 realitexpire_reset_callout(struct proc *p, sbintime_t *isbtp)
 {
 	sbintime_t prec;
 
 	prec = isbtp == NULL ? tvtosbt(p->p_realtimer.it_interval) : *isbtp;
 	callout_reset_sbt(&p->p_itcallout, tvtosbt(p->p_realtimer.it_value),
 	    prec >> tc_precexp, realitexpire, p, C_ABSOLUTE);
 }
 
 void
 itimer_proc_continue(struct proc *p)
 {
 	struct timeval ctv;
 	struct itimer *it;
 	int id;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	if ((p->p_flag2 & P2_ITSTOPPED) != 0) {
 		p->p_flag2 &= ~P2_ITSTOPPED;
 		microuptime(&ctv);
 		if (timevalcmp(&p->p_realtimer.it_value, &ctv, >=))
 			realitexpire(p);
 		else
 			realitexpire_reset_callout(p, NULL);
 	}
 
 	if (p->p_itimers != NULL) {
 		for (id = 3; id < TIMER_MAX; id++) {
 			it = p->p_itimers->its_timers[id];
 			if (it == NULL)
 				continue;
 			if ((it->it_flags & ITF_PSTOPPED) != 0) {
 				ITIMER_LOCK(it);
 				if ((it->it_flags & ITF_PSTOPPED) != 0) {
 					it->it_flags &= ~ITF_PSTOPPED;
 					if ((it->it_flags & ITF_DELETING) == 0)
 						realtimer_expire_l(it, true);
 				}
 				ITIMER_UNLOCK(it);
 			}
 		}
 	}
 }
 
 /*
  * Real interval timer expired:
  * send process whose timer expired an alarm signal.
  * If time is not set up to reload, then just return.
  * Else compute next time timer should go off which is > current time.
  * This is where delay in processing this timeout causes multiple
  * SIGALRM calls to be compressed into one.
  * tvtohz() always adds 1 to allow for the time until the next clock
  * interrupt being strictly less than 1 clock tick, but we don't want
  * that here since we want to appear to be in sync with the clock
  * interrupt even when we're delayed.
  */
 void
 realitexpire(void *arg)
 {
 	struct proc *p;
 	struct timeval ctv;
 	sbintime_t isbt;
 
 	p = (struct proc *)arg;
 	kern_psignal(p, SIGALRM);
 	if (!timevalisset(&p->p_realtimer.it_interval)) {
 		timevalclear(&p->p_realtimer.it_value);
 		if (p->p_flag & P_WEXIT)
 			wakeup(&p->p_itcallout);
 		return;
 	}
 
 	isbt = tvtosbt(p->p_realtimer.it_interval);
 	if (isbt >= sbt_timethreshold)
 		getmicrouptime(&ctv);
 	else
 		microuptime(&ctv);
 	do {
 		timevaladd(&p->p_realtimer.it_value,
 		    &p->p_realtimer.it_interval);
 	} while (timevalcmp(&p->p_realtimer.it_value, &ctv, <=));
 
 	if (P_SHOULDSTOP(p) || P_KILLED(p)) {
 		p->p_flag2 |= P2_ITSTOPPED;
 		return;
 	}
 
 	p->p_flag2 &= ~P2_ITSTOPPED;
 	realitexpire_reset_callout(p, &isbt);
 }
 
 /*
  * Check that a proposed value to load into the .it_value or
  * .it_interval part of an interval timer is acceptable, and
  * fix it to have at least minimal value (i.e. if it is less
  * than the resolution of the clock, round it up.)
  */
 int
 itimerfix(struct timeval *tv)
 {
 
 	if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000)
 		return (EINVAL);
 	if (tv->tv_sec == 0 && tv->tv_usec != 0 &&
 	    tv->tv_usec < (u_int)tick / 16)
 		tv->tv_usec = (u_int)tick / 16;
 	return (0);
 }
 
 /*
  * Decrement an interval timer by a specified number
  * of microseconds, which must be less than a second,
  * i.e. < 1000000.  If the timer expires, then reload
  * it.  In this case, carry over (usec - old value) to
  * reduce the value reloaded into the timer so that
  * the timer does not drift.  This routine assumes
  * that it is called in a context where the timers
  * on which it is operating cannot change in value.
  */
 int
 itimerdecr(struct itimerval *itp, int usec)
 {
 
 	if (itp->it_value.tv_usec < usec) {
 		if (itp->it_value.tv_sec == 0) {
 			/* expired, and already in next interval */
 			usec -= itp->it_value.tv_usec;
 			goto expire;
 		}
 		itp->it_value.tv_usec += 1000000;
 		itp->it_value.tv_sec--;
 	}
 	itp->it_value.tv_usec -= usec;
 	usec = 0;
 	if (timevalisset(&itp->it_value))
 		return (1);
 	/* expired, exactly at end of interval */
 expire:
 	if (timevalisset(&itp->it_interval)) {
 		itp->it_value = itp->it_interval;
 		itp->it_value.tv_usec -= usec;
 		if (itp->it_value.tv_usec < 0) {
 			itp->it_value.tv_usec += 1000000;
 			itp->it_value.tv_sec--;
 		}
 	} else
 		itp->it_value.tv_usec = 0;		/* sec is already 0 */
 	return (0);
 }
 
 /*
  * Add and subtract routines for timevals.
  * N.B.: subtract routine doesn't deal with
  * results which are before the beginning,
  * it just gets very confused in this case.
  * Caveat emptor.
  */
 void
 timevaladd(struct timeval *t1, const struct timeval *t2)
 {
 
 	t1->tv_sec += t2->tv_sec;
 	t1->tv_usec += t2->tv_usec;
 	timevalfix(t1);
 }
 
 void
 timevalsub(struct timeval *t1, const struct timeval *t2)
 {
 
 	t1->tv_sec -= t2->tv_sec;
 	t1->tv_usec -= t2->tv_usec;
 	timevalfix(t1);
 }
 
 static void
 timevalfix(struct timeval *t1)
 {
 
 	if (t1->tv_usec < 0) {
 		t1->tv_sec--;
 		t1->tv_usec += 1000000;
 	}
 	if (t1->tv_usec >= 1000000) {
 		t1->tv_sec++;
 		t1->tv_usec -= 1000000;
 	}
 }
 
 /*
  * ratecheck(): simple time-based rate-limit checking.
  */
 int
 ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
 {
 	struct timeval tv, delta;
 	int rv = 0;
 
 	getmicrouptime(&tv);		/* NB: 10ms precision */
 	delta = tv;
 	timevalsub(&delta, lasttime);
 
 	/*
 	 * check for 0,0 is so that the message will be seen at least once,
 	 * even if interval is huge.
 	 */
 	if (timevalcmp(&delta, mininterval, >=) ||
 	    (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
 		*lasttime = tv;
 		rv = 1;
 	}
 
 	return (rv);
 }
 
 /*
  * ppsratecheck(): packets (or events) per second limitation.
  *
  * Return 0 if the limit is to be enforced (e.g. the caller
  * should drop a packet because of the rate limitation).
  *
  * maxpps of 0 always causes zero to be returned.  maxpps of -1
  * always causes 1 to be returned; this effectively defeats rate
  * limiting.
  *
  * Note that we maintain the struct timeval for compatibility
  * with other bsd systems.  We reuse the storage and just monitor
  * clock ticks for minimal overhead.  
  */
 int
 ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
 {
 	int now;
 
 	/*
 	 * Reset the last time and counter if this is the first call
 	 * or more than a second has passed since the last update of
 	 * lasttime.
 	 */
 	now = ticks;
 	if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) {
 		lasttime->tv_sec = now;
 		*curpps = 1;
 		return (maxpps != 0);
 	} else {
 		(*curpps)++;		/* NB: ignore potential overflow */
 		return (maxpps < 0 || *curpps <= maxpps);
 	}
 }
 
 static void
 itimer_start(void)
 {
 	static const struct kclock rt_clock = {
 		.timer_create  = realtimer_create,
 		.timer_delete  = realtimer_delete,
 		.timer_settime = realtimer_settime,
 		.timer_gettime = realtimer_gettime,
 	};
 
 	itimer_zone = uma_zcreate("itimer", sizeof(struct itimer),
 		NULL, NULL, itimer_init, itimer_fini, UMA_ALIGN_PTR, 0);
 	register_posix_clock(CLOCK_REALTIME,  &rt_clock);
 	register_posix_clock(CLOCK_MONOTONIC, &rt_clock);
 	p31b_setcfg(CTL_P1003_1B_TIMERS, 200112L);
 	p31b_setcfg(CTL_P1003_1B_DELAYTIMER_MAX, INT_MAX);
 	p31b_setcfg(CTL_P1003_1B_TIMER_MAX, TIMER_MAX);
 }
 
 static int
 register_posix_clock(int clockid, const struct kclock *clk)
 {
 	if ((unsigned)clockid >= MAX_CLOCKS) {
 		printf("%s: invalid clockid\n", __func__);
 		return (0);
 	}
 	posix_clocks[clockid] = *clk;
 	return (1);
 }
 
 static int
 itimer_init(void *mem, int size, int flags)
 {
 	struct itimer *it;
 
 	it = (struct itimer *)mem;
 	mtx_init(&it->it_mtx, "itimer lock", NULL, MTX_DEF);
 	return (0);
 }
 
 static void
 itimer_fini(void *mem, int size)
 {
 	struct itimer *it;
 
 	it = (struct itimer *)mem;
 	mtx_destroy(&it->it_mtx);
 }
 
 static void
 itimer_enter(struct itimer *it)
 {
 
 	mtx_assert(&it->it_mtx, MA_OWNED);
 	it->it_usecount++;
 }
 
 static void
 itimer_leave(struct itimer *it)
 {
 
 	mtx_assert(&it->it_mtx, MA_OWNED);
 	KASSERT(it->it_usecount > 0, ("invalid it_usecount"));
 
 	if (--it->it_usecount == 0 && (it->it_flags & ITF_WANTED) != 0)
 		wakeup(it);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct ktimer_create_args {
 	clockid_t clock_id;
 	struct sigevent * evp;
 	int * timerid;
 };
 #endif
 int
 sys_ktimer_create(struct thread *td, struct ktimer_create_args *uap)
 {
 	struct sigevent *evp, ev;
 	int id;
 	int error;
 
 	if (uap->evp == NULL) {
 		evp = NULL;
 	} else {
 		error = copyin(uap->evp, &ev, sizeof(ev));
 		if (error != 0)
 			return (error);
 		evp = &ev;
 	}
 	error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1);
 	if (error == 0) {
 		error = copyout(&id, uap->timerid, sizeof(int));
 		if (error != 0)
 			kern_ktimer_delete(td, id);
 	}
 	return (error);
 }
 
 int
 kern_ktimer_create(struct thread *td, clockid_t clock_id, struct sigevent *evp,
     int *timerid, int preset_id)
 {
 	struct proc *p = td->td_proc;
 	struct itimer *it;
 	int id;
 	int error;
 
 	if (clock_id < 0 || clock_id >= MAX_CLOCKS)
 		return (EINVAL);
 
 	if (posix_clocks[clock_id].timer_create == NULL)
 		return (EINVAL);
 
 	if (evp != NULL) {
 		if (evp->sigev_notify != SIGEV_NONE &&
 		    evp->sigev_notify != SIGEV_SIGNAL &&
 		    evp->sigev_notify != SIGEV_THREAD_ID)
 			return (EINVAL);
 		if ((evp->sigev_notify == SIGEV_SIGNAL ||
 		     evp->sigev_notify == SIGEV_THREAD_ID) &&
 			!_SIG_VALID(evp->sigev_signo))
 			return (EINVAL);
 	}
 
 	if (p->p_itimers == NULL)
 		itimers_alloc(p);
 
 	it = uma_zalloc(itimer_zone, M_WAITOK);
 	it->it_flags = 0;
 	it->it_usecount = 0;
 	timespecclear(&it->it_time.it_value);
 	timespecclear(&it->it_time.it_interval);
 	it->it_overrun = 0;
 	it->it_overrun_last = 0;
 	it->it_clockid = clock_id;
 	it->it_proc = p;
 	ksiginfo_init(&it->it_ksi);
 	it->it_ksi.ksi_flags |= KSI_INS | KSI_EXT;
 	error = CLOCK_CALL(clock_id, timer_create, (it));
 	if (error != 0)
 		goto out;
 
 	PROC_LOCK(p);
 	if (preset_id != -1) {
 		KASSERT(preset_id >= 0 && preset_id < 3, ("invalid preset_id"));
 		id = preset_id;
 		if (p->p_itimers->its_timers[id] != NULL) {
 			PROC_UNLOCK(p);
 			error = 0;
 			goto out;
 		}
 	} else {
 		/*
 		 * Find a free timer slot, skipping those reserved
 		 * for setitimer().
 		 */
 		for (id = 3; id < TIMER_MAX; id++)
 			if (p->p_itimers->its_timers[id] == NULL)
 				break;
 		if (id == TIMER_MAX) {
 			PROC_UNLOCK(p);
 			error = EAGAIN;
 			goto out;
 		}
 	}
 	p->p_itimers->its_timers[id] = it;
 	if (evp != NULL)
 		it->it_sigev = *evp;
 	else {
 		it->it_sigev.sigev_notify = SIGEV_SIGNAL;
 		switch (clock_id) {
 		default:
 		case CLOCK_REALTIME:
 			it->it_sigev.sigev_signo = SIGALRM;
 			break;
 		case CLOCK_VIRTUAL:
  			it->it_sigev.sigev_signo = SIGVTALRM;
 			break;
 		case CLOCK_PROF:
 			it->it_sigev.sigev_signo = SIGPROF;
 			break;
 		}
 		it->it_sigev.sigev_value.sival_int = id;
 	}
 
 	if (it->it_sigev.sigev_notify == SIGEV_SIGNAL ||
 	    it->it_sigev.sigev_notify == SIGEV_THREAD_ID) {
 		it->it_ksi.ksi_signo = it->it_sigev.sigev_signo;
 		it->it_ksi.ksi_code = SI_TIMER;
 		it->it_ksi.ksi_value = it->it_sigev.sigev_value;
 		it->it_ksi.ksi_timerid = id;
 	}
 	PROC_UNLOCK(p);
 	*timerid = id;
 	return (0);
 
 out:
 	ITIMER_LOCK(it);
 	CLOCK_CALL(it->it_clockid, timer_delete, (it));
 	ITIMER_UNLOCK(it);
 	uma_zfree(itimer_zone, it);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct ktimer_delete_args {
 	int timerid;
 };
 #endif
 int
 sys_ktimer_delete(struct thread *td, struct ktimer_delete_args *uap)
 {
 
 	return (kern_ktimer_delete(td, uap->timerid));
 }
 
 static struct itimer *
 itimer_find(struct proc *p, int timerid)
 {
 	struct itimer *it;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	if ((p->p_itimers == NULL) ||
 	    (timerid < 0) || (timerid >= TIMER_MAX) ||
 	    (it = p->p_itimers->its_timers[timerid]) == NULL) {
 		return (NULL);
 	}
 	ITIMER_LOCK(it);
 	if ((it->it_flags & ITF_DELETING) != 0) {
 		ITIMER_UNLOCK(it);
 		it = NULL;
 	}
 	return (it);
 }
 
 int
 kern_ktimer_delete(struct thread *td, int timerid)
 {
 	struct proc *p = td->td_proc;
 	struct itimer *it;
 
 	PROC_LOCK(p);
 	it = itimer_find(p, timerid);
 	if (it == NULL) {
 		PROC_UNLOCK(p);
 		return (EINVAL);
 	}
 	PROC_UNLOCK(p);
 
 	it->it_flags |= ITF_DELETING;
 	while (it->it_usecount > 0) {
 		it->it_flags |= ITF_WANTED;
 		msleep(it, &it->it_mtx, PPAUSE, "itimer", 0);
 	}
 	it->it_flags &= ~ITF_WANTED;
 	CLOCK_CALL(it->it_clockid, timer_delete, (it));
 	ITIMER_UNLOCK(it);
 
 	PROC_LOCK(p);
 	if (KSI_ONQ(&it->it_ksi))
 		sigqueue_take(&it->it_ksi);
 	p->p_itimers->its_timers[timerid] = NULL;
 	PROC_UNLOCK(p);
 	uma_zfree(itimer_zone, it);
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct ktimer_settime_args {
 	int timerid;
 	int flags;
 	const struct itimerspec * value;
 	struct itimerspec * ovalue;
 };
 #endif
 int
 sys_ktimer_settime(struct thread *td, struct ktimer_settime_args *uap)
 {
 	struct itimerspec val, oval, *ovalp;
 	int error;
 
 	error = copyin(uap->value, &val, sizeof(val));
 	if (error != 0)
 		return (error);
 	ovalp = uap->ovalue != NULL ? &oval : NULL;
 	error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp);
 	if (error == 0 && uap->ovalue != NULL)
 		error = copyout(ovalp, uap->ovalue, sizeof(*ovalp));
 	return (error);
 }
 
 int
 kern_ktimer_settime(struct thread *td, int timer_id, int flags,
     struct itimerspec *val, struct itimerspec *oval)
 {
 	struct proc *p;
 	struct itimer *it;
 	int error;
 
 	p = td->td_proc;
 	PROC_LOCK(p);
 	if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) {
 		PROC_UNLOCK(p);
 		error = EINVAL;
 	} else {
 		PROC_UNLOCK(p);
 		itimer_enter(it);
 		error = CLOCK_CALL(it->it_clockid, timer_settime, (it,
 		    flags, val, oval));
 		itimer_leave(it);
 		ITIMER_UNLOCK(it);
 	}
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct ktimer_gettime_args {
 	int timerid;
 	struct itimerspec * value;
 };
 #endif
 int
 sys_ktimer_gettime(struct thread *td, struct ktimer_gettime_args *uap)
 {
 	struct itimerspec val;
 	int error;
 
 	error = kern_ktimer_gettime(td, uap->timerid, &val);
 	if (error == 0)
 		error = copyout(&val, uap->value, sizeof(val));
 	return (error);
 }
 
 int
 kern_ktimer_gettime(struct thread *td, int timer_id, struct itimerspec *val)
 {
 	struct proc *p;
 	struct itimer *it;
 	int error;
 
 	p = td->td_proc;
 	PROC_LOCK(p);
 	if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) {
 		PROC_UNLOCK(p);
 		error = EINVAL;
 	} else {
 		PROC_UNLOCK(p);
 		itimer_enter(it);
 		error = CLOCK_CALL(it->it_clockid, timer_gettime, (it, val));
 		itimer_leave(it);
 		ITIMER_UNLOCK(it);
 	}
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct timer_getoverrun_args {
 	int timerid;
 };
 #endif
 int
 sys_ktimer_getoverrun(struct thread *td, struct ktimer_getoverrun_args *uap)
 {
 
 	return (kern_ktimer_getoverrun(td, uap->timerid));
 }
 
 int
 kern_ktimer_getoverrun(struct thread *td, int timer_id)
 {
 	struct proc *p = td->td_proc;
 	struct itimer *it;
 	int error ;
 
 	PROC_LOCK(p);
 	if (timer_id < 3 ||
 	    (it = itimer_find(p, timer_id)) == NULL) {
 		PROC_UNLOCK(p);
 		error = EINVAL;
 	} else {
 		td->td_retval[0] = it->it_overrun_last;
 		ITIMER_UNLOCK(it);
 		PROC_UNLOCK(p);
 		error = 0;
 	}
 	return (error);
 }
 
 static int
 realtimer_create(struct itimer *it)
 {
 	callout_init_mtx(&it->it_callout, &it->it_mtx, 0);
 	return (0);
 }
 
 static int
 realtimer_delete(struct itimer *it)
 {
 	mtx_assert(&it->it_mtx, MA_OWNED);
 
 	/*
 	 * clear timer's value and interval to tell realtimer_expire
 	 * to not rearm the timer.
 	 */
 	timespecclear(&it->it_time.it_value);
 	timespecclear(&it->it_time.it_interval);
 	ITIMER_UNLOCK(it);
 	callout_drain(&it->it_callout);
 	ITIMER_LOCK(it);
 	return (0);
 }
 
 static int
 realtimer_gettime(struct itimer *it, struct itimerspec *ovalue)
 {
 	struct timespec cts;
 
 	mtx_assert(&it->it_mtx, MA_OWNED);
 
 	realtimer_clocktime(it->it_clockid, &cts);
 	*ovalue = it->it_time;
 	if (ovalue->it_value.tv_sec != 0 || ovalue->it_value.tv_nsec != 0) {
 		timespecsub(&ovalue->it_value, &cts, &ovalue->it_value);
 		if (ovalue->it_value.tv_sec < 0 ||
 		    (ovalue->it_value.tv_sec == 0 &&
 		     ovalue->it_value.tv_nsec == 0)) {
 			ovalue->it_value.tv_sec  = 0;
 			ovalue->it_value.tv_nsec = 1;
 		}
 	}
 	return (0);
 }
 
 static int
 realtimer_settime(struct itimer *it, int flags, struct itimerspec *value,
     struct itimerspec *ovalue)
 {
 	struct timespec cts, ts;
 	struct timeval tv;
 	struct itimerspec val;
 
 	mtx_assert(&it->it_mtx, MA_OWNED);
 
 	val = *value;
 	if (itimespecfix(&val.it_value))
 		return (EINVAL);
 
 	if (timespecisset(&val.it_value)) {
 		if (itimespecfix(&val.it_interval))
 			return (EINVAL);
 	} else {
 		timespecclear(&val.it_interval);
 	}
 
 	if (ovalue != NULL)
 		realtimer_gettime(it, ovalue);
 
 	it->it_time = val;
 	if (timespecisset(&val.it_value)) {
 		realtimer_clocktime(it->it_clockid, &cts);
 		ts = val.it_value;
 		if ((flags & TIMER_ABSTIME) == 0) {
 			/* Convert to absolute time. */
 			timespecadd(&it->it_time.it_value, &cts,
 			    &it->it_time.it_value);
 		} else {
 			timespecsub(&ts, &cts, &ts);
 			/*
 			 * We don't care if ts is negative, tztohz will
 			 * fix it.
 			 */
 		}
 		TIMESPEC_TO_TIMEVAL(&tv, &ts);
 		callout_reset(&it->it_callout, tvtohz(&tv), realtimer_expire,
 		    it);
 	} else {
 		callout_stop(&it->it_callout);
 	}
 
 	return (0);
 }
 
 static void
 realtimer_clocktime(clockid_t id, struct timespec *ts)
 {
 	if (id == CLOCK_REALTIME)
 		getnanotime(ts);
 	else	/* CLOCK_MONOTONIC */
 		getnanouptime(ts);
 }
 
 int
 itimer_accept(struct proc *p, int timerid, ksiginfo_t *ksi)
 {
 	struct itimer *it;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	it = itimer_find(p, timerid);
 	if (it != NULL) {
 		ksi->ksi_overrun = it->it_overrun;
 		it->it_overrun_last = it->it_overrun;
 		it->it_overrun = 0;
 		ITIMER_UNLOCK(it);
 		return (0);
 	}
 	return (EINVAL);
 }
 
 static int
 itimespecfix(struct timespec *ts)
 {
 
 	if (ts->tv_sec < 0 || ts->tv_nsec < 0 || ts->tv_nsec >= NS_PER_SEC)
 		return (EINVAL);
 	if ((UINT64_MAX - ts->tv_nsec) / NS_PER_SEC < ts->tv_sec)
 		return (EINVAL);
 	if (ts->tv_sec == 0 && ts->tv_nsec != 0 && ts->tv_nsec < tick * 1000)
 		ts->tv_nsec = tick * 1000;
 	return (0);
 }
 
 #define	timespectons(tsp)			\
 	((uint64_t)(tsp)->tv_sec * NS_PER_SEC + (tsp)->tv_nsec)
 #define	timespecfromns(ns) (struct timespec){	\
 	.tv_sec = (ns) / NS_PER_SEC,		\
 	.tv_nsec = (ns) % NS_PER_SEC		\
 }
 
 static void
 realtimer_expire_l(struct itimer *it, bool proc_locked)
 {
 	struct timespec cts, ts;
 	struct timeval tv;
 	struct proc *p;
 	uint64_t interval, now, overruns, value;
 
 	realtimer_clocktime(it->it_clockid, &cts);
 	/* Only fire if time is reached. */
 	if (timespeccmp(&cts, &it->it_time.it_value, >=)) {
 		if (timespecisset(&it->it_time.it_interval)) {
 			timespecadd(&it->it_time.it_value,
 			    &it->it_time.it_interval,
 			    &it->it_time.it_value);
 
 			interval = timespectons(&it->it_time.it_interval);
 			value = timespectons(&it->it_time.it_value);
 			now = timespectons(&cts);
 
 			if (now >= value) {
 				/*
 				 * We missed at least one period.
 				 */
 				overruns = howmany(now - value + 1, interval);
 				if (it->it_overrun + overruns >=
 				    it->it_overrun &&
 				    it->it_overrun + overruns <= INT_MAX) {
 					it->it_overrun += (int)overruns;
 				} else {
 					it->it_overrun = INT_MAX;
 					it->it_ksi.ksi_errno = ERANGE;
 				}
 				value =
 				    now + interval - (now - value) % interval;
 				it->it_time.it_value = timespecfromns(value);
 			}
 		} else {
 			/* single shot timer ? */
 			timespecclear(&it->it_time.it_value);
 		}
 
 		p = it->it_proc;
 		if (timespecisset(&it->it_time.it_value)) {
 			if (P_SHOULDSTOP(p) || P_KILLED(p)) {
 				it->it_flags |= ITF_PSTOPPED;
 			} else {
 				timespecsub(&it->it_time.it_value, &cts, &ts);
 				TIMESPEC_TO_TIMEVAL(&tv, &ts);
 				callout_reset(&it->it_callout, tvtohz(&tv),
 				    realtimer_expire, it);
 			}
 		}
 
 		itimer_enter(it);
 		ITIMER_UNLOCK(it);
 		if (proc_locked)
 			PROC_UNLOCK(p);
 		itimer_fire(it);
 		if (proc_locked)
 			PROC_LOCK(p);
 		ITIMER_LOCK(it);
 		itimer_leave(it);
 	} else if (timespecisset(&it->it_time.it_value)) {
 		p = it->it_proc;
 		if (P_SHOULDSTOP(p) || P_KILLED(p)) {
 			it->it_flags |= ITF_PSTOPPED;
 		} else {
 			ts = it->it_time.it_value;
 			timespecsub(&ts, &cts, &ts);
 			TIMESPEC_TO_TIMEVAL(&tv, &ts);
 			callout_reset(&it->it_callout, tvtohz(&tv),
 			    realtimer_expire, it);
 		}
 	}
 }
 
 /* Timeout callback for realtime timer */
 static void
 realtimer_expire(void *arg)
 {
 	realtimer_expire_l(arg, false);
 }
 
 static void
 itimer_fire(struct itimer *it)
 {
 	struct proc *p = it->it_proc;
 	struct thread *td;
 
 	if (it->it_sigev.sigev_notify == SIGEV_SIGNAL ||
 	    it->it_sigev.sigev_notify == SIGEV_THREAD_ID) {
 		if (sigev_findtd(p, &it->it_sigev, &td) != 0) {
 			ITIMER_LOCK(it);
 			timespecclear(&it->it_time.it_value);
 			timespecclear(&it->it_time.it_interval);
 			callout_stop(&it->it_callout);
 			ITIMER_UNLOCK(it);
 			return;
 		}
 		if (!KSI_ONQ(&it->it_ksi)) {
 			it->it_ksi.ksi_errno = 0;
 			ksiginfo_set_sigev(&it->it_ksi, &it->it_sigev);
 			tdsendsignal(p, td, it->it_ksi.ksi_signo, &it->it_ksi);
 		} else {
 			if (it->it_overrun < INT_MAX)
 				it->it_overrun++;
 			else
 				it->it_ksi.ksi_errno = ERANGE;
 		}
 		PROC_UNLOCK(p);
 	}
 }
 
 static void
 itimers_alloc(struct proc *p)
 {
 	struct itimers *its;
 
 	its = malloc(sizeof (struct itimers), M_SUBPROC, M_WAITOK | M_ZERO);
 	PROC_LOCK(p);
 	if (p->p_itimers == NULL) {
 		p->p_itimers = its;
 		PROC_UNLOCK(p);
 	}
 	else {
 		PROC_UNLOCK(p);
 		free(its, M_SUBPROC);
 	}
 }
 
 /* Clean up timers when some process events are being triggered. */
 static void
 itimers_event_exit_exec(int start_idx, struct proc *p)
 {
 	struct itimers *its;
 	struct itimer *it;
 	int i;
 
 	its = p->p_itimers;
 	if (its == NULL)
 		return;
 
 	for (i = start_idx; i < TIMER_MAX; ++i) {
 		if ((it = its->its_timers[i]) != NULL)
 			kern_ktimer_delete(curthread, i);
 	}
 	if (its->its_timers[0] == NULL && its->its_timers[1] == NULL &&
 	    its->its_timers[2] == NULL) {
 		/* Synchronize with itimer_proc_continue(). */
 		PROC_LOCK(p);
 		p->p_itimers = NULL;
 		PROC_UNLOCK(p);
 		free(its, M_SUBPROC);
 	}
 }
 
 void
 itimers_exec(struct proc *p)
 {
 	/*
 	 * According to susv3, XSI interval timers should be inherited
 	 * by new image.
 	 */
 	itimers_event_exit_exec(3, p);
 }
 
 void
 itimers_exit(struct proc *p)
 {
 	itimers_event_exit_exec(0, p);
 }