Index: projects/calloutng/sys/kern/kern_clock.c =================================================================== --- projects/calloutng/sys/kern/kern_clock.c (revision 236314) +++ projects/calloutng/sys/kern/kern_clock.c (revision 236315) @@ -1,896 +1,895 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_kdb.h" #include "opt_device_polling.h" #include "opt_hwpmc_hooks.h" #include "opt_kdtrace.h" #include "opt_ntp.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef GPROF #include #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , clock, hard); PMC_SOFT_DEFINE( , , clock, stat); #endif #ifdef DEVICE_POLLING extern void hardclock_device_poll(void); #endif /* DEVICE_POLLING */ static void initclocks(void *dummy); SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL); /* Spin-lock protecting profiling statistics. 
*/ static struct mtx time_lock; SDT_PROVIDER_DECLARE(sched); SDT_PROBE_DEFINE2(sched, , , tick, tick, "struct thread *", "struct proc *"); static int sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS) { int error; long cp_time[CPUSTATES]; #ifdef SCTL_MASK32 int i; unsigned int cp_time32[CPUSTATES]; #endif read_cpu_time(cp_time); #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { if (!req->oldptr) return SYSCTL_OUT(req, 0, sizeof(cp_time32)); for (i = 0; i < CPUSTATES; i++) cp_time32[i] = (unsigned int)cp_time[i]; error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32)); } else #endif { if (!req->oldptr) return SYSCTL_OUT(req, 0, sizeof(cp_time)); error = SYSCTL_OUT(req, cp_time, sizeof(cp_time)); } return error; } SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, 0,0, sysctl_kern_cp_time, "LU", "CPU time statistics"); static long empty[CPUSTATES]; static int sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS) { struct pcpu *pcpu; int error; int c; long *cp_time; #ifdef SCTL_MASK32 unsigned int cp_time32[CPUSTATES]; int i; #endif if (!req->oldptr) { #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1)); else #endif return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1)); } for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) { if (!CPU_ABSENT(c)) { pcpu = pcpu_find(c); cp_time = pcpu->pc_cp_time; } else { cp_time = empty; } #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { for (i = 0; i < CPUSTATES; i++) cp_time32[i] = (unsigned int)cp_time[i]; error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32)); } else #endif error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES); } return error; } SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, 0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics"); #ifdef DEADLKRES static const char *blessed[] = { "getblk", "so_snd_sx", "so_rcv_sx", NULL }; static int slptime_threshold = 1800; static int blktime_threshold = 900; static int sleepfreq = 3; static void deadlkres(void) { struct proc *p; struct thread *td; void *wchan; int blkticks, i, slpticks, slptype, tryl, tticks; tryl = 0; for (;;) { blkticks = blktime_threshold * hz; slpticks = slptime_threshold * hz; /* * Avoid to sleep on the sx_lock in order to avoid a possible * priority inversion problem leading to starvation. * If the lock can't be held after 100 tries, panic. */ if (!sx_try_slock(&allproc_lock)) { if (tryl > 100) panic("%s: possible deadlock detected on allproc_lock\n", __func__); tryl++; pause("allproc", sleepfreq * hz); continue; } tryl = 0; FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } FOREACH_THREAD_IN_PROC(p, td) { /* * Once a thread is found in "interesting" * state a possible ticks wrap-up needs to be * checked. */ thread_lock(td); if (TD_ON_LOCK(td) && ticks < td->td_blktick) { /* * The thread should be blocked on a * turnstile, simply check if the * turnstile channel is in good state. */ MPASS(td->td_blocked != NULL); tticks = ticks - td->td_blktick; thread_unlock(td); if (tticks > blkticks) { /* * Accordingly with provided * thresholds, this thread is * stuck for too long on a * turnstile. */ PROC_UNLOCK(p); sx_sunlock(&allproc_lock); panic("%s: possible deadlock detected for %p, blocked for %d ticks\n", __func__, td, tticks); } } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td) && ticks < td->td_blktick) { /* * Check if the thread is sleeping on a * lock, otherwise skip the check. 
* Drop the thread lock in order to * avoid a LOR with the sleepqueue * spinlock. */ wchan = td->td_wchan; tticks = ticks - td->td_slptick; thread_unlock(td); slptype = sleepq_type(wchan); if ((slptype == SLEEPQ_SX || slptype == SLEEPQ_LK) && tticks > slpticks) { /* * Accordingly with provided * thresholds, this thread is * stuck for too long on a * sleepqueue. * However, being on a * sleepqueue, we might still * check for the blessed * list. */ tryl = 0; for (i = 0; blessed[i] != NULL; i++) { if (!strcmp(blessed[i], td->td_wmesg)) { tryl = 1; break; } } if (tryl != 0) { tryl = 0; continue; } PROC_UNLOCK(p); sx_sunlock(&allproc_lock); panic("%s: possible deadlock detected for %p, blocked for %d ticks\n", __func__, td, tticks); } } else thread_unlock(td); } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); /* Sleep for sleepfreq seconds. */ pause("-", sleepfreq * hz); } } static struct kthread_desc deadlkres_kd = { "deadlkres", deadlkres, (struct thread **)NULL }; SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd); static SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0, "Deadlock resolver"); SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW, &slptime_threshold, 0, "Number of seconds within is valid to sleep on a sleepqueue"); SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW, &blktime_threshold, 0, "Number of seconds within is valid to block on a turnstile"); SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0, "Number of seconds between any deadlock resolver thread run"); #endif /* DEADLKRES */ void read_cpu_time(long *cp_time) { struct pcpu *pc; int i, j; /* Sum up global cp_time[]. */ bzero(cp_time, sizeof(long) * CPUSTATES); CPU_FOREACH(i) { pc = pcpu_find(i); for (j = 0; j < CPUSTATES; j++) cp_time[j] += pc->pc_cp_time[j]; } } #ifdef SW_WATCHDOG #include static int watchdog_ticks; static int watchdog_enabled; static void watchdog_fire(void); static void watchdog_config(void *, u_int, int *); #endif /* SW_WATCHDOG */ /* * Clock handling routines. * * This code is written to operate with two timers that run independently of * each other. * * The main timer, running hz times per second, is used to trigger interval * timers, timeouts and rescheduling as needed. * * The second timer handles kernel and user profiling, * and does resource use estimation. If the second timer is programmable, * it is randomized to avoid aliasing between the two clocks. For example, * the randomization prevents an adversary from always giving up the cpu * just before its quantum expires. Otherwise, it would never accumulate * cpu ticks. The mean frequency of the second timer is stathz. * * If no second timer exists, stathz will be zero; in this case we drive * profiling and statistics off the main clock. This WILL NOT be accurate; * do not do it unless absolutely necessary. * * The statistics clock may (or may not) be run at a higher rate while * profiling. This profile clock runs at profhz. We require that profhz * be an integral multiple of stathz. * * If the statistics clock is running fast, it must be divided by the ratio * profhz/stathz for statistics. (For profiling, every tick counts.) * * Time-of-day is maintained using a "timecounter", which may or may * not be related to the hardware generating the above mentioned * interrupts. */ int stathz; int profhz; int profprocs; int ticks; int psratio; static DPCPU_DEFINE(int, pcputicks); /* Per-CPU version of ticks. 
*/ static int global_hardclock_run = 0; /* * Initialize clock frequencies and start both clocks running. */ /* ARGSUSED*/ static void initclocks(dummy) void *dummy; { register int i; /* * Set divisors to 1 (normal case) and let the machine-specific * code do its bit. */ mtx_init(&time_lock, "time lock", NULL, MTX_DEF); cpu_initclocks(); /* * Compute profhz/stathz, and fix profhz if needed. */ i = stathz ? stathz : hz; if (profhz == 0) profhz = i; psratio = profhz / i; #ifdef SW_WATCHDOG EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0); #endif } /* * Each time the real-time timer fires, this function is called on all CPUs. * Note that hardclock() calls hardclock_cpu() for the boot CPU, so only * the other CPUs in the system need to call this function. */ void hardclock_cpu(int usermode) { struct pstats *pstats; struct thread *td = curthread; struct proc *p = td->td_proc; int flags; /* * Run current process's virtual and profile time, as needed. */ pstats = p->p_stats; flags = 0; if (usermode && timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) { PROC_SLOCK(p); if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) flags |= TDF_ALRMPEND | TDF_ASTPENDING; PROC_SUNLOCK(p); } if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) { PROC_SLOCK(p); if (itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) flags |= TDF_PROFPEND | TDF_ASTPENDING; PROC_SUNLOCK(p); } thread_lock(td); sched_tick(1); td->td_flags |= flags; thread_unlock(td); #ifdef HWPMC_HOOKS if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid))) PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); #endif callout_tick(); } /* * The real-time timer, interrupting hz times per second. */ void hardclock(int usermode, uintfptr_t pc) { atomic_add_int((volatile int *)&ticks, 1); hardclock_cpu(usermode); tc_ticktock(1); cpu_tick_calibration(); /* * If no separate statistics clock is available, run it from here. * * XXX: this only works for UP */ if (stathz == 0) { profclock(usermode, pc); statclock(usermode); } #ifdef DEVICE_POLLING hardclock_device_poll(); /* this is very short and quick */ #endif /* DEVICE_POLLING */ #ifdef SW_WATCHDOG if (watchdog_enabled > 0 && --watchdog_ticks <= 0) watchdog_fire(); #endif /* SW_WATCHDOG */ } void hardclock_cnt(int cnt, int usermode) { struct pstats *pstats; struct thread *td = curthread; struct proc *p = td->td_proc; int *t = DPCPU_PTR(pcputicks); int flags, global, newticks; #ifdef SW_WATCHDOG int i; #endif /* SW_WATCHDOG */ /* * Update per-CPU and possibly global ticks values. */ *t += cnt; do { global = ticks; newticks = *t - global; if (newticks <= 0) { if (newticks < -1) *t = global - 1; newticks = 0; break; } } while (!atomic_cmpset_int(&ticks, global, *t)); /* * Run current process's virtual and profile time, as needed. 
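/*
 * A standalone userland sketch of the lock-free ticks update shown just
 * above in hardclock_cnt(): each CPU advances its private counter and then
 * tries to push the shared counter forward with a compare-and-swap,
 * publishing only when it is ahead.  C11 atomics stand in for the kernel's
 * atomic_cmpset_int(); the names and values are illustrative and not part
 * of this diff.
 */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic int global_ticks;	/* plays the role of 'ticks' */

static void
advance(int *pcpu_ticks, int cnt)
{
	int global, newticks;

	*pcpu_ticks += cnt;		/* always advance the local count */
	do {
		global = atomic_load(&global_ticks);
		newticks = *pcpu_ticks - global;
		if (newticks <= 0) {
			/* Another CPU already moved the global count past
			 * us; resynchronize locally and publish nothing. */
			if (newticks < -1)
				*pcpu_ticks = global - 1;
			break;
		}
	} while (!atomic_compare_exchange_strong(&global_ticks, &global,
	    *pcpu_ticks));
}

int
main(void)
{
	int cpu0 = 0, cpu1 = 0;

	advance(&cpu0, 3);		/* CPU0 saw 3 ticks, publishes 3 */
	advance(&cpu1, 2);		/* CPU1 is behind, publishes nothing */
	printf("global %d cpu0 %d cpu1 %d\n",
	    atomic_load(&global_ticks), cpu0, cpu1);
	return (0);
}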
*/ pstats = p->p_stats; flags = 0; if (usermode && timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) { PROC_SLOCK(p); if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick * cnt) == 0) flags |= TDF_ALRMPEND | TDF_ASTPENDING; PROC_SUNLOCK(p); } if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) { PROC_SLOCK(p); if (itimerdecr(&pstats->p_timer[ITIMER_PROF], tick * cnt) == 0) flags |= TDF_PROFPEND | TDF_ASTPENDING; PROC_SUNLOCK(p); } thread_lock(td); sched_tick(cnt); td->td_flags |= flags; thread_unlock(td); #ifdef HWPMC_HOOKS if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid))) PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); #endif - callout_tick(); /* We are in charge to handle this tick duty. */ if (newticks > 0) { /* Dangerous and no need to call these things concurrently. */ if (atomic_cmpset_acq_int(&global_hardclock_run, 0, 1)) { tc_ticktock(newticks); #ifdef DEVICE_POLLING /* This is very short and quick. */ hardclock_device_poll(); #endif /* DEVICE_POLLING */ atomic_store_rel_int(&global_hardclock_run, 0); } #ifdef SW_WATCHDOG if (watchdog_enabled > 0) { i = atomic_fetchadd_int(&watchdog_ticks, -newticks); if (i > 0 && i <= newticks) watchdog_fire(); } #endif /* SW_WATCHDOG */ } if (curcpu == CPU_FIRST()) cpu_tick_calibration(); } void hardclock_sync(int cpu) { int *t = DPCPU_ID_PTR(cpu, pcputicks); *t = ticks; } /* * Compute number of ticks in the specified amount of time. */ int tvtohz(tv) struct timeval *tv; { register unsigned long ticks; register long sec, usec; /* * If the number of usecs in the whole seconds part of the time * difference fits in a long, then the total number of usecs will * fit in an unsigned long. Compute the total and convert it to * ticks, rounding up and adding 1 to allow for the current tick * to expire. Rounding also depends on unsigned long arithmetic * to avoid overflow. * * Otherwise, if the number of ticks in the whole seconds part of * the time difference fits in a long, then convert the parts to * ticks separately and add, using similar rounding methods and * overflow avoidance. This method would work in the previous * case but it is slightly slower and assumes that hz is integral. * * Otherwise, round the time difference down to the maximum * representable value. * * If ints have 32 bits, then the maximum value for any timeout in * 10ms ticks is 248 days. */ sec = tv->tv_sec; usec = tv->tv_usec; if (usec < 0) { sec--; usec += 1000000; } if (sec < 0) { #ifdef DIAGNOSTIC if (usec > 0) { sec++; usec -= 1000000; } printf("tvotohz: negative time difference %ld sec %ld usec\n", sec, usec); #endif ticks = 1; } else if (sec <= LONG_MAX / 1000000) ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) / tick + 1; else if (sec <= LONG_MAX / hz) ticks = sec * hz + ((unsigned long)usec + (tick - 1)) / tick + 1; else ticks = LONG_MAX; if (ticks > INT_MAX) ticks = INT_MAX; return ((int)ticks); } /* * Start profiling on a process. * * Kernel profiling passes proc0 which never exits and hence * keeps the profile clock running constantly. */ void startprofclock(p) register struct proc *p; { PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_flag & P_STOPPROF) return; if ((p->p_flag & P_PROFIL) == 0) { p->p_flag |= P_PROFIL; mtx_lock(&time_lock); if (++profprocs == 1) cpu_startprofclock(); mtx_unlock(&time_lock); } } /* * Stop profiling on a process. 
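/*
 * A standalone sketch of the rounding described in the tvtohz() comment
 * above, for the common case where the whole-seconds part fits in a long:
 * round the microsecond total up to whole ticks and add one so the
 * currently running tick cannot cut the timeout short.  The HZ/TICK values
 * are illustrative; in the kernel they are globals set at boot.
 */
#include <limits.h>
#include <stdio.h>
#include <sys/time.h>

#define HZ	1000
#define TICK	(1000000 / HZ)		/* microseconds per hz tick */

static int
tvtohz_sketch(const struct timeval *tv)
{
	unsigned long t;

	t = ((unsigned long)tv->tv_sec * 1000000UL +
	    (unsigned long)tv->tv_usec + (TICK - 1)) / TICK + 1;
	if (t > INT_MAX)
		t = INT_MAX;
	return ((int)t);
}

int
main(void)
{
	struct timeval tv = { .tv_sec = 0, .tv_usec = 1500 };

	/* 1500us at hz=1000 rounds up to 2 ticks, plus 1 for the current
	 * tick, giving 3. */
	printf("%d\n", tvtohz_sketch(&tv));
	return (0);
}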
*/ void stopprofclock(p) register struct proc *p; { PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_flag & P_PROFIL) { if (p->p_profthreads != 0) { p->p_flag |= P_STOPPROF; while (p->p_profthreads != 0) msleep(&p->p_profthreads, &p->p_mtx, PPAUSE, "stopprof", 0); p->p_flag &= ~P_STOPPROF; } if ((p->p_flag & P_PROFIL) == 0) return; p->p_flag &= ~P_PROFIL; mtx_lock(&time_lock); if (--profprocs == 0) cpu_stopprofclock(); mtx_unlock(&time_lock); } } /* * Statistics clock. Updates rusage information and calls the scheduler * to adjust priorities of the active thread. * * This should be called by all active processors. */ void statclock(int usermode) { statclock_cnt(1, usermode); } void statclock_cnt(int cnt, int usermode) { struct rusage *ru; struct vmspace *vm; struct thread *td; struct proc *p; long rss; long *cp_time; td = curthread; p = td->td_proc; cp_time = (long *)PCPU_PTR(cp_time); if (usermode) { /* * Charge the time as appropriate. */ td->td_uticks += cnt; if (p->p_nice > NZERO) cp_time[CP_NICE] += cnt; else cp_time[CP_USER] += cnt; } else { /* * Came from kernel mode, so we were: * - handling an interrupt, * - doing syscall or trap work on behalf of the current * user process, or * - spinning in the idle loop. * Whichever it is, charge the time as appropriate. * Note that we charge interrupts to the current process, * regardless of whether they are ``for'' that process, * so that we know how much of its real time was spent * in ``non-process'' (i.e., interrupt) work. */ if ((td->td_pflags & TDP_ITHREAD) || td->td_intr_nesting_level >= 2) { td->td_iticks += cnt; cp_time[CP_INTR] += cnt; } else { td->td_pticks += cnt; td->td_sticks += cnt; if (!TD_IS_IDLETHREAD(td)) cp_time[CP_SYS] += cnt; else cp_time[CP_IDLE] += cnt; } } /* Update resource usage integrals and maximums. */ MPASS(p->p_vmspace != NULL); vm = p->p_vmspace; ru = &td->td_ru; ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt; ru->ru_idrss += pgtok(vm->vm_dsize) * cnt; ru->ru_isrss += pgtok(vm->vm_ssize) * cnt; rss = pgtok(vmspace_resident_count(vm)); if (ru->ru_maxrss < rss) ru->ru_maxrss = rss; KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock", "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz); SDT_PROBE2(sched, , , tick, td, td->td_proc); thread_lock_flags(td, MTX_QUIET); for ( ; cnt > 0; cnt--) sched_clock(td); thread_unlock(td); #ifdef HWPMC_HOOKS if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame); #endif } void profclock(int usermode, uintfptr_t pc) { profclock_cnt(1, usermode, pc); } void profclock_cnt(int cnt, int usermode, uintfptr_t pc) { struct thread *td; #ifdef GPROF struct gmonparam *g; uintfptr_t i; #endif td = curthread; if (usermode) { /* * Came from user mode; CPU was in user state. * If this process is being profiled, record the tick. * if there is no related user location yet, don't * bother trying to count it. */ if (td->td_proc->p_flag & P_PROFIL) addupc_intr(td, pc, cnt); } #ifdef GPROF else { /* * Kernel statistics are just like addupc_intr, only easier. */ g = &_gmonparam; if (g->state == GMON_PROF_ON && pc >= g->lowpc) { i = PC_TO_I(g, pc); if (i < g->textsize) { KCOUNT(g, i) += cnt; } } } #endif } /* * Return information about system clocks. */ static int sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS) { struct clockinfo clkinfo; /* * Construct clockinfo structure. */ bzero(&clkinfo, sizeof(clkinfo)); clkinfo.hz = hz; clkinfo.tick = tick; clkinfo.profhz = profhz; clkinfo.stathz = stathz ? 
stathz : hz; return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); } SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE, 0, 0, sysctl_kern_clockrate, "S,clockinfo", "Rate and period of various kernel clocks"); #ifdef SW_WATCHDOG static void watchdog_config(void *unused __unused, u_int cmd, int *error) { u_int u; u = cmd & WD_INTERVAL; if (u >= WD_TO_1SEC) { watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz; watchdog_enabled = 1; *error = 0; } else { watchdog_enabled = 0; } } /* * Handle a watchdog timeout by dumping interrupt information and * then either dropping to DDB or panicking. */ static void watchdog_fire(void) { int nintr; uint64_t inttotal; u_long *curintr; char *curname; curintr = intrcnt; curname = intrnames; inttotal = 0; nintr = sintrcnt / sizeof(u_long); printf("interrupt total\n"); while (--nintr >= 0) { if (*curintr) printf("%-12s %20lu\n", curname, *curintr); curname += strlen(curname) + 1; inttotal += *curintr++; } printf("Total %20ju\n", (uintmax_t)inttotal); #if defined(KDB) && !defined(KDB_UNATTENDED) kdb_backtrace(); kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout"); #else panic("watchdog timeout"); #endif } #endif /* SW_WATCHDOG */ Index: projects/calloutng/sys/kern/kern_clocksource.c =================================================================== --- projects/calloutng/sys/kern/kern_clocksource.c (revision 236314) +++ projects/calloutng/sys/kern/kern_clocksource.c (revision 236315) @@ -1,971 +1,1001 @@ /*- * Copyright (c) 2010-2012 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Common routines to manage event timers hardware. */ #include "opt_device_polling.h" #include "opt_kdtrace.h" #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include cyclic_clock_func_t cyclic_clock_func = NULL; #endif int cpu_can_deep_sleep = 0; /* C3 state is available. */ int cpu_disable_deep_sleep = 0; /* Timer dies in C3. 
*/ static void setuptimer(void); static void loadtimer(struct bintime *now, int first); static int doconfigtimer(void); static void configtimer(int start); static int round_freq(struct eventtimer *et, int freq); static void getnextcpuevent(struct bintime *event, int idle); static void getnextevent(struct bintime *event); static int handleevents(struct bintime *now, int fake); #ifdef SMP -static void cpu_new_callout(int cpu, int ticks); +static void cpu_new_callout(int cpu, struct bintime bt); #endif static struct mtx et_hw_mtx; #define ET_HW_LOCK(state) \ { \ if (timer->et_flags & ET_FLAGS_PERCPU) \ mtx_lock_spin(&(state)->et_hw_mtx); \ else \ mtx_lock_spin(&et_hw_mtx); \ } #define ET_HW_UNLOCK(state) \ { \ if (timer->et_flags & ET_FLAGS_PERCPU) \ mtx_unlock_spin(&(state)->et_hw_mtx); \ else \ mtx_unlock_spin(&et_hw_mtx); \ } static struct eventtimer *timer = NULL; static struct bintime timerperiod; /* Timer period for periodic mode. */ static struct bintime hardperiod; /* hardclock() events period. */ static struct bintime statperiod; /* statclock() events period. */ static struct bintime profperiod; /* profclock() events period. */ static struct bintime nexttick; /* Next global timer tick time. */ static struct bintime nexthard; /* Next global hardlock() event. */ static u_int busy = 0; /* Reconfiguration is in progress. */ static int profiling = 0; /* Profiling events enabled. */ static char timername[32]; /* Wanted timer. */ TUNABLE_STR("kern.eventtimer.timer", timername, sizeof(timername)); static int singlemul = 0; /* Multiplier for periodic mode. */ TUNABLE_INT("kern.eventtimer.singlemul", &singlemul); SYSCTL_INT(_kern_eventtimer, OID_AUTO, singlemul, CTLFLAG_RW, &singlemul, 0, "Multiplier for periodic mode"); static u_int idletick = 0; /* Run periodic events when idle. */ TUNABLE_INT("kern.eventtimer.idletick", &idletick); SYSCTL_UINT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RW, &idletick, 0, "Run periodic events when idle"); static u_int activetick = 1; /* Run all periodic events when active. */ TUNABLE_INT("kern.eventtimer.activetick", &activetick); SYSCTL_UINT(_kern_eventtimer, OID_AUTO, activetick, CTLFLAG_RW, &activetick, 0, "Run all periodic events when active"); static int periodic = 0; /* Periodic or one-shot mode. */ static int want_periodic = 0; /* What mode to prefer. */ TUNABLE_INT("kern.eventtimer.periodic", &want_periodic); struct pcpu_state { struct mtx et_hw_mtx; /* Per-CPU timer mutex. */ u_int action; /* Reconfiguration requests. */ u_int handle; /* Immediate handle resuests. */ struct bintime now; /* Last tick time. */ struct bintime nextevent; /* Next scheduled event on this CPU. */ struct bintime nexttick; /* Next timer tick time. */ struct bintime nexthard; /* Next hardlock() event. */ struct bintime nextstat; /* Next statclock() event. */ struct bintime nextprof; /* Next profclock() event. */ + struct bintime nextcall; /* Next callout event. */ #ifdef KDTRACE_HOOKS struct bintime nextcyc; /* Next OpenSolaris cyclics event. */ #endif int ipi; /* This CPU needs IPI. */ int idle; /* This CPU is in idle mode. */ }; static DPCPU_DEFINE(struct pcpu_state, timerstate); #define FREQ2BT(freq, bt) \ { \ (bt)->sec = 0; \ (bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1; \ } #define BT2FREQ(bt) \ (((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \ ((bt)->frac >> 1)) /* * Timer broadcast IPI handler. 
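/*
 * A standalone sketch of what the FREQ2BT() and BT2FREQ() macros above
 * compute.  A struct bintime carries seconds plus a 64-bit binary fraction
 * of a second, so the period of an N Hz clock is 2^64 / N in the 'frac'
 * field; 2^63 is divided first and shifted left to avoid needing a 65-bit
 * constant.  The local struct and the 1 kHz value are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

struct bintime_sketch {
	int64_t  sec;
	uint64_t frac;
};

int
main(void)
{
	struct bintime_sketch bt;
	uint64_t freq = 1000;			/* a 1 kHz event timer */

	/* FREQ2BT: period of a 1 kHz timer, about 0.001 s. */
	bt.sec = 0;
	bt.frac = (UINT64_C(0x8000000000000000) / freq) << 1;
	printf("period frac = %#jx (~%.6f s)\n", (uintmax_t)bt.frac,
	    (double)bt.frac / 18446744073709551615.0);

	/* BT2FREQ: recover the frequency, rounding to nearest. */
	printf("freq back = %ju Hz\n", (uintmax_t)
	    ((UINT64_C(0x8000000000000000) + (bt.frac >> 2)) /
	    (bt.frac >> 1)));
	return (0);
}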
*/ int hardclockintr(void) { struct bintime now; struct pcpu_state *state; int done; if (doconfigtimer() || busy) return (FILTER_HANDLED); state = DPCPU_PTR(timerstate); now = state->now; CTR4(KTR_SPARE2, "ipi at %d: now %d.%08x%08x", curcpu, now.sec, (unsigned int)(now.frac >> 32), (unsigned int)(now.frac & 0xffffffff)); done = handleevents(&now, 0); return (done ? FILTER_HANDLED : FILTER_STRAY); } /* * Handle all events for specified time on this CPU */ static int handleevents(struct bintime *now, int fake) { struct bintime t; struct trapframe *frame; struct pcpu_state *state; uintfptr_t pc; int usermode; int done, runs; CTR4(KTR_SPARE2, "handle at %d: now %d.%08x%08x", curcpu, now->sec, (unsigned int)(now->frac >> 32), (unsigned int)(now->frac & 0xffffffff)); done = 0; if (fake) { frame = NULL; usermode = 0; pc = 0; } else { frame = curthread->td_intr_frame; usermode = TRAPF_USERMODE(frame); pc = TRAPF_PC(frame); } state = DPCPU_PTR(timerstate); runs = 0; while (bintime_cmp(now, &state->nexthard, >=)) { bintime_add(&state->nexthard, &hardperiod); runs++; } if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 && bintime_cmp(&state->nexthard, &nexthard, >)) nexthard = state->nexthard; if (runs && fake < 2) { hardclock_cnt(runs, usermode); done = 1; } runs = 0; while (bintime_cmp(now, &state->nextstat, >=)) { bintime_add(&state->nextstat, &statperiod); runs++; } if (runs && fake < 2) { statclock_cnt(runs, usermode); done = 1; } if (profiling) { runs = 0; while (bintime_cmp(now, &state->nextprof, >=)) { bintime_add(&state->nextprof, &profperiod); runs++; } if (runs && !fake) { profclock_cnt(runs, usermode, pc); done = 1; } } else state->nextprof = state->nextstat; + if (bintime_cmp(now, &state->nextcall, >=) && + (state->nextcall.sec != -1)) { + state->nextcall.sec = -1; + callout_tick(); + } #ifdef KDTRACE_HOOKS if (fake == 0 && cyclic_clock_func != NULL && state->nextcyc.sec != -1 && bintime_cmp(now, &state->nextcyc, >=)) { state->nextcyc.sec = -1; (*cyclic_clock_func)(frame); } #endif getnextcpuevent(&t, 0); if (fake == 2) { state->nextevent = t; return (done); } ET_HW_LOCK(state); if (!busy) { state->idle = 0; state->nextevent = t; loadtimer(now, 0); } ET_HW_UNLOCK(state); return (done); } /* * Schedule binuptime of the next event on current CPU. */ static void getnextcpuevent(struct bintime *event, int idle) { struct bintime tmp; struct pcpu_state *state; - int skip; state = DPCPU_PTR(timerstate); /* Handle hardclock() events. */ *event = state->nexthard; - if (idle || (!activetick && !profiling && - (timer->et_flags & ET_FLAGS_PERCPU) == 0)) { - skip = idle ? 4 : (stathz / 2); - if (curcpu == CPU_FIRST() && tc_min_ticktock_freq > skip) - skip = tc_min_ticktock_freq; - skip = callout_tickstofirst(hz / skip) - 1; - CTR2(KTR_SPARE2, "skip at %d: %d", curcpu, skip); - tmp = hardperiod; - bintime_mul(&tmp, skip); - bintime_add(event, &tmp); + /* Handle callout events. */ + tmp = callout_tickstofirst(); + if (state->nextcall.sec == -1) + state->nextcall = tmp; + if (bintime_cmp(&tmp, &state->nextcall, <) && + (tmp.sec != -1)) { + state->nextcall = tmp; + } + if (bintime_cmp(event, &state->nextcall, >) && + (state->nextcall.sec != -1)) { + *event = state->nextcall; } if (!idle) { /* If CPU is active - handle other types of events. 
*/ if (bintime_cmp(event, &state->nextstat, >)) *event = state->nextstat; if (profiling && bintime_cmp(event, &state->nextprof, >)) *event = state->nextprof; } #ifdef KDTRACE_HOOKS if (state->nextcyc.sec != -1 && bintime_cmp(event, &state->nextcyc, >)) *event = state->nextcyc; #endif } /* * Schedule binuptime of the next event on all CPUs. */ static void getnextevent(struct bintime *event) { struct pcpu_state *state; #ifdef SMP int cpu; #endif int c, nonidle; state = DPCPU_PTR(timerstate); *event = state->nextevent; c = curcpu; nonidle = !state->idle; if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) { #ifdef SMP CPU_FOREACH(cpu) { if (curcpu == cpu) continue; state = DPCPU_ID_PTR(cpu, timerstate); nonidle += !state->idle; if (bintime_cmp(event, &state->nextevent, >)) { *event = state->nextevent; c = cpu; } } #endif if (nonidle != 0 && bintime_cmp(event, &nexthard, >)) *event = nexthard; } CTR5(KTR_SPARE2, "next at %d: next %d.%08x%08x by %d", curcpu, event->sec, (unsigned int)(event->frac >> 32), (unsigned int)(event->frac & 0xffffffff), c); } /* Hardware timer callback function. */ static void timercb(struct eventtimer *et, void *arg) { struct bintime now; struct bintime *next; struct pcpu_state *state; #ifdef SMP int cpu, bcast; #endif /* Do not touch anything if somebody reconfiguring timers. */ if (busy) return; /* Update present and next tick times. */ state = DPCPU_PTR(timerstate); if (et->et_flags & ET_FLAGS_PERCPU) { next = &state->nexttick; } else next = &nexttick; if (periodic) { now = *next; /* Ex-next tick time becomes present time. */ bintime_add(next, &timerperiod); /* Next tick in 1 period. */ } else { binuptime(&now); /* Get present time from hardware. */ next->sec = -1; /* Next tick is not scheduled yet. */ } state->now = now; CTR4(KTR_SPARE2, "intr at %d: now %d.%08x%08x", curcpu, now.sec, (unsigned int)(now.frac >> 32), (unsigned int)(now.frac & 0xffffffff)); #ifdef SMP /* Prepare broadcasting to other CPUs for non-per-CPU timers. */ bcast = 0; if ((et->et_flags & ET_FLAGS_PERCPU) == 0 && smp_started) { CPU_FOREACH(cpu) { state = DPCPU_ID_PTR(cpu, timerstate); ET_HW_LOCK(state); state->now = now; if (bintime_cmp(&now, &state->nextevent, >=)) { state->nextevent.sec++; if (curcpu != cpu) { state->ipi = 1; bcast = 1; } } ET_HW_UNLOCK(state); } } #endif /* Handle events for this time on this CPU. */ handleevents(&now, 0); #ifdef SMP /* Broadcast interrupt to other CPUs for non-per-CPU timers. */ if (bcast) { CPU_FOREACH(cpu) { if (curcpu == cpu) continue; state = DPCPU_ID_PTR(cpu, timerstate); if (state->ipi) { state->ipi = 0; ipi_cpu(cpu, IPI_HARDCLOCK); } } } #endif } /* * Load new value into hardware timer. */ static void loadtimer(struct bintime *now, int start) { struct pcpu_state *state; struct bintime new; struct bintime *next; uint64_t tmp; int eq; if (timer->et_flags & ET_FLAGS_PERCPU) { state = DPCPU_PTR(timerstate); next = &state->nexttick; } else next = &nexttick; if (periodic) { if (start) { /* * Try to start all periodic timers aligned * to period to make events synchronous. */ tmp = ((uint64_t)now->sec << 36) + (now->frac >> 28); tmp = (tmp % (timerperiod.frac >> 28)) << 28; new.sec = 0; new.frac = timerperiod.frac - tmp; if (new.frac < tmp) /* Left less then passed. 
*/ bintime_add(&new, &timerperiod); CTR5(KTR_SPARE2, "load p at %d: now %d.%08x first in %d.%08x", curcpu, now->sec, (unsigned int)(now->frac >> 32), new.sec, (unsigned int)(new.frac >> 32)); *next = new; bintime_add(next, now); et_start(timer, &new, &timerperiod); } } else { getnextevent(&new); eq = bintime_cmp(&new, next, ==); CTR5(KTR_SPARE2, "load at %d: next %d.%08x%08x eq %d", curcpu, new.sec, (unsigned int)(new.frac >> 32), (unsigned int)(new.frac & 0xffffffff), eq); if (!eq) { *next = new; bintime_sub(&new, now); et_start(timer, &new, NULL); } } } /* * Prepare event timer parameters after configuration changes. */ static void setuptimer(void) { int freq; if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0) periodic = 0; else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0) periodic = 1; singlemul = MIN(MAX(singlemul, 1), 20); freq = hz * singlemul; while (freq < (profiling ? profhz : stathz)) freq += hz; freq = round_freq(timer, freq); FREQ2BT(freq, &timerperiod); } /* * Reconfigure specified per-CPU timer on other CPU. Called from IPI handler. */ static int doconfigtimer(void) { struct bintime now; struct pcpu_state *state; state = DPCPU_PTR(timerstate); switch (atomic_load_acq_int(&state->action)) { case 1: binuptime(&now); ET_HW_LOCK(state); loadtimer(&now, 1); ET_HW_UNLOCK(state); state->handle = 0; atomic_store_rel_int(&state->action, 0); return (1); case 2: ET_HW_LOCK(state); et_stop(timer); ET_HW_UNLOCK(state); state->handle = 0; atomic_store_rel_int(&state->action, 0); return (1); } if (atomic_readandclear_int(&state->handle) && !busy) { binuptime(&now); handleevents(&now, 0); return (1); } return (0); } /* * Reconfigure specified timer. * For per-CPU timers use IPI to make other CPUs to reconfigure. */ static void configtimer(int start) { struct bintime now, next; struct pcpu_state *state; int cpu; if (start) { setuptimer(); binuptime(&now); } critical_enter(); ET_HW_LOCK(DPCPU_PTR(timerstate)); if (start) { /* Initialize time machine parameters. */ next = now; bintime_add(&next, &timerperiod); if (periodic) nexttick = next; else nexttick.sec = -1; CPU_FOREACH(cpu) { state = DPCPU_ID_PTR(cpu, timerstate); state->now = now; state->nextevent = next; if (periodic) state->nexttick = next; else state->nexttick.sec = -1; state->nexthard = next; state->nextstat = next; state->nextprof = next; hardclock_sync(cpu); } busy = 0; /* Start global timer or per-CPU timer of this CPU. */ loadtimer(&now, 1); } else { busy = 1; /* Stop global timer or per-CPU timer of this CPU. */ et_stop(timer); } ET_HW_UNLOCK(DPCPU_PTR(timerstate)); #ifdef SMP /* If timer is global or there is no other CPUs yet - we are done. */ if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) { critical_exit(); return; } /* Set reconfigure flags for other CPUs. */ CPU_FOREACH(cpu) { state = DPCPU_ID_PTR(cpu, timerstate); atomic_store_rel_int(&state->action, (cpu == curcpu) ? 0 : ( start ? 1 : 2)); } /* Broadcast reconfigure IPI. */ ipi_all_but_self(IPI_HARDCLOCK); /* Wait for reconfiguration completed. */ restart: cpu_spinwait(); CPU_FOREACH(cpu) { if (cpu == curcpu) continue; state = DPCPU_ID_PTR(cpu, timerstate); if (atomic_load_acq_int(&state->action)) goto restart; } #endif critical_exit(); } /* * Calculate nearest frequency supported by hardware timer. 
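/*
 * A standalone sketch of the divider rounding that round_freq() below
 * applies: pick the integer divider of the hardware base frequency nearest
 * the requested rate and report the rate actually achievable.  The
 * ET_FLAGS_POW2DIV and min/max-period clamping branches are omitted, and
 * the 32768 Hz base is an illustrative value.
 */
#include <stdint.h>
#include <stdio.h>

static int
round_freq_sketch(uint64_t timer_hz, int wanted)
{
	uint64_t div;

	div = (timer_hz + wanted / 2) / wanted;		/* nearest divider */
	if (div == 0)
		div = 1;
	return ((int)((timer_hz + div / 2) / div));	/* realizable rate */
}

int
main(void)
{
	/* A 32768 Hz timer cannot hit 1000 Hz exactly; the nearest divider
	 * (33) gives roughly 993 Hz. */
	printf("%d\n", round_freq_sketch(32768, 1000));
	return (0);
}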
*/ static int round_freq(struct eventtimer *et, int freq) { uint64_t div; if (et->et_frequency != 0) { div = lmax((et->et_frequency + freq / 2) / freq, 1); if (et->et_flags & ET_FLAGS_POW2DIV) div = 1 << (flsl(div + div / 2) - 1); freq = (et->et_frequency + div / 2) / div; } if (et->et_min_period.sec > 0) freq = 0; else if (et->et_min_period.frac != 0) freq = min(freq, BT2FREQ(&et->et_min_period)); if (et->et_max_period.sec == 0 && et->et_max_period.frac != 0) freq = max(freq, BT2FREQ(&et->et_max_period)); return (freq); } /* * Configure and start event timers (BSP part). */ void cpu_initclocks_bsp(void) { struct pcpu_state *state; int base, div, cpu; mtx_init(&et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN); CPU_FOREACH(cpu) { state = DPCPU_ID_PTR(cpu, timerstate); mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN); #ifdef KDTRACE_HOOKS state->nextcyc.sec = -1; #endif + state->nextcall.sec = -1; } #ifdef SMP callout_new_inserted = cpu_new_callout; #endif periodic = want_periodic; /* Grab requested timer or the best of present. */ if (timername[0]) timer = et_find(timername, 0, 0); if (timer == NULL && periodic) { timer = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC); } if (timer == NULL) { timer = et_find(NULL, ET_FLAGS_ONESHOT, ET_FLAGS_ONESHOT); } if (timer == NULL && !periodic) { timer = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC); } if (timer == NULL) panic("No usable event timer found!"); et_init(timer, timercb, NULL, NULL); /* Adapt to timer capabilities. */ if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0) periodic = 0; else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0) periodic = 1; if (timer->et_flags & ET_FLAGS_C3STOP) cpu_disable_deep_sleep++; /* * We honor the requested 'hz' value. * We want to run stathz in the neighborhood of 128hz. * We would like profhz to run as often as possible. */ if (singlemul <= 0 || singlemul > 20) { if (hz >= 1500 || (hz % 128) == 0) singlemul = 1; else if (hz >= 750) singlemul = 2; else singlemul = 4; } if (periodic) { base = round_freq(timer, hz * singlemul); singlemul = max((base + hz / 2) / hz, 1); hz = (base + singlemul / 2) / singlemul; if (base <= 128) stathz = base; else { div = base / 128; if (div >= singlemul && (div % singlemul) == 0) div++; stathz = base / div; } profhz = stathz; while ((profhz + stathz) <= 128 * 64) profhz += stathz; profhz = round_freq(timer, profhz); } else { hz = round_freq(timer, hz); stathz = round_freq(timer, 127); profhz = round_freq(timer, stathz * 64); } tick = 1000000 / hz; FREQ2BT(hz, &hardperiod); FREQ2BT(stathz, &statperiod); FREQ2BT(profhz, &profperiod); ET_LOCK(); configtimer(1); ET_UNLOCK(); } /* * Start per-CPU event timers on APs. */ void cpu_initclocks_ap(void) { struct bintime now; struct pcpu_state *state; state = DPCPU_PTR(timerstate); binuptime(&now); ET_HW_LOCK(state); if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 && periodic) { state->now = nexttick; bintime_sub(&state->now, &timerperiod); } else state->now = now; hardclock_sync(curcpu); handleevents(&state->now, 2); if (timer->et_flags & ET_FLAGS_PERCPU) loadtimer(&now, 1); ET_HW_UNLOCK(state); } /* * Switch to profiling clock rates. */ void cpu_startprofclock(void) { ET_LOCK(); if (periodic) { configtimer(0); profiling = 1; configtimer(1); } else profiling = 1; ET_UNLOCK(); } /* * Switch to regular clock rates. */ void cpu_stopprofclock(void) { ET_LOCK(); if (periodic) { configtimer(0); profiling = 0; configtimer(1); } else profiling = 0; ET_UNLOCK(); } /* * Switch to idle mode (all ticks handled). 
*/ void cpu_idleclock(void) { struct bintime now, t; struct pcpu_state *state; if (idletick || busy || (periodic && (timer->et_flags & ET_FLAGS_PERCPU)) #ifdef DEVICE_POLLING || curcpu == CPU_FIRST() #endif ) return; state = DPCPU_PTR(timerstate); if (periodic) now = state->now; else binuptime(&now); CTR4(KTR_SPARE2, "idle at %d: now %d.%08x%08x", curcpu, now.sec, (unsigned int)(now.frac >> 32), (unsigned int)(now.frac & 0xffffffff)); getnextcpuevent(&t, 1); ET_HW_LOCK(state); state->idle = 1; state->nextevent = t; if (!periodic) loadtimer(&now, 0); ET_HW_UNLOCK(state); } /* * Switch to active mode (skip empty ticks). */ void cpu_activeclock(void) { struct bintime now; struct pcpu_state *state; struct thread *td; state = DPCPU_PTR(timerstate); if (state->idle == 0 || busy) return; if (periodic) now = state->now; else binuptime(&now); CTR4(KTR_SPARE2, "active at %d: now %d.%08x%08x", curcpu, now.sec, (unsigned int)(now.frac >> 32), (unsigned int)(now.frac & 0xffffffff)); spinlock_enter(); td = curthread; td->td_intr_nesting_level++; handleevents(&now, 1); td->td_intr_nesting_level--; spinlock_exit(); } #ifdef KDTRACE_HOOKS void clocksource_cyc_set(const struct bintime *t) { struct bintime now; struct pcpu_state *state; state = DPCPU_PTR(timerstate); if (periodic) now = state->now; else binuptime(&now); CTR4(KTR_SPARE2, "set_cyc at %d: now %d.%08x%08x", curcpu, now.sec, (unsigned int)(now.frac >> 32), (unsigned int)(now.frac & 0xffffffff)); CTR4(KTR_SPARE2, "set_cyc at %d: t %d.%08x%08x", curcpu, t->sec, (unsigned int)(t->frac >> 32), (unsigned int)(t->frac & 0xffffffff)); ET_HW_LOCK(state); if (bintime_cmp(t, &state->nextcyc, ==)) { ET_HW_UNLOCK(state); return; } state->nextcyc = *t; if (bintime_cmp(&state->nextcyc, &state->nextevent, >=)) { ET_HW_UNLOCK(state); return; } state->nextevent = state->nextcyc; if (!periodic) loadtimer(&now, 0); ET_HW_UNLOCK(state); } #endif #ifdef SMP static void -cpu_new_callout(int cpu, int ticks) +cpu_new_callout(int cpu, struct bintime bt) { - struct bintime tmp; + struct bintime now; struct pcpu_state *state; CTR3(KTR_SPARE2, "new co at %d: on %d in %d", curcpu, cpu, ticks); state = DPCPU_ID_PTR(cpu, timerstate); ET_HW_LOCK(state); if (state->idle == 0 || busy) { ET_HW_UNLOCK(state); return; } /* * If timer is periodic - just update next event time for target CPU. * If timer is global - there is chance it is already programmed. */ if (periodic || (timer->et_flags & ET_FLAGS_PERCPU) == 0) { - tmp = hardperiod; - bintime_mul(&tmp, ticks - 1); - bintime_add(&tmp, &state->nexthard); - if (bintime_cmp(&tmp, &state->nextevent, <)) - state->nextevent = tmp; + /* + * Update next callout time. We can do this only if + * this one on which we're running is the target CPU. + */ + if (!periodic) { + if (bintime_cmp(&bt, &state->nextcall, ==)) { + ET_HW_UNLOCK(state); + return; + } + if (state->nextcall.sec == -1 || + bintime_cmp(&bt, &state->nextcall, <)) + state->nextcall = bt; + if (bintime_cmp(&state->nextcall, &state->nextevent, >=)) { + ET_HW_UNLOCK(state); + return; + } + state->nextevent = state->nextcall; + if (cpu == curcpu) { + loadtimer(&now, 0); + ET_HW_UNLOCK(state); + } + else + goto out; + } + if (bintime_cmp(&state->nexthard, &state->nextevent, <)) + state->nextevent = state->nexthard; if (periodic || bintime_cmp(&state->nextevent, &nexttick, >=)) { ET_HW_UNLOCK(state); return; } } +out: /* * Otherwise we have to wake that CPU up, as we can't get present * bintime to reprogram global timer from here. 
If timer is per-CPU, * we by definition can't do it from here. */ ET_HW_UNLOCK(state); if (timer->et_flags & ET_FLAGS_PERCPU) { state->handle = 1; ipi_cpu(cpu, IPI_HARDCLOCK); } else { if (!cpu_idle_wakeup(cpu)) ipi_cpu(cpu, IPI_AST); } } #endif /* * Report or change the active event timers hardware. */ static int sysctl_kern_eventtimer_timer(SYSCTL_HANDLER_ARGS) { char buf[32]; struct eventtimer *et; int error; ET_LOCK(); et = timer; snprintf(buf, sizeof(buf), "%s", et->et_name); ET_UNLOCK(); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); ET_LOCK(); et = timer; if (error != 0 || req->newptr == NULL || strcasecmp(buf, et->et_name) == 0) { ET_UNLOCK(); return (error); } et = et_find(buf, 0, 0); if (et == NULL) { ET_UNLOCK(); return (ENOENT); } configtimer(0); et_free(timer); if (et->et_flags & ET_FLAGS_C3STOP) cpu_disable_deep_sleep++; if (timer->et_flags & ET_FLAGS_C3STOP) cpu_disable_deep_sleep--; periodic = want_periodic; timer = et; et_init(timer, timercb, NULL, NULL); configtimer(1); ET_UNLOCK(); return (error); } SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_eventtimer_timer, "A", "Chosen event timer"); /* * Report or change the active event timer periodicity. */ static int sysctl_kern_eventtimer_periodic(SYSCTL_HANDLER_ARGS) { int error, val; val = periodic; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); ET_LOCK(); configtimer(0); periodic = want_periodic = val; configtimer(1); ET_UNLOCK(); return (error); } SYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_eventtimer_periodic, "I", "Enable event timer periodic mode"); Index: projects/calloutng/sys/kern/kern_timeout.c =================================================================== --- projects/calloutng/sys/kern/kern_timeout.c (revision 236314) +++ projects/calloutng/sys/kern/kern_timeout.c (revision 236315) @@ -1,1134 +1,1180 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_kdtrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif SDT_PROVIDER_DEFINE(callout_execute); SDT_PROBE_DEFINE(callout_execute, kernel, , callout_start, callout-start); SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_start, 0, "struct callout *"); SDT_PROBE_DEFINE(callout_execute, kernel, , callout_end, callout-end); SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_end, 0, "struct callout *"); -static int avg_depth; -SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, - "Average number of items examined per softclock call. Units = 1/1000"); static int avg_gcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0, "Average number of Giant callouts made per softclock call. Units = 1/1000"); static int avg_lockcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0, "Average number of lock callouts made per softclock call. Units = 1/1000"); static int avg_mpcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, "Average number of MP callouts made per softclock call. Units = 1/1000"); /* * TODO: * allocate more timeout table slots when table overflows. */ int callwheelsize, callwheelbits, callwheelmask; /* * The callout cpu migration entity represents informations necessary for * describing the migrating callout to the new callout cpu. * The cached informations are very important for deferring migration when * the migrating callout is already running. */ struct cc_mig_ent { #ifdef SMP - void (*ce_migration_func)(void *); - void *ce_migration_arg; - int ce_migration_cpu; - int ce_migration_ticks; + void (*ce_migration_func)(void *); + void *ce_migration_arg; + int ce_migration_cpu; + struct bintime ce_migration_time; #endif }; /* * There is one struct callout_cpu per cpu, holding all relevant * state for the callout processing thread on the individual CPU. * In particular: * cc_ticks is incremented once per tick in callout_cpu(). * It tracks the global 'ticks' but in a way that the individual * threads should not worry about races in the order in which * hardclock() and hardclock_cpu() run on the various CPUs. * cc_softclock is advanced in callout_cpu() to point to the * first entry in cc_callwheel that may need handling. In turn, * a softclock() is scheduled so it can serve the various entries i * such that cc_softclock <= i <= cc_ticks . * XXX maybe cc_softclock and cc_ticks should be volatile ? * * cc_ticks is also used in callout_reset_cpu() to determine * when the callout should be served. 
*/ struct callout_cpu { struct cc_mig_ent cc_migrating_entity; struct mtx cc_lock; struct callout *cc_callout; struct callout_tailq *cc_callwheel; struct callout_list cc_callfree; struct callout *cc_next; struct callout *cc_curr; void *cc_cookie; - int cc_ticks; - int cc_softticks; + struct bintime cc_ticks; + struct bintime cc_softticks; int cc_cancel; int cc_waiting; - int cc_firsttick; + struct bintime cc_firsttick; + struct callout_tailq *cc_localexp; }; #ifdef SMP #define cc_migration_func cc_migrating_entity.ce_migration_func #define cc_migration_arg cc_migrating_entity.ce_migration_arg #define cc_migration_cpu cc_migrating_entity.ce_migration_cpu -#define cc_migration_ticks cc_migrating_entity.ce_migration_ticks +#define cc_migration_time cc_migrating_entity.ce_migration_time struct callout_cpu cc_cpu[MAXCPU]; #define CPUBLOCK MAXCPU #define CC_CPU(cpu) (&cc_cpu[(cpu)]) #define CC_SELF() CC_CPU(PCPU_GET(cpuid)) #else struct callout_cpu cc_cpu; #define CC_CPU(cpu) &cc_cpu #define CC_SELF() &cc_cpu #endif #define CC_LOCK(cc) mtx_lock_spin(&(cc)->cc_lock) #define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock) #define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) +#define FREQ2BT(freq, bt) \ +{ \ + (bt)->sec = 0; \ + (bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1; \ +} + static int timeout_cpu; -void (*callout_new_inserted)(int cpu, int ticks) = NULL; +void (*callout_new_inserted)(int cpu, struct bintime bt) = NULL; static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); /** * Locked by cc_lock: * cc_curr - If a callout is in progress, it is curr_callout. * If curr_callout is non-NULL, threads waiting in * callout_drain() will be woken up as soon as the * relevant callout completes. * cc_cancel - Changing to 1 with both callout_lock and c_lock held * guarantees that the current callout will not run. * The softclock() function sets this to 0 before it * drops callout_lock to acquire c_lock, and it calls * the handler only if curr_cancelled is still 0 after * c_lock is successfully acquired. * cc_waiting - If a thread is waiting in callout_drain(), then * callout_wait is nonzero. Set only when * curr_callout is non-NULL. */ /* * Resets the migration entity tied to a specific callout cpu. */ static void cc_cme_cleanup(struct callout_cpu *cc) { #ifdef SMP cc->cc_migration_cpu = CPUBLOCK; - cc->cc_migration_ticks = 0; + cc->cc_migration_time.sec = 0; + cc->cc_migration_time.frac = 0; cc->cc_migration_func = NULL; cc->cc_migration_arg = NULL; #endif } /* * Checks if migration is requested by a specific callout cpu. */ static int cc_cme_migrating(struct callout_cpu *cc) { #ifdef SMP return (cc->cc_migration_cpu != CPUBLOCK); #else return (0); #endif } /* * kern_timeout_callwheel_alloc() - kernel low level callwheel initialization * * This code is called very early in the kernel initialization sequence, * and may be called more then once. 
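/*
 * A standalone sketch of the callwheel sizing done in
 * kern_timeout_callwheel_alloc() below: round the requested number of
 * callouts up to a power of two so a cheap mask can replace a modulo when
 * hashing into the wheel.  The ncallout value here is illustrative.
 */
#include <stdio.h>

int
main(void)
{
	int ncallout = 1000;
	int callwheelsize, callwheelbits, callwheelmask;

	for (callwheelsize = 1, callwheelbits = 0;
	    callwheelsize < ncallout;
	    callwheelsize <<= 1, ++callwheelbits)
		;
	callwheelmask = callwheelsize - 1;
	/* 1000 rounds up to 1024 buckets, 10 bits, mask 0x3ff. */
	printf("size %d bits %d mask %#x\n", callwheelsize, callwheelbits,
	    (unsigned)callwheelmask);
	return (0);
}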
*/ caddr_t kern_timeout_callwheel_alloc(caddr_t v) { struct callout_cpu *cc; timeout_cpu = PCPU_GET(cpuid); cc = CC_CPU(timeout_cpu); /* * Calculate callout wheel size */ for (callwheelsize = 1, callwheelbits = 0; callwheelsize < ncallout; callwheelsize <<= 1, ++callwheelbits) ; callwheelmask = callwheelsize - 1; cc->cc_callout = (struct callout *)v; v = (caddr_t)(cc->cc_callout + ncallout); cc->cc_callwheel = (struct callout_tailq *)v; v = (caddr_t)(cc->cc_callwheel + callwheelsize); + cc->cc_localexp = (struct callout_tailq *)v; + v = (caddr_t)(cc->cc_localexp + 1); return(v); } static void callout_cpu_init(struct callout_cpu *cc) { struct callout *c; int i; mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE); SLIST_INIT(&cc->cc_callfree); for (i = 0; i < callwheelsize; i++) { TAILQ_INIT(&cc->cc_callwheel[i]); } + TAILQ_INIT(cc->cc_localexp); cc_cme_cleanup(cc); if (cc->cc_callout == NULL) return; for (i = 0; i < ncallout; i++) { c = &cc->cc_callout[i]; callout_init(c, 0); c->c_flags = CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } } #ifdef SMP /* * Switches the cpu tied to a specific callout. * The function expects a locked incoming callout cpu and returns with * locked outcoming callout cpu. */ static struct callout_cpu * callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) { struct callout_cpu *new_cc; MPASS(c != NULL && cc != NULL); CC_LOCK_ASSERT(cc); /* * Avoid interrupts and preemption firing after the callout cpu * is blocked in order to avoid deadlocks as the new thread * may be willing to acquire the callout cpu lock. */ c->c_cpu = CPUBLOCK; spinlock_enter(); CC_UNLOCK(cc); new_cc = CC_CPU(new_cpu); CC_LOCK(new_cc); spinlock_exit(); c->c_cpu = new_cpu; return (new_cc); } #endif /* * kern_timeout_callwheel_init() - initialize previously reserved callwheel * space. * * This code is called just once, after the space reserved for the * callout wheel has been finalized. */ void kern_timeout_callwheel_init(void) { callout_cpu_init(CC_CPU(timeout_cpu)); } /* * Start standard softclock thread. */ static void start_softclock(void *dummy) { struct callout_cpu *cc; #ifdef SMP int cpu; #endif cc = CC_CPU(timeout_cpu); if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); #ifdef SMP CPU_FOREACH(cpu) { if (cpu == timeout_cpu) continue; cc = CC_CPU(cpu); if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); cc->cc_callout = NULL; /* Only cpu0 handles timeout(). */ cc->cc_callwheel = malloc( sizeof(struct callout_tailq) * callwheelsize, M_CALLOUT, M_WAITOK); + cc->cc_localexp = malloc( + sizeof(struct callout_tailq), M_CALLOUT, M_WAITOK); callout_cpu_init(cc); } #endif } SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); +static int +get_bucket(struct bintime *bt) +{ + time_t sec; + uint64_t frac; + sec = bt->sec; + frac = bt->frac; + return (int) (((sec<<10)+(frac>>54)) & callwheelmask); +} + void callout_tick(void) { + struct callout *tmp; struct callout_cpu *cc; + struct callout_tailq *sc; + struct bintime now; int need_softclock; int bucket; /* * Process callouts at a very low cpu priority, so we don't keep the * relatively high clock interrupt priority any longer than necessary. 
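/*
 * A standalone sketch of the hash used by the new get_bucket() above: the
 * seconds are shifted by 10 and the top 10 bits of the binary fraction are
 * appended, so consecutive buckets are about 1/1024 s apart before masking
 * into the wheel.  The 1024-entry mask and the sample times are
 * illustrative, not taken from this diff.
 */
#include <stdint.h>
#include <stdio.h>

#define WHEELMASK	1023			/* 1024-slot wheel */

static int
bucket_sketch(int64_t sec, uint64_t frac)
{
	return ((int)(((sec << 10) + (frac >> 54)) & WHEELMASK));
}

int
main(void)
{
	/* 2.5 s: the top 10 bits of the fraction are 512. */
	printf("%d\n", bucket_sketch(2, UINT64_C(0x8000000000000000)));
	/* One second later lands 1024 slots further, i.e. the same bucket
	 * after masking. */
	printf("%d\n", bucket_sketch(3, UINT64_C(0x8000000000000000)));
	return (0);
}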
*/ need_softclock = 0; cc = CC_SELF(); mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); - cc->cc_firsttick = cc->cc_ticks = ticks; - for (; (cc->cc_softticks - cc->cc_ticks) <= 0; cc->cc_softticks++) { - bucket = cc->cc_softticks & callwheelmask; - if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) { - need_softclock = 1; - break; + binuptime(&now); + for (bucket = 0; bucket < callwheelsize; ++bucket) { + sc = &cc->cc_callwheel[bucket]; + TAILQ_FOREACH(tmp, sc, c_links.tqe) { + if (bintime_cmp(&tmp->c_time,&now, <=)) { + TAILQ_INSERT_TAIL(cc->cc_localexp,tmp,c_staiter); + TAILQ_REMOVE(sc, tmp, c_links.tqe); + need_softclock = 1; + } } } + cc->cc_softticks = now; mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); /* * swi_sched acquires the thread lock, so we don't want to call it * with cc_lock held; incorrect locking order. */ - if (need_softclock) + if (need_softclock) { swi_sched(cc->cc_cookie, 0); + } } -int -callout_tickstofirst(int limit) +struct bintime +callout_tickstofirst(void) { struct callout_cpu *cc; struct callout *c; struct callout_tailq *sc; - int curticks; - int skip = 1; + struct bintime tmp; + struct bintime now; + int bucket; + tmp.sec = 0; + tmp.frac = 0; cc = CC_SELF(); mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); - curticks = cc->cc_ticks; - while( skip < ncallout && skip < limit ) { - sc = &cc->cc_callwheel[ (curticks+skip) & callwheelmask ]; - /* search scanning ticks */ - TAILQ_FOREACH( c, sc, c_links.tqe ){ - if (c->c_time - curticks <= ncallout) - goto out; + binuptime(&now); + for (bucket = 0; bucket < callwheelsize; ++bucket) { + sc = &cc->cc_callwheel[bucket]; + TAILQ_FOREACH( c, sc, c_links.tqe ) { + if (tmp.sec == 0 && tmp.frac == 0) + tmp = c->c_time; + if (bintime_cmp(&c->c_time, &now, <)) + tmp = now; + if (bintime_cmp(&c->c_time, &tmp, <=)) + tmp = c->c_time; + } - skip++; } -out: - cc->cc_firsttick = curticks + skip; + if (tmp.sec == 0 && tmp.frac == 0) { + cc->cc_firsttick.sec = -1; + cc->cc_firsttick.frac = -1; + } + else + cc->cc_firsttick = tmp; mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); - return (skip); + return (cc->cc_firsttick); } static struct callout_cpu * callout_lock(struct callout *c) { struct callout_cpu *cc; int cpu; for (;;) { cpu = c->c_cpu; #ifdef SMP if (cpu == CPUBLOCK) { while (c->c_cpu == CPUBLOCK) cpu_spinwait(); continue; } #endif cc = CC_CPU(cpu); CC_LOCK(cc); if (cpu == c->c_cpu) break; CC_UNLOCK(cc); } return (cc); } static void -callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks, - void (*func)(void *), void *arg, int cpu) +callout_cc_add(struct callout *c, struct callout_cpu *cc, + struct bintime to_bintime, void (*func)(void *), void *arg, int cpu) { - + int bucket; + struct bintime now; + struct bintime tmp; + + tmp.sec = 1; + tmp.frac = 0; CC_LOCK_ASSERT(cc); - - if (to_ticks <= 0) - to_ticks = 1; + binuptime(&now); + if (bintime_cmp(&to_bintime, &now, <)) { + bintime_add(&now, &tmp); + to_bintime = now; + } c->c_arg = arg; c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); c->c_func = func; - c->c_time = ticks + to_ticks; - TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask], + c->c_time = to_bintime; + bucket = get_bucket(&c->c_time); + TAILQ_INSERT_TAIL(&cc->cc_callwheel[bucket & callwheelmask], c, c_links.tqe); - if ((c->c_time - cc->cc_firsttick) < 0 && - callout_new_inserted != NULL) { - cc->cc_firsttick = c->c_time; - (*callout_new_inserted)(cpu, - to_ticks + (ticks - cc->cc_ticks)); - } + /* + * Inform the eventtimers(4) subsystem there's a new callout + * that has been inserted. 
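/*
 * A standalone sketch of the bintime compare/add operations that the
 * rewritten callout_tick(), callout_tickstofirst() and callout_cc_add()
 * above rely on.  A local struct and helpers mirror the <sys/time.h>
 * bintime_add()/bintime_cmp() behaviour so the carry handling is visible;
 * the sample times are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

struct bt {
	int64_t  sec;
	uint64_t frac;
};

static void
bt_add(struct bt *a, const struct bt *b)
{
	uint64_t ofrac = a->frac;

	a->frac += b->frac;
	if (a->frac < ofrac)		/* fraction overflowed: carry 1 s */
		a->sec++;
	a->sec += b->sec;
}

static int
bt_le(const struct bt *a, const struct bt *b)
{
	return (a->sec < b->sec || (a->sec == b->sec && a->frac <= b->frac));
}

int
main(void)
{
	struct bt now = { 10, UINT64_C(0xC000000000000000) };	/* 10.75 s */
	struct bt period = { 0, UINT64_C(0x8000000000000000) };	/* 0.5 s */

	bt_add(&now, &period);		/* 11.25 s, carry into seconds taken */
	printf("sum: %jd s + frac %#jx\n", (intmax_t)now.sec,
	    (uintmax_t)now.frac);
	printf("expired: %d\n", bt_le(&period, &now));		/* prints 1 */
	return (0);
}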
+ */ + if (callout_new_inserted != NULL) + (*callout_new_inserted)(cpu, + to_bintime); } static void callout_cc_del(struct callout *c, struct callout_cpu *cc) { if (cc->cc_next == c) - cc->cc_next = TAILQ_NEXT(c, c_links.tqe); + cc->cc_next = TAILQ_NEXT(c, c_staiter); if (c->c_flags & CALLOUT_LOCAL_ALLOC) { c->c_func = NULL; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } } static struct callout * softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls, int *lockcalls, int *gcalls) { void (*c_func)(void *); void *c_arg; struct lock_class *class; struct lock_object *c_lock; int c_flags, sharedlock; #ifdef SMP struct callout_cpu *new_cc; void (*new_func)(void *); void *new_arg; - int new_cpu, new_ticks; + int new_cpu; + struct bintime new_time; #endif #ifdef DIAGNOSTIC struct bintime bt1, bt2; struct timespec ts2; static uint64_t maxdt = 36893488147419102LL; /* 2 msec */ static timeout_t *lastfunc; #endif - cc->cc_next = TAILQ_NEXT(c, c_links.tqe); + cc->cc_next = TAILQ_NEXT(c, c_staiter); class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1; c_lock = c->c_lock; c_func = c->c_func; c_arg = c->c_arg; c_flags = c->c_flags; if (c->c_flags & CALLOUT_LOCAL_ALLOC) c->c_flags = CALLOUT_LOCAL_ALLOC; else c->c_flags &= ~CALLOUT_PENDING; cc->cc_curr = c; cc->cc_cancel = 0; CC_UNLOCK(cc); if (c_lock != NULL) { class->lc_lock(c_lock, sharedlock); /* * The callout may have been cancelled * while we switched locks. */ if (cc->cc_cancel) { class->lc_unlock(c_lock); goto skip; } /* The callout cannot be stopped now. */ cc->cc_cancel = 1; if (c_lock == &Giant.lock_object) { (*gcalls)++; CTR3(KTR_CALLOUT, "callout %p func %p arg %p", c, c_func, c_arg); } else { (*lockcalls)++; CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p", c, c_func, c_arg); } } else { (*mpcalls)++; CTR3(KTR_CALLOUT, "callout mpsafe %p func %p arg %p", c, c_func, c_arg); } #ifdef DIAGNOSTIC binuptime(&bt1); #endif THREAD_NO_SLEEPING(); SDT_PROBE(callout_execute, kernel, , callout_start, c, 0, 0, 0, 0); c_func(c_arg); SDT_PROBE(callout_execute, kernel, , callout_end, c, 0, 0, 0, 0); THREAD_SLEEPING_OK(); #ifdef DIAGNOSTIC binuptime(&bt2); bintime_sub(&bt2, &bt1); if (bt2.frac > maxdt) { if (lastfunc != c_func || bt2.frac > maxdt * 2) { bintime2timespec(&bt2, &ts2); printf( "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n", c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec); } maxdt = bt2.frac; lastfunc = c_func; } #endif CTR1(KTR_CALLOUT, "callout %p finished", c); if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) class->lc_unlock(c_lock); skip: CC_LOCK(cc); /* * If the current callout is locally allocated (from * timeout(9)) then put it on the freelist. * * Note: we need to check the cached copy of c_flags because * if it was not local, then it's not safe to deref the * callout pointer. */ if (c_flags & CALLOUT_LOCAL_ALLOC) { KASSERT(c->c_flags == CALLOUT_LOCAL_ALLOC, ("corrupted callout")); c->c_func = NULL; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } cc->cc_curr = NULL; if (cc->cc_waiting) { /* * There is someone waiting for the * callout to complete. * If the callout was scheduled for * migration just cancel it. */ if (cc_cme_migrating(cc)) cc_cme_cleanup(cc); cc->cc_waiting = 0; CC_UNLOCK(cc); wakeup(&cc->cc_waiting); CC_LOCK(cc); } else if (cc_cme_migrating(cc)) { #ifdef SMP /* * If the callout was scheduled for * migration just perform it now. 
*/ new_cpu = cc->cc_migration_cpu; - new_ticks = cc->cc_migration_ticks; + new_time = cc->cc_migration_time; new_func = cc->cc_migration_func; new_arg = cc->cc_migration_arg; cc_cme_cleanup(cc); /* * Handle deferred callout stops */ if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) { CTR3(KTR_CALLOUT, "deferred cancelled %p func %p arg %p", c, new_func, new_arg); callout_cc_del(c, cc); goto nextc; } c->c_flags &= ~CALLOUT_DFRMIGRATION; /* * It should be assert here that the * callout is not destroyed but that * is not easy. */ new_cc = callout_cpu_switch(c, cc, new_cpu); - callout_cc_add(c, new_cc, new_ticks, new_func, new_arg, + callout_cc_add(c, new_cc, new_time, new_func, new_arg, new_cpu); CC_UNLOCK(new_cc); CC_LOCK(cc); #else panic("migration should not happen"); #endif } #ifdef SMP nextc: #endif return (cc->cc_next); } /* * The callout mechanism is based on the work of Adam M. Costello and * George Varghese, published in a technical report entitled "Redesigning * the BSD Callout and Timer Facilities" and modified slightly for inclusion * in FreeBSD by Justin T. Gibbs. The original work on the data structures * used in this implementation was published by G. Varghese and T. Lauck in * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for * the Efficient Implementation of a Timer Facility" in the Proceedings of * the 11th ACM Annual Symposium on Operating Systems Principles, * Austin, Texas Nov 1987. */ /* * Software (low priority) clock interrupt. * Run periodic events from timeout queue. */ void softclock(void *arg) { struct callout_cpu *cc; struct callout *c; - struct callout_tailq *bucket; - int curticks; int steps; /* #steps since we last allowed interrupts */ - int depth; int mpcalls; int lockcalls; int gcalls; #ifndef MAX_SOFTCLOCK_STEPS #define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */ #endif /* MAX_SOFTCLOCK_STEPS */ - + mpcalls = 0; lockcalls = 0; gcalls = 0; - depth = 0; steps = 0; cc = (struct callout_cpu *)arg; CC_LOCK(cc); - while (cc->cc_softticks - 1 != cc->cc_ticks) { - /* - * cc_softticks may be modified by hard clock, so cache - * it while we work on a given bucket. - */ - curticks = cc->cc_softticks; - cc->cc_softticks++; - bucket = &cc->cc_callwheel[curticks & callwheelmask]; - c = TAILQ_FIRST(bucket); - while (c != NULL) { - depth++; - if (c->c_time != curticks) { - c = TAILQ_NEXT(c, c_links.tqe); - ++steps; - if (steps >= MAX_SOFTCLOCK_STEPS) { - cc->cc_next = c; - /* Give interrupts a chance. */ - CC_UNLOCK(cc); - ; /* nothing */ - CC_LOCK(cc); - c = cc->cc_next; - steps = 0; - } - } else { - TAILQ_REMOVE(bucket, c, c_links.tqe); - c = softclock_call_cc(c, cc, &mpcalls, - &lockcalls, &gcalls); - steps = 0; - } + + c = TAILQ_FIRST(cc->cc_localexp); + while (c != NULL) { + ++steps; + if (steps >= MAX_SOFTCLOCK_STEPS) { + cc->cc_next = c; + /* Give interrupts a chance. */ + CC_UNLOCK(cc); + ; /* nothing */ + CC_LOCK(cc); + c = cc->cc_next; + steps = 0; } + else { + TAILQ_REMOVE(cc->cc_localexp, c, c_staiter); + c = softclock_call_cc(c, cc, &mpcalls, + &lockcalls, &gcalls); + steps = 0; + } } - avg_depth += (depth * 1000 - avg_depth) >> 8; + avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; cc->cc_next = NULL; CC_UNLOCK(cc); } /* * timeout -- * Execute a function after a specified length of time. * * untimeout -- * Cancel previous timeout function call. 
* * callout_handle_init -- * Initialize a handle so that using it with untimeout is benign. * * See AT&T BCI Driver Reference Manual for specification. This * implementation differs from that one in that although an * identification value is returned from timeout, the original * arguments to timeout as well as the identifier are used to * identify entries for untimeout. */ struct callout_handle timeout(ftn, arg, to_ticks) timeout_t *ftn; void *arg; int to_ticks; { struct callout_cpu *cc; struct callout *new; struct callout_handle handle; cc = CC_CPU(timeout_cpu); CC_LOCK(cc); /* Fill in the next free callout structure. */ new = SLIST_FIRST(&cc->cc_callfree); if (new == NULL) /* XXX Attempt to malloc first */ panic("timeout table full"); SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); callout_reset(new, to_ticks, ftn, arg); handle.callout = new; CC_UNLOCK(cc); return (handle); } void untimeout(ftn, arg, handle) timeout_t *ftn; void *arg; struct callout_handle handle; { struct callout_cpu *cc; /* * Check for a handle that was initialized * by callout_handle_init, but never used * for a real timeout. */ if (handle.callout == NULL) return; cc = callout_lock(handle.callout); if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) callout_stop(handle.callout); CC_UNLOCK(cc); } void callout_handle_init(struct callout_handle *handle) { handle->callout = NULL; } /* * New interface; clients allocate their own callout structures. * * callout_reset() - establish or change a timeout * callout_stop() - disestablish a timeout * callout_init() - initialize a callout structure so that it can * safely be passed to callout_reset() and callout_stop() * * defines three convenience macros: * * callout_active() - returns truth if callout has not been stopped, * drained, or deactivated since the last time the callout was * reset. * callout_pending() - returns truth if callout is still waiting for timeout * callout_deactivate() - marks the callout as having been serviced */ int callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *), void *arg, int cpu) { struct callout_cpu *cc; + struct bintime bt; + struct bintime now; int cancelled = 0; + int bucket; + + /* + * Convert ticks to struct bintime. + */ + FREQ2BT(hz,&bt); + binuptime(&now); + bintime_mul(&bt,to_ticks); + bintime_add(&bt,&now); /* * Don't allow migration of pre-allocated callouts lest they * become unbalanced. */ if (c->c_flags & CALLOUT_LOCAL_ALLOC) cpu = c->c_cpu; cc = callout_lock(c); if (cc->cc_curr == c) { /* * We're being asked to reschedule a callout which is * currently in progress. If there is a lock then we * can cancel the callout if it has not really started. */ if (c->c_lock != NULL && !cc->cc_cancel) cancelled = cc->cc_cancel = 1; if (cc->cc_waiting) { /* * Someone has called callout_drain to kill this * callout. Don't reschedule. */ CTR4(KTR_CALLOUT, "%s %p func %p arg %p", cancelled ? "cancelled" : "failed to cancel", c, c->c_func, c->c_arg); CC_UNLOCK(cc); return (cancelled); } } if (c->c_flags & CALLOUT_PENDING) { if (cc->cc_next == c) { cc->cc_next = TAILQ_NEXT(c, c_links.tqe); } - TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c, + bucket = get_bucket(&c->c_time); + TAILQ_REMOVE(&cc->cc_callwheel[bucket], c, c_links.tqe); cancelled = 1; c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); } #ifdef SMP /* * If the callout must migrate try to perform it immediately. * If the callout is currently running, just defer the migration * to a more appropriate moment. 
*/ if (c->c_cpu != cpu) { if (cc->cc_curr == c) { cc->cc_migration_cpu = cpu; - cc->cc_migration_ticks = to_ticks; + cc->cc_migration_time = bt; cc->cc_migration_func = ftn; cc->cc_migration_arg = arg; c->c_flags |= CALLOUT_DFRMIGRATION; - CTR5(KTR_CALLOUT, - "migration of %p func %p arg %p in %d to %u deferred", - c, c->c_func, c->c_arg, to_ticks, cpu); + CTR6(KTR_CALLOUT, + "migration of %p func %p arg %p in %ld %ld to %u deferred", + c, c->c_func, c->c_arg, bt.sec, bt.frac, cpu); CC_UNLOCK(cc); return (cancelled); } cc = callout_cpu_switch(c, cc, cpu); } #endif - callout_cc_add(c, cc, to_ticks, ftn, arg, cpu); - CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d", - cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks); + callout_cc_add(c, cc, bt, ftn, arg, cpu); + CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %ld %ld", + cancelled ? "re" : "", c, c->c_func, c->c_arg, bt.sec, bt.frac); CC_UNLOCK(cc); return (cancelled); } /* * Common idioms that can be optimized in the future. */ int callout_schedule_on(struct callout *c, int to_ticks, int cpu) { return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu); } int callout_schedule(struct callout *c, int to_ticks) { return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu); } int _callout_stop_safe(c, safe) struct callout *c; int safe; { struct callout_cpu *cc, *old_cc; struct lock_class *class; - int use_lock, sq_locked; + int use_lock, sq_locked, bucket; /* * Some old subsystems don't hold Giant while running a callout_stop(), * so just discard this check for the moment. */ if (!safe && c->c_lock != NULL) { if (c->c_lock == &Giant.lock_object) use_lock = mtx_owned(&Giant); else { use_lock = 1; class = LOCK_CLASS(c->c_lock); class->lc_assert(c->c_lock, LA_XLOCKED); } } else use_lock = 0; sq_locked = 0; old_cc = NULL; again: cc = callout_lock(c); /* * If the callout was migrating while the callout cpu lock was * dropped, just drop the sleepqueue lock and check the states * again. */ if (sq_locked != 0 && cc != old_cc) { #ifdef SMP CC_UNLOCK(cc); sleepq_release(&old_cc->cc_waiting); sq_locked = 0; old_cc = NULL; goto again; #else panic("migration should not happen"); #endif } /* * If the callout isn't pending, it's not on the queue, so * don't attempt to remove it from the queue. We can try to * stop it by other means however. */ if (!(c->c_flags & CALLOUT_PENDING)) { c->c_flags &= ~CALLOUT_ACTIVE; /* * If it wasn't on the queue and it isn't the current * callout, then we can't stop it, so just bail. */ if (cc->cc_curr != c) { CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", c, c->c_func, c->c_arg); CC_UNLOCK(cc); if (sq_locked) sleepq_release(&cc->cc_waiting); return (0); } if (safe) { /* * The current callout is running (or just * about to run) and blocking is allowed, so * just wait for the current invocation to * finish. */ while (cc->cc_curr == c) { /* * Use direct calls to sleepqueue interface * instead of cv/msleep in order to avoid * a LOR between cc_lock and sleepqueue * chain spinlocks. This piece of code * emulates a msleep_spin() call actually. * * If we already have the sleepqueue chain * locked, then we can safely block. If we * don't already have it locked, however, * we have to drop the cc_lock to lock * it. This opens several races, so we * restart at the beginning once we have * both locks. If nothing has changed, then * we will end up back here with sq_locked * set. 
*/ if (!sq_locked) { CC_UNLOCK(cc); sleepq_lock(&cc->cc_waiting); sq_locked = 1; old_cc = cc; goto again; } /* * Migration could be cancelled here, but * as long as it is still not sure when it * will be packed up, just let softclock() * take care of it. */ cc->cc_waiting = 1; DROP_GIANT(); CC_UNLOCK(cc); sleepq_add(&cc->cc_waiting, &cc->cc_lock.lock_object, "codrain", SLEEPQ_SLEEP, 0); sleepq_wait(&cc->cc_waiting, 0); sq_locked = 0; old_cc = NULL; /* Reacquire locks previously released. */ PICKUP_GIANT(); CC_LOCK(cc); } } else if (use_lock && !cc->cc_cancel) { /* * The current callout is waiting for its * lock which we hold. Cancel the callout * and return. After our caller drops the * lock, the callout will be skipped in * softclock(). */ cc->cc_cancel = 1; CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); KASSERT(!cc_cme_migrating(cc), ("callout wrongly scheduled for migration")); CC_UNLOCK(cc); KASSERT(!sq_locked, ("sleepqueue chain locked")); return (1); } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) { c->c_flags &= ~CALLOUT_DFRMIGRATION; CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", c, c->c_func, c->c_arg); CC_UNLOCK(cc); return (1); } CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", c, c->c_func, c->c_arg); CC_UNLOCK(cc); KASSERT(!sq_locked, ("sleepqueue chain still locked")); return (0); } if (sq_locked) sleepq_release(&cc->cc_waiting); c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); - TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c, + bucket = get_bucket(&c->c_time); + TAILQ_REMOVE(&cc->cc_callwheel[bucket], c, c_links.tqe); callout_cc_del(c, cc); CC_UNLOCK(cc); return (1); } void callout_init(c, mpsafe) struct callout *c; int mpsafe; { bzero(c, sizeof *c); if (mpsafe) { c->c_lock = NULL; c->c_flags = CALLOUT_RETURNUNLOCKED; } else { c->c_lock = &Giant.lock_object; c->c_flags = 0; } c->c_cpu = timeout_cpu; } void _callout_init_lock(c, lock, flags) struct callout *c; struct lock_object *lock; int flags; { bzero(c, sizeof *c); c->c_lock = lock; KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0, ("callout_init_lock: bad flags %d", flags)); KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0, ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock")); KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class", __func__)); c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); c->c_cpu = timeout_cpu; } #ifdef APM_FIXUP_CALLTODO /* * Adjust the kernel calltodo timeout list. This routine is used after * an APM resume to recalculate the calltodo timer list values with the * number of hz's we have been sleeping. The next hardclock() will detect * that there are fired timers and run softclock() to execute them. * * Please note, I have not done an exhaustive analysis of what code this * might break. I am motivated to have my select()'s and alarm()'s that * have expired during suspend firing upon resume so that the applications * which set the timer can do the maintanence the timer was for as close * as possible to the originally intended time. Testing this code for a * week showed that resuming from a suspend resulted in 22 to 25 timers * firing, which seemed independant on whether the suspend was 2 hours or * 2 days. Your milage may vary. 
- Ken Key */ void adjust_timeout_calltodo(time_change) struct timeval *time_change; { register struct callout *p; unsigned long delta_ticks; /* * How many ticks were we asleep? * (stolen from tvtohz()). */ /* Don't do anything */ if (time_change->tv_sec < 0) return; else if (time_change->tv_sec <= LONG_MAX / 1000000) delta_ticks = (time_change->tv_sec * 1000000 + time_change->tv_usec + (tick - 1)) / tick + 1; else if (time_change->tv_sec <= LONG_MAX / hz) delta_ticks = time_change->tv_sec * hz + (time_change->tv_usec + (tick - 1)) / tick + 1; else delta_ticks = LONG_MAX; if (delta_ticks > INT_MAX) delta_ticks = INT_MAX; /* * Now rip through the timer calltodo list looking for timers * to expire. */ /* don't collide with softclock() */ CC_LOCK(cc); for (p = calltodo.c_next; p != NULL; p = p->c_next) { p->c_time -= delta_ticks; /* Break if the timer had more time on it than delta_ticks */ if (p->c_time > 0) break; /* take back the ticks the timer didn't use (p->c_time <= 0) */ delta_ticks = -p->c_time; } CC_UNLOCK(cc); return; } #endif /* APM_FIXUP_CALLTODO */ Index: projects/calloutng/sys/netinet/tcp_timer.c =================================================================== --- projects/calloutng/sys/netinet/tcp_timer.c (revision 236314) +++ projects/calloutng/sys/netinet/tcp_timer.c (revision 236315) @@ -1,682 +1,715 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_tcpdebug.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #ifdef TCPDEBUG #include #endif int tcp_keepinit; SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection"); int tcp_keepidle; SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin"); int tcp_keepintvl; SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes"); int tcp_delacktime; SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", "Time before a delayed ACK is sent"); int tcp_msl; SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); int tcp_rexmit_min; SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", "Minimum Retransmission Timeout"); int tcp_rexmit_slop; SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", "Retransmission Timer Slop"); static int always_keepalive = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); int tcp_fast_finwait2_recycle = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, &tcp_fast_finwait2_recycle, 0, "Recycle closed FIN_WAIT_2 connections faster"); int tcp_finwait2_timeout; SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); int tcp_keepcnt = TCPTV_KEEPCNT; SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0, "Number of keepalive probes to send"); /* max idle probes */ int tcp_maxpersistidle; static int per_cpu_timers = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW, &per_cpu_timers , 0, "run tcp timers on all cpus"); #define INP_CPU(inp) (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \ ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0) /* * Tcp protocol timeout routine called every 500 ms. * Updates timestamps used for TCP * causes finite state machine actions if timers expire. */ void tcp_slowtimo(void) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); INP_INFO_WLOCK(&V_tcbinfo); (void) tcp_tw_2msl_scan(0); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; int tcp_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ static int tcp_timer_race; SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race, 0, "Count of t_inpcb races on tcp_discardcb"); /* * TCP timer processing. 
*/ void tcp_timer_delack(void *xtp) { struct tcpcb *tp = xtp; struct inpcb *inp; CURVNET_SET(tp->t_vnet); inp = tp->t_inpcb; /* * XXXRW: While this assert is in fact correct, bugs in the tcpcb * tear-down mean we need it as a work-around for races between * timers and tcp_discardcb(). * * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL")); */ if (inp == NULL) { tcp_timer_race++; CURVNET_RESTORE(); return; } INP_WLOCK(inp); if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_delack) || !callout_active(&tp->t_timers->tt_delack)) { INP_WUNLOCK(inp); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_delack); tp->t_flags |= TF_ACKNOW; TCPSTAT_INC(tcps_delack); (void) tcp_output(tp); INP_WUNLOCK(inp); CURVNET_RESTORE(); } void tcp_timer_2msl(void *xtp) { struct tcpcb *tp = xtp; struct inpcb *inp; CURVNET_SET(tp->t_vnet); #ifdef TCPDEBUG int ostate; ostate = tp->t_state; #endif /* * XXXRW: Does this actually happen? */ INP_INFO_WLOCK(&V_tcbinfo); inp = tp->t_inpcb; /* * XXXRW: While this assert is in fact correct, bugs in the tcpcb * tear-down mean we need it as a work-around for races between * timers and tcp_discardcb(). * * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL")); */ if (inp == NULL) { tcp_timer_race++; INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } INP_WLOCK(inp); tcp_free_sackholes(tp); if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_2msl) || !callout_active(&tp->t_timers->tt_2msl)) { INP_WUNLOCK(tp->t_inpcb); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_2msl); /* * 2 MSL timeout in shutdown went off. If we're closed but * still waiting for peer to close and connection has been idle * too long, or if 2MSL time is up from TIME_WAIT, delete connection * control block. Otherwise, check again in a bit. * * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. * Ignore fact that there were recent incoming segments. */ if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && tp->t_inpcb && tp->t_inpcb->inp_socket && (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { TCPSTAT_INC(tcps_finwait2_drops); tp = tcp_close(tp); } else { if (tp->t_state != TCPS_TIME_WAIT && ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) callout_reset_on(&tp->t_timers->tt_2msl, TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp)); else tp = tcp_close(tp); } #ifdef TCPDEBUG if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, PRU_SLOWTIMO); #endif if (tp != NULL) INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } void tcp_timer_keep(void *xtp) { struct tcpcb *tp = xtp; struct tcptemp *t_template; struct inpcb *inp; CURVNET_SET(tp->t_vnet); #ifdef TCPDEBUG int ostate; ostate = tp->t_state; #endif INP_INFO_WLOCK(&V_tcbinfo); inp = tp->t_inpcb; /* * XXXRW: While this assert is in fact correct, bugs in the tcpcb * tear-down mean we need it as a work-around for races between * timers and tcp_discardcb(). 
* * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL")); */ if (inp == NULL) { tcp_timer_race++; INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } INP_WLOCK(inp); if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_keep) || !callout_active(&tp->t_timers->tt_keep)) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_keep); /* * Keep-alive timer went off; send something * or drop connection if idle for too long. */ TCPSTAT_INC(tcps_keeptimeo); if (tp->t_state < TCPS_ESTABLISHED) goto dropit; if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && tp->t_state <= TCPS_CLOSING) { if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp)) goto dropit; /* * Send a packet designed to force a response * if the peer is up and reachable: * either an ACK if the connection is still alive, * or an RST if the peer has closed the connection * due to timeout or reboot. * Using sequence number tp->snd_una-1 * causes the transmitted zero-length segment * to lie outside the receive window; * by the protocol spec, this requires the * correspondent TCP to respond. */ TCPSTAT_INC(tcps_keepprobe); t_template = tcpip_maketemplate(inp); if (t_template) { tcp_respond(tp, t_template->tt_ipgen, &t_template->tt_t, (struct mbuf *)NULL, tp->rcv_nxt, tp->snd_una - 1, 0); free(t_template, M_TEMP); } callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp), tcp_timer_keep, tp, INP_CPU(inp)); } else callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp), tcp_timer_keep, tp, INP_CPU(inp)); #ifdef TCPDEBUG if (inp->inp_socket->so_options & SO_DEBUG) tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, PRU_SLOWTIMO); #endif INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; dropit: TCPSTAT_INC(tcps_keepdrops); tp = tcp_drop(tp, ETIMEDOUT); #ifdef TCPDEBUG if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, PRU_SLOWTIMO); #endif if (tp != NULL) INP_WUNLOCK(tp->t_inpcb); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } void tcp_timer_persist(void *xtp) { struct tcpcb *tp = xtp; struct inpcb *inp; CURVNET_SET(tp->t_vnet); #ifdef TCPDEBUG int ostate; ostate = tp->t_state; #endif INP_INFO_WLOCK(&V_tcbinfo); inp = tp->t_inpcb; /* * XXXRW: While this assert is in fact correct, bugs in the tcpcb * tear-down mean we need it as a work-around for races between * timers and tcp_discardcb(). * * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL")); */ if (inp == NULL) { tcp_timer_race++; INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } INP_WLOCK(inp); if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_persist) || !callout_active(&tp->t_timers->tt_persist)) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_persist); /* * Persistance timer into zero window. * Force a byte to be output, if possible. */ TCPSTAT_INC(tcps_persisttimeo); /* * Hack: if the peer is dead/unreachable, we do not * time out if the window is closed. After a full * backoff, drop the connection if the idle time * (no responses to probes) reaches the maximum * backoff that we would use if retransmitting. 
*/ if (tp->t_rxtshift == TCP_MAXRXTSHIFT && (ticks - tp->t_rcvtime >= tcp_maxpersistidle || ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { TCPSTAT_INC(tcps_persistdrop); tp = tcp_drop(tp, ETIMEDOUT); goto out; } tcp_setpersist(tp); tp->t_flags |= TF_FORCEDATA; (void) tcp_output(tp); tp->t_flags &= ~TF_FORCEDATA; out: #ifdef TCPDEBUG if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); #endif if (tp != NULL) INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } void tcp_timer_rexmt(void * xtp) { struct tcpcb *tp = xtp; CURVNET_SET(tp->t_vnet); int rexmt; int headlocked; struct inpcb *inp; #ifdef TCPDEBUG int ostate; ostate = tp->t_state; #endif INP_INFO_RLOCK(&V_tcbinfo); inp = tp->t_inpcb; /* * XXXRW: While this assert is in fact correct, bugs in the tcpcb * tear-down mean we need it as a work-around for races between * timers and tcp_discardcb(). * * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL")); */ if (inp == NULL) { tcp_timer_race++; INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } INP_WLOCK(inp); if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_rexmt) || !callout_active(&tp->t_timers->tt_rexmt)) { INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_rexmt); tcp_free_sackholes(tp); /* * Retransmission timer went off. Message has not * been acked within retransmit interval. Back off * to a longer retransmit interval and retransmit one segment. */ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { tp->t_rxtshift = TCP_MAXRXTSHIFT; TCPSTAT_INC(tcps_timeoutdrop); in_pcbref(inp); INP_INFO_RUNLOCK(&V_tcbinfo); INP_WUNLOCK(inp); INP_INFO_WLOCK(&V_tcbinfo); INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) { INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } tp = tcp_drop(tp, tp->t_softerror ? tp->t_softerror : ETIMEDOUT); headlocked = 1; goto out; } INP_INFO_RUNLOCK(&V_tcbinfo); headlocked = 0; if (tp->t_rxtshift == 1) { /* * first retransmit; record ssthresh and cwnd so they can * be recovered if this turns out to be a "bad" retransmit. * A retransmit is considered "bad" if an ACK for this * segment is received within RTT/2 interval; the assumption * here is that the ACK was already in flight. See * "On Estimating End-to-End Network Path Properties" by * Allman and Paxson for more details. */ tp->snd_cwnd_prev = tp->snd_cwnd; tp->snd_ssthresh_prev = tp->snd_ssthresh; tp->snd_recover_prev = tp->snd_recover; if (IN_FASTRECOVERY(tp->t_flags)) tp->t_flags |= TF_WASFRECOVERY; else tp->t_flags &= ~TF_WASFRECOVERY; if (IN_CONGRECOVERY(tp->t_flags)) tp->t_flags |= TF_WASCRECOVERY; else tp->t_flags &= ~TF_WASCRECOVERY; tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); tp->t_flags |= TF_PREVVALID; } else tp->t_flags &= ~TF_PREVVALID; TCPSTAT_INC(tcps_rexmttimeo); if (tp->t_state == TCPS_SYN_SENT) rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; else rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; TCPT_RANGESET(tp->t_rxtcur, rexmt, tp->t_rttmin, TCPTV_REXMTMAX); /* * Disable rfc1323 if we haven't got any response to * our third SYN to work-around some broken terminal servers * (most of which have hopefully been retired) that have bad VJ * header compression code which trashes TCP segments containing * unknown-to-them TCP options. 
*/ if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3)) tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP); /* * If we backed off this far, our srtt estimate is probably bogus. * Clobber it so we'll take the next rtt measurement as our srtt; * move the current srtt into rttvar to keep the current * retransmit times until then. */ if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { #ifdef INET6 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) in6_losing(tp->t_inpcb); else #endif tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); tp->t_srtt = 0; } tp->snd_nxt = tp->snd_una; tp->snd_recover = tp->snd_max; /* * Force a segment to be sent. */ tp->t_flags |= TF_ACKNOW; /* * If timing a segment in this window, stop the timer. */ tp->t_rtttime = 0; cc_cong_signal(tp, NULL, CC_RTO); (void) tcp_output(tp); out: #ifdef TCPDEBUG if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, PRU_SLOWTIMO); #endif if (tp != NULL) INP_WUNLOCK(inp); if (headlocked) INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } void tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta) { struct callout *t_callout; void *f_callout; struct inpcb *inp = tp->t_inpcb; int cpu = INP_CPU(inp); switch (timer_type) { case TT_DELACK: t_callout = &tp->t_timers->tt_delack; f_callout = tcp_timer_delack; break; case TT_REXMT: t_callout = &tp->t_timers->tt_rexmt; f_callout = tcp_timer_rexmt; break; case TT_PERSIST: t_callout = &tp->t_timers->tt_persist; f_callout = tcp_timer_persist; break; case TT_KEEP: t_callout = &tp->t_timers->tt_keep; f_callout = tcp_timer_keep; break; case TT_2MSL: t_callout = &tp->t_timers->tt_2msl; f_callout = tcp_timer_2msl; break; default: panic("bad timer_type"); } if (delta == 0) { callout_stop(t_callout); } else { callout_reset_on(t_callout, delta, f_callout, tp, cpu); } } int tcp_timer_active(struct tcpcb *tp, int timer_type) { struct callout *t_callout; switch (timer_type) { case TT_DELACK: t_callout = &tp->t_timers->tt_delack; break; case TT_REXMT: t_callout = &tp->t_timers->tt_rexmt; break; case TT_PERSIST: t_callout = &tp->t_timers->tt_persist; break; case TT_KEEP: t_callout = &tp->t_timers->tt_keep; break; case TT_2MSL: t_callout = &tp->t_timers->tt_2msl; break; default: panic("bad timer_type"); } return callout_active(t_callout); } #define ticks_to_msecs(t) (1000*(t) / hz) +#define bintime_to_msecs(bt) \ + (((uint64_t)1000 * \ + (uint32_t) (bt.frac >> 32)) >> 32) + (bt.sec * 1000); + void tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, struct xtcp_timer *xtimer) { + struct bintime now; + struct bintime tmp; + bzero(xtimer, sizeof(struct xtcp_timer)); if (timer == NULL) return; - if (callout_active(&timer->tt_delack)) - xtimer->tt_delack = ticks_to_msecs(timer->tt_delack.c_time - ticks); - if (callout_active(&timer->tt_rexmt)) - xtimer->tt_rexmt = ticks_to_msecs(timer->tt_rexmt.c_time - ticks); - if (callout_active(&timer->tt_persist)) - xtimer->tt_persist = ticks_to_msecs(timer->tt_persist.c_time - ticks); - if (callout_active(&timer->tt_keep)) - xtimer->tt_keep = ticks_to_msecs(timer->tt_keep.c_time - ticks); - if (callout_active(&timer->tt_2msl)) - xtimer->tt_2msl = ticks_to_msecs(timer->tt_2msl.c_time - ticks); + + if (callout_active(&timer->tt_delack)) { + binuptime(&now); + tmp = timer->tt_delack.c_time; + bintime_sub(&tmp,&now); + xtimer->tt_delack = bintime_to_msecs(tmp); + } + + if (callout_active(&timer->tt_rexmt)) { + binuptime(&now); + tmp = timer->tt_rexmt.c_time; + bintime_sub(&tmp,&now); + 
xtimer->tt_rexmt = bintime_to_msecs(tmp); + } + + if (callout_active(&timer->tt_persist)) { + binuptime(&now); + tmp = timer->tt_persist.c_time; + bintime_sub(&tmp,&now); + xtimer->tt_persist = bintime_to_msecs(tmp); + } + + if (callout_active(&timer->tt_keep)) { + binuptime(&now); + tmp = timer->tt_keep.c_time; + bintime_sub(&tmp,&now); + xtimer->tt_keep = bintime_to_msecs(tmp); + } + + if (callout_active(&timer->tt_2msl)) { + binuptime(&now); + tmp = timer->tt_2msl.c_time; + bintime_sub(&tmp,&now); + xtimer->tt_2msl = bintime_to_msecs(tmp); + } + xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime); } Index: projects/calloutng/sys/sys/_callout.h =================================================================== --- projects/calloutng/sys/sys/_callout.h (revision 236314) +++ projects/calloutng/sys/sys/_callout.h (revision 236315) @@ -1,61 +1,63 @@ /*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)callout.h 8.2 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _SYS__CALLOUT_H #define _SYS__CALLOUT_H #include +#include struct lock_object; SLIST_HEAD(callout_list, callout); TAILQ_HEAD(callout_tailq, callout); struct callout { union { SLIST_ENTRY(callout) sle; TAILQ_ENTRY(callout) tqe; } c_links; - int c_time; /* ticks to the event */ + TAILQ_ENTRY(callout) c_staiter; + struct bintime c_time; /* ticks to the event */ void *c_arg; /* function argument */ void (*c_func)(void *); /* function to call */ struct lock_object *c_lock; /* lock to handle */ int c_flags; /* state of this entry */ volatile int c_cpu; /* CPU we're scheduled on */ }; #endif Index: projects/calloutng/sys/sys/callout.h =================================================================== --- projects/calloutng/sys/sys/callout.h (revision 236314) +++ projects/calloutng/sys/sys/callout.h (revision 236315) @@ -1,87 +1,87 @@ /*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)callout.h 8.2 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _SYS_CALLOUT_H_ #define _SYS_CALLOUT_H_ #include #define CALLOUT_LOCAL_ALLOC 0x0001 /* was allocated from callfree */ #define CALLOUT_ACTIVE 0x0002 /* callout is currently active */ #define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */ #define CALLOUT_MPSAFE 0x0008 /* callout handler is mp safe */ #define CALLOUT_RETURNUNLOCKED 0x0010 /* handler returns with mtx unlocked */ #define CALLOUT_SHAREDLOCK 0x0020 /* callout lock held in shared mode */ #define CALLOUT_DFRMIGRATION 0x0040 /* callout in deferred migration mode */ struct callout_handle { struct callout *callout; }; #ifdef _KERNEL extern int ncallout; #define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE) #define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE) #define callout_drain(c) _callout_stop_safe(c, 1) void callout_init(struct callout *, int); void _callout_init_lock(struct callout *, struct lock_object *, int); #define callout_init_mtx(c, mtx, flags) \ _callout_init_lock((c), ((mtx) != NULL) ? &(mtx)->lock_object : \ NULL, (flags)) #define callout_init_rw(c, rw, flags) \ _callout_init_lock((c), ((rw) != NULL) ? &(rw)->lock_object : \ NULL, (flags)) #define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING) int callout_reset_on(struct callout *, int, void (*)(void *), void *, int); #define callout_reset(c, on_tick, fn, arg) \ callout_reset_on((c), (on_tick), (fn), (arg), (c)->c_cpu) #define callout_reset_curcpu(c, on_tick, fn, arg) \ callout_reset_on((c), (on_tick), (fn), (arg), PCPU_GET(cpuid)) int callout_schedule(struct callout *, int); int callout_schedule_on(struct callout *, int, int); #define callout_schedule_curcpu(c, on_tick) \ callout_schedule_on((c), (on_tick), PCPU_GET(cpuid)) #define callout_stop(c) _callout_stop_safe(c, 0) int _callout_stop_safe(struct callout *, int); void callout_tick(void); -int callout_tickstofirst(int limit); -extern void (*callout_new_inserted)(int cpu, int ticks); +struct bintime callout_tickstofirst(void); +extern void (*callout_new_inserted)(int cpu, struct bintime bt); #endif #endif /* _SYS_CALLOUT_H_ */
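
For illustration only (not part of the patch): a minimal userland sketch of the two pieces of arithmetic this revision introduces in kern_timeout.c — the get_bucket() hash that maps an absolute struct bintime deadline to a callwheel slot, and the FREQ2BT/bintime_mul/bintime_add sequence that callout_reset_on() now uses to turn a tick count into an absolute bintime. The struct bintime definition and the helpers below are simplified stand-ins for the kernel's <sys/time.h> versions, and the hz and callwheelsize values are assumed for the example.

/*
 * Sketch of the bintime bucket/deadline arithmetic from r236315.
 * Everything here is a userland approximation of the kernel helpers
 * the patch relies on; names and constants are assumptions.
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct bintime {
	time_t   sec;
	uint64_t frac;			/* fractional seconds, 2^-64 units */
};

static const int hz = 1000;		/* assumed tick rate */
static const int callwheelsize = 512;	/* assumed power-of-two wheel size */
#define callwheelmask (callwheelsize - 1)

/* Mirrors the kernel's bintime_add(): add with carry out of the fraction. */
static void
bintime_add(struct bintime *bt, const struct bintime *bt2)
{
	uint64_t u = bt->frac;

	bt->frac += bt2->frac;
	if (u > bt->frac)
		bt->sec++;
	bt->sec += bt2->sec;
}

/* Mirrors bintime_mul(): scale a bintime by an integer, 32x32 partial products. */
static void
bintime_mul(struct bintime *bt, unsigned int x)
{
	uint64_t p1, p2;

	p1 = (bt->frac & 0xffffffffull) * x;
	p2 = (bt->frac >> 32) * x + (p1 >> 32);
	bt->sec *= x;
	bt->sec += (p2 >> 32);
	bt->frac = (p2 << 32) | (p1 & 0xffffffffull);
}

/* Mirrors FREQ2BT(): one tick expressed as a bintime (1/freq seconds). */
static void
freq2bt(int freq, struct bintime *bt)
{
	bt->sec = 0;
	bt->frac = (((uint64_t)1 << 63) / freq) << 1;
}

/*
 * Same expression as the patch's get_bucket(): roughly 1/1024 s of
 * resolution per slot, masked down to the wheel size.
 */
static int
get_bucket(const struct bintime *bt)
{
	return (int)((((uint64_t)bt->sec << 10) + (bt->frac >> 54)) &
	    callwheelmask);
}

int
main(void)
{
	struct bintime now = { .sec = 100, .frac = 0 };	/* stand-in for binuptime() */
	struct bintime bt;
	int to_ticks = 250;	/* e.g. a 250-tick (250 ms at hz=1000) timeout */

	/* The ticks-to-absolute-deadline conversion callout_reset_on() now does. */
	freq2bt(hz, &bt);
	bintime_mul(&bt, to_ticks);
	bintime_add(&bt, &now);

	printf("deadline: %jd sec + %.6f s, bucket %d\n",
	    (intmax_t)bt.sec, (double)bt.frac / 18446744073709551615.0,
	    get_bucket(&bt));
	return (0);
}

With the assumed hz=1000 and to_ticks=250, the computed deadline lands about 0.25 s past "now", which callout_cc_add() would then hash into a wheel slot via the same get_bucket() expression; the example only demonstrates the arithmetic, not the locking or wheel traversal in the patch.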