diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index 72e184585667..a22da2bd15d1 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -1,837 +1,837 @@ static volatile int print_tci = 1; /*- * Copyright (c) 1997, 1998 Poul-Henning Kamp * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
- * $Id: kern_clock.c,v 1.63 1998/04/04 13:25:11 phk Exp $
+ * $Id: kern_clock.c,v 1.64 1998/04/04 18:46:09 phk Exp $
 */

/*
 * NOTE(review): the header names of the include directives below are
 * missing in this copy of the file; they are reproduced as found.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#ifdef GPROF
#include
#endif

#if defined(SMP) && defined(BETTER_CLOCK)
#include
#endif

/* Run once at SI_SUB_CLOCKS to set up the clock frequencies. */
static void initclocks __P((void *dummy));
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

/* Timecounter helpers defined near the end of this file. */
static void tco_forward __P((void));
static void tco_setscales __P((struct timecounter *tc));

/* Some of these don't belong here, but it's easiest to concentrate them. */
/*
 * cp_time[] counts statistics ticks per cpu state; statclock() increments
 * the CP_USER, CP_NICE, CP_SYS, CP_INTR and CP_IDLE slots.  It is only
 * given external linkage when both SMP and BETTER_CLOCK are defined.
 */
#if defined(SMP) && defined(BETTER_CLOCK)
long cp_time[CPUSTATES];
#else
static long cp_time[CPUSTATES];
#endif
long dk_seek[DK_NDRIVE];
static long dk_time[DK_NDRIVE];	/* time busy (in statclock ticks) */
long dk_wds[DK_NDRIVE];
long dk_wpms[DK_NDRIVE];
long dk_xfer[DK_NDRIVE];

/* Bit i set => statclock() charges a busy tick to dk_time[i]. */
int dk_busy;
int dk_ndrive = 0;
char dk_names[DK_NDRIVE][DK_NAMELEN];

/*
 * NOTE(review): the tk_* counters are not referenced in the visible part
 * of this file; presumably they are maintained by the tty code -- confirm.
 */
long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

/*
 * The timecounter currently used by the time-of-day routines below.
 * init_timecounter() and tco_forward() store a new pointer here.
 */
struct timecounter *timecounter;

/* Wall-clock seconds as last computed by tco_forward(). */
time_t time_second;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.
If the second timer is programmable, * it is randomized to avoid aliasing between the two clocks. For example, * the randomization prevents an adversary from always giving up the cpu * just before its quantum expires. Otherwise, it would never accumulate * cpu ticks. The mean frequency of the second timer is stathz. * * If no second timer exists, stathz will be zero; in this case we drive * profiling and statistics off the main clock. This WILL NOT be accurate; * do not do it unless absolutely necessary. * * The statistics clock may (or may not) be run at a higher rate while * profiling. This profile clock runs at profhz. We require that profhz * be an integral multiple of stathz. * * If the statistics clock is running fast, it must be divided by the ratio * profhz/stathz for statistics. (For profiling, every tick counts.) * * Time-of-day is maintained using a "timecounter", which may or may * not be related to the hardware generating the above mentioned * interrupts. */ int stathz; int profhz; static int profprocs; int ticks; static int psdiv, pscnt; /* prof => stat divider */ int psratio; /* ratio: prof / stat */ /* * Initialize clock frequencies and start both clocks running. */ /* ARGSUSED*/ static void initclocks(dummy) void *dummy; { register int i; /* * Set divisors to 1 (normal case) and let the machine-specific * code do its bit. */ psdiv = pscnt = 1; cpu_initclocks(); /* * Compute profhz/stathz, and fix profhz if needed. */ i = stathz ? stathz : hz; if (profhz == 0) profhz = i; psratio = profhz / i; } /* * The real-time timer, interrupting hz times per second. */ void hardclock(frame) register struct clockframe *frame; { register struct proc *p; p = curproc; if (p) { register struct pstats *pstats; /* * Run current process's virtual and profile time, as needed. 
*/ pstats = p->p_stats; if (CLKF_USERMODE(frame) && timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) psignal(p, SIGVTALRM); if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) psignal(p, SIGPROF); } #if defined(SMP) && defined(BETTER_CLOCK) forward_hardclock(pscnt); #endif /* * If no separate statistics clock is available, run it from here. */ if (stathz == 0) statclock(frame); tco_forward(); ticks++; /* * Process callouts at a very low cpu priority, so we don't keep the * relatively high clock interrupt priority any longer than necessary. */ if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) { if (CLKF_BASEPRI(frame)) { /* * Save the overhead of a software interrupt; * it will happen as soon as we return, so do it now. */ (void)splsoftclock(); softclock(); } else setsoftclock(); } else if (softticks + 1 == ticks) ++softticks; } /* * Compute number of ticks in the specified amount of time. */ int tvtohz(tv) struct timeval *tv; { register unsigned long ticks; register long sec, usec; /* * If the number of usecs in the whole seconds part of the time * difference fits in a long, then the total number of usecs will * fit in an unsigned long. Compute the total and convert it to * ticks, rounding up and adding 1 to allow for the current tick * to expire. Rounding also depends on unsigned long arithmetic * to avoid overflow. * * Otherwise, if the number of ticks in the whole seconds part of * the time difference fits in a long, then convert the parts to * ticks separately and add, using similar rounding methods and * overflow avoidance. This method would work in the previous * case but it is slightly slower and assumes that hz is integral. * * Otherwise, round the time difference down to the maximum * representable value. * * If ints have 32 bits, then the maximum value for any timeout in * 10ms ticks is 248 days. 
*/ sec = tv->tv_sec; usec = tv->tv_usec; if (usec < 0) { sec--; usec += 1000000; } if (sec < 0) { #ifdef DIAGNOSTIC if (usec > 0) { sec++; usec -= 1000000; } printf("tvotohz: negative time difference %ld sec %ld usec\n", sec, usec); #endif ticks = 1; } else if (sec <= LONG_MAX / 1000000) ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) / tick + 1; else if (sec <= LONG_MAX / hz) ticks = sec * hz + ((unsigned long)usec + (tick - 1)) / tick + 1; else ticks = LONG_MAX; if (ticks > INT_MAX) ticks = INT_MAX; return (ticks); } /* * Compute number of hz until specified time. Used to * compute third argument to timeout() from an absolute time. */ int hzto(tv) struct timeval *tv; { struct timeval t2; getmicrotime(&t2); t2.tv_sec = tv->tv_sec - t2.tv_sec; t2.tv_usec = tv->tv_usec - t2.tv_usec; return (tvtohz(&t2)); } /* * Start profiling on a process. * * Kernel profiling passes proc0 which never exits and hence * keeps the profile clock running constantly. */ void startprofclock(p) register struct proc *p; { int s; if ((p->p_flag & P_PROFIL) == 0) { p->p_flag |= P_PROFIL; if (++profprocs == 1 && stathz != 0) { s = splstatclock(); psdiv = pscnt = psratio; setstatclockrate(profhz); splx(s); } } } /* * Stop profiling on a process. */ void stopprofclock(p) register struct proc *p; { int s; if (p->p_flag & P_PROFIL) { p->p_flag &= ~P_PROFIL; if (--profprocs == 0 && stathz != 0) { s = splstatclock(); psdiv = pscnt = 1; setstatclockrate(stathz); splx(s); } } } /* * Statistics clock. Grab profile sample, and if divider reaches 0, * do process and kernel statistics. 
*/
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;
	struct pstats *pstats;
	long rss;
	struct rusage *ru;
	struct vmspace *vm;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		/* Every tick counts for profiling, even while divided. */
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		/* Statistics run only once every psdiv ticks. */
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * Charge the tick to the process as user time; processes
		 * with p_nice above NZERO are accounted under CP_NICE.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		/* Statistics run only once every psdiv ticks. */
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	/* Re-arm the prof => stat divider for the next statistics tick. */
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principle is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		/* If the increment produced 0, step back rather than reset. */
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		/* Recompute the priority on every fourth increment. */
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}

		/* Update resource usage integrals and maximums. */
		if ((pstats = p->p_stats) != NULL &&
		    (ru = &pstats->p_ru) != NULL &&
		    (vm = p->p_vmspace) != NULL) {
			/* Sizes are in pages; the integrals are in kilobytes. */
			ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024;
			ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024;
			ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024;
			rss = vm->vm_pmap.pm_stats.resident_count *
			      PAGE_SIZE / 1024;
			if (ru->ru_maxrss < rss)
				ru->ru_maxrss = rss;
		}
	}
}

/*
 * Return information about system clocks.
 *
 * Read-only sysctl handler: fills a struct clockinfo from hz, tick,
 * tickadj, profhz and stathz (reported as hz when there is no separate
 * statistics clock) and hands it to sysctl_handle_opaque().
 */
static int
sysctl_kern_clockrate SYSCTL_HANDLER_ARGS
{
	struct clockinfo clkinfo;
	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
	0, 0, sysctl_kern_clockrate, "S,clockinfo","");

/*
 * We have four functions for looking at the clock, two for microseconds
 * and two for nanoseconds.
For each there is fast but less precise * version "get{nano|micro}time" which will return a time which is up * to 1/HZ previous to the call, whereas the raw version "{nano|micro}time" * will return a timestamp which is as precise as possible. */ void getmicrotime(struct timeval *tvp) { struct timecounter *tc; tc = timecounter; *tvp = tc->microtime; } void getnanotime(struct timespec *tsp) { struct timecounter *tc; tc = timecounter; *tsp = tc->nanotime; } void microtime(struct timeval *tv) { struct timecounter *tc; tc = (struct timecounter *)timecounter; tv->tv_sec = tc->offset_sec; tv->tv_usec = tc->offset_micro; tv->tv_usec += ((u_int64_t)tc->get_timedelta(tc) * tc->scale_micro) >> 32; tv->tv_usec += boottime.tv_usec; tv->tv_sec += boottime.tv_sec; while (tv->tv_usec >= 1000000) { tv->tv_usec -= 1000000; tv->tv_sec++; } } void nanotime(struct timespec *tv) { u_int count; u_int64_t delta; struct timecounter *tc; tc = (struct timecounter *)timecounter; tv->tv_sec = tc->offset_sec; count = tc->get_timedelta(tc); delta = tc->offset_nano; delta += ((u_int64_t)count * tc->scale_nano_f); delta >>= 32; delta += ((u_int64_t)count * tc->scale_nano_i); delta += boottime.tv_usec * 1000; tv->tv_sec += boottime.tv_sec; while (delta >= 1000000000) { delta -= 1000000000; tv->tv_sec++; } tv->tv_nsec = delta; } void getmicroruntime(struct timeval *tvp) { struct timecounter *tc; tc = timecounter; tvp->tv_sec = tc->offset_sec; tvp->tv_usec = tc->offset_micro; } void getnanoruntime(struct timespec *tsp) { struct timecounter *tc; tc = timecounter; tsp->tv_sec = tc->offset_sec; tsp->tv_nsec = tc->offset_nano >> 32; } void microruntime(struct timeval *tv) { struct timecounter *tc; tc = (struct timecounter *)timecounter; tv->tv_sec = tc->offset_sec; tv->tv_usec = tc->offset_micro; tv->tv_usec += ((u_int64_t)tc->get_timedelta(tc) * tc->scale_micro) >> 32; if (tv->tv_usec >= 1000000) { tv->tv_usec -= 1000000; tv->tv_sec++; } } void nanoruntime(struct timespec *tv) { u_int count; u_int64_t 
delta; struct timecounter *tc; tc = (struct timecounter *)timecounter; tv->tv_sec = tc->offset_sec; count = tc->get_timedelta(tc); delta = tc->offset_nano; delta += ((u_int64_t)count * tc->scale_nano_f); delta >>= 32; delta += ((u_int64_t)count * tc->scale_nano_i); if (delta >= 1000000000) { delta -= 1000000000; tv->tv_sec++; } tv->tv_nsec = delta; } static void tco_setscales(struct timecounter *tc) { u_int64_t scale; scale = 1000000000LL << 32; if (tc->adjustment > 0) scale += (tc->adjustment * 1000LL) << 10; else scale -= (-tc->adjustment * 1000LL) << 10; scale /= tc->frequency; tc->scale_micro = scale / 1000; tc->scale_nano_f = scale & 0xffffffff; tc->scale_nano_i = scale >> 32; } static u_int delta_timecounter(struct timecounter *tc) { return((tc->get_timecount() - tc->offset_count) & tc->counter_mask); } void init_timecounter(struct timecounter *tc) { struct timespec ts0, ts1; int i; if (!tc->get_timedelta) tc->get_timedelta = delta_timecounter; tc->adjustment = 0; tco_setscales(tc); tc->offset_count = tc->get_timecount(); tc[0].tweak = &tc[0]; tc[2] = tc[1] = tc[0]; tc[1].other = &tc[2]; tc[2].other = &tc[1]; if (!timecounter || !strcmp(timecounter->name, "dummy")) timecounter = &tc[2]; tc = &tc[1]; /* * Figure out the cost of calling this timecounter. * XXX: The 1:15 ratio is a guess at reality. */ nanotime(&ts0); for (i = 0; i < 16; i ++) tc->get_timecount(); for (i = 0; i < 240; i ++) tc->get_timedelta(tc); nanotime(&ts1); ts1.tv_sec -= ts0.tv_sec; tc->cost = ts1.tv_sec * 1000000000 + ts1.tv_nsec - ts0.tv_nsec; tc->cost >>= 8; if (print_tci && strcmp(tc->name, "dummy")) printf("Timecounter \"%s\" frequency %lu Hz cost %u ns\n", tc->name, tc->frequency, tc->cost); /* XXX: For now always start using the counter. 
*/ tc->offset_count = tc->get_timecount(); nanotime(&ts1); tc->offset_nano = (u_int64_t)ts1.tv_nsec << 32; tc->offset_micro = ts1.tv_nsec / 1000; tc->offset_sec = ts1.tv_sec; timecounter = tc; } void set_timecounter(struct timespec *ts) { struct timespec ts2; nanoruntime(&ts2); boottime.tv_sec = ts->tv_sec - ts2.tv_sec; boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000; if (boottime.tv_usec < 0) { boottime.tv_usec += 1000000; boottime.tv_sec--; } /* fiddle all the little crinkly bits around the fiords... */ tco_forward(); } #if 0 /* Currently unused */ void switch_timecounter(struct timecounter *newtc) { int s; struct timecounter *tc; struct timespec ts; s = splclock(); tc = timecounter; if (newtc == tc || newtc == tc->other) { splx(s); return; } nanotime(&ts); newtc->offset_sec = ts.tv_sec; newtc->offset_nano = (u_int64_t)ts.tv_nsec << 32; newtc->offset_micro = ts.tv_nsec / 1000; newtc->offset_count = newtc->get_timecount(); timecounter = newtc; splx(s); } #endif static struct timecounter * sync_other_counter(void) { struct timecounter *tc, *tco; u_int delta; tc = timecounter->other; tco = tc->other; *tc = *timecounter; tc->other = tco; delta = tc->get_timedelta(tc); tc->offset_count += delta; tc->offset_count &= tc->counter_mask; tc->offset_nano += (u_int64_t)delta * tc->scale_nano_f; tc->offset_nano += (u_int64_t)delta * tc->scale_nano_i << 32; return (tc); } static void tco_forward(void) { struct timecounter *tc; tc = sync_other_counter(); if (timedelta != 0) { tc->offset_nano += (u_int64_t)(tickdelta * 1000) << 32; timedelta -= tickdelta; } if (tc->offset_nano >= 1000000000ULL << 32) { tc->offset_nano -= 1000000000ULL << 32; tc->offset_sec++; tc->frequency = tc->tweak->frequency; tc->adjustment = tc->tweak->adjustment; ntp_update_second(tc); /* XXX only needed if xntpd runs */ tco_setscales(tc); } tc->offset_micro = (tc->offset_nano / 1000) >> 32; /* Figure out the wall-clock time */ tc->nanotime.tv_sec = tc->offset_sec + boottime.tv_sec; 
tc->nanotime.tv_nsec = (tc->offset_nano >> 32) + boottime.tv_usec * 1000; tc->microtime.tv_usec = tc->offset_micro + boottime.tv_usec; if (tc->nanotime.tv_nsec > 1000000000) { tc->nanotime.tv_nsec -= 1000000000; tc->microtime.tv_usec -= 1000000; tc->nanotime.tv_sec++; } time_second = tc->microtime.tv_sec = tc->nanotime.tv_sec; timecounter = tc; } static int sysctl_kern_timecounter_frequency SYSCTL_HANDLER_ARGS { return (sysctl_handle_opaque(oidp, &timecounter->tweak->frequency, sizeof(timecounter->tweak->frequency), req)); } static int sysctl_kern_timecounter_adjustment SYSCTL_HANDLER_ARGS { return (sysctl_handle_opaque(oidp, &timecounter->tweak->adjustment, sizeof(timecounter->tweak->adjustment), req)); } SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, ""); SYSCTL_PROC(_kern_timecounter, OID_AUTO, frequency, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_kern_timecounter_frequency, "I", ""); SYSCTL_PROC(_kern_timecounter, OID_AUTO, adjustment, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(int), sysctl_kern_timecounter_adjustment, "I", ""); /* * Implement a dummy timecounter which we can use until we get a real one * in the air. This allows the console and other early stuff to use * timeservices. */ static u_int64_t dummy_get_timecount(void) { static u_int64_t now; return (++now); } static struct timecounter dummy_timecounter[3] = { { 0, dummy_get_timecount, ~0, - 100000, + 1000000, "dummy" } }; static void initdummytimecounter(void *dummy) { init_timecounter(dummy_timecounter); } SYSINIT(dummytc, SI_SUB_CONSOLE, SI_ORDER_FIRST, initdummytimecounter, NULL) diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c index 72e184585667..a22da2bd15d1 100644 --- a/sys/kern/kern_tc.c +++ b/sys/kern/kern_tc.c @@ -1,837 +1,837 @@ static volatile int print_tci = 1; /*- * Copyright (c) 1997, 1998 Poul-Henning Kamp * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. 
* All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 - * $Id: kern_clock.c,v 1.63 1998/04/04 13:25:11 phk Exp $ + * $Id: kern_clock.c,v 1.64 1998/04/04 18:46:09 phk Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef GPROF #include #endif #if defined(SMP) && defined(BETTER_CLOCK) #include #endif static void initclocks __P((void *dummy)); SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL) static void tco_forward __P((void)); static void tco_setscales __P((struct timecounter *tc)); /* Some of these don't belong here, but it's easiest to concentrate them. */ #if defined(SMP) && defined(BETTER_CLOCK) long cp_time[CPUSTATES]; #else static long cp_time[CPUSTATES]; #endif long dk_seek[DK_NDRIVE]; static long dk_time[DK_NDRIVE]; /* time busy (in statclock ticks) */ long dk_wds[DK_NDRIVE]; long dk_wpms[DK_NDRIVE]; long dk_xfer[DK_NDRIVE]; int dk_busy; int dk_ndrive = 0; char dk_names[DK_NDRIVE][DK_NAMELEN]; long tk_cancc; long tk_nin; long tk_nout; long tk_rawcc; struct timecounter *timecounter; time_t time_second; /* * Clock handling routines. * * This code is written to operate with two timers that run independently of * each other. * * The main timer, running hz times per second, is used to trigger interval * timers, timeouts and rescheduling as needed. * * The second timer handles kernel and user profiling, * and does resource use estimation. If the second timer is programmable, * it is randomized to avoid aliasing between the two clocks. For example, * the randomization prevents an adversary from always giving up the cpu * just before its quantum expires. Otherwise, it would never accumulate * cpu ticks. The mean frequency of the second timer is stathz. * * If no second timer exists, stathz will be zero; in this case we drive * profiling and statistics off the main clock. This WILL NOT be accurate; * do not do it unless absolutely necessary. 
* * The statistics clock may (or may not) be run at a higher rate while * profiling. This profile clock runs at profhz. We require that profhz * be an integral multiple of stathz. * * If the statistics clock is running fast, it must be divided by the ratio * profhz/stathz for statistics. (For profiling, every tick counts.) * * Time-of-day is maintained using a "timecounter", which may or may * not be related to the hardware generating the above mentioned * interrupts. */ int stathz; int profhz; static int profprocs; int ticks; static int psdiv, pscnt; /* prof => stat divider */ int psratio; /* ratio: prof / stat */ /* * Initialize clock frequencies and start both clocks running. */ /* ARGSUSED*/ static void initclocks(dummy) void *dummy; { register int i; /* * Set divisors to 1 (normal case) and let the machine-specific * code do its bit. */ psdiv = pscnt = 1; cpu_initclocks(); /* * Compute profhz/stathz, and fix profhz if needed. */ i = stathz ? stathz : hz; if (profhz == 0) profhz = i; psratio = profhz / i; } /* * The real-time timer, interrupting hz times per second. */ void hardclock(frame) register struct clockframe *frame; { register struct proc *p; p = curproc; if (p) { register struct pstats *pstats; /* * Run current process's virtual and profile time, as needed. */ pstats = p->p_stats; if (CLKF_USERMODE(frame) && timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) psignal(p, SIGVTALRM); if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) psignal(p, SIGPROF); } #if defined(SMP) && defined(BETTER_CLOCK) forward_hardclock(pscnt); #endif /* * If no separate statistics clock is available, run it from here. */ if (stathz == 0) statclock(frame); tco_forward(); ticks++; /* * Process callouts at a very low cpu priority, so we don't keep the * relatively high clock interrupt priority any longer than necessary. 
*/ if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) { if (CLKF_BASEPRI(frame)) { /* * Save the overhead of a software interrupt; * it will happen as soon as we return, so do it now. */ (void)splsoftclock(); softclock(); } else setsoftclock(); } else if (softticks + 1 == ticks) ++softticks; } /* * Compute number of ticks in the specified amount of time. */ int tvtohz(tv) struct timeval *tv; { register unsigned long ticks; register long sec, usec; /* * If the number of usecs in the whole seconds part of the time * difference fits in a long, then the total number of usecs will * fit in an unsigned long. Compute the total and convert it to * ticks, rounding up and adding 1 to allow for the current tick * to expire. Rounding also depends on unsigned long arithmetic * to avoid overflow. * * Otherwise, if the number of ticks in the whole seconds part of * the time difference fits in a long, then convert the parts to * ticks separately and add, using similar rounding methods and * overflow avoidance. This method would work in the previous * case but it is slightly slower and assumes that hz is integral. * * Otherwise, round the time difference down to the maximum * representable value. * * If ints have 32 bits, then the maximum value for any timeout in * 10ms ticks is 248 days. */ sec = tv->tv_sec; usec = tv->tv_usec; if (usec < 0) { sec--; usec += 1000000; } if (sec < 0) { #ifdef DIAGNOSTIC if (usec > 0) { sec++; usec -= 1000000; } printf("tvotohz: negative time difference %ld sec %ld usec\n", sec, usec); #endif ticks = 1; } else if (sec <= LONG_MAX / 1000000) ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) / tick + 1; else if (sec <= LONG_MAX / hz) ticks = sec * hz + ((unsigned long)usec + (tick - 1)) / tick + 1; else ticks = LONG_MAX; if (ticks > INT_MAX) ticks = INT_MAX; return (ticks); } /* * Compute number of hz until specified time. Used to * compute third argument to timeout() from an absolute time. 
*/ int hzto(tv) struct timeval *tv; { struct timeval t2; getmicrotime(&t2); t2.tv_sec = tv->tv_sec - t2.tv_sec; t2.tv_usec = tv->tv_usec - t2.tv_usec; return (tvtohz(&t2)); } /* * Start profiling on a process. * * Kernel profiling passes proc0 which never exits and hence * keeps the profile clock running constantly. */ void startprofclock(p) register struct proc *p; { int s; if ((p->p_flag & P_PROFIL) == 0) { p->p_flag |= P_PROFIL; if (++profprocs == 1 && stathz != 0) { s = splstatclock(); psdiv = pscnt = psratio; setstatclockrate(profhz); splx(s); } } } /* * Stop profiling on a process. */ void stopprofclock(p) register struct proc *p; { int s; if (p->p_flag & P_PROFIL) { p->p_flag &= ~P_PROFIL; if (--profprocs == 0 && stathz != 0) { s = splstatclock(); psdiv = pscnt = 1; setstatclockrate(stathz); splx(s); } } } /* * Statistics clock. Grab profile sample, and if divider reaches 0, * do process and kernel statistics. */ void statclock(frame) register struct clockframe *frame; { #ifdef GPROF register struct gmonparam *g; #endif register struct proc *p; register int i; struct pstats *pstats; long rss; struct rusage *ru; struct vmspace *vm; if (CLKF_USERMODE(frame)) { p = curproc; if (p->p_flag & P_PROFIL) addupc_intr(p, CLKF_PC(frame), 1); #if defined(SMP) && defined(BETTER_CLOCK) if (stathz != 0) forward_statclock(pscnt); #endif if (--pscnt > 0) return; /* * Came from user mode; CPU was in user state. * If this process is being profiled record the tick. */ p->p_uticks++; if (p->p_nice > NZERO) cp_time[CP_NICE]++; else cp_time[CP_USER]++; } else { #ifdef GPROF /* * Kernel statistics are just like addupc_intr, only easier. 
*/ g = &_gmonparam; if (g->state == GMON_PROF_ON) { i = CLKF_PC(frame) - g->lowpc; if (i < g->textsize) { i /= HISTFRACTION * sizeof(*g->kcount); g->kcount[i]++; } } #endif #if defined(SMP) && defined(BETTER_CLOCK) if (stathz != 0) forward_statclock(pscnt); #endif if (--pscnt > 0) return; /* * Came from kernel mode, so we were: * - handling an interrupt, * - doing syscall or trap work on behalf of the current * user process, or * - spinning in the idle loop. * Whichever it is, charge the time as appropriate. * Note that we charge interrupts to the current process, * regardless of whether they are ``for'' that process, * so that we know how much of its real time was spent * in ``non-process'' (i.e., interrupt) work. */ p = curproc; if (CLKF_INTR(frame)) { if (p != NULL) p->p_iticks++; cp_time[CP_INTR]++; } else if (p != NULL) { p->p_sticks++; cp_time[CP_SYS]++; } else cp_time[CP_IDLE]++; } pscnt = psdiv; /* * We maintain statistics shown by user-level statistics * programs: the amount of time in each cpu state, and * the amount of time each of DK_NDRIVE ``drives'' is busy. * * XXX should either run linked list of drives, or (better) * grab timestamps in the start & done code. */ for (i = 0; i < DK_NDRIVE; i++) if (dk_busy & (1 << i)) dk_time[i]++; /* * We adjust the priority of the current process. The priority of * a process gets worse as it accumulates CPU time. The cpu usage * estimator (p_estcpu) is increased here. The formula for computing * priorities (in kern_synch.c) will compute a different value each * time p_estcpu increases by 4. The cpu usage estimator ramps up * quite quickly when the process is running (linearly), and decays * away exponentially, at a rate which is proportionally slower when * the system is busy. The basic principal is that the system will * 90% forget that the process used a lot of CPU time in 5 * loadav * seconds. This causes the system to favor processes which haven't * run much recently, and to round-robin among other processes. 
*/ if (p != NULL) { p->p_cpticks++; if (++p->p_estcpu == 0) p->p_estcpu--; if ((p->p_estcpu & 3) == 0) { resetpriority(p); if (p->p_priority >= PUSER) p->p_priority = p->p_usrpri; } /* Update resource usage integrals and maximums. */ if ((pstats = p->p_stats) != NULL && (ru = &pstats->p_ru) != NULL && (vm = p->p_vmspace) != NULL) { ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024; ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024; ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024; rss = vm->vm_pmap.pm_stats.resident_count * PAGE_SIZE / 1024; if (ru->ru_maxrss < rss) ru->ru_maxrss = rss; } } } /* * Return information about system clocks. */ static int sysctl_kern_clockrate SYSCTL_HANDLER_ARGS { struct clockinfo clkinfo; /* * Construct clockinfo structure. */ clkinfo.hz = hz; clkinfo.tick = tick; clkinfo.tickadj = tickadj; clkinfo.profhz = profhz; clkinfo.stathz = stathz ? stathz : hz; return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); } SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD, 0, 0, sysctl_kern_clockrate, "S,clockinfo",""); /* * We have four functions for looking at the clock, two for microseconds * and two for nanoseconds. For each there is fast but less precise * version "get{nano|micro}time" which will return a time which is up * to 1/HZ previous to the call, whereas the raw version "{nano|micro}time" * will return a timestamp which is as precise as possible. 
*/ void getmicrotime(struct timeval *tvp) { struct timecounter *tc; tc = timecounter; *tvp = tc->microtime; } void getnanotime(struct timespec *tsp) { struct timecounter *tc; tc = timecounter; *tsp = tc->nanotime; } void microtime(struct timeval *tv) { struct timecounter *tc; tc = (struct timecounter *)timecounter; tv->tv_sec = tc->offset_sec; tv->tv_usec = tc->offset_micro; tv->tv_usec += ((u_int64_t)tc->get_timedelta(tc) * tc->scale_micro) >> 32; tv->tv_usec += boottime.tv_usec; tv->tv_sec += boottime.tv_sec; while (tv->tv_usec >= 1000000) { tv->tv_usec -= 1000000; tv->tv_sec++; } } void nanotime(struct timespec *tv) { u_int count; u_int64_t delta; struct timecounter *tc; tc = (struct timecounter *)timecounter; tv->tv_sec = tc->offset_sec; count = tc->get_timedelta(tc); delta = tc->offset_nano; delta += ((u_int64_t)count * tc->scale_nano_f); delta >>= 32; delta += ((u_int64_t)count * tc->scale_nano_i); delta += boottime.tv_usec * 1000; tv->tv_sec += boottime.tv_sec; while (delta >= 1000000000) { delta -= 1000000000; tv->tv_sec++; } tv->tv_nsec = delta; } void getmicroruntime(struct timeval *tvp) { struct timecounter *tc; tc = timecounter; tvp->tv_sec = tc->offset_sec; tvp->tv_usec = tc->offset_micro; } void getnanoruntime(struct timespec *tsp) { struct timecounter *tc; tc = timecounter; tsp->tv_sec = tc->offset_sec; tsp->tv_nsec = tc->offset_nano >> 32; } void microruntime(struct timeval *tv) { struct timecounter *tc; tc = (struct timecounter *)timecounter; tv->tv_sec = tc->offset_sec; tv->tv_usec = tc->offset_micro; tv->tv_usec += ((u_int64_t)tc->get_timedelta(tc) * tc->scale_micro) >> 32; if (tv->tv_usec >= 1000000) { tv->tv_usec -= 1000000; tv->tv_sec++; } } void nanoruntime(struct timespec *tv) { u_int count; u_int64_t delta; struct timecounter *tc; tc = (struct timecounter *)timecounter; tv->tv_sec = tc->offset_sec; count = tc->get_timedelta(tc); delta = tc->offset_nano; delta += ((u_int64_t)count * tc->scale_nano_f); delta >>= 32; delta += ((u_int64_t)count 
* tc->scale_nano_i); if (delta >= 1000000000) { delta -= 1000000000; tv->tv_sec++; } tv->tv_nsec = delta; } static void tco_setscales(struct timecounter *tc) { u_int64_t scale; scale = 1000000000LL << 32; if (tc->adjustment > 0) scale += (tc->adjustment * 1000LL) << 10; else scale -= (-tc->adjustment * 1000LL) << 10; scale /= tc->frequency; tc->scale_micro = scale / 1000; tc->scale_nano_f = scale & 0xffffffff; tc->scale_nano_i = scale >> 32; } static u_int delta_timecounter(struct timecounter *tc) { return((tc->get_timecount() - tc->offset_count) & tc->counter_mask); } void init_timecounter(struct timecounter *tc) { struct timespec ts0, ts1; int i; if (!tc->get_timedelta) tc->get_timedelta = delta_timecounter; tc->adjustment = 0; tco_setscales(tc); tc->offset_count = tc->get_timecount(); tc[0].tweak = &tc[0]; tc[2] = tc[1] = tc[0]; tc[1].other = &tc[2]; tc[2].other = &tc[1]; if (!timecounter || !strcmp(timecounter->name, "dummy")) timecounter = &tc[2]; tc = &tc[1]; /* * Figure out the cost of calling this timecounter. * XXX: The 1:15 ratio is a guess at reality. */ nanotime(&ts0); for (i = 0; i < 16; i ++) tc->get_timecount(); for (i = 0; i < 240; i ++) tc->get_timedelta(tc); nanotime(&ts1); ts1.tv_sec -= ts0.tv_sec; tc->cost = ts1.tv_sec * 1000000000 + ts1.tv_nsec - ts0.tv_nsec; tc->cost >>= 8; if (print_tci && strcmp(tc->name, "dummy")) printf("Timecounter \"%s\" frequency %lu Hz cost %u ns\n", tc->name, tc->frequency, tc->cost); /* XXX: For now always start using the counter. 
*/ tc->offset_count = tc->get_timecount(); nanotime(&ts1); tc->offset_nano = (u_int64_t)ts1.tv_nsec << 32; tc->offset_micro = ts1.tv_nsec / 1000; tc->offset_sec = ts1.tv_sec; timecounter = tc; } void set_timecounter(struct timespec *ts) { struct timespec ts2; nanoruntime(&ts2); boottime.tv_sec = ts->tv_sec - ts2.tv_sec; boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000; if (boottime.tv_usec < 0) { boottime.tv_usec += 1000000; boottime.tv_sec--; } /* fiddle all the little crinkly bits around the fiords... */ tco_forward(); } #if 0 /* Currently unused */ void switch_timecounter(struct timecounter *newtc) { int s; struct timecounter *tc; struct timespec ts; s = splclock(); tc = timecounter; if (newtc == tc || newtc == tc->other) { splx(s); return; } nanotime(&ts); newtc->offset_sec = ts.tv_sec; newtc->offset_nano = (u_int64_t)ts.tv_nsec << 32; newtc->offset_micro = ts.tv_nsec / 1000; newtc->offset_count = newtc->get_timecount(); timecounter = newtc; splx(s); } #endif static struct timecounter * sync_other_counter(void) { struct timecounter *tc, *tco; u_int delta; tc = timecounter->other; tco = tc->other; *tc = *timecounter; tc->other = tco; delta = tc->get_timedelta(tc); tc->offset_count += delta; tc->offset_count &= tc->counter_mask; tc->offset_nano += (u_int64_t)delta * tc->scale_nano_f; tc->offset_nano += (u_int64_t)delta * tc->scale_nano_i << 32; return (tc); } static void tco_forward(void) { struct timecounter *tc; tc = sync_other_counter(); if (timedelta != 0) { tc->offset_nano += (u_int64_t)(tickdelta * 1000) << 32; timedelta -= tickdelta; } if (tc->offset_nano >= 1000000000ULL << 32) { tc->offset_nano -= 1000000000ULL << 32; tc->offset_sec++; tc->frequency = tc->tweak->frequency; tc->adjustment = tc->tweak->adjustment; ntp_update_second(tc); /* XXX only needed if xntpd runs */ tco_setscales(tc); } tc->offset_micro = (tc->offset_nano / 1000) >> 32; /* Figure out the wall-clock time */ tc->nanotime.tv_sec = tc->offset_sec + boottime.tv_sec; 
tc->nanotime.tv_nsec = (tc->offset_nano >> 32) + boottime.tv_usec * 1000; tc->microtime.tv_usec = tc->offset_micro + boottime.tv_usec; /* * Carry into the seconds field when the sum reaches a full second, * not only when it exceeds one: tv_nsec must stay below 1000000000 * and tv_usec below 1000000. */ if (tc->nanotime.tv_nsec >= 1000000000) { tc->nanotime.tv_nsec -= 1000000000; tc->microtime.tv_usec -= 1000000; tc->nanotime.tv_sec++; } time_second = tc->microtime.tv_sec = tc->nanotime.tv_sec; timecounter = tc; } static int sysctl_kern_timecounter_frequency SYSCTL_HANDLER_ARGS { return (sysctl_handle_opaque(oidp, &timecounter->tweak->frequency, sizeof(timecounter->tweak->frequency), req)); } static int sysctl_kern_timecounter_adjustment SYSCTL_HANDLER_ARGS { return (sysctl_handle_opaque(oidp, &timecounter->tweak->adjustment, sizeof(timecounter->tweak->adjustment), req)); } SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, ""); SYSCTL_PROC(_kern_timecounter, OID_AUTO, frequency, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_kern_timecounter_frequency, "I", ""); SYSCTL_PROC(_kern_timecounter, OID_AUTO, adjustment, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(int), sysctl_kern_timecounter_adjustment, "I", ""); /* * Implement a dummy timecounter which we can use until we get a real one * in the air. This allows the console and other early stuff to use * timeservices. */ static u_int64_t dummy_get_timecount(void) { static u_int64_t now; return (++now); } static struct timecounter dummy_timecounter[3] = { { 0, dummy_get_timecount, ~0, - 100000, + 1000000, "dummy" } }; static void initdummytimecounter(void *dummy) { init_timecounter(dummy_timecounter); } SYSINIT(dummytc, SI_SUB_CONSOLE, SI_ORDER_FIRST, initdummytimecounter, NULL) diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c index b8da9b080b99..6337414b9fe5 100644 --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -1,687 +1,686 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_time.c 8.1 (Berkeley) 6/10/93 - * $Id: kern_time.c,v 1.48 1998/04/05 10:28:01 phk Exp $ + * $Id: kern_time.c,v 1.49 1998/04/05 11:17:19 peter Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include struct timezone tz; /* * Time of day and interval timer support. * * These routines provide the kernel entry points to get and set * the time-of-day and per-process interval timers. Subroutines * here provide support for adding and subtracting timeval structures * and decrementing interval timers, optionally reloading the interval * timers when they expire. */ static int nanosleep1 __P((struct proc *p, struct timespec *rqt, struct timespec *rmt)); static int settime __P((struct timeval *)); static void timevalfix __P((struct timeval *)); static void no_lease_updatetime __P((int)); static void no_lease_updatetime(deltat) int deltat; { } void (*lease_updatetime) __P((int)) = no_lease_updatetime; static int settime(tv) struct timeval *tv; { struct timeval delta, tv1; struct timespec ts; struct proc *p; int s; s = splclock(); microtime(&tv1); delta = *tv; timevalsub(&delta, &tv1); /* * If the system is secure, we do not allow the time to be * set to an earlier value (it may be slowed using adjtime, * but not set back). This feature prevent interlopers from * setting arbitrary time stamps on files. 
*/ if (delta.tv_sec < 0 && securelevel > 1) { splx(s); return (EPERM); } ts.tv_sec = tv->tv_sec; ts.tv_nsec = tv->tv_usec * 1000; set_timecounter(&ts); (void) splsoftclock(); - for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { - if (timerisset(&p->p_realtimer.it_value)) - timevaladd(&p->p_realtimer.it_value, &delta); - } lease_updatetime(delta.tv_sec); splx(s); resettodr(); return (0); } #ifndef _SYS_SYSPROTO_H_ struct clock_gettime_args { clockid_t clock_id; struct timespec *tp; }; #endif /* ARGSUSED */ int clock_gettime(p, uap) struct proc *p; struct clock_gettime_args *uap; { struct timespec ats; if (SCARG(uap, clock_id) != CLOCK_REALTIME) return (EINVAL); nanotime(&ats); return (copyout(&ats, SCARG(uap, tp), sizeof(ats))); } #ifndef _SYS_SYSPROTO_H_ struct clock_settime_args { clockid_t clock_id; const struct timespec *tp; }; #endif /* ARGSUSED */ int clock_settime(p, uap) struct proc *p; struct clock_settime_args *uap; { struct timeval atv; struct timespec ats; int error; if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) return (error); if (SCARG(uap, clock_id) != CLOCK_REALTIME) return (EINVAL); if ((error = copyin(SCARG(uap, tp), &ats, sizeof(ats))) != 0) return (error); if (ats.tv_nsec < 0 || ats.tv_nsec >= 1000000000) return (EINVAL); /* XXX Don't convert nsec->usec and back */ TIMESPEC_TO_TIMEVAL(&atv, &ats); if ((error = settime(&atv))) return (error); return (0); } #ifndef _SYS_SYSPROTO_H_ struct clock_getres_args { clockid_t clock_id; struct timespec *tp; }; #endif int clock_getres(p, uap) struct proc *p; struct clock_getres_args *uap; { struct timespec ts; int error; if (SCARG(uap, clock_id) != CLOCK_REALTIME) return (EINVAL); error = 0; if (SCARG(uap, tp)) { ts.tv_sec = 0; ts.tv_nsec = 1000000000 / timecounter->frequency; error = copyout(&ts, SCARG(uap, tp), sizeof(ts)); } return (error); } static int nanowait; static int nanosleep1(p, rqt, rmt) struct proc *p; struct timespec *rqt, *rmt; { struct timespec ts, ts2; struct timeval tv; int 
error; if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000) return (EINVAL); if (rqt->tv_sec < 0 || rqt->tv_sec == 0 && rqt->tv_nsec == 0) return (0); getnanoruntime(&ts); timespecadd(&ts, rqt); TIMESPEC_TO_TIMEVAL(&tv, rqt); for (;;) { error = tsleep(&nanowait, PWAIT | PCATCH, "nanslp", tvtohz(&tv)); getnanoruntime(&ts2); if (error != EWOULDBLOCK) { if (error == ERESTART) error = EINTR; if (rmt != NULL) { timespecsub(&ts, &ts2); if (ts.tv_sec < 0) timespecclear(&ts); *rmt = ts; } return (error); } if (timespeccmp(&ts2, &ts, >=)) return (0); getnanoruntime(&ts2); timespecsub(&ts2, &ts); TIMESPEC_TO_TIMEVAL(&tv, &ts2); } } #ifndef _SYS_SYSPROTO_H_ struct nanosleep_args { struct timespec *rqtp; struct timespec *rmtp; }; #endif /* ARGSUSED */ int nanosleep(p, uap) struct proc *p; struct nanosleep_args *uap; { struct timespec rmt, rqt; int error, error2; error = copyin(SCARG(uap, rqtp), &rqt, sizeof(rqt)); if (error) return (error); if (SCARG(uap, rmtp)) if (!useracc((caddr_t)SCARG(uap, rmtp), sizeof(rmt), B_WRITE)) return (EFAULT); error = nanosleep1(p, &rqt, &rmt); if (error && SCARG(uap, rmtp)) { error2 = copyout(&rmt, SCARG(uap, rmtp), sizeof(rmt)); if (error2) /* XXX shouldn't happen, did useracc() above */ return (error2); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct signanosleep_args { struct timespec *rqtp; struct timespec *rmtp; sigset_t *mask; }; #endif /* ARGSUSED */ int signanosleep(p, uap) struct proc *p; struct signanosleep_args *uap; { struct timespec rmt, rqt; int error, error2; sigset_t mask; error = copyin(SCARG(uap, rqtp), &rqt, sizeof(rqt)); if (error) return (error); if (SCARG(uap, rmtp)) if (!useracc((caddr_t)SCARG(uap, rmtp), sizeof(rmt), B_WRITE)) return (EFAULT); error = copyin(SCARG(uap, mask), &mask, sizeof(mask)); if (error) return (error); /* change mask for sleep */ p->p_sigmask = mask &~ sigcantmask; error = nanosleep1(p, &rqt, &rmt); if (error && SCARG(uap, rmtp)) { error2 = copyout(&rmt, SCARG(uap, rmtp), sizeof(rmt)); if (error2) 
/* XXX shouldn't happen, did useracc() above */ return (error2); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct gettimeofday_args { struct timeval *tp; struct timezone *tzp; }; #endif /* ARGSUSED */ int gettimeofday(p, uap) struct proc *p; register struct gettimeofday_args *uap; { struct timeval atv; int error = 0; if (uap->tp) { microtime(&atv); if ((error = copyout((caddr_t)&atv, (caddr_t)uap->tp, sizeof (atv)))) return (error); } if (uap->tzp) error = copyout((caddr_t)&tz, (caddr_t)uap->tzp, sizeof (tz)); return (error); } #ifndef _SYS_SYSPROTO_H_ struct settimeofday_args { struct timeval *tv; struct timezone *tzp; }; #endif /* ARGSUSED */ int settimeofday(p, uap) struct proc *p; struct settimeofday_args *uap; { struct timeval atv; struct timezone atz; int error; if ((error = suser(p->p_ucred, &p->p_acflag))) return (error); /* Verify all parameters before changing time. */ if (uap->tv) { if ((error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof(atv)))) return (error); if (atv.tv_usec < 0 || atv.tv_usec >= 1000000) return (EINVAL); } if (uap->tzp && (error = copyin((caddr_t)uap->tzp, (caddr_t)&atz, sizeof(atz)))) return (error); if (uap->tv && (error = settime(&atv))) return (error); if (uap->tzp) tz = atz; return (0); } int tickdelta; /* current clock skew, us. per tick */ long timedelta; /* unapplied time correction, us. */ static long bigadj = 1000000; /* use 10x skew above bigadj us. */ #ifndef _SYS_SYSPROTO_H_ struct adjtime_args { struct timeval *delta; struct timeval *olddelta; }; #endif /* ARGSUSED */ int adjtime(p, uap) struct proc *p; register struct adjtime_args *uap; { struct timeval atv; register long ndelta, ntickdelta, odelta; int s, error; if ((error = suser(p->p_ucred, &p->p_acflag))) return (error); if ((error = copyin((caddr_t)uap->delta, (caddr_t)&atv, sizeof(struct timeval)))) return (error); /* * Compute the total correction and the rate at which to apply it. 
* Round the adjustment down to a whole multiple of the per-tick * delta, so that after some number of incremental changes in * hardclock(), tickdelta will become zero, lest the correction * overshoot and start taking us away from the desired final time. */ ndelta = atv.tv_sec * 1000000 + atv.tv_usec; if (ndelta > bigadj || ndelta < -bigadj) ntickdelta = 10 * tickadj; else ntickdelta = tickadj; if (ndelta % ntickdelta) ndelta = ndelta / ntickdelta * ntickdelta; /* * To make hardclock()'s job easier, make the per-tick delta negative * if we want time to run slower; then hardclock can simply compute * tick + tickdelta, and subtract tickdelta from timedelta. */ if (ndelta < 0) ntickdelta = -ntickdelta; s = splclock(); odelta = timedelta; timedelta = ndelta; tickdelta = ntickdelta; splx(s); if (uap->olddelta) { atv.tv_sec = odelta / 1000000; atv.tv_usec = odelta % 1000000; (void) copyout((caddr_t)&atv, (caddr_t)uap->olddelta, sizeof(struct timeval)); } return (0); } /* * Get value of an interval timer. The process virtual and * profiling virtual time timers are kept in the p_stats area, since * they can be swapped out. These are kept internally in the * way they are specified externally: in time until they expire. * * The real time interval timer is kept in the process table slot * for the process, and its value (it_value) is kept as an * absolute time rather than as a delta, so that it is easy to keep * periodic real-time signals from drifting. * * Virtual time timers are processed in the hardclock() routine of * kern_clock.c. The real time timer is processed by a timeout * routine, called from the softclock() routine. Since a callout * may be delayed in real time due to interrupt processing in the system, * it is possible for the real time timeout routine (realitexpire, given below), * to be delayed in real time past when it is supposed to occur. It * does not suffice, therefore, to reload the real timer .it_value from the * real time timers .it_interval. 
Rather, we compute the next time in * absolute time the timer should go off. */ #ifndef _SYS_SYSPROTO_H_ struct getitimer_args { u_int which; struct itimerval *itv; }; #endif /* ARGSUSED */ int getitimer(p, uap) struct proc *p; register struct getitimer_args *uap; { struct timeval ctv; struct itimerval aitv; int s; if (uap->which > ITIMER_PROF) return (EINVAL); s = splclock(); /* XXX still needed ? */ if (uap->which == ITIMER_REAL) { /* * Convert from absolute to relative time in .it_value * part of real time timer. If time for real time timer * has passed return 0, else return difference between * current time and time for the timer to go off. */ aitv = p->p_realtimer; if (timerisset(&aitv.it_value)) { - getmicrotime(&ctv); + getmicroruntime(&ctv); if (timercmp(&aitv.it_value, &ctv, <)) timerclear(&aitv.it_value); else timevalsub(&aitv.it_value, &ctv); } } else aitv = p->p_stats->p_timer[uap->which]; splx(s); return (copyout((caddr_t)&aitv, (caddr_t)uap->itv, sizeof (struct itimerval))); } #ifndef _SYS_SYSPROTO_H_ struct setitimer_args { u_int which; struct itimerval *itv, *oitv; }; #endif /* ARGSUSED */ int setitimer(p, uap) struct proc *p; register struct setitimer_args *uap; { struct itimerval aitv; struct timeval ctv; register struct itimerval *itvp; int s, error; if (uap->which > ITIMER_PROF) return (EINVAL); itvp = uap->itv; if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv, sizeof(struct itimerval)))) return (error); if ((uap->itv = uap->oitv) && (error = getitimer(p, (struct getitimer_args *)uap))) return (error); if (itvp == 0) return (0); if (itimerfix(&aitv.it_value)) return (EINVAL); if (!timerisset(&aitv.it_value)) timerclear(&aitv.it_interval); else if (itimerfix(&aitv.it_interval)) return (EINVAL); s = splclock(); /* XXX: still needed ?
*/ if (uap->which == ITIMER_REAL) { if (timerisset(&p->p_realtimer.it_value)) untimeout(realitexpire, (caddr_t)p, p->p_ithandle); /* * Only an armed timer is converted to an absolute expiry time. * A zero it_value means "disabled" and must be stored as zero; * otherwise timerisset() on p_realtimer.it_value would later be * true for a timer that was never scheduled, and the next * setitimer() would untimeout() a stale handle. */ if (timerisset(&aitv.it_value)) { p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&aitv.it_value)); + getmicroruntime(&ctv); + timevaladd(&aitv.it_value, &ctv); + } p->p_realtimer = aitv; } else p->p_stats->p_timer[uap->which] = aitv; splx(s); return (0); } /* * Real interval timer expired: * send process whose timer expired an alarm signal. * If time is not set up to reload, then just return. * Else compute next time timer should go off which is > current time. * This is where delay in processing this timeout causes multiple * SIGALRM calls to be compressed into one. * hzto() always adds 1 to allow for the time until the next clock * interrupt being strictly less than 1 clock tick, but we don't want * that here since we want to appear to be in sync with the clock * interrupt even when we're delayed. */ void realitexpire(arg) void *arg; { register struct proc *p; - struct timeval ctv; + struct timeval ctv, ntv; int s; p = (struct proc *)arg; psignal(p, SIGALRM); if (!timerisset(&p->p_realtimer.it_interval)) { timerclear(&p->p_realtimer.it_value); return; } for (;;) { s = splclock(); /* XXX: still needed ? */ timevaladd(&p->p_realtimer.it_value, &p->p_realtimer.it_interval); - getmicrotime(&ctv); + getmicroruntime(&ctv); if (timercmp(&p->p_realtimer.it_value, &ctv, >)) { + ntv = p->p_realtimer.it_value; + timevalsub(&ntv, &ctv); p->p_ithandle = - timeout(realitexpire, (caddr_t)p, - hzto(&p->p_realtimer.it_value) - 1); + timeout(realitexpire, (caddr_t)p, tvtohz(&ntv)); splx(s); return; } splx(s); } } /* * Check that a proposed value to load into the .it_value or * .it_interval part of an interval timer is acceptable, and * fix it to have at least minimal value (i.e. if it is less * than the resolution of the clock, round it up.)
*/ int itimerfix(tv) struct timeval *tv; { if (tv->tv_sec < 0 || tv->tv_sec > 100000000 || tv->tv_usec < 0 || tv->tv_usec >= 1000000) return (EINVAL); if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick) tv->tv_usec = tick; return (0); } /* * Decrement an interval timer by a specified number * of microseconds, which must be less than a second, * i.e. < 1000000. If the timer expires, then reload * it. In this case, carry over (usec - old value) to * reduce the value reloaded into the timer so that * the timer does not drift. This routine assumes * that it is called in a context where the timers * on which it is operating cannot change in value. */ int itimerdecr(itp, usec) register struct itimerval *itp; int usec; { if (itp->it_value.tv_usec < usec) { if (itp->it_value.tv_sec == 0) { /* expired, and already in next interval */ usec -= itp->it_value.tv_usec; goto expire; } itp->it_value.tv_usec += 1000000; itp->it_value.tv_sec--; } itp->it_value.tv_usec -= usec; usec = 0; if (timerisset(&itp->it_value)) return (1); /* expired, exactly at end of interval */ expire: if (timerisset(&itp->it_interval)) { itp->it_value = itp->it_interval; itp->it_value.tv_usec -= usec; if (itp->it_value.tv_usec < 0) { itp->it_value.tv_usec += 1000000; itp->it_value.tv_sec--; } } else itp->it_value.tv_usec = 0; /* sec is already 0 */ return (0); } /* * Add and subtract routines for timevals. * N.B.: subtract routine doesn't deal with * results which are before the beginning, * it just gets very confused in this case. * Caveat emptor. */ void timevaladd(t1, t2) struct timeval *t1, *t2; { t1->tv_sec += t2->tv_sec; t1->tv_usec += t2->tv_usec; timevalfix(t1); } void timevalsub(t1, t2) struct timeval *t1, *t2; { t1->tv_sec -= t2->tv_sec; t1->tv_usec -= t2->tv_usec; timevalfix(t1); } static void timevalfix(t1) struct timeval *t1; { if (t1->tv_usec < 0) { t1->tv_sec--; t1->tv_usec += 1000000; } if (t1->tv_usec >= 1000000) { t1->tv_sec++; t1->tv_usec -= 1000000; } }