Index: head/sys/kern/kern_time.c =================================================================== --- head/sys/kern/kern_time.c (revision 302769) +++ head/sys/kern/kern_time.c (revision 302770) @@ -1,1662 +1,1679 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_time.c 8.1 (Berkeley) 6/10/93 */ #include __FBSDID("$FreeBSD$"); +#include "opt_ktrace.h" + #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#ifdef KTRACE +#include +#endif #include #include #define MAX_CLOCKS (CLOCK_MONOTONIC+1) #define CPUCLOCK_BIT 0x80000000 #define CPUCLOCK_PROCESS_BIT 0x40000000 #define CPUCLOCK_ID_MASK (~(CPUCLOCK_BIT|CPUCLOCK_PROCESS_BIT)) #define MAKE_THREAD_CPUCLOCK(tid) (CPUCLOCK_BIT|(tid)) #define MAKE_PROCESS_CPUCLOCK(pid) \ (CPUCLOCK_BIT|CPUCLOCK_PROCESS_BIT|(pid)) static struct kclock posix_clocks[MAX_CLOCKS]; static uma_zone_t itimer_zone = NULL; /* * Time of day and interval timer support. * * These routines provide the kernel entry points to get and set * the time-of-day and per-process interval timers. Subroutines * here provide support for adding and subtracting timeval structures * and decrementing interval timers, optionally reloading the interval * timers when they expire. */ static int settime(struct thread *, struct timeval *); static void timevalfix(struct timeval *); static void itimer_start(void); static int itimer_init(void *, int, int); static void itimer_fini(void *, int); static void itimer_enter(struct itimer *); static void itimer_leave(struct itimer *); static struct itimer *itimer_find(struct proc *, int); static void itimers_alloc(struct proc *); static void itimers_event_hook_exec(void *arg, struct proc *p, struct image_params *imgp); static void itimers_event_hook_exit(void *arg, struct proc *p); static int realtimer_create(struct itimer *); static int realtimer_gettime(struct itimer *, struct itimerspec *); static int realtimer_settime(struct itimer *, int, struct itimerspec *, struct itimerspec *); static int realtimer_delete(struct itimer *); static void realtimer_clocktime(clockid_t, struct timespec *); static void realtimer_expire(void *); int register_posix_clock(int, struct kclock *); void itimer_fire(struct itimer *it); int itimespecfix(struct timespec *ts); #define CLOCK_CALL(clock, call, arglist) \ ((*posix_clocks[clock].call) arglist) SYSINIT(posix_timer, SI_SUB_P1003_1B, SI_ORDER_FIRST+4, itimer_start, NULL); static int settime(struct thread *td, struct timeval *tv) { struct timeval delta, tv1, tv2; static struct timeval maxtime, laststep; struct timespec ts; int s; s = splclock(); microtime(&tv1); delta = *tv; timevalsub(&delta, &tv1); /* * If the system is secure, we do not allow the time to be * set to a value earlier than 1 second less than the highest * time we have yet seen. The worst a miscreant can do in * this circumstance is "freeze" time. He couldn't go * back to the past. * * We similarly do not allow the clock to be stepped more * than one second, nor more than once per second. This allows * a miscreant to make the clock march double-time, but no worse. */ if (securelevel_gt(td->td_ucred, 1) != 0) { if (delta.tv_sec < 0 || delta.tv_usec < 0) { /* * Update maxtime to latest time we've seen. */ if (tv1.tv_sec > maxtime.tv_sec) maxtime = tv1; tv2 = *tv; timevalsub(&tv2, &maxtime); if (tv2.tv_sec < -1) { tv->tv_sec = maxtime.tv_sec - 1; printf("Time adjustment clamped to -1 second\n"); } } else { if (tv1.tv_sec == laststep.tv_sec) { splx(s); return (EPERM); } if (delta.tv_sec > 1) { tv->tv_sec = tv1.tv_sec + 1; printf("Time adjustment clamped to +1 second\n"); } laststep = *tv; } } ts.tv_sec = tv->tv_sec; ts.tv_nsec = tv->tv_usec * 1000; mtx_lock(&Giant); tc_setclock(&ts); resettodr(); mtx_unlock(&Giant); return (0); } #ifndef _SYS_SYSPROTO_H_ struct clock_getcpuclockid2_args { id_t id; int which, clockid_t *clock_id; }; #endif /* ARGSUSED */ int sys_clock_getcpuclockid2(struct thread *td, struct clock_getcpuclockid2_args *uap) { clockid_t clk_id; int error; error = kern_clock_getcpuclockid2(td, uap->id, uap->which, &clk_id); if (error == 0) error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t)); return (error); } int kern_clock_getcpuclockid2(struct thread *td, id_t id, int which, clockid_t *clk_id) { struct proc *p; pid_t pid; lwpid_t tid; int error; switch (which) { case CPUCLOCK_WHICH_PID: if (id != 0) { error = pget(id, PGET_CANSEE | PGET_NOTID, &p); if (error != 0) return (error); PROC_UNLOCK(p); pid = id; } else { pid = td->td_proc->p_pid; } *clk_id = MAKE_PROCESS_CPUCLOCK(pid); return (0); case CPUCLOCK_WHICH_TID: tid = id == 0 ? td->td_tid : id; *clk_id = MAKE_THREAD_CPUCLOCK(tid); return (0); default: return (EINVAL); } } #ifndef _SYS_SYSPROTO_H_ struct clock_gettime_args { clockid_t clock_id; struct timespec *tp; }; #endif /* ARGSUSED */ int sys_clock_gettime(struct thread *td, struct clock_gettime_args *uap) { struct timespec ats; int error; error = kern_clock_gettime(td, uap->clock_id, &ats); if (error == 0) error = copyout(&ats, uap->tp, sizeof(ats)); return (error); } static inline void cputick2timespec(uint64_t runtime, struct timespec *ats) { runtime = cputick2usec(runtime); ats->tv_sec = runtime / 1000000; ats->tv_nsec = runtime % 1000000 * 1000; } static void get_thread_cputime(struct thread *targettd, struct timespec *ats) { uint64_t runtime, curtime, switchtime; if (targettd == NULL) { /* current thread */ critical_enter(); switchtime = PCPU_GET(switchtime); curtime = cpu_ticks(); runtime = curthread->td_runtime; critical_exit(); runtime += curtime - switchtime; } else { thread_lock(targettd); runtime = targettd->td_runtime; thread_unlock(targettd); } cputick2timespec(runtime, ats); } static void get_process_cputime(struct proc *targetp, struct timespec *ats) { uint64_t runtime; struct rusage ru; PROC_STATLOCK(targetp); rufetch(targetp, &ru); runtime = targetp->p_rux.rux_runtime; PROC_STATUNLOCK(targetp); cputick2timespec(runtime, ats); } static int get_cputime(struct thread *td, clockid_t clock_id, struct timespec *ats) { struct proc *p, *p2; struct thread *td2; lwpid_t tid; pid_t pid; int error; p = td->td_proc; if ((clock_id & CPUCLOCK_PROCESS_BIT) == 0) { tid = clock_id & CPUCLOCK_ID_MASK; td2 = tdfind(tid, p->p_pid); if (td2 == NULL) return (EINVAL); get_thread_cputime(td2, ats); PROC_UNLOCK(td2->td_proc); } else { pid = clock_id & CPUCLOCK_ID_MASK; error = pget(pid, PGET_CANSEE, &p2); if (error != 0) return (EINVAL); get_process_cputime(p2, ats); PROC_UNLOCK(p2); } return (0); } int kern_clock_gettime(struct thread *td, clockid_t clock_id, struct timespec *ats) { struct timeval sys, user; struct proc *p; p = td->td_proc; switch (clock_id) { case CLOCK_REALTIME: /* Default to precise. */ case CLOCK_REALTIME_PRECISE: nanotime(ats); break; case CLOCK_REALTIME_FAST: getnanotime(ats); break; case CLOCK_VIRTUAL: PROC_LOCK(p); PROC_STATLOCK(p); calcru(p, &user, &sys); PROC_STATUNLOCK(p); PROC_UNLOCK(p); TIMEVAL_TO_TIMESPEC(&user, ats); break; case CLOCK_PROF: PROC_LOCK(p); PROC_STATLOCK(p); calcru(p, &user, &sys); PROC_STATUNLOCK(p); PROC_UNLOCK(p); timevaladd(&user, &sys); TIMEVAL_TO_TIMESPEC(&user, ats); break; case CLOCK_MONOTONIC: /* Default to precise. */ case CLOCK_MONOTONIC_PRECISE: case CLOCK_UPTIME: case CLOCK_UPTIME_PRECISE: nanouptime(ats); break; case CLOCK_UPTIME_FAST: case CLOCK_MONOTONIC_FAST: getnanouptime(ats); break; case CLOCK_SECOND: ats->tv_sec = time_second; ats->tv_nsec = 0; break; case CLOCK_THREAD_CPUTIME_ID: get_thread_cputime(NULL, ats); break; case CLOCK_PROCESS_CPUTIME_ID: PROC_LOCK(p); get_process_cputime(p, ats); PROC_UNLOCK(p); break; default: if ((int)clock_id >= 0) return (EINVAL); return (get_cputime(td, clock_id, ats)); } return (0); } #ifndef _SYS_SYSPROTO_H_ struct clock_settime_args { clockid_t clock_id; const struct timespec *tp; }; #endif /* ARGSUSED */ int sys_clock_settime(struct thread *td, struct clock_settime_args *uap) { struct timespec ats; int error; if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0) return (error); return (kern_clock_settime(td, uap->clock_id, &ats)); } int kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats) { struct timeval atv; int error; if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0) return (error); if (clock_id != CLOCK_REALTIME) return (EINVAL); if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000 || ats->tv_sec < 0) return (EINVAL); /* XXX Don't convert nsec->usec and back */ TIMESPEC_TO_TIMEVAL(&atv, ats); error = settime(td, &atv); return (error); } #ifndef _SYS_SYSPROTO_H_ struct clock_getres_args { clockid_t clock_id; struct timespec *tp; }; #endif int sys_clock_getres(struct thread *td, struct clock_getres_args *uap) { struct timespec ts; int error; if (uap->tp == NULL) return (0); error = kern_clock_getres(td, uap->clock_id, &ts); if (error == 0) error = copyout(&ts, uap->tp, sizeof(ts)); return (error); } int kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts) { ts->tv_sec = 0; switch (clock_id) { case CLOCK_REALTIME: case CLOCK_REALTIME_FAST: case CLOCK_REALTIME_PRECISE: case CLOCK_MONOTONIC: case CLOCK_MONOTONIC_FAST: case CLOCK_MONOTONIC_PRECISE: case CLOCK_UPTIME: case CLOCK_UPTIME_FAST: case CLOCK_UPTIME_PRECISE: /* * Round up the result of the division cheaply by adding 1. * Rounding up is especially important if rounding down * would give 0. Perfect rounding is unimportant. */ ts->tv_nsec = 1000000000 / tc_getfrequency() + 1; break; case CLOCK_VIRTUAL: case CLOCK_PROF: /* Accurately round up here because we can do so cheaply. */ ts->tv_nsec = howmany(1000000000, hz); break; case CLOCK_SECOND: ts->tv_sec = 1; ts->tv_nsec = 0; break; case CLOCK_THREAD_CPUTIME_ID: case CLOCK_PROCESS_CPUTIME_ID: cputime: /* sync with cputick2usec */ ts->tv_nsec = 1000000 / cpu_tickrate(); if (ts->tv_nsec == 0) ts->tv_nsec = 1000; break; default: if ((int)clock_id < 0) goto cputime; return (EINVAL); } return (0); } static uint8_t nanowait[MAXCPU]; int kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt) { struct timespec ts; sbintime_t sbt, sbtt, prec, tmp; time_t over; int error; if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000) return (EINVAL); if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0)) return (0); ts = *rqt; if (ts.tv_sec > INT32_MAX / 2) { over = ts.tv_sec - INT32_MAX / 2; ts.tv_sec -= over; } else over = 0; tmp = tstosbt(ts); prec = tmp; prec >>= tc_precexp; if (TIMESEL(&sbt, tmp)) sbt += tc_tick_sbt; sbt += tmp; error = tsleep_sbt(&nanowait[curcpu], PWAIT | PCATCH, "nanslp", sbt, prec, C_ABSOLUTE); if (error != EWOULDBLOCK) { if (error == ERESTART) error = EINTR; TIMESEL(&sbtt, tmp); if (rmt != NULL) { ts = sbttots(sbt - sbtt); ts.tv_sec += over; if (ts.tv_sec < 0) timespecclear(&ts); *rmt = ts; } if (sbtt >= sbt) return (0); return (error); } return (0); } #ifndef _SYS_SYSPROTO_H_ struct nanosleep_args { struct timespec *rqtp; struct timespec *rmtp; }; #endif /* ARGSUSED */ int sys_nanosleep(struct thread *td, struct nanosleep_args *uap) { struct timespec rmt, rqt; int error; error = copyin(uap->rqtp, &rqt, sizeof(rqt)); if (error) return (error); if (uap->rmtp && !useracc((caddr_t)uap->rmtp, sizeof(rmt), VM_PROT_WRITE)) return (EFAULT); error = kern_nanosleep(td, &rqt, &rmt); if (error && uap->rmtp) { int error2; error2 = copyout(&rmt, uap->rmtp, sizeof(rmt)); if (error2) error = error2; } return (error); } #ifndef _SYS_SYSPROTO_H_ struct gettimeofday_args { struct timeval *tp; struct timezone *tzp; }; #endif /* ARGSUSED */ int sys_gettimeofday(struct thread *td, struct gettimeofday_args *uap) { struct timeval atv; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); error = copyout(&atv, uap->tp, sizeof (atv)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = tz_minuteswest; rtz.tz_dsttime = tz_dsttime; error = copyout(&rtz, uap->tzp, sizeof (rtz)); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct settimeofday_args { struct timeval *tv; struct timezone *tzp; }; #endif /* ARGSUSED */ int sys_settimeofday(struct thread *td, struct settimeofday_args *uap) { struct timeval atv, *tvp; struct timezone atz, *tzp; int error; if (uap->tv) { error = copyin(uap->tv, &atv, sizeof(atv)); if (error) return (error); tvp = &atv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &atz, sizeof(atz)); if (error) return (error); tzp = &atz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int kern_settimeofday(struct thread *td, struct timeval *tv, struct timezone *tzp) { int error; error = priv_check(td, PRIV_SETTIMEOFDAY); if (error) return (error); /* Verify all parameters before changing time. */ if (tv) { if (tv->tv_usec < 0 || tv->tv_usec >= 1000000 || tv->tv_sec < 0) return (EINVAL); error = settime(td, tv); } if (tzp && error == 0) { tz_minuteswest = tzp->tz_minuteswest; tz_dsttime = tzp->tz_dsttime; } return (error); } /* * Get value of an interval timer. The process virtual and profiling virtual * time timers are kept in the p_stats area, since they can be swapped out. * These are kept internally in the way they are specified externally: in * time until they expire. * * The real time interval timer is kept in the process table slot for the * process, and its value (it_value) is kept as an absolute time rather than * as a delta, so that it is easy to keep periodic real-time signals from * drifting. * * Virtual time timers are processed in the hardclock() routine of * kern_clock.c. The real time timer is processed by a timeout routine, * called from the softclock() routine. Since a callout may be delayed in * real time due to interrupt processing in the system, it is possible for * the real time timeout routine (realitexpire, given below), to be delayed * in real time past when it is supposed to occur. It does not suffice, * therefore, to reload the real timer .it_value from the real time timers * .it_interval. Rather, we compute the next time in absolute time the timer * should go off. */ #ifndef _SYS_SYSPROTO_H_ struct getitimer_args { u_int which; struct itimerval *itv; }; #endif int sys_getitimer(struct thread *td, struct getitimer_args *uap) { struct itimerval aitv; int error; error = kern_getitimer(td, uap->which, &aitv); if (error != 0) return (error); return (copyout(&aitv, uap->itv, sizeof (struct itimerval))); } int kern_getitimer(struct thread *td, u_int which, struct itimerval *aitv) { struct proc *p = td->td_proc; struct timeval ctv; if (which > ITIMER_PROF) return (EINVAL); if (which == ITIMER_REAL) { /* * Convert from absolute to relative time in .it_value * part of real time timer. If time for real time timer * has passed return 0, else return difference between * current time and time for the timer to go off. */ PROC_LOCK(p); *aitv = p->p_realtimer; PROC_UNLOCK(p); if (timevalisset(&aitv->it_value)) { microuptime(&ctv); if (timevalcmp(&aitv->it_value, &ctv, <)) timevalclear(&aitv->it_value); else timevalsub(&aitv->it_value, &ctv); } } else { PROC_ITIMLOCK(p); *aitv = p->p_stats->p_timer[which]; PROC_ITIMUNLOCK(p); } +#ifdef KTRACE + if (KTRPOINT(td, KTR_STRUCT)) + ktritimerval(aitv); +#endif return (0); } #ifndef _SYS_SYSPROTO_H_ struct setitimer_args { u_int which; struct itimerval *itv, *oitv; }; #endif int sys_setitimer(struct thread *td, struct setitimer_args *uap) { struct itimerval aitv, oitv; int error; if (uap->itv == NULL) { uap->itv = uap->oitv; return (sys_getitimer(td, (struct getitimer_args *)uap)); } if ((error = copyin(uap->itv, &aitv, sizeof(struct itimerval)))) return (error); error = kern_setitimer(td, uap->which, &aitv, &oitv); if (error != 0 || uap->oitv == NULL) return (error); return (copyout(&oitv, uap->oitv, sizeof(struct itimerval))); } int kern_setitimer(struct thread *td, u_int which, struct itimerval *aitv, struct itimerval *oitv) { struct proc *p = td->td_proc; struct timeval ctv; sbintime_t sbt, pr; if (aitv == NULL) return (kern_getitimer(td, which, oitv)); if (which > ITIMER_PROF) return (EINVAL); +#ifdef KTRACE + if (KTRPOINT(td, KTR_STRUCT)) + ktritimerval(aitv); +#endif if (itimerfix(&aitv->it_value) || aitv->it_value.tv_sec > INT32_MAX / 2) return (EINVAL); if (!timevalisset(&aitv->it_value)) timevalclear(&aitv->it_interval); else if (itimerfix(&aitv->it_interval) || aitv->it_interval.tv_sec > INT32_MAX / 2) return (EINVAL); if (which == ITIMER_REAL) { PROC_LOCK(p); if (timevalisset(&p->p_realtimer.it_value)) callout_stop(&p->p_itcallout); microuptime(&ctv); if (timevalisset(&aitv->it_value)) { pr = tvtosbt(aitv->it_value) >> tc_precexp; timevaladd(&aitv->it_value, &ctv); sbt = tvtosbt(aitv->it_value); callout_reset_sbt(&p->p_itcallout, sbt, pr, realitexpire, p, C_ABSOLUTE); } *oitv = p->p_realtimer; p->p_realtimer = *aitv; PROC_UNLOCK(p); if (timevalisset(&oitv->it_value)) { if (timevalcmp(&oitv->it_value, &ctv, <)) timevalclear(&oitv->it_value); else timevalsub(&oitv->it_value, &ctv); } } else { if (aitv->it_interval.tv_sec == 0 && aitv->it_interval.tv_usec != 0 && aitv->it_interval.tv_usec < tick) aitv->it_interval.tv_usec = tick; if (aitv->it_value.tv_sec == 0 && aitv->it_value.tv_usec != 0 && aitv->it_value.tv_usec < tick) aitv->it_value.tv_usec = tick; PROC_ITIMLOCK(p); *oitv = p->p_stats->p_timer[which]; p->p_stats->p_timer[which] = *aitv; PROC_ITIMUNLOCK(p); } +#ifdef KTRACE + if (KTRPOINT(td, KTR_STRUCT)) + ktritimerval(oitv); +#endif return (0); } /* * Real interval timer expired: * send process whose timer expired an alarm signal. * If time is not set up to reload, then just return. * Else compute next time timer should go off which is > current time. * This is where delay in processing this timeout causes multiple * SIGALRM calls to be compressed into one. * tvtohz() always adds 1 to allow for the time until the next clock * interrupt being strictly less than 1 clock tick, but we don't want * that here since we want to appear to be in sync with the clock * interrupt even when we're delayed. */ void realitexpire(void *arg) { struct proc *p; struct timeval ctv; sbintime_t isbt; p = (struct proc *)arg; kern_psignal(p, SIGALRM); if (!timevalisset(&p->p_realtimer.it_interval)) { timevalclear(&p->p_realtimer.it_value); if (p->p_flag & P_WEXIT) wakeup(&p->p_itcallout); return; } isbt = tvtosbt(p->p_realtimer.it_interval); if (isbt >= sbt_timethreshold) getmicrouptime(&ctv); else microuptime(&ctv); do { timevaladd(&p->p_realtimer.it_value, &p->p_realtimer.it_interval); } while (timevalcmp(&p->p_realtimer.it_value, &ctv, <=)); callout_reset_sbt(&p->p_itcallout, tvtosbt(p->p_realtimer.it_value), isbt >> tc_precexp, realitexpire, p, C_ABSOLUTE); } /* * Check that a proposed value to load into the .it_value or * .it_interval part of an interval timer is acceptable, and * fix it to have at least minimal value (i.e. if it is less * than the resolution of the clock, round it up.) */ int itimerfix(struct timeval *tv) { if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000) return (EINVAL); if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < (u_int)tick / 16) tv->tv_usec = (u_int)tick / 16; return (0); } /* * Decrement an interval timer by a specified number * of microseconds, which must be less than a second, * i.e. < 1000000. If the timer expires, then reload * it. In this case, carry over (usec - old value) to * reduce the value reloaded into the timer so that * the timer does not drift. This routine assumes * that it is called in a context where the timers * on which it is operating cannot change in value. */ int itimerdecr(struct itimerval *itp, int usec) { if (itp->it_value.tv_usec < usec) { if (itp->it_value.tv_sec == 0) { /* expired, and already in next interval */ usec -= itp->it_value.tv_usec; goto expire; } itp->it_value.tv_usec += 1000000; itp->it_value.tv_sec--; } itp->it_value.tv_usec -= usec; usec = 0; if (timevalisset(&itp->it_value)) return (1); /* expired, exactly at end of interval */ expire: if (timevalisset(&itp->it_interval)) { itp->it_value = itp->it_interval; itp->it_value.tv_usec -= usec; if (itp->it_value.tv_usec < 0) { itp->it_value.tv_usec += 1000000; itp->it_value.tv_sec--; } } else itp->it_value.tv_usec = 0; /* sec is already 0 */ return (0); } /* * Add and subtract routines for timevals. * N.B.: subtract routine doesn't deal with * results which are before the beginning, * it just gets very confused in this case. * Caveat emptor. */ void timevaladd(struct timeval *t1, const struct timeval *t2) { t1->tv_sec += t2->tv_sec; t1->tv_usec += t2->tv_usec; timevalfix(t1); } void timevalsub(struct timeval *t1, const struct timeval *t2) { t1->tv_sec -= t2->tv_sec; t1->tv_usec -= t2->tv_usec; timevalfix(t1); } static void timevalfix(struct timeval *t1) { if (t1->tv_usec < 0) { t1->tv_sec--; t1->tv_usec += 1000000; } if (t1->tv_usec >= 1000000) { t1->tv_sec++; t1->tv_usec -= 1000000; } } /* * ratecheck(): simple time-based rate-limit checking. */ int ratecheck(struct timeval *lasttime, const struct timeval *mininterval) { struct timeval tv, delta; int rv = 0; getmicrouptime(&tv); /* NB: 10ms precision */ delta = tv; timevalsub(&delta, lasttime); /* * check for 0,0 is so that the message will be seen at least once, * even if interval is huge. */ if (timevalcmp(&delta, mininterval, >=) || (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) { *lasttime = tv; rv = 1; } return (rv); } /* * ppsratecheck(): packets (or events) per second limitation. * * Return 0 if the limit is to be enforced (e.g. the caller * should drop a packet because of the rate limitation). * * maxpps of 0 always causes zero to be returned. maxpps of -1 * always causes 1 to be returned; this effectively defeats rate * limiting. * * Note that we maintain the struct timeval for compatibility * with other bsd systems. We reuse the storage and just monitor * clock ticks for minimal overhead. */ int ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps) { int now; /* * Reset the last time and counter if this is the first call * or more than a second has passed since the last update of * lasttime. */ now = ticks; if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) { lasttime->tv_sec = now; *curpps = 1; return (maxpps != 0); } else { (*curpps)++; /* NB: ignore potential overflow */ return (maxpps < 0 || *curpps <= maxpps); } } static void itimer_start(void) { struct kclock rt_clock = { .timer_create = realtimer_create, .timer_delete = realtimer_delete, .timer_settime = realtimer_settime, .timer_gettime = realtimer_gettime, .event_hook = NULL }; itimer_zone = uma_zcreate("itimer", sizeof(struct itimer), NULL, NULL, itimer_init, itimer_fini, UMA_ALIGN_PTR, 0); register_posix_clock(CLOCK_REALTIME, &rt_clock); register_posix_clock(CLOCK_MONOTONIC, &rt_clock); p31b_setcfg(CTL_P1003_1B_TIMERS, 200112L); p31b_setcfg(CTL_P1003_1B_DELAYTIMER_MAX, INT_MAX); p31b_setcfg(CTL_P1003_1B_TIMER_MAX, TIMER_MAX); EVENTHANDLER_REGISTER(process_exit, itimers_event_hook_exit, (void *)ITIMER_EV_EXIT, EVENTHANDLER_PRI_ANY); EVENTHANDLER_REGISTER(process_exec, itimers_event_hook_exec, (void *)ITIMER_EV_EXEC, EVENTHANDLER_PRI_ANY); } int register_posix_clock(int clockid, struct kclock *clk) { if ((unsigned)clockid >= MAX_CLOCKS) { printf("%s: invalid clockid\n", __func__); return (0); } posix_clocks[clockid] = *clk; return (1); } static int itimer_init(void *mem, int size, int flags) { struct itimer *it; it = (struct itimer *)mem; mtx_init(&it->it_mtx, "itimer lock", NULL, MTX_DEF); return (0); } static void itimer_fini(void *mem, int size) { struct itimer *it; it = (struct itimer *)mem; mtx_destroy(&it->it_mtx); } static void itimer_enter(struct itimer *it) { mtx_assert(&it->it_mtx, MA_OWNED); it->it_usecount++; } static void itimer_leave(struct itimer *it) { mtx_assert(&it->it_mtx, MA_OWNED); KASSERT(it->it_usecount > 0, ("invalid it_usecount")); if (--it->it_usecount == 0 && (it->it_flags & ITF_WANTED) != 0) wakeup(it); } #ifndef _SYS_SYSPROTO_H_ struct ktimer_create_args { clockid_t clock_id; struct sigevent * evp; int * timerid; }; #endif int sys_ktimer_create(struct thread *td, struct ktimer_create_args *uap) { struct sigevent *evp, ev; int id; int error; if (uap->evp == NULL) { evp = NULL; } else { error = copyin(uap->evp, &ev, sizeof(ev)); if (error != 0) return (error); evp = &ev; } error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1); if (error == 0) { error = copyout(&id, uap->timerid, sizeof(int)); if (error != 0) kern_ktimer_delete(td, id); } return (error); } int kern_ktimer_create(struct thread *td, clockid_t clock_id, struct sigevent *evp, int *timerid, int preset_id) { struct proc *p = td->td_proc; struct itimer *it; int id; int error; if (clock_id < 0 || clock_id >= MAX_CLOCKS) return (EINVAL); if (posix_clocks[clock_id].timer_create == NULL) return (EINVAL); if (evp != NULL) { if (evp->sigev_notify != SIGEV_NONE && evp->sigev_notify != SIGEV_SIGNAL && evp->sigev_notify != SIGEV_THREAD_ID) return (EINVAL); if ((evp->sigev_notify == SIGEV_SIGNAL || evp->sigev_notify == SIGEV_THREAD_ID) && !_SIG_VALID(evp->sigev_signo)) return (EINVAL); } if (p->p_itimers == NULL) itimers_alloc(p); it = uma_zalloc(itimer_zone, M_WAITOK); it->it_flags = 0; it->it_usecount = 0; it->it_active = 0; timespecclear(&it->it_time.it_value); timespecclear(&it->it_time.it_interval); it->it_overrun = 0; it->it_overrun_last = 0; it->it_clockid = clock_id; it->it_timerid = -1; it->it_proc = p; ksiginfo_init(&it->it_ksi); it->it_ksi.ksi_flags |= KSI_INS | KSI_EXT; error = CLOCK_CALL(clock_id, timer_create, (it)); if (error != 0) goto out; PROC_LOCK(p); if (preset_id != -1) { KASSERT(preset_id >= 0 && preset_id < 3, ("invalid preset_id")); id = preset_id; if (p->p_itimers->its_timers[id] != NULL) { PROC_UNLOCK(p); error = 0; goto out; } } else { /* * Find a free timer slot, skipping those reserved * for setitimer(). */ for (id = 3; id < TIMER_MAX; id++) if (p->p_itimers->its_timers[id] == NULL) break; if (id == TIMER_MAX) { PROC_UNLOCK(p); error = EAGAIN; goto out; } } it->it_timerid = id; p->p_itimers->its_timers[id] = it; if (evp != NULL) it->it_sigev = *evp; else { it->it_sigev.sigev_notify = SIGEV_SIGNAL; switch (clock_id) { default: case CLOCK_REALTIME: it->it_sigev.sigev_signo = SIGALRM; break; case CLOCK_VIRTUAL: it->it_sigev.sigev_signo = SIGVTALRM; break; case CLOCK_PROF: it->it_sigev.sigev_signo = SIGPROF; break; } it->it_sigev.sigev_value.sival_int = id; } if (it->it_sigev.sigev_notify == SIGEV_SIGNAL || it->it_sigev.sigev_notify == SIGEV_THREAD_ID) { it->it_ksi.ksi_signo = it->it_sigev.sigev_signo; it->it_ksi.ksi_code = SI_TIMER; it->it_ksi.ksi_value = it->it_sigev.sigev_value; it->it_ksi.ksi_timerid = id; } PROC_UNLOCK(p); *timerid = id; return (0); out: ITIMER_LOCK(it); CLOCK_CALL(it->it_clockid, timer_delete, (it)); ITIMER_UNLOCK(it); uma_zfree(itimer_zone, it); return (error); } #ifndef _SYS_SYSPROTO_H_ struct ktimer_delete_args { int timerid; }; #endif int sys_ktimer_delete(struct thread *td, struct ktimer_delete_args *uap) { return (kern_ktimer_delete(td, uap->timerid)); } static struct itimer * itimer_find(struct proc *p, int timerid) { struct itimer *it; PROC_LOCK_ASSERT(p, MA_OWNED); if ((p->p_itimers == NULL) || (timerid < 0) || (timerid >= TIMER_MAX) || (it = p->p_itimers->its_timers[timerid]) == NULL) { return (NULL); } ITIMER_LOCK(it); if ((it->it_flags & ITF_DELETING) != 0) { ITIMER_UNLOCK(it); it = NULL; } return (it); } int kern_ktimer_delete(struct thread *td, int timerid) { struct proc *p = td->td_proc; struct itimer *it; PROC_LOCK(p); it = itimer_find(p, timerid); if (it == NULL) { PROC_UNLOCK(p); return (EINVAL); } PROC_UNLOCK(p); it->it_flags |= ITF_DELETING; while (it->it_usecount > 0) { it->it_flags |= ITF_WANTED; msleep(it, &it->it_mtx, PPAUSE, "itimer", 0); } it->it_flags &= ~ITF_WANTED; CLOCK_CALL(it->it_clockid, timer_delete, (it)); ITIMER_UNLOCK(it); PROC_LOCK(p); if (KSI_ONQ(&it->it_ksi)) sigqueue_take(&it->it_ksi); p->p_itimers->its_timers[timerid] = NULL; PROC_UNLOCK(p); uma_zfree(itimer_zone, it); return (0); } #ifndef _SYS_SYSPROTO_H_ struct ktimer_settime_args { int timerid; int flags; const struct itimerspec * value; struct itimerspec * ovalue; }; #endif int sys_ktimer_settime(struct thread *td, struct ktimer_settime_args *uap) { struct itimerspec val, oval, *ovalp; int error; error = copyin(uap->value, &val, sizeof(val)); if (error != 0) return (error); ovalp = uap->ovalue != NULL ? &oval : NULL; error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp); if (error == 0 && uap->ovalue != NULL) error = copyout(ovalp, uap->ovalue, sizeof(*ovalp)); return (error); } int kern_ktimer_settime(struct thread *td, int timer_id, int flags, struct itimerspec *val, struct itimerspec *oval) { struct proc *p; struct itimer *it; int error; p = td->td_proc; PROC_LOCK(p); if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) { PROC_UNLOCK(p); error = EINVAL; } else { PROC_UNLOCK(p); itimer_enter(it); error = CLOCK_CALL(it->it_clockid, timer_settime, (it, flags, val, oval)); itimer_leave(it); ITIMER_UNLOCK(it); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct ktimer_gettime_args { int timerid; struct itimerspec * value; }; #endif int sys_ktimer_gettime(struct thread *td, struct ktimer_gettime_args *uap) { struct itimerspec val; int error; error = kern_ktimer_gettime(td, uap->timerid, &val); if (error == 0) error = copyout(&val, uap->value, sizeof(val)); return (error); } int kern_ktimer_gettime(struct thread *td, int timer_id, struct itimerspec *val) { struct proc *p; struct itimer *it; int error; p = td->td_proc; PROC_LOCK(p); if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) { PROC_UNLOCK(p); error = EINVAL; } else { PROC_UNLOCK(p); itimer_enter(it); error = CLOCK_CALL(it->it_clockid, timer_gettime, (it, val)); itimer_leave(it); ITIMER_UNLOCK(it); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct timer_getoverrun_args { int timerid; }; #endif int sys_ktimer_getoverrun(struct thread *td, struct ktimer_getoverrun_args *uap) { return (kern_ktimer_getoverrun(td, uap->timerid)); } int kern_ktimer_getoverrun(struct thread *td, int timer_id) { struct proc *p = td->td_proc; struct itimer *it; int error ; PROC_LOCK(p); if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) { PROC_UNLOCK(p); error = EINVAL; } else { td->td_retval[0] = it->it_overrun_last; ITIMER_UNLOCK(it); PROC_UNLOCK(p); error = 0; } return (error); } static int realtimer_create(struct itimer *it) { callout_init_mtx(&it->it_callout, &it->it_mtx, 0); return (0); } static int realtimer_delete(struct itimer *it) { mtx_assert(&it->it_mtx, MA_OWNED); /* * clear timer's value and interval to tell realtimer_expire * to not rearm the timer. */ timespecclear(&it->it_time.it_value); timespecclear(&it->it_time.it_interval); ITIMER_UNLOCK(it); callout_drain(&it->it_callout); ITIMER_LOCK(it); return (0); } static int realtimer_gettime(struct itimer *it, struct itimerspec *ovalue) { struct timespec cts; mtx_assert(&it->it_mtx, MA_OWNED); realtimer_clocktime(it->it_clockid, &cts); *ovalue = it->it_time; if (ovalue->it_value.tv_sec != 0 || ovalue->it_value.tv_nsec != 0) { timespecsub(&ovalue->it_value, &cts); if (ovalue->it_value.tv_sec < 0 || (ovalue->it_value.tv_sec == 0 && ovalue->it_value.tv_nsec == 0)) { ovalue->it_value.tv_sec = 0; ovalue->it_value.tv_nsec = 1; } } return (0); } static int realtimer_settime(struct itimer *it, int flags, struct itimerspec *value, struct itimerspec *ovalue) { struct timespec cts, ts; struct timeval tv; struct itimerspec val; mtx_assert(&it->it_mtx, MA_OWNED); val = *value; if (itimespecfix(&val.it_value)) return (EINVAL); if (timespecisset(&val.it_value)) { if (itimespecfix(&val.it_interval)) return (EINVAL); } else { timespecclear(&val.it_interval); } if (ovalue != NULL) realtimer_gettime(it, ovalue); it->it_time = val; if (timespecisset(&val.it_value)) { realtimer_clocktime(it->it_clockid, &cts); ts = val.it_value; if ((flags & TIMER_ABSTIME) == 0) { /* Convert to absolute time. */ timespecadd(&it->it_time.it_value, &cts); } else { timespecsub(&ts, &cts); /* * We don't care if ts is negative, tztohz will * fix it. */ } TIMESPEC_TO_TIMEVAL(&tv, &ts); callout_reset(&it->it_callout, tvtohz(&tv), realtimer_expire, it); } else { callout_stop(&it->it_callout); } return (0); } static void realtimer_clocktime(clockid_t id, struct timespec *ts) { if (id == CLOCK_REALTIME) getnanotime(ts); else /* CLOCK_MONOTONIC */ getnanouptime(ts); } int itimer_accept(struct proc *p, int timerid, ksiginfo_t *ksi) { struct itimer *it; PROC_LOCK_ASSERT(p, MA_OWNED); it = itimer_find(p, timerid); if (it != NULL) { ksi->ksi_overrun = it->it_overrun; it->it_overrun_last = it->it_overrun; it->it_overrun = 0; ITIMER_UNLOCK(it); return (0); } return (EINVAL); } int itimespecfix(struct timespec *ts) { if (ts->tv_sec < 0 || ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000) return (EINVAL); if (ts->tv_sec == 0 && ts->tv_nsec != 0 && ts->tv_nsec < tick * 1000) ts->tv_nsec = tick * 1000; return (0); } /* Timeout callback for realtime timer */ static void realtimer_expire(void *arg) { struct timespec cts, ts; struct timeval tv; struct itimer *it; it = (struct itimer *)arg; realtimer_clocktime(it->it_clockid, &cts); /* Only fire if time is reached. */ if (timespeccmp(&cts, &it->it_time.it_value, >=)) { if (timespecisset(&it->it_time.it_interval)) { timespecadd(&it->it_time.it_value, &it->it_time.it_interval); while (timespeccmp(&cts, &it->it_time.it_value, >=)) { if (it->it_overrun < INT_MAX) it->it_overrun++; else it->it_ksi.ksi_errno = ERANGE; timespecadd(&it->it_time.it_value, &it->it_time.it_interval); } } else { /* single shot timer ? */ timespecclear(&it->it_time.it_value); } if (timespecisset(&it->it_time.it_value)) { ts = it->it_time.it_value; timespecsub(&ts, &cts); TIMESPEC_TO_TIMEVAL(&tv, &ts); callout_reset(&it->it_callout, tvtohz(&tv), realtimer_expire, it); } itimer_enter(it); ITIMER_UNLOCK(it); itimer_fire(it); ITIMER_LOCK(it); itimer_leave(it); } else if (timespecisset(&it->it_time.it_value)) { ts = it->it_time.it_value; timespecsub(&ts, &cts); TIMESPEC_TO_TIMEVAL(&tv, &ts); callout_reset(&it->it_callout, tvtohz(&tv), realtimer_expire, it); } } void itimer_fire(struct itimer *it) { struct proc *p = it->it_proc; struct thread *td; if (it->it_sigev.sigev_notify == SIGEV_SIGNAL || it->it_sigev.sigev_notify == SIGEV_THREAD_ID) { if (sigev_findtd(p, &it->it_sigev, &td) != 0) { ITIMER_LOCK(it); timespecclear(&it->it_time.it_value); timespecclear(&it->it_time.it_interval); callout_stop(&it->it_callout); ITIMER_UNLOCK(it); return; } if (!KSI_ONQ(&it->it_ksi)) { it->it_ksi.ksi_errno = 0; ksiginfo_set_sigev(&it->it_ksi, &it->it_sigev); tdsendsignal(p, td, it->it_ksi.ksi_signo, &it->it_ksi); } else { if (it->it_overrun < INT_MAX) it->it_overrun++; else it->it_ksi.ksi_errno = ERANGE; } PROC_UNLOCK(p); } } static void itimers_alloc(struct proc *p) { struct itimers *its; int i; its = malloc(sizeof (struct itimers), M_SUBPROC, M_WAITOK | M_ZERO); LIST_INIT(&its->its_virtual); LIST_INIT(&its->its_prof); TAILQ_INIT(&its->its_worklist); for (i = 0; i < TIMER_MAX; i++) its->its_timers[i] = NULL; PROC_LOCK(p); if (p->p_itimers == NULL) { p->p_itimers = its; PROC_UNLOCK(p); } else { PROC_UNLOCK(p); free(its, M_SUBPROC); } } static void itimers_event_hook_exec(void *arg, struct proc *p, struct image_params *imgp __unused) { itimers_event_hook_exit(arg, p); } /* Clean up timers when some process events are being triggered. */ static void itimers_event_hook_exit(void *arg, struct proc *p) { struct itimers *its; struct itimer *it; int event = (int)(intptr_t)arg; int i; if (p->p_itimers != NULL) { its = p->p_itimers; for (i = 0; i < MAX_CLOCKS; ++i) { if (posix_clocks[i].event_hook != NULL) CLOCK_CALL(i, event_hook, (p, i, event)); } /* * According to susv3, XSI interval timers should be inherited * by new image. */ if (event == ITIMER_EV_EXEC) i = 3; else if (event == ITIMER_EV_EXIT) i = 0; else panic("unhandled event"); for (; i < TIMER_MAX; ++i) { if ((it = its->its_timers[i]) != NULL) kern_ktimer_delete(curthread, i); } if (its->its_timers[0] == NULL && its->its_timers[1] == NULL && its->its_timers[2] == NULL) { free(its, M_SUBPROC); p->p_itimers = NULL; } } } Index: head/sys/sys/ktrace.h =================================================================== --- head/sys/sys/ktrace.h (revision 302769) +++ head/sys/sys/ktrace.h (revision 302770) @@ -1,289 +1,291 @@ /*- * Copyright (c) 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ktrace.h 8.1 (Berkeley) 6/2/93 * $FreeBSD$ */ #ifndef _SYS_KTRACE_H_ #define _SYS_KTRACE_H_ #include /* * operations to ktrace system call (KTROP(op)) */ #define KTROP_SET 0 /* set trace points */ #define KTROP_CLEAR 1 /* clear trace points */ #define KTROP_CLEARFILE 2 /* stop all tracing to file */ #define KTROP(o) ((o)&3) /* macro to extract operation */ /* * flags (ORed in with operation) */ #define KTRFLAG_DESCEND 4 /* perform op on all children too */ /* * ktrace record header */ struct ktr_header { int ktr_len; /* length of buf */ short ktr_type; /* trace record type */ pid_t ktr_pid; /* process id */ char ktr_comm[MAXCOMLEN + 1];/* command name */ struct timeval ktr_time; /* timestamp */ intptr_t ktr_tid; /* was ktr_buffer */ }; /* * Test for kernel trace point (MP SAFE). * * KTRCHECK() just checks that the type is enabled and is only for * internal use in the ktrace subsystem. KTRPOINT() checks against * ktrace recursion as well as checking that the type is enabled and * is the public interface. */ #define KTRCHECK(td, type) ((td)->td_proc->p_traceflag & (1 << type)) #define KTRPOINT(td, type) \ (KTRCHECK((td), (type)) && !((td)->td_pflags & TDP_INKTRACE)) #define KTRCHECKDRAIN(td) (!(STAILQ_EMPTY(&(td)->td_proc->p_ktr))) #define KTRUSERRET(td) do { \ if (KTRCHECKDRAIN(td)) \ ktruserret(td); \ } while (0) /* * ktrace record types */ /* * KTR_SYSCALL - system call record */ #define KTR_SYSCALL 1 struct ktr_syscall { short ktr_code; /* syscall number */ short ktr_narg; /* number of arguments */ /* * followed by ktr_narg register_t */ register_t ktr_args[1]; }; /* * KTR_SYSRET - return from system call record */ #define KTR_SYSRET 2 struct ktr_sysret { short ktr_code; short ktr_eosys; int ktr_error; register_t ktr_retval; }; /* * KTR_NAMEI - namei record */ #define KTR_NAMEI 3 /* record contains pathname */ /* * KTR_GENIO - trace generic process i/o */ #define KTR_GENIO 4 struct ktr_genio { int ktr_fd; enum uio_rw ktr_rw; /* * followed by data successfully read/written */ }; /* * KTR_PSIG - trace processed signal */ #define KTR_PSIG 5 struct ktr_psig { int signo; sig_t action; int code; sigset_t mask; }; /* * KTR_CSW - trace context switches */ #define KTR_CSW 6 struct ktr_csw_old { int out; /* 1 if switch out, 0 if switch in */ int user; /* 1 if usermode (ivcsw), 0 if kernel (vcsw) */ }; struct ktr_csw { int out; /* 1 if switch out, 0 if switch in */ int user; /* 1 if usermode (ivcsw), 0 if kernel (vcsw) */ char wmesg[8]; }; /* * KTR_USER - data coming from userland */ #define KTR_USER_MAXLEN 2048 /* maximum length of passed data */ #define KTR_USER 7 /* * KTR_STRUCT - misc. structs */ #define KTR_STRUCT 8 /* * record contains null-terminated struct name followed by * struct contents */ struct sockaddr; struct stat; struct sysentvec; /* * KTR_SYSCTL - name of a sysctl MIB */ #define KTR_SYSCTL 9 /* record contains null-terminated MIB name */ /* * KTR_PROCCTOR - trace process creation (multiple ABI support) */ #define KTR_PROCCTOR 10 struct ktr_proc_ctor { u_int sv_flags; /* struct sysentvec sv_flags copy */ }; /* * KTR_PROCDTOR - trace process destruction (multiple ABI support) */ #define KTR_PROCDTOR 11 /* * KTR_CAPFAIL - trace capability check failures */ #define KTR_CAPFAIL 12 enum ktr_cap_fail_type { CAPFAIL_NOTCAPABLE, /* insufficient capabilities in cap_check() */ CAPFAIL_INCREASE, /* attempt to increase capabilities */ CAPFAIL_SYSCALL, /* disallowed system call */ CAPFAIL_LOOKUP, /* disallowed VFS lookup */ }; struct ktr_cap_fail { enum ktr_cap_fail_type cap_type; cap_rights_t cap_needed; cap_rights_t cap_held; }; /* * KTR_FAULT - page fault record */ #define KTR_FAULT 13 struct ktr_fault { vm_offset_t vaddr; int type; }; /* * KTR_FAULTEND - end of page fault record */ #define KTR_FAULTEND 14 struct ktr_faultend { int result; }; /* * KTR_DROP - If this bit is set in ktr_type, then at least one event * between the previous record and this record was dropped. */ #define KTR_DROP 0x8000 /* * kernel trace points (in p_traceflag) */ #define KTRFAC_MASK 0x00ffffff #define KTRFAC_SYSCALL (1<sa_len) #define ktrstat(s) \ ktrstruct("stat", (s), sizeof(struct stat)) #else #include __BEGIN_DECLS int ktrace(const char *, int, int, pid_t); int utrace(const void *, size_t); __END_DECLS #endif #endif Index: head/usr.bin/kdump/kdump.c =================================================================== --- head/usr.bin/kdump/kdump.c (revision 302769) +++ head/usr.bin/kdump/kdump.c (revision 302770) @@ -1,1843 +1,1868 @@ /*- * Copyright (c) 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1988, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)kdump.c 8.1 (Berkeley) 6/6/93"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #define _KERNEL extern int errno; #include #undef _KERNEL #include #include #include #define _KERNEL #include #undef _KERNEL #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_LIBCASPER #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ktrace.h" #include "kdump_subr.h" #ifdef HAVE_LIBCASPER #include #include #include #endif u_int abidump(struct ktr_header *); int fetchprocinfo(struct ktr_header *, u_int *); int fread_tail(void *, int, int); void dumpheader(struct ktr_header *); void ktrsyscall(struct ktr_syscall *, u_int); void ktrsysret(struct ktr_sysret *, u_int); void ktrnamei(char *, int); void hexdump(char *, int, int); void visdump(char *, int, int); void ktrgenio(struct ktr_genio *, int); void ktrpsig(struct ktr_psig *); void ktrcsw(struct ktr_csw *); void ktrcsw_old(struct ktr_csw_old *); void ktruser_malloc(void *); void ktruser_rtld(int, void *); void ktruser(int, void *); void ktrcaprights(cap_rights_t *); +void ktritimerval(struct itimerval *it); void ktrsockaddr(struct sockaddr *); void ktrstat(struct stat *); void ktrstruct(char *, size_t); void ktrcapfail(struct ktr_cap_fail *); void ktrfault(struct ktr_fault *); void ktrfaultend(struct ktr_faultend *); void limitfd(int fd); void usage(void); #define TIMESTAMP_NONE 0x0 #define TIMESTAMP_ABSOLUTE 0x1 #define TIMESTAMP_ELAPSED 0x2 #define TIMESTAMP_RELATIVE 0x4 extern const char *signames[]; static int timestamp, decimal, fancy = 1, suppressdata, tail, threads, maxdata, resolv = 0, abiflag = 0, syscallno = 0; static const char *tracefile = DEF_TRACEFILE; static struct ktr_header ktr_header; #define TIME_FORMAT "%b %e %T %Y" #define eqs(s1, s2) (strcmp((s1), (s2)) == 0) #define print_number(i,n,c) do { \ if (decimal) \ printf("%c%jd", c, (intmax_t)*i); \ else \ printf("%c%#jx", c, (uintmax_t)(u_register_t)*i); \ i++; \ n--; \ c = ','; \ } while (0) struct proc_info { TAILQ_ENTRY(proc_info) info; u_int sv_flags; pid_t pid; }; static TAILQ_HEAD(trace_procs, proc_info) trace_procs; #ifdef HAVE_LIBCASPER static cap_channel_t *cappwd, *capgrp; #endif static void strerror_init(void) { /* * Cache NLS data before entering capability mode. * XXXPJD: There should be strerror_init() and strsignal_init() in libc. */ (void)catopen("libc", NL_CAT_LOCALE); } static void localtime_init(void) { time_t ltime; /* * Allow localtime(3) to cache /etc/localtime content before entering * capability mode. * XXXPJD: There should be localtime_init() in libc. */ (void)time(<ime); (void)localtime(<ime); } #ifdef HAVE_LIBCASPER static int cappwdgrp_setup(cap_channel_t **cappwdp, cap_channel_t **capgrpp) { cap_channel_t *capcas, *cappwdloc, *capgrploc; const char *cmds[1], *fields[1]; capcas = cap_init(); if (capcas == NULL) { err(1, "unable to create casper process"); exit(1); } cappwdloc = cap_service_open(capcas, "system.pwd"); capgrploc = cap_service_open(capcas, "system.grp"); /* Casper capability no longer needed. */ cap_close(capcas); if (cappwdloc == NULL || capgrploc == NULL) { if (cappwdloc == NULL) warn("unable to open system.pwd service"); if (capgrploc == NULL) warn("unable to open system.grp service"); exit(1); } /* Limit system.pwd to only getpwuid() function and pw_name field. */ cmds[0] = "getpwuid"; if (cap_pwd_limit_cmds(cappwdloc, cmds, 1) < 0) err(1, "unable to limit system.pwd service"); fields[0] = "pw_name"; if (cap_pwd_limit_fields(cappwdloc, fields, 1) < 0) err(1, "unable to limit system.pwd service"); /* Limit system.grp to only getgrgid() function and gr_name field. */ cmds[0] = "getgrgid"; if (cap_grp_limit_cmds(capgrploc, cmds, 1) < 0) err(1, "unable to limit system.grp service"); fields[0] = "gr_name"; if (cap_grp_limit_fields(capgrploc, fields, 1) < 0) err(1, "unable to limit system.grp service"); *cappwdp = cappwdloc; *capgrpp = capgrploc; return (0); } #endif /* HAVE_LIBCASPER */ int main(int argc, char *argv[]) { int ch, ktrlen, size; void *m; int trpoints = ALL_POINTS; int drop_logged; pid_t pid = 0; u_int sv_flags; setlocale(LC_CTYPE, ""); timestamp = TIMESTAMP_NONE; while ((ch = getopt(argc,argv,"f:dElm:np:AHRrSsTt:")) != -1) switch (ch) { case 'A': abiflag = 1; break; case 'f': tracefile = optarg; break; case 'd': decimal = 1; break; case 'l': tail = 1; break; case 'm': maxdata = atoi(optarg); break; case 'n': fancy = 0; break; case 'p': pid = atoi(optarg); break; case 'r': resolv = 1; break; case 'S': syscallno = 1; break; case 's': suppressdata = 1; break; case 'E': timestamp |= TIMESTAMP_ELAPSED; break; case 'H': threads = 1; break; case 'R': timestamp |= TIMESTAMP_RELATIVE; break; case 'T': timestamp |= TIMESTAMP_ABSOLUTE; break; case 't': trpoints = getpoints(optarg); if (trpoints < 0) errx(1, "unknown trace point in %s", optarg); break; default: usage(); } if (argc > optind) usage(); m = malloc(size = 1025); if (m == NULL) errx(1, "%s", strerror(ENOMEM)); if (strcmp(tracefile, "-") != 0) if (!freopen(tracefile, "r", stdin)) err(1, "%s", tracefile); strerror_init(); localtime_init(); #ifdef HAVE_LIBCASPER if (resolv != 0) { if (cappwdgrp_setup(&cappwd, &capgrp) < 0) { cappwd = NULL; capgrp = NULL; } } if (resolv == 0 || (cappwd != NULL && capgrp != NULL)) { if (cap_enter() < 0 && errno != ENOSYS) err(1, "unable to enter capability mode"); } #else if (resolv == 0) { if (cap_enter() < 0 && errno != ENOSYS) err(1, "unable to enter capability mode"); } #endif limitfd(STDIN_FILENO); limitfd(STDOUT_FILENO); limitfd(STDERR_FILENO); TAILQ_INIT(&trace_procs); drop_logged = 0; while (fread_tail(&ktr_header, sizeof(struct ktr_header), 1)) { if (ktr_header.ktr_type & KTR_DROP) { ktr_header.ktr_type &= ~KTR_DROP; if (!drop_logged && threads) { printf( "%6jd %6jd %-8.*s Events dropped.\n", (intmax_t)ktr_header.ktr_pid, ktr_header.ktr_tid > 0 ? (intmax_t)ktr_header.ktr_tid : 0, MAXCOMLEN, ktr_header.ktr_comm); drop_logged = 1; } else if (!drop_logged) { printf("%6jd %-8.*s Events dropped.\n", (intmax_t)ktr_header.ktr_pid, MAXCOMLEN, ktr_header.ktr_comm); drop_logged = 1; } } if (trpoints & (1< size) { m = realloc(m, ktrlen+1); if (m == NULL) errx(1, "%s", strerror(ENOMEM)); size = ktrlen; } if (ktrlen && fread_tail(m, ktrlen, 1) == 0) errx(1, "data too short"); if (fetchprocinfo(&ktr_header, (u_int *)m) != 0) continue; sv_flags = abidump(&ktr_header); if (pid && ktr_header.ktr_pid != pid && ktr_header.ktr_tid != pid) continue; if ((trpoints & (1<ktr_type) { case KTR_PROCCTOR: TAILQ_FOREACH(pi, &trace_procs, info) { if (pi->pid == kth->ktr_pid) { TAILQ_REMOVE(&trace_procs, pi, info); break; } } pi = malloc(sizeof(struct proc_info)); if (pi == NULL) errx(1, "%s", strerror(ENOMEM)); pi->sv_flags = *flags; pi->pid = kth->ktr_pid; TAILQ_INSERT_TAIL(&trace_procs, pi, info); return (1); case KTR_PROCDTOR: TAILQ_FOREACH(pi, &trace_procs, info) { if (pi->pid == kth->ktr_pid) { TAILQ_REMOVE(&trace_procs, pi, info); free(pi); break; } } return (1); } return (0); } u_int abidump(struct ktr_header *kth) { struct proc_info *pi; const char *abi; const char *arch; u_int flags = 0; TAILQ_FOREACH(pi, &trace_procs, info) { if (pi->pid == kth->ktr_pid) { flags = pi->sv_flags; break; } } if (abiflag == 0) return (flags); switch (flags & SV_ABI_MASK) { case SV_ABI_LINUX: abi = "L"; break; case SV_ABI_FREEBSD: abi = "F"; break; case SV_ABI_CLOUDABI: abi = "C"; break; default: abi = "U"; break; } if (flags & SV_LP64) arch = "64"; else if (flags & SV_ILP32) arch = "32"; else arch = "00"; printf("%s%s ", abi, arch); return (flags); } void dumpheader(struct ktr_header *kth) { static char unknown[64]; static struct timeval prevtime, prevtime_e, temp; const char *type; const char *sign; switch (kth->ktr_type) { case KTR_SYSCALL: type = "CALL"; break; case KTR_SYSRET: type = "RET "; break; case KTR_NAMEI: type = "NAMI"; break; case KTR_GENIO: type = "GIO "; break; case KTR_PSIG: type = "PSIG"; break; case KTR_CSW: type = "CSW "; break; case KTR_USER: type = "USER"; break; case KTR_STRUCT: type = "STRU"; break; case KTR_SYSCTL: type = "SCTL"; break; case KTR_PROCCTOR: /* FALLTHROUGH */ case KTR_PROCDTOR: return; case KTR_CAPFAIL: type = "CAP "; break; case KTR_FAULT: type = "PFLT"; break; case KTR_FAULTEND: type = "PRET"; break; default: sprintf(unknown, "UNKNOWN(%d)", kth->ktr_type); type = unknown; } /* * The ktr_tid field was previously the ktr_buffer field, which held * the kernel pointer value for the buffer associated with data * following the record header. It now holds a threadid, but only * for trace files after the change. Older trace files still contain * kernel pointers. Detect this and suppress the results by printing * negative tid's as 0. */ if (threads) printf("%6jd %6jd %-8.*s ", (intmax_t)kth->ktr_pid, kth->ktr_tid > 0 ? (intmax_t)kth->ktr_tid : 0, MAXCOMLEN, kth->ktr_comm); else printf("%6jd %-8.*s ", (intmax_t)kth->ktr_pid, MAXCOMLEN, kth->ktr_comm); if (timestamp) { if (timestamp & TIMESTAMP_ABSOLUTE) { printf("%jd.%06ld ", (intmax_t)kth->ktr_time.tv_sec, kth->ktr_time.tv_usec); } if (timestamp & TIMESTAMP_ELAPSED) { if (prevtime_e.tv_sec == 0) prevtime_e = kth->ktr_time; timevalsub(&kth->ktr_time, &prevtime_e); printf("%jd.%06ld ", (intmax_t)kth->ktr_time.tv_sec, kth->ktr_time.tv_usec); timevaladd(&kth->ktr_time, &prevtime_e); } if (timestamp & TIMESTAMP_RELATIVE) { if (prevtime.tv_sec == 0) prevtime = kth->ktr_time; temp = kth->ktr_time; timevalsub(&kth->ktr_time, &prevtime); if ((intmax_t)kth->ktr_time.tv_sec < 0) { kth->ktr_time = prevtime; prevtime = temp; timevalsub(&kth->ktr_time, &prevtime); sign = "-"; } else { prevtime = temp; sign = ""; } printf("%s%jd.%06ld ", sign, (intmax_t)kth->ktr_time.tv_sec, kth->ktr_time.tv_usec); } } printf("%s ", type); } #include static void ioctlname(unsigned long val) { const char *str; str = sysdecode_ioctlname(val); if (str != NULL) printf("%s", str); else if (decimal) printf("%lu", val); else printf("%#lx", val); } static enum sysdecode_abi syscallabi(u_int sv_flags) { if (sv_flags == 0) return (SYSDECODE_ABI_FREEBSD); switch (sv_flags & SV_ABI_MASK) { case SV_ABI_FREEBSD: return (SYSDECODE_ABI_FREEBSD); #if defined(__amd64__) || defined(__i386__) case SV_ABI_LINUX: #ifdef __amd64__ if (sv_flags & SV_ILP32) return (SYSDECODE_ABI_LINUX32); #endif return (SYSDECODE_ABI_LINUX); #endif #if defined(__aarch64__) || defined(__amd64__) case SV_ABI_CLOUDABI: return (SYSDECODE_ABI_CLOUDABI64); #endif default: return (SYSDECODE_ABI_UNKNOWN); } } static void syscallname(u_int code, u_int sv_flags) { const char *name; name = sysdecode_syscallname(syscallabi(sv_flags), code); if (name == NULL) printf("[%d]", code); else { printf("%s", name); if (syscallno) printf("[%d]", code); } } void ktrsyscall(struct ktr_syscall *ktr, u_int sv_flags) { int narg = ktr->ktr_narg; register_t *ip; intmax_t arg; syscallname(ktr->ktr_code, sv_flags); ip = &ktr->ktr_args[0]; if (narg) { char c = '('; if (fancy && (sv_flags == 0 || (sv_flags & SV_ABI_MASK) == SV_ABI_FREEBSD)) { switch (ktr->ktr_code) { case SYS_bindat: case SYS_connectat: case SYS_faccessat: case SYS_fchmodat: case SYS_fchownat: case SYS_fstatat: case SYS_futimesat: case SYS_linkat: case SYS_mkdirat: case SYS_mkfifoat: case SYS_mknodat: case SYS_openat: case SYS_readlinkat: case SYS_renameat: case SYS_unlinkat: case SYS_utimensat: putchar('('); atfdname(*ip, decimal); c = ','; ip++; narg--; break; } switch (ktr->ktr_code) { case SYS_ioctl: { print_number(ip, narg, c); putchar(c); ioctlname(*ip); c = ','; ip++; narg--; break; } case SYS_ptrace: putchar('('); ptraceopname(*ip); c = ','; ip++; narg--; break; case SYS_access: case SYS_eaccess: case SYS_faccessat: print_number(ip, narg, c); putchar(','); accessmodename(*ip); ip++; narg--; break; case SYS_open: case SYS_openat: print_number(ip, narg, c); putchar(','); flagsandmodename(ip[0], ip[1], decimal); ip += 2; narg -= 2; break; case SYS_wait4: print_number(ip, narg, c); print_number(ip, narg, c); /* * A flags value of zero is valid for * wait4() but not for wait6(), so * handle zero special here. */ if (*ip == 0) { print_number(ip, narg, c); } else { putchar(','); wait6optname(*ip); ip++; narg--; } break; case SYS_wait6: putchar('('); idtypename(*ip, decimal); c = ','; ip++; narg--; print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); wait6optname(*ip); ip++; narg--; break; case SYS_chmod: case SYS_fchmod: case SYS_lchmod: case SYS_fchmodat: print_number(ip, narg, c); putchar(','); modename(*ip); ip++; narg--; break; case SYS_mknod: case SYS_mknodat: print_number(ip, narg, c); putchar(','); modename(*ip); ip++; narg--; break; case SYS_getfsstat: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); getfsstatflagsname(*ip); ip++; narg--; break; case SYS_mount: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); mountflagsname(*ip); ip++; narg--; break; case SYS_unmount: print_number(ip, narg, c); putchar(','); mountflagsname(*ip); ip++; narg--; break; case SYS_recvmsg: case SYS_sendmsg: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); sendrecvflagsname(*ip); ip++; narg--; break; case SYS_recvfrom: case SYS_sendto: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); sendrecvflagsname(*ip); ip++; narg--; break; case SYS_chflags: case SYS_fchflags: case SYS_lchflags: print_number(ip, narg, c); putchar(','); modename(*ip); ip++; narg--; break; case SYS_kill: print_number(ip, narg, c); putchar(','); signame(*ip); ip++; narg--; break; case SYS_reboot: putchar('('); rebootoptname(*ip); ip++; narg--; break; case SYS_umask: putchar('('); modename(*ip); ip++; narg--; break; case SYS_msync: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); msyncflagsname(*ip); ip++; narg--; break; #ifdef SYS_freebsd6_mmap case SYS_freebsd6_mmap: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); mmapprotname(*ip); putchar(','); ip++; narg--; mmapflagsname(*ip); ip++; narg--; break; #endif case SYS_mmap: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); mmapprotname(*ip); putchar(','); ip++; narg--; mmapflagsname(*ip); ip++; narg--; break; case SYS_mprotect: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); mmapprotname(*ip); ip++; narg--; break; case SYS_madvise: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); madvisebehavname(*ip); ip++; narg--; break; case SYS_setpriority: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); prioname(*ip); ip++; narg--; break; case SYS_fcntl: print_number(ip, narg, c); putchar(','); fcntlcmdname(ip[0], ip[1], decimal); ip += 2; narg -= 2; break; case SYS_socket: { int sockdomain; putchar('('); sockdomain = *ip; sockdomainname(sockdomain); ip++; narg--; putchar(','); socktypenamewithflags(*ip); ip++; narg--; if (sockdomain == PF_INET || sockdomain == PF_INET6) { putchar(','); sockipprotoname(*ip); ip++; narg--; } c = ','; break; } case SYS_setsockopt: case SYS_getsockopt: print_number(ip, narg, c); putchar(','); sockoptlevelname(*ip, decimal); if (*ip == SOL_SOCKET) { ip++; narg--; putchar(','); sockoptname(*ip); } ip++; narg--; break; #ifdef SYS_freebsd6_lseek case SYS_freebsd6_lseek: print_number(ip, narg, c); /* Hidden 'pad' argument, not in lseek(2) */ print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); whencename(*ip); ip++; narg--; break; #endif case SYS_lseek: print_number(ip, narg, c); /* Hidden 'pad' argument, not in lseek(2) */ print_number(ip, narg, c); putchar(','); whencename(*ip); ip++; narg--; break; case SYS_flock: print_number(ip, narg, c); putchar(','); flockname(*ip); ip++; narg--; break; case SYS_mkfifo: case SYS_mkfifoat: case SYS_mkdir: case SYS_mkdirat: print_number(ip, narg, c); putchar(','); modename(*ip); ip++; narg--; break; case SYS_shutdown: print_number(ip, narg, c); putchar(','); shutdownhowname(*ip); ip++; narg--; break; case SYS_socketpair: putchar('('); sockdomainname(*ip); ip++; narg--; putchar(','); socktypenamewithflags(*ip); ip++; narg--; c = ','; break; case SYS_getrlimit: case SYS_setrlimit: putchar('('); rlimitname(*ip); ip++; narg--; c = ','; break; case SYS_quotactl: print_number(ip, narg, c); putchar(','); quotactlname(*ip); ip++; narg--; c = ','; break; case SYS_nfssvc: putchar('('); nfssvcname(*ip); ip++; narg--; c = ','; break; case SYS_rtprio: putchar('('); rtprioname(*ip); ip++; narg--; c = ','; break; case SYS___semctl: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); semctlname(*ip); ip++; narg--; break; case SYS_semget: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); semgetname(*ip); ip++; narg--; break; case SYS_msgctl: print_number(ip, narg, c); putchar(','); shmctlname(*ip); ip++; narg--; break; case SYS_shmat: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); shmatname(*ip); ip++; narg--; break; case SYS_shmctl: print_number(ip, narg, c); putchar(','); shmctlname(*ip); ip++; narg--; break; case SYS_shm_open: print_number(ip, narg, c); putchar(','); flagsname(ip[0]); printf(",0%o", (unsigned int)ip[1]); ip += 3; narg -= 3; break; case SYS_minherit: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); minheritname(*ip); ip++; narg--; break; case SYS_rfork: putchar('('); rforkname(*ip); ip++; narg--; c = ','; break; case SYS_lio_listio: putchar('('); lio_listioname(*ip); ip++; narg--; c = ','; break; case SYS_mlockall: putchar('('); mlockallname(*ip); ip++; narg--; break; case SYS_sched_setscheduler: print_number(ip, narg, c); putchar(','); schedpolicyname(*ip); ip++; narg--; break; case SYS_sched_get_priority_max: case SYS_sched_get_priority_min: putchar('('); schedpolicyname(*ip); ip++; narg--; break; case SYS_sendfile: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); sendfileflagsname(*(int *)ip); ip++; narg--; break; case SYS_kldsym: print_number(ip, narg, c); putchar(','); kldsymcmdname(*ip); ip++; narg--; break; case SYS_sigprocmask: putchar('('); sigprocmaskhowname(*ip); ip++; narg--; c = ','; break; case SYS___acl_get_file: case SYS___acl_set_file: case SYS___acl_get_fd: case SYS___acl_set_fd: case SYS___acl_delete_file: case SYS___acl_delete_fd: case SYS___acl_aclcheck_file: case SYS___acl_aclcheck_fd: case SYS___acl_get_link: case SYS___acl_set_link: case SYS___acl_delete_link: case SYS___acl_aclcheck_link: print_number(ip, narg, c); putchar(','); acltypename(*ip); ip++; narg--; break; case SYS_sigaction: putchar('('); signame(*ip); ip++; narg--; c = ','; break; case SYS_extattrctl: print_number(ip, narg, c); putchar(','); extattrctlname(*ip); ip++; narg--; break; case SYS_nmount: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); mountflagsname(*ip); ip++; narg--; break; case SYS_thr_create: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); thrcreateflagsname(*ip); ip++; narg--; break; case SYS_thr_kill: print_number(ip, narg, c); putchar(','); signame(*ip); ip++; narg--; break; case SYS_kldunloadf: print_number(ip, narg, c); putchar(','); kldunloadfflagsname(*ip); ip++; narg--; break; case SYS_linkat: case SYS_renameat: case SYS_symlinkat: print_number(ip, narg, c); putchar(','); atfdname(*ip, decimal); ip++; narg--; break; case SYS_cap_fcntls_limit: print_number(ip, narg, c); putchar(','); arg = *ip; ip++; narg--; capfcntlname(arg); break; case SYS_posix_fadvise: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); (void)putchar(','); fadvisebehavname((int)*ip); ip++; narg--; break; case SYS_procctl: putchar('('); idtypename(*ip, decimal); c = ','; ip++; narg--; print_number(ip, narg, c); putchar(','); procctlcmdname(*ip); ip++; narg--; break; case SYS__umtx_op: print_number(ip, narg, c); putchar(','); umtxopname(*ip); switch (*ip) { case UMTX_OP_CV_WAIT: ip++; narg--; putchar(','); umtxcvwaitflags(*ip); break; case UMTX_OP_RW_RDLOCK: ip++; narg--; putchar(','); umtxrwlockflags(*ip); break; } ip++; narg--; } } while (narg > 0) { print_number(ip, narg, c); } putchar(')'); } putchar('\n'); } void ktrsysret(struct ktr_sysret *ktr, u_int sv_flags) { register_t ret = ktr->ktr_retval; int error = ktr->ktr_error; syscallname(ktr->ktr_code, sv_flags); printf(" "); if (error == 0) { if (fancy) { printf("%ld", (long)ret); if (ret < 0 || ret > 9) printf("/%#lx", (unsigned long)ret); } else { if (decimal) printf("%ld", (long)ret); else printf("%#lx", (unsigned long)ret); } } else if (error == ERESTART) printf("RESTART"); else if (error == EJUSTRETURN) printf("JUSTRETURN"); else { printf("-1 errno %d", sysdecode_freebsd_to_abi_errno( syscallabi(sv_flags), error)); if (fancy) printf(" %s", strerror(ktr->ktr_error)); } putchar('\n'); } void ktrnamei(char *cp, int len) { printf("\"%.*s\"\n", len, cp); } void hexdump(char *p, int len, int screenwidth) { int n, i; int width; width = 0; do { width += 2; i = 13; /* base offset */ i += (width / 2) + 1; /* spaces every second byte */ i += (width * 2); /* width of bytes */ i += 3; /* " |" */ i += width; /* each byte */ i += 1; /* "|" */ } while (i < screenwidth); width -= 2; for (n = 0; n < len; n += width) { for (i = n; i < n + width; i++) { if ((i % width) == 0) { /* beginning of line */ printf(" 0x%04x", i); } if ((i % 2) == 0) { printf(" "); } if (i < len) printf("%02x", p[i] & 0xff); else printf(" "); } printf(" |"); for (i = n; i < n + width; i++) { if (i >= len) break; if (p[i] >= ' ' && p[i] <= '~') printf("%c", p[i]); else printf("."); } printf("|\n"); } if ((i % width) != 0) printf("\n"); } void visdump(char *dp, int datalen, int screenwidth) { int col = 0; char *cp; int width; char visbuf[5]; printf(" \""); col = 8; for (;datalen > 0; datalen--, dp++) { vis(visbuf, *dp, VIS_CSTYLE, *(dp+1)); cp = visbuf; /* * Keep track of printables and * space chars (like fold(1)). */ if (col == 0) { putchar('\t'); col = 8; } switch(*cp) { case '\n': col = 0; putchar('\n'); continue; case '\t': width = 8 - (col&07); break; default: width = strlen(cp); } if (col + width > (screenwidth-2)) { printf("\\\n\t"); col = 8; } col += width; do { putchar(*cp++); } while (*cp); } if (col == 0) printf(" "); printf("\"\n"); } void ktrgenio(struct ktr_genio *ktr, int len) { int datalen = len - sizeof (struct ktr_genio); char *dp = (char *)ktr + sizeof (struct ktr_genio); static int screenwidth = 0; int i, binary; printf("fd %d %s %d byte%s\n", ktr->ktr_fd, ktr->ktr_rw == UIO_READ ? "read" : "wrote", datalen, datalen == 1 ? "" : "s"); if (suppressdata) return; if (screenwidth == 0) { struct winsize ws; if (fancy && ioctl(fileno(stderr), TIOCGWINSZ, &ws) != -1 && ws.ws_col > 8) screenwidth = ws.ws_col; else screenwidth = 80; } if (maxdata && datalen > maxdata) datalen = maxdata; for (i = 0, binary = 0; i < datalen && binary == 0; i++) { if (dp[i] >= 32 && dp[i] < 127) continue; if (dp[i] == 10 || dp[i] == 13 || dp[i] == 0 || dp[i] == 9) continue; binary = 1; } if (binary) hexdump(dp, datalen, screenwidth); else visdump(dp, datalen, screenwidth); } const char *signames[] = { "NULL", "HUP", "INT", "QUIT", "ILL", "TRAP", "IOT", /* 1 - 6 */ "EMT", "FPE", "KILL", "BUS", "SEGV", "SYS", /* 7 - 12 */ "PIPE", "ALRM", "TERM", "URG", "STOP", "TSTP", /* 13 - 18 */ "CONT", "CHLD", "TTIN", "TTOU", "IO", "XCPU", /* 19 - 24 */ "XFSZ", "VTALRM", "PROF", "WINCH", "29", "USR1", /* 25 - 30 */ "USR2", NULL, /* 31 - 32 */ }; void ktrpsig(struct ktr_psig *psig) { if (psig->signo > 0 && psig->signo < NSIG) printf("SIG%s ", signames[psig->signo]); else printf("SIG %d ", psig->signo); if (psig->action == SIG_DFL) { printf("SIG_DFL code="); sigcodename(psig->signo, psig->code); putchar('\n'); } else { printf("caught handler=0x%lx mask=0x%x code=", (u_long)psig->action, psig->mask.__bits[0]); sigcodename(psig->signo, psig->code); putchar('\n'); } } void ktrcsw_old(struct ktr_csw_old *cs) { printf("%s %s\n", cs->out ? "stop" : "resume", cs->user ? "user" : "kernel"); } void ktrcsw(struct ktr_csw *cs) { printf("%s %s \"%s\"\n", cs->out ? "stop" : "resume", cs->user ? "user" : "kernel", cs->wmesg); } void ktruser(int len, void *p) { unsigned char *cp; if (sysdecode_utrace(stdout, p, len)) { printf("\n"); return; } printf("%d ", len); cp = p; while (len--) if (decimal) printf(" %d", *cp++); else printf(" %02x", *cp++); printf("\n"); } void ktrcaprights(cap_rights_t *rightsp) { printf("cap_rights_t "); capname(rightsp); printf("\n"); } +static void +ktrtimeval(struct timeval *tv) +{ + + printf("{%ld, %ld}", (long)tv->tv_sec, tv->tv_usec); +} + void +ktritimerval(struct itimerval *it) +{ + + printf("itimerval { .interval = "); + ktrtimeval(&it->it_interval); + printf(", .value = "); + ktrtimeval(&it->it_value); + printf(" }\n"); +} + +void ktrsockaddr(struct sockaddr *sa) { /* TODO: Support additional address families #include struct sockaddr_natm *natm; #include struct sockaddr_nb *nb; */ char addr[64]; /* * note: ktrstruct() has already verified that sa points to a * buffer at least sizeof(struct sockaddr) bytes long and exactly * sa->sa_len bytes long. */ printf("struct sockaddr { "); sockfamilyname(sa->sa_family); printf(", "); #define check_sockaddr_len(n) \ if (sa_##n.s##n##_len < sizeof(struct sockaddr_##n)) { \ printf("invalid"); \ break; \ } switch(sa->sa_family) { case AF_INET: { struct sockaddr_in sa_in; memset(&sa_in, 0, sizeof(sa_in)); memcpy(&sa_in, sa, sa->sa_len); check_sockaddr_len(in); inet_ntop(AF_INET, &sa_in.sin_addr, addr, sizeof addr); printf("%s:%u", addr, ntohs(sa_in.sin_port)); break; } case AF_INET6: { struct sockaddr_in6 sa_in6; memset(&sa_in6, 0, sizeof(sa_in6)); memcpy(&sa_in6, sa, sa->sa_len); check_sockaddr_len(in6); getnameinfo((struct sockaddr *)&sa_in6, sizeof(sa_in6), addr, sizeof(addr), NULL, 0, NI_NUMERICHOST); printf("[%s]:%u", addr, htons(sa_in6.sin6_port)); break; } case AF_UNIX: { struct sockaddr_un sa_un; memset(&sa_un, 0, sizeof(sa_un)); memcpy(&sa_un, sa, sa->sa_len); printf("%.*s", (int)sizeof(sa_un.sun_path), sa_un.sun_path); break; } default: printf("unknown address family"); } printf(" }\n"); } void ktrstat(struct stat *statp) { char mode[12], timestr[PATH_MAX + 4]; struct passwd *pwd; struct group *grp; struct tm *tm; /* * note: ktrstruct() has already verified that statp points to a * buffer exactly sizeof(struct stat) bytes long. */ printf("struct stat {"); printf("dev=%ju, ino=%ju, ", (uintmax_t)statp->st_dev, (uintmax_t)statp->st_ino); if (resolv == 0) printf("mode=0%jo, ", (uintmax_t)statp->st_mode); else { strmode(statp->st_mode, mode); printf("mode=%s, ", mode); } printf("nlink=%ju, ", (uintmax_t)statp->st_nlink); if (resolv == 0) { pwd = NULL; } else { #ifdef HAVE_LIBCASPER if (cappwd != NULL) pwd = cap_getpwuid(cappwd, statp->st_uid); else #endif pwd = getpwuid(statp->st_uid); } if (pwd == NULL) printf("uid=%ju, ", (uintmax_t)statp->st_uid); else printf("uid=\"%s\", ", pwd->pw_name); if (resolv == 0) { grp = NULL; } else { #ifdef HAVE_LIBCASPER if (capgrp != NULL) grp = cap_getgrgid(capgrp, statp->st_gid); else #endif grp = getgrgid(statp->st_gid); } if (grp == NULL) printf("gid=%ju, ", (uintmax_t)statp->st_gid); else printf("gid=\"%s\", ", grp->gr_name); printf("rdev=%ju, ", (uintmax_t)statp->st_rdev); printf("atime="); if (resolv == 0) printf("%jd", (intmax_t)statp->st_atim.tv_sec); else { tm = localtime(&statp->st_atim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_atim.tv_nsec != 0) printf(".%09ld, ", statp->st_atim.tv_nsec); else printf(", "); printf("mtime="); if (resolv == 0) printf("%jd", (intmax_t)statp->st_mtim.tv_sec); else { tm = localtime(&statp->st_mtim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_mtim.tv_nsec != 0) printf(".%09ld, ", statp->st_mtim.tv_nsec); else printf(", "); printf("ctime="); if (resolv == 0) printf("%jd", (intmax_t)statp->st_ctim.tv_sec); else { tm = localtime(&statp->st_ctim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_ctim.tv_nsec != 0) printf(".%09ld, ", statp->st_ctim.tv_nsec); else printf(", "); printf("birthtime="); if (resolv == 0) printf("%jd", (intmax_t)statp->st_birthtim.tv_sec); else { tm = localtime(&statp->st_birthtim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_birthtim.tv_nsec != 0) printf(".%09ld, ", statp->st_birthtim.tv_nsec); else printf(", "); printf("size=%jd, blksize=%ju, blocks=%jd, flags=0x%x", (uintmax_t)statp->st_size, (uintmax_t)statp->st_blksize, (intmax_t)statp->st_blocks, statp->st_flags); printf(" }\n"); } void ktrstruct(char *buf, size_t buflen) { char *name, *data; size_t namelen, datalen; int i; cap_rights_t rights; + struct itimerval it; struct stat sb; struct sockaddr_storage ss; for (name = buf, namelen = 0; namelen < buflen && name[namelen] != '\0'; ++namelen) /* nothing */; if (namelen == buflen) goto invalid; if (name[namelen] != '\0') goto invalid; data = buf + namelen + 1; datalen = buflen - namelen - 1; if (datalen == 0) goto invalid; /* sanity check */ for (i = 0; i < (int)namelen; ++i) if (!isalpha(name[i])) goto invalid; if (strcmp(name, "caprights") == 0) { if (datalen != sizeof(cap_rights_t)) goto invalid; memcpy(&rights, data, datalen); ktrcaprights(&rights); + } else if (strcmp(name, "itimerval") == 0) { + if (datalen != sizeof(struct itimerval)) + goto invalid; + memcpy(&it, data, datalen); + ktritimerval(&it); } else if (strcmp(name, "stat") == 0) { if (datalen != sizeof(struct stat)) goto invalid; memcpy(&sb, data, datalen); ktrstat(&sb); } else if (strcmp(name, "sockaddr") == 0) { if (datalen > sizeof(ss)) goto invalid; memcpy(&ss, data, datalen); if (datalen != ss.ss_len) goto invalid; ktrsockaddr((struct sockaddr *)&ss); } else { printf("unknown structure\n"); } return; invalid: printf("invalid record\n"); } void ktrcapfail(struct ktr_cap_fail *ktr) { switch (ktr->cap_type) { case CAPFAIL_NOTCAPABLE: /* operation on fd with insufficient capabilities */ printf("operation requires "); capname(&ktr->cap_needed); printf(", descriptor holds "); capname(&ktr->cap_held); break; case CAPFAIL_INCREASE: /* requested more capabilities than fd already has */ printf("attempt to increase capabilities from "); capname(&ktr->cap_held); printf(" to "); capname(&ktr->cap_needed); break; case CAPFAIL_SYSCALL: /* called restricted syscall */ printf("disallowed system call"); break; case CAPFAIL_LOOKUP: /* used ".." in strict-relative mode */ printf("restricted VFS lookup"); break; default: printf("unknown capability failure: "); capname(&ktr->cap_needed); printf(" "); capname(&ktr->cap_held); break; } printf("\n"); } void ktrfault(struct ktr_fault *ktr) { printf("0x%jx ", (uintmax_t)ktr->vaddr); vmprotname(ktr->type); printf("\n"); } void ktrfaultend(struct ktr_faultend *ktr) { vmresultname(ktr->result); printf("\n"); } void usage(void) { fprintf(stderr, "usage: kdump [-dEnlHRrSsTA] [-f trfile] " "[-m maxdata] [-p pid] [-t trstr]\n"); exit(1); }