diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c --- a/sys/kern/kern_racct.c +++ b/sys/kern/kern_racct.c @@ -103,6 +103,13 @@ uint64_t amount); static void racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount); +static int racct_set_locked(struct proc *p, int resource, uint64_t amount, + int force); +static void racct_updatepcpu_locked(struct proc *p); +static void racct_updatepcpu_racct_locked(struct racct *racct); +static void racct_updatepcpu_containers(void); +static void racct_propagatepcpu_locked(struct proc *p); +static void racct_settime_locked(struct proc *p, bool exit); SDT_PROVIDER_DEFINE(racct); SDT_PROBE_DEFINE3(racct, , rusage, add, @@ -315,6 +322,7 @@ #define CCPU_EXP_MAX 110 +#if 0 /* * This function is analogical to the getpcpu() function in the ps(1) command. * They should both calculate in the same way so that the racct %cpu @@ -397,6 +405,7 @@ return ((100 * (uint64_t)p_pctcpu * 1000000) / FSCALE); } +#endif static void racct_add_racct(struct racct *dest, const struct racct *src) @@ -663,10 +672,44 @@ RACCT_UNLOCK(); } +static void +racct_settime_locked(struct proc *p, bool exit) +{ + struct thread *td; + struct timeval wallclock; + uint64_t runtime; + + ASSERT_RACCT_ENABLED(); + RACCT_LOCK_ASSERT(); + PROC_LOCK_ASSERT(p, MA_OWNED); + + if (exit) { + /* proc_reap() has already calculated rux + * and added crux to rux. */ + runtime = cputick2usec(p->p_rux.rux_runtime - + p->p_crux.rux_runtime); + } + else { + PROC_STATLOCK(p); + FOREACH_THREAD_IN_PROC(p, td) + ruxagg(p, td); + PROC_STATUNLOCK(p); + runtime = cputick2usec(p->p_rux.rux_runtime); + } + KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); + microuptime(&wallclock); + timevalsub(&wallclock, &p->p_stats->p_start); + + racct_set_locked(p, RACCT_CPU, runtime, 0); + racct_set_locked(p, RACCT_WALLCLOCK, + (uint64_t)wallclock.tv_sec * 1000000 + + wallclock.tv_usec, 0); +} + static int racct_set_locked(struct proc *p, int resource, uint64_t amount, int force) { - int64_t old_amount, decayed_amount, diff_proc, diff_cred; + int64_t old_amount, diff_proc, diff_cred; #ifdef RCTL int error; #endif @@ -683,6 +726,7 @@ * The diffs may be negative. */ diff_proc = amount - old_amount; +#if 0 if (resource == RACCT_PCTCPU) { /* * Resources in per-credential racct containers may decay. @@ -693,6 +737,7 @@ decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; diff_cred = amount - decayed_amount; } else +#endif diff_cred = diff_proc; #ifdef notyet KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource), @@ -992,37 +1037,16 @@ void racct_proc_exit(struct proc *p) { - struct timeval wallclock; - uint64_t pct_estimate, pct, runtime; int i; if (!racct_enable) return; PROC_LOCK(p); - /* - * We don't need to calculate rux, proc_reap() has already done this. - */ - runtime = cputick2usec(p->p_rux.rux_runtime); -#ifdef notyet - KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); -#else - if (runtime < p->p_prev_runtime) - runtime = p->p_prev_runtime; -#endif - microuptime(&wallclock); - timevalsub(&wallclock, &p->p_stats->p_start); - if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { - pct_estimate = (1000000 * runtime * 100) / - ((uint64_t)wallclock.tv_sec * 1000000 + - wallclock.tv_usec); - } else - pct_estimate = 0; - pct = racct_getpcpu(p, pct_estimate); - RACCT_LOCK(); - racct_set_locked(p, RACCT_CPU, runtime, 0); - racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct); + + racct_settime_locked(p, true); + //racct_propagatepcpu_locked(p); KASSERT(p->p_racct->r_resources[RACCT_RSS] == 0, ("process reaped with %ju allocated for RSS\n", @@ -1096,6 +1120,10 @@ RACCT_LOCK(); racct_add_racct(dest, src); racct_sub_racct(src, src); + dest->r_runtime = src->r_runtime; + dest->r_time = src->r_time; + src->r_runtime = 0; + timevalsub(&src->r_time, &src->r_time); RACCT_UNLOCK(); } @@ -1193,7 +1221,9 @@ static void racct_decay_callback(struct racct *racct, void *dummy1, void *dummy2) { +#if 0 int64_t r_old, r_new; +#endif ASSERT_RACCT_ENABLED(); RACCT_LOCK_ASSERT(); @@ -1205,6 +1235,7 @@ rctl_throttle_decay(racct, RACCT_WRITEIOPS); #endif +#if 0 r_old = racct->r_resources[RACCT_PCTCPU]; /* If there is nothing to decay, just exit. */ @@ -1213,6 +1244,7 @@ r_new = r_old * RACCT_DECAY_FACTOR / FSCALE; racct->r_resources[RACCT_PCTCPU] = r_new; +#endif } static void @@ -1243,13 +1275,90 @@ racct_decay_post, NULL, NULL); } +static void +racct_updatepcpu_racct_locked(struct racct *racct) +{ + ASSERT_RACCT_ENABLED(); + RACCT_LOCK_ASSERT(); + + struct timeval diff; + uint64_t elapsed; + uint64_t runtime; + uint64_t newpcpu; + uint64_t oldpcpu; + + microuptime(&diff); + timevalsub(&diff, &racct->r_time); + elapsed = (uint64_t)diff.tv_sec * 1000000 + diff.tv_usec; + runtime = racct->r_resources[RACCT_CPU] - racct->r_runtime; + newpcpu = runtime * 100 * 1000000 / elapsed; + oldpcpu = racct->r_resources[RACCT_PCTCPU]; + racct->r_resources[RACCT_PCTCPU] = ((FSCALE - RACCT_DECAY_FACTOR) * newpcpu + + RACCT_DECAY_FACTOR * oldpcpu) / FSCALE; + racct->r_runtime = racct->r_resources[RACCT_CPU]; + timevaladd(&racct->r_time, &diff); +} + +static void +racct_updatepcpu_locked(struct proc *p) +{ + ASSERT_RACCT_ENABLED(); + PROC_LOCK_ASSERT(p, MA_OWNED); + + racct_updatepcpu_racct_locked(p->p_racct); +} + +static void +racct_propagatepcpu_locked(struct proc *p) +{ + struct prison *pr; + + ASSERT_RACCT_ENABLED(); + PROC_LOCK_ASSERT(p, MA_OWNED); + + racct_updatepcpu_racct_locked(p->p_ucred->cr_ruidinfo->ui_racct); + for (pr = p->p_ucred->cr_prison; pr != NULL; pr = pr->pr_parent) + racct_updatepcpu_racct_locked(pr->pr_prison_racct->prr_racct); + racct_updatepcpu_racct_locked(p->p_ucred->cr_loginclass->lc_racct); +} + +static void +racct_updatepcpu_pre(void) +{ + + RACCT_LOCK(); +} + +static void +racct_updatepcpu_post(void) +{ + + RACCT_UNLOCK(); +} + +static void +racct_updatepcpu_racct_callback(struct racct *racct, void *dummy1, void *dummy2) +{ + racct_updatepcpu_racct_locked(racct); +} + +static void +racct_updatepcpu_containers(void) +{ + ASSERT_RACCT_ENABLED(); + + ui_racct_foreach(racct_updatepcpu_racct_callback, racct_updatepcpu_pre, + racct_updatepcpu_post, NULL, NULL); + loginclass_racct_foreach(racct_updatepcpu_racct_callback, racct_updatepcpu_pre, + racct_updatepcpu_post, NULL, NULL); + prison_racct_foreach(racct_updatepcpu_racct_callback, racct_updatepcpu_pre, + racct_updatepcpu_post, NULL, NULL); +} + static void racctd(void) { - struct thread *td; struct proc *p; - struct timeval wallclock; - uint64_t pct, pct_estimate, runtime; ASSERT_RACCT_ENABLED(); @@ -1261,34 +1370,14 @@ FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state != PRS_NORMAL) { - if (p->p_state == PRS_ZOMBIE) + if (p->p_state == PRS_ZOMBIE) { +#if 0 racct_set(p, RACCT_PCTCPU, 0); +#endif + } PROC_UNLOCK(p); continue; } - - microuptime(&wallclock); - timevalsub(&wallclock, &p->p_stats->p_start); - PROC_STATLOCK(p); - FOREACH_THREAD_IN_PROC(p, td) - ruxagg(p, td); - runtime = cputick2usec(p->p_rux.rux_runtime); - PROC_STATUNLOCK(p); -#ifdef notyet - KASSERT(runtime >= p->p_prev_runtime, - ("runtime < p_prev_runtime")); -#else - if (runtime < p->p_prev_runtime) - runtime = p->p_prev_runtime; -#endif - p->p_prev_runtime = runtime; - if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { - pct_estimate = (1000000 * runtime * 100) / - ((uint64_t)wallclock.tv_sec * 1000000 + - wallclock.tv_usec); - } else - pct_estimate = 0; - pct = racct_getpcpu(p, pct_estimate); RACCT_LOCK(); #ifdef RCTL rctl_throttle_decay(p->p_racct, RACCT_READBPS); @@ -1296,11 +1385,8 @@ rctl_throttle_decay(p->p_racct, RACCT_READIOPS); rctl_throttle_decay(p->p_racct, RACCT_WRITEIOPS); #endif - racct_set_locked(p, RACCT_PCTCPU, pct, 1); - racct_set_locked(p, RACCT_CPU, runtime, 0); - racct_set_locked(p, RACCT_WALLCLOCK, - (uint64_t)wallclock.tv_sec * 1000000 + - wallclock.tv_usec, 0); + racct_settime_locked(p, false); + racct_updatepcpu_locked(p); RACCT_UNLOCK(); PROC_UNLOCK(p); } @@ -1328,6 +1414,8 @@ PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); + + racct_updatepcpu_containers(); pause("-", hz); } } diff --git a/sys/sys/racct.h b/sys/sys/racct.h --- a/sys/sys/racct.h +++ b/sys/sys/racct.h @@ -144,13 +144,17 @@ /* * The 'racct' structure defines resource consumption for a particular - * subject, such as process or jail. + * subject, such as process or jail. It also contains the current + * cpu time and real time of the subject, taken at the most recent + * time that RACCT_PCPU was updated. * * This structure must be filled with zeroes initially. */ struct racct { int64_t r_resources[RACCT_MAX + 1]; LIST_HEAD(, rctl_rule_link) r_rule_links; + uint64_t r_runtime; + struct timeval r_time; }; SYSCTL_DECL(_kern_racct);