Index: lib/libutil/kinfo_getvmobject.c =================================================================== --- lib/libutil/kinfo_getvmobject.c +++ lib/libutil/kinfo_getvmobject.c @@ -36,8 +36,8 @@ #include "libutil.h" -struct kinfo_vmobject * -kinfo_getvmobject(int *cntp) +static struct kinfo_vmobject * +_kinfo_getvmobject(int *cntp, const char* vmobjsysctl) { char *buf, *bp, *ep; struct kinfo_vmobject *kvo, *list, *kp; @@ -46,14 +46,14 @@ buf = NULL; for (i = 0; i < 3; i++) { - if (sysctlbyname("vm.objects", NULL, &len, NULL, 0) < 0) { + if (sysctlbyname(vmobjsysctl, NULL, &len, NULL, 0) < 0) { free(buf); return (NULL); } buf = reallocf(buf, len); if (buf == NULL) return (NULL); - if (sysctlbyname("vm.objects", buf, &len, NULL, 0) == 0) + if (sysctlbyname(vmobjsysctl, buf, &len, NULL, 0) == 0) goto unpack; if (errno != ENOMEM) { free(buf); @@ -94,3 +94,17 @@ *cntp = cnt; return (list); } + +struct kinfo_vmobject * +kinfo_getvmobject(int *cntp) +{ + + return _kinfo_getvmobject(cntp, "vm.objects"); +} + +struct kinfo_vmobject * +kinfo_getswapvmobject(int *cntp) +{ + + return _kinfo_getvmobject(cntp, "vm.swap_objects"); +} Index: lib/libutil/libutil.h =================================================================== --- lib/libutil/libutil.h +++ lib/libutil/libutil.h @@ -109,6 +109,8 @@ kinfo_getvmmap(pid_t _pid, int *_cntp); struct kinfo_vmobject * kinfo_getvmobject(int *_cntp); +struct kinfo_vmobject * + kinfo_getswapvmobject(int *_cntp); struct kinfo_proc * kinfo_getallproc(int *_cntp); struct kinfo_proc * Index: sys/vm/vm_object.c =================================================================== --- sys/vm/vm_object.c +++ sys/vm/vm_object.c @@ -2471,7 +2471,7 @@ } static int -sysctl_vm_object_list(SYSCTL_HANDLER_ARGS) +vm_object_list_handler(struct sysctl_req *req, bool swap_only) { struct kinfo_vmobject *kvo; char *fullpath, *freepath; @@ -2509,10 +2509,12 @@ */ mtx_lock(&vm_object_list_mtx); TAILQ_FOREACH(obj, &vm_object_list, object_list) { - if (obj->type 
== OBJT_DEAD) + if (obj->type == OBJT_DEAD || + (swap_only && (obj->flags & (OBJ_ANON | OBJ_SWAP)) == 0)) continue; VM_OBJECT_RLOCK(obj); - if (obj->type == OBJT_DEAD) { + if (obj->type == OBJT_DEAD || + (swap_only && (obj->flags & (OBJ_ANON | OBJ_SWAP)) == 0)) { VM_OBJECT_RUNLOCK(obj); continue; } @@ -2524,20 +2526,22 @@ kvo->kvo_memattr = obj->memattr; kvo->kvo_active = 0; kvo->kvo_inactive = 0; - TAILQ_FOREACH(m, &obj->memq, listq) { - /* - * A page may belong to the object but be - * dequeued and set to PQ_NONE while the - * object lock is not held. This makes the - * reads of m->queue below racy, and we do not - * count pages set to PQ_NONE. However, this - * sysctl is only meant to give an - * approximation of the system anyway. - */ - if (m->a.queue == PQ_ACTIVE) - kvo->kvo_active++; - else if (m->a.queue == PQ_INACTIVE) - kvo->kvo_inactive++; + if (!swap_only) { + TAILQ_FOREACH(m, &obj->memq, listq) { + /* + * A page may belong to the object but be + * dequeued and set to PQ_NONE while the + * object lock is not held. This makes the + * reads of m->queue below racy, and we do not + * count pages set to PQ_NONE. However, this + * sysctl is only meant to give an + * approximation of the system anyway. 
+ */ + if (m->a.queue == PQ_ACTIVE) + kvo->kvo_active++; + else if (m->a.queue == PQ_INACTIVE) + kvo->kvo_inactive++; + } } kvo->kvo_vn_fileid = 0; @@ -2547,7 +2551,10 @@ fullpath = ""; kvo->kvo_type = vm_object_kvme_type(obj, &vp); if (vp != NULL) { - vref(vp); + if (swap_only) + vp = NULL; + else + vref(vp); } else if ((obj->flags & OBJ_ANON) != 0) { MPASS(kvo->kvo_type == KVME_TYPE_DEFAULT || kvo->kvo_type == KVME_TYPE_SWAP); @@ -2580,6 +2587,7 @@ kvo->kvo_structsize = roundup(kvo->kvo_structsize, sizeof(uint64_t)); error = SYSCTL_OUT(req, kvo, kvo->kvo_structsize); + maybe_yield(); mtx_lock(&vm_object_list_mtx); if (error) break; @@ -2588,10 +2596,28 @@ free(kvo, M_TEMP); return (error); } + +static int +sysctl_vm_object_list(SYSCTL_HANDLER_ARGS) +{ + return (vm_object_list_handler(req, false)); +} + SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_object_list, "S,kinfo_vmobject", "List of VM objects"); +static int +sysctl_vm_object_list_swap(SYSCTL_HANDLER_ARGS) +{ + return (vm_object_list_handler(req, true)); +} + +SYSCTL_PROC(_vm, OID_AUTO, swap_objects, + CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, 0, + sysctl_vm_object_list_swap, "S,kinfo_vmobject", + "List of swap VM objects"); + #include "opt_ddb.h" #ifdef DDB #include Index: usr.bin/systat/Makefile =================================================================== --- usr.bin/systat/Makefile +++ usr.bin/systat/Makefile @@ -5,7 +5,7 @@ PROG= systat SRCS= cmds.c cmdtab.c devs.c fetch.c iostat.c keyboard.c main.c sysput.c \ - netcmds.c netstat.c pigs.c swap.c icmp.c \ + netcmds.c netstat.c pigs.c proc.c swap.c icmp.c \ mode.c ip.c sctp.c tcp.c zarc.c \ vmstat.c convtbl.c ifcmds.c ifstat.c @@ -16,6 +16,6 @@ WARNS?= 1 -LIBADD= ncursesw m devstat kvm util +LIBADD= ncursesw m devstat kvm util procstat .include Index: usr.bin/systat/proc.c =================================================================== --- /dev/null 
+++ usr.bin/systat/proc.c @@ -0,0 +1,309 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2021 Yoshihiro Ota + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "systat.h" +#include "extern.h" + +/* + * vm objects of swappable types + */ +static struct swapvm { + uint64_t kvo_me; + uint32_t swapped; /* in pages */ + uint64_t next; + pid_t pid; /* to avoid double counting */ +} *swobj = NULL; +static int nswobj = 0; + +static struct procstat *prstat = NULL; +/* + * procstat_getvmmap() is an expensive call and the number of processes running + * may also be high.
So, maintain an array of pointers for ease of expanding
+ * the array; swapping pointers is also faster than swapping structs.
+ */
+static struct proc_usage {
+	pid_t pid;
+	uid_t uid;
+	char command[COMMLEN + 1];
+	uint64_t total;		/* process size, copied from ki_size */
+	uint32_t pages;		/* swap usage, in pages */
+} **pu = NULL;
+static unsigned int nproc;	/* number of entries of pu[] in use */
+static int proc_compar(const void *, const void *);
+
+/*
+ * Draw row 'y' of the per-process table from entry 'idx' of pu[].  The row
+ * is always cleared first and stays blank when 'idx' is not a valid entry.
+ * 'totalswappages' is the system-wide page count the per-system percentage
+ * is computed against; a value of 0 yields 0% instead of a division by zero.
+ */
+static void
+display_proc_line(int idx, int y, uint64_t totalswappages)
+{
+	int offset, rate;
+	const char *uname;
+	char buf[30];
+	uint64_t swapbytes;
+
+	wmove(wnd, y, 0);
+	wclrtoeol(wnd);
+	if (idx < 0 || (unsigned int)idx >= nproc)
+		return;
+
+	uname = user_from_uid(pu[idx]->uid, 0);
+	/* pages is only 32 bits wide; widen it before converting to bytes. */
+	swapbytes = ptoa((uint64_t)pu[idx]->pages);
+
+	/*
+	 * Every field is given a maximum width so that a long user name
+	 * cannot shift the columns that follow or overrun buf.
+	 */
+	snprintf(buf, sizeof(buf), "%6d %-10.10s %-10.10s", pu[idx]->pid,
+	    uname, pu[idx]->command);
+	offset = 6 + 1 + 10 + 1 + 10 + 1;
+	mvwaddstr(wnd, y, 0, buf);
+	sysputuint64(wnd, y, offset, 4, swapbytes, 0);
+	offset += 4;
+	mvwaddstr(wnd, y, offset, " / ");
+	offset += 3;
+	sysputuint64(wnd, y, offset, 4, pu[idx]->total, 0);
+	offset += 4;
+
+	/* Swapped share of this process, as a number and as a bar. */
+	rate = pu[idx]->total > 1 ? 100 * swapbytes / pu[idx]->total : 0;
+	snprintf(buf, sizeof(buf), "%3d%%", rate);
+	mvwaddstr(wnd, y, offset, buf);
+	if (rate > 100) /* avoid running over the screen */
+		rate = 100;
+	sysputXs(wnd, y, offset + 5, rate / 10);
+
+	/*
+	 * Share of the system-wide total.  The product is computed in 64
+	 * bits so that it cannot wrap for large page counts.
+	 */
+	rate = totalswappages != 0 ?
+	    100 * (uint64_t)pu[idx]->pages / totalswappages : 0;
+	snprintf(buf, sizeof(buf), "%3d%%", rate);
+	mvwaddstr(wnd, y, offset + 16, buf);
+	if (rate > 100) /* avoid running over the screen */
+		rate = 100;
+	sysputXs(wnd, y, offset + 21, rate / 10);
+}
+
+/*
+ * bsearch(3) comparator: 'a' points to the object address being looked up
+ * and 'b' to an element of swobj[].  The ordering matches swobj_sort(),
+ * i.e. larger kvo_me values come first.
+ */
+static int
+swobj_search(const void *a, const void *b)
+{
+	const uint64_t *aa = a;
+	const struct swapvm *bb = b;
+
+	if (*aa == bb->kvo_me)
+		return (0);
+	return (*aa > bb->kvo_me ? -1 : 1);
+}
+
+/*
+ * qsort(3) comparator ordering swobj[] by descending kvo_me.  Equal keys
+ * compare as equal so that the comparator is consistent.
+ */
+static int
+swobj_sort(const void *a, const void *b)
+{
+	const struct swapvm *aa = a;
+	const struct swapvm *bb = b;
+
+	if (aa->kvo_me == bb->kvo_me)
+		return (0);
+	return (aa->kvo_me > bb->kvo_me ? -1 : 1);
+}
+
+/*
+ * Refresh swobj[] from kinfo_getswapvmobject(), keeping only objects of type
+ * KVME_TYPE_DEFAULT or KVME_TYPE_SWAP, and sort it for swobj_search().
+ * Returns true when at least one object was recorded and false when the
+ * query failed or no object qualified.
+ *
+ * swobj[] only ever grows.  A pass over the list stores what fits in the
+ * current allocation while counting every qualifying object; when the count
+ * exceeds the capacity, the array is enlarged to the count and another pass
+ * resumes after the last stored object to fill in the remainder.
+ */
+static bool
+get_swap_vmobjects(void)
+{
+	static int maxnobj;
+	int cnt, i, next_i, last_nswobj;
+	struct kinfo_vmobject *kvo;
+
+	next_i = nswobj = 0;
+	kvo = kinfo_getswapvmobject(&cnt);
+	if (kvo == NULL) {
+		error("kinfo_getswapvmobject()");
+		return (false);
+	}
+	do {
+		for (i = next_i; i < cnt; i++) {
+			if (kvo[i].kvo_type != KVME_TYPE_DEFAULT &&
+			    kvo[i].kvo_type != KVME_TYPE_SWAP)
+				continue;
+			if (nswobj < maxnobj) {
+				swobj[nswobj].kvo_me = kvo[i].kvo_me;
+				swobj[nswobj].swapped = kvo[i].kvo_swapped;
+				swobj[nswobj].next = kvo[i].kvo_backing_obj;
+				swobj[nswobj].pid = 0;
+				next_i = i + 1;
+			}
+			nswobj++;
+		}
+		if (nswobj <= maxnobj)
+			break;
+		/*
+		 * Not everything fit: grow to the number of objects counted
+		 * and rewind nswobj to the number actually stored so far.
+		 */
+		last_nswobj = maxnobj;
+		maxnobj = nswobj;
+		nswobj = last_nswobj;
+		if ((swobj = reallocf(swobj, maxnobj * sizeof(*swobj))) ==
+		    NULL) {
+			error("Out of memory");
+			die(0);
+		}
+	} while (i <= cnt); /* extra safety guard */
+	free(kvo);
+	if (nswobj > 1)
+		qsort(swobj, nswobj, sizeof(swobj[0]), swobj_sort);
+	return (nswobj > 0);
+}
+
+/* This returns the number of swap pages a process uses.
*/
+static uint32_t
+per_proc_swap_usage(struct kinfo_proc *kipp)
+{
+	int i, cnt;
+	uint32_t pages = 0;
+	uint64_t vmobj;
+	struct kinfo_vmentry *freep, *kve;
+	struct swapvm *vm;
+
+	freep = procstat_getvmmap(prstat, kipp, &cnt);
+	if (freep == NULL)
+		return (pages);
+
+	for (i = 0; i < cnt; i++) {
+		kve = &freep[i];
+		if (kve->kve_type != KVME_TYPE_DEFAULT &&
+		    kve->kve_type != KVME_TYPE_SWAP)
+			continue;
+		/*
+		 * Follow the chain of backing objects.  Each object is
+		 * tagged with the pid once counted, so an object reachable
+		 * from several map entries of this process is added only
+		 * once; the walk stops at the first unknown or already
+		 * counted object.
+		 */
+		vmobj = kve->kve_obj;
+		do {
+			vm = bsearch(&vmobj, swobj, nswobj,
+			    sizeof(swobj[0]), swobj_search);
+			if (vm == NULL || vm->pid == kipp->ki_pid)
+				break;
+			pages += vm->swapped;
+			vmobj = vm->next;
+			vm->pid = kipp->ki_pid;
+		} while (vmobj != 0);
+	}
+	/* Release the map through libprocstat, which allocated it. */
+	procstat_freevmmap(prstat, freep);
+	return (pages);
+}
+
+/*
+ * Close the libprocstat handle, if open, and destroy window 'w' unless it
+ * is NULL.
+ */
+void
+closeproc(WINDOW *w)
+{
+
+	if (prstat != NULL)
+		procstat_close(prstat);
+	prstat = NULL;
+	if (w == NULL)
+		return;
+	wclear(w);
+	wrefresh(w);
+	delwin(w);
+}
+
+/*
+ * Draw 'hight' rows of the per-process table, starting on the row below
+ * 'col', which is where the heading goes.
+ */
+void
+procshow(int col, int hight, uint64_t totalswappages)
+{
+	int i, y;
+
+	for (i = 0, y = col + 1 /* HEADING */; i < hight; i++, y++)
+		display_proc_line(i, y, totalswappages);
+}
+
+/*
+ * Open the libprocstat handle unless it is already open.  Returns non-zero
+ * when a handle is available.
+ */
+int
+procinit(void)
+{
+
+	if (prstat == NULL)
+		prstat = procstat_open_sysctl();
+	return (prstat != NULL);
+}
+
+/*
+ * Rebuild the per-process swap usage table: refresh the swap object list,
+ * compute the swap usage of every process and keep the ones with a non-zero
+ * result in pu[], sorted by proc_compar().  nproc is left at 0 when any of
+ * the queries fails.
+ */
+void
+procgetinfo(void)
+{
+	static unsigned int maxnproc = 0;
+	int cnt, i;
+	uint32_t pages;
+	struct kinfo_proc *kipp;
+
+	nproc = 0;
+	/*
+	 * The handle is missing if an earlier procinit() failed or after
+	 * closeproc(); try to (re)open it rather than hand NULL to
+	 * libprocstat.
+	 */
+	if (!procinit())
+		return;
+	if (!get_swap_vmobjects()) /* call failed or nothing is paged-out */
+		return;
+
+	kipp = procstat_getprocs(prstat, KERN_PROC_PROC, 0, &cnt);
+	if (kipp == NULL) {
+		error("procstat_getprocs()");
+		return;
+	}
+	/*
+	 * pu[] has room for one pointer per process and only grows; new
+	 * slots start out NULL and are allocated on first use below.
+	 */
+	if (cnt > 0 && maxnproc < (unsigned int)cnt) {
+		if ((pu = realloc(pu, cnt * sizeof(*pu))) == NULL) {
+			error("Out of memory");
+			die(0);
+		}
+		memset(&pu[maxnproc], 0, (cnt - maxnproc) * sizeof(pu[0]));
+		maxnproc = cnt;
+	}
+
+	for (i = 0; i < cnt; i++) {
+		pages = per_proc_swap_usage(&kipp[i]);
+		if (pages == 0)
+			continue;
+		if (pu[nproc] == NULL &&
+		    (pu[nproc] = malloc(sizeof(**pu))) == NULL) {
+			error("Out of memory");
+			die(0);
+		}
+		strlcpy(pu[nproc]->command, kipp[i].ki_comm,
+		    sizeof(pu[nproc]->command));
+		pu[nproc]->pid = kipp[i].ki_pid;
+		pu[nproc]->uid = kipp[i].ki_uid;
+		pu[nproc]->pages = pages;
+		pu[nproc]->total = kipp[i].ki_size;
+		nproc++;
+	}
+	/* Everything needed was copied into pu[]; release the list. */
+	procstat_freeprocs(prstat, kipp);
+	if (nproc > 1)
+		qsort(pu, nproc, sizeof(*pu), proc_compar);
+}
+
+/*
+ * Print the heading of the per-process table on row 'col'.  The padding
+ * follows the column offsets used by display_proc_line(): the pid is
+ * right-aligned in 6 columns, user name and command take 10 columns each,
+ * the swap/total sizes start at column 29 and the two percentage columns
+ * at columns 40 and 56.
+ */
+void
+proclabel(int col)
+{
+
+	wmove(wnd, col, 0);
+	wclrtoeol(wnd);
+	mvwaddstr(wnd, col, 0,
+	    "   Pid Username   Command    Swap/Total "
+	    "Per-Process     Per-System");
+}
+
+/*
+ * qsort(3) comparator for pu[]: entries using more swap pages sort first.
+ */
+static int
+proc_compar(const void *a, const void *b)
+{
+	const struct proc_usage *aa = *(const struct proc_usage * const *)a;
+	const struct proc_usage *bb = *(const struct proc_usage * const *)b;
+
+	return (aa->pages > bb->pages ?
-1 : 1); +} Index: usr.bin/systat/swap.c =================================================================== --- usr.bin/systat/swap.c +++ usr.bin/systat/swap.c @@ -103,6 +103,7 @@ } pathlen = 80 - 50 /* % */ - 5 /* Used */ - 5 /* Size */ - 3 /* space */; dsinit(12); + procinit(); once = 1; return (1); @@ -125,14 +126,13 @@ cur_dev.dinfo = tmp_dinfo; last_dev.snap_time = cur_dev.snap_time; - dsgetinfo( &cur_dev ); + dsgetinfo(&cur_dev); + procgetinfo(); } void labelswap(void) { - const char *name; - int i; werase(wnd); @@ -146,18 +146,13 @@ mvwprintw(wnd, 0, 0, "%*s%5s %5s %s", -pathlen, "Device/Path", "Size", "Used", "|0% /10 /20 /30 /40 / 60\\ 70\\ 80\\ 90\\ 100|"); - - for (i = 0; i <= kvnsw; ++i) { - name = i == kvnsw ? "Total" : kvmsw[i].ksw_devname; - mvwprintw(wnd, 1 + i, 0, "%-*.*s", pathlen, pathlen - 1, name); - } } void showswap(void) { - int count; - int i; + const char *name; + int count, i; if (kvnsw != okvnsw) labelswap(); @@ -167,7 +162,10 @@ if (kvnsw <= 0) return; - for (i = 0; i <= kvnsw; ++i) { + for (i = (kvnsw == 1 ? 0 : kvnsw); i >= 0; i--) { + name = i == kvnsw ? "Total" : kvmsw[i].ksw_devname; + mvwprintw(wnd, 1 + i, 0, "%-*.*s", pathlen, pathlen - 1, name); + sysputpage(wnd, i + 1, pathlen, 5, kvmsw[i].ksw_total, 0); sysputpage(wnd, i + 1, pathlen + 5 + 1, 5, kvmsw[i].ksw_used, 0); @@ -178,4 +176,8 @@ } wclrtoeol(wnd); } + count = kvnsw == 1 ? 
2 : 3; + proclabel(kvnsw + count); + procshow(kvnsw + count, LINES - 5 - kvnsw + 3 - DISKHIGHT + 1, + kvmsw[kvnsw].ksw_total); } Index: usr.bin/systat/sysput.c =================================================================== --- usr.bin/systat/sysput.c +++ usr.bin/systat/sysput.c @@ -31,10 +31,11 @@ #include #include -#include -#include #include +#include #include +#include +#include #include "systat.h" #include "extern.h" @@ -103,26 +104,9 @@ sysputuint64(wd, row, col, width, val, flags); } -static int -calc_page_shift() -{ - u_int page_size; - int shifts; - - shifts = 0; - GETSYSCTL("vm.stats.vm.v_page_size", page_size); - for(; page_size > 1; page_size >>= 1) - shifts++; - return shifts; -} - void sysputpage(WINDOW *wd, int row, int col, int width, uint64_t pages, int flags) { - static int shifts = 0; - if (shifts == 0) - shifts = calc_page_shift(); - pages <<= shifts; - sysputuint64(wd, row, col, width, pages, flags); + sysputuint64(wd, row, col, width, ptoa(pages), flags); } Index: usr.bin/systat/systat.h =================================================================== --- usr.bin/systat/systat.h +++ usr.bin/systat/systat.h @@ -32,6 +32,7 @@ * $FreeBSD$ */ +#include #include struct cmdtab { @@ -72,3 +73,8 @@ extern void putint(int, int, int, int); extern void putfloat(double, int, int, int, int, int); extern void putlongdouble(long double, int, int, int, int, int); + +int procinit(void); +void procgetinfo(void); +void proclabel(int col); +void procshow(int col, int hight, uint64_t totalswappages); Index: usr.bin/systat/systat.1 =================================================================== --- usr.bin/systat/systat.1 +++ usr.bin/systat/systat.1 @@ -279,9 +279,11 @@ .El .It Ic swap Show information about swap space usage on all the -swap areas compiled into the kernel. -The first column is the device name of the partition. -The next column is the total space available in the partition. 
+swap areas compiled into the kernel, the processes that are using swap space, +and a summary of disk activity. +.Pp +The swap areas are displayed first with their names, sizes, and +usage percentages. The .Ar Used column indicates the total blocks used so far; @@ -289,6 +291,28 @@ If there are more than one swap partition in use, a total line is also shown. Areas known to the kernel, but not in use are shown as not available. +.Pp +Below the swap space statistics, +processes are listed in descending order of swap space usage. +For each process, the display shows its PID, user name, the beginning of its +command name, the amount of swap space it uses in bytes, its total size, +the percentage of the process that is in swap, and its share of all swap space. +.Pp +At the bottom left is the disk usage display. +It reports the number of +kilobytes per transaction, transactions per second, megabytes +per second and the percentage of the time the disk was busy averaged +over the refresh period of the display (by default, five seconds). +The system keeps statistics on most every storage device. +In general, up +to seven devices are displayed. +The devices displayed by default are the +first devices in the kernel's device list. +See +.Xr devstat 3 +and +.Xr devstat 9 +for details on the devstat system. .It Ic vmstat Take over the entire display and show a (rather crowded) compendium of statistics related to virtual memory usage, process scheduling,