Index: head/lib/libutil/Makefile =================================================================== --- head/lib/libutil/Makefile (revision 283623) +++ head/lib/libutil/Makefile (revision 283624) @@ -1,88 +1,90 @@ # @(#)Makefile 8.1 (Berkeley) 6/4/93 # $FreeBSD$ SHLIBDIR?= /lib .include LIB= util SHLIB_MAJOR= 9 SRCS= _secure_path.c auth.c expand_number.c flopen.c fparseln.c gr_util.c \ hexdump.c humanize_number.c kinfo_getfile.c kinfo_getfile.c \ - kinfo_getallproc.c kinfo_getproc.c kinfo_getvmmap.c kld.c \ + kinfo_getallproc.c kinfo_getproc.c kinfo_getvmmap.c \ + kinfo_getvmobject.c kld.c \ login_auth.c login_cap.c \ login_class.c login_crypt.c login_ok.c login_times.c login_tty.c \ pidfile.c property.c pty.c pw_util.c quotafile.c realhostname.c \ stub.c trimdomain.c uucplock.c INCS= libutil.h login_cap.h CFLAGS+= -DLIBC_SCCS .if ${MK_INET6_SUPPORT} != "no" CFLAGS+= -DINET6 .endif CFLAGS+= -I${.CURDIR} -I${.CURDIR}/../libc/gen/ MAN+= expand_number.3 flopen.3 fparseln.3 hexdump.3 \ humanize_number.3 kinfo_getallproc.3 kinfo_getfile.3 \ - kinfo_getproc.3 kinfo_getvmmap.3 kld.3 login_auth.3 login_cap.3 \ + kinfo_getproc.3 kinfo_getvmmap.3 kinfo_getvmobject.3 kld.3 \ + login_auth.3 login_cap.3 \ login_class.3 login_ok.3 login_times.3 login_tty.3 pidfile.3 \ property.3 pty.3 quotafile.3 realhostname.3 realhostname_sa.3 \ _secure_path.3 trimdomain.3 uucplock.3 pw_util.3 MAN+= login.conf.5 MLINKS+= kld.3 kld_isloaded.3 kld.3 kld_load.3 MLINKS+=login_auth.3 auth_cat.3 login_auth.3 auth_checknologin.3 MLINKS+=login_cap.3 login_close.3 login_cap.3 login_getcapbool.3 \ login_cap.3 login_getcaplist.3 login_cap.3 login_getcapnum.3 \ login_cap.3 login_getcapsize.3 login_cap.3 login_getcapstr.3 \ login_cap.3 login_getcaptime.3 login_cap.3 login_getclass.3 \ login_cap.3 login_getclassbyname.3 login_cap.3 login_getpath.3 \ login_cap.3 login_getpwclass.3 login_cap.3 login_getstyle.3 \ login_cap.3 login_getuserclass.3 login_cap.3 login_setcryptfmt.3 MLINKS+=login_class.3 
setclasscontext.3 login_class.3 setclassenvironment.3 \ login_class.3 setclassresources.3 login_class.3 setusercontext.3 MLINKS+=login_ok.3 auth_hostok.3 login_ok.3 auth_timeok.3 \ login_ok.3 auth_ttyok.3 MLINKS+=login_times.3 in_lt.3 login_times.3 in_ltm.3 \ login_times.3 in_ltms.3 \ login_times.3 in_lts.3 \ login_times.3 parse_lt.3 MLINKS+=pidfile.3 pidfile_close.3 \ pidfile.3 pidfile_fileno.3 \ pidfile.3 pidfile_open.3 \ pidfile.3 pidfile_remove.3 \ pidfile.3 pidfile_write.3 MLINKS+= property.3 property_find.3 property.3 properties_free.3 MLINKS+= property.3 properties_read.3 MLINKS+= pty.3 forkpty.3 pty.3 openpty.3 MLINKS+=quotafile.3 quota_close.3 \ quotafile.3 quota_fsname.3 \ quotafile.3 quota_open.3 \ quotafile.3 quota_qfname.3 \ quotafile.3 quota_read.3 \ quotafile.3 quota_statfs.3 \ quotafile.3 quota_write_limits.3 \ quotafile.3 quota_write_usage.3 MLINKS+=uucplock.3 uu_lock.3 uucplock.3 uu_lock_txfr.3 \ uucplock.3 uu_lockerr.3 uucplock.3 uu_unlock.3 MLINKS+=pw_util.3 pw_copy.3 \ pw_util.3 pw_dup.3 \ pw_util.3 pw_edit.3 \ pw_util.3 pw_equal.3 \ pw_util.3 pw_fini.3 \ pw_util.3 pw_init.3 \ pw_util.3 pw_make.3 \ pw_util.3 pw_make_v7.3 \ pw_util.3 pw_mkdb.3 \ pw_util.3 pw_lock.3 \ pw_util.3 pw_scan.3 \ pw_util.3 pw_tempname.3 \ pw_util.3 pw_tmp.3 .if ${MK_TESTS} != "no" SUBDIR+= tests .endif .include Index: head/lib/libutil/kinfo_getvmobject.3 =================================================================== --- head/lib/libutil/kinfo_getvmobject.3 (nonexistent) +++ head/lib/libutil/kinfo_getvmobject.3 (revision 283624) @@ -0,0 +1,74 @@ +.\" +.\" Copyright (c) 2015 John Baldwin +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd May 27, 2015 +.Dt KINFO_GETVMOBJECT 3 +.Os +.Sh NAME +.Nm kinfo_getvmobject +.Nd function for getting system-wide memory information +.Sh LIBRARY +.Lb libutil +.Sh SYNOPSIS +.In sys/types.h +.In sys/user.h +.In libutil.h +.Ft struct kinfo_vmobject * +.Fn kinfo_getvmobject "int *cntp" +.Sh DESCRIPTION +This function is used to obtain information about the objects using memory +in the system. +.Pp +The +.Ar cntp +argument allows the caller to know how many records are returned. +.Pp +This function is a wrapper around the +.Dq vm.objects +.Xr sysctl 3 +MIB. +While the kernel returns a packed structure, this function expands the +data into a fixed record format. +.Sh RETURN VALUES +On success the +.Fn kinfo_getvmobject +function returns a pointer to an array of +.Vt struct kinfo_vmobject +structures as defined by +.In sys/user.h . 
+The array is allocated by an internal call to +.Xr malloc 3 +and must be freed by the caller with a call to +.Xr free 3 . +On failure the +.Fn kinfo_getvmobject +function returns +.Dv NULL . +.Sh SEE ALSO +.Xr free 3 , +.Xr kinfo_getvmmap 3 , +.Xr malloc 3 Property changes on: head/lib/libutil/kinfo_getvmobject.3 ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/lib/libutil/kinfo_getvmobject.c =================================================================== --- head/lib/libutil/kinfo_getvmobject.c (nonexistent) +++ head/lib/libutil/kinfo_getvmobject.c (revision 283624) @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2013 Hudson River Trading LLC + * Written by: John H. Baldwin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include "libutil.h" + +struct kinfo_vmobject * +kinfo_getvmobject(int *cntp) +{ + char *buf, *bp, *ep; + struct kinfo_vmobject *kvo, *list, *kp; + size_t len; + int cnt, i; + + buf = NULL; + for (i = 0; i < 3; i++) { + if (sysctlbyname("vm.objects", NULL, &len, NULL, 0) < 0) + return (NULL); + buf = reallocf(buf, len); + if (buf == NULL) + return (NULL); + if (sysctlbyname("vm.objects", buf, &len, NULL, 0) == 0) + goto unpack; + if (errno != ENOMEM) { + free(buf); + return (NULL); + } + } + free(buf); + return (NULL); + +unpack: + /* Count items */ + cnt = 0; + bp = buf; + ep = buf + len; + while (bp < ep) { + kvo = (struct kinfo_vmobject *)(uintptr_t)bp; + bp += kvo->kvo_structsize; + cnt++; + } + + list = calloc(cnt, sizeof(*list)); + if (list == NULL) { + free(buf); + return (NULL); + } + + /* Unpack */ + bp = buf; + kp = list; + while (bp < ep) { + kvo = (struct kinfo_vmobject *)(uintptr_t)bp; + memcpy(kp, kvo, kvo->kvo_structsize); + bp += kvo->kvo_structsize; + kp->kvo_structsize = sizeof(*kp); + kp++; + } + free(buf); + *cntp = cnt; + return (list); +} Property changes on: head/lib/libutil/kinfo_getvmobject.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: 
svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/lib/libutil/libutil.h =================================================================== --- head/lib/libutil/libutil.h (revision 283623) +++ head/lib/libutil/libutil.h (revision 283624) @@ -1,250 +1,252 @@ /* * Copyright (c) 1996 Peter Wemm . * All rights reserved. * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * Portions of this software were developed for the FreeBSD Project by * ThinkSec AS and NAI Labs, the Security Research Division of Network * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 * ("CBOSS"), as part of the DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, is permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LIBUTIL_H_ #define _LIBUTIL_H_ #include #include #include #ifndef _GID_T_DECLARED typedef __gid_t gid_t; #define _GID_T_DECLARED #endif #ifndef _MODE_T_DECLARED typedef __mode_t mode_t; #define _MODE_T_DECLARED #endif #ifndef _PID_T_DECLARED typedef __pid_t pid_t; #define _PID_T_DECLARED #endif #ifndef _SIZE_T_DECLARED typedef __size_t size_t; #define _SIZE_T_DECLARED #endif #ifndef _UID_T_DECLARED typedef __uid_t uid_t; #define _UID_T_DECLARED #endif #define PROPERTY_MAX_NAME 64 #define PROPERTY_MAX_VALUE 512 /* For properties.c. */ typedef struct _property { struct _property *next; char *name; char *value; } *properties; /* Avoid pulling in all the include files for no need. 
*/ struct in_addr; struct pidfh; struct sockaddr; struct termios; struct winsize; __BEGIN_DECLS char *auth_getval(const char *_name); void clean_environment(const char * const *_white, const char * const *_more_white); int expand_number(const char *_buf, uint64_t *_num); int extattr_namespace_to_string(int _attrnamespace, char **_string); int extattr_string_to_namespace(const char *_string, int *_attrnamespace); int flopen(const char *_path, int _flags, ...); int forkpty(int *_amaster, char *_name, struct termios *_termp, struct winsize *_winp); void hexdump(const void *_ptr, int _length, const char *_hdr, int _flags); int humanize_number(char *_buf, size_t _len, int64_t _number, const char *_suffix, int _scale, int _flags); struct kinfo_file * kinfo_getfile(pid_t _pid, int *_cntp); struct kinfo_vmentry * kinfo_getvmmap(pid_t _pid, int *_cntp); +struct kinfo_vmobject * + kinfo_getvmobject(int *_cntp); struct kinfo_proc * kinfo_getallproc(int *_cntp); struct kinfo_proc * kinfo_getproc(pid_t _pid); int kld_isloaded(const char *_name); int kld_load(const char *_name); int login_tty(int _fd); int openpty(int *_amaster, int *_aslave, char *_name, struct termios *_termp, struct winsize *_winp); int pidfile_close(struct pidfh *_pfh); int pidfile_fileno(const struct pidfh *_pfh); struct pidfh * pidfile_open(const char *_path, mode_t _mode, pid_t *_pidptr); int pidfile_remove(struct pidfh *_pfh); int pidfile_write(struct pidfh *_pfh); void properties_free(properties _list); char *property_find(properties _list, const char *_name); properties properties_read(int _fd); int realhostname(char *_host, size_t _hsize, const struct in_addr *_ip); int realhostname_sa(char *_host, size_t _hsize, struct sockaddr *_addr, int _addrlen); int _secure_path(const char *_path, uid_t _uid, gid_t _gid); void trimdomain(char *_fullhost, int _hostsize); const char * uu_lockerr(int _uu_lockresult); int uu_lock(const char *_ttyname); int uu_unlock(const char *_ttyname); int uu_lock_txfr(const char 
*_ttyname, pid_t _pid); /* * Conditionally prototype the following functions if the include * files upon which they depend have been included. */ #ifdef _STDIO_H_ char *fparseln(FILE *_fp, size_t *_len, size_t *_lineno, const char _delim[3], int _flags); #endif #ifdef _PWD_H_ int pw_copy(int _ffd, int _tfd, const struct passwd *_pw, struct passwd *_old_pw); struct passwd *pw_dup(const struct passwd *_pw); int pw_edit(int _notsetuid); int pw_equal(const struct passwd *_pw1, const struct passwd *_pw2); void pw_fini(void); int pw_init(const char *_dir, const char *_master); char *pw_make(const struct passwd *_pw); char *pw_make_v7(const struct passwd *_pw); int pw_mkdb(const char *_user); int pw_lock(void); struct passwd * pw_scan(const char *_line, int _flags); const char * pw_tempname(void); int pw_tmp(int _mfd); #endif #ifdef _GRP_H_ int gr_copy(int __ffd, int _tfd, const struct group *_gr, struct group *_old_gr); struct group * gr_dup(const struct group *_gr); struct group * gr_add(const struct group *_gr, const char *_newmember); int gr_equal(const struct group *_gr1, const struct group *_gr2); void gr_fini(void); int gr_init(const char *_dir, const char *_master); int gr_lock(void); char *gr_make(const struct group *_gr); int gr_mkdb(void); struct group * gr_scan(const char *_line); int gr_tmp(int _mdf); #endif #ifdef _UFS_UFS_QUOTA_H_ struct fstab; struct quotafile; int quota_check_path(const struct quotafile *_qf, const char *_path); void quota_close(struct quotafile *_qf); int quota_convert(struct quotafile *_qf, int _wordsize); const char * quota_fsname(const struct quotafile *_qf); int quota_maxid(struct quotafile *_qf); int quota_off(struct quotafile *_qf); int quota_on(struct quotafile *_qf); struct quotafile * quota_open(struct fstab *_fs, int _quotatype, int _openflags); const char * quota_qfname(const struct quotafile *_qf); int quota_read(struct quotafile *_qf, struct dqblk *_dqb, int _id); int quota_write_limits(struct quotafile *_qf, struct dqblk 
*_dqb, int _id); int quota_write_usage(struct quotafile *_qf, struct dqblk *_dqb, int _id); #endif __END_DECLS /* fparseln(3) */ #define FPARSELN_UNESCESC 0x01 #define FPARSELN_UNESCCONT 0x02 #define FPARSELN_UNESCCOMM 0x04 #define FPARSELN_UNESCREST 0x08 #define FPARSELN_UNESCALL 0x0f /* Flags for hexdump(3). */ #define HD_COLUMN_MASK 0xff #define HD_DELIM_MASK 0xff00 #define HD_OMIT_COUNT (1 << 16) #define HD_OMIT_HEX (1 << 17) #define HD_OMIT_CHARS (1 << 18) /* Values for humanize_number(3)'s flags parameter. */ #define HN_DECIMAL 0x01 #define HN_NOSPACE 0x02 #define HN_B 0x04 #define HN_DIVISOR_1000 0x08 #define HN_IEC_PREFIXES 0x10 /* Values for humanize_number(3)'s scale parameter. */ #define HN_GETSCALE 0x10 #define HN_AUTOSCALE 0x20 /* Return values from realhostname(). */ #define HOSTNAME_FOUND 0 #define HOSTNAME_INCORRECTNAME 1 #define HOSTNAME_INVALIDADDR 2 #define HOSTNAME_INVALIDNAME 3 /* Flags for pw_scan(). */ #define PWSCAN_MASTER 0x01 #define PWSCAN_WARN 0x02 /* Return values from uu_lock(). */ #define UU_LOCK_INUSE 1 #define UU_LOCK_OK 0 #define UU_LOCK_OPEN_ERR (-1) #define UU_LOCK_READ_ERR (-2) #define UU_LOCK_CREAT_ERR (-3) #define UU_LOCK_WRITE_ERR (-4) #define UU_LOCK_LINK_ERR (-5) #define UU_LOCK_TRY_ERR (-6) #define UU_LOCK_OWNER_ERR (-7) #endif /* !_LIBUTIL_H_ */ Index: head/sys/sys/user.h =================================================================== --- head/sys/sys/user.h (revision 283623) +++ head/sys/sys/user.h (revision 283624) @@ -1,540 +1,561 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. * Copyright (c) 2007 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)user.h 8.2 (Berkeley) 9/23/93 * $FreeBSD$ */ #ifndef _SYS_USER_H_ #define _SYS_USER_H_ #include #ifndef _KERNEL /* stuff that *used* to be included by user.h, or is now needed */ #include #include #include #include #include #include #include #include #include #include /* XXX */ #include /* XXX */ #include /* XXX */ #include /* XXX */ #endif /* !_KERNEL */ #ifndef _SYS_RESOURCEVAR_H_ #include #endif #ifndef _SYS_SIGNALVAR_H_ #include #endif #ifndef _SYS_SOCKET_VAR_H_ #include #endif #include /* * KERN_PROC subtype ops return arrays of selected proc structure entries: * * This struct includes several arrays of spare space, with different arrays * for different standard C-types. 
When adding new variables to this struct, * the space for byte-aligned data should be taken from the ki_sparestring, * pointers from ki_spareptrs, word-aligned data from ki_spareints, and * doubleword-aligned data from ki_sparelongs. Make sure the space for new * variables come from the array which matches the size and alignment of * those variables on ALL hardware platforms, and then adjust the appropriate * KI_NSPARE_* value(s) to match. * * Always verify that sizeof(struct kinfo_proc) == KINFO_PROC_SIZE on all * platforms after you have added new variables. Note that if you change * the value of KINFO_PROC_SIZE, then many userland programs will stop * working until they are recompiled! * * Once you have added the new field, you will need to add code to initialize * it in two places: function fill_kinfo_proc in sys/kern/kern_proc.c and * function kvm_proclist in lib/libkvm/kvm_proc.c . */ #define KI_NSPARE_INT 4 #define KI_NSPARE_LONG 12 #define KI_NSPARE_PTR 6 #ifndef _KERNEL #ifndef KINFO_PROC_SIZE #error "Unknown architecture" #endif #endif /* !_KERNEL */ #define WMESGLEN 8 /* size of returned wchan message */ #define LOCKNAMELEN 8 /* size of returned lock name */ #define TDNAMLEN 16 /* size of returned thread name */ #define COMMLEN 19 /* size of returned ki_comm name */ #define KI_EMULNAMELEN 16 /* size of returned ki_emul */ #define KI_NGROUPS 16 /* number of groups in ki_groups */ #define LOGNAMELEN 17 /* size of returned ki_login */ #define LOGINCLASSLEN 17 /* size of returned ki_loginclass */ #ifndef BURN_BRIDGES #define OCOMMLEN TDNAMLEN #define ki_ocomm ki_tdname #endif /* Flags for the process credential. */ #define KI_CRF_CAPABILITY_MODE 0x00000001 /* * Steal a bit from ki_cr_flags to indicate that the cred had more than * KI_NGROUPS groups. 
*/ #define KI_CRF_GRP_OVERFLOW 0x80000000 struct kinfo_proc { int ki_structsize; /* size of this structure */ int ki_layout; /* reserved: layout identifier */ struct pargs *ki_args; /* address of command arguments */ struct proc *ki_paddr; /* address of proc */ struct user *ki_addr; /* kernel virtual addr of u-area */ struct vnode *ki_tracep; /* pointer to trace file */ struct vnode *ki_textvp; /* pointer to executable file */ struct filedesc *ki_fd; /* pointer to open file info */ struct vmspace *ki_vmspace; /* pointer to kernel vmspace struct */ void *ki_wchan; /* sleep address */ pid_t ki_pid; /* Process identifier */ pid_t ki_ppid; /* parent process id */ pid_t ki_pgid; /* process group id */ pid_t ki_tpgid; /* tty process group id */ pid_t ki_sid; /* Process session ID */ pid_t ki_tsid; /* Terminal session ID */ short ki_jobc; /* job control counter */ short ki_spare_short1; /* unused (just here for alignment) */ dev_t ki_tdev; /* controlling tty dev */ sigset_t ki_siglist; /* Signals arrived but not delivered */ sigset_t ki_sigmask; /* Current signal mask */ sigset_t ki_sigignore; /* Signals being ignored */ sigset_t ki_sigcatch; /* Signals being caught by user */ uid_t ki_uid; /* effective user id */ uid_t ki_ruid; /* Real user id */ uid_t ki_svuid; /* Saved effective user id */ gid_t ki_rgid; /* Real group id */ gid_t ki_svgid; /* Saved effective group id */ short ki_ngroups; /* number of groups */ short ki_spare_short2; /* unused (just here for alignment) */ gid_t ki_groups[KI_NGROUPS]; /* groups */ vm_size_t ki_size; /* virtual size */ segsz_t ki_rssize; /* current resident set size in pages */ segsz_t ki_swrss; /* resident set size before last swap */ segsz_t ki_tsize; /* text size (pages) XXX */ segsz_t ki_dsize; /* data size (pages) XXX */ segsz_t ki_ssize; /* stack size (pages) */ u_short ki_xstat; /* Exit status for wait & stop signal */ u_short ki_acflag; /* Accounting flags */ fixpt_t ki_pctcpu; /* %cpu for process during ki_swtime */ u_int 
ki_estcpu; /* Time averaged value of ki_cpticks */ u_int ki_slptime; /* Time since last blocked */ u_int ki_swtime; /* Time swapped in or out */ u_int ki_cow; /* number of copy-on-write faults */ u_int64_t ki_runtime; /* Real time in microsec */ struct timeval ki_start; /* starting time */ struct timeval ki_childtime; /* time used by process children */ long ki_flag; /* P_* flags */ long ki_kiflag; /* KI_* flags (below) */ int ki_traceflag; /* Kernel trace points */ char ki_stat; /* S* process status */ signed char ki_nice; /* Process "nice" value */ char ki_lock; /* Process lock (prevent swap) count */ char ki_rqindex; /* Run queue index */ u_char ki_oncpu_old; /* Which cpu we are on (legacy) */ u_char ki_lastcpu_old; /* Last cpu we were on (legacy) */ char ki_tdname[TDNAMLEN+1]; /* thread name */ char ki_wmesg[WMESGLEN+1]; /* wchan message */ char ki_login[LOGNAMELEN+1]; /* setlogin name */ char ki_lockname[LOCKNAMELEN+1]; /* lock name */ char ki_comm[COMMLEN+1]; /* command name */ char ki_emul[KI_EMULNAMELEN+1]; /* emulation name */ char ki_loginclass[LOGINCLASSLEN+1]; /* login class */ /* * When adding new variables, take space for char-strings from the * front of ki_sparestrings, and ints from the end of ki_spareints. * That way the spare room from both arrays will remain contiguous. 
*/ char ki_sparestrings[50]; /* spare string space */ int ki_spareints[KI_NSPARE_INT]; /* spare room for growth */ int ki_oncpu; /* Which cpu we are on */ int ki_lastcpu; /* Last cpu we were on */ int ki_tracer; /* Pid of tracing process */ int ki_flag2; /* P2_* flags */ int ki_fibnum; /* Default FIB number */ u_int ki_cr_flags; /* Credential flags */ int ki_jid; /* Process jail ID */ int ki_numthreads; /* XXXKSE number of threads in total */ lwpid_t ki_tid; /* XXXKSE thread id */ struct priority ki_pri; /* process priority */ struct rusage ki_rusage; /* process rusage statistics */ /* XXX - most fields in ki_rusage_ch are not (yet) filled in */ struct rusage ki_rusage_ch; /* rusage of children processes */ struct pcb *ki_pcb; /* kernel virtual addr of pcb */ void *ki_kstack; /* kernel virtual addr of stack */ void *ki_udata; /* User convenience pointer */ struct thread *ki_tdaddr; /* address of thread */ /* * When adding new variables, take space for pointers from the * front of ki_spareptrs, and longs from the end of ki_sparelongs. * That way the spare room from both arrays will remain contiguous. */ void *ki_spareptrs[KI_NSPARE_PTR]; /* spare room for growth */ long ki_sparelongs[KI_NSPARE_LONG]; /* spare room for growth */ long ki_sflag; /* PS_* flags */ long ki_tdflags; /* XXXKSE kthread flag */ }; void fill_kinfo_proc(struct proc *, struct kinfo_proc *); /* XXX - the following two defines are temporary */ #define ki_childstime ki_rusage_ch.ru_stime #define ki_childutime ki_rusage_ch.ru_utime /* * Legacy PS_ flag. This moved to p_flag but is maintained for * compatibility. */ #define PS_INMEM 0x00001 /* Loaded into memory. 
*/ /* ki_sessflag values */ #define KI_CTTY 0x00000001 /* controlling tty vnode active */ #define KI_SLEADER 0x00000002 /* session leader */ #define KI_LOCKBLOCK 0x00000004 /* proc blocked on lock ki_lockname */ /* * This used to be the per-process structure containing data that * isn't needed in core when the process is swapped out, but now it * remains only for the benefit of a.out core dumps. */ struct user { struct pstats u_stats; /* *p_stats */ struct kinfo_proc u_kproc; /* eproc */ }; /* * The KERN_PROC_FILE sysctl allows a process to dump the file descriptor * array of another process. */ #define KF_ATTR_VALID 0x0001 #define KF_TYPE_NONE 0 #define KF_TYPE_VNODE 1 #define KF_TYPE_SOCKET 2 #define KF_TYPE_PIPE 3 #define KF_TYPE_FIFO 4 #define KF_TYPE_KQUEUE 5 #define KF_TYPE_CRYPTO 6 #define KF_TYPE_MQUEUE 7 #define KF_TYPE_SHM 8 #define KF_TYPE_SEM 9 #define KF_TYPE_PTS 10 #define KF_TYPE_PROCDESC 11 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 #define KF_VTYPE_VREG 1 #define KF_VTYPE_VDIR 2 #define KF_VTYPE_VBLK 3 #define KF_VTYPE_VCHR 4 #define KF_VTYPE_VLNK 5 #define KF_VTYPE_VSOCK 6 #define KF_VTYPE_VFIFO 7 #define KF_VTYPE_VBAD 8 #define KF_VTYPE_UNKNOWN 255 #define KF_FD_TYPE_CWD -1 /* Current working directory */ #define KF_FD_TYPE_ROOT -2 /* Root directory */ #define KF_FD_TYPE_JAIL -3 /* Jail directory */ #define KF_FD_TYPE_TRACE -4 /* Ktrace vnode */ #define KF_FD_TYPE_TEXT -5 /* Text vnode */ #define KF_FD_TYPE_CTTY -6 /* Controlling terminal */ #define KF_FLAG_READ 0x00000001 #define KF_FLAG_WRITE 0x00000002 #define KF_FLAG_APPEND 0x00000004 #define KF_FLAG_ASYNC 0x00000008 #define KF_FLAG_FSYNC 0x00000010 #define KF_FLAG_NONBLOCK 0x00000020 #define KF_FLAG_DIRECT 0x00000040 #define KF_FLAG_HASLOCK 0x00000080 #define KF_FLAG_SHLOCK 0x00000100 #define KF_FLAG_EXLOCK 0x00000200 #define KF_FLAG_NOFOLLOW 0x00000400 #define KF_FLAG_CREAT 0x00000800 #define KF_FLAG_TRUNC 0x00001000 #define KF_FLAG_EXCL 0x00002000 #define KF_FLAG_EXEC 0x00004000 
/* * Old format. Has variable hidden padding due to alignment. * This is a compatability hack for pre-build 7.1 packages. */ #if defined(__amd64__) #define KINFO_OFILE_SIZE 1328 #endif #if defined(__i386__) #define KINFO_OFILE_SIZE 1324 #endif struct kinfo_ofile { int kf_structsize; /* Size of kinfo_file. */ int kf_type; /* Descriptor type. */ int kf_fd; /* Array index. */ int kf_ref_count; /* Reference count. */ int kf_flags; /* Flags. */ /* XXX Hidden alignment padding here on amd64 */ off_t kf_offset; /* Seek location. */ int kf_vnode_type; /* Vnode type. */ int kf_sock_domain; /* Socket domain. */ int kf_sock_type; /* Socket type. */ int kf_sock_protocol; /* Socket protocol. */ char kf_path[PATH_MAX]; /* Path to file, if any. */ struct sockaddr_storage kf_sa_local; /* Socket address. */ struct sockaddr_storage kf_sa_peer; /* Peer address. */ }; #if defined(__amd64__) || defined(__i386__) /* * This size should never be changed. If you really need to, you must provide * backward ABI compatibility by allocating a new sysctl MIB that will return * the new structure. The current structure has to be returned by the current * sysctl MIB. See how it is done for the kinfo_ofile structure. */ #define KINFO_FILE_SIZE 1392 #endif struct kinfo_file { int kf_structsize; /* Variable size of record. */ int kf_type; /* Descriptor type. */ int kf_fd; /* Array index. */ int kf_ref_count; /* Reference count. */ int kf_flags; /* Flags. */ int kf_pad0; /* Round to 64 bit alignment. */ int64_t kf_offset; /* Seek location. */ int kf_vnode_type; /* Vnode type. */ int kf_sock_domain; /* Socket domain. */ int kf_sock_type; /* Socket type. */ int kf_sock_protocol; /* Socket protocol. */ struct sockaddr_storage kf_sa_local; /* Socket address. */ struct sockaddr_storage kf_sa_peer; /* Peer address. */ union { struct { /* Address of so_pcb. */ uint64_t kf_sock_pcb; /* Address of inp_ppcb. */ uint64_t kf_sock_inpcb; /* Address of unp_conn. */ uint64_t kf_sock_unpconn; /* Send buffer state. 
*/ uint16_t kf_sock_snd_sb_state; /* Receive buffer state. */ uint16_t kf_sock_rcv_sb_state; /* Round to 64 bit alignment. */ uint32_t kf_sock_pad0; } kf_sock; struct { /* Global file id. */ uint64_t kf_file_fileid; /* File size. */ uint64_t kf_file_size; /* Vnode filesystem id. */ uint32_t kf_file_fsid; /* File device. */ uint32_t kf_file_rdev; /* File mode. */ uint16_t kf_file_mode; /* Round to 64 bit alignment. */ uint16_t kf_file_pad0; uint32_t kf_file_pad1; } kf_file; struct { uint32_t kf_sem_value; uint16_t kf_sem_mode; } kf_sem; struct { uint64_t kf_pipe_addr; uint64_t kf_pipe_peer; uint32_t kf_pipe_buffer_cnt; /* Round to 64 bit alignment. */ uint32_t kf_pipe_pad0[3]; } kf_pipe; struct { uint32_t kf_pts_dev; /* Round to 64 bit alignment. */ uint32_t kf_pts_pad0[7]; } kf_pts; struct { pid_t kf_pid; } kf_proc; } kf_un; uint16_t kf_status; /* Status flags. */ uint16_t kf_pad1; /* Round to 32 bit alignment. */ int _kf_ispare0; /* Space for more stuff. */ cap_rights_t kf_cap_rights; /* Capability rights. */ uint64_t _kf_cap_spare; /* Space for future cap_rights_t. */ /* Truncated before copyout in sysctl */ char kf_path[PATH_MAX]; /* Path to file, if any. */ }; /* * The KERN_PROC_VMMAP sysctl allows a process to dump the VM layout of * another process as a series of entries. 
*/
/*
 * KVME_TYPE_*: type codes.  kinfo_vmobject.kvo_type is documented below as
 * using them; NOTE(review): kve_type presumably uses them too.
 */
#define	KVME_TYPE_NONE		0
#define	KVME_TYPE_DEFAULT	1
#define	KVME_TYPE_VNODE		2
#define	KVME_TYPE_SWAP		3
#define	KVME_TYPE_DEVICE	4
#define	KVME_TYPE_PHYS		5
#define	KVME_TYPE_DEAD		6
#define	KVME_TYPE_SG		7
#define	KVME_TYPE_MGTDEVICE	8
#define	KVME_TYPE_UNKNOWN	255

/* NOTE(review): presumably the bits reported in kve_protection. */
#define	KVME_PROT_READ		0x00000001
#define	KVME_PROT_WRITE		0x00000002
#define	KVME_PROT_EXEC		0x00000004

/* NOTE(review): presumably the bits reported in kve_flags. */
#define	KVME_FLAG_COW		0x00000001
#define	KVME_FLAG_NEEDS_COPY	0x00000002
#define	KVME_FLAG_NOCOREDUMP	0x00000004
#define	KVME_FLAG_SUPER		0x00000008
#define	KVME_FLAG_GROWS_UP	0x00000010
#define	KVME_FLAG_GROWS_DOWN	0x00000020

#if defined(__amd64__)
#define	KINFO_OVMENTRY_SIZE	1168
#endif
#if defined(__i386__)
#define	KINFO_OVMENTRY_SIZE	1128
#endif

/*
 * Older map-entry record.  It carries pointer-typed members, and
 * KINFO_OVMENTRY_SIZE above is defined separately for amd64 and i386.
 */
struct kinfo_ovmentry {
	int	 kve_structsize;		/* Size of kinfo_vmmapentry. */
	int	 kve_type;			/* Type of map entry. */
	void	*kve_start;			/* Starting address. */
	void	*kve_end;			/* Finishing address. */
	int	 kve_flags;			/* Flags on map entry. */
	int	 kve_resident;			/* Number of resident pages. */
	int	 kve_private_resident;		/* Number of private pages. */
	int	 kve_protection;		/* Protection bitmask. */
	int	 kve_ref_count;			/* VM obj ref count. */
	int	 kve_shadow_count;		/* VM obj shadow count. */
	char	 kve_path[PATH_MAX];		/* Path to VM obj, if any. */
	void	*_kve_pspare[8];		/* Space for more stuff. */
	off_t	 kve_offset;			/* Mapping offset in object */
	uint64_t kve_fileid;			/* inode number if vnode */
	dev_t	 kve_fsid;			/* dev_t of vnode location */
	int	 _kve_ispare[3];		/* Space for more stuff. */
};

#if defined(__amd64__) || defined(__i386__)
#define	KINFO_VMENTRY_SIZE	1160
#endif

/*
 * Current map-entry record.  Addresses and offsets are fixed-width 64-bit
 * integers, and the trailing kve_path is truncated before copyout (see the
 * note on that field), so kve_structsize gives each record's size.
 */
struct kinfo_vmentry {
	int	 kve_structsize;		/* Variable size of record. */
	int	 kve_type;			/* Type of map entry. */
	uint64_t kve_start;			/* Starting address. */
	uint64_t kve_end;			/* Finishing address. */
	uint64_t kve_offset;			/* Mapping offset in object */
	uint64_t kve_vn_fileid;			/* inode number if vnode */
	uint32_t kve_vn_fsid;			/* dev_t of vnode location */
	int	 kve_flags;			/* Flags on map entry. */
	int	 kve_resident;			/* Number of resident pages. */
	int	 kve_private_resident;		/* Number of private pages. */
	int	 kve_protection;		/* Protection bitmask. */
	int	 kve_ref_count;			/* VM obj ref count. */
	int	 kve_shadow_count;		/* VM obj shadow count. */
	int	 kve_vn_type;			/* Vnode type. */
	uint64_t kve_vn_size;			/* File size. */
	uint32_t kve_vn_rdev;			/* Device id if device. */
	uint16_t kve_vn_mode;			/* File mode. */
	uint16_t kve_status;			/* Status flags. */
	int	 _kve_ispare[12];		/* Space for more stuff. */
	/* Truncated before copyout in sysctl */
	char	 kve_path[PATH_MAX];		/* Path to VM obj, if any. */
};

/*
+ * The "vm.objects" sysctl provides a list of all VM objects in the system
+ * via an array of these entries.
+ */
+struct kinfo_vmobject {
+	int	kvo_structsize;			/* Variable size of record. */
+	int	kvo_type;			/* Object type: KVME_TYPE_*. */
+	uint64_t kvo_size;			/* Object size in pages. */
+	uint64_t kvo_vn_fileid;			/* inode number if vnode. */
+	uint32_t kvo_vn_fsid;			/* dev_t of vnode location. */
+	int	kvo_ref_count;			/* Reference count. */
+	int	kvo_shadow_count;		/* Shadow count. */
+	int	kvo_memattr;			/* Memory attribute. */
+	uint64_t kvo_resident;			/* Number of resident pages. */
+	uint64_t kvo_active;			/* Number of active pages. */
+	uint64_t kvo_inactive;			/* Number of inactive pages. */
+	uint64_t _kvo_qspare[8];
+	uint32_t _kvo_ispare[8];
+	char	kvo_path[PATH_MAX];		/* Pathname, if any. */
+};
+
+/*
 * The KERN_PROC_KSTACK sysctl allows a process to dump the kernel stacks of
 * another process as a series of entries.  Each stack is represented by a
 * series of symbol names and offsets as generated by stack_sbuf_print(9).
 */
#define	KKST_MAXLEN	1024

#define	KKST_STATE_STACKOK	0		/* Stack is valid. */
#define	KKST_STATE_SWAPPED	1		/* Stack swapped out. */
#define	KKST_STATE_RUNNING	2		/* Stack ephemeral. */

#if defined(__amd64__) || defined(__i386__)
#define	KINFO_KSTACK_SIZE	1096
#endif

struct kinfo_kstack {
	lwpid_t	 kkst_tid;			/* ID of thread. */
	int	 kkst_state;			/* Validity of stack. */
	char	 kkst_trace[KKST_MAXLEN];	/* String representing stack. */
	int	 _kkst_ispare[16];		/* Space for more stuff. */
};

/*
 * NOTE(review): presumably the address range of the signal trampoline;
 * nothing in this header states that explicitly, so confirm at the producer.
 */
struct kinfo_sigtramp {
	void	*ksigtramp_start;
	void	*ksigtramp_end;
	void	*ksigtramp_spare[4];
};

#ifdef _KERNEL
/* Flags for kern_proc_out function. */
#define KERN_PROC_NOTHREADS	0x1
#define KERN_PROC_MASK32	0x2

struct sbuf;

/*
 * The kern_proc out functions are helper functions to dump process
 * miscellaneous kinfo structures to sbuf.  The main consumers are KERN_PROC
 * sysctls but they may also be used by other kernel subsystems.
 *
 * The functions manipulate the process locking state and expect the process
 * to be locked on enter.  On return the process is unlocked.
 */

int	kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen);
int	kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen);
int	kern_proc_out(struct proc *p, struct sbuf *sb, int flags);
int	kern_proc_vmmap_out(struct proc *p, struct sbuf *sb);

int	vntype_to_kinfo(int vtype);
#endif /* _KERNEL */

#endif
Index: head/sys/vm/vm_object.c
===================================================================
--- head/sys/vm/vm_object.c (revision 283623)
+++ head/sys/vm/vm_object.c (revision 283624)
@@ -1,2522 +1,2659 @@
/*-
 * Copyright (c) 1991, 1993
 * The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
* * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Virtual memory object module. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include /* for curproc, pageproc */ #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int old_msync; SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0, "Use old (insecure) msync behavior"); static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags, int flags, boolean_t *clearobjflags, boolean_t *eio); static boolean_t vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *clearobjflags); static void vm_object_qcollapse(vm_object_t object); static void vm_object_vndeallocate(vm_object_t object); /* * Virtual memory objects maintain the actual data * associated with allocated virtual memory. A given * page of memory exists within exactly one object. * * An object is only deallocated when all "references" * are given up. Only one "reference" to a given * region of an object should be writeable. * * Associated with each object is a list of all resident * memory pages belonging to that object; this list is * maintained by the "vm_page" module, and locked by the object's * lock. * * Each object also records a "pager" routine which is * used to retrieve (and store) pages to the proper backing * storage. In addition, objects may be backed by other * objects from which they were virtual-copied. 
*
 * The only items within the object structure which are
 * modified after time of creation are:
 * reference count locked by object's lock
 * pager routine locked by object's lock
 *
 */

struct object_q vm_object_list;
struct mtx vm_object_list_mtx;	/* lock for object list and count */

struct vm_object kernel_object_store;
struct vm_object kmem_object_store;

static SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD, 0,
    "VM object stats");

static long object_collapses;
SYSCTL_LONG(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD,
    &object_collapses, 0, "VM object collapses");

static long object_bypasses;
SYSCTL_LONG(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD,
    &object_bypasses, 0, "VM object bypasses");

/* Zone that vm_object_allocate() draws from and vm_object_destroy() frees to. */
static uma_zone_t obj_zone;

static int vm_object_zinit(void *mem, int size, int flags);

#ifdef INVARIANTS
static void vm_object_zdtor(void *mem, int size, void *arg);

/*
 * Zone destructor, compiled only under INVARIANTS (vm_object_init() passes
 * it to uma_zcreate()).  It changes nothing; it only asserts that an object
 * being returned to the zone is fully torn down: no references, no pages or
 * reservations, no paging in progress, no shadows, and type OBJT_DEAD.
 * The size and arg parameters are unused.
 */
static void
vm_object_zdtor(void *mem, int size, void *arg)
{
	vm_object_t object;

	object = (vm_object_t)mem;
	KASSERT(object->ref_count == 0,
	    ("object %p ref_count = %d", object, object->ref_count));
	KASSERT(TAILQ_EMPTY(&object->memq),
	    ("object %p has resident pages in its memq", object));
	KASSERT(vm_radix_is_empty(&object->rtree),
	    ("object %p has resident pages in its trie", object));
#if VM_NRESERVLEVEL > 0
	KASSERT(LIST_EMPTY(&object->rvq),
	    ("object %p has reservations",
	    object));
#endif
	KASSERT(vm_object_cache_is_empty(object),
	    ("object %p has cached pages",
	    object));
	KASSERT(object->paging_in_progress == 0,
	    ("object %p paging_in_progress = %d",
	    object, object->paging_in_progress));
	KASSERT(object->resident_page_count == 0,
	    ("object %p resident_page_count = %d",
	    object, object->resident_page_count));
	KASSERT(object->shadow_count == 0,
	    ("object %p shadow_count = %d",
	    object, object->shadow_count));
	KASSERT(object->type == OBJT_DEAD,
	    ("object %p has non-dead type %d",
	    object, object->type));
}
#endif

/*
 * Zone init callback (vm_object_init() passes it to uma_zcreate()).
 * Initializes the object's lock, puts the fields checked by
 * vm_object_zdtor() into their "freed" state, and appends the object to
 * vm_object_list under vm_object_list_mtx.  Always returns 0.  The size and
 * flags parameters are unused.
 */
static int
vm_object_zinit(void *mem, int size, int flags)
{
	vm_object_t object;

	object = (vm_object_t)mem;
	rw_init_flags(&object->lock, "vm object", RW_DUPOK | RW_NEW);

	/* These are true for any object that has been freed */
	object->type = OBJT_DEAD;
	object->ref_count = 0;
	object->rtree.rt_root = 0;
	object->rtree.rt_flags = 0;
	object->paging_in_progress = 0;
	object->resident_page_count = 0;
	object->shadow_count = 0;
	object->cache.rt_root = 0;
	object->cache.rt_flags = 0;

	/* The object joins the global list here, not at allocation time. */
	mtx_lock(&vm_object_list_mtx);
	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
	mtx_unlock(&vm_object_list_mtx);
	return (0);
}

/*
 * Turn the caller-supplied storage into a live object of the given type and
 * size (in pages) holding a single reference.  The initial flags depend
 * solely on the type.  Panics for OBJT_DEAD and for unknown types.
 */
static void
_vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
{

	TAILQ_INIT(&object->memq);
	LIST_INIT(&object->shadow_head);

	object->type = type;
	switch (type) {
	case OBJT_DEAD:
		panic("_vm_object_allocate: can't create OBJT_DEAD");
	case OBJT_DEFAULT:
	case OBJT_SWAP:
		object->flags = OBJ_ONEMAPPING;
		break;
	case OBJT_DEVICE:
	case OBJT_SG:
		object->flags = OBJ_FICTITIOUS | OBJ_UNMANAGED;
		break;
	case OBJT_MGTDEVICE:
		object->flags = OBJ_FICTITIOUS;
		break;
	case OBJT_PHYS:
		object->flags = OBJ_UNMANAGED;
		break;
	case OBJT_VNODE:
		object->flags = 0;
		break;
	default:
		panic("_vm_object_allocate: type %d is undefined", type);
	}
	object->size = size;
	object->generation = 1;
	object->ref_count = 1;
	object->memattr = VM_MEMATTR_DEFAULT;
	object->cred = NULL;
	object->charge = 0;
	object->handle = NULL;
	object->backing_object = NULL;
	object->backing_object_offset = (vm_ooffset_t) 0;
#if VM_NRESERVLEVEL > 0
	LIST_INIT(&object->rvq);
#endif
}

/*
 * vm_object_init:
 *
 * Initialize the VM objects module.
*/
void
vm_object_init(void)
{
	/* The global object list and its mutex must exist before any object. */
	TAILQ_INIT(&vm_object_list);
	mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF);

	/*
	 * kernel_object and kmem_object are set up in place with
	 * _vm_object_allocate() instead of being taken from obj_zone, which
	 * is only created further down.  Both are OBJT_PHYS objects sized to
	 * the kernel address range.
	 */
	rw_init(&kernel_object->lock, "kernel vm object");
	_vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
	    kernel_object);
#if VM_NRESERVLEVEL > 0
	kernel_object->flags |= OBJ_COLORED;
	kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
#endif

	rw_init(&kmem_object->lock, "kmem vm object");
	_vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
	    kmem_object);
#if VM_NRESERVLEVEL > 0
	kmem_object->flags |= OBJ_COLORED;
	kmem_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
#endif

	/*
	 * The lock portion of struct vm_object must be type stable due
	 * to vm_pageout_fallback_object_lock locking a vm object
	 * without holding any references to it.
	 */
	/* The destructor slot is only populated in INVARIANTS kernels. */
	obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
#ifdef INVARIANTS
	    vm_object_zdtor,
#else
	    NULL,
#endif
	    vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);

	vm_radix_init();
}

/*
 * Clear the given bits in the object's flags.  The object must be
 * write-locked; this is asserted.
 */
void
vm_object_clear_flag(vm_object_t object, u_short bits)
{

	VM_OBJECT_ASSERT_WLOCKED(object);
	object->flags &= ~bits;
}

/*
 * Sets the default memory attribute for the specified object.  Pages
 * that are allocated to this object are by default assigned this memory
 * attribute.
 *
 * Presently, this function must be called before any pages are allocated
 * to the object.  In the future, this requirement may be relaxed for
 * "default" and "swap" objects.
*/ int vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr) { VM_OBJECT_ASSERT_WLOCKED(object); switch (object->type) { case OBJT_DEFAULT: case OBJT_DEVICE: case OBJT_MGTDEVICE: case OBJT_PHYS: case OBJT_SG: case OBJT_SWAP: case OBJT_VNODE: if (!TAILQ_EMPTY(&object->memq)) return (KERN_FAILURE); break; case OBJT_DEAD: return (KERN_INVALID_ARGUMENT); default: panic("vm_object_set_memattr: object %p is of undefined type", object); } object->memattr = memattr; return (KERN_SUCCESS); } void vm_object_pip_add(vm_object_t object, short i) { VM_OBJECT_ASSERT_WLOCKED(object); object->paging_in_progress += i; } void vm_object_pip_subtract(vm_object_t object, short i) { VM_OBJECT_ASSERT_WLOCKED(object); object->paging_in_progress -= i; } void vm_object_pip_wakeup(vm_object_t object) { VM_OBJECT_ASSERT_WLOCKED(object); object->paging_in_progress--; if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) { vm_object_clear_flag(object, OBJ_PIPWNT); wakeup(object); } } void vm_object_pip_wakeupn(vm_object_t object, short i) { VM_OBJECT_ASSERT_WLOCKED(object); if (i) object->paging_in_progress -= i; if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) { vm_object_clear_flag(object, OBJ_PIPWNT); wakeup(object); } } void vm_object_pip_wait(vm_object_t object, char *waitid) { VM_OBJECT_ASSERT_WLOCKED(object); while (object->paging_in_progress) { object->flags |= OBJ_PIPWNT; VM_OBJECT_SLEEP(object, object, PVM, waitid, 0); } } /* * vm_object_allocate: * * Returns a new object with the given size. */ vm_object_t vm_object_allocate(objtype_t type, vm_pindex_t size) { vm_object_t object; object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK); _vm_object_allocate(type, size, object); return (object); } /* * vm_object_reference: * * Gets another reference to the given object. Note: OBJ_DEAD * objects can be referenced during final cleaning. 
*/
void
vm_object_reference(vm_object_t object)
{
	/* A NULL object is tolerated and ignored. */
	if (object == NULL)
		return;
	VM_OBJECT_WLOCK(object);
	vm_object_reference_locked(object);
	VM_OBJECT_WUNLOCK(object);
}

/*
 * vm_object_reference_locked:
 *
 * Gets another reference to the given object.
 *
 * The object must be locked.
 */
void
vm_object_reference_locked(vm_object_t object)
{
	struct vnode *vp;

	VM_OBJECT_ASSERT_WLOCKED(object);
	object->ref_count++;
	/* A vnode-backed object also takes a reference on its vnode. */
	if (object->type == OBJT_VNODE) {
		vp = object->handle;
		vref(vp);
	}
}

/*
 * Handle deallocating an object of type OBJT_VNODE.
 *
 * Called with the object write-locked; every path below releases that lock
 * before returning.
 */
static void
vm_object_vndeallocate(vm_object_t object)
{
	struct vnode *vp = (struct vnode *) object->handle;

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT(object->type == OBJT_VNODE,
	    ("vm_object_vndeallocate: not a vnode object"));
	KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
#ifdef INVARIANTS
	if (object->ref_count == 0) {
		vprint("vm_object_vndeallocate", vp);
		panic("vm_object_vndeallocate: bad object reference count");
	}
#endif

	/*
	 * The test for text of vp vnode does not need a bypass to
	 * reach right VV_TEXT there, since it is obtained from
	 * object->handle.
	 */
	if (object->ref_count > 1 || (vp->v_vflag & VV_TEXT) == 0) {
		/* Not the last reference, or not a text vnode. */
		object->ref_count--;
		VM_OBJECT_WUNLOCK(object);
		/* vrele may need the vnode lock. */
		vrele(vp);
	} else {
		/*
		 * Last reference to a VV_TEXT vnode.  The object lock is
		 * dropped before the vnode lock is taken, with a hold on the
		 * vnode bridging the gap, and the object state is
		 * re-examined once both locks are held.
		 */
		vhold(vp);
		VM_OBJECT_WUNLOCK(object);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		vdrop(vp);
		VM_OBJECT_WLOCK(object);
		object->ref_count--;
		if (object->type == OBJT_DEAD) {
			/* The object died while unlocked; only unlock the vnode. */
			VM_OBJECT_WUNLOCK(object);
			VOP_UNLOCK(vp, 0);
		} else {
			if (object->ref_count == 0)
				VOP_UNSET_TEXT(vp);
			VM_OBJECT_WUNLOCK(object);
			vput(vp);
		}
	}
}

/*
 * vm_object_deallocate:
 *
 * Release a reference to the specified object,
 * gained either through a vm_object_allocate
 * or a vm_object_reference call.  When all references
 * are gone, storage associated with this object
 * may be relinquished.
 *
 * No object may be locked.
*/
void
vm_object_deallocate(vm_object_t object)
{
	vm_object_t temp;
	struct vnode *vp;

	/*
	 * Each iteration drops one reference on "object".  The loop then
	 * either returns or moves on to another object: the shadow being
	 * collapsed ("continue") or the backing object whose reference was
	 * held by the object just terminated (bottom of the loop).
	 */
	while (object != NULL) {
		VM_OBJECT_WLOCK(object);
		if (object->type == OBJT_VNODE) {
			/* vm_object_vndeallocate() releases the object lock. */
			vm_object_vndeallocate(object);
			return;
		}

		KASSERT(object->ref_count != 0,
		    ("vm_object_deallocate: object deallocated too many times: %d", object->type));

		/*
		 * If the reference count goes to 0 we start calling
		 * vm_object_terminate() on the object chain.
		 * A ref count of 1 may be a special case depending on the
		 * shadow count being 0 or 1.
		 */
		object->ref_count--;
		if (object->ref_count > 1) {
			VM_OBJECT_WUNLOCK(object);
			return;
		} else if (object->ref_count == 1) {
			if (object->type == OBJT_SWAP &&
			    (object->flags & OBJ_TMPFS) != 0) {
				/*
				 * The object lock is dropped to take the
				 * vnode lock, so the object state is checked
				 * again afterwards.
				 */
				vp = object->un_pager.swp.swp_tmpfs;
				vhold(vp);
				VM_OBJECT_WUNLOCK(object);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VM_OBJECT_WLOCK(object);
				if (object->type == OBJT_DEAD ||
				    object->ref_count != 1) {
					VM_OBJECT_WUNLOCK(object);
					VOP_UNLOCK(vp, 0);
					vdrop(vp);
					return;
				}
				if ((object->flags & OBJ_TMPFS) != 0)
					VOP_UNSET_TEXT(vp);
				VOP_UNLOCK(vp, 0);
				vdrop(vp);
			}
			if (object->shadow_count == 0 &&
			    object->handle == NULL &&
			    (object->type == OBJT_DEFAULT ||
			    (object->type == OBJT_SWAP &&
			    (object->flags & OBJ_TMPFS_NODE) == 0))) {
				vm_object_set_flag(object, OBJ_ONEMAPPING);
			} else if ((object->shadow_count == 1) &&
			    (object->handle == NULL) &&
			    (object->type == OBJT_DEFAULT ||
			     object->type == OBJT_SWAP)) {
				vm_object_t robject;

				/* The single object shadowing this one. */
				robject = LIST_FIRST(&object->shadow_head);
				KASSERT(robject != NULL,
				    ("vm_object_deallocate: ref_count: %d, shadow_count: %d",
					 object->ref_count,
					 object->shadow_count));
				KASSERT((robject->flags & OBJ_TMPFS_NODE) == 0,
				    ("shadowed tmpfs v_object %p", object));
				if (!VM_OBJECT_TRYWLOCK(robject)) {
					/*
					 * Avoid a potential deadlock.
					 */
					/* Undo the decrement; it is redone on retry. */
					object->ref_count++;
					VM_OBJECT_WUNLOCK(object);
					/*
					 * More likely than not the thread
					 * holding robject's lock has lower
					 * priority than the current thread.
					 * Let the lower priority thread run.
					 */
					pause("vmo_de", 1);
					continue;
				}
				/*
				 * Collapse object into its shadow unless its
				 * shadow is dead.  In that case, object will
				 * be deallocated by the thread that is
				 * deallocating its shadow.
				 */
				if ((robject->flags & OBJ_DEAD) == 0 &&
				    (robject->handle == NULL) &&
				    (robject->type == OBJT_DEFAULT ||
				     robject->type == OBJT_SWAP)) {

					robject->ref_count++;
retry:
					/*
					 * Wait for paging on either object to
					 * drain.  Each wait gives up one of
					 * the two locks, so the backing
					 * relationship is re-checked before
					 * looping.
					 */
					if (robject->paging_in_progress) {
						VM_OBJECT_WUNLOCK(object);
						vm_object_pip_wait(robject,
						    "objde1");
						temp = robject->backing_object;
						if (object == temp) {
							VM_OBJECT_WLOCK(object);
							goto retry;
						}
					} else if (object->paging_in_progress) {
						VM_OBJECT_WUNLOCK(robject);
						object->flags |= OBJ_PIPWNT;
						VM_OBJECT_SLEEP(object, object,
						    PDROP | PVM, "objde2", 0);
						VM_OBJECT_WLOCK(robject);
						temp = robject->backing_object;
						if (object == temp) {
							VM_OBJECT_WLOCK(object);
							goto retry;
						}
					} else
						VM_OBJECT_WUNLOCK(object);

					if (robject->ref_count == 1) {
						/* Ours was the only reference. */
						robject->ref_count--;
						object = robject;
						goto doterm;
					}
					object = robject;
					vm_object_collapse(object);
					VM_OBJECT_WUNLOCK(object);
					continue;
				}
				VM_OBJECT_WUNLOCK(robject);
			}
			VM_OBJECT_WUNLOCK(object);
			return;
		}
doterm:
		/* Detach from the backing object, remembering it in temp. */
		temp = object->backing_object;
		if (temp != NULL) {
			KASSERT((object->flags & OBJ_TMPFS_NODE) == 0,
			    ("shadowed tmpfs v_object 2 %p", object));
			VM_OBJECT_WLOCK(temp);
			LIST_REMOVE(object, shadow_list);
			temp->shadow_count--;
			VM_OBJECT_WUNLOCK(temp);
			object->backing_object = NULL;
		}
		/*
		 * Don't double-terminate, we could be in a termination
		 * recursion due to the terminate having to sync data
		 * to disk.
		 */
		if ((object->flags & OBJ_DEAD) == 0)
			vm_object_terminate(object);
		else
			VM_OBJECT_WUNLOCK(object);
		/* Next iteration drops the reference held on the backing object. */
		object = temp;
	}
}

/*
 * vm_object_destroy releases the object's swap charge and credential, if
 * it has one, and returns the object's storage to obj_zone.
 *
 * NOTE(review): nothing here unlinks the object from vm_object_list; the
 * list insertion is done by vm_object_zinit().
 */
void
vm_object_destroy(vm_object_t object)
{

	/*
	 * Release the allocation charge.
	 */
	if (object->cred != NULL) {
		swap_release_by_cred(object->charge, object->cred);
		object->charge = 0;
		crfree(object->cred);
		object->cred = NULL;
	}

	/*
	 * Free the space for the object.
	 */
	uma_zfree(obj_zone, object);
}

/*
 * vm_object_terminate actually destroys the specified object, freeing
 * up all previously used resources.
 *
 * The object must be locked.
 * This routine may block.
 */
void
vm_object_terminate(vm_object_t object)
{
	vm_page_t p, p_next;

	VM_OBJECT_ASSERT_WLOCKED(object);

	/*
	 * Make sure no one uses us.
	 */
	vm_object_set_flag(object, OBJ_DEAD);

	/*
	 * wait for the pageout daemon to be done with the object
	 */
	vm_object_pip_wait(object, "objtrm");

	KASSERT(!object->paging_in_progress,
		("vm_object_terminate: pageout in progress"));

	/*
	 * Clean and free the pages, as appropriate. All references to the
	 * object are gone, so we don't need to lock it.
	 */
	if (object->type == OBJT_VNODE) {
		struct vnode *vp = (struct vnode *)object->handle;

		/*
		 * Clean pages and flush buffers.
		 */
		vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
		/* vinvalbuf() is called with the object lock released. */
		VM_OBJECT_WUNLOCK(object);

		vinvalbuf(vp, V_SAVE, 0, 0);

		VM_OBJECT_WLOCK(object);
	}

	KASSERT(object->ref_count == 0,
		("vm_object_terminate: object with references, ref_count=%d",
		object->ref_count));

	/*
	 * Free any remaining pageable pages.  This also removes them from the
	 * paging queues.  However, don't free wired pages, just remove them
	 * from the object.  Rather than incrementally removing each page from
	 * the object, the page and object are reset to any empty state.
	 */
	TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
		vm_page_assert_unbusied(p);
		vm_page_lock(p);
		/*
		 * Optimize the page's removal from the object by resetting
		 * its "object" field.  Specifically, if the page is not
		 * wired, then the effect of this assignment is that
		 * vm_page_free()'s call to vm_page_remove() will return
		 * immediately without modifying the page or the object.
		 */
		p->object = NULL;
		if (p->wire_count == 0) {
			vm_page_free(p);
			PCPU_INC(cnt.v_pfree);
		}
		vm_page_unlock(p);
	}
	/*
	 * If the object contained any pages, then reset it to an empty state.
	 * None of the object's fields, including "resident_page_count", were
	 * modified by the preceding loop.
	 */
	if (object->resident_page_count != 0) {
		vm_radix_reclaim_allnodes(&object->rtree);
		TAILQ_INIT(&object->memq);
		object->resident_page_count = 0;
		if (object->type == OBJT_VNODE)
			vdrop(object->handle);
	}

#if VM_NRESERVLEVEL > 0
	if (__predict_false(!LIST_EMPTY(&object->rvq)))
		vm_reserv_break_all(object);
#endif
	if (__predict_false(!vm_object_cache_is_empty(object)))
		vm_page_cache_free(object, 0, 0);

	KASSERT(object->cred == NULL || object->type == OBJT_DEFAULT ||
	    object->type == OBJT_SWAP,
	    ("%s: non-swap obj %p has cred", __func__, object));

	/*
	 * Let the pager know object is dead.
	 */
	vm_pager_deallocate(object);
	VM_OBJECT_WUNLOCK(object);

	vm_object_destroy(object);
}

/*
 * Make the page read-only so that we can clear the object flags.  However, if
 * this is a nosync mmap then the object is likely to stay dirty so do not
 * mess with the page and do not clear the object flags.  Returns TRUE if the
 * page should be flushed, and FALSE otherwise.
 *
 * *clearobjflags is only ever written as FALSE here (on the nosync skip);
 * it is never set to TRUE.
 */
static boolean_t
vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *clearobjflags)
{

	/*
	 * If we have been asked to skip nosync pages and this is a
	 * nosync page, skip it.  Note that the object flags were not
	 * cleared in this case so we do not have to set them.
	 */
	if ((flags & OBJPC_NOSYNC) != 0 && (p->oflags & VPO_NOSYNC) != 0) {
		*clearobjflags = FALSE;
		return (FALSE);
	} else {
		pmap_remove_write(p);
		return (p->dirty != 0);
	}
}

/*
 * vm_object_page_clean
 *
 * Clean all dirty pages in the specified range of object.  Leaves page
 * on whatever queue it is currently on.  If NOSYNC is set then do not
 * write out pages with VPO_NOSYNC set (originally comes from MAP_NOSYNC),
 * leaving the object dirty.
*
 * When stuffing pages asynchronously, allow clustering. XXX we need a
 * synchronous clustering mode implementation.
 *
 * Odd semantics: if start == end, we clean everything.
 *
 * The object must be locked.
 *
 * Returns FALSE if some page from the range was not written, as
 * reported by the pager, and TRUE otherwise.
 */
boolean_t
vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end,
    int flags)
{
	vm_page_t np, p;
	vm_pindex_t pi, tend, tstart;
	int curgeneration, n, pagerflags;
	boolean_t clearobjflags, eio, res;

	VM_OBJECT_ASSERT_WLOCKED(object);

	/*
	 * The OBJ_MIGHTBEDIRTY flag is only set for OBJT_VNODE
	 * objects.  The check below prevents the function from
	 * operating on non-vnode objects.
	 */
	if ((object->flags & OBJ_MIGHTBEDIRTY) == 0 ||
	    object->resident_page_count == 0)
		return (TRUE);

	pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) != 0 ?
	    VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
	pagerflags |= (flags & OBJPC_INVAL) != 0 ? VM_PAGER_PUT_INVAL : 0;

	/*
	 * NOTE(review): the "clean everything" case is keyed on end == 0
	 * (the range then runs up to object->size), not on start == end in
	 * general.
	 */
	tstart = OFF_TO_IDX(start);
	tend = (end == 0) ? object->size : OFF_TO_IDX(end + PAGE_MASK);
	/* Only a scan of the whole object may clear OBJ_MIGHTBEDIRTY. */
	clearobjflags = tstart == 0 && tend >= object->size;
	res = TRUE;

rescan:
	curgeneration = object->generation;

	for (p = vm_page_find_least(object, tstart); p != NULL; p = np) {
		pi = p->pindex;
		if (pi >= tend)
			break;
		np = TAILQ_NEXT(p, listq);
		if (p->valid == 0)
			continue;
		if (vm_page_sleep_if_busy(p, "vpcwai")) {
			/*
			 * We slept.  If the object's generation changed in
			 * the meantime, a synchronous clean starts over;
			 * otherwise the scan continues but the dirty flag is
			 * left set.
			 */
			if (object->generation != curgeneration) {
				if ((flags & OBJPC_SYNC) != 0)
					goto rescan;
				else
					clearobjflags = FALSE;
			}
			np = vm_page_find_least(object, pi);
			continue;
		}
		if (!vm_object_page_remove_write(p, flags, &clearobjflags))
			continue;

		n = vm_object_page_collect_flush(object, p, pagerflags,
		    flags, &clearobjflags, &eio);
		if (eio) {
			res = FALSE;
			clearobjflags = FALSE;
		}
		if (object->generation != curgeneration) {
			if ((flags & OBJPC_SYNC) != 0)
				goto rescan;
			else
				clearobjflags = FALSE;
		}

		/*
		 * If the VOP_PUTPAGES() did a truncated write, so
		 * that even the first page of the run is not fully
		 * written, vm_pageout_flush() returns 0 as the run
		 * length.  Since the condition that caused truncated
		 * write may be permanent, e.g. exhausted free space,
		 * accepting n == 0 would cause an infinite loop.
		 *
		 * Forwarding the iterator leaves the unwritten page
		 * behind, but there is not much we can do there if
		 * filesystem refuses to write it.
		 */
		if (n == 0) {
			n = 1;
			clearobjflags = FALSE;
		}
		np = vm_page_find_least(object, pi + n);
	}
#if 0
	VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC) ? MNT_WAIT : 0);
#endif

	if (clearobjflags)
		vm_object_clear_flag(object, OBJ_MIGHTBEDIRTY);
	return (res);
}

/*
 * Build a run of pages around p and pass it to vm_pageout_flush().
 *
 * The run is grown first forward from p and then backward, up to
 * vm_pageout_page_count pages in total.  Growth in either direction stops
 * at a missing or busied neighbour, or at one that
 * vm_object_page_remove_write() says need not be flushed.  mreq counts the
 * pages added in front of p, i.e. p's index within the run.
 *
 * Returns the run length stored by vm_pageout_flush(); eio is handed
 * through to vm_pageout_flush() unchanged.  The object must be
 * write-locked and p's page lock must not be held (both are asserted).
 */
static int
vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags,
    int flags, boolean_t *clearobjflags, boolean_t *eio)
{
	vm_page_t ma[vm_pageout_page_count], p_first, tp;
	int count, i, mreq, runlen;

	vm_page_lock_assert(p, MA_NOTOWNED);
	VM_OBJECT_ASSERT_WLOCKED(object);

	count = 1;
	mreq = 0;

	/* Extend the run forward from p. */
	for (tp = p; count < vm_pageout_page_count; count++) {
		tp = vm_page_next(tp);
		if (tp == NULL || vm_page_busied(tp))
			break;
		if (!vm_object_page_remove_write(tp, flags, clearobjflags))
			break;
	}

	/* Extend the run backward with whatever room is left. */
	for (p_first = p; count < vm_pageout_page_count; count++) {
		tp = vm_page_prev(p_first);
		if (tp == NULL || vm_page_busied(tp))
			break;
		if (!vm_object_page_remove_write(tp, flags, clearobjflags))
			break;
		p_first = tp;
		mreq++;
	}

	/* Collect the run, in list order, into the array for the pager. */
	for (tp = p_first, i = 0; i < count; tp = TAILQ_NEXT(tp, listq), i++)
		ma[i] = tp;

	vm_pageout_flush(ma, count, pagerflags, mreq, &runlen, eio);
	return (runlen);
}

/*
 * Note that there is absolutely no sense in writing out
 * anonymous objects, so we track down the vnode object
 * to write out.
 * We invalidate (remove) all pages from the address space
 * for semantic correctness.
 *
 * If the backing object is a device object with unmanaged pages, then any
 * mappings to the specified range of pages must be removed before this
 * function is called.
 *
 * Note: certain anonymous maps, such as MAP_NOSYNC maps,
 * may start out with a NULL object.
*/
boolean_t
vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
    boolean_t syncio, boolean_t invalidate)
{
	vm_object_t backing_object;
	struct vnode *vp;
	struct mount *mp;
	int error, flags, fsync_after;
	boolean_t res;

	if (object == NULL)
		return (TRUE);
	res = TRUE;
	error = 0;
	VM_OBJECT_WLOCK(object);
	/*
	 * Walk to the bottom of the shadow chain, locking each backing
	 * object before unlocking the object above it.  The offset is
	 * rebased at each step and the size is clipped to the backing
	 * object's extent.
	 */
	while ((backing_object = object->backing_object) != NULL) {
		VM_OBJECT_WLOCK(backing_object);
		offset += object->backing_object_offset;
		VM_OBJECT_WUNLOCK(object);
		object = backing_object;
		if (object->size < OFF_TO_IDX(offset + size))
			size = IDX_TO_OFF(object->size) - offset;
	}
	/*
	 * Flush pages if writing is allowed, invalidate them
	 * if invalidation requested.  Pages undergoing I/O
	 * will be ignored by vm_object_page_remove().
	 *
	 * We cannot lock the vnode and then wait for paging
	 * to complete without deadlocking against vm_fault.
	 * Instead we simply call vm_object_page_remove() and
	 * allow it to block internally on a page-by-page
	 * basis when it encounters pages undergoing async
	 * I/O.
	 */
	if (object->type == OBJT_VNODE &&
	    (object->flags & OBJ_MIGHTBEDIRTY) != 0) {
		vp = object->handle;
		/* The object lock is released before the vnode is locked. */
		VM_OBJECT_WUNLOCK(object);
		(void) vn_start_write(vp, &mp, V_WAIT);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (syncio && !invalidate && offset == 0 &&
		    OFF_TO_IDX(size) == object->size) {
			/*
			 * If syncing the whole mapping of the file,
			 * it is faster to schedule all the writes in
			 * async mode, also allowing the clustering,
			 * and then wait for i/o to complete.
			 */
			flags = 0;
			fsync_after = TRUE;
		} else {
			flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
			flags |= invalidate ? (OBJPC_SYNC | OBJPC_INVAL) : 0;
			fsync_after = FALSE;
		}
		VM_OBJECT_WLOCK(object);
		res = vm_object_page_clean(object, offset, offset + size,
		    flags);
		VM_OBJECT_WUNLOCK(object);
		if (fsync_after)
			error = VOP_FSYNC(vp, MNT_WAIT, curthread);
		VOP_UNLOCK(vp, 0);
		vn_finished_write(mp);
		/* A failed fsync turns the overall result into FALSE. */
		if (error != 0)
			res = FALSE;
		VM_OBJECT_WLOCK(object);
	}
	if ((object->type == OBJT_VNODE ||
	     object->type == OBJT_DEVICE) && invalidate) {
		if (object->type == OBJT_DEVICE)
			/*
			 * The option OBJPR_NOTMAPPED must be passed here
			 * because vm_object_page_remove() cannot remove
			 * unmanaged mappings.
			 */
			flags = OBJPR_NOTMAPPED;
		else if (old_msync)
			flags = OBJPR_NOTWIRED;
		else
			flags = OBJPR_CLEANONLY | OBJPR_NOTWIRED;
		vm_object_page_remove(object, OFF_TO_IDX(offset),
		    OFF_TO_IDX(offset + size + PAGE_MASK), flags);
	}
	VM_OBJECT_WUNLOCK(object);
	return (res);
}

/*
 * vm_object_madvise:
 *
 * Implements the madvise function at the object/page level.
 *
 * MADV_WILLNEED (any object)
 *
 * Activate the specified pages if they are resident.
 *
 * MADV_DONTNEED (any object)
 *
 * Deactivate the specified pages if they are resident.
 *
 * MADV_FREE (OBJT_DEFAULT/OBJT_SWAP objects,
 * OBJ_ONEMAPPING only)
 *
 * Deactivate and clean the specified pages if they are
 * resident.  This permits the process to reuse the pages
 * without faulting or the kernel to reclaim the pages
 * without I/O.
 */
void
vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end,
    int advise)
{
	vm_pindex_t tpindex;
	vm_object_t backing_object, tobject;
	vm_page_t m;

	if (object == NULL)
		return;
	VM_OBJECT_WLOCK(object);
	/*
	 * Locate and adjust resident pages
	 */
	for (; pindex < end; pindex += 1) {
relookup:
		/*
		 * (tobject, tpindex) is the current position in the shadow
		 * chain.  "object" stays locked for the whole loop; any
		 * other tobject is unlocked once it is no longer needed.
		 */
		tobject = object;
		tpindex = pindex;
shadowlookup:
		/*
		 * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
		 * and those pages must be OBJ_ONEMAPPING.
		 */
		if (advise == MADV_FREE) {
			if ((tobject->type != OBJT_DEFAULT &&
			     tobject->type != OBJT_SWAP) ||
			    (tobject->flags & OBJ_ONEMAPPING) == 0) {
				goto unlock_tobject;
			}
		} else if ((tobject->flags & OBJ_UNMANAGED) != 0)
			goto unlock_tobject;
		m = vm_page_lookup(tobject, tpindex);
		if (m == NULL && advise == MADV_WILLNEED) {
			/*
			 * If the page is cached, reactivate it.
			 */
			m = vm_page_alloc(tobject, tpindex, VM_ALLOC_IFCACHED |
			    VM_ALLOC_NOBUSY);
		}
		if (m == NULL) {
			/*
			 * There may be swap even if there is no backing page
			 */
			if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
				swap_pager_freespace(tobject, tpindex, 1);
			/*
			 * next object
			 */
			backing_object = tobject->backing_object;
			if (backing_object == NULL)
				goto unlock_tobject;
			VM_OBJECT_WLOCK(backing_object);
			tpindex += OFF_TO_IDX(tobject->backing_object_offset);
			if (tobject != object)
				VM_OBJECT_WUNLOCK(tobject);
			tobject = backing_object;
			goto shadowlookup;
		} else if (m->valid != VM_PAGE_BITS_ALL)
			goto unlock_tobject;
		/*
		 * If the page is not in a normal state, skip it.
		 */
		vm_page_lock(m);
		if (m->hold_count != 0 || m->wire_count != 0) {
			vm_page_unlock(m);
			goto unlock_tobject;
		}
		KASSERT((m->flags & PG_FICTITIOUS) == 0,
		    ("vm_object_madvise: page %p is fictitious", m));
		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
		    ("vm_object_madvise: page %p is not managed", m));
		if (vm_page_busied(m)) {
			if (advise == MADV_WILLNEED) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_aflag_set(m, PGA_REFERENCED);
			}
			/*
			 * Release every object lock before sleeping, then
			 * restart the lookup for this index from the top.
			 */
			if (object != tobject)
				VM_OBJECT_WUNLOCK(object);
			VM_OBJECT_WUNLOCK(tobject);
			vm_page_busy_sleep(m, "madvpo");
			VM_OBJECT_WLOCK(object);
			goto relookup;
		}
		if (advise == MADV_WILLNEED) {
			vm_page_activate(m);
		} else {
			vm_page_advise(m, advise);
		}
		vm_page_unlock(m);
		if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
			swap_pager_freespace(tobject, tpindex, 1);
unlock_tobject:
		if (tobject != object)
			VM_OBJECT_WUNLOCK(tobject);
	}
	VM_OBJECT_WUNLOCK(object);
}

/*
 * vm_object_shadow:
 *
 * Create a new object which is backed by the
 * specified existing object range.  The source
 * object reference is deallocated.
 *
 * The new object and offset into that object
 * are returned in the source parameters.
 */
void
vm_object_shadow(
	vm_object_t *object,	/* IN/OUT */
	vm_ooffset_t *offset,	/* IN/OUT */
	vm_size_t length)
{
	vm_object_t source;
	vm_object_t result;

	source = *object;

	/*
	 * Don't create the new object if the old object isn't shared.
	 */
	if (source != NULL) {
		VM_OBJECT_WLOCK(source);
		if (source->ref_count == 1 &&
		    source->handle == NULL &&
		    (source->type == OBJT_DEFAULT ||
		     source->type == OBJT_SWAP)) {
			VM_OBJECT_WUNLOCK(source);
			return;
		}
		VM_OBJECT_WUNLOCK(source);
	}

	/*
	 * Allocate a new object with the given length.
	 */
	result = vm_object_allocate(OBJT_DEFAULT, atop(length));

	/*
	 * The new object shadows the source object, adding a reference to it.
	 * Our caller changes his reference to point to the new object,
	 * removing a reference to the source object.  Net result: no change
	 * of reference count.
	 *
	 * Try to optimize the result object's page color when shadowing
	 * in order to maintain page coloring consistency in the combined
	 * shadowed object.
	 */
	result->backing_object = source;
	/*
	 * Store the offset into the source object, and fix up the offset into
	 * the new object.
*/ result->backing_object_offset = *offset; if (source != NULL) { VM_OBJECT_WLOCK(source); LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list); source->shadow_count++; #if VM_NRESERVLEVEL > 0 result->flags |= source->flags & OBJ_COLORED; result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) & ((1 << (VM_NFREEORDER - 1)) - 1); #endif VM_OBJECT_WUNLOCK(source); } /* * Return the new things */ *offset = 0; *object = result; } /* * vm_object_split: * * Split the pages in a map entry into a new object. This affords * easier removal of unused pages, and keeps object inheritance from * being a negative impact on memory usage. */ void vm_object_split(vm_map_entry_t entry) { vm_page_t m, m_next; vm_object_t orig_object, new_object, source; vm_pindex_t idx, offidxstart; vm_size_t size; orig_object = entry->object.vm_object; if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP) return; if (orig_object->ref_count <= 1) return; VM_OBJECT_WUNLOCK(orig_object); offidxstart = OFF_TO_IDX(entry->offset); size = atop(entry->end - entry->start); /* * If swap_pager_copy() is later called, it will convert new_object * into a swap object. */ new_object = vm_object_allocate(OBJT_DEFAULT, size); /* * At this point, the new object is still private, so the order in * which the original and new objects are locked does not matter. 
*/ VM_OBJECT_WLOCK(new_object); VM_OBJECT_WLOCK(orig_object); source = orig_object->backing_object; if (source != NULL) { VM_OBJECT_WLOCK(source); if ((source->flags & OBJ_DEAD) != 0) { VM_OBJECT_WUNLOCK(source); VM_OBJECT_WUNLOCK(orig_object); VM_OBJECT_WUNLOCK(new_object); vm_object_deallocate(new_object); VM_OBJECT_WLOCK(orig_object); return; } LIST_INSERT_HEAD(&source->shadow_head, new_object, shadow_list); source->shadow_count++; vm_object_reference_locked(source); /* for new_object */ vm_object_clear_flag(source, OBJ_ONEMAPPING); VM_OBJECT_WUNLOCK(source); new_object->backing_object_offset = orig_object->backing_object_offset + entry->offset; new_object->backing_object = source; } if (orig_object->cred != NULL) { new_object->cred = orig_object->cred; crhold(orig_object->cred); new_object->charge = ptoa(size); KASSERT(orig_object->charge >= ptoa(size), ("orig_object->charge < 0")); orig_object->charge -= ptoa(size); } retry: m = vm_page_find_least(orig_object, offidxstart); for (; m != NULL && (idx = m->pindex - offidxstart) < size; m = m_next) { m_next = TAILQ_NEXT(m, listq); /* * We must wait for pending I/O to complete before we can * rename the page. * * We do not have to VM_PROT_NONE the page as mappings should * not be changed by this operation. */ if (vm_page_busied(m)) { VM_OBJECT_WUNLOCK(new_object); vm_page_lock(m); VM_OBJECT_WUNLOCK(orig_object); vm_page_busy_sleep(m, "spltwt"); VM_OBJECT_WLOCK(orig_object); VM_OBJECT_WLOCK(new_object); goto retry; } /* vm_page_rename() will handle dirty and cache. 
*/ if (vm_page_rename(m, new_object, idx)) { VM_OBJECT_WUNLOCK(new_object); VM_OBJECT_WUNLOCK(orig_object); VM_WAIT; VM_OBJECT_WLOCK(orig_object); VM_OBJECT_WLOCK(new_object); goto retry; } #if VM_NRESERVLEVEL > 0 /* * If some of the reservation's allocated pages remain with * the original object, then transferring the reservation to * the new object is neither particularly beneficial nor * particularly harmful as compared to leaving the reservation * with the original object. If, however, all of the * reservation's allocated pages are transferred to the new * object, then transferring the reservation is typically * beneficial. Determining which of these two cases applies * would be more costly than unconditionally renaming the * reservation. */ vm_reserv_rename(m, new_object, orig_object, offidxstart); #endif if (orig_object->type == OBJT_SWAP) vm_page_xbusy(m); } if (orig_object->type == OBJT_SWAP) { /* * swap_pager_copy() can sleep, in which case the orig_object's * and new_object's locks are released and reacquired. */ swap_pager_copy(orig_object, new_object, offidxstart, 0); TAILQ_FOREACH(m, &new_object->memq, listq) vm_page_xunbusy(m); /* * Transfer any cached pages from orig_object to new_object. * If swap_pager_copy() found swapped out pages within the * specified range of orig_object, then it changed * new_object's type to OBJT_SWAP when it transferred those * pages to new_object. Otherwise, new_object's type * should still be OBJT_DEFAULT and orig_object should not * contain any cached pages within the specified range. 
*/ if (__predict_false(!vm_object_cache_is_empty(orig_object))) vm_page_cache_transfer(orig_object, offidxstart, new_object); } VM_OBJECT_WUNLOCK(orig_object); VM_OBJECT_WUNLOCK(new_object); entry->object.vm_object = new_object; entry->offset = 0LL; vm_object_deallocate(orig_object); VM_OBJECT_WLOCK(new_object); } #define OBSC_TEST_ALL_SHADOWED 0x0001 #define OBSC_COLLAPSE_NOWAIT 0x0002 #define OBSC_COLLAPSE_WAIT 0x0004 static int vm_object_backing_scan(vm_object_t object, int op) { int r = 1; vm_page_t p; vm_object_t backing_object; vm_pindex_t backing_offset_index; VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(object->backing_object); backing_object = object->backing_object; backing_offset_index = OFF_TO_IDX(object->backing_object_offset); /* * Initial conditions */ if (op & OBSC_TEST_ALL_SHADOWED) { /* * We do not want to have to test for the existence of cache * or swap pages in the backing object. XXX but with the * new swapper this would be pretty easy to do. * * XXX what about anonymous MAP_SHARED memory that hasn't * been ZFOD faulted yet? If we do not test for this, the * shadow test may succeed! XXX */ if (backing_object->type != OBJT_DEFAULT) { return (0); } } if (op & OBSC_COLLAPSE_WAIT) { vm_object_set_flag(backing_object, OBJ_DEAD); } /* * Our scan */ p = TAILQ_FIRST(&backing_object->memq); while (p) { vm_page_t next = TAILQ_NEXT(p, listq); vm_pindex_t new_pindex = p->pindex - backing_offset_index; if (op & OBSC_TEST_ALL_SHADOWED) { vm_page_t pp; /* * Ignore pages outside the parent object's range * and outside the parent object's mapping of the * backing object. * * note that we do not busy the backing object's * page. */ if ( p->pindex < backing_offset_index || new_pindex >= object->size ) { p = next; continue; } /* * See if the parent has the page or if the parent's * object pager has the page. If the parent has the * page but the page is not valid, the parent's * object pager must have the page. 
* * If this fails, the parent does not completely shadow * the object and we might as well give up now. */ pp = vm_page_lookup(object, new_pindex); if ( (pp == NULL || pp->valid == 0) && !vm_pager_has_page(object, new_pindex, NULL, NULL) ) { r = 0; break; } } /* * Check for busy page */ if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) { vm_page_t pp; if (op & OBSC_COLLAPSE_NOWAIT) { if (!p->valid || vm_page_busied(p)) { p = next; continue; } } else if (op & OBSC_COLLAPSE_WAIT) { if (vm_page_busied(p)) { VM_OBJECT_WUNLOCK(object); vm_page_lock(p); VM_OBJECT_WUNLOCK(backing_object); vm_page_busy_sleep(p, "vmocol"); VM_OBJECT_WLOCK(object); VM_OBJECT_WLOCK(backing_object); /* * If we slept, anything could have * happened. Since the object is * marked dead, the backing offset * should not have changed so we * just restart our scan. */ p = TAILQ_FIRST(&backing_object->memq); continue; } } KASSERT( p->object == backing_object, ("vm_object_backing_scan: object mismatch") ); if ( p->pindex < backing_offset_index || new_pindex >= object->size ) { if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, p->pindex, 1); /* * Page is out of the parent object's range, we * can simply destroy it. */ vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (p->wire_count == 0) vm_page_free(p); else vm_page_remove(p); vm_page_unlock(p); p = next; continue; } pp = vm_page_lookup(object, new_pindex); if ( (op & OBSC_COLLAPSE_NOWAIT) != 0 && (pp != NULL && pp->valid == 0) ) { if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, p->pindex, 1); /* * The page in the parent is not (yet) valid. * We don't know anything about the state of * the original page. It might be mapped, * so we must avoid the next if here. * * This is due to a race in vm_fault() where * we must unbusy the original (backing_obj) * page before we can (re)lock the parent. * Hence we can get here. 
*/ p = next; continue; } if ( pp != NULL || vm_pager_has_page(object, new_pindex, NULL, NULL) ) { if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, p->pindex, 1); /* * page already exists in parent OR swap exists * for this location in the parent. Destroy * the original page from the backing object. * * Leave the parent's page alone */ vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (p->wire_count == 0) vm_page_free(p); else vm_page_remove(p); vm_page_unlock(p); p = next; continue; } /* * Page does not exist in parent, rename the * page from the backing object to the main object. * * If the page was mapped to a process, it can remain * mapped through the rename. * vm_page_rename() will handle dirty and cache. */ if (vm_page_rename(p, object, new_pindex)) { if (op & OBSC_COLLAPSE_NOWAIT) { p = next; continue; } VM_OBJECT_WUNLOCK(backing_object); VM_OBJECT_WUNLOCK(object); VM_WAIT; VM_OBJECT_WLOCK(object); VM_OBJECT_WLOCK(backing_object); p = TAILQ_FIRST(&backing_object->memq); continue; } /* Use the old pindex to free the right page. */ if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, new_pindex + backing_offset_index, 1); #if VM_NRESERVLEVEL > 0 /* * Rename the reservation. */ vm_reserv_rename(p, object, backing_object, backing_offset_index); #endif } p = next; } return (r); } /* * this version of collapse allows the operation to occur earlier and * when paging_in_progress is true for an object... This is not a complete * operation, but should plug 99.9% of the rest of the leaks. */ static void vm_object_qcollapse(vm_object_t object) { vm_object_t backing_object = object->backing_object; VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(backing_object); if (backing_object->ref_count != 1) return; vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT); } /* * vm_object_collapse: * * Collapse an object with the object backing it. 
* Pages in the backing object are moved into the
 * parent, and the backing object is deallocated.
 *
 * The object must be write-locked by the caller and is still
 * write-locked on return.  The loop repeats until the object has no
 * backing object or the current backing object can be neither
 * collapsed nor bypassed.
 */
void
vm_object_collapse(vm_object_t object)
{
	VM_OBJECT_ASSERT_WLOCKED(object);

	while (TRUE) {
		vm_object_t backing_object;

		/*
		 * Verify that the conditions are right for collapse:
		 *
		 * The object exists and the backing object exists.
		 */
		if ((backing_object = object->backing_object) == NULL)
			break;

		/*
		 * we check the backing object first, because it is most likely
		 * not collapsable.
		 */
		VM_OBJECT_WLOCK(backing_object);
		if (backing_object->handle != NULL ||
		    (backing_object->type != OBJT_DEFAULT &&
		     backing_object->type != OBJT_SWAP) ||
		    (backing_object->flags & OBJ_DEAD) ||
		    object->handle != NULL ||
		    (object->type != OBJT_DEFAULT &&
		     object->type != OBJT_SWAP) ||
		    (object->flags & OBJ_DEAD)) {
			VM_OBJECT_WUNLOCK(backing_object);
			break;
		}

		/*
		 * With paging in progress on either object only the
		 * non-sleeping partial collapse is attempted.
		 */
		if (
		    object->paging_in_progress != 0 ||
		    backing_object->paging_in_progress != 0
		) {
			vm_object_qcollapse(object);
			VM_OBJECT_WUNLOCK(backing_object);
			break;
		}
		/*
		 * We know that we can either collapse the backing object (if
		 * the parent is the only reference to it) or (perhaps) have
		 * the parent bypass the object if the parent happens to shadow
		 * all the resident pages in the entire backing object.
		 *
		 * This is ignoring pager-backed pages such as swap pages.
		 * vm_object_backing_scan fails the shadowing test in this
		 * case.
		 */
		if (backing_object->ref_count == 1) {
			/*
			 * If there is exactly one reference to the backing
			 * object, we can collapse it into the parent.
			 */
			vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);

#if VM_NRESERVLEVEL > 0
			/*
			 * Break any reservations from backing_object.
			 */
			if (__predict_false(!LIST_EMPTY(&backing_object->rvq)))
				vm_reserv_break_all(backing_object);
#endif

			/*
			 * Move the pager from backing_object to object.
			 */
			if (backing_object->type == OBJT_SWAP) {
				/*
				 * swap_pager_copy() can sleep, in which case
				 * the backing_object's and object's locks are
				 * released and reacquired.
				 * Since swap_pager_copy() is being asked to
				 * destroy the source, it will change the
				 * backing_object's type to OBJT_DEFAULT.
				 */
				swap_pager_copy(
				    backing_object,
				    object,
				    OFF_TO_IDX(object->backing_object_offset), TRUE);

				/*
				 * Free any cached pages from backing_object.
				 */
				if (__predict_false(
				    !vm_object_cache_is_empty(backing_object)))
					vm_page_cache_free(backing_object, 0, 0);
			}
			/*
			 * Object now shadows whatever backing_object did.
			 * Note that the reference to
			 * backing_object->backing_object moves from within
			 * backing_object to within object.
			 */
			LIST_REMOVE(object, shadow_list);
			backing_object->shadow_count--;
			if (backing_object->backing_object) {
				VM_OBJECT_WLOCK(backing_object->backing_object);
				LIST_REMOVE(backing_object, shadow_list);
				LIST_INSERT_HEAD(
				    &backing_object->backing_object->shadow_head,
				    object, shadow_list);
				/*
				 * The shadow_count has not changed.
				 */
				VM_OBJECT_WUNLOCK(backing_object->backing_object);
			}
			object->backing_object = backing_object->backing_object;
			object->backing_object_offset +=
			    backing_object->backing_object_offset;

			/*
			 * Discard backing_object.
			 *
			 * Since the backing object has no pages, no pager left,
			 * and no object references within it, all that is
			 * necessary is to dispose of it.
			 */
			KASSERT(backing_object->ref_count == 1, (
"backing_object %p was somehow re-referenced during collapse!",
			    backing_object));
			backing_object->type = OBJT_DEAD;
			backing_object->ref_count = 0;
			VM_OBJECT_WUNLOCK(backing_object);
			vm_object_destroy(backing_object);

			object_collapses++;	/* statistics counter */
		} else {
			vm_object_t new_backing_object;

			/*
			 * If we do not entirely shadow the backing object,
			 * there is nothing we can do so we give up.
			 *
			 * The scan is skipped when every page of the object
			 * is resident.
			 */
			if (object->resident_page_count != object->size &&
			    vm_object_backing_scan(object,
			    OBSC_TEST_ALL_SHADOWED) == 0) {
				VM_OBJECT_WUNLOCK(backing_object);
				break;
			}

			/*
			 * Make the parent shadow the next object in the
			 * chain. Deallocating backing_object will not remove
			 * it, since its reference count is at least 2.
			 */
			LIST_REMOVE(object, shadow_list);
			backing_object->shadow_count--;

			new_backing_object = backing_object->backing_object;
			if ((object->backing_object = new_backing_object) != NULL) {
				VM_OBJECT_WLOCK(new_backing_object);
				LIST_INSERT_HEAD(
				    &new_backing_object->shadow_head,
				    object,
				    shadow_list
				);
				new_backing_object->shadow_count++;
				vm_object_reference_locked(new_backing_object);
				VM_OBJECT_WUNLOCK(new_backing_object);
				object->backing_object_offset +=
				    backing_object->backing_object_offset;
			}

			/*
			 * Drop the reference count on backing_object. Since
			 * its ref_count was at least 2, it will not vanish.
			 */
			backing_object->ref_count--;
			VM_OBJECT_WUNLOCK(backing_object);
			object_bypasses++;	/* statistics counter */
		}

		/*
		 * Try again with this object's new backing object.
		 */
	}
}

/*
 * vm_object_page_remove:
 *
 * For the given object, either frees or invalidates each of the
 * specified pages. In general, a page is freed. However, if a page is
 * wired for any reason other than the existence of a managed, wired
 * mapping, then it may be invalidated but not removed from the object.
 * Pages are specified by the given range ["start", "end") and the option
 * OBJPR_CLEANONLY. As a special case, if "end" is zero, then the range
 * extends from "start" to the end of the object. If the option
 * OBJPR_CLEANONLY is specified, then only the non-dirty pages within the
 * specified range are affected. If the option OBJPR_NOTMAPPED is
 * specified, then the pages within the specified range must have no
 * mappings. Otherwise, if this option is not specified, any mappings to
 * the specified pages are removed before the pages are freed or
 * invalidated.
 *
 * In general, this operation should only be performed on objects that
 * contain managed pages. There are, however, two exceptions. First, it
 * is performed on the kernel and kmem objects by vm_map_entry_delete().
 * Second, it is used by msync(..., MS_INVALIDATE) to invalidate device-
 * backed pages.
In both of these cases, the option OBJPR_CLEANONLY must
 * not be specified and the option OBJPR_NOTMAPPED must be specified.
 *
 * The object must be locked.
 */
void
vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
    int options)
{
	vm_page_t p, next;
	int wirings;

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT((object->flags & OBJ_UNMANAGED) == 0 ||
	    (options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED,
	    ("vm_object_page_remove: illegal options for object %p", object));
	/* No resident pages: only the cached pages need attention. */
	if (object->resident_page_count == 0)
		goto skipmemq;
	vm_object_pip_add(object, 1);
again:
	p = vm_page_find_least(object, start);

	/*
	 * Here, the variable "p" is either (1) the page with the least pindex
	 * greater than or equal to the parameter "start" or (2) NULL.
	 */
	for (; p != NULL && (p->pindex < end || end == 0); p = next) {
		next = TAILQ_NEXT(p, listq);

		/*
		 * If the page is wired for any reason besides the existence
		 * of managed, wired mappings, then it cannot be freed. For
		 * example, fictitious pages, which represent device memory,
		 * are inherently wired and cannot be freed. They can,
		 * however, be invalidated if the option OBJPR_CLEANONLY is
		 * not specified.
		 */
		vm_page_lock(p);
		if (vm_page_xbusied(p)) {
			/* Sleep with the object unlocked, then rescan. */
			VM_OBJECT_WUNLOCK(object);
			vm_page_busy_sleep(p, "vmopax");
			VM_OBJECT_WLOCK(object);
			goto again;
		}
		/*
		 * After this test "wirings" holds the number of wired
		 * mappings when the page is wired, and zero otherwise.
		 */
		if ((wirings = p->wire_count) != 0 &&
		    (wirings = pmap_page_wired_mappings(p)) != p->wire_count) {
			if ((options & (OBJPR_NOTWIRED | OBJPR_NOTMAPPED)) ==
			    0) {
				pmap_remove_all(p);
				/* Account for removal of wired mappings. */
				if (wirings != 0)
					p->wire_count -= wirings;
			}
			if ((options & OBJPR_CLEANONLY) == 0) {
				p->valid = 0;
				vm_page_undirty(p);
			}
			goto next;
		}
		if (vm_page_busied(p)) {
			VM_OBJECT_WUNLOCK(object);
			vm_page_busy_sleep(p, "vmopar");
			VM_OBJECT_WLOCK(object);
			goto again;
		}
		KASSERT((p->flags & PG_FICTITIOUS) == 0,
		    ("vm_object_page_remove: page %p is fictitious", p));
		if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) {
			if ((options & OBJPR_NOTMAPPED) == 0)
				pmap_remove_write(p);
			/* Dirty pages are kept when only clean ones are wanted. */
			if (p->dirty)
				goto next;
		}
		if ((options & OBJPR_NOTMAPPED) == 0) {
			if ((options & OBJPR_NOTWIRED) != 0 && wirings != 0)
				goto next;
			pmap_remove_all(p);
			/* Account for removal of wired mappings. */
			if (wirings != 0) {
				KASSERT(p->wire_count == wirings,
				    ("inconsistent wire count %d %d %p",
				    p->wire_count, wirings, p));
				p->wire_count = 0;
				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
			}
		}
		vm_page_free(p);
next:
		vm_page_unlock(p);
	}
	vm_object_pip_wakeup(object);
skipmemq:
	/* Cached pages in the same range are released as well. */
	if (__predict_false(!vm_object_cache_is_empty(object)))
		vm_page_cache_free(object, start, end);
}

/*
 * vm_object_page_cache:
 *
 * For the given object, attempt to move the specified clean
 * pages to the cache queue. If a page is wired for any reason,
 * then it will not be changed. Pages are specified by the given
 * range ["start", "end"). As a special case, if "end" is zero,
 * then the range extends from "start" to the end of the object.
 * Any mappings to the specified pages are removed before the
 * pages are moved to the cache queue.
 *
 * This operation should only be performed on objects that
 * contain non-fictitious, managed pages.
 *
 * The object must be locked.
*/ void vm_object_page_cache(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { struct mtx *mtx, *new_mtx; vm_page_t p, next; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0, ("vm_object_page_cache: illegal object %p", object)); if (object->resident_page_count == 0) return; p = vm_page_find_least(object, start); /* * Here, the variable "p" is either (1) the page with the least pindex * greater than or equal to the parameter "start" or (2) NULL. */ mtx = NULL; for (; p != NULL && (p->pindex < end || end == 0); p = next) { next = TAILQ_NEXT(p, listq); /* * Avoid releasing and reacquiring the same page lock. */ new_mtx = vm_page_lockptr(p); if (mtx != new_mtx) { if (mtx != NULL) mtx_unlock(mtx); mtx = new_mtx; mtx_lock(mtx); } vm_page_try_to_cache(p); } if (mtx != NULL) mtx_unlock(mtx); } /* * Populate the specified range of the object with valid pages. Returns * TRUE if the range is successfully populated and FALSE otherwise. * * Note: This function should be optimized to pass a larger array of * pages to vm_pager_get_pages() before it is applied to a non- * OBJT_DEVICE object. * * The object must be locked. */ boolean_t vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { vm_page_t m, ma[1]; vm_pindex_t pindex; int rv; VM_OBJECT_ASSERT_WLOCKED(object); for (pindex = start; pindex < end; pindex++) { m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL); if (m->valid != VM_PAGE_BITS_ALL) { ma[0] = m; rv = vm_pager_get_pages(object, ma, 1, 0); m = vm_page_lookup(object, pindex); if (m == NULL) break; if (rv != VM_PAGER_OK) { vm_page_lock(m); vm_page_free(m); vm_page_unlock(m); break; } } /* * Keep "m" busy because a subsequent iteration may unlock * the object. 
*/ } if (pindex > start) { m = vm_page_lookup(object, start); while (m != NULL && m->pindex < pindex) { vm_page_xunbusy(m); m = TAILQ_NEXT(m, listq); } } return (pindex == end); } /* * Routine: vm_object_coalesce * Function: Coalesces two objects backing up adjoining * regions of memory into a single object. * * returns TRUE if objects were combined. * * NOTE: Only works at the moment if the second object is NULL - * if it's not, which object do we lock first? * * Parameters: * prev_object First object to coalesce * prev_offset Offset into prev_object * prev_size Size of reference to prev_object * next_size Size of reference to the second object * reserved Indicator that extension region has * swap accounted for * * Conditions: * The object must *not* be locked. */ boolean_t vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, vm_size_t prev_size, vm_size_t next_size, boolean_t reserved) { vm_pindex_t next_pindex; if (prev_object == NULL) return (TRUE); VM_OBJECT_WLOCK(prev_object); if ((prev_object->type != OBJT_DEFAULT && prev_object->type != OBJT_SWAP) || (prev_object->flags & OBJ_TMPFS_NODE) != 0) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } /* * Try to collapse the object first */ vm_object_collapse(prev_object); /* * Can't coalesce if: . more than one reference . paged out . shadows * another object . has a copy elsewhere (any of which mean that the * pages not mapped to prev_entry may be in use anyway) */ if (prev_object->backing_object != NULL) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } prev_size >>= PAGE_SHIFT; next_size >>= PAGE_SHIFT; next_pindex = OFF_TO_IDX(prev_offset) + prev_size; if ((prev_object->ref_count > 1) && (prev_object->size != next_pindex)) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } /* * Account for the charge. */ if (prev_object->cred != NULL) { /* * If prev_object was charged, then this mapping, * althought not charged now, may become writable * later. 
Non-NULL cred in the object would prevent * swap reservation during enabling of the write * access, so reserve swap now. Failed reservation * cause allocation of the separate object for the map * entry, and swap reservation for this entry is * managed in appropriate time. */ if (!reserved && !swap_reserve_by_cred(ptoa(next_size), prev_object->cred)) { return (FALSE); } prev_object->charge += ptoa(next_size); } /* * Remove any pages that may still be in the object from a previous * deallocation. */ if (next_pindex < prev_object->size) { vm_object_page_remove(prev_object, next_pindex, next_pindex + next_size, 0); if (prev_object->type == OBJT_SWAP) swap_pager_freespace(prev_object, next_pindex, next_size); #if 0 if (prev_object->cred != NULL) { KASSERT(prev_object->charge >= ptoa(prev_object->size - next_pindex), ("object %p overcharged 1 %jx %jx", prev_object, (uintmax_t)next_pindex, (uintmax_t)next_size)); prev_object->charge -= ptoa(prev_object->size - next_pindex); } #endif } /* * Extend the object if necessary. */ if (next_pindex + next_size > prev_object->size) prev_object->size = next_pindex + next_size; VM_OBJECT_WUNLOCK(prev_object); return (TRUE); } void vm_object_set_writeable_dirty(vm_object_t object) { VM_OBJECT_ASSERT_WLOCKED(object); if (object->type != OBJT_VNODE) { if ((object->flags & OBJ_TMPFS_NODE) != 0) { KASSERT(object->type == OBJT_SWAP, ("non-swap tmpfs")); vm_object_set_flag(object, OBJ_TMPFS_DIRTY); } return; } object->generation++; if ((object->flags & OBJ_MIGHTBEDIRTY) != 0) return; vm_object_set_flag(object, OBJ_MIGHTBEDIRTY); } /* * vm_object_unwire: * * For each page offset within the specified range of the given object, * find the highest-level page in the shadow chain and unwire it. A page * must exist at every page offset, and the highest-level page must be * wired. 
*/
void
vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length,
    uint8_t queue)
{
	vm_object_t tobject;
	vm_page_t m, tm;
	vm_pindex_t end_pindex, pindex, tpindex;
	int depth, locked_depth;

	KASSERT((offset & PAGE_MASK) == 0,
	    ("vm_object_unwire: offset is not page aligned"));
	KASSERT((length & PAGE_MASK) == 0,
	    ("vm_object_unwire: length is not a multiple of PAGE_SIZE"));
	/* The wired count of a fictitious page never changes. */
	if ((object->flags & OBJ_FICTITIOUS) != 0)
		return;
	pindex = OFF_TO_IDX(offset);
	end_pindex = pindex + atop(length);
	/* Number of objects, counted from the top, that are read-locked. */
	locked_depth = 1;
	VM_OBJECT_RLOCK(object);
	m = vm_page_find_least(object, pindex);
	while (pindex < end_pindex) {
		if (m == NULL || pindex < m->pindex) {
			/*
			 * The first object in the shadow chain doesn't
			 * contain a page at the current index. Therefore,
			 * the page must exist in a backing object.
			 */
			tobject = object;
			tpindex = pindex;
			depth = 0;
			do {
				tpindex +=
				    OFF_TO_IDX(tobject->backing_object_offset);
				tobject = tobject->backing_object;
				KASSERT(tobject != NULL,
				    ("vm_object_unwire: missing page"));
				if ((tobject->flags & OBJ_FICTITIOUS) != 0)
					goto next_page;
				depth++;
				/* Lock each backing object only once. */
				if (depth == locked_depth) {
					locked_depth++;
					VM_OBJECT_RLOCK(tobject);
				}
			} while ((tm = vm_page_lookup(tobject, tpindex)) ==
			    NULL);
		} else {
			tm = m;
			m = TAILQ_NEXT(m, listq);
		}
		vm_page_lock(tm);
		vm_page_unwire(tm, queue);
		vm_page_unlock(tm);
next_page:
		pindex++;
	}
	/* Release the accumulated object locks. */
	for (depth = 0; depth < locked_depth; depth++) {
		tobject = object->backing_object;
		VM_OBJECT_RUNLOCK(object);
		object = tobject;
	}
}
+/*
+ * Sysctl handler that reports one struct kinfo_vmobject per VM object
+ * on vm_object_list, skipping objects of type OBJT_DEAD.  When no
+ * output buffer is supplied it reports a size estimate instead: the
+ * live object count times the full record size, plus 10%.
+ */
+static int
+sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
+{
+	struct kinfo_vmobject kvo;
+	char *fullpath, *freepath;
+	struct vnode *vp;
+	struct vattr va;
+	vm_object_t obj;
+	vm_page_t m;
+	int count, error;
+
+	if (req->oldptr == NULL) {
+		/*
+		 * If an old buffer has not been provided, generate an
+		 * estimate of the space needed for a subsequent call.
+		 */
+		mtx_lock(&vm_object_list_mtx);
+		count = 0;
+		TAILQ_FOREACH(obj, &vm_object_list, object_list) {
+			if (obj->type == OBJT_DEAD)
+				continue;
+			count++;
+		}
+		mtx_unlock(&vm_object_list_mtx);
+		return (SYSCTL_OUT(req, NULL, sizeof(struct kinfo_vmobject) *
+		    count * 11 / 10));
+	}
+
+	error = 0;
+
+	/*
+	 * VM objects are type stable and are never removed from the
+	 * list once added. This allows us to safely read obj->object_list
+	 * after reacquiring the VM object lock.
+	 */
+	mtx_lock(&vm_object_list_mtx);
+	TAILQ_FOREACH(obj, &vm_object_list, object_list) {
+		if (obj->type == OBJT_DEAD)
+			continue;
+		VM_OBJECT_RLOCK(obj);
+		if (obj->type == OBJT_DEAD) {	/* re-checked under the object lock */
+			VM_OBJECT_RUNLOCK(obj);
+			continue;
+		}
+		mtx_unlock(&vm_object_list_mtx);
+		kvo.kvo_size = ptoa(obj->size);
+		kvo.kvo_resident = obj->resident_page_count;
+		kvo.kvo_ref_count = obj->ref_count;
+		kvo.kvo_shadow_count = obj->shadow_count;
+		kvo.kvo_memattr = obj->memattr;
+		kvo.kvo_active = 0;
+		kvo.kvo_inactive = 0;
+		TAILQ_FOREACH(m, &obj->memq, listq) {
+			/*
+			 * A page may belong to the object but be
+			 * dequeued and set to PQ_NONE while the
+			 * object lock is not held. This makes the
+			 * reads of m->queue below racy, and we do not
+			 * count pages set to PQ_NONE. However, this
+			 * sysctl is only meant to give an
+			 * approximation of the system anyway.
+			 */
+			if (m->queue == PQ_ACTIVE)
+				kvo.kvo_active++;
+			else if (m->queue == PQ_INACTIVE)
+				kvo.kvo_inactive++;
+		}
+
+		kvo.kvo_vn_fileid = 0;
+		kvo.kvo_vn_fsid = 0;
+		freepath = NULL;
+		fullpath = "";
+		vp = NULL;
+		switch (obj->type) {
+		case OBJT_DEFAULT:
+			kvo.kvo_type = KVME_TYPE_DEFAULT;
+			break;
+		case OBJT_VNODE:
+			kvo.kvo_type = KVME_TYPE_VNODE;
+			vp = obj->handle;
+			vref(vp);	/* released by vput() below */
+			break;
+		case OBJT_SWAP:
+			kvo.kvo_type = KVME_TYPE_SWAP;
+			break;
+		case OBJT_DEVICE:
+			kvo.kvo_type = KVME_TYPE_DEVICE;
+			break;
+		case OBJT_PHYS:
+			kvo.kvo_type = KVME_TYPE_PHYS;
+			break;
+		case OBJT_DEAD:
+			kvo.kvo_type = KVME_TYPE_DEAD;
+			break;
+		case OBJT_SG:
+			kvo.kvo_type = KVME_TYPE_SG;
+			break;
+		case OBJT_MGTDEVICE:
+			kvo.kvo_type = KVME_TYPE_MGTDEVICE;
+			break;
+		default:
+			kvo.kvo_type = KVME_TYPE_UNKNOWN;
+			break;
+		}
+		VM_OBJECT_RUNLOCK(obj);
+		if (vp != NULL) {
+			vn_fullpath(curthread, vp, &fullpath, &freepath);
+			vn_lock(vp, LK_SHARED | LK_RETRY);
+			if (VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) {
+				kvo.kvo_vn_fileid = va.va_fileid;
+				kvo.kvo_vn_fsid = va.va_fsid;
+			}
+			vput(vp);
+		}
+
+		strlcpy(kvo.kvo_path, fullpath, sizeof(kvo.kvo_path));
+		if (freepath != NULL)
+			free(freepath, M_TEMP);
+
+		/*
+		 * Pack record size down: the record ends after the path's
+		 * NUL, rounded up to a multiple of sizeof(uint64_t).
+		 *
+		 * NOTE(review): kvo is never zeroed, so the bytes between
+		 * the NUL and the rounded-up end (and any members not
+		 * assigned above) are copied out as they are -- confirm
+		 * that this cannot expose stale stack contents.
+		 */
+		kvo.kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path)
+		    + strlen(kvo.kvo_path) + 1;
+		kvo.kvo_structsize = roundup(kvo.kvo_structsize,
+		    sizeof(uint64_t));
+		error = SYSCTL_OUT(req, &kvo, kvo.kvo_structsize);
+		mtx_lock(&vm_object_list_mtx);
+		if (error)
+			break;
+	}
+	mtx_unlock(&vm_object_list_mtx);
+	return (error);
+}
+SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP |
+    CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_object_list, "S,kinfo_vmobject",
+    "List of VM objects");

#include "opt_ddb.h"
#ifdef DDB
/*
 * NOTE(review): the three #include directives below carry no header name
 * in this text; presumably angle-bracket names were lost when the patch
 * was extracted -- confirm against the original file.
 */
#include
#include
#include

/*
 * Return 1 if "object" is reachable from "map", 0 otherwise.  With a
 * zero "entry" every entry of the map is checked; a sub-map entry is
 * searched recursively; for any other entry the entry's object and its
 * chain of backing objects are compared against "object".
 */
static int
_vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
{
	vm_map_t tmpm;
	vm_map_entry_t tmpe;
	vm_object_t obj;
	int entcount;

	if (map == 0)
		return 0;

	if (entry == 0) {
		tmpe = map->header.next;
		entcount = map->nentries;
		while (entcount-- && (tmpe != &map->header)) {
			if (_vm_object_in_map(map, object, tmpe)) {
				return 1;
			}
			tmpe = tmpe->next;
		}
	} else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
		tmpm = entry->object.sub_map;
		tmpe = tmpm->header.next;
		entcount = tmpm->nentries;
		while (entcount-- && tmpe != &tmpm->header) {
			if (_vm_object_in_map(tmpm, object, tmpe)) {
				return 1;
			}
			tmpe = tmpe->next;
		}
	} else if ((obj = entry->object.vm_object) != NULL) {
		for (; obj; obj = obj->backing_object)
			if (obj == object) {
				return 1;
			}
	}
	return 0;
}

/*
 * Return 1 if "object" is reachable from the map of any process that
 * has a vmspace, or from kernel_map; 0 otherwise.  The process list is
 * walked without taking allproc_lock (the calls are commented out).
 */
static int
vm_object_in_map(vm_object_t object)
{
	struct proc *p;

	/* sx_slock(&allproc_lock); */
	FOREACH_PROC_IN_SYSTEM(p) {
		if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
			continue;
		if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) {
			/* sx_sunlock(&allproc_lock); */
			return 1;
		}
	}
	/* sx_sunlock(&allproc_lock); */
	if (_vm_object_in_map(kernel_map, object, 0))
		return 1;
	return 0;
}

DB_SHOW_COMMAND(vmochk, vm_object_check)
{
	vm_object_t object;

	/*
	 * make sure that internal objs are in a map somewhere
	 * and none have zero ref counts.
	 */
	TAILQ_FOREACH(object, &vm_object_list, object_list) {
		if (object->handle == NULL &&
		    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
			if (object->ref_count == 0) {
				db_printf("vmochk: internal obj has zero ref count: %ld\n",
				    (long)object->size);
			}
			if (!vm_object_in_map(object)) {
				/* object->size is printed twice: decimal, then hex. */
				db_printf(
			"vmochk: internal obj is not in a map: "
			"ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
				    object->ref_count, (u_long)object->size,
				    (u_long)object->size,
				    (void *)object->backing_object);
			}
		}
	}
}

/*
 * vm_object_print: [ debug ]
 *
 * Prints a two-line summary of the object at "addr"; when "have_addr"
 * is set ("full"), also lists every resident page as an (offset,
 * physical address) pair, six to a line.
 */
DB_SHOW_COMMAND(object, vm_object_print_static)
{
	/* XXX convert args. */
	vm_object_t object = (vm_object_t)addr;
	boolean_t full = have_addr;

	vm_page_t p;

	/* XXX count is an (unused) arg. Avoid shadowing it. */
#define	count	was_count

	int count;

	if (object == NULL)
		return;

	db_iprintf(
	    "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x ruid %d charge %jx\n",
	    object, (int)object->type, (uintmax_t)object->size,
	    object->resident_page_count, object->ref_count, object->flags,
	    object->cred ? object->cred->cr_ruid : -1,
	    (uintmax_t)object->charge);
	db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
	    object->shadow_count,
	    object->backing_object ? object->backing_object->ref_count : 0,
	    object->backing_object, (uintmax_t)object->backing_object_offset);

	if (!full)
		return;

	db_indent += 2;
	count = 0;
	TAILQ_FOREACH(p, &object->memq, listq) {
		if (count == 0)
			db_iprintf("memory:=");
		else if (count == 6) {
			db_printf("\n");
			db_iprintf(" ...");
			count = 0;
		} else
			db_printf(",");
		count++;

		db_printf("(off=0x%jx,page=0x%jx)",
		    (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p));
	}
	if (count != 0)
		db_printf("\n");
	db_indent -= 2;
}

/* XXX. */
#undef count

/* XXX need this non-static entry for calling from vm_map_print.
*/ void vm_object_print( /* db_expr_t */ long addr, boolean_t have_addr, /* db_expr_t */ long count, char *modif) { vm_object_print_static(addr, have_addr, count, modif); } DB_SHOW_COMMAND(vmopag, vm_object_print_pages) { vm_object_t object; vm_pindex_t fidx; vm_paddr_t pa; vm_page_t m, prev_m; int rcount, nl, c; nl = 0; TAILQ_FOREACH(object, &vm_object_list, object_list) { db_printf("new object: %p\n", (void *)object); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; rcount = 0; fidx = 0; pa = -1; TAILQ_FOREACH(m, &object->memq, listq) { if (m->pindex > 128) break; if ((prev_m = TAILQ_PREV(m, pglist, listq)) != NULL && prev_m->pindex + 1 != m->pindex) { if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; rcount = 0; } } if (rcount && (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) { ++rcount; continue; } if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; } fidx = m->pindex; pa = VM_PAGE_TO_PHYS(m); rcount = 1; } if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; } } } #endif /* DDB */ Index: head/usr.bin/vmstat/vmstat.8 =================================================================== --- head/usr.bin/vmstat/vmstat.8 (revision 283623) +++ head/usr.bin/vmstat/vmstat.8 (revision 283624) @@ -1,375 +1,378 @@ .\" Copyright (c) 1986, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. 
Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)vmstat.8 8.1 (Berkeley) 6/6/93 .\" $FreeBSD$ .\" .Dd August 8, 2014 .Dt VMSTAT 8 .Os .Sh NAME .Nm vmstat .Nd report virtual memory statistics .Sh SYNOPSIS .Nm .\" .Op Fl fimst -.Op Fl afHhimPsz +.Op Fl afHhimoPsz .Op Fl M Ar core Op Fl N Ar system .Op Fl c Ar count .Op Fl n Ar devs .Oo .Fl p .Sm off .Ar type , if , pass .Sm on .Oc .Op Fl w Ar wait .Op Ar disks ... .Op wait Op count .Sh DESCRIPTION The .Nm utility reports certain kernel statistics kept about process, virtual memory, disk, trap and cpu activity. .Pp If the .Fl M option is not specified, information is obtained from the currently running kernel via the .Xr sysctl 3 interface. Otherwise, information is read from the specified core file, using the name list from the specified kernel image (or from the default image). 
.Pp The options are as follows: .Bl -tag -width indent .It Fl a When used with .Fl i , include statistics about interrupts that have never been generated. .It Fl c Repeat the display .Ar count times. The first display is for the time since a reboot and each subsequent report is for the time period since the last display. If no repeat .Ar count is specified, and .Fl w is specified, the default is infinity, otherwise the default is one. .It Fl f Report on the number of .Xr fork 2 , .Xr vfork 2 and .Xr rfork 2 system calls since system startup, and the number of pages of virtual memory involved in each. .It Fl h Changes memory columns into more easily human readable form. This is the default if standard output is a terminal device. .It Fl H Changes memory columns into straight numbers. This is the default if standard output is not a terminal device (such as a script). .It Fl i Report on the number of interrupts taken by each device since system startup. .It Fl M Extract values associated with the name list from the specified .Ar core . .It Fl N If .Fl M is also specified, extract the name list from the specified .Ar system instead of the default, which is the kernel image the system has booted from. .It Fl m Report on the usage of kernel dynamic memory allocated using .Xr malloc 9 by type. .It Fl n Change the maximum number of disks to display from the default of 2. +.It Fl o +Display a list of virtual memory objects in the system and the resident +memory used by each object. .It Fl P Report per-cpu system/user/idle cpu statistics. .It Fl p Specify which types of devices to display.
There are three different categories of devices: .Pp .Bl -tag -width indent -compact .It device type: .Bl -tag -width 9n -compact .It da Direct Access devices .It sa Sequential Access devices .It printer Printers .It proc Processor devices .It worm Write Once Read Multiple devices .It cd CD devices .It scanner Scanner devices .It optical Optical Memory devices .It changer Medium Changer devices .It comm Communication devices .It array Storage Array devices .It enclosure Enclosure Services devices .It floppy Floppy devices .El .Pp .It interface: .Bl -tag -width 9n -compact .It IDE Integrated Drive Electronics devices .It SCSI Small Computer System Interface devices .It other Any other device interface .El .Pp .It passthrough: .Bl -tag -width 9n -compact .It pass Passthrough devices .El .El .Pp The user must specify at least one device type, and may specify at most one device type from each category. Multiple device types in a single device type statement must be separated by commas. .Pp Any number of .Fl p arguments may be specified on the command line. All .Fl p arguments are ORed together to form a matching expression against which all devices in the system are compared. Any device that fully matches any .Fl p argument will be included in the .Nm output, up to two devices, or the maximum number of devices specified by the user. .It Fl s Display the contents of the .Em sum structure, giving the total number of several kinds of paging related events which have occurred since system startup. .\" .It Fl t .\" Report on the number of page in and page reclaims since system startup, .\" and the amount of time required by each. .It Fl w Pause .Ar wait seconds between each display. If no repeat .Ar wait interval is specified, the default is 1 second. The .Nm command will accept and honor a non-integer number of seconds. .It Fl z Report on memory used by the kernel zone allocator, .Xr uma 9 , by zone. 
.El .Pp The .Ar wait and .Ar count arguments may be given after their respective flags at any point on the command line before the .Ar disks argument(s), or without their flags, as the final argument(s). The latter form is accepted for backwards compatibility, but it is preferred to use the forms with .Fl w and .Fl c to avoid ambiguity. .Pp By default, .Nm displays the following information: .Bl -tag -width indent .It procs Information about the numbers of processes in various states. .Pp .Bl -tag -width indent -compact .It r in run queue .It b blocked for resources (i/o, paging, etc.) .It w runnable or short sleeper (< 20 secs) but swapped .El .It memory Information about the usage of virtual and real memory. Virtual pages (reported in units of 1024 bytes) are considered active if they belong to processes which are running or have run in the last 20 seconds. .Pp .Bl -tag -width indent -compact .It avm active virtual pages .It fre size of the free list .El .It page Information about page faults and paging activity. These are averaged each five seconds, and given in units per second. .Pp .Bl -tag -width indent -compact .It flt total number of page faults .It re page reclaims (simulating reference bits) .\" .It at .\" pages attached (found in free list) .It pi pages paged in .It po pages paged out .It fr pages freed per second .\" .It de .\" anticipated short term memory shortfall .It sr pages scanned by clock algorithm, per-second .El .It disks Disk operations per second (this field is system dependent). Typically paging will be split across the available drives. The header of the field is the first two characters of the disk name and the unit number. If more than two disk drives are configured in the system, .Nm displays only the first two drives, unless the user specifies the .Fl n argument to increase the number of drives displayed. This will probably cause the display to exceed 80 columns, however. 
To force .Nm to display specific drives, their names may be supplied on the command line. The .Nm utility defaults to show disks first, and then various other random devices in the system to add up to two devices, if there are that many devices in the system. If devices are specified on the command line, or if a device type matching pattern is specified (see above), .Nm will only display the given devices or the devices matching the pattern, and will not randomly select other devices in the system. .It faults Trap/interrupt rate averages per second over last 5 seconds. .Pp .Bl -tag -width indent -compact .It in device interrupts per interval (including clock interrupts) .It sy system calls per interval .It cs cpu context switch rate (switches/interval) .El .It cpu Breakdown of percentage usage of CPU time. .Pp .Bl -tag -width indent -compact .It us user time for normal and low priority processes .It sy system time .It id cpu idle .El .El .Sh FILES .Bl -tag -width /boot/kernel/kernel -compact .It Pa /boot/kernel/kernel default kernel namelist .It Pa /dev/kmem default memory file .El .Sh EXAMPLES The command: .Dl vmstat -w 5 will print what the system is doing every five seconds; this is a good choice of printing interval since this is how often some of the statistics are sampled in the system. Others vary every second and running the output for a while will make it apparent which are recomputed every second. .Pp The command: .Dl vmstat -p da -p cd -w 1 will tell vmstat to select the first two direct access or CDROM devices and display statistics on those devices, as well as other systems statistics every second. .Sh SEE ALSO .Xr fstat 1 , .Xr netstat 1 , .Xr nfsstat 1 , .Xr ps 1 , .Xr systat 1 , .Xr libmemstat 3 , .Xr gstat 8 , .Xr iostat 8 , .Xr pstat 8 , .Xr sysctl 8 , .Xr malloc 9 , .Xr uma 9 .Pp The sections starting with ``Interpreting system activity'' in .%T "Installing and Operating 4.3BSD" . 
.Sh BUGS The .Fl c and .Fl w options are only available with the default output. Index: head/usr.bin/vmstat/vmstat.c =================================================================== --- head/usr.bin/vmstat/vmstat.c (revision 283623) +++ head/usr.bin/vmstat/vmstat.c (revision 283624) @@ -1,1454 +1,1585 @@ /* * Copyright (c) 1980, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1986, 1991, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #if 0 #ifndef lint static char sccsid[] = "@(#)vmstat.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static char da[] = "da"; static struct nlist namelist[] = { #define X_SUM 0 { "_vm_cnt" }, #define X_HZ 1 { "_hz" }, #define X_STATHZ 2 { "_stathz" }, #define X_NCHSTATS 3 { "_nchstats" }, #define X_INTRNAMES 4 { "_intrnames" }, #define X_SINTRNAMES 5 { "_sintrnames" }, #define X_INTRCNT 6 { "_intrcnt" }, #define X_SINTRCNT 7 { "_sintrcnt" }, #ifdef notyet #define X_DEFICIT XXX { "_deficit" }, #define X_REC XXX { "_rectime" }, #define X_PGIN XXX { "_pgintime" }, #define X_XSTATS XXX { "_xstats" }, #define X_END XXX #else #define X_END 8 #endif { "" }, }; static struct statinfo cur, last; static int num_devices, maxshowdevs; static long generation; static struct device_selection *dev_select; static int num_selected; static struct devstat_match *matches; static int num_matches = 0; static int num_devices_specified, num_selections; static long select_generation; static char **specified_devices; static devstat_select_mode select_mode; static struct vmmeter sum, osum; #define VMSTAT_DEFAULT_LINES 20 /* Default number of `winlines'. */ volatile sig_atomic_t wresized; /* Tty resized, when non-zero. */ static int winlines = VMSTAT_DEFAULT_LINES; /* Current number of tty rows. 
*/ static int aflag; static int nflag; static int Pflag; static int hflag; static kvm_t *kd; #define FORKSTAT 0x01 #define INTRSTAT 0x02 #define MEMSTAT 0x04 #define SUMSTAT 0x08 #define TIMESTAT 0x10 #define VMSTAT 0x20 #define ZMEMSTAT 0x40 +#define OBJSTAT 0x80 static void cpustats(void); static void pcpustats(int, u_long, int); static void devstats(void); static void doforkst(void); static void dointr(unsigned int, int); +static void doobjstat(void); static void dosum(void); static void dovmstat(unsigned int, int); static void domemstat_malloc(void); static void domemstat_zone(void); static void kread(int, void *, size_t); static void kreado(int, void *, size_t, size_t); static char *kgetstr(const char *); static void needhdr(int); static void needresize(int); static void doresize(void); static void printhdr(int, u_long); static void usage(void); static long pct(long, long); static long long getuptime(void); static char **getdrivedata(char **); int main(int argc, char *argv[]) { int c, todo; unsigned int interval; float f; int reps; char *memf, *nlistf; char errbuf[_POSIX2_LINE_MAX]; memf = nlistf = NULL; interval = reps = todo = 0; maxshowdevs = 2; hflag = isatty(1); - while ((c = getopt(argc, argv, "ac:fhHiM:mN:n:Pp:stw:z")) != -1) { + while ((c = getopt(argc, argv, "ac:fhHiM:mN:n:oPp:stw:z")) != -1) { switch (c) { case 'a': aflag++; break; case 'c': reps = atoi(optarg); break; case 'P': Pflag++; break; case 'f': todo |= FORKSTAT; break; case 'h': hflag = 1; break; case 'H': hflag = 0; break; case 'i': todo |= INTRSTAT; break; case 'M': memf = optarg; break; case 'm': todo |= MEMSTAT; break; case 'N': nlistf = optarg; break; case 'n': nflag = 1; maxshowdevs = atoi(optarg); if (maxshowdevs < 0) errx(1, "number of devices %d is < 0", maxshowdevs); break; + case 'o': + todo |= OBJSTAT; + break; case 'p': if (devstat_buildmatch(optarg, &matches, &num_matches) != 0) errx(1, "%s", devstat_errbuf); break; case 's': todo |= SUMSTAT; break; case 't': #ifdef notyet 
todo |= TIMESTAT; #else errx(EX_USAGE, "sorry, -t is not (re)implemented yet"); #endif break; case 'w': /* Convert to milliseconds. */ f = atof(optarg); interval = f * 1000; break; case 'z': todo |= ZMEMSTAT; break; case '?': default: usage(); } } argc -= optind; argv += optind; if (todo == 0) todo = VMSTAT; if (memf != NULL) { kd = kvm_openfiles(nlistf, memf, NULL, O_RDONLY, errbuf); if (kd == NULL) errx(1, "kvm_openfiles: %s", errbuf); } retry_nlist: if (kd != NULL && (c = kvm_nlist(kd, namelist)) != 0) { if (c > 0) { /* * 'cnt' was renamed to 'vm_cnt'. If 'vm_cnt' is not * found try looking up older 'cnt' symbol. * */ if (namelist[X_SUM].n_type == 0 && strcmp(namelist[X_SUM].n_name, "_vm_cnt") == 0) { namelist[X_SUM].n_name = "_cnt"; goto retry_nlist; } warnx("undefined symbols:"); for (c = 0; c < (int)(sizeof(namelist)/sizeof(namelist[0])); c++) if (namelist[c].n_type == 0) (void)fprintf(stderr, " %s", namelist[c].n_name); (void)fputc('\n', stderr); } else warnx("kvm_nlist: %s", kvm_geterr(kd)); exit(1); } if (kd && Pflag) errx(1, "Cannot use -P with crash dumps"); if (todo & VMSTAT) { /* * Make sure that the userland devstat version matches the * kernel devstat version. If not, exit and print a * message informing the user of his mistake. 
*/ if (devstat_checkversion(NULL) < 0) errx(1, "%s", devstat_errbuf); argv = getdrivedata(argv); } if (*argv) { f = atof(*argv); interval = f * 1000; if (*++argv) reps = atoi(*argv); } if (interval) { if (!reps) reps = -1; } else if (reps) interval = 1 * 1000; if (todo & FORKSTAT) doforkst(); if (todo & MEMSTAT) domemstat_malloc(); if (todo & ZMEMSTAT) domemstat_zone(); if (todo & SUMSTAT) dosum(); + if (todo & OBJSTAT) + doobjstat(); #ifdef notyet if (todo & TIMESTAT) dotimes(); #endif if (todo & INTRSTAT) dointr(interval, reps); if (todo & VMSTAT) dovmstat(interval, reps); exit(0); } static int mysysctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int error; error = sysctlbyname(name, oldp, oldlenp, newp, newlen); if (error != 0 && errno != ENOMEM) err(1, "sysctl(%s)", name); return (error); } static char ** getdrivedata(char **argv) { if ((num_devices = devstat_getnumdevs(NULL)) < 0) errx(1, "%s", devstat_errbuf); cur.dinfo = (struct devinfo *)calloc(1, sizeof(struct devinfo)); last.dinfo = (struct devinfo *)calloc(1, sizeof(struct devinfo)); if (devstat_getdevs(NULL, &cur) == -1) errx(1, "%s", devstat_errbuf); num_devices = cur.dinfo->numdevs; generation = cur.dinfo->generation; specified_devices = (char **)malloc(sizeof(char *)); for (num_devices_specified = 0; *argv; ++argv) { if (isdigit(**argv)) break; num_devices_specified++; specified_devices = (char **)realloc(specified_devices, sizeof(char *) * num_devices_specified); specified_devices[num_devices_specified - 1] = *argv; } dev_select = NULL; if (nflag == 0 && maxshowdevs < num_devices_specified) maxshowdevs = num_devices_specified; /* * People are generally only interested in disk statistics when * they're running vmstat. So, that's what we're going to give * them if they don't specify anything by default. We'll also give * them any other random devices in the system so that we get to * maxshowdevs devices, if that many devices exist. 
If the user * specifies devices on the command line, either through a pattern * match or by naming them explicitly, we will give the user only * those devices. */ if ((num_devices_specified == 0) && (num_matches == 0)) { if (devstat_buildmatch(da, &matches, &num_matches) != 0) errx(1, "%s", devstat_errbuf); select_mode = DS_SELECT_ADD; } else select_mode = DS_SELECT_ONLY; /* * At this point, selectdevs will almost surely indicate that the * device list has changed, so we don't look for return values of 0 * or 1. If we get back -1, though, there is an error. */ if (devstat_selectdevs(&dev_select, &num_selected, &num_selections, &select_generation, generation, cur.dinfo->devices, num_devices, matches, num_matches, specified_devices, num_devices_specified, select_mode, maxshowdevs, 0) == -1) errx(1, "%s", devstat_errbuf); return(argv); } /* Return system uptime in nanoseconds */ static long long getuptime(void) { struct timespec sp; (void)clock_gettime(CLOCK_UPTIME, &sp); return((long long)sp.tv_sec * 1000000000LL + sp.tv_nsec); } static void fill_pcpu(struct pcpu ***pcpup, int* maxcpup) { struct pcpu **pcpu; int maxcpu, i; *pcpup = NULL; if (kd == NULL) return; maxcpu = kvm_getmaxcpu(kd); if (maxcpu < 0) errx(1, "kvm_getmaxcpu: %s", kvm_geterr(kd)); pcpu = calloc(maxcpu, sizeof(struct pcpu *)); if (pcpu == NULL) err(1, "calloc"); for (i = 0; i < maxcpu; i++) { pcpu[i] = kvm_getpcpu(kd, i); if (pcpu[i] == (struct pcpu *)-1) errx(1, "kvm_getpcpu: %s", kvm_geterr(kd)); } *maxcpup = maxcpu; *pcpup = pcpu; } static void free_pcpu(struct pcpu **pcpu, int maxcpu) { int i; for (i = 0; i < maxcpu; i++) free(pcpu[i]); free(pcpu); } static void fill_vmmeter(struct vmmeter *vmmp) { struct pcpu **pcpu; int maxcpu, i; if (kd != NULL) { kread(X_SUM, vmmp, sizeof(*vmmp)); fill_pcpu(&pcpu, &maxcpu); for (i = 0; i < maxcpu; i++) { if (pcpu[i] == NULL) continue; #define ADD_FROM_PCPU(i, name) \ vmmp->name += pcpu[i]->pc_cnt.name ADD_FROM_PCPU(i, v_swtch); ADD_FROM_PCPU(i, v_trap); 
ADD_FROM_PCPU(i, v_syscall); ADD_FROM_PCPU(i, v_intr); ADD_FROM_PCPU(i, v_soft); ADD_FROM_PCPU(i, v_vm_faults); ADD_FROM_PCPU(i, v_io_faults); ADD_FROM_PCPU(i, v_cow_faults); ADD_FROM_PCPU(i, v_cow_optim); ADD_FROM_PCPU(i, v_zfod); ADD_FROM_PCPU(i, v_ozfod); ADD_FROM_PCPU(i, v_swapin); ADD_FROM_PCPU(i, v_swapout); ADD_FROM_PCPU(i, v_swappgsin); ADD_FROM_PCPU(i, v_swappgsout); ADD_FROM_PCPU(i, v_vnodein); ADD_FROM_PCPU(i, v_vnodeout); ADD_FROM_PCPU(i, v_vnodepgsin); ADD_FROM_PCPU(i, v_vnodepgsout); ADD_FROM_PCPU(i, v_intrans); ADD_FROM_PCPU(i, v_tfree); ADD_FROM_PCPU(i, v_forks); ADD_FROM_PCPU(i, v_vforks); ADD_FROM_PCPU(i, v_rforks); ADD_FROM_PCPU(i, v_kthreads); ADD_FROM_PCPU(i, v_forkpages); ADD_FROM_PCPU(i, v_vforkpages); ADD_FROM_PCPU(i, v_rforkpages); ADD_FROM_PCPU(i, v_kthreadpages); #undef ADD_FROM_PCPU } free_pcpu(pcpu, maxcpu); } else { size_t size = sizeof(unsigned int); #define GET_VM_STATS(cat, name) \ mysysctl("vm.stats." #cat "." #name, &vmmp->name, &size, NULL, 0) /* sys */ GET_VM_STATS(sys, v_swtch); GET_VM_STATS(sys, v_trap); GET_VM_STATS(sys, v_syscall); GET_VM_STATS(sys, v_intr); GET_VM_STATS(sys, v_soft); /* vm */ GET_VM_STATS(vm, v_vm_faults); GET_VM_STATS(vm, v_io_faults); GET_VM_STATS(vm, v_cow_faults); GET_VM_STATS(vm, v_cow_optim); GET_VM_STATS(vm, v_zfod); GET_VM_STATS(vm, v_ozfod); GET_VM_STATS(vm, v_swapin); GET_VM_STATS(vm, v_swapout); GET_VM_STATS(vm, v_swappgsin); GET_VM_STATS(vm, v_swappgsout); GET_VM_STATS(vm, v_vnodein); GET_VM_STATS(vm, v_vnodeout); GET_VM_STATS(vm, v_vnodepgsin); GET_VM_STATS(vm, v_vnodepgsout); GET_VM_STATS(vm, v_intrans); GET_VM_STATS(vm, v_reactivated); GET_VM_STATS(vm, v_pdwakeups); GET_VM_STATS(vm, v_pdpages); GET_VM_STATS(vm, v_tcached); GET_VM_STATS(vm, v_dfree); GET_VM_STATS(vm, v_pfree); GET_VM_STATS(vm, v_tfree); GET_VM_STATS(vm, v_page_size); GET_VM_STATS(vm, v_page_count); GET_VM_STATS(vm, v_free_reserved); GET_VM_STATS(vm, v_free_target); GET_VM_STATS(vm, v_free_min); GET_VM_STATS(vm, v_free_count); 
GET_VM_STATS(vm, v_wire_count); GET_VM_STATS(vm, v_active_count); GET_VM_STATS(vm, v_inactive_target); GET_VM_STATS(vm, v_inactive_count); GET_VM_STATS(vm, v_cache_count); GET_VM_STATS(vm, v_cache_min); GET_VM_STATS(vm, v_cache_max); GET_VM_STATS(vm, v_pageout_free_min); GET_VM_STATS(vm, v_interrupt_free_min); /*GET_VM_STATS(vm, v_free_severe);*/ GET_VM_STATS(vm, v_forks); GET_VM_STATS(vm, v_vforks); GET_VM_STATS(vm, v_rforks); GET_VM_STATS(vm, v_kthreads); GET_VM_STATS(vm, v_forkpages); GET_VM_STATS(vm, v_vforkpages); GET_VM_STATS(vm, v_rforkpages); GET_VM_STATS(vm, v_kthreadpages); #undef GET_VM_STATS } } static void fill_vmtotal(struct vmtotal *vmtp) { if (kd != NULL) { /* XXX fill vmtp */ errx(1, "not implemented"); } else { size_t size = sizeof(*vmtp); mysysctl("vm.vmtotal", vmtp, &size, NULL, 0); if (size != sizeof(*vmtp)) errx(1, "vm.total size mismatch"); } } /* Determine how many cpu columns, and what index they are in kern.cp_times */ static int getcpuinfo(u_long *maskp, int *maxidp) { int maxcpu; int maxid; int ncpus; int i, j; int empty; size_t size; long *times; u_long mask; if (kd != NULL) errx(1, "not implemented"); mask = 0; ncpus = 0; size = sizeof(maxcpu); mysysctl("kern.smp.maxcpus", &maxcpu, &size, NULL, 0); if (size != sizeof(maxcpu)) errx(1, "sysctl kern.smp.maxcpus"); size = sizeof(long) * maxcpu * CPUSTATES; times = malloc(size); if (times == NULL) err(1, "malloc %zd bytes", size); mysysctl("kern.cp_times", times, &size, NULL, 0); maxid = (size / CPUSTATES / sizeof(long)) - 1; for (i = 0; i <= maxid; i++) { empty = 1; for (j = 0; empty && j < CPUSTATES; j++) { if (times[i * CPUSTATES + j] != 0) empty = 0; } if (!empty) { mask |= (1ul << i); ncpus++; } } if (maskp) *maskp = mask; if (maxidp) *maxidp = maxid; return (ncpus); } static void prthuman(u_int64_t val, int size) { char buf[10]; int flags; if (size < 5 || size > 9) errx(1, "doofus"); flags = HN_B | HN_NOSPACE | HN_DECIMAL; humanize_number(buf, size, val, "", HN_AUTOSCALE, flags); 
printf("%*s", size, buf); } static int hz, hdrcnt; static long *cur_cp_times; static long *last_cp_times; static size_t size_cp_times; static void dovmstat(unsigned int interval, int reps) { struct vmtotal total; time_t uptime, halfuptime; struct devinfo *tmp_dinfo; size_t size; int ncpus, maxid; u_long cpumask; int rate_adj; uptime = getuptime() / 1000000000LL; halfuptime = uptime / 2; rate_adj = 1; ncpus = 1; maxid = 0; /* * If the user stops the program (control-Z) and then resumes it, * print out the header again. */ (void)signal(SIGCONT, needhdr); /* * If our standard output is a tty, then install a SIGWINCH handler * and set wresized so that our first iteration through the main * vmstat loop will peek at the terminal's current rows to find out * how many lines can fit in a screenful of output. */ if (isatty(fileno(stdout)) != 0) { wresized = 1; (void)signal(SIGWINCH, needresize); } else { wresized = 0; winlines = VMSTAT_DEFAULT_LINES; } if (kd != NULL) { if (namelist[X_STATHZ].n_type != 0 && namelist[X_STATHZ].n_value != 0) kread(X_STATHZ, &hz, sizeof(hz)); if (!hz) kread(X_HZ, &hz, sizeof(hz)); } else { struct clockinfo clockrate; size = sizeof(clockrate); mysysctl("kern.clockrate", &clockrate, &size, NULL, 0); if (size != sizeof(clockrate)) errx(1, "clockrate size mismatch"); hz = clockrate.hz; } if (Pflag) { ncpus = getcpuinfo(&cpumask, &maxid); size_cp_times = sizeof(long) * (maxid + 1) * CPUSTATES; cur_cp_times = calloc(1, size_cp_times); last_cp_times = calloc(1, size_cp_times); } for (hdrcnt = 1;;) { if (!--hdrcnt) printhdr(maxid, cpumask); if (kd != NULL) { if (kvm_getcptime(kd, cur.cp_time) < 0) errx(1, "kvm_getcptime: %s", kvm_geterr(kd)); } else { size = sizeof(cur.cp_time); mysysctl("kern.cp_time", &cur.cp_time, &size, NULL, 0); if (size != sizeof(cur.cp_time)) errx(1, "cp_time size mismatch"); } if (Pflag) { size = size_cp_times; mysysctl("kern.cp_times", cur_cp_times, &size, NULL, 0); if (size != size_cp_times) errx(1, "cp_times mismatch"); } 
tmp_dinfo = last.dinfo; last.dinfo = cur.dinfo; cur.dinfo = tmp_dinfo; last.snap_time = cur.snap_time; /* * Here what we want to do is refresh our device stats. * getdevs() returns 1 when the device list has changed. * If the device list has changed, we want to go through * the selection process again, in case a device that we * were previously displaying has gone away. */ switch (devstat_getdevs(NULL, &cur)) { case -1: errx(1, "%s", devstat_errbuf); break; case 1: { int retval; num_devices = cur.dinfo->numdevs; generation = cur.dinfo->generation; retval = devstat_selectdevs(&dev_select, &num_selected, &num_selections, &select_generation, generation, cur.dinfo->devices, num_devices, matches, num_matches, specified_devices, num_devices_specified, select_mode, maxshowdevs, 0); switch (retval) { case -1: errx(1, "%s", devstat_errbuf); break; case 1: printhdr(maxid, cpumask); break; default: break; } } default: break; } fill_vmmeter(&sum); fill_vmtotal(&total); (void)printf("%1d %1d %1d", total.t_rq - 1, total.t_dw + total.t_pw, total.t_sw); #define vmstat_pgtok(a) ((a) * (sum.v_page_size >> 10)) #define rate(x) (((x) * rate_adj + halfuptime) / uptime) /* round */ if (hflag) { printf(""); prthuman(total.t_avm * (u_int64_t)sum.v_page_size, 5); printf(" "); prthuman(total.t_free * (u_int64_t)sum.v_page_size, 5); printf(" "); (void)printf("%5lu ", (unsigned long)rate(sum.v_vm_faults - osum.v_vm_faults)); } else { printf(" %7d", vmstat_pgtok(total.t_avm)); printf(" %7d ", vmstat_pgtok(total.t_free)); (void)printf("%4lu ", (unsigned long)rate(sum.v_vm_faults - osum.v_vm_faults)); } (void)printf("%3lu ", (unsigned long)rate(sum.v_reactivated - osum.v_reactivated)); (void)printf("%3lu ", (unsigned long)rate(sum.v_swapin + sum.v_vnodein - (osum.v_swapin + osum.v_vnodein))); (void)printf("%3lu ", (unsigned long)rate(sum.v_swapout + sum.v_vnodeout - (osum.v_swapout + osum.v_vnodeout))); (void)printf("%5lu ", (unsigned long)rate(sum.v_tfree - osum.v_tfree)); (void)printf("%4lu ", 
(unsigned long)rate(sum.v_pdpages - osum.v_pdpages)); devstats(); (void)printf("%4lu %5lu %5lu", (unsigned long)rate(sum.v_intr - osum.v_intr), (unsigned long)rate(sum.v_syscall - osum.v_syscall), (unsigned long)rate(sum.v_swtch - osum.v_swtch)); if (Pflag) pcpustats(ncpus, cpumask, maxid); else cpustats(); (void)printf("\n"); (void)fflush(stdout); if (reps >= 0 && --reps <= 0) break; osum = sum; uptime = interval; rate_adj = 1000; /* * We round upward to avoid losing low-frequency events * (i.e., >= 1 per interval but < 1 per millisecond). */ if (interval != 1) halfuptime = (uptime + 1) / 2; else halfuptime = 0; (void)usleep(interval * 1000); } } static void printhdr(int maxid, u_long cpumask) { int i, num_shown; num_shown = (num_selected < maxshowdevs) ? num_selected : maxshowdevs; if (hflag) { (void)printf("procs memory page%*s ", 19, ""); } else { (void)printf("procs memory page%*s ", 19, ""); } if (num_shown > 1) (void)printf(" disks %*s", num_shown * 4 - 7, ""); else if (num_shown == 1) (void)printf(" disk"); (void)printf(" faults "); if (Pflag) { for (i = 0; i <= maxid; i++) { if (cpumask & (1ul << i)) printf(" cpu%d ", i); } printf("\n"); } else printf(" cpu\n"); if (hflag) { (void)printf("r b w avm fre flt re pi po fr sr "); } else { (void)printf("r b w avm fre flt re pi po fr sr "); } for (i = 0; i < num_devices; i++) if ((dev_select[i].selected) && (dev_select[i].selected <= maxshowdevs)) (void)printf("%c%c%d ", dev_select[i].device_name[0], dev_select[i].device_name[1], dev_select[i].unit_number); (void)printf(" in sy cs"); if (Pflag) { for (i = 0; i <= maxid; i++) { if (cpumask & (1ul << i)) printf(" us sy id"); } printf("\n"); } else printf(" us sy id\n"); if (wresized != 0) doresize(); hdrcnt = winlines; } /* * Force a header to be prepended to the next output. */ static void needhdr(int dummy __unused) { hdrcnt = 1; } /* * When the terminal is resized, force an update of the maximum number of rows * printed between each header repetition. 
Then force a new header to be * prepended to the next output. */ void needresize(int signo) { wresized = 1; hdrcnt = 1; } /* * Update the global `winlines' count of terminal rows. */ void doresize(void) { int status; struct winsize w; for (;;) { status = ioctl(fileno(stdout), TIOCGWINSZ, &w); if (status == -1 && errno == EINTR) continue; else if (status == -1) err(1, "ioctl"); if (w.ws_row > 3) winlines = w.ws_row - 3; else winlines = VMSTAT_DEFAULT_LINES; break; } /* * Inhibit doresize() calls until we are rescheduled by SIGWINCH. */ wresized = 0; } #ifdef notyet static void dotimes(void) { unsigned int pgintime, rectime; kread(X_REC, &rectime, sizeof(rectime)); kread(X_PGIN, &pgintime, sizeof(pgintime)); kread(X_SUM, &sum, sizeof(sum)); (void)printf("%u reclaims, %u total time (usec)\n", sum.v_pgrec, rectime); (void)printf("average: %u usec / reclaim\n", rectime / sum.v_pgrec); (void)printf("\n"); (void)printf("%u page ins, %u total time (msec)\n", sum.v_pgin, pgintime / 10); (void)printf("average: %8.1f msec / page in\n", pgintime / (sum.v_pgin * 10.0)); } #endif static long pct(long top, long bot) { long ans; if (bot == 0) return(0); ans = (quad_t)top * 100 / bot; return (ans); } #define PCT(top, bot) pct((long)(top), (long)(bot)) static void dosum(void) { struct nchstats lnchstats; long nchtotal; fill_vmmeter(&sum); (void)printf("%9u cpu context switches\n", sum.v_swtch); (void)printf("%9u device interrupts\n", sum.v_intr); (void)printf("%9u software interrupts\n", sum.v_soft); (void)printf("%9u traps\n", sum.v_trap); (void)printf("%9u system calls\n", sum.v_syscall); (void)printf("%9u kernel threads created\n", sum.v_kthreads); (void)printf("%9u fork() calls\n", sum.v_forks); (void)printf("%9u vfork() calls\n", sum.v_vforks); (void)printf("%9u rfork() calls\n", sum.v_rforks); (void)printf("%9u swap pager pageins\n", sum.v_swapin); (void)printf("%9u swap pager pages paged in\n", sum.v_swappgsin); (void)printf("%9u swap pager pageouts\n", sum.v_swapout); 
(void)printf("%9u swap pager pages paged out\n", sum.v_swappgsout); (void)printf("%9u vnode pager pageins\n", sum.v_vnodein); (void)printf("%9u vnode pager pages paged in\n", sum.v_vnodepgsin); (void)printf("%9u vnode pager pageouts\n", sum.v_vnodeout); (void)printf("%9u vnode pager pages paged out\n", sum.v_vnodepgsout); (void)printf("%9u page daemon wakeups\n", sum.v_pdwakeups); (void)printf("%9u pages examined by the page daemon\n", sum.v_pdpages); (void)printf("%9u pages reactivated\n", sum.v_reactivated); (void)printf("%9u copy-on-write faults\n", sum.v_cow_faults); (void)printf("%9u copy-on-write optimized faults\n", sum.v_cow_optim); (void)printf("%9u zero fill pages zeroed\n", sum.v_zfod); (void)printf("%9u zero fill pages prezeroed\n", sum.v_ozfod); (void)printf("%9u intransit blocking page faults\n", sum.v_intrans); (void)printf("%9u total VM faults taken\n", sum.v_vm_faults); (void)printf("%9u page faults requiring I/O\n", sum.v_io_faults); (void)printf("%9u pages affected by kernel thread creation\n", sum.v_kthreadpages); (void)printf("%9u pages affected by fork()\n", sum.v_forkpages); (void)printf("%9u pages affected by vfork()\n", sum.v_vforkpages); (void)printf("%9u pages affected by rfork()\n", sum.v_rforkpages); (void)printf("%9u pages cached\n", sum.v_tcached); (void)printf("%9u pages freed\n", sum.v_tfree); (void)printf("%9u pages freed by daemon\n", sum.v_dfree); (void)printf("%9u pages freed by exiting processes\n", sum.v_pfree); (void)printf("%9u pages active\n", sum.v_active_count); (void)printf("%9u pages inactive\n", sum.v_inactive_count); (void)printf("%9u pages in VM cache\n", sum.v_cache_count); (void)printf("%9u pages wired down\n", sum.v_wire_count); (void)printf("%9u pages free\n", sum.v_free_count); (void)printf("%9u bytes per page\n", sum.v_page_size); if (kd != NULL) { kread(X_NCHSTATS, &lnchstats, sizeof(lnchstats)); } else { size_t size = sizeof(lnchstats); mysysctl("vfs.cache.nchstats", &lnchstats, &size, NULL, 0); if (size != 
sizeof(lnchstats)) errx(1, "vfs.cache.nchstats size mismatch"); } nchtotal = lnchstats.ncs_goodhits + lnchstats.ncs_neghits + lnchstats.ncs_badhits + lnchstats.ncs_falsehits + lnchstats.ncs_miss + lnchstats.ncs_long; (void)printf("%9ld total name lookups\n", nchtotal); (void)printf( "%9s cache hits (%ld%% pos + %ld%% neg) system %ld%% per-directory\n", "", PCT(lnchstats.ncs_goodhits, nchtotal), PCT(lnchstats.ncs_neghits, nchtotal), PCT(lnchstats.ncs_pass2, nchtotal)); (void)printf("%9s deletions %ld%%, falsehits %ld%%, toolong %ld%%\n", "", PCT(lnchstats.ncs_badhits, nchtotal), PCT(lnchstats.ncs_falsehits, nchtotal), PCT(lnchstats.ncs_long, nchtotal)); } static void doforkst(void) { fill_vmmeter(&sum); (void)printf("%u forks, %u pages, average %.2f\n", sum.v_forks, sum.v_forkpages, sum.v_forks == 0 ? 0.0 : (double)sum.v_forkpages / sum.v_forks); (void)printf("%u vforks, %u pages, average %.2f\n", sum.v_vforks, sum.v_vforkpages, sum.v_vforks == 0 ? 0.0 : (double)sum.v_vforkpages / sum.v_vforks); (void)printf("%u rforks, %u pages, average %.2f\n", sum.v_rforks, sum.v_rforkpages, sum.v_rforks == 0 ? 
0.0 : (double)sum.v_rforkpages / sum.v_rforks); } static void devstats(void) { int dn, state; long double transfers_per_second; long double busy_seconds; long tmp; for (state = 0; state < CPUSTATES; ++state) { tmp = cur.cp_time[state]; cur.cp_time[state] -= last.cp_time[state]; last.cp_time[state] = tmp; } busy_seconds = cur.snap_time - last.snap_time; for (dn = 0; dn < num_devices; dn++) { int di; if ((dev_select[dn].selected == 0) || (dev_select[dn].selected > maxshowdevs)) continue; di = dev_select[dn].position; if (devstat_compute_statistics(&cur.dinfo->devices[di], &last.dinfo->devices[di], busy_seconds, DSM_TRANSFERS_PER_SECOND, &transfers_per_second, DSM_NONE) != 0) errx(1, "%s", devstat_errbuf); (void)printf("%3.0Lf ", transfers_per_second); } } static void percent(double pct, int *over) { char buf[10]; int l; l = snprintf(buf, sizeof(buf), "%.0f", pct); if (l == 1 && *over) { printf("%s", buf); (*over)--; } else printf("%2s", buf); if (l > 2) (*over)++; } static void cpustats(void) { int state, over; double lpct, total; total = 0; for (state = 0; state < CPUSTATES; ++state) total += cur.cp_time[state]; if (total) lpct = 100.0 / total; else lpct = 0.0; over = 0; printf(" "); percent((cur.cp_time[CP_USER] + cur.cp_time[CP_NICE]) * lpct, &over); printf(" "); percent((cur.cp_time[CP_SYS] + cur.cp_time[CP_INTR]) * lpct, &over); printf(" "); percent(cur.cp_time[CP_IDLE] * lpct, &over); } static void pcpustats(int ncpus, u_long cpumask, int maxid) { int state, i; double lpct, total; long tmp; int over; /* devstats does this for cp_time */ for (i = 0; i <= maxid; i++) { if ((cpumask & (1ul << i)) == 0) continue; for (state = 0; state < CPUSTATES; ++state) { tmp = cur_cp_times[i * CPUSTATES + state]; cur_cp_times[i * CPUSTATES + state] -= last_cp_times[i * CPUSTATES + state]; last_cp_times[i * CPUSTATES + state] = tmp; } } over = 0; for (i = 0; i <= maxid; i++) { if ((cpumask & (1ul << i)) == 0) continue; total = 0; for (state = 0; state < CPUSTATES; ++state) total 
+= cur_cp_times[i * CPUSTATES + state]; if (total) lpct = 100.0 / total; else lpct = 0.0; printf(" "); percent((cur_cp_times[i * CPUSTATES + CP_USER] + cur_cp_times[i * CPUSTATES + CP_NICE]) * lpct, &over); printf(" "); percent((cur_cp_times[i * CPUSTATES + CP_SYS] + cur_cp_times[i * CPUSTATES + CP_INTR]) * lpct, &over); printf(" "); percent(cur_cp_times[i * CPUSTATES + CP_IDLE] * lpct, &over); } } static unsigned int read_intrcnts(unsigned long **intrcnts) { size_t intrcntlen; if (kd != NULL) { kread(X_SINTRCNT, &intrcntlen, sizeof(intrcntlen)); if ((*intrcnts = malloc(intrcntlen)) == NULL) err(1, "malloc()"); kread(X_INTRCNT, *intrcnts, intrcntlen); } else { for (*intrcnts = NULL, intrcntlen = 1024; ; intrcntlen *= 2) { *intrcnts = reallocf(*intrcnts, intrcntlen); if (*intrcnts == NULL) err(1, "reallocf()"); if (mysysctl("hw.intrcnt", *intrcnts, &intrcntlen, NULL, 0) == 0) break; } } return (intrcntlen / sizeof(unsigned long)); } static void print_intrcnts(unsigned long *intrcnts, unsigned long *old_intrcnts, char *intrnames, unsigned int nintr, size_t istrnamlen, long long period_ms) { unsigned long *intrcnt, *old_intrcnt; uint64_t inttotal, old_inttotal, total_count, total_rate; char* intrname; unsigned int i; inttotal = 0; old_inttotal = 0; intrname = intrnames; for (i = 0, intrcnt=intrcnts, old_intrcnt=old_intrcnts; i < nintr; i++) { if (intrname[0] != '\0' && (*intrcnt != 0 || aflag)) { unsigned long count, rate; count = *intrcnt - *old_intrcnt; rate = (count * 1000 + period_ms / 2) / period_ms; (void)printf("%-*s %20lu %10lu\n", (int)istrnamlen, intrname, count, rate); } intrname += strlen(intrname) + 1; inttotal += *intrcnt++; old_inttotal += *old_intrcnt++; } total_count = inttotal - old_inttotal; total_rate = (total_count * 1000 + period_ms / 2) / period_ms; (void)printf("%-*s %20" PRIu64 " %10" PRIu64 "\n", (int)istrnamlen, "Total", total_count, total_rate); } static void dointr(unsigned int interval, int reps) { unsigned long *intrcnts; long long 
uptime, period_ms; unsigned long *old_intrcnts = NULL; size_t clen, inamlen, istrnamlen; char *intrnames, *intrname; uptime = getuptime(); /* Get the names of each interrupt source */ if (kd != NULL) { kread(X_SINTRNAMES, &inamlen, sizeof(inamlen)); if ((intrnames = malloc(inamlen)) == NULL) err(1, "malloc()"); kread(X_INTRNAMES, intrnames, inamlen); } else { for (intrnames = NULL, inamlen = 1024; ; inamlen *= 2) { if ((intrnames = reallocf(intrnames, inamlen)) == NULL) err(1, "reallocf()"); if (mysysctl("hw.intrnames", intrnames, &inamlen, NULL, 0) == 0) break; } } /* Determine the length of the longest interrupt name */ intrname = intrnames; istrnamlen = strlen("interrupt"); while(*intrname != '\0') { clen = strlen(intrname); if (clen > istrnamlen) istrnamlen = clen; intrname += strlen(intrname) + 1; } (void)printf("%-*s %20s %10s\n", (int)istrnamlen, "interrupt", "total", "rate"); /* * Loop reps times printing differential interrupt counts. If reps is * zero, then run just once, printing total counts */ period_ms = uptime / 1000000; while(1) { unsigned int nintr; long long old_uptime; nintr = read_intrcnts(&intrcnts); /* * Initialize old_intrcnts to 0 for the first pass, so * print_intrcnts will print total interrupts since boot */ if (old_intrcnts == NULL) { old_intrcnts = calloc(nintr, sizeof(unsigned long)); if (old_intrcnts == NULL) err(1, "calloc()"); } print_intrcnts(intrcnts, old_intrcnts, intrnames, nintr, istrnamlen, period_ms); free(old_intrcnts); old_intrcnts = intrcnts; if (reps >= 0 && --reps <= 0) break; usleep(interval * 1000); old_uptime = uptime; uptime = getuptime(); period_ms = (uptime - old_uptime) / 1000000; } } static void domemstat_malloc(void) { struct memory_type_list *mtlp; struct memory_type *mtp; int error, first, i; mtlp = memstat_mtl_alloc(); if (mtlp == NULL) { warn("memstat_mtl_alloc"); return; } if (kd == NULL) { if (memstat_sysctl_malloc(mtlp, 0) < 0) { warnx("memstat_sysctl_malloc: %s", 
memstat_strerror(memstat_mtl_geterror(mtlp))); return; } } else { if (memstat_kvm_malloc(mtlp, kd) < 0) { error = memstat_mtl_geterror(mtlp); if (error == MEMSTAT_ERROR_KVM) warnx("memstat_kvm_malloc: %s", kvm_geterr(kd)); else warnx("memstat_kvm_malloc: %s", memstat_strerror(error)); } } printf("%13s %5s %6s %7s %8s Size(s)\n", "Type", "InUse", "MemUse", "HighUse", "Requests"); for (mtp = memstat_mtl_first(mtlp); mtp != NULL; mtp = memstat_mtl_next(mtp)) { if (memstat_get_numallocs(mtp) == 0 && memstat_get_count(mtp) == 0) continue; printf("%13s %5" PRIu64 " %5" PRIu64 "K %7s %8" PRIu64 " ", memstat_get_name(mtp), memstat_get_count(mtp), (memstat_get_bytes(mtp) + 1023) / 1024, "-", memstat_get_numallocs(mtp)); first = 1; for (i = 0; i < 32; i++) { if (memstat_get_sizemask(mtp) & (1 << i)) { if (!first) printf(","); printf("%d", 1 << (i + 4)); first = 0; } } printf("\n"); } memstat_mtl_free(mtlp); } static void domemstat_zone(void) { struct memory_type_list *mtlp; struct memory_type *mtp; char name[MEMTYPE_MAXNAME + 1]; int error; mtlp = memstat_mtl_alloc(); if (mtlp == NULL) { warn("memstat_mtl_alloc"); return; } if (kd == NULL) { if (memstat_sysctl_uma(mtlp, 0) < 0) { warnx("memstat_sysctl_uma: %s", memstat_strerror(memstat_mtl_geterror(mtlp))); return; } } else { if (memstat_kvm_uma(mtlp, kd) < 0) { error = memstat_mtl_geterror(mtlp); if (error == MEMSTAT_ERROR_KVM) warnx("memstat_kvm_uma: %s", kvm_geterr(kd)); else warnx("memstat_kvm_uma: %s", memstat_strerror(error)); } } printf("%-20s %6s %6s %8s %8s %8s %4s %4s\n\n", "ITEM", "SIZE", "LIMIT", "USED", "FREE", "REQ", "FAIL", "SLEEP"); for (mtp = memstat_mtl_first(mtlp); mtp != NULL; mtp = memstat_mtl_next(mtp)) { strlcpy(name, memstat_get_name(mtp), MEMTYPE_MAXNAME); strcat(name, ":"); printf("%-20s %6" PRIu64 ", %6" PRIu64 ",%8" PRIu64 ",%8" PRIu64 ",%8" PRIu64 ",%4" PRIu64 ",%4" PRIu64 "\n", name, memstat_get_size(mtp), memstat_get_countlimit(mtp), memstat_get_count(mtp), memstat_get_free(mtp), 
memstat_get_numallocs(mtp), memstat_get_failures(mtp), memstat_get_sleeps(mtp)); } memstat_mtl_free(mtlp); printf("\n"); } +static void +display_object(struct kinfo_vmobject *kvo) +{ + const char *str; + + printf("%5jd ", (uintmax_t)kvo->kvo_resident); + printf("%5jd ", (uintmax_t)kvo->kvo_active); + printf("%5jd ", (uintmax_t)kvo->kvo_inactive); + printf("%3d ", kvo->kvo_ref_count); + printf("%3d ", kvo->kvo_shadow_count); + switch (kvo->kvo_memattr) { +#ifdef VM_MEMATTR_UNCACHEABLE + case VM_MEMATTR_UNCACHEABLE: + str = "UC"; + break; +#endif +#ifdef VM_MEMATTR_WRITE_COMBINING + case VM_MEMATTR_WRITE_COMBINING: + str = "WC"; + break; +#endif +#ifdef VM_MEMATTR_WRITE_THROUGH + case VM_MEMATTR_WRITE_THROUGH: + str = "WT"; + break; +#endif +#ifdef VM_MEMATTR_WRITE_PROTECTED + case VM_MEMATTR_WRITE_PROTECTED: + str = "WP"; + break; +#endif +#ifdef VM_MEMATTR_WRITE_BACK + case VM_MEMATTR_WRITE_BACK: + str = "WB"; + break; +#endif +#ifdef VM_MEMATTR_WEAK_UNCACHEABLE + case VM_MEMATTR_WEAK_UNCACHEABLE: + str = "UC-"; + break; +#endif +#ifdef VM_MEMATTR_WB_WA: + case VM_MEMATTR_WB_WA: + str = "WB"; + break; +#endif +#ifdef VM_MEMATTR_NOCACHE + case VM_MEMATTR_NOCACHE: + str = "NC"; + break; +#endif +#ifdef VM_MEMATTR_DEVICE + case VM_MEMATTR_DEVICE: + str = "DEV"; + break; +#endif +#ifdef VM_MEMATTR_CACHEABLE + case VM_MEMATTR_CACHEABLE: + str = "C"; + break; +#endif +#ifdef VM_MEMATTR_PREFETCHABLE + case VM_MEMATTR_PREFETCHABLE: + str = "PRE"; + break; +#endif + default: + str = "??"; + break; + } + printf("%-3s ", str); + switch (kvo->kvo_type) { + case KVME_TYPE_NONE: + str = "--"; + break; + case KVME_TYPE_DEFAULT: + str = "df"; + break; + case KVME_TYPE_VNODE: + str = "vn"; + break; + case KVME_TYPE_SWAP: + str = "sw"; + break; + case KVME_TYPE_DEVICE: + str = "dv"; + break; + case KVME_TYPE_PHYS: + str = "ph"; + break; + case KVME_TYPE_DEAD: + str = "dd"; + break; + case KVME_TYPE_SG: + str = "sg"; + break; + case KVME_TYPE_UNKNOWN: + default: + str = "??"; + 
break; + } + printf("%-2s ", str); + printf("%-s\n", kvo->kvo_path); +} + +static void +doobjstat(void) +{ + struct kinfo_vmobject *kvo; + int cnt, i; + + kvo = kinfo_getvmobject(&cnt); + if (kvo == NULL) { + warn("Failed to fetch VM object list"); + return; + } + printf("%5s %5s %5s %3s %3s %3s %2s %s\n", "RES", "ACT", "INACT", + "REF", "SHD", "CM", "TP", "PATH"); + for (i = 0; i < cnt; i++) + display_object(&kvo[i]); + free(kvo); +} + /* * kread reads something from the kernel, given its nlist index. */ static void kreado(int nlx, void *addr, size_t size, size_t offset) { const char *sym; if (namelist[nlx].n_type == 0 || namelist[nlx].n_value == 0) { sym = namelist[nlx].n_name; if (*sym == '_') ++sym; errx(1, "symbol %s not defined", sym); } if ((size_t)kvm_read(kd, namelist[nlx].n_value + offset, addr, size) != size) { sym = namelist[nlx].n_name; if (*sym == '_') ++sym; errx(1, "%s: %s", sym, kvm_geterr(kd)); } } static void kread(int nlx, void *addr, size_t size) { kreado(nlx, addr, size, 0); } static char * kgetstr(const char *strp) { int n = 0, size = 1; char *ret = NULL; do { if (size == n + 1) { ret = realloc(ret, size); if (ret == NULL) err(1, "%s: realloc", __func__); size *= 2; } if (kvm_read(kd, (u_long)strp + n, &ret[n], 1) != 1) errx(1, "%s: %s", __func__, kvm_geterr(kd)); } while (ret[n++] != '\0'); return (ret); } static void usage(void) { (void)fprintf(stderr, "%s%s", - "usage: vmstat [-afHhimPsz] [-M core [-N system]] [-c count] [-n devs]\n", + "usage: vmstat [-afHhimoPsz] [-M core [-N system]] [-c count] [-n devs]\n", " [-p type,if,pass] [-w wait] [disks] [wait [count]]\n"); exit(1); }