Index: head/lib/libmemstat/memstat.h =================================================================== --- head/lib/libmemstat/memstat.h (revision 367273) +++ head/lib/libmemstat/memstat.h (revision 367274) @@ -1,172 +1,179 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MEMSTAT_H_ #define _MEMSTAT_H_ /* * Amount of caller data to maintain for each caller data slot. Applications * must not request more than this number of caller save data, or risk * corrupting internal libmemstat(3) data structures. A compile time check * in the application is probably appropriate. */ #define MEMSTAT_MAXCALLER 16 /* * libmemstat(3) is able to extract memory data from different allocators; * when it does so, it tags which allocator it got the data from so that * consumers can determine which fields are usable, as data returned varies * some. */ #define ALLOCATOR_UNKNOWN 0 #define ALLOCATOR_MALLOC 1 #define ALLOCATOR_UMA 2 #define ALLOCATOR_ANY 255 /* * Library maximum type name. Should be max(set of name maximums over * various allocators). */ #define MEMTYPE_MAXNAME 32 /* * Library error conditions, mostly from the underlying data sources. On * failure, functions typically return (-1) or (NULL); on success, (0) or a * valid data pointer. The error from the last operation is stored in * struct memory_type_list, and accessed via memstat_get_error(list). */ #define MEMSTAT_ERROR_UNDEFINED 0 /* Initialization value. */ #define MEMSTAT_ERROR_NOMEMORY 1 /* Out of memory. */ #define MEMSTAT_ERROR_VERSION 2 /* Unsupported version. */ #define MEMSTAT_ERROR_PERMISSION 3 /* Permission denied. */ #define MEMSTAT_ERROR_DATAERROR 5 /* Error in stat data. */ #define MEMSTAT_ERROR_KVM 6 /* See kvm_geterr() for err. */ #define MEMSTAT_ERROR_KVM_NOSYMBOL 7 /* Symbol not available. */ #define MEMSTAT_ERROR_KVM_SHORTREAD 8 /* Short kvm_read return. */ /* * Forward declare struct memory_type, which holds per-type properties and * statistics. This is an opaque type, to be frobbed only from within the * library, in order to avoid building ABI assumptions into the application. * Accessor methods should be used to get and sometimes set the fields from * consumers of the library. 
*/ struct memory_type; /* * struct memory_type_list is the head of a list of memory types and * statistics. */ struct memory_type_list; __BEGIN_DECLS /* * Functions that operate without memory type or memory type list context. */ const char *memstat_strerror(int error); /* * Functions for managing memory type and statistics data. */ struct memory_type_list *memstat_mtl_alloc(void); struct memory_type *memstat_mtl_first(struct memory_type_list *list); struct memory_type *memstat_mtl_next(struct memory_type *mtp); struct memory_type *memstat_mtl_find(struct memory_type_list *list, int allocator, const char *name); void memstat_mtl_free(struct memory_type_list *list); int memstat_mtl_geterror(struct memory_type_list *list); /* * Functions to retrieve data from a live kernel using sysctl. */ int memstat_sysctl_all(struct memory_type_list *list, int flags); int memstat_sysctl_malloc(struct memory_type_list *list, int flags); int memstat_sysctl_uma(struct memory_type_list *list, int flags); /* * Functions to retrieve data from a kernel core (or /dev/kmem). */ int memstat_kvm_all(struct memory_type_list *list, void *kvm_handle); int memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle); int memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle); /* + * General malloc routines. + */ +size_t memstat_malloc_zone_get_count(void); +size_t memstat_malloc_zone_get_size(size_t n); +int memstat_malloc_zone_used(const struct memory_type *mtp, size_t n); + +/* * Accessor methods for struct memory_type. */ const char *memstat_get_name(const struct memory_type *mtp); int memstat_get_allocator(const struct memory_type *mtp); uint64_t memstat_get_countlimit(const struct memory_type *mtp); uint64_t memstat_get_byteslimit(const struct memory_type *mtp); uint64_t memstat_get_sizemask(const struct memory_type *mtp); uint64_t memstat_get_size(const struct memory_type *mtp); uint64_t memstat_get_rsize(const struct memory_type *mtp); uint64_t memstat_get_memalloced(const struct memory_type *mtp); uint64_t memstat_get_memfreed(const struct memory_type *mtp); uint64_t memstat_get_numallocs(const struct memory_type *mtp); uint64_t memstat_get_numfrees(const struct memory_type *mtp); uint64_t memstat_get_bytes(const struct memory_type *mtp); uint64_t memstat_get_count(const struct memory_type *mtp); uint64_t memstat_get_free(const struct memory_type *mtp); uint64_t memstat_get_failures(const struct memory_type *mtp); uint64_t memstat_get_sleeps(const struct memory_type *mtp); uint64_t memstat_get_xdomain(const struct memory_type *mtp); void *memstat_get_caller_pointer(const struct memory_type *mtp, int index); void memstat_set_caller_pointer(struct memory_type *mtp, int index, void *value); uint64_t memstat_get_caller_uint64(const struct memory_type *mtp, int index); void memstat_set_caller_uint64(struct memory_type *mtp, int index, uint64_t value); uint64_t memstat_get_zonefree(const struct memory_type *mtp); uint64_t memstat_get_kegfree(const struct memory_type *mtp); uint64_t memstat_get_percpu_memalloced(const struct memory_type *mtp, int cpu); uint64_t memstat_get_percpu_memfreed(const struct memory_type *mtp, int cpu); uint64_t memstat_get_percpu_numallocs(const struct memory_type *mtp, int cpu); uint64_t memstat_get_percpu_numfrees(const struct memory_type *mtp, int cpu); uint64_t memstat_get_percpu_sizemask(const struct memory_type *mtp, int cpu); void *memstat_get_percpu_caller_pointer( const struct memory_type *mtp, int cpu, int index); void memstat_set_percpu_caller_pointer(struct 
memory_type *mtp, int cpu, int index, void *value); uint64_t memstat_get_percpu_caller_uint64( const struct memory_type *mtp, int cpu, int index); void memstat_set_percpu_caller_uint64(struct memory_type *mtp, int cpu, int index, uint64_t value); uint64_t memstat_get_percpu_free(const struct memory_type *mtp, int cpu); __END_DECLS #endif /* !_MEMSTAT_H_ */ Index: head/lib/libmemstat/memstat_malloc.c =================================================================== --- head/lib/libmemstat/memstat_malloc.c (revision 367273) +++ head/lib/libmemstat/memstat_malloc.c (revision 367274) @@ -1,418 +1,547 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include "memstat.h" #include "memstat_internal.h" +static int memstat_malloc_zone_count; +static int memstat_malloc_zone_sizes[32]; + +static int memstat_malloc_zone_init(void); +static int memstat_malloc_zone_init_kvm(kvm_t *kvm); + static struct nlist namelist[] = { #define X_KMEMSTATISTICS 0 { .n_name = "_kmemstatistics" }, -#define X_MP_MAXCPUS 1 +#define X_KMEMZONES 1 + { .n_name = "_kmemzones" }, +#define X_NUMZONES 2 + { .n_name = "_numzones" }, +#define X_VM_MALLOC_ZONE_COUNT 3 + { .n_name = "_vm_malloc_zone_count" }, +#define X_MP_MAXCPUS 4 { .n_name = "_mp_maxcpus" }, { .n_name = "" }, }; /* * Extract malloc(9) statistics from the running kernel, and store all memory * type information in the passed list. For each type, check the list for an * existing entry with the right name/allocator -- if present, update that * entry. Otherwise, add a new entry. On error, the entire list will be * cleared, as entries will be in an inconsistent state. * * To reduce the level of work for a list that starts empty, we keep around a * hint as to whether it was empty when we began, so we can avoid searching * the list for entries to update. Updates are O(n^2) due to searching for * each entry before adding it. 
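/*
 * Illustrative consumer sketch (not part of the library) for the zone
 * accessors declared above: list, for each malloc(9) type, which
 * power-of-two kmalloc zones it has allocated from, roughly what
 * vmstat(8) does with this data.  Link with -lmemstat; error handling
 * is abbreviated.
 */
#include <sys/types.h>
#include <err.h>
#include <memstat.h>
#include <stdio.h>

int
main(void)
{
	struct memory_type_list *mtlp;
	struct memory_type *mtp;
	size_t i, zones;

	mtlp = memstat_mtl_alloc();
	if (mtlp == NULL)
		err(1, "memstat_mtl_alloc");
	if (memstat_sysctl_malloc(mtlp, 0) < 0)
		errx(1, "memstat_sysctl_malloc: %s",
		    memstat_strerror(memstat_mtl_geterror(mtlp)));

	zones = memstat_malloc_zone_get_count();
	for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
	    mtp = memstat_mtl_next(mtp)) {
		printf("%s:", memstat_get_name(mtp));
		for (i = 0; i < zones; i++)
			if (memstat_malloc_zone_used(mtp, i))
				printf(" %zu",
				    memstat_malloc_zone_get_size(i));
		printf("\n");
	}
	memstat_mtl_free(mtlp);
	return (0);
}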
*/ int memstat_sysctl_malloc(struct memory_type_list *list, int flags) { struct malloc_type_stream_header *mtshp; struct malloc_type_header *mthp; struct malloc_type_stats *mtsp; struct memory_type *mtp; int count, hint_dontsearch, i, j, maxcpus; char *buffer, *p; size_t size; hint_dontsearch = LIST_EMPTY(&list->mtl_list); /* * Query the number of CPUs, number of malloc types so that we can * guess an initial buffer size. We loop until we succeed or really * fail. Note that the value of maxcpus we query using sysctl is not * the version we use when processing the real data -- that is read * from the header. */ retry: size = sizeof(maxcpus); if (sysctlbyname("kern.smp.maxcpus", &maxcpus, &size, NULL, 0) < 0) { if (errno == EACCES || errno == EPERM) list->mtl_error = MEMSTAT_ERROR_PERMISSION; else list->mtl_error = MEMSTAT_ERROR_DATAERROR; return (-1); } if (size != sizeof(maxcpus)) { list->mtl_error = MEMSTAT_ERROR_DATAERROR; return (-1); } size = sizeof(count); if (sysctlbyname("kern.malloc_count", &count, &size, NULL, 0) < 0) { if (errno == EACCES || errno == EPERM) list->mtl_error = MEMSTAT_ERROR_PERMISSION; else list->mtl_error = MEMSTAT_ERROR_VERSION; return (-1); } if (size != sizeof(count)) { list->mtl_error = MEMSTAT_ERROR_DATAERROR; return (-1); } + if (memstat_malloc_zone_init() == -1) { + list->mtl_error = MEMSTAT_ERROR_VERSION; + return (-1); + } + size = sizeof(*mthp) + count * (sizeof(*mthp) + sizeof(*mtsp) * maxcpus); buffer = malloc(size); if (buffer == NULL) { list->mtl_error = MEMSTAT_ERROR_NOMEMORY; return (-1); } if (sysctlbyname("kern.malloc_stats", buffer, &size, NULL, 0) < 0) { /* * XXXRW: ENOMEM is an ambiguous return, we should bound the * number of loops, perhaps. */ if (errno == ENOMEM) { free(buffer); goto retry; } if (errno == EACCES || errno == EPERM) list->mtl_error = MEMSTAT_ERROR_PERMISSION; else list->mtl_error = MEMSTAT_ERROR_VERSION; free(buffer); return (-1); } if (size == 0) { free(buffer); return (0); } if (size < sizeof(*mtshp)) { list->mtl_error = MEMSTAT_ERROR_VERSION; free(buffer); return (-1); } p = buffer; mtshp = (struct malloc_type_stream_header *)p; p += sizeof(*mtshp); if (mtshp->mtsh_version != MALLOC_TYPE_STREAM_VERSION) { list->mtl_error = MEMSTAT_ERROR_VERSION; free(buffer); return (-1); } /* * For the remainder of this function, we are quite trusting about * the layout of structures and sizes, since we've determined we have * a matching version and acceptable CPU count. */ maxcpus = mtshp->mtsh_maxcpus; count = mtshp->mtsh_count; for (i = 0; i < count; i++) { mthp = (struct malloc_type_header *)p; p += sizeof(*mthp); if (hint_dontsearch == 0) { mtp = memstat_mtl_find(list, ALLOCATOR_MALLOC, mthp->mth_name); } else mtp = NULL; if (mtp == NULL) mtp = _memstat_mt_allocate(list, ALLOCATOR_MALLOC, mthp->mth_name, maxcpus); if (mtp == NULL) { _memstat_mtl_empty(list); free(buffer); list->mtl_error = MEMSTAT_ERROR_NOMEMORY; return (-1); } /* * Reset the statistics on a current node. */ _memstat_mt_reset_stats(mtp, maxcpus); for (j = 0; j < maxcpus; j++) { mtsp = (struct malloc_type_stats *)p; p += sizeof(*mtsp); /* * Sumarize raw statistics across CPUs into coalesced * statistics. */ mtp->mt_memalloced += mtsp->mts_memalloced; mtp->mt_memfreed += mtsp->mts_memfreed; mtp->mt_numallocs += mtsp->mts_numallocs; mtp->mt_numfrees += mtsp->mts_numfrees; mtp->mt_sizemask |= mtsp->mts_size; /* * Copies of per-CPU statistics. 
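/*
 * Generic userland pattern for reading a variable-length opaque sysctl,
 * shown for illustration only: ask the kernel for the current size,
 * over-allocate a little to absorb growth between the two calls, and
 * retry on ENOMEM.  memstat_sysctl_malloc() above instead estimates the
 * buffer size from kern.smp.maxcpus and kern.malloc_count, but relies on
 * the same retry-on-ENOMEM idea.  read_opaque_sysctl() is a hypothetical
 * helper name.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <errno.h>
#include <stdlib.h>

static void *
read_opaque_sysctl(const char *name, size_t *lenp)
{
	void *buf;
	size_t len;

	for (;;) {
		len = 0;
		if (sysctlbyname(name, NULL, &len, NULL, 0) < 0)
			return (NULL);
		len += len / 8;		/* Slack for concurrent growth. */
		if ((buf = malloc(len)) == NULL)
			return (NULL);
		if (sysctlbyname(name, buf, &len, NULL, 0) == 0) {
			*lenp = len;
			return (buf);
		}
		free(buf);
		if (errno != ENOMEM)
			return (NULL);
	}
}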
*/ mtp->mt_percpu_alloc[j].mtp_memalloced = mtsp->mts_memalloced; mtp->mt_percpu_alloc[j].mtp_memfreed = mtsp->mts_memfreed; mtp->mt_percpu_alloc[j].mtp_numallocs = mtsp->mts_numallocs; mtp->mt_percpu_alloc[j].mtp_numfrees = mtsp->mts_numfrees; mtp->mt_percpu_alloc[j].mtp_sizemask = mtsp->mts_size; } /* * Derived cross-CPU statistics. */ mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; } free(buffer); return (0); } static int kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size, size_t offset) { ssize_t ret; ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address, size); if (ret < 0) return (MEMSTAT_ERROR_KVM); if ((size_t)ret != size) return (MEMSTAT_ERROR_KVM_SHORTREAD); return (0); } static int kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen) { ssize_t ret; int i; for (i = 0; i < buflen; i++) { ret = kvm_read(kvm, __DECONST(unsigned long, kvm_pointer) + i, &(buffer[i]), sizeof(char)); if (ret < 0) return (MEMSTAT_ERROR_KVM); if ((size_t)ret != sizeof(char)) return (MEMSTAT_ERROR_KVM_SHORTREAD); if (buffer[i] == '\0') return (0); } /* Truncate. */ buffer[i-1] = '\0'; return (0); } static int kread_symbol(kvm_t *kvm, int index, void *address, size_t size, size_t offset) { ssize_t ret; ret = kvm_read(kvm, namelist[index].n_value + offset, address, size); if (ret < 0) return (MEMSTAT_ERROR_KVM); if ((size_t)ret != size) return (MEMSTAT_ERROR_KVM_SHORTREAD); return (0); } static int kread_zpcpu(kvm_t *kvm, u_long base, void *buf, size_t size, int cpu) { ssize_t ret; ret = kvm_read_zpcpu(kvm, base, buf, size, cpu); if (ret < 0) return (MEMSTAT_ERROR_KVM); if ((size_t)ret != size) return (MEMSTAT_ERROR_KVM_SHORTREAD); return (0); } int memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle) { struct memory_type *mtp; void *kmemstatistics; int hint_dontsearch, j, mp_maxcpus, mp_ncpus, ret; char name[MEMTYPE_MAXNAME]; struct malloc_type_stats mts; struct malloc_type_internal mti, *mtip; struct malloc_type type, *typep; kvm_t *kvm; kvm = (kvm_t *)kvm_handle; hint_dontsearch = LIST_EMPTY(&list->mtl_list); if (kvm_nlist(kvm, namelist) != 0) { list->mtl_error = MEMSTAT_ERROR_KVM; return (-1); } if (namelist[X_KMEMSTATISTICS].n_type == 0 || namelist[X_KMEMSTATISTICS].n_value == 0) { list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; return (-1); } ret = kread_symbol(kvm, X_MP_MAXCPUS, &mp_maxcpus, sizeof(mp_maxcpus), 0); if (ret != 0) { list->mtl_error = ret; return (-1); } ret = kread_symbol(kvm, X_KMEMSTATISTICS, &kmemstatistics, sizeof(kmemstatistics), 0); if (ret != 0) { list->mtl_error = ret; return (-1); } + ret = memstat_malloc_zone_init_kvm(kvm); + if (ret != 0) { + list->mtl_error = ret; + return (-1); + } + mp_ncpus = kvm_getncpus(kvm); for (typep = kmemstatistics; typep != NULL; typep = type.ks_next) { ret = kread(kvm, typep, &type, sizeof(type), 0); if (ret != 0) { _memstat_mtl_empty(list); list->mtl_error = ret; return (-1); } ret = kread_string(kvm, (void *)type.ks_shortdesc, name, MEMTYPE_MAXNAME); if (ret != 0) { _memstat_mtl_empty(list); list->mtl_error = ret; return (-1); } /* * Since our compile-time value for MAXCPU may differ from the * kernel's, we populate our own array. 
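/*
 * Sketch of how a caller might obtain the kvm(3) handle that
 * memstat_kvm_malloc() expects: open the running kernel (or a crash
 * dump) read-only and pass the handle to the library, which then drives
 * the kvm_nlist()/kvm_read() helpers above.  load_malloc_stats_kvm() is
 * an illustrative helper, not part of libmemstat.
 */
#include <sys/types.h>
#include <err.h>
#include <fcntl.h>
#include <kvm.h>
#include <limits.h>
#include <memstat.h>

static int
load_malloc_stats_kvm(struct memory_type_list *mtlp, const char *core)
{
	char errbuf[_POSIX2_LINE_MAX];
	kvm_t *kvm;
	int ret;

	kvm = kvm_openfiles(NULL, core, NULL, O_RDONLY, errbuf);
	if (kvm == NULL) {
		warnx("kvm_openfiles: %s", errbuf);
		return (-1);
	}
	ret = memstat_kvm_malloc(mtlp, kvm);
	if (ret < 0)
		warnx("memstat_kvm_malloc: %s",
		    memstat_strerror(memstat_mtl_geterror(mtlp)));
	kvm_close(kvm);
	return (ret);
}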
*/ mtip = type.ks_handle; ret = kread(kvm, mtip, &mti, sizeof(mti), 0); if (ret != 0) { _memstat_mtl_empty(list); list->mtl_error = ret; return (-1); } if (hint_dontsearch == 0) { mtp = memstat_mtl_find(list, ALLOCATOR_MALLOC, name); } else mtp = NULL; if (mtp == NULL) mtp = _memstat_mt_allocate(list, ALLOCATOR_MALLOC, name, mp_maxcpus); if (mtp == NULL) { _memstat_mtl_empty(list); list->mtl_error = MEMSTAT_ERROR_NOMEMORY; return (-1); } /* * This logic is replicated from kern_malloc.c, and should * be kept in sync. */ _memstat_mt_reset_stats(mtp, mp_maxcpus); for (j = 0; j < mp_ncpus; j++) { ret = kread_zpcpu(kvm, (u_long)mti.mti_stats, &mts, sizeof(mts), j); if (ret != 0) { _memstat_mtl_empty(list); list->mtl_error = ret; return (-1); } mtp->mt_memalloced += mts.mts_memalloced; mtp->mt_memfreed += mts.mts_memfreed; mtp->mt_numallocs += mts.mts_numallocs; mtp->mt_numfrees += mts.mts_numfrees; mtp->mt_sizemask |= mts.mts_size; mtp->mt_percpu_alloc[j].mtp_memalloced = mts.mts_memalloced; mtp->mt_percpu_alloc[j].mtp_memfreed = mts.mts_memfreed; mtp->mt_percpu_alloc[j].mtp_numallocs = mts.mts_numallocs; mtp->mt_percpu_alloc[j].mtp_numfrees = mts.mts_numfrees; mtp->mt_percpu_alloc[j].mtp_sizemask = mts.mts_size; } for (; j < mp_maxcpus; j++) { bzero(&mtp->mt_percpu_alloc[j], sizeof(mtp->mt_percpu_alloc[0])); } mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; } + + return (0); +} + +static int +memstat_malloc_zone_init(void) +{ + size_t size; + + size = sizeof(memstat_malloc_zone_count); + if (sysctlbyname("vm.malloc.zone_count", &memstat_malloc_zone_count, + &size, NULL, 0) < 0) { + return (-1); + } + + if (memstat_malloc_zone_count > (int)nitems(memstat_malloc_zone_sizes)) { + return (-1); + } + + size = sizeof(memstat_malloc_zone_sizes); + if (sysctlbyname("vm.malloc.zone_sizes", &memstat_malloc_zone_sizes, + &size, NULL, 0) < 0) { + return (-1); + } + + return (0); +} + +/* + * Copied from kern_malloc.c + * + * kz_zone is an array sized at compilation time, the size is exported in + * "numzones". Below we need to iterate kz_size. 
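/*
 * The two sysctls consumed by memstat_malloc_zone_init() above can also
 * be inspected directly: vm.malloc.zone_count is a single int and
 * vm.malloc.zone_sizes is an opaque array of int, one entry per kmalloc
 * bucket zone.  A minimal standalone reader, shown for illustration,
 * might look like this (the 32-entry bound mirrors the library's own
 * static array):
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	int count, sizes[32];
	size_t len;
	int i;

	len = sizeof(count);
	if (sysctlbyname("vm.malloc.zone_count", &count, &len, NULL, 0) < 0)
		err(1, "vm.malloc.zone_count");
	len = sizeof(sizes);
	if (sysctlbyname("vm.malloc.zone_sizes", sizes, &len, NULL, 0) < 0)
		err(1, "vm.malloc.zone_sizes");
	for (i = 0; i < count && (size_t)i < len / sizeof(sizes[0]); i++)
		printf("zone %d: %d bytes\n", i, sizes[i]);
	return (0);
}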
+ */ +struct memstat_kmemzone { + int kz_size; + const char *kz_name; + void *kz_zone[1]; +}; + +static int +memstat_malloc_zone_init_kvm(kvm_t *kvm) +{ + struct memstat_kmemzone *kmemzones, *kz; + int numzones, objsize, allocsize, ret; + int i; + + ret = kread_symbol(kvm, X_VM_MALLOC_ZONE_COUNT, + &memstat_malloc_zone_count, sizeof(memstat_malloc_zone_count), 0); + if (ret != 0) { + return (ret); + } + + ret = kread_symbol(kvm, X_NUMZONES, &numzones, sizeof(numzones), 0); + if (ret != 0) { + return (ret); + } + + objsize = __offsetof(struct memstat_kmemzone, kz_zone) + + sizeof(void *) * numzones; + + allocsize = objsize * memstat_malloc_zone_count; + kmemzones = malloc(allocsize); + if (kmemzones == NULL) { + return (MEMSTAT_ERROR_NOMEMORY); + } + ret = kread_symbol(kvm, X_KMEMZONES, kmemzones, allocsize, 0); + if (ret != 0) { + free(kmemzones); + return (ret); + } + + kz = kmemzones; + for (i = 0; i < (int)nitems(memstat_malloc_zone_sizes); i++) { + memstat_malloc_zone_sizes[i] = kz->kz_size; + kz = (struct memstat_kmemzone *)((char *)kz + objsize); + } + + free(kmemzones); + return (0); +} + +size_t +memstat_malloc_zone_get_count(void) +{ + + return (memstat_malloc_zone_count); +} + +size_t +memstat_malloc_zone_get_size(size_t n) +{ + + if (n >= nitems(memstat_malloc_zone_sizes)) { + return (-1); + } + + return (memstat_malloc_zone_sizes[n]); +} + +int +memstat_malloc_zone_used(const struct memory_type *mtp, size_t n) +{ + + if (memstat_get_sizemask(mtp) & (1 << n)) + return (1); return (0); } Index: head/share/man/man9/malloc.9 =================================================================== --- head/share/man/man9/malloc.9 (revision 367273) +++ head/share/man/man9/malloc.9 (revision 367274) @@ -1,343 +1,349 @@ .\" .\" Copyright (c) 1996 The NetBSD Foundation, Inc. .\" All rights reserved. .\" .\" This code is derived from software contributed to The NetBSD Foundation .\" by Paul Kranenburg. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR .\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE .\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. 
.\" .\" $NetBSD: malloc.9,v 1.3 1996/11/11 00:05:11 lukem Exp $ .\" $FreeBSD$ .\" -.Dd August 28, 2020 +.Dd October 30, 2020 .Dt MALLOC 9 .Os .Sh NAME .Nm malloc , .Nm free , .Nm realloc , .Nm reallocf , .Nm MALLOC_DEFINE , .Nm MALLOC_DECLARE .Nd kernel memory management routines .Sh SYNOPSIS .In sys/types.h .In sys/malloc.h .Ft void * .Fn malloc "size_t size" "struct malloc_type *type" "int flags" .Ft void * .Fn mallocarray "size_t nmemb" "size_t size" "struct malloc_type *type" "int flags" .Ft void .Fn free "void *addr" "struct malloc_type *type" .Ft void .Fn zfree "void *addr" "struct malloc_type *type" .Ft void * .Fn realloc "void *addr" "size_t size" "struct malloc_type *type" "int flags" .Ft void * .Fn reallocf "void *addr" "size_t size" "struct malloc_type *type" "int flags" .Ft size_t .Fn malloc_usable_size "const void *addr" +.Ft void * +.Fn malloc_exec "size_t size" "struct malloc_type *type" "int flags" .Fn MALLOC_DECLARE type .In sys/param.h .In sys/malloc.h .In sys/kernel.h .Fn MALLOC_DEFINE type shortdesc longdesc .In sys/param.h .In sys/domainset.h .Ft void * .Fn malloc_domainset "size_t size" "struct malloc_type *type" "struct domainset *ds" "int flags" +.Ft void * +.Fn malloc_domainset_exec "size_t size" "struct malloc_type *type" "struct domainset *ds" "int flags" .Sh DESCRIPTION The .Fn malloc function allocates uninitialized memory in kernel address space for an object whose size is specified by .Fa size . .Pp The .Fn malloc_domainset variant allocates memory from a specific .Xr numa 4 domain using the specified domain selection policy. See .Xr domainset 9 for some example policies. .Pp +Both +.Fn malloc_exec +and +.Fn malloc_domainset_exec +can be used to return executable memory. +Not all platforms enforce a distinction between executable and non-executable memory. +.Pp The .Fn mallocarray function allocates uninitialized memory in kernel address space for an array of .Fa nmemb entries whose size is specified by .Fa size . .Pp The .Fn free function releases memory at address .Fa addr that was previously allocated by .Fn malloc for re-use. The memory is not zeroed. If .Fa addr is .Dv NULL , then .Fn free does nothing. .Pp Like .Fn free , the .Fn zfree function releases memory at address .Fa addr that was previously allocated by .Fn malloc for re-use. However, .Fn zfree will zero the memory before it is released. .Pp The .Fn realloc function changes the size of the previously allocated memory referenced by .Fa addr to .Fa size bytes. The contents of the memory are unchanged up to the lesser of the new and old sizes. Note that the returned value may differ from .Fa addr . If the requested memory cannot be allocated, .Dv NULL is returned and the memory referenced by .Fa addr is valid and unchanged. If .Fa addr is .Dv NULL , the .Fn realloc function behaves identically to .Fn malloc for the specified size. .Pp The .Fn reallocf function is identical to .Fn realloc except that it will free the passed pointer when the requested memory cannot be allocated. .Pp The .Fn malloc_usable_size function returns the usable size of the allocation pointed to by .Fa addr . The return value may be larger than the size that was requested during allocation. .Pp Unlike its standard C library counterpart .Pq Xr malloc 3 , the kernel version takes two more arguments. The .Fa flags argument further qualifies .Fn malloc Ns 's operational characteristics as follows: .Bl -tag -width indent .It Dv M_ZERO Causes the allocated memory to be set to all zeros. 
.It Dv M_NODUMP For allocations greater than page size, causes the allocated memory to be excluded from kernel core dumps. .It Dv M_NOWAIT Causes .Fn malloc , .Fn realloc , and .Fn reallocf to return .Dv NULL if the request cannot be immediately fulfilled due to resource shortage. Note that .Dv M_NOWAIT is required when running in an interrupt context. .It Dv M_WAITOK Indicates that it is OK to wait for resources. If the request cannot be immediately fulfilled, the current process is put to sleep to wait for resources to be released by other processes. The .Fn malloc , .Fn mallocarray , .Fn realloc , and .Fn reallocf functions cannot return .Dv NULL if .Dv M_WAITOK is specified. If the multiplication of .Fa nmemb and .Fa size would cause an integer overflow, the .Fn mallocarray function induces a panic. .It Dv M_USE_RESERVE Indicates that the system can use its reserve of memory to satisfy the request. This option should only be used in combination with .Dv M_NOWAIT when an allocation failure cannot be tolerated by the caller without catastrophic effects on the system. -.It Dv M_EXEC -Indicates that the system should allocate executable memory. -If this flag is not set, the system will not allocate executable memory. -Not all platforms enforce a distinction between executable and -non-executable memory. .El .Pp Exactly one of either .Dv M_WAITOK or .Dv M_NOWAIT must be specified. .Pp The .Fa type argument is used to perform statistics on memory usage, and for basic sanity checks. It can be used to identify multiple allocations. The statistics can be examined by .Sq vmstat -m . .Pp A .Fa type is defined using .Vt "struct malloc_type" via the .Fn MALLOC_DECLARE and .Fn MALLOC_DEFINE macros. .Bd -literal -offset indent /* sys/something/foo_extern.h */ MALLOC_DECLARE(M_FOOBUF); /* sys/something/foo_main.c */ MALLOC_DEFINE(M_FOOBUF, "foobuffers", "Buffers to foo data into the ether"); /* sys/something/foo_subr.c */ \&... buf = malloc(sizeof(*buf), M_FOOBUF, M_NOWAIT); .Ed .Pp In order to use .Fn MALLOC_DEFINE , one must include .In sys/param.h (instead of .In sys/types.h ) and .In sys/kernel.h . .Sh CONTEXT .Fn malloc , .Fn realloc and .Fn reallocf may not be called from fast interrupts handlers. When called from threaded interrupts, .Fa flags must contain .Dv M_NOWAIT . .Pp .Fn malloc , .Fn realloc and .Fn reallocf may sleep when called with .Dv M_WAITOK . .Fn free never sleeps. However, .Fn malloc , .Fn realloc , .Fn reallocf and .Fn free may not be called in a critical section or while holding a spin lock. .Pp Any calls to .Fn malloc (even with .Dv M_NOWAIT ) or .Fn free when holding a .Xr vnode 9 interlock, will cause a LOR (Lock Order Reversal) due to the intertwining of VM Objects and Vnodes. .Sh IMPLEMENTATION NOTES The memory allocator allocates memory in chunks that have size a power of two for requests up to the size of a page of memory. For larger requests, one or more pages is allocated. While it should not be relied upon, this information may be useful for optimizing the efficiency of memory use. .Sh RETURN VALUES The .Fn malloc , .Fn realloc , and .Fn reallocf functions return a kernel virtual address that is suitably aligned for storage of any type of object, or .Dv NULL if the request could not be satisfied (implying that .Dv M_NOWAIT was set). .Sh DIAGNOSTICS A kernel compiled with the .Dv INVARIANTS configuration option attempts to detect memory corruption caused by such things as writing outside the allocated area and imbalanced calls to the .Fn malloc and .Fn free functions. 
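.Pp
For example, code that previously requested executable memory by passing the
removed
.Dv M_EXEC
flag to
.Fn malloc
can call
.Fn malloc_exec
instead (using the hypothetical
M_FOOBUF
type from the example above):
.Bd -literal -offset indent
void *code;

code = malloc_exec(size, M_FOOBUF, M_WAITOK);
\&...
free(code, M_FOOBUF);
.Ed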
Failing consistency checks will cause a panic or a system console message. .Sh SEE ALSO .Xr numa 4 , .Xr vmstat 8 , .Xr contigmalloc 9 , .Xr domainset 9 , .Xr memguard 9 , .Xr vnode 9 Index: head/sys/kern/kern_malloc.c =================================================================== --- head/sys/kern/kern_malloc.c (revision 367273) +++ head/sys/kern/kern_malloc.c (revision 367274) @@ -1,1543 +1,1571 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1987, 1991, 1993 * The Regents of the University of California. * Copyright (c) 2005-2009 Robert N. M. Watson * Copyright (c) 2008 Otto Moerbeek (mallocarray) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94 */ /* * Kernel malloc(9) implementation -- general purpose kernel memory allocator * based on memory types. Back end is implemented using the UMA(9) zone * allocator. A set of fixed-size buckets are used for smaller allocations, * and a special UMA allocation interface is used for larger allocations. * Callers declare memory types, and statistics are maintained independently * for each memory type. Statistics are maintained per-CPU for performance * reasons. See malloc(9) and comments in malloc.h for a detailed * description. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef EPOCH_TRACE #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEBUG_MEMGUARD #include #endif #ifdef DEBUG_REDZONE #include #endif #if defined(INVARIANTS) && defined(__i386__) #include #endif #include #ifdef KDTRACE_HOOKS #include bool __read_frequently dtrace_malloc_enabled; dtrace_malloc_probe_func_t __read_mostly dtrace_malloc_probe; #endif #if defined(INVARIANTS) || defined(MALLOC_MAKE_FAILURES) || \ defined(DEBUG_MEMGUARD) || defined(DEBUG_REDZONE) #define MALLOC_DEBUG 1 #endif /* * When realloc() is called, if the new size is sufficiently smaller than * the old size, realloc() will allocate a new, smaller block to avoid * wasting memory. 'Sufficiently smaller' is defined as: newsize <= * oldsize / 2^n, where REALLOC_FRACTION defines the value of 'n'. */ #ifndef REALLOC_FRACTION #define REALLOC_FRACTION 1 /* new block if <= half the size */ #endif /* * Centrally define some common malloc types. */ MALLOC_DEFINE(M_CACHE, "cache", "Various Dynamically allocated caches"); MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory"); MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers"); static struct malloc_type *kmemstatistics; static int kmemcount; #define KMEM_ZSHIFT 4 #define KMEM_ZBASE 16 #define KMEM_ZMASK (KMEM_ZBASE - 1) #define KMEM_ZMAX 65536 #define KMEM_ZSIZE (KMEM_ZMAX >> KMEM_ZSHIFT) static uint8_t kmemsize[KMEM_ZSIZE + 1]; #ifndef MALLOC_DEBUG_MAXZONES #define MALLOC_DEBUG_MAXZONES 1 #endif static int numzones = MALLOC_DEBUG_MAXZONES; /* * Small malloc(9) memory allocations are allocated from a set of UMA buckets * of various sizes. * + * Warning: the layout of the struct is duplicated in libmemstat for KVM support. + * * XXX: The comment here used to read "These won't be powers of two for * long." It's possible that a significant amount of wasted memory could be * recovered by tuning the sizes of these buckets. */ struct { int kz_size; const char *kz_name; uma_zone_t kz_zone[MALLOC_DEBUG_MAXZONES]; } kmemzones[] = { {16, "16", }, {32, "32", }, {64, "64", }, {128, "128", }, {256, "256", }, {512, "512", }, {1024, "1024", }, {2048, "2048", }, {4096, "4096", }, {8192, "8192", }, {16384, "16384", }, {32768, "32768", }, {65536, "65536", }, {0, NULL}, }; /* * Zone to allocate malloc type descriptions from. For ABI reasons, memory * types are described by a data structure passed by the declaring code, but * the malloc(9) implementation has its own data structure describing the * type and statistics. This permits the malloc(9)-internal data structures * to be modified without breaking binary-compiled kernel modules that * declare malloc types. 
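/*
 * Standalone illustration (not kernel code) of the bucket lookup driven
 * by the kmemsize[]/kmemzones[] tables above: a small request is rounded
 * up to a multiple of KMEM_ZBASE and kmemsize[], indexed by
 * size >> KMEM_ZSHIFT, names the smallest bucket zone that fits.
 * Requests larger than KMEM_ZMAX bypass the buckets entirely.
 * build_size_table() mirrors the table construction done in
 * mallocinit() below; call it once before bucket_size().
 */
#include <stddef.h>
#include <stdint.h>

#define	KMEM_ZSHIFT	4
#define	KMEM_ZBASE	16
#define	KMEM_ZMASK	(KMEM_ZBASE - 1)
#define	KMEM_ZMAX	65536
#define	KMEM_ZSIZE	(KMEM_ZMAX >> KMEM_ZSHIFT)

static const int zone_sizes[] =
    { 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536 };
static uint8_t size_table[KMEM_ZSIZE + 1];

static void
build_size_table(void)
{
	int i, indx;

	for (i = 0, indx = 0;
	    indx < (int)(sizeof(zone_sizes) / sizeof(zone_sizes[0])); indx++)
		for (; i <= zone_sizes[indx]; i += KMEM_ZBASE)
			size_table[i >> KMEM_ZSHIFT] = indx;
}

/* The lookup malloc() performs for a request of at most KMEM_ZMAX bytes. */
static int
bucket_size(size_t size)
{

	if (size & KMEM_ZMASK)
		size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
	return (zone_sizes[size_table[size >> KMEM_ZSHIFT]]);
}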
*/ static uma_zone_t mt_zone; static uma_zone_t mt_stats_zone; u_long vm_kmem_size; SYSCTL_ULONG(_vm, OID_AUTO, kmem_size, CTLFLAG_RDTUN, &vm_kmem_size, 0, "Size of kernel memory"); static u_long kmem_zmax = KMEM_ZMAX; SYSCTL_ULONG(_vm, OID_AUTO, kmem_zmax, CTLFLAG_RDTUN, &kmem_zmax, 0, "Maximum allocation size that malloc(9) would use UMA as backend"); static u_long vm_kmem_size_min; SYSCTL_ULONG(_vm, OID_AUTO, kmem_size_min, CTLFLAG_RDTUN, &vm_kmem_size_min, 0, "Minimum size of kernel memory"); static u_long vm_kmem_size_max; SYSCTL_ULONG(_vm, OID_AUTO, kmem_size_max, CTLFLAG_RDTUN, &vm_kmem_size_max, 0, "Maximum size of kernel memory"); static u_int vm_kmem_size_scale; SYSCTL_UINT(_vm, OID_AUTO, kmem_size_scale, CTLFLAG_RDTUN, &vm_kmem_size_scale, 0, "Scale factor for kernel memory size"); static int sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, kmem_map_size, CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0, sysctl_kmem_map_size, "LU", "Current kmem allocation size"); static int sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, kmem_map_free, CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0, sysctl_kmem_map_free, "LU", "Free space in kmem"); +static SYSCTL_NODE(_vm, OID_AUTO, malloc, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + "Malloc information"); + +static u_int vm_malloc_zone_count = nitems(kmemzones); +SYSCTL_UINT(_vm_malloc, OID_AUTO, zone_count, + CTLFLAG_RD, &vm_malloc_zone_count, 0, + "Number of malloc zones"); + +static int sysctl_vm_malloc_zone_sizes(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_vm_malloc, OID_AUTO, zone_sizes, + CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, NULL, 0, + sysctl_vm_malloc_zone_sizes, "S", "Zone sizes used by malloc"); + /* * The malloc_mtx protects the kmemstatistics linked list. */ struct mtx malloc_mtx; #ifdef MALLOC_PROFILE uint64_t krequests[KMEM_ZSIZE + 1]; static int sysctl_kern_mprof(SYSCTL_HANDLER_ARGS); #endif static int sysctl_kern_malloc_stats(SYSCTL_HANDLER_ARGS); /* * time_uptime of the last malloc(9) failure (induced or real). */ static time_t t_malloc_fail; #if defined(MALLOC_MAKE_FAILURES) || (MALLOC_DEBUG_MAXZONES > 1) static SYSCTL_NODE(_debug, OID_AUTO, malloc, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Kernel malloc debugging options"); #endif /* * malloc(9) fault injection -- cause malloc failures every (n) mallocs when * the caller specifies M_NOWAIT. If set to 0, no failures are caused. */ #ifdef MALLOC_MAKE_FAILURES static int malloc_failure_rate; static int malloc_nowait_count; static int malloc_failure_count; SYSCTL_INT(_debug_malloc, OID_AUTO, failure_rate, CTLFLAG_RWTUN, &malloc_failure_rate, 0, "Every (n) mallocs with M_NOWAIT will fail"); SYSCTL_INT(_debug_malloc, OID_AUTO, failure_count, CTLFLAG_RD, &malloc_failure_count, 0, "Number of imposed M_NOWAIT malloc failures"); #endif static int sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS) { u_long size; size = uma_size(); return (sysctl_handle_long(oidp, &size, 0, req)); } static int sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS) { u_long size, limit; /* The sysctl is unsigned, implement as a saturation value. 
*/ size = uma_size(); limit = uma_limit(); if (size > limit) size = 0; else size = limit - size; return (sysctl_handle_long(oidp, &size, 0, req)); +} + +static int +sysctl_vm_malloc_zone_sizes(SYSCTL_HANDLER_ARGS) +{ + int sizes[nitems(kmemzones)]; + int i; + + for (i = 0; i < nitems(kmemzones); i++) { + sizes[i] = kmemzones[i].kz_size; + } + + return (SYSCTL_OUT(req, &sizes, sizeof(sizes))); } /* * malloc(9) uma zone separation -- sub-page buffer overruns in one * malloc type will affect only a subset of other malloc types. */ #if MALLOC_DEBUG_MAXZONES > 1 static void tunable_set_numzones(void) { TUNABLE_INT_FETCH("debug.malloc.numzones", &numzones); /* Sanity check the number of malloc uma zones. */ if (numzones <= 0) numzones = 1; if (numzones > MALLOC_DEBUG_MAXZONES) numzones = MALLOC_DEBUG_MAXZONES; } SYSINIT(numzones, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_set_numzones, NULL); SYSCTL_INT(_debug_malloc, OID_AUTO, numzones, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &numzones, 0, "Number of malloc uma subzones"); /* * Any number that changes regularly is an okay choice for the * offset. Build numbers are pretty good of you have them. */ static u_int zone_offset = __FreeBSD_version; TUNABLE_INT("debug.malloc.zone_offset", &zone_offset); SYSCTL_UINT(_debug_malloc, OID_AUTO, zone_offset, CTLFLAG_RDTUN, &zone_offset, 0, "Separate malloc types by examining the " "Nth character in the malloc type short description."); static void mtp_set_subzone(struct malloc_type *mtp) { struct malloc_type_internal *mtip; const char *desc; size_t len; u_int val; mtip = mtp->ks_handle; desc = mtp->ks_shortdesc; if (desc == NULL || (len = strlen(desc)) == 0) val = 0; else val = desc[zone_offset % len]; mtip->mti_zone = (val % numzones); } static inline u_int mtp_get_subzone(struct malloc_type *mtp) { struct malloc_type_internal *mtip; mtip = mtp->ks_handle; KASSERT(mtip->mti_zone < numzones, ("mti_zone %u out of range %d", mtip->mti_zone, numzones)); return (mtip->mti_zone); } #elif MALLOC_DEBUG_MAXZONES == 0 #error "MALLOC_DEBUG_MAXZONES must be positive." #else static void mtp_set_subzone(struct malloc_type *mtp) { struct malloc_type_internal *mtip; mtip = mtp->ks_handle; mtip->mti_zone = 0; } static inline u_int mtp_get_subzone(struct malloc_type *mtp) { return (0); } #endif /* MALLOC_DEBUG_MAXZONES > 1 */ int malloc_last_fail(void) { return (time_uptime - t_malloc_fail); } /* * An allocation has succeeded -- update malloc type statistics for the * amount of bucket size. Occurs within a critical section so that the * thread isn't preempted and doesn't migrate while updating per-PCU * statistics. */ static void malloc_type_zone_allocated(struct malloc_type *mtp, unsigned long size, int zindx) { struct malloc_type_internal *mtip; struct malloc_type_stats *mtsp; critical_enter(); mtip = mtp->ks_handle; mtsp = zpcpu_get(mtip->mti_stats); if (size > 0) { mtsp->mts_memalloced += size; mtsp->mts_numallocs++; } if (zindx != -1) mtsp->mts_size |= 1 << zindx; #ifdef KDTRACE_HOOKS if (__predict_false(dtrace_malloc_enabled)) { uint32_t probe_id = mtip->mti_probes[DTMALLOC_PROBE_MALLOC]; if (probe_id != 0) (dtrace_malloc_probe)(probe_id, (uintptr_t) mtp, (uintptr_t) mtip, (uintptr_t) mtsp, size, zindx); } #endif critical_exit(); } void malloc_type_allocated(struct malloc_type *mtp, unsigned long size) { if (size > 0) malloc_type_zone_allocated(mtp, size, -1); } /* * A free operation has occurred -- update malloc type statistics for the * amount of the bucket size. 
Occurs within a critical section so that the * thread isn't preempted and doesn't migrate while updating per-CPU * statistics. */ void malloc_type_freed(struct malloc_type *mtp, unsigned long size) { struct malloc_type_internal *mtip; struct malloc_type_stats *mtsp; critical_enter(); mtip = mtp->ks_handle; mtsp = zpcpu_get(mtip->mti_stats); mtsp->mts_memfreed += size; mtsp->mts_numfrees++; #ifdef KDTRACE_HOOKS if (__predict_false(dtrace_malloc_enabled)) { uint32_t probe_id = mtip->mti_probes[DTMALLOC_PROBE_FREE]; if (probe_id != 0) (dtrace_malloc_probe)(probe_id, (uintptr_t) mtp, (uintptr_t) mtip, (uintptr_t) mtsp, size, 0); } #endif critical_exit(); } /* * contigmalloc: * * Allocate a block of physically contiguous memory. * * If M_NOWAIT is set, this routine will not block and return NULL if * the allocation fails. */ void * contigmalloc(unsigned long size, struct malloc_type *type, int flags, vm_paddr_t low, vm_paddr_t high, unsigned long alignment, vm_paddr_t boundary) { void *ret; ret = (void *)kmem_alloc_contig(size, flags, low, high, alignment, boundary, VM_MEMATTR_DEFAULT); if (ret != NULL) malloc_type_allocated(type, round_page(size)); return (ret); } void * contigmalloc_domainset(unsigned long size, struct malloc_type *type, struct domainset *ds, int flags, vm_paddr_t low, vm_paddr_t high, unsigned long alignment, vm_paddr_t boundary) { void *ret; ret = (void *)kmem_alloc_contig_domainset(ds, size, flags, low, high, alignment, boundary, VM_MEMATTR_DEFAULT); if (ret != NULL) malloc_type_allocated(type, round_page(size)); return (ret); } /* * contigfree: * * Free a block of memory allocated by contigmalloc. * * This routine may not block. */ void contigfree(void *addr, unsigned long size, struct malloc_type *type) { kmem_free((vm_offset_t)addr, size); malloc_type_freed(type, round_page(size)); } #ifdef MALLOC_DEBUG static int malloc_dbg(caddr_t *vap, size_t *sizep, struct malloc_type *mtp, int flags) { #ifdef INVARIANTS int indx; KASSERT(mtp->ks_magic == M_MAGIC, ("malloc: bad malloc type magic")); /* * Check that exactly one of M_WAITOK or M_NOWAIT is specified. */ indx = flags & (M_WAITOK | M_NOWAIT); if (indx != M_NOWAIT && indx != M_WAITOK) { static struct timeval lasterr; static int curerr, once; if (once == 0 && ppsratecheck(&lasterr, &curerr, 1)) { printf("Bad malloc flags: %x\n", indx); kdb_backtrace(); flags |= M_WAITOK; once++; } } #endif #ifdef MALLOC_MAKE_FAILURES if ((flags & M_NOWAIT) && (malloc_failure_rate != 0)) { atomic_add_int(&malloc_nowait_count, 1); if ((malloc_nowait_count % malloc_failure_rate) == 0) { atomic_add_int(&malloc_failure_count, 1); t_malloc_fail = time_uptime; *vap = NULL; return (EJUSTRETURN); } } #endif if (flags & M_WAITOK) { KASSERT(curthread->td_intr_nesting_level == 0, ("malloc(M_WAITOK) in interrupt context")); if (__predict_false(!THREAD_CAN_SLEEP())) { #ifdef EPOCH_TRACE epoch_trace_list(curthread); #endif KASSERT(1, ("malloc(M_WAITOK) with sleeping prohibited")); } } KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), ("malloc: called with spinlock or critical section held")); #ifdef DEBUG_MEMGUARD if (memguard_cmp_mtp(mtp, *sizep)) { *vap = memguard_alloc(*sizep, flags); if (*vap != NULL) return (EJUSTRETURN); /* This is unfortunate but should not be fatal. */ } #endif #ifdef DEBUG_REDZONE *sizep = redzone_size_ntor(*sizep); #endif return (0); } #endif /* * Handle large allocations and frees by using kmem_malloc directly. 
*/ static inline bool malloc_large_slab(uma_slab_t slab) { uintptr_t va; va = (uintptr_t)slab; return ((va & 1) != 0); } static inline size_t malloc_large_size(uma_slab_t slab) { uintptr_t va; va = (uintptr_t)slab; return (va >> 1); } static caddr_t malloc_large(size_t *size, struct domainset *policy, int flags) { vm_offset_t va; size_t sz; sz = roundup(*size, PAGE_SIZE); va = kmem_malloc_domainset(policy, sz, flags); if (va != 0) { /* The low bit is unused for slab pointers. */ vsetzoneslab(va, NULL, (void *)((sz << 1) | 1)); uma_total_inc(sz); *size = sz; } return ((caddr_t)va); } static void free_large(void *addr, size_t size) { kmem_free((vm_offset_t)addr, size); uma_total_dec(size); } /* * malloc: * * Allocate a block of memory. * * If M_NOWAIT is set, this routine will not block and return NULL if * the allocation fails. */ void * (malloc)(size_t size, struct malloc_type *mtp, int flags) { int indx; caddr_t va; uma_zone_t zone; #if defined(DEBUG_REDZONE) unsigned long osize = size; #endif MPASS((flags & M_EXEC) == 0); #ifdef MALLOC_DEBUG va = NULL; if (malloc_dbg(&va, &size, mtp, flags) != 0) return (va); #endif if (size <= kmem_zmax) { if (size & KMEM_ZMASK) size = (size & ~KMEM_ZMASK) + KMEM_ZBASE; indx = kmemsize[size >> KMEM_ZSHIFT]; zone = kmemzones[indx].kz_zone[mtp_get_subzone(mtp)]; #ifdef MALLOC_PROFILE krequests[size >> KMEM_ZSHIFT]++; #endif va = uma_zalloc(zone, flags); if (va != NULL) size = zone->uz_size; malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx); } else { va = malloc_large(&size, DOMAINSET_RR(), flags); malloc_type_allocated(mtp, va == NULL ? 0 : size); } if (__predict_false(va == NULL)) { KASSERT((flags & M_WAITOK) == 0, ("malloc(M_WAITOK) returned NULL")); t_malloc_fail = time_uptime; } #ifdef DEBUG_REDZONE if (va != NULL) va = redzone_setup(va, osize); #endif return ((void *) va); } static void * malloc_domain(size_t *sizep, int *indxp, struct malloc_type *mtp, int domain, int flags) { uma_zone_t zone; caddr_t va; size_t size; int indx; size = *sizep; KASSERT(size <= kmem_zmax && (flags & M_EXEC) == 0, ("malloc_domain: Called with bad flag / size combination.")); if (size & KMEM_ZMASK) size = (size & ~KMEM_ZMASK) + KMEM_ZBASE; indx = kmemsize[size >> KMEM_ZSHIFT]; zone = kmemzones[indx].kz_zone[mtp_get_subzone(mtp)]; #ifdef MALLOC_PROFILE krequests[size >> KMEM_ZSHIFT]++; #endif va = uma_zalloc_domain(zone, NULL, domain, flags); if (va != NULL) *sizep = zone->uz_size; *indxp = indx; return ((void *)va); } void * malloc_domainset(size_t size, struct malloc_type *mtp, struct domainset *ds, int flags) { struct vm_domainset_iter di; caddr_t va; int domain; int indx; #if defined(DEBUG_REDZONE) unsigned long osize = size; #endif MPASS((flags & M_EXEC) == 0); #ifdef MALLOC_DEBUG va = NULL; if (malloc_dbg(&va, &size, mtp, flags) != 0) return (va); #endif if (size <= kmem_zmax) { vm_domainset_iter_policy_init(&di, ds, &domain, &flags); do { va = malloc_domain(&size, &indx, mtp, domain, flags); } while (va == NULL && vm_domainset_iter_policy(&di, &domain) == 0); malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx); } else { /* Policy is handled by kmem. */ va = malloc_large(&size, ds, flags); malloc_type_allocated(mtp, va == NULL ? 0 : size); } if (__predict_false(va == NULL)) { KASSERT((flags & M_WAITOK) == 0, ("malloc(M_WAITOK) returned NULL")); t_malloc_fail = time_uptime; } #ifdef DEBUG_REDZONE if (va != NULL) va = redzone_setup(va, osize); #endif return (va); } /* * Allocate an executable area. 
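/*
 * Standalone illustration of the slab-pointer tagging used by
 * malloc_large() and free() above: genuine uma_slab_t pointers are
 * aligned, so their low bit is always clear, while large allocations
 * store (size << 1) | 1 in the same per-page slot.  The low bit then
 * tells the two cases apart.  Types are simplified for the sketch.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static inline void *
large_encode(size_t sz)
{

	/* Assumes sz < (SIZE_MAX >> 1), true for any real allocation. */
	return ((void *)((sz << 1) | 1));
}

static inline bool
is_large(void *slab)
{

	return (((uintptr_t)slab & 1) != 0);
}

static inline size_t
large_size(void *slab)
{

	return ((uintptr_t)slab >> 1);
}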
*/ void * malloc_exec(size_t size, struct malloc_type *mtp, int flags) { caddr_t va; #if defined(DEBUG_REDZONE) unsigned long osize = size; #endif flags |= M_EXEC; #ifdef MALLOC_DEBUG va = NULL; if (malloc_dbg(&va, &size, mtp, flags) != 0) return (va); #endif va = malloc_large(&size, DOMAINSET_RR(), flags); malloc_type_allocated(mtp, va == NULL ? 0 : size); if (__predict_false(va == NULL)) { KASSERT((flags & M_WAITOK) == 0, ("malloc(M_WAITOK) returned NULL")); t_malloc_fail = time_uptime; } #ifdef DEBUG_REDZONE if (va != NULL) va = redzone_setup(va, osize); #endif return ((void *) va); } void * malloc_domainset_exec(size_t size, struct malloc_type *mtp, struct domainset *ds, int flags) { caddr_t va; #if defined(DEBUG_REDZONE) unsigned long osize = size; #endif flags |= M_EXEC; #ifdef MALLOC_DEBUG va = NULL; if (malloc_dbg(&va, &size, mtp, flags) != 0) return (va); #endif /* Policy is handled by kmem. */ va = malloc_large(&size, ds, flags); malloc_type_allocated(mtp, va == NULL ? 0 : size); if (__predict_false(va == NULL)) { KASSERT((flags & M_WAITOK) == 0, ("malloc(M_WAITOK) returned NULL")); t_malloc_fail = time_uptime; } #ifdef DEBUG_REDZONE if (va != NULL) va = redzone_setup(va, osize); #endif return (va); } void * mallocarray(size_t nmemb, size_t size, struct malloc_type *type, int flags) { if (WOULD_OVERFLOW(nmemb, size)) panic("mallocarray: %zu * %zu overflowed", nmemb, size); return (malloc(size * nmemb, type, flags)); } #ifdef INVARIANTS static void free_save_type(void *addr, struct malloc_type *mtp, u_long size) { struct malloc_type **mtpp = addr; /* * Cache a pointer to the malloc_type that most recently freed * this memory here. This way we know who is most likely to * have stepped on it later. * * This code assumes that size is a multiple of 8 bytes for * 64 bit machines */ mtpp = (struct malloc_type **) ((unsigned long)mtpp & ~UMA_ALIGN_PTR); mtpp += (size - sizeof(struct malloc_type *)) / sizeof(struct malloc_type *); *mtpp = mtp; } #endif #ifdef MALLOC_DEBUG static int free_dbg(void **addrp, struct malloc_type *mtp) { void *addr; addr = *addrp; KASSERT(mtp->ks_magic == M_MAGIC, ("free: bad malloc type magic")); KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), ("free: called with spinlock or critical section held")); /* free(NULL, ...) does nothing */ if (addr == NULL) return (EJUSTRETURN); #ifdef DEBUG_MEMGUARD if (is_memguard_addr(addr)) { memguard_free(addr); return (EJUSTRETURN); } #endif #ifdef DEBUG_REDZONE redzone_check(addr); *addrp = redzone_addr_ntor(addr); #endif return (0); } #endif /* * free: * * Free a block of memory allocated by malloc. * * This routine may not block. */ void free(void *addr, struct malloc_type *mtp) { uma_zone_t zone; uma_slab_t slab; u_long size; #ifdef MALLOC_DEBUG if (free_dbg(&addr, mtp) != 0) return; #endif /* free(NULL, ...) does nothing */ if (addr == NULL) return; vtozoneslab((vm_offset_t)addr & (~UMA_SLAB_MASK), &zone, &slab); if (slab == NULL) panic("free: address %p(%p) has not been allocated.\n", addr, (void *)((u_long)addr & (~UMA_SLAB_MASK))); if (__predict_true(!malloc_large_slab(slab))) { size = zone->uz_size; #ifdef INVARIANTS free_save_type(addr, mtp, size); #endif uma_zfree_arg(zone, addr, slab); } else { size = malloc_large_size(slab); free_large(addr, size); } malloc_type_freed(mtp, size); } /* * zfree: * * Zero then free a block of memory allocated by malloc. * * This routine may not block. 
*/ void zfree(void *addr, struct malloc_type *mtp) { uma_zone_t zone; uma_slab_t slab; u_long size; #ifdef MALLOC_DEBUG if (free_dbg(&addr, mtp) != 0) return; #endif /* free(NULL, ...) does nothing */ if (addr == NULL) return; vtozoneslab((vm_offset_t)addr & (~UMA_SLAB_MASK), &zone, &slab); if (slab == NULL) panic("free: address %p(%p) has not been allocated.\n", addr, (void *)((u_long)addr & (~UMA_SLAB_MASK))); if (__predict_true(!malloc_large_slab(slab))) { size = zone->uz_size; #ifdef INVARIANTS free_save_type(addr, mtp, size); #endif explicit_bzero(addr, size); uma_zfree_arg(zone, addr, slab); } else { size = malloc_large_size(slab); explicit_bzero(addr, size); free_large(addr, size); } malloc_type_freed(mtp, size); } /* * realloc: change the size of a memory block */ void * realloc(void *addr, size_t size, struct malloc_type *mtp, int flags) { uma_zone_t zone; uma_slab_t slab; unsigned long alloc; void *newaddr; KASSERT(mtp->ks_magic == M_MAGIC, ("realloc: bad malloc type magic")); KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), ("realloc: called with spinlock or critical section held")); /* realloc(NULL, ...) is equivalent to malloc(...) */ if (addr == NULL) return (malloc(size, mtp, flags)); /* * XXX: Should report free of old memory and alloc of new memory to * per-CPU stats. */ #ifdef DEBUG_MEMGUARD if (is_memguard_addr(addr)) return (memguard_realloc(addr, size, mtp, flags)); #endif #ifdef DEBUG_REDZONE slab = NULL; zone = NULL; alloc = redzone_get_size(addr); #else vtozoneslab((vm_offset_t)addr & (~UMA_SLAB_MASK), &zone, &slab); /* Sanity check */ KASSERT(slab != NULL, ("realloc: address %p out of range", (void *)addr)); /* Get the size of the original block */ if (!malloc_large_slab(slab)) alloc = zone->uz_size; else alloc = malloc_large_size(slab); /* Reuse the original block if appropriate */ if (size <= alloc && (size > (alloc >> REALLOC_FRACTION) || alloc == MINALLOCSIZE)) return (addr); #endif /* !DEBUG_REDZONE */ /* Allocate a new, bigger (or smaller) block */ if ((newaddr = malloc(size, mtp, flags)) == NULL) return (NULL); /* Copy over original contents */ bcopy(addr, newaddr, min(size, alloc)); free(addr, mtp); return (newaddr); } /* * reallocf: same as realloc() but free memory on failure. */ void * reallocf(void *addr, size_t size, struct malloc_type *mtp, int flags) { void *mem; if ((mem = realloc(addr, size, mtp, flags)) == NULL) free(addr, mtp); return (mem); } /* * malloc_usable_size: returns the usable size of the allocation. */ size_t malloc_usable_size(const void *addr) { #ifndef DEBUG_REDZONE uma_zone_t zone; uma_slab_t slab; #endif u_long size; if (addr == NULL) return (0); #ifdef DEBUG_MEMGUARD if (is_memguard_addr(__DECONST(void *, addr))) return (memguard_get_req_size(addr)); #endif #ifdef DEBUG_REDZONE size = redzone_get_size(__DECONST(void *, addr)); #else vtozoneslab((vm_offset_t)addr & (~UMA_SLAB_MASK), &zone, &slab); if (slab == NULL) panic("malloc_usable_size: address %p(%p) is not allocated.\n", addr, (void *)((u_long)addr & (~UMA_SLAB_MASK))); if (!malloc_large_slab(slab)) size = zone->uz_size; else size = malloc_large_size(slab); #endif return (size); } CTASSERT(VM_KMEM_SIZE_SCALE >= 1); /* * Initialize the kernel memory (kmem) arena. 
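/*
 * Usage note on realloc() versus reallocf() above, as a hedged sketch:
 * with plain realloc() the original buffer remains owned by the caller
 * when the call fails, so growing a buffer safely needs a temporary;
 * reallocf() frees the old block on failure, collapsing the pattern.
 * M_FOOBUF is a hypothetical malloc type used only for illustration.
 */
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/malloc.h>

MALLOC_DECLARE(M_FOOBUF);

static int
grow_buf(char **bufp, size_t newsize)
{
	char *tmp;

	tmp = realloc(*bufp, newsize, M_FOOBUF, M_NOWAIT);
	if (tmp == NULL)
		return (ENOMEM);	/* *bufp is still valid. */
	*bufp = tmp;
	return (0);
}

static int
grow_buf_reallocf(char **bufp, size_t newsize)
{

	/* On failure the old block has already been freed for us. */
	*bufp = reallocf(*bufp, newsize, M_FOOBUF, M_NOWAIT);
	return (*bufp == NULL ? ENOMEM : 0);
}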
*/ void kmeminit(void) { u_long mem_size; u_long tmp; #ifdef VM_KMEM_SIZE if (vm_kmem_size == 0) vm_kmem_size = VM_KMEM_SIZE; #endif #ifdef VM_KMEM_SIZE_MIN if (vm_kmem_size_min == 0) vm_kmem_size_min = VM_KMEM_SIZE_MIN; #endif #ifdef VM_KMEM_SIZE_MAX if (vm_kmem_size_max == 0) vm_kmem_size_max = VM_KMEM_SIZE_MAX; #endif /* * Calculate the amount of kernel virtual address (KVA) space that is * preallocated to the kmem arena. In order to support a wide range * of machines, it is a function of the physical memory size, * specifically, * * min(max(physical memory size / VM_KMEM_SIZE_SCALE, * VM_KMEM_SIZE_MIN), VM_KMEM_SIZE_MAX) * * Every architecture must define an integral value for * VM_KMEM_SIZE_SCALE. However, the definitions of VM_KMEM_SIZE_MIN * and VM_KMEM_SIZE_MAX, which represent respectively the floor and * ceiling on this preallocation, are optional. Typically, * VM_KMEM_SIZE_MAX is itself a function of the available KVA space on * a given architecture. */ mem_size = vm_cnt.v_page_count; if (mem_size <= 32768) /* delphij XXX 128MB */ kmem_zmax = PAGE_SIZE; if (vm_kmem_size_scale < 1) vm_kmem_size_scale = VM_KMEM_SIZE_SCALE; /* * Check if we should use defaults for the "vm_kmem_size" * variable: */ if (vm_kmem_size == 0) { vm_kmem_size = mem_size / vm_kmem_size_scale; vm_kmem_size = vm_kmem_size * PAGE_SIZE < vm_kmem_size ? vm_kmem_size_max : vm_kmem_size * PAGE_SIZE; if (vm_kmem_size_min > 0 && vm_kmem_size < vm_kmem_size_min) vm_kmem_size = vm_kmem_size_min; if (vm_kmem_size_max > 0 && vm_kmem_size >= vm_kmem_size_max) vm_kmem_size = vm_kmem_size_max; } if (vm_kmem_size == 0) panic("Tune VM_KMEM_SIZE_* for the platform"); /* * The amount of KVA space that is preallocated to the * kmem arena can be set statically at compile-time or manually * through the kernel environment. However, it is still limited to * twice the physical memory size, which has been sufficient to handle * the most severe cases of external fragmentation in the kmem arena. */ if (vm_kmem_size / 2 / PAGE_SIZE > mem_size) vm_kmem_size = 2 * mem_size * PAGE_SIZE; vm_kmem_size = round_page(vm_kmem_size); #ifdef DEBUG_MEMGUARD tmp = memguard_fudge(vm_kmem_size, kernel_map); #else tmp = vm_kmem_size; #endif uma_set_limit(tmp); #ifdef DEBUG_MEMGUARD /* * Initialize MemGuard if support compiled in. MemGuard is a * replacement allocator used for detecting tamper-after-free * scenarios as they occur. It is only used for debugging. 
*/ memguard_init(kernel_arena); #endif } /* * Initialize the kernel memory allocator */ /* ARGSUSED*/ static void mallocinit(void *dummy) { int i; uint8_t indx; mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF); kmeminit(); if (kmem_zmax < PAGE_SIZE || kmem_zmax > KMEM_ZMAX) kmem_zmax = KMEM_ZMAX; mt_stats_zone = uma_zcreate("mt_stats_zone", sizeof(struct malloc_type_stats), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU); mt_zone = uma_zcreate("mt_zone", sizeof(struct malloc_type_internal), #ifdef INVARIANTS mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini, #else NULL, NULL, NULL, NULL, #endif UMA_ALIGN_PTR, UMA_ZONE_MALLOC); for (i = 0, indx = 0; kmemzones[indx].kz_size != 0; indx++) { int size = kmemzones[indx].kz_size; const char *name = kmemzones[indx].kz_name; int subzone; for (subzone = 0; subzone < numzones; subzone++) { kmemzones[indx].kz_zone[subzone] = uma_zcreate(name, size, #ifdef INVARIANTS mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini, #else NULL, NULL, NULL, NULL, #endif UMA_ALIGN_PTR, UMA_ZONE_MALLOC); } for (;i <= size; i+= KMEM_ZBASE) kmemsize[i >> KMEM_ZSHIFT] = indx; } } SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_SECOND, mallocinit, NULL); void malloc_init(void *data) { struct malloc_type_internal *mtip; struct malloc_type *mtp; KASSERT(vm_cnt.v_page_count != 0, ("malloc_register before vm_init")); mtp = data; if (mtp->ks_magic != M_MAGIC) panic("malloc_init: bad malloc type magic"); mtip = uma_zalloc(mt_zone, M_WAITOK | M_ZERO); mtip->mti_stats = uma_zalloc_pcpu(mt_stats_zone, M_WAITOK | M_ZERO); mtp->ks_handle = mtip; mtp_set_subzone(mtp); mtx_lock(&malloc_mtx); mtp->ks_next = kmemstatistics; kmemstatistics = mtp; kmemcount++; mtx_unlock(&malloc_mtx); } void malloc_uninit(void *data) { struct malloc_type_internal *mtip; struct malloc_type_stats *mtsp; struct malloc_type *mtp, *temp; uma_slab_t slab; long temp_allocs, temp_bytes; int i; mtp = data; KASSERT(mtp->ks_magic == M_MAGIC, ("malloc_uninit: bad malloc type magic")); KASSERT(mtp->ks_handle != NULL, ("malloc_deregister: cookie NULL")); mtx_lock(&malloc_mtx); mtip = mtp->ks_handle; mtp->ks_handle = NULL; if (mtp != kmemstatistics) { for (temp = kmemstatistics; temp != NULL; temp = temp->ks_next) { if (temp->ks_next == mtp) { temp->ks_next = mtp->ks_next; break; } } KASSERT(temp, ("malloc_uninit: type '%s' not found", mtp->ks_shortdesc)); } else kmemstatistics = mtp->ks_next; kmemcount--; mtx_unlock(&malloc_mtx); /* * Look for memory leaks. 
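[Editorial note: the mallocinit() loop above fills the kmemsize[] lookup table so that a request size can later be mapped to its UMA zone in constant time. A userland sketch of the same table construction and lookup; the zone sizes and Z* constants here are illustrative, only patterned after the kernel's KMEM_Z* values.]

    #include <stdint.h>
    #include <stdio.h>

    #define ZSHIFT  4
    #define ZBASE   (1 << ZSHIFT)           /* 16-byte granularity */
    #define ZMAX    4096
    #define ZSIZE   (ZMAX >> ZSHIFT)

    static const int zone_sizes[] = { 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 };
    static uint8_t size2zone[ZSIZE + 1];

    /*
     * Fill the table the way mallocinit() does: every ZBASE-sized step up to
     * a zone's size maps to that zone's index.
     */
    static void
    build_table(void)
    {
        int i, indx, nzones;

        nzones = (int)(sizeof(zone_sizes) / sizeof(zone_sizes[0]));
        for (i = 0, indx = 0; indx < nzones; indx++)
            for (; i <= zone_sizes[indx]; i += ZBASE)
                size2zone[i >> ZSHIFT] = indx;
    }

    int
    main(void)
    {
        build_table();
        /* A 100-byte request rounds up into the 128-byte zone (index 3). */
        printf("zone index for 100 bytes: %d\n",
            size2zone[(100 + ZBASE - 1) >> ZSHIFT]);
        return (0);
    }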
*/ temp_allocs = temp_bytes = 0; for (i = 0; i <= mp_maxid; i++) { mtsp = zpcpu_get_cpu(mtip->mti_stats, i); temp_allocs += mtsp->mts_numallocs; temp_allocs -= mtsp->mts_numfrees; temp_bytes += mtsp->mts_memalloced; temp_bytes -= mtsp->mts_memfreed; } if (temp_allocs > 0 || temp_bytes > 0) { printf("Warning: memory type %s leaked memory on destroy " "(%ld allocations, %ld bytes leaked).\n", mtp->ks_shortdesc, temp_allocs, temp_bytes); } slab = vtoslab((vm_offset_t) mtip & (~UMA_SLAB_MASK)); uma_zfree_pcpu(mt_stats_zone, mtip->mti_stats); uma_zfree_arg(mt_zone, mtip, slab); } struct malloc_type * malloc_desc2type(const char *desc) { struct malloc_type *mtp; mtx_assert(&malloc_mtx, MA_OWNED); for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) { if (strcmp(mtp->ks_shortdesc, desc) == 0) return (mtp); } return (NULL); } static int sysctl_kern_malloc_stats(SYSCTL_HANDLER_ARGS) { struct malloc_type_stream_header mtsh; struct malloc_type_internal *mtip; struct malloc_type_stats *mtsp, zeromts; struct malloc_type_header mth; struct malloc_type *mtp; int error, i; struct sbuf sbuf; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL); mtx_lock(&malloc_mtx); bzero(&zeromts, sizeof(zeromts)); /* * Insert stream header. */ bzero(&mtsh, sizeof(mtsh)); mtsh.mtsh_version = MALLOC_TYPE_STREAM_VERSION; mtsh.mtsh_maxcpus = MAXCPU; mtsh.mtsh_count = kmemcount; (void)sbuf_bcat(&sbuf, &mtsh, sizeof(mtsh)); /* * Insert alternating sequence of type headers and type statistics. */ for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) { mtip = (struct malloc_type_internal *)mtp->ks_handle; /* * Insert type header. */ bzero(&mth, sizeof(mth)); strlcpy(mth.mth_name, mtp->ks_shortdesc, MALLOC_MAX_NAME); (void)sbuf_bcat(&sbuf, &mth, sizeof(mth)); /* * Insert type statistics for each CPU. */ for (i = 0; i <= mp_maxid; i++) { mtsp = zpcpu_get_cpu(mtip->mti_stats, i); (void)sbuf_bcat(&sbuf, mtsp, sizeof(*mtsp)); } /* * Fill in the missing CPUs. 
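[Editorial note: the sysctl handler above streams one header record, then for every malloc type a name record followed by one statistics record per possible CPU, zero-filling the slots past mp_maxid (that is what the "missing CPUs" loop below does). A sketch of that framing on a synthetic buffer; the structures here are simplified stand-ins for illustration, since the real malloc_type_stream_header / malloc_type_header / malloc_type_stats layouts live in sys/malloc.h and are normally consumed through libmemstat(3), not parsed by hand.]

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct hdr { uint32_t version, maxcpus, count; };
    struct type_hdr { char name[32]; };
    struct type_stats { uint64_t numallocs, numfrees; };

    /*
     * Walk a buffer laid out as: header, then per type a name record followed
     * by one stats record per possible CPU (unused CPUs are zero-filled).
     */
    static void
    walk(const unsigned char *buf)
    {
        struct hdr h;
        struct type_hdr th;
        struct type_stats ts;
        size_t off = 0;
        uint32_t t, cpu;
        uint64_t inuse;

        memcpy(&h, buf + off, sizeof(h));
        off += sizeof(h);
        for (t = 0; t < h.count; t++) {
            memcpy(&th, buf + off, sizeof(th));
            off += sizeof(th);
            inuse = 0;
            for (cpu = 0; cpu < h.maxcpus; cpu++) {
                memcpy(&ts, buf + off, sizeof(ts));
                off += sizeof(ts);
                inuse += ts.numallocs - ts.numfrees;
            }
            printf("%-32s %ju in use\n", th.name, (uintmax_t)inuse);
        }
    }

    int
    main(void)
    {
        unsigned char buf[sizeof(struct hdr) + sizeof(struct type_hdr) +
            2 * sizeof(struct type_stats)];
        struct hdr h = { 1, 2, 1 };
        struct type_hdr th;
        struct type_stats cpu0 = { 10, 3 }, cpu1 = { 0, 0 };
        size_t off = 0;

        memset(&th, 0, sizeof(th));
        strcpy(th.name, "example");
        memcpy(buf + off, &h, sizeof(h)); off += sizeof(h);
        memcpy(buf + off, &th, sizeof(th)); off += sizeof(th);
        memcpy(buf + off, &cpu0, sizeof(cpu0)); off += sizeof(cpu0);
        memcpy(buf + off, &cpu1, sizeof(cpu1));
        walk(buf);
        return (0);
    }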
*/ for (; i < MAXCPU; i++) { (void)sbuf_bcat(&sbuf, &zeromts, sizeof(zeromts)); } } mtx_unlock(&malloc_mtx); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } SYSCTL_PROC(_kern, OID_AUTO, malloc_stats, CTLFLAG_RD | CTLTYPE_STRUCT | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_malloc_stats, "s,malloc_type_ustats", "Return malloc types"); SYSCTL_INT(_kern, OID_AUTO, malloc_count, CTLFLAG_RD, &kmemcount, 0, "Count of kernel malloc types"); void malloc_type_list(malloc_type_list_func_t *func, void *arg) { struct malloc_type *mtp, **bufmtp; int count, i; size_t buflen; mtx_lock(&malloc_mtx); restart: mtx_assert(&malloc_mtx, MA_OWNED); count = kmemcount; mtx_unlock(&malloc_mtx); buflen = sizeof(struct malloc_type *) * count; bufmtp = malloc(buflen, M_TEMP, M_WAITOK); mtx_lock(&malloc_mtx); if (count < kmemcount) { free(bufmtp, M_TEMP); goto restart; } for (mtp = kmemstatistics, i = 0; mtp != NULL; mtp = mtp->ks_next, i++) bufmtp[i] = mtp; mtx_unlock(&malloc_mtx); for (i = 0; i < count; i++) (func)(bufmtp[i], arg); free(bufmtp, M_TEMP); } #ifdef DDB static int64_t get_malloc_stats(const struct malloc_type_internal *mtip, uint64_t *allocs, uint64_t *inuse) { const struct malloc_type_stats *mtsp; uint64_t frees, alloced, freed; int i; *allocs = 0; frees = 0; alloced = 0; freed = 0; for (i = 0; i <= mp_maxid; i++) { mtsp = zpcpu_get_cpu(mtip->mti_stats, i); *allocs += mtsp->mts_numallocs; frees += mtsp->mts_numfrees; alloced += mtsp->mts_memalloced; freed += mtsp->mts_memfreed; } *inuse = *allocs - frees; return (alloced - freed); } DB_SHOW_COMMAND(malloc, db_show_malloc) { const char *fmt_hdr, *fmt_entry; struct malloc_type *mtp; uint64_t allocs, inuse; int64_t size; /* variables for sorting */ struct malloc_type *last_mtype, *cur_mtype; int64_t cur_size, last_size; int ties; if (modif[0] == 'i') { fmt_hdr = "%s,%s,%s,%s\n"; fmt_entry = "\"%s\",%ju,%jdK,%ju\n"; } else { fmt_hdr = "%18s %12s %12s %12s\n"; fmt_entry = "%18s %12ju %12jdK %12ju\n"; } db_printf(fmt_hdr, "Type", "InUse", "MemUse", "Requests"); /* Select sort, largest size first. */ last_mtype = NULL; last_size = INT64_MAX; for (;;) { cur_mtype = NULL; cur_size = -1; ties = 0; for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) { /* * In the case of size ties, print out mtypes * in the order they are encountered. That is, * when we encounter the most recently output * mtype, we have already printed all preceding * ties, and we must print all following ties. 
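[Editorial note: the selection pass this comment describes prints the largest remaining size on each outer iteration and uses the ties flag so equal sizes come out in encounter order, all without modifying the list, which is why ddb can run it over the live kmemstatistics chain. A small userland sketch of the same pattern over a plain array:]

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        static const int64_t sizes[] = { 40, 96, 40, 8, 96, 16 };
        const int n = (int)(sizeof(sizes) / sizeof(sizes[0]));
        int64_t last_size = INT64_MAX, cur_size;
        int last_idx = -1, cur_idx, i, ties;

        for (;;) {
            cur_idx = -1;
            cur_size = -1;
            ties = 0;
            for (i = 0; i < n; i++) {
                /*
                 * Skip the entry printed last; entries after it with an
                 * equal size become eligible once ties is set.
                 */
                if (i == last_idx) {
                    ties = 1;
                    continue;
                }
                if (sizes[i] > cur_size && sizes[i] < last_size + ties) {
                    cur_size = sizes[i];
                    cur_idx = i;
                }
            }
            if (cur_idx == -1)
                break;
            printf("entry %d: %jd\n", cur_idx, (intmax_t)sizes[cur_idx]);
            last_idx = cur_idx;
            last_size = cur_size;
        }
        return (0);
    }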
*/ if (mtp == last_mtype) { ties = 1; continue; } size = get_malloc_stats(mtp->ks_handle, &allocs, &inuse); if (size > cur_size && size < last_size + ties) { cur_size = size; cur_mtype = mtp; } } if (cur_mtype == NULL) break; size = get_malloc_stats(cur_mtype->ks_handle, &allocs, &inuse); db_printf(fmt_entry, cur_mtype->ks_shortdesc, inuse, howmany(size, 1024), allocs); if (db_pager_quit) break; last_mtype = cur_mtype; last_size = cur_size; } } #if MALLOC_DEBUG_MAXZONES > 1 DB_SHOW_COMMAND(multizone_matches, db_show_multizone_matches) { struct malloc_type_internal *mtip; struct malloc_type *mtp; u_int subzone; if (!have_addr) { db_printf("Usage: show multizone_matches \n"); return; } mtp = (void *)addr; if (mtp->ks_magic != M_MAGIC) { db_printf("Magic %lx does not match expected %x\n", mtp->ks_magic, M_MAGIC); return; } mtip = mtp->ks_handle; subzone = mtip->mti_zone; for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) { mtip = mtp->ks_handle; if (mtip->mti_zone != subzone) continue; db_printf("%s\n", mtp->ks_shortdesc); if (db_pager_quit) break; } } #endif /* MALLOC_DEBUG_MAXZONES > 1 */ #endif /* DDB */ #ifdef MALLOC_PROFILE static int sysctl_kern_mprof(SYSCTL_HANDLER_ARGS) { struct sbuf sbuf; uint64_t count; uint64_t waste; uint64_t mem; int error; int rsize; int size; int i; waste = 0; mem = 0; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); sbuf_printf(&sbuf, "\n Size Requests Real Size\n"); for (i = 0; i < KMEM_ZSIZE; i++) { size = i << KMEM_ZSHIFT; rsize = kmemzones[kmemsize[i]].kz_size; count = (long long unsigned)krequests[i]; sbuf_printf(&sbuf, "%6d%28llu%11d\n", size, (unsigned long long)count, rsize); if ((rsize * count) > (size * count)) waste += (rsize * count) - (size * count); mem += (rsize * count); } sbuf_printf(&sbuf, "\nTotal memory used:\t%30llu\nTotal Memory wasted:\t%30llu\n", (unsigned long long)mem, (unsigned long long)waste); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } SYSCTL_OID(_kern, OID_AUTO, mprof, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, NULL, 0, sysctl_kern_mprof, "A", "Malloc Profiling"); #endif /* MALLOC_PROFILE */ Index: head/usr.bin/vmstat/vmstat.c =================================================================== --- head/usr.bin/vmstat/vmstat.c (revision 367273) +++ head/usr.bin/vmstat/vmstat.c (revision 367274) @@ -1,1706 +1,1708 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1986, 1991, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #if 0 #ifndef lint static char sccsid[] = "@(#)vmstat.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #define _WANT_VMMETER #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define VMSTAT_XO_VERSION "1" static char da[] = "da"; enum x_stats { X_SUM, X_HZ, X_STATHZ, X_NCHSTATS, X_INTRNAMES, X_SINTRNAMES, X_INTRCNT, X_SINTRCNT, X_NINTRCNT }; static struct nlist namelist[] = { [X_SUM] = { .n_name = "_vm_cnt", }, [X_HZ] = { .n_name = "_hz", }, [X_STATHZ] = { .n_name = "_stathz", }, [X_NCHSTATS] = { .n_name = "_nchstats", }, [X_INTRNAMES] = { .n_name = "_intrnames", }, [X_SINTRNAMES] = { .n_name = "_sintrnames", }, [X_INTRCNT] = { .n_name = "_intrcnt", }, [X_SINTRCNT] = { .n_name = "_sintrcnt", }, [X_NINTRCNT] = { .n_name = "_nintrcnt", }, { .n_name = NULL, }, }; static struct devstat_match *matches; static struct device_selection *dev_select; static struct statinfo cur, last; static devstat_select_mode select_mode; static size_t size_cp_times; static long *cur_cp_times, *last_cp_times; static long generation, select_generation; static int hz, hdrcnt, maxshowdevs; static int num_devices, num_devices_specified; static int num_matches, num_selected, num_selections; static char **specified_devices; static struct __vmmeter { uint64_t v_swtch; uint64_t v_trap; uint64_t v_syscall; uint64_t v_intr; uint64_t v_soft; uint64_t v_vm_faults; uint64_t v_io_faults; uint64_t v_cow_faults; uint64_t v_cow_optim; uint64_t v_zfod; uint64_t v_ozfod; uint64_t v_swapin; uint64_t v_swapout; uint64_t v_swappgsin; uint64_t v_swappgsout; uint64_t v_vnodein; uint64_t v_vnodeout; uint64_t v_vnodepgsin; uint64_t v_vnodepgsout; uint64_t v_intrans; uint64_t v_reactivated; uint64_t v_pdwakeups; uint64_t v_pdpages; uint64_t v_pdshortfalls; uint64_t v_dfree; uint64_t v_pfree; uint64_t v_tfree; uint64_t v_forks; uint64_t v_vforks; uint64_t v_rforks; uint64_t v_kthreads; uint64_t v_forkpages; uint64_t v_vforkpages; uint64_t v_rforkpages; uint64_t v_kthreadpages; u_int v_page_size; u_int v_page_count; u_int v_free_reserved; u_int v_free_target; u_int v_free_min; u_int v_free_count; u_int v_wire_count; u_long v_user_wire_count; u_int v_active_count; u_int v_inactive_target; u_int v_inactive_count; u_int v_laundry_count; u_int v_pageout_free_min; u_int v_interrupt_free_min; u_int v_free_severe; } sum, osum; #define VMSTAT_DEFAULT_LINES 20 /* Default number of `winlines'. */ static volatile sig_atomic_t wresized; /* Tty resized when non-zero. */ static int winlines = VMSTAT_DEFAULT_LINES; /* Current number of tty rows. 
*/ static int aflag; static int nflag; static int Pflag; static int hflag; static kvm_t *kd; #define FORKSTAT 0x01 #define INTRSTAT 0x02 #define MEMSTAT 0x04 #define SUMSTAT 0x08 #define TIMESTAT 0x10 #define VMSTAT 0x20 #define ZMEMSTAT 0x40 #define OBJSTAT 0x80 static void cpustats(void); static void pcpustats(u_long, int); static void devstats(void); static void doforkst(void); static void dointr(unsigned int, int); static void doobjstat(void); static void dosum(void); static void dovmstat(unsigned int, int); static void domemstat_malloc(void); static void domemstat_zone(void); static void kread(int, void *, size_t); static void kreado(int, void *, size_t, size_t); static void kreadptr(uintptr_t, void *, size_t); static void needhdr(int); static void needresize(int); static void doresize(void); static void printhdr(int, u_long); static void usage(void); static long pct(long, long); static long long getuptime(void); static char **getdrivedata(char **); int main(int argc, char *argv[]) { char *bp, *buf, *memf, *nlistf; float f; int bufsize, c, reps, todo; size_t len; unsigned int interval; char errbuf[_POSIX2_LINE_MAX]; memf = nlistf = NULL; interval = reps = todo = 0; maxshowdevs = 2; hflag = isatty(1); argc = xo_parse_args(argc, argv); if (argc < 0) return (argc); while ((c = getopt(argc, argv, "ac:fhHiM:mN:n:oPp:sw:z")) != -1) { switch (c) { case 'a': aflag++; break; case 'c': reps = atoi(optarg); break; case 'P': Pflag++; break; case 'f': todo |= FORKSTAT; break; case 'h': hflag = 1; break; case 'H': hflag = 0; break; case 'i': todo |= INTRSTAT; break; case 'M': memf = optarg; break; case 'm': todo |= MEMSTAT; break; case 'N': nlistf = optarg; break; case 'n': nflag = 1; maxshowdevs = atoi(optarg); if (maxshowdevs < 0) xo_errx(1, "number of devices %d is < 0", maxshowdevs); break; case 'o': todo |= OBJSTAT; break; case 'p': if (devstat_buildmatch(optarg, &matches, &num_matches) != 0) xo_errx(1, "%s", devstat_errbuf); break; case 's': todo |= SUMSTAT; break; case 'w': /* Convert to milliseconds. */ f = atof(optarg); interval = f * 1000; break; case 'z': todo |= ZMEMSTAT; break; case '?': default: usage(); } } argc -= optind; argv += optind; xo_set_version(VMSTAT_XO_VERSION); if (todo == 0) todo = VMSTAT; if (memf != NULL) { kd = kvm_openfiles(nlistf, memf, NULL, O_RDONLY, errbuf); if (kd == NULL) xo_errx(1, "kvm_openfiles: %s", errbuf); } retry_nlist: if (kd != NULL && (c = kvm_nlist(kd, namelist)) != 0) { if (c > 0) { bufsize = 0; len = 0; /* * 'cnt' was renamed to 'vm_cnt'. If 'vm_cnt' is not * found try looking up older 'cnt' symbol. * */ if (namelist[X_SUM].n_type == 0 && strcmp(namelist[X_SUM].n_name, "_vm_cnt") == 0) { namelist[X_SUM].n_name = "_cnt"; goto retry_nlist; } /* * 'nintrcnt' doesn't exist in older kernels, but * that isn't fatal. */ if (namelist[X_NINTRCNT].n_type == 0 && c == 1) goto nlist_ok; for (c = 0; c < (int)(nitems(namelist)); c++) if (namelist[c].n_type == 0) bufsize += strlen(namelist[c].n_name) + 1; bufsize += len + 1; buf = bp = alloca(bufsize); for (c = 0; c < (int)(nitems(namelist)); c++) if (namelist[c].n_type == 0) { xo_error(" %s", namelist[c].n_name); len = strlen(namelist[c].n_name); *bp++ = ' '; memcpy(bp, namelist[c].n_name, len); bp += len; } *bp = '\0'; xo_error("undefined symbols:\n", buf); } else xo_warnx("kvm_nlist: %s", kvm_geterr(kd)); xo_finish(); exit(1); } nlist_ok: if (kd && Pflag) xo_errx(1, "Cannot use -P with crash dumps"); if (todo & VMSTAT) { /* * Make sure that the userland devstat version matches the * kernel devstat version. 
If not, exit and print a * message informing the user of his mistake. */ if (devstat_checkversion(NULL) < 0) xo_errx(1, "%s", devstat_errbuf); argv = getdrivedata(argv); } if (*argv) { f = atof(*argv); interval = f * 1000; if (*++argv) reps = atoi(*argv); } if (interval) { if (!reps) reps = -1; } else if (reps) interval = 1 * 1000; if (todo & FORKSTAT) doforkst(); if (todo & MEMSTAT) domemstat_malloc(); if (todo & ZMEMSTAT) domemstat_zone(); if (todo & SUMSTAT) dosum(); if (todo & OBJSTAT) doobjstat(); if (todo & INTRSTAT) dointr(interval, reps); if (todo & VMSTAT) dovmstat(interval, reps); xo_finish(); exit(0); } static int mysysctl(const char *name, void *oldp, size_t *oldlenp) { int error; error = sysctlbyname(name, oldp, oldlenp, NULL, 0); if (error != 0 && errno != ENOMEM) xo_err(1, "sysctl(%s)", name); return (error); } static char ** getdrivedata(char **argv) { if ((num_devices = devstat_getnumdevs(NULL)) < 0) xo_errx(1, "%s", devstat_errbuf); cur.dinfo = (struct devinfo *)calloc(1, sizeof(struct devinfo)); last.dinfo = (struct devinfo *)calloc(1, sizeof(struct devinfo)); if (devstat_getdevs(NULL, &cur) == -1) xo_errx(1, "%s", devstat_errbuf); num_devices = cur.dinfo->numdevs; generation = cur.dinfo->generation; specified_devices = malloc(sizeof(char *)); for (num_devices_specified = 0; *argv; ++argv) { if (isdigit(**argv)) break; num_devices_specified++; specified_devices = reallocf(specified_devices, sizeof(char *) * num_devices_specified); if (specified_devices == NULL) { xo_errx(1, "%s", "reallocf (specified_devices)"); } specified_devices[num_devices_specified - 1] = *argv; } dev_select = NULL; if (nflag == 0 && maxshowdevs < num_devices_specified) maxshowdevs = num_devices_specified; /* * People are generally only interested in disk statistics when * they're running vmstat. So, that's what we're going to give * them if they don't specify anything by default. We'll also give * them any other random devices in the system so that we get to * maxshowdevs devices, if that many devices exist. If the user * specifies devices on the command line, either through a pattern * match or by naming them explicitly, we will give the user only * those devices. */ if ((num_devices_specified == 0) && (num_matches == 0)) { if (devstat_buildmatch(da, &matches, &num_matches) != 0) xo_errx(1, "%s", devstat_errbuf); select_mode = DS_SELECT_ADD; } else select_mode = DS_SELECT_ONLY; /* * At this point, selectdevs will almost surely indicate that the * device list has changed, so we don't look for return values of 0 * or 1. If we get back -1, though, there is an error. 
*/ if (devstat_selectdevs(&dev_select, &num_selected, &num_selections, &select_generation, generation, cur.dinfo->devices, num_devices, matches, num_matches, specified_devices, num_devices_specified, select_mode, maxshowdevs, 0) == -1) xo_errx(1, "%s", devstat_errbuf); return(argv); } /* Return system uptime in nanoseconds */ static long long getuptime(void) { struct timespec sp; (void)clock_gettime(CLOCK_UPTIME, &sp); return((long long)sp.tv_sec * 1000000000LL + sp.tv_nsec); } static void fill_vmmeter(struct __vmmeter *vmmp) { struct vmmeter vm_cnt; size_t size; if (kd != NULL) { kread(X_SUM, &vm_cnt, sizeof(vm_cnt)); #define GET_COUNTER(name) \ vmmp->name = kvm_counter_u64_fetch(kd, (u_long)vm_cnt.name) GET_COUNTER(v_swtch); GET_COUNTER(v_trap); GET_COUNTER(v_syscall); GET_COUNTER(v_intr); GET_COUNTER(v_soft); GET_COUNTER(v_vm_faults); GET_COUNTER(v_io_faults); GET_COUNTER(v_cow_faults); GET_COUNTER(v_cow_optim); GET_COUNTER(v_zfod); GET_COUNTER(v_ozfod); GET_COUNTER(v_swapin); GET_COUNTER(v_swapout); GET_COUNTER(v_swappgsin); GET_COUNTER(v_swappgsout); GET_COUNTER(v_vnodein); GET_COUNTER(v_vnodeout); GET_COUNTER(v_vnodepgsin); GET_COUNTER(v_vnodepgsout); GET_COUNTER(v_intrans); GET_COUNTER(v_tfree); GET_COUNTER(v_forks); GET_COUNTER(v_vforks); GET_COUNTER(v_rforks); GET_COUNTER(v_kthreads); GET_COUNTER(v_forkpages); GET_COUNTER(v_vforkpages); GET_COUNTER(v_rforkpages); GET_COUNTER(v_kthreadpages); #undef GET_COUNTER } else { #define GET_VM_STATS(cat, name) do { \ size = sizeof(vmmp->name); \ mysysctl("vm.stats." #cat "." #name, &vmmp->name, &size); \ } while (0) /* sys */ GET_VM_STATS(sys, v_swtch); GET_VM_STATS(sys, v_trap); GET_VM_STATS(sys, v_syscall); GET_VM_STATS(sys, v_intr); GET_VM_STATS(sys, v_soft); /* vm */ GET_VM_STATS(vm, v_vm_faults); GET_VM_STATS(vm, v_io_faults); GET_VM_STATS(vm, v_cow_faults); GET_VM_STATS(vm, v_cow_optim); GET_VM_STATS(vm, v_zfod); GET_VM_STATS(vm, v_ozfod); GET_VM_STATS(vm, v_swapin); GET_VM_STATS(vm, v_swapout); GET_VM_STATS(vm, v_swappgsin); GET_VM_STATS(vm, v_swappgsout); GET_VM_STATS(vm, v_vnodein); GET_VM_STATS(vm, v_vnodeout); GET_VM_STATS(vm, v_vnodepgsin); GET_VM_STATS(vm, v_vnodepgsout); GET_VM_STATS(vm, v_intrans); GET_VM_STATS(vm, v_reactivated); GET_VM_STATS(vm, v_pdwakeups); GET_VM_STATS(vm, v_pdpages); GET_VM_STATS(vm, v_pdshortfalls); GET_VM_STATS(vm, v_dfree); GET_VM_STATS(vm, v_pfree); GET_VM_STATS(vm, v_tfree); GET_VM_STATS(vm, v_page_size); GET_VM_STATS(vm, v_page_count); GET_VM_STATS(vm, v_free_reserved); GET_VM_STATS(vm, v_free_target); GET_VM_STATS(vm, v_free_min); GET_VM_STATS(vm, v_free_count); GET_VM_STATS(vm, v_wire_count); GET_VM_STATS(vm, v_user_wire_count); GET_VM_STATS(vm, v_active_count); GET_VM_STATS(vm, v_inactive_target); GET_VM_STATS(vm, v_inactive_count); GET_VM_STATS(vm, v_laundry_count); GET_VM_STATS(vm, v_pageout_free_min); GET_VM_STATS(vm, v_interrupt_free_min); /*GET_VM_STATS(vm, v_free_severe);*/ GET_VM_STATS(vm, v_forks); GET_VM_STATS(vm, v_vforks); GET_VM_STATS(vm, v_rforks); GET_VM_STATS(vm, v_kthreads); GET_VM_STATS(vm, v_forkpages); GET_VM_STATS(vm, v_vforkpages); GET_VM_STATS(vm, v_rforkpages); GET_VM_STATS(vm, v_kthreadpages); #undef GET_VM_STATS } } static void fill_vmtotal(struct vmtotal *vmtp) { size_t size; if (kd != NULL) { /* XXX fill vmtp */ xo_errx(1, "not implemented"); } else { size = sizeof(*vmtp); mysysctl("vm.vmtotal", vmtp, &size); if (size != sizeof(*vmtp)) xo_errx(1, "vm.total size mismatch"); } } /* Determine how many cpu columns, and what index they are in kern.cp_times */ static int 
getcpuinfo(u_long *maskp, int *maxidp) { long *times; u_long mask; size_t size; int empty, i, j, maxcpu, maxid, ncpus; if (kd != NULL) xo_errx(1, "not implemented"); mask = 0; ncpus = 0; size = sizeof(maxcpu); mysysctl("kern.smp.maxcpus", &maxcpu, &size); if (size != sizeof(maxcpu)) xo_errx(1, "sysctl kern.smp.maxcpus"); size = sizeof(long) * maxcpu * CPUSTATES; times = malloc(size); if (times == NULL) xo_err(1, "malloc %zd bytes", size); mysysctl("kern.cp_times", times, &size); maxid = (size / CPUSTATES / sizeof(long)) - 1; for (i = 0; i <= maxid; i++) { empty = 1; for (j = 0; empty && j < CPUSTATES; j++) { if (times[i * CPUSTATES + j] != 0) empty = 0; } if (!empty) { mask |= (1ul << i); ncpus++; } } if (maskp) *maskp = mask; if (maxidp) *maxidp = maxid; return (ncpus); } static void prthuman(const char *name, uint64_t val, int size, int flags) { char buf[10]; char fmt[128]; snprintf(fmt, sizeof(fmt), "{:%s/%%*s}", name); if (size < 5 || size > 9) xo_errx(1, "doofus"); flags |= HN_NOSPACE | HN_DECIMAL; humanize_number(buf, size, val, "", HN_AUTOSCALE, flags); xo_attr("value", "%ju", (uintmax_t) val); xo_emit(fmt, size, buf); } static void dovmstat(unsigned int interval, int reps) { struct clockinfo clockrate; struct vmtotal total; struct devinfo *tmp_dinfo; u_long cpumask; size_t size; time_t uptime, halfuptime; int ncpus, maxid, rate_adj, retval; uptime = getuptime() / 1000000000LL; halfuptime = uptime / 2; rate_adj = 1; ncpus = 1; maxid = 0; cpumask = 0; /* * If the user stops the program (control-Z) and then resumes it, * print out the header again. */ (void)signal(SIGCONT, needhdr); /* * If our standard output is a tty, then install a SIGWINCH handler * and set wresized so that our first iteration through the main * vmstat loop will peek at the terminal's current rows to find out * how many lines can fit in a screenful of output. */ if (isatty(fileno(stdout)) != 0) { wresized = 1; (void)signal(SIGWINCH, needresize); } else { wresized = 0; winlines = VMSTAT_DEFAULT_LINES; } if (kd != NULL) { if (namelist[X_STATHZ].n_type != 0 && namelist[X_STATHZ].n_value != 0) kread(X_STATHZ, &hz, sizeof(hz)); if (!hz) kread(X_HZ, &hz, sizeof(hz)); } else { size = sizeof(clockrate); mysysctl("kern.clockrate", &clockrate, &size); if (size != sizeof(clockrate)) xo_errx(1, "clockrate size mismatch"); hz = clockrate.hz; } if (Pflag) { ncpus = getcpuinfo(&cpumask, &maxid); size_cp_times = sizeof(long) * (maxid + 1) * CPUSTATES; cur_cp_times = calloc(1, size_cp_times); last_cp_times = calloc(1, size_cp_times); } for (hdrcnt = 1;;) { if (!--hdrcnt) printhdr(maxid, cpumask); if (kd != NULL) { if (kvm_getcptime(kd, cur.cp_time) < 0) xo_errx(1, "kvm_getcptime: %s", kvm_geterr(kd)); } else { size = sizeof(cur.cp_time); mysysctl("kern.cp_time", &cur.cp_time, &size); if (size != sizeof(cur.cp_time)) xo_errx(1, "cp_time size mismatch"); } if (Pflag) { size = size_cp_times; mysysctl("kern.cp_times", cur_cp_times, &size); if (size != size_cp_times) xo_errx(1, "cp_times mismatch"); } tmp_dinfo = last.dinfo; last.dinfo = cur.dinfo; cur.dinfo = tmp_dinfo; last.snap_time = cur.snap_time; /* * Here what we want to do is refresh our device stats. * getdevs() returns 1 when the device list has changed. * If the device list has changed, we want to go through * the selection process again, in case a device that we * were previously displaying has gone away. 
*/ switch (devstat_getdevs(NULL, &cur)) { case -1: xo_errx(1, "%s", devstat_errbuf); break; case 1: num_devices = cur.dinfo->numdevs; generation = cur.dinfo->generation; retval = devstat_selectdevs(&dev_select, &num_selected, &num_selections, &select_generation, generation, cur.dinfo->devices, num_devices, matches, num_matches, specified_devices, num_devices_specified, select_mode, maxshowdevs, 0); switch (retval) { case -1: xo_errx(1, "%s", devstat_errbuf); break; case 1: printhdr(maxid, cpumask); break; default: break; } break; default: break; } fill_vmmeter(&sum); fill_vmtotal(&total); xo_open_container("processes"); xo_emit("{:runnable/%2d} {:waiting/%2ld} " "{:swapped-out/%2ld}", total.t_rq - 1, total.t_dw + total.t_pw, total.t_sw); xo_close_container("processes"); xo_open_container("memory"); #define vmstat_pgtok(a) ((uintmax_t)(a) * (sum.v_page_size >> 10)) #define rate(x) (unsigned long)(((x) * rate_adj + halfuptime) / uptime) if (hflag) { prthuman("available-memory", total.t_avm * (uint64_t)sum.v_page_size, 5, HN_B); prthuman("free-memory", total.t_free * (uint64_t)sum.v_page_size, 5, HN_B); prthuman("total-page-faults", rate(sum.v_vm_faults - osum.v_vm_faults), 5, 0); xo_emit(" "); } else { xo_emit(" "); xo_emit("{:available-memory/%7ju}", vmstat_pgtok(total.t_avm)); xo_emit(" "); xo_emit("{:free-memory/%7ju}", vmstat_pgtok(total.t_free)); xo_emit(" "); xo_emit("{:total-page-faults/%5lu} ", rate(sum.v_vm_faults - osum.v_vm_faults)); } xo_close_container("memory"); xo_open_container("paging-rates"); xo_emit("{:page-reactivated/%3lu} ", rate(sum.v_reactivated - osum.v_reactivated)); xo_emit("{:paged-in/%3lu} ", rate(sum.v_swapin + sum.v_vnodein - (osum.v_swapin + osum.v_vnodein))); xo_emit("{:paged-out/%3lu}", rate(sum.v_swapout + sum.v_vnodeout - (osum.v_swapout + osum.v_vnodeout))); if (hflag) { prthuman("freed", rate(sum.v_tfree - osum.v_tfree), 5, 0); prthuman("scanned", rate(sum.v_pdpages - osum.v_pdpages), 5, 0); xo_emit(" "); } else { xo_emit(" "); xo_emit("{:freed/%5lu} ", rate(sum.v_tfree - osum.v_tfree)); xo_emit("{:scanned/%4lu} ", rate(sum.v_pdpages - osum.v_pdpages)); } xo_close_container("paging-rates"); devstats(); xo_open_container("fault-rates"); xo_emit("{:interrupts/%4lu}", rate(sum.v_intr - osum.v_intr)); if (hflag) { prthuman("system-calls", rate(sum.v_syscall - osum.v_syscall), 5, 0); prthuman("context-switches", rate(sum.v_swtch - osum.v_swtch), 5, 0); } else { xo_emit(" "); xo_emit("{:system-calls/%5lu} " "{:context-switches/%5lu}", rate(sum.v_syscall - osum.v_syscall), rate(sum.v_swtch - osum.v_swtch)); } xo_close_container("fault-rates"); if (Pflag) pcpustats(cpumask, maxid); else cpustats(); xo_emit("\n"); xo_flush(); if (reps >= 0 && --reps <= 0) break; osum = sum; uptime = interval; rate_adj = 1000; /* * We round upward to avoid losing low-frequency events * (i.e., >= 1 per interval but < 1 per millisecond). 
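[Editorial note: the rate() macro used above turns an event-count delta into a per-second figure, scaling by rate_adj (1000 once the measurement window is expressed in milliseconds) and adding half of the divisor so the division rounds to the nearest integer, as the rounding comment explains. A tiny sketch of that arithmetic:]

    #include <stdio.h>

    static unsigned long
    rate(unsigned long delta, unsigned long window, unsigned long rate_adj,
        unsigned long half_window)
    {
        /* Events per second, rounded to nearest by pre-adding half_window. */
        return ((delta * rate_adj + half_window) / window);
    }

    int
    main(void)
    {
        /* 3 events over a 2000 ms window: 1.5/s, reported as 2/s. */
        printf("%lu\n", rate(3, 2000, 1000, (2000 + 1) / 2));
        /* First pass: 14 events over 10 s of uptime: 1.4/s -> 1/s. */
        printf("%lu\n", rate(14, 10, 1, 10 / 2));
        return (0);
    }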
*/ if (interval != 1) halfuptime = (uptime + 1) / 2; else halfuptime = 0; (void)usleep(interval * 1000); } } static void printhdr(int maxid, u_long cpumask) { int i, num_shown; num_shown = MIN(num_selected, maxshowdevs); if (hflag) xo_emit(" {T:procs} {T:memory} {T:/page%*s}", 19, ""); else xo_emit("{T:procs} {T:memory} {T:/page%*s}", 19, ""); if (num_shown > 1) xo_emit(" {T:/disks %*s} ", num_shown * 4 - 7, ""); else if (num_shown == 1) xo_emit(" {T:disks}"); xo_emit(" {T:faults} "); if (Pflag) { for (i = 0; i <= maxid; i++) { if (cpumask & (1ul << i)) xo_emit(" {T:/cpu%d} ", i); } xo_emit("\n"); } else xo_emit(" {T:cpu}\n"); if (hflag) { xo_emit(" {T:r} {T:b} {T:w} {T:avm} {T:fre} {T:flt} {T:re}" " {T:pi} {T:po} {T:fr} {T:sr} "); } else { xo_emit("{T:r} {T:b} {T:w} {T:avm} {T:fre} {T:flt} " "{T:re} {T:pi} {T:po} {T:fr} {T:sr} "); } for (i = 0; i < num_devices; i++) if ((dev_select[i].selected) && (dev_select[i].selected <= maxshowdevs)) xo_emit("{T:/%c%c%d} ", dev_select[i].device_name[0], dev_select[i].device_name[1], dev_select[i].unit_number); xo_emit(" {T:in} {T:sy} {T:cs}"); if (Pflag) { for (i = 0; i <= maxid; i++) { if (cpumask & (1ul << i)) xo_emit(" {T:us} {T:sy} {T:id}"); } xo_emit("\n"); } else xo_emit(" {T:us} {T:sy} {T:id}\n"); if (wresized != 0) doresize(); hdrcnt = winlines; } /* * Force a header to be prepended to the next output. */ static void needhdr(int dummy __unused) { hdrcnt = 1; } /* * When the terminal is resized, force an update of the maximum number of rows * printed between each header repetition. Then force a new header to be * prepended to the next output. */ void needresize(int signo __unused) { wresized = 1; hdrcnt = 1; } /* * Update the global `winlines' count of terminal rows. */ void doresize(void) { struct winsize w; int status; for (;;) { status = ioctl(fileno(stdout), TIOCGWINSZ, &w); if (status == -1 && errno == EINTR) continue; else if (status == -1) xo_err(1, "ioctl"); if (w.ws_row > 3) winlines = w.ws_row - 3; else winlines = VMSTAT_DEFAULT_LINES; break; } /* * Inhibit doresize() calls until we are rescheduled by SIGWINCH. 
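[Editorial note: the doresize() logic above re-reads the terminal size with TIOCGWINSZ, retrying if the ioctl is interrupted by a signal and keeping a few rows for the repeated header. A minimal standalone version of the same probe; usable_lines and DEFAULT_LINES are illustrative names, not part of vmstat.]

    #include <sys/ioctl.h>
    #include <errno.h>
    #include <stdio.h>
    #include <unistd.h>

    #define DEFAULT_LINES   20      /* stand-in for VMSTAT_DEFAULT_LINES */

    static int
    usable_lines(void)
    {
        struct winsize w;

        for (;;) {
            if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == -1) {
                if (errno == EINTR)
                    continue;
                return (DEFAULT_LINES);   /* not a tty, or real error */
            }
            break;
        }
        /* Reserve three rows for the header block. */
        return (w.ws_row > 3 ? w.ws_row - 3 : DEFAULT_LINES);
    }

    int
    main(void)
    {
        printf("%d rows of output between headers\n", usable_lines());
        return (0);
    }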
*/ wresized = 0; } static long pct(long top, long bot) { long ans; if (bot == 0) return(0); ans = (quad_t)top * 100 / bot; return (ans); } #define PCT(top, bot) pct((long)(top), (long)(bot)) static void dosum(void) { struct nchstats lnchstats; size_t size; long nchtotal; fill_vmmeter(&sum); xo_open_container("summary-statistics"); xo_emit("{:context-switches/%9u} {N:cpu context switches}\n", sum.v_swtch); xo_emit("{:interrupts/%9u} {N:device interrupts}\n", sum.v_intr); xo_emit("{:software-interrupts/%9u} {N:software interrupts}\n", sum.v_soft); xo_emit("{:traps/%9u} {N:traps}\n", sum.v_trap); xo_emit("{:system-calls/%9u} {N:system calls}\n", sum.v_syscall); xo_emit("{:kernel-threads/%9u} {N:kernel threads created}\n", sum.v_kthreads); xo_emit("{:forks/%9u} {N: fork() calls}\n", sum.v_forks); xo_emit("{:vforks/%9u} {N:vfork() calls}\n", sum.v_vforks); xo_emit("{:rforks/%9u} {N:rfork() calls}\n", sum.v_rforks); xo_emit("{:swap-ins/%9u} {N:swap pager pageins}\n", sum.v_swapin); xo_emit("{:swap-in-pages/%9u} {N:swap pager pages paged in}\n", sum.v_swappgsin); xo_emit("{:swap-outs/%9u} {N:swap pager pageouts}\n", sum.v_swapout); xo_emit("{:swap-out-pages/%9u} {N:swap pager pages paged out}\n", sum.v_swappgsout); xo_emit("{:vnode-page-ins/%9u} {N:vnode pager pageins}\n", sum.v_vnodein); xo_emit("{:vnode-page-in-pages/%9u} {N:vnode pager pages paged in}\n", sum.v_vnodepgsin); xo_emit("{:vnode-page-outs/%9u} {N:vnode pager pageouts}\n", sum.v_vnodeout); xo_emit("{:vnode-page-out-pages/%9u} {N:vnode pager pages paged out}\n", sum.v_vnodepgsout); xo_emit("{:page-daemon-wakeups/%9u} {N:page daemon wakeups}\n", sum.v_pdwakeups); xo_emit("{:page-daemon-pages/%9u} {N:pages examined by the page " "daemon}\n", sum.v_pdpages); xo_emit("{:page-reclamation-shortfalls/%9u} {N:clean page reclamation " "shortfalls}\n", sum.v_pdshortfalls); xo_emit("{:reactivated/%9u} {N:pages reactivated by the page daemon}\n", sum.v_reactivated); xo_emit("{:copy-on-write-faults/%9u} {N:copy-on-write faults}\n", sum.v_cow_faults); xo_emit("{:copy-on-write-optimized-faults/%9u} {N:copy-on-write " "optimized faults}\n", sum.v_cow_optim); xo_emit("{:zero-fill-pages/%9u} {N:zero fill pages zeroed}\n", sum.v_zfod); xo_emit("{:zero-fill-prezeroed/%9u} {N:zero fill pages prezeroed}\n", sum.v_ozfod); xo_emit("{:intransit-blocking/%9u} {N:intransit blocking page faults}\n", sum.v_intrans); xo_emit("{:total-faults/%9u} {N:total VM faults taken}\n", sum.v_vm_faults); xo_emit("{:faults-requiring-io/%9u} {N:page faults requiring I\\/O}\n", sum.v_io_faults); xo_emit("{:faults-from-thread-creation/%9u} {N:pages affected by " "kernel thread creation}\n", sum.v_kthreadpages); xo_emit("{:faults-from-fork/%9u} {N:pages affected by fork}()\n", sum.v_forkpages); xo_emit("{:faults-from-vfork/%9u} {N:pages affected by vfork}()\n", sum.v_vforkpages); xo_emit("{:pages-rfork/%9u} {N:pages affected by rfork}()\n", sum.v_rforkpages); xo_emit("{:pages-freed/%9u} {N:pages freed}\n", sum.v_tfree); xo_emit("{:pages-freed-by-daemon/%9u} {N:pages freed by daemon}\n", sum.v_dfree); xo_emit("{:pages-freed-on-exit/%9u} {N:pages freed by exiting processes}\n", sum.v_pfree); xo_emit("{:active-pages/%9u} {N:pages active}\n", sum.v_active_count); xo_emit("{:inactive-pages/%9u} {N:pages inactive}\n", sum.v_inactive_count); xo_emit("{:laundry-pages/%9u} {N:pages in the laundry queue}\n", sum.v_laundry_count); xo_emit("{:wired-pages/%9u} {N:pages wired down}\n", sum.v_wire_count); xo_emit("{:virtual-user-wired-pages/%9lu} {N:virtual user pages wired " "down}\n", 
sum.v_user_wire_count); xo_emit("{:free-pages/%9u} {N:pages free}\n", sum.v_free_count); xo_emit("{:bytes-per-page/%9u} {N:bytes per page}\n", sum.v_page_size); if (kd != NULL) { kread(X_NCHSTATS, &lnchstats, sizeof(lnchstats)); } else { size = sizeof(lnchstats); mysysctl("vfs.cache.nchstats", &lnchstats, &size); if (size != sizeof(lnchstats)) xo_errx(1, "vfs.cache.nchstats size mismatch"); } nchtotal = lnchstats.ncs_goodhits + lnchstats.ncs_neghits + lnchstats.ncs_badhits + lnchstats.ncs_falsehits + lnchstats.ncs_miss + lnchstats.ncs_long; xo_emit("{:total-name-lookups/%9ld} {N:total name lookups}\n", nchtotal); xo_emit("{P:/%9s} {N:cache hits} " "({:positive-cache-hits/%ld}% pos + " "{:negative-cache-hits/%ld}% {N:neg}) " "system {:cache-hit-percent/%ld}% per-directory\n", "", PCT(lnchstats.ncs_goodhits, nchtotal), PCT(lnchstats.ncs_neghits, nchtotal), PCT(lnchstats.ncs_pass2, nchtotal)); xo_emit("{P:/%9s} {L:deletions} {:deletions/%ld}%, " "{L:falsehits} {:false-hits/%ld}%, " "{L:toolong} {:too-long/%ld}%\n", "", PCT(lnchstats.ncs_badhits, nchtotal), PCT(lnchstats.ncs_falsehits, nchtotal), PCT(lnchstats.ncs_long, nchtotal)); xo_close_container("summary-statistics"); } static void doforkst(void) { fill_vmmeter(&sum); xo_open_container("fork-statistics"); xo_emit("{:fork/%u} {N:forks}, {:fork-pages/%u} {N:pages}, " "{L:average} {:fork-average/%.2f}\n", sum.v_forks, sum.v_forkpages, sum.v_forks == 0 ? 0.0 : (double)sum.v_forkpages / sum.v_forks); xo_emit("{:vfork/%u} {N:vforks}, {:vfork-pages/%u} {N:pages}, " "{L:average} {:vfork-average/%.2f}\n", sum.v_vforks, sum.v_vforkpages, sum.v_vforks == 0 ? 0.0 : (double)sum.v_vforkpages / sum.v_vforks); xo_emit("{:rfork/%u} {N:rforks}, {:rfork-pages/%u} {N:pages}, " "{L:average} {:rfork-average/%.2f}\n", sum.v_rforks, sum.v_rforkpages, sum.v_rforks == 0 ? 
0.0 : (double)sum.v_rforkpages / sum.v_rforks); xo_close_container("fork-statistics"); } static void devstats(void) { long double busy_seconds, transfers_per_second; long tmp; int di, dn, state; for (state = 0; state < CPUSTATES; ++state) { tmp = cur.cp_time[state]; cur.cp_time[state] -= last.cp_time[state]; last.cp_time[state] = tmp; } busy_seconds = cur.snap_time - last.snap_time; xo_open_list("device"); for (dn = 0; dn < num_devices; dn++) { if (dev_select[dn].selected == 0 || dev_select[dn].selected > maxshowdevs) continue; di = dev_select[dn].position; if (devstat_compute_statistics(&cur.dinfo->devices[di], &last.dinfo->devices[di], busy_seconds, DSM_TRANSFERS_PER_SECOND, &transfers_per_second, DSM_NONE) != 0) xo_errx(1, "%s", devstat_errbuf); xo_open_instance("device"); xo_emit("{ekq:name/%c%c%d}{:transfers/%3.0Lf} ", dev_select[dn].device_name[0], dev_select[dn].device_name[1], dev_select[dn].unit_number, transfers_per_second); xo_close_instance("device"); } xo_close_list("device"); } static void percent(const char *name, double pctv, int *over) { int l; char buf[10]; char fmt[128]; snprintf(fmt, sizeof(fmt), " {:%s/%%*s}", name); l = snprintf(buf, sizeof(buf), "%.0f", pctv); if (l == 1 && *over) { xo_emit(fmt, 1, buf); (*over)--; } else xo_emit(fmt, 2, buf); if (l > 2) (*over)++; } static void cpustats(void) { double lpct, total; int state, over; total = 0; for (state = 0; state < CPUSTATES; ++state) total += cur.cp_time[state]; if (total > 0) lpct = 100.0 / total; else lpct = 0.0; over = 0; xo_open_container("cpu-statistics"); percent("user", (cur.cp_time[CP_USER] + cur.cp_time[CP_NICE]) * lpct, &over); percent("system", (cur.cp_time[CP_SYS] + cur.cp_time[CP_INTR]) * lpct, &over); percent("idle", cur.cp_time[CP_IDLE] * lpct, &over); xo_close_container("cpu-statistics"); } static void pcpustats(u_long cpumask, int maxid) { double lpct, total; long tmp; int i, over, state; /* devstats does this for cp_time */ for (i = 0; i <= maxid; i++) { if ((cpumask & (1ul << i)) == 0) continue; for (state = 0; state < CPUSTATES; ++state) { tmp = cur_cp_times[i * CPUSTATES + state]; cur_cp_times[i * CPUSTATES + state] -= last_cp_times[i * CPUSTATES + state]; last_cp_times[i * CPUSTATES + state] = tmp; } } over = 0; xo_open_list("cpu"); for (i = 0; i <= maxid; i++) { if ((cpumask & (1ul << i)) == 0) continue; xo_open_instance("cpu"); xo_emit("{ke:name/%d}", i); total = 0; for (state = 0; state < CPUSTATES; ++state) total += cur_cp_times[i * CPUSTATES + state]; if (total) lpct = 100.0 / total; else lpct = 0.0; percent("user", (cur_cp_times[i * CPUSTATES + CP_USER] + cur_cp_times[i * CPUSTATES + CP_NICE]) * lpct, &over); percent("system", (cur_cp_times[i * CPUSTATES + CP_SYS] + cur_cp_times[i * CPUSTATES + CP_INTR]) * lpct, &over); percent("idle", cur_cp_times[i * CPUSTATES + CP_IDLE] * lpct, &over); xo_close_instance("cpu"); } xo_close_list("cpu"); } static unsigned int read_intrcnts(unsigned long **intrcnts) { size_t intrcntlen; uintptr_t kaddr; if (kd != NULL) { kread(X_SINTRCNT, &intrcntlen, sizeof(intrcntlen)); if ((*intrcnts = malloc(intrcntlen)) == NULL) err(1, "malloc()"); if (namelist[X_NINTRCNT].n_type == 0) kread(X_INTRCNT, *intrcnts, intrcntlen); else { kread(X_INTRCNT, &kaddr, sizeof(kaddr)); kreadptr(kaddr, *intrcnts, intrcntlen); } } else { for (*intrcnts = NULL, intrcntlen = 1024; ; intrcntlen *= 2) { *intrcnts = reallocf(*intrcnts, intrcntlen); if (*intrcnts == NULL) err(1, "reallocf()"); if (mysysctl("hw.intrcnt", *intrcnts, &intrcntlen) == 0) break; } } return (intrcntlen / 
sizeof(unsigned long)); } static void print_intrcnts(unsigned long *intrcnts, unsigned long *old_intrcnts, char *intrnames, unsigned int nintr, size_t istrnamlen, long long period_ms) { unsigned long *intrcnt, *old_intrcnt; char *intrname; uint64_t inttotal, old_inttotal, total_count, total_rate; unsigned long count, rate; unsigned int i; inttotal = 0; old_inttotal = 0; intrname = intrnames; xo_open_list("interrupt"); for (i = 0, intrcnt=intrcnts, old_intrcnt=old_intrcnts; i < nintr; i++) { if (intrname[0] != '\0' && (*intrcnt != 0 || aflag)) { count = *intrcnt - *old_intrcnt; rate = ((uint64_t)count * 1000 + period_ms / 2) / period_ms; xo_open_instance("interrupt"); xo_emit("{d:name/%-*s}{ket:name/%s} " "{:total/%20lu} {:rate/%10lu}\n", (int)istrnamlen, intrname, intrname, count, rate); xo_close_instance("interrupt"); } intrname += strlen(intrname) + 1; inttotal += *intrcnt++; old_inttotal += *old_intrcnt++; } total_count = inttotal - old_inttotal; total_rate = (total_count * 1000 + period_ms / 2) / period_ms; xo_close_list("interrupt"); xo_emit("{L:/%-*s} {:total-interrupts/%20ju} " "{:total-rate/%10ju}\n", (int)istrnamlen, "Total", (uintmax_t)total_count, (uintmax_t)total_rate); } static void dointr(unsigned int interval, int reps) { unsigned long *intrcnts, *old_intrcnts; char *intrname, *intrnames; long long period_ms, old_uptime, uptime; size_t clen, inamlen, istrnamlen; uintptr_t kaddr; unsigned int nintr; old_intrcnts = NULL; uptime = getuptime(); /* Get the names of each interrupt source */ if (kd != NULL) { kread(X_SINTRNAMES, &inamlen, sizeof(inamlen)); if ((intrnames = malloc(inamlen)) == NULL) xo_err(1, "malloc()"); if (namelist[X_NINTRCNT].n_type == 0) kread(X_INTRNAMES, intrnames, inamlen); else { kread(X_INTRNAMES, &kaddr, sizeof(kaddr)); kreadptr(kaddr, intrnames, inamlen); } } else { for (intrnames = NULL, inamlen = 1024; ; inamlen *= 2) { if ((intrnames = reallocf(intrnames, inamlen)) == NULL) xo_err(1, "reallocf()"); if (mysysctl("hw.intrnames", intrnames, &inamlen) == 0) break; } } /* Determine the length of the longest interrupt name */ intrname = intrnames; istrnamlen = strlen("interrupt"); while(*intrname != '\0') { clen = strlen(intrname); if (clen > istrnamlen) istrnamlen = clen; intrname += strlen(intrname) + 1; } xo_emit("{T:/%-*s} {T:/%20s} {T:/%10s}\n", (int)istrnamlen, "interrupt", "total", "rate"); /* * Loop reps times printing differential interrupt counts. 
If reps is * zero, then run just once, printing total counts */ xo_open_container("interrupt-statistics"); period_ms = uptime / 1000000; while(1) { nintr = read_intrcnts(&intrcnts); /* * Initialize old_intrcnts to 0 for the first pass, so * print_intrcnts will print total interrupts since boot */ if (old_intrcnts == NULL) { old_intrcnts = calloc(nintr, sizeof(unsigned long)); if (old_intrcnts == NULL) xo_err(1, "calloc()"); } print_intrcnts(intrcnts, old_intrcnts, intrnames, nintr, istrnamlen, period_ms); xo_flush(); free(old_intrcnts); old_intrcnts = intrcnts; if (reps >= 0 && --reps <= 0) break; usleep(interval * 1000); old_uptime = uptime; uptime = getuptime(); period_ms = (uptime - old_uptime) / 1000000; } xo_close_container("interrupt-statistics"); } static void domemstat_malloc(void) { struct memory_type_list *mtlp; struct memory_type *mtp; - int error, first, i; + size_t i, zones; + int error, first; mtlp = memstat_mtl_alloc(); if (mtlp == NULL) { xo_warn("memstat_mtl_alloc"); return; } if (kd == NULL) { if (memstat_sysctl_malloc(mtlp, 0) < 0) { xo_warnx("memstat_sysctl_malloc: %s", memstat_strerror(memstat_mtl_geterror(mtlp))); return; } } else { if (memstat_kvm_malloc(mtlp, kd) < 0) { error = memstat_mtl_geterror(mtlp); if (error == MEMSTAT_ERROR_KVM) xo_warnx("memstat_kvm_malloc: %s", kvm_geterr(kd)); else xo_warnx("memstat_kvm_malloc: %s", memstat_strerror(error)); } } xo_open_container("malloc-statistics"); xo_emit("{T:/%13s} {T:/%5s} {T:/%6s} {T:/%7s} {T:/%8s} {T:Size(s)}\n", "Type", "InUse", "MemUse", "HighUse", "Requests"); xo_open_list("memory"); + zones = memstat_malloc_zone_get_count(); for (mtp = memstat_mtl_first(mtlp); mtp != NULL; mtp = memstat_mtl_next(mtp)) { if (memstat_get_numallocs(mtp) == 0 && memstat_get_count(mtp) == 0) continue; xo_open_instance("memory"); xo_emit("{k:type/%13s/%s} {:in-use/%5ju} " "{:memory-use/%5ju}{U:K} {:high-use/%7s} " "{:requests/%8ju} ", memstat_get_name(mtp), (uintmax_t)memstat_get_count(mtp), ((uintmax_t)memstat_get_bytes(mtp) + 1023) / 1024, "-", (uintmax_t)memstat_get_numallocs(mtp)); first = 1; xo_open_list("size"); - for (i = 0; i < 32; i++) { - if (memstat_get_sizemask(mtp) & (1 << i)) { + for (i = 0; i < zones; i++) { + if (memstat_malloc_zone_used(mtp, i)) { if (!first) xo_emit(","); - xo_emit("{l:size/%d}", 1 << (i + 4)); + xo_emit("{l:size/%d}", memstat_malloc_zone_get_size(i)); first = 0; } } xo_close_list("size"); xo_close_instance("memory"); xo_emit("\n"); } xo_close_list("memory"); xo_close_container("malloc-statistics"); memstat_mtl_free(mtlp); } static void domemstat_zone(void) { struct memory_type_list *mtlp; struct memory_type *mtp; int error; char name[MEMTYPE_MAXNAME + 1]; mtlp = memstat_mtl_alloc(); if (mtlp == NULL) { xo_warn("memstat_mtl_alloc"); return; } if (kd == NULL) { if (memstat_sysctl_uma(mtlp, 0) < 0) { xo_warnx("memstat_sysctl_uma: %s", memstat_strerror(memstat_mtl_geterror(mtlp))); return; } } else { if (memstat_kvm_uma(mtlp, kd) < 0) { error = memstat_mtl_geterror(mtlp); if (error == MEMSTAT_ERROR_KVM) xo_warnx("memstat_kvm_uma: %s", kvm_geterr(kd)); else xo_warnx("memstat_kvm_uma: %s", memstat_strerror(error)); } } xo_open_container("memory-zone-statistics"); xo_emit("{T:/%-20s} {T:/%6s} {T:/%6s} {T:/%8s} {T:/%8s} {T:/%8s} {T:/%8s}" "{T:/%4s} {T:/%4s}\n\n", "ITEM", "SIZE", "LIMIT", "USED", "FREE", "REQ", "FAIL", "SLEEP", "XDOMAIN"); xo_open_list("zone"); for (mtp = memstat_mtl_first(mtlp); mtp != NULL; mtp = memstat_mtl_next(mtp)) { strlcpy(name, memstat_get_name(mtp), MEMTYPE_MAXNAME); strcat(name, 
":"); xo_open_instance("zone"); xo_emit("{d:name/%-20s}{ke:name/%s} {:size/%6ju}, " "{:limit/%6ju},{:used/%8ju}," "{:free/%8ju},{:requests/%8ju}," "{:fail/%4ju},{:sleep/%4ju},{:xdomain/%4ju}\n", name, memstat_get_name(mtp), (uintmax_t)memstat_get_size(mtp), (uintmax_t)memstat_get_countlimit(mtp), (uintmax_t)memstat_get_count(mtp), (uintmax_t)memstat_get_free(mtp), (uintmax_t)memstat_get_numallocs(mtp), (uintmax_t)memstat_get_failures(mtp), (uintmax_t)memstat_get_sleeps(mtp), (uintmax_t)memstat_get_xdomain(mtp)); xo_close_instance("zone"); } memstat_mtl_free(mtlp); xo_close_list("zone"); xo_close_container("memory-zone-statistics"); xo_emit("\n"); } static void display_object(struct kinfo_vmobject *kvo) { const char *str; xo_open_instance("object"); xo_emit("{:resident/%5ju} ", (uintmax_t)kvo->kvo_resident); xo_emit("{:active/%5ju} ", (uintmax_t)kvo->kvo_active); xo_emit("{:inactive/%5ju} ", (uintmax_t)kvo->kvo_inactive); xo_emit("{:refcount/%3d} ", kvo->kvo_ref_count); xo_emit("{:shadowcount/%3d} ", kvo->kvo_shadow_count); switch (kvo->kvo_memattr) { #ifdef VM_MEMATTR_UNCACHEABLE case VM_MEMATTR_UNCACHEABLE: str = "UC"; break; #endif #ifdef VM_MEMATTR_WRITE_COMBINING case VM_MEMATTR_WRITE_COMBINING: str = "WC"; break; #endif #ifdef VM_MEMATTR_WRITE_THROUGH case VM_MEMATTR_WRITE_THROUGH: str = "WT"; break; #endif #ifdef VM_MEMATTR_WRITE_PROTECTED case VM_MEMATTR_WRITE_PROTECTED: str = "WP"; break; #endif #ifdef VM_MEMATTR_WRITE_BACK case VM_MEMATTR_WRITE_BACK: str = "WB"; break; #endif #ifdef VM_MEMATTR_WEAK_UNCACHEABLE case VM_MEMATTR_WEAK_UNCACHEABLE: str = "UC-"; break; #endif #ifdef VM_MEMATTR_WB_WA case VM_MEMATTR_WB_WA: str = "WB"; break; #endif #ifdef VM_MEMATTR_NOCACHE case VM_MEMATTR_NOCACHE: str = "NC"; break; #endif #ifdef VM_MEMATTR_DEVICE case VM_MEMATTR_DEVICE: str = "DEV"; break; #endif #ifdef VM_MEMATTR_CACHEABLE case VM_MEMATTR_CACHEABLE: str = "C"; break; #endif #ifdef VM_MEMATTR_PREFETCHABLE case VM_MEMATTR_PREFETCHABLE: str = "PRE"; break; #endif default: str = "??"; break; } xo_emit("{:attribute/%-3s} ", str); switch (kvo->kvo_type) { case KVME_TYPE_NONE: str = "--"; break; case KVME_TYPE_DEFAULT: str = "df"; break; case KVME_TYPE_VNODE: str = "vn"; break; case KVME_TYPE_SWAP: str = "sw"; break; case KVME_TYPE_DEVICE: str = "dv"; break; case KVME_TYPE_PHYS: str = "ph"; break; case KVME_TYPE_DEAD: str = "dd"; break; case KVME_TYPE_SG: str = "sg"; break; case KVME_TYPE_MGTDEVICE: str = "md"; break; case KVME_TYPE_UNKNOWN: default: str = "??"; break; } xo_emit("{:type/%-2s} ", str); xo_emit("{:path/%-s}\n", kvo->kvo_path); xo_close_instance("object"); } static void doobjstat(void) { struct kinfo_vmobject *kvo; int cnt, i; kvo = kinfo_getvmobject(&cnt); if (kvo == NULL) { xo_warn("Failed to fetch VM object list"); return; } xo_emit("{T:RES/%5s} {T:ACT/%5s} {T:INACT/%5s} {T:REF/%3s} {T:SHD/%3s} " "{T:CM/%3s} {T:TP/%2s} {T:PATH/%s}\n"); xo_open_list("object"); for (i = 0; i < cnt; i++) display_object(&kvo[i]); free(kvo); xo_close_list("object"); } /* * kread reads something from the kernel, given its nlist index. 
*/ static void kreado(int nlx, void *addr, size_t size, size_t offset) { const char *sym; if (namelist[nlx].n_type == 0 || namelist[nlx].n_value == 0) { sym = namelist[nlx].n_name; if (*sym == '_') ++sym; xo_errx(1, "symbol %s not defined", sym); } if ((size_t)kvm_read(kd, namelist[nlx].n_value + offset, addr, size) != size) { sym = namelist[nlx].n_name; if (*sym == '_') ++sym; xo_errx(1, "%s: %s", sym, kvm_geterr(kd)); } } static void kread(int nlx, void *addr, size_t size) { kreado(nlx, addr, size, 0); } static void kreadptr(uintptr_t addr, void *buf, size_t size) { if ((size_t)kvm_read(kd, addr, buf, size) != size) xo_errx(1, "%s", kvm_geterr(kd)); } static void __dead2 usage(void) { xo_error("%s%s", "usage: vmstat [-afHhimoPsz] [-M core [-N system]] [-c count] [-n devs]\n", " [-p type,if,pass] [-w wait] [disks] [wait [count]]\n"); xo_finish(); exit(1); }
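[Editorial note on the libmemstat additions this change switches vmstat to: memstat_malloc_zone_get_count(), memstat_malloc_zone_get_size() and memstat_malloc_zone_used() let a consumer discover the kernel's malloc(9) size classes at run time instead of hard-coding the old "1 << (i + 4)" sizemask interpretation seen in the removed loop. A standalone consumer sketch; link with -lmemstat, and it assumes a kernel new enough to export the zone information.]

    #include <err.h>
    #include <memstat.h>
    #include <stdio.h>

    int
    main(void)
    {
        struct memory_type_list *mtlp;
        struct memory_type *mtp;
        size_t i, zones;

        mtlp = memstat_mtl_alloc();
        if (mtlp == NULL)
            err(1, "memstat_mtl_alloc");
        if (memstat_sysctl_malloc(mtlp, 0) < 0)
            errx(1, "memstat_sysctl_malloc: %s",
                memstat_strerror(memstat_mtl_geterror(mtlp)));

        /* The malloc size classes as the running kernel defines them. */
        zones = memstat_malloc_zone_get_count();
        printf("%zu malloc size classes:", zones);
        for (i = 0; i < zones; i++)
            printf(" %zu", memstat_malloc_zone_get_size(i));
        printf("\n");

        /* Which size classes each malloc type has drawn from. */
        for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
            mtp = memstat_mtl_next(mtp)) {
            printf("%-16s uses zones:", memstat_get_name(mtp));
            for (i = 0; i < zones; i++)
                if (memstat_malloc_zone_used(mtp, i))
                    printf(" %zu", memstat_malloc_zone_get_size(i));
            printf("\n");
        }
        memstat_mtl_free(mtlp);
        return (0);
    }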