diff --git a/share/man/man9/malloc.9 b/share/man/man9/malloc.9 index b8c6e504e0c0..71375e90951f 100644 --- a/share/man/man9/malloc.9 +++ b/share/man/man9/malloc.9 @@ -1,360 +1,383 @@ .\" .\" Copyright (c) 1996 The NetBSD Foundation, Inc. .\" All rights reserved. .\" .\" This code is derived from software contributed to The NetBSD Foundation .\" by Paul Kranenburg. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR .\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE .\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. 
.\" .\" $NetBSD: malloc.9,v 1.3 1996/11/11 00:05:11 lukem Exp $ .\" $FreeBSD$ .\" -.Dd March 6, 2021 +.Dd July 2, 2021 .Dt MALLOC 9 .Os .Sh NAME .Nm malloc , .Nm free , .Nm realloc , .Nm reallocf , .Nm MALLOC_DEFINE , .Nm MALLOC_DECLARE .Nd kernel memory management routines .Sh SYNOPSIS .In sys/types.h .In sys/malloc.h .Ft void * .Fn malloc "size_t size" "struct malloc_type *type" "int flags" .Ft void * .Fn mallocarray "size_t nmemb" "size_t size" "struct malloc_type *type" "int flags" .Ft void .Fn free "void *addr" "struct malloc_type *type" .Ft void .Fn zfree "void *addr" "struct malloc_type *type" .Ft void * .Fn realloc "void *addr" "size_t size" "struct malloc_type *type" "int flags" .Ft void * .Fn reallocf "void *addr" "size_t size" "struct malloc_type *type" "int flags" .Ft size_t .Fn malloc_usable_size "const void *addr" .Ft void * +.Fo malloc_aligned +.Fa "size_t size" +.Fa "size_t align" +.Fa "struct malloc_type *type" +.Fa "int flags" +.Fc +.Ft void * .Fn malloc_exec "size_t size" "struct malloc_type *type" "int flags" .Fn MALLOC_DECLARE type .In sys/param.h .In sys/malloc.h .In sys/kernel.h .Fn MALLOC_DEFINE type shortdesc longdesc .In sys/param.h .In sys/domainset.h .Ft void * .Fn malloc_domainset "size_t size" "struct malloc_type *type" "struct domainset *ds" "int flags" .Ft void * +.Fo malloc_domainset_aligned +.Fa "size_t size" +.Fa "size_t align" +.Fa "struct malloc_type *type" +.Fa "struct domainset *ds" +.Fa "int flags" +.Fc +.Ft void * .Fn malloc_domainset_exec "size_t size" "struct malloc_type *type" "struct domainset *ds" "int flags" .Ft void * .Fn mallocarray_domainset "size_t nmemb" "size_t size" "struct malloc_type *type" "struct domainset *ds" "int flags" .Sh DESCRIPTION The .Fn malloc function allocates uninitialized memory in kernel address space for an object whose size is specified by .Fa size . .Pp The .Fn malloc_domainset variant allocates memory from a specific .Xr numa 4 domain using the specified domain selection policy. 
See .Xr domainset 9 for some example policies. .Pp +The +.Fn malloc_aligned +and +.Fn malloc_domainset_aligned +variants return allocations aligned as specified by +.Fa align , +which must be non-zero, a power of two, and less than or equal to the page size. +.Pp Both .Fn malloc_exec and .Fn malloc_domainset_exec can be used to return executable memory. Not all platforms enforce a distinction between executable and non-executable memory. .Pp The .Fn mallocarray function allocates uninitialized memory in kernel address space for an array of .Fa nmemb entries whose size is specified by .Fa size . .Pp The .Fn mallocarray_domainset variant allocates memory from a specific .Xr numa 4 domain using the specified domain selection policy. See .Xr domainset 9 for some example policies. .Pp The .Fn free function releases memory at address .Fa addr that was previously allocated by .Fn malloc for re-use. The memory is not zeroed. If .Fa addr is .Dv NULL , then .Fn free does nothing. .Pp Like .Fn free , the .Fn zfree function releases memory at address .Fa addr that was previously allocated by .Fn malloc for re-use. However, .Fn zfree will zero the memory before it is released. .Pp The .Fn realloc function changes the size of the previously allocated memory referenced by .Fa addr to .Fa size bytes. The contents of the memory are unchanged up to the lesser of the new and old sizes. Note that the returned value may differ from .Fa addr . If the requested memory cannot be allocated, .Dv NULL is returned and the memory referenced by .Fa addr is valid and unchanged. If .Fa addr is .Dv NULL , the .Fn realloc function behaves identically to .Fn malloc for the specified size. .Pp The .Fn reallocf function is identical to .Fn realloc except that it will free the passed pointer when the requested memory cannot be allocated. .Pp The .Fn malloc_usable_size function returns the usable size of the allocation pointed to by .Fa addr . 
The return value may be larger than the size that was requested during allocation. .Pp Unlike its standard C library counterpart .Pq Xr malloc 3 , the kernel version takes two more arguments. The .Fa flags argument further qualifies .Fn malloc Ns 's operational characteristics as follows: .Bl -tag -width indent .It Dv M_ZERO Causes the allocated memory to be set to all zeros. .It Dv M_NODUMP For allocations greater than page size, causes the allocated memory to be excluded from kernel core dumps. .It Dv M_NOWAIT Causes .Fn malloc , .Fn realloc , and .Fn reallocf to return .Dv NULL if the request cannot be immediately fulfilled due to resource shortage. Note that .Dv M_NOWAIT is required when running in an interrupt context. .It Dv M_WAITOK Indicates that it is OK to wait for resources. If the request cannot be immediately fulfilled, the current process is put to sleep to wait for resources to be released by other processes. The .Fn malloc , .Fn mallocarray , .Fn realloc , and .Fn reallocf functions cannot return .Dv NULL if .Dv M_WAITOK is specified. If the multiplication of .Fa nmemb and .Fa size would cause an integer overflow, the .Fn mallocarray function induces a panic. .It Dv M_USE_RESERVE Indicates that the system can use its reserve of memory to satisfy the request. This option should only be used in combination with .Dv M_NOWAIT when an allocation failure cannot be tolerated by the caller without catastrophic effects on the system. .El .Pp Exactly one of either .Dv M_WAITOK or .Dv M_NOWAIT must be specified. .Pp The .Fa type argument is used to perform statistics on memory usage, and for basic sanity checks. It can be used to identify multiple allocations. The statistics can be examined by .Sq vmstat -m . .Pp A .Fa type is defined using .Vt "struct malloc_type" via the .Fn MALLOC_DECLARE and .Fn MALLOC_DEFINE macros. 
.Bd -literal -offset indent /* sys/something/foo_extern.h */ MALLOC_DECLARE(M_FOOBUF); /* sys/something/foo_main.c */ MALLOC_DEFINE(M_FOOBUF, "foobuffers", "Buffers to foo data into the ether"); /* sys/something/foo_subr.c */ \&... buf = malloc(sizeof(*buf), M_FOOBUF, M_NOWAIT); .Ed .Pp In order to use .Fn MALLOC_DEFINE , one must include .In sys/param.h (instead of .In sys/types.h ) and .In sys/kernel.h . .Sh CONTEXT .Fn malloc , .Fn realloc and .Fn reallocf may not be called from fast interrupt handlers. When called from threaded interrupts, .Fa flags must contain .Dv M_NOWAIT . .Pp .Fn malloc , .Fn realloc and .Fn reallocf may sleep when called with .Dv M_WAITOK . .Fn free never sleeps. However, .Fn malloc , .Fn realloc , .Fn reallocf and .Fn free may not be called in a critical section or while holding a spin lock. .Pp Any calls to .Fn malloc (even with .Dv M_NOWAIT ) or .Fn free when holding a .Xr vnode 9 interlock, will cause a LOR (Lock Order Reversal) due to the intertwining of VM Objects and Vnodes. .Sh IMPLEMENTATION NOTES The memory allocator allocates memory in chunks whose size is a power of two for requests up to the size of a page of memory. For larger requests, one or more pages are allocated. While it should not be relied upon, this information may be useful for optimizing the efficiency of memory use. .Sh RETURN VALUES The .Fn malloc , .Fn realloc , and .Fn reallocf functions return a kernel virtual address that is suitably aligned for storage of any type of object, or .Dv NULL if the request could not be satisfied (implying that .Dv M_NOWAIT was set). .Sh DIAGNOSTICS A kernel compiled with the .Dv INVARIANTS configuration option attempts to detect memory corruption caused by such things as writing outside the allocated area and imbalanced calls to the .Fn malloc and .Fn free functions. Failing consistency checks will cause a panic or a system console message. 
.Sh SEE ALSO .Xr numa 4 , .Xr vmstat 8 , .Xr contigmalloc 9 , .Xr domainset 9 , .Xr memguard 9 , .Xr vnode 9 diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h index b8c2788edd44..3d88460a751e 100644 --- a/sys/sys/malloc.h +++ b/sys/sys/malloc.h @@ -1,324 +1,326 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1987, 1993 * The Regents of the University of California. * Copyright (c) 2005, 2009 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)malloc.h 8.5 (Berkeley) 5/3/95 * $FreeBSD$ */ #ifndef _SYS_MALLOC_H_ #define _SYS_MALLOC_H_ #ifndef _STANDALONE #include #ifdef _KERNEL #include #endif #include #include #include #include #define MINALLOCSIZE UMA_SMALLEST_UNIT /* * Flags to memory allocation functions. */ #define M_NOWAIT 0x0001 /* do not block */ #define M_WAITOK 0x0002 /* ok to block */ #define M_NORECLAIM 0x0080 /* do not reclaim after failure */ #define M_ZERO 0x0100 /* bzero the allocation */ #define M_NOVM 0x0200 /* don't ask VM for pages */ #define M_USE_RESERVE 0x0400 /* can alloc out of reserve memory */ #define M_NODUMP 0x0800 /* don't dump pages in this allocation */ #define M_FIRSTFIT 0x1000 /* only for vmem, fast fit */ #define M_BESTFIT 0x2000 /* only for vmem, low fragmentation */ #define M_EXEC 0x4000 /* allocate executable space */ #define M_NEXTFIT 0x8000 /* only for vmem, follow cursor */ #define M_VERSION 2020110501 /* * Two malloc type structures are present: malloc_type, which is used by a * type owner to declare the type, and malloc_type_internal, which holds * malloc-owned statistics and other ABI-sensitive fields, such as the set of * malloc statistics indexed by the compile-time MAXCPU constant. * Applications should avoid introducing dependence on the allocator private * data layout and size. * * The malloc_type ks_next field is protected by malloc_mtx. Other fields in * malloc_type are static after initialization so unsynchronized. * * Statistics in malloc_type_stats are written only when holding a critical * section and running on the CPU associated with the index into the stat * array, but read lock-free resulting in possible (minor) races, which the * monitoring app should take into account. */ struct malloc_type_stats { uint64_t mts_memalloced; /* Bytes allocated on CPU. */ uint64_t mts_memfreed; /* Bytes freed on CPU. */ uint64_t mts_numallocs; /* Number of allocates on CPU. */ uint64_t mts_numfrees; /* number of frees on CPU. 
*/ uint64_t mts_size; /* Bitmask of sizes allocated on CPU. */ uint64_t _mts_reserved1; /* Reserved field. */ uint64_t _mts_reserved2; /* Reserved field. */ uint64_t _mts_reserved3; /* Reserved field. */ }; _Static_assert(sizeof(struct malloc_type_stats) == 64, "allocations come from pcpu_zone_64"); /* * Index definitions for the mti_probes[] array. */ #define DTMALLOC_PROBE_MALLOC 0 #define DTMALLOC_PROBE_FREE 1 #define DTMALLOC_PROBE_MAX 2 struct malloc_type_internal { uint32_t mti_probes[DTMALLOC_PROBE_MAX]; /* DTrace probe ID array. */ u_char mti_zone; struct malloc_type_stats *mti_stats; u_long mti_spare[8]; }; /* * Public data structure describing a malloc type. */ struct malloc_type { struct malloc_type *ks_next; /* Next in global chain. */ u_long ks_version; /* Detect programmer error. */ const char *ks_shortdesc; /* Printable type name. */ struct malloc_type_internal ks_mti; }; /* * Statistics structure headers for user space. The kern.malloc sysctl * exposes a structure stream consisting of a stream header, then a series of * malloc type headers and statistics structures (quantity maxcpus). For * convenience, the kernel will provide the current value of maxcpus at the * head of the stream. */ #define MALLOC_TYPE_STREAM_VERSION 0x00000001 struct malloc_type_stream_header { uint32_t mtsh_version; /* Stream format version. */ uint32_t mtsh_maxcpus; /* Value of MAXCPU for stream. */ uint32_t mtsh_count; /* Number of records. */ uint32_t _mtsh_pad; /* Pad/reserved field. 
*/ }; #define MALLOC_MAX_NAME 32 struct malloc_type_header { char mth_name[MALLOC_MAX_NAME]; }; #ifdef _KERNEL #define MALLOC_DEFINE(type, shortdesc, longdesc) \ struct malloc_type type[1] = { \ { \ .ks_next = NULL, \ .ks_version = M_VERSION, \ .ks_shortdesc = shortdesc, \ } \ }; \ SYSINIT(type##_init, SI_SUB_KMEM, SI_ORDER_THIRD, malloc_init, \ type); \ SYSUNINIT(type##_uninit, SI_SUB_KMEM, SI_ORDER_ANY, \ malloc_uninit, type) #define MALLOC_DECLARE(type) \ extern struct malloc_type type[1] MALLOC_DECLARE(M_CACHE); MALLOC_DECLARE(M_DEVBUF); MALLOC_DECLARE(M_TEMP); /* * XXX this should be declared in , but that tends to fail * because is included in a header before the source file * has a chance to include to get MALLOC_DECLARE() defined. */ MALLOC_DECLARE(M_IOV); struct domainset; extern struct mtx malloc_mtx; /* * Function type used when iterating over the list of malloc types. */ typedef void malloc_type_list_func_t(struct malloc_type *, void *); void contigfree(void *addr, unsigned long size, struct malloc_type *type); void *contigmalloc(unsigned long size, struct malloc_type *type, int flags, vm_paddr_t low, vm_paddr_t high, unsigned long alignment, vm_paddr_t boundary) __malloc_like __result_use_check __alloc_size(1) __alloc_align(6); void *contigmalloc_domainset(unsigned long size, struct malloc_type *type, struct domainset *ds, int flags, vm_paddr_t low, vm_paddr_t high, unsigned long alignment, vm_paddr_t boundary) __malloc_like __result_use_check __alloc_size(1) __alloc_align(7); void free(void *addr, struct malloc_type *type); void zfree(void *addr, struct malloc_type *type); void *malloc(size_t size, struct malloc_type *type, int flags) __malloc_like __result_use_check __alloc_size(1); /* * Try to optimize malloc(..., ..., M_ZERO) allocations by doing zeroing in * place if the size is known at compilation time. * * Passing the flag down requires malloc to blindly zero the entire object. 
* In practice a lot of the zeroing can be avoided if most of the object * gets explicitly initialized after the allocation. Letting the compiler * zero in place gives it the opportunity to take advantage of this state. * * Note that the operation is only applicable if both flags and size are * known at compilation time. If M_ZERO is passed but M_WAITOK is not, the * allocation can fail and a NULL check is needed. However, if M_WAITOK is * passed we know the allocation must succeed and the check can be elided. * * _malloc_item = malloc(_size, type, (flags) &~ M_ZERO); * if (((flags) & M_WAITOK) != 0 || _malloc_item != NULL) * bzero(_malloc_item, _size); * * If the flag is set, the compiler knows the left side is always true, * therefore the entire statement is true and the callsite is: * * _malloc_item = malloc(_size, type, (flags) &~ M_ZERO); * bzero(_malloc_item, _size); * * If the flag is not set, the compiler knows the left side is always false * and the NULL check is needed, therefore the callsite is: * * _malloc_item = malloc(_size, type, (flags) &~ M_ZERO); * if (_malloc_item != NULL) * bzero(_malloc_item, _size); * * The implementation is a macro because of what appears to be a clang 6 bug: * an inline function variant ended up being compiled to a mere malloc call * regardless of argument. gcc generates expected code (like the above). 
*/ #define malloc(size, type, flags) ({ \ void *_malloc_item; \ size_t _size = (size); \ if (__builtin_constant_p(size) && __builtin_constant_p(flags) &&\ ((flags) & M_ZERO) != 0) { \ _malloc_item = malloc(_size, type, (flags) &~ M_ZERO); \ if (((flags) & M_WAITOK) != 0 || \ __predict_true(_malloc_item != NULL)) \ bzero(_malloc_item, _size); \ } else { \ _malloc_item = malloc(_size, type, flags); \ } \ _malloc_item; \ }) void *malloc_domainset(size_t size, struct malloc_type *type, struct domainset *ds, int flags) __malloc_like __result_use_check __alloc_size(1); void *mallocarray(size_t nmemb, size_t size, struct malloc_type *type, int flags) __malloc_like __result_use_check __alloc_size2(1, 2); void *mallocarray_domainset(size_t nmemb, size_t size, struct malloc_type *type, struct domainset *ds, int flags) __malloc_like __result_use_check __alloc_size2(1, 2); void *malloc_exec(size_t size, struct malloc_type *type, int flags) __malloc_like __result_use_check __alloc_size(1); void *malloc_domainset_exec(size_t size, struct malloc_type *type, struct domainset *ds, int flags) __malloc_like __result_use_check __alloc_size(1); void malloc_init(void *); void malloc_type_allocated(struct malloc_type *type, unsigned long size); void malloc_type_freed(struct malloc_type *type, unsigned long size); void malloc_type_list(malloc_type_list_func_t *, void *); void malloc_uninit(void *); size_t malloc_size(size_t); size_t malloc_usable_size(const void *); void *realloc(void *addr, size_t size, struct malloc_type *type, int flags) __result_use_check __alloc_size(2); void *reallocf(void *addr, size_t size, struct malloc_type *type, int flags) __result_use_check __alloc_size(2); +void *malloc_aligned(size_t size, size_t align, struct malloc_type *type, + int flags) __malloc_like __result_use_check __alloc_size(1); void *malloc_domainset_aligned(size_t size, size_t align, struct malloc_type *mtp, struct domainset *ds, int flags) __malloc_like __result_use_check __alloc_size(1); 
struct malloc_type *malloc_desc2type(const char *desc); /* * This is sqrt(SIZE_MAX+1), as s1*s2 <= SIZE_MAX * if both s1 < MUL_NO_OVERFLOW and s2 < MUL_NO_OVERFLOW */ #define MUL_NO_OVERFLOW (1UL << (sizeof(size_t) * 8 / 2)) static inline bool WOULD_OVERFLOW(size_t nmemb, size_t size) { return ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && nmemb > 0 && __SIZE_T_MAX / nmemb < size); } #undef MUL_NO_OVERFLOW #endif /* _KERNEL */ #else /* * The native stand malloc / free interface we're mapping to */ extern void Free(void *p, const char *file, int line); extern void *Malloc(size_t bytes, const char *file, int line); /* * Minimal standalone malloc implementation / environment. None of the * flags mean anything and there's no need declare malloc types. * Define the simple alloc / free routines in terms of Malloc and * Free. None of the kernel features that this stuff disables are needed. */ #define M_WAITOK 1 #define M_ZERO 0 #define M_NOWAIT 2 #define MALLOC_DECLARE(x) #define kmem_zalloc(size, flags) ({ \ void *p = Malloc((size), __FILE__, __LINE__); \ if (p == NULL && (flags & M_WAITOK) != 0) \ panic("Could not malloc %zd bytes with M_WAITOK from %s line %d", \ (size_t)size, __FILE__, __LINE__); \ p; \ }) #define kmem_free(p, size) Free(p, __FILE__, __LINE__) /* * ZFS mem.h define that's the OpenZFS porting layer way of saying * M_WAITOK. Given the above, it will also be a nop. */ #define KM_SLEEP M_WAITOK #define KM_NOSLEEP M_NOWAIT #endif /* _STANDALONE */ #endif /* !_SYS_MALLOC_H_ */