diff --git a/sys/kern/kern_cpuset.c b/sys/kern/kern_cpuset.c index b02f33dddf1b..56d92687a693 100644 --- a/sys/kern/kern_cpuset.c +++ b/sys/kern/kern_cpuset.c @@ -1,2493 +1,2503 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2008, Jeffrey Roberson * All rights reserved. * * Copyright (c) 2008 Nokia Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" +#include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif /* DDB */ /* * cpusets provide a mechanism for creating and manipulating sets of * processors for the purpose of constraining the scheduling of threads to * specific processors. * * Each process belongs to an identified set, by default this is set 1. Each * thread may further restrict the cpus it may run on to a subset of this * named set. This creates an anonymous set which other threads and processes * may not join by number. * * The named set is referred to herein as the 'base' set to avoid ambiguity. * This set is usually a child of a 'root' set while the anonymous set may * simply be referred to as a mask. In the syscall api these are referred to * as the ROOT, CPUSET, and MASK levels where CPUSET is called 'base' here. * * Threads inherit their set from their creator whether it be anonymous or * not. This means that anonymous sets are immutable because they may be * shared. To modify an anonymous set a new set is created with the desired * mask and the same parent as the existing anonymous set. This gives the * illusion of each thread having a private mask. * * Via the syscall apis a user may ask to retrieve or modify the root, base, * or mask that is discovered via a pid, tid, or setid. Modifying a set * modifies all numbered and anonymous child sets to comply with the new mask. * Modifying a pid or tid's mask applies only to that tid but must still * exist within the assigned parent set. * * A thread may not be assigned to a group separate from other threads in * the process. 
This is to remove ambiguity when the setid is queried with * a pid argument. There is no other technical limitation. * * This somewhat complex arrangement is intended to make it easy for * applications to query available processors and bind their threads to * specific processors while also allowing administrators to dynamically * reprovision by changing sets which apply to groups of processes. * * A simple application should not concern itself with sets at all and * rather apply masks to its own threads via CPU_WHICH_TID and a -1 id * meaning 'curthread'. It may query available cpus for that tid with a * getaffinity call using (CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, ...). */ LIST_HEAD(domainlist, domainset); struct domainset __read_mostly domainset_firsttouch; struct domainset __read_mostly domainset_fixed[MAXMEMDOM]; struct domainset __read_mostly domainset_interleave; struct domainset __read_mostly domainset_prefer[MAXMEMDOM]; struct domainset __read_mostly domainset_roundrobin; static uma_zone_t cpuset_zone; static uma_zone_t domainset_zone; static struct mtx cpuset_lock; static struct setlist cpuset_ids; static struct domainlist cpuset_domains; static struct unrhdr *cpuset_unr; static struct cpuset *cpuset_zero, *cpuset_default, *cpuset_kernel; static struct domainset *domainset0, *domainset2; /* Return the size of cpuset_t at the kernel level */ SYSCTL_INT(_kern_sched, OID_AUTO, cpusetsize, CTLFLAG_RD | CTLFLAG_CAPRD, SYSCTL_NULL_INT_PTR, sizeof(cpuset_t), "sizeof(cpuset_t)"); cpuset_t *cpuset_root; cpuset_t cpuset_domain[MAXMEMDOM]; static int domainset_valid(const struct domainset *, const struct domainset *); /* * Find the first non-anonymous set starting from 'set'. */ static struct cpuset * cpuset_getbase(struct cpuset *set) { if (set->cs_id == CPUSET_INVALID) set = set->cs_parent; return (set); } /* * Walks up the tree from 'set' to find the root. 
*/ static struct cpuset * cpuset_getroot(struct cpuset *set) { while ((set->cs_flags & CPU_SET_ROOT) == 0 && set->cs_parent != NULL) set = set->cs_parent; return (set); } /* * Acquire a reference to a cpuset, all pointers must be tracked with refs. */ struct cpuset * cpuset_ref(struct cpuset *set) { refcount_acquire(&set->cs_ref); return (set); } /* * Walks up the tree from 'set' to find the root. Returns the root * referenced. */ static struct cpuset * cpuset_refroot(struct cpuset *set) { return (cpuset_ref(cpuset_getroot(set))); } /* * Find the first non-anonymous set starting from 'set'. Returns this set * referenced. May return the passed in set with an extra ref if it is * not anonymous. */ static struct cpuset * cpuset_refbase(struct cpuset *set) { return (cpuset_ref(cpuset_getbase(set))); } /* * Release a reference in a context where it is safe to allocate. */ void cpuset_rel(struct cpuset *set) { cpusetid_t id; if (refcount_release_if_not_last(&set->cs_ref)) return; mtx_lock_spin(&cpuset_lock); if (!refcount_release(&set->cs_ref)) { mtx_unlock_spin(&cpuset_lock); return; } LIST_REMOVE(set, cs_siblings); id = set->cs_id; if (id != CPUSET_INVALID) LIST_REMOVE(set, cs_link); mtx_unlock_spin(&cpuset_lock); cpuset_rel(set->cs_parent); uma_zfree(cpuset_zone, set); if (id != CPUSET_INVALID) free_unr(cpuset_unr, id); } /* * Deferred release must be used when in a context that is not safe to * allocate/free. This places any unreferenced sets on the list 'head'. */ static void cpuset_rel_defer(struct setlist *head, struct cpuset *set) { if (refcount_release_if_not_last(&set->cs_ref)) return; mtx_lock_spin(&cpuset_lock); if (!refcount_release(&set->cs_ref)) { mtx_unlock_spin(&cpuset_lock); return; } LIST_REMOVE(set, cs_siblings); if (set->cs_id != CPUSET_INVALID) LIST_REMOVE(set, cs_link); LIST_INSERT_HEAD(head, set, cs_link); mtx_unlock_spin(&cpuset_lock); } /* * Complete a deferred release. Removes the set from the list provided to * cpuset_rel_defer. 
*/ static void cpuset_rel_complete(struct cpuset *set) { cpusetid_t id; id = set->cs_id; LIST_REMOVE(set, cs_link); cpuset_rel(set->cs_parent); uma_zfree(cpuset_zone, set); if (id != CPUSET_INVALID) free_unr(cpuset_unr, id); } /* * Find a set based on an id. Returns it with a ref. */ static struct cpuset * cpuset_lookup(cpusetid_t setid, struct thread *td) { struct cpuset *set; if (setid == CPUSET_INVALID) return (NULL); mtx_lock_spin(&cpuset_lock); LIST_FOREACH(set, &cpuset_ids, cs_link) if (set->cs_id == setid) break; if (set) cpuset_ref(set); mtx_unlock_spin(&cpuset_lock); KASSERT(td != NULL, ("[%s:%d] td is NULL", __func__, __LINE__)); if (set != NULL && jailed(td->td_ucred)) { struct cpuset *jset, *tset; jset = td->td_ucred->cr_prison->pr_cpuset; for (tset = set; tset != NULL; tset = tset->cs_parent) if (tset == jset) break; if (tset == NULL) { cpuset_rel(set); set = NULL; } } return (set); } /* * Initialize a set in the space provided in 'set' with the provided parameters. * The set is returned with a single ref. May return EDEADLK if the set * will have no valid cpu based on restrictions from the parent. */ static int cpuset_init(struct cpuset *set, struct cpuset *parent, const cpuset_t *mask, struct domainset *domain, cpusetid_t id) { if (domain == NULL) domain = parent->cs_domain; if (mask == NULL) mask = &parent->cs_mask; if (!CPU_OVERLAP(&parent->cs_mask, mask)) return (EDEADLK); /* The domain must be prepared ahead of time. 
*/ if (!domainset_valid(parent->cs_domain, domain)) return (EDEADLK); CPU_COPY(mask, &set->cs_mask); LIST_INIT(&set->cs_children); refcount_init(&set->cs_ref, 1); set->cs_flags = 0; mtx_lock_spin(&cpuset_lock); set->cs_domain = domain; CPU_AND(&set->cs_mask, &set->cs_mask, &parent->cs_mask); set->cs_id = id; set->cs_parent = cpuset_ref(parent); LIST_INSERT_HEAD(&parent->cs_children, set, cs_siblings); if (set->cs_id != CPUSET_INVALID) LIST_INSERT_HEAD(&cpuset_ids, set, cs_link); mtx_unlock_spin(&cpuset_lock); return (0); } /* * Create a new non-anonymous set with the requested parent and mask. May * return failures if the mask is invalid or a new number can not be * allocated. * * If *setp is not NULL, then it will be used as-is. The caller must take * into account that *setp will be inserted at the head of cpuset_ids and * plan any potentially conflicting cs_link usage accordingly. */ static int cpuset_create(struct cpuset **setp, struct cpuset *parent, const cpuset_t *mask) { struct cpuset *set; cpusetid_t id; int error; bool dofree; id = alloc_unr(cpuset_unr); if (id == -1) return (ENFILE); dofree = (*setp == NULL); if (*setp != NULL) set = *setp; else *setp = set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO); error = cpuset_init(set, parent, mask, NULL, id); if (error == 0) return (0); free_unr(cpuset_unr, id); if (dofree) uma_zfree(cpuset_zone, set); return (error); } static void cpuset_freelist_add(struct setlist *list, int count) { struct cpuset *set; int i; for (i = 0; i < count; i++) { set = uma_zalloc(cpuset_zone, M_ZERO | M_WAITOK); LIST_INSERT_HEAD(list, set, cs_link); } } static void cpuset_freelist_init(struct setlist *list, int count) { LIST_INIT(list); cpuset_freelist_add(list, count); } static void cpuset_freelist_free(struct setlist *list) { struct cpuset *set; while ((set = LIST_FIRST(list)) != NULL) { LIST_REMOVE(set, cs_link); uma_zfree(cpuset_zone, set); } } static void domainset_freelist_add(struct domainlist *list, int count) { struct 
domainset *set; int i; for (i = 0; i < count; i++) { set = uma_zalloc(domainset_zone, M_ZERO | M_WAITOK); LIST_INSERT_HEAD(list, set, ds_link); } } static void domainset_freelist_init(struct domainlist *list, int count) { LIST_INIT(list); domainset_freelist_add(list, count); } static void domainset_freelist_free(struct domainlist *list) { struct domainset *set; while ((set = LIST_FIRST(list)) != NULL) { LIST_REMOVE(set, ds_link); uma_zfree(domainset_zone, set); } } /* Copy a domainset preserving mask and policy. */ static void domainset_copy(const struct domainset *from, struct domainset *to) { DOMAINSET_COPY(&from->ds_mask, &to->ds_mask); to->ds_policy = from->ds_policy; to->ds_prefer = from->ds_prefer; } /* Return 1 if mask and policy are equal, otherwise 0. */ static int domainset_equal(const struct domainset *one, const struct domainset *two) { return (DOMAINSET_CMP(&one->ds_mask, &two->ds_mask) == 0 && one->ds_policy == two->ds_policy && one->ds_prefer == two->ds_prefer); } /* Return 1 if child is a valid subset of parent. */ static int domainset_valid(const struct domainset *parent, const struct domainset *child) { if (child->ds_policy != DOMAINSET_POLICY_PREFER) return (DOMAINSET_SUBSET(&parent->ds_mask, &child->ds_mask)); return (DOMAINSET_ISSET(child->ds_prefer, &parent->ds_mask)); } static int domainset_restrict(const struct domainset *parent, const struct domainset *child) { if (child->ds_policy != DOMAINSET_POLICY_PREFER) return (DOMAINSET_OVERLAP(&parent->ds_mask, &child->ds_mask)); return (DOMAINSET_ISSET(child->ds_prefer, &parent->ds_mask)); } /* * Lookup or create a domainset. The key is provided in ds_mask and * ds_policy. If the domainset does not yet exist the storage in * 'domain' is used to insert. Otherwise this storage is freed to the * domainset_zone and the existing domainset is returned. 
*/ static struct domainset * _domainset_create(struct domainset *domain, struct domainlist *freelist) { struct domainset *ndomain; int i, j; KASSERT(domain->ds_cnt <= vm_ndomains, ("invalid domain count in domainset %p", domain)); KASSERT(domain->ds_policy != DOMAINSET_POLICY_PREFER || domain->ds_prefer < vm_ndomains, ("invalid preferred domain in domains %p", domain)); mtx_lock_spin(&cpuset_lock); LIST_FOREACH(ndomain, &cpuset_domains, ds_link) if (domainset_equal(ndomain, domain)) break; /* * If the domain does not yet exist we insert it and initialize * various iteration helpers which are not part of the key. */ if (ndomain == NULL) { LIST_INSERT_HEAD(&cpuset_domains, domain, ds_link); domain->ds_cnt = DOMAINSET_COUNT(&domain->ds_mask); for (i = 0, j = 0; i < DOMAINSET_FLS(&domain->ds_mask); i++) if (DOMAINSET_ISSET(i, &domain->ds_mask)) domain->ds_order[j++] = i; } mtx_unlock_spin(&cpuset_lock); if (ndomain == NULL) return (domain); if (freelist != NULL) LIST_INSERT_HEAD(freelist, domain, ds_link); else uma_zfree(domainset_zone, domain); return (ndomain); } /* * Are any of the domains in the mask empty? If so, silently * remove them and update the domainset accordingly. If only empty * domains are present, we must return failure. */ static bool domainset_empty_vm(struct domainset *domain) { domainset_t empty; int i, j; DOMAINSET_ZERO(&empty); for (i = 0; i < vm_ndomains; i++) if (VM_DOMAIN_EMPTY(i)) DOMAINSET_SET(i, &empty); if (DOMAINSET_SUBSET(&empty, &domain->ds_mask)) return (true); /* Remove empty domains from the set and recompute. */ DOMAINSET_ANDNOT(&domain->ds_mask, &empty); domain->ds_cnt = DOMAINSET_COUNT(&domain->ds_mask); for (i = j = 0; i < DOMAINSET_FLS(&domain->ds_mask); i++) if (DOMAINSET_ISSET(i, &domain->ds_mask)) domain->ds_order[j++] = i; /* Convert a PREFER policy referencing an empty domain to RR. 
*/ if (domain->ds_policy == DOMAINSET_POLICY_PREFER && DOMAINSET_ISSET(domain->ds_prefer, &empty)) { domain->ds_policy = DOMAINSET_POLICY_ROUNDROBIN; domain->ds_prefer = -1; } return (false); } /* * Create or lookup a domainset based on the key held in 'domain'. */ struct domainset * domainset_create(const struct domainset *domain) { struct domainset *ndomain; /* * Validate the policy. It must specify a useable policy number with * only valid domains. Preferred must include the preferred domain * in the mask. */ if (domain->ds_policy <= DOMAINSET_POLICY_INVALID || domain->ds_policy > DOMAINSET_POLICY_MAX) return (NULL); if (domain->ds_policy == DOMAINSET_POLICY_PREFER && !DOMAINSET_ISSET(domain->ds_prefer, &domain->ds_mask)) return (NULL); if (!DOMAINSET_SUBSET(&domainset0->ds_mask, &domain->ds_mask)) return (NULL); ndomain = uma_zalloc(domainset_zone, M_WAITOK | M_ZERO); domainset_copy(domain, ndomain); return _domainset_create(ndomain, NULL); } /* * Update thread domainset pointers. */ static void domainset_notify(void) { struct thread *td; struct proc *p; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); td->td_domain.dr_policy = td->td_cpuset->cs_domain; thread_unlock(td); } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); kernel_object->domain.dr_policy = cpuset_kernel->cs_domain; } /* * Create a new set that is a subset of a parent. */ static struct domainset * domainset_shadow(const struct domainset *pdomain, const struct domainset *domain, struct domainlist *freelist) { struct domainset *ndomain; ndomain = LIST_FIRST(freelist); LIST_REMOVE(ndomain, ds_link); /* * Initialize the key from the request. */ domainset_copy(domain, ndomain); /* * Restrict the key by the parent. 
*/ DOMAINSET_AND(&ndomain->ds_mask, &pdomain->ds_mask); return _domainset_create(ndomain, freelist); } /* * Recursively check for errors that would occur from applying mask to * the tree of sets starting at 'set'. Checks for sets that would become * empty as well as RDONLY flags. */ static int cpuset_testupdate(struct cpuset *set, cpuset_t *mask, int augment_mask) { struct cpuset *nset; cpuset_t newmask; int error; mtx_assert(&cpuset_lock, MA_OWNED); if (set->cs_flags & CPU_SET_RDONLY) return (EPERM); if (augment_mask) { CPU_AND(&newmask, &set->cs_mask, mask); } else CPU_COPY(mask, &newmask); if (CPU_EMPTY(&newmask)) return (EDEADLK); error = 0; LIST_FOREACH(nset, &set->cs_children, cs_siblings) if ((error = cpuset_testupdate(nset, &newmask, 1)) != 0) break; return (error); } /* * Applies the mask 'mask' without checking for empty sets or permissions. */ static void cpuset_update(struct cpuset *set, cpuset_t *mask) { struct cpuset *nset; mtx_assert(&cpuset_lock, MA_OWNED); CPU_AND(&set->cs_mask, &set->cs_mask, mask); LIST_FOREACH(nset, &set->cs_children, cs_siblings) cpuset_update(nset, &set->cs_mask); return; } /* * Modify the set 'set' to use a copy of the mask provided. Apply this new * mask to restrict all children in the tree. Checks for validity before * applying the changes. */ static int cpuset_modify(struct cpuset *set, cpuset_t *mask) { struct cpuset *root; int error; error = priv_check(curthread, PRIV_SCHED_CPUSET); if (error) return (error); /* * In case we are called from within the jail, * we do not allow modifying the dedicated root * cpuset of the jail but may still allow to * change child sets, including subordinate jails' * roots. */ if ((set->cs_flags & CPU_SET_ROOT) != 0 && jailed(curthread->td_ucred) && set == curthread->td_ucred->cr_prison->pr_cpuset) return (EPERM); /* * Verify that we have access to this set of * cpus. 
*/ if ((set->cs_flags & (CPU_SET_ROOT | CPU_SET_RDONLY)) == CPU_SET_ROOT) { KASSERT(set->cs_parent != NULL, ("jail.cpuset=%d is not a proper child of parent jail's root.", set->cs_id)); /* * cpuset_getroot() cannot work here due to how top-level jail * roots are constructed. Top-level jails are parented to * thread0's cpuset (i.e. cpuset 1) rather than the system root. */ root = set->cs_parent; } else { root = cpuset_getroot(set); } mtx_lock_spin(&cpuset_lock); if (root && !CPU_SUBSET(&root->cs_mask, mask)) { error = EINVAL; goto out; } error = cpuset_testupdate(set, mask, 0); if (error) goto out; CPU_COPY(mask, &set->cs_mask); cpuset_update(set, mask); out: mtx_unlock_spin(&cpuset_lock); return (error); } /* * Recursively check for errors that would occur from applying mask to * the tree of sets starting at 'set'. Checks for sets that would become * empty as well as RDONLY flags. */ static int cpuset_testupdate_domain(struct cpuset *set, struct domainset *dset, struct domainset *orig, int *count, int augment_mask __unused) { struct cpuset *nset; struct domainset *domain; struct domainset newset; int error; mtx_assert(&cpuset_lock, MA_OWNED); if (set->cs_flags & CPU_SET_RDONLY) return (EPERM); domain = set->cs_domain; domainset_copy(domain, &newset); if (!domainset_equal(domain, orig)) { if (!domainset_restrict(domain, dset)) return (EDEADLK); DOMAINSET_AND(&newset.ds_mask, &dset->ds_mask); /* Count the number of domains that are changing. */ (*count)++; } error = 0; LIST_FOREACH(nset, &set->cs_children, cs_siblings) if ((error = cpuset_testupdate_domain(nset, &newset, domain, count, 1)) != 0) break; return (error); } /* * Applies the mask 'mask' without checking for empty sets or permissions. */ static void cpuset_update_domain(struct cpuset *set, struct domainset *domain, struct domainset *orig, struct domainlist *domains) { struct cpuset *nset; mtx_assert(&cpuset_lock, MA_OWNED); /* * If this domainset has changed from the parent we must calculate * a new set. 
Otherwise it simply inherits from the parent. When * we inherit from the parent we get a new mask and policy. If the * set is modified from the parent we keep the policy and only * update the mask. */ if (set->cs_domain != orig) { orig = set->cs_domain; set->cs_domain = domainset_shadow(domain, orig, domains); } else set->cs_domain = domain; LIST_FOREACH(nset, &set->cs_children, cs_siblings) cpuset_update_domain(nset, set->cs_domain, orig, domains); return; } /* * Modify the set 'set' to use a copy the domainset provided. Apply this new * mask to restrict all children in the tree. Checks for validity before * applying the changes. */ static int cpuset_modify_domain(struct cpuset *set, struct domainset *domain) { struct domainlist domains; struct domainset temp; struct domainset *dset; struct cpuset *root; int ndomains, needed; int error; error = priv_check(curthread, PRIV_SCHED_CPUSET); if (error) return (error); /* * In case we are called from within the jail * we do not allow modifying the dedicated root * cpuset of the jail but may still allow to * change child sets. */ if (jailed(curthread->td_ucred) && set->cs_flags & CPU_SET_ROOT) return (EPERM); domainset_freelist_init(&domains, 0); domain = domainset_create(domain); ndomains = 0; mtx_lock_spin(&cpuset_lock); for (;;) { root = cpuset_getroot(set); dset = root->cs_domain; /* * Verify that we have access to this set of domains. */ if (!domainset_valid(dset, domain)) { error = EINVAL; goto out; } /* * If applying prefer we keep the current set as the fallback. */ if (domain->ds_policy == DOMAINSET_POLICY_PREFER) DOMAINSET_COPY(&set->cs_domain->ds_mask, &domain->ds_mask); /* * Determine whether we can apply this set of domains and * how many new domain structures it will require. */ domainset_copy(domain, &temp); needed = 0; error = cpuset_testupdate_domain(set, &temp, set->cs_domain, &needed, 0); if (error) goto out; if (ndomains >= needed) break; /* Dropping the lock; we'll need to re-evaluate again. 
*/ mtx_unlock_spin(&cpuset_lock); domainset_freelist_add(&domains, needed - ndomains); ndomains = needed; mtx_lock_spin(&cpuset_lock); } dset = set->cs_domain; cpuset_update_domain(set, domain, dset, &domains); out: mtx_unlock_spin(&cpuset_lock); domainset_freelist_free(&domains); if (error == 0) domainset_notify(); return (error); } /* * Resolve the 'which' parameter of several cpuset apis. * * For WHICH_PID and WHICH_TID return a locked proc and valid proc/tid. Also * checks for permission via p_cansched(). * * For WHICH_SET returns a valid set with a new reference. * * -1 may be supplied for any argument to mean the current proc/thread or * the base set of the current thread. May fail with ESRCH/EPERM. */ int cpuset_which(cpuwhich_t which, id_t id, struct proc **pp, struct thread **tdp, struct cpuset **setp) { struct cpuset *set; struct thread *td; struct proc *p; int error; *pp = p = NULL; *tdp = td = NULL; *setp = set = NULL; switch (which) { case CPU_WHICH_PID: if (id == -1) { PROC_LOCK(curproc); p = curproc; break; } if ((p = pfind(id)) == NULL) return (ESRCH); break; case CPU_WHICH_TID: if (id == -1) { PROC_LOCK(curproc); p = curproc; td = curthread; break; } td = tdfind(id, -1); if (td == NULL) return (ESRCH); p = td->td_proc; break; case CPU_WHICH_CPUSET: if (id == -1) { thread_lock(curthread); set = cpuset_refbase(curthread->td_cpuset); thread_unlock(curthread); } else set = cpuset_lookup(id, curthread); if (set) { *setp = set; return (0); } return (ESRCH); case CPU_WHICH_JAIL: { /* Find `set' for prison with given id. 
*/ struct prison *pr; sx_slock(&allprison_lock); pr = prison_find_child(curthread->td_ucred->cr_prison, id); sx_sunlock(&allprison_lock); if (pr == NULL) return (ESRCH); cpuset_ref(pr->pr_cpuset); *setp = pr->pr_cpuset; mtx_unlock(&pr->pr_mtx); return (0); } case CPU_WHICH_IRQ: case CPU_WHICH_DOMAIN: return (0); default: return (EINVAL); } error = p_cansched(curthread, p); if (error) { PROC_UNLOCK(p); return (error); } if (td == NULL) td = FIRST_THREAD_IN_PROC(p); *pp = p; *tdp = td; return (0); } static int cpuset_testshadow(struct cpuset *set, const cpuset_t *mask, const struct domainset *domain) { struct cpuset *parent; struct domainset *dset; parent = cpuset_getbase(set); /* * If we are restricting a cpu mask it must be a subset of the * parent or invalid CPUs have been specified. */ if (mask != NULL && !CPU_SUBSET(&parent->cs_mask, mask)) return (EINVAL); /* * If we are restricting a domain mask it must be a subset of the * parent or invalid domains have been specified. */ dset = parent->cs_domain; if (domain != NULL && !domainset_valid(dset, domain)) return (EINVAL); return (0); } /* * Create an anonymous set with the provided mask in the space provided by * 'nset'. If the passed in set is anonymous we use its parent otherwise * the new set is a child of 'set'. 
*/ static int cpuset_shadow(struct cpuset *set, struct cpuset **nsetp, const cpuset_t *mask, const struct domainset *domain, struct setlist *cpusets, struct domainlist *domains) { struct cpuset *parent; struct cpuset *nset; struct domainset *dset; struct domainset *d; int error; error = cpuset_testshadow(set, mask, domain); if (error) return (error); parent = cpuset_getbase(set); dset = parent->cs_domain; if (mask == NULL) mask = &set->cs_mask; if (domain != NULL) d = domainset_shadow(dset, domain, domains); else d = set->cs_domain; nset = LIST_FIRST(cpusets); error = cpuset_init(nset, parent, mask, d, CPUSET_INVALID); if (error == 0) { LIST_REMOVE(nset, cs_link); *nsetp = nset; } return (error); } static struct cpuset * cpuset_update_thread(struct thread *td, struct cpuset *nset) { struct cpuset *tdset; tdset = td->td_cpuset; td->td_cpuset = nset; td->td_domain.dr_policy = nset->cs_domain; sched_affinity(td); return (tdset); } static int cpuset_setproc_test_maskthread(struct cpuset *tdset, cpuset_t *mask, struct domainset *domain) { struct cpuset *parent; parent = cpuset_getbase(tdset); if (mask == NULL) mask = &tdset->cs_mask; if (domain == NULL) domain = tdset->cs_domain; return cpuset_testshadow(parent, mask, domain); } static int cpuset_setproc_maskthread(struct cpuset *tdset, cpuset_t *mask, struct domainset *domain, struct cpuset **nsetp, struct setlist *freelist, struct domainlist *domainlist) { struct cpuset *parent; parent = cpuset_getbase(tdset); if (mask == NULL) mask = &tdset->cs_mask; if (domain == NULL) domain = tdset->cs_domain; return cpuset_shadow(parent, nsetp, mask, domain, freelist, domainlist); } static int cpuset_setproc_setthread_mask(struct cpuset *tdset, struct cpuset *set, cpuset_t *mask, struct domainset *domain) { struct cpuset *parent; parent = cpuset_getbase(tdset); /* * If the thread restricted its mask then apply that same * restriction to the new set, otherwise take it wholesale. 
*/ if (CPU_CMP(&tdset->cs_mask, &parent->cs_mask) != 0) { CPU_AND(mask, &tdset->cs_mask, &set->cs_mask); } else CPU_COPY(&set->cs_mask, mask); /* * If the thread restricted the domain then we apply the * restriction to the new set but retain the policy. */ if (tdset->cs_domain != parent->cs_domain) { domainset_copy(tdset->cs_domain, domain); DOMAINSET_AND(&domain->ds_mask, &set->cs_domain->ds_mask); } else domainset_copy(set->cs_domain, domain); if (CPU_EMPTY(mask) || DOMAINSET_EMPTY(&domain->ds_mask)) return (EDEADLK); return (0); } static int cpuset_setproc_test_setthread(struct cpuset *tdset, struct cpuset *set) { struct domainset domain; cpuset_t mask; if (tdset->cs_id != CPUSET_INVALID) return (0); return cpuset_setproc_setthread_mask(tdset, set, &mask, &domain); } static int cpuset_setproc_setthread(struct cpuset *tdset, struct cpuset *set, struct cpuset **nsetp, struct setlist *freelist, struct domainlist *domainlist) { struct domainset domain; cpuset_t mask; int error; /* * If we're replacing on a thread that has not constrained the * original set we can simply accept the new set. */ if (tdset->cs_id != CPUSET_INVALID) { *nsetp = cpuset_ref(set); return (0); } error = cpuset_setproc_setthread_mask(tdset, set, &mask, &domain); if (error) return (error); return cpuset_shadow(set, nsetp, &mask, &domain, freelist, domainlist); } static int cpuset_setproc_newbase(struct thread *td, struct cpuset *set, struct cpuset *nroot, struct cpuset **nsetp, struct setlist *cpusets, struct domainlist *domainlist) { struct domainset ndomain; cpuset_t nmask; struct cpuset *pbase; int error; pbase = cpuset_getbase(td->td_cpuset); /* Copy process mask, then further apply the new root mask. */ CPU_AND(&nmask, &pbase->cs_mask, &nroot->cs_mask); domainset_copy(pbase->cs_domain, &ndomain); DOMAINSET_AND(&ndomain.ds_mask, &set->cs_domain->ds_mask); /* Policy is too restrictive, will not work. 
*/ if (CPU_EMPTY(&nmask) || DOMAINSET_EMPTY(&ndomain.ds_mask)) return (EDEADLK); /* * Remove pbase from the freelist in advance, it'll be pushed to * cpuset_ids on success. We assume here that cpuset_create() will not * touch pbase on failure, and we just enqueue it back to the freelist * to remain in a consistent state. */ pbase = LIST_FIRST(cpusets); LIST_REMOVE(pbase, cs_link); error = cpuset_create(&pbase, set, &nmask); if (error != 0) { LIST_INSERT_HEAD(cpusets, pbase, cs_link); return (error); } /* Duplicates some work from above... oh well. */ pbase->cs_domain = domainset_shadow(set->cs_domain, &ndomain, domainlist); *nsetp = pbase; return (0); } /* * Handle four cases for updating an entire process. * * 1) Set is non-null and the process is not rebasing onto a new root. This * reparents all anonymous sets to the provided set and replaces all * non-anonymous td_cpusets with the provided set. * 2) Set is non-null and the process is rebasing onto a new root. This * creates a new base set if the process previously had its own base set, * then reparents all anonymous sets either to that set or the provided set * if one was not created. Non-anonymous sets are similarly replaced. * 3) Mask is non-null. This replaces or creates anonymous sets for every * thread with the existing base as a parent. * 4) domain is non-null. This creates anonymous sets for every thread * and replaces the domain set. * * This is overly complicated because we can't allocate while holding a * spinlock and spinlocks must be held while changing and examining thread * state. */ static int cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask, struct domainset *domain, bool rebase) { struct setlist freelist; struct setlist droplist; struct domainlist domainlist; struct cpuset *base, *nset, *nroot, *tdroot; struct thread *td; struct proc *p; int needed; int nfree; int error; /* * The algorithm requires two passes due to locking considerations. 
* * 1) Lookup the process and acquire the locks in the required order. * 2) If enough cpusets have not been allocated release the locks and * allocate them. Loop. */ cpuset_freelist_init(&freelist, 1); domainset_freelist_init(&domainlist, 1); nfree = 1; LIST_INIT(&droplist); nfree = 0; base = set; nroot = NULL; if (set != NULL) nroot = cpuset_getroot(set); for (;;) { error = cpuset_which(CPU_WHICH_PID, pid, &p, &td, &nset); if (error) goto out; tdroot = cpuset_getroot(td->td_cpuset); needed = p->p_numthreads; if (set != NULL && rebase && tdroot != nroot) needed++; if (nfree >= needed) break; PROC_UNLOCK(p); if (nfree < needed) { cpuset_freelist_add(&freelist, needed - nfree); domainset_freelist_add(&domainlist, needed - nfree); nfree = needed; } } PROC_LOCK_ASSERT(p, MA_OWNED); /* * If we're changing roots and the root set is what has been specified * as the parent, then we'll check if the process was previously using * the root set and, if it wasn't, create a new base with the process's * mask applied to it. * * If the new root is incompatible with the existing mask, then we allow * the process to take on the new root if and only if they have * privilege to widen their mask anyways. Unprivileged processes get * rejected with EDEADLK. */ if (set != NULL && rebase && nroot != tdroot) { cpusetid_t base_id, root_id; root_id = td->td_ucred->cr_prison->pr_cpuset->cs_id; base_id = cpuset_getbase(td->td_cpuset)->cs_id; if (base_id != root_id) { error = cpuset_setproc_newbase(td, set, nroot, &base, &freelist, &domainlist); if (error == EDEADLK && priv_check(td, PRIV_SCHED_CPUSET) == 0) error = 0; if (error != 0) goto unlock_out; } } /* * Now that the appropriate locks are held and we have enough cpusets, * make sure the operation will succeed before applying changes. The * proc lock prevents td_cpuset from changing between calls. 
*/ error = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (set != NULL) error = cpuset_setproc_test_setthread(td->td_cpuset, base); else error = cpuset_setproc_test_maskthread(td->td_cpuset, mask, domain); thread_unlock(td); if (error) goto unlock_out; } /* * Replace each thread's cpuset while using deferred release. We * must do this because the thread lock must be held while operating * on the thread and this limits the type of operations allowed. */ FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (set != NULL) error = cpuset_setproc_setthread(td->td_cpuset, base, &nset, &freelist, &domainlist); else error = cpuset_setproc_maskthread(td->td_cpuset, mask, domain, &nset, &freelist, &domainlist); if (error) { thread_unlock(td); break; } cpuset_rel_defer(&droplist, cpuset_update_thread(td, nset)); thread_unlock(td); } unlock_out: PROC_UNLOCK(p); out: if (base != NULL && base != set) cpuset_rel(base); while ((nset = LIST_FIRST(&droplist)) != NULL) cpuset_rel_complete(nset); cpuset_freelist_free(&freelist); domainset_freelist_free(&domainlist); return (error); } static int bitset_strprint(char *buf, size_t bufsiz, const struct bitset *set, int setlen) { size_t bytes; int i, once; char *p; once = 0; p = buf; for (i = 0; i < __bitset_words(setlen); i++) { if (once != 0) { if (bufsiz < 1) return (0); *p = ','; p++; bufsiz--; } else once = 1; if (bufsiz < sizeof(__STRING(ULONG_MAX))) return (0); bytes = snprintf(p, bufsiz, "%lx", set->__bits[i]); p += bytes; bufsiz -= bytes; } return (p - buf); } static int bitset_strscan(struct bitset *set, int setlen, const char *buf) { int i, ret; const char *p; BIT_ZERO(setlen, set); p = buf; for (i = 0; i < __bitset_words(setlen); i++) { if (*p == ',') { p++; continue; } ret = sscanf(p, "%lx", &set->__bits[i]); if (ret == 0 || ret == -1) break; while (isxdigit(*p)) p++; } return (p - buf); } /* * Return a string representing a valid layout for a cpuset_t object. 
* It expects an incoming buffer at least sized as CPUSETBUFSIZ. */ char * cpusetobj_strprint(char *buf, const cpuset_t *set) { bitset_strprint(buf, CPUSETBUFSIZ, (const struct bitset *)set, CPU_SETSIZE); return (buf); } /* * Build a valid cpuset_t object from a string representation. * It expects an incoming buffer at least sized as CPUSETBUFSIZ. */ int cpusetobj_strscan(cpuset_t *set, const char *buf) { char p; if (strlen(buf) > CPUSETBUFSIZ - 1) return (-1); p = buf[bitset_strscan((struct bitset *)set, CPU_SETSIZE, buf)]; if (p != '\0') return (-1); return (0); } /* * Handle a domainset specifier in the sysctl tree. A poiner to a pointer to * a domainset is in arg1. If the user specifies a valid domainset the * pointer is updated. * * Format is: * hex mask word 0,hex mask word 1,...:decimal policy:decimal preferred */ int sysctl_handle_domainset(SYSCTL_HANDLER_ARGS) { char buf[DOMAINSETBUFSIZ]; struct domainset *dset; struct domainset key; int policy, prefer, error; char *p; dset = *(struct domainset **)arg1; error = 0; if (dset != NULL) { p = buf + bitset_strprint(buf, DOMAINSETBUFSIZ, (const struct bitset *)&dset->ds_mask, DOMAINSET_SETSIZE); sprintf(p, ":%d:%d", dset->ds_policy, dset->ds_prefer); } else sprintf(buf, ""); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); /* * Read in and validate the string. */ memset(&key, 0, sizeof(key)); p = &buf[bitset_strscan((struct bitset *)&key.ds_mask, DOMAINSET_SETSIZE, buf)]; if (p == buf) return (EINVAL); if (sscanf(p, ":%d:%d", &policy, &prefer) != 2) return (EINVAL); key.ds_policy = policy; key.ds_prefer = prefer; /* Domainset_create() validates the policy.*/ dset = domainset_create(&key); if (dset == NULL) return (EINVAL); *(struct domainset **)arg1 = dset; return (error); } /* * Apply an anonymous mask or a domain to a single thread. 
*/
static int
_cpuset_setthread(lwpid_t id, cpuset_t *mask, struct domainset *domain)
{
	struct domainlist dfree;
	struct setlist sfree;
	struct cpuset *fresh;
	struct cpuset *old;
	struct thread *td;
	struct proc *p;
	int error;

	/* Preallocate everything that may be needed under the thread lock. */
	cpuset_freelist_init(&sfree, 1);
	domainset_freelist_init(&dfree, domain != NULL);

	error = cpuset_which(CPU_WHICH_TID, id, &p, &td, &old);
	if (error == 0) {
		/* Only the thread is of interest; forget the returned set. */
		old = NULL;
		thread_lock(td);
		error = cpuset_shadow(td->td_cpuset, &fresh, mask, domain,
		    &sfree, &dfree);
		if (error == 0)
			old = cpuset_update_thread(td, fresh);
		thread_unlock(td);
		PROC_UNLOCK(p);
		/* Release the replaced set once no locks are held. */
		if (old)
			cpuset_rel(old);
	}

	cpuset_freelist_free(&sfree);
	domainset_freelist_free(&dfree);
	return (error);
}

/*
 * Apply an anonymous mask to a single thread; the domain is left untouched.
 */
int
cpuset_setthread(lwpid_t id, cpuset_t *mask)
{

	return (_cpuset_setthread(id, mask, NULL));
}

/*
 * Apply new cpumask to the ithread: the single CPU 'cpu', or a copy of
 * cpuset_root when 'cpu' is NOCPU.
 */
int
cpuset_setithread(lwpid_t id, int cpu)
{
	cpuset_t mask;

	if (cpu == NOCPU) {
		CPU_COPY(cpuset_root, &mask);
	} else {
		CPU_ZERO(&mask);
		CPU_SET(cpu, &mask);
	}
	return (_cpuset_setthread(id, &mask, NULL));
}

/*
 * Initialize static domainsets after NUMA information is available.  This is
 * called before memory allocators are initialized.
*/ void domainset_init(void) { struct domainset *dset; int i; dset = &domainset_firsttouch; DOMAINSET_COPY(&all_domains, &dset->ds_mask); dset->ds_policy = DOMAINSET_POLICY_FIRSTTOUCH; dset->ds_prefer = -1; _domainset_create(dset, NULL); dset = &domainset_interleave; DOMAINSET_COPY(&all_domains, &dset->ds_mask); dset->ds_policy = DOMAINSET_POLICY_INTERLEAVE; dset->ds_prefer = -1; _domainset_create(dset, NULL); dset = &domainset_roundrobin; DOMAINSET_COPY(&all_domains, &dset->ds_mask); dset->ds_policy = DOMAINSET_POLICY_ROUNDROBIN; dset->ds_prefer = -1; _domainset_create(dset, NULL); for (i = 0; i < vm_ndomains; i++) { dset = &domainset_fixed[i]; DOMAINSET_ZERO(&dset->ds_mask); DOMAINSET_SET(i, &dset->ds_mask); dset->ds_policy = DOMAINSET_POLICY_ROUNDROBIN; _domainset_create(dset, NULL); dset = &domainset_prefer[i]; DOMAINSET_COPY(&all_domains, &dset->ds_mask); dset->ds_policy = DOMAINSET_POLICY_PREFER; dset->ds_prefer = i; _domainset_create(dset, NULL); } } /* * Define the domainsets for cpuset 0, 1 and cpuset 2. */ void domainset_zero(void) { struct domainset *dset, *tmp; mtx_init(&cpuset_lock, "cpuset", NULL, MTX_SPIN | MTX_RECURSE); domainset0 = &domainset_firsttouch; curthread->td_domain.dr_policy = domainset0; domainset2 = &domainset_interleave; kernel_object->domain.dr_policy = domainset2; /* Remove empty domains from the global policies. */ LIST_FOREACH_SAFE(dset, &cpuset_domains, ds_link, tmp) if (domainset_empty_vm(dset)) LIST_REMOVE(dset, ds_link); } /* * Creates system-wide cpusets and the cpuset for thread0 including three * sets: * * 0 - The root set which should represent all valid processors in the * system. This set is immutable. * 1 - The default set which all processes are a member of until changed. * This allows an administrator to move all threads off of given cpus to * dedicate them to high priority tasks or save power etc. * 2 - The kernel set which allows restriction and policy to be applied only * to kernel threads and the kernel_object. 
*/ struct cpuset * cpuset_thread0(void) { struct cpuset *set; int i; int error __unused; cpuset_zone = uma_zcreate("cpuset", sizeof(struct cpuset), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); domainset_zone = uma_zcreate("domainset", sizeof(struct domainset), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); /* * Create the root system set (0) for the whole machine. Doesn't use * cpuset_create() due to NULL parent. */ set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO); CPU_COPY(&all_cpus, &set->cs_mask); LIST_INIT(&set->cs_children); LIST_INSERT_HEAD(&cpuset_ids, set, cs_link); refcount_init(&set->cs_ref, 1); set->cs_flags = CPU_SET_ROOT | CPU_SET_RDONLY; set->cs_domain = domainset0; cpuset_zero = set; cpuset_root = &set->cs_mask; /* * Now derive a default (1), modifiable set from that to give out. */ set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO); error = cpuset_init(set, cpuset_zero, NULL, NULL, 1); KASSERT(error == 0, ("Error creating default set: %d\n", error)); cpuset_default = set; /* * Create the kernel set (2). */ set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO); error = cpuset_init(set, cpuset_zero, NULL, NULL, 2); KASSERT(error == 0, ("Error creating kernel set: %d\n", error)); set->cs_domain = domainset2; cpuset_kernel = set; /* * Initialize the unit allocator. 0 and 1 are allocated above. */ cpuset_unr = new_unrhdr(3, INT_MAX, NULL); /* * If MD code has not initialized per-domain cpusets, place all * CPUs in domain 0. */ for (i = 0; i < MAXMEMDOM; i++) if (!CPU_EMPTY(&cpuset_domain[i])) goto domains_set; CPU_COPY(&all_cpus, &cpuset_domain[0]); domains_set: return (cpuset_default); } void cpuset_kernthread(struct thread *td) { struct cpuset *set; thread_lock(td); set = td->td_cpuset; td->td_cpuset = cpuset_ref(cpuset_kernel); thread_unlock(td); cpuset_rel(set); } /* * Create a cpuset, which would be cpuset_create() but * mark the new 'set' as root. * * We are not going to reparent the td to it. Use cpuset_setproc_update_set() * for that. 
* * In case of no error, returns the set in *setp locked with a reference. */ int cpuset_create_root(struct prison *pr, struct cpuset **setp) { struct cpuset *set; int error; KASSERT(pr != NULL, ("[%s:%d] invalid pr", __func__, __LINE__)); KASSERT(setp != NULL, ("[%s:%d] invalid setp", __func__, __LINE__)); set = NULL; error = cpuset_create(&set, pr->pr_cpuset, &pr->pr_cpuset->cs_mask); if (error) return (error); KASSERT(set != NULL, ("[%s:%d] cpuset_create returned invalid data", __func__, __LINE__)); /* Mark the set as root. */ set->cs_flags |= CPU_SET_ROOT; *setp = set; return (0); } int cpuset_setproc_update_set(struct proc *p, struct cpuset *set) { int error; KASSERT(p != NULL, ("[%s:%d] invalid proc", __func__, __LINE__)); KASSERT(set != NULL, ("[%s:%d] invalid set", __func__, __LINE__)); cpuset_ref(set); error = cpuset_setproc(p->p_pid, set, NULL, NULL, true); if (error) return (error); cpuset_rel(set); return (0); } /* * In Capability mode, the only accesses that are permitted are to the current * thread and process' CPU and domain sets. 
*/
/*
 * Returns ECAPMODE when 'td' is in capability mode and the request is not a
 * CPU_LEVEL_WHICH access to a TID or PID that is either -1 or the caller's
 * own thread/process id; returns 0 otherwise.
 */
static int
cpuset_check_capabilities(struct thread *td, cpulevel_t level, cpuwhich_t which,
    id_t id)
{
	if (IN_CAPABILITY_MODE(td)) {
		if (level != CPU_LEVEL_WHICH)
			return (ECAPMODE);
		if (which != CPU_WHICH_TID && which != CPU_WHICH_PID)
			return (ECAPMODE);
		if (id != -1 &&
		    !(which == CPU_WHICH_TID && id == td->td_tid) &&
		    !(which == CPU_WHICH_PID && id == td->td_proc->p_pid))
			return (ECAPMODE);
	}
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct cpuset_args {
	cpusetid_t	*setid;
};
#endif
/*
 * Create a new set under the calling thread's root set, copy its id out to
 * uap->setid and, if that succeeded, hand the set to cpuset_setproc() with
 * pid -1 (presumably "the calling process" -- confirm against cpuset_which()).
 */
int
sys_cpuset(struct thread *td, struct cpuset_args *uap)
{
	struct cpuset *root;
	struct cpuset *set;
	int error;

	thread_lock(td);
	root = cpuset_refroot(td->td_cpuset);
	thread_unlock(td);
	set = NULL;
	error = cpuset_create(&set, root, &root->cs_mask);
	/* The root reference was only needed for the create call. */
	cpuset_rel(root);
	if (error)
		return (error);
	error = copyout(&set->cs_id, uap->setid, sizeof(set->cs_id));
	if (error == 0)
		error = cpuset_setproc(-1, set, NULL, NULL, false);
	/* Drop the local reference whether or not the set was installed. */
	cpuset_rel(set);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct cpuset_setid_args {
	cpuwhich_t	which;
	id_t		id;
	cpusetid_t	setid;
};
#endif
/* Syscall entry point: unpacks 'uap' and calls kern_cpuset_setid(). */
int
sys_cpuset_setid(struct thread *td, struct cpuset_setid_args *uap)
{

	return (kern_cpuset_setid(td, uap->which, uap->id, uap->setid));
}

/*
 * Assign process 'id' to the numbered set 'setid'.
 *
 * Returns EINVAL unless 'which' is CPU_WHICH_PID, ESRCH when the set cannot
 * be looked up, and otherwise the cpuset_setproc() result.
 */
int
kern_cpuset_setid(struct thread *td, cpuwhich_t which,
    id_t id, cpusetid_t setid)
{
	struct cpuset *set;
	int error;

	/*
	 * Presently we only support per-process sets.
	 */
	if (which != CPU_WHICH_PID)
		return (EINVAL);
	set = cpuset_lookup(setid, td);
	if (set == NULL)
		return (ESRCH);
	error = cpuset_setproc(id, set, NULL, NULL, false);
	/* Release the reference on the looked-up set. */
	cpuset_rel(set);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct cpuset_getid_args {
	cpulevel_t	level;
	cpuwhich_t	which;
	id_t		id;
	cpusetid_t	*setid;
};
#endif
/* Syscall entry point: unpacks 'uap' and calls kern_cpuset_getid(). */
int
sys_cpuset_getid(struct thread *td, struct cpuset_getid_args *uap)
{

	return (kern_cpuset_getid(td, uap->level, uap->which, uap->id,
	    uap->setid));
}

/*
 * Copy out to 'setid' the id of the set selected by level/which/id.
 *
 * CPU_LEVEL_WHICH is only accepted together with CPU_WHICH_CPUSET;
 * CPU_WHICH_IRQ and CPU_WHICH_DOMAIN are rejected with EINVAL.
 */
int
kern_cpuset_getid(struct thread *td, cpulevel_t level, cpuwhich_t which,
    id_t id, cpusetid_t *setid)
{
	struct cpuset *nset;
	struct cpuset *set;
	struct thread *ttd;
	struct proc *p;
	cpusetid_t tmpid;
	int error;

	if (level == CPU_LEVEL_WHICH && which != CPU_WHICH_CPUSET)
		return (EINVAL);
	error = cpuset_which(which, id, &p, &ttd, &set);
	if (error)
		return (error);
	switch (which) {
	case CPU_WHICH_TID:
	case CPU_WHICH_PID:
		/* Take a reference on the thread's base set, then unlock. */
		thread_lock(ttd);
		set = cpuset_refbase(ttd->td_cpuset);
		thread_unlock(ttd);
		PROC_UNLOCK(p);
		break;
	case CPU_WHICH_CPUSET:
	case CPU_WHICH_JAIL:
		break;
	case CPU_WHICH_IRQ:
	case CPU_WHICH_DOMAIN:
		return (EINVAL);
	}
	switch (level) {
	case CPU_LEVEL_ROOT:
		/* Swap the held reference for one on the root set. */
		nset = cpuset_refroot(set);
		cpuset_rel(set);
		set = nset;
		break;
	case CPU_LEVEL_CPUSET:
		break;
	case CPU_LEVEL_WHICH:
		break;
	}
	/* Read the id before the reference is released. */
	tmpid = set->cs_id;
	cpuset_rel(set);
	if (error == 0)
		error = copyout(&tmpid, setid, sizeof(tmpid));

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct cpuset_getaffinity_args {
	cpulevel_t	level;
	cpuwhich_t	which;
	id_t		id;
	size_t		cpusetsize;
	cpuset_t	*mask;
};
#endif
/* Syscall entry point: unpacks 'uap' and calls kern_cpuset_getaffinity(). */
int
sys_cpuset_getaffinity(struct thread *td, struct cpuset_getaffinity_args *uap)
{

	return (kern_cpuset_getaffinity(td, uap->level, uap->which,
	    uap->id, uap->cpusetsize, uap->mask));
}

/*
 * Copy the CPU mask selected by level/which/id out to the user buffer
 * 'maskp' of 'cpusetsize' bytes.
 *
 * Returns ERANGE when the buffer cannot hold the highest set bit, EFAULT
 * when zero-filling the user buffer fails, EINVAL for unsupported
 * level/which combinations, and otherwise the error of the failing helper.
 */
int
kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
    id_t id, size_t cpusetsize, cpuset_t *maskp)
{
	struct thread *ttd;
	struct cpuset *nset;
	struct cpuset *set;
	struct proc *p;
	cpuset_t *mask;
	int error;
	size_t size;

	error = cpuset_check_capabilities(td, level, which, id);
	if (error != 0)
		return (error);
	/* Zeroed scratch mask; the CPU_WHICH_PID case ORs into it. */
	mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);
	error = cpuset_which(which, id, &p, &ttd, &set);
	if (error)
		goto out;
	switch (level) {
	case CPU_LEVEL_ROOT:
	case CPU_LEVEL_CPUSET:
		switch (which) {
		case CPU_WHICH_TID:
		case CPU_WHICH_PID:
			thread_lock(ttd);
			set = cpuset_ref(ttd->td_cpuset);
			thread_unlock(ttd);
			break;
		case CPU_WHICH_CPUSET:
		case CPU_WHICH_JAIL:
			break;
		case CPU_WHICH_IRQ:
		case CPU_WHICH_INTRHANDLER:
		case CPU_WHICH_ITHREAD:
		case CPU_WHICH_DOMAIN:
			error = EINVAL;
			goto out;
		}
		if (level == CPU_LEVEL_ROOT)
			nset = cpuset_refroot(set);
		else
			nset = cpuset_refbase(set);
		CPU_COPY(&nset->cs_mask, mask);
		cpuset_rel(nset);
		break;
	case CPU_LEVEL_WHICH:
		switch (which) {
		case CPU_WHICH_TID:
			thread_lock(ttd);
			CPU_COPY(&ttd->td_cpuset->cs_mask, mask);
			thread_unlock(ttd);
			break;
		case CPU_WHICH_PID:
			/* Report the union of the masks of all threads. */
			FOREACH_THREAD_IN_PROC(p, ttd) {
				thread_lock(ttd);
				CPU_OR(mask, mask, &ttd->td_cpuset->cs_mask);
				thread_unlock(ttd);
			}
			break;
		case CPU_WHICH_CPUSET:
		case CPU_WHICH_JAIL:
			CPU_COPY(&set->cs_mask, mask);
			break;
		case CPU_WHICH_IRQ:
		case CPU_WHICH_INTRHANDLER:
		case CPU_WHICH_ITHREAD:
			error = intr_getaffinity(id, which, mask);
			break;
		case CPU_WHICH_DOMAIN:
			if (id < 0 || id >= MAXMEMDOM)
				error = ESRCH;
			else
				CPU_COPY(&cpuset_domain[id], mask);
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	/* Drop whatever reference and process lock are still held. */
	if (set)
		cpuset_rel(set);
	if (p)
		PROC_UNLOCK(p);
	if (error == 0) {
		/* The user buffer must cover the highest set bit. */
		if (cpusetsize < howmany(CPU_FLS(mask), NBBY)) {
			error = ERANGE;
			goto out;
		}
		size = min(cpusetsize, sizeof(cpuset_t));
		error = copyout(mask, maskp, size);
		if (error != 0)
			goto out;
		/* Zero the part of a larger user buffer beyond cpuset_t. */
		if (cpusetsize > size) {
			char *end;
			char *cp;
			int rv;

			end = cp = (char *)&maskp->__bits;
			end += cpusetsize;
			cp += size;
			while (cp != end) {
				rv = subyte(cp, 0);
				if (rv == -1) {
					error = EFAULT;
					goto out;
				}
				cp++;
			}
		}
+#ifdef KTRACE
+		if ( KTRPOINT(td, KTR_STRUCT))
+			ktrcpuset(mask, size);
+#endif
	}
out:
	free(mask, M_TEMP);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct cpuset_setaffinity_args { cpulevel_t level; cpuwhich_t which; id_t id; size_t cpusetsize; const cpuset_t *mask; }; #endif int sys_cpuset_setaffinity(struct thread *td, struct cpuset_setaffinity_args *uap) { return (user_cpuset_setaffinity(td, uap->level, uap->which, uap->id, uap->cpusetsize, uap->mask)); } int kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, cpuset_t *mask) { struct cpuset *nset; struct cpuset *set; struct thread *ttd; struct proc *p; int error; +#ifdef KTRACE + if (KTRPOINT(td, KTR_STRUCT)) + ktrcpuset(mask, sizeof(cpuset_t)); +#endif error = cpuset_check_capabilities(td, level, which, id); if (error != 0) return (error); if (CPU_EMPTY(mask)) return (EDEADLK); switch (level) { case CPU_LEVEL_ROOT: case CPU_LEVEL_CPUSET: error = cpuset_which(which, id, &p, &ttd, &set); if (error) break; switch (which) { case CPU_WHICH_TID: case CPU_WHICH_PID: thread_lock(ttd); set = cpuset_ref(ttd->td_cpuset); thread_unlock(ttd); PROC_UNLOCK(p); break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: break; case CPU_WHICH_IRQ: case CPU_WHICH_INTRHANDLER: case CPU_WHICH_ITHREAD: case CPU_WHICH_DOMAIN: return (EINVAL); } if (level == CPU_LEVEL_ROOT) nset = cpuset_refroot(set); else nset = cpuset_refbase(set); error = cpuset_modify(nset, mask); cpuset_rel(nset); cpuset_rel(set); break; case CPU_LEVEL_WHICH: switch (which) { case CPU_WHICH_TID: error = cpuset_setthread(id, mask); break; case CPU_WHICH_PID: error = cpuset_setproc(id, NULL, mask, NULL, false); break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: error = cpuset_which(which, id, &p, &ttd, &set); if (error == 0) { error = cpuset_modify(set, mask); cpuset_rel(set); } break; case CPU_WHICH_IRQ: case CPU_WHICH_INTRHANDLER: case CPU_WHICH_ITHREAD: error = intr_setaffinity(id, which, mask); break; default: error = EINVAL; break; } break; default: error = EINVAL; break; } return (error); } int user_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, 
id_t id, size_t cpusetsize, const cpuset_t *maskp) { cpuset_t *mask; int error; size_t size; size = min(cpusetsize, sizeof(cpuset_t)); mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO); error = copyin(maskp, mask, size); if (error) goto out; /* * Verify that no high bits are set. */ if (cpusetsize > sizeof(cpuset_t)) { const char *end, *cp; int val; end = cp = (const char *)&maskp->__bits; end += cpusetsize; cp += sizeof(cpuset_t); while (cp != end) { val = fubyte(cp); if (val == -1) { error = EFAULT; goto out; } if (val != 0) { error = EINVAL; goto out; } cp++; } } error = kern_cpuset_setaffinity(td, level, which, id, mask); out: free(mask, M_TEMP); return (error); } #ifndef _SYS_SYSPROTO_H_ struct cpuset_getdomain_args { cpulevel_t level; cpuwhich_t which; id_t id; size_t domainsetsize; domainset_t *mask; int *policy; }; #endif int sys_cpuset_getdomain(struct thread *td, struct cpuset_getdomain_args *uap) { return (kern_cpuset_getdomain(td, uap->level, uap->which, uap->id, uap->domainsetsize, uap->mask, uap->policy)); } int kern_cpuset_getdomain(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t domainsetsize, domainset_t *maskp, int *policyp) { struct domainset outset; struct thread *ttd; struct cpuset *nset; struct cpuset *set; struct domainset *dset; struct proc *p; domainset_t *mask; int error; if (domainsetsize < sizeof(domainset_t) || domainsetsize > DOMAINSET_MAXSIZE / NBBY) return (ERANGE); error = cpuset_check_capabilities(td, level, which, id); if (error != 0) return (error); mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO); bzero(&outset, sizeof(outset)); error = cpuset_which(which, id, &p, &ttd, &set); if (error) goto out; switch (level) { case CPU_LEVEL_ROOT: case CPU_LEVEL_CPUSET: switch (which) { case CPU_WHICH_TID: case CPU_WHICH_PID: thread_lock(ttd); set = cpuset_ref(ttd->td_cpuset); thread_unlock(ttd); break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: break; case CPU_WHICH_IRQ: case CPU_WHICH_INTRHANDLER: 
case CPU_WHICH_ITHREAD: case CPU_WHICH_DOMAIN: error = EINVAL; goto out; } if (level == CPU_LEVEL_ROOT) nset = cpuset_refroot(set); else nset = cpuset_refbase(set); domainset_copy(nset->cs_domain, &outset); cpuset_rel(nset); break; case CPU_LEVEL_WHICH: switch (which) { case CPU_WHICH_TID: thread_lock(ttd); domainset_copy(ttd->td_cpuset->cs_domain, &outset); thread_unlock(ttd); break; case CPU_WHICH_PID: FOREACH_THREAD_IN_PROC(p, ttd) { thread_lock(ttd); dset = ttd->td_cpuset->cs_domain; /* Show all domains in the proc. */ DOMAINSET_OR(&outset.ds_mask, &dset->ds_mask); /* Last policy wins. */ outset.ds_policy = dset->ds_policy; outset.ds_prefer = dset->ds_prefer; thread_unlock(ttd); } break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: domainset_copy(set->cs_domain, &outset); break; case CPU_WHICH_IRQ: case CPU_WHICH_INTRHANDLER: case CPU_WHICH_ITHREAD: case CPU_WHICH_DOMAIN: error = EINVAL; break; } break; default: error = EINVAL; break; } if (set) cpuset_rel(set); if (p) PROC_UNLOCK(p); /* * Translate prefer into a set containing only the preferred domain, * not the entire fallback set. 
*/ if (outset.ds_policy == DOMAINSET_POLICY_PREFER) { DOMAINSET_ZERO(&outset.ds_mask); DOMAINSET_SET(outset.ds_prefer, &outset.ds_mask); } DOMAINSET_COPY(&outset.ds_mask, mask); if (error == 0) error = copyout(mask, maskp, domainsetsize); if (error == 0) if (suword32(policyp, outset.ds_policy) != 0) error = EFAULT; out: free(mask, M_TEMP); return (error); } #ifndef _SYS_SYSPROTO_H_ struct cpuset_setdomain_args { cpulevel_t level; cpuwhich_t which; id_t id; size_t domainsetsize; domainset_t *mask; int policy; }; #endif int sys_cpuset_setdomain(struct thread *td, struct cpuset_setdomain_args *uap) { return (kern_cpuset_setdomain(td, uap->level, uap->which, uap->id, uap->domainsetsize, uap->mask, uap->policy)); } int kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t domainsetsize, const domainset_t *maskp, int policy) { struct cpuset *nset; struct cpuset *set; struct thread *ttd; struct proc *p; struct domainset domain; domainset_t *mask; int error; if (domainsetsize < sizeof(domainset_t) || domainsetsize > DOMAINSET_MAXSIZE / NBBY) return (ERANGE); if (policy <= DOMAINSET_POLICY_INVALID || policy > DOMAINSET_POLICY_MAX) return (EINVAL); error = cpuset_check_capabilities(td, level, which, id); if (error != 0) return (error); memset(&domain, 0, sizeof(domain)); mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO); error = copyin(maskp, mask, domainsetsize); if (error) goto out; /* * Verify that no high bits are set. */ if (domainsetsize > sizeof(domainset_t)) { char *end; char *cp; end = cp = (char *)&mask->__bits; end += domainsetsize; cp += sizeof(domainset_t); while (cp != end) if (*cp++ != 0) { error = EINVAL; goto out; } } if (DOMAINSET_EMPTY(mask)) { error = EDEADLK; goto out; } DOMAINSET_COPY(mask, &domain.ds_mask); domain.ds_policy = policy; /* * Sanitize the provided mask. */ if (!DOMAINSET_SUBSET(&all_domains, &domain.ds_mask)) { error = EINVAL; goto out; } /* Translate preferred policy into a mask and fallback. 
*/ if (policy == DOMAINSET_POLICY_PREFER) { /* Only support a single preferred domain. */ if (DOMAINSET_COUNT(&domain.ds_mask) != 1) { error = EINVAL; goto out; } domain.ds_prefer = DOMAINSET_FFS(&domain.ds_mask) - 1; /* This will be constrained by domainset_shadow(). */ DOMAINSET_COPY(&all_domains, &domain.ds_mask); } /* * When given an impossible policy, fall back to interleaving * across all domains. */ if (domainset_empty_vm(&domain)) domainset_copy(domainset2, &domain); switch (level) { case CPU_LEVEL_ROOT: case CPU_LEVEL_CPUSET: error = cpuset_which(which, id, &p, &ttd, &set); if (error) break; switch (which) { case CPU_WHICH_TID: case CPU_WHICH_PID: thread_lock(ttd); set = cpuset_ref(ttd->td_cpuset); thread_unlock(ttd); PROC_UNLOCK(p); break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: break; case CPU_WHICH_IRQ: case CPU_WHICH_INTRHANDLER: case CPU_WHICH_ITHREAD: case CPU_WHICH_DOMAIN: error = EINVAL; goto out; } if (level == CPU_LEVEL_ROOT) nset = cpuset_refroot(set); else nset = cpuset_refbase(set); error = cpuset_modify_domain(nset, &domain); cpuset_rel(nset); cpuset_rel(set); break; case CPU_LEVEL_WHICH: switch (which) { case CPU_WHICH_TID: error = _cpuset_setthread(id, NULL, &domain); break; case CPU_WHICH_PID: error = cpuset_setproc(id, NULL, NULL, &domain, false); break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: error = cpuset_which(which, id, &p, &ttd, &set); if (error == 0) { error = cpuset_modify_domain(set, &domain); cpuset_rel(set); } break; case CPU_WHICH_IRQ: case CPU_WHICH_INTRHANDLER: case CPU_WHICH_ITHREAD: default: error = EINVAL; break; } break; default: error = EINVAL; break; } out: free(mask, M_TEMP); return (error); } #ifdef DDB static void ddb_display_bitset(const struct bitset *set, int size) { int bit, once; for (once = 0, bit = 0; bit < size; bit++) { if (CPU_ISSET(bit, set)) { if (once == 0) { db_printf("%d", bit); once = 1; } else db_printf(",%d", bit); } } if (once == 0) db_printf(""); } void ddb_display_cpuset(const cpuset_t 
*set) { ddb_display_bitset((const struct bitset *)set, CPU_SETSIZE); } static void ddb_display_domainset(const domainset_t *set) { ddb_display_bitset((const struct bitset *)set, DOMAINSET_SETSIZE); } DB_SHOW_COMMAND(cpusets, db_show_cpusets) { struct cpuset *set; LIST_FOREACH(set, &cpuset_ids, cs_link) { db_printf("set=%p id=%-6u ref=%-6d flags=0x%04x parent id=%d\n", set, set->cs_id, refcount_load(&set->cs_ref), set->cs_flags, (set->cs_parent != NULL) ? set->cs_parent->cs_id : 0); db_printf(" cpu mask="); ddb_display_cpuset(&set->cs_mask); db_printf("\n"); db_printf(" domain policy %d prefer %d mask=", set->cs_domain->ds_policy, set->cs_domain->ds_prefer); ddb_display_domainset(&set->cs_domain->ds_mask); db_printf("\n"); if (db_pager_quit) break; } } DB_SHOW_COMMAND(domainsets, db_show_domainsets) { struct domainset *set; LIST_FOREACH(set, &cpuset_domains, ds_link) { db_printf("set=%p policy %d prefer %d cnt %d\n", set, set->ds_policy, set->ds_prefer, set->ds_cnt); db_printf(" mask ="); ddb_display_domainset(&set->ds_mask); db_printf("\n"); } } #endif /* DDB */ diff --git a/sys/sys/ktrace.h b/sys/sys/ktrace.h index d00981a93d24..f417fdf8a22a 100644 --- a/sys/sys/ktrace.h +++ b/sys/sys/ktrace.h @@ -1,327 +1,329 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ktrace.h 8.1 (Berkeley) 6/2/93 * $FreeBSD$ */ #ifndef _SYS_KTRACE_H_ #define _SYS_KTRACE_H_ #include /* * operations to ktrace system call (KTROP(op)) */ #define KTROP_SET 0 /* set trace points */ #define KTROP_CLEAR 1 /* clear trace points */ #define KTROP_CLEARFILE 2 /* stop all tracing to file */ #define KTROP(o) ((o)&3) /* macro to extract operation */ /* * flags (ORed in with operation) */ #define KTRFLAG_DESCEND 4 /* perform op on all children too */ /* * ktrace record header */ struct ktr_header { int ktr_len; /* length of buf */ short ktr_type; /* trace record type */ pid_t ktr_pid; /* process id */ char ktr_comm[MAXCOMLEN + 1];/* command name */ struct timeval ktr_time; /* timestamp */ intptr_t ktr_tid; /* was ktr_buffer */ }; /* * Test for kernel trace point (MP SAFE). * * KTRCHECK() just checks that the type is enabled and is only for * internal use in the ktrace subsystem. KTRPOINT() checks against * ktrace recursion as well as checking that the type is enabled and * is the public interface. 
*/ #define KTRCHECK(td, type) ((td)->td_proc->p_traceflag & (1 << type)) #define KTRPOINT(td, type) (__predict_false(KTRCHECK((td), (type)))) #define KTRCHECKDRAIN(td) (!(STAILQ_EMPTY(&(td)->td_proc->p_ktr))) #define KTRUSERRET(td) do { \ if (__predict_false(KTRCHECKDRAIN(td))) \ ktruserret(td); \ } while (0) /* * ktrace record types */ /* * KTR_SYSCALL - system call record */ #define KTR_SYSCALL 1 struct ktr_syscall { short ktr_code; /* syscall number */ short ktr_narg; /* number of arguments */ /* * followed by ktr_narg register_t */ register_t ktr_args[1]; }; /* * KTR_SYSRET - return from system call record */ #define KTR_SYSRET 2 struct ktr_sysret { short ktr_code; short ktr_eosys; int ktr_error; register_t ktr_retval; }; /* * KTR_NAMEI - namei record */ #define KTR_NAMEI 3 /* record contains pathname */ /* * KTR_GENIO - trace generic process i/o */ #define KTR_GENIO 4 struct ktr_genio { int ktr_fd; enum uio_rw ktr_rw; /* * followed by data successfully read/written */ }; /* * KTR_PSIG - trace processed signal */ #define KTR_PSIG 5 struct ktr_psig { int signo; sig_t action; int code; sigset_t mask; }; /* * KTR_CSW - trace context switches */ #define KTR_CSW 6 struct ktr_csw_old { int out; /* 1 if switch out, 0 if switch in */ int user; /* 1 if usermode (ivcsw), 0 if kernel (vcsw) */ }; struct ktr_csw { int out; /* 1 if switch out, 0 if switch in */ int user; /* 1 if usermode (ivcsw), 0 if kernel (vcsw) */ char wmesg[8]; }; /* * KTR_USER - data coming from userland */ #define KTR_USER_MAXLEN 2048 /* maximum length of passed data */ #define KTR_USER 7 /* * KTR_STRUCT - misc. 
structs */ #define KTR_STRUCT 8 /* * record contains null-terminated struct name followed by * struct contents */ struct sockaddr; struct stat; struct sysentvec; /* * KTR_SYSCTL - name of a sysctl MIB */ #define KTR_SYSCTL 9 /* record contains null-terminated MIB name */ /* * KTR_PROCCTOR - trace process creation (multiple ABI support) */ #define KTR_PROCCTOR 10 struct ktr_proc_ctor { u_int sv_flags; /* struct sysentvec sv_flags copy */ }; /* * KTR_PROCDTOR - trace process destruction (multiple ABI support) */ #define KTR_PROCDTOR 11 /* * KTR_CAPFAIL - trace capability check failures */ #define KTR_CAPFAIL 12 enum ktr_cap_fail_type { CAPFAIL_NOTCAPABLE, /* insufficient capabilities in cap_check() */ CAPFAIL_INCREASE, /* attempt to increase capabilities */ CAPFAIL_SYSCALL, /* disallowed system call */ CAPFAIL_LOOKUP, /* disallowed VFS lookup */ }; struct ktr_cap_fail { enum ktr_cap_fail_type cap_type; cap_rights_t cap_needed; cap_rights_t cap_held; }; /* * KTR_FAULT - page fault record */ #define KTR_FAULT 13 struct ktr_fault { vm_offset_t vaddr; int type; }; /* * KTR_FAULTEND - end of page fault record */ #define KTR_FAULTEND 14 struct ktr_faultend { int result; }; /* * KTR_STRUCT_ARRAY - array of misc. structs */ #define KTR_STRUCT_ARRAY 15 struct ktr_struct_array { size_t struct_size; /* * Followed by null-terminated structure name and then payload * contents. */ }; /* * KTR_DROP - If this bit is set in ktr_type, then at least one event * between the previous record and this record was dropped. 
*/ #define KTR_DROP 0x8000 /* * kernel trace points (in p_traceflag) */ #define KTRFAC_MASK 0x00ffffff #define KTRFAC_SYSCALL (1<sa_len) #define ktrstat(s) \ ktrstruct("stat", (s), sizeof(struct stat)) #define ktrstat_error(s, error) \ ktrstruct_error("stat", (s), sizeof(struct stat), error) +#define ktrcpuset(s, l) \ + ktrstruct("cpuset_t", (s), l) extern u_int ktr_geniosize; #ifdef KTRACE extern int ktr_filesize_limit_signal; #else #define ktr_filesize_limit_signal 0 #endif #else #include __BEGIN_DECLS int ktrace(const char *, int, int, pid_t); int utrace(const void *, size_t); __END_DECLS #endif #endif diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c index 845e2000a4c2..0f4ac9a0c262 100644 --- a/usr.bin/kdump/kdump.c +++ b/usr.bin/kdump/kdump.c @@ -1,2222 +1,2260 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1988, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)kdump.c 8.1 (Berkeley) 6/6/93"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #define _WANT_KERNEL_ERRNO #ifdef __LP64__ #define _WANT_KEVENT32 #endif #define _WANT_FREEBSD11_KEVENT +#define _WANT_FREEBSD_BITSET #include #include +#include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef WITH_CASPER #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ktrace.h" #ifdef WITH_CASPER #include #include #include #endif int fetchprocinfo(struct ktr_header *, u_int *); u_int findabi(struct ktr_header *); int fread_tail(void *, int, int); void dumpheader(struct ktr_header *, u_int); void ktrsyscall(struct ktr_syscall *, u_int); void ktrsysret(struct ktr_sysret *, u_int); void ktrnamei(char *, int); void hexdump(char *, int, int); void visdump(char *, int, int); void ktrgenio(struct ktr_genio *, int); void ktrpsig(struct ktr_psig *); void ktrcsw(struct ktr_csw *); void ktrcsw_old(struct ktr_csw_old *); void ktruser(int, void *); void ktrcaprights(cap_rights_t *); void ktritimerval(struct itimerval 
*it); void ktrsockaddr(struct sockaddr *); void ktrstat(struct stat *); void ktrstruct(char *, size_t); void ktrcapfail(struct ktr_cap_fail *); void ktrfault(struct ktr_fault *); void ktrfaultend(struct ktr_faultend *); void ktrkevent(struct kevent *); void ktrstructarray(struct ktr_struct_array *, size_t); +void ktrbitset(char *, struct bitset *, size_t); void usage(void); #define TIMESTAMP_NONE 0x0 #define TIMESTAMP_ABSOLUTE 0x1 #define TIMESTAMP_ELAPSED 0x2 #define TIMESTAMP_RELATIVE 0x4 static bool abiflag, decimal, fancy = true, resolv, suppressdata, syscallno, tail, threads; static int timestamp, maxdata; static const char *tracefile = DEF_TRACEFILE; static struct ktr_header ktr_header; #define TIME_FORMAT "%b %e %T %Y" #define eqs(s1, s2) (strcmp((s1), (s2)) == 0) #define print_number64(first,i,n,c) do { \ uint64_t __v; \ \ if (quad_align && (((ptrdiff_t)((i) - (first))) & 1) == 1) { \ (i)++; \ (n)--; \ } \ if (quad_slots == 2) \ __v = (uint64_t)(uint32_t)(i)[0] | \ ((uint64_t)(uint32_t)(i)[1]) << 32; \ else \ __v = (uint64_t)*(i); \ if (decimal) \ printf("%c%jd", (c), (intmax_t)__v); \ else \ printf("%c%#jx", (c), (uintmax_t)__v); \ (i) += quad_slots; \ (n) -= quad_slots; \ (c) = ','; \ } while (0) #define print_number(i,n,c) do { \ if (decimal) \ printf("%c%jd", c, (intmax_t)*i); \ else \ printf("%c%#jx", c, (uintmax_t)(u_register_t)*i); \ i++; \ n--; \ c = ','; \ } while (0) struct proc_info { TAILQ_ENTRY(proc_info) info; u_int sv_flags; pid_t pid; }; static TAILQ_HEAD(trace_procs, proc_info) trace_procs; #ifdef WITH_CASPER static cap_channel_t *cappwd, *capgrp; static int cappwdgrp_setup(cap_channel_t **cappwdp, cap_channel_t **capgrpp) { cap_channel_t *capcas, *cappwdloc, *capgrploc; const char *cmds[1], *fields[1]; capcas = cap_init(); if (capcas == NULL) { err(1, "unable to create casper process"); exit(1); } cappwdloc = cap_service_open(capcas, "system.pwd"); capgrploc = cap_service_open(capcas, "system.grp"); /* Casper capability no longer needed. 
*/ cap_close(capcas); if (cappwdloc == NULL || capgrploc == NULL) { if (cappwdloc == NULL) warn("unable to open system.pwd service"); if (capgrploc == NULL) warn("unable to open system.grp service"); exit(1); } /* Limit system.pwd to only getpwuid() function and pw_name field. */ cmds[0] = "getpwuid"; if (cap_pwd_limit_cmds(cappwdloc, cmds, 1) < 0) err(1, "unable to limit system.pwd service"); fields[0] = "pw_name"; if (cap_pwd_limit_fields(cappwdloc, fields, 1) < 0) err(1, "unable to limit system.pwd service"); /* Limit system.grp to only getgrgid() function and gr_name field. */ cmds[0] = "getgrgid"; if (cap_grp_limit_cmds(capgrploc, cmds, 1) < 0) err(1, "unable to limit system.grp service"); fields[0] = "gr_name"; if (cap_grp_limit_fields(capgrploc, fields, 1) < 0) err(1, "unable to limit system.grp service"); *cappwdp = cappwdloc; *capgrpp = capgrploc; return (0); } #endif /* WITH_CASPER */ static void print_integer_arg(const char *(*decoder)(int), int value) { const char *str; str = decoder(value); if (str != NULL) printf("%s", str); else { if (decimal) printf("", value); else printf("", value); } } /* Like print_integer_arg but unknown values are treated as valid. 
*/ static void print_integer_arg_valid(const char *(*decoder)(int), int value) { const char *str; str = decoder(value); if (str != NULL) printf("%s", str); else { if (decimal) printf("%d", value); else printf("%#x", value); } } static bool print_mask_arg_part(bool (*decoder)(FILE *, int, int *), int value, int *rem) { printf("%#x<", value); return (decoder(stdout, value, rem)); } static void print_mask_arg(bool (*decoder)(FILE *, int, int *), int value) { bool invalid; int rem; invalid = !print_mask_arg_part(decoder, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void print_mask_arg0(bool (*decoder)(FILE *, int, int *), int value) { bool invalid; int rem; if (value == 0) { printf("0"); return; } printf("%#x<", value); invalid = !decoder(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void decode_fileflags(fflags_t value) { bool invalid; fflags_t rem; if (value == 0) { printf("0"); return; } printf("%#x<", value); invalid = !sysdecode_fileflags(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void decode_filemode(int value) { bool invalid; int rem; if (value == 0) { printf("0"); return; } printf("%#o<", value); invalid = !sysdecode_filemode(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void print_mask_arg32(bool (*decoder)(FILE *, uint32_t, uint32_t *), uint32_t value) { bool invalid; uint32_t rem; printf("%#x<", value); invalid = !decoder(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void print_mask_argul(bool (*decoder)(FILE *, u_long, u_long *), u_long value) { bool invalid; u_long rem; if (value == 0) { printf("0"); return; } printf("%#lx<", value); invalid = !decoder(stdout, value, &rem); printf(">"); if (invalid) printf("%lu", rem); } int main(int argc, char *argv[]) { int ch, ktrlen, size; void *m; int trpoints = ALL_POINTS; int drop_logged; pid_t pid = 0; u_int sv_flags; setlocale(LC_CTYPE, ""); timestamp = TIMESTAMP_NONE; 
while ((ch = getopt(argc,argv,"f:dElm:np:AHRrSsTt:")) != -1) switch (ch) { case 'A': abiflag = true; break; case 'f': tracefile = optarg; break; case 'd': decimal = true; break; case 'l': tail = true; break; case 'm': maxdata = atoi(optarg); break; case 'n': fancy = false; break; case 'p': pid = atoi(optarg); break; case 'r': resolv = true; break; case 'S': syscallno = true; break; case 's': suppressdata = true; break; case 'E': timestamp |= TIMESTAMP_ELAPSED; break; case 'H': threads = true; break; case 'R': timestamp |= TIMESTAMP_RELATIVE; break; case 'T': timestamp |= TIMESTAMP_ABSOLUTE; break; case 't': trpoints = getpoints(optarg); if (trpoints < 0) errx(1, "unknown trace point in %s", optarg); break; default: usage(); } if (argc > optind) usage(); m = malloc(size = 1025); if (m == NULL) errx(1, "%s", strerror(ENOMEM)); if (strcmp(tracefile, "-") != 0) if (!freopen(tracefile, "r", stdin)) err(1, "%s", tracefile); caph_cache_catpages(); caph_cache_tzdata(); #ifdef WITH_CASPER if (resolv) { if (cappwdgrp_setup(&cappwd, &capgrp) < 0) { cappwd = NULL; capgrp = NULL; } } if (!resolv || (cappwd != NULL && capgrp != NULL)) { if (caph_enter() < 0) err(1, "unable to enter capability mode"); } #else if (!resolv) { if (caph_enter() < 0) err(1, "unable to enter capability mode"); } #endif if (caph_limit_stdio() == -1) err(1, "unable to limit stdio"); TAILQ_INIT(&trace_procs); drop_logged = 0; while (fread_tail(&ktr_header, sizeof(struct ktr_header), 1)) { if (ktr_header.ktr_type & KTR_DROP) { ktr_header.ktr_type &= ~KTR_DROP; if (!drop_logged && threads) { printf( "%6jd %6jd %-8.*s Events dropped.\n", (intmax_t)ktr_header.ktr_pid, ktr_header.ktr_tid > 0 ? 
(intmax_t)ktr_header.ktr_tid : 0, MAXCOMLEN, ktr_header.ktr_comm); drop_logged = 1; } else if (!drop_logged) { printf("%6jd %-8.*s Events dropped.\n", (intmax_t)ktr_header.ktr_pid, MAXCOMLEN, ktr_header.ktr_comm); drop_logged = 1; } } if ((ktrlen = ktr_header.ktr_len) < 0) errx(1, "bogus length 0x%x", ktrlen); if (ktrlen > size) { m = realloc(m, ktrlen+1); if (m == NULL) errx(1, "%s", strerror(ENOMEM)); size = ktrlen; } if (ktrlen && fread_tail(m, ktrlen, 1) == 0) errx(1, "data too short"); if (fetchprocinfo(&ktr_header, (u_int *)m) != 0) continue; if (pid && ktr_header.ktr_pid != pid && ktr_header.ktr_tid != pid) continue; if ((trpoints & (1<ktr_type) { case KTR_PROCCTOR: TAILQ_FOREACH(pi, &trace_procs, info) { if (pi->pid == kth->ktr_pid) { TAILQ_REMOVE(&trace_procs, pi, info); break; } } pi = malloc(sizeof(struct proc_info)); if (pi == NULL) errx(1, "%s", strerror(ENOMEM)); pi->sv_flags = *flags; pi->pid = kth->ktr_pid; TAILQ_INSERT_TAIL(&trace_procs, pi, info); return (1); case KTR_PROCDTOR: TAILQ_FOREACH(pi, &trace_procs, info) { if (pi->pid == kth->ktr_pid) { TAILQ_REMOVE(&trace_procs, pi, info); free(pi); break; } } return (1); } return (0); } u_int findabi(struct ktr_header *kth) { struct proc_info *pi; TAILQ_FOREACH(pi, &trace_procs, info) { if (pi->pid == kth->ktr_pid) { return (pi->sv_flags); } } return (0); } void dumpheader(struct ktr_header *kth, u_int sv_flags) { static char unknown[64]; static struct timeval prevtime, prevtime_e; struct timeval temp; const char *abi; const char *arch; const char *type; const char *sign; switch (kth->ktr_type) { case KTR_SYSCALL: type = "CALL"; break; case KTR_SYSRET: type = "RET "; break; case KTR_NAMEI: type = "NAMI"; break; case KTR_GENIO: type = "GIO "; break; case KTR_PSIG: type = "PSIG"; break; case KTR_CSW: type = "CSW "; break; case KTR_USER: type = "USER"; break; case KTR_STRUCT: case KTR_STRUCT_ARRAY: type = "STRU"; break; case KTR_SYSCTL: type = "SCTL"; break; case KTR_CAPFAIL: type = "CAP "; break; case 
KTR_FAULT: type = "PFLT"; break; case KTR_FAULTEND: type = "PRET"; break; default: sprintf(unknown, "UNKNOWN(%d)", kth->ktr_type); type = unknown; } /* * The ktr_tid field was previously the ktr_buffer field, which held * the kernel pointer value for the buffer associated with data * following the record header. It now holds a threadid, but only * for trace files after the change. Older trace files still contain * kernel pointers. Detect this and suppress the results by printing * negative tid's as 0. */ if (threads) printf("%6jd %6jd %-8.*s ", (intmax_t)kth->ktr_pid, kth->ktr_tid > 0 ? (intmax_t)kth->ktr_tid : 0, MAXCOMLEN, kth->ktr_comm); else printf("%6jd %-8.*s ", (intmax_t)kth->ktr_pid, MAXCOMLEN, kth->ktr_comm); if (timestamp) { if (timestamp & TIMESTAMP_ABSOLUTE) { printf("%jd.%06ld ", (intmax_t)kth->ktr_time.tv_sec, kth->ktr_time.tv_usec); } if (timestamp & TIMESTAMP_ELAPSED) { if (prevtime_e.tv_sec == 0) prevtime_e = kth->ktr_time; timersub(&kth->ktr_time, &prevtime_e, &temp); printf("%jd.%06ld ", (intmax_t)temp.tv_sec, temp.tv_usec); } if (timestamp & TIMESTAMP_RELATIVE) { if (prevtime.tv_sec == 0) prevtime = kth->ktr_time; if (timercmp(&kth->ktr_time, &prevtime, <)) { timersub(&prevtime, &kth->ktr_time, &temp); sign = "-"; } else { timersub(&kth->ktr_time, &prevtime, &temp); sign = ""; } prevtime = kth->ktr_time; printf("%s%jd.%06ld ", sign, (intmax_t)temp.tv_sec, temp.tv_usec); } } printf("%s ", type); if (abiflag != 0) { switch (sv_flags & SV_ABI_MASK) { case SV_ABI_LINUX: abi = "L"; break; case SV_ABI_FREEBSD: abi = "F"; break; default: abi = "U"; break; } if ((sv_flags & SV_LP64) != 0) arch = "64"; else if ((sv_flags & SV_ILP32) != 0) arch = "32"; else arch = "00"; printf("%s%s ", abi, arch); } } #include static void ioctlname(unsigned long val) { const char *str; str = sysdecode_ioctlname(val); if (str != NULL) printf("%s", str); else if (decimal) printf("%lu", val); else printf("%#lx", val); } static enum sysdecode_abi syscallabi(u_int sv_flags) { 
if (sv_flags == 0) return (SYSDECODE_ABI_FREEBSD); switch (sv_flags & SV_ABI_MASK) { case SV_ABI_FREEBSD: return (SYSDECODE_ABI_FREEBSD); case SV_ABI_LINUX: #ifdef __LP64__ if (sv_flags & SV_ILP32) return (SYSDECODE_ABI_LINUX32); #endif return (SYSDECODE_ABI_LINUX); default: return (SYSDECODE_ABI_UNKNOWN); } } static void syscallname(u_int code, u_int sv_flags) { const char *name; name = sysdecode_syscallname(syscallabi(sv_flags), code); if (name == NULL) printf("[%d]", code); else { printf("%s", name); if (syscallno) printf("[%d]", code); } } static void print_signal(int signo) { const char *signame; signame = sysdecode_signal(signo); if (signame != NULL) printf("%s", signame); else printf("SIG %d", signo); } void ktrsyscall(struct ktr_syscall *ktr, u_int sv_flags) { int narg = ktr->ktr_narg; register_t *ip, *first; intmax_t arg; int quad_align, quad_slots; syscallname(ktr->ktr_code, sv_flags); ip = first = &ktr->ktr_args[0]; if (narg) { char c = '('; if (fancy && (sv_flags == 0 || (sv_flags & SV_ABI_MASK) == SV_ABI_FREEBSD)) { quad_align = 0; if (sv_flags & SV_ILP32) { #ifdef __powerpc__ quad_align = 1; #endif quad_slots = 2; } else quad_slots = 1; switch (ktr->ktr_code) { case SYS_bindat: case SYS_chflagsat: case SYS_connectat: case SYS_faccessat: case SYS_fchmodat: case SYS_fchownat: case SYS_fstatat: case SYS_futimesat: case SYS_linkat: case SYS_mkdirat: case SYS_mkfifoat: case SYS_mknodat: case SYS_openat: case SYS_readlinkat: case SYS_renameat: case SYS_unlinkat: case SYS_utimensat: putchar('('); print_integer_arg_valid(sysdecode_atfd, *ip); c = ','; ip++; narg--; break; } switch (ktr->ktr_code) { case SYS_ioctl: { print_number(ip, narg, c); putchar(c); ioctlname(*ip); c = ','; ip++; narg--; break; } case SYS_ptrace: putchar('('); print_integer_arg(sysdecode_ptrace_request, *ip); c = ','; ip++; narg--; break; case SYS_access: case SYS_eaccess: case SYS_faccessat: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_access_mode, *ip); ip++; 
narg--; break; case SYS_close_range: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_close_range_flags, *ip); ip += 3; narg -= 3; break; case SYS_open: case SYS_openat: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_open_flags, ip[0]); if ((ip[0] & O_CREAT) == O_CREAT) { putchar(','); decode_filemode(ip[1]); } ip += 2; narg -= 2; break; case SYS_wait4: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_wait4_options, *ip); ip++; narg--; break; case SYS_wait6: putchar('('); print_integer_arg(sysdecode_idtype, *ip); c = ','; ip++; narg--; print_number64(first, ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_wait6_options, *ip); ip++; narg--; break; case SYS_chmod: case SYS_fchmod: case SYS_lchmod: case SYS_fchmodat: print_number(ip, narg, c); putchar(','); decode_filemode(*ip); ip++; narg--; break; case SYS_mknodat: print_number(ip, narg, c); putchar(','); decode_filemode(*ip); ip++; narg--; break; case SYS_getfsstat: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_getfsstat_mode, *ip); ip++; narg--; break; case SYS_mount: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mount_flags, *ip); ip++; narg--; break; case SYS_unmount: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mount_flags, *ip); ip++; narg--; break; case SYS_recvmsg: case SYS_sendmsg: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_msg_flags, *ip); ip++; narg--; break; case SYS_recvfrom: case SYS_sendto: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_msg_flags, *ip); ip++; narg--; break; case SYS_chflags: case SYS_chflagsat: case SYS_fchflags: case SYS_lchflags: print_number(ip, narg, c); putchar(','); decode_fileflags(*ip); ip++; narg--; 
break; case SYS_kill: print_number(ip, narg, c); putchar(','); print_signal(*ip); ip++; narg--; break; case SYS_reboot: putchar('('); print_mask_arg(sysdecode_reboot_howto, *ip); ip++; narg--; break; case SYS_umask: putchar('('); decode_filemode(*ip); ip++; narg--; break; case SYS_msync: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_msync_flags, *ip); ip++; narg--; break; #ifdef SYS_freebsd6_mmap case SYS_freebsd6_mmap: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mmap_prot, *ip); putchar(','); ip++; narg--; print_mask_arg(sysdecode_mmap_flags, *ip); ip++; narg--; break; #endif case SYS_mmap: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mmap_prot, *ip); putchar(','); ip++; narg--; print_mask_arg(sysdecode_mmap_flags, *ip); ip++; narg--; break; case SYS_mprotect: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mmap_prot, *ip); ip++; narg--; break; case SYS_madvise: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_madvice, *ip); ip++; narg--; break; case SYS_pathconf: case SYS_lpathconf: case SYS_fpathconf: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_pathconf_name, *ip); ip++; narg--; break; case SYS_getpriority: case SYS_setpriority: putchar('('); print_integer_arg(sysdecode_prio_which, *ip); c = ','; ip++; narg--; break; case SYS_fcntl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_fcntl_cmd, ip[0]); if (sysdecode_fcntl_arg_p(ip[0])) { putchar(','); if (ip[0] == F_SETFL) print_mask_arg( sysdecode_fcntl_fileflags, ip[1]); else sysdecode_fcntl_arg(stdout, ip[0], ip[1], decimal ? 
10 : 16); } ip += 2; narg -= 2; break; case SYS_socket: { int sockdomain; putchar('('); sockdomain = *ip; print_integer_arg(sysdecode_socketdomain, sockdomain); ip++; narg--; putchar(','); print_mask_arg(sysdecode_socket_type, *ip); ip++; narg--; if (sockdomain == PF_INET || sockdomain == PF_INET6) { putchar(','); print_integer_arg(sysdecode_ipproto, *ip); ip++; narg--; } c = ','; break; } case SYS_setsockopt: case SYS_getsockopt: { const char *str; print_number(ip, narg, c); putchar(','); print_integer_arg_valid(sysdecode_sockopt_level, *ip); str = sysdecode_sockopt_name(ip[0], ip[1]); if (str != NULL) { printf(",%s", str); ip++; narg--; } ip++; narg--; break; } #ifdef SYS_freebsd6_lseek case SYS_freebsd6_lseek: print_number(ip, narg, c); /* Hidden 'pad' argument, not in lseek(2) */ print_number(ip, narg, c); print_number64(first, ip, narg, c); putchar(','); print_integer_arg(sysdecode_whence, *ip); ip++; narg--; break; #endif case SYS_lseek: print_number(ip, narg, c); print_number64(first, ip, narg, c); putchar(','); print_integer_arg(sysdecode_whence, *ip); ip++; narg--; break; case SYS_flock: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_flock_operation, *ip); ip++; narg--; break; case SYS_mkfifo: case SYS_mkfifoat: case SYS_mkdir: case SYS_mkdirat: print_number(ip, narg, c); putchar(','); decode_filemode(*ip); ip++; narg--; break; case SYS_shutdown: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_shutdown_how, *ip); ip++; narg--; break; case SYS_socketpair: putchar('('); print_integer_arg(sysdecode_socketdomain, *ip); ip++; narg--; putchar(','); print_mask_arg(sysdecode_socket_type, *ip); ip++; narg--; c = ','; break; case SYS_getrlimit: case SYS_setrlimit: putchar('('); print_integer_arg(sysdecode_rlimit, *ip); ip++; narg--; c = ','; break; case SYS_getrusage: putchar('('); print_integer_arg(sysdecode_getrusage_who, *ip); ip++; narg--; c = ','; break; case SYS_quotactl: print_number(ip, narg, c); putchar(','); if 
(!sysdecode_quotactl_cmd(stdout, *ip)) { if (decimal) printf("", (int)*ip); else printf("", (int)*ip); } ip++; narg--; c = ','; break; case SYS_nfssvc: putchar('('); print_integer_arg(sysdecode_nfssvc_flags, *ip); ip++; narg--; c = ','; break; case SYS_rtprio: case SYS_rtprio_thread: putchar('('); print_integer_arg(sysdecode_rtprio_function, *ip); ip++; narg--; c = ','; break; case SYS___semctl: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_semctl_cmd, *ip); ip++; narg--; break; case SYS_semget: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_semget_flags, *ip); ip++; narg--; break; case SYS_msgctl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_msgctl_cmd, *ip); ip++; narg--; break; case SYS_shmat: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_shmat_flags, *ip); ip++; narg--; break; case SYS_shmctl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_shmctl_cmd, *ip); ip++; narg--; break; #ifdef SYS_freebsd12_shm_open case SYS_freebsd12_shm_open: if (ip[0] == (uintptr_t)SHM_ANON) { printf("(SHM_ANON"); ip++; } else { print_number(ip, narg, c); } putchar(','); print_mask_arg(sysdecode_open_flags, ip[0]); putchar(','); decode_filemode(ip[1]); ip += 2; narg -= 2; break; #endif case SYS_shm_open2: if (ip[0] == (uintptr_t)SHM_ANON) { printf("(SHM_ANON"); ip++; } else { print_number(ip, narg, c); } putchar(','); print_mask_arg(sysdecode_open_flags, ip[0]); putchar(','); decode_filemode(ip[1]); putchar(','); print_mask_arg(sysdecode_shmflags, ip[2]); ip += 3; narg -= 3; break; case SYS_minherit: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_minherit_inherit, *ip); ip++; narg--; break; case SYS_rfork: putchar('('); print_mask_arg(sysdecode_rfork_flags, *ip); ip++; narg--; c = ','; break; case SYS_lio_listio: putchar('('); 
print_integer_arg(sysdecode_lio_listio_mode, *ip); ip++; narg--; c = ','; break; case SYS_mlockall: putchar('('); print_mask_arg(sysdecode_mlockall_flags, *ip); ip++; narg--; break; case SYS_sched_setscheduler: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_scheduler_policy, *ip); ip++; narg--; break; case SYS_sched_get_priority_max: case SYS_sched_get_priority_min: putchar('('); print_integer_arg(sysdecode_scheduler_policy, *ip); ip++; narg--; break; case SYS_sendfile: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_sendfile_flags, *ip); ip++; narg--; break; case SYS_kldsym: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_kldsym_cmd, *ip); ip++; narg--; break; case SYS_sigprocmask: putchar('('); print_integer_arg(sysdecode_sigprocmask_how, *ip); ip++; narg--; c = ','; break; case SYS___acl_get_file: case SYS___acl_set_file: case SYS___acl_get_fd: case SYS___acl_set_fd: case SYS___acl_delete_file: case SYS___acl_delete_fd: case SYS___acl_aclcheck_file: case SYS___acl_aclcheck_fd: case SYS___acl_get_link: case SYS___acl_set_link: case SYS___acl_delete_link: case SYS___acl_aclcheck_link: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_acltype, *ip); ip++; narg--; break; case SYS_sigaction: putchar('('); print_signal(*ip); ip++; narg--; c = ','; break; case SYS_extattrctl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_extattrnamespace, *ip); ip++; narg--; break; case SYS_nmount: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mount_flags, *ip); ip++; narg--; break; case SYS_thr_create: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_thr_create_flags, *ip); ip++; narg--; break; case SYS_thr_kill: print_number(ip, narg, c); putchar(','); 
print_signal(*ip); ip++; narg--; break; case SYS_kldunloadf: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_kldunload_flags, *ip); ip++; narg--; break; case SYS_linkat: case SYS_renameat: case SYS_symlinkat: print_number(ip, narg, c); putchar(','); print_integer_arg_valid(sysdecode_atfd, *ip); ip++; narg--; print_number(ip, narg, c); break; case SYS_cap_fcntls_limit: print_number(ip, narg, c); putchar(','); arg = *ip; ip++; narg--; print_mask_arg32(sysdecode_cap_fcntlrights, arg); break; case SYS_posix_fadvise: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); (void)putchar(','); print_integer_arg(sysdecode_fadvice, *ip); ip++; narg--; break; case SYS_procctl: putchar('('); print_integer_arg(sysdecode_idtype, *ip); c = ','; ip++; narg--; print_number64(first, ip, narg, c); putchar(','); print_integer_arg(sysdecode_procctl_cmd, *ip); ip++; narg--; break; case SYS__umtx_op: { int op; print_number(ip, narg, c); putchar(','); if (print_mask_arg_part(sysdecode_umtx_op_flags, *ip, &op)) putchar('|'); print_integer_arg(sysdecode_umtx_op, op); putchar('>'); switch (*ip) { case UMTX_OP_CV_WAIT: ip++; narg--; putchar(','); print_mask_argul( sysdecode_umtx_cvwait_flags, *ip); break; case UMTX_OP_RW_RDLOCK: ip++; narg--; putchar(','); print_mask_argul( sysdecode_umtx_rwlock_flags, *ip); break; } ip++; narg--; break; } case SYS_ftruncate: case SYS_truncate: print_number(ip, narg, c); print_number64(first, ip, narg, c); break; case SYS_fchownat: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); break; case SYS_fstatat: case SYS_utimensat: print_number(ip, narg, c); print_number(ip, narg, c); break; case SYS_unlinkat: print_number(ip, narg, c); break; case SYS_sysarch: putchar('('); print_integer_arg(sysdecode_sysarch_number, *ip); ip++; narg--; c = ','; break; } switch (ktr->ktr_code) { case SYS_chflagsat: case SYS_fchownat: case SYS_faccessat: case SYS_fchmodat: case SYS_fstatat: case 
SYS_linkat: case SYS_unlinkat: case SYS_utimensat: putchar(','); print_mask_arg0(sysdecode_atflags, *ip); ip++; narg--; break; } } while (narg > 0) { print_number(ip, narg, c); } putchar(')'); } putchar('\n'); } void ktrsysret(struct ktr_sysret *ktr, u_int sv_flags) { register_t ret = ktr->ktr_retval; int error = ktr->ktr_error; syscallname(ktr->ktr_code, sv_flags); printf(" "); if (error == 0) { if (fancy) { printf("%ld", (long)ret); if (ret < 0 || ret > 9) printf("/%#lx", (unsigned long)ret); } else { if (decimal) printf("%ld", (long)ret); else printf("%#lx", (unsigned long)ret); } } else if (error == ERESTART) printf("RESTART"); else if (error == EJUSTRETURN) printf("JUSTRETURN"); else { printf("-1 errno %d", sysdecode_freebsd_to_abi_errno( syscallabi(sv_flags), error)); if (fancy) printf(" %s", strerror(ktr->ktr_error)); } putchar('\n'); } void ktrnamei(char *cp, int len) { printf("\"%.*s\"\n", len, cp); } void hexdump(char *p, int len, int screenwidth) { int n, i; int width; width = 0; do { width += 2; i = 13; /* base offset */ i += (width / 2) + 1; /* spaces every second byte */ i += (width * 2); /* width of bytes */ i += 3; /* " |" */ i += width; /* each byte */ i += 1; /* "|" */ } while (i < screenwidth); width -= 2; for (n = 0; n < len; n += width) { for (i = n; i < n + width; i++) { if ((i % width) == 0) { /* beginning of line */ printf(" 0x%04x", i); } if ((i % 2) == 0) { printf(" "); } if (i < len) printf("%02x", p[i] & 0xff); else printf(" "); } printf(" |"); for (i = n; i < n + width; i++) { if (i >= len) break; if (p[i] >= ' ' && p[i] <= '~') printf("%c", p[i]); else printf("."); } printf("|\n"); } if ((i % width) != 0) printf("\n"); } void visdump(char *dp, int datalen, int screenwidth) { int col = 0; char *cp; int width; char visbuf[5]; printf(" \""); col = 8; for (;datalen > 0; datalen--, dp++) { vis(visbuf, *dp, VIS_CSTYLE | VIS_NOLOCALE, *(dp+1)); cp = visbuf; /* * Keep track of printables and * space chars (like fold(1)). 
*/ if (col == 0) { putchar('\t'); col = 8; } switch(*cp) { case '\n': col = 0; putchar('\n'); continue; case '\t': width = 8 - (col&07); break; default: width = strlen(cp); } if (col + width > (screenwidth-2)) { printf("\\\n\t"); col = 8; } col += width; do { putchar(*cp++); } while (*cp); } if (col == 0) printf(" "); printf("\"\n"); } void ktrgenio(struct ktr_genio *ktr, int len) { int datalen = len - sizeof (struct ktr_genio); char *dp = (char *)ktr + sizeof (struct ktr_genio); static int screenwidth = 0; int i, binary; printf("fd %d %s %d byte%s\n", ktr->ktr_fd, ktr->ktr_rw == UIO_READ ? "read" : "wrote", datalen, datalen == 1 ? "" : "s"); if (suppressdata) return; if (screenwidth == 0) { struct winsize ws; if (fancy && ioctl(fileno(stderr), TIOCGWINSZ, &ws) != -1 && ws.ws_col > 8) screenwidth = ws.ws_col; else screenwidth = 80; } if (maxdata && datalen > maxdata) datalen = maxdata; for (i = 0, binary = 0; i < datalen && binary == 0; i++) { if (dp[i] >= 32 && dp[i] < 127) continue; if (dp[i] == 10 || dp[i] == 13 || dp[i] == 0 || dp[i] == 9) continue; binary = 1; } if (binary) hexdump(dp, datalen, screenwidth); else visdump(dp, datalen, screenwidth); } void ktrpsig(struct ktr_psig *psig) { const char *str; print_signal(psig->signo); if (psig->action == SIG_DFL) { printf(" SIG_DFL"); } else { printf(" caught handler=0x%lx mask=0x%x", (u_long)psig->action, psig->mask.__bits[0]); } printf(" code="); str = sysdecode_sigcode(psig->signo, psig->code); if (str != NULL) printf("%s", str); else printf("", psig->code); putchar('\n'); } void ktrcsw_old(struct ktr_csw_old *cs) { printf("%s %s\n", cs->out ? "stop" : "resume", cs->user ? "user" : "kernel"); } void ktrcsw(struct ktr_csw *cs) { printf("%s %s \"%s\"\n", cs->out ? "stop" : "resume", cs->user ? 
"user" : "kernel", cs->wmesg); } void ktruser(int len, void *p) { unsigned char *cp; if (sysdecode_utrace(stdout, p, len)) { printf("\n"); return; } printf("%d ", len); cp = p; while (len--) if (decimal) printf(" %d", *cp++); else printf(" %02x", *cp++); printf("\n"); } void ktrcaprights(cap_rights_t *rightsp) { printf("cap_rights_t "); sysdecode_cap_rights(stdout, rightsp); printf("\n"); } static void ktrtimeval(struct timeval *tv) { printf("{%ld, %ld}", (long)tv->tv_sec, tv->tv_usec); } void ktritimerval(struct itimerval *it) { printf("itimerval { .interval = "); ktrtimeval(&it->it_interval); printf(", .value = "); ktrtimeval(&it->it_value); printf(" }\n"); } void ktrsockaddr(struct sockaddr *sa) { /* TODO: Support additional address families #include struct sockaddr_nb *nb; */ const char *str; char addr[64]; /* * note: ktrstruct() has already verified that sa points to a * buffer at least sizeof(struct sockaddr) bytes long and exactly * sa->sa_len bytes long. */ printf("struct sockaddr { "); str = sysdecode_sockaddr_family(sa->sa_family); if (str != NULL) printf("%s", str); else printf("", sa->sa_family); printf(", "); #define check_sockaddr_len(n) \ if (sa_##n.s##n##_len < sizeof(struct sockaddr_##n)) { \ printf("invalid"); \ break; \ } switch(sa->sa_family) { case AF_INET: { struct sockaddr_in sa_in; memset(&sa_in, 0, sizeof(sa_in)); memcpy(&sa_in, sa, sa->sa_len); check_sockaddr_len(in); inet_ntop(AF_INET, &sa_in.sin_addr, addr, sizeof addr); printf("%s:%u", addr, ntohs(sa_in.sin_port)); break; } case AF_INET6: { struct sockaddr_in6 sa_in6; memset(&sa_in6, 0, sizeof(sa_in6)); memcpy(&sa_in6, sa, sa->sa_len); check_sockaddr_len(in6); getnameinfo((struct sockaddr *)&sa_in6, sizeof(sa_in6), addr, sizeof(addr), NULL, 0, NI_NUMERICHOST); printf("[%s]:%u", addr, htons(sa_in6.sin6_port)); break; } case AF_UNIX: { struct sockaddr_un sa_un; memset(&sa_un, 0, sizeof(sa_un)); memcpy(&sa_un, sa, sa->sa_len); printf("%.*s", (int)sizeof(sa_un.sun_path), sa_un.sun_path); 
break; } default: printf("unknown address family"); } printf(" }\n"); } void ktrstat(struct stat *statp) { char mode[12], timestr[PATH_MAX + 4]; struct passwd *pwd; struct group *grp; struct tm *tm; /* * note: ktrstruct() has already verified that statp points to a * buffer exactly sizeof(struct stat) bytes long. */ printf("struct stat {"); printf("dev=%ju, ino=%ju, ", (uintmax_t)statp->st_dev, (uintmax_t)statp->st_ino); if (!resolv) printf("mode=0%jo, ", (uintmax_t)statp->st_mode); else { strmode(statp->st_mode, mode); printf("mode=%s, ", mode); } printf("nlink=%ju, ", (uintmax_t)statp->st_nlink); if (!resolv) { pwd = NULL; } else { #ifdef WITH_CASPER if (cappwd != NULL) pwd = cap_getpwuid(cappwd, statp->st_uid); else #endif pwd = getpwuid(statp->st_uid); } if (pwd == NULL) printf("uid=%ju, ", (uintmax_t)statp->st_uid); else printf("uid=\"%s\", ", pwd->pw_name); if (!resolv) { grp = NULL; } else { #ifdef WITH_CASPER if (capgrp != NULL) grp = cap_getgrgid(capgrp, statp->st_gid); else #endif grp = getgrgid(statp->st_gid); } if (grp == NULL) printf("gid=%ju, ", (uintmax_t)statp->st_gid); else printf("gid=\"%s\", ", grp->gr_name); printf("rdev=%ju, ", (uintmax_t)statp->st_rdev); printf("atime="); if (!resolv) printf("%jd", (intmax_t)statp->st_atim.tv_sec); else { tm = localtime(&statp->st_atim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_atim.tv_nsec != 0) printf(".%09ld, ", statp->st_atim.tv_nsec); else printf(", "); printf("mtime="); if (!resolv) printf("%jd", (intmax_t)statp->st_mtim.tv_sec); else { tm = localtime(&statp->st_mtim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_mtim.tv_nsec != 0) printf(".%09ld, ", statp->st_mtim.tv_nsec); else printf(", "); printf("ctime="); if (!resolv) printf("%jd", (intmax_t)statp->st_ctim.tv_sec); else { tm = localtime(&statp->st_ctim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", 
timestr); } if (statp->st_ctim.tv_nsec != 0) printf(".%09ld, ", statp->st_ctim.tv_nsec); else printf(", "); printf("birthtime="); if (!resolv) printf("%jd", (intmax_t)statp->st_birthtim.tv_sec); else { tm = localtime(&statp->st_birthtim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_birthtim.tv_nsec != 0) printf(".%09ld, ", statp->st_birthtim.tv_nsec); else printf(", "); printf("size=%jd, blksize=%ju, blocks=%jd, flags=0x%x", (uintmax_t)statp->st_size, (uintmax_t)statp->st_blksize, (intmax_t)statp->st_blocks, statp->st_flags); printf(" }\n"); } +void +ktrbitset(char *name, struct bitset *set, size_t setlen) +{ + int i, maxi, c = 0; + + if (setlen > INT32_MAX) + setlen = INT32_MAX; + maxi = setlen * CHAR_BIT; + printf("%s [ ", name); + for (i = 0; i < maxi; i++) { + if (!BIT_ISSET(setlen, i, set)) + continue; + if (c == 0) + printf("%d", i); + else + printf(", %d", i); + c++; + } + if (c == 0) + printf(" empty ]\n"); + else + printf(" ]\n"); +} + void ktrstruct(char *buf, size_t buflen) { char *name, *data; size_t namelen, datalen; int i; cap_rights_t rights; struct itimerval it; struct stat sb; struct sockaddr_storage ss; + struct bitset *set; for (name = buf, namelen = 0; namelen < buflen && name[namelen] != '\0'; ++namelen) /* nothing */; if (namelen == buflen) goto invalid; if (name[namelen] != '\0') goto invalid; data = buf + namelen + 1; datalen = buflen - namelen - 1; if (datalen == 0) goto invalid; /* sanity check */ for (i = 0; i < (int)namelen; ++i) - if (!isalpha(name[i])) + if (!isalpha(name[i]) && name[i] != '_') goto invalid; if (strcmp(name, "caprights") == 0) { if (datalen != sizeof(cap_rights_t)) goto invalid; memcpy(&rights, data, datalen); ktrcaprights(&rights); } else if (strcmp(name, "itimerval") == 0) { if (datalen != sizeof(struct itimerval)) goto invalid; memcpy(&it, data, datalen); ktritimerval(&it); } else if (strcmp(name, "stat") == 0) { if (datalen != sizeof(struct stat)) goto 
invalid; memcpy(&sb, data, datalen); ktrstat(&sb); } else if (strcmp(name, "sockaddr") == 0) { if (datalen > sizeof(ss)) goto invalid; memcpy(&ss, data, datalen); if (datalen != ss.ss_len) goto invalid; ktrsockaddr((struct sockaddr *)&ss); + } else if (strcmp(name, "cpuset_t") == 0) { + if (datalen < 1) + goto invalid; + set = malloc(datalen); + if (set == NULL) + errx(1, "%s", strerror(ENOMEM)); + memcpy(set, data, datalen); + ktrbitset(name, set, datalen); + free(set); } else { printf("unknown structure\n"); } return; invalid: printf("invalid record\n"); } void ktrcapfail(struct ktr_cap_fail *ktr) { switch (ktr->cap_type) { case CAPFAIL_NOTCAPABLE: /* operation on fd with insufficient capabilities */ printf("operation requires "); sysdecode_cap_rights(stdout, &ktr->cap_needed); printf(", descriptor holds "); sysdecode_cap_rights(stdout, &ktr->cap_held); break; case CAPFAIL_INCREASE: /* requested more capabilities than fd already has */ printf("attempt to increase capabilities from "); sysdecode_cap_rights(stdout, &ktr->cap_held); printf(" to "); sysdecode_cap_rights(stdout, &ktr->cap_needed); break; case CAPFAIL_SYSCALL: /* called restricted syscall */ printf("disallowed system call"); break; case CAPFAIL_LOOKUP: /* absolute or AT_FDCWD path, ".." path, etc. 
*/ printf("restricted VFS lookup"); break; default: printf("unknown capability failure: "); sysdecode_cap_rights(stdout, &ktr->cap_needed); printf(" "); sysdecode_cap_rights(stdout, &ktr->cap_held); break; } printf("\n"); } void ktrfault(struct ktr_fault *ktr) { printf("0x%jx ", (uintmax_t)ktr->vaddr); print_mask_arg(sysdecode_vmprot, ktr->type); printf("\n"); } void ktrfaultend(struct ktr_faultend *ktr) { const char *str; str = sysdecode_vmresult(ktr->result); if (str != NULL) printf("%s", str); else printf("", ktr->result); printf("\n"); } void ktrkevent(struct kevent *kev) { printf("{ ident="); switch (kev->filter) { case EVFILT_READ: case EVFILT_WRITE: case EVFILT_VNODE: case EVFILT_PROC: case EVFILT_TIMER: case EVFILT_PROCDESC: case EVFILT_EMPTY: printf("%ju", (uintmax_t)kev->ident); break; case EVFILT_SIGNAL: print_signal(kev->ident); break; default: printf("%p", (void *)kev->ident); } printf(", filter="); print_integer_arg(sysdecode_kevent_filter, kev->filter); printf(", flags="); print_mask_arg0(sysdecode_kevent_flags, kev->flags); printf(", fflags="); sysdecode_kevent_fflags(stdout, kev->filter, kev->fflags, decimal ? 
10 : 16); printf(", data=%#jx, udata=%p }", (uintmax_t)kev->data, kev->udata); } void ktrstructarray(struct ktr_struct_array *ksa, size_t buflen) { struct kevent kev; char *name, *data; size_t namelen, datalen; int i; bool first; buflen -= sizeof(*ksa); for (name = (char *)(ksa + 1), namelen = 0; namelen < buflen && name[namelen] != '\0'; ++namelen) /* nothing */; if (namelen == buflen) goto invalid; if (name[namelen] != '\0') goto invalid; /* sanity check */ for (i = 0; i < (int)namelen; ++i) if (!isalnum(name[i]) && name[i] != '_') goto invalid; data = name + namelen + 1; datalen = buflen - namelen - 1; printf("struct %s[] = { ", name); first = true; for (; datalen >= ksa->struct_size; data += ksa->struct_size, datalen -= ksa->struct_size) { if (!first) printf("\n "); else first = false; if (strcmp(name, "kevent") == 0) { if (ksa->struct_size != sizeof(kev)) goto bad_size; memcpy(&kev, data, sizeof(kev)); ktrkevent(&kev); } else if (strcmp(name, "freebsd11_kevent") == 0) { struct freebsd11_kevent kev11; if (ksa->struct_size != sizeof(kev11)) goto bad_size; memcpy(&kev11, data, sizeof(kev11)); memset(&kev, 0, sizeof(kev)); kev.ident = kev11.ident; kev.filter = kev11.filter; kev.flags = kev11.flags; kev.fflags = kev11.fflags; kev.data = kev11.data; kev.udata = kev11.udata; ktrkevent(&kev); #ifdef _WANT_KEVENT32 } else if (strcmp(name, "kevent32") == 0) { struct kevent32 kev32; if (ksa->struct_size != sizeof(kev32)) goto bad_size; memcpy(&kev32, data, sizeof(kev32)); memset(&kev, 0, sizeof(kev)); kev.ident = kev32.ident; kev.filter = kev32.filter; kev.flags = kev32.flags; kev.fflags = kev32.fflags; #if BYTE_ORDER == BIG_ENDIAN kev.data = kev32.data2 | ((int64_t)kev32.data1 << 32); #else kev.data = kev32.data1 | ((int64_t)kev32.data2 << 32); #endif kev.udata = (void *)(uintptr_t)kev32.udata; ktrkevent(&kev); } else if (strcmp(name, "freebsd11_kevent32") == 0) { struct freebsd11_kevent32 kev32; if (ksa->struct_size != sizeof(kev32)) goto bad_size; memcpy(&kev32, data, 
sizeof(kev32)); memset(&kev, 0, sizeof(kev)); kev.ident = kev32.ident; kev.filter = kev32.filter; kev.flags = kev32.flags; kev.fflags = kev32.fflags; kev.data = kev32.data; kev.udata = (void *)(uintptr_t)kev32.udata; ktrkevent(&kev); #endif } else { printf(" }\n"); return; } } printf(" }\n"); return; invalid: printf("invalid record\n"); return; bad_size: printf(" }\n"); return; } void usage(void) { fprintf(stderr, "usage: kdump [-dEnlHRrSsTA] [-f trfile] " "[-m maxdata] [-p pid] [-t trstr]\n"); exit(1); }