Index: head/share/man/man9/Makefile
===================================================================
--- head/share/man/man9/Makefile	(revision 355708)
+++ head/share/man/man9/Makefile	(revision 355709)
@@ -1,2333 +1,2333 @@
 # $FreeBSD$
 
 .include <src.opts.mk>
 
 MAN=	accept_filter.9 \
 	accf_data.9 \
 	accf_dns.9 \
 	accf_http.9 \
 	acl.9 \
 	alq.9 \
 	altq.9 \
 	atomic.9 \
 	bhnd.9 \
 	bhnd_erom.9 \
 	bios.9 \
 	bitset.9 \
 	boot.9 \
 	bpf.9 \
 	buf.9 \
 	buf_ring.9 \
 	BUF_ISLOCKED.9 \
 	BUF_LOCK.9 \
 	BUF_LOCKFREE.9 \
 	BUF_LOCKINIT.9 \
 	BUF_RECURSED.9 \
 	BUF_TIMELOCK.9 \
 	BUF_UNLOCK.9 \
 	bus_activate_resource.9 \
 	BUS_ADD_CHILD.9 \
 	bus_adjust_resource.9 \
 	bus_alloc_resource.9 \
 	BUS_BIND_INTR.9 \
 	bus_child_present.9 \
 	BUS_CHILD_DELETED.9 \
 	BUS_CHILD_DETACHED.9 \
 	BUS_CONFIG_INTR.9 \
 	BUS_DESCRIBE_INTR.9 \
 	bus_dma.9 \
 	bus_generic_attach.9 \
 	bus_generic_detach.9 \
 	bus_generic_new_pass.9 \
 	bus_generic_print_child.9 \
 	bus_generic_read_ivar.9 \
 	bus_generic_shutdown.9 \
 	BUS_GET_CPUS.9 \
 	bus_get_resource.9 \
 	bus_map_resource.9 \
 	BUS_NEW_PASS.9 \
 	BUS_PRINT_CHILD.9 \
 	BUS_READ_IVAR.9 \
 	BUS_RESCAN.9 \
 	bus_release_resource.9 \
 	bus_set_pass.9 \
 	bus_set_resource.9 \
 	BUS_SETUP_INTR.9 \
 	bus_space.9 \
 	byteorder.9 \
 	casuword.9 \
 	cd.9 \
 	cnv.9 \
 	condvar.9 \
 	config_intrhook.9 \
 	contigmalloc.9 \
 	copy.9 \
 	counter.9 \
 	cpuset.9 \
 	cr_cansee.9 \
 	critical_enter.9 \
 	cr_seeothergids.9 \
 	cr_seeotheruids.9 \
 	crypto.9 \
 	CTASSERT.9 \
 	DB_COMMAND.9 \
 	DECLARE_GEOM_CLASS.9 \
 	DECLARE_MODULE.9 \
 	DEFINE_IFUNC.9 \
 	DELAY.9 \
 	devclass.9 \
 	devclass_find.9 \
 	devclass_get_device.9 \
 	devclass_get_devices.9 \
 	devclass_get_drivers.9 \
 	devclass_get_maxunit.9 \
 	devclass_get_name.9 \
 	devclass_get_softc.9 \
 	dev_clone.9 \
 	devfs_set_cdevpriv.9 \
 	device.9 \
 	device_add_child.9 \
 	DEVICE_ATTACH.9 \
 	device_delete_child.9 \
 	device_delete_children.9 \
 	DEVICE_DETACH.9 \
 	device_enable.9 \
 	device_find_child.9 \
 	device_get_children.9 \
 	device_get_devclass.9 \
 	device_get_driver.9 \
 	device_get_ivars.9 \
 	device_get_name.9 \
 	device_get_parent.9 \
 	device_get_softc.9 \
 	device_get_state.9 \
 	device_get_sysctl.9 \
 	device_get_unit.9 \
 	DEVICE_IDENTIFY.9 \
 	device_printf.9 \
 	DEVICE_PROBE.9 \
 	device_probe_and_attach.9 \
 	device_quiet.9 \
 	device_set_desc.9 \
 	device_set_driver.9 \
 	device_set_flags.9 \
 	DEVICE_SHUTDOWN.9 \
 	DEV_MODULE.9 \
 	dev_refthread.9 \
 	devstat.9 \
 	devtoname.9 \
 	disk.9 \
 	dnv.9 \
 	domain.9 \
 	domainset.9 \
 	dpcpu.9 \
 	drbr.9 \
 	driver.9 \
 	DRIVER_MODULE.9 \
 	efirt.9 \
 	epoch.9 \
 	EVENTHANDLER.9 \
 	eventtimers.9 \
 	extattr.9 \
 	fail.9 \
 	fdt_pinctrl.9 \
 	fetch.9 \
 	firmware.9 \
 	fpu_kern.9 \
 	g_access.9 \
 	g_attach.9 \
 	g_bio.9 \
 	g_consumer.9 \
 	g_data.9 \
 	get_cyclecount.9 \
 	getenv.9 \
 	getnewvnode.9 \
 	g_event.9 \
 	g_geom.9 \
 	g_provider.9 \
 	g_provider_by_name.9 \
 	groupmember.9 \
 	g_wither_geom.9 \
 	hash.9 \
 	hashinit.9 \
 	hexdump.9 \
 	hhook.9 \
 	ieee80211.9 \
 	ieee80211_amrr.9 \
 	ieee80211_beacon.9 \
 	ieee80211_bmiss.9 \
 	ieee80211_crypto.9 \
 	ieee80211_ddb.9 \
 	ieee80211_input.9 \
 	ieee80211_node.9 \
 	ieee80211_output.9 \
 	ieee80211_proto.9 \
 	ieee80211_radiotap.9 \
 	ieee80211_regdomain.9 \
 	ieee80211_scan.9 \
 	ieee80211_vap.9 \
 	iflib.9 \
 	iflibdd.9 \
 	iflibdi.9 \
 	iflibtxrx.9 \
 	ifnet.9 \
 	inittodr.9 \
 	insmntque.9 \
 	intro.9 \
 	ithread.9 \
 	KASSERT.9 \
 	kern_testfrwk.9 \
 	kernacc.9 \
 	kernel_mount.9 \
 	khelp.9 \
 	kobj.9 \
 	kproc.9 \
 	kqueue.9 \
 	kthread.9 \
 	ktr.9 \
 	lock.9 \
 	locking.9 \
 	LOCK_PROFILING.9 \
 	mac.9 \
 	make_dev.9 \
 	malloc.9 \
 	mbchain.9 \
 	mbuf.9 \
 	mbuf_tags.9 \
 	MD5.9 \
 	mdchain.9 \
 	memcchr.9 \
 	memguard.9 \
 	microseq.9 \
 	microtime.9 \
 	microuptime.9 \
 	mi_switch.9 \
 	mod_cc.9 \
 	module.9 \
 	MODULE_DEPEND.9 \
 	MODULE_PNP_INFO.9 \
 	MODULE_VERSION.9 \
 	mtx_pool.9 \
 	mutex.9 \
 	namei.9 \
 	netisr.9 \
 	nv.9 \
 	OF_child.9 \
 	OF_device_from_xref.9 \
 	OF_finddevice.9 \
 	OF_getprop.9 \
 	OF_node_from_xref.9 \
 	OF_package_to_path.9 \
 	ofw_bus_is_compatible.9 \
 	ofw_bus_status_okay.9 \
 	osd.9 \
 	owll.9 \
 	own.9 \
 	panic.9 \
 	PCBGROUP.9 \
 	p_candebug.9 \
 	p_cansee.9 \
 	pci.9 \
 	PCI_IOV_ADD_VF.9 \
 	PCI_IOV_INIT.9 \
 	pci_iov_schema.9 \
 	PCI_IOV_UNINIT.9 \
 	pfil.9 \
 	pfind.9 \
 	pget.9 \
 	pgfind.9 \
 	PHOLD.9 \
 	physio.9 \
 	pmap.9 \
 	pmap_activate.9 \
 	pmap_clear_modify.9 \
 	pmap_copy.9 \
 	pmap_enter.9 \
 	pmap_extract.9 \
 	pmap_growkernel.9 \
 	pmap_init.9 \
 	pmap_is_modified.9 \
 	pmap_is_prefaultable.9 \
 	pmap_map.9 \
 	pmap_mincore.9 \
 	pmap_object_init_pt.9 \
 	pmap_page_exists_quick.9 \
 	pmap_page_init.9 \
 	pmap_pinit.9 \
 	pmap_protect.9 \
 	pmap_qenter.9 \
 	pmap_quick_enter_page.9 \
 	pmap_release.9 \
 	pmap_remove.9 \
 	pmap_resident_count.9 \
 	pmap_unwire.9 \
 	pmap_zero_page.9 \
 	printf.9 \
 	prison_check.9 \
 	priv.9 \
 	proc_rwmem.9 \
 	pseudofs.9 \
 	psignal.9 \
 	pwmbus.9 \
 	random.9 \
 	random_harvest.9 \
 	ratecheck.9 \
 	redzone.9 \
 	refcount.9 \
 	resettodr.9 \
 	resource_int_value.9 \
 	rijndael.9 \
 	rman.9 \
 	rmlock.9 \
 	rtalloc.9 \
 	rtentry.9 \
 	runqueue.9 \
 	rwlock.9 \
 	sbuf.9 \
 	scheduler.9 \
 	SDT.9 \
 	securelevel_gt.9 \
 	selrecord.9 \
 	sema.9 \
 	seqc.9 \
 	sf_buf.9 \
 	sglist.9 \
 	shm_map.9 \
 	signal.9 \
 	sleep.9 \
 	sleepqueue.9 \
 	socket.9 \
 	stack.9 \
 	store.9 \
 	style.9 \
 	style.lua.9 \
 	${_superio.9} \
 	swi.9 \
 	sx.9 \
 	syscall_helper_register.9 \
 	SYSCALL_MODULE.9 \
 	sysctl.9 \
 	sysctl_add_oid.9 \
 	sysctl_ctx_init.9 \
 	SYSINIT.9 \
 	taskqueue.9 \
 	tcp_functions.9 \
 	thread_exit.9 \
 	time.9 \
 	timeout.9 \
 	tvtohz.9 \
 	ucred.9 \
 	uidinfo.9 \
 	uio.9 \
 	unr.9 \
 	vaccess.9 \
 	vaccess_acl_nfs4.9 \
 	vaccess_acl_posix1e.9 \
 	vcount.9 \
 	vflush.9 \
 	VFS.9 \
 	vfs_busy.9 \
 	VFS_CHECKEXP.9 \
 	vfsconf.9 \
 	VFS_FHTOVP.9 \
 	vfs_getnewfsid.9 \
 	vfs_getopt.9 \
 	vfs_getvfs.9 \
 	VFS_MOUNT.9 \
 	vfs_mountedfrom.9 \
 	VFS_QUOTACTL.9 \
 	VFS_ROOT.9 \
 	vfs_rootmountalloc.9 \
 	VFS_SET.9 \
 	VFS_STATFS.9 \
 	vfs_suser.9 \
 	VFS_SYNC.9 \
 	vfs_timestamp.9 \
 	vfs_unbusy.9 \
 	VFS_UNMOUNT.9 \
 	vfs_unmountall.9 \
 	VFS_VGET.9 \
 	vget.9 \
 	vgone.9 \
 	vhold.9 \
 	vinvalbuf.9 \
 	vm_fault_prefault.9 \
 	vm_map.9 \
 	vm_map_check_protection.9 \
 	vm_map_create.9 \
 	vm_map_delete.9 \
 	vm_map_entry_resize_free.9 \
 	vm_map_find.9 \
 	vm_map_findspace.9 \
 	vm_map_inherit.9 \
 	vm_map_init.9 \
 	vm_map_insert.9 \
 	vm_map_lock.9 \
 	vm_map_lookup.9 \
 	vm_map_madvise.9 \
 	vm_map_max.9 \
 	vm_map_protect.9 \
 	vm_map_remove.9 \
 	vm_map_simplify_entry.9 \
 	vm_map_stack.9 \
 	vm_map_submap.9 \
 	vm_map_sync.9 \
 	vm_map_wire.9 \
 	vm_page_alloc.9 \
 	vm_page_bits.9 \
 	vm_page_busy.9 \
 	vm_page_deactivate.9 \
 	vm_page_dontneed.9 \
 	vm_page_aflag.9 \
 	vm_page_free.9 \
 	vm_page_grab.9 \
 	vm_page_insert.9 \
 	vm_page_lookup.9 \
 	vm_page_rename.9 \
 	vm_page_wire.9 \
 	vm_set_page_size.9 \
 	vmem.9 \
 	vn_fullpath.9 \
 	vn_isdisk.9 \
 	vnet.9 \
 	vnode.9 \
 	VOP_ACCESS.9 \
 	VOP_ACLCHECK.9 \
 	VOP_ADVISE.9 \
 	VOP_ADVLOCK.9 \
 	VOP_ALLOCATE.9 \
 	VOP_ATTRIB.9 \
 	VOP_BMAP.9 \
 	VOP_BWRITE.9 \
 	VOP_COPY_FILE_RANGE.9 \
 	VOP_CREATE.9 \
 	VOP_FSYNC.9 \
 	VOP_GETACL.9 \
 	VOP_GETEXTATTR.9 \
 	VOP_GETPAGES.9 \
 	VOP_INACTIVE.9 \
 	VOP_IOCTL.9 \
 	VOP_LINK.9 \
 	VOP_LISTEXTATTR.9 \
 	VOP_LOCK.9 \
 	VOP_LOOKUP.9 \
 	VOP_OPENCLOSE.9 \
 	VOP_PATHCONF.9 \
 	VOP_PRINT.9 \
 	VOP_RDWR.9 \
 	VOP_READDIR.9 \
 	VOP_READLINK.9 \
 	VOP_REALLOCBLKS.9 \
 	VOP_REMOVE.9 \
 	VOP_RENAME.9 \
 	VOP_REVOKE.9 \
 	VOP_SETACL.9 \
 	VOP_SETEXTATTR.9 \
 	VOP_STRATEGY.9 \
 	VOP_VPTOCNP.9 \
 	VOP_VPTOFH.9 \
 	vref.9 \
 	vrefcnt.9 \
 	vrele.9 \
 	vslock.9 \
 	watchdog.9 \
 	zone.9
 
 MLINKS=	unr.9 alloc_unr.9 \
 	unr.9 alloc_unrl.9 \
 	unr.9 alloc_unr_specific.9 \
 	unr.9 clear_unrhdr.9 \
 	unr.9 delete_unrhdr.9 \
 	unr.9 free_unr.9 \
 	unr.9 new_unrhdr.9
 MLINKS+=accept_filter.9 accept_filt_add.9 \
 	accept_filter.9 accept_filt_del.9 \
 	accept_filter.9 accept_filt_generic_mod_event.9 \
 	accept_filter.9 accept_filt_get.9
 MLINKS+=alq.9 ALQ.9 \
 	alq.9 alq_close.9 \
 	alq.9 alq_flush.9 \
 	alq.9 alq_get.9 \
 	alq.9 alq_getn.9 \
 	alq.9 alq_open.9 \
 	alq.9 alq_open_flags.9 \
 	alq.9 alq_post.9 \
 	alq.9 alq_post_flags.9 \
 	alq.9 alq_write.9 \
 	alq.9 alq_writen.9
 MLINKS+=altq.9 ALTQ.9
 MLINKS+=atomic.9 atomic_add.9 \
 	atomic.9 atomic_clear.9 \
 	atomic.9 atomic_cmpset.9 \
 	atomic.9 atomic_fcmpset.9 \
 	atomic.9 atomic_fetchadd.9 \
 	atomic.9 atomic_load.9 \
 	atomic.9 atomic_readandclear.9 \
 	atomic.9 atomic_set.9 \
 	atomic.9 atomic_store.9 \
 	atomic.9 atomic_subtract.9 \
 	atomic.9 atomic_swap.9 \
 	atomic.9 atomic_testandclear.9 \
 	atomic.9 atomic_testandset.9 \
 	atomic.9 atomic_thread_fence.9
 MLINKS+=bhnd.9 BHND_MATCH_BOARD_TYPE.9 \
 	bhnd.9 BHND_MATCH_BOARD_VENDOR.9 \
 	bhnd.9 BHND_MATCH_CHIP_ID.9 \
 	bhnd.9 BHND_MATCH_CHIP_PKG.9 \
 	bhnd.9 BHND_MATCH_CHIP_REV.9 \
 	bhnd.9 BHND_MATCH_CORE_ID.9 \
 	bhnd.9 BHND_MATCH_CORE_VENDOR.9 \
 	bhnd.9 bhnd_activate_resource.9 \
 	bhnd.9 bhnd_alloc_pmu.9 \
 	bhnd.9 bhnd_alloc_resource.9 \
 	bhnd.9 bhnd_alloc_resource_any.9 \
 	bhnd.9 bhnd_alloc_resources.9 \
 	bhnd.9 bhnd_board_matches.9 \
 	bhnd.9 bhnd_bus_match_child.9 \
 	bhnd.9 bhnd_bus_read_1.9 \
 	bhnd.9 bhnd_bus_read_2.9 \
 	bhnd.9 bhnd_bus_read_4.9 \
 	bhnd.9 bhnd_bus_read_stream_1.9 \
 	bhnd.9 bhnd_bus_read_stream_2.9 \
 	bhnd.9 bhnd_bus_read_stream_4.9 \
 	bhnd.9 bhnd_bus_write_1.9 \
 	bhnd.9 bhnd_bus_write_2.9 \
 	bhnd.9 bhnd_bus_write_4.9 \
 	bhnd.9 bhnd_bus_write_stream_1.9 \
 	bhnd.9 bhnd_bus_write_stream_2.9 \
 	bhnd.9 bhnd_bus_write_stream_4.9 \
 	bhnd.9 bhnd_chip_matches.9 \
 	bhnd.9 bhnd_core_class.9 \
 	bhnd.9 bhnd_core_get_match_desc.9 \
 	bhnd.9 bhnd_core_matches.9 \
 	bhnd.9 bhnd_core_name.9 \
 	bhnd.9 bhnd_cores_equal.9 \
 	bhnd.9 bhnd_deactivate_resource.9 \
 	bhnd.9 bhnd_decode_port_rid.9 \
 	bhnd.9 bhnd_deregister_provider.9 \
 	bhnd.9 bhnd_device_lookup.9 \
 	bhnd.9 bhnd_device_matches.9 \
 	bhnd.9 bhnd_device_quirks.9 \
 	bhnd.9 bhnd_driver_get_erom_class.9 \
 	bhnd.9 bhnd_enable_clocks.9 \
 	bhnd.9 bhnd_find_core_class.9 \
 	bhnd.9 bhnd_find_core_name.9 \
 	bhnd.9 bhnd_format_chip_id.9 \
 	bhnd.9 bhnd_get_attach_type.9 \
 	bhnd.9 bhnd_get_chipid.9 \
 	bhnd.9 bhnd_get_class.9 \
 	bhnd.9 bhnd_get_clock_freq.9 \
 	bhnd.9 bhnd_get_clock_latency.9 \
 	bhnd.9 bhnd_get_core_index.9 \
 	bhnd.9 bhnd_get_core_info.9 \
 	bhnd.9 bhnd_get_core_unit.9 \
 	bhnd.9 bhnd_get_device.9 \
 	bhnd.9 bhnd_get_device_name.9 \
 	bhnd.9 bhnd_get_dma_translation.9 \
 	bhnd.9 bhnd_get_hwrev.9 \
 	bhnd.9 bhnd_get_intr_count.9 \
 	bhnd.9 bhnd_get_intr_ivec.9 \
 	bhnd.9 bhnd_get_port_count.9 \
 	bhnd.9 bhnd_get_port_rid.9 \
 	bhnd.9 bhnd_get_region_addr.9 \
 	bhnd.9 bhnd_get_region_count.9 \
 	bhnd.9 bhnd_get_vendor.9 \
 	bhnd.9 bhnd_get_vendor_name.9 \
 	bhnd.9 bhnd_hwrev_matches.9 \
 	bhnd.9 bhnd_is_hw_suspended.9 \
 	bhnd.9 bhnd_is_region_valid.9 \
 	bhnd.9 bhnd_map_intr.9 \
 	bhnd.9 bhnd_match_core.9 \
 	bhnd.9 bhnd_nvram_getvar.9 \
 	bhnd.9 bhnd_nvram_getvar_array.9 \
 	bhnd.9 bhnd_nvram_getvar_int.9 \
 	bhnd.9 bhnd_nvram_getvar_int16.9 \
 	bhnd.9 bhnd_nvram_getvar_int32.9 \
 	bhnd.9 bhnd_nvram_getvar_int8.9 \
 	bhnd.9 bhnd_nvram_getvar_str.9 \
 	bhnd.9 bhnd_nvram_getvar_uint.9 \
 	bhnd.9 bhnd_nvram_getvar_uint16.9 \
 	bhnd.9 bhnd_nvram_getvar_uint32.9 \
 	bhnd.9 bhnd_nvram_getvar_uint8.9 \
 	bhnd.9 bhnd_nvram_string_array_next.9 \
 	bhnd.9 bhnd_read_board_info.9 \
 	bhnd.9 bhnd_read_config.9 \
 	bhnd.9 bhnd_read_ioctl.9 \
 	bhnd.9 bhnd_read_iost.9 \
 	bhnd.9 bhnd_register_provider.9 \
 	bhnd.9 bhnd_release_ext_rsrc.9 \
 	bhnd.9 bhnd_release_pmu.9 \
 	bhnd.9 bhnd_release_provider.9 \
 	bhnd.9 bhnd_release_resource.9 \
 	bhnd.9 bhnd_release_resources.9 \
 	bhnd.9 bhnd_request_clock.9 \
 	bhnd.9 bhnd_request_ext_rsrc.9 \
 	bhnd.9 bhnd_reset_hw.9 \
 	bhnd.9 bhnd_retain_provider.9 \
 	bhnd.9 bhnd_set_custom_core_desc.9 \
 	bhnd.9 bhnd_set_default_core_desc.9 \
 	bhnd.9 bhnd_suspend_hw.9 \
 	bhnd.9 bhnd_unmap_intr.9 \
 	bhnd.9 bhnd_vendor_name.9 \
 	bhnd.9 bhnd_write_config.9 \
 	bhnd.9 bhnd_write_ioctl.9
 MLINKS+=bhnd_erom.9 bhnd_erom_alloc.9 \
 	bhnd_erom.9 bhnd_erom_dump.9 \
 	bhnd_erom.9 bhnd_erom_fini_static.9 \
 	bhnd_erom.9 bhnd_erom_free.9 \
 	bhnd_erom.9 bhnd_erom_free_core_table.9 \
 	bhnd_erom.9 bhnd_erom_get_core_table.9 \
 	bhnd_erom.9 bhnd_erom_init_static.9 \
 	bhnd_erom.9 bhnd_erom_io.9 \
 	bhnd_erom.9 bhnd_erom_io_fini.9 \
 	bhnd_erom.9 bhnd_erom_io_map.9 \
 	bhnd_erom.9 bhnd_erom_io_read.9 \
 	bhnd_erom.9 bhnd_erom_iobus_init.9 \
 	bhnd_erom.9 bhnd_erom_iores_new.9 \
 	bhnd_erom.9 bhnd_erom_lookup_core.9 \
 	bhnd_erom.9 bhnd_erom_lookup_core_addr.9 \
 	bhnd_erom.9 bhnd_erom_probe.9 \
 	bhnd_erom.9 bhnd_erom_probe_driver_classes.9
 MLINKS+=bitset.9 BITSET_DEFINE.9 \
 	bitset.9 BITSET_T_INITIALIZER.9 \
 	bitset.9 BITSET_FSET.9 \
 	bitset.9 BIT_CLR.9 \
 	bitset.9 BIT_COPY.9 \
 	bitset.9 BIT_ISSET.9 \
 	bitset.9 BIT_SET.9 \
 	bitset.9 BIT_ZERO.9 \
 	bitset.9 BIT_FILL.9 \
 	bitset.9 BIT_SETOF.9 \
 	bitset.9 BIT_EMPTY.9 \
 	bitset.9 BIT_ISFULLSET.9 \
 	bitset.9 BIT_FFS.9 \
 	bitset.9 BIT_COUNT.9 \
 	bitset.9 BIT_SUBSET.9 \
 	bitset.9 BIT_OVERLAP.9 \
 	bitset.9 BIT_CMP.9 \
 	bitset.9 BIT_OR.9 \
 	bitset.9 BIT_AND.9 \
-	bitset.9 BIT_NAND.9 \
+	bitset.9 BIT_ANDNOT.9 \
 	bitset.9 BIT_CLR_ATOMIC.9 \
 	bitset.9 BIT_SET_ATOMIC.9 \
 	bitset.9 BIT_SET_ATOMIC_ACQ.9 \
 	bitset.9 BIT_AND_ATOMIC.9 \
 	bitset.9 BIT_OR_ATOMIC.9 \
 	bitset.9 BIT_COPY_STORE_REL.9
 MLINKS+=bpf.9 bpfattach.9 \
 	bpf.9 bpfattach2.9 \
 	bpf.9 bpfdetach.9 \
 	bpf.9 bpf_filter.9 \
 	bpf.9 bpf_mtap.9 \
 	bpf.9 bpf_mtap2.9 \
 	bpf.9 bpf_tap.9 \
 	bpf.9 bpf_validate.9
 MLINKS+=buf.9 bp.9
 MLINKS+=buf_ring.9 buf_ring_alloc.9 \
 	buf_ring.9 buf_ring_free.9 \
 	buf_ring.9 buf_ring_enqueue.9 \
 	buf_ring.9 buf_ring_enqueue_bytes.9 \
 	buf_ring.9 buf_ring_dequeue_mc.9 \
 	buf_ring.9 buf_ring_dequeue_sc.9 \
 	buf_ring.9 buf_ring_count.9 \
 	buf_ring.9 buf_ring_empty.9 \
 	buf_ring.9 buf_ring_full.9 \
 	buf_ring.9 buf_ring_peek.9
 MLINKS+=bus_activate_resource.9 bus_deactivate_resource.9
 MLINKS+=bus_alloc_resource.9 bus_alloc_resource_any.9
 MLINKS+=BUS_BIND_INTR.9 bus_bind_intr.9
 MLINKS+=BUS_DESCRIBE_INTR.9 bus_describe_intr.9
 MLINKS+=bus_dma.9 busdma.9 \
 	bus_dma.9 bus_dmamap_create.9 \
 	bus_dma.9 bus_dmamap_destroy.9 \
 	bus_dma.9 bus_dmamap_load.9 \
 	bus_dma.9 bus_dmamap_load_bio.9 \
 	bus_dma.9 bus_dmamap_load_ccb.9 \
 	bus_dma.9 bus_dmamap_load_mbuf.9 \
 	bus_dma.9 bus_dmamap_load_mbuf_sg.9 \
 	bus_dma.9 bus_dmamap_load_uio.9 \
 	bus_dma.9 bus_dmamap_sync.9 \
 	bus_dma.9 bus_dmamap_unload.9 \
 	bus_dma.9 bus_dmamem_alloc.9 \
 	bus_dma.9 bus_dmamem_free.9 \
 	bus_dma.9 bus_dma_tag_create.9 \
 	bus_dma.9 bus_dma_tag_destroy.9
 MLINKS+=bus_generic_read_ivar.9 bus_generic_write_ivar.9
 MLINKS+=BUS_GET_CPUS.9 bus_get_cpus.9
 MLINKS+=bus_map_resource.9 bus_unmap_resource.9 \
 	bus_map_resource.9 resource_init_map_request.9
 MLINKS+=BUS_READ_IVAR.9 BUS_WRITE_IVAR.9
 MLINKS+=BUS_SETUP_INTR.9 bus_setup_intr.9 \
 	BUS_SETUP_INTR.9 BUS_TEARDOWN_INTR.9 \
 	BUS_SETUP_INTR.9 bus_teardown_intr.9
 MLINKS+=bus_space.9 bus_space_alloc.9 \
 	bus_space.9 bus_space_barrier.9 \
 	bus_space.9 bus_space_copy_region_1.9 \
 	bus_space.9 bus_space_copy_region_2.9 \
 	bus_space.9 bus_space_copy_region_4.9 \
 	bus_space.9 bus_space_copy_region_8.9 \
 	bus_space.9 bus_space_copy_region_stream_1.9 \
 	bus_space.9 bus_space_copy_region_stream_2.9 \
 	bus_space.9 bus_space_copy_region_stream_4.9 \
 	bus_space.9 bus_space_copy_region_stream_8.9 \
 	bus_space.9 bus_space_free.9 \
 	bus_space.9 bus_space_map.9 \
 	bus_space.9 bus_space_read_1.9 \
 	bus_space.9 bus_space_read_2.9 \
 	bus_space.9 bus_space_read_4.9 \
 	bus_space.9 bus_space_read_8.9 \
 	bus_space.9 bus_space_read_multi_1.9 \
 	bus_space.9 bus_space_read_multi_2.9 \
 	bus_space.9 bus_space_read_multi_4.9 \
 	bus_space.9 bus_space_read_multi_8.9 \
 	bus_space.9 bus_space_read_multi_stream_1.9 \
 	bus_space.9 bus_space_read_multi_stream_2.9 \
 	bus_space.9 bus_space_read_multi_stream_4.9 \
 	bus_space.9 bus_space_read_multi_stream_8.9 \
 	bus_space.9 bus_space_read_region_1.9 \
 	bus_space.9 bus_space_read_region_2.9 \
 	bus_space.9 bus_space_read_region_4.9 \
 	bus_space.9 bus_space_read_region_8.9 \
 	bus_space.9 bus_space_read_region_stream_1.9 \
 	bus_space.9 bus_space_read_region_stream_2.9 \
 	bus_space.9 bus_space_read_region_stream_4.9 \
 	bus_space.9 bus_space_read_region_stream_8.9 \
 	bus_space.9 bus_space_read_stream_1.9 \
 	bus_space.9 bus_space_read_stream_2.9 \
 	bus_space.9 bus_space_read_stream_4.9 \
 	bus_space.9 bus_space_read_stream_8.9 \
 	bus_space.9 bus_space_set_multi_1.9 \
 	bus_space.9 bus_space_set_multi_2.9 \
 	bus_space.9 bus_space_set_multi_4.9 \
 	bus_space.9 bus_space_set_multi_8.9 \
 	bus_space.9 bus_space_set_multi_stream_1.9 \
 	bus_space.9 bus_space_set_multi_stream_2.9 \
 	bus_space.9 bus_space_set_multi_stream_4.9 \
 	bus_space.9 bus_space_set_multi_stream_8.9 \
 	bus_space.9 bus_space_set_region_1.9 \
 	bus_space.9 bus_space_set_region_2.9 \
 	bus_space.9 bus_space_set_region_4.9 \
 	bus_space.9 bus_space_set_region_8.9 \
 	bus_space.9 bus_space_set_region_stream_1.9 \
 	bus_space.9 bus_space_set_region_stream_2.9 \
 	bus_space.9 bus_space_set_region_stream_4.9 \
 	bus_space.9 bus_space_set_region_stream_8.9 \
 	bus_space.9 bus_space_subregion.9 \
 	bus_space.9 bus_space_unmap.9 \
 	bus_space.9 bus_space_write_1.9 \
 	bus_space.9 bus_space_write_2.9 \
 	bus_space.9 bus_space_write_4.9 \
 	bus_space.9 bus_space_write_8.9 \
 	bus_space.9 bus_space_write_multi_1.9 \
 	bus_space.9 bus_space_write_multi_2.9 \
 	bus_space.9 bus_space_write_multi_4.9 \
 	bus_space.9 bus_space_write_multi_8.9 \
 	bus_space.9 bus_space_write_multi_stream_1.9 \
 	bus_space.9 bus_space_write_multi_stream_2.9 \
 	bus_space.9 bus_space_write_multi_stream_4.9 \
 	bus_space.9 bus_space_write_multi_stream_8.9 \
 	bus_space.9 bus_space_write_region_1.9 \
 	bus_space.9 bus_space_write_region_2.9 \
 	bus_space.9 bus_space_write_region_4.9 \
 	bus_space.9 bus_space_write_region_8.9 \
 	bus_space.9 bus_space_write_region_stream_1.9 \
 	bus_space.9 bus_space_write_region_stream_2.9 \
 	bus_space.9 bus_space_write_region_stream_4.9 \
 	bus_space.9 bus_space_write_region_stream_8.9 \
 	bus_space.9 bus_space_write_stream_1.9 \
 	bus_space.9 bus_space_write_stream_2.9 \
 	bus_space.9 bus_space_write_stream_4.9 \
 	bus_space.9 bus_space_write_stream_8.9
 MLINKS+=byteorder.9 be16dec.9 \
 	byteorder.9 be16enc.9 \
 	byteorder.9 be16toh.9 \
 	byteorder.9 be32dec.9 \
 	byteorder.9 be32enc.9 \
 	byteorder.9 be32toh.9 \
 	byteorder.9 be64dec.9 \
 	byteorder.9 be64enc.9 \
 	byteorder.9 be64toh.9 \
 	byteorder.9 bswap16.9 \
 	byteorder.9 bswap32.9 \
 	byteorder.9 bswap64.9 \
 	byteorder.9 htobe16.9 \
 	byteorder.9 htobe32.9 \
 	byteorder.9 htobe64.9 \
 	byteorder.9 htole16.9 \
 	byteorder.9 htole32.9 \
 	byteorder.9 htole64.9 \
 	byteorder.9 le16dec.9 \
 	byteorder.9 le16enc.9 \
 	byteorder.9 le16toh.9 \
 	byteorder.9 le32dec.9 \
 	byteorder.9 le32enc.9 \
 	byteorder.9 le32toh.9 \
 	byteorder.9 le64dec.9 \
 	byteorder.9 le64enc.9 \
 	byteorder.9 le64toh.9
 MLINKS+=cnv.9 cnvlist.9 \
 	cnv.9 cnvlist_free_binary.9 \
 	cnv.9 cnvlist_free_bool.9 \
 	cnv.9 cnvlist_free_bool_array.9 \
 	cnv.9 cnvlist_free_descriptor.9 \
 	cnv.9 cnvlist_free_descriptor_array.9 \
 	cnv.9 cnvlist_free_null.9 \
 	cnv.9 cnvlist_free_number.9 \
 	cnv.9 cnvlist_free_number_array.9 \
 	cnv.9 cnvlist_free_nvlist.9 \
 	cnv.9 cnvlist_free_nvlist_array.9 \
 	cnv.9 cnvlist_free_string.9 \
 	cnv.9 cnvlist_free_string_array.9 \
 	cnv.9 cnvlist_get_binary.9 \
 	cnv.9 cnvlist_get_bool.9 \
 	cnv.9 cnvlist_get_bool_array.9 \
 	cnv.9 cnvlist_get_descriptor.9 \
 	cnv.9 cnvlist_get_descriptor_array.9 \
 	cnv.9 cnvlist_get_number.9 \
 	cnv.9 cnvlist_get_number_array.9 \
 	cnv.9 cnvlist_get_nvlist.9 \
 	cnv.9 cnvlist_get_nvlist_array.9 \
 	cnv.9 cnvlist_get_string.9 \
 	cnv.9 cnvlist_get_string_array.9 \
 	cnv.9 cnvlist_take_binary.9 \
 	cnv.9 cnvlist_take_bool.9 \
 	cnv.9 cnvlist_take_bool_array.9 \
 	cnv.9 cnvlist_take_descriptor.9 \
 	cnv.9 cnvlist_take_descriptor_array.9 \
 	cnv.9 cnvlist_take_number.9 \
 	cnv.9 cnvlist_take_number_array.9 \
 	cnv.9 cnvlist_take_nvlist.9 \
 	cnv.9 cnvlist_take_nvlist_array.9 \
 	cnv.9 cnvlist_take_string.9 \
 	cnv.9 cnvlist_take_string_array.9
 MLINKS+=condvar.9 cv_broadcast.9 \
 	condvar.9 cv_broadcastpri.9 \
 	condvar.9 cv_destroy.9 \
 	condvar.9 cv_init.9 \
 	condvar.9 cv_signal.9 \
 	condvar.9 cv_timedwait.9 \
 	condvar.9 cv_timedwait_sig.9 \
 	condvar.9 cv_timedwait_sig_sbt.9 \
 	condvar.9 cv_wait.9 \
 	condvar.9 cv_wait_sig.9 \
 	condvar.9 cv_wait_unlock.9 \
 	condvar.9 cv_wmesg.9
 MLINKS+=config_intrhook.9 config_intrhook_disestablish.9 \
 	config_intrhook.9 config_intrhook_establish.9 \
 	config_intrhook.9 config_intrhook_oneshot.9
 MLINKS+=contigmalloc.9 contigmalloc_domainset.9 \
 	contigmalloc.9 contigfree.9
 MLINKS+=casuword.9 casueword.9 \
 	casuword.9 casueword32.9 \
 	casuword.9 casuword32.9
 MLINKS+=copy.9 copyin.9 \
 	copy.9 copyin_nofault.9 \
 	copy.9 copyinstr.9 \
 	copy.9 copyout.9 \
 	copy.9 copyout_nofault.9 \
 	copy.9 copystr.9
 MLINKS+=counter.9 counter_u64_alloc.9 \
 	counter.9 counter_u64_free.9 \
 	counter.9 counter_u64_add.9 \
 	counter.9 counter_enter.9 \
 	counter.9 counter_exit.9 \
 	counter.9 counter_u64_add_protected.9 \
 	counter.9 counter_u64_fetch.9 \
 	counter.9 counter_u64_zero.9 \
 	counter.9 SYSCTL_COUNTER_U64.9 \
 	counter.9 SYSCTL_ADD_COUNTER_U64.9 \
 	counter.9 SYSCTL_COUNTER_U64_ARRAY.9 \
 	counter.9 SYSCTL_ADD_COUNTER_U64_ARRAY.9
 MLINKS+=cpuset.9 CPUSET_T_INITIALIZER.9 \
 	cpuset.9 CPUSET_FSET.9 \
 	cpuset.9 CPU_CLR.9 \
 	cpuset.9 CPU_COPY.9 \
 	cpuset.9 CPU_ISSET.9 \
 	cpuset.9 CPU_SET.9 \
 	cpuset.9 CPU_ZERO.9 \
 	cpuset.9 CPU_FILL.9 \
 	cpuset.9 CPU_SETOF.9 \
 	cpuset.9 CPU_EMPTY.9 \
 	cpuset.9 CPU_ISFULLSET.9 \
 	cpuset.9 CPU_FFS.9 \
 	cpuset.9 CPU_COUNT.9 \
 	cpuset.9 CPU_SUBSET.9 \
 	cpuset.9 CPU_OVERLAP.9 \
 	cpuset.9 CPU_CMP.9 \
 	cpuset.9 CPU_OR.9 \
 	cpuset.9 CPU_AND.9 \
-	cpuset.9 CPU_NAND.9 \
+	cpuset.9 CPU_ANDNOT.9 \
 	cpuset.9 CPU_CLR_ATOMIC.9 \
 	cpuset.9 CPU_SET_ATOMIC.9 \
 	cpuset.9 CPU_SET_ATOMIC_ACQ.9 \
 	cpuset.9 CPU_AND_ATOMIC.9 \
 	cpuset.9 CPU_OR_ATOMIC.9 \
 	cpuset.9 CPU_COPY_STORE_REL.9
 MLINKS+=critical_enter.9 critical.9 \
 	critical_enter.9 critical_exit.9
 MLINKS+=crypto.9 crypto_dispatch.9 \
 	crypto.9 crypto_done.9 \
 	crypto.9 crypto_freereq.9 \
 	crypto.9 crypto_freesession.9 \
 	crypto.9 crypto_get_driverid.9 \
 	crypto.9 crypto_getreq.9 \
 	crypto.9 crypto_kdispatch.9 \
 	crypto.9 crypto_kdone.9 \
 	crypto.9 crypto_kregister.9 \
 	crypto.9 crypto_newsession.9 \
 	crypto.9 crypto_register.9 \
 	crypto.9 crypto_unblock.9 \
 	crypto.9 crypto_unregister.9 \
 	crypto.9 crypto_unregister_all.9
 MLINKS+=DB_COMMAND.9 DB_SHOW_ALL_COMMAND.9 \
 	DB_COMMAND.9 DB_SHOW_COMMAND.9
 MLINKS+=DECLARE_MODULE.9 DECLARE_MODULE_TIED.9
 MLINKS+=dev_clone.9 drain_dev_clone_events.9
 MLINKS+=dev_refthread.9 devvn_refthread.9 \
 	dev_refthread.9 dev_relthread.9
 MLINKS+=devfs_set_cdevpriv.9 devfs_clear_cdevpriv.9 \
 	devfs_set_cdevpriv.9 devfs_get_cdevpriv.9
 MLINKS+=device_add_child.9 device_add_child_ordered.9
 MLINKS+=device_enable.9 device_disable.9 \
 	device_enable.9 device_is_enabled.9
 MLINKS+=device_get_ivars.9 device_set_ivars.9
 MLINKS+=device_get_name.9 device_get_nameunit.9
 MLINKS+=device_get_state.9 device_busy.9 \
 	device_get_state.9 device_is_alive.9 \
 	device_get_state.9 device_is_attached.9 \
 	device_get_state.9 device_unbusy.9
 MLINKS+=device_get_sysctl.9 device_get_sysctl_ctx.9 \
 	device_get_sysctl.9 device_get_sysctl_tree.9
 MLINKS+=device_quiet.9 device_is_quiet.9 \
 	device_quiet.9 device_verbose.9
 MLINKS+=device_set_desc.9 device_get_desc.9 \
 	device_set_desc.9 device_set_desc_copy.9
 MLINKS+=device_set_flags.9 device_get_flags.9
 MLINKS+=devstat.9 devicestat.9 \
 	devstat.9 devstat_add_entry.9 \
 	devstat.9 devstat_end_transaction.9 \
 	devstat.9 devstat_remove_entry.9 \
 	devstat.9 devstat_start_transaction.9
 MLINKS+=disk.9 disk_add_alias.9 \
 	disk.9 disk_alloc.9 \
 	disk.9 disk_create.9 \
 	disk.9 disk_destroy.9 \
 	disk.9 disk_gone.9 \
 	disk.9 disk_resize.9
 MLINKS+=dnv.9 dnvlist.9 \
 	dnv.9 dnvlist_get_binary.9 \
 	dnv.9 dnvlist_get_bool.9 \
 	dnv.9 dnvlist_get_descriptor.9 \
 	dnv.9 dnvlist_get_number.9 \
 	dnv.9 dnvlist_get_nvlist.9 \
 	dnv.9 dnvlist_get_string.9 \
 	dnv.9 dnvlist_take_binary.9 \
 	dnv.9 dnvlist_take_bool.9 \
 	dnv.9 dnvlist_take_descriptor.9 \
 	dnv.9 dnvlist_take_number.9 \
 	dnv.9 dnvlist_take_nvlist.9 \
 	dnv.9 dnvlist_take_string.9
 MLINKS+=domain.9 DOMAIN_SET.9 \
 	domain.9 domain_add.9 \
 	domain.9 pfctlinput.9 \
 	domain.9 pfctlinput2.9 \
 	domain.9 pffinddomain.9 \
 	domain.9 pffindproto.9 \
 	domain.9 pffindtype.9
 MLINKS+=drbr.9 drbr_free.9 \
 	drbr.9 drbr_enqueue.9 \
 	drbr.9 drbr_dequeue.9 \
 	drbr.9 drbr_dequeue_cond.9 \
 	drbr.9 drbr_flush.9 \
 	drbr.9 drbr_empty.9 \
 	drbr.9 drbr_inuse.9 \
 	drbr.9 drbr_stats_update.9
 MLINKS+=DRIVER_MODULE.9 DRIVER_MODULE_ORDERED.9 \
 	DRIVER_MODULE.9 EARLY_DRIVER_MODULE.9 \
 	DRIVER_MODULE.9 EARLY_DRIVER_MODULE_ORDERED.9
 MLINKS+=epoch.9 epoch_context.9 \
 	epoch.9 epoch_alloc.9 \
 	epoch.9 epoch_free.9 \
 	epoch.9 epoch_enter.9 \
 	epoch.9 epoch_exit.9 \
 	epoch.9 epoch_wait.9 \
 	epoch.9 epoch_call.9 \
 	epoch.9 epoch_drain_callbacks.9 \
 	epoch.9 in_epoch.9
 MLINKS+=EVENTHANDLER.9 EVENTHANDLER_DECLARE.9 \
 	EVENTHANDLER.9 EVENTHANDLER_DEFINE.9 \
 	EVENTHANDLER.9 EVENTHANDLER_DEREGISTER.9 \
 	EVENTHANDLER.9 eventhandler_deregister.9 \
 	EVENTHANDLER.9 eventhandler_find_list.9 \
 	EVENTHANDLER.9 EVENTHANDLER_INVOKE.9 \
 	EVENTHANDLER.9 eventhandler_prune_list.9 \
 	EVENTHANDLER.9 EVENTHANDLER_REGISTER.9 \
 	EVENTHANDLER.9 eventhandler_register.9
 MLINKS+=eventtimers.9 et_register.9 \
 	eventtimers.9 et_deregister.9 \
 	eventtimers.9 et_ban.9 \
 	eventtimers.9 et_find.9 \
 	eventtimers.9 et_free.9 \
 	eventtimers.9 et_init.9 \
 	eventtimers.9 ET_LOCK.9 \
 	eventtimers.9 ET_UNLOCK.9 \
 	eventtimers.9 et_start.9 \
 	eventtimers.9 et_stop.9
 MLINKS+=fail.9 KFAIL_POINT_CODE.9 \
 	fail.9 KFAIL_POINT_ERROR.9 \
 	fail.9 KFAIL_POINT_GOTO.9 \
 	fail.9 KFAIL_POINT_RETURN.9 \
 	fail.9 KFAIL_POINT_RETURN_VOID.9
 MLINKS+=fdt_pinctrl.9 fdt_pinctrl_configure.9 \
 	fdt_pinctrl.9 fdt_pinctrl_configure_by_name.9 \
 	fdt_pinctrl.9 fdt_pinctrl_configure_tree.9 \
 	fdt_pinctrl.9 fdt_pinctrl_register.9
 MLINKS+=fetch.9 fubyte.9 \
 	fetch.9 fuword.9 \
 	fetch.9 fuword16.9 \
 	fetch.9 fuword32.9 \
 	fetch.9 fuword64.9 \
 	fetch.9 fueword.9 \
 	fetch.9 fueword32.9 \
 	fetch.9 fueword64.9
 MLINKS+=firmware.9 firmware_get.9 \
 	firmware.9 firmware_put.9 \
 	firmware.9 firmware_register.9 \
 	firmware.9 firmware_unregister.9
 MLINKS+=fpu_kern.9 fpu_kern_alloc_ctx.9 \
 	fpu_kern.9 fpu_kern_free_ctx.9 \
 	fpu_kern.9 fpu_kern_enter.9 \
 	fpu_kern.9 fpu_kern_leave.9 \
 	fpu_kern.9 fpu_kern_thread.9 \
 	fpu_kern.9 is_fpu_kern_thread.9
 MLINKS+=g_attach.9 g_detach.9
 MLINKS+=g_bio.9 g_alloc_bio.9 \
 	g_bio.9 g_clone_bio.9 \
 	g_bio.9 g_destroy_bio.9 \
 	g_bio.9 g_duplicate_bio.9 \
 	g_bio.9 g_format_bio.9 \
 	g_bio.9 g_new_bio.9 \
 	g_bio.9 g_print_bio.9 \
 	g_bio.9 g_reset_bio.9
 MLINKS+=g_consumer.9 g_destroy_consumer.9 \
 	g_consumer.9 g_new_consumer.9
 MLINKS+=g_data.9 g_read_data.9 \
 	g_data.9 g_write_data.9
 MLINKS+=getenv.9 freeenv.9 \
 	getenv.9 getenv_int.9 \
 	getenv.9 getenv_long.9 \
 	getenv.9 getenv_string.9 \
 	getenv.9 getenv_quad.9 \
 	getenv.9 getenv_uint.9 \
 	getenv.9 getenv_ulong.9 \
 	getenv.9 kern_getenv.9 \
 	getenv.9 kern_setenv.9 \
 	getenv.9 kern_unsetenv.9 \
 	getenv.9 setenv.9 \
 	getenv.9 testenv.9 \
 	getenv.9 unsetenv.9
 MLINKS+=g_event.9 g_cancel_event.9 \
 	g_event.9 g_post_event.9 \
 	g_event.9 g_waitfor_event.9
 MLINKS+=g_geom.9 g_destroy_geom.9 \
 	g_geom.9 g_new_geomf.9
 MLINKS+=g_provider.9 g_destroy_provider.9 \
 	g_provider.9 g_error_provider.9 \
 	g_provider.9 g_new_providerf.9
 MLINKS+=hash.9 hash32.9 \
 	hash.9 hash32_buf.9 \
 	hash.9 hash32_str.9 \
 	hash.9 hash32_stre.9 \
 	hash.9 hash32_strn.9 \
 	hash.9 hash32_strne.9 \
 	hash.9 jenkins_hash.9 \
 	hash.9 jenkins_hash32.9
 MLINKS+=hashinit.9 hashdestroy.9 \
 	hashinit.9 hashinit_flags.9 \
 	hashinit.9 phashinit.9
 MLINKS+=hhook.9 hhook_head_register.9 \
 	hhook.9 hhook_head_deregister.9 \
 	hhook.9 hhook_head_deregister_lookup.9 \
 	hhook.9 hhook_run_hooks.9 \
 	hhook.9 HHOOKS_RUN_IF.9 \
 	hhook.9 HHOOKS_RUN_LOOKUP_IF.9
 MLINKS+=ieee80211.9 ieee80211_ifattach.9 \
 	ieee80211.9 ieee80211_ifdetach.9
 MLINKS+=ieee80211_amrr.9 ieee80211_amrr_choose.9 \
 	ieee80211_amrr.9 ieee80211_amrr_cleanup.9 \
 	ieee80211_amrr.9 ieee80211_amrr_init.9 \
 	ieee80211_amrr.9 ieee80211_amrr_node_init.9 \
 	ieee80211_amrr.9 ieee80211_amrr_setinterval.9 \
 	ieee80211_amrr.9 ieee80211_amrr_tx_complete.9 \
 	ieee80211_amrr.9 ieee80211_amrr_tx_update.9
 MLINKS+=ieee80211_beacon.9 ieee80211_beacon_alloc.9 \
 	ieee80211_beacon.9 ieee80211_beacon_notify.9 \
 	ieee80211_beacon.9 ieee80211_beacon_update.9
 MLINKS+=ieee80211_bmiss.9 ieee80211_beacon_miss.9
 MLINKS+=ieee80211_crypto.9 ieee80211_crypto_available.9 \
 	ieee80211_crypto.9 ieee80211_crypto_decap.9 \
 	ieee80211_crypto.9 ieee80211_crypto_delglobalkeys.9 \
 	ieee80211_crypto.9 ieee80211_crypto_delkey.9 \
 	ieee80211_crypto.9 ieee80211_crypto_demic.9 \
 	ieee80211_crypto.9 ieee80211_crypto_encap.9 \
 	ieee80211_crypto.9 ieee80211_crypto_enmic.9 \
 	ieee80211_crypto.9 ieee80211_crypto_newkey.9 \
 	ieee80211_crypto.9 ieee80211_crypto_register.9 \
 	ieee80211_crypto.9 ieee80211_crypto_reload_keys.9 \
 	ieee80211_crypto.9 ieee80211_crypto_setkey.9 \
 	ieee80211_crypto.9 ieee80211_crypto_unregister.9 \
 	ieee80211_crypto.9 ieee80211_key_update_begin.9 \
 	ieee80211_crypto.9 ieee80211_key_update_end.9 \
 	ieee80211_crypto.9 ieee80211_notify_michael_failure.9 \
 	ieee80211_crypto.9 ieee80211_notify_replay_failure.9
 MLINKS+=ieee80211_input.9 ieee80211_input_all.9
 MLINKS+=ieee80211_node.9 ieee80211_dump_node.9 \
 	ieee80211_node.9 ieee80211_dump_nodes.9 \
 	ieee80211_node.9 ieee80211_find_rxnode.9 \
 	ieee80211_node.9 ieee80211_find_rxnode_withkey.9 \
 	ieee80211_node.9 ieee80211_free_node.9 \
 	ieee80211_node.9 ieee80211_iterate_nodes.9 \
 	ieee80211_node.9 ieee80211_ref_node.9 \
 	ieee80211_node.9 ieee80211_unref_node.9
 MLINKS+=ieee80211_output.9 ieee80211_process_callback.9 \
 	ieee80211_output.9 M_SEQNO_GET.9 \
 	ieee80211_output.9 M_WME_GETAC.9
 MLINKS+=ieee80211_proto.9 ieee80211_new_state.9 \
 	ieee80211_proto.9 ieee80211_resume_all.9 \
 	ieee80211_proto.9 ieee80211_start_all.9 \
 	ieee80211_proto.9 ieee80211_stop_all.9 \
 	ieee80211_proto.9 ieee80211_suspend_all.9 \
 	ieee80211_proto.9 ieee80211_waitfor_parent.9
 MLINKS+=ieee80211_radiotap.9 ieee80211_radiotap_active.9 \
 	ieee80211_radiotap.9 ieee80211_radiotap_active_vap.9 \
 	ieee80211_radiotap.9 ieee80211_radiotap_attach.9 \
 	ieee80211_radiotap.9 ieee80211_radiotap_tx.9 \
 	ieee80211_radiotap.9 radiotap.9
 MLINKS+=ieee80211_regdomain.9 ieee80211_alloc_countryie.9 \
 	ieee80211_regdomain.9 ieee80211_init_channels.9 \
 	ieee80211_regdomain.9 ieee80211_sort_channels.9
 MLINKS+=ieee80211_scan.9 ieee80211_add_scan.9 \
 	ieee80211_scan.9 ieee80211_bg_scan.9 \
 	ieee80211_scan.9 ieee80211_cancel_scan.9 \
 	ieee80211_scan.9 ieee80211_cancel_scan_any.9 \
 	ieee80211_scan.9 ieee80211_check_scan.9 \
 	ieee80211_scan.9 ieee80211_check_scan_current.9 \
 	ieee80211_scan.9 ieee80211_flush.9 \
 	ieee80211_scan.9 ieee80211_probe_curchan.9 \
 	ieee80211_scan.9 ieee80211_scan_assoc_fail.9 \
 	ieee80211_scan.9 ieee80211_scan_done.9 \
 	ieee80211_scan.9 ieee80211_scan_dump_channels.9 \
 	ieee80211_scan.9 ieee80211_scan_flush.9 \
 	ieee80211_scan.9 ieee80211_scan_iterate.9 \
 	ieee80211_scan.9 ieee80211_scan_next.9 \
 	ieee80211_scan.9 ieee80211_scan_timeout.9 \
 	ieee80211_scan.9 ieee80211_scanner_get.9 \
 	ieee80211_scan.9 ieee80211_scanner_register.9 \
 	ieee80211_scan.9 ieee80211_scanner_unregister.9 \
 	ieee80211_scan.9 ieee80211_scanner_unregister_all.9 \
 	ieee80211_scan.9 ieee80211_start_scan.9
 MLINKS+=ieee80211_vap.9 ieee80211_vap_attach.9 \
 	ieee80211_vap.9 ieee80211_vap_detach.9 \
 	ieee80211_vap.9 ieee80211_vap_setup.9
 # Aliases installed for iflibdd.9, kept in alphabetical order.
 MLINKS+=iflibdd.9 ifdi_attach_post.9 \
 	iflibdd.9 ifdi_attach_pre.9 \
 	iflibdd.9 ifdi_detach.9 \
 	iflibdd.9 ifdi_get_counter.9 \
 	iflibdd.9 ifdi_i2c_req.9 \
 	iflibdd.9 ifdi_init.9 \
 	iflibdd.9 ifdi_intr_disable.9 \
 	iflibdd.9 ifdi_intr_enable.9 \
 	iflibdd.9 ifdi_led_func.9 \
 	iflibdd.9 ifdi_link_intr_enable.9 \
 	iflibdd.9 ifdi_media_change.9 \
 	iflibdd.9 ifdi_media_set.9 \
 	iflibdd.9 ifdi_media_status.9 \
 	iflibdd.9 ifdi_mtu_set.9 \
 	iflibdd.9 ifdi_multi_set.9 \
 	iflibdd.9 ifdi_promisc_set.9 \
 	iflibdd.9 ifdi_queue_intr_enable.9 \
 	iflibdd.9 ifdi_queues_alloc.9 \
 	iflibdd.9 ifdi_queues_free.9 \
 	iflibdd.9 ifdi_resume.9 \
 	iflibdd.9 ifdi_rxq_setup.9 \
 	iflibdd.9 ifdi_stop.9 \
 	iflibdd.9 ifdi_suspend.9 \
 	iflibdd.9 ifdi_sysctl_int_delay.9 \
 	iflibdd.9 ifdi_timer.9 \
 	iflibdd.9 ifdi_txq_setup.9 \
 	iflibdd.9 ifdi_update_admin_status.9 \
 	iflibdd.9 ifdi_vf_add.9 \
 	iflibdd.9 ifdi_vflr_handle.9 \
 	iflibdd.9 ifdi_vlan_register.9 \
 	iflibdd.9 ifdi_vlan_unregister.9 \
 	iflibdd.9 ifdi_watchdog_reset.9 \
 	iflibdd.9 iov_init.9 \
 	iflibdd.9 iov_uinit.9
 # Aliases installed for iflibdi.9, kept in alphabetical order.
 MLINKS+=iflibdi.9 iflib_add_int_delay_sysctl.9 \
 	iflibdi.9 iflib_device_attach.9 \
 	iflibdi.9 iflib_device_deregister.9 \
 	iflibdi.9 iflib_device_detach.9 \
 	iflibdi.9 iflib_device_register.9 \
 	iflibdi.9 iflib_device_resume.9 \
 	iflibdi.9 iflib_device_suspend.9 \
 	iflibdi.9 iflib_irq_alloc.9 \
 	iflibdi.9 iflib_irq_alloc_generic.9 \
 	iflibdi.9 iflib_led_create.9 \
 	iflibdi.9 iflib_link_intr_deferred.9 \
 	iflibdi.9 iflib_link_state_change.9 \
 	iflibdi.9 iflib_rx_intr_deferred.9 \
 	iflibdi.9 iflib_tx_intr_deferred.9
 # Aliases installed for iflibtxrx.9, kept in alphabetical order.
 MLINKS+=iflibtxrx.9 isc_rxd_available.9 \
 	iflibtxrx.9 isc_rxd_flush.9 \
 	iflibtxrx.9 isc_rxd_pkt_get.9 \
 	iflibtxrx.9 isc_rxd_refill.9 \
 	iflibtxrx.9 isc_txd_credits_update.9 \
 	iflibtxrx.9 isc_txd_encap.9 \
 	iflibtxrx.9 isc_txd_flush.9
 MLINKS+=ifnet.9 if_addmulti.9 \
 	ifnet.9 if_alloc.9 \
 	ifnet.9 if_alloc_dev.9 \
 	ifnet.9 if_alloc_domain.9 \
 	ifnet.9 if_allmulti.9 \
 	ifnet.9 if_attach.9 \
 	ifnet.9 if_data.9 \
 	ifnet.9 IF_DEQUEUE.9 \
 	ifnet.9 if_delmulti.9 \
 	ifnet.9 if_detach.9 \
 	ifnet.9 if_down.9 \
 	ifnet.9 if_findmulti.9 \
 	ifnet.9 if_free.9 \
 	ifnet.9 if_free_type.9 \
 	ifnet.9 if_up.9 \
 	ifnet.9 ifa_free.9 \
 	ifnet.9 ifa_ifwithaddr.9 \
 	ifnet.9 ifa_ifwithdstaddr.9 \
 	ifnet.9 ifa_ifwithnet.9 \
 	ifnet.9 ifa_ref.9 \
 	ifnet.9 ifaddr.9 \
 	ifnet.9 ifaddr_byindex.9 \
 	ifnet.9 ifaof_ifpforaddr.9 \
 	ifnet.9 ifioctl.9 \
 	ifnet.9 ifpromisc.9 \
 	ifnet.9 ifqueue.9 \
 	ifnet.9 ifunit.9 \
 	ifnet.9 ifunit_ref.9
 MLINKS+=insmntque.9 insmntque1.9
 MLINKS+=ithread.9 ithread_add_handler.9 \
 	ithread.9 ithread_create.9 \
 	ithread.9 ithread_destroy.9 \
 	ithread.9 ithread_priority.9 \
 	ithread.9 ithread_remove_handler.9 \
 	ithread.9 ithread_schedule.9
 MLINKS+=kernacc.9 useracc.9
 MLINKS+=kernel_mount.9 free_mntarg.9 \
 	kernel_mount.9 kernel_vmount.9 \
 	kernel_mount.9 mount_arg.9 \
 	kernel_mount.9 mount_argb.9 \
 	kernel_mount.9 mount_argf.9 \
 	kernel_mount.9 mount_argsu.9
 MLINKS+=khelp.9 khelp_add_hhook.9 \
 	khelp.9 KHELP_DECLARE_MOD.9 \
 	khelp.9 KHELP_DECLARE_MOD_UMA.9 \
 	khelp.9 khelp_destroy_osd.9 \
 	khelp.9 khelp_get_id.9 \
 	khelp.9 khelp_get_osd.9 \
 	khelp.9 khelp_init_osd.9 \
 	khelp.9 khelp_remove_hhook.9
 MLINKS+=kobj.9 DEFINE_CLASS.9 \
 	kobj.9 kobj_class_compile.9 \
 	kobj.9 kobj_class_compile_static.9 \
 	kobj.9 kobj_class_free.9 \
 	kobj.9 kobj_create.9 \
 	kobj.9 kobj_delete.9 \
 	kobj.9 kobj_init.9 \
 	kobj.9 kobj_init_static.9
 MLINKS+=kproc.9 kproc_create.9 \
 	kproc.9 kproc_exit.9 \
 	kproc.9 kproc_kthread_add.9 \
 	kproc.9 kproc_resume.9 \
 	kproc.9 kproc_shutdown.9 \
 	kproc.9 kproc_start.9 \
 	kproc.9 kproc_suspend.9 \
 	kproc.9 kproc_suspend_check.9 \
 	kproc.9 kthread_create.9
 MLINKS+=kqueue.9 knlist_add.9 \
 	kqueue.9 knlist_clear.9 \
 	kqueue.9 knlist_delete.9 \
 	kqueue.9 knlist_destroy.9 \
 	kqueue.9 knlist_empty.9 \
 	kqueue.9 knlist_init.9 \
 	kqueue.9 knlist_init_mtx.9 \
 	kqueue.9 knlist_init_rw_reader.9 \
 	kqueue.9 knlist_remove.9 \
 	kqueue.9 knlist_remove_inevent.9 \
 	kqueue.9 knote_fdclose.9 \
 	kqueue.9 KNOTE_LOCKED.9 \
 	kqueue.9 KNOTE_UNLOCKED.9 \
 	kqueue.9 kqfd_register.9 \
 	kqueue.9 kqueue_add_filteropts.9 \
 	kqueue.9 kqueue_del_filteropts.9
 MLINKS+=kthread.9 kthread_add.9 \
 	kthread.9 kthread_exit.9 \
 	kthread.9 kthread_resume.9 \
 	kthread.9 kthread_shutdown.9 \
 	kthread.9 kthread_start.9 \
 	kthread.9 kthread_suspend.9 \
 	kthread.9 kthread_suspend_check.9
 MLINKS+=ktr.9 CTR0.9 \
 	ktr.9 CTR1.9 \
 	ktr.9 CTR2.9 \
 	ktr.9 CTR3.9 \
 	ktr.9 CTR4.9 \
 	ktr.9 CTR5.9 \
 	ktr.9 CTR6.9
 MLINKS+=lock.9 lockdestroy.9 \
 	lock.9 lockinit.9 \
 	lock.9 lockmgr.9 \
 	lock.9 lockmgr_args.9 \
 	lock.9 lockmgr_args_rw.9 \
 	lock.9 lockmgr_assert.9 \
 	lock.9 lockmgr_disown.9 \
 	lock.9 lockmgr_printinfo.9 \
 	lock.9 lockmgr_recursed.9 \
 	lock.9 lockmgr_rw.9 \
 	lock.9 lockstatus.9
 MLINKS+=LOCK_PROFILING.9 MUTEX_PROFILING.9
 MLINKS+=make_dev.9 destroy_dev.9 \
 	make_dev.9 destroy_dev_drain.9 \
 	make_dev.9 destroy_dev_sched.9 \
 	make_dev.9 destroy_dev_sched_cb.9 \
 	make_dev.9 dev_depends.9 \
 	make_dev.9 make_dev_alias.9 \
 	make_dev.9 make_dev_alias_p.9 \
 	make_dev.9 make_dev_cred.9 \
 	make_dev.9 make_dev_credf.9 \
 	make_dev.9 make_dev_p.9 \
 	make_dev.9 make_dev_s.9
 MLINKS+=malloc.9 free.9 \
 	malloc.9 malloc_domainset.9 \
 	malloc.9 free_domain.9 \
 	malloc.9 mallocarray.9 \
 	malloc.9 MALLOC_DECLARE.9 \
 	malloc.9 MALLOC_DEFINE.9 \
 	malloc.9 realloc.9 \
 	malloc.9 reallocf.9
 MLINKS+=mbchain.9 mb_detach.9 \
 	mbchain.9 mb_done.9 \
 	mbchain.9 mb_fixhdr.9 \
 	mbchain.9 mb_init.9 \
 	mbchain.9 mb_initm.9 \
 	mbchain.9 mb_put_int64be.9 \
 	mbchain.9 mb_put_int64le.9 \
 	mbchain.9 mb_put_mbuf.9 \
 	mbchain.9 mb_put_mem.9 \
 	mbchain.9 mb_put_uint16be.9 \
 	mbchain.9 mb_put_uint16le.9 \
 	mbchain.9 mb_put_uint32be.9 \
 	mbchain.9 mb_put_uint32le.9 \
 	mbchain.9 mb_put_uint8.9 \
 	mbchain.9 mb_put_uio.9 \
 	mbchain.9 mb_reserve.9
 MLINKS+=\
 	mbuf.9 m_adj.9 \
 	mbuf.9 m_align.9 \
 	mbuf.9 M_ALIGN.9 \
 	mbuf.9 m_append.9 \
 	mbuf.9 m_apply.9 \
 	mbuf.9 m_cat.9 \
 	mbuf.9 m_catpkt.9 \
 	mbuf.9 MCHTYPE.9 \
 	mbuf.9 MCLGET.9 \
 	mbuf.9 m_collapse.9 \
 	mbuf.9 m_copyback.9 \
 	mbuf.9 m_copydata.9 \
 	mbuf.9 m_copym.9 \
 	mbuf.9 m_copypacket.9 \
 	mbuf.9 m_copyup.9 \
 	mbuf.9 m_defrag.9 \
 	mbuf.9 m_devget.9 \
 	mbuf.9 m_dup.9 \
 	mbuf.9 m_dup_pkthdr.9 \
 	mbuf.9 MEXTADD.9 \
 	mbuf.9 m_fixhdr.9 \
 	mbuf.9 m_free.9 \
 	mbuf.9 m_freem.9 \
 	mbuf.9 MGET.9 \
 	mbuf.9 m_get.9 \
 	mbuf.9 m_get2.9 \
 	mbuf.9 m_getjcl.9 \
 	mbuf.9 m_getcl.9 \
 	mbuf.9 MGETHDR.9 \
 	mbuf.9 m_gethdr.9 \
 	mbuf.9 m_getm.9 \
 	mbuf.9 m_getptr.9 \
 	mbuf.9 MH_ALIGN.9 \
 	mbuf.9 M_LEADINGSPACE.9 \
 	mbuf.9 m_length.9 \
 	mbuf.9 M_MOVE_PKTHDR.9 \
 	mbuf.9 m_move_pkthdr.9 \
 	mbuf.9 M_PREPEND.9 \
 	mbuf.9 m_prepend.9 \
 	mbuf.9 m_pulldown.9 \
 	mbuf.9 m_pullup.9 \
 	mbuf.9 m_split.9 \
 	mbuf.9 mtod.9 \
 	mbuf.9 M_TRAILINGSPACE.9 \
 	mbuf.9 m_unshare.9 \
 	mbuf.9 M_WRITABLE.9
 MLINKS+=\
 	mbuf_tags.9 m_tag_alloc.9 \
 	mbuf_tags.9 m_tag_copy.9 \
 	mbuf_tags.9 m_tag_copy_chain.9 \
 	mbuf_tags.9 m_tag_delete.9 \
 	mbuf_tags.9 m_tag_delete_chain.9 \
 	mbuf_tags.9 m_tag_delete_nonpersistent.9 \
 	mbuf_tags.9 m_tag_find.9 \
 	mbuf_tags.9 m_tag_first.9 \
 	mbuf_tags.9 m_tag_free.9 \
 	mbuf_tags.9 m_tag_get.9 \
 	mbuf_tags.9 m_tag_init.9 \
 	mbuf_tags.9 m_tag_locate.9 \
 	mbuf_tags.9 m_tag_next.9 \
 	mbuf_tags.9 m_tag_prepend.9 \
 	mbuf_tags.9 m_tag_unlink.9
 MLINKS+=MD5.9 MD5Init.9 \
 	MD5.9 MD5Transform.9
 MLINKS+=mdchain.9 md_append_record.9 \
 	mdchain.9 md_done.9 \
 	mdchain.9 md_get_int64.9 \
 	mdchain.9 md_get_int64be.9 \
 	mdchain.9 md_get_int64le.9 \
 	mdchain.9 md_get_mbuf.9 \
 	mdchain.9 md_get_mem.9 \
 	mdchain.9 md_get_uint16.9 \
 	mdchain.9 md_get_uint16be.9 \
 	mdchain.9 md_get_uint16le.9 \
 	mdchain.9 md_get_uint32.9 \
 	mdchain.9 md_get_uint32be.9 \
 	mdchain.9 md_get_uint32le.9 \
 	mdchain.9 md_get_uint8.9 \
 	mdchain.9 md_get_uio.9 \
 	mdchain.9 md_initm.9 \
 	mdchain.9 md_next_record.9
 MLINKS+=microtime.9 bintime.9 \
 	microtime.9 getbintime.9 \
 	microtime.9 getmicrotime.9 \
 	microtime.9 getnanotime.9 \
 	microtime.9 nanotime.9
 MLINKS+=microuptime.9 binuptime.9 \
 	microuptime.9 getbinuptime.9 \
 	microuptime.9 getmicrouptime.9 \
 	microuptime.9 getnanouptime.9 \
 	microuptime.9 getsbinuptime.9 \
 	microuptime.9 nanouptime.9 \
 	microuptime.9 sbinuptime.9
 MLINKS+=mi_switch.9 cpu_switch.9 \
 	mi_switch.9 cpu_throw.9
 MLINKS+=mod_cc.9 CCV.9 \
 	mod_cc.9 DECLARE_CC_MODULE.9
 MLINKS+=mtx_pool.9 mtx_pool_alloc.9 \
 	mtx_pool.9 mtx_pool_create.9 \
 	mtx_pool.9 mtx_pool_destroy.9 \
 	mtx_pool.9 mtx_pool_find.9 \
 	mtx_pool.9 mtx_pool_lock.9 \
 	mtx_pool.9 mtx_pool_lock_spin.9 \
 	mtx_pool.9 mtx_pool_unlock.9 \
 	mtx_pool.9 mtx_pool_unlock_spin.9
 MLINKS+=mutex.9 mtx_assert.9 \
 	mutex.9 mtx_destroy.9 \
 	mutex.9 mtx_init.9 \
 	mutex.9 mtx_initialized.9 \
 	mutex.9 mtx_lock.9 \
 	mutex.9 mtx_lock_flags.9 \
 	mutex.9 mtx_lock_spin.9 \
 	mutex.9 mtx_lock_spin_flags.9 \
 	mutex.9 mtx_owned.9 \
 	mutex.9 mtx_recursed.9 \
 	mutex.9 mtx_sleep.9 \
 	mutex.9 MTX_SYSINIT.9 \
 	mutex.9 mtx_trylock.9 \
 	mutex.9 mtx_trylock_flags.9 \
 	mutex.9 mtx_trylock_spin.9 \
 	mutex.9 mtx_trylock_spin_flags.9 \
 	mutex.9 mtx_unlock.9 \
 	mutex.9 mtx_unlock_flags.9 \
 	mutex.9 mtx_unlock_spin.9 \
 	mutex.9 mtx_unlock_spin_flags.9
 MLINKS+=namei.9 NDFREE.9 \
 	namei.9 NDINIT.9
 MLINKS+=netisr.9 netisr_clearqdrops.9 \
 	netisr.9 netisr_default_flow2cpu.9 \
 	netisr.9 netisr_dispatch.9 \
 	netisr.9 netisr_dispatch_src.9 \
 	netisr.9 netisr_get_cpucount.9 \
 	netisr.9 netisr_get_cpuid.9 \
 	netisr.9 netisr_getqdrops.9 \
 	netisr.9 netisr_getqlimit.9 \
 	netisr.9 netisr_queue.9 \
 	netisr.9 netisr_queue_src.9 \
 	netisr.9 netisr_register.9 \
 	netisr.9 netisr_setqlimit.9 \
 	netisr.9 netisr_unregister.9
 MLINKS+=nv.9 libnv.9 \
 	nv.9 nvlist.9 \
 	nv.9 nvlist_add_binary.9 \
 	nv.9 nvlist_add_bool.9 \
 	nv.9 nvlist_add_bool_array.9 \
 	nv.9 nvlist_add_descriptor.9 \
 	nv.9 nvlist_add_descriptor_array.9 \
 	nv.9 nvlist_add_null.9 \
 	nv.9 nvlist_add_number.9 \
 	nv.9 nvlist_add_number_array.9 \
 	nv.9 nvlist_add_nvlist.9 \
 	nv.9 nvlist_add_nvlist_array.9 \
 	nv.9 nvlist_add_string.9 \
 	nv.9 nvlist_add_stringf.9 \
 	nv.9 nvlist_add_stringv.9 \
 	nv.9 nvlist_add_string_array.9 \
 	nv.9 nvlist_clone.9 \
 	nv.9 nvlist_create.9 \
 	nv.9 nvlist_destroy.9 \
 	nv.9 nvlist_dump.9 \
 	nv.9 nvlist_empty.9 \
 	nv.9 nvlist_error.9 \
 	nv.9 nvlist_exists.9 \
 	nv.9 nvlist_exists_binary.9 \
 	nv.9 nvlist_exists_bool.9 \
 	nv.9 nvlist_exists_bool_array.9 \
 	nv.9 nvlist_exists_descriptor.9 \
 	nv.9 nvlist_exists_descriptor_array.9 \
 	nv.9 nvlist_exists_null.9 \
 	nv.9 nvlist_exists_number.9 \
 	nv.9 nvlist_exists_number_array.9 \
 	nv.9 nvlist_exists_nvlist.9 \
 	nv.9 nvlist_exists_nvlist_array.9 \
 	nv.9 nvlist_exists_string.9 \
 	nv.9 nvlist_exists_type.9 \
 	nv.9 nvlist_fdump.9 \
 	nv.9 nvlist_flags.9 \
 	nv.9 nvlist_free.9 \
 	nv.9 nvlist_free_binary.9 \
 	nv.9 nvlist_free_bool.9 \
 	nv.9 nvlist_free_bool_array.9 \
 	nv.9 nvlist_free_descriptor.9 \
 	nv.9 nvlist_free_descriptor_array.9 \
 	nv.9 nvlist_free_null.9 \
 	nv.9 nvlist_free_number.9 \
 	nv.9 nvlist_free_number_array.9 \
 	nv.9 nvlist_free_nvlist.9 \
 	nv.9 nvlist_free_nvlist_array.9 \
 	nv.9 nvlist_free_string.9 \
 	nv.9 nvlist_free_string_array.9 \
 	nv.9 nvlist_free_type.9 \
 	nv.9 nvlist_get_binary.9 \
 	nv.9 nvlist_get_bool.9 \
 	nv.9 nvlist_get_bool_array.9 \
 	nv.9 nvlist_get_descriptor.9 \
 	nv.9 nvlist_get_descriptor_array.9 \
 	nv.9 nvlist_get_number.9 \
 	nv.9 nvlist_get_number_array.9 \
 	nv.9 nvlist_get_nvlist.9 \
 	nv.9 nvlist_get_nvlist_array.9 \
 	nv.9 nvlist_get_parent.9 \
 	nv.9 nvlist_get_string.9 \
 	nv.9 nvlist_get_string_array.9 \
 	nv.9 nvlist_move_binary.9 \
 	nv.9 nvlist_move_descriptor.9 \
 	nv.9 nvlist_move_descriptor_array.9 \
 	nv.9 nvlist_move_nvlist.9 \
 	nv.9 nvlist_move_nvlist_array.9 \
 	nv.9 nvlist_move_string.9 \
 	nv.9 nvlist_move_string_array.9 \
 	nv.9 nvlist_next.9 \
 	nv.9 nvlist_pack.9 \
 	nv.9 nvlist_recv.9 \
 	nv.9 nvlist_send.9 \
 	nv.9 nvlist_set_error.9 \
 	nv.9 nvlist_size.9 \
 	nv.9 nvlist_take_binary.9 \
 	nv.9 nvlist_take_bool.9 \
 	nv.9 nvlist_take_bool_array.9 \
 	nv.9 nvlist_take_descriptor.9 \
 	nv.9 nvlist_take_descriptor_array.9 \
 	nv.9 nvlist_take_number.9 \
 	nv.9 nvlist_take_number_array.9 \
 	nv.9 nvlist_take_nvlist.9 \
 	nv.9 nvlist_take_nvlist_array.9 \
 	nv.9 nvlist_take_string.9 \
 	nv.9 nvlist_take_string_array.9 \
 	nv.9 nvlist_unpack.9 \
 	nv.9 nvlist_xfer.9
 MLINKS+=OF_child.9 OF_parent.9 \
 	OF_child.9 OF_peer.9
 MLINKS+=OF_device_from_xref.9 OF_device_register_xref.9 \
 	OF_device_from_xref.9 OF_xref_from_device.9
 MLINKS+=OF_getprop.9 OF_getencprop.9 \
 	OF_getprop.9 OF_getencprop_alloc.9 \
 	OF_getprop.9 OF_getencprop_alloc_multi.9 \
 	OF_getprop.9 OF_getprop_alloc.9 \
 	OF_getprop.9 OF_getprop_alloc_multi.9 \
 	OF_getprop.9 OF_getproplen.9 \
 	OF_getprop.9 OF_hasprop.9 \
 	OF_getprop.9 OF_nextprop.9 \
 	OF_getprop.9 OF_prop_free.9 \
 	OF_getprop.9 OF_searchencprop.9 \
 	OF_getprop.9 OF_searchprop.9 \
 	OF_getprop.9 OF_setprop.9
 MLINKS+=OF_node_from_xref.9 OF_xref_from_node.9
 MLINKS+=ofw_bus_is_compatible.9 ofw_bus_is_compatible_strict.9 \
 	ofw_bus_is_compatible.9 ofw_bus_node_is_compatible.9 \
 	ofw_bus_is_compatible.9 ofw_bus_search_compatible.9
 # Aliases installed for ofw_bus_status_okay.9.
 MLINKS+=ofw_bus_status_okay.9 ofw_bus_get_status.9 \
 	ofw_bus_status_okay.9 ofw_bus_node_status_okay.9
 MLINKS+=osd.9 osd_call.9 \
 	osd.9 osd_del.9 \
 	osd.9 osd_deregister.9 \
 	osd.9 osd_exit.9 \
 	osd.9 osd_get.9 \
 	osd.9 osd_register.9 \
 	osd.9 osd_set.9
 MLINKS+=panic.9 vpanic.9
 MLINKS+=PCBGROUP.9 in_pcbgroup_byhash.9 \
 	PCBGROUP.9 in_pcbgroup_byinpcb.9 \
 	PCBGROUP.9 in_pcbgroup_destroy.9 \
 	PCBGROUP.9 in_pcbgroup_enabled.9 \
 	PCBGROUP.9 in_pcbgroup_init.9 \
 	PCBGROUP.9 in_pcbgroup_remove.9 \
 	PCBGROUP.9 in_pcbgroup_update.9 \
 	PCBGROUP.9 in_pcbgroup_update_mbuf.9 \
 	PCBGROUP.9 in6_pcbgroup_byhash.9
 MLINKS+=pci.9 pci_alloc_msi.9 \
 	pci.9 pci_alloc_msix.9 \
 	pci.9 pci_disable_busmaster.9 \
 	pci.9 pci_disable_io.9 \
 	pci.9 pci_enable_busmaster.9 \
 	pci.9 pci_enable_io.9 \
 	pci.9 pci_find_bsf.9 \
 	pci.9 pci_find_cap.9 \
 	pci.9 pci_find_dbsf.9 \
 	pci.9 pci_find_device.9 \
 	pci.9 pci_find_extcap.9 \
 	pci.9 pci_find_htcap.9 \
 	pci.9 pci_find_pcie_root_port.9 \
 	pci.9 pci_get_id.9 \
 	pci.9 pci_get_max_read_req.9 \
 	pci.9 pci_get_powerstate.9 \
 	pci.9 pci_get_vpd_ident.9 \
 	pci.9 pci_get_vpd_readonly.9 \
 	pci.9 pci_iov_attach.9 \
 	pci.9 pci_iov_attach_name.9 \
 	pci.9 pci_iov_detach.9 \
 	pci.9 pci_msi_count.9 \
 	pci.9 pci_msix_count.9 \
 	pci.9 pci_msix_pba_bar.9 \
 	pci.9 pci_msix_table_bar.9 \
 	pci.9 pci_pending_msix.9 \
 	pci.9 pci_read_config.9 \
 	pci.9 pci_release_msi.9 \
 	pci.9 pci_remap_msix.9 \
 	pci.9 pci_restore_state.9 \
 	pci.9 pci_save_state.9 \
 	pci.9 pci_set_powerstate.9 \
 	pci.9 pci_set_max_read_req.9 \
 	pci.9 pci_write_config.9 \
 	pci.9 pcie_adjust_config.9 \
 	pci.9 pcie_flr.9 \
 	pci.9 pcie_max_completion_timeout.9 \
 	pci.9 pcie_read_config.9 \
 	pci.9 pcie_wait_for_pending_transactions.9 \
 	pci.9 pcie_write_config.9
 MLINKS+=pci_iov_schema.9 pci_iov_schema_alloc_node.9 \
 	pci_iov_schema.9 pci_iov_schema_add_bool.9 \
 	pci_iov_schema.9 pci_iov_schema_add_string.9 \
 	pci_iov_schema.9 pci_iov_schema_add_uint8.9 \
 	pci_iov_schema.9 pci_iov_schema_add_uint16.9 \
 	pci_iov_schema.9 pci_iov_schema_add_uint32.9 \
 	pci_iov_schema.9 pci_iov_schema_add_uint64.9 \
 	pci_iov_schema.9 pci_iov_schema_add_unicast_mac.9
 # Aliases installed for pfil.9, kept in alphabetical order.
 MLINKS+=pfil.9 pfil_add_hook.9 \
 	pfil.9 pfil_head_register.9 \
 	pfil.9 pfil_head_unregister.9 \
 	pfil.9 pfil_link.9 \
 	pfil.9 pfil_remove_hook.9 \
 	pfil.9 pfil_run_hooks.9
 MLINKS+=pfind.9 zpfind.9
 MLINKS+=PHOLD.9 PRELE.9 \
 	PHOLD.9 _PHOLD.9 \
 	PHOLD.9 _PRELE.9 \
 	PHOLD.9 PROC_ASSERT_HELD.9 \
 	PHOLD.9 PROC_ASSERT_NOT_HELD.9
 MLINKS+=pmap_copy.9 pmap_copy_page.9
 MLINKS+=pmap_extract.9 pmap_extract_and_hold.9
 MLINKS+=pmap_init.9 pmap_init2.9
 MLINKS+=pmap_is_modified.9 pmap_ts_referenced.9
 MLINKS+=pmap_pinit.9 pmap_pinit0.9 \
 	pmap_pinit.9 pmap_pinit2.9
 MLINKS+=pmap_qenter.9 pmap_qremove.9
 MLINKS+=pmap_quick_enter_page.9 pmap_quick_remove_page.9
 MLINKS+=pmap_remove.9 pmap_remove_all.9 \
 	pmap_remove.9 pmap_remove_pages.9
 MLINKS+=pmap_resident_count.9 pmap_wired_count.9
 MLINKS+=pmap_zero_page.9 pmap_zero_area.9
 MLINKS+=printf.9 log.9 \
 	printf.9 tprintf.9 \
 	printf.9 uprintf.9
 MLINKS+=priv.9 priv_check.9 \
 	priv.9 priv_check_cred.9
 MLINKS+=proc_rwmem.9 proc_readmem.9 \
 	proc_rwmem.9 proc_writemem.9
 MLINKS+=psignal.9 gsignal.9 \
 	psignal.9 pgsignal.9 \
 	psignal.9 tdsignal.9
 MLINKS+=pwmbus.9 pwm.9
 MLINKS+=random.9 arc4rand.9 \
 	random.9 arc4random.9 \
 	random.9 is_random_seeded.9 \
 	random.9 read_random.9 \
 	random.9 read_random_uio.9 \
 	random.9 srandom.9
 MLINKS+=random_harvest.9 random_harvest_direct.9 \
 	random_harvest.9 random_harvest_fast.9 \
 	random_harvest.9 random_harvest_queue.9
 MLINKS+=ratecheck.9 ppsratecheck.9
 MLINKS+=refcount.9 refcount_acquire.9 \
 	refcount.9 refcount_init.9 \
 	refcount.9 refcount_release.9
 MLINKS+=resource_int_value.9 resource_long_value.9 \
 	resource_int_value.9 resource_string_value.9
 MLINKS+=rman.9 rman_activate_resource.9 \
 	rman.9 rman_adjust_resource.9 \
 	rman.9 rman_deactivate_resource.9 \
 	rman.9 rman_fini.9 \
 	rman.9 rman_first_free_region.9 \
 	rman.9 rman_get_bushandle.9 \
 	rman.9 rman_get_bustag.9 \
 	rman.9 rman_get_device.9 \
 	rman.9 rman_get_end.9 \
 	rman.9 rman_get_flags.9 \
 	rman.9 rman_get_mapping.9 \
 	rman.9 rman_get_rid.9 \
 	rman.9 rman_get_size.9 \
 	rman.9 rman_get_start.9 \
 	rman.9 rman_get_virtual.9 \
 	rman.9 rman_init.9 \
 	rman.9 rman_init_from_resource.9 \
 	rman.9 rman_is_region_manager.9 \
 	rman.9 rman_last_free_region.9 \
 	rman.9 rman_make_alignment_flags.9 \
 	rman.9 rman_manage_region.9 \
 	rman.9 rman_release_resource.9 \
 	rman.9 rman_reserve_resource.9 \
 	rman.9 rman_reserve_resource_bound.9 \
 	rman.9 rman_set_bushandle.9 \
 	rman.9 rman_set_bustag.9 \
 	rman.9 rman_set_mapping.9 \
 	rman.9 rman_set_rid.9 \
 	rman.9 rman_set_virtual.9
 MLINKS+=rmlock.9 rm_assert.9 \
 	rmlock.9 rm_destroy.9 \
 	rmlock.9 rm_init.9 \
 	rmlock.9 rm_init_flags.9 \
 	rmlock.9 rm_rlock.9 \
 	rmlock.9 rm_runlock.9 \
 	rmlock.9 rm_sleep.9 \
 	rmlock.9 RM_SYSINIT.9 \
 	rmlock.9 RM_SYSINIT_FLAGS.9 \
 	rmlock.9 rm_try_rlock.9 \
 	rmlock.9 rm_wlock.9 \
 	rmlock.9 rm_wowned.9 \
 	rmlock.9 rm_wunlock.9
 MLINKS+=rtalloc.9 rtalloc1.9 \
 	rtalloc.9 rtalloc_ign.9 \
 	rtalloc.9 RT_ADDREF.9 \
 	rtalloc.9 RT_LOCK.9 \
 	rtalloc.9 RT_REMREF.9 \
 	rtalloc.9 RT_RTFREE.9 \
 	rtalloc.9 RT_UNLOCK.9 \
 	rtalloc.9 RTFREE_LOCKED.9 \
 	rtalloc.9 RTFREE.9 \
 	rtalloc.9 rtfree.9 \
 	rtalloc.9 rtalloc1_fib.9 \
 	rtalloc.9 rtalloc_ign_fib.9 \
 	rtalloc.9 rtalloc_fib.9
 MLINKS+=runqueue.9 choosethread.9 \
 	runqueue.9 procrunnable.9 \
 	runqueue.9 remrunqueue.9 \
 	runqueue.9 setrunqueue.9
 # Aliases installed for rwlock.9, kept in alphabetical order.
 MLINKS+=rwlock.9 rw_assert.9 \
 	rwlock.9 rw_destroy.9 \
 	rwlock.9 rw_downgrade.9 \
 	rwlock.9 rw_init.9 \
 	rwlock.9 rw_init_flags.9 \
 	rwlock.9 rw_initialized.9 \
 	rwlock.9 rw_rlock.9 \
 	rwlock.9 rw_runlock.9 \
 	rwlock.9 rw_sleep.9 \
 	rwlock.9 RW_SYSINIT.9 \
 	rwlock.9 RW_SYSINIT_FLAGS.9 \
 	rwlock.9 rw_try_rlock.9 \
 	rwlock.9 rw_try_upgrade.9 \
 	rwlock.9 rw_try_wlock.9 \
 	rwlock.9 rw_unlock.9 \
 	rwlock.9 rw_wlock.9 \
 	rwlock.9 rw_wowned.9 \
 	rwlock.9 rw_wunlock.9
 # Aliases installed for sbuf.9, kept in alphabetical order.
 MLINKS+=sbuf.9 sbuf_bcat.9 \
 	sbuf.9 sbuf_bcopyin.9 \
 	sbuf.9 sbuf_bcpy.9 \
 	sbuf.9 sbuf_cat.9 \
 	sbuf.9 sbuf_clear.9 \
 	sbuf.9 sbuf_clear_flags.9 \
 	sbuf.9 sbuf_copyin.9 \
 	sbuf.9 sbuf_cpy.9 \
 	sbuf.9 sbuf_data.9 \
 	sbuf.9 sbuf_delete.9 \
 	sbuf.9 sbuf_done.9 \
 	sbuf.9 sbuf_end_section.9 \
 	sbuf.9 sbuf_error.9 \
 	sbuf.9 sbuf_finish.9 \
 	sbuf.9 sbuf_get_flags.9 \
 	sbuf.9 sbuf_hexdump.9 \
 	sbuf.9 sbuf_len.9 \
 	sbuf.9 sbuf_new.9 \
 	sbuf.9 sbuf_new_auto.9 \
 	sbuf.9 sbuf_new_for_sysctl.9 \
 	sbuf.9 sbuf_nl_terminate.9 \
 	sbuf.9 sbuf_printf.9 \
 	sbuf.9 sbuf_printf_drain.9 \
 	sbuf.9 sbuf_putbuf.9 \
 	sbuf.9 sbuf_putc.9 \
 	sbuf.9 sbuf_set_drain.9 \
 	sbuf.9 sbuf_set_flags.9 \
 	sbuf.9 sbuf_setpos.9 \
 	sbuf.9 sbuf_start_section.9 \
 	sbuf.9 sbuf_trim.9 \
 	sbuf.9 sbuf_vprintf.9
 MLINKS+=scheduler.9 curpriority_cmp.9 \
 	scheduler.9 maybe_resched.9 \
 	scheduler.9 propagate_priority.9 \
 	scheduler.9 resetpriority.9 \
 	scheduler.9 roundrobin.9 \
 	scheduler.9 roundrobin_interval.9 \
 	scheduler.9 schedclock.9 \
 	scheduler.9 schedcpu.9 \
 	scheduler.9 sched_setup.9 \
 	scheduler.9 setrunnable.9 \
 	scheduler.9 updatepri.9
 MLINKS+=SDT.9 SDT_PROVIDER_DECLARE.9 \
 	SDT.9 SDT_PROVIDER_DEFINE.9 \
 	SDT.9 SDT_PROBE_DECLARE.9 \
 	SDT.9 SDT_PROBE_DEFINE.9 \
 	SDT.9 SDT_PROBE.9
 MLINKS+=securelevel_gt.9 securelevel_ge.9
 MLINKS+=selrecord.9 seldrain.9 \
 	selrecord.9 selwakeup.9
 MLINKS+=sema.9 sema_destroy.9 \
 	sema.9 sema_init.9 \
 	sema.9 sema_post.9 \
 	sema.9 sema_timedwait.9 \
 	sema.9 sema_trywait.9 \
 	sema.9 sema_value.9 \
 	sema.9 sema_wait.9
 MLINKS+=seqc.9 seqc_consistent.9 \
 	seqc.9 seqc_read.9 \
 	seqc.9 seqc_write_begin.9 \
 	seqc.9 seqc_write_end.9
 MLINKS+=sf_buf.9 sf_buf_alloc.9 \
 	sf_buf.9 sf_buf_free.9 \
 	sf_buf.9 sf_buf_kva.9 \
 	sf_buf.9 sf_buf_page.9
 MLINKS+=sglist.9 sglist_alloc.9 \
 	sglist.9 sglist_append.9 \
 	sglist.9 sglist_append_bio.9 \
 	sglist.9 sglist_append_ext_pgs.9 \
 	sglist.9 sglist_append_mb_ext_pgs.9 \
 	sglist.9 sglist_append_mbuf.9 \
 	sglist.9 sglist_append_phys.9 \
 	sglist.9 sglist_append_sglist.9 \
 	sglist.9 sglist_append_uio.9 \
 	sglist.9 sglist_append_user.9 \
 	sglist.9 sglist_append_vmpages.9 \
 	sglist.9 sglist_build.9 \
 	sglist.9 sglist_clone.9 \
 	sglist.9 sglist_consume_uio.9 \
 	sglist.9 sglist_count.9 \
 	sglist.9 sglist_count_ext_pgs.9 \
 	sglist.9 sglist_count_mb_ext_pgs.9 \
 	sglist.9 sglist_count_vmpages.9 \
 	sglist.9 sglist_free.9 \
 	sglist.9 sglist_hold.9 \
 	sglist.9 sglist_init.9 \
 	sglist.9 sglist_join.9 \
 	sglist.9 sglist_length.9 \
 	sglist.9 sglist_reset.9 \
 	sglist.9 sglist_slice.9 \
 	sglist.9 sglist_split.9
 MLINKS+=shm_map.9 shm_unmap.9
 # Aliases installed for signal.9: the kernel signal routines and the
 # sigset manipulation macros.  SIGSETNEQ and SIGSETOR were previously
 # listed under the misspelled names "SETSETNEQ" and "SETSETOR", which left
 # the real macro names without a link; they now match their SIGSET*
 # siblings.
 MLINKS+=signal.9 cursig.9 \
 	signal.9 execsigs.9 \
 	signal.9 issignal.9 \
 	signal.9 killproc.9 \
 	signal.9 pgsigio.9 \
 	signal.9 postsig.9 \
 	signal.9 SIGADDSET.9 \
 	signal.9 SIG_CONTSIGMASK.9 \
 	signal.9 SIGDELSET.9 \
 	signal.9 SIGEMPTYSET.9 \
 	signal.9 sigexit.9 \
 	signal.9 SIGFILLSET.9 \
 	signal.9 siginit.9 \
 	signal.9 SIGISEMPTY.9 \
 	signal.9 SIGISMEMBER.9 \
 	signal.9 SIGNOTEMPTY.9 \
 	signal.9 signotify.9 \
 	signal.9 SIGPENDING.9 \
 	signal.9 SIGSETAND.9 \
 	signal.9 SIGSETCANTMASK.9 \
 	signal.9 SIGSETEQ.9 \
 	signal.9 SIGSETNAND.9 \
 	signal.9 SIGSETNEQ.9 \
 	signal.9 SIGSETOR.9 \
 	signal.9 SIG_STOPSIGMASK.9 \
 	signal.9 trapsignal.9
 # Aliases installed for sleep.9, kept in alphabetical order.
 MLINKS+=sleep.9 msleep.9 \
 	sleep.9 msleep_sbt.9 \
 	sleep.9 msleep_spin.9 \
 	sleep.9 msleep_spin_sbt.9 \
 	sleep.9 pause.9 \
 	sleep.9 pause_sbt.9 \
 	sleep.9 pause_sig.9 \
 	sleep.9 tsleep.9 \
 	sleep.9 tsleep_sbt.9 \
 	sleep.9 wakeup.9 \
 	sleep.9 wakeup_any.9 \
 	sleep.9 wakeup_one.9
 MLINKS+=sleepqueue.9 init_sleepqueues.9 \
 	sleepqueue.9 sleepq_abort.9 \
 	sleepqueue.9 sleepq_add.9 \
 	sleepqueue.9 sleepq_alloc.9 \
 	sleepqueue.9 sleepq_broadcast.9 \
 	sleepqueue.9 sleepq_free.9 \
 	sleepqueue.9 sleepq_lookup.9 \
 	sleepqueue.9 sleepq_lock.9 \
 	sleepqueue.9 sleepq_release.9 \
 	sleepqueue.9 sleepq_remove.9 \
 	sleepqueue.9 sleepq_set_timeout.9 \
 	sleepqueue.9 sleepq_set_timeout_sbt.9 \
 	sleepqueue.9 sleepq_signal.9 \
 	sleepqueue.9 sleepq_sleepcnt.9 \
 	sleepqueue.9 sleepq_timedwait.9 \
 	sleepqueue.9 sleepq_timedwait_sig.9 \
 	sleepqueue.9 sleepq_type.9 \
 	sleepqueue.9 sleepq_wait.9 \
 	sleepqueue.9 sleepq_wait_sig.9
 MLINKS+=socket.9 soabort.9 \
 	socket.9 soaccept.9 \
 	socket.9 sobind.9 \
 	socket.9 socheckuid.9 \
 	socket.9 soclose.9 \
 	socket.9 soconnect.9 \
 	socket.9 socreate.9 \
 	socket.9 sodisconnect.9 \
 	socket.9 sodtor_set.9 \
 	socket.9 sodupsockaddr.9 \
 	socket.9 sofree.9 \
 	socket.9 sogetopt.9 \
 	socket.9 sohasoutofband.9 \
 	socket.9 solisten.9 \
 	socket.9 solisten_proto.9 \
 	socket.9 solisten_proto_check.9 \
 	socket.9 sonewconn.9 \
 	socket.9 sooptcopyin.9 \
 	socket.9 sooptcopyout.9 \
 	socket.9 sopoll.9 \
 	socket.9 sopoll_generic.9 \
 	socket.9 soreceive.9 \
 	socket.9 soreceive_dgram.9 \
 	socket.9 soreceive_generic.9 \
 	socket.9 soreceive_stream.9 \
 	socket.9 soreserve.9 \
 	socket.9 sorflush.9 \
 	socket.9 sosend.9 \
 	socket.9 sosend_dgram.9 \
 	socket.9 sosend_generic.9 \
 	socket.9 sosetopt.9 \
 	socket.9 soshutdown.9 \
 	socket.9 sotoxsocket.9 \
 	socket.9 soupcall_clear.9 \
 	socket.9 soupcall_set.9 \
 	socket.9 sowakeup.9
 MLINKS+=stack.9 stack_copy.9 \
 	stack.9 stack_create.9 \
 	stack.9 stack_destroy.9 \
 	stack.9 stack_print.9 \
 	stack.9 stack_print_ddb.9 \
 	stack.9 stack_print_short.9 \
 	stack.9 stack_print_short_ddb.9 \
 	stack.9 stack_put.9 \
 	stack.9 stack_save.9 \
 	stack.9 stack_sbuf_print.9 \
 	stack.9 stack_sbuf_print_ddb.9 \
 	stack.9 stack_zero.9
 MLINKS+=store.9 subyte.9 \
 	store.9 suword.9 \
 	store.9 suword16.9 \
 	store.9 suword32.9 \
 	store.9 suword64.9
 MLINKS+=swi.9 swi_add.9 \
 	swi.9 swi_remove.9 \
 	swi.9 swi_sched.9
 MLINKS+=sx.9 sx_assert.9 \
 	sx.9 sx_destroy.9 \
 	sx.9 sx_downgrade.9 \
 	sx.9 sx_init.9 \
 	sx.9 sx_init_flags.9 \
 	sx.9 sx_sleep.9 \
 	sx.9 sx_slock.9 \
 	sx.9 sx_slock_sig.9 \
 	sx.9 sx_sunlock.9 \
 	sx.9 SX_SYSINIT.9 \
 	sx.9 SX_SYSINIT_FLAGS.9 \
 	sx.9 sx_try_slock.9 \
 	sx.9 sx_try_upgrade.9 \
 	sx.9 sx_try_xlock.9 \
 	sx.9 sx_unlock.9 \
 	sx.9 sx_xholder.9 \
 	sx.9 sx_xlock.9 \
 	sx.9 sx_xlock_sig.9 \
 	sx.9 sx_xlocked.9 \
 	sx.9 sx_xunlock.9
 MLINKS+=syscall_helper_register.9 syscall_helper_unregister.9 \
 	syscall_helper_register.9 SYSCALL_INIT_HELPER.9 \
 	syscall_helper_register.9 SYSCALL_INIT_HELPER_COMPAT.9 \
 	syscall_helper_register.9 SYSCALL_INIT_HELPER_COMPAT_F.9 \
 	syscall_helper_register.9 SYSCALL_INIT_HELPER_F.9
 MLINKS+=sysctl.9 SYSCTL_DECL.9 \
 	sysctl.9 SYSCTL_ADD_INT.9 \
 	sysctl.9 SYSCTL_ADD_LONG.9 \
 	sysctl.9 SYSCTL_ADD_NODE.9 \
 	sysctl.9 SYSCTL_ADD_NODE_WITH_LABEL.9 \
 	sysctl.9 SYSCTL_ADD_OPAQUE.9 \
 	sysctl.9 SYSCTL_ADD_PROC.9 \
 	sysctl.9 SYSCTL_ADD_QUAD.9 \
 	sysctl.9 SYSCTL_ADD_ROOT_NODE.9 \
 	sysctl.9 SYSCTL_ADD_S8.9 \
 	sysctl.9 SYSCTL_ADD_S16.9 \
 	sysctl.9 SYSCTL_ADD_S32.9 \
 	sysctl.9 SYSCTL_ADD_S64.9 \
 	sysctl.9 SYSCTL_ADD_STRING.9 \
 	sysctl.9 SYSCTL_ADD_STRUCT.9 \
 	sysctl.9 SYSCTL_ADD_TIMEVAL_SEC.9 \
 	sysctl.9 SYSCTL_ADD_U8.9 \
 	sysctl.9 SYSCTL_ADD_U16.9 \
 	sysctl.9 SYSCTL_ADD_U32.9 \
 	sysctl.9 SYSCTL_ADD_U64.9 \
 	sysctl.9 SYSCTL_ADD_UAUTO.9 \
 	sysctl.9 SYSCTL_ADD_UINT.9 \
 	sysctl.9 SYSCTL_ADD_ULONG.9 \
 	sysctl.9 SYSCTL_ADD_UQUAD.9 \
 	sysctl.9 SYSCTL_CHILDREN.9 \
 	sysctl.9 SYSCTL_STATIC_CHILDREN.9 \
 	sysctl.9 SYSCTL_NODE_CHILDREN.9 \
 	sysctl.9 SYSCTL_PARENT.9 \
 	sysctl.9 SYSCTL_INT.9 \
 	sysctl.9 SYSCTL_INT_WITH_LABEL.9 \
 	sysctl.9 SYSCTL_LONG.9 \
 	sysctl.9 sysctl_msec_to_ticks.9 \
 	sysctl.9 SYSCTL_NODE.9 \
 	sysctl.9 SYSCTL_NODE_WITH_LABEL.9 \
 	sysctl.9 SYSCTL_OPAQUE.9 \
 	sysctl.9 SYSCTL_PROC.9 \
 	sysctl.9 SYSCTL_QUAD.9 \
 	sysctl.9 SYSCTL_ROOT_NODE.9 \
 	sysctl.9 SYSCTL_S8.9 \
 	sysctl.9 SYSCTL_S16.9 \
 	sysctl.9 SYSCTL_S32.9 \
 	sysctl.9 SYSCTL_S64.9 \
 	sysctl.9 SYSCTL_STRING.9 \
 	sysctl.9 SYSCTL_STRUCT.9 \
 	sysctl.9 SYSCTL_TIMEVAL_SEC.9 \
 	sysctl.9 SYSCTL_U8.9 \
 	sysctl.9 SYSCTL_U16.9 \
 	sysctl.9 SYSCTL_U32.9 \
 	sysctl.9 SYSCTL_U64.9 \
 	sysctl.9 SYSCTL_UINT.9 \
 	sysctl.9 SYSCTL_ULONG.9 \
 	sysctl.9 SYSCTL_UQUAD.9
 MLINKS+=sysctl_add_oid.9 sysctl_move_oid.9 \
 	sysctl_add_oid.9 sysctl_remove_oid.9 \
 	sysctl_add_oid.9 sysctl_remove_name.9
 MLINKS+=sysctl_ctx_init.9 sysctl_ctx_entry_add.9 \
 	sysctl_ctx_init.9 sysctl_ctx_entry_del.9 \
 	sysctl_ctx_init.9 sysctl_ctx_entry_find.9 \
 	sysctl_ctx_init.9 sysctl_ctx_free.9
 MLINKS+=SYSINIT.9 SYSUNINIT.9
 MLINKS+=taskqueue.9 TASK_INIT.9 \
 	taskqueue.9 TASK_INITIALIZER.9 \
 	taskqueue.9 taskqueue_block.9 \
 	taskqueue.9 taskqueue_cancel.9 \
 	taskqueue.9 taskqueue_cancel_timeout.9 \
 	taskqueue.9 taskqueue_create.9 \
 	taskqueue.9 taskqueue_create_fast.9 \
 	taskqueue.9 TASKQUEUE_DECLARE.9 \
 	taskqueue.9 TASKQUEUE_DEFINE.9 \
 	taskqueue.9 TASKQUEUE_DEFINE_THREAD.9 \
 	taskqueue.9 taskqueue_drain.9 \
 	taskqueue.9 taskqueue_drain_all.9 \
 	taskqueue.9 taskqueue_drain_timeout.9 \
 	taskqueue.9 taskqueue_enqueue.9 \
 	taskqueue.9 taskqueue_enqueue_timeout.9 \
 	taskqueue.9 TASKQUEUE_FAST_DEFINE.9 \
 	taskqueue.9 TASKQUEUE_FAST_DEFINE_THREAD.9 \
 	taskqueue.9 taskqueue_free.9 \
 	taskqueue.9 taskqueue_member.9 \
 	taskqueue.9 taskqueue_quiesce.9 \
 	taskqueue.9 taskqueue_run.9 \
 	taskqueue.9 taskqueue_set_callback.9 \
 	taskqueue.9 taskqueue_start_threads.9 \
 	taskqueue.9 taskqueue_start_threads_cpuset.9 \
 	taskqueue.9 taskqueue_start_threads_in_proc.9 \
 	taskqueue.9 taskqueue_unblock.9 \
 	taskqueue.9 TIMEOUT_TASK_INIT.9
 # Aliases installed for tcp_functions.9, kept in alphabetical order.
 MLINKS+=tcp_functions.9 deregister_tcp_functions.9 \
 	tcp_functions.9 register_tcp_functions.9 \
 	tcp_functions.9 register_tcp_functions_as_name.9 \
 	tcp_functions.9 register_tcp_functions_as_names.9
 MLINKS+=time.9 boottime.9 \
 	time.9 time_second.9 \
 	time.9 time_uptime.9
 MLINKS+=timeout.9 callout.9 \
 	timeout.9 callout_active.9 \
 	timeout.9 callout_async_drain.9 \
 	timeout.9 callout_deactivate.9 \
 	timeout.9 callout_drain.9 \
 	timeout.9 callout_handle_init.9 \
 	timeout.9 callout_init.9 \
 	timeout.9 callout_init_mtx.9 \
 	timeout.9 callout_init_rm.9 \
 	timeout.9 callout_init_rw.9 \
 	timeout.9 callout_pending.9 \
 	timeout.9 callout_reset.9 \
 	timeout.9 callout_reset_curcpu.9 \
 	timeout.9 callout_reset_on.9 \
 	timeout.9 callout_reset_sbt.9 \
 	timeout.9 callout_reset_sbt_curcpu.9 \
 	timeout.9 callout_reset_sbt_on.9 \
 	timeout.9 callout_schedule.9 \
 	timeout.9 callout_schedule_curcpu.9 \
 	timeout.9 callout_schedule_on.9 \
 	timeout.9 callout_schedule_sbt.9 \
 	timeout.9 callout_schedule_sbt_curcpu.9 \
 	timeout.9 callout_schedule_sbt_on.9 \
 	timeout.9 callout_stop.9 \
 	timeout.9 callout_when.9 \
 	timeout.9 untimeout.9
 MLINKS+=ucred.9 crcopy.9 \
 	ucred.9 crcopysafe.9 \
 	ucred.9 crdup.9 \
 	ucred.9 crfree.9 \
 	ucred.9 crget.9 \
 	ucred.9 crhold.9 \
 	ucred.9 crsetgroups.9 \
 	ucred.9 cru2x.9
 MLINKS+=uidinfo.9 uifind.9 \
 	uidinfo.9 uifree.9 \
 	uidinfo.9 uihashinit.9 \
 	uidinfo.9 uihold.9
 MLINKS+=uio.9 uiomove.9 \
 	uio.9 uiomove_frombuf.9 \
 	uio.9 uiomove_nofault.9
 
 # usbdi.9 and all of its aliases are added to MAN/MLINKS only when the
 # MK_USB build option is not "no"; with USB disabled none of these pages
 # or links are installed.
 .if ${MK_USB} != "no"
 MAN+=	usbdi.9
 MLINKS+=usbdi.9 usbd_do_request.9 \
 	usbdi.9 usbd_do_request_flags.9 \
 	usbdi.9 usbd_errstr.9 \
 	usbdi.9 usbd_lookup_id_by_info.9 \
 	usbdi.9 usbd_lookup_id_by_uaa.9 \
 	usbdi.9 usbd_transfer_clear_stall.9 \
 	usbdi.9 usbd_transfer_drain.9 \
 	usbdi.9 usbd_transfer_pending.9 \
 	usbdi.9 usbd_transfer_poll.9 \
 	usbdi.9 usbd_transfer_setup.9 \
 	usbdi.9 usbd_transfer_start.9 \
 	usbdi.9 usbd_transfer_stop.9 \
 	usbdi.9 usbd_transfer_submit.9 \
 	usbdi.9 usbd_transfer_unsetup.9 \
 	usbdi.9 usbd_xfer_clr_flag.9 \
 	usbdi.9 usbd_xfer_frame_data.9 \
 	usbdi.9 usbd_xfer_frame_len.9 \
 	usbdi.9 usbd_xfer_get_frame.9 \
 	usbdi.9 usbd_xfer_get_priv.9 \
 	usbdi.9 usbd_xfer_is_stalled.9 \
 	usbdi.9 usbd_xfer_max_framelen.9 \
 	usbdi.9 usbd_xfer_max_frames.9 \
 	usbdi.9 usbd_xfer_max_len.9 \
 	usbdi.9 usbd_xfer_set_flag.9 \
 	usbdi.9 usbd_xfer_set_frame_data.9 \
 	usbdi.9 usbd_xfer_set_frame_len.9 \
 	usbdi.9 usbd_xfer_set_frame_offset.9 \
 	usbdi.9 usbd_xfer_set_frames.9 \
 	usbdi.9 usbd_xfer_set_interval.9 \
 	usbdi.9 usbd_xfer_set_priv.9 \
 	usbdi.9 usbd_xfer_set_stall.9 \
 	usbdi.9 usbd_xfer_set_timeout.9 \
 	usbdi.9 usbd_xfer_softc.9 \
 	usbdi.9 usbd_xfer_state.9 \
 	usbdi.9 usbd_xfer_status.9 \
 	usbdi.9 usb_fifo_alloc_buffer.9 \
 	usbdi.9 usb_fifo_attach.9 \
 	usbdi.9 usb_fifo_detach.9 \
 	usbdi.9 usb_fifo_free_buffer.9 \
 	usbdi.9 usb_fifo_get_data.9 \
 	usbdi.9 usb_fifo_get_data_buffer.9 \
 	usbdi.9 usb_fifo_get_data_error.9 \
 	usbdi.9 usb_fifo_get_data_linear.9 \
 	usbdi.9 usb_fifo_put_bytes_max.9 \
 	usbdi.9 usb_fifo_put_data.9 \
 	usbdi.9 usb_fifo_put_data_buffer.9 \
 	usbdi.9 usb_fifo_put_data_error.9 \
 	usbdi.9 usb_fifo_put_data_linear.9 \
 	usbdi.9 usb_fifo_reset.9 \
 	usbdi.9 usb_fifo_softc.9 \
 	usbdi.9 usb_fifo_wakeup.9
 .endif
 MLINKS+=vcount.9 count_dev.9
 MLINKS+=vfsconf.9 vfs_modevent.9 \
 	vfsconf.9 vfs_register.9 \
 	vfsconf.9 vfs_unregister.9
 MLINKS+=vfs_getopt.9 vfs_copyopt.9 \
 	vfs_getopt.9 vfs_filteropt.9 \
 	vfs_getopt.9 vfs_flagopt.9 \
 	vfs_getopt.9 vfs_getopts.9 \
 	vfs_getopt.9 vfs_scanopt.9 \
 	vfs_getopt.9 vfs_setopt.9 \
 	vfs_getopt.9 vfs_setopt_part.9 \
 	vfs_getopt.9 vfs_setopts.9
 MLINKS+=vhold.9 vdrop.9 \
 	vhold.9 vdropl.9 \
 	vhold.9 vholdl.9
 MLINKS+=vmem.9 vmem_add.9 \
 	vmem.9 vmem_alloc.9 \
 	vmem.9 vmem_create.9 \
 	vmem.9 vmem_destroy.9 \
 	vmem.9 vmem_free.9 \
 	vmem.9 vmem_xalloc.9 \
 	vmem.9 vmem_xfree.9  
 MLINKS+=vm_map_lock.9 vm_map_lock_downgrade.9 \
 	vm_map_lock.9 vm_map_lock_read.9 \
 	vm_map_lock.9 vm_map_lock_upgrade.9 \
 	vm_map_lock.9 vm_map_trylock.9 \
 	vm_map_lock.9 vm_map_trylock_read.9 \
 	vm_map_lock.9 vm_map_unlock.9 \
 	vm_map_lock.9 vm_map_unlock_read.9
 MLINKS+=vm_map_lookup.9 vm_map_lookup_done.9
 MLINKS+=vm_map_max.9 vm_map_min.9 \
 	vm_map_max.9 vm_map_pmap.9
 MLINKS+=vm_map_stack.9 vm_map_growstack.9
 MLINKS+=vm_map_wire.9 vm_map_wire_mapped.9 \
 	vm_page_wire.9 vm_page_unwire.9 \
 	vm_page_wire.9 vm_page_unwire_noq.9
 MLINKS+=vm_page_bits.9 vm_page_clear_dirty.9 \
 	vm_page_bits.9 vm_page_dirty.9 \
 	vm_page_bits.9 vm_page_is_valid.9 \
 	vm_page_bits.9 vm_page_set_invalid.9 \
 	vm_page_bits.9 vm_page_set_validclean.9 \
 	vm_page_bits.9 vm_page_test_dirty.9 \
 	vm_page_bits.9 vm_page_undirty.9 \
 	vm_page_bits.9 vm_page_zero_invalid.9
 MLINKS+=vm_page_busy.9 vm_page_busied.9 \
 	vm_page_busy.9 vm_page_busy_downgrade.9 \
 	vm_page_busy.9 vm_page_busy_sleep.9 \
 	vm_page_busy.9 vm_page_sbusied.9 \
 	vm_page_busy.9 vm_page_sbusy.9 \
 	vm_page_busy.9 vm_page_sleep_if_busy.9 \
 	vm_page_busy.9 vm_page_sunbusy.9 \
 	vm_page_busy.9 vm_page_trysbusy.9 \
 	vm_page_busy.9 vm_page_tryxbusy.9 \
 	vm_page_busy.9 vm_page_xbusied.9 \
 	vm_page_busy.9 vm_page_xbusy.9 \
 	vm_page_busy.9 vm_page_xunbusy.9 \
 	vm_page_busy.9 vm_page_assert_sbusied.9 \
 	vm_page_busy.9 vm_page_assert_unbusied.9 \
 	vm_page_busy.9 vm_page_assert_xbusied.9
 MLINKS+=vm_page_aflag.9 vm_page_aflag_clear.9 \
 	vm_page_aflag.9 vm_page_aflag_set.9 \
 	vm_page_aflag.9 vm_page_reference.9
 MLINKS+=vm_page_free.9 vm_page_free_toq.9 \
 	vm_page_free.9 vm_page_free_zero.9 \
 	vm_page_free.9 vm_page_try_to_free.9
 MLINKS+=vm_page_insert.9 vm_page_remove.9
 MLINKS+=vm_page_wire.9 vm_page_unwire.9
 MLINKS+=VOP_ACCESS.9 VOP_ACCESSX.9
 MLINKS+=VOP_ATTRIB.9 VOP_GETATTR.9 \
 	VOP_ATTRIB.9 VOP_SETATTR.9
 MLINKS+=VOP_CREATE.9 VOP_MKDIR.9 \
 	VOP_CREATE.9 VOP_MKNOD.9 \
 	VOP_CREATE.9 VOP_SYMLINK.9
 MLINKS+=VOP_FSYNC.9 VOP_FDATASYNC.9
 MLINKS+=VOP_GETPAGES.9 VOP_PUTPAGES.9
 MLINKS+=VOP_INACTIVE.9 VOP_RECLAIM.9
 MLINKS+=VOP_LOCK.9 vn_lock.9 \
 	VOP_LOCK.9 VOP_ISLOCKED.9 \
 	VOP_LOCK.9 VOP_UNLOCK.9
 MLINKS+=VOP_OPENCLOSE.9 VOP_CLOSE.9 \
 	VOP_OPENCLOSE.9 VOP_OPEN.9
 MLINKS+=VOP_RDWR.9 VOP_READ.9 \
 	VOP_RDWR.9 VOP_WRITE.9
 MLINKS+=VOP_REMOVE.9 VOP_RMDIR.9
 MLINKS+=vnet.9 vimage.9
 MLINKS+=vref.9 VREF.9 \
 	vref.9 vrefl.9
 MLINKS+=vrele.9 vput.9 \
 	vrele.9 vunref.9
 MLINKS+=vslock.9 vsunlock.9
 MLINKS+=zone.9 uma.9 \
 	zone.9 uma_prealloc.9 \
 	zone.9 uma_reclaim.9 \
 	zone.9 uma_zalloc.9 \
 	zone.9 uma_zalloc_arg.9 \
 	zone.9 uma_zalloc_domain.9 \
 	zone.9 uma_zalloc_pcpu.9 \
 	zone.9 uma_zalloc_pcpu_arg.9 \
 	zone.9 uma_zcache_create.9 \
 	zone.9 uma_zcreate.9 \
 	zone.9 uma_zdestroy.9 \
 	zone.9 uma_zfree.9 \
 	zone.9 uma_zfree_arg.9 \
 	zone.9 uma_zfree_domain.9 \
 	zone.9 uma_zfree_pcpu.9 \
 	zone.9 uma_zfree_pcpu_arg.9 \
 	zone.9 uma_zone_get_cur.9 \
 	zone.9 uma_zone_get_max.9 \
 	zone.9 uma_zone_reclaim.9 \
 	zone.9 uma_zone_reserve.9 \
 	zone.9 uma_zone_reserve_kva.9 \
 	zone.9 uma_zone_set_allocf.9 \
 	zone.9 uma_zone_set_freef.9 \
 	zone.9 uma_zone_set_max.9 \
 	zone.9 uma_zone_set_maxaction.9 \
 	zone.9 uma_zone_set_maxcache.9 \
 	zone.9 uma_zone_set_warning.9 \
 	zone.9 uma_zsecond_create.9
 
 .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386"
 _superio.9=	superio.9
 MLINKS+=superio.9 superio_devid.9 \
 	superio.9 superio_dev_disable.9 \
 	superio.9 superio_dev_enable.9 \
 	superio.9 superio_dev_enabled.9 \
 	superio.9 superio_find_dev.9 \
 	superio.9 superio_find_dev.9 \
 	superio.9 superio_get_dma.9 \
 	superio.9 superio_get_iobase.9 \
 	superio.9 superio_get_irq.9 \
 	superio.9 superio_get_ldn.9 \
 	superio.9 superio_get_type.9 \
 	superio.9 superio_read.9 \
 	superio.9 superio_revid.9 \
 	superio.9 superio_vendor.9 \
 	superio.9 superio_write.9
 .endif
 
 .include <bsd.prog.mk>
Index: head/share/man/man9/bitset.9
===================================================================
--- head/share/man/man9/bitset.9	(revision 355708)
+++ head/share/man/man9/bitset.9	(revision 355709)
@@ -1,524 +1,524 @@
 .\" Copyright (c) 2015 Conrad Meyer <cem@FreeBSD.org>
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 .\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 .\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
 .\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd July 7, 2017
+.Dd December 12, 2019
 .Dt BITSET 9
 .Os
 .Sh NAME
 .Nm bitset(9)
 \(em
 .Nm BITSET_DEFINE ,
 .Nm BITSET_T_INITIALIZER ,
 .Nm BITSET_FSET ,
 .Nm BIT_CLR ,
 .Nm BIT_COPY ,
 .Nm BIT_ISSET ,
 .Nm BIT_SET ,
 .Nm BIT_ZERO ,
 .Nm BIT_FILL ,
 .Nm BIT_SETOF ,
 .Nm BIT_EMPTY ,
 .Nm BIT_ISFULLSET ,
 .Nm BIT_FFS ,
 .Nm BIT_FLS ,
 .Nm BIT_COUNT ,
 .Nm BIT_SUBSET ,
 .Nm BIT_OVERLAP ,
 .Nm BIT_CMP ,
 .Nm BIT_OR ,
 .Nm BIT_OR2 ,
 .Nm BIT_AND ,
 .Nm BIT_AND2 ,
-.Nm BIT_NAND ,
-.Nm BIT_NAND2 ,
+.Nm BIT_ANDNOT ,
+.Nm BIT_ANDNOT2 ,
 .Nm BIT_XOR ,
 .Nm BIT_XOR2 ,
 .Nm BIT_CLR_ATOMIC ,
 .Nm BIT_SET_ATOMIC ,
 .Nm BIT_SET_ATOMIC_ACQ ,
 .Nm BIT_AND_ATOMIC ,
 .Nm BIT_OR_ATOMIC ,
 .Nm BIT_COPY_STORE_REL
 .Nd bitset manipulation macros
 .Sh SYNOPSIS
 .In sys/_bitset.h
 .In sys/bitset.h
 .\"
 .Fn BITSET_DEFINE "STRUCTNAME" "const SETSIZE"
 .Fn BITSET_T_INITIALIZER "ARRAY_CONTENTS"
 .Fn BITSET_FSET "N_WORDS"
 .\"
 .Fn BIT_CLR "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset"
 .Fn BIT_COPY "const SETSIZE" "struct STRUCTNAME *from" "struct STRUCTNAME *to"
 .Ft bool
 .Fn BIT_ISSET "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset"
 .Fn BIT_SET "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset"
 .Fn BIT_ZERO "const SETSIZE" "struct STRUCTNAME *bitset"
 .Fn BIT_FILL "const SETSIZE" "struct STRUCTNAME *bitset"
 .Fn BIT_SETOF "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset"
 .Ft bool
 .Fn BIT_EMPTY "const SETSIZE" "struct STRUCTNAME *bitset"
 .Ft bool
 .Fn BIT_ISFULLSET "const SETSIZE" "struct STRUCTNAME *bitset"
 .Ft int
 .Fn BIT_FFS "const SETSIZE" "struct STRUCTNAME *bitset"
 .Ft int
 .Fn BIT_FLS "const SETSIZE" "struct STRUCTNAME *bitset"
 .Ft int
 .Fn BIT_COUNT "const SETSIZE" "struct STRUCTNAME *bitset"
 .\"
 .Ft bool
 .Fo BIT_SUBSET
 .Fa "const SETSIZE" "struct STRUCTNAME *haystack" "struct STRUCTNAME *needle"
 .Fc
 .Ft bool
 .Fo BIT_OVERLAP
 .Fa "const SETSIZE" "struct STRUCTNAME *bitset1" "struct STRUCTNAME *bitset2"
 .Fc
 .Ft bool
 .Fo BIT_CMP
 .Fa "const SETSIZE" "struct STRUCTNAME *bitset1" "struct STRUCTNAME *bitset2"
 .Fc
 .Fn BIT_OR "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src"
 .Fo BIT_OR2
 .Fa "const SETSIZE"
 .Fa "struct STRUCTNAME *dst"
 .Fa "struct STRUCTNAME *src1"
 .Fa "struct STRUCTNAME *src2"
 .Fc
 .Fn BIT_AND "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src"
 .Fo BIT_AND2
 .Fa "const SETSIZE"
 .Fa "struct STRUCTNAME *dst"
 .Fa "struct STRUCTNAME *src1"
 .Fa "struct STRUCTNAME *src2"
 .Fc
-.Fn BIT_NAND "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src"
-.Fo BIT_NAND2
+.Fn BIT_ANDNOT "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src"
+.Fo BIT_ANDNOT2
 .Fa "const SETSIZE"
 .Fa "struct STRUCTNAME *dst"
 .Fa "struct STRUCTNAME *src1"
 .Fa "struct STRUCTNAME *src2"
 .Fc
 .Fn BIT_XOR "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src"
 .Fo BIT_XOR2
 .Fa "const SETSIZE"
 .Fa "struct STRUCTNAME *dst"
 .Fa "struct STRUCTNAME *src1"
 .Fa "struct STRUCTNAME *src2"
 .Fc
 .\"
 .Fn BIT_CLR_ATOMIC "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset"
 .Fn BIT_SET_ATOMIC "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset"
 .Fn BIT_SET_ATOMIC_ACQ "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset"
 .\"
 .Fo BIT_AND_ATOMIC
 .Fa "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src"
 .Fc
 .Fo BIT_OR_ATOMIC
 .Fa "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src"
 .Fc
 .Fo BIT_COPY_STORE_REL
 .Fa "const SETSIZE" "struct STRUCTNAME *from" "struct STRUCTNAME *to"
 .Fc
 .Sh DESCRIPTION
 The
 .Nm
 family of macros provide a flexible and efficient bitset implementation if the
 maximum size of the set is known at compilation.
 Throughout this manual page, the name
 .Fa SETSIZE
 refers to the size of the bitset in bits.
 Individual bits in bitsets are referenced with indices zero through
 .Fa SETSIZE - 1 .
 One example use of
 .In sys/bitset.h
 is
 .In sys/cpuset.h .
 .Pp
 The
 .Fn BITSET_DEFINE
 macro defines a bitset struct
 .Fa STRUCTNAME
 with room to represent
 .Fa SETSIZE
 bits.
 .Pp
 The
 .Fn BITSET_T_INITIALIZER
 macro allows one to initialize a bitset struct with a compile time literal
 value.
 .Pp
 The
 .Fn BITSET_FSET
 macro generates a compile time literal, usable by
 .Fn BITSET_T_INITIALIZER ,
 representing a full bitset (all bits set).
 For examples of
 .Fn BITSET_T_INITIALIZER
 and
 .Fn BITSET_FSET
 usage, see the
 .Sx BITSET_T_INITIALIZER EXAMPLE
 section.
 The
 .Fa N_WORDS
 parameter to
 .Fn BITSET_FSET
 should be:
 .Bd -literal -offset indent
 __bitset_words(SETSIZE)
 .Ed
 .Pp
 The
 .Fn BIT_CLR
 macro clears bit
 .Fa bit
 in the bitset pointed to by
 .Fa bitset .
 The
 .Fn BIT_CLR_ATOMIC
 macro is identical, but the bit is cleared atomically.
 .Pp
 The
 .Fn BIT_COPY
 macro copies the contents of the bitset
 .Fa from
 to the bitset
 .Fa to .
 .Fn BIT_COPY_STORE_REL
 is similar, but copies component machine words from
 .Fa from
 and writes them to
 .Fa to
 with atomic store with release semantics.
 (That is, if
 .Fa to
 is composed of multiple machine words,
 .Fn BIT_COPY_STORE_REL
 performs multiple individually atomic operations.)
 .Pp
 The
 .Fn BIT_SET
 macro sets bit
 .Fa bit
 in the bitset pointed to by
 .Fa bitset .
 The
 .Fn BIT_SET_ATOMIC
 macro is identical, but the bit is set atomically.
 The
 .Fn BIT_SET_ATOMIC_ACQ
 macro sets the bit with acquire semantics.
 .Pp
 The
 .Fn BIT_ZERO
 macro clears all bits in
 .Fa bitset .
 .Pp
 The
 .Fn BIT_FILL
 macro sets all bits in
 .Fa bitset .
 .Pp
 The
 .Fn BIT_SETOF
 macro clears all bits in
 .Fa bitset
 before setting only bit
 .Fa bit .
 .Pp
 The
 .Fn BIT_EMPTY
 macro returns
 .Dv true
 if
 .Fa bitset
 is empty.
 .Pp
 The
 .Fn BIT_ISFULLSET
 macro returns
 .Dv true
 if
 .Fa bitset
 is full (all bits set).
 .Pp
 The
 .Fn BIT_FFS
 macro returns the 1-index of the first (lowest) set bit in
 .Fa bitset ,
 or zero if
 .Fa bitset
 is empty.
 Like with
 .Xr ffs 3 ,
 to use the non-zero result of
 .Fn BIT_FFS
 as a
 .Fa bit
 index parameter to any other
 .Nm
 macro, you must subtract one from the result.
 .Pp
 The
 .Fn BIT_FLS
 macro returns the 1-index of the last (highest) set bit in
 .Fa bitset ,
 or zero if
 .Fa bitset
 is empty.
 Like with
 .Xr fls 3 ,
 to use the non-zero result of
 .Fn BIT_FLS
 as a
 .Fa bit
 index parameter to any other
 .Nm
 macro, you must subtract one from the result.
 .Pp
 The
 .Fn BIT_COUNT
 macro returns the total number of set bits in
 .Fa bitset .
 .Pp
 The
 .Fn BIT_SUBSET
 macro returns
 .Dv true
 if
 .Fa needle
 is a subset of
 .Fa haystack .
 .Pp
 The
 .Fn BIT_OVERLAP
 macro returns
 .Dv true
 if
 .Fa bitset1
 and
 .Fa bitset2
 have any common bits.
 (That is, if
 .Fa bitset1
 AND
 .Fa bitset2
 is not the empty set.)
 .Pp
 The
 .Fn BIT_CMP
 macro returns
 .Dv true
 if
 .Fa bitset1
 is NOT equal to
 .Fa bitset2 .
 .Pp
 The
 .Fn BIT_OR
 macro sets bits present in
 .Fa src
 in
 .Fa dst .
 (It is the
 .Nm
 equivalent of the scalar:
 .Fa dst
 |=
 .Fa src . )
 .Fn BIT_OR_ATOMIC
 is similar, but sets bits in the component machine words in
 .Fa dst
 atomically.
 (That is, if
 .Fa dst
 is composed of multiple machine words,
 .Fn BIT_OR_ATOMIC
 performs multiple individually atomic operations.)
 .Pp
 The
 .Fn BIT_OR2
 macro computes
 .Fa src1
 bitwise or
 .Fa src2
 and assigns the result to
 .Fa dst .
 (It is the
 .Nm
 equivalent of the scalar:
 .Fa dst
 =
 .Fa src1
 |
 .Fa src2 . )
 .Pp
 The
 .Fn BIT_AND
 macro clears bits absent from
 .Fa src
 from
 .Fa dst .
 (It is the
 .Nm
 equivalent of the scalar:
 .Fa dst
 &=
 .Fa src . )
 .Fn BIT_AND_ATOMIC
 is similar, with the same atomic semantics as
 .Fn BIT_OR_ATOMIC .
 .Pp
 The
 .Fn BIT_AND2
 macro computes
 .Fa src1
 bitwise and
 .Fa src2
 and assigns the result to
 .Fa dst .
 (It is the
 .Nm
 equivalent of the scalar:
 .Fa dst
 =
 .Fa src1
 &
 .Fa src2 . )
 .Pp
 The
-.Fn BIT_NAND
+.Fn BIT_ANDNOT
 macro clears bits set in
 .Fa src
 from
 .Fa dst .
 (It is the
 .Nm
 equivalent of the scalar:
 .Fa dst
 &=
 .Fa ~ src . )
 .Pp
 The
-.Fn BIT_NAND2
+.Fn BIT_ANDNOT2
 macro computes
 .Fa src1
 bitwise and not
 .Fa src2
 and assigns the result to
 .Fa dst .
 (It is the
 .Nm
 equivalent of the scalar:
 .Fa dst
 =
 .Fa src1
 & ~
 .Fa src2 . )
 .Pp
 The
 .Fn BIT_XOR
 macro toggles bits set in
 .Fa src
 in
 .Fa dst .
 (It is the
 .Nm
 equivalent of the scalar:
 .Fa dst
 ^=
 .Fa src . )
 .Pp
 The
 .Fn BIT_XOR2
 macro computes
 .Fa src1
 bitwise exclusive or
 .Fa src2
 and assigns the result to
 .Fa dst .
 (It is the
 .Nm
 equivalent of the scalar:
 .Fa dst
 =
 .Fa src1
 ^
 .Fa src2 . )
 .Sh BITSET_T_INITIALIZER EXAMPLE
 .Bd -literal
 BITSET_DEFINE(_myset, MYSETSIZE);
 
 struct _myset myset;
 
 /* Initialize myset to filled (all bits set) */
 myset = BITSET_T_INITIALIZER(BITSET_FSET(__bitset_words(MYSETSIZE)));
 
 /* Initialize myset to only the lowest bit set */
 myset = BITSET_T_INITIALIZER(0x1);
 .Ed
 .Sh SEE ALSO
 .Xr bitstring 3 ,
 .Xr cpuset 9
 .Sh HISTORY
 The
 .Nm
 macros first appeared in
 .Fx 10.0
 in January 2014.
 They were MFCed to
 .Fx 9.3 ,
 released in July 2014.
 .Pp
 This manual page first appeared in
 .Fx 11.0 .
 .Sh AUTHORS
 .An -nosplit
 The
 .Nm
 macros were generalized and pulled out of
 .In sys/cpuset.h
 as
 .In sys/_bitset.h
 and
 .In sys/bitset.h
 by
 .An Attilio Rao Aq Mt attilio@FreeBSD.org .
 This manual page was written by
 .An Conrad Meyer Aq Mt cem@FreeBSD.org .
 .Sh CAVEATS
 The
 .Fa SETSIZE
 argument to all of these macros must match the value given to
 .Fn BITSET_DEFINE .
 .Pp
 Unlike every other reference to individual set members, which are zero-indexed,
 .Fn BIT_FFS
 and
 .Fn BIT_FLS
 return a one-indexed result (or zero if the set is empty).
Index: head/share/man/man9/cpuset.9
===================================================================
--- head/share/man/man9/cpuset.9	(revision 355708)
+++ head/share/man/man9/cpuset.9	(revision 355709)
@@ -1,352 +1,352 @@
 .\" Copyright (c) 2015 Conrad Meyer <cem@FreeBSD.org>
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 .\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 .\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
 .\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd July 29, 2016
+.Dd December 12, 2019
 .Dt CPUSET 9
 .Os
 .Sh NAME
 .Nm cpuset(9)
 \(em
 .Nm CPUSET_T_INITIALIZER ,
 .Nm CPUSET_FSET ,
 .Nm CPU_CLR ,
 .Nm CPU_COPY ,
 .Nm CPU_ISSET ,
 .Nm CPU_SET ,
 .Nm CPU_ZERO ,
 .Nm CPU_FILL ,
 .Nm CPU_SETOF ,
 .Nm CPU_EMPTY ,
 .Nm CPU_ISFULLSET ,
 .Nm CPU_FFS ,
 .Nm CPU_COUNT ,
 .Nm CPU_SUBSET ,
 .Nm CPU_OVERLAP ,
 .Nm CPU_CMP ,
 .Nm CPU_OR ,
 .Nm CPU_AND ,
-.Nm CPU_NAND ,
+.Nm CPU_ANDNOT ,
 .Nm CPU_CLR_ATOMIC ,
 .Nm CPU_SET_ATOMIC ,
 .Nm CPU_SET_ATOMIC_ACQ ,
 .Nm CPU_AND_ATOMIC ,
 .Nm CPU_OR_ATOMIC ,
 .Nm CPU_COPY_STORE_REL
 .Nd cpuset manipulation macros
 .Sh SYNOPSIS
 .In sys/_cpuset.h
 .In sys/cpuset.h
 .\"
 .Fn CPUSET_T_INITIALIZER "ARRAY_CONTENTS"
 .Vt CPUSET_FSET
 .\"
 .Fn CPU_CLR "size_t cpu_idx" "cpuset_t *cpuset"
 .Fn CPU_COPY "cpuset_t *from" "cpuset_t *to"
 .Ft bool
 .Fn CPU_ISSET "size_t cpu_idx" "cpuset_t *cpuset"
 .Fn CPU_SET "size_t cpu_idx" "cpuset_t *cpuset"
 .Fn CPU_ZERO "cpuset_t *cpuset"
 .Fn CPU_FILL "cpuset_t *cpuset"
 .Fn CPU_SETOF "size_t cpu_idx" "cpuset_t *cpuset"
 .Ft bool
 .Fn CPU_EMPTY "cpuset_t *cpuset"
 .Ft bool
 .Fn CPU_ISFULLSET "cpuset_t *cpuset"
 .Ft int
 .Fn CPU_FFS "cpuset_t *cpuset"
 .Ft int
 .Fn CPU_COUNT "cpuset_t *cpuset"
 .\"
 .Ft bool
 .Fn CPU_SUBSET "cpuset_t *haystack" "cpuset_t *needle"
 .Ft bool
 .Fn CPU_OVERLAP "cpuset_t *cpuset1" "cpuset_t *cpuset2"
 .Ft bool
 .Fn CPU_CMP "cpuset_t *cpuset1" "cpuset_t *cpuset2"
 .Fn CPU_OR "cpuset_t *dst" "cpuset_t *src"
 .Fn CPU_AND "cpuset_t *dst" "cpuset_t *src"
-.Fn CPU_NAND "cpuset_t *dst" "cpuset_t *src"
+.Fn CPU_ANDNOT "cpuset_t *dst" "cpuset_t *src"
 .\"
 .Fn CPU_CLR_ATOMIC "size_t cpu_idx" "cpuset_t *cpuset"
 .Fn CPU_SET_ATOMIC "size_t cpu_idx" "cpuset_t *cpuset"
 .Fn CPU_SET_ATOMIC_ACQ "size_t cpu_idx" "cpuset_t *cpuset"
 .\"
 .Fn CPU_AND_ATOMIC "cpuset_t *dst" "cpuset_t *src"
 .Fn CPU_OR_ATOMIC "cpuset_t *dst" "cpuset_t *src"
 .Fn CPU_COPY_STORE_REL "cpuset_t *from" "cpuset_t *to"
 .Sh DESCRIPTION
 The
 .Nm
 family of macros provide a flexible and efficient CPU set implementation,
 backed by the
 .Xr bitset 9
 macros.
 Each CPU is represented by a single bit.
 The maximum number of CPUs representable by
 .Vt cpuset_t
 is
 .Va MAXCPU .
 Individual CPUs in cpusets are referenced with indices zero through
 .Fa MAXCPU - 1 .
 .Pp
 The
 .Fn CPUSET_T_INITIALIZER
 macro allows one to initialize a
 .Vt cpuset_t
 with a compile time literal value.
 .Pp
 The
 .Fn CPUSET_FSET
 macro defines a compile time literal, usable by
 .Fn CPUSET_T_INITIALIZER ,
 representing a full cpuset (all CPUs present).
 For examples of
 .Fn CPUSET_T_INITIALIZER
 and
 .Fn CPUSET_FSET
 usage, see the
 .Sx CPUSET_T_INITIALIZER EXAMPLE
 section.
 .Pp
 The
 .Fn CPU_CLR
 macro removes CPU
 .Fa cpu_idx
 from the cpuset pointed to by
 .Fa cpuset .
 The
 .Fn CPU_CLR_ATOMIC
 macro is identical, but the bit representing the CPU is cleared with atomic
 machine instructions.
 .Pp
 The
 .Fn CPU_COPY
 macro copies the contents of the cpuset
 .Fa from
 to the cpuset
 .Fa to .
 .Fn CPU_COPY_STORE_REL
 is similar, but copies component machine words from
 .Fa from
 and writes them to
 .Fa to
 with atomic store with release semantics.
 (That is, if
 .Fa to
 is composed of multiple machine words,
 .Fn CPU_COPY_STORE_REL
 performs multiple individually atomic operations.)
 .Pp
 The
 .Fn CPU_SET
 macro adds CPU
 .Fa cpu_idx
 to the cpuset pointed to by
 .Fa cpuset ,
 if it is not already present.
 The
 .Fn CPU_SET_ATOMIC
 macro is identical, but the bit representing the CPU is set with atomic
 machine instructions.
 The
 .Fn CPU_SET_ATOMIC_ACQ
 macro sets the bit representing the CPU with atomic acquire semantics.
 .Pp
 The
 .Fn CPU_ZERO
 macro removes all CPUs from
 .Fa cpuset .
 .Pp
 The
 .Fn CPU_FILL
 macro adds all CPUs to
 .Fa cpuset .
 .Pp
 The
 .Fn CPU_SETOF
 macro removes all CPUs in
 .Fa cpuset
 before adding only CPU
 .Fa cpu_idx .
 .Pp
 The
 .Fn CPU_EMPTY
 macro returns
 .Dv true
 if
 .Fa cpuset
 is empty.
 .Pp
 The
 .Fn CPU_ISFULLSET
 macro returns
 .Dv true
 if
 .Fa cpuset
 is full (the set of all CPUs).
 .Pp
 The
 .Fn CPU_FFS
 macro returns the 1-index of the first (lowest) CPU in
 .Fa cpuset ,
 or zero if
 .Fa cpuset
 is empty.
 Like with
 .Xr ffs 3 ,
 to use the non-zero result of
 .Fn CPU_FFS
 as a
 .Fa cpu_idx
 index parameter to any other
 .Nm
 macro, you must subtract one from the result.
 .Pp
 The
 .Fn CPU_COUNT
 macro returns the total number of CPUs in
 .Fa cpuset .
 .Pp
 The
 .Fn CPU_SUBSET
 macro returns
 .Dv true
 if
 .Fa needle
 is a subset of
 .Fa haystack .
 .Pp
 The
 .Fn CPU_OVERLAP
 macro returns
 .Dv true
 if
 .Fa cpuset1
 and
 .Fa cpuset2
 have any common CPUs.
 (That is, if
 .Fa cpuset1
 AND
 .Fa cpuset2
 is not the empty set.)
 .Pp
 The
 .Fn CPU_CMP
 macro returns
 .Dv true
 if
 .Fa cpuset1
 is NOT equal to
 .Fa cpuset2 .
 .Pp
 The
 .Fn CPU_OR
 macro adds CPUs present in
 .Fa src
 to
 .Fa dst .
 (It is the
 .Nm
 equivalent of the scalar:
 .Fa dst
 |=
 .Fa src . )
 .Fn CPU_OR_ATOMIC
 is similar, but sets the bits representing CPUs in the component machine words
 in
 .Fa dst
 with atomic machine instructions.
 (That is, if
 .Fa dst
 is composed of multiple machine words,
 .Fn CPU_OR_ATOMIC
 performs multiple individually atomic operations.)
 .Pp
 The
 .Fn CPU_AND
 macro removes CPUs absent from
 .Fa src
 from
 .Fa dst .
 (It is the
 .Nm
 equivalent of the scalar:
 .Fa dst
 &=
 .Fa src . )
 .Fn CPU_AND_ATOMIC
 is similar, with the same atomic semantics as
 .Fn CPU_OR_ATOMIC .
 .Pp
 The
-.Fn CPU_NAND
+.Fn CPU_ANDNOT
 macro removes CPUs in
 .Fa src
 from
 .Fa dst .
 (It is the
 .Nm
 equivalent of the scalar:
 .Fa dst
 &=
 .Fa ~ src . )
 .Sh CPUSET_T_INITIALIZER EXAMPLE
 .Bd -literal
 cpuset_t myset;
 
 /* Initialize myset to filled (all CPUs) */
 myset = CPUSET_T_INITIALIZER(CPUSET_FSET);
 
 /* Initialize myset to only the lowest CPU */
 myset = CPUSET_T_INITIALIZER(0x1);
 .Ed
 .Sh SEE ALSO
 .Xr cpuset 1 ,
 .Xr cpuset 2 ,
 .Xr bitset 9
 .Sh HISTORY
 .In sys/cpuset.h
 first appeared in
 .Fx 7.1 ,
 released in January 2009, and in
 .Fx 8.0 ,
 released in November 2009.
 .Pp
 This manual page first appeared in
 .Fx 11.0 .
 .Sh AUTHORS
 .An -nosplit
 The
 .Nm
 macros were written by
 .An Jeff Roberson Aq Mt jeff@FreeBSD.org .
 This manual page was written by
 .An Conrad Meyer Aq Mt cem@FreeBSD.org .
 .Sh CAVEATS
 Unlike every other reference to individual set members, which are zero-indexed,
 .Fn CPU_FFS
 returns a one-indexed result (or zero if the cpuset is empty).
Index: head/sys/i386/i386/vm_machdep.c
===================================================================
--- head/sys/i386/i386/vm_machdep.c	(revision 355708)
+++ head/sys/i386/i386/vm_machdep.c	(revision 355709)
@@ -1,680 +1,680 @@
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (c) 1982, 1986 The Regents of the University of California.
  * Copyright (c) 1989, 1990 William Jolitz
  * Copyright (c) 1994 John Dyson
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department, and William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
  *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_isa.h"
 #include "opt_npx.h"
 #include "opt_reset.h"
 #include "opt_cpu.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/pioctl.h>
 #include <sys/proc.h>
 #include <sys/sysent.h>
 #include <sys/sf_buf.h>
 #include <sys/smp.h>
 #include <sys/sched.h>
 #include <sys/sysctl.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/vmmeter.h>
 
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/pcb_ext.h>
 #include <machine/smp.h>
 #include <machine/vm86.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_param.h>
 
 /*
  * Compile-time check that the __OFFSETOF_MONITORBUF constant agrees with
  * the actual offset of pc_monitorbuf inside struct pcpu.
  */
 _Static_assert(__OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf),
     "__OFFSETOF_MONITORBUF does not correspond with offset of pc_monitorbuf.");
 
 /*
  * Return the user FPU save area of 'td'.  It occupies the last
  * roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN) bytes of the
  * thread's kernel stack.
  */
 union savefpu *
 get_pcb_user_save_td(struct thread *td)
 {
 	vm_offset_t kstack_end, save_area;
 
 	kstack_end = td->td_kstack + td->td_kstack_pages * PAGE_SIZE;
 	save_area = kstack_end -
 	    roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);
 	KASSERT((save_area % XSAVE_AREA_ALIGN) == 0,
 	    ("Unaligned pcb_user_save area"));
 	return ((union savefpu *)save_area);
 }
 
 /*
  * Return the user FPU save area that starts immediately after 'pcb'
  * in memory.
  */
 union savefpu *
 get_pcb_user_save_pcb(struct pcb *pcb)
 {
 
 	return ((union savefpu *)(vm_offset_t)(pcb + 1));
 }
 
 /*
  * Return the address of the pcb for 'td'.  The pcb sits directly below
  * the (aligned) user FPU save area at the end of the kernel stack.
  */
 struct pcb *
 get_pcb_td(struct thread *td)
 {
 	vm_offset_t addr;
 
 	addr = td->td_kstack + td->td_kstack_pages * PAGE_SIZE;
 	addr -= roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);
 	addr -= sizeof(struct pcb);
 	return ((struct pcb *)addr);
 }
 
 void *
 alloc_fpusave(int flags)
 {
 	void *res;
 	struct savefpu_ymm *sf;
 
 	res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags);
 	if (use_xsave) {
 		sf = (struct savefpu_ymm *)res;
 		bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd));
 		sf->sv_xstate.sx_hd.xstate_bv = xsave_mask;
 	}
 	return (res);
 }
 /*
  * Finish a fork operation, with process p2 nearly set up.
  * Copy and update the pcb, set up the stack so that the child is
  * ready to run and return to user mode.
  *
  * If RFPROC is not set in 'flags' the function returns early; in that
  * case the only work done (and only when RFMEM is also clear) is to give
  * td1's process a private copy of a user LDT that is currently shared.
  */
 void
 cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
 {
 	struct proc *p1;
 	struct pcb *pcb2;
 	struct mdproc *mdp2;
 
 	p1 = td1->td_proc;
 	if ((flags & RFPROC) == 0) {
 		if ((flags & RFMEM) == 0) {
 			/* unshare user LDT */
 			struct mdproc *mdp1 = &p1->p_md;
 			struct proc_ldt *pldt, *pldt1;
 
 			mtx_lock_spin(&dt_lock);
 			/*
 			 * NOTE(review): dt_lock is only released explicitly
 			 * in the else branch; presumably user_ldt_deref()
 			 * drops it on the other path -- confirm.
 			 */
 			if ((pldt1 = mdp1->md_ldt) != NULL &&
 			    pldt1->ldt_refcnt > 1) {
 				pldt = user_ldt_alloc(mdp1, pldt1->ldt_len);
 				if (pldt == NULL)
 					panic("could not copy LDT");
 				mdp1->md_ldt = pldt;
 				set_user_ldt(mdp1);
 				user_ldt_deref(pldt1);
 			} else
 				mtx_unlock_spin(&dt_lock);
 		}
 		return;
 	}
 
 	/* Ensure that td1's pcb is up to date. */
 	if (td1 == curthread)
 		td1->td_pcb->pcb_gs = rgs();
 	critical_enter();
 	if (PCPU_GET(fpcurthread) == td1)
 		npxsave(td1->td_pcb->pcb_save);
 	critical_exit();
 
 	/* Point the pcb to the top of the stack */
 	pcb2 = get_pcb_td(td2);
 	td2->td_pcb = pcb2;
 
 	/* Copy td1's pcb */
 	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
 
 	/* Properly initialize pcb_save */
 	pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
 	bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2),
 	    cpu_max_ext_state_size);
 
 	/* Point mdproc and then copy over td1's contents */
 	mdp2 = &p2->p_md;
 	bcopy(&p1->p_md, mdp2, sizeof(*mdp2));
 
 	/*
 	 * Create a new fresh stack for the new process.
 	 * Copy the trap frame for the return to user mode as if from a
 	 * syscall.  This copies most of the user mode register values.
 	 * The -VM86_STACK_SPACE (-16) is so we can expand the trapframe
 	 * if we go to vm86.
 	 */
 	td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb -
 	    VM86_STACK_SPACE) - 1;
 	bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
 
 	td2->td_frame->tf_eax = 0;		/* Child returns zero */
 	td2->td_frame->tf_eflags &= ~PSL_C;	/* success */
 	td2->td_frame->tf_edx = 1;
 
 	/*
 	 * If the parent process has the trap bit set (i.e. a debugger had
 	 * single stepped the process to the system call), we need to clear
 	 * the trap flag from the new frame unless the debugger had set PF_FORK
 	 * on the parent.  Otherwise, the child will receive a (likely
 	 * unexpected) SIGTRAP when it executes the first instruction after
 	 * returning to userland.
 	 */
 	if ((p1->p_pfsflags & PF_FORK) == 0)
 		td2->td_frame->tf_eflags &= ~PSL_T;
 
 	/*
 	 * Set registers for trampoline to user mode.  Leave space for the
 	 * return address on stack.  These are the kernel mode register values.
 	 */
 	pcb2->pcb_cr3 = pmap_get_cr3(vmspace_pmap(p2->p_vmspace));
 	pcb2->pcb_edi = 0;
 	pcb2->pcb_esi = (int)fork_return;	/* fork_trampoline argument */
 	pcb2->pcb_ebp = 0;
 	pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *);
 	pcb2->pcb_ebx = (int)td2;		/* fork_trampoline argument */
 	pcb2->pcb_eip = (int)fork_trampoline + setidt_disp;
 	/*-
 	 * pcb2->pcb_dr*:	cloned above.
 	 * pcb2->pcb_savefpu:	cloned above.
 	 * pcb2->pcb_flags:	cloned above.
 	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
 	 * pcb2->pcb_gs:	cloned above.
 	 * pcb2->pcb_ext:	cleared below.
 	 */
 
 	/*
 	 * XXX don't copy the i/o pages.  this should probably be fixed.
 	 */
 	pcb2->pcb_ext = 0;
 
 	/*
 	 * Copy the LDT, if necessary.  With RFMEM the parent's LDT is
 	 * shared and only its reference count is bumped; otherwise the
 	 * child gets a newly allocated LDT of the same length.
 	 */
 	mtx_lock_spin(&dt_lock);
 	if (mdp2->md_ldt != NULL) {
 		if (flags & RFMEM) {
 			mdp2->md_ldt->ldt_refcnt++;
 		} else {
 			mdp2->md_ldt = user_ldt_alloc(mdp2,
 			    mdp2->md_ldt->ldt_len);
 			if (mdp2->md_ldt == NULL)
 				panic("could not copy LDT");
 		}
 	}
 	mtx_unlock_spin(&dt_lock);
 
 	/* Setup to release spin count in fork_exit(). */
 	td2->td_md.md_spinlock_count = 1;
 	td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
 
 	/*
 	 * Now, cpu_switch() can schedule the new process.
 	 * pcb_esp is loaded pointing to the cpu_switch() stack frame
 	 * containing the return address when exiting cpu_switch.
 	 * This will normally be to fork_trampoline(), which will have
 	 * %ebx loaded with the new thread's pointer (td2, stored in
 	 * pcb_ebx above).  fork_trampoline() will set up a stack to call
 	 * the function stored in pcb_esi (fork_return by default) to
 	 * complete the return to user-mode.
 	 */
 }
 
 /*
  * Intercept the return address from a freshly forked process that has NOT
  * been scheduled yet.
  *
  * This is needed to make kernel threads stay in kernel mode.
  */
 void
 cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg)
 {
 	/*
 	 * Note that the trap frame follows the args, so the function
 	 * is really called like this:  func(arg, frame);
 	 */
 	td->td_pcb->pcb_esi = (int) func;	/* function */
 	td->td_pcb->pcb_ebx = (int) arg;	/* first arg */
 }
 
 void
 cpu_exit(struct thread *td)
 {
 
 	/*
 	 * If this process has a custom LDT, release it.  Reset pc->pcb_gs
 	 * and %gs before we free it in case they refer to an LDT entry.
 	 */
 	mtx_lock_spin(&dt_lock);
 	if (td->td_proc->p_md.md_ldt) {
 		td->td_pcb->pcb_gs = _udatasel;
 		load_gs(_udatasel);
 		user_ldt_free(td);
 	} else
 		mtx_unlock_spin(&dt_lock);
 }
 
 void
 cpu_thread_exit(struct thread *td)
 {
 
 	critical_enter();
 	if (td == PCPU_GET(fpcurthread))
 		npxdrop();
 	critical_exit();
 
 	/* Disable any hardware breakpoints. */
 	if (td->td_pcb->pcb_flags & PCB_DBREGS) {
 		reset_dbregs();
 		td->td_pcb->pcb_flags &= ~PCB_DBREGS;
 	}
 }
 
 /*
  * Free the pcb extension of 'td', if one is attached, and clear the
  * pointer to it.
  */
 void
 cpu_thread_clean(struct thread *td)
 {
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
 	if (pcb->pcb_ext == NULL)
 		return;
 
 	/* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */
 	/*
 	 * XXX do we need to move the TSS off the allocated pages
 	 * before freeing them?  (not done here)
 	 */
 	pmap_trm_free(pcb->pcb_ext, ctob(IOPAGES + 1));
 	pcb->pcb_ext = NULL;
 }
 
 /*
  * Swap-in hook for a thread.  Intentionally empty: this file does no
  * machine-dependent work here.
  */
 void
 cpu_thread_swapin(struct thread *td)
 {
 }
 
 /*
  * Swap-out hook for a thread.  Intentionally empty: this file does no
  * machine-dependent work here.
  */
 void
 cpu_thread_swapout(struct thread *td)
 {
 }
 
 /*
  * Set up the machine-dependent pointers of a thread: locate the pcb and
  * trap frame on the kernel stack, clear the pcb extension and point
  * pcb_save at the user FPU save area.  When XSAVE is in use the xstate
  * header following the save area is zeroed and xstate_bv is set to
  * xsave_mask.
  */
 void
 cpu_thread_alloc(struct thread *td)
 {
 	struct xstate_hdr *xhdr;
 	struct pcb *pcb;
 
 	pcb = get_pcb_td(td);
 	td->td_pcb = pcb;
 	td->td_frame = (struct trapframe *)((caddr_t)pcb -
 	    VM86_STACK_SPACE) - 1;
 	pcb->pcb_ext = NULL;
 	pcb->pcb_save = get_pcb_user_save_pcb(pcb);
 	if (!use_xsave)
 		return;
 
 	xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
 	bzero(xhdr, sizeof(*xhdr));
 	xhdr->xstate_bv = xsave_mask;
 }
 
 /*
  * Release the machine-dependent resources of 'td'.  All of the work is
  * done by cpu_thread_clean().
  */
 void
 cpu_thread_free(struct thread *td)
 {
 
 	cpu_thread_clean(td);
 }
 
 /*
  * Always returns true; both arguments are unused.
  */
 bool
 cpu_exec_vmspace_reuse(struct proc *p __unused, vm_map_t map __unused)
 {
 
 	return (true);
 }
 
 /*
  * Machine-dependent procctl handler.  No commands are implemented here:
  * every argument is ignored and EINVAL is returned.
  */
 int
 cpu_procctl(struct thread *td __unused, int idtype __unused, id_t id __unused,
     int com __unused, void *data __unused)
 {
 
 	return (EINVAL);
 }
 
 void
 cpu_set_syscall_retval(struct thread *td, int error)
 {
 
 	switch (error) {
 	case 0:
 		td->td_frame->tf_eax = td->td_retval[0];
 		td->td_frame->tf_edx = td->td_retval[1];
 		td->td_frame->tf_eflags &= ~PSL_C;
 		break;
 
 	case ERESTART:
 		/*
 		 * Reconstruct pc, assuming lcall $X,y is 7 bytes, int
 		 * 0x80 is 2 bytes. We saved this in tf_err.
 		 */
 		td->td_frame->tf_eip -= td->td_frame->tf_err;
 		break;
 
 	case EJUSTRETURN:
 		break;
 
 	default:
 		td->td_frame->tf_eax = SV_ABI_ERRNO(td->td_proc, error);
 		td->td_frame->tf_eflags |= PSL_C;
 		break;
 	}
 }
 
 /*
  * Initialize machine state, mostly pcb and trap frame for a new
  * thread, about to return to userspace.  Put enough state in the new
  * thread's PCB to get it to go back to the fork_return(), which
  * finalizes the thread state and handles peculiarities of the first
  * return to userspace for the new thread.
  *
  * 'td' is the new thread, 'td0' the thread whose state is copied.
  */
 void
 cpu_copy_thread(struct thread *td, struct thread *td0)
 {
 	struct pcb *pcb2;
 
 	/* Use the pcb already assigned to the new thread. */
 	pcb2 = td->td_pcb;
 
 	/*
 	 * Copy the upcall pcb.  This loads kernel regs.
 	 * Those not loaded individually below get their default
 	 * values here.
 	 */
 	bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
 	pcb2->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE |
 	    PCB_KERNNPX);
 	pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
 	bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save,
 	    cpu_max_ext_state_size);
 
 	/*
 	 * Create a new fresh stack for the new thread.
 	 */
 	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
 
 	/*
 	 * If the current thread has the trap bit set (i.e. a debugger had
 	 * single stepped the process to the system call), we need to clear
 	 * the trap flag from the new frame. Otherwise, the new thread will
 	 * receive a (likely unexpected) SIGTRAP when it executes the first
 	 * instruction after returning to userland.
 	 */
 	td->td_frame->tf_eflags &= ~PSL_T;
 
 	/*
 	 * Set registers for trampoline to user mode.  Leave space for the
 	 * return address on stack.  These are the kernel mode register values.
 	 */
 	pcb2->pcb_edi = 0;
 	pcb2->pcb_esi = (int)fork_return;		    /* trampoline arg */
 	pcb2->pcb_ebp = 0;
 	pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */
 	pcb2->pcb_ebx = (int)td;			    /* trampoline arg */
 	pcb2->pcb_eip = (int)fork_trampoline + setidt_disp;
 	pcb2->pcb_gs = rgs();
 	/*
 	 * If we didn't copy the pcb, we'd need to do the following registers:
 	 * pcb2->pcb_cr3:	cloned above.
 	 * pcb2->pcb_dr*:	cloned above.
 	 * pcb2->pcb_savefpu:	cloned above.
 	 * pcb2->pcb_flags:	cloned above.
 	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
 	 * pcb2->pcb_gs:	set from rgs() above.
 	 * pcb2->pcb_ext:	cleared below.
 	 */
 	pcb2->pcb_ext = NULL;
 
 	/* Setup to release spin count in fork_exit(). */
 	td->td_md.md_spinlock_count = 1;
 	td->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
 }
 
 /*
  * Set the machine state for performing an upcall that starts
  * the entry function with the given argument.
  *
  * The user stack pointer is placed near the top of 'stack'
  * (ss_sp + ss_size), with a zero return address at tf_esp and 'arg'
  * in the slot above it.
  */
 void
 cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg,
     stack_t *stack)
 {
 
 	/*
 	 * Do any extra cleaning that needs to be done.
 	 * The thread may have optional components
 	 * that are not present in a fresh thread.
 	 * This may be a recycled thread so make it look
 	 * as though it's newly allocated.
 	 */
 	cpu_thread_clean(td);
 
 	/*
 	 * Set the trap frame to point at the beginning of the entry
 	 * function.
 	 */
 	td->td_frame->tf_ebp = 0; 
 	td->td_frame->tf_esp =
 	    (((int)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4;
 	td->td_frame->tf_eip = (int)entry;
 
 	/*
 	 * Return address sentinel value to stop stack unwinding.
 	 * NOTE(review): the result of suword() is ignored here and below,
 	 * so a failed store to the user stack goes unreported.
 	 */
 	suword((void *)td->td_frame->tf_esp, 0);
 
 	/* Pass the argument to the entry point. */
 	suword((void *)(td->td_frame->tf_esp + sizeof(void *)),
 	    (int)arg);
 }
 
 /*
  * Install 'tls_base' as the base of the thread's TLS segment by
  * building a segment descriptor and saving it in the pcb (pcb_gsd).
  * Always returns 0.
  */
 int
 cpu_set_user_tls(struct thread *td, void *tls_base)
 {
 	struct segment_descriptor sd;
 	uint32_t base;
 
 	/*
 	 * Construct a descriptor and store it in the pcb for
 	 * the next context switch.  If 'td' is the current thread, also
 	 * store it in the per-CPU GDT slot and reload %gs so that it
 	 * takes effect immediately.
 	 * NOTE(review): the original comment spoke of tf_fs/%fs, but the
 	 * code below only touches %gs -- confirm which is intended.
 	 */
 	base = (uint32_t)tls_base;
 	sd.sd_lobase = base & 0xffffff;
 	sd.sd_hibase = (base >> 24) & 0xff;
 	sd.sd_lolimit = 0xffff;	/* 4GB limit, wraps around */
 	sd.sd_hilimit = 0xf;
 	sd.sd_type  = SDT_MEMRWA;
 	sd.sd_dpl   = SEL_UPL;
 	sd.sd_p     = 1;
 	sd.sd_xx    = 0;
 	sd.sd_def32 = 1;
 	sd.sd_gran  = 1;
 	critical_enter();
 	/* set %gs */
 	td->td_pcb->pcb_gsd = sd;
 	if (td == curthread) {
 		PCPU_GET(fsgs_gdt)[1] = sd;
 		load_gs(GSEL(GUGS_SEL, SEL_UPL));
 	}
 	critical_exit();
 	return (0);
 }
 
 /*
  * Translate the kernel virtual address 'addr' into a physical address.
  * Panics if the lookup yields physical address zero.
  */
 vm_paddr_t
 kvtop(void *addr)
 {
 	vm_paddr_t phys;
 
 	phys = pmap_kextract((vm_offset_t)addr);
 	if (phys == 0)
 		panic("kvtop: zero page frame");
 	return (phys);
 }
 
 /*
  * Map an sf_buf: enter the mapping via pmap_sf_buf_map() and, on SMP
  * kernels, run sf_buf_shootdown() with the caller's flags.
  */
 void
 sf_buf_map(struct sf_buf *sf, int flags)
 {
 
 	pmap_sf_buf_map(sf);
 #ifdef SMP
 	sf_buf_shootdown(sf, flags);
 #endif
 }
 
 #ifdef SMP
 /*
  * Invalidate the page at sf->kva on every CPU that has not yet done so
  * for this sf_buf, tracking those CPUs in sf->cpumask.  The current CPU
  * is handled locally with invlpg(); unless SFB_CPUPRIVATE is set in
  * 'flags', the remaining CPUs not yet in the mask are sent a masked
  * invlpg request and are then added to the mask.  The thread stays
  * pinned for the duration.
  */
 void
 sf_buf_shootdown(struct sf_buf *sf, int flags)
 {
 	cpuset_t other_cpus;
 	u_int cpuid;
 
 	sched_pin();
 	cpuid = PCPU_GET(cpuid);
 	if (!CPU_ISSET(cpuid, &sf->cpumask)) {
 		CPU_SET(cpuid, &sf->cpumask);
 		invlpg(sf->kva);
 	}
 	if ((flags & SFB_CPUPRIVATE) == 0) {
 		/* other_cpus = all CPUs except this one and those in cpumask. */
 		other_cpus = all_cpus;
 		CPU_CLR(cpuid, &other_cpus);
-		CPU_NAND(&other_cpus, &sf->cpumask);
+		CPU_ANDNOT(&other_cpus, &sf->cpumask);
 		if (!CPU_EMPTY(&other_cpus)) {
 			CPU_OR(&sf->cpumask, &other_cpus);
 			smp_masked_invlpg(other_cpus, sf->kva, kernel_pmap);
 		}
 	}
 	sched_unpin();
 }
 #endif
 
 /*
  * MD part of sf_buf_free().  Nothing is done here; 0 is always
  * returned.
  */
 int
 sf_buf_unmap(struct sf_buf *sf)
 {
 
 	return (0);
 }
 
 /*
  * Re-enter the mapping of 'sf' and invalidate the cache for the page
  * range it covers.
  */
 static void
 sf_buf_invalidate(struct sf_buf *sf)
 {
 	vm_page_t page;
 
 	page = sf->m;
 
 	/*
 	 * Use pmap_qenter to update the pte for
 	 * existing mapping, in particular, the PAT
 	 * settings are recalculated.
 	 */
 	pmap_qenter(sf->kva, &page, 1);
 	pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE);
 }
 
 /*
  * Invalidate the cache lines that may belong to the page, if
  * (possibly old) mapping of the page by sf buffer exists.  Returns
  * TRUE when mapping was found and cache invalidated.
  *
  * The lookup is delegated to sf_buf_process_page(), which is handed
  * sf_buf_invalidate() as the callback.
  */
 boolean_t
 sf_buf_invalidate_cache(vm_page_t m)
 {
 
 	return (sf_buf_process_page(m, sf_buf_invalidate));
 }
 
 /*
  * Software interrupt handler for queued VM system processing.
  * Runs busdma_swi() when busdma work is pending; 'dummy' is unused.
  */
 void
 swi_vm(void *dummy)
 {
 
 	if (busdma_swi_pending == 0)
 		return;
 	busdma_swi();
 }
 
 /*
  * Report whether 'addr' lies in a physical memory region (1) or not (0).
  * Currently used by the kernel coredump code in order to avoid
  * dumping the ``ISA memory hole'' which could cause indefinite hangs,
  * or other unpredictable behaviour.
  */
 int
 is_physical_memory(vm_paddr_t addr)
 {
 
 #ifdef DEV_ISA
 	/* The ISA ``memory hole'': [0xa0000, 0x100000). */
 	if (addr >= 0xa0000 && addr < 0x100000)
 		return (0);
 #endif
 
 	/*
 	 * Tests for other known memory-mapped devices (PCI?) would
 	 * go here.
 	 */
 
 	return (1);
 }
Index: head/sys/kern/kern_cpuset.c
===================================================================
--- head/sys/kern/kern_cpuset.c	(revision 355708)
+++ head/sys/kern/kern_cpuset.c	(revision 355709)
@@ -1,2315 +1,2315 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2008,  Jeffrey Roberson <jeff@freebsd.org>
  * All rights reserved.
  * 
  * Copyright (c) 2008 Nokia Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/ctype.h>
 #include <sys/sysproto.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/refcount.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/syscallsubr.h>
 #include <sys/capsicum.h>
 #include <sys/cpuset.h>
 #include <sys/domainset.h>
 #include <sys/sx.h>
 #include <sys/queue.h>
 #include <sys/libkern.h>
 #include <sys/limits.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
 #include <sys/vmmeter.h>
 
 #include <vm/uma.h>
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_param.h>
 #include <vm/vm_phys.h>
 #include <vm/vm_pagequeue.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif /* DDB */
 
 /*
  * cpusets provide a mechanism for creating and manipulating sets of
  * processors for the purpose of constraining the scheduling of threads to
  * specific processors.
  *
  * Each process belongs to an identified set, by default this is set 1.  Each
  * thread may further restrict the cpus it may run on to a subset of this
  * named set.  This creates an anonymous set which other threads and processes
  * may not join by number.
  *
  * The named set is referred to herein as the 'base' set to avoid ambiguity.
  * This set is usually a child of a 'root' set while the anonymous set may
  * simply be referred to as a mask.  In the syscall api these are referred to
  * as the ROOT, CPUSET, and MASK levels where CPUSET is called 'base' here.
  *
  * Threads inherit their set from their creator whether it be anonymous or
  * not.  This means that anonymous sets are immutable because they may be
  * shared.  To modify an anonymous set a new set is created with the desired
  * mask and the same parent as the existing anonymous set.  This gives the
  * illusion of each thread having a private mask.
  *
  * Via the syscall apis a user may ask to retrieve or modify the root, base,
  * or mask that is discovered via a pid, tid, or setid.  Modifying a set
  * modifies all numbered and anonymous child sets to comply with the new mask.
  * Modifying a pid or tid's mask applies only to that tid but must still
  * exist within the assigned parent set.
  *
  * A thread may not be assigned to a group separate from other threads in
  * the process.  This is to remove ambiguity when the setid is queried with
  * a pid argument.  There is no other technical limitation.
  *
  * This somewhat complex arrangement is intended to make it easy for
  * applications to query available processors and bind their threads to
  * specific processors while also allowing administrators to dynamically
  * reprovision by changing sets which apply to groups of processes.
  *
  * A simple application should not concern itself with sets at all and
  * rather apply masks to its own threads via CPU_WHICH_TID and a -1 id
  * meaning 'curthread'.  It may query available cpus for that tid with a
  * getaffinity call using (CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, ...).
  */
 
 /* List head type for chains of struct domainset (linked via ds_link). */
 LIST_HEAD(domainlist, domainset);
 struct domainset __read_mostly domainset_fixed[MAXMEMDOM];
 struct domainset __read_mostly domainset_prefer[MAXMEMDOM];
 struct domainset __read_mostly domainset_roundrobin;
 
 static uma_zone_t cpuset_zone;		/* Allocator for struct cpuset. */
 static uma_zone_t domainset_zone;	/* Allocator for struct domainset. */
 /* Spin mutex held around updates to the set and domain lists below. */
 static struct mtx cpuset_lock;
 static struct setlist cpuset_ids;	/* All sets that have an id. */
 static struct domainlist cpuset_domains; /* All registered domainsets. */
 static struct unrhdr *cpuset_unr;	/* Allocator for cpuset ids. */
 static struct cpuset *cpuset_zero, *cpuset_default, *cpuset_kernel;
 static struct domainset domainset0, domainset2;
 
 /* Return the size of cpuset_t at the kernel level */
 SYSCTL_INT(_kern_sched, OID_AUTO, cpusetsize, CTLFLAG_RD | CTLFLAG_CAPRD,
     SYSCTL_NULL_INT_PTR, sizeof(cpuset_t), "sizeof(cpuset_t)");
 
 cpuset_t *cpuset_root;
 cpuset_t cpuset_domain[MAXMEMDOM];
 
 /* Forward declaration; defined further down in this file. */
 static int domainset_valid(const struct domainset *, const struct domainset *);
 
 /*
  * Return the first non-anonymous set starting from 'set': the set
  * itself if it has a valid id, otherwise its parent.
  */
 static struct cpuset *
 cpuset_getbase(struct cpuset *set)
 {
 
 	return (set->cs_id == CPUSET_INVALID ? set->cs_parent : set);
 }
 
 /*
  * Climb the parent chain from 'set' until a set flagged CPU_SET_ROOT,
  * or one without a parent, is reached, and return it.
  */
 static struct cpuset *
 cpuset_getroot(struct cpuset *set)
 {
 
 	for (;;) {
 		if ((set->cs_flags & CPU_SET_ROOT) != 0)
 			break;
 		if (set->cs_parent == NULL)
 			break;
 		set = set->cs_parent;
 	}
 	return (set);
 }
 
 /*
  * Acquire a reference to a cpuset, all pointers must be tracked with refs.
  * Returns 'set' so the call can be used inline.
  */
 struct cpuset *
 cpuset_ref(struct cpuset *set)
 {
 
 	refcount_acquire(&set->cs_ref);
 	return (set);
 }
 
 /*
  * Locate the root above 'set' and return it with a new reference held.
  */
 static struct cpuset *
 cpuset_refroot(struct cpuset *set)
 {
 	struct cpuset *root;
 
 	root = cpuset_getroot(set);
 	return (cpuset_ref(root));
 }
 
 /*
  * Locate the first non-anonymous set starting from 'set' and return it
  * with a new reference held.  When 'set' itself is not anonymous the
  * extra reference is taken on 'set'.
  */
 static struct cpuset *
 cpuset_refbase(struct cpuset *set)
 {
 	struct cpuset *base;
 
 	base = cpuset_getbase(set);
 	return (cpuset_ref(base));
 }
 
 /*
  * Release a reference in a context where it is safe to allocate.
  *
  * When the last reference goes away the set is unlinked from its
  * sibling list (and from the id list if it has an id), the reference it
  * held on its parent is released recursively, and the set and its id
  * are freed.
  */
 void
 cpuset_rel(struct cpuset *set)
 {
 	cpusetid_t id;
 
 	if (refcount_release(&set->cs_ref) == 0)
 		return;
 	mtx_lock_spin(&cpuset_lock);
 	LIST_REMOVE(set, cs_siblings);
 	/* Remember the id: 'set' is freed before the id is returned. */
 	id = set->cs_id;
 	if (id != CPUSET_INVALID)
 		LIST_REMOVE(set, cs_link);
 	mtx_unlock_spin(&cpuset_lock);
 	cpuset_rel(set->cs_parent);
 	uma_zfree(cpuset_zone, set);
 	if (id != CPUSET_INVALID)
 		free_unr(cpuset_unr, id);
 }
 
 /*
  * Release a reference from a context in which it is not safe to
  * allocate or free.  If this was the last reference the set is unlinked
  * and queued on 'head' (via cs_link) for cpuset_rel_complete().
  */
 static void
 cpuset_rel_defer(struct setlist *head, struct cpuset *set)
 {
 
 	if (!refcount_release(&set->cs_ref))
 		return;
 
 	mtx_lock_spin(&cpuset_lock);
 	LIST_REMOVE(set, cs_siblings);
 	/* cs_link is reused below, so leave the id list first. */
 	if (set->cs_id != CPUSET_INVALID)
 		LIST_REMOVE(set, cs_link);
 	LIST_INSERT_HEAD(head, set, cs_link);
 	mtx_unlock_spin(&cpuset_lock);
 }
 
 /*
  * Complete a deferred release.  Removes the set from the list provided to
  * cpuset_rel_defer, drops the reference held on the parent and frees the
  * set.
  */
 static void
 cpuset_rel_complete(struct cpuset *set)
 {
 	LIST_REMOVE(set, cs_link);
 	cpuset_rel(set->cs_parent);
 	uma_zfree(cpuset_zone, set);
 }
 
 /*
  * Find a set by id and return it with a reference held, or NULL if no
  * such set exists.  For a jailed thread the set is only returned when
  * the prison's cpuset is the set itself or one of its ancestors.
  */
 static struct cpuset *
 cpuset_lookup(cpusetid_t setid, struct thread *td)
 {
 	struct cpuset *set, *jset, *tset;
 
 	if (setid == CPUSET_INVALID)
 		return (NULL);
 
 	mtx_lock_spin(&cpuset_lock);
 	LIST_FOREACH(set, &cpuset_ids, cs_link) {
 		if (set->cs_id == setid)
 			break;
 	}
 	if (set != NULL)
 		cpuset_ref(set);
 	mtx_unlock_spin(&cpuset_lock);
 
 	KASSERT(td != NULL, ("[%s:%d] td is NULL", __func__, __LINE__));
 	if (set == NULL || !jailed(td->td_ucred))
 		return (set);
 
 	/* Walk towards the root looking for the prison's set. */
 	jset = td->td_ucred->cr_prison->pr_cpuset;
 	tset = set;
 	while (tset != NULL && tset != jset)
 		tset = tset->cs_parent;
 	if (tset == NULL) {
 		cpuset_rel(set);
 		set = NULL;
 	}
 
 	return (set);
 }
 
 /*
  * Create a set in the space provided in 'set' with the provided parameters.
  * The set is returned with a single ref.  May return EDEADLK if the set
  * will have no valid cpu based on restrictions from the parent, or if
  * 'domain' is not valid with respect to the parent's domain.
  *
  * A NULL 'mask' or 'domain' is inherited from 'parent'.  An 'id' of
  * CPUSET_INVALID creates an anonymous set that is not put on the id list.
  */
 static int
 _cpuset_create(struct cpuset *set, struct cpuset *parent,
     const cpuset_t *mask, struct domainset *domain, cpusetid_t id)
 {
 
 	if (domain == NULL)
 		domain = parent->cs_domain;
 	if (mask == NULL)
 		mask = &parent->cs_mask;
 	if (!CPU_OVERLAP(&parent->cs_mask, mask))
 		return (EDEADLK);
 	/* The domain must be prepared ahead of time. */
 	if (!domainset_valid(parent->cs_domain, domain))
 		return (EDEADLK);
 	CPU_COPY(mask, &set->cs_mask);
 	LIST_INIT(&set->cs_children);
 	refcount_init(&set->cs_ref, 1);
 	set->cs_flags = 0;
 	mtx_lock_spin(&cpuset_lock);
 	set->cs_domain = domain;
 	/* Clip the requested mask to the parent's mask. */
 	CPU_AND(&set->cs_mask, &parent->cs_mask);
 	set->cs_id = id;
 	/* The child holds a reference on its parent. */
 	set->cs_parent = cpuset_ref(parent);
 	LIST_INSERT_HEAD(&parent->cs_children, set, cs_siblings);
 	if (set->cs_id != CPUSET_INVALID)
 		LIST_INSERT_HEAD(&cpuset_ids, set, cs_link);
 	mtx_unlock_spin(&cpuset_lock);
 
 	return (0);
 }
 
 /*
  * Create a new non-anonymous set with the requested parent and mask.
  * Returns ENFILE if no id can be allocated, or the error from
  * _cpuset_create() if the set cannot be set up; on such a failure the
  * id and the set's storage are released again.
  */
 static int
 cpuset_create(struct cpuset **setp, struct cpuset *parent, const cpuset_t *mask)
 {
 	struct cpuset *set;
 	cpusetid_t id;
 	int error;
 
 	id = alloc_unr(cpuset_unr);
 	if (id == -1)
 		return (ENFILE);
 
 	set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
 	*setp = set;
 	error = _cpuset_create(set, parent, mask, NULL, id);
 	if (error != 0) {
 		free_unr(cpuset_unr, id);
 		uma_zfree(cpuset_zone, set);
 	}
 
 	return (error);
 }
 
 /*
  * Allocate 'count' zeroed cpusets and push each onto 'list'.
  */
 static void
 cpuset_freelist_add(struct setlist *list, int count)
 {
 	struct cpuset *entry;
 	int remaining;
 
 	for (remaining = count; remaining > 0; remaining--) {
 		entry = uma_zalloc(cpuset_zone, M_ZERO | M_WAITOK);
 		LIST_INSERT_HEAD(list, entry, cs_link);
 	}
 }
 
 /*
  * Initialize 'list' as an empty list and fill it with 'count' freshly
  * allocated cpusets.
  */
 static void
 cpuset_freelist_init(struct setlist *list, int count)
 {
 
 	LIST_INIT(list);
 	cpuset_freelist_add(list, count);
 }
 
 /*
  * Drain 'list', returning every cpuset on it to cpuset_zone.
  */
 static void
 cpuset_freelist_free(struct setlist *list)
 {
 	struct cpuset *entry;
 
 	for (;;) {
 		entry = LIST_FIRST(list);
 		if (entry == NULL)
 			break;
 		LIST_REMOVE(entry, cs_link);
 		uma_zfree(cpuset_zone, entry);
 	}
 }
 
 /*
  * Allocate 'count' zeroed domainsets and push each onto 'list'.
  */
 static void
 domainset_freelist_add(struct domainlist *list, int count)
 {
 	struct domainset *entry;
 	int remaining;
 
 	for (remaining = count; remaining > 0; remaining--) {
 		entry = uma_zalloc(domainset_zone, M_ZERO | M_WAITOK);
 		LIST_INSERT_HEAD(list, entry, ds_link);
 	}
 }
 
 /*
  * Initialize 'list' as an empty list and fill it with 'count' freshly
  * allocated domainsets.
  */
 static void
 domainset_freelist_init(struct domainlist *list, int count)
 {
 
 	LIST_INIT(list);
 	domainset_freelist_add(list, count);
 }
 
 /*
  * Drain 'list', returning every domainset on it to domainset_zone.
  */
 static void
 domainset_freelist_free(struct domainlist *list)
 {
 	struct domainset *entry;
 
 	for (;;) {
 		entry = LIST_FIRST(list);
 		if (entry == NULL)
 			break;
 		LIST_REMOVE(entry, ds_link);
 		uma_zfree(domainset_zone, entry);
 	}
 }
 
 /*
  * Copy the mask, policy and preferred domain of 'from' into 'to'.
  * No other field of 'to' is touched.
  */
 static void
 domainset_copy(const struct domainset *from, struct domainset *to)
 {
 
 	to->ds_prefer = from->ds_prefer;
 	to->ds_policy = from->ds_policy;
 	DOMAINSET_COPY(&from->ds_mask, &to->ds_mask);
 }
 
 /*
  * Return 1 if the mask, policy and preferred domain of the two
  * domainsets are all equal, otherwise 0.
  */
 static int
 domainset_equal(const struct domainset *one, const struct domainset *two)
 {
 
 	if (DOMAINSET_CMP(&one->ds_mask, &two->ds_mask) != 0)
 		return (0);
 	if (one->ds_policy != two->ds_policy)
 		return (0);
 	return (one->ds_prefer == two->ds_prefer);
 }
 
 /*
  * Return non-zero if 'child' is valid beneath 'parent'.  With the
  * PREFER policy only the preferred domain has to be in the parent's
  * mask; with any other policy the child's mask must be a subset of the
  * parent's.
  */
 static int
 domainset_valid(const struct domainset *parent, const struct domainset *child)
 {
 
 	if (child->ds_policy == DOMAINSET_POLICY_PREFER)
 		return (DOMAINSET_ISSET(child->ds_prefer, &parent->ds_mask));
 	return (DOMAINSET_SUBSET(&parent->ds_mask, &child->ds_mask));
 }
 
 /*
  * Return non-zero if 'child' remains usable beneath 'parent'.  With the
  * PREFER policy the preferred domain has to be in the parent's mask;
  * with any other policy the two masks merely have to overlap.
  */
 static int
 domainset_restrict(const struct domainset *parent,
     const struct domainset *child)
 {
 
 	if (child->ds_policy == DOMAINSET_POLICY_PREFER)
 		return (DOMAINSET_ISSET(child->ds_prefer, &parent->ds_mask));
 	return (DOMAINSET_OVERLAP(&parent->ds_mask, &child->ds_mask));
 }
 
 /*
  * Lookup or create a domainset.  The key is provided in ds_mask,
  * ds_policy and ds_prefer (the fields compared by domainset_equal()).
  * If the domainset does not yet exist the storage in 'domain' is used
  * to insert.  Otherwise the existing domainset is returned and the
  * storage in 'domain' is put on 'freelist' or, when 'freelist' is NULL,
  * freed to the domainset_zone.
  */
 static struct domainset *
 _domainset_create(struct domainset *domain, struct domainlist *freelist)
 {
 	struct domainset *ndomain;
 	int i, j;
 
 	KASSERT(domain->ds_cnt <= vm_ndomains,
 	    ("invalid domain count in domainset %p", domain));
 	KASSERT(domain->ds_policy != DOMAINSET_POLICY_PREFER ||
 	    domain->ds_prefer < vm_ndomains,
 	    ("invalid preferred domain in domains %p", domain));
 
 	mtx_lock_spin(&cpuset_lock);
 	LIST_FOREACH(ndomain, &cpuset_domains, ds_link)
 		if (domainset_equal(ndomain, domain))
 			break;
 	/*
 	 * If the domain does not yet exist we insert it and initialize
 	 * various iteration helpers which are not part of the key.
 	 */
 	if (ndomain == NULL) {
 		LIST_INSERT_HEAD(&cpuset_domains, domain, ds_link);
 		domain->ds_cnt = DOMAINSET_COUNT(&domain->ds_mask);
 		/* ds_order lists the member domains in ascending order. */
 		for (i = 0, j = 0; i < DOMAINSET_FLS(&domain->ds_mask); i++)
 			if (DOMAINSET_ISSET(i, &domain->ds_mask))
 				domain->ds_order[j++] = i;
 	}
 	mtx_unlock_spin(&cpuset_lock);
 	if (ndomain == NULL)
 		return (domain);
 	/* An equal domainset already exists; dispose of the duplicate. */
 	if (freelist != NULL)
 		LIST_INSERT_HEAD(freelist, domain, ds_link);
 	else
 		uma_zfree(domainset_zone, domain);
 	return (ndomain);
 	
 }
 
 /*
  * Are any of the domains in the mask empty?  If so, silently
  * remove them and update the domainset accordingly.  If only empty
  * domains are present, we must return failure.
  *
  * Returns true (leaving 'domain' untouched) when every domain in the
  * mask is empty, and false after the mask, ds_cnt and ds_order have
  * been recomputed without the empty domains.
  */
 static bool
 domainset_empty_vm(struct domainset *domain)
 {
 	domainset_t empty;
 	int i, j;
 
 	/* Collect the set of domains that VM_DOMAIN_EMPTY() reports. */
 	DOMAINSET_ZERO(&empty);
 	for (i = 0; i < vm_ndomains; i++)
 		if (VM_DOMAIN_EMPTY(i))
 			DOMAINSET_SET(i, &empty);
 	/* Nothing usable would remain; report failure to the caller. */
 	if (DOMAINSET_SUBSET(&empty, &domain->ds_mask))
 		return (true);
 
 	/* Remove empty domains from the set and recompute. */
-	DOMAINSET_NAND(&domain->ds_mask, &empty);
+	DOMAINSET_ANDNOT(&domain->ds_mask, &empty);
 	domain->ds_cnt = DOMAINSET_COUNT(&domain->ds_mask);
 	for (i = j = 0; i < DOMAINSET_FLS(&domain->ds_mask); i++)
 		if (DOMAINSET_ISSET(i, &domain->ds_mask))
 			domain->ds_order[j++] = i;
 
 	/* Convert a PREFER policy referencing an empty domain to RR. */
 	if (domain->ds_policy == DOMAINSET_POLICY_PREFER &&
 	    DOMAINSET_ISSET(domain->ds_prefer, &empty)) {
 		domain->ds_policy = DOMAINSET_POLICY_ROUNDROBIN;
 		domain->ds_prefer = -1;
 	}
 
 	return (false);
 }
 
 /*
  * Look up, or create, the domainset matching the key held in 'domain'.
  * Returns NULL when the key fails validation.
  */
 struct domainset *
 domainset_create(const struct domainset *domain)
 {
 	struct domainset *key;
 
 	/* The policy number must lie within the usable range. */
 	if (domain->ds_policy <= DOMAINSET_POLICY_INVALID ||
 	    domain->ds_policy > DOMAINSET_POLICY_MAX)
 		return (NULL);
 	/* A preferred domain has to be a member of the mask. */
 	if (domain->ds_policy == DOMAINSET_POLICY_PREFER &&
 	    !DOMAINSET_ISSET(domain->ds_prefer, &domain->ds_mask))
 		return (NULL);
 	/* Reject masks that are not covered by domainset0. */
 	if (!DOMAINSET_SUBSET(&domainset0.ds_mask, &domain->ds_mask))
 		return (NULL);
 
 	/* Hand a zeroed copy of the key to the lookup/insert routine. */
 	key = uma_zalloc(domainset_zone, M_WAITOK | M_ZERO);
 	domainset_copy(domain, key);
 	return (_domainset_create(key, NULL));
 }
 
 /*
  * Update thread domainset pointers.
  *
  * Walks every process in the system and points each thread's
  * td_domain.dr_policy at the domainset of its current cpuset, then does
  * the same for kernel_object using cpuset_kernel.
  */
 static void
 domainset_notify(void)
 {
 	struct thread *td;
 	struct proc *p;
 
 	/* Locks nest as allproc_lock -> proc lock -> thread lock. */
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		PROC_LOCK(p);
 		/* Processes still in the PRS_NEW state are skipped. */
 		if (p->p_state == PRS_NEW) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		FOREACH_THREAD_IN_PROC(p, td) {
 			thread_lock(td);
 			td->td_domain.dr_policy = td->td_cpuset->cs_domain;
 			thread_unlock(td);
 		}
 		PROC_UNLOCK(p);
 	}
 	sx_sunlock(&allproc_lock);
 	kernel_object->domain.dr_policy = cpuset_kernel->cs_domain;
 }
 
 /*
  * Create a new set that is a subset of a parent.  The storage for the
  * key is taken from the head of 'freelist', which the caller must have
  * populated.
  */
 static struct domainset *
 domainset_shadow(const struct domainset *pdomain,
     const struct domainset *domain, struct domainlist *freelist)
 {
 	struct domainset *key;
 
 	/* Take preallocated storage from the head of the free list. */
 	key = LIST_FIRST(freelist);
 	LIST_REMOVE(key, ds_link);
 
 	/* Start from the requested key, then clip its mask to the parent. */
 	domainset_copy(domain, key);
 	DOMAINSET_AND(&key->ds_mask, &pdomain->ds_mask);
 
 	return (_domainset_create(key, freelist));
 }
 
 /*
  * Dry run of applying 'mask' to the tree of sets rooted at 'set'.
  * Returns EPERM if a read-only set is encountered and EDEADLK if a set
  * would be left without CPUs; returns 0 when the update can proceed.
  * When 'check_mask' is zero the mask is taken as-is for 'set' itself.
  */
 static int
 cpuset_testupdate(struct cpuset *set, cpuset_t *mask, int check_mask)
 {
 	struct cpuset *child;
 	cpuset_t newmask;
 	int error;
 
 	mtx_assert(&cpuset_lock, MA_OWNED);
 	if ((set->cs_flags & CPU_SET_RDONLY) != 0)
 		return (EPERM);
 	if (!check_mask) {
 		CPU_COPY(mask, &newmask);
 	} else {
 		if (!CPU_OVERLAP(&set->cs_mask, mask))
 			return (EDEADLK);
 		CPU_COPY(&set->cs_mask, &newmask);
 		CPU_AND(&newmask, mask);
 	}
 	/* Children are always checked against the mask computed here. */
 	LIST_FOREACH(child, &set->cs_children, cs_siblings) {
 		error = cpuset_testupdate(child, &newmask, 1);
 		if (error != 0)
 			return (error);
 	}
 	return (0);
 }
 
 /*
  * Restrict 'set' and, recursively, all of its children by 'mask'.  No
  * checks for empty sets or permissions are made here.
  */
 static void
 cpuset_update(struct cpuset *set, cpuset_t *mask)
 {
 	struct cpuset *child;
 
 	mtx_assert(&cpuset_lock, MA_OWNED);
 	CPU_AND(&set->cs_mask, mask);
 	/* Each child is clipped by the freshly updated parent mask. */
 	LIST_FOREACH(child, &set->cs_children, cs_siblings)
 		cpuset_update(child, &set->cs_mask);
 }
 
 /*
  * Modify the set 'set' to use a copy of the mask provided.  Apply this new
  * mask to restrict all children in the tree.  Checks for validity before
  * applying the changes.
  *
  * Returns 0 on success, the priv_check() error, EPERM for a jailed caller
  * touching a root set, EINVAL when the mask is rejected by the CPU_SUBSET()
  * check against the root set, or the error from cpuset_testupdate().
  */
 static int
 cpuset_modify(struct cpuset *set, cpuset_t *mask)
 {
 	struct cpuset *root;
 	int error;
 
 	error = priv_check(curthread, PRIV_SCHED_CPUSET);
 	if (error)
 		return (error);
 	/*
 	 * In case we are called from within the jail
 	 * we do not allow modifying the dedicated root
 	 * cpuset of the jail but may still allow to
 	 * change child sets.
 	 */
 	if (jailed(curthread->td_ucred) &&
 	    set->cs_flags & CPU_SET_ROOT)
 		return (EPERM);
 	/*
 	 * Verify that we have access to this set of
 	 * cpus.
 	 */
 	root = cpuset_getroot(set);
 	mtx_lock_spin(&cpuset_lock);
 	if (root && !CPU_SUBSET(&root->cs_mask, mask)) {
 		error = EINVAL;
 		goto out;
 	}
 	/* Dry run first so the tree is only modified if it all succeeds. */
 	error = cpuset_testupdate(set, mask, 0);
 	if (error)
 		goto out;
 	CPU_COPY(mask, &set->cs_mask);
 	cpuset_update(set, mask);
 out:
 	mtx_unlock_spin(&cpuset_lock);
 
 	return (error);
 }
 
 /*
  * Recursively check for errors that would occur from applying the
  * domainset 'dset' to the tree of sets starting at 'set'.  Checks for
  * sets that would become unusable (EDEADLK) as well as RDONLY flags
  * (EPERM).
  *
  * 'orig' is the domainset the parent had before the update; a set whose
  * domainset key differs from it needs a new domainset, and '*count' is
  * incremented once for each such set.
  *
  * NOTE(review): 'check_mask' is never read in this function.
  */
 static int
 cpuset_testupdate_domain(struct cpuset *set, struct domainset *dset,
     struct domainset *orig, int *count, int check_mask)
 {
 	struct cpuset *nset;
 	struct domainset *domain;
 	struct domainset newset;
 	int error;
 
 	mtx_assert(&cpuset_lock, MA_OWNED);
 	if (set->cs_flags & CPU_SET_RDONLY)
 		return (EPERM);
 	domain = set->cs_domain;
 	/* newset is a scratch copy of this set's key, passed to children. */
 	domainset_copy(domain, &newset);
 	if (!domainset_equal(domain, orig)) {
 		if (!domainset_restrict(domain, dset))
 			return (EDEADLK);
 		DOMAINSET_AND(&newset.ds_mask, &dset->ds_mask);
 		/* Count the number of domains that are changing. */
 		(*count)++;
 	}
 	error = 0;
 	LIST_FOREACH(nset, &set->cs_children, cs_siblings) 
 		if ((error = cpuset_testupdate_domain(nset, &newset, domain,
 		    count, 1)) != 0)
 			break;
 	return (error);
 }
 
 /*
  * Apply 'domain' to 'set' and its children without checking for empty
  * sets or permissions.  'orig' is the domainset the parent used before
  * the update and 'domains' supplies preallocated domainset storage.
  */
 static void
 cpuset_update_domain(struct cpuset *set, struct domainset *domain,
     struct domainset *orig, struct domainlist *domains)
 {
 	struct cpuset *child;
 
 	mtx_assert(&cpuset_lock, MA_OWNED);
 	if (set->cs_domain == orig) {
 		/*
 		 * The set was not modified relative to its parent, so it
 		 * simply inherits the new mask and policy.
 		 */
 		set->cs_domain = domain;
 	} else {
 		/*
 		 * The set had its own domainset: keep its policy and only
 		 * restrict the mask, which requires a new domainset.
 		 */
 		orig = set->cs_domain;
 		set->cs_domain = domainset_shadow(domain, orig, domains);
 	}
 	LIST_FOREACH(child, &set->cs_children, cs_siblings)
 		cpuset_update_domain(child, set->cs_domain, orig, domains);
 }
 
 /*
  * Modify the set 'set' to use a copy the domainset provided.  Apply this new
  * mask to restrict all children in the tree.  Checks for validity before
  * applying the changes.
  */
 static int
 cpuset_modify_domain(struct cpuset *set, struct domainset *domain)
 {
 	struct domainlist domains;
 	struct domainset temp;
 	struct domainset *dset;
 	struct cpuset *root;
 	int ndomains, needed;
 	int error;
 
 	error = priv_check(curthread, PRIV_SCHED_CPUSET);
 	if (error)
 		return (error);
 	/*
 	 * In case we are called from within the jail
 	 * we do not allow modifying the dedicated root
 	 * cpuset of the jail but may still allow to
 	 * change child sets.
 	 */
 	if (jailed(curthread->td_ucred) &&
 	    set->cs_flags & CPU_SET_ROOT)
 		return (EPERM);
 	domainset_freelist_init(&domains, 0);
 	domain = domainset_create(domain);
 	ndomains = needed = 0;
 	do {
 		if (ndomains < needed) {
 			domainset_freelist_add(&domains, needed - ndomains);
 			ndomains = needed;
 		}
 		root = cpuset_getroot(set);
 		mtx_lock_spin(&cpuset_lock);
 		dset = root->cs_domain;
 		/*
 		 * Verify that we have access to this set of domains.
 		 */
 		if (!domainset_valid(dset, domain)) {
 			error = EINVAL;
 			goto out;
 		}
 		/*
 		 * If applying prefer we keep the current set as the fallback.
 		 */
 		if (domain->ds_policy == DOMAINSET_POLICY_PREFER)
 			DOMAINSET_COPY(&set->cs_domain->ds_mask,
 			    &domain->ds_mask);
 		/*
 		 * Determine whether we can apply this set of domains and
 		 * how many new domain structures it will require.
 		 */
 		domainset_copy(domain, &temp);
 		needed = 0;
 		error = cpuset_testupdate_domain(set, &temp, set->cs_domain,
 		    &needed, 0);
 		if (error)
 			goto out;
 	} while (ndomains < needed);
 	dset = set->cs_domain;
 	cpuset_update_domain(set, domain, dset, &domains);
 out:
 	mtx_unlock_spin(&cpuset_lock);
 	domainset_freelist_free(&domains);
 	if (error == 0)
 		domainset_notify();
 
 	return (error);
 }
 
 /*
  * Resolve the 'which' parameter of several cpuset apis.
  *
  * For WHICH_PID and WHICH_TID return a locked proc and valid proc/tid.  Also
  * checks for permission via p_cansched().
  *
  * For WHICH_SET returns a valid set with a new reference.
  *
  * For WHICH_JAIL returns the jail's cpuset with a new reference.
  *
  * For WHICH_IRQ and WHICH_DOMAIN returns 0 with all outputs left NULL.
  *
  * -1 may be supplied for any argument to mean the current proc/thread or
  * the base set of the current thread.  May fail with ESRCH/EPERM, or with
  * EINVAL for an unrecognized 'which'.
  */
 int
 cpuset_which(cpuwhich_t which, id_t id, struct proc **pp, struct thread **tdp,
     struct cpuset **setp)
 {
 	struct cpuset *set;
 	struct thread *td;
 	struct proc *p;
 	int error;
 
 	/* All outputs start out NULL so error paths need no cleanup. */
 	*pp = p = NULL;
 	*tdp = td = NULL;
 	*setp = set = NULL;
 	switch (which) {
 	case CPU_WHICH_PID:
 		if (id == -1) {
 			PROC_LOCK(curproc);
 			p = curproc;
 			break;
 		}
 		/* NOTE(review): pfind() presumably returns p locked. */
 		if ((p = pfind(id)) == NULL)
 			return (ESRCH);
 		break;
 	case CPU_WHICH_TID:
 		if (id == -1) {
 			PROC_LOCK(curproc);
 			p = curproc;
 			td = curthread;
 			break;
 		}
 		/* NOTE(review): tdfind() presumably locks td->td_proc. */
 		td = tdfind(id, -1);
 		if (td == NULL)
 			return (ESRCH);
 		p = td->td_proc;
 		break;
 	case CPU_WHICH_CPUSET:
 		if (id == -1) {
 			thread_lock(curthread);
 			set = cpuset_refbase(curthread->td_cpuset);
 			thread_unlock(curthread);
 		} else
 			set = cpuset_lookup(id, curthread);
 		if (set) {
 			*setp = set;
 			return (0);
 		}
 		return (ESRCH);
 	case CPU_WHICH_JAIL:
 	{
 		/* Find `set' for prison with given id. */
 		struct prison *pr;
 
 		sx_slock(&allprison_lock);
 		pr = prison_find_child(curthread->td_ucred->cr_prison, id);
 		sx_sunlock(&allprison_lock);
 		if (pr == NULL)
 			return (ESRCH);
 		cpuset_ref(pr->pr_cpuset);
 		*setp = pr->pr_cpuset;
 		/*
 		 * NOTE(review): pr_mtx is released here without a visible
 		 * acquire, so prison_find_child() presumably returns with
 		 * it held -- confirm.
 		 */
 		mtx_unlock(&pr->pr_mtx);
 		return (0);
 	}
 	case CPU_WHICH_IRQ:
 	case CPU_WHICH_DOMAIN:
 		return (0);
 	default:
 		return (EINVAL);
 	}
 	/* Only the PID and TID cases reach this point, with p locked. */
 	error = p_cansched(curthread, p);
 	if (error) {
 		PROC_UNLOCK(p);
 		return (error);
 	}
 	if (td == NULL)
 		td = FIRST_THREAD_IN_PROC(p);
 	*pp = p;
 	*tdp = td;
 	return (0);
 }
 
 /*
  * Check whether 'mask' and/or 'domain' could be applied as an anonymous
  * restriction beneath the base set of 'set'.  Either argument may be NULL
  * to skip that check.  Returns EINVAL when invalid CPUs or domains have
  * been specified, otherwise 0.
  */
 static int
 cpuset_testshadow(struct cpuset *set, const cpuset_t *mask,
     const struct domainset *domain)
 {
 	struct cpuset *base;
 
 	base = cpuset_getbase(set);
 
 	/* A CPU mask must be covered by the base set's mask. */
 	if (mask != NULL && !CPU_SUBSET(&base->cs_mask, mask))
 		return (EINVAL);
 
 	/* A domainset must be valid beneath the base set's domainset. */
 	if (domain != NULL && !domainset_valid(base->cs_domain, domain))
 		return (EINVAL);
 
 	return (0);
 }
 
 /*
  * Create an anonymous set with the provided mask and/or domainset, using
  * storage taken from the head of the 'cpusets' free list.  The new set is
  * created as a child of the base set of 'set' (see cpuset_getbase()).
  *
  * A NULL 'mask' means "use the mask of 'set'"; a NULL 'domain' means "use
  * the domainset of 'set'".  When 'domain' is supplied a domainset is
  * taken from the 'domains' free list.  On success the new set is returned
  * in '*nsetp' and removed from 'cpusets'; on failure an errno is returned
  * and '*nsetp' is not written.
  */
 static int
 cpuset_shadow(struct cpuset *set, struct cpuset **nsetp,
    const cpuset_t *mask, const struct domainset *domain,
    struct setlist *cpusets, struct domainlist *domains)
 {
 	struct cpuset *parent;
 	struct cpuset *nset;
 	struct domainset *dset;
 	struct domainset *d;
 	int error;
 
 	/* Validate the request before consuming any preallocated storage. */
 	error = cpuset_testshadow(set, mask, domain);
 	if (error)
 		return (error);
 
 	parent = cpuset_getbase(set);
 	dset = parent->cs_domain;
 	if (mask == NULL)
 		mask = &set->cs_mask;
 	if (domain != NULL)
 		d = domainset_shadow(dset, domain, domains);
 	else
 		d = set->cs_domain;
 	/* The entry stays on the free list unless creation succeeds. */
 	nset = LIST_FIRST(cpusets);
 	error = _cpuset_create(nset, parent, mask, d, CPUSET_INVALID);
 	if (error == 0) {
 		LIST_REMOVE(nset, cs_link);
 		*nsetp = nset;
 	}
 	return (error);
 }
 
 /*
  * Switch 'td' over to the cpuset 'nset', refresh its domain policy from
  * the new set and notify the scheduler.  The previous cpuset is returned
  * so the caller can release it.
  */
 static struct cpuset *
 cpuset_update_thread(struct thread *td, struct cpuset *nset)
 {
 	struct cpuset *oset;
 
 	oset = td->td_cpuset;
 	td->td_cpuset = nset;
 	td->td_domain.dr_policy = nset->cs_domain;
 	sched_affinity(td);
 	return (oset);
 }
 
 /*
  * Dry-run counterpart of cpuset_setproc_maskthread(): check whether the
  * mask and/or domain could be applied to a thread currently using
  * 'tdset'.  A NULL 'mask' or 'domain' is replaced by the thread's
  * current value.
  */
 static int
 cpuset_setproc_test_maskthread(struct cpuset *tdset, cpuset_t *mask,
     struct domainset *domain)
 {
 	struct cpuset *base;
 
 	base = cpuset_getbase(tdset);
 	return (cpuset_testshadow(base,
 	    mask != NULL ? mask : &tdset->cs_mask,
 	    domain != NULL ? domain : tdset->cs_domain));
 }
 
 /*
  * Build the anonymous set for a thread currently using 'tdset' when a
  * new mask and/or domain is applied to a whole process.  A NULL 'mask'
  * or 'domain' is replaced by the thread's current value.  The new set is
  * returned in '*nsetp'.
  */
 static int
 cpuset_setproc_maskthread(struct cpuset *tdset, cpuset_t *mask,
     struct domainset *domain, struct cpuset **nsetp,
     struct setlist *freelist, struct domainlist *domainlist)
 {
 	struct cpuset *base;
 
 	base = cpuset_getbase(tdset);
 	return (cpuset_shadow(base, nsetp,
 	    mask != NULL ? mask : &tdset->cs_mask,
 	    domain != NULL ? domain : tdset->cs_domain,
 	    freelist, domainlist));
 }
 
 /*
  * Compute the CPU mask and domainset a thread using the anonymous set
  * 'tdset' would end up with if its process were moved to 'set'.  The
  * results are written to '*mask' and '*domain'.  Returns EDEADLK if
  * either result would be empty, otherwise 0.
  */
 static int
 cpuset_setproc_setthread_mask(struct cpuset *tdset, struct cpuset *set,
     cpuset_t *mask, struct domainset *domain)
 {
 	struct cpuset *parent;
 
 	parent = cpuset_getbase(tdset);
 
 	/*
 	 * If the thread restricted its mask then apply that same
 	 * restriction to the new set, otherwise take it wholesale.
 	 */
 	if (CPU_CMP(&tdset->cs_mask, &parent->cs_mask) != 0) {
 		CPU_COPY(&tdset->cs_mask, mask);
 		CPU_AND(mask, &set->cs_mask);
 	} else
 		CPU_COPY(&set->cs_mask, mask);
 
 	/*
 	 * If the thread restricted the domain then we apply the
 	 * restriction to the new set but retain the policy.
 	 */
 	if (tdset->cs_domain != parent->cs_domain) {
 		domainset_copy(tdset->cs_domain, domain);
 		DOMAINSET_AND(&domain->ds_mask, &set->cs_domain->ds_mask);
 	} else
 		domainset_copy(set->cs_domain, domain);
 
 	/* The thread must be left with at least one CPU and one domain. */
 	if (CPU_EMPTY(mask) || DOMAINSET_EMPTY(&domain->ds_mask))
 		return (EDEADLK);
 
 	return (0);
 }
 
 static int
 cpuset_setproc_test_setthread(struct cpuset *tdset, struct cpuset *set)
 {
 	struct domainset domain;
 	cpuset_t mask;
 
 	if (tdset->cs_id != CPUSET_INVALID)
 		return (0);
 	return cpuset_setproc_setthread_mask(tdset, set, &mask, &domain);
 }
 
 static int
 cpuset_setproc_setthread(struct cpuset *tdset, struct cpuset *set,
     struct cpuset **nsetp, struct setlist *freelist,
     struct domainlist *domainlist)
 {
 	struct domainset domain;
 	cpuset_t mask;
 	int error;
 
 	/*
 	 * If we're replacing on a thread that has not constrained the
 	 * original set we can simply accept the new set.
 	 */
 	if (tdset->cs_id != CPUSET_INVALID) {
 		*nsetp = cpuset_ref(set);
 		return (0);
 	}
 	error = cpuset_setproc_setthread_mask(tdset, set, &mask, &domain);
 	if (error)
 		return (error);
 
 	return cpuset_shadow(tdset, nsetp, &mask, &domain, freelist,
 	    domainlist);
 }
 
 /*
  * Handle three cases for updating an entire process.
  *
  * 1) Set is non-null.  This reparents all anonymous sets to the provided
  *    set and replaces all non-anonymous td_cpusets with the provided set.
  * 2) Mask is non-null.  This replaces or creates anonymous sets for every
  *    thread with the existing base as a parent.
  * 3) domain is non-null.  This creates anonymous sets for every thread
  *    and replaces the domain set.
  *
  * This is overly complicated because we can't allocate while holding a
  * spinlock and spinlocks must be held while changing and examining thread
  * state.
  *
  * Returns 0 on success or an errno from cpuset_which() or from the
  * per-thread test/apply helpers.
  */
 static int
 cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask,
     struct domainset *domain)
 {
 	struct setlist freelist;
 	struct setlist droplist;
 	struct domainlist domainlist;
 	struct cpuset *nset;
 	struct thread *td;
 	struct proc *p;
 	int threads;
 	int nfree;
 	int error;
 
 	/*
 	 * The algorithm requires two passes due to locking considerations.
 	 *
 	 * 1) Lookup the process and acquire the locks in the required order.
 	 * 2) If enough cpusets have not been allocated release the locks and
 	 *    allocate them.  Loop.
 	 *
 	 * nfree only counts the entries added inside the loop; the single
 	 * entry each free list is initialized with is not included.
 	 */
 	cpuset_freelist_init(&freelist, 1);
 	domainset_freelist_init(&domainlist, 1);
 	LIST_INIT(&droplist);
 	nfree = 0;
 	for (;;) {
 		error = cpuset_which(CPU_WHICH_PID, pid, &p, &td, &nset);
 		if (error)
 			goto out;
 		if (nfree >= p->p_numthreads)
 			break;
 		threads = p->p_numthreads;
 		PROC_UNLOCK(p);
 		if (nfree < threads) {
 			cpuset_freelist_add(&freelist, threads - nfree);
 			domainset_freelist_add(&domainlist, threads - nfree);
 			nfree = threads;
 		}
 	}
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	/*
 	 * Now that the appropriate locks are held and we have enough cpusets,
 	 * make sure the operation will succeed before applying changes. The
 	 * proc lock prevents td_cpuset from changing between calls.
 	 */
 	error = 0;
 	FOREACH_THREAD_IN_PROC(p, td) {
 		thread_lock(td);
 		if (set != NULL)
 			error = cpuset_setproc_test_setthread(td->td_cpuset,
 			    set);
 		else
 			error = cpuset_setproc_test_maskthread(td->td_cpuset,
 			    mask, domain);
 		thread_unlock(td);
 		if (error)
 			goto unlock_out;
 	}
 	/*
 	 * Replace each thread's cpuset while using deferred release.  We
 	 * must do this because the thread lock must be held while operating
 	 * on the thread and this limits the type of operations allowed.
 	 */
 	FOREACH_THREAD_IN_PROC(p, td) {
 		thread_lock(td);
 		if (set != NULL)
 			error = cpuset_setproc_setthread(td->td_cpuset, set,
 			    &nset, &freelist, &domainlist);
 		else
 			error = cpuset_setproc_maskthread(td->td_cpuset, mask,
 			    domain, &nset, &freelist, &domainlist);
 		if (error) {
 			thread_unlock(td);
 			break;
 		}
 		cpuset_rel_defer(&droplist, cpuset_update_thread(td, nset));
 		thread_unlock(td);
 	}
 unlock_out:
 	PROC_UNLOCK(p);
 out:
 	/* Finish the deferred releases now that no locks are held. */
 	while ((nset = LIST_FIRST(&droplist)) != NULL)
 		cpuset_rel_complete(nset);
 	cpuset_freelist_free(&freelist);
 	domainset_freelist_free(&domainlist);
 	return (error);
 }
 
 /*
  * Format the words of 'set' into 'buf' as comma separated hexadecimal
  * values, word 0 first.  'bufsiz' is the size of 'buf' in bytes and
  * 'setlen' the number of bits in the set.
  *
  * Returns the number of characters written (excluding the terminating
  * NUL), or 0 if the buffer is too small.
  */
 static int
 bitset_strprint(char *buf, size_t bufsiz, const struct bitset *set, int setlen)
 {
 	size_t bytes;
 	int i, once;
 	char *p;
 
 	once = 0;
 	p = buf;
 	for (i = 0; i < __bitset_words(setlen); i++) {
 		if (once != 0) {
 			if (bufsiz < 1)
 				return (0);
 			*p = ',';
 			p++;
 			bufsiz--;
 		} else
 			once = 1;
 		if (bufsiz < sizeof(__STRING(ULONG_MAX)))
 			return (0);
 		bytes = snprintf(p, bufsiz, "%lx", set->__bits[i]);
 		/*
 		 * snprintf() reports the length it wanted to write, which
 		 * may exceed the space that was available.  Advancing by
 		 * that amount would move 'p' past the buffer and wrap
 		 * 'bufsiz', so treat truncation (or an error) as failure.
 		 */
 		if (bytes >= bufsiz)
 			return (0);
 		p += bytes;
 		bufsiz -= bytes;
 	}
 	return (p - buf);
 }
 
 /*
  * Parse comma separated hexadecimal words from 'buf' into 'set', word 0
  * first, i.e. the format produced by bitset_strprint().  The set is
  * zeroed first; an empty field between separators leaves that word zero.
  *
  * Returns the number of characters consumed; the caller inspects
  * buf[return value] to decide whether the whole string was accepted.
  */
 static int
 bitset_strscan(struct bitset *set, int setlen, const char *buf)
 {
 	int i, nwords, ret;
 	const char *p;
 
 	BIT_ZERO(setlen, set);
 	nwords = __bitset_words(setlen);
 	p = buf;
 	for (i = 0; i < nwords; i++) {
 		if (*p != ',') {
 			ret = sscanf(p, "%lx", &set->__bits[i]);
 			if (ret == 0 || ret == -1)
 				break;
 			while (isxdigit(*p))
 				p++;
 		}
 		/*
 		 * A comma separates this word from the next one.  It is
 		 * consumed as part of the current word so that it does not
 		 * use up a word index of its own, and only when another
 		 * word can still follow.
 		 */
 		if (*p != ',' || i + 1 >= nwords)
 			break;
 		p++;
 	}
 	return (p - buf);
 }
 
 /*
  * Write the string form of the cpuset_t 'set' into 'buf' and return
  * 'buf'.  The caller must supply a buffer of at least CPUSETBUFSIZ bytes.
  */
 char *
 cpusetobj_strprint(char *buf, const cpuset_t *set)
 {
 	const struct bitset *bits;
 
 	bits = (const struct bitset *)set;
 	bitset_strprint(buf, CPUSETBUFSIZ, bits, CPU_SETSIZE);
 	return (buf);
 }
 
 /*
  * Fill in the cpuset_t 'set' from its string representation in 'buf'.
  * Strings longer than CPUSETBUFSIZ - 1 characters are rejected.  Returns
  * 0 when the whole string was consumed and -1 otherwise.
  */
 int
 cpusetobj_strscan(cpuset_t *set, const char *buf)
 {
 	int consumed;
 
 	if (strlen(buf) > CPUSETBUFSIZ - 1)
 		return (-1);
 
 	/* Anything left over after the parsed words makes the input bad. */
 	consumed = bitset_strscan((struct bitset *)set, CPU_SETSIZE, buf);
 	return (buf[consumed] == '\0' ? 0 : -1);
 }
 
 /*
  * Handle a domainset specifier in the sysctl tree.  A pointer to a pointer
  * to a domainset is in arg1.  If the user specifies a valid domainset the
  * pointer is updated.
  *
  * Format is:
  * hex mask word 0,hex mask word 1,...:decimal policy:decimal preferred
  *
  * Returns 0 on success, the error from sysctl_handle_string(), or EINVAL
  * when the new value cannot be parsed or is rejected by
  * domainset_create().
  */
 int
 sysctl_handle_domainset(SYSCTL_HANDLER_ARGS)
 {
 	char buf[DOMAINSETBUFSIZ];
 	struct domainset *dset;
 	struct domainset key;
 	int policy, prefer, error;
 	char *p;
 
 	dset = *(struct domainset **)arg1;
 	error = 0;
 
 	/* Render the current value; it is what a read request returns. */
 	if (dset != NULL) {
 		p = buf + bitset_strprint(buf, DOMAINSETBUFSIZ,
 		    (const struct bitset *)&dset->ds_mask, DOMAINSET_SETSIZE);
 		sprintf(p, ":%d:%d", dset->ds_policy, dset->ds_prefer);
 	} else
 		sprintf(buf, "<NULL>");
 	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
 	/* Done on error or when no new value was supplied. */
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	/*
 	 * Read in and validate the string.
 	 */
 	memset(&key, 0, sizeof(key));
 	p = &buf[bitset_strscan((struct bitset *)&key.ds_mask,
 	    DOMAINSET_SETSIZE, buf)];
 	/* Nothing was consumed, so there is no mask to use. */
 	if (p == buf)
 		return (EINVAL);
 	if (sscanf(p, ":%d:%d", &policy, &prefer) != 2)
 		return (EINVAL);
 	key.ds_policy = policy;
 	key.ds_prefer = prefer;
 
 	/* Domainset_create() validates the policy.*/
 	dset = domainset_create(&key);
 	if (dset == NULL)
 		return (EINVAL);
 	*(struct domainset **)arg1 = dset;
 
 	return (error);
 }
 
 /*
  * Apply an anonymous mask or a domain to a single thread.
  *
  * 'id' is resolved through cpuset_which(CPU_WHICH_TID).  Either 'mask' or
  * 'domain' may be NULL, in which case cpuset_shadow() keeps the thread's
  * current value for it.  Returns 0 on success or an errno.
  */
 static int
 _cpuset_setthread(lwpid_t id, cpuset_t *mask, struct domainset *domain)
 {
 	struct setlist cpusets;
 	struct domainlist domainlist;
 	struct cpuset *nset;
 	struct cpuset *set;
 	struct thread *td;
 	struct proc *p;
 	int error;
 
 	/*
 	 * Preallocate before taking any locks: one cpuset, plus one
 	 * domainset only when a domain was supplied.
 	 */
 	cpuset_freelist_init(&cpusets, 1);
 	domainset_freelist_init(&domainlist, domain != NULL);
 	error = cpuset_which(CPU_WHICH_TID, id, &p, &td, &set);
 	if (error)
 		goto out;
 	/* From here on 'set' holds the thread's old cpuset, if replaced. */
 	set = NULL;
 	thread_lock(td);
 	error = cpuset_shadow(td->td_cpuset, &nset, mask, domain,
 	    &cpusets, &domainlist);
 	if (error == 0)
 		set = cpuset_update_thread(td, nset);
 	thread_unlock(td);
 	PROC_UNLOCK(p);
 	/* Drop the old set only after the locks have been released. */
 	if (set)
 		cpuset_rel(set);
 out:
 	cpuset_freelist_free(&cpusets);
 	domainset_freelist_free(&domainlist);
 	return (error);
 }
 
 /*
  * Apply the anonymous CPU mask 'mask' to the thread identified by 'id',
  * leaving its domainset alone.
  */
 int
 cpuset_setthread(lwpid_t id, cpuset_t *mask)
 {
 
 	return (_cpuset_setthread(id, mask, NULL));
 }
 
 /*
  * Apply a new cpumask to the ithread 'id'.  With NOCPU the thread is
  * given the mask of the root set; otherwise it is bound to 'cpu' alone.
  */
 int
 cpuset_setithread(lwpid_t id, int cpu)
 {
 	cpuset_t mask;
 
 	if (cpu != NOCPU) {
 		CPU_ZERO(&mask);
 		CPU_SET(cpu, &mask);
 	} else
 		CPU_COPY(cpuset_root, &mask);
 	return (_cpuset_setthread(id, &mask, NULL));
 }
 
 /*
  * Initialize static domainsets after NUMA information is available.  This is
  * called before memory allocators are initialized.
  *
  * Registers domainset_roundrobin plus, for each of the vm_ndomains
  * domains, one domainset_fixed[] and one domainset_prefer[] entry.
  */
 void
 domainset_init(void)
 {
 	struct domainset *dset;
 	int i;
 
 	/* Round-robin across all domains. */
 	dset = &domainset_roundrobin;
 	DOMAINSET_COPY(&all_domains, &dset->ds_mask);
 	dset->ds_policy = DOMAINSET_POLICY_ROUNDROBIN;
 	dset->ds_prefer = -1;
 	_domainset_create(dset, NULL);
 
 	for (i = 0; i < vm_ndomains; i++) {
 		/* Fixed: a mask containing only domain i. */
 		dset = &domainset_fixed[i];
 		DOMAINSET_ZERO(&dset->ds_mask);
 		DOMAINSET_SET(i, &dset->ds_mask);
 		dset->ds_policy = DOMAINSET_POLICY_ROUNDROBIN;
 		_domainset_create(dset, NULL);
 
 		/* Prefer: all domains in the mask, domain i preferred. */
 		dset = &domainset_prefer[i];
 		DOMAINSET_COPY(&all_domains, &dset->ds_mask);
 		dset->ds_policy = DOMAINSET_POLICY_PREFER;
 		dset->ds_prefer = i;
 		_domainset_create(dset, NULL);
 	}
 }
 
 /*
  * Create the domainset for cpuset 0, 1 and cpuset 2.
  *
  * Also initializes cpuset_lock and prunes empty domains from every
  * domainset registered so far.
  */
 void
 domainset_zero(void)
 {
 	struct domainset *dset, *tmp;
 
 	mtx_init(&cpuset_lock, "cpuset", NULL, MTX_SPIN | MTX_RECURSE);
 
 	/* domainset0: first-touch over all domains; used by curthread. */
 	dset = &domainset0;
 	DOMAINSET_COPY(&all_domains, &dset->ds_mask);
 	dset->ds_policy = DOMAINSET_POLICY_FIRSTTOUCH;
 	dset->ds_prefer = -1;
 	curthread->td_domain.dr_policy = _domainset_create(dset, NULL);
 
 	/* domainset2: same key with interleave; used by kernel_object. */
 	domainset_copy(dset, &domainset2);
 	domainset2.ds_policy = DOMAINSET_POLICY_INTERLEAVE;
 	kernel_object->domain.dr_policy = _domainset_create(&domainset2, NULL);
 
 	/* Remove empty domains from the global policies. */
 	LIST_FOREACH_SAFE(dset, &cpuset_domains, ds_link, tmp)
 		if (domainset_empty_vm(dset))
 			LIST_REMOVE(dset, ds_link);
 }
 
 /*
  * Creates system-wide cpusets and the cpuset for thread0 including three
  * sets:
  *
  * 0 - The root set which should represent all valid processors in the
  *     system.  It is initially created with a mask of all processors
  *     because we don't know what processors are valid until cpuset_init()
  *     runs.  This set is immutable.
  * 1 - The default set which all processes are a member of until changed.
  *     This allows an administrator to move all threads off of given cpus to
  *     dedicate them to high priority tasks or save power etc.
  * 2 - The kernel set which allows restriction and policy to be applied only
  *     to kernel threads and the kernel_object.
  *
  * Also creates the cpuset and domainset UMA zones and the cpuset id
  * allocator.  Returns the default set (1).
  */
 struct cpuset *
 cpuset_thread0(void)
 {
 	struct cpuset *set;
 	int i;
 	int error __unused;
 
 	cpuset_zone = uma_zcreate("cpuset", sizeof(struct cpuset), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_CACHE, 0);
 	domainset_zone = uma_zcreate("domainset", sizeof(struct domainset),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
 
 	/*
 	 * Create the root system set (0) for the whole machine.  Doesn't use
 	 * cpuset_create() due to NULL parent.
 	 */
 	set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
 	CPU_COPY(&all_cpus, &set->cs_mask);
 	LIST_INIT(&set->cs_children);
 	LIST_INSERT_HEAD(&cpuset_ids, set, cs_link);
 	set->cs_ref = 1;
 	set->cs_flags = CPU_SET_ROOT | CPU_SET_RDONLY;
 	set->cs_domain = &domainset0;
 	cpuset_zero = set;
 	cpuset_root = &set->cs_mask;
 
 	/*
 	 * Now derive a default (1), modifiable set from that to give out.
 	 */
 	set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
 	error = _cpuset_create(set, cpuset_zero, NULL, NULL, 1);
 	KASSERT(error == 0, ("Error creating default set: %d\n", error));
 	cpuset_default = set;
 	/*
 	 * Create the kernel set (2).
 	 */
 	set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
 	error = _cpuset_create(set, cpuset_zero, NULL, NULL, 2);
 	KASSERT(error == 0, ("Error creating kernel set: %d\n", error));
 	set->cs_domain = &domainset2;
 	cpuset_kernel = set;
 
 	/*
 	 * Initialize the unit allocator. 0, 1 and 2 are allocated above,
 	 * so dynamically assigned ids start at 3.
 	 */
 	cpuset_unr = new_unrhdr(3, INT_MAX, NULL);
 
 	/*
 	 * If MD code has not initialized per-domain cpusets, place all
 	 * CPUs in domain 0.
 	 */
 	for (i = 0; i < MAXMEMDOM; i++)
 		if (!CPU_EMPTY(&cpuset_domain[i]))
 			goto domains_set;
 	CPU_COPY(&all_cpus, &cpuset_domain[0]);
 domains_set:
 
 	return (cpuset_default);
 }
 
 /*
  * Move 'td' onto the kernel cpuset, releasing the reference it held on
  * its previous set.
  */
 void
 cpuset_kernthread(struct thread *td)
 {
 	struct cpuset *oset;
 
 	thread_lock(td);
 	oset = td->td_cpuset;
 	td->td_cpuset = cpuset_ref(cpuset_kernel);
 	thread_unlock(td);
 	/* The old set is released only after the thread lock is dropped. */
 	cpuset_rel(oset);
 }
 
 /*
  * Create a cpuset the way cpuset_create() would, as a child of the
  * prison's cpuset and with the same mask, and additionally flag the new
  * set as a root.
  *
  * The calling thread is not reparented to the new set; use
  * cpuset_setproc_update_set() for that.
  *
  * Returns 0 with the new set in *setp, or the error from cpuset_create().
  * NOTE(review): the reference state of *setp is whatever cpuset_create()
  * hands back -- confirm against its contract.
  */
 int
 cpuset_create_root(struct prison *pr, struct cpuset **setp)
 {
 	int error;
 
 	KASSERT(pr != NULL, ("[%s:%d] invalid pr", __func__, __LINE__));
 	KASSERT(setp != NULL, ("[%s:%d] invalid setp", __func__, __LINE__));
 
 	error = cpuset_create(setp, pr->pr_cpuset, &pr->pr_cpuset->cs_mask);
 	if (error != 0)
 		return (error);
 
 	KASSERT(*setp != NULL, ("[%s:%d] cpuset_create returned invalid data",
 	    __func__, __LINE__));
 
 	/* Mark the set as root. */
 	(*setp)->cs_flags |= CPU_SET_ROOT;
 
 	return (0);
 }
 
 /*
  * Move every thread of process 'p' onto the cpuset 'set' via
  * cpuset_setproc().  Returns 0 on success or the error from
  * cpuset_setproc().
  */
 int
 cpuset_setproc_update_set(struct proc *p, struct cpuset *set)
 {
 	int error;
 
 	KASSERT(p != NULL, ("[%s:%d] invalid proc", __func__, __LINE__));
 	KASSERT(set != NULL, ("[%s:%d] invalid set", __func__, __LINE__));
 
 	/*
 	 * Hold a reference of our own across the call and drop it again on
 	 * every path, including failure, so that an error does not leak it.
 	 * The other callers of cpuset_setproc() in this file likewise
 	 * release their reference unconditionally afterwards.
 	 */
 	cpuset_ref(set);
 	error = cpuset_setproc(p->p_pid, set, NULL, NULL);
 	cpuset_rel(set);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct cpuset_args {
 	cpusetid_t	*setid;
 };
 #endif
 /*
  * System call: create a new cpuset beneath the root set of the calling
  * thread, copy its id out to 'uap->setid' and move the calling process
  * onto it.  Returns 0 or an errno.
  */
 int
 sys_cpuset(struct thread *td, struct cpuset_args *uap)
 {
 	struct cpuset *root;
 	struct cpuset *set;
 	int error;
 
 	thread_lock(td);
 	root = cpuset_refroot(td->td_cpuset);
 	thread_unlock(td);
 	/* The new set starts out with the root's mask. */
 	error = cpuset_create(&set, root, &root->cs_mask);
 	cpuset_rel(root);
 	if (error)
 		return (error);
 	error = copyout(&set->cs_id, uap->setid, sizeof(set->cs_id));
 	if (error == 0)
 		error = cpuset_setproc(-1, set, NULL, NULL);
 	/* Drop our reference whether or not the steps above succeeded. */
 	cpuset_rel(set);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct cpuset_setid_args {
 	cpuwhich_t	which;
 	id_t		id;
 	cpusetid_t	setid;
 };
 #endif
 /*
  * System call entry point: unpack the arguments and hand them to
  * kern_cpuset_setid().
  */
 int
 sys_cpuset_setid(struct thread *td, struct cpuset_setid_args *uap)
 {
 
 	return (kern_cpuset_setid(td, uap->which, uap->id, uap->setid));
 }
 
 /*
  * Assign the process 'id' to the cpuset identified by 'setid'.  Returns
  * EINVAL for any 'which' other than CPU_WHICH_PID, ESRCH if the set
  * cannot be found, or the result of cpuset_setproc().
  */
 int
 kern_cpuset_setid(struct thread *td, cpuwhich_t which,
     id_t id, cpusetid_t setid)
 {
 	struct cpuset *target;
 	int error;
 
 	/* Presently we only support per-process sets. */
 	if (which != CPU_WHICH_PID)
 		return (EINVAL);
 	if ((target = cpuset_lookup(setid, td)) == NULL)
 		return (ESRCH);
 	error = cpuset_setproc(id, target, NULL, NULL);
 	cpuset_rel(target);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct cpuset_getid_args {
 	cpulevel_t	level;
 	cpuwhich_t	which;
 	id_t		id;
 	cpusetid_t	*setid;
 };
 #endif
 /*
  * System call entry point: unpack the arguments and hand them to
  * kern_cpuset_getid().
  */
 int
 sys_cpuset_getid(struct thread *td, struct cpuset_getid_args *uap)
 {
 
 	return (kern_cpuset_getid(td, uap->level, uap->which, uap->id,
 	    uap->setid));
 }
 
 /*
  * Look up the id of the cpuset selected by 'level', 'which' and 'id' and
  * copy it out to 'setid'.  Returns 0 or an errno; EINVAL is returned for
  * CPU_LEVEL_WHICH with anything but CPU_WHICH_CPUSET, and for the IRQ
  * and DOMAIN selectors.
  */
 int
 kern_cpuset_getid(struct thread *td, cpulevel_t level, cpuwhich_t which,
     id_t id, cpusetid_t *setid)
 {
 	struct cpuset *nset;
 	struct cpuset *set;
 	struct thread *ttd;
 	struct proc *p;
 	cpusetid_t tmpid;
 	int error;
 
 	if (level == CPU_LEVEL_WHICH && which != CPU_WHICH_CPUSET)
 		return (EINVAL);
 	error = cpuset_which(which, id, &p, &ttd, &set);
 	if (error)
 		return (error);
 	switch (which) {
 	case CPU_WHICH_TID:
 	case CPU_WHICH_PID:
 		/* Take a reference on the thread's base set, then unlock. */
 		thread_lock(ttd);
 		set = cpuset_refbase(ttd->td_cpuset);
 		thread_unlock(ttd);
 		PROC_UNLOCK(p);
 		break;
 	case CPU_WHICH_CPUSET:
 	case CPU_WHICH_JAIL:
 		/* cpuset_which() already returned a referenced set. */
 		break;
 	case CPU_WHICH_IRQ:
 	case CPU_WHICH_DOMAIN:
 		return (EINVAL);
 	}
 	switch (level) {
 	case CPU_LEVEL_ROOT:
 		/* Swap the reference on 'set' for one on its root. */
 		nset = cpuset_refroot(set);
 		cpuset_rel(set);
 		set = nset;
 		break;
 	case CPU_LEVEL_CPUSET:
 		break;
 	case CPU_LEVEL_WHICH:
 		break;
 	}
 	/* Copy the id to a local so the set can be released before copyout. */
 	tmpid = set->cs_id;
 	cpuset_rel(set);
 	if (error == 0)
 		error = copyout(&tmpid, setid, sizeof(tmpid));
 
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct cpuset_getaffinity_args {
 	cpulevel_t	level;
 	cpuwhich_t	which;
 	id_t		id;
 	size_t		cpusetsize;
 	cpuset_t	*mask;
 };
 #endif
 /*
  * System call entry point: unpack the arguments and hand them to
  * kern_cpuset_getaffinity().
  */
 int
 sys_cpuset_getaffinity(struct thread *td, struct cpuset_getaffinity_args *uap)
 {
 
 	return (kern_cpuset_getaffinity(td, uap->level, uap->which,
 	    uap->id, uap->cpusetsize, uap->mask));
 }
 
 /*
  * Fetch a CPU mask for the object named by ('which', 'id') and copy
  * 'cpusetsize' bytes of it out to 'maskp'.
  *
  * For CPU_LEVEL_ROOT and CPU_LEVEL_CPUSET the mask is taken from the
  * set returned by cpuset_refroot() or cpuset_refbase() respectively;
  * for CPU_LEVEL_WHICH it is taken from the object itself (for a
  * process, the union of the masks of all its threads).
  *
  * Returns 0 on success, ERANGE when 'cpusetsize' is smaller than
  * sizeof(cpuset_t) or larger than CPU_MAXSIZE / NBBY, ECAPMODE for
  * requests not permitted in capability mode, EINVAL for unsupported
  * level/which combinations, or the error from cpuset_which(),
  * intr_getaffinity() or copyout().
  */
 int
 kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
     id_t id, size_t cpusetsize, cpuset_t *maskp)
 {
 	struct thread *ttd;
 	struct cpuset *nset;
 	struct cpuset *set;
 	struct proc *p;
 	cpuset_t *mask;
 	int error;
 	size_t size;
 
 	if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY)
 		return (ERANGE);
 	/* In Capability mode, you can only get your own CPU set. */
 	if (IN_CAPABILITY_MODE(td)) {
 		if (level != CPU_LEVEL_WHICH)
 			return (ECAPMODE);
 		if (which != CPU_WHICH_TID && which != CPU_WHICH_PID)
 			return (ECAPMODE);
 		if (id != -1)
 			return (ECAPMODE);
 	}
 	size = cpusetsize;
 	/* Zero-filled, so bytes not written below are copied out as zero. */
 	mask = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
 	error = cpuset_which(which, id, &p, &ttd, &set);
 	if (error)
 		goto out;
 	switch (level) {
 	case CPU_LEVEL_ROOT:
 	case CPU_LEVEL_CPUSET:
 		switch (which) {
 		case CPU_WHICH_TID:
 		case CPU_WHICH_PID:
 			/* Reference the thread's set; released below. */
 			thread_lock(ttd);
 			set = cpuset_ref(ttd->td_cpuset);
 			thread_unlock(ttd);
 			break;
 		case CPU_WHICH_CPUSET:
 		case CPU_WHICH_JAIL:
 			break;
 		case CPU_WHICH_IRQ:
 		case CPU_WHICH_INTRHANDLER:
 		case CPU_WHICH_ITHREAD:
 		case CPU_WHICH_DOMAIN:
 			/*
 			 * NOTE(review): this skips the cpuset_rel() and
 			 * PROC_UNLOCK() below; presumably cpuset_which()
 			 * returns neither a set nor a locked process for
 			 * these kinds -- confirm.
 			 */
 			error = EINVAL;
 			goto out;
 		}
 		if (level == CPU_LEVEL_ROOT)
 			nset = cpuset_refroot(set);
 		else
 			nset = cpuset_refbase(set);
 		CPU_COPY(&nset->cs_mask, mask);
 		cpuset_rel(nset);
 		break;
 	case CPU_LEVEL_WHICH:
 		switch (which) {
 		case CPU_WHICH_TID:
 			thread_lock(ttd);
 			CPU_COPY(&ttd->td_cpuset->cs_mask, mask);
 			thread_unlock(ttd);
 			break;
 		case CPU_WHICH_PID:
 			/* Accumulate the masks of every thread in the process. */
 			FOREACH_THREAD_IN_PROC(p, ttd) {
 				thread_lock(ttd);
 				CPU_OR(mask, &ttd->td_cpuset->cs_mask);
 				thread_unlock(ttd);
 			}
 			break;
 		case CPU_WHICH_CPUSET:
 		case CPU_WHICH_JAIL:
 			CPU_COPY(&set->cs_mask, mask);
 			break;
 		case CPU_WHICH_IRQ:
 		case CPU_WHICH_INTRHANDLER:
 		case CPU_WHICH_ITHREAD:
 			error = intr_getaffinity(id, which, mask);
 			break;
 		case CPU_WHICH_DOMAIN:
 			/* 'id' indexes cpuset_domain[]; bound it first. */
 			if (id < 0 || id >= MAXMEMDOM)
 				error = ESRCH;
 			else
 				CPU_COPY(&cpuset_domain[id], mask);
 			break;
 		}
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 	/* Drop whatever cpuset_which() (or the cpuset_ref() above) left held. */
 	if (set)
 		cpuset_rel(set);
 	if (p)
 		PROC_UNLOCK(p);
 	/* The copyout is done only after the process lock is dropped. */
 	if (error == 0)
 		error = copyout(mask, maskp, size);
 out:
 	free(mask, M_TEMP);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 /*
  * Arguments of the cpuset_setaffinity system call; only declared here
  * when _SYS_SYSPROTO_H_ is not defined.
  */
 struct cpuset_setaffinity_args {
 	cpulevel_t	level;		/* CPU_LEVEL_* selector */
 	cpuwhich_t	which;		/* CPU_WHICH_* kind of object */
 	id_t		id;		/* identifier interpreted per 'which' */
 	size_t		cpusetsize;	/* size in bytes of the 'mask' buffer */
 	const cpuset_t	*mask;		/* user buffer holding the new mask */
 };
 #endif
 int
 sys_cpuset_setaffinity(struct thread *td, struct cpuset_setaffinity_args *uap)
 {
 
 	return (kern_cpuset_setaffinity(td, uap->level, uap->which,
 	    uap->id, uap->cpusetsize, uap->mask));
 }
 
 /*
  * Copy in a CPU mask of 'cpusetsize' bytes from 'maskp' and apply it
  * to the object named by ('which', 'id').
  *
  * For CPU_LEVEL_ROOT and CPU_LEVEL_CPUSET the set returned by
  * cpuset_refroot() or cpuset_refbase() is modified; for
  * CPU_LEVEL_WHICH the thread, process, set, jail or interrupt itself
  * is changed.
  *
  * Returns 0 on success, ERANGE for an out-of-range 'cpusetsize',
  * ECAPMODE for requests not permitted in capability mode, EINVAL for
  * unsupported level/which combinations or when bytes beyond
  * sizeof(cpuset_t) are non-zero, or the error from copyin() or the
  * helper that applies the mask.
  */
 int
 kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
     id_t id, size_t cpusetsize, const cpuset_t *maskp)
 {
 	struct cpuset *nset;
 	struct cpuset *set;
 	struct thread *ttd;
 	struct proc *p;
 	cpuset_t *mask;
 	int error;
 
 	if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY)
 		return (ERANGE);
 	/* In Capability mode, you can only set your own CPU set. */
 	if (IN_CAPABILITY_MODE(td)) {
 		if (level != CPU_LEVEL_WHICH)
 			return (ECAPMODE);
 		if (which != CPU_WHICH_TID && which != CPU_WHICH_PID)
 			return (ECAPMODE);
 		if (id != -1)
 			return (ECAPMODE);
 	}
 	mask = malloc(cpusetsize, M_TEMP, M_WAITOK | M_ZERO);
 	error = copyin(maskp, mask, cpusetsize);
 	if (error)
 		goto out;
 	/*
 	 * Verify that no high bits are set.
 	 */
 	if (cpusetsize > sizeof(cpuset_t)) {
 		char *end;
 		char *cp;
 
 		/* Scan the bytes past sizeof(cpuset_t) up to cpusetsize. */
 		end = cp = (char *)&mask->__bits;
 		end += cpusetsize;
 		cp += sizeof(cpuset_t);
 		while (cp != end)
 			if (*cp++ != 0) {
 				error = EINVAL;
 				goto out;
 			}
 
 	}
 	switch (level) {
 	case CPU_LEVEL_ROOT:
 	case CPU_LEVEL_CPUSET:
 		error = cpuset_which(which, id, &p, &ttd, &set);
 		if (error)
 			break;
 		switch (which) {
 		case CPU_WHICH_TID:
 		case CPU_WHICH_PID:
 			/*
 			 * Reference the thread's set, then drop the process
 			 * lock before modifying anything.
 			 */
 			thread_lock(ttd);
 			set = cpuset_ref(ttd->td_cpuset);
 			thread_unlock(ttd);
 			PROC_UNLOCK(p);
 			break;
 		case CPU_WHICH_CPUSET:
 		case CPU_WHICH_JAIL:
 			break;
 		case CPU_WHICH_IRQ:
 		case CPU_WHICH_INTRHANDLER:
 		case CPU_WHICH_ITHREAD:
 		case CPU_WHICH_DOMAIN:
 			/*
 			 * NOTE(review): nothing is released on this path;
 			 * presumably cpuset_which() returns neither a set
 			 * nor a locked process for these kinds -- confirm.
 			 */
 			error = EINVAL;
 			goto out;
 		}
 		if (level == CPU_LEVEL_ROOT)
 			nset = cpuset_refroot(set);
 		else
 			nset = cpuset_refbase(set);
 		error = cpuset_modify(nset, mask);
 		cpuset_rel(nset);
 		cpuset_rel(set);
 		break;
 	case CPU_LEVEL_WHICH:
 		switch (which) {
 		case CPU_WHICH_TID:
 			error = cpuset_setthread(id, mask);
 			break;
 		case CPU_WHICH_PID:
 			error = cpuset_setproc(id, NULL, mask, NULL);
 			break;
 		case CPU_WHICH_CPUSET:
 		case CPU_WHICH_JAIL:
 			error = cpuset_which(which, id, &p, &ttd, &set);
 			if (error == 0) {
 				error = cpuset_modify(set, mask);
 				cpuset_rel(set);
 			}
 			break;
 		case CPU_WHICH_IRQ:
 		case CPU_WHICH_INTRHANDLER:
 		case CPU_WHICH_ITHREAD:
 			error = intr_setaffinity(id, which, mask);
 			break;
 		default:
 			error = EINVAL;
 			break;
 		}
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 out:
 	free(mask, M_TEMP);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 /*
  * Arguments of the cpuset_getdomain system call; only declared here
  * when _SYS_SYSPROTO_H_ is not defined.
  */
 struct cpuset_getdomain_args {
 	cpulevel_t	level;		/* CPU_LEVEL_* selector */
 	cpuwhich_t	which;		/* CPU_WHICH_* kind of object */
 	id_t		id;		/* identifier interpreted per 'which' */
 	size_t		domainsetsize;	/* size in bytes of the 'mask' buffer */
 	domainset_t	*mask;		/* user buffer receiving the domain mask */
 	int 		*policy;	/* user address receiving the policy */
 };
 #endif
 int
 sys_cpuset_getdomain(struct thread *td, struct cpuset_getdomain_args *uap)
 {
 
 	return (kern_cpuset_getdomain(td, uap->level, uap->which,
 	    uap->id, uap->domainsetsize, uap->mask, uap->policy));
 }
 
 /*
  * Fetch the domain mask and policy for the object named by
  * ('which', 'id'); the mask is copied out to 'maskp'
  * ('domainsetsize' bytes) and the policy is stored at 'policyp'.
  *
  * For CPU_LEVEL_ROOT and CPU_LEVEL_CPUSET the domainset is taken from
  * the set returned by cpuset_refroot() or cpuset_refbase(); for
  * CPU_LEVEL_WHICH it is taken from the object itself.  When the
  * resulting policy is DOMAINSET_POLICY_PREFER the reported mask holds
  * only the preferred domain.
  *
  * Returns 0 on success, ERANGE for an out-of-range 'domainsetsize',
  * ECAPMODE for requests not permitted in capability mode, EINVAL for
  * unsupported level/which combinations, EFAULT when the policy cannot
  * be stored, or the error from cpuset_which() or copyout().
  */
 int
 kern_cpuset_getdomain(struct thread *td, cpulevel_t level, cpuwhich_t which,
     id_t id, size_t domainsetsize, domainset_t *maskp, int *policyp)
 {
 	struct domainset outset;
 	struct thread *ttd;
 	struct cpuset *nset;
 	struct cpuset *set;
 	struct domainset *dset;
 	struct proc *p;
 	domainset_t *mask;
 	int error;
 
 	if (domainsetsize < sizeof(domainset_t) ||
 	    domainsetsize > DOMAINSET_MAXSIZE / NBBY)
 		return (ERANGE);
 	/* In Capability mode, you can only get your own domain set. */
 	if (IN_CAPABILITY_MODE(td)) {
 		if (level != CPU_LEVEL_WHICH)
 			return (ECAPMODE);
 		if (which != CPU_WHICH_TID && which != CPU_WHICH_PID)
 			return (ECAPMODE);
 		if (id != -1)
 			return (ECAPMODE);
 	}
 	/* Zero-filled, so bytes not written below are copied out as zero. */
 	mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO);
 	bzero(&outset, sizeof(outset));
 	error = cpuset_which(which, id, &p, &ttd, &set);
 	if (error)
 		goto out;
 	switch (level) {
 	case CPU_LEVEL_ROOT:
 	case CPU_LEVEL_CPUSET:
 		switch (which) {
 		case CPU_WHICH_TID:
 		case CPU_WHICH_PID:
 			/* Reference the thread's set; released below. */
 			thread_lock(ttd);
 			set = cpuset_ref(ttd->td_cpuset);
 			thread_unlock(ttd);
 			break;
 		case CPU_WHICH_CPUSET:
 		case CPU_WHICH_JAIL:
 			break;
 		case CPU_WHICH_IRQ:
 		case CPU_WHICH_INTRHANDLER:
 		case CPU_WHICH_ITHREAD:
 		case CPU_WHICH_DOMAIN:
 			/*
 			 * NOTE(review): this skips the cpuset_rel() and
 			 * PROC_UNLOCK() below; presumably cpuset_which()
 			 * returns neither a set nor a locked process for
 			 * these kinds -- confirm.
 			 */
 			error = EINVAL;
 			goto out;
 		}
 		if (level == CPU_LEVEL_ROOT)
 			nset = cpuset_refroot(set);
 		else
 			nset = cpuset_refbase(set);
 		domainset_copy(nset->cs_domain, &outset);
 		cpuset_rel(nset);
 		break;
 	case CPU_LEVEL_WHICH:
 		switch (which) {
 		case CPU_WHICH_TID:
 			thread_lock(ttd);
 			domainset_copy(ttd->td_cpuset->cs_domain, &outset);
 			thread_unlock(ttd);
 			break;
 		case CPU_WHICH_PID:
 			FOREACH_THREAD_IN_PROC(p, ttd) {
 				thread_lock(ttd);
 				dset = ttd->td_cpuset->cs_domain;
 				/* Show all domains in the proc. */
 				DOMAINSET_OR(&outset.ds_mask, &dset->ds_mask);
 				/* Last policy wins. */
 				outset.ds_policy = dset->ds_policy;
 				outset.ds_prefer = dset->ds_prefer;
 				thread_unlock(ttd);
 			}
 			break;
 		case CPU_WHICH_CPUSET:
 		case CPU_WHICH_JAIL:
 			domainset_copy(set->cs_domain, &outset);
 			break;
 		case CPU_WHICH_IRQ:
 		case CPU_WHICH_INTRHANDLER:
 		case CPU_WHICH_ITHREAD:
 		case CPU_WHICH_DOMAIN:
 			error = EINVAL;
 			break;
 		}
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 	/* Drop whatever cpuset_which() (or the cpuset_ref() above) left held. */
 	if (set)
 		cpuset_rel(set);
 	if (p)
 		PROC_UNLOCK(p);
 	/*
 	 * Translate prefer into a set containing only the preferred domain,
 	 * not the entire fallback set.
 	 */
 	if (outset.ds_policy == DOMAINSET_POLICY_PREFER) {
 		DOMAINSET_ZERO(&outset.ds_mask);
 		DOMAINSET_SET(outset.ds_prefer, &outset.ds_mask);
 	}
 	DOMAINSET_COPY(&outset.ds_mask, mask);
 	/* Nothing is written to user space once an error has been recorded. */
 	if (error == 0)
 		error = copyout(mask, maskp, domainsetsize);
 	if (error == 0)
 		if (suword32(policyp, outset.ds_policy) != 0)
 			error = EFAULT;
 out:
 	free(mask, M_TEMP);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 /*
  * Arguments of the cpuset_setdomain system call; only declared here
  * when _SYS_SYSPROTO_H_ is not defined.
  */
 struct cpuset_setdomain_args {
 	cpulevel_t	level;		/* CPU_LEVEL_* selector */
 	cpuwhich_t	which;		/* CPU_WHICH_* kind of object */
 	id_t		id;		/* identifier interpreted per 'which' */
 	size_t		domainsetsize;	/* size in bytes of the 'mask' buffer */
 	domainset_t	*mask;		/* user buffer holding the domain mask */
 	int 		policy;		/* DOMAINSET_POLICY_* value to apply */
 };
 #endif
 int
 sys_cpuset_setdomain(struct thread *td, struct cpuset_setdomain_args *uap)
 {
 
 	return (kern_cpuset_setdomain(td, uap->level, uap->which,
 	    uap->id, uap->domainsetsize, uap->mask, uap->policy));
 }
 
 /*
  * Copy in a domain mask of 'domainsetsize' bytes from 'maskp', combine
  * it with 'policy' into a struct domainset and apply that to the
  * object named by ('which', 'id').
  *
  * For CPU_LEVEL_ROOT and CPU_LEVEL_CPUSET the set returned by
  * cpuset_refroot() or cpuset_refbase() is modified; for
  * CPU_LEVEL_WHICH the thread, process, set or jail itself is changed.
  *
  * Returns 0 on success, ERANGE for an out-of-range 'domainsetsize',
  * ECAPMODE for requests not permitted in capability mode, EINVAL for
  * an out-of-range policy, a mask that fails validation, or an
  * unsupported level/which combination, or the error from copyin() or
  * the helper that applies the domainset.
  */
 int
 kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which,
     id_t id, size_t domainsetsize, const domainset_t *maskp, int policy)
 {
 	struct cpuset *nset;
 	struct cpuset *set;
 	struct thread *ttd;
 	struct proc *p;
 	struct domainset domain;
 	domainset_t *mask;
 	int error;
 
 	if (domainsetsize < sizeof(domainset_t) ||
 	    domainsetsize > DOMAINSET_MAXSIZE / NBBY)
 		return (ERANGE);
 	if (policy <= DOMAINSET_POLICY_INVALID ||
 	    policy > DOMAINSET_POLICY_MAX)
 		return (EINVAL);
 	/* In Capability mode, you can only set your own domain set. */
 	if (IN_CAPABILITY_MODE(td)) {
 		if (level != CPU_LEVEL_WHICH)
 			return (ECAPMODE);
 		if (which != CPU_WHICH_TID && which != CPU_WHICH_PID)
 			return (ECAPMODE);
 		if (id != -1)
 			return (ECAPMODE);
 	}
 	memset(&domain, 0, sizeof(domain));
 	mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO);
 	error = copyin(maskp, mask, domainsetsize);
 	if (error)
 		goto out;
 	/*
 	 * Verify that no high bits are set.
 	 */
 	if (domainsetsize > sizeof(domainset_t)) {
 		char *end;
 		char *cp;
 
 		/* Scan the bytes past sizeof(domainset_t) up to domainsetsize. */
 		end = cp = (char *)&mask->__bits;
 		end += domainsetsize;
 		cp += sizeof(domainset_t);
 		while (cp != end)
 			if (*cp++ != 0) {
 				error = EINVAL;
 				goto out;
 			}
 
 	}
 	DOMAINSET_COPY(mask, &domain.ds_mask);
 	domain.ds_policy = policy;
 
 	/*
 	 * Sanitize the provided mask.
 	 */
 	if (!DOMAINSET_SUBSET(&all_domains, &domain.ds_mask)) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/* Translate preferred policy into a mask and fallback. */
 	if (policy == DOMAINSET_POLICY_PREFER) {
 		/* Only support a single preferred domain. */
 		if (DOMAINSET_COUNT(&domain.ds_mask) != 1) {
 			error = EINVAL;
 			goto out;
 		}
 		/* The single set bit names the preferred domain. */
 		domain.ds_prefer = DOMAINSET_FFS(&domain.ds_mask) - 1;
 		/* This will be constrained by domainset_shadow(). */
 		DOMAINSET_COPY(&all_domains, &domain.ds_mask);
 	}
 
 	/*
 	 * When given an impossible policy, fall back to interleaving
 	 * across all domains.
 	 */
 	if (domainset_empty_vm(&domain))
 		domainset_copy(&domainset2, &domain);
 
 	switch (level) {
 	case CPU_LEVEL_ROOT:
 	case CPU_LEVEL_CPUSET:
 		error = cpuset_which(which, id, &p, &ttd, &set);
 		if (error)
 			break;
 		switch (which) {
 		case CPU_WHICH_TID:
 		case CPU_WHICH_PID:
 			/*
 			 * Reference the thread's set, then drop the process
 			 * lock before modifying anything.
 			 */
 			thread_lock(ttd);
 			set = cpuset_ref(ttd->td_cpuset);
 			thread_unlock(ttd);
 			PROC_UNLOCK(p);
 			break;
 		case CPU_WHICH_CPUSET:
 		case CPU_WHICH_JAIL:
 			break;
 		case CPU_WHICH_IRQ:
 		case CPU_WHICH_INTRHANDLER:
 		case CPU_WHICH_ITHREAD:
 		case CPU_WHICH_DOMAIN:
 			/*
 			 * NOTE(review): nothing is released on this path;
 			 * presumably cpuset_which() returns neither a set
 			 * nor a locked process for these kinds -- confirm.
 			 */
 			error = EINVAL;
 			goto out;
 		}
 		if (level == CPU_LEVEL_ROOT)
 			nset = cpuset_refroot(set);
 		else
 			nset = cpuset_refbase(set);
 		error = cpuset_modify_domain(nset, &domain);
 		cpuset_rel(nset);
 		cpuset_rel(set);
 		break;
 	case CPU_LEVEL_WHICH:
 		switch (which) {
 		case CPU_WHICH_TID:
 			error = _cpuset_setthread(id, NULL, &domain);
 			break;
 		case CPU_WHICH_PID:
 			error = cpuset_setproc(id, NULL, NULL, &domain);
 			break;
 		case CPU_WHICH_CPUSET:
 		case CPU_WHICH_JAIL:
 			error = cpuset_which(which, id, &p, &ttd, &set);
 			if (error == 0) {
 				error = cpuset_modify_domain(set, &domain);
 				cpuset_rel(set);
 			}
 			break;
 		case CPU_WHICH_IRQ:
 		case CPU_WHICH_INTRHANDLER:
 		case CPU_WHICH_ITHREAD:
 		default:
 			error = EINVAL;
 			break;
 		}
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 out:
 	free(mask, M_TEMP);
 	return (error);
 }
 
 #ifdef DDB
 
 /*
  * Print the indices of the bits set among the first 'size' positions
  * of 'set' as a comma-separated list, or "<none>" if no bit is set.
  */
 static void
 ddb_display_bitset(const struct bitset *set, int size)
 {
 	int idx, first;
 
 	first = 1;
 	for (idx = 0; idx < size; idx++) {
 		if (!CPU_ISSET(idx, set))
 			continue;
 		/* Only entries after the first get a leading comma. */
 		if (first)
 			db_printf("%d", idx);
 		else
 			db_printf(",%d", idx);
 		first = 0;
 	}
 	if (first)
 		db_printf("<none>");
 }
 
 /*
  * Print the CPUs present in 'set' using ddb_display_bitset().
  */
 void
 ddb_display_cpuset(const cpuset_t *set)
 {
 	const struct bitset *bits;
 
 	bits = (const struct bitset *)set;
 	ddb_display_bitset(bits, CPU_SETSIZE);
 }
 
 /*
  * Print the domains present in 'set' using ddb_display_bitset().
  */
 static void
 ddb_display_domainset(const domainset_t *set)
 {
 	const struct bitset *bits;
 
 	bits = (const struct bitset *)set;
 	ddb_display_bitset(bits, DOMAINSET_SETSIZE);
 }
 
 /*
  * DDB "show cpusets": walk the cpuset_ids list and print, for each
  * set, its identity, CPU mask and domain policy/mask.  The walk ends
  * early once db_pager_quit is set.
  */
 DB_SHOW_COMMAND(cpusets, db_show_cpusets)
 {
 	struct cpuset *cs;
 	struct domainset *dom;
 
 	LIST_FOREACH(cs, &cpuset_ids, cs_link) {
 		/* A set without a parent is reported with parent id 0. */
 		db_printf("set=%p id=%-6u ref=%-6d flags=0x%04x parent id=%d\n",
 		    cs, cs->cs_id, cs->cs_ref, cs->cs_flags,
 		    (cs->cs_parent != NULL) ? cs->cs_parent->cs_id : 0);
 
 		db_printf("  cpu mask=");
 		ddb_display_cpuset(&cs->cs_mask);
 		db_printf("\n");
 
 		dom = cs->cs_domain;
 		db_printf("  domain policy %d prefer %d mask=",
 		    dom->ds_policy, dom->ds_prefer);
 		ddb_display_domainset(&dom->ds_mask);
 		db_printf("\n");
 
 		if (db_pager_quit)
 			break;
 	}
 }
 
 /*
  * DDB "show domainsets": walk the cpuset_domains list and print each
  * domainset's policy, preferred domain, count and mask.  As with
  * "show cpusets", the walk ends early once db_pager_quit is set so
  * that quitting the pager actually stops the output.
  */
 DB_SHOW_COMMAND(domainsets, db_show_domainsets)
 {
 	struct domainset *set;
 
 	LIST_FOREACH(set, &cpuset_domains, ds_link) {
 		db_printf("set=%p policy %d prefer %d cnt %d\n",
 		    set, set->ds_policy, set->ds_prefer, set->ds_cnt);
 		db_printf("  mask =");
 		ddb_display_domainset(&set->ds_mask);
 		db_printf("\n");
 		if (db_pager_quit)
 			break;
 	}
 }
 #endif /* DDB */
Index: head/sys/kern/kern_rmlock.c
===================================================================
--- head/sys/kern/kern_rmlock.c	(revision 355708)
+++ head/sys/kern/kern_rmlock.c	(revision 355709)
@@ -1,855 +1,855 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2007 Stephan Uphoff <ups@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Machine independent bits of reader/writer lock implementation.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 
 #include <sys/kernel.h>
 #include <sys/kdb.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rmlock.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/turnstile.h>
 #include <sys/lock_profile.h>
 #include <machine/cpu.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 /*
  * A cookie to mark destroyed rmlocks.  This is stored in the head of
  * rm_activeReaders.
  */
 #define	RM_DESTROYED	((void *)0xdead)
 
 /* True when rm_destroy() has stamped the lock with RM_DESTROYED. */
 #define	rm_destroyed(rm)						\
 	(LIST_FIRST(&(rm)->rm_activeReaders) == RM_DESTROYED)
 
 /*
  * Bits kept in a tracker's rmp_flags:
  *   RMPF_ONQUEUE - set when the tracker is put on rm_activeReaders.
  *   RMPF_SIGNAL  - set by a writer before it waits on the turnstile;
  *                  the reader signals that turnstile when it unlocks.
  */
 #define RMPF_ONQUEUE	1
 #define RMPF_SIGNAL	2
 
 /* Without INVARIANTS, _rm_assert() calls in this file expand to nothing. */
 #ifndef INVARIANTS
 #define	_rm_assert(c, what, file, line)
 #endif
 
 /*
  * Forward declarations of the lock class methods referenced by the
  * lock_class_rm and lock_class_rm_sleepable tables below.
  */
 static void	assert_rm(const struct lock_object *lock, int what);
 #ifdef DDB
 static void	db_show_rm(const struct lock_object *lock);
 #endif
 static void	lock_rm(struct lock_object *lock, uintptr_t how);
 #ifdef KDTRACE_HOOKS
 static int	owner_rm(const struct lock_object *lock, struct thread **owner);
 #endif
 static uintptr_t unlock_rm(struct lock_object *lock);
 
 /*
  * Lock class used for rmlocks initialized without RM_SLEEPABLE (see
  * rm_init_flags()).
  */
 struct lock_class lock_class_rm = {
 	.lc_name = "rm",
 	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE,
 	.lc_assert = assert_rm,
 #ifdef DDB
 	.lc_ddb_show = db_show_rm,
 #endif
 	.lc_lock = lock_rm,
 	.lc_unlock = unlock_rm,
 #ifdef KDTRACE_HOOKS
 	.lc_owner = owner_rm,
 #endif
 };
 
 /*
  * Lock class used for rmlocks initialized with RM_SLEEPABLE (see
  * rm_init_flags()).  It shares every method with lock_class_rm and
  * differs only in its name and the additional LC_SLEEPABLE flag.
  */
 struct lock_class lock_class_rm_sleepable = {
 	.lc_name = "sleepable rm",
 	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE,
 	.lc_assert = assert_rm,
 #ifdef DDB
 	.lc_ddb_show = db_show_rm,
 #endif
 	.lc_lock = lock_rm,
 	.lc_unlock = unlock_rm,
 #ifdef KDTRACE_HOOKS
 	.lc_owner = owner_rm,
 #endif
 };
 
 /*
  * lc_assert method: forward the assertion 'what' to rm_assert() for
  * the rmlock that embeds 'lock'.
  */
 static void
 assert_rm(const struct lock_object *lock, int what)
 {
 
 	rm_assert((const struct rmlock *)lock, what);
 }
 
 /*
  * lc_lock method.  'how' is the cookie previously returned by
  * unlock_rm(): zero requests the write lock, any other value is the
  * address of the rm_priotracker to use for a read lock.
  */
 static void
 lock_rm(struct lock_object *lock, uintptr_t how)
 {
 	struct rmlock *rm;
 	struct rm_priotracker *tracker;
 
 	rm = (struct rmlock *)lock;
 	if (how != 0) {
 		tracker = (struct rm_priotracker *)how;
 		rm_rlock(rm, tracker);
 		return;
 	}
 	rm_wlock(rm);
 }
 
 /*
  * lc_unlock method: release the rmlock held by curthread and return a
  * cookie that lock_rm() can use to take it again in the same mode.
  *
  * Returns 0 if the lock was write-owned, otherwise the address of the
  * rm_priotracker that was used for the read lock.
  */
 static uintptr_t
 unlock_rm(struct lock_object *lock)
 {
 	struct thread *td;
 	struct pcpu *pc;
 	struct rmlock *rm;
 	struct rm_queue *queue;
 	struct rm_priotracker *cur, *tracker;
 	uintptr_t how;
 
 	rm = (struct rmlock *)lock;
 	tracker = NULL;
 	how = 0;
 	rm_assert(rm, RA_LOCKED | RA_NOTRECURSED);
 	if (rm_wowned(rm))
 		rm_wunlock(rm);
 	else {
 		/*
 		 * Find the right rm_priotracker structure for curthread.
 		 * The guarantee about its uniqueness is given by the fact
 		 * we already asserted the lock wasn't recursively acquired.
 		 */
 		critical_enter();
 		td = curthread;
 		pc = get_pcpu();
 		for (queue = pc->pc_rm_queue.rmq_next;
 		    queue != &pc->pc_rm_queue; queue = queue->rmq_next) {
 			cur = (struct rm_priotracker *)queue;
 			if ((cur->rmp_rmlock == rm) &&
 			    (cur->rmp_thread == td)) {
 				/*
 				 * Record the tracker only on a match so that
 				 * 'tracker' stays NULL when the search fails
 				 * and the assertion below can catch it.
 				 */
 				tracker = cur;
 				break;
 			}
 		}
 		KASSERT(tracker != NULL,
 		    ("rm_priotracker is non-NULL when lock held in read mode"));
 		how = (uintptr_t)tracker;
 		critical_exit();
 		rm_runlock(rm, tracker);
 	}
 	return (how);
 }
 
 #ifdef KDTRACE_HOOKS
 /*
  * lc_owner method: delegate the ownership query to the lock class of
  * the rmlock's rm_wlock_object.
  */
 static int
 owner_rm(const struct lock_object *lock, struct thread **owner)
 {
 	const struct lock_object *wlock;
 
 	wlock = &((const struct rmlock *)lock)->rm_wlock_object;
 	return (LOCK_CLASS(wlock)->lc_owner(wlock, owner));
 }
 #endif
 
 /*
  * Spin mutex held in this file around every insertion into and removal
  * from an rmlock's rm_activeReaders list.
  */
 static struct mtx rm_spinlock;
 
 MTX_SYSINIT(rm_spinlock, &rm_spinlock, "rm_spinlock", MTX_SPIN);
 
 /*
  * Add or remove tracker from per-cpu list.
  *
  * The per-cpu list can be traversed at any time in forward direction from an
  * interrupt on the *local* cpu.
  */
 /*
  * Insert 'tracker' at the head of the per-cpu queue of 'pc'.  The
  * tracker's own links are filled in before the list head is pointed at
  * it, so a forward traversal never sees a half-initialized entry.
  */
 static void inline
 rm_tracker_add(struct pcpu *pc, struct rm_priotracker *tracker)
 {
 	struct rm_queue *next;
 
 	/* Initialize all tracker pointers */
 	tracker->rmp_cpuQueue.rmq_prev = &pc->pc_rm_queue;
 	next = pc->pc_rm_queue.rmq_next;
 	tracker->rmp_cpuQueue.rmq_next = next;
 
 	/* rmq_prev is not used during forward traversal. */
 	next->rmq_prev = &tracker->rmp_cpuQueue;
 
 	/* Update pointer to first element. */
 	pc->pc_rm_queue.rmq_next = &tracker->rmp_cpuQueue;
 }
 
 /*
  * Return a count of the number of trackers the thread 'td' already
  * has on this CPU for the lock 'rm'.
  */
 static int
 rm_trackers_present(const struct pcpu *pc, const struct rmlock *rm,
     const struct thread *td)
 {
 	struct rm_queue *cur;
 	struct rm_priotracker *tracker;
 	int matches;
 
 	matches = 0;
 	/* The queue is circular; stop once we are back at its head. */
 	cur = pc->pc_rm_queue.rmq_next;
 	while (cur != &pc->pc_rm_queue) {
 		tracker = (struct rm_priotracker *)cur;
 		if (tracker->rmp_rmlock == rm && tracker->rmp_thread == td)
 			matches++;
 		cur = cur->rmq_next;
 	}
 	return (matches);
 }
 
 /*
  * Unlink 'tracker' from the per-cpu queue it is on.  'pc' is not used
  * by the body; the neighbours are found through the tracker's own
  * links.
  */
 static void inline
 rm_tracker_remove(struct pcpu *pc, struct rm_priotracker *tracker)
 {
 	struct rm_queue *next, *prev;
 
 	next = tracker->rmp_cpuQueue.rmq_next;
 	prev = tracker->rmp_cpuQueue.rmq_prev;
 
 	/* Not used during forward traversal. */
 	next->rmq_prev = prev;
 
 	/* Remove from list. */
 	prev->rmq_next = next;
 }
 
 /*
  * Rendezvous handler run on behalf of a writer (see _rm_wlock()).
  * Every tracker on this CPU's queue that belongs to the rmlock 'arg'
  * and has no flags set yet is marked RMPF_ONQUEUE and inserted on the
  * lock's rm_activeReaders list, under rm_spinlock.
  */
 static void
 rm_cleanIPI(void *arg)
 {
 	struct pcpu *pc;
 	struct rmlock *rm = arg;
 	struct rm_priotracker *tracker;
 	struct rm_queue *queue;
 	pc = get_pcpu();
 
 	for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue;
 	    queue = queue->rmq_next) {
 		tracker = (struct rm_priotracker *)queue;
 		/* Skip other locks' trackers and ones already queued. */
 		if (tracker->rmp_rmlock == rm && tracker->rmp_flags == 0) {
 			tracker->rmp_flags = RMPF_ONQUEUE;
 			mtx_lock_spin(&rm_spinlock);
 			LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker,
 			    rmp_qentry);
 			mtx_unlock_spin(&rm_spinlock);
 		}
 	}
 }
 
 /*
  * Initialize the rmlock 'rm' under the name 'name'.  'opts' is a mask
  * of RM_* options: RM_NOWITNESS, RM_RECURSE, RM_NEW and RM_SLEEPABLE.
  * A sleepable rmlock is backed by an sx lock, any other by a mutex.
  */
 void
 rm_init_flags(struct rmlock *rm, const char *name, int opts)
 {
 	struct lock_class *cls;
 	int lo_flags, backing_flags;
 
 	/* Translate the RM_* options into lock_object flags. */
 	lo_flags = 0;
 	if ((opts & RM_NOWITNESS) == 0)
 		lo_flags |= LO_WITNESS;
 	if ((opts & RM_RECURSE) != 0)
 		lo_flags |= LO_RECURSABLE;
 	if ((opts & RM_NEW) != 0)
 		lo_flags |= LO_NEW;
 
 	rm->rm_writecpus = all_cpus;
 	LIST_INIT(&rm->rm_activeReaders);
 
 	if ((opts & RM_SLEEPABLE) != 0) {
 		lo_flags |= LO_SLEEPABLE;
 		cls = &lock_class_rm_sleepable;
 		backing_flags = 0;
 		if ((opts & RM_NEW) != 0)
 			backing_flags = SX_NEW;
 		sx_init_flags(&rm->rm_lock_sx, "rmlock_sx",
 		    backing_flags | SX_NOWITNESS);
 	} else {
 		cls = &lock_class_rm;
 		backing_flags = 0;
 		if ((opts & RM_NEW) != 0)
 			backing_flags = MTX_NEW;
 		mtx_init(&rm->rm_lock_mtx, name, "rmlock_mtx",
 		    backing_flags | MTX_NOWITNESS);
 	}
 	lock_init(&rm->lock_object, cls, name, NULL, lo_flags);
 }
 
 /*
  * Initialize 'rm' with no RM_* options set.
  */
 void
 rm_init(struct rmlock *rm, const char *name)
 {
 	const int no_opts = 0;
 
 	rm_init_flags(rm, name, no_opts);
 }
 
 /*
  * Tear down an unlocked rmlock: stamp it with the RM_DESTROYED cookie,
  * destroy the backing sx lock or mutex, then the lock object itself.
  */
 void
 rm_destroy(struct rmlock *rm)
 {
 
 	rm_assert(rm, RA_UNLOCKED);
 	LIST_FIRST(&rm->rm_activeReaders) = RM_DESTROYED;
 	if ((rm->lock_object.lo_flags & LO_SLEEPABLE) == 0)
 		mtx_destroy(&rm->rm_lock_mtx);
 	else
 		sx_destroy(&rm->rm_lock_sx);
 	lock_destroy(&rm->lock_object);
 }
 
 /*
  * Report whether the backing sx lock or mutex of 'rm' is exclusively
  * held, as answered by sx_xlocked() or mtx_owned().
  */
 int
 rm_wowned(const struct rmlock *rm)
 {
 
 	if ((rm->lock_object.lo_flags & LO_SLEEPABLE) == 0)
 		return (mtx_owned(&rm->rm_lock_mtx));
 	return (sx_xlocked(&rm->rm_lock_sx));
 }
 
 /*
  * Initialize the rmlock described by the struct rm_args that 'arg'
  * points to.
  */
 void
 rm_sysinit(void *arg)
 {
 	struct rm_args *ra = arg;
 
 	rm_init_flags(ra->ra_rm, ra->ra_desc, ra->ra_flags);
 }
 
 /*
  * Slow path of _rm_rlock(), entered with 'tracker' already on the
  * per-cpu queue and the thread pinned.
  *
  * Returns 1 once the read lock is held.  Returns 0 only when 'trylock'
  * is set and the backing sx lock or mutex could not be taken; in that
  * case the tracker has been removed from the per-cpu queue and the
  * thread has been unpinned.
  */
 static __noinline int
 _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
 {
 	struct pcpu *pc;
 
 	critical_enter();
 	pc = get_pcpu();
 
 	/* Check if we just need to do a proper critical_exit. */
 	if (!CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus)) {
 		critical_exit();
 		return (1);
 	}
 
 	/* Remove our tracker from the per-cpu list. */
 	rm_tracker_remove(pc, tracker);
 
 	/* Check to see if the IPI granted us the lock after all. */
 	if (tracker->rmp_flags) {
 		/* Just add back tracker - we hold the lock. */
 		rm_tracker_add(pc, tracker);
 		critical_exit();
 		return (1);
 	}
 
 	/*
 	 * We allow readers to acquire a lock even if a writer is blocked if
 	 * the lock is recursive and the reader already holds the lock.
 	 */
 	if ((rm->lock_object.lo_flags & LO_RECURSABLE) != 0) {
 		/*
 		 * Just grant the lock if this thread already has a tracker
 		 * for this lock on the per-cpu queue.
 		 */
 		if (rm_trackers_present(pc, rm, curthread) != 0) {
 			mtx_lock_spin(&rm_spinlock);
 			LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker,
 			    rmp_qentry);
 			tracker->rmp_flags = RMPF_ONQUEUE;
 			mtx_unlock_spin(&rm_spinlock);
 			rm_tracker_add(pc, tracker);
 			critical_exit();
 			return (1);
 		}
 	}
 
 	/* Undo the pin taken in _rm_rlock() before acquiring the backing lock. */
 	sched_unpin();
 	critical_exit();
 
 	if (trylock) {
 		if (rm->lock_object.lo_flags & LO_SLEEPABLE) {
 			if (!sx_try_xlock(&rm->rm_lock_sx))
 				return (0);
 		} else {
 			if (!mtx_trylock(&rm->rm_lock_mtx))
 				return (0);
 		}
 	} else {
 		if (rm->lock_object.lo_flags & LO_SLEEPABLE) {
 			/* Sleeping is allowed only around the sx acquisition. */
 			THREAD_SLEEPING_OK();
 			sx_xlock(&rm->rm_lock_sx);
 			THREAD_NO_SLEEPING();
 		} else
 			mtx_lock(&rm->rm_lock_mtx);
 	}
 
 	/*
 	 * With the backing lock held, clear this CPU's bit in rm_writecpus,
 	 * put the tracker back on the (possibly different) CPU's queue and
 	 * pin the thread again.
 	 */
 	critical_enter();
 	pc = get_pcpu();
 	CPU_CLR(pc->pc_cpuid, &rm->rm_writecpus);
 	rm_tracker_add(pc, tracker);
 	sched_pin();
 	critical_exit();
 
 	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
 		sx_xunlock(&rm->rm_lock_sx);
 	else
 		mtx_unlock(&rm->rm_lock_mtx);
 
 	return (1);
 }
 
 /*
  * Acquire 'rm' for reading, recording the acquisition in 'tracker'.
  *
  * The tracker is initialized, put on the current CPU's queue and the
  * thread is pinned; if no preemption is owed and this CPU's bit is
  * clear in rm_writecpus the lock is held and 1 is returned.  Otherwise
  * the result of _rm_rlock_hard() is returned, which can be 0 only when
  * 'trylock' is set.  Returns 1 without doing anything when the
  * scheduler is stopped.
  */
 int
 _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
 {
 	struct thread *td = curthread;
 	struct pcpu *pc;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	tracker->rmp_flags  = 0;
 	tracker->rmp_thread = td;
 	tracker->rmp_rmlock = rm;
 
 	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
 		THREAD_NO_SLEEPING();
 
 	td->td_critnest++;	/* critical_enter(); */
 
 	/* Keep the compiler from moving the queue update out of the section. */
 	__compiler_membar();
 
 	pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */
 
 	rm_tracker_add(pc, tracker);
 
 	sched_pin();
 
 	__compiler_membar();
 
 	/* Open-coded exit; an owed preemption is handled on the slow path. */
 	td->td_critnest--;
 
 	/*
 	 * Fast path to combine two common conditions into a single
 	 * conditional jump.
 	 */
 	if (__predict_true(0 == (td->td_owepreempt |
 	    CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus))))
 		return (1);
 
 	/* We do not have a read token and need to acquire one. */
 	return _rm_rlock_hard(rm, tracker, trylock);
 }
 
 /*
  * Slow path of _rm_runlock(), taken when a preemption is owed or the
  * tracker has flags set.  Removes a flagged tracker from the lock's
  * active-reader list and, if RMPF_SIGNAL is set, signals the lock's
  * turnstile.
  */
 static __noinline void
 _rm_unlock_hard(struct thread *td,struct rm_priotracker *tracker)
 {
 
 	/* Enter and leave a critical section so critical_exit() runs. */
 	if (td->td_owepreempt) {
 		td->td_critnest++;
 		critical_exit();
 	}
 
 	/* No flags: the tracker was never put on rm_activeReaders. */
 	if (!tracker->rmp_flags)
 		return;
 
 	mtx_lock_spin(&rm_spinlock);
 	LIST_REMOVE(tracker, rmp_qentry);
 
 	if (tracker->rmp_flags & RMPF_SIGNAL) {
 		struct rmlock *rm;
 		struct turnstile *ts;
 
 		rm = tracker->rmp_rmlock;
 
 		/* Take the turnstile chain lock before dropping rm_spinlock. */
 		turnstile_chain_lock(&rm->lock_object);
 		mtx_unlock_spin(&rm_spinlock);
 
 		ts = turnstile_lookup(&rm->lock_object);
 
 		turnstile_signal(ts, TS_EXCLUSIVE_QUEUE);
 		turnstile_unpend(ts);
 		turnstile_chain_unlock(&rm->lock_object);
 	} else
 		mtx_unlock_spin(&rm_spinlock);
 }
 
 /*
  * Release a read lock on 'rm' that was acquired with 'tracker'.  The
  * tracker is taken off the per-cpu queue and the thread unpinned; the
  * slow path _rm_unlock_hard() is only entered when a preemption is
  * owed or the tracker has flags set.  Does nothing when the scheduler
  * is stopped.
  */
 void
 _rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker)
 {
 	struct pcpu *pc;
 	struct thread *td = tracker->rmp_thread;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	td->td_critnest++;	/* critical_enter(); */
 	pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */
 	rm_tracker_remove(pc, tracker);
 	td->td_critnest--;
 	sched_unpin();
 
 	/* Undo the THREAD_NO_SLEEPING() done by _rm_rlock(). */
 	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
 		THREAD_SLEEPING_OK();
 
 	if (__predict_true(0 == (td->td_owepreempt | tracker->rmp_flags)))
 		return;
 
 	_rm_unlock_hard(td, tracker);
 }
 
 /*
  * Acquire 'rm' for writing.  The backing sx lock or mutex is taken
  * first.  If rm_writecpus differs from all_cpus, the CPUs missing from
  * it run rm_cleanIPI() and the writer then waits on the lock's
  * turnstile until rm_activeReaders is empty.  Does nothing when the
  * scheduler is stopped.
  */
 void
 _rm_wlock(struct rmlock *rm)
 {
 	struct rm_priotracker *prio;
 	struct turnstile *ts;
 	cpuset_t readcpus;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
 		sx_xlock(&rm->rm_lock_sx);
 	else
 		mtx_lock(&rm->rm_lock_mtx);
 
 	if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) {
 		/* Get all read tokens back */
 		readcpus = all_cpus;
-		CPU_NAND(&readcpus, &rm->rm_writecpus);
+		CPU_ANDNOT(&readcpus, &rm->rm_writecpus);
 		rm->rm_writecpus = all_cpus;
 
 		/*
 		 * Assumes rm->rm_writecpus update is visible on other CPUs
 		 * before rm_cleanIPI is called.
 		 */
 #ifdef SMP
 		smp_rendezvous_cpus(readcpus,
 		    smp_no_rendezvous_barrier,
 		    rm_cleanIPI,
 		    smp_no_rendezvous_barrier,
 		    rm);
 
 #else
 		rm_cleanIPI(rm);
 #endif
 
 		/*
 		 * Wait for each active reader in turn.  RMPF_SIGNAL asks the
 		 * reader to signal the turnstile when it unlocks (see
 		 * _rm_unlock_hard()).
 		 */
 		mtx_lock_spin(&rm_spinlock);
 		while ((prio = LIST_FIRST(&rm->rm_activeReaders)) != NULL) {
 			ts = turnstile_trywait(&rm->lock_object);
 			prio->rmp_flags = RMPF_ONQUEUE | RMPF_SIGNAL;
 			mtx_unlock_spin(&rm_spinlock);
 			turnstile_wait(ts, prio->rmp_thread,
 			    TS_EXCLUSIVE_QUEUE);
 			mtx_lock_spin(&rm_spinlock);
 		}
 		mtx_unlock_spin(&rm_spinlock);
 	}
 }
 
 /*
  * Release the write lock on 'rm' by dropping its backing sx lock or
  * mutex.
  */
 void
 _rm_wunlock(struct rmlock *rm)
 {
 
 	if ((rm->lock_object.lo_flags & LO_SLEEPABLE) == 0)
 		mtx_unlock(&rm->rm_lock_mtx);
 	else
 		sx_xunlock(&rm->rm_lock_sx);
 }
 
 #if LOCK_DEBUG > 0
 
 /*
  * Debugging variant of _rm_wlock(): asserts that the caller is not the
  * idle thread, that the lock has not been destroyed and that it is
  * currently unlocked, runs the WITNESS order check, takes the lock and
  * then records the acquisition.  'file' and 'line' identify the call
  * site for diagnostics.
  */
 void
 _rm_wlock_debug(struct rmlock *rm, const char *file, int line)
 {
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("rm_wlock() by idle thread %p on rmlock %s @ %s:%d",
 	    curthread, rm->lock_object.lo_name, file, line));
 	KASSERT(!rm_destroyed(rm),
 	    ("rm_wlock() of destroyed rmlock @ %s:%d", file, line));
 	_rm_assert(rm, RA_UNLOCKED, file, line);
 
 	WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE,
 	    file, line, NULL);
 
 	_rm_wlock(rm);
 
 	LOCK_LOG_LOCK("RMWLOCK", &rm->lock_object, 0, 0, file, line);
 	WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);
 	TD_LOCKS_INC(curthread);
 }
 
 /*
  * Debugging variant of _rm_wunlock(): asserts that the lock has not
  * been destroyed and is write-locked, records the release and then
  * drops the lock.  'file' and 'line' identify the call site for
  * diagnostics.
  */
 void
 _rm_wunlock_debug(struct rmlock *rm, const char *file, int line)
 {
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	KASSERT(!rm_destroyed(rm),
 	    ("rm_wunlock() of destroyed rmlock @ %s:%d", file, line));
 	_rm_assert(rm, RA_WLOCKED, file, line);
 	WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("RMWUNLOCK", &rm->lock_object, 0, 0, file, line);
 	_rm_wunlock(rm);
 	TD_LOCKS_DEC(curthread);
 }
 
 /*
  * Debugging variant of _rm_rlock().  Performs sanity assertions, calls
  * _rm_rlock() and logs the outcome.  'file' and 'line' identify the
  * call site for diagnostics.
  *
  * Returns 1 when the read lock was acquired (or the scheduler is
  * stopped) and 0 otherwise.
  */
 int
 _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
     int trylock, const char *file, int line)
 {
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 #ifdef INVARIANTS
 	/* A blocking acquire of a non-recursable lock must not recurse. */
 	if (!(rm->lock_object.lo_flags & LO_RECURSABLE) && !trylock) {
 		critical_enter();
 		KASSERT(rm_trackers_present(get_pcpu(), rm,
 		    curthread) == 0,
 		    ("rm_rlock: recursed on non-recursive rmlock %s @ %s:%d\n",
 		    rm->lock_object.lo_name, file, line));
 		critical_exit();
 	}
 #endif
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("rm_rlock() by idle thread %p on rmlock %s @ %s:%d",
 	    curthread, rm->lock_object.lo_name, file, line));
 	KASSERT(!rm_destroyed(rm),
 	    ("rm_rlock() of destroyed rmlock @ %s:%d", file, line));
 	/* The write-owner and lock-order checks are skipped for try-locks. */
 	if (!trylock) {
 		KASSERT(!rm_wowned(rm),
 		    ("rm_rlock: wlock already held for %s @ %s:%d",
 		    rm->lock_object.lo_name, file, line));
 		WITNESS_CHECKORDER(&rm->lock_object,
 		    LOP_NEWORDER | LOP_NOSLEEP, file, line, NULL);
 	}
 
 	if (_rm_rlock(rm, tracker, trylock)) {
 		if (trylock)
 			LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 1, file,
 			    line);
 		else
 			LOCK_LOG_LOCK("RMRLOCK", &rm->lock_object, 0, 0, file,
 			    line);
 		WITNESS_LOCK(&rm->lock_object, LOP_NOSLEEP, file, line);
 		TD_LOCKS_INC(curthread);
 		return (1);
 	} else if (trylock)
 		LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 0, file, line);
 
 	return (0);
 }
 
 /*
  * Debugging variant of _rm_runlock(): asserts that the lock has not
  * been destroyed and is read-locked, records the release and then
  * drops the read lock held through 'tracker'.  'file' and 'line'
  * identify the call site for diagnostics.
  */
 void
 _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
     const char *file, int line)
 {
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	KASSERT(!rm_destroyed(rm),
 	    ("rm_runlock() of destroyed rmlock @ %s:%d", file, line));
 	_rm_assert(rm, RA_RLOCKED, file, line);
 	WITNESS_UNLOCK(&rm->lock_object, 0, file, line);
 	LOCK_LOG_LOCK("RMRUNLOCK", &rm->lock_object, 0, 0, file, line);
 	_rm_runlock(rm, tracker);
 	TD_LOCKS_DEC(curthread);
 }
 
 #else
 
 /*
  * Just strip out file and line arguments if no lock debugging is enabled in
  * the kernel - we are called from a kernel module.
  */
 void
 _rm_wlock_debug(struct rmlock *rm, const char *file, int line)
 {
 
 	/* The call-site information is not used without lock debugging. */
 	(void)file;
 	(void)line;
 	_rm_wlock(rm);
 }
 
 /* Non-debug build: file and line are ignored; forwards to _rm_wunlock(). */
 void
 _rm_wunlock_debug(struct rmlock *rm, const char *file, int line)
 {
 
 	_rm_wunlock(rm);
 }
 
 /*
  * Non-debug build: file and line are ignored.  Returns whatever
  * _rm_rlock() returns for the given tracker and trylock flag.
  */
 int
 _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
     int trylock, const char *file, int line)
 {
 	int acquired;
 
 	acquired = _rm_rlock(rm, tracker, trylock);
 	return (acquired);
 }
 
 /* Non-debug build: file and line are ignored; forwards to _rm_runlock(). */
 void
 _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
     const char *file, int line)
 {
 
 	_rm_runlock(rm, tracker);
 }
 
 #endif
 
 #ifdef INVARIANT_SUPPORT
 #ifndef INVARIANTS
 #undef _rm_assert
 #endif
 
 /*
  * Note that this does not need to use witness_assert() for read lock
  * assertions since an exact count of read locks held by this thread
  * is computable.
  */
 void
 _rm_assert(const struct rmlock *rm, int what, const char *file, int line)
 {
 	int count;
 
 	/* Assertions are not evaluated once the scheduler has been stopped. */
 	if (SCHEDULER_STOPPED())
 		return;
 	switch (what) {
 	case RA_LOCKED:
 	case RA_LOCKED | RA_RECURSED:
 	case RA_LOCKED | RA_NOTRECURSED:
 	case RA_RLOCKED:
 	case RA_RLOCKED | RA_RECURSED:
 	case RA_RLOCKED | RA_NOTRECURSED:
 		/*
 		 * Handle the write-locked case.  Unlike other
 		 * primitives, writers can never recurse.
 		 */
 		if (rm_wowned(rm)) {
 			if (what & RA_RLOCKED)
 				panic("Lock %s exclusively locked @ %s:%d\n",
 				    rm->lock_object.lo_name, file, line);
 			if (what & RA_RECURSED)
 				panic("Lock %s not recursed @ %s:%d\n",
 				    rm->lock_object.lo_name, file, line);
 			break;
 		}
 
 		/*
 		 * Count the trackers rm_trackers_present() reports for this
 		 * lock and thread; the critical section brackets the
 		 * get_pcpu() lookup.
 		 */
 		critical_enter();
 		count = rm_trackers_present(get_pcpu(), rm, curthread);
 		critical_exit();
 
 		/* No tracker at all: the lock is not held by this thread. */
 		if (count == 0)
 			panic("Lock %s not %slocked @ %s:%d\n",
 			    rm->lock_object.lo_name, (what & RA_RLOCKED) ?
 			    "read " : "", file, line);
 		/* More than one tracker means the read lock is recursed. */
 		if (count > 1) {
 			if (what & RA_NOTRECURSED)
 				panic("Lock %s recursed @ %s:%d\n",
 				    rm->lock_object.lo_name, file, line);
 		} else if (what & RA_RECURSED)
 			panic("Lock %s not recursed @ %s:%d\n",
 			    rm->lock_object.lo_name, file, line);
 		break;
 	case RA_WLOCKED:
 		/* Only the rm_wowned() check is made for write assertions. */
 		if (!rm_wowned(rm))
 			panic("Lock %s not exclusively locked @ %s:%d\n",
 			    rm->lock_object.lo_name, file, line);
 		break;
 	case RA_UNLOCKED:
 		/* Must be neither write-owned nor tracked as a reader. */
 		if (rm_wowned(rm))
 			panic("Lock %s exclusively locked @ %s:%d\n",
 			    rm->lock_object.lo_name, file, line);
 
 		critical_enter();
 		count = rm_trackers_present(get_pcpu(), rm, curthread);
 		critical_exit();
 
 		if (count != 0)
 			panic("Lock %s read locked @ %s:%d\n",
 			    rm->lock_object.lo_name, file, line);
 		break;
 	default:
 		/* Any other combination of RA_* bits is rejected. */
 		panic("Unknown rm lock assertion: %d @ %s:%d", what, file,
 		    line);
 	}
 }
 #endif /* INVARIANT_SUPPORT */
 
 #ifdef DDB
 /*
  * DDB helper: print one line describing the thread recorded in a read
  * tracker, followed by the tracker's queue flags in braces ("0" when the
  * tracker is not flagged as being on a queue).
  */
 static void
 print_tracker(struct rm_priotracker *tr)
 {
 	struct thread *owner;
 
 	owner = tr->rmp_thread;
 	db_printf("   thread %p (tid %d, pid %d, \"%s\") {",
 	    owner, owner->td_tid, owner->td_proc->p_pid, owner->td_name);
 	if ((tr->rmp_flags & RMPF_ONQUEUE) == 0) {
 		db_printf("0");
 	} else {
 		db_printf("ONQUEUE");
 		/* SIGNAL is only reported together with ONQUEUE. */
 		if ((tr->rmp_flags & RMPF_SIGNAL) != 0)
 			db_printf(",SIGNAL");
 	}
 	db_printf("}\n");
 }
 
 static void
 db_show_rm(const struct lock_object *lock)
 {
 	struct rm_priotracker *tr;
 	struct rm_queue *queue;
 	const struct rmlock *rm;
 	struct lock_class *lc;
 	struct pcpu *pc;
 
 	rm = (const struct rmlock *)lock;
 	db_printf(" writecpus: ");
 	ddb_display_cpuset(__DEQUALIFY(const cpuset_t *, &rm->rm_writecpus));
 	db_printf("\n");
 	db_printf(" per-CPU readers:\n");
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)
 		for (queue = pc->pc_rm_queue.rmq_next;
 		    queue != &pc->pc_rm_queue; queue = queue->rmq_next) {
 			tr = (struct rm_priotracker *)queue;
 			if (tr->rmp_rmlock == rm)
 				print_tracker(tr);
 		}
 	db_printf(" active readers:\n");
 	LIST_FOREACH(tr, &rm->rm_activeReaders, rmp_qentry)
 		print_tracker(tr);
 	lc = LOCK_CLASS(&rm->rm_wlock_object);
 	db_printf("Backing write-lock (%s):\n", lc->lc_name);
 	lc->lc_ddb_show(&rm->rm_wlock_object);
 }
 #endif
Index: head/sys/kern/sched_4bsd.c
===================================================================
--- head/sys/kern/sched_4bsd.c	(revision 355708)
+++ head/sys/kern/sched_4bsd.c	(revision 355709)
@@ -1,1797 +1,1797 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1990, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_hwpmc_hooks.h"
 #include "opt_sched.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/cpuset.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/kthread.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/sdt.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/sx.h>
 #include <sys/turnstile.h>
 #include <sys/umtx.h>
 #include <machine/pcb.h>
 #include <machine/smp.h>
 
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 #endif
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 int __read_mostly		dtrace_vtime_active;
 dtrace_vtime_switch_func_t	dtrace_vtime_switch_func;
 #endif
 
 /*
  * INVERSE_ESTCPU_WEIGHT is only suitable for statclock() frequencies in
  * the range 100-256 Hz (approximately).
  */
 /* Clamp an estcpu value 'e' to the upper bound used by this scheduler. */
 #define	ESTCPULIM(e) \
     min((e), INVERSE_ESTCPU_WEIGHT * (NICE_WEIGHT * (PRIO_MAX - PRIO_MIN) - \
     RQ_PPQ) + INVERSE_ESTCPU_WEIGHT - 1)
 /* On SMP kernels the weight is scaled by smp_cpus. */
 #ifdef SMP
 #define	INVERSE_ESTCPU_WEIGHT	(8 * smp_cpus)
 #else
 #define	INVERSE_ESTCPU_WEIGHT	8	/* 1 / (priorities per estcpu level). */
 #endif
 #define	NICE_WEIGHT		1	/* Priorities per nice level. */
 
 /* Size of the ts_name buffer in struct td_sched. */
 #define	TS_NAME_LEN (MAXCOMLEN + sizeof(" td ") + sizeof(__XSTRING(UINT_MAX)))
 
 /*
  * The schedulable entity that runs a context.
  * This is an extension to the thread structure and is tailored to
  * the requirements of this scheduler.
  * All fields are protected by the scheduler lock.
  */
 struct td_sched {
 	fixpt_t		ts_pctcpu;	/* %cpu during p_swtime. */
 	u_int		ts_estcpu;	/* Estimated cpu utilization. */
 	int		ts_cpticks;	/* Ticks of cpu time. */
 	int		ts_slptime;	/* Seconds !RUNNING. */
 	int		ts_slice;	/* Remaining part of time slice. */
 	int		ts_flags;	/* TSF_* flags. */
 	struct runq	*ts_runq;	/* runq the thread is currently on */
 #ifdef KTR
 	/* Only present in kernels built with KTR. */
 	char		ts_name[TS_NAME_LEN];
 #endif
 };
 
 /* flags kept in td_flags */
 #define TDF_DIDRUN	TDF_SCHED0	/* thread actually ran. */
 #define TDF_BOUND	TDF_SCHED1	/* Bound to one CPU. */
 #define	TDF_SLICEEND	TDF_SCHED2	/* Thread time slice is over. */
 
 /* flags kept in ts_flags */
 #define	TSF_AFFINITY	0x0001		/* Has a non-"full" CPU set. */
 
 #define SKE_RUNQ_PCPU(ts)						\
     ((ts)->ts_runq != 0 && (ts)->ts_runq != &runq)
 
 #define	THREAD_CAN_SCHED(td, cpu)	\
     CPU_ISSET((cpu), &(td)->td_cpuset->cs_mask)
 
 _Static_assert(sizeof(struct thread) + sizeof(struct td_sched) <=
     sizeof(struct thread0_storage),
     "increase struct thread0_storage.t0st_sched size");
 
 static struct mtx sched_lock;
 
 static int	realstathz = 127; /* stathz is sometimes 0 and run off of hz. */
 static int	sched_tdcnt;	/* Total runnable threads in the system. */
 static int	sched_slice = 12; /* Thread run time before rescheduling. */
 
 static void	setup_runqs(void);
 static void	schedcpu(void);
 static void	schedcpu_thread(void);
 static void	sched_priority(struct thread *td, u_char prio);
 static void	sched_setup(void *dummy);
 static void	maybe_resched(struct thread *td);
 static void	updatepri(struct thread *td);
 static void	resetpriority(struct thread *td);
 static void	resetpriority_thread(struct thread *td);
 #ifdef SMP
 static int	sched_pickcpu(struct thread *td);
 static int	forward_wakeup(int cpunum);
 static void	kick_other_cpu(int pri, int cpuid);
 #endif
 
 static struct kproc_desc sched_kp = {
         "schedcpu",
         schedcpu_thread,
         NULL
 };
 SYSINIT(schedcpu, SI_SUB_LAST, SI_ORDER_FIRST, kproc_start,
     &sched_kp);
 SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);
 
 static void sched_initticks(void *dummy);
 SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks,
     NULL);
 
 /*
  * Global run queue.
  */
 static struct runq runq;
 
 #ifdef SMP
 /*
  * Per-CPU run queues
  */
 static struct runq runq_pcpu[MAXCPU];
 long runq_length[MAXCPU];
 
 static cpuset_t idle_cpus_mask;
 #endif
 
 /*
  * Per-CPU idle call counters.  sched_clock_tick() copies idlecalls into
  * oldidlecalls and then resets idlecalls to zero.
  */
 struct pcpuidlestat {
 	u_int idlecalls;	/* Count since the last sched_clock_tick(). */
 	u_int oldidlecalls;	/* Value of idlecalls saved at the last tick. */
 };
 DPCPU_DEFINE_STATIC(struct pcpuidlestat, idlestat);
 
 /*
  * Initialize the run queues: every per-CPU queue on SMP kernels, followed
  * by the global queue.
  */
 static void
 setup_runqs(void)
 {
 #ifdef SMP
 	int cpu;
 
 	cpu = 0;
 	while (cpu < MAXCPU) {
 		runq_init(&runq_pcpu[cpu]);
 		cpu++;
 	}
 #endif
 
 	runq_init(&runq);
 }
 
 /*
  * Sysctl handler for the timeshare quantum expressed in microseconds.
  * Reads report sched_slice converted to microseconds; writes convert the
  * new value back to stathz ticks and recompute hogticks.  Returns EINVAL
  * for a non-positive new value.
  */
 static int
 sysctl_kern_quantum(SYSCTL_HANDLER_ARGS)
 {
 	int rc, tick_usec, usec;
 
 	/* Length of one stathz tick in microseconds. */
 	tick_usec = 1000000 / realstathz;
 	usec = tick_usec * sched_slice;
 
 	rc = sysctl_handle_int(oidp, &usec, 0, req);
 	if (rc != 0)
 		return (rc);
 	if (req->newptr == NULL)
 		return (0);
 	if (usec <= 0)
 		return (EINVAL);
 
 	/* Round to the nearest tick, but never go below one tick. */
 	sched_slice = imax(1, (usec + tick_usec / 2) / tick_usec);
 	hogticks = imax(1, (2 * hz * sched_slice + realstathz / 2) /
 	    realstathz);
 	return (0);
 }
 
 SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD, 0, "Scheduler");
 
 SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "4BSD", 0,
     "Scheduler name");
 SYSCTL_PROC(_kern_sched, OID_AUTO, quantum, CTLTYPE_INT | CTLFLAG_RW,
     NULL, 0, sysctl_kern_quantum, "I",
     "Quantum for timeshare threads in microseconds");
 SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0,
     "Quantum for timeshare threads in stathz ticks");
 #ifdef SMP
 /* Enable forwarding of wakeups to all other cpus */
 static SYSCTL_NODE(_kern_sched, OID_AUTO, ipiwakeup, CTLFLAG_RD, NULL,
     "Kernel SMP");
 
 static int runq_fuzz = 1;
 SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
 
 static int forward_wakeup_enabled = 1;
 SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW,
 	   &forward_wakeup_enabled, 0,
 	   "Forwarding of wakeup to idle CPUs");
 
 static int forward_wakeups_requested = 0;
 SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD,
 	   &forward_wakeups_requested, 0,
 	   "Requests for Forwarding of wakeup to idle CPUs");
 
 static int forward_wakeups_delivered = 0;
 SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD,
 	   &forward_wakeups_delivered, 0,
 	   "Completed Forwarding of wakeup to idle CPUs");
 
 static int forward_wakeup_use_mask = 1;
 SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW,
 	   &forward_wakeup_use_mask, 0,
 	   "Use the mask of idle cpus");
 
 static int forward_wakeup_use_loop = 0;
 SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
 	   &forward_wakeup_use_loop, 0,
 	   "Use a loop to find idle cpus");
 
 #endif
 #if 0
 static int sched_followon = 0;
 SYSCTL_INT(_kern_sched, OID_AUTO, followon, CTLFLAG_RW,
 	   &sched_followon, 0,
 	   "allow threads to share a quantum");
 #endif
 
 SDT_PROVIDER_DEFINE(sched);
 
 SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *", 
     "struct proc *", "uint8_t");
 SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *", 
     "struct proc *", "void *");
 SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *", 
     "struct proc *", "void *", "int");
 SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *", 
     "struct proc *", "uint8_t", "struct thread *");
 SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int");
 SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *",
     "struct proc *");
 SDT_PROBE_DEFINE(sched, , , on__cpu);
 SDT_PROBE_DEFINE(sched, , , remain__cpu);
 SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *",
     "struct proc *");
 
 /* Increment the global runnable-thread count and report the new value. */
 static __inline void
 sched_load_add(void)
 {
 
 	sched_tdcnt++;
 	KTR_COUNTER0(KTR_SCHED, "load", "global load", sched_tdcnt);
 	SDT_PROBE2(sched, , , load__change, NOCPU, sched_tdcnt);
 }
 
 /* Decrement the global runnable-thread count and report the new value. */
 static __inline void
 sched_load_rem(void)
 {
 
 	sched_tdcnt--;
 	KTR_COUNTER0(KTR_SCHED, "load", "global load", sched_tdcnt);
 	SDT_PROBE2(sched, , , load__change, NOCPU, sched_tdcnt);
 }
 /*
  * Arrange to reschedule if necessary, taking the priorities and
  * schedulers into account.
  */
 static void
 maybe_resched(struct thread *td)
 {
 	struct thread *cur;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	cur = curthread;
 	/* A numerically lower priority value is the more important one. */
 	if (cur->td_priority > td->td_priority)
 		cur->td_flags |= TDF_NEEDRESCHED;
 }
 
 /*
  * This function is called when a thread is about to be put on run queue
  * because it has been made runnable or its priority has been adjusted.  It
  * determines if the new thread should preempt the current thread.  If so,
  * it sets td_owepreempt to request a preemption.
  */
 int
 maybe_preempt(struct thread *td)
 {
 #ifdef PREEMPTION
 	struct thread *ctd;
 	int cpri, pri;
 
 	/*
 	 * The new thread should not preempt the current thread if any of the
 	 * following conditions are true:
 	 *
 	 *  - The kernel is in the throes of crashing (panicstr).
 	 *  - The current thread has a higher (numerically lower) or
 	 *    equivalent priority.  Note that this prevents curthread from
 	 *    trying to preempt to itself.
 	 *  - The current thread has an inhibitor set or is in the process of
 	 *    exiting.  In this case, the current thread is about to switch
 	 *    out anyways, so there's no point in preempting.  If we did,
 	 *    the current thread would not be properly resumed as well, so
 	 *    just avoid that whole landmine.
 	 *  - If the new thread's priority is not a realtime priority and
 	 *    the current thread's priority is not an idle priority and
 	 *    FULL_PREEMPTION is disabled.
 	 *
 	 * If none of these conditions hold, this function only records the
 	 * request by setting td_owepreempt on the current thread and
 	 * returns 1; it does not perform the switch itself.  In all other
 	 * cases, and in kernels built without PREEMPTION, it returns 0.
 	 */
 	ctd = curthread;
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	KASSERT((td->td_inhibitors == 0),
 			("maybe_preempt: trying to run inhibited thread"));
 	pri = td->td_priority;
 	cpri = ctd->td_priority;
 	if (panicstr != NULL || pri >= cpri /* || dumping */ ||
 	    TD_IS_INHIBITED(ctd))
 		return (0);
 #ifndef FULL_PREEMPTION
 	if (pri > PRI_MAX_ITHD && cpri < PRI_MIN_IDLE)
 		return (0);
 #endif
 
 	CTR0(KTR_PROC, "maybe_preempt: scheduling preemption");
 	ctd->td_owepreempt = 1;
 	return (1);
 #else
 	return (0);
 #endif
 }
 
 /*
  * Constants for digital decay and forget:
  *	90% of (ts_estcpu) usage in 5 * loadav time
  *	95% of (ts_pctcpu) usage in 60 seconds (load insensitive)
  *          Note that, as ps(1) mentions, this can let percentages
  *          total over 100% (I've seen 137.9% for 3 processes).
  *
  * Note that schedclock() updates ts_estcpu and p_cpticks asynchronously.
  *
  * We wish to decay away 90% of ts_estcpu in (5 * loadavg) seconds.
  * That is, the system wants to compute a value of decay such
  * that the following for loop:
  * 	for (i = 0; i < (5 * loadavg); i++)
  * 		ts_estcpu *= decay;
  * will compute
  * 	ts_estcpu *= 0.1;
  * for all values of loadavg:
  *
  * Mathematically this loop can be expressed by saying:
  * 	decay ** (5 * loadavg) ~= .1
  *
  * The system computes decay as:
  * 	decay = (2 * loadavg) / (2 * loadavg + 1)
  *
  * We wish to prove that the system's computation of decay
  * will always fulfill the equation:
  * 	decay ** (5 * loadavg) ~= .1
  *
  * If we compute b as:
  * 	b = 2 * loadavg
  * then
  * 	decay = b / (b + 1)
  *
  * We now need to prove two things:
  *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
  *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
  *
  * Facts:
  *         For x close to zero, exp(x) =~ 1 + x, since
  *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
  *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
  *         For x close to zero, ln(1+x) =~ x, since
  *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
  *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
  *         ln(.1) =~ -2.30
  *
  * Proof of (1):
  *    Solve (factor)**(power) =~ .1 given power (5*loadav):
  *	solving for factor,
  *      ln(factor) =~ (-2.30/5*loadav), or
  *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
  *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
  *
  * Proof of (2):
  *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
  *	solving for power,
  *      power*ln(b/(b+1)) =~ -2.30, or
  *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
  *
  * Actual power values for the implemented algorithm are as follows:
  *      loadav: 1       2       3       4
  *      power:  5.68    10.32   14.94   19.55
  */
 
 /* calculations for digital decay to forget 90% of usage in 5*loadav sec */
 #define	loadfactor(loadav)	(2 * (loadav))
 #define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))
 
 /* decay 95% of `ts_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
 static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */
 SYSCTL_UINT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");
 
 /*
  * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
  * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
  * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
  *
  * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
  *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
  *
  * If you don't want to bother with the faster/more-accurate formula, you
  * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
  * (more general) method of calculating the %age of CPU used by a process.
  */
 #define	CCPU_SHIFT	11
 
 /*
  * Recompute process priorities, every hz ticks.
  * MP-safe, called without the Giant mutex.
  */
 /* ARGSUSED */
 static void
 schedcpu(void)
 {
 	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
 	struct thread *td;
 	struct proc *p;
 	struct td_sched *ts;
 	int awake;
 
 	/* Walk every process while holding allproc_lock shared. */
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		PROC_LOCK(p);
 		/* Processes still in the PRS_NEW state are skipped. */
 		if (p->p_state == PRS_NEW) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		FOREACH_THREAD_IN_PROC(p, td) {
 			awake = 0;
 			ts = td_get_sched(td);
 			thread_lock(td);
 			/*
 			 * Increment sleep time (if sleeping).  We
 			 * ignore overflow, as above.
 			 */
 			/*
 			 * The td_sched slptimes are not touched in wakeup
 			 * because the thread may not HAVE everything in
 			 * memory? XXX I think this is out of date.
 			 */
 			/*
 			 * A thread counts as awake if it is on a run queue,
 			 * is running, or has TDF_DIDRUN set.  The flag is
 			 * cleared here except while the thread is running.
 			 */
 			if (TD_ON_RUNQ(td)) {
 				awake = 1;
 				td->td_flags &= ~TDF_DIDRUN;
 			} else if (TD_IS_RUNNING(td)) {
 				awake = 1;
 				/* Do not clear TDF_DIDRUN */
 			} else if (td->td_flags & TDF_DIDRUN) {
 				awake = 1;
 				td->td_flags &= ~TDF_DIDRUN;
 			}
 
 			/*
 			 * ts_pctcpu is only for ps and ttyinfo().
 			 */
 			ts->ts_pctcpu = (ts->ts_pctcpu * ccpu) >> FSHIFT;
 			/*
 			 * If the td_sched has been idle the entire second,
 			 * stop recalculating its priority until
 			 * it wakes up.
 			 */
 			/*
 			 * Fold the ticks accumulated since the last pass into
 			 * ts_pctcpu, then restart the tick count.
 			 */
 			if (ts->ts_cpticks != 0) {
 #if	(FSHIFT >= CCPU_SHIFT)
 				ts->ts_pctcpu += (realstathz == 100)
 				    ? ((fixpt_t) ts->ts_cpticks) <<
 				    (FSHIFT - CCPU_SHIFT) :
 				    100 * (((fixpt_t) ts->ts_cpticks)
 				    << (FSHIFT - CCPU_SHIFT)) / realstathz;
 #else
 				ts->ts_pctcpu += ((FSCALE - ccpu) *
 				    (ts->ts_cpticks *
 				    FSCALE / realstathz)) >> FSHIFT;
 #endif
 				ts->ts_cpticks = 0;
 			}
 			/*
 			 * If there are ANY running threads in this process,
 			 * then don't count it as sleeping.
 			 * XXX: this is broken.
 			 */
 			if (awake) {
 				if (ts->ts_slptime > 1) {
 					/*
 					 * In an ideal world, this should not
 					 * happen, because whoever woke us
 					 * up from the long sleep should have
 					 * unwound the slptime and reset our
 					 * priority before we run at the stale
 					 * priority.  Should KASSERT at some
 					 * point when all the cases are fixed.
 					 */
 					updatepri(td);
 				}
 				ts->ts_slptime = 0;
 			} else
 				ts->ts_slptime++;
 			/*
 			 * Asleep for more than one pass: skip the estcpu
 			 * decay and the priority update below.
 			 */
 			if (ts->ts_slptime > 1) {
 				thread_unlock(td);
 				continue;
 			}
 			/* Decay the usage estimate, then recompute priorities. */
 			ts->ts_estcpu = decay_cpu(loadfac, ts->ts_estcpu);
 		      	resetpriority(td);
 			resetpriority_thread(td);
 			thread_unlock(td);
 		}
 		PROC_UNLOCK(p);
 	}
 	sx_sunlock(&allproc_lock);
 }
 
 /*
  * Main loop for a kthread that executes schedcpu once a second.
  */
 static void
 schedcpu_thread(void)
 {
 
 	/* Never returns: run schedcpu(), then pause for hz ticks, forever. */
 	for (;;) {
 		schedcpu();
 		pause("-", hz);
 	}
 }
 
 /*
  * Recalculate the priority of a process after it has slept for a while.
  * For all load averages >= 1 and max ts_estcpu of 255, sleeping for at
  * least six times the loadfactor will decay ts_estcpu to zero.
  */
 static void
 updatepri(struct thread *td)
 {
 	struct td_sched *tds;
 	fixpt_t factor;
 	unsigned int est;
 
 	tds = td_get_sched(td);
 	factor = loadfactor(averunnable.ldavg[0]);
 
 	/* Slept long enough that the estimate is simply dropped to zero. */
 	if (tds->ts_slptime > 5 * factor) {
 		tds->ts_estcpu = 0;
 		return;
 	}
 
 	est = tds->ts_estcpu;
 	tds->ts_slptime--;	/* was incremented in schedcpu() */
 	/*
 	 * Apply one decay step per remaining sleep count, stopping early
 	 * once the estimate reaches zero.
 	 */
 	while (est != 0 && --tds->ts_slptime != 0)
 		est = decay_cpu(factor, est);
 	tds->ts_estcpu = est;
 }
 
 /*
  * Compute the priority of a process when running in user mode.
  * Arrange to reschedule if the resulting priority is better
  * than that of the current process.
  */
 static void
 resetpriority(struct thread *td)
 {
 	u_int pri;
 
 	/* Only threads in the timeshare class get a recomputed priority. */
 	if (td->td_pri_class == PRI_TIMESHARE) {
 		pri = PUSER +
 		    td_get_sched(td)->ts_estcpu / INVERSE_ESTCPU_WEIGHT +
 		    NICE_WEIGHT * (td->td_proc->p_nice - PRIO_MIN);
 		/* Keep the result inside the timeshare priority range. */
 		pri = min(max(pri, PRI_MIN_TIMESHARE),
 		    PRI_MAX_TIMESHARE);
 		sched_user_prio(td, pri);
 	}
 }
 
 /*
  * Update the thread's priority when the associated process's user
  * priority changes.
  */
 static void
 resetpriority_thread(struct thread *td)
 {
 
 	/* Threads outside the timeshare priority range are left untouched. */
 	if (td->td_priority >= PRI_MIN_TIMESHARE &&
 	    td->td_priority <= PRI_MAX_TIMESHARE) {
 		/* XXX the whole needresched thing is broken, but not silly. */
 		maybe_resched(td);
 
 		sched_prio(td, td->td_user_pri);
 	}
 }
 
 /* ARGSUSED */
 /* SYSINIT hook: initialize the run queues; the dummy argument is unused. */
 static void
 sched_setup(void *dummy)
 {
 
 	setup_runqs();
 
 	/* Account for thread0. */
 	sched_load_add();
 }
 
 /*
  * This routine determines time constants after stathz and hz are setup.
  */
 static void
 sched_initticks(void *dummy)
 {
 
 	/* Fall back to hz when stathz is zero. */
 	if (stathz != 0)
 		realstathz = stathz;
 	else
 		realstathz = hz;
 	sched_slice = realstathz / 10;	/* ~100ms */
 	hogticks = imax(1, (2 * hz * sched_slice + realstathz / 2) /
 	    realstathz);
 }
 
 /* External interfaces start here */
 
 /*
  * Very early in the boot some setup of scheduler-specific
  * parts of proc0 and of some scheduler resources needs to be done.
  * Called from:
  *  proc0_init()
  */
 void
 schedinit(void)
 {
 
 	/*
 	 * Set up the scheduler specific parts of thread0.
 	 */
 	thread0.td_lock = &sched_lock;
 	td_get_sched(&thread0)->ts_slice = sched_slice;
 	/* sched_lock is a recursable spin mutex. */
 	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
 }
 
 /*
  * Return the sum of runq_check() over the global run queue and, on SMP
  * kernels, the current CPU's run queue.
  */
 int
 sched_runnable(void)
 {
 	int found;
 
 	found = runq_check(&runq);
 #ifdef SMP
 	found += runq_check(&runq_pcpu[PCPU_GET(cpuid)]);
 #endif
 	return (found);
 }
 
 /* Return the time slice length in hz ticks; the result is at least 1. */
 int
 sched_rr_interval(void)
 {
 	int hzticks;
 
 	/* Convert sched_slice from stathz to hz, rounding to nearest. */
 	hzticks = (sched_slice * hz + realstathz / 2) / realstathz;
 	return (imax(1, hzticks));
 }
 
 /*
  * We adjust the priority of the current process.  The priority of a
  * process gets worse as it accumulates CPU time.  The cpu usage
  * estimator (ts_estcpu) is increased here.  resetpriority() will
  * compute a different priority each time ts_estcpu increases by
  * INVERSE_ESTCPU_WEIGHT (until PRI_MAX_TIMESHARE is reached).  The
  * cpu usage estimator ramps up quite quickly when the process is
  * running (linearly), and decays away exponentially, at a rate which
  * is proportionally slower when the system is busy.  The basic
  * principle is that the system will 90% forget that the process used
  * a lot of CPU time in 5 * loadav seconds.  This causes the system to
  * favor processes which haven't run much recently, and to round-robin
  * among other processes.
  */
 static void
 sched_clock_tick(struct thread *td)
 {
 	struct td_sched *tds;
 	struct pcpuidlestat *idle;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	tds = td_get_sched(td);
 
 	/* Charge one tick and bump the (clamped) CPU usage estimate. */
 	tds->ts_cpticks += 1;
 	tds->ts_estcpu = ESTCPULIM(tds->ts_estcpu + 1);
 	if (tds->ts_estcpu % INVERSE_ESTCPU_WEIGHT == 0) {
 		resetpriority(td);
 		resetpriority_thread(td);
 	}
 
 	/*
 	 * Force a context switch if the current thread has used up a full
 	 * time slice (default is 100ms).  Idle threads are exempt and their
 	 * slice is not consumed.
 	 */
 	if (!TD_IS_IDLETHREAD(td)) {
 		tds->ts_slice--;
 		if (tds->ts_slice <= 0) {
 			tds->ts_slice = sched_slice;
 			td->td_flags |= TDF_NEEDRESCHED | TDF_SLICEEND;
 		}
 	}
 
 	/* Save and restart this CPU's idle call counter. */
 	idle = DPCPU_PTR(idlestat);
 	idle->oldidlecalls = idle->idlecalls;
 	idle->idlecalls = 0;
 }
 
 /* Run sched_clock_tick() on td once for each of cnt ticks. */
 void
 sched_clock(struct thread *td, int cnt)
 {
 
 	while (cnt > 0) {
 		sched_clock_tick(td);
 		cnt--;
 	}
 }
 
 /*
  * Charge child's scheduling CPU usage to parent.
  */
 void
 sched_exit(struct proc *p, struct thread *td)
 {
 
 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "proc exit",
 	    "prio:%d", td->td_priority);
 
 	/* The caller must hold p's process lock. */
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	/* The first thread of p is the one charged for td's usage. */
 	sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
 }
 
 void
 sched_exit_thread(struct thread *td, struct thread *child)
 {
 
 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(child), "exit",
 	    "prio:%d", child->td_priority);
 	thread_lock(td);
 	td_get_sched(td)->ts_estcpu = ESTCPULIM(td_get_sched(td)->ts_estcpu +
 	    td_get_sched(child)->ts_estcpu);
 	thread_unlock(td);
 	thread_lock(child);
 	if ((child->td_flags & TDF_NOLOAD) == 0)
 		sched_load_rem();
 	thread_unlock(child);
 }
 
 /* Thin wrapper: forwards both arguments to sched_fork_thread(). */
 void
 sched_fork(struct thread *td, struct thread *childtd)
 {
 	sched_fork_thread(td, childtd);
 }
 
 /*
  * Initialize the scheduler state of a newly created thread childtd from
  * its creator td: the child is not on any CPU, uses sched_lock, shares
  * td's cpuset, and starts from a zeroed td_sched that inherits td's estcpu
  * and affinity flag.
  */
 void
 sched_fork_thread(struct thread *td, struct thread *childtd)
 {
 	struct td_sched *child_ts, *parent_ts;
 
 	/* Generic thread fields. */
 	childtd->td_oncpu = NOCPU;
 	childtd->td_lastcpu = NOCPU;
 	childtd->td_lock = &sched_lock;
 	childtd->td_cpuset = cpuset_ref(td->td_cpuset);
 	childtd->td_domain.dr_policy = td->td_cpuset->cs_domain;
 	childtd->td_priority = childtd->td_base_pri;
 
 	/* Scheduler-private fields start out zeroed. */
 	child_ts = td_get_sched(childtd);
 	bzero(child_ts, sizeof(*child_ts));
 	parent_ts = td_get_sched(td);
 	child_ts->ts_estcpu = parent_ts->ts_estcpu;
 	child_ts->ts_flags |= (parent_ts->ts_flags & TSF_AFFINITY);
 	child_ts->ts_slice = 1;
 }
 
 /*
  * Set the nice value of process p and recompute the priority of each of
  * its threads.  The caller must hold p's process lock.
  */
 void
 sched_nice(struct proc *p, int nice)
 {
 	struct thread *td;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	p->p_nice = nice;
 	FOREACH_THREAD_IN_PROC(p, td) {
 		thread_lock(td);
 		resetpriority(td);
 		resetpriority_thread(td);
 		thread_unlock(td);
 	}
 }
 
 /* Set td's priority class.  The caller must hold the thread lock. */
 void
 sched_class(struct thread *td, int class)
 {
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	td->td_pri_class = class;
 }
 
 /*
  * Adjust the priority of a thread.
  */
 static void
 sched_priority(struct thread *td, u_char prio)
 {
 
 
 	/* Tracing only: report the change before it is applied. */
 	KTR_POINT3(KTR_SCHED, "thread", sched_tdname(td), "priority change",
 	    "prio:%d", td->td_priority, "new prio:%d", prio, KTR_ATTR_LINKED,
 	    sched_tdname(curthread));
 	SDT_PROBE3(sched, , , change__pri, td, td->td_proc, prio);
 	if (td != curthread && prio > td->td_priority) {
 		KTR_POINT3(KTR_SCHED, "thread", sched_tdname(curthread),
 		    "lend prio", "prio:%d", td->td_priority, "new prio:%d",
 		    prio, KTR_ATTR_LINKED, sched_tdname(td));
 		SDT_PROBE4(sched, , , lend__pri, td, td->td_proc, prio, 
 		    curthread);
 	}
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	/* Nothing to do if the priority is unchanged. */
 	if (td->td_priority == prio)
 		return;
 	td->td_priority = prio;
 	/*
 	 * A thread on a run queue is removed and re-added when the new
 	 * priority maps to a different queue index.
 	 */
 	if (TD_ON_RUNQ(td) && td->td_rqindex != (prio / RQ_PPQ)) {
 		sched_rem(td);
 		sched_add(td, SRQ_BORING);
 	}
 }
 
 /*
  * Update a thread's priority when it is lent another thread's
  * priority.
  */
 void
 sched_lend_prio(struct thread *td, u_char prio)
 {
 
 	/* Mark the thread as borrowing before changing its priority. */
 	td->td_flags |= TDF_BORROWING;
 	sched_priority(td, prio);
 }
 
 /*
  * Restore a thread's priority when priority propagation is
  * over.  The prio argument is the minimum priority the thread
  * needs to have to satisfy other possible priority lending
  * requests.  If the thread's regular priority is less
  * important than prio the thread will keep a priority boost
  * of prio.
  */
 void
 sched_unlend_prio(struct thread *td, u_char prio)
 {
 	u_char own_pri;
 
 	/*
 	 * A base priority in the timeshare range is replaced by the user
 	 * priority; any other base priority is used as it is.
 	 */
 	if (td->td_base_pri < PRI_MIN_TIMESHARE ||
 	    td->td_base_pri > PRI_MAX_TIMESHARE)
 		own_pri = td->td_base_pri;
 	else
 		own_pri = td->td_user_pri;
 
 	/* A numerically lower prio still has to be honoured as a loan. */
 	if (prio < own_pri) {
 		sched_lend_prio(td, prio);
 		return;
 	}
 
 	td->td_flags &= ~TDF_BORROWING;
 	sched_prio(td, own_pri);
 }
 
 /*
  * Set td's base priority to prio and, unless the thread is borrowing a
  * numerically lower priority, its effective priority as well.
  */
 void
 sched_prio(struct thread *td, u_char prio)
 {
 	u_char prev;
 
 	/* The base priority is always recorded. */
 	td->td_base_pri = prio;
 
 	/*
 	 * While borrowing, the effective priority is never made
 	 * numerically larger.
 	 */
 	if ((td->td_flags & TDF_BORROWING) != 0 && prio > td->td_priority)
 		return;
 
 	/* Apply the new effective priority. */
 	prev = td->td_priority;
 	sched_priority(td, prio);
 
 	/*
 	 * A thread that is on a turnstile has the turnstile adjusted
 	 * when its priority actually changed.
 	 */
 	if (prev != prio && TD_ON_LOCK(td))
 		turnstile_adjust(td, prev);
 }
 
 /*
  * Record prio as td's base user priority.  The effective user priority
  * follows it only when the lent user priority is numerically larger than
  * prio.  The caller must hold the thread lock.
  */
 void
 sched_user_prio(struct thread *td, u_char prio)
 {
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	td->td_base_user_pri = prio;
 	if (td->td_lend_user_pri > prio)
 		td->td_user_pri = prio;
 }
 
 /*
  * Record prio as the user priority lent to td and make the effective user
  * priority the numerically smaller of prio and the base user priority.
  * The caller must hold the thread lock.
  */
 void
 sched_lend_user_prio(struct thread *td, u_char prio)
 {
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	td->td_lend_user_pri = prio;
 	td->td_user_pri = min(prio, td->td_base_user_pri);
 	/*
 	 * A numerically larger current priority is replaced right away;
 	 * any other mismatch only requests a reschedule.
 	 */
 	if (td->td_priority > td->td_user_pri)
 		sched_prio(td, td->td_user_pri);
 	else if (td->td_priority != td->td_user_pri)
 		td->td_flags |= TDF_NEEDRESCHED;
 }
 
 /*
  * Like the above but first check if there is anything to do.
  */
 /*
  * Lend the user priority prio to td, taking the thread lock only when
  * sched_lend_user_prio() would actually change something.
  *
  * The unlocked checks mirror sched_lend_user_prio(): that function stores
  * td_lend_user_pri and td_user_pri, and then acts whenever td_priority
  * differs from td_user_pri (sched_prio() when it is numerically larger,
  * TDF_NEEDRESCHED otherwise).  The last check therefore has to be an
  * inequality test; a ">=" test would skip the TDF_NEEDRESCHED case and
  * take the lock needlessly when the two priorities are already equal.
  */
 void
 sched_lend_user_prio_cond(struct thread *td, u_char prio)
 {
 
 	if (td->td_lend_user_pri != prio)
 		goto lend;
 	if (td->td_user_pri != min(prio, td->td_base_user_pri))
 		goto lend;
 	if (td->td_priority != td->td_user_pri)
 		goto lend;
 	/* Everything already matches: nothing to do. */
 	return;
 
 lend:
 	thread_lock(td);
 	sched_lend_user_prio(td, prio);
 	thread_unlock(td);
 }
 
 /*
  * Record that td is going to sleep: note the current tick, reset the
  * scheduler's sleep-time counter and optionally apply the sleep priority
  * pri.  The caller must hold the thread lock.
  */
 void
 sched_sleep(struct thread *td, int pri)
 {
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	td->td_slptick = ticks;
 	td_get_sched(td)->ts_slptime = 0;
 	/* A non-zero pri is only applied to timeshare-class threads. */
 	if (pri != 0 && PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
 		sched_prio(td, pri);
 	if (TD_IS_SUSPENDED(td) || pri >= PSOCK)
 		td->td_flags |= TDF_CANSWAP;
 }
 
 /*
  * Switch this CPU away from td.
  *
  * td:    the thread being switched out; its thread lock must be held.
  * newtd: the thread to run next, or NULL to let choosethread() pick one.
  * flags: switch flags; SW_PREEMPT together with a clear TDF_SLICEEND marks
  *        the switch as a preemption when td is put back on the run queue.
  *
  * On return the function is running in td again (possibly much later)
  * with td_lock equal to &sched_lock.
  *
  * NOTE(review): the locals ts and p are assigned below but never read in
  * this function.
  */
 void
 sched_switch(struct thread *td, struct thread *newtd, int flags)
 {
 	struct mtx *tmtx;
 	struct td_sched *ts;
 	struct proc *p;
 	int preempted;
 
 	tmtx = NULL;
 	ts = td_get_sched(td);
 	p = td->td_proc;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 
 	/*
 	 * Switch to the sched lock to fix things up and pick
 	 * a new thread.
 	 * Block the td_lock in order to avoid breaking the critical path.
 	 */
 	if (td->td_lock != &sched_lock) {
 		mtx_lock_spin(&sched_lock);
 		tmtx = thread_lock_block(td);
 	}
 
 	/* Threads flagged TDF_NOLOAD are not counted in the load. */
 	if ((td->td_flags & TDF_NOLOAD) == 0)
 		sched_load_rem();
 
 	td->td_lastcpu = td->td_oncpu;
 	/* Computed before the flags are cleared on the next line. */
 	preempted = (td->td_flags & TDF_SLICEEND) == 0 &&
 	    (flags & SW_PREEMPT) != 0;
 	td->td_flags &= ~(TDF_NEEDRESCHED | TDF_SLICEEND);
 	td->td_owepreempt = 0;
 	td->td_oncpu = NOCPU;
 
 	/*
 	 * At the last moment, if this thread is still marked RUNNING,
 	 * then put it back on the run queue as it has not been suspended
 	 * or stopped or anything else similar.  We never put the idle
 	 * threads on the run queue, however.
 	 */
 	if (td->td_flags & TDF_IDLETD) {
 		TD_SET_CAN_RUN(td);
 #ifdef SMP
 		CPU_CLR(PCPU_GET(cpuid), &idle_cpus_mask);
 #endif
 	} else {
 		if (TD_IS_RUNNING(td)) {
 			/* Put us back on the run queue. */
 			sched_add(td, preempted ?
 			    SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
 			    SRQ_OURSELF|SRQ_YIELDING);
 		}
 	}
 	if (newtd) {
 		/*
 		 * The thread we are about to run needs to be counted
 		 * as if it had been added to the run queue and selected.
 		 * It came from:
 		 * * A preemption
 		 * * An upcall
 		 * * A followon
 		 */
 		KASSERT((newtd->td_inhibitors == 0),
 			("trying to run inhibited thread"));
 		newtd->td_flags |= TDF_DIDRUN;
         	TD_SET_RUNNING(newtd);
 		if ((newtd->td_flags & TDF_NOLOAD) == 0)
 			sched_load_add();
 	} else {
 		newtd = choosethread();
 		MPASS(newtd->td_lock == &sched_lock);
 	}
 
 #if (KTR_COMPILE & KTR_SCHED) != 0
 	if (TD_IS_IDLETHREAD(td))
 		KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle",
 		    "prio:%d", td->td_priority);
 	else
 		KTR_STATE3(KTR_SCHED, "thread", sched_tdname(td), KTDSTATE(td),
 		    "prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg,
 		    "lockname:\"%s\"", td->td_lockname);
 #endif
 
 	/* Only do a real context switch when a different thread was chosen. */
 	if (td != newtd) {
 #ifdef	HWPMC_HOOKS
 		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
 			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
 #endif
 
 		SDT_PROBE2(sched, , , off__cpu, newtd, newtd->td_proc);
 
 		/* I feel sleepy */
 		lock_profile_release_lock(&sched_lock.lock_object);
 #ifdef KDTRACE_HOOKS
 		/*
 		 * If DTrace has set the active vtime enum to anything
 		 * other than INACTIVE (0), then it should have set the
 		 * function to call.
 		 */
 		if (dtrace_vtime_active)
 			(*dtrace_vtime_switch_func)(newtd);
 #endif
 
 		/*
 		 * Pass the blocked original lock, if there was one, as the
 		 * mutex argument of cpu_switch(); otherwise pass td_lock.
 		 */
 		cpu_switch(td, newtd, tmtx != NULL ? tmtx : td->td_lock);
 		lock_profile_obtain_lock_success(&sched_lock.lock_object,
 		    0, 0, __FILE__, __LINE__);
 		/*
 		 * Where am I?  What year is it?
 		 * We are in the same thread that went to sleep above,
 		 * but any amount of time may have passed. All our context
 		 * will still be available as will local variables.
 		 * PCPU values however may have changed as we may have
 		 * changed CPU so don't trust cached values of them.
 		 * New threads will go to fork_exit() instead of here
 		 * so if you change things here you may need to change
 		 * things there too.
 		 *
 		 * If the thread above was exiting it will never wake
 		 * up again here, so either it has saved everything it
 		 * needed to, or the thread_wait() or wait() will
 		 * need to reap it.
 		 */
 
 		SDT_PROBE0(sched, , , on__cpu);
 #ifdef	HWPMC_HOOKS
 		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
 			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
 #endif
 	} else
 		SDT_PROBE0(sched, , , remain__cpu);
 
 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
 	    "prio:%d", td->td_priority);
 
 #ifdef SMP
 	if (td->td_flags & TDF_IDLETD)
 		CPU_SET(PCPU_GET(cpuid), &idle_cpus_mask);
 #endif
 	/* Record td as the owner of sched_lock and note the CPU it runs on. */
 	sched_lock.mtx_lock = (uintptr_t)td;
 	td->td_oncpu = PCPU_GET(cpuid);
 	MPASS(td->td_lock == &sched_lock);
 }
 
 /*
  * Make a sleeping thread runnable again: refresh its priority if it slept
  * for more than one recorded unit of ts_slptime, reset the sleep
  * bookkeeping, hand it a fresh time slice and queue it with sched_add().
  */
 void
 sched_wakeup(struct thread *td)
 {
 	struct td_sched *tds;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	tds = td_get_sched(td);
 
 	td->td_flags &= ~TDF_CANSWAP;
 	if (tds->ts_slptime > 1) {
 		updatepri(td);
 		resetpriority(td);
 	}
 
 	/* Clear the sleep state and start with a full slice. */
 	td->td_slptick = 0;
 	tds->ts_slptime = 0;
 	tds->ts_slice = sched_slice;
 
 	sched_add(td, SRQ_BORING);
 }
 
 #ifdef SMP
 /*
  * Try to get an idle CPU to notice newly runnable work.
  *
  * cpunum: a specific CPU to target, or NOCPU to consider every idle CPU.
  *
  * Returns 1 when at least one candidate CPU was found (each candidate is
  * either woken through cpu_idle_wakeup() or sent IPI_AST), and 0 when
  * forwarding is disabled, SMP is not started, a panic is in progress, the
  * current CPU should handle the work itself, or no candidate exists.
  * sched_lock must be held.
  */
 static int
 forward_wakeup(int cpunum)
 {
 	struct pcpu *pc;
 	cpuset_t dontuse, map, map2;
 	u_int id, me;
 	int iscpuset;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 
 	CTR0(KTR_RUNQ, "forward_wakeup()");
 
 	if ((!forward_wakeup_enabled) ||
 	     (forward_wakeup_use_mask == 0 && forward_wakeup_use_loop == 0))
 		return (0);
 	if (!smp_started || panicstr)
 		return (0);
 
 	forward_wakeups_requested++;
 
 	/*
 	 * Check the idle mask we received against what we calculated
 	 * before in the old version.
 	 */
 	me = PCPU_GET(cpuid);
 
 	/* Don't bother if we should be doing it ourselves. */
 	if (CPU_ISSET(me, &idle_cpus_mask) &&
 	    (cpunum == NOCPU || me == cpunum))
 		return (0);
 
 	/* dontuse: this CPU plus the stopped and halted CPUs. */
 	CPU_SETOF(me, &dontuse);
 	CPU_OR(&dontuse, &stopped_cpus);
 	CPU_OR(&dontuse, &hlt_cpus_mask);
 	CPU_ZERO(&map2);
 	/* Loop method: collect usable CPUs whose current thread is idle. */
 	if (forward_wakeup_use_loop) {
 		STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 			id = pc->pc_cpuid;
 			if (!CPU_ISSET(id, &dontuse) &&
 			    pc->pc_curthread == pc->pc_idlethread) {
 				CPU_SET(id, &map2);
 			}
 		}
 	}
 
 	/* Mask method: start from idle_cpus_mask. */
 	if (forward_wakeup_use_mask) {
 		map = idle_cpus_mask;
-		CPU_NAND(&map, &dontuse);
+		CPU_ANDNOT(&map, &dontuse);
 
 		/* If they are both on, compare and use loop if different. */
 		if (forward_wakeup_use_loop) {
 			if (CPU_CMP(&map, &map2)) {
 				printf("map != map2, loop method preferred\n");
 				map = map2;
 			}
 		}
 	} else {
 		map = map2;
 	}
 
 	/* If we only allow a specific CPU, then mask off all the others. */
 	if (cpunum != NOCPU) {
 		KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum."));
 		iscpuset = CPU_ISSET(cpunum, &map);
 		if (iscpuset == 0)
 			CPU_ZERO(&map);
 		else
 			CPU_SETOF(cpunum, &map);
 	}
 	if (!CPU_EMPTY(&map)) {
 		forward_wakeups_delivered++;
 		/*
 		 * CPUs for which cpu_idle_wakeup() reports success are
 		 * dropped from the map; the rest get an IPI below.
 		 */
 		STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 			id = pc->pc_cpuid;
 			if (!CPU_ISSET(id, &map))
 				continue;
 			if (cpu_idle_wakeup(pc->pc_cpuid))
 				CPU_CLR(id, &map);
 		}
 		if (!CPU_EMPTY(&map))
 			ipi_selected(map, IPI_AST);
 		return (1);
 	}
 	if (cpunum == NOCPU)
 		printf("forward_wakeup: Idle processor not found\n");
 	return (0);
 }
 
 /*
  * Notify the CPU cpuid that a thread of priority pri has been queued for
  * it.  An idle CPU is woken up; a busy CPU is only disturbed when pri is
  * numerically lower than the priority of the thread it is running.
  */
 static void
 kick_other_cpu(int pri, int cpuid)
 {
 	struct pcpu *pc;
 
 	pc = pcpu_find(cpuid);
 
 	/* Idle target: wake it, falling back to an AST IPI. */
 	if (CPU_ISSET(cpuid, &idle_cpus_mask)) {
 		forward_wakeups_delivered++;
 		if (cpu_idle_wakeup(cpuid) == 0)
 			ipi_cpu(cpuid, IPI_AST);
 		return;
 	}
 
 	/* Nothing to do unless the new thread beats the running one. */
 	if (pc->pc_curthread->td_priority <= pri)
 		return;
 
 #if defined(IPI_PREEMPTION) && defined(PREEMPTION)
 #if !defined(FULL_PREEMPTION)
 	if (pri <= PRI_MAX_ITHD)
 #endif /* ! FULL_PREEMPTION */
 	{
 		ipi_cpu(cpuid, IPI_PREEMPT);
 		return;
 	}
 #endif /* defined(IPI_PREEMPTION) && defined(PREEMPTION) */
 
 	/* Ask the remote thread to reschedule and deliver an AST to its CPU. */
 	pc->pc_curthread->td_flags |= TDF_NEEDRESCHED;
 	ipi_cpu(cpuid, IPI_AST);
 }
 #endif /* SMP */
 
 #ifdef SMP
 /*
  * Pick a CPU that td is allowed to run on.  The search starts from the
  * CPU the thread last ran on (when that one is allowed) and moves to any
  * allowed CPU whose per-CPU run queue is strictly shorter.  sched_lock
  * must be held.
  */
 static int
 sched_pickcpu(struct thread *td)
 {
 	int best, cpu;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 
 	best = NOCPU;
 	if (td->td_lastcpu != NOCPU && THREAD_CAN_SCHED(td, td->td_lastcpu))
 		best = td->td_lastcpu;
 
 	CPU_FOREACH(cpu) {
 		if (THREAD_CAN_SCHED(td, cpu) &&
 		    (best == NOCPU || runq_length[cpu] < runq_length[best]))
 			best = cpu;
 	}
 	KASSERT(best != NOCPU, ("no valid CPUs"));
 
 	return (best);
 }
 #endif
 
 /*
  * Put td on a run queue and decide who should notice it.
  *
  * td:    the thread to queue; its thread lock must be held, it must not
  *        be inhibited or swapped out, and it must be in the can-run or
  *        running state.
  * flags: SRQ_* flags, passed on to runq_add(); SRQ_PREEMPTED is reported
  *        to the enqueue probe and SRQ_INTR suppresses wakeup forwarding.
  *
  * The function body is selected at compile time: the first variant is
  * used with SMP, the second one without.
  */
 void
 sched_add(struct thread *td, int flags)
 #ifdef SMP
 {
 	cpuset_t tidlemsk;
 	struct td_sched *ts;
 	u_int cpu, cpuid;
 	int forwarded = 0;
 	int single_cpu = 0;
 
 	ts = td_get_sched(td);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	KASSERT((td->td_inhibitors == 0),
 	    ("sched_add: trying to run inhibited thread"));
 	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
 	    ("sched_add: bad thread state"));
 	KASSERT(td->td_flags & TDF_INMEM,
 	    ("sched_add: thread swapped out"));
 
 	KTR_STATE2(KTR_SCHED, "thread", sched_tdname(td), "runq add",
 	    "prio:%d", td->td_priority, KTR_ATTR_LINKED,
 	    sched_tdname(curthread));
 	KTR_POINT1(KTR_SCHED, "thread", sched_tdname(curthread), "wokeup",
 	    KTR_ATTR_LINKED, sched_tdname(td));
 	SDT_PROBE4(sched, , , enqueue, td, td->td_proc, NULL, 
 	    flags & SRQ_PREEMPTED);
 
 
 	/*
 	 * Now that the thread is moving to the run-queue, set the lock
 	 * to the scheduler's lock.
 	 */
 	if (td->td_lock != &sched_lock) {
 		mtx_lock_spin(&sched_lock);
 		thread_lock_set(td, &sched_lock);
 	}
 	TD_SET_RUNQ(td);
 
 	/*
 	 * If SMP is started and the thread is pinned or otherwise limited to
 	 * a specific set of CPUs, queue the thread to a per-CPU run queue.
 	 * Otherwise, queue the thread to the global run queue.
 	 *
 	 * If SMP has not yet been started we must use the global run queue
 	 * as per-CPU state may not be initialized yet and we may crash if we
 	 * try to access the per-CPU run queues.
 	 */
 	if (smp_started && (td->td_pinned != 0 || td->td_flags & TDF_BOUND ||
 	    ts->ts_flags & TSF_AFFINITY)) {
 		if (td->td_pinned != 0)
 			cpu = td->td_lastcpu;
 		else if (td->td_flags & TDF_BOUND) {
 			/* Find CPU from bound runq. */
 			KASSERT(SKE_RUNQ_PCPU(ts),
 			    ("sched_add: bound td_sched not on cpu runq"));
 			cpu = ts->ts_runq - &runq_pcpu[0];
 		} else
 			/* Find a valid CPU for our cpuset */
 			cpu = sched_pickcpu(td);
 		ts->ts_runq = &runq_pcpu[cpu];
 		single_cpu = 1;
 		CTR3(KTR_RUNQ,
 		    "sched_add: Put td_sched:%p(td:%p) on cpu%d runq", ts, td,
 		    cpu);
 	} else {
 		CTR2(KTR_RUNQ,
 		    "sched_add: adding td_sched:%p (td:%p) to gbl runq", ts,
 		    td);
 		cpu = NOCPU;
 		ts->ts_runq = &runq;
 	}
 
 	if ((td->td_flags & TDF_NOLOAD) == 0)
 		sched_load_add();
 	runq_add(ts->ts_runq, td, flags);
 	/* Per-CPU queue lengths are tracked for sched_pickcpu(). */
 	if (cpu != NOCPU)
 		runq_length[cpu]++;
 
 	cpuid = PCPU_GET(cpuid);
 	if (single_cpu && cpu != cpuid) {
 		/* Queued for one specific other CPU: notify that CPU. */
 	        kick_other_cpu(td->td_priority, cpu);
 	} else {
 		if (!single_cpu) {
 			/*
 			 * Global queue: build the set of idle CPUs other
 			 * than this one and, if it is non-empty, try to
 			 * forward the wakeup.
 			 */
 			tidlemsk = idle_cpus_mask;
-			CPU_NAND(&tidlemsk, &hlt_cpus_mask);
+			CPU_ANDNOT(&tidlemsk, &hlt_cpus_mask);
 			CPU_CLR(cpuid, &tidlemsk);
 
 			if (!CPU_ISSET(cpuid, &idle_cpus_mask) &&
 			    ((flags & SRQ_INTR) == 0) &&
 			    !CPU_EMPTY(&tidlemsk))
 				forwarded = forward_wakeup(cpu);
 		}
 
 		/* Not forwarded: consider preempting or rescheduling here. */
 		if (!forwarded) {
 			if (!maybe_preempt(td))
 				maybe_resched(td);
 		}
 	}
 }
 #else /* SMP */
 {
 	struct td_sched *ts;
 
 	ts = td_get_sched(td);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	KASSERT((td->td_inhibitors == 0),
 	    ("sched_add: trying to run inhibited thread"));
 	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
 	    ("sched_add: bad thread state"));
 	KASSERT(td->td_flags & TDF_INMEM,
 	    ("sched_add: thread swapped out"));
 	KTR_STATE2(KTR_SCHED, "thread", sched_tdname(td), "runq add",
 	    "prio:%d", td->td_priority, KTR_ATTR_LINKED,
 	    sched_tdname(curthread));
 	KTR_POINT1(KTR_SCHED, "thread", sched_tdname(curthread), "wokeup",
 	    KTR_ATTR_LINKED, sched_tdname(td));
 	SDT_PROBE4(sched, , , enqueue, td, td->td_proc, NULL, 
 	    flags & SRQ_PREEMPTED);
 
 	/*
 	 * Now that the thread is moving to the run-queue, set the lock
 	 * to the scheduler's lock.
 	 */
 	if (td->td_lock != &sched_lock) {
 		mtx_lock_spin(&sched_lock);
 		thread_lock_set(td, &sched_lock);
 	}
 	TD_SET_RUNQ(td);
 	CTR2(KTR_RUNQ, "sched_add: adding td_sched:%p (td:%p) to runq", ts, td);
 	/* Without SMP there is only the global run queue. */
 	ts->ts_runq = &runq;
 
 	if ((td->td_flags & TDF_NOLOAD) == 0)
 		sched_load_add();
 	runq_add(ts->ts_runq, td, flags);
 	if (!maybe_preempt(td))
 		maybe_resched(td);
 }
 #endif /* SMP */
 
 /*
  * Take td off the run queue it is on and put it back into the can-run
  * state.  sched_lock must be held and the thread must be on a run queue.
  */
 void
 sched_rem(struct thread *td)
 {
 	struct td_sched *tds;
 
 	tds = td_get_sched(td);
 	KASSERT(td->td_flags & TDF_INMEM,
 	    ("sched_rem: thread swapped out"));
 	KASSERT(TD_ON_RUNQ(td),
 	    ("sched_rem: thread not on run queue"));
 	mtx_assert(&sched_lock, MA_OWNED);
 	KTR_STATE2(KTR_SCHED, "thread", sched_tdname(td), "runq rem",
 	    "prio:%d", td->td_priority, KTR_ATTR_LINKED,
 	    sched_tdname(curthread));
 	SDT_PROBE3(sched, , , dequeue, td, td->td_proc, NULL);
 
 	/* Threads flagged TDF_NOLOAD were never counted in the load. */
 	if (!(td->td_flags & TDF_NOLOAD))
 		sched_load_rem();
 #ifdef SMP
 	/* Keep the per-CPU queue length in step for per-CPU queues. */
 	if (tds->ts_runq != &runq)
 		runq_length[tds->ts_runq - runq_pcpu]--;
 #endif
 	runq_remove(tds->ts_runq, td);
 	TD_SET_CAN_RUN(td);
 }
 
 /*
  * Select threads to run.  Note that running threads still consume a
  * slot.
  */
 struct thread *
 sched_choose(void)
 {
 	struct thread *td;
 	struct runq *rq;
 
 	mtx_assert(&sched_lock,  MA_OWNED);
 #ifdef SMP
 	struct thread *tdcpu;
 
 	rq = &runq;
 	td = runq_choose_fuzz(&runq, runq_fuzz);
 	tdcpu = runq_choose(&runq_pcpu[PCPU_GET(cpuid)]);
 
 	if (td == NULL ||
 	    (tdcpu != NULL &&
 	     tdcpu->td_priority < td->td_priority)) {
 		CTR2(KTR_RUNQ, "choosing td %p from pcpu runq %d", tdcpu,
 		     PCPU_GET(cpuid));
 		td = tdcpu;
 		rq = &runq_pcpu[PCPU_GET(cpuid)];
 	} else {
 		CTR1(KTR_RUNQ, "choosing td_sched %p from main runq", td);
 	}
 
 #else
 	rq = &runq;
 	td = runq_choose(&runq);
 #endif
 
 	if (td) {
 #ifdef SMP
 		if (td == tdcpu)
 			runq_length[PCPU_GET(cpuid)]--;
 #endif
 		runq_remove(rq, td);
 		td->td_flags |= TDF_DIDRUN;
 
 		KASSERT(td->td_flags & TDF_INMEM,
 		    ("sched_choose: thread swapped out"));
 		return (td);
 	}
 	return (PCPU_GET(idlethread));
 }
 
 /*
  * Preempt td.  If the thread's critical-section nesting count is above
  * one, only record that a preemption is owed; otherwise switch away
  * immediately.
  */
 void
 sched_preempt(struct thread *td)
 {
 
 	SDT_PROBE2(sched, , , surrender, td, td->td_proc);
 	thread_lock(td);
 	if (td->td_critnest <= 1)
 		mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, NULL);
 	else
 		td->td_owepreempt = 1;
 	thread_unlock(td);
 }
 
 /*
  * Reset both the effective and the base priority of td to its user
  * priority, under the thread lock.
  */
 void
 sched_userret_slowpath(struct thread *td)
 {
 
 	thread_lock(td);
 	td->td_priority = td->td_user_pri;
 	td->td_base_pri = td->td_priority;
 	thread_unlock(td);
 }
 
 /*
  * Bind the current thread to the given CPU.  With SMP the thread's run
  * queue becomes that CPU's queue and, if the thread is not already
  * running there, it switches away.  Without SMP only the TDF_BOUND flag
  * is set.
  */
 void
 sched_bind(struct thread *td, int cpu)
 {
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED|MA_NOTRECURSED);
 	KASSERT(td == curthread, ("sched_bind: can only bind curthread"));
 
 	td->td_flags |= TDF_BOUND;
 #ifdef SMP
 	td_get_sched(td)->ts_runq = &runq_pcpu[cpu];
 	if (PCPU_GET(cpuid) != cpu)
 		mi_switch(SW_VOL, NULL);
 #endif
 }
 
 /*
  * Drop the CPU binding of the current thread by clearing TDF_BOUND.  The
  * thread lock must be held and td must be curthread.
  */
 void
 sched_unbind(struct thread *td)
 {
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	/* The message names the operation actually being checked. */
 	KASSERT(td == curthread, ("sched_unbind: can only unbind curthread"));
 	td->td_flags &= ~TDF_BOUND;
 }
 
 /*
  * Report whether td is bound to a CPU.  The result is the raw TDF_BOUND
  * bit of td_flags: zero when unbound, non-zero when bound.
  */
 int
 sched_is_bound(struct thread *td)
 {
 	int bound;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	bound = td->td_flags & TDF_BOUND;
 	return (bound);
 }
 
 /*
  * Voluntarily give up the CPU: take the thread lock, switch with
  * SW_VOL | SWT_RELINQUISH and drop the lock once the thread runs again.
  */
 void
 sched_relinquish(struct thread *td)
 {
 	const int swflags = SW_VOL | SWT_RELINQUISH;
 
 	thread_lock(td);
 	mi_switch(swflags, NULL);
 	thread_unlock(td);
 }
 
 /*
  * Return the scheduler's current thread count (sched_tdcnt).
  */
 int
 sched_load(void)
 {
 	int load;
 
 	load = sched_tdcnt;
 	return (load);
 }
 
 /*
  * Size in bytes of a process structure as seen by this scheduler; no
  * scheduler-private space is added.
  */
 int
 sched_sizeof_proc(void)
 {
 	int size;
 
 	size = sizeof(struct proc);
 	return (size);
 }
 
 /*
  * Size in bytes of a thread structure plus the scheduler-private
  * struct td_sched.
  */
 int
 sched_sizeof_thread(void)
 {
 	int size;
 
 	size = sizeof(struct thread) + sizeof(struct td_sched);
 	return (size);
 }
 
 fixpt_t
 sched_pctcpu(struct thread *td)
 {
 	struct td_sched *ts;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	ts = td_get_sched(td);
 	return (ts->ts_pctcpu);
 }
 
 #ifdef RACCT
 /*
  * Calculates the contribution to the thread cpu usage for the latest
  * (unfinished) second.
  */
 fixpt_t
 sched_pctcpu_delta(struct thread *td)
 {
 	struct td_sched *ts;
 	fixpt_t delta;
 	int realstathz;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	ts = td_get_sched(td);
 	delta = 0;
 	realstathz = stathz ? stathz : hz;
 	if (ts->ts_cpticks != 0) {
 #if	(FSHIFT >= CCPU_SHIFT)
 		delta = (realstathz == 100)
 		    ? ((fixpt_t) ts->ts_cpticks) <<
 		    (FSHIFT - CCPU_SHIFT) :
 		    100 * (((fixpt_t) ts->ts_cpticks)
 		    << (FSHIFT - CCPU_SHIFT)) / realstathz;
 #else
 		delta = ((FSCALE - ccpu) *
 		    (ts->ts_cpticks *
 		    FSCALE / realstathz)) >> FSHIFT;
 #endif
 	}
 
 	return (delta);
 }
 #endif
 
 u_int
 sched_estcpu(struct thread *td)
 {
 	
 	return (td_get_sched(td)->ts_estcpu);
 }
 
 /*
  * The actual idle process.
  */
 void
 sched_idletd(void *dummy)
 {
 	struct pcpuidlestat *stat;
 
 	THREAD_NO_SLEEPING();
 	stat = DPCPU_PTR(idlestat);
 	for (;;) {
 		mtx_assert(&Giant, MA_NOTOWNED);
 
 		while (sched_runnable() == 0) {
 			cpu_idle(stat->idlecalls + stat->oldidlecalls > 64);
 			stat->idlecalls++;
 		}
 
 		mtx_lock_spin(&sched_lock);
 		mi_switch(SW_VOL | SWT_IDLE, NULL);
 		mtx_unlock_spin(&sched_lock);
 	}
 }
 
 /*
  * A CPU is entering for the first time (td == NULL) or a thread is
  * exiting (td != NULL).  Either way control is handed to the thread
  * picked by choosethread() and never comes back.
  */
 void
 sched_throw(struct thread *td)
 {
 
 	if (td != NULL) {
 		/* Exiting thread: it already holds sched_lock. */
 		lock_profile_release_lock(&sched_lock.lock_object);
 		MPASS(td->td_lock == &sched_lock);
 		td->td_lastcpu = td->td_oncpu;
 		td->td_oncpu = NOCPU;
 	} else {
 		/*
 		 * Correct spinlock nesting.  The idle thread context that
 		 * we are borrowing was created so that it would start out
 		 * with a single spin lock (sched_lock) held in
 		 * fork_trampoline().  Since we've explicitly acquired locks
 		 * in this function, the nesting count is now 2 rather than
 		 * 1.  Since we are nested, calling spinlock_exit() will
 		 * simply adjust the counts without allowing spin lock using
 		 * code to interrupt us.
 		 */
 		mtx_lock_spin(&sched_lock);
 		spinlock_exit();
 		PCPU_SET(switchtime, cpu_ticks());
 		PCPU_SET(switchticks, ticks);
 	}
 	mtx_assert(&sched_lock, MA_OWNED);
 	KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
 	cpu_throw(td, choosethread());	/* doesn't return */
 }
 
 /*
  * Scheduler part of finishing a fork: record the CPU the new thread runs
  * on and make td the recorded owner of sched_lock, then emit the
  * "running" trace state and the on-cpu probe.
  */
 void
 sched_fork_exit(struct thread *td)
 {
 
 	/*
 	 * Finish setting up thread glue so that it begins execution in a
 	 * non-nested critical section with sched_lock held but not recursed.
 	 */
 	td->td_oncpu = PCPU_GET(cpuid);
 	/* Must precede the assertion below, which checks lock ownership. */
 	sched_lock.mtx_lock = (uintptr_t)td;
 	lock_profile_obtain_lock_success(&sched_lock.lock_object,
 	    0, 0, __FILE__, __LINE__);
 	THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);
 
 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
 	    "prio:%d", td->td_priority);
 	SDT_PROBE0(sched, , , on__cpu);
 }
 
 /*
  * Return a name for td.  With KTR the name is "<td_name> tid <td_tid>",
  * built on first use and cached in the thread's scheduler data; without
  * KTR it is simply td_name.
  */
 char *
 sched_tdname(struct thread *td)
 {
 #ifdef KTR
 	struct td_sched *tds;
 	char *name;
 
 	tds = td_get_sched(td);
 	name = tds->ts_name;
 	/* An empty buffer means the name has not been generated yet. */
 	if (name[0] == '\0')
 		snprintf(name, sizeof(tds->ts_name),
 		    "%s tid %d", td->td_name, td->td_tid);
 	return (name);
 #else
 	return (td->td_name);
 #endif
 }
 
 #ifdef KTR
 void
 sched_clear_tdname(struct thread *td)
 {
 	struct td_sched *ts;
 
 	ts = td_get_sched(td);
 	ts->ts_name[0] = '\0';
 }
 #endif
 
 /*
  * Re-evaluate td after its CPU set has changed.  TSF_AFFINITY is
  * recomputed; a queued thread is moved to a valid per-CPU run queue and a
  * running thread is asked to reschedule when it sits on a CPU it may no
  * longer use.  Pinned and bound threads are left alone.  Without SMP this
  * function does nothing.  The thread lock must be held.
  */
 void
 sched_affinity(struct thread *td)
 {
 #ifdef SMP
 	struct td_sched *ts;
 	int cpu;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);	
 
 	/*
 	 * Set the TSF_AFFINITY flag if there is at least one CPU this
 	 * thread can't run on.
 	 */
 	ts = td_get_sched(td);
 	ts->ts_flags &= ~TSF_AFFINITY;
 	CPU_FOREACH(cpu) {
 		if (!THREAD_CAN_SCHED(td, cpu)) {
 			ts->ts_flags |= TSF_AFFINITY;
 			break;
 		}
 	}
 
 	/*
 	 * If this thread can run on all CPUs, nothing else to do.
 	 */
 	if (!(ts->ts_flags & TSF_AFFINITY))
 		return;
 
 	/* Pinned threads and bound threads should be left alone. */
 	if (td->td_pinned != 0 || td->td_flags & TDF_BOUND)
 		return;
 
 	switch (td->td_state) {
 	case TDS_RUNQ:
 		/*
 		 * If we are on a per-CPU runqueue that is in the set,
 		 * then nothing needs to be done.
 		 */
 		if (ts->ts_runq != &runq &&
 		    THREAD_CAN_SCHED(td, ts->ts_runq - runq_pcpu))
 			return;
 
 		/* Put this thread on a valid per-CPU runqueue. */
 		sched_rem(td);
 		sched_add(td, SRQ_BORING);
 		break;
 	case TDS_RUNNING:
 		/*
 		 * See if our current CPU is in the set.  If not, force a
 		 * context switch.
 		 */
 		if (THREAD_CAN_SCHED(td, td->td_oncpu))
 			return;
 
 		td->td_flags |= TDF_NEEDRESCHED;
 		/*
 		 * Interrupt the CPU the thread is running on.  The loop
 		 * variable "cpu" cannot be used here: after the scan above
 		 * it holds the first CPU the thread may not run on, which
 		 * need not be the CPU it currently occupies.
 		 */
 		if (td != curthread)
 			ipi_cpu(td->td_oncpu, IPI_AST);
 		break;
 	default:
 		break;
 	}
 #endif
 }
Index: head/sys/kern/sched_ule.c
===================================================================
--- head/sys/kern/sched_ule.c	(revision 355708)
+++ head/sys/kern/sched_ule.c	(revision 355709)
@@ -1,3142 +1,3142 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002-2007, Jeffrey Roberson <jeff@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * This file implements the ULE scheduler.  ULE supports independent CPU
  * run queues and fine grain locking.  It has superior interactive
  * performance under load even on uni-processor systems.
  *
  * etymology:
  *   ULE is the last three letters in schedule.  It owes its name to a
  * generic user created for a scheduling system by Paul Mikesell at
  * Isilon Systems and a general lack of creativity on the part of the author.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_hwpmc_hooks.h"
 #include "opt_sched.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/sdt.h>
 #include <sys/smp.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/turnstile.h>
 #include <sys/umtx.h>
 #include <sys/vmmeter.h>
 #include <sys/cpuset.h>
 #include <sys/sbuf.h>
 
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 #endif
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 int __read_mostly		dtrace_vtime_active;
 dtrace_vtime_switch_func_t	dtrace_vtime_switch_func;
 #endif
 
 #include <machine/cpu.h>
 #include <machine/smp.h>
 
 #define	KTR_ULE	0
 
 #define	TS_NAME_LEN (MAXCOMLEN + sizeof(" td ") + sizeof(__XSTRING(UINT_MAX)))
 #define	TDQ_NAME_LEN	(sizeof("sched lock ") + sizeof(__XSTRING(MAXCPU)))
 #define	TDQ_LOADNAME_LEN	(sizeof("CPU ") + sizeof(__XSTRING(MAXCPU)) - 1 + sizeof(" load"))
 
 /*
  * Thread scheduler specific section.  All fields are protected
  * by the thread lock.
  */
 struct td_sched {	
 	struct runq	*ts_runq;	/* Run-queue we're queued on. */
 	short		ts_flags;	/* TSF_* flags. */
 	int		ts_cpu;		/* CPU that we have affinity for. */
 	int		ts_rltick;	/* Real last tick, for affinity. */
 	int		ts_slice;	/* Ticks of slice remaining. */
 	u_int		ts_slptime;	/* Number of ticks we vol. slept */
 	u_int		ts_runtime;	/* Number of ticks we were running */
 	int		ts_ltick;	/* Last tick that we were running on */
 	int		ts_ftick;	/* First tick that we were running on */
 	int		ts_ticks;	/* Tick count */
 #ifdef KTR
 	/*
 	 * Name buffer, present only with KTR and sized by TS_NAME_LEN.
 	 * NOTE(review): presumably the per-thread name used in trace
 	 * records; its users are not visible in this part of the file.
 	 */
 	char		ts_name[TS_NAME_LEN];
 #endif
 };
 /* flags kept in ts_flags */
 #define	TSF_BOUND	0x0001		/* Thread can not migrate. */
 #define	TSF_XFERABLE	0x0002		/* Thread was added as transferable. */
 
 #define	THREAD_CAN_MIGRATE(td)	((td)->td_pinned == 0)
 #define	THREAD_CAN_SCHED(td, cpu)	\
     CPU_ISSET((cpu), &(td)->td_cpuset->cs_mask)
 
 _Static_assert(sizeof(struct thread) + sizeof(struct td_sched) <=
     sizeof(struct thread0_storage),
     "increase struct thread0_storage.t0st_sched size");
 
 /*
  * Priority ranges used for interactive and non-interactive timeshare
  * threads.  The timeshare priorities are split up into four ranges.
  * The first range handles interactive threads.  The last three ranges
  * (NHALF, x, and NHALF) handle non-interactive threads with the outer
  * ranges supporting nice values.
  */
 #define	PRI_TIMESHARE_RANGE	(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
 #define	PRI_INTERACT_RANGE	((PRI_TIMESHARE_RANGE - SCHED_PRI_NRESV) / 2)
 #define	PRI_BATCH_RANGE		(PRI_TIMESHARE_RANGE - PRI_INTERACT_RANGE)
 
 #define	PRI_MIN_INTERACT	PRI_MIN_TIMESHARE
 #define	PRI_MAX_INTERACT	(PRI_MIN_TIMESHARE + PRI_INTERACT_RANGE - 1)
 #define	PRI_MIN_BATCH		(PRI_MIN_TIMESHARE + PRI_INTERACT_RANGE)
 #define	PRI_MAX_BATCH		PRI_MAX_TIMESHARE
 
 /*
  * Cpu percentage computation macros and defines.
  *
  * SCHED_TICK_SECS:	Number of seconds to average the cpu usage across.
  * SCHED_TICK_TARG:	Number of hz ticks to average the cpu usage across.
  * SCHED_TICK_MAX:	Maximum number of ticks before scaling back.
  * SCHED_TICK_SHIFT:	Shift factor to avoid rounding away results.
  * SCHED_TICK_HZ:	Compute the number of hz ticks for a given ticks count.
  * SCHED_TICK_TOTAL:	Gives the amount of time we've been recording ticks.
  */
 #define	SCHED_TICK_SECS		10
 #define	SCHED_TICK_TARG		(hz * SCHED_TICK_SECS)
 #define	SCHED_TICK_MAX		(SCHED_TICK_TARG + hz)
 #define	SCHED_TICK_SHIFT	10
 #define	SCHED_TICK_HZ(ts)	((ts)->ts_ticks >> SCHED_TICK_SHIFT)
 #define	SCHED_TICK_TOTAL(ts)	(max((ts)->ts_ltick - (ts)->ts_ftick, hz))
 
 /*
  * These macros determine priorities for non-interactive threads.  They are
  * assigned a priority based on their recent cpu utilization as expressed
  * by the ratio of ticks to the tick total.  NHALF priorities at the start
  * and end of the MIN to MAX timeshare range are only reachable with negative
  * or positive nice respectively.
  *
  * PRI_RANGE:	Priority range for utilization dependent priorities.
  * PRI_NRESV:	Number of nice values.
  * PRI_TICKS:	Compute a priority in PRI_RANGE from the ticks count and total.
  * PRI_NICE:	Determines the part of the priority inherited from nice.
  */
 #define	SCHED_PRI_NRESV		(PRIO_MAX - PRIO_MIN)
 #define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
 #define	SCHED_PRI_MIN		(PRI_MIN_BATCH + SCHED_PRI_NHALF)
 #define	SCHED_PRI_MAX		(PRI_MAX_BATCH - SCHED_PRI_NHALF)
 #define	SCHED_PRI_RANGE		(SCHED_PRI_MAX - SCHED_PRI_MIN + 1)
 #define	SCHED_PRI_TICKS(ts)						\
     (SCHED_TICK_HZ((ts)) /						\
     (roundup(SCHED_TICK_TOTAL((ts)), SCHED_PRI_RANGE) / SCHED_PRI_RANGE))
 #define	SCHED_PRI_NICE(nice)	(nice)
 
 /*
  * These determine the interactivity of a process.  Interactivity differs from
  * cpu utilization in that it expresses the voluntary time slept vs time ran
  * while cpu utilization includes all time not running.  This more accurately
  * models the intent of the thread.
  *
  * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
  *		before throttling back.
  * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
  * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
  * INTERACT_THRESH:	Threshold for placement on the current runq.
  */
 #define	SCHED_SLP_RUN_MAX	((hz * 5) << SCHED_TICK_SHIFT)
 #define	SCHED_SLP_RUN_FORK	((hz / 2) << SCHED_TICK_SHIFT)
 #define	SCHED_INTERACT_MAX	(100)
 #define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
 #define	SCHED_INTERACT_THRESH	(30)
 
 /*
  * These parameters determine the slice behavior for batch work.
  */
 #define	SCHED_SLICE_DEFAULT_DIVISOR	10	/* ~94 ms, 12 stathz ticks. */
 #define	SCHED_SLICE_MIN_DIVISOR		6	/* DEFAULT/MIN = ~16 ms. */
 
 /* Flags kept in td_flags. */
 #define	TDF_SLICEEND	TDF_SCHED2	/* Thread time slice is over. */
 
 /*
  * tickincr:		Converts a stathz tick into a hz domain scaled by
  *			the shift factor.  Without the shift the error rate
  *			due to rounding would be unacceptably high.
  * realstathz:		stathz is sometimes 0 and run off of hz.
  * sched_slice:		Runtime of each thread before rescheduling.
  * preempt_thresh:	Priority threshold for preemption and remote IPIs.
  */
 static int __read_mostly sched_interact = SCHED_INTERACT_THRESH;
 static int __read_mostly tickincr = 8 << SCHED_TICK_SHIFT;
 static int __read_mostly realstathz = 127;	/* reset during boot. */
 static int __read_mostly sched_slice = 10;	/* reset during boot. */
 static int __read_mostly sched_slice_min = 1;	/* reset during boot. */
 #ifdef PREEMPTION
 #ifdef FULL_PREEMPTION
 static int __read_mostly preempt_thresh = PRI_MAX_IDLE;
 #else
 static int __read_mostly preempt_thresh = PRI_MIN_KERN;
 #endif
 #else 
 static int __read_mostly preempt_thresh = 0;
 #endif
 static int __read_mostly static_boost = PRI_MIN_BATCH;
 static int __read_mostly sched_idlespins = 10000;
 static int __read_mostly sched_idlespinthresh = -1;
 
 /*
  * tdq - per processor runqs and statistics.  All fields are protected by the
  * tdq_lock.  The load and lowpri may be accessed without the lock to avoid
  * excess locking in sched_pickcpu().
  */
 struct tdq {
 	/* 
 	 * Ordered to improve efficiency of cpu_search() and switch().
 	 * tdq_lock is padded to avoid false sharing with tdq_load and
 	 * tdq_cpu_idle.
 	 */
 	struct mtx_padalign tdq_lock;		/* run queue lock. */
 	struct cpu_group *tdq_cg;		/* Pointer to cpu topology. */
 	volatile int	tdq_load;		/* Aggregate load. */
 	volatile int	tdq_cpu_idle;		/* cpu_idle() is active. */
 	int		tdq_sysload;		/* For loadavg, !ITHD load. */
 	volatile int	tdq_transferable;	/* Transferable thread count. */
 	volatile short	tdq_switchcnt;		/* Switches this tick. */
 	volatile short	tdq_oldswitchcnt;	/* Switches last tick. */
 	u_char		tdq_lowpri;		/* Lowest priority thread. */
 	u_char		tdq_owepreempt;		/* Remote preemption pending. */
 	u_char		tdq_idx;		/* Current insert index. */
 	u_char		tdq_ridx;		/* Current removal index. */
 	int		tdq_id;			/* cpuid. */
 	struct runq	tdq_realtime;		/* real-time run queue. */
 	struct runq	tdq_timeshare;		/* timeshare run queue. */
 	struct runq	tdq_idle;		/* Queue of IDLE threads. */
 	/*
 	 * NOTE(review): presumably the name given to tdq_lock; the buffer
 	 * is sized by TDQ_NAME_LEN, which is built from "sched lock ".
 	 */
 	char		tdq_name[TDQ_NAME_LEN];
 #ifdef KTR
 	/*
 	 * KTR-only name buffer sized by TDQ_LOADNAME_LEN ("CPU " ... " load").
 	 * NOTE(review): presumably labels load trace records; confirm.
 	 */
 	char		tdq_loadname[TDQ_LOADNAME_LEN];
 #endif
 } __aligned(64);
 
 /* Idle thread states and config. */
 #define	TDQ_RUNNING	1
 #define	TDQ_IDLE	2
 
 #ifdef SMP
 struct cpu_group __read_mostly *cpu_top;		/* CPU topology */
 
 /*
  * SCHED_AFFINITY() is true while fewer than (t * affinity) ticks have
  * elapsed since the thread's ts_rltick.
  */
 #define	SCHED_AFFINITY_DEFAULT	(max(1, hz / 1000))
 #define	SCHED_AFFINITY(ts, t)	((ts)->ts_rltick > ticks - ((t) * affinity))
 
 /*
  * Run-time tunables.
  */
 static int rebalance = 1;
 static int balance_interval = 128;	/* Default set in sched_initticks(). */
 static int __read_mostly affinity;	/* Set in sched_initticks(). */
 static int __read_mostly steal_idle = 1;	/* 0 disables tdq_idled(). */
 static int __read_mostly steal_thresh = 2;	/* Min load to steal from. */
 static int __read_mostly always_steal = 0;
 static int __read_mostly trysteal_limit = 2;
 
 /*
  * One thread queue per processor.
  */
 static struct tdq __read_mostly *balance_tdq;	/* Set in sched_setup_smp(). */
 static int balance_ticks;	/* Reloaded by sched_balance(). */
 DPCPU_DEFINE_STATIC(struct tdq, tdq);
 DPCPU_DEFINE_STATIC(uint32_t, randomval);	/* State for sched_random(). */
 
 /* Accessors for the current CPU's queue, a given CPU's queue, and its id. */
 #define	TDQ_SELF()	((struct tdq *)PCPU_GET(sched))
 #define	TDQ_CPU(x)	(DPCPU_ID_PTR((x), tdq))
 #define	TDQ_ID(x)	((x)->tdq_id)
 #else	/* !SMP */
 static struct tdq	tdq_cpu;
 
 /* Without SMP there is a single queue; every accessor resolves to it. */
 #define	TDQ_ID(x)	(0)
 #define	TDQ_SELF()	(&tdq_cpu)
 #define	TDQ_CPU(x)	(&tdq_cpu)
 #endif
 
 /* Spin-mutex helpers operating on a queue's tdq_lock. */
 #define	TDQ_LOCK_ASSERT(t, type)	mtx_assert(TDQ_LOCKPTR((t)), (type))
 #define	TDQ_LOCK(t)		mtx_lock_spin(TDQ_LOCKPTR((t)))
 #define	TDQ_LOCK_FLAGS(t, f)	mtx_lock_spin_flags(TDQ_LOCKPTR((t)), (f))
 #define	TDQ_UNLOCK(t)		mtx_unlock_spin(TDQ_LOCKPTR((t)))
 #define	TDQ_LOCKPTR(t)		((struct mtx *)(&(t)->tdq_lock))
 
 /* Priority and interactivity helpers. */
 static void sched_priority(struct thread *);
 static void sched_thread_priority(struct thread *, u_char);
 static int sched_interact_score(struct thread *);
 static void sched_interact_update(struct thread *);
 static void sched_interact_fork(struct thread *);
 static void sched_pctcpu_update(struct td_sched *, int);
 
 /* Operations on per processor queues */
 static struct thread *tdq_choose(struct tdq *);
 static void tdq_setup(struct tdq *, int i);
 static void tdq_load_add(struct tdq *, struct thread *);
 static void tdq_load_rem(struct tdq *, struct thread *);
 static __inline void tdq_runq_add(struct tdq *, struct thread *, int);
 static __inline void tdq_runq_rem(struct tdq *, struct thread *);
 static inline int sched_shouldpreempt(int, int, int);
 void tdq_print(int cpu);
 static void runq_print(struct runq *rq);
 static void tdq_add(struct tdq *, struct thread *, int);
 #ifdef SMP
 /* Work stealing, CPU selection and load balancing; SMP kernels only. */
 static struct thread *tdq_move(struct tdq *, struct tdq *);
 static int tdq_idled(struct tdq *);
 static void tdq_notify(struct tdq *, struct thread *);
 static struct thread *tdq_steal(struct tdq *, int);
 static struct thread *runq_steal(struct runq *, int);
 static int sched_pickcpu(struct thread *, int);
 static void sched_balance(void);
 static int sched_balance_pair(struct tdq *, struct tdq *);
 static inline struct tdq *sched_setcpu(struct thread *, int, int);
 static inline void thread_unblock_switch(struct thread *, struct mtx *);
 static struct mtx *sched_switch_migrate(struct tdq *, struct thread *, int);
 static int sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS);
 static int sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, 
     struct cpu_group *cg, int indent);
 #endif
 
 /* Initialization hooks registered through SYSINIT. */
 static void sched_setup(void *dummy);
 SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);
 
 static void sched_initticks(void *dummy);
 SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks,
     NULL);
 
 /*
  * Statically defined tracing probes of the "sched" provider.  The string
  * arguments name the C type of each probe argument.
  */
 SDT_PROVIDER_DEFINE(sched);
 
 SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *", 
     "struct proc *", "uint8_t");
 SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *", 
     "struct proc *", "void *");
 SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *", 
     "struct proc *", "void *", "int");
 SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *", 
     "struct proc *", "uint8_t", "struct thread *");
 /* Fired by tdq_load_add()/tdq_load_rem() with the queue id and new load. */
 SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int");
 SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *", 
     "struct proc *");
 SDT_PROBE_DEFINE(sched, , , on__cpu);
 SDT_PROBE_DEFINE(sched, , , remain__cpu);
 SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *", 
     "struct proc *");
 
 /*
  * Print the threads waiting on a run-queue.
  */
 static void
 runq_print(struct runq *rq)
 {
 	struct rqhead *rqh;
 	struct thread *td;
 	int pri;
 	int j;
 	int i;
 
 	for (i = 0; i < RQB_LEN; i++) {
 		printf("\t\trunq bits %d 0x%zx\n",
 		    i, rq->rq_status.rqb_bits[i]);
 		for (j = 0; j < RQB_BPW; j++)
 			if (rq->rq_status.rqb_bits[i] & (1ul << j)) {
 				pri = j + (i << RQB_L2BPW);
 				rqh = &rq->rq_queues[pri];
 				TAILQ_FOREACH(td, rqh, td_runq) {
 					printf("\t\t\ttd %p(%s) priority %d rqindex %d pri %d\n",
 					    td, td->td_name, td->td_priority,
 					    td->td_rqindex, pri);
 				}
 			}
 	}
 }
 
 /*
  * Print the state of the thread queue belonging to the given cpu: its
  * lock, load counters, timeshare indices and the contents of its three
  * run-queues.  Should be a ddb show cmd.
  */
 void
 tdq_print(int cpu)
 {
 	struct tdq *tdq = TDQ_CPU(cpu);
 
 	/* Scalar state. */
 	printf("tdq %d:\n", TDQ_ID(tdq));
 	printf("\tlock            %p\n", TDQ_LOCKPTR(tdq));
 	printf("\tLock name:      %s\n", tdq->tdq_name);
 	printf("\tload:           %d\n", tdq->tdq_load);
 	printf("\tswitch cnt:     %d\n", tdq->tdq_switchcnt);
 	printf("\told switch cnt: %d\n", tdq->tdq_oldswitchcnt);
 	printf("\ttimeshare idx:  %d\n", tdq->tdq_idx);
 	printf("\ttimeshare ridx: %d\n", tdq->tdq_ridx);
 	printf("\tload transferable: %d\n", tdq->tdq_transferable);
 	printf("\tlowest priority:   %d\n", tdq->tdq_lowpri);
 
 	/* Queue contents, in the order they are searched by tdq_choose(). */
 	printf("\trealtime runq:\n");
 	runq_print(&tdq->tdq_realtime);
 
 	printf("\ttimeshare runq:\n");
 	runq_print(&tdq->tdq_timeshare);
 
 	printf("\tidle runq:\n");
 	runq_print(&tdq->tdq_idle);
 }
 
 /*
  * Decide whether a thread of priority pri should preempt a thread running
  * at priority cpri.  remote is non-zero when the running thread is on
  * another processor.  Returns 1 to preempt and 0 otherwise.
  */
 static inline int
 sched_shouldpreempt(int pri, int cpri, int remote)
 {
 
 	/* Not a better priority than what is running: nothing to do. */
 	if (pri >= cpri)
 		return (0);
 
 	/* An idle-class thread is always preempted. */
 	if (cpri >= PRI_MIN_IDLE)
 		return (1);
 
 	/* A zero threshold disables preemption of everything else. */
 	if (preempt_thresh == 0)
 		return (0);
 
 	/*
 	 * Preempt when the new priority is within the threshold, or, for
 	 * remote processors only, when an interactive-or-better thread
 	 * would displace something worse than interactive.
 	 */
 	return (pri <= preempt_thresh ||
 	    (remote && pri <= PRI_MAX_INTERACT && cpri > PRI_MAX_INTERACT));
 }
 
 /*
  * Put a thread on the run-queue matching its priority class and keep the
  * transferable count in sync with what is queued.  Threads in the batch
  * priority range go on the timeshare queue at a slot computed relative to
  * the rotating insert index; everything else is queued by plain priority.
  */
 static __inline void
 tdq_runq_add(struct tdq *tdq, struct thread *td, int flags)
 {
 	struct td_sched *ts;
 	u_char pri;
 
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 
 	ts = td_get_sched(td);
 	pri = td->td_priority;
 	TD_SET_RUNQ(td);
 	if (THREAD_CAN_MIGRATE(td)) {
 		ts->ts_flags |= TSF_XFERABLE;
 		tdq->tdq_transferable++;
 	}
 
 	/* Priorities outside the batch range use ordinary queues. */
 	if (pri < PRI_MIN_BATCH || pri > PRI_MAX_BATCH) {
 		if (pri < PRI_MIN_BATCH)
 			ts->ts_runq = &tdq->tdq_realtime;
 		else
 			ts->ts_runq = &tdq->tdq_idle;
 		runq_add(ts->ts_runq, td, flags);
 		return;
 	}
 
 	ts->ts_runq = &tdq->tdq_timeshare;
 	KASSERT(pri <= PRI_MAX_BATCH && pri >= PRI_MIN_BATCH,
 		("Invalid priority %d on timeshare runq", pri));
 	if ((flags & (SRQ_BORROWING|SRQ_PREEMPTED)) != 0) {
 		/* Borrowing or preempted threads go to the removal slot. */
 		pri = tdq->tdq_ridx;
 	} else {
 		/*
 		 * Only batch priorities land here, so spread them over the
 		 * whole queue and offset by the current insert index.
 		 */
 		pri = RQ_NQS * (pri - PRI_MIN_BATCH) / PRI_BATCH_RANGE;
 		pri = (pri + tdq->tdq_idx) % RQ_NQS;
 		/*
 		 * Never insert at ridx while it trails idx; this shortens
 		 * the queue by one slot so the threads still waiting at
 		 * ridx can drain.
 		 */
 		if (tdq->tdq_ridx != tdq->tdq_idx &&
 		    pri == tdq->tdq_ridx)
 			pri = (unsigned char)(pri - 1) % RQ_NQS;
 	}
 	runq_add_pri(ts->ts_runq, td, pri, flags);
 }
 
 /* 
  * Remove a thread from a run-queue.  This typically happens when a thread
  * is selected to run.  Running threads are not on the queue and the
  * transferable count does not reflect them.
  */
 static __inline void
 tdq_runq_rem(struct tdq *tdq, struct thread *td)
 {
 	struct td_sched *ts;
 
 	ts = td_get_sched(td);
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 	KASSERT(ts->ts_runq != NULL,
 	    ("tdq_runq_remove: thread %p null ts_runq", td));
 	if (ts->ts_flags & TSF_XFERABLE) {
 		tdq->tdq_transferable--;
 		ts->ts_flags &= ~TSF_XFERABLE;
 	}
 	if (ts->ts_runq == &tdq->tdq_timeshare) {
 		if (tdq->tdq_idx != tdq->tdq_ridx)
 			runq_remove_idx(ts->ts_runq, td, &tdq->tdq_ridx);
 		else
 			runq_remove_idx(ts->ts_runq, td, NULL);
 	} else
 		runq_remove(ts->ts_runq, td);
 }
 
 /*
  * Account for a thread that is now RUNNING or ON_RUNQ on this queue.
  * tdq_load counts every such thread; tdq_sysload leaves out threads
  * flagged TDF_NOLOAD.  Both the tdq lock and the thread lock must be held.
  */
 static void
 tdq_load_add(struct tdq *tdq, struct thread *td)
 {
 
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 
 	tdq->tdq_load++;
 	if (!(td->td_flags & TDF_NOLOAD))
 		tdq->tdq_sysload++;
 
 	/* Publish the new load to KTR and to the load-change probe. */
 	KTR_COUNTER0(KTR_SCHED, "load", tdq->tdq_loadname, tdq->tdq_load);
 	SDT_PROBE2(sched, , , load__change, (int)TDQ_ID(tdq), tdq->tdq_load);
 }
 
 /*
  * Drop a thread's contribution to this queue's load, for a thread that is
  * going to sleep or exiting.  Mirrors tdq_load_add(): tdq_sysload is only
  * touched for threads without TDF_NOLOAD.
  */
 static void
 tdq_load_rem(struct tdq *tdq, struct thread *td)
 {
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 	KASSERT(tdq->tdq_load != 0,
 	    ("tdq_load_rem: Removing with 0 load on queue %d", TDQ_ID(tdq)));
 
 	tdq->tdq_load--;
 	if (!(td->td_flags & TDF_NOLOAD))
 		tdq->tdq_sysload--;
 
 	/* Publish the new load to KTR and to the load-change probe. */
 	KTR_COUNTER0(KTR_SCHED, "load", tdq->tdq_loadname, tdq->tdq_load);
 	SDT_PROBE2(sched, , , load__change, (int)TDQ_ID(tdq), tdq->tdq_load);
 }
 
 /*
  * Compute the time slice for this queue.  The slice shrinks as more
  * threads wait so that timeshare latency stays bounded: the target is at
  * most sched_slice of latency, but never less than sched_slice_min of
  * runtime per thread.
  */
 static inline int
 tdq_slice(struct tdq *tdq)
 {
 	int waiting;
 
 	/*
 	 * tdq_sysload is good enough here: callers are running a timeshare
 	 * thread, so no higher priority load can be present.  One is
 	 * subtracted so that curthread itself is not counted.
 	 */
 	waiting = tdq->tdq_sysload - 1;
 	if (waiting >= SCHED_SLICE_MIN_DIVISOR)
 		return (sched_slice_min);
 	return (waiting <= 1 ? sched_slice : sched_slice / waiting);
 }
 
 /*
  * Recompute tdq_lowpri exactly: it becomes the numerically smaller of the
  * priority of the best queued thread and the priority of the thread
  * currently running on the queue's cpu.  Callers that already know the
  * running thread may pass it as ctd; otherwise pass NULL and it is looked
  * up.
  */
 static void
 tdq_setlowpri(struct tdq *tdq, struct thread *ctd)
 {
 	struct thread *queued;
 
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 	if (ctd == NULL)
 		ctd = pcpu_find(TDQ_ID(tdq))->pc_curthread;
 	queued = tdq_choose(tdq);
 	tdq->tdq_lowpri =
 	    (queued != NULL && queued->td_priority <= ctd->td_priority) ?
 	    queued->td_priority : ctd->td_priority;
 }
 
 #ifdef SMP
 /*
  * Cheap per-CPU pseudo-random numbers from a Linear Congruential
  * Generator, X_{n+1} = (a * X_n + c) mod m, with a = 69069, c = 5 and
  * m = 2^32 (the natural wrap of uint32_t).  Only the upper 16 bits of the
  * state are returned, shifted down into the low bits of the result.
  */
 static uint32_t
 sched_random(void)
 {
 	uint32_t *state, next;
 
 	state = DPCPU_PTR(randomval);
 	next = *state * 69069 + 5;
 	*state = next;
 
 	return (next >> 16);
 }
 
 /*
  * Search request and result used by cpu_search().  The caller fills in the
  * constraints; cpu_search() stores the chosen CPU in cs_cpu (or leaves it
  * at the caller's initial value if nothing matched) and its load figure in
  * cs_load.
  */
 struct cpu_search {
 	cpuset_t cs_mask;	/* CPUs that may be selected. */
 	u_int	cs_prefer;	/* CPU given a load bonus in the low search. */
 	int	cs_pri;		/* Min priority for low. */
 	int	cs_limit;	/* Max load for low, min load for high. */
 	int	cs_cpu;		/* Result: selected CPU. */
 	int	cs_load;	/* Result: load value of the selected CPU. */
 };
 
 /* Values for the match argument of cpu_search(). */
 #define	CPU_SEARCH_LOWEST	0x1
 #define	CPU_SEARCH_HIGHEST	0x2
 #define	CPU_SEARCH_BOTH		(CPU_SEARCH_LOWEST|CPU_SEARCH_HIGHEST)
 
 /*
  * cpu_search() is the inlined worker; the three __noinline wrappers are
  * what it calls on child groups.
  */
 static __always_inline int cpu_search(const struct cpu_group *cg,
     struct cpu_search *low, struct cpu_search *high, const int match);
 int __noinline cpu_search_lowest(const struct cpu_group *cg,
     struct cpu_search *low);
 int __noinline cpu_search_highest(const struct cpu_group *cg,
     struct cpu_search *high);
 int __noinline cpu_search_both(const struct cpu_group *cg,
     struct cpu_search *low, struct cpu_search *high);
 
 /*
  * Search the tree of cpu_groups for the lowest or highest loaded cpu
  * according to the match argument.  This routine actually compares the
  * load on all paths through the tree and finds the least loaded cpu on
  * the least loaded path, which may differ from the least loaded cpu in
  * the system.  This balances work among caches and buses.
  *
  * This inline is instantiated in three forms below using constants for the
  * match argument.  It is reduced to the minimum set for each case.  It is
  * also recursive to the depth of the tree.
  *
  * low and high are only dereferenced when the matching CPU_SEARCH_* bit is
  * set in match.  The return value is the sum of the load values of all
  * children and CPUs visited in cg.
  */
 static __always_inline int
 cpu_search(const struct cpu_group *cg, struct cpu_search *low,
     struct cpu_search *high, const int match)
 {
 	struct cpu_search lgroup;
 	struct cpu_search hgroup;
 	cpuset_t cpumask;
 	struct cpu_group *child;
 	struct tdq *tdq;
 	int cpu, i, hload, lload, load, total, rnd;
 
 	total = 0;
 	cpumask = cg->cg_mask;
 	/* lload/hload track the best child load seen so far. */
 	if (match & CPU_SEARCH_LOWEST) {
 		lload = INT_MAX;
 		lgroup = *low;
 	}
 	if (match & CPU_SEARCH_HIGHEST) {
 		hload = INT_MIN;
 		hgroup = *high;
 	}
 
 	/* Iterate through the child CPU groups and then remaining CPUs. */
 	for (i = cg->cg_children, cpu = mp_maxid; ; ) {
 		if (i == 0) {
 			/* No child groups left: pick the next CPU in cpumask. */
 #ifdef HAVE_INLINE_FFSL
 			cpu = CPU_FFS(&cpumask) - 1;
 #else
 			while (cpu >= 0 && !CPU_ISSET(cpu, &cpumask))
 				cpu--;
 #endif
 			if (cpu < 0)
 				break;
 			child = NULL;
 		} else
 			child = &cg->cg_child[i - 1];
 
 		/* Reset the per-child result; -1 means "nothing eligible". */
 		if (match & CPU_SEARCH_LOWEST)
 			lgroup.cs_cpu = -1;
 		if (match & CPU_SEARCH_HIGHEST)
 			hgroup.cs_cpu = -1;
 		if (child) {			/* Handle child CPU group. */
 			/* CPUs covered by the child are not visited again. */
-			CPU_NAND(&cpumask, &child->cg_mask);
+			CPU_ANDNOT(&cpumask, &child->cg_mask);
 			switch (match) {
 			case CPU_SEARCH_LOWEST:
 				load = cpu_search_lowest(child, &lgroup);
 				break;
 			case CPU_SEARCH_HIGHEST:
 				load = cpu_search_highest(child, &hgroup);
 				break;
 			case CPU_SEARCH_BOTH:
 				load = cpu_search_both(child, &lgroup, &hgroup);
 				break;
 			}
 		} else {			/* Handle child CPU. */
 			CPU_CLR(cpu, &cpumask);
 			tdq = TDQ_CPU(cpu);
 			/*
 			 * Scale the load by 256 so that the preference bonus
 			 * (64) and the random jitter (below 32) stay smaller
 			 * than a single unit of real load.
 			 */
 			load = tdq->tdq_load * 256;
 			rnd = sched_random() % 32;
 			if (match & CPU_SEARCH_LOWEST) {
 				if (cpu == low->cs_prefer)
 					load -= 64;
 				/* If that CPU is allowed and get data. */
 				if (tdq->tdq_lowpri > lgroup.cs_pri &&
 				    tdq->tdq_load <= lgroup.cs_limit &&
 				    CPU_ISSET(cpu, &lgroup.cs_mask)) {
 					lgroup.cs_cpu = cpu;
 					lgroup.cs_load = load - rnd;
 				}
 			}
 			if (match & CPU_SEARCH_HIGHEST)
 				if (tdq->tdq_load >= hgroup.cs_limit &&
 				    tdq->tdq_transferable &&
 				    CPU_ISSET(cpu, &hgroup.cs_mask)) {
 					hgroup.cs_cpu = cpu;
 					hgroup.cs_load = load - rnd;
 				}
 		}
 		total += load;
 
 		/* We have info about child item. Compare it. */
 		if (match & CPU_SEARCH_LOWEST) {
 			/* Lower group load wins; ties go to the lower CPU load. */
 			if (lgroup.cs_cpu >= 0 &&
 			    (load < lload ||
 			     (load == lload && lgroup.cs_load < low->cs_load))) {
 				lload = load;
 				low->cs_cpu = lgroup.cs_cpu;
 				low->cs_load = lgroup.cs_load;
 			}
 		}
 		if (match & CPU_SEARCH_HIGHEST)
 			/* Higher group load wins; ties go to the higher CPU load. */
 			if (hgroup.cs_cpu >= 0 &&
 			    (load > hload ||
 			     (load == hload && hgroup.cs_load > high->cs_load))) {
 				hload = load;
 				high->cs_cpu = hgroup.cs_cpu;
 				high->cs_load = hgroup.cs_load;
 			}
 		if (child) {
 			i--;
 			/* Done once all children are seen and no CPU remains. */
 			if (i == 0 && CPU_EMPTY(&cpumask))
 				break;
 		}
 #ifndef HAVE_INLINE_FFSL
 		else
 			cpu--;
 #endif
 	}
 	return (total);
 }
 
 /*
  * Out-of-line instantiations of cpu_search().  Each one hands a constant
  * match value to the inline so that it can be reduced to the code needed
  * for that case only.
  *
  * cpu_search_lowest: search cg for the least loaded eligible CPU.
  */
 int
 cpu_search_lowest(const struct cpu_group *cg, struct cpu_search *low)
 {
 
 	return (cpu_search(cg, low, NULL, CPU_SEARCH_LOWEST));
 }
 
 /* cpu_search_highest: search cg for the most loaded eligible CPU. */
 int
 cpu_search_highest(const struct cpu_group *cg, struct cpu_search *high)
 {
 
 	return (cpu_search(cg, NULL, high, CPU_SEARCH_HIGHEST));
 }
 
 /* cpu_search_both: run the lowest and highest searches in a single pass. */
 int
 cpu_search_both(const struct cpu_group *cg, struct cpu_search *low,
     struct cpu_search *high)
 {
 
 	return (cpu_search(cg, low, high, CPU_SEARCH_BOTH));
 }
 
 /*
  * Find the least loaded cpu, via the least loaded path, among the cpus in
  * mask whose lowpri is greater than pri and whose load does not exceed
  * maxload.  A pri of -1 accepts any priority.  The prefer cpu receives a
  * load bonus in the search.  Returns -1 if no cpu qualifies.
  */
 static inline int
 sched_lowest(const struct cpu_group *cg, cpuset_t mask, int pri, int maxload,
     int prefer)
 {
 	struct cpu_search req;
 
 	req.cs_mask = mask;
 	req.cs_prefer = prefer;
 	req.cs_pri = pri;
 	req.cs_limit = maxload;
 	req.cs_cpu = -1;
 	cpu_search_lowest(cg, &req);
 	return (req.cs_cpu);
 }
 
 /*
  * Find the most loaded cpu, via the most loaded path, among the cpus in
  * mask that carry at least minload.  Returns -1 if no cpu qualifies.
  */
 static inline int
 sched_highest(const struct cpu_group *cg, cpuset_t mask, int minload)
 {
 	struct cpu_search req;
 
 	req.cs_mask = mask;
 	req.cs_limit = minload;
 	req.cs_cpu = -1;
 	cpu_search_highest(cg, &req);
 	return (req.cs_cpu);
 }
 
 /*
  * Balance load inside the cpu group cg.  Repeatedly picks the most loaded
  * cpu that still has transferable threads ("high") and tries to move one
  * thread from it to a less loaded cpu ("low").
  *
  * hmask holds the cpus still eligible as donors; lmask holds the cpus
  * still eligible as receivers for the current donor.  anylow is 1 while no
  * receiver has been rejected for the current donor.
  */
 static void
 sched_balance_group(struct cpu_group *cg)
 {
 	struct tdq *tdq;
 	cpuset_t hmask, lmask;
 	int high, low, anylow;
 
 	CPU_FILL(&hmask);
 	for (;;) {
 		high = sched_highest(cg, hmask, 2);
 		/* Stop if there is no more CPU with transferrable threads. */
 		if (high == -1)
 			break;
 		CPU_CLR(high, &hmask);
 		CPU_COPY(&hmask, &lmask);
 		/* Stop if there is no more CPU left for low. */
 		if (CPU_EMPTY(&lmask))
 			break;
 		anylow = 1;
 		tdq = TDQ_CPU(high);
 nextlow:
 		/* Only cpus with a strictly lower load than high qualify. */
 		low = sched_lowest(cg, lmask, -1, tdq->tdq_load - 1, high);
 		/* Stop if we looked well and found no less loaded CPU. */
 		if (anylow && low == -1)
 			break;
 		/* Go to next high if we found no less loaded CPU. */
 		if (low == -1)
 			continue;
 		/* Transfer thread from high to low. */
 		if (sched_balance_pair(tdq, TDQ_CPU(low))) {
 			/* CPU that got thread can no longer be a donor. */
 			CPU_CLR(low, &hmask);
 		} else {
 			/*
 			 * If that failed, there are no threads on high
 			 * that can run on this low.  Drop low from the low
 			 * mask and look for a different one.
 			 */
 			CPU_CLR(low, &lmask);
 			anylow = 0;
 			goto nextlow;
 		}
 	}
 }
 
 /*
  * Run the long term load balancer over the whole topology and schedule
  * the next run.  The next interval is at least half of balance_interval
  * (minimum 1) plus a random amount below balance_interval.  The caller's
  * tdq lock is released around the balancing pass and re-acquired before
  * returning.
  */
 static void
 sched_balance(void)
 {
 	struct tdq *self;
 	uint32_t jitter;
 	int base;
 
 	/* NOTE(review): the modulo assumes balance_interval != 0 - confirm. */
 	base = max(balance_interval / 2, 1);
 	jitter = sched_random() % balance_interval;
 	balance_ticks = base + jitter;
 
 	self = TDQ_SELF();
 	TDQ_UNLOCK(self);
 	sched_balance_group(cpu_top);
 	TDQ_LOCK(self);
 }
 
 /*
  * Lock two thread queues.  The queue at the lower address is always
  * locked first so that every caller uses the same lock order; the second
  * lock is taken with MTX_DUPOK.
  */
 static void
 tdq_lock_pair(struct tdq *one, struct tdq *two)
 {
 	struct tdq *first, *second;
 
 	if (one < two) {
 		first = one;
 		second = two;
 	} else {
 		first = two;
 		second = one;
 	}
 	TDQ_LOCK(first);
 	TDQ_LOCK_FLAGS(second, MTX_DUPOK);
 }
 
 /*
  * Unlock two thread queues previously locked with tdq_lock_pair().
  * Order is not important here, so the arguments are simply released in
  * the order given.
  */
 static void
 tdq_unlock_pair(struct tdq *one, struct tdq *two)
 {
 	TDQ_UNLOCK(one);
 	TDQ_UNLOCK(two);
 }
 
 /*
  * Transfer load between two imbalanced thread queues.
  */
 static int
 sched_balance_pair(struct tdq *high, struct tdq *low)
 {
 	struct thread *td;
 	int cpu;
 
 	tdq_lock_pair(high, low);
 	td = NULL;
 	/*
 	 * Transfer a thread from high to low.
 	 */
 	if (high->tdq_transferable != 0 && high->tdq_load > low->tdq_load &&
 	    (td = tdq_move(high, low)) != NULL) {
 		/*
 		 * In case the target isn't the current cpu notify it of the
 		 * new load, possibly sending an IPI to force it to reschedule.
 		 */
 		cpu = TDQ_ID(low);
 		if (cpu != PCPU_GET(cpuid))
 			tdq_notify(low, td);
 	}
 	tdq_unlock_pair(high, low);
 	return (td != NULL);
 }
 
 /*
  * Move a thread from one thread queue to another.
  *
  * Both queues must be locked by the caller.  A thread that is able to run
  * on the destination cpu is picked with tdq_steal(), removed from "from"
  * and added to "to".  Returns the moved thread, or NULL if no suitable
  * thread was found.
  */
 static struct thread *
 tdq_move(struct tdq *from, struct tdq *to)
 {
 	struct td_sched *ts;
 	struct thread *td;
 	struct tdq *tdq;
 	int cpu;
 
 	TDQ_LOCK_ASSERT(from, MA_OWNED);
 	TDQ_LOCK_ASSERT(to, MA_OWNED);
 
 	tdq = from;
 	cpu = TDQ_ID(to);
 	td = tdq_steal(tdq, cpu);
 	if (td == NULL)
 		return (NULL);
 	ts = td_get_sched(td);
 	/*
 	 * Although the run queue is locked the thread may be blocked.  Lock
 	 * it to clear this and acquire the run-queue lock.
 	 */
 	thread_lock(td);
 	/* Drop recursive lock on from acquired via thread_lock(). */
 	TDQ_UNLOCK(from);
 	sched_rem(td);
 	/* Re-home the thread: new cpu, new thread lock, then enqueue. */
 	ts->ts_cpu = cpu;
 	td->td_lock = TDQ_LOCKPTR(to);
 	tdq_add(to, td, SRQ_YIELDING);
 	return (td);
 }
 
 /*
  * This tdq has idled.  Try to steal a thread from another cpu and switch
  * to it.
  *
  * Returns 1 when nothing could be stolen (SMP not started, stealing
  * disabled, no cpu group, or no candidate anywhere up the topology), and
  * 0 when this queue has load, either because work was assigned to it in
  * the meantime or because a thread was stolen and switched to.
  */
 static int
 tdq_idled(struct tdq *tdq)
 {
 	struct cpu_group *cg;
 	struct tdq *steal;
 	cpuset_t mask;
 	int cpu, switchcnt;
 
 	if (smp_started == 0 || steal_idle == 0 || tdq->tdq_cg == NULL)
 		return (1);
 	/* Every cpu but ourselves is a candidate victim. */
 	CPU_FILL(&mask);
 	CPU_CLR(PCPU_GET(cpuid), &mask);
     restart:
 	/* Snapshot the switch counters to detect being preempted below. */
 	switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
 	for (cg = tdq->tdq_cg; ; ) {
 		cpu = sched_highest(cg, mask, steal_thresh);
 		/*
 		 * We were assigned a thread but not preempted.  Returning
 		 * 0 here will cause our caller to switch to it.
 		 */
 		if (tdq->tdq_load)
 			return (0);
 		if (cpu == -1) {
 			/* Nothing in this group; widen the search to the parent. */
 			cg = cg->cg_parent;
 			if (cg == NULL)
 				return (1);
 			continue;
 		}
 		steal = TDQ_CPU(cpu);
 		/*
 		 * The data returned by sched_highest() is stale and
 		 * the chosen CPU no longer has an eligible thread.
 		 *
 		 * Testing this ahead of tdq_lock_pair() only catches
 		 * this situation about 20% of the time on an 8 core
 		 * 16 thread Ryzen 7, but it still helps performance.
 		 */
 		if (steal->tdq_load < steal_thresh ||
 		    steal->tdq_transferable == 0)
 			goto restart;
 		tdq_lock_pair(tdq, steal);
 		/*
 		 * We were assigned a thread while waiting for the locks.
 		 * Switch to it now instead of stealing a thread.
 		 */
 		if (tdq->tdq_load)
 			break;
 		/*
 		 * The data returned by sched_highest() is stale and
 		 * the chosen CPU no longer has an eligible thread, or
 		 * we were preempted and the CPU loading info may be out
 		 * of date.  The latter is rare.  In either case restart
 		 * the search.
 		 */
 		if (steal->tdq_load < steal_thresh ||
 		    steal->tdq_transferable == 0 ||
 		    switchcnt != tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt) {
 			tdq_unlock_pair(tdq, steal);
 			goto restart;
 		}
 		/*
 		 * Steal the thread and switch to it.
 		 */
 		if (tdq_move(steal, tdq) != NULL)
 			break;
 		/*
 		 * We failed to acquire a thread even though it looked
 		 * like one was available.  This could be due to affinity
 		 * restrictions or for other reasons.  Loop again after
 		 * removing this CPU from the set.  The restart logic
 		 * above does not restore this CPU to the set due to the
 		 * likelihood of failing here again.
 		 */
 		CPU_CLR(cpu, &mask);
 		tdq_unlock_pair(tdq, steal);
 	}
 	/*
 	 * Both break paths arrive here with the pair locked.  Release the
 	 * victim's lock and switch while still holding our own.
 	 */
 	TDQ_UNLOCK(steal);
 	mi_switch(SW_VOL | SWT_IDLE, NULL);
 	thread_unlock(curthread);
 	return (0);
 }
 
 /*
  * Notify a remote cpu of new work.  Sends an IPI if criteria are met.
  *
  * tdq is the remote queue that td was just added to.  Nothing is done if
  * a preemption is already owed on that queue, or if td's priority does not
  * warrant preempting the thread currently running there.
  */
 static void
 tdq_notify(struct tdq *tdq, struct thread *td)
 {
 	struct thread *ctd;
 	int pri;
 	int cpu;
 
 	/* A preemption request is already pending on this queue. */
 	if (tdq->tdq_owepreempt)
 		return;
 	cpu = td_get_sched(td)->ts_cpu;
 	pri = td->td_priority;
 	ctd = pcpu_find(cpu)->pc_curthread;
 	if (!sched_shouldpreempt(pri, ctd->td_priority, 1))
 		return;
 
 	/*
 	 * Make sure that our caller's earlier update to tdq_load is
 	 * globally visible before we read tdq_cpu_idle.  Idle thread
 	 * accesses both of them without locks, and the order is important.
 	 */
 	atomic_thread_fence_seq_cst();
 
 	if (TD_IS_IDLETHREAD(ctd)) {
 		/*
 		 * If the MD code has an idle wakeup routine try that before
 		 * falling back to IPI.  No IPI is needed either when the
 		 * remote cpu is not inside cpu_idle().
 		 */
 		if (!tdq->tdq_cpu_idle || cpu_idle_wakeup(cpu))
 			return;
 	}
 
 	/*
 	 * The run queues have been updated, so any switch on the remote CPU
 	 * will satisfy the preemption request.
 	 */
 	tdq->tdq_owepreempt = 1;
 	ipi_cpu(cpu, IPI_PREEMPT);
 }
 
 /*
  * Steals load from a timeshare queue.  Honors the rotating queue head
  * index.
  *
  * The queue is scanned starting at slot "start" and wrapping around to
  * slot 0.  The very first thread encountered is skipped and remembered in
  * "first"; any later thread that may migrate and may run on cpu is
  * returned immediately.  If no such thread exists, the remembered first
  * thread is returned if it is itself eligible, otherwise NULL.
  *
  * "first" must keep pointing at the first thread seen.  Overwriting it on
  * every iteration would make the final fallback test the last thread
  * visited instead, and an eligible first thread would never be returned.
  */
 static struct thread *
 runq_steal_from(struct runq *rq, int cpu, u_char start)
 {
 	struct rqbits *rqb;
 	struct rqhead *rqh;
 	struct thread *td, *first;
 	int bit;
 	int i;
 
 	rqb = &rq->rq_status;
 	bit = start & (RQB_BPW -1);
 	first = NULL;
 again:
 	for (i = RQB_WORD(start); i < RQB_LEN; bit = 0, i++) {
 		if (rqb->rqb_bits[i] == 0)
 			continue;
 		if (bit == 0)
 			bit = RQB_FFS(rqb->rqb_bits[i]);
 		for (; bit < RQB_BPW; bit++) {
 			if ((rqb->rqb_bits[i] & (1ul << bit)) == 0)
 				continue;
 			rqh = &rq->rq_queues[bit + (i << RQB_L2BPW)];
 			TAILQ_FOREACH(td, rqh, td_runq) {
 				/* Remember, but pass over, the first thread. */
 				if (first == NULL) {
 					first = td;
 					continue;
 				}
 				if (THREAD_CAN_MIGRATE(td) &&
 				    THREAD_CAN_SCHED(td, cpu))
 					return (td);
 			}
 		}
 	}
 	/* Wrap around once to cover the slots below the start index. */
 	if (start != 0) {
 		start = 0;
 		goto again;
 	}
 
 	/* Nothing else qualified; fall back to the thread skipped earlier. */
 	if (first && THREAD_CAN_MIGRATE(first) &&
 	    THREAD_CAN_SCHED(first, cpu))
 		return (first);
 	return (NULL);
 }
 
 /*
  * Steals load from a standard linear queue.
  */
 static struct thread *
 runq_steal(struct runq *rq, int cpu)
 {
 	struct rqhead *rqh;
 	struct rqbits *rqb;
 	struct thread *td;
 	int word;
 	int bit;
 
 	rqb = &rq->rq_status;
 	for (word = 0; word < RQB_LEN; word++) {
 		if (rqb->rqb_bits[word] == 0)
 			continue;
 		for (bit = 0; bit < RQB_BPW; bit++) {
 			if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
 				continue;
 			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
 			TAILQ_FOREACH(td, rqh, td_runq)
 				if (THREAD_CAN_MIGRATE(td) &&
 				    THREAD_CAN_SCHED(td, cpu))
 					return (td);
 		}
 	}
 	return (NULL);
 }
 
 /*
  * Pick a thread to steal from a thread queue on behalf of cpu.  The
  * realtime queue is tried first, then the timeshare queue starting at its
  * removal index, then the idle queue.  Returns NULL if nothing can be
  * stolen.
  */
 static struct thread *
 tdq_steal(struct tdq *tdq, int cpu)
 {
 	struct thread *td;
 
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 	td = runq_steal(&tdq->tdq_realtime, cpu);
 	if (td == NULL)
 		td = runq_steal_from(&tdq->tdq_timeshare, cpu,
 		    tdq->tdq_ridx);
 	if (td == NULL)
 		td = runq_steal(&tdq->tdq_idle, cpu);
 	return (td);
 }
 
 /*
  * Sets the thread lock and ts_cpu to match the requested cpu.  Unlocks the
  * current lock and returns with the assigned queue locked.
  *
  * The thread lock must be held on entry.  The flags argument is not used
  * by the code below.
  */
 static inline struct tdq *
 sched_setcpu(struct thread *td, int cpu, int flags)
 {
 
 	struct tdq *tdq;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	tdq = TDQ_CPU(cpu);
 	td_get_sched(td)->ts_cpu = cpu;
 	/*
 	 * If the lock matches just return the queue.
 	 */
 	if (td->td_lock == TDQ_LOCKPTR(tdq))
 		return (tdq);
 #ifdef notyet
 	/*
 	 * If the thread isn't running its lockptr is a
 	 * turnstile or a sleepqueue.  We can just lock_set without
 	 * blocking.
 	 */
 	if (TD_CAN_RUN(td)) {
 		TDQ_LOCK(tdq);
 		thread_lock_set(td, TDQ_LOCKPTR(tdq));
 		return (tdq);
 	}
 #endif
 	/*
 	 * The hard case, migration, we need to block the thread first to
 	 * prevent order reversals with other cpus locks.
 	 */
 	spinlock_enter();
 	thread_lock_block(td);
 	TDQ_LOCK(tdq);
 	/* Hand the thread over to the destination queue's lock. */
 	thread_lock_unblock(td, TDQ_LOCKPTR(tdq));
 	spinlock_exit();
 	return (tdq);
 }
 
 /* Statistics counters recording why sched_pickcpu() chose a cpu. */
 SCHED_STAT_DEFINE(pickcpu_intrbind, "Soft interrupt binding");
 SCHED_STAT_DEFINE(pickcpu_idle_affinity, "Picked idle cpu based on affinity");
 SCHED_STAT_DEFINE(pickcpu_affinity, "Picked cpu based on affinity");
 SCHED_STAT_DEFINE(pickcpu_lowest, "Selected lowest load");
 SCHED_STAT_DEFINE(pickcpu_local, "Migrated to current cpu");
 SCHED_STAT_DEFINE(pickcpu_migration, "Selection may have caused migration");
 
 /*
  * Choose the cpu that td should run on next and return its id.
  *
  * The decision is made in stages: keep the thread where it is when it may
  * not migrate, prefer the current cpu for interrupt threads, reuse the
  * last cpu while cache affinity is still valid and that cpu is idle, then
  * search the last level cache group and finally the whole system for the
  * least loaded cpu.  The result is finally compared against the current
  * cpu, which is preferred under the conditions tested at the end.
  */
 static int
 sched_pickcpu(struct thread *td, int flags)
 {
 	struct cpu_group *cg, *ccg;
 	struct td_sched *ts;
 	struct tdq *tdq;
 	cpuset_t mask;
 	int cpu, pri, self, intr;
 
 	self = PCPU_GET(cpuid);
 	ts = td_get_sched(td);
 	KASSERT(!CPU_ABSENT(ts->ts_cpu), ("sched_pickcpu: Start scheduler on "
 	    "absent CPU %d for thread %s.", ts->ts_cpu, td->td_name));
 	if (smp_started == 0)
 		return (self);
 	/*
 	 * Don't migrate a running thread from sched_switch().
 	 */
 	if ((flags & SRQ_OURSELF) || !THREAD_CAN_MIGRATE(td))
 		return (ts->ts_cpu);
 	/*
 	 * Prefer to run interrupt threads on the processors that generate
 	 * the interrupt.
 	 */
 	if (td->td_priority <= PRI_MAX_ITHD && THREAD_CAN_SCHED(td, self) &&
 	    curthread->td_intr_nesting_level) {
 		tdq = TDQ_SELF();
 		/* The current cpu is idle: run the interrupt thread here. */
 		if (tdq->tdq_lowpri >= PRI_MIN_IDLE) {
 			SCHED_STAT_INC(pickcpu_idle_affinity);
 			return (self);
 		}
 		ts->ts_cpu = self;
 		intr = 1;
 		cg = tdq->tdq_cg;
 		goto llc;
 	} else {
 		intr = 0;
 		tdq = TDQ_CPU(ts->ts_cpu);
 		cg = tdq->tdq_cg;
 	}
 	/*
 	 * If the thread can run on the last cpu and the affinity has not
 	 * expired and it is idle, run it there.
 	 */
 	if (THREAD_CAN_SCHED(td, ts->ts_cpu) &&
 	    tdq->tdq_lowpri >= PRI_MIN_IDLE &&
 	    SCHED_AFFINITY(ts, CG_SHARE_L2)) {
 		if (cg->cg_flags & CG_FLAG_THREAD) {
 			/* Check all SMT threads for being idle. */
 			for (cpu = CPU_FFS(&cg->cg_mask) - 1; ; cpu++) {
 				/* A busy sibling ends the check; fall through. */
 				if (CPU_ISSET(cpu, &cg->cg_mask) &&
 				    TDQ_CPU(cpu)->tdq_lowpri < PRI_MIN_IDLE)
 					break;
 				if (cpu >= mp_maxid) {
 					SCHED_STAT_INC(pickcpu_idle_affinity);
 					return (ts->ts_cpu);
 				}
 			}
 		} else {
 			SCHED_STAT_INC(pickcpu_idle_affinity);
 			return (ts->ts_cpu);
 		}
 	}
 llc:
 	/*
 	 * Search for the last level cache CPU group in the tree.
 	 * Skip SMT, identical groups and caches with expired affinity.
 	 * Interrupt threads affinity is explicit and never expires.
 	 */
 	for (ccg = NULL; cg != NULL; cg = cg->cg_parent) {
 		if (cg->cg_flags & CG_FLAG_THREAD)
 			continue;
 		if (cg->cg_children == 1 || cg->cg_count == 1)
 			continue;
 		if (cg->cg_level == CG_SHARE_NONE ||
 		    (!intr && !SCHED_AFFINITY(ts, cg->cg_level)))
 			continue;
 		ccg = cg;
 	}
 	/* Found LLC shared by all CPUs, so do a global search. */
 	if (ccg == cpu_top)
 		ccg = NULL;
 	cpu = -1;
 	mask = td->td_cpuset->cs_mask;
 	pri = td->td_priority;
 	/*
 	 * Try hard to keep interrupts within found LLC.  Search the LLC for
 	 * the least loaded CPU we can run now.  For NUMA systems it should
 	 * be within target domain, and it also reduces scheduling overhead.
 	 */
 	if (ccg != NULL && intr) {
 		cpu = sched_lowest(ccg, mask, pri, INT_MAX, ts->ts_cpu);
 		if (cpu >= 0)
 			SCHED_STAT_INC(pickcpu_intrbind);
 	} else
 	/* Search the LLC for the least loaded idle CPU we can run now. */
 	if (ccg != NULL) {
 		cpu = sched_lowest(ccg, mask, max(pri, PRI_MAX_TIMESHARE),
 		    INT_MAX, ts->ts_cpu);
 		if (cpu >= 0)
 			SCHED_STAT_INC(pickcpu_affinity);
 	}
 	/* Search globally for the least loaded CPU we can run now. */
 	if (cpu < 0) {
 		cpu = sched_lowest(cpu_top, mask, pri, INT_MAX, ts->ts_cpu);
 		if (cpu >= 0)
 			SCHED_STAT_INC(pickcpu_lowest);
 	}
 	/* Search globally for the least loaded CPU. */
 	if (cpu < 0) {
 		cpu = sched_lowest(cpu_top, mask, -1, INT_MAX, ts->ts_cpu);
 		if (cpu >= 0)
 			SCHED_STAT_INC(pickcpu_lowest);
 	}
 	KASSERT(cpu >= 0, ("sched_pickcpu: Failed to find a cpu."));
 	KASSERT(!CPU_ABSENT(cpu), ("sched_pickcpu: Picked absent CPU %d.", cpu));
 	/*
 	 * Compare the lowest loaded cpu to current cpu.  The current cpu
 	 * wins when the thread may run here, would be the best priority
 	 * here, the chosen cpu is not idle, and the local load is at most
 	 * one above the chosen cpu's load.
 	 */
 	tdq = TDQ_CPU(cpu);
 	if (THREAD_CAN_SCHED(td, self) && TDQ_SELF()->tdq_lowpri > pri &&
 	    tdq->tdq_lowpri < PRI_MIN_IDLE &&
 	    TDQ_SELF()->tdq_load <= tdq->tdq_load + 1) {
 		SCHED_STAT_INC(pickcpu_local);
 		cpu = self;
 	}
 	if (cpu != ts->ts_cpu)
 		SCHED_STAT_INC(pickcpu_migration);
 	return (cpu);
 }
 #endif
 
 /*
  * Return the best thread queued on this tdq without removing it, or NULL
  * if all queues are empty.  The realtime queue is consulted first, then
  * the timeshare queue starting at its removal index, then the idle queue.
  */
 static struct thread *
 tdq_choose(struct tdq *tdq)
 {
 	struct thread *td;
 
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 
 	if ((td = runq_choose(&tdq->tdq_realtime)) != NULL)
 		return (td);
 
 	if ((td = runq_choose_from(&tdq->tdq_timeshare,
 	    tdq->tdq_ridx)) != NULL) {
 		KASSERT(td->td_priority >= PRI_MIN_BATCH,
 		    ("tdq_choose: Invalid priority on timeshare queue %d",
 		    td->td_priority));
 		return (td);
 	}
 
 	/* Last resort: the idle queue, which may be empty as well. */
 	td = runq_choose(&tdq->tdq_idle);
 	KASSERT(td == NULL || td->td_priority >= PRI_MIN_IDLE,
 	    ("tdq_choose: Invalid priority on idle queue %d",
 	    td->td_priority));
 	return (td);
 }
 
 /*
  * Initialize a thread queue for the cpu with the given id: record the id,
  * set up the three run-queues, and create the queue's recursive spin
  * mutex under a per-queue name.
  */
 static void
 tdq_setup(struct tdq *tdq, int id)
 {
 
 	if (bootverbose)
 		printf("ULE: setup cpu %d\n", id);
 
 	tdq->tdq_id = id;
 	runq_init(&tdq->tdq_realtime);
 	runq_init(&tdq->tdq_timeshare);
 	runq_init(&tdq->tdq_idle);
 
 	/* The lock name embeds the queue id, so build it before mtx_init(). */
 	snprintf(tdq->tdq_name, sizeof(tdq->tdq_name),
 	    "sched lock %d", (int)TDQ_ID(tdq));
 	mtx_init(&tdq->tdq_lock, tdq->tdq_name, "sched lock",
 	    MTX_SPIN | MTX_RECURSE);
 #ifdef KTR
 	/* Name under which the load counter is reported to KTR. */
 	snprintf(tdq->tdq_loadname, sizeof(tdq->tdq_loadname),
 	    "CPU %d load", (int)TDQ_ID(tdq));
 #endif
 }
 
 #ifdef SMP
 /*
  * SMP part of scheduler setup: fetch the cpu topology, initialize the
  * thread queue of every cpu and attach it to its cpu group, then publish
  * the current cpu's queue and make it the balancing queue.
  */
 static void
 sched_setup_smp(void)
 {
 	struct tdq *q;
 	int cpu;
 
 	cpu_top = smp_topo();
 	CPU_FOREACH(cpu) {
 		q = TDQ_CPU(cpu);
 		tdq_setup(q, cpu);
 		q->tdq_cg = smp_topo_find(cpu_top, cpu);
 		if (q->tdq_cg == NULL)
 			panic("Can't find cpu group for %d\n", cpu);
 	}
 	PCPU_SET(sched, DPCPU_PTR(tdq));
 	balance_tdq = TDQ_SELF();
 }
 #endif
 
 /*
  * Setup the thread queues and initialize the topology based on MD
  * information.
  *
  * Runs as a SYSINIT hook; the dummy argument is not used.  After the
  * queues exist, thread0 is attached to the current cpu's queue, since it
  * is already running.
  */
 static void
 sched_setup(void *dummy)
 {
 	struct tdq *tdq;
 
 #ifdef SMP
 	sched_setup_smp();
 #else
 	tdq_setup(TDQ_SELF(), 0);
 #endif
 	/* Must follow the setup above, which makes TDQ_SELF() usable. */
 	tdq = TDQ_SELF();
 
 	/* Add thread0's load since it's running. */
 	TDQ_LOCK(tdq);
 	thread0.td_lock = TDQ_LOCKPTR(tdq);
 	tdq_load_add(tdq, &thread0);
 	tdq->tdq_lowpri = thread0.td_priority;
 	TDQ_UNLOCK(tdq);
 }
 
 /*
  * This routine determines time constants after stathz and hz are setup.
  */
 /* ARGSUSED */
 static void
 sched_initticks(void *dummy)
 {
 	int scaled;
 
 	/* Use hz when stathz is zero. */
 	realstathz = (stathz != 0) ? stathz : hz;
 	sched_slice = realstathz / SCHED_SLICE_DEFAULT_DIVISOR;
 	sched_slice_min = sched_slice / SCHED_SLICE_MIN_DIVISOR;
 	hogticks = imax(1,
 	    (2 * hz * sched_slice + realstathz / 2) / realstathz);
 
 	/*
 	 * tickincr is hz / realstathz, pre-shifted by SCHED_TICK_SHIFT to
 	 * avoid rounding errors when hz is not evenly divisible by stathz.
 	 *
 	 * The quotient is zero only for values of stathz that are more than
 	 * 1 << SCHED_TICK_SHIFT * hz, which does not happen in practice;
 	 * clamp to 1 in that case.
 	 */
 	scaled = (hz << SCHED_TICK_SHIFT) / realstathz;
 	tickincr = (scaled == 0) ? 1 : scaled;
 #ifdef SMP
 	/*
 	 * realstathz is now known, so the default balance interval and
 	 * affinity can be set.
 	 */
 	balance_interval = realstathz;
 	balance_ticks = balance_interval;
 	affinity = SCHED_AFFINITY_DEFAULT;
 #endif
 	/* A negative threshold means "not configured"; derive a default. */
 	if (sched_idlespinthresh < 0)
 		sched_idlespinthresh = 2 * max(10000, 6 * hz) / realstathz;
 }
 
 
 /*
  * This is the core of the interactivity algorithm.  Determines a score based
  * on past behavior.  It is the ratio of sleep time to run time scaled to
  * a [0, 100] integer.  This is the voluntary sleep time of a process, which
  * differs from the cpu usage because it does not account for time spent
  * waiting on a run-queue.  Would be prettier if we had floating point.
  *
  * When a thread's sleep time is greater than its run time the
  * calculation is:
  *
  *                           scaling factor 
  * interactivity score =  ---------------------
  *                        sleep time / run time
  *
  *
  * When a thread's run time is greater than its sleep time the
  * calculation is:
  *
  *                           scaling factor 
  * interactivity score =  ---------------------    + scaling factor
  *                        run time / sleep time
  */
 static int
 sched_interact_score(struct thread *td)
 {
 	struct td_sched *ts;
 	int div;
 
 	ts = td_get_sched(td);
 
 	/*
 	 * Shortcut: a thread that has run at least as long as it has slept
 	 * scores at least SCHED_INTERACT_HALF.  When the interactivity
 	 * threshold is no higher than that, the exact score cannot matter,
 	 * so skip the division.
 	 */
 	if (ts->ts_runtime >= ts->ts_slptime &&
 	    sched_interact <= SCHED_INTERACT_HALF)
 		return (SCHED_INTERACT_HALF);
 
 	/* Mostly sleeping: score falls in the lower half of the range. */
 	if (ts->ts_slptime > ts->ts_runtime) {
 		div = max(1, ts->ts_slptime / SCHED_INTERACT_HALF);
 		return (ts->ts_runtime / div);
 	}
 
 	/* Mostly running: score falls in the upper half of the range. */
 	if (ts->ts_runtime > ts->ts_slptime) {
 		div = max(1, ts->ts_runtime / SCHED_INTERACT_HALF);
 		return (SCHED_INTERACT_HALF +
 		    (SCHED_INTERACT_HALF - (ts->ts_slptime / div)));
 	}
 
 	/*
 	 * runtime == slptime.  Both being zero yields a score of 0,
 	 * otherwise the thread sits exactly at the midpoint.
 	 */
 	return (ts->ts_runtime != 0 ? SCHED_INTERACT_HALF : 0);
 }
 
 /*
  * Scale the scheduling priority according to the "interactivity" of this
  * process.
  */
 static void
 sched_priority(struct thread *td)
 {
 	int score;
 	int pri;
 
 	if (PRI_BASE(td->td_pri_class) != PRI_TIMESHARE)
 		return;
 	/*
 	 * If the score is interactive we place the thread in the realtime
 	 * queue with a priority that is less than kernel and interrupt
 	 * priorities.  These threads are not subject to nice restrictions.
 	 *
 	 * Scores greater than this are placed on the normal timeshare queue
 	 * where the priority is partially decided by the most recent cpu
 	 * utilization and the rest is decided by nice value.
 	 *
 	 * The nice value of the process has a linear effect on the calculated
 	 * score.  Negative nice values make it easier for a thread to be
 	 * considered interactive.
 	 */
 	score = imax(0, sched_interact_score(td) + td->td_proc->p_nice);
 	if (score < sched_interact) {
 		pri = PRI_MIN_INTERACT;
 		pri += ((PRI_MAX_INTERACT - PRI_MIN_INTERACT + 1) /
 		    sched_interact) * score;
 		KASSERT(pri >= PRI_MIN_INTERACT && pri <= PRI_MAX_INTERACT,
 		    ("sched_priority: invalid interactive priority %d score %d",
 		    pri, score));
 	} else {
 		pri = SCHED_PRI_MIN;
 		if (td_get_sched(td)->ts_ticks)
 			pri += min(SCHED_PRI_TICKS(td_get_sched(td)),
 			    SCHED_PRI_RANGE - 1);
 		pri += SCHED_PRI_NICE(td->td_proc->p_nice);
 		KASSERT(pri >= PRI_MIN_BATCH && pri <= PRI_MAX_BATCH,
 		    ("sched_priority: invalid priority %d: nice %d, " 
 		    "ticks %d ftick %d ltick %d tick pri %d",
 		    pri, td->td_proc->p_nice, td_get_sched(td)->ts_ticks,
 		    td_get_sched(td)->ts_ftick, td_get_sched(td)->ts_ltick,
 		    SCHED_PRI_TICKS(td_get_sched(td))));
 	}
 	sched_user_prio(td, pri);
 
 	return;
 }
 
 /*
  * This routine enforces a maximum limit on the amount of scheduling history
  * kept.  It is called after either the slptime or runtime is adjusted.  This
  * function is ugly due to integer math.
  */
 static void
 sched_interact_update(struct thread *td)
 {
 	struct td_sched *ts;
 	u_int total;
 
 	ts = td_get_sched(td);
 	total = ts->ts_runtime + ts->ts_slptime;
 
 	/* Still under the history cap: nothing to trim. */
 	if (total < SCHED_SLP_RUN_MAX)
 		return;
 
 	if (total > SCHED_SLP_RUN_MAX * 2) {
 		/*
 		 * Far over the cap.  This only happens from two places:
 		 * 1) An unusual amount of run time was added from fork_exit.
 		 * 2) An unusual amount of sleep time was added from
 		 *    sched_sleep().
 		 * Saturate the dominant component and reset the other.
 		 */
 		if (ts->ts_runtime > ts->ts_slptime) {
 			ts->ts_runtime = SCHED_SLP_RUN_MAX;
 			ts->ts_slptime = 1;
 		} else {
 			ts->ts_slptime = SCHED_SLP_RUN_MAX;
 			ts->ts_runtime = 1;
 		}
 	} else if (total > (SCHED_SLP_RUN_MAX / 5) * 6) {
 		/*
 		 * Over by more than 1/5th: the 4/5 scaling below would not
 		 * bring the sum back into range, so halve both instead.
 		 */
 		ts->ts_runtime /= 2;
 		ts->ts_slptime /= 2;
 	} else {
 		/* Slightly over: scale both components to 4/5. */
 		ts->ts_runtime = (ts->ts_runtime / 5) * 4;
 		ts->ts_slptime = (ts->ts_slptime / 5) * 4;
 	}
 }
 
 /*
  * Scale back the interactivity history when a child thread is created.  The
  * history is inherited from the parent but the thread may behave totally
  * differently.  For example, a shell spawning a compiler process.  We want
  * to learn that the compiler is behaving badly very quickly.
  */
 static void
 sched_interact_fork(struct thread *td)
 {
 	struct td_sched *ts;
 	int ratio;
 	int sum;
 
 	ts = td_get_sched(td);
 	sum = ts->ts_runtime + ts->ts_slptime;
 
 	/* History already small enough to be overridden quickly. */
 	if (sum <= SCHED_SLP_RUN_FORK)
 		return;
 
 	/* Shrink both components by the same integer factor. */
 	ratio = sum / SCHED_SLP_RUN_FORK;
 	ts->ts_runtime /= ratio;
 	ts->ts_slptime /= ratio;
 }
 
 /*
  * Called from proc0_init() to setup the scheduler fields.
  */
 void
 schedinit(void)
 {
 	struct td_sched *ts;
 
 	/*
 	 * Initialize the scheduler-private state of thread0: the cpu
 	 * accounting window starts and ends at the current tick, no slice
 	 * has been consumed, and the thread is recorded on the current cpu.
 	 */
 	ts = td_get_sched(&thread0);
 	ts->ts_ltick = ticks;
 	ts->ts_ftick = ticks;
 	ts->ts_cpu = curcpu;	/* set valid CPU number */
 	ts->ts_slice = 0;
 }
 
 /*
  * This is only somewhat accurate since given many processes of the same
  * priority they will switch when their slices run out, which will be
  * at most sched_slice stathz ticks.
  */
 int
 sched_rr_interval(void)
 {
 	int hzticks;
 
 	/*
 	 * sched_slice is in stathz ticks; convert to hz ticks, rounding to
 	 * nearest, and never report less than one tick.
 	 */
 	hzticks = (sched_slice * hz + realstathz / 2) / realstathz;
 	return (imax(1, hzticks));
 }
 
 /*
  * Update the percent cpu tracking information when it is requested or
  * the total history exceeds the maximum.  We keep a sliding history of
  * tick counts that slowly decays.  This is less precise than the 4BSD
  * mechanism since it happens with less regular and frequent events.
  */
 static void
 sched_pctcpu_update(struct td_sched *ts, int run)
 {
 	int now;
 
 	/* Sample the tick counter once so every step sees the same value. */
 	now = ticks;
 
 	/*
 	 * Compare as unsigned: the signed difference may be negative if the
 	 * thread hasn't run for over half of the ticks rollover period.
 	 */
 	if ((u_int)(now - ts->ts_ltick) >= SCHED_TICK_TARG) {
 		/* The whole window is stale; start over with no history. */
 		ts->ts_ticks = 0;
 		ts->ts_ftick = now - SCHED_TICK_TARG;
 	} else if (now - ts->ts_ftick >= SCHED_TICK_MAX) {
 		/*
 		 * The window grew past its maximum: keep the share of the
 		 * tick count that falls inside the last SCHED_TICK_TARG
 		 * ticks and slide the window start forward.
 		 */
 		ts->ts_ticks = (ts->ts_ticks / (ts->ts_ltick - ts->ts_ftick)) *
 		    (ts->ts_ltick - (now - SCHED_TICK_TARG));
 		ts->ts_ftick = now - SCHED_TICK_TARG;
 	}
 
 	/* Credit the elapsed ticks only if the thread was running. */
 	if (run)
 		ts->ts_ticks += (now - ts->ts_ltick) << SCHED_TICK_SHIFT;
 	ts->ts_ltick = now;
 }
 
 /*
  * Adjust the priority of a thread.  Move it to the appropriate run-queue
  * if necessary.  This is the back-end for several priority related
  * functions.
  */
 static void
 sched_thread_priority(struct thread *td, u_char prio)
 {
 	struct td_sched *ts;
 	struct tdq *tdq;
 	int oldpri;
 
 	/* Tracing: record the change, and a "lend" event when another
 	 * thread's priority is being raised (numerically lowered). */
 	KTR_POINT3(KTR_SCHED, "thread", sched_tdname(td), "prio",
 	    "prio:%d", td->td_priority, "new prio:%d", prio,
 	    KTR_ATTR_LINKED, sched_tdname(curthread));
 	SDT_PROBE3(sched, , , change__pri, td, td->td_proc, prio);
 	if (td != curthread && prio < td->td_priority) {
 		KTR_POINT3(KTR_SCHED, "thread", sched_tdname(curthread),
 		    "lend prio", "prio:%d", td->td_priority, "new prio:%d",
 		    prio, KTR_ATTR_LINKED, sched_tdname(td));
 		SDT_PROBE4(sched, , , lend__pri, td, td->td_proc, prio, 
 		    curthread);
 	} 
 	ts = td_get_sched(td);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	/* Nothing to do when the priority is unchanged. */
 	if (td->td_priority == prio)
 		return;
 	/*
 	 * If the priority has been elevated due to priority
 	 * propagation, we may have to move ourselves to a new
 	 * queue.  This could be optimized to not re-add in some
 	 * cases.
 	 */
 	if (TD_ON_RUNQ(td) && prio < td->td_priority) {
 		/* Dequeue, update, and requeue so the thread lands on the
 		 * run-queue matching its new priority. */
 		sched_rem(td);
 		td->td_priority = prio;
 		sched_add(td, SRQ_BORROWING);
 		return;
 	}
 	/*
 	 * If the thread is currently running we may have to adjust the lowpri
 	 * information so other cpus are aware of our current priority.
 	 */
 	if (TD_IS_RUNNING(td)) {
 		tdq = TDQ_CPU(ts->ts_cpu);
 		oldpri = td->td_priority;
 		td->td_priority = prio;
 		/*
 		 * A better (lower) value simply becomes the new lowpri.  If
 		 * this thread previously defined lowpri, have
 		 * tdq_setlowpri() recompute it.
 		 */
 		if (prio < tdq->tdq_lowpri)
 			tdq->tdq_lowpri = prio;
 		else if (tdq->tdq_lowpri == oldpri)
 			tdq_setlowpri(tdq, td);
 		return;
 	}
 	/* Neither queued-and-boosted nor running: just store the value. */
 	td->td_priority = prio;
 }
 
 /*
  * Update a thread's priority when it is lent another thread's
  * priority.
  */
 void
 sched_lend_prio(struct thread *td, u_char prio)
 {
 
 	/* Mark the thread as running on borrowed priority, then apply it. */
 	td->td_flags = td->td_flags | TDF_BORROWING;
 	sched_thread_priority(td, prio);
 }
 
 /*
  * Restore a thread's priority when priority propagation is
  * over.  The prio argument is the minimum priority the thread
  * needs to have to satisfy other possible priority lending
  * requests.  If the thread's regular priority is less
  * important than prio, the thread will keep a priority boost
  * of prio.
  */
 void
 sched_unlend_prio(struct thread *td, u_char prio)
 {
 	u_char own_pri;
 
 	/*
 	 * The thread's own priority is its user priority when the base
 	 * priority lies in the timeshare range, its base priority otherwise.
 	 */
 	own_pri = (td->td_base_pri >= PRI_MIN_TIMESHARE &&
 	    td->td_base_pri <= PRI_MAX_TIMESHARE) ?
 	    td->td_user_pri : td->td_base_pri;
 
 	/* Another lender still requires a better priority: keep borrowing. */
 	if (prio < own_pri) {
 		sched_lend_prio(td, prio);
 		return;
 	}
 
 	/* No boost needed any more; drop back to the thread's own value. */
 	td->td_flags &= ~TDF_BORROWING;
 	sched_thread_priority(td, own_pri);
 }
 
 /*
  * Standard entry for setting the priority to an absolute value.
  */
 void
 sched_prio(struct thread *td, u_char prio)
 {
 	u_char prev;
 
 	/* The base priority is always updated. */
 	td->td_base_pri = prio;
 
 	/*
 	 * While borrowing another thread's priority the effective priority
 	 * is never made worse; only the base recorded above changes.
 	 */
 	if ((td->td_flags & TDF_BORROWING) != 0 && td->td_priority < prio)
 		return;
 
 	/* Apply the new effective priority, remembering the old one. */
 	prev = td->td_priority;
 	sched_thread_priority(td, prio);
 
 	/*
 	 * A thread blocked on a turnstile needs the turnstile told about
 	 * the change so it can update its state.
 	 */
 	if (prev != prio && TD_ON_LOCK(td))
 		turnstile_adjust(td, prev);
 }
 
 /*
  * Set the base user priority; does not affect the current running priority.
  */
 void
 sched_user_prio(struct thread *td, u_char prio)
 {
 
 	td->td_base_user_pri = prio;
 
 	/*
 	 * A lent user priority that is at least as good (numerically no
 	 * greater) stays in effect; otherwise the new base takes over.
 	 */
 	if (td->td_lend_user_pri > prio)
 		td->td_user_pri = prio;
 }
 
 void
 sched_lend_user_prio(struct thread *td, u_char prio)
 {
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 
 	/* The effective user priority is the better of lent and base. */
 	td->td_lend_user_pri = prio;
 	td->td_user_pri = min(prio, td->td_base_user_pri);
 
 	if (td->td_priority > td->td_user_pri) {
 		/* Currently worse than the user priority: raise it now. */
 		sched_prio(td, td->td_user_pri);
 	} else if (td->td_priority < td->td_user_pri) {
 		/* Currently better than the user priority: ask for a resched. */
 		td->td_flags |= TDF_NEEDRESCHED;
 	}
 }
 
 /*
  * Like sched_lend_user_prio(), but first check without the thread lock
  * whether there is anything to do, and only take the lock when there is.
  *
  * sched_lend_user_prio() is a no-op exactly when all three hold:
  *   - td_lend_user_pri already equals prio,
  *   - td_user_pri already equals min(prio, td_base_user_pri), and
  *   - td_priority equals td_user_pri (it neither calls sched_prio() nor
  *     sets TDF_NEEDRESCHED in that case).
  * Any other state must go through the locked path.
  */
 void
 sched_lend_user_prio_cond(struct thread *td, u_char prio)
 {
 
 	if (td->td_lend_user_pri != prio)
 		goto lend;
 	if (td->td_user_pri != min(prio, td->td_base_user_pri))
 		goto lend;
 	/*
 	 * Must mirror the checks in sched_lend_user_prio(): work is needed
 	 * whenever the two priorities differ, in either direction.  Testing
 	 * with ">=" here would take the lock needlessly when they are equal
 	 * and would skip the TDF_NEEDRESCHED request when td_priority is
 	 * numerically lower than td_user_pri.
 	 */
 	if (td->td_priority != td->td_user_pri)
 		goto lend;
 	return;
 
 lend:
 	thread_lock(td);
 	sched_lend_user_prio(td, prio);
 	thread_unlock(td);
 }
 
 #ifdef SMP
 /*
  * This tdq is about to idle.  Try to steal a thread from another CPU before
  * choosing the idle thread.
  */
 static void
 tdq_trysteal(struct tdq *tdq)
 {
 	struct cpu_group *cg;
 	struct tdq *steal;
 	cpuset_t mask;
 	int cpu, i;
 
 	/* Stealing is pointless before SMP is up, when disabled via
 	 * trysteal_limit, or when this queue has no topology group. */
 	if (smp_started == 0 || trysteal_limit == 0 || tdq->tdq_cg == NULL)
 		return;
 	/* Consider every CPU except the current one. */
 	CPU_FILL(&mask);
 	CPU_CLR(PCPU_GET(cpuid), &mask);
 	/* We don't want to be preempted while we're iterating. */
 	spinlock_enter();
 	TDQ_UNLOCK(tdq);
 	/*
 	 * Search outward from our own cpu group, moving to the parent group
 	 * each time no candidate is found, for at most trysteal_limit levels.
 	 * Every exit from the loop leaves tdq locked again.
 	 */
 	for (i = 1, cg = tdq->tdq_cg; ; ) {
 		cpu = sched_highest(cg, mask, steal_thresh);
 		/*
 		 * If a thread was added while interrupts were disabled don't
 		 * steal one here.
 		 */
 		if (tdq->tdq_load > 0) {
 			TDQ_LOCK(tdq);
 			break;
 		}
 		if (cpu == -1) {
 			i++;
 			cg = cg->cg_parent;
 			if (cg == NULL || i > trysteal_limit) {
 				TDQ_LOCK(tdq);
 				break;
 			}
 			continue;
 		}
 		steal = TDQ_CPU(cpu);
 		/*
 		 * The data returned by sched_highest() is stale and
 		 * the chosen CPU no longer has an eligible thread.
 		 */
 		if (steal->tdq_load < steal_thresh ||
 		    steal->tdq_transferable == 0)
 			continue;
 		tdq_lock_pair(tdq, steal);
 		/*
 		 * If we get to this point, unconditionally exit the loop
 		 * to bound the time spent in the critical section.
 		 *
 		 * If a thread was added while interrupts were disabled don't
 		 * steal one here.
 		 */
 		if (tdq->tdq_load > 0) {
 			TDQ_UNLOCK(steal);
 			break;
 		}
 		/*
 		 * The data returned by sched_highest() is stale and
 		 * the chosen CPU no longer has an eligible thread.
 		 */
 		if (steal->tdq_load < steal_thresh ||
 		    steal->tdq_transferable == 0) {
 			TDQ_UNLOCK(steal);
 			break;
 		}
 		/*
 		 * If we fail to acquire one due to affinity restrictions,
 		 * bail out and let the idle thread do a more complete search
 		 * outside of a critical section.
 		 */
 		if (tdq_move(steal, tdq) == NULL) {
 			TDQ_UNLOCK(steal);
 			break;
 		}
 		TDQ_UNLOCK(steal);
 		break;
 	}
 	spinlock_exit();
 }
 #endif
 
 /*
  * Handle migration from sched_switch().  This happens only for
  * cpu binding.
  */
 static struct mtx *
 sched_switch_migrate(struct tdq *tdq, struct thread *td, int flags)
 {
 	struct tdq *tdn;
 
 	KASSERT(!CPU_ABSENT(td_get_sched(td)->ts_cpu), ("sched_switch_migrate: "
 	    "thread %s queued on absent CPU %d.", td->td_name,
 	    td_get_sched(td)->ts_cpu));
 	/* tdn is the destination queue, selected by the thread's ts_cpu. */
 	tdn = TDQ_CPU(td_get_sched(td)->ts_cpu);
 #ifdef SMP
 	/* The thread no longer counts toward the source queue's load. */
 	tdq_load_rem(tdq, td);
 	/*
 	 * Do the lock dance required to avoid LOR.  We grab an extra
 	 * spinlock nesting to prevent preemption while we're
 	 * not holding either run-queue lock.
 	 */
 	spinlock_enter();
 	thread_lock_block(td);	/* This releases the lock on tdq. */
 
 	/*
 	 * Acquire both run-queue locks before placing the thread on the new
 	 * run-queue to avoid deadlocks created by placing a thread with a
 	 * blocked lock on the run-queue of a remote processor.  The deadlock
 	 * occurs when a third processor attempts to lock the two queues in
 	 * question while the target processor is spinning with its own
 	 * run-queue lock held while waiting for the blocked lock to clear.
 	 */
 	tdq_lock_pair(tdn, tdq);
 	tdq_add(tdn, td, flags);
 	tdq_notify(tdn, td);
 	/* Only the destination lock is dropped; tdq stays locked. */
 	TDQ_UNLOCK(tdn);
 	spinlock_exit();
 #endif
 	/* Return the destination queue's lock pointer to the caller. */
 	return (TDQ_LOCKPTR(tdn));
 }
 
 /*
  * Variant of thread_lock_unblock() that does not assume td_lock
  * is blocked.
  */
 static inline void
 thread_unblock_switch(struct thread *td, struct mtx *mtx)
 {
 	volatile uintptr_t *lockp;
 
 	/* Publish the new lock pointer with release semantics. */
 	lockp = (volatile uintptr_t *)&td->td_lock;
 	atomic_store_rel_ptr(lockp, (uintptr_t)mtx);
 }
 
 /*
  * Switch threads.  This function has to handle threads coming in while
  * blocked for some reason, running, or idle.  It also must deal with
  * migrating a thread from one queue to another as running threads may
  * be assigned elsewhere via binding.
  */
 void
 sched_switch(struct thread *td, struct thread *newtd, int flags)
 {
 	struct tdq *tdq;
 	struct td_sched *ts;
 	struct mtx *mtx;
 	int srqflag;
 	int cpuid, preempted;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	/* The caller may not nominate the next thread; it is chosen below. */
 	KASSERT(newtd == NULL, ("sched_switch: Unsupported newtd argument"));
 
 	cpuid = PCPU_GET(cpuid);
 	tdq = TDQ_SELF();
 	ts = td_get_sched(td);
 	/* mtx is the lock handed to cpu_switch()/thread_unblock_switch(). */
 	mtx = td->td_lock;
 	sched_pctcpu_update(ts, 1);
 	ts->ts_rltick = ticks;
 	td->td_lastcpu = td->td_oncpu;
 	td->td_oncpu = NOCPU;
 	/* Preempted means an SW_PREEMPT switch that was not a slice end. */
 	preempted = (td->td_flags & TDF_SLICEEND) == 0 &&
 	    (flags & SW_PREEMPT) != 0;
 	td->td_flags &= ~(TDF_NEEDRESCHED | TDF_SLICEEND);
 	td->td_owepreempt = 0;
 	tdq->tdq_owepreempt = 0;
 	if (!TD_IS_IDLETHREAD(td))
 		tdq->tdq_switchcnt++;
 
 	/*
 	 * The lock pointer in an idle thread should never change.  Reset it
 	 * to CAN_RUN as well.
 	 */
 	if (TD_IS_IDLETHREAD(td)) {
 		MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
 		TD_SET_CAN_RUN(td);
 	} else if (TD_IS_RUNNING(td)) {
 		/* Still runnable: requeue locally or migrate elsewhere. */
 		MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
 		srqflag = preempted ?
 		    SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
 		    SRQ_OURSELF|SRQ_YIELDING;
 #ifdef SMP
 		/* Pick a new cpu if the current one is no longer permitted. */
 		if (THREAD_CAN_MIGRATE(td) && !THREAD_CAN_SCHED(td, ts->ts_cpu))
 			ts->ts_cpu = sched_pickcpu(td, 0);
 #endif
 		if (ts->ts_cpu == cpuid)
 			tdq_runq_add(tdq, td, srqflag);
 		else {
 			KASSERT(THREAD_CAN_MIGRATE(td) ||
 			    (ts->ts_flags & TSF_BOUND) != 0,
 			    ("Thread %p shouldn't migrate", td));
 			mtx = sched_switch_migrate(tdq, td, srqflag);
 		}
 	} else {
 		/* This thread must be going to sleep. */
 		TDQ_LOCK(tdq);
 		mtx = thread_lock_block(td);
 		tdq_load_rem(tdq, td);
 #ifdef SMP
 		/* The queue just went empty; try to pull work from another. */
 		if (tdq->tdq_load == 0)
 			tdq_trysteal(tdq);
 #endif
 	}
 
 #if (KTR_COMPILE & KTR_SCHED) != 0
 	if (TD_IS_IDLETHREAD(td))
 		KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle",
 		    "prio:%d", td->td_priority);
 	else
 		KTR_STATE3(KTR_SCHED, "thread", sched_tdname(td), KTDSTATE(td),
 		    "prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg,
 		    "lockname:\"%s\"", td->td_lockname);
 #endif
 
 	/*
 	 * We enter here with the thread blocked and assigned to the
 	 * appropriate cpu run-queue or sleep-queue and with the current
 	 * thread-queue locked.
 	 */
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED | MA_NOTRECURSED);
 	newtd = choosethread();
 	/*
 	 * Call the MD code to switch contexts if necessary.
 	 */
 	if (td != newtd) {
 #ifdef	HWPMC_HOOKS
 		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
 			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
 #endif
 		SDT_PROBE2(sched, , , off__cpu, newtd, newtd->td_proc);
 		lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object);
 		/* Record newtd as the owner of the run-queue lock. */
 		TDQ_LOCKPTR(tdq)->mtx_lock = (uintptr_t)newtd;
 		sched_pctcpu_update(td_get_sched(newtd), 0);
 
 #ifdef KDTRACE_HOOKS
 		/*
 		 * If DTrace has set the active vtime enum to anything
 		 * other than INACTIVE (0), then it should have set the
 		 * function to call.
 		 */
 		if (dtrace_vtime_active)
 			(*dtrace_vtime_switch_func)(newtd);
 #endif
 
 		cpu_switch(td, newtd, mtx);
 		/*
 		 * We may return from cpu_switch on a different cpu.  However,
 		 * we always return with td_lock pointing to the current cpu's
 		 * run queue lock.
 		 */
 		cpuid = PCPU_GET(cpuid);
 		tdq = TDQ_SELF();
 		lock_profile_obtain_lock_success(
 		    &TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__);
 
 		SDT_PROBE0(sched, , , on__cpu);
 #ifdef	HWPMC_HOOKS
 		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
 			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
 #endif
 	} else {
 		/* Same thread was chosen again: just restore its td_lock. */
 		thread_unblock_switch(td, mtx);
 		SDT_PROBE0(sched, , , remain__cpu);
 	}
 
 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
 	    "prio:%d", td->td_priority);
 
 	/*
 	 * Assert that all went well and return.
 	 */
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED|MA_NOTRECURSED);
 	MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
 	td->td_oncpu = cpuid;
 }
 
 /*
  * Adjust thread priorities as a result of a nice request.
  */
 void
 sched_nice(struct proc *p, int nice)
 {
 	struct thread *t;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/* Store the new nice value, then refresh every thread's priority. */
 	p->p_nice = nice;
 	FOREACH_THREAD_IN_PROC(p, t) {
 		thread_lock(t);
 		/* Recompute the user priority and apply it as the base. */
 		sched_priority(t);
 		sched_prio(t, t->td_base_user_pri);
 		thread_unlock(t);
 	}
 }
 
 /*
  * Record the sleep time for the interactivity scorer.
  */
 void
 sched_sleep(struct thread *td, int prio)
 {
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 
 	/* Remember when the sleep began; sched_wakeup() consumes this. */
 	td->td_slptick = ticks;
 	if (prio >= PSOCK || TD_IS_SUSPENDED(td))
 		td->td_flags |= TDF_CANSWAP;
 
 	/* Static boosting applies to timeshare threads only. */
 	if (PRI_BASE(td->td_pri_class) != PRI_TIMESHARE)
 		return;
 
 	/*
 	 * static_boost == 1 means "use the caller supplied sleep priority"
 	 * when one was given; any other non-zero value is itself the
 	 * priority to boost to, applied only if it improves on the current.
 	 */
 	if (static_boost == 1 && prio != 0)
 		sched_prio(td, prio);
 	else if (static_boost != 0 && td->td_priority > static_boost)
 		sched_prio(td, static_boost);
 }
 
 /*
  * Schedule a thread to resume execution and record how long it voluntarily
  * slept.  We also update the pctcpu, interactivity, and priority.
  */
 void
 sched_wakeup(struct thread *td)
 {
 	struct td_sched *ts;
 	int slept_at;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	td->td_flags &= ~TDF_CANSWAP;
 	ts = td_get_sched(td);
 
 	/*
 	 * Consume the tick stamp stored at sleep time.  If the sleep
 	 * spanned at least one tick, credit it as voluntary sleep time and
 	 * refresh the interactivity history and cpu accounting.
 	 */
 	slept_at = td->td_slptick;
 	td->td_slptick = 0;
 	if (slept_at != 0 && slept_at != ticks) {
 		ts->ts_slptime += (ticks - slept_at) << SCHED_TICK_SHIFT;
 		sched_interact_update(td);
 		sched_pctcpu_update(ts, 0);
 	}
 
 	/* Having slept, the thread starts over with a fresh slice. */
 	ts->ts_slice = 0;
 	sched_add(td, SRQ_BORING);
 }
 
 /*
  * Penalize the parent for creating a new child and initialize the child's
  * priority.
  */
 void
 sched_fork(struct thread *td, struct thread *child)
 {
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	sched_pctcpu_update(td_get_sched(td), 1);
 	sched_fork_thread(td, child);
 	/*
 	 * Penalize the parent and child for forking.
 	 */
 	sched_interact_fork(child);
 	sched_priority(child);
 	td_get_sched(td)->ts_runtime += tickincr;
 	sched_interact_update(td);
 	sched_priority(td);
 }
 
 /*
  * Fork a new thread, may be within the same process.
  */
 void
 sched_fork_thread(struct thread *td, struct thread *child)
 {
 	struct td_sched *parent_ts;
 	struct td_sched *child_ts;
 	struct tdq *tdq;
 
 	tdq = TDQ_SELF();
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	parent_ts = td_get_sched(td);
 	child_ts = td_get_sched(child);
 
 	/*
 	 * Thread-level state: the child has not run anywhere yet, is
 	 * protected by the current cpu's run-queue lock, and shares the
 	 * parent's cpuset and domain policy.
 	 */
 	child->td_oncpu = NOCPU;
 	child->td_lastcpu = NOCPU;
 	child->td_lock = TDQ_LOCKPTR(tdq);
 	child->td_cpuset = cpuset_ref(td->td_cpuset);
 	child->td_domain.dr_policy = td->td_cpuset->cs_domain;
 	/* Do not inherit any borrowed priority from the parent. */
 	child->td_priority = child->td_base_pri;
 
 	/* Scheduler state: same cpu as the parent, no flags carried over. */
 	child_ts->ts_cpu = parent_ts->ts_cpu;
 	child_ts->ts_flags = 0;
 
 	/* Inherit the parent's cpu utilization estimate. */
 	child_ts->ts_ticks = parent_ts->ts_ticks;
 	child_ts->ts_ltick = parent_ts->ts_ltick;
 	child_ts->ts_ftick = parent_ts->ts_ftick;
 
 	/* Inherit the parent's interactivity history. */
 	child_ts->ts_slptime = parent_ts->ts_slptime;
 	child_ts->ts_runtime = parent_ts->ts_runtime;
 
 	/*
 	 * Attempt to quickly learn interactivity: start the child with only
 	 * sched_slice_min left of its first slice.
 	 */
 	child_ts->ts_slice = tdq_slice(tdq) - sched_slice_min;
 #ifdef KTR
 	bzero(child_ts->ts_name, sizeof(child_ts->ts_name));
 #endif
 }
 
 /*
  * Adjust the priority class of a thread.
  */
 void
 sched_class(struct thread *td, int class)
 {
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 
 	/* Store the class only when it actually differs. */
 	if (td->td_pri_class != class)
 		td->td_pri_class = class;
 }
 
 /*
  * Return some of the child's priority and interactivity to the parent.
  */
 void
 sched_exit(struct proc *p, struct thread *child)
 {
 
 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(child), "proc exit",
 	    "prio:%d", child->td_priority);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/* The first thread of p is the one charged for the child. */
 	sched_exit_thread(FIRST_THREAD_IN_PROC(p), child);
 }
 
 /*
  * Penalize another thread for the time spent on this one.  This helps to
  * worsen the priority and interactivity of processes which schedule batch
  * jobs such as make.  This has little effect on the make process itself but
  * causes new processes spawned by it to receive worse scores immediately.
  */
 void
 sched_exit_thread(struct thread *td, struct thread *child)
 {
 
 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(child), "thread exit",
 	    "prio:%d", child->td_priority);
 	/*
 	 * Give the child's runtime to the parent without returning the
 	 * sleep time as a penalty to the parent.  This causes shells that
 	 * launch expensive things to mark their children as expensive.
 	 */
 	thread_lock(td);
 	td_get_sched(td)->ts_runtime += td_get_sched(child)->ts_runtime;
 	sched_interact_update(td);
 	sched_priority(td);
 	thread_unlock(td);
 }
 
 void
 sched_preempt(struct thread *td)
 {
 	struct tdq *tdq;
 	int flags;
 
 	SDT_PROBE2(sched, , , surrender, td, td->td_proc);
 
 	thread_lock(td);
 	tdq = TDQ_SELF();
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 	if (td->td_priority <= tdq->tdq_lowpri) {
 		/* Nothing better is queued; drop the pending request. */
 		tdq->tdq_owepreempt = 0;
 	} else if (td->td_critnest > 1) {
 		/* Nested in a critical section: defer the preemption. */
 		td->td_owepreempt = 1;
 	} else {
 		flags = SW_INVOL | SW_PREEMPT;
 		if (TD_IS_IDLETHREAD(td))
 			flags |= SWT_REMOTEWAKEIDLE;
 		else
 			flags |= SWT_REMOTEPREEMPT;
 		mi_switch(flags, NULL);
 	}
 	thread_unlock(td);
 }
 
 /*
  * Fix priorities on return to user-space.  Priorities may be elevated due
  * to static priorities in msleep() or similar.
  */
 void
 sched_userret_slowpath(struct thread *td)
 {
 
 	thread_lock(td);
 	/* Both the effective and base priority fall back to the user one. */
 	td->td_priority = td->td_base_pri = td->td_user_pri;
 	/* Let the current cpu's queue recompute its lowest priority. */
 	tdq_setlowpri(TDQ_SELF(), td);
 	thread_unlock(td);
 }
 
 /*
  * Handle a stathz tick.  This is really only relevant for timeshare
  * threads.
  */
 void
 sched_clock(struct thread *td, int cnt)
 {
 	struct td_sched *ts;
 	struct tdq *tdq;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	tdq = TDQ_SELF();
 #ifdef SMP
 	/*
 	 * Only the designated balancing queue counts down to the next run
 	 * of the long term load balancer, and only once SMP is started and
 	 * rebalancing is enabled.
 	 */
 	if (balance_tdq == tdq && smp_started != 0 && rebalance != 0 &&
 	    balance_ticks != 0) {
 		balance_ticks -= cnt;
 		if (balance_ticks <= 0)
 			sched_balance();
 	}
 #endif
 	/*
 	 * Keep the previous tick's switch count as a record of recent
 	 * activity and seed the new count from the current load.
 	 */
 	tdq->tdq_oldswitchcnt = tdq->tdq_switchcnt;
 	tdq->tdq_switchcnt = tdq->tdq_load;
 
 	/*
 	 * Advance the insert index once for each tick to ensure that all
 	 * threads get a chance to run.  The removal index follows only
 	 * when the queue it points at is empty.
 	 */
 	if (tdq->tdq_idx == tdq->tdq_ridx) {
 		tdq->tdq_idx = (tdq->tdq_idx + 1) % RQ_NQS;
 		if (TAILQ_EMPTY(&tdq->tdq_timeshare.rq_queues[tdq->tdq_ridx]))
 			tdq->tdq_ridx = tdq->tdq_idx;
 	}
 
 	ts = td_get_sched(td);
 	sched_pctcpu_update(ts, 1);
 
 	/* FIFO-class and idle threads are not subject to time slicing. */
 	if ((td->td_pri_class & PRI_FIFO_BIT) != 0 || TD_IS_IDLETHREAD(td))
 		return;
 
 	if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE) {
 		/*
 		 * Charge the elapsed ticks as run time so the interactivity
 		 * score and priority reflect them.
 		 */
 		ts->ts_runtime += tickincr * cnt;
 		sched_interact_update(td);
 		sched_priority(td);
 	}
 
 	/*
 	 * Account the ticks against the slice; once a full slice is used
 	 * up, reset it and request a context switch.
 	 */
 	ts->ts_slice += cnt;
 	if (ts->ts_slice < tdq_slice(tdq))
 		return;
 	ts->ts_slice = 0;
 	td->td_flags |= TDF_NEEDRESCHED | TDF_SLICEEND;
 }
 
 /*
  * Report the estimated cpu usage of a thread.  This implementation does
  * not use its argument and always returns 0.
  */
 u_int
 sched_estcpu(struct thread *td __unused)
 {
 
 	return (0);
 }
 
 /*
  * Return whether the current CPU has runnable tasks.  Used for in-kernel
  * cooperative idle threads.
  */
 int
 sched_runnable(void)
 {
 	struct tdq *tdq;
 
 	tdq = TDQ_SELF();
 
 	/* From the idle thread any load at all means something can run. */
 	if ((curthread->td_flags & TDF_IDLETD) != 0)
 		return (tdq->tdq_load > 0);
 
 	/* Otherwise discount one unit of load for the calling thread. */
 	return (tdq->tdq_load - 1 > 0);
 }
 
 /*
  * Choose the highest priority thread to run.  The thread is removed from
  * the run-queue while running however the load remains.  For SMP we set
  * the tdq in the global idle bitmask if it idles here.
  */
 struct thread *
 sched_choose(void)
 {
 	struct thread *td;
 	struct tdq *tdq;
 
 	tdq = TDQ_SELF();
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 	td = tdq_choose(tdq);
 	if (td) {
 		tdq_runq_rem(tdq, td);
 		tdq->tdq_lowpri = td->td_priority;
 		return (td);
 	}
 	tdq->tdq_lowpri = PRI_MAX_IDLE;
 	return (PCPU_GET(idlethread));
 }
 
 /*
  * Set owepreempt if necessary.  Preemption never happens directly in ULE,
  * we always request it once we exit a critical section.
  */
 static inline void
 sched_setpreempt(struct thread *td)
 {
 	struct thread *ctd;
 	int cpri;
 	int pri;
 
 	THREAD_LOCK_ASSERT(curthread, MA_OWNED);
 
 	ctd = curthread;
 	cpri = ctd->td_priority;
 	pri = td->td_priority;
 
 	/* The new thread is no better than the current one: nothing to do. */
 	if (pri >= cpri)
 		return;
 
 	/* It is better, so the current thread should reschedule. */
 	ctd->td_flags |= TDF_NEEDRESCHED;
 
 	/* Never owe a preemption while panicking, cold, or inhibited. */
 	if (panicstr != NULL || cold || TD_IS_INHIBITED(ctd))
 		return;
 	if (sched_shouldpreempt(pri, cpri, 0))
 		ctd->td_owepreempt = 1;
 }
 
 /*
  * Add a thread to a thread queue.  Select the appropriate runq and add the
  * thread to it.  This is the internal function called when the tdq is
  * predetermined.
  */
 void
 tdq_add(struct tdq *tdq, struct thread *td, int flags)
 {
 
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 	KASSERT(td->td_inhibitors == 0,
 	    ("sched_add: trying to run inhibited thread"));
 	KASSERT(TD_CAN_RUN(td) || TD_IS_RUNNING(td),
 	    ("sched_add: bad thread state"));
 	KASSERT((td->td_flags & TDF_INMEM) != 0,
 	    ("sched_add: thread swapped out"));
 
 	/* Keep the queue's lowest priority value current. */
 	if (tdq->tdq_lowpri > td->td_priority)
 		tdq->tdq_lowpri = td->td_priority;
 
 	/* Enqueue first, then account for the added load. */
 	tdq_runq_add(tdq, td, flags);
 	tdq_load_add(tdq, td);
 }
 
 /*
  * Select the target thread queue and add a thread to it.  Request
  * preemption or IPI a remote processor if required.
  */
 void
 sched_add(struct thread *td, int flags)
 {
 	struct tdq *tdq;
 #ifdef SMP
 	int cpu;
 #endif
 
 	KTR_STATE2(KTR_SCHED, "thread", sched_tdname(td), "runq add",
 	    "prio:%d", td->td_priority, KTR_ATTR_LINKED,
 	    sched_tdname(curthread));
 	KTR_POINT1(KTR_SCHED, "thread", sched_tdname(curthread), "wokeup",
 	    KTR_ATTR_LINKED, sched_tdname(td));
 	SDT_PROBE4(sched, , , enqueue, td, td->td_proc, NULL, 
 	    flags & SRQ_PREEMPTED);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	/*
 	 * Recalculate the priority before we select the target cpu or
 	 * run-queue.
 	 */
 	if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
 		sched_priority(td);
 #ifdef SMP
 	/*
 	 * Pick the destination cpu and if it isn't ours transfer to the
 	 * target cpu.
 	 */
 	cpu = sched_pickcpu(td, flags);
 	tdq = sched_setcpu(td, cpu, flags);
 	tdq_add(tdq, td, flags);
 	/* A remote queue is notified instead of preempting locally. */
 	if (cpu != PCPU_GET(cpuid)) {
 		tdq_notify(tdq, td);
 		return;
 	}
 #else
 	tdq = TDQ_SELF();
 	TDQ_LOCK(tdq);
 	/*
 	 * Now that the thread is moving to the run-queue, set the lock
 	 * to the scheduler's lock.
 	 */
 	thread_lock_set(td, TDQ_LOCKPTR(tdq));
 	tdq_add(tdq, td, flags);
 #endif
 	/* A yielding caller is switching anyway; skip the preempt check. */
 	if (!(flags & SRQ_YIELDING))
 		sched_setpreempt(td);
 }
 
 /*
  * Remove a thread from a run-queue without running it.  This is used
  * when we're stealing a thread from a remote queue.  Otherwise all threads
  * exit by calling sched_exit_thread() and sched_throw() themselves.
  */
 void
 sched_rem(struct thread *td)
 {
 	struct tdq *tdq;
 
 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "runq rem",
 	    "prio:%d", td->td_priority);
 	SDT_PROBE3(sched, , , dequeue, td, td->td_proc, NULL);
 
 	/* The owning queue is the one for the cpu recorded in the thread. */
 	tdq = TDQ_CPU(td_get_sched(td)->ts_cpu);
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 	MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
 	KASSERT(TD_ON_RUNQ(td), ("sched_rem: thread not on run queue"));
 
 	/* Dequeue, drop the load contribution, and mark it runnable. */
 	tdq_runq_rem(tdq, td);
 	tdq_load_rem(tdq, td);
 	TD_SET_CAN_RUN(td);
 
 	/* If this thread defined the queue's lowpri, recompute it. */
 	if (tdq->tdq_lowpri == td->td_priority)
 		tdq_setlowpri(tdq, NULL);
 }
 
 /*
  * Report a thread's cpu utilization as a fixed-point value.  The tick
  * accounting is brought up to date first; a thread with no recorded ticks
  * reports zero.  The thread lock must be held (asserted).
  */
 fixpt_t
 sched_pctcpu(struct thread *td)
 {
 	struct td_sched *ts;
 	int rtick;
 
 	ts = td_get_sched(td);
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	sched_pctcpu_update(ts, TD_IS_RUNNING(td));
 	if (ts->ts_ticks == 0)
 		return (0);
 
 	/* Ticks run per second, capped at hz. */
 	rtick = min(SCHED_TICK_HZ(ts) / SCHED_TICK_SECS, hz);
 	return ((FSCALE * ((FSCALE * rtick) / hz)) >> FSHIFT);
 }
 
 /*
  * Enforce affinity settings for a thread.  Called after adjustments to
  * cpumask.
  *
  * Only SMP kernels do any work here.  Nothing happens if the thread may
  * still be scheduled on the cpu recorded in ts_cpu.  Otherwise a thread
  * that is on a run queue is removed and added again, and a running thread
  * is flagged for rescheduling.  A thread in any other state is left
  * alone.  The thread lock must be held (asserted).
  */
 void
 sched_affinity(struct thread *td)
 {
 #ifdef SMP
 	struct td_sched *ts;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	ts = td_get_sched(td);
 	if (THREAD_CAN_SCHED(td, ts->ts_cpu))
 		return;
 	/* Requeue so that sched_add() picks a cpu again. */
 	if (TD_ON_RUNQ(td)) {
 		sched_rem(td);
 		sched_add(td, SRQ_BORING);
 		return;
 	}
 	if (!TD_IS_RUNNING(td))
 		return;
 	/*
 	 * Force a switch before returning to userspace.  If the
 	 * target thread is not running locally send an ipi to force
 	 * the issue.
 	 */
 	td->td_flags |= TDF_NEEDRESCHED;
 	if (td != curthread)
 		ipi_cpu(ts->ts_cpu, IPI_PREEMPT);
 #endif
 }
 
 /*
  * Bind a thread to a target cpu.
  *
  * Only curthread may be bound, and its thread lock must be held without
  * recursion (both asserted).  An existing binding is released first.  The
  * thread is then marked TSF_BOUND and pinned; if it is not already running
  * on the requested cpu, ts_cpu is updated and the thread switches out
  * voluntarily.
  */
 void
 sched_bind(struct thread *td, int cpu)
 {
 	struct td_sched *ts;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED|MA_NOTRECURSED);
 	KASSERT(td == curthread, ("sched_bind: can only bind curthread"));
 	ts = td_get_sched(td);
 	/* Drop any previous binding so that flag and pin are not doubled. */
 	if (ts->ts_flags & TSF_BOUND)
 		sched_unbind(td);
 	KASSERT(THREAD_CAN_MIGRATE(td), ("%p must be migratable", td));
 	ts->ts_flags |= TSF_BOUND;
 	sched_pin();
 	/* Already on the requested cpu: no switch is needed. */
 	if (PCPU_GET(cpuid) == cpu)
 		return;
 	ts->ts_cpu = cpu;
 	/* When we return from mi_switch we'll be on the correct cpu. */
 	mi_switch(SW_VOL, NULL);
 }
 
 /*
  * Release a bound thread.
  *
  * Clears TSF_BOUND and drops the pin that sched_bind() took when it set
  * the flag.  Does nothing if the thread is not currently bound.  Only
  * curthread may be unbound, and its thread lock must be held; both are
  * asserted.
  */
 void
 sched_unbind(struct thread *td)
 {
 	struct td_sched *ts;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	KASSERT(td == curthread, ("sched_unbind: can only unbind curthread"));
 	ts = td_get_sched(td);
 	if ((ts->ts_flags & TSF_BOUND) == 0)
 		return;
 	ts->ts_flags &= ~TSF_BOUND;
 	sched_unpin();
 }
 
 int
 sched_is_bound(struct thread *td)
 {
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	return (td_get_sched(td)->ts_flags & TSF_BOUND);
 }
 
 /*
  * Basic yield call.
  *
  * Takes the thread lock, performs a voluntary context switch flagged
  * SWT_RELINQUISH, and releases the thread lock once the switch returns.
  */
 void
 sched_relinquish(struct thread *td)
 {
 	thread_lock(td);
 	mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
 	thread_unlock(td);
 }
 
 /*
  * Return the total system load: the sum of tdq_sysload over every cpu on
  * SMP kernels, or the single local queue's tdq_sysload otherwise.
  */
 int
 sched_load(void)
 {
 #ifndef SMP
 	return (TDQ_SELF()->tdq_sysload);
 #else
 	int cpu, sum;
 
 	sum = 0;
 	CPU_FOREACH(cpu)
 		sum += TDQ_CPU(cpu)->tdq_sysload;
 	return (sum);
 #endif
 }
 
 /*
  * Size in bytes of a process structure; no scheduler-private data is
  * added to it here.
  */
 int
 sched_sizeof_proc(void)
 {
 	int size;
 
 	size = sizeof(struct proc);
 	return (size);
 }
 
 /*
  * Size in bytes of a thread structure plus the scheduler-private
  * struct td_sched.
  */
 int
 sched_sizeof_thread(void)
 {
 	int size;
 
 	size = sizeof(struct thread) + sizeof(struct td_sched);
 	return (size);
 }
 
 /*
  * TDQ_IDLESPIN() tells the idle thread whether it may busy-wait for work.
  * On SMP kernels it is true only when the queue has a cpu group and that
  * group's flags do not include CG_FLAG_THREAD; on other kernels it is
  * always true.
  */
 #ifdef SMP
 #define	TDQ_IDLESPIN(tdq)						\
     ((tdq)->tdq_cg != NULL && ((tdq)->tdq_cg->cg_flags & CG_FLAG_THREAD) == 0)
 #else
 #define	TDQ_IDLESPIN(tdq)	1
 #endif
 
 /*
  * The actual idle process.
  *
  * Loops forever on the local queue: switch away whenever the queue has
  * load, otherwise (on SMP) try to pull work via tdq_idled(), optionally
  * spin for a while, and finally call the machine-dependent cpu_idle().
  * The sum of tdq_switchcnt and tdq_oldswitchcnt is sampled repeatedly and
  * compared with the last value seen (oldswitchcnt) to detect activity
  * since the previous pass.  The "dummy" argument is unused.
  */
 void
 sched_idletd(void *dummy)
 {
 	struct thread *td;
 	struct tdq *tdq;
 	int oldswitchcnt, switchcnt;
 	int i;
 
 	mtx_assert(&Giant, MA_NOTOWNED);
 	td = curthread;
 	tdq = TDQ_SELF();
 	THREAD_NO_SLEEPING();
 	/* -1 cannot match a real sample, so the first pass sees a change. */
 	oldswitchcnt = -1;
 	for (;;) {
 		/* Runnable work is queued locally: switch to it. */
 		if (tdq->tdq_load) {
 			thread_lock(td);
 			mi_switch(SW_VOL | SWT_IDLE, NULL);
 			thread_unlock(td);
 		}
 		switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
 #ifdef SMP
 		/*
 		 * Call tdq_idled() when always_steal is set or the switch
 		 * count moved; a zero return restarts the loop.
 		 */
 		if (always_steal || switchcnt != oldswitchcnt) {
 			oldswitchcnt = switchcnt;
 			if (tdq_idled(tdq) == 0)
 				continue;
 		}
 		switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
 #else
 		oldswitchcnt = switchcnt;
 #endif
 		/*
 		 * If we're switching very frequently, spin while checking
 		 * for load rather than entering a low power state that
 		 * may require an IPI.  However, don't do any busy
 		 * loops while on SMT machines as this simply steals
 		 * cycles from cores doing useful work.
 		 */
 		if (TDQ_IDLESPIN(tdq) && switchcnt > sched_idlespinthresh) {
 			for (i = 0; i < sched_idlespins; i++) {
 				if (tdq->tdq_load)
 					break;
 				cpu_spinwait();
 			}
 		}
 
 		/* If there was context switch during spin, restart it. */
 		switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
 		if (tdq->tdq_load != 0 || switchcnt != oldswitchcnt)
 			continue;
 
 		/* Run main MD idle handler. */
 		tdq->tdq_cpu_idle = 1;
 		/*
 		 * Make sure that tdq_cpu_idle update is globally visible
 		 * before cpu_idle() read tdq_load.  The order is important
 		 * to avoid race with tdq_notify.
 		 */
 		atomic_thread_fence_seq_cst();
 		/*
 		 * Checking for load again after the fence picks up assigned
 		 * threads often enough to make it worthwhile to do so in
 		 * order to avoid calling cpu_idle().
 		 */
 		if (tdq->tdq_load != 0) {
 			tdq->tdq_cpu_idle = 0;
 			continue;
 		}
 		cpu_idle(switchcnt * 4 > sched_idlespinthresh);
 		tdq->tdq_cpu_idle = 0;
 
 		/*
 		 * Account thread-less hardware interrupts and
 		 * other wakeup reasons equal to context switches.
 		 */
 		switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
 		if (switchcnt != oldswitchcnt)
 			continue;
 		/*
 		 * No switch was recorded across cpu_idle(): count the wakeup
 		 * and advance the local copy in step, so the next pass does
 		 * not see it as a new change.
 		 */
 		tdq->tdq_switchcnt++;
 		oldswitchcnt++;
 	}
 }
 
 /*
  * A CPU is entering for the first time or a thread is exiting.
  *
  * td == NULL selects the first-entry path: the local queue is locked, the
  * per-cpu switch timestamps are initialized and the idle thread's lock is
  * pointed at the queue lock.  Otherwise td is the exiting thread, whose
  * lock must already be the local queue lock (asserted); its load is
  * removed and it is marked as no longer on a cpu.  In both cases the next
  * thread is chosen and cpu_throw() switches to it without returning.
  */
 void
 sched_throw(struct thread *td)
 {
 	struct thread *newtd;
 	struct tdq *tdq;
 
 	if (td == NULL) {
 #ifdef SMP
 		PCPU_SET(sched, DPCPU_PTR(tdq));
 #endif
 		/* Correct spinlock nesting and acquire the correct lock. */
 		tdq = TDQ_SELF();
 		TDQ_LOCK(tdq);
 		spinlock_exit();
 		PCPU_SET(switchtime, cpu_ticks());
 		PCPU_SET(switchticks, ticks);
 		PCPU_GET(idlethread)->td_lock = TDQ_LOCKPTR(tdq);
 	} else {
 		tdq = TDQ_SELF();
 		MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
 		tdq_load_rem(tdq, td);
 		lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object);
 		/* Remember where the thread last ran; it is on no cpu now. */
 		td->td_lastcpu = td->td_oncpu;
 		td->td_oncpu = NOCPU;
 	}
 	KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
 	newtd = choosethread();
 	/*
 	 * Store the incoming thread in the queue lock's mtx_lock word.
 	 * NOTE(review): presumably this hands lock ownership to newtd --
 	 * confirm against the mutex implementation.
 	 */
 	TDQ_LOCKPTR(tdq)->mtx_lock = (uintptr_t)newtd;
 	cpu_throw(td, newtd);		/* doesn't return */
 }
 
 /*
  * This is called from fork_exit().  Just acquire the correct locks and
  * let fork do the rest of the work.
  *
  * The thread's lock must already be the local queue lock, held without
  * recursion (both asserted).  Records the current cpu in td_oncpu and
  * reports the lock acquisition to lock profiling.
  */
 void
 sched_fork_exit(struct thread *td)
 {
 	struct tdq *tdq;
 	int cpuid;
 
 	/*
 	 * Finish setting up thread glue so that it begins execution in a
 	 * non-nested critical section with the scheduler lock held.
 	 */
 	cpuid = PCPU_GET(cpuid);
 	tdq = TDQ_SELF();
 	MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
 	td->td_oncpu = cpuid;
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED | MA_NOTRECURSED);
 	lock_profile_obtain_lock_success(
 	    &TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__);
 
 	/* Trace record and DTrace probe: the thread is now running. */
 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
 	    "prio:%d", td->td_priority);
 	SDT_PROBE0(sched, , , on__cpu);
 }
 
 /*
  * Return a name for the thread for use in trace records.  With KTR the
  * name is "<td_name> tid <td_tid>", cached in the scheduler-private data
  * and created on first use to catch odd startup conditions; without KTR
  * it is simply td_name.
  */
 char *
 sched_tdname(struct thread *td)
 {
 #ifndef KTR
 	return (td->td_name);
 #else
 	struct td_sched *ts;
 
 	ts = td_get_sched(td);
 	/* A cached name is reused as is. */
 	if (ts->ts_name[0] != '\0')
 		return (ts->ts_name);
 	snprintf(ts->ts_name, sizeof(ts->ts_name),
 	    "%s tid %d", td->td_name, td->td_tid);
 	return (ts->ts_name);
 #endif
 }
 
 #ifdef KTR
 /*
  * Discard the cached trace name so that sched_tdname() rebuilds it on its
  * next call.
  */
 void
 sched_clear_tdname(struct thread *td)
 {
 
 	td_get_sched(td)->ts_name[0] = '\0';
 }
 #endif
 
 #ifdef SMP
 
 /*
  * Build the CPU topology dump string. Is recursively called to collect
  * the topology tree.
  *
  * sb receives the XML text, cg is the group to describe and indent is the
  * current indentation width; each level of children is written two
  * columns deeper.  Always returns 0: the results of sbuf_printf() and of
  * the recursive calls are not checked here.
  */
 static int
 sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, struct cpu_group *cg,
     int indent)
 {
 	char cpusetbuf[CPUSETBUFSIZ];
 	int i, first;
 
 	sbuf_printf(sb, "%*s<group level=\"%d\" cache-level=\"%d\">\n", indent,
 	    "", 1 + indent / 2, cg->cg_level);
 	sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"%s\">", indent, "",
 	    cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask));
 	/* List the ids of the cpus in the group's mask, comma separated. */
 	first = TRUE;
 	for (i = 0; i < MAXCPU; i++) {
 		if (CPU_ISSET(i, &cg->cg_mask)) {
 			if (!first)
 				sbuf_printf(sb, ", ");
 			else
 				first = FALSE;
 			sbuf_printf(sb, "%d", i);
 		}
 	}
 	sbuf_printf(sb, "</cpu>\n");
 
 	/* The <flags> element is written only when some flag is set. */
 	if (cg->cg_flags != 0) {
 		sbuf_printf(sb, "%*s <flags>", indent, "");
 		if ((cg->cg_flags & CG_FLAG_HTT) != 0)
 			sbuf_printf(sb, "<flag name=\"HTT\">HTT group</flag>");
 		if ((cg->cg_flags & CG_FLAG_THREAD) != 0)
 			sbuf_printf(sb, "<flag name=\"THREAD\">THREAD group</flag>");
 		if ((cg->cg_flags & CG_FLAG_SMT) != 0)
 			sbuf_printf(sb, "<flag name=\"SMT\">SMT group</flag>");
 		sbuf_printf(sb, "</flags>\n");
 	}
 
 	/* Recurse into each child group. */
 	if (cg->cg_children > 0) {
 		sbuf_printf(sb, "%*s <children>\n", indent, "");
 		for (i = 0; i < cg->cg_children; i++)
 			sysctl_kern_sched_topology_spec_internal(sb, 
 			    &cg->cg_child[i], indent+2);
 		sbuf_printf(sb, "%*s </children>\n", indent, "");
 	}
 	sbuf_printf(sb, "%*s</group>\n", indent, "");
 	return (0);
 }
 
 /*
  * Sysctl handler for retrieving topology dump. It's a wrapper for
  * the recursive sysctl_kern_smp_topology_spec_internal().
  */
 static int
 sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf *topo;
 	int err;
 
 	KASSERT(cpu_top != NULL, ("cpu_top isn't initialized"));
 
 	topo = sbuf_new_for_sysctl(NULL, NULL, 512, req);
 	if (topo == NULL)
 		return (ENOMEM);
 
 	sbuf_printf(topo, "<groups>\n");
 	err = sysctl_kern_sched_topology_spec_internal(topo, cpu_top, 1);
 	sbuf_printf(topo, "</groups>\n");
 
 	if (err == 0) {
 		err = sbuf_finish(topo);
 	}
 	sbuf_delete(topo);
 	return (err);
 }
 
 #endif
 
 static int
 sysctl_kern_quantum(SYSCTL_HANDLER_ARGS)
 {
 	int error, new_val, period;
 
 	period = 1000000 / realstathz;
 	new_val = period * sched_slice;
 	error = sysctl_handle_int(oidp, &new_val, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (new_val <= 0)
 		return (EINVAL);
 	sched_slice = imax(1, (new_val + period / 2) / period);
 	sched_slice_min = sched_slice / SCHED_SLICE_MIN_DIVISOR;
 	hogticks = imax(1, (2 * hz * sched_slice + realstathz / 2) /
 	    realstathz);
 	return (0);
 }
 
 /*
  * sysctl nodes under kern.sched.  Most entries expose one of this file's
  * tunables directly as a read-write integer; "quantum" and
  * "topology_spec" go through the handlers defined above.  The entries
  * inside the SMP conditional exist only on SMP kernels.
  */
 SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler");
 SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ULE", 0,
     "Scheduler name");
 SYSCTL_PROC(_kern_sched, OID_AUTO, quantum, CTLTYPE_INT | CTLFLAG_RW,
     NULL, 0, sysctl_kern_quantum, "I",
     "Quantum for timeshare threads in microseconds");
 SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0,
     "Quantum for timeshare threads in stathz ticks");
 SYSCTL_INT(_kern_sched, OID_AUTO, interact, CTLFLAG_RW, &sched_interact, 0,
     "Interactivity score threshold");
 SYSCTL_INT(_kern_sched, OID_AUTO, preempt_thresh, CTLFLAG_RW,
     &preempt_thresh, 0,
     "Maximal (lowest) priority for preemption");
 SYSCTL_INT(_kern_sched, OID_AUTO, static_boost, CTLFLAG_RW, &static_boost, 0,
     "Assign static kernel priorities to sleeping threads");
 SYSCTL_INT(_kern_sched, OID_AUTO, idlespins, CTLFLAG_RW, &sched_idlespins, 0,
     "Number of times idle thread will spin waiting for new work");
 SYSCTL_INT(_kern_sched, OID_AUTO, idlespinthresh, CTLFLAG_RW,
     &sched_idlespinthresh, 0,
     "Threshold before we will permit idle thread spinning");
 #ifdef SMP
 SYSCTL_INT(_kern_sched, OID_AUTO, affinity, CTLFLAG_RW, &affinity, 0,
     "Number of hz ticks to keep thread affinity for");
 SYSCTL_INT(_kern_sched, OID_AUTO, balance, CTLFLAG_RW, &rebalance, 0,
     "Enables the long-term load balancer");
 SYSCTL_INT(_kern_sched, OID_AUTO, balance_interval, CTLFLAG_RW,
     &balance_interval, 0,
     "Average period in stathz ticks to run the long-term balancer");
 SYSCTL_INT(_kern_sched, OID_AUTO, steal_idle, CTLFLAG_RW, &steal_idle, 0,
     "Attempts to steal work from other cores before idling");
 SYSCTL_INT(_kern_sched, OID_AUTO, steal_thresh, CTLFLAG_RW, &steal_thresh, 0,
     "Minimum load on remote CPU before we'll steal");
 SYSCTL_INT(_kern_sched, OID_AUTO, trysteal_limit, CTLFLAG_RW, &trysteal_limit,
     0, "Topological distance limit for stealing threads in sched_switch()");
 SYSCTL_INT(_kern_sched, OID_AUTO, always_steal, CTLFLAG_RW, &always_steal, 0,
     "Always run the stealer from the idle thread");
 SYSCTL_PROC(_kern_sched, OID_AUTO, topology_spec, CTLTYPE_STRING |
     CTLFLAG_MPSAFE | CTLFLAG_RD, NULL, 0, sysctl_kern_sched_topology_spec, "A",
     "XML dump of detected CPU topology");
 #endif
 
 /* ps compat.  All cpu percentages from ULE are weighted. */
 static int ccpu = 0;
 SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");
Index: head/sys/kern/subr_kdb.c
===================================================================
--- head/sys/kern/subr_kdb.c	(revision 355708)
+++ head/sys/kern/subr_kdb.c	(revision 355709)
@@ -1,715 +1,715 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2004 The FreeBSD Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_kdb.h"
 #include "opt_stack.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/cons.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/sbuf.h>
 #include <sys/smp.h>
 #include <sys/stack.h>
 #include <sys/sysctl.h>
 
 #include <machine/kdb.h>
 #include <machine/pcb.h>
 
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 
 /* Non-zero while kdb_trap() is running a backend. */
 u_char __read_frequently kdb_active = 0;
 /* Jump buffer installed by kdb_jmpbuf() and used by kdb_reenter(). */
 static void *kdb_jmpbufp = NULL;
 /* Currently selected debugger backend, or NULL if there is none. */
 struct kdb_dbbe *kdb_dbbe = NULL;
 /* Context of the trapping thread, filled in by kdb_trap(). */
 static struct pcb kdb_pcb;
 /* Thread selected via kdb_thr_select() and the context to use for it. */
 struct pcb *kdb_thrctx = NULL;
 struct thread *kdb_thread = NULL;
 /* Trap frame passed to the kdb_trap() call in progress. */
 struct trapframe *kdb_frame = NULL;
 
 /*
  * Defaults for the break-to-debugger sysctls, taken from the kernel
  * options BREAK_TO_DEBUGGER and ALT_BREAK_TO_DEBUGGER.
  */
 #ifdef BREAK_TO_DEBUGGER
 #define	KDB_BREAK_TO_DEBUGGER	1
 #else
 #define	KDB_BREAK_TO_DEBUGGER	0
 #endif
 
 #ifdef ALT_BREAK_TO_DEBUGGER
 #define	KDB_ALT_BREAK_TO_DEBUGGER	1
 #else
 #define	KDB_ALT_BREAK_TO_DEBUGGER	0
 #endif
 
 static int	kdb_break_to_debugger = KDB_BREAK_TO_DEBUGGER;
 static int	kdb_alt_break_to_debugger = KDB_ALT_BREAK_TO_DEBUGGER;
 
 /* A backend named "null" with no methods. */
 KDB_BACKEND(null, NULL, NULL, NULL, NULL);
 
 /* Handlers for the debug.kdb sysctl nodes declared below. */
 static int kdb_sysctl_available(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_current(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_enter(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_panic(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_trap(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_trap_code(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_stack_overflow(SYSCTL_HANDLER_ARGS);
 
 static SYSCTL_NODE(_debug, OID_AUTO, kdb, CTLFLAG_RW, NULL, "KDB nodes");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, available, CTLTYPE_STRING | CTLFLAG_RD, NULL,
     0, kdb_sysctl_available, "A", "list of available KDB backends");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, current, CTLTYPE_STRING | CTLFLAG_RW, NULL,
     0, kdb_sysctl_current, "A", "currently selected KDB backend");
 
 /*
  * The nodes that enter the debugger or deliberately crash the kernel, and
  * the two break-to-debugger switches, all carry CTLFLAG_SECURE.
  */
 SYSCTL_PROC(_debug_kdb, OID_AUTO, enter,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0,
     kdb_sysctl_enter, "I", "set to enter the debugger");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, panic,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0,
     kdb_sysctl_panic, "I", "set to panic the kernel");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, trap,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0,
     kdb_sysctl_trap, "I", "set to cause a page fault via data access");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, trap_code,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0,
     kdb_sysctl_trap_code, "I", "set to cause a page fault via code access");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, stack_overflow,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0,
     kdb_sysctl_stack_overflow, "I", "set to cause a stack overflow");
 
 SYSCTL_INT(_debug_kdb, OID_AUTO, break_to_debugger,
     CTLFLAG_RWTUN | CTLFLAG_SECURE,
     &kdb_break_to_debugger, 0, "Enable break to debugger");
 
 SYSCTL_INT(_debug_kdb, OID_AUTO, alt_break_to_debugger,
     CTLFLAG_RWTUN | CTLFLAG_SECURE,
     &kdb_alt_break_to_debugger, 0, "Enable alternative break to debugger");
 
 /*
  * Flag to indicate to debuggers why the debugger was entered.
  */
 const char * volatile kdb_why = KDB_WHY_UNSET;
 
 static int
 kdb_sysctl_available(SYSCTL_HANDLER_ARGS)
 {
 	struct kdb_dbbe **iter;
 	struct sbuf sbuf;
 	int error;
 
 	sbuf_new_for_sysctl(&sbuf, NULL, 64, req);
 	SET_FOREACH(iter, kdb_dbbe_set) {
 		if ((*iter)->dbbe_active == 0)
 			sbuf_printf(&sbuf, "%s ", (*iter)->dbbe_name);
 	}
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	return (error);
 }
 
 static int
 kdb_sysctl_current(SYSCTL_HANDLER_ARGS)
 {
 	char buf[16];
 	int error;
 
 	if (kdb_dbbe != NULL)
 		strlcpy(buf, kdb_dbbe->dbbe_name, sizeof(buf));
 	else
 		*buf = '\0';
 	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (kdb_active)
 		return (EBUSY);
 	return (kdb_dbbe_select(buf));
 }
 
 static int
 kdb_sysctl_enter(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (kdb_active)
 		return (EBUSY);
 	kdb_enter(KDB_WHY_SYSCTL, "sysctl debug.kdb.enter");
 	return (0);
 }
 
 static int
 kdb_sysctl_panic(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	panic("kdb_sysctl_panic");
 	return (0);
 }
 
 static int
 kdb_sysctl_trap(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 	int *addr = (int *)0x10;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	return (*addr);
 }
 
 static int
 kdb_sysctl_trap_code(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 	void (*fp)(u_int, u_int, u_int) = (void *)0xdeadc0de;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	(*fp)(0x11111111, 0x22222222, 0x33333333);
 	return (0);
 }
 
 /*
  * Recurse without bound in order to overflow the stack.  The recursion
  * stops only if *x exceeds 10000000, and the one caller in this file
  * passes a pointer to 0.  NOTE(review): the update of *x after the
  * recursive call, the volatile argument and __noinline presumably exist to
  * stop the compiler from turning the recursion into a loop -- confirm.
  */
 static void kdb_stack_overflow(volatile int *x)  __noinline;
 static void
 kdb_stack_overflow(volatile int *x)
 {
 
 	if (*x > 10000000)
 		return;
 	kdb_stack_overflow(x);
 	*x += PCPU_GET(cpuid) / 1000000;
 }
 
 static int
 kdb_sysctl_stack_overflow(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 	volatile int x;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	x = 0;
 	kdb_stack_overflow(&x);
 	return (0);
 }
 
 
 /*
  * Print a "KDB: panic" line on the console and panic with the given
  * message.  The message goes through a "%s" format, so it is never
  * interpreted as a format string itself.
  */
 void
 kdb_panic(const char *msg)
 {
 
 	printf("KDB: panic\n");
 	panic("%s", msg);
 }
 
 /*
  * Print a "KDB: reboot requested" line on the console and request a
  * shutdown through shutdown_nice() with an argument of 0.
  */
 void
 kdb_reboot(void)
 {
 
 	printf("KDB: reboot requested\n");
 	shutdown_nice(0);
 }
 
 /*
  * Solaris implements a new BREAK which is initiated by a character sequence
  * CR ~ ^b which is similar to a familiar pattern used on Sun servers by the
  * Remote Console.
  *
  * Note that this function may be called from almost anywhere, with interrupts
  * disabled and with unknown locks held, so it must not access data other than
  * its arguments.  Its up to the caller to ensure that the state variable is
  * consistent.
  */
 #define	KEY_CR		13	/* CR '\r' */
 #define	KEY_TILDE	126	/* ~ */
 #define	KEY_CRTLB	2	/* ^B */
 #define	KEY_CRTLP	16	/* ^P */
 #define	KEY_CRTLR	18	/* ^R */
 
 /* States of th KDB "alternate break sequence" detecting state machine. */
 enum {
 	KDB_ALT_BREAK_SEEN_NONE,
 	KDB_ALT_BREAK_SEEN_CR,
 	KDB_ALT_BREAK_SEEN_CR_TILDE,
 };
 
 /*
  * Handle a console break.  If break-to-debugger is enabled, enter the
  * debugger and return KDB_REQ_DEBUGGER; otherwise do nothing and return 0.
  */
 int
 kdb_break(void)
 {
 
 	if (kdb_break_to_debugger) {
 		kdb_enter(KDB_WHY_BREAK, "Break to debugger");
 		return (KDB_REQ_DEBUGGER);
 	}
 	return (0);
 }
 
 /*
  * Advance the alternate break state machine by one key.  Returns one of
  * the KDB_REQ_* request codes when the key completes a sequence (CR ~ ^B,
  * CR ~ ^P or CR ~ ^R) and 0 otherwise.  *state is updated in place.
  */
 static int
 kdb_alt_break_state(int key, int *state)
 {
 	int prev;
 
 	/* A CR (re)starts the sequence from any state. */
 	if (key == KEY_CR) {
 		*state = KDB_ALT_BREAK_SEEN_CR;
 		return (0);
 	}
 
 	/* Any other key resets the machine unless it extends the sequence. */
 	prev = *state;
 	*state = KDB_ALT_BREAK_SEEN_NONE;
 	if (prev == KDB_ALT_BREAK_SEEN_CR) {
 		if (key == KEY_TILDE)
 			*state = KDB_ALT_BREAK_SEEN_CR_TILDE;
 		return (0);
 	}
 	if (prev != KDB_ALT_BREAK_SEEN_CR_TILDE)
 		return (0);
 
 	/* CR ~ has been seen: the third key selects the request. */
 	switch (key) {
 	case KEY_CRTLB:
 		return (KDB_REQ_DEBUGGER);
 	case KEY_CRTLP:
 		return (KDB_REQ_PANIC);
 	case KEY_CRTLR:
 		return (KDB_REQ_REBOOT);
 	default:
 		return (0);
 	}
 }
 
 /*
  * Feed one key to the alternate break state machine and act on a completed
  * sequence: enter the debugger, panic or reboot.  With force_gdb set, the
  * "gdb" backend is selected before entering the debugger or panicking.
  * Does nothing when alternate break is disabled.  Always returns 0.
  */
 static int
 kdb_alt_break_internal(int key, int *state, int force_gdb)
 {
 	int req;
 
 	if (!kdb_alt_break_to_debugger)
 		return (0);
 	req = kdb_alt_break_state(key, state);
 	if (req == KDB_REQ_DEBUGGER) {
 		if (force_gdb)
 			kdb_dbbe_select("gdb");
 		kdb_enter(KDB_WHY_BREAK, "Break to debugger");
 	} else if (req == KDB_REQ_PANIC) {
 		if (force_gdb)
 			kdb_dbbe_select("gdb");
 		kdb_panic("Panic sequence on console");
 	} else if (req == KDB_REQ_REBOOT) {
 		kdb_reboot();
 	}
 	return (0);
 }
 
 /*
  * Process one console key for the alternate break sequence, leaving the
  * backend selection alone.
  */
 int
 kdb_alt_break(int key, int *state)
 {
 	const int force_gdb = 0;
 
 	return (kdb_alt_break_internal(key, state, force_gdb));
 }
 
 /*
  * Like kdb_alt_break(), but selects the GDB backend first.  Used only by
  * dcons, which has its own configuration flag to force GDB use regardless
  * of the global KDB configuration.
  */
 int
 kdb_alt_break_gdb(int key, int *state)
 {
 	const int force_gdb = 1;
 
 	return (kdb_alt_break_internal(key, state, force_gdb));
 }
 
 /*
  * Print a backtrace of the calling thread. The backtrace is generated by
  * the selected debugger, provided it supports backtraces. If no debugger
  * is selected or the current debugger does not support backtraces, this
  * function silently returns.
  *
  * Kernels built with the STACK option instead fall back to capturing the
  * stack with stack_save() and printing it with stack_print_ddb().
  */
 void
 kdb_backtrace(void)
 {
 
 	if (kdb_dbbe != NULL && kdb_dbbe->dbbe_trace != NULL) {
 		printf("KDB: stack backtrace:\n");
 		kdb_dbbe->dbbe_trace();
 	}
 #ifdef STACK
 	/* This else belongs to the if above and exists only with STACK. */
 	else {
 		struct stack st;
 
 		printf("KDB: stack backtrace:\n");
 		stack_zero(&st);
 		stack_save(&st);
 		stack_print_ddb(&st);
 	}
 #endif
 }
 
 /*
  * Similar to kdb_backtrace() except that it prints a backtrace of an
  * arbitrary thread rather than the calling thread.
  *
  * Uses the selected backend's dbbe_trace_thread method when there is one.
  * Kernels built with the STACK option otherwise capture the thread's
  * stack with stack_save_td(); without STACK nothing is printed.
  */
 void
 kdb_backtrace_thread(struct thread *td)
 {
 
 	if (kdb_dbbe != NULL && kdb_dbbe->dbbe_trace_thread != NULL) {
 		printf("KDB: stack backtrace of thread %d:\n", td->td_tid);
 		kdb_dbbe->dbbe_trace_thread(td);
 	}
 #ifdef STACK
 	/* This else belongs to the if above and exists only with STACK. */
 	else {
 		struct stack st;
 
 		printf("KDB: stack backtrace of thread %d:\n", td->td_tid);
 		stack_zero(&st);
 		stack_save_td(&st, td);
 		stack_print_ddb(&st);
 	}
 #endif
 }
 
 /*
  * Set/change the current backend.  Selects the first backend whose
  * dbbe_active field is zero and whose name matches exactly.  Returns 0 on
  * success and EINVAL if no such backend exists.
  */
 int
 kdb_dbbe_select(const char *name)
 {
 	struct kdb_dbbe **bep, *be;
 
 	SET_FOREACH(bep, kdb_dbbe_set) {
 		be = *bep;
 		if (be->dbbe_active != 0)
 			continue;
 		if (strcmp(be->dbbe_name, name) != 0)
 			continue;
 		kdb_dbbe = be;
 		return (0);
 	}
 	return (EINVAL);
 }
 
 /*
  * Enter the currently selected debugger by way of breakpoint().  If a
  * message has been provided, it is printed first.  The 'why' argument is
  * a more mechanically usable string than 'msg' and is relied upon by DDB
  * scripting to identify the reason for entering the debugger; it is
  * published in kdb_why for the duration of the breakpoint and reset to
  * KDB_WHY_UNSET afterwards.  Nothing happens when no backend is selected
  * or the debugger is already active.
  */
 void
 kdb_enter(const char *why, const char *msg)
 {
 
 	if (kdb_dbbe == NULL || kdb_active != 0)
 		return;
 	if (msg != NULL)
 		printf("KDB: enter: %s\n", msg);
 	kdb_why = why;
 	breakpoint();
 	kdb_why = KDB_WHY_UNSET;
 }
 
 /*
  * Initialize the kernel debugger interface.  Every registered backend is
  * initialized and marked usable (dbbe_active 0) or unusable (-1) according
  * to whether its init method returned a non-negative priority; the
  * backend with the highest priority becomes the current one.  If one was
  * chosen, the usable backends and the current backend are printed.
  */
 void
 kdb_init(void)
 {
 	struct kdb_dbbe **bep, *be;
 	int best, pri;
 
 	kdb_active = 0;
 	kdb_dbbe = NULL;
 	best = -1;
 	SET_FOREACH(bep, kdb_dbbe_set) {
 		be = *bep;
 		/* A backend without an init method counts as unusable. */
 		pri = -1;
 		if (be->dbbe_init != NULL)
 			pri = be->dbbe_init();
 		if (pri < 0)
 			be->dbbe_active = -1;
 		else
 			be->dbbe_active = 0;
 		if (pri > best) {
 			best = pri;
 			kdb_dbbe = be;
 		}
 	}
 	if (kdb_dbbe == NULL)
 		return;
 
 	printf("KDB: debugger backends:");
 	SET_FOREACH(bep, kdb_dbbe_set) {
 		be = *bep;
 		if (be->dbbe_active != 0)
 			continue;
 		printf(" %s", be->dbbe_name);
 	}
 	printf("\n");
 	printf("KDB: current backend: %s\n", kdb_dbbe->dbbe_name);
 }
 
 /*
  * Handle contexts.
  */
 /*
  * Install a new jump buffer for kdb_reenter() and return the one that was
  * installed before, so that the caller can restore it later.
  */
 void *
 kdb_jmpbuf(jmp_buf new)
 {
 	void *prev;
 
 	prev = kdb_jmpbufp;
 	kdb_jmpbufp = new;
 	return (prev);
 }
 
 /*
  * Re-enter the debugger: if it is active and a jump buffer is installed,
  * announce the re-entry, print a backtrace and jump back through the
  * buffer.  Otherwise return without doing anything.
  */
 void
 kdb_reenter(void)
 {
 
 	if (kdb_active && kdb_jmpbufp != NULL) {
 		printf("KDB: reentering\n");
 		kdb_backtrace();
 		longjmp(kdb_jmpbufp, 1);
 		/* NOTREACHED */
 	}
 }
 
 /*
  * Like kdb_reenter(), but without the message and the backtrace.
  */
 void
 kdb_reenter_silent(void)
 {
 
 	if (kdb_active && kdb_jmpbufp != NULL) {
 		longjmp(kdb_jmpbufp, 1);
 		/* NOTREACHED */
 	}
 }
 
 /*
  * Thread-related support functions.
  */
 /*
  * Return the register context (pcb) to use for a thread: kdb_pcb for the
  * current thread; on SMP kernels that define KDB_STOPPEDPCB, the
  * stopped-cpu pcb for a thread that is current on a stopped cpu; and the
  * thread's own td_pcb otherwise.
  */
 struct pcb *
 kdb_thr_ctx(struct thread *thr)
 {
 #if defined(SMP) && defined(KDB_STOPPEDPCB)
 	struct pcpu *pc;
 #endif
 
 	if (thr == curthread)
 		return (&kdb_pcb);
 
 #if defined(SMP) && defined(KDB_STOPPEDPCB)
 	/* Look for a stopped cpu that was running the thread. */
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)  {
 		if (pc->pc_curthread == thr &&
 		    CPU_ISSET(pc->pc_cpuid, &stopped_cpus))
 			return (KDB_STOPPEDPCB(pc));
 	}
 #endif
 	return (thr->td_pcb);
 }
 
 /*
  * Return the first thread of the first process in the system list that
  * has P_INMEM set and has at least one thread, or NULL if there is none.
  */
 struct thread *
 kdb_thr_first(void)
 {
 	struct proc *p;
 	struct thread *td;
 
 	FOREACH_PROC_IN_SYSTEM(p) {
 		if ((p->p_flag & P_INMEM) == 0)
 			continue;
 		td = FIRST_THREAD_IN_PROC(p);
 		if (td != NULL)
 			return (td);
 	}
 	return (NULL);
 }
 
 /*
  * Return the first thread of the process with the given pid, considering
  * only processes that have P_INMEM set.  Returns NULL if no such process
  * is found.
  */
 struct thread *
 kdb_thr_from_pid(pid_t pid)
 {
 	struct proc *p;
 
 	FOREACH_PROC_IN_SYSTEM(p) {
 		if ((p->p_flag & P_INMEM) == 0)
 			continue;
 		if (p->p_pid == pid)
 			return (FIRST_THREAD_IN_PROC(p));
 	}
 	return (NULL);
 }
 
 /*
  * Find the thread with the given thread id by walking all threads with
  * kdb_thr_first() and kdb_thr_next().  Returns NULL if there is none.
  */
 struct thread *
 kdb_thr_lookup(lwpid_t tid)
 {
 	struct thread *td;
 
 	for (td = kdb_thr_first(); td != NULL; td = kdb_thr_next(td)) {
 		if (td->td_tid == tid)
 			break;
 	}
 	return (td);
 }
 
 /*
  * Return the thread that follows thr in the iteration started by
  * kdb_thr_first(): the next thread of the same process, or else the first
  * thread of the next process in the list that has P_INMEM set and has a
  * thread.  Returns NULL at the end of the process list.
  */
 struct thread *
 kdb_thr_next(struct thread *thr)
 {
 	struct proc *p;
 	struct thread *next;
 
 	next = TAILQ_NEXT(thr, td_plist);
 	if (next != NULL)
 		return (next);
 	/* Out of threads in this process: move on to later processes. */
 	for (p = LIST_NEXT(thr->td_proc, p_list); p != NULL;
 	    p = LIST_NEXT(p, p_list)) {
 		if ((p->p_flag & P_INMEM) == 0)
 			continue;
 		next = FIRST_THREAD_IN_PROC(p);
 		if (next != NULL)
 			return (next);
 	}
 	return (NULL);
 }
 
 /*
  * Make thr the thread the debugger operates on, recording both the thread
  * and the context to use for it.  Returns EINVAL for a NULL thread and 0
  * otherwise.
  */
 int
 kdb_thr_select(struct thread *thr)
 {
 
 	if (thr != NULL) {
 		kdb_thread = thr;
 		kdb_thrctx = kdb_thr_ctx(thr);
 		return (0);
 	}
 	return (EINVAL);
 }
 
 /*
  * Enter the debugger due to a trap.
  *
  * Returns 0 without doing anything if no backend with a trap method is
  * selected or if the debugger is already active.  Otherwise interrupts are
  * disabled, the other cpus are stopped (unless the scheduler is already
  * stopped), the console is grabbed and the backend's trap method is run.
  * The return value is that of the last dbbe_trap() call.
  */
 int
 kdb_trap(int type, int code, struct trapframe *tf)
 {
 #ifdef SMP
 	cpuset_t other_cpus;
 #endif
 	struct kdb_dbbe *be;
 	register_t intr;
 	int handled;
 	int did_stop_cpus;
 
 	be = kdb_dbbe;
 	if (be == NULL || be->dbbe_trap == NULL)
 		return (0);
 
 	/* We reenter the debugger through kdb_reenter(). */
 	if (kdb_active)
 		return (0);
 
 	intr = intr_disable();
 
 	/* Remember whether this call stopped the cpus so it can undo that. */
 	if (!SCHEDULER_STOPPED()) {
 #ifdef SMP
 		other_cpus = all_cpus;
-		CPU_NAND(&other_cpus, &stopped_cpus);
+		CPU_ANDNOT(&other_cpus, &stopped_cpus);
 		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
 		stop_cpus_hard(other_cpus);
 #endif
 		curthread->td_stopsched = 1;
 		did_stop_cpus = 1;
 	} else
 		did_stop_cpus = 0;
 
 	kdb_active++;
 
 	kdb_frame = tf;
 
 	/* Let MD code do its thing first... */
 	kdb_cpu_trap(type, code);
 
 	/* Save the trapping context and make curthread the selected thread. */
 	makectx(tf, &kdb_pcb);
 	kdb_thr_select(curthread);
 
 	cngrab();
 
 	/*
 	 * Run the backend.  If the selected backend changed while it ran,
 	 * dispatch again to the new one, provided it has a trap method.
 	 */
 	for (;;) {
 		handled = be->dbbe_trap(type, code);
 		if (be == kdb_dbbe)
 			break;
 		be = kdb_dbbe;
 		if (be == NULL || be->dbbe_trap == NULL)
 			break;
 		printf("Switching to %s back-end\n", be->dbbe_name);
 	}
 
 	cnungrab();
 
 	kdb_active--;
 
 	if (did_stop_cpus) {
 		curthread->td_stopsched = 0;
 #ifdef SMP
 		/* Restart only cpus from our set that are still stopped. */
 		CPU_AND(&other_cpus, &stopped_cpus);
 		restart_cpus(other_cpus);
 #endif
 	}
 
 	intr_restore(intr);
 
 	return (handled);
 }
Index: head/sys/sparc64/sparc64/mp_machdep.c
===================================================================
--- head/sys/sparc64/sparc64/mp_machdep.c	(revision 355708)
+++ head/sys/sparc64/sparc64/mp_machdep.c	(revision 355709)
@@ -1,810 +1,810 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause AND BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Berkeley Software Design Inc's name may not be used to endorse or
  *    promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * from BSDI: locore.s,v 1.36.2.15 1999/08/23 22:34:41 cp Exp
  */
 /*-
  * Copyright (c) 2002 Jake Burkholder.
  * Copyright (c) 2007 - 2010 Marius Strobl <marius@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 
 #include <dev/ofw/openfirm.h>
 
 #include <machine/asi.h>
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
 #include <machine/md_var.h>
 #include <machine/metadata.h>
 #include <machine/ofw_machdep.h>
 #include <machine/pcb.h>
 #include <machine/smp.h>
 #include <machine/tick.h>
 #include <machine/tlb.h>
 #include <machine/tsb.h>
 #include <machine/tte.h>
 #include <machine/ver.h>
 
 #define	SUNW_STARTCPU		"SUNW,start-cpu"
 #define	SUNW_STOPSELF		"SUNW,stop-self"
 
 static ih_func_t cpu_ipi_ast;
 static ih_func_t cpu_ipi_hardclock;
 static ih_func_t cpu_ipi_preempt;
 static ih_func_t cpu_ipi_stop;
 
 /*
  * Argument area used to pass data to non-boot processors as they start up.
  * This must be statically initialized with a known invalid CPU module ID,
  * since the other processors will use it before the boot CPU enters the
  * kernel.
  */
 struct	cpu_start_args cpu_start_args = { 0, -1, -1, 0, 0, 0 };
 struct	ipi_cache_args ipi_cache_args;
 struct	ipi_rd_args ipi_rd_args;
 struct	ipi_tlb_args ipi_tlb_args;
 struct	pcb stoppcbs[MAXCPU];
 
 struct	mtx ipi_mtx;
 
 cpu_ipi_selected_t *cpu_ipi_selected;
 cpu_ipi_single_t *cpu_ipi_single;
 
 /* Module ID of each CPU, indexed by CPU ID; used as the IPI target ID. */
 static u_int cpuid_to_mid[MAXCPU];
 /* Next CPU ID to hand out to an AP in ap_start(). */
 static u_int cpuids = 1;
 /* CPUs asked to shut down; each clears its own bit in cpu_ipi_stop(). */
 static volatile cpuset_t shutdown_cpus;
 /* Scratch buffer for cpusetobj_strprint() in the IPI failure messages. */
 static char ipi_pbuf[CPUSETBUFSIZ];
 /* Address of the page claimed from the firmware for the AP trampoline. */
 static vm_offset_t mp_tramp;
 
 static void ap_count(phandle_t node, u_int mid, u_int cpu_impl);
 static void ap_start(phandle_t node, u_int mid, u_int cpu_impl);
 static void cpu_mp_unleash(void *v);
 static void foreach_ap(phandle_t node, void (*func)(phandle_t node,
     u_int mid, u_int cpu_impl));
 static void sun4u_startcpu(phandle_t cpu, void *func, u_long arg);
 
 static cpu_ipi_selected_t cheetah_ipi_selected;
 static cpu_ipi_single_t cheetah_ipi_single;
 static cpu_ipi_selected_t jalapeno_ipi_selected;
 static cpu_ipi_single_t jalapeno_ipi_single;
 static cpu_ipi_selected_t spitfire_ipi_selected;
 static cpu_ipi_single_t spitfire_ipi_single;
 
 SYSINIT(cpu_mp_unleash, SI_SUB_SMP, SI_ORDER_FIRST, cpu_mp_unleash, NULL);
 
 /*
  * Prepare the trampoline page used to start the APs: claim one page from
  * the firmware, copy mp_tramp_code into it, patch in the number of kernel
  * TLB slots and the address of mp_startup, and append one TTE per
  * kernel_tlbs[] entry right after the code.  Panics if the page cannot be
  * claimed.
  */
 void
 mp_init(void)
 {
 	struct tte *tp;
 	int i;
 
 	mp_tramp = (vm_offset_t)OF_claim(NULL, PAGE_SIZE, PAGE_SIZE);
 	if (mp_tramp == (vm_offset_t)-1)
 		panic("%s", __func__);
 	bcopy(mp_tramp_code, (void *)mp_tramp, mp_tramp_code_len);
 	/* Fill in the variable slots inside the copied trampoline code. */
 	*(vm_offset_t *)(mp_tramp + mp_tramp_tlb_slots) = kernel_tlb_slots;
 	*(vm_offset_t *)(mp_tramp + mp_tramp_func) = (vm_offset_t)mp_startup;
 	/* The TTE table starts immediately behind the trampoline code. */
 	tp = (struct tte *)(mp_tramp + mp_tramp_code_len);
 	for (i = 0; i < kernel_tlb_slots; i++) {
 		tp[i].tte_vpn = TV_VPN(kernel_tlbs[i].te_va, TS_4M);
 		tp[i].tte_data = TD_V | TD_4M | TD_PA(kernel_tlbs[i].te_pa) |
 		    TD_L | TD_CP | TD_CV | TD_P | TD_W;
 	}
 	/* Flush the whole page, one vm_offset_t-sized step at a time. */
 	for (i = 0; i < PAGE_SIZE; i += sizeof(vm_offset_t))
 		flush(mp_tramp + i);
 }
 
 /*
  * Walk the OFW device tree starting at node (siblings via OF_peer(),
  * children recursively via OF_child()) and invoke func for every leaf node
  * whose "device_type" is "cpu" and whose port ID differs from the current
  * CPU's module ID.  Panics if such a node lacks its implementation or port
  * ID property.
  */
 static void
 foreach_ap(phandle_t node, void (*func)(phandle_t node, u_int mid,
     u_int cpu_impl))
 {
 	static char type[sizeof("cpu")];
 	phandle_t child;
 	uint32_t cpu_impl, portid;
 
 	/* There's no need to traverse the whole OFW tree twice. */
 	if (mp_maxid > 0 && cpuids > mp_maxid)
 		return;
 
 	while (node != 0) {
 		child = OF_child(node);
 		if (child > 0) {
 			/* Interior node: descend into its children. */
 			foreach_ap(child, func);
 		} else if (OF_getprop(node, "device_type", type,
 		    sizeof(type)) > 0 && strcmp(type, "cpu") == 0) {
 			if (OF_getprop(node, "implementation#", &cpu_impl,
 			    sizeof(cpu_impl)) <= 0)
 				panic("%s: couldn't determine CPU "
 				    "implementation", __func__);
 			if (OF_getprop(node, cpu_portid_prop(cpu_impl),
 			    &portid, sizeof(portid)) <= 0)
 				panic("%s: couldn't determine CPU port ID",
 				    __func__);
 			/* Skip the CPU we are currently running on. */
 			if (portid != PCPU_GET(mid))
 				(*func)(node, portid, cpu_impl);
 		}
 		node = OF_peer(node);
 	}
 }
 
 /*
  * Probe for other CPUs.
  */
 void
 cpu_mp_setmaxid(void)
 {
 
 	CPU_SETOF(curcpu, &all_cpus);
 	mp_ncpus = 1;
 
 	foreach_ap(OF_child(OF_peer(0)), ap_count);
 	mp_ncpus = MIN(mp_ncpus, MAXCPU);
 	mp_maxid = mp_ncpus - 1;
 }
 
 /* foreach_ap() callback: account for one more CPU in mp_ncpus. */
 static void
 ap_count(phandle_t node __unused, u_int mid __unused, u_int cpu_impl __unused)
 {
 
 	mp_ncpus += 1;
 }
 
 /* Return 1 if mp_maxid indicates more than one CPU, 0 otherwise. */
 int
 cpu_mp_probe(void)
 {
 
 	if (mp_maxid > 0)
 		return (1);
 	return (0);
 }
 
 /* Return the CPU topology, which here is whatever smp_topo_none() yields. */
 struct cpu_group *
 cpu_topo(void)
 {
 	struct cpu_group *topo;
 
 	topo = smp_topo_none();
 	return (topo);
 }
 
 /*
  * Invoke the firmware's "SUNW,start-cpu" service through ofw_entry() with
  * three arguments: the CPU node, the function address and its argument.
  * The argument block is static, so only the per-call cells are rewritten.
  */
 static void
 sun4u_startcpu(phandle_t cpu, void *func, u_long arg)
 {
 	static struct {
 		cell_t	name;
 		cell_t	nargs;
 		cell_t	nreturns;
 		cell_t	cpu;
 		cell_t	func;
 		cell_t	arg;
 	} args = {
 		.name = (cell_t)SUNW_STARTCPU,
 		.nargs = 3,
 	};
 
 	args.arg = (cell_t)arg;
 	args.func = (cell_t)func;
 	args.cpu = cpu;
 	ofw_entry(&args);
 }
 
 /*
  * Fire up any non-boot processors.
  *
  * Initializes ipi_mtx, picks the IPI send routines matching the boot CPU's
  * implementation, installs the IPI interrupt handlers, records the boot
  * CPU's module ID and finally starts every other CPU found in the OFW tree
  * through ap_start().
  */
 void
 cpu_mp_start(void)
 {
 	u_int cpu_impl, isjbus;
 
 	mtx_init(&ipi_mtx, "ipi", NULL, MTX_SPIN);
 
 	cpu_impl = PCPU_GET(impl);
 	isjbus = (cpu_impl == CPU_IMPL_ULTRASPARCIIIi ||
 	    cpu_impl == CPU_IMPL_ULTRASPARCIIIip) ? 1 : 0;
 	if (isjbus != 0) {
 		cpu_ipi_selected = jalapeno_ipi_selected;
 		cpu_ipi_single = jalapeno_ipi_single;
 	} else if (cpu_impl != CPU_IMPL_SPARC64V &&
 	    cpu_impl < CPU_IMPL_ULTRASPARCIII) {
 		cpu_ipi_selected = spitfire_ipi_selected;
 		cpu_ipi_single = spitfire_ipi_single;
 	} else {
 		cpu_ipi_selected = cheetah_ipi_selected;
 		cpu_ipi_single = cheetah_ipi_single;
 	}
 
 	/* Hook up one handler per IPI interrupt level. */
 	intr_setup(PIL_AST, cpu_ipi_ast, -1, NULL, NULL);
 	intr_setup(PIL_RENDEZVOUS, (ih_func_t *)smp_rendezvous_action,
 	    -1, NULL, NULL);
 	intr_setup(PIL_STOP, cpu_ipi_stop, -1, NULL, NULL);
 	intr_setup(PIL_PREEMPT, cpu_ipi_preempt, -1, NULL, NULL);
 	intr_setup(PIL_HARDCLOCK, cpu_ipi_hardclock, -1, NULL, NULL);
 
 	cpuid_to_mid[curcpu] = PCPU_GET(mid);
 
 	foreach_ap(OF_child(OF_peer(0)), ap_start);
 	KASSERT(!isjbus || mp_ncpus <= IDR_JALAPENO_MAX_BN_PAIRS,
 	    ("%s: can only IPI a maximum of %d JBus-CPUs",
 	    __func__, IDR_JALAPENO_MAX_BN_PAIRS));
 }
 
 /*
  * foreach_ap() callback: start the CPU at OFW node `node' on the
  * trampoline, walk it through the TICK (and, where applicable, STICK)
  * handshake via cpu_start_args, then assign it the next CPU ID, allocate
  * and initialize its per-CPU data and add it to all_cpus.
  *
  * Does nothing once all CPU IDs up to mp_maxid have been handed out.
  * Panics if the node has no "clock-frequency" property.
  */
 static void
 ap_start(phandle_t node, u_int mid, u_int cpu_impl)
 {
 	volatile struct cpu_start_args *csa;
 	struct pcpu *pc;
 	register_t s;
 	vm_offset_t va;
 	u_int cpuid;
 	uint32_t clock;
 
 	if (cpuids > mp_maxid)
 		return;
 
 	if (OF_getprop(node, "clock-frequency", &clock, sizeof(clock)) <= 0)
 		panic("%s: couldn't determine CPU frequency", __func__);
 	/* A CPU clocked differently from this one selects the STICK timer. */
 	if (clock != PCPU_GET(clock))
 		tick_et_use_stick = 1;
 
 	csa = &cpu_start_args;
 	csa->csa_state = 0;
 	sun4u_startcpu(node, (void *)mp_tramp, 0);
 	s = intr_disable();
 	/* Wait for the AP to ask for our TICK value, then publish it. */
 	while (csa->csa_state != CPU_TICKSYNC)
 		;
 	membar(StoreLoad);
 	csa->csa_tick = rd(tick);
 	if (cpu_impl == CPU_IMPL_SPARC64V ||
 	    cpu_impl >= CPU_IMPL_ULTRASPARCIII) {
 		/* Same exchange for the STICK value on these CPUs. */
 		while (csa->csa_state != CPU_STICKSYNC)
 			;
 		membar(StoreLoad);
 		csa->csa_stick = rdstick();
 	}
 	while (csa->csa_state != CPU_INIT)
 		;
 	csa->csa_tick = csa->csa_stick = 0;
 	intr_restore(s);
 
 	cpuid = cpuids++;
 	cpuid_to_mid[cpuid] = mid;
 	cpu_identify(csa->csa_ver, clock, cpuid);
 
 	va = kmem_malloc(PCPU_PAGES * PAGE_SIZE, M_WAITOK | M_ZERO);
 	/* The struct pcpu occupies the very end of the allocated pages. */
 	pc = (struct pcpu *)(va + (PCPU_PAGES * PAGE_SIZE)) - 1;
 	pcpu_init(pc, cpuid, sizeof(*pc));
 	dpcpu_init((void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO), cpuid);
 	pc->pc_addr = va;
 	pc->pc_clock = clock;
 	pc->pc_impl = cpu_impl;
 	pc->pc_mid = mid;
 	pc->pc_node = node;
 
 	cache_init(pc);
 
 	CPU_SET(cpuid, &all_cpus);
 	intr_add_cpu(cpuid);
 }
 
 /* Intentionally empty: nothing is announced here. */
 void
 cpu_mp_announce(void)
 {
 
 }
 
 /*
  * SYSINIT hook: give every CPU its own range of TLB contexts and release
  * the APs one at a time.  For each AP the TTEs mapping its per-CPU pages
  * are published in cpu_start_args and this CPU then waits until the AP
  * reports CPU_BOOTSTRAP.  Clearing csa_count at the end ends the wait
  * loop in cpu_mp_bootstrap().
  */
 static void
 cpu_mp_unleash(void *v __unused)
 {
 	volatile struct cpu_start_args *csa;
 	struct pcpu *pc;
 	register_t s;
 	vm_offset_t va;
 	vm_paddr_t pa;
 	u_int ctx_inc;
 	u_int ctx_min;
 	int i;
 
 	/* Split the user context numbers evenly among the CPUs. */
 	ctx_min = TLB_CTX_USER_MIN;
 	ctx_inc = (TLB_CTX_USER_MAX - 1) / mp_ncpus;
 	csa = &cpu_start_args;
 	csa->csa_count = mp_ncpus;
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 		pc->pc_tlb_ctx = ctx_min;
 		pc->pc_tlb_ctx_min = ctx_min;
 		pc->pc_tlb_ctx_max = ctx_min + ctx_inc;
 		ctx_min += ctx_inc;
 
 		/* The boot CPU only needs its context range set up. */
 		if (pc->pc_cpuid == curcpu)
 			continue;
 		KASSERT(pc->pc_idlethread != NULL,
 		    ("%s: idlethread", __func__));
 		pc->pc_curthread = pc->pc_idlethread;
 		pc->pc_curpcb = pc->pc_curthread->td_pcb;
 		/* Build one TTE for each page of the AP's per-CPU area. */
 		for (i = 0; i < PCPU_PAGES; i++) {
 			va = pc->pc_addr + i * PAGE_SIZE;
 			pa = pmap_kextract(va);
 			if (pa == 0)
 				panic("%s: pmap_kextract", __func__);
 			csa->csa_ttes[i].tte_vpn = TV_VPN(va, TS_8K);
 			csa->csa_ttes[i].tte_data = TD_V | TD_8K | TD_PA(pa) |
 			    TD_L | TD_CP | TD_CV | TD_P | TD_W;
 		}
 		csa->csa_state = 0;
 		csa->csa_pcpu = pc->pc_addr;
 		csa->csa_mid = pc->pc_mid;
 		/* Wait for this AP to report CPU_BOOTSTRAP. */
 		s = intr_disable();
 		while (csa->csa_state != CPU_BOOTSTRAP)
 			;
 		intr_restore(s);
 	}
 
 	membar(StoreLoad);
 	csa->csa_count = 0;
 }
 
 /*
  * Bring up the calling CPU using its per-CPU data pc: perform the
  * CPU-specific initialization, report CPU_BOOTSTRAP through
  * cpu_start_args, wait until csa_count drops to zero and then enter the
  * scheduler via sched_throw(NULL).
  */
 void
 cpu_mp_bootstrap(struct pcpu *pc)
 {
 	volatile struct cpu_start_args *csa;
 
 	csa = &cpu_start_args;
 
 	/* Do CPU-specific initialization. */
 	if (pc->pc_impl >= CPU_IMPL_ULTRASPARCIII)
 		cheetah_init(pc->pc_impl);
 	else if (pc->pc_impl == CPU_IMPL_SPARC64V)
 		zeus_init(pc->pc_impl);
 
 	/*
 	 * Enable the caches.  Note that this may include applying workarounds.
 	 */
 	cache_enable(pc->pc_impl);
 
 	/*
 	 * Clear (S)TICK timer(s) (including NPT) and ensure they are stopped.
 	 */
 	tick_clear(pc->pc_impl);
 	tick_stop(pc->pc_impl);
 
 	/* Set the kernel context. */
 	pmap_set_kctx();
 
 	/* Lock the kernel TSB in the TLB if necessary. */
 	if (tsb_kernel_ldd_phys == 0)
 		pmap_map_tsb();
 
 	/*
 	 * Flush all non-locked TLB entries possibly left over by the
 	 * firmware.
 	 */
 	tlb_flush_nonlocked();
 
 	/*
 	 * Enable interrupts.
 	 * Note that the PIL will be lowered indirectly via sched_throw(NULL)
 	 * when the fake spinlock held by the idle thread eventually is
 	 * released.
 	 */
 	wrpr(pstate, 0, PSTATE_KERNEL);
 
 	smp_cpus++;
 	KASSERT(curthread != NULL, ("%s: curthread", __func__));
 	printf("SMP: AP CPU #%d Launched!\n", curcpu);
 
 	/*
 	 * Report CPU_BOOTSTRAP to cpu_mp_unleash() and wait until it
 	 * resets csa_count to zero.
 	 */
 	csa->csa_count--;
 	membar(StoreLoad);
 	csa->csa_state = CPU_BOOTSTRAP;
 	while (csa->csa_count != 0)
 		;
 
 	if (smp_cpus == mp_ncpus)
 		atomic_store_rel_int(&smp_started, 1);
 
 	/* Start per-CPU event timers. */
 	cpu_initclocks_ap();
 
 	/* Ok, now enter the scheduler. */
 	sched_throw(NULL);
 }
 
 /*
  * Shut down all CPUs other than the calling one: mark them in
  * shutdown_cpus, send a stop request to those not yet in stopped_cpus and
  * poll, for a bounded number of iterations, until every CPU has cleared
  * its shutdown_cpus bit (done in cpu_ipi_stop()).
  */
 void
 cpu_mp_shutdown(void)
 {
 	cpuset_t cpus;
 	int i;
 
 	critical_enter();
 	shutdown_cpus = all_cpus;
 	CPU_CLR(PCPU_GET(cpuid), &shutdown_cpus);
 	cpus = shutdown_cpus;
 
 	/* XXX: Stop all the CPUs which aren't already. */
 	if (CPU_CMP(&stopped_cpus, &cpus)) {
 
 		/* cpus is just a flat "on" mask without curcpu. */
-		CPU_NAND(&cpus, &stopped_cpus);
+		CPU_ANDNOT(&cpus, &stopped_cpus);
 		stop_cpus(cpus);
 	}
 	/* Give up with a message after 100000 polls. */
 	i = 0;
 	while (!CPU_EMPTY(&shutdown_cpus)) {
 		if (i++ > 100000) {
 			printf("timeout shutting down CPUs.\n");
 			break;
 		}
 	}
 	critical_exit();
 }
 
 /* PIL_AST handler: intentionally empty, the handler itself does no work. */
 static void
 cpu_ipi_ast(struct trapframe *tf __unused)
 {
 
 }
 
 /*
  * PIL_STOP handler: save this CPU's context in stoppcbs[], mark it in
  * stopped_cpus and spin until its bit shows up in started_cpus.  If the
  * CPU is marked in shutdown_cpus while it waits, it clears that bit,
  * disables interrupts and spins forever.
  */
 static void
 cpu_ipi_stop(struct trapframe *tf __unused)
 {
 	u_int cpuid;
 
 	CTR2(KTR_SMP, "%s: stopped %d", __func__, curcpu);
 	sched_pin();
 	savectx(&stoppcbs[curcpu]);
 	cpuid = PCPU_GET(cpuid);
 	CPU_SET_ATOMIC(cpuid, &stopped_cpus);
 	while (!CPU_ISSET(cpuid, &started_cpus)) {
 		if (CPU_ISSET(cpuid, &shutdown_cpus)) {
 			/* Acknowledge the shutdown and never return. */
 			CPU_CLR_ATOMIC(cpuid, &shutdown_cpus);
 			(void)intr_disable();
 			for (;;)
 				;
 		}
 	}
 	/* Restarted: withdraw this CPU from both sets again. */
 	CPU_CLR_ATOMIC(cpuid, &started_cpus);
 	CPU_CLR_ATOMIC(cpuid, &stopped_cpus);
 	sched_unpin();
 	CTR2(KTR_SMP, "%s: restarted %d", __func__, curcpu);
 }
 
 /* PIL_PREEMPT handler: ask the scheduler to preempt the current thread. */
 static void
 cpu_ipi_preempt(struct trapframe *tf __unused)
 {
 	struct thread *td;
 
 	td = curthread;
 	sched_preempt(td);
 }
 
 /*
  * PIL_HARDCLOCK handler: run hardclockintr() inside a critical section
  * with the interrupt nesting level raised and tf installed as the current
  * thread's interrupt frame; the previous frame is put back afterwards.
  */
 static void
 cpu_ipi_hardclock(struct trapframe *tf)
 {
 	struct thread *self;
 	struct trapframe *saved;
 
 	critical_enter();
 	self = curthread;
 	self->td_intr_nesting_level++;
 	saved = self->td_intr_frame;
 	self->td_intr_frame = tf;
 	hardclockintr();
 	self->td_intr_frame = saved;
 	self->td_intr_nesting_level--;
 	critical_exit();
 }
 
 /*
  * Send the IPI described by d0..d2 to every CPU in cpus, one CPU at a
  * time in ascending order, using spitfire_ipi_single().
  */
 static void
 spitfire_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2)
 {
 	u_int cpu;
 
 	for (;;) {
 		/* CPU_FFS() is 1-based and yields 0 for an empty set. */
 		cpu = CPU_FFS(&cpus);
 		if (cpu == 0)
 			break;
 		cpu--;
 		CPU_CLR(cpu, &cpus);
 		spitfire_ipi_single(cpu, d0, d1, d2);
 	}
 }
 
 /*
  * Send the IPI described by d0..d2 to a single CPU.
  *
  * Must be called with ipi_mtx held and never for the current CPU.  The
  * dispatch is retried up to IPI_RETRIES times while the dispatch status
  * reports BUSY or NACK.  If all attempts fail this panics, unless the
  * debugger is active or a panic is already in progress, in which case
  * only a message is printed.
  */
 static void
 spitfire_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2)
 {
 	register_t s;
 	u_long ids;
 	u_int mid;
 	int i;
 
 	mtx_assert(&ipi_mtx, MA_OWNED);
 	KASSERT(cpu != curcpu, ("%s: CPU can't IPI itself", __func__));
 	KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) & IDR_BUSY) == 0,
 	    ("%s: outstanding dispatch", __func__));
 
 	mid = cpuid_to_mid[cpu];
 	for (i = 0; i < IPI_RETRIES; i++) {
 		s = intr_disable();
 		stxa(AA_SDB_INTR_D0, ASI_SDB_INTR_W, d0);
 		stxa(AA_SDB_INTR_D1, ASI_SDB_INTR_W, d1);
 		stxa(AA_SDB_INTR_D2, ASI_SDB_INTR_W, d2);
 		membar(Sync);
 		stxa(AA_INTR_SEND | (mid << IDC_ITID_SHIFT),
 		    ASI_SDB_INTR_W, 0);
 		/*
 		 * Workaround for SpitFire erratum #54; do a dummy read
 		 * from a SDB internal register before the MEMBAR #Sync
 		 * for the write to ASI_SDB_INTR_W (requiring another
 		 * MEMBAR #Sync in order to make sure the write has
 		 * occurred before the load).
 		 */
 		membar(Sync);
 		(void)ldxa(AA_SDB_CNTL_HIGH, ASI_SDB_CONTROL_R);
 		membar(Sync);
 		/* Wait for the dispatch to complete before checking it. */
 		while (((ids = ldxa(0, ASI_INTR_DISPATCH_STATUS)) &
 		    IDR_BUSY) != 0)
 			;
 		intr_restore(s);
 		if ((ids & (IDR_BUSY | IDR_NACK)) == 0)
 			return;
 	}
 	/* The module ID is printed in hex to match its "0x" prefix. */
 	if (kdb_active != 0 || panicstr != NULL)
 		printf("%s: couldn't send IPI to module 0x%x\n",
 		    __func__, mid);
 	else
 		panic("%s: couldn't send IPI to module 0x%x",
 		    __func__, mid);
 }
 
 /*
  * Send the IPI described by d0..d2 to a single CPU.
  *
  * Must be called with ipi_mtx held and never for the current CPU.  The
  * dispatch is retried up to IPI_RETRIES times while the dispatch status
  * reports BUSY or NACK.  If all attempts fail this panics, unless the
  * debugger is active or a panic is already in progress, in which case
  * only a message is printed.
  */
 static void
 cheetah_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2)
 {
 	register_t s;
 	u_long ids;
 	u_int mid;
 	int i;
 
 	mtx_assert(&ipi_mtx, MA_OWNED);
 	KASSERT(cpu != curcpu, ("%s: CPU can't IPI itself", __func__));
 	KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) &
 	    IDR_CHEETAH_ALL_BUSY) == 0,
 	    ("%s: outstanding dispatch", __func__));
 
 	mid = cpuid_to_mid[cpu];
 	for (i = 0; i < IPI_RETRIES; i++) {
 		s = intr_disable();
 		stxa(AA_SDB_INTR_D0, ASI_SDB_INTR_W, d0);
 		stxa(AA_SDB_INTR_D1, ASI_SDB_INTR_W, d1);
 		stxa(AA_SDB_INTR_D2, ASI_SDB_INTR_W, d2);
 		membar(Sync);
 		stxa(AA_INTR_SEND | (mid << IDC_ITID_SHIFT),
 		    ASI_SDB_INTR_W, 0);
 		membar(Sync);
 		/* Wait for the dispatch to complete before checking it. */
 		while (((ids = ldxa(0, ASI_INTR_DISPATCH_STATUS)) &
 		    IDR_BUSY) != 0)
 			;
 		intr_restore(s);
 		if ((ids & (IDR_BUSY | IDR_NACK)) == 0)
 			return;
 	}
 	/* The module ID is printed in hex to match its "0x" prefix. */
 	if (kdb_active != 0 || panicstr != NULL)
 		printf("%s: couldn't send IPI to module 0x%x\n",
 		    __func__, mid);
 	else
 		panic("%s: couldn't send IPI to module 0x%x",
 		    __func__, mid);
 }
 
 /*
  * Send the IPI described by d0..d2 to every CPU in cpus.
  *
  * Must be called with ipi_mtx held; cpus must be non-empty and must not
  * contain the current CPU.  Each round dispatches to at most
  * IDR_CHEETAH_MAX_BN_PAIRS targets, each using its own busy/nack pair,
  * and removes the targets that did not NACK from cpus.  Rounds are
  * repeated up to IPI_RETRIES * smp_cpus times.  If targets remain after
  * that this panics, unless the debugger is active or a panic is already
  * in progress, in which case only a message is printed.
  */
 static void
 cheetah_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2)
 {
 	register_t s;
 	u_long ids;
 	u_int bnp;
 	u_int cpu;
 	int i;
 
 	mtx_assert(&ipi_mtx, MA_OWNED);
 	KASSERT(!CPU_EMPTY(&cpus), ("%s: no CPUs to IPI", __func__));
 	KASSERT(!CPU_ISSET(curcpu, &cpus), ("%s: CPU can't IPI itself",
 	    __func__));
 	KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) &
 	    IDR_CHEETAH_ALL_BUSY) == 0,
 	    ("%s: outstanding dispatch", __func__));
 
 	ids = 0;
 	for (i = 0; i < IPI_RETRIES * smp_cpus; i++) {
 		s = intr_disable();
 		stxa(AA_SDB_INTR_D0, ASI_SDB_INTR_W, d0);
 		stxa(AA_SDB_INTR_D1, ASI_SDB_INTR_W, d1);
 		stxa(AA_SDB_INTR_D2, ASI_SDB_INTR_W, d2);
 		membar(Sync);
 		bnp = 0;
 		for (cpu = 0; cpu < smp_cpus; cpu++) {
 			if (CPU_ISSET(cpu, &cpus)) {
 				stxa(AA_INTR_SEND | (cpuid_to_mid[cpu] <<
 				    IDC_ITID_SHIFT) | bnp << IDC_BN_SHIFT,
 				    ASI_SDB_INTR_W, 0);
 				membar(Sync);
 				bnp++;
 				if (bnp == IDR_CHEETAH_MAX_BN_PAIRS)
 					break;
 			}
 		}
 		while (((ids = ldxa(0, ASI_INTR_DISPATCH_STATUS)) &
 		    IDR_CHEETAH_ALL_BUSY) != 0)
 			;
 		intr_restore(s);
 		bnp = 0;
 		for (cpu = 0; cpu < smp_cpus; cpu++) {
 			if (CPU_ISSET(cpu, &cpus)) {
 				if ((ids & (IDR_NACK << (2 * bnp))) == 0)
 					CPU_CLR(cpu, &cpus);
 				bnp++;
 				/*
 				 * Only the first IDR_CHEETAH_MAX_BN_PAIRS
 				 * targets were dispatched above.  The
 				 * remaining ones have no status pair and
 				 * must stay in the set for the next round.
 				 */
 				if (bnp == IDR_CHEETAH_MAX_BN_PAIRS)
 					break;
 			}
 		}
 		if (CPU_EMPTY(&cpus))
 			return;
 	}
 	/* The status word is printed in hex to match its "0x" prefix. */
 	if (kdb_active != 0 || panicstr != NULL)
 		printf("%s: couldn't send IPI (cpus=%s ids=0x%lx)\n",
 		    __func__, cpusetobj_strprint(ipi_pbuf, &cpus), ids);
 	else
 		panic("%s: couldn't send IPI (cpus=%s ids=0x%lx)",
 		    __func__, cpusetobj_strprint(ipi_pbuf, &cpus), ids);
 }
 
 /*
  * Send the IPI described by d0..d2 to a single CPU.
  *
  * Must be called with ipi_mtx held and never for the current CPU.  The
  * busy/nack status bits examined are the pair selected by the target's
  * module ID.  The dispatch is retried up to IPI_RETRIES times.  If all
  * attempts fail this panics, unless the debugger is active or a panic is
  * already in progress, in which case only a message is printed.
  */
 static void
 jalapeno_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2)
 {
 	register_t s;
 	u_long ids;
 	u_int busy, busynack, mid;
 	int i;
 
 	mtx_assert(&ipi_mtx, MA_OWNED);
 	KASSERT(cpu != curcpu, ("%s: CPU can't IPI itself", __func__));
 	KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) &
 	    IDR_CHEETAH_ALL_BUSY) == 0,
 	    ("%s: outstanding dispatch", __func__));
 
 	mid = cpuid_to_mid[cpu];
 	busy = IDR_BUSY << (2 * mid);
 	busynack = (IDR_BUSY | IDR_NACK) << (2 * mid);
 	for (i = 0; i < IPI_RETRIES; i++) {
 		s = intr_disable();
 		stxa(AA_SDB_INTR_D0, ASI_SDB_INTR_W, d0);
 		stxa(AA_SDB_INTR_D1, ASI_SDB_INTR_W, d1);
 		stxa(AA_SDB_INTR_D2, ASI_SDB_INTR_W, d2);
 		membar(Sync);
 		stxa(AA_INTR_SEND | (mid << IDC_ITID_SHIFT),
 		    ASI_SDB_INTR_W, 0);
 		membar(Sync);
 		/* Wait until the target's busy bit has cleared. */
 		while (((ids = ldxa(0, ASI_INTR_DISPATCH_STATUS)) &
 		    busy) != 0)
 			;
 		intr_restore(s);
 		if ((ids & busynack) == 0)
 			return;
 	}
 	/* The module ID is printed in hex to match its "0x" prefix. */
 	if (kdb_active != 0 || panicstr != NULL)
 		printf("%s: couldn't send IPI to module 0x%x\n",
 		    __func__, mid);
 	else
 		panic("%s: couldn't send IPI to module 0x%x",
 		    __func__, mid);
 }
 
 /*
  * Send the IPI described by d0..d2 to every CPU in cpus.
  *
  * Must be called with ipi_mtx held; cpus must be non-empty and must not
  * contain the current CPU.  Each round dispatches to all remaining
  * targets and then removes those whose NACK bit, selected by their module
  * ID, is clear.  Rounds are repeated up to IPI_RETRIES * smp_cpus times.
  * If targets remain after that this panics, unless the debugger is active
  * or a panic is already in progress, in which case only a message is
  * printed.
  */
 static void
 jalapeno_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2)
 {
 	register_t s;
 	u_long ids;
 	u_int cpu;
 	int i;
 
 	mtx_assert(&ipi_mtx, MA_OWNED);
 	KASSERT(!CPU_EMPTY(&cpus), ("%s: no CPUs to IPI", __func__));
 	KASSERT(!CPU_ISSET(curcpu, &cpus), ("%s: CPU can't IPI itself",
 	    __func__));
 	KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) &
 	    IDR_CHEETAH_ALL_BUSY) == 0,
 	    ("%s: outstanding dispatch", __func__));
 
 	ids = 0;
 	for (i = 0; i < IPI_RETRIES * smp_cpus; i++) {
 		s = intr_disable();
 		stxa(AA_SDB_INTR_D0, ASI_SDB_INTR_W, d0);
 		stxa(AA_SDB_INTR_D1, ASI_SDB_INTR_W, d1);
 		stxa(AA_SDB_INTR_D2, ASI_SDB_INTR_W, d2);
 		membar(Sync);
 		for (cpu = 0; cpu < smp_cpus; cpu++) {
 			if (CPU_ISSET(cpu, &cpus)) {
 				stxa(AA_INTR_SEND | (cpuid_to_mid[cpu] <<
 				    IDC_ITID_SHIFT), ASI_SDB_INTR_W, 0);
 				membar(Sync);
 			}
 		}
 		while (((ids = ldxa(0, ASI_INTR_DISPATCH_STATUS)) &
 		    IDR_CHEETAH_ALL_BUSY) != 0)
 			;
 		intr_restore(s);
 		/* Done as soon as no busy or nack bit is left at all. */
 		if ((ids &
 		    (IDR_CHEETAH_ALL_BUSY | IDR_CHEETAH_ALL_NACK)) == 0)
 			return;
 		/* Otherwise keep only the targets that NACKed. */
 		for (cpu = 0; cpu < smp_cpus; cpu++)
 			if (CPU_ISSET(cpu, &cpus))
 				if ((ids & (IDR_NACK <<
 				    (2 * cpuid_to_mid[cpu]))) == 0)
 					CPU_CLR(cpu, &cpus);
 	}
 	/* The status word is printed in hex to match its "0x" prefix. */
 	if (kdb_active != 0 || panicstr != NULL)
 		printf("%s: couldn't send IPI (cpus=%s ids=0x%lx)\n",
 		    __func__, cpusetobj_strprint(ipi_pbuf, &cpus), ids);
 	else
 		panic("%s: couldn't send IPI (cpus=%s ids=0x%lx)",
 		    __func__, cpusetobj_strprint(ipi_pbuf, &cpus), ids);
 }
Index: head/sys/sys/bitset.h
===================================================================
--- head/sys/sys/bitset.h	(revision 355708)
+++ head/sys/sys/bitset.h	(revision 355709)
@@ -1,263 +1,263 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org>
  * All rights reserved.
  *
  * Copyright (c) 2008 Nokia Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_BITSET_H_
 #define	_SYS_BITSET_H_
 
 /*
  * Whether expr is both constant and true.  Result is itself constant.
  * Used to enable optimizations for sets with a known small size.
  */
 #define	__constexpr_cond(expr)	(__builtin_constant_p((expr)) && (expr))
 
 /*
  * Mask with only the bit for member n set, relative to the word that holds
  * it.  For a set known at compile time to fit in one word, n is used as
  * the shift count directly; otherwise it is reduced modulo _BITSET_BITS.
  * The shifted constant is unsigned so that selecting the most significant
  * bit of a word does not shift into the sign bit of a signed long, which
  * would be undefined behaviour.
  */
 #define	__bitset_mask(_s, n)						\
 	(1UL << (__constexpr_cond(__bitset_words((_s)) == 1) ?		\
 	    (__size_t)(n) : ((n) % _BITSET_BITS)))
 
 /*
  * Index of the word holding member n; always 0 for a set known at compile
  * time to fit in one word.
  */
 #define	__bitset_word(_s, n)						\
 	(__constexpr_cond(__bitset_words((_s)) == 1) ?			\
 	 0 : ((n) / _BITSET_BITS))
 
 /* Remove member n from the set p. */
 #define	BIT_CLR(_s, n, p)						\
 	((p)->__bits[__bitset_word(_s, n)] &= ~__bitset_mask((_s), (n)))
 
 /* Copy the whole set f to t by structure assignment. */
 #define	BIT_COPY(_s, f, t)	(void)(*(t) = *(f))
 
 /* Non-zero if member n is present in the set p. */
 #define	BIT_ISSET(_s, n, p)						\
 	((((p)->__bits[__bitset_word(_s, n)] & __bitset_mask((_s), (n))) != 0))
 
 /* Add member n to the set p. */
 #define	BIT_SET(_s, n, p)						\
 	((p)->__bits[__bitset_word(_s, n)] |= __bitset_mask((_s), (n)))
 
 /* Clear every word of the set p. */
 #define	BIT_ZERO(_s, p) do {						\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		(p)->__bits[__i] = 0L;					\
 } while (0)
 
 /* Set every bit in every word of the set p. */
 #define	BIT_FILL(_s, p) do {						\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		(p)->__bits[__i] = -1L;					\
 } while (0)
 
 /* Make p the set containing exactly the single member n. */
 #define	BIT_SETOF(_s, n, p) do {					\
 	BIT_ZERO(_s, p);						\
 	(p)->__bits[__bitset_word(_s, n)] = __bitset_mask((_s), (n));	\
 } while (0)
 
 /* Is p empty?  True when every word of p is zero. */
 #define	BIT_EMPTY(_s, p) __extension__ ({				\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		if ((p)->__bits[__i])					\
 			break;						\
 	__i == __bitset_words((_s));					\
 })
 
 /* Is p the full set?  True when every word of p has all bits set. */
 #define	BIT_ISFULLSET(_s, p) __extension__ ({				\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		if ((p)->__bits[__i] != (long)-1)			\
 			break;						\
 	__i == __bitset_words((_s));					\
 })
 
 /* Is c a subset of p?  True when every bit set in c is also set in p. */
 #define	BIT_SUBSET(_s, p, c) __extension__ ({				\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		if (((c)->__bits[__i] &					\
 		    (p)->__bits[__i]) !=				\
 		    (c)->__bits[__i])					\
 			break;						\
 	__i == __bitset_words((_s));					\
 })
 
 /* Are there any common bits between p & c? */
 #define	BIT_OVERLAP(_s, p, c) __extension__ ({				\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		if (((c)->__bits[__i] &					\
 		    (p)->__bits[__i]) != 0)				\
 			break;						\
 	__i != __bitset_words((_s));					\
 })
 
 /* Compare two sets, returns 0 if equal 1 otherwise. */
 #define	BIT_CMP(_s, p, c) __extension__ ({				\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		if (((c)->__bits[__i] !=				\
 		    (p)->__bits[__i]))					\
 			break;						\
 	__i != __bitset_words((_s));					\
 })
 
 /* d |= s: add every member of s to d. */
 #define	BIT_OR(_s, d, s) do {						\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		(d)->__bits[__i] |= (s)->__bits[__i];			\
 } while (0)
 
 /* d = s1 | s2. */
 #define	BIT_OR2(_s, d, s1, s2) do {					\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		(d)->__bits[__i] = (s1)->__bits[__i] | (s2)->__bits[__i];\
 } while (0)
 
 /* d &= s: keep in d only the members also present in s. */
 #define	BIT_AND(_s, d, s) do {						\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		(d)->__bits[__i] &= (s)->__bits[__i];			\
 } while (0)
 
 /* d = s1 & s2. */
 #define	BIT_AND2(_s, d, s1, s2) do {					\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		(d)->__bits[__i] = (s1)->__bits[__i] & (s2)->__bits[__i];\
 } while (0)
 
 /* d &= ~s: remove from d every member present in s. */
-#define	BIT_NAND(_s, d, s) do {						\
+#define	BIT_ANDNOT(_s, d, s) do {					\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		(d)->__bits[__i] &= ~(s)->__bits[__i];			\
 } while (0)
 
 /* d = s1 & ~s2: the members of s1 that are not in s2. */
-#define	BIT_NAND2(_s, d, s1, s2) do {					\
+#define	BIT_ANDNOT2(_s, d, s1, s2) do {					\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		(d)->__bits[__i] = (s1)->__bits[__i] & ~(s2)->__bits[__i];\
 } while (0)
 
 /* d ^= s: toggle in d every member present in s. */
 #define	BIT_XOR(_s, d, s) do {						\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		(d)->__bits[__i] ^= (s)->__bits[__i];			\
 } while (0)
 
 /* d = s1 ^ s2. */
 #define	BIT_XOR2(_s, d, s1, s2) do {					\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		(d)->__bits[__i] = (s1)->__bits[__i] ^ (s2)->__bits[__i];\
 } while (0)
 
 /* Remove member n from p using atomic_clear_long() on its word. */
 #define	BIT_CLR_ATOMIC(_s, n, p)					\
 	atomic_clear_long(&(p)->__bits[__bitset_word(_s, n)],		\
 	    __bitset_mask((_s), n))
 
 /* Add member n to p using atomic_set_long() on its word. */
 #define	BIT_SET_ATOMIC(_s, n, p)					\
 	atomic_set_long(&(p)->__bits[__bitset_word(_s, n)],		\
 	    __bitset_mask((_s), n))
 
 /* As BIT_SET_ATOMIC, but using atomic_set_acq_long(). */
 #define	BIT_SET_ATOMIC_ACQ(_s, n, p)					\
 	atomic_set_acq_long(&(p)->__bits[__bitset_word(_s, n)],		\
 	    __bitset_mask((_s), n))
 
 /* Convenience functions catering special cases. */
 /*
  * d &= s, done as one atomic_clear_long() of ~s per word.  Each word is
  * updated by a separate atomic operation, not the set as a whole.
  */
 #define	BIT_AND_ATOMIC(_s, d, s) do {					\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		atomic_clear_long(&(d)->__bits[__i],			\
 		    ~(s)->__bits[__i]);					\
 } while (0)
 
 /* d |= s, done as one atomic_set_long() per word. */
 #define	BIT_OR_ATOMIC(_s, d, s) do {					\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		atomic_set_long(&(d)->__bits[__i],			\
 		    (s)->__bits[__i]);					\
 } while (0)
 
 /* Copy f to t, storing each word with atomic_store_rel_long(). */
 #define	BIT_COPY_STORE_REL(_s, f, t) do {				\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		atomic_store_rel_long(&(t)->__bits[__i],		\
 		    (f)->__bits[__i]);					\
 } while (0)
 
 /*
  * 1-based index of the lowest member of p, or 0 if p is empty: the
  * ffsl() result of the first non-zero word plus that word's bit offset.
  */
 #define	BIT_FFS(_s, p) __extension__ ({					\
 	__size_t __i;							\
 	int __bit;							\
 									\
 	__bit = 0;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++) {		\
 		if ((p)->__bits[__i] != 0) {				\
 			__bit = ffsl((p)->__bits[__i]);			\
 			__bit += __i * _BITSET_BITS;			\
 			break;						\
 		}							\
 	}								\
 	__bit;								\
 })
 
 /*
  * 1-based index of the highest member of p, or 0 if p is empty: the
  * flsl() result of the last non-zero word plus that word's bit offset.
  */
 #define	BIT_FLS(_s, p) __extension__ ({					\
 	__size_t __i;							\
 	int __bit;							\
 									\
 	__bit = 0;							\
 	for (__i = __bitset_words((_s)); __i > 0; __i--) {		\
 		if ((p)->__bits[__i - 1] != 0) {			\
 			__bit = flsl((p)->__bits[__i - 1]);		\
 			__bit += (__i - 1) * _BITSET_BITS;		\
 			break;						\
 		}							\
 	}								\
 	__bit;								\
 })
 
 /* Number of members in p: the sum of __bitcountl() over all words. */
 #define	BIT_COUNT(_s, p) __extension__ ({				\
 	__size_t __i;							\
 	int __count;							\
 									\
 	__count = 0;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
 		__count += __bitcountl((p)->__bits[__i]);		\
 	__count;							\
 })
 
 /* Static initializer for a bitset; x is the initializer list for __bits. */
 #define	BITSET_T_INITIALIZER(x)						\
 	{ .__bits = { x } }
 
 /*
  * Initializer list for BITSET_T_INITIALIZER() that sets the first n words
  * to all ones, using a designated range initializer.
  */
 #define	BITSET_FSET(n)							\
 	[ 0 ... ((n) - 1) ] = (-1L)
 
 /* Size in bytes of the word array of a bitset with _s members. */
 #define	BITSET_SIZE(_s)	(__bitset_words((_s)) * sizeof(long))
 
 /*
  * Dynamically allocate a bitset.
  * mt and mf are handed to malloc() as its type and flags arguments.
  */
 #define BITSET_ALLOC(_s, mt, mf)	malloc(BITSET_SIZE((_s)), mt, (mf))
 
 #endif /* !_SYS_BITSET_H_ */
Index: head/sys/sys/cpuset.h
===================================================================
--- head/sys/sys/cpuset.h	(revision 355708)
+++ head/sys/sys/cpuset.h	(revision 355709)
@@ -1,159 +1,159 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2008,	Jeffrey Roberson <jeff@freebsd.org>
  * All rights reserved.
  *
  * Copyright (c) 2008 Nokia Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_CPUSET_H_
 #define	_SYS_CPUSET_H_
 
 #include <sys/_cpuset.h>
 
 #include <sys/bitset.h>
 
 /* Bits per mask word and number of words in a CPU_SETSIZE-bit set. */
 #define	_NCPUBITS	_BITSET_BITS
 #define	_NCPUWORDS	__bitset_words(CPU_SETSIZE)
 
 /* (2 + 2 * sizeof(long)) bytes for each word of the mask. */
 #define	CPUSETBUFSIZ	((2 + sizeof(long) * 2) * _NCPUWORDS)
 
 /*
  * cpuset_t operations.  Each CPU_*() macro forwards to the BIT_*() macro
  * with the matching suffix, passing CPU_SETSIZE as the set size.
  */
 #define	CPU_CLR(n, p)			BIT_CLR(CPU_SETSIZE, n, p)
 #define	CPU_COPY(f, t)			BIT_COPY(CPU_SETSIZE, f, t)
 #define	CPU_ISSET(n, p)			BIT_ISSET(CPU_SETSIZE, n, p)
 #define	CPU_SET(n, p)			BIT_SET(CPU_SETSIZE, n, p)
 #define	CPU_ZERO(p) 			BIT_ZERO(CPU_SETSIZE, p)
 #define	CPU_FILL(p) 			BIT_FILL(CPU_SETSIZE, p)
 #define	CPU_SETOF(n, p)			BIT_SETOF(CPU_SETSIZE, n, p)
 #define	CPU_EMPTY(p)			BIT_EMPTY(CPU_SETSIZE, p)
 #define	CPU_ISFULLSET(p)		BIT_ISFULLSET(CPU_SETSIZE, p)
 #define	CPU_SUBSET(p, c)		BIT_SUBSET(CPU_SETSIZE, p, c)
 #define	CPU_OVERLAP(p, c)		BIT_OVERLAP(CPU_SETSIZE, p, c)
 #define	CPU_CMP(p, c)			BIT_CMP(CPU_SETSIZE, p, c)
 #define	CPU_OR(d, s)			BIT_OR(CPU_SETSIZE, d, s)
 #define	CPU_AND(d, s)			BIT_AND(CPU_SETSIZE, d, s)
-#define	CPU_NAND(d, s)			BIT_NAND(CPU_SETSIZE, d, s)
+#define	CPU_ANDNOT(d, s)		BIT_ANDNOT(CPU_SETSIZE, d, s)
 /* Atomic variants; they forward to the BIT_*_ATOMIC() macros. */
 #define	CPU_CLR_ATOMIC(n, p)		BIT_CLR_ATOMIC(CPU_SETSIZE, n, p)
 #define	CPU_SET_ATOMIC(n, p)		BIT_SET_ATOMIC(CPU_SETSIZE, n, p)
 #define	CPU_SET_ATOMIC_ACQ(n, p)	BIT_SET_ATOMIC_ACQ(CPU_SETSIZE, n, p)
 /* Set-with-set operations take (destination, source), like CPU_OR(). */
 #define	CPU_AND_ATOMIC(d, s)		BIT_AND_ATOMIC(CPU_SETSIZE, d, s)
 #define	CPU_OR_ATOMIC(d, s)		BIT_OR_ATOMIC(CPU_SETSIZE, d, s)
 #define	CPU_COPY_STORE_REL(f, t)	BIT_COPY_STORE_REL(CPU_SETSIZE, f, t)
 /*
  * First/last set bit and population count.  CPU_FFS() and CPU_FLS()
  * evaluate to 0 when the set is empty.
  */
 #define	CPU_FFS(p)			BIT_FFS(CPU_SETSIZE, p)
 #define	CPU_FLS(p)			BIT_FLS(CPU_SETSIZE, p)
 #define	CPU_COUNT(p)			BIT_COUNT(CPU_SETSIZE, p)
 /* Initializer helpers sized for a cpuset_t. */
 #define	CPUSET_FSET			BITSET_FSET(_NCPUWORDS)
 #define	CPUSET_T_INITIALIZER		BITSET_T_INITIALIZER
 
 /*
  * Valid cpulevel_t values.
  */
 #define	CPU_LEVEL_ROOT		1	/* All system cpus. */
 #define	CPU_LEVEL_CPUSET	2	/* Available cpus for which. */
 #define	CPU_LEVEL_WHICH		3	/* Actual mask/id for which. */
 
 /*
  * Valid cpuwhich_t values.
  */
 #define	CPU_WHICH_TID		1	/* Specifies a thread id. */
 #define	CPU_WHICH_PID		2	/* Specifies a process id. */
 #define	CPU_WHICH_CPUSET	3	/* Specifies a set id. */
 #define	CPU_WHICH_IRQ		4	/* Specifies an irq #. */
 #define	CPU_WHICH_JAIL		5	/* Specifies a jail id. */
 #define	CPU_WHICH_DOMAIN	6	/* Specifies a NUMA domain id. */
 #define	CPU_WHICH_INTRHANDLER	7	/* Specifies an irq # (not ithread). */
 #define	CPU_WHICH_ITHREAD	8	/* Specifies an irq's ithread. */
 
 /*
  * Reserved cpuset identifiers.
  */
 #define	CPUSET_INVALID	-1
 #define	CPUSET_DEFAULT	0
 
 #ifdef _KERNEL
 #include <sys/queue.h>
 
 LIST_HEAD(setlist, cpuset);
 
 /*
  * cpusets encapsulate cpu binding information for one or more threads.
  *
  * Locking key for the field annotations below:
  *	a - Accessed with atomics.
  *	s - Set at creation, never modified.  Only a ref required to read.
  *	c - Locked internally by a cpuset lock.
  *
  * The bitmask is only modified while holding the cpuset lock.  It may be
  * read while only a reference is held but the consumer must be prepared
  * to deal with inconsistent results.
  */
 struct cpuset {
 	/* Written under the cpuset lock; see the note above for readers. */
 	cpuset_t		cs_mask;	/* bitmask of valid cpus. */
 	struct domainset	*cs_domain;	/* (c) NUMA policy. */
 	volatile u_int		cs_ref;		/* (a) Reference count. */
 	int			cs_flags;	/* (s) Flags from below. */
 	cpusetid_t		cs_id;		/* (s) Id or INVALID. */
 	struct cpuset		*cs_parent;	/* (s) Pointer to our parent. */
 	LIST_ENTRY(cpuset)	cs_link;	/* (c) All identified sets. */
 	LIST_ENTRY(cpuset)	cs_siblings;	/* (c) Sibling set link. */
 	struct setlist		cs_children;	/* (c) List of children. */
 };
 
 #define CPU_SET_ROOT    0x0001  /* Set is a root set. */
 #define CPU_SET_RDONLY  0x0002  /* No modification allowed. */
 
 extern cpuset_t *cpuset_root;
 struct prison;
 struct proc;
 struct thread;
 
 struct cpuset *cpuset_thread0(void);
 struct cpuset *cpuset_ref(struct cpuset *);
 void	cpuset_rel(struct cpuset *);
 int	cpuset_setthread(lwpid_t id, cpuset_t *);
 int	cpuset_setithread(lwpid_t id, int cpu);
 int	cpuset_create_root(struct prison *, struct cpuset **);
 int	cpuset_setproc_update_set(struct proc *, struct cpuset *);
 int	cpuset_which(cpuwhich_t, id_t, struct proc **,
 	    struct thread **, struct cpuset **);
 void	cpuset_kernthread(struct thread *);
 
 char	*cpusetobj_strprint(char *, const cpuset_t *);
 int	cpusetobj_strscan(cpuset_t *, const char *);
 #ifdef DDB
 void	ddb_display_cpuset(const cpuset_t *);
 #endif
 
 #else
 __BEGIN_DECLS
 int	cpuset(cpusetid_t *);
 int	cpuset_setid(cpuwhich_t, id_t, cpusetid_t);
 int	cpuset_getid(cpulevel_t, cpuwhich_t, id_t, cpusetid_t *);
 int	cpuset_getaffinity(cpulevel_t, cpuwhich_t, id_t, size_t, cpuset_t *);
 int	cpuset_setaffinity(cpulevel_t, cpuwhich_t, id_t, size_t, const cpuset_t *);
 __END_DECLS
 #endif
 #endif /* !_SYS_CPUSET_H_ */
Index: head/sys/sys/domainset.h
===================================================================
--- head/sys/sys/domainset.h	(revision 355708)
+++ head/sys/sys/domainset.h	(revision 355709)
@@ -1,127 +1,127 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2017,	Jeffrey Roberson <jeff@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_DOMAINSET_H_
 #define	_SYS_DOMAINSET_H_
 
 #include <sys/_domainset.h>
 #include <sys/bitset.h>
 #include <sys/queue.h>
 
 /* Bits per mask word and number of words in a DOMAINSET_SETSIZE-bit set. */
 #define	_NDOMAINSETBITS			_BITSET_BITS
 #define	_NDOMAINSETWORDS		__bitset_words(DOMAINSET_SETSIZE)
 
 /*
  * Sum of: (2 + 2 * sizeof(long)) bytes per mask word, the size of the
  * string "::", and the sizes of DOMAINSET_POLICY_MAX and MAXMEMDOM when
  * stringified.  Each sizeof of a string literal counts its terminating NUL.
  * NOTE(review): presumably sized for a printed "mask:policy:domain" form;
  * confirm against the code that formats domainsets.
  */
 #define	DOMAINSETBUFSIZ							\
 	    (((2 + sizeof(long) * 2) * _NDOMAINSETWORDS) +		\
 	    sizeof("::") + sizeof(__XSTRING(DOMAINSET_POLICY_MAX)) +	\
 	    sizeof(__XSTRING(MAXMEMDOM)))
 
 
 /*
  * domainset_t operations.  Each DOMAINSET_*() macro forwards to the BIT_*()
  * macro with the matching suffix, passing DOMAINSET_SETSIZE as the set size.
  */
 #define	DOMAINSET_CLR(n, p)		BIT_CLR(DOMAINSET_SETSIZE, n, p)
 #define	DOMAINSET_COPY(f, t)		BIT_COPY(DOMAINSET_SETSIZE, f, t)
 #define	DOMAINSET_ISSET(n, p)		BIT_ISSET(DOMAINSET_SETSIZE, n, p)
 #define	DOMAINSET_SET(n, p)		BIT_SET(DOMAINSET_SETSIZE, n, p)
 #define	DOMAINSET_ZERO(p) 		BIT_ZERO(DOMAINSET_SETSIZE, p)
 #define	DOMAINSET_FILL(p) 		BIT_FILL(DOMAINSET_SETSIZE, p)
 #define	DOMAINSET_SETOF(n, p)		BIT_SETOF(DOMAINSET_SETSIZE, n, p)
 #define	DOMAINSET_EMPTY(p)		BIT_EMPTY(DOMAINSET_SETSIZE, p)
 #define	DOMAINSET_ISFULLSET(p)		BIT_ISFULLSET(DOMAINSET_SETSIZE, p)
 #define	DOMAINSET_SUBSET(p, c)		BIT_SUBSET(DOMAINSET_SETSIZE, p, c)
 #define	DOMAINSET_OVERLAP(p, c)		BIT_OVERLAP(DOMAINSET_SETSIZE, p, c)
 #define	DOMAINSET_CMP(p, c)		BIT_CMP(DOMAINSET_SETSIZE, p, c)
 #define	DOMAINSET_OR(d, s)		BIT_OR(DOMAINSET_SETSIZE, d, s)
 #define	DOMAINSET_AND(d, s)		BIT_AND(DOMAINSET_SETSIZE, d, s)
-#define	DOMAINSET_NAND(d, s)		BIT_NAND(DOMAINSET_SETSIZE, d, s)
+#define	DOMAINSET_ANDNOT(d, s)		BIT_ANDNOT(DOMAINSET_SETSIZE, d, s)
 #define	DOMAINSET_CLR_ATOMIC(n, p)	BIT_CLR_ATOMIC(DOMAINSET_SETSIZE, n, p)
 #define	DOMAINSET_SET_ATOMIC(n, p)	BIT_SET_ATOMIC(DOMAINSET_SETSIZE, n, p)
 #define	DOMAINSET_SET_ATOMIC_ACQ(n, p)					\
 	    BIT_SET_ATOMIC_ACQ(DOMAINSET_SETSIZE, n, p)
 /*
  * NOTE(review): the (n, p) arguments here look like the (d, s) set pair
  * used by DOMAINSET_OR_ATOMIC(), not a bit index and a set -- confirm
  * against BIT_AND_ATOMIC().
  */
 #define	DOMAINSET_AND_ATOMIC(n, p)	BIT_AND_ATOMIC(DOMAINSET_SETSIZE, n, p)
 #define	DOMAINSET_OR_ATOMIC(d, s)	BIT_OR_ATOMIC(DOMAINSET_SETSIZE, d, s)
 #define	DOMAINSET_COPY_STORE_REL(f, t)					\
 	    BIT_COPY_STORE_REL(DOMAINSET_SETSIZE, f, t)
 /* DOMAINSET_FFS() and DOMAINSET_FLS() evaluate to 0 for an empty set. */
 #define	DOMAINSET_FFS(p)		BIT_FFS(DOMAINSET_SETSIZE, p)
 #define	DOMAINSET_FLS(p)		BIT_FLS(DOMAINSET_SETSIZE, p)
 #define	DOMAINSET_COUNT(p)		BIT_COUNT(DOMAINSET_SETSIZE, p)
 /* Initializer helpers sized for a domainset_t. */
 #define	DOMAINSET_FSET			BITSET_FSET(_NDOMAINSETWORDS)
 #define	DOMAINSET_T_INITIALIZER		BITSET_T_INITIALIZER
 
 #define	DOMAINSET_POLICY_INVALID	0
 #define	DOMAINSET_POLICY_ROUNDROBIN	1
 #define	DOMAINSET_POLICY_FIRSTTOUCH	2
 #define	DOMAINSET_POLICY_PREFER		3
 #define	DOMAINSET_POLICY_INTERLEAVE	4
 #define	DOMAINSET_POLICY_MAX		DOMAINSET_POLICY_INTERLEAVE
 
 #ifdef _KERNEL
 #if MAXMEMDOM < 256
 typedef	uint8_t		domainid_t;
 #else
 typedef uint16_t	domainid_t;
 #endif
 
 /*
  * A domain mask together with its policy.  domainid_t is an unsigned type
  * (uint8_t or uint16_t depending on MAXMEMDOM), so the "-1" documented for
  * ds_prefer is stored as that type's maximum value.
  */
 struct domainset {
 	LIST_ENTRY(domainset)	ds_link;
 	domainset_t	ds_mask;	/* allowed domains. */
 	uint16_t	ds_policy;	/* Policy type. */
 	domainid_t	ds_prefer;	/* Preferred domain or -1. */
 	domainid_t	ds_cnt;		/* popcnt from above. */
 	domainid_t	ds_order[MAXMEMDOM];  /* nth domain table. */
 };
 
 extern struct domainset domainset_fixed[MAXMEMDOM], domainset_prefer[MAXMEMDOM];
 #define	DOMAINSET_FIXED(domain)	(&domainset_fixed[(domain)])
 #define	DOMAINSET_PREF(domain)	(&domainset_prefer[(domain)])
 extern struct domainset domainset_roundrobin;
 #define	DOMAINSET_RR()		(&domainset_roundrobin)
 
 void domainset_init(void);
 void domainset_zero(void);
 
 /*
  * Add a domainset to the system based on a key initializing policy, prefer,
  * and mask.  Do not create and directly use domainset structures.  The
  * returned value will not match the key pointer.
  */
 struct domainset *domainset_create(const struct domainset *);
 #ifdef _SYS_SYSCTL_H_
 int sysctl_handle_domainset(SYSCTL_HANDLER_ARGS);
 #endif
 
 #else
 __BEGIN_DECLS
 int	cpuset_getdomain(cpulevel_t, cpuwhich_t, id_t, size_t, domainset_t *,
 	    int *);
 int	cpuset_setdomain(cpulevel_t, cpuwhich_t, id_t, size_t,
 	    const domainset_t *, int);
 
 __END_DECLS
 #endif
 #endif /* !_SYS_DOMAINSET_H_ */
Index: head/sys/x86/x86/cpu_machdep.c
===================================================================
--- head/sys/x86/x86/cpu_machdep.c	(revision 355708)
+++ head/sys/x86/x86/cpu_machdep.c	(revision 355709)
@@ -1,1426 +1,1426 @@
 /*-
  * Copyright (c) 2003 Peter Wemm.
  * Copyright (c) 1992 Terrence R. Lambert.
  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_acpi.h"
 #include "opt_atpic.h"
 #include "opt_cpu.h"
 #include "opt_ddb.h"
 #include "opt_inet.h"
 #include "opt_isa.h"
 #include "opt_kdb.h"
 #include "opt_kstack_pages.h"
 #include "opt_maxmem.h"
 #include "opt_mp_watchdog.h"
 #include "opt_platform.h"
 #ifdef __i386__
 #include "opt_apic.h"
 #endif
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/cpu.h>
 #include <sys/domainset.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 
 #include <machine/clock.h>
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/specialreg.h>
 #include <machine/md_var.h>
 #include <machine/mp_watchdog.h>
 #include <machine/tss.h>
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 #ifdef CPU_ELAN
 #include <machine/elan_mmcr.h>
 #endif
 #include <x86/acpica_machdep.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_param.h>
 
 #include <isa/isareg.h>
 
 #include <contrib/dev/acpica/include/acpi.h>
 
 #define	STATE_RUNNING	0x0
 #define	STATE_MWAIT	0x1
 #define	STATE_SLEEPING	0x2
 
 #ifdef SMP
 static u_int	cpu_reset_proxyid;
 static volatile u_int	cpu_reset_proxy_active;
 #endif
 
 /*
  * Argument bundle handed to x86_msr_op_one(): the MSR to touch, the
  * operation to apply and its operand.
  */
 struct msr_op_arg {
 	u_int msr;		/* MSR number for rdmsr()/wrmsr(). */
 	int op;			/* MSR_OP_ANDNOT, MSR_OP_OR or MSR_OP_WRITE. */
 	uint64_t arg1;		/* Bit mask or value, depending on op. */
 };
 
 /*
  * Apply the MSR operation described by the struct msr_op_arg that argp
  * points to.  Takes a void * so that it can also be handed to
  * smp_rendezvous().
  */
 static void
 x86_msr_op_one(void *argp)
 {
 	struct msr_op_arg *req = argp;
 
 	if (req->op == MSR_OP_ANDNOT) {
 		/* Clear the bits selected by arg1. */
 		wrmsr(req->msr, rdmsr(req->msr) & ~req->arg1);
 	} else if (req->op == MSR_OP_OR) {
 		/* Set the bits selected by arg1. */
 		wrmsr(req->msr, rdmsr(req->msr) | req->arg1);
 	} else if (req->op == MSR_OP_WRITE) {
 		/* Replace the register contents with arg1. */
 		wrmsr(req->msr, req->arg1);
 	}
 	/* Any other op value is silently ignored. */
 }
 
 #define	MSR_OP_EXMODE_MASK	0xf0000000
 #define	MSR_OP_OP_MASK		0x000000ff
 
 /*
  * Apply an MSR operation in one of three execution modes.
  *
  * The bits of op under MSR_OP_OP_MASK select the operation carried out by
  * x86_msr_op_one(); the bits under MSR_OP_EXMODE_MASK select where it runs:
  *   MSR_OP_LOCAL      - once, on the CPU executing this call;
  *   MSR_OP_SCHED      - once per CPU of CPU_FOREACH(), by binding the
  *                       current thread to each of them in turn;
  *   MSR_OP_RENDEZVOUS - by handing x86_msr_op_one() to smp_rendezvous().
  * Both fields are checked with MPASS().
  */
 void
 x86_msr_op(u_int msr, u_int op, uint64_t arg1)
 {
 	struct thread *td;
 	struct msr_op_arg a;
 	u_int exmode;
 	int bound_cpu, i, is_bound;
 
 	a.op = op & MSR_OP_OP_MASK;
 	MPASS(a.op == MSR_OP_ANDNOT || a.op == MSR_OP_OR ||
 	    a.op == MSR_OP_WRITE);
 	exmode = op & MSR_OP_EXMODE_MASK;
 	MPASS(exmode == MSR_OP_LOCAL || exmode == MSR_OP_SCHED ||
 	    exmode == MSR_OP_RENDEZVOUS);
 	a.msr = msr;
 	a.arg1 = arg1;
 	switch (exmode) {
 	case MSR_OP_LOCAL:
 		x86_msr_op_one(&a);
 		break;
 	case MSR_OP_SCHED:
 		td = curthread;
 		thread_lock(td);
 		/* Remember the current binding so it can be restored below. */
 		is_bound = sched_is_bound(td);
 		bound_cpu = td->td_oncpu;
 		CPU_FOREACH(i) {
 			sched_bind(td, i);
 			x86_msr_op_one(&a);
 		}
 		/* Put the thread back the way it was found. */
 		if (is_bound)
 			sched_bind(td, bound_cpu);
 		else
 			sched_unbind(td);
 		thread_unlock(td);
 		break;
 	case MSR_OP_RENDEZVOUS:
 		smp_rendezvous(NULL, x86_msr_op_one, NULL, &a);
 		break;
 	}
 }
 
 /*
  * Automatically initialized per CPU errata in cpu_idle_tun below.
  */
 bool mwait_cpustop_broken = false;
 SYSCTL_BOOL(_machdep, OID_AUTO, mwait_cpustop_broken, CTLFLAG_RDTUN,
     &mwait_cpustop_broken, 0,
     "Can not reliably wake MONITOR/MWAIT cpus without interrupts");
 
 /*
  * Machine dependent boot() routine
  *
  * I haven't seen anything to put here yet
  * Possibly some stuff might be grafted back here from boot()
  */
 void
 cpu_boot(int howto)
 {
 }
 
 /*
  * Flush the D-cache for non-DMA I/O so that the I-cache can
  * be made coherent later.
  */
 void
 cpu_flush_dcache(void *ptr, size_t len)
 {
 	/* Not applicable */
 }
 
 /*
  * Enable interrupts and halt.  "sti" and "hlt" are emitted back to back
  * from a single asm statement; cpu_idle_hlt() explains why hlt must be the
  * instruction right after sti.
  */
 void
 acpi_cpu_c1(void)
 {
 
 	__asm __volatile("sti; hlt");
 }
 
 /*
  * Use mwait to pause execution while waiting for an interrupt or
  * another thread to signal that there is more work.
  *
  * NOTE: Interrupts will cause a wakeup; however, this function does
  * not enable interrupt handling. The caller is responsible to enable
  * interrupts.
  */
 void
 acpi_cpu_idle_mwait(uint32_t mwait_hint)
 {
 	int *state;
 	uint64_t v;
 
 	/*
 	 * A comment in Linux patch claims that 'CPUs run faster with
 	 * speculation protection disabled. All CPU threads in a core
 	 * must disable speculation protection for it to be
 	 * disabled. Disable it while we are idle so the other
 	 * hyperthread can run fast.'
 	 *
 	 * XXXKIB.  Software coordination mode should be supported,
 	 * but all Intel CPUs provide hardware coordination.
 	 */
 
 	/* The caller must have put this CPU's idle_state into SLEEPING. */
 	state = &PCPU_PTR(monitorbuf)->idle_state;
 	KASSERT(atomic_load_int(state) == STATE_SLEEPING,
 	    ("cpu_mwait_cx: wrong monitorbuf state"));
 	atomic_store_int(state, STATE_MWAIT);
 	/*
 	 * Clear IBRS/STIBP/SSBD for the duration of the sleep.  v keeps the
 	 * value read from the MSR for the restore below; it is 0 when the
 	 * MSR was not touched.
 	 */
 	if (PCPU_GET(ibpb_set) || hw_ssb_active) {
 		v = rdmsr(MSR_IA32_SPEC_CTRL);
 		wrmsr(MSR_IA32_SPEC_CTRL, v & ~(IA32_SPEC_CTRL_IBRS |
 		    IA32_SPEC_CTRL_STIBP | IA32_SPEC_CTRL_SSBD));
 	} else {
 		v = 0;
 	}
 	/*
 	 * Arm the monitor on the state word, then re-check it: mwait is
 	 * skipped if the state is no longer STATE_MWAIT by now.
 	 */
 	cpu_monitor(state, 0, 0);
 	if (atomic_load_int(state) == STATE_MWAIT)
 		cpu_mwait(MWAIT_INTRBREAK, mwait_hint);
 
 	/*
 	 * SSB cannot be disabled while we sleep, or rather, if it was
 	 * disabled, the sysctl thread will bind to our cpu to tweak
 	 * MSR.
 	 */
 	if (v != 0)
 		wrmsr(MSR_IA32_SPEC_CTRL, v);
 
 	/*
 	 * We should exit on any event that interrupts mwait, because
 	 * that event might be a wanted interrupt.
 	 */
 	atomic_store_int(state, STATE_RUNNING);
 }
 
 /*
  * Get current clock frequency for the given cpu id.
  *
  * The TSC is sampled around a DELAY(1000) with interrupts disabled and the
  * difference is scaled by 1000.  When the TSC is P-state invariant the
  * result is additionally scaled by the APERF/MPERF ratio measured over the
  * same interval.
  *
  * Returns 0 with *rate filled in on success, EINVAL if cpu_id does not
  * name a CPU or rate is NULL, and EOPNOTSUPP if the measurement cannot be
  * made (no TSC on i386, an invariant TSC without APERF/MPERF, or an MPERF
  * reading of zero).  *rate is left untouched on failure.
  */
 int
 cpu_est_clockrate(int cpu_id, uint64_t *rate)
 {
 	uint64_t tsc1, tsc2;
 	uint64_t acnt, mcnt, perf;
 	register_t reg;
 	int error;
 
 	if (pcpu_find(cpu_id) == NULL || rate == NULL)
 		return (EINVAL);
 #ifdef __i386__
 	if ((cpu_feature & CPUID_TSC) == 0)
 		return (EOPNOTSUPP);
 #endif
 
 	/*
 	 * If TSC is P-state invariant and APERF/MPERF MSRs do not exist,
 	 * DELAY(9) based logic fails.
 	 */
 	if (tsc_is_invariant && !tsc_perf_stat)
 		return (EOPNOTSUPP);
 
 #ifdef SMP
 	if (smp_cpus > 1) {
 		/* Schedule ourselves on the indicated cpu. */
 		thread_lock(curthread);
 		sched_bind(curthread, cpu_id);
 		thread_unlock(curthread);
 	}
 #endif
 
 	/* Calibrate by measuring a short delay. */
 	error = 0;
 	reg = intr_disable();
 	if (tsc_is_invariant) {
 		wrmsr(MSR_MPERF, 0);
 		wrmsr(MSR_APERF, 0);
 		tsc1 = rdtsc();
 		DELAY(1000);
 		mcnt = rdmsr(MSR_MPERF);
 		acnt = rdmsr(MSR_APERF);
 		tsc2 = rdtsc();
 		intr_restore(reg);
 		/*
 		 * Guard the division: an MPERF that reads back as zero
 		 * would otherwise raise a divide fault in the kernel.
 		 */
 		if (mcnt != 0) {
 			perf = 1000 * acnt / mcnt;
 			*rate = (tsc2 - tsc1) * perf;
 		} else {
 			error = EOPNOTSUPP;
 		}
 	} else {
 		tsc1 = rdtsc();
 		DELAY(1000);
 		tsc2 = rdtsc();
 		intr_restore(reg);
 		*rate = (tsc2 - tsc1) * 1000;
 	}
 
 	/* Always drop the binding taken above, also on the error path. */
 #ifdef SMP
 	if (smp_cpus > 1) {
 		thread_lock(curthread);
 		sched_unbind(curthread);
 		thread_unlock(curthread);
 	}
 #endif
 
 	return (error);
 }
 
 /*
  * Park the calling CPU for good: call halt() over and over and never
  * return.
  */
 void
 cpu_halt(void)
 {
 
 	while (1)
 		halt();
 }
 
 /*
  * Try every known reset method in turn, with interrupts disabled: the
  * Elan and Geode specific hooks when configured, the keyboard controller
  * (unless BROKEN_KEYBOARD_RESET is defined), the reset control register at
  * port 0xcf9 and the INIT bit at port 0x92.  If none of them takes effect,
  * load an empty IDT, execute a breakpoint and finally spin.
  */
 static void
 cpu_reset_real(void)
 {
 	struct region_descriptor null_idt;
 	int b;
 
 	disable_intr();
 #ifdef CPU_ELAN
 	if (elan_mmcr != NULL)
 		elan_mmcr->RESCFG = 1;
 #endif
 #ifdef __i386__
 	if (cpu == CPU_GEODE1100) {
 		/* Attempt Geode's own reset */
 		outl(0xcf8, 0x80009044ul);
 		outl(0xcfc, 0xf);
 	}
 #endif
 #if !defined(BROKEN_KEYBOARD_RESET)
 	/*
 	 * Attempt to do a CPU reset via the keyboard controller,
 	 * do not turn off GateA20, as any machine that fails
 	 * to do the reset here would then end up in no man's land.
 	 */
 	outb(IO_KBD + 4, 0xFE);
 	DELAY(500000);	/* wait 0.5 sec to see if that did it */
 #endif
 
 	/*
 	 * Attempt to force a reset via the Reset Control register at
 	 * I/O port 0xcf9.  Bit 2 forces a system reset when it
 	 * transitions from 0 to 1.  Bit 1 selects the type of reset
 	 * to attempt: 0 selects a "soft" reset, and 1 selects a
 	 * "hard" reset.  We try a "hard" reset.  The first write sets
 	 * bit 1 to select a "hard" reset and clears bit 2.  The
 	 * second write forces a 0 -> 1 transition in bit 2 to trigger
 	 * a reset.
 	 */
 	outb(0xcf9, 0x2);
 	outb(0xcf9, 0x6);
 	DELAY(500000);  /* wait 0.5 sec to see if that did it */
 
 	/*
 	 * Attempt to force a reset via the Fast A20 and Init register
 	 * at I/O port 0x92.  Bit 1 serves as an alternate A20 gate.
 	 * Bit 0 asserts INIT# when set to 1.  We are careful to only
 	 * preserve bit 1 while setting bit 0.  We also must clear bit
 	 * 0 before setting it if it isn't already clear.
 	 */
 	b = inb(0x92);
 	/* A read of 0xff skips this method altogether. */
 	if (b != 0xff) {
 		if ((b & 0x1) != 0)
 			outb(0x92, b & 0xfe);
 		outb(0x92, b | 0x1);
 		DELAY(500000);  /* wait 0.5 sec to see if that did it */
 	}
 
 	printf("No known reset method worked, attempting CPU shutdown\n");
 	DELAY(1000000); /* wait 1 sec for printf to complete */
 
 	/* Wipe the IDT. */
 	null_idt.rd_limit = 0;
 	null_idt.rd_base = 0;
 	lidt(&null_idt);
 
 	/* "good night, sweet prince .... <THUNK!>" */
 	breakpoint();
 
 	/* NOTREACHED */
 	while(1);
 }
 
 #ifdef SMP
 /*
  * Installed by cpu_reset() as cpustop_restartfunc when the reset is
  * requested from a CPU other than CPU 0.  Handshake through
  * cpu_reset_proxy_active: announce ourselves by storing 1, wait until the
  * requesting CPU replaces that value, then perform the reset.
  */
 static void
 cpu_reset_proxy(void)
 {
 
 	cpu_reset_proxy_active = 1;
 	while (cpu_reset_proxy_active == 1)
 		ia32_pause(); /* Wait for other cpu to see that we've started */
 
 	printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
 	DELAY(1000000);
 	cpu_reset_real();
 }
 #endif
 
 /*
  * Reset the machine.  On SMP systems that have started their APs, first
  * stop every other CPU that is not already stopped.  If the caller is not
  * CPU 0, restart CPU 0 with cpu_reset_proxy() installed so that it performs
  * the reset, and spin here once it has announced itself.  Otherwise, or if
  * CPU 0 does not respond, fall through to cpu_reset_real().
  */
 void
 cpu_reset(void)
 {
 #ifdef SMP
 	struct monitorbuf *mb;
 	cpuset_t map;
 	u_int cnt;
 
 	if (smp_started) {
 		/* map = all CPUs, minus ourselves, minus those already stopped. */
 		map = all_cpus;
 		CPU_CLR(PCPU_GET(cpuid), &map);
-		CPU_NAND(&map, &stopped_cpus);
+		CPU_ANDNOT(&map, &stopped_cpus);
 		if (!CPU_EMPTY(&map)) {
 			printf("cpu_reset: Stopping other CPUs\n");
 			stop_cpus(map);
 		}
 
 		if (PCPU_GET(cpuid) != 0) {
 			cpu_reset_proxyid = PCPU_GET(cpuid);
 			cpustop_restartfunc = cpu_reset_proxy;
 			cpu_reset_proxy_active = 0;
 			printf("cpu_reset: Restarting BSP\n");
 
 			/* Restart CPU #0. */
 			CPU_SETOF(0, &started_cpus);
 			mb = &pcpu_find(0)->pc_monitorbuf;
 			atomic_store_int(&mb->stop_state,
 			    MONITOR_STOPSTATE_RUNNING);
 
 			/* Bounded wait for cpu_reset_proxy() to store 1. */
 			cnt = 0;
 			while (cpu_reset_proxy_active == 0 && cnt < 10000000) {
 				ia32_pause();
 				cnt++;	/* Wait for BSP to announce restart */
 			}
 			if (cpu_reset_proxy_active == 0) {
 				printf("cpu_reset: Failed to restart BSP\n");
 			} else {
 				/* Release the proxy and let it do the reset. */
 				cpu_reset_proxy_active = 2;
 				while (1)
 					ia32_pause();
 				/* NOTREACHED */
 			}
 		}
 
 		DELAY(1000000);
 	}
 #endif
 	cpu_reset_real();
 	/* NOTREACHED */
 }
 
 bool
 cpu_mwait_usable(void)
 {
 
 	return ((cpu_feature2 & CPUID2_MON) != 0 && ((cpu_mon_mwait_flags &
 	    (CPUID5_MON_MWAIT_EXT | CPUID5_MWAIT_INTRBREAK)) ==
 	    (CPUID5_MON_MWAIT_EXT | CPUID5_MWAIT_INTRBREAK)));
 }
 
 void (*cpu_idle_hook)(sbintime_t) = NULL;	/* ACPI idle hook. */
 static int	cpu_ident_amdc1e = 0;	/* AMD C1E supported. */
 static int	idle_mwait = 1;		/* Use MONITOR/MWAIT for short idle. */
 SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RWTUN, &idle_mwait,
     0, "Use MONITOR/MWAIT for short idle");
 
 /*
  * Idle method "acpi": hand the CPU to cpu_idle_hook when one is installed,
  * otherwise fall back to acpi_cpu_c1().  The per-CPU idle_state is
  * SLEEPING for the duration and RUNNING again on return.
  */
 static void
 cpu_idle_acpi(sbintime_t sbt)
 {
 	int *idlep = &PCPU_PTR(monitorbuf)->idle_state;
 
 	atomic_store_int(idlep, STATE_SLEEPING);
 
 	/* See comments in cpu_idle_hlt(). */
 	disable_intr();
 	if (sched_runnable()) {
 		enable_intr();
 	} else if (cpu_idle_hook) {
 		cpu_idle_hook(sbt);
 	} else {
 		acpi_cpu_c1();
 	}
 	atomic_store_int(idlep, STATE_RUNNING);
 }
 
 /*
  * Idle method "hlt": halt through acpi_cpu_c1() unless a thread became
  * runnable in the meantime.  sbt is not used.
  */
 static void
 cpu_idle_hlt(sbintime_t sbt)
 {
 	int *idlep = &PCPU_PTR(monitorbuf)->idle_state;
 
 	atomic_store_int(idlep, STATE_SLEEPING);
 
 	/*
 	 * cpu_idle() may have us inside a critical section, so an
 	 * interrupt taken there can leave a preemption pending.  If the
 	 * CPU halted now, that thread might not run until some later
 	 * interrupt wakes the CPU.  Close the race by checking for a
 	 * runnable thread with interrupts disabled and skipping the halt
 	 * when one is found.
 	 *
 	 * Otherwise hlt must be the very next instruction after sti.
 	 * On x86, sti sets IF at once but interrupts are only enabled
 	 * after the instruction that follows it, so any interrupt that
 	 * fires after disable_intr() wakes the CPU from hlt right away.
 	 */
 	disable_intr();
 	if (!sched_runnable())
 		acpi_cpu_c1();
 	else
 		enable_intr();
 	atomic_store_int(idlep, STATE_RUNNING);
 }
 
 /*
  * Idle method "mwait": wait in MWAIT with the monitor armed on this CPU's
  * idle_state word.  sbt is not used.
  */
 static void
 cpu_idle_mwait(sbintime_t sbt)
 {
 	int *state;
 
 	state = &PCPU_PTR(monitorbuf)->idle_state;
 	atomic_store_int(state, STATE_MWAIT);
 
 	/* See comments in cpu_idle_hlt(). */
 	disable_intr();
 	if (sched_runnable()) {
 		atomic_store_int(state, STATE_RUNNING);
 		enable_intr();
 		return;
 	}
 
 	/*
 	 * Arm the monitor, then re-check the state word: mwait is only
 	 * entered if it still reads STATE_MWAIT.  The asm issues sti
 	 * immediately followed by mwait, with eax = MWAIT_C1 and ecx = 0.
 	 */
 	cpu_monitor(state, 0, 0);
 	if (atomic_load_int(state) == STATE_MWAIT)
 		__asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
 	else
 		enable_intr();
 	atomic_store_int(state, STATE_RUNNING);
 }
 
 /*
  * Idle method "spin": busy-wait for up to 1000 cpu_spinwait() rounds,
  * returning early as soon as the scheduler reports runnable work.  sbt is
  * not used.
  */
 static void
 cpu_idle_spin(sbintime_t sbt)
 {
 	int *idlep = &PCPU_PTR(monitorbuf)->idle_state;
 	int rounds;
 
 	atomic_store_int(idlep, STATE_RUNNING);
 
 	/*
 	 * The sched_runnable() check is racy, but because it is repeated
 	 * in a loop, missing it once costs very little (and is far better
 	 * than not checking at all).
 	 */
 	rounds = 1000;
 	while (rounds-- > 0) {
 		if (sched_runnable())
 			return;
 		cpu_spinwait();
 	}
 }
 
 /*
  * C1E renders the local APIC timer dead, so we disable it by
  * reading the Interrupt Pending Message register and clearing
  * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
  * 
  * Reference:
  *   "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors"
  *   #32559 revision 3.00+
  */
 #define	MSR_AMDK8_IPM		0xc0010055
 #define	AMDK8_SMIONCMPHALT	(1ULL << 27)
 #define	AMDK8_C1EONCMPHALT	(1ULL << 28)
 #define	AMDK8_CMPHALT		(AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)
 
 /*
  * Detect the presence of C1E capability mostly on latest dual-cores (or
  * future) k8 family, and record it in cpu_ident_amdc1e.
  */
 void
 cpu_probe_amdc1e(void)
 {
 
 	if (cpu_vendor_id != CPU_VENDOR_AMD)
 		return;
 	if ((cpu_id & 0x00000f00) != 0x00000f00)
 		return;
 	if ((cpu_id & 0x0fff0000) < 0x00040000)
 		return;
 	cpu_ident_amdc1e = 1;
 }
 
 void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi;
 
 /*
  * Idle the current CPU.  With a non-zero busy, the short MONITOR/MWAIT
  * path is taken when the CPU has CPUID2_MON and idle_mwait is set.  In all
  * other cases the method installed in cpu_idle_fn is called; when busy is
  * zero this happens inside a critical section with the timers switched to
  * idle mode.
  */
 void
 cpu_idle(int busy)
 {
 	uint64_t msr;
 	/* Stays -1 unless cpu_idleclock() supplies a value below. */
 	sbintime_t sbt = -1;
 
 	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
 	    busy, curcpu);
 #ifdef MP_WATCHDOG
 	ap_watchdog(PCPU_GET(cpuid));
 #endif
 
 	/* If we are busy - try to use fast methods. */
 	if (busy) {
 		if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
 			cpu_idle_mwait(busy);
 			goto out;
 		}
 	}
 
 	/* If we have time - switch timers into idle mode. */
 	if (!busy) {
 		critical_enter();
 		sbt = cpu_idleclock();
 	}
 
 	/* Apply AMD APIC timer C1E workaround. */
 	if (cpu_ident_amdc1e && cpu_disable_c3_sleep) {
 		msr = rdmsr(MSR_AMDK8_IPM);
 		if (msr & AMDK8_CMPHALT)
 			wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
 	}
 
 	/* Call main idle method. */
 	cpu_idle_fn(sbt);
 
 	/* Switch timers back into active mode. */
 	if (!busy) {
 		cpu_activeclock();
 		critical_exit();
 	}
 out:
 	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
 	    busy, curcpu);
 }
 
 static int cpu_idle_apl31_workaround;
 SYSCTL_INT(_machdep, OID_AUTO, idle_apl31, CTLFLAG_RW,
     &cpu_idle_apl31_workaround, 0,
     "Apollo Lake APL31 MWAIT bug workaround");
 
 /*
  * Look at the idle_state word of the given CPU and return:
  *   0 - it is in STATE_SLEEPING, or it was in STATE_MWAIT and the APL31
  *       workaround is enabled;
  *   1 - it is already running, or it was in STATE_MWAIT and has now been
  *       switched to STATE_RUNNING by a store to the state word.
  * Any other state value panics.
  */
 int
 cpu_idle_wakeup(int cpu)
 {
 	int *idlep = &pcpu_find(cpu)->pc_monitorbuf.idle_state;
 
 	switch (atomic_load_int(idlep)) {
 	case STATE_RUNNING:
 		return (1);
 	case STATE_SLEEPING:
 		return (0);
 	case STATE_MWAIT:
 		atomic_store_int(idlep, STATE_RUNNING);
 		if (cpu_idle_apl31_workaround)
 			return (0);
 		return (1);
 	default:
 		panic("bad monitor state");
 		return (1);
 	}
 }
 
 /*
  * Ordered by speed/power consumption.
  *
  * id_fn has the same signature as cpu_idle_fn, so entries can be assigned
  * to and compared with it without going through void *.  A non-zero
  * id_cpuid2_flag is a cpu_feature2 bit that must be set for the method to
  * be offered.
  */
 static struct {
 	void	(*id_fn)(sbintime_t);
 	char	*id_name;
 	int	id_cpuid2_flag;
 } idle_tbl[] = {
 	{ .id_fn = cpu_idle_spin, .id_name = "spin" },
 	{ .id_fn = cpu_idle_mwait, .id_name = "mwait",
 	    .id_cpuid2_flag = CPUID2_MON },
 	{ .id_fn = cpu_idle_hlt, .id_name = "hlt" },
 	{ .id_fn = cpu_idle_acpi, .id_name = "acpi" },
 };
 
 static int
 idle_sysctl_available(SYSCTL_HANDLER_ARGS)
 {
 	char *avail, *p;
 	int error;
 	int i;
 
 	avail = malloc(256, M_TEMP, M_WAITOK);
 	p = avail;
 	for (i = 0; i < nitems(idle_tbl); i++) {
 		if (idle_tbl[i].id_cpuid2_flag != 0 &&
 		    (cpu_feature2 & idle_tbl[i].id_cpuid2_flag) == 0)
 			continue;
 		if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
 		    cpu_idle_hook == NULL)
 			continue;
 		p += sprintf(p, "%s%s", p != avail ? ", " : "",
 		    idle_tbl[i].id_name);
 	}
 	error = sysctl_handle_string(oidp, avail, 0, req);
 	free(avail, M_TEMP);
 	return (error);
 }
 
 SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
     0, 0, idle_sysctl_available, "A", "list of available idle functions");
 
 /*
  * Make the idle method called new_idle_name the current one by storing
  * its function in cpu_idle_fn.  Entries that are unusable on this machine
  * (missing CPUID feature, or "acpi" without a cpu_idle_hook) never match.
  *
  * Returns true if a method was selected, false otherwise.
  */
 static bool
 cpu_idle_selector(const char *new_idle_name)
 {
 	const char *name;
 	int flag, idx;
 
 	for (idx = 0; idx < nitems(idle_tbl); idx++) {
 		flag = idle_tbl[idx].id_cpuid2_flag;
 		name = idle_tbl[idx].id_name;
 
 		if (flag != 0 && (cpu_feature2 & flag) == 0)
 			continue;
 		if (strcmp(name, "acpi") == 0 && cpu_idle_hook == NULL)
 			continue;
 		if (strcmp(name, new_idle_name) != 0)
 			continue;
 
 		cpu_idle_fn = idle_tbl[idx].id_fn;
 		if (bootverbose)
 			printf("CPU idle set to %s\n", name);
 		return (true);
 	}
 	return (false);
 }
 
 /*
  * Sysctl handler for the currently selected idle routine.
  *
  * On read, reports the idle_tbl[] name whose function matches
  * cpu_idle_fn, or "unknown" when none matches.  On write, the supplied
  * name is handed to cpu_idle_selector().
  *
  * Returns the error from sysctl_handle_string(), EINVAL when the
  * requested name cannot be selected, or 0 on success.
  */
 static int
 cpu_idle_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	char buf[16];
 	const char *p;
 	int error, i;
 
 	p = "unknown";
 	for (i = 0; i < nitems(idle_tbl); i++) {
 		if (idle_tbl[i].id_fn == cpu_idle_fn) {
 			p = idle_tbl[i].id_name;
 			break;
 		}
 	}
 	/*
 	 * strncpy() zero-fills the tail of buf but does not terminate it
 	 * when the source is sizeof(buf) bytes or longer, so terminate
 	 * explicitly in case a longer name is ever added to idle_tbl[].
 	 */
 	strncpy(buf, p, sizeof(buf));
 	buf[sizeof(buf) - 1] = '\0';
 	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	return (cpu_idle_selector(buf) ? 0 : EINVAL);
 }
 
 /*
  * Read-write string OID "idle" under the machdep node, backed by
  * cpu_idle_sysctl().
  */
 SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
     cpu_idle_sysctl, "A", "currently selected idle function");
 
 /*
  * SYSINIT hook that applies the boot-time idle configuration.
  *
  * The "machdep.idle" tunable, when present, picks the idle routine.
  * Without it, AMD family 0x17 model 0x1 parts are switched to "hlt",
  * with idle_mwait cleared and mwait_cpustop_broken set.  Independently,
  * Intel CPUs with cpu_id 0x506c9 get the APL31 workaround enabled by
  * default; the "machdep.idle_apl31" tunable is fetched last and so can
  * override that default.
  */
 static void
 cpu_idle_tun(void *unused __unused)
 {
 	char name[16];
 	bool ryzen_quirk, apl31_quirk;
 
 	ryzen_quirk = cpu_vendor_id == CPU_VENDOR_AMD &&
 	    CPUID_TO_FAMILY(cpu_id) == 0x17 && CPUID_TO_MODEL(cpu_id) == 0x1;
 	apl31_quirk = cpu_vendor_id == CPU_VENDOR_INTEL && cpu_id == 0x506c9;
 
 	if (TUNABLE_STR_FETCH("machdep.idle", name, sizeof(name))) {
 		cpu_idle_selector(name);
 	} else if (ryzen_quirk) {
 		/* Ryzen errata 1057, 1109. */
 		cpu_idle_selector("hlt");
 		idle_mwait = 0;
 		mwait_cpustop_broken = true;
 	}
 
 	if (apl31_quirk) {
 		/*
 		 * Apollo Lake errata APL31 (public errata APL30).
 		 * Stores to the armed address range may not trigger
 		 * MWAIT to resume execution.  OS needs to use
 		 * interrupts to wake processors from MWAIT-induced
 		 * sleep states.
 		 */
 		cpu_idle_apl31_workaround = 1;
 		mwait_cpustop_broken = true;
 	}
 	TUNABLE_INT_FETCH("machdep.idle_apl31", &cpu_idle_apl31_workaround);
 }
 SYSINIT(cpu_idle_tun, SI_SUB_CPU, SI_ORDER_MIDDLE, cpu_idle_tun, NULL);
 
 /* Nonzero: nmi_call_kdb() panics when isa_nmi() claims the NMI. */
 static int panic_on_nmi = 1;
 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RWTUN,
     &panic_on_nmi, 0,
     "Panic on NMI raised by hardware failure");
 /* Nonzero: nmi_handle_intr() takes the nmi_call_kdb_smp() path on SMP. */
 int nmi_is_broadcast = 1;
 SYSCTL_INT(_machdep, OID_AUTO, nmi_is_broadcast, CTLFLAG_RWTUN,
     &nmi_is_broadcast, 0,
     "Chipset NMI is broadcast");
 #ifdef KDB
 /* Nonzero: nmi_call_kdb() enters the debugger for unclaimed NMIs. */
 int kdb_on_nmi = 1;
 SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RWTUN,
     &kdb_on_nmi, 0,
     "Go to KDB on NMI with unknown source");
 #endif
 
 /*
  * Process an NMI on behalf of CPU 'cpu'.
  *
  * With DEV_ISA, isa_nmi() is asked whether the NMI reports a hardware
  * fault; if it does, the NMI counts as claimed and the kernel panics
  * when panic_on_nmi is set.  With KDB, an unclaimed NMI enters the
  * debugger through kdb_trap() when kdb_on_nmi is set.
  *
  * cpu   - CPU id, used only in the console message.
  * type  - trap type passed on to kdb_trap().
  * frame - trap frame of the interrupted context.
  */
 void
 nmi_call_kdb(u_int cpu, u_int type, struct trapframe *frame)
 {
 	bool claimed = false;
 
 #ifdef DEV_ISA
 	/* machine/parity/power fail/"kitchen sink" faults */
 	if (isa_nmi(frame->tf_err)) {
 		claimed = true;
 		if (panic_on_nmi)
 			panic("NMI indicates hardware failure");
 	}
 #endif /* DEV_ISA */
 #ifdef KDB
 	if (!claimed && kdb_on_nmi) {
 		/*
 		 * NMI can be hooked up to a pushbutton for debugging.
 		 */
 		/* cpu is unsigned, so print it with %u. */
 		printf("NMI/cpu%u ... going to debugger\n", cpu);
 		kdb_trap(type, 0, frame);
 	}
 #endif /* KDB */
 }
 
 /*
  * NMI entry point.  On SMP kernels with nmi_is_broadcast set the NMI is
  * passed to nmi_call_kdb_smp(); in every other case it is handled
  * directly by nmi_call_kdb() for the current CPU.
  */
 void
 nmi_handle_intr(u_int type, struct trapframe *frame)
 {
 
 #ifdef SMP
 	if (nmi_is_broadcast != 0)
 		nmi_call_kdb_smp(type, frame);
 	else
 #endif
 		nmi_call_kdb(PCPU_GET(cpuid), type, frame);
 }
 
 /* IBRS status as computed by hw_ibrs_recalculate(); exported read-only. */
 int hw_ibrs_active;
 /* Administrative switch: nonzero requests IBRS off.  Defaults to 1. */
 int hw_ibrs_disable = 1;
 
 SYSCTL_INT(_hw, OID_AUTO, ibrs_active, CTLFLAG_RD, &hw_ibrs_active, 0,
     "Indirect Branch Restricted Speculation active");
 
 /* "ibrs" node under _machdep_mitigations. */
 SYSCTL_NODE(_machdep_mitigations, OID_AUTO, ibrs, CTLFLAG_RW, 0,
     "Indirect Branch Restricted Speculation active");
 
 /* Second read-only view of hw_ibrs_active, under the ibrs node. */
 SYSCTL_INT(_machdep_mitigations_ibrs, OID_AUTO, active, CTLFLAG_RD,
     &hw_ibrs_active, 0, "Indirect Branch Restricted Speculation active");
 
 void
 hw_ibrs_recalculate(void)
 {
 	if ((cpu_ia32_arch_caps & IA32_ARCH_CAP_IBRS_ALL) != 0) {
 		x86_msr_op(MSR_IA32_SPEC_CTRL, MSR_OP_LOCAL |
 		    (hw_ibrs_disable ? MSR_OP_ANDNOT : MSR_OP_OR),
 		    IA32_SPEC_CTRL_IBRS);
 		return;
 	}
 	hw_ibrs_active = (cpu_stdext_feature3 & CPUID_STDEXT3_IBPB) != 0 &&
 	    !hw_ibrs_disable;
 }
 
 /*
  * Sysctl handler for the IBRS disable knob.  Reads report
  * hw_ibrs_disable; a successful write stores the new value normalized
  * to 0 or 1 and calls hw_ibrs_recalculate().
  *
  * Returns the sysctl_handle_int() error, or 0 on success.
  */
 static int
 hw_ibrs_disable_handler(SYSCTL_HANDLER_ARGS)
 {
 	int newval, rv;
 
 	newval = hw_ibrs_disable;
 	rv = sysctl_handle_int(oidp, &newval, 0, req);
 	if (rv == 0 && req->newptr != NULL) {
 		hw_ibrs_disable = (newval != 0);
 		hw_ibrs_recalculate();
 	}
 	return (rv);
 }
 /*
  * The IBRS disable knob is registered twice with the same handler:
  * as "ibrs_disable" under _hw and as "disable" under the
  * _machdep_mitigations_ibrs node.
  */
 SYSCTL_PROC(_hw, OID_AUTO, ibrs_disable, CTLTYPE_INT | CTLFLAG_RWTUN |
     CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0, hw_ibrs_disable_handler, "I",
     "Disable Indirect Branch Restricted Speculation");
 
 SYSCTL_PROC(_machdep_mitigations_ibrs, OID_AUTO, disable, CTLTYPE_INT |
     CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0,
     hw_ibrs_disable_handler, "I",
     "Disable Indirect Branch Restricted Speculation");
 
 /* SSBD status as last set by hw_ssb_set(); exported read-only. */
 int hw_ssb_active;
 /* Requested SSBD mode; interpreted by hw_ssb_recalculate(). */
 int hw_ssb_disable;
 
 SYSCTL_INT(_hw, OID_AUTO, spec_store_bypass_disable_active, CTLFLAG_RD,
     &hw_ssb_active, 0,
     "Speculative Store Bypass Disable active");
 
 /* "ssb" node under _machdep_mitigations. */
 SYSCTL_NODE(_machdep_mitigations, OID_AUTO, ssb, CTLFLAG_RW, 0,
     "Speculative Store Bypass Disable active");
 
 /* Second read-only view of hw_ssb_active, under the ssb node. */
 SYSCTL_INT(_machdep_mitigations_ssb, OID_AUTO, active, CTLFLAG_RD,
     &hw_ssb_active, 0, "Speculative Store Bypass Disable active");
 
 static void
 hw_ssb_set(bool enable, bool for_all_cpus)
 {
 
 	if ((cpu_stdext_feature3 & CPUID_STDEXT3_SSBD) == 0) {
 		hw_ssb_active = 0;
 		return;
 	}
 	hw_ssb_active = enable;
 	x86_msr_op(MSR_IA32_SPEC_CTRL,
 	    (enable ? MSR_OP_OR : MSR_OP_ANDNOT) |
 	    (for_all_cpus ? MSR_OP_SCHED : MSR_OP_LOCAL), IA32_SPEC_CTRL_SSBD);
 }
 
 /*
  * Translate hw_ssb_disable into an SSBD setting and apply it through
  * hw_ssb_set().
  *
  *   1     - enable SSBD
  *   2     - enable SSBD unless the CPU reports IA32_ARCH_CAP_SSB_NO
  *   other - disable SSBD; any value other than 0 is reset to 0
  *
  * all_cpus is passed through to hw_ssb_set().
  */
 void
 hw_ssb_recalculate(bool all_cpus)
 {
 	bool enable;
 
 	if (hw_ssb_disable == 1) {
 		/* on */
 		enable = true;
 	} else if (hw_ssb_disable == 2) {
 		/* auto */
 		enable = (cpu_ia32_arch_caps & IA32_ARCH_CAP_SSB_NO) == 0;
 	} else {
 		/* off; unrecognized values are normalized to off */
 		if (hw_ssb_disable != 0)
 			hw_ssb_disable = 0;
 		enable = false;
 	}
 	hw_ssb_set(enable, all_cpus);
 }
 
 /*
  * Sysctl handler for the SSBD mode knob.  Reads report hw_ssb_disable;
  * a successful write stores the value as given and re-applies the
  * setting with hw_ssb_recalculate(true).
  *
  * Returns the sysctl_handle_int() error, or 0 on success.
  */
 static int
 hw_ssb_disable_handler(SYSCTL_HANDLER_ARGS)
 {
 	int newval, rv;
 
 	newval = hw_ssb_disable;
 	rv = sysctl_handle_int(oidp, &newval, 0, req);
 	if (rv == 0 && req->newptr != NULL) {
 		hw_ssb_disable = newval;
 		hw_ssb_recalculate(true);
 	}
 	return (rv);
 }
 /*
  * The SSBD mode knob is registered twice with the same handler: as
  * "spec_store_bypass_disable" under _hw and as "disable" under the
  * _machdep_mitigations_ssb node.
  */
 SYSCTL_PROC(_hw, OID_AUTO, spec_store_bypass_disable, CTLTYPE_INT |
     CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0,
     hw_ssb_disable_handler, "I",
     "Speculative Store Bypass Disable (0 - off, 1 - on, 2 - auto)");
 
 SYSCTL_PROC(_machdep_mitigations_ssb, OID_AUTO, disable, CTLTYPE_INT |
     CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0,
     hw_ssb_disable_handler, "I",
     "Speculative Store Bypass Disable (0 - off, 1 - on, 2 - auto)");
 
 /*
  * Requested MDS mitigation mode.  The sysctl description for this knob
  * lists: 0 - off, 1 - VERW, 2 - software sequence, 3 - auto.
  */
 int hw_mds_disable;
 
 /*
  * Handler for Microarchitectural Data Sampling issues.  Really not a
  * pointer to C function: on amd64 the code must not change any CPU
  * architectural state except possibly %rflags. Also, it is always
  * called with interrupts disabled.
  */
 void mds_handler_void(void);
 void mds_handler_verw(void);
 void mds_handler_ivb(void);
 void mds_handler_bdw(void);
 void mds_handler_skl_sse(void);
 void mds_handler_skl_avx(void);
 void mds_handler_skl_avx512(void);
 void mds_handler_silvermont(void);
 /* Currently selected handler; reassigned by hw_mds_recalculate(). */
 void (*mds_handler)(void) = mds_handler_void;
 
 /*
  * Sysctl handler that reports which MDS handler is installed, as a
  * human-readable string.  A handler that is not in the table below is
  * reported as "unknown".  The text is written with SYSCTL_OUT() using
  * strlen() as the length.
  */
 static int
 sysctl_hw_mds_disable_state_handler(SYSCTL_HANDLER_ARGS)
 {
 	static const struct {
 		void		(*fn)(void);
 		const char	*desc;
 	} names[] = {
 		{ mds_handler_void,		"inactive" },
 		{ mds_handler_verw,		"VERW" },
 		{ mds_handler_ivb,		"software IvyBridge" },
 		{ mds_handler_bdw,		"software Broadwell" },
 		{ mds_handler_skl_sse,		"software Skylake SSE" },
 		{ mds_handler_skl_avx,		"software Skylake AVX" },
 		{ mds_handler_skl_avx512,	"software Skylake AVX512" },
 		{ mds_handler_silvermont,	"software Silvermont" },
 	};
 	const char *state;
 	size_t k;
 
 	state = "unknown";
 	for (k = 0; k < sizeof(names) / sizeof(names[0]); k++) {
 		if (mds_handler == names[k].fn) {
 			state = names[k].desc;
 			break;
 		}
 	}
 	return (SYSCTL_OUT(req, state, strlen(state)));
 }
 
 /*
  * The MDS state string is exported twice with the same handler: as
  * "mds_disable_state" under _hw and as "state" under the
  * _machdep_mitigations_mds node declared between them.
  */
 SYSCTL_PROC(_hw, OID_AUTO, mds_disable_state,
     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
     sysctl_hw_mds_disable_state_handler, "A",
     "Microarchitectural Data Sampling Mitigation state");
 
 SYSCTL_NODE(_machdep_mitigations, OID_AUTO, mds, CTLFLAG_RW, 0,
     "Microarchitectural Data Sampling Mitigation state");
 
 SYSCTL_PROC(_machdep_mitigations_mds, OID_AUTO, state,
     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
     sysctl_hw_mds_disable_state_handler, "A",
     "Microarchitectural Data Sampling Mitigation state");
 
 /* Build-time check: pc_mds_tmp sits at a multiple of 64 bytes in struct pcpu. */
 _Static_assert(__offsetof(struct pcpu, pc_mds_tmp) % 64 == 0, "MDS AVX512");
 
 /*
  * Choose the MDS mitigation handler from hw_mds_disable and the CPU
  * identification, and install it in mds_handler.
  *
  * hw_mds_disable values, as listed in the sysctl description:
  * 0 - off, 1 - VERW, 2 - software sequence, 3 - auto.
  *
  * The software variants need per-CPU scratch buffers.  These are
  * allocated the first time a variant is selected (pc_mds_buf == NULL)
  * and are not freed by this function.  If no branch applies,
  * hw_mds_disable is reset to 0 and the void handler is installed.
  */
 void
 hw_mds_recalculate(void)
 {
 	struct pcpu *pc;
 	vm_offset_t b64;
 	u_long xcr0;
 	int i;
 
 	/*
 	 * Allow user to force VERW variant even if MD_CLEAR is not
 	 * reported.  For instance, hypervisor might unknowingly
 	 * filter the cap out.
 	 * For the similar reasons, and for testing, allow to enable
 	 * mitigation even for RDCL_NO or MDS_NO caps.
 	 */
 	if (cpu_vendor_id != CPU_VENDOR_INTEL || hw_mds_disable == 0 ||
 	    ((cpu_ia32_arch_caps & (IA32_ARCH_CAP_RDCL_NO |
 	    IA32_ARCH_CAP_MDS_NO)) != 0 && hw_mds_disable == 3)) {
 		/* Non-Intel, turned off, or auto on a CPU flagged RDCL_NO/MDS_NO. */
 		mds_handler = mds_handler_void;
 	} else if (((cpu_stdext_feature3 & CPUID_STDEXT3_MD_CLEAR) != 0 &&
 	    hw_mds_disable == 3) || hw_mds_disable == 1) {
 		/* VERW: forced (1), or auto (3) with MD_CLEAR reported. */
 		mds_handler = mds_handler_verw;
 	} else if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
 	    (CPUID_TO_MODEL(cpu_id) == 0x2e || CPUID_TO_MODEL(cpu_id) == 0x1e ||
 	    CPUID_TO_MODEL(cpu_id) == 0x1f || CPUID_TO_MODEL(cpu_id) == 0x1a ||
 	    CPUID_TO_MODEL(cpu_id) == 0x2f || CPUID_TO_MODEL(cpu_id) == 0x25 ||
 	    CPUID_TO_MODEL(cpu_id) == 0x2c || CPUID_TO_MODEL(cpu_id) == 0x2d ||
 	    CPUID_TO_MODEL(cpu_id) == 0x2a || CPUID_TO_MODEL(cpu_id) == 0x3e ||
 	    CPUID_TO_MODEL(cpu_id) == 0x3a) &&
 	    (hw_mds_disable == 2 || hw_mds_disable == 3)) {
 		/*
 		 * Nehalem, SandyBridge, IvyBridge
 		 */
 		CPU_FOREACH(i) {
 			pc = pcpu_find(i);
 			if (pc->pc_mds_buf == NULL) {
 				/* 672-byte scratch buffer; only the first 16 bytes are zeroed. */
 				pc->pc_mds_buf = malloc_domainset(672, M_TEMP,
 				    DOMAINSET_PREF(pc->pc_domain), M_WAITOK);
 				bzero(pc->pc_mds_buf, 16);
 			}
 		}
 		mds_handler = mds_handler_ivb;
 	} else if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
 	    (CPUID_TO_MODEL(cpu_id) == 0x3f || CPUID_TO_MODEL(cpu_id) == 0x3c ||
 	    CPUID_TO_MODEL(cpu_id) == 0x45 || CPUID_TO_MODEL(cpu_id) == 0x46 ||
 	    CPUID_TO_MODEL(cpu_id) == 0x56 || CPUID_TO_MODEL(cpu_id) == 0x4f ||
 	    CPUID_TO_MODEL(cpu_id) == 0x47 || CPUID_TO_MODEL(cpu_id) == 0x3d) &&
 	    (hw_mds_disable == 2 || hw_mds_disable == 3)) {
 		/*
 		 * Haswell, Broadwell
 		 */
 		CPU_FOREACH(i) {
 			pc = pcpu_find(i);
 			if (pc->pc_mds_buf == NULL) {
 				/* 1536-byte scratch buffer; only the first 16 bytes are zeroed. */
 				pc->pc_mds_buf = malloc_domainset(1536, M_TEMP,
 				    DOMAINSET_PREF(pc->pc_domain), M_WAITOK);
 				bzero(pc->pc_mds_buf, 16);
 			}
 		}
 		mds_handler = mds_handler_bdw;
 	} else if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
 	    ((CPUID_TO_MODEL(cpu_id) == 0x55 && (cpu_id &
 	    CPUID_STEPPING) <= 5) ||
 	    CPUID_TO_MODEL(cpu_id) == 0x4e || CPUID_TO_MODEL(cpu_id) == 0x5e ||
 	    (CPUID_TO_MODEL(cpu_id) == 0x8e && (cpu_id &
 	    CPUID_STEPPING) <= 0xb) ||
 	    (CPUID_TO_MODEL(cpu_id) == 0x9e && (cpu_id &
 	    CPUID_STEPPING) <= 0xc)) &&
 	    (hw_mds_disable == 2 || hw_mds_disable == 3)) {
 		/*
 		 * Skylake, KabyLake, CoffeeLake, WhiskeyLake,
 		 * CascadeLake
 		 */
 		CPU_FOREACH(i) {
 			pc = pcpu_find(i);
 			if (pc->pc_mds_buf == NULL) {
 				pc->pc_mds_buf = malloc_domainset(6 * 1024,
 				    M_TEMP, DOMAINSET_PREF(pc->pc_domain),
 				    M_WAITOK);
 				/*
 				 * Allocate 63 spare bytes so that a 64-byte
 				 * region aligned to 64 bytes always fits
 				 * inside the allocation.
 				 */
 				b64 = (vm_offset_t)malloc_domainset(64 + 63,
 				    M_TEMP, DOMAINSET_PREF(pc->pc_domain),
 				    M_WAITOK);
 				pc->pc_mds_buf64 = (void *)roundup2(b64, 64);
 				bzero(pc->pc_mds_buf64, 64);
 			}
 		}
 		/* Use the widest variant that both XCR0 and CPUID allow. */
 		xcr0 = rxcr(0);
 		if ((xcr0 & XFEATURE_ENABLED_ZMM_HI256) != 0 &&
 		    (cpu_stdext_feature & CPUID_STDEXT_AVX512DQ) != 0)
 			mds_handler = mds_handler_skl_avx512;
 		else if ((xcr0 & XFEATURE_ENABLED_AVX) != 0 &&
 		    (cpu_feature2 & CPUID2_AVX) != 0)
 			mds_handler = mds_handler_skl_avx;
 		else
 			mds_handler = mds_handler_skl_sse;
 	} else if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
 	    ((CPUID_TO_MODEL(cpu_id) == 0x37 ||
 	    CPUID_TO_MODEL(cpu_id) == 0x4a ||
 	    CPUID_TO_MODEL(cpu_id) == 0x4c ||
 	    CPUID_TO_MODEL(cpu_id) == 0x4d ||
 	    CPUID_TO_MODEL(cpu_id) == 0x5a ||
 	    CPUID_TO_MODEL(cpu_id) == 0x5d ||
 	    CPUID_TO_MODEL(cpu_id) == 0x6e ||
 	    CPUID_TO_MODEL(cpu_id) == 0x65 ||
 	    CPUID_TO_MODEL(cpu_id) == 0x75 ||
 	    CPUID_TO_MODEL(cpu_id) == 0x1c ||
 	    CPUID_TO_MODEL(cpu_id) == 0x26 ||
 	    CPUID_TO_MODEL(cpu_id) == 0x27 ||
 	    CPUID_TO_MODEL(cpu_id) == 0x35 ||
 	    CPUID_TO_MODEL(cpu_id) == 0x36 ||
 	    CPUID_TO_MODEL(cpu_id) == 0x7a))) {
 		/*
 		 * Unlike the branches above, this one does not test
 		 * hw_mds_disable; the values 0 and 1 have already been
 		 * consumed by the first two branches of the chain.
 		 */
 		/* Silvermont, Airmont */
 		CPU_FOREACH(i) {
 			pc = pcpu_find(i);
 			if (pc->pc_mds_buf == NULL)
 				pc->pc_mds_buf = malloc(256, M_TEMP, M_WAITOK);
 		}
 		mds_handler = mds_handler_silvermont;
 	} else {
 		/* Nothing applicable: record the mitigation as off. */
 		hw_mds_disable = 0;
 		mds_handler = mds_handler_void;
 	}
 }
 
 /*
  * SYSINIT adapter: hw_mds_recalculate() takes no arguments, so the
  * SYSINIT argument is ignored.
  */
 static void
 hw_mds_recalculate_boot(void *arg __unused)
 {
 
 	hw_mds_recalculate();
 }
 /* Registered at SI_SUB_SMP, SI_ORDER_ANY. */
 SYSINIT(mds_recalc, SI_SUB_SMP, SI_ORDER_ANY, hw_mds_recalculate_boot, NULL);
 
 /*
  * Sysctl handler for the MDS mitigation mode.  Reads report
  * hw_mds_disable; a write is accepted only for values 0 through 3, after
  * which hw_mds_recalculate() is run.
  *
  * Returns the sysctl_handle_int() error, EINVAL for an out-of-range
  * value, or 0 on success.
  */
 static int
 sysctl_mds_disable_handler(SYSCTL_HANDLER_ARGS)
 {
 	int newval, rv;
 
 	newval = hw_mds_disable;
 	rv = sysctl_handle_int(oidp, &newval, 0, req);
 	if (rv != 0 || req->newptr == NULL)
 		return (rv);
 	if (newval >= 0 && newval <= 3) {
 		hw_mds_disable = newval;
 		hw_mds_recalculate();
 		return (0);
 	}
 	return (EINVAL);
 }
 
 /*
  * The MDS mitigation mode knob is registered twice with the same
  * handler: as "mds_disable" under _hw and as "disable" under the
  * _machdep_mitigations_mds node.
  */
 SYSCTL_PROC(_hw, OID_AUTO, mds_disable, CTLTYPE_INT |
     CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0,
     sysctl_mds_disable_handler, "I",
     "Microarchitectural Data Sampling Mitigation "
     "(0 - off, 1 - on VERW, 2 - on SW, 3 - on AUTO)");
 
 SYSCTL_PROC(_machdep_mitigations_mds, OID_AUTO, disable, CTLTYPE_INT |
     CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0,
     sysctl_mds_disable_handler, "I",
     "Microarchitectural Data Sampling Mitigation "
     "(0 - off, 1 - on VERW, 2 - on SW, 3 - on AUTO)");
 
 /*
  * Intel Transactional Memory Asynchronous Abort Mitigation
  * CVE-2019-11135
  */
 /* Operator-requested mode; the sysctl handler accepts TAA_NONE..TAA_AUTO. */
 int x86_taa_enable;
 /* Mode currently in effect, maintained by x86_taa_recalculate(). */
 int x86_taa_state;
 enum {
 	TAA_NONE	= 0,	/* No mitigation enabled */
 	TAA_TSX_DISABLE	= 1,	/* Disable TSX via MSR */
 	TAA_VERW	= 2,	/* Use VERW mitigation */
 	TAA_AUTO	= 3,	/* Automatically select the mitigation */
 
 	/* The states below are not selectable by the operator */
 
 	TAA_TAA_UC	= 4,	/* Mitigation present in microcode */
 	TAA_NOT_PRESENT	= 5	/* TSX is not present */
 };
 
 static void
 taa_set(bool enable, bool all)
 {
 
 	x86_msr_op(MSR_IA32_TSX_CTRL,
 	    (enable ? MSR_OP_OR : MSR_OP_ANDNOT) |
 	    (all ? MSR_OP_RENDEZVOUS : MSR_OP_LOCAL),
 	    IA32_TSX_CTRL_RTM_DISABLE | IA32_TSX_CTRL_TSX_CPUID_CLEAR);
 }
 
 void
 x86_taa_recalculate(void)
 {
 	static int taa_saved_mds_disable = 0;
 	int taa_need = 0, taa_state = 0;
 	int mds_disable = 0, need_mds_recalc = 0;
 
 	/* Check CPUID.07h.EBX.HLE and RTM for the presence of TSX */
 	if ((cpu_stdext_feature & CPUID_STDEXT_HLE) == 0 ||
 	    (cpu_stdext_feature & CPUID_STDEXT_RTM) == 0) {
 		/* TSX is not present */
 		x86_taa_state = TAA_NOT_PRESENT;
 		return;
 	}
 
 	/* Check to see what mitigation options the CPU gives us */
 	if (cpu_ia32_arch_caps & IA32_ARCH_CAP_TAA_NO) {
 		/* CPU is not suseptible to TAA */
 		taa_need = TAA_TAA_UC;
 	} else if (cpu_ia32_arch_caps & IA32_ARCH_CAP_TSX_CTRL) {
 		/*
 		 * CPU can turn off TSX.  This is the next best option
 		 * if TAA_NO hardware mitigation isn't present
 		 */
 		taa_need = TAA_TSX_DISABLE;
 	} else {
 		/* No TSX/TAA specific remedies are available. */
 		if (x86_taa_enable == TAA_TSX_DISABLE) {
 			if (bootverbose)
 				printf("TSX control not available\n");
 			return;
 		} else
 			taa_need = TAA_VERW;
 	}
 
 	/* Can we automatically take action, or are we being forced? */
 	if (x86_taa_enable == TAA_AUTO)
 		taa_state = taa_need;
 	else
 		taa_state = x86_taa_enable;
 
 	/* No state change, nothing to do */
 	if (taa_state == x86_taa_state) {
 		if (bootverbose)
 			printf("No TSX change made\n");
 		return;
 	}
 
 	/* Does the MSR need to be turned on or off? */
 	if (taa_state == TAA_TSX_DISABLE)
 		taa_set(true, true);
 	else if (x86_taa_state == TAA_TSX_DISABLE)
 		taa_set(false, true);
 
 	/* Does MDS need to be set to turn on VERW? */
 	if (taa_state == TAA_VERW) {
 		taa_saved_mds_disable = hw_mds_disable;
 		mds_disable = hw_mds_disable = 1;
 		need_mds_recalc = 1;
 	} else if (x86_taa_state == TAA_VERW) {
 		mds_disable = hw_mds_disable = taa_saved_mds_disable;
 		need_mds_recalc = 1;
 	}
 	if (need_mds_recalc) {
 		hw_mds_recalculate();
 		if (mds_disable != hw_mds_disable) {
 			if (bootverbose)
 				printf("Cannot change MDS state for TAA\n");
 			/* Don't update our state */
 			return;
 		}
 	}
 
 	x86_taa_state = taa_state;
 	return;
 }
 
 /*
  * SYSINIT adapter: x86_taa_recalculate() takes no arguments, so the
  * SYSINIT argument is ignored.
  */
 static void
 taa_recalculate_boot(void * arg __unused)
 {
 
 	x86_taa_recalculate();
 }
 /* Registered at SI_SUB_SMP, SI_ORDER_ANY. */
 SYSINIT(taa_recalc, SI_SUB_SMP, SI_ORDER_ANY, taa_recalculate_boot, NULL);
 
 /* "taa" node under _machdep_mitigations; parent of the TAA OIDs. */
 SYSCTL_NODE(_machdep_mitigations, OID_AUTO, taa, CTLFLAG_RW, 0,
 	"TSX Asynchronous Abort Mitigation");
 
 /*
  * Sysctl handler for the TAA mitigation mode.  Reads report
  * x86_taa_enable; a write is accepted only for TAA_NONE through
  * TAA_AUTO, after which x86_taa_recalculate() is run.
  *
  * Returns the sysctl_handle_int() error, EINVAL for an out-of-range
  * value, or 0 on success.
  */
 static int
 sysctl_taa_handler(SYSCTL_HANDLER_ARGS)
 {
 	int newval, rv;
 
 	newval = x86_taa_enable;
 	rv = sysctl_handle_int(oidp, &newval, 0, req);
 	if (rv != 0 || req->newptr == NULL)
 		return (rv);
 	if (newval >= TAA_NONE && newval <= TAA_AUTO) {
 		x86_taa_enable = newval;
 		x86_taa_recalculate();
 		return (0);
 	}
 	return (EINVAL);
 }
 
 /*
  * Read-write integer OID "enable" under the _machdep_mitigations_taa
  * node, served by sysctl_taa_handler().
  */
 SYSCTL_PROC(_machdep_mitigations_taa, OID_AUTO, enable, CTLTYPE_INT |
     CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0,
     sysctl_taa_handler, "I",
     "TAA Mitigation enablement control "
     "(0 - off, 1 - disable TSX, 2 - VERW, 3 - on AUTO)");
 
 /*
  * Sysctl handler that reports x86_taa_state as a human-readable string.
  * Values without a dedicated string, TAA_AUTO among them, are reported
  * as "unknown".  The text is written with SYSCTL_OUT() using strlen()
  * as the length.
  */
 static int
 sysctl_taa_state_handler(SYSCTL_HANDLER_ARGS)
 {
 	const char *text;
 	int cur;
 
 	cur = x86_taa_state;
 	if (cur == TAA_NONE)
 		text = "inactive";
 	else if (cur == TAA_TSX_DISABLE)
 		text = "TSX disabled";
 	else if (cur == TAA_VERW)
 		text = "VERW";
 	else if (cur == TAA_TAA_UC)
 		text = "Mitigated in microcode";
 	else if (cur == TAA_NOT_PRESENT)
 		text = "TSX not present";
 	else
 		text = "unknown";
 
 	return (SYSCTL_OUT(req, text, strlen(text)));
 }
 
 /*
  * Read-only string OID "state" under the _machdep_mitigations_taa node,
  * served by sysctl_taa_state_handler().
  */
 SYSCTL_PROC(_machdep_mitigations_taa, OID_AUTO, state,
     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
     sysctl_taa_state_handler, "A",
     "TAA Mitigation state");
 
 /*
  * Enable and restore kernel text write permissions.
  * Callers must ensure that disable_wp()/restore_wp() are executed
  * without rescheduling on the same core.
  */
 /*
  * Clear CR0.WP on the current CPU.
  *
  * Returns true if WP was set and has been cleared, false if it was
  * already clear (CR0 is then left untouched).  Pass the result to
  * restore_wp().
  */
 bool
 disable_wp(void)
 {
 	u_int cr0;
 	bool was_set;
 
 	cr0 = rcr0();
 	was_set = (cr0 & CR0_WP) != 0;
 	if (was_set)
 		load_cr0(cr0 & ~CR0_WP);
 	return (was_set);
 }
 
 /*
  * Set CR0.WP again if old_wp, the value returned by disable_wp(), says
  * it was set beforehand; otherwise do nothing.
  */
 void
 restore_wp(bool old_wp)
 {
 
 	if (!old_wp)
 		return;
 	load_cr0(rcr0() | CR0_WP);
 }
 
 /*
  * Fetch the BootFlags field of the ACPI FADT.
  *
  * With DEV_ACPI, the table is located and mapped, BootFlags is copied
  * to *flagsp, and the mapping is released.  Returns true on success;
  * returns false, leaving *flagsp untouched, when the table cannot be
  * found or mapped, or when the kernel is built without DEV_ACPI.
  */
 bool
 acpi_get_fadt_bootflags(uint16_t *flagsp)
 {
 #ifdef DEV_ACPI
 	ACPI_TABLE_FADT *fadt;
 	vm_paddr_t pa;
 
 	pa = acpi_find_table(ACPI_SIG_FADT);
 	fadt = (pa != 0) ? acpi_map_table(pa, ACPI_SIG_FADT) : NULL;
 	if (fadt == NULL)
 		return (false);
 
 	*flagsp = fadt->BootFlags;
 	acpi_unmap_table(fadt);
 	return (true);
 #else
 	return (false);
 #endif
 }