Index: projects/hps_head/share/man/man9/Makefile =================================================================== --- projects/hps_head/share/man/man9/Makefile (revision 282413) +++ projects/hps_head/share/man/man9/Makefile (revision 282414) @@ -1,1767 +1,1768 @@ # $FreeBSD$ .include MAN= accept_filter.9 \ accf_data.9 \ accf_dns.9 \ accf_http.9 \ acl.9 \ alq.9 \ altq.9 \ atomic.9 \ bios.9 \ boot.9 \ bpf.9 \ buf.9 \ buf_ring.9 \ BUF_ISLOCKED.9 \ BUF_LOCK.9 \ BUF_LOCKFREE.9 \ BUF_LOCKINIT.9 \ BUF_RECURSED.9 \ BUF_TIMELOCK.9 \ BUF_UNLOCK.9 \ bus_activate_resource.9 \ BUS_ADD_CHILD.9 \ bus_adjust_resource.9 \ bus_alloc_resource.9 \ BUS_BIND_INTR.9 \ bus_child_present.9 \ BUS_CHILD_DELETED.9 \ BUS_CHILD_DETACHED.9 \ BUS_CONFIG_INTR.9 \ BUS_DESCRIBE_INTR.9 \ bus_dma.9 \ bus_generic_attach.9 \ bus_generic_detach.9 \ bus_generic_new_pass.9 \ bus_generic_print_child.9 \ bus_generic_read_ivar.9 \ bus_generic_shutdown.9 \ BUS_NEW_PASS.9 \ BUS_PRINT_CHILD.9 \ BUS_READ_IVAR.9 \ bus_release_resource.9 \ bus_set_pass.9 \ bus_set_resource.9 \ BUS_SETUP_INTR.9 \ bus_space.9 \ byteorder.9 \ casuword.9 \ cd.9 \ condvar.9 \ config_intrhook.9 \ contigmalloc.9 \ copy.9 \ counter.9 \ cr_cansee.9 \ critical_enter.9 \ cr_seeothergids.9 \ cr_seeotheruids.9 \ crypto.9 \ CTASSERT.9 \ DB_COMMAND.9 \ DECLARE_GEOM_CLASS.9 \ DECLARE_MODULE.9 \ DELAY.9 \ devclass.9 \ devclass_find.9 \ devclass_get_device.9 \ devclass_get_devices.9 \ devclass_get_drivers.9 \ devclass_get_maxunit.9 \ devclass_get_name.9 \ devclass_get_softc.9 \ dev_clone.9 \ devfs_set_cdevpriv.9 \ device.9 \ device_add_child.9 \ DEVICE_ATTACH.9 \ device_delete_child.9 \ DEVICE_DETACH.9 \ device_enable.9 \ device_find_child.9 \ device_get_children.9 \ device_get_devclass.9 \ device_get_driver.9 \ device_get_ivars.9 \ device_get_name.9 \ device_get_parent.9 \ device_get_softc.9 \ device_get_state.9 \ device_get_sysctl.9 \ device_get_unit.9 \ DEVICE_IDENTIFY.9 \ device_printf.9 \ DEVICE_PROBE.9 \ device_probe_and_attach.9 \ device_quiet.9 \ device_set_desc.9 \ device_set_driver.9 \ device_set_flags.9 \ DEVICE_SHUTDOWN.9 \ DEV_MODULE.9 \ devstat.9 \ devtoname.9 \ disk.9 \ domain.9 \ drbr.9 \ driver.9 \ DRIVER_MODULE.9 \ EVENTHANDLER.9 \ eventtimers.9 \ extattr.9 \ fail.9 \ fetch.9 \ firmware.9 \ fpu_kern.9 \ g_access.9 \ g_attach.9 \ g_bio.9 \ g_consumer.9 \ g_data.9 \ get_cyclecount.9 \ getenv.9 \ getnewvnode.9 \ g_event.9 \ g_geom.9 \ g_provider.9 \ g_provider_by_name.9 \ groupmember.9 \ g_wither_geom.9 \ hash.9 \ hashinit.9 \ hexdump.9 \ hhook.9 \ ieee80211.9 \ ieee80211_amrr.9 \ ieee80211_beacon.9 \ ieee80211_bmiss.9 \ ieee80211_crypto.9 \ ieee80211_ddb.9 \ ieee80211_input.9 \ ieee80211_node.9 \ ieee80211_output.9 \ ieee80211_proto.9 \ ieee80211_radiotap.9 \ ieee80211_regdomain.9 \ ieee80211_scan.9 \ ieee80211_vap.9 \ ifnet.9 \ inittodr.9 \ insmntque.9 \ intro.9 \ ithread.9 \ KASSERT.9 \ kernacc.9 \ kernel_mount.9 \ khelp.9 \ kobj.9 \ kproc.9 \ kqueue.9 \ kthread.9 \ ktr.9 \ lock.9 \ locking.9 \ LOCK_PROFILING.9 \ mac.9 \ make_dev.9 \ malloc.9 \ mbchain.9 \ mbpool.9 \ mbuf.9 \ mbuf_tags.9 \ MD5.9 \ mdchain.9 \ memcchr.9 \ memguard.9 \ microseq.9 \ microtime.9 \ microuptime.9 \ mi_switch.9 \ mod_cc.9 \ module.9 \ MODULE_DEPEND.9 \ MODULE_VERSION.9 \ mtx_pool.9 \ mutex.9 \ namei.9 \ netisr.9 \ osd.9 \ panic.9 \ pbuf.9 \ PCBGROUP.9 \ p_candebug.9 \ p_cansee.9 \ pci.9 \ PCI_ADD_VF.9 \ PCI_INIT_IOV.9 \ pci_iov_schema.9 \ PCI_UNINIT_IOV.9 \ pfil.9 \ pfind.9 \ pget.9 \ pgfind.9 \ physio.9 \ pmap.9 \ pmap_activate.9 \ pmap_clear_modify.9 \ pmap_copy.9 \ pmap_enter.9 \ pmap_extract.9 \ pmap_growkernel.9 \ pmap_init.9 \ pmap_is_modified.9 \ pmap_is_prefaultable.9 \ pmap_map.9 \ pmap_mincore.9 \ pmap_object_init_pt.9 \ pmap_page_exists_quick.9 \ pmap_page_init.9 \ pmap_pinit.9 \ pmap_protect.9 \ pmap_qenter.9 \ pmap_release.9 \ pmap_remove.9 \ pmap_resident_count.9 \ pmap_unwire.9 \ pmap_zero_page.9 \ printf.9 \ prison_check.9 \ priv.9 \ pseudofs.9 \ psignal.9 \ random.9 \ random_harvest.9 \ redzone.9 \ refcount.9 \ resettodr.9 \ resource_int_value.9 \ rijndael.9 \ rman.9 \ rmlock.9 \ rtalloc.9 \ rtentry.9 \ runqueue.9 \ rwlock.9 \ sbuf.9 \ scheduler.9 \ SDT.9 \ securelevel_gt.9 \ selrecord.9 \ sema.9 \ sf_buf.9 \ sglist.9 \ shm_map.9 \ signal.9 \ sleep.9 \ sleepqueue.9 \ socket.9 \ stack.9 \ store.9 \ style.9 \ swi.9 \ sx.9 \ SYSCALL_MODULE.9 \ sysctl.9 \ sysctl_add_oid.9 \ sysctl_ctx_init.9 \ SYSINIT.9 \ taskqueue.9 \ thread_exit.9 \ time.9 \ timeout.9 \ tvtohz.9 \ ucred.9 \ uidinfo.9 \ uio.9 \ unr.9 \ utopia.9 \ vaccess.9 \ vaccess_acl_nfs4.9 \ vaccess_acl_posix1e.9 \ vcount.9 \ vflush.9 \ VFS.9 \ vfs_busy.9 \ VFS_CHECKEXP.9 \ vfsconf.9 \ VFS_FHTOVP.9 \ vfs_getnewfsid.9 \ vfs_getopt.9 \ vfs_getvfs.9 \ VFS_MOUNT.9 \ vfs_mountedfrom.9 \ VFS_QUOTACTL.9 \ VFS_ROOT.9 \ vfs_rootmountalloc.9 \ VFS_SET.9 \ VFS_STATFS.9 \ vfs_suser.9 \ VFS_SYNC.9 \ vfs_timestamp.9 \ vfs_unbusy.9 \ VFS_UNMOUNT.9 \ vfs_unmountall.9 \ VFS_VGET.9 \ vget.9 \ vgone.9 \ vhold.9 \ vinvalbuf.9 \ vm_fault_prefault.9 \ vm_map.9 \ vm_map_check_protection.9 \ vm_map_create.9 \ vm_map_delete.9 \ vm_map_entry_resize_free.9 \ vm_map_find.9 \ vm_map_findspace.9 \ vm_map_inherit.9 \ vm_map_init.9 \ vm_map_insert.9 \ vm_map_lock.9 \ vm_map_lookup.9 \ vm_map_madvise.9 \ vm_map_max.9 \ vm_map_protect.9 \ vm_map_remove.9 \ vm_map_simplify_entry.9 \ vm_map_stack.9 \ vm_map_submap.9 \ vm_map_sync.9 \ vm_map_wire.9 \ vm_page_alloc.9 \ vm_page_bits.9 \ vm_page_busy.9 \ vm_page_cache.9 \ vm_page_deactivate.9 \ vm_page_dontneed.9 \ vm_page_aflag.9 \ vm_page_free.9 \ vm_page_grab.9 \ vm_page_hold.9 \ vm_page_insert.9 \ vm_page_lookup.9 \ vm_page_rename.9 \ vm_page_wire.9 \ vm_set_page_size.9 \ vmem.9 \ vn_fullpath.9 \ vn_isdisk.9 \ vnet.9 \ vnode.9 \ VOP_ACCESS.9 \ VOP_ACLCHECK.9 \ VOP_ADVISE.9 \ VOP_ADVLOCK.9 \ VOP_ALLOCATE.9 \ VOP_ATTRIB.9 \ VOP_BWRITE.9 \ VOP_CREATE.9 \ VOP_FSYNC.9 \ VOP_GETACL.9 \ VOP_GETEXTATTR.9 \ VOP_GETPAGES.9 \ VOP_INACTIVE.9 \ VOP_IOCTL.9 \ VOP_LINK.9 \ VOP_LISTEXTATTR.9 \ VOP_LOCK.9 \ VOP_LOOKUP.9 \ VOP_OPENCLOSE.9 \ VOP_PATHCONF.9 \ VOP_PRINT.9 \ VOP_RDWR.9 \ VOP_READDIR.9 \ VOP_READLINK.9 \ VOP_REALLOCBLKS.9 \ VOP_REMOVE.9 \ VOP_RENAME.9 \ VOP_REVOKE.9 \ VOP_SETACL.9 \ VOP_SETEXTATTR.9 \ VOP_STRATEGY.9 \ VOP_VPTOCNP.9 \ VOP_VPTOFH.9 \ vref.9 \ vrefcnt.9 \ vrele.9 \ vslock.9 \ watchdog.9 \ zone.9 MLINKS= unr.9 alloc_unr.9 \ unr.9 alloc_unrl.9 \ unr.9 alloc_unr_specific.9 \ unr.9 delete_unrhdr.9 \ unr.9 free_unr.9 \ unr.9 new_unrhdr.9 MLINKS+=accept_filter.9 accept_filt_add.9 \ accept_filter.9 accept_filt_del.9 \ accept_filter.9 accept_filt_generic_mod_event.9 \ accept_filter.9 accept_filt_get.9 MLINKS+=alq.9 ALQ.9 \ alq.9 alq_close.9 \ alq.9 alq_flush.9 \ alq.9 alq_get.9 \ alq.9 alq_getn.9 \ alq.9 alq_open.9 \ alq.9 alq_open_flags.9 \ alq.9 alq_post.9 \ alq.9 alq_post_flags.9 \ alq.9 alq_write.9 \ alq.9 alq_writen.9 MLINKS+=altq.9 ALTQ.9 MLINKS+=atomic.9 atomic_add.9 \ atomic.9 atomic_clear.9 \ atomic.9 atomic_cmpset.9 \ atomic.9 atomic_fetchadd.9 \ atomic.9 atomic_load.9 \ atomic.9 atomic_readandclear.9 \ atomic.9 atomic_set.9 \ atomic.9 atomic_store.9 \ atomic.9 atomic_subtract.9 \ atomic.9 atomic_swap.9 \ atomic.9 atomic_testandset.9 MLINKS+=bpf.9 bpfattach.9 \ bpf.9 bpfattach2.9 \ bpf.9 bpfdetach.9 \ bpf.9 bpf_filter.9 \ bpf.9 bpf_mtap.9 \ bpf.9 bpf_mtap2.9 \ bpf.9 bpf_tap.9 \ bpf.9 bpf_validate.9 MLINKS+=buf.9 bp.9 MLINKS+=buf_ring.9 buf_ring_alloc.9 \ buf_ring.9 buf_ring_free.9 \ buf_ring.9 buf_ring_enqueue.9 \ buf_ring.9 buf_ring_enqueue_bytes.9 \ buf_ring.9 buf_ring_dequeue_mc.9 \ buf_ring.9 buf_ring_dequeue_sc.9 \ buf_ring.9 buf_ring_count.9 \ buf_ring.9 buf_ring_empty.9 \ buf_ring.9 buf_ring_full.9 \ buf_ring.9 buf_ring_peek.9 MLINKS+=bus_activate_resource.9 bus_deactivate_resource.9 MLINKS+=bus_alloc_resource.9 bus_alloc_resource_any.9 MLINKS+=BUS_BIND_INTR.9 bus_bind_intr.9 MLINKS+=BUS_DESCRIBE_INTR.9 bus_describe_intr.9 MLINKS+=bus_dma.9 busdma.9 \ bus_dma.9 bus_dmamap_create.9 \ bus_dma.9 bus_dmamap_destroy.9 \ bus_dma.9 bus_dmamap_load.9 \ bus_dma.9 bus_dmamap_load_bio.9 \ bus_dma.9 bus_dmamap_load_ccb.9 \ bus_dma.9 bus_dmamap_load_mbuf.9 \ bus_dma.9 bus_dmamap_load_mbuf_sg.9 \ bus_dma.9 bus_dmamap_load_uio.9 \ bus_dma.9 bus_dmamap_sync.9 \ bus_dma.9 bus_dmamap_unload.9 \ bus_dma.9 bus_dmamem_alloc.9 \ bus_dma.9 bus_dmamem_free.9 \ bus_dma.9 bus_dma_tag_create.9 \ bus_dma.9 bus_dma_tag_destroy.9 MLINKS+=bus_generic_read_ivar.9 bus_generic_write_ivar.9 MLINKS+=BUS_READ_IVAR.9 BUS_WRITE_IVAR.9 MLINKS+=BUS_SETUP_INTR.9 bus_setup_intr.9 \ BUS_SETUP_INTR.9 BUS_TEARDOWN_INTR.9 \ BUS_SETUP_INTR.9 bus_teardown_intr.9 MLINKS+=bus_space.9 bus_space_alloc.9 \ bus_space.9 bus_space_barrier.9 \ bus_space.9 bus_space_copy_region_1.9 \ bus_space.9 bus_space_copy_region_2.9 \ bus_space.9 bus_space_copy_region_4.9 \ bus_space.9 bus_space_copy_region_8.9 \ bus_space.9 bus_space_copy_region_stream_1.9 \ bus_space.9 bus_space_copy_region_stream_2.9 \ bus_space.9 bus_space_copy_region_stream_4.9 \ bus_space.9 bus_space_copy_region_stream_8.9 \ bus_space.9 bus_space_free.9 \ bus_space.9 bus_space_map.9 \ bus_space.9 bus_space_read_1.9 \ bus_space.9 bus_space_read_2.9 \ bus_space.9 bus_space_read_4.9 \ bus_space.9 bus_space_read_8.9 \ bus_space.9 bus_space_read_multi_1.9 \ bus_space.9 bus_space_read_multi_2.9 \ bus_space.9 bus_space_read_multi_4.9 \ bus_space.9 bus_space_read_multi_8.9 \ bus_space.9 bus_space_read_multi_stream_1.9 \ bus_space.9 bus_space_read_multi_stream_2.9 \ bus_space.9 bus_space_read_multi_stream_4.9 \ bus_space.9 bus_space_read_multi_stream_8.9 \ bus_space.9 bus_space_read_region_1.9 \ bus_space.9 bus_space_read_region_2.9 \ bus_space.9 bus_space_read_region_4.9 \ bus_space.9 bus_space_read_region_8.9 \ bus_space.9 bus_space_read_region_stream_1.9 \ bus_space.9 bus_space_read_region_stream_2.9 \ bus_space.9 bus_space_read_region_stream_4.9 \ bus_space.9 bus_space_read_region_stream_8.9 \ bus_space.9 bus_space_read_stream_1.9 \ bus_space.9 bus_space_read_stream_2.9 \ bus_space.9 bus_space_read_stream_4.9 \ bus_space.9 bus_space_read_stream_8.9 \ bus_space.9 bus_space_set_multi_1.9 \ bus_space.9 bus_space_set_multi_2.9 \ bus_space.9 bus_space_set_multi_4.9 \ bus_space.9 bus_space_set_multi_8.9 \ bus_space.9 bus_space_set_multi_stream_1.9 \ bus_space.9 bus_space_set_multi_stream_2.9 \ bus_space.9 bus_space_set_multi_stream_4.9 \ bus_space.9 bus_space_set_multi_stream_8.9 \ bus_space.9 bus_space_set_region_1.9 \ bus_space.9 bus_space_set_region_2.9 \ bus_space.9 bus_space_set_region_4.9 \ bus_space.9 bus_space_set_region_8.9 \ bus_space.9 bus_space_set_region_stream_1.9 \ bus_space.9 bus_space_set_region_stream_2.9 \ bus_space.9 bus_space_set_region_stream_4.9 \ bus_space.9 bus_space_set_region_stream_8.9 \ bus_space.9 bus_space_subregion.9 \ bus_space.9 bus_space_unmap.9 \ bus_space.9 bus_space_write_1.9 \ bus_space.9 bus_space_write_2.9 \ bus_space.9 bus_space_write_4.9 \ bus_space.9 bus_space_write_8.9 \ bus_space.9 bus_space_write_multi_1.9 \ bus_space.9 bus_space_write_multi_2.9 \ bus_space.9 bus_space_write_multi_4.9 \ bus_space.9 bus_space_write_multi_8.9 \ bus_space.9 bus_space_write_multi_stream_1.9 \ bus_space.9 bus_space_write_multi_stream_2.9 \ bus_space.9 bus_space_write_multi_stream_4.9 \ bus_space.9 bus_space_write_multi_stream_8.9 \ bus_space.9 bus_space_write_region_1.9 \ bus_space.9 bus_space_write_region_2.9 \ bus_space.9 bus_space_write_region_4.9 \ bus_space.9 bus_space_write_region_8.9 \ bus_space.9 bus_space_write_region_stream_1.9 \ bus_space.9 bus_space_write_region_stream_2.9 \ bus_space.9 bus_space_write_region_stream_4.9 \ bus_space.9 bus_space_write_region_stream_8.9 \ bus_space.9 bus_space_write_stream_1.9 \ bus_space.9 bus_space_write_stream_2.9 \ bus_space.9 bus_space_write_stream_4.9 \ bus_space.9 bus_space_write_stream_8.9 MLINKS+=byteorder.9 be16dec.9 \ byteorder.9 be16enc.9 \ byteorder.9 be16toh.9 \ byteorder.9 be32dec.9 \ byteorder.9 be32enc.9 \ byteorder.9 be32toh.9 \ byteorder.9 be64dec.9 \ byteorder.9 be64enc.9 \ byteorder.9 be64toh.9 \ byteorder.9 bswap16.9 \ byteorder.9 bswap32.9 \ byteorder.9 bswap64.9 \ byteorder.9 htobe16.9 \ byteorder.9 htobe32.9 \ byteorder.9 htobe64.9 \ byteorder.9 htole16.9 \ byteorder.9 htole32.9 \ byteorder.9 htole64.9 \ byteorder.9 le16dec.9 \ byteorder.9 le16enc.9 \ byteorder.9 le16toh.9 \ byteorder.9 le32dec.9 \ byteorder.9 le32enc.9 \ byteorder.9 le32toh.9 \ byteorder.9 le64dec.9 \ byteorder.9 le64enc.9 \ byteorder.9 le64toh.9 MLINKS+=condvar.9 cv_broadcast.9 \ condvar.9 cv_broadcastpri.9 \ condvar.9 cv_destroy.9 \ condvar.9 cv_init.9 \ condvar.9 cv_signal.9 \ condvar.9 cv_timedwait.9 \ condvar.9 cv_timedwait_sig.9 \ condvar.9 cv_timedwait_sig_sbt.9 \ condvar.9 cv_wait.9 \ condvar.9 cv_wait_sig.9 \ condvar.9 cv_wait_unlock.9 \ condvar.9 cv_wmesg.9 MLINKS+=config_intrhook.9 config_intrhook_disestablish.9 \ config_intrhook.9 config_intrhook_establish.9 MLINKS+=contigmalloc.9 contigfree.9 MLINKS+=casuword.9 casueword.9 \ casuword.9 casueword32.9 \ casuword.9 casuword32.9 MLINKS+=copy.9 copyin.9 \ copy.9 copyin_nofault.9 \ copy.9 copyinstr.9 \ copy.9 copyout.9 \ copy.9 copyout_nofault.9 \ copy.9 copystr.9 MLINKS+=counter.9 counter_u64_alloc.9 \ counter.9 counter_u64_free.9 \ counter.9 counter_u64_add.9 \ counter.9 counter_enter.9 \ counter.9 counter_exit.9 \ counter.9 counter_u64_add_protected.9 \ counter.9 counter_u64_fetch.9 \ counter.9 counter_u64_zero.9 MLINKS+=critical_enter.9 critical.9 \ critical_enter.9 critical_exit.9 MLINKS+=crypto.9 crypto_dispatch.9 \ crypto.9 crypto_done.9 \ crypto.9 crypto_freereq.9 \ crypto.9 crypto_freesession.9 \ crypto.9 crypto_get_driverid.9 \ crypto.9 crypto_getreq.9 \ crypto.9 crypto_kdispatch.9 \ crypto.9 crypto_kdone.9 \ crypto.9 crypto_kregister.9 \ crypto.9 crypto_newsession.9 \ crypto.9 crypto_register.9 \ crypto.9 crypto_unblock.9 \ crypto.9 crypto_unregister.9 \ crypto.9 crypto_unregister_all.9 MLINKS+=DB_COMMAND.9 DB_SHOW_ALL_COMMAND.9 \ DB_COMMAND.9 DB_SHOW_COMMAND.9 MLINKS+=dev_clone.9 drain_dev_clone_events.9 MLINKS+=devfs_set_cdevpriv.9 devfs_clear_cdevpriv.9 \ devfs_set_cdevpriv.9 devfs_get_cdevpriv.9 MLINKS+=device_add_child.9 device_add_child_ordered.9 MLINKS+=device_enable.9 device_disable.9 \ device_enable.9 device_is_enabled.9 MLINKS+=device_get_ivars.9 device_set_ivars.9 MLINKS+=device_get_name.9 device_get_nameunit.9 MLINKS+=device_get_state.9 device_busy.9 \ device_get_state.9 device_is_alive.9 \ device_get_state.9 device_is_attached.9 \ device_get_state.9 device_unbusy.9 MLINKS+=device_get_sysctl.9 device_get_sysctl_ctx.9 \ device_get_sysctl.9 device_get_sysctl_tree.9 MLINKS+=device_quiet.9 device_is_quiet.9 \ device_quiet.9 device_verbose.9 MLINKS+=device_set_desc.9 device_get_desc.9 \ device_set_desc.9 device_set_desc_copy.9 MLINKS+=device_set_flags.9 device_get_flags.9 MLINKS+=devstat.9 devicestat.9 \ devstat.9 devstat_add_entry.9 \ devstat.9 devstat_end_transaction.9 \ devstat.9 devstat_remove_entry.9 \ devstat.9 devstat_start_transaction.9 MLINKS+=disk.9 disk_alloc.9 \ disk.9 disk_create.9 \ disk.9 disk_destroy.9 \ disk.9 disk_gone.9 \ disk.9 disk_resize.9 MLINKS+=domain.9 DOMAIN_SET.9 \ domain.9 domain_add.9 \ domain.9 pfctlinput.9 \ domain.9 pfctlinput2.9 \ domain.9 pffinddomain.9 \ domain.9 pffindproto.9 \ domain.9 pffindtype.9 MLINKS+=drbr.9 drbr_free.9 \ drbr.9 drbr_enqueue.9 \ drbr.9 drbr_dequeue.9 \ drbr.9 drbr_dequeue_cond.9 \ drbr.9 drbr_flush.9 \ drbr.9 drbr_empty.9 \ drbr.9 drbr_inuse.9 \ drbr.9 drbr_stats_update.9 MLINKS+=DRIVER_MODULE.9 DRIVER_MODULE_ORDERED.9 \ DRIVER_MODULE.9 EARLY_DRIVER_MODULE.9 \ DRIVER_MODULE.9 EARLY_DRIVER_MODULE_ORDERED.9 MLINKS+=EVENTHANDLER.9 EVENTHANDLER_DECLARE.9 \ EVENTHANDLER.9 EVENTHANDLER_DEREGISTER.9 \ EVENTHANDLER.9 eventhandler_deregister.9 \ EVENTHANDLER.9 eventhandler_find_list.9 \ EVENTHANDLER.9 EVENTHANDLER_INVOKE.9 \ EVENTHANDLER.9 eventhandler_prune_list.9 \ EVENTHANDLER.9 EVENTHANDLER_REGISTER.9 \ EVENTHANDLER.9 eventhandler_register.9 MLINKS+=eventtimers.9 et_register.9 \ eventtimers.9 et_deregister.9 \ eventtimers.9 et_ban.9 \ eventtimers.9 et_find.9 \ eventtimers.9 et_free.9 \ eventtimers.9 et_init.9 \ eventtimers.9 ET_LOCK.9 \ eventtimers.9 ET_UNLOCK.9 \ eventtimers.9 et_start.9 \ eventtimers.9 et_stop.9 MLINKS+=fail.9 KFAIL_POINT_CODE.9 \ fail.9 KFAIL_POINT_ERROR.9 \ fail.9 KFAIL_POINT_GOTO.9 \ fail.9 KFAIL_POINT_RETURN.9 \ fail.9 KFAIL_POINT_RETURN_VOID.9 MLINKS+=fetch.9 fubyte.9 \ fetch.9 fuswintr.9 \ fetch.9 fuword.9 \ fetch.9 fuword16.9 \ fetch.9 fuword32.9 \ fetch.9 fuword64.9 \ fetch.9 fueword.9 \ fetch.9 fueword32.9 \ fetch.9 fueword64.9 MLINKS+=firmware.9 firmware_get.9 \ firmware.9 firmware_put.9 \ firmware.9 firmware_register.9 \ firmware.9 firmware_unregister.9 MLINKS+=fpu_kern.9 fpu_kern_alloc_ctx.9 \ fpu_kern.9 fpu_kern_free_ctx.9 \ fpu_kern.9 fpu_kern_enter.9 \ fpu_kern.9 fpu_kern_leave.9 \ fpu_kern.9 fpu_kern_thread.9 \ fpu_kern.9 is_fpu_kern_thread.9 MLINKS+=g_attach.9 g_detach.9 MLINKS+=g_bio.9 g_alloc_bio.9 \ g_bio.9 g_clone_bio.9 \ g_bio.9 g_destroy_bio.9 \ g_bio.9 g_duplicate_bio.9 \ g_bio.9 g_new_bio.9 \ g_bio.9 g_print_bio.9 MLINKS+=g_consumer.9 g_destroy_consumer.9 \ g_consumer.9 g_new_consumer.9 MLINKS+=g_data.9 g_read_data.9 \ g_data.9 g_write_data.9 MLINKS+=getenv.9 freeenv.9 \ getenv.9 getenv_int.9 \ getenv.9 getenv_long.9 \ getenv.9 getenv_string.9 \ getenv.9 getenv_quad.9 \ getenv.9 getenv_uint.9 \ getenv.9 getenv_ulong.9 \ getenv.9 setenv.9 \ getenv.9 testenv.9 \ getenv.9 unsetenv.9 MLINKS+=g_event.9 g_cancel_event.9 \ g_event.9 g_post_event.9 \ g_event.9 g_waitfor_event.9 MLINKS+=g_geom.9 g_destroy_geom.9 \ g_geom.9 g_new_geomf.9 MLINKS+=g_provider.9 g_destroy_provider.9 \ g_provider.9 g_error_provider.9 \ g_provider.9 g_new_providerf.9 MLINKS+=hash.9 hash32.9 \ hash.9 hash32_buf.9 \ hash.9 hash32_str.9 \ hash.9 hash32_stre.9 \ hash.9 hash32_strn.9 \ hash.9 hash32_strne.9 \ hash.9 jenkins_hash.9 \ hash.9 jenkins_hash32.9 MLINKS+=hashinit.9 hashdestroy.9 \ hashinit.9 hashinit_flags.9 \ hashinit.9 phashinit.9 MLINKS+=hhook.9 hhook_head_register.9 \ hhook.9 hhook_head_deregister.9 \ hhook.9 hhook_head_deregister_lookup.9 \ hhook.9 hhook_run_hooks.9 \ hhook.9 HHOOKS_RUN_IF.9 \ hhook.9 HHOOKS_RUN_LOOKUP_IF.9 MLINKS+=ieee80211.9 ieee80211_ifattach.9 \ ieee80211.9 ieee80211_ifdetach.9 MLINKS+=ieee80211_amrr.9 ieee80211_amrr_choose.9 \ ieee80211_amrr.9 ieee80211_amrr_cleanup.9 \ ieee80211_amrr.9 ieee80211_amrr_init.9 \ ieee80211_amrr.9 ieee80211_amrr_node_init.9 \ ieee80211_amrr.9 ieee80211_amrr_setinterval.9 \ ieee80211_amrr.9 ieee80211_amrr_tx_complete.9 \ ieee80211_amrr.9 ieee80211_amrr_tx_update.9 MLINKS+=ieee80211_beacon.9 ieee80211_beacon_alloc.9 \ ieee80211_beacon.9 ieee80211_beacon_notify.9 \ ieee80211_beacon.9 ieee80211_beacon_update.9 MLINKS+=ieee80211_bmiss.9 ieee80211_beacon_miss.9 MLINKS+=ieee80211_crypto.9 ieee80211_crypto_available.9 \ ieee80211_crypto.9 ieee80211_crypto_decap.9 \ ieee80211_crypto.9 ieee80211_crypto_delglobalkeys.9 \ ieee80211_crypto.9 ieee80211_crypto_delkey.9 \ ieee80211_crypto.9 ieee80211_crypto_demic.9 \ ieee80211_crypto.9 ieee80211_crypto_encap.9 \ ieee80211_crypto.9 ieee80211_crypto_enmic.9 \ ieee80211_crypto.9 ieee80211_crypto_newkey.9 \ ieee80211_crypto.9 ieee80211_crypto_register.9 \ ieee80211_crypto.9 ieee80211_crypto_reload_keys.9 \ ieee80211_crypto.9 ieee80211_crypto_setkey.9 \ ieee80211_crypto.9 ieee80211_crypto_unregister.9 \ ieee80211_crypto.9 ieee80211_key_update_begin.9 \ ieee80211_crypto.9 ieee80211_key_update_end.9 \ ieee80211_crypto.9 ieee80211_notify_michael_failure.9 \ ieee80211_crypto.9 ieee80211_notify_replay_failure.9 MLINKS+=ieee80211_input.9 ieee80211_input_all.9 MLINKS+=ieee80211_node.9 ieee80211_dump_node.9 \ ieee80211_node.9 ieee80211_dump_nodes.9 \ ieee80211_node.9 ieee80211_find_rxnode.9 \ ieee80211_node.9 ieee80211_find_rxnode_withkey.9 \ ieee80211_node.9 ieee80211_free_node.9 \ ieee80211_node.9 ieee80211_iterate_nodes.9 \ ieee80211_node.9 ieee80211_ref_node.9 \ ieee80211_node.9 ieee80211_unref_node.9 MLINKS+=ieee80211_output.9 ieee80211_process_callback.9 \ ieee80211_output.9 M_SEQNO_GET.9 \ ieee80211_output.9 M_WME_GETAC.9 MLINKS+=ieee80211_proto.9 ieee80211_new_state.9 \ ieee80211_proto.9 ieee80211_resume_all.9 \ ieee80211_proto.9 ieee80211_start_all.9 \ ieee80211_proto.9 ieee80211_stop_all.9 \ ieee80211_proto.9 ieee80211_suspend_all.9 \ ieee80211_proto.9 ieee80211_waitfor_parent.9 MLINKS+=ieee80211_radiotap.9 ieee80211_radiotap_active.9 \ ieee80211_radiotap.9 ieee80211_radiotap_active_vap.9 \ ieee80211_radiotap.9 ieee80211_radiotap_attach.9 \ ieee80211_radiotap.9 ieee80211_radiotap_tx.9 \ ieee80211_radiotap.9 radiotap.9 MLINKS+=ieee80211_regdomain.9 ieee80211_alloc_countryie.9 \ ieee80211_regdomain.9 ieee80211_init_channels.9 \ ieee80211_regdomain.9 ieee80211_sort_channels.9 MLINKS+=ieee80211_scan.9 ieee80211_add_scan.9 \ ieee80211_scan.9 ieee80211_bg_scan.9 \ ieee80211_scan.9 ieee80211_cancel_scan.9 \ ieee80211_scan.9 ieee80211_cancel_scan_any.9 \ ieee80211_scan.9 ieee80211_check_scan.9 \ ieee80211_scan.9 ieee80211_check_scan_current.9 \ ieee80211_scan.9 ieee80211_flush.9 \ ieee80211_scan.9 ieee80211_probe_curchan.9 \ ieee80211_scan.9 ieee80211_scan_assoc_fail.9 \ ieee80211_scan.9 ieee80211_scan_done.9 \ ieee80211_scan.9 ieee80211_scan_dump_channels.9 \ ieee80211_scan.9 ieee80211_scan_flush.9 \ ieee80211_scan.9 ieee80211_scan_iterate.9 \ ieee80211_scan.9 ieee80211_scan_next.9 \ ieee80211_scan.9 ieee80211_scan_timeout.9 \ ieee80211_scan.9 ieee80211_scanner_get.9 \ ieee80211_scan.9 ieee80211_scanner_register.9 \ ieee80211_scan.9 ieee80211_scanner_unregister.9 \ ieee80211_scan.9 ieee80211_scanner_unregister_all.9 \ ieee80211_scan.9 ieee80211_start_scan.9 MLINKS+=ieee80211_vap.9 ieee80211_vap_attach.9 \ ieee80211_vap.9 ieee80211_vap_detach.9 \ ieee80211_vap.9 ieee80211_vap_setup.9 MLINKS+=ifnet.9 if_addmulti.9 \ ifnet.9 if_alloc.9 \ ifnet.9 if_allmulti.9 \ ifnet.9 if_attach.9 \ ifnet.9 if_data.9 \ ifnet.9 IF_DEQUEUE.9 \ ifnet.9 if_delmulti.9 \ ifnet.9 if_detach.9 \ ifnet.9 if_down.9 \ ifnet.9 if_findmulti.9 \ ifnet.9 if_free.9 \ ifnet.9 if_free_type.9 \ ifnet.9 if_up.9 \ ifnet.9 ifa_free.9 \ ifnet.9 ifa_ifwithaddr.9 \ ifnet.9 ifa_ifwithdstaddr.9 \ ifnet.9 ifa_ifwithnet.9 \ ifnet.9 ifa_ref.9 \ ifnet.9 ifaddr.9 \ ifnet.9 ifaddr_byindex.9 \ ifnet.9 ifaof_ifpforaddr.9 \ ifnet.9 ifioctl.9 \ ifnet.9 ifpromisc.9 \ ifnet.9 ifqueue.9 \ ifnet.9 ifunit.9 \ ifnet.9 ifunit_ref.9 MLINKS+=insmntque.9 insmntque1.9 MLINKS+=ithread.9 ithread_add_handler.9 \ ithread.9 ithread_create.9 \ ithread.9 ithread_destroy.9 \ ithread.9 ithread_priority.9 \ ithread.9 ithread_remove_handler.9 \ ithread.9 ithread_schedule.9 MLINKS+=kernacc.9 useracc.9 MLINKS+=kernel_mount.9 free_mntarg.9 \ kernel_mount.9 kernel_vmount.9 \ kernel_mount.9 mount_arg.9 \ kernel_mount.9 mount_argb.9 \ kernel_mount.9 mount_argf.9 \ kernel_mount.9 mount_argsu.9 MLINKS+=khelp.9 khelp_add_hhook.9 \ khelp.9 KHELP_DECLARE_MOD.9 \ khelp.9 KHELP_DECLARE_MOD_UMA.9 \ khelp.9 khelp_destroy_osd.9 \ khelp.9 khelp_get_id.9 \ khelp.9 khelp_get_osd.9 \ khelp.9 khelp_init_osd.9 \ khelp.9 khelp_remove_hhook.9 MLINKS+=kobj.9 DEFINE_CLASS.9 \ kobj.9 kobj_class_compile.9 \ kobj.9 kobj_class_compile_static.9 \ kobj.9 kobj_class_free.9 \ kobj.9 kobj_create.9 \ kobj.9 kobj_delete.9 \ kobj.9 kobj_init.9 \ kobj.9 kobj_init_static.9 MLINKS+=kproc.9 kproc_create.9 \ kproc.9 kproc_exit.9 \ kproc.9 kproc_kthread_add.9 \ kproc.9 kproc_resume.9 \ kproc.9 kproc_shutdown.9 \ kproc.9 kproc_start.9 \ kproc.9 kproc_suspend.9 \ kproc.9 kproc_suspend_check.9 \ kproc.9 kthread_create.9 MLINKS+=kqueue.9 knlist_add.9 \ kqueue.9 knlist_clear.9 \ kqueue.9 knlist_delete.9 \ kqueue.9 knlist_destroy.9 \ kqueue.9 knlist_empty.9 \ kqueue.9 knlist_init.9 \ kqueue.9 knlist_init_mtx.9 \ kqueue.9 knlist_init_rw_reader.9 \ kqueue.9 knlist_remove.9 \ kqueue.9 knlist_remove_inevent.9 \ kqueue.9 knote_fdclose.9 \ kqueue.9 KNOTE_LOCKED.9 \ kqueue.9 KNOTE_UNLOCKED.9 \ kqueue.9 kqfd_register.9 \ kqueue.9 kqueue_add_filteropts.9 \ kqueue.9 kqueue_del_filteropts.9 MLINKS+=kthread.9 kthread_add.9 \ kthread.9 kthread_exit.9 \ kthread.9 kthread_resume.9 \ kthread.9 kthread_shutdown.9 \ kthread.9 kthread_start.9 \ kthread.9 kthread_suspend.9 \ kthread.9 kthread_suspend_check.9 MLINKS+=ktr.9 CTR0.9 \ ktr.9 CTR1.9 \ ktr.9 CTR2.9 \ ktr.9 CTR3.9 \ ktr.9 CTR4.9 \ ktr.9 CTR5.9 \ ktr.9 CTR6.9 MLINKS+=lock.9 lockdestroy.9 \ lock.9 lockinit.9 \ lock.9 lockmgr.9 \ lock.9 lockmgr_args.9 \ lock.9 lockmgr_args_rw.9 \ lock.9 lockmgr_assert.9 \ lock.9 lockmgr_disown.9 \ lock.9 lockmgr_printinfo.9 \ lock.9 lockmgr_recursed.9 \ lock.9 lockmgr_rw.9 \ lock.9 lockmgr_waiters.9 \ lock.9 lockstatus.9 MLINKS+=LOCK_PROFILING.9 MUTEX_PROFILING.9 MLINKS+=make_dev.9 destroy_dev.9 \ make_dev.9 destroy_dev_drain.9 \ make_dev.9 destroy_dev_sched.9 \ make_dev.9 destroy_dev_sched_cb.9 \ make_dev.9 dev_depends.9 \ make_dev.9 make_dev_alias.9 \ make_dev.9 make_dev_alias_p.9 \ make_dev.9 make_dev_cred.9 \ make_dev.9 make_dev_credf.9 \ make_dev.9 make_dev_p.9 MLINKS+=malloc.9 free.9 \ malloc.9 MALLOC_DECLARE.9 \ malloc.9 MALLOC_DEFINE.9 \ malloc.9 realloc.9 \ malloc.9 reallocf.9 MLINKS+=mbchain.9 mb_detach.9 \ mbchain.9 mb_done.9 \ mbchain.9 mb_fixhdr.9 \ mbchain.9 mb_init.9 \ mbchain.9 mb_initm.9 \ mbchain.9 mb_put_int64be.9 \ mbchain.9 mb_put_int64le.9 \ mbchain.9 mb_put_mbuf.9 \ mbchain.9 mb_put_mem.9 \ mbchain.9 mb_put_uint16be.9 \ mbchain.9 mb_put_uint16le.9 \ mbchain.9 mb_put_uint32be.9 \ mbchain.9 mb_put_uint32le.9 \ mbchain.9 mb_put_uint8.9 \ mbchain.9 mb_put_uio.9 \ mbchain.9 mb_reserve.9 MLINKS+=mbpool.9 mbp_alloc.9 \ mbpool.9 mbp_card_free.9 \ mbpool.9 mbp_count.9 \ mbpool.9 mbp_create.9 \ mbpool.9 mbp_destroy.9 \ mbpool.9 mbp_ext_free.9 \ mbpool.9 mbp_free.9 \ mbpool.9 mbp_get.9 \ mbpool.9 mbp_get_keep.9 \ mbpool.9 mbp_sync.9 MLINKS+=\ mbuf.9 m_adj.9 \ mbuf.9 m_align.9 \ mbuf.9 M_ALIGN.9 \ mbuf.9 m_append.9 \ mbuf.9 m_apply.9 \ mbuf.9 m_cat.9 \ mbuf.9 MCHTYPE.9 \ mbuf.9 MCLGET.9 \ mbuf.9 m_copyback.9 \ mbuf.9 m_copydata.9 \ mbuf.9 m_copym.9 \ mbuf.9 m_copypacket.9 \ mbuf.9 m_copyup.9 \ mbuf.9 m_defrag.9 \ mbuf.9 m_devget.9 \ mbuf.9 m_dup.9 \ mbuf.9 m_dup_pkthdr.9 \ mbuf.9 MEXTADD.9 \ mbuf.9 MEXT_ADD_REF.9 \ mbuf.9 MEXTFREE.9 \ mbuf.9 MEXT_IS_REF.9 \ mbuf.9 MEXT_REM_REF.9 \ mbuf.9 m_fixhdr.9 \ mbuf.9 MFREE.9 \ mbuf.9 m_free.9 \ mbuf.9 m_freem.9 \ mbuf.9 MGET.9 \ mbuf.9 m_get.9 \ mbuf.9 m_get2.9 \ mbuf.9 m_getjcl.9 \ mbuf.9 m_getcl.9 \ mbuf.9 m_getclr.9 \ mbuf.9 MGETHDR.9 \ mbuf.9 m_gethdr.9 \ mbuf.9 m_getm.9 \ mbuf.9 m_getptr.9 \ mbuf.9 MH_ALIGN.9 \ mbuf.9 M_LEADINGSPACE.9 \ mbuf.9 m_length.9 \ mbuf.9 M_MOVE_PKTHDR.9 \ mbuf.9 m_move_pkthdr.9 \ mbuf.9 M_PREPEND.9 \ mbuf.9 m_prepend.9 \ mbuf.9 m_pulldown.9 \ mbuf.9 m_pullup.9 \ mbuf.9 m_split.9 \ mbuf.9 mtod.9 \ mbuf.9 M_TRAILINGSPACE.9 \ mbuf.9 m_unshare.9 \ mbuf.9 M_WRITABLE.9 MLINKS+=\ mbuf_tags.9 m_tag_alloc.9 \ mbuf_tags.9 m_tag_copy.9 \ mbuf_tags.9 m_tag_copy_chain.9 \ mbuf_tags.9 m_tag_delete.9 \ mbuf_tags.9 m_tag_delete_chain.9 \ mbuf_tags.9 m_tag_delete_nonpersistent.9 \ mbuf_tags.9 m_tag_find.9 \ mbuf_tags.9 m_tag_first.9 \ mbuf_tags.9 m_tag_free.9 \ mbuf_tags.9 m_tag_get.9 \ mbuf_tags.9 m_tag_init.9 \ mbuf_tags.9 m_tag_locate.9 \ mbuf_tags.9 m_tag_next.9 \ mbuf_tags.9 m_tag_prepend.9 \ mbuf_tags.9 m_tag_unlink.9 MLINKS+=MD5.9 MD5Init.9 \ MD5.9 MD5Transform.9 MLINKS+=mdchain.9 md_append_record.9 \ mdchain.9 md_done.9 \ mdchain.9 md_get_int64.9 \ mdchain.9 md_get_int64be.9 \ mdchain.9 md_get_int64le.9 \ mdchain.9 md_get_mbuf.9 \ mdchain.9 md_get_mem.9 \ mdchain.9 md_get_uint16.9 \ mdchain.9 md_get_uint16be.9 \ mdchain.9 md_get_uint16le.9 \ mdchain.9 md_get_uint32.9 \ mdchain.9 md_get_uint32be.9 \ mdchain.9 md_get_uint32le.9 \ mdchain.9 md_get_uint8.9 \ mdchain.9 md_get_uio.9 \ mdchain.9 md_initm.9 \ mdchain.9 md_next_record.9 MLINKS+=microtime.9 bintime.9 \ microtime.9 getbintime.9 \ microtime.9 getmicrotime.9 \ microtime.9 getnanotime.9 \ microtime.9 nanotime.9 MLINKS+=microuptime.9 binuptime.9 \ microuptime.9 getbinuptime.9 \ microuptime.9 getmicrouptime.9 \ microuptime.9 getnanouptime.9 \ microuptime.9 getsbinuptime.9 \ microuptime.9 nanouptime.9 \ microuptime.9 sbinuptime.9 MLINKS+=mi_switch.9 cpu_switch.9 \ mi_switch.9 cpu_throw.9 MLINKS+=mod_cc.9 CCV.9 \ mod_cc.9 DECLARE_CC_MODULE.9 MLINKS+=mtx_pool.9 mtx_pool_alloc.9 \ mtx_pool.9 mtx_pool_create.9 \ mtx_pool.9 mtx_pool_destroy.9 \ mtx_pool.9 mtx_pool_find.9 \ mtx_pool.9 mtx_pool_lock.9 \ mtx_pool.9 mtx_pool_lock_spin.9 \ mtx_pool.9 mtx_pool_unlock.9 \ mtx_pool.9 mtx_pool_unlock_spin.9 MLINKS+=mutex.9 mtx_assert.9 \ mutex.9 mtx_destroy.9 \ mutex.9 mtx_init.9 \ mutex.9 mtx_initialized.9 \ mutex.9 mtx_lock.9 \ mutex.9 mtx_lock_flags.9 \ mutex.9 mtx_lock_spin.9 \ mutex.9 mtx_lock_spin_flags.9 \ mutex.9 mtx_owned.9 \ mutex.9 mtx_recursed.9 \ mutex.9 mtx_sleep.9 \ mutex.9 MTX_SYSINIT.9 \ mutex.9 mtx_trylock.9 \ mutex.9 mtx_trylock_flags.9 \ mutex.9 mtx_unlock.9 \ mutex.9 mtx_unlock_flags.9 \ mutex.9 mtx_unlock_spin.9 \ mutex.9 mtx_unlock_spin_flags.9 MLINKS+=namei.9 NDFREE.9 \ namei.9 NDHASGIANT.9 \ namei.9 NDINIT.9 MLINKS+=netisr.9 netisr_clearqdrops.9 \ netisr.9 netisr_default_flow2cpu.9 \ netisr.9 netisr_dispatch.9 \ netisr.9 netisr_dispatch_src.9 \ netisr.9 netisr_get_cpucount.9 \ netisr.9 netisr_get_cpuid.9 \ netisr.9 netisr_getqdrops.9 \ netisr.9 netisr_getqlimit.9 \ netisr.9 netisr_queue.9 \ netisr.9 netisr_queue_src.9 \ netisr.9 netisr_register.9 \ netisr.9 netisr_setqlimit.9 \ netisr.9 netisr_unregister.9 MLINKS+=osd.9 osd_call.9 \ osd.9 osd_del.9 \ osd.9 osd_deregister.9 \ osd.9 osd_exit.9 \ osd.9 osd_get.9 \ osd.9 osd_register.9 \ osd.9 osd_set.9 MLINKS+=panic.9 vpanic.9 MLINKS+=pbuf.9 getpbuf.9 \ pbuf.9 relpbuf.9 \ pbuf.9 trypbuf.9 MLINKS+=PCBGROUP.9 in_pcbgroup_byhash.9 \ PCBGROUP.9 in_pcbgroup_byinpcb.9 \ PCBGROUP.9 in_pcbgroup_destroy.9 \ PCBGROUP.9 in_pcbgroup_enabled.9 \ PCBGROUP.9 in_pcbgroup_init.9 \ PCBGROUP.9 in_pcbgroup_remove.9 \ PCBGROUP.9 in_pcbgroup_update.9 \ PCBGROUP.9 in_pcbgroup_update_mbuf.9 \ PCBGROUP.9 in6_pcbgroup_byhash.9 MLINKS+=pci.9 pci_alloc_msi.9 \ pci.9 pci_alloc_msix.9 \ pci.9 pci_disable_busmaster.9 \ pci.9 pci_disable_io.9 \ pci.9 pci_enable_busmaster.9 \ pci.9 pci_enable_io.9 \ pci.9 pci_find_bsf.9 \ pci.9 pci_find_cap.9 \ pci.9 pci_find_dbsf.9 \ pci.9 pci_find_device.9 \ pci.9 pci_find_extcap.9 \ pci.9 pci_find_htcap.9 \ pci.9 pci_get_max_read_req.9 \ pci.9 pci_get_powerstate.9 \ pci.9 pci_get_vpd_ident.9 \ pci.9 pci_get_vpd_readonly.9 \ pci.9 pci_iov_attach.9 \ pci.9 pci_iov_detach.9 \ pci.9 pci_msi_count.9 \ pci.9 pci_msix_count.9 \ pci.9 pci_pending_msix.9 \ pci.9 pci_read_config.9 \ pci.9 pci_release_msi.9 \ pci.9 pci_remap_msix.9 \ pci.9 pci_restore_state.9 \ pci.9 pci_save_state.9 \ pci.9 pci_set_powerstate.9 \ pci.9 pci_set_max_read_req.9 \ pci.9 pci_write_config.9 MLINKS+=pci_iov_schema.9 pci_iov_schema_alloc_node.9 \ pci_iov_schema.9 pci_iov_schema_add_bool.9 \ pci_iov_schema.9 pci_iov_schema_add_string.9 \ pci_iov_schema.9 pci_iov_schema_add_uint8.9 \ pci_iov_schema.9 pci_iov_schema_add_uint16.9 \ pci_iov_schema.9 pci_iov_schema_add_uint32.9 \ pci_iov_schema.9 pci_iov_schema_add_uint64.9 \ pci_iov_schema.9 pci_iov_schema_add_unicast_mac.9 MLINKS+=pfil.9 pfil_add_hook.9 \ pfil.9 pfil_head_register.9 \ pfil.9 pfil_head_unregister.9 \ pfil.9 pfil_hook_get.9 \ pfil.9 pfil_remove_hook.9 \ pfil.9 pfil_rlock.9 \ pfil.9 pfil_run_hooks.9 \ pfil.9 pfil_runlock.9 \ pfil.9 pfil_wlock.9 \ pfil.9 pfil_wunlock.9 MLINKS+=pfind.9 zpfind.9 MLINKS+=pmap_copy.9 pmap_copy_page.9 MLINKS+=pmap_extract.9 pmap_extract_and_hold.9 MLINKS+=pmap_init.9 pmap_init2.9 MLINKS+=pmap_is_modified.9 pmap_ts_referenced.9 MLINKS+=pmap_pinit.9 pmap_pinit0.9 \ pmap_pinit.9 pmap_pinit2.9 MLINKS+=pmap_qenter.9 pmap_qremove.9 MLINKS+=pmap_remove.9 pmap_remove_all.9 \ pmap_remove.9 pmap_remove_pages.9 MLINKS+=pmap_resident_count.9 pmap_wired_count.9 MLINKS+=pmap_zero_page.9 pmap_zero_area.9 \ pmap_zero_page.9 pmap_zero_idle.9 MLINKS+=printf.9 log.9 \ printf.9 tprintf.9 \ printf.9 uprintf.9 MLINKS+=priv.9 priv_check.9 \ priv.9 priv_check_cred.9 MLINKS+=psignal.9 gsignal.9 \ psignal.9 pgsignal.9 \ psignal.9 tdsignal.9 MLINKS+=random.9 arc4rand.9 \ random.9 arc4random.9 \ random.9 read_random.9 \ random.9 srandom.9 MLINKS+=refcount.9 refcount_acquire.9 \ refcount.9 refcount_init.9 \ refcount.9 refcount_release.9 MLINKS+=resource_int_value.9 resource_long_value.9 \ resource_int_value.9 resource_string_value.9 MLINKS+=rman.9 rman_activate_resource.9 \ rman.9 rman_adjust_resource.9 \ rman.9 rman_await_resource.9 \ rman.9 rman_deactivate_resource.9 \ rman.9 rman_fini.9 \ rman.9 rman_first_free_region.9 \ rman.9 rman_get_bushandle.9 \ rman.9 rman_get_bustag.9 \ rman.9 rman_get_device.9 \ rman.9 rman_get_end.9 \ rman.9 rman_get_flags.9 \ rman.9 rman_get_rid.9 \ rman.9 rman_get_size.9 \ rman.9 rman_get_start.9 \ rman.9 rman_get_virtual.9 \ rman.9 rman_init.9 \ rman.9 rman_init_from_resource.9 \ rman.9 rman_is_region_manager.9 \ rman.9 rman_last_free_region.9 \ rman.9 rman_make_alignment_flags.9 \ rman.9 rman_manage_region.9 \ rman.9 rman_release_resource.9 \ rman.9 rman_reserve_resource.9 \ rman.9 rman_reserve_resource_bound.9 \ rman.9 rman_set_bushandle.9 \ rman.9 rman_set_bustag.9 \ rman.9 rman_set_rid.9 \ rman.9 rman_set_virtual.9 MLINKS+=rmlock.9 rm_assert.9 \ rmlock.9 rm_destroy.9 \ rmlock.9 rm_init.9 \ rmlock.9 rm_init_flags.9 \ rmlock.9 rm_rlock.9 \ rmlock.9 rm_runlock.9 \ rmlock.9 rm_sleep.9 \ rmlock.9 RM_SYSINIT.9 \ rmlock.9 rm_try_rlock.9 \ rmlock.9 rm_wlock.9 \ rmlock.9 rm_wowned.9 \ rmlock.9 rm_wunlock.9 MLINKS+=rtalloc.9 rtalloc1.9 \ rtalloc.9 rtalloc_ign.9 \ rtalloc.9 RT_ADDREF.9 \ rtalloc.9 RT_LOCK.9 \ rtalloc.9 RT_REMREF.9 \ rtalloc.9 RT_RTFREE.9 \ rtalloc.9 RT_UNLOCK.9 \ rtalloc.9 RTFREE_LOCKED.9 \ rtalloc.9 RTFREE.9 \ rtalloc.9 rtfree.9 \ rtalloc.9 rtalloc1_fib.9 \ rtalloc.9 rtalloc_ign_fib.9 \ rtalloc.9 rtalloc_fib.9 MLINKS+=runqueue.9 choosethread.9 \ runqueue.9 procrunnable.9 \ runqueue.9 remrunqueue.9 \ runqueue.9 setrunqueue.9 MLINKS+=rwlock.9 rw_assert.9 \ rwlock.9 rw_destroy.9 \ rwlock.9 rw_downgrade.9 \ rwlock.9 rw_init.9 \ rwlock.9 rw_init_flags.9 \ rwlock.9 rw_initialized.9 \ rwlock.9 rw_rlock.9 \ rwlock.9 rw_runlock.9 \ rwlock.9 rw_sleep.9 \ rwlock.9 RW_SYSINIT.9 \ rwlock.9 rw_try_rlock.9 \ rwlock.9 rw_try_upgrade.9 \ rwlock.9 rw_try_wlock.9 \ rwlock.9 rw_wlock.9 \ rwlock.9 rw_wowned.9 \ rwlock.9 rw_wunlock.9 MLINKS+=sbuf.9 sbuf_bcat.9 \ sbuf.9 sbuf_bcopyin.9 \ sbuf.9 sbuf_bcpy.9 \ sbuf.9 sbuf_cat.9 \ sbuf.9 sbuf_clear.9 \ sbuf.9 sbuf_copyin.9 \ sbuf.9 sbuf_cpy.9 \ sbuf.9 sbuf_data.9 \ sbuf.9 sbuf_delete.9 \ sbuf.9 sbuf_done.9 \ sbuf.9 sbuf_error.9 \ sbuf.9 sbuf_finish.9 \ sbuf.9 sbuf_len.9 \ sbuf.9 sbuf_new.9 \ sbuf.9 sbuf_new_auto.9 \ sbuf.9 sbuf_new_for_sysctl.9 \ sbuf.9 sbuf_printf.9 \ sbuf.9 sbuf_putc.9 \ sbuf.9 sbuf_set_drain.9 \ sbuf.9 sbuf_setpos.9 \ sbuf.9 sbuf_start_section.9 \ sbuf.9 sbuf_end_section.9 \ sbuf.9 sbuf_trim.9 \ sbuf.9 sbuf_vprintf.9 MLINKS+=scheduler.9 curpriority_cmp.9 \ scheduler.9 maybe_resched.9 \ scheduler.9 propagate_priority.9 \ scheduler.9 resetpriority.9 \ scheduler.9 roundrobin.9 \ scheduler.9 roundrobin_interval.9 \ scheduler.9 schedclock.9 \ scheduler.9 schedcpu.9 \ scheduler.9 sched_setup.9 \ scheduler.9 setrunnable.9 \ scheduler.9 updatepri.9 MLINKS+=SDT.9 SDT_PROVIDER_DECLARE.9 \ SDT.9 SDT_PROVIDER_DEFINE.9 \ SDT.9 SDT_PROBE_DECLARE.9 \ SDT.9 SDT_PROBE_DEFINE.9 \ SDT.9 SDT_PROBE.9 MLINKS+=securelevel_gt.9 securelevel_ge.9 MLINKS+=selrecord.9 seldrain.9 \ selrecord.9 selwakeup.9 MLINKS+=sema.9 sema_destroy.9 \ sema.9 sema_init.9 \ sema.9 sema_post.9 \ sema.9 sema_timedwait.9 \ sema.9 sema_trywait.9 \ sema.9 sema_value.9 \ sema.9 sema_wait.9 MLINKS+=sf_buf.9 sf_buf_alloc.9 \ sf_buf.9 sf_buf_free.9 \ sf_buf.9 sf_buf_kva.9 \ sf_buf.9 sf_buf_page.9 MLINKS+=sglist.9 sglist_alloc.9 \ sglist.9 sglist_append.9 \ sglist.9 sglist_append_bio.9 \ sglist.9 sglist_append_mbuf.9 \ sglist.9 sglist_append_phys.9 \ sglist.9 sglist_append_uio.9 \ sglist.9 sglist_append_user.9 \ sglist.9 sglist_build.9 \ sglist.9 sglist_clone.9 \ sglist.9 sglist_consume_uio.9 \ sglist.9 sglist_count.9 \ sglist.9 sglist_free.9 \ sglist.9 sglist_hold.9 \ sglist.9 sglist_init.9 \ sglist.9 sglist_join.9 \ sglist.9 sglist_length.9 \ sglist.9 sglist_reset.9 \ sglist.9 sglist_slice.9 \ sglist.9 sglist_split.9 MLINKS+=shm_map.9 shm_unmap.9 MLINKS+=signal.9 cursig.9 \ signal.9 execsigs.9 \ signal.9 issignal.9 \ signal.9 killproc.9 \ signal.9 pgsigio.9 \ signal.9 postsig.9 \ signal.9 SETSETNEQ.9 \ signal.9 SETSETOR.9 \ signal.9 SIGADDSET.9 \ signal.9 SIG_CONTSIGMASK.9 \ signal.9 SIGDELSET.9 \ signal.9 SIGEMPTYSET.9 \ signal.9 sigexit.9 \ signal.9 SIGFILLSET.9 \ signal.9 siginit.9 \ signal.9 SIGISEMPTY.9 \ signal.9 SIGISMEMBER.9 \ signal.9 SIGNOTEMPTY.9 \ signal.9 signotify.9 \ signal.9 SIGPENDING.9 \ signal.9 SIGSETAND.9 \ signal.9 SIGSETCANTMASK.9 \ signal.9 SIGSETEQ.9 \ signal.9 SIGSETNAND.9 \ signal.9 SIG_STOPSIGMASK.9 \ signal.9 trapsignal.9 MLINKS+=sleep.9 msleep.9 \ sleep.9 msleep_sbt.9 \ sleep.9 msleep_spin.9 \ sleep.9 msleep_spin_sbt.9 \ sleep.9 pause.9 \ sleep.9 pause_sbt.9 \ sleep.9 tsleep.9 \ sleep.9 tsleep_sbt.9 \ sleep.9 wakeup.9 \ sleep.9 wakeup_one.9 MLINKS+=sleepqueue.9 init_sleepqueues.9 \ sleepqueue.9 sleepq_abort.9 \ sleepqueue.9 sleepq_add.9 \ sleepqueue.9 sleepq_alloc.9 \ sleepqueue.9 sleepq_broadcast.9 \ sleepqueue.9 sleepq_free.9 \ sleepqueue.9 sleepq_lookup.9 \ sleepqueue.9 sleepq_lock.9 \ sleepqueue.9 sleepq_release.9 \ sleepqueue.9 sleepq_remove.9 \ sleepqueue.9 sleepq_set_timeout.9 \ sleepqueue.9 sleepq_set_timeout_sbt.9 \ sleepqueue.9 sleepq_signal.9 \ sleepqueue.9 sleepq_sleepcnt.9 \ sleepqueue.9 sleepq_timedwait.9 \ sleepqueue.9 sleepq_timedwait_sig.9 \ sleepqueue.9 sleepq_type.9 \ sleepqueue.9 sleepq_wait.9 \ sleepqueue.9 sleepq_wait_sig.9 MLINKS+=socket.9 soabort.9 \ socket.9 soaccept.9 \ socket.9 sobind.9 \ socket.9 socheckuid.9 \ socket.9 soclose.9 \ socket.9 soconnect.9 \ socket.9 socreate.9 \ socket.9 sodisconnect.9 \ socket.9 sodupsockaddr.9 \ socket.9 sofree.9 \ socket.9 sogetopt.9 \ socket.9 sohasoutofband.9 \ socket.9 solisten.9 \ socket.9 solisten_proto.9 \ socket.9 solisten_proto_check.9 \ socket.9 sonewconn.9 \ socket.9 sooptcopyin.9 \ socket.9 sooptcopyout.9 \ socket.9 sopoll.9 \ socket.9 sopoll_generic.9 \ socket.9 soreceive.9 \ socket.9 soreceive_dgram.9 \ socket.9 soreceive_generic.9 \ socket.9 soreceive_stream.9 \ socket.9 soreserve.9 \ socket.9 sorflush.9 \ socket.9 sosend.9 \ socket.9 sosend_dgram.9 \ socket.9 sosend_generic.9 \ socket.9 sosetopt.9 \ socket.9 soshutdown.9 \ socket.9 sotoxsocket.9 \ socket.9 soupcall_clear.9 \ socket.9 soupcall_set.9 \ socket.9 sowakeup.9 MLINKS+=stack.9 stack_copy.9 \ stack.9 stack_create.9 \ stack.9 stack_destroy.9 \ stack.9 stack_print.9 \ stack.9 stack_print_ddb.9 \ stack.9 stack_print_short.9 \ stack.9 stack_print_short_ddb.9 \ stack.9 stack_put.9 \ stack.9 stack_save.9 \ stack.9 stack_sbuf_print.9 \ stack.9 stack_sbuf_print_ddb.9 \ stack.9 stack_zero.9 MLINKS+=store.9 subyte.9 \ store.9 suswintr.9 \ store.9 suword.9 \ store.9 suword16.9 \ store.9 suword32.9 \ store.9 suword64.9 MLINKS+=swi.9 swi_add.9 \ swi.9 swi_remove.9 \ swi.9 swi_sched.9 MLINKS+=sx.9 sx_assert.9 \ sx.9 sx_destroy.9 \ sx.9 sx_downgrade.9 \ sx.9 sx_init.9 \ sx.9 sx_init_flags.9 \ sx.9 sx_sleep.9 \ sx.9 sx_slock.9 \ sx.9 sx_slock_sig.9 \ sx.9 sx_sunlock.9 \ sx.9 SX_SYSINIT.9 \ sx.9 sx_try_slock.9 \ sx.9 sx_try_upgrade.9 \ sx.9 sx_try_xlock.9 \ sx.9 sx_unlock.9 \ sx.9 sx_xholder.9 \ sx.9 sx_xlock.9 \ sx.9 sx_xlock_sig.9 \ sx.9 sx_xlocked.9 \ sx.9 sx_xunlock.9 MLINKS+=sysctl.9 SYSCTL_DECL.9 \ sysctl.9 SYSCTL_ADD_INT.9 \ sysctl.9 SYSCTL_ADD_LONG.9 \ sysctl.9 SYSCTL_ADD_NODE.9 \ sysctl.9 SYSCTL_ADD_OPAQUE.9 \ sysctl.9 SYSCTL_ADD_PROC.9 \ sysctl.9 SYSCTL_ADD_QUAD.9 \ sysctl.9 SYSCTL_ADD_ROOT_NODE.9 \ sysctl.9 SYSCTL_ADD_STRING.9 \ sysctl.9 SYSCTL_ADD_STRUCT.9 \ sysctl.9 SYSCTL_ADD_UAUTO.9 \ sysctl.9 SYSCTL_ADD_UINT.9 \ sysctl.9 SYSCTL_ADD_ULONG.9 \ sysctl.9 SYSCTL_ADD_UQUAD.9 \ sysctl.9 SYSCTL_CHILDREN.9 \ sysctl.9 SYSCTL_STATIC_CHILDREN.9 \ sysctl.9 SYSCTL_NODE_CHILDREN.9 \ sysctl.9 SYSCTL_PARENT.9 \ sysctl.9 SYSCTL_INT.9 \ sysctl.9 SYSCTL_LONG.9 \ sysctl.9 SYSCTL_NODE.9 \ sysctl.9 SYSCTL_OPAQUE.9 \ sysctl.9 SYSCTL_PROC.9 \ sysctl.9 SYSCTL_QUAD.9 \ sysctl.9 SYSCTL_ROOT_NODE.9 \ sysctl.9 SYSCTL_STRING.9 \ sysctl.9 SYSCTL_STRUCT.9 \ sysctl.9 SYSCTL_UINT.9 \ sysctl.9 SYSCTL_ULONG.9 \ sysctl.9 SYSCTL_UQUAD.9 MLINKS+=sysctl_add_oid.9 sysctl_move_oid.9 \ sysctl_add_oid.9 sysctl_remove_oid.9 \ sysctl_add_oid.9 sysctl_remove_name.9 MLINKS+=sysctl_ctx_init.9 sysctl_ctx_entry_add.9 \ sysctl_ctx_init.9 sysctl_ctx_entry_del.9 \ sysctl_ctx_init.9 sysctl_ctx_entry_find.9 \ sysctl_ctx_init.9 sysctl_ctx_free.9 MLINKS+=SYSINIT.9 SYSUNINIT.9 MLINKS+=taskqueue.9 TASK_INIT.9 \ taskqueue.9 TASK_INITIALIZER.9 \ taskqueue.9 taskqueue_block.9 \ taskqueue.9 taskqueue_cancel.9 \ taskqueue.9 taskqueue_cancel_timeout.9 \ taskqueue.9 taskqueue_create.9 \ taskqueue.9 taskqueue_create_fast.9 \ taskqueue.9 TASKQUEUE_DECLARE.9 \ taskqueue.9 TASKQUEUE_DEFINE.9 \ taskqueue.9 TASKQUEUE_DEFINE_THREAD.9 \ taskqueue.9 taskqueue_drain.9 \ taskqueue.9 taskqueue_drain_all.9 \ taskqueue.9 taskqueue_drain_timeout.9 \ taskqueue.9 taskqueue_enqueue.9 \ taskqueue.9 taskqueue_enqueue_fast.9 \ taskqueue.9 taskqueue_enqueue_timeout.9 \ taskqueue.9 TASKQUEUE_FAST_DEFINE.9 \ taskqueue.9 TASKQUEUE_FAST_DEFINE_THREAD.9 \ taskqueue.9 taskqueue_free.9 \ taskqueue.9 taskqueue_member.9 \ taskqueue.9 taskqueue_run.9 \ taskqueue.9 taskqueue_set_callback.9 \ taskqueue.9 taskqueue_start_threads.9 \ taskqueue.9 taskqueue_start_threads_pinned.9 \ taskqueue.9 taskqueue_unblock.9 \ taskqueue.9 TIMEOUT_TASK_INIT.9 MLINKS+=time.9 boottime.9 \ time.9 time_second.9 \ time.9 time_uptime.9 MLINKS+=timeout.9 callout.9 \ timeout.9 callout_active.9 \ timeout.9 callout_deactivate.9 \ timeout.9 callout_drain.9 \ + timeout.9 callout_drain_async.9 \ timeout.9 callout_handle_init.9 \ timeout.9 callout_init.9 \ timeout.9 callout_init_mtx.9 \ timeout.9 callout_init_rm.9 \ timeout.9 callout_init_rw.9 \ timeout.9 callout_pending.9 \ timeout.9 callout_reset.9 \ timeout.9 callout_reset_curcpu.9 \ timeout.9 callout_reset_on.9 \ timeout.9 callout_reset_sbt.9 \ timeout.9 callout_reset_sbt_curcpu.9 \ timeout.9 callout_reset_sbt_on.9 \ timeout.9 callout_schedule.9 \ timeout.9 callout_schedule_curcpu.9 \ timeout.9 callout_schedule_on.9 \ timeout.9 callout_schedule_sbt.9 \ timeout.9 callout_schedule_sbt_curcpu.9 \ timeout.9 callout_schedule_sbt_on.9 \ timeout.9 callout_stop.9 \ timeout.9 untimeout.9 MLINKS+=ucred.9 cred_update_thread.9 \ ucred.9 crcopy.9 \ ucred.9 crcopysafe.9 \ ucred.9 crdup.9 \ ucred.9 crfree.9 \ ucred.9 crget.9 \ ucred.9 crhold.9 \ ucred.9 crsetgroups.9 \ ucred.9 crshared.9 \ ucred.9 cru2x.9 MLINKS+=uidinfo.9 uifind.9 \ uidinfo.9 uifree.9 \ uidinfo.9 uihashinit.9 \ uidinfo.9 uihold.9 MLINKS+=uio.9 uiomove.9 \ uio.9 uiomove_nofault.9 .if ${MK_USB} != "no" MAN+= usbdi.9 MLINKS+=usbdi.9 usbd_do_request.9 \ usbdi.9 usbd_do_request_flags.9 \ usbdi.9 usbd_errstr.9 \ usbdi.9 usbd_lookup_id_by_info.9 \ usbdi.9 usbd_lookup_id_by_uaa.9 \ usbdi.9 usbd_transfer_clear_stall.9 \ usbdi.9 usbd_transfer_drain.9 \ usbdi.9 usbd_transfer_pending.9 \ usbdi.9 usbd_transfer_poll.9 \ usbdi.9 usbd_transfer_setup.9 \ usbdi.9 usbd_transfer_start.9 \ usbdi.9 usbd_transfer_stop.9 \ usbdi.9 usbd_transfer_submit.9 \ usbdi.9 usbd_transfer_unsetup.9 \ usbdi.9 usbd_xfer_clr_flag.9 \ usbdi.9 usbd_xfer_frame_data.9 \ usbdi.9 usbd_xfer_frame_len.9 \ usbdi.9 usbd_xfer_get_frame.9 \ usbdi.9 usbd_xfer_get_priv.9 \ usbdi.9 usbd_xfer_is_stalled.9 \ usbdi.9 usbd_xfer_max_framelen.9 \ usbdi.9 usbd_xfer_max_frames.9 \ usbdi.9 usbd_xfer_max_len.9 \ usbdi.9 usbd_xfer_set_flag.9 \ usbdi.9 usbd_xfer_set_frame_data.9 \ usbdi.9 usbd_xfer_set_frame_len.9 \ usbdi.9 usbd_xfer_set_frame_offset.9 \ usbdi.9 usbd_xfer_set_frames.9 \ usbdi.9 usbd_xfer_set_interval.9 \ usbdi.9 usbd_xfer_set_priv.9 \ usbdi.9 usbd_xfer_set_stall.9 \ usbdi.9 usbd_xfer_set_timeout.9 \ usbdi.9 usbd_xfer_softc.9 \ usbdi.9 usbd_xfer_state.9 \ usbdi.9 usbd_xfer_status.9 \ usbdi.9 usb_fifo_alloc_buffer.9 \ usbdi.9 usb_fifo_attach.9 \ usbdi.9 usb_fifo_detach.9 \ usbdi.9 usb_fifo_free_buffer.9 \ usbdi.9 usb_fifo_get_data.9 \ usbdi.9 usb_fifo_get_data_buffer.9 \ usbdi.9 usb_fifo_get_data_error.9 \ usbdi.9 usb_fifo_get_data_linear.9 \ usbdi.9 usb_fifo_put_bytes_max.9 \ usbdi.9 usb_fifo_put_data.9 \ usbdi.9 usb_fifo_put_data_buffer.9 \ usbdi.9 usb_fifo_put_data_error.9 \ usbdi.9 usb_fifo_put_data_linear.9 \ usbdi.9 usb_fifo_reset.9 \ usbdi.9 usb_fifo_softc.9 \ usbdi.9 usb_fifo_wakeup.9 .endif MLINKS+=vcount.9 count_dev.9 MLINKS+=vfsconf.9 vfs_modevent.9 \ vfsconf.9 vfs_register.9 \ vfsconf.9 vfs_unregister.9 MLINKS+=vfs_getopt.9 vfs_copyopt.9 \ vfs_getopt.9 vfs_filteropt.9 \ vfs_getopt.9 vfs_flagopt.9 \ vfs_getopt.9 vfs_getopts.9 \ vfs_getopt.9 vfs_scanopt.9 \ vfs_getopt.9 vfs_setopt.9 \ vfs_getopt.9 vfs_setopt_part.9 \ vfs_getopt.9 vfs_setopts.9 MLINKS+=vhold.9 vdrop.9 \ vhold.9 vdropl.9 \ vhold.9 vholdl.9 MLINKS+=vmem.9 vmem_add.9 \ vmem.9 vmem_alloc.9 \ vmem.9 vmem_create.9 \ vmem.9 vmem_destroy.9 \ vmem.9 vmem_free.9 \ vmem.9 vmem_xalloc.9 \ vmem.9 vmem_xfree.9 MLINKS+=vm_map_lock.9 vm_map_lock_downgrade.9 \ vm_map_lock.9 vm_map_lock_read.9 \ vm_map_lock.9 vm_map_lock_upgrade.9 \ vm_map_lock.9 vm_map_trylock.9 \ vm_map_lock.9 vm_map_trylock_read.9 \ vm_map_lock.9 vm_map_unlock.9 \ vm_map_lock.9 vm_map_unlock_read.9 MLINKS+=vm_map_lookup.9 vm_map_lookup_done.9 MLINKS+=vm_map_max.9 vm_map_min.9 \ vm_map_max.9 vm_map_pmap.9 MLINKS+=vm_map_stack.9 vm_map_growstack.9 MLINKS+=vm_map_wire.9 vm_map_unwire.9 MLINKS+=vm_page_bits.9 vm_page_clear_dirty.9 \ vm_page_bits.9 vm_page_dirty.9 \ vm_page_bits.9 vm_page_is_valid.9 \ vm_page_bits.9 vm_page_set_invalid.9 \ vm_page_bits.9 vm_page_set_validclean.9 \ vm_page_bits.9 vm_page_test_dirty.9 \ vm_page_bits.9 vm_page_undirty.9 \ vm_page_bits.9 vm_page_zero_invalid.9 MLINKS+=vm_page_busy.9 vm_page_busied.9 \ vm_page_busy.9 vm_page_busy_downgrade.9 \ vm_page_busy.9 vm_page_busy_sleep.9 \ vm_page_busy.9 vm_page_sbusied.9 \ vm_page_busy.9 vm_page_sbusy.9 \ vm_page_busy.9 vm_page_sleep_if_busy.9 \ vm_page_busy.9 vm_page_sunbusy.9 \ vm_page_busy.9 vm_page_trysbusy.9 \ vm_page_busy.9 vm_page_tryxbusy.9 \ vm_page_busy.9 vm_page_xbusied.9 \ vm_page_busy.9 vm_page_xbusy.9 \ vm_page_busy.9 vm_page_xunbusy.9 \ vm_page_busy.9 vm_page_assert_sbusied.9 \ vm_page_busy.9 vm_page_assert_unbusied.9 \ vm_page_busy.9 vm_page_assert_xbusied.9 MLINKS+=vm_page_aflag.9 vm_page_aflag_clear.9 \ vm_page_aflag.9 vm_page_aflag_set.9 \ vm_page_aflag.9 vm_page_reference.9 MLINKS+=vm_page_free.9 vm_page_free_toq.9 \ vm_page_free.9 vm_page_free_zero.9 \ vm_page_free.9 vm_page_try_to_free.9 MLINKS+=vm_page_hold.9 vm_page_unhold.9 MLINKS+=vm_page_insert.9 vm_page_remove.9 MLINKS+=vm_page_wire.9 vm_page_unwire.9 MLINKS+=VOP_ACCESS.9 VOP_ACCESSX.9 MLINKS+=VOP_ATTRIB.9 VOP_GETATTR.9 \ VOP_ATTRIB.9 VOP_SETATTR.9 MLINKS+=VOP_CREATE.9 VOP_MKDIR.9 \ VOP_CREATE.9 VOP_MKNOD.9 \ VOP_CREATE.9 VOP_SYMLINK.9 MLINKS+=VOP_GETPAGES.9 VOP_PUTPAGES.9 MLINKS+=VOP_INACTIVE.9 VOP_RECLAIM.9 MLINKS+=VOP_LOCK.9 vn_lock.9 \ VOP_LOCK.9 VOP_ISLOCKED.9 \ VOP_LOCK.9 VOP_UNLOCK.9 MLINKS+=VOP_OPENCLOSE.9 VOP_CLOSE.9 \ VOP_OPENCLOSE.9 VOP_OPEN.9 MLINKS+=VOP_RDWR.9 VOP_READ.9 \ VOP_RDWR.9 VOP_WRITE.9 MLINKS+=VOP_REMOVE.9 VOP_RMDIR.9 MLINKS+=vnet.9 vimage.9 MLINKS+=vref.9 VREF.9 MLINKS+=vrele.9 vput.9 \ vrele.9 vunref.9 MLINKS+=vslock.9 vsunlock.9 MLINKS+=zone.9 uma.9 \ zone.9 uma_find_refcnt.9 \ zone.9 uma_zalloc.9 \ zone.9 uma_zalloc_arg.9 \ zone.9 uma_zcreate.9 \ zone.9 uma_zdestroy.9 \ zone.9 uma_zfree.9 \ zone.9 uma_zfree_arg.9 \ zone.9 uma_zone_get_cur.9 \ zone.9 uma_zone_get_max.9 \ zone.9 uma_zone_set_max.9 \ zone.9 uma_zone_set_warning.9 .include Index: projects/hps_head/share/man/man9/timeout.9 =================================================================== --- projects/hps_head/share/man/man9/timeout.9 (revision 282413) +++ projects/hps_head/share/man/man9/timeout.9 (revision 282414) @@ -1,818 +1,778 @@ .\" $NetBSD: timeout.9,v 1.2 1996/06/23 22:32:34 pk Exp $ .\" .\" Copyright (c) 1996 The NetBSD Foundation, Inc. .\" All rights reserved. .\" .\" This code is derived from software contributed to The NetBSD Foundation .\" by Paul Kranenburg. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR .\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE .\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd October 8, 2014 +.Dd January 24, 2015 .Dt TIMEOUT 9 .Os .Sh NAME .Nm callout_active , .Nm callout_deactivate , .Nm callout_drain , +.Nm callout_drain_async , .Nm callout_handle_init , .Nm callout_init , .Nm callout_init_mtx , .Nm callout_init_rm , .Nm callout_init_rw , .Nm callout_pending , .Nm callout_reset , .Nm callout_reset_curcpu , .Nm callout_reset_on , .Nm callout_reset_sbt , .Nm callout_reset_sbt_curcpu , .Nm callout_reset_sbt_on , .Nm callout_schedule , .Nm callout_schedule_curcpu , .Nm callout_schedule_on , .Nm callout_schedule_sbt , .Nm callout_schedule_sbt_curcpu , .Nm callout_schedule_sbt_on , .Nm callout_stop , .Nm timeout , .Nm untimeout .Nd execute a function after a specified length of time .Sh SYNOPSIS .In sys/types.h .In sys/systm.h .Bd -literal typedef void timeout_t (void *); +typedef void callout_func_t (void *); .Ed -.Ft int -.Fn callout_active "struct callout *c" -.Ft void -.Fn callout_deactivate "struct callout *c" -.Ft int -.Fn callout_drain "struct callout *c" -.Ft void -.Fn callout_handle_init "struct callout_handle *handle" -.Bd -literal -struct callout_handle handle = CALLOUT_HANDLE_INITIALIZER(&handle); -.Ed -.Ft void -.Fn callout_init "struct callout *c" "int mpsafe" -.Ft void -.Fn callout_init_mtx "struct callout *c" "struct mtx *mtx" "int flags" -.Ft void -.Fn callout_init_rm "struct callout *c" "struct rmlock *rm" "int flags" -.Ft void -.Fn callout_init_rw "struct callout *c" "struct rwlock *rw" "int flags" -.Ft int -.Fn callout_pending "struct callout *c" -.Ft int -.Fn callout_reset "struct callout *c" "int ticks" "timeout_t *func" "void *arg" -.Ft int -.Fn callout_reset_curcpu "struct callout *c" "int ticks" "timeout_t *func" \ -"void *arg" -.Ft int -.Fn callout_reset_on "struct callout *c" "int ticks" "timeout_t *func" \ -"void *arg" "int cpu" -.Ft int -.Fn callout_reset_sbt "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "timeout_t *func" "void *arg" "int flags" -.Ft int -.Fn callout_reset_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "timeout_t *func" "void *arg" "int flags" -.Ft int -.Fn callout_reset_sbt_on "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "timeout_t *func" "void *arg" "int cpu" "int flags" -.Ft int -.Fn callout_schedule "struct callout *c" "int ticks" -.Ft int -.Fn callout_schedule_curcpu "struct callout *c" "int ticks" -.Ft int -.Fn callout_schedule_on "struct callout *c" "int ticks" "int cpu" -.Ft int -.Fn callout_schedule_sbt "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "int flags" -.Ft int -.Fn callout_schedule_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "int flags" -.Ft int -.Fn callout_schedule_sbt_on "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "int cpu" "int flags" -.Ft int -.Fn callout_stop "struct callout *c" -.Ft struct callout_handle -.Fn timeout "timeout_t *func" "void *arg" "int ticks" -.Ft void -.Fn untimeout "timeout_t *func" "void *arg" "struct callout_handle handle" .Sh DESCRIPTION The .Nm callout -API is used to schedule a call to an arbitrary function at a specific -time in the future. -Consumers of this API are required to allocate a callout structure -.Pq struct callout +API is used to schedule a one-time call to an arbitrary function at a +specific time in the future. +Consumers of this API are required to allocate a +.Ft struct callout for each pending function invocation. -This structure stores state about the pending function invocation including -the function to be called and the time at which the function should be invoked. -Pending function calls can be cancelled or rescheduled to a different time. -In addition, -a callout structure may be reused to schedule a new function call after a -scheduled call is completed. -.Pp -Callouts only provide a single-shot mode. -If a consumer requires a periodic timer, -it must explicitly reschedule each function call. -This is normally done by rescheduling the subsequent call within the called -function. -.Pp -Callout functions must not sleep. -They may not acquire sleepable locks, -wait on condition variables, -perform blocking allocation requests, -or invoke any other action that might sleep. -.Pp -Each callout structure must be initialized by -.Fn callout_init , -.Fn callout_init_mtx , -.Fn callout_init_rm , -or -.Fn callout_init_rw -before it is passed to any of the other callout functions. The +.Ft struct callout +stores the full state about any pending function call and +must be drained by a call to +.Fn callout_drain +or +.Fn callout_drain_async +before freeing. +.Sh INITIALIZATION +.Ft void +.Fn callout_handle_init "struct callout_handle *handle" +This function is deprecated. +Please use .Fn callout_init -function initializes a callout structure in -.Fa c -that is not associated with a specific lock. +instead. +This function is used to prepare a +.Ft struct callout_handle +before it can be used the first time. +If this function is called on a pending timeout, the pending timeout +cannot be cancelled and the +.Fn untimeout +function will return as if no timeout was pending. +.Pp +.Fn CALLOUT_HANDLE_INITIALIZER "&handle" +This macro is deprecated. +This macro is used to statically initialize a +.Ft struct callout_handle . +Please use +.Fn callout_init +instead. +.Pp +.Ft void +.Fn callout_init "struct callout *c" "int mpsafe" +This function prepares a +.Ft struct callout +before it can be used. +This function should not be used when the callout is pending a timeout. If the .Fa mpsafe -argument is zero, -the callout structure is not considered to be -.Dq multi-processor safe ; -and the Giant lock will be acquired before calling the callout function -and released when the callout function returns. +argument is non-zero, the callback function will be running unlocked +and the callback is so-called "mpsafe". +.Bf Sy +It is the application's entire responsibility to not call any +.Fn callout_xxx +functions, including the +.Fn callout_drain +function, simultaneously on the same callout when the +.Fa mpsafe +argument is non-zero. +Otherwise, undefined behavior can happen. +Avoid simultaneous calls by obtaining an exclusive lock before calling +any +.Fn callout_xxx +functions other than the +.Fn callout_drain +function. +.Ef +If the +.Fa mpsafe +argument is zero, the Giant mutex will be locked before the callback +function is called. +If the +.Fa mpsafe +argument is zero, the Giant mutex is expected to be locked when calling +any +.Fn callout_xxx +functions which start and stop a callout other than the +.Fn callout_drain +function. .Pp +.Ft void +.Fn callout_init_mtx "struct callout *c" "struct mtx *mtx" "int flags" +This function prepares a +.Ft struct callout +before it can be used. +This function should not be used when the callout is pending a timeout. The -.Fn callout_init_mtx , -.Fn callout_init_rm , -and -.Fn callout_init_rw -functions initialize a callout structure in -.Fa c -that is associated with a specific lock. -The lock is specified by the -.Fa mtx , -.Fa rm , -or -.Fa rw -parameter. -The associated lock must be held while stopping or rescheduling the -callout. -The callout subsystem acquires the associated lock before calling the -callout function and releases it after the function returns. -If the callout was cancelled while the callout subsystem waited for the -associated lock, -the callout function is not called, -and the associated lock is released. -This ensures that stopping or rescheduling the callout will abort any -previously scheduled invocation. -.Pp -Only regular mutexes may be used with -.Fn callout_init_mtx ; -spin mutexes are not supported. -A sleepable read-mostly lock -.Po -one initialized with the -.Dv RM_SLEEPABLE -flag -.Pc -may not be used with -.Fn callout_init_rm . -Similarly, other sleepable lock types such as -.Xr sx 9 -and -.Xr lockmgr 9 -cannot be used with callouts because sleeping is not permitted in -the callout subsystem. -.Pp -These +.Fa mtx +argument is a pointer to a valid spinlock type of mutex or a valid +regular non-sleepable mutex which the callback subsystem will lock +before calling the callback function. +The specified mutex is expected to be locked when calling any +.Fn callout_xxx +functions which start and stop a callout other than the +.Fn callout_drain +function. +Valid .Fa flags -may be specified for -.Fn callout_init_mtx , -.Fn callout_init_rm , -or -.Fn callout_init_rw : +are: .Bl -tag -width ".Dv CALLOUT_RETURNUNLOCKED" .It Dv CALLOUT_RETURNUNLOCKED -The callout function will release the associated lock itself, -so the callout subsystem should not attempt to unlock it -after the callout function returns. -.It Dv CALLOUT_SHAREDLOCK -The lock is only acquired in read mode when running the callout handler. -This flag is ignored by -.Fn callout_init_mtx . +The callout function is assumed to have released the specified mutex +before returning. +.It Dv 0 +The callout subsystem will release the specified mutex after the +callout function has returned. .El .Pp -The function -.Fn callout_stop -cancels a callout -.Fa c -if it is currently pending. -If the callout is pending, then -.Fn callout_stop -returns a non-zero value. -If the callout is not set, -has already been serviced, -or is currently being serviced, -then zero will be returned. -If the callout has an associated lock, -then that lock must be held when this function is called. +.Ft void +.Fn callout_init_rm "struct callout *c" "struct rmlock *rm" "int flags" +This function is similar to +.Fn callout_init_mtx , +but it accepts a read-mostly type of lock. +The read-mostly lock must not be initialized with the +.Dv RM_SLEEPABLE +flag. .Pp -The function -.Fn callout_drain -is identical to -.Fn callout_stop -except that it will wait for the callout -.Fa c -to complete if it is already in progress. -This function MUST NOT be called while holding any -locks on which the callout might block, or deadlock will result. -Note that if the callout subsystem has already begun processing this -callout, then the callout function may be invoked before -.Fn callout_drain -returns. -However, the callout subsystem does guarantee that the callout will be -fully stopped before -.Fn callout_drain -returns. -.Pp -The +.Ft void +.Fn callout_init_rw "struct callout *c" "struct rwlock *rw" "int flags" +This function is similar to +.Fn callout_init_mtx , +but it accepts a read/write type of lock. +.Sh SCHEDULING CALLOUTS +.Ft struct callout_handle +.Fn timeout "timeout_t *func" "void *arg" "int ticks" +This function is deprecated. +Please use .Fn callout_reset -and -.Fn callout_schedule -function families schedule a future function invocation for callout -.Fa c . -If -.Fa c -already has a pending callout, -it is cancelled before the new invocation is scheduled. -These functions return a non-zero value if a pending callout was cancelled -and zero if there was no pending callout. -If the callout has an associated lock, -then that lock must be held when any of these functions are called. -.Pp -The time at which the callout function will be invoked is determined by -either the -.Fa ticks -argument or the -.Fa sbt , -.Fa pr , -and -.Fa flags -arguments. -When -.Fa ticks -is used, -the callout is scheduled to execute after +instead. +This function schedules a call to +.Fa func +to take place after .Fa ticks Ns No /hz seconds. Non-positive values of .Fa ticks are silently converted to the value .Sq 1 . -.Pp The -.Fa sbt , -.Fa pr , -and -.Fa flags -arguments provide more control over the scheduled time including -support for higher resolution times, -specifying the precision of the scheduled time, -and setting an absolute deadline instead of a relative timeout. -The callout is scheduled to execute in a time window which begins at -the time specified in +.Fa func +argument is a valid pointer to a function that takes a single +.Fa void * +argument. +Upon invocation, the +.Fa func +function will receive +.Fa arg +as its only argument. +The Giant lock is locked when the +.Fa arg +function is invoked and should not be unlocked by this function. +The returned value from +.Fn timeout +is a +.Ft struct callout_handle +structure which can be used in conjunction with the +.Fn untimeout +function to request that a scheduled timeout be cancelled. +As handles are recycled by the system, it is possible, although unlikely, +that a handle from one invocation of +.Fn timeout +may match the handle of another invocation of +.Fn timeout +if both calls used the same function pointer and argument, and the first +timeout is expired or cancelled before the second call. +Please ensure that the function and argument pointers are unique when using this function. +.Pp +.Ft int +.Fn callout_reset "struct callout *c" "int ticks" "callout_func_t *func" "void *arg" +This function is used to schedule or re-schedule a callout. +This function at first stops the callout given by the +.Fa c +argument, if any. +Then it will start the callout given by the +.Fa c +argument. +The relative time until the timeout callback happens is given by the +.Fa ticks +argument. +The number of ticks in a second is defined by +.Dv hz +and can vary from system to system. +This function returns a non-zero value if the given callout was pending and +the callback function was prevented from being called. +Otherwise, a value of zero is returned. +If a lock is associated with the callout given by the +.Fa c +argument and it is exclusivly locked when this function is called, this +function will always ensure that previous callback function, if any, +is never reached. +In other words, the callout will be atomically restarted. +Otherwise, there is no such guarantee. +The callback function is given by +.Fa func +and its function argument is given by +.Fa arg . +.Pp +.Ft int +.Fn callout_reset_curcpu "struct callout *c" "int ticks" "callout_func_t *func" \ +"void *arg" +This function works the same like the +.Fn callout_reset +function except the callback function given by the +.Fa func +argument will be executed on the same CPU which called this function. +.Pp +.Ft int +.Fn callout_reset_on "struct callout *c" "int ticks" "callout_func_t *func" \ +"void *arg" "int cpu" +This function works the same like the +.Fn callout_reset +function except the callback function given by the +.Fa func +argument will be executed on the CPU given by the +.Fa cpu +argument. +.Pp +.Ft int +.Fn callout_reset_sbt "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "callout_func_t *func" "void *arg" "int flags" +This function works the same like the +.Fn callout_reset +function except the relative or absolute time after which the timeout +callback should happen is given by the .Fa sbt -and extends for the amount of time specified in +argument and extends for the amount of time specified in .Fa pr . +This function is used when high precision timeouts are needed. If .Fa sbt specifies a time in the past, the window is adjusted to start at the current time. A non-zero value for .Fa pr allows the callout subsystem to coalesce callouts scheduled close to each other into fewer timer interrupts, reducing processing overhead and power consumption. -These +The .Fa flags -may be specified to adjust the interpretation of +argument may be non-zero to adjust the interpretation of the .Fa sbt and -.Fa pr : +.Fa pr +arguments: .Bl -tag -width ".Dv C_DIRECT_EXEC" .It Dv C_ABSOLUTE Handle the .Fa sbt argument as an absolute time since boot. By default, .Fa sbt is treated as a relative amount of time, similar to .Fa ticks . .It Dv C_DIRECT_EXEC Run the handler directly from hardware interrupt context instead of from the softclock thread. This reduces latency and overhead, but puts more constraints on the callout function. Callout functions run in this context may use only spin mutexes for locking and should be as small as possible because they run with absolute priority. .It Fn C_PREL Specifies relative event time precision as binary logarithm of time interval -divided by acceptable time deviation: 1 -- 1/2, 2 -- 1/4, etc. +divided by acceptable time deviation: 1 -- 1/2, 2 -- 1/4, and so on. Note that the larger of .Fa pr or this value is used as the length of the time window. Smaller values .Pq which result in larger time intervals allow the callout subsystem to aggregate more events in one timer interrupt. .It Dv C_HARDCLOCK Align the timeouts to .Fn hardclock calls if possible. .El .Pp -The -.Fn callout_reset -functions accept a +.Ft int +.Fn callout_reset_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "callout_func_t *func" "void *arg" "int flags" +This function works like +.Fn callout_reset_sbt , +except the callback function given by the .Fa func -argument which identifies the function to be called when the time expires. -It must be a pointer to a function that takes a single -.Fa void * +argument will be executed on the CPU which called this function. +.Pp +.Ft int +.Fn callout_reset_sbt_on "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "callout_func_t *func" "void *arg" "int cpu" "int flags" +This function works like +.Fn callout_reset_sbt , +except the callback function given by +.Fa func +will be executed on the CPU given by +.Fa cpu . +.Pp +.Ft int +.Fn callout_schedule "struct callout *c" "int ticks" +This function works the same like the +.Fn callout_reset +function except it re-uses the callback function and the callback argument +already stored in the +.Pq struct callout +structure. +.Pp +.Ft int +.Fn callout_schedule_curcpu "struct callout *c" "int ticks" +This function works the same like the +.Fn callout_reset_curcpu +function except it re-uses the callback function and the callback argument +already stored in the +.Pq struct callout +structure. +.Pp +.Ft int +.Fn callout_schedule_on "struct callout *c" "int ticks" "int cpu" +This function works the same like the +.Fn callout_reset_on +function except it re-uses the callback function and the callback argument +already stored in the +.Pq struct callout +structure. +.Pp +.Ft int +.Fn callout_schedule_sbt "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "int flags" +This function works the same like the +.Fn callout_reset_sbt +function except it re-uses the callback function and the callback argument +already stored in the +.Pq struct callout +structure. +.Pp +.Ft int +.Fn callout_schedule_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "int flags" +This function works the same like the +.Fn callout_reset_sbt_curcpu +function except it re-uses the callback function and the callback argument +already stored in the +.Pq struct callout +structure. +.Pp +.Ft int +.Fn callout_schedule_sbt_on "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "int cpu" "int flags" +This function works the same like the +.Fn callout_reset_sbt_on +function except it re-uses the callback function and the callback argument +already stored in the +.Pq struct callout +structure. +.Sh CHECKING THE STATE OF CALLOUTS +.Ft int +.Fn callout_pending "struct callout *c" +This function returns non-zero if the callout pointed to by the +.Fa c +argument is pending for callback. +Else this function returns zero. +This function returns zero when inside the callout function if the +callout is not re-scheduled. +.Pp +.Ft int +.Fn callout_active "struct callout *c" +This function is deprecated and returns non-zero if the callout +pointed to by the +.Fa c +argument was scheduled in the past. +Else this function returns zero. +This function also returns zero after the +.Fn callout_deactivate +or the +.Fn callout_stop +or the +.Fn callout_drain +or the +.Fn callout_drain_async +function is called on the same callout as given by the +.Fa c argument. -Upon invocation, +.Pp +.Ft void +.Fn callout_deactivate "struct callout *c" +This function is deprecated and ensures that subsequent calls to the +.Fn callout_activate +function returns zero until the callout is scheduled again. +.Sh STOPPING CALLOUTS +.Ft void +.Fn untimeout "timeout_t *func" "void *arg" "struct callout_handle handle" +This function is deprecated and cancels the timeout associated with the +.Fa handle +argument using the function pointed to by the .Fa func -will receive +argument and having the .Fa arg -as its only argument. -The -.Fn callout_schedule -functions reuse the +arguments to validate the handle. +If the handle does not correspond to a timeout with +the function .Fa func -and +taking the argument .Fa arg -arguments from the previous callout. -Note that one of the -.Fn callout_reset -functions must always be called to initialize -.Fa func -and -.Fa arg -before one of the -.Fn callout_schedule -functions can be used. +no action is taken. The +.Fa handle +must be initialized by a previous call to +.Fn timeout , +.Fn callout_handle_init +or assigned the value of +.Fn CALLOUT_HANDLE_INITIALIZER "&handle" +before being passed to +.Fn untimeout . +The behavior of calling +.Fn untimeout +with an uninitialized handle +is undefined. .Pp -The callout subsystem provides a softclock thread for each CPU in the system. -Callouts are assigned to a single CPU and are executed by the softclock thread -for that CPU. -Initially, -callouts are assigned to CPU 0. -The -.Fn callout_reset_on , -.Fn callout_reset_sbt_on , -.Fn callout_schedule_on -and -.Fn callout_schedule_sbt_on -functions assign the callout to CPU -.Fa cpu . -The -.Fn callout_reset_curcpu , -.Fn callout_reset_sbt_curpu , -.Fn callout_schedule_curcpu -and -.Fn callout_schedule_sbt_curcpu -functions assign the callout to the current CPU. -The -.Fn callout_reset , -.Fn callout_reset_sbt , -.Fn callout_schedule -and -.Fn callout_schedule_sbt -functions schedule the callout to execute in the softclock thread of the CPU -to which it is currently assigned. +.Ft int +.Fn callout_stop "struct callout *c" +This function is used to stop a timeout function invocation associated with the callout pointed to by the +.Fa c +argument, in a non-blocking fashion. +This function can be called multiple times in a row with no side effects, even if the callout is already stopped. This function however should not be called before the callout has been initialized. +This function returns a non-zero value if the given callout was pending and +the callback function was prevented from being called. +Else a value of zero is returned. +If a lock is associated with the callout given by the +.Fa c +argument and it is exclusivly locked when this function is called, the +.Fn callout_stop +function will always ensure that the callback function is never reached. +In other words the callout will be atomically stopped. +Else there is no such guarantee. +.Sh DRAINING CALLOUTS +.Ft int +.Fn callout_drain "struct callout *c" +This function works the same like the +.Fn callout_stop +function except it ensures that all callback functions have returned and there are no more references to the callout pointed to by the +.Fa c +argument inside the callout subsystem before it returns. +Also this function ensures that the lock, if any, associated with the +callout is no longer being used. +When this function returns, it is safe to free the callout structure pointed to by the +.Fa c +argument. .Pp +.Ft int +.Fn callout_drain_async "struct callout *c" "callout_func_t *fn" "void *arg" +This function is non-blocking and works the same like the +.Fn callout_stop +function except if it returns non-zero it means the callback function pointed to by the +.Fa fn +argument will be called back with the +.Fa arg +argument when all references to the callout pointed to by the +.Fa c +argument are gone. +If this function returns non-zero it should not be called again until the callback function has been called. +If the +.Fn callout_drain +or +.Fn callout_drain_async +functions are called while an asynchronous drain is pending, +previously pending asynchronous drains might get cancelled. +If this function returns zero, it is safe to free the callout structure pointed to by the +.Fa c +argument right away. +.Sh CALLOUT FUNCTION RESTRICTIONS +Callout functions must not sleep. +They may not acquire sleepable locks, wait on condition variables, +perform blocking allocation requests, or invoke any other action that +might sleep. +.Sh CALLOUT SUBSYSTEM INTERNALS +The callout subsystem has its own set of spinlocks to protect its internal state. +The callout subsystem provides a softclock thread for each CPU in the +system. +Callouts are assigned to a single CPU and are executed by the +softclock thread for that CPU. +Initially, callouts are assigned to CPU 0. Softclock threads are not pinned to their respective CPUs by default. The softclock thread for CPU 0 can be pinned to CPU 0 by setting the .Va kern.pin_default_swi loader tunable to a non-zero value. Softclock threads for CPUs other than zero can be pinned to their respective CPUs by setting the .Va kern.pin_pcpu_swi loader tunable to a non-zero value. -.Pp -The macros -.Fn callout_pending , -.Fn callout_active -and -.Fn callout_deactivate -provide access to the current state of the callout. -The -.Fn callout_pending -macro checks whether a callout is -.Em pending ; -a callout is considered -.Em pending -when a timeout has been set but the time has not yet arrived. -Note that once the timeout time arrives and the callout subsystem -starts to process this callout, -.Fn callout_pending -will return -.Dv FALSE -even though the callout function may not have finished -.Pq or even begun -executing. -The -.Fn callout_active -macro checks whether a callout is marked as -.Em active , -and the -.Fn callout_deactivate -macro clears the callout's -.Em active -flag. -The callout subsystem marks a callout as -.Em active -when a timeout is set and it clears the -.Em active -flag in -.Fn callout_stop -and -.Fn callout_drain , -but it -.Em does not -clear it when a callout expires normally via the execution of the -callout function. -.Ss "Avoiding Race Conditions" +.Sh "AVOIDING RACE CONDITIONS" The callout subsystem invokes callout functions from its own thread context. Without some kind of synchronization, it is possible that a callout function will be invoked concurrently with an attempt to stop or reset the callout by another thread. In particular, since callout functions typically acquire a lock as their first action, the callout function may have already been invoked, but is blocked waiting for that lock at the time that another thread tries to reset or stop the callout. .Pp There are three main techniques for addressing these synchronization concerns. The first approach is preferred as it is the simplest: .Bl -enum -offset indent .It Callouts can be associated with a specific lock when they are initialized by .Fn callout_init_mtx , .Fn callout_init_rm , or .Fn callout_init_rw . When a callout is associated with a lock, the callout subsystem acquires the lock before the callout function is invoked. This allows the callout subsystem to transparently handle races between callout cancellation, scheduling, and execution. Note that the associated lock must be acquired before calling .Fn callout_stop or one of the .Fn callout_reset or .Fn callout_schedule functions to provide this safety. .Pp A callout initialized via .Fn callout_init with .Fa mpsafe set to zero is implicitly associated with the .Va Giant mutex. If .Va Giant is held when cancelling or rescheduling the callout, then its use will prevent races with the callout function. .It The return value from .Fn callout_stop .Po or the .Fn callout_reset and .Fn callout_schedule function families .Pc indicates whether or not the callout was removed. If it is known that the callout was set and the callout function has -not yet executed, then a return value of -.Dv FALSE -indicates that the callout function is about to be called. +not yet executed, then a return value of zero indicates that the +callout function is about to be called. For example: .Bd -literal -offset indent if (sc->sc_flags & SCFLG_CALLOUT_RUNNING) { if (callout_stop(&sc->sc_callout)) { sc->sc_flags &= ~SCFLG_CALLOUT_RUNNING; /* successfully stopped */ } else { /* * callout has expired and callout * function is about to be executed */ } } .Ed .It The .Fn callout_pending , .Fn callout_active and .Fn callout_deactivate macros can be used together to work around the race conditions. When a callout's timeout is set, the callout subsystem marks the callout as both .Em active and .Em pending . When the timeout time arrives, the callout subsystem begins processing the callout by first clearing the .Em pending flag. It then invokes the callout function without changing the .Em active flag, and does not clear the .Em active flag even after the callout function returns. The mechanism described here requires the callout function itself to clear the .Em active flag using the .Fn callout_deactivate macro. The .Fn callout_stop and .Fn callout_drain functions always clear both the .Em active and .Em pending flags before returning. .Pp The callout function should first check the .Em pending flag and return without action if .Fn callout_pending -returns -.Dv TRUE . +returns non-zero. This indicates that the callout was rescheduled using .Fn callout_reset just before the callout function was invoked. If .Fn callout_active -returns -.Dv FALSE -then the callout function should also return without action. +returns zero then the callout function should also return without +action. This indicates that the callout has been stopped. Finally, the callout function should call .Fn callout_deactivate to clear the .Em active flag. For example: .Bd -literal -offset indent mtx_lock(&sc->sc_mtx); if (callout_pending(&sc->sc_callout)) { /* callout was reset */ mtx_unlock(&sc->sc_mtx); return; } if (!callout_active(&sc->sc_callout)) { /* callout was stopped */ mtx_unlock(&sc->sc_mtx); return; } callout_deactivate(&sc->sc_callout); /* rest of callout function */ .Ed .Pp Together with appropriate synchronization, such as the mutex used above, this approach permits the .Fn callout_stop and .Fn callout_reset functions to be used at any time without races. For example: .Bd -literal -offset indent mtx_lock(&sc->sc_mtx); callout_stop(&sc->sc_callout); /* The callout is effectively stopped now. */ .Ed .Pp If the callout is still pending then these functions operate normally, but if processing of the callout has already begun then the tests in the callout function cause it to return without further action. Synchronization between the callout function and other code ensures that stopping or resetting the callout will never be attempted while the callout function is past the .Fn callout_deactivate call. .Pp The above technique additionally ensures that the .Em active flag always reflects whether the callout is effectively enabled or disabled. If .Fn callout_active returns false, then the callout is effectively disabled, since even if the callout subsystem is actually just about to invoke the callout function, the callout function will return without action. .El .Pp There is one final race condition that must be considered when a callout is being stopped for the last time. In this case it may not be safe to let the callout function itself detect that the callout was stopped, since it may need to access data objects that have already been destroyed or recycled. To ensure that the callout is completely finished, a call to .Fn callout_drain should be used. In particular, a callout should always be drained prior to destroying its associated lock or releasing the storage for the callout structure. .Sh LEGACY API .Bf Sy -The functions below are a legacy API that will be removed in a future release. -New code should not use these routines. -.Ef -.Pp -The function +The .Fn timeout -schedules a call to the function given by the argument -.Fa func -to take place after -.Fa ticks Ns No /hz -seconds. -Non-positive values of -.Fa ticks -are silently converted to the value -.Sq 1 . -.Fa func -should be a pointer to a function that takes a -.Fa void * -argument. -Upon invocation, -.Fa func -will receive -.Fa arg -as its only argument. -The return value from -.Fn timeout -is a -.Ft struct callout_handle -which can be used in conjunction with the -.Fn untimeout -function to request that a scheduled timeout be canceled. -.Pp -The function -.Fn callout_handle_init -can be used to initialize a handle to a state which will cause -any calls to -.Fn untimeout -with that handle to return with no side -effects. -.Pp -Assigning a callout handle the value of -.Fn CALLOUT_HANDLE_INITIALIZER -performs the same function as -.Fn callout_handle_init -and is provided for use on statically declared or global callout handles. -.Pp -The function -.Fn untimeout -cancels the timeout associated with -.Fa handle -using the -.Fa func and -.Fa arg -arguments to validate the handle. -If the handle does not correspond to a timeout with -the function -.Fa func -taking the argument -.Fa arg -no action is taken. -.Fa handle -must be initialized by a previous call to -.Fn timeout , -.Fn callout_handle_init , -or assigned the value of -.Fn CALLOUT_HANDLE_INITIALIZER "&handle" -before being passed to -.Fn untimeout . -The behavior of calling .Fn untimeout -with an uninitialized handle -is undefined. -.Pp -As handles are recycled by the system, it is possible (although unlikely) -that a handle from one invocation of -.Fn timeout -may match the handle of another invocation of -.Fn timeout -if both calls used the same function pointer and argument, and the first -timeout is expired or canceled before the second call. -The timeout facility offers O(1) running time for -.Fn timeout -and -.Fn untimeout . -Timeouts are executed from -.Fn softclock -with the -.Va Giant -lock held. -Thus they are protected from re-entrancy. -.Sh RETURN VALUES -The -.Fn callout_active -macro returns the state of a callout's -.Em active -flag. -.Pp -The -.Fn callout_pending -macro returns the state of a callout's -.Em pending -flag. -.Pp -The -.Fn callout_reset -and -.Fn callout_schedule -function families return non-zero if the callout was pending before the new -function invocation was scheduled. -.Pp -The -.Fn callout_stop -and -.Fn callout_drain -functions return non-zero if the callout was still pending when it was -called or zero otherwise. -The -.Fn timeout -function returns a -.Ft struct callout_handle -that can be passed to -.Fn untimeout . +functions are a legacy API that will be removed in a future release. +New code should not use these routines. +.Ef .Sh HISTORY The current timeout and untimeout routines are based on the work of .An Adam M. Costello and .An George Varghese , published in a technical report entitled .%T "Redesigning the BSD Callout and Timer Facilities" and modified slightly for inclusion in .Fx by .An Justin T. Gibbs . The original work on the data structures used in this implementation was published by .An G. Varghese and .An A. Lauck in the paper .%T "Hashed and Hierarchical Timing Wheels: Data Structures for the Efficient Implementation of a Timer Facility" in the .%B "Proceedings of the 11th ACM Annual Symposium on Operating Systems Principles" . The current implementation replaces the long standing .Bx linked list callout mechanism which offered O(n) insertion and removal running time -but did not generate or require handles for untimeout operations. +and did not generate or require handles for untimeout operations. Index: projects/hps_head/sys/kern/init_main.c =================================================================== --- projects/hps_head/sys/kern/init_main.c (revision 282413) +++ projects/hps_head/sys/kern/init_main.c (revision 282414) @@ -1,867 +1,868 @@ /*- * Copyright (c) 1995 Terrence R. Lambert * All rights reserved. * * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)init_main.c 8.9 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_init_path.h" #include "opt_verbose_sysinit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void mi_startup(void); /* Should be elsewhere */ /* Components of the first process -- never freed. */ static struct session session0; static struct pgrp pgrp0; struct proc proc0; struct thread thread0 __aligned(16); struct vmspace vmspace0; struct proc *initproc; #ifndef BOOTHOWTO #define BOOTHOWTO 0 #endif int boothowto = BOOTHOWTO; /* initialized so that it can be patched */ SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "Boot control flags, passed from loader"); #ifndef BOOTVERBOSE #define BOOTVERBOSE 0 #endif int bootverbose = BOOTVERBOSE; SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "Control the output of verbose kernel messages"); /* * This ensures that there is at least one entry so that the sysinit_set * symbol is not undefined. A sybsystem ID of SI_SUB_DUMMY is never * executed. */ SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL); /* * The sysinit table itself. Items are checked off as the are run. * If we want to register new sysinit types, add them to newsysinit. */ SET_DECLARE(sysinit_set, struct sysinit); struct sysinit **sysinit, **sysinit_end; struct sysinit **newsysinit, **newsysinit_end; /* * Merge a new sysinit set into the current set, reallocating it if * necessary. This can only be called after malloc is running. */ void sysinit_add(struct sysinit **set, struct sysinit **set_end) { struct sysinit **newset; struct sysinit **sipp; struct sysinit **xipp; int count; count = set_end - set; if (newsysinit) count += newsysinit_end - newsysinit; else count += sysinit_end - sysinit; newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT); if (newset == NULL) panic("cannot malloc for sysinit"); xipp = newset; if (newsysinit) for (sipp = newsysinit; sipp < newsysinit_end; sipp++) *xipp++ = *sipp; else for (sipp = sysinit; sipp < sysinit_end; sipp++) *xipp++ = *sipp; for (sipp = set; sipp < set_end; sipp++) *xipp++ = *sipp; if (newsysinit) free(newsysinit, M_TEMP); newsysinit = newset; newsysinit_end = newset + count; } #if defined (DDB) && defined(VERBOSE_SYSINIT) static const char * symbol_name(vm_offset_t va, db_strategy_t strategy) { const char *name; c_db_sym_t sym; db_expr_t offset; if (va == 0) return (NULL); sym = db_search_symbol(va, strategy, &offset); if (offset != 0) return (NULL); db_symbol_values(sym, &name, NULL); return (name); } #endif /* * System startup; initialize the world, create process 0, mount root * filesystem, and fork to create init and pagedaemon. Most of the * hard work is done in the lower-level initialization routines including * startup(), which does memory initialization and autoconfiguration. * * This allows simple addition of new kernel subsystems that require * boot time initialization. It also allows substitution of subsystem * (for instance, a scheduler, kernel profiler, or VM system) by object * module. Finally, it allows for optional "kernel threads". */ void mi_startup(void) { register struct sysinit **sipp; /* system initialization*/ register struct sysinit **xipp; /* interior loop of sort*/ register struct sysinit *save; /* bubble*/ #if defined(VERBOSE_SYSINIT) int last; int verbose; #endif if (boothowto & RB_VERBOSE) bootverbose++; if (sysinit == NULL) { sysinit = SET_BEGIN(sysinit_set); sysinit_end = SET_LIMIT(sysinit_set); } restart: /* * Perform a bubble sort of the system initialization objects by * their subsystem (primary key) and order (secondary key). */ for (sipp = sysinit; sipp < sysinit_end; sipp++) { for (xipp = sipp + 1; xipp < sysinit_end; xipp++) { if ((*sipp)->subsystem < (*xipp)->subsystem || ((*sipp)->subsystem == (*xipp)->subsystem && (*sipp)->order <= (*xipp)->order)) continue; /* skip*/ save = *sipp; *sipp = *xipp; *xipp = save; } } #if defined(VERBOSE_SYSINIT) last = SI_SUB_COPYRIGHT; verbose = 0; #if !defined(DDB) printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n"); #endif #endif /* * Traverse the (now) ordered list of system initialization tasks. * Perform each task, and continue on to the next task. */ for (sipp = sysinit; sipp < sysinit_end; sipp++) { if ((*sipp)->subsystem == SI_SUB_DUMMY) continue; /* skip dummy task(s)*/ if ((*sipp)->subsystem == SI_SUB_DONE) continue; #if defined(VERBOSE_SYSINIT) if ((*sipp)->subsystem > last) { verbose = 1; last = (*sipp)->subsystem; printf("subsystem %x\n", last); } if (verbose) { #if defined(DDB) const char *func, *data; func = symbol_name((vm_offset_t)(*sipp)->func, DB_STGY_PROC); data = symbol_name((vm_offset_t)(*sipp)->udata, DB_STGY_ANY); if (func != NULL && data != NULL) printf(" %s(&%s)... ", func, data); else if (func != NULL) printf(" %s(%p)... ", func, (*sipp)->udata); else #endif printf(" %p(%p)... ", (*sipp)->func, (*sipp)->udata); } #endif /* Call function */ (*((*sipp)->func))((*sipp)->udata); #if defined(VERBOSE_SYSINIT) if (verbose) printf("done.\n"); #endif /* Check off the one we're just done */ (*sipp)->subsystem = SI_SUB_DONE; /* Check if we've installed more sysinit items via KLD */ if (newsysinit != NULL) { if (sysinit != SET_BEGIN(sysinit_set)) free(sysinit, M_TEMP); sysinit = newsysinit; sysinit_end = newsysinit_end; newsysinit = NULL; newsysinit_end = NULL; goto restart; } } mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED); mtx_unlock(&Giant); /* * Now hand over this thread to swapper. */ swapper(); /* NOTREACHED*/ } /* *************************************************************************** **** **** The following SYSINIT's belong elsewhere, but have not yet **** been moved. **** *************************************************************************** */ static void print_caddr_t(void *data) { printf("%s", (char *)data); } static void print_version(void *data __unused) { int len; /* Strip a trailing newline from version. */ len = strlen(version); while (len > 0 && version[len - 1] == '\n') len--; printf("%.*s %s\n", len, version, machine); printf("%s\n", compiler_version); } SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright); SYSINIT(trademark, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, trademark); SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_THIRD, print_version, NULL); #ifdef WITNESS static char wit_warn[] = "WARNING: WITNESS option enabled, expect reduced performance.\n"; SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1, print_caddr_t, wit_warn); SYSINIT(witwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1, print_caddr_t, wit_warn); #endif #ifdef DIAGNOSTIC static char diag_warn[] = "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n"; SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 2, print_caddr_t, diag_warn); SYSINIT(diagwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 2, print_caddr_t, diag_warn); #endif static int null_fetch_syscall_args(struct thread *td __unused, struct syscall_args *sa __unused) { panic("null_fetch_syscall_args"); } static void null_set_syscall_retval(struct thread *td __unused, int error __unused) { panic("null_set_syscall_retval"); } struct sysentvec null_sysvec = { .sv_size = 0, .sv_table = NULL, .sv_mask = 0, .sv_sigsize = 0, .sv_sigtbl = NULL, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = NULL, .sv_sendsig = NULL, .sv_sigcode = NULL, .sv_szsigcode = NULL, .sv_prepsyscall = NULL, .sv_name = "null", .sv_coredump = NULL, .sv_imgact_try = NULL, .sv_minsigstksz = 0, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = NULL, .sv_setregs = NULL, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = 0, .sv_set_syscall_retval = null_set_syscall_retval, .sv_fetch_syscall_args = null_fetch_syscall_args, .sv_syscallnames = NULL, .sv_schedtail = NULL, }; /* *************************************************************************** **** **** The two following SYSINIT's are proc0 specific glue code. I am not **** convinced that they can not be safely combined, but their order of **** operation has been maintained as the same as the original init_main.c **** for right now. **** **** These probably belong in init_proc.c or kern_proc.c, since they **** deal with proc0 (the fork template process). **** *************************************************************************** */ /* ARGSUSED*/ static void proc0_init(void *dummy __unused) { struct proc *p; struct thread *td; struct ucred *newcred; vm_paddr_t pageablemem; int i; GIANT_REQUIRED; p = &proc0; td = &thread0; /* * Initialize magic number and osrel. */ p->p_magic = P_MAGIC; p->p_osrel = osreldate; /* * Initialize thread and process structures. */ procinit(); /* set up proc zone */ threadinit(); /* set up UMA zones */ /* * Initialise scheduler resources. * Add scheduler specific parts to proc, thread as needed. */ schedinit(); /* scheduler gets its house in order */ /* * Create process 0 (the swapper). */ LIST_INSERT_HEAD(&allproc, p, p_list); LIST_INSERT_HEAD(PIDHASH(0), p, p_hash); mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); p->p_pgrp = &pgrp0; LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); LIST_INIT(&pgrp0.pg_members); LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist); pgrp0.pg_session = &session0; mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF); refcount_init(&session0.s_count, 1); session0.s_leader = p; p->p_sysent = &null_sysvec; p->p_flag = P_SYSTEM | P_INMEM; p->p_flag2 = 0; p->p_state = PRS_NORMAL; knlist_init_mtx(&p->p_klist, &p->p_mtx); STAILQ_INIT(&p->p_ktr); p->p_nice = NZERO; /* pid_max cannot be greater than PID_MAX */ td->td_tid = PID_MAX + 1; LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash); td->td_state = TDS_RUNNING; td->td_pri_class = PRI_TIMESHARE; td->td_user_pri = PUSER; td->td_base_user_pri = PUSER; td->td_lend_user_pri = PRI_MAX; td->td_priority = PVM; td->td_base_pri = PVM; td->td_oncpu = 0; td->td_flags = TDF_INMEM; td->td_pflags = TDP_KTHREAD; td->td_cpuset = cpuset_thread0(); prison0_init(); p->p_peers = 0; p->p_leader = p; p->p_reaper = p; LIST_INIT(&p->p_reaplist); strncpy(p->p_comm, "kernel", sizeof (p->p_comm)); strncpy(td->td_name, "swapper", sizeof (td->td_name)); callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0); callout_init_mtx(&p->p_limco, &p->p_mtx, 0); - callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); + mtx_init(&td->td_slpmutex, "td_slpmutex", NULL, MTX_SPIN); + callout_init_mtx(&td->td_slpcallout, &td->td_slpmutex, 0); /* Create credentials. */ newcred = crget(); newcred->cr_ngroups = 1; /* group 0 */ newcred->cr_uidinfo = uifind(0); newcred->cr_ruidinfo = uifind(0); newcred->cr_prison = &prison0; newcred->cr_loginclass = loginclass_find("default"); proc_set_cred_init(p, newcred); #ifdef AUDIT audit_cred_kproc0(newcred); #endif #ifdef MAC mac_cred_create_swapper(newcred); #endif td->td_ucred = crhold(newcred); /* Create sigacts. */ p->p_sigacts = sigacts_alloc(); /* Initialize signal state for process 0. */ siginit(&proc0); /* Create the file descriptor table. */ p->p_fd = fdinit(NULL, false); p->p_fdtol = NULL; /* Create the limits structures. */ p->p_limit = lim_alloc(); for (i = 0; i < RLIM_NLIMITS; i++) p->p_limit->pl_rlimit[i].rlim_cur = p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY; p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles; p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz; p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_max = maxdsiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz; /* Cast to avoid overflow on i386/PAE. */ pageablemem = ptoa((vm_paddr_t)vm_cnt.v_free_count); p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem; p->p_cpulimit = RLIM_INFINITY; /* Initialize resource accounting structures. */ racct_create(&p->p_racct); p->p_stats = pstats_alloc(); /* Allocate a prototype map so we have something to fork. */ pmap_pinit0(vmspace_pmap(&vmspace0)); p->p_vmspace = &vmspace0; vmspace0.vm_refcnt = 1; /* * proc0 is not expected to enter usermode, so there is no special * handling for sv_minuser here, like is done for exec_new_vmspace(). */ vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0), p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser); /* * Call the init and ctor for the new thread and proc. We wait * to do this until all other structures are fairly sane. */ EVENTHANDLER_INVOKE(process_init, p); EVENTHANDLER_INVOKE(thread_init, td); EVENTHANDLER_INVOKE(process_ctor, p); EVENTHANDLER_INVOKE(thread_ctor, td); /* * Charge root for one process. */ (void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0); PROC_LOCK(p); racct_add_force(p, RACCT_NPROC, 1); PROC_UNLOCK(p); } SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL); /* ARGSUSED*/ static void proc0_post(void *dummy __unused) { struct timespec ts; struct proc *p; struct rusage ru; struct thread *td; /* * Now we can look at the time, having had a chance to verify the * time from the filesystem. Pretend that proc0 started now. */ sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { microuptime(&p->p_stats->p_start); PROC_STATLOCK(p); rufetch(p, &ru); /* Clears thread stats */ PROC_STATUNLOCK(p); p->p_rux.rux_runtime = 0; p->p_rux.rux_uticks = 0; p->p_rux.rux_sticks = 0; p->p_rux.rux_iticks = 0; FOREACH_THREAD_IN_PROC(p, td) { td->td_runtime = 0; } } sx_sunlock(&allproc_lock); PCPU_SET(switchtime, cpu_ticks()); PCPU_SET(switchticks, ticks); /* * Give the ``random'' number generator a thump. */ nanotime(&ts); srandom(ts.tv_sec ^ ts.tv_nsec); } SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL); static void random_init(void *dummy __unused) { /* * After CPU has been started we have some randomness on most * platforms via get_cyclecount(). For platforms that don't * we will reseed random(9) in proc0_post() as well. */ srandom(get_cyclecount()); } SYSINIT(random, SI_SUB_RANDOM, SI_ORDER_FIRST, random_init, NULL); /* *************************************************************************** **** **** The following SYSINIT's and glue code should be moved to the **** respective files on a per subsystem basis. **** *************************************************************************** */ /* *************************************************************************** **** **** The following code probably belongs in another file, like **** kern/init_init.c. **** *************************************************************************** */ /* * List of paths to try when searching for "init". */ static char init_path[MAXPATHLEN] = #ifdef INIT_PATH __XSTRING(INIT_PATH); #else "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init"; #endif SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0, "Path used to search the init process"); /* * Shutdown timeout of init(8). * Unused within kernel, but used to control init(8), hence do not remove. */ #ifndef INIT_SHUTDOWN_TIMEOUT #define INIT_SHUTDOWN_TIMEOUT 120 #endif static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT; SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout, CTLFLAG_RW, &init_shutdown_timeout, 0, "Shutdown timeout of init(8). " "Unused within kernel, but used to control init(8)"); /* * Start the initial user process; try exec'ing each pathname in init_path. * The program is invoked with one argument containing the boot flags. */ static void start_init(void *dummy) { vm_offset_t addr; struct execve_args args; int options, error; char *var, *path, *next, *s; char *ucp, **uap, *arg0, *arg1; struct thread *td; struct proc *p; mtx_lock(&Giant); GIANT_REQUIRED; td = curthread; p = td->td_proc; vfs_mountroot(); /* Wipe GELI passphrase from the environment. */ kern_unsetenv("kern.geom.eli.passphrase"); /* * Need just enough stack to hold the faked-up "execve()" arguments. */ addr = p->p_sysent->sv_usrstack - PAGE_SIZE; if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE, 0, VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0) panic("init: couldn't allocate argument space"); p->p_vmspace->vm_maxsaddr = (caddr_t)addr; p->p_vmspace->vm_ssize = 1; if ((var = kern_getenv("init_path")) != NULL) { strlcpy(init_path, var, sizeof(init_path)); freeenv(var); } for (path = init_path; *path != '\0'; path = next) { while (*path == ':') path++; if (*path == '\0') break; for (next = path; *next != '\0' && *next != ':'; next++) /* nothing */ ; if (bootverbose) printf("start_init: trying %.*s\n", (int)(next - path), path); /* * Move out the boot flag argument. */ options = 0; ucp = (char *)p->p_sysent->sv_usrstack; (void)subyte(--ucp, 0); /* trailing zero */ if (boothowto & RB_SINGLE) { (void)subyte(--ucp, 's'); options = 1; } #ifdef notyet if (boothowto & RB_FASTBOOT) { (void)subyte(--ucp, 'f'); options = 1; } #endif #ifdef BOOTCDROM (void)subyte(--ucp, 'C'); options = 1; #endif if (options == 0) (void)subyte(--ucp, '-'); (void)subyte(--ucp, '-'); /* leading hyphen */ arg1 = ucp; /* * Move out the file name (also arg 0). */ (void)subyte(--ucp, 0); for (s = next - 1; s >= path; s--) (void)subyte(--ucp, *s); arg0 = ucp; /* * Move out the arg pointers. */ uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1)); (void)suword((caddr_t)--uap, (long)0); /* terminator */ (void)suword((caddr_t)--uap, (long)(intptr_t)arg1); (void)suword((caddr_t)--uap, (long)(intptr_t)arg0); /* * Point at the arguments. */ args.fname = arg0; args.argv = uap; args.envv = NULL; /* * Now try to exec the program. If can't for any reason * other than it doesn't exist, complain. * * Otherwise, return via fork_trampoline() all the way * to user mode as init! */ if ((error = sys_execve(td, &args)) == 0) { mtx_unlock(&Giant); return; } if (error != ENOENT) printf("exec %.*s: error %d\n", (int)(next - path), path, error); } printf("init: not found in path %s\n", init_path); panic("no init"); } /* * Like kproc_create(), but runs in it's own address space. * We do this early to reserve pid 1. * * Note special case - do not make it runnable yet. Other work * in progress will change this more. */ static void create_init(const void *udata __unused) { struct ucred *newcred, *oldcred; int error; error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc, NULL, 0); if (error) panic("cannot fork init: %d\n", error); KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1")); /* divorce init's credentials from the kernel's */ newcred = crget(); sx_xlock(&proctree_lock); PROC_LOCK(initproc); initproc->p_flag |= P_SYSTEM | P_INMEM; initproc->p_treeflag |= P_TREE_REAPER; LIST_INSERT_HEAD(&initproc->p_reaplist, &proc0, p_reapsibling); oldcred = initproc->p_ucred; crcopy(newcred, oldcred); #ifdef MAC mac_cred_create_init(newcred); #endif #ifdef AUDIT audit_cred_proc1(newcred); #endif proc_set_cred(initproc, newcred); PROC_UNLOCK(initproc); sx_xunlock(&proctree_lock); crfree(oldcred); cred_update_thread(FIRST_THREAD_IN_PROC(initproc)); cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL); } SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL); /* * Make it runnable now. */ static void kick_init(const void *udata __unused) { struct thread *td; td = FIRST_THREAD_IN_PROC(initproc); thread_lock(td); TD_SET_CAN_RUN(td); sched_add(td, SRQ_BORING); thread_unlock(td); } SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL); Index: projects/hps_head/sys/kern/kern_clocksource.c =================================================================== --- projects/hps_head/sys/kern/kern_clocksource.c (revision 282413) +++ projects/hps_head/sys/kern/kern_clocksource.c (revision 282414) @@ -1,949 +1,952 @@ /*- * Copyright (c) 2010-2013 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Common routines to manage event timers hardware. */ #include "opt_device_polling.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int cpu_deepest_sleep = 0; /* Deepest Cx state available. */ int cpu_disable_c2_sleep = 0; /* Timer dies in C2. */ int cpu_disable_c3_sleep = 0; /* Timer dies in C3. */ static void setuptimer(void); static void loadtimer(sbintime_t now, int first); static int doconfigtimer(void); static void configtimer(int start); static int round_freq(struct eventtimer *et, int freq); static sbintime_t getnextcpuevent(int idle); static sbintime_t getnextevent(void); static int handleevents(sbintime_t now, int fake); static struct mtx et_hw_mtx; #define ET_HW_LOCK(state) \ { \ if (timer->et_flags & ET_FLAGS_PERCPU) \ mtx_lock_spin(&(state)->et_hw_mtx); \ else \ mtx_lock_spin(&et_hw_mtx); \ } #define ET_HW_UNLOCK(state) \ { \ if (timer->et_flags & ET_FLAGS_PERCPU) \ mtx_unlock_spin(&(state)->et_hw_mtx); \ else \ mtx_unlock_spin(&et_hw_mtx); \ } static struct eventtimer *timer = NULL; static sbintime_t timerperiod; /* Timer period for periodic mode. */ static sbintime_t statperiod; /* statclock() events period. */ static sbintime_t profperiod; /* profclock() events period. */ static sbintime_t nexttick; /* Next global timer tick time. */ static u_int busy = 1; /* Reconfiguration is in progress. */ static int profiling; /* Profiling events enabled. */ static char timername[32]; /* Wanted timer. */ TUNABLE_STR("kern.eventtimer.timer", timername, sizeof(timername)); static int singlemul; /* Multiplier for periodic mode. */ SYSCTL_INT(_kern_eventtimer, OID_AUTO, singlemul, CTLFLAG_RWTUN, &singlemul, 0, "Multiplier for periodic mode"); static u_int idletick; /* Run periodic events when idle. */ SYSCTL_UINT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RWTUN, &idletick, 0, "Run periodic events when idle"); static int periodic; /* Periodic or one-shot mode. */ static int want_periodic; /* What mode to prefer. */ TUNABLE_INT("kern.eventtimer.periodic", &want_periodic); struct pcpu_state { struct mtx et_hw_mtx; /* Per-CPU timer mutex. */ u_int action; /* Reconfiguration requests. */ u_int handle; /* Immediate handle resuests. */ sbintime_t now; /* Last tick time. */ sbintime_t nextevent; /* Next scheduled event on this CPU. */ sbintime_t nexttick; /* Next timer tick time. */ sbintime_t nexthard; /* Next hardlock() event. */ sbintime_t nextstat; /* Next statclock() event. */ sbintime_t nextprof; /* Next profclock() event. */ sbintime_t nextcall; /* Next callout event. */ sbintime_t nextcallopt; /* Next optional callout event. */ int ipi; /* This CPU needs IPI. */ int idle; /* This CPU is in idle mode. */ }; static DPCPU_DEFINE(struct pcpu_state, timerstate); DPCPU_DEFINE(sbintime_t, hardclocktime); /* * Timer broadcast IPI handler. */ int hardclockintr(void) { sbintime_t now; struct pcpu_state *state; int done; if (doconfigtimer() || busy) return (FILTER_HANDLED); state = DPCPU_PTR(timerstate); now = state->now; CTR3(KTR_SPARE2, "ipi at %d: now %d.%08x", curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff)); done = handleevents(now, 0); return (done ? FILTER_HANDLED : FILTER_STRAY); } /* * Handle all events for specified time on this CPU */ static int handleevents(sbintime_t now, int fake) { sbintime_t t, *hct; struct trapframe *frame; struct pcpu_state *state; int usermode; int done, runs; + KASSERT(curthread->td_critnest != 0, + ("Must be in a critical section")); + CTR3(KTR_SPARE2, "handle at %d: now %d.%08x", curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff)); done = 0; if (fake) { frame = NULL; usermode = 0; } else { frame = curthread->td_intr_frame; usermode = TRAPF_USERMODE(frame); } state = DPCPU_PTR(timerstate); runs = 0; while (now >= state->nexthard) { state->nexthard += tick_sbt; runs++; } if (runs) { hct = DPCPU_PTR(hardclocktime); *hct = state->nexthard - tick_sbt; if (fake < 2) { hardclock_cnt(runs, usermode); done = 1; } } runs = 0; while (now >= state->nextstat) { state->nextstat += statperiod; runs++; } if (runs && fake < 2) { statclock_cnt(runs, usermode); done = 1; } if (profiling) { runs = 0; while (now >= state->nextprof) { state->nextprof += profperiod; runs++; } if (runs && !fake) { profclock_cnt(runs, usermode, TRAPF_PC(frame)); done = 1; } } else state->nextprof = state->nextstat; if (now >= state->nextcallopt) { state->nextcall = state->nextcallopt = SBT_MAX; callout_process(now); } t = getnextcpuevent(0); ET_HW_LOCK(state); if (!busy) { state->idle = 0; state->nextevent = t; loadtimer(now, (fake == 2) && (timer->et_flags & ET_FLAGS_PERCPU)); } ET_HW_UNLOCK(state); return (done); } /* * Schedule binuptime of the next event on current CPU. */ static sbintime_t getnextcpuevent(int idle) { sbintime_t event; struct pcpu_state *state; u_int hardfreq; state = DPCPU_PTR(timerstate); /* Handle hardclock() events, skipping some if CPU is idle. */ event = state->nexthard; if (idle) { hardfreq = (u_int)hz / 2; if (tc_min_ticktock_freq > 2 #ifdef SMP && curcpu == CPU_FIRST() #endif ) hardfreq = hz / tc_min_ticktock_freq; if (hardfreq > 1) event += tick_sbt * (hardfreq - 1); } /* Handle callout events. */ if (event > state->nextcall) event = state->nextcall; if (!idle) { /* If CPU is active - handle other types of events. */ if (event > state->nextstat) event = state->nextstat; if (profiling && event > state->nextprof) event = state->nextprof; } return (event); } /* * Schedule binuptime of the next event on all CPUs. */ static sbintime_t getnextevent(void) { struct pcpu_state *state; sbintime_t event; #ifdef SMP int cpu; #endif int c; state = DPCPU_PTR(timerstate); event = state->nextevent; c = -1; #ifdef SMP if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) { CPU_FOREACH(cpu) { state = DPCPU_ID_PTR(cpu, timerstate); if (event > state->nextevent) { event = state->nextevent; c = cpu; } } } #endif CTR4(KTR_SPARE2, "next at %d: next %d.%08x by %d", curcpu, (int)(event >> 32), (u_int)(event & 0xffffffff), c); return (event); } /* Hardware timer callback function. */ static void timercb(struct eventtimer *et, void *arg) { sbintime_t now; sbintime_t *next; struct pcpu_state *state; #ifdef SMP int cpu, bcast; #endif /* Do not touch anything if somebody reconfiguring timers. */ if (busy) return; /* Update present and next tick times. */ state = DPCPU_PTR(timerstate); if (et->et_flags & ET_FLAGS_PERCPU) { next = &state->nexttick; } else next = &nexttick; now = sbinuptime(); if (periodic) *next = now + timerperiod; else *next = -1; /* Next tick is not scheduled yet. */ state->now = now; CTR3(KTR_SPARE2, "intr at %d: now %d.%08x", curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff)); #ifdef SMP /* Prepare broadcasting to other CPUs for non-per-CPU timers. */ bcast = 0; if ((et->et_flags & ET_FLAGS_PERCPU) == 0 && smp_started) { CPU_FOREACH(cpu) { state = DPCPU_ID_PTR(cpu, timerstate); ET_HW_LOCK(state); state->now = now; if (now >= state->nextevent) { state->nextevent += SBT_1S; if (curcpu != cpu) { state->ipi = 1; bcast = 1; } } ET_HW_UNLOCK(state); } } #endif /* Handle events for this time on this CPU. */ handleevents(now, 0); #ifdef SMP /* Broadcast interrupt to other CPUs for non-per-CPU timers. */ if (bcast) { CPU_FOREACH(cpu) { if (curcpu == cpu) continue; state = DPCPU_ID_PTR(cpu, timerstate); if (state->ipi) { state->ipi = 0; ipi_cpu(cpu, IPI_HARDCLOCK); } } } #endif } /* * Load new value into hardware timer. */ static void loadtimer(sbintime_t now, int start) { struct pcpu_state *state; sbintime_t new; sbintime_t *next; uint64_t tmp; int eq; if (timer->et_flags & ET_FLAGS_PERCPU) { state = DPCPU_PTR(timerstate); next = &state->nexttick; } else next = &nexttick; if (periodic) { if (start) { /* * Try to start all periodic timers aligned * to period to make events synchronous. */ tmp = now % timerperiod; new = timerperiod - tmp; if (new < tmp) /* Left less then passed. */ new += timerperiod; CTR5(KTR_SPARE2, "load p at %d: now %d.%08x first in %d.%08x", curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff), (int)(new >> 32), (u_int)(new & 0xffffffff)); *next = new + now; et_start(timer, new, timerperiod); } } else { new = getnextevent(); eq = (new == *next); CTR4(KTR_SPARE2, "load at %d: next %d.%08x eq %d", curcpu, (int)(new >> 32), (u_int)(new & 0xffffffff), eq); if (!eq) { *next = new; et_start(timer, new - now, 0); } } } /* * Prepare event timer parameters after configuration changes. */ static void setuptimer(void) { int freq; if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0) periodic = 0; else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0) periodic = 1; singlemul = MIN(MAX(singlemul, 1), 20); freq = hz * singlemul; while (freq < (profiling ? profhz : stathz)) freq += hz; freq = round_freq(timer, freq); timerperiod = SBT_1S / freq; } /* * Reconfigure specified per-CPU timer on other CPU. Called from IPI handler. */ static int doconfigtimer(void) { sbintime_t now; struct pcpu_state *state; state = DPCPU_PTR(timerstate); switch (atomic_load_acq_int(&state->action)) { case 1: now = sbinuptime(); ET_HW_LOCK(state); loadtimer(now, 1); ET_HW_UNLOCK(state); state->handle = 0; atomic_store_rel_int(&state->action, 0); return (1); case 2: ET_HW_LOCK(state); et_stop(timer); ET_HW_UNLOCK(state); state->handle = 0; atomic_store_rel_int(&state->action, 0); return (1); } if (atomic_readandclear_int(&state->handle) && !busy) { now = sbinuptime(); handleevents(now, 0); return (1); } return (0); } /* * Reconfigure specified timer. * For per-CPU timers use IPI to make other CPUs to reconfigure. */ static void configtimer(int start) { sbintime_t now, next; struct pcpu_state *state; int cpu; if (start) { setuptimer(); now = sbinuptime(); } else now = 0; critical_enter(); ET_HW_LOCK(DPCPU_PTR(timerstate)); if (start) { /* Initialize time machine parameters. */ next = now + timerperiod; if (periodic) nexttick = next; else nexttick = -1; CPU_FOREACH(cpu) { state = DPCPU_ID_PTR(cpu, timerstate); state->now = now; if (!smp_started && cpu != CPU_FIRST()) state->nextevent = SBT_MAX; else state->nextevent = next; if (periodic) state->nexttick = next; else state->nexttick = -1; state->nexthard = next; state->nextstat = next; state->nextprof = next; state->nextcall = next; state->nextcallopt = next; hardclock_sync(cpu); } busy = 0; /* Start global timer or per-CPU timer of this CPU. */ loadtimer(now, 1); } else { busy = 1; /* Stop global timer or per-CPU timer of this CPU. */ et_stop(timer); } ET_HW_UNLOCK(DPCPU_PTR(timerstate)); #ifdef SMP /* If timer is global or there is no other CPUs yet - we are done. */ if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) { critical_exit(); return; } /* Set reconfigure flags for other CPUs. */ CPU_FOREACH(cpu) { state = DPCPU_ID_PTR(cpu, timerstate); atomic_store_rel_int(&state->action, (cpu == curcpu) ? 0 : ( start ? 1 : 2)); } /* Broadcast reconfigure IPI. */ ipi_all_but_self(IPI_HARDCLOCK); /* Wait for reconfiguration completed. */ restart: cpu_spinwait(); CPU_FOREACH(cpu) { if (cpu == curcpu) continue; state = DPCPU_ID_PTR(cpu, timerstate); if (atomic_load_acq_int(&state->action)) goto restart; } #endif critical_exit(); } /* * Calculate nearest frequency supported by hardware timer. */ static int round_freq(struct eventtimer *et, int freq) { uint64_t div; if (et->et_frequency != 0) { div = lmax((et->et_frequency + freq / 2) / freq, 1); if (et->et_flags & ET_FLAGS_POW2DIV) div = 1 << (flsl(div + div / 2) - 1); freq = (et->et_frequency + div / 2) / div; } if (et->et_min_period > SBT_1S) panic("Event timer \"%s\" doesn't support sub-second periods!", et->et_name); else if (et->et_min_period != 0) freq = min(freq, SBT2FREQ(et->et_min_period)); if (et->et_max_period < SBT_1S && et->et_max_period != 0) freq = max(freq, SBT2FREQ(et->et_max_period)); return (freq); } /* * Configure and start event timers (BSP part). */ void cpu_initclocks_bsp(void) { struct pcpu_state *state; int base, div, cpu; mtx_init(&et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN); CPU_FOREACH(cpu) { state = DPCPU_ID_PTR(cpu, timerstate); mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN); state->nextcall = SBT_MAX; state->nextcallopt = SBT_MAX; } periodic = want_periodic; /* Grab requested timer or the best of present. */ if (timername[0]) timer = et_find(timername, 0, 0); if (timer == NULL && periodic) { timer = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC); } if (timer == NULL) { timer = et_find(NULL, ET_FLAGS_ONESHOT, ET_FLAGS_ONESHOT); } if (timer == NULL && !periodic) { timer = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC); } if (timer == NULL) panic("No usable event timer found!"); et_init(timer, timercb, NULL, NULL); /* Adapt to timer capabilities. */ if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0) periodic = 0; else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0) periodic = 1; if (timer->et_flags & ET_FLAGS_C3STOP) cpu_disable_c3_sleep++; /* * We honor the requested 'hz' value. * We want to run stathz in the neighborhood of 128hz. * We would like profhz to run as often as possible. */ if (singlemul <= 0 || singlemul > 20) { if (hz >= 1500 || (hz % 128) == 0) singlemul = 1; else if (hz >= 750) singlemul = 2; else singlemul = 4; } if (periodic) { base = round_freq(timer, hz * singlemul); singlemul = max((base + hz / 2) / hz, 1); hz = (base + singlemul / 2) / singlemul; if (base <= 128) stathz = base; else { div = base / 128; if (div >= singlemul && (div % singlemul) == 0) div++; stathz = base / div; } profhz = stathz; while ((profhz + stathz) <= 128 * 64) profhz += stathz; profhz = round_freq(timer, profhz); } else { hz = round_freq(timer, hz); stathz = round_freq(timer, 127); profhz = round_freq(timer, stathz * 64); } tick = 1000000 / hz; tick_sbt = SBT_1S / hz; tick_bt = sbttobt(tick_sbt); statperiod = SBT_1S / stathz; profperiod = SBT_1S / profhz; ET_LOCK(); configtimer(1); ET_UNLOCK(); } /* * Start per-CPU event timers on APs. */ void cpu_initclocks_ap(void) { sbintime_t now; struct pcpu_state *state; struct thread *td; state = DPCPU_PTR(timerstate); now = sbinuptime(); ET_HW_LOCK(state); state->now = now; hardclock_sync(curcpu); spinlock_enter(); ET_HW_UNLOCK(state); td = curthread; td->td_intr_nesting_level++; handleevents(state->now, 2); td->td_intr_nesting_level--; spinlock_exit(); } /* * Switch to profiling clock rates. */ void cpu_startprofclock(void) { ET_LOCK(); if (profiling == 0) { if (periodic) { configtimer(0); profiling = 1; configtimer(1); } else profiling = 1; } else profiling++; ET_UNLOCK(); } /* * Switch to regular clock rates. */ void cpu_stopprofclock(void) { ET_LOCK(); if (profiling == 1) { if (periodic) { configtimer(0); profiling = 0; configtimer(1); } else profiling = 0; } else profiling--; ET_UNLOCK(); } /* * Switch to idle mode (all ticks handled). */ sbintime_t cpu_idleclock(void) { sbintime_t now, t; struct pcpu_state *state; if (idletick || busy || (periodic && (timer->et_flags & ET_FLAGS_PERCPU)) #ifdef DEVICE_POLLING || curcpu == CPU_FIRST() #endif ) return (-1); state = DPCPU_PTR(timerstate); if (periodic) now = state->now; else now = sbinuptime(); CTR3(KTR_SPARE2, "idle at %d: now %d.%08x", curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff)); t = getnextcpuevent(1); ET_HW_LOCK(state); state->idle = 1; state->nextevent = t; if (!periodic) loadtimer(now, 0); ET_HW_UNLOCK(state); return (MAX(t - now, 0)); } /* * Switch to active mode (skip empty ticks). */ void cpu_activeclock(void) { sbintime_t now; struct pcpu_state *state; struct thread *td; state = DPCPU_PTR(timerstate); if (state->idle == 0 || busy) return; if (periodic) now = state->now; else now = sbinuptime(); CTR3(KTR_SPARE2, "active at %d: now %d.%08x", curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff)); spinlock_enter(); td = curthread; td->td_intr_nesting_level++; handleevents(now, 1); td->td_intr_nesting_level--; spinlock_exit(); } /* * Change the frequency of the given timer. This changes et->et_frequency and * if et is the active timer it reconfigures the timer on all CPUs. This is * intended to be a private interface for the use of et_change_frequency() only. */ void cpu_et_frequency(struct eventtimer *et, uint64_t newfreq) { ET_LOCK(); if (et == timer) { configtimer(0); et->et_frequency = newfreq; configtimer(1); } else et->et_frequency = newfreq; ET_UNLOCK(); } void cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt) { struct pcpu_state *state; /* Do not touch anything if somebody reconfiguring timers. */ if (busy) return; CTR6(KTR_SPARE2, "new co at %d: on %d at %d.%08x - %d.%08x", curcpu, cpu, (int)(bt_opt >> 32), (u_int)(bt_opt & 0xffffffff), (int)(bt >> 32), (u_int)(bt & 0xffffffff)); state = DPCPU_ID_PTR(cpu, timerstate); ET_HW_LOCK(state); /* * If there is callout time already set earlier -- do nothing. * This check may appear redundant because we check already in * callout_process() but this double check guarantees we're safe * with respect to race conditions between interrupts execution * and scheduling. */ state->nextcallopt = bt_opt; if (bt >= state->nextcall) goto done; state->nextcall = bt; /* If there is some other event set earlier -- do nothing. */ if (bt >= state->nextevent) goto done; state->nextevent = bt; /* If timer is periodic -- there is nothing to reprogram. */ if (periodic) goto done; /* If timer is global or of the current CPU -- reprogram it. */ if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || cpu == curcpu) { loadtimer(sbinuptime(), 0); done: ET_HW_UNLOCK(state); return; } /* Otherwise make other CPU to reprogram it. */ state->handle = 1; ET_HW_UNLOCK(state); #ifdef SMP ipi_cpu(cpu, IPI_HARDCLOCK); #endif } /* * Report or change the active event timers hardware. */ static int sysctl_kern_eventtimer_timer(SYSCTL_HANDLER_ARGS) { char buf[32]; struct eventtimer *et; int error; ET_LOCK(); et = timer; snprintf(buf, sizeof(buf), "%s", et->et_name); ET_UNLOCK(); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); ET_LOCK(); et = timer; if (error != 0 || req->newptr == NULL || strcasecmp(buf, et->et_name) == 0) { ET_UNLOCK(); return (error); } et = et_find(buf, 0, 0); if (et == NULL) { ET_UNLOCK(); return (ENOENT); } configtimer(0); et_free(timer); if (et->et_flags & ET_FLAGS_C3STOP) cpu_disable_c3_sleep++; if (timer->et_flags & ET_FLAGS_C3STOP) cpu_disable_c3_sleep--; periodic = want_periodic; timer = et; et_init(timer, timercb, NULL, NULL); configtimer(1); ET_UNLOCK(); return (error); } SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_eventtimer_timer, "A", "Chosen event timer"); /* * Report or change the active event timer periodicity. */ static int sysctl_kern_eventtimer_periodic(SYSCTL_HANDLER_ARGS) { int error, val; val = periodic; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); ET_LOCK(); configtimer(0); periodic = want_periodic = val; configtimer(1); ET_UNLOCK(); return (error); } SYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_eventtimer_periodic, "I", "Enable event timer periodic mode"); #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(clocksource, db_show_clocksource) { struct pcpu_state *st; int c; CPU_FOREACH(c) { st = DPCPU_ID_PTR(c, timerstate); db_printf( "CPU %2d: action %d handle %d ipi %d idle %d\n" " now %#jx nevent %#jx (%jd)\n" " ntick %#jx (%jd) nhard %#jx (%jd)\n" " nstat %#jx (%jd) nprof %#jx (%jd)\n" " ncall %#jx (%jd) ncallopt %#jx (%jd)\n", c, st->action, st->handle, st->ipi, st->idle, (uintmax_t)st->now, (uintmax_t)st->nextevent, (uintmax_t)(st->nextevent - st->now) / tick_sbt, (uintmax_t)st->nexttick, (uintmax_t)(st->nexttick - st->now) / tick_sbt, (uintmax_t)st->nexthard, (uintmax_t)(st->nexthard - st->now) / tick_sbt, (uintmax_t)st->nextstat, (uintmax_t)(st->nextstat - st->now) / tick_sbt, (uintmax_t)st->nextprof, (uintmax_t)(st->nextprof - st->now) / tick_sbt, (uintmax_t)st->nextcall, (uintmax_t)(st->nextcall - st->now) / tick_sbt, (uintmax_t)st->nextcallopt, (uintmax_t)(st->nextcallopt - st->now) / tick_sbt); } } #endif Index: projects/hps_head/sys/kern/kern_condvar.c =================================================================== --- projects/hps_head/sys/kern/kern_condvar.c (revision 282413) +++ projects/hps_head/sys/kern/kern_condvar.c (revision 282414) @@ -1,457 +1,453 @@ /*- * Copyright (c) 2000 Jake Burkholder . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #include #endif /* * Common sanity checks for cv_wait* functions. */ #define CV_ASSERT(cvp, lock, td) do { \ KASSERT((td) != NULL, ("%s: td NULL", __func__)); \ KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__)); \ KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__)); \ KASSERT((lock) != NULL, ("%s: lock NULL", __func__)); \ } while (0) /* * Initialize a condition variable. Must be called before use. */ void cv_init(struct cv *cvp, const char *desc) { cvp->cv_description = desc; cvp->cv_waiters = 0; } /* * Destroy a condition variable. The condition variable must be re-initialized * in order to be re-used. */ void cv_destroy(struct cv *cvp) { #ifdef INVARIANTS struct sleepqueue *sq; sleepq_lock(cvp); sq = sleepq_lookup(cvp); sleepq_release(cvp); KASSERT(sq == NULL, ("%s: associated sleep queue non-empty", __func__)); #endif } /* * Wait on a condition variable. The current thread is placed on the condition * variable's wait queue and suspended. A cv_signal or cv_broadcast on the same * condition variable will resume the thread. The mutex is released before * sleeping and will be held on return. It is recommended that the mutex be * held when cv_signal or cv_broadcast are called. */ void _cv_wait(struct cv *cvp, struct lock_object *lock) { WITNESS_SAVE_DECL(lock_witness); struct lock_class *class; struct thread *td; uintptr_t lock_state; td = curthread; lock_state = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0, cv_wmesg(cvp)); #endif CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); class = LOCK_CLASS(lock); if (cold || panicstr) { /* * During autoconfiguration, just give interrupts * a chance, then just return. Don't run any other * thread or panic below, in case this is the idle * process and already asleep. */ return; } sleepq_lock(cvp); cvp->cv_waiters++; if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); if (lock != &Giant.lock_object) { if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); if (class->lc_flags & LC_SLEEPABLE) sleepq_lock(cvp); } sleepq_wait(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, cv_wmesg(cvp)); #endif PICKUP_GIANT(); if (lock != &Giant.lock_object) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } } /* * Wait on a condition variable. This function differs from cv_wait by * not aquiring the mutex after condition variable was signaled. */ void _cv_wait_unlock(struct cv *cvp, struct lock_object *lock) { struct lock_class *class; struct thread *td; td = curthread; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0, cv_wmesg(cvp)); #endif CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); KASSERT(lock != &Giant.lock_object, ("cv_wait_unlock cannot be used with Giant")); class = LOCK_CLASS(lock); if (cold || panicstr) { /* * During autoconfiguration, just give interrupts * a chance, then just return. Don't run any other * thread or panic below, in case this is the idle * process and already asleep. */ class->lc_unlock(lock); return; } sleepq_lock(cvp); cvp->cv_waiters++; DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); class->lc_unlock(lock); if (class->lc_flags & LC_SLEEPABLE) sleepq_lock(cvp); sleepq_wait(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, cv_wmesg(cvp)); #endif PICKUP_GIANT(); } /* * Wait on a condition variable, allowing interruption by signals. Return 0 if * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if * a signal was caught. If ERESTART is returned the system call should be * restarted if possible. */ int _cv_wait_sig(struct cv *cvp, struct lock_object *lock) { WITNESS_SAVE_DECL(lock_witness); struct lock_class *class; struct thread *td; uintptr_t lock_state; int rval; td = curthread; lock_state = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0, cv_wmesg(cvp)); #endif CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); class = LOCK_CLASS(lock); if (cold || panicstr) { /* * After a panic, or during autoconfiguration, just give * interrupts a chance, then just return; don't run any other * procs or panic below, in case this is the idle process and * already asleep. */ return (0); } sleepq_lock(cvp); cvp->cv_waiters++; if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR | SLEEPQ_INTERRUPTIBLE, 0); if (lock != &Giant.lock_object) { if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); if (class->lc_flags & LC_SLEEPABLE) sleepq_lock(cvp); } rval = sleepq_wait_sig(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, cv_wmesg(cvp)); #endif PICKUP_GIANT(); if (lock != &Giant.lock_object) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } return (rval); } /* * Wait on a condition variable for (at most) the value specified in sbt * argument. Returns 0 if the process was resumed by cv_signal or cv_broadcast, * EWOULDBLOCK if the timeout expires. */ int _cv_timedwait_sbt(struct cv *cvp, struct lock_object *lock, sbintime_t sbt, sbintime_t pr, int flags) { WITNESS_SAVE_DECL(lock_witness); struct lock_class *class; struct thread *td; int lock_state, rval; td = curthread; lock_state = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0, cv_wmesg(cvp)); #endif CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); class = LOCK_CLASS(lock); if (cold || panicstr) { /* * After a panic, or during autoconfiguration, just give * interrupts a chance, then just return; don't run any other * thread or panic below, in case this is the idle process and * already asleep. */ return 0; } sleepq_lock(cvp); cvp->cv_waiters++; if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); + sleepq_release(cvp); sleepq_set_timeout_sbt(cvp, sbt, pr, flags); if (lock != &Giant.lock_object) { - if (class->lc_flags & LC_SLEEPABLE) - sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); - if (class->lc_flags & LC_SLEEPABLE) - sleepq_lock(cvp); } + sleepq_lock(cvp); rval = sleepq_timedwait(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, cv_wmesg(cvp)); #endif PICKUP_GIANT(); if (lock != &Giant.lock_object) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } return (rval); } /* * Wait on a condition variable for (at most) the value specified in sbt * argument, allowing interruption by signals. * Returns 0 if the thread was resumed by cv_signal or cv_broadcast, * EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if a signal * was caught. */ int _cv_timedwait_sig_sbt(struct cv *cvp, struct lock_object *lock, sbintime_t sbt, sbintime_t pr, int flags) { WITNESS_SAVE_DECL(lock_witness); struct lock_class *class; struct thread *td; int lock_state, rval; td = curthread; lock_state = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0, cv_wmesg(cvp)); #endif CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); class = LOCK_CLASS(lock); if (cold || panicstr) { /* * After a panic, or during autoconfiguration, just give * interrupts a chance, then just return; don't run any other * thread or panic below, in case this is the idle process and * already asleep. */ return 0; } sleepq_lock(cvp); cvp->cv_waiters++; if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR | SLEEPQ_INTERRUPTIBLE, 0); + sleepq_release(cvp); sleepq_set_timeout_sbt(cvp, sbt, pr, flags); if (lock != &Giant.lock_object) { - if (class->lc_flags & LC_SLEEPABLE) - sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); - if (class->lc_flags & LC_SLEEPABLE) - sleepq_lock(cvp); } + sleepq_lock(cvp); rval = sleepq_timedwait_sig(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, cv_wmesg(cvp)); #endif PICKUP_GIANT(); if (lock != &Giant.lock_object) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } return (rval); } /* * Signal a condition variable, wakes up one waiting thread. Will also wakeup * the swapper if the process is not in memory, so that it can bring the * sleeping process in. Note that this may also result in additional threads * being made runnable. Should be called with the same mutex as was passed to * cv_wait held. */ void cv_signal(struct cv *cvp) { int wakeup_swapper; wakeup_swapper = 0; sleepq_lock(cvp); if (cvp->cv_waiters > 0) { cvp->cv_waiters--; wakeup_swapper = sleepq_signal(cvp, SLEEPQ_CONDVAR, 0, 0); } sleepq_release(cvp); if (wakeup_swapper) kick_proc0(); } /* * Broadcast a signal to a condition variable. Wakes up all waiting threads. * Should be called with the same mutex as was passed to cv_wait held. */ void cv_broadcastpri(struct cv *cvp, int pri) { int wakeup_swapper; /* * XXX sleepq_broadcast pri argument changed from -1 meaning * no pri to 0 meaning no pri. */ wakeup_swapper = 0; if (pri == -1) pri = 0; sleepq_lock(cvp); if (cvp->cv_waiters > 0) { cvp->cv_waiters = 0; wakeup_swapper = sleepq_broadcast(cvp, SLEEPQ_CONDVAR, pri, 0); } sleepq_release(cvp); if (wakeup_swapper) kick_proc0(); } Index: projects/hps_head/sys/kern/kern_lock.c =================================================================== --- projects/hps_head/sys/kern/kern_lock.c (revision 282413) +++ projects/hps_head/sys/kern/kern_lock.c (revision 282414) @@ -1,1551 +1,1553 @@ /*- * Copyright (c) 2008 Attilio Rao * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice(s), this list of conditions and the following disclaimer as * the first lines of this file unmodified other than the possible * addition of one or more copyright notices. * 2. Redistributions in binary form must reproduce the above copyright * notice(s), this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ #include "opt_adaptive_lockmgrs.h" #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #ifdef DEBUG_LOCKS #include #endif #include #include #include #ifdef DDB #include #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) == (LK_ADAPTIVE | LK_NOSHARE)); CTASSERT(LK_UNLOCKED == (LK_UNLOCKED & ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS))); #define SQ_EXCLUSIVE_QUEUE 0 #define SQ_SHARED_QUEUE 1 #ifndef INVARIANTS #define _lockmgr_assert(lk, what, file, line) #define TD_LOCKS_INC(td) #define TD_LOCKS_DEC(td) #else #define TD_LOCKS_INC(td) ((td)->td_locks++) #define TD_LOCKS_DEC(td) ((td)->td_locks--) #endif #define TD_SLOCKS_INC(td) ((td)->td_lk_slocks++) #define TD_SLOCKS_DEC(td) ((td)->td_lk_slocks--) #ifndef DEBUG_LOCKS #define STACK_PRINT(lk) #define STACK_SAVE(lk) #define STACK_ZERO(lk) #else #define STACK_PRINT(lk) stack_print_ddb(&(lk)->lk_stack) #define STACK_SAVE(lk) stack_save(&(lk)->lk_stack) #define STACK_ZERO(lk) stack_zero(&(lk)->lk_stack) #endif #define LOCK_LOG2(lk, string, arg1, arg2) \ if (LOCK_LOG_TEST(&(lk)->lock_object, 0)) \ CTR2(KTR_LOCK, (string), (arg1), (arg2)) #define LOCK_LOG3(lk, string, arg1, arg2, arg3) \ if (LOCK_LOG_TEST(&(lk)->lock_object, 0)) \ CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3)) #define GIANT_DECLARE \ int _i = 0; \ WITNESS_SAVE_DECL(Giant) #define GIANT_RESTORE() do { \ if (_i > 0) { \ while (_i--) \ mtx_lock(&Giant); \ WITNESS_RESTORE(&Giant.lock_object, Giant); \ } \ } while (0) #define GIANT_SAVE() do { \ if (mtx_owned(&Giant)) { \ WITNESS_SAVE(&Giant.lock_object, Giant); \ while (mtx_owned(&Giant)) { \ _i++; \ mtx_unlock(&Giant); \ } \ } \ } while (0) #define LK_CAN_SHARE(x, flags) \ (((x) & LK_SHARE) && \ (((x) & (LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) == 0 || \ (curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) || \ (curthread->td_pflags & TDP_DEADLKTREAT))) #define LK_TRYOP(x) \ ((x) & LK_NOWAIT) #define LK_CAN_WITNESS(x) \ (((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x)) #define LK_TRYWIT(x) \ (LK_TRYOP(x) ? LOP_TRYLOCK : 0) #define LK_CAN_ADAPT(lk, f) \ (((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 && \ ((f) & LK_SLEEPFAIL) == 0) #define lockmgr_disowned(lk) \ (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC) #define lockmgr_xlocked(lk) \ (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread) static void assert_lockmgr(const struct lock_object *lock, int how); #ifdef DDB static void db_show_lockmgr(const struct lock_object *lock); #endif static void lock_lockmgr(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_lockmgr(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_lockmgr(struct lock_object *lock); struct lock_class lock_class_lockmgr = { .lc_name = "lockmgr", .lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE, .lc_assert = assert_lockmgr, #ifdef DDB .lc_ddb_show = db_show_lockmgr, #endif .lc_lock = lock_lockmgr, .lc_unlock = unlock_lockmgr, #ifdef KDTRACE_HOOKS .lc_owner = owner_lockmgr, #endif }; #ifdef ADAPTIVE_LOCKMGRS static u_int alk_retries = 10; static u_int alk_loops = 10000; static SYSCTL_NODE(_debug, OID_AUTO, lockmgr, CTLFLAG_RD, NULL, "lockmgr debugging"); SYSCTL_UINT(_debug_lockmgr, OID_AUTO, retries, CTLFLAG_RW, &alk_retries, 0, ""); SYSCTL_UINT(_debug_lockmgr, OID_AUTO, loops, CTLFLAG_RW, &alk_loops, 0, ""); #endif static __inline struct thread * lockmgr_xholder(const struct lock *lk) { uintptr_t x; x = lk->lk_lock; return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x)); } /* * It assumes sleepq_lock held and returns with this one unheld. * It also assumes the generic interlock is sane and previously checked. * If LK_INTERLOCK is specified the interlock is not reacquired after the * sleep. */ static __inline int sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk, const char *wmesg, int pri, int timo, int queue) { GIANT_DECLARE; struct lock_class *class; int catch, error; class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL; catch = pri & PCATCH; pri &= PRIMASK; error = 0; LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk, (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared"); if (flags & LK_INTERLOCK) class->lc_unlock(ilk); if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0) lk->lk_exslpfail++; GIANT_SAVE(); sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ? SLEEPQ_INTERRUPTIBLE : 0), queue); - if ((flags & LK_TIMELOCK) && timo) + if ((flags & LK_TIMELOCK) && timo) { + sleepq_release(&lk->lock_object); sleepq_set_timeout(&lk->lock_object, timo); - + sleepq_lock(&lk->lock_object); + } /* * Decisional switch for real sleeping. */ if ((flags & LK_TIMELOCK) && timo && catch) error = sleepq_timedwait_sig(&lk->lock_object, pri); else if ((flags & LK_TIMELOCK) && timo) error = sleepq_timedwait(&lk->lock_object, pri); else if (catch) error = sleepq_wait_sig(&lk->lock_object, pri); else sleepq_wait(&lk->lock_object, pri); GIANT_RESTORE(); if ((flags & LK_SLEEPFAIL) && error == 0) error = ENOLCK; return (error); } static __inline int wakeupshlk(struct lock *lk, const char *file, int line) { uintptr_t v, x; u_int realexslp; int queue, wakeup_swapper; WITNESS_UNLOCK(&lk->lock_object, 0, file, line); LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line); wakeup_swapper = 0; for (;;) { x = lk->lk_lock; /* * If there is more than one shared lock held, just drop one * and return. */ if (LK_SHARERS(x) > 1) { if (atomic_cmpset_rel_ptr(&lk->lk_lock, x, x - LK_ONE_SHARER)) break; continue; } /* * If there are not waiters on the exclusive queue, drop the * lock quickly. */ if ((x & LK_ALL_WAITERS) == 0) { MPASS((x & ~LK_EXCLUSIVE_SPINNERS) == LK_SHARERS_LOCK(1)); if (atomic_cmpset_rel_ptr(&lk->lk_lock, x, LK_UNLOCKED)) break; continue; } /* * We should have a sharer with waiters, so enter the hard * path in order to handle wakeups correctly. */ sleepq_lock(&lk->lock_object); x = lk->lk_lock & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS); v = LK_UNLOCKED; /* * If the lock has exclusive waiters, give them preference in * order to avoid deadlock with shared runners up. * If interruptible sleeps left the exclusive queue empty * avoid a starvation for the threads sleeping on the shared * queue by giving them precedence and cleaning up the * exclusive waiters bit anyway. * Please note that lk_exslpfail count may be lying about * the real number of waiters with the LK_SLEEPFAIL flag on * because they may be used in conjuction with interruptible * sleeps so lk_exslpfail might be considered an 'upper limit' * bound, including the edge cases. */ realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE); if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) { if (lk->lk_exslpfail < realexslp) { lk->lk_exslpfail = 0; queue = SQ_EXCLUSIVE_QUEUE; v |= (x & LK_SHARED_WAITERS); } else { lk->lk_exslpfail = 0; LOCK_LOG2(lk, "%s: %p has only LK_SLEEPFAIL sleepers", __func__, lk); LOCK_LOG2(lk, "%s: %p waking up threads on the exclusive queue", __func__, lk); wakeup_swapper = sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE); queue = SQ_SHARED_QUEUE; } } else { /* * Exclusive waiters sleeping with LK_SLEEPFAIL on * and using interruptible sleeps/timeout may have * left spourious lk_exslpfail counts on, so clean * it up anyway. */ lk->lk_exslpfail = 0; queue = SQ_SHARED_QUEUE; } if (!atomic_cmpset_rel_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x, v)) { sleepq_release(&lk->lock_object); continue; } LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue", __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" : "exclusive"); wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue); sleepq_release(&lk->lock_object); break; } lock_profile_release_lock(&lk->lock_object); TD_LOCKS_DEC(curthread); TD_SLOCKS_DEC(curthread); return (wakeup_swapper); } static void assert_lockmgr(const struct lock_object *lock, int what) { panic("lockmgr locks do not support assertions"); } static void lock_lockmgr(struct lock_object *lock, uintptr_t how) { panic("lockmgr locks do not support sleep interlocking"); } static uintptr_t unlock_lockmgr(struct lock_object *lock) { panic("lockmgr locks do not support sleep interlocking"); } #ifdef KDTRACE_HOOKS static int owner_lockmgr(const struct lock_object *lock, struct thread **owner) { panic("lockmgr locks do not support owner inquiring"); } #endif void lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags) { int iflags; MPASS((flags & ~LK_INIT_MASK) == 0); ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock, ("%s: lockmgr not aligned for %s: %p", __func__, wmesg, &lk->lk_lock)); iflags = LO_SLEEPABLE | LO_UPGRADABLE; if (flags & LK_CANRECURSE) iflags |= LO_RECURSABLE; if ((flags & LK_NODUP) == 0) iflags |= LO_DUPOK; if (flags & LK_NOPROFILE) iflags |= LO_NOPROFILE; if ((flags & LK_NOWITNESS) == 0) iflags |= LO_WITNESS; if (flags & LK_QUIET) iflags |= LO_QUIET; if (flags & LK_IS_VNODE) iflags |= LO_IS_VNODE; iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE); lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags); lk->lk_lock = LK_UNLOCKED; lk->lk_recurse = 0; lk->lk_exslpfail = 0; lk->lk_timo = timo; lk->lk_pri = pri; STACK_ZERO(lk); } /* * XXX: Gross hacks to manipulate external lock flags after * initialization. Used for certain vnode and buf locks. */ void lockallowshare(struct lock *lk) { lockmgr_assert(lk, KA_XLOCKED); lk->lock_object.lo_flags &= ~LK_NOSHARE; } void lockdisableshare(struct lock *lk) { lockmgr_assert(lk, KA_XLOCKED); lk->lock_object.lo_flags |= LK_NOSHARE; } void lockallowrecurse(struct lock *lk) { lockmgr_assert(lk, KA_XLOCKED); lk->lock_object.lo_flags |= LO_RECURSABLE; } void lockdisablerecurse(struct lock *lk) { lockmgr_assert(lk, KA_XLOCKED); lk->lock_object.lo_flags &= ~LO_RECURSABLE; } void lockdestroy(struct lock *lk) { KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held")); KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed")); KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters")); lock_destroy(&lk->lock_object); } int __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk, const char *wmesg, int pri, int timo, const char *file, int line) { GIANT_DECLARE; struct lock_class *class; const char *iwmesg; uintptr_t tid, v, x; u_int op, realexslp; int error, ipri, itimo, queue, wakeup_swapper; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif #ifdef ADAPTIVE_LOCKMGRS volatile struct thread *owner; u_int i, spintries = 0; #endif error = 0; tid = (uintptr_t)curthread; op = (flags & LK_TYPE_MASK); iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg; ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri; itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo; MPASS((flags & ~LK_TOTAL_MASK) == 0); KASSERT((op & (op - 1)) == 0, ("%s: Invalid requested operation @ %s:%d", __func__, file, line)); KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 || (op != LK_DOWNGRADE && op != LK_RELEASE), ("%s: Invalid flags in regard of the operation desired @ %s:%d", __func__, file, line)); KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL, ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d", __func__, file, line)); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread, lk->lock_object.lo_name, file, line)); class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL; if (panicstr != NULL) { if (flags & LK_INTERLOCK) class->lc_unlock(ilk); return (0); } if (lk->lock_object.lo_flags & LK_NOSHARE) { switch (op) { case LK_SHARED: op = LK_EXCLUSIVE; break; case LK_UPGRADE: case LK_TRYUPGRADE: case LK_DOWNGRADE: _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line); if (flags & LK_INTERLOCK) class->lc_unlock(ilk); return (0); } } wakeup_swapper = 0; switch (op) { case LK_SHARED: if (LK_CAN_WITNESS(flags)) WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER, file, line, flags & LK_INTERLOCK ? ilk : NULL); for (;;) { x = lk->lk_lock; /* * If no other thread has an exclusive lock, or * no exclusive waiter is present, bump the count of * sharers. Since we have to preserve the state of * waiters, if we fail to acquire the shared lock * loop back and retry. */ if (LK_CAN_SHARE(x, flags)) { if (atomic_cmpset_acq_ptr(&lk->lk_lock, x, x + LK_ONE_SHARER)) break; continue; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&lk->lock_object, &contested, &waittime); /* * If the lock is already held by curthread in * exclusive way avoid a deadlock. */ if (LK_HOLDER(x) == tid) { LOCK_LOG2(lk, "%s: %p already held in exclusive mode", __func__, lk); error = EDEADLK; break; } /* * If the lock is expected to not sleep just give up * and return. */ if (LK_TRYOP(flags)) { LOCK_LOG2(lk, "%s: %p fails the try operation", __func__, lk); error = EBUSY; break; } #ifdef ADAPTIVE_LOCKMGRS /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. We need a double-state handle here * because for a failed acquisition the lock can be * either held in exclusive mode or shared mode * (for the writer starvation avoidance technique). */ if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 && LK_HOLDER(x) != LK_KERNPROC) { owner = (struct thread *)LK_HOLDER(x); if (LOCK_LOG_TEST(&lk->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, lk, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "spinning", "lockname:\"%s\"", lk->lock_object.lo_name); /* * If we are holding also an interlock drop it * in order to avoid a deadlock if the lockmgr * owner is adaptively spinning on the * interlock itself. */ if (flags & LK_INTERLOCK) { class->lc_unlock(ilk); flags &= ~LK_INTERLOCK; } GIANT_SAVE(); while (LK_HOLDER(lk->lk_lock) == (uintptr_t)owner && TD_IS_RUNNING(owner)) cpu_spinwait(); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(td), "running"); GIANT_RESTORE(); continue; } else if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) != 0 && LK_SHARERS(x) && spintries < alk_retries) { KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "spinning", "lockname:\"%s\"", lk->lock_object.lo_name); if (flags & LK_INTERLOCK) { class->lc_unlock(ilk); flags &= ~LK_INTERLOCK; } GIANT_SAVE(); spintries++; for (i = 0; i < alk_loops; i++) { if (LOCK_LOG_TEST(&lk->lock_object, 0)) CTR4(KTR_LOCK, "%s: shared spinning on %p with %u and %u", __func__, lk, spintries, i); x = lk->lk_lock; if ((x & LK_SHARE) == 0 || LK_CAN_SHARE(x, flags) != 0) break; cpu_spinwait(); } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(td), "running"); GIANT_RESTORE(); if (i != alk_loops) continue; } #endif /* * Acquire the sleepqueue chain lock because we * probabilly will need to manipulate waiters flags. */ sleepq_lock(&lk->lock_object); x = lk->lk_lock; /* * if the lock can be acquired in shared mode, try * again. */ if (LK_CAN_SHARE(x, flags)) { sleepq_release(&lk->lock_object); continue; } #ifdef ADAPTIVE_LOCKMGRS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owner) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 && LK_HOLDER(x) != LK_KERNPROC) { owner = (struct thread *)LK_HOLDER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&lk->lock_object); continue; } } #endif /* * Try to set the LK_SHARED_WAITERS flag. If we fail, * loop back and retry. */ if ((x & LK_SHARED_WAITERS) == 0) { if (!atomic_cmpset_acq_ptr(&lk->lk_lock, x, x | LK_SHARED_WAITERS)) { sleepq_release(&lk->lock_object); continue; } LOCK_LOG2(lk, "%s: %p set shared waiters flag", __func__, lk); } /* * As far as we have been unable to acquire the * shared lock and the shared waiters flag is set, * we will sleep. */ error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo, SQ_SHARED_QUEUE); flags &= ~LK_INTERLOCK; if (error) { LOCK_LOG3(lk, "%s: interrupted sleep for %p with %d", __func__, lk, error); break; } LOCK_LOG2(lk, "%s: %p resuming from the sleep queue", __func__, lk); } if (error == 0) { lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime, file, line); LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file, line); WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file, line); TD_LOCKS_INC(curthread); TD_SLOCKS_INC(curthread); STACK_SAVE(lk); } break; case LK_UPGRADE: case LK_TRYUPGRADE: _lockmgr_assert(lk, KA_SLOCKED, file, line); v = lk->lk_lock; x = v & LK_ALL_WAITERS; v &= LK_EXCLUSIVE_SPINNERS; /* * Try to switch from one shared lock to an exclusive one. * We need to preserve waiters flags during the operation. */ if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v, tid | x)) { LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file, line); WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file, line); TD_SLOCKS_DEC(curthread); break; } /* * In LK_TRYUPGRADE mode, do not drop the lock, * returning EBUSY instead. */ if (op == LK_TRYUPGRADE) { LOCK_LOG2(lk, "%s: %p failed the nowait upgrade", __func__, lk); error = EBUSY; break; } /* * We have been unable to succeed in upgrading, so just * give up the shared lock. */ wakeup_swapper |= wakeupshlk(lk, file, line); /* FALLTHROUGH */ case LK_EXCLUSIVE: if (LK_CAN_WITNESS(flags)) WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ? ilk : NULL); /* * If curthread already holds the lock and this one is * allowed to recurse, simply recurse on it. */ if (lockmgr_xlocked(lk)) { if ((flags & LK_CANRECURSE) == 0 && (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) { /* * If the lock is expected to not panic just * give up and return. */ if (LK_TRYOP(flags)) { LOCK_LOG2(lk, "%s: %p fails the try operation", __func__, lk); error = EBUSY; break; } if (flags & LK_INTERLOCK) class->lc_unlock(ilk); panic("%s: recursing on non recursive lockmgr %s @ %s:%d\n", __func__, iwmesg, file, line); } lk->lk_recurse++; LOCK_LOG2(lk, "%s: %p recursing", __func__, lk); LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line); WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file, line); TD_LOCKS_INC(curthread); break; } while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) { #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&lk->lock_object, &contested, &waittime); /* * If the lock is expected to not sleep just give up * and return. */ if (LK_TRYOP(flags)) { LOCK_LOG2(lk, "%s: %p fails the try operation", __func__, lk); error = EBUSY; break; } #ifdef ADAPTIVE_LOCKMGRS /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ x = lk->lk_lock; if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 && LK_HOLDER(x) != LK_KERNPROC) { owner = (struct thread *)LK_HOLDER(x); if (LOCK_LOG_TEST(&lk->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, lk, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "spinning", "lockname:\"%s\"", lk->lock_object.lo_name); /* * If we are holding also an interlock drop it * in order to avoid a deadlock if the lockmgr * owner is adaptively spinning on the * interlock itself. */ if (flags & LK_INTERLOCK) { class->lc_unlock(ilk); flags &= ~LK_INTERLOCK; } GIANT_SAVE(); while (LK_HOLDER(lk->lk_lock) == (uintptr_t)owner && TD_IS_RUNNING(owner)) cpu_spinwait(); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(td), "running"); GIANT_RESTORE(); continue; } else if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) != 0 && LK_SHARERS(x) && spintries < alk_retries) { if ((x & LK_EXCLUSIVE_SPINNERS) == 0 && !atomic_cmpset_ptr(&lk->lk_lock, x, x | LK_EXCLUSIVE_SPINNERS)) continue; KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "spinning", "lockname:\"%s\"", lk->lock_object.lo_name); if (flags & LK_INTERLOCK) { class->lc_unlock(ilk); flags &= ~LK_INTERLOCK; } GIANT_SAVE(); spintries++; for (i = 0; i < alk_loops; i++) { if (LOCK_LOG_TEST(&lk->lock_object, 0)) CTR4(KTR_LOCK, "%s: shared spinning on %p with %u and %u", __func__, lk, spintries, i); if ((lk->lk_lock & LK_EXCLUSIVE_SPINNERS) == 0) break; cpu_spinwait(); } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(td), "running"); GIANT_RESTORE(); if (i != alk_loops) continue; } #endif /* * Acquire the sleepqueue chain lock because we * probabilly will need to manipulate waiters flags. */ sleepq_lock(&lk->lock_object); x = lk->lk_lock; /* * if the lock has been released while we spun on * the sleepqueue chain lock just try again. */ if (x == LK_UNLOCKED) { sleepq_release(&lk->lock_object); continue; } #ifdef ADAPTIVE_LOCKMGRS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owner) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 && LK_HOLDER(x) != LK_KERNPROC) { owner = (struct thread *)LK_HOLDER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&lk->lock_object); continue; } } #endif /* * The lock can be in the state where there is a * pending queue of waiters, but still no owner. * This happens when the lock is contested and an * owner is going to claim the lock. * If curthread is the one successfully acquiring it * claim lock ownership and return, preserving waiters * flags. */ v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS); if ((x & ~v) == LK_UNLOCKED) { v &= ~LK_EXCLUSIVE_SPINNERS; if (atomic_cmpset_acq_ptr(&lk->lk_lock, x, tid | v)) { sleepq_release(&lk->lock_object); LOCK_LOG2(lk, "%s: %p claimed by a new writer", __func__, lk); break; } sleepq_release(&lk->lock_object); continue; } /* * Try to set the LK_EXCLUSIVE_WAITERS flag. If we * fail, loop back and retry. */ if ((x & LK_EXCLUSIVE_WAITERS) == 0) { if (!atomic_cmpset_ptr(&lk->lk_lock, x, x | LK_EXCLUSIVE_WAITERS)) { sleepq_release(&lk->lock_object); continue; } LOCK_LOG2(lk, "%s: %p set excl waiters flag", __func__, lk); } /* * As far as we have been unable to acquire the * exclusive lock and the exclusive waiters flag * is set, we will sleep. */ error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo, SQ_EXCLUSIVE_QUEUE); flags &= ~LK_INTERLOCK; if (error) { LOCK_LOG3(lk, "%s: interrupted sleep for %p with %d", __func__, lk, error); break; } LOCK_LOG2(lk, "%s: %p resuming from the sleep queue", __func__, lk); } if (error == 0) { lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime, file, line); LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line); WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file, line); TD_LOCKS_INC(curthread); STACK_SAVE(lk); } break; case LK_DOWNGRADE: _lockmgr_assert(lk, KA_XLOCKED, file, line); LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line); WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line); /* * Panic if the lock is recursed. */ if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) { if (flags & LK_INTERLOCK) class->lc_unlock(ilk); panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n", __func__, iwmesg, file, line); } TD_SLOCKS_INC(curthread); /* * In order to preserve waiters flags, just spin. */ for (;;) { x = lk->lk_lock; MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0); x &= LK_ALL_WAITERS; if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x, LK_SHARERS_LOCK(1) | x)) break; cpu_spinwait(); } break; case LK_RELEASE: _lockmgr_assert(lk, KA_LOCKED, file, line); x = lk->lk_lock; if ((x & LK_SHARE) == 0) { /* * As first option, treact the lock as if it has not * any waiter. * Fix-up the tid var if the lock has been disowned. */ if (LK_HOLDER(x) == LK_KERNPROC) tid = LK_KERNPROC; else { WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_DEC(curthread); } LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line); /* * The lock is held in exclusive mode. * If the lock is recursed also, then unrecurse it. */ if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) { LOCK_LOG2(lk, "%s: %p unrecursing", __func__, lk); lk->lk_recurse--; break; } if (tid != LK_KERNPROC) lock_profile_release_lock(&lk->lock_object); if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) break; sleepq_lock(&lk->lock_object); x = lk->lk_lock; v = LK_UNLOCKED; /* * If the lock has exclusive waiters, give them * preference in order to avoid deadlock with * shared runners up. * If interruptible sleeps left the exclusive queue * empty avoid a starvation for the threads sleeping * on the shared queue by giving them precedence * and cleaning up the exclusive waiters bit anyway. * Please note that lk_exslpfail count may be lying * about the real number of waiters with the * LK_SLEEPFAIL flag on because they may be used in * conjuction with interruptible sleeps so * lk_exslpfail might be considered an 'upper limit' * bound, including the edge cases. */ MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0); realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE); if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) { if (lk->lk_exslpfail < realexslp) { lk->lk_exslpfail = 0; queue = SQ_EXCLUSIVE_QUEUE; v |= (x & LK_SHARED_WAITERS); } else { lk->lk_exslpfail = 0; LOCK_LOG2(lk, "%s: %p has only LK_SLEEPFAIL sleepers", __func__, lk); LOCK_LOG2(lk, "%s: %p waking up threads on the exclusive queue", __func__, lk); wakeup_swapper = sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE); queue = SQ_SHARED_QUEUE; } } else { /* * Exclusive waiters sleeping with LK_SLEEPFAIL * on and using interruptible sleeps/timeout * may have left spourious lk_exslpfail counts * on, so clean it up anyway. */ lk->lk_exslpfail = 0; queue = SQ_SHARED_QUEUE; } LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue", __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" : "exclusive"); atomic_store_rel_ptr(&lk->lk_lock, v); wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue); sleepq_release(&lk->lock_object); break; } else wakeup_swapper = wakeupshlk(lk, file, line); break; case LK_DRAIN: if (LK_CAN_WITNESS(flags)) WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ? ilk : NULL); /* * Trying to drain a lock we already own will result in a * deadlock. */ if (lockmgr_xlocked(lk)) { if (flags & LK_INTERLOCK) class->lc_unlock(ilk); panic("%s: draining %s with the lock held @ %s:%d\n", __func__, iwmesg, file, line); } while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) { #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&lk->lock_object, &contested, &waittime); /* * If the lock is expected to not sleep just give up * and return. */ if (LK_TRYOP(flags)) { LOCK_LOG2(lk, "%s: %p fails the try operation", __func__, lk); error = EBUSY; break; } /* * Acquire the sleepqueue chain lock because we * probabilly will need to manipulate waiters flags. */ sleepq_lock(&lk->lock_object); x = lk->lk_lock; /* * if the lock has been released while we spun on * the sleepqueue chain lock just try again. */ if (x == LK_UNLOCKED) { sleepq_release(&lk->lock_object); continue; } v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS); if ((x & ~v) == LK_UNLOCKED) { v = (x & ~LK_EXCLUSIVE_SPINNERS); /* * If interruptible sleeps left the exclusive * queue empty avoid a starvation for the * threads sleeping on the shared queue by * giving them precedence and cleaning up the * exclusive waiters bit anyway. * Please note that lk_exslpfail count may be * lying about the real number of waiters with * the LK_SLEEPFAIL flag on because they may * be used in conjuction with interruptible * sleeps so lk_exslpfail might be considered * an 'upper limit' bound, including the edge * cases. */ if (v & LK_EXCLUSIVE_WAITERS) { queue = SQ_EXCLUSIVE_QUEUE; v &= ~LK_EXCLUSIVE_WAITERS; } else { /* * Exclusive waiters sleeping with * LK_SLEEPFAIL on and using * interruptible sleeps/timeout may * have left spourious lk_exslpfail * counts on, so clean it up anyway. */ MPASS(v & LK_SHARED_WAITERS); lk->lk_exslpfail = 0; queue = SQ_SHARED_QUEUE; v &= ~LK_SHARED_WAITERS; } if (queue == SQ_EXCLUSIVE_QUEUE) { realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE); if (lk->lk_exslpfail >= realexslp) { lk->lk_exslpfail = 0; queue = SQ_SHARED_QUEUE; v &= ~LK_SHARED_WAITERS; if (realexslp != 0) { LOCK_LOG2(lk, "%s: %p has only LK_SLEEPFAIL sleepers", __func__, lk); LOCK_LOG2(lk, "%s: %p waking up threads on the exclusive queue", __func__, lk); wakeup_swapper = sleepq_broadcast( &lk->lock_object, SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE); } } else lk->lk_exslpfail = 0; } if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) { sleepq_release(&lk->lock_object); continue; } LOCK_LOG3(lk, "%s: %p waking up all threads on the %s queue", __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" : "exclusive"); wakeup_swapper |= sleepq_broadcast( &lk->lock_object, SLEEPQ_LK, 0, queue); /* * If shared waiters have been woken up we need * to wait for one of them to acquire the lock * before to set the exclusive waiters in * order to avoid a deadlock. */ if (queue == SQ_SHARED_QUEUE) { for (v = lk->lk_lock; (v & LK_SHARE) && !LK_SHARERS(v); v = lk->lk_lock) cpu_spinwait(); } } /* * Try to set the LK_EXCLUSIVE_WAITERS flag. If we * fail, loop back and retry. */ if ((x & LK_EXCLUSIVE_WAITERS) == 0) { if (!atomic_cmpset_ptr(&lk->lk_lock, x, x | LK_EXCLUSIVE_WAITERS)) { sleepq_release(&lk->lock_object); continue; } LOCK_LOG2(lk, "%s: %p set drain waiters flag", __func__, lk); } /* * As far as we have been unable to acquire the * exclusive lock and the exclusive waiters flag * is set, we will sleep. */ if (flags & LK_INTERLOCK) { class->lc_unlock(ilk); flags &= ~LK_INTERLOCK; } GIANT_SAVE(); sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK, SQ_EXCLUSIVE_QUEUE); sleepq_wait(&lk->lock_object, ipri & PRIMASK); GIANT_RESTORE(); LOCK_LOG2(lk, "%s: %p resuming from the sleep queue", __func__, lk); } if (error == 0) { lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime, file, line); LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0, lk->lk_recurse, file, line); WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file, line); TD_LOCKS_INC(curthread); STACK_SAVE(lk); } break; default: if (flags & LK_INTERLOCK) class->lc_unlock(ilk); panic("%s: unknown lockmgr request 0x%x\n", __func__, op); } if (flags & LK_INTERLOCK) class->lc_unlock(ilk); if (wakeup_swapper) kick_proc0(); return (error); } void _lockmgr_disown(struct lock *lk, const char *file, int line) { uintptr_t tid, x; if (SCHEDULER_STOPPED()) return; tid = (uintptr_t)curthread; _lockmgr_assert(lk, KA_XLOCKED, file, line); /* * Panic if the lock is recursed. */ if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) panic("%s: disown a recursed lockmgr @ %s:%d\n", __func__, file, line); /* * If the owner is already LK_KERNPROC just skip the whole operation. */ if (LK_HOLDER(lk->lk_lock) != tid) return; lock_profile_release_lock(&lk->lock_object); LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line); WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_DEC(curthread); STACK_SAVE(lk); /* * In order to preserve waiters flags, just spin. */ for (;;) { x = lk->lk_lock; MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0); x &= LK_ALL_WAITERS; if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x, LK_KERNPROC | x)) return; cpu_spinwait(); } } void lockmgr_printinfo(const struct lock *lk) { struct thread *td; uintptr_t x; if (lk->lk_lock == LK_UNLOCKED) printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name); else if (lk->lk_lock & LK_SHARE) printf("lock type %s: SHARED (count %ju)\n", lk->lock_object.lo_name, (uintmax_t)LK_SHARERS(lk->lk_lock)); else { td = lockmgr_xholder(lk); if (td == (struct thread *)LK_KERNPROC) printf("lock type %s: EXCL by KERNPROC\n", lk->lock_object.lo_name); else printf("lock type %s: EXCL by thread %p " "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name, td, td->td_proc->p_pid, td->td_proc->p_comm, td->td_tid); } x = lk->lk_lock; if (x & LK_EXCLUSIVE_WAITERS) printf(" with exclusive waiters pending\n"); if (x & LK_SHARED_WAITERS) printf(" with shared waiters pending\n"); if (x & LK_EXCLUSIVE_SPINNERS) printf(" with exclusive spinners pending\n"); STACK_PRINT(lk); } int lockstatus(const struct lock *lk) { uintptr_t v, x; int ret; ret = LK_SHARED; x = lk->lk_lock; v = LK_HOLDER(x); if ((x & LK_SHARE) == 0) { if (v == (uintptr_t)curthread || v == LK_KERNPROC) ret = LK_EXCLUSIVE; else ret = LK_EXCLOTHER; } else if (x == LK_UNLOCKED) ret = 0; return (ret); } #ifdef INVARIANT_SUPPORT FEATURE(invariant_support, "Support for modules compiled with INVARIANTS option"); #ifndef INVARIANTS #undef _lockmgr_assert #endif void _lockmgr_assert(const struct lock *lk, int what, const char *file, int line) { int slocked = 0; if (panicstr != NULL) return; switch (what) { case KA_SLOCKED: case KA_SLOCKED | KA_NOTRECURSED: case KA_SLOCKED | KA_RECURSED: slocked = 1; case KA_LOCKED: case KA_LOCKED | KA_NOTRECURSED: case KA_LOCKED | KA_RECURSED: #ifdef WITNESS /* * We cannot trust WITNESS if the lock is held in exclusive * mode and a call to lockmgr_disown() happened. * Workaround this skipping the check if the lock is held in * exclusive mode even for the KA_LOCKED case. */ if (slocked || (lk->lk_lock & LK_SHARE)) { witness_assert(&lk->lock_object, what, file, line); break; } #endif if (lk->lk_lock == LK_UNLOCKED || ((lk->lk_lock & LK_SHARE) == 0 && (slocked || (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))))) panic("Lock %s not %slocked @ %s:%d\n", lk->lock_object.lo_name, slocked ? "share" : "", file, line); if ((lk->lk_lock & LK_SHARE) == 0) { if (lockmgr_recursed(lk)) { if (what & KA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", lk->lock_object.lo_name, file, line); } else if (what & KA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", lk->lock_object.lo_name, file, line); } break; case KA_XLOCKED: case KA_XLOCKED | KA_NOTRECURSED: case KA_XLOCKED | KA_RECURSED: if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)) panic("Lock %s not exclusively locked @ %s:%d\n", lk->lock_object.lo_name, file, line); if (lockmgr_recursed(lk)) { if (what & KA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", lk->lock_object.lo_name, file, line); } else if (what & KA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", lk->lock_object.lo_name, file, line); break; case KA_UNLOCKED: if (lockmgr_xlocked(lk) || lockmgr_disowned(lk)) panic("Lock %s exclusively locked @ %s:%d\n", lk->lock_object.lo_name, file, line); break; default: panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file, line); } } #endif #ifdef DDB int lockmgr_chain(struct thread *td, struct thread **ownerp) { struct lock *lk; lk = td->td_wchan; if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr) return (0); db_printf("blocked on lockmgr %s", lk->lock_object.lo_name); if (lk->lk_lock & LK_SHARE) db_printf("SHARED (count %ju)\n", (uintmax_t)LK_SHARERS(lk->lk_lock)); else db_printf("EXCL\n"); *ownerp = lockmgr_xholder(lk); return (1); } static void db_show_lockmgr(const struct lock_object *lock) { struct thread *td; const struct lock *lk; lk = (const struct lock *)lock; db_printf(" state: "); if (lk->lk_lock == LK_UNLOCKED) db_printf("UNLOCKED\n"); else if (lk->lk_lock & LK_SHARE) db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock)); else { td = lockmgr_xholder(lk); if (td == (struct thread *)LK_KERNPROC) db_printf("XLOCK: LK_KERNPROC\n"); else db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm); if (lockmgr_recursed(lk)) db_printf(" recursed: %d\n", lk->lk_recurse); } db_printf(" waiters: "); switch (lk->lk_lock & LK_ALL_WAITERS) { case LK_SHARED_WAITERS: db_printf("shared\n"); break; case LK_EXCLUSIVE_WAITERS: db_printf("exclusive\n"); break; case LK_ALL_WAITERS: db_printf("shared and exclusive\n"); break; default: db_printf("none\n"); } db_printf(" spinners: "); if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS) db_printf("exclusive\n"); else db_printf("none\n"); } #endif Index: projects/hps_head/sys/kern/kern_switch.c =================================================================== --- projects/hps_head/sys/kern/kern_switch.c (revision 282413) +++ projects/hps_head/sys/kern/kern_switch.c (revision 282414) @@ -1,513 +1,511 @@ /*- * Copyright (c) 2001 Jake Burkholder * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include /* Uncomment this to enable logging of critical_enter/exit. */ #if 0 #define KTR_CRITICAL KTR_SCHED #else #define KTR_CRITICAL 0 #endif #ifdef FULL_PREEMPTION #ifndef PREEMPTION #error "The FULL_PREEMPTION option requires the PREEMPTION option" #endif #endif CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS); /* * kern.sched.preemption allows user space to determine if preemption support * is compiled in or not. It is not currently a boot or runtime flag that * can be changed. */ #ifdef PREEMPTION static int kern_sched_preemption = 1; #else static int kern_sched_preemption = 0; #endif SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD, &kern_sched_preemption, 0, "Kernel preemption enabled"); /* * Support for scheduler stats exported via kern.sched.stats. All stats may * be reset with kern.sched.stats.reset = 1. Stats may be defined elsewhere * with SCHED_STAT_DEFINE(). */ #ifdef SCHED_STATS SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats"); /* Switch reasons from mi_switch(). */ DPCPU_DEFINE(long, sched_switch_stats[SWT_COUNT]); SCHED_STAT_DEFINE_VAR(uncategorized, &DPCPU_NAME(sched_switch_stats[SWT_NONE]), ""); SCHED_STAT_DEFINE_VAR(preempt, &DPCPU_NAME(sched_switch_stats[SWT_PREEMPT]), ""); SCHED_STAT_DEFINE_VAR(owepreempt, &DPCPU_NAME(sched_switch_stats[SWT_OWEPREEMPT]), ""); SCHED_STAT_DEFINE_VAR(turnstile, &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), ""); SCHED_STAT_DEFINE_VAR(sleepq, &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), ""); -SCHED_STAT_DEFINE_VAR(sleepqtimo, - &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), ""); SCHED_STAT_DEFINE_VAR(relinquish, &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), ""); SCHED_STAT_DEFINE_VAR(needresched, &DPCPU_NAME(sched_switch_stats[SWT_NEEDRESCHED]), ""); SCHED_STAT_DEFINE_VAR(idle, &DPCPU_NAME(sched_switch_stats[SWT_IDLE]), ""); SCHED_STAT_DEFINE_VAR(iwait, &DPCPU_NAME(sched_switch_stats[SWT_IWAIT]), ""); SCHED_STAT_DEFINE_VAR(suspend, &DPCPU_NAME(sched_switch_stats[SWT_SUSPEND]), ""); SCHED_STAT_DEFINE_VAR(remotepreempt, &DPCPU_NAME(sched_switch_stats[SWT_REMOTEPREEMPT]), ""); SCHED_STAT_DEFINE_VAR(remotewakeidle, &DPCPU_NAME(sched_switch_stats[SWT_REMOTEWAKEIDLE]), ""); static int sysctl_stats_reset(SYSCTL_HANDLER_ARGS) { struct sysctl_oid *p; uintptr_t counter; int error; int val; int i; val = 0; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val == 0) return (0); /* * Traverse the list of children of _kern_sched_stats and reset each * to 0. Skip the reset entry. */ SLIST_FOREACH(p, oidp->oid_parent, oid_link) { if (p == oidp || p->oid_arg1 == NULL) continue; counter = (uintptr_t)p->oid_arg1; CPU_FOREACH(i) { *(long *)(dpcpu_off[i] + counter) = 0; } } return (0); } SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL, 0, sysctl_stats_reset, "I", "Reset scheduler statistics"); #endif /************************************************************************ * Functions that manipulate runnability from a thread perspective. * ************************************************************************/ /* * Select the thread that will be run next. */ struct thread * choosethread(void) { struct thread *td; retry: td = sched_choose(); /* * If we are in panic, only allow system threads, * plus the one we are running in, to be run. */ if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 && (td->td_flags & TDF_INPANIC) == 0)) { /* note that it is no longer on the run queue */ TD_SET_CAN_RUN(td); goto retry; } TD_SET_RUNNING(td); return (td); } /* * Kernel thread preemption implementation. Critical sections mark * regions of code in which preemptions are not allowed. * * It might seem a good idea to inline critical_enter() but, in order * to prevent instructions reordering by the compiler, a __compiler_membar() * would have to be used here (the same as sched_pin()). The performance * penalty imposed by the membar could, then, produce slower code than * the function call itself, for most cases. */ void critical_enter(void) { struct thread *td; td = curthread; td->td_critnest++; CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td, (long)td->td_proc->p_pid, td->td_name, td->td_critnest); } void critical_exit(void) { struct thread *td; int flags; td = curthread; KASSERT(td->td_critnest != 0, ("critical_exit: td_critnest == 0")); if (td->td_critnest == 1) { td->td_critnest = 0; if (td->td_owepreempt && !kdb_active) { td->td_critnest = 1; thread_lock(td); td->td_critnest--; flags = SW_INVOL | SW_PREEMPT; if (TD_IS_IDLETHREAD(td)) flags |= SWT_IDLE; else flags |= SWT_OWEPREEMPT; mi_switch(flags, NULL); thread_unlock(td); } } else td->td_critnest--; CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td, (long)td->td_proc->p_pid, td->td_name, td->td_critnest); } /************************************************************************ * SYSTEM RUN QUEUE manipulations and tests * ************************************************************************/ /* * Initialize a run structure. */ void runq_init(struct runq *rq) { int i; bzero(rq, sizeof *rq); for (i = 0; i < RQ_NQS; i++) TAILQ_INIT(&rq->rq_queues[i]); } /* * Clear the status bit of the queue corresponding to priority level pri, * indicating that it is empty. */ static __inline void runq_clrbit(struct runq *rq, int pri) { struct rqbits *rqb; rqb = &rq->rq_status; CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d", rqb->rqb_bits[RQB_WORD(pri)], rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri), RQB_BIT(pri), RQB_WORD(pri)); rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri); } /* * Find the index of the first non-empty run queue. This is done by * scanning the status bits, a set bit indicates a non-empty queue. */ static __inline int runq_findbit(struct runq *rq) { struct rqbits *rqb; int pri; int i; rqb = &rq->rq_status; for (i = 0; i < RQB_LEN; i++) if (rqb->rqb_bits[i]) { pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW); CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d", rqb->rqb_bits[i], i, pri); return (pri); } return (-1); } static __inline int runq_findbit_from(struct runq *rq, u_char pri) { struct rqbits *rqb; rqb_word_t mask; int i; /* * Set the mask for the first word so we ignore priorities before 'pri'. */ mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1)); rqb = &rq->rq_status; again: for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) { mask = rqb->rqb_bits[i] & mask; if (mask == 0) continue; pri = RQB_FFS(mask) + (i << RQB_L2BPW); CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d", mask, i, pri); return (pri); } if (pri == 0) return (-1); /* * Wrap back around to the beginning of the list just once so we * scan the whole thing. */ pri = 0; goto again; } /* * Set the status bit of the queue corresponding to priority level pri, * indicating that it is non-empty. */ static __inline void runq_setbit(struct runq *rq, int pri) { struct rqbits *rqb; rqb = &rq->rq_status; CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d", rqb->rqb_bits[RQB_WORD(pri)], rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri), RQB_BIT(pri), RQB_WORD(pri)); rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri); } /* * Add the thread to the queue specified by its priority, and set the * corresponding status bit. */ void runq_add(struct runq *rq, struct thread *td, int flags) { struct rqhead *rqh; int pri; pri = td->td_priority / RQ_PPQ; td->td_rqindex = pri; runq_setbit(rq, pri); rqh = &rq->rq_queues[pri]; CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p", td, td->td_priority, pri, rqh); if (flags & SRQ_PREEMPTED) { TAILQ_INSERT_HEAD(rqh, td, td_runq); } else { TAILQ_INSERT_TAIL(rqh, td, td_runq); } } void runq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags) { struct rqhead *rqh; KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri)); td->td_rqindex = pri; runq_setbit(rq, pri); rqh = &rq->rq_queues[pri]; CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p", td, td->td_priority, pri, rqh); if (flags & SRQ_PREEMPTED) { TAILQ_INSERT_HEAD(rqh, td, td_runq); } else { TAILQ_INSERT_TAIL(rqh, td, td_runq); } } /* * Return true if there are runnable processes of any priority on the run * queue, false otherwise. Has no side effects, does not modify the run * queue structure. */ int runq_check(struct runq *rq) { struct rqbits *rqb; int i; rqb = &rq->rq_status; for (i = 0; i < RQB_LEN; i++) if (rqb->rqb_bits[i]) { CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d", rqb->rqb_bits[i], i); return (1); } CTR0(KTR_RUNQ, "runq_check: empty"); return (0); } /* * Find the highest priority process on the run queue. */ struct thread * runq_choose_fuzz(struct runq *rq, int fuzz) { struct rqhead *rqh; struct thread *td; int pri; while ((pri = runq_findbit(rq)) != -1) { rqh = &rq->rq_queues[pri]; /* fuzz == 1 is normal.. 0 or less are ignored */ if (fuzz > 1) { /* * In the first couple of entries, check if * there is one for our CPU as a preference. */ int count = fuzz; int cpu = PCPU_GET(cpuid); struct thread *td2; td2 = td = TAILQ_FIRST(rqh); while (count-- && td2) { if (td2->td_lastcpu == cpu) { td = td2; break; } td2 = TAILQ_NEXT(td2, td_runq); } } else td = TAILQ_FIRST(rqh); KASSERT(td != NULL, ("runq_choose_fuzz: no proc on busy queue")); CTR3(KTR_RUNQ, "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh); return (td); } CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri); return (NULL); } /* * Find the highest priority process on the run queue. */ struct thread * runq_choose(struct runq *rq) { struct rqhead *rqh; struct thread *td; int pri; while ((pri = runq_findbit(rq)) != -1) { rqh = &rq->rq_queues[pri]; td = TAILQ_FIRST(rqh); KASSERT(td != NULL, ("runq_choose: no thread on busy queue")); CTR3(KTR_RUNQ, "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh); return (td); } CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri); return (NULL); } struct thread * runq_choose_from(struct runq *rq, u_char idx) { struct rqhead *rqh; struct thread *td; int pri; if ((pri = runq_findbit_from(rq, idx)) != -1) { rqh = &rq->rq_queues[pri]; td = TAILQ_FIRST(rqh); KASSERT(td != NULL, ("runq_choose: no thread on busy queue")); CTR4(KTR_RUNQ, "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p", pri, td, td->td_rqindex, rqh); return (td); } CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri); return (NULL); } /* * Remove the thread from the queue specified by its priority, and clear the * corresponding status bit if the queue becomes empty. * Caller must set state afterwards. */ void runq_remove(struct runq *rq, struct thread *td) { runq_remove_idx(rq, td, NULL); } void runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx) { struct rqhead *rqh; u_char pri; KASSERT(td->td_flags & TDF_INMEM, ("runq_remove_idx: thread swapped out")); pri = td->td_rqindex; KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri)); rqh = &rq->rq_queues[pri]; CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p", td, td->td_priority, pri, rqh); TAILQ_REMOVE(rqh, td, td_runq); if (TAILQ_EMPTY(rqh)) { CTR0(KTR_RUNQ, "runq_remove_idx: empty"); runq_clrbit(rq, pri); if (idx != NULL && *idx == pri) *idx = (pri + 1) % RQ_NQS; } } Index: projects/hps_head/sys/kern/kern_synch.c =================================================================== --- projects/hps_head/sys/kern/kern_synch.c (revision 282413) +++ projects/hps_head/sys/kern/kern_synch.c (revision 282414) @@ -1,611 +1,618 @@ /*- * Copyright (c) 1982, 1986, 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #include #endif #include #define KTDSTATE(td) \ (((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \ ((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \ ((td)->td_inhibitors & TDI_SWAPPED) != 0 ? "swapped" : \ ((td)->td_inhibitors & TDI_LOCK) != 0 ? "blocked" : \ ((td)->td_inhibitors & TDI_IWAIT) != 0 ? "iwait" : "yielding") static void synch_setup(void *dummy); SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup, NULL); int hogticks; static uint8_t pause_wchan[MAXCPU]; static struct callout loadav_callout; struct loadavg averunnable = { {0, 0, 0}, FSCALE }; /* load average, of runnable procs */ /* * Constants for averages over 1, 5, and 15 minutes * when sampling at 5 second intervals. */ static fixpt_t cexp[3] = { 0.9200444146293232 * FSCALE, /* exp(-1/12) */ 0.9834714538216174 * FSCALE, /* exp(-1/60) */ 0.9944598480048967 * FSCALE, /* exp(-1/180) */ }; /* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */ SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, FSCALE, ""); static void loadav(void *arg); SDT_PROVIDER_DECLARE(sched); SDT_PROBE_DEFINE(sched, , , preempt); static void sleepinit(void *unused) { hogticks = (hz / 10) * 2; /* Default only. */ init_sleepqueues(); } /* * vmem tries to lock the sleepq mutexes when free'ing kva, so make sure * it is available. */ SYSINIT(sleepinit, SI_SUB_KMEM, SI_ORDER_ANY, sleepinit, 0); /* * General sleep call. Suspends the current thread until a wakeup is * performed on the specified identifier. The thread will then be made * runnable with the specified priority. Sleeps at most sbt units of time * (0 means no timeout). If pri includes the PCATCH flag, let signals * interrupt the sleep, otherwise ignore them while sleeping. Returns 0 if * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a * signal becomes pending, ERESTART is returned if the current system * call should be restarted if possible, and EINTR is returned if the system * call should be interrupted by the signal (return EINTR). * * The lock argument is unlocked before the caller is suspended, and * re-locked before _sleep() returns. If priority includes the PDROP * flag the lock is not re-locked before returning. */ int _sleep(void *ident, struct lock_object *lock, int priority, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) { struct thread *td; struct proc *p; struct lock_class *class; uintptr_t lock_state; int catch, pri, rval, sleepq_flags; WITNESS_SAVE_DECL(lock_witness); td = curthread; p = td->td_proc; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0, wmesg); #endif WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Sleeping on \"%s\"", wmesg); KASSERT(sbt != 0 || mtx_owned(&Giant) || lock != NULL, ("sleeping without a lock")); KASSERT(p != NULL, ("msleep1")); KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep")); if (priority & PDROP) KASSERT(lock != NULL && lock != &Giant.lock_object, ("PDROP requires a non-Giant lock")); if (lock != NULL) class = LOCK_CLASS(lock); else class = NULL; if (cold || SCHEDULER_STOPPED()) { /* * During autoconfiguration, just return; * don't run any other threads or panic below, * in case this is the idle thread and already asleep. * XXX: this used to do "s = splhigh(); splx(safepri); * splx(s);" to give interrupts a chance, but there is * no way to give interrupts a chance now. */ if (lock != NULL && priority & PDROP) class->lc_unlock(lock); return (0); } catch = priority & PCATCH; pri = priority & PRIMASK; /* * If we are already on a sleep queue, then remove us from that * sleep queue first. We have to do this to handle recursive * sleeps. */ if (TD_ON_SLEEPQ(td)) sleepq_remove(td, td->td_wchan); if ((uint8_t *)ident >= &pause_wchan[0] && (uint8_t *)ident <= &pause_wchan[MAXCPU - 1]) sleepq_flags = SLEEPQ_PAUSE; else sleepq_flags = SLEEPQ_SLEEP; if (catch) sleepq_flags |= SLEEPQ_INTERRUPTIBLE; sleepq_lock(ident); CTR5(KTR_PROC, "sleep: thread %ld (pid %ld, %s) on %s (%p)", td->td_tid, p->p_pid, td->td_name, wmesg, ident); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); if (lock != NULL && lock != &Giant.lock_object && !(class->lc_flags & LC_SLEEPABLE)) { WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); } else /* GCC needs to follow the Yellow Brick Road */ lock_state = -1; /* * We put ourselves on the sleep queue and start our timeout * before calling thread_suspend_check, as we could stop there, * and a wakeup or a SIGCONT (or both) could occur while we were * stopped without resuming us. Thus, we must be ready for sleep * when cursig() is called. If the wakeup happens while we're * stopped, then td will no longer be on a sleep queue upon * return from cursig(). */ sleepq_add(ident, lock, wmesg, sleepq_flags, 0); - if (sbt != 0) - sleepq_set_timeout_sbt(ident, sbt, pr, flags); if (lock != NULL && class->lc_flags & LC_SLEEPABLE) { sleepq_release(ident); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); + if (sbt != 0) + sleepq_set_timeout_sbt(ident, sbt, pr, flags); sleepq_lock(ident); + } else if (sbt != 0) { + sleepq_release(ident); + sleepq_set_timeout_sbt(ident, sbt, pr, flags); + sleepq_lock(ident); } if (sbt != 0 && catch) rval = sleepq_timedwait_sig(ident, pri); else if (sbt != 0) rval = sleepq_timedwait(ident, pri); else if (catch) rval = sleepq_wait_sig(ident, pri); else { sleepq_wait(ident, pri); rval = 0; } #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, wmesg); #endif PICKUP_GIANT(); if (lock != NULL && lock != &Giant.lock_object && !(priority & PDROP)) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } return (rval); } int msleep_spin_sbt(void *ident, struct mtx *mtx, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) { struct thread *td; struct proc *p; int rval; WITNESS_SAVE_DECL(mtx); td = curthread; p = td->td_proc; KASSERT(mtx != NULL, ("sleeping without a mutex")); KASSERT(p != NULL, ("msleep1")); KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep")); if (cold || SCHEDULER_STOPPED()) { /* * During autoconfiguration, just return; * don't run any other threads or panic below, * in case this is the idle thread and already asleep. * XXX: this used to do "s = splhigh(); splx(safepri); * splx(s);" to give interrupts a chance, but there is * no way to give interrupts a chance now. */ return (0); } sleepq_lock(ident); CTR5(KTR_PROC, "msleep_spin: thread %ld (pid %ld, %s) on %s (%p)", td->td_tid, p->p_pid, td->td_name, wmesg, ident); DROP_GIANT(); mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED); WITNESS_SAVE(&mtx->lock_object, mtx); mtx_unlock_spin(mtx); /* * We put ourselves on the sleep queue and start our timeout. */ sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0); - if (sbt != 0) + if (sbt != 0) { + sleepq_release(ident); sleepq_set_timeout_sbt(ident, sbt, pr, flags); + sleepq_lock(ident); + } /* * Can't call ktrace with any spin locks held so it can lock the * ktrace_mtx lock, and WITNESS_WARN considers it an error to hold * any spin lock. Thus, we have to drop the sleepq spin lock while * we handle those requests. This is safe since we have placed our * thread on the sleep queue already. */ #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) { sleepq_release(ident); ktrcsw(1, 0, wmesg); sleepq_lock(ident); } #endif #ifdef WITNESS sleepq_release(ident); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Sleeping on \"%s\"", wmesg); sleepq_lock(ident); #endif if (sbt != 0) rval = sleepq_timedwait(ident, 0); else { sleepq_wait(ident, 0); rval = 0; } #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, wmesg); #endif PICKUP_GIANT(); mtx_lock_spin(mtx); WITNESS_RESTORE(&mtx->lock_object, mtx); return (rval); } /* * pause() delays the calling thread by the given number of system ticks. * During cold bootup, pause() uses the DELAY() function instead of * the tsleep() function to do the waiting. The "timo" argument must be * greater than or equal to zero. A "timo" value of zero is equivalent * to a "timo" value of one. */ int pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) { KASSERT(sbt >= 0, ("pause: timeout must be >= 0")); /* silently convert invalid timeouts */ if (sbt == 0) sbt = tick_sbt; if (cold || kdb_active) { /* * We delay one second at a time to avoid overflowing the * system specific DELAY() function(s): */ while (sbt >= SBT_1S) { DELAY(1000000); sbt -= SBT_1S; } /* Do the delay remainder, if any */ sbt = (sbt + SBT_1US - 1) / SBT_1US; if (sbt > 0) DELAY(sbt); return (0); } return (_sleep(&pause_wchan[curcpu], NULL, 0, wmesg, sbt, pr, flags)); } /* * Make all threads sleeping on the specified identifier runnable. */ void wakeup(void *ident) { int wakeup_swapper; sleepq_lock(ident); wakeup_swapper = sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0); sleepq_release(ident); if (wakeup_swapper) { KASSERT(ident != &proc0, ("wakeup and wakeup_swapper and proc0")); kick_proc0(); } } /* * Make a thread sleeping on the specified identifier runnable. * May wake more than one thread if a target thread is currently * swapped out. */ void wakeup_one(void *ident) { int wakeup_swapper; sleepq_lock(ident); wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP, 0, 0); sleepq_release(ident); if (wakeup_swapper) kick_proc0(); } static void kdb_switch(void) { thread_unlock(curthread); kdb_backtrace(); kdb_reenter(); panic("%s: did not reenter debugger", __func__); } /* * The machine independent parts of context switching. */ void mi_switch(int flags, struct thread *newtd) { uint64_t runtime, new_switchtime; struct thread *td; struct proc *p; td = curthread; /* XXX */ THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED); p = td->td_proc; /* XXX */ KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code")); #ifdef INVARIANTS if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td)) mtx_assert(&Giant, MA_NOTOWNED); #endif KASSERT(td->td_critnest == 1 || panicstr, ("mi_switch: switch in a critical section")); KASSERT((flags & (SW_INVOL | SW_VOL)) != 0, ("mi_switch: switch must be voluntary or involuntary")); KASSERT(newtd != curthread, ("mi_switch: preempting back to ourself")); /* * Don't perform context switches from the debugger. */ if (kdb_active) kdb_switch(); if (SCHEDULER_STOPPED()) return; if (flags & SW_VOL) { td->td_ru.ru_nvcsw++; td->td_swvoltick = ticks; } else td->td_ru.ru_nivcsw++; #ifdef SCHED_STATS SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]); #endif /* * Compute the amount of time during which the current * thread was running, and add that to its total so far. */ new_switchtime = cpu_ticks(); runtime = new_switchtime - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, new_switchtime); td->td_generation++; /* bump preempt-detect counter */ PCPU_INC(cnt.v_swtch); PCPU_SET(switchticks, ticks); CTR4(KTR_PROC, "mi_switch: old thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td->td_sched, p->p_pid, td->td_name); #if (KTR_COMPILE & KTR_SCHED) != 0 if (TD_IS_IDLETHREAD(td)) KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle", "prio:%d", td->td_priority); else KTR_STATE3(KTR_SCHED, "thread", sched_tdname(td), KTDSTATE(td), "prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg, "lockname:\"%s\"", td->td_lockname); #endif SDT_PROBE0(sched, , , preempt); sched_switch(td, newtd, flags); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running", "prio:%d", td->td_priority); CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td->td_sched, p->p_pid, td->td_name); /* * If the last thread was exiting, finish cleaning it up. */ if ((td = PCPU_GET(deadthread))) { PCPU_SET(deadthread, NULL); thread_stash(td); } } /* * Change thread state to be runnable, placing it on the run queue if * it is in memory. If it is swapped out, return true so our caller * will know to awaken the swapper. */ int setrunnable(struct thread *td) { THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(td->td_proc->p_state != PRS_ZOMBIE, ("setrunnable: pid %d is a zombie", td->td_proc->p_pid)); switch (td->td_state) { case TDS_RUNNING: case TDS_RUNQ: return (0); case TDS_INHIBITED: /* * If we are only inhibited because we are swapped out * then arange to swap in this process. Otherwise just return. */ if (td->td_inhibitors != TDI_SWAPPED) return (0); /* FALLTHROUGH */ case TDS_CAN_RUN: break; default: printf("state is 0x%x", td->td_state); panic("setrunnable(2)"); } if ((td->td_flags & TDF_INMEM) == 0) { if ((td->td_flags & TDF_SWAPINREQ) == 0) { td->td_flags |= TDF_SWAPINREQ; return (1); } } else sched_wakeup(td); return (0); } /* * Compute a tenex style load average of a quantity on * 1, 5 and 15 minute intervals. */ static void loadav(void *arg) { int i, nrun; struct loadavg *avg; nrun = sched_load(); avg = &averunnable; for (i = 0; i < 3; i++) avg->ldavg[i] = (cexp[i] * avg->ldavg[i] + nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT; /* * Schedule the next update to occur after 5 seconds, but add a * random variation to avoid synchronisation with processes that * run at regular intervals. */ callout_reset_sbt(&loadav_callout, SBT_1US * (4000000 + (int)(random() % 2000001)), SBT_1US, loadav, NULL, C_DIRECT_EXEC | C_PREL(32)); } /* ARGSUSED */ static void synch_setup(void *dummy) { callout_init(&loadav_callout, CALLOUT_MPSAFE); /* Kick off timeout driven events by calling first time. */ loadav(NULL); } int should_yield(void) { return ((u_int)ticks - (u_int)curthread->td_swvoltick >= hogticks); } void maybe_yield(void) { if (should_yield()) kern_yield(PRI_USER); } void kern_yield(int prio) { struct thread *td; td = curthread; DROP_GIANT(); thread_lock(td); if (prio == PRI_USER) prio = td->td_user_pri; if (prio >= 0) sched_prio(td, prio); mi_switch(SW_VOL | SWT_RELINQUISH, NULL); thread_unlock(td); PICKUP_GIANT(); } /* * General purpose yield system call. */ int sys_yield(struct thread *td, struct yield_args *uap) { thread_lock(td); if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE) sched_prio(td, PRI_MAX_TIMESHARE); mi_switch(SW_VOL | SWT_RELINQUISH, NULL); thread_unlock(td); td->td_retval[0] = 0; return (0); } Index: projects/hps_head/sys/kern/kern_thread.c =================================================================== --- projects/hps_head/sys/kern/kern_thread.c (revision 282413) +++ projects/hps_head/sys/kern/kern_thread.c (revision 282414) @@ -1,1114 +1,1120 @@ /*- * Copyright (C) 2001 Julian Elischer . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice(s), this list of conditions and the following disclaimer as * the first lines of this file unmodified other than the possible * addition of one or more copyright notices. * 2. Redistributions in binary form must reproduce the above copyright * notice(s), this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ #include "opt_witness.h" #include "opt_hwpmc_hooks.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif #include #include #include #include #include SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE(proc, , , lwp__exit); /* * thread related storage. */ static uma_zone_t thread_zone; TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads); static struct mtx zombie_lock; MTX_SYSINIT(zombie_lock, &zombie_lock, "zombie lock", MTX_SPIN); static void thread_zombie(struct thread *); #define TID_BUFFER_SIZE 1024 struct mtx tid_lock; static struct unrhdr *tid_unrhdr; static lwpid_t tid_buffer[TID_BUFFER_SIZE]; static int tid_head, tid_tail; static MALLOC_DEFINE(M_TIDHASH, "tidhash", "thread hash"); struct tidhashhead *tidhashtbl; u_long tidhash; struct rwlock tidhash_lock; static lwpid_t tid_alloc(void) { lwpid_t tid; tid = alloc_unr(tid_unrhdr); if (tid != -1) return (tid); mtx_lock(&tid_lock); if (tid_head == tid_tail) { mtx_unlock(&tid_lock); return (-1); } tid = tid_buffer[tid_head]; tid_head = (tid_head + 1) % TID_BUFFER_SIZE; mtx_unlock(&tid_lock); return (tid); } static void tid_free(lwpid_t tid) { lwpid_t tmp_tid = -1; mtx_lock(&tid_lock); if ((tid_tail + 1) % TID_BUFFER_SIZE == tid_head) { tmp_tid = tid_buffer[tid_head]; tid_head = (tid_head + 1) % TID_BUFFER_SIZE; } tid_buffer[tid_tail] = tid; tid_tail = (tid_tail + 1) % TID_BUFFER_SIZE; mtx_unlock(&tid_lock); if (tmp_tid != -1) free_unr(tid_unrhdr, tmp_tid); } /* * Prepare a thread for use. */ static int thread_ctor(void *mem, int size, void *arg, int flags) { struct thread *td; td = (struct thread *)mem; td->td_state = TDS_INACTIVE; td->td_oncpu = NOCPU; td->td_tid = tid_alloc(); /* * Note that td_critnest begins life as 1 because the thread is not * running and is thereby implicitly waiting to be on the receiving * end of a context switch. */ td->td_critnest = 1; td->td_lend_user_pri = PRI_MAX; EVENTHANDLER_INVOKE(thread_ctor, td); #ifdef AUDIT audit_thread_alloc(td); #endif umtx_thread_alloc(td); + + mtx_init(&td->td_slpmutex, "td_slpmutex", NULL, MTX_SPIN); + callout_init_mtx(&td->td_slpcallout, &td->td_slpmutex, 0); return (0); } /* * Reclaim a thread after use. */ static void thread_dtor(void *mem, int size, void *arg) { struct thread *td; td = (struct thread *)mem; + /* make sure to drain any use of the "td->td_slpcallout" */ + callout_drain(&td->td_slpcallout); + mtx_destroy(&td->td_slpmutex); + #ifdef INVARIANTS /* Verify that this thread is in a safe state to free. */ switch (td->td_state) { case TDS_INHIBITED: case TDS_RUNNING: case TDS_CAN_RUN: case TDS_RUNQ: /* * We must never unlink a thread that is in one of * these states, because it is currently active. */ panic("bad state for thread unlinking"); /* NOTREACHED */ case TDS_INACTIVE: break; default: panic("bad thread state"); /* NOTREACHED */ } #endif #ifdef AUDIT audit_thread_free(td); #endif /* Free all OSD associated to this thread. */ osd_thread_exit(td); EVENTHANDLER_INVOKE(thread_dtor, td); tid_free(td->td_tid); } /* * Initialize type-stable parts of a thread (when newly created). */ static int thread_init(void *mem, int size, int flags) { struct thread *td; td = (struct thread *)mem; td->td_sleepqueue = sleepq_alloc(); td->td_turnstile = turnstile_alloc(); td->td_rlqe = NULL; EVENTHANDLER_INVOKE(thread_init, td); td->td_sched = (struct td_sched *)&td[1]; umtx_thread_init(td); td->td_kstack = 0; td->td_sel = NULL; return (0); } /* * Tear down type-stable parts of a thread (just before being discarded). */ static void thread_fini(void *mem, int size) { struct thread *td; td = (struct thread *)mem; EVENTHANDLER_INVOKE(thread_fini, td); rlqentry_free(td->td_rlqe); turnstile_free(td->td_turnstile); sleepq_free(td->td_sleepqueue); umtx_thread_fini(td); seltdfini(td); } /* * For a newly created process, * link up all the structures and its initial threads etc. * called from: * {arch}/{arch}/machdep.c {arch}_init(), init386() etc. * proc_dtor() (should go away) * proc_init() */ void proc_linkup0(struct proc *p, struct thread *td) { TAILQ_INIT(&p->p_threads); /* all threads in proc */ proc_linkup(p, td); } void proc_linkup(struct proc *p, struct thread *td) { sigqueue_init(&p->p_sigqueue, p); p->p_ksi = ksiginfo_alloc(1); if (p->p_ksi != NULL) { /* XXX p_ksi may be null if ksiginfo zone is not ready */ p->p_ksi->ksi_flags = KSI_EXT | KSI_INS; } LIST_INIT(&p->p_mqnotifier); p->p_numthreads = 0; thread_link(td, p); } /* * Initialize global thread allocation resources. */ void threadinit(void) { mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF); /* * pid_max cannot be greater than PID_MAX. * leave one number for thread0. */ tid_unrhdr = new_unrhdr(PID_MAX + 2, INT_MAX, &tid_lock); thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(), thread_ctor, thread_dtor, thread_init, thread_fini, 16 - 1, 0); tidhashtbl = hashinit(maxproc / 2, M_TIDHASH, &tidhash); rw_init(&tidhash_lock, "tidhash"); } /* * Place an unused thread on the zombie list. * Use the slpq as that must be unused by now. */ void thread_zombie(struct thread *td) { mtx_lock_spin(&zombie_lock); TAILQ_INSERT_HEAD(&zombie_threads, td, td_slpq); mtx_unlock_spin(&zombie_lock); } /* * Release a thread that has exited after cpu_throw(). */ void thread_stash(struct thread *td) { atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1); thread_zombie(td); } /* * Reap zombie resources. */ void thread_reap(void) { struct thread *td_first, *td_next; /* * Don't even bother to lock if none at this instant, * we really don't care about the next instant.. */ if (!TAILQ_EMPTY(&zombie_threads)) { mtx_lock_spin(&zombie_lock); td_first = TAILQ_FIRST(&zombie_threads); if (td_first) TAILQ_INIT(&zombie_threads); mtx_unlock_spin(&zombie_lock); while (td_first) { td_next = TAILQ_NEXT(td_first, td_slpq); if (td_first->td_ucred) crfree(td_first->td_ucred); thread_free(td_first); td_first = td_next; } } } /* * Allocate a thread. */ struct thread * thread_alloc(int pages) { struct thread *td; thread_reap(); /* check if any zombies to get */ td = (struct thread *)uma_zalloc(thread_zone, M_WAITOK); KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack")); if (!vm_thread_new(td, pages)) { uma_zfree(thread_zone, td); return (NULL); } cpu_thread_alloc(td); return (td); } int thread_alloc_stack(struct thread *td, int pages) { KASSERT(td->td_kstack == 0, ("thread_alloc_stack called on a thread with kstack")); if (!vm_thread_new(td, pages)) return (0); cpu_thread_alloc(td); return (1); } /* * Deallocate a thread. */ void thread_free(struct thread *td) { lock_profile_thread_exit(td); if (td->td_cpuset) cpuset_rel(td->td_cpuset); td->td_cpuset = NULL; cpu_thread_free(td); if (td->td_kstack != 0) vm_thread_dispose(td); uma_zfree(thread_zone, td); } /* * Discard the current thread and exit from its context. * Always called with scheduler locked. * * Because we can't free a thread while we're operating under its context, * push the current thread into our CPU's deadthread holder. This means * we needn't worry about someone else grabbing our context before we * do a cpu_throw(). */ void thread_exit(void) { uint64_t runtime, new_switchtime; struct thread *td; struct thread *td2; struct proc *p; int wakeup_swapper; td = curthread; p = td->td_proc; PROC_SLOCK_ASSERT(p, MA_OWNED); mtx_assert(&Giant, MA_NOTOWNED); PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(p != NULL, ("thread exiting without a process")); CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td, (long)p->p_pid, td->td_name); KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending")); #ifdef AUDIT AUDIT_SYSCALL_EXIT(0, td); #endif /* * drop FPU & debug register state storage, or any other * architecture specific resources that * would not be on a new untouched process. */ cpu_thread_exit(td); /* XXXSMP */ /* * The last thread is left attached to the process * So that the whole bundle gets recycled. Skip * all this stuff if we never had threads. * EXIT clears all sign of other threads when * it goes to single threading, so the last thread always * takes the short path. */ if (p->p_flag & P_HADTHREADS) { if (p->p_numthreads > 1) { atomic_add_int(&td->td_proc->p_exitthreads, 1); thread_unlink(td); td2 = FIRST_THREAD_IN_PROC(p); sched_exit_thread(td2, td); /* * The test below is NOT true if we are the * sole exiting thread. P_STOPPED_SINGLE is unset * in exit1() after it is the only survivor. */ if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { if (p->p_numthreads == p->p_suspcount) { thread_lock(p->p_singlethread); wakeup_swapper = thread_unsuspend_one( p->p_singlethread, p); thread_unlock(p->p_singlethread); if (wakeup_swapper) kick_proc0(); } } PCPU_SET(deadthread, td); } else { /* * The last thread is exiting.. but not through exit() */ panic ("thread_exit: Last thread exiting on its own"); } } #ifdef HWPMC_HOOKS /* * If this thread is part of a process that is being tracked by hwpmc(4), * inform the module of the thread's impending exit. */ if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); #endif PROC_UNLOCK(p); PROC_STATLOCK(p); thread_lock(td); PROC_SUNLOCK(p); /* Do the same timestamp bookkeeping that mi_switch() would do. */ new_switchtime = cpu_ticks(); runtime = new_switchtime - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, new_switchtime); PCPU_SET(switchticks, ticks); PCPU_INC(cnt.v_swtch); /* Save our resource usage in our process. */ td->td_ru.ru_nvcsw++; ruxagg(p, td); rucollect(&p->p_ru, &td->td_ru); PROC_STATUNLOCK(p); td->td_state = TDS_INACTIVE; #ifdef WITNESS witness_thread_exit(td); #endif CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td); sched_throw(td); panic("I'm a teapot!"); /* NOTREACHED */ } /* * Do any thread specific cleanups that may be needed in wait() * called with Giant, proc and schedlock not held. */ void thread_wait(struct proc *p) { struct thread *td; mtx_assert(&Giant, MA_NOTOWNED); KASSERT(p->p_numthreads == 1, ("multiple threads in thread_wait()")); KASSERT(p->p_exitthreads == 0, ("p_exitthreads leaking")); td = FIRST_THREAD_IN_PROC(p); /* Lock the last thread so we spin until it exits cpu_throw(). */ thread_lock(td); thread_unlock(td); lock_profile_thread_exit(td); cpuset_rel(td->td_cpuset); td->td_cpuset = NULL; cpu_thread_clean(td); crfree(td->td_ucred); thread_reap(); /* check for zombie threads etc. */ } /* * Link a thread to a process. * set up anything that needs to be initialized for it to * be used by the process. */ void thread_link(struct thread *td, struct proc *p) { /* * XXX This can't be enabled because it's called for proc0 before * its lock has been created. * PROC_LOCK_ASSERT(p, MA_OWNED); */ td->td_state = TDS_INACTIVE; td->td_proc = p; td->td_flags = TDF_INMEM; LIST_INIT(&td->td_contested); LIST_INIT(&td->td_lprof[0]); LIST_INIT(&td->td_lprof[1]); sigqueue_init(&td->td_sigqueue, p); - callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist); p->p_numthreads++; } /* * Called from: * thread_exit() */ void thread_unlink(struct thread *td) { struct proc *p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); TAILQ_REMOVE(&p->p_threads, td, td_plist); p->p_numthreads--; /* could clear a few other things here */ /* Must NOT clear links to proc! */ } static int calc_remaining(struct proc *p, int mode) { int remaining; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); if (mode == SINGLE_EXIT) remaining = p->p_numthreads; else if (mode == SINGLE_BOUNDARY) remaining = p->p_numthreads - p->p_boundary_count; else if (mode == SINGLE_NO_EXIT || mode == SINGLE_ALLPROC) remaining = p->p_numthreads - p->p_suspcount; else panic("calc_remaining: wrong mode %d", mode); return (remaining); } static int remain_for_mode(int mode) { return (mode == SINGLE_ALLPROC ? 0 : 1); } static int weed_inhib(int mode, struct thread *td2, struct proc *p) { int wakeup_swapper; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); THREAD_LOCK_ASSERT(td2, MA_OWNED); wakeup_swapper = 0; switch (mode) { case SINGLE_EXIT: if (TD_IS_SUSPENDED(td2)) wakeup_swapper |= thread_unsuspend_one(td2, p); if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0) wakeup_swapper |= sleepq_abort(td2, EINTR); break; case SINGLE_BOUNDARY: if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0) wakeup_swapper |= thread_unsuspend_one(td2, p); if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0) wakeup_swapper |= sleepq_abort(td2, ERESTART); break; case SINGLE_NO_EXIT: if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0) wakeup_swapper |= thread_unsuspend_one(td2, p); if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0) wakeup_swapper |= sleepq_abort(td2, ERESTART); break; case SINGLE_ALLPROC: /* * ALLPROC suspend tries to avoid spurious EINTR for * threads sleeping interruptable, by suspending the * thread directly, similarly to sig_suspend_threads(). * Since such sleep is not performed at the user * boundary, TDF_BOUNDARY flag is not set, and TDF_ALLPROCSUSP * is used to avoid immediate un-suspend. */ if (TD_IS_SUSPENDED(td2) && (td2->td_flags & (TDF_BOUNDARY | TDF_ALLPROCSUSP)) == 0) wakeup_swapper |= thread_unsuspend_one(td2, p); if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0) { if ((td2->td_flags & TDF_SBDRY) == 0) { thread_suspend_one(td2); td2->td_flags |= TDF_ALLPROCSUSP; } else { wakeup_swapper |= sleepq_abort(td2, ERESTART); } } break; } return (wakeup_swapper); } /* * Enforce single-threading. * * Returns 1 if the caller must abort (another thread is waiting to * exit the process or similar). Process is locked! * Returns 0 when you are successfully the only thread running. * A process has successfully single threaded in the suspend mode when * There are no threads in user mode. Threads in the kernel must be * allowed to continue until they get to the user boundary. They may even * copy out their return values and data before suspending. They may however be * accelerated in reaching the user boundary as we will wake up * any sleeping threads that are interruptable. (PCATCH). */ int thread_single(struct proc *p, int mode) { struct thread *td; struct thread *td2; int remaining, wakeup_swapper; td = curthread; KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY || mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT, ("invalid mode %d", mode)); /* * If allowing non-ALLPROC singlethreading for non-curproc * callers, calc_remaining() and remain_for_mode() should be * adjusted to also account for td->td_proc != p. For now * this is not implemented because it is not used. */ KASSERT((mode == SINGLE_ALLPROC && td->td_proc != p) || (mode != SINGLE_ALLPROC && td->td_proc == p), ("mode %d proc %p curproc %p", mode, p, td->td_proc)); mtx_assert(&Giant, MA_NOTOWNED); PROC_LOCK_ASSERT(p, MA_OWNED); if ((p->p_flag & P_HADTHREADS) == 0 && mode != SINGLE_ALLPROC) return (0); /* Is someone already single threading? */ if (p->p_singlethread != NULL && p->p_singlethread != td) return (1); if (mode == SINGLE_EXIT) { p->p_flag |= P_SINGLE_EXIT; p->p_flag &= ~P_SINGLE_BOUNDARY; } else { p->p_flag &= ~P_SINGLE_EXIT; if (mode == SINGLE_BOUNDARY) p->p_flag |= P_SINGLE_BOUNDARY; else p->p_flag &= ~P_SINGLE_BOUNDARY; } if (mode == SINGLE_ALLPROC) p->p_flag |= P_TOTAL_STOP; p->p_flag |= P_STOPPED_SINGLE; PROC_SLOCK(p); p->p_singlethread = td; remaining = calc_remaining(p, mode); while (remaining != remain_for_mode(mode)) { if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE) goto stopme; wakeup_swapper = 0; FOREACH_THREAD_IN_PROC(p, td2) { if (td2 == td) continue; thread_lock(td2); td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK; if (TD_IS_INHIBITED(td2)) { wakeup_swapper |= weed_inhib(mode, td2, p); #ifdef SMP } else if (TD_IS_RUNNING(td2) && td != td2) { forward_signal(td2); #endif } thread_unlock(td2); } if (wakeup_swapper) kick_proc0(); remaining = calc_remaining(p, mode); /* * Maybe we suspended some threads.. was it enough? */ if (remaining == remain_for_mode(mode)) break; stopme: /* * Wake us up when everyone else has suspended. * In the mean time we suspend as well. */ thread_suspend_switch(td, p); remaining = calc_remaining(p, mode); } if (mode == SINGLE_EXIT) { /* * Convert the process to an unthreaded process. The * SINGLE_EXIT is called by exit1() or execve(), in * both cases other threads must be retired. */ KASSERT(p->p_numthreads == 1, ("Unthreading with >1 threads")); p->p_singlethread = NULL; p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_HADTHREADS); /* * Wait for any remaining threads to exit cpu_throw(). */ while (p->p_exitthreads != 0) { PROC_SUNLOCK(p); PROC_UNLOCK(p); sched_relinquish(td); PROC_LOCK(p); PROC_SLOCK(p); } } PROC_SUNLOCK(p); return (0); } bool thread_suspend_check_needed(void) { struct proc *p; struct thread *td; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); return (P_SHOULDSTOP(p) || ((p->p_flag & P_TRACED) != 0 && (td->td_dbgflags & TDB_SUSPEND) != 0)); } /* * Called in from locations that can safely check to see * whether we have to suspend or at least throttle for a * single-thread event (e.g. fork). * * Such locations include userret(). * If the "return_instead" argument is non zero, the thread must be able to * accept 0 (caller may continue), or 1 (caller must abort) as a result. * * The 'return_instead' argument tells the function if it may do a * thread_exit() or suspend, or whether the caller must abort and back * out instead. * * If the thread that set the single_threading request has set the * P_SINGLE_EXIT bit in the process flags then this call will never return * if 'return_instead' is false, but will exit. * * P_SINGLE_EXIT | return_instead == 0| return_instead != 0 *---------------+--------------------+--------------------- * 0 | returns 0 | returns 0 or 1 * | when ST ends | immediately *---------------+--------------------+--------------------- * 1 | thread exits | returns 1 * | | immediately * 0 = thread_exit() or suspension ok, * other = return error instead of stopping the thread. * * While a full suspension is under effect, even a single threading * thread would be suspended if it made this call (but it shouldn't). * This call should only be made from places where * thread_exit() would be safe as that may be the outcome unless * return_instead is set. */ int thread_suspend_check(int return_instead) { struct thread *td; struct proc *p; int wakeup_swapper; td = curthread; p = td->td_proc; mtx_assert(&Giant, MA_NOTOWNED); PROC_LOCK_ASSERT(p, MA_OWNED); while (thread_suspend_check_needed()) { if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { KASSERT(p->p_singlethread != NULL, ("singlethread not set")); /* * The only suspension in action is a * single-threading. Single threader need not stop. * XXX Should be safe to access unlocked * as it can only be set to be true by us. */ if (p->p_singlethread == td) return (0); /* Exempt from stopping. */ } if ((p->p_flag & P_SINGLE_EXIT) && return_instead) return (EINTR); /* Should we goto user boundary if we didn't come from there? */ if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE && (p->p_flag & P_SINGLE_BOUNDARY) && return_instead) return (ERESTART); /* * Ignore suspend requests for stop signals if they * are deferred. */ if ((P_SHOULDSTOP(p) == P_STOPPED_SIG || (p->p_flag & P_TOTAL_STOP) != 0) && (td->td_flags & TDF_SBDRY) != 0) { KASSERT(return_instead, ("TDF_SBDRY set for unsafe thread_suspend_check")); return (0); } /* * If the process is waiting for us to exit, * this thread should just suicide. * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE. */ if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) { PROC_UNLOCK(p); tidhash_remove(td); PROC_LOCK(p); tdsigcleanup(td); umtx_thread_exit(td); PROC_SLOCK(p); thread_stopped(p); thread_exit(); } PROC_SLOCK(p); thread_stopped(p); if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { if (p->p_numthreads == p->p_suspcount + 1) { thread_lock(p->p_singlethread); wakeup_swapper = thread_unsuspend_one(p->p_singlethread, p); thread_unlock(p->p_singlethread); if (wakeup_swapper) kick_proc0(); } } PROC_UNLOCK(p); thread_lock(td); /* * When a thread suspends, it just * gets taken off all queues. */ thread_suspend_one(td); if (return_instead == 0) { p->p_boundary_count++; td->td_flags |= TDF_BOUNDARY; } PROC_SUNLOCK(p); mi_switch(SW_INVOL | SWT_SUSPEND, NULL); if (return_instead == 0) td->td_flags &= ~TDF_BOUNDARY; thread_unlock(td); PROC_LOCK(p); if (return_instead == 0) { PROC_SLOCK(p); p->p_boundary_count--; PROC_SUNLOCK(p); } } return (0); } void thread_suspend_switch(struct thread *td, struct proc *p) { KASSERT(!TD_IS_SUSPENDED(td), ("already suspended")); PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); /* * We implement thread_suspend_one in stages here to avoid * dropping the proc lock while the thread lock is owned. */ if (p == td->td_proc) { thread_stopped(p); p->p_suspcount++; } PROC_UNLOCK(p); thread_lock(td); td->td_flags &= ~TDF_NEEDSUSPCHK; TD_SET_SUSPENDED(td); sched_sleep(td, 0); PROC_SUNLOCK(p); DROP_GIANT(); mi_switch(SW_VOL | SWT_SUSPEND, NULL); thread_unlock(td); PICKUP_GIANT(); PROC_LOCK(p); PROC_SLOCK(p); } void thread_suspend_one(struct thread *td) { struct proc *p; p = td->td_proc; PROC_SLOCK_ASSERT(p, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(!TD_IS_SUSPENDED(td), ("already suspended")); p->p_suspcount++; td->td_flags &= ~TDF_NEEDSUSPCHK; TD_SET_SUSPENDED(td); sched_sleep(td, 0); } int thread_unsuspend_one(struct thread *td, struct proc *p) { THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended")); TD_CLR_SUSPENDED(td); td->td_flags &= ~TDF_ALLPROCSUSP; if (td->td_proc == p) { PROC_SLOCK_ASSERT(p, MA_OWNED); p->p_suspcount--; } return (setrunnable(td)); } /* * Allow all threads blocked by single threading to continue running. */ void thread_unsuspend(struct proc *p) { struct thread *td; int wakeup_swapper; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); wakeup_swapper = 0; if (!P_SHOULDSTOP(p)) { FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (TD_IS_SUSPENDED(td)) { wakeup_swapper |= thread_unsuspend_one(td, p); } thread_unlock(td); } } else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) && (p->p_numthreads == p->p_suspcount)) { /* * Stopping everything also did the job for the single * threading request. Now we've downgraded to single-threaded, * let it continue. */ if (p->p_singlethread->td_proc == p) { thread_lock(p->p_singlethread); wakeup_swapper = thread_unsuspend_one( p->p_singlethread, p); thread_unlock(p->p_singlethread); } } if (wakeup_swapper) kick_proc0(); } /* * End the single threading mode.. */ void thread_single_end(struct proc *p, int mode) { struct thread *td; int wakeup_swapper; KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY || mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT, ("invalid mode %d", mode)); PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT((mode == SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) != 0) || (mode != SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) == 0), ("mode %d does not match P_TOTAL_STOP", mode)); p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY | P_TOTAL_STOP); PROC_SLOCK(p); p->p_singlethread = NULL; wakeup_swapper = 0; /* * If there are other threads they may now run, * unless of course there is a blanket 'stop order' * on the process. The single threader must be allowed * to continue however as this is a bad place to stop. */ if (p->p_numthreads != remain_for_mode(mode) && !P_SHOULDSTOP(p)) { FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (TD_IS_SUSPENDED(td)) { wakeup_swapper |= thread_unsuspend_one(td, p); } thread_unlock(td); } } PROC_SUNLOCK(p); if (wakeup_swapper) kick_proc0(); } struct thread * thread_find(struct proc *p, lwpid_t tid) { struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); FOREACH_THREAD_IN_PROC(p, td) { if (td->td_tid == tid) break; } return (td); } /* Locate a thread by number; return with proc lock held. */ struct thread * tdfind(lwpid_t tid, pid_t pid) { #define RUN_THRESH 16 struct thread *td; int run = 0; rw_rlock(&tidhash_lock); LIST_FOREACH(td, TIDHASH(tid), td_hash) { if (td->td_tid == tid) { if (pid != -1 && td->td_proc->p_pid != pid) { td = NULL; break; } PROC_LOCK(td->td_proc); if (td->td_proc->p_state == PRS_NEW) { PROC_UNLOCK(td->td_proc); td = NULL; break; } if (run > RUN_THRESH) { if (rw_try_upgrade(&tidhash_lock)) { LIST_REMOVE(td, td_hash); LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash); rw_wunlock(&tidhash_lock); return (td); } } break; } run++; } rw_runlock(&tidhash_lock); return (td); } void tidhash_add(struct thread *td) { rw_wlock(&tidhash_lock); LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash); rw_wunlock(&tidhash_lock); } void tidhash_remove(struct thread *td) { rw_wlock(&tidhash_lock); LIST_REMOVE(td, td_hash); rw_wunlock(&tidhash_lock); } Index: projects/hps_head/sys/kern/kern_timeout.c =================================================================== --- projects/hps_head/sys/kern/kern_timeout.c (revision 282413) +++ projects/hps_head/sys/kern/kern_timeout.c (revision 282414) @@ -1,1596 +1,1498 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_callout_profiling.h" #if defined(__arm__) #include "opt_timer.h" #endif #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include +#include +#include #include #include #include #include #include #ifdef SMP #include #endif #ifndef NO_EVENTTIMERS DPCPU_DECLARE(sbintime_t, hardclocktime); #endif SDT_PROVIDER_DEFINE(callout_execute); SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__start, "struct callout *"); SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__end, "struct callout *"); #ifdef CALLOUT_PROFILING -static int avg_depth; -SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, +static int avg_depth[2]; +SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth[0], 0, "Average number of items examined per softclock call. Units = 1/1000"); -static int avg_gcalls; -SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0, +static int avg_gcalls[2]; +SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls[0], 0, "Average number of Giant callouts made per softclock call. Units = 1/1000"); -static int avg_lockcalls; -SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0, +static int avg_lockcalls[2]; +SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls[0], 0, "Average number of lock callouts made per softclock call. Units = 1/1000"); -static int avg_mpcalls; -SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, +static int avg_mpcalls[2]; +SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls[0], 0, "Average number of MP callouts made per softclock call. Units = 1/1000"); -static int avg_depth_dir; -SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0, +SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth[1], 0, "Average number of direct callouts examined per callout_process call. " "Units = 1/1000"); -static int avg_lockcalls_dir; SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD, - &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per " + &avg_lockcalls[1], 0, "Average number of lock direct callouts made per " "callout_process call. Units = 1/1000"); -static int avg_mpcalls_dir; -SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir, +SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls[1], 0, "Average number of MP direct callouts made per callout_process call. " "Units = 1/1000"); #endif static int ncallout; SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0, "Number of entries in callwheel and size of timeout() preallocation"); #ifdef RSS static int pin_default_swi = 1; static int pin_pcpu_swi = 1; #else static int pin_default_swi = 0; static int pin_pcpu_swi = 0; #endif SYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi, 0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)"); SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi, 0, "Pin the per-CPU swis (except PCPU 0, which is also default"); /* * TODO: * allocate more timeout table slots when table overflows. */ u_int callwheelsize, callwheelmask; +#define CALLOUT_RET_NORMAL 0 +#define CALLOUT_RET_CANCELLED 1 +#define CALLOUT_RET_DRAINING 2 + +struct callout_args { + sbintime_t time; /* absolute time for the event */ + sbintime_t precision; /* delta allowed wrt opt */ + void *arg; /* function argument */ + callout_func_t *func; /* function to call */ + int flags; /* flags passed to callout_reset() */ + int cpu; /* CPU we're scheduled on */ +}; + +typedef void callout_mutex_op_t(struct lock_object *); + +struct callout_mutex_ops { + callout_mutex_op_t *lock; + callout_mutex_op_t *unlock; +}; + +enum { + CALLOUT_LC_UNUSED_0, + CALLOUT_LC_UNUSED_1, + CALLOUT_LC_UNUSED_2, + CALLOUT_LC_UNUSED_3, + CALLOUT_LC_SPIN, + CALLOUT_LC_MUTEX, + CALLOUT_LC_RW, + CALLOUT_LC_RM, +}; + +static void +callout_mutex_op_none(struct lock_object *lock) +{ +} + +static void +callout_mutex_lock(struct lock_object *lock) +{ + + mtx_lock((struct mtx *)lock); +} + +static void +callout_mutex_unlock(struct lock_object *lock) +{ + + mtx_unlock((struct mtx *)lock); +} + +static void +callout_mutex_lock_spin(struct lock_object *lock) +{ + + mtx_lock_spin((struct mtx *)lock); +} + +static void +callout_mutex_unlock_spin(struct lock_object *lock) +{ + + mtx_unlock_spin((struct mtx *)lock); +} + +static void +callout_rm_wlock(struct lock_object *lock) +{ + + rm_wlock((struct rmlock *)lock); +} + +static void +callout_rm_wunlock(struct lock_object *lock) +{ + + rm_wunlock((struct rmlock *)lock); +} + +static void +callout_rw_wlock(struct lock_object *lock) +{ + + rw_wlock((struct rwlock *)lock); +} + +static void +callout_rw_wunlock(struct lock_object *lock) +{ + + rw_wunlock((struct rwlock *)lock); +} + +static const struct callout_mutex_ops callout_mutex_ops[8] = { + [CALLOUT_LC_UNUSED_0] = { + .lock = callout_mutex_op_none, + .unlock = callout_mutex_op_none, + }, + [CALLOUT_LC_UNUSED_1] = { + .lock = callout_mutex_op_none, + .unlock = callout_mutex_op_none, + }, + [CALLOUT_LC_UNUSED_2] = { + .lock = callout_mutex_op_none, + .unlock = callout_mutex_op_none, + }, + [CALLOUT_LC_UNUSED_3] = { + .lock = callout_mutex_op_none, + .unlock = callout_mutex_op_none, + }, + [CALLOUT_LC_SPIN] = { + .lock = callout_mutex_lock_spin, + .unlock = callout_mutex_unlock_spin, + }, + [CALLOUT_LC_MUTEX] = { + .lock = callout_mutex_lock, + .unlock = callout_mutex_unlock, + }, + [CALLOUT_LC_RW] = { + .lock = callout_rw_wlock, + .unlock = callout_rw_wunlock, + }, + [CALLOUT_LC_RM] = { + .lock = callout_rm_wlock, + .unlock = callout_rm_wunlock, + }, +}; + +static inline void +callout_lock_client(int c_flags, struct lock_object *c_lock) +{ + + callout_mutex_ops[CALLOUT_GET_LC(c_flags)].lock(c_lock); +} + +static inline void +callout_unlock_client(int c_flags, struct lock_object *c_lock) +{ + + callout_mutex_ops[CALLOUT_GET_LC(c_flags)].unlock(c_lock); +} + /* - * The callout cpu exec entities represent informations necessary for - * describing the state of callouts currently running on the CPU and the ones - * necessary for migrating callouts to the new callout cpu. In particular, - * the first entry of the array cc_exec_entity holds informations for callout - * running in SWI thread context, while the second one holds informations - * for callout running directly from hardware interrupt context. - * The cached informations are very important for deferring migration when - * the migrating callout is already running. + * The callout CPU exec structure represent information necessary for + * describing the state of callouts currently running on the CPU and + * for handling deferred callout restarts. + * + * In particular, the first entry of the array cc_exec_entity holds + * information for callouts running from the SWI thread context, while + * the second one holds information for callouts running directly from + * the hardware interrupt context. */ struct cc_exec { + /* + * The "cc_curr" points to the currently executing callout and + * is protected by the "cc_lock" spinlock. If no callback is + * currently executing it is equal to "NULL". + */ struct callout *cc_curr; -#ifdef SMP - void (*ce_migration_func)(void *); - void *ce_migration_arg; - int ce_migration_cpu; - sbintime_t ce_migration_time; - sbintime_t ce_migration_prec; + /* + * The "cc_restart_args" structure holds the argument for a + * deferred callback restart and is protected by the "cc_lock" + * spinlock. The structure is only valid if "cc_restart" is + * "true". If "cc_restart" is "false" the information in the + * "cc_restart_args" structure shall be ignored. + */ + struct callout_args cc_restart_args; + bool cc_restart; + /* + * The "cc_cancel" variable allows the currently pending + * callback to be atomically cancelled. This field is write + * protected by the "cc_lock" spinlock. + */ + bool cc_cancel; + /* + * The "cc_drain_fn" points to a function which shall be + * called with the argument stored in "cc_drain_arg" when an + * asynchronous drain is performed. This field is write + * protected by the "cc_lock" spinlock. + */ + callout_func_t *cc_drain_fn; + void *cc_drain_arg; + /* + * The following fields are used for callout profiling only: + */ +#ifdef CALLOUT_PROFILING + int cc_depth; + int cc_mpcalls; + int cc_lockcalls; + int cc_gcalls; #endif - bool cc_cancel; - bool cc_waiting; }; /* - * There is one struct callout_cpu per cpu, holding all relevant + * There is one "struct callout_cpu" per CPU, holding all relevant * state for the callout processing thread on the individual CPU. */ struct callout_cpu { struct mtx_padalign cc_lock; struct cc_exec cc_exec_entity[2]; - struct callout *cc_next; struct callout *cc_callout; struct callout_list *cc_callwheel; + struct callout_list cc_tmplist; struct callout_tailq cc_expireq; struct callout_slist cc_callfree; sbintime_t cc_firstevent; sbintime_t cc_lastscan; void *cc_cookie; - u_int cc_bucket; - u_int cc_inited; char cc_ktr_event_name[20]; }; -#define callout_migrating(c) ((c)->c_iflags & CALLOUT_DFRMIGRATION) +#define cc_exec_curr(cc, dir) (cc)->cc_exec_entity[(dir)].cc_curr +#define cc_exec_restart_args(cc, dir) (cc)->cc_exec_entity[(dir)].cc_restart_args +#define cc_exec_restart(cc, dir) (cc)->cc_exec_entity[(dir)].cc_restart +#define cc_exec_cancel(cc, dir) (cc)->cc_exec_entity[(dir)].cc_cancel +#define cc_exec_drain_fn(cc, dir) (cc)->cc_exec_entity[(dir)].cc_drain_fn +#define cc_exec_drain_arg(cc, dir) (cc)->cc_exec_entity[(dir)].cc_drain_arg +#define cc_exec_depth(cc, dir) (cc)->cc_exec_entity[(dir)].cc_depth +#define cc_exec_mpcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_mpcalls +#define cc_exec_lockcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_lockcalls +#define cc_exec_gcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_gcalls -#define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr -#define cc_exec_next(cc) cc->cc_next -#define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel -#define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting #ifdef SMP -#define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func -#define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg -#define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu -#define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time -#define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec - struct callout_cpu cc_cpu[MAXCPU]; -#define CPUBLOCK MAXCPU +#define CPUBLOCK -1 #define CC_CPU(cpu) (&cc_cpu[(cpu)]) #define CC_SELF() CC_CPU(PCPU_GET(cpuid)) #else struct callout_cpu cc_cpu; #define CC_CPU(cpu) &cc_cpu #define CC_SELF() &cc_cpu #endif #define CC_LOCK(cc) mtx_lock_spin(&(cc)->cc_lock) #define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock) #define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) static int timeout_cpu; static void callout_cpu_init(struct callout_cpu *cc, int cpu); -static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, -#ifdef CALLOUT_PROFILING - int *mpcalls, int *lockcalls, int *gcalls, -#endif - int direct); +static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, const int direct); static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); -/** - * Locked by cc_lock: - * cc_curr - If a callout is in progress, it is cc_curr. - * If cc_curr is non-NULL, threads waiting in - * callout_drain() will be woken up as soon as the - * relevant callout completes. - * cc_cancel - Changing to 1 with both callout_lock and cc_lock held - * guarantees that the current callout will not run. - * The softclock() function sets this to 0 before it - * drops callout_lock to acquire c_lock, and it calls - * the handler only if curr_cancelled is still 0 after - * cc_lock is successfully acquired. - * cc_waiting - If a thread is waiting in callout_drain(), then - * callout_wait is nonzero. Set only when - * cc_curr is non-NULL. - */ - /* - * Resets the execution entity tied to a specific callout cpu. + * Kernel low level callwheel initialization called from cpu0 during + * kernel startup: */ static void -cc_cce_cleanup(struct callout_cpu *cc, int direct) -{ - - cc_exec_curr(cc, direct) = NULL; - cc_exec_cancel(cc, direct) = false; - cc_exec_waiting(cc, direct) = false; -#ifdef SMP - cc_migration_cpu(cc, direct) = CPUBLOCK; - cc_migration_time(cc, direct) = 0; - cc_migration_prec(cc, direct) = 0; - cc_migration_func(cc, direct) = NULL; - cc_migration_arg(cc, direct) = NULL; -#endif -} - -/* - * Checks if migration is requested by a specific callout cpu. - */ -static int -cc_cce_migrating(struct callout_cpu *cc, int direct) -{ - -#ifdef SMP - return (cc_migration_cpu(cc, direct) != CPUBLOCK); -#else - return (0); -#endif -} - -/* - * Kernel low level callwheel initialization - * called on cpu0 during kernel startup. - */ -static void callout_callwheel_init(void *dummy) { struct callout_cpu *cc; /* * Calculate the size of the callout wheel and the preallocated * timeout() structures. * XXX: Clip callout to result of previous function of maxusers * maximum 384. This is still huge, but acceptable. */ memset(CC_CPU(0), 0, sizeof(cc_cpu)); ncallout = imin(16 + maxproc + maxfiles, 18508); TUNABLE_INT_FETCH("kern.ncallout", &ncallout); /* * Calculate callout wheel size, should be next power of two higher * than 'ncallout'. */ callwheelsize = 1 << fls(ncallout); callwheelmask = callwheelsize - 1; /* * Fetch whether we're pinning the swi's or not. */ TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi); TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi); /* * Only cpu0 handles timeout(9) and receives a preallocation. * * XXX: Once all timeout(9) consumers are converted this can * be removed. */ timeout_cpu = PCPU_GET(cpuid); cc = CC_CPU(timeout_cpu); cc->cc_callout = malloc(ncallout * sizeof(struct callout), M_CALLOUT, M_WAITOK); callout_cpu_init(cc, timeout_cpu); } SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL); /* * Initialize the per-cpu callout structures. */ static void callout_cpu_init(struct callout_cpu *cc, int cpu) { struct callout *c; int i; mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE); SLIST_INIT(&cc->cc_callfree); - cc->cc_inited = 1; cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize, M_CALLOUT, M_WAITOK); for (i = 0; i < callwheelsize; i++) LIST_INIT(&cc->cc_callwheel[i]); TAILQ_INIT(&cc->cc_expireq); + LIST_INIT(&cc->cc_tmplist); cc->cc_firstevent = SBT_MAX; - for (i = 0; i < 2; i++) - cc_cce_cleanup(cc, i); snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name), "callwheel cpu %d", cpu); if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */ return; for (i = 0; i < ncallout; i++) { c = &cc->cc_callout[i]; callout_init(c, 0); - c->c_iflags = CALLOUT_LOCAL_ALLOC; + c->c_flags |= CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } } -#ifdef SMP -/* - * Switches the cpu tied to a specific callout. - * The function expects a locked incoming callout cpu and returns with - * locked outcoming callout cpu. - */ -static struct callout_cpu * -callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) +#ifdef CALLOUT_PROFILING +static inline void +callout_clear_stats(struct callout_cpu *cc, const int direct) { - struct callout_cpu *new_cc; + cc_exec_depth(cc, direct) = 0; + cc_exec_mpcalls(cc, direct) = 0; + cc_exec_lockcalls(cc, direct) = 0; + cc_exec_gcalls(cc, direct) = 0; +} +#endif - MPASS(c != NULL && cc != NULL); - CC_LOCK_ASSERT(cc); - - /* - * Avoid interrupts and preemption firing after the callout cpu - * is blocked in order to avoid deadlocks as the new thread - * may be willing to acquire the callout cpu lock. - */ - c->c_cpu = CPUBLOCK; - spinlock_enter(); - CC_UNLOCK(cc); - new_cc = CC_CPU(new_cpu); - CC_LOCK(new_cc); - spinlock_exit(); - c->c_cpu = new_cpu; - return (new_cc); +#ifdef CALLOUT_PROFILING +static inline void +callout_update_stats(struct callout_cpu *cc, const int direct) +{ + avg_depth[direct] += + (cc_exec_depth(cc, direct) * 1000 - + avg_depth[direct]) >> 8; + avg_mpcalls[direct] += + (cc_exec_mpcalls(cc, direct) * 1000 - + avg_mpcalls[direct]) >> 8; + avg_lockcalls[direct] += + (cc_exec_lockcalls(cc, direct) * 1000 - + avg_lockcalls[direct]) >> 8; + avg_gcalls[direct] += + (cc_exec_gcalls(cc, direct) * 1000 - + avg_gcalls[direct]) >> 8; } #endif /* * Start standard softclock thread. */ static void start_softclock(void *dummy) { struct callout_cpu *cc; char name[MAXCOMLEN]; #ifdef SMP int cpu; struct intr_event *ie; #endif cc = CC_CPU(timeout_cpu); snprintf(name, sizeof(name), "clock (%d)", timeout_cpu); if (swi_add(&clk_intr_event, name, softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); if (pin_default_swi && (intr_event_bind(clk_intr_event, timeout_cpu) != 0)) { printf("%s: timeout clock couldn't be pinned to cpu %d\n", __func__, timeout_cpu); } #ifdef SMP CPU_FOREACH(cpu) { if (cpu == timeout_cpu) continue; cc = CC_CPU(cpu); cc->cc_callout = NULL; /* Only cpu0 handles timeout(9). */ callout_cpu_init(cc, cpu); snprintf(name, sizeof(name), "clock (%d)", cpu); ie = NULL; if (swi_add(&ie, name, softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); if (pin_pcpu_swi && (intr_event_bind(ie, cpu) != 0)) { printf("%s: per-cpu clock couldn't be pinned to " "cpu %d\n", __func__, cpu); } } #endif } SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); #define CC_HASH_SHIFT 8 static inline u_int callout_hash(sbintime_t sbt) { return (sbt >> (32 - CC_HASH_SHIFT)); } static inline u_int callout_get_bucket(sbintime_t sbt) { return (callout_hash(sbt) & callwheelmask); } void callout_process(sbintime_t now) { - struct callout *tmp, *tmpn; + struct callout *tmp; struct callout_cpu *cc; struct callout_list *sc; sbintime_t first, last, max, tmp_max; uint32_t lookahead; u_int firstb, lastb, nowb; -#ifdef CALLOUT_PROFILING - int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0; -#endif cc = CC_SELF(); - mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); + CC_LOCK(cc); +#ifdef CALLOUT_PROFILING + callout_clear_stats(cc, 1); +#endif /* Compute the buckets of the last scan and present times. */ firstb = callout_hash(cc->cc_lastscan); cc->cc_lastscan = now; nowb = callout_hash(now); /* Compute the last bucket and minimum time of the bucket after it. */ if (nowb == firstb) lookahead = (SBT_1S / 16); else if (nowb - firstb == 1) lookahead = (SBT_1S / 8); else lookahead = (SBT_1S / 2); first = last = now; first += (lookahead / 2); last += lookahead; last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT)); lastb = callout_hash(last) - 1; max = last; /* * Check if we wrapped around the entire wheel from the last scan. * In case, we need to scan entirely the wheel for pending callouts. */ if (lastb - firstb >= callwheelsize) { lastb = firstb + callwheelsize - 1; if (nowb - firstb >= callwheelsize) nowb = lastb; } /* Iterate callwheel from firstb to nowb and then up to lastb. */ do { sc = &cc->cc_callwheel[firstb & callwheelmask]; - tmp = LIST_FIRST(sc); - while (tmp != NULL) { + while (1) { + tmp = LIST_FIRST(sc); + if (tmp == NULL) + break; + + LIST_REMOVE(tmp, c_links.le); + /* Run the callout if present time within allowed. */ if (tmp->c_time <= now) { /* - * Consumer told us the callout may be run - * directly from hardware interrupt context. + * Consumer told us the callout may be + * run directly from the hardware + * interrupt context: */ - if (tmp->c_iflags & CALLOUT_DIRECT) { -#ifdef CALLOUT_PROFILING - ++depth_dir; -#endif - cc_exec_next(cc) = - LIST_NEXT(tmp, c_links.le); - cc->cc_bucket = firstb & callwheelmask; - LIST_REMOVE(tmp, c_links.le); - softclock_call_cc(tmp, cc, -#ifdef CALLOUT_PROFILING - &mpcalls_dir, &lockcalls_dir, NULL, -#endif - 1); - tmp = cc_exec_next(cc); - cc_exec_next(cc) = NULL; + if (tmp->c_flags & CALLOUT_DIRECT) { + softclock_call_cc(tmp, cc, 1); } else { - tmpn = LIST_NEXT(tmp, c_links.le); - LIST_REMOVE(tmp, c_links.le); TAILQ_INSERT_TAIL(&cc->cc_expireq, tmp, c_links.tqe); - tmp->c_iflags |= CALLOUT_PROCESSED; - tmp = tmpn; + tmp->c_flags |= CALLOUT_PROCESSED; } continue; } + + /* insert callout into temporary list */ + LIST_INSERT_HEAD(&cc->cc_tmplist, tmp, c_links.le); + /* Skip events from distant future. */ if (tmp->c_time >= max) - goto next; + continue; + /* * Event minimal time is bigger than present maximal * time, so it cannot be aggregated. */ if (tmp->c_time > last) { lastb = nowb; - goto next; + continue; } /* Update first and last time, respecting this event. */ if (tmp->c_time < first) first = tmp->c_time; tmp_max = tmp->c_time + tmp->c_precision; if (tmp_max < last) last = tmp_max; -next: - tmp = LIST_NEXT(tmp, c_links.le); } + + /* Put temporary list back into the main bucket */ + LIST_SWAP(sc, &cc->cc_tmplist, callout, c_links.le); + /* Proceed with the next bucket. */ firstb++; + /* * Stop if we looked after present time and found * some event we can't execute at now. * Stop if we looked far enough into the future. */ } while (((int)(firstb - lastb)) <= 0); cc->cc_firstevent = last; #ifndef NO_EVENTTIMERS cpu_new_callout(curcpu, last, first); #endif #ifdef CALLOUT_PROFILING - avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8; - avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; - avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; + callout_update_stats(cc, 1); #endif - mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); + CC_UNLOCK(cc); /* - * swi_sched acquires the thread lock, so we don't want to call it - * with cc_lock held; incorrect locking order. + * "swi_sched()" acquires the thread lock and we don't want to + * call it having cc_lock held because it leads to a locking + * order reversal issue. */ if (!TAILQ_EMPTY(&cc->cc_expireq)) swi_sched(cc->cc_cookie, 0); } static struct callout_cpu * callout_lock(struct callout *c) { struct callout_cpu *cc; int cpu; for (;;) { cpu = c->c_cpu; #ifdef SMP if (cpu == CPUBLOCK) { - while (c->c_cpu == CPUBLOCK) - cpu_spinwait(); + cpu_spinwait(); continue; } #endif cc = CC_CPU(cpu); CC_LOCK(cc); if (cpu == c->c_cpu) break; CC_UNLOCK(cc); } return (cc); } -static void -callout_cc_add(struct callout *c, struct callout_cpu *cc, - sbintime_t sbt, sbintime_t precision, void (*func)(void *), - void *arg, int cpu, int flags) +static struct callout_cpu * +callout_cc_add_locked(struct callout *c, struct callout_cpu *cc, + struct callout_args *coa) { - int bucket; +#ifndef NO_EVENTTIMERS + sbintime_t sbt; +#endif + u_int bucket; CC_LOCK_ASSERT(cc); - if (sbt < cc->cc_lastscan) - sbt = cc->cc_lastscan; - c->c_arg = arg; - c->c_iflags |= CALLOUT_PENDING; - c->c_iflags &= ~CALLOUT_PROCESSED; - c->c_flags |= CALLOUT_ACTIVE; - if (flags & C_DIRECT_EXEC) - c->c_iflags |= CALLOUT_DIRECT; - c->c_func = func; - c->c_time = sbt; - c->c_precision = precision; + + /* update flags before swapping locks, if any */ + c->c_flags &= ~(CALLOUT_PROCESSED | CALLOUT_DIRECT | CALLOUT_DEFRESTART); + if (coa->flags & C_DIRECT_EXEC) + c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | CALLOUT_DIRECT); + else + c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); + +#ifdef SMP + /* only set the "c_cpu" if the CPU number changed and is valid */ + if (c->c_cpu != coa->cpu && coa->cpu > CPUBLOCK && + coa->cpu <= mp_maxid && !CPU_ABSENT(coa->cpu)) { + /* + * Avoid interrupts and preemption firing after the + * callout CPU is blocked in order to avoid deadlocks + * as the new thread may be willing to acquire the + * callout CPU lock: + */ + c->c_cpu = CPUBLOCK; + spinlock_enter(); + CC_UNLOCK(cc); + cc = CC_CPU(coa->cpu); + CC_LOCK(cc); + spinlock_exit(); + c->c_cpu = coa->cpu; + } +#endif + if (coa->time < cc->cc_lastscan) + coa->time = cc->cc_lastscan; + c->c_arg = coa->arg; + c->c_func = coa->func; + c->c_time = coa->time; + c->c_precision = coa->precision; + bucket = callout_get_bucket(c->c_time); CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", c, (int)(c->c_precision >> 32), (u_int)(c->c_precision & 0xffffffff)); LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); - if (cc->cc_bucket == bucket) - cc_exec_next(cc) = c; + #ifndef NO_EVENTTIMERS /* * Inform the eventtimers(4) subsystem there's a new callout * that has been inserted, but only if really required. */ if (SBT_MAX - c->c_time < c->c_precision) c->c_precision = SBT_MAX - c->c_time; sbt = c->c_time + c->c_precision; if (sbt < cc->cc_firstevent) { cc->cc_firstevent = sbt; - cpu_new_callout(cpu, sbt, c->c_time); + cpu_new_callout(c->c_cpu, sbt, c->c_time); } #endif + return (cc); } -static void +static inline void callout_cc_del(struct callout *c, struct callout_cpu *cc) { - if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) == 0) - return; c->c_func = NULL; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } -static void +static inline void softclock_call_cc(struct callout *c, struct callout_cpu *cc, -#ifdef CALLOUT_PROFILING - int *mpcalls, int *lockcalls, int *gcalls, -#endif - int direct) + const int direct) { - struct rm_priotracker tracker; - void (*c_func)(void *); + callout_func_t *c_func; void *c_arg; - struct lock_class *class; struct lock_object *c_lock; - uintptr_t lock_status; - int c_iflags; -#ifdef SMP - struct callout_cpu *new_cc; - void (*new_func)(void *); - void *new_arg; - int flags, new_cpu; - sbintime_t new_prec, new_time; -#endif + int c_flags; #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbintime_t sbt1, sbt2; struct timespec ts2; static sbintime_t maxdt = 2 * SBT_1MS; /* 2 msec */ static timeout_t *lastfunc; #endif - KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING, - ("softclock_call_cc: pend %p %x", c, c->c_iflags)); - KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE, - ("softclock_call_cc: act %p %x", c, c->c_flags)); - class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; - lock_status = 0; - if (c->c_flags & CALLOUT_SHAREDLOCK) { - if (class == &lock_class_rm) - lock_status = (uintptr_t)&tracker; - else - lock_status = 1; - } + KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) == + (CALLOUT_PENDING | CALLOUT_ACTIVE), + ("softclock_call_cc: pend|act %p %x", c, c->c_flags)); + c_lock = c->c_lock; c_func = c->c_func; c_arg = c->c_arg; - c_iflags = c->c_iflags; - if (c->c_iflags & CALLOUT_LOCAL_ALLOC) - c->c_iflags = CALLOUT_LOCAL_ALLOC; - else - c->c_iflags &= ~CALLOUT_PENDING; - + c_flags = c->c_flags; + + /* remove pending bit */ + c->c_flags &= ~CALLOUT_PENDING; + + /* reset our local state */ cc_exec_curr(cc, direct) = c; - cc_exec_cancel(cc, direct) = false; - CC_UNLOCK(cc); + cc_exec_restart(cc, direct) = false; + cc_exec_drain_fn(cc, direct) = NULL; + cc_exec_drain_arg(cc, direct) = NULL; + if (c_lock != NULL) { - class->lc_lock(c_lock, lock_status); + cc_exec_cancel(cc, direct) = false; + CC_UNLOCK(cc); + + /* unlocked region for switching locks */ + + callout_lock_client(c_flags, c_lock); + /* - * The callout may have been cancelled - * while we switched locks. + * Check if the callout may have been cancelled while + * we were switching locks. Even though the callout is + * specifying a lock, it might not be certain this + * lock is locked when starting and stopping callouts. */ + CC_LOCK(cc); if (cc_exec_cancel(cc, direct)) { - class->lc_unlock(c_lock); - goto skip; + callout_unlock_client(c_flags, c_lock); + goto skip_cc_locked; } - /* The callout cannot be stopped now. */ - cc_exec_cancel(cc, direct) = true; if (c_lock == &Giant.lock_object) { #ifdef CALLOUT_PROFILING - (*gcalls)++; + cc_exec_gcalls(cc, direct)++; #endif CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p", c, c_func, c_arg); } else { #ifdef CALLOUT_PROFILING - (*lockcalls)++; + cc_exec_lockcalls(cc, direct)++; #endif CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p", c, c_func, c_arg); } } else { #ifdef CALLOUT_PROFILING - (*mpcalls)++; + cc_exec_mpcalls(cc, direct)++; #endif CTR3(KTR_CALLOUT, "callout %p func %p arg %p", c, c_func, c_arg); } + /* The callout cannot be stopped now! */ + cc_exec_cancel(cc, direct) = true; + CC_UNLOCK(cc); + + /* unlocked region */ KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running", "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct); #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbt1 = sbinuptime(); #endif THREAD_NO_SLEEPING(); SDT_PROBE(callout_execute, kernel, , callout__start, c, 0, 0, 0, 0); c_func(c_arg); SDT_PROBE(callout_execute, kernel, , callout__end, c, 0, 0, 0, 0); THREAD_SLEEPING_OK(); #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbt2 = sbinuptime(); sbt2 -= sbt1; if (sbt2 > maxdt) { if (lastfunc != c_func || sbt2 > maxdt * 2) { ts2 = sbttots(sbt2); printf( "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n", c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec); } maxdt = sbt2; lastfunc = c_func; } #endif KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle"); CTR1(KTR_CALLOUT, "callout %p finished", c); - if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0) - class->lc_unlock(c_lock); -skip: + + /* + * At this point the callback structure might have been freed, + * so we need to check the previously copied value of + * "c->c_flags": + */ + if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) + callout_unlock_client(c_flags, c_lock); + CC_LOCK(cc); + +skip_cc_locked: KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr")); cc_exec_curr(cc, direct) = NULL; - if (cc_exec_waiting(cc, direct)) { + + /* Check if there is anything which needs draining */ + if (cc_exec_drain_fn(cc, direct) != NULL) { /* - * There is someone waiting for the - * callout to complete. - * If the callout was scheduled for - * migration just cancel it. + * Unlock the CPU callout last, so that any use of + * structures belonging to the callout are complete: */ - if (cc_cce_migrating(cc, direct)) { - cc_cce_cleanup(cc, direct); - - /* - * It should be assert here that the callout is not - * destroyed but that is not easy. - */ - c->c_iflags &= ~CALLOUT_DFRMIGRATION; - } - cc_exec_waiting(cc, direct) = false; CC_UNLOCK(cc); - wakeup(&cc_exec_waiting(cc, direct)); + /* call drain function unlocked */ + cc_exec_drain_fn(cc, direct)( + cc_exec_drain_arg(cc, direct)); CC_LOCK(cc); - } else if (cc_cce_migrating(cc, direct)) { - KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0, - ("Migrating legacy callout %p", c)); -#ifdef SMP - /* - * If the callout was scheduled for - * migration just perform it now. - */ - new_cpu = cc_migration_cpu(cc, direct); - new_time = cc_migration_time(cc, direct); - new_prec = cc_migration_prec(cc, direct); - new_func = cc_migration_func(cc, direct); - new_arg = cc_migration_arg(cc, direct); - cc_cce_cleanup(cc, direct); - - /* - * It should be assert here that the callout is not destroyed - * but that is not easy. - * - * As first thing, handle deferred callout stops. - */ - if (!callout_migrating(c)) { - CTR3(KTR_CALLOUT, - "deferred cancelled %p func %p arg %p", - c, new_func, new_arg); - callout_cc_del(c, cc); - return; + } else if (c_flags & CALLOUT_LOCAL_ALLOC) { + /* return callout back to freelist */ + callout_cc_del(c, cc); + } else if (cc_exec_restart(cc, direct)) { + struct callout_cpu *new_cc; + /* [re-]schedule callout, if any */ + new_cc = callout_cc_add_locked(c, cc, + &cc_exec_restart_args(cc, direct)); + if (new_cc != cc) { + /* switch locks back again */ + CC_UNLOCK(new_cc); + CC_LOCK(cc); } - c->c_iflags &= ~CALLOUT_DFRMIGRATION; - - new_cc = callout_cpu_switch(c, cc, new_cpu); - flags = (direct) ? C_DIRECT_EXEC : 0; - callout_cc_add(c, new_cc, new_time, new_prec, new_func, - new_arg, new_cpu, flags); - CC_UNLOCK(new_cc); - CC_LOCK(cc); -#else - panic("migration should not happen"); -#endif } - /* - * If the current callout is locally allocated (from - * timeout(9)) then put it on the freelist. - * - * Note: we need to check the cached copy of c_iflags because - * if it was not local, then it's not safe to deref the - * callout pointer. - */ - KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 || - c->c_iflags == CALLOUT_LOCAL_ALLOC, - ("corrupted callout")); - if (c_iflags & CALLOUT_LOCAL_ALLOC) - callout_cc_del(c, cc); } /* * The callout mechanism is based on the work of Adam M. Costello and * George Varghese, published in a technical report entitled "Redesigning * the BSD Callout and Timer Facilities" and modified slightly for inclusion * in FreeBSD by Justin T. Gibbs. The original work on the data structures * used in this implementation was published by G. Varghese and T. Lauck in * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for * the Efficient Implementation of a Timer Facility" in the Proceedings of * the 11th ACM Annual Symposium on Operating Systems Principles, * Austin, Texas Nov 1987. */ /* * Software (low priority) clock interrupt. * Run periodic events from timeout queue. */ void softclock(void *arg) { struct callout_cpu *cc; struct callout *c; -#ifdef CALLOUT_PROFILING - int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0; -#endif cc = (struct callout_cpu *)arg; CC_LOCK(cc); - while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { - TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); - softclock_call_cc(c, cc, #ifdef CALLOUT_PROFILING - &mpcalls, &lockcalls, &gcalls, + callout_clear_stats(cc, 0); #endif - 0); -#ifdef CALLOUT_PROFILING - ++depth; -#endif + while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + softclock_call_cc(c, cc, 0); } #ifdef CALLOUT_PROFILING - avg_depth += (depth * 1000 - avg_depth) >> 8; - avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; - avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; - avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; + callout_update_stats(cc, 0); #endif CC_UNLOCK(cc); } /* * timeout -- * Execute a function after a specified length of time. * * untimeout -- * Cancel previous timeout function call. * * callout_handle_init -- * Initialize a handle so that using it with untimeout is benign. * * See AT&T BCI Driver Reference Manual for specification. This * implementation differs from that one in that although an * identification value is returned from timeout, the original * arguments to timeout as well as the identifier are used to * identify entries for untimeout. */ struct callout_handle timeout(timeout_t *ftn, void *arg, int to_ticks) { struct callout_cpu *cc; struct callout *new; struct callout_handle handle; cc = CC_CPU(timeout_cpu); CC_LOCK(cc); /* Fill in the next free callout structure. */ new = SLIST_FIRST(&cc->cc_callfree); if (new == NULL) /* XXX Attempt to malloc first */ panic("timeout table full"); SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); - callout_reset(new, to_ticks, ftn, arg); handle.callout = new; CC_UNLOCK(cc); + callout_reset(new, to_ticks, ftn, arg); + return (handle); } void untimeout(timeout_t *ftn, void *arg, struct callout_handle handle) { struct callout_cpu *cc; + bool match; /* * Check for a handle that was initialized * by callout_handle_init, but never used * for a real timeout. */ if (handle.callout == NULL) return; cc = callout_lock(handle.callout); - if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) - callout_stop(handle.callout); + match = (handle.callout->c_func == ftn && handle.callout->c_arg == arg); CC_UNLOCK(cc); + + if (match) + callout_stop(handle.callout); } void callout_handle_init(struct callout_handle *handle) { handle->callout = NULL; } +static int +callout_restart_async(struct callout *c, struct callout_args *coa, + callout_func_t *drain_fn, void *drain_arg) +{ + struct callout_cpu *cc; + int cancelled; + int direct; + + cc = callout_lock(c); + + /* Figure out if the callout is direct or not */ + direct = ((c->c_flags & CALLOUT_DIRECT) != 0); + + /* + * Check if the callback is currently scheduled for + * completion: + */ + if (cc_exec_curr(cc, direct) == c) { + /* + * Try to prevent the callback from running by setting + * the "cc_cancel" variable to "true". Also check if + * the callout was previously subject to a deferred + * callout restart: + */ + if (cc_exec_cancel(cc, direct) == false || + (c->c_flags & CALLOUT_DEFRESTART) != 0) { + cc_exec_cancel(cc, direct) = true; + cancelled = CALLOUT_RET_CANCELLED; + } else { + cancelled = CALLOUT_RET_NORMAL; + } + + /* + * Prevent callback restart if "callout_drain_xxx()" + * is being called or we are stopping the callout or + * the callback was preallocated by us: + */ + if (cc_exec_drain_fn(cc, direct) != NULL || + coa == NULL || (c->c_flags & CALLOUT_LOCAL_ALLOC) != 0) { + CTR4(KTR_CALLOUT, "%s %p func %p arg %p", + cancelled ? "cancelled and draining" : "draining", + c, c->c_func, c->c_arg); + + /* clear old flags, if any */ + c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING | + CALLOUT_DEFRESTART | CALLOUT_PROCESSED); + + /* clear restart flag, if any */ + cc_exec_restart(cc, direct) = false; + + /* set drain function, if any */ + if (drain_fn != NULL) { + cc_exec_drain_fn(cc, direct) = drain_fn; + cc_exec_drain_arg(cc, direct) = drain_arg; + cancelled |= CALLOUT_RET_DRAINING; + } + } else { + CTR4(KTR_CALLOUT, "%s %p func %p arg %p", + cancelled ? "cancelled and restarting" : "restarting", + c, c->c_func, c->c_arg); + + /* get us back into the game */ + c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | + CALLOUT_DEFRESTART); + c->c_flags &= ~CALLOUT_PROCESSED; + + /* enable deferred restart */ + cc_exec_restart(cc, direct) = true; + + /* store arguments for the deferred restart, if any */ + cc_exec_restart_args(cc, direct) = *coa; + } + } else { + /* stop callout */ + if (c->c_flags & CALLOUT_PENDING) { + /* + * The callback has not yet been executed, and + * we simply just need to unlink it: + */ + if ((c->c_flags & CALLOUT_PROCESSED) == 0) { + LIST_REMOVE(c, c_links.le); + } else { + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + } + cancelled = CALLOUT_RET_CANCELLED; + } else { + cancelled = CALLOUT_RET_NORMAL; + } + + CTR4(KTR_CALLOUT, "%s %p func %p arg %p", + cancelled ? "rescheduled" : "scheduled", + c, c->c_func, c->c_arg); + + /* [re-]schedule callout, if any */ + if (coa != NULL) { + cc = callout_cc_add_locked(c, cc, coa); + } else { + /* clear old flags, if any */ + c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING | + CALLOUT_DEFRESTART | CALLOUT_PROCESSED); + + /* return callback to pre-allocated list, if any */ + if ((c->c_flags & CALLOUT_LOCAL_ALLOC) && + cancelled != CALLOUT_RET_NORMAL) { + callout_cc_del(c, cc); + } + } + } + CC_UNLOCK(cc); + return (cancelled); +} + /* * New interface; clients allocate their own callout structures. * * callout_reset() - establish or change a timeout * callout_stop() - disestablish a timeout * callout_init() - initialize a callout structure so that it can * safely be passed to callout_reset() and callout_stop() * * defines three convenience macros: * * callout_active() - returns truth if callout has not been stopped, * drained, or deactivated since the last time the callout was * reset. * callout_pending() - returns truth if callout is still waiting for timeout * callout_deactivate() - marks the callout as having been serviced */ int callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, - void (*ftn)(void *), void *arg, int cpu, int flags) + callout_func_t *ftn, void *arg, int cpu, int flags) { - sbintime_t to_sbt, pr; - struct callout_cpu *cc; - int cancelled, direct; - int ignore_cpu=0; + struct callout_args coa; - cancelled = 0; - if (cpu == -1) { - ignore_cpu = 1; - } else if ((cpu >= MAXCPU) || - ((CC_CPU(cpu))->cc_inited == 0)) { - /* Invalid CPU spec */ - panic("Invalid CPU in callout %d", cpu); - } - if (flags & C_ABSOLUTE) { - to_sbt = sbt; + /* store arguments for callout add function */ + coa.func = ftn; + coa.arg = arg; + coa.precision = precision; + coa.flags = flags; + coa.cpu = cpu; + + /* compute the rest of the arguments needed */ + if (coa.flags & C_ABSOLUTE) { + coa.time = sbt; } else { - if ((flags & C_HARDCLOCK) && (sbt < tick_sbt)) + sbintime_t pr; + + if ((coa.flags & C_HARDCLOCK) && (sbt < tick_sbt)) sbt = tick_sbt; - if ((flags & C_HARDCLOCK) || + if ((coa.flags & C_HARDCLOCK) || #ifdef NO_EVENTTIMERS sbt >= sbt_timethreshold) { - to_sbt = getsbinuptime(); + coa.time = getsbinuptime(); /* Add safety belt for the case of hz > 1000. */ - to_sbt += tc_tick_sbt - tick_sbt; + coa.time += tc_tick_sbt - tick_sbt; #else sbt >= sbt_tickthreshold) { /* * Obtain the time of the last hardclock() call on * this CPU directly from the kern_clocksource.c. * This value is per-CPU, but it is equal for all * active ones. */ #ifdef __LP64__ - to_sbt = DPCPU_GET(hardclocktime); + coa.time = DPCPU_GET(hardclocktime); #else spinlock_enter(); - to_sbt = DPCPU_GET(hardclocktime); + coa.time = DPCPU_GET(hardclocktime); spinlock_exit(); #endif #endif - if ((flags & C_HARDCLOCK) == 0) - to_sbt += tick_sbt; + if ((coa.flags & C_HARDCLOCK) == 0) + coa.time += tick_sbt; } else - to_sbt = sbinuptime(); - if (SBT_MAX - to_sbt < sbt) - to_sbt = SBT_MAX; + coa.time = sbinuptime(); + if (SBT_MAX - coa.time < sbt) + coa.time = SBT_MAX; else - to_sbt += sbt; - pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp : - sbt >> C_PRELGET(flags)); - if (pr > precision) - precision = pr; + coa.time += sbt; + pr = ((C_PRELGET(coa.flags) < 0) ? sbt >> tc_precexp : + sbt >> C_PRELGET(coa.flags)); + if (pr > coa.precision) + coa.precision = pr; } - /* - * This flag used to be added by callout_cc_add, but the - * first time you call this we could end up with the - * wrong direct flag if we don't do it before we add. - */ - if (flags & C_DIRECT_EXEC) { - direct = 1; - } else { - direct = 0; - } - KASSERT(!direct || c->c_lock == NULL, - ("%s: direct callout %p has lock", __func__, c)); - cc = callout_lock(c); - /* - * Don't allow migration of pre-allocated callouts lest they - * become unbalanced or handle the case where the user does - * not care. - */ - if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) || - ignore_cpu) { - cpu = c->c_cpu; - } - if (cc_exec_curr(cc, direct) == c) { - /* - * We're being asked to reschedule a callout which is - * currently in progress. If there is a lock then we - * can cancel the callout if it has not really started. - */ - if (c->c_lock != NULL && cc_exec_cancel(cc, direct)) - cancelled = cc_exec_cancel(cc, direct) = true; - if (cc_exec_waiting(cc, direct)) { - /* - * Someone has called callout_drain to kill this - * callout. Don't reschedule. - */ - CTR4(KTR_CALLOUT, "%s %p func %p arg %p", - cancelled ? "cancelled" : "failed to cancel", - c, c->c_func, c->c_arg); - CC_UNLOCK(cc); - return (cancelled); - } -#ifdef SMP - if (callout_migrating(c)) { - /* - * This only occurs when a second callout_reset_sbt_on - * is made after a previous one moved it into - * deferred migration (below). Note we do *not* change - * the prev_cpu even though the previous target may - * be different. - */ - cc_migration_cpu(cc, direct) = cpu; - cc_migration_time(cc, direct) = to_sbt; - cc_migration_prec(cc, direct) = precision; - cc_migration_func(cc, direct) = ftn; - cc_migration_arg(cc, direct) = arg; - cancelled = 1; - CC_UNLOCK(cc); - return (cancelled); - } -#endif - } - if (c->c_iflags & CALLOUT_PENDING) { - if ((c->c_iflags & CALLOUT_PROCESSED) == 0) { - if (cc_exec_next(cc) == c) - cc_exec_next(cc) = LIST_NEXT(c, c_links.le); - LIST_REMOVE(c, c_links.le); - } else { - TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); - } - cancelled = 1; - c->c_iflags &= ~ CALLOUT_PENDING; - c->c_flags &= ~ CALLOUT_ACTIVE; - } - -#ifdef SMP - /* - * If the callout must migrate try to perform it immediately. - * If the callout is currently running, just defer the migration - * to a more appropriate moment. - */ - if (c->c_cpu != cpu) { - if (cc_exec_curr(cc, direct) == c) { - /* - * Pending will have been removed since we are - * actually executing the callout on another - * CPU. That callout should be waiting on the - * lock the caller holds. If we set both - * active/and/pending after we return and the - * lock on the executing callout proceeds, it - * will then see pending is true and return. - * At the return from the actual callout execution - * the migration will occur in softclock_call_cc - * and this new callout will be placed on the - * new CPU via a call to callout_cpu_switch() which - * will get the lock on the right CPU followed - * by a call callout_cc_add() which will add it there. - * (see above in softclock_call_cc()). - */ - cc_migration_cpu(cc, direct) = cpu; - cc_migration_time(cc, direct) = to_sbt; - cc_migration_prec(cc, direct) = precision; - cc_migration_func(cc, direct) = ftn; - cc_migration_arg(cc, direct) = arg; - c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING); - c->c_flags |= CALLOUT_ACTIVE; - CTR6(KTR_CALLOUT, - "migration of %p func %p arg %p in %d.%08x to %u deferred", - c, c->c_func, c->c_arg, (int)(to_sbt >> 32), - (u_int)(to_sbt & 0xffffffff), cpu); - CC_UNLOCK(cc); - return (cancelled); - } - cc = callout_cpu_switch(c, cc, cpu); - } -#endif - - callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags); - CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", - cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), - (u_int)(to_sbt & 0xffffffff)); - CC_UNLOCK(cc); - - return (cancelled); + /* get callback started, if any */ + return (callout_restart_async(c, &coa, NULL, NULL)); } /* * Common idioms that can be optimized in the future. */ int callout_schedule_on(struct callout *c, int to_ticks, int cpu) { return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu); } int callout_schedule(struct callout *c, int to_ticks) { return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu); } int -_callout_stop_safe(struct callout *c, int safe) +callout_stop(struct callout *c) { - struct callout_cpu *cc, *old_cc; - struct lock_class *class; - int direct, sq_locked, use_lock; - int not_on_a_list; + /* get callback stopped, if any */ + return (callout_restart_async(c, NULL, NULL, NULL)); +} - if (safe) - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock, - "calling %s", __func__); +static void +callout_drain_function(void *arg) +{ + wakeup(arg); +} - /* - * Some old subsystems don't hold Giant while running a callout_stop(), - * so just discard this check for the moment. - */ - if (!safe && c->c_lock != NULL) { - if (c->c_lock == &Giant.lock_object) - use_lock = mtx_owned(&Giant); - else { - use_lock = 1; - class = LOCK_CLASS(c->c_lock); - class->lc_assert(c->c_lock, LA_XLOCKED); - } - } else - use_lock = 0; - if (c->c_iflags & CALLOUT_DIRECT) { - direct = 1; - } else { - direct = 0; - } - sq_locked = 0; - old_cc = NULL; -again: - cc = callout_lock(c); +int +callout_drain_async(struct callout *c, callout_func_t *fn, void *arg) +{ + /* get callback stopped, if any */ + return (callout_restart_async( + c, NULL, fn, arg) & CALLOUT_RET_DRAINING); +} - if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) == - (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) && - ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) { - /* - * Special case where this slipped in while we - * were migrating *as* the callout is about to - * execute. The caller probably holds the lock - * the callout wants. - * - * Get rid of the migration first. Then set - * the flag that tells this code *not* to - * try to remove it from any lists (its not - * on one yet). When the callout wheel runs, - * it will ignore this callout. - */ - c->c_iflags &= ~CALLOUT_PENDING; - c->c_flags &= ~CALLOUT_ACTIVE; - not_on_a_list = 1; - } else { - not_on_a_list = 0; - } +int +callout_drain(struct callout *c) +{ + int cancelled; - /* - * If the callout was migrating while the callout cpu lock was - * dropped, just drop the sleepqueue lock and check the states - * again. - */ - if (sq_locked != 0 && cc != old_cc) { -#ifdef SMP - CC_UNLOCK(cc); - sleepq_release(&cc_exec_waiting(old_cc, direct)); - sq_locked = 0; - old_cc = NULL; - goto again; -#else - panic("migration should not happen"); -#endif - } + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, + "Draining callout"); - /* - * If the callout isn't pending, it's not on the queue, so - * don't attempt to remove it from the queue. We can try to - * stop it by other means however. - */ - if (!(c->c_iflags & CALLOUT_PENDING)) { - c->c_flags &= ~CALLOUT_ACTIVE; + callout_lock_client(c->c_flags, c->c_lock); + /* at this point the "c->c_cpu" field is not changing */ + + cancelled = callout_drain_async(c, &callout_drain_function, c); + + if (cancelled != CALLOUT_RET_NORMAL) { + struct callout_cpu *cc; + int direct; + + CTR3(KTR_CALLOUT, "need to drain %p func %p arg %p", + c, c->c_func, c->c_arg); + + cc = callout_lock(c); + direct = ((c->c_flags & CALLOUT_DIRECT) != 0); + /* - * If it wasn't on the queue and it isn't the current - * callout, then we can't stop it, so just bail. + * We've gotten our callout CPU lock, it is safe to + * drop the initial lock: */ - if (cc_exec_curr(cc, direct) != c) { - CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", - c, c->c_func, c->c_arg); - CC_UNLOCK(cc); - if (sq_locked) - sleepq_release(&cc_exec_waiting(cc, direct)); - return (0); - } + callout_unlock_client(c->c_flags, c->c_lock); - if (safe) { - /* - * The current callout is running (or just - * about to run) and blocking is allowed, so - * just wait for the current invocation to - * finish. - */ - while (cc_exec_curr(cc, direct) == c) { - /* - * Use direct calls to sleepqueue interface - * instead of cv/msleep in order to avoid - * a LOR between cc_lock and sleepqueue - * chain spinlocks. This piece of code - * emulates a msleep_spin() call actually. - * - * If we already have the sleepqueue chain - * locked, then we can safely block. If we - * don't already have it locked, however, - * we have to drop the cc_lock to lock - * it. This opens several races, so we - * restart at the beginning once we have - * both locks. If nothing has changed, then - * we will end up back here with sq_locked - * set. - */ - if (!sq_locked) { - CC_UNLOCK(cc); - sleepq_lock( - &cc_exec_waiting(cc, direct)); - sq_locked = 1; - old_cc = cc; - goto again; - } + /* Wait for drain to complete */ - /* - * Migration could be cancelled here, but - * as long as it is still not sure when it - * will be packed up, just let softclock() - * take care of it. - */ - cc_exec_waiting(cc, direct) = true; - DROP_GIANT(); - CC_UNLOCK(cc); - sleepq_add( - &cc_exec_waiting(cc, direct), - &cc->cc_lock.lock_object, "codrain", - SLEEPQ_SLEEP, 0); - sleepq_wait( - &cc_exec_waiting(cc, direct), - 0); - sq_locked = 0; - old_cc = NULL; + while (cc_exec_curr(cc, direct) == c) + msleep_spin(c, (struct mtx *)&cc->cc_lock, "codrain", 0); - /* Reacquire locks previously released. */ - PICKUP_GIANT(); - CC_LOCK(cc); - } - } else if (use_lock && - !cc_exec_cancel(cc, direct)) { - - /* - * The current callout is waiting for its - * lock which we hold. Cancel the callout - * and return. After our caller drops the - * lock, the callout will be skipped in - * softclock(). - */ - cc_exec_cancel(cc, direct) = true; - CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", - c, c->c_func, c->c_arg); - KASSERT(!cc_cce_migrating(cc, direct), - ("callout wrongly scheduled for migration")); - if (callout_migrating(c)) { - c->c_iflags &= ~CALLOUT_DFRMIGRATION; -#ifdef SMP - cc_migration_cpu(cc, direct) = CPUBLOCK; - cc_migration_time(cc, direct) = 0; - cc_migration_prec(cc, direct) = 0; - cc_migration_func(cc, direct) = NULL; - cc_migration_arg(cc, direct) = NULL; -#endif - } - CC_UNLOCK(cc); - KASSERT(!sq_locked, ("sleepqueue chain locked")); - return (1); - } else if (callout_migrating(c)) { - /* - * The callout is currently being serviced - * and the "next" callout is scheduled at - * its completion with a migration. We remove - * the migration flag so it *won't* get rescheduled, - * but we can't stop the one thats running so - * we return 0. - */ - c->c_iflags &= ~CALLOUT_DFRMIGRATION; -#ifdef SMP - /* - * We can't call cc_cce_cleanup here since - * if we do it will remove .ce_curr and - * its still running. This will prevent a - * reschedule of the callout when the - * execution completes. - */ - cc_migration_cpu(cc, direct) = CPUBLOCK; - cc_migration_time(cc, direct) = 0; - cc_migration_prec(cc, direct) = 0; - cc_migration_func(cc, direct) = NULL; - cc_migration_arg(cc, direct) = NULL; -#endif - CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", - c, c->c_func, c->c_arg); - CC_UNLOCK(cc); - return (0); - } - CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", - c, c->c_func, c->c_arg); CC_UNLOCK(cc); - KASSERT(!sq_locked, ("sleepqueue chain still locked")); - return (0); + } else { + callout_unlock_client(c->c_flags, c->c_lock); } - if (sq_locked) - sleepq_release(&cc_exec_waiting(cc, direct)); - c->c_iflags &= ~CALLOUT_PENDING; - c->c_flags &= ~CALLOUT_ACTIVE; - CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); - if (not_on_a_list == 0) { - if ((c->c_iflags & CALLOUT_PROCESSED) == 0) { - if (cc_exec_next(cc) == c) - cc_exec_next(cc) = LIST_NEXT(c, c_links.le); - LIST_REMOVE(c, c_links.le); - } else { - TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); - } - } - callout_cc_del(c, cc); - CC_UNLOCK(cc); - return (1); + + return (cancelled & CALLOUT_RET_CANCELLED); } void callout_init(struct callout *c, int mpsafe) { - bzero(c, sizeof *c); if (mpsafe) { - c->c_lock = NULL; - c->c_iflags = CALLOUT_RETURNUNLOCKED; + _callout_init_lock(c, NULL, CALLOUT_RETURNUNLOCKED); } else { - c->c_lock = &Giant.lock_object; - c->c_iflags = 0; + _callout_init_lock(c, &Giant.lock_object, 0); } - c->c_cpu = timeout_cpu; } void _callout_init_lock(struct callout *c, struct lock_object *lock, int flags) { bzero(c, sizeof *c); + KASSERT((flags & ~CALLOUT_RETURNUNLOCKED) == 0, + ("callout_init_lock: bad flags 0x%08x", flags)); + flags &= CALLOUT_RETURNUNLOCKED; + if (lock != NULL) { + struct lock_class *class = LOCK_CLASS(lock); + if (class == &lock_class_mtx_sleep) + flags |= CALLOUT_SET_LC(CALLOUT_LC_MUTEX); + else if (class == &lock_class_mtx_spin) + flags |= CALLOUT_SET_LC(CALLOUT_LC_SPIN); + else if (class == &lock_class_rm) + flags |= CALLOUT_SET_LC(CALLOUT_LC_RM); + else if (class == &lock_class_rw) + flags |= CALLOUT_SET_LC(CALLOUT_LC_RW); + else + panic("callout_init_lock: Unsupported lock class '%s'\n", class->lc_name); + } else { + flags |= CALLOUT_SET_LC(CALLOUT_LC_UNUSED_0); + } c->c_lock = lock; - KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0, - ("callout_init_lock: bad flags %d", flags)); - KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0, - ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock")); - KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & - (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class", - __func__)); - c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); + c->c_flags = flags; c->c_cpu = timeout_cpu; } #ifdef APM_FIXUP_CALLTODO /* * Adjust the kernel calltodo timeout list. This routine is used after * an APM resume to recalculate the calltodo timer list values with the * number of hz's we have been sleeping. The next hardclock() will detect * that there are fired timers and run softclock() to execute them. * * Please note, I have not done an exhaustive analysis of what code this * might break. I am motivated to have my select()'s and alarm()'s that * have expired during suspend firing upon resume so that the applications * which set the timer can do the maintanence the timer was for as close * as possible to the originally intended time. Testing this code for a * week showed that resuming from a suspend resulted in 22 to 25 timers * firing, which seemed independant on whether the suspend was 2 hours or * 2 days. Your milage may vary. - Ken Key */ void adjust_timeout_calltodo(struct timeval *time_change) { register struct callout *p; unsigned long delta_ticks; /* * How many ticks were we asleep? * (stolen from tvtohz()). */ /* Don't do anything */ if (time_change->tv_sec < 0) return; else if (time_change->tv_sec <= LONG_MAX / 1000000) delta_ticks = (time_change->tv_sec * 1000000 + time_change->tv_usec + (tick - 1)) / tick + 1; else if (time_change->tv_sec <= LONG_MAX / hz) delta_ticks = time_change->tv_sec * hz + (time_change->tv_usec + (tick - 1)) / tick + 1; else delta_ticks = LONG_MAX; if (delta_ticks > INT_MAX) delta_ticks = INT_MAX; /* * Now rip through the timer calltodo list looking for timers * to expire. */ /* don't collide with softclock() */ CC_LOCK(cc); for (p = calltodo.c_next; p != NULL; p = p->c_next) { p->c_time -= delta_ticks; /* Break if the timer had more time on it than delta_ticks */ if (p->c_time > 0) break; /* take back the ticks the timer didn't use (p->c_time <= 0) */ delta_ticks = -p->c_time; } CC_UNLOCK(cc); return; } #endif /* APM_FIXUP_CALLTODO */ static int flssbt(sbintime_t sbt) { sbt += (uint64_t)sbt >> 1; if (sizeof(long) >= sizeof(sbintime_t)) return (flsl(sbt)); if (sbt >= SBT_1S) return (flsl(((uint64_t)sbt) >> 32) + 32); return (flsl(sbt)); } /* * Dump immediate statistic snapshot of the scheduled callouts. */ static int sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS) { struct callout *tmp; struct callout_cpu *cc; struct callout_list *sc; sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t; int ct[64], cpr[64], ccpbk[32]; int error, val, i, count, tcum, pcum, maxc, c, medc; #ifdef SMP int cpu; #endif val = 0; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); count = maxc = 0; st = spr = maxt = maxpr = 0; bzero(ccpbk, sizeof(ccpbk)); bzero(ct, sizeof(ct)); bzero(cpr, sizeof(cpr)); now = sbinuptime(); #ifdef SMP CPU_FOREACH(cpu) { cc = CC_CPU(cpu); #else cc = CC_CPU(timeout_cpu); #endif CC_LOCK(cc); for (i = 0; i < callwheelsize; i++) { sc = &cc->cc_callwheel[i]; c = 0; LIST_FOREACH(tmp, sc, c_links.le) { c++; t = tmp->c_time - now; if (t < 0) t = 0; st += t / SBT_1US; spr += tmp->c_precision / SBT_1US; if (t > maxt) maxt = t; if (tmp->c_precision > maxpr) maxpr = tmp->c_precision; ct[flssbt(t)]++; cpr[flssbt(tmp->c_precision)]++; } if (c > maxc) maxc = c; ccpbk[fls(c + c / 2)]++; count += c; } CC_UNLOCK(cc); #ifdef SMP } #endif for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++) tcum += ct[i]; medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++) pcum += cpr[i]; medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; for (i = 0, c = 0; i < 32 && c < count / 2; i++) c += ccpbk[i]; medc = (i >= 2) ? (1 << (i - 2)) : 0; printf("Scheduled callouts statistic snapshot:\n"); printf(" Callouts: %6d Buckets: %6d*%-3d Bucket size: 0.%06ds\n", count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT); printf(" C/Bk: med %5d avg %6d.%06jd max %6d\n", medc, count / callwheelsize / mp_ncpus, (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000, maxc); printf(" Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32, (st / count) / 1000000, (st / count) % 1000000, maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32); printf(" Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32, (spr / count) / 1000000, (spr / count) % 1000000, maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32); printf(" Distribution: \tbuckets\t time\t tcum\t" " prec\t pcum\n"); for (i = 0, tcum = pcum = 0; i < 64; i++) { if (ct[i] == 0 && cpr[i] == 0) continue; t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0; tcum += ct[i]; pcum += cpr[i]; printf(" %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n", t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32, i - 1 - (32 - CC_HASH_SHIFT), ct[i], tcum, cpr[i], pcum); } return (error); } SYSCTL_PROC(_kern, OID_AUTO, callout_stat, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_callout_stat, "I", "Dump immediate statistic snapshot of the scheduled callouts"); Index: projects/hps_head/sys/kern/subr_sleepqueue.c =================================================================== --- projects/hps_head/sys/kern/subr_sleepqueue.c (revision 282413) +++ projects/hps_head/sys/kern/subr_sleepqueue.c (revision 282414) @@ -1,1247 +1,1228 @@ /*- * Copyright (c) 2004 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Implementation of sleep queues used to hold queue of threads blocked on * a wait channel. Sleep queues different from turnstiles in that wait * channels are not owned by anyone, so there is no priority propagation. * Sleep queues can also provide a timeout and can also be interrupted by * signals. That said, there are several similarities between the turnstile * and sleep queue implementations. (Note: turnstiles were implemented * first.) For example, both use a hash table of the same size where each * bucket is referred to as a "chain" that contains both a spin lock and * a linked list of queues. An individual queue is located by using a hash * to pick a chain, locking the chain, and then walking the chain searching * for the queue. This means that a wait channel object does not need to * embed it's queue head just as locks do not embed their turnstile queue * head. Threads also carry around a sleep queue that they lend to the * wait channel when blocking. Just as in turnstiles, the queue includes * a free list of the sleep queues of other threads blocked on the same * wait channel in the case of multiple waiters. * * Some additional functionality provided by sleep queues include the * ability to set a timeout. The timeout is managed using a per-thread * callout that resumes a thread if it is asleep. A thread may also * catch signals while it is asleep (aka an interruptible sleep). The * signal code uses sleepq_abort() to interrupt a sleeping thread. Finally, * sleep queues also provide some extra assertions. One is not allowed to * mix the sleep/wakeup and cv APIs for a given wait channel. Also, one * must consistently use the same lock to synchronize with a wait channel, * though this check is currently only a warning for sleep/wakeup due to * pre-existing abuse of that API. The same lock must also be held when * awakening threads, though that is currently only enforced for condition * variables. */ #include __FBSDID("$FreeBSD$"); #include "opt_sleepqueue_profiling.h" #include "opt_ddb.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif /* * Constants for the hash table of sleep queue chains. * SC_TABLESIZE must be a power of two for SC_MASK to work properly. */ #define SC_TABLESIZE 256 /* Must be power of 2. */ #define SC_MASK (SC_TABLESIZE - 1) #define SC_SHIFT 8 #define SC_HASH(wc) ((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \ SC_MASK) #define SC_LOOKUP(wc) &sleepq_chains[SC_HASH(wc)] #define NR_SLEEPQS 2 /* * There two different lists of sleep queues. Both lists are connected * via the sq_hash entries. The first list is the sleep queue chain list * that a sleep queue is on when it is attached to a wait channel. The * second list is the free list hung off of a sleep queue that is attached * to a wait channel. * * Each sleep queue also contains the wait channel it is attached to, the * list of threads blocked on that wait channel, flags specific to the * wait channel, and the lock used to synchronize with a wait channel. * The flags are used to catch mismatches between the various consumers * of the sleep queue API (e.g. sleep/wakeup and condition variables). * The lock pointer is only used when invariants are enabled for various * debugging checks. * * Locking key: * c - sleep queue chain lock */ struct sleepqueue { TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */ u_int sq_blockedcnt[NR_SLEEPQS]; /* (c) N. of blocked threads. */ LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */ LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */ void *sq_wchan; /* (c) Wait channel. */ int sq_type; /* (c) Queue type. */ #ifdef INVARIANTS struct lock_object *sq_lock; /* (c) Associated lock. */ #endif }; struct sleepqueue_chain { LIST_HEAD(, sleepqueue) sc_queues; /* List of sleep queues. */ struct mtx sc_lock; /* Spin lock for this chain. */ #ifdef SLEEPQUEUE_PROFILING u_int sc_depth; /* Length of sc_queues. */ u_int sc_max_depth; /* Max length of sc_queues. */ #endif }; #ifdef SLEEPQUEUE_PROFILING u_int sleepq_max_depth; static SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD, 0, "sleepq profiling"); static SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD, 0, "sleepq chain stats"); SYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth, 0, "maxmimum depth achieved of a single chain"); static void sleepq_profile(const char *wmesg); static int prof_enabled; #endif static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE]; static uma_zone_t sleepq_zone; /* * Prototypes for non-exported routines. */ static int sleepq_catch_signals(void *wchan, int pri); static int sleepq_check_signals(void); -static int sleepq_check_timeout(void); +static int sleepq_check_timeout(struct thread *); +static void sleepq_stop_timeout(struct thread *); #ifdef INVARIANTS static void sleepq_dtor(void *mem, int size, void *arg); #endif static int sleepq_init(void *mem, int size, int flags); static int sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri); static void sleepq_switch(void *wchan, int pri); static void sleepq_timeout(void *arg); SDT_PROBE_DECLARE(sched, , , sleep); SDT_PROBE_DECLARE(sched, , , wakeup); /* * Initialize SLEEPQUEUE_PROFILING specific sysctl nodes. * Note that it must happen after sleepinit() has been fully executed, so * it must happen after SI_SUB_KMEM SYSINIT() subsystem setup. */ #ifdef SLEEPQUEUE_PROFILING static void init_sleepqueue_profiling(void) { char chain_name[10]; struct sysctl_oid *chain_oid; u_int i; for (i = 0; i < SC_TABLESIZE; i++) { snprintf(chain_name, sizeof(chain_name), "%u", i); chain_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO, chain_name, CTLFLAG_RD, NULL, "sleepq chain stats"); SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL); SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0, NULL); } } SYSINIT(sleepqueue_profiling, SI_SUB_LOCK, SI_ORDER_ANY, init_sleepqueue_profiling, NULL); #endif /* * Early initialization of sleep queues that is called from the sleepinit() * SYSINIT. */ void init_sleepqueues(void) { int i; for (i = 0; i < SC_TABLESIZE; i++) { LIST_INIT(&sleepq_chains[i].sc_queues); mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL, MTX_SPIN | MTX_RECURSE); } sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue), #ifdef INVARIANTS NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0); #else NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0); #endif thread0.td_sleepqueue = sleepq_alloc(); } /* * Get a sleep queue for a new thread. */ struct sleepqueue * sleepq_alloc(void) { return (uma_zalloc(sleepq_zone, M_WAITOK)); } /* * Free a sleep queue when a thread is destroyed. */ void sleepq_free(struct sleepqueue *sq) { uma_zfree(sleepq_zone, sq); } /* * Lock the sleep queue chain associated with the specified wait channel. */ void sleepq_lock(void *wchan) { struct sleepqueue_chain *sc; sc = SC_LOOKUP(wchan); mtx_lock_spin(&sc->sc_lock); } /* * Look up the sleep queue associated with a given wait channel in the hash * table locking the associated sleep queue chain. If no queue is found in * the table, NULL is returned. */ struct sleepqueue * sleepq_lookup(void *wchan) { struct sleepqueue_chain *sc; struct sleepqueue *sq; KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); LIST_FOREACH(sq, &sc->sc_queues, sq_hash) if (sq->sq_wchan == wchan) return (sq); return (NULL); } /* * Unlock the sleep queue chain associated with a given wait channel. */ void sleepq_release(void *wchan) { struct sleepqueue_chain *sc; sc = SC_LOOKUP(wchan); mtx_unlock_spin(&sc->sc_lock); } /* * Places the current thread on the sleep queue for the specified wait * channel. If INVARIANTS is enabled, then it associates the passed in * lock with the sleepq to make sure it is held when that sleep queue is * woken up. */ void sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags, int queue) { struct sleepqueue_chain *sc; struct sleepqueue *sq; struct thread *td; td = curthread; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); MPASS(td->td_sleepqueue != NULL); MPASS(wchan != NULL); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); /* If this thread is not allowed to sleep, die a horrible death. */ KASSERT(td->td_no_sleeping == 0, ("%s: td %p to sleep on wchan %p with sleeping prohibited", __func__, td, wchan)); /* Look up the sleep queue associated with the wait channel 'wchan'. */ sq = sleepq_lookup(wchan); /* * If the wait channel does not already have a sleep queue, use * this thread's sleep queue. Otherwise, insert the current thread * into the sleep queue already in use by this wait channel. */ if (sq == NULL) { #ifdef INVARIANTS int i; sq = td->td_sleepqueue; for (i = 0; i < NR_SLEEPQS; i++) { KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]), ("thread's sleep queue %d is not empty", i)); KASSERT(sq->sq_blockedcnt[i] == 0, ("thread's sleep queue %d count mismatches", i)); } KASSERT(LIST_EMPTY(&sq->sq_free), ("thread's sleep queue has a non-empty free list")); KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer")); sq->sq_lock = lock; #endif #ifdef SLEEPQUEUE_PROFILING sc->sc_depth++; if (sc->sc_depth > sc->sc_max_depth) { sc->sc_max_depth = sc->sc_depth; if (sc->sc_max_depth > sleepq_max_depth) sleepq_max_depth = sc->sc_max_depth; } #endif sq = td->td_sleepqueue; LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash); sq->sq_wchan = wchan; sq->sq_type = flags & SLEEPQ_TYPE; } else { MPASS(wchan == sq->sq_wchan); MPASS(lock == sq->sq_lock); MPASS((flags & SLEEPQ_TYPE) == sq->sq_type); LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash); } thread_lock(td); TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq); sq->sq_blockedcnt[queue]++; td->td_sleepqueue = NULL; td->td_sqqueue = queue; td->td_wchan = wchan; td->td_wmesg = wmesg; if (flags & SLEEPQ_INTERRUPTIBLE) { td->td_flags |= TDF_SINTR; td->td_flags &= ~TDF_SLEEPABORT; } thread_unlock(td); } /* * Sets a timeout that will remove the current thread from the specified * sleep queue after timo ticks if the thread has not already been awakened. */ void sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr, int flags) { - struct sleepqueue_chain *sc; struct thread *td; td = curthread; - sc = SC_LOOKUP(wchan); - mtx_assert(&sc->sc_lock, MA_OWNED); - MPASS(TD_ON_SLEEPQ(td)); - MPASS(td->td_sleepqueue == NULL); - MPASS(wchan != NULL); + + mtx_lock_spin(&td->td_slpmutex); callout_reset_sbt_on(&td->td_slpcallout, sbt, pr, sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC); + mtx_unlock_spin(&td->td_slpmutex); } /* * Return the number of actual sleepers for the specified queue. */ u_int sleepq_sleepcnt(void *wchan, int queue) { struct sleepqueue *sq; KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); sq = sleepq_lookup(wchan); if (sq == NULL) return (0); return (sq->sq_blockedcnt[queue]); } /* * Marks the pending sleep of the current thread as interruptible and * makes an initial check for pending signals before putting a thread * to sleep. Enters and exits with the thread lock held. Thread lock * may have transitioned from the sleepq lock to a run lock. */ static int sleepq_catch_signals(void *wchan, int pri) { struct sleepqueue_chain *sc; struct sleepqueue *sq; struct thread *td; struct proc *p; struct sigacts *ps; int sig, ret; td = curthread; p = curproc; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); MPASS(wchan != NULL); if ((td->td_pflags & TDP_WAKEUP) != 0) { td->td_pflags &= ~TDP_WAKEUP; ret = EINTR; thread_lock(td); goto out; } /* * See if there are any pending signals for this thread. If not * we can switch immediately. Otherwise do the signal processing * directly. */ thread_lock(td); if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) == 0) { sleepq_switch(wchan, pri); return (0); } thread_unlock(td); mtx_unlock_spin(&sc->sc_lock); CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)", (void *)td, (long)p->p_pid, td->td_name); PROC_LOCK(p); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); sig = cursig(td); if (sig == 0) { mtx_unlock(&ps->ps_mtx); ret = thread_suspend_check(1); MPASS(ret == 0 || ret == EINTR || ret == ERESTART); } else { if (SIGISMEMBER(ps->ps_sigintr, sig)) ret = EINTR; else ret = ERESTART; mtx_unlock(&ps->ps_mtx); } /* * Lock the per-process spinlock prior to dropping the PROC_LOCK * to avoid a signal delivery race. PROC_LOCK, PROC_SLOCK, and * thread_lock() are currently held in tdsendsignal(). */ PROC_SLOCK(p); mtx_lock_spin(&sc->sc_lock); PROC_UNLOCK(p); thread_lock(td); PROC_SUNLOCK(p); if (ret == 0) { sleepq_switch(wchan, pri); return (0); } out: /* * There were pending signals and this thread is still * on the sleep queue, remove it from the sleep queue. */ if (TD_ON_SLEEPQ(td)) { sq = sleepq_lookup(wchan); if (sleepq_resume_thread(sq, td, 0)) { #ifdef INVARIANTS /* * This thread hasn't gone to sleep yet, so it * should not be swapped out. */ panic("not waking up swapper"); #endif } } mtx_unlock_spin(&sc->sc_lock); MPASS(td->td_lock != &sc->sc_lock); return (ret); } /* * Switches to another thread if we are still asleep on a sleep queue. * Returns with thread lock. */ static void sleepq_switch(void *wchan, int pri) { struct sleepqueue_chain *sc; struct sleepqueue *sq; struct thread *td; td = curthread; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); /* * If we have a sleep queue, then we've already been woken up, so * just return. */ if (td->td_sleepqueue != NULL) { mtx_unlock_spin(&sc->sc_lock); return; } /* * If TDF_TIMEOUT is set, then our sleep has been timed out * already but we are still on the sleep queue, so dequeue the * thread and return. */ if (td->td_flags & TDF_TIMEOUT) { MPASS(TD_ON_SLEEPQ(td)); sq = sleepq_lookup(wchan); if (sleepq_resume_thread(sq, td, 0)) { #ifdef INVARIANTS /* * This thread hasn't gone to sleep yet, so it * should not be swapped out. */ panic("not waking up swapper"); #endif } mtx_unlock_spin(&sc->sc_lock); return; } #ifdef SLEEPQUEUE_PROFILING if (prof_enabled) sleepq_profile(td->td_wmesg); #endif MPASS(td->td_sleepqueue == NULL); sched_sleep(td, pri); thread_lock_set(td, &sc->sc_lock); SDT_PROBE0(sched, , , sleep); TD_SET_SLEEPING(td); mi_switch(SW_VOL | SWT_SLEEPQ, NULL); KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING")); CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); } /* * Check to see if we timed out. */ static int -sleepq_check_timeout(void) +sleepq_check_timeout(struct thread *td) { - struct thread *td; - - td = curthread; THREAD_LOCK_ASSERT(td, MA_OWNED); /* * If TDF_TIMEOUT is set, we timed out. */ if (td->td_flags & TDF_TIMEOUT) { td->td_flags &= ~TDF_TIMEOUT; return (EWOULDBLOCK); } - - /* - * If TDF_TIMOFAIL is set, the timeout ran after we had - * already been woken up. - */ - if (td->td_flags & TDF_TIMOFAIL) - td->td_flags &= ~TDF_TIMOFAIL; - - /* - * If callout_stop() fails, then the timeout is running on - * another CPU, so synchronize with it to avoid having it - * accidentally wake up a subsequent sleep. - */ - else if (callout_stop(&td->td_slpcallout) == 0) { - td->td_flags |= TDF_TIMEOUT; - TD_SET_SLEEPING(td); - mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL); - } return (0); } /* + * Atomically stop the timeout by using a mutex. + */ +static void +sleepq_stop_timeout(struct thread *td) +{ + mtx_lock_spin(&td->td_slpmutex); + callout_stop(&td->td_slpcallout); + mtx_unlock_spin(&td->td_slpmutex); +} + +/* * Check to see if we were awoken by a signal. */ static int sleepq_check_signals(void) { struct thread *td; td = curthread; THREAD_LOCK_ASSERT(td, MA_OWNED); /* We are no longer in an interruptible sleep. */ if (td->td_flags & TDF_SINTR) td->td_flags &= ~TDF_SINTR; if (td->td_flags & TDF_SLEEPABORT) { td->td_flags &= ~TDF_SLEEPABORT; return (td->td_intrval); } return (0); } /* * Block the current thread until it is awakened from its sleep queue. */ void sleepq_wait(void *wchan, int pri) { struct thread *td; td = curthread; MPASS(!(td->td_flags & TDF_SINTR)); thread_lock(td); sleepq_switch(wchan, pri); thread_unlock(td); } /* * Block the current thread until it is awakened from its sleep queue * or it is interrupted by a signal. */ int sleepq_wait_sig(void *wchan, int pri) { int rcatch; int rval; rcatch = sleepq_catch_signals(wchan, pri); rval = sleepq_check_signals(); thread_unlock(curthread); if (rcatch) return (rcatch); return (rval); } /* * Block the current thread until it is awakened from its sleep queue * or it times out while waiting. */ int sleepq_timedwait(void *wchan, int pri) { struct thread *td; int rval; td = curthread; MPASS(!(td->td_flags & TDF_SINTR)); thread_lock(td); sleepq_switch(wchan, pri); - rval = sleepq_check_timeout(); + rval = sleepq_check_timeout(td); thread_unlock(td); + sleepq_stop_timeout(td); + return (rval); } /* * Block the current thread until it is awakened from its sleep queue, * it is interrupted by a signal, or it times out waiting to be awakened. */ int sleepq_timedwait_sig(void *wchan, int pri) { + struct thread *td; int rcatch, rvalt, rvals; + td = curthread; + rcatch = sleepq_catch_signals(wchan, pri); - rvalt = sleepq_check_timeout(); + rvalt = sleepq_check_timeout(td); rvals = sleepq_check_signals(); - thread_unlock(curthread); + thread_unlock(td); + + sleepq_stop_timeout(td); + if (rcatch) return (rcatch); if (rvals) return (rvals); return (rvalt); } /* * Returns the type of sleepqueue given a waitchannel. */ int sleepq_type(void *wchan) { struct sleepqueue *sq; int type; MPASS(wchan != NULL); sleepq_lock(wchan); sq = sleepq_lookup(wchan); if (sq == NULL) { sleepq_release(wchan); return (-1); } type = sq->sq_type; sleepq_release(wchan); return (type); } /* * Removes a thread from a sleep queue and makes it * runnable. */ static int sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri) { struct sleepqueue_chain *sc; MPASS(td != NULL); MPASS(sq->sq_wchan != NULL); MPASS(td->td_wchan == sq->sq_wchan); MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0); THREAD_LOCK_ASSERT(td, MA_OWNED); sc = SC_LOOKUP(sq->sq_wchan); mtx_assert(&sc->sc_lock, MA_OWNED); SDT_PROBE2(sched, , , wakeup, td, td->td_proc); /* Remove the thread from the queue. */ sq->sq_blockedcnt[td->td_sqqueue]--; TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq); /* * Get a sleep queue for this thread. If this is the last waiter, * use the queue itself and take it out of the chain, otherwise, * remove a queue from the free list. */ if (LIST_EMPTY(&sq->sq_free)) { td->td_sleepqueue = sq; #ifdef INVARIANTS sq->sq_wchan = NULL; #endif #ifdef SLEEPQUEUE_PROFILING sc->sc_depth--; #endif } else td->td_sleepqueue = LIST_FIRST(&sq->sq_free); LIST_REMOVE(td->td_sleepqueue, sq_hash); td->td_wmesg = NULL; td->td_wchan = NULL; td->td_flags &= ~TDF_SINTR; CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, td->td_name); /* Adjust priority if requested. */ MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX)); if (pri != 0 && td->td_priority > pri && PRI_BASE(td->td_pri_class) == PRI_TIMESHARE) sched_prio(td, pri); /* * Note that thread td might not be sleeping if it is running * sleepq_catch_signals() on another CPU or is blocked on its * proc lock to check signals. There's no need to mark the * thread runnable in that case. */ if (TD_IS_SLEEPING(td)) { TD_CLR_SLEEPING(td); return (setrunnable(td)); } return (0); } #ifdef INVARIANTS /* * UMA zone item deallocator. */ static void sleepq_dtor(void *mem, int size, void *arg) { struct sleepqueue *sq; int i; sq = mem; for (i = 0; i < NR_SLEEPQS; i++) { MPASS(TAILQ_EMPTY(&sq->sq_blocked[i])); MPASS(sq->sq_blockedcnt[i] == 0); } } #endif /* * UMA zone item initializer. */ static int sleepq_init(void *mem, int size, int flags) { struct sleepqueue *sq; int i; bzero(mem, size); sq = mem; for (i = 0; i < NR_SLEEPQS; i++) { TAILQ_INIT(&sq->sq_blocked[i]); sq->sq_blockedcnt[i] = 0; } LIST_INIT(&sq->sq_free); return (0); } /* * Find the highest priority thread sleeping on a wait channel and resume it. */ int sleepq_signal(void *wchan, int flags, int pri, int queue) { struct sleepqueue *sq; struct thread *td, *besttd; int wakeup_swapper; CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags); KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); sq = sleepq_lookup(wchan); if (sq == NULL) return (0); KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), ("%s: mismatch between sleep/wakeup and cv_*", __func__)); /* * Find the highest priority thread on the queue. If there is a * tie, use the thread that first appears in the queue as it has * been sleeping the longest since threads are always added to * the tail of sleep queues. */ besttd = NULL; TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) { if (besttd == NULL || td->td_priority < besttd->td_priority) besttd = td; } MPASS(besttd != NULL); thread_lock(besttd); wakeup_swapper = sleepq_resume_thread(sq, besttd, pri); thread_unlock(besttd); return (wakeup_swapper); } /* * Resume all threads sleeping on a specified wait channel. */ int sleepq_broadcast(void *wchan, int flags, int pri, int queue) { struct sleepqueue *sq; struct thread *td, *tdn; int wakeup_swapper; CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags); KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); sq = sleepq_lookup(wchan); if (sq == NULL) return (0); KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), ("%s: mismatch between sleep/wakeup and cv_*", __func__)); /* Resume all blocked threads on the sleep queue. */ wakeup_swapper = 0; TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) { thread_lock(td); if (sleepq_resume_thread(sq, td, pri)) wakeup_swapper = 1; thread_unlock(td); } return (wakeup_swapper); } /* * Time sleeping threads out. When the timeout expires, the thread is * removed from the sleep queue and made runnable if it is still asleep. */ static void sleepq_timeout(void *arg) { - struct sleepqueue_chain *sc; - struct sleepqueue *sq; - struct thread *td; - void *wchan; - int wakeup_swapper; + struct thread *td = arg; + int wakeup_swapper = 0; - td = arg; - wakeup_swapper = 0; CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); - /* - * First, see if the thread is asleep and get the wait channel if - * it is. - */ - thread_lock(td); - if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) { - wchan = td->td_wchan; - sc = SC_LOOKUP(wchan); - THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock); - sq = sleepq_lookup(wchan); - MPASS(sq != NULL); - td->td_flags |= TDF_TIMEOUT; - wakeup_swapper = sleepq_resume_thread(sq, td, 0); - thread_unlock(td); - if (wakeup_swapper) - kick_proc0(); - return; - } + /* Handle the three cases which can happen */ - /* - * If the thread is on the SLEEPQ but isn't sleeping yet, it - * can either be on another CPU in between sleepq_add() and - * one of the sleepq_*wait*() routines or it can be in - * sleepq_catch_signals(). - */ + thread_lock(td); if (TD_ON_SLEEPQ(td)) { - td->td_flags |= TDF_TIMEOUT; - thread_unlock(td); - return; - } + if (TD_IS_SLEEPING(td)) { + struct sleepqueue_chain *sc; + struct sleepqueue *sq; + void *wchan; - /* - * Now check for the edge cases. First, if TDF_TIMEOUT is set, - * then the other thread has already yielded to us, so clear - * the flag and resume it. If TDF_TIMEOUT is not set, then the - * we know that the other thread is not on a sleep queue, but it - * hasn't resumed execution yet. In that case, set TDF_TIMOFAIL - * to let it know that the timeout has already run and doesn't - * need to be canceled. - */ - if (td->td_flags & TDF_TIMEOUT) { - MPASS(TD_IS_SLEEPING(td)); - td->td_flags &= ~TDF_TIMEOUT; - TD_CLR_SLEEPING(td); - wakeup_swapper = setrunnable(td); - } else - td->td_flags |= TDF_TIMOFAIL; + /* + * Case I - thread is asleep and needs to be + * awoken: + */ + wchan = td->td_wchan; + sc = SC_LOOKUP(wchan); + THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock); + sq = sleepq_lookup(wchan); + MPASS(sq != NULL); + td->td_flags |= TDF_TIMEOUT; + wakeup_swapper = sleepq_resume_thread(sq, td, 0); + } else { + /* + * Case II - cancel going to sleep by setting + * the timeout flag because the target thread + * is not asleep yet. It can be on another CPU + * in between sleepq_add() and one of the + * sleepq_*wait*() routines or it can be in + * sleepq_catch_signals(). + */ + td->td_flags |= TDF_TIMEOUT; + } + } else { + /* + * Case III - thread is already woken up by a wakeup + * call and should not timeout. Nothing to do! + */ + } thread_unlock(td); if (wakeup_swapper) kick_proc0(); } /* * Resumes a specific thread from the sleep queue associated with a specific * wait channel if it is on that queue. */ void sleepq_remove(struct thread *td, void *wchan) { struct sleepqueue *sq; int wakeup_swapper; /* * Look up the sleep queue for this wait channel, then re-check * that the thread is asleep on that channel, if it is not, then * bail. */ MPASS(wchan != NULL); sleepq_lock(wchan); sq = sleepq_lookup(wchan); /* * We can not lock the thread here as it may be sleeping on a * different sleepq. However, holding the sleepq lock for this * wchan can guarantee that we do not miss a wakeup for this * channel. The asserts below will catch any false positives. */ if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) { sleepq_release(wchan); return; } /* Thread is asleep on sleep queue sq, so wake it up. */ thread_lock(td); MPASS(sq != NULL); MPASS(td->td_wchan == wchan); wakeup_swapper = sleepq_resume_thread(sq, td, 0); thread_unlock(td); sleepq_release(wchan); if (wakeup_swapper) kick_proc0(); } /* * Abort a thread as if an interrupt had occurred. Only abort * interruptible waits (unfortunately it isn't safe to abort others). */ int sleepq_abort(struct thread *td, int intrval) { struct sleepqueue *sq; void *wchan; THREAD_LOCK_ASSERT(td, MA_OWNED); MPASS(TD_ON_SLEEPQ(td)); MPASS(td->td_flags & TDF_SINTR); MPASS(intrval == EINTR || intrval == ERESTART); /* * If the TDF_TIMEOUT flag is set, just leave. A * timeout is scheduled anyhow. */ if (td->td_flags & TDF_TIMEOUT) return (0); CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); td->td_intrval = intrval; td->td_flags |= TDF_SLEEPABORT; /* * If the thread has not slept yet it will find the signal in * sleepq_catch_signals() and call sleepq_resume_thread. Otherwise * we have to do it here. */ if (!TD_IS_SLEEPING(td)) return (0); wchan = td->td_wchan; MPASS(wchan != NULL); sq = sleepq_lookup(wchan); MPASS(sq != NULL); /* Thread is asleep on sleep queue sq, so wake it up. */ return (sleepq_resume_thread(sq, td, 0)); } #ifdef SLEEPQUEUE_PROFILING #define SLEEPQ_PROF_LOCATIONS 1024 #define SLEEPQ_SBUFSIZE 512 struct sleepq_prof { LIST_ENTRY(sleepq_prof) sp_link; const char *sp_wmesg; long sp_count; }; LIST_HEAD(sqphead, sleepq_prof); struct sqphead sleepq_prof_free; struct sqphead sleepq_hash[SC_TABLESIZE]; static struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS]; static struct mtx sleepq_prof_lock; MTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN); static void sleepq_profile(const char *wmesg) { struct sleepq_prof *sp; mtx_lock_spin(&sleepq_prof_lock); if (prof_enabled == 0) goto unlock; LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link) if (sp->sp_wmesg == wmesg) goto done; sp = LIST_FIRST(&sleepq_prof_free); if (sp == NULL) goto unlock; sp->sp_wmesg = wmesg; LIST_REMOVE(sp, sp_link); LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link); done: sp->sp_count++; unlock: mtx_unlock_spin(&sleepq_prof_lock); return; } static void sleepq_prof_reset(void) { struct sleepq_prof *sp; int enabled; int i; mtx_lock_spin(&sleepq_prof_lock); enabled = prof_enabled; prof_enabled = 0; for (i = 0; i < SC_TABLESIZE; i++) LIST_INIT(&sleepq_hash[i]); LIST_INIT(&sleepq_prof_free); for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) { sp = &sleepq_profent[i]; sp->sp_wmesg = NULL; sp->sp_count = 0; LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link); } prof_enabled = enabled; mtx_unlock_spin(&sleepq_prof_lock); } static int enable_sleepq_prof(SYSCTL_HANDLER_ARGS) { int error, v; v = prof_enabled; error = sysctl_handle_int(oidp, &v, v, req); if (error) return (error); if (req->newptr == NULL) return (error); if (v == prof_enabled) return (0); if (v == 1) sleepq_prof_reset(); mtx_lock_spin(&sleepq_prof_lock); prof_enabled = !!v; mtx_unlock_spin(&sleepq_prof_lock); return (0); } static int reset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS) { int error, v; v = 0; error = sysctl_handle_int(oidp, &v, 0, req); if (error) return (error); if (req->newptr == NULL) return (error); if (v == 0) return (0); sleepq_prof_reset(); return (0); } static int dump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS) { struct sleepq_prof *sp; struct sbuf *sb; int enabled; int error; int i; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sb = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req); sbuf_printf(sb, "\nwmesg\tcount\n"); enabled = prof_enabled; mtx_lock_spin(&sleepq_prof_lock); prof_enabled = 0; mtx_unlock_spin(&sleepq_prof_lock); for (i = 0; i < SC_TABLESIZE; i++) { LIST_FOREACH(sp, &sleepq_hash[i], sp_link) { sbuf_printf(sb, "%s\t%ld\n", sp->sp_wmesg, sp->sp_count); } } mtx_lock_spin(&sleepq_prof_lock); prof_enabled = enabled; mtx_unlock_spin(&sleepq_prof_lock); error = sbuf_finish(sb); sbuf_delete(sb); return (error); } SYSCTL_PROC(_debug_sleepq, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, dump_sleepq_prof_stats, "A", "Sleepqueue profiling statistics"); SYSCTL_PROC(_debug_sleepq, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, reset_sleepq_prof_stats, "I", "Reset sleepqueue profiling statistics"); SYSCTL_PROC(_debug_sleepq, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, enable_sleepq_prof, "I", "Enable sleepqueue profiling"); #endif #ifdef DDB DB_SHOW_COMMAND(sleepq, db_show_sleepqueue) { struct sleepqueue_chain *sc; struct sleepqueue *sq; #ifdef INVARIANTS struct lock_object *lock; #endif struct thread *td; void *wchan; int i; if (!have_addr) return; /* * First, see if there is an active sleep queue for the wait channel * indicated by the address. */ wchan = (void *)addr; sc = SC_LOOKUP(wchan); LIST_FOREACH(sq, &sc->sc_queues, sq_hash) if (sq->sq_wchan == wchan) goto found; /* * Second, see if there is an active sleep queue at the address * indicated. */ for (i = 0; i < SC_TABLESIZE; i++) LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) { if (sq == (struct sleepqueue *)addr) goto found; } db_printf("Unable to locate a sleep queue via %p\n", (void *)addr); return; found: db_printf("Wait channel: %p\n", sq->sq_wchan); db_printf("Queue type: %d\n", sq->sq_type); #ifdef INVARIANTS if (sq->sq_lock) { lock = sq->sq_lock; db_printf("Associated Interlock: %p - (%s) %s\n", lock, LOCK_CLASS(lock)->lc_name, lock->lo_name); } #endif db_printf("Blocked threads:\n"); for (i = 0; i < NR_SLEEPQS; i++) { db_printf("\nQueue[%d]:\n", i); if (TAILQ_EMPTY(&sq->sq_blocked[i])) db_printf("\tempty\n"); else TAILQ_FOREACH(td, &sq->sq_blocked[0], td_slpq) { db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); } db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]); } } /* Alias 'show sleepqueue' to 'show sleepq'. */ DB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue); #endif Index: projects/hps_head/sys/ofed/include/linux/completion.h =================================================================== --- projects/hps_head/sys/ofed/include/linux/completion.h (revision 282413) +++ projects/hps_head/sys/ofed/include/linux/completion.h (revision 282414) @@ -1,66 +1,67 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_COMPLETION_H_ #define _LINUX_COMPLETION_H_ #include struct completion { unsigned int done; }; #define INIT_COMPLETION(c) \ ((c).done = 0) #define init_completion(c) \ ((c)->done = 0) #define complete(c) \ linux_complete_common((c), 0) #define complete_all(c) \ linux_complete_common((c), 1) #define wait_for_completion(c) \ linux_wait_for_common((c), 0) #define wait_for_completion_interuptible(c) \ linux_wait_for_common((c), 1) #define wait_for_completion_timeout(c, timeout) \ linux_wait_for_timeout_common((c), (timeout), 0) #define wait_for_completion_interruptible_timeout(c, timeout) \ linux_wait_for_timeout_common((c), (timeout), 1) #define try_wait_for_completion(c) \ linux_try_wait_for_completion(c) #define completion_done(c) \ linux_completion_done(c) extern void linux_complete_common(struct completion *, int); extern long linux_wait_for_common(struct completion *, int); extern long linux_wait_for_timeout_common(struct completion *, long, int); extern int linux_try_wait_for_completion(struct completion *); extern int linux_completion_done(struct completion *); #endif /* _LINUX_COMPLETION_H_ */ + Index: projects/hps_head/sys/ofed/include/linux/linux_compat.c =================================================================== --- projects/hps_head/sys/ofed/include/linux/linux_compat.c (revision 282413) +++ projects/hps_head/sys/ofed/include/linux/linux_compat.c (revision 282414) @@ -1,933 +1,935 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat"); #include /* Undo Linux compat changes. */ #undef RB_ROOT #undef file #undef cdev #define RB_ROOT(head) (head)->rbh_root struct kobject class_root; struct device linux_rootdev; struct class miscclass; struct list_head pci_drivers; struct list_head pci_devices; struct net init_net; spinlock_t pci_lock; unsigned long linux_timer_hz_mask; int panic_cmp(struct rb_node *one, struct rb_node *two) { panic("no cmp"); } RB_GENERATE(linux_root, rb_node, __entry, panic_cmp); int kobject_set_name(struct kobject *kobj, const char *fmt, ...) { va_list args; int error; va_start(args, fmt); error = kobject_set_name_vargs(kobj, fmt, args); va_end(args); return (error); } static inline int kobject_add_complete(struct kobject *kobj, struct kobject *parent) { struct kobj_type *t; int error; kobj->parent = kobject_get(parent); error = sysfs_create_dir(kobj); if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) { struct attribute **attr; t = kobj->ktype; for (attr = t->default_attrs; *attr != NULL; attr++) { error = sysfs_create_file(kobj, *attr); if (error) break; } if (error) sysfs_remove_dir(kobj); } return (error); } int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) { va_list args; int error; va_start(args, fmt); error = kobject_set_name_vargs(kobj, fmt, args); va_end(args); if (error) return (error); return kobject_add_complete(kobj, parent); } void kobject_release(struct kref *kref) { struct kobject *kobj; char *name; kobj = container_of(kref, struct kobject, kref); sysfs_remove_dir(kobj); if (kobj->parent) kobject_put(kobj->parent); kobj->parent = NULL; name = kobj->name; if (kobj->ktype && kobj->ktype->release) kobj->ktype->release(kobj); kfree(name); } static void kobject_kfree(struct kobject *kobj) { kfree(kobj); } static void kobject_kfree_name(struct kobject *kobj) { if (kobj) { kfree(kobj->name); } } struct kobj_type kfree_type = { .release = kobject_kfree }; static void dev_release(struct device *dev) { pr_debug("dev_release: %s\n", dev_name(dev)); kfree(dev); } struct device * device_create(struct class *class, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...) { struct device *dev; va_list args; dev = kzalloc(sizeof(*dev), M_WAITOK); dev->parent = parent; dev->class = class; dev->devt = devt; dev->driver_data = drvdata; dev->release = dev_release; va_start(args, fmt); kobject_set_name_vargs(&dev->kobj, fmt, args); va_end(args); device_register(dev); return (dev); } int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...) { va_list args; int error; kobject_init(kobj, ktype); kobj->ktype = ktype; kobj->parent = parent; kobj->name = NULL; va_start(args, fmt); error = kobject_set_name_vargs(kobj, fmt, args); va_end(args); if (error) return (error); return kobject_add_complete(kobj, parent); } static void linux_file_dtor(void *cdp) { struct linux_file *filp; filp = cdp; filp->f_op->release(filp->f_vnode, filp); vdrop(filp->f_vnode); kfree(filp); } static int linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (ENODEV); filp = kzalloc(sizeof(*filp), GFP_KERNEL); filp->f_dentry = &filp->f_dentry_store; filp->f_op = ldev->ops; filp->f_flags = file->f_flag; vhold(file->f_vnode); filp->f_vnode = file->f_vnode; if (filp->f_op->open) { error = -filp->f_op->open(file->f_vnode, filp); if (error) { kfree(filp); return (error); } } error = devfs_set_cdevpriv(filp, linux_file_dtor); if (error) { filp->f_op->release(file->f_vnode, filp); kfree(filp); return (error); } return 0; } static int linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (0); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; devfs_clear_cdevpriv(); return (0); } static int linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (0); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; /* * Linux does not have a generic ioctl copyin/copyout layer. All * linux ioctls must be converted to void ioctls which pass a * pointer to the address of the data. We want the actual user * address so we dereference here. */ data = *(void **)data; if (filp->f_op->unlocked_ioctl) error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data); else error = ENOTTY; return (error); } static int linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; ssize_t bytes; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (0); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; if (uio->uio_iovcnt != 1) panic("linux_dev_read: uio %p iovcnt %d", uio, uio->uio_iovcnt); if (filp->f_op->read) { bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, uio->uio_iov->iov_len, &uio->uio_offset); if (bytes >= 0) { uio->uio_iov->iov_base = ((uint8_t *)uio->uio_iov->iov_base) + bytes; uio->uio_iov->iov_len -= bytes; uio->uio_resid -= bytes; } else error = -bytes; } else error = ENXIO; return (error); } static int linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; ssize_t bytes; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (0); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; if (uio->uio_iovcnt != 1) panic("linux_dev_write: uio %p iovcnt %d", uio, uio->uio_iovcnt); if (filp->f_op->write) { bytes = filp->f_op->write(filp, uio->uio_iov->iov_base, uio->uio_iov->iov_len, &uio->uio_offset); if (bytes >= 0) { uio->uio_iov->iov_base = ((uint8_t *)uio->uio_iov->iov_base) + bytes; uio->uio_iov->iov_len -= bytes; uio->uio_resid -= bytes; } else error = -bytes; } else error = ENXIO; return (error); } static int linux_dev_poll(struct cdev *dev, int events, struct thread *td) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; int revents; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (0); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; if (filp->f_op->poll) revents = filp->f_op->poll(filp, NULL) & events; else revents = 0; return (revents); } static int linux_dev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) { /* XXX memattr not honored. */ *paddr = offset; return (0); } static int linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size, struct vm_object **object, int nprot) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; struct vm_area_struct vma; vm_paddr_t paddr; vm_page_t m; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (ENODEV); if (size != PAGE_SIZE) return (EINVAL); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; vma.vm_start = 0; vma.vm_end = PAGE_SIZE; vma.vm_pgoff = *offset / PAGE_SIZE; vma.vm_pfn = 0; vma.vm_page_prot = 0; if (filp->f_op->mmap) { error = -filp->f_op->mmap(filp, &vma); if (error == 0) { paddr = (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT; *offset = paddr; m = PHYS_TO_VM_PAGE(paddr); *object = vm_pager_allocate(OBJT_DEVICE, dev, PAGE_SIZE, nprot, *offset, curthread->td_ucred); if (*object == NULL) return (EINVAL); if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, vma.vm_page_prot); } } else error = ENODEV; return (error); } struct cdevsw linuxcdevsw = { .d_version = D_VERSION, .d_flags = D_TRACKCLOSE, .d_open = linux_dev_open, .d_close = linux_dev_close, .d_read = linux_dev_read, .d_write = linux_dev_write, .d_ioctl = linux_dev_ioctl, .d_mmap_single = linux_dev_mmap_single, .d_mmap = linux_dev_mmap, .d_poll = linux_dev_poll, }; static int linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { struct linux_file *filp; ssize_t bytes; int error; error = 0; filp = (struct linux_file *)file->f_data; filp->f_flags = file->f_flag; if (uio->uio_iovcnt != 1) panic("linux_file_read: uio %p iovcnt %d", uio, uio->uio_iovcnt); if (filp->f_op->read) { bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, uio->uio_iov->iov_len, &uio->uio_offset); if (bytes >= 0) { uio->uio_iov->iov_base = ((uint8_t *)uio->uio_iov->iov_base) + bytes; uio->uio_iov->iov_len -= bytes; uio->uio_resid -= bytes; } else error = -bytes; } else error = ENXIO; return (error); } static int linux_file_poll(struct file *file, int events, struct ucred *active_cred, struct thread *td) { struct linux_file *filp; int revents; filp = (struct linux_file *)file->f_data; filp->f_flags = file->f_flag; if (filp->f_op->poll) revents = filp->f_op->poll(filp, NULL) & events; else revents = 0; return (0); } static int linux_file_close(struct file *file, struct thread *td) { struct linux_file *filp; int error; filp = (struct linux_file *)file->f_data; filp->f_flags = file->f_flag; error = -filp->f_op->release(NULL, filp); funsetown(&filp->f_sigio); kfree(filp); return (error); } static int linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred, struct thread *td) { struct linux_file *filp; int error; filp = (struct linux_file *)fp->f_data; filp->f_flags = fp->f_flag; error = 0; switch (cmd) { case FIONBIO: break; case FIOASYNC: if (filp->f_op->fasync == NULL) break; error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); break; case FIOSETOWN: error = fsetown(*(int *)data, &filp->f_sigio); if (error == 0) error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); break; case FIOGETOWN: *(int *)data = fgetown(&filp->f_sigio); break; default: error = ENOTTY; break; } return (error); } static int linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) { return (EOPNOTSUPP); } static int linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) { return (0); } struct fileops linuxfileops = { .fo_read = linux_file_read, .fo_write = invfo_rdwr, .fo_truncate = invfo_truncate, .fo_kqfilter = invfo_kqfilter, .fo_stat = linux_file_stat, .fo_fill_kinfo = linux_file_fill_kinfo, .fo_poll = linux_file_poll, .fo_close = linux_file_close, .fo_ioctl = linux_file_ioctl, .fo_chmod = invfo_chmod, .fo_chown = invfo_chown, .fo_sendfile = invfo_sendfile, }; /* * Hash of vmmap addresses. This is infrequently accessed and does not * need to be particularly large. This is done because we must store the * caller's idea of the map size to properly unmap. */ struct vmmap { LIST_ENTRY(vmmap) vm_next; void *vm_addr; unsigned long vm_size; }; struct vmmaphd { struct vmmap *lh_first; }; #define VMMAP_HASH_SIZE 64 #define VMMAP_HASH_MASK (VMMAP_HASH_SIZE - 1) #define VM_HASH(addr) ((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE]; static struct mtx vmmaplock; static void vmmap_add(void *addr, unsigned long size) { struct vmmap *vmmap; vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL); mtx_lock(&vmmaplock); vmmap->vm_size = size; vmmap->vm_addr = addr; LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next); mtx_unlock(&vmmaplock); } static struct vmmap * vmmap_remove(void *addr) { struct vmmap *vmmap; mtx_lock(&vmmaplock); LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next) if (vmmap->vm_addr == addr) break; if (vmmap) LIST_REMOVE(vmmap, vm_next); mtx_unlock(&vmmaplock); return (vmmap); } void * _ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr) { void *addr; addr = pmap_mapdev_attr(phys_addr, size, attr); if (addr == NULL) return (NULL); vmmap_add(addr, size); return (addr); } void iounmap(void *addr) { struct vmmap *vmmap; vmmap = vmmap_remove(addr); if (vmmap == NULL) return; pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size); kfree(vmmap); } void * vmap(struct page **pages, unsigned int count, unsigned long flags, int prot) { vm_offset_t off; size_t size; size = count * PAGE_SIZE; off = kva_alloc(size); if (off == 0) return (NULL); vmmap_add((void *)off, size); pmap_qenter(off, pages, count); return ((void *)off); } void vunmap(void *addr) { struct vmmap *vmmap; vmmap = vmmap_remove(addr); if (vmmap == NULL) return; pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE); kva_free((vm_offset_t)addr, vmmap->vm_size); kfree(vmmap); } char * kasprintf(gfp_t gfp, const char *fmt, ...) { va_list ap; char *p; va_start(ap, fmt); p = kvasprintf(gfp, fmt, ap); va_end(ap); return p; } static int linux_timer_jiffies_until(unsigned long expires) { int delta = expires - jiffies; /* guard against already expired values */ if (delta < 1) delta = 1; return (delta); } static void linux_timer_callback_wrapper(void *context) { struct timer_list *timer; timer = context; timer->function(timer->data); } void mod_timer(struct timer_list *timer, unsigned long expires) { timer->expires = expires; callout_reset(&timer->timer_callout, linux_timer_jiffies_until(expires), &linux_timer_callback_wrapper, timer); } void add_timer(struct timer_list *timer) { callout_reset(&timer->timer_callout, linux_timer_jiffies_until(timer->expires), &linux_timer_callback_wrapper, timer); } static void linux_timer_init(void *arg) { /* * Compute an internal HZ value which can divide 2**32 to * avoid timer rounding problems when the tick value wraps * around 2**32: */ linux_timer_hz_mask = 1; while (linux_timer_hz_mask < (unsigned long)hz) linux_timer_hz_mask *= 2; linux_timer_hz_mask--; } SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL); void linux_complete_common(struct completion *c, int all) { int wakeup_swapper; sleepq_lock(c); c->done++; if (all) wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0); else wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0); sleepq_release(c); if (wakeup_swapper) kick_proc0(); } /* * Indefinite wait for done != 0 with or without signals. */ long linux_wait_for_common(struct completion *c, int flags) { if (flags != 0) flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; else flags = SLEEPQ_SLEEP; for (;;) { sleepq_lock(c); if (c->done) break; sleepq_add(c, NULL, "completion", flags, 0); if (flags & SLEEPQ_INTERRUPTIBLE) { if (sleepq_wait_sig(c, 0) != 0) return (-ERESTARTSYS); } else sleepq_wait(c, 0); } c->done--; sleepq_release(c); return (0); } /* * Time limited wait for done != 0 with or without signals. */ long linux_wait_for_timeout_common(struct completion *c, long timeout, int flags) { long end = jiffies + timeout; if (flags != 0) flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; else flags = SLEEPQ_SLEEP; for (;;) { int ret; sleepq_lock(c); if (c->done) break; sleepq_add(c, NULL, "completion", flags, 0); + sleepq_release(c); sleepq_set_timeout(c, linux_timer_jiffies_until(end)); + sleepq_lock(c); if (flags & SLEEPQ_INTERRUPTIBLE) ret = sleepq_timedwait_sig(c, 0); else ret = sleepq_timedwait(c, 0); if (ret != 0) { /* check for timeout or signal */ if (ret == EWOULDBLOCK) return (0); else return (-ERESTARTSYS); } } c->done--; sleepq_release(c); /* return how many jiffies are left */ return (linux_timer_jiffies_until(end)); } int linux_try_wait_for_completion(struct completion *c) { int isdone; isdone = 1; sleepq_lock(c); if (c->done) c->done--; else isdone = 0; sleepq_release(c); return (isdone); } int linux_completion_done(struct completion *c) { int isdone; isdone = 1; sleepq_lock(c); if (c->done == 0) isdone = 0; sleepq_release(c); return (isdone); } static void linux_compat_init(void *arg) { struct sysctl_oid *rootoid; int i; rootoid = SYSCTL_ADD_ROOT_NODE(NULL, OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys"); kobject_init(&class_root, &class_ktype); kobject_set_name(&class_root, "class"); class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class"); kobject_init(&linux_rootdev.kobj, &dev_ktype); kobject_set_name(&linux_rootdev.kobj, "device"); linux_rootdev.kobj.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL, "device"); linux_rootdev.bsddev = root_bus; miscclass.name = "misc"; class_register(&miscclass); INIT_LIST_HEAD(&pci_drivers); INIT_LIST_HEAD(&pci_devices); spin_lock_init(&pci_lock); mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF); for (i = 0; i < VMMAP_HASH_SIZE; i++) LIST_INIT(&vmmaphead[i]); } SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL); static void linux_compat_uninit(void *arg) { kobject_kfree_name(&class_root); kobject_kfree_name(&linux_rootdev.kobj); kobject_kfree_name(&miscclass.kobj); } SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL); Index: projects/hps_head/sys/sys/_callout.h =================================================================== --- projects/hps_head/sys/sys/_callout.h (revision 282413) +++ projects/hps_head/sys/sys/_callout.h (revision 282414) @@ -1,65 +1,66 @@ /*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)callout.h 8.2 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _SYS__CALLOUT_H #define _SYS__CALLOUT_H #include struct lock_object; LIST_HEAD(callout_list, callout); SLIST_HEAD(callout_slist, callout); TAILQ_HEAD(callout_tailq, callout); +typedef void callout_func_t(void *); + struct callout { union { LIST_ENTRY(callout) le; SLIST_ENTRY(callout) sle; TAILQ_ENTRY(callout) tqe; } c_links; sbintime_t c_time; /* ticks to the event */ sbintime_t c_precision; /* delta allowed wrt opt */ void *c_arg; /* function argument */ - void (*c_func)(void *); /* function to call */ + callout_func_t *c_func; /* function to call */ struct lock_object *c_lock; /* lock to handle */ - short c_flags; /* User State */ - short c_iflags; /* Internal State */ + int c_flags; /* state of this entry */ volatile int c_cpu; /* CPU we're scheduled on */ }; #endif Index: projects/hps_head/sys/sys/callout.h =================================================================== --- projects/hps_head/sys/sys/callout.h (revision 282413) +++ projects/hps_head/sys/sys/callout.h (revision 282414) @@ -1,128 +1,114 @@ /*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)callout.h 8.2 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _SYS_CALLOUT_H_ #define _SYS_CALLOUT_H_ #include #define CALLOUT_LOCAL_ALLOC 0x0001 /* was allocated from callfree */ #define CALLOUT_ACTIVE 0x0002 /* callout is currently active */ #define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */ #define CALLOUT_MPSAFE 0x0008 /* callout handler is mp safe */ #define CALLOUT_RETURNUNLOCKED 0x0010 /* handler returns with mtx unlocked */ -#define CALLOUT_SHAREDLOCK 0x0020 /* callout lock held in shared mode */ -#define CALLOUT_DFRMIGRATION 0x0040 /* callout in deferred migration mode */ +#define CALLOUT_UNUSED_5 0x0020 /* --available-- */ +#define CALLOUT_DEFRESTART 0x0040 /* callout restart is deferred */ #define CALLOUT_PROCESSED 0x0080 /* callout in wheel or processing list? */ #define CALLOUT_DIRECT 0x0100 /* allow exec from hw int context */ +#define CALLOUT_SET_LC(x) (((x) & 7) << 16) /* set lock class */ +#define CALLOUT_GET_LC(x) (((x) >> 16) & 7) /* get lock class */ #define C_DIRECT_EXEC 0x0001 /* direct execution of callout */ #define C_PRELBITS 7 #define C_PRELRANGE ((1 << C_PRELBITS) - 1) #define C_PREL(x) (((x) + 1) << 1) #define C_PRELGET(x) (int)((((x) >> 1) & C_PRELRANGE) - 1) #define C_HARDCLOCK 0x0100 /* align to hardclock() calls */ #define C_ABSOLUTE 0x0200 /* event time is absolute. */ struct callout_handle { struct callout *callout; }; #ifdef _KERNEL -/* - * Note the flags field is actually *two* fields. The c_flags - * field is the one that caller operations that may, or may not have - * a lock touches i.e. callout_deactivate(). The other, the c_iflags, - * is the internal flags that *must* be kept correct on which the - * callout system depend on e.g. callout_pending(). - * The c_iflag is used internally by the callout system to determine which - * list the callout is on and track internal state. Callers *should not* - * use the c_flags field directly but should use the macros provided. - * - * The c_iflags field holds internal flags that are protected by internal - * locks of the callout subsystem. The c_flags field holds external flags. - * The caller must hold its own lock while manipulating or reading external - * flags via callout_active(), callout_deactivate(), callout_reset*(), or - * callout_stop() to avoid races. - */ #define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE) #define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE) -#define callout_drain(c) _callout_stop_safe(c, 1) +int callout_drain(struct callout *); +int callout_drain_async(struct callout *, callout_func_t *, void *); void callout_init(struct callout *, int); void _callout_init_lock(struct callout *, struct lock_object *, int); #define callout_init_mtx(c, mtx, flags) \ _callout_init_lock((c), ((mtx) != NULL) ? &(mtx)->lock_object : \ NULL, (flags)) #define callout_init_rm(c, rm, flags) \ _callout_init_lock((c), ((rm) != NULL) ? &(rm)->lock_object : \ NULL, (flags)) #define callout_init_rw(c, rw, flags) \ _callout_init_lock((c), ((rw) != NULL) ? &(rw)->lock_object : \ NULL, (flags)) -#define callout_pending(c) ((c)->c_iflags & CALLOUT_PENDING) +#define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING) int callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t, - void (*)(void *), void *, int, int); + callout_func_t *, void *, int, int); #define callout_reset_sbt(c, sbt, pr, fn, arg, flags) \ callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), -1, (flags)) #define callout_reset_sbt_curcpu(c, sbt, pr, fn, arg, flags) \ callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), PCPU_GET(cpuid),\ (flags)) #define callout_reset_on(c, to_ticks, fn, arg, cpu) \ callout_reset_sbt_on((c), tick_sbt * (to_ticks), 0, (fn), (arg), \ (cpu), C_HARDCLOCK) #define callout_reset(c, on_tick, fn, arg) \ callout_reset_on((c), (on_tick), (fn), (arg), -1) #define callout_reset_curcpu(c, on_tick, fn, arg) \ callout_reset_on((c), (on_tick), (fn), (arg), PCPU_GET(cpuid)) #define callout_schedule_sbt_on(c, sbt, pr, cpu, flags) \ callout_reset_sbt_on((c), (sbt), (pr), (c)->c_func, (c)->c_arg, \ (cpu), (flags)) #define callout_schedule_sbt(c, sbt, pr, flags) \ callout_schedule_sbt_on((c), (sbt), (pr), -1, (flags)) #define callout_schedule_sbt_curcpu(c, sbt, pr, flags) \ callout_schedule_sbt_on((c), (sbt), (pr), PCPU_GET(cpuid), (flags)) int callout_schedule(struct callout *, int); int callout_schedule_on(struct callout *, int, int); #define callout_schedule_curcpu(c, on_tick) \ callout_schedule_on((c), (on_tick), PCPU_GET(cpuid)) -#define callout_stop(c) _callout_stop_safe(c, 0) -int _callout_stop_safe(struct callout *, int); +int callout_stop(struct callout *); void callout_process(sbintime_t now); #endif #endif /* _SYS_CALLOUT_H_ */ Index: projects/hps_head/sys/sys/proc.h =================================================================== --- projects/hps_head/sys/sys/proc.h (revision 282413) +++ projects/hps_head/sys/sys/proc.h (revision 282414) @@ -1,1022 +1,1023 @@ /*- * Copyright (c) 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)proc.h 8.15 (Berkeley) 5/19/95 * $FreeBSD$ */ #ifndef _SYS_PROC_H_ #define _SYS_PROC_H_ #include /* For struct callout. */ #include /* For struct klist. */ #include #ifndef _KERNEL #include #endif #include #include #include #include #include #include #include /* XXX. */ #include #include #include #include #include #ifndef _KERNEL #include /* For structs itimerval, timeval. */ #else #include #endif #include #include #include /* Machine-dependent proc substruct. */ /* * One structure allocated per session. * * List of locks * (m) locked by s_mtx mtx * (e) locked by proctree_lock sx * (c) const until freeing */ struct session { u_int s_count; /* Ref cnt; pgrps in session - atomic. */ struct proc *s_leader; /* (m + e) Session leader. */ struct vnode *s_ttyvp; /* (m) Vnode of controlling tty. */ struct cdev_priv *s_ttydp; /* (m) Device of controlling tty. */ struct tty *s_ttyp; /* (e) Controlling tty. */ pid_t s_sid; /* (c) Session ID. */ /* (m) Setlogin() name: */ char s_login[roundup(MAXLOGNAME, sizeof(long))]; struct mtx s_mtx; /* Mutex to protect members. */ }; /* * One structure allocated per process group. * * List of locks * (m) locked by pg_mtx mtx * (e) locked by proctree_lock sx * (c) const until freeing */ struct pgrp { LIST_ENTRY(pgrp) pg_hash; /* (e) Hash chain. */ LIST_HEAD(, proc) pg_members; /* (m + e) Pointer to pgrp members. */ struct session *pg_session; /* (c) Pointer to session. */ struct sigiolst pg_sigiolst; /* (m) List of sigio sources. */ pid_t pg_id; /* (c) Process group id. */ int pg_jobc; /* (m) Job control process count. */ struct mtx pg_mtx; /* Mutex to protect members */ }; /* * pargs, used to hold a copy of the command line, if it had a sane length. */ struct pargs { u_int ar_ref; /* Reference count. */ u_int ar_length; /* Length. */ u_char ar_args[1]; /* Arguments. */ }; /*- * Description of a process. * * This structure contains the information needed to manage a thread of * control, known in UN*X as a process; it has references to substructures * containing descriptions of things that the process uses, but may share * with related processes. The process structure and the substructures * are always addressable except for those marked "(CPU)" below, * which might be addressable only on a processor on which the process * is running. * * Below is a key of locks used to protect each member of struct proc. The * lock is indicated by a reference to a specific character in parens in the * associated comment. * * - not yet protected * a - only touched by curproc or parent during fork/wait * b - created at fork, never changes * (exception aiods switch vmspaces, but they are also * marked 'P_SYSTEM' so hopefully it will be left alone) * c - locked by proc mtx * d - locked by allproc_lock lock * e - locked by proctree_lock lock * f - session mtx * g - process group mtx * h - callout_lock mtx * i - by curproc or the master session mtx * j - locked by proc slock * k - only accessed by curthread * k*- only accessed by curthread and from an interrupt * l - the attaching proc or attaching proc parent * m - Giant * n - not locked, lazy * o - ktrace lock * q - td_contested lock * r - p_peers lock * t - thread lock * u - process stat lock * w - process timer lock * x - created at fork, only changes during single threading in exec * y - created at first aio, doesn't change until exit or exec at which * point we are single-threaded and only curthread changes it * z - zombie threads lock * * If the locking key specifies two identifiers (for example, p_pptr) then * either lock is sufficient for read access, but both locks must be held * for write access. */ struct cpuset; struct kaioinfo; struct kaudit_record; struct kdtrace_proc; struct kdtrace_thread; struct mqueue_notifier; struct nlminfo; struct p_sched; struct proc; struct procdesc; struct racct; struct sbuf; struct sleepqueue; struct td_sched; struct thread; struct trapframe; struct turnstile; /* * XXX: Does this belong in resource.h or resourcevar.h instead? * Resource usage extension. The times in rusage structs in the kernel are * never up to date. The actual times are kept as runtimes and tick counts * (with control info in the "previous" times), and are converted when * userland asks for rusage info. Backwards compatibility prevents putting * this directly in the user-visible rusage struct. * * Locking for p_rux: (cu) means (u) for p_rux and (c) for p_crux. * Locking for td_rux: (t) for all fields. */ struct rusage_ext { uint64_t rux_runtime; /* (cu) Real time. */ uint64_t rux_uticks; /* (cu) Statclock hits in user mode. */ uint64_t rux_sticks; /* (cu) Statclock hits in sys mode. */ uint64_t rux_iticks; /* (cu) Statclock hits in intr mode. */ uint64_t rux_uu; /* (c) Previous user time in usec. */ uint64_t rux_su; /* (c) Previous sys time in usec. */ uint64_t rux_tu; /* (c) Previous total time in usec. */ }; /* * Kernel runnable context (thread). * This is what is put to sleep and reactivated. * Thread context. Processes may have multiple threads. */ struct thread { struct mtx *volatile td_lock; /* replaces sched lock */ struct proc *td_proc; /* (*) Associated process. */ TAILQ_ENTRY(thread) td_plist; /* (*) All threads in this proc. */ TAILQ_ENTRY(thread) td_runq; /* (t) Run queue. */ TAILQ_ENTRY(thread) td_slpq; /* (t) Sleep queue. */ TAILQ_ENTRY(thread) td_lockq; /* (t) Lock queue. */ LIST_ENTRY(thread) td_hash; /* (d) Hash chain. */ struct cpuset *td_cpuset; /* (t) CPU affinity mask. */ struct seltd *td_sel; /* Select queue/channel. */ struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */ struct turnstile *td_turnstile; /* (k) Associated turnstile. */ struct rl_q_entry *td_rlqe; /* (k) Associated range lock entry. */ struct umtx_q *td_umtxq; /* (c?) Link for when we're blocked. */ lwpid_t td_tid; /* (b) Thread ID. */ sigqueue_t td_sigqueue; /* (c) Sigs arrived, not delivered. */ #define td_siglist td_sigqueue.sq_signals u_char td_lend_user_pri; /* (t) Lend user pri. */ /* Cleared during fork1() */ #define td_startzero td_flags int td_flags; /* (t) TDF_* flags. */ int td_inhibitors; /* (t) Why can not run. */ int td_pflags; /* (k) Private thread (TDP_*) flags. */ int td_dupfd; /* (k) Ret value from fdopen. XXX */ int td_sqqueue; /* (t) Sleepqueue queue blocked on. */ void *td_wchan; /* (t) Sleep address. */ const char *td_wmesg; /* (t) Reason for sleep. */ int td_lastcpu; /* (t) Last cpu we were on. */ int td_oncpu; /* (t) Which cpu we are on. */ volatile u_char td_owepreempt; /* (k*) Preempt on last critical_exit */ u_char td_tsqueue; /* (t) Turnstile queue blocked on. */ short td_locks; /* (k) Count of non-spin locks. */ short td_rw_rlocks; /* (k) Count of rwlock read locks. */ short td_lk_slocks; /* (k) Count of lockmgr shared locks. */ short td_stopsched; /* (k) Scheduler stopped. */ struct turnstile *td_blocked; /* (t) Lock thread is blocked on. */ const char *td_lockname; /* (t) Name of lock blocked on. */ LIST_HEAD(, turnstile) td_contested; /* (q) Contested locks. */ struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */ int td_intr_nesting_level; /* (k) Interrupt recursion. */ int td_pinned; /* (k) Temporary cpu pin count. */ struct ucred *td_ucred; /* (k) Reference to credentials. */ u_int td_estcpu; /* (t) estimated cpu utilization */ int td_slptick; /* (t) Time at sleep. */ int td_blktick; /* (t) Time spent blocked. */ int td_swvoltick; /* (t) Time at last SW_VOL switch. */ u_int td_cow; /* (*) Number of copy-on-write faults */ struct rusage td_ru; /* (t) rusage information. */ struct rusage_ext td_rux; /* (t) Internal rusage information. */ uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */ uint64_t td_runtime; /* (t) How many cpu ticks we've run. */ u_int td_pticks; /* (t) Statclock hits for profiling */ u_int td_sticks; /* (t) Statclock hits in system mode. */ u_int td_iticks; /* (t) Statclock hits in intr mode. */ u_int td_uticks; /* (t) Statclock hits in user mode. */ int td_intrval; /* (t) Return value for sleepq. */ sigset_t td_oldsigmask; /* (k) Saved mask from pre sigpause. */ volatile u_int td_generation; /* (k) For detection of preemption */ stack_t td_sigstk; /* (k) Stack ptr and on-stack flag. */ int td_xsig; /* (c) Signal for ptrace */ u_long td_profil_addr; /* (k) Temporary addr until AST. */ u_int td_profil_ticks; /* (k) Temporary ticks until AST. */ char td_name[MAXCOMLEN + 1]; /* (*) Thread name. */ struct file *td_fpop; /* (k) file referencing cdev under op */ int td_dbgflags; /* (c) Userland debugger flags */ struct ksiginfo td_dbgksi; /* (c) ksi reflected to debugger. */ int td_ng_outbound; /* (k) Thread entered ng from above. */ struct osd td_osd; /* (k) Object specific data. */ struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */ pid_t td_dbg_forked; /* (c) Child pid for debugger. */ u_int td_vp_reserv; /* (k) Count of reserved vnodes. */ int td_no_sleeping; /* (k) Sleeping disabled count. */ int td_dom_rr_idx; /* (k) RR Numa domain selection. */ #define td_endzero td_sigmask /* Copied during fork1() or create_thread(). */ #define td_startcopy td_endzero sigset_t td_sigmask; /* (c) Current signal mask. */ u_char td_rqindex; /* (t) Run queue index. */ u_char td_base_pri; /* (t) Thread base kernel priority. */ u_char td_priority; /* (t) Thread active priority. */ u_char td_pri_class; /* (t) Scheduling class. */ u_char td_user_pri; /* (t) User pri from estcpu and nice. */ u_char td_base_user_pri; /* (t) Base user pri */ #define td_endcopy td_pcb /* * Fields that must be manually set in fork1() or create_thread() * or already have been set in the allocator, constructor, etc. */ struct pcb *td_pcb; /* (k) Kernel VA of pcb and kstack. */ enum { TDS_INACTIVE = 0x0, TDS_INHIBITED, TDS_CAN_RUN, TDS_RUNQ, TDS_RUNNING } td_state; /* (t) thread state */ union { register_t tdu_retval[2]; off_t tdu_off; } td_uretoff; /* (k) Syscall aux returns. */ #define td_retval td_uretoff.tdu_retval struct callout td_slpcallout; /* (h) Callout for sleep. */ + struct mtx td_slpmutex; /* (h) Mutex for sleep callout */ struct trapframe *td_frame; /* (k) */ struct vm_object *td_kstack_obj;/* (a) Kstack object. */ vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */ int td_kstack_pages; /* (a) Size of the kstack. */ volatile u_int td_critnest; /* (k*) Critical section nest level. */ struct mdthread td_md; /* (k) Any machine-dependent fields. */ struct td_sched *td_sched; /* (*) Scheduler-specific data. */ struct kaudit_record *td_ar; /* (k) Active audit record, if any. */ struct lpohead td_lprof[2]; /* (a) lock profiling objects. */ struct kdtrace_thread *td_dtrace; /* (*) DTrace-specific data. */ int td_errno; /* Error returned by last syscall. */ struct vnet *td_vnet; /* (k) Effective vnet. */ const char *td_vnet_lpush; /* (k) Debugging vnet push / pop. */ struct trapframe *td_intr_frame;/* (k) Frame of the current irq */ struct proc *td_rfppwait_p; /* (k) The vforked child */ struct vm_page **td_ma; /* (k) uio pages held */ int td_ma_cnt; /* (k) size of *td_ma */ }; struct mtx *thread_lock_block(struct thread *); void thread_lock_unblock(struct thread *, struct mtx *); void thread_lock_set(struct thread *, struct mtx *); #define THREAD_LOCK_ASSERT(td, type) \ do { \ struct mtx *__m = (td)->td_lock; \ if (__m != &blocked_lock) \ mtx_assert(__m, (type)); \ } while (0) #ifdef INVARIANTS #define THREAD_LOCKPTR_ASSERT(td, lock) \ do { \ struct mtx *__m = (td)->td_lock; \ KASSERT((__m == &blocked_lock || __m == (lock)), \ ("Thread %p lock %p does not match %p", td, __m, (lock))); \ } while (0) #else #define THREAD_LOCKPTR_ASSERT(td, lock) #endif /* * Flags kept in td_flags: * To change these you MUST have the scheduler lock. */ #define TDF_BORROWING 0x00000001 /* Thread is borrowing pri from another. */ #define TDF_INPANIC 0x00000002 /* Caused a panic, let it drive crashdump. */ #define TDF_INMEM 0x00000004 /* Thread's stack is in memory. */ #define TDF_SINTR 0x00000008 /* Sleep is interruptible. */ #define TDF_TIMEOUT 0x00000010 /* Timing out during sleep. */ #define TDF_IDLETD 0x00000020 /* This is a per-CPU idle thread. */ #define TDF_CANSWAP 0x00000040 /* Thread can be swapped. */ #define TDF_SLEEPABORT 0x00000080 /* sleepq_abort was called. */ #define TDF_KTH_SUSP 0x00000100 /* kthread is suspended */ #define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */ #define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */ #define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */ -#define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */ +#define TDF_UNUSED12 0x00001000 /* --available-- */ #define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */ #define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */ #define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */ #define TDF_NEEDRESCHED 0x00010000 /* Thread needs to yield. */ #define TDF_NEEDSIGCHK 0x00020000 /* Thread may need signal delivery. */ #define TDF_NOLOAD 0x00040000 /* Ignore during load avg calculations. */ #define TDF_UNUSED19 0x00080000 /* --available-- */ #define TDF_THRWAKEUP 0x00100000 /* Libthr thread must not suspend itself. */ #define TDF_UNUSED21 0x00200000 /* --available-- */ #define TDF_SWAPINREQ 0x00400000 /* Swapin request due to wakeup. */ #define TDF_UNUSED23 0x00800000 /* --available-- */ #define TDF_SCHED0 0x01000000 /* Reserved for scheduler private use */ #define TDF_SCHED1 0x02000000 /* Reserved for scheduler private use */ #define TDF_SCHED2 0x04000000 /* Reserved for scheduler private use */ #define TDF_SCHED3 0x08000000 /* Reserved for scheduler private use */ #define TDF_ALRMPEND 0x10000000 /* Pending SIGVTALRM needs to be posted. */ #define TDF_PROFPEND 0x20000000 /* Pending SIGPROF needs to be posted. */ #define TDF_MACPEND 0x40000000 /* AST-based MAC event pending. */ /* Userland debug flags */ #define TDB_SUSPEND 0x00000001 /* Thread is suspended by debugger */ #define TDB_XSIG 0x00000002 /* Thread is exchanging signal under trace */ #define TDB_USERWR 0x00000004 /* Debugger modified memory or registers */ #define TDB_SCE 0x00000008 /* Thread performs syscall enter */ #define TDB_SCX 0x00000010 /* Thread performs syscall exit */ #define TDB_EXEC 0x00000020 /* TDB_SCX from exec(2) family */ #define TDB_FORK 0x00000040 /* TDB_SCX from fork(2) that created new process */ #define TDB_STOPATFORK 0x00000080 /* Stop at the return from fork (child only) */ #define TDB_CHILD 0x00000100 /* New child indicator for ptrace() */ /* * "Private" flags kept in td_pflags: * These are only written by curthread and thus need no locking. */ #define TDP_OLDMASK 0x00000001 /* Need to restore mask after suspend. */ #define TDP_INKTR 0x00000002 /* Thread is currently in KTR code. */ #define TDP_INKTRACE 0x00000004 /* Thread is currently in KTRACE code. */ #define TDP_BUFNEED 0x00000008 /* Do not recurse into the buf flush */ #define TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */ #define TDP_ALTSTACK 0x00000020 /* Have alternate signal stack. */ #define TDP_DEADLKTREAT 0x00000040 /* Lock aquisition - deadlock treatment. */ #define TDP_NOFAULTING 0x00000080 /* Do not handle page faults. */ #define TDP_UNUSED9 0x00000100 /* --available-- */ #define TDP_OWEUPC 0x00000200 /* Call addupc() at next AST. */ #define TDP_ITHREAD 0x00000400 /* Thread is an interrupt thread. */ #define TDP_SYNCIO 0x00000800 /* Local override, disable async i/o. */ #define TDP_SCHED1 0x00001000 /* Reserved for scheduler private use */ #define TDP_SCHED2 0x00002000 /* Reserved for scheduler private use */ #define TDP_SCHED3 0x00004000 /* Reserved for scheduler private use */ #define TDP_SCHED4 0x00008000 /* Reserved for scheduler private use */ #define TDP_GEOM 0x00010000 /* Settle GEOM before finishing syscall */ #define TDP_SOFTDEP 0x00020000 /* Stuck processing softdep worklist */ #define TDP_NORUNNINGBUF 0x00040000 /* Ignore runningbufspace check */ #define TDP_WAKEUP 0x00080000 /* Don't sleep in umtx cond_wait */ #define TDP_INBDFLUSH 0x00100000 /* Already in BO_BDFLUSH, do not recurse */ #define TDP_KTHREAD 0x00200000 /* This is an official kernel thread */ #define TDP_CALLCHAIN 0x00400000 /* Capture thread's callchain */ #define TDP_IGNSUSP 0x00800000 /* Permission to ignore the MNTK_SUSPEND* */ #define TDP_AUDITREC 0x01000000 /* Audit record pending on thread */ #define TDP_RFPPWAIT 0x02000000 /* Handle RFPPWAIT on syscall exit */ #define TDP_RESETSPUR 0x04000000 /* Reset spurious page fault history. */ #define TDP_NERRNO 0x08000000 /* Last errno is already in td_errno */ #define TDP_UIOHELD 0x10000000 /* Current uio has pages held in td_ma */ #define TDP_UNUSED29 0x20000000 /* --available-- */ #define TDP_EXECVMSPC 0x40000000 /* Execve destroyed old vmspace */ /* * Reasons that the current thread can not be run yet. * More than one may apply. */ #define TDI_SUSPENDED 0x0001 /* On suspension queue. */ #define TDI_SLEEPING 0x0002 /* Actually asleep! (tricky). */ #define TDI_SWAPPED 0x0004 /* Stack not in mem. Bad juju if run. */ #define TDI_LOCK 0x0008 /* Stopped on a lock. */ #define TDI_IWAIT 0x0010 /* Awaiting interrupt. */ #define TD_IS_SLEEPING(td) ((td)->td_inhibitors & TDI_SLEEPING) #define TD_ON_SLEEPQ(td) ((td)->td_wchan != NULL) #define TD_IS_SUSPENDED(td) ((td)->td_inhibitors & TDI_SUSPENDED) #define TD_IS_SWAPPED(td) ((td)->td_inhibitors & TDI_SWAPPED) #define TD_ON_LOCK(td) ((td)->td_inhibitors & TDI_LOCK) #define TD_AWAITING_INTR(td) ((td)->td_inhibitors & TDI_IWAIT) #define TD_IS_RUNNING(td) ((td)->td_state == TDS_RUNNING) #define TD_ON_RUNQ(td) ((td)->td_state == TDS_RUNQ) #define TD_CAN_RUN(td) ((td)->td_state == TDS_CAN_RUN) #define TD_IS_INHIBITED(td) ((td)->td_state == TDS_INHIBITED) #define TD_ON_UPILOCK(td) ((td)->td_flags & TDF_UPIBLOCKED) #define TD_IS_IDLETHREAD(td) ((td)->td_flags & TDF_IDLETD) #define TD_SET_INHIB(td, inhib) do { \ (td)->td_state = TDS_INHIBITED; \ (td)->td_inhibitors |= (inhib); \ } while (0) #define TD_CLR_INHIB(td, inhib) do { \ if (((td)->td_inhibitors & (inhib)) && \ (((td)->td_inhibitors &= ~(inhib)) == 0)) \ (td)->td_state = TDS_CAN_RUN; \ } while (0) #define TD_SET_SLEEPING(td) TD_SET_INHIB((td), TDI_SLEEPING) #define TD_SET_SWAPPED(td) TD_SET_INHIB((td), TDI_SWAPPED) #define TD_SET_LOCK(td) TD_SET_INHIB((td), TDI_LOCK) #define TD_SET_SUSPENDED(td) TD_SET_INHIB((td), TDI_SUSPENDED) #define TD_SET_IWAIT(td) TD_SET_INHIB((td), TDI_IWAIT) #define TD_SET_EXITING(td) TD_SET_INHIB((td), TDI_EXITING) #define TD_CLR_SLEEPING(td) TD_CLR_INHIB((td), TDI_SLEEPING) #define TD_CLR_SWAPPED(td) TD_CLR_INHIB((td), TDI_SWAPPED) #define TD_CLR_LOCK(td) TD_CLR_INHIB((td), TDI_LOCK) #define TD_CLR_SUSPENDED(td) TD_CLR_INHIB((td), TDI_SUSPENDED) #define TD_CLR_IWAIT(td) TD_CLR_INHIB((td), TDI_IWAIT) #define TD_SET_RUNNING(td) (td)->td_state = TDS_RUNNING #define TD_SET_RUNQ(td) (td)->td_state = TDS_RUNQ #define TD_SET_CAN_RUN(td) (td)->td_state = TDS_CAN_RUN /* * Process structure. */ struct proc { LIST_ENTRY(proc) p_list; /* (d) List of all processes. */ TAILQ_HEAD(, thread) p_threads; /* (c) all threads. */ struct mtx p_slock; /* process spin lock */ struct ucred *p_ucred; /* (c) Process owner's identity. */ struct filedesc *p_fd; /* (b) Open files. */ struct filedesc_to_leader *p_fdtol; /* (b) Tracking node */ struct pstats *p_stats; /* (b) Accounting/statistics (CPU). */ struct plimit *p_limit; /* (c) Process limits. */ struct callout p_limco; /* (c) Limit callout handle */ struct sigacts *p_sigacts; /* (x) Signal actions, state (CPU). */ int p_flag; /* (c) P_* flags. */ int p_flag2; /* (c) P2_* flags. */ enum { PRS_NEW = 0, /* In creation */ PRS_NORMAL, /* threads can be run. */ PRS_ZOMBIE } p_state; /* (j/c) Process status. */ pid_t p_pid; /* (b) Process identifier. */ LIST_ENTRY(proc) p_hash; /* (d) Hash chain. */ LIST_ENTRY(proc) p_pglist; /* (g + e) List of processes in pgrp. */ struct proc *p_pptr; /* (c + e) Pointer to parent process. */ LIST_ENTRY(proc) p_sibling; /* (e) List of sibling processes. */ LIST_HEAD(, proc) p_children; /* (e) Pointer to list of children. */ struct proc *p_reaper; /* (e) My reaper. */ LIST_HEAD(, proc) p_reaplist; /* (e) List of my descendants (if I am reaper). */ LIST_ENTRY(proc) p_reapsibling; /* (e) List of siblings - descendants of the same reaper. */ struct mtx p_mtx; /* (n) Lock for this struct. */ struct mtx p_statmtx; /* Lock for the stats */ struct mtx p_itimmtx; /* Lock for the virt/prof timers */ struct mtx p_profmtx; /* Lock for the profiling */ struct ksiginfo *p_ksi; /* Locked by parent proc lock */ sigqueue_t p_sigqueue; /* (c) Sigs not delivered to a td. */ #define p_siglist p_sigqueue.sq_signals /* The following fields are all zeroed upon creation in fork. */ #define p_startzero p_oppid pid_t p_oppid; /* (c + e) Save ppid in ptrace. XXX */ struct vmspace *p_vmspace; /* (b) Address space. */ u_int p_swtick; /* (c) Tick when swapped in or out. */ struct itimerval p_realtimer; /* (c) Alarm timer. */ struct rusage p_ru; /* (a) Exit information. */ struct rusage_ext p_rux; /* (cu) Internal resource usage. */ struct rusage_ext p_crux; /* (c) Internal child resource usage. */ int p_profthreads; /* (c) Num threads in addupc_task. */ volatile int p_exitthreads; /* (j) Number of threads exiting */ int p_traceflag; /* (o) Kernel trace points. */ struct vnode *p_tracevp; /* (c + o) Trace to vnode. */ struct ucred *p_tracecred; /* (o) Credentials to trace with. */ struct vnode *p_textvp; /* (b) Vnode of executable. */ u_int p_lock; /* (c) Proclock (prevent swap) count. */ struct sigiolst p_sigiolst; /* (c) List of sigio sources. */ int p_sigparent; /* (c) Signal to parent on exit. */ int p_sig; /* (n) For core dump/debugger XXX. */ u_long p_code; /* (n) For core dump/debugger XXX. */ u_int p_stops; /* (c) Stop event bitmask. */ u_int p_stype; /* (c) Stop event type. */ char p_step; /* (c) Process is stopped. */ u_char p_pfsflags; /* (c) Procfs flags. */ struct nlminfo *p_nlminfo; /* (?) Only used by/for lockd. */ struct kaioinfo *p_aioinfo; /* (y) ASYNC I/O info. */ struct thread *p_singlethread;/* (c + j) If single threading this is it */ int p_suspcount; /* (j) Num threads in suspended mode. */ struct thread *p_xthread; /* (c) Trap thread */ int p_boundary_count;/* (j) Num threads at user boundary */ int p_pendingcnt; /* how many signals are pending */ struct itimers *p_itimers; /* (c) POSIX interval timers. */ struct procdesc *p_procdesc; /* (e) Process descriptor, if any. */ u_int p_treeflag; /* (e) P_TREE flags */ /* End area that is zeroed on creation. */ #define p_endzero p_magic /* The following fields are all copied upon creation in fork. */ #define p_startcopy p_endzero u_int p_magic; /* (b) Magic number. */ int p_osrel; /* (x) osreldate for the binary (from ELF note, if any) */ char p_comm[MAXCOMLEN + 1]; /* (b) Process name. */ struct pgrp *p_pgrp; /* (c + e) Pointer to process group. */ struct sysentvec *p_sysent; /* (b) Syscall dispatch info. */ struct pargs *p_args; /* (c) Process arguments. */ rlim_t p_cpulimit; /* (c) Current CPU limit in seconds. */ signed char p_nice; /* (c) Process "nice" value. */ int p_fibnum; /* in this routing domain XXX MRT */ pid_t p_reapsubtree; /* (e) Pid of the direct child of the reaper which spawned our subtree. */ /* End area that is copied on creation. */ #define p_endcopy p_xstat u_short p_xstat; /* (c) Exit status; also stop sig. */ struct knlist p_klist; /* (c) Knotes attached to this proc. */ int p_numthreads; /* (c) Number of threads. */ struct mdproc p_md; /* Any machine-dependent fields. */ struct callout p_itcallout; /* (h + c) Interval timer callout. */ u_short p_acflag; /* (c) Accounting flags. */ struct proc *p_peers; /* (r) */ struct proc *p_leader; /* (b) */ void *p_emuldata; /* (c) Emulator state data. */ struct label *p_label; /* (*) Proc (not subject) MAC label. */ struct p_sched *p_sched; /* (*) Scheduler-specific data. */ STAILQ_HEAD(, ktr_request) p_ktr; /* (o) KTR event queue. */ LIST_HEAD(, mqueue_notifier) p_mqnotifier; /* (c) mqueue notifiers.*/ struct kdtrace_proc *p_dtrace; /* (*) DTrace-specific data. */ struct cv p_pwait; /* (*) wait cv for exit/exec. */ struct cv p_dbgwait; /* (*) wait cv for debugger attach after fork. */ uint64_t p_prev_runtime; /* (c) Resource usage accounting. */ struct racct *p_racct; /* (b) Resource accounting. */ u_char p_throttled; /* (c) Flag for racct pcpu throttling */ /* * An orphan is the child that has beed re-parented to the * debugger as a result of attaching to it. Need to keep * track of them for parent to be able to collect the exit * status of what used to be children. */ LIST_ENTRY(proc) p_orphan; /* (e) List of orphan processes. */ LIST_HEAD(, proc) p_orphans; /* (e) Pointer to list of orphans. */ }; #define p_session p_pgrp->pg_session #define p_pgid p_pgrp->pg_id #define NOCPU (-1) /* For when we aren't on a CPU. */ #define NOCPU_OLD (255) #define MAXCPU_OLD (254) #define PROC_SLOCK(p) mtx_lock_spin(&(p)->p_slock) #define PROC_SUNLOCK(p) mtx_unlock_spin(&(p)->p_slock) #define PROC_SLOCK_ASSERT(p, type) mtx_assert(&(p)->p_slock, (type)) #define PROC_STATLOCK(p) mtx_lock_spin(&(p)->p_statmtx) #define PROC_STATUNLOCK(p) mtx_unlock_spin(&(p)->p_statmtx) #define PROC_STATLOCK_ASSERT(p, type) mtx_assert(&(p)->p_statmtx, (type)) #define PROC_ITIMLOCK(p) mtx_lock_spin(&(p)->p_itimmtx) #define PROC_ITIMUNLOCK(p) mtx_unlock_spin(&(p)->p_itimmtx) #define PROC_ITIMLOCK_ASSERT(p, type) mtx_assert(&(p)->p_itimmtx, (type)) #define PROC_PROFLOCK(p) mtx_lock_spin(&(p)->p_profmtx) #define PROC_PROFUNLOCK(p) mtx_unlock_spin(&(p)->p_profmtx) #define PROC_PROFLOCK_ASSERT(p, type) mtx_assert(&(p)->p_profmtx, (type)) /* These flags are kept in p_flag. */ #define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */ #define P_CONTROLT 0x00002 /* Has a controlling terminal. */ #define P_KTHREAD 0x00004 /* Kernel thread (*). */ #define P_FOLLOWFORK 0x00008 /* Attach parent debugger to children. */ #define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */ #define P_PROFIL 0x00020 /* Has started profiling. */ #define P_STOPPROF 0x00040 /* Has thread requesting to stop profiling. */ #define P_HADTHREADS 0x00080 /* Has had threads (no cleanup shortcuts) */ #define P_SUGID 0x00100 /* Had set id privileges since last exec. */ #define P_SYSTEM 0x00200 /* System proc: no sigs, stats or swapping. */ #define P_SINGLE_EXIT 0x00400 /* Threads suspending should exit, not wait. */ #define P_TRACED 0x00800 /* Debugged process being traced. */ #define P_WAITED 0x01000 /* Someone is waiting for us. */ #define P_WEXIT 0x02000 /* Working on exiting. */ #define P_EXEC 0x04000 /* Process called exec. */ #define P_WKILLED 0x08000 /* Killed, go to kernel/user boundary ASAP. */ #define P_CONTINUED 0x10000 /* Proc has continued from a stopped state. */ #define P_STOPPED_SIG 0x20000 /* Stopped due to SIGSTOP/SIGTSTP. */ #define P_STOPPED_TRACE 0x40000 /* Stopped because of tracing. */ #define P_STOPPED_SINGLE 0x80000 /* Only 1 thread can continue (not to user). */ #define P_PROTECTED 0x100000 /* Do not kill on memory overcommit. */ #define P_SIGEVENT 0x200000 /* Process pending signals changed. */ #define P_SINGLE_BOUNDARY 0x400000 /* Threads should suspend at user boundary. */ #define P_HWPMC 0x800000 /* Process is using HWPMCs */ #define P_JAILED 0x1000000 /* Process is in jail. */ #define P_TOTAL_STOP 0x2000000 /* Stopped in proc_stop_total. */ #define P_INEXEC 0x4000000 /* Process is in execve(). */ #define P_STATCHILD 0x8000000 /* Child process stopped or exited. */ #define P_INMEM 0x10000000 /* Loaded into memory. */ #define P_SWAPPINGOUT 0x20000000 /* Process is being swapped out. */ #define P_SWAPPINGIN 0x40000000 /* Process is being swapped in. */ #define P_PPTRACE 0x80000000 /* PT_TRACEME by vforked child. */ #define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE) #define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED) #define P_KILLED(p) ((p)->p_flag & P_WKILLED) /* These flags are kept in p_flag2. */ #define P2_INHERIT_PROTECTED 0x00000001 /* New children get P_PROTECTED. */ #define P2_NOTRACE 0x00000002 /* No ptrace(2) attach or coredumps. */ #define P2_NOTRACE_EXEC 0x00000004 /* Keep P2_NOPTRACE on exec(2). */ /* Flags protected by proctree_lock, kept in p_treeflags. */ #define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */ #define P_TREE_FIRST_ORPHAN 0x00000002 /* First element of orphan list */ #define P_TREE_REAPER 0x00000004 /* Reaper of subtree */ /* * These were process status values (p_stat), now they are only used in * legacy conversion code. */ #define SIDL 1 /* Process being created by fork. */ #define SRUN 2 /* Currently runnable. */ #define SSLEEP 3 /* Sleeping on an address. */ #define SSTOP 4 /* Process debugging or suspension. */ #define SZOMB 5 /* Awaiting collection by parent. */ #define SWAIT 6 /* Waiting for interrupt. */ #define SLOCK 7 /* Blocked on a lock. */ #define P_MAGIC 0xbeefface #ifdef _KERNEL /* Types and flags for mi_switch(). */ #define SW_TYPE_MASK 0xff /* First 8 bits are switch type */ #define SWT_NONE 0 /* Unspecified switch. */ #define SWT_PREEMPT 1 /* Switching due to preemption. */ #define SWT_OWEPREEMPT 2 /* Switching due to opepreempt. */ #define SWT_TURNSTILE 3 /* Turnstile contention. */ #define SWT_SLEEPQ 4 /* Sleepq wait. */ -#define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */ +#define SWT_UNUSED5 5 /* --available-- */ #define SWT_RELINQUISH 6 /* yield call. */ #define SWT_NEEDRESCHED 7 /* NEEDRESCHED was set. */ #define SWT_IDLE 8 /* Switching from the idle thread. */ #define SWT_IWAIT 9 /* Waiting for interrupts. */ #define SWT_SUSPEND 10 /* Thread suspended. */ #define SWT_REMOTEPREEMPT 11 /* Remote processor preempted. */ #define SWT_REMOTEWAKEIDLE 12 /* Remote processor preempted idle. */ #define SWT_COUNT 13 /* Number of switch types. */ /* Flags */ #define SW_VOL 0x0100 /* Voluntary switch. */ #define SW_INVOL 0x0200 /* Involuntary switch. */ #define SW_PREEMPT 0x0400 /* The invol switch is a preemption */ /* How values for thread_single(). */ #define SINGLE_NO_EXIT 0 #define SINGLE_EXIT 1 #define SINGLE_BOUNDARY 2 #define SINGLE_ALLPROC 3 #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_PARGS); MALLOC_DECLARE(M_PGRP); MALLOC_DECLARE(M_SESSION); MALLOC_DECLARE(M_SUBPROC); #endif #define FOREACH_PROC_IN_SYSTEM(p) \ LIST_FOREACH((p), &allproc, p_list) #define FOREACH_THREAD_IN_PROC(p, td) \ TAILQ_FOREACH((td), &(p)->p_threads, td_plist) #define FIRST_THREAD_IN_PROC(p) TAILQ_FIRST(&(p)->p_threads) /* * We use process IDs <= pid_max <= PID_MAX; PID_MAX + 1 must also fit * in a pid_t, as it is used to represent "no process group". */ #define PID_MAX 99999 #define NO_PID 100000 extern pid_t pid_max; #define SESS_LEADER(p) ((p)->p_session->s_leader == (p)) #define STOPEVENT(p, e, v) do { \ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, \ "checking stopevent %d", (e)); \ if ((p)->p_stops & (e)) { \ PROC_LOCK(p); \ stopevent((p), (e), (v)); \ PROC_UNLOCK(p); \ } \ } while (0) #define _STOPEVENT(p, e, v) do { \ PROC_LOCK_ASSERT(p, MA_OWNED); \ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &p->p_mtx.lock_object, \ "checking stopevent %d", (e)); \ if ((p)->p_stops & (e)) \ stopevent((p), (e), (v)); \ } while (0) /* Lock and unlock a process. */ #define PROC_LOCK(p) mtx_lock(&(p)->p_mtx) #define PROC_TRYLOCK(p) mtx_trylock(&(p)->p_mtx) #define PROC_UNLOCK(p) mtx_unlock(&(p)->p_mtx) #define PROC_LOCKED(p) mtx_owned(&(p)->p_mtx) #define PROC_LOCK_ASSERT(p, type) mtx_assert(&(p)->p_mtx, (type)) /* Lock and unlock a process group. */ #define PGRP_LOCK(pg) mtx_lock(&(pg)->pg_mtx) #define PGRP_UNLOCK(pg) mtx_unlock(&(pg)->pg_mtx) #define PGRP_LOCKED(pg) mtx_owned(&(pg)->pg_mtx) #define PGRP_LOCK_ASSERT(pg, type) mtx_assert(&(pg)->pg_mtx, (type)) #define PGRP_LOCK_PGSIGNAL(pg) do { \ if ((pg) != NULL) \ PGRP_LOCK(pg); \ } while (0) #define PGRP_UNLOCK_PGSIGNAL(pg) do { \ if ((pg) != NULL) \ PGRP_UNLOCK(pg); \ } while (0) /* Lock and unlock a session. */ #define SESS_LOCK(s) mtx_lock(&(s)->s_mtx) #define SESS_UNLOCK(s) mtx_unlock(&(s)->s_mtx) #define SESS_LOCKED(s) mtx_owned(&(s)->s_mtx) #define SESS_LOCK_ASSERT(s, type) mtx_assert(&(s)->s_mtx, (type)) /* Hold process U-area in memory, normally for ptrace/procfs work. */ #define PHOLD(p) do { \ PROC_LOCK(p); \ _PHOLD(p); \ PROC_UNLOCK(p); \ } while (0) #define _PHOLD(p) do { \ PROC_LOCK_ASSERT((p), MA_OWNED); \ KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc, \ ("PHOLD of exiting process")); \ (p)->p_lock++; \ if (((p)->p_flag & P_INMEM) == 0) \ faultin((p)); \ } while (0) #define PROC_ASSERT_HELD(p) do { \ KASSERT((p)->p_lock > 0, ("process not held")); \ } while (0) #define PRELE(p) do { \ PROC_LOCK((p)); \ _PRELE((p)); \ PROC_UNLOCK((p)); \ } while (0) #define _PRELE(p) do { \ PROC_LOCK_ASSERT((p), MA_OWNED); \ PROC_ASSERT_HELD(p); \ (--(p)->p_lock); \ if (((p)->p_flag & P_WEXIT) && (p)->p_lock == 0) \ wakeup(&(p)->p_lock); \ } while (0) #define PROC_ASSERT_NOT_HELD(p) do { \ KASSERT((p)->p_lock == 0, ("process held")); \ } while (0) /* Check whether a thread is safe to be swapped out. */ #define thread_safetoswapout(td) ((td)->td_flags & TDF_CANSWAP) /* Control whether or not it is safe for curthread to sleep. */ #define THREAD_NO_SLEEPING() ((curthread)->td_no_sleeping++) #define THREAD_SLEEPING_OK() ((curthread)->td_no_sleeping--) #define THREAD_CAN_SLEEP() ((curthread)->td_no_sleeping == 0) #define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; extern u_long pidhash; #define TIDHASH(tid) (&tidhashtbl[(tid) & tidhash]) extern LIST_HEAD(tidhashhead, thread) *tidhashtbl; extern u_long tidhash; extern struct rwlock tidhash_lock; #define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash]) extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl; extern u_long pgrphash; extern struct sx allproc_lock; extern int allproc_gen; extern struct sx proctree_lock; extern struct mtx ppeers_lock; extern struct proc proc0; /* Process slot for swapper. */ extern struct thread thread0; /* Primary thread in proc0. */ extern struct vmspace vmspace0; /* VM space for proc0. */ extern int hogticks; /* Limit on kernel cpu hogs. */ extern int lastpid; extern int nprocs, maxproc; /* Current and max number of procs. */ extern int maxprocperuid; /* Max procs per uid. */ extern u_long ps_arg_cache_limit; LIST_HEAD(proclist, proc); TAILQ_HEAD(procqueue, proc); TAILQ_HEAD(threadqueue, thread); extern struct proclist allproc; /* List of all processes. */ extern struct proclist zombproc; /* List of zombie processes. */ extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */ extern struct uma_zone *proc_zone; struct proc *pfind(pid_t); /* Find process by id. */ struct proc *pfind_locked(pid_t pid); struct pgrp *pgfind(pid_t); /* Find process group by id. */ struct proc *zpfind(pid_t); /* Find zombie process by id. */ /* * pget() flags. */ #define PGET_HOLD 0x00001 /* Hold the process. */ #define PGET_CANSEE 0x00002 /* Check against p_cansee(). */ #define PGET_CANDEBUG 0x00004 /* Check against p_candebug(). */ #define PGET_ISCURRENT 0x00008 /* Check that the found process is current. */ #define PGET_NOTWEXIT 0x00010 /* Check that the process is not in P_WEXIT. */ #define PGET_NOTINEXEC 0x00020 /* Check that the process is not in P_INEXEC. */ #define PGET_NOTID 0x00040 /* Do not assume tid if pid > PID_MAX. */ #define PGET_WANTREAD (PGET_HOLD | PGET_CANDEBUG | PGET_NOTWEXIT) int pget(pid_t pid, int flags, struct proc **pp); void ast(struct trapframe *framep); struct thread *choosethread(void); int cr_cansignal(struct ucred *cred, struct proc *proc, int signum); int enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess); int enterthispgrp(struct proc *p, struct pgrp *pgrp); void faultin(struct proc *p); void fixjobc(struct proc *p, struct pgrp *pgrp, int entering); int fork1(struct thread *, int, int, struct proc **, int *, int); void fork_exit(void (*)(void *, struct trapframe *), void *, struct trapframe *); void fork_return(struct thread *, struct trapframe *); int inferior(struct proc *p); void kern_yield(int); void kick_proc0(void); int leavepgrp(struct proc *p); int maybe_preempt(struct thread *td); void maybe_yield(void); void mi_switch(int flags, struct thread *newtd); int p_candebug(struct thread *td, struct proc *p); int p_cansee(struct thread *td, struct proc *p); int p_cansched(struct thread *td, struct proc *p); int p_cansignal(struct thread *td, struct proc *p, int signum); int p_canwait(struct thread *td, struct proc *p); struct pargs *pargs_alloc(int len); void pargs_drop(struct pargs *pa); void pargs_hold(struct pargs *pa); int proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb); int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb); int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb); void procinit(void); void proc_linkup0(struct proc *p, struct thread *td); void proc_linkup(struct proc *p, struct thread *td); struct proc *proc_realparent(struct proc *child); void proc_reap(struct thread *td, struct proc *p, int *status, int options); void proc_reparent(struct proc *child, struct proc *newparent); struct pstats *pstats_alloc(void); void pstats_fork(struct pstats *src, struct pstats *dst); void pstats_free(struct pstats *ps); void reaper_abandon_children(struct proc *p, bool exiting); int securelevel_ge(struct ucred *cr, int level); int securelevel_gt(struct ucred *cr, int level); void sess_hold(struct session *); void sess_release(struct session *); int setrunnable(struct thread *); void setsugid(struct proc *p); int should_yield(void); int sigonstack(size_t sp); void stopevent(struct proc *, u_int, u_int); struct thread *tdfind(lwpid_t, pid_t); void threadinit(void); void tidhash_add(struct thread *); void tidhash_remove(struct thread *); void cpu_idle(int); int cpu_idle_wakeup(int); extern void (*cpu_idle_hook)(sbintime_t); /* Hook to machdep CPU idler. */ void cpu_switch(struct thread *, struct thread *, struct mtx *); void cpu_throw(struct thread *, struct thread *) __dead2; void unsleep(struct thread *); void userret(struct thread *, struct trapframe *); void cpu_exit(struct thread *); void exit1(struct thread *, int) __dead2; struct syscall_args; int cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa); void cpu_fork(struct thread *, struct proc *, struct thread *, int); void cpu_set_fork_handler(struct thread *, void (*)(void *), void *); void cpu_set_syscall_retval(struct thread *, int); void cpu_set_upcall(struct thread *td, struct thread *td0); void cpu_set_upcall_kse(struct thread *, void (*)(void *), void *, stack_t *); int cpu_set_user_tls(struct thread *, void *tls_base); void cpu_thread_alloc(struct thread *); void cpu_thread_clean(struct thread *); void cpu_thread_exit(struct thread *); void cpu_thread_free(struct thread *); void cpu_thread_swapin(struct thread *); void cpu_thread_swapout(struct thread *); struct thread *thread_alloc(int pages); int thread_alloc_stack(struct thread *, int pages); void thread_exit(void) __dead2; void thread_free(struct thread *td); void thread_link(struct thread *td, struct proc *p); void thread_reap(void); int thread_single(struct proc *p, int how); void thread_single_end(struct proc *p, int how); void thread_stash(struct thread *td); void thread_stopped(struct proc *p); void childproc_stopped(struct proc *child, int reason); void childproc_continued(struct proc *child); void childproc_exited(struct proc *child); int thread_suspend_check(int how); bool thread_suspend_check_needed(void); void thread_suspend_switch(struct thread *, struct proc *p); void thread_suspend_one(struct thread *td); void thread_unlink(struct thread *td); void thread_unsuspend(struct proc *p); int thread_unsuspend_one(struct thread *td, struct proc *p); void thread_wait(struct proc *p); struct thread *thread_find(struct proc *p, lwpid_t tid); void stop_all_proc(void); void resume_all_proc(void); static __inline int curthread_pflags_set(int flags) { struct thread *td; int save; td = curthread; save = ~flags | (td->td_pflags & flags); td->td_pflags |= flags; return (save); } static __inline void curthread_pflags_restore(int save) { curthread->td_pflags &= save; } #endif /* _KERNEL */ #endif /* !_SYS_PROC_H_ */