Index: projects/vnet/etc/defaults/rc.conf
===================================================================
--- projects/vnet/etc/defaults/rc.conf	(revision 301522)
+++ projects/vnet/etc/defaults/rc.conf	(revision 301523)
@@ -1,743 +1,743 @@
 #!/bin/sh
 
 # This is rc.conf - a file full of useful variables that you can set
 # to change the default startup behavior of your system.  You should
 # not edit this file!  Put any overrides into one of the ${rc_conf_files}
 # instead and you will be able to update these defaults later without
 # spamming your local configuration information.
 #
 # The ${rc_conf_files} files should only contain values which override
 # values set in this file.  This eases the upgrade path when defaults
 # are changed and new features are added.
 #
 # All arguments must be in double or single quotes.
 #
 # For a more detailed explanation of all the rc.conf variables, please
 # refer to the rc.conf(5) manual page.
 #
 # $FreeBSD$
 
 ##############################################################
 ###  Important initial Boot-time options  ####################
 ##############################################################
 
 #rc_debug="NO"		# Set to YES to enable debugging output from rc.d
 rc_info="NO"		# Enables display of informational messages at boot.
 rc_startmsgs="YES" 	# Show "Starting foo:" messages at boot
 rcshutdown_timeout="90" # Seconds to wait before terminating rc.shutdown
 early_late_divider="FILESYSTEMS"	# Script that separates early/late
 			# stages of the boot process.  Make sure you know
 			# the ramifications if you change this.
 			# See rc.conf(5) for more details.
 always_force_depends="NO"	# Set to check that indicated dependencies are
 				# running during boot (can increase boot time).
 
 apm_enable="NO"		# Set to YES to enable APM BIOS functions (or NO).
 apmd_enable="NO"	# Run apmd to handle APM events from userland.
 apmd_flags=""		# Flags to apmd (if enabled).
 ddb_enable="NO"		# Set to YES to load ddb scripts at boot.
 ddb_config="/etc/ddb.conf"	# ddb(8) config file.
 devd_enable="YES" 	# Run devd, to trigger programs on device tree changes.
 devd_flags=""		# Additional flags for devd(8).
 #kld_list="" 		# Kernel modules to load after local disks are mounted
 kldxref_enable="NO"	# Build linker.hints files with kldxref(8).
 kldxref_clobber="NO"	# Overwrite old linker.hints at boot.
 kldxref_module_path=""	# Override kern.module_path. A ';'-delimited list.
 powerd_enable="NO" 	# Run powerd to lower our power usage.
 powerd_flags=""		# Flags to powerd (if enabled).
 tmpmfs="AUTO"		# Set to YES to always create an mfs /tmp, NO to never
 tmpsize="20m"		# Size of mfs /tmp if created
 tmpmfs_flags="-S"	# Extra mdmfs options for the mfs /tmp
 varmfs="AUTO"		# Set to YES to always create an mfs /var, NO to never
 varsize="32m"		# Size of mfs /var if created
 varmfs_flags="-S"	# Extra mount options for the mfs /var
 populate_var="AUTO"	# Set to YES to always (re)populate /var, NO to never
 cleanvar_enable="YES" 	# Clean the /var directory
 local_startup="/usr/local/etc/rc.d" # startup script dirs.
 script_name_sep=" "	# Change if your startup scripts' names contain spaces
 rc_conf_files="/etc/rc.conf /etc/rc.conf.local"
 
 # ZFS support
 zfs_enable="NO"		# Set to YES to automatically mount ZFS file systems
 
 # ZFSD support
 zfsd_enable="NO"	# Set to YES to automatically start the ZFS fault
 			# management daemon.
 
 gptboot_enable="YES"	# GPT boot success/failure reporting.
 
 # Experimental - test before enabling
 gbde_autoattach_all="NO" # YES automatically mounts gbde devices from fstab
 gbde_devices="NO" 	# Devices to automatically attach (list, or AUTO)
 gbde_attach_attempts="3" # Number of times to attempt attaching gbde devices
 gbde_lockdir="/etc"	# Where to look for gbde lockfiles
 
 # GELI disk encryption configuration.
 geli_devices=""		# List of devices to automatically attach in addition to
 			# GELI devices listed in /etc/fstab.
 geli_tries=""		# Number of times to attempt attaching geli device.
 			# If empty, kern.geom.eli.tries will be used.
 geli_default_flags=""	# Default flags for geli(8).
 geli_autodetach="YES"	# Automatically detach on last close.
 			# Providers are marked as such when all file systems are
 			# mounted.
 # Example use.
 #geli_devices="da1 mirror/home"
 #geli_da1_flags="-p -k /etc/geli/da1.keys"
 #geli_da1_autodetach="NO"
 #geli_mirror_home_flags="-k /etc/geli/home.keys"
 
 root_rw_mount="YES"	# Set to NO to inhibit remounting root read-write.
 root_hold_delay="30"	# Time to wait for root mount hold release.
 fsck_y_enable="NO"	# Set to YES to do fsck -y if the initial preen fails.
 fsck_y_flags=""		# Additional flags for fsck -y
 background_fsck="YES"	# Attempt to run fsck in the background where possible.
 background_fsck_delay="60" # Time to wait (seconds) before starting the fsck.
 netfs_types="nfs:NFS smbfs:SMB" # Net filesystems.
 extra_netfs_types="NO"	# List of network extra filesystem types for delayed
 			# mount at startup (or NO).
 
 ##############################################################
 ###  Network configuration sub-section  ######################
 ##############################################################
 
 ### Basic network and firewall/security options: ###
 hostname=""			# Set this!
 hostid_enable="YES"		# Set host UUID.
 hostid_file="/etc/hostid"	# File with hostuuid.
 nisdomainname="NO"		# Set to NIS domain if using NIS (or NO).
 dhclient_program="/sbin/dhclient"	# Path to dhcp client program.
 dhclient_flags=""		# Extra flags to pass to dhcp client.
 #dhclient_flags_fxp0=""		# Extra dhclient flags for fxp0 only
 background_dhclient="NO"	# Start dhcp client in the background.
 #background_dhclient_fxp0="YES"	# Start dhcp client on fxp0 in the background.
 synchronous_dhclient="NO"	# Start dhclient directly on configured
 				# interfaces during startup.
 defaultroute_delay="30"		# Time to wait for a default route on a DHCP interface.
 defaultroute_carrier_delay="5"	# Time to wait for carrier while waiting for a default route.
 netif_enable="YES"		# Set to YES to initialize network interfaces
 netif_ipexpand_max="2048"	# Maximum number of IP addrs in a range spec.
 wpa_supplicant_program="/usr/sbin/wpa_supplicant"
 wpa_supplicant_flags="-s"	# Extra flags to pass to wpa_supplicant
 wpa_supplicant_conf_file="/etc/wpa_supplicant.conf"
 #
 firewall_enable="NO"		# Set to YES to enable firewall functionality
 firewall_script="/etc/rc.firewall" # Which script to run to set up the firewall
 firewall_type="UNKNOWN"		# Firewall type (see /etc/rc.firewall)
 firewall_quiet="NO"		# Set to YES to suppress rule display
 firewall_logging="NO"		# Set to YES to enable events logging
 firewall_logif="NO"		# Set to YES to create logging-pseudo interface
 firewall_flags=""		# Flags passed to ipfw when type is a file
 firewall_coscripts=""		# List of executables/scripts to run after
 				# firewall starts/stops
 firewall_client_net="192.0.2.0/24" # IPv4 Network address for "client"
 				# firewall.
 #firewall_client_net_ipv6="2001:db8:2:1::/64" # IPv6 network prefix for
 				# "client" firewall.
 firewall_simple_iif="ed1"	# Inside network interface for "simple"
 				# firewall.
 firewall_simple_inet="192.0.2.16/28" # Inside network address for "simple"
 				# firewall.
 firewall_simple_oif="ed0"	# Outside network interface for "simple"
 				# firewall.
 firewall_simple_onet="192.0.2.0/28" # Outside network address for "simple"
 				# firewall.
 #firewall_simple_iif_ipv6="ed1"	# Inside IPv6 network interface for "simple"
 				# firewall.
 #firewall_simple_inet_ipv6="2001:db8:2:800::/56" # Inside IPv6 network prefix
 				# for "simple" firewall.
 #firewall_simple_oif_ipv6="ed0"	# Outside IPv6 network interface for "simple"
 				# firewall.
 #firewall_simple_onet_ipv6="2001:db8:2:0::/56" # Outside IPv6 network prefix
 				# for "simple" firewall.
 firewall_myservices=""		# List of TCP ports on which this host
 				# offers services for "workstation" firewall.
 firewall_allowservices=""	# List of IPs which have access to
 				# $firewall_myservices for "workstation"
 				# firewall.
 firewall_trusted=""		# List of IPs which have full access to this
 				# host for "workstation" firewall.
 firewall_logdeny="NO"		# Set to YES to log default denied incoming
 				# packets for "workstation" firewall.
 firewall_nologports="135-139,445 1026,1027 1433,1434" # List of TCP/UDP ports
 				# for which denied incoming packets are not
 				# logged for "workstation" firewall.
 firewall_nat_enable="NO"	# Enable kernel NAT (if firewall_enable == YES)
 firewall_nat_interface=""	# Public interface or IPaddress to use
 firewall_nat_flags=""		# Additional configuration parameters
 dummynet_enable="NO"		# Load the dummynet(4) module
 ip_portrange_first="NO"		# Set first dynamically allocated port
 ip_portrange_last="NO"		# Set last dynamically allocated port
 ike_enable="NO"			# Enable IKE daemon (usually racoon or isakmpd)
 ike_program="/usr/local/sbin/isakmpd"	# Path to IKE daemon
 ike_flags=""			# Additional flags for IKE daemon
 ipsec_enable="NO"		# Set to YES to run setkey on ipsec_file
 ipsec_file="/etc/ipsec.conf"	# Name of config file for setkey
 natd_program="/sbin/natd"	# path to natd, if you want a different one.
 natd_enable="NO"		# Enable natd (if firewall_enable == YES).
 natd_interface=""		# Public interface or IPaddress to use.
 natd_flags=""			# Additional flags for natd.
 ipfilter_enable="NO"		# Set to YES to enable ipfilter functionality
 ipfilter_program="/sbin/ipf"	# where the ipfilter program lives
 ipfilter_rules="/etc/ipf.rules"	# rules definition file for ipfilter, see
 				# /usr/src/contrib/ipfilter/rules for examples
 ipfilter_flags=""		# additional flags for ipfilter
 ipnat_enable="NO"		# Set to YES to enable ipnat functionality
 ipnat_program="/sbin/ipnat"	# where the ipnat program lives
 ipnat_rules="/etc/ipnat.rules"	# rules definition file for ipnat
 ipnat_flags=""			# additional flags for ipnat
 ipmon_enable="NO"		# Set to YES for ipmon; needs ipfilter or ipnat
 ipmon_program="/sbin/ipmon"	# where the ipfilter monitor program lives
 ipmon_flags="-Ds"		# typically "-Ds" or "-D /var/log/ipflog"
 ipfs_enable="NO"		# Set to YES to enable saving and restoring
 				# of state tables at shutdown and boot
 ipfs_program="/sbin/ipfs"	# where the ipfs program lives
 ipfs_flags=""			# additional flags for ipfs
 pf_enable="NO"			# Set to YES to enable packet filter (pf)
 pf_rules="/etc/pf.conf"		# rules definition file for pf
 pf_program="/sbin/pfctl"	# where the pfctl program lives
 pf_flags=""			# additional flags for pfctl
 pflog_enable="NO"		# Set to YES to enable packet filter logging
 pflog_logfile="/var/log/pflog"	# where pflogd should store the logfile
 pflog_program="/sbin/pflogd"	# where the pflogd program lives
 pflog_flags=""			# additional flags for pflogd
 ftpproxy_enable="NO"		# Set to YES to enable ftp-proxy(8) for pf
 ftpproxy_flags=""		# additional flags for ftp-proxy(8)
 pfsync_enable="NO"		# Expose pf state to other hosts for syncing
 pfsync_syncdev=""		# Interface for pfsync to work through
 pfsync_syncpeer=""		# IP address of pfsync peer host
 pfsync_ifconfig=""		# Additional options to ifconfig(8) for pfsync
 tcp_extensions="YES"		# Set to NO to turn off RFC1323 extensions.
 log_in_vain="0"			# >=1 to log connects to ports w/o listeners.
 tcp_keepalive="YES"		# Enable stale TCP connection timeout (or NO).
 tcp_drop_synfin="NO"		# Set to YES to drop TCP packets with SYN+FIN
 				# NOTE: this violates the TCP specification
 icmp_drop_redirect="NO" 	# Set to YES to ignore ICMP REDIRECT packets
 icmp_log_redirect="NO"		# Set to YES to log ICMP REDIRECT packets
 network_interfaces="auto"	# List of network interfaces (or "auto").
 cloned_interfaces=""		# List of cloned network interfaces to create.
 #cloned_interfaces="gif0 gif1 gif2 gif3" # Pre-cloning GENERIC config.
 #ifconfig_lo0="inet 127.0.0.1"	# default loopback device configuration.
 #ifconfig_lo0_alias0="inet 127.0.0.254 netmask 0xffffffff" # Sample alias entry.
 #ifconfig_ed0_ipv6="inet6 2001:db8:1::1 prefixlen 64" # Sample IPv6 addr entry
 #ifconfig_ed0_alias0="inet6 2001:db8:2::1 prefixlen 64" # Sample IPv6 alias
 #ifconfig_fxp0_name="net0"	# Change interface name from fxp0 to net0.
 #vlans_fxp0="101 vlan0"		# vlan(4) interfaces for fxp0 device
 #create_args_vlan0="vlan 102"	# vlan tag for vlan0 device
 #wlans_ath0="wlan0"		# wlan(4) interfaces for ath0 device
 #wlandebug_wlan0="scan+auth+assoc"	# Set debug flags with wlandebug(8)
 #ipv4_addrs_fxp0="192.168.0.1/24 192.168.1.1-5/28" # example IPv4 address entry.
 #
 #autobridge_interfaces="bridge0"	# List of bridges to check
 #autobridge_bridge0="tap* vlan0"	# Interface glob to automatically add to the bridge
 #
 # If you have any sppp(4) interfaces above, you might also want to set
 # the following parameters.  Refer to spppcontrol(8) for their meaning.
 sppp_interfaces=""		# List of sppp interfaces.
 #sppp_interfaces="...0"		# example: sppp over ...
 #spppconfig_...0="authproto=chap myauthname=foo myauthsecret='top secret' hisauthname=some-gw hisauthsecret='another secret'"
 
 # User ppp configuration.
 ppp_enable="NO"		# Start user-ppp (or NO).
 ppp_program="/usr/sbin/ppp"	# Path to user-ppp program.
 ppp_mode="auto"		# Choice of "auto", "ddial", "direct" or "dedicated".
 			# For details see man page for ppp(8). Default is auto.
 ppp_nat="YES"		# Use PPP's internal network address translation or NO.
 ppp_profile="papchap"	# Which profile to use from /etc/ppp/ppp.conf.
 ppp_user="root"		# Which user to run ppp as
 
 # Start multiple instances of ppp at boot time
 #ppp_profile="profile1 profile2 profile3"	# Which profiles to use
 #ppp_profile1_mode="ddial"	# Override ppp mode for profile1
 #ppp_profile2_nat="NO"		# Override nat mode for profile2
 # profile3 uses default ppp_mode and ppp_nat
 
 ### Network daemon (miscellaneous) ###
 hostapd_enable="NO"		# Run hostap daemon.
 syslogd_enable="YES"		# Run syslog daemon (or NO).
 syslogd_program="/usr/sbin/syslogd" # path to syslogd, if you want a different one.
 syslogd_flags="-s"		# Flags to syslogd (if enabled).
 syslogd_oomprotect="YES"	# Don't kill syslogd when swap space is exhausted. 
 altlog_proglist=""		# List of chrooted applications in /var
 inetd_enable="NO"		# Run the network daemon dispatcher (YES/NO).
 inetd_program="/usr/sbin/inetd"	# path to inetd, if you want a different one.
 inetd_flags="-wW -C 60"		# Optional flags to inetd
 iscsid_enable="NO"		# iSCSI initiator daemon.
 iscsictl_enable="NO"		# iSCSI initiator autostart.
 iscsictl_flags="-Aa"		# Optional flags to iscsictl.
 hastd_enable="NO"		# Run the HAST daemon (YES/NO).
 hastd_program="/sbin/hastd"	# path to hastd, if you want a different one.
 hastd_flags=""			# Optional flags to hastd.
 ctld_enable="NO"		# CAM Target Layer / iSCSI target daemon.
 local_unbound_enable="NO"	# local caching resolver
-blacklistd_enable="YES" 	# Run blacklistd daemon (YES/NO).
+blacklistd_enable="NO" 	# Run blacklistd daemon (YES/NO).
 blacklistd_flags=""		# Optional flags for blacklistd(8).
 
 #
 # kerberos. Do not run the admin daemons on slave servers
 #
 kdc_enable="NO"			# Run a kerberos 5 KDC (or NO).
 kdc_program="/usr/libexec/kdc"	# path to kerberos 5 KDC
 kdc_flags=""			# Additional flags to the kerberos 5 KDC
 kadmind_enable="NO"		# Run kadmind (or NO)
 kadmind_program="/usr/libexec/kadmind"	# path to kadmind
 kpasswdd_enable="NO"		# Run kpasswdd (or NO)
 kpasswdd_program="/usr/libexec/kpasswdd" # path to kpasswdd
 kfd_enable="NO"			# Run kfd (or NO)
 kfd_program="/usr/libexec/kfd"	# path to kerberos 5 kfd daemon
 kfd_flags=""
 ipropd_master_enable="NO"	# Run Heimdal incremental propagation daemon
 				# (master daemon).
 ipropd_master_program="/usr/libexec/ipropd-master"
 ipropd_master_flags=""		# Flags to ipropd-master.
 ipropd_master_keytab="/etc/krb5.keytab"	# keytab for ipropd-master.
 ipropd_master_slaves=""		# slave node names used for /var/heimdal/slaves.
 ipropd_slave_enable="NO"	# Run Heimdal incremental propagation daemon
 				# (slave daemon).
 ipropd_slave_program="/usr/libexec/ipropd-slave"
 ipropd_slave_flags=""		# Flags to ipropd-slave.
 ipropd_slave_keytab="/etc/krb5.keytab"	# keytab for ipropd-slave.
 ipropd_slave_master=""		# master node name.
 
 gssd_enable="NO"		# Run the gssd daemon (or NO).
 gssd_program="/usr/sbin/gssd"	# Path to gssd.
 gssd_flags=""			# Flags for gssd.
 
 rwhod_enable="NO"		# Run the rwho daemon (or NO).
 rwhod_flags=""			# Flags for rwhod
 rarpd_enable="NO"		# Run rarpd (or NO).
 rarpd_flags="-a"		# Flags to rarpd.
 bootparamd_enable="NO"		# Run bootparamd (or NO).
 bootparamd_flags=""		# Flags to bootparamd
 pppoed_enable="NO"		# Run the PPP over Ethernet daemon.
 pppoed_provider="*"		# Provider and ppp(8) config file entry.
 pppoed_flags="-P /var/run/pppoed.pid"	# Flags to pppoed (if enabled).
 pppoed_interface="fxp0"		# The interface that pppoed runs on.
 sshd_enable="NO"		# Enable sshd
 sshd_program="/usr/sbin/sshd"	# path to sshd, if you want a different one.
 sshd_flags=""			# Additional flags for sshd.
 ftpd_enable="NO"		# Enable stand-alone ftpd.
 ftpd_program="/usr/libexec/ftpd" # Path to ftpd, if you want a different one.
 ftpd_flags=""			# Additional flags to stand-alone ftpd.
 
 ### Network daemon (NFS): All need rpcbind_enable="YES" ###
 amd_enable="NO"			# Run amd service with $amd_flags (or NO).
 amd_program="/usr/sbin/amd"	# path to amd, if you want a different one.
 amd_flags="-a /.amd_mnt -l syslog /host /etc/amd.map /net /etc/amd.map"
 amd_map_program="NO"		# Can be set to "ypcat -k amd.master"
 autofs_enable="NO"		# Run autofs daemons.
 automount_flags=""		# Flags to automount(8) (if autofs enabled).
 automountd_flags=""		# Flags to automountd(8) (if autofs enabled).
 autounmountd_flags=""		# Flags to autounmountd(8) (if autofs enabled).
 nfs_client_enable="NO"		# This host is an NFS client (or NO).
 nfs_access_cache="60"		# Client cache timeout in seconds
 nfs_server_enable="NO"		# This host is an NFS server (or NO).
 nfs_server_flags="-u -t"	# Flags to nfsd (if enabled).
 nfs_server_managegids="NO"	# The NFS server maps gids for AUTH_SYS (or NO).
 mountd_enable="NO"		# Run mountd (or NO).
 mountd_flags="-r -S"		# Flags to mountd (if NFS server enabled).
 weak_mountd_authentication="NO"	# Allow non-root mount requests to be served.
 nfs_reserved_port_only="NO"	# Provide NFS only on secure port (or NO).
 nfs_bufpackets=""		# bufspace (in packets) for client
 rpc_lockd_enable="NO"		# Run NFS rpc.lockd needed for client/server.
 rpc_lockd_flags=""		# Flags to rpc.lockd (if enabled).
 rpc_statd_enable="NO"		# Run NFS rpc.statd needed for client/server.
 rpc_statd_flags=""		# Flags to rpc.statd (if enabled).
 rpcbind_enable="NO"		# Run the portmapper service (YES/NO).
 rpcbind_program="/usr/sbin/rpcbind"	# path to rpcbind, if you want a different one.
 rpcbind_flags=""		# Flags to rpcbind (if enabled).
 rpc_ypupdated_enable="NO"	# Run if NIS master and SecureRPC (or NO).
 keyserv_enable="NO"		# Run the SecureRPC keyserver (or NO).
 keyserv_flags=""		# Flags to keyserv (if enabled).
 nfsv4_server_enable="NO"	# Enable support for NFSv4
 nfscbd_enable="NO"		# NFSv4 client side callback daemon
 nfscbd_flags=""			# Flags for nfscbd
 nfsuserd_enable="NO"		# NFSv4 user/group name mapping daemon
 nfsuserd_flags=""		# Flags for nfsuserd
 
 ### Network Time Services options: ###
 timed_enable="NO"		# Run the time daemon (or NO).
 timed_flags=""			# Flags to timed (if enabled).
 ntpdate_enable="NO"		# Run ntpdate to sync time on boot (or NO).
 ntpdate_program="/usr/sbin/ntpdate"	# path to ntpdate, if you want a different one.
 ntpdate_flags="-b"		# Flags to ntpdate (if enabled).
 ntpdate_config="/etc/ntp.conf"	# ntpdate(8) configuration file
 ntpdate_hosts=""		# Whitespace-separated list of ntpdate(8) servers.
 ntpd_enable="NO"		# Run ntpd Network Time Protocol (or NO).
 ntpd_program="/usr/sbin/ntpd"	# path to ntpd, if you want a different one.
 ntpd_config="/etc/ntp.conf"	# ntpd(8) configuration file
 ntpd_sync_on_start="NO"		# Sync time on ntpd startup, even if offset is high
 ntpd_flags="-p /var/run/ntpd.pid -f /var/db/ntpd.drift"
 				# Flags to ntpd (if enabled).
 ntp_src_leapfile="/etc/ntp/leap-seconds"
 				# Initial source for ntpd leapfile
 ntp_db_leapfile="/var/db/ntpd.leap-seconds.list"
 				# Working copy (updated weekly) leapfile
 ntp_leapfile_sources="https://www.ietf.org/timezones/data/leap-seconds.list"
 				# Source from which to fetch leapfile
 ntp_leapfile_fetch_opts="-mq"	# Options to use for ntp leapfile fetch,
 				# e.g. --no-verify-peer
 ntp_leapfile_expiry_days=30	# Check for new leapfile 30 days prior to
 				# expiry.
 ntp_leapfile_fetch_verbose="NO"	# Be verbose during NTP leapfile fetch
 
 # Network Information Services (NIS) options: All need rpcbind_enable="YES" ###
 nis_client_enable="NO"		# We're an NIS client (or NO).
 nis_client_flags=""		# Flags to ypbind (if enabled).
 nis_ypset_enable="NO"		# Run ypset at boot time (or NO).
 nis_ypset_flags=""		# Flags to ypset (if enabled).
 nis_server_enable="NO"		# We're an NIS server (or NO).
 nis_server_flags=""		# Flags to ypserv (if enabled).
 nis_ypxfrd_enable="NO"		# Run rpc.ypxfrd at boot time (or NO).
 nis_ypxfrd_flags=""		# Flags to rpc.ypxfrd (if enabled).
 nis_yppasswdd_enable="NO"	# Run rpc.yppasswdd at boot time (or NO).
 nis_yppasswdd_flags=""		# Flags to rpc.yppasswdd (if enabled).
 nis_ypldap_enable="NO"		# Run ypldap at boot time (or NO).
 nis_ypldap_flags=""		# Flags to ypldap (if enabled).
 
 ### SNMP daemon ###
 # Be sure to understand the security implications of running SNMP v1/v2
 # in your network.
 bsnmpd_enable="NO"		# Run the SNMP daemon (or NO).
 bsnmpd_flags=""			# Flags for bsnmpd.
 
 ### Network routing options: ###
 defaultrouter="NO"		# Set to default gateway (or NO).
 static_arp_pairs=""		# Set to static ARP list (or leave empty).
 static_ndp_pairs=""		# Set to static NDP list (or leave empty).
 static_routes=""		# Set to static route list (or leave empty).
 natm_static_routes=""		# Set to static route list for NATM (or leave empty).
 gateway_enable="NO"		# Set to YES if this host will be a gateway.
 routed_enable="NO"		# Set to YES to enable a routing daemon.
 routed_program="/sbin/routed"	# Name of routing daemon to use if enabled.
 routed_flags="-q"		# Flags for routing daemon.
 arpproxy_all="NO"		# replaces obsolete kernel option ARP_PROXYALL.
 forward_sourceroute="NO"	# do source routing (only if gateway_enable is set to "YES")
 accept_sourceroute="NO"		# accept source routed packets to us
 
 ### ATM interface options: ###
 atm_enable="NO"			# Configure ATM interfaces (or NO).
 #atm_netif_hea0="atm 1"		# Network interfaces for physical interface.
 #atm_sigmgr_hea0="uni31"	# Signalling manager for physical interface.
 #atm_prefix_hea0="ILMI"		# NSAP prefix (UNI interfaces only) (or ILMI).
 #atm_macaddr_hea0="NO"		# Override physical MAC address (or NO).
 #atm_arpserver_atm0="0x47.0005.80.999999.9999.9999.9999.999999999999.00" # ATMARP server address (or local).
 #atm_scsparp_atm0="NO"		# Run SCSP/ATMARP on network interface (or NO).
 atm_pvcs=""			# Set to PVC list (or leave empty).
 atm_arps=""			# Set to permanent ARP list (or leave empty).
 
 ### Bluetooth ###
 hcsecd_enable="NO"		# Enable hcsecd(8) (or NO)
 hcsecd_config="/etc/bluetooth/hcsecd.conf" # hcsecd(8) configuration file
 
 sdpd_enable="NO"		# Enable sdpd(8) (or NO)
 sdpd_control="/var/run/sdp"	# sdpd(8) control socket
 sdpd_groupname="nobody"		# set sdpd(8) user/group to run as after
 sdpd_username="nobody"		# it initializes
 
 bthidd_enable="NO"		# Enable bthidd(8) (or NO)
 bthidd_config="/etc/bluetooth/bthidd.conf" # bthidd(8) configuration file
 bthidd_hids="/var/db/bthidd.hids" # bthidd(8) known HID devices file
 
 rfcomm_pppd_server_enable="NO"	# Enable rfcomm_pppd(8) in server mode (or NO)
 rfcomm_pppd_server_profile="one two"	# Profile to use from /etc/ppp/ppp.conf
 #
 #rfcomm_pppd_server_one_bdaddr=""	# Override local bdaddr for 'one'
 rfcomm_pppd_server_one_channel="1"	# Override local channel for 'one'
 #rfcomm_pppd_server_one_register_sp="NO"	# Override SP and DUN register
 #rfcomm_pppd_server_one_register_dun="NO"	# for 'one'
 #
 #rfcomm_pppd_server_two_bdaddr=""	# Override local bdaddr for 'two'
 rfcomm_pppd_server_two_channel="3"	# Override local channel for 'two'
 #rfcomm_pppd_server_two_register_sp="NO"	# Override SP and DUN register
 #rfcomm_pppd_server_two_register_dun="NO"	# for 'two'
 
 ubthidhci_enable="NO"		# Switch a USB BT controller present on
 #ubthidhci_busnum="3"		# bus 3 and addr 2 from HID mode to HCI mode.
 #ubthidhci_addr="2"		# Check usbconfig list to find the correct
 				# numbers for your system.
 
 ### Network link/usability verification options
 netwait_enable="NO"		# Enable rc.d/netwait (or NO)
 #netwait_ip=""			# Wait for ping response from any IP in this list.
 netwait_timeout="60"		# Total number of seconds to perform pings.
 #netwait_if=""			# Wait for active link on each intf in this list.
 netwait_if_timeout="30"		# Total number of seconds to monitor link state.
 
 ### Miscellaneous network options: ###
 icmp_bmcastecho="NO"	# respond to broadcast ping packets
 
 ### IPv6 options: ###
 ipv6_network_interfaces="auto"	# List of IPv6 network interfaces
 				# (or "auto" or "none").
 ipv6_activate_all_interfaces="NO"	# If NO, interfaces which have no
 					# corresponding $ifconfig_IF_ipv6 are
 					# marked as IFDISABLED for security
 					# reasons.
 ipv6_defaultrouter="NO"		# Set to IPv6 default gateway (or NO).
 #ipv6_defaultrouter="2002:c058:6301::"	# Use this for 6to4 (RFC 3068)
 ipv6_static_routes=""		# Set to static route list (or leave empty).
 #ipv6_static_routes="xxx"	# An example to set fec0:0000:0000:0006::/64
 				#  route toward loopback interface.
 #ipv6_route_xxx="fec0:0000:0000:0006:: -prefixlen 64 ::1"
 ipv6_gateway_enable="NO"	# Set to YES if this host will be a gateway.
 ipv6_cpe_wanif="NO"		# Set to the upstream interface name if this
 				# node will work as a router to forward IPv6
 				# packets not explicitly addressed to itself.
 ipv6_privacy="NO"		# Use privacy address on RA-receiving IFs
 				# (RFC 4941)
 
 route6d_enable="NO"		# Set to YES to enable an IPv6 routing daemon.
 route6d_program="/usr/sbin/route6d"	# Name of IPv6 routing daemon.
 route6d_flags=""		# Flags to IPv6 routing daemon.
 #route6d_flags="-l"		# Example for route6d with only IPv6 site local
 				# addrs.
 #route6d_flags="-q"		# If you want to run a routing daemon on an end
 				# node, you should stop advertisement.
 #ipv6_network_interfaces="ed0 ep0"	# Examples for router
 					# or static configuration for end node.
 					# Choose correct prefix value.
 #ipv6_prefix_ed0="fec0:0000:0000:0001 fec0:0000:0000:0002"  # Examples for rtr.
 #ipv6_prefix_ep0="fec0:0000:0000:0003 fec0:0000:0000:0004"  # Examples for rtr.
 ipv6_default_interface="NO"	# Default output interface for scoped addrs.
 				# This works only with
 				# ipv6_gateway_enable="NO".
 rtsol_flags=""			# Flags to IPv6 router solicitation.
 rtsold_enable="NO"		# Set to YES to enable an IPv6 router
 				# solicitation daemon.
 rtsold_flags="-a"		# Flags to an IPv6 router solicitation
 				# daemon.
 rtadvd_enable="NO"		# Set to YES to enable an IPv6 router
 				# advertisement daemon. If set to YES,
 				# this router becomes a possible candidate
 				# IPv6 default router for local subnets.
 rtadvd_interfaces=""		# Interfaces rtadvd sends RA packets.
 mroute6d_enable="NO"		# Do IPv6 multicast routing.
 mroute6d_program="/usr/local/sbin/pim6dd"	# Name of IPv6 multicast
 						# routing daemon.  You need to
 						# install it from package or
 						# port.
 mroute6d_flags=""		# Flags to IPv6 multicast routing daemon.
 stf_interface_ipv4addr=""	# Local IPv4 addr for 6to4 IPv6 over IPv4
 				# tunneling interface. Specify this entry
 				# to enable 6to4 interface.
 stf_interface_ipv4plen="0"	# Prefix length for 6to4 IPv4 addr,
 				# to limit peer addr range. Effective value
 				# is 0-31.
 stf_interface_ipv6_ifid="0:0:0:1"	# IPv6 interface id for stf0.
 				# If you like, you can set "AUTO" for this.
 stf_interface_ipv6_slaid="0000"	# IPv6 Site Level Aggregator for stf0
 ipv6_ipv4mapping="NO"		# Set to "YES" to enable IPv4 mapped IPv6 addr
 				# communication. (like ::ffff:a.b.c.d)
 ipv6_ipfilter_rules="/etc/ipf6.rules"	# rules definition file for ipfilter,
 					# see /usr/src/contrib/ipfilter/rules
 					# for examples
 ip6addrctl_enable="YES"	# Set to YES to enable default address selection
 ip6addrctl_verbose="NO"	# Set to YES to enable verbose configuration messages
 ip6addrctl_policy="AUTO"	# A pre-defined address selection policy
 				# (ipv4_prefer, ipv6_prefer, or AUTO)
 
 ##############################################################
 ###  System console options  #################################
 ##############################################################
 
 keyboard=""		# keyboard device to use (default /dev/kbd0).
 keymap="NO"		# keymap in /usr/share/{syscons,vt}/keymaps/* (or NO).
 keyrate="NO"		# keyboard rate to: slow, normal, fast (or NO).
 keybell="NO" 		# See kbdcontrol(1) for options.  Use "off" to disable.
 keychange="NO"		# function keys default values (or NO).
 cursor="NO"		# cursor type {normal|blink|destructive} (or NO).
 scrnmap="NO"		# screen map in /usr/share/syscons/scrnmaps/* (or NO).
 font8x16="NO"		# font 8x16 from /usr/share/{syscons,vt}/fonts/* (or NO).
 font8x14="NO"		# font 8x14 from /usr/share/{syscons,vt}/fonts/* (or NO).
 font8x8="NO"		# font 8x8 from /usr/share/{syscons,vt}/fonts/* (or NO).
 blanktime="300"		# blank time (in seconds) or "NO" to turn it off.
 saver="NO"		# screen saver: Uses /boot/kernel/${saver}_saver.ko
 moused_nondefault_enable="YES" # Treat non-default mice as enabled unless
 			       # specifically overridden in rc.conf(5).
 moused_enable="NO"	# Run the mouse daemon.
 moused_type="auto"	# See man page for rc.conf(5) for available settings.
 moused_port="/dev/psm0"	# Set to your mouse port.
 moused_flags=""		# Any additional flags to moused.
 mousechar_start="NO"	# if 0xd0-0xd3 default range is occupied in your
 			# language code table, specify alternative range
 			# start like mousechar_start=3, see vidcontrol(1)
 allscreens_flags=""	# Set this vidcontrol mode for all virtual screens
 allscreens_kbdflags=""	# Set this kbdcontrol mode for all virtual screens
 
 ##############################################################
 ###  Mail Transfer Agent (MTA) options  ######################
 ##############################################################
 
 mta_start_script="/etc/rc.sendmail"
 			# Script to start your chosen MTA, called by /etc/rc.
 # Settings for /etc/rc.sendmail and /etc/rc.d/sendmail:
 sendmail_enable="NO"	# Run the sendmail inbound daemon (YES/NO).
 sendmail_pidfile="/var/run/sendmail.pid"	# sendmail pid file
 sendmail_procname="/usr/sbin/sendmail"		# sendmail process name
 sendmail_flags="-L sm-mta -bd -q30m" # Flags to sendmail (as a server)
 sendmail_cert_create="YES"	# Create a server certificate if none (YES/NO)
 #sendmail_cert_cn="CN"   	# CN of the generated certificate
 sendmail_submit_enable="YES"	# Start a localhost-only MTA for mail submission
 sendmail_submit_flags="-L sm-mta -bd -q30m -ODaemonPortOptions=Addr=localhost"
 				# Flags for localhost-only MTA
 sendmail_outbound_enable="YES"	# Dequeue stuck mail (YES/NO).
 sendmail_outbound_flags="-L sm-queue -q30m" # Flags to sendmail (outbound only)
 sendmail_msp_queue_enable="YES"	# Dequeue stuck clientmqueue mail (YES/NO).
 sendmail_msp_queue_flags="-L sm-msp-queue -Ac -q30m"
 				# Flags for sendmail_msp_queue daemon.
 sendmail_rebuild_aliases="NO"	# Run newaliases if necessary (YES/NO).
 
 
 ##############################################################
 ###  Miscellaneous administrative options  ###################
 ##############################################################
 
 auditd_enable="NO"	# Run the audit daemon.
 auditd_program="/usr/sbin/auditd"	# Path to the audit daemon.
 auditd_flags=""		# Which options to pass to the audit daemon.
 auditdistd_enable="NO"	# Run the auditdistd daemon.
 auditdistd_program="/usr/sbin/auditdistd"	# Path to the auditdistd daemon.
 auditdistd_flags=""	# Which options to pass to the auditdistd daemon.
 cron_enable="YES"	# Run the periodic job daemon.
 cron_program="/usr/sbin/cron"	# Which cron executable to run (if enabled).
 cron_dst="YES"		# Handle DST transitions intelligently (YES/NO)
 cron_flags=""		# Which options to pass to the cron daemon.
 lpd_enable="NO"		# Run the line printer daemon.
 lpd_program="/usr/sbin/lpd"	# path to lpd, if you want a different one.
 lpd_flags=""		# Flags to lpd (if enabled).
 nscd_enable="NO"	# Run the nsswitch caching daemon.
 chkprintcap_enable="NO"	# Run chkprintcap(8) before running lpd.
 chkprintcap_flags="-d"	# Create missing directories by default.
 dumpdev="AUTO"		# Device to crashdump to (device name, AUTO, or NO).
 dumpdir="/var/crash"	# Directory where crash dumps are to be stored
 savecore_enable="YES"	# Extract core from dump devices if any
 savecore_flags="-m 10"	# Used if dumpdev is enabled above, and present.
 			# By default, only the 10 most recent kernel dumps
 			# are saved.
 crashinfo_enable="YES"	# Automatically generate crash dump summary.
 crashinfo_program="/usr/sbin/crashinfo"	# Script to generate crash dump summary.
 quota_enable="NO"	# turn on quotas on startup (or NO).
 check_quotas="YES"	# Check quotas on startup (or NO).
 quotaon_flags="-a"	# Turn quotas on for all file systems (if enabled)
 quotaoff_flags="-a"	# Turn quotas off for all file systems at shutdown
 quotacheck_flags="-a"	# Check all file system quotas (if enabled)
 accounting_enable="NO"	# Turn on process accounting (or NO).
 ibcs2_enable="NO"	# Ibcs2 (SCO) emulation loaded at startup (or NO).
 ibcs2_loaders="coff"	# List of additional Ibcs2 loaders (or NO).
 firstboot_sentinel="/firstboot"	# Scripts with "firstboot" keyword are run if
 			# this file exists.  Should be on a R/W filesystem so
 			# the file can be deleted after the boot completes.
 
 # Emulation/compatibility services provided by /etc/rc.d/abi
 sysvipc_enable="NO"	# Load System V IPC primitives at startup (or NO).
 linux_enable="NO"	# Linux binary compatibility loaded at startup (or NO).
 svr4_enable="NO"	# SysVR4 emulation loaded at startup (or NO).
 clear_tmp_enable="NO"	# Clear /tmp at startup.
 clear_tmp_X="YES" 	# Clear and recreate X11-related directories in /tmp
 ldconfig_insecure="NO"	# Set to YES to disable ldconfig security checks
 ldconfig_paths="/usr/lib/compat /usr/local/lib /usr/local/lib/compat/pkg"
 			# shared library search paths
 ldconfig32_paths="/usr/lib32 /usr/lib32/compat"
 			# 32-bit compatibility shared library search paths
 ldconfigsoft_paths="/usr/libsoft /usr/libsoft/compat /usr/local/libsoft"
 			# soft float compatibility shared library search paths
 			# Note: temporarily with extra stuff for transition
 ldconfig_paths_aout="/usr/lib/compat/aout /usr/local/lib/aout"
 			# a.out shared library search paths
 ldconfig_local_dirs="/usr/local/libdata/ldconfig"
 			# Local directories with ldconfig configuration files.
 ldconfig_local32_dirs="/usr/local/libdata/ldconfig32"
 			# Local directories with 32-bit compatibility ldconfig
 			# configuration files.
 ldconfig_localsoft_dirs="/usr/local/libdata/ldconfigsoft"
 			# Local directories with soft float compatibility ldconfig
 			# configuration files.
 kern_securelevel_enable="NO"	# kernel security level (see security(7))
 kern_securelevel="-1"	# range: -1..3 ; `-1' is the most insecure
 			# Note that setting securelevel to 0 will result
 			# in the system booting with securelevel set to 1, as
 			# init(8) will raise the level when rc(8) completes.
 update_motd="YES"	# update version info in /etc/motd (or NO)
 entropy_boot_file="/boot/entropy"	# Set to NO to disable very early
 			# (used at early boot time) entropy caching through reboots.
 entropy_file="/entropy"	# Set to NO to disable late (used when going multi-user)
 			# entropy through reboots.
 			# /var/db/entropy-file is preferred if / is not avail.
 entropy_dir="/var/db/entropy" # Set to NO to disable caching entropy via cron.
 entropy_save_sz="4096"	# Size of the entropy cache files.
 entropy_save_num="8"	# Number of entropy cache files to save.
 harvest_mask="511"	# Entropy device harvests all but the very invasive sources.
 			# (See 'sysctl kern.random.harvest' and random(4))
 dmesg_enable="YES"	# Save dmesg(8) to /var/run/dmesg.boot
 watchdogd_enable="NO"	# Start the software watchdog daemon
 watchdogd_flags=""	# Flags to watchdogd (if enabled)
 devfs_rulesets="/etc/defaults/devfs.rules /etc/devfs.rules" # Files containing
 							    # devfs(8) rules.
 devfs_system_ruleset=""	# The name (NOT number) of a ruleset to apply to /dev
 devfs_set_rulesets=""	# A list of /mount/dev=ruleset_name settings to
 			# apply (must be mounted already, i.e. fstab(5))
 devfs_load_rulesets="YES"	# Enable to always load the default rulesets
 performance_cx_lowest="C2"	# Online CPU idle state
 performance_cpu_freq="NONE"	# Online CPU frequency
 economy_cx_lowest="Cmax"	# Offline CPU idle state
 economy_cpu_freq="NONE"		# Offline CPU frequency
 virecover_enable="YES"	# Perform housekeeping for the vi(1) editor
 ugidfw_enable="NO"	# Load mac_bsdextended(4) rules on boot
 bsdextended_script="/etc/rc.bsdextended"	# Default mac_bsdextended(4)
 						# ruleset file.
 newsyslog_enable="YES"	# Run newsyslog at startup.
 newsyslog_flags="-CN"	# Newsyslog flags to create marked files
 mixer_enable="YES"	# Run the sound mixer.
 opensm_enable="NO"	# Opensm(8) for infiniband devices defaults to off
 
 # rctl(8) requires kernel options RACCT and RCTL
 rctl_enable="YES"		# Load rctl(8) rules on boot
 rctl_rules="/etc/rctl.conf"	# rctl(8) ruleset. See rctl.conf(5).
 
 iovctl_files=""		# Config files for iovctl(8)
 
 ##############################################################
 ### Jail Configuration (see rc.conf(5) manual page) ##########
 ##############################################################
 jail_enable="NO"	# Set to NO to disable starting of any jails
 jail_parallel_start="NO"	# Start jails in the background
 jail_list=""		# Space separated list of names of jails
 jail_reverse_stop="NO"	# Stop jails in reverse order
 
 ##############################################################
 ### Define source_rc_confs, the mechanism used by /etc/rc.* ##
 ### scripts to source rc_conf_files overrides safely.	    ##
 ##############################################################
 
 if [ -z "${source_rc_confs_defined}" ]; then
 	source_rc_confs_defined=yes
 	source_rc_confs() {
 		local i sourced_files
 		for i in ${rc_conf_files}; do
 			case ${sourced_files} in
 			*:$i:*)
 				;;
 			*)
 				sourced_files="${sourced_files}:$i:"
 				if [ -r $i ]; then
 					. $i
 				fi
 				;;
 			esac
 		done
 		# Re-do process to pick up [possibly] redefined $rc_conf_files
 		for i in ${rc_conf_files}; do
 			case ${sourced_files} in
 			*:$i:*)
 				;;
 			*)
 				sourced_files="${sourced_files}:$i:"
 				if [ -r $i ]; then
 					. $i
 				fi
 				;;
 			esac
 		done
 	}
 fi
Index: projects/vnet/libexec/ftpd/blacklist.c
===================================================================
--- projects/vnet/libexec/ftpd/blacklist.c	(revision 301522)
+++ projects/vnet/libexec/ftpd/blacklist.c	(revision 301523)
@@ -1,55 +1,53 @@
 /*-
  * Copyright (c) 2016 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Kurt Lidl under sponsorship from the
  * FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE. */
 
 /* $FreeBSD$ */
 
 #include <ctype.h>
 #include <stdarg.h>
 #include <stdlib.h>
 #include <unistd.h>
 
 #include "blacklist_client.h"
 #include <blacklist.h>
 
 static struct blacklist *blstate;
 
 void
 blacklist_init(void)
 {
 	blstate = blacklist_open();
 }
 
 void
 blacklist_notify(int action, int fd, char *msg)
 {
 	if (blstate == NULL)
-		blacklist_init();
-	if (blstate == NULL)
 		return;
 	(void)blacklist_r(blstate, action, fd, msg);
 }
Index: projects/vnet/libexec/ftpd/ftpd.c
===================================================================
--- projects/vnet/libexec/ftpd/ftpd.c	(revision 301522)
+++ projects/vnet/libexec/ftpd/ftpd.c	(revision 301523)
@@ -1,3505 +1,3505 @@
 /*
  * Copyright (c) 1985, 1988, 1990, 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if 0
 #ifndef lint
 static char copyright[] =
 "@(#) Copyright (c) 1985, 1988, 1990, 1992, 1993, 1994\n\
 	The Regents of the University of California.  All rights reserved.\n";
 #endif /* not lint */
 #endif
 
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)ftpd.c	8.4 (Berkeley) 4/16/94";
 #endif
 #endif /* not lint */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * FTP server.
  */
 #include <sys/param.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/wait.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/tcp.h>
 
 #define	FTP_NAMES
 #include <arpa/ftp.h>
 #include <arpa/inet.h>
 #include <arpa/telnet.h>
 
 #include <ctype.h>
 #include <dirent.h>
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <glob.h>
 #include <limits.h>
 #include <netdb.h>
 #include <pwd.h>
 #include <grp.h>
 #include <opie.h>
 #include <signal.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <syslog.h>
 #include <time.h>
 #include <unistd.h>
 #include <libutil.h>
 #ifdef	LOGIN_CAP
 #include <login_cap.h>
 #endif
 
 #ifdef USE_PAM
 #include <security/pam_appl.h>
 #endif
 
 #ifdef USE_BLACKLIST
 #include "blacklist_client.h"
 #endif
 
 #include "pathnames.h"
 #include "extern.h"
 
 #include <stdarg.h>
 
 static char version[] = "Version 6.00LS";
 #undef main
 
 union sockunion ctrl_addr;
 union sockunion data_source;
 union sockunion data_dest;
 union sockunion his_addr;
 union sockunion pasv_addr;
 
 int	daemon_mode;
 int	data;
 int	dataport;
 int	hostinfo = 1;	/* print host-specific info in messages */
 int	logged_in;
 struct	passwd *pw;
 char	*homedir;
 int	ftpdebug;
 int	timeout = 900;    /* timeout after 15 minutes of inactivity */
 int	maxtimeout = 7200;/* don't allow idle time to be set beyond 2 hours */
 int	logging;
 int	restricted_data_ports = 1;
 int	paranoid = 1;	  /* be extra careful about security */
 int	anon_only = 0;    /* Only anonymous ftp allowed */
 int	assumeutf8 = 0;   /* Assume that server file names are in UTF-8 */
 int	guest;
 int	dochroot;
 char	*chrootdir;
 int	dowtmp = 1;
 int	stats;
 int	statfd = -1;
 int	type;
 int	form;
 int	stru;			/* avoid C keyword */
 int	mode;
 int	usedefault = 1;		/* for data transfers */
 int	pdata = -1;		/* for passive mode */
 int	readonly = 0;		/* Server is in readonly mode.	*/
 int	noepsv = 0;		/* EPSV command is disabled.	*/
 int	noretr = 0;		/* RETR command is disabled.	*/
 int	noguestretr = 0;	/* RETR command is disabled for anon users. */
 int	noguestmkd = 0;		/* MKD command is disabled for anon users. */
 int	noguestmod = 1;		/* anon users may not modify existing files. */
 
 off_t	file_size;
 off_t	byte_count;
 #if !defined(CMASK) || CMASK == 0
 #undef CMASK
 #define CMASK 027
 #endif
 int	defumask = CMASK;		/* default umask value */
 char	tmpline[7];
 char	*hostname;
 int	epsvall = 0;
 
 #ifdef VIRTUAL_HOSTING
 char	*ftpuser;
 
 static struct ftphost {
 	struct ftphost	*next;
 	struct addrinfo *hostinfo;
 	char		*hostname;
 	char		*anonuser;
 	char		*statfile;
 	char		*welcome;
 	char		*loginmsg;
 } *thishost, *firsthost;
 
 #endif
 char	remotehost[NI_MAXHOST];
 char	*ident = NULL;
 
 static char	wtmpid[20];
 
 #ifdef USE_PAM
 static int	auth_pam(struct passwd**, const char*);
 pam_handle_t	*pamh = NULL;
 #endif
 
 static struct opie	opiedata;
 static char		opieprompt[OPIE_CHALLENGE_MAX+1];
 static int		pwok;
 
 char	*pid_file = NULL; /* means default location to pidfile(3) */
 
 /*
  * Limit number of pathnames that glob can return.
  * A limit of 0 indicates the number of pathnames is unlimited.
  */
 #define MAXGLOBARGS	16384
 #
 
 /*
  * Timeout intervals for retrying connections
  * to hosts that don't accept PORT cmds.  This
  * is a kludge, but given the problems with TCP...
  */
 #define	SWAITMAX	90	/* wait at most 90 seconds */
 #define	SWAITINT	5	/* interval between retries */
 
 int	swaitmax = SWAITMAX;
 int	swaitint = SWAITINT;
 
 #ifdef SETPROCTITLE
 #ifdef OLD_SETPROCTITLE
 char	**Argv = NULL;		/* pointer to argument vector */
 char	*LastArgv = NULL;	/* end of argv */
 #endif /* OLD_SETPROCTITLE */
 char	proctitle[LINE_MAX];	/* initial part of title */
 #endif /* SETPROCTITLE */
 
 #define LOGCMD(cmd, file)		logcmd((cmd), (file), NULL, -1)
 #define LOGCMD2(cmd, file1, file2)	logcmd((cmd), (file1), (file2), -1)
 #define LOGBYTES(cmd, file, cnt)	logcmd((cmd), (file), NULL, (cnt))
 
 static	volatile sig_atomic_t recvurg;
 static	int transflag;		/* NB: for debugging only */
 
 #define STARTXFER	flagxfer(1)
 #define ENDXFER		flagxfer(0)
 
 #define START_UNSAFE	maskurg(1)
 #define END_UNSAFE	maskurg(0)
 
 /* It's OK to put an `else' clause after this macro. */
 #define CHECKOOB(action)						\
 	if (recvurg) {							\
 		recvurg = 0;						\
 		if (myoob()) {						\
 			ENDXFER;					\
 			action;						\
 		}							\
 	}
 
 #ifdef VIRTUAL_HOSTING
 static void	 inithosts(int);
 static void	 selecthost(union sockunion *);
 #endif
 static void	 ack(char *);
 static void	 sigurg(int);
 static void	 maskurg(int);
 static void	 flagxfer(int);
 static int	 myoob(void);
 static int	 checkuser(char *, char *, int, char **, int *);
 static FILE	*dataconn(char *, off_t, char *);
 static void	 dolog(struct sockaddr *);
 static void	 end_login(void);
 static FILE	*getdatasock(char *);
 static int	 guniquefd(char *, char **);
 static void	 lostconn(int);
 static void	 sigquit(int);
 static int	 receive_data(FILE *, FILE *);
 static int	 send_data(FILE *, FILE *, size_t, off_t, int);
 static struct passwd *
 		 sgetpwnam(char *);
 static char	*sgetsave(char *);
 static void	 reapchild(int);
 static void	 appendf(char **, char *, ...) __printflike(2, 3);
 static void	 logcmd(char *, char *, char *, off_t);
 static void      logxfer(char *, off_t, time_t);
 static char	*doublequote(char *);
 static int	*socksetup(int, char *, const char *);
 
 int
 main(int argc, char *argv[], char **envp)
 {
 	socklen_t addrlen;
-	int ch, on = 1, tos;
+	int ch, on = 1, tos, s = STDIN_FILENO;
 	char *cp, line[LINE_MAX];
 	FILE *fd;
 	char	*bindname = NULL;
 	const char *bindport = "ftp";
 	int	family = AF_UNSPEC;
 	struct sigaction sa;
 
 	tzset();		/* in case no timezone database in ~ftp */
 	sigemptyset(&sa.sa_mask);
 	sa.sa_flags = SA_RESTART;
 
 #ifdef OLD_SETPROCTITLE
 	/*
 	 *  Save start and extent of argv for setproctitle.
 	 */
 	Argv = argv;
 	while (*envp)
 		envp++;
 	LastArgv = envp[-1] + strlen(envp[-1]);
 #endif /* OLD_SETPROCTITLE */
 
 	/*
 	 * Prevent diagnostic messages from appearing on stderr.
 	 * We run as a daemon or from inetd; in both cases, it makes
 	 * more sense to log to syslog.
 	 */
 	(void) freopen(_PATH_DEVNULL, "w", stderr);
 	opterr = 0;
 
 	/*
 	 * LOG_NDELAY sets up the logging connection immediately,
 	 * necessary for anonymous ftp's that chroot and can't do it later.
 	 */
 	openlog("ftpd", LOG_PID | LOG_NDELAY, LOG_FTP);
 
 	while ((ch = getopt(argc, argv,
 	                    "468a:AdDEhlmMoOp:P:rRSt:T:u:UvW")) != -1) {
 		switch (ch) {
 		case '4':
 			family = (family == AF_INET6) ? AF_UNSPEC : AF_INET;
 			break;
 
 		case '6':
 			family = (family == AF_INET) ? AF_UNSPEC : AF_INET6;
 			break;
 
 		case '8':
 			assumeutf8 = 1;
 			break;
 
 		case 'a':
 			bindname = optarg;
 			break;
 
 		case 'A':
 			anon_only = 1;
 			break;
 
 		case 'd':
 			ftpdebug++;
 			break;
 
 		case 'D':
 			daemon_mode++;
 			break;
 
 		case 'E':
 			noepsv = 1;
 			break;
 
 		case 'h':
 			hostinfo = 0;
 			break;
 
 		case 'l':
 			logging++;	/* > 1 == extra logging */
 			break;
 
 		case 'm':
 			noguestmod = 0;
 			break;
 
 		case 'M':
 			noguestmkd = 1;
 			break;
 
 		case 'o':
 			noretr = 1;
 			break;
 
 		case 'O':
 			noguestretr = 1;
 			break;
 
 		case 'p':
 			pid_file = optarg;
 			break;
 
 		case 'P':
 			bindport = optarg;
 			break;
 
 		case 'r':
 			readonly = 1;
 			break;
 
 		case 'R':
 			paranoid = 0;
 			break;
 
 		case 'S':
 			stats++;
 			break;
 
 		case 't':
 			timeout = atoi(optarg);
 			if (maxtimeout < timeout)
 				maxtimeout = timeout;
 			break;
 
 		case 'T':
 			maxtimeout = atoi(optarg);
 			if (timeout > maxtimeout)
 				timeout = maxtimeout;
 			break;
 
 		case 'u':
 		    {
 			long val = 0;
 
 			val = strtol(optarg, &optarg, 8);
 			if (*optarg != '\0' || val < 0)
 				syslog(LOG_WARNING, "bad value for -u");
 			else
 				defumask = val;
 			break;
 		    }
 		case 'U':
 			restricted_data_ports = 0;
 			break;
 
 		case 'v':
 			ftpdebug++;
 			break;
 
 		case 'W':
 			dowtmp = 0;
 			break;
 
 		default:
 			syslog(LOG_WARNING, "unknown flag -%c ignored", optopt);
 			break;
 		}
 	}
 
 	if (daemon_mode) {
 		int *ctl_sock, fd, maxfd = -1, nfds, i;
 		fd_set defreadfds, readfds;
 		pid_t pid;
 		struct pidfh *pfh;
 
 		if ((pfh = pidfile_open(pid_file, 0600, &pid)) == NULL) {
 			if (errno == EEXIST) {
 				syslog(LOG_ERR, "%s already running, pid %d",
 				       getprogname(), (int)pid);
 				exit(1);
 			}
 			syslog(LOG_WARNING, "pidfile_open: %m");
 		}
 
 		/*
 		 * Detach from parent.
 		 */
 		if (daemon(1, 1) < 0) {
 			syslog(LOG_ERR, "failed to become a daemon");
 			exit(1);
 		}
 
 		if (pfh != NULL && pidfile_write(pfh) == -1)
 			syslog(LOG_WARNING, "pidfile_write: %m");
 
 		sa.sa_handler = reapchild;
 		(void)sigaction(SIGCHLD, &sa, NULL);
 
 #ifdef VIRTUAL_HOSTING
 		inithosts(family);
 #endif
 
 		/*
 		 * Open a socket, bind it to the FTP port, and start
 		 * listening.
 		 */
 		ctl_sock = socksetup(family, bindname, bindport);
 		if (ctl_sock == NULL)
 			exit(1);
 
 		FD_ZERO(&defreadfds);
 		for (i = 1; i <= *ctl_sock; i++) {
 			FD_SET(ctl_sock[i], &defreadfds);
 			if (listen(ctl_sock[i], 32) < 0) {
 				syslog(LOG_ERR, "control listen: %m");
 				exit(1);
 			}
 			if (maxfd < ctl_sock[i])
 				maxfd = ctl_sock[i];
 		}
 
 		/*
 		 * Loop forever accepting connection requests and forking off
 		 * children to handle them.
 		 */
 		while (1) {
 			FD_COPY(&defreadfds, &readfds);
 			nfds = select(maxfd + 1, &readfds, NULL, NULL, 0);
 			if (nfds <= 0) {
 				if (nfds < 0 && errno != EINTR)
 					syslog(LOG_WARNING, "select: %m");
 				continue;
 			}
 
 			pid = -1;
                         for (i = 1; i <= *ctl_sock; i++)
 				if (FD_ISSET(ctl_sock[i], &readfds)) {
 					addrlen = sizeof(his_addr);
 					fd = accept(ctl_sock[i],
 					    (struct sockaddr *)&his_addr,
 					    &addrlen);
 					if (fd == -1) {
 						syslog(LOG_WARNING,
 						       "accept: %m");
 						continue;
 					}
 					switch (pid = fork()) {
 					case 0:
 						/* child */
-						(void) dup2(fd, 0);
-						(void) dup2(fd, 1);
+						(void) dup2(fd, s);
+						(void) dup2(fd, STDOUT_FILENO);
 						(void) close(fd);
 						for (i = 1; i <= *ctl_sock; i++)
 							close(ctl_sock[i]);
 						if (pfh != NULL)
 							pidfile_close(pfh);
 						goto gotchild;
 					case -1:
 						syslog(LOG_WARNING, "fork: %m");
 						/* FALLTHROUGH */
 					default:
 						close(fd);
 					}
 				}
 		}
 	} else {
 		addrlen = sizeof(his_addr);
-		if (getpeername(0, (struct sockaddr *)&his_addr, &addrlen) < 0) {
+		if (getpeername(s, (struct sockaddr *)&his_addr, &addrlen) < 0) {
 			syslog(LOG_ERR, "getpeername (%s): %m",argv[0]);
 			exit(1);
 		}
 
 #ifdef VIRTUAL_HOSTING
 		if (his_addr.su_family == AF_INET6 &&
 		    IN6_IS_ADDR_V4MAPPED(&his_addr.su_sin6.sin6_addr))
 			family = AF_INET;
 		else
 			family = his_addr.su_family;
 		inithosts(family);
 #endif
 	}
 
 gotchild:
 	sa.sa_handler = SIG_DFL;
 	(void)sigaction(SIGCHLD, &sa, NULL);
 
 	sa.sa_handler = sigurg;
 	sa.sa_flags = 0;		/* don't restart syscalls for SIGURG */
 	(void)sigaction(SIGURG, &sa, NULL);
 
 	sigfillset(&sa.sa_mask);	/* block all signals in handler */
 	sa.sa_flags = SA_RESTART;
 	sa.sa_handler = sigquit;
 	(void)sigaction(SIGHUP, &sa, NULL);
 	(void)sigaction(SIGINT, &sa, NULL);
 	(void)sigaction(SIGQUIT, &sa, NULL);
 	(void)sigaction(SIGTERM, &sa, NULL);
 
 	sa.sa_handler = lostconn;
 	(void)sigaction(SIGPIPE, &sa, NULL);
 
 	addrlen = sizeof(ctrl_addr);
-	if (getsockname(0, (struct sockaddr *)&ctrl_addr, &addrlen) < 0) {
+	if (getsockname(s, (struct sockaddr *)&ctrl_addr, &addrlen) < 0) {
 		syslog(LOG_ERR, "getsockname (%s): %m",argv[0]);
 		exit(1);
 	}
 	dataport = ntohs(ctrl_addr.su_port) - 1; /* as per RFC 959 */
 #ifdef VIRTUAL_HOSTING
 	/* select our identity from virtual host table */
 	selecthost(&ctrl_addr);
 #endif
 #ifdef IP_TOS
 	if (ctrl_addr.su_family == AF_INET)
       {
 	tos = IPTOS_LOWDELAY;
-	if (setsockopt(0, IPPROTO_IP, IP_TOS, &tos, sizeof(int)) < 0)
+	if (setsockopt(s, IPPROTO_IP, IP_TOS, &tos, sizeof(int)) < 0)
 		syslog(LOG_WARNING, "control setsockopt (IP_TOS): %m");
       }
 #endif
 	/*
 	 * Disable Nagle on the control channel so that we don't have to wait
 	 * for peer's ACK before issuing our next reply.
 	 */
-	if (setsockopt(0, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)) < 0)
+	if (setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)) < 0)
 		syslog(LOG_WARNING, "control setsockopt (TCP_NODELAY): %m");
 
 	data_source.su_port = htons(ntohs(ctrl_addr.su_port) - 1);
 
 	(void)snprintf(wtmpid, sizeof(wtmpid), "%xftpd", getpid());
 
 	/* Try to handle urgent data inline */
 #ifdef SO_OOBINLINE
-	if (setsockopt(0, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on)) < 0)
+	if (setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on)) < 0)
 		syslog(LOG_WARNING, "control setsockopt (SO_OOBINLINE): %m");
 #endif
 
 #ifdef	F_SETOWN
-	if (fcntl(fileno(stdin), F_SETOWN, getpid()) == -1)
+	if (fcntl(s, F_SETOWN, getpid()) == -1)
 		syslog(LOG_ERR, "fcntl F_SETOWN: %m");
 #endif
 	dolog((struct sockaddr *)&his_addr);
 	/*
 	 * Set up default state
 	 */
 	data = -1;
 	type = TYPE_A;
 	form = FORM_N;
 	stru = STRU_F;
 	mode = MODE_S;
 	tmpline[0] = '\0';
 
 	/* If logins are disabled, print out the message. */
 	if ((fd = fopen(_PATH_NOLOGIN,"r")) != NULL) {
 		while (fgets(line, sizeof(line), fd) != NULL) {
 			if ((cp = strchr(line, '\n')) != NULL)
 				*cp = '\0';
 			lreply(530, "%s", line);
 		}
 		(void) fflush(stdout);
 		(void) fclose(fd);
 		reply(530, "System not available.");
 		exit(0);
 	}
 #ifdef VIRTUAL_HOSTING
 	fd = fopen(thishost->welcome, "r");
 #else
 	fd = fopen(_PATH_FTPWELCOME, "r");
 #endif
 	if (fd != NULL) {
 		while (fgets(line, sizeof(line), fd) != NULL) {
 			if ((cp = strchr(line, '\n')) != NULL)
 				*cp = '\0';
 			lreply(220, "%s", line);
 		}
 		(void) fflush(stdout);
 		(void) fclose(fd);
 		/* reply(220,) must follow */
 	}
 #ifndef VIRTUAL_HOSTING
 	if ((hostname = malloc(MAXHOSTNAMELEN)) == NULL)
 		fatalerror("Ran out of memory.");
 	if (gethostname(hostname, MAXHOSTNAMELEN - 1) < 0)
 		hostname[0] = '\0';
 	hostname[MAXHOSTNAMELEN - 1] = '\0';
 #endif
 	if (hostinfo)
 		reply(220, "%s FTP server (%s) ready.", hostname, version);
 	else
 		reply(220, "FTP server ready.");
 #ifdef USE_BLACKLIST
 	blacklist_init();
 #endif
 	for (;;)
 		(void) yyparse();
 	/* NOTREACHED */
 }
 
 static void
 lostconn(int signo)
 {
 
 	if (ftpdebug)
 		syslog(LOG_DEBUG, "lost connection");
 	dologout(1);
 }
 
 static void
 sigquit(int signo)
 {
 
 	syslog(LOG_ERR, "got signal %d", signo);
 	dologout(1);
 }
 
 #ifdef VIRTUAL_HOSTING
 /*
  * read in virtual host tables (if they exist)
  */
 
 static void
 inithosts(int family)
 {
 	int insert;
 	size_t len;
 	FILE *fp;
 	char *cp, *mp, *line;
 	char *hostname;
 	char *vhost, *anonuser, *statfile, *welcome, *loginmsg;
 	struct ftphost *hrp, *lhrp;
 	struct addrinfo hints, *res, *ai;
 
 	/*
 	 * Fill in the default host information
 	 */
 	if ((hostname = malloc(MAXHOSTNAMELEN)) == NULL)
 		fatalerror("Ran out of memory.");
 	if (gethostname(hostname, MAXHOSTNAMELEN - 1) < 0)
 		hostname[0] = '\0';
 	hostname[MAXHOSTNAMELEN - 1] = '\0';
 	if ((hrp = malloc(sizeof(struct ftphost))) == NULL)
 		fatalerror("Ran out of memory.");
 	hrp->hostname = hostname;
 	hrp->hostinfo = NULL;
 
 	memset(&hints, 0, sizeof(hints));
 	hints.ai_flags = AI_PASSIVE;
 	hints.ai_family = family;
 	hints.ai_socktype = SOCK_STREAM;
 	if (getaddrinfo(hrp->hostname, NULL, &hints, &res) == 0)
 		hrp->hostinfo = res;
 	hrp->statfile = _PATH_FTPDSTATFILE;
 	hrp->welcome  = _PATH_FTPWELCOME;
 	hrp->loginmsg = _PATH_FTPLOGINMESG;
 	hrp->anonuser = "ftp";
 	hrp->next = NULL;
 	thishost = firsthost = lhrp = hrp;
 	if ((fp = fopen(_PATH_FTPHOSTS, "r")) != NULL) {
 		int addrsize, gothost;
 		void *addr;
 		struct hostent *hp;
 
 		while ((line = fgetln(fp, &len)) != NULL) {
 			int	i, hp_error;
 
 			/* skip comments */
 			if (line[0] == '#')
 				continue;
 			if (line[len - 1] == '\n') {
 				line[len - 1] = '\0';
 				mp = NULL;
 			} else {
 				if ((mp = malloc(len + 1)) == NULL)
 					fatalerror("Ran out of memory.");
 				memcpy(mp, line, len);
 				mp[len] = '\0';
 				line = mp;
 			}
 			cp = strtok(line, " \t");
 			/* skip empty lines */
 			if (cp == NULL)
 				goto nextline;
 			vhost = cp;
 
 			/* set defaults */
 			anonuser = "ftp";
 			statfile = _PATH_FTPDSTATFILE;
 			welcome  = _PATH_FTPWELCOME;
 			loginmsg = _PATH_FTPLOGINMESG;
 
 			/*
 			 * Preparse the line so we can use its info
 			 * for all the addresses associated with
 			 * the virtual host name.
 			 * Field 0, the virtual host name, is special:
 			 * it's already parsed off and will be strdup'ed
 			 * later, after we know its canonical form.
 			 */
 			for (i = 1; i < 5 && (cp = strtok(NULL, " \t")); i++)
 				if (*cp != '-' && (cp = strdup(cp)))
 					switch (i) {
 					case 1:	/* anon user permissions */
 						anonuser = cp;
 						break;
 					case 2: /* statistics file */
 						statfile = cp;
 						break;
 					case 3: /* welcome message */
 						welcome  = cp;
 						break;
 					case 4: /* login message */
 						loginmsg = cp;
 						break;
 					default: /* programming error */
 						abort();
 						/* NOTREACHED */
 					}
 
 			hints.ai_flags = AI_PASSIVE;
 			hints.ai_family = family;
 			hints.ai_socktype = SOCK_STREAM;
 			if (getaddrinfo(vhost, NULL, &hints, &res) != 0)
 				goto nextline;
 			for (ai = res; ai != NULL && ai->ai_addr != NULL;
 			     ai = ai->ai_next) {
 
 			gothost = 0;
 			for (hrp = firsthost; hrp != NULL; hrp = hrp->next) {
 				struct addrinfo *hi;
 
 				for (hi = hrp->hostinfo; hi != NULL;
 				     hi = hi->ai_next)
 					if (hi->ai_addrlen == ai->ai_addrlen &&
 					    memcmp(hi->ai_addr,
 						   ai->ai_addr,
 						   ai->ai_addr->sa_len) == 0) {
 						gothost++;
 						break;
 					}
 				if (gothost)
 					break;
 			}
 			if (hrp == NULL) {
 				if ((hrp = malloc(sizeof(struct ftphost))) == NULL)
 					goto nextline;
 				hrp->hostname = NULL;
 				insert = 1;
 			} else {
 				if (hrp->hostinfo && hrp->hostinfo != res)
 					freeaddrinfo(hrp->hostinfo);
 				insert = 0; /* host already in the chain */
 			}
 			hrp->hostinfo = res;
 
 			/*
 			 * determine hostname to use.
 			 * force defined name if there is a valid alias
 			 * otherwise fallback to primary hostname
 			 */
 			/* XXX: getaddrinfo() can't do alias check */
 			switch(hrp->hostinfo->ai_family) {
 			case AF_INET:
 				addr = &((struct sockaddr_in *)hrp->hostinfo->ai_addr)->sin_addr;
 				addrsize = sizeof(struct in_addr);
 				break;
 			case AF_INET6:
 				addr = &((struct sockaddr_in6 *)hrp->hostinfo->ai_addr)->sin6_addr;
 				addrsize = sizeof(struct in6_addr);
 				break;
 			default:
 				/* should not reach here */
 				freeaddrinfo(hrp->hostinfo);
 				if (insert)
 					free(hrp); /*not in chain, can free*/
 				else
 					hrp->hostinfo = NULL; /*mark as blank*/
 				goto nextline;
 				/* NOTREACHED */
 			}
 			if ((hp = getipnodebyaddr(addr, addrsize,
 						  hrp->hostinfo->ai_family,
 						  &hp_error)) != NULL) {
 				if (strcmp(vhost, hp->h_name) != 0) {
 					if (hp->h_aliases == NULL)
 						vhost = hp->h_name;
 					else {
 						i = 0;
 						while (hp->h_aliases[i] &&
 						       strcmp(vhost, hp->h_aliases[i]) != 0)
 							++i;
 						if (hp->h_aliases[i] == NULL)
 							vhost = hp->h_name;
 					}
 				}
 			}
 			if (hrp->hostname &&
 			    strcmp(hrp->hostname, vhost) != 0) {
 				free(hrp->hostname);
 				hrp->hostname = NULL;
 			}
 			if (hrp->hostname == NULL &&
 			    (hrp->hostname = strdup(vhost)) == NULL) {
 				freeaddrinfo(hrp->hostinfo);
 				hrp->hostinfo = NULL; /* mark as blank */
 				if (hp)
 					freehostent(hp);
 				goto nextline;
 			}
 			hrp->anonuser = anonuser;
 			hrp->statfile = statfile;
 			hrp->welcome  = welcome;
 			hrp->loginmsg = loginmsg;
 			if (insert) {
 				hrp->next  = NULL;
 				lhrp->next = hrp;
 				lhrp = hrp;
 			}
 			if (hp)
 				freehostent(hp);
 		      }
 nextline:
 			if (mp)
 				free(mp);
 		}
 		(void) fclose(fp);
 	}
 }
 
 /*
  * Choose the virtual host entry for the address "su": walk the host
  * chain starting at firsthost and take the first entry that has an
  * address equal to "su".  When nothing matches, firsthost is used.
  * The hostname and ftpuser globals are then taken from the chosen entry.
  */
 static void
 selecthost(union sockunion *su)
 {
 	struct ftphost *vh, *match = NULL;
 	struct addrinfo *ai;
 	u_int16_t saved_port;
 #ifdef INET6
 	struct in6_addr *v4mapped = NULL;
 
 	/*
 	 * XXX IPv4 mapped IPv6 addr consideration,
 	 * specified in rfc2373.
 	 */
 	if (su->su_family == AF_INET6 &&
 	    IN6_IS_ADDR_V4MAPPED(&su->su_sin6.sin6_addr))
 		v4mapped = &su->su_sin6.sin6_addr;
 #endif
 
 	/* The port is cleared for the comparison and put back afterwards. */
 	saved_port = su->su_port;
 	su->su_port = 0;
 
 	thishost = firsthost;		/* default */
 	for (vh = firsthost; vh != NULL && match == NULL; vh = vh->next) {
 		for (ai = vh->hostinfo; ai != NULL; ai = ai->ai_next) {
 			/* exact match of the whole socket address */
 			if (memcmp(su, ai->ai_addr, ai->ai_addrlen) == 0) {
 				match = vh;
 				break;
 			}
 #ifdef INET6
 			/*
 			 * XXX IPv4 mapped IPv6 addr consideration: compare
 			 * the last four bytes of the mapped address with
 			 * an IPv4 entry.
 			 */
 			if (v4mapped != NULL &&
 			    ai->ai_addr->sa_family == AF_INET &&
 			    memcmp(&v4mapped->s6_addr[12],
 				   &((struct sockaddr_in *)ai->ai_addr)->sin_addr,
 				   sizeof(struct in_addr)) == 0) {
 				match = vh;
 				break;
 			}
 #endif
 		}
 	}
 	if (match != NULL)
 		thishost = match;
 
 	su->su_port = saved_port;
 	/* setup static variables as appropriate */
 	hostname = thishost->hostname;
 	ftpuser = thishost->anonuser;
 }
 #endif
 
 /*
  * Helper function for sgetpwnam(): return a freshly malloc'd copy of
  * the string "s".  If memory cannot be obtained, a 421 reply is sent
  * and dologout(1) is called.
  */
 static char *
 sgetsave(char *s)
 {
 	size_t size = strlen(s) + 1;	/* including the terminating NUL */
 	char *copy;
 
 	copy = malloc(size);
 	if (copy == NULL) {
 		reply(421, "Ran out of memory.");
 		dologout(1);
 		/* NOTREACHED */
 	}
 	memcpy(copy, s, size);
 	return (copy);
 }
 
 /*
  * Look up "name" with getpwnam() and keep a private copy of the result.
  * Used for the USER command, since the data returned must not be
  * clobbered by any other command (e.g., globbing).
  *
  * Returns NULL when the lookup fails (the previously saved entry is then
  * left alone); otherwise returns a pointer to a static struct passwd
  * whose string fields are malloc'd copies.
  * NB: The data returned stays valid only until the next successful call
  * to this function, which frees the old strings.
  */
 static struct passwd *
 sgetpwnam(char *name)
 {
 	static struct passwd save;
 	struct passwd *found;
 
 	found = getpwnam(name);
 	if (found == NULL)
 		return (NULL);
 
 	/* Release the strings kept from the previous successful lookup. */
 	if (save.pw_name != NULL) {
 		free(save.pw_name);
 		free(save.pw_passwd);
 		free(save.pw_class);
 		free(save.pw_gecos);
 		free(save.pw_dir);
 		free(save.pw_shell);
 	}
 
 	/* Copy the scalar fields, then replace every string with a copy. */
 	save = *found;
 	save.pw_name = sgetsave(found->pw_name);
 	save.pw_passwd = sgetsave(found->pw_passwd);
 	save.pw_class = sgetsave(found->pw_class);
 	save.pw_gecos = sgetsave(found->pw_gecos);
 	save.pw_dir = sgetsave(found->pw_dir);
 	save.pw_shell = sgetsave(found->pw_shell);
 	return (&save);
 }
 
 /* Login state shared by the USER (user()) and PASS (pass()) handlers. */
 static int login_attempts;	/* number of failed login attempts; reset on success */
 static int askpasswd;		/* had user command, ask for passwd */
 static char curname[MAXLOGNAME];	/* current USER name (only recorded when logging) */
 
 /*
  * USER command.
  * Sets global passwd pointer pw if named account exists and is acceptable;
  * sets askpasswd if a PASS command is expected.  If logged in previously,
  * need to reset state.  If name is "ftp" or "anonymous", the name is not in
  * _PATH_FTPUSERS, and ftp account exists, set guest and pw, then just return.
  * If account doesn't exist, ask for passwd anyway.  Otherwise, check user
  * requesting login privileges.  Disallow anyone who does not have a standard
  * shell as returned by getusershell().  Disallow anyone mentioned in the file
  * _PATH_FTPUSERS to allow people such as root and uucp to be avoided.
  */
 void
 user(char *name)
 {
 	int ecode;
 	char *cp, *shell;
 
 	/* A second USER is refused for guest and chroot sessions. */
 	if (logged_in) {
 		if (guest) {
 			reply(530, "Can't change user from guest login.");
 			return;
 		} else if (dochroot) {
 			reply(530, "Can't change user from chroot user.");
 			return;
 		}
 		end_login();
 	}
 
 	guest = 0;
 	/*
 	 * Look up the anonymous account first; pw is replaced further down
 	 * when a regular user name was given.
 	 */
 #ifdef VIRTUAL_HOSTING
 	pw = sgetpwnam(thishost->anonuser);
 #else
 	pw = sgetpwnam("ftp");
 #endif
 	if (strcmp(name, "ftp") == 0 || strcmp(name, "anonymous") == 0) {
 		/*
 		 * Anonymous login is denied if either "ftp" or "anonymous"
 		 * is listed in _PATH_FTPUSERS, or if that file could not be
 		 * read for a reason other than ENOENT.
 		 */
 		if (checkuser(_PATH_FTPUSERS, "ftp", 0, NULL, &ecode) ||
 		    (ecode != 0 && ecode != ENOENT))
 			reply(530, "User %s access denied.", name);
 		else if (checkuser(_PATH_FTPUSERS, "anonymous", 0, NULL, &ecode) ||
 		    (ecode != 0 && ecode != ENOENT))
 			reply(530, "User %s access denied.", name);
 		else if (pw != NULL) {
 			guest = 1;
 			askpasswd = 1;
 			reply(331,
 			"Guest login ok, send your email address as password.");
 		} else
 			reply(530, "User %s unknown.", name);
 		if (!askpasswd && logging)
 			syslog(LOG_NOTICE,
 			    "ANONYMOUS FTP LOGIN REFUSED FROM %s", remotehost);
 		return;
 	}
 	if (anon_only != 0) {
 		reply(530, "Sorry, only anonymous ftp allowed.");
 		return;
 	}
 		
 	if ((pw = sgetpwnam(name))) {
 		/* An empty shell field is treated as _PATH_BSHELL. */
 		if ((shell = pw->pw_shell) == NULL || *shell == 0)
 			shell = _PATH_BSHELL;
 		/* cp stays non-NULL only if getusershell() lists the shell. */
 		setusershell();
 		while ((cp = getusershell()) != NULL)
 			if (strcmp(cp, shell) == 0)
 				break;
 		endusershell();
 
 		if (cp == NULL || 
 		    (checkuser(_PATH_FTPUSERS, name, 1, NULL, &ecode) ||
 		    (ecode != 0 && ecode != ENOENT))) {
 			reply(530, "User %s access denied.", name);
 			if (logging)
 				syslog(LOG_NOTICE,
 				    "FTP LOGIN REFUSED FROM %s, %s",
 				    remotehost, name);
 			pw = NULL;
 			return;
 		}
 	}
 	/*
 	 * Execution continues here with pw == NULL for an unknown account:
 	 * a password is requested anyway.
 	 */
 	/*
 	 * curname is a zero-initialized static and at most sizeof - 1 bytes
 	 * are written, so it always stays NUL-terminated.
 	 */
 	if (logging)
 		strncpy(curname, name, sizeof(curname)-1);
 
 	/*
 	 * pwok records whether pass() may accept a plain password; it is
 	 * always set when no OPIE challenge is available for this user.
 	 */
 	pwok = 0;
 #ifdef USE_PAM
 	/* XXX Kluge! The conversation mechanism needs to be fixed. */
 #endif
 	if (opiechallenge(&opiedata, name, opieprompt) == 0) {
 		pwok = (pw != NULL) &&
 		       opieaccessfile(remotehost) &&
 		       opiealways(pw->pw_dir);
 		reply(331, "Response to %s %s for %s.",
 		      opieprompt, pwok ? "requested" : "required", name);
 	} else {
 		pwok = 1;
 		reply(331, "Password required for %s.", name);
 	}
 	askpasswd = 1;
 	/*
 	 * Delay before reading passwd after first failed
 	 * attempt to slow down passwd-guessing programs.
 	 */
 	if (login_attempts)
 		sleep(login_attempts);
 }
 
 /*
  * Check if a user is in the file "fname".
  *
  * Each non-comment line is split on blanks and tabs; its first word is
  * either a user name to compare with "name", or "@group" (a lone "@"
  * matches anyone).  For a group, the members listed for the group are
  * compared with "name"; if "pwset" is non-zero, the gid of the global
  * "pw" entry is checked against the group's gid as well.
  *
  * If "residue" is not NULL and a line matched, *residue receives a
  * malloc'd string with the rest of the matching line, or NULL when
  * nothing follows the first word.  *residue is not written when no line
  * matched.
  * If "ecode" is not NULL, *ecode is set to 0, or to errno when the file
  * could not be opened.
  *
  * Returns non-zero if a matching line was found, 0 otherwise.
  */
 static int
 checkuser(char *fname, char *name, int pwset, char **residue, int *ecode)
 {
 	FILE *fd;
 	int found = 0;
 	size_t len;
 	char *line, *mp, *p;
 
 	if (ecode != NULL)
 		*ecode = 0;
 	if ((fd = fopen(fname, "r")) != NULL) {
 		while (!found && (line = fgetln(fd, &len)) != NULL) {
 			/* skip comments */
 			if (line[0] == '#')
 				continue;
 			/*
 			 * NUL-terminate the line: in place when it ends in
 			 * a newline, otherwise in a heap copy that mp
 			 * remembers so it can be freed at nextline.
 			 */
 			if (line[len - 1] == '\n') {
 				line[len - 1] = '\0';
 				mp = NULL;
 			} else {
 				if ((mp = malloc(len + 1)) == NULL)
 					fatalerror("Ran out of memory.");
 				memcpy(mp, line, len);
 				mp[len] = '\0';
 				line = mp;
 			}
 			/* avoid possible leading and trailing whitespace */
 			p = strtok(line, " \t");
 			/* skip empty lines */
 			if (p == NULL)
 				goto nextline;
 			/*
 			 * if first chr is '@', check group membership
 			 */
 			if (p[0] == '@') {
 				int i = 0;
 				struct group *grp;
 
 				if (p[1] == '\0') /* single @ matches anyone */
 					found = 1;
 				else {
 					/* an unknown group never matches */
 					if ((grp = getgrnam(p+1)) == NULL)
 						goto nextline;
 					/*
 					 * Check user's default group
 					 */
 					if (pwset && grp->gr_gid == pw->pw_gid)
 						found = 1;
 					/*
 					 * Check supplementary groups
 					 */
 					while (!found && grp->gr_mem[i])
 						found = strcmp(name,
 							grp->gr_mem[i++])
 							== 0;
 				}
 			}
 			/*
 			 * Otherwise, just check for username match
 			 */
 			else
 				found = strcmp(p, name) == 0;
 			/*
 			 * Save the rest of line to "residue" if matched
 			 */
 			if (found && residue) {
 				/* empty delimiter set: take everything left */
 				if ((p = strtok(NULL, "")) != NULL)
 					p += strspn(p, " \t");
 				if (p && *p) {
 				 	if ((*residue = strdup(p)) == NULL)
 						fatalerror("Ran out of memory.");
 				} else
 					*residue = NULL;
 			}
 nextline:
 			if (mp)
 				free(mp);
 		}
 		(void) fclose(fd);
 	} else if (ecode != NULL)
 		*ecode = errno;
 	return (found);
 }
 
 /*
  * Terminate login as previous user, if any, resetting state;
  * used when USER command is given or login fails.
  *
  * Regains euid 0, writes the wtmp record for a logged-in session when
  * wtmp logging is on, drops the global pw, tears down the PAM
  * transaction (if one is open) and clears the logged_in, guest and
  * dochroot flags.
  */
 static void
 end_login(void)
 {
 #ifdef USE_PAM
 	int rc;
 #endif
 
 	(void) seteuid(0);
 	if (logged_in && dowtmp)
 		ftpd_logwtmp(wtmpid, NULL, NULL);
 	pw = NULL;
 #ifdef	LOGIN_CAP
 	/* Reset the context to that of uid 0, leaving some settings alone. */
 	setusercontext(NULL, getpwuid(0), 0,
 		       LOGIN_SETALL & ~(LOGIN_SETLOGIN | LOGIN_SETUSER |
 		       LOGIN_SETGROUP | LOGIN_SETPATH | LOGIN_SETENV));
 #endif
 #ifdef USE_PAM
 	if (pamh != NULL) {
 		rc = pam_setcred(pamh, PAM_DELETE_CRED);
 		if (rc != PAM_SUCCESS)
 			syslog(LOG_ERR, "pam_setcred: %s",
 			    pam_strerror(pamh, rc));
 
 		rc = pam_close_session(pamh, 0);
 		if (rc != PAM_SUCCESS)
 			syslog(LOG_ERR, "pam_close_session: %s",
 			    pam_strerror(pamh, rc));
 
 		/* pam_end() is handed the status of pam_close_session(). */
 		rc = pam_end(pamh, rc);
 		if (rc != PAM_SUCCESS)
 			syslog(LOG_ERR, "pam_end: %s",
 			    pam_strerror(pamh, rc));
 		pamh = NULL;
 	}
 #endif
 	dochroot = 0;
 	guest = 0;
 	logged_in = 0;
 }
 
 #ifdef USE_PAM
 
 /*
  * the following code is stolen from imap-uw PAM authentication module and
  * login.c
  */
 #define COPY_STRING(s) (s ? strdup(s) : NULL)
 
 /* Credentials handed to auth_conv() through the PAM appdata pointer. */
 struct cred_t {
 	const char *uname;		/* user name */
 	const char *pass;		/* password */
 };
 typedef struct cred_t cred_t;
 
 /*
  * PAM conversation function.  "appdata" points to a cred_t.
  *
  * Every PAM_PROMPT_ECHO_ON message is answered with a copy of the user
  * name and every PAM_PROMPT_ECHO_OFF message with a copy of the
  * password; PAM_TEXT_INFO and PAM_ERROR_MSG get an empty response.
  *
  * Returns PAM_SUCCESS with *resp set to a calloc'd array of num_msg
  * responses, PAM_BUF_ERR when memory runs out, or PAM_CONV_ERR for an
  * unknown message style.  On failure *resp is not written and all
  * responses built so far are released.
  */
 static int
 auth_conv(int num_msg, const struct pam_message **msg,
 	  struct pam_response **resp, void *appdata)
 {
 	int i, j, rc;
 	cred_t *cred = (cred_t *) appdata;
 	struct pam_response *reply;
 
 	reply = calloc(num_msg, sizeof *reply);
 	if (reply == NULL)
 		return PAM_BUF_ERR;
 
 	for (i = 0; i < num_msg; i++) {
 		switch (msg[i]->msg_style) {
 		case PAM_PROMPT_ECHO_ON:	/* assume want user name */
 			reply[i].resp_retcode = PAM_SUCCESS;
 			reply[i].resp = COPY_STRING(cred->uname);
 			/* PAM frees resp. */
 			if (cred->uname != NULL && reply[i].resp == NULL) {
 				rc = PAM_BUF_ERR;
 				goto fail;
 			}
 			break;
 		case PAM_PROMPT_ECHO_OFF:	/* assume want password */
 			reply[i].resp_retcode = PAM_SUCCESS;
 			reply[i].resp = COPY_STRING(cred->pass);
 			/* PAM frees resp. */
 			if (cred->pass != NULL && reply[i].resp == NULL) {
 				rc = PAM_BUF_ERR;
 				goto fail;
 			}
 			break;
 		case PAM_TEXT_INFO:
 		case PAM_ERROR_MSG:
 			reply[i].resp_retcode = PAM_SUCCESS;
 			reply[i].resp = NULL;
 			break;
 		default:			/* unknown message style */
 			rc = PAM_CONV_ERR;
 			goto fail;
 		}
 	}
 
 	*resp = reply;
 	return PAM_SUCCESS;
 
 fail:
 	/*
 	 * The array is never handed to PAM on failure, so the copies made
 	 * for messages 0 .. i-1 have to be released here as well.
 	 */
 	for (j = 0; j < i; j++)
 		free(reply[j].resp);
 	free(reply);
 	return rc;
 }
 
 /*
  * Attempt to authenticate the user using PAM.  Returns 0 if the user is
  * authenticated, or 1 if not authenticated.  If some sort of PAM system
  * error occurs (e.g., the "/etc/pam.conf" file is missing) then this
  * function returns -1.  This can be used as an indication that we should
  * fall back to a different authentication mechanism.
  *
  * "ppw" points to the passwd entry of the user being authenticated and
  * "pass" is the password offered.  If PAM substitutes a template user,
  * *ppw is replaced by the entry for that user; when the template user
  * is missing from the local password database, *ppw is left alone and
  * the attempt counts as not authenticated (1).
  *
  * On a return of 0 the PAM handle "pamh" stays open for the session;
  * otherwise it has been ended and reset to NULL.
  */
 static int
 auth_pam(struct passwd **ppw, const char *pass)
 {
 	const char *tmpl_user;
 	const void *item;
 	struct passwd *tpw;
 	int rval;
 	int e;
 	cred_t auth_cred = { (*ppw)->pw_name, pass };
 	struct pam_conv conv = { &auth_conv, &auth_cred };
 
 	e = pam_start("ftpd", (*ppw)->pw_name, &conv, &pamh);
 	if (e != PAM_SUCCESS) {
 		/*
 		 * In OpenPAM, it's OK to pass NULL to pam_strerror()
 		 * if context creation has failed in the first place.
 		 */
 		syslog(LOG_ERR, "pam_start: %s", pam_strerror(NULL, e));
 		return -1;
 	}
 
 	e = pam_set_item(pamh, PAM_RHOST, remotehost);
 	if (e != PAM_SUCCESS) {
 		syslog(LOG_ERR, "pam_set_item(PAM_RHOST): %s",
 			pam_strerror(pamh, e));
 		if ((e = pam_end(pamh, e)) != PAM_SUCCESS) {
 			syslog(LOG_ERR, "pam_end: %s", pam_strerror(pamh, e));
 		}
 		pamh = NULL;
 		return -1;
 	}
 
 	e = pam_authenticate(pamh, 0);
 	switch (e) {
 	case PAM_SUCCESS:
 		/*
 		 * With PAM we support the concept of a "template"
 		 * user.  The user enters a login name which is
 		 * authenticated by PAM, usually via a remote service
 		 * such as RADIUS or TACACS+.  If authentication
 		 * succeeds, a different but related "template" name
 		 * is used for setting the credentials, shell, and
 		 * home directory.  The name the user enters need only
 		 * exist on the remote authentication server, but the
 		 * template name must be present in the local password
 		 * database.
 		 *
 		 * This is supported by two various mechanisms in the
 		 * individual modules.  However, from the application's
 		 * point of view, the template user is always passed
 		 * back as a changed value of the PAM_USER item.
 		 */
 		rval = 0;
 		if ((e = pam_get_item(pamh, PAM_USER, &item)) ==
 		    PAM_SUCCESS) {
 			tmpl_user = (const char *) item;
 			if (tmpl_user != NULL &&
 			    strcmp((*ppw)->pw_name, tmpl_user) != 0) {
 				/*
 				 * Never hand a NULL entry back as a
 				 * "successful" login: the caller goes on
 				 * to dereference *ppw.
 				 * NOTE(review): getpwnam() returns static
 				 * storage, unlike the copy sgetpwnam()
 				 * keeps -- confirm nothing clobbers it
 				 * while the session uses pw.
 				 */
 				if ((tpw = getpwnam(tmpl_user)) == NULL) {
 					syslog(LOG_ERR,
 					    "PAM template user %s is not in "
 					    "the password database",
 					    tmpl_user);
 					rval = 1;
 				} else
 					*ppw = tpw;
 			}
 		} else
 			syslog(LOG_ERR, "Couldn't get PAM_USER: %s",
 			    pam_strerror(pamh, e));
 		break;
 
 	case PAM_AUTH_ERR:
 	case PAM_USER_UNKNOWN:
 	case PAM_MAXTRIES:
 		rval = 1;
 		break;
 
 	default:
 		syslog(LOG_ERR, "pam_authenticate: %s", pam_strerror(pamh, e));
 		rval = -1;
 		break;
 	}
 
 	/* Authentication passed; the account itself must be usable too. */
 	if (rval == 0) {
 		e = pam_acct_mgmt(pamh, 0);
 		if (e != PAM_SUCCESS) {
 			syslog(LOG_ERR, "pam_acct_mgmt: %s",
 						pam_strerror(pamh, e));
 			rval = 1;
 		}
 	}
 
 	/* Keep the PAM transaction open only for a successful login. */
 	if (rval != 0) {
 		if ((e = pam_end(pamh, e)) != PAM_SUCCESS) {
 			syslog(LOG_ERR, "pam_end: %s", pam_strerror(pamh, e));
 		}
 		pamh = NULL;
 	}
 	return rval;
 }
 
 #endif /* USE_PAM */
 
 /*
  * PASS command.
  * Completes the login started by user(): "passwd" is the password (or,
  * for a guest, the identification string) sent by the client.
  * For a non-guest the password is verified (PAM when compiled in, with
  * OPIE / crypt() as the fallback); after that the process takes on the
  * identity of the account in the global pw: group, login context,
  * optional chroot, effective uid and working directory.
  * On any failure a reply is sent and the function returns; failures
  * after the chroot/uid stage go through "bad", which calls end_login().
  */
 void
 pass(char *passwd)
 {
 	int rval, ecode;
 	FILE *fd;
 #ifdef	LOGIN_CAP
 	login_cap_t *lc = NULL;
 #endif
 #ifdef USE_PAM
 	int e;
 #endif
 	char *residue = NULL;
 	char *xpasswd;
 
 	/* PASS is only valid directly after an accepted USER. */
 	if (logged_in || askpasswd == 0) {
 		reply(503, "Login with USER first.");
 		return;
 	}
 	askpasswd = 0;
 	if (!guest) {		/* "ftp" is only account allowed no password */
 		/* user() left pw == NULL for an unknown account. */
 		if (pw == NULL) {
 			rval = 1;	/* failure below */
 			goto skip;
 		}
 #ifdef USE_PAM
 		/*
 		 * A negative result is a PAM system error (see auth_pam());
 		 * only then fall through to the checks below.
 		 */
 		rval = auth_pam(&pw, passwd);
 		if (rval >= 0) {
 			opieunlock();
 			goto skip;
 		}
 #endif
 		if (opieverify(&opiedata, passwd) == 0)
 			xpasswd = pw->pw_passwd;
 		else if (pwok) {
 			xpasswd = crypt(passwd, pw->pw_passwd);
 			/*
 			 * NOTE(review): presumably ":" is substituted so
 			 * that an empty password cannot match a non-empty
 			 * password field -- confirm.
 			 */
 			if (passwd[0] == '\0' && pw->pw_passwd[0] != '\0')
 				xpasswd = ":";
 		} else {
 			rval = 1;
 			goto skip;
 		}
 		rval = strcmp(pw->pw_passwd, xpasswd);
 		if (pw->pw_expire && time(NULL) >= pw->pw_expire)
 			rval = 1;	/* failure */
 skip:
 		/*
 		 * If rval == 1, the user failed the authentication check
 		 * above.  If rval == 0, either PAM or local authentication
 		 * succeeded.
 		 */
 		if (rval) {
 			reply(530, "Login incorrect.");
 #ifdef USE_BLACKLIST
-			blacklist_notify(1, 0, "Login incorrect");
+			blacklist_notify(1, STDIN_FILENO, "Login incorrect");
 #endif
 			if (logging) {
 				syslog(LOG_NOTICE,
 				    "FTP LOGIN FAILED FROM %s",
 				    remotehost);
 				syslog(LOG_AUTHPRIV | LOG_NOTICE,
 				    "FTP LOGIN FAILED FROM %s, %s",
 				    remotehost, curname);
 			}
 			pw = NULL;
 			/* Give up on the connection after repeated failures. */
 			if (login_attempts++ >= 5) {
 				syslog(LOG_NOTICE,
 				    "repeated login failures from %s",
 				    remotehost);
 				exit(0);
 			}
 			return;
 		}
 #ifdef USE_BLACKLIST
 		 else {
-			blacklist_notify(0, 0, "Login successful");
+			blacklist_notify(0, STDIN_FILENO, "Login successful");
 		}
 #endif
 	}
 	login_attempts = 0;		/* this time successful */
 	if (setegid(pw->pw_gid) < 0) {
 		reply(550, "Can't set gid.");
 		return;
 	}
 	/* May be overridden by login.conf */
 	(void) umask(defumask);
 #ifdef	LOGIN_CAP
 	if ((lc = login_getpwclass(pw)) != NULL) {
 		char	remote_ip[NI_MAXHOST];
 
 		if (getnameinfo((struct sockaddr *)&his_addr, his_addr.su_len,
 			remote_ip, sizeof(remote_ip) - 1, NULL, 0,
 			NI_NUMERICHOST))
 				*remote_ip = 0;
 		remote_ip[sizeof(remote_ip) - 1] = 0;
 		/*
 		 * NOTE(review): the two returns below leave without a
 		 * login_close(lc) -- confirm whether that is intended.
 		 */
 		if (!auth_hostok(lc, remotehost, remote_ip)) {
 			syslog(LOG_INFO|LOG_AUTH,
 			    "FTP LOGIN FAILED (HOST) as %s: permission denied.",
 			    pw->pw_name);
 			reply(530, "Permission denied.");
 			pw = NULL;
 			return;
 		}
 		if (!auth_timeok(lc, time(NULL))) {
 			reply(530, "Login not available right now.");
 			pw = NULL;
 			return;
 		}
 	}
 	setusercontext(lc, pw, 0, LOGIN_SETALL &
 		       ~(LOGIN_SETUSER | LOGIN_SETPATH | LOGIN_SETENV));
 #else
 	setlogin(pw->pw_name);
 	(void) initgroups(pw->pw_name, pw->pw_gid);
 #endif
 
 #ifdef USE_PAM
 	/* pamh is non-NULL only when auth_pam() authenticated the user. */
 	if (pamh) {
 		if ((e = pam_open_session(pamh, 0)) != PAM_SUCCESS) {
 			syslog(LOG_ERR, "pam_open_session: %s", pam_strerror(pamh, e));
 		} else if ((e = pam_setcred(pamh, PAM_ESTABLISH_CRED)) != PAM_SUCCESS) {
 			syslog(LOG_ERR, "pam_setcred: %s", pam_strerror(pamh, e));
 		}
 	}
 #endif
 
 	/*
 	 * A match in _PATH_FTPCHROOT may leave the rest of the matching
 	 * line in residue; it is parsed as the chroot directory below.
 	 */
 	dochroot =
 		checkuser(_PATH_FTPCHROOT, pw->pw_name, 1, &residue, &ecode)
 #ifdef	LOGIN_CAP	/* Allow login.conf configuration as well */
 		|| login_getcapbool(lc, "ftp-chroot", 0)
 #endif
 	;
 	/*
 	 * It is possible that checkuser() failed to open the chroot file.
 	 * If this is the case, report that logins are un-available, since we
 	 * have no way of checking whether or not the user should be chrooted.
 	 * We ignore ENOENT since it is not required that this file be present.
 	 */
 	/*
 	 * NOTE(review): this return skips the cleanup done at "bad"
 	 * (login_close(), end_login()) -- confirm whether that is intended.
 	 */
 	if (ecode != 0 && ecode != ENOENT) {
 		reply(530, "Login not available right now.");
 		return;
 	}
 	chrootdir = NULL;
 
 	/* Disable wtmp logging when chrooting. */
 	if (dochroot || guest)
 		dowtmp = 0;
 	if (dowtmp)
 		ftpd_logwtmp(wtmpid, pw->pw_name,
 		    (struct sockaddr *)&his_addr);
 	logged_in = 1;
 
 	/*
 	 * NOTE(review): despite its indentation, the "if (statfd < 0)"
 	 * below is not governed by the guest/stats condition; it runs on
 	 * every login and clears stats whenever statfd is negative.
 	 * Confirm whether that is intended.
 	 */
 	if (guest && stats && statfd < 0)
 #ifdef VIRTUAL_HOSTING
 		statfd = open(thishost->statfile, O_WRONLY|O_APPEND);
 #else
 		statfd = open(_PATH_FTPDSTATFILE, O_WRONLY|O_APPEND);
 #endif
 		if (statfd < 0)
 			stats = 0;
 
 	/*
 	 * For a chrooted local user,
 	 * a) see whether ftpchroot(5) specifies a chroot directory,
 	 * b) extract the directory pathname from the line,
 	 * c) expand it to the absolute pathname if necessary.
 	 */
 	if (dochroot && residue &&
 	    (chrootdir = strtok(residue, " \t")) != NULL) {
 		if (chrootdir[0] != '/')
 			asprintf(&chrootdir, "%s/%s", pw->pw_dir, chrootdir);
 		else
 			chrootdir = strdup(chrootdir); /* make it permanent */
 		if (chrootdir == NULL)
 			fatalerror("Ran out of memory.");
 	}
 	if (guest || dochroot) {
 		/*
 		 * If no chroot directory set yet, use the login directory.
 		 * Copy it so it can be modified while pw->pw_dir stays intact.
 		 */
 		if (chrootdir == NULL &&
 		    (chrootdir = strdup(pw->pw_dir)) == NULL)
 			fatalerror("Ran out of memory.");
 		/*
 		 * Check for the "/chroot/./home" syntax,
 		 * separate the chroot and home directory pathnames.
 		 */
 		if ((homedir = strstr(chrootdir, "/./")) != NULL) {
 			*(homedir++) = '\0';	/* wipe '/' */
 			homedir++;		/* skip '.' */
 		} else {
 			/*
 			 * We MUST do a chdir() after the chroot. Otherwise
 			 * the old current directory will be accessible as "."
 			 * outside the new root!
 			 */
 			homedir = "/";
 		}
 		/*
 		 * Finally, do chroot()
 		 */
 		if (chroot(chrootdir) < 0) {
 			reply(550, "Can't change root.");
 			goto bad;
 		}
 		__FreeBSD_libc_enter_restricted_mode();
 	} else	/* real user w/o chroot */
 		homedir = pw->pw_dir;
 	/*
 	 * Set euid *before* doing chdir() so
 	 * a) the user won't be carried to a directory that he couldn't reach
 	 *    on his own due to no permission to upper path components,
 	 * b) NFS mounted homedirs w/restrictive permissions will be accessible
 	 *    (uid 0 has no root power over NFS if not mapped explicitly.)
 	 */
 	if (seteuid(pw->pw_uid) < 0) {
 		reply(550, "Can't set uid.");
 		goto bad;
 	}
 	if (chdir(homedir) < 0) {
 		if (guest || dochroot) {
 			reply(550, "Can't change to base directory.");
 			goto bad;
 		} else {
 			/* A regular user falls back to "/" as home. */
 			if (chdir("/") < 0) {
 				reply(550, "Root is inaccessible.");
 				goto bad;
 			}
 			lreply(230, "No directory! Logging in with home=/.");
 		}
 	}
 
 	/*
 	 * Display a login message, if it exists.
 	 * N.B. reply(230,) must follow the message.
 	 */
 #ifdef VIRTUAL_HOSTING
 	fd = fopen(thishost->loginmsg, "r");
 #else
 	fd = fopen(_PATH_FTPLOGINMESG, "r");
 #endif
 	if (fd != NULL) {
 		char *cp, line[LINE_MAX];
 
 		while (fgets(line, sizeof(line), fd) != NULL) {
 			if ((cp = strchr(line, '\n')) != NULL)
 				*cp = '\0';
 			lreply(230, "%s", line);
 		}
 		(void) fflush(stdout);
 		(void) fclose(fd);
 	}
 	if (guest) {
 		/* The guest's "password" is kept as the identification. */
 		if (ident != NULL)
 			free(ident);
 		ident = strdup(passwd);
 		if (ident == NULL)
 			fatalerror("Ran out of memory.");
 
 		reply(230, "Guest login ok, access restrictions apply.");
 #ifdef SETPROCTITLE
 #ifdef VIRTUAL_HOSTING
 		if (thishost != firsthost)
 			snprintf(proctitle, sizeof(proctitle),
 				 "%s: anonymous(%s)/%s", remotehost, hostname,
 				 passwd);
 		else
 #endif
 			snprintf(proctitle, sizeof(proctitle),
 				 "%s: anonymous/%s", remotehost, passwd);
 		setproctitle("%s", proctitle);
 #endif /* SETPROCTITLE */
 		if (logging)
 			syslog(LOG_INFO, "ANONYMOUS FTP LOGIN FROM %s, %s",
 			    remotehost, passwd);
 	} else {
 		if (dochroot)
 			reply(230, "User %s logged in, "
 				   "access restrictions apply.", pw->pw_name);
 		else
 			reply(230, "User %s logged in.", pw->pw_name);
 
 #ifdef SETPROCTITLE
 		snprintf(proctitle, sizeof(proctitle),
 			 "%s: user/%s", remotehost, pw->pw_name);
 		setproctitle("%s", proctitle);
 #endif /* SETPROCTITLE */
 		if (logging)
 			syslog(LOG_INFO, "FTP LOGIN FROM %s as %s",
 			    remotehost, pw->pw_name);
 	}
 	if (logging && (guest || dochroot))
 		syslog(LOG_INFO, "session root changed to %s", chrootdir);
 #ifdef	LOGIN_CAP
 	login_close(lc);
 #endif
 	if (residue)
 		free(residue);
 	return;
 bad:
 	/* Forget all about it... */
 #ifdef	LOGIN_CAP
 	login_close(lc);
 #endif
 	if (residue)
 		free(residue);
 	end_login();
 }
 
 /*
  * Send data to the client over the data connection.
  * With cmd == NULL the file "name" is opened and sent.  Otherwise "cmd"
  * is used as a format string into which "name" is substituted, the
  * resulting command line is run through ftpd_popen() and its output is
  * sent.
  * A pending restart_point is honoured by skipping that much input first.
  */
 void
 retrieve(char *cmd, char *name)
 {
 	FILE *fin, *dout;
 	struct stat st;
 	int (*closefunc)(FILE *);
 	time_t start;
 	char line[BUFSIZ];
 
 	if (cmd == 0) {
 		fin = fopen(name, "r"), closefunc = fclose;
 		st.st_size = 0;
 	} else {
 		(void) snprintf(line, sizeof(line), cmd, name);
 		name = line;
 		fin = ftpd_popen(line, "r"), closefunc = ftpd_pclose;
 		/* size unknown for command output; only these fields are set */
 		st.st_size = -1;
 		st.st_blksize = BUFSIZ;
 	}
 	if (fin == NULL) {
 		if (errno != 0) {
 			perror_reply(550, name);
 			if (cmd == 0) {
 				LOGCMD("get", name);
 			}
 		}
 		return;
 	}
 	byte_count = -1;
 	if (cmd == 0) {
 		if (fstat(fileno(fin), &st) < 0) {
 			perror_reply(550, name);
 			goto done;
 		}
 		if (!S_ISREG(st.st_mode)) {
 			/*
 			 * Never sending a raw directory is a workaround
 			 * for buggy clients that will attempt to RETR
 			 * a directory before listing it, e.g., Mozilla.
 			 * Preventing a guest from getting irregular files
 			 * is a simple security measure.
 			 */
 			if (S_ISDIR(st.st_mode) || guest) {
 				reply(550, "%s: not a plain file.", name);
 				goto done;
 			}
 			st.st_size = -1;
 			/* st.st_blksize is set for all descriptor types */
 		}
 	}
 	if (restart_point) {
 		if (type == TYPE_A) {
 			off_t i, n;
 			int c;
 
 			/*
 			 * Skip restart_point bytes, counting each '\n'
 			 * read as two (send_data() puts a '\r' in front
 			 * of a bare '\n' in ASCII mode).
 			 */
 			n = restart_point;
 			i = 0;
 			while (i++ < n) {
 				if ((c=getc(fin)) == EOF) {
 					perror_reply(550, name);
 					goto done;
 				}
 				if (c == '\n')
 					i++;
 			}
 		} else if (lseek(fileno(fin), restart_point, L_SET) < 0) {
 			perror_reply(550, name);
 			goto done;
 		}
 	}
 	dout = dataconn(name, st.st_size, "w");
 	if (dout == NULL)
 		goto done;
 	time(&start);
 	/*
 	 * The last argument ("isreg") is true only for a regular file
 	 * that is sent from its beginning; the && chain keeps st.st_mode
 	 * from being read when cmd is set and fstat() was never called.
 	 */
 	send_data(fin, dout, st.st_blksize, st.st_size,
 		  restart_point == 0 && cmd == 0 && S_ISREG(st.st_mode));
 	if (cmd == 0 && guest && stats && byte_count > 0)
 		logxfer(name, byte_count, start);
 	(void) fclose(dout);
 	/* the data connection is gone; forget both descriptors */
 	data = -1;
 	pdata = -1;
 done:
 	if (cmd == 0)
 		LOGBYTES("get", name, byte_count);
 	(*closefunc)(fin);
 }
 
 /*
  * Receive a file from the client over the data connection and write it
  * to "name".
  *
  * "mode" is the stdio open mode; a mode starting with 'a' means APPE.
  * "unique" asks for a unique file name (STOU); the name actually used
  * is then obtained from guniquefd() and reported in the 226 reply.
  * A pending restart_point is honoured for a plain STOR only: it is
  * cleared for APPE and STOU.  When noguestmod is set, a guest may not
  * append or restart, and a guest STOR is handled like STOU.
  */
 void
 store(char *name, char *mode, int unique)
 {
 	int fd;
 	FILE *fout, *din;
 	int (*closefunc)(FILE *);
 
 	if (*mode == 'a') {		/* APPE */
 		if (unique) {
 			/* Programming error */
 			syslog(LOG_ERR, "Internal: unique flag to APPE");
 			unique = 0;
 		}
 		if (guest && noguestmod) {
 			reply(550, "Appending to existing file denied.");
 			goto err;
 		}
 		restart_point = 0;	/* not affected by preceding REST */
 	}
 	if (unique)			/* STOU overrides REST */
 		restart_point = 0;
 	if (guest && noguestmod) {
 		if (restart_point) {	/* guest STOR w/REST */
 			reply(550, "Modifying existing file denied.");
 			goto err;
 		} else			/* treat guest STOR as STOU */
 			unique = 1;
 	}
 
 	if (restart_point)
 		mode = "r+";	/* so ASCII manual seek can work */
 	if (unique) {
 		if ((fd = guniquefd(name, &name)) < 0)
 			goto err;
 		fout = fdopen(fd, mode);
 		if (fout == NULL) {
 			/*
 			 * The descriptor is still ours when fdopen()
 			 * fails; close it instead of leaking it, and keep
 			 * errno intact for perror_reply() below.
 			 */
 			int serrno = errno;
 
 			(void) close(fd);
 			errno = serrno;
 		}
 	} else
 		fout = fopen(name, mode);
 	closefunc = fclose;
 	if (fout == NULL) {
 		perror_reply(553, name);
 		goto err;
 	}
 	byte_count = -1;
 	if (restart_point) {
 		if (type == TYPE_A) {
 			off_t i, n;
 			int c;
 
 			/*
 			 * Skip restart_point bytes of the existing file,
 			 * counting each '\n' read as two.
 			 */
 			n = restart_point;
 			i = 0;
 			while (i++ < n) {
 				if ((c=getc(fout)) == EOF) {
 					perror_reply(550, name);
 					goto done;
 				}
 				if (c == '\n')
 					i++;
 			}
 			/*
 			 * We must do this seek to "current" position
 			 * because we are changing from reading to
 			 * writing.
 			 */
 			if (fseeko(fout, 0, SEEK_CUR) < 0) {
 				perror_reply(550, name);
 				goto done;
 			}
 		} else if (lseek(fileno(fout), restart_point, L_SET) < 0) {
 			perror_reply(550, name);
 			goto done;
 		}
 	}
 	din = dataconn(name, -1, "r");
 	if (din == NULL)
 		goto done;
 	if (receive_data(din, fout) == 0) {
 		if (unique)
 			reply(226, "Transfer complete (unique file name:%s).",
 			    name);
 		else
 			reply(226, "Transfer complete.");
 	}
 	(void) fclose(din);
 	/* the data connection is gone; forget both descriptors */
 	data = -1;
 	pdata = -1;
 done:
 	LOGBYTES(*mode == 'a' ? "append" : "put", name, byte_count);
 	(*closefunc)(fout);
 	return;
 err:
 	/* nothing was opened: only log the command */
 	LOGCMD(*mode == 'a' ? "append" : "put" , name);
 	return;
 }
 
 /*
  * Return a stdio stream, opened with "mode", for the data connection.
  * If a data descriptor already exists (data >= 0) it is wrapped as is.
  * Otherwise a new stream socket is created for the family of data_dest
  * and bound to the control connection's local address with port
  * "dataport"; the bind is done with euid 0 and retried while it fails
  * with EADDRINUSE.
  *
  * Returns NULL with errno set on failure; a socket created here is
  * closed again in that case.
  */
 static FILE *
 getdatasock(char *mode)
 {
 	int on = 1, s, t, tries;
 	FILE *file;
 
 	if (data >= 0)
 		return (fdopen(data, mode));
 
 	s = socket(data_dest.su_family, SOCK_STREAM, 0);
 	if (s < 0)
 		goto bad;
 	if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0)
 		syslog(LOG_WARNING, "data setsockopt (SO_REUSEADDR): %m");
 	/* anchor socket to avoid multi-homing problems */
 	data_source = ctrl_addr;
 	data_source.su_port = htons(dataport);
 	(void) seteuid(0);
 	for (tries = 1; ; tries++) {
 		/*
 		 * We should loop here since it's possible that
 		 * another ftpd instance has passed this point and is
 		 * trying to open a data connection in active mode now.
 		 * Until the other connection is opened, we'll be getting
 		 * EADDRINUSE because no SOCK_STREAM sockets in the system
 		 * can share both local and remote addresses, localIP:20
 		 * and *:* in this case.
 		 */
 		if (bind(s, (struct sockaddr *)&data_source,
 		    data_source.su_len) >= 0)
 			break;
 		if (errno != EADDRINUSE || tries > 10)
 			goto bad;
 		sleep(tries);
 	}
 	(void) seteuid(pw->pw_uid);
 #ifdef IP_TOS
 	if (data_source.su_family == AF_INET)
       {
 	on = IPTOS_THROUGHPUT;
 	if (setsockopt(s, IPPROTO_IP, IP_TOS, &on, sizeof(int)) < 0)
 		syslog(LOG_WARNING, "data setsockopt (IP_TOS): %m");
       }
 #endif
 #ifdef TCP_NOPUSH
 	/*
 	 * Turn off push flag to keep sender TCP from sending short packets
 	 * at the boundaries of each write().
 	 */
 	on = 1;
 	if (setsockopt(s, IPPROTO_TCP, TCP_NOPUSH, &on, sizeof on) < 0)
 		syslog(LOG_WARNING, "data setsockopt (TCP_NOPUSH): %m");
 #endif
 	/* If no stream can be attached, the socket must not be leaked. */
 	file = fdopen(s, mode);
 	if (file == NULL)
 		goto bad;
 	return (file);
 bad:
 	/* Return the real value of errno (close may change it) */
 	t = errno;
 	(void) seteuid(pw->pw_uid);
 	if (s >= 0)		/* socket() itself may have failed */
 		(void) close(s);
 	errno = t;
 	return (NULL);
 }
 
 /*
  * Open the data connection for a transfer and return it as a stdio
  * stream opened with "mode", or NULL after sending a 425 reply.
  *
  * "name" and "size" only feed the reply text (size == -1 means the
  * size is not mentioned); size is also stored in file_size, and
  * byte_count is reset to 0.
  *
  * Three cases are handled in this order:
  *  - pdata >= 0: accept a connection on the listening socket pdata,
  *    waiting at most 120 seconds;
  *  - data >= 0: reuse the existing data descriptor;
  *  - otherwise: connect to data_dest through getdatasock(), retrying
  *    while connect() fails with EADDRINUSE.
  */
 static FILE *
 dataconn(char *name, off_t size, char *mode)
 {
 	char sizebuf[32];
 	FILE *file;
 	int retry = 0, tos, conerrno;
 
 	file_size = size;
 	byte_count = 0;
 	if (size != -1)
 		(void) snprintf(sizebuf, sizeof(sizebuf),
 				" (%jd bytes)", (intmax_t)size);
 	else
 		*sizebuf = '\0';
 	if (pdata >= 0) {
 		union sockunion from;
 		socklen_t fromlen = ctrl_addr.su_len;
 		int flags, s;
 		struct timeval timeout;
 		fd_set set;
 
 		FD_ZERO(&set);
 		FD_SET(pdata, &set);
 
 		timeout.tv_usec = 0;
 		timeout.tv_sec = 120;
 
 		/*
 		 * Granted a socket is in the blocking I/O mode,
 		 * accept() will block after a successful select()
 		 * if the selected connection dies in between.
 		 * Therefore set the non-blocking I/O flag here.
 		 */
 		if ((flags = fcntl(pdata, F_GETFL, 0)) == -1 ||
 		    fcntl(pdata, F_SETFL, flags | O_NONBLOCK) == -1)
 			goto pdata_err;
 		if (select(pdata+1, &set, NULL, NULL, &timeout) <= 0 ||
 		    (s = accept(pdata, (struct sockaddr *) &from, &fromlen)) < 0)
 			goto pdata_err;
 		/* the accepted socket replaces the listening one */
 		(void) close(pdata);
 		pdata = s;
 		/*
 		 * Unset the inherited non-blocking I/O flag
 		 * on the child socket so stdio can work on it.
 		 */
 		if ((flags = fcntl(pdata, F_GETFL, 0)) == -1 ||
 		    fcntl(pdata, F_SETFL, flags & ~O_NONBLOCK) == -1)
 			goto pdata_err;
 #ifdef IP_TOS
 		if (from.su_family == AF_INET)
 	      {
 		tos = IPTOS_THROUGHPUT;
 		if (setsockopt(s, IPPROTO_IP, IP_TOS, &tos, sizeof(int)) < 0)
 			syslog(LOG_WARNING, "pdata setsockopt (IP_TOS): %m");
 	      }
 #endif
 		reply(150, "Opening %s mode data connection for '%s'%s.",
 		     type == TYPE_A ? "ASCII" : "BINARY", name, sizebuf);
 		return (fdopen(pdata, mode));
 pdata_err:
 		/* closes whichever socket pdata refers to at this point */
 		reply(425, "Can't open data connection.");
 		(void) close(pdata);
 		pdata = -1;
 		return (NULL);
 	}
 	if (data >= 0) {
 		reply(125, "Using existing data connection for '%s'%s.",
 		    name, sizebuf);
 		usedefault = 1;
 		return (fdopen(data, mode));
 	}
 	/* With usedefault set, the peer of the control connection is used. */
 	if (usedefault)
 		data_dest = his_addr;
 	usedefault = 1;
 	do {
 		file = getdatasock(mode);
 		if (file == NULL) {
 			char hostbuf[NI_MAXHOST], portbuf[NI_MAXSERV];
 
 			if (getnameinfo((struct sockaddr *)&data_source,
 				data_source.su_len,
 				hostbuf, sizeof(hostbuf) - 1,
 				portbuf, sizeof(portbuf) - 1,
 				NI_NUMERICHOST|NI_NUMERICSERV))
 					*hostbuf = *portbuf = 0;
 			hostbuf[sizeof(hostbuf) - 1] = 0;
 			portbuf[sizeof(portbuf) - 1] = 0;
 			reply(425, "Can't create data socket (%s,%s): %s.",
 				hostbuf, portbuf, strerror(errno));
 			return (NULL);
 		}
 		data = fileno(file);
 		conerrno = 0;
 		if (connect(data, (struct sockaddr *)&data_dest,
 		    data_dest.su_len) == 0)
 			break;
 		/* save errno: fclose() below may change it */
 		conerrno = errno;
 		(void) fclose(file);
 		data = -1;
 		/* only EADDRINUSE is worth retrying, up to swaitmax seconds */
 		if (conerrno == EADDRINUSE) {
 			sleep(swaitint);
 			retry += swaitint;
 		} else {
 			break;
 		}
 	} while (retry <= swaitmax);
 	if (conerrno != 0) {
 		reply(425, "Can't build data connection: %s.",
 			   strerror(conerrno));
 		return (NULL);
 	}
 	reply(150, "Opening %s mode data connection for '%s'%s.",
 	     type == TYPE_A ? "ASCII" : "BINARY", name, sizebuf);
 	return (file);
 }
 
 /*
  * A helper macro to avoid code duplication
  * in send_data() and receive_data().
  *
  * FTPD_PUTC(ch, file, label) writes the character "ch" to the stdio
  * stream "file" with putc(), repeating the call for as long as putc()
  * returns EOF.  After each attempt CHECKOOB(return (-1)) is evaluated;
  * if its condition does not apply and the stream has its error
  * indicator set, control jumps to "label".  Otherwise the indicator is
  * cleared before the loop condition is tested.  Since the macro may
  * execute "return (-1)", it can only be used in a function returning
  * int.
  *
  * XXX We have to block SIGURG during putc() because BSD stdio
  * is unable to restart interrupted write operations and hence
  * the entire buffer contents will be lost as soon as a write()
  * call indicates EINTR to stdio.
  */
 #define FTPD_PUTC(ch, file, label)					\
 	do {								\
 		int ret;						\
 									\
 		do {							\
 			START_UNSAFE;					\
 			ret = putc((ch), (file));			\
 			END_UNSAFE;					\
 			CHECKOOB(return (-1))				\
 			else if (ferror(file))				\
 				goto label;				\
 			clearerr(file);					\
 		} while (ret == EOF);					\
 	} while (0)
 
 /*
  * Transfer the contents of "instr" to "outstr" peer using the appropriate
  * encapsulation of the data subject to Mode, Structure, and Type.
  *
  * TYPE_A copies character by character through stdio and inserts a '\r'
  * before every '\n' that is not already preceded by one.  TYPE_I and
  * TYPE_L first try sendfile() when "isreg" is set, with "filesize" being
  * the number of bytes still to be sent; otherwise, or when sendfile()
  * fails with something other than EINTR before any data went out, the
  * data is copied with read()/write() through a malloc'ed buffer of
  * "blksize" bytes.
  *
  * byte_count is advanced by the number of bytes passed to the peer.
  * Returns 0 after a 226 reply has been sent, or -1 after an error reply
  * or when a CHECKOOB action returns.
  *
  * NB: Form isn't handled.
  */
 static int
 send_data(FILE *instr, FILE *outstr, size_t blksize, off_t filesize, int isreg)
 {
 	int c, cp, filefd, netfd;
 	char *buf;
 
 	STARTXFER;
 
 	switch (type) {
 
 	case TYPE_A:
 		/* cp remembers the previous character; EOF means "none yet". */
 		cp = EOF;
 		for (;;) {
 			c = getc(instr);
 			CHECKOOB(return (-1))
 			else if (c == EOF && ferror(instr))
 				goto file_err;
 			if (c == EOF) {
 				if (ferror(instr)) {	/* resume after OOB */
 					clearerr(instr);
 					continue;
 				}
 				if (feof(instr))	/* EOF */
 					break;
 				syslog(LOG_ERR, "Internal: impossible condition"
 						" on file after getc()");
 				goto file_err;
 			}
 			/* Turn a bare LF into CR LF on the wire. */
 			if (c == '\n' && cp != '\r') {
 				FTPD_PUTC('\r', outstr, data_err);
 				byte_count++;
 			}
 			FTPD_PUTC(c, outstr, data_err);
 			byte_count++;
 			cp = c;
 		}
 #ifdef notyet	/* BSD stdio isn't ready for that */
 		while (fflush(outstr) == EOF) {
 			CHECKOOB(return (-1))
 			else
 				goto data_err;
 			clearerr(outstr);
 		}
 		ENDXFER;
 #else
 		ENDXFER;
 		if (fflush(outstr) == EOF)
 			goto data_err;
 #endif
 		reply(226, "Transfer complete.");
 		return (0);
 
 	case TYPE_I:
 	case TYPE_L:
 		/*
 		 * isreg is only set if we are not doing restart and we
 		 * are sending a regular file
 		 */
 		netfd = fileno(outstr);
 		filefd = fileno(instr);
 
 		if (isreg) {
 			char *msg = "Transfer complete.";
 			off_t cnt, offset;
 			int err;
 
 			cnt = offset = 0;
 
 			while (filesize > 0) {
 				err = sendfile(filefd, netfd, offset, 0,
 					       NULL, &cnt, 0);
 				/*
 				 * Calculate byte_count before OOB processing.
 				 * It can be used in myoob() later.
 				 */
 				byte_count += cnt;
 				offset += cnt;
 				filesize -= cnt;
 				CHECKOOB(return (-1))
 				else if (err == -1) {
 					/*
 					 * Nothing has been sent yet and the
 					 * failure is not an interruption:
 					 * fall back to read()/write().
 					 */
 					if (errno != EINTR &&
 					    cnt == 0 && offset == 0)
 						goto oldway;
 					goto data_err;
 				}
 				if (err == -1)	/* resume after OOB */
 					continue;
 				/*
 				 * We hit the EOF prematurely.
 				 * Perhaps the file was externally truncated.
 				 */
 				if (cnt == 0) {
 					msg = "Transfer finished due to "
 					      "premature end of file.";
 					break;
 				}
 			}
 			ENDXFER;
 			reply(226, "%s", msg);
 			return (0);
 		}
 
 oldway:
 		if ((buf = malloc(blksize)) == NULL) {
 			ENDXFER;
 			reply(451, "Ran out of memory.");
 			return (-1);
 		}
 
 		for (;;) {
 			int cnt, len;
 			char *bp;
 
 			cnt = read(filefd, buf, blksize);
 			CHECKOOB(free(buf); return (-1))
 			else if (cnt < 0) {
 				free(buf);
 				goto file_err;
 			}
 			if (cnt < 0)	/* resume after OOB */
 				continue;
 			if (cnt == 0)	/* EOF */
 				break;
 			/* Push the whole chunk out, coping with short writes. */
 			for (len = cnt, bp = buf; len > 0;) {
 				cnt = write(netfd, bp, len);
 				CHECKOOB(free(buf); return (-1))
 				else if (cnt < 0) {
 					free(buf);
 					goto data_err;
 				}
 				if (cnt <= 0)
 					continue;
 				len -= cnt;
 				bp += cnt;
 				byte_count += cnt;
 			}
 		}
 		ENDXFER;
 		free(buf);
 		reply(226, "Transfer complete.");
 		return (0);
 	default:
 		ENDXFER;
 		reply(550, "Unimplemented TYPE %d in send_data.", type);
 		return (-1);
 	}
 
 /* Failure on the data connection side. */
 data_err:
 	ENDXFER;
 	perror_reply(426, "Data connection");
 	return (-1);
 
 /* Failure while reading the local input. */
 file_err:
 	ENDXFER;
 	perror_reply(551, "Error on input file");
 	return (-1);
 }
 
 /*
  * Transfer data from peer to "outstr" using the appropriate encapsulation of
  * the data subject to Mode, Structure, and Type.
  *
  * TYPE_I and TYPE_L copy with read()/write() through a BUFSIZ stack
  * buffer.  TYPE_E is rejected with a 553 reply.  TYPE_A goes through
  * stdio: a '\r' is held back and dropped when the next character is
  * '\n', and written out otherwise; '\n' characters not preceded by '\r'
  * are counted and reported in a 226- warning.
  *
  * byte_count is advanced as data is consumed.  Returns 0 on success and
  * -1 after an error reply or when a CHECKOOB action returns.  No final
  * completion reply is sent from here on success.
  *
  * N.B.: Form isn't handled.
  */
 static int
 receive_data(FILE *instr, FILE *outstr)
 {
 	int c, cp;
 	int bare_lfs = 0;
 
 	STARTXFER;
 
 	switch (type) {
 
 	case TYPE_I:
 	case TYPE_L:
 		for (;;) {
 			int cnt, len;
 			char *bp;
 			char buf[BUFSIZ];
 
 			cnt = read(fileno(instr), buf, sizeof(buf));
 			CHECKOOB(return (-1))
 			else if (cnt < 0)
 				goto data_err;
 			if (cnt < 0)	/* resume after OOB */
 				continue;
 			if (cnt == 0)	/* EOF */
 				break;
 			/* Write the whole chunk, coping with short writes. */
 			for (len = cnt, bp = buf; len > 0;) {
 				cnt = write(fileno(outstr), bp, len);
 				CHECKOOB(return (-1))
 				else if (cnt < 0)
 					goto file_err;
 				if (cnt <= 0)
 					continue;
 				len -= cnt;
 				bp += cnt;
 				byte_count += cnt;
 			}
 		}
 		ENDXFER;
 		return (0);
 
 	case TYPE_E:
 		ENDXFER;
 		reply(553, "TYPE E not implemented.");
 		return (-1);
 
 	case TYPE_A:
 		/* cp remembers the previous character; EOF means "none yet". */
 		cp = EOF;
 		for (;;) {
 			c = getc(instr);
 			CHECKOOB(return (-1))
 			else if (c == EOF && ferror(instr))
 				goto data_err;
 			if (c == EOF && ferror(instr)) { /* resume after OOB */
 				clearerr(instr);
 				continue;
 			}
 
 			if (cp == '\r') {
 				/* The held-back CR was not part of CR LF. */
 				if (c != '\n')
 					FTPD_PUTC('\r', outstr, file_err);
 			} else
 				if (c == '\n')
 					bare_lfs++;
 			if (c == '\r') {
 				/* Hold the CR back until the next character. */
 				byte_count++;
 				cp = c;
 				continue;
 			}
 
 			/* Check for EOF here in order not to lose last \r. */
 			if (c == EOF) {
 				if (feof(instr))	/* EOF */
 					break;
 				syslog(LOG_ERR, "Internal: impossible condition"
 						" on data stream after getc()");
 				goto data_err;
 			}
 
 			byte_count++;
 			FTPD_PUTC(c, outstr, file_err);
 			cp = c;
 		}
 #ifdef notyet	/* BSD stdio isn't ready for that */
 		while (fflush(outstr) == EOF) {
 			CHECKOOB(return (-1))
 			else
 				goto file_err;
 			clearerr(outstr);
 		}
 		ENDXFER;
 #else
 		ENDXFER;
 		if (fflush(outstr) == EOF)
 			goto file_err;
 #endif
 		if (bare_lfs) {
 			lreply(226,
 		"WARNING! %d bare linefeeds received in ASCII mode.",
 			    bare_lfs);
 		(void)printf("   File may not have transferred correctly.\r\n");
 		}
 		return (0);
 	default:
 		ENDXFER;
 		reply(550, "Unimplemented TYPE %d in receive_data.", type);
 		return (-1);
 	}
 
 /* Failure on the data connection side. */
 data_err:
 	ENDXFER;
 	perror_reply(426, "Data connection");
 	return (-1);
 
 /* Failure while writing the local output. */
 file_err:
 	ENDXFER;
 	perror_reply(452, "Error writing to file");
 	return (-1);
 }
 
 /*
  * Send a multi-line status reply for "filename" whose text is the output
  * of "ls -lgA" run on it.  Reply code 212 is used when lstat() reports a
  * directory and 213 otherwise.
  */
 void
 statfilecmd(char *filename)
 {
 	char cmd[LINE_MAX];
 	struct stat sb;
 	FILE *lsout;
 	int ch, bol, replycode;
 
 	if (lstat(filename, &sb) == 0 && S_ISDIR(sb.st_mode))
 		replycode = 212;
 	else
 		replycode = 213;
 
 	(void)snprintf(cmd, sizeof(cmd), _PATH_LS " -lgA %s", filename);
 	lsout = ftpd_popen(cmd, "r");
 	if (lsout == NULL) {
 		perror_reply(551, filename);
 		return;
 	}
 	lreply(replycode, "Status of %s:", filename);
 
 	/* "bol" is true while we are at the beginning of an output line. */
 	for (bol = 1; (ch = getc(lsout)) != EOF; bol = (ch == '\n')) {
 		if (ch == '\n') {
 			/* Stream errors are examined once per line. */
 			if (ferror(stdout)){
 				perror_reply(421, "Control connection");
 				(void) ftpd_pclose(lsout);
 				dologout(1);
 				/* NOTREACHED */
 			}
 			if (ferror(lsout)) {
 				perror_reply(551, filename);
 				(void) ftpd_pclose(lsout);
 				return;
 			}
 			(void) putc('\r', stdout);
 		}
 		/*
 		 * RFC 959 only asks for neutral text in front of a leading
 		 * 3-digit number followed by whitespace, but in practice
 		 * many ftp clients get confused by any leading digit, so
 		 * every line starting with a digit is indented.
 		 */
 		if (bol && isdigit(ch))
 			(void) putc(' ', stdout);
 		(void) putc(ch, stdout);
 	}
 
 	(void) ftpd_pclose(lsout);
 	reply(replycode, "End of status.");
 }
 
 /*
  * Send the multi-line 211 server status reply: the peer we are connected
  * to, the login state, the current TYPE/FORM/STRUcture/MODE settings and
  * the state of the data connection.  When a passive socket exists or a
  * data destination has been given, its address is printed in PORT/PASV,
  * LPRT/LPSV and EPRT/EPSV notation.
  */
 void
 statcmd(void)
 {
 	union sockunion *su;
 	u_char *a, *p;
 	char hname[NI_MAXHOST];
 	int ispassive;
 
 	if (hostinfo) {
 		lreply(211, "%s FTP server status:", hostname);
 		printf("     %s\r\n", version);
 	} else
 		lreply(211, "FTP server status:");
 	printf("     Connected to %s", remotehost);
 	/* Add the numeric address when it differs from remotehost. */
 	if (!getnameinfo((struct sockaddr *)&his_addr, his_addr.su_len,
 			 hname, sizeof(hname) - 1, NULL, 0, NI_NUMERICHOST)) {
 		hname[sizeof(hname) - 1] = 0;
 		if (strcmp(hname, remotehost) != 0)
 			printf(" (%s)", hname);
 	}
 	printf("\r\n");
 	if (logged_in) {
 		if (guest)
 			printf("     Logged in anonymously\r\n");
 		else
 			printf("     Logged in as %s\r\n", pw->pw_name);
 	} else if (askpasswd)
 		printf("     Waiting for password\r\n");
 	else
 		printf("     Waiting for user name\r\n");
 	printf("     TYPE: %s", typenames[type]);
 	if (type == TYPE_A || type == TYPE_E)
 		printf(", FORM: %s", formnames[form]);
 	if (type == TYPE_L)
 #if CHAR_BIT == 8
 		printf(" %d", CHAR_BIT);
 #else
 		printf(" %d", bytesize);	/* need definition! */
 #endif
 	printf("; STRUcture: %s; transfer MODE: %s\r\n",
 	    strunames[stru], modenames[mode]);
 	if (data != -1)
 		printf("     Data connection open\r\n");
 	else if (pdata != -1) {
 		/* Passive socket: jump into the shared address printer. */
 		ispassive = 1;
 		su = &pasv_addr;
 		goto printaddr;
 	} else if (usedefault == 0) {
 		ispassive = 0;
 		su = &data_dest;
 printaddr:
 #define UC(b) (((int) b) & 0xff)
 		if (epsvall) {
 			printf("     EPSV only mode (EPSV ALL)\r\n");
 			goto epsvonly;
 		}
 
 		/* PORT/PASV */
 		if (su->su_family == AF_INET) {
 			a = (u_char *) &su->su_sin.sin_addr;
 			p = (u_char *) &su->su_sin.sin_port;
 			printf("     %s (%d,%d,%d,%d,%d,%d)\r\n",
 				ispassive ? "PASV" : "PORT",
 				UC(a[0]), UC(a[1]), UC(a[2]), UC(a[3]),
 				UC(p[0]), UC(p[1]));
 		}
 
 		/* LPRT/LPSV */
 	    {
 		int alen, af, i;
 
 		/* af is the address family number printed first; 0 skips. */
 		switch (su->su_family) {
 		case AF_INET:
 			a = (u_char *) &su->su_sin.sin_addr;
 			p = (u_char *) &su->su_sin.sin_port;
 			alen = sizeof(su->su_sin.sin_addr);
 			af = 4;
 			break;
 		case AF_INET6:
 			a = (u_char *) &su->su_sin6.sin6_addr;
 			p = (u_char *) &su->su_sin6.sin6_port;
 			alen = sizeof(su->su_sin6.sin6_addr);
 			af = 6;
 			break;
 		default:
 			af = 0;
 			break;
 		}
 		if (af) {
 			printf("     %s (%d,%d,", ispassive ? "LPSV" : "LPRT",
 				af, alen);
 			for (i = 0; i < alen; i++)
 				printf("%d,", UC(a[i]));
 			printf("%d,%d,%d)\r\n", 2, UC(p[0]), UC(p[1]));
 		}
 	    }
 
 epsvonly:;
 		/* EPRT/EPSV */
 	    {
 		int af;
 
 		/* af is the protocol number printed between bars; 0 skips. */
 		switch (su->su_family) {
 		case AF_INET:
 			af = 1;
 			break;
 		case AF_INET6:
 			af = 2;
 			break;
 		default:
 			af = 0;
 			break;
 		}
 		if (af) {
 			union sockunion tmp;
 
 			/* Work on a copy so the scope id can be cleared. */
 			tmp = *su;
 			if (tmp.su_family == AF_INET6)
 				tmp.su_sin6.sin6_scope_id = 0;
 			if (!getnameinfo((struct sockaddr *)&tmp, tmp.su_len,
 					hname, sizeof(hname) - 1, NULL, 0,
 					NI_NUMERICHOST)) {
 				hname[sizeof(hname) - 1] = 0;
 				printf("     %s |%d|%s|%d|\r\n",
 					ispassive ? "EPSV" : "EPRT",
 					af, hname, htons(tmp.su_port));
 			}
 		}
 	    }
 #undef UC
 	} else
 		printf("     No data connection\r\n");
 	reply(211, "End of status.");
 }
 
 /*
  * Report an internal server error described by "s" with a 451 reply,
  * announce the closing of the connection with a 221 reply and then
  * call dologout(0).
  */
 void
 fatalerror(char *s)
 {
 
 	reply(451, "Error in server: %s", s);
 	reply(221, "Closing connection due to server error.");
 	dologout(0);
 	/* NOTREACHED */
 }
 
 /*
  * Send a single-line reply on stdout: the numeric code "n", a space,
  * the printf-style formatted text and CR LF.  The stream is flushed
  * afterwards.  With ftpdebug set the reply is also logged via syslog
  * at LOG_DEBUG level.
  */
 void
 reply(int n, const char *fmt, ...)
 {
 	va_list args;
 
 	va_start(args, fmt);
 	(void)fprintf(stdout, "%d ", n);
 	(void)vfprintf(stdout, fmt, args);
 	va_end(args);
 	(void)fputs("\r\n", stdout);
 	(void)fflush(stdout);
 
 	if (!ftpdebug)
 		return;
 
 	/* The argument list has to be restarted for the second pass. */
 	syslog(LOG_DEBUG, "<--- %d ", n);
 	va_start(args, fmt);
 	vsyslog(LOG_DEBUG, fmt, args);
 	va_end(args);
 }
 
 /*
  * Send one line of a multi-line reply on stdout: the numeric code "n"
  * followed by "- ", the printf-style formatted text and CR LF.  The
  * stream is flushed afterwards.  With ftpdebug set the line is also
  * logged via syslog at LOG_DEBUG level.
  */
 void
 lreply(int n, const char *fmt, ...)
 {
 	va_list args;
 
 	va_start(args, fmt);
 	(void)fprintf(stdout, "%d- ", n);
 	(void)vfprintf(stdout, fmt, args);
 	va_end(args);
 	(void)fputs("\r\n", stdout);
 	(void)fflush(stdout);
 
 	if (!ftpdebug)
 		return;
 
 	/* The argument list has to be restarted for the second pass. */
 	syslog(LOG_DEBUG, "<--- %d- ", n);
 	va_start(args, fmt);
 	vsyslog(LOG_DEBUG, fmt, args);
 	va_end(args);
 }
 
 /*
  * Send the generic 250 success reply for the command named by "s".
  */
 static void
 ack(char *s)
 {
 
 	reply(250, "%s command successful.", s);
 }
 
 /*
  * Send the 502 "not implemented" reply for the command named by "s".
  */
 void
 nack(char *s)
 {
 
 	reply(502, "%s command not implemented.", s);
 }
 
 /*
  * Report an unintelligible command with a 500 reply.  The text echoed
  * back is the command buffer cbuf, cut at its first newline; the
  * argument "s" is not used.
  */
 /* ARGSUSED */
 void
 yyerror(char *s)
 {
 	char *eol;
 
 	eol = strchr(cbuf, '\n');
 	if (eol != NULL)
 		*eol = '\0';
 	reply(500, "%s: command not understood.", cbuf);
 }
 
 /*
  * Remove "name": directories go through rmdir(), everything else through
  * unlink().  Non-directories are refused with a 550 reply when the
  * session is a guest one and noguestmod is set.  A successful removal is
  * acknowledged as DELE.
  */
 void
 delete(char *name)
 {
 	struct stat sb;
 
 	LOGCMD("delete", name);
 	if (lstat(name, &sb) < 0) {
 		perror_reply(550, name);
 		return;
 	}
 
 	if (S_ISDIR(sb.st_mode)) {
 		if (rmdir(name) < 0)
 			perror_reply(550, name);
 		else
 			ack("DELE");
 		return;
 	}
 
 	if (guest && noguestmod) {
 		reply(550, "Operation not permitted.");
 		return;
 	}
 
 	if (unlink(name) < 0)
 		perror_reply(550, name);
 	else
 		ack("DELE");
 }
 
 /*
  * Change the working directory to "path"; acknowledge as CWD on success
  * or send a 550 reply carrying the system error on failure.
  */
 void
 cwd(char *path)
 {
 
 	if (chdir(path) < 0)
 		perror_reply(550, path);
 	else
 		ack("CWD");
 }
 
 /*
  * Create directory "name" with mode 0777 and report it in a 257 reply
  * with embedded double quotes doubled.  Refused with a 550 reply for
  * guest sessions when noguestmkd is set.
  */
 void
 makedir(char *name)
 {
 	char *quoted;
 
 	LOGCMD("mkdir", name);
 	if (guest && noguestmkd) {
 		reply(550, "Operation not permitted.");
 		return;
 	}
 	if (mkdir(name, 0777) < 0) {
 		perror_reply(550, name);
 		return;
 	}
 
 	quoted = doublequote(name);
 	if (quoted == NULL)
 		fatalerror("Ran out of memory.");
 	reply(257, "\"%s\" directory created.", quoted);
 	free(quoted);
 }
 
 /*
  * Remove directory "name" with rmdir(); acknowledge as RMD on success
  * or send a 550 reply carrying the system error on failure.
  */
 void
 removedir(char *name)
 {
 
 	LOGCMD("rmdir", name);
 	if (rmdir(name) < 0)
 		perror_reply(550, name);
 	else
 		ack("RMD");
 }
 
 /*
  * Report the current working directory in a 257 reply, with embedded
  * double quotes doubled, or send a 550 reply if getcwd() fails.
  */
 void
 pwd(void)
 {
 	char cwdbuf[MAXPATHLEN + 1];
 	char *quoted;
 
 	if (getcwd(cwdbuf, sizeof(cwdbuf)) == NULL) {
 		perror_reply(550, "Get current directory");
 		return;
 	}
 
 	quoted = doublequote(cwdbuf);
 	if (quoted == NULL)
 		fatalerror("Ran out of memory.");
 	reply(257, "\"%s\" is current directory.", quoted);
 	free(quoted);
 }
 
 /*
  * First half of a rename: verify that "name" exists and answer 350.
  * Returns "name" on success and NULL after a 550 reply, which is sent
  * when a guest session has noguestmod set or when lstat() fails.
  */
 char *
 renamefrom(char *name)
 {
 	struct stat sb;
 	char *result = NULL;
 
 	if (guest && noguestmod)
 		reply(550, "Operation not permitted.");
 	else if (lstat(name, &sb) < 0)
 		perror_reply(550, name);
 	else {
 		reply(350, "File exists, ready for destination name.");
 		result = name;
 	}
 	return (result);
 }
 
 /*
  * Second half of a rename: move "from" to "to" and acknowledge as RNTO.
  * A guest session is refused with a 550 reply when stat() finds that
  * "to" already exists.
  */
 void
 renamecmd(char *from, char *to)
 {
 	struct stat sb;
 
 	LOGCMD2("rename", from, to);
 
 	if (guest && stat(to, &sb) == 0) {
 		reply(550, "%s: permission denied.", to);
 		return;
 	}
 
 	if (rename(from, to) >= 0)
 		ack("RNTO");
 	else
 		perror_reply(550, "rename");
 }
 
 /*
  * Record a new connection from "who": fill in the global remotehost via
  * realhostname_sa(), obtain the numeric form of the address for logging,
  * optionally update the process title and, when logging is enabled,
  * write a "connection from" line to syslog.
  */
 static void
 dolog(struct sockaddr *who)
 {
 	char who_name[NI_MAXHOST];
 
 	realhostname_sa(remotehost, sizeof(remotehost) - 1, who, who->sa_len);
 	remotehost[sizeof(remotehost) - 1] = 0;
 	/* Fall back to an empty string if the address cannot be printed. */
 	if (getnameinfo(who, who->sa_len,
 		who_name, sizeof(who_name) - 1, NULL, 0, NI_NUMERICHOST))
 			*who_name = 0;
 	who_name[sizeof(who_name) - 1] = 0;
 
 #ifdef SETPROCTITLE
 #ifdef VIRTUAL_HOSTING
 	/* Name the virtual host when it is not the first one. */
 	if (thishost != firsthost)
 		snprintf(proctitle, sizeof(proctitle), "%s: connected (to %s)",
 			 remotehost, hostname);
 	else
 #endif
 		snprintf(proctitle, sizeof(proctitle), "%s: connected",
 			 remotehost);
 	setproctitle("%s", proctitle);
 #endif /* SETPROCTITLE */
 
 	if (logging) {
 #ifdef VIRTUAL_HOSTING
 		if (thishost != firsthost)
 			syslog(LOG_INFO, "connection from %s (%s) to %s",
 			       remotehost, who_name, hostname);
 		else
 #endif
 			syslog(LOG_INFO, "connection from %s (%s)",
 			       remotehost, who_name);
 	}
 }
 
 /*
  * Record logout in wtmp file
  * and exit with supplied status.
  *
  * The wtmp record is only written for a logged-in session with dowtmp
  * set; the effective uid is switched to 0 first.  The process is ended
  * with _exit(), so this function does not return.
  */
 void
 dologout(int status)
 {
 
 	if (logged_in && dowtmp) {
 		(void) seteuid(0);
 		ftpd_logwtmp(wtmpid, NULL, NULL);
 	}
 	/* beware of flushing buffers after a SIGPIPE */
 	_exit(status);
 }
 
 /*
  * Signal handler (presumably installed for SIGURG elsewhere in the
  * file): only records the event by setting the recvurg flag.  The
  * signal number is not used.
  */
 static void
 sigurg(int signo)
 {
 
 	recvurg = 1;
 }
 
 /*
  * Block SIGURG when "flag" is nonzero and unblock it when "flag" is
  * zero.  Only valid while a transfer is flagged (transflag set);
  * otherwise an internal error is logged and nothing is changed.
  * errno is preserved across the call.
  */
 static void
 maskurg(int flag)
 {
 	sigset_t urgset;
 	int how, saved_errno;
 
 	if (!transflag) {
 		syslog(LOG_ERR, "Internal: maskurg() while no transfer");
 		return;
 	}
 
 	if (flag)
 		how = SIG_BLOCK;
 	else
 		how = SIG_UNBLOCK;
 
 	saved_errno = errno;
 	sigemptyset(&urgset);
 	sigaddset(&urgset, SIGURG);
 	sigprocmask(how, &urgset, NULL);
 	errno = saved_errno;
 }
 
 /*
  * Mark the beginning (flag != 0) or the end (flag == 0) of a transfer.
  * Starting a transfer sets transflag, unblocks SIGURG and clears
  * recvurg; ending one blocks SIGURG again and clears transflag.
  * An inconsistent call is logged but still carried out.
  */
 static void
 flagxfer(int flag)
 {
 
 	if (flag) {
 		if (transflag)
 			syslog(LOG_ERR, "Internal: flagxfer(1): "
 					"transfer already under way");
 		/* maskurg() refuses to act unless transflag is set. */
 		transflag = 1;
 		maskurg(0);
 		recvurg = 0;
 	} else {
 		if (!transflag)
 			syslog(LOG_ERR, "Internal: flagxfer(0): "
 					"no active transfer");
 		/* Block SIGURG while transflag is still set. */
 		maskurg(1);
 		transflag = 0;
 	}
 }
 
 /*
  * Handle a command arriving on the control connection in the middle of
  * a transfer.  Only ABOR and STAT are recognized; anything else, as well
  * as a truncated command, is ignored.
  *
  * Returns 0 if OK to resume or -1 if abort requested.
  */
 static int
 myoob(void)
 {
 	char *line = tmpline;
 	int status;
 
 	if (!transflag) {
 		syslog(LOG_ERR, "Internal: myoob() while no transfer");
 		return (0);
 	}
 
 	status = get_line(line, 7, stdin);
 	switch (status) {
 	case -1:
 		reply(221, "You could at least say goodbye.");
 		dologout(0);
 		break;
 	case -2:
 		/* Ignore truncated command. */
 		return (0);
 	default:
 		break;
 	}
 
 	upper(line);
 	if (strcmp(line, "ABOR\r\n") == 0) {
 		tmpline[0] = '\0';
 		reply(426, "Transfer aborted. Data connection closed.");
 		reply(226, "Abort successful.");
 		return (-1);
 	}
 	if (strcmp(line, "STAT\r\n") == 0) {
 		tmpline[0] = '\0';
 		if (file_size == -1)
 			reply(213, "Status: %jd bytes transferred.",
 				   (intmax_t)byte_count);
 		else
 			reply(213, "Status: %jd of %jd bytes transferred.",
 				   (intmax_t)byte_count, (intmax_t)file_size);
 	}
 	return (0);
 }
 
 /*
  * Handle the PASV command: open a listening socket in the address
  * family of the control connection, bind it to an ephemeral port and
  * announce its IPv4 address and port in a 227 reply.  An IPv6 socket is
  * only accepted when its local address is IPv4-mapped.  Any failure
  * closes the socket, resets pdata to -1 and produces a 425 reply.
  *
  * Note: a response of 425 is not mentioned as a possible response to
  *	the PASV command in RFC959. However, it has been blessed as
  *	a legitimate response by Jon Postel in a telephone conversation
  *	with Rick Adams on 25 Jan 89.
  */
 void
 passive(void)
 {
 	socklen_t len;
 	int on;
 	char *p, *a;
 
 	if (pdata >= 0)		/* close old port if one set */
 		close(pdata);
 
 	pdata = socket(ctrl_addr.su_family, SOCK_STREAM, 0);
 	if (pdata < 0) {
 		perror_reply(425, "Can't open passive connection");
 		return;
 	}
 	on = 1;
 	if (setsockopt(pdata, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0)
 		syslog(LOG_WARNING, "pdata setsockopt (SO_REUSEADDR): %m");
 
 	/* Port range selection and bind() run with effective uid 0. */
 	(void) seteuid(0);
 
 #ifdef IP_PORTRANGE
 	if (ctrl_addr.su_family == AF_INET) {
 	    on = restricted_data_ports ? IP_PORTRANGE_HIGH
 				       : IP_PORTRANGE_DEFAULT;
 
 	    if (setsockopt(pdata, IPPROTO_IP, IP_PORTRANGE,
 			    &on, sizeof(on)) < 0)
 		    goto pasv_error;
 	}
 #endif
 #ifdef IPV6_PORTRANGE
 	if (ctrl_addr.su_family == AF_INET6) {
 	    on = restricted_data_ports ? IPV6_PORTRANGE_HIGH
 				       : IPV6_PORTRANGE_DEFAULT;
 
 	    if (setsockopt(pdata, IPPROTO_IPV6, IPV6_PORTRANGE,
 			    &on, sizeof(on)) < 0)
 		    goto pasv_error;
 	}
 #endif
 
 	/* Same local address as the control connection, port 0. */
 	pasv_addr = ctrl_addr;
 	pasv_addr.su_port = 0;
 	if (bind(pdata, (struct sockaddr *)&pasv_addr, pasv_addr.su_len) < 0)
 		goto pasv_error;
 
 	/* Back to the effective uid of the user. */
 	(void) seteuid(pw->pw_uid);
 
 	/* Read back the address to learn which port was assigned. */
 	len = sizeof(pasv_addr);
 	if (getsockname(pdata, (struct sockaddr *) &pasv_addr, &len) < 0)
 		goto pasv_error;
 	if (listen(pdata, 1) < 0)
 		goto pasv_error;
 	if (pasv_addr.su_family == AF_INET)
 		a = (char *) &pasv_addr.su_sin.sin_addr;
 	else if (pasv_addr.su_family == AF_INET6 &&
 		 IN6_IS_ADDR_V4MAPPED(&pasv_addr.su_sin6.sin6_addr))
 		a = (char *) &pasv_addr.su_sin6.sin6_addr.s6_addr[12];
 	else
 		goto pasv_error;
 		
 	p = (char *) &pasv_addr.su_port;
 
 #define UC(b) (((int) b) & 0xff)
 
 	reply(227, "Entering Passive Mode (%d,%d,%d,%d,%d,%d)", UC(a[0]),
 		UC(a[1]), UC(a[2]), UC(a[3]), UC(p[0]), UC(p[1]));
 	return;
 
 pasv_error:
 	/* May be reached with effective uid 0, so switch back first. */
 	(void) seteuid(pw->pw_uid);
 	(void) close(pdata);
 	pdata = -1;
 	perror_reply(425, "Can't open passive connection");
 	return;
 }
 
 /*
  * Long Passive defined in RFC 1639.
  *     228 Entering Long Passive Mode
  *         (af, hal, h1, h2, h3,..., pal, p1, p2...)
  *
  * Serves both LPSV and EPSV; "cmd" names the command and selects the
  * reply format (228 for "LPSV", 229 for "EPSV").  "pf" is the protocol
  * family requested by the client or PF_UNSPEC; when it is given and does
  * not match the family of the control connection, a 522 (EPSV) or 501
  * reply is sent and no socket is opened.  Otherwise a listening socket
  * is bound to an ephemeral port on the control connection's local
  * address.  Any failure closes the socket, resets pdata to -1 and
  * produces a 425 reply.
  */
 
 void
 long_passive(char *cmd, int pf)
 {
 	socklen_t len;
 	int on;
 	char *p, *a;
 
 	if (pdata >= 0) {	/* close old port if one set */
 		close(pdata);
 		/*
 		 * Forget the descriptor right away: the protocol mismatch
 		 * path below returns before a new socket is created, and
 		 * pdata must not keep naming a closed descriptor.
 		 */
 		pdata = -1;
 	}
 
 	if (pf != PF_UNSPEC) {
 		if (ctrl_addr.su_family != pf) {
 			/* Tell the client which protocol number would work. */
 			switch (ctrl_addr.su_family) {
 			case AF_INET:
 				pf = 1;
 				break;
 			case AF_INET6:
 				pf = 2;
 				break;
 			default:
 				pf = 0;
 				break;
 			}
 			/*
 			 * XXX
 			 * only EPRT/EPSV ready clients will understand this
 			 */
 			if (strcmp(cmd, "EPSV") == 0 && pf) {
 				reply(522, "Network protocol mismatch, "
 					"use (%d)", pf);
 			} else
 				reply(501, "Network protocol mismatch."); /*XXX*/
 
 			return;
 		}
 	}
 		
 	pdata = socket(ctrl_addr.su_family, SOCK_STREAM, 0);
 	if (pdata < 0) {
 		perror_reply(425, "Can't open passive connection");
 		return;
 	}
 	on = 1;
 	if (setsockopt(pdata, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0)
 		syslog(LOG_WARNING, "pdata setsockopt (SO_REUSEADDR): %m");
 
 	/* Port range selection and bind() run with effective uid 0. */
 	(void) seteuid(0);
 
 	/* Same local address as the control connection, port 0. */
 	pasv_addr = ctrl_addr;
 	pasv_addr.su_port = 0;
 	len = pasv_addr.su_len;
 
 #ifdef IP_PORTRANGE
 	if (ctrl_addr.su_family == AF_INET) {
 	    on = restricted_data_ports ? IP_PORTRANGE_HIGH
 				       : IP_PORTRANGE_DEFAULT;
 
 	    if (setsockopt(pdata, IPPROTO_IP, IP_PORTRANGE,
 			    &on, sizeof(on)) < 0)
 		    goto pasv_error;
 	}
 #endif
 #ifdef IPV6_PORTRANGE
 	if (ctrl_addr.su_family == AF_INET6) {
 	    on = restricted_data_ports ? IPV6_PORTRANGE_HIGH
 				       : IPV6_PORTRANGE_DEFAULT;
 
 	    if (setsockopt(pdata, IPPROTO_IPV6, IPV6_PORTRANGE,
 			    &on, sizeof(on)) < 0)
 		    goto pasv_error;
 	}
 #endif
 
 	if (bind(pdata, (struct sockaddr *)&pasv_addr, len) < 0)
 		goto pasv_error;
 
 	/* Back to the effective uid of the user. */
 	(void) seteuid(pw->pw_uid);
 
 	/* Read back the address to learn which port was assigned. */
 	if (getsockname(pdata, (struct sockaddr *) &pasv_addr, &len) < 0)
 		goto pasv_error;
 	if (listen(pdata, 1) < 0)
 		goto pasv_error;
 
 #define UC(b) (((int) b) & 0xff)
 
 	if (strcmp(cmd, "LPSV") == 0) {
 		p = (char *)&pasv_addr.su_port;
 		switch (pasv_addr.su_family) {
 		case AF_INET:
 			a = (char *) &pasv_addr.su_sin.sin_addr;
 		v4_reply:
 			reply(228,
 "Entering Long Passive Mode (%d,%d,%d,%d,%d,%d,%d,%d,%d)",
 			      4, 4, UC(a[0]), UC(a[1]), UC(a[2]), UC(a[3]),
 			      2, UC(p[0]), UC(p[1]));
 			return;
 		case AF_INET6:
 			/* An IPv4-mapped address is announced as IPv4. */
 			if (IN6_IS_ADDR_V4MAPPED(&pasv_addr.su_sin6.sin6_addr)) {
 				a = (char *) &pasv_addr.su_sin6.sin6_addr.s6_addr[12];
 				goto v4_reply;
 			}
 			a = (char *) &pasv_addr.su_sin6.sin6_addr;
 			reply(228,
 "Entering Long Passive Mode "
 "(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)",
 			      6, 16, UC(a[0]), UC(a[1]), UC(a[2]), UC(a[3]),
 			      UC(a[4]), UC(a[5]), UC(a[6]), UC(a[7]),
 			      UC(a[8]), UC(a[9]), UC(a[10]), UC(a[11]),
 			      UC(a[12]), UC(a[13]), UC(a[14]), UC(a[15]),
 			      2, UC(p[0]), UC(p[1]));
 			return;
 		}
 	} else if (strcmp(cmd, "EPSV") == 0) {
 		switch (pasv_addr.su_family) {
 		case AF_INET:
 		case AF_INET6:
 			reply(229, "Entering Extended Passive Mode (|||%d|)",
 				ntohs(pasv_addr.su_port));
 			return;
 		}
 	} else {
 		/* more proper error code? */
 	}
 
 	/* Unhandled command or address family ends up here as well. */
 pasv_error:
 	(void) seteuid(pw->pw_uid);
 	(void) close(pdata);
 	pdata = -1;
 	perror_reply(425, "Can't open passive connection");
 	return;
 }
 
 /*
  * Generate unique name for file with basename "local"
  * and open the file in order to avoid possible races.
  * Try "local" first, then "local.1", "local.2" etc, up to "local.99".
  * Return descriptor to the file, set "name" to its name.
  *
  * The name stored in "*name" is either "local" itself or a static
  * buffer that is overwritten by the next call.  Returns -1 on failure.
  *
  * Generates failure reply on error.
  *
  * NOTE(review): "local" is cut at its last '/' while the directory is
  * checked; the '/' is put back only after both checks passed, so on the
  * stat() failure and "Pathname too long" paths the caller's string is
  * left truncated.
  */
 static int
 guniquefd(char *local, char **name)
 {
 	static char new[MAXPATHLEN];
 	struct stat st;
 	char *cp;
 	int count;
 	int fd;
 
 	/* Check that the containing directory (or ".") can be stat'ed. */
 	cp = strrchr(local, '/');
 	if (cp)
 		*cp = '\0';
 	if (stat(cp ? local : ".", &st) < 0) {
 		perror_reply(553, cp ? local : ".");
 		return (-1);
 	}
 	if (cp) {
 		/*
 		 * Let not overwrite dirname with counter suffix.
 		 * -4 is for /nn\0
 		 * In this extreme case dot won't be put in front of suffix.
 		 */
 		if (strlen(local) > sizeof(new) - 4) {
 			reply(553, "Pathname too long.");
 			return (-1);
 		}
 		*cp = '/';
 	}
 	/* -4 is for the .nn<null> we put on the end below */
 	(void) snprintf(new, sizeof(new) - 4, "%s", local);
 	cp = new + strlen(new);
 	/* 
 	 * Don't generate dotfile unless requested explicitly.
 	 * This covers the case when basename gets truncated off
 	 * by buffer size.
 	 */
 	if (cp > new && cp[-1] != '/')
 		*cp++ = '.';
 	for (count = 0; count < 100; count++) {
 		/* At count 0 try unmodified name */
 		if (count)
 			(void)sprintf(cp, "%d", count);
 		/* O_EXCL makes the existence test and the creation atomic. */
 		if ((fd = open(count ? new : local,
 		    O_RDWR | O_CREAT | O_EXCL, 0666)) >= 0) {
 			*name = count ? new : local;
 			return (fd);
 		}
 		/* Only "already exists" justifies trying the next suffix. */
 		if (errno != EEXIST) {
 			perror_reply(553, count ? new : local);
 			return (-1);
 		}
 	}
 	reply(452, "Unique file name cannot be created.");
 	return (-1);
 }
 
 /*
  * Format and send reply containing system error number.
  *
  * The reply has the form "<code> <string>: <strerror(errno)>.", so errno
  * must still hold the value of the failed call when this is invoked.
  */
 void
 perror_reply(int code, char *string)
 {
 
 	reply(code, "%s: %s.", string, strerror(errno));
 }
 
 /*
  * One-element, NULL-terminated name list used by send_file_list() when
  * the argument contains no glob characters; slot 0 is overwritten with
  * the argument on each such call.
  */
 static char *onefile[] = {
 	"",
 	0
 };
 
 /*
  * Send a plain list of file names matching "whichf" over the data
  * connection, one name per line.  An argument containing any of "~{[*?"
  * is expanded with glob(); otherwise it is used as is.  Regular files
  * are listed by name; for directories the entries other than "." and
  * ".." are listed, with a leading "./" stripped.  The data connection
  * is opened lazily when the first name is about to be written.
  */
 void
 send_file_list(char *whichf)
 {
 	struct stat st;
 	DIR *dirp = NULL;
 	struct dirent *dir;
 	FILE *dout = NULL;
 	char **dirlist, *dirname;
 	int simple = 0;
 	int freeglob = 0;
 	glob_t gl;
 
 	if (strpbrk(whichf, "~{[*?") != NULL) {
 		int flags = GLOB_BRACE|GLOB_NOCHECK|GLOB_TILDE;
 
 		memset(&gl, 0, sizeof(gl));
 		gl.gl_matchc = MAXGLOBARGS;
 		flags |= GLOB_LIMIT;
 		freeglob = 1;
 		if (glob(whichf, flags, 0, &gl)) {
 			reply(550, "No matching files found.");
 			goto out;
 		} else if (gl.gl_pathc == 0) {
 			errno = ENOENT;
 			perror_reply(550, whichf);
 			goto out;
 		}
 		dirlist = gl.gl_pathv;
 	} else {
 		onefile[0] = whichf;
 		dirlist = onefile;
 		simple = 1;
 	}
 
 	while ((dirname = *dirlist++)) {
 		if (stat(dirname, &st) < 0) {
 			/*
 			 * If user typed "ls -l", etc, and the client
 			 * used NLST, do what the user meant.
 			 */
 			if (dirname[0] == '-' && *dirlist == NULL &&
 			    dout == NULL)
 				retrieve(_PATH_LS " %s", dirname);
 			else
 				perror_reply(550, whichf);
 			goto out;
 		}
 
 		if (S_ISREG(st.st_mode)) {
 			if (dout == NULL) {
 				dout = dataconn("file list", -1, "w");
 				if (dout == NULL)
 					goto out;
 				STARTXFER;
 			}
 			START_UNSAFE;
 			fprintf(dout, "%s%s\n", dirname,
 				type == TYPE_A ? "\r" : "");
 			END_UNSAFE;
 			if (ferror(dout))
 				goto data_err;
 			byte_count += strlen(dirname) +
 				      (type == TYPE_A ? 2 : 1);
 			CHECKOOB(goto abrt);
 			continue;
 		} else if (!S_ISDIR(st.st_mode))
 			continue;
 
 		/* Directories that cannot be opened are silently skipped. */
 		if ((dirp = opendir(dirname)) == NULL)
 			continue;
 
 		while ((dir = readdir(dirp)) != NULL) {
 			char nbuf[MAXPATHLEN];
 
 			CHECKOOB(goto abrt);
 
 			if (dir->d_name[0] == '.' && dir->d_namlen == 1)
 				continue;
 			if (dir->d_name[0] == '.' && dir->d_name[1] == '.' &&
 			    dir->d_namlen == 2)
 				continue;
 
 			snprintf(nbuf, sizeof(nbuf),
 				"%s/%s", dirname, dir->d_name);
 
 			/*
 			 * We have to do a stat to ensure it's
 			 * not a directory or special file.
 			 */
 			if (simple || (stat(nbuf, &st) == 0 &&
 			    S_ISREG(st.st_mode))) {
 				if (dout == NULL) {
 					dout = dataconn("file list", -1, "w");
 					if (dout == NULL)
 						goto out;
 					STARTXFER;
 				}
 				START_UNSAFE;
 				if (nbuf[0] == '.' && nbuf[1] == '/')
 					fprintf(dout, "%s%s\n", &nbuf[2],
 						type == TYPE_A ? "\r" : "");
 				else
 					fprintf(dout, "%s%s\n", nbuf,
 						type == TYPE_A ? "\r" : "");
 				END_UNSAFE;
 				if (ferror(dout))
 					goto data_err;
 				byte_count += strlen(nbuf) +
 					      (type == TYPE_A ? 2 : 1);
 				CHECKOOB(goto abrt);
 			}
 		}
 		(void) closedir(dirp);
 		dirp = NULL;
 	}
 
 	if (dout == NULL)
 		reply(550, "No files found.");
 	else if (ferror(dout))
 data_err:	perror_reply(550, "Data connection");
 	else
 		reply(226, "Transfer complete.");
 out:
 	if (dout) {
 		ENDXFER;
 		/* The abrt entry point skips ENDXFER above. */
 abrt:
 		(void) fclose(dout);
 		data = -1;
 		pdata = -1;
 	}
 	if (dirp)
 		(void) closedir(dirp);
 	if (freeglob) {
 		freeglob = 0;
 		globfree(&gl);
 	}
 }
 
 /*
  * Signal handler (presumably installed for SIGCHLD elsewhere in the
  * file): collect every child that has already terminated, without
  * blocking.  The signal number is not used.
  *
  * waitpid() sets errno (e.g. to ECHILD once no children are left), so
  * the value seen by the interrupted code is saved and restored.
  */
 void
 reapchild(int signo)
 {
 	int saved_errno = errno;
 
 	(void)signo;
 	while (waitpid(-1, NULL, WNOHANG) > 0)
 		continue;
 	errno = saved_errno;
 }
 
 #ifdef OLD_SETPROCTITLE
 /*
  * Clobber argv so ps will show what we're doing.  (Stolen from sendmail.)
  * Warning, since this is usually started from inetd.conf, it often doesn't
  * have much of an environment or arglist to overwrite.
  *
  * The printf-style title is formatted into a local buffer, truncated to
  * the room available between Argv[0] and LastArgv, and copied over the
  * argument area behind a leading '-', with CR and LF characters dropped.
  * The remainder of the area is filled with spaces.
  */
 void
 setproctitle(const char *fmt, ...)
 {
 	int i;
 	va_list ap;
 	char *p, *bp, ch;
 	char buf[LINE_MAX];
 
 	va_start(ap, fmt);
 	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
 	va_end(ap);
 
 	/* make ps print our process name */
 	p = Argv[0];
 	*p++ = '-';
 
 	/* Cut the title down to the space we are allowed to overwrite. */
 	i = strlen(buf);
 	if (i > LastArgv - p - 2) {
 		i = LastArgv - p - 2;
 		buf[i] = '\0';
 	}
 	bp = buf;
 	while ((ch = *bp++) != '\0')
 		if (ch != '\n' && ch != '\r')
 			*p++ = ch;
 	while (p < LastArgv)
 		*p++ = ' ';
 }
 #endif /* OLD_SETPROCTITLE */
 
 /*
  * Append printf-style formatted text to the malloc'ed string "*strp".
  * If "*strp" is NULL the formatted text becomes the new string;
  * otherwise a new string holding the concatenation replaces it and the
  * old one is freed.  Running out of memory is reported through
  * fatalerror().
  */
 static void
 appendf(char **strp, char *fmt, ...)
 {
 	va_list ap;
 	char *ostr, *p;
 
 	/*
 	 * Rely on the return value rather than on the pointer being set
 	 * to NULL on failure, which not every implementation guarantees.
 	 */
 	va_start(ap, fmt);
 	if (vasprintf(&p, fmt, ap) == -1)
 		p = NULL;
 	va_end(ap);
 	if (p == NULL)
 		fatalerror("Ran out of memory.");
 	if (*strp == NULL)
 		*strp = p;
 	else {
 		ostr = *strp;
 		if (asprintf(strp, "%s%s", ostr, p) == -1)
 			*strp = NULL;
 		if (*strp == NULL)
 			fatalerror("Ran out of memory.");
 		free(ostr);
 		/* The fragment has been copied; don't leak it. */
 		free(p);
 	}
 }
 
 /*
  * Log a command to syslog at LOG_INFO level when logging is above 1.
  * The line consists of "cmd", the optional arguments "file1" and
  * "file2", the byte count "cnt" when it is not negative, and the
  * current working directory (or the error text if getcwd() fails),
  * marked as chrooted for guest and dochroot sessions.
  */
 static void
 logcmd(char *cmd, char *file1, char *file2, off_t cnt)
 {
 	char cwdbuf[MAXPATHLEN + 1];
 	char *line = NULL;
 	char *args[2];
 	int i;
 
 	if (logging <= 1)
 		return;
 
 	if (getcwd(cwdbuf, sizeof(cwdbuf) - 1) == NULL)
 		strcpy(cwdbuf, strerror(errno));
 
 	appendf(&line, "%s", cmd);
 	args[0] = file1;
 	args[1] = file2;
 	for (i = 0; i < 2; i++) {
 		if (args[i])
 			appendf(&line, " %s", args[i]);
 	}
 	if (cnt >= 0)
 		appendf(&line, " = %jd bytes", (intmax_t)cnt);
 	appendf(&line, " (wd: %s", cwdbuf);
 	if (guest || dochroot)
 		appendf(&line, "; chrooted");
 	appendf(&line, ")");
 
 	syslog(LOG_INFO, "%s", line);
 	free(line);
 }
 
 /*
  * Append a transfer record to the statistics file when one is open
  * (statfd >= 0).  The '!'-separated record holds the time stamp, ident,
  * the remote host, the resolved path of "name", "size" and the elapsed
  * seconds since "start", counted as at least one.
  */
 static void
 logxfer(char *name, off_t size, time_t start)
 {
 	char path[MAXPATHLEN + 1];
 	char record[MAXPATHLEN + 1024];
 	time_t now;
 	long elapsed;
 
 	if (statfd < 0)
 		return;
 
 	time(&now);
 	if (realpath(name, path) == NULL) {
 		syslog(LOG_NOTICE, "realpath failed on %s: %m", path);
 		return;
 	}
 
 	/* A transfer finished within the same second counts as one. */
 	elapsed = (long)(now - start + (now == start));
 	snprintf(record, sizeof(record), "%.20s!%s!%s!%s!%jd!%ld\n",
 		ctime(&now)+4, ident, remotehost,
 		path, (intmax_t)size,
 		elapsed);
 	write(statfd, record, strlen(record));
 }
 
 /*
  * Return a malloc'ed copy of "s" in which every double quote character
  * is doubled, or NULL if memory cannot be allocated.  The caller frees
  * the result.
  */
 static char *
 doublequote(char *s)
 {
 	size_t len, nquotes;
 	const char *src;
 	char *copy, *dst;
 
 	/* First pass: measure the string and count the quotes. */
 	len = nquotes = 0;
 	for (src = s; *src != '\0'; src++) {
 		len++;
 		if (*src == '"')
 			nquotes++;
 	}
 
 	copy = malloc(len + nquotes + 1);
 	if (copy == NULL)
 		return (NULL);
 
 	/* Second pass: copy, emitting each quote twice. */
 	dst = copy;
 	for (src = s; *src != '\0'; src++) {
 		*dst++ = *src;
 		if (*src == '"')
 			*dst++ = '"';
 	}
 	*dst = '\0';
 
 	return (copy);
 }
 
 /* setup server socket for specified address family */
 /* if af is PF_UNSPEC more than one socket may be returned */
 /* the returned list is dynamically allocated, so caller needs to free it */
 /*
  * "bindname" and "bindport" are handed to getaddrinfo() with AI_PASSIVE.
  * Element 0 of the returned array holds the number of sockets, which
  * follow from element 1 on.  Every returned socket has been bound but
  * is not yet listening.  Returns NULL, after logging the reason, when
  * the lookup fails, memory runs out or no socket could be bound.
  */
 static int *
 socksetup(int af, char *bindname, const char *bindport)
 {
 	struct addrinfo hints, *res, *r;
 	int error, maxs, *s, *socks;
 	const int on = 1;
 
 	memset(&hints, 0, sizeof(hints));
 	hints.ai_flags = AI_PASSIVE;
 	hints.ai_family = af;
 	hints.ai_socktype = SOCK_STREAM;
 	error = getaddrinfo(bindname, bindport, &hints, &res);
 	if (error) {
 		syslog(LOG_ERR, "%s", gai_strerror(error));
 		if (error == EAI_SYSTEM)
 			syslog(LOG_ERR, "%s", strerror(errno));
 		return NULL;
 	}
 
 	/* Count max number of sockets we may open */
 	for (maxs = 0, r = res; r; r = r->ai_next, maxs++)
 		;
 	/* One extra slot for the counter kept in element 0. */
 	socks = malloc((maxs + 1) * sizeof(int));
 	if (!socks) {
 		freeaddrinfo(res);
 		syslog(LOG_ERR, "couldn't allocate memory for sockets");
 		return NULL;
 	}
 
 	*socks = 0;   /* num of sockets counter at start of array */
 	s = socks + 1;
 	for (r = res; r; r = r->ai_next) {
 		/* A slot is only kept (s advanced) once bind() succeeded. */
 		*s = socket(r->ai_family, r->ai_socktype, r->ai_protocol);
 		if (*s < 0) {
 			syslog(LOG_DEBUG, "control socket: %m");
 			continue;
 		}
 		if (setsockopt(*s, SOL_SOCKET, SO_REUSEADDR,
 		    &on, sizeof(on)) < 0)
 			syslog(LOG_WARNING,
 			    "control setsockopt (SO_REUSEADDR): %m");
 		if (r->ai_family == AF_INET6) {
 			if (setsockopt(*s, IPPROTO_IPV6, IPV6_V6ONLY,
 			    &on, sizeof(on)) < 0)
 				syslog(LOG_WARNING,
 				    "control setsockopt (IPV6_V6ONLY): %m");
 		}
 		if (bind(*s, r->ai_addr, r->ai_addrlen) < 0) {
 			syslog(LOG_DEBUG, "control bind: %m");
 			close(*s);
 			continue;
 		}
 		(*socks)++;
 		s++;
 	}
 
 	if (res)
 		freeaddrinfo(res);
 
 	if (*socks == 0) {
 		syslog(LOG_ERR, "control socket: Couldn't bind to any socket");
 		free(socks);
 		return NULL;
 	}
 	return(socks);
 }
Index: projects/vnet/share/man/man4/ddb.4
===================================================================
--- projects/vnet/share/man/man4/ddb.4	(revision 301522)
+++ projects/vnet/share/man/man4/ddb.4	(revision 301523)
@@ -1,1542 +1,1553 @@
 .\"
 .\" Mach Operating System
 .\" Copyright (c) 1991,1990 Carnegie Mellon University
 .\" Copyright (c) 2007 Robert N. M. Watson
 .\" All Rights Reserved.
 .\"
 .\" Permission to use, copy, modify and distribute this software and its
 .\" documentation is hereby granted, provided that both the copyright
 .\" notice and this permission notice appear in all copies of the
 .\" software, derivative works or modified versions, and any portions
 .\" thereof, and that both notices appear in supporting documentation.
 .\"
 .\" CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 .\" CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 .\" ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 .\"
 .\" Carnegie Mellon requests users of this software to return to
 .\"
 .\"  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 .\"  School of Computer Science
 .\"  Carnegie Mellon University
 .\"  Pittsburgh PA 15213-3890
 .\"
 .\" any improvements or extensions that they make and grant Carnegie Mellon
 .\" the rights to redistribute these changes.
 .\"
 .\" changed a \# to #, since groff choked on it.
 .\"
 .\" HISTORY
 .\" ddb.4,v
 .\" Revision 1.1  1993/07/15  18:41:02  brezak
 .\" Man page for DDB
 .\"
 .\" Revision 2.6  92/04/08  08:52:57  rpd
 .\" 	Changes from OSF.
 .\" 	[92/01/17  14:19:22  jsb]
 .\" 	Changes for OSF debugger modifications.
 .\" 	[91/12/12            tak]
 .\"
 .\" Revision 2.5  91/06/25  13:50:22  rpd
 .\" 	Added some watchpoint explanation.
 .\" 	[91/06/25            rpd]
 .\"
 .\" Revision 2.4  91/06/17  15:47:31  jsb
 .\" 	Added documentation for continue/c, match, search, and watchpoints.
 .\" 	I've not actually explained what a watchpoint is; maybe Rich can
 .\" 	do that (hint, hint).
 .\" 	[91/06/17  10:58:08  jsb]
 .\"
 .\" Revision 2.3  91/05/14  17:04:23  mrt
 .\" 	Correcting copyright
 .\"
 .\" Revision 2.2  91/02/14  14:10:06  mrt
 .\" 	Changed to new Mach copyright
 .\" 	[91/02/12  18:10:12  mrt]
 .\"
 .\" Revision 2.2  90/08/30  14:23:15  dbg
 .\" 	Created.
 .\" 	[90/08/30            dbg]
 .\"
 .\" $FreeBSD$
 .\"
-.Dd May 18, 2016
+.Dd June 6, 2016
 .Dt DDB 4
 .Os
 .Sh NAME
 .Nm ddb
 .Nd interactive kernel debugger
 .Sh SYNOPSIS
 In order to enable kernel debugging facilities include:
 .Bd -ragged -offset indent
 .Cd options KDB
 .Cd options DDB
 .Ed
 .Pp
 To prevent activation of the debugger on kernel
 .Xr panic 9 :
 .Bd -ragged -offset indent
 .Cd options KDB_UNATTENDED
 .Ed
 .Pp
 In order to print a stack trace of the current thread on the console
 for a panic:
 .Bd -ragged -offset indent
 .Cd options KDB_TRACE
 .Ed
 .Pp
 To print the numerical value of symbols in addition to the symbolic
 representation, define:
 .Bd -ragged -offset indent
 .Cd options DDB_NUMSYM
 .Ed
 .Pp
 To enable the
 .Xr gdb 1
 backend, so that remote debugging with
 .Xr kgdb 1
 is possible, include:
 .Bd -ragged -offset indent
 .Cd options GDB
 .Ed
 .Sh DESCRIPTION
 The
 .Nm
 kernel debugger is an interactive debugger with a syntax inspired by
 .Xr gdb 1 .
 If linked into the running kernel,
 it can be invoked locally with the
 .Ql debug
 .Xr keymap 5
 action.
 The debugger is also invoked on kernel
 .Xr panic 9
 if the
 .Va debug.debugger_on_panic
 .Xr sysctl 8
 MIB variable is set non-zero,
 which is the default
 unless the
 .Dv KDB_UNATTENDED
 option is specified.
 .Pp
 The current location is called
 .Va dot .
 The
 .Va dot
 is displayed with
 a hexadecimal format at a prompt.
 The commands
 .Ic examine
 and
 .Ic write
 update
 .Va dot
 to the address of the last line
 examined or the last location modified, and set
 .Va next
 to the address of
 the next location to be examined or changed.
 Other commands do not change
 .Va dot ,
 and set
 .Va next
 to be the same as
 .Va dot .
 .Pp
 The general command syntax is:
 .Ar command Ns Op Li / Ns Ar modifier
 .Ar address Ns Op Li , Ns Ar count
 .Pp
 A blank line repeats the previous command from the address
 .Va next
 with
 count 1 and no modifiers.
 Specifying
 .Ar address
 sets
 .Va dot
 to the address.
 Omitting
 .Ar address
 uses
 .Va dot .
 A missing
 .Ar count
 is taken
 to be 1 for printing commands or infinity for stack traces.
 .Pp
 The
 .Nm
 debugger has a pager feature (like the
 .Xr more 1
 command)
 for the output.
 If an output line exceeds the number set in the
 .Va lines
 variable, it displays
 .Dq Li --More--
 and waits for a response.
 The valid responses for it are:
 .Pp
 .Bl -tag -compact -width ".Li SPC"
 .It Li SPC
 one more page
 .It Li RET
 one more line
 .It Li q
 abort the current command, and return to the command input mode
 .El
 .Pp
 Finally,
 .Nm
 provides a small (currently 10 items) command history, and offers
 simple
 .Nm emacs Ns -style
 command line editing capabilities.
 In addition to
 the
 .Nm emacs
 control keys, the usual
 .Tn ANSI
 arrow keys may be used to
 browse through the history buffer, and move the cursor within the
 current line.
 .Sh COMMANDS
 .Bl -tag -width indent -compact
 .It Ic examine
 .It Ic x
 Display the addressed locations according to the formats in the modifier.
 Multiple modifier formats display multiple locations.
 If no format is specified, the last format specified for this command
 is used.
 .Pp
 The format characters are:
 .Bl -tag -compact -width indent
 .It Cm b
 look at by bytes (8 bits)
 .It Cm h
 look at by half words (16 bits)
 .It Cm l
 look at by long words (32 bits)
 .It Cm g
 look at by quad words (64 bits)
 .It Cm a
 print the location being displayed
 .It Cm A
 print the location with a line number if possible
 .It Cm x
 display in unsigned hex
 .It Cm z
 display in signed hex
 .It Cm o
 display in unsigned octal
 .It Cm d
 display in signed decimal
 .It Cm u
 display in unsigned decimal
 .It Cm r
 display in current radix, signed
 .It Cm c
 display low 8 bits as a character.
 Non-printing characters are displayed as an octal escape code (e.g.,
 .Ql \e000 ) .
 .It Cm s
 display the null-terminated string at the location.
 Non-printing characters are displayed as octal escapes.
 .It Cm m
 display in unsigned hex with character dump at the end of each line.
 The location is also displayed in hex at the beginning of each line.
 .It Cm i
 display as an instruction
 .It Cm I
 display as an instruction with possible alternate formats depending on the
 machine, but none of the supported architectures have an alternate format.
 .It Cm S
 display a symbol name for the pointer stored at the address
 .El
 .Pp
 .It Ic xf
 Examine forward:
 execute an
 .Ic examine
 command with the last specified parameters to it
 except that the next address displayed by it is used as the start address.
 .Pp
 .It Ic xb
 Examine backward:
 execute an
 .Ic examine
 command with the last specified parameters to it
 except that the last start address subtracted by the size displayed by it
 is used as the start address.
 .Pp
 .It Ic print Ns Op Li / Ns Cm acdoruxz
 .It Ic p Ns Op Li / Ns Cm acdoruxz
 Print
 .Ar addr Ns s
 according to the modifier character (as described above for
 .Cm examine ) .
 Valid formats are:
 .Cm a , x , z , o , d , u , r ,
 and
 .Cm c .
 If no modifier is specified, the last one specified to it is used.
 The argument
 .Ar addr
 can be a string, in which case it is printed as it is.
 For example:
 .Bd -literal -offset indent
 print/x "eax = " $eax "\enecx = " $ecx "\en"
 .Ed
 .Pp
 will print like:
 .Bd -literal -offset indent
 eax = xxxxxx
 ecx = yyyyyy
 .Ed
 .Pp
 .It Xo
 .Ic write Ns Op Li / Ns Cm bhl
 .Ar addr expr1 Op Ar expr2 ...
 .Xc
 .It Xo
 .Ic w Ns Op Li / Ns Cm bhl
 .Ar addr expr1 Op Ar expr2 ...
 .Xc
 Write the expressions specified after
 .Ar addr
 on the command line at succeeding locations starting with
 .Ar addr .
 The write unit size can be specified in the modifier with a letter
 .Cm b
 (byte),
 .Cm h
 (half word) or
 .Cm l
 (long word) respectively.
 If omitted,
 long word is assumed.
 .Pp
 .Sy Warning :
 since there is no delimiter between expressions, strange
 things may happen.
 It is best to enclose each expression in parentheses.
 .Pp
 .It Ic set Li $ Ns Ar variable Oo Li = Oc Ar expr
 Set the named variable or register with the value of
 .Ar expr .
 Valid variable names are described below.
 .Pp
 .It Ic break Ns Op Li / Ns Cm u
 .It Ic b Ns Op Li / Ns Cm u
 Set a break point at
 .Ar addr .
 If
 .Ar count
 is supplied, continues
 .Ar count
 \- 1 times before stopping at the
 break point.
 If the break point is set, a break point number is
 printed with
 .Ql # .
 This number can be used in deleting the break point
 or adding conditions to it.
 .Pp
 If the
 .Cm u
 modifier is specified, this command sets a break point in user
 address space.
 Without the
 .Cm u
 option, the address is considered to be in the kernel
 space, and a wrong space address is rejected with an error message.
 This modifier can be used only if it is supported by machine dependent
 routines.
 .Pp
 .Sy Warning :
 If a user text is shadowed by a normal user space debugger,
 user space break points may not work correctly.
 Setting a break
 point at the low-level code paths may also cause strange behavior.
 .Pp
 .It Ic delete Ar addr
 .It Ic d Ar addr
 .It Ic delete Li # Ns Ar number
 .It Ic d Li # Ns Ar number
 Delete the break point.
 The target break point can be specified by a
 break point number with
 .Ql # ,
 or by using the same
 .Ar addr
 specified in the original
 .Ic break
 command.
 .Pp
 .It Ic watch Ar addr Ns Li , Ns Ar size
 Set a watchpoint for a region.
 Execution stops when an attempt to modify the region occurs.
 The
 .Ar size
 argument defaults to 4.
 If you specify a wrong space address, the request is rejected
 with an error message.
 .Pp
 .Sy Warning :
 Attempts to watch wired kernel memory
 may cause unrecoverable error in some systems such as i386.
 Watchpoints on user addresses work best.
 .Pp
 .It Ic hwatch Ar addr Ns Li , Ns Ar size
 Set a hardware watchpoint for a region if supported by the
 architecture.
 Execution stops when an attempt to modify the region occurs.
 The
 .Ar size
 argument defaults to 4.
 .Pp
 .Sy Warning :
 The hardware debug facilities do not have a concept of separate
 address spaces like the watch command does.
 Use
 .Ic hwatch
 for setting watchpoints on kernel address locations only, and avoid
 its use on user mode address spaces.
 .Pp
 .It Ic dhwatch Ar addr Ns Li , Ns Ar size
 Delete specified hardware watchpoint.
 .Pp
 .It Ic step Ns Op Li / Ns Cm p
 .It Ic s Ns Op Li / Ns Cm p
 Single step
 .Ar count
 times (the comma is a mandatory part of the syntax).
 If the
 .Cm p
 modifier is specified, print each instruction at each step.
 Otherwise, only print the last instruction.
 .Pp
 .Sy Warning :
 depending on machine type, it may not be possible to
 single-step through some low-level code paths or user space code.
 On machines with software-emulated single-stepping (e.g., pmax),
 stepping through code executed by interrupt handlers will probably
 do the wrong thing.
 .Pp
 .It Ic continue Ns Op Li / Ns Cm c
 .It Ic c Ns Op Li / Ns Cm c
 Continue execution until a breakpoint or watchpoint.
 If the
 .Cm c
 modifier is specified, count instructions while executing.
 Some machines (e.g., pmax) also count loads and stores.
 .Pp
 .Sy Warning :
 when counting, the debugger is really silently single-stepping.
 This means that single-stepping on low-level code may cause strange
 behavior.
 .Pp
 .It Ic until Ns Op Li / Ns Cm p
 Stop at the next call or return instruction.
 If the
 .Cm p
 modifier is specified, print the call nesting depth and the
 cumulative instruction count at each call or return.
 Otherwise,
 only print when the matching return is hit.
 .Pp
 .It Ic next Ns Op Li / Ns Cm p
 .It Ic match Ns Op Li / Ns Cm p
 Stop at the matching return instruction.
 If the
 .Cm p
 modifier is specified, print the call nesting depth and the
 cumulative instruction count at each call or return.
 Otherwise, only print when the matching return is hit.
 .Pp
 .It Xo
 .Ic trace Ns Op Li / Ns Cm u
 .Op Ar pid | tid
 .Op Li , Ns Ar count
 .Xc
 .It Xo
 .Ic t Ns Op Li / Ns Cm u
 .Op Ar pid | tid
 .Op Li , Ns Ar count
 .Xc
 .It Xo
 .Ic where Ns Op Li / Ns Cm u
 .Op Ar pid | tid
 .Op Li , Ns Ar count
 .Xc
 .It Xo
 .Ic bt Ns Op Li / Ns Cm u
 .Op Ar pid | tid
 .Op Li , Ns Ar count
 .Xc
 Stack trace.
 The
 .Cm u
 option traces user space; if omitted,
 .Ic trace
 only traces
 kernel space.
 The optional argument
 .Ar count
 is the number of frames to be traced.
 If
 .Ar count
 is omitted, all frames are printed.
 .Pp
 .Sy Warning :
 User space stack trace is valid
 only if the machine dependent code supports it.
 .Pp
 .It Xo
 .Ic search Ns Op Li / Ns Cm bhl
 .Ar addr
 .Ar value
 .Op Ar mask
 .Op Li , Ns Ar count
 .Xc
 Search memory for
 .Ar value .
 This command might fail in interesting
 ways if it does not find the searched-for value.
 This is because
 .Nm
 does not always recover from touching bad memory.
 The optional
 .Ar count
 argument limits the search.
 .\"
 .Pp
 .It Xo
 .Ic findstack
 .Ar addr
 .Xc
 Prints the address of the thread whose kernel-mode stack contains the
 specified address.
 If the thread is not found, searches the thread stack cache and prints the
 cached stack address.
 Otherwise, prints nothing.
 .Pp
 .It Ic show Cm all procs Ns Op Li / Ns Cm m
 .It Ic ps Ns Op Li / Ns Cm m
 Display all process information.
 The process information may not be shown if it is not
 supported in the machine, or the bottom of the stack of the
 target process is not in the main memory at that time.
 The
 .Cm m
 modifier will alter the display to show VM map
 addresses for the process and not show other information.
 .\"
 .Pp
 .It Ic show Cm all ttys
 Show all TTY's within the system.
 Output is similar to
 .Xr pstat 8 ,
 but also includes the address of the TTY structure.
 .\"
 .Pp
 .It Ic show Cm all vnets
 Show the same output as "show vnet" does, but lists all
 virtualized network stacks within the system.
 .\"
 .Pp
 .It Ic show Cm allchains
 Show the same information as "show lockchain" does, but
 for every thread in the system.
 .\"
 .Pp
 .It Ic show Cm alllocks
 Show all locks that are currently held.
 This command is only available if
 .Xr witness 4
 is included in the kernel.
 .\"
 .Pp
 .It Ic show Cm allpcpu
 The same as "show pcpu", but for every CPU present in the system.
 .\"
 .Pp
 .It Ic show Cm allrman
 Show information related to resource management, including
 interrupt request lines, DMA request lines, I/O ports, I/O memory
 addresses, and Resource IDs.
 .\"
 .Pp
 .It Ic show Cm apic
 Dump data about APIC IDT vector mappings.
 .\"
 .Pp
 .It Ic show Cm breaks
 Show breakpoints set with the "break" command.
 .\"
 .Pp
 .It Ic show Cm bio Ar addr
 Show information about the bio structure
 .Vt struct bio
 present at
 .Ar addr .
 See the
 .Pa sys/bio.h
 header file and
 .Xr g_bio 9
 for more details on the exact meaning of the structure fields.
 .\"
 .Pp
 .It Ic show Cm buffer Ar addr
 Show information about the buf structure
 .Vt struct buf
 present at
 .Ar addr .
 See the
 .Pa sys/buf.h
 header file for more details on the exact meaning of the structure fields.
 .\"
 .Pp
+.It Ic show Cm callout Ar addr
+Show information about the callout structure
+.Vt struct callout
+present at
+.Ar addr .
+.\"
+.Pp
 .It Ic show Cm cbstat
 Show brief information about the TTY subsystem.
 .\"
 .Pp
 .It Ic show Cm cdev
 Without argument, show the list of all created cdev's, consisting of devfs
 node name and struct cdev address.
 When address of cdev is supplied, show some internal devfs state of the cdev.
 .\"
 .Pp
 .It Ic show Cm conifhk
 Lists hooks currently waiting for completion in
 run_interrupt_driven_config_hooks().
 .\"
 .Pp
 .It Ic show Cm cpusets
 Print numbered root and assigned CPU affinity sets.
 See
 .Xr cpuset 2
 for more details.
 .\"
 .Pp
 .It Ic show Cm cyrixreg
 Show registers specific to the Cyrix processor.
 .\"
 .Pp
 .It Ic show Cm devmap
 Prints the contents of the static device mapping table.
 Currently only available on the
 ARM
 architecture.
 .\"
 .Pp
 .It Ic show Cm domain Ar addr
 Print protocol domain structure
 .Vt struct domain
 at address
 .Ar addr .
 See the
 .Pa sys/domain.h
 header file for more details on the exact meaning of the structure fields.
 .\"
 .Pp
 .It Ic show Cm ffs Op Ar addr
 Show brief information about ffs mount at the address
 .Ar addr ,
 if argument is given.
 Otherwise, provides the summary about each ffs mount.
 .\"
 .Pp
 .It Ic show Cm file Ar addr
 Show information about the file structure
 .Vt struct file
 present at address
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm files
 Show information about every file structure in the system.
 .\"
 .Pp
 .It Ic show Cm freepages
 Show the number of physical pages in each of the free lists.
 .\"
 .Pp
 .It Ic show Cm geom Op Ar addr
 If the
 .Ar addr
 argument is not given, displays the entire GEOM topology.
 If
 .Ar addr
 is given, displays details about the given GEOM object (class, geom,
 provider or consumer).
 .\"
 .Pp
 .It Ic show Cm idt
 Show IDT layout.
 The first column specifies the IDT vector.
 The second one is the name of the interrupt/trap handler.
 Those functions are machine dependent.
 .\"
 .Pp
 .It Ic show Cm inodedeps Op Ar addr
 Show brief information about each inodedep structure.
 If
 .Ar addr
 is given, only inodedeps belonging to the fs located at the
 supplied address are shown.
 .\"
 .Pp
 .It Ic show Cm inpcb Ar addr
 Show information on IP Control Block
 .Vt struct in_pcb
 present at
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm intr
 Dump information about interrupt handlers.
 .\"
 .Pp
 .It Ic show Cm intrcnt
 Dump the interrupt statistics.
 .\"
 .Pp
 .It Ic show Cm irqs
 Show interrupt lines and their respective kernel threads.
 .\"
 .Pp
 .It Ic show Cm jails
 Show the list of
 .Xr jail 8
 instances.
 In addition to what
 .Xr jls 8
 shows, also list kernel internal details.
 .\"
 .Pp
 .It Ic show Cm lapic
 Show information from the local APIC registers for this CPU.
 .\"
 .Pp
 .It Ic show Cm lock Ar addr
 Show lock structure.
 The output format is as follows:
 .Bl -tag -width "flags"
 .It Ic class:
 Class of the lock.
 Possible types include
 .Xr mutex 9 ,
 .Xr rmlock 9 ,
 .Xr rwlock 9 ,
 .Xr sx 9 .
 .It Ic name:
 Name of the lock.
 .It Ic flags:
 Flags passed to the lock initialization function.
 For exact possibilities see manual pages of possible lock types.
 .It Ic state:
 Current state of a lock.
 As well as
 .Ic flags
 it's lock-specific.
 .It Ic owner:
 Lock owner.
 .El
 .\"
 .Pp
 .It Ic show Cm lockchain Ar addr
 Show all threads a particular thread at address
 .Ar addr
 is waiting on based on non-sleepable and non-spin locks.
 .\"
 .Pp
 .It Ic show Cm lockedbufs
 Show the same information as "show buf", but for every locked
 .Vt struct buf
 object.
 .\"
 .Pp
 .It Ic show Cm lockedvnods
 List all locked vnodes in the system.
 .\"
 .Pp
 .It Ic show Cm locks
 Prints all locks that are currently acquired.
 This command is only available if
 .Xr witness 4
 is included in the kernel.
 .\"
 .Pp
 .It Ic show Cm locktree
 .\"
 .Pp
 .It Ic show Cm malloc
 Prints
 .Xr malloc 9
 memory allocator statistics.
 The output format is as follows:
 .Pp
 .Bl -tag -compact -offset indent -width "Requests"
 .It Ic Type
 Specifies a type of memory.
 It is the same as a description string used while defining the
 given memory type with
 .Xr MALLOC_DECLARE 9 .
 .It Ic InUse
 Number of memory allocations of the given type, for which
 .Xr free 9
 has not been called yet.
 .It Ic MemUse
 Total memory consumed by the given allocation type.
 .It Ic Requests
 Number of memory allocation requests for the given
 memory type.
 .El
 .Pp
 The same information can be gathered in userspace with
 .Dq Nm vmstat Fl m .
 .\"
 .Pp
 .It Ic show Cm map Ns Oo Li / Ns Cm f Oc Ar addr
 Prints the VM map at
 .Ar addr .
 If the
 .Cm f
 modifier is specified the
 complete map is printed.
 .\"
 .Pp
 .It Ic show Cm msgbuf
 Print the system's message buffer.
 It is the same output as in the
 .Dq Nm dmesg
 case.
 It is useful if you got a kernel panic, attached a serial cable
 to the machine and want to get the boot messages from before the
 system hang.
 .\"
 .It Ic show Cm mount
 Displays short info about all currently mounted file systems.
 .Pp
 .It Ic show Cm mount Ar addr
 Displays details about the given mount point.
 .\"
 .Pp
 .It Ic show Cm object Ns Oo Li / Ns Cm f Oc Ar addr
 Prints the VM object at
 .Ar addr .
 If the
 .Cm f
 option is specified the
 complete object is printed.
+.\"
+.Pp
+.It Ic show Cm panic
+Print the panic message if set.
 .\"
 .Pp
 .It Ic show Cm page
 Show statistics on VM pages.
 .\"
 .Pp
 .It Ic show Cm pageq
 Show statistics on VM page queues.
 .\"
 .Pp
 .It Ic show Cm pciregs
 Print PCI bus registers.
 The same information can be gathered in userspace by running
 .Dq Nm pciconf Fl lv .
 .\"
 .Pp
 .It Ic show Cm pcpu
 Print current processor state.
 The output format is as follows:
 .Pp
 .Bl -tag -compact -offset indent -width "spin locks held:"
 .It Ic cpuid
 Processor identifier.
 .It Ic curthread
 Thread pointer, process identifier and the name of the process.
 .It Ic curpcb
 Control block pointer.
 .It Ic fpcurthread
 FPU thread pointer.
 .It Ic idlethread
 Idle thread pointer.
 .It Ic APIC ID
 CPU identifier coming from APIC.
 .It Ic currentldt
 LDT pointer.
 .It Ic spin locks held
 Names of spin locks held.
 .El
 .\"
 .Pp
 .It Ic show Cm pgrpdump
 Dump process groups present within the system.
 .\"
 .Pp
 .It Ic show Cm proc Op Ar addr
 If no
 .Op Ar addr
 is specified, print information about the current process.
 Otherwise, show information about the process at address
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm procvm
 Show process virtual memory layout.
 .\"
 .Pp
 .It Ic show Cm protosw Ar addr
 Print protocol switch structure
 .Vt struct protosw
 at address
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm registers Ns Op Li / Ns Cm u
 Display the register set.
 If the
 .Cm u
 modifier is specified, it displays user registers instead of
 kernel registers or the currently saved one.
 .Pp
 .Sy Warning :
 The support of the
 .Cm u
 modifier depends on the machine.
 If not supported, incorrect information will be displayed.
 .\"
 .Pp
 .It Ic show Cm rman Ar addr
 Show resource manager object
 .Vt struct rman
 at address
 .Ar addr .
 Addresses of particular pointers can be gathered with "show allrman"
 command.
 .\"
 .Pp
 .It Ic show Cm rtc
 Show real time clock value.
 Useful for long debugging sessions.
 .\"
 .Pp
 .It Ic show Cm sleepchain
 Show all the threads a particular thread is waiting on based on
 sleepable locks.
 .\"
 .Pp
 .It Ic show Cm sleepq
 .It Ic show Cm sleepqueue
 Both commands provide the same functionality.
 They show sleepqueue
 .Vt struct sleepqueue
 structure.
 Sleepqueues are used within the
 .Fx
 kernel to implement sleepable
 synchronization primitives (thread holding a lock might sleep or
 be context switched), which at the time of writing are:
 .Xr condvar 9 ,
 .Xr sx 9
 and standard
 .Xr msleep 9
 interface.
 .\"
 .Pp
 .It Ic show Cm sockbuf Ar addr
 .It Ic show Cm socket Ar addr
 Those commands print
 .Vt struct sockbuf
 and
 .Vt struct socket
 objects placed at
 .Ar addr .
 Output consists of all values present in structures mentioned.
 For exact interpretation and more details, visit
 .Pa sys/socket.h
 header file.
 .\"
 .Pp
 .It Ic show Cm sysregs
 Show system registers (e.g.,
 .Li cr0-4
 on i386).
 Not present on some platforms.
 .\"
 .Pp
 .It Ic show Cm tcpcb Ar addr
 Print TCP control block
 .Vt struct tcpcb
 lying at address
 .Ar addr .
 For exact interpretation of output, visit
 .Pa netinet/tcp.h
 header file.
 .\"
 .Pp
 .It Ic show Cm thread Op Ar addr
 If no
 .Ar addr
 is specified, show detailed information about current thread.
 Otherwise, information about thread at
 .Ar addr
 is printed.
 .\"
 .Pp
 .It Ic show Cm threads
 Show all threads within the system.
 Output format is as follows:
 .Pp
 .Bl -tag -compact -offset indent -width "Second column"
 .It Ic First column
 Thread identifier (TID)
 .It Ic Second column
 Thread structure address
 .It Ic Third column
 Backtrace.
 .El
 .\"
 .Pp
 .It Ic show Cm tty Ar addr
 Display the contents of a TTY structure in a readable form.
 .\"
 .Pp
 .It Ic show Cm turnstile Ar addr
 Show turnstile
 .Vt struct turnstile
 structure at address
 .Ar addr .
 Turnstiles are structures used within the
 .Fx
 kernel to implement
 synchronization primitives which, while holding a specific type of lock, cannot
 sleep or context switch to another thread.
 Currently, those are:
 .Xr mutex 9 ,
 .Xr rwlock 9 ,
 .Xr rmlock 9 .
 .\"
 .Pp
 .It Ic show Cm uma
 Show UMA allocator statistics.
 Output consists of five columns:
 .Pp
 .Bl -tag -compact -offset indent -width "Requests"
 .It Cm "Zone"
 Name of the UMA zone.
 The same string that was passed to
 .Xr uma_zcreate 9
 as a first argument.
 .It Cm "Size"
 Size of a given memory object (slab).
 .It Cm "Used"
 Number of slabs being currently used.
 .It Cm "Free"
 Number of free slabs within the UMA zone.
 .It Cm "Requests"
 Number of allocation requests to the given zone.
 .El
 .Pp
 The very same information might be gathered in the userspace
 with the help of
 .Dq Nm vmstat Fl z .
 .\"
 .Pp
 .It Ic show Cm unpcb Ar addr
 Shows UNIX domain socket private control block
 .Vt struct unpcb
 present at the address
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm vmochk
 Prints whether the internal VM objects are in a map somewhere
 and none have zero ref counts.
 .\"
 .Pp
 .It Ic show Cm vmopag
 This is supposed to show physical addresses consumed by a
 VM object.
 Currently, it is not possible to use this command when
 .Xr witness 4
 is compiled in the kernel.
 .\"
 .Pp
 .It Ic show Cm vnet Ar addr
 Prints virtualized network stack
 .Vt struct vnet
 structure present at the address
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm vnode Op Ar addr
 Prints vnode
 .Vt struct vnode
 structure lying at
 .Op Ar addr .
 For the exact interpretation of the output, look at the
 .Pa sys/vnode.h
 header file.
 .\"
 .Pp
 .It Ic show Cm vnodebufs Ar addr
 Shows clean/dirty buffer lists of the vnode located at
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm watches
 Displays all watchpoints.
 Shows watchpoints set with "watch" command.
 .\"
 .Pp
 .It Ic show Cm witness
 Shows information about lock acquisition coming from the
 .Xr witness 4
 subsystem.
 .\"
 .Pp
 .It Ic gdb
 Toggles between remote GDB and DDB mode.
 In remote GDB mode, another machine is required that runs
 .Xr gdb 1
 using the remote debug feature, with a connection to the serial
 console port on the target machine.
 Currently only available on the
 i386
 architecture.
 .Pp
 .It Ic halt
 Halt the system.
 .Pp
 .It Ic kill Ar sig pid
 Send signal
 .Ar sig
 to process
 .Ar pid .
 The signal is acted on upon returning from the debugger.
 This command can be used to kill a process causing resource contention
 in the case of a hung system.
 See
 .Xr signal 3
 for a list of signals.
 Note that the arguments are reversed relative to
 .Xr kill 2 .
 .Pp
 .It Ic reboot Op Ar seconds
 .It Ic reset Op Ar seconds
 Hard reset the system.
 If the optional argument
 .Ar seconds
 is given, the debugger will wait for this long, at most a week,
 before rebooting.
 .Pp
 .It Ic help
 Print a short summary of the available commands and command
 abbreviations.
 .Pp
 .It Ic capture on
 .It Ic capture off
 .It Ic capture reset
 .It Ic capture status
 .Nm
 supports a basic output capture facility, which can be used to retrieve the
 results of debugging commands from userspace using
 .Xr sysctl 3 .
 .Ic capture on
 enables output capture;
 .Ic capture off
 disables capture.
 .Ic capture reset
 will clear the capture buffer and disable capture.
 .Ic capture status
 will report current buffer use, buffer size, and disposition of output
 capture.
 .Pp
 Userspace processes may inspect and manage
 .Nm
 capture state using
 .Xr sysctl 8 :
 .Pp
 .Dv debug.ddb.capture.bufsize
 may be used to query or set the current capture buffer size.
 .Pp
 .Dv debug.ddb.capture.maxbufsize
 may be used to query the compile-time limit on the capture buffer size.
 .Pp
 .Dv debug.ddb.capture.bytes
 may be used to query the number of bytes of output currently in the capture
 buffer.
 .Pp
 .Dv debug.ddb.capture.data
 returns the contents of the buffer as a string to an appropriately privileged
 process.
 .Pp
 This facility is particularly useful in concert with the scripting and
 .Xr textdump 4
 facilities, allowing scripted debugging output to be captured and
 committed to disk as part of a textdump for later analysis.
 The contents of the capture buffer may also be inspected in a kernel core dump
 using
 .Xr kgdb 1 .
 .Pp
 .It Ic run
 .It Ic script
 .It Ic scripts
 .It Ic unscript
 Run, define, list, and delete scripts.
 See the
 .Sx SCRIPTING
 section for more information on the scripting facility.
 .Pp
 .It Ic textdump dump
 .It Ic textdump set
 .It Ic textdump status
 .It Ic textdump unset
 Use the
 .Ic textdump dump
 command to immediately perform a textdump.
 More information may be found in
 .Xr textdump 4 .
 The
 .Ic textdump set
 command may be used to force the next kernel core dump to be a textdump
 rather than a traditional memory dump or minidump.
 .Ic textdump status
 reports whether a textdump has been scheduled.
 .Ic textdump unset
 cancels a request to perform a textdump as the next kernel core dump.
 .El
 .Sh VARIABLES
 The debugger accesses registers and variables as
 .Li $ Ns Ar name .
 Register names are as in the
 .Dq Ic show Cm registers
 command.
 Some variables are suffixed with numbers, and may have some modifier
 following a colon immediately after the variable name.
 For example, register variables can have a
 .Cm u
 modifier to indicate user register (e.g.,
 .Dq Li $eax:u ) .
 .Pp
 Built-in variables currently supported are:
 .Pp
 .Bl -tag -width ".Va tabstops" -compact
 .It Va radix
 Input and output radix.
 .It Va maxoff
 Addresses are printed as
 .Dq Ar symbol Ns Li + Ns Ar offset
 unless
 .Ar offset
 is greater than
 .Va maxoff .
 .It Va maxwidth
 The width of the displayed line.
 .It Va lines
 The number of lines.
 It is used by the built-in pager.
 .It Va tabstops
 Tab stop width.
 .It Va work Ns Ar xx
 Work variable;
 .Ar xx
 can take values from 0 to 31.
 .El
 .Sh EXPRESSIONS
 Most expression operators in C are supported except
 .Ql ~ ,
 .Ql ^ ,
 and unary
 .Ql & .
 Special rules in
 .Nm
 are:
 .Bl -tag -width ".No Identifiers"
 .It Identifiers
 The name of a symbol is translated to the value of the symbol, which
 is the address of the corresponding object.
 .Ql \&.
 and
 .Ql \&:
 can be used in the identifier.
 If supported by an object format dependent routine,
 .Sm off
 .Oo Ar filename : Oc Ar func : lineno ,
 .Sm on
 .Oo Ar filename : Oc Ns Ar variable ,
 and
 .Oo Ar filename : Oc Ns Ar lineno
 can be accepted as a symbol.
 .It Numbers
 Radix is determined by the first two letters:
 .Ql 0x :
 hex,
 .Ql 0o :
 octal,
 .Ql 0t :
 decimal; otherwise, follow current radix.
 .It Li \&.
 .Va dot
 .It Li +
 .Va next
 .It Li ..
 address of the start of the last line examined.
 Unlike
 .Va dot
 or
 .Va next ,
 this is only changed by
 .Ic examine
 or
 .Ic write
 command.
 .It Li '
 last address explicitly specified.
 .It Li $ Ns Ar variable
 Translated to the value of the specified variable.
 It may be followed by a
 .Ql \&:
 and modifiers as described above.
 .It Ar a Ns Li # Ns Ar b
 A binary operator which rounds up the left hand side to the next
 multiple of right hand side.
 .It Li * Ns Ar expr
 Indirection.
 It may be followed by a
 .Ql \&:
 and modifiers as described above.
 .El
 .Sh SCRIPTING
 .Nm
 supports a basic scripting facility to allow automating tasks or responses to
 specific events.
 Each script consists of a list of DDB commands to be executed sequentially,
 and is assigned a unique name.
 Certain script names have special meaning, and will be automatically run on
 various
 .Nm
 events if scripts by those names have been defined.
 .Pp
 The
 .Ic script
 command may be used to define a script by name.
 Scripts consist of a series of
 .Nm
 commands separated with the
 .Ql \&;
 character.
 For example:
 .Bd -literal -offset indent
 script kdb.enter.panic=bt; show pcpu
 script lockinfo=show alllocks; show lockedvnods
 .Ed
 .Pp
 The
 .Ic scripts
 command lists currently defined scripts.
 .Pp
 The
 .Ic run
 command executes a script by name.
 For example:
 .Bd -literal -offset indent
 run lockinfo
 .Ed
 .Pp
 The
 .Ic unscript
 command may be used to delete a script by name.
 For example:
 .Bd -literal -offset indent
 unscript kdb.enter.panic
 .Ed
 .Pp
 These functions may also be performed from userspace using the
 .Xr ddb 8
 command.
 .Pp
 Certain scripts are run automatically, if defined, for specific
 .Nm
 events.
 The following scripts are run when various events occur:
 .Bl -tag -width kdb.enter.powerfail
 .It Dv kdb.enter.acpi
 The kernel debugger was entered as a result of an
 .Xr acpi 4
 event.
 .It Dv kdb.enter.bootflags
 The kernel debugger was entered at boot as a result of the debugger boot
 flag being set.
 .It Dv kdb.enter.break
 The kernel debugger was entered as a result of a serial or console break.
 .It Dv kdb.enter.cam
 The kernel debugger was entered as a result of a
 .Xr CAM 4
 event.
 .It Dv kdb.enter.mac
 The kernel debugger was entered as a result of an assertion failure in the
 .Xr mac_test 4
 module of the
 TrustedBSD MAC Framework.
 .It Dv kdb.enter.ndis
 The kernel debugger was entered as a result of an
 .Xr ndis 4
 breakpoint event.
 .It Dv kdb.enter.netgraph
 The kernel debugger was entered as a result of a
 .Xr netgraph 4
 event.
 .It Dv kdb.enter.panic
 .Xr panic 9
 was called.
 .It Dv kdb.enter.powerfail
 The kernel debugger was entered as a result of a powerfail NMI on the sparc64
 platform.
 .It Dv kdb.enter.powerpc
 The kernel debugger was entered as a result of an unimplemented interrupt
 type on the powerpc platform.
 .It Dv kdb.enter.sysctl
 The kernel debugger was entered as a result of the
 .Dv debug.kdb.enter
 sysctl being set.
 .It Dv kdb.enter.trapsig
 The kernel debugger was entered as a result of a trapsig event on the sparc64
 platform.
 .It Dv kdb.enter.unionfs
 The kernel debugger was entered as a result of an assertion failure in the
 union file system.
 .It Dv kdb.enter.unknown
 The kernel debugger was entered, but no reason has been set.
 .It Dv kdb.enter.vfslock
 The kernel debugger was entered as a result of a VFS lock violation.
 .It Dv kdb.enter.watchdog
 The kernel debugger was entered as a result of a watchdog firing.
 .It Dv kdb.enter.witness
 The kernel debugger was entered as a result of a
 .Xr witness 4
 violation.
 .El
 .Pp
 In the event that none of these scripts is found,
 .Nm
 will attempt to execute a default script:
 .Bl -tag -width kdb.enter.powerfail
 .It Dv kdb.enter.default
 The kernel debugger was entered, but a script exactly matching the reason for
 entering was not defined.
 This can be used as a catch-all to handle cases not specifically of interest;
 for example,
 .Dv kdb.enter.witness
 might be defined to have special handling, and
 .Dv kdb.enter.default
 might be defined to simply panic and reboot.
 .El
 .Sh HINTS
 On machines with an ISA expansion bus, a simple NMI generation card can be
 constructed by connecting a push button between the A01 and B01 (CHCHK# and
 GND) card fingers.
 Momentarily shorting these two fingers together may cause the bridge chipset to
 generate an NMI, which causes the kernel to pass control to
 .Nm .
 Some bridge chipsets do not generate an NMI on CHCHK#, so your mileage may vary.
 The NMI allows one to break into the debugger on a wedged machine to
 diagnose problems.
 Bridge chipsets for other buses may be able to generate an NMI using bus-specific
 methods.
 There are many PCI and PCIe add-in cards which can generate NMI for
 debugging.
 Modern server systems typically use IPMI to generate signals to enter the
 debugger.
 The
 .Dv devel/ipmitool
 port can be used to send the
 .Cd chassis power diag
 command which delivers an NMI to the processor.
 Embedded systems often use JTAG for debugging, but rarely use it in
 combination with
 .Nm .
 .Pp
 For serial consoles, you can enter the debugger by sending a BREAK
 condition on the serial line if
 .Cd options BREAK_TO_DEBUGGER
 is specified in the kernel.
 Most terminal emulation programs can send a break sequence with a
 special key sequence or via a menu item.
 However, in some setups, sending the break can be difficult to arrange
 or happens spuriously, so if the kernel contains
 .Cd options ALT_BREAK_TO_DEBUGGER
 then the sequence of CR TILDE CTRL-B enters the debugger;
 CR TILDE CTRL-P causes a panic instead of entering the
 debugger; and
 CR TILDE CTRL-R causes an immediate reboot.
 In all the above sequences, CR is a Carriage Return and is usually
 sent by hitting the Enter or Return key.
 TILDE is the ASCII tilde character (~).
 CTRL-x is Control x created by hitting the control key and then x
 and then releasing both.
 .Pp
 The break to enter the debugger behavior may be enabled at run-time
 by setting the
 .Xr sysctl 8
 .Dv debug.kdb.break_to_debugger
 to 1.
 The alternate sequence to enter the debugger behavior may be enabled
 at run-time by setting the
 .Xr sysctl 8
 .Dv debug.kdb.alt_break_to_debugger
 to 1.
 The debugger may be entered by setting the
 .Xr sysctl 8
 .Dv debug.kdb.enter
 to 1.
 .Sh FILES
 Header files mentioned in this manual page can be found below the
 .Pa /usr/include
 directory.
 .Pp
 .Bl -dash -compact
 .It
 .Pa sys/buf.h
 .It
 .Pa sys/domain.h
 .It
 .Pa netinet/in_pcb.h
 .It
 .Pa sys/socket.h
 .It
 .Pa sys/vnode.h
 .El
 .Sh SEE ALSO
 .Xr gdb 1 ,
 .Xr kgdb 1 ,
 .Xr acpi 4 ,
 .Xr CAM 4 ,
 .Xr mac_test 4 ,
 .Xr ndis 4 ,
 .Xr netgraph 4 ,
 .Xr textdump 4 ,
 .Xr witness 4 ,
 .Xr ddb 8 ,
 .Xr sysctl 8 ,
 .Xr panic 9
 .Sh HISTORY
 The
 .Nm
 debugger was developed for Mach, and ported to
 .Bx 386 0.1 .
 This manual page was translated from
 .Xr man 7
 macros by
 .An Garrett Wollman .
 .Pp
 .An Robert N. M. Watson
 added support for
 .Nm
 output capture,
 .Xr textdump 4
 and scripting in
 .Fx 7.1 .
Index: projects/vnet/sys/dev/acpica/acpi_thermal.c
===================================================================
--- projects/vnet/sys/dev/acpica/acpi_thermal.c	(revision 301522)
+++ projects/vnet/sys/dev/acpica/acpi_thermal.c	(revision 301523)
@@ -1,1214 +1,1223 @@
 /*-
  * Copyright (c) 2000, 2001 Michael Smith
  * Copyright (c) 2000 BSDi
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_acpi.h"
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
 #include <sys/cpu.h>
 #include <sys/kthread.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/proc.h>
 #include <sys/reboot.h>
 #include <sys/sysctl.h>
 #include <sys/unistd.h>
 #include <sys/power.h>
 
 #include "cpufreq_if.h"
 
 #include <contrib/dev/acpica/include/acpi.h>
 #include <contrib/dev/acpica/include/accommon.h>
 
 #include <dev/acpica/acpivar.h>
 
 /* Hooks for the ACPI CA debugging infrastructure */
 #define _COMPONENT	ACPI_THERMAL
 ACPI_MODULE_NAME("THERMAL")
 
 #define TZ_ZEROC	2731
 #define TZ_KELVTOC(x)	(((x) - TZ_ZEROC) / 10), abs(((x) - TZ_ZEROC) % 10)
 
 #define TZ_NOTIFY_TEMPERATURE	0x80 /* Temperature changed. */
 #define TZ_NOTIFY_LEVELS	0x81 /* Cooling levels changed. */
 #define TZ_NOTIFY_DEVICES	0x82 /* Device lists changed. */
 #define TZ_NOTIFY_CRITICAL	0xcc /* Fake notify that _CRT/_HOT reached. */
 
 /* Check for temperature changes every 10 seconds by default */
 #define TZ_POLLRATE	10
 
 /* Make sure the reported temperature is valid for this number of polls. */
 #define TZ_VALIDCHECKS	3
 
 /* Notify the user we will be shutting down in one more poll cycle. */
 #define TZ_NOTIFYCOUNT	(TZ_VALIDCHECKS - 1)
 
 /* ACPI spec defines this */
 #define TZ_NUMLEVELS	10
 /*
  * Parameters of one thermal zone, as evaluated from the zone's ACPI
  * objects by acpi_tz_establish().  Integer members hold -1 when the
  * object could not be evaluated or its value was rejected by
  * acpi_tz_sanity().
  */
 struct acpi_tz_zone {
     int		ac[TZ_NUMLEVELS];	/* _AC0 .. _AC9 values */
     ACPI_BUFFER	al[TZ_NUMLEVELS];	/* _AL0 .. _AL9 results (packages) */
     int		crt;			/* _CRT value */
     int		hot;			/* _HOT value */
     ACPI_BUFFER	psl;			/* _PSL result */
     int		psv;			/* _PSV value */
     int		tc1;			/* _TC1 value */
     int		tc2;			/* _TC2 value */
     int		tsp;			/* _TSP value */
     int		tzp;			/* _TZP value */
 };
 
 /*
  * Per-device state for one thermal zone.  The TZ_* constants that apply
  * to a member are defined right after that member.
  */
 struct acpi_tz_softc {
     device_t			tz_dev;		/*Device this state belongs to*/
     ACPI_HANDLE			tz_handle;	/*Thermal zone handle*/
     int				tz_temperature;	/*Current temperature*/
     int				tz_active;	/*Current active cooling*/
 #define TZ_ACTIVE_NONE		-1
 #define TZ_ACTIVE_UNKNOWN	-2
     int				tz_requested;	/*Minimum active cooling*/
     int				tz_thflags;	/*Current temp-related flags*/
 #define TZ_THFLAG_NONE		0
 #define TZ_THFLAG_PSV		(1<<0)
 #define TZ_THFLAG_HOT		(1<<2)
 #define TZ_THFLAG_CRT		(1<<3)
     int				tz_flags;	/*TZ_FLAG_* bits, see below*/
 #define TZ_FLAG_NO_SCP		(1<<0)		/*No _SCP method*/
 #define TZ_FLAG_GETPROFILE	(1<<1)		/*Get power_profile in timeout*/
 #define TZ_FLAG_GETSETTINGS	(1<<2)		/*Get devs/setpoints*/
     struct timespec		tz_cooling_started;
 					/*Current cooling starting time*/
 
     struct sysctl_ctx_list	tz_sysctl_ctx;	/*Context of this zone's sysctls*/
     struct sysctl_oid		*tz_sysctl_tree; /*This zone's "tz<unit>" node*/
     eventhandler_tag		tz_event;	/*power_profile_change handler*/
 
     struct acpi_tz_zone 	tz_zone;	/*Thermal zone parameters*/
     int				tz_validchecks;	/*Polls in a row at _HOT/_CRT*/
     int				tz_insane_tmp_notified;
 					/*Absurd _TMP already reported*/
 
     /* passive cooling */
     /* NOTE(review): the passive cooling thread is not visible here; */
     /* the members below are presumably shared with it -- confirm. */
     struct proc			*tz_cooling_proc;
     int				tz_cooling_proc_running;
     int				tz_cooling_enabled;
     int				tz_cooling_active;
     int				tz_cooling_updated;
     int				tz_cooling_saved_freq;
 };
 
 #define	TZ_ACTIVE_LEVEL(act)	((act) >= 0 ? (act) : TZ_NUMLEVELS)
 
 #define CPUFREQ_MAX_LEVELS	64 /* XXX cpufreq should export this */
 
 static int	acpi_tz_probe(device_t dev);
 static int	acpi_tz_attach(device_t dev);
 static int	acpi_tz_establish(struct acpi_tz_softc *sc);
 static void	acpi_tz_monitor(void *Context);
 static void	acpi_tz_switch_cooler_off(ACPI_OBJECT *obj, void *arg);
 static void	acpi_tz_switch_cooler_on(ACPI_OBJECT *obj, void *arg);
 static void	acpi_tz_getparam(struct acpi_tz_softc *sc, char *node,
 				 int *data);
 static void	acpi_tz_sanity(struct acpi_tz_softc *sc, int *val, char *what);
 static int	acpi_tz_active_sysctl(SYSCTL_HANDLER_ARGS);
 static int	acpi_tz_cooling_sysctl(SYSCTL_HANDLER_ARGS);
 static int	acpi_tz_temp_sysctl(SYSCTL_HANDLER_ARGS);
 static int	acpi_tz_passive_sysctl(SYSCTL_HANDLER_ARGS);
 static void	acpi_tz_notify_handler(ACPI_HANDLE h, UINT32 notify,
 				       void *context);
 static void	acpi_tz_signal(struct acpi_tz_softc *sc, int flags);
 static void	acpi_tz_timeout(struct acpi_tz_softc *sc, int flags);
 static void	acpi_tz_power_profile(void *arg);
 static void	acpi_tz_thread(void *arg);
 static int	acpi_tz_cooling_is_available(struct acpi_tz_softc *sc);
 static int	acpi_tz_cooling_thread_start(struct acpi_tz_softc *sc);
 
 /* Method table for the driver; only probe and attach are provided. */
 static device_method_t acpi_tz_methods[] = {
     /* Device interface */
     DEVMETHOD(device_probe,	acpi_tz_probe),
     DEVMETHOD(device_attach,	acpi_tz_attach),
 
     DEVMETHOD_END
 };
 
 /* Driver description: name, method table and size of the per-device softc. */
 static driver_t acpi_tz_driver = {
     "acpi_tz",
     acpi_tz_methods,
     sizeof(struct acpi_tz_softc),
 };
 
 static char *acpi_tz_tmp_name = "_TMP";
 
 static devclass_t acpi_tz_devclass;
 DRIVER_MODULE(acpi_tz, acpi, acpi_tz_driver, acpi_tz_devclass, 0, 0);
 MODULE_DEPEND(acpi_tz, acpi, 1, 1, 1);
 
 static struct sysctl_ctx_list	acpi_tz_sysctl_ctx;
 static struct sysctl_oid	*acpi_tz_sysctl_tree;
 
 /* Minimum cooling run time */
 static int			acpi_tz_min_runtime;
 static int			acpi_tz_polling_rate = TZ_POLLRATE;
 static int			acpi_tz_override;
 
 /* Thermal zone polling thread */
 static struct proc		*acpi_tz_proc;
 ACPI_LOCK_DECL(thermal, "ACPI thermal zone");
 
 static int			acpi_tz_cooling_unit = -1;
 
 /*
  * Probe routine: accept ACPI thermal zone objects unless the "thermal"
  * component has been disabled.  Returns -10 on a match, ENXIO otherwise.
  */
 static int
 acpi_tz_probe(device_t dev)
 {
 
     /* Not a thermal zone, or thermal support switched off: decline. */
     if (acpi_get_type(dev) != ACPI_TYPE_THERMAL || acpi_disabled("thermal"))
 	return (ENXIO);
 
     device_set_desc(dev, "Thermal Zone");
     return (-10);
 }
 
 static int
 acpi_tz_attach(device_t dev)
 {
     struct acpi_tz_softc	*sc;
     struct acpi_softc		*acpi_sc;
     int				error;
     char			oidname[8];
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     sc = device_get_softc(dev);
     sc->tz_dev = dev;
     sc->tz_handle = acpi_get_handle(dev);
     sc->tz_requested = TZ_ACTIVE_NONE;
     sc->tz_active = TZ_ACTIVE_UNKNOWN;
     sc->tz_thflags = TZ_THFLAG_NONE;
     sc->tz_cooling_proc = NULL;
     sc->tz_cooling_proc_running = FALSE;
     sc->tz_cooling_active = FALSE;
     sc->tz_cooling_updated = FALSE;
     sc->tz_cooling_enabled = FALSE;
 
     /*
      * Parse the current state of the thermal zone and build control
      * structures.  We don't need to worry about interference with the
      * control thread since we haven't fully attached this device yet.
      */
     if ((error = acpi_tz_establish(sc)) != 0)
 	return (error);
 
     /*
      * Register for any Notify events sent to this zone.
      */
     AcpiInstallNotifyHandler(sc->tz_handle, ACPI_DEVICE_NOTIFY,
 			     acpi_tz_notify_handler, sc);
 
     /*
      * Create our sysctl nodes.
      *
      * XXX we need a mechanism for adding nodes under ACPI.
      */
     if (device_get_unit(dev) == 0) {
 	acpi_sc = acpi_device_get_parent_softc(dev);
 	sysctl_ctx_init(&acpi_tz_sysctl_ctx);
 	acpi_tz_sysctl_tree = SYSCTL_ADD_NODE(&acpi_tz_sysctl_ctx,
 			      SYSCTL_CHILDREN(acpi_sc->acpi_sysctl_tree),
 			      OID_AUTO, "thermal", CTLFLAG_RD, 0, "");
 	SYSCTL_ADD_INT(&acpi_tz_sysctl_ctx,
 		       SYSCTL_CHILDREN(acpi_tz_sysctl_tree),
 		       OID_AUTO, "min_runtime", CTLFLAG_RW,
 		       &acpi_tz_min_runtime, 0,
 		       "minimum cooling run time in sec");
 	SYSCTL_ADD_INT(&acpi_tz_sysctl_ctx,
 		       SYSCTL_CHILDREN(acpi_tz_sysctl_tree),
 		       OID_AUTO, "polling_rate", CTLFLAG_RW,
 		       &acpi_tz_polling_rate, 0, "monitor polling interval in seconds");
 	SYSCTL_ADD_INT(&acpi_tz_sysctl_ctx,
 		       SYSCTL_CHILDREN(acpi_tz_sysctl_tree), OID_AUTO,
 		       "user_override", CTLFLAG_RW, &acpi_tz_override, 0,
 		       "allow override of thermal settings");
     }
     sysctl_ctx_init(&sc->tz_sysctl_ctx);
     sprintf(oidname, "tz%d", device_get_unit(dev));
     sc->tz_sysctl_tree = SYSCTL_ADD_NODE(&sc->tz_sysctl_ctx,
 					 SYSCTL_CHILDREN(acpi_tz_sysctl_tree),
 					 OID_AUTO, oidname, CTLFLAG_RD, 0, "");
     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
 		    OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD,
 		    &sc->tz_temperature, 0, sysctl_handle_int,
 		    "IK", "current thermal zone temperature");
     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
 		    OID_AUTO, "active", CTLTYPE_INT | CTLFLAG_RW,
 		    sc, 0, acpi_tz_active_sysctl, "I", "cooling is active");
     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
 		    OID_AUTO, "passive_cooling", CTLTYPE_INT | CTLFLAG_RW,
 		    sc, 0, acpi_tz_cooling_sysctl, "I",
 		    "enable passive (speed reduction) cooling");
 
     SYSCTL_ADD_INT(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
 		   OID_AUTO, "thermal_flags", CTLFLAG_RD,
 		   &sc->tz_thflags, 0, "thermal zone flags");
     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
 		    OID_AUTO, "_PSV", CTLTYPE_INT | CTLFLAG_RW,
 		    sc, offsetof(struct acpi_tz_softc, tz_zone.psv),
 		    acpi_tz_temp_sysctl, "IK", "passive cooling temp setpoint");
     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
 		    OID_AUTO, "_HOT", CTLTYPE_INT | CTLFLAG_RW,
 		    sc, offsetof(struct acpi_tz_softc, tz_zone.hot),
 		    acpi_tz_temp_sysctl, "IK",
 		    "too hot temp setpoint (suspend now)");
     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
 		    OID_AUTO, "_CRT", CTLTYPE_INT | CTLFLAG_RW,
 		    sc, offsetof(struct acpi_tz_softc, tz_zone.crt),
 		    acpi_tz_temp_sysctl, "IK",
 		    "critical temp setpoint (shutdown now)");
     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
 		    OID_AUTO, "_ACx", CTLTYPE_INT | CTLFLAG_RD,
 		    &sc->tz_zone.ac, sizeof(sc->tz_zone.ac),
 		    sysctl_handle_opaque, "IK", "");
     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
 		    OID_AUTO, "_TC1", CTLTYPE_INT | CTLFLAG_RW,
 		    sc, offsetof(struct acpi_tz_softc, tz_zone.tc1),
 		    acpi_tz_passive_sysctl, "I",
 		    "thermal constant 1 for passive cooling");
     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
 		    OID_AUTO, "_TC2", CTLTYPE_INT | CTLFLAG_RW,
 		    sc, offsetof(struct acpi_tz_softc, tz_zone.tc2),
 		    acpi_tz_passive_sysctl, "I",
 		    "thermal constant 2 for passive cooling");
     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
 		    OID_AUTO, "_TSP", CTLTYPE_INT | CTLFLAG_RW,
 		    sc, offsetof(struct acpi_tz_softc, tz_zone.tsp),
 		    acpi_tz_passive_sysctl, "I",
 		    "thermal sampling period for passive cooling");
 
     /*
-     * Create thread to service all of the thermal zones.  Register
-     * our power profile event handler.
+     * Register our power profile event handler.
      */
     sc->tz_event = EVENTHANDLER_REGISTER(power_profile_change,
 	acpi_tz_power_profile, sc, 0);
-    if (acpi_tz_proc == NULL) {
-	error = kproc_create(acpi_tz_thread, NULL, &acpi_tz_proc,
-	    RFHIGHPID, 0, "acpi_thermal");
-	if (error != 0) {
-	    device_printf(sc->tz_dev, "could not create thread - %d", error);
-	    goto out;
-	}
-    }
 
     /*
+     * Flag the event handler for a manual invocation by our timeout.
+     * We defer it like this so that the rest of the subsystem has time
+     * to come up.  Don't bother evaluating/printing the temperature at
+     * this point; on many systems it'll be bogus until the EC is running.
+     */
+    sc->tz_flags |= TZ_FLAG_GETPROFILE;
+
+    return_VALUE (0);
+}
+
+static void
+acpi_tz_startup(void *arg __unused)
+{
+    struct acpi_tz_softc *sc;
+    device_t *devs;
+    int devcount, error, i;
+
+    devclass_get_devices(acpi_tz_devclass, &devs, &devcount);
+    if (devcount == 0)
+	return;
+
+    /*
+     * Create thread to service all of the thermal zones.
+     */
+    error = kproc_create(acpi_tz_thread, NULL, &acpi_tz_proc, RFHIGHPID, 0,
+	"acpi_thermal");
+    if (error != 0)
+	printf("acpi_tz: could not create thread - %d", error);
+
+    /*
      * Create a thread to handle passive cooling for 1st zone which
      * has _PSV, _TSP, _TC1 and _TC2.  Users can enable it for other
      * zones manually for now.
      *
      * XXX We enable only one zone to avoid multiple zones conflict
      * with each other since cpufreq currently sets all CPUs to the
      * given frequency whereas it's possible for different thermal
      * zones to specify independent settings for multiple CPUs.
      */
-    if (acpi_tz_cooling_unit < 0 && acpi_tz_cooling_is_available(sc))
-	sc->tz_cooling_enabled = TRUE;
-    if (sc->tz_cooling_enabled) {
-	error = acpi_tz_cooling_thread_start(sc);
-	if (error != 0) {
-	    sc->tz_cooling_enabled = FALSE;
-	    goto out;
+    for (i = 0; i < devcount; i++) {
+	sc = device_get_softc(devs[i]);
+	if (acpi_tz_cooling_is_available(sc)) {
+	    sc->tz_cooling_enabled = TRUE;
+	    error = acpi_tz_cooling_thread_start(sc);
+	    if (error != 0) {
+		sc->tz_cooling_enabled = FALSE;
+		break;
+	    }
+	    acpi_tz_cooling_unit = device_get_unit(devs[i]);
+	    break;
 	}
-	acpi_tz_cooling_unit = device_get_unit(dev);
     }
-
-    /*
-     * Flag the event handler for a manual invocation by our timeout.
-     * We defer it like this so that the rest of the subsystem has time
-     * to come up.  Don't bother evaluating/printing the temperature at
-     * this point; on many systems it'll be bogus until the EC is running.
-     */
-    sc->tz_flags |= TZ_FLAG_GETPROFILE;
-
-out:
-    if (error != 0) {
-	EVENTHANDLER_DEREGISTER(power_profile_change, sc->tz_event);
-	AcpiRemoveNotifyHandler(sc->tz_handle, ACPI_DEVICE_NOTIFY,
-	    acpi_tz_notify_handler);
-	sysctl_ctx_free(&sc->tz_sysctl_ctx);
-    }
-    return_VALUE (error);
+    free(devs, M_TEMP);
 }
+SYSINIT(acpi_tz, SI_SUB_KICK_SCHEDULER, SI_ORDER_ANY, acpi_tz_startup, NULL);
 
 /*
  * Parse the current state of this thermal zone and set up to use it.
  *
  * Note that we may have previous state, which will have to be discarded.
  *
  * Returns 0 on success, or ENXIO if one of the _ALx objects evaluates to
  * something other than a package.  Integer parameters that cannot be
  * read, or that fail the sanity check, are left at -1.
  */
 static int
 acpi_tz_establish(struct acpi_tz_softc *sc)
 {
     ACPI_OBJECT	*obj;
     int		i;
     char	nbuf[8];
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     /* Erase any existing state. */
     for (i = 0; i < TZ_NUMLEVELS; i++)
 	if (sc->tz_zone.al[i].Pointer != NULL)
 	    AcpiOsFree(sc->tz_zone.al[i].Pointer);
     if (sc->tz_zone.psl.Pointer != NULL)
 	AcpiOsFree(sc->tz_zone.psl.Pointer);
 
     /*
      * XXX: We initialize only ACPI_BUFFER to avoid race condition
      * with passive cooling thread which refers psv, tc1, tc2 and tsp.
      */
     bzero(sc->tz_zone.ac, sizeof(sc->tz_zone.ac));
     bzero(sc->tz_zone.al, sizeof(sc->tz_zone.al));
     bzero(&sc->tz_zone.psl, sizeof(sc->tz_zone.psl));
 
     /* Evaluate thermal zone parameters. */
     for (i = 0; i < TZ_NUMLEVELS; i++) {
 	/* "_ACn"/"_ALn" plus the terminator fit in nbuf for n = 0..9. */
 	sprintf(nbuf, "_AC%d", i);
 	acpi_tz_getparam(sc, nbuf, &sc->tz_zone.ac[i]);
 	sprintf(nbuf, "_AL%d", i);
 	sc->tz_zone.al[i].Length = ACPI_ALLOCATE_BUFFER;
 	sc->tz_zone.al[i].Pointer = NULL;
 	/* The status is not checked; Pointer stays NULL if nothing came back. */
 	AcpiEvaluateObject(sc->tz_handle, nbuf, NULL, &sc->tz_zone.al[i]);
 	obj = (ACPI_OBJECT *)sc->tz_zone.al[i].Pointer;
 	if (obj != NULL) {
 	    /* Should be a package containing a list of power objects */
 	    if (obj->Type != ACPI_TYPE_PACKAGE) {
 		device_printf(sc->tz_dev, "%s has unknown type %d, rejecting\n",
 			      nbuf, obj->Type);
 		return_VALUE (ENXIO);
 	    }
 	}
     }
     acpi_tz_getparam(sc, "_CRT", &sc->tz_zone.crt);
     acpi_tz_getparam(sc, "_HOT", &sc->tz_zone.hot);
     sc->tz_zone.psl.Length = ACPI_ALLOCATE_BUFFER;
     sc->tz_zone.psl.Pointer = NULL;
     AcpiEvaluateObject(sc->tz_handle, "_PSL", NULL, &sc->tz_zone.psl);
     acpi_tz_getparam(sc, "_PSV", &sc->tz_zone.psv);
     acpi_tz_getparam(sc, "_TC1", &sc->tz_zone.tc1);
     acpi_tz_getparam(sc, "_TC2", &sc->tz_zone.tc2);
     acpi_tz_getparam(sc, "_TSP", &sc->tz_zone.tsp);
     acpi_tz_getparam(sc, "_TZP", &sc->tz_zone.tzp);
 
     /*
      * Sanity-check the values we've been given.
      *
      * XXX what do we do about systems that give us the same value for
      *     more than one of these setpoints?
      */
     acpi_tz_sanity(sc, &sc->tz_zone.crt, "_CRT");
     acpi_tz_sanity(sc, &sc->tz_zone.hot, "_HOT");
     acpi_tz_sanity(sc, &sc->tz_zone.psv, "_PSV");
     for (i = 0; i < TZ_NUMLEVELS; i++)
 	acpi_tz_sanity(sc, &sc->tz_zone.ac[i], "_ACx");
 
     return_VALUE (0);
 }
 
 /*
  * Printable names of the active cooling levels.  Slot 0 is the name used
  * for "no level"; slot n + 1 names level n.
  */
 static char *aclevel_string[] = {
     "NONE",
     "_AC0", "_AC1", "_AC2", "_AC3", "_AC4",
     "_AC5", "_AC6", "_AC7", "_AC8", "_AC9"
 };
 
 /*
  * Map an active cooling level to its name.  Values outside the range
  * -1 .. TZ_NUMLEVELS - 1 are reported as "NONE".
  */
 static __inline const char *
 acpi_tz_aclevel_string(int active)
 {
     int		slot;
 
     slot = (active >= -1 && active < TZ_NUMLEVELS) ? active + 1 : 0;
     return (aclevel_string[slot]);
 }
 
 /*
  * Read the zone's _TMP object and, if the value passes the sanity check,
  * store it in sc->tz_temperature.
  *
  * Returns TRUE when tz_temperature was updated, FALSE when the evaluation
  * failed or the value was rejected (tz_temperature is left untouched).
  */
 static int
 acpi_tz_get_temperature(struct acpi_tz_softc *sc)
 {
     ACPI_STATUS	status;
     int		reading;
 
     ACPI_FUNCTION_NAME ("acpi_tz_get_temperature");
 
     /* Ask the firmware for the current temperature. */
     status = acpi_GetInteger(sc->tz_handle, acpi_tz_tmp_name, &reading);
     if (ACPI_FAILURE(status)) {
 	ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
 	    "error fetching current temperature -- %s\n",
 	     AcpiFormatException(status));
 	return (FALSE);
     }
 
     /* acpi_tz_sanity() replaces an implausible value with -1. */
     acpi_tz_sanity(sc, &reading, acpi_tz_tmp_name);
     if (reading != -1) {
 	ACPI_DEBUG_PRINT((ACPI_DB_VALUES, "got %d.%dC\n",
 	    TZ_KELVTOC(reading)));
 	sc->tz_temperature = reading;
 	return (TRUE);
     }
 
     return (FALSE);
 }
 
 /*
  * Evaluate the condition of a thermal zone, take appropriate actions.
  *
  * Context is the zone's struct acpi_tz_softc.  The routine refreshes
  * tz_temperature, works out which active cooling level applies, switches
  * the cooling devices listed in the _ALx packages accordingly, updates
  * tz_thflags, and requests a power-off once the temperature has been at
  * or above _HOT/_CRT for TZ_VALIDCHECKS valid readings in a row.
  * Nothing is done when the temperature cannot be read.
  */
 static void
 acpi_tz_monitor(void *Context)
 {
     struct acpi_tz_softc *sc;
     struct	timespec curtime;
     int		temp;
     int		i;
     int		newactive, newflags;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     sc = (struct acpi_tz_softc *)Context;
 
     /* Get the current temperature. */
     if (!acpi_tz_get_temperature(sc)) {
 	/* XXX disable zone? go to max cooling? */
 	return_VOID;
     }
     temp = sc->tz_temperature;
 
     /*
      * Work out what we ought to be doing right now.
      *
      * Note that the _ACx levels sort from hot to cold.
      */
     /* Scanning downwards leaves the lowest-numbered level that is reached. */
     newactive = TZ_ACTIVE_NONE;
     for (i = TZ_NUMLEVELS - 1; i >= 0; i--) {
 	if (sc->tz_zone.ac[i] != -1 && temp >= sc->tz_zone.ac[i])
 	    newactive = i;
     }
 
     /*
      * We are going to get _ACx level down (colder side), but give a guaranteed
      * minimum cooling run time if requested.
      */
     if (acpi_tz_min_runtime > 0 && sc->tz_active != TZ_ACTIVE_NONE &&
 	sc->tz_active != TZ_ACTIVE_UNKNOWN &&
 	(newactive == TZ_ACTIVE_NONE || newactive > sc->tz_active)) {
 
 	/* curtime becomes the time elapsed since the last level switch. */
 	getnanotime(&curtime);
 	timespecsub(&curtime, &sc->tz_cooling_started);
 	if (curtime.tv_sec < acpi_tz_min_runtime)
 	    newactive = sc->tz_active;
     }
 
     /* Handle user override of active mode */
     if (sc->tz_requested != TZ_ACTIVE_NONE && (newactive == TZ_ACTIVE_NONE
         || sc->tz_requested < newactive))
 	newactive = sc->tz_requested;
 
     /* update temperature-related flags */
     newflags = TZ_THFLAG_NONE;
     if (sc->tz_zone.psv != -1 && temp >= sc->tz_zone.psv)
 	newflags |= TZ_THFLAG_PSV;
     if (sc->tz_zone.hot != -1 && temp >= sc->tz_zone.hot)
 	newflags |= TZ_THFLAG_HOT;
     if (sc->tz_zone.crt != -1 && temp >= sc->tz_zone.crt)
 	newflags |= TZ_THFLAG_CRT;
 
     /* If the active cooling state has changed, we have to switch things. */
     if (sc->tz_active == TZ_ACTIVE_UNKNOWN) {
 	/*
 	 * We don't know which cooling device is on or off,
 	 * so stop them all, because we now know which
 	 * should be on (if any).
 	 */
 	for (i = 0; i < TZ_NUMLEVELS; i++) {
 	    if (sc->tz_zone.al[i].Pointer != NULL) {
 		acpi_ForeachPackageObject(
 		    (ACPI_OBJECT *)sc->tz_zone.al[i].Pointer,
 		    acpi_tz_switch_cooler_off, sc);
 	    }
 	}
 	/* now we know that all devices are off */
 	sc->tz_active = TZ_ACTIVE_NONE;
     }
 
     /*
      * TZ_ACTIVE_LEVEL() turns a negative level into TZ_NUMLEVELS, so the
      * two index ranges below also cover switching from or to "none".
      */
     if (newactive != sc->tz_active) {
 	/* Turn off unneeded cooling devices that are on, if any are */
 	for (i = TZ_ACTIVE_LEVEL(sc->tz_active);
 	     i < TZ_ACTIVE_LEVEL(newactive); i++) {
 	    acpi_ForeachPackageObject(
 		(ACPI_OBJECT *)sc->tz_zone.al[i].Pointer,
 		acpi_tz_switch_cooler_off, sc);
 	}
 	/* Turn on cooling devices that are required, if any are */
 	for (i = TZ_ACTIVE_LEVEL(sc->tz_active) - 1;
 	     i >= TZ_ACTIVE_LEVEL(newactive); i--) {
 	    acpi_ForeachPackageObject(
 		(ACPI_OBJECT *)sc->tz_zone.al[i].Pointer,
 		acpi_tz_switch_cooler_on, sc);
 	}
 
 	ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
 		    "switched from %s to %s: %d.%dC\n",
 		    acpi_tz_aclevel_string(sc->tz_active),
 		    acpi_tz_aclevel_string(newactive), TZ_KELVTOC(temp));
 	sc->tz_active = newactive;
 	/* Restart the clock used for the minimum run time check above. */
 	getnanotime(&sc->tz_cooling_started);
     }
 
     /* XXX (de)activate any passive cooling that may be required. */
 
     /*
      * If the temperature is at _HOT or _CRT, increment our event count.
      * If it has occurred enough times, shutdown the system.  This is
      * needed because some systems will report an invalid high temperature
      * for one poll cycle.  It is suspected this is due to the embedded
      * controller timing out.  A typical value is 138C for one cycle on
      * a system that is otherwise 65C.
      *
      * If we're almost at that threshold, notify the user through devd(8).
      */
     if ((newflags & (TZ_THFLAG_HOT | TZ_THFLAG_CRT)) != 0) {
 	sc->tz_validchecks++;
 	if (sc->tz_validchecks == TZ_VALIDCHECKS) {
 	    device_printf(sc->tz_dev,
 		"WARNING - current temperature (%d.%dC) exceeds safe limits\n",
 		TZ_KELVTOC(sc->tz_temperature));
 	    shutdown_nice(RB_POWEROFF);
 	} else if (sc->tz_validchecks == TZ_NOTIFYCOUNT)
 	    acpi_UserNotify("Thermal", sc->tz_handle, TZ_NOTIFY_CRITICAL);
     } else {
 	sc->tz_validchecks = 0;
     }
     sc->tz_thflags = newflags;
 
     return_VOID;
 }
 
 /*
  * Callback for acpi_ForeachPackageObject(): resolve obj to a handle and
  * ask for that consumer to be put into D3.  Objects that do not resolve
  * to a handle are skipped.  The arg parameter is not used.
  */
 static void
 acpi_tz_switch_cooler_off(ACPI_OBJECT *obj, void *arg)
 {
     ACPI_HANDLE			target;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     target = acpi_GetReference(NULL, obj);
     if (target != NULL) {
 	ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "called to turn %s off\n",
 			 acpi_name(target)));
 	acpi_pwr_switch_consumer(target, ACPI_STATE_D3);
     } else {
 	ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "can't get handle\n"));
     }
 
     return_VOID;
 }
 
 /*
  * Callback for acpi_ForeachPackageObject(): resolve obj to a handle and
  * ask for that consumer to be put into D0.  A failure to switch is
  * reported through ACPI_VPRINT; arg is the zone's softc and is only used
  * for that message.
  *
  * XXX replication of off/on function code is bad.
  */
 static void
 acpi_tz_switch_cooler_on(ACPI_OBJECT *obj, void *arg)
 {
     struct acpi_tz_softc	*sc;
     ACPI_HANDLE			target;
     ACPI_STATUS			status;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     sc = (struct acpi_tz_softc *)arg;
     target = acpi_GetReference(NULL, obj);
     if (target != NULL) {
 	ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "called to turn %s on\n",
 			 acpi_name(target)));
 	status = acpi_pwr_switch_consumer(target, ACPI_STATE_D0);
 	if (ACPI_FAILURE(status)) {
 	    ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
 			"failed to activate %s - %s\n", acpi_name(target),
 			AcpiFormatException(status));
 	}
     } else {
 	ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "can't get handle\n"));
     }
 
     return_VOID;
 }
 
 /*
  * Evaluate the integer object "node" under the zone's handle into *data.
  * On success the value is debug-printed; on failure *data is set to -1.
  */
 static void
 acpi_tz_getparam(struct acpi_tz_softc *sc, char *node, int *data)
 {
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (!ACPI_FAILURE(acpi_GetInteger(sc->tz_handle, node, data))) {
 	ACPI_DEBUG_PRINT((ACPI_DB_VALUES, "%s.%s = %d\n",
 			 acpi_name(sc->tz_handle), node, *data));
 	return_VOID;
     }
 
     /* Evaluation failed: mark the parameter as unavailable. */
     *data = -1;
     return_VOID;
 }
 
 /*
  * Sanity-check a temperature value.  Setpoints are expected to lie
  * between 0C and 200C; anything else (except -1, which is passed
  * through) is reported and replaced by -1.
  *
  * _TMP is recognised by comparing "what" against the acpi_tz_tmp_name
  * pointer itself.  For _TMP the warning is printed only once per run of
  * bad readings, so a broken sensor does not flood the console; all other
  * values are reported every time.
  */
 static void
 acpi_tz_sanity(struct acpi_tz_softc *sc, int *val, char *what)
 {
     int		is_tmp = (what == acpi_tz_tmp_name);
 
     if (*val == -1 || (*val >= TZ_ZEROC && *val <= TZ_ZEROC + 2000)) {
 	/* Acceptable value: re-arm the _TMP warning for the next bad one. */
 	if (is_tmp)
 	    sc->tz_insane_tmp_notified = 0;
 	return;
     }
 
     if (!is_tmp || !sc->tz_insane_tmp_notified) {
 	device_printf(sc->tz_dev, "%s value is absurd, ignored (%d.%dC)\n",
 		      what, TZ_KELVTOC(*val));
 
 	/* Stay quiet about _TMP until a good reading has been seen. */
 	if (is_tmp)
 	    sc->tz_insane_tmp_notified = 1;
     }
     *val = -1;
 }
 
 /*
  * Respond to a sysctl on the active state node.
  */
 static int
 acpi_tz_active_sysctl(SYSCTL_HANDLER_ARGS)
 {
     struct acpi_tz_softc	*sc;
     int				level;
     int				rv;
 
     sc = (struct acpi_tz_softc *)oidp->oid_arg1;
 
     /* Export the current active level and pick up any new value. */
     level = sc->tz_active;
     rv = sysctl_handle_int(oidp, &level, 0, req);
     if (rv != 0)
 	return (rv);
 
     /* Pure read: nothing more to do. */
     if (req->newptr == NULL)
 	return (0);
 
     /* Accept only -1 or a valid level index. */
     if (!(level >= -1 && level < TZ_NUMLEVELS))
 	return (EINVAL);
 
     /* Record the preferred level and wake the monitor to apply it. */
     sc->tz_requested = level;
     acpi_tz_signal(sc, 0);
     return (0);
 }
 
 /*
  * Respond to a sysctl on the passive cooling enable node.
  */
 static int
 acpi_tz_cooling_sysctl(SYSCTL_HANDLER_ARGS)
 {
     struct acpi_tz_softc *sc;
     int want, rv;
 
     sc = (struct acpi_tz_softc *)oidp->oid_arg1;
 
     want = sc->tz_cooling_enabled;
     rv = sysctl_handle_int(oidp, &want, 0, req);
     if (rv != 0)
 	return (rv);
     if (req->newptr == NULL)
 	return (0);
 
     /* Only the two boolean values are acceptable. */
     if (!(want == TRUE || want == FALSE))
 	return (EINVAL);
 
     if (want != FALSE) {
 	/* Enabling needs usable zone parameters and a running thread. */
 	rv = acpi_tz_cooling_is_available(sc) ?
 	    acpi_tz_cooling_thread_start(sc) : ENODEV;
 
 	/* On failure the knob falls back to disabled. */
 	if (rv != 0)
 	    want = FALSE;
     }
 
     sc->tz_cooling_enabled = want;
     return (rv);
 }
 
 static int
 acpi_tz_temp_sysctl(SYSCTL_HANDLER_ARGS)
 {
     struct acpi_tz_softc	*sc;
     int				temp, *temp_ptr;
     int		 		error;
 
     sc = oidp->oid_arg1;
     temp_ptr = (int *)(void *)(uintptr_t)((uintptr_t)sc + oidp->oid_arg2);
     temp = *temp_ptr;
     error = sysctl_handle_int(oidp, &temp, 0, req);
 
     /* Error or no new value */
     if (error != 0 || req->newptr == NULL)
 	return (error);
 
     /* Only allow changing settings if override is set. */
     if (!acpi_tz_override)
 	return (EPERM);
 
     /* Check user-supplied value for sanity. */
     acpi_tz_sanity(sc, &temp, "user-supplied temp");
     if (temp == -1)
 	return (EINVAL);
 
     *temp_ptr = temp;
     return (0);
 }
 
 static int
 acpi_tz_passive_sysctl(SYSCTL_HANDLER_ARGS)
 {
     struct acpi_tz_softc	*sc;
     int				val, *val_ptr;
     int				error;
 
     sc = oidp->oid_arg1;
     val_ptr = (int *)(void *)(uintptr_t)((uintptr_t)sc + oidp->oid_arg2);
     val = *val_ptr;
     error = sysctl_handle_int(oidp, &val, 0, req);
 
     /* Error or no new value */
     if (error != 0 || req->newptr == NULL)
 	return (error);
 
     /* Only allow changing settings if override is set. */
     if (!acpi_tz_override)
 	return (EPERM);
 
     *val_ptr = val;
     return (0);
 }
 
 /*
  * Handle a Notify delivered for a thermal zone: wake the monitor with the
  * appropriate flags, then forward the event via acpi_UserNotify().
  */
 static void
 acpi_tz_notify_handler(ACPI_HANDLE h, UINT32 notify, void *context)
 {
     struct acpi_tz_softc	*sc;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     sc = (struct acpi_tz_softc *)context;
 
     if (notify == TZ_NOTIFY_TEMPERATURE) {
 	/* Temperature changed: a plain re-check is enough. */
 	acpi_tz_signal(sc, 0);
     } else if (notify == TZ_NOTIFY_DEVICES || notify == TZ_NOTIFY_LEVELS) {
 	/* Devices or setpoints changed: the zone must be re-read. */
 	acpi_tz_signal(sc, TZ_FLAG_GETSETTINGS);
     } else {
 	ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
 		    "unknown Notify event 0x%x\n", notify);
     }
 
     /* Every event, known or not, is forwarded. */
     acpi_UserNotify("Thermal", h, notify);
 
     return_VOID;
 }
 
 /*
  * Post work for the thermal zone monitor thread: OR the given flags into
  * the zone's pending flags and wake anything sleeping on &acpi_tz_proc
  * (acpi_tz_thread() sleeps on that channel).  A flags value of 0 simply
  * triggers a wakeup.
  */
 static void
 acpi_tz_signal(struct acpi_tz_softc *sc, int flags)
 {
     /* tz_flags is updated under the thermal lock. */
     ACPI_LOCK(thermal);
     sc->tz_flags |= flags;
     ACPI_UNLOCK(thermal);
     wakeup(&acpi_tz_proc);
 }
 
 /*
  * Notifies can be generated asynchronously but have also been seen to be
  * triggered by other thermal methods.  One system generates a notify of
  * 0x81 when the fan is turned on or off.  Another generates it when _SCP
  * is called.  To handle these situations, we check the zone via
  * acpi_tz_monitor() before evaluating changes to setpoints or the cooling
  * policy.
  */
 static void
 acpi_tz_timeout(struct acpi_tz_softc *sc, int flags)
 {
     int	want_profile, want_settings;
 
     want_profile = (flags & TZ_FLAG_GETPROFILE) != 0;
     want_settings = (flags & TZ_FLAG_GETSETTINGS) != 0;
 
     /* Always start by sampling the zone and reacting to the reading. */
     acpi_tz_monitor(sc);
 
     /* Push the current power profile to the zone when asked to. */
     if (want_profile)
 	acpi_tz_power_profile(sc);
 
     /*
      * Re-read devices/setpoints when asked to, then sample once more so
      * that the fans follow the freshly fetched values.
      */
     if (want_settings) {
 	acpi_tz_establish(sc);
 	acpi_tz_monitor(sc);
     }
 
     /* XXX passive cooling actions? */
 }
 
 /*
  * System power profile may have changed; fetch and notify the
  * thermal zone accordingly.
  *
  * Since this can be called from an arbitrary eventhandler, it needs
  * to get the ACPI lock itself.
  */
 static void
 acpi_tz_power_profile(void *arg)
 {
     struct acpi_tz_softc	*sc;
     ACPI_STATUS			rv;
     int				scp_arg;
 
     sc = (struct acpi_tz_softc *)arg;
 
     /* Map the profile to the _SCP argument; ignore any other state. */
     switch (power_profile_get_state()) {
     case POWER_PROFILE_PERFORMANCE:
 	scp_arg = 0;
 	break;
     case POWER_PROFILE_ECONOMY:
 	scp_arg = 1;
 	break;
     default:
 	return;
     }
 
     /* A previous failure already told us not to try _SCP again. */
     if ((sc->tz_flags & TZ_FLAG_NO_SCP) != 0)
 	return;
 
     /* Call _SCP to set the new profile. */
     rv = acpi_SetInteger(sc->tz_handle, "_SCP", scp_arg);
     if (!ACPI_FAILURE(rv)) {
 	/* The whole zone has to be re-evaluated after a policy change. */
 	acpi_tz_signal(sc, TZ_FLAG_GETSETTINGS);
 	return;
     }
 
     /* A missing method is not worth reporting; anything else is. */
     if (rv != AE_NOT_FOUND) {
 	ACPI_VPRINT(sc->tz_dev,
 		    acpi_device_get_parent_softc(sc->tz_dev),
 		    "can't evaluate %s._SCP - %s\n",
 		    acpi_name(sc->tz_handle),
 		    AcpiFormatException(rv));
     }
 
     /*
      * Any failure disables further _SCP attempts.
      * NOTE(review): this flag update is made without ACPI_LOCK(thermal),
      * unlike the other tz_flags writers visible here - confirm intended.
      */
     sc->tz_flags |= TZ_FLAG_NO_SCP;
 }
 
 /*
  * Thermal zone monitor thread.
  */
 static void
 acpi_tz_thread(void *arg)
 {
     device_t	*devs;		/* device list from acpi_tz_devclass */
     int		devcount, i;
     int		flags;
     struct acpi_tz_softc **sc;	/* sc[i] is the softc of devs[i] */
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     devs = NULL;
     devcount = 0;
     sc = NULL;
 
     /* The loop never exits; arg is not used. */
     for (;;) {
 	/* If the number of devices has changed, re-evaluate. */
 	if (devclass_get_count(acpi_tz_devclass) != devcount) {
 	    /* Throw away the previous snapshot before taking a new one. */
 	    if (devs != NULL) {
 		free(devs, M_TEMP);
 		free(sc, M_TEMP);
 	    }
 	    devclass_get_devices(acpi_tz_devclass, &devs, &devcount);
 	    sc = malloc(sizeof(struct acpi_tz_softc *) * devcount, M_TEMP,
 			M_WAITOK | M_ZERO);
 	    for (i = 0; i < devcount; i++)
 		sc[i] = device_get_softc(devs[i]);
 	}
 
 	/* Check for temperature events and act on them. */
 	for (i = 0; i < devcount; i++) {
 	    /*
 	     * Take a copy of the pending flags and clear everything except
 	     * TZ_FLAG_NO_SCP under the lock; the copy is then processed
 	     * with the lock released.
 	     */
 	    ACPI_LOCK(thermal);
 	    flags = sc[i]->tz_flags;
 	    sc[i]->tz_flags &= TZ_FLAG_NO_SCP;
 	    ACPI_UNLOCK(thermal);
 	    acpi_tz_timeout(sc[i], flags);
 	}
 
 	/* If more work to do, don't go to sleep yet. */
 	ACPI_LOCK(thermal);
 	for (i = 0; i < devcount; i++) {
 	    /* Flags other than TZ_FLAG_NO_SCP mean new work was posted. */
 	    if (sc[i]->tz_flags & ~TZ_FLAG_NO_SCP)
 		break;
 	}
 
 	/*
 	 * If we have no more work, sleep for a while, setting PDROP so that
 	 * the mutex will not be reacquired.  Otherwise, drop the mutex and
 	 * loop to handle more events.
 	 *
 	 * The sleep ends on a wakeup of &acpi_tz_proc (acpi_tz_signal()
 	 * issues one) or after hz * acpi_tz_polling_rate ticks.
 	 */
 	if (i == devcount)
 	    msleep(&acpi_tz_proc, &thermal_mutex, PZERO | PDROP, "tzpoll",
 		hz * acpi_tz_polling_rate);
 	else
 	    ACPI_UNLOCK(thermal);
     }
 }
 
 /*
  * Undo passive-cooling throttling.  Does nothing (and returns 0) unless
  * this zone has previously changed the clock; otherwise issues
  * CPUFREQ_SET() with a NULL level on cpufreq0 and, on success, clears
  * tz_cooling_updated.  Returns ENXIO when cpufreq0 does not exist.
  */
 static int
 acpi_tz_cpufreq_restore(struct acpi_tz_softc *sc)
 {
     device_t cf_dev;
     int rv;
 
     /* Nothing was changed by us, so there is nothing to restore. */
     if (!sc->tz_cooling_updated)
 	return (0);
 
     cf_dev = devclass_get_device(devclass_find("cpufreq"), 0);
     if (cf_dev == NULL)
 	return (ENXIO);
 
     ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
 	"temperature %d.%dC: resuming previous clock speed (%d MHz)\n",
 	TZ_KELVTOC(sc->tz_temperature), sc->tz_cooling_saved_freq);
 
     rv = CPUFREQ_SET(cf_dev, NULL, CPUFREQ_PRIO_KERN);
     if (rv != 0)
 	return (rv);
 
     sc->tz_cooling_updated = FALSE;
     return (0);
 }
 
 /*
  * Adjust the CPU clock for passive cooling.
  *
  * "req" is the requested change in performance, in percent of the
  * maximum frequency: a positive value lowers the clock, a negative value
  * raises it again (never above the frequency saved when throttling
  * started).  Reaching the top level or the saved frequency hands control
  * back via acpi_tz_cpufreq_restore().
  *
  * Returns 0 on success or an errno value: ENOMEM if the level table
  * cannot be allocated, ENXIO if cpufreq0 is missing or reports no usable
  * levels, or whatever the CPUFREQ_*() methods return.
  */
 static int
 acpi_tz_cpufreq_update(struct acpi_tz_softc *sc, int req)
 {
     device_t dev;
     struct cf_level *levels;
     int num_levels, error, freq, desired_freq, perf, i;
 
     levels = malloc(CPUFREQ_MAX_LEVELS * sizeof(*levels), M_TEMP, M_NOWAIT);
     if (levels == NULL)
 	return (ENOMEM);
 
     /*
      * Find the main device, cpufreq0.  We don't yet support independent
      * CPU frequency control on SMP.
      */
     if ((dev = devclass_get_device(devclass_find("cpufreq"), 0)) == NULL) {
 	error = ENXIO;
 	goto out;
     }
 
     /* Get the current frequency. */
     error = CPUFREQ_GET(dev, &levels[0]);
     if (error)
 	goto out;
     freq = levels[0].total_set.freq;
 
     /* Get the current available frequency levels. */
     num_levels = CPUFREQ_MAX_LEVELS;
     error = CPUFREQ_LEVELS(dev, levels, &num_levels);
     if (error) {
 	if (error == E2BIG)
 	    printf("cpufreq: need to increase CPUFREQ_MAX_LEVELS\n");
 	goto out;
     }
 
     /*
      * Be defensive: with an empty level list the "use the lowest" fallback
      * below would index levels[-1], and a zero top frequency would make
      * the percentage calculation divide by zero.
      */
     if (num_levels < 1 || levels[0].total_set.freq == 0) {
 	error = ENXIO;
 	goto out;
     }
 
     /*
      * Calculate the desired frequency as a percent of the max frequency.
      * levels[0] is treated as the highest level throughout this function.
      */
     perf = 100 * freq / levels[0].total_set.freq - req;
     if (perf < 0)
 	perf = 0;
     else if (perf > 100)
 	perf = 100;
     desired_freq = levels[0].total_set.freq * perf / 100;
 
     if (desired_freq < freq) {
 	/* Find the closest available frequency, rounding down. */
 	for (i = 0; i < num_levels; i++)
 	    if (levels[i].total_set.freq <= desired_freq)
 		break;
 
 	/* If we didn't find a relevant setting, use the lowest. */
 	if (i == num_levels)
 	    i--;
     } else {
 	/* If we didn't decrease frequency yet, don't increase it. */
 	if (!sc->tz_cooling_updated) {
 	    sc->tz_cooling_active = FALSE;
 	    goto out;
 	}
 
 	/* Use saved cpu frequency as maximum value. */
 	if (desired_freq > sc->tz_cooling_saved_freq)
 	    desired_freq = sc->tz_cooling_saved_freq;
 
 	/* Find the closest available frequency, rounding up. */
 	for (i = num_levels - 1; i >= 0; i--)
 	    if (levels[i].total_set.freq >= desired_freq)
 		break;
 
 	/* If we didn't find a relevant setting, use the highest. */
 	if (i == -1)
 	    i++;
 
 	/* If we're going to the highest frequency, restore the old setting. */
 	if (i == 0 || desired_freq == sc->tz_cooling_saved_freq) {
 	    error = acpi_tz_cpufreq_restore(sc);
 	    if (error == 0)
 		sc->tz_cooling_active = FALSE;
 	    goto out;
 	}
     }
 
     /* If we are going to a new frequency, activate it. */
     if (levels[i].total_set.freq != freq) {
 	ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
 	    "temperature %d.%dC: %screasing clock speed "
 	    "from %d MHz to %d MHz\n",
 	    TZ_KELVTOC(sc->tz_temperature),
 	    (freq > levels[i].total_set.freq) ? "de" : "in",
 	    freq, levels[i].total_set.freq);
 	error = CPUFREQ_SET(dev, &levels[i], CPUFREQ_PRIO_KERN);
 
 	/* Remember the pre-throttling frequency the first time only. */
 	if (error == 0 && !sc->tz_cooling_updated) {
 	    sc->tz_cooling_saved_freq = freq;
 	    sc->tz_cooling_updated = TRUE;
 	}
     }
 
 out:
     /* levels is never NULL here: the allocation failure returned early. */
     free(levels, M_TEMP);
     return (error);
 }
 
 /*
  * Passive cooling thread; monitors current temperature according to the
  * cooling interval and calculates whether to scale back CPU frequency.
  */
 static void
 acpi_tz_cooling_thread(void *arg)
 {
     struct acpi_tz_softc *sc;
     int error, perf, curr_temp, prev_temp;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     sc = (struct acpi_tz_softc *)arg;
 
     prev_temp = sc->tz_temperature;
 
     /* Run until passive cooling is switched off (sysctl or error below). */
     while (sc->tz_cooling_enabled) {
 	/* Re-read the sensor ourselves only while actively cooling. */
 	if (sc->tz_cooling_active)
 	    (void)acpi_tz_get_temperature(sc);
 	curr_temp = sc->tz_temperature;
 
 	/* Reaching the passive setpoint starts active throttling. */
 	if (curr_temp >= sc->tz_zone.psv)
 	    sc->tz_cooling_active = TRUE;
 	if (sc->tz_cooling_active) {
 	    /*
 	     * Requested performance change: tc1 weights the change since
 	     * the previous sample, tc2 weights the distance from the
 	     * passive setpoint.  The sum is divided by 10 before being
 	     * passed to acpi_tz_cpufreq_update().
 	     */
 	    perf = sc->tz_zone.tc1 * (curr_temp - prev_temp) +
 		   sc->tz_zone.tc2 * (curr_temp - sc->tz_zone.psv);
 	    perf /= 10;
 
 	    if (perf != 0) {
 		error = acpi_tz_cpufreq_update(sc, perf);
 
 		/*
 		 * If error and not simply a higher priority setting was
 		 * active, disable cooling.
 		 */
 		if (error != 0 && error != EPERM) {
 		    device_printf(sc->tz_dev,
 			"failed to set new freq, disabling passive cooling\n");
 		    sc->tz_cooling_enabled = FALSE;
 		}
 	    }
 	}
 	prev_temp = curr_temp;
 
 	/* Sleep for hz * tsp / 10 ticks between samples. */
 	tsleep(&sc->tz_cooling_proc, PZERO, "cooling",
 	    hz * sc->tz_zone.tsp / 10);
     }
 
     /* Leaving: undo any throttling that is still in effect. */
     if (sc->tz_cooling_active) {
 	acpi_tz_cpufreq_restore(sc);
 	sc->tz_cooling_active = FALSE;
     }
 
     /*
      * Clear the proc pointer first, then the running flag under the lock;
      * acpi_tz_cooling_thread_start() tests both before creating a thread.
      */
     sc->tz_cooling_proc = NULL;
     ACPI_LOCK(thermal);
     sc->tz_cooling_proc_running = FALSE;
     ACPI_UNLOCK(thermal);
     kproc_exit(0);
 }
 
 /*
  * TODO: We ignore _PSL (list of cooling devices) since cpufreq enumerates
  * all CPUs for us.  However, it's possible in the future _PSL will
  * reference non-CPU devices so we may want to support it then.
  */
 static int
 acpi_tz_cooling_is_available(struct acpi_tz_softc *sc)
 {
     /* Both coefficients must have been obtained. */
     if (sc->tz_zone.tc1 == -1 || sc->tz_zone.tc2 == -1)
 	return (0);
 
     /* The sampling period must be present and non-zero. */
     if (sc->tz_zone.tsp == -1 || sc->tz_zone.tsp == 0)
 	return (0);
 
     /* Finally, a passive setpoint is required. */
     return (sc->tz_zone.psv != -1);
 }
 
 /*
  * Start the passive cooling thread for this zone unless one is already
  * marked as running.  Returns 0 on success (including "already running")
  * or the error from kproc_create().
  */
 static int
 acpi_tz_cooling_thread_start(struct acpi_tz_softc *sc)
 {
     int error;
 
     /* Claim the "running" flag under the lock so only one caller proceeds. */
     ACPI_LOCK(thermal);
     if (sc->tz_cooling_proc_running) {
 	ACPI_UNLOCK(thermal);
 	return (0);
     }
     sc->tz_cooling_proc_running = TRUE;
     ACPI_UNLOCK(thermal);
     error = 0;
     if (sc->tz_cooling_proc == NULL) {
 	error = kproc_create(acpi_tz_cooling_thread, sc,
 	    &sc->tz_cooling_proc, RFHIGHPID, 0, "acpi_cooling%d",
 	    device_get_unit(sc->tz_dev));
 	if (error != 0) {
 	    /* Terminate the message so it does not run into the next one. */
 	    device_printf(sc->tz_dev, "could not create thread - %d\n",
 		error);
 
 	    /* Creation failed: release the claim taken above. */
 	    ACPI_LOCK(thermal);
 	    sc->tz_cooling_proc_running = FALSE;
 	    ACPI_UNLOCK(thermal);
 	}
     }
     return (error);
 }
Index: projects/vnet/sys/dev/cxgbe/t4_ioctl.h
===================================================================
--- projects/vnet/sys/dev/cxgbe/t4_ioctl.h	(revision 301522)
+++ projects/vnet/sys/dev/cxgbe/t4_ioctl.h	(revision 301523)
@@ -1,345 +1,347 @@
 /*-
  * Copyright (c) 2011 Chelsio Communications, Inc.
  * All rights reserved.
  * Written by: Navdeep Parhar <np@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  *
  */
 
 #ifndef __T4_IOCTL_H__
 #define __T4_IOCTL_H__
 
 #include <sys/types.h>
 #include <net/ethernet.h>
 
 /*
  * Ioctl commands specific to this driver.
  */
 /*
  * Command numbers, starting at 0x40.  They are the second argument of the
  * _IOW()/_IOWR() invocations that define the CHELSIO_T4_* requests at the
  * end of this header.
  */
 enum {
 	T4_GETREG = 0x40,		/* read register */
 	T4_SETREG,			/* write register */
 	T4_REGDUMP,			/* dump of all registers */
 	T4_GET_FILTER_MODE,		/* get global filter mode */
 	T4_SET_FILTER_MODE,		/* set global filter mode */
 	T4_GET_FILTER,			/* get information about a filter */
 	T4_SET_FILTER,			/* program a filter */
 	T4_DEL_FILTER,			/* delete a filter */
 	T4_GET_SGE_CONTEXT,		/* get SGE context for a queue */
 	T4_LOAD_FW,			/* flash firmware */
 	T4_GET_MEM,			/* read memory */
 	T4_GET_I2C,			/* read from i2c addressable device */
 	T4_CLEAR_STATS,			/* clear a port's MAC statistics */
 	T4_SET_OFLD_POLICY,		/* Set offload policy */
 	T4_SET_SCHED_CLASS,             /* set sched class */
 	T4_SET_SCHED_QUEUE,             /* set queue class */
 	T4_GET_TRACER,			/* get information about a tracer */
 	T4_SET_TRACER,			/* program a tracer */
 };
 
 /* Argument of the CHELSIO_T4_GETREG and CHELSIO_T4_SETREG ioctls. */
 struct t4_reg {
 	uint32_t addr;
 	uint32_t size;
 	uint64_t val;
 };
 
 #define T4_REGDUMP_SIZE  (160 * 1024)
 #define T5_REGDUMP_SIZE  (332 * 1024)
 /* Argument of the CHELSIO_T4_REGDUMP ioctl. */
 struct t4_regdump {
 	uint32_t version;
 	uint32_t len; /* bytes */
 	uint32_t *data;
 };
 
 /* Length/pointer pair; argument of the CHELSIO_T4_LOAD_FW ioctl. */
 struct t4_data {
 	uint32_t len;
 	uint8_t *data;
 };
 
 /*
  * Argument of the CHELSIO_T4_GET_I2C ioctl.  The inline buffer holds at
  * most 8 bytes.
  */
 struct t4_i2c_data {
 	uint8_t port_id;
 	uint8_t dev_addr;
 	uint8_t offset;
 	uint8_t len;
 	uint8_t data[8];
 };
 
 /*
  * A hardware filter is some valid combination of these.
  */
 #define T4_FILTER_IPv4		0x1	/* IPv4 packet */
 #define T4_FILTER_IPv6		0x2	/* IPv6 packet */
 #define T4_FILTER_IP_SADDR	0x4	/* Source IP address or network */
 #define T4_FILTER_IP_DADDR	0x8	/* Destination IP address or network */
 #define T4_FILTER_IP_SPORT	0x10	/* Source IP port */
 #define T4_FILTER_IP_DPORT	0x20	/* Destination IP port */
 #define T4_FILTER_FCoE		0x40	/* Fibre Channel over Ethernet packet */
 #define T4_FILTER_PORT		0x80	/* Physical ingress port */
 #define T4_FILTER_VNIC		0x100	/* VNIC id or outer VLAN */
 #define T4_FILTER_VLAN		0x200	/* VLAN ID */
 #define T4_FILTER_IP_TOS	0x400	/* IPv4 TOS/IPv6 Traffic Class */
 #define T4_FILTER_IP_PROTO	0x800	/* IP protocol */
 #define T4_FILTER_ETH_TYPE	0x1000	/* Ethernet Type */
 #define T4_FILTER_MAC_IDX	0x2000	/* MPS MAC address match index */
 #define T4_FILTER_MPS_HIT_TYPE	0x4000	/* MPS match type */
 #define T4_FILTER_IP_FRAGMENT	0x8000	/* IP fragment */
 
 #define T4_FILTER_IC_VNIC	0x80000000	/* TP Ingress Config's F_VNIC
 						   bit.  It indicates whether
 						   T4_FILTER_VNIC bit means VNIC
 						   id (PF/VF) or outer VLAN.
 						   0 = oVLAN, 1 = VNIC */
 
 /* Filter action */
 /* Presumably the values of t4_filter_specification.action - confirm. */
 enum {
 	FILTER_PASS = 0,	/* default */
 	FILTER_DROP,
 	FILTER_SWITCH
 };
 
 /* 802.1q manipulation on FILTER_SWITCH */
 /* Presumably the values of t4_filter_specification.newvlan - confirm. */
 enum {
 	VLAN_NOCHANGE = 0,	/* default */
 	VLAN_REMOVE,
 	VLAN_INSERT,
 	VLAN_REWRITE
 };
 
 /* MPS match type */
 /* Presumably the values of t4_filter_tuple.matchtype - confirm. */
 enum {
 	UCAST_EXACT = 0,       /* exact unicast match */
 	UCAST_HASH  = 1,       /* inexact (hashed) unicast match */
 	MCAST_EXACT = 2,       /* exact multicast match */
 	MCAST_HASH  = 3,       /* inexact (hashed) multicast match */
 	PROMISC     = 4,       /* no match but port is promiscuous */
 	HYPPROMISC  = 5,       /* port is hypervisor-promiscuous + not bcast */
 	BCAST       = 6,       /* broadcast packet */
 };
 
 /* Rx steering */
 /* These constants are not referenced by any other declaration in this header. */
 enum {
 	DST_MODE_QUEUE,        /* queue is directly specified by filter */
 	DST_MODE_RSS_QUEUE,    /* filter specifies RSS entry containing queue */
 	DST_MODE_RSS,          /* queue selected by default RSS hash lookup */
 	DST_MODE_FILT_RSS      /* queue selected by hashing in filter-specified
 				  RSS subtable */
 };
 
 /*
  * The packet fields a filter can match on.  t4_filter_specification
  * embeds this structure twice, once as the value and once as the mask.
  */
 struct t4_filter_tuple {
 	/*
 	 * These are always available.
 	 */
 	uint8_t sip[16];	/* source IP address (IPv4 in [3:0]) */
 	uint8_t dip[16];	/* destination IP address (IPv4 in [3:0]) */
 	uint16_t sport;		/* source port */
 	uint16_t dport;		/* destination port */
 
 	/*
 	 * A combination of these (up to 36 bits) is available.  TP_VLAN_PRI_MAP
 	 * is used to select the global mode and all filters are limited to the
 	 * set of fields allowed by the global mode.
 	 */
 	uint16_t vnic;		/* VNIC id (PF/VF) or outer VLAN tag */
 	uint16_t vlan;		/* VLAN tag */
 	uint16_t ethtype;	/* Ethernet type */
 	uint8_t  tos;		/* TOS/Traffic Type */
 	uint8_t  proto;		/* protocol type */
 	uint32_t fcoe:1;	/* FCoE packet */
 	uint32_t iport:3;	/* ingress port */
 	uint32_t matchtype:3;	/* MPS match type */
 	uint32_t frag:1;	/* fragmentation extension header */
 	uint32_t macidx:9;	/* exact match MAC index */
 	uint32_t vlan_vld:1;	/* VLAN valid */
 	uint32_t ovlan_vld:1;	/* outer VLAN tag valid, value in "vnic" */
 	uint32_t pfvf_vld:1;	/* VNIC id (PF/VF) valid, value in "vnic" */
 };
 
 /*
  * Full description of one filter: behaviour flags, optional switch/rewrite
  * settings, and the value/mask tuples to match.  Embedded in struct
  * t4_filter as its "fs" member.
  */
 struct t4_filter_specification {
 	uint32_t hitcnts:1;	/* count filter hits in TCB */
 	uint32_t prio:1;	/* filter has priority over active/server */
 	uint32_t type:1;	/* 0 => IPv4, 1 => IPv6 */
 	uint32_t action:2;	/* drop, pass, switch */
 	uint32_t rpttid:1;	/* report TID in RSS hash field */
 	uint32_t dirsteer:1;	/* 0 => RSS, 1 => steer to iq */
 	uint32_t iq:10;		/* ingress queue */
 	uint32_t maskhash:1;	/* dirsteer=0: store RSS hash in TCB */
 	uint32_t dirsteerhash:1;/* dirsteer=1: 0 => TCB contains RSS hash */
 				/*             1 => TCB contains IQ ID */
 
 	/*
 	 * Switch proxy/rewrite fields.  An ingress packet which matches a
 	 * filter with "switch" set will be looped back out as an egress
 	 * packet -- potentially with some Ethernet header rewriting.
 	 */
 	uint32_t eport:2;	/* egress port to switch packet out */
 	uint32_t newdmac:1;	/* rewrite destination MAC address */
 	uint32_t newsmac:1;	/* rewrite source MAC address */
 	uint32_t newvlan:2;	/* rewrite VLAN Tag */
 	uint8_t dmac[ETHER_ADDR_LEN];	/* new destination MAC address */
 	uint8_t smac[ETHER_ADDR_LEN];	/* new source MAC address */
 	uint16_t vlan;		/* VLAN Tag to insert */
 
 	/*
 	 * Filter rule value/mask pairs.
 	 */
 	struct t4_filter_tuple val;
 	struct t4_filter_tuple mask;
 };
 
 /*
  * Argument of the CHELSIO_T4_GET_FILTER, CHELSIO_T4_SET_FILTER and
  * CHELSIO_T4_DEL_FILTER ioctls.
  */
 struct t4_filter {
 	uint32_t idx;
 	uint16_t l2tidx;
 	uint16_t smtidx;
 	uint64_t hits;
 	struct t4_filter_specification fs;
 };
 
+/* Tx Scheduling Class parameters */
+struct t4_sched_class_params {
+	int8_t   level;		/* scheduler hierarchy level */
+	int8_t   mode;		/* per-class or per-flow */
+	int8_t   rateunit;	/* bit or packet rate */
+	int8_t   ratemode;	/* %port relative or kbps absolute */
+	int8_t   channel;	/* scheduler channel [0..N] */
+	int8_t   cl;		/* scheduler class [0..N] */
+	int32_t  minrate;	/* minimum rate */
+	int32_t  maxrate;	/* maximum rate */
+	int16_t  weight;	/* percent weight */
+	int16_t  pktsize;	/* average packet size */
+};
+
 /*
  * Support for "sched-class" command to allow a TX Scheduling Class to be
  * programmed with various parameters.
  */
 struct t4_sched_params {
 	int8_t   subcmd;		/* sub-command */
 	int8_t   type;			/* packet or flow */
 	union {
 		struct {		/* sub-command SCHED_CLASS_CONFIG */
 			int8_t   minmax;	/* minmax enable */
 		} config;
-		struct {		/* sub-command SCHED_CLASS_PARAMS */
-			int8_t   level;		/* scheduler hierarchy level */
-			int8_t   mode;		/* per-class or per-flow */
-			int8_t   rateunit;	/* bit or packet rate */
-			int8_t   ratemode;	/* %port relative or kbps
-						   absolute */
-			int8_t   channel;	/* scheduler channel [0..N] */
-			int8_t   cl;		/* scheduler class [0..N] */
-			int32_t  minrate;	/* minimum rate */
-			int32_t  maxrate;	/* maximum rate */
-			int16_t  weight;	/* percent weight */
-			int16_t  pktsize;	/* average packet size */
-		} params;
+		struct t4_sched_class_params params;
 		uint8_t     reserved[6 + 8 * 8];
 	} u;
 };
 
 /* Presumably the values of t4_sched_params.subcmd - confirm. */
 enum {
 	SCHED_CLASS_SUBCMD_CONFIG,	/* config sub-command */
 	SCHED_CLASS_SUBCMD_PARAMS,	/* params sub-command */
 };
 
 /* Presumably the values of t4_sched_params.type - confirm. */
 enum {
 	SCHED_CLASS_TYPE_PACKET,
 };
 
 enum {
 	SCHED_CLASS_LEVEL_CL_RL,	/* class rate limiter */
 	SCHED_CLASS_LEVEL_CL_WRR,	/* class weighted round robin */
 	SCHED_CLASS_LEVEL_CH_RL,	/* channel rate limiter */
 };
 
 enum {
 	SCHED_CLASS_MODE_CLASS,		/* per-class scheduling */
 	SCHED_CLASS_MODE_FLOW,		/* per-flow scheduling */
 };
 
 enum {
 	SCHED_CLASS_RATEUNIT_BITS,	/* bit rate scheduling */
 	SCHED_CLASS_RATEUNIT_PKTS,	/* packet rate scheduling */
 };
 
 enum {
 	SCHED_CLASS_RATEMODE_REL,	/* percent of port bandwidth */
 	SCHED_CLASS_RATEMODE_ABS,	/* Kb/s */
 };
 
 /*
  * Support for "sched_queue" command to allow one or more NIC TX Queues to be
  * bound to a TX Scheduling Class.
  */
 /* Argument of the CHELSIO_T4_SCHED_QUEUE ioctl. */
 struct t4_sched_queue {
 	uint8_t  port;
 	int8_t   queue;	/* queue index; -1 => all queues */
 	int8_t   cl;	/* class index; -1 => unbind */
 };
 
 #define T4_SGE_CONTEXT_SIZE 24
 /* Presumably the values of t4_sge_context.mem_id - confirm. */
 enum {
 	SGE_CONTEXT_EGRESS,
 	SGE_CONTEXT_INGRESS,
 	SGE_CONTEXT_FLM,
 	SGE_CONTEXT_CNM
 };
 
 /*
  * Argument of the CHELSIO_T4_GET_SGE_CONTEXT ioctl.  "data" holds
  * T4_SGE_CONTEXT_SIZE bytes, stored as 32-bit words.
  */
 struct t4_sge_context {
 	uint32_t mem_id;
 	uint32_t cid;
 	uint32_t data[T4_SGE_CONTEXT_SIZE / 4];
 };
 
 /* Argument of the CHELSIO_T4_GET_MEM ioctl. */
 struct t4_mem_range {
 	uint32_t addr;
 	uint32_t len;
 	uint32_t *data;
 };
 
 #define T4_TRACE_LEN 112
 /*
  * Parameters of one tracer; embedded in struct t4_tracer as "tp".  "data"
  * and "mask" each hold T4_TRACE_LEN bytes, stored as 32-bit words.
  */
 struct t4_trace_params {
 	uint32_t data[T4_TRACE_LEN / 4];
 	uint32_t mask[T4_TRACE_LEN / 4];
 	uint16_t snap_len;
 	uint16_t min_len;
 	uint8_t skip_ofst;
 	uint8_t skip_len;
 	uint8_t invert;
 	uint8_t port;
 };
 
 /* Argument of the CHELSIO_T4_GET_TRACER and CHELSIO_T4_SET_TRACER ioctls. */
 struct t4_tracer {
 	uint8_t idx;
 	uint8_t enabled;
 	uint8_t valid;
 	struct t4_trace_params tp;
 };
 
 #define CHELSIO_T4_GETREG	_IOWR('f', T4_GETREG, struct t4_reg)
 #define CHELSIO_T4_SETREG	_IOW('f', T4_SETREG, struct t4_reg)
 #define CHELSIO_T4_REGDUMP	_IOWR('f', T4_REGDUMP, struct t4_regdump)
 #define CHELSIO_T4_GET_FILTER_MODE _IOWR('f', T4_GET_FILTER_MODE, uint32_t)
 #define CHELSIO_T4_SET_FILTER_MODE _IOW('f', T4_SET_FILTER_MODE, uint32_t)
 #define CHELSIO_T4_GET_FILTER	_IOWR('f', T4_GET_FILTER, struct t4_filter)
 #define CHELSIO_T4_SET_FILTER	_IOW('f', T4_SET_FILTER, struct t4_filter)
 #define CHELSIO_T4_DEL_FILTER	_IOW('f', T4_DEL_FILTER, struct t4_filter)
 #define CHELSIO_T4_GET_SGE_CONTEXT _IOWR('f', T4_GET_SGE_CONTEXT, \
     struct t4_sge_context)
 #define CHELSIO_T4_LOAD_FW	_IOW('f', T4_LOAD_FW, struct t4_data)
 #define CHELSIO_T4_GET_MEM	_IOW('f', T4_GET_MEM, struct t4_mem_range)
 #define CHELSIO_T4_GET_I2C	_IOWR('f', T4_GET_I2C, struct t4_i2c_data)
 #define CHELSIO_T4_CLEAR_STATS	_IOW('f', T4_CLEAR_STATS, uint32_t)
 #define CHELSIO_T4_SCHED_CLASS  _IOW('f', T4_SET_SCHED_CLASS, \
     struct t4_sched_params)
 #define CHELSIO_T4_SCHED_QUEUE  _IOW('f', T4_SET_SCHED_QUEUE, \
     struct t4_sched_queue)
 #define CHELSIO_T4_GET_TRACER	_IOWR('f', T4_GET_TRACER, struct t4_tracer)
 #define CHELSIO_T4_SET_TRACER	_IOW('f', T4_SET_TRACER, struct t4_tracer)
 #endif
Index: projects/vnet/sys/dev/xen/netfront/netfront.c
===================================================================
--- projects/vnet/sys/dev/xen/netfront/netfront.c	(revision 301522)
+++ projects/vnet/sys/dev/xen/netfront/netfront.c	(revision 301523)
@@ -1,2333 +1,2346 @@
 /*-
  * Copyright (c) 2004-2006 Kip Macy
  * Copyright (c) 2015 Wei Liu <wei.liu2@citrix.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/sockio.h>
 #include <sys/limits.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/kernel.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/ethernet.h>
 #include <net/if_media.h>
 #include <net/bpf.h>
 #include <net/if_types.h>
 
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/if_ether.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_lro.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
 #include <sys/bus.h>
 
 #include <xen/xen-os.h>
 #include <xen/hypervisor.h>
 #include <xen/xen_intr.h>
 #include <xen/gnttab.h>
 #include <xen/interface/memory.h>
 #include <xen/interface/io/netif.h>
 #include <xen/xenbus/xenbusvar.h>
 
 #include "xenbus_if.h"
 
 /* Features supported by all backends.  TSO and LRO can be negotiated */
 #define XN_CSUM_FEATURES	(CSUM_TCP | CSUM_UDP)
 
 /* Number of entries in a TX / RX shared ring that occupies one page. */
 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
 
 /*
  * xn_alloc_rx_buffers() does not push requests to the backend while fewer
  * than this many RX requests are outstanding; it retries later instead.
  */
 #define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1)
 
 /*
  * Whether the driver should do LRO on the RX path.  This can be
  * toggled on the fly, but the interface must be reset (down/up)
  * for the change to take effect.
  */
 static int xn_enable_lro = 1;
 TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro);
 
 /*
  * Number of pairs of queues.
  */
 /*
  * Requested number of RX/TX queue pairs (tunable hw.xn.num_queues).
  * talk_to_backend() clamps it to the backend's "multi-queue-max-queues"
  * value, or to 1 when the backend does not publish that node.
  */
 static unsigned long xn_num_queues = 4;
 TUNABLE_ULONG("hw.xn.num_queues", &xn_num_queues);
 
 /**
  * \brief The maximum allowed data fragments in a single transmit
  *        request.
  *
  * This limit is imposed by the backend driver.  We assume here that
  * we are dealing with a Linux driver domain and have set our limit
  * to mirror the Linux MAX_SKB_FRAGS constant.
  */
 #define	MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2)
 
 #define RX_COPY_THRESHOLD 256
 
 /* Stub: always evaluates to 0. */
 #define net_ratelimit() 0
 
 struct netfront_rxq;
 struct netfront_txq;
 struct netfront_info;
 struct netfront_rx_info;
 
 static void xn_txeof(struct netfront_txq *);
 static void xn_rxeof(struct netfront_rxq *);
 static void xn_alloc_rx_buffers(struct netfront_rxq *);
 static void xn_alloc_rx_buffers_callout(void *arg);
 
 static void xn_release_rx_bufs(struct netfront_rxq *);
 static void xn_release_tx_bufs(struct netfront_txq *);
 
 static void xn_rxq_intr(struct netfront_rxq *);
 static void xn_txq_intr(struct netfront_txq *);
 static void xn_intr(void *);
 static inline int xn_count_frags(struct mbuf *m);
 static int xn_assemble_tx_request(struct netfront_txq *, struct mbuf *);
 static int xn_ioctl(struct ifnet *, u_long, caddr_t);
 static void xn_ifinit_locked(struct netfront_info *);
 static void xn_ifinit(void *);
 static void xn_stop(struct netfront_info *);
 static void xn_query_features(struct netfront_info *np);
 static int xn_configure_features(struct netfront_info *np);
 static void netif_free(struct netfront_info *info);
 static int netfront_detach(device_t dev);
 
 static int xn_txq_mq_start_locked(struct netfront_txq *, struct mbuf *);
 static int xn_txq_mq_start(struct ifnet *, struct mbuf *);
 
 static int talk_to_backend(device_t dev, struct netfront_info *info);
 static int create_netdev(device_t dev);
 static void netif_disconnect_backend(struct netfront_info *info);
 static int setup_device(device_t dev, struct netfront_info *info,
     unsigned long);
 static int xn_ifmedia_upd(struct ifnet *ifp);
 static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
 
-int xn_connect(struct netfront_info *);
+static int xn_connect(struct netfront_info *);
+static void xn_kick_rings(struct netfront_info *);
 
 static int xn_get_responses(struct netfront_rxq *,
     struct netfront_rx_info *, RING_IDX, RING_IDX *,
     struct mbuf **);
 
 /* Page frame number backing kernel virtual address x. */
 #define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT)
 
 /* All-ones marker value for an invalid P2M entry. */
 #define INVALID_P2M_ENTRY (~0UL)
 
 /* Receive counters kept per RX queue (struct netfront_rxq.stats). */
 struct xn_rx_stats
 {
 	u_long	rx_packets;	/* total packets received	*/
 	u_long	rx_bytes;	/* total bytes received 	*/
 	u_long	rx_errors;	/* bad packets received		*/
 };
 
 /* Transmit counters kept per TX queue (struct netfront_txq.stats). */
 struct xn_tx_stats
 {
 	u_long	tx_packets;	/* total packets transmitted	*/
 	u_long	tx_bytes;	/* total bytes transmitted	*/
 	u_long	tx_errors;	/* packet transmit problems	*/
 };
 
 #define XN_QUEUE_NAME_LEN  8	/* xn{t,r}x_%u, allow for two digits */
 /*
  * State of one receive queue.
  *
  * grant_ref[] and mbufs[] are indexed by RX ring slot (see xn_rxidx()), so
  * both are sized from NET_RX_RING_SIZE.
  */
 struct netfront_rxq {
 	struct netfront_info 	*info;		/* owning interface */
 	u_int			id;		/* queue index */
 	char			name[XN_QUEUE_NAME_LEN]; /* "xnrx_<id>", mutex name */
 	struct mtx		lock;		/* XN_RX_LOCK() */
 
 	int			ring_ref;	/* grant ref of the ring page */
 	netif_rx_front_ring_t 	ring;
 	xen_intr_handle_t	xen_intr_handle; /* shared with the paired txq */
 
 	grant_ref_t 		gref_head;	/* pool of reserved grant refs */
 	grant_ref_t 		grant_ref[NET_RX_RING_SIZE + 1];
 
 	struct mbuf		*mbufs[NET_RX_RING_SIZE + 1];
 
 	struct lro_ctrl		lro;
 
 	struct callout		rx_refill;	/* retries RX buffer allocation */
 
 	struct xn_rx_stats	stats;
 };
 
 /*
  * State of one transmit queue.
  *
  * Each mbufs[] slot holds either a pointer to an in-flight mbuf or, when
  * the slot is free, the index of the next free slot (values no larger than
  * NET_TX_RING_SIZE).  Slot 0 is the head of that free list.
  */
 struct netfront_txq {
 	struct netfront_info 	*info;		/* owning interface */
 	u_int 			id;		/* queue index */
 	char			name[XN_QUEUE_NAME_LEN]; /* "xntx_<id>" */
 	struct mtx		lock;		/* XN_TX_LOCK() */
 
 	int			ring_ref;	/* grant ref of the ring page */
 	netif_tx_front_ring_t	ring;
 	xen_intr_handle_t 	xen_intr_handle;
 
 	grant_ref_t		gref_head;	/* pool of reserved grant refs */
 	grant_ref_t		grant_ref[NET_TX_RING_SIZE + 1];
 
 	struct mbuf		*mbufs[NET_TX_RING_SIZE + 1];
 	int			mbufs_cnt;	/* mbufs currently held in mbufs[] */
 	struct buf_ring		*br;		/* packets waiting for ring space */
 
 	struct taskqueue 	*tq;		/* runs defrtask */
 	struct task       	defrtask;	/* xn_txq_tq_deferred() */
 
 	bool			full;
 
 	struct xn_tx_stats	stats;
 };
 
 /* Per-device softc: interface-wide state plus the arrays of queues. */
 struct netfront_info {
 	struct ifnet 		*xn_ifp;
 
 	struct mtx   		sc_lock;	/* XN_LOCK() */
 
 	u_int  num_queues;			/* entries in rxq[] and txq[] */
 	struct netfront_rxq 	*rxq;
 	struct netfront_txq 	*txq;
 
 	u_int			carrier;	/* see netfront_carrier_*() */
 	u_int			maxfrags;
 
 	device_t		xbdev;		/* xenbus device */
 	uint8_t			mac[ETHER_ADDR_LEN];
 
 	int			xn_if_flags;
 
 	struct ifmedia		sc_media;
 
 	/* When set, a backend Closed state triggers a reconnect. */
 	bool			xn_reset;
 };
 
 /*
  * One RX response together with its extra-info segments; extras[] is
  * indexed by (extra type - 1).
  */
 struct netfront_rx_info {
 	struct netif_rx_response rx;
 	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
 };
 
 /* Per-queue receive lock (struct netfront_rxq.lock). */
 #define XN_RX_LOCK(_q)         mtx_lock(&(_q)->lock)
 #define XN_RX_UNLOCK(_q)       mtx_unlock(&(_q)->lock)
 
 /* Per-queue transmit lock (struct netfront_txq.lock). */
 #define XN_TX_LOCK(_q)         mtx_lock(&(_q)->lock)
 #define XN_TX_TRYLOCK(_q)      mtx_trylock(&(_q)->lock)
 #define XN_TX_UNLOCK(_q)       mtx_unlock(&(_q)->lock)
 
 /*
  * Softc lock.  NOTE: these two macros, and the *_LOCK_ASSERT macros below,
  * expand with a trailing semicolon, so they must not be used as the body
  * of an unbraced if/else.
  */
 #define XN_LOCK(_sc)           mtx_lock(&(_sc)->sc_lock);
 #define XN_UNLOCK(_sc)         mtx_unlock(&(_sc)->sc_lock);
 
 #define XN_LOCK_ASSERT(_sc)    mtx_assert(&(_sc)->sc_lock, MA_OWNED);
 #define XN_RX_LOCK_ASSERT(_q)  mtx_assert(&(_q)->lock, MA_OWNED);
 #define XN_TX_LOCK_ASSERT(_q)  mtx_assert(&(_q)->lock, MA_OWNED);
 
 /* Software carrier flag kept in struct netfront_info.carrier. */
 #define netfront_carrier_on(netif)	((netif)->carrier = 1)
 #define netfront_carrier_off(netif)	((netif)->carrier = 0)
 #define netfront_carrier_ok(netif)	((netif)->carrier)
 
 /* Helpers for acquiring and freeing slots of the free list kept in an mbuf array. */
 
 /*
  * Push slot 'id' onto the free list threaded through 'list'.  list[0] holds
  * the index of the first free slot; a free slot holds the index of the next.
  */
 static inline void
 add_id_to_freelist(struct mbuf **list, uintptr_t id)
 {
 	struct mbuf *old_head;
 
 	KASSERT(id != 0,
 		("%s: the head item (0) must always be free.", __func__));
 	old_head = list[0];
 	list[id] = old_head;
 	list[0] = (struct mbuf *)id;
 }
 
 static inline unsigned short
 get_id_from_freelist(struct mbuf **list)
 {
 	uintptr_t id;
 
 	id = (uintptr_t)list[0];
 	KASSERT(id != 0,
 		("%s: the head item (0) must always remain free.", __func__));
 	list[0] = list[id];
 	return (id);
 }
 
 /* Map a free-running RX ring index onto a slot of the per-queue arrays. */
 static inline int
 xn_rxidx(RING_IDX idx)
 {
 
 	return (idx & (NET_RX_RING_SIZE - 1));
 }
 
 /* Detach and return the mbuf stored for RX ring index 'ri'. */
 static inline struct mbuf *
 xn_get_rx_mbuf(struct netfront_rxq *rxq, RING_IDX ri)
 {
 	struct mbuf **slot;
 	struct mbuf *m;
 
 	slot = &rxq->mbufs[xn_rxidx(ri)];
 	m = *slot;
 	*slot = NULL;
 	return (m);
 }
 
 /*
  * Take the grant reference stored for RX ring index 'ri', marking the slot
  * as holding no reference.
  */
 static inline grant_ref_t
 xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri)
 {
 	grant_ref_t ref;
 	int slot;
 
 	slot = xn_rxidx(ri);
 	ref = rxq->grant_ref[slot];
 	KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n"));
 	rxq->grant_ref[slot] = GRANT_REF_INVALID;
 	return (ref);
 }
 
 #define IPRINTK(fmt, args...) \
     printf("[XEN] " fmt, ##args)
 #ifdef INVARIANTS
 #define WPRINTK(fmt, args...) \
     printf("[XEN] " fmt, ##args)
 #else
 #define WPRINTK(fmt, args...)
 #endif
 #ifdef DEBUG
 #define DPRINTK(fmt, args...) \
     printf("[XEN] %s: " fmt, __func__, ##args)
 #else
 #define DPRINTK(fmt, args...)
 #endif
 
 /**
  * Read the 'mac' node at the given device's node in the store, and parse that
  * as colon-separated hexadecimal octets, placing the result in the given mac
  * array.  mac must be a preallocated array of length ETHER_ADDR_LEN.
  *
  * Return 0 on success, the xs_read() error if the node cannot be read, or
  * ENOENT if the string is not a well-formed MAC address.
  */
 static int
 xen_net_read_mac(device_t dev, uint8_t mac[])
 {
 	int error, i;
 	char *s, *e, *macstr;
 	const char *path;
 
 	path = xenbus_get_node(dev);
 	error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr);
 	if (error == ENOENT) {
 		/*
 		 * Deal with missing mac XenStore nodes on devices with
 		 * HVM emulation (the 'ioemu' configuration attribute)
 		 * enabled.
 		 *
 		 * The HVM emulator may execute in a stub device model
 		 * domain which lacks the permission, only given to Dom0,
 		 * to update the guest's XenStore tree.  For this reason,
 		 * the HVM emulator doesn't even attempt to write the
 		 * front-side mac node, even when operating in Dom0.
 		 * However, there should always be a mac listed in the
 		 * backend tree.  Fallback to this version if our query
 		 * of the front side XenStore location doesn't find
 		 * anything.
 		 */
 		path = xenbus_get_otherend_path(dev);
 		error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr);
 	}
 	if (error != 0) {
 		xenbus_dev_fatal(dev, error, "parsing %s/mac", path);
 		return (error);
 	}
 
 	s = macstr;
 	for (i = 0; i < ETHER_ADDR_LEN; i++) {
 		mac[i] = strtoul(s, &e, 16);
 		/*
 		 * Each octet must be followed by ':' or the end of the
 		 * string.  The string may only end after the last octet;
 		 * otherwise the next iteration would start parsing past
 		 * the terminating NUL.
 		 */
 		if (s == e || (e[0] != ':' && e[0] != 0) ||
 		    (e[0] == 0 && i != ETHER_ADDR_LEN - 1)) {
 			free(macstr, M_XENBUS);
 			return (ENOENT);
 		}
 		s = &e[1];
 	}
 	free(macstr, M_XENBUS);
 	return (0);
 }
 
 /**
  * Device probe method.  Claims xenbus devices of type "vif", unless this is
  * an HVM domain in which PV NICs have been disabled.
  *
  * Return 0 when the device is claimed, ENXIO otherwise.
  */
 static int
 netfront_probe(device_t dev)
 {
 
 	if (xen_hvm_domain() && xen_disable_pv_nics != 0)
 		return (ENXIO);
 
 	if (strcmp(xenbus_get_type(dev), "vif") != 0)
 		return (ENXIO);
 
 	device_set_desc(dev, "Virtual Network Interface");
 	return (0);
 }
 
 /*
  * Device attach method: create the network device, then publish the
  * "enable_lro" (read-write) and "num_queues" (read-only) sysctl nodes under
  * the device's sysctl tree.  Returns 0 or the create_netdev() error.
  */
 static int
 netfront_attach(device_t dev)
 {
 	struct sysctl_ctx_list *ctx;
 	struct sysctl_oid_list *children;
 	int error;
 
 	error = create_netdev(dev);
 	if (error != 0) {
 		xenbus_dev_fatal(dev, error, "creating netdev");
 		return (error);
 	}
 
 	ctx = device_get_sysctl_ctx(dev);
 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
 
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "enable_lro", CTLFLAG_RW,
 	    &xn_enable_lro, 0, "Large Receive Offload");
 	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "num_queues", CTLFLAG_RD,
 	    &xn_num_queues, "Number of pairs of queues");
 
 	return (0);
 }
 
 /*
  * Device suspend method: mark the carrier as off while holding every
  * queue's RX and TX lock, so no queue is mid-operation when the flag
  * changes.  Always returns 0.
  */
 static int
 netfront_suspend(device_t dev)
 {
 	struct netfront_info *np = device_get_softc(dev);
 	u_int i;
 
 	/* Take all queue locks, RX before TX for each queue. */
 	for (i = 0; i < np->num_queues; i++) {
 		XN_RX_LOCK(&np->rxq[i]);
 		XN_TX_LOCK(&np->txq[i]);
 	}
 	netfront_carrier_off(np);
 	for (i = 0; i < np->num_queues; i++) {
 		XN_RX_UNLOCK(&np->rxq[i]);
 		XN_TX_UNLOCK(&np->txq[i]);
 	}
 	return (0);
 }
 
 /**
  * Device resume method.  We are reconnecting to the backend after a
  * suspend/resume or a backend driver restart: the connection to the old
  * backend is dropped here, while the device-layer structures stay intact so
  * that this is transparent to the rest of the kernel.  Always returns 0.
  */
 static int
 netfront_resume(device_t dev)
 {
 
 	netif_disconnect_backend(device_get_softc(dev));
 	return (0);
 }
 
 /*
  * Write the "tx-ring-ref", "rx-ring-ref" and "event-channel" keys of one
  * queue pair inside transaction *xst.
  *
  * With 'hierarchy' set the keys go under "<node>/queue-<id>", otherwise
  * directly under the device node.  On failure the error is reported through
  * xenbus_dev_fatal() and returned; 0 is returned on success.
  */
 static int
 write_queue_xenstore_keys(device_t dev,
     struct netfront_rxq *rxq,
     struct netfront_txq *txq,
     struct xs_transaction *xst, bool hierarchy)
 {
 	const char *node = xenbus_get_node(dev);
 	const char *message = NULL;
 	size_t path_size;
 	char *path;
 	int err;
 
 	KASSERT(rxq->id == txq->id, ("Mismatch between RX and TX queue ids"));
 	/* Split event channel support is not yet there. */
 	KASSERT(rxq->xen_intr_handle == txq->xen_intr_handle,
 	    ("Split event channels are not supported"));
 
 	/* Room for the node name, plus "/queue-NN" in the hierarchical case. */
 	path_size = strlen(node) + (hierarchy ? 10 : 1);
 	path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO);
 	if (hierarchy)
 		snprintf(path, path_size, "%s/queue-%u", node, rxq->id);
 	else
 		snprintf(path, path_size, "%s", node);
 
 	err = xs_printf(*xst, path, "tx-ring-ref","%u", txq->ring_ref);
 	if (err != 0) {
 		message = "writing tx ring-ref";
 		goto done;
 	}
 	err = xs_printf(*xst, path, "rx-ring-ref","%u", rxq->ring_ref);
 	if (err != 0) {
 		message = "writing rx ring-ref";
 		goto done;
 	}
 	err = xs_printf(*xst, path, "event-channel", "%u",
 	    xen_intr_port(rxq->xen_intr_handle));
 	if (err != 0)
 		message = "writing event-channel";
 
done:
 	free(path, M_DEVBUF);
 	if (err != 0)
 		xenbus_dev_fatal(dev, err, "%s", message);
 
 	return (err);
 }
 
 /*
  * Common code used when first setting up, and when resuming.
  *
  * Reads the MAC address, negotiates the number of queues with the backend,
  * allocates the queues and then publishes the ring references, event
  * channels and feature flags in a single XenStore transaction.  The
  * transaction is retried from the start when it ends with EAGAIN.
  *
  * Returns 0 on success or an errno value.  Failures after the queues were
  * set up release the interface state through netif_free().
  */
 static int
 talk_to_backend(device_t dev, struct netfront_info *info)
 {
 	const char *message;
 	struct xs_transaction xst;
 	const char *node = xenbus_get_node(dev);
 	int err;
 	unsigned long num_queues, max_queues = 0;
 	unsigned int i;
 
 	err = xen_net_read_mac(dev, info->mac);
 	if (err != 0) {
 		xenbus_dev_fatal(dev, err, "parsing %s/mac", node);
 		goto out;
 	}
 
 	/* A backend without multi-queue support gets a single queue pair. */
 	err = xs_scanf(XST_NIL, xenbus_get_otherend_path(info->xbdev),
 	    "multi-queue-max-queues", NULL, "%lu", &max_queues);
 	if (err != 0)
 		max_queues = 1;
 	num_queues = xn_num_queues;
 	if (num_queues > max_queues)
 		num_queues = max_queues;
 
 	err = setup_device(dev, info, num_queues);
 	if (err != 0)
 		goto out;
 
  again:
 	err = xs_transaction_start(&xst);
 	if (err != 0) {
 		xenbus_dev_fatal(dev, err, "starting transaction");
 		goto free;
 	}
 
 	/*
 	 * A single queue is described by flat keys under the device node;
 	 * several queues use one "queue-<id>" subdirectory each.
 	 */
 	if (info->num_queues == 1) {
 		err = write_queue_xenstore_keys(dev, &info->rxq[0],
 		    &info->txq[0], &xst, false);
 		if (err != 0)
 			goto abort_transaction_no_def_error;
 	} else {
 		err = xs_printf(xst, node, "multi-queue-num-queues",
 		    "%u", info->num_queues);
 		if (err != 0) {
 			message = "writing multi-queue-num-queues";
 			goto abort_transaction;
 		}
 
 		for (i = 0; i < info->num_queues; i++) {
 			err = write_queue_xenstore_keys(dev, &info->rxq[i],
 			    &info->txq[i], &xst, true);
 			if (err != 0)
 				goto abort_transaction_no_def_error;
 		}
 	}
 
 	err = xs_printf(xst, node, "request-rx-copy", "%u", 1);
 	if (err != 0) {
 		message = "writing request-rx-copy";
 		goto abort_transaction;
 	}
 	err = xs_printf(xst, node, "feature-rx-notify", "%d", 1);
 	if (err != 0) {
 		message = "writing feature-rx-notify";
 		goto abort_transaction;
 	}
 	err = xs_printf(xst, node, "feature-sg", "%d", 1);
 	if (err != 0) {
 		message = "writing feature-sg";
 		goto abort_transaction;
 	}
 	/* feature-gso-tcpv4 is only written while LRO is enabled. */
 	if ((info->xn_ifp->if_capenable & IFCAP_LRO) != 0) {
 		err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1);
 		if (err != 0) {
 			message = "writing feature-gso-tcpv4";
 			goto abort_transaction;
 		}
 	}
 	/* feature-no-csum-offload is only written while RXCSUM is disabled. */
 	if ((info->xn_ifp->if_capenable & IFCAP_RXCSUM) == 0) {
 		err = xs_printf(xst, node, "feature-no-csum-offload", "%d", 1);
 		if (err != 0) {
 			message = "writing feature-no-csum-offload";
 			goto abort_transaction;
 		}
 	}
 
 	err = xs_transaction_end(xst, 0);
 	if (err != 0) {
 		if (err == EAGAIN)
 			goto again;
 		xenbus_dev_fatal(dev, err, "completing transaction");
 		goto free;
 	}
 
 	return 0;
 
 	/*
 	 * write_queue_xenstore_keys() reports its own errors, hence the
 	 * second entry point that skips xenbus_dev_fatal().
 	 */
  abort_transaction:
 	xenbus_dev_fatal(dev, err, "%s", message);
  abort_transaction_no_def_error:
 	xs_transaction_end(xst, 1);
  free:
 	netif_free(info);
  out:
 	return (err);
 }
 
 /* RX half of interrupt handling: run xn_rxeof() under the queue's RX lock. */
 static void
 xn_rxq_intr(struct netfront_rxq *rxq)
 {
 
 	XN_RX_LOCK(rxq);
 	xn_rxeof(rxq);
 	XN_RX_UNLOCK(rxq);
 }
 
 /*
  * Resume transmission on a queue if packets are waiting in its buf_ring.
  * The caller must hold the queue's TX lock.
  */
 static void
 xn_txq_start(struct netfront_txq *txq)
 {
 
 	XN_TX_LOCK_ASSERT(txq);
 	if (drbr_empty(txq->info->xn_ifp, txq->br))
 		return;
 	xn_txq_mq_start_locked(txq, NULL);
 }
 
 /*
  * TX half of interrupt handling: under the queue's TX lock, reap completed
  * transmissions if the ring holds unconsumed responses, then try to send
  * whatever is still queued.
  */
 static void
 xn_txq_intr(struct netfront_txq *txq)
 {
 
 	XN_TX_LOCK(txq);
 	if (RING_HAS_UNCONSUMED_RESPONSES(&txq->ring))
 		xn_txeof(txq);
 	xn_txq_start(txq);
 	XN_TX_UNLOCK(txq);
 }
 
 /*
  * Taskqueue handler of txq->defrtask: restart transmission on the queue
  * passed as 'xtxq'.  'pending' is unused.
  */
 static void
 xn_txq_tq_deferred(void *xtxq, int pending)
 {
 	struct netfront_txq *txq = xtxq;
 
 	XN_TX_LOCK(txq);
 	xn_txq_start(txq);
 	XN_TX_UNLOCK(txq);
 }
 
 /*
  * Detach a receive queue from the backend: release the posted RX buffers,
  * return the reserved grant references and end foreign access to the ring
  * page.  The ring memory itself is released by destroy_rxq().
  */
 static void
 disconnect_rxq(struct netfront_rxq *rxq)
 {
 
 	xn_release_rx_bufs(rxq);
 	gnttab_free_grant_references(rxq->gref_head);
 	gnttab_end_foreign_access(rxq->ring_ref, NULL);
 	/*
 	 * No split event channel support at the moment, handle will
 	 * be unbound in tx. So no need to call xen_intr_unbind here,
 	 * but we do want to reset the handler to 0.
 	 */
 	rxq->xen_intr_handle = 0;
 }
 
 /*
  * Release the resources owned by a receive queue: wait for the refill
  * callout to finish, then free the shared ring page.
  */
 static void
 destroy_rxq(struct netfront_rxq *rxq)
 {
 
 	callout_drain(&rxq->rx_refill);
 	free(rxq->ring.sring, M_DEVBUF);
 }
 
 static void
 destroy_rxqs(struct netfront_info *np)
 {
 	int i;
 
 	for (i = 0; i < np->num_queues; i++)
 		destroy_rxq(&np->rxq[i]);
 
 	free(np->rxq, M_DEVBUF);
 	np->rxq = NULL;
 }
 
 /*
  * Allocate and initialise 'num_queues' receive queues in info->rxq.
  *
  * For every queue this reserves NET_RX_RING_SIZE grant references,
  * allocates and grants the shared ring page and initialises the refill
  * callout.
  *
  * Returns 0 on success.  On failure the partially built queue and all
  * previously completed queues are torn down, info->rxq is freed and reset
  * to NULL, and an errno value is returned.
  */
 static int
 setup_rxqs(device_t dev, struct netfront_info *info,
 	   unsigned long num_queues)
 {
 	int q, i;
 	int error;
 	netif_rx_sring_t *rxs;
 	struct netfront_rxq *rxq;
 
 	info->rxq = malloc(sizeof(struct netfront_rxq) * num_queues,
 	    M_DEVBUF, M_WAITOK|M_ZERO);
 
 	for (q = 0; q < num_queues; q++) {
 		rxq = &info->rxq[q];
 
 		rxq->id = q;
 		rxq->info = info;
 		rxq->ring_ref = GRANT_REF_INVALID;
 		rxq->ring.sring = NULL;
 		snprintf(rxq->name, XN_QUEUE_NAME_LEN, "xnrx_%u", q);
 		mtx_init(&rxq->lock, rxq->name, "netfront receive lock",
 		    MTX_DEF);
 
 		for (i = 0; i <= NET_RX_RING_SIZE; i++) {
 			rxq->mbufs[i] = NULL;
 			rxq->grant_ref[i] = GRANT_REF_INVALID;
 		}
 
 		/* Start resources allocation */
 
 		if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
 		    &rxq->gref_head) != 0) {
 			device_printf(dev, "allocating rx gref\n");
 			error = ENOMEM;
 			goto fail;
 		}
 
 		rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF,
 		    M_WAITOK|M_ZERO);
 		SHARED_RING_INIT(rxs);
 		FRONT_RING_INIT(&rxq->ring, rxs, PAGE_SIZE);
 
 		error = xenbus_grant_ring(dev, virt_to_mfn(rxs),
 		    &rxq->ring_ref);
 		if (error != 0) {
 			device_printf(dev, "granting rx ring page\n");
 			goto fail_grant_ring;
 		}
 
 		callout_init(&rxq->rx_refill, 1);
 	}
 
 	return (0);
 
fail_grant_ring:
 	gnttab_free_grant_references(rxq->gref_head);
 	free(rxq->ring.sring, M_DEVBUF);
fail:
 	/*
 	 * Queue q has been unwound by the labels above (or never got any
 	 * resources); only the queues before it were set up completely.
 	 */
 	for (q--; q >= 0; q--) {
 		disconnect_rxq(&info->rxq[q]);
 		destroy_rxq(&info->rxq[q]);
 	}
 
 	free(info->rxq, M_DEVBUF);
 	/* Do not leave a dangling pointer for setup_device() to free again. */
 	info->rxq = NULL;
 	return (error);
 }
 
 /*
  * Detach a transmit queue from the backend: release in-flight TX buffers,
  * return the reserved grant references, end foreign access to the ring
  * page and unbind the interrupt handle (which the paired RX queue shares).
  */
 static void
 disconnect_txq(struct netfront_txq *txq)
 {
 
 	xn_release_tx_bufs(txq);
 	gnttab_free_grant_references(txq->gref_head);
 	gnttab_end_foreign_access(txq->ring_ref, NULL);
 	xen_intr_unbind(&txq->xen_intr_handle);
 }
 
 /*
  * Release the memory and the taskqueue owned by a transmit queue.
  *
  * The deferred-start task (xn_txq_tq_deferred) reads txq->br, so the
  * taskqueue is drained and freed before the buf_ring and the shared ring
  * page are released.
  */
 static void
 destroy_txq(struct netfront_txq *txq)
 {
 
 	taskqueue_drain_all(txq->tq);
 	taskqueue_free(txq->tq);
 	buf_ring_free(txq->br, M_DEVBUF);
 	free(txq->ring.sring, M_DEVBUF);
 }
 
 static void
 destroy_txqs(struct netfront_info *np)
 {
 	int i;
 
 	for (i = 0; i < np->num_queues; i++)
 		destroy_txq(&np->txq[i]);
 
 	free(np->txq, M_DEVBUF);
 	np->txq = NULL;
 }
 
 /*
  * Allocate and initialise 'num_queues' transmit queues in info->txq.
  *
  * For every queue this builds the mbuf free list, reserves
  * NET_TX_RING_SIZE grant references, allocates and grants the shared ring
  * page, creates the buf_ring and the deferred-start taskqueue, and binds a
  * local event channel port to xn_intr().
  *
  * Returns 0 on success.  On failure the partially built queue and all
  * previously completed queues are torn down, info->txq is freed and reset
  * to NULL, and an errno value is returned.
  */
 static int
 setup_txqs(device_t dev, struct netfront_info *info,
 	   unsigned long num_queues)
 {
 	int q, i;
 	int error;
 	netif_tx_sring_t *txs;
 	struct netfront_txq *txq;
 
 	info->txq = malloc(sizeof(struct netfront_txq) * num_queues,
 	    M_DEVBUF, M_WAITOK|M_ZERO);
 
 	for (q = 0; q < num_queues; q++) {
 		txq = &info->txq[q];
 
 		txq->id = q;
 		txq->info = info;
 
 		txq->ring_ref = GRANT_REF_INVALID;
 		txq->ring.sring = NULL;
 
 		snprintf(txq->name, XN_QUEUE_NAME_LEN, "xntx_%u", q);
 
 		mtx_init(&txq->lock, txq->name, "netfront transmit lock",
 		    MTX_DEF);
 
 		/*
 		 * Thread the free list through mbufs[]: every slot points
 		 * at the next one and the last slot terminates the list.
 		 */
 		for (i = 0; i <= NET_TX_RING_SIZE; i++) {
 			txq->mbufs[i] = (void *) ((u_long) i+1);
 			txq->grant_ref[i] = GRANT_REF_INVALID;
 		}
 		txq->mbufs[NET_TX_RING_SIZE] = (void *)0;
 
 		/* Start resources allocation. */
 
 		if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
 		    &txq->gref_head) != 0) {
 			device_printf(dev, "failed to allocate tx grant refs\n");
 			error = ENOMEM;
 			goto fail;
 		}
 
 		txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF,
 		    M_WAITOK|M_ZERO);
 		SHARED_RING_INIT(txs);
 		FRONT_RING_INIT(&txq->ring, txs, PAGE_SIZE);
 
 		error = xenbus_grant_ring(dev, virt_to_mfn(txs),
 		    &txq->ring_ref);
 		if (error != 0) {
 			device_printf(dev, "failed to grant tx ring\n");
 			goto fail_grant_ring;
 		}
 
 		txq->br = buf_ring_alloc(NET_TX_RING_SIZE, M_DEVBUF,
 		    M_WAITOK, &txq->lock);
 		TASK_INIT(&txq->defrtask, 0, xn_txq_tq_deferred, txq);
 
 		txq->tq = taskqueue_create(txq->name, M_WAITOK,
 		    taskqueue_thread_enqueue, &txq->tq);
 
 		error = taskqueue_start_threads(&txq->tq, 1, PI_NET,
 		    "%s txq %d", device_get_nameunit(dev), txq->id);
 		if (error != 0) {
 			device_printf(dev, "failed to start tx taskq %d\n",
 			    txq->id);
 			goto fail_start_thread;
 		}
 
 		error = xen_intr_alloc_and_bind_local_port(dev,
 		    xenbus_get_otherend_id(dev), /* filter */ NULL, xn_intr,
 		    &info->txq[q], INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY,
 		    &txq->xen_intr_handle);
 
 		if (error != 0) {
 			device_printf(dev, "xen_intr_alloc_and_bind_local_port failed\n");
 			goto fail_bind_port;
 		}
 	}
 
 	return (0);
 
fail_bind_port:
 	taskqueue_drain_all(txq->tq);
fail_start_thread:
 	buf_ring_free(txq->br, M_DEVBUF);
 	taskqueue_free(txq->tq);
 	gnttab_end_foreign_access(txq->ring_ref, NULL);
fail_grant_ring:
 	gnttab_free_grant_references(txq->gref_head);
 	free(txq->ring.sring, M_DEVBUF);
fail:
 	/*
 	 * Queue q has been unwound by the labels above (or never got any
 	 * resources); only the queues before it were set up completely.
 	 */
 	for (q--; q >= 0; q--) {
 		disconnect_txq(&info->txq[q]);
 		destroy_txq(&info->txq[q]);
 	}
 
 	free(info->txq, M_DEVBUF);
 	/* Do not leave a dangling pointer for setup_device() to free again. */
 	info->txq = NULL;
 	return (error);
 }
 
 /*
  * (Re)create the RX and TX queues of the interface.
  *
  * Any queue arrays left from an earlier connection are destroyed first.
  * info->num_queues is only set to 'num_queues' once both the RX and the TX
  * queues exist; until then it is 0.  Returns 0 or an errno value.
  *
  * NOTE(review): when setup_txqs() fails, the RX queues created just before
  * are not released here -- confirm whether a caller cleans them up.
  */
 static int
 setup_device(device_t dev, struct netfront_info *info,
     unsigned long num_queues)
 {
 	int error;
 	int q;
 
 	if (info->txq)
 		destroy_txqs(info);
 
 	if (info->rxq)
 		destroy_rxqs(info);
 
 	info->num_queues = 0;
 
 	error = setup_rxqs(dev, info, num_queues);
 	if (error != 0)
 		goto out;
 	error = setup_txqs(dev, info, num_queues);
 	if (error != 0)
 		goto out;
 
 	info->num_queues = num_queues;
 
 	/* No split event channel at the moment. */
 	/* Each RX queue shares the handle bound by its TX queue. */
 	for (q = 0; q < num_queues; q++)
 		info->rxq[q].xen_intr_handle = info->txq[q].xen_intr_handle;
 
 	return (0);
 
out:
 	KASSERT(error != 0, ("Error path taken without providing an error code"));
 	return (error);
 }
 
 #ifdef INET
 /**
  * Announce every IPv4 address configured on the interface by calling
  * arp_ifinit() for it.  This helps to get the network going again after
  * migrating hosts.  'dev' is unused.
  */
 static void
 netfront_send_fake_arp(device_t dev, struct netfront_info *info)
 {
 	struct ifnet *ifp = info->xn_ifp;
 	struct ifaddr *ifa;
 
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 		arp_ifinit(ifp, ifa);
 	}
 }
 #endif
 
 /**
  * Callback received when the backend's state changes.
  *
  * InitWait:  connect to the backend (only while we are still in the
  *            Initialising state) and move to Connected.
  * Closing:   acknowledge by moving to Closed.
  * Closed:    if a reset was requested (xn_reset), disconnect and restart
  *            the handshake from Initialising.
  * Connected: with INET, announce our IPv4 addresses.
  * All other states are ignored.
  */
 static void
 netfront_backend_changed(device_t dev, XenbusState newstate)
 {
 	struct netfront_info *sc = device_get_softc(dev);
 
 	DPRINTK("newstate=%d\n", newstate);
 
 	switch (newstate) {
 	case XenbusStateInitialising:
 	case XenbusStateInitialised:
 	case XenbusStateUnknown:
 	case XenbusStateReconfigured:
 	case XenbusStateReconfiguring:
 		break;
 	case XenbusStateInitWait:
 		if (xenbus_get_state(dev) != XenbusStateInitialising)
 			break;
 		if (xn_connect(sc) != 0)
 			break;
-		xenbus_set_state(dev, XenbusStateConnected);
+		/* Switch to connected state before kicking the rings. */
+		xenbus_set_state(sc->xbdev, XenbusStateConnected);
+		xn_kick_rings(sc);
 		break;
 	case XenbusStateClosing:
 		xenbus_set_state(dev, XenbusStateClosed);
 		break;
 	case XenbusStateClosed:
 		if (sc->xn_reset) {
 			netif_disconnect_backend(sc);
 			xenbus_set_state(dev, XenbusStateInitialising);
 			sc->xn_reset = false;
 		}
 		break;
 	case XenbusStateConnected:
 #ifdef INET
 		netfront_send_fake_arp(dev, sc);
 #endif
 		break;
 	}
 }
 
 /**
  * \brief Check whether the Tx ring has room for a maximally sized
  *        request.
  *
  * A transmit request needs one descriptor per packet fragment, plus up to
  * 2 entries for "options" (e.g. TSO).
  *
  * \return  Non-zero if more than MAX_TX_REQ_FRAGS + 2 request slots are
  *          free, 0 otherwise.
  */
 static inline int
 xn_tx_slot_available(struct netfront_txq *txq)
 {
 
 	if (RING_FREE_REQUESTS(&txq->ring) > (MAX_TX_REQ_FRAGS + 2))
 		return (1);
 	return (0);
 }
 
 /*
  * Reclaim every mbuf still held in the TX slot table: end the backend's
  * access to its grant, return the grant reference to the queue's pool, put
  * the slot back on the free list and free the mbuf.
  */
 static void
 xn_release_tx_bufs(struct netfront_txq *txq)
 {
 	int i;
 
 	/* Slot 0 is the free-list head and never holds an mbuf. */
 	for (i = 1; i <= NET_TX_RING_SIZE; i++) {
 		struct mbuf *m;
 
 		m = txq->mbufs[i];
 
 		/*
 		 * We assume that no kernel addresses are
 		 * less than NET_TX_RING_SIZE.  Any entry
 		 * in the table that is below this number
 		 * must be an index from free-list tracking.
 		 */
 		if (((uintptr_t)m) <= NET_TX_RING_SIZE)
 			continue;
 		gnttab_end_foreign_access_ref(txq->grant_ref[i]);
 		gnttab_release_grant_reference(&txq->gref_head,
 		    txq->grant_ref[i]);
 		txq->grant_ref[i] = GRANT_REF_INVALID;
 		add_id_to_freelist(txq->mbufs, i);
 		txq->mbufs_cnt--;
 		/* The counter checked here is mbufs_cnt, despite the message. */
 		if (txq->mbufs_cnt < 0) {
 			panic("%s: tx_chain_cnt must be >= 0", __func__);
 		}
 		/* Frees this single mbuf only, not a whole chain. */
 		m_free(m);
 	}
 }
 
 /*
  * Allocate one page-sized packet-header mbuf cluster for the RX ring,
  * without sleeping.  Its length fields are preset to the full cluster
  * size.  Returns NULL if no buffer is available.  'rxq' is unused.
  */
 static struct mbuf *
 xn_alloc_one_rx_buffer(struct netfront_rxq *rxq)
 {
 	struct mbuf *m;
 
 	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
 	if (m != NULL)
 		m->m_len = m->m_pkthdr.len = MJUMPAGESIZE;
 
 	return (m);
 }
 
 /*
  * Post fresh receive buffers to the backend.
  *
  * Fills the RX ring until NET_RX_RING_SIZE requests are outstanding or
  * mbuf allocation fails.  If fewer than NET_RX_SLOTS_MIN requests are
  * outstanding afterwards, nothing is pushed and the rx_refill callout is
  * armed to try again in hz/10 ticks.  Otherwise the requests are pushed
  * and the backend is signalled when it asks for a notification.
  *
  * Does nothing while the carrier is off.  The caller must hold the RX lock.
  */
 static void
 xn_alloc_rx_buffers(struct netfront_rxq *rxq)
 {
 	RING_IDX req_prod;
 	int notify;
 
 	XN_RX_LOCK_ASSERT(rxq);
 
 	if (__predict_false(rxq->info->carrier == 0))
 		return;
 
 	for (req_prod = rxq->ring.req_prod_pvt;
 	     req_prod - rxq->ring.rsp_cons < NET_RX_RING_SIZE;
 	     req_prod++) {
 		struct mbuf *m;
 		unsigned short id;
 		grant_ref_t ref;
 		struct netif_rx_request *req;
 		unsigned long pfn;
 
 		m = xn_alloc_one_rx_buffer(rxq);
 		if (m == NULL)
 			break;
 
 		id = xn_rxidx(req_prod);
 
 		KASSERT(rxq->mbufs[id] == NULL, ("non-NULL xn_rx_chain"));
 		rxq->mbufs[id] = m;
 
 		ref = gnttab_claim_grant_reference(&rxq->gref_head);
 		KASSERT(ref != GNTTAB_LIST_END,
 		    ("reserved grant references exhuasted"));
 		rxq->grant_ref[id] = ref;
 
 		/* Grant the backend access to the page backing the mbuf. */
 		pfn = atop(vtophys(mtod(m, vm_offset_t)));
 		req = RING_GET_REQUEST(&rxq->ring, req_prod);
 
 		gnttab_grant_foreign_access_ref(ref,
 		    xenbus_get_otherend_id(rxq->info->xbdev), pfn, 0);
 		req->id = id;
 		req->gref = ref;
 	}
 
 	rxq->ring.req_prod_pvt = req_prod;
 
 	/* Not enough requests? Try again later. */
 	if (req_prod - rxq->ring.rsp_cons < NET_RX_SLOTS_MIN) {
 		callout_reset_curcpu(&rxq->rx_refill, hz/10,
 		    xn_alloc_rx_buffers_callout, rxq);
 		return;
 	}
 
 	wmb();		/* barrier so the backend sees the requests */
 
 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify);
 	if (notify)
 		xen_intr_signal(rxq->xen_intr_handle);
 }
 
 /*
  * Handler of the rx_refill callout: retry posting receive buffers on the
  * queue passed as 'arg', under its RX lock.
  */
 static void
 xn_alloc_rx_buffers_callout(void *arg)
 {
 	struct netfront_rxq *rxq = arg;
 
 	XN_RX_LOCK(rxq);
 	xn_alloc_rx_buffers(rxq);
 	XN_RX_UNLOCK(rxq);
 }
 
 /*
  * Reclaim every receive buffer still posted on the queue: end the
  * backend's access to its grant, return the grant reference to the
  * queue's pool and free the mbuf.
  *
  * NOTE(review): a slot holding an mbuf but no valid grant reference is
  * skipped without freeing the mbuf -- confirm this state cannot occur.
  */
 static void
 xn_release_rx_bufs(struct netfront_rxq *rxq)
 {
 	int i,  ref;
 	struct mbuf *m;
 
 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
 		m = rxq->mbufs[i];
 
 		if (m == NULL)
 			continue;
 
 		ref = rxq->grant_ref[i];
 		if (ref == GRANT_REF_INVALID)
 			continue;
 
 		gnttab_end_foreign_access_ref(ref);
 		gnttab_release_grant_reference(&rxq->gref_head, ref);
 		rxq->mbufs[i] = NULL;
 		rxq->grant_ref[i] = GRANT_REF_INVALID;
 		m_freem(m);
 	}
 }
 
 /*
  * Receive completion processing for one queue.
  *
  * Consumes the responses the backend has produced, turns them into mbufs
  * via xn_get_responses(), and passes the good packets to the stack (through
  * LRO when it is enabled and initialised).  Afterwards the ring is refilled
  * and checked again; the whole sequence repeats while more responses are
  * pending.
  *
  * The caller must hold the RX lock.  The lock is dropped while a packet is
  * handed to the stack and re-taken afterwards.  Returns immediately while
  * the carrier is off.
  */
 static void
 xn_rxeof(struct netfront_rxq *rxq)
 {
 	struct ifnet *ifp;
 	struct netfront_info *np = rxq->info;
 #if (defined(INET) || defined(INET6))
 	struct lro_ctrl *lro = &rxq->lro;
 #endif
 	struct netfront_rx_info rinfo;
 	struct netif_rx_response *rx = &rinfo.rx;
 	struct netif_extra_info *extras = rinfo.extras;
 	RING_IDX i, rp;
 	struct mbuf *m;
 	struct mbufq mbufq_rxq, mbufq_errq;
 	int err, work_to_do;
 
 	do {
 		XN_RX_LOCK_ASSERT(rxq);
 		if (!netfront_carrier_ok(np))
 			return;
 
 		/* XXX: there should be some sane limit. */
 		mbufq_init(&mbufq_errq, INT_MAX);
 		mbufq_init(&mbufq_rxq, INT_MAX);
 
 		ifp = np->xn_ifp;
 
 		rp = rxq->ring.sring->rsp_prod;
 		rmb();	/* Ensure we see queued responses up to 'rp'. */
 
 		i = rxq->ring.rsp_cons;
 		while ((i != rp)) {
 			/* Work on a private copy of the shared response. */
 			memcpy(rx, RING_GET_RESPONSE(&rxq->ring, i), sizeof(*rx));
 			memset(extras, 0, sizeof(rinfo.extras));
 
 			m = NULL;
 			/* Advances 'i' past the responses it consumed. */
 			err = xn_get_responses(rxq, &rinfo, rp, &i, &m);
 
 			if (__predict_false(err)) {
 				/* Bad packets are collected and freed below. */
 				if (m)
 					(void )mbufq_enqueue(&mbufq_errq, m);
 				rxq->stats.rx_errors++;
 				continue;
 			}
 
 			m->m_pkthdr.rcvif = ifp;
 			if ( rx->flags & NETRXF_data_validated ) {
 				/* Tell the stack the checksums are okay */
 				/*
 				 * XXX this isn't necessarily the case - need to add
 				 * check
 				 */
 
 				m->m_pkthdr.csum_flags |=
 					(CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID
 					    | CSUM_PSEUDO_HDR);
 				m->m_pkthdr.csum_data = 0xffff;
 			}
 			/* Carry over the segment size of a GSO extra-info slot. */
 			if ((rx->flags & NETRXF_extra_info) != 0 &&
 			    (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type ==
 			    XEN_NETIF_EXTRA_TYPE_GSO)) {
 				m->m_pkthdr.tso_segsz =
 				extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].u.gso.size;
 				m->m_pkthdr.csum_flags |= CSUM_TSO;
 			}
 
 			rxq->stats.rx_packets++;
 			rxq->stats.rx_bytes += m->m_pkthdr.len;
 
 			(void )mbufq_enqueue(&mbufq_rxq, m);
 			rxq->ring.rsp_cons = i;
 		}
 
 		mbufq_drain(&mbufq_errq);
 
 		/*
 		 * Process all the mbufs after the remapping is complete.
 		 * Break the mbuf chain first though.
 		 */
 		while ((m = mbufq_dequeue(&mbufq_rxq)) != NULL) {
 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 
 			/* XXX: Do we really need to drop the rx lock? */
 			XN_RX_UNLOCK(rxq);
 #if (defined(INET) || defined(INET6))
 			/* Use LRO if possible */
 			if ((ifp->if_capenable & IFCAP_LRO) == 0 ||
 			    lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) {
 				/*
 				 * If LRO fails, pass up to the stack
 				 * directly.
 				 */
 				(*ifp->if_input)(ifp, m);
 			}
 #else
 			(*ifp->if_input)(ifp, m);
 #endif
 
 			XN_RX_LOCK(rxq);
 		}
 
 		rxq->ring.rsp_cons = i;
 
 #if (defined(INET) || defined(INET6))
 		/*
 		 * Flush any outstanding LRO work
 		 */
 		tcp_lro_flush_all(lro);
 #endif
 
 		xn_alloc_rx_buffers(rxq);
 
 		/* Re-arm notification and catch responses that raced with us. */
 		RING_FINAL_CHECK_FOR_RESPONSES(&rxq->ring, work_to_do);
 	} while (work_to_do);
 }
 
 /*
  * Reclaim transmit slots that the backend has completed on one TX queue.
  *
  * XN_TX_LOCK_ASSERT() is invoked on entry and nothing is done unless
  * netfront_carrier_ok() is true.  For every response between the private
  * consumer index and the shared producer index (NETIF_RSP_NULL entries are
  * skipped) the grant reference is ended and released, the slot id is put
  * back on the free list and the mbuf stored in that slot is freed.
  * rsp_event is then re-armed; the outer loop repeats if rsp_prod moved in
  * the meantime.  Finally, a queue flagged full is restarted through
  * xn_txq_start() when (req_prod - prod) is below NET_TX_RING_SIZE.
  */
 static void
 xn_txeof(struct netfront_txq *txq)
 {
 	RING_IDX i, prod;
 	unsigned short id;
 	struct ifnet *ifp;
 	netif_tx_response_t *txr;
 	struct mbuf *m;
 	struct netfront_info *np = txq->info;
 
 	XN_TX_LOCK_ASSERT(txq);
 
 	if (!netfront_carrier_ok(np))
 		return;
 
 	ifp = np->xn_ifp;
 
 	do {
 		/* Snapshot the producer index before reading any response. */
 		prod = txq->ring.sring->rsp_prod;
 		rmb(); /* Ensure we see responses up to 'prod'. */
 
 		for (i = txq->ring.rsp_cons; i != prod; i++) {
 			txr = RING_GET_RESPONSE(&txq->ring, i);
 			if (txr->status == NETIF_RSP_NULL)
 				continue;
 
 			/* A failed transmit is only logged; the slot is still reclaimed. */
 			if (txr->status != NETIF_RSP_OKAY) {
 				printf("%s: WARNING: response is %d!\n",
 				       __func__, txr->status);
 			}
 			id = txr->id;
 			m = txq->mbufs[id];
 			KASSERT(m != NULL, ("mbuf not found in chain"));
 			/*
 			 * Entries not larger than NET_TX_RING_SIZE are treated as
 			 * free-list links rather than mbuf pointers.
 			 */
 			KASSERT((uintptr_t)m > NET_TX_RING_SIZE,
 				("mbuf already on the free list, but we're "
 				"trying to free it again!"));
 			M_ASSERTVALID(m);
 
 			/*
 			 * Increment packet count if this is the last
 			 * mbuf of the chain.
 			 */
 			if (!m->m_next)
 				if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 			/* The backend must have dropped the grant before responding. */
 			if (__predict_false(gnttab_query_foreign_access(
 			    txq->grant_ref[id]) != 0)) {
 				panic("%s: grant id %u still in use by the "
 				    "backend", __func__, id);
 			}
 			gnttab_end_foreign_access_ref(txq->grant_ref[id]);
 			gnttab_release_grant_reference(
 				&txq->gref_head, txq->grant_ref[id]);
 			txq->grant_ref[id] = GRANT_REF_INVALID;
 
 			txq->mbufs[id] = NULL;
 			add_id_to_freelist(txq->mbufs, id);
 			txq->mbufs_cnt--;
 			/*
 			 * m_free(), not m_freem(): xn_assemble_tx_request() stores
 			 * each mbuf of a chain in its own slot, so only this one
 			 * mbuf is released here.
 			 */
 			m_free(m);
 			/* Only mark the txq active if we've freed up at least one slot to try */
 			ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 		}
 		txq->ring.rsp_cons = prod;
 
 		/*
 		 * Set a new event, then check for race with update of
 		 * tx_cons. Note that it is essential to schedule a
 		 * callback, no matter how few buffers are pending. Even if
 		 * there is space in the transmit ring, higher layers may
 		 * be blocked because too much data is outstanding: in such
 		 * cases notification from Xen is likely to be the only kick
 		 * that we'll get.
 		 */
 		txq->ring.sring->rsp_event =
 		    prod + ((txq->ring.sring->req_prod - prod) >> 1) + 1;
 
 		/* Make the rsp_event store visible before re-reading rsp_prod. */
 		mb();
 	} while (prod != txq->ring.sring->rsp_prod);
 
 	if (txq->full &&
 	    ((txq->ring.sring->req_prod - prod) < NET_TX_RING_SIZE)) {
 		txq->full = false;
 		xn_txq_start(txq);
 	}
 }
 
 /*
  * Interrupt entry point for one queue pair.
  *
  * xsc is treated as a struct netfront_txq pointer; the RX queue with the
  * same index is found through the owning netfront_info.  The RX side is
  * serviced first, then the TX side.
  */
 static void
 xn_intr(void *xsc)
 {
 	struct netfront_txq *txq;
 	struct netfront_rxq *rxq;
 
 	txq = xsc;
 	rxq = &txq->info->rxq[txq->id];
 
 	xn_rxq_intr(rxq);
 	xn_txq_intr(txq);
 }
 
 /*
  * Re-post an RX buffer: record mbuf `m` and grant `ref` in the slot that
  * corresponds to the private request producer index, fill in the request
  * entry at that index, and advance the private producer index by one.
  */
 static void
 xn_move_rx_slot(struct netfront_rxq *rxq, struct mbuf *m,
     grant_ref_t ref)
 {
 	int slot;
 
 	slot = xn_rxidx(rxq->ring.req_prod_pvt);
 	KASSERT(rxq->mbufs[slot] == NULL, ("mbufs != NULL"));
 
 	/* Driver-side bookkeeping for the slot. */
 	rxq->grant_ref[slot] = ref;
 	rxq->mbufs[slot] = m;
 
 	/* Request entry seen by the backend once the requests are pushed. */
 	RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->gref = ref;
 	RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->id = slot;
 
 	rxq->ring.req_prod_pvt++;
 }
 
 /*
  * Consume the chain of "extra info" slots that follows the response at
  * *cons.
  *
  * Each extra with a valid type is copied into extras[type - 1]; the mbuf
  * and grant reference of every consumed slot are re-posted through
  * xn_move_rx_slot().  *cons is advanced once per slot consumed.
  *
  * Returns 0, or EINVAL if the responses end at `rp` before the chain does
  * or if any extra carries a type of 0 or >= XEN_NETIF_EXTRA_TYPE_MAX.
  */
 static int
 xn_get_extras(struct netfront_rxq *rxq,
     struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons)
 {
 	struct netif_extra_info *extra;
 	struct mbuf *m;
 	grant_ref_t ref;
 	int err;
 
 	err = 0;
 	for (;;) {
 		/* No further response available for the announced extra. */
 		if (__predict_false(*cons + 1 == rp)) {
 			err = EINVAL;
 			break;
 		}
 
 		*cons += 1;
 		extra = (struct netif_extra_info *)
 		    RING_GET_RESPONSE(&rxq->ring, *cons);
 
 		if (__predict_false(extra->type == 0 ||
 		    extra->type >= XEN_NETIF_EXTRA_TYPE_MAX))
 			err = EINVAL;
 		else
 			memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
 
 		/* The slot carries no packet data; hand its buffer back. */
 		m = xn_get_rx_mbuf(rxq, *cons);
 		ref = xn_get_rx_ref(rxq, *cons);
 		xn_move_rx_slot(rxq, m, ref);
 
 		if ((extra->flags & XEN_NETIF_EXTRA_FLAG_MORE) == 0)
 			break;
 	}
 
 	return (err);
 }
 
 /*
  * Collect the mbufs that make up one received packet.
  *
  * Starting at ring index *cons, the response held in rinfo->rx and, while
  * NETRXF_more_data is set, the responses that follow it are walked.  The
  * mbuf of each response is fetched with xn_get_rx_mbuf(), its grant is
  * ended and released, and the mbufs are linked through m_next.  If the
  * first response has NETRXF_extra_info set, the extra slots are consumed
  * first through xn_get_extras().
  *
  * On return *list holds the head of the chain (it may be NULL) and *cons
  * has been advanced by the number of fragments visited.  The return value
  * is 0 or an errno value (EINVAL, ENOENT, or the xn_get_extras() result)
  * describing the last problem seen.
  */
 static int
 xn_get_responses(struct netfront_rxq *rxq,
     struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons,
     struct mbuf  **list)
 {
 	struct netif_rx_response *rx = &rinfo->rx;
 	struct netif_extra_info *extras = rinfo->extras;
 	struct mbuf *m, *m0, *m_prev;
 	grant_ref_t ref = xn_get_rx_ref(rxq, *cons);
 	/* NOTE(review): ref_cons is assigned below but never read here. */
 	RING_IDX ref_cons = *cons;
 	int frags = 1;
 	int err = 0;
 	u_long ret;
 
 	/* m0: chain head, m: current fragment, m_prev: previous fragment. */
 	m0 = m = m_prev = xn_get_rx_mbuf(rxq, *cons);
 
 	if (rx->flags & NETRXF_extra_info) {
 		err = xn_get_extras(rxq, extras, rp, cons);
 	}
 
 	if (m0 != NULL) {
 		m0->m_pkthdr.len = 0;
 		m0->m_next = NULL;
 	}
 
 	for (;;) {
 #if 0
 		DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n",
 			rx->status, rx->offset, frags);
 #endif
 		/*
 		 * Bad length/offset: re-post the buffer instead of chaining
 		 * it, and forget it as chain head if it was the head.
 		 */
 		if (__predict_false(rx->status < 0 ||
 			rx->offset + rx->status > PAGE_SIZE)) {
 
 			xn_move_rx_slot(rxq, m, ref);
 			if (m0 == m)
 				m0 = NULL;
 			m = NULL;
 			err = EINVAL;
 			goto next_skip_queue;
 		}
 
 		/*
 		 * This definitely indicates a bug, either in this driver or in
 		 * the backend driver. In future this should flag the bad
 		 * situation to the system controller to reboot the backend.
 		 */
 		if (ref == GRANT_REF_INVALID) {
 			printf("%s: Bad rx response id %d.\n", __func__, rx->id);
 			err = EINVAL;
 			goto next;
 		}
 
 		/* ret is only consumed by the KASSERT below. */
 		ret = gnttab_end_foreign_access_ref(ref);
 		KASSERT(ret, ("Unable to end access to grant references"));
 
 		gnttab_release_grant_reference(&rxq->gref_head, ref);
 
 next:
 		if (m == NULL)
 			break;
 
 		/* Trim the mbuf to the payload the backend reported. */
 		m->m_len = rx->status;
 		m->m_data += rx->offset;
 		m0->m_pkthdr.len += rx->status;
 
 next_skip_queue:
 		if (!(rx->flags & NETRXF_more_data))
 			break;
 
 		/* The backend promised another fragment that is not there yet. */
 		if (*cons + frags == rp) {
 			if (net_ratelimit())
 				WPRINTK("Need more frags\n");
 			err = ENOENT;
 			printf("%s: cons %u frags %u rp %u, not enough frags\n",
 			       __func__, *cons, frags, rp);
 			break;
 		}
 		/*
 		 * Note that m can be NULL, if rx->status < 0 or if
 		 * rx->offset + rx->status > PAGE_SIZE above.
 		 */
 		m_prev = m;
 
 		rx = RING_GET_RESPONSE(&rxq->ring, *cons + frags);
 		m = xn_get_rx_mbuf(rxq, *cons + frags);
 
 		/*
 		 * m_prev == NULL can happen if rx->status < 0 or if
 		 * rx->offset + rx->status > PAGE_SIZE above.
 		 */
 		if (m_prev != NULL)
 			m_prev->m_next = m;
 
 		/*
 		 * m0 can be NULL if rx->status < 0 or if rx->offset +
 		 * rx->status > PAGE_SIZE above.
 		 */
 		if (m0 == NULL)
 			m0 = m;
 		m->m_next = NULL;
 		ref = xn_get_rx_ref(rxq, *cons + frags);
 		ref_cons = *cons + frags;
 		frags++;
 	}
 	*list = m0;
 	*cons += frags;
 
 	return (err);
 }
 
 /**
  * \brief Return how many mbufs are linked together in a chain.
  *
  * Follows m_next starting at \a m until NULL is reached; a NULL \a m
  * yields 0.
  */
 static inline int
 xn_count_frags(struct mbuf *m)
 {
 	int count = 0;
 
 	while (m != NULL) {
 		count++;
 		m = m->m_next;
 	}
 
 	return (count);
 }
 
 /**
  * Given an mbuf chain, make sure it fits and then write it into the
  * transmit ring.
  *
  * One request slot is filled per mbuf of the chain (plus one extra-info
  * slot after the first request when CSUM_TSO is set) and the private
  * producer index is advanced; the requests are not pushed here.  Each mbuf
  * is recorded in txq->mbufs[] under the id placed in its request, and a
  * read-only grant for its page is issued to the other end.
  *
  * The chain is always consumed: on failure it is freed with m_freem() and
  * EMSGSIZE is returned; on success (return 0) the mbufs stay referenced
  * from txq->mbufs[].  Note that m_defrag() may replace the chain head.
  */
 static int
 xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 {
 	struct mbuf *m;
 	struct netfront_info *np = txq->info;
 	struct ifnet *ifp = np->xn_ifp;
 	u_int nfrags;
 	int otherend_id;
 
 	/**
 	 * Defragment the mbuf if necessary.
 	 */
 	nfrags = xn_count_frags(m_head);
 
 	/*
 	 * Check to see whether this request is longer than netback
 	 * can handle, and try to defrag it.
 	 */
 	/**
 	 * It is a bit lame, but the netback driver in Linux can't
 	 * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of
 	 * the Linux network stack.
 	 */
 	if (nfrags > np->maxfrags) {
 		m = m_defrag(m_head, M_NOWAIT);
 		if (!m) {
 			/*
 			 * Defrag failed, so free the mbuf and
 			 * therefore drop the packet.
 			 */
 			m_freem(m_head);
 			return (EMSGSIZE);
 		}
 		m_head = m;
 	}
 
 	/* Determine how many fragments now exist */
 	nfrags = xn_count_frags(m_head);
 
 	/*
 	 * Check to see whether the defragmented packet has too many
 	 * segments for the Linux netback driver.
 	 */
 	/**
 	 * The FreeBSD TCP stack, with TSO enabled, can produce a chain
 	 * of mbufs longer than Linux can handle.  Make sure we don't
 	 * pass a too-long chain over to the other side by dropping the
 	 * packet.  It doesn't look like there is currently a way to
 	 * tell the TCP stack to generate a shorter chain of packets.
 	 */
 	if (nfrags > MAX_TX_REQ_FRAGS) {
 #ifdef DEBUG
 		/* nfrags is a u_int, so it is printed with %u. */
 		printf("%s: nfrags %u > MAX_TX_REQ_FRAGS %d, netback "
 		       "won't be able to handle it, dropping\n",
 		       __func__, nfrags, MAX_TX_REQ_FRAGS);
 #endif
 		m_freem(m_head);
 		return (EMSGSIZE);
 	}
 
 	/*
 	 * This check should be redundant.  We've already verified that we
 	 * have enough slots in the ring to handle a packet of maximum
 	 * size, and that our packet is less than the maximum size.  Keep
 	 * it in here as an assert for now just to make certain that
 	 * mbufs_cnt is accurate.
 	 */
 	KASSERT((txq->mbufs_cnt + nfrags) <= NET_TX_RING_SIZE,
 		("%s: chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE "
 		 "(%d)!", __func__, (int) txq->mbufs_cnt,
 		 (int) nfrags, (int) NET_TX_RING_SIZE));
 
 	/*
 	 * Start packing the mbufs in this chain into
 	 * the fragment pointers. Stop when we run out
 	 * of fragments or hit the end of the mbuf chain.
 	 */
 	otherend_id = xenbus_get_otherend_id(np->xbdev);
 	for (m = m_head; m; m = m->m_next) {
 		netif_tx_request_t *tx;
 		uintptr_t id;
 		grant_ref_t ref;
 		u_long mfn; /* XXX Wrong type? */
 
 		tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt);
 		/* Slot 0 is the free-list head and must never be handed out. */
 		id = get_id_from_freelist(txq->mbufs);
 		if (id == 0)
 			panic("%s: was allocated the freelist head!\n",
 			    __func__);
 		txq->mbufs_cnt++;
 		if (txq->mbufs_cnt > NET_TX_RING_SIZE)
 			panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n",
 			    __func__);
 		txq->mbufs[id] = m;
 		tx->id = id;
 		ref = gnttab_claim_grant_reference(&txq->gref_head);
 		KASSERT((short)ref >= 0, ("Negative ref"));
 		/* Grant the backend read-only access to the page behind m. */
 		mfn = virt_to_mfn(mtod(m, vm_offset_t));
 		gnttab_grant_foreign_access_ref(ref, otherend_id,
 		    mfn, GNTMAP_readonly);
 		tx->gref = txq->grant_ref[id] = ref;
 		tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1);
 		tx->flags = 0;
 		if (m == m_head) {
 			/*
 			 * The first fragment has the entire packet
 			 * size, subsequent fragments have just the
 			 * fragment size. The backend works out the
 			 * true size of the first fragment by
 			 * subtracting the sizes of the other
 			 * fragments.
 			 */
 			tx->size = m->m_pkthdr.len;
 
 			/*
 			 * The first fragment contains the checksum flags
 			 * and is optionally followed by extra data for
 			 * TSO etc.
 			 */
 			/**
 			 * CSUM_TSO requires checksum offloading.
 			 * Some versions of FreeBSD fail to
 			 * set CSUM_TCP in the CSUM_TSO case,
 			 * so we have to test for CSUM_TSO
 			 * explicitly.
 			 */
 			if (m->m_pkthdr.csum_flags
 			    & (CSUM_DELAY_DATA | CSUM_TSO)) {
 				tx->flags |= (NETTXF_csum_blank
 				    | NETTXF_data_validated);
 			}
 			if (m->m_pkthdr.csum_flags & CSUM_TSO) {
 				/*
 				 * The GSO descriptor occupies the ring slot
 				 * right after the first request, hence the
 				 * pre-increment of the producer index.
 				 */
 				struct netif_extra_info *gso =
 					(struct netif_extra_info *)
 					RING_GET_REQUEST(&txq->ring,
 							 ++txq->ring.req_prod_pvt);
 
 				tx->flags |= NETTXF_extra_info;
 
 				gso->u.gso.size = m->m_pkthdr.tso_segsz;
 				gso->u.gso.type =
 					XEN_NETIF_GSO_TYPE_TCPV4;
 				gso->u.gso.pad = 0;
 				gso->u.gso.features = 0;
 
 				gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
 				gso->flags = 0;
 			}
 		} else {
 			tx->size = m->m_len;
 		}
 		if (m->m_next)
 			tx->flags |= NETTXF_more_data;
 
 		txq->ring.req_prod_pvt++;
 	}
 	BPF_MTAP(ifp, m_head);
 
 	/* Opportunistically reclaim slots the backend has already completed. */
 	xn_txeof(txq);
 
 	txq->stats.tx_bytes += m_head->m_pkthdr.len;
 	txq->stats.tx_packets++;
 
 	return (0);
 }
 
 /*
  * Bring the interface up (counterpart of network_open() in Linux).
  *
  * Does nothing when the interface is already marked running or when there
  * is no carrier.  Otherwise the interface is stopped, every RX queue is
  * refilled and drained of pending responses under its RX lock, and the
  * interface is flagged running with the link reported up.
  */
 static void
 xn_ifinit_locked(struct netfront_info *np)
 {
 	struct netfront_rxq *rxq;
 	struct ifnet *ifp;
 	int q;
 
 	XN_LOCK_ASSERT(np);
 
 	ifp = np->xn_ifp;
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 		return;
 	if (!netfront_carrier_ok(np))
 		return;
 
 	xn_stop(np);
 
 	q = 0;
 	while (q < np->num_queues) {
 		rxq = &np->rxq[q];
 
 		XN_RX_LOCK(rxq);
 		xn_alloc_rx_buffers(rxq);
 		/* Ask to be notified about the very next response. */
 		rxq->ring.sring->rsp_event = rxq->ring.rsp_cons + 1;
 		if (RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring))
 			xn_rxeof(rxq);
 		XN_RX_UNLOCK(rxq);
 
 		q++;
 	}
 
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 	if_link_state_change(ifp, LINK_STATE_UP);
 }
 
 /*
  * Locking wrapper around xn_ifinit_locked(); xsc is treated as the
  * struct netfront_info of the interface.
  */
 static void
 xn_ifinit(void *xsc)
 {
 	struct netfront_info *np;
 
 	np = xsc;
 
 	XN_LOCK(np);
 	xn_ifinit_locked(np);
 	XN_UNLOCK(np);
 }
 
 /*
  * Interface ioctl handler.
  *
  * Handles address, MTU, flag, capability, multicast and media requests;
  * anything else is passed to ether_ioctl().  Returns 0 or an errno value.
  */
 static int
 xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct netfront_info *sc = ifp->if_softc;
 	struct ifreq *ifr = (struct ifreq *) data;
 	device_t dev;
 #ifdef INET
 	struct ifaddr *ifa = (struct ifaddr *)data;
 #endif
 	int mask, error = 0;
 
 	dev = sc->xbdev;
 
 	switch(cmd) {
 	case SIOCSIFADDR:
 		/*
 		 * With INET, an AF_INET address marks the interface up and
 		 * initialises it here; every other case (and any build
 		 * without INET) goes to ether_ioctl().
 		 */
 #ifdef INET
 		XN_LOCK(sc);
 		if (ifa->ifa_addr->sa_family == AF_INET) {
 			ifp->if_flags |= IFF_UP;
 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
 				xn_ifinit_locked(sc);
 			arp_ifinit(ifp, ifa);
 			XN_UNLOCK(sc);
 		} else {
 			XN_UNLOCK(sc);
 #endif
 			error = ether_ioctl(ifp, cmd, data);
 #ifdef INET
 		}
 #endif
 		break;
 	case SIOCSIFMTU:
 		/*
 		 * The requested MTU is stored as-is.  IFF_DRV_RUNNING is
 		 * cleared first because xn_ifinit_locked() returns early
 		 * while it is set.
 		 */
 		ifp->if_mtu = ifr->ifr_mtu;
 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 		xn_ifinit(sc);
 		break;
 	case SIOCSIFFLAGS:
 		XN_LOCK(sc);
 		if (ifp->if_flags & IFF_UP) {
 			/*
 			 * Interface requested up: (re)initialise it.
 			 * xn_ifinit_locked() is a no-op if it is already
 			 * running.  No promiscuous-mode special case is
 			 * implemented here.
 			 */
 			xn_ifinit_locked(sc);
 		} else {
 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 				xn_stop(sc);
 			}
 		}
 		/* Remember the flags that were just acted upon. */
 		sc->xn_if_flags = ifp->if_flags;
 		XN_UNLOCK(sc);
 		break;
 	case SIOCSIFCAP:
 		/* mask holds the capability bits the caller wants toggled. */
 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
 		if (mask & IFCAP_TXCSUM) {
 			if (IFCAP_TXCSUM & ifp->if_capenable) {
 				/* Turning TX checksum off also turns TSO4 off. */
 				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
 				    | CSUM_IP | CSUM_TSO);
 			} else {
 				ifp->if_capenable |= IFCAP_TXCSUM;
 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
 				    | CSUM_IP);
 			}
 		}
 		if (mask & IFCAP_RXCSUM) {
 			ifp->if_capenable ^= IFCAP_RXCSUM;
 		}
 		if (mask & IFCAP_TSO4) {
 			if (IFCAP_TSO4 & ifp->if_capenable) {
 				ifp->if_capenable &= ~IFCAP_TSO4;
 				ifp->if_hwassist &= ~CSUM_TSO;
 			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
 				ifp->if_capenable |= IFCAP_TSO4;
 				ifp->if_hwassist |= CSUM_TSO;
 			} else {
 				/*
 				 * NOTE(review): error is set, but the reset
 				 * below is still performed.
 				 */
 				IPRINTK("Xen requires tx checksum offload"
 				    " be enabled to use TSO\n");
 				error = EINVAL;
 			}
 		}
 		if (mask & IFCAP_LRO) {
 			ifp->if_capenable ^= IFCAP_LRO;
 
 		}
 		/*
 		 * We must reset the interface so the backend picks up the
 		 * new features.
 		 */
 		XN_LOCK(sc);
 		netfront_carrier_off(sc);
 		sc->xn_reset = true;
 		/*
 		 * NB: the pending packet queue is not flushed, since
 		 * the interface should still support the old options.
 		 */
 		XN_UNLOCK(sc);
 		/*
 		 * Delete the xenstore nodes that export features.
 		 *
 		 * NB: There's a xenbus state called
 		 * "XenbusStateReconfiguring", which is what we should set
 		 * here. Sadly none of the backends know how to handle it,
 		 * and simply disconnect from the frontend, so we will just
 		 * switch back to XenbusStateInitialising in order to force
 		 * a reconnection.
 		 *
 		 * NOTE(review): the state written below is
 		 * XenbusStateClosing; presumably the move back to
 		 * Initialising happens elsewhere once xn_reset is seen --
 		 * confirm against the backend-changed handler.
 		 */
 		xs_rm(XST_NIL, xenbus_get_node(dev), "feature-gso-tcpv4");
 		xs_rm(XST_NIL, xenbus_get_node(dev), "feature-no-csum-offload");
 		xenbus_set_state(dev, XenbusStateClosing);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		/* Multicast list changes are accepted without any action. */
 		break;
 	case SIOCSIFMEDIA:
 	case SIOCGIFMEDIA:
 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
 		break;
 	default:
 		error = ether_ioctl(ifp, cmd, data);
 	}
 
 	return (error);
 }
 
 /*
  * Mark the interface as stopped: clear the RUNNING and OACTIVE driver
  * flags and report the link as down.  XN_LOCK_ASSERT() is invoked on
  * entry.
  */
 static void
 xn_stop(struct netfront_info *sc)
 {
 	struct ifnet *ifp;
 
 	XN_LOCK_ASSERT(sc);
 
 	ifp = sc->xn_ifp;
 	ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE | IFF_DRV_RUNNING);
 
 	if_link_state_change(ifp, LINK_STATE_DOWN);
 }
 
 /*
  * Rebuild the RX ring from the buffers the queue still owns.
  *
  * Every slot with a non-NULL mbuf is moved to the next unused index
  * counting up from 0, its page is granted to the other end again, and a
  * request for it is written at that index.  The private request producer
  * index ends up equal to the number of buffers re-queued.
  */
 static void
 xn_rebuild_rx_bufs(struct netfront_rxq *rxq)
 {
 	netif_rx_request_t *req;
 	struct mbuf *m;
 	grant_ref_t ref;
 	u_long pfn;
 	int src, dst;
 
 	dst = 0;
 	for (src = 0; src < NET_RX_RING_SIZE; src++) {
 		if (rxq->mbufs[src] == NULL)
 			continue;
 
 		/* Move the buffer and its grant reference into slot dst. */
 		m = xn_get_rx_mbuf(rxq, src);
 		rxq->mbufs[dst] = m;
 		ref = xn_get_rx_ref(rxq, src);
 		rxq->grant_ref[dst] = ref;
 
 		req = RING_GET_REQUEST(&rxq->ring, dst);
 		pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT;
 
 		gnttab_grant_foreign_access_ref(ref,
 		    xenbus_get_otherend_id(rxq->info->xbdev),
 		    pfn, 0);
 
 		req->gref = ref;
 		req->id = dst;
 
 		dst++;
 	}
 
 	rxq->ring.req_prod_pvt = dst;
 }
 
 /* START of Xenolinux helper functions adapted to FreeBSD */
 /*
  * xn_connect(): (re)connect to the backend.
  *
  * Returns EPROTONOSUPPORT unless the backend advertises feature-rx-copy,
  * or the error from talk_to_backend().  Otherwise the feature set is
  * re-read and applied, TX buffers are released, the RX rings are rebuilt
  * and the carrier is switched on.  The return value of
  * xn_configure_features() is not checked.
  *
  * This hunk of the patch makes xn_connect() static and moves the per-queue
  * kick loop into the new xn_kick_rings() below.
  */
-int
+static int
 xn_connect(struct netfront_info *np)
 {
 	int i, error;
 	u_int feature_rx_copy;
 	struct netfront_rxq *rxq;
 	struct netfront_txq *txq;
 
 	/* A missing or unreadable node is treated as "not supported". */
 	error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
 	    "feature-rx-copy", NULL, "%u", &feature_rx_copy);
 	if (error != 0)
 		feature_rx_copy = 0;
 
 	/* We only support rx copy. */
 	if (!feature_rx_copy)
 		return (EPROTONOSUPPORT);
 
 	/* Recovery procedure: */
 	error = talk_to_backend(np->xbdev, np);
 	if (error != 0)
 		return (error);
 
 	/* Step 1: Reinitialise variables. */
 	xn_query_features(np);
 	xn_configure_features(np);
 
 	/* Step 2: Release TX buffer */
 	for (i = 0; i < np->num_queues; i++) {
 		txq = &np->txq[i];
 		xn_release_tx_bufs(txq);
 	}
 
 	/* Step 3: Rebuild the RX buffer freelist and the RX ring itself. */
 	for (i = 0; i < np->num_queues; i++) {
 		rxq = &np->rxq[i];
 		xn_rebuild_rx_bufs(rxq);
 	}
 
 	/* Step 4: All public and private state should now be sane.  Get
 	 * ready to start sending and receiving packets and give the driver
 	 * domain a kick because we've probably just requeued some
 	 * packets.
 	 */
 	netfront_carrier_on(np);
+
+	return (0);
+}
+
+static void
+xn_kick_rings(struct netfront_info *np)
+{
+	struct netfront_rxq *rxq;
+	struct netfront_txq *txq;
+	int i;
+
 	/*
 	 * For each queue pair: signal the event channel, reap completed
 	 * transmits under the TX lock, and refill the RX ring under the RX
 	 * lock.
 	 */
 	for (i = 0; i < np->num_queues; i++) {
 		txq = &np->txq[i];
+		rxq = &np->rxq[i];
 		xen_intr_signal(txq->xen_intr_handle);
 		XN_TX_LOCK(txq);
 		xn_txeof(txq);
 		XN_TX_UNLOCK(txq);
 		XN_RX_LOCK(rxq);
 		xn_alloc_rx_buffers(rxq);
 		XN_RX_UNLOCK(rxq);
 	}
-
-	return (0);
 }
 
 /*
  * Read the backend's feature nodes from xenstore, record them in the
  * softc/ifnet, and print one summary line.
  *
  * A node that cannot be read is treated as 0.  maxfrags stays 1 unless
  * feature-sg is set; TSO4 and LRO are advertised only when
  * feature-gso-tcpv4 is set; HWCSUM is advertised unless
  * feature-no-csum-offload is set.
  */
 static void
 xn_query_features(struct netfront_info *np)
 {
 	int flag;
 
 	device_printf(np->xbdev, "backend features:");
 
 	/* Scatter/gather support. */
 	if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
 	    "feature-sg", NULL, "%d", &flag) != 0)
 		flag = 0;
 	np->maxfrags = 1;
 	if (flag != 0) {
 		np->maxfrags = MAX_TX_REQ_FRAGS;
 		printf(" feature-sg");
 	}
 
 	/* Segmentation offload (and, with it, LRO). */
 	if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
 	    "feature-gso-tcpv4", NULL, "%d", &flag) != 0)
 		flag = 0;
 	np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4 | IFCAP_LRO);
 	if (flag != 0) {
 		np->xn_ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_LRO;
 		printf(" feature-gso-tcp4");
 	}
 
 	/*
 	 * Checksum offload is taken to be present unless the backend
 	 * explicitly sets feature-no-csum-offload.
 	 */
 	if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
 	    "feature-no-csum-offload", NULL, "%d", &flag) != 0)
 		flag = 0;
 	np->xn_ifp->if_capabilities |= IFCAP_HWCSUM;
 	if (flag != 0) {
 		np->xn_ifp->if_capabilities &= ~(IFCAP_HWCSUM);
 		printf(" feature-no-csum-offload");
 	}
 
 	printf("\n");
 }
 
 /*
  * Reconcile the enabled capabilities with what the backend offers.
  *
  * If every enabled capability is still available nothing changes.
  * Otherwise if_capenable and if_hwassist are cleared and rebuilt from the
  * intersection of the previously enabled set and if_capabilities (LRO and
  * TSO4 only when built with INET or INET6).
  *
  * Returns 0, or the tcp_lro_init() error if LRO setup failed.
  */
 static int
 xn_configure_features(struct netfront_info *np)
 {
 	struct ifnet *ifp;
 	int err, wanted;
 #if (defined(INET) || defined(INET6))
 	int q;
 #endif
 
 	ifp = np->xn_ifp;
 	err = 0;
 
 	/* No enabled capability lies outside the supported set. */
 	if ((ifp->if_capenable & ~ifp->if_capabilities) == 0)
 		return (0);
 
 	/* Remember what was enabled, then start again from nothing. */
 	wanted = ifp->if_capenable;
 	ifp->if_hwassist = 0;
 	ifp->if_capenable = 0;
 
 #if (defined(INET) || defined(INET6))
 	if ((wanted & IFCAP_LRO) != 0) {
 		for (q = 0; q < np->num_queues; q++)
 			tcp_lro_free(&np->rxq[q].lro);
 	}
 	if (xn_enable_lro &&
 	    (ifp->if_capabilities & wanted & IFCAP_LRO) != 0) {
 		ifp->if_capenable |= IFCAP_LRO;
 		for (q = 0; q < np->num_queues; q++) {
 			err = tcp_lro_init(&np->rxq[q].lro);
 			if (err != 0) {
 				device_printf(np->xbdev,
 				    "LRO initialization failed\n");
 				ifp->if_capenable &= ~IFCAP_LRO;
 				break;
 			}
 			np->rxq[q].lro.ifp = ifp;
 		}
 	}
 	if ((ifp->if_capabilities & wanted & IFCAP_TSO4) != 0) {
 		ifp->if_hwassist |= CSUM_TSO;
 		ifp->if_capenable |= IFCAP_TSO4;
 	}
 #endif
 	if ((ifp->if_capabilities & wanted & IFCAP_TXCSUM) != 0) {
 		ifp->if_hwassist |= CSUM_TCP|CSUM_UDP;
 		ifp->if_capenable |= IFCAP_TXCSUM;
 	}
 	if ((ifp->if_capabilities & wanted & IFCAP_RXCSUM) != 0)
 		ifp->if_capenable |= IFCAP_RXCSUM;
 
 	return (err);
 }
 
 /*
  * Queue `m` (if any) on the TX queue's buf_ring and transmit as much of
  * the ring as currently fits.
  *
  * When the interface is not running or has no carrier the mbuf is only
  * enqueued.  Returns the drbr_enqueue() error if enqueueing fails, and 0
  * otherwise -- including when xn_assemble_tx_request() fails for a packet
  * taken from the ring.  XN_TX_LOCK_ASSERT() is invoked on entry.
  */
 static int
 xn_txq_mq_start_locked(struct netfront_txq *txq, struct mbuf *m)
 {
 	struct netfront_info *np = txq->info;
 	struct buf_ring *br = txq->br;
 	struct ifnet *ifp = np->xn_ifp;
 	int error, notify;
 
 	XN_TX_LOCK_ASSERT(txq);
 
 	/* Not able to transmit right now: just park the packet. */
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 	    !netfront_carrier_ok(np))
 		return (m != NULL ? drbr_enqueue(ifp, br, m) : 0);
 
 	if (m != NULL) {
 		error = drbr_enqueue(ifp, br, m);
 		if (error != 0)
 			return (error);
 	}
 
 	for (;;) {
 		m = drbr_peek(ifp, br);
 		if (m == NULL)
 			break;
 
 		/* No room in the ring: leave the packet where it was. */
 		if (!xn_tx_slot_available(txq)) {
 			drbr_putback(ifp, br, m);
 			break;
 		}
 
 		/* xn_assemble_tx_request always consumes the mbuf*/
 		error = xn_assemble_tx_request(txq, m);
 		if (error != 0) {
 			drbr_advance(ifp, br);
 			break;
 		}
 
 		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&txq->ring, notify);
 		if (notify)
 			xen_intr_signal(txq->xen_intr_handle);
 
 		drbr_advance(ifp, br);
 	}
 
 	if (RING_FULL(&txq->ring))
 		txq->full = true;
 
 	return (0);
 }
 
 /*
  * if_transmit method: pick a TX queue for `m` and send or enqueue it.
  *
  * The queue index is flowid % num_queues when the mbuf carries a hash
  * type, otherwise curcpu % num_queues.  If the queue's TX lock cannot be
  * taken without blocking, the mbuf is enqueued on the queue's buf_ring and
  * the queue's deferred task is scheduled.
  */
 static int
 xn_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
 {
 	struct netfront_info *np = ifp->if_softc;
 	struct netfront_txq *txq;
 	int error, i, npairs;
 
 	npairs = np->num_queues;
 	KASSERT(npairs != 0, ("called with 0 available queues"));
 
 	/* check if flowid is set */
 	if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE)
 		i = curcpu % npairs;
 	else
 		i = m->m_pkthdr.flowid % npairs;
 
 	txq = &np->txq[i];
 
 	if (XN_TX_TRYLOCK(txq) == 0) {
 		/* Lock is busy: hand the packet to the deferred task. */
 		error = drbr_enqueue(ifp, txq->br, m);
 		taskqueue_enqueue(txq->tq, &txq->defrtask);
 		return (error);
 	}
 
 	error = xn_txq_mq_start_locked(txq, m);
 	XN_TX_UNLOCK(txq);
 
 	return (error);
 }
 
 /*
  * if_qflush method: free every mbuf waiting on each TX queue's buf_ring
  * (under that queue's TX lock), then call if_qflush() on the interface.
  */
 static void
 xn_qflush(struct ifnet *ifp)
 {
 	struct netfront_info *np = ifp->if_softc;
 	struct netfront_txq *txq;
 	struct mbuf *m;
 	int q;
 
 	for (q = 0; q < np->num_queues; q++) {
 		txq = &np->txq[q];
 
 		XN_TX_LOCK(txq);
 		for (m = buf_ring_dequeue_sc(txq->br); m != NULL;
 		    m = buf_ring_dequeue_sc(txq->br))
 			m_freem(m);
 		XN_TX_UNLOCK(txq);
 	}
 
 	if_qflush(ifp);
 }
 
 /**
  * Create a network device.
  *
  * Initialises the softc lock and the media list, reads the MAC address,
  * allocates and fills in the ifnet, attaches it as an Ethernet interface
  * and leaves the carrier off.
  *
  * @param dev  Newbus device representing this virtual NIC.
  * @return 0 on success; the xen_net_read_mac() error, or ENOMEM if the
  *         ifnet cannot be allocated.
  */
 int
 create_netdev(device_t dev)
 {
 	struct netfront_info *np;
 	int err;
 	struct ifnet *ifp;
 
 	np = device_get_softc(dev);
 
 	np->xbdev         = dev;
 
 	mtx_init(&np->sc_lock, "xnsc", "netfront softc lock", MTX_DEF);
 
 	ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts);
 	ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL);
 	ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL);
 
 	err = xen_net_read_mac(dev, np->mac);
 	if (err != 0)
 		goto error;
 
 	/* Set up ifnet structure */
 	ifp = np->xn_ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		/* Do not dereference a failed allocation below. */
 		err = ENOMEM;
 		goto error;
 	}
 	ifp->if_softc = np;
 	if_initname(ifp, "xn",  device_get_unit(dev));
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl = xn_ioctl;
 
 	ifp->if_transmit = xn_txq_mq_start;
 	ifp->if_qflush = xn_qflush;
 
 	ifp->if_init = xn_ifinit;
 
 	ifp->if_hwassist = XN_CSUM_FEATURES;
 	/* Enable all supported features at device creation. */
 	ifp->if_capenable = ifp->if_capabilities =
 	    IFCAP_HWCSUM|IFCAP_TSO4|IFCAP_LRO;
 	/* TSO limits: payload size, segment count and per-segment size. */
 	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
 	ifp->if_hw_tsomaxsegcount = MAX_TX_REQ_FRAGS;
 	ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
 
 	ether_ifattach(ifp, np->mac);
 	netfront_carrier_off(np);
 
 	return (0);
 
 error:
 	KASSERT(err != 0, ("Error path with no error code specified"));
 	return (err);
 }
 
 /*
  * device_detach method: tear the interface down through netif_free().
  * Always returns 0.
  */
 static int
 netfront_detach(device_t dev)
 {
 	struct netfront_info *np;
 
 	np = device_get_softc(dev);
 
 	DPRINTK("%s\n", xenbus_get_node(dev));
 
 	netif_free(np);
 
 	return (0);
 }
 
 /*
  * Release everything that hangs off the softc: stop the interface under
  * the softc lock, disconnect from the backend, detach and free the ifnet,
  * free the queue arrays and empty the media list.
  */
 static void
 netif_free(struct netfront_info *np)
 {
 
 	XN_LOCK(np);
 	xn_stop(np);
 	XN_UNLOCK(np);
 
 	netif_disconnect_backend(np);
 
 	/* The ifnet must be detached before the queues it uses go away. */
 	ether_ifdetach(np->xn_ifp);
 	free(np->rxq, M_DEVBUF);
 	free(np->txq, M_DEVBUF);
 
 	if_free(np->xn_ifp);
 	np->xn_ifp = NULL;
 
 	ifmedia_removeall(&np->sc_media);
 }
 
 /*
  * Drop the carrier while holding every queue's RX and TX lock, then
  * disconnect each RX and TX queue.
  */
 static void
 netif_disconnect_backend(struct netfront_info *np)
 {
 	u_int q;
 
 	/* Take all queue locks so the carrier change is seen consistently. */
 	for (q = 0; q < np->num_queues; q++) {
 		XN_RX_LOCK(&np->rxq[q]);
 		XN_TX_LOCK(&np->txq[q]);
 	}
 
 	netfront_carrier_off(np);
 
 	for (q = 0; q < np->num_queues; q++) {
 		XN_RX_UNLOCK(&np->rxq[q]);
 		XN_TX_UNLOCK(&np->txq[q]);
 	}
 
 	for (q = 0; q < np->num_queues; q++) {
 		disconnect_rxq(&np->rxq[q]);
 		disconnect_txq(&np->txq[q]);
 	}
 }
 
 /*
  * ifmedia change callback.  Nothing is configurable, so the request is
  * accepted without doing anything.
  */
 static int
 xn_ifmedia_upd(struct ifnet *ifp)
 {
 
 	return (0);
 }
 
 /*
  * ifmedia status callback: always reports a valid, active link using the
  * manual Ethernet media type.
  */
 static void
 xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
 
 	ifmr->ifm_active = IFM_ETHER|IFM_MANUAL;
 	ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE;
 }
 
 /* ** Driver registration ** */
 /*
  * Method table binding the generic device and xenbus entry points to the
  * netfront implementations in this file.
  */
 static device_method_t netfront_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,         netfront_probe),
 	DEVMETHOD(device_attach,        netfront_attach),
 	DEVMETHOD(device_detach,        netfront_detach),
 	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
 	DEVMETHOD(device_suspend,       netfront_suspend),
 	DEVMETHOD(device_resume,        netfront_resume),
 
 	/* Xenbus interface */
 	DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed),
 
 	DEVMETHOD_END
 };
 
 /* Driver description: name "xn", with struct netfront_info as the softc. */
 static driver_t netfront_driver = {
 	"xn",
 	netfront_methods,
 	sizeof(struct netfront_info),
 };
 devclass_t netfront_devclass;
 
 /*
  * NOTE(review): the module is registered as "xe" while the driver and
  * interface name is "xn" -- confirm this is intentional.
  */
 DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, NULL,
     NULL);
Index: projects/vnet/sys/kern/kern_shutdown.c
===================================================================
--- projects/vnet/sys/kern/kern_shutdown.c	(revision 301522)
+++ projects/vnet/sys/kern/kern_shutdown.c	(revision 301523)
@@ -1,942 +1,942 @@
 /*-
  * Copyright (c) 1986, 1988, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_shutdown.c	8.3 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_kdb.h"
 #include "opt_panic.h"
 #include "opt_sched.h"
 #include "opt_watchdog.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
 #include <sys/cons.h>
 #include <sys/eventhandler.h>
 #include <sys/filedesc.h>
 #include <sys/jail.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/kerneldump.h>
 #include <sys/kthread.h>
 #include <sys/ktr.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/reboot.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/vnode.h>
 #include <sys/watchdog.h>
 
 #include <ddb/ddb.h>
 
 #include <machine/cpu.h>
 #include <machine/dump.h>
 #include <machine/pcb.h>
 #include <machine/smp.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 #include <vm/swap_pager.h>
 
 #include <sys/signalvar.h>
 
 /* Malloc type used for the dumper block buffer allocated in set_dumper(). */
 static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer");
 
 /*
  * Delay applied by shutdown_panic() before rebooting after a panic.
  * A value of 0 reboots immediately and -1 waits for a key press without
  * any timeout.  PANIC_REBOOT_WAIT_TIME may be predefined to change the
  * default (presumably through opt_panic.h).
  */
 #ifndef PANIC_REBOOT_WAIT_TIME
 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
 #endif
 static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME;
 SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN,
     &panic_reboot_wait_time, 0,
     "Seconds to wait before rebooting after a panic");
 
 /*
  * Note that stdarg.h and the ANSI style va_start macro are used for both
  * ANSI and traditional C compilers.
  */
 #include <machine/stdarg.h>
 
 /*
  * Debugger knobs consulted by vpanic(); only present in KDB kernels.
  */
 #ifdef KDB
 /* Enter the debugger on panic unless the kernel is built KDB_UNATTENDED. */
 #ifdef KDB_UNATTENDED
 int debugger_on_panic = 0;
 #else
 int debugger_on_panic = 1;
 #endif
 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic,
     CTLFLAG_RWTUN | CTLFLAG_SECURE,
     &debugger_on_panic, 0, "Run debugger on kernel panic");
 
 /* Print a backtrace on the first panic; on by default with KDB_TRACE. */
 #ifdef KDB_TRACE
 static int trace_on_panic = 1;
 #else
 static int trace_on_panic = 0;
 #endif
 SYSCTL_INT(_debug, OID_AUTO, trace_on_panic,
     CTLFLAG_RWTUN | CTLFLAG_SECURE,
     &trace_on_panic, 0, "Print stack trace on kernel panic");
 #endif /* KDB */
 
 /* When zero, vpanic() adds RB_NOSYNC so filesystems are not synced. */
 static int sync_on_panic = 0;
 SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN,
 	&sync_on_panic, 0, "Do a sync before rebooting from a panic");
 
 /* Parent node for the kern.shutdown.* sysctls declared in this file. */
 static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0,
     "Shutdown environment");
 
 /*
  * Passed to bufshutdown() by kern_reboot(); defaults to 1 in DIAGNOSTIC
  * kernels.  The sysctl is exported without a description string.
  */
 #ifndef DIAGNOSTIC
 static int show_busybufs;
 #else
 static int show_busybufs = 1;
 #endif
 SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW,
 	&show_busybufs, 0, "");
 
 /* Exported flag; it is not read or written elsewhere in this file. */
 int suspend_blocked = 0;
 SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW,
 	&suspend_blocked, 0, "Block suspend due to a pending shutdown");
 
 /*
  * Variable panicstr contains argument to first call to panic; used as flag
  * to indicate that the kernel has already called panic.
  */
 const char *panicstr;
 
 int dumping;				/* system is dumping */
 int rebooting;				/* system is rebooting */
 static struct dumperinfo dumper;	/* our selected dumper */
 
 /* Context information for dump-debuggers. */
 static struct pcb dumppcb;		/* Registers. */
 lwpid_t dumptid;			/* Thread ID. */
 
 /*
  * Character device switch with no methods; reroot_conf() uses it to create
  * the "reroot/reroot" device node.
  */
 static struct cdevsw reroot_cdevsw = {
      .d_version = D_VERSION,
      .d_name    = "reroot",
 };
 
 /* Handlers registered on shutdown_final by shutdown_conf(). */
 static void poweroff_wait(void *, int);
 static void shutdown_halt(void *junk, int howto);
 static void shutdown_panic(void *junk, int howto);
 static void shutdown_reset(void *junk, int howto);
 /* Backend of reboot(2) with RB_REROOT, called from sys_reboot(). */
 static int kern_reroot(void);
 
 /*
  * Register the local shutdown_final handlers.
  *
  * poweroff_wait is registered at SHUTDOWN_PRI_FIRST, shutdown_halt and
  * shutdown_panic share SHUTDOWN_PRI_LAST + 100, and shutdown_reset is
  * registered at SHUTDOWN_PRI_LAST + 200.
  */
 static void
 shutdown_conf(void *unused)
 {
 
 	EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL,
 	    SHUTDOWN_PRI_FIRST);
 	EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL,
 	    SHUTDOWN_PRI_LAST + 100);
 	EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL,
 	    SHUTDOWN_PRI_LAST + 100);
 	EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL,
 	    SHUTDOWN_PRI_LAST + 200);
 }
 
 /* Run shutdown_conf() from SYSINIT at SI_SUB_INTRINSIC. */
 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL);
 
 /*
  * The only reason this exists is to create the /dev/reroot/ directory,
  * used by reroot code in init(8) as a mountpoint for tmpfs.
  */
 static void
 reroot_conf(void *unused)
 {
 	int error;
 	struct cdev *cdev;
 
 	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev,
 	    &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot");
 	if (error != 0) {
 		printf("%s: failed to create device node, error %d",
 		    __func__, error);
 	}
 }
 
 SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL);
 
 /*
  * reboot(2) system call.
  *
  * The request must pass the MAC check (when MAC is compiled in) and the
  * PRIV_REBOOT privilege check.  RB_REROOT requests are handed to
  * kern_reroot(); everything else calls kern_reboot() with Giant held.
  * Returns 0 or the error from the first failing check or kern_reroot().
  */
 /* ARGSUSED */
 int
 sys_reboot(struct thread *td, struct reboot_args *uap)
 {
 	int error;
 
 #ifdef MAC
 	error = mac_system_check_reboot(td->td_ucred, uap->opt);
 	if (error != 0)
 		return (error);
 #endif
 	error = priv_check(td, PRIV_REBOOT);
 	if (error != 0)
 		return (error);
 
 	if ((uap->opt & RB_REROOT) != 0)
 		return (kern_reroot());
 
 	mtx_lock(&Giant);
 	kern_reboot(uap->opt);
 	mtx_unlock(&Giant);
 	return (0);
 }
 
 /*
  * Called by events that want to shut down, e.g. <CTL><ALT><DEL> on a PC.
  *
  * With init(8) present the shutdown is delegated to it by signal:
  * SIGUSR2 for RB_POWEROFF, SIGUSR1 for RB_HALT, SIGINT otherwise.
  * Without init the kernel reboots directly, skipping the sync.
  */
 void
 shutdown_nice(int howto)
 {
 	int sig;
 
 	if (initproc == NULL) {
 		/* No init(8) running, so simply reboot. */
 		kern_reboot(howto | RB_NOSYNC);
 		return;
 	}
 
 	/* RB_POWEROFF takes precedence over RB_HALT. */
 	if ((howto & RB_POWEROFF) != 0)
 		sig = SIGUSR2;
 	else if ((howto & RB_HALT) != 0)
 		sig = SIGUSR1;
 	else
 		sig = SIGINT;
 
 	PROC_LOCK(initproc);
 	kern_psignal(initproc, sig);
 	PROC_UNLOCK(initproc);
 }
 
 static void
 print_uptime(void)
 {
 	int f;
 	struct timespec ts;
 
 	getnanouptime(&ts);
 	printf("Uptime: ");
 	f = 0;
 	if (ts.tv_sec >= 86400) {
 		printf("%ldd", (long)ts.tv_sec / 86400);
 		ts.tv_sec %= 86400;
 		f = 1;
 	}
 	if (f || ts.tv_sec >= 3600) {
 		printf("%ldh", (long)ts.tv_sec / 3600);
 		ts.tv_sec %= 3600;
 		f = 1;
 	}
 	if (f || ts.tv_sec >= 60) {
 		printf("%ldm", (long)ts.tv_sec / 60);
 		ts.tv_sec %= 60;
 		f = 1;
 	}
 	printf("%lds\n", (long)ts.tv_sec);
 }
 
 /*
  * Write a kernel dump through the configured dumper.
  *
  * Returns EBUSY when a dump is already in progress and ENXIO when no
  * dumper is configured.  In DDB kernels a pending textdump is written
  * instead of a core dump when "textdump" is set; otherwise the result of
  * dumpsys() is returned.
  */
 int
 doadump(boolean_t textdump)
 {
 	boolean_t want_core;
 	int rc;
 
 	if (dumping)
 		return (EBUSY);
 	if (dumper.dumper == NULL)
 		return (ENXIO);
 
 	/* Record the context of the dumping thread for dump debuggers. */
 	savectx(&dumppcb);
 	dumptid = curthread->td_tid;
 	dumping++;
 
 	rc = 0;
 	want_core = TRUE;
 #ifdef DDB
 	if (textdump && textdump_pending) {
 		want_core = FALSE;
 		textdump_dumpsys(&dumper);
 	}
 #endif
 	if (want_core)
 		rc = dumpsys(&dumper);
 
 	dumping--;
 	return (rc);
 }
 
 /*
  * Shutdown the system cleanly to prepare for reboot, halt, or power off.
  *
  * The stages run in this order: shutdown_pre_sync handlers, the buffer
  * sync (skipped when cold, when RB_NOSYNC is set, or after the first
  * attempt), the uptime message, shutdown_post_sync handlers, an optional
  * dump, and the shutdown_final handlers.  The function does not return.
  */
 void
 kern_reboot(int howto)
 {
 	/* Ensures the buffer sync below is attempted at most once. */
 	static int once = 0;
 
 #if defined(SMP)
 	/*
 	 * Bind us to CPU 0 so that all shutdown code runs there.  Some
 	 * systems don't shutdown properly (i.e., ACPI power off) if we
 	 * run on another processor.
 	 */
 	if (!SCHEDULER_STOPPED()) {
 		thread_lock(curthread);
 		sched_bind(curthread, 0);
 		thread_unlock(curthread);
 		KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0"));
 	}
 #endif
 	/* We're in the process of rebooting. */
 	rebooting = 1;
 
 	/* We are out of the debugger now. */
 	kdb_active = 0;
 
 	/*
 	 * Do any callouts that should be done BEFORE syncing the filesystems.
 	 */
 	EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);
 
 	/*
 	 * Now sync filesystems
 	 */
 	if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) {
 		once = 1;
 		bufshutdown(show_busybufs);
 	}
 
 	print_uptime();
 
 	cngrab();
 
 	/*
 	 * Ok, now do things that assume all filesystem activity has
 	 * been completed.
 	 */
 	EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
 
 	/* Dump only when RB_DUMP is set without RB_HALT. */
 	if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 
 		doadump(TRUE);
 
 	/* Now that we're going to really halt the system... */
 	EVENTHANDLER_INVOKE(shutdown_final, howto);
 
 	for(;;) ;	/* safety against shutdown_reset not working */
 	/* NOTREACHED */
 }
 
 /*
  * Backend of reboot(2) with RB_REROOT: replace the root filesystem.
  *
  * Only init may call this (EPERM otherwise).  The filesystem holding the
  * caller's executable is busied and, together with the /dev mount, taken
  * off the mount list so that vfs_unmountall() leaves both alone.  After
  * the new root is mounted both are put back.  Returns 0 on success, the
  * vn_lock() error, or ENOENT when the executable's filesystem cannot be
  * busied or its vnode has been doomed.
  */
 static int
 kern_reroot(void)
 {
 	struct vnode *oldrootvnode, *vp;
 	struct mount *mp, *devmp;
 	int error;
 
 	if (curproc != initproc)
 		return (EPERM);
 
 	/*
 	 * Mark the filesystem containing currently-running executable
 	 * (the temporary copy of init(8)) busy.
 	 */
 	vp = curproc->p_textvp;
 	error = vn_lock(vp, LK_SHARED);
 	if (error != 0)
 		return (error);
 	mp = vp->v_mount;
 	error = vfs_busy(mp, MBF_NOWAIT);
 	if (error != 0) {
 		/*
 		 * The non-blocking attempt failed: hold a reference on the
 		 * mount, drop the vnode lock, and retry with a blocking
 		 * vfs_busy() before relocking the vnode.
 		 */
 		vfs_ref(mp);
 		VOP_UNLOCK(vp, 0);
 		error = vfs_busy(mp, 0);
 		vn_lock(vp, LK_SHARED | LK_RETRY);
 		vfs_rel(mp);
 		if (error != 0) {
 			VOP_UNLOCK(vp, 0);
 			return (ENOENT);
 		}
 		/* The vnode may have been reclaimed while it was unlocked. */
 		if (vp->v_iflag & VI_DOOMED) {
 			VOP_UNLOCK(vp, 0);
 			vfs_unbusy(mp);
 			return (ENOENT);
 		}
 	}
 	VOP_UNLOCK(vp, 0);
 
 	/*
 	 * Remove the filesystem containing currently-running executable
 	 * from the mount list, to prevent it from being unmounted
 	 * by vfs_unmountall(), and to avoid confusing vfs_mountroot().
 	 *
 	 * Also preserve /dev - forcibly unmounting it could cause driver
 	 * reinitialization.
 	 */
 
 	/* NOTE(review): rootdevmp is used without a NULL check -- confirm. */
 	vfs_ref(rootdevmp);
 	devmp = rootdevmp;
 	rootdevmp = NULL;
 
 	mtx_lock(&mountlist_mtx);
 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
 	TAILQ_REMOVE(&mountlist, devmp, mnt_list);
 	mtx_unlock(&mountlist_mtx);
 
 	/* Remembered so references can be redirected by mountcheckdirs(). */
 	oldrootvnode = rootvnode;
 
 	/*
 	 * Unmount everything except for the two filesystems preserved above.
 	 */
 	vfs_unmountall();
 
 	/*
 	 * Add /dev back; vfs_mountroot() will move it into its new place.
 	 */
 	mtx_lock(&mountlist_mtx);
 	TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list);
 	mtx_unlock(&mountlist_mtx);
 	rootdevmp = devmp;
 	vfs_rel(rootdevmp);
 
 	/*
 	 * Mount the new rootfs.
 	 */
 	vfs_mountroot();
 
 	/*
 	 * Update all references to the old rootvnode.
 	 */
 	mountcheckdirs(oldrootvnode, rootvnode);
 
 	/*
 	 * Add the temporary filesystem back and unbusy it.
 	 */
 	mtx_lock(&mountlist_mtx);
 	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 	mtx_unlock(&mountlist_mtx);
 	vfs_unbusy(mp);
 
 	return (0);
 }
 
 /*
  * shutdown_final handler for a clean halt.
  *
  * When RB_HALT is set, announce the halt and wait for a key press.  If
  * cngetc() reports that there is no console (-1) the CPU is halted.
  */
 static void
 shutdown_halt(void *junk, int howto)
 {
 
 	if ((howto & RB_HALT) == 0)
 		return;
 
 	printf("\n");
 	printf("The operating system has halted.\n");
 	printf("Please press any key to reboot.\n\n");
 
 	/* No console, just die. */
 	if (cngetc() == -1)
 		cpu_halt();
 	/* NOTREACHED after cpu_halt(); otherwise a key was pressed. */
 	howto &= ~RB_HALT;
 }
 
 /*
  * shutdown_final handler for the panic case (RB_DUMP set).
  *
  * panic_reboot_wait_time selects the behavior: 0 returns at once so the
  * reboot proceeds, -1 waits for a key with no timeout, and any other
  * value counts down in tenths of a second.  If the countdown expires the
  * handler returns; if a key interrupts it, a second key press is awaited.
  */
 static void
 shutdown_panic(void *junk, int howto)
 {
 	int tenths;
 
 	if ((howto & RB_DUMP) == 0)
 		return;
 
 	/* zero time specified - reboot NOW */
 	if (panic_reboot_wait_time == 0)
 		return;
 
 	if (panic_reboot_wait_time != -1) {
 		printf("Automatic reboot in %d seconds - "
 		    "press a key on the console to abort\n",
 		    panic_reboot_wait_time);
 		tenths = panic_reboot_wait_time * 10;
 		while (tenths > 0) {
 			DELAY(1000 * 100);	/* 1/10th second */
 			/* Did user type a key? */
 			if (cncheckc() != -1)
 				break;
 			--tenths;
 		}
 		/* Countdown ran out without a key press. */
 		if (tenths == 0)
 			return;
 	}
 
 	printf("--> Press a key on the console to reboot,\n");
 	printf("--> or switch off the system now.\n");
 	cngetc();
 }
 
 /*
  * Everything done, now reset
  *
  * Last of the local shutdown_final handlers: prints a message, waits one
  * second, takes smp_ipi_mtx (SMP) or enters a spinlock section (!SMP) and
  * calls cpu_reset().
  */
 static void
 shutdown_reset(void *junk, int howto)
 {
 
 	printf("Rebooting...\n");
 	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
 
 	/*
 	 * Acquiring smp_ipi_mtx here has a double effect:
 	 * - it disables interrupts avoiding CPU0 preemption
 	 *   by fast handlers (thus deadlocking  against other CPUs)
 	 * - it avoids deadlocks against smp_rendezvous() or, more
 	 *   generally, threads busy-waiting, with this spinlock held,
 	 *   and waiting for responses by threads on other CPUs
 	 *   (ie. smp_tlb_shootdown()).
 	 *
 	 * For the !SMP case it just needs to handle the former problem.
 	 */
 #ifdef SMP
 	mtx_lock_spin(&smp_ipi_mtx);
 #else
 	spinlock_enter();
 #endif
 
 	/* cpu_boot(howto); */ /* doesn't do anything at the moment */
 	cpu_reset();
 	/* NOTREACHED */ /* assuming reset worked */
 }
 
 #if defined(WITNESS) || defined(INVARIANTS)
 /*
  * Tunables consulted by kassert_panic().  With warn_only left at 0 a
  * failed KASSERT panics; with warn_only set it is logged, traced and/or
  * handed to the debugger according to the knobs below.
  */
 static int kassert_warn_only = 0;
 #ifdef KDB
 static int kassert_do_kdb = 0;
 #endif
 #ifdef KTR
 static int kassert_do_ktr = 0;
 #endif
 static int kassert_do_log = 1;
 static int kassert_log_pps_limit = 4;
 static int kassert_log_mute_at = 0;
 static int kassert_log_panic_at = 0;
 /* Count of non-fatal KASSERT failures, incremented by kassert_panic(). */
 static int kassert_warnings = 0;
 
 SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW, NULL, "kassert options");
 
 /* kassert_panic() panics when this is 0 and only warns when it is set. */
 SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, CTLFLAG_RWTUN,
     &kassert_warn_only, 0,
     "KASSERT triggers a panic (0) or just a warning (1)");
 
 #ifdef KDB
 SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, CTLFLAG_RWTUN,
     &kassert_do_kdb, 0, "KASSERT will enter the debugger");
 #endif
 
 #ifdef KTR
 SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, CTLFLAG_RWTUN,
     &kassert_do_ktr, 0,
     "KASSERT does a KTR, set this to the KTRMASK you want");
 #endif
 
 /* Controls the "KASSERT failed" printf on the warn-only path. */
 SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, CTLFLAG_RWTUN,
     &kassert_do_log, 0, "If warn_only is enabled, log (1) or do nothing (0)");
 
 SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RWTUN,
     &kassert_warnings, 0, "number of KASSERTs that have been triggered");
 
 SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, CTLFLAG_RWTUN,
     &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic");
 
 SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, CTLFLAG_RWTUN,
     &kassert_log_pps_limit, 0, "limit number of log messages per second");
 
 SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, CTLFLAG_RWTUN,
     &kassert_log_mute_at, 0, "max number of KASSERTS to log");
 
 static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS);
 
 /* Writing any value to this node fires a test KASSERT. */
 SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0,
     kassert_sysctl_kassert, "I", "set to trigger a test kassert");
 
 /*
  * Handler for the debug.kassert.kassert sysctl.
  *
  * Reads report 0.  A successful write fires a deliberately failing
  * KASSERT carrying the written value.  Errors from wiring the old buffer
  * or from sysctl_handle_int() are returned unchanged.
  */
 static int
 kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS)
 {
 	int rc, val;
 
 	rc = sysctl_wire_old_buffer(req, sizeof(int));
 	if (rc != 0)
 		return (rc);
 
 	val = 0;
 	rc = sysctl_handle_int(oidp, &val, 0, req);
 	/* Nothing more to do on error or when no new value was supplied. */
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", val));
 	return (0);
 }
 
 /*
  * Called by KASSERT, this decides if we will panic
  * or if we will log via printf and/or ktr.
  *
  * The message is formatted into a static buffer first.  When the call
  * does not panic, the failure is optionally traced (KTR), logged with a
  * rate limit, and handed to the debugger (KDB) before the warning counter
  * is incremented.
  */
 void
 kassert_panic(const char *fmt, ...)
 {
 	/* NOTE(review): static buffer shared by all callers -- confirm OK. */
 	static char buf[256];
 	va_list ap;
 
 	va_start(ap, fmt);
 	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
 	va_end(ap);
 
 	/*
 	 * panic if we're not just warning, or if we've exceeded
 	 * kassert_log_panic_at warnings.
 	 */
 	if (!kassert_warn_only ||
 	    (kassert_log_panic_at > 0 &&
 	     kassert_warnings >= kassert_log_panic_at)) {
 		/* The argument list is restarted for vpanic(). */
 		va_start(ap, fmt);
 		vpanic(fmt, ap);
 		/* NORETURN */
 	}
 #ifdef KTR
 	if (kassert_do_ktr)
 		CTR0(ktr_mask, buf);
 #endif /* KTR */
 	/*
 	 * log if we've not yet met the mute limit.
 	 */
 	if (kassert_do_log &&
 	    (kassert_log_mute_at == 0 ||
 	     kassert_warnings < kassert_log_mute_at)) {
 		static  struct timeval lasterr;
 		static  int curerr;
 
 		/* Limit output to kassert_log_pps_limit messages per second. */
 		if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) {
 			printf("KASSERT failed: %s\n", buf);
 			kdb_backtrace();
 		}
 	}
 #ifdef KDB
 	if (kassert_do_kdb) {
 		kdb_enter(KDB_WHY_KASSERT, buf);
 	}
 #endif
 	atomic_add_int(&kassert_warnings, 1);
 }
 #endif
 
 /*
  * Panic is called on unresolvable fatal errors.  It prints "panic: mesg",
  * and then reboots.  If we are called twice, then we avoid trying to sync
  * the disks as this often leads to recursive panics.
  *
  * This is the variadic front end; the work is done by vpanic().
  */
 void
 panic(const char *fmt, ...)
 {
 	va_list ap;
 
 	va_start(ap, fmt);
 	/* No va_end(): vpanic() ends in kern_reboot(), which never returns. */
 	vpanic(fmt, ap);
 }
 
 /*
  * va_list form of panic().
  *
  * The first panic records the formatted message in panicstr, requests a
  * dump (RB_DUMP) and optionally prints a backtrace.  A nested panic only
  * prints its message and forces RB_NOSYNC.  In both cases the debugger is
  * entered when debugger_on_panic is set, and kern_reboot() is called last.
  */
 void
 vpanic(const char *fmt, va_list ap)
 {
 #ifdef SMP
 	cpuset_t other_cpus;
 #endif
 	struct thread *td = curthread;
 	int bootopt, newpanic;
 	/* Holds the formatted message of the first panic; see panicstr. */
 	static char buf[256];
 
 	spinlock_enter();
 
 #ifdef SMP
 	/*
 	 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from
 	 * concurrently entering panic.  Only the winner will proceed
 	 * further.
 	 */
 	if (panicstr == NULL && !kdb_active) {
 		other_cpus = all_cpus;
 		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
 		stop_cpus_hard(other_cpus);
 	}
 
 	/*
 	 * Ensure that the scheduler is stopped while panicking, even if panic
 	 * has been entered from kdb.
 	 */
 	td->td_stopsched = 1;
 #endif
 
 	bootopt = RB_AUTOBOOT;
 	newpanic = 0;
 	if (panicstr)
 		bootopt |= RB_NOSYNC;
 	else {
 		bootopt |= RB_DUMP;
 		/* Set right away; replaced by the formatted text below. */
 		panicstr = fmt;
 		newpanic = 1;
 	}
 
 	if (newpanic) {
 		(void)vsnprintf(buf, sizeof(buf), fmt, ap);
 		panicstr = buf;
 		cngrab();
 		printf("panic: %s\n", buf);
 	} else {
 		printf("panic: ");
 		vprintf(fmt, ap);
 		printf("\n");
 	}
 #ifdef SMP
 	printf("cpuid = %d\n", PCPU_GET(cpuid));
 #endif
 
 #ifdef KDB
 	if (newpanic && trace_on_panic)
 		kdb_backtrace();
 	if (debugger_on_panic)
 		kdb_enter(KDB_WHY_PANIC, "panic");
 #endif
 	/*thread_lock(td); */
 	td->td_flags |= TDF_INPANIC;
 	/* thread_unlock(td); */
 	if (!sync_on_panic)
 		bootopt |= RB_NOSYNC;
 	kern_reboot(bootopt);
 }
 
 /*
  * Support for poweroff delay.
  *
  * Please note that setting this delay too short might power off your machine
  * before the write cache on your hard disk has been flushed, leading to
  * soft-updates inconsistencies.
  *
  * The value is in milliseconds; poweroff_wait() ignores values <= 0.
  */
 #ifndef POWEROFF_DELAY
 # define POWEROFF_DELAY 5000
 #endif
 static int poweroff_delay = POWEROFF_DELAY;
 
 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
     &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)");
 
 /*
  * shutdown_final handler: when powering off with a positive
  * poweroff_delay, busy-wait for that many milliseconds.
  */
 static void
 poweroff_wait(void *junk, int howto)
 {
 
 	if ((howto & RB_POWEROFF) != 0 && poweroff_delay > 0)
 		DELAY(poweroff_delay * 1000);
 }
 
 /*
  * Some system processes (e.g. syncer) need to be stopped at appropriate
  * points in their main loops prior to a system shutdown, so that they
  * won't interfere with the shutdown process (e.g. by holding a disk buf
  * to cause sync to fail).  For each of these system processes, register
  * kproc_shutdown() (or kthread_shutdown() for a kernel thread) as a
  * handler for one of the shutdown events.
  *
  * kproc_shutdown_wait is the per-process timeout, in seconds.
  */
 static int kproc_shutdown_wait = 60;
 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
     &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process");
 
 void
 kproc_shutdown(void *arg, int howto)
 {
 	struct proc *p;
 	int error;
 
 	if (panicstr)
 		return;
 
 	p = (struct proc *)arg;
 	printf("Waiting (max %d seconds) for system process `%s' to stop... ",
 	    kproc_shutdown_wait, p->p_comm);
 	error = kproc_suspend(p, kproc_shutdown_wait * hz);
 
 	if (error == EWOULDBLOCK)
 		printf("timed out\n");
 	else
 		printf("done\n");
 }
 
 void
 kthread_shutdown(void *arg, int howto)
 {
 	struct thread *td;
 	int error;
 
 	if (panicstr)
 		return;
 
 	td = (struct thread *)arg;
 	printf("Waiting (max %d seconds) for system thread `%s' to stop... ",
 	    kproc_shutdown_wait, td->td_name);
 	error = kthread_suspend(td, kproc_shutdown_wait * hz);
 
 	if (error == EWOULDBLOCK)
 		printf("timed out\n");
 	else
 		printf("done\n");
 }
 
 /*
  * Name of the configured dump device, maintained by set_dumper() and
  * exported read-only.  Sized like the si_name member of struct cdev.
  */
 static char dumpdevname[sizeof(((struct cdev*)NULL)->si_name)];
 SYSCTL_STRING(_kern_shutdown, OID_AUTO, dumpdevname, CTLFLAG_RD,
     dumpdevname, 0, "Device for kernel dumps");
 
 /*
  * Register or clear the kernel dumper.
  *
  * Requires PRIV_SETDUMPER.  A NULL "di" clears the current dumper, frees
  * its block buffer and empties dumpdevname.  Otherwise "di" is copied,
  * "devname" is recorded (a truncation is reported on the console) and a
  * zeroed block buffer of di->blocksize bytes is allocated.  Returns EBUSY
  * when a dumper is already set.
  */
 int
 set_dumper(struct dumperinfo *di, const char *devname, struct thread *td)
 {
 	size_t needed;
 	int error;
 
 	error = priv_check(td, PRIV_SETDUMPER);
 	if (error != 0)
 		return (error);
 
 	if (di != NULL) {
 		if (dumper.dumper != NULL)
 			return (EBUSY);
 
 		dumper = *di;
 		needed = strlcpy(dumpdevname, devname, sizeof(dumpdevname));
 		if (needed >= sizeof(dumpdevname)) {
 			printf("set_dumper: device name truncated from '%s' -> '%s'\n",
 			    devname, dumpdevname);
 		}
 		dumper.blockbuf = malloc(di->blocksize, M_DUMPER,
 		    M_WAITOK | M_ZERO);
 		return (0);
 	}
 
 	/* Clearing request: drop the buffer and reset all dumper state. */
 	if (dumper.blockbuf != NULL)
 		free(dumper.blockbuf, M_DUMPER);
 	bzero(&dumper, sizeof(dumper));
 	dumpdevname[0] = '\0';
 	return (0);
 }
 
 /*
  * Call the dumper after checking that a non-empty write lies within the
  * dump media, i.e. starts at or after di->mediaoffset and ends within
  * di->mediasize bytes of it.  Returns ENOSPC when the check fails,
  * otherwise the dumper's result.
  */
 int
 dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical,
     off_t offset, size_t length)
 {
 
 	if (length != 0) {
 		if (offset < di->mediaoffset ||
 		    offset - di->mediaoffset + length > di->mediasize) {
 			printf("Attempt to write outside dump device boundaries.\n"
 	    "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n",
 			    (intmax_t)offset, (intmax_t)di->mediaoffset,
 			    (uintmax_t)length, (intmax_t)di->mediasize);
 			return (ENOSPC);
 		}
 	}
 
 	return (di->dumper(di->priv, virtual, physical, offset, length));
 }
 
 /*
  * Write one block through dump_write(), zero-padding short input.
  *
  * "length" may not exceed di->blocksize (ENOMEM otherwise).  *size is set
  * to the block size.  A full block is written straight from "virtual";
  * shorter data is copied into di->blockbuf with the tail cleared first.
  */
 int
 dump_write_pad(struct dumperinfo *di, void *virtual, vm_offset_t physical,
     off_t offset, size_t length, size_t *size)
 {
 	char *padded;
 
 	if (length > di->blocksize)
 		return (ENOMEM);
 
 	*size = di->blocksize;
 	if (length == di->blocksize)
 		return (dump_write(di, virtual, physical, offset, *size));
 
 	padded = di->blockbuf;
 	memset(padded + length, 0, di->blocksize - length);
 	memcpy(padded, virtual, length);
 	return (dump_write(di, padded, physical, offset, *size));
 }
 
 
 /*
  * Fill in a kernel dump header.
  *
  * The header is zeroed, the numeric fields are stored through the
  * htod32()/htod64() conversions, the string fields are copied with
  * truncation, and the parity is computed last over the finished header.
  * The panic string is only recorded when a panic has happened.
  */
 void
 mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver,
     uint64_t dumplen, uint32_t blksz)
 {
 
 	bzero(kdh, sizeof(*kdh));
 
 	/* Numeric fields. */
 	kdh->version = htod32(KERNELDUMPVERSION);
 	kdh->architectureversion = htod32(archver);
 	kdh->dumplength = htod64(dumplen);
 	kdh->dumptime = htod64(time_second);
 	kdh->blocksize = htod32(blksz);
 
 	/* String fields. */
 	strlcpy(kdh->magic, magic, sizeof(kdh->magic));
 	strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
 	strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname));
 	strlcpy(kdh->versionstring, version, sizeof(kdh->versionstring));
 	if (panicstr != NULL)
 		strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
 
 	/* Must come last: covers every field set above. */
 	kdh->parity = kerneldump_parity(kdh);
 }
 
 #ifdef DDB
 DB_SHOW_COMMAND(panic, db_show_panic)
 {
 
 	if (panicstr == NULL)
-		db_printf("Not paniced\n");
+		db_printf("panicstr not set\n");
 	else
 		db_printf("panic: %s\n", panicstr);
 }
 #endif
Index: projects/vnet/sys/kern/kern_timeout.c
===================================================================
--- projects/vnet/sys/kern/kern_timeout.c	(revision 301522)
+++ projects/vnet/sys/kern/kern_timeout.c	(revision 301523)
@@ -1,1655 +1,1654 @@
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	From: @(#)kern_clock.c	8.5 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_callout_profiling.h"
 #include "opt_ddb.h"
 #if defined(__arm__)
 #include "opt_timer.h"
 #endif
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/callout.h>
 #include <sys/file.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sdt.h>
 #include <sys/sleepqueue.h>
 #include <sys/sysctl.h>
 #include <sys/smp.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #include <machine/_inttypes.h>
 #endif
 
 #ifdef SMP
 #include <machine/cpu.h>
 #endif
 
 #ifndef NO_EVENTTIMERS
 DPCPU_DECLARE(sbintime_t, hardclocktime);
 #endif
 
 SDT_PROVIDER_DEFINE(callout_execute);
 SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *");
 SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *");
 
 #ifdef CALLOUT_PROFILING
 static int avg_depth;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
     "Average number of items examined per softclock call. Units = 1/1000");
 static int avg_gcalls;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
     "Average number of Giant callouts made per softclock call. Units = 1/1000");
 static int avg_lockcalls;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0,
     "Average number of lock callouts made per softclock call. Units = 1/1000");
 static int avg_mpcalls;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
     "Average number of MP callouts made per softclock call. Units = 1/1000");
 static int avg_depth_dir;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0,
     "Average number of direct callouts examined per callout_process call. "
     "Units = 1/1000");
 static int avg_lockcalls_dir;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD,
     &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per "
     "callout_process call. Units = 1/1000");
 static int avg_mpcalls_dir;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir,
     0, "Average number of MP direct callouts made per callout_process call. "
     "Units = 1/1000");
 #endif
 
 static int ncallout;
 SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0,
     "Number of entries in callwheel and size of timeout() preallocation");
 
 /*
  * Whether the softclock SWI threads are bound to their CPUs.  Pinning
  * defaults to on when the kernel is built with RSS and off otherwise.
  * Both knobs are declared CTLFLAG_NOFETCH because the tunables are fetched
  * by hand in callout_callwheel_init().
  */
 #ifdef	RSS
 static int pin_default_swi = 1;
 static int pin_pcpu_swi = 1;
 #else
 static int pin_default_swi = 0;
 static int pin_pcpu_swi = 0;
 #endif
 
 SYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi,
     0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)");
 SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi,
     0, "Pin the per-CPU swis (except PCPU 0, which is also default)");
 
 /*
  * TODO:
  *	allocate more timeout table slots when table overflows.
  */
 u_int callwheelsize, callwheelmask;
 
 /*
  * The callout cpu exec entities hold the information necessary for
  * describing the state of callouts currently running on the CPU and the
  * information necessary for migrating callouts to a new callout cpu.
  * In particular, the first entry of the array cc_exec_entity holds the
  * state for a callout running in SWI thread context, while the second one
  * holds the state for a callout running directly from hardware interrupt
  * context.
  * The cached information is very important for deferring migration when
  * the migrating callout is already running.
  */
 struct cc_exec {
 	struct callout		*cc_curr;	/* callout being executed */
 	void			(*cc_drain)(void *);	/* called once cc_curr is done */
 #ifdef SMP
 	/* Parameters saved for a migration deferred until cc_curr returns. */
 	void			(*ce_migration_func)(void *);
 	void			*ce_migration_arg;
 	int			ce_migration_cpu;	/* CPUBLOCK: none pending */
 	sbintime_t		ce_migration_time;
 	sbintime_t		ce_migration_prec;
 #endif
 	bool			cc_cancel;	/* cc_curr must not / can no longer be stopped */
 	bool			cc_waiting;	/* a thread waits for cc_curr to finish */
 };
 
 /*
  * There is one struct callout_cpu per cpu, holding all relevant
  * state for the callout processing thread on the individual CPU.
  */
 struct callout_cpu {
 	struct mtx_padalign	cc_lock;	/* spin mutex protecting this structure */
 	struct cc_exec 		cc_exec_entity[2];	/* [0] SWI context, [1] direct */
 	struct callout		*cc_next;	/* next callout in the bucket being scanned */
 	struct callout		*cc_callout;	/* timeout(9) preallocation, or NULL */
 	struct callout_list	*cc_callwheel;	/* callwheelsize bucket heads */
 	struct callout_tailq	cc_expireq;	/* expired, waiting for softclock() */
 	struct callout_slist	cc_callfree;	/* free preallocated callouts */
 	sbintime_t		cc_firstevent;	/* time of the earliest event programmed */
 	sbintime_t		cc_lastscan;	/* 'now' of the last callout_process() */
 	void			*cc_cookie;	/* SWI cookie filled in by swi_add() */
 	u_int			cc_bucket;	/* bucket of the direct callout being run */
 	u_int			cc_inited;	/* set by callout_cpu_init() */
 	char			cc_ktr_event_name[20];	/* "callwheel cpu N" */
 };
 
 /* Non-zero when a deferred migration is recorded for the callout. */
 #define	callout_migrating(c)	((c)->c_iflags & CALLOUT_DFRMIGRATION)
 
 /*
  * Accessors for the per-CPU execution state; 'dir' selects the entry of
  * cc_exec_entity[] (0 for SWI context, 1 for direct execution).  The
  * expansions are used as lvalues and have their address taken by callers,
  * so the macro arguments are parenthesized while the expansion as a whole
  * is left bare.
  */
 #define	cc_exec_curr(cc, dir)		(cc)->cc_exec_entity[(dir)].cc_curr
 #define	cc_exec_drain(cc, dir)		(cc)->cc_exec_entity[(dir)].cc_drain
 #define	cc_exec_next(cc)		(cc)->cc_next
 #define	cc_exec_cancel(cc, dir)		(cc)->cc_exec_entity[(dir)].cc_cancel
 #define	cc_exec_waiting(cc, dir)	(cc)->cc_exec_entity[(dir)].cc_waiting
 #ifdef SMP
 #define	cc_migration_func(cc, dir)	(cc)->cc_exec_entity[(dir)].ce_migration_func
 #define	cc_migration_arg(cc, dir)	(cc)->cc_exec_entity[(dir)].ce_migration_arg
 #define	cc_migration_cpu(cc, dir)	(cc)->cc_exec_entity[(dir)].ce_migration_cpu
 #define	cc_migration_time(cc, dir)	(cc)->cc_exec_entity[(dir)].ce_migration_time
 #define	cc_migration_prec(cc, dir)	(cc)->cc_exec_entity[(dir)].ce_migration_prec
 
 /* One callout_cpu per possible CPU; CPUBLOCK marks "no CPU / in transit". */
 struct callout_cpu cc_cpu[MAXCPU];
 #define	CPUBLOCK	MAXCPU
 #define	CC_CPU(cpu)	(&cc_cpu[(cpu)])
 #define	CC_SELF()	CC_CPU(PCPU_GET(cpuid))
 #else
 /* Uniprocessor: a single callout_cpu, the 'cpu' argument is ignored. */
 struct callout_cpu cc_cpu;
 #define	CC_CPU(cpu)	(&cc_cpu)
 #define	CC_SELF()	(&cc_cpu)
 #endif
 #define	CC_LOCK(cc)	mtx_lock_spin(&(cc)->cc_lock)
 #define	CC_UNLOCK(cc)	mtx_unlock_spin(&(cc)->cc_lock)
 #define	CC_LOCK_ASSERT(cc)	mtx_assert(&(cc)->cc_lock, MA_OWNED)
 
 static int timeout_cpu;
 
 static void	callout_cpu_init(struct callout_cpu *cc, int cpu);
 static void	softclock_call_cc(struct callout *c, struct callout_cpu *cc,
 #ifdef CALLOUT_PROFILING
 		    int *mpcalls, int *lockcalls, int *gcalls,
 #endif
 		    int direct);
 
 static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
 
 /**
  * Locked by cc_lock:
  *   cc_curr         - If a callout is in progress, it is cc_curr.
  *                     If cc_curr is non-NULL, threads waiting in
  *                     callout_drain() will be woken up as soon as the
  *                     relevant callout completes.
  *   cc_cancel       - Changing to 1 with both c_lock and cc_lock held
  *                     guarantees that the current callout will not run.
  *                     The softclock_call_cc() function sets this to 0
  *                     before it drops cc_lock to acquire c_lock, and it
  *                     calls the handler only if cc_cancel is still 0 after
  *                     c_lock is successfully acquired.
  *   cc_waiting      - If a thread is waiting in callout_drain(), then
  *                     cc_waiting is nonzero.  Set only when
  *                     cc_curr is non-NULL.
  */
 
 /*
  * Resets the execution entity tied to a specific callout cpu.
  */
 static void
 cc_cce_cleanup(struct callout_cpu *cc, int direct)
 {
 
 	cc_exec_curr(cc, direct) = NULL;
 	cc_exec_cancel(cc, direct) = false;
 	cc_exec_waiting(cc, direct) = false;
 #ifdef SMP
 	cc_migration_cpu(cc, direct) = CPUBLOCK;
 	cc_migration_time(cc, direct) = 0;
 	cc_migration_prec(cc, direct) = 0;
 	cc_migration_func(cc, direct) = NULL;
 	cc_migration_arg(cc, direct) = NULL;
 #endif
 }
 
 /*
  * Report whether a deferred migration is recorded for the given execution
  * entity, i.e. whether its migration cpu differs from CPUBLOCK.  Always
  * false on uniprocessor kernels.
  */
 static int
 cc_cce_migrating(struct callout_cpu *cc, int direct)
 {
 
 #ifndef SMP
 	return (0);
 #else
 	return (cc_migration_cpu(cc, direct) != CPUBLOCK);
 #endif
 }
 
 /*
  * Kernel low level callwheel initialization
  * called on cpu0 during kernel startup.
  *
  * Sizes the callwheel, fetches the related tunables, and sets up the
  * callout_cpu of the CPU running this function as the one that serves
  * timeout(9), including its preallocated callout array.
  */
 static void
 callout_callwheel_init(void *dummy)
 {
 	struct callout_cpu *cc;
 
 	/*
 	 * Calculate the size of the callout wheel and the preallocated
 	 * timeout() structures.  The default is 16 + maxproc + maxfiles,
 	 * clipped to 18508, and may be overridden by the kern.ncallout
 	 * tunable.
 	 * XXX: The clip value is the result the previous maxusers-based
 	 * formula gave at maxusers maximum 384.  This is still huge, but
 	 * acceptable.
 	 */
 	memset(CC_CPU(0), 0, sizeof(cc_cpu));
 	ncallout = imin(16 + maxproc + maxfiles, 18508);
 	TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
 
 	/*
 	 * Calculate callout wheel size, should be next power of two higher
 	 * than 'ncallout'.
 	 */
 	callwheelsize = 1 << fls(ncallout);
 	callwheelmask = callwheelsize - 1;
 
 	/*
 	 * Fetch whether we're pinning the swi's or not.
 	 */
 	TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi);
 	TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi);
 
 	/*
 	 * Only cpu0 handles timeout(9) and receives a preallocation.
 	 *
 	 * XXX: Once all timeout(9) consumers are converted this can
 	 * be removed.
 	 */
 	timeout_cpu = PCPU_GET(cpuid);
 	cc = CC_CPU(timeout_cpu);
 	cc->cc_callout = malloc(ncallout * sizeof(struct callout),
 	    M_CALLOUT, M_WAITOK);
 	callout_cpu_init(cc, timeout_cpu);
 }
 SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL);
 
 /*
  * Initialize the per-cpu callout structures.
  */
 static void
 callout_cpu_init(struct callout_cpu *cc, int cpu)
 {
 	struct callout *c;
 	int i;
 
 	mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
 	SLIST_INIT(&cc->cc_callfree);
 	cc->cc_inited = 1;
 	cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize,
 	    M_CALLOUT, M_WAITOK);
 	for (i = 0; i < callwheelsize; i++)
 		LIST_INIT(&cc->cc_callwheel[i]);
 	TAILQ_INIT(&cc->cc_expireq);
 	cc->cc_firstevent = SBT_MAX;
 	for (i = 0; i < 2; i++)
 		cc_cce_cleanup(cc, i);
 	snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
 	    "callwheel cpu %d", cpu);
 	if (cc->cc_callout == NULL)	/* Only cpu0 handles timeout(9) */
 		return;
 	for (i = 0; i < ncallout; i++) {
 		c = &cc->cc_callout[i];
 		callout_init(c, 0);
 		c->c_iflags = CALLOUT_LOCAL_ALLOC;
 		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
 	}
 }
 
 #ifdef SMP
 /*
  * Switches the cpu tied to a specific callout.
  * The function expects a locked incoming callout cpu and returns with
  * locked outcoming callout cpu.
  *
  * While the hand-off is in progress c->c_cpu is set to CPUBLOCK, which
  * makes callout_lock() spin until the new cpu has been stored.
  */
 static struct callout_cpu *
 callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
 {
 	struct callout_cpu *new_cc;
 
 	MPASS(c != NULL && cc != NULL);
 	CC_LOCK_ASSERT(cc);
 
 	/*
 	 * Avoid interrupts and preemption firing after the callout cpu
 	 * is blocked in order to avoid deadlocks as the new thread
 	 * may be willing to acquire the callout cpu lock.
 	 */
 	c->c_cpu = CPUBLOCK;
 	spinlock_enter();
 	CC_UNLOCK(cc);
 	new_cc = CC_CPU(new_cpu);
 	CC_LOCK(new_cc);
 	spinlock_exit();
 	/* Publish the new owner only once its lock is held. */
 	c->c_cpu = new_cpu;
 	return (new_cc);
 }
 #endif
 
 /*
  * Start standard softclock thread.
  *
  * Registers the softclock() SWI handler for the timeout(9) CPU and, on
  * SMP kernels, initializes the callout state of every other CPU and
  * registers a handler for each of them as well.  Handlers are bound to
  * their CPU when the pin_default_swi / pin_pcpu_swi knobs are set; a
  * failed bind is only reported, not treated as fatal.
  */
 static void
 start_softclock(void *dummy)
 {
 	struct callout_cpu *cc;
 	char name[MAXCOMLEN];
 #ifdef SMP
 	int cpu;
 	struct intr_event *ie;
 #endif
 
 	/* The default SWI, shared with the timeout(9) CPU. */
 	cc = CC_CPU(timeout_cpu);
 	snprintf(name, sizeof(name), "clock (%d)", timeout_cpu);
 	if (swi_add(&clk_intr_event, name, softclock, cc, SWI_CLOCK,
 	    INTR_MPSAFE, &cc->cc_cookie))
 		panic("died while creating standard software ithreads");
 	if (pin_default_swi &&
 	    (intr_event_bind(clk_intr_event, timeout_cpu) != 0)) {
 		printf("%s: timeout clock couldn't be pinned to cpu %d\n",
 		    __func__,
 		    timeout_cpu);
 	}
 
 #ifdef SMP
 	CPU_FOREACH(cpu) {
 		/* Already set up by callout_callwheel_init() and above. */
 		if (cpu == timeout_cpu)
 			continue;
 		cc = CC_CPU(cpu);
 		cc->cc_callout = NULL;	/* Only cpu0 handles timeout(9). */
 		callout_cpu_init(cc, cpu);
 		snprintf(name, sizeof(name), "clock (%d)", cpu);
 		/* Start from NULL so each CPU gets its own interrupt event. */
 		ie = NULL;
 		if (swi_add(&ie, name, softclock, cc, SWI_CLOCK,
 		    INTR_MPSAFE, &cc->cc_cookie))
 			panic("died while creating standard software ithreads");
 		if (pin_pcpu_swi && (intr_event_bind(ie, cpu) != 0)) {
 			printf("%s: per-cpu clock couldn't be pinned to "
 			    "cpu %d\n",
 			    __func__,
 			    cpu);
 		}
 	}
 #endif
 }
 SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL);
 
 /* Number of fractional-second bits of an sbintime_t kept by the hash. */
 #define	CC_HASH_SHIFT	8
 
 /*
  * Map a time to its (unmasked) callwheel hash by discarding the low
  * 32 - CC_HASH_SHIFT bits of the sbintime_t value.
  */
 static inline u_int
 callout_hash(sbintime_t sbt)
 {
 	sbintime_t hashed;
 
 	hashed = sbt >> (32 - CC_HASH_SHIFT);
 	return (hashed);
 }
 
 /*
  * Return the index of the callwheel bucket that holds callouts scheduled
  * for time 'sbt': the time hash reduced modulo the wheel size.
  */
 static inline u_int
 callout_get_bucket(sbintime_t sbt)
 {
 	u_int hash;
 
 	hash = callout_hash(sbt);
 	return (hash & callwheelmask);
 }
 
 /*
  * Scan the current CPU's callwheel for work at time 'now'.
  *
  * Buckets are walked from the one of the previous scan up to a lookahead
  * window past 'now'.  Callouts that are due are either run right here via
  * softclock_call_cc() (CALLOUT_DIRECT) or moved to cc_expireq for the
  * softclock SWI.  Callouts that are not yet due are used to compute the
  * window [first, last] of the next event, which is stored in cc_firstevent
  * and, unless NO_EVENTTIMERS is defined, passed to cpu_new_callout().
  */
 void
 callout_process(sbintime_t now)
 {
 	struct callout *tmp, *tmpn;
 	struct callout_cpu *cc;
 	struct callout_list *sc;
 	sbintime_t first, last, max, tmp_max;
 	uint32_t lookahead;
 	u_int firstb, lastb, nowb;
 #ifdef CALLOUT_PROFILING
 	int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0;
 #endif
 
 	cc = CC_SELF();
 	mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
 
 	/* Compute the buckets of the last scan and present times. */
 	firstb = callout_hash(cc->cc_lastscan);
 	cc->cc_lastscan = now;
 	nowb = callout_hash(now);
 
 	/* Compute the last bucket and minimum time of the bucket after it. */
 	if (nowb == firstb)
 		lookahead = (SBT_1S / 16);
 	else if (nowb - firstb == 1)
 		lookahead = (SBT_1S / 8);
 	else
 		lookahead = (SBT_1S / 2);
 	first = last = now;
 	first += (lookahead / 2);
 	last += lookahead;
 	/* Round 'last' down to the start of its bucket. */
 	last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT));
 	lastb = callout_hash(last) - 1;
 	max = last;
 
 	/*
 	 * Check if we wrapped around the entire wheel from the last scan.
 	 * In that case, we need to scan the entire wheel for pending
 	 * callouts.
 	 */
 	if (lastb - firstb >= callwheelsize) {
 		lastb = firstb + callwheelsize - 1;
 		if (nowb - firstb >= callwheelsize)
 			nowb = lastb;
 	}
 
 	/* Iterate callwheel from firstb to nowb and then up to lastb. */
 	do {
 		sc = &cc->cc_callwheel[firstb & callwheelmask];
 		tmp = LIST_FIRST(sc);
 		while (tmp != NULL) {
 			/* Run the callout if present time within allowed. */
 			if (tmp->c_time <= now) {
 				/*
 				 * Consumer told us the callout may be run
 				 * directly from hardware interrupt context.
 				 */
 				if (tmp->c_iflags & CALLOUT_DIRECT) {
 #ifdef CALLOUT_PROFILING
 					++depth_dir;
 #endif
 					/*
 					 * Remember where to resume; the
 					 * handler runs with cc_lock dropped
 					 * and cc_next is kept up to date by
 					 * the code that edits this bucket.
 					 */
 					cc_exec_next(cc) =
 					    LIST_NEXT(tmp, c_links.le);
 					cc->cc_bucket = firstb & callwheelmask;
 					LIST_REMOVE(tmp, c_links.le);
 					softclock_call_cc(tmp, cc,
 #ifdef CALLOUT_PROFILING
 					    &mpcalls_dir, &lockcalls_dir, NULL,
 #endif
 					    1);
 					tmp = cc_exec_next(cc);
 					cc_exec_next(cc) = NULL;
 				} else {
 					/* Hand it over to the softclock SWI. */
 					tmpn = LIST_NEXT(tmp, c_links.le);
 					LIST_REMOVE(tmp, c_links.le);
 					TAILQ_INSERT_TAIL(&cc->cc_expireq,
 					    tmp, c_links.tqe);
 					tmp->c_iflags |= CALLOUT_PROCESSED;
 					tmp = tmpn;
 				}
 				continue;
 			}
 			/* Skip events from distant future. */
 			if (tmp->c_time >= max)
 				goto next;
 			/*
 			 * Event minimal time is bigger than present maximal
 			 * time, so it cannot be aggregated.
 			 */
 			if (tmp->c_time > last) {
 				lastb = nowb;
 				goto next;
 			}
 			/* Update first and last time, respecting this event. */
 			if (tmp->c_time < first)
 				first = tmp->c_time;
 			tmp_max = tmp->c_time + tmp->c_precision;
 			if (tmp_max < last)
 				last = tmp_max;
 next:
 			tmp = LIST_NEXT(tmp, c_links.le);
 		}
 		/* Proceed with the next bucket. */
 		firstb++;
 		/*
 		 * Stop if we looked after present time and found
 		 * some event we can't execute at now.
 		 * Stop if we looked far enough into the future.
 		 */
 	} while (((int)(firstb - lastb)) <= 0);
 	cc->cc_firstevent = last;
 #ifndef NO_EVENTTIMERS
 	cpu_new_callout(curcpu, last, first);
 #endif
 #ifdef CALLOUT_PROFILING
 	avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8;
 	avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
 	avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
 #endif
 	mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
 	/*
 	 * swi_sched acquires the thread lock, so we don't want to call it
 	 * with cc_lock held; incorrect locking order.
 	 */
 	if (!TAILQ_EMPTY(&cc->cc_expireq))
 		swi_sched(cc->cc_cookie, 0);
 }
 
 /*
  * Lock and return the callout_cpu that callout 'c' is currently tied to.
  *
  * c->c_cpu is read without a lock, so it is re-checked once the lock of the
  * candidate cpu is held and the attempt is retried if it changed.  On SMP a
  * value of CPUBLOCK means callout_cpu_switch() is moving the callout; spin
  * until the new cpu has been stored.
  */
 static struct callout_cpu *
 callout_lock(struct callout *c)
 {
 	struct callout_cpu *cc;
 	int cpu;
 
 	for (;;) {
 		cpu = c->c_cpu;
 #ifdef SMP
 		if (cpu == CPUBLOCK) {
 			while (c->c_cpu == CPUBLOCK)
 				cpu_spinwait();
 			continue;
 		}
 #endif
 		cc = CC_CPU(cpu);
 		CC_LOCK(cc);
 		/* Still the owner?  Then we hold the right lock. */
 		if (cpu == c->c_cpu)
 			break;
 		CC_UNLOCK(cc);
 	}
 	return (cc);
 }
 
 /*
  * Insert callout 'c' into the callwheel of 'cc', to fire at absolute time
  * 'sbt' (clamped to no earlier than the last scan) with the given
  * precision, function and argument.  The callout is marked pending and
  * active, and direct when C_DIRECT_EXEC is set in 'flags'.  Unless
  * NO_EVENTTIMERS is defined, cpu_new_callout() is called for 'cpu' when the
  * new callout becomes the earliest event.  Must be called with cc_lock
  * held.
  */
 static void
 callout_cc_add(struct callout *c, struct callout_cpu *cc,
     sbintime_t sbt, sbintime_t precision, void (*func)(void *),
     void *arg, int cpu, int flags)
 {
 	int bucket;
 
 	CC_LOCK_ASSERT(cc);
 	if (sbt < cc->cc_lastscan)
 		sbt = cc->cc_lastscan;
 	c->c_arg = arg;
 	c->c_iflags |= CALLOUT_PENDING;
 	c->c_iflags &= ~CALLOUT_PROCESSED;
 	c->c_flags |= CALLOUT_ACTIVE;
 	if (flags & C_DIRECT_EXEC)
 		c->c_iflags |= CALLOUT_DIRECT;
 	c->c_func = func;
 	c->c_time = sbt;
 	c->c_precision = precision;
 	bucket = callout_get_bucket(c->c_time);
 	CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
 	    c, (int)(c->c_precision >> 32),
 	    (u_int)(c->c_precision & 0xffffffff));
 	LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
 	/*
 	 * The new head of the bucket recorded in cc_bucket becomes the
 	 * callout callout_process() resumes with.
 	 */
 	if (cc->cc_bucket == bucket)
 		cc_exec_next(cc) = c;
 #ifndef NO_EVENTTIMERS
 	/*
 	 * Inform the eventtimers(4) subsystem there's a new callout
 	 * that has been inserted, but only if really required.
 	 */
 	/* Clamp the precision so that c_time + c_precision cannot overflow. */
 	if (SBT_MAX - c->c_time < c->c_precision)
 		c->c_precision = SBT_MAX - c->c_time;
 	sbt = c->c_time + c->c_precision;
 	if (sbt < cc->cc_firstevent) {
 		cc->cc_firstevent = sbt;
 		cpu_new_callout(cpu, sbt, c->c_time);
 	}
 #endif
 }
 
 /*
  * Release a callout after use.  Only callouts preallocated for timeout(9)
  * (CALLOUT_LOCAL_ALLOC) are affected: their function pointer is cleared
  * and they go back on the free list of 'cc'.  Any other callout is left
  * untouched.
  */
 static void
 callout_cc_del(struct callout *c, struct callout_cpu *cc)
 {
 
 	if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) != 0) {
 		c->c_func = NULL;
 		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
 	}
 }
 
 /*
  * Execute one expired callout.
  *
  * Called with cc_lock held and returns with it held; the lock is dropped
  * around the handler.  'direct' selects the execution entity (0 when called
  * from softclock(), 1 when called from callout_process()).  The callout's
  * lock, if any, is taken before the handler runs; if the callout was
  * cancelled while that lock was being acquired the handler is skipped.
  * After the handler returns, a pending drain hook is run, threads waiting
  * for completion are woken up, and a migration deferred while the callout
  * was running is carried out.
  */
 static void
 softclock_call_cc(struct callout *c, struct callout_cpu *cc,
 #ifdef CALLOUT_PROFILING
     int *mpcalls, int *lockcalls, int *gcalls,
 #endif
     int direct)
 {
 	struct rm_priotracker tracker;
 	void (*c_func)(void *);
 	void *c_arg;
 	struct lock_class *class;
 	struct lock_object *c_lock;
 	uintptr_t lock_status;
 	int c_iflags;
 #ifdef SMP
 	struct callout_cpu *new_cc;
 	void (*new_func)(void *);
 	void *new_arg;
 	int flags, new_cpu;
 	sbintime_t new_prec, new_time;
 #endif
 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 
 	sbintime_t sbt1, sbt2;
 	struct timespec ts2;
 	static sbintime_t maxdt = 2 * SBT_1MS;	/* 2 msec */
 	static timeout_t *lastfunc;
 #endif
 
 	KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING,
 	    ("softclock_call_cc: pend %p %x", c, c->c_iflags));
 	KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE,
 	    ("softclock_call_cc: act %p %x", c, c->c_flags));
 	class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
 	lock_status = 0;
 	/*
 	 * NOTE(review): CALLOUT_SHAREDLOCK is tested in c_flags here while
 	 * the other init-time flag, CALLOUT_RETURNUNLOCKED, is tested in
 	 * c_iflags below -- confirm which field the init code stores it in.
 	 */
 	if (c->c_flags & CALLOUT_SHAREDLOCK) {
 		if (class == &lock_class_rm)
 			lock_status = (uintptr_t)&tracker;
 		else
 			lock_status = 1;
 	}
 	/*
 	 * Cache everything needed from the callout; once cc_lock is dropped
 	 * the callout may be rescheduled or stopped by other threads.
 	 */
 	c_lock = c->c_lock;
 	c_func = c->c_func;
 	c_arg = c->c_arg;
 	c_iflags = c->c_iflags;
 	if (c->c_iflags & CALLOUT_LOCAL_ALLOC)
 		c->c_iflags = CALLOUT_LOCAL_ALLOC;
 	else
 		c->c_iflags &= ~CALLOUT_PENDING;
 	
 	cc_exec_curr(cc, direct) = c;
 	cc_exec_cancel(cc, direct) = false;
 	cc_exec_drain(cc, direct) = NULL;
 	CC_UNLOCK(cc);
 	if (c_lock != NULL) {
 		class->lc_lock(c_lock, lock_status);
 		/*
 		 * The callout may have been cancelled
 		 * while we switched locks.
 		 */
 		if (cc_exec_cancel(cc, direct)) {
 			class->lc_unlock(c_lock);
 			goto skip;
 		}
 		/* The callout cannot be stopped now. */
 		cc_exec_cancel(cc, direct) = true;
 		if (c_lock == &Giant.lock_object) {
 #ifdef CALLOUT_PROFILING
 			(*gcalls)++;
 #endif
 			CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
 			    c, c_func, c_arg);
 		} else {
 #ifdef CALLOUT_PROFILING
 			(*lockcalls)++;
 #endif
 			CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
 			    c, c_func, c_arg);
 		}
 	} else {
 #ifdef CALLOUT_PROFILING
 		(*mpcalls)++;
 #endif
 		CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
 		    c, c_func, c_arg);
 	}
 	KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running",
 	    "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct);
 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
 	sbt1 = sbinuptime();
 #endif
 	THREAD_NO_SLEEPING();
 	SDT_PROBE1(callout_execute, , , callout__start, c);
 	c_func(c_arg);
 	SDT_PROBE1(callout_execute, , , callout__end, c);
 	THREAD_SLEEPING_OK();
 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
 	/*
 	 * Report handlers that ran longer than any seen so far (initially
 	 * 2 msec); repeats of the same function are only reported when the
 	 * previous maximum is more than doubled.
 	 */
 	sbt2 = sbinuptime();
 	sbt2 -= sbt1;
 	if (sbt2 > maxdt) {
 		if (lastfunc != c_func || sbt2 > maxdt * 2) {
 			ts2 = sbttots(sbt2);
 			printf(
 		"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
 			    c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
 		}
 		maxdt = sbt2;
 		lastfunc = c_func;
 	}
 #endif
 	KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
 	CTR1(KTR_CALLOUT, "callout %p finished", c);
 	/*
 	 * NOTE(review): 'class' is NULL when the callout has no lock, so
 	 * this assumes lock-less callouts always carry
 	 * CALLOUT_RETURNUNLOCKED -- confirm against the init code.
 	 */
 	if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0)
 		class->lc_unlock(c_lock);
 skip:
 	CC_LOCK(cc);
 	KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr"));
 	cc_exec_curr(cc, direct) = NULL;
 	/* Run a drain hook installed while the callout was executing. */
 	if (cc_exec_drain(cc, direct)) {
 		void (*drain)(void *);
 		
 		drain = cc_exec_drain(cc, direct);
 		cc_exec_drain(cc, direct) = NULL;
 		CC_UNLOCK(cc);
 		drain(c_arg);
 		CC_LOCK(cc);
 	}
 	if (cc_exec_waiting(cc, direct)) {
 		/*
 		 * There is someone waiting for the
 		 * callout to complete.
 		 * If the callout was scheduled for
 		 * migration just cancel it.
 		 */
 		if (cc_cce_migrating(cc, direct)) {
 			cc_cce_cleanup(cc, direct);
 
 			/*
 			 * It should be assert here that the callout is not
 			 * destroyed but that is not easy.
 			 */
 			c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 		}
 		cc_exec_waiting(cc, direct) = false;
 		CC_UNLOCK(cc);
 		wakeup(&cc_exec_waiting(cc, direct));
 		CC_LOCK(cc);
 	} else if (cc_cce_migrating(cc, direct)) {
 		KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0,
 		    ("Migrating legacy callout %p", c));
 #ifdef SMP
 		/*
 		 * If the callout was scheduled for
 		 * migration just perform it now.
 		 */
 		new_cpu = cc_migration_cpu(cc, direct);
 		new_time = cc_migration_time(cc, direct);
 		new_prec = cc_migration_prec(cc, direct);
 		new_func = cc_migration_func(cc, direct);
 		new_arg = cc_migration_arg(cc, direct);
 		cc_cce_cleanup(cc, direct);
 
 		/*
 		 * It should be assert here that the callout is not destroyed
 		 * but that is not easy.
 		 *
 		 * As first thing, handle deferred callout stops.
 		 */
 		if (!callout_migrating(c)) {
 			CTR3(KTR_CALLOUT,
 			     "deferred cancelled %p func %p arg %p",
 			     c, new_func, new_arg);
 			callout_cc_del(c, cc);
 			return;
 		}
 		c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 
 		/* Re-add on the new cpu, then take back this cpu's lock. */
 		new_cc = callout_cpu_switch(c, cc, new_cpu);
 		flags = (direct) ? C_DIRECT_EXEC : 0;
 		callout_cc_add(c, new_cc, new_time, new_prec, new_func,
 		    new_arg, new_cpu, flags);
 		CC_UNLOCK(new_cc);
 		CC_LOCK(cc);
 #else
 		panic("migration should not happen");
 #endif
 	}
 	/*
 	 * If the current callout is locally allocated (from
 	 * timeout(9)) then put it on the freelist.
 	 *
 	 * Note: we need to check the cached copy of c_iflags because
 	 * if it was not local, then it's not safe to deref the
 	 * callout pointer.
 	 */
 	KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 ||
 	    c->c_iflags == CALLOUT_LOCAL_ALLOC,
 	    ("corrupted callout"));
 	if (c_iflags & CALLOUT_LOCAL_ALLOC)
 		callout_cc_del(c, cc);
 }
 
 /*
  * The callout mechanism is based on the work of Adam M. Costello and
  * George Varghese, published in a technical report entitled "Redesigning
  * the BSD Callout and Timer Facilities" and modified slightly for inclusion
  * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
  * used in this implementation was published by G. Varghese and T. Lauck in
  * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
  * the Efficient Implementation of a Timer Facility" in the Proceedings of
  * the 11th ACM Annual Symposium on Operating Systems Principles,
  * Austin, Texas Nov 1987.
  */
 
 /*
  * Software (low priority) clock interrupt handler.
  *
  * 'arg' is the callout_cpu this handler was registered for.  Every callout
  * queued on its expire queue is dequeued and executed through
  * softclock_call_cc() in SWI context (direct == 0).  With
  * CALLOUT_PROFILING the per-call counters are folded into the running
  * averages exported via sysctl.
  */
 void
 softclock(void *arg)
 {
 	struct callout_cpu *cc = arg;
 	struct callout *expired;
 #ifdef CALLOUT_PROFILING
 	int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0;
 #endif
 
 	CC_LOCK(cc);
 	for (;;) {
 		expired = TAILQ_FIRST(&cc->cc_expireq);
 		if (expired == NULL)
 			break;
 		TAILQ_REMOVE(&cc->cc_expireq, expired, c_links.tqe);
 		softclock_call_cc(expired, cc,
 #ifdef CALLOUT_PROFILING
 		    &mpcalls, &lockcalls, &gcalls,
 #endif
 		    0);
 #ifdef CALLOUT_PROFILING
 		++depth;
 #endif
 	}
 #ifdef CALLOUT_PROFILING
 	avg_depth += (depth * 1000 - avg_depth) >> 8;
 	avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
 	avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
 	avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
 #endif
 	CC_UNLOCK(cc);
 }
 
 /*
  * timeout --
  *	Execute a function after a specified length of time.
  *
  * untimeout --
  *	Cancel previous timeout function call.
  *
  * callout_handle_init --
  *	Initialize a handle so that using it with untimeout is benign.
  *
  *	See AT&T BCI Driver Reference Manual for specification.  This
  *	implementation differs from that one in that although an
  *	identification value is returned from timeout, the original
  *	arguments to timeout as well as the identifier are used to
  *	identify entries for untimeout.
  */
 struct callout_handle
 timeout(timeout_t *ftn, void *arg, int to_ticks)
 {
 	struct callout_handle handle;
 	struct callout_cpu *cc;
 	struct callout *c;
 
 	cc = CC_CPU(timeout_cpu);
 	CC_LOCK(cc);
 	/* Take the first preallocated callout off the free list. */
 	c = SLIST_FIRST(&cc->cc_callfree);
 	if (c == NULL) {
 		/* XXX Attempt to malloc first */
 		panic("timeout table full");
 	}
 	SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle);
 	/* Arm it while the callout cpu is still locked. */
 	callout_reset(c, to_ticks, ftn, arg);
 	handle.callout = c;
 	CC_UNLOCK(cc);
 
 	return (handle);
 }
 
 void
 untimeout(timeout_t *ftn, void *arg, struct callout_handle handle)
 {
 	struct callout_cpu *cc;
 	struct callout *c;
 
 	/*
 	 * A NULL callout means the handle was set up by
 	 * callout_handle_init() and never armed by timeout();
 	 * there is nothing to cancel.
 	 */
 	c = handle.callout;
 	if (c == NULL)
 		return;
 
 	/* Stop it only if it still belongs to this (function, argument). */
 	cc = callout_lock(c);
 	if (c->c_func == ftn && c->c_arg == arg)
 		callout_stop(c);
 	CC_UNLOCK(cc);
 }
 
 /*
  * Put a handle into the "never used" state: its callout pointer is NULL,
  * which untimeout() recognizes and treats as a no-op.
  */
 void
 callout_handle_init(struct callout_handle *handle)
 {
 	handle->callout = NULL;
 }
 
 /*
  * New interface; clients allocate their own callout structures.
  *
  * callout_reset() - establish or change a timeout
  * callout_stop() - disestablish a timeout
  * callout_init() - initialize a callout structure so that it can
  *	safely be passed to callout_reset() and callout_stop()
  *
  * <sys/callout.h> defines three convenience macros:
  *
  * callout_active() - returns truth if callout has not been stopped,
  *	drained, or deactivated since the last time the callout was
  *	reset.
  * callout_pending() - returns truth if callout is still waiting for timeout
  * callout_deactivate() - marks the callout as having been serviced
  */
 int
 callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
     void (*ftn)(void *), void *arg, int cpu, int flags)
 {
 	sbintime_t to_sbt, pr;
 	struct callout_cpu *cc;
 	int cancelled, direct;
 	int ignore_cpu=0;
 
 	cancelled = 0;
 	if (cpu == -1) {
 		ignore_cpu = 1;
 	} else if ((cpu >= MAXCPU) ||
 		   ((CC_CPU(cpu))->cc_inited == 0)) {
 		/* Invalid CPU spec */
 		panic("Invalid CPU in callout %d", cpu);
 	}
 	if (flags & C_ABSOLUTE) {
 		to_sbt = sbt;
 	} else {
 		if ((flags & C_HARDCLOCK) && (sbt < tick_sbt))
 			sbt = tick_sbt;
 		if ((flags & C_HARDCLOCK) ||
 #ifdef NO_EVENTTIMERS
 		    sbt >= sbt_timethreshold) {
 			to_sbt = getsbinuptime();
 
 			/* Add safety belt for the case of hz > 1000. */
 			to_sbt += tc_tick_sbt - tick_sbt;
 #else
 		    sbt >= sbt_tickthreshold) {
 			/*
 			 * Obtain the time of the last hardclock() call on
 			 * this CPU directly from the kern_clocksource.c.
 			 * This value is per-CPU, but it is equal for all
 			 * active ones.
 			 */
 #ifdef __LP64__
 			to_sbt = DPCPU_GET(hardclocktime);
 #else
 			spinlock_enter();
 			to_sbt = DPCPU_GET(hardclocktime);
 			spinlock_exit();
 #endif
 #endif
 			if ((flags & C_HARDCLOCK) == 0)
 				to_sbt += tick_sbt;
 		} else
 			to_sbt = sbinuptime();
 		if (SBT_MAX - to_sbt < sbt)
 			to_sbt = SBT_MAX;
 		else
 			to_sbt += sbt;
 		pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
 		    sbt >> C_PRELGET(flags));
 		if (pr > precision)
 			precision = pr;
 	}
 	/* 
 	 * This flag used to be added by callout_cc_add, but the
 	 * first time you call this we could end up with the
 	 * wrong direct flag if we don't do it before we add.
 	 */
 	if (flags & C_DIRECT_EXEC) {
 		direct = 1;
 	} else {
 		direct = 0;
 	}
 	KASSERT(!direct || c->c_lock == NULL,
 	    ("%s: direct callout %p has lock", __func__, c));
 	cc = callout_lock(c);
 	/*
 	 * Don't allow migration of pre-allocated callouts lest they
 	 * become unbalanced or handle the case where the user does
 	 * not care. 
 	 */
 	if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) ||
 	    ignore_cpu) {
 		cpu = c->c_cpu;
 	}
 
 	if (cc_exec_curr(cc, direct) == c) {
 		/*
 		 * We're being asked to reschedule a callout which is
 		 * currently in progress.  If there is a lock then we
 		 * can cancel the callout if it has not really started.
 		 */
 		if (c->c_lock != NULL && !cc_exec_cancel(cc, direct))
 			cancelled = cc_exec_cancel(cc, direct) = true;
 		if (cc_exec_waiting(cc, direct)) {
 			/*
 			 * Someone has called callout_drain to kill this
 			 * callout.  Don't reschedule.
 			 */
 			CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
 			    cancelled ? "cancelled" : "failed to cancel",
 			    c, c->c_func, c->c_arg);
 			CC_UNLOCK(cc);
 			return (cancelled);
 		}
 #ifdef SMP
 		if (callout_migrating(c)) {
 			/* 
 			 * This only occurs when a second callout_reset_sbt_on
 			 * is made after a previous one moved it into
 			 * deferred migration (below). Note we do *not* change
 			 * the prev_cpu even though the previous target may
 			 * be different.
 			 */
 			cc_migration_cpu(cc, direct) = cpu;
 			cc_migration_time(cc, direct) = to_sbt;
 			cc_migration_prec(cc, direct) = precision;
 			cc_migration_func(cc, direct) = ftn;
 			cc_migration_arg(cc, direct) = arg;
 			cancelled = 1;
 			CC_UNLOCK(cc);
 			return (cancelled);
 		}
 #endif
 	}
 	if (c->c_iflags & CALLOUT_PENDING) {
 		if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
 			if (cc_exec_next(cc) == c)
 				cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
 			LIST_REMOVE(c, c_links.le);
 		} else {
 			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
 		}
 		cancelled = 1;
 		c->c_iflags &= ~ CALLOUT_PENDING;
 		c->c_flags &= ~ CALLOUT_ACTIVE;
 	}
 
 #ifdef SMP
 	/*
 	 * If the callout must migrate try to perform it immediately.
 	 * If the callout is currently running, just defer the migration
 	 * to a more appropriate moment.
 	 */
 	if (c->c_cpu != cpu) {
 		if (cc_exec_curr(cc, direct) == c) {
 			/* 
 			 * Pending will have been removed since we are
 			 * actually executing the callout on another
 			 * CPU. That callout should be waiting on the
 			 * lock the caller holds. If we set both
 			 * active/and/pending after we return and the
 			 * lock on the executing callout proceeds, it
 			 * will then see pending is true and return.
 			 * At the return from the actual callout execution
 			 * the migration will occur in softclock_call_cc
 			 * and this new callout will be placed on the 
 			 * new CPU via a call to callout_cpu_switch() which
 			 * will get the lock on the right CPU followed
 			 * by a call callout_cc_add() which will add it there.
 			 * (see above in softclock_call_cc()).
 			 */
 			cc_migration_cpu(cc, direct) = cpu;
 			cc_migration_time(cc, direct) = to_sbt;
 			cc_migration_prec(cc, direct) = precision;
 			cc_migration_func(cc, direct) = ftn;
 			cc_migration_arg(cc, direct) = arg;
 			c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING);
 			c->c_flags |= CALLOUT_ACTIVE;
 			CTR6(KTR_CALLOUT,
 		    "migration of %p func %p arg %p in %d.%08x to %u deferred",
 			    c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
 			    (u_int)(to_sbt & 0xffffffff), cpu);
 			CC_UNLOCK(cc);
 			return (cancelled);
 		}
 		cc = callout_cpu_switch(c, cc, cpu);
 	}
 #endif
 
 	callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
 	CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
 	    cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
 	    (u_int)(to_sbt & 0xffffffff));
 	CC_UNLOCK(cc);
 
 	return (cancelled);
 }
 
 /*
  * Common idioms that can be optimized in the future.
  */
 /*
  * Thin wrapper: hand the callout to callout_reset_on() for the given CPU,
  * reusing the function and argument already stored in the callout.
  */
 int
 callout_schedule_on(struct callout *c, int to_ticks, int cpu)
 {
 	int rv;
 
 	rv = callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu);
 	return (rv);
 }
 
 /*
  * Thin wrapper: hand the callout to callout_reset_on(), reusing the
  * function, argument and CPU already stored in the callout.
  */
 int
 callout_schedule(struct callout *c, int to_ticks)
 {
 	int rv;
 
 	rv = callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu);
 	return (rv);
 }
 
 /*
  * Stop (or, with CS_DRAIN, wait for) the callout 'c'.
  *
  * flags:
  *   CS_DRAIN     - if the callout is currently executing, sleep until the
  *                  invocation has finished.
  *   CS_MIGRBLOCK - only affects the return value in the deferred-migration
  *                  case (see below).
  * drain: optional function; when the callout is executing and cannot be
  *        stopped it is recorded via cc_exec_drain().
  *
  * Return value:
  *   -1  the callout was neither pending nor the one currently executing.
  *    1  the callout was cancelled: either removed from its queue, or marked
  *       cancelled while it was still waiting for the lock the caller holds.
  *    0  the callout is executing and could not be stopped (this is also the
  *       result after a CS_DRAIN wait completes).
  *   In the deferred-migration case the pending reschedule is discarded and
  *   the result is 1 only when CS_MIGRBLOCK is set, 0 otherwise.
  */
 int
 _callout_stop_safe(struct callout *c, int flags, void (*drain)(void *))
 {
 	struct callout_cpu *cc, *old_cc;
 	struct lock_class *class;
 	int direct, sq_locked, use_lock;
 	int not_on_a_list;
 
 	if ((flags & CS_DRAIN) != 0)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock,
 		    "calling %s", __func__);
 
 	/*
 	 * Some old subsystems don't hold Giant while running a callout_stop(),
 	 * so just discard this check for the moment.
 	 */
 	if ((flags & CS_DRAIN) == 0 && c->c_lock != NULL) {
 		if (c->c_lock == &Giant.lock_object)
 			use_lock = mtx_owned(&Giant);
 		else {
 			use_lock = 1;
 			class = LOCK_CLASS(c->c_lock);
 			class->lc_assert(c->c_lock, LA_XLOCKED);
 		}
 	} else
 		use_lock = 0;
 	if (c->c_iflags & CALLOUT_DIRECT) {
 		direct = 1;
 	} else {
 		direct = 0;
 	}
 	sq_locked = 0;
 	old_cc = NULL;
 again:
 	cc = callout_lock(c);
 
 	if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) ==
 	    (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) &&
 	    ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) {
 		/*
 		 * Special case where this slipped in while we
 		 * were migrating *as* the callout is about to
 		 * execute. The caller probably holds the lock
 		 * the callout wants.
 		 *
 		 * Get rid of the migration first. Then set
 		 * the flag that tells this code *not* to
 		 * try to remove it from any lists (it's not
 		 * on one yet). When the callout wheel runs,
 		 * it will ignore this callout.
 		 */
 		c->c_iflags &= ~CALLOUT_PENDING;
 		c->c_flags &= ~CALLOUT_ACTIVE;
 		not_on_a_list = 1;
 	} else {
 		not_on_a_list = 0;
 	}
 
 	/*
 	 * If the callout was migrating while the callout cpu lock was
 	 * dropped, just drop the sleepqueue lock and check the states
 	 * again.
 	 */
 	if (sq_locked != 0 && cc != old_cc) {
 #ifdef SMP
 		CC_UNLOCK(cc);
 		sleepq_release(&cc_exec_waiting(old_cc, direct));
 		sq_locked = 0;
 		old_cc = NULL;
 		goto again;
 #else
 		panic("migration should not happen");
 #endif
 	}
 
 	/*
 	 * If the callout isn't pending, it's not on the queue, so
 	 * don't attempt to remove it from the queue.  We can try to
 	 * stop it by other means however.
 	 */
 	if (!(c->c_iflags & CALLOUT_PENDING)) {
 		/*
 		 * If it wasn't on the queue and it isn't the current
 		 * callout, then we can't stop it, so just bail.
 		 * It probably has already been run (if locking
 		 * is properly done). You could get here if the caller
 		 * calls stop twice in a row for example. The second
 		 * call would fall here without CALLOUT_ACTIVE set.
 		 */
 		c->c_flags &= ~CALLOUT_ACTIVE;
 		if (cc_exec_curr(cc, direct) != c) {
 			CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
 			    c, c->c_func, c->c_arg);
 			CC_UNLOCK(cc);
 			if (sq_locked)
 				sleepq_release(&cc_exec_waiting(cc, direct));
 			return (-1);
 		}
 
 		if ((flags & CS_DRAIN) != 0) {
 			/*
 			 * The current callout is running (or just
 			 * about to run) and blocking is allowed, so
 			 * just wait for the current invocation to
 			 * finish.
 			 */
 			while (cc_exec_curr(cc, direct) == c) {
 				/*
 				 * Use direct calls to sleepqueue interface
 				 * instead of cv/msleep in order to avoid
 				 * a LOR between cc_lock and sleepqueue
 				 * chain spinlocks.  This piece of code
 				 * emulates a msleep_spin() call actually.
 				 *
 				 * If we already have the sleepqueue chain
 				 * locked, then we can safely block.  If we
 				 * don't already have it locked, however,
 				 * we have to drop the cc_lock to lock
 				 * it.  This opens several races, so we
 				 * restart at the beginning once we have
 				 * both locks.  If nothing has changed, then
 				 * we will end up back here with sq_locked
 				 * set.
 				 */
 				if (!sq_locked) {
 					CC_UNLOCK(cc);
 					sleepq_lock(
 					    &cc_exec_waiting(cc, direct));
 					sq_locked = 1;
 					old_cc = cc;
 					goto again;
 				}
 
 				/*
 				 * Migration could be cancelled here, but
 				 * as long as it is still not sure when it
 				 * will be packed up, just let softclock()
 				 * take care of it.
 				 */
 				cc_exec_waiting(cc, direct) = true;
 				DROP_GIANT();
 				CC_UNLOCK(cc);
 				sleepq_add(
 				    &cc_exec_waiting(cc, direct),
 				    &cc->cc_lock.lock_object, "codrain",
 				    SLEEPQ_SLEEP, 0);
 				sleepq_wait(
 				    &cc_exec_waiting(cc, direct),
 					     0);
 				/* The sleepqueue chain lock is no longer held. */
 				sq_locked = 0;
 				old_cc = NULL;
 
 				/* Reacquire locks previously released. */
 				PICKUP_GIANT();
 				CC_LOCK(cc);
 			}
 		} else if (use_lock &&
 			   !cc_exec_cancel(cc, direct) && (drain == NULL)) {
 			
 			/*
 			 * The current callout is waiting for its
 			 * lock which we hold.  Cancel the callout
 			 * and return.  After our caller drops the
 			 * lock, the callout will be skipped in
 			 * softclock(). This *only* works with a
 			 * callout_stop() *not* callout_drain() or
 			 * callout_async_drain().
 			 */
 			cc_exec_cancel(cc, direct) = true;
 			CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
 			    c, c->c_func, c->c_arg);
 			KASSERT(!cc_cce_migrating(cc, direct),
 			    ("callout wrongly scheduled for migration"));
 			if (callout_migrating(c)) {
 				c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 #ifdef SMP
 				/* Discard the deferred reschedule request. */
 				cc_migration_cpu(cc, direct) = CPUBLOCK;
 				cc_migration_time(cc, direct) = 0;
 				cc_migration_prec(cc, direct) = 0;
 				cc_migration_func(cc, direct) = NULL;
 				cc_migration_arg(cc, direct) = NULL;
 #endif
 			}
 			CC_UNLOCK(cc);
 			KASSERT(!sq_locked, ("sleepqueue chain locked"));
 			return (1);
 		} else if (callout_migrating(c)) {
 			/*
 			 * The callout is currently being serviced
 			 * and the "next" callout is scheduled at
 			 * its completion with a migration. We remove
 			 * the migration flag so it *won't* get rescheduled,
 			 * but we can't stop the one that's running so
 			 * we return 0 (or 1 if CS_MIGRBLOCK was passed).
 			 */
 			c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 #ifdef SMP
 			/*
 			 * We can't call cc_cce_cleanup here since
 			 * if we do it will remove .ce_curr and
 			 * it's still running. This will prevent a
 			 * reschedule of the callout when the
 			 * execution completes.
 			 */
 			cc_migration_cpu(cc, direct) = CPUBLOCK;
 			cc_migration_time(cc, direct) = 0;
 			cc_migration_prec(cc, direct) = 0;
 			cc_migration_func(cc, direct) = NULL;
 			cc_migration_arg(cc, direct) = NULL;
 #endif
 			CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
 			    c, c->c_func, c->c_arg);
  			if (drain) {
 				cc_exec_drain(cc, direct) = drain;
 			}
 			CC_UNLOCK(cc);
 			return ((flags & CS_MIGRBLOCK) != 0);
 		}
 		/*
 		 * The callout is executing and could not be cancelled (this
 		 * point is also reached once a CS_DRAIN wait has finished).
 		 */
 		CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
 		    c, c->c_func, c->c_arg);
 		if (drain) {
 			cc_exec_drain(cc, direct) = drain;
 		}
 		CC_UNLOCK(cc);
 		KASSERT(!sq_locked, ("sleepqueue chain still locked"));
 		return (0);
 	}
 	if (sq_locked)
 		sleepq_release(&cc_exec_waiting(cc, direct));
 
 	c->c_iflags &= ~CALLOUT_PENDING;
 	c->c_flags &= ~CALLOUT_ACTIVE;
 
 	CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
 	    c, c->c_func, c->c_arg);
 	/* Unlink from the callwheel bucket or from the expired queue. */
 	if (not_on_a_list == 0) {
 		if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
 			if (cc_exec_next(cc) == c)
 				cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
 			LIST_REMOVE(c, c_links.le);
 		} else {
 			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
 		}
 	}
 	callout_cc_del(c, cc);
 	CC_UNLOCK(cc);
 	return (1);
 }
 
 /*
  * Zero the callout and set its initial lock, internal flags and CPU.
  * A non-zero 'mpsafe' yields a callout with no lock and
  * CALLOUT_RETURNUNLOCKED set; otherwise the callout uses Giant and has no
  * internal flags.  The callout is assigned to timeout_cpu.
  */
 void
 callout_init(struct callout *c, int mpsafe)
 {
 	bzero(c, sizeof *c);
 	c->c_lock = (mpsafe != 0) ? NULL : &Giant.lock_object;
 	c->c_iflags = (mpsafe != 0) ? CALLOUT_RETURNUNLOCKED : 0;
 	c->c_cpu = timeout_cpu;
 }
 
 /*
  * Zero the callout and associate it with 'lock'.  Only the
  * CALLOUT_RETURNUNLOCKED and CALLOUT_SHAREDLOCK bits of 'flags' are
  * accepted; CALLOUT_RETURNUNLOCKED requires a lock, and the lock's class
  * must be neither a spin lock nor sleepable.  The callout is assigned to
  * timeout_cpu.
  */
 void
 _callout_init_lock(struct callout *c, struct lock_object *lock, int flags)
 {
 	const int allowed = CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK;
 
 	bzero(c, sizeof *c);
 	c->c_lock = lock;
 	KASSERT((flags & ~allowed) == 0,
 	    ("callout_init_lock: bad flags %d", flags));
 	KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0,
 	    ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock"));
 	KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags &
 	    (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class",
 	    __func__));
 	c->c_iflags = flags & allowed;
 	c->c_cpu = timeout_cpu;
 }
 
 #ifdef APM_FIXUP_CALLTODO
 /* 
  * Adjust the kernel calltodo timeout list.  This routine is used after 
  * an APM resume to recalculate the calltodo timer list values with the 
  * number of hz's we have been sleeping.  The next hardclock() will detect 
  * that there are fired timers and run softclock() to execute them.
  *
  * Please note, I have not done an exhaustive analysis of what code this
  * might break.  I am motivated to have my select()'s and alarm()'s that
  * have expired during suspend firing upon resume so that the applications
  * which set the timer can do the maintenance the timer was for as close
  * as possible to the originally intended time.  Testing this code for a 
  * week showed that resuming from a suspend resulted in 22 to 25 timers 
  * firing, which seemed independent on whether the suspend was 2 hours or
  * 2 days.  Your mileage may vary.   - Ken Key <key@cs.utk.edu>
  */
 /*
  * Subtract the time described by 'time_change' (converted to ticks) from
  * the entries of the calltodo list, stopping at the first entry that still
  * has time left.  A negative tv_sec makes this a no-op.
  *
  * NOTE(review): 'cc' and 'calltodo' are not declared in this function and
  * their definitions are not visible here; this code is only compiled when
  * APM_FIXUP_CALLTODO is defined -- confirm it still builds before enabling.
  */
 void
 adjust_timeout_calltodo(struct timeval *time_change)
 {
 	register struct callout *p;
 	unsigned long delta_ticks;
 
 	/*
 	 * How many ticks were we asleep?
 	 * (stolen from tvtohz()).
 	 */
 
 	/* Don't do anything */
 	if (time_change->tv_sec < 0)
 		return;
 	else if (time_change->tv_sec <= LONG_MAX / 1000000)
 		/* The whole interval fits in microseconds without overflow. */
 		delta_ticks = howmany(time_change->tv_sec * 1000000 +
 		    time_change->tv_usec, tick) + 1;
 	else if (time_change->tv_sec <= LONG_MAX / hz)
 		/* Convert the seconds and microseconds parts separately. */
 		delta_ticks = time_change->tv_sec * hz +
 		    howmany(time_change->tv_usec, tick) + 1;
 	else
 		delta_ticks = LONG_MAX;
 
 	/* Clamp to INT_MAX. */
 	if (delta_ticks > INT_MAX)
 		delta_ticks = INT_MAX;
 
 	/*
 	 * Now rip through the timer calltodo list looking for timers
 	 * to expire.
 	 */
 
 	/* don't collide with softclock() */
 	CC_LOCK(cc);
 	for (p = calltodo.c_next; p != NULL; p = p->c_next) {
 		p->c_time -= delta_ticks;
 
 		/* Break if the timer had more time on it than delta_ticks */
 		if (p->c_time > 0)
 			break;
 
 		/* take back the ticks the timer didn't use (p->c_time <= 0) */
 		delta_ticks = -p->c_time;
 	}
 	CC_UNLOCK(cc);
 
 	return;
 }
 #endif /* APM_FIXUP_CALLTODO */
 
 /*
  * Return the index (as computed by flsl()) of the highest set bit of
  * 'sbt' + 'sbt' / 2.  When long is narrower than sbintime_t, values of at
  * least SBT_1S are handled by looking at the upper 32 bits only.
  *
  * NOTE(review): if the sum wraps to a negative value the result can reach
  * 64 -- confirm callers that use it as an array index tolerate that.
  */
 static int
 flssbt(sbintime_t sbt)
 {
 
 	sbt += (uint64_t)sbt >> 1;
 	if (sizeof(long) < sizeof(sbintime_t) && sbt >= SBT_1S)
 		return (flsl(((uint64_t)sbt) >> 32) + 32);
 	return (flsl(sbt));
 }
 
 /*
  * Dump immediate statistic snapshot of the scheduled callouts.
  *
  * Sysctl handler: nothing is printed on a plain read; the snapshot is only
  * produced when a new value is written (req->newptr != NULL).  Every
  * callwheel bucket of every CPU (only timeout_cpu without SMP) is walked
  * under that CPU's callout lock and the following is gathered:
  *   ct[]    - histogram of time-to-expiry, indexed by flssbt()
  *   cpr[]   - histogram of precision, indexed by flssbt()
  *   ccpbk[] - histogram of callouts per bucket, indexed by fls(c + c / 2)
  * together with sums (in microseconds) and maxima.  Medians are estimated
  * from the histograms.  The result is written with printf().
  *
  * Returns the error from sysctl_handle_int().
  */
 static int
 sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS)
 {
 	struct callout *tmp;
 	struct callout_cpu *cc;
 	struct callout_list *sc;
 	sbintime_t avgpr, avgt, maxpr, maxt, medpr, medt, now, spr, st, t;
 	int ct[64], cpr[64], ccpbk[32];
 	int error, val, i, count, tcum, pcum, maxc, c, medc;
 #ifdef SMP
 	int cpu;
 #endif
 
 	val = 0;
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	count = maxc = 0;
 	st = spr = maxt = maxpr = 0;
 	bzero(ccpbk, sizeof(ccpbk));
 	bzero(ct, sizeof(ct));
 	bzero(cpr, sizeof(cpr));
 	now = sbinuptime();
 #ifdef SMP
 	CPU_FOREACH(cpu) {
 		cc = CC_CPU(cpu);
 #else
 		cc = CC_CPU(timeout_cpu);
 #endif
 		CC_LOCK(cc);
 		for (i = 0; i < callwheelsize; i++) {
 			sc = &cc->cc_callwheel[i];
 			c = 0;
 			LIST_FOREACH(tmp, sc, c_links.le) {
 				c++;
 				/* Already-expired callouts count as zero. */
 				t = tmp->c_time - now;
 				if (t < 0)
 					t = 0;
 				st += t / SBT_1US;
 				spr += tmp->c_precision / SBT_1US;
 				if (t > maxt)
 					maxt = t;
 				if (tmp->c_precision > maxpr)
 					maxpr = tmp->c_precision;
 				ct[flssbt(t)]++;
 				cpr[flssbt(tmp->c_precision)]++;
 			}
 			if (c > maxc)
 				maxc = c;
 			ccpbk[fls(c + c / 2)]++;
 			count += c;
 		}
 		CC_UNLOCK(cc);
 #ifdef SMP
 	}
 #endif
 
 	/* Estimate medians: first histogram slot reaching half the total. */
 	for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++)
 		tcum += ct[i];
 	medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
 	for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++)
 		pcum += cpr[i];
 	medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
 	for (i = 0, c = 0; i < 32 && c < count / 2; i++)
 		c += ccpbk[i];
 	medc = (i >= 2) ? (1 << (i - 2)) : 0;
 
 	/* Do not divide by zero when no callouts are scheduled at all. */
 	avgt = (count != 0) ? st / count : 0;
 	avgpr = (count != 0) ? spr / count : 0;
 
 	printf("Scheduled callouts statistic snapshot:\n");
 	printf("  Callouts: %6d  Buckets: %6d*%-3d  Bucket size: 0.%06ds\n",
 	    count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT);
 	printf("  C/Bk: med %5d         avg %6d.%06jd  max %6d\n",
 	    medc,
 	    count / callwheelsize / mp_ncpus,
 	    (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000,
 	    maxc);
 	printf("  Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
 	    medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32,
 	    avgt / 1000000, avgt % 1000000,
 	    maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32);
 	printf("  Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
 	    medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32,
 	    avgpr / 1000000, avgpr % 1000000,
 	    maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32);
 	printf("  Distribution:       \tbuckets\t   time\t   tcum\t"
 	    "   prec\t   pcum\n");
 	for (i = 0, tcum = pcum = 0; i < 64; i++) {
 		/* Skip histogram slots that are empty in both tables. */
 		if (ct[i] == 0 && cpr[i] == 0)
 			continue;
 		t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0;
 		tcum += ct[i];
 		pcum += cpr[i];
 		printf("  %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
 		    t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
 		    i - 1 - (32 - CC_HASH_SHIFT),
 		    ct[i], tcum, cpr[i], pcum);
 	}
 	return (error);
 }
 SYSCTL_PROC(_kern, OID_AUTO, callout_stat,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
     0, 0, sysctl_kern_callout_stat, "I",
     "Dump immediate statistic snapshot of the scheduled callouts");
 
 #ifdef DDB
-
 /*
  * Print the address of the callout, the address of its c_links member and
  * the value of each remaining field via db_printf().
  */
 static void
 _show_callout(struct callout *c)
 {
 
 	/* Print one field as "   <name> = <value>" using format 'f'. */
 #define	C_DB_PRINTF(f, e)	db_printf("   %s = " f "\n", #e, c->e)
 	db_printf("callout %p\n", c);
 	db_printf("   &c_links = %p\n", &(c->c_links));
 	C_DB_PRINTF("%" PRId64,	c_time);
 	C_DB_PRINTF("%" PRId64,	c_precision);
 	C_DB_PRINTF("%p",	c_arg);
 	C_DB_PRINTF("%p",	c_func);
 	C_DB_PRINTF("%p",	c_lock);
 	C_DB_PRINTF("%#x",	c_flags);
 	C_DB_PRINTF("%#x",	c_iflags);
 	C_DB_PRINTF("%d",	c_cpu);
 #undef	C_DB_PRINTF
 }
 
 /*
  * "show callout <addr>": print the callout at the given address, or a
  * usage message when no address was supplied.
  */
 DB_SHOW_COMMAND(callout, db_show_callout)
 {
 
 	if (have_addr)
 		_show_callout((struct callout *)addr);
 	else
 		db_printf("usage: show callout <struct callout *>\n");
 }
 #endif /* DDB */
Index: projects/vnet/sys/xen/xen-os.h
===================================================================
--- projects/vnet/sys/xen/xen-os.h	(revision 301522)
+++ projects/vnet/sys/xen/xen-os.h	(revision 301523)
@@ -1,145 +1,145 @@
 /******************************************************************************
  * xen/xen-os.h
  * 
  * Random collection of macros and definition
  *
  * Copyright (c) 2003, 2004 Keir Fraser (on behalf of the Xen team)
  * All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
  * deal in the Software without restriction, including without limitation the
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  * 
  * The above copyright notice and this permission notice shall be included in
  * all copies or substantial portions of the Software.
  * 
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
  * DEALINGS IN THE SOFTWARE.
  *
  * $FreeBSD$
  */
 
 #ifndef _XEN_XEN_OS_H_
 #define _XEN_XEN_OS_H_
 
 #if !defined(__XEN_INTERFACE_VERSION__)  
 #define  __XEN_INTERFACE_VERSION__ 0x00030208
 #endif  
 
 #define GRANT_REF_INVALID   0xffffffff
 
 #ifdef LOCORE
 #define __ASSEMBLY__
 #endif
 
 #include <machine/xen/xen-os.h>
 
 #include <xen/interface/xen.h>
 
 /* Everything below this point is not included by assembler (.S) files. */
 #ifndef __ASSEMBLY__
 
 extern shared_info_t *HYPERVISOR_shared_info;
 extern start_info_t *HYPERVISOR_start_info;
 
 /* XXX: we need to get rid of this and use HYPERVISOR_start_info directly */
 extern char *console_page;
 
 extern int xen_disable_pv_disks;
 extern int xen_disable_pv_nics;
 
 /* Kind of environment the kernel is running in with respect to Xen. */
 enum xen_domain_type {
 	XEN_NATIVE,             /* running on bare hardware    */
 	XEN_PV_DOMAIN,          /* running in a PV domain      */
 	XEN_HVM_DOMAIN,         /* running in a Xen hvm domain */
 };
 
 extern enum xen_domain_type xen_domain_type;
 
 /* Non-zero unless xen_domain_type is XEN_NATIVE. */
 static inline int
 xen_domain(void)
 {
 	return (xen_domain_type == XEN_NATIVE ? 0 : 1);
 }
 
 /* Non-zero when xen_domain_type is XEN_PV_DOMAIN. */
 static inline int
 xen_pv_domain(void)
 {
 	return (xen_domain_type == XEN_PV_DOMAIN ? 1 : 0);
 }
 
 /* Non-zero when xen_domain_type is XEN_HVM_DOMAIN. */
 static inline int
 xen_hvm_domain(void)
 {
 	return (xen_domain_type == XEN_HVM_DOMAIN ? 1 : 0);
 }
 
 /*
  * True when running under Xen, start info is available and its flags
  * contain SIF_INITDOMAIN.
  */
 static inline bool
 xen_initial_domain(void)
 {
 	if (!xen_domain() || HYPERVISOR_start_info == NULL)
 		return (false);
 	return ((HYPERVISOR_start_info->flags & SIF_INITDOMAIN) != 0);
 }
 
 /*
  * Based on ofed/include/linux/bitops.h
  *
  * Those helpers are prefixed by xen_ because xen-os.h is widely included
  * and we don't want the other drivers using them.
  *
  */
 #define NBPL (NBBY * sizeof(long))
 
 /*
  * Return whether bit number 'bit' is set in the array of longs at 'addr'.
  * The containing word is read with atomic_load_acq_long().
  */
 static inline bool
 xen_test_bit(int bit, volatile long *addr)
 {
 	volatile long *word = &addr[bit / NBPL];
 	unsigned long mask = 1UL << (bit % NBPL);
 
 	return ((atomic_load_acq_long(word) & mask) != 0);
 }
 
 /*
  * Set bit number 'bit' in the array of longs at 'addr' using
  * atomic_set_long() on the containing word.
  */
 static inline void
 xen_set_bit(int bit, volatile long *addr)
 {
 	volatile long *word = &addr[bit / NBPL];
 	unsigned long mask = 1UL << (bit % NBPL);
 
 	atomic_set_long(word, mask);
 }
 
 /*
  * Clear bit number 'bit' in the array of longs at 'addr' using
  * atomic_clear_long() on the containing word.
  */
 static inline void
 xen_clear_bit(int bit, volatile long *addr)
 {
 	volatile long *word = &addr[bit / NBPL];
 	unsigned long mask = 1UL << (bit % NBPL);
 
 	atomic_clear_long(word, mask);
 }
 
-#undef NPBL
+#undef NBPL
 
 /*
  * Functions to allocate/free unused memory in order
  * to map memory from other domains.
  */
 struct resource *xenmem_alloc(device_t dev, int *res_id, size_t size);
 int xenmem_free(device_t dev, int res_id, struct resource *res);
 
 /* Debug/emergency function, prints directly to hypervisor console */
 void xc_printf(const char *, ...) __printflike(1, 2);
 
 #ifndef xen_mb
 #define xen_mb() mb()
 #endif
 #ifndef xen_rmb
 #define xen_rmb() rmb()
 #endif
 #ifndef xen_wmb
 #define xen_wmb() wmb()
 #endif
 
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _XEN_XEN_OS_H_ */
Index: projects/vnet/tools/tools/cxgbetool/cxgbetool.c
===================================================================
--- projects/vnet/tools/tools/cxgbetool/cxgbetool.c	(revision 301522)
+++ projects/vnet/tools/tools/cxgbetool/cxgbetool.c	(revision 301523)
@@ -1,2718 +1,2718 @@
 /*-
  * Copyright (c) 2011 Chelsio Communications, Inc.
  * All rights reserved.
  * Written by: Navdeep Parhar <np@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
 
 #include <arpa/inet.h>
 #include <net/ethernet.h>
 #include <net/sff8472.h>
 #include <netinet/in.h>
 
 #include <ctype.h>
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 
 #include "t4_ioctl.h"
 
 /*
  * in_range: true when val is negative or lies within [lo, hi].  Every
  * argument is parenthesized so that expression arguments expand correctly;
  * note that val is still evaluated more than once.
  */
 #define in_range(val, lo, hi) ((val) < 0 || ((val) <= (hi) && (val) >= (lo)))
 #define	max(x, y) ((x) > (y) ? (x) : (y))
 
 static const char *progname, *nexus;
 static int chip_id;	/* 4 for T4, 5 for T5 */
 
 /*
  * One entry of a register description table.  As consumed by
  * dump_block_regs(): an entry with len == 0 names a register at byte
  * offset addr; an entry with len != 0 names a field of the most recently
  * seen register, starting at bit addr and len bits wide.  A table ends at
  * the first entry whose name is NULL.
  */
 struct reg_info {
 	const char *name;
 	uint32_t addr;
 	uint32_t len;
 };
 
 /* Associates a register block name with its register description table. */
 struct mod_regs {
 	const char *name;		/* Block name matched against arguments */
 	const struct reg_info *ri;	/* Register table for the block */
 };
 
 /* Description of a named bit-field: its position and how to print it. */
 struct field_desc {
 	const char *name;     /* Field name */
 	unsigned short start; /* Start bit position */
 	unsigned short end;   /* End bit position */
 	unsigned char shift;  /* # of low order bits omitted and implicitly 0 */
 	unsigned char hex;    /* Print field in hex instead of decimal */
 	unsigned char islog2; /* Field contains the base-2 log of the value */
 };
 
 #include "reg_defs_t4.c"
 #include "reg_defs_t5.c"
 #include "reg_defs_t6.c"
 #include "reg_defs_t4vf.c"
 
 /* Write the usage summary and the list of operations to 'fp'. */
 static void
 usage(FILE *fp)
 {
 	static const char operations[] =
 	    "\tclearstats <port>                   clear port statistics\n"
 	    "\tcontext <type> <id>                 show an SGE context\n"
 	    "\tfilter <idx> [<param> <val>] ...    set a filter\n"
 	    "\tfilter <idx> delete|clear           delete a filter\n"
 	    "\tfilter list                         list all filters\n"
 	    "\tfilter mode [<match>] ...           get/set global filter mode\n"
 	    "\ti2c <port> <devaddr> <addr> [<len>] read from i2c device\n"
 	    "\tloadfw <fw-image.bin>               install firmware\n"
 	    "\tmemdump <addr> <len>                dump a memory range\n"
 	    "\tmodinfo <port> [raw]                optics/cable information\n"
 	    "\treg <address>[=<val>]               read/write register\n"
 	    "\treg64 <address>[=<val>]             read/write 64 bit register\n"
 	    "\tregdump [<module>] ...              dump registers\n"
 	    "\tsched-class params <param> <val> .. configure TX scheduler class\n"
 	    "\tsched-queue <port> <queue> <class>  bind NIC queues to TX Scheduling class\n"
 	    "\tstdio                               interactive mode\n"
 	    "\ttcb <tid>                           read TCB\n"
 	    "\ttracer <idx> tx<n>|rx<n>            set and enable a tracer\n"
 	    "\ttracer <idx> disable|enable         disable or enable a tracer\n"
 	    "\ttracer list                         list all tracers\n";
 
 	fprintf(fp, "Usage: %s <nexus> [operation]\n", progname);
 	/* The operations text contains no conversions; emit it verbatim. */
 	fputs(operations, fp);
 }
 
 /* Return the low 10 bits of 'version'. */
 static inline unsigned int
 get_card_vers(unsigned int version)
 {
 	const unsigned int vers_mask = 0x3ff;
 
 	return (version & vers_mask);
 }
 
 /*
  * Issue ioctl 'cmd' with argument 'data' on the nexus device.
  *
  * The device /dev/<nexus> is opened on first use and the descriptor is
  * kept for later calls; chip_id is derived from the second character of
  * the nexus name at that time.  If the open fails the descriptor stays -1,
  * so the next call tries again.
  *
  * 'cmdstr' is the text used in the warning when the ioctl fails (the
  * doit() macro passes the stringified command).
  *
  * Returns 0 on success, otherwise the errno value of the failed open() or
  * ioctl().  errno is captured before warn() is called because warn() may
  * itself modify errno while printing.
  */
 static int
 real_doit(unsigned long cmd, void *data, const char *cmdstr)
 {
 	static int fd = -1;
 	int rc = 0;
 
 	if (fd == -1) {
 		char buf[64];
 
 		snprintf(buf, sizeof(buf), "/dev/%s", nexus);
 		if ((fd = open(buf, O_RDWR)) < 0) {
 			rc = errno;
 			warn("open(%s)", nexus);
 			return (rc);
 		}
 		chip_id = nexus[1] - '0';
 	}
 
 	rc = ioctl(fd, cmd, data);
 	if (rc < 0) {
 		rc = errno;
 		warn("%s", cmdstr);
 	}
 
 	return (rc);
 }
 #define doit(x, y) real_doit(x, y, #x)
 
 /*
  * Parse the number at the start of 's' (base auto-detected).  The result
  * is stored through 'vall' (as long long) when it is non-NULL, otherwise
  * through 'val' (as long) when that is non-NULL.
  *
  * Returns a pointer to the first unparsed character, or NULL when both
  * output pointers are NULL.
  */
 static char *
 str_to_number(const char *s, long *val, long long *vall)
 {
 	char *end = NULL;
 
 	if (vall != NULL) {
 		*vall = strtoll(s, &end, 0);
 		return (end);
 	}
 	if (val != NULL)
 		*val = strtol(s, &end, 0);
 
 	return (end);
 }
 
 static int
 read_reg(long addr, int size, long long *val)
 {
 	struct t4_reg reg;
 	int rc;
 
 	reg.addr = (uint32_t) addr;
 	reg.size = (uint32_t) size;
 	reg.val = 0;
 
 	rc = doit(CHELSIO_T4_GETREG, &reg);
 
 	*val = reg.val;
 
 	return (rc);
 }
 
 static int
 write_reg(long addr, int size, long long val)
 {
 	struct t4_reg reg;
 
 	reg.addr = (uint32_t) addr;
 	reg.size = (uint32_t) size;
 	reg.val = (uint64_t) val;
 
 	return doit(CHELSIO_T4_SETREG, &reg);
 }
 
 /*
  * Handle a register read or write request.
  *
  * Accepted argument forms:
  *   <reg>            read the register and print it in hex and decimal
  *   <reg>=<value>    write the register
  *   <reg> <value>    write the register
  *
  * 'size' is handed to read_reg()/write_reg() unchanged.
  *
  * Returns EINVAL for malformed arguments, otherwise the result of the
  * read or write.
  */
 static int
 register_io(int argc, const char *argv[], int size)
 {
 	const char *valstr;
 	char *end;
 	long addr;
 	long long val;
 	int is_write, rc;
 
 	switch (argc) {
 	case 1:
 		/* <reg> OR <reg>=<value> */
 		is_write = 0;
 		end = str_to_number(argv[0], &addr, NULL);
 		if (*end == '\0')
 			break;
 		if (*end != '=') {
 			warnx("invalid register \"%s\"", argv[0]);
 			return (EINVAL);
 		}
 
 		is_write = 1;
 		valstr = end + 1;
 		end = str_to_number(valstr, NULL, &val);
 		if (*end != '\0') {
 			warnx("invalid value \"%s\"", valstr);
 			return (EINVAL);
 		}
 		break;
 
 	case 2:
 		/* <reg> <value> */
 		is_write = 1;
 		end = str_to_number(argv[0], &addr, NULL);
 		if (*end != '\0') {
 			warnx("invalid register \"%s\"", argv[0]);
 			return (EINVAL);
 		}
 
 		end = str_to_number(argv[1], NULL, &val);
 		if (*end != '\0') {
 			warnx("invalid value \"%s\"", argv[1]);
 			return (EINVAL);
 		}
 		break;
 
 	default:
 		warnx("reg: invalid number of arguments (%d)", argc);
 		return (EINVAL);
 	}
 
 	if (is_write)
 		return (write_reg(addr, size, val));
 
 	rc = read_reg(addr, size, &val);
 	if (rc == 0)
 		printf("0x%llx [%llu]\n", val, val);
 
 	return (rc);
 }
 
 /*
  * Extract the 'len'-bit wide field that starts at bit 'shift' of 'val'.
  *
  * The mask is built in unsigned arithmetic and a width of 32 or more
  * selects all bits; shifting a plain int 1 left by 31 or 32 positions is
  * undefined and produced a wrong mask for full-width fields.
  */
 static inline uint32_t
 xtract(uint32_t val, int shift, int len)
 {
 	uint32_t mask;
 
 	mask = (len >= 32) ? UINT32_MAX : ((uint32_t)1 << len) - 1;
 
 	return ((val >> shift) & mask);
 }
 
 /*
  * Print every register and field described by 'reg_array', taking register
  * values from the dump 'regs' (indexed by byte offset / 4).  An entry with
  * a zero length is a register; an entry with a non-zero length is a field
  * of the register printed most recently.  The table ends at the first
  * entry with a NULL name.
  *
  * Always returns 1.
  */
 static int
 dump_block_regs(const struct reg_info *reg_array, const uint32_t *regs)
 {
 	const struct reg_info *ri;
 	uint32_t reg_val = 0;
 	uint32_t field;
 
 	for (ri = reg_array; ri->name != NULL; ri++) {
 		if (ri->len == 0) {
 			/* A register: fetch it and keep it for its fields. */
 			reg_val = regs[ri->addr / 4];
 			printf("[%#7x] %-47s %#-10x %u\n", ri->addr,
 			    ri->name, reg_val, reg_val);
 			continue;
 		}
 
 		/* A field of the current register. */
 		field = xtract(reg_val, ri->addr, ri->len);
 		printf("    %*u:%u %-47s %#-10x %u\n",
 		    ri->addr < 10 ? 3 : 2,
 		    ri->addr + ri->len - 1,
 		    ri->addr, ri->name, field, field);
 	}
 
 	return (1);
 }
 
 /*
  * Dump the register blocks named in argv[0..argc-1], or every block in
  * 'modtab' when argc is 0.  Blocks are printed in table order.
  *
  * All names are validated first; on an unknown name a warning and the list
  * of available blocks are written to stderr and nothing is dumped.
  *
  * Returns 0 on success or EINVAL for an unknown block name.
  */
 static int
 dump_regs_table(int argc, const char *argv[], const uint32_t *regs,
     const struct mod_regs *modtab, int nmodules)
 {
 	const struct mod_regs *mod;
 	int a, m, wanted;
 
 	/* Validate every requested block name before printing anything. */
 	for (a = 0; a < argc; a++) {
 		m = 0;
 		while (m < nmodules && strcmp(argv[a], modtab[m].name) != 0)
 			m++;
 		if (m < nmodules)
 			continue;
 
 		warnx("invalid register block \"%s\"", argv[a]);
 		fprintf(stderr, "\nAvailable blocks:");
 		for (m = 0; m < nmodules; m++)
 			fprintf(stderr, " %s", modtab[m].name);
 		fprintf(stderr, "\n");
 		return (EINVAL);
 	}
 
 	for (m = 0; m < nmodules; m++) {
 		mod = &modtab[m];
 
 		/* With no arguments every block is selected. */
 		wanted = (argc == 0);
 		for (a = 0; !wanted && a < argc; a++)
 			wanted = (strcmp(argv[a], mod->name) == 0);
 
 		if (wanted)
 			dump_block_regs(mod->ri, regs);
 	}
 
 	return (0);
 }
 
 /* Table entry whose block name equals the infix of its t4_<name>_regs table. */
 #define T4_MODREGS(name) { #name, t4_##name##_regs }
 /*
  * Dump the requested T4 register blocks (all of them when argc is 0) from
  * 'regs'; see dump_regs_table() for the argument handling and result.
  */
 static int
 dump_regs_t4(int argc, const char *argv[], const uint32_t *regs)
 {
 	static const struct mod_regs t4_blocks[] = {
 		T4_MODREGS(sge),
 		{ "pci", t4_pcie_regs },
 		T4_MODREGS(dbg),
 		T4_MODREGS(mc),
 		T4_MODREGS(ma),
 		{ "edc0", t4_edc_0_regs },
 		{ "edc1", t4_edc_1_regs },
 		T4_MODREGS(cim),
 		T4_MODREGS(tp),
 		T4_MODREGS(ulp_rx),
 		T4_MODREGS(ulp_tx),
 		{ "pmrx", t4_pm_rx_regs },
 		{ "pmtx", t4_pm_tx_regs },
 		T4_MODREGS(mps),
 		{ "cplsw", t4_cpl_switch_regs },
 		T4_MODREGS(smb),
 		{ "i2c", t4_i2cm_regs },
 		T4_MODREGS(mi),
 		T4_MODREGS(uart),
 		T4_MODREGS(pmu),
 		T4_MODREGS(sf),
 		T4_MODREGS(pl),
 		T4_MODREGS(le),
 		T4_MODREGS(ncsi),
 		T4_MODREGS(xgmac)
 	};
 
 	return (dump_regs_table(argc, argv, regs, t4_blocks,
 	    nitems(t4_blocks)));
 }
 #undef T4_MODREGS
 
 /* Table entry whose block name equals the infix of its t5_<name>_regs table. */
 #define T5_MODREGS(name) { #name, t5_##name##_regs }
 /*
  * Dump the requested T5 register blocks (all of them when argc is 0) from
  * 'regs'; see dump_regs_table() for the argument handling and result.
  */
 static int
 dump_regs_t5(int argc, const char *argv[], const uint32_t *regs)
 {
 	static const struct mod_regs t5_blocks[] = {
 		T5_MODREGS(sge),
 		{ "pci", t5_pcie_regs },
 		T5_MODREGS(dbg),
 		{ "mc0", t5_mc_0_regs },
 		{ "mc1", t5_mc_1_regs },
 		T5_MODREGS(ma),
 		{ "edc0", t5_edc_t50_regs },
 		{ "edc1", t5_edc_t51_regs },
 		T5_MODREGS(cim),
 		T5_MODREGS(tp),
 		{ "ulprx", t5_ulp_rx_regs },
 		{ "ulptx", t5_ulp_tx_regs },
 		{ "pmrx", t5_pm_rx_regs },
 		{ "pmtx", t5_pm_tx_regs },
 		T5_MODREGS(mps),
 		{ "cplsw", t5_cpl_switch_regs },
 		T5_MODREGS(smb),
 		{ "i2c", t5_i2cm_regs },
 		T5_MODREGS(mi),
 		T5_MODREGS(uart),
 		T5_MODREGS(pmu),
 		T5_MODREGS(sf),
 		T5_MODREGS(pl),
 		T5_MODREGS(le),
 		T5_MODREGS(ncsi),
 		T5_MODREGS(mac),
 		{ "hma", t5_hma_t5_regs }
 	};
 
 	return (dump_regs_table(argc, argv, regs, t5_blocks,
 	    nitems(t5_blocks)));
 }
 #undef T5_MODREGS
 
 /*
  * Dump the registers of a T6 (non-VF) adapter.  Each table entry pairs the
  * block name that may be given on the command line with the register
  * description table for that block; dump_regs_table() does the selection
  * and printing.
  */
 static int
 dump_regs_t6(int argc, const char *argv[], const uint32_t *regs)
 {
 	static struct mod_regs t6_blocks[] = {
 		{ "sge", t6_sge_regs },
 		{ "pci", t6_pcie_regs },
 		{ "dbg", t6_dbg_regs },
 		{ "mc0", t6_mc_0_regs },
 		{ "ma", t6_ma_regs },
 		{ "edc0", t6_edc_t60_regs },
 		{ "edc1", t6_edc_t61_regs },
 		{ "cim", t6_cim_regs },
 		{ "tp", t6_tp_regs },
 		{ "ulprx", t6_ulp_rx_regs },
 		{ "ulptx", t6_ulp_tx_regs },
 		{ "pmrx", t6_pm_rx_regs },
 		{ "pmtx", t6_pm_tx_regs },
 		{ "mps", t6_mps_regs },
 		{ "cplsw", t6_cpl_switch_regs },
 		{ "smb", t6_smb_regs },
 		{ "i2c", t6_i2cm_regs },
 		{ "mi", t6_mi_regs },
 		{ "uart", t6_uart_regs },
 		{ "pmu", t6_pmu_regs },
 		{ "sf", t6_sf_regs },
 		{ "pl", t6_pl_regs },
 		{ "le", t6_le_regs },
 		{ "ncsi", t6_ncsi_regs },
 		{ "mac", t6_mac_regs },
 		{ "hma", t6_hma_t6_regs }
 	};
 
 	return (dump_regs_table(argc, argv, regs, t6_blocks,
 	    nitems(t6_blocks)));
 }
 
 /*
  * Dump the registers of a T4 virtual function: same mechanism as the PF
  * variant, with the VF's block list.
  */
 static int
 dump_regs_t4vf(int argc, const char *argv[], const uint32_t *regs)
 {
 	static struct mod_regs vf_blocks[] = {
 		{ .name = "sge", .ri = t4vf_sge_regs },
 		{ .name = "mps", .ri = t4vf_mps_regs },
 		{ .name = "pl", .ri = t4vf_pl_regs },
 		{ .name = "mbdata", .ri = t4vf_mbdata_regs },
 		{ .name = "cim", .ri = t4vf_cim_regs },
 	};
 
 	return (dump_regs_table(argc, argv, regs, vf_blocks,
 	    nitems(vf_blocks)));
 }
 
 /*
  * Dump the registers of a T5 virtual function.  Only the "sge" and "pl"
  * tables are T5-specific; the remaining blocks reuse the T4 VF tables.
  */
 static int
 dump_regs_t5vf(int argc, const char *argv[], const uint32_t *regs)
 {
 	static struct mod_regs vf_blocks[] = {
 		{ .name = "sge", .ri = t5vf_sge_regs },
 		{ .name = "mps", .ri = t4vf_mps_regs },
 		{ .name = "pl", .ri = t5vf_pl_regs },
 		{ .name = "mbdata", .ri = t4vf_mbdata_regs },
 		{ .name = "cim", .ri = t4vf_cim_regs },
 	};
 
 	return (dump_regs_table(argc, argv, regs, vf_blocks,
 	    nitems(vf_blocks)));
 }
 
 /*
  * Dump the registers of a T6 virtual function.  Only the "pl" table is
  * T6-specific; "sge" reuses the T5 VF table and the remaining blocks reuse
  * the T4 VF tables.
  */
 static int
 dump_regs_t6vf(int argc, const char *argv[], const uint32_t *regs)
 {
 	static struct mod_regs vf_blocks[] = {
 		{ .name = "sge", .ri = t5vf_sge_regs },
 		{ .name = "mps", .ri = t4vf_mps_regs },
 		{ .name = "pl", .ri = t6vf_pl_regs },
 		{ .name = "mbdata", .ri = t4vf_mbdata_regs },
 		{ .name = "cim", .ri = t4vf_cim_regs },
 	};
 
 	return (dump_regs_table(argc, argv, regs, vf_blocks,
 	    nitems(vf_blocks)));
 }
 
 /*
  * Fetch a register dump from the adapter and print it.
  *
  * argc/argv optionally name the register blocks to print and are passed
  * through to the chip-specific dump routine.  The chip generation comes
  * from get_card_vers(regs.version); a revision field of 0x3f selects the
  * VF variant of the dump routine.
  *
  * Returns 0 on success or an errno value.  The dump buffer is released on
  * every exit path.
  */
 static int
 dump_regs(int argc, const char *argv[])
 {
 	int vers, revision, rc;
 	struct t4_regdump regs;
 	uint32_t len;
 
 	/* One buffer large enough for any supported chip. */
 	len = max(T4_REGDUMP_SIZE, T5_REGDUMP_SIZE);
 	regs.data = calloc(1, len);
 	if (regs.data == NULL) {
 		warnc(ENOMEM, "regdump");
 		return (ENOMEM);
 	}
 
 	regs.len = len;
 	rc = doit(CHELSIO_T4_REGDUMP, &regs);
 	if (rc != 0)
 		goto done;
 
 	vers = get_card_vers(regs.version);
 	revision = (regs.version >> 10) & 0x3f;
 
 	if (vers == 4) {
 		if (revision == 0x3f)
 			rc = dump_regs_t4vf(argc, argv, regs.data);
 		else
 			rc = dump_regs_t4(argc, argv, regs.data);
 	} else if (vers == 5) {
 		if (revision == 0x3f)
 			rc = dump_regs_t5vf(argc, argv, regs.data);
 		else
 			rc = dump_regs_t5(argc, argv, regs.data);
 	} else if (vers == 6) {
 		if (revision == 0x3f)
 			rc = dump_regs_t6vf(argc, argv, regs.data);
 		else
 			rc = dump_regs_t6(argc, argv, regs.data);
 	} else {
 		warnx("%s (type %d, rev %d) is not a known card.",
 		    nexus, vers, revision);
 		rc = ENOTSUP;
 	}
 
 done:
 	free(regs.data);
 	return (rc);
 }
 
 /*
  * Print the column headings for the filter listing.  A heading is emitted
  * for each compressed-header field whose bit is set in "mode", walking the
  * bits from T4_FILTER_FCoE up to T4_FILTER_IP_FRAGMENT, followed by the
  * headings for the fixed address/port/action columns.
  */
 static void
 do_show_info_header(uint32_t mode)
 {
 	uint32_t bit = T4_FILTER_FCoE;
 
 	printf("%4s %8s", "Idx", "Hits");
 
 	while (bit <= T4_FILTER_IP_FRAGMENT) {
 		const char *heading = NULL;
 
 		switch (mode & bit) {
 		case T4_FILTER_FCoE:
 			heading = " FCoE";
 			break;
 		case T4_FILTER_PORT:
 			heading = " Port";
 			break;
 		case T4_FILTER_VNIC:
 			/* The VNIC field is either PF/VF or outer VLAN. */
 			if (mode & T4_FILTER_IC_VNIC)
 				heading = "   VFvld:PF:VF";
 			else
 				heading = "     vld:oVLAN";
 			break;
 		case T4_FILTER_VLAN:
 			heading = "      vld:VLAN";
 			break;
 		case T4_FILTER_IP_TOS:
 			heading = "   TOS";
 			break;
 		case T4_FILTER_IP_PROTO:
 			heading = "  Prot";
 			break;
 		case T4_FILTER_ETH_TYPE:
 			heading = "   EthType";
 			break;
 		case T4_FILTER_MAC_IDX:
 			heading = "  MACIdx";
 			break;
 		case T4_FILTER_MPS_HIT_TYPE:
 			heading = " MPS";
 			break;
 		case T4_FILTER_IP_FRAGMENT:
 			heading = " Frag";
 			break;
 		default:
 			/* compressed filter field not enabled */
 			break;
 		}
 
 		if (heading != NULL)
 			printf("%s", heading);
 		bit <<= 1;
 	}
 
 	printf(" %20s %20s %9s %9s %s\n",
 	    "DIP", "SIP", "DPORT", "SPORT", "Action");
 }
 
 /*
  * Parse an argument sub-vector as a { <parameter name> <value>[:<mask>] }
  * ordered tuple.
  *
  * If args[0] is not "param", EINVAL is returned without touching the
  * outputs and without printing anything.  Otherwise args[1] is parsed with
  * strtoul() (base auto-detected): on success 0 is returned with *val set
  * and *mask set to the given mask, or to all 1s when no ":<mask>" was
  * supplied.
  *
  * A malformed value[:mask] produces a warning and EINVAL; *val (and
  * possibly *mask) may already have been overwritten in that case.
  */
 static int
 parse_val_mask(const char *param, const char *args[], uint32_t *val,
     uint32_t *mask)
 {
 	char *end;
 
 	if (strcmp(param, args[0]) != 0)
 		return (EINVAL);
 
 	*val = strtoul(args[1], &end, 0);
 	if (end == args[1])
 		goto bad;		/* no digits at all */
 
 	if (*end == '\0') {
 		/* Plain value: match on every bit. */
 		*mask = ~0;
 		return (0);
 	}
 
 	if (*end != ':' || end[1] == '\0')
 		goto bad;		/* junk after value, or empty mask */
 
 	*mask = strtoul(end + 1, &end, 0);
 	if (*end == '\0')
 		return (0);
 
 bad:
 	warnx("parameter \"%s\" has bad \"value[:mask]\" %s",
 	    args[0], args[1]);
 
 	return (EINVAL);
 }
 
 /*
  * Parse an argument sub-vector as a { <parameter name> <addr>[/<prefix>] }
  * ordered tuple.
  *
  * If args[0] is not "param", EINVAL is returned and nothing is parsed or
  * printed.  Otherwise the address is converted with inet_pton() into
  * "addr" and the network mask implied by the optional prefix length is
  * written to "mask".  Without a prefix the mask is all 1s over the whole
  * address (4 bytes for IPv4, 16 for IPv6).
  *
  * "afp" is both input and output: AF_UNSPEC on entry accepts either family
  * and is replaced by the family actually parsed; AF_INET or AF_INET6 on
  * entry restricts what is accepted.  An address containing ':' is treated
  * as IPv6, anything else as IPv4.  IPv4 address A.B.C.D is stored as
  * { A, B, C, D } in bytes 0..3 of "addr" (and likewise for "mask").
  *
  * Any parse error produces a warning and an EINVAL return.
  *
  * Note: args[1] is temporarily modified in place while the address part is
  * converted, so it must point to writable storage.
  */
 static int
 parse_ipaddr(const char *param, const char *args[], int *afp, uint8_t addr[],
     uint8_t mask[])
 {
 	const char *colon, *afn;
 	char *slash;
 	uint8_t *m;
 	int af, ret;
 	unsigned int masksize;
 
 	/*
 	 * Is this our parameter?
 	 */
 	if (strcmp(param, args[0]) != 0)
 		return (EINVAL);
 
 	/*
 	 * Fundamental IPv4 versus IPv6 selection.
 	 */
 	colon = strchr(args[1], ':');
 	if (!colon) {
 		afn = "IPv4";
 		af = AF_INET;
 		masksize = 32;
 	} else {
 		afn = "IPv6";
 		af = AF_INET6;
 		masksize = 128;
 	}
 	if (*afp == AF_UNSPEC)
 		*afp = af;
 	else if (*afp != af) {
 		warnx("address %s is not of expected family %s",
 		    args[1], *afp == AF_INET ? "IP" : "IPv6");
 		return (EINVAL);
 	}
 
 	/*
 	 * Parse address (temporarily stripping off any "/mask"
 	 * specification).
 	 */
 	slash = strchr(args[1], '/');
 	if (slash)
 		*slash = 0;
 	ret = inet_pton(af, args[1], addr);
 	if (slash)
 		*slash = '/';
 	if (ret <= 0) {
 		warnx("Cannot parse %s %s address %s", param, afn, args[1]);
 		return (EINVAL);
 	}
 
 	/*
 	 * Parse optional mask specification.
 	 */
 	if (slash) {
 		char *p;
 		/*
 		 * Keep the full width of strtoul()'s result for the range
 		 * check; narrowing first would let an out-of-range value
 		 * wrap around to a small, "valid" prefix.
 		 */
 		unsigned long prefix = strtoul(slash + 1, &p, 10);
 
 		if (p == slash + 1) {
 			warnx("missing address prefix for %s", param);
 			return (EINVAL);
 		}
 		if (*p) {
 			warnx("%s is not a valid address prefix", slash + 1);
 			return (EINVAL);
 		}
 		if (prefix > masksize) {
 			warnx("prefix %lu is too long for an %s address",
 			     prefix, afn);
 			return (EINVAL);
 		}
 		memset(mask, 0, masksize / 8);
 		masksize = (unsigned int)prefix;
 	}
 
 	/*
 	 * Fill in mask: whole bytes first, then the leading bits of the
 	 * final partial byte.  The shift is done on an unsigned value; left
 	 * shifting a negative int (~0) is undefined.
 	 */
 	for (m = mask; masksize >= 8; m++, masksize -= 8)
 		*m = 0xff;
 	if (masksize)
 		*m = (uint8_t)(0xffu << (8 - masksize));
 
 	return (0);
 }
 
 /*
  * Parse an argument sub-vector as a { <parameter name> <value> } ordered
  * tuple.
  *
  * If args[0] is not "param", EINVAL is returned and nothing is parsed or
  * printed.  Otherwise args[1] is converted with strtoul() (base
  * auto-detected) into *val and 0 is returned.  An empty value or trailing
  * junk produces a warning and EINVAL; *val has been overwritten by then.
  */
 static int
 parse_val(const char *param, const char *args[], uint32_t *val)
 {
 	char *end;
 
 	if (strcmp(param, args[0]) != 0)
 		return (EINVAL);
 
 	*val = strtoul(args[1], &end, 0);
 	if (end == args[1] || *end != '\0') {
 		warnx("parameter \"%s\" has bad \"value\" %s",
 		    args[0], args[1]);
 		return (EINVAL);
 	}
 
 	return (0);
 }
 
 /*
  * Print " <addr>/<mask>" as two runs of hex octets.  type 0 prints 4
  * octets of each (preceded by three spaces of padding); any other type
  * prints 16 octets of each.
  */
 static void
 filters_show_ipaddr(int type, uint8_t *addr, uint8_t *addrm)
 {
 	const int nbytes = (type == 0) ? 4 : 16;
 	int i;
 
 	printf(" ");
 	if (type == 0)
 		printf("%3s", " ");
 
 	for (i = 0; i < nbytes; i++)
 		printf("%02x", addr[i]);
 	printf("/");
 	for (i = 0; i < nbytes; i++)
 		printf("%02x", addrm[i]);
 }
 
 /*
  * Print one line describing filter *t: its index and hit count, then the
  * value/mask of every compressed-header field enabled in "mode" (walking
  * the same bit range as the column header), then the fixed address and
  * port fields, and finally the action with its action-specific options.
  */
 static void
 do_show_one_filter_info(struct t4_filter *t, uint32_t mode)
 {
 	uint32_t bit;
 
 	printf("%4d", t->idx);
 	if (t->hits != UINT64_MAX)
 		printf(" %8ju", t->hits);
 	else
 		printf(" %8s", "-");	/* UINT64_MAX: no count to show */
 
 	/*
 	 * Compressed header portion of filter.
 	 */
 	for (bit = T4_FILTER_FCoE; bit <= T4_FILTER_IP_FRAGMENT; bit <<= 1) {
 		switch (mode & bit) {
 		case T4_FILTER_FCoE:
 			printf("  %1d/%1d", t->fs.val.fcoe, t->fs.mask.fcoe);
 			break;
 
 		case T4_FILTER_PORT:
 			printf("  %1d/%1d", t->fs.val.iport, t->fs.mask.iport);
 			break;
 
 		case T4_FILTER_VNIC:
 			if (mode & T4_FILTER_IC_VNIC) {
 				/* vnic holds PF in bits 15:13, VF below. */
 				printf(" %1d:%1x:%02x/%1d:%1x:%02x",
 				    t->fs.val.pfvf_vld,
 				    (t->fs.val.vnic >> 13) & 0x7,
 				    t->fs.val.vnic & 0x1fff,
 				    t->fs.mask.pfvf_vld,
 				    (t->fs.mask.vnic >> 13) & 0x7,
 				    t->fs.mask.vnic & 0x1fff);
 			} else {
 				printf(" %1d:%04x/%1d:%04x",
 				    t->fs.val.ovlan_vld, t->fs.val.vnic,
 				    t->fs.mask.ovlan_vld, t->fs.mask.vnic);
 			}
 			break;
 
 		case T4_FILTER_VLAN:
 			printf(" %1d:%04x/%1d:%04x",
 			    t->fs.val.vlan_vld, t->fs.val.vlan,
 			    t->fs.mask.vlan_vld, t->fs.mask.vlan);
 			break;
 
 		case T4_FILTER_IP_TOS:
 			printf(" %02x/%02x", t->fs.val.tos, t->fs.mask.tos);
 			break;
 
 		case T4_FILTER_IP_PROTO:
 			printf(" %02x/%02x", t->fs.val.proto, t->fs.mask.proto);
 			break;
 
 		case T4_FILTER_ETH_TYPE:
 			printf(" %04x/%04x", t->fs.val.ethtype,
 			    t->fs.mask.ethtype);
 			break;
 
 		case T4_FILTER_MAC_IDX:
 			printf(" %03x/%03x", t->fs.val.macidx,
 			    t->fs.mask.macidx);
 			break;
 
 		case T4_FILTER_MPS_HIT_TYPE:
 			printf(" %1x/%1x", t->fs.val.matchtype,
 			    t->fs.mask.matchtype);
 			break;
 
 		case T4_FILTER_IP_FRAGMENT:
 			printf("  %1d/%1d", t->fs.val.frag, t->fs.mask.frag);
 			break;
 
 		default:
 			/* compressed filter field not enabled */
 			break;
 		}
 	}
 
 	/*
 	 * Fixed portion of filter.
 	 */
 	filters_show_ipaddr(t->fs.type, t->fs.val.dip, t->fs.mask.dip);
 	filters_show_ipaddr(t->fs.type, t->fs.val.sip, t->fs.mask.sip);
 	printf(" %04x/%04x %04x/%04x",
 	    t->fs.val.dport, t->fs.mask.dport,
 	    t->fs.val.sport, t->fs.mask.sport);
 
 	/*
 	 * Variable length filter action.
 	 */
 	if (t->fs.action == FILTER_DROP) {
 		printf(" Drop");
 	} else if (t->fs.action == FILTER_SWITCH) {
 		printf(" Switch: port=%d", t->fs.eport);
 
 		/* Optional header rewrites performed while switching. */
 		if (t->fs.newdmac) {
 			printf(
 			    ", dmac=%02x:%02x:%02x:%02x:%02x:%02x "
 			    ", l2tidx=%d",
 			    t->fs.dmac[0], t->fs.dmac[1],
 			    t->fs.dmac[2], t->fs.dmac[3],
 			    t->fs.dmac[4], t->fs.dmac[5],
 			    t->l2tidx);
 		}
 		if (t->fs.newsmac) {
 			printf(
 			    ", smac=%02x:%02x:%02x:%02x:%02x:%02x "
 			    ", smtidx=%d",
 			    t->fs.smac[0], t->fs.smac[1],
 			    t->fs.smac[2], t->fs.smac[3],
 			    t->fs.smac[4], t->fs.smac[5],
 			    t->smtidx);
 		}
 		if (t->fs.newvlan == VLAN_REMOVE)
 			printf(", vlan=none");
 		else if (t->fs.newvlan == VLAN_INSERT)
 			printf(", vlan=insert(%x)", t->fs.vlan);
 		else if (t->fs.newvlan == VLAN_REWRITE)
 			printf(", vlan=rewrite(%x)", t->fs.vlan);
 	} else {
 		printf(" Pass: Q=");
 		if (t->fs.dirsteer != 0) {
 			printf("%d", t->fs.iq);
 			if (t->fs.dirsteerhash != 0)
 				printf("(hash)");
 			else
 				printf("(QID)");
 		} else {
 			printf("RSS");
 			if (t->fs.maskhash)
 				printf("(TCB=hash)");
 		}
 	}
 
 	if (t->fs.prio)
 		printf(" Prio");
 	if (t->fs.rpttid)
 		printf(" RptTID");
 	printf("\n");
 }
 
 /*
  * List the filters: fetch the global filter mode, then repeatedly issue
  * CHELSIO_T4_GET_FILTER starting at index 0, printing each filter
  * returned.  The loop ends when the ioctl fails or leaves t.idx set to
  * 0xffffffff.  The column header is printed only once a first filter has
  * been found.  Returns the result of the last ioctl.
  */
 static int
 show_filters(void)
 {
 	struct t4_filter t;
 	uint32_t mode = 0;
 	int rc, header_shown = 0;
 
 	/* Get the global filter mode first */
 	rc = doit(CHELSIO_T4_GET_FILTER_MODE, &mode);
 	if (rc != 0)
 		return (rc);
 
 	t.idx = 0;
 	while ((rc = doit(CHELSIO_T4_GET_FILTER, &t)) == 0 &&
 	    t.idx != 0xffffffff) {
 		if (!header_shown) {
 			do_show_info_header(mode);
 			header_shown = 1;
 		}
 		do_show_one_filter_info(&t, mode);
 		t.idx++;
 	}
 
 	return (rc);
 }
 
 /*
  * Query the global filter mode and print the name of every field whose
  * bit is set, space separated, on a single line.  Returns 0 on success or
  * the error from the ioctl.
  */
 static int
 get_filter_mode(void)
 {
 	/* Fields in output order; a NULL label marks the VNIC special case. */
 	const struct {
 		uint32_t	 flag;
 		const char	*label;
 	} fields[] = {
 		{ T4_FILTER_IPv4,		"ipv4 " },
 		{ T4_FILTER_IPv6,		"ipv6 " },
 		{ T4_FILTER_IP_SADDR,		"sip " },
 		{ T4_FILTER_IP_DADDR,		"dip " },
 		{ T4_FILTER_IP_SPORT,		"sport " },
 		{ T4_FILTER_IP_DPORT,		"dport " },
 		{ T4_FILTER_IP_FRAGMENT,	"frag " },
 		{ T4_FILTER_MPS_HIT_TYPE,	"matchtype " },
 		{ T4_FILTER_MAC_IDX,		"macidx " },
 		{ T4_FILTER_ETH_TYPE,		"ethtype " },
 		{ T4_FILTER_IP_PROTO,		"proto " },
 		{ T4_FILTER_IP_TOS,		"tos " },
 		{ T4_FILTER_VLAN,		"vlan " },
 		{ T4_FILTER_VNIC,		NULL },
 		{ T4_FILTER_PORT,		"iport " },
 		{ T4_FILTER_FCoE,		"fcoe " },
 	};
 	uint32_t mode = 0;
 	size_t i;
 	int rc;
 
 	rc = doit(CHELSIO_T4_GET_FILTER_MODE, &mode);
 	if (rc != 0)
 		return (rc);
 
 	for (i = 0; i < nitems(fields); i++) {
 		if ((mode & fields[i].flag) == 0)
 			continue;
 
 		if (fields[i].label != NULL) {
 			printf("%s", fields[i].label);
 			continue;
 		}
 
 		/* VNIC is shown as either vnic_id or ovlan. */
 		if (mode & T4_FILTER_IC_VNIC)
 			printf("vnic_id ");
 		else
 			printf("ovlan ");
 	}
 
 	printf("\n");
 
 	return (0);
 }
 
 /*
  * Set the global filter mode from a list of field names.
  *
  * Each argument names one compressed-header field to enable.  "ovlan" and
  * "vnic_id" both enable the VNIC field (the latter additionally sets
  * T4_FILTER_IC_VNIC) and may not be combined.
  *
  * An argument that is not a known field name is rejected with EINVAL
  * instead of being silently dropped, so that a typo cannot result in a
  * mode other than the one requested.  Returns the result of the ioctl
  * otherwise.
  */
 static int
 set_filter_mode(int argc, const char *argv[])
 {
 	uint32_t mode = 0;
 	int vnic = 0, ovlan = 0;
 
 	for (; argc; argc--, argv++) {
 		if (!strcmp(argv[0], "ipv4") || !strcmp(argv[0], "ipv6") ||
 		    !strcmp(argv[0], "sip") || !strcmp(argv[0], "dip") ||
 		    !strcmp(argv[0], "sport") || !strcmp(argv[0], "dport")) {
 			/*
 			 * get_filter_mode() can print these names, but no
 			 * bit is programmed for them here.  Accept them so
 			 * that its output can be fed back unchanged.
 			 */
 			continue;
 		} else if (!strcmp(argv[0], "frag")) {
 			mode |= T4_FILTER_IP_FRAGMENT;
 		} else if (!strcmp(argv[0], "matchtype")) {
 			mode |= T4_FILTER_MPS_HIT_TYPE;
 		} else if (!strcmp(argv[0], "macidx")) {
 			mode |= T4_FILTER_MAC_IDX;
 		} else if (!strcmp(argv[0], "ethtype")) {
 			mode |= T4_FILTER_ETH_TYPE;
 		} else if (!strcmp(argv[0], "proto")) {
 			mode |= T4_FILTER_IP_PROTO;
 		} else if (!strcmp(argv[0], "tos")) {
 			mode |= T4_FILTER_IP_TOS;
 		} else if (!strcmp(argv[0], "vlan")) {
 			mode |= T4_FILTER_VLAN;
 		} else if (!strcmp(argv[0], "ovlan")) {
 			mode |= T4_FILTER_VNIC;
 			ovlan++;
 		} else if (!strcmp(argv[0], "vnic_id")) {
 			mode |= T4_FILTER_VNIC;
 			mode |= T4_FILTER_IC_VNIC;
 			vnic++;
 		} else if (!strcmp(argv[0], "iport")) {
 			mode |= T4_FILTER_PORT;
 		} else if (!strcmp(argv[0], "fcoe")) {
 			mode |= T4_FILTER_FCoE;
 		} else {
 			warnx("\"%s\" is not a valid filter mode field.",
 			    argv[0]);
 			return (EINVAL);
 		}
 	}
 
 	if (vnic > 0 && ovlan > 0) {
 		warnx("\"vnic_id\" and \"ovlan\" are mutually exclusive.");
 		return (EINVAL);
 	}
 
 	return doit(CHELSIO_T4_SET_FILTER_MODE, &mode);
 }
 
 /*
  * Delete the filter at index "idx".  Returns the result of the ioctl.
  */
 static int
 del_filter(uint32_t idx)
 {
 	struct t4_filter t;
 
 	/*
 	 * Zero the request first, as set_filter() does, so that no
 	 * uninitialized stack contents are handed to the ioctl.
 	 */
 	bzero(&t, sizeof (t));
 	t.idx = idx;
 
 	return doit(CHELSIO_T4_DEL_FILTER, &t);
 }
 
 /*
  * Build a filter specification from { <param> <value> } pairs and install
  * it at index "idx".
  *
  * argv holds argc strings that are consumed two at a time.  Match
  * parameters (fcoe, iport, ovlan, ivlan, pf, vf, tos, proto, ethtype,
  * macidx, matchtype, frag, dport, sport, dip, sip, type) fill in the
  * value/mask halves of the specification; the rest (action, hitcnts, prio,
  * rpttid, queue, tcbhash, eport, dmac, smac, vlan) describe what to do
  * with matching packets.  Hit counting is on unless "hitcnts" says
  * otherwise, and the filter is IPv4 unless "type" or an address selects
  * IPv6.
  *
  * The parse_*() helpers return non-zero both when the name does not match
  * and when the value is malformed (after warning), so a bad value falls
  * through the chain and is additionally reported as an invalid parameter.
  *
  * Returns EINVAL for malformed or inconsistent input, otherwise the result
  * of the ioctl.
  */
 static int
 set_filter(uint32_t idx, int argc, const char *argv[])
 {
 	int af = AF_UNSPEC, start_arg = 0;
 	struct t4_filter t;
 
 	if (argc < 2) {
 		warnc(EINVAL, "%s", __func__);
 		return (EINVAL);
 	}
 	bzero(&t, sizeof (t));
 	t.idx = idx;
 	t.fs.hitcnts = 1;
 
 	for (start_arg = 0; start_arg + 2 <= argc; start_arg += 2) {
 		const char **args = &argv[start_arg];
 		uint32_t val, mask;
 
 		if (!strcmp(argv[start_arg], "type")) {
 			int newaf;
 			if (!strcasecmp(argv[start_arg + 1], "ipv4"))
 				newaf = AF_INET;
 			else if (!strcasecmp(argv[start_arg + 1], "ipv6"))
 				newaf = AF_INET6;
 			else {
 				warnx("invalid type \"%s\"; "
 				    "must be one of \"ipv4\" or \"ipv6\"",
 				    argv[start_arg + 1]);
 				return (EINVAL);
 			}
 
 			/* An earlier type/dip/sip may have fixed the family. */
 			if (af != AF_UNSPEC && af != newaf) {
 				warnx("conflicting IPv4/IPv6 specifications.");
 				return (EINVAL);
 			}
 			af = newaf;
 		} else if (!parse_val_mask("fcoe", args, &val, &mask)) {
 			t.fs.val.fcoe = val;
 			t.fs.mask.fcoe = mask;
 		} else if (!parse_val_mask("iport", args, &val, &mask)) {
 			t.fs.val.iport = val;
 			t.fs.mask.iport = mask;
 		} else if (!parse_val_mask("ovlan", args, &val, &mask)) {
 			t.fs.val.vnic = val;
 			t.fs.mask.vnic = mask;
 			t.fs.val.ovlan_vld = 1;
 			t.fs.mask.ovlan_vld = 1;
 		} else if (!parse_val_mask("ivlan", args, &val, &mask)) {
 			t.fs.val.vlan = val;
 			t.fs.mask.vlan = mask;
 			t.fs.val.vlan_vld = 1;
 			t.fs.mask.vlan_vld = 1;
 		} else if (!parse_val_mask("pf", args, &val, &mask)) {
 			/* PF lives in bits 15:13 of vnic; keep the VF bits. */
 			t.fs.val.vnic &= 0x1fff;
 			t.fs.val.vnic |= (val & 0x7) << 13;
 			t.fs.mask.vnic &= 0x1fff;
 			t.fs.mask.vnic |= (mask & 0x7) << 13;
 			t.fs.val.pfvf_vld = 1;
 			t.fs.mask.pfvf_vld = 1;
 		} else if (!parse_val_mask("vf", args, &val, &mask)) {
 			/* VF lives in bits 12:0 of vnic; keep the PF bits. */
 			t.fs.val.vnic &= 0xe000;
 			t.fs.val.vnic |= val & 0x1fff;
 			t.fs.mask.vnic &= 0xe000;
 			t.fs.mask.vnic |= mask & 0x1fff;
 			t.fs.val.pfvf_vld = 1;
 			t.fs.mask.pfvf_vld = 1;
 		} else if (!parse_val_mask("tos", args, &val, &mask)) {
 			t.fs.val.tos = val;
 			t.fs.mask.tos = mask;
 		} else if (!parse_val_mask("proto", args, &val, &mask)) {
 			t.fs.val.proto = val;
 			t.fs.mask.proto = mask;
 		} else if (!parse_val_mask("ethtype", args, &val, &mask)) {
 			t.fs.val.ethtype = val;
 			t.fs.mask.ethtype = mask;
 		} else if (!parse_val_mask("macidx", args, &val, &mask)) {
 			t.fs.val.macidx = val;
 			t.fs.mask.macidx = mask;
 		} else if (!parse_val_mask("matchtype", args, &val, &mask)) {
 			t.fs.val.matchtype = val;
 			t.fs.mask.matchtype = mask;
 		} else if (!parse_val_mask("frag", args, &val, &mask)) {
 			t.fs.val.frag = val;
 			t.fs.mask.frag = mask;
 		} else if (!parse_val_mask("dport", args, &val, &mask)) {
 			t.fs.val.dport = val;
 			t.fs.mask.dport = mask;
 		} else if (!parse_val_mask("sport", args, &val, &mask)) {
 			t.fs.val.sport = val;
 			t.fs.mask.sport = mask;
 		} else if (!parse_ipaddr("dip", args, &af, t.fs.val.dip,
 		    t.fs.mask.dip)) {
 			/* nada */;
 		} else if (!parse_ipaddr("sip", args, &af, t.fs.val.sip,
 		    t.fs.mask.sip)) {
 			/* nada */;
 		} else if (!strcmp(argv[start_arg], "action")) {
 			if (!strcmp(argv[start_arg + 1], "pass"))
 				t.fs.action = FILTER_PASS;
 			else if (!strcmp(argv[start_arg + 1], "drop"))
 				t.fs.action = FILTER_DROP;
 			else if (!strcmp(argv[start_arg + 1], "switch"))
 				t.fs.action = FILTER_SWITCH;
 			else {
 				warnx("invalid action \"%s\"; must be one of"
 				     " \"pass\", \"drop\" or \"switch\"",
 				     argv[start_arg + 1]);
 				return (EINVAL);
 			}
 		} else if (!parse_val("hitcnts", args, &val)) {
 			t.fs.hitcnts = val;
 		} else if (!parse_val("prio", args, &val)) {
 			t.fs.prio = val;
 		} else if (!parse_val("rpttid", args, &val)) {
 			/* The value must parse but is otherwise ignored. */
 			t.fs.rpttid = 1;
 		} else if (!parse_val("queue", args, &val)) {
 			t.fs.dirsteer = 1;
 			t.fs.iq = val;
 		} else if (!parse_val("tcbhash", args, &val)) {
 			/* The value must parse but is otherwise ignored. */
 			t.fs.maskhash = 1;
 			t.fs.dirsteerhash = 1;
 		} else if (!parse_val("eport", args, &val)) {
 			t.fs.eport = val;
 		} else if (!strcmp(argv[start_arg], "dmac")) {
 			struct ether_addr *daddr;
 
 			daddr = ether_aton(argv[start_arg + 1]);
 			if (daddr == NULL) {
 				warnx("invalid dmac address \"%s\"",
 				    argv[start_arg + 1]);
 				return (EINVAL);
 			}
 			memcpy(t.fs.dmac, daddr, ETHER_ADDR_LEN);
 			t.fs.newdmac = 1;
 		} else if (!strcmp(argv[start_arg], "smac")) {
 			struct ether_addr *saddr;
 
 			saddr = ether_aton(argv[start_arg + 1]);
 			if (saddr == NULL) {
 				warnx("invalid smac address \"%s\"",
 				    argv[start_arg + 1]);
 				return (EINVAL);
 			}
 			memcpy(t.fs.smac, saddr, ETHER_ADDR_LEN);
 			t.fs.newsmac = 1;
 		} else if (!strcmp(argv[start_arg], "vlan")) {
 			/*
 			 * "vlan" is overloaded: "none", "=<vlan>" and
 			 * "+<vlan>" are rewrite actions, while a bare
 			 * number is a match on the VLAN tag.
 			 */
 			char *p;
 			if (!strcmp(argv[start_arg + 1], "none")) {
 				t.fs.newvlan = VLAN_REMOVE;
 			} else if (argv[start_arg + 1][0] == '=') {
 				t.fs.newvlan = VLAN_REWRITE;
 			} else if (argv[start_arg + 1][0] == '+') {
 				t.fs.newvlan = VLAN_INSERT;
 			} else if (
 			    /* <ctype.h> needs an unsigned char value. */
 			    isdigit((unsigned char)argv[start_arg + 1][0]) &&
 			    !parse_val_mask("vlan", args, &val, &mask)) {
 				t.fs.val.vlan = val;
 				t.fs.mask.vlan = mask;
 				t.fs.val.vlan_vld = 1;
 				t.fs.mask.vlan_vld = 1;
 			} else {
 				warnx("unknown vlan parameter \"%s\"; must"
 				     " be one of \"none\", \"=<vlan>\", "
 				     " \"+<vlan>\", or \"<vlan>\"",
 				     argv[start_arg + 1]);
 				return (EINVAL);
 			}
 			if (t.fs.newvlan == VLAN_REWRITE ||
 			    t.fs.newvlan == VLAN_INSERT) {
 				/* Skip the leading '=' or '+'. */
 				t.fs.vlan = strtoul(argv[start_arg + 1] + 1,
 				    &p, 0);
 				if (p == argv[start_arg + 1] + 1 || p[0] != 0) {
 					warnx("invalid vlan \"%s\"",
 					     argv[start_arg + 1]);
 					return (EINVAL);
 				}
 			}
 		} else {
 			warnx("invalid parameter \"%s\"", argv[start_arg]);
 			return (EINVAL);
 		}
 	}
 	/* An odd argument count leaves a parameter name without a value. */
 	if (start_arg != argc) {
 		warnx("no value for \"%s\"", argv[start_arg]);
 		return (EINVAL);
 	}
 
 	/*
 	 * Check basic sanity of option combinations.
 	 */
 	if (t.fs.action != FILTER_SWITCH &&
 	    (t.fs.eport || t.fs.newdmac || t.fs.newsmac || t.fs.newvlan)) {
 		warnx("eport, dmac, smac and vlan only make sense with"
 		     " \"action switch\"");
 		return (EINVAL);
 	}
 	if (t.fs.action != FILTER_PASS &&
 	    (t.fs.rpttid || t.fs.dirsteer || t.fs.maskhash)) {
 		warnx("rpttid, queue and tcbhash don't make sense with"
 		     " action \"drop\" or \"switch\"");
 		return (EINVAL);
 	}
 	if (t.fs.val.ovlan_vld && t.fs.val.pfvf_vld) {
 		warnx("ovlan and vnic_id (pf/vf) are mutually exclusive");
 		return (EINVAL);
 	}
 
 	t.fs.type = (af == AF_INET6 ? 1 : 0); /* default IPv4 */
 	return doit(CHELSIO_T4_SET_FILTER, &t);
 }
 
 /*
  * Dispatch the "filter" subcommand:
  *
  *	list				show all filters
  *	mode				show the global filter mode
  *	mode <field> ...		set the global filter mode
  *	<idx> delete|clear		remove the filter at <idx>
  *	<idx> <param> <val> ...		install a filter at <idx>
  *
  * Returns 0 on success or an errno value.
  */
 static int
 filter_cmd(int argc, const char *argv[])
 {
 	long long val;
 	uint32_t idx;
 	char *s;
 
 	if (argc == 0) {
 		warnx("filter: no arguments.");
 		return (EINVAL);
 	}
 
 	/* list */
 	if (strcmp(argv[0], "list") == 0) {
 		if (argc != 1)
 			warnx("trailing arguments after \"list\" ignored.");
 
 		return show_filters();
 	}
 
 	/* mode */
 	if (argc == 1 && strcmp(argv[0], "mode") == 0)
 		return get_filter_mode();
 
 	/* mode <mode> */
 	if (strcmp(argv[0], "mode") == 0)
 		return set_filter_mode(argc - 1, argv + 1);
 
 	/*
 	 * <idx> ...
 	 *
 	 * val is signed, so negative values are rejected explicitly; the
 	 * cast below would otherwise wrap them to a large unsigned index.
 	 * A non-empty remainder in *s is treated as "not a number".
 	 */
 	s = str_to_number(argv[0], NULL, &val);
 	if (*s || val < 0 || val > 0xffffffffU) {
 		warnx("\"%s\" is neither an index nor a filter subcommand.",
 		    argv[0]);
 		return (EINVAL);
 	}
 	idx = (uint32_t) val;
 
 	/* <idx> delete|clear */
 	if (argc == 2 &&
 	    (strcmp(argv[1], "delete") == 0 || strcmp(argv[1], "clear") == 0)) {
 		return del_filter(idx);
 	}
 
 	/* <idx> [<param> <val>] ... */
 	return set_filter(idx, argc - 1, argv + 1);
 }
 
 /*
  * Shows the fields of a multi-word structure.  The structure is considered to
  * consist of @nwords 32-bit words (i.e, it's an (@nwords * 32)-bit structure)
  * whose fields are described by @fd.  The 32-bit words are given in @words
  * starting with the least significant 32-bit word.
  *
  * @fd is terminated by an entry with a NULL name.  Each field occupies bits
  * start..end (inclusive) of the structure and may straddle up to three
  * words.  Words at or beyond @nwords are never read; they are treated as
  * zero.  If islog2 is set the extracted value is replaced by 2^value, and
  * the result is shifted left by the entry's shift before being printed in
  * hex or decimal according to the entry's hex flag.  Names are left-aligned
  * in a column as wide as the longest name.
  */
 static void
 show_struct(const uint32_t *words, int nwords, const struct field_desc *fd)
 {
 	unsigned int w = 0;
 	const struct field_desc *p;
 
 	for (p = fd; p->name; p++)
 		w = max(w, strlen(p->name));
 
 	for (; fd->name; fd++) {
 		unsigned long long data = 0;
 		int first_word = fd->start / 32;
 		int shift = fd->start % 32;
 		int width = fd->end - fd->start + 1;
 		/* A shift by the full type width would be undefined. */
 		unsigned long long mask =
 		    width >= 64 ? ~0ULL : (1ULL << width) - 1;
 
 		/*
 		 * Gather up to 64 bits starting at bit "start".  Each word is
 		 * fetched only if it lies inside the structure, so a field
 		 * near the top does not cause a read past the end of @words.
 		 */
 		if (first_word < nwords)
 			data = words[first_word] >> shift;
 		if (first_word + 1 < nwords)
 			data |= (uint64_t)words[first_word + 1] << (32 - shift);
 		if (shift && first_word + 2 < nwords)
 			data |= (uint64_t)words[first_word + 2] << (64 - shift);
 		data &= mask;
 
 		/* 64-bit shift: a plain int 1 overflows once data reaches 31. */
 		if (fd->islog2)
 			data = 1ULL << data;
 
 		printf("%-*s ", (int)w, fd->name);
 		printf(fd->hex ? "%#llx\n" : "%llu\n", data << fd->shift);
 	}
 }
 
 #define FIELD(name, start, end) { name, start, end, 0, 0, 0 }
 #define FIELD1(name, start) FIELD(name, start, start)
 
 /*
  * Print the fields of an SGE context using the T5 layouts below.  The
  * layout is chosen by p->mem_id; nothing is printed for a mem_id that
  * matches none of the four context types handled here.
  *
  * Each table lists its fields from the most significant bit down and ends
  * with a NULL-named entry, as show_struct() expects.  FIELD(name, start,
  * end) describes a plain field and FIELD1(name, bit) a single-bit one.
  * Entries written out in full carry extra values after start/end,
  * presumably shift, hex and islog2 in that order (TODO: confirm against
  * struct field_desc).
  */
 static void
 show_t5_ctxt(const struct t4_sge_context *p)
 {
 	/* Egress context whose QueueType bit (bit 1) is clear. */
 	static struct field_desc egress_t5[] = {
 		FIELD("DCA_ST:", 181, 191),
 		FIELD1("StatusPgNS:", 180),
 		FIELD1("StatusPgRO:", 179),
 		FIELD1("FetchNS:", 178),
 		FIELD1("FetchRO:", 177),
 		FIELD1("Valid:", 176),
 		FIELD("PCIeDataChannel:", 174, 175),
 		FIELD1("StatusPgTPHintEn:", 173),
 		FIELD("StatusPgTPHint:", 171, 172),
 		FIELD1("FetchTPHintEn:", 170),
 		FIELD("FetchTPHint:", 168, 169),
 		FIELD1("FCThreshOverride:", 167),
 		{ "WRLength:", 162, 166, 9, 0, 1 },
 		FIELD1("WRLengthKnown:", 161),
 		FIELD1("ReschedulePending:", 160),
 		FIELD1("OnChipQueue:", 159),
 		FIELD1("FetchSizeMode:", 158),
 		{ "FetchBurstMin:", 156, 157, 4, 0, 1 },
 		FIELD1("FLMPacking:", 155),
 		FIELD("FetchBurstMax:", 153, 154),
 		FIELD("uPToken:", 133, 152),
 		FIELD1("uPTokenEn:", 132),
 		FIELD1("UserModeIO:", 131),
 		FIELD("uPFLCredits:", 123, 130),
 		FIELD1("uPFLCreditEn:", 122),
 		FIELD("FID:", 111, 121),
 		FIELD("HostFCMode:", 109, 110),
 		FIELD1("HostFCOwner:", 108),
 		{ "CIDXFlushThresh:", 105, 107, 0, 0, 1 },
 		FIELD("CIDX:", 89, 104),
 		FIELD("PIDX:", 73, 88),
 		{ "BaseAddress:", 18, 72, 9, 1 },
 		FIELD("QueueSize:", 2, 17),
 		FIELD1("QueueType:", 1),
 		FIELD1("CachePriority:", 0),
 		{ NULL }
 	};
 	/* Egress context whose QueueType bit (bit 1) is set. */
 	static struct field_desc fl_t5[] = {
 		FIELD("DCA_ST:", 181, 191),
 		FIELD1("StatusPgNS:", 180),
 		FIELD1("StatusPgRO:", 179),
 		FIELD1("FetchNS:", 178),
 		FIELD1("FetchRO:", 177),
 		FIELD1("Valid:", 176),
 		FIELD("PCIeDataChannel:", 174, 175),
 		FIELD1("StatusPgTPHintEn:", 173),
 		FIELD("StatusPgTPHint:", 171, 172),
 		FIELD1("FetchTPHintEn:", 170),
 		FIELD("FetchTPHint:", 168, 169),
 		FIELD1("FCThreshOverride:", 167),
 		FIELD1("ReschedulePending:", 160),
 		FIELD1("OnChipQueue:", 159),
 		FIELD1("FetchSizeMode:", 158),
 		{ "FetchBurstMin:", 156, 157, 4, 0, 1 },
 		FIELD1("FLMPacking:", 155),
 		FIELD("FetchBurstMax:", 153, 154),
 		FIELD1("FLMcongMode:", 152),
 		FIELD("MaxuPFLCredits:", 144, 151),
 		FIELD("FLMcontextID:", 133, 143),
 		FIELD1("uPTokenEn:", 132),
 		FIELD1("UserModeIO:", 131),
 		FIELD("uPFLCredits:", 123, 130),
 		FIELD1("uPFLCreditEn:", 122),
 		FIELD("FID:", 111, 121),
 		FIELD("HostFCMode:", 109, 110),
 		FIELD1("HostFCOwner:", 108),
 		{ "CIDXFlushThresh:", 105, 107, 0, 0, 1 },
 		FIELD("CIDX:", 89, 104),
 		FIELD("PIDX:", 73, 88),
 		{ "BaseAddress:", 18, 72, 9, 1 },
 		FIELD("QueueSize:", 2, 17),
 		FIELD1("QueueType:", 1),
 		FIELD1("CachePriority:", 0),
 		{ NULL }
 	};
 	/* Ingress context (SGE_CONTEXT_INGRESS). */
 	static struct field_desc ingress_t5[] = {
 		FIELD("DCA_ST:", 143, 153),
 		FIELD1("ISCSICoalescing:", 142),
 		FIELD1("Queue_Valid:", 141),
 		FIELD1("TimerPending:", 140),
 		FIELD1("DropRSS:", 139),
 		FIELD("PCIeChannel:", 137, 138),
 		FIELD1("SEInterruptArmed:", 136),
 		FIELD1("CongestionMgtEnable:", 135),
 		FIELD1("NoSnoop:", 134),
 		FIELD1("RelaxedOrdering:", 133),
 		FIELD1("GTSmode:", 132),
 		FIELD1("TPHintEn:", 131),
 		FIELD("TPHint:", 129, 130),
 		FIELD1("UpdateScheduling:", 128),
 		FIELD("UpdateDelivery:", 126, 127),
 		FIELD1("InterruptSent:", 125),
 		FIELD("InterruptIDX:", 114, 124),
 		FIELD1("InterruptDestination:", 113),
 		FIELD1("InterruptArmed:", 112),
 		FIELD("RxIntCounter:", 106, 111),
 		FIELD("RxIntCounterThreshold:", 104, 105),
 		FIELD1("Generation:", 103),
 		{ "BaseAddress:", 48, 102, 9, 1 },
 		FIELD("PIDX:", 32, 47),
 		FIELD("CIDX:", 16, 31),
 		{ "QueueSize:", 4, 15, 4, 0 },
 		{ "QueueEntrySize:", 2, 3, 4, 0, 1 },
 		FIELD1("QueueEntryOverride:", 1),
 		FIELD1("CachePriority:", 0),
 		{ NULL }
 	};
 	/* FLM context (SGE_CONTEXT_FLM). */
 	static struct field_desc flm_t5[] = {
 		FIELD1("Valid:", 89),
 		FIELD("SplitLenMode:", 87, 88),
 		FIELD1("TPHintEn:", 86),
 		FIELD("TPHint:", 84, 85),
 		FIELD1("NoSnoop:", 83),
 		FIELD1("RelaxedOrdering:", 82),
 		FIELD("DCA_ST:", 71, 81),
 		FIELD("EQid:", 54, 70),
 		FIELD("SplitEn:", 52, 53),
 		FIELD1("PadEn:", 51),
 		FIELD1("PackEn:", 50),
 		FIELD1("Cache_Lock :", 49),
 		FIELD1("CongDrop:", 48),
 		FIELD("PackOffset:", 16, 47),
 		FIELD("CIDX:", 8, 15),
 		FIELD("PIDX:", 0, 7),
 		{ NULL }
 	};
 	/* CNM context (SGE_CONTEXT_CNM). */
 	static struct field_desc conm_t5[] = {
 		FIELD1("CngMPSEnable:", 21),
 		FIELD("CngTPMode:", 19, 20),
 		FIELD1("CngDBPHdr:", 18),
 		FIELD1("CngDBPData:", 17),
 		FIELD1("CngIMSG:", 16),
 		{ "CngChMap:", 0, 15, 0, 1, 0 },
 		{ NULL }
 	};
 
 	/*
 	 * The second argument is the context size in 32-bit words.  For an
 	 * egress context, bit 1 of the first word (the "QueueType:" field in
 	 * the tables above) selects between the two egress layouts.
 	 */
 	if (p->mem_id == SGE_CONTEXT_EGRESS)
 		show_struct(p->data, 6, (p->data[0] & 2) ? fl_t5 : egress_t5);
 	else if (p->mem_id == SGE_CONTEXT_FLM)
 		show_struct(p->data, 3, flm_t5);
 	else if (p->mem_id == SGE_CONTEXT_INGRESS)
 		show_struct(p->data, 5, ingress_t5);
 	else if (p->mem_id == SGE_CONTEXT_CNM)
 		show_struct(p->data, 1, conm_t5);
 }
 
 /*
  * Print the fields of an SGE context using the T4 layouts below.  The
  * layout is chosen by p->mem_id; nothing is printed for a mem_id that
  * matches none of the four context types handled here.
  *
  * Each table lists its fields from the most significant bit down and ends
  * with a NULL-named entry, as show_struct() expects.  FIELD(name, start,
  * end) describes a plain field and FIELD1(name, bit) a single-bit one.
  * Entries written out in full carry extra values after start/end,
  * presumably shift, hex and islog2 in that order (TODO: confirm against
  * struct field_desc).
  */
 static void
 show_t4_ctxt(const struct t4_sge_context *p)
 {
 	/* Egress context whose QueueType bit (bit 1) is clear. */
 	static struct field_desc egress_t4[] = {
 		FIELD1("StatusPgNS:", 180),
 		FIELD1("StatusPgRO:", 179),
 		FIELD1("FetchNS:", 178),
 		FIELD1("FetchRO:", 177),
 		FIELD1("Valid:", 176),
 		FIELD("PCIeDataChannel:", 174, 175),
 		FIELD1("DCAEgrQEn:", 173),
 		FIELD("DCACPUID:", 168, 172),
 		FIELD1("FCThreshOverride:", 167),
 		FIELD("WRLength:", 162, 166),
 		FIELD1("WRLengthKnown:", 161),
 		FIELD1("ReschedulePending:", 160),
 		FIELD1("OnChipQueue:", 159),
 		FIELD1("FetchSizeMode", 158),
 		{ "FetchBurstMin:", 156, 157, 4, 0, 1 },
 		{ "FetchBurstMax:", 153, 154, 6, 0, 1 },
 		FIELD("uPToken:", 133, 152),
 		FIELD1("uPTokenEn:", 132),
 		FIELD1("UserModeIO:", 131),
 		FIELD("uPFLCredits:", 123, 130),
 		FIELD1("uPFLCreditEn:", 122),
 		FIELD("FID:", 111, 121),
 		FIELD("HostFCMode:", 109, 110),
 		FIELD1("HostFCOwner:", 108),
 		{ "CIDXFlushThresh:", 105, 107, 0, 0, 1 },
 		FIELD("CIDX:", 89, 104),
 		FIELD("PIDX:", 73, 88),
 		{ "BaseAddress:", 18, 72, 9, 1 },
 		FIELD("QueueSize:", 2, 17),
 		FIELD1("QueueType:", 1),
 		FIELD1("CachePriority:", 0),
 		{ NULL }
 	};
 	/* Egress context whose QueueType bit (bit 1) is set. */
 	static struct field_desc fl_t4[] = {
 		FIELD1("StatusPgNS:", 180),
 		FIELD1("StatusPgRO:", 179),
 		FIELD1("FetchNS:", 178),
 		FIELD1("FetchRO:", 177),
 		FIELD1("Valid:", 176),
 		FIELD("PCIeDataChannel:", 174, 175),
 		FIELD1("DCAEgrQEn:", 173),
 		FIELD("DCACPUID:", 168, 172),
 		FIELD1("FCThreshOverride:", 167),
 		FIELD1("ReschedulePending:", 160),
 		FIELD1("OnChipQueue:", 159),
 		FIELD1("FetchSizeMode", 158),
 		{ "FetchBurstMin:", 156, 157, 4, 0, 1 },
 		{ "FetchBurstMax:", 153, 154, 6, 0, 1 },
 		FIELD1("FLMcongMode:", 152),
 		FIELD("MaxuPFLCredits:", 144, 151),
 		FIELD("FLMcontextID:", 133, 143),
 		FIELD1("uPTokenEn:", 132),
 		FIELD1("UserModeIO:", 131),
 		FIELD("uPFLCredits:", 123, 130),
 		FIELD1("uPFLCreditEn:", 122),
 		FIELD("FID:", 111, 121),
 		FIELD("HostFCMode:", 109, 110),
 		FIELD1("HostFCOwner:", 108),
 		{ "CIDXFlushThresh:", 105, 107, 0, 0, 1 },
 		FIELD("CIDX:", 89, 104),
 		FIELD("PIDX:", 73, 88),
 		{ "BaseAddress:", 18, 72, 9, 1 },
 		FIELD("QueueSize:", 2, 17),
 		FIELD1("QueueType:", 1),
 		FIELD1("CachePriority:", 0),
 		{ NULL }
 	};
 	/* Ingress context (SGE_CONTEXT_INGRESS). */
 	static struct field_desc ingress_t4[] = {
 		FIELD1("NoSnoop:", 145),
 		FIELD1("RelaxedOrdering:", 144),
 		FIELD1("GTSmode:", 143),
 		FIELD1("ISCSICoalescing:", 142),
 		FIELD1("Valid:", 141),
 		FIELD1("TimerPending:", 140),
 		FIELD1("DropRSS:", 139),
 		FIELD("PCIeChannel:", 137, 138),
 		FIELD1("SEInterruptArmed:", 136),
 		FIELD1("CongestionMgtEnable:", 135),
 		FIELD1("DCAIngQEnable:", 134),
 		FIELD("DCACPUID:", 129, 133),
 		FIELD1("UpdateScheduling:", 128),
 		FIELD("UpdateDelivery:", 126, 127),
 		FIELD1("InterruptSent:", 125),
 		FIELD("InterruptIDX:", 114, 124),
 		FIELD1("InterruptDestination:", 113),
 		FIELD1("InterruptArmed:", 112),
 		FIELD("RxIntCounter:", 106, 111),
 		FIELD("RxIntCounterThreshold:", 104, 105),
 		FIELD1("Generation:", 103),
 		{ "BaseAddress:", 48, 102, 9, 1 },
 		FIELD("PIDX:", 32, 47),
 		FIELD("CIDX:", 16, 31),
 		{ "QueueSize:", 4, 15, 4, 0 },
 		{ "QueueEntrySize:", 2, 3, 4, 0, 1 },
 		FIELD1("QueueEntryOverride:", 1),
 		FIELD1("CachePriority:", 0),
 		{ NULL }
 	};
 	/* FLM context (SGE_CONTEXT_FLM). */
 	static struct field_desc flm_t4[] = {
 		FIELD1("NoSnoop:", 79),
 		FIELD1("RelaxedOrdering:", 78),
 		FIELD1("Valid:", 77),
 		FIELD("DCACPUID:", 72, 76),
 		FIELD1("DCAFLEn:", 71),
 		FIELD("EQid:", 54, 70),
 		FIELD("SplitEn:", 52, 53),
 		FIELD1("PadEn:", 51),
 		FIELD1("PackEn:", 50),
 		FIELD1("DBpriority:", 48),
 		FIELD("PackOffset:", 16, 47),
 		FIELD("CIDX:", 8, 15),
 		FIELD("PIDX:", 0, 7),
 		{ NULL }
 	};
 	/* CNM context (SGE_CONTEXT_CNM). */
 	static struct field_desc conm_t4[] = {
 		FIELD1("CngDBPHdr:", 6),
 		FIELD1("CngDBPData:", 5),
 		FIELD1("CngIMSG:", 4),
 		{ "CngChMap:", 0, 3, 0, 1, 0},
 		{ NULL }
 	};
 
 	/*
 	 * The second argument is the context size in 32-bit words.  For an
 	 * egress context, bit 1 of the first word (the "QueueType:" field in
 	 * the tables above) selects between the two egress layouts.
 	 */
 	if (p->mem_id == SGE_CONTEXT_EGRESS)
 		show_struct(p->data, 6, (p->data[0] & 2) ? fl_t4 : egress_t4);
 	else if (p->mem_id == SGE_CONTEXT_FLM)
 		show_struct(p->data, 3, flm_t4);
 	else if (p->mem_id == SGE_CONTEXT_INGRESS)
 		show_struct(p->data, 5, ingress_t4);
 	else if (p->mem_id == SGE_CONTEXT_CNM)
 		show_struct(p->data, 1, conm_t4);
 }
 
 #undef FIELD
 #undef FIELD1
 
 /*
  * "context" command: fetch one SGE context from the driver and display it.
  *
  * argv[0] selects the context type (egress, ingress, fl, or cong) and
  * argv[1] is the context id.  Returns EINVAL for bad arguments, otherwise
  * the result of the CHELSIO_T4_GET_SGE_CONTEXT request (0 on success).
  */
 static int
 get_sge_context(int argc, const char *argv[])
 {
 	struct t4_sge_context cntxt = {0};
 	const char *type;
 	char *end;
 	long cid;
 	int rc;
 
 	if (argc != 2) {
 		warnx("sge_context: incorrect number of arguments.");
 		return (EINVAL);
 	}
 	type = argv[0];
 
 	if (strcmp(type, "egress") == 0) {
 		cntxt.mem_id = SGE_CONTEXT_EGRESS;
 	} else if (strcmp(type, "ingress") == 0) {
 		cntxt.mem_id = SGE_CONTEXT_INGRESS;
 	} else if (strcmp(type, "fl") == 0) {
 		cntxt.mem_id = SGE_CONTEXT_FLM;
 	} else if (strcmp(type, "cong") == 0) {
 		cntxt.mem_id = SGE_CONTEXT_CNM;
 	} else {
 		warnx("unknown context type \"%s\"; known types are egress, "
 		    "ingress, fl, and cong.", type);
 		return (EINVAL);
 	}
 
 	/* The whole of argv[1] must parse as a number. */
 	end = str_to_number(argv[1], &cid, NULL);
 	if (*end != '\0') {
 		warnx("invalid context id \"%s\"", argv[1]);
 		return (EINVAL);
 	}
 	cntxt.cid = cid;
 
 	rc = doit(CHELSIO_T4_GET_SGE_CONTEXT, &cntxt);
 	if (rc != 0)
 		return (rc);
 
 	/* chip_id 4 gets the T4 decoder; anything else the T5 decoder. */
 	if (chip_id == 4)
 		show_t4_ctxt(&cntxt);
 	else
 		show_t5_ctxt(&cntxt);
 
 	return (0);
 }
 
 /*
  * "loadfw" command: map the firmware image named by argv[0] read-only and
  * hand it to the driver with CHELSIO_T4_LOAD_FW.
  *
  * Returns EINVAL for a bad argument count, the errno of a failed
  * open/fstat/mmap, or otherwise whatever doit() returns.
  */
 static int
 loadfw(int argc, const char *argv[])
 {
 	int rc, fd;
 	struct t4_data data = {0};
 	const char *fname;
 	struct stat st = {0};
 
 	if (argc != 1) {
 		warnx("loadfw: incorrect number of arguments.");
 		return (EINVAL);
 	}
 	fname = argv[0];
 
 	fd = open(fname, O_RDONLY);
 	if (fd < 0) {
 		/*
 		 * Save errno before calling anything else: warn() and close()
 		 * are allowed to overwrite it, and the saved value is what the
 		 * caller must see.
 		 */
 		rc = errno;
 		warn("open(%s)", fname);
 		return (rc);
 	}
 
 	if (fstat(fd, &st) < 0) {
 		rc = errno;
 		warn("fstat");
 		close(fd);
 		return (rc);
 	}
 
 	data.len = st.st_size;
 	data.data = mmap(0, data.len, PROT_READ, MAP_PRIVATE, fd, 0);
 	if (data.data == MAP_FAILED) {
 		rc = errno;
 		warn("mmap");
 		close(fd);
 		return (rc);
 	}
 
 	rc = doit(CHELSIO_T4_LOAD_FW, &data);
 	munmap(data.data, data.len);
 	close(fd);
 	return (rc);
 }
 
 /*
  * Read 'len' bytes starting at 'addr' with CHELSIO_T4_GET_MEM into a
  * temporary heap buffer and, if the request succeeds and 'output' is not
  * NULL, pass the buffer and its length to 'output'.  The buffer is freed
  * before returning.
  *
  * Returns errno if the buffer cannot be allocated, otherwise the result of
  * doit().
  */
 static int
 read_mem(uint32_t addr, uint32_t len, void (*output)(uint32_t *, uint32_t))
 {
 	struct t4_mem_range mr;
 	int rc;
 
 	mr.addr = addr;
 	mr.len = len;
 	mr.data = malloc(mr.len);
 	if (mr.data == NULL) {
 		warn("read_mem: malloc");
 		return (errno);
 	}
 
 	rc = doit(CHELSIO_T4_GET_MEM, &mr);
 	if (rc == 0 && output)
 		(*output)(mr.data, mr.len);
 
 	free(mr.data);
 	return (rc);
 }
 
 /*
  * Display memory as list of 'n' 4-byte values per line.
  *
  * 'buf' holds 'len' bytes.  Each 32-bit word is passed through htonl() and
  * printed as eight hex digits.  Only whole words are printed: if 'len' is
  * not a multiple of 4 the trailing 1-3 bytes are ignored.  (Subtracting 4
  * from a smaller unsigned length would wrap around and run the loop far
  * past the end of the buffer.)
  */
 static void
 show_mem(uint32_t *buf, uint32_t len)
 {
 	const char *s;
 	int i, n = 8;
 
 	while (len >= 4) {
 		for (i = 0; len >= 4 && i < n; i++, buf++, len -= 4) {
 			s = i ? " " : "";
 			printf("%s%08x", s, htonl(*buf));
 		}
 		printf("\n");
 	}
 }
 
 /*
  * "memdump" command: argv[0] is the start address and argv[1] the length.
  * Both must parse completely as numbers; the range is then read with
  * read_mem() and printed by show_mem().
  */
 static int
 memdump(int argc, const char *argv[])
 {
 	uint32_t addr, len;
 	long num;
 	char *end;
 
 	if (argc != 2) {
 		warnx("incorrect number of arguments.");
 		return (EINVAL);
 	}
 
 	end = str_to_number(argv[0], &num, NULL);
 	if (*end != '\0') {
 		warnx("invalid address \"%s\"", argv[0]);
 		return (EINVAL);
 	}
 	addr = num;
 
 	end = str_to_number(argv[1], &num, NULL);
 	if (*end != '\0') {
 		warnx("memdump: invalid length \"%s\"", argv[1]);
 		return (EINVAL);
 	}
 	len = num;
 
 	return (read_mem(addr, len, show_mem));
 }
 
 /*
  * Display TCB as list of 'n' 4-byte values per line.
  *
  * 'buf' holds 'len' bytes.  Each 32-bit word is passed through htonl() and
  * printed as eight hex digits.  Only whole words are printed: if 'len' is
  * not a multiple of 4 the trailing 1-3 bytes are ignored, because
  * subtracting 4 from a smaller unsigned length would wrap around and walk
  * off the end of the buffer.
  */
 static void
 show_tcb(uint32_t *buf, uint32_t len)
 {
 	const char *s;
 	int i, n = 8;
 
 	while (len >= 4) {
 		for (i = 0; len >= 4 && i < n; i++, buf++, len -= 4) {
 			s = i ? " " : "";
 			printf("%s%08x", s, htonl(*buf));
 		}
 		printf("\n");
 	}
 }
 
 #define A_TP_CMM_TCB_BASE 0x7d10
 #define TCB_SIZE 128
 /*
  * "tcb" command: argv[0] is a tid.  The TCB address is the value read from
  * register A_TP_CMM_TCB_BASE plus tid * TCB_SIZE; TCB_SIZE bytes from there
  * are read with read_mem() and printed by show_tcb().
  */
 static int
 read_tcb(int argc, const char *argv[])
 {
 	long long base;
 	unsigned int tid;
 	uint32_t addr;
 	long num;
 	char *end;
 	int rc;
 
 	if (argc != 1) {
 		warnx("incorrect number of arguments.");
 		return (EINVAL);
 	}
 
 	end = str_to_number(argv[0], &num, NULL);
 	if (*end != '\0') {
 		warnx("invalid tid \"%s\"", argv[0]);
 		return (EINVAL);
 	}
 	tid = num;
 
 	rc = read_reg(A_TP_CMM_TCB_BASE, 4, &base);
 	if (rc != 0)
 		return (rc);
 
 	addr = base + tid * TCB_SIZE;
 
 	return (read_mem(addr, TCB_SIZE, show_tcb));
 }
 
 /*
  * "i2c" command: <port> <devaddr> <offset> [<len>].
  *
  * Reads 'len' bytes (default 1) with CHELSIO_T4_GET_I2C and prints each
  * byte in hex and decimal.  port, devaddr and offset must lie in
  * 0..UCHAR_MAX and len in 0..sizeof(i2cd.data); negative values are
  * rejected explicitly rather than being silently truncated when stored.
  *
  * Returns EINVAL for bad arguments, otherwise the result of doit().
  */
 static int
 read_i2c(int argc, const char *argv[])
 {
 	char *p;
 	long l;
 	struct t4_i2c_data i2cd;
 	int rc, i;
 
 	if (argc < 3 || argc > 4) {
 		warnx("incorrect number of arguments.");
 		return (EINVAL);
 	}
 
 	p = str_to_number(argv[0], &l, NULL);
 	if (*p || l < 0 || l > UCHAR_MAX) {
 		warnx("invalid port id \"%s\"", argv[0]);
 		return (EINVAL);
 	}
 	i2cd.port_id = l;
 
 	p = str_to_number(argv[1], &l, NULL);
 	if (*p || l < 0 || l > UCHAR_MAX) {
 		warnx("invalid i2c device address \"%s\"", argv[1]);
 		return (EINVAL);
 	}
 	i2cd.dev_addr = l;
 
 	p = str_to_number(argv[2], &l, NULL);
 	if (*p || l < 0 || l > UCHAR_MAX) {
 		warnx("invalid byte offset \"%s\"", argv[2]);
 		return (EINVAL);
 	}
 	i2cd.offset = l;
 
 	if (argc == 4) {
 		p = str_to_number(argv[3], &l, NULL);
 		/* Check the sign before the unsigned comparison. */
 		if (*p || l < 0 || (size_t)l > sizeof(i2cd.data)) {
 			warnx("invalid number of bytes \"%s\"", argv[3]);
 			return (EINVAL);
 		}
 		i2cd.len = l;
 	} else
 		i2cd.len = 1;
 
 	rc = doit(CHELSIO_T4_GET_I2C, &i2cd);
 	if (rc != 0)
 		return (rc);
 
 	for (i = 0; i < i2cd.len; i++)
 		printf("0x%x [%u]\n", i2cd.data[i], i2cd.data[i]);
 
 	return (0);
 }
 
 /*
  * "clearstats" command: argv[0] is a port id, which is passed to the
  * driver with CHELSIO_T4_CLEAR_STATS.
  */
 static int
 clearstats(int argc, const char *argv[])
 {
 	uint32_t port;
 	long num;
 	char *end;
 
 	if (argc != 1) {
 		warnx("incorrect number of arguments.");
 		return (EINVAL);
 	}
 
 	end = str_to_number(argv[0], &num, NULL);
 	if (*end != '\0') {
 		warnx("invalid port id \"%s\"", argv[0]);
 		return (EINVAL);
 	}
 	port = num;
 
 	return (doit(CHELSIO_T4_CLEAR_STATS, &port));
 }
 
 /*
  * "tracer list": print whether tracing is enabled (bit 1 of the register
  * at 0x9800) and then describe every tracer the driver reports.
  *
  * Tracers are fetched one at a time with CHELSIO_T4_GET_TRACER, starting
  * at index 0, until the request fails or comes back with t.idx == 0xff.
  * Returns the result of the last read_reg()/doit() call.
  */
 static int
 show_tracers(void)
 {
 	struct t4_tracer t;
 	char *s;
 	int rc, port_idx, i;
 	long long val;
 
 	/* Magic values: MPS_TRC_CFG = 0x9800. MPS_TRC_CFG[1:1] = TrcEn */
 	rc = read_reg(0x9800, 4, &val);
 	if (rc != 0)
 		return (rc);
 	printf("tracing is %s\n", val & 2 ? "ENABLED" : "DISABLED");
 
 	t.idx = 0;
 	for (t.idx = 0; ; t.idx++) {
 		rc = doit(CHELSIO_T4_GET_TRACER, &t);
 		if (rc != 0 || t.idx == 0xff)
 			break;
 
 		/*
 		 * t.tp.port is split into groups of four: each group gets a
 		 * label and a port index relative to the start of the group.
 		 */
 		if (t.tp.port < 4) {
 			s = "Rx";
 			port_idx = t.tp.port;
 		} else if (t.tp.port < 8) {
 			s = "Tx";
 			port_idx = t.tp.port - 4;
 		} else if (t.tp.port < 12) {
 			s = "loopback";
 			port_idx = t.tp.port - 8;
 		} else if (t.tp.port < 16) {
 			s = "MPS Rx";
 			port_idx = t.tp.port - 12;
 		} else if (t.tp.port < 20) {
 			s = "MPS Tx";
 			port_idx = t.tp.port - 16;
 		} else {
 			s = "unknown";
 			port_idx = t.tp.port;
 		}
 
 		printf("\ntracer %u (currently %s) captures ", t.idx,
 		    t.enabled ? "ENABLED" : "DISABLED");
 		if (t.tp.port < 8)
 			printf("port %u %s, ", port_idx, s);
 		else
 			printf("%s %u, ", s, port_idx);
 		printf("snap length: %u, min length: %u\n", t.tp.snap_len,
 		    t.tp.min_len);
 		printf("packets captured %smatch filter\n",
 		    t.tp.invert ? "do not " : "");
 		/*
 		 * With a non-zero skip_ofst the first skip_ofst * 2 words of
 		 * data/mask are printed as a separate pattern at offset 0.
 		 */
 		if (t.tp.skip_ofst) {
 			printf("filter pattern: ");
 			for (i = 0; i < t.tp.skip_ofst * 2; i += 2)
 				printf("%08x%08x", t.tp.data[i],
 				    t.tp.data[i + 1]);
 			printf("/");
 			for (i = 0; i < t.tp.skip_ofst * 2; i += 2)
 				printf("%08x%08x", t.tp.mask[i],
 				    t.tp.mask[i + 1]);
 			printf("@0\n");
 		}
 		/* Remaining words, printed with their byte offset. */
 		printf("filter pattern: ");
 		for (i = t.tp.skip_ofst * 2; i < T4_TRACE_LEN / 4; i += 2)
 			printf("%08x%08x", t.tp.data[i], t.tp.data[i + 1]);
 		printf("/");
 		for (i = t.tp.skip_ofst * 2; i < T4_TRACE_LEN / 4; i += 2)
 			printf("%08x%08x", t.tp.mask[i], t.tp.mask[i + 1]);
 		printf("@%u\n", (t.tp.skip_ofst + t.tp.skip_len) * 8);
 	}
 
 	return (rc);
 }
 
 /*
  * Enable or disable tracer 'idx' with CHELSIO_T4_SET_TRACER.  The request
  * is sent with valid = 0.
  *
  * The request is zeroed first, as set_tracer() does, so that no
  * uninitialised stack contents are handed to the ioctl in the fields this
  * function does not set.
  */
 static int
 tracer_onoff(uint8_t idx, int enabled)
 {
 	struct t4_tracer t;
 
 	bzero(&t, sizeof(t));
 	t.idx = idx;
 	t.enabled = enabled;
 	t.valid = 0;
 
 	return doit(CHELSIO_T4_SET_TRACER, &t);
 }
 
 /*
  * Run "/sbin/ifconfig <nexus> create" in a vfork()ed child with an empty
  * environment and stderr closed.  The parent returns immediately and does
  * not check whether the command succeeded.
  */
 static void
 create_tracing_ifnet()
 {
 	char *ifconfig_argv[] = {
 		"/sbin/ifconfig", __DECONST(char *, nexus), "create", NULL
 	};
 	char *empty_env[] = {NULL};
 
 	if (vfork() != 0)
 		return;		/* parent, or vfork failed */
 
 	/* Child: discard error output and exec ifconfig. */
 	close(STDERR_FILENO);
 	execve(ifconfig_argv[0], ifconfig_argv, empty_env);
 	_exit(0);
 }
 
 /*
  * XXX: Allow user to specify snaplen, minlen, and pattern (including inverted
  * matching).  Right now this is a quick-n-dirty implementation that traces the
  * first 128B of all tx or rx on a port
  *
  * The single argument must be exactly "tx<n>" or "rx<n>" with n in 0..3.
  * rx<n> is programmed as port n and tx<n> as port n + 4.
  */
 static int
 set_tracer(uint8_t idx, int argc, const char *argv[])
 {
 	struct t4_tracer t;
 	const char *spec;
 	int port, base;
 
 	bzero(&t, sizeof (t));
 	t.idx = idx;
 	t.enabled = 1;
 	t.valid = 1;
 
 	if (argc != 1) {
 		warnx("must specify tx<n> or rx<n>.");
 		return (EINVAL);
 	}
 	spec = argv[0];
 
 	if (strlen(spec) != 3) {
 		warnx("argument must be 3 characters (tx<n> or rx<n>)");
 		return (EINVAL);
 	}
 
 	/* Direction prefix decides the base of the port number. */
 	if (strncmp(spec, "tx", 2) == 0)
 		base = 4;
 	else if (strncmp(spec, "rx", 2) == 0)
 		base = 0;
 	else {
 		warnx("argument '%s' isn't tx<n> or rx<n>", spec);
 		return (EINVAL);
 	}
 
 	port = spec[2] - '0';
 	if (port < 0 || port > 3) {
 		warnx("'%c' in %s is invalid", spec[2], spec);
 		return (EINVAL);
 	}
 	port += base;
 
 	t.tp.port = port;
 	t.tp.snap_len = 128;
 	t.tp.min_len = 0;
 	t.tp.skip_ofst = 0;
 	t.tp.skip_len = 0;
 	t.tp.invert = 0;
 
 	create_tracing_ifnet();
 	return (doit(CHELSIO_T4_SET_TRACER, &t));
 }
 
 /*
  * "tracer" command dispatcher.
  *
  *   tracer list              show all tracers
  *   tracer <idx> disable     turn tracer <idx> off
  *   tracer <idx> enable      turn tracer <idx> on
  *   tracer <idx> ...         program tracer <idx> (see set_tracer)
  *
  * <idx> must be a number in 0..0xff; negative values are rejected instead
  * of being truncated into a valid-looking index.
  */
 static int
 tracer_cmd(int argc, const char *argv[])
 {
 	long long val;
 	uint8_t idx;
 	char *s;
 
 	if (argc == 0) {
 		warnx("tracer: no arguments.");
 		return (EINVAL);
 	}
 
 	/* list */
 	if (strcmp(argv[0], "list") == 0) {
 		if (argc != 1)
 			warnx("trailing arguments after \"list\" ignored.");
 
 		return show_tracers();
 	}
 
 	/* <idx> ... */
 	s = str_to_number(argv[0], NULL, &val);
 	if (*s || val < 0 || val > 0xff) {
 		warnx("\"%s\" is neither an index nor a tracer subcommand.",
 		    argv[0]);
 		return (EINVAL);
 	}
 	idx = (uint8_t)val;
 
 	/* <idx> disable */
 	if (argc == 2 && strcmp(argv[1], "disable") == 0)
 		return tracer_onoff(idx, 0);
 
 	/* <idx> enable */
 	if (argc == 2 && strcmp(argv[1], "enable") == 0)
 		return tracer_onoff(idx, 1);
 
 	/* <idx> ... */
 	return set_tracer(idx, argc - 1, argv + 1);
 }
 
 /*
  * "modinfo <port> raw": dump bytes 0..95 of I2C device 0xa0 on the given
  * port, sizeof(i2cd.data) bytes per request.  Each output line shows the
  * offset, the first eight bytes in hex (two groups of four), and the same
  * bytes as characters with '.' standing in for anything unprintable.
  */
 static int
 modinfo_raw(int port_id)
 {
 	struct t4_i2c_data i2cd;
 	uint8_t offset;
 	int rc, j;
 
 	for (offset = 0; offset < 96; offset += sizeof(i2cd.data)) {
 		bzero(&i2cd, sizeof(i2cd));
 		i2cd.port_id = port_id;
 		i2cd.dev_addr = 0xa0;
 		i2cd.offset = offset;
 		i2cd.len = sizeof(i2cd.data);
 		rc = doit(CHELSIO_T4_GET_I2C, &i2cd);
 		if (rc != 0)
 			return (rc);
 
 		/* Hex column: extra space before each group of four. */
 		printf("%02x:", offset);
 		for (j = 0; j < 8; j++)
 			printf("%s%02x", (j % 4) == 0 ? "  " : " ",
 			    i2cd.data[j]);
 
 		/* Character column. */
 		printf("  ");
 		for (j = 0; j < 8; j++) {
 			if (j == 4)
 				printf(" ");
 			printf("%c", isprint(i2cd.data[j]) ? i2cd.data[j] :
 			    '.');
 		}
 		printf("\n");
 	}
 
 	return (0);
 }
 
 /*
  * "modinfo <port> [raw]": print information about the transceiver module
  * in the given port.
  *
  * With "raw" the work is delegated to modinfo_raw().  Otherwise the ID,
  * vendor, serial number, part number and revision are read one byte at a
  * time (i2cd.len is 1) from the SFF_8472_BASE device, followed by the
  * diagnostic values if the module reports diagnostic support.
  *
  * Returns EINVAL for bad arguments, 0 on success, or the error from the
  * first failing CHELSIO_T4_GET_I2C request.
  */
 static int
 modinfo(int argc, const char *argv[])
 {
 	long port;
 	char string[16], *p;
 	struct t4_i2c_data i2cd;
 	int rc, i;
 	uint16_t temp, vcc, tx_bias, tx_power, rx_power;
 
 	if (argc < 1) {
 		warnx("must supply a port");
 		return (EINVAL);
 	}
 
 	if (argc > 2) {
 		warnx("too many arguments");
 		return (EINVAL);
 	}
 
 	p = str_to_number(argv[0], &port, NULL);
 	if (*p || port > UCHAR_MAX) {
 		warnx("invalid port id \"%s\"", argv[0]);
 		return (EINVAL);
 	}
 
 	if (argc == 2) {
 		if (!strcmp(argv[1], "raw"))
 			return (modinfo_raw(port));
 		else {
 			warnx("second argument can only be \"raw\"");
 			return (EINVAL);
 		}
 	}
 
 	bzero(&i2cd, sizeof(i2cd));
 	i2cd.len = 1;
 	i2cd.port_id = port;
 	i2cd.dev_addr = SFF_8472_BASE;
 
 	i2cd.offset = SFF_8472_ID;
 	if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0)
 		goto fail;
 
 	if (i2cd.data[0] > SFF_8472_ID_LAST)
 		printf("Unknown ID\n");
 	else
 		printf("ID: %s\n", sff_8472_id[i2cd.data[0]]);
 
 	/*
 	 * Each string field is collected into the zeroed 'string' buffer one
 	 * byte per request.
 	 * NOTE(review): assumes every START..END range is shorter than
 	 * sizeof(string) so the result stays NUL-terminated -- confirm
 	 * against the SFF_8472_* definitions.
 	 */
 	bzero(&string, sizeof(string));
 	for (i = SFF_8472_VENDOR_START; i < SFF_8472_VENDOR_END; i++) {
 		i2cd.offset = i;
 		if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0)
 			goto fail;
 		string[i - SFF_8472_VENDOR_START] = i2cd.data[0];
 	}
 	printf("Vendor %s\n", string);
 
 	bzero(&string, sizeof(string));
 	for (i = SFF_8472_SN_START; i < SFF_8472_SN_END; i++) {
 		i2cd.offset = i;
 		if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0)
 			goto fail;
 		string[i - SFF_8472_SN_START] = i2cd.data[0];
 	}
 	printf("SN %s\n", string);
 
 	bzero(&string, sizeof(string));
 	for (i = SFF_8472_PN_START; i < SFF_8472_PN_END; i++) {
 		i2cd.offset = i;
 		if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0)
 			goto fail;
 		string[i - SFF_8472_PN_START] = i2cd.data[0];
 	}
 	printf("PN %s\n", string);
 
 	bzero(&string, sizeof(string));
 	for (i = SFF_8472_REV_START; i < SFF_8472_REV_END; i++) {
 		i2cd.offset = i;
 		if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0)
 			goto fail;
 		string[i - SFF_8472_REV_START] = i2cd.data[0];
 	}
 	printf("Rev %s\n", string);
 
 	i2cd.offset = SFF_8472_DIAG_TYPE;
 	if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0)
 		goto fail;
 
 	if ((char )i2cd.data[0] & (SFF_8472_DIAG_IMPL |
 				   SFF_8472_DIAG_INTERNAL)) {
 
 		/* Switch to reading from the Diagnostic address. */
 		i2cd.dev_addr = SFF_8472_DIAG;
 		i2cd.len = 1;
 
 		/*
 		 * Only one byte is read per value; it is placed in the upper
 		 * byte of the 16-bit variable and the lower byte stays zero.
 		 */
 		i2cd.offset = SFF_8472_TEMP;
 		if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0)
 			goto fail;
 		temp = i2cd.data[0] << 8;
 		printf("Temp: ");
 		if ((temp & SFF_8472_TEMP_SIGN) == SFF_8472_TEMP_SIGN)
 			printf("-");
 		else
 			printf("+");
 		printf("%dC\n", (temp & SFF_8472_TEMP_MSK) >>
 		    SFF_8472_TEMP_SHIFT);
 
 		i2cd.offset = SFF_8472_VCC;
 		if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0)
 			goto fail;
 		vcc = i2cd.data[0] << 8;
 		printf("Vcc %fV\n", vcc / SFF_8472_VCC_FACTOR);
 
 		i2cd.offset = SFF_8472_TX_BIAS;
 		if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0)
 			goto fail;
 		tx_bias = i2cd.data[0] << 8;
 		printf("TX Bias %fuA\n", tx_bias / SFF_8472_BIAS_FACTOR);
 
 		i2cd.offset = SFF_8472_TX_POWER;
 		if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0)
 			goto fail;
 		tx_power = i2cd.data[0] << 8;
 		printf("TX Power %fmW\n", tx_power / SFF_8472_POWER_FACTOR);
 
 		i2cd.offset = SFF_8472_RX_POWER;
 		if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0)
 			goto fail;
 		rx_power = i2cd.data[0] << 8;
 		printf("RX Power %fmW\n", rx_power / SFF_8472_POWER_FACTOR);
 
 	} else
 		printf("Diagnostics not supported.\n");
 
 	return(0);
 
 fail:
 	/* EPERM from the I2C request is reported as an empty port. */
 	if (rc == EPERM)
 		warnx("No module/cable in port %ld", port);
 	return (rc);
 
 }
 
 /* XXX: pass in a low/high and do range checks as well */
 /*
  * If args[0] names 'param', parse args[1] as a number into *val.
  *
  * Returns 0 on success.  Returns EINVAL silently when args[0] is a
  * different parameter, and EINVAL with a warning when the name matches but
  * the value does not parse completely.
  */
 static int
 get_sched_param(const char *param, const char *args[], long *val)
 {
 	const char *name = args[0];
 	const char *value = args[1];
 	char *end;
 
 	if (strcmp(param, name) != 0)
 		return (EINVAL);
 
 	end = str_to_number(value, val, NULL);
 	if (*end == '\0')
 		return (0);
 
 	warnx("parameter \"%s\" has bad value \"%s\"", name, value);
 	return (EINVAL);
 }
 
 /*
  * "sched-class" command: build a t4_sched_params request from
  * "config|params" followed by <name> <value> pairs and send it with
  * CHELSIO_T4_SCHED_CLASS.
  *
  * Every field starts out as -1 (the struct is filled with 0xff) so that
  * missing parameters can be detected after parsing.  All problems found
  * are reported and counted; if there are any, EINVAL is returned and the
  * request is not sent.
  */
 static int
 sched_class(int argc, const char *argv[])
 {
 	struct t4_sched_params op;
 	int errs, i;
 
 	memset(&op, 0xff, sizeof(op));
 	op.subcmd = -1;
 	op.type = -1;
 	if (argc == 0) {
 		warnx("missing scheduling sub-command");
 		return (EINVAL);
 	}
 	if (!strcmp(argv[0], "config")) {
 		op.subcmd = SCHED_CLASS_SUBCMD_CONFIG;
 		op.u.config.minmax = -1;
 	} else if (!strcmp(argv[0], "params")) {
 		op.subcmd = SCHED_CLASS_SUBCMD_PARAMS;
 		op.u.params.level = op.u.params.mode = op.u.params.rateunit =
 		    op.u.params.ratemode = op.u.params.channel =
 		    op.u.params.cl = op.u.params.minrate = op.u.params.maxrate =
 		    op.u.params.weight = op.u.params.pktsize = -1;
 	} else {
 		warnx("invalid scheduling sub-command \"%s\"", argv[0]);
 		return (EINVAL);
 	}
 
 	/* Decode remaining arguments ... */
 	errs = 0;
 	for (i = 1; i < argc; i += 2) {
 		const char **args = &argv[i];
 		long l;
 
 		/* Arguments come in name/value pairs; a lone name is an error. */
 		if (i + 1 == argc) {
 			warnx("missing argument for \"%s\"", args[0]);
 			errs++;
 			break;
 		}
 
 		/* "type" is accepted by both sub-commands. */
 		if (!strcmp(args[0], "type")) {
 			if (!strcmp(args[1], "packet"))
 				op.type = SCHED_CLASS_TYPE_PACKET;
 			else {
 				warnx("invalid type parameter \"%s\"", args[1]);
 				errs++;
 			}
 
 			continue;
 		}
 
 		if (op.subcmd == SCHED_CLASS_SUBCMD_CONFIG) {
 			if(!get_sched_param("minmax", args, &l))
 				op.u.config.minmax = (int8_t)l;
 			else {
 				warnx("unknown scheduler config parameter "
 				    "\"%s\"", args[0]);
 				errs++;
 			}
 
 			continue;
 		}
 
 		/* Rest applies only to SUBCMD_PARAMS */
 		if (op.subcmd != SCHED_CLASS_SUBCMD_PARAMS)
 			continue;
 
 		if (!strcmp(args[0], "level")) {
 			if (!strcmp(args[1], "cl-rl"))
 				op.u.params.level = SCHED_CLASS_LEVEL_CL_RL;
 			else if (!strcmp(args[1], "cl-wrr"))
 				op.u.params.level = SCHED_CLASS_LEVEL_CL_WRR;
 			else if (!strcmp(args[1], "ch-rl"))
 				op.u.params.level = SCHED_CLASS_LEVEL_CH_RL;
 			else {
 				warnx("invalid level parameter \"%s\"",
 				    args[1]);
 				errs++;
 			}
 		} else if (!strcmp(args[0], "mode")) {
 			if (!strcmp(args[1], "class"))
 				op.u.params.mode = SCHED_CLASS_MODE_CLASS;
 			else if (!strcmp(args[1], "flow"))
 				op.u.params.mode = SCHED_CLASS_MODE_FLOW;
 			else {
 				warnx("invalid mode parameter \"%s\"", args[1]);
 				errs++;
 			}
 		} else if (!strcmp(args[0], "rate-unit")) {
 			if (!strcmp(args[1], "bits"))
 				op.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS;
 			else if (!strcmp(args[1], "pkts"))
 				op.u.params.rateunit = SCHED_CLASS_RATEUNIT_PKTS;
 			else {
 				warnx("invalid rate-unit parameter \"%s\"",
 				    args[1]);
 				errs++;
 			}
 		} else if (!strcmp(args[0], "rate-mode")) {
 			if (!strcmp(args[1], "relative"))
 				op.u.params.ratemode = SCHED_CLASS_RATEMODE_REL;
 			else if (!strcmp(args[1], "absolute"))
 				op.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS;
 			else {
 				warnx("invalid rate-mode parameter \"%s\"",
 				    args[1]);
 				errs++;
 			}
 		} else if (!get_sched_param("channel", args, &l))
 			op.u.params.channel = (int8_t)l;
 		else if (!get_sched_param("class", args, &l))
 			op.u.params.cl = (int8_t)l;
 		else if (!get_sched_param("min-rate", args, &l))
 			op.u.params.minrate = (int32_t)l;
 		else if (!get_sched_param("max-rate", args, &l))
 			op.u.params.maxrate = (int32_t)l;
 		else if (!get_sched_param("weight", args, &l))
 			op.u.params.weight = (int16_t)l;
 		else if (!get_sched_param("pkt-size", args, &l))
 			op.u.params.pktsize = (int16_t)l;
 		else {
 			warnx("unknown scheduler parameter \"%s\"", args[0]);
 			errs++;
 		}
 	}
 
 	/*
 	 * Catch some logical fallacies in terms of argument combinations here
 	 * so we can offer more than just the EINVAL return from the driver.
 	 * The driver will be able to catch a lot more issues since it knows
 	 * the specifics of the device hardware capabilities like how many
 	 * channels, classes, etc. the device supports.
 	 */
 	if (op.type < 0) {
 		warnx("sched \"type\" parameter missing");
 		errs++;
 	}
 	if (op.subcmd == SCHED_CLASS_SUBCMD_CONFIG) {
 		if (op.u.config.minmax < 0) {
 			warnx("sched config \"minmax\" parameter missing");
 			errs++;
 		}
 	}
 	if (op.subcmd == SCHED_CLASS_SUBCMD_PARAMS) {
 		if (op.u.params.level < 0) {
 			warnx("sched params \"level\" parameter missing");
 			errs++;
 		}
 		if (op.u.params.mode < 0) {
 			warnx("sched params \"mode\" parameter missing");
 			errs++;
 		}
 		if (op.u.params.rateunit < 0) {
 			warnx("sched params \"rate-unit\" parameter missing");
 			errs++;
 		}
 		if (op.u.params.ratemode < 0) {
 			warnx("sched params \"rate-mode\" parameter missing");
 			errs++;
 		}
 		if (op.u.params.channel < 0) {
 			warnx("sched params \"channel\" missing");
 			errs++;
 		}
 		if (op.u.params.cl < 0) {
 			warnx("sched params \"class\" missing");
 			errs++;
 		}
 		if (op.u.params.maxrate < 0 &&
 		    (op.u.params.level == SCHED_CLASS_LEVEL_CL_RL ||
 		    op.u.params.level == SCHED_CLASS_LEVEL_CH_RL)) {
 			warnx("sched params \"max-rate\" missing for "
 			    "rate-limit level");
 			errs++;
 		}
 		if (op.u.params.weight < 0 &&
 		    op.u.params.level == SCHED_CLASS_LEVEL_CL_WRR) {
 			warnx("sched params \"weight\" missing for "
 			    "weighted-round-robin level");
 			errs++;
 		}
 		if (op.u.params.pktsize < 0 &&
 		    (op.u.params.level == SCHED_CLASS_LEVEL_CL_RL ||
 		    op.u.params.level == SCHED_CLASS_LEVEL_CH_RL)) {
 			warnx("sched params \"pkt-size\" missing for "
 			    "rate-limit level");
 			errs++;
 		}
 		if (op.u.params.mode == SCHED_CLASS_MODE_FLOW &&
 		    op.u.params.ratemode != SCHED_CLASS_RATEMODE_ABS) {
 			warnx("sched params mode flow needs rate-mode absolute");
 			errs++;
 		}
 		if (op.u.params.ratemode == SCHED_CLASS_RATEMODE_REL &&
 		    !in_range(op.u.params.maxrate, 1, 100)) {
                         warnx("sched params \"max-rate\" takes "
 			    "percentage value(1-100) for rate-mode relative");
                         errs++;
                 }
                 if (op.u.params.ratemode == SCHED_CLASS_RATEMODE_ABS &&
-		    !in_range(op.u.params.maxrate, 1, 10000000)) {
+		    !in_range(op.u.params.maxrate, 1, 100000000)) {
                         warnx("sched params \"max-rate\" takes "
-			    "value(1-10000000) for rate-mode absolute");
+			    "value(1-100000000) for rate-mode absolute");
                         errs++;
                 }
                 if (op.u.params.maxrate > 0 &&
 		    op.u.params.maxrate < op.u.params.minrate) {
                         warnx("sched params \"max-rate\" is less than "
 			    "\"min-rate\"");
                         errs++;
                 }
 	}
 
 	if (errs > 0) {
 		warnx("%d error%s in sched-class command", errs,
 		    errs == 1 ? "" : "s");
 		return (EINVAL);
 	}
 
 	return doit(CHELSIO_T4_SCHED_CLASS, &op);
 }
 
 /*
  * "sched-queue" command: <port> <queue> <class>.
  *
  * Binds a queue on a port to a scheduling class with
  * CHELSIO_T4_SCHED_QUEUE.  <queue> may be "all" or "*" and <class> may be
  * "unbind" or "clear"; each of those is sent as -1.
  *
  * The port must lie in 0..UCHAR_MAX.  Numeric queue and class values must
  * lie in -1..INT8_MAX, because they are stored through an int8_t cast and
  * anything larger would be silently truncated to a different value.
  *
  * Returns EINVAL for bad arguments, otherwise the result of doit().
  */
 static int
 sched_queue(int argc, const char *argv[])
 {
 	struct t4_sched_queue op = {0};
 	char *p;
 	long val;
 
 	if (argc != 3) {
 		/* need "<port> <queue> <class>" */
 		warnx("incorrect number of arguments.");
 		return (EINVAL);
 	}
 
 	p = str_to_number(argv[0], &val, NULL);
 	if (*p || val < 0 || val > UCHAR_MAX) {
 		warnx("invalid port id \"%s\"", argv[0]);
 		return (EINVAL);
 	}
 	op.port = (uint8_t)val;
 
 	if (!strcmp(argv[1], "all") || !strcmp(argv[1], "*"))
 		op.queue = -1;
 	else {
 		p = str_to_number(argv[1], &val, NULL);
 		if (*p || val < -1 || val > INT8_MAX) {
 			warnx("invalid queue \"%s\"", argv[1]);
 			return (EINVAL);
 		}
 		op.queue = (int8_t)val;
 	}
 
 	if (!strcmp(argv[2], "unbind") || !strcmp(argv[2], "clear"))
 		op.cl = -1;
 	else {
 		p = str_to_number(argv[2], &val, NULL);
 		if (*p || val < -1 || val > INT8_MAX) {
 			warnx("invalid class \"%s\"", argv[2]);
 			return (EINVAL);
 		}
 		op.cl = (int8_t)val;
 	}
 
 	return doit(CHELSIO_T4_SCHED_QUEUE, &op);
 }
 
 /*
  * Dispatch one command.  argv[0] is the command word; the handler is
  * called with the remaining arguments only.  Returns the handler's result,
  * or EINVAL (after a warning) for an unknown command.
  */
 static int
 run_cmd(int argc, const char *argv[])
 {
 	const char *cmd = argv[0];
 
 	/* Drop the command word before handing off. */
 	argc--;
 	argv++;
 
 	if (strcmp(cmd, "reg") == 0 || strcmp(cmd, "reg32") == 0)
 		return (register_io(argc, argv, 4));
 	if (strcmp(cmd, "reg64") == 0)
 		return (register_io(argc, argv, 8));
 	if (strcmp(cmd, "regdump") == 0)
 		return (dump_regs(argc, argv));
 	if (strcmp(cmd, "filter") == 0)
 		return (filter_cmd(argc, argv));
 	if (strcmp(cmd, "context") == 0)
 		return (get_sge_context(argc, argv));
 	if (strcmp(cmd, "loadfw") == 0)
 		return (loadfw(argc, argv));
 	if (strcmp(cmd, "memdump") == 0)
 		return (memdump(argc, argv));
 	if (strcmp(cmd, "tcb") == 0)
 		return (read_tcb(argc, argv));
 	if (strcmp(cmd, "i2c") == 0)
 		return (read_i2c(argc, argv));
 	if (strcmp(cmd, "clearstats") == 0)
 		return (clearstats(argc, argv));
 	if (strcmp(cmd, "tracer") == 0)
 		return (tracer_cmd(argc, argv));
 	if (strcmp(cmd, "modinfo") == 0)
 		return (modinfo(argc, argv));
 	if (strcmp(cmd, "sched-class") == 0)
 		return (sched_class(argc, argv));
 	if (strcmp(cmd, "sched-queue") == 0)
 		return (sched_queue(argc, argv));
 
 	warnx("invalid command \"%s\"", cmd);
 	return (EINVAL);
 }
 
 #define MAX_ARGS 15
 /*
  * Interactive mode: print a "> " prompt, read a line from stdin, split it
  * on spaces, tabs and newlines into at most MAX_ARGS words, and run it as
  * a cxgbetool command (the part after "cxgbetool t4nexX").  Empty lines
  * are skipped; "quit", "exit", end of input or a read error end the loop.
  *
  * Returns the result of the last command run, or errno after a read
  * error on stdin.
  */
 static int
 run_cmd_loop(void)
 {
 	char line[128], *cursor;
 	const char *words[MAX_ARGS + 1];
 	int nwords, rc = 0;
 
 	for (;;) {
 		fprintf(stdout, "> ");
 		fflush(stdout);
 		cursor = fgets(line, sizeof(line), stdin);
 		if (cursor == NULL) {
 			if (ferror(stdin)) {
 				warn("stdin error");
 				rc = errno;	/* errno from fgets */
 			}
 			break;
 		}
 
 		/* Collect non-empty tokens; strsep yields "" between delimiters. */
 		nwords = 0;
 		while (nwords < MAX_ARGS) {
 			words[nwords] = strsep(&cursor, " \t\n");
 			if (words[nwords] == NULL)
 				break;
 			if (words[nwords][0] != 0)
 				nwords++;
 		}
 		words[nwords] = 0;
 
 		if (nwords == 0)
 			continue;	/* skip empty line */
 
 		if (strcmp(words[0], "quit") == 0 ||
 		    strcmp(words[0], "exit") == 0)
 			break;
 
 		rc = run_cmd(nwords, words);
 	}
 
 	/* rc normally comes from the last command (not including quit/exit) */
 	return (rc);
 }
 
 /*
  * Entry point.  Usage is "<progname> <nexus> <command> [args...]".
  *
  * "-h" or "--help" as the only argument prints usage to stdout and exits
  * with 0; fewer than two arguments prints usage to stderr and exits with
  * EINVAL.  The single command "stdio" starts the interactive loop;
  * anything else is run as one command.  The exit status is that command's
  * result.
  */
 int
 main(int argc, const char *argv[])
 {
 	int rc = -1;
 
 	progname = argv[0];
 
 	if (argc == 2 &&
 	    (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)) {
 		usage(stdout);
 		exit(0);
 	}
 
 	if (argc < 3) {
 		usage(stderr);
 		exit(EINVAL);
 	}
 
 	nexus = argv[1];
 
 	/* Skip progname and nexus. */
 	argc -= 2;
 	argv += 2;
 
 	rc = (argc == 1 && strcmp(argv[0], "stdio") == 0) ?
 	    run_cmd_loop() : run_cmd(argc, argv);
 
 	return (rc);
 }
Index: projects/vnet/usr.bin/indent/lexi.c
===================================================================
--- projects/vnet/usr.bin/indent/lexi.c	(revision 301522)
+++ projects/vnet/usr.bin/indent/lexi.c	(revision 301523)
@@ -1,608 +1,608 @@
 /*
  * Copyright (c) 1985 Sun Microsystems, Inc.
  * Copyright (c) 1980, 1993
  *	The Regents of the University of California.  All rights reserved.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if 0
 #ifndef lint
 static char sccsid[] = "@(#)lexi.c	8.1 (Berkeley) 6/6/93";
 #endif /* not lint */
 #endif
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Here we have the token scanner for indent.  It scans off one token and puts
  * it in the global variable "token".  It returns a code, indicating the type
  * of token scanned.
  */
 
 #include <err.h>
 #include <stdio.h>
 #include <ctype.h>
 #include <stdlib.h>
 #include <string.h>
 #include "indent_globs.h"
 #include "indent_codes.h"
 #include "indent.h"
 
 /*
  * Character classes stored in chartype[]: alphanum marks characters that
  * lexi() scans as part of an identifier or number, opchar marks operator
  * characters.  Every other entry in the table is 0.
  */
 #define alphanum 1
 #define opchar 3
 
 /*
  * One entry of the keyword table: the keyword's spelling and the class code
  * that lexi() switches on once the keyword has been recognized.
  */
 struct templ {
     const char *rwd;		/* keyword text; a null pointer ends the table */
     int         rwcode;		/* keyword class, see the switch in lexi() */
 };
 
 /*
  * Keyword table searched by lexi() and extended at run time by addkey().
  * The entry whose rwd is 0 terminates the list; the remaining slots of the
  * 1000-element array are spare room for added keywords.
  *
  * rwcode values, as handled by the switch in lexi():
  *   1  switch
  *   2  case, default
  *   3  struct, union, enum
  *   4  declaration keywords
  *   5  if, while, for
  *   6  do, else
  *   7  sizeof
  *   0  anything else; returned as a plain identifier
  */
 struct templ specials[1000] =
 {
     {"switch", 1},
     {"case", 2},
     {"break", 0},
     {"struct", 3},
     {"union", 3},
     {"enum", 3},
     {"default", 2},
     {"int", 4},
     {"char", 4},
     {"float", 4},
     {"double", 4},
     {"long", 4},
     {"short", 4},
-    {"typdef", 4},
+    {"typedef", 4},
     {"unsigned", 4},
     {"register", 4},
     {"static", 4},
     {"global", 4},
     {"extern", 4},
     {"void", 4},
     {"const", 4},
     {"volatile", 4},
     {"goto", 0},
     {"return", 0},
     {"if", 5},
     {"while", 5},
     {"for", 5},
     {"else", 6},
     {"do", 6},
     {"sizeof", 7},
     {0, 0}
 };
 
 /*
  * Character class table, indexed by character code (128 entries, eight per
  * row).  An entry is 1 (alphanum) for digits, letters, '_' and '$', 3
  * (opchar) for the operator characters ! % & * + - / < = > ? ^ | ~, and 0
  * for everything else.
  */
 char        chartype[128] =
 {				/* this is used to facilitate the decision of
 				 * what type (alphanumeric, operator) each
 				 * character is */
     0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0,
     0, 3, 0, 0, 1, 3, 3, 0,
     0, 0, 3, 3, 0, 3, 0, 3,
     1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 0, 0, 3, 3, 3, 3,
     0, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 0, 0, 0, 3, 1,
     0, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 0, 3, 0, 3, 0
 };
 
 int
 lexi(void)
 {
     int         unary_delim;	/* this is set to 1 if the current token
 				 * forces a following operator to be unary */
     static int  last_code;	/* the last token type returned */
     static int  l_struct;	/* set to 1 if the last token was 'struct' */
     int         code;		/* internal code to be returned */
     char        qchar;		/* the delimiter character for a string */
 
     e_token = s_token;		/* point to start of place to save token */
     unary_delim = false;
     ps.col_1 = ps.last_nl;	/* tell world that this token started in
 				 * column 1 iff the last thing scanned was nl */
     ps.last_nl = false;
 
     while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
 	ps.col_1 = false;	/* leading blanks imply token is not in column
 				 * 1 */
 	if (++buf_ptr >= buf_end)
 	    fill_buffer();
     }
 
     /* Scan an alphanumeric token */
     if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
 	/*
 	 * we have a character or number
 	 */
 	const char *j;		/* used for searching thru list of
 				 *
 				 * reserved words */
 	struct templ *p;
 
 	if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
 	    int         seendot = 0,
 	                seenexp = 0,
 			seensfx = 0;
 	    if (*buf_ptr == '0' &&
 		    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
 		*e_token++ = *buf_ptr++;
 		*e_token++ = *buf_ptr++;
 		while (isxdigit(*buf_ptr)) {
 		    CHECK_SIZE_TOKEN;
 		    *e_token++ = *buf_ptr++;
 		}
 	    }
 	    else
 		while (1) {
 		    if (*buf_ptr == '.') {
 			if (seendot)
 			    break;
 			else
 			    seendot++;
 		    }
 		    CHECK_SIZE_TOKEN;
 		    *e_token++ = *buf_ptr++;
 		    if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
 			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
 			    break;
 			else {
 			    seenexp++;
 			    seendot++;
 			    CHECK_SIZE_TOKEN;
 			    *e_token++ = *buf_ptr++;
 			    if (*buf_ptr == '+' || *buf_ptr == '-')
 				*e_token++ = *buf_ptr++;
 			}
 		    }
 		}
 	    while (1) {
 		if (!(seensfx & 1) &&
 			(*buf_ptr == 'U' || *buf_ptr == 'u')) {
 		    CHECK_SIZE_TOKEN;
 		    *e_token++ = *buf_ptr++;
 		    seensfx |= 1;
 		    continue;
 		}
         	if (!(seensfx & 2) &&
 			(*buf_ptr == 'L' || *buf_ptr == 'l')) {
 		    CHECK_SIZE_TOKEN;
 		    if (buf_ptr[1] == buf_ptr[0])
 		        *e_token++ = *buf_ptr++;
 		    *e_token++ = *buf_ptr++;
 		    seensfx |= 2;
 		    continue;
 		}
 		break;
 	    }
 	}
 	else
 	    while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
 		/* fill_buffer() terminates buffer with newline */
 		if (*buf_ptr == BACKSLASH) {
 		    if (*(buf_ptr + 1) == '\n') {
 			buf_ptr += 2;
 			if (buf_ptr >= buf_end)
 			    fill_buffer();
 			} else
 			    break;
 		}
 		CHECK_SIZE_TOKEN;
 		/* copy it over */
 		*e_token++ = *buf_ptr++;
 		if (buf_ptr >= buf_end)
 		    fill_buffer();
 	    }
 	*e_token++ = '\0';
 	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
 	    if (++buf_ptr >= buf_end)
 		fill_buffer();
 	}
 	ps.its_a_keyword = false;
 	ps.sizeof_keyword = false;
 	if (l_struct && !ps.p_l_follow) {
 				/* if last token was 'struct' and we're not
 				 * in parentheses, then this token
 				 * should be treated as a declaration */
 	    l_struct = false;
 	    last_code = ident;
 	    ps.last_u_d = true;
 	    return (decl);
 	}
 	ps.last_u_d = l_struct;	/* Operator after identifier is binary
 				 * unless last token was 'struct' */
 	l_struct = false;
 	last_code = ident;	/* Remember that this is the code we will
 				 * return */
 
 	if (auto_typedefs) {
 	    const char *q = s_token;
 	    size_t q_len = strlen(q);
 	    /* Check if we have an "_t" in the end */
 	    if (q_len > 2 &&
 	        (strcmp(q + q_len - 2, "_t") == 0)) {
 	        ps.its_a_keyword = true;
 		ps.last_u_d = true;
 	        goto found_auto_typedef;
 	    }
 	}
 
 	/*
 	 * This loop will check if the token is a keyword.
 	 */
 	for (p = specials; (j = p->rwd) != 0; p++) {
 	    const char *q = s_token;	/* point at scanned token */
 	    if (*j++ != *q++ || *j++ != *q++)
 		continue;	/* This test depends on the fact that
 				 * identifiers are always at least 1 character
 				 * long (ie. the first two bytes of the
 				 * identifier are always meaningful) */
 	    if (q[-1] == 0)
 		break;		/* If its a one-character identifier */
 	    while (*q++ == *j)
 		if (*j++ == 0)
 		    goto found_keyword;	/* I wish that C had a multi-level
 					 * break... */
 	}
 	if (p->rwd) {		/* we have a keyword */
     found_keyword:
 	    ps.its_a_keyword = true;
 	    ps.last_u_d = true;
 	    switch (p->rwcode) {
 	    case 1:		/* it is a switch */
 		return (swstmt);
 	    case 2:		/* a case or default */
 		return (casestmt);
 
 	    case 3:		/* a "struct" */
 		/*
 		 * Next time around, we will want to know that we have had a
 		 * 'struct'
 		 */
 		l_struct = true;
 		/* FALLTHROUGH */
 
 	    case 4:		/* one of the declaration keywords */
 	    found_auto_typedef:
 		if (ps.p_l_follow) {
 		    ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask;
 		    break;	/* inside parens: cast, param list or sizeof */
 		}
 		last_code = decl;
 		return (decl);
 
 	    case 5:		/* if, while, for */
 		return (sp_paren);
 
 	    case 6:		/* do, else */
 		return (sp_nparen);
 
 	    case 7:
 		ps.sizeof_keyword = true;
 	    default:		/* all others are treated like any other
 				 * identifier */
 		return (ident);
 	    }			/* end of switch */
 	}			/* end of if (found_it) */
 	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
 	    char *tp = buf_ptr;
 	    while (tp < buf_end)
 		if (*tp++ == ')' && (*tp == ';' || *tp == ','))
 		    goto not_proc;
 	    strncpy(ps.procname, token, sizeof ps.procname - 1);
 	    ps.in_parameter_declaration = 1;
 	    rparen_count = 1;
     not_proc:;
 	}
 	/*
 	 * The following hack attempts to guess whether or not the current
 	 * token is in fact a declaration keyword -- one that has been
 	 * typedefd
 	 */
 	if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
 		&& !ps.p_l_follow
 	        && !ps.block_init
 		&& (ps.last_token == rparen || ps.last_token == semicolon ||
 		    ps.last_token == decl ||
 		    ps.last_token == lbrace || ps.last_token == rbrace)) {
 	    ps.its_a_keyword = true;
 	    ps.last_u_d = true;
 	    last_code = decl;
 	    return decl;
 	}
 	if (last_code == decl)	/* if this is a declared variable, then
 				 * following sign is unary */
 	    ps.last_u_d = true;	/* will make "int a -1" work */
 	last_code = ident;
 	return (ident);		/* the ident is not in the list */
     }				/* end of procesing for alpanum character */
 
     /* Scan a non-alphanumeric token */
 
     *e_token++ = *buf_ptr;		/* if it is only a one-character token, it is
 				 * moved here */
     *e_token = '\0';
     if (++buf_ptr >= buf_end)
 	fill_buffer();
 
     switch (*token) {
     case '\n':
 	unary_delim = ps.last_u_d;
 	ps.last_nl = true;	/* remember that we just had a newline */
 	code = (had_eof ? 0 : newline);
 
 	/*
 	 * if data has been exhausted, the newline is a dummy, and we should
 	 * return code to stop
 	 */
 	break;
 
     case '\'':			/* start of quoted character */
     case '"':			/* start of string */
 	qchar = *token;
 	if (troff) {
 	    e_token[-1] = '`';
 	    if (qchar == '"')
 		*e_token++ = '`';
 	    e_token = chfont(&bodyf, &stringf, e_token);
 	}
 	do {			/* copy the string */
 	    while (1) {		/* move one character or [/<char>]<char> */
 		if (*buf_ptr == '\n') {
 		    diag2(1, "Unterminated literal");
 		    goto stop_lit;
 		}
 		CHECK_SIZE_TOKEN;	/* Only have to do this once in this loop,
 					 * since CHECK_SIZE guarantees that there
 					 * are at least 5 entries left */
 		*e_token = *buf_ptr++;
 		if (buf_ptr >= buf_end)
 		    fill_buffer();
 		if (*e_token == BACKSLASH) {	/* if escape, copy extra char */
 		    if (*buf_ptr == '\n')	/* check for escaped newline */
 			++line_no;
 		    if (troff) {
 			*++e_token = BACKSLASH;
 			if (*buf_ptr == BACKSLASH)
 			    *++e_token = BACKSLASH;
 		    }
 		    *++e_token = *buf_ptr++;
 		    ++e_token;	/* we must increment this again because we
 				 * copied two chars */
 		    if (buf_ptr >= buf_end)
 			fill_buffer();
 		}
 		else
 		    break;	/* we copied one character */
 	    }			/* end of while (1) */
 	} while (*e_token++ != qchar);
 	if (troff) {
 	    e_token = chfont(&stringf, &bodyf, e_token - 1);
 	    if (qchar == '"')
 		*e_token++ = '\'';
 	}
 stop_lit:
 	code = ident;
 	break;
 
     case ('('):
     case ('['):
 	unary_delim = true;
 	code = lparen;
 	break;
 
     case (')'):
     case (']'):
 	code = rparen;
 	break;
 
     case '#':
 	unary_delim = ps.last_u_d;
 	code = preesc;
 	break;
 
     case '?':
 	unary_delim = true;
 	code = question;
 	break;
 
     case (':'):
 	code = colon;
 	unary_delim = true;
 	break;
 
     case (';'):
 	unary_delim = true;
 	code = semicolon;
 	break;
 
     case ('{'):
 	unary_delim = true;
 
 	/*
 	 * if (ps.in_or_st) ps.block_init = 1;
 	 */
 	/* ?	code = ps.block_init ? lparen : lbrace; */
 	code = lbrace;
 	break;
 
     case ('}'):
 	unary_delim = true;
 	/* ?	code = ps.block_init ? rparen : rbrace; */
 	code = rbrace;
 	break;
 
     case 014:			/* a form feed */
 	unary_delim = ps.last_u_d;
 	ps.last_nl = true;	/* remember this so we can set 'ps.col_1'
 				 * right */
 	code = form_feed;
 	break;
 
     case (','):
 	unary_delim = true;
 	code = comma;
 	break;
 
     case '.':
 	unary_delim = false;
 	code = period;
 	break;
 
     case '-':
     case '+':			/* check for -, +, --, ++ */
 	code = (ps.last_u_d ? unary_op : binary_op);
 	unary_delim = true;
 
 	if (*buf_ptr == token[0]) {
 	    /* check for doubled character */
 	    *e_token++ = *buf_ptr++;
 	    /* buffer overflow will be checked at end of loop */
 	    if (last_code == ident || last_code == rparen) {
 		code = (ps.last_u_d ? unary_op : postop);
 		/* check for following ++ or -- */
 		unary_delim = false;
 	    }
 	}
 	else if (*buf_ptr == '=')
 	    /* check for operator += */
 	    *e_token++ = *buf_ptr++;
 	else if (*buf_ptr == '>') {
 	    /* check for operator -> */
 	    *e_token++ = *buf_ptr++;
 	    if (!pointer_as_binop) {
 		unary_delim = false;
 		code = unary_op;
 		ps.want_blank = false;
 	    }
 	}
 	break;			/* buffer overflow will be checked at end of
 				 * switch */
 
     case '=':
 	if (ps.in_or_st)
 	    ps.block_init = 1;
 #ifdef undef
 	if (chartype[*buf_ptr] == opchar) {	/* we have two char assignment */
 	    e_token[-1] = *buf_ptr++;
 	    if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
 		*e_token++ = *buf_ptr++;
 	    *e_token++ = '=';	/* Flip =+ to += */
 	    *e_token = 0;
 	}
 #else
 	if (*buf_ptr == '=') {/* == */
 	    *e_token++ = '=';	/* Flip =+ to += */
 	    buf_ptr++;
 	    *e_token = 0;
 	}
 #endif
 	code = binary_op;
 	unary_delim = true;
 	break;
 	/* can drop thru!!! */
 
     case '>':
     case '<':
     case '!':			/* ops like <, <<, <=, !=, etc */
 	if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
 	    *e_token++ = *buf_ptr;
 	    if (++buf_ptr >= buf_end)
 		fill_buffer();
 	}
 	if (*buf_ptr == '=')
 	    *e_token++ = *buf_ptr++;
 	code = (ps.last_u_d ? unary_op : binary_op);
 	unary_delim = true;
 	break;
 
     default:
 	if (token[0] == '/' && *buf_ptr == '*') {
 	    /* it is start of comment */
 	    *e_token++ = '*';
 
 	    if (++buf_ptr >= buf_end)
 		fill_buffer();
 
 	    code = comment;
 	    unary_delim = ps.last_u_d;
 	    break;
 	}
 	while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
 	    /*
 	     * handle ||, &&, etc, and also things as in int *****i
 	     */
 	    *e_token++ = *buf_ptr;
 	    if (++buf_ptr >= buf_end)
 		fill_buffer();
 	}
 	code = (ps.last_u_d ? unary_op : binary_op);
 	unary_delim = true;
 
 
     }				/* end of switch */
     if (code != newline) {
 	l_struct = false;
 	last_code = code;
     }
     if (buf_ptr >= buf_end)	/* check for input buffer empty */
 	fill_buffer();
     ps.last_u_d = unary_delim;
     *e_token = '\0';		/* null terminate the token */
     return (code);
 }
 
 /*
  * Add the given keyword to the keyword table, using val as the keyword type.
  *
  * A keyword that is already in the table is left unchanged.  The key pointer
  * itself is stored, not a copy, so the string must stay valid for as long as
  * the table is used.  When there is no room left for both the new entry and
  * the terminating entry, the request is silently ignored.
  */
 void
 addkey(char *key, int val)
 {
     struct templ *p = specials;
     struct templ *const end = specials + sizeof specials / sizeof specials[0];
 
     while (p->rwd) {
 	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
 	    return;
 	p++;
     }
     /*
      * p is the terminator slot.  The new terminator goes into p[1], so that
      * slot has to exist as well; checking only p would let the last insert
      * write one element past the end of the table.
      */
     if (p + 1 >= end)
 	return;			/* For now, table overflows are silently
 				 * ignored */
     p->rwd = key;
     p->rwcode = val;
     p[1].rwd = 0;
     p[1].rwcode = 0;
 }
Index: projects/vnet
===================================================================
--- projects/vnet	(revision 301522)
+++ projects/vnet	(revision 301523)

Property changes on: projects/vnet
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r301509-301522