Index: projects/vnet/etc/defaults/rc.conf =================================================================== --- projects/vnet/etc/defaults/rc.conf (revision 301522) +++ projects/vnet/etc/defaults/rc.conf (revision 301523) @@ -1,743 +1,743 @@ #!/bin/sh # This is rc.conf - a file full of useful variables that you can set # to change the default startup behavior of your system. You should # not edit this file! Put any overrides into one of the ${rc_conf_files} # instead and you will be able to update these defaults later without # spamming your local configuration information. # # The ${rc_conf_files} files should only contain values which override # values set in this file. This eases the upgrade path when defaults # are changed and new features are added. # # All arguments must be in double or single quotes. # # For a more detailed explanation of all the rc.conf variables, please # refer to the rc.conf(5) manual page. # # $FreeBSD$ ############################################################## ### Important initial Boot-time options #################### ############################################################## #rc_debug="NO" # Set to YES to enable debugging output from rc.d rc_info="NO" # Enables display of informational messages at boot. rc_startmsgs="YES" # Show "Starting foo:" messages at boot rcshutdown_timeout="90" # Seconds to wait before terminating rc.shutdown early_late_divider="FILESYSTEMS" # Script that separates early/late # stages of the boot process. Make sure you know # the ramifications if you change this. # See rc.conf(5) for more details. always_force_depends="NO" # Set to check that indicated dependencies are # running during boot (can increase boot time). apm_enable="NO" # Set to YES to enable APM BIOS functions (or NO). apmd_enable="NO" # Run apmd to handle APM event from userland. apmd_flags="" # Flags to apmd (if enabled). ddb_enable="NO" # Set to YES to load ddb scripts at boot. ddb_config="/etc/ddb.conf" # ddb(8) config file. 
devd_enable="YES" # Run devd, to trigger programs on device tree changes. devd_flags="" # Additional flags for devd(8). #kld_list="" # Kernel modules to load after local disks are mounted kldxref_enable="NO" # Build linker.hints files with kldxref(8). kldxref_clobber="NO" # Overwrite old linker.hints at boot. kldxref_module_path="" # Override kern.module_path. A ';'-delimited list. powerd_enable="NO" # Run powerd to lower our power usage. powerd_flags="" # Flags to powerd (if enabled). tmpmfs="AUTO" # Set to YES to always create an mfs /tmp, NO to never tmpsize="20m" # Size of mfs /tmp if created tmpmfs_flags="-S" # Extra mdmfs options for the mfs /tmp varmfs="AUTO" # Set to YES to always create an mfs /var, NO to never varsize="32m" # Size of mfs /var if created varmfs_flags="-S" # Extra mount options for the mfs /var populate_var="AUTO" # Set to YES to always (re)populate /var, NO to never cleanvar_enable="YES" # Clean the /var directory local_startup="/usr/local/etc/rc.d" # startup script dirs. script_name_sep=" " # Change if your startup scripts' names contain spaces rc_conf_files="/etc/rc.conf /etc/rc.conf.local" # ZFS support zfs_enable="NO" # Set to YES to automatically mount ZFS file systems # ZFSD support zfsd_enable="NO" # Set to YES to automatically start the ZFS fault # management daemon. gptboot_enable="YES" # GPT boot success/failure reporting. # Experimental - test before enabling gbde_autoattach_all="NO" # YES automatically mounts gbde devices from fstab gbde_devices="NO" # Devices to automatically attach (list, or AUTO) gbde_attach_attempts="3" # Number of times to attempt attaching gbde devices gbde_lockdir="/etc" # Where to look for gbde lockfiles # GELI disk encryption configuration. geli_devices="" # List of devices to automatically attach in addition to # GELI devices listed in /etc/fstab. geli_tries="" # Number of times to attempt attaching geli device. # If empty, kern.geom.eli.tries will be used. 
geli_default_flags="" # Default flags for geli(8). geli_autodetach="YES" # Automatically detach on last close. # Providers are marked as such when all file systems are # mounted. # Example use. #geli_devices="da1 mirror/home" #geli_da1_flags="-p -k /etc/geli/da1.keys" #geli_da1_autodetach="NO" #geli_mirror_home_flags="-k /etc/geli/home.keys" root_rw_mount="YES" # Set to NO to inhibit remounting root read-write. root_hold_delay="30" # Time to wait for root mount hold release. fsck_y_enable="NO" # Set to YES to do fsck -y if the initial preen fails. fsck_y_flags="" # Additional flags for fsck -y background_fsck="YES" # Attempt to run fsck in the background where possible. background_fsck_delay="60" # Time to wait (seconds) before starting the fsck. netfs_types="nfs:NFS smbfs:SMB" # Net filesystems. extra_netfs_types="NO" # List of network extra filesystem types for delayed # mount at startup (or NO). ############################################################## ### Network configuration sub-section ###################### ############################################################## ### Basic network and firewall/security options: ### hostname="" # Set this! hostid_enable="YES" # Set host UUID. hostid_file="/etc/hostid" # File with hostuuid. nisdomainname="NO" # Set to NIS domain if using NIS (or NO). dhclient_program="/sbin/dhclient" # Path to dhcp client program. dhclient_flags="" # Extra flags to pass to dhcp client. #dhclient_flags_fxp0="" # Extra dhclient flags for fxp0 only background_dhclient="NO" # Start dhcp client in the background. #background_dhclient_fxp0="YES" # Start dhcp client on fxp0 in the background. synchronous_dhclient="NO" # Start dhclient directly on configured # interfaces during startup. defaultroute_delay="30" # Time to wait for a default route on a DHCP interface. defaultroute_carrier_delay="5" # Time to wait for carrier while waiting for a default route. 
netif_enable="YES" # Set to YES to initialize network interfaces netif_ipexpand_max="2048" # Maximum number of IP addrs in a range spec. wpa_supplicant_program="/usr/sbin/wpa_supplicant" wpa_supplicant_flags="-s" # Extra flags to pass to wpa_supplicant wpa_supplicant_conf_file="/etc/wpa_supplicant.conf" # firewall_enable="NO" # Set to YES to enable firewall functionality firewall_script="/etc/rc.firewall" # Which script to run to set up the firewall firewall_type="UNKNOWN" # Firewall type (see /etc/rc.firewall) firewall_quiet="NO" # Set to YES to suppress rule display firewall_logging="NO" # Set to YES to enable events logging firewall_logif="NO" # Set to YES to create logging-pseudo interface firewall_flags="" # Flags passed to ipfw when type is a file firewall_coscripts="" # List of executables/scripts to run after # firewall starts/stops firewall_client_net="192.0.2.0/24" # IPv4 Network address for "client" # firewall. #firewall_client_net_ipv6="2001:db8:2:1::/64" # IPv6 network prefix for # "client" firewall. firewall_simple_iif="ed1" # Inside network interface for "simple" # firewall. firewall_simple_inet="192.0.2.16/28" # Inside network address for "simple" # firewall. firewall_simple_oif="ed0" # Outside network interface for "simple" # firewall. firewall_simple_onet="192.0.2.0/28" # Outside network address for "simple" # firewall. #firewall_simple_iif_ipv6="ed1" # Inside IPv6 network interface for "simple" # firewall. #firewall_simple_inet_ipv6="2001:db8:2:800::/56" # Inside IPv6 network prefix # for "simple" firewall. #firewall_simple_oif_ipv6="ed0" # Outside IPv6 network interface for "simple" # firewall. #firewall_simple_onet_ipv6="2001:db8:2:0::/56" # Outside IPv6 network prefix # for "simple" firewall. firewall_myservices="" # List of TCP ports on which this host # offers services for "workstation" firewall. firewall_allowservices="" # List of IPs which have access to # $firewall_myservices for "workstation" # firewall. 
firewall_trusted="" # List of IPs which have full access to this # host for "workstation" firewall. firewall_logdeny="NO" # Set to YES to log default denied incoming # packets for "workstation" firewall. firewall_nologports="135-139,445 1026,1027 1433,1434" # List of TCP/UDP ports # for which denied incoming packets are not # logged for "workstation" firewall. firewall_nat_enable="NO" # Enable kernel NAT (if firewall_enable == YES) firewall_nat_interface="" # Public interface or IPaddress to use firewall_nat_flags="" # Additional configuration parameters dummynet_enable="NO" # Load the dummynet(4) module ip_portrange_first="NO" # Set first dynamically allocated port ip_portrange_last="NO" # Set last dynamically allocated port ike_enable="NO" # Enable IKE daemon (usually racoon or isakmpd) ike_program="/usr/local/sbin/isakmpd" # Path to IKE daemon ike_flags="" # Additional flags for IKE daemon ipsec_enable="NO" # Set to YES to run setkey on ipsec_file ipsec_file="/etc/ipsec.conf" # Name of config file for setkey natd_program="/sbin/natd" # path to natd, if you want a different one. natd_enable="NO" # Enable natd (if firewall_enable == YES). natd_interface="" # Public interface or IPaddress to use. natd_flags="" # Additional flags for natd. 
ipfilter_enable="NO" # Set to YES to enable ipfilter functionality ipfilter_program="/sbin/ipf" # where the ipfilter program lives ipfilter_rules="/etc/ipf.rules" # rules definition file for ipfilter, see # /usr/src/contrib/ipfilter/rules for examples ipfilter_flags="" # additional flags for ipfilter ipnat_enable="NO" # Set to YES to enable ipnat functionality ipnat_program="/sbin/ipnat" # where the ipnat program lives ipnat_rules="/etc/ipnat.rules" # rules definition file for ipnat ipnat_flags="" # additional flags for ipnat ipmon_enable="NO" # Set to YES for ipmon; needs ipfilter or ipnat ipmon_program="/sbin/ipmon" # where the ipfilter monitor program lives ipmon_flags="-Ds" # typically "-Ds" or "-D /var/log/ipflog" ipfs_enable="NO" # Set to YES to enable saving and restoring # of state tables at shutdown and boot ipfs_program="/sbin/ipfs" # where the ipfs program lives ipfs_flags="" # additional flags for ipfs pf_enable="NO" # Set to YES to enable packet filter (pf) pf_rules="/etc/pf.conf" # rules definition file for pf pf_program="/sbin/pfctl" # where the pfctl program lives pf_flags="" # additional flags for pfctl pflog_enable="NO" # Set to YES to enable packet filter logging pflog_logfile="/var/log/pflog" # where pflogd should store the logfile pflog_program="/sbin/pflogd" # where the pflogd program lives pflog_flags="" # additional flags for pflogd ftpproxy_enable="NO" # Set to YES to enable ftp-proxy(8) for pf ftpproxy_flags="" # additional flags for ftp-proxy(8) pfsync_enable="NO" # Expose pf state to other hosts for syncing pfsync_syncdev="" # Interface for pfsync to work through pfsync_syncpeer="" # IP address of pfsync peer host pfsync_ifconfig="" # Additional options to ifconfig(8) for pfsync tcp_extensions="YES" # Set to NO to turn off RFC1323 extensions. log_in_vain="0" # >=1 to log connects to ports w/o listeners. tcp_keepalive="YES" # Enable stale TCP connection timeout (or NO). 
tcp_drop_synfin="NO" # Set to YES to drop TCP packets with SYN+FIN # NOTE: this violates the TCP specification icmp_drop_redirect="NO" # Set to YES to ignore ICMP REDIRECT packets icmp_log_redirect="NO" # Set to YES to log ICMP REDIRECT packets network_interfaces="auto" # List of network interfaces (or "auto"). cloned_interfaces="" # List of cloned network interfaces to create. #cloned_interfaces="gif0 gif1 gif2 gif3" # Pre-cloning GENERIC config. #ifconfig_lo0="inet 127.0.0.1" # default loopback device configuration. #ifconfig_lo0_alias0="inet 127.0.0.254 netmask 0xffffffff" # Sample alias entry. #ifconfig_ed0_ipv6="inet6 2001:db8:1::1 prefixlen 64" # Sample IPv6 addr entry #ifconfig_ed0_alias0="inet6 2001:db8:2::1 prefixlen 64" # Sample IPv6 alias #ifconfig_fxp0_name="net0" # Change interface name from fxp0 to net0. #vlans_fxp0="101 vlan0" # vlan(4) interfaces for fxp0 device #create_args_vlan0="vlan 102" # vlan tag for vlan0 device #wlans_ath0="wlan0" # wlan(4) interfaces for ath0 device #wlandebug_wlan0="scan+auth+assoc" # Set debug flags with wlandebug(8) #ipv4_addrs_fxp0="192.168.0.1/24 192.168.1.1-5/28" # example IPv4 address entry. # #autobridge_interfaces="bridge0" # List of bridges to check #autobridge_bridge0="tap* vlan0" # Interface glob to automatically add to the bridge # # If you have any sppp(4) interfaces above, you might also want to set # the following parameters. Refer to spppcontrol(8) for their meaning. sppp_interfaces="" # List of sppp interfaces. #sppp_interfaces="...0" # example: sppp over ... #spppconfig_...0="authproto=chap myauthname=foo myauthsecret='top secret' hisauthname=some-gw hisauthsecret='another secret'" # User ppp configuration. ppp_enable="NO" # Start user-ppp (or NO). ppp_program="/usr/sbin/ppp" # Path to user-ppp program. ppp_mode="auto" # Choice of "auto", "ddial", "direct" or "dedicated". # For details see man page for ppp(8). Default is auto. ppp_nat="YES" # Use PPP's internal network address translation or NO. 
ppp_profile="papchap" # Which profile to use from /etc/ppp/ppp.conf. ppp_user="root" # Which user to run ppp as # Start multiple instances of ppp at boot time #ppp_profile="profile1 profile2 profile3" # Which profiles to use #ppp_profile1_mode="ddial" # Override ppp mode for profile1 #ppp_profile2_nat="NO" # Override nat mode for profile2 # profile3 uses default ppp_mode and ppp_nat ### Network daemon (miscellaneous) ### hostapd_enable="NO" # Run hostap daemon. syslogd_enable="YES" # Run syslog daemon (or NO). syslogd_program="/usr/sbin/syslogd" # path to syslogd, if you want a different one. syslogd_flags="-s" # Flags to syslogd (if enabled). syslogd_oomprotect="YES" # Don't kill syslogd when swap space is exhausted. altlog_proglist="" # List of chrooted applications in /var inetd_enable="NO" # Run the network daemon dispatcher (YES/NO). inetd_program="/usr/sbin/inetd" # path to inetd, if you want a different one. inetd_flags="-wW -C 60" # Optional flags to inetd iscsid_enable="NO" # iSCSI initiator daemon. iscsictl_enable="NO" # iSCSI initiator autostart. iscsictl_flags="-Aa" # Optional flags to iscsictl. hastd_enable="NO" # Run the HAST daemon (YES/NO). hastd_program="/sbin/hastd" # path to hastd, if you want a different one. hastd_flags="" # Optional flags to hastd. ctld_enable="NO" # CAM Target Layer / iSCSI target daemon. local_unbound_enable="NO" # local caching resolver -blacklistd_enable="YES" # Run blacklistd daemon (YES/NO). +blacklistd_enable="NO" # Run blacklistd daemon (YES/NO). blacklistd_flags="" # Optional flags for blacklistd(8). # # kerberos. Do not run the admin daemons on slave servers # kdc_enable="NO" # Run a kerberos 5 KDC (or NO). 
kdc_program="/usr/libexec/kdc" # path to kerberos 5 KDC kdc_flags="" # Additional flags to the kerberos 5 KDC kadmind_enable="NO" # Run kadmind (or NO) kadmind_program="/usr/libexec/kadmind" # path to kadmind kpasswdd_enable="NO" # Run kpasswdd (or NO) kpasswdd_program="/usr/libexec/kpasswdd" # path to kpasswdd kfd_enable="NO" # Run kfd (or NO) kfd_program="/usr/libexec/kfd" # path to kerberos 5 kfd daemon kfd_flags="" ipropd_master_enable="NO" # Run Heimdal incremental propagation daemon # (master daemon). ipropd_master_program="/usr/libexec/ipropd-master" ipropd_master_flags="" # Flags to ipropd-master. ipropd_master_keytab="/etc/krb5.keytab" # keytab for ipropd-master. ipropd_master_slaves="" # slave node names used for /var/heimdal/slaves. ipropd_slave_enable="NO" # Run Heimdal incremental propagation daemon # (slave daemon). ipropd_slave_program="/usr/libexec/ipropd-slave" ipropd_slave_flags="" # Flags to ipropd-slave. ipropd_slave_keytab="/etc/krb5.keytab" # keytab for ipropd-slave. ipropd_slave_master="" # master node name. gssd_enable="NO" # Run the gssd daemon (or NO). gssd_program="/usr/sbin/gssd" # Path to gssd. gssd_flags="" # Flags for gssd. rwhod_enable="NO" # Run the rwho daemon (or NO). rwhod_flags="" # Flags for rwhod rarpd_enable="NO" # Run rarpd (or NO). rarpd_flags="-a" # Flags to rarpd. bootparamd_enable="NO" # Run bootparamd (or NO). bootparamd_flags="" # Flags to bootparamd pppoed_enable="NO" # Run the PPP over Ethernet daemon. pppoed_provider="*" # Provider and ppp(8) config file entry. pppoed_flags="-P /var/run/pppoed.pid" # Flags to pppoed (if enabled). pppoed_interface="fxp0" # The interface that pppoed runs on. sshd_enable="NO" # Enable sshd sshd_program="/usr/sbin/sshd" # path to sshd, if you want a different one. sshd_flags="" # Additional flags for sshd. ftpd_enable="NO" # Enable stand-alone ftpd. ftpd_program="/usr/libexec/ftpd" # Path to ftpd, if you want a different one. ftpd_flags="" # Additional flags to stand-alone ftpd. 
### Network daemon (NFS): All need rpcbind_enable="YES" ### amd_enable="NO" # Run amd service with $amd_flags (or NO). amd_program="/usr/sbin/amd" # path to amd, if you want a different one. amd_flags="-a /.amd_mnt -l syslog /host /etc/amd.map /net /etc/amd.map" amd_map_program="NO" # Can be set to "ypcat -k amd.master" autofs_enable="NO" # Run autofs daemons. automount_flags="" # Flags to automount(8) (if autofs enabled). automountd_flags="" # Flags to automountd(8) (if autofs enabled). autounmountd_flags="" # Flags to autounmountd(8) (if autofs enabled). nfs_client_enable="NO" # This host is an NFS client (or NO). nfs_access_cache="60" # Client cache timeout in seconds nfs_server_enable="NO" # This host is an NFS server (or NO). nfs_server_flags="-u -t" # Flags to nfsd (if enabled). nfs_server_managegids="NO" # The NFS server maps gids for AUTH_SYS (or NO). mountd_enable="NO" # Run mountd (or NO). mountd_flags="-r -S" # Flags to mountd (if NFS server enabled). weak_mountd_authentication="NO" # Allow non-root mount requests to be served. nfs_reserved_port_only="NO" # Provide NFS only on secure port (or NO). nfs_bufpackets="" # bufspace (in packets) for client rpc_lockd_enable="NO" # Run NFS rpc.lockd needed for client/server. rpc_lockd_flags="" # Flags to rpc.lockd (if enabled). rpc_statd_enable="NO" # Run NFS rpc.statd needed for client/server. rpc_statd_flags="" # Flags to rpc.statd (if enabled). rpcbind_enable="NO" # Run the portmapper service (YES/NO). rpcbind_program="/usr/sbin/rpcbind" # path to rpcbind, if you want a different one. rpcbind_flags="" # Flags to rpcbind (if enabled). rpc_ypupdated_enable="NO" # Run if NIS master and SecureRPC (or NO). keyserv_enable="NO" # Run the SecureRPC keyserver (or NO). keyserv_flags="" # Flags to keyserv (if enabled). 
nfsv4_server_enable="NO" # Enable support for NFSv4 nfscbd_enable="NO" # NFSv4 client side callback daemon nfscbd_flags="" # Flags for nfscbd nfsuserd_enable="NO" # NFSv4 user/group name mapping daemon nfsuserd_flags="" # Flags for nfsuserd ### Network Time Services options: ### timed_enable="NO" # Run the time daemon (or NO). timed_flags="" # Flags to timed (if enabled). ntpdate_enable="NO" # Run ntpdate to sync time on boot (or NO). ntpdate_program="/usr/sbin/ntpdate" # path to ntpdate, if you want a different one. ntpdate_flags="-b" # Flags to ntpdate (if enabled). ntpdate_config="/etc/ntp.conf" # ntpdate(8) configuration file ntpdate_hosts="" # Whitespace-separated list of ntpdate(8) servers. ntpd_enable="NO" # Run ntpd Network Time Protocol (or NO). ntpd_program="/usr/sbin/ntpd" # path to ntpd, if you want a different one. ntpd_config="/etc/ntp.conf" # ntpd(8) configuration file ntpd_sync_on_start="NO" # Sync time on ntpd startup, even if offset is high ntpd_flags="-p /var/run/ntpd.pid -f /var/db/ntpd.drift" # Flags to ntpd (if enabled). ntp_src_leapfile="/etc/ntp/leap-seconds" # Initial source for ntpd leapfile ntp_db_leapfile="/var/db/ntpd.leap-seconds.list" # Working copy (updated weekly) leapfile ntp_leapfile_sources="https://www.ietf.org/timezones/data/leap-seconds.list" # Source from which to fetch leapfile ntp_leapfile_fetch_opts="-mq" # Options to use for ntp leapfile fetch, # e.g. --no-verify-peer ntp_leapfile_expiry_days=30 # Check for new leapfile 30 days prior to # expiry. ntp_leapfile_fetch_verbose="NO" # Be verbose during NTP leapfile fetch # Network Information Services (NIS) options: All need rpcbind_enable="YES" ### nis_client_enable="NO" # We're an NIS client (or NO). nis_client_flags="" # Flags to ypbind (if enabled). nis_ypset_enable="NO" # Run ypset at boot time (or NO). nis_ypset_flags="" # Flags to ypset (if enabled). nis_server_enable="NO" # We're an NIS server (or NO). nis_server_flags="" # Flags to ypserv (if enabled). 
nis_ypxfrd_enable="NO" # Run rpc.ypxfrd at boot time (or NO). nis_ypxfrd_flags="" # Flags to rpc.ypxfrd (if enabled). nis_yppasswdd_enable="NO" # Run rpc.yppasswdd at boot time (or NO). nis_yppasswdd_flags="" # Flags to rpc.yppasswdd (if enabled). nis_ypldap_enable="NO" # Run ypldap at boot time (or NO). nis_ypldap_flags="" # Flags to ypldap (if enabled). ### SNMP daemon ### # Be sure to understand the security implications of running SNMP v1/v2 # in your network. bsnmpd_enable="NO" # Run the SNMP daemon (or NO). bsnmpd_flags="" # Flags for bsnmpd. ### Network routing options: ### defaultrouter="NO" # Set to default gateway (or NO). static_arp_pairs="" # Set to static ARP list (or leave empty). static_ndp_pairs="" # Set to static NDP list (or leave empty). static_routes="" # Set to static route list (or leave empty). natm_static_routes="" # Set to static route list for NATM (or leave empty). gateway_enable="NO" # Set to YES if this host will be a gateway. routed_enable="NO" # Set to YES to enable a routing daemon. routed_program="/sbin/routed" # Name of routing daemon to use if enabled. routed_flags="-q" # Flags for routing daemon. arpproxy_all="NO" # replaces obsolete kernel option ARP_PROXYALL. forward_sourceroute="NO" # do source routing (only if gateway_enable is set to "YES") accept_sourceroute="NO" # accept source routed packets to us ### ATM interface options: ### atm_enable="NO" # Configure ATM interfaces (or NO). #atm_netif_hea0="atm 1" # Network interfaces for physical interface. #atm_sigmgr_hea0="uni31" # Signalling manager for physical interface. #atm_prefix_hea0="ILMI" # NSAP prefix (UNI interfaces only) (or ILMI). #atm_macaddr_hea0="NO" # Override physical MAC address (or NO). #atm_arpserver_atm0="0x47.0005.80.999999.9999.9999.9999.999999999999.00" # ATMARP server address (or local). #atm_scsparp_atm0="NO" # Run SCSP/ATMARP on network interface (or NO). atm_pvcs="" # Set to PVC list (or leave empty). 
atm_arps="" # Set to permanent ARP list (or leave empty). ### Bluetooth ### hcsecd_enable="NO" # Enable hcsecd(8) (or NO) hcsecd_config="/etc/bluetooth/hcsecd.conf" # hcsecd(8) configuration file sdpd_enable="NO" # Enable sdpd(8) (or NO) sdpd_control="/var/run/sdp" # sdpd(8) control socket sdpd_groupname="nobody" # set sdpd(8) user/group to run as after sdpd_username="nobody" # it initializes bthidd_enable="NO" # Enable bthidd(8) (or NO) bthidd_config="/etc/bluetooth/bthidd.conf" # bthidd(8) configuration file bthidd_hids="/var/db/bthidd.hids" # bthidd(8) known HID devices file rfcomm_pppd_server_enable="NO" # Enable rfcomm_pppd(8) in server mode (or NO) rfcomm_pppd_server_profile="one two" # Profile to use from /etc/ppp/ppp.conf # #rfcomm_pppd_server_one_bdaddr="" # Override local bdaddr for 'one' rfcomm_pppd_server_one_channel="1" # Override local channel for 'one' #rfcomm_pppd_server_one_register_sp="NO" # Override SP and DUN register #rfcomm_pppd_server_one_register_dun="NO" # for 'one' # #rfcomm_pppd_server_two_bdaddr="" # Override local bdaddr for 'two' rfcomm_pppd_server_two_channel="3" # Override local channel for 'two' #rfcomm_pppd_server_two_register_sp="NO" # Override SP and DUN register #rfcomm_pppd_server_two_register_dun="NO" # for 'two' ubthidhci_enable="NO" # Switch a USB BT controller present on #ubthidhci_busnum="3" # bus 3 and addr 2 from HID mode to HCI mode. #ubthidhci_addr="2" # Check usbconfig list to find the correct # numbers for your system. ### Network link/usability verification options netwait_enable="NO" # Enable rc.d/netwait (or NO) #netwait_ip="" # Wait for ping response from any IP in this list. netwait_timeout="60" # Total number of seconds to perform pings. #netwait_if="" # Wait for active link on each intf in this list. netwait_if_timeout="30" # Total number of seconds to monitor link state. 
### Miscellaneous network options: ### icmp_bmcastecho="NO" # respond to broadcast ping packets ### IPv6 options: ### ipv6_network_interfaces="auto" # List of IPv6 network interfaces # (or "auto" or "none"). ipv6_activate_all_interfaces="NO" # If NO, interfaces which have no # corresponding $ifconfig_IF_ipv6 are # marked as IFDISABLED for security # reasons. ipv6_defaultrouter="NO" # Set to IPv6 default gateway (or NO). #ipv6_defaultrouter="2002:c058:6301::" # Use this for 6to4 (RFC 3068) ipv6_static_routes="" # Set to static route list (or leave empty). #ipv6_static_routes="xxx" # An example to set fec0:0000:0000:0006::/64 # route toward loopback interface. #ipv6_route_xxx="fec0:0000:0000:0006:: -prefixlen 64 ::1" ipv6_gateway_enable="NO" # Set to YES if this host will be a gateway. ipv6_cpe_wanif="NO" # Set to the upstream interface name if this # node will work as a router to forward IPv6 # packets not explicitly addressed to itself. ipv6_privacy="NO" # Use privacy address on RA-receiving IFs # (RFC 4941) route6d_enable="NO" # Set to YES to enable an IPv6 routing daemon. route6d_program="/usr/sbin/route6d" # Name of IPv6 routing daemon. route6d_flags="" # Flags to IPv6 routing daemon. #route6d_flags="-l" # Example for route6d with only IPv6 site local # addrs. #route6d_flags="-q" # If you want to run a routing daemon on an end # node, you should stop advertisement. #ipv6_network_interfaces="ed0 ep0" # Examples for router # or static configuration for end node. # Choose correct prefix value. #ipv6_prefix_ed0="fec0:0000:0000:0001 fec0:0000:0000:0002" # Examples for rtr. #ipv6_prefix_ep0="fec0:0000:0000:0003 fec0:0000:0000:0004" # Examples for rtr. ipv6_default_interface="NO" # Default output interface for scoped addrs. # This works only with # ipv6_gateway_enable="NO". rtsol_flags="" # Flags to IPv6 router solicitation. rtsold_enable="NO" # Set to YES to enable an IPv6 router # solicitation daemon. rtsold_flags="-a" # Flags to an IPv6 router solicitation # daemon. 
rtadvd_enable="NO" # Set to YES to enable an IPv6 router # advertisement daemon. If set to YES, # this router becomes a possible candidate # IPv6 default router for local subnets. rtadvd_interfaces="" # Interfaces rtadvd sends RA packets. mroute6d_enable="NO" # Do IPv6 multicast routing. mroute6d_program="/usr/local/sbin/pim6dd" # Name of IPv6 multicast # routing daemon. You need to # install it from package or # port. mroute6d_flags="" # Flags to IPv6 multicast routing daemon. stf_interface_ipv4addr="" # Local IPv4 addr for 6to4 IPv6 over IPv4 # tunneling interface. Specify this entry # to enable 6to4 interface. stf_interface_ipv4plen="0" # Prefix length for 6to4 IPv4 addr, # to limit peer addr range. Effective value # is 0-31. stf_interface_ipv6_ifid="0:0:0:1" # IPv6 interface id for stf0. # If you like, you can set "AUTO" for this. stf_interface_ipv6_slaid="0000" # IPv6 Site Level Aggregator for stf0 ipv6_ipv4mapping="NO" # Set to "YES" to enable IPv4 mapped IPv6 addr # communication. (like ::ffff:a.b.c.d) ipv6_ipfilter_rules="/etc/ipf6.rules" # rules definition file for ipfilter, # see /usr/src/contrib/ipfilter/rules # for examples ip6addrctl_enable="YES" # Set to YES to enable default address selection ip6addrctl_verbose="NO" # Set to YES to enable verbose configuration messages ip6addrctl_policy="AUTO" # A pre-defined address selection policy # (ipv4_prefer, ipv6_prefer, or AUTO) ############################################################## ### System console options ################################# ############################################################## keyboard="" # keyboard device to use (default /dev/kbd0). keymap="NO" # keymap in /usr/share/{syscons,vt}/keymaps/* (or NO). keyrate="NO" # keyboard rate to: slow, normal, fast (or NO). keybell="NO" # See kbdcontrol(1) for options. Use "off" to disable. keychange="NO" # function keys default values (or NO). cursor="NO" # cursor type {normal|blink|destructive} (or NO). 
scrnmap="NO" # screen map in /usr/share/syscons/scrnmaps/* (or NO). font8x16="NO" # font 8x16 from /usr/share/{syscons,vt}/fonts/* (or NO). font8x14="NO" # font 8x14 from /usr/share/{syscons,vt}/fonts/* (or NO). font8x8="NO" # font 8x8 from /usr/share/{syscons,vt}/fonts/* (or NO). blanktime="300" # blank time (in seconds) or "NO" to turn it off. saver="NO" # screen saver: Uses /boot/kernel/${saver}_saver.ko moused_nondefault_enable="YES" # Treat non-default mice as enabled unless # specifically overridden in rc.conf(5). moused_enable="NO" # Run the mouse daemon. moused_type="auto" # See man page for rc.conf(5) for available settings. moused_port="/dev/psm0" # Set to your mouse port. moused_flags="" # Any additional flags to moused. mousechar_start="NO" # if 0xd0-0xd3 default range is occupied in your # language code table, specify alternative range # start like mousechar_start=3, see vidcontrol(1) allscreens_flags="" # Set this vidcontrol mode for all virtual screens allscreens_kbdflags="" # Set this kbdcontrol mode for all virtual screens ############################################################## ### Mail Transfer Agent (MTA) options ###################### ############################################################## mta_start_script="/etc/rc.sendmail" # Script to start your chosen MTA, called by /etc/rc. # Settings for /etc/rc.sendmail and /etc/rc.d/sendmail: sendmail_enable="NO" # Run the sendmail inbound daemon (YES/NO). 
sendmail_pidfile="/var/run/sendmail.pid" # sendmail pid file sendmail_procname="/usr/sbin/sendmail" # sendmail process name sendmail_flags="-L sm-mta -bd -q30m" # Flags to sendmail (as a server) sendmail_cert_create="YES" # Create a server certificate if none (YES/NO) #sendmail_cert_cn="CN" # CN of the generated certificate sendmail_submit_enable="YES" # Start a localhost-only MTA for mail submission sendmail_submit_flags="-L sm-mta -bd -q30m -ODaemonPortOptions=Addr=localhost" # Flags for localhost-only MTA sendmail_outbound_enable="YES" # Dequeue stuck mail (YES/NO). sendmail_outbound_flags="-L sm-queue -q30m" # Flags to sendmail (outbound only) sendmail_msp_queue_enable="YES" # Dequeue stuck clientmqueue mail (YES/NO). sendmail_msp_queue_flags="-L sm-msp-queue -Ac -q30m" # Flags for sendmail_msp_queue daemon. sendmail_rebuild_aliases="NO" # Run newaliases if necessary (YES/NO). ############################################################## ### Miscellaneous administrative options ################### ############################################################## auditd_enable="NO" # Run the audit daemon. auditd_program="/usr/sbin/auditd" # Path to the audit daemon. auditd_flags="" # Which options to pass to the audit daemon. auditdistd_enable="NO" # Run the auditdistd daemon. auditdistd_program="/usr/sbin/auditdistd" # Path to the auditdistd daemon. auditdistd_flags="" # Which options to pass to the auditdistd daemon. cron_enable="YES" # Run the periodic job daemon. cron_program="/usr/sbin/cron" # Which cron executable to run (if enabled). cron_dst="YES" # Handle DST transitions intelligently (YES/NO) cron_flags="" # Which options to pass to the cron daemon. lpd_enable="NO" # Run the line printer daemon. lpd_program="/usr/sbin/lpd" # path to lpd, if you want a different one. lpd_flags="" # Flags to lpd (if enabled). nscd_enable="NO" # Run the nsswitch caching daemon. chkprintcap_enable="NO" # Run chkprintcap(8) before running lpd. 
chkprintcap_flags="-d" # Create missing directories by default. dumpdev="AUTO" # Device to crashdump to (device name, AUTO, or NO). dumpdir="/var/crash" # Directory where crash dumps are to be stored savecore_enable="YES" # Extract core from dump devices if any savecore_flags="-m 10" # Used if dumpdev is enabled above, and present. # By default, only the 10 most recent kernel dumps # are saved. crashinfo_enable="YES" # Automatically generate crash dump summary. crashinfo_program="/usr/sbin/crashinfo" # Script to generate crash dump summary. quota_enable="NO" # turn on quotas on startup (or NO). check_quotas="YES" # Check quotas on startup (or NO). quotaon_flags="-a" # Turn quotas on for all file systems (if enabled) quotaoff_flags="-a" # Turn quotas off for all file systems at shutdown quotacheck_flags="-a" # Check all file system quotas (if enabled) accounting_enable="NO" # Turn on process accounting (or NO). ibcs2_enable="NO" # Ibcs2 (SCO) emulation loaded at startup (or NO). ibcs2_loaders="coff" # List of additional Ibcs2 loaders (or NO). firstboot_sentinel="/firstboot" # Scripts with "firstboot" keyword are run if # this file exists. Should be on a R/W filesystem so # the file can be deleted after the boot completes. # Emulation/compatibility services provided by /etc/rc.d/abi sysvipc_enable="NO" # Load System V IPC primitives at startup (or NO). linux_enable="NO" # Linux binary compatibility loaded at startup (or NO). svr4_enable="NO" # SysVR4 emulation loaded at startup (or NO). clear_tmp_enable="NO" # Clear /tmp at startup. 
clear_tmp_X="YES" # Clear and recreate X11-related directories in /tmp ldconfig_insecure="NO" # Set to YES to disable ldconfig security checks ldconfig_paths="/usr/lib/compat /usr/local/lib /usr/local/lib/compat/pkg" # shared library search paths ldconfig32_paths="/usr/lib32 /usr/lib32/compat" # 32-bit compatibility shared library search paths ldconfigsoft_paths="/usr/libsoft /usr/libsoft/compat /usr/local/libsoft" # soft float compatibility shared library search paths # Note: temporarily with extra stuff for transition ldconfig_paths_aout="/usr/lib/compat/aout /usr/local/lib/aout" # a.out shared library search paths ldconfig_local_dirs="/usr/local/libdata/ldconfig" # Local directories with ldconfig configuration files. ldconfig_local32_dirs="/usr/local/libdata/ldconfig32" # Local directories with 32-bit compatibility ldconfig # configuration files. ldconfig_localsoft_dirs="/usr/local/libdata/ldconfigsoft" # Local directories with soft float compatibility ldconfig # configuration files. kern_securelevel_enable="NO" # kernel security level (see security(7)) kern_securelevel="-1" # range: -1..3 ; `-1' is the most insecure # Note that setting securelevel to 0 will result # in the system booting with securelevel set to 1, as # init(8) will raise the level when rc(8) completes. update_motd="YES" # update version info in /etc/motd (or NO) entropy_boot_file="/boot/entropy" # Set to NO to disable very early # (used at early boot time) entropy caching through reboots. entropy_file="/entropy" # Set to NO to disable late (used when going multi-user) # entropy through reboots. # /var/db/entropy-file is preferred if / is not avail. entropy_dir="/var/db/entropy" # Set to NO to disable caching entropy via cron. entropy_save_sz="4096" # Size of the entropy cache files. entropy_save_num="8" # Number of entropy cache files to save. harvest_mask="511" # Entropy device harvests all but the very invasive sources. 
# (See 'sysctl kern.random.harvest' and random(4)) dmesg_enable="YES" # Save dmesg(8) to /var/run/dmesg.boot watchdogd_enable="NO" # Start the software watchdog daemon watchdogd_flags="" # Flags to watchdogd (if enabled) devfs_rulesets="/etc/defaults/devfs.rules /etc/devfs.rules" # Files containing # devfs(8) rules. devfs_system_ruleset="" # The name (NOT number) of a ruleset to apply to /dev devfs_set_rulesets="" # A list of /mount/dev=ruleset_name settings to # apply (must be mounted already, i.e. fstab(5)) devfs_load_rulesets="YES" # Enable to always load the default rulesets performance_cx_lowest="C2" # Online CPU idle state performance_cpu_freq="NONE" # Online CPU frequency economy_cx_lowest="Cmax" # Offline CPU idle state economy_cpu_freq="NONE" # Offline CPU frequency virecover_enable="YES" # Perform housekeeping for the vi(1) editor ugidfw_enable="NO" # Load mac_bsdextended(4) rules on boot bsdextended_script="/etc/rc.bsdextended" # Default mac_bsdextended(4) # ruleset file. newsyslog_enable="YES" # Run newsyslog at startup. newsyslog_flags="-CN" # Newsyslog flags to create marked files mixer_enable="YES" # Run the sound mixer. opensm_enable="NO" # Opensm(8) for infiniband devices defaults to off # rctl(8) requires kernel options RACCT and RCTL rctl_enable="YES" # Load rctl(8) rules on boot rctl_rules="/etc/rctl.conf" # rctl(8) ruleset. See rctl.conf(5). 
iovctl_files="" # Config files for iovctl(8) ############################################################## ### Jail Configuration (see rc.conf(5) manual page) ########## ############################################################## jail_enable="NO" # Set to NO to disable starting of any jails jail_parallel_start="NO" # Start jails in the background jail_list="" # Space separated list of names of jails jail_reverse_stop="NO" # Stop jails in reverse order ############################################################## ### Define source_rc_confs, the mechanism used by /etc/rc.* ## ### scripts to source rc_conf_files overrides safely. ## ############################################################## if [ -z "${source_rc_confs_defined}" ]; then source_rc_confs_defined=yes source_rc_confs() { local i sourced_files for i in ${rc_conf_files}; do case ${sourced_files} in *:$i:*) ;; *) sourced_files="${sourced_files}:$i:" if [ -r $i ]; then . $i fi ;; esac done # Re-do process to pick up [possibly] redefined $rc_conf_files for i in ${rc_conf_files}; do case ${sourced_files} in *:$i:*) ;; *) sourced_files="${sourced_files}:$i:" if [ -r $i ]; then . $i fi ;; esac done } fi Index: projects/vnet/libexec/ftpd/blacklist.c =================================================================== --- projects/vnet/libexec/ftpd/blacklist.c (revision 301522) +++ projects/vnet/libexec/ftpd/blacklist.c (revision 301523) @@ -1,55 +1,53 @@ /*- * Copyright (c) 2016 The FreeBSD Foundation * All rights reserved. * * This software was developed by Kurt Lidl under sponsorship from the * FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #include #include #include #include #include "blacklist_client.h" #include static struct blacklist *blstate; void blacklist_init(void) { blstate = blacklist_open(); } void blacklist_notify(int action, int fd, char *msg) { if (blstate == NULL) - blacklist_init(); - if (blstate == NULL) return; (void)blacklist_r(blstate, action, fd, msg); } Index: projects/vnet/libexec/ftpd/ftpd.c =================================================================== --- projects/vnet/libexec/ftpd/ftpd.c (revision 301522) +++ projects/vnet/libexec/ftpd/ftpd.c (revision 301523) @@ -1,3505 +1,3505 @@ /* * Copyright (c) 1985, 1988, 1990, 1992, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char copyright[] = "@(#) Copyright (c) 1985, 1988, 1990, 1992, 1993, 1994\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #endif #ifndef lint #if 0 static char sccsid[] = "@(#)ftpd.c 8.4 (Berkeley) 4/16/94"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); /* * FTP server. 
*/ #include #include #include #include #include #include #include #include #include #include #include #define FTP_NAMES #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef LOGIN_CAP #include #endif #ifdef USE_PAM #include #endif #ifdef USE_BLACKLIST #include "blacklist_client.h" #endif #include "pathnames.h" #include "extern.h" #include static char version[] = "Version 6.00LS"; #undef main union sockunion ctrl_addr; union sockunion data_source; union sockunion data_dest; union sockunion his_addr; union sockunion pasv_addr; int daemon_mode; int data; int dataport; int hostinfo = 1; /* print host-specific info in messages */ int logged_in; struct passwd *pw; char *homedir; int ftpdebug; int timeout = 900; /* timeout after 15 minutes of inactivity */ int maxtimeout = 7200;/* don't allow idle time to be set beyond 2 hours */ int logging; int restricted_data_ports = 1; int paranoid = 1; /* be extra careful about security */ int anon_only = 0; /* Only anonymous ftp allowed */ int assumeutf8 = 0; /* Assume that server file names are in UTF-8 */ int guest; int dochroot; char *chrootdir; int dowtmp = 1; int stats; int statfd = -1; int type; int form; int stru; /* avoid C keyword */ int mode; int usedefault = 1; /* for data transfers */ int pdata = -1; /* for passive mode */ int readonly = 0; /* Server is in readonly mode. */ int noepsv = 0; /* EPSV command is disabled. */ int noretr = 0; /* RETR command is disabled. */ int noguestretr = 0; /* RETR command is disabled for anon users. */ int noguestmkd = 0; /* MKD command is disabled for anon users. */ int noguestmod = 1; /* anon users may not modify existing files. 
*/ off_t file_size; off_t byte_count; #if !defined(CMASK) || CMASK == 0 #undef CMASK #define CMASK 027 #endif int defumask = CMASK; /* default umask value */ char tmpline[7]; char *hostname; int epsvall = 0; #ifdef VIRTUAL_HOSTING char *ftpuser; static struct ftphost { struct ftphost *next; struct addrinfo *hostinfo; char *hostname; char *anonuser; char *statfile; char *welcome; char *loginmsg; } *thishost, *firsthost; #endif char remotehost[NI_MAXHOST]; char *ident = NULL; static char wtmpid[20]; #ifdef USE_PAM static int auth_pam(struct passwd**, const char*); pam_handle_t *pamh = NULL; #endif static struct opie opiedata; static char opieprompt[OPIE_CHALLENGE_MAX+1]; static int pwok; char *pid_file = NULL; /* means default location to pidfile(3) */ /* * Limit number of pathnames that glob can return. * A limit of 0 indicates the number of pathnames is unlimited. */ #define MAXGLOBARGS 16384 # /* * Timeout intervals for retrying connections * to hosts that don't accept PORT cmds. This * is a kludge, but given the problems with TCP... */ #define SWAITMAX 90 /* wait at most 90 seconds */ #define SWAITINT 5 /* interval between retries */ int swaitmax = SWAITMAX; int swaitint = SWAITINT; #ifdef SETPROCTITLE #ifdef OLD_SETPROCTITLE char **Argv = NULL; /* pointer to argument vector */ char *LastArgv = NULL; /* end of argv */ #endif /* OLD_SETPROCTITLE */ char proctitle[LINE_MAX]; /* initial part of title */ #endif /* SETPROCTITLE */ #define LOGCMD(cmd, file) logcmd((cmd), (file), NULL, -1) #define LOGCMD2(cmd, file1, file2) logcmd((cmd), (file1), (file2), -1) #define LOGBYTES(cmd, file, cnt) logcmd((cmd), (file), NULL, (cnt)) static volatile sig_atomic_t recvurg; static int transflag; /* NB: for debugging only */ #define STARTXFER flagxfer(1) #define ENDXFER flagxfer(0) #define START_UNSAFE maskurg(1) #define END_UNSAFE maskurg(0) /* It's OK to put an `else' clause after this macro. 
*/ #define CHECKOOB(action) \ if (recvurg) { \ recvurg = 0; \ if (myoob()) { \ ENDXFER; \ action; \ } \ } #ifdef VIRTUAL_HOSTING static void inithosts(int); static void selecthost(union sockunion *); #endif static void ack(char *); static void sigurg(int); static void maskurg(int); static void flagxfer(int); static int myoob(void); static int checkuser(char *, char *, int, char **, int *); static FILE *dataconn(char *, off_t, char *); static void dolog(struct sockaddr *); static void end_login(void); static FILE *getdatasock(char *); static int guniquefd(char *, char **); static void lostconn(int); static void sigquit(int); static int receive_data(FILE *, FILE *); static int send_data(FILE *, FILE *, size_t, off_t, int); static struct passwd * sgetpwnam(char *); static char *sgetsave(char *); static void reapchild(int); static void appendf(char **, char *, ...) __printflike(2, 3); static void logcmd(char *, char *, char *, off_t); static void logxfer(char *, off_t, time_t); static char *doublequote(char *); static int *socksetup(int, char *, const char *); int main(int argc, char *argv[], char **envp) { socklen_t addrlen; - int ch, on = 1, tos; + int ch, on = 1, tos, s = STDIN_FILENO; char *cp, line[LINE_MAX]; FILE *fd; char *bindname = NULL; const char *bindport = "ftp"; int family = AF_UNSPEC; struct sigaction sa; tzset(); /* in case no timezone database in ~ftp */ sigemptyset(&sa.sa_mask); sa.sa_flags = SA_RESTART; #ifdef OLD_SETPROCTITLE /* * Save start and extent of argv for setproctitle. */ Argv = argv; while (*envp) envp++; LastArgv = envp[-1] + strlen(envp[-1]); #endif /* OLD_SETPROCTITLE */ /* * Prevent diagnostic messages from appearing on stderr. * We run as a daemon or from inetd; in both cases, there's * more reason in logging to syslog. */ (void) freopen(_PATH_DEVNULL, "w", stderr); opterr = 0; /* * LOG_NDELAY sets up the logging connection immediately, * necessary for anonymous ftp's that chroot and can't do it later. 
*/ openlog("ftpd", LOG_PID | LOG_NDELAY, LOG_FTP); while ((ch = getopt(argc, argv, "468a:AdDEhlmMoOp:P:rRSt:T:u:UvW")) != -1) { switch (ch) { case '4': family = (family == AF_INET6) ? AF_UNSPEC : AF_INET; break; case '6': family = (family == AF_INET) ? AF_UNSPEC : AF_INET6; break; case '8': assumeutf8 = 1; break; case 'a': bindname = optarg; break; case 'A': anon_only = 1; break; case 'd': ftpdebug++; break; case 'D': daemon_mode++; break; case 'E': noepsv = 1; break; case 'h': hostinfo = 0; break; case 'l': logging++; /* > 1 == extra logging */ break; case 'm': noguestmod = 0; break; case 'M': noguestmkd = 1; break; case 'o': noretr = 1; break; case 'O': noguestretr = 1; break; case 'p': pid_file = optarg; break; case 'P': bindport = optarg; break; case 'r': readonly = 1; break; case 'R': paranoid = 0; break; case 'S': stats++; break; case 't': timeout = atoi(optarg); if (maxtimeout < timeout) maxtimeout = timeout; break; case 'T': maxtimeout = atoi(optarg); if (timeout > maxtimeout) timeout = maxtimeout; break; case 'u': { long val = 0; val = strtol(optarg, &optarg, 8); if (*optarg != '\0' || val < 0) syslog(LOG_WARNING, "bad value for -u"); else defumask = val; break; } case 'U': restricted_data_ports = 0; break; case 'v': ftpdebug++; break; case 'W': dowtmp = 0; break; default: syslog(LOG_WARNING, "unknown flag -%c ignored", optopt); break; } } if (daemon_mode) { int *ctl_sock, fd, maxfd = -1, nfds, i; fd_set defreadfds, readfds; pid_t pid; struct pidfh *pfh; if ((pfh = pidfile_open(pid_file, 0600, &pid)) == NULL) { if (errno == EEXIST) { syslog(LOG_ERR, "%s already running, pid %d", getprogname(), (int)pid); exit(1); } syslog(LOG_WARNING, "pidfile_open: %m"); } /* * Detach from parent. 
*/ if (daemon(1, 1) < 0) { syslog(LOG_ERR, "failed to become a daemon"); exit(1); } if (pfh != NULL && pidfile_write(pfh) == -1) syslog(LOG_WARNING, "pidfile_write: %m"); sa.sa_handler = reapchild; (void)sigaction(SIGCHLD, &sa, NULL); #ifdef VIRTUAL_HOSTING inithosts(family); #endif /* * Open a socket, bind it to the FTP port, and start * listening. */ ctl_sock = socksetup(family, bindname, bindport); if (ctl_sock == NULL) exit(1); FD_ZERO(&defreadfds); for (i = 1; i <= *ctl_sock; i++) { FD_SET(ctl_sock[i], &defreadfds); if (listen(ctl_sock[i], 32) < 0) { syslog(LOG_ERR, "control listen: %m"); exit(1); } if (maxfd < ctl_sock[i]) maxfd = ctl_sock[i]; } /* * Loop forever accepting connection requests and forking off * children to handle them. */ while (1) { FD_COPY(&defreadfds, &readfds); nfds = select(maxfd + 1, &readfds, NULL, NULL, 0); if (nfds <= 0) { if (nfds < 0 && errno != EINTR) syslog(LOG_WARNING, "select: %m"); continue; } pid = -1; for (i = 1; i <= *ctl_sock; i++) if (FD_ISSET(ctl_sock[i], &readfds)) { addrlen = sizeof(his_addr); fd = accept(ctl_sock[i], (struct sockaddr *)&his_addr, &addrlen); if (fd == -1) { syslog(LOG_WARNING, "accept: %m"); continue; } switch (pid = fork()) { case 0: /* child */ - (void) dup2(fd, 0); - (void) dup2(fd, 1); + (void) dup2(fd, s); + (void) dup2(fd, STDOUT_FILENO); (void) close(fd); for (i = 1; i <= *ctl_sock; i++) close(ctl_sock[i]); if (pfh != NULL) pidfile_close(pfh); goto gotchild; case -1: syslog(LOG_WARNING, "fork: %m"); /* FALLTHROUGH */ default: close(fd); } } } } else { addrlen = sizeof(his_addr); - if (getpeername(0, (struct sockaddr *)&his_addr, &addrlen) < 0) { + if (getpeername(s, (struct sockaddr *)&his_addr, &addrlen) < 0) { syslog(LOG_ERR, "getpeername (%s): %m",argv[0]); exit(1); } #ifdef VIRTUAL_HOSTING if (his_addr.su_family == AF_INET6 && IN6_IS_ADDR_V4MAPPED(&his_addr.su_sin6.sin6_addr)) family = AF_INET; else family = his_addr.su_family; inithosts(family); #endif } gotchild: sa.sa_handler = SIG_DFL; 
(void)sigaction(SIGCHLD, &sa, NULL); sa.sa_handler = sigurg; sa.sa_flags = 0; /* don't restart syscalls for SIGURG */ (void)sigaction(SIGURG, &sa, NULL); sigfillset(&sa.sa_mask); /* block all signals in handler */ sa.sa_flags = SA_RESTART; sa.sa_handler = sigquit; (void)sigaction(SIGHUP, &sa, NULL); (void)sigaction(SIGINT, &sa, NULL); (void)sigaction(SIGQUIT, &sa, NULL); (void)sigaction(SIGTERM, &sa, NULL); sa.sa_handler = lostconn; (void)sigaction(SIGPIPE, &sa, NULL); addrlen = sizeof(ctrl_addr); - if (getsockname(0, (struct sockaddr *)&ctrl_addr, &addrlen) < 0) { + if (getsockname(s, (struct sockaddr *)&ctrl_addr, &addrlen) < 0) { syslog(LOG_ERR, "getsockname (%s): %m",argv[0]); exit(1); } dataport = ntohs(ctrl_addr.su_port) - 1; /* as per RFC 959 */ #ifdef VIRTUAL_HOSTING /* select our identity from virtual host table */ selecthost(&ctrl_addr); #endif #ifdef IP_TOS if (ctrl_addr.su_family == AF_INET) { tos = IPTOS_LOWDELAY; - if (setsockopt(0, IPPROTO_IP, IP_TOS, &tos, sizeof(int)) < 0) + if (setsockopt(s, IPPROTO_IP, IP_TOS, &tos, sizeof(int)) < 0) syslog(LOG_WARNING, "control setsockopt (IP_TOS): %m"); } #endif /* * Disable Nagle on the control channel so that we don't have to wait * for peer's ACK before issuing our next reply. 
*/ - if (setsockopt(0, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)) < 0) + if (setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)) < 0) syslog(LOG_WARNING, "control setsockopt (TCP_NODELAY): %m"); data_source.su_port = htons(ntohs(ctrl_addr.su_port) - 1); (void)snprintf(wtmpid, sizeof(wtmpid), "%xftpd", getpid()); /* Try to handle urgent data inline */ #ifdef SO_OOBINLINE - if (setsockopt(0, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on)) < 0) + if (setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on)) < 0) syslog(LOG_WARNING, "control setsockopt (SO_OOBINLINE): %m"); #endif #ifdef F_SETOWN - if (fcntl(fileno(stdin), F_SETOWN, getpid()) == -1) + if (fcntl(s, F_SETOWN, getpid()) == -1) syslog(LOG_ERR, "fcntl F_SETOWN: %m"); #endif dolog((struct sockaddr *)&his_addr); /* * Set up default state */ data = -1; type = TYPE_A; form = FORM_N; stru = STRU_F; mode = MODE_S; tmpline[0] = '\0'; /* If logins are disabled, print out the message. */ if ((fd = fopen(_PATH_NOLOGIN,"r")) != NULL) { while (fgets(line, sizeof(line), fd) != NULL) { if ((cp = strchr(line, '\n')) != NULL) *cp = '\0'; lreply(530, "%s", line); } (void) fflush(stdout); (void) fclose(fd); reply(530, "System not available."); exit(0); } #ifdef VIRTUAL_HOSTING fd = fopen(thishost->welcome, "r"); #else fd = fopen(_PATH_FTPWELCOME, "r"); #endif if (fd != NULL) { while (fgets(line, sizeof(line), fd) != NULL) { if ((cp = strchr(line, '\n')) != NULL) *cp = '\0'; lreply(220, "%s", line); } (void) fflush(stdout); (void) fclose(fd); /* reply(220,) must follow */ } #ifndef VIRTUAL_HOSTING if ((hostname = malloc(MAXHOSTNAMELEN)) == NULL) fatalerror("Ran out of memory."); if (gethostname(hostname, MAXHOSTNAMELEN - 1) < 0) hostname[0] = '\0'; hostname[MAXHOSTNAMELEN - 1] = '\0'; #endif if (hostinfo) reply(220, "%s FTP server (%s) ready.", hostname, version); else reply(220, "FTP server ready."); #ifdef USE_BLACKLIST blacklist_init(); #endif for (;;) (void) yyparse(); /* NOTREACHED */ } static void lostconn(int signo) { 
if (ftpdebug) syslog(LOG_DEBUG, "lost connection"); dologout(1); } static void sigquit(int signo) { syslog(LOG_ERR, "got signal %d", signo); dologout(1); } #ifdef VIRTUAL_HOSTING /* * read in virtual host tables (if they exist) */ static void inithosts(int family) { int insert; size_t len; FILE *fp; char *cp, *mp, *line; char *hostname; char *vhost, *anonuser, *statfile, *welcome, *loginmsg; struct ftphost *hrp, *lhrp; struct addrinfo hints, *res, *ai; /* * Fill in the default host information */ if ((hostname = malloc(MAXHOSTNAMELEN)) == NULL) fatalerror("Ran out of memory."); if (gethostname(hostname, MAXHOSTNAMELEN - 1) < 0) hostname[0] = '\0'; hostname[MAXHOSTNAMELEN - 1] = '\0'; if ((hrp = malloc(sizeof(struct ftphost))) == NULL) fatalerror("Ran out of memory."); hrp->hostname = hostname; hrp->hostinfo = NULL; memset(&hints, 0, sizeof(hints)); hints.ai_flags = AI_PASSIVE; hints.ai_family = family; hints.ai_socktype = SOCK_STREAM; if (getaddrinfo(hrp->hostname, NULL, &hints, &res) == 0) hrp->hostinfo = res; hrp->statfile = _PATH_FTPDSTATFILE; hrp->welcome = _PATH_FTPWELCOME; hrp->loginmsg = _PATH_FTPLOGINMESG; hrp->anonuser = "ftp"; hrp->next = NULL; thishost = firsthost = lhrp = hrp; if ((fp = fopen(_PATH_FTPHOSTS, "r")) != NULL) { int addrsize, gothost; void *addr; struct hostent *hp; while ((line = fgetln(fp, &len)) != NULL) { int i, hp_error; /* skip comments */ if (line[0] == '#') continue; if (line[len - 1] == '\n') { line[len - 1] = '\0'; mp = NULL; } else { if ((mp = malloc(len + 1)) == NULL) fatalerror("Ran out of memory."); memcpy(mp, line, len); mp[len] = '\0'; line = mp; } cp = strtok(line, " \t"); /* skip empty lines */ if (cp == NULL) goto nextline; vhost = cp; /* set defaults */ anonuser = "ftp"; statfile = _PATH_FTPDSTATFILE; welcome = _PATH_FTPWELCOME; loginmsg = _PATH_FTPLOGINMESG; /* * Preparse the line so we can use its info * for all the addresses associated with * the virtual host name. 
* Field 0, the virtual host name, is special: * it's already parsed off and will be strdup'ed * later, after we know its canonical form. */ for (i = 1; i < 5 && (cp = strtok(NULL, " \t")); i++) if (*cp != '-' && (cp = strdup(cp))) switch (i) { case 1: /* anon user permissions */ anonuser = cp; break; case 2: /* statistics file */ statfile = cp; break; case 3: /* welcome message */ welcome = cp; break; case 4: /* login message */ loginmsg = cp; break; default: /* programming error */ abort(); /* NOTREACHED */ } hints.ai_flags = AI_PASSIVE; hints.ai_family = family; hints.ai_socktype = SOCK_STREAM; if (getaddrinfo(vhost, NULL, &hints, &res) != 0) goto nextline; for (ai = res; ai != NULL && ai->ai_addr != NULL; ai = ai->ai_next) { gothost = 0; for (hrp = firsthost; hrp != NULL; hrp = hrp->next) { struct addrinfo *hi; for (hi = hrp->hostinfo; hi != NULL; hi = hi->ai_next) if (hi->ai_addrlen == ai->ai_addrlen && memcmp(hi->ai_addr, ai->ai_addr, ai->ai_addr->sa_len) == 0) { gothost++; break; } if (gothost) break; } if (hrp == NULL) { if ((hrp = malloc(sizeof(struct ftphost))) == NULL) goto nextline; hrp->hostname = NULL; insert = 1; } else { if (hrp->hostinfo && hrp->hostinfo != res) freeaddrinfo(hrp->hostinfo); insert = 0; /* host already in the chain */ } hrp->hostinfo = res; /* * determine hostname to use. 
* force defined name if there is a valid alias * otherwise fallback to primary hostname */ /* XXX: getaddrinfo() can't do alias check */ switch(hrp->hostinfo->ai_family) { case AF_INET: addr = &((struct sockaddr_in *)hrp->hostinfo->ai_addr)->sin_addr; addrsize = sizeof(struct in_addr); break; case AF_INET6: addr = &((struct sockaddr_in6 *)hrp->hostinfo->ai_addr)->sin6_addr; addrsize = sizeof(struct in6_addr); break; default: /* should not reach here */ freeaddrinfo(hrp->hostinfo); if (insert) free(hrp); /*not in chain, can free*/ else hrp->hostinfo = NULL; /*mark as blank*/ goto nextline; /* NOTREACHED */ } if ((hp = getipnodebyaddr(addr, addrsize, hrp->hostinfo->ai_family, &hp_error)) != NULL) { if (strcmp(vhost, hp->h_name) != 0) { if (hp->h_aliases == NULL) vhost = hp->h_name; else { i = 0; while (hp->h_aliases[i] && strcmp(vhost, hp->h_aliases[i]) != 0) ++i; if (hp->h_aliases[i] == NULL) vhost = hp->h_name; } } } if (hrp->hostname && strcmp(hrp->hostname, vhost) != 0) { free(hrp->hostname); hrp->hostname = NULL; } if (hrp->hostname == NULL && (hrp->hostname = strdup(vhost)) == NULL) { freeaddrinfo(hrp->hostinfo); hrp->hostinfo = NULL; /* mark as blank */ if (hp) freehostent(hp); goto nextline; } hrp->anonuser = anonuser; hrp->statfile = statfile; hrp->welcome = welcome; hrp->loginmsg = loginmsg; if (insert) { hrp->next = NULL; lhrp->next = hrp; lhrp = hrp; } if (hp) freehostent(hp); } nextline: if (mp) free(mp); } (void) fclose(fp); } } static void selecthost(union sockunion *su) { struct ftphost *hrp; u_int16_t port; #ifdef INET6 struct in6_addr *mapped_in6 = NULL; #endif struct addrinfo *hi; #ifdef INET6 /* * XXX IPv4 mapped IPv6 addr consideraton, * specified in rfc2373. 
*/ if (su->su_family == AF_INET6 && IN6_IS_ADDR_V4MAPPED(&su->su_sin6.sin6_addr)) mapped_in6 = &su->su_sin6.sin6_addr; #endif hrp = thishost = firsthost; /* default */ port = su->su_port; su->su_port = 0; while (hrp != NULL) { for (hi = hrp->hostinfo; hi != NULL; hi = hi->ai_next) { if (memcmp(su, hi->ai_addr, hi->ai_addrlen) == 0) { thishost = hrp; goto found; } #ifdef INET6 /* XXX IPv4 mapped IPv6 addr consideraton */ if (hi->ai_addr->sa_family == AF_INET && mapped_in6 != NULL && (memcmp(&mapped_in6->s6_addr[12], &((struct sockaddr_in *)hi->ai_addr)->sin_addr, sizeof(struct in_addr)) == 0)) { thishost = hrp; goto found; } #endif } hrp = hrp->next; } found: su->su_port = port; /* setup static variables as appropriate */ hostname = thishost->hostname; ftpuser = thishost->anonuser; } #endif /* * Helper function for sgetpwnam(). */ static char * sgetsave(char *s) { char *new = malloc(strlen(s) + 1); if (new == NULL) { reply(421, "Ran out of memory."); dologout(1); /* NOTREACHED */ } (void) strcpy(new, s); return (new); } /* * Save the result of a getpwnam. Used for USER command, since * the data returned must not be clobbered by any other command * (e.g., globbing). * NB: The data returned by sgetpwnam() will remain valid until * the next call to this function. Its difference from getpwnam() * is that sgetpwnam() is known to be called from ftpd code only. 
*/
static struct passwd *
sgetpwnam(char *name)
{
	static struct passwd save;	/* copy handed back to the caller */
	struct passwd *entry;

	entry = getpwnam(name);
	if (entry == NULL)
		return (NULL);

	/*
	 * A non-NULL pw_name means an earlier call filled "save"; release
	 * the strings that call duplicated before overwriting them.
	 */
	if (save.pw_name != NULL) {
		free(save.pw_name);
		free(save.pw_passwd);
		free(save.pw_class);
		free(save.pw_gecos);
		free(save.pw_dir);
		free(save.pw_shell);
	}

	/*
	 * Structure assignment carries over the non-string members; every
	 * string member is then replaced by a copy made with sgetsave().
	 */
	save = *entry;
	save.pw_name = sgetsave(entry->pw_name);
	save.pw_passwd = sgetsave(entry->pw_passwd);
	save.pw_class = sgetsave(entry->pw_class);
	save.pw_gecos = sgetsave(entry->pw_gecos);
	save.pw_dir = sgetsave(entry->pw_dir);
	save.pw_shell = sgetsave(entry->pw_shell);

	return (&save);
}

static int login_attempts;		/* number of failed login attempts */
static int askpasswd;			/* had user command, ask for passwd */
static char curname[MAXLOGNAME];	/* current USER name */

/*
 * USER command.
 * Sets global passwd pointer pw if named account exists and is acceptable;
 * sets askpasswd if a PASS command is expected.  If logged in previously,
 * need to reset state.  If name is "ftp" or "anonymous", the name is not in
 * _PATH_FTPUSERS, and ftp account exists, set guest and pw, then just return.
 * If account doesn't exist, ask for passwd anyway.  Otherwise, check user
 * requesting login privileges.  Disallow anyone who does not have a standard
 * shell as returned by getusershell().  Disallow anyone mentioned in the file
 * _PATH_FTPUSERS to allow people such as root and uucp to be avoided.
*/ void user(char *name) { int ecode; char *cp, *shell; if (logged_in) { if (guest) { reply(530, "Can't change user from guest login."); return; } else if (dochroot) { reply(530, "Can't change user from chroot user."); return; } end_login(); } guest = 0; #ifdef VIRTUAL_HOSTING pw = sgetpwnam(thishost->anonuser); #else pw = sgetpwnam("ftp"); #endif if (strcmp(name, "ftp") == 0 || strcmp(name, "anonymous") == 0) { if (checkuser(_PATH_FTPUSERS, "ftp", 0, NULL, &ecode) || (ecode != 0 && ecode != ENOENT)) reply(530, "User %s access denied.", name); else if (checkuser(_PATH_FTPUSERS, "anonymous", 0, NULL, &ecode) || (ecode != 0 && ecode != ENOENT)) reply(530, "User %s access denied.", name); else if (pw != NULL) { guest = 1; askpasswd = 1; reply(331, "Guest login ok, send your email address as password."); } else reply(530, "User %s unknown.", name); if (!askpasswd && logging) syslog(LOG_NOTICE, "ANONYMOUS FTP LOGIN REFUSED FROM %s", remotehost); return; } if (anon_only != 0) { reply(530, "Sorry, only anonymous ftp allowed."); return; } if ((pw = sgetpwnam(name))) { if ((shell = pw->pw_shell) == NULL || *shell == 0) shell = _PATH_BSHELL; setusershell(); while ((cp = getusershell()) != NULL) if (strcmp(cp, shell) == 0) break; endusershell(); if (cp == NULL || (checkuser(_PATH_FTPUSERS, name, 1, NULL, &ecode) || (ecode != 0 && ecode != ENOENT))) { reply(530, "User %s access denied.", name); if (logging) syslog(LOG_NOTICE, "FTP LOGIN REFUSED FROM %s, %s", remotehost, name); pw = NULL; return; } } if (logging) strncpy(curname, name, sizeof(curname)-1); pwok = 0; #ifdef USE_PAM /* XXX Kluge! The conversation mechanism needs to be fixed. */ #endif if (opiechallenge(&opiedata, name, opieprompt) == 0) { pwok = (pw != NULL) && opieaccessfile(remotehost) && opiealways(pw->pw_dir); reply(331, "Response to %s %s for %s.", opieprompt, pwok ? 
"requested" : "required", name); } else { pwok = 1; reply(331, "Password required for %s.", name); } askpasswd = 1; /* * Delay before reading passwd after first failed * attempt to slow down passwd-guessing programs. */ if (login_attempts) sleep(login_attempts); } /* * Check if a user is in the file "fname", * return a pointer to a malloc'd string with the rest * of the matching line in "residue" if not NULL. */ static int checkuser(char *fname, char *name, int pwset, char **residue, int *ecode) { FILE *fd; int found = 0; size_t len; char *line, *mp, *p; if (ecode != NULL) *ecode = 0; if ((fd = fopen(fname, "r")) != NULL) { while (!found && (line = fgetln(fd, &len)) != NULL) { /* skip comments */ if (line[0] == '#') continue; if (line[len - 1] == '\n') { line[len - 1] = '\0'; mp = NULL; } else { if ((mp = malloc(len + 1)) == NULL) fatalerror("Ran out of memory."); memcpy(mp, line, len); mp[len] = '\0'; line = mp; } /* avoid possible leading and trailing whitespace */ p = strtok(line, " \t"); /* skip empty lines */ if (p == NULL) goto nextline; /* * if first chr is '@', check group membership */ if (p[0] == '@') { int i = 0; struct group *grp; if (p[1] == '\0') /* single @ matches anyone */ found = 1; else { if ((grp = getgrnam(p+1)) == NULL) goto nextline; /* * Check user's default group */ if (pwset && grp->gr_gid == pw->pw_gid) found = 1; /* * Check supplementary groups */ while (!found && grp->gr_mem[i]) found = strcmp(name, grp->gr_mem[i++]) == 0; } } /* * Otherwise, just check for username match */ else found = strcmp(p, name) == 0; /* * Save the rest of line to "residue" if matched */ if (found && residue) { if ((p = strtok(NULL, "")) != NULL) p += strspn(p, " \t"); if (p && *p) { if ((*residue = strdup(p)) == NULL) fatalerror("Ran out of memory."); } else *residue = NULL; } nextline: if (mp) free(mp); } (void) fclose(fd); } else if (ecode != NULL) *ecode = errno; return (found); } /* * Terminate login as previous user, if any, resetting state; * used when 
USER command is given or login fails. */ static void end_login(void) { #ifdef USE_PAM int e; #endif (void) seteuid(0); if (logged_in && dowtmp) ftpd_logwtmp(wtmpid, NULL, NULL); pw = NULL; #ifdef LOGIN_CAP setusercontext(NULL, getpwuid(0), 0, LOGIN_SETALL & ~(LOGIN_SETLOGIN | LOGIN_SETUSER | LOGIN_SETGROUP | LOGIN_SETPATH | LOGIN_SETENV)); #endif #ifdef USE_PAM if (pamh) { if ((e = pam_setcred(pamh, PAM_DELETE_CRED)) != PAM_SUCCESS) syslog(LOG_ERR, "pam_setcred: %s", pam_strerror(pamh, e)); if ((e = pam_close_session(pamh,0)) != PAM_SUCCESS) syslog(LOG_ERR, "pam_close_session: %s", pam_strerror(pamh, e)); if ((e = pam_end(pamh, e)) != PAM_SUCCESS) syslog(LOG_ERR, "pam_end: %s", pam_strerror(pamh, e)); pamh = NULL; } #endif logged_in = 0; guest = 0; dochroot = 0; } #ifdef USE_PAM /* * the following code is stolen from imap-uw PAM authentication module and * login.c */ #define COPY_STRING(s) (s ? strdup(s) : NULL) struct cred_t { const char *uname; /* user name */ const char *pass; /* password */ }; typedef struct cred_t cred_t; static int auth_conv(int num_msg, const struct pam_message **msg, struct pam_response **resp, void *appdata) { int i; cred_t *cred = (cred_t *) appdata; struct pam_response *reply; reply = calloc(num_msg, sizeof *reply); if (reply == NULL) return PAM_BUF_ERR; for (i = 0; i < num_msg; i++) { switch (msg[i]->msg_style) { case PAM_PROMPT_ECHO_ON: /* assume want user name */ reply[i].resp_retcode = PAM_SUCCESS; reply[i].resp = COPY_STRING(cred->uname); /* PAM frees resp. */ break; case PAM_PROMPT_ECHO_OFF: /* assume want password */ reply[i].resp_retcode = PAM_SUCCESS; reply[i].resp = COPY_STRING(cred->pass); /* PAM frees resp. */ break; case PAM_TEXT_INFO: case PAM_ERROR_MSG: reply[i].resp_retcode = PAM_SUCCESS; reply[i].resp = NULL; break; default: /* unknown message style */ free(reply); return PAM_CONV_ERR; } } *resp = reply; return PAM_SUCCESS; } /* * Attempt to authenticate the user using PAM. 
Returns 0 if the user is * authenticated, or 1 if not authenticated. If some sort of PAM system * error occurs (e.g., the "/etc/pam.conf" file is missing) then this * function returns -1. This can be used as an indication that we should * fall back to a different authentication mechanism. */ static int auth_pam(struct passwd **ppw, const char *pass) { const char *tmpl_user; const void *item; int rval; int e; cred_t auth_cred = { (*ppw)->pw_name, pass }; struct pam_conv conv = { &auth_conv, &auth_cred }; e = pam_start("ftpd", (*ppw)->pw_name, &conv, &pamh); if (e != PAM_SUCCESS) { /* * In OpenPAM, it's OK to pass NULL to pam_strerror() * if context creation has failed in the first place. */ syslog(LOG_ERR, "pam_start: %s", pam_strerror(NULL, e)); return -1; } e = pam_set_item(pamh, PAM_RHOST, remotehost); if (e != PAM_SUCCESS) { syslog(LOG_ERR, "pam_set_item(PAM_RHOST): %s", pam_strerror(pamh, e)); if ((e = pam_end(pamh, e)) != PAM_SUCCESS) { syslog(LOG_ERR, "pam_end: %s", pam_strerror(pamh, e)); } pamh = NULL; return -1; } e = pam_authenticate(pamh, 0); switch (e) { case PAM_SUCCESS: /* * With PAM we support the concept of a "template" * user. The user enters a login name which is * authenticated by PAM, usually via a remote service * such as RADIUS or TACACS+. If authentication * succeeds, a different but related "template" name * is used for setting the credentials, shell, and * home directory. The name the user enters need only * exist on the remote authentication server, but the * template name must be present in the local password * database. * * This is supported by two various mechanisms in the * individual modules. However, from the application's * point of view, the template user is always passed * back as a changed value of the PAM_USER item. 
*/ if ((e = pam_get_item(pamh, PAM_USER, &item)) == PAM_SUCCESS) { tmpl_user = (const char *) item; if (strcmp((*ppw)->pw_name, tmpl_user) != 0) *ppw = getpwnam(tmpl_user); } else syslog(LOG_ERR, "Couldn't get PAM_USER: %s", pam_strerror(pamh, e)); rval = 0; break; case PAM_AUTH_ERR: case PAM_USER_UNKNOWN: case PAM_MAXTRIES: rval = 1; break; default: syslog(LOG_ERR, "pam_authenticate: %s", pam_strerror(pamh, e)); rval = -1; break; } if (rval == 0) { e = pam_acct_mgmt(pamh, 0); if (e != PAM_SUCCESS) { syslog(LOG_ERR, "pam_acct_mgmt: %s", pam_strerror(pamh, e)); rval = 1; } } if (rval != 0) { if ((e = pam_end(pamh, e)) != PAM_SUCCESS) { syslog(LOG_ERR, "pam_end: %s", pam_strerror(pamh, e)); } pamh = NULL; } return rval; } #endif /* USE_PAM */ void pass(char *passwd) { int rval, ecode; FILE *fd; #ifdef LOGIN_CAP login_cap_t *lc = NULL; #endif #ifdef USE_PAM int e; #endif char *residue = NULL; char *xpasswd; if (logged_in || askpasswd == 0) { reply(503, "Login with USER first."); return; } askpasswd = 0; if (!guest) { /* "ftp" is only account allowed no password */ if (pw == NULL) { rval = 1; /* failure below */ goto skip; } #ifdef USE_PAM rval = auth_pam(&pw, passwd); if (rval >= 0) { opieunlock(); goto skip; } #endif if (opieverify(&opiedata, passwd) == 0) xpasswd = pw->pw_passwd; else if (pwok) { xpasswd = crypt(passwd, pw->pw_passwd); if (passwd[0] == '\0' && pw->pw_passwd[0] != '\0') xpasswd = ":"; } else { rval = 1; goto skip; } rval = strcmp(pw->pw_passwd, xpasswd); if (pw->pw_expire && time(NULL) >= pw->pw_expire) rval = 1; /* failure */ skip: /* * If rval == 1, the user failed the authentication check * above. If rval == 0, either PAM or local authentication * succeeded. 
*/ if (rval) { reply(530, "Login incorrect."); #ifdef USE_BLACKLIST - blacklist_notify(1, 0, "Login incorrect"); + blacklist_notify(1, STDIN_FILENO, "Login incorrect"); #endif if (logging) { syslog(LOG_NOTICE, "FTP LOGIN FAILED FROM %s", remotehost); syslog(LOG_AUTHPRIV | LOG_NOTICE, "FTP LOGIN FAILED FROM %s, %s", remotehost, curname); } pw = NULL; if (login_attempts++ >= 5) { syslog(LOG_NOTICE, "repeated login failures from %s", remotehost); exit(0); } return; } #ifdef USE_BLACKLIST else { - blacklist_notify(0, 0, "Login successful"); + blacklist_notify(0, STDIN_FILENO, "Login successful"); } #endif } login_attempts = 0; /* this time successful */ if (setegid(pw->pw_gid) < 0) { reply(550, "Can't set gid."); return; } /* May be overridden by login.conf */ (void) umask(defumask); #ifdef LOGIN_CAP if ((lc = login_getpwclass(pw)) != NULL) { char remote_ip[NI_MAXHOST]; if (getnameinfo((struct sockaddr *)&his_addr, his_addr.su_len, remote_ip, sizeof(remote_ip) - 1, NULL, 0, NI_NUMERICHOST)) *remote_ip = 0; remote_ip[sizeof(remote_ip) - 1] = 0; if (!auth_hostok(lc, remotehost, remote_ip)) { syslog(LOG_INFO|LOG_AUTH, "FTP LOGIN FAILED (HOST) as %s: permission denied.", pw->pw_name); reply(530, "Permission denied."); pw = NULL; return; } if (!auth_timeok(lc, time(NULL))) { reply(530, "Login not available right now."); pw = NULL; return; } } setusercontext(lc, pw, 0, LOGIN_SETALL & ~(LOGIN_SETUSER | LOGIN_SETPATH | LOGIN_SETENV)); #else setlogin(pw->pw_name); (void) initgroups(pw->pw_name, pw->pw_gid); #endif #ifdef USE_PAM if (pamh) { if ((e = pam_open_session(pamh, 0)) != PAM_SUCCESS) { syslog(LOG_ERR, "pam_open_session: %s", pam_strerror(pamh, e)); } else if ((e = pam_setcred(pamh, PAM_ESTABLISH_CRED)) != PAM_SUCCESS) { syslog(LOG_ERR, "pam_setcred: %s", pam_strerror(pamh, e)); } } #endif dochroot = checkuser(_PATH_FTPCHROOT, pw->pw_name, 1, &residue, &ecode) #ifdef LOGIN_CAP /* Allow login.conf configuration as well */ || login_getcapbool(lc, "ftp-chroot", 0) #endif ; 
/* * It is possible that checkuser() failed to open the chroot file. * If this is the case, report that logins are un-available, since we * have no way of checking whether or not the user should be chrooted. * We ignore ENOENT since it is not required that this file be present. */ if (ecode != 0 && ecode != ENOENT) { reply(530, "Login not available right now."); return; } chrootdir = NULL; /* Disable wtmp logging when chrooting. */ if (dochroot || guest) dowtmp = 0; if (dowtmp) ftpd_logwtmp(wtmpid, pw->pw_name, (struct sockaddr *)&his_addr); logged_in = 1; if (guest && stats && statfd < 0) #ifdef VIRTUAL_HOSTING statfd = open(thishost->statfile, O_WRONLY|O_APPEND); #else statfd = open(_PATH_FTPDSTATFILE, O_WRONLY|O_APPEND); #endif if (statfd < 0) stats = 0; /* * For a chrooted local user, * a) see whether ftpchroot(5) specifies a chroot directory, * b) extract the directory pathname from the line, * c) expand it to the absolute pathname if necessary. */ if (dochroot && residue && (chrootdir = strtok(residue, " \t")) != NULL) { if (chrootdir[0] != '/') asprintf(&chrootdir, "%s/%s", pw->pw_dir, chrootdir); else chrootdir = strdup(chrootdir); /* make it permanent */ if (chrootdir == NULL) fatalerror("Ran out of memory."); } if (guest || dochroot) { /* * If no chroot directory set yet, use the login directory. * Copy it so it can be modified while pw->pw_dir stays intact. */ if (chrootdir == NULL && (chrootdir = strdup(pw->pw_dir)) == NULL) fatalerror("Ran out of memory."); /* * Check for the "/chroot/./home" syntax, * separate the chroot and home directory pathnames. */ if ((homedir = strstr(chrootdir, "/./")) != NULL) { *(homedir++) = '\0'; /* wipe '/' */ homedir++; /* skip '.' */ } else { /* * We MUST do a chdir() after the chroot. Otherwise * the old current directory will be accessible as "." * outside the new root! 
*/ homedir = "/"; } /* * Finally, do chroot() */ if (chroot(chrootdir) < 0) { reply(550, "Can't change root."); goto bad; } __FreeBSD_libc_enter_restricted_mode(); } else /* real user w/o chroot */ homedir = pw->pw_dir; /* * Set euid *before* doing chdir() so * a) the user won't be carried to a directory that he couldn't reach * on his own due to no permission to upper path components, * b) NFS mounted homedirs w/restrictive permissions will be accessible * (uid 0 has no root power over NFS if not mapped explicitly.) */ if (seteuid(pw->pw_uid) < 0) { reply(550, "Can't set uid."); goto bad; } if (chdir(homedir) < 0) { if (guest || dochroot) { reply(550, "Can't change to base directory."); goto bad; } else { if (chdir("/") < 0) { reply(550, "Root is inaccessible."); goto bad; } lreply(230, "No directory! Logging in with home=/."); } } /* * Display a login message, if it exists. * N.B. reply(230,) must follow the message. */ #ifdef VIRTUAL_HOSTING fd = fopen(thishost->loginmsg, "r"); #else fd = fopen(_PATH_FTPLOGINMESG, "r"); #endif if (fd != NULL) { char *cp, line[LINE_MAX]; while (fgets(line, sizeof(line), fd) != NULL) { if ((cp = strchr(line, '\n')) != NULL) *cp = '\0'; lreply(230, "%s", line); } (void) fflush(stdout); (void) fclose(fd); } if (guest) { if (ident != NULL) free(ident); ident = strdup(passwd); if (ident == NULL) fatalerror("Ran out of memory."); reply(230, "Guest login ok, access restrictions apply."); #ifdef SETPROCTITLE #ifdef VIRTUAL_HOSTING if (thishost != firsthost) snprintf(proctitle, sizeof(proctitle), "%s: anonymous(%s)/%s", remotehost, hostname, passwd); else #endif snprintf(proctitle, sizeof(proctitle), "%s: anonymous/%s", remotehost, passwd); setproctitle("%s", proctitle); #endif /* SETPROCTITLE */ if (logging) syslog(LOG_INFO, "ANONYMOUS FTP LOGIN FROM %s, %s", remotehost, passwd); } else { if (dochroot) reply(230, "User %s logged in, " "access restrictions apply.", pw->pw_name); else reply(230, "User %s logged in.", pw->pw_name); #ifdef 
SETPROCTITLE snprintf(proctitle, sizeof(proctitle), "%s: user/%s", remotehost, pw->pw_name); setproctitle("%s", proctitle); #endif /* SETPROCTITLE */ if (logging) syslog(LOG_INFO, "FTP LOGIN FROM %s as %s", remotehost, pw->pw_name); } if (logging && (guest || dochroot)) syslog(LOG_INFO, "session root changed to %s", chrootdir); #ifdef LOGIN_CAP login_close(lc); #endif if (residue) free(residue); return; bad: /* Forget all about it... */ #ifdef LOGIN_CAP login_close(lc); #endif if (residue) free(residue); end_login(); } void retrieve(char *cmd, char *name) { FILE *fin, *dout; struct stat st; int (*closefunc)(FILE *); time_t start; char line[BUFSIZ]; if (cmd == 0) { fin = fopen(name, "r"), closefunc = fclose; st.st_size = 0; } else { (void) snprintf(line, sizeof(line), cmd, name); name = line; fin = ftpd_popen(line, "r"), closefunc = ftpd_pclose; st.st_size = -1; st.st_blksize = BUFSIZ; } if (fin == NULL) { if (errno != 0) { perror_reply(550, name); if (cmd == 0) { LOGCMD("get", name); } } return; } byte_count = -1; if (cmd == 0) { if (fstat(fileno(fin), &st) < 0) { perror_reply(550, name); goto done; } if (!S_ISREG(st.st_mode)) { /* * Never sending a raw directory is a workaround * for buggy clients that will attempt to RETR * a directory before listing it, e.g., Mozilla. * Preventing a guest from getting irregular files * is a simple security measure. 
*/ if (S_ISDIR(st.st_mode) || guest) { reply(550, "%s: not a plain file.", name); goto done; } st.st_size = -1; /* st.st_blksize is set for all descriptor types */ } } if (restart_point) { if (type == TYPE_A) { off_t i, n; int c; n = restart_point; i = 0; while (i++ < n) { if ((c=getc(fin)) == EOF) { perror_reply(550, name); goto done; } if (c == '\n') i++; } } else if (lseek(fileno(fin), restart_point, L_SET) < 0) { perror_reply(550, name); goto done; } } dout = dataconn(name, st.st_size, "w"); if (dout == NULL) goto done; time(&start); send_data(fin, dout, st.st_blksize, st.st_size, restart_point == 0 && cmd == 0 && S_ISREG(st.st_mode)); if (cmd == 0 && guest && stats && byte_count > 0) logxfer(name, byte_count, start); (void) fclose(dout); data = -1; pdata = -1; done: if (cmd == 0) LOGBYTES("get", name, byte_count); (*closefunc)(fin); } void store(char *name, char *mode, int unique) { int fd; FILE *fout, *din; int (*closefunc)(FILE *); if (*mode == 'a') { /* APPE */ if (unique) { /* Programming error */ syslog(LOG_ERR, "Internal: unique flag to APPE"); unique = 0; } if (guest && noguestmod) { reply(550, "Appending to existing file denied."); goto err; } restart_point = 0; /* not affected by preceding REST */ } if (unique) /* STOU overrides REST */ restart_point = 0; if (guest && noguestmod) { if (restart_point) { /* guest STOR w/REST */ reply(550, "Modifying existing file denied."); goto err; } else /* treat guest STOR as STOU */ unique = 1; } if (restart_point) mode = "r+"; /* so ASCII manual seek can work */ if (unique) { if ((fd = guniquefd(name, &name)) < 0) goto err; fout = fdopen(fd, mode); } else fout = fopen(name, mode); closefunc = fclose; if (fout == NULL) { perror_reply(553, name); goto err; } byte_count = -1; if (restart_point) { if (type == TYPE_A) { off_t i, n; int c; n = restart_point; i = 0; while (i++ < n) { if ((c=getc(fout)) == EOF) { perror_reply(550, name); goto done; } if (c == '\n') i++; } /* * We must do this seek to "current" position * 
because we are changing from reading to * writing. */ if (fseeko(fout, 0, SEEK_CUR) < 0) { perror_reply(550, name); goto done; } } else if (lseek(fileno(fout), restart_point, L_SET) < 0) { perror_reply(550, name); goto done; } } din = dataconn(name, -1, "r"); if (din == NULL) goto done; if (receive_data(din, fout) == 0) { if (unique) reply(226, "Transfer complete (unique file name:%s).", name); else reply(226, "Transfer complete."); } (void) fclose(din); data = -1; pdata = -1; done: LOGBYTES(*mode == 'a' ? "append" : "put", name, byte_count); (*closefunc)(fout); return; err: LOGCMD(*mode == 'a' ? "append" : "put" , name); return; } static FILE * getdatasock(char *mode) { int on = 1, s, t, tries; if (data >= 0) return (fdopen(data, mode)); s = socket(data_dest.su_family, SOCK_STREAM, 0); if (s < 0) goto bad; if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) syslog(LOG_WARNING, "data setsockopt (SO_REUSEADDR): %m"); /* anchor socket to avoid multi-homing problems */ data_source = ctrl_addr; data_source.su_port = htons(dataport); (void) seteuid(0); for (tries = 1; ; tries++) { /* * We should loop here since it's possible that * another ftpd instance has passed this point and is * trying to open a data connection in active mode now. * Until the other connection is opened, we'll be getting * EADDRINUSE because no SOCK_STREAM sockets in the system * can share both local and remote addresses, localIP:20 * and *:* in this case. */ if (bind(s, (struct sockaddr *)&data_source, data_source.su_len) >= 0) break; if (errno != EADDRINUSE || tries > 10) goto bad; sleep(tries); } (void) seteuid(pw->pw_uid); #ifdef IP_TOS if (data_source.su_family == AF_INET) { on = IPTOS_THROUGHPUT; if (setsockopt(s, IPPROTO_IP, IP_TOS, &on, sizeof(int)) < 0) syslog(LOG_WARNING, "data setsockopt (IP_TOS): %m"); } #endif #ifdef TCP_NOPUSH /* * Turn off push flag to keep sender TCP from sending short packets * at the boundaries of each write(). 
*/ on = 1; if (setsockopt(s, IPPROTO_TCP, TCP_NOPUSH, &on, sizeof on) < 0) syslog(LOG_WARNING, "data setsockopt (TCP_NOPUSH): %m"); #endif return (fdopen(s, mode)); bad: /* Return the real value of errno (close may change it) */ t = errno; (void) seteuid(pw->pw_uid); (void) close(s); errno = t; return (NULL); } static FILE * dataconn(char *name, off_t size, char *mode) { char sizebuf[32]; FILE *file; int retry = 0, tos, conerrno; file_size = size; byte_count = 0; if (size != -1) (void) snprintf(sizebuf, sizeof(sizebuf), " (%jd bytes)", (intmax_t)size); else *sizebuf = '\0'; if (pdata >= 0) { union sockunion from; socklen_t fromlen = ctrl_addr.su_len; int flags, s; struct timeval timeout; fd_set set; FD_ZERO(&set); FD_SET(pdata, &set); timeout.tv_usec = 0; timeout.tv_sec = 120; /* * Granted a socket is in the blocking I/O mode, * accept() will block after a successful select() * if the selected connection dies in between. * Therefore set the non-blocking I/O flag here. */ if ((flags = fcntl(pdata, F_GETFL, 0)) == -1 || fcntl(pdata, F_SETFL, flags | O_NONBLOCK) == -1) goto pdata_err; if (select(pdata+1, &set, NULL, NULL, &timeout) <= 0 || (s = accept(pdata, (struct sockaddr *) &from, &fromlen)) < 0) goto pdata_err; (void) close(pdata); pdata = s; /* * Unset the inherited non-blocking I/O flag * on the child socket so stdio can work on it. */ if ((flags = fcntl(pdata, F_GETFL, 0)) == -1 || fcntl(pdata, F_SETFL, flags & ~O_NONBLOCK) == -1) goto pdata_err; #ifdef IP_TOS if (from.su_family == AF_INET) { tos = IPTOS_THROUGHPUT; if (setsockopt(s, IPPROTO_IP, IP_TOS, &tos, sizeof(int)) < 0) syslog(LOG_WARNING, "pdata setsockopt (IP_TOS): %m"); } #endif reply(150, "Opening %s mode data connection for '%s'%s.", type == TYPE_A ? 
"ASCII" : "BINARY", name, sizebuf); return (fdopen(pdata, mode)); pdata_err: reply(425, "Can't open data connection."); (void) close(pdata); pdata = -1; return (NULL); } if (data >= 0) { reply(125, "Using existing data connection for '%s'%s.", name, sizebuf); usedefault = 1; return (fdopen(data, mode)); } if (usedefault) data_dest = his_addr; usedefault = 1; do { file = getdatasock(mode); if (file == NULL) { char hostbuf[NI_MAXHOST], portbuf[NI_MAXSERV]; if (getnameinfo((struct sockaddr *)&data_source, data_source.su_len, hostbuf, sizeof(hostbuf) - 1, portbuf, sizeof(portbuf) - 1, NI_NUMERICHOST|NI_NUMERICSERV)) *hostbuf = *portbuf = 0; hostbuf[sizeof(hostbuf) - 1] = 0; portbuf[sizeof(portbuf) - 1] = 0; reply(425, "Can't create data socket (%s,%s): %s.", hostbuf, portbuf, strerror(errno)); return (NULL); } data = fileno(file); conerrno = 0; if (connect(data, (struct sockaddr *)&data_dest, data_dest.su_len) == 0) break; conerrno = errno; (void) fclose(file); data = -1; if (conerrno == EADDRINUSE) { sleep(swaitint); retry += swaitint; } else { break; } } while (retry <= swaitmax); if (conerrno != 0) { reply(425, "Can't build data connection: %s.", strerror(conerrno)); return (NULL); } reply(150, "Opening %s mode data connection for '%s'%s.", type == TYPE_A ? "ASCII" : "BINARY", name, sizebuf); return (file); } /* * A helper macro to avoid code duplication * in send_data() and receive_data(). * * XXX We have to block SIGURG during putc() because BSD stdio * is unable to restart interrupted write operations and hence * the entire buffer contents will be lost as soon as a write() * call indicates EINTR to stdio. 
 */
#define FTPD_PUTC(ch, file, label)					\
	do {								\
		int ret;						\
									\
		do {							\
			START_UNSAFE;					\
			ret = putc((ch), (file));			\
			END_UNSAFE;					\
			CHECKOOB(return (-1))				\
			else if (ferror(file))				\
				goto label;				\
			clearerr(file);					\
		} while (ret == EOF);					\
	} while (0)

/*
 * Transfer the contents of "instr" to "outstr" peer using the appropriate
 * encapsulation of the data subject to Mode, Structure, and Type.
 *
 * "blksize" sizes the buffer of the read()/write() fallback loop,
 * "filesize" is the number of bytes the sendfile() loop tries to send,
 * and "isreg" selects the sendfile() path for binary transfers.
 *
 * Returns 0 after a 226 reply has been sent, or -1 on failure or when
 * CHECKOOB takes its return action.  byte_count is advanced as data
 * goes out.
 *
 * NOTE(review): STARTXFER, ENDXFER and CHECKOOB are macros defined
 * outside this section; CHECKOOB(x) is evidently an if-statement that
 * the following "else" attaches to -- confirm against its definition.
 *
 * NB: Form isn't handled.
 */
static int
send_data(FILE *instr, FILE *outstr, size_t blksize, off_t filesize, int isreg)
{
	int c, cp, filefd, netfd;
	char *buf;

	STARTXFER;

	switch (type) {

	case TYPE_A:
		/* ASCII: send every bare '\n' as "\r\n". */
		cp = EOF;
		for (;;) {
			c = getc(instr);
			CHECKOOB(return (-1))
			else if (c == EOF && ferror(instr))
				goto file_err;
			if (c == EOF) {
				if (ferror(instr)) {	/* resume after OOB */
					clearerr(instr);
					continue;
				}
				if (feof(instr))	/* EOF */
					break;
				syslog(LOG_ERR, "Internal: impossible condition"
						" on file after getc()");
				goto file_err;
			}
			if (c == '\n' && cp != '\r') {
				FTPD_PUTC('\r', outstr, data_err);
				byte_count++;
			}
			FTPD_PUTC(c, outstr, data_err);
			byte_count++;
			cp = c;
		}
#ifdef notyet	/* BSD stdio isn't ready for that */
		while (fflush(outstr) == EOF) {
			CHECKOOB(return (-1))
			else
				goto data_err;
			clearerr(outstr);
		}
		ENDXFER;
#else
		ENDXFER;
		if (fflush(outstr) == EOF)
			goto data_err;
#endif
		reply(226, "Transfer complete.");
		return (0);

	case TYPE_I:
	case TYPE_L:
		/*
		 * isreg is only set if we are not doing restart and we
		 * are sending a regular file
		 */
		netfd = fileno(outstr);
		filefd = fileno(instr);

		if (isreg) {
			char *msg = "Transfer complete.";
			off_t cnt, offset;
			int err;

			cnt = offset = 0;

			while (filesize > 0) {
				err = sendfile(filefd, netfd, offset, 0,
					       NULL, &cnt, 0);
				/*
				 * Calculate byte_count before OOB processing.
				 * It can be used in myoob() later.
				 */
				byte_count += cnt;
				offset += cnt;
				filesize -= cnt;
				CHECKOOB(return (-1))
				else if (err == -1) {
					/*
					 * Nothing was sent and the failure was
					 * not an interrupt: fall back to the
					 * read()/write() loop below.
					 */
					if (errno != EINTR &&
					    cnt == 0 && offset == 0)
						goto oldway;
					goto data_err;
				}
				if (err == -1)	/* resume after OOB */
					continue;
				/*
				 * We hit the EOF prematurely.
				 * Perhaps the file was externally truncated.
				 */
				if (cnt == 0) {
					msg = "Transfer finished due to "
					      "premature end of file.";
					break;
				}
			}
			ENDXFER;
			reply(226, "%s", msg);
			return (0);
		}

oldway:
		if ((buf = malloc(blksize)) == NULL) {
			ENDXFER;
			reply(451, "Ran out of memory.");
			return (-1);
		}

		for (;;) {
			int cnt, len;
			char *bp;

			cnt = read(filefd, buf, blksize);
			CHECKOOB(free(buf); return (-1))
			else if (cnt < 0) {
				free(buf);
				goto file_err;
			}
			if (cnt < 0)	/* resume after OOB */
				continue;
			if (cnt == 0)	/* EOF */
				break;
			/* Keep writing until the whole chunk has gone out. */
			for (len = cnt, bp = buf; len > 0;) {
				cnt = write(netfd, bp, len);
				CHECKOOB(free(buf); return (-1))
				else if (cnt < 0) {
					free(buf);
					goto data_err;
				}
				if (cnt <= 0)
					continue;
				len -= cnt;
				bp += cnt;
				byte_count += cnt;
			}
		}
		ENDXFER;
		free(buf);
		reply(226, "Transfer complete.");
		return (0);
	default:
		ENDXFER;
		reply(550, "Unimplemented TYPE %d in send_data.", type);
		return (-1);
	}

data_err:
	ENDXFER;
	perror_reply(426, "Data connection");
	return (-1);

file_err:
	ENDXFER;
	perror_reply(551, "Error on input file");
	return (-1);
}

/*
 * Transfer data from peer to "outstr" using the appropriate encapsulation of
 * the data subject to Mode, Structure, and Type.
 *
 * N.B.: Form isn't handled.
*/ static int receive_data(FILE *instr, FILE *outstr) { int c, cp; int bare_lfs = 0; STARTXFER; switch (type) { case TYPE_I: case TYPE_L: for (;;) { int cnt, len; char *bp; char buf[BUFSIZ]; cnt = read(fileno(instr), buf, sizeof(buf)); CHECKOOB(return (-1)) else if (cnt < 0) goto data_err; if (cnt < 0) /* resume after OOB */ continue; if (cnt == 0) /* EOF */ break; for (len = cnt, bp = buf; len > 0;) { cnt = write(fileno(outstr), bp, len); CHECKOOB(return (-1)) else if (cnt < 0) goto file_err; if (cnt <= 0) continue; len -= cnt; bp += cnt; byte_count += cnt; } } ENDXFER; return (0); case TYPE_E: ENDXFER; reply(553, "TYPE E not implemented."); return (-1); case TYPE_A: cp = EOF; for (;;) { c = getc(instr); CHECKOOB(return (-1)) else if (c == EOF && ferror(instr)) goto data_err; if (c == EOF && ferror(instr)) { /* resume after OOB */ clearerr(instr); continue; } if (cp == '\r') { if (c != '\n') FTPD_PUTC('\r', outstr, file_err); } else if (c == '\n') bare_lfs++; if (c == '\r') { byte_count++; cp = c; continue; } /* Check for EOF here in order not to lose last \r. */ if (c == EOF) { if (feof(instr)) /* EOF */ break; syslog(LOG_ERR, "Internal: impossible condition" " on data stream after getc()"); goto data_err; } byte_count++; FTPD_PUTC(c, outstr, file_err); cp = c; } #ifdef notyet /* BSD stdio isn't ready for that */ while (fflush(outstr) == EOF) { CHECKOOB(return (-1)) else goto file_err; clearerr(outstr); } ENDXFER; #else ENDXFER; if (fflush(outstr) == EOF) goto file_err; #endif if (bare_lfs) { lreply(226, "WARNING! 
%d bare linefeeds received in ASCII mode.", bare_lfs); (void)printf(" File may not have transferred correctly.\r\n"); } return (0); default: ENDXFER; reply(550, "Unimplemented TYPE %d in receive_data.", type); return (-1); } data_err: ENDXFER; perror_reply(426, "Data connection"); return (-1); file_err: ENDXFER; perror_reply(452, "Error writing to file"); return (-1); } void statfilecmd(char *filename) { FILE *fin; int atstart; int c, code; char line[LINE_MAX]; struct stat st; code = lstat(filename, &st) == 0 && S_ISDIR(st.st_mode) ? 212 : 213; (void)snprintf(line, sizeof(line), _PATH_LS " -lgA %s", filename); fin = ftpd_popen(line, "r"); if (fin == NULL) { perror_reply(551, filename); return; } lreply(code, "Status of %s:", filename); atstart = 1; while ((c = getc(fin)) != EOF) { if (c == '\n') { if (ferror(stdout)){ perror_reply(421, "Control connection"); (void) ftpd_pclose(fin); dologout(1); /* NOTREACHED */ } if (ferror(fin)) { perror_reply(551, filename); (void) ftpd_pclose(fin); return; } (void) putc('\r', stdout); } /* * RFC 959 says neutral text should be prepended before * a leading 3-digit number followed by whitespace, but * many ftp clients can be confused by any leading digits, * as a matter of fact. 
*/ if (atstart && isdigit(c)) (void) putc(' ', stdout); (void) putc(c, stdout); atstart = (c == '\n'); } (void) ftpd_pclose(fin); reply(code, "End of status."); } void statcmd(void) { union sockunion *su; u_char *a, *p; char hname[NI_MAXHOST]; int ispassive; if (hostinfo) { lreply(211, "%s FTP server status:", hostname); printf(" %s\r\n", version); } else lreply(211, "FTP server status:"); printf(" Connected to %s", remotehost); if (!getnameinfo((struct sockaddr *)&his_addr, his_addr.su_len, hname, sizeof(hname) - 1, NULL, 0, NI_NUMERICHOST)) { hname[sizeof(hname) - 1] = 0; if (strcmp(hname, remotehost) != 0) printf(" (%s)", hname); } printf("\r\n"); if (logged_in) { if (guest) printf(" Logged in anonymously\r\n"); else printf(" Logged in as %s\r\n", pw->pw_name); } else if (askpasswd) printf(" Waiting for password\r\n"); else printf(" Waiting for user name\r\n"); printf(" TYPE: %s", typenames[type]); if (type == TYPE_A || type == TYPE_E) printf(", FORM: %s", formnames[form]); if (type == TYPE_L) #if CHAR_BIT == 8 printf(" %d", CHAR_BIT); #else printf(" %d", bytesize); /* need definition! */ #endif printf("; STRUcture: %s; transfer MODE: %s\r\n", strunames[stru], modenames[mode]); if (data != -1) printf(" Data connection open\r\n"); else if (pdata != -1) { ispassive = 1; su = &pasv_addr; goto printaddr; } else if (usedefault == 0) { ispassive = 0; su = &data_dest; printaddr: #define UC(b) (((int) b) & 0xff) if (epsvall) { printf(" EPSV only mode (EPSV ALL)\r\n"); goto epsvonly; } /* PORT/PASV */ if (su->su_family == AF_INET) { a = (u_char *) &su->su_sin.sin_addr; p = (u_char *) &su->su_sin.sin_port; printf(" %s (%d,%d,%d,%d,%d,%d)\r\n", ispassive ? 
"PASV" : "PORT", UC(a[0]), UC(a[1]), UC(a[2]), UC(a[3]), UC(p[0]), UC(p[1])); } /* LPRT/LPSV */ { int alen, af, i; switch (su->su_family) { case AF_INET: a = (u_char *) &su->su_sin.sin_addr; p = (u_char *) &su->su_sin.sin_port; alen = sizeof(su->su_sin.sin_addr); af = 4; break; case AF_INET6: a = (u_char *) &su->su_sin6.sin6_addr; p = (u_char *) &su->su_sin6.sin6_port; alen = sizeof(su->su_sin6.sin6_addr); af = 6; break; default: af = 0; break; } if (af) { printf(" %s (%d,%d,", ispassive ? "LPSV" : "LPRT", af, alen); for (i = 0; i < alen; i++) printf("%d,", UC(a[i])); printf("%d,%d,%d)\r\n", 2, UC(p[0]), UC(p[1])); } } epsvonly:; /* EPRT/EPSV */ { int af; switch (su->su_family) { case AF_INET: af = 1; break; case AF_INET6: af = 2; break; default: af = 0; break; } if (af) { union sockunion tmp; tmp = *su; if (tmp.su_family == AF_INET6) tmp.su_sin6.sin6_scope_id = 0; if (!getnameinfo((struct sockaddr *)&tmp, tmp.su_len, hname, sizeof(hname) - 1, NULL, 0, NI_NUMERICHOST)) { hname[sizeof(hname) - 1] = 0; printf(" %s |%d|%s|%d|\r\n", ispassive ? "EPSV" : "EPRT", af, hname, htons(tmp.su_port)); } } } #undef UC } else printf(" No data connection\r\n"); reply(211, "End of status."); } void fatalerror(char *s) { reply(451, "Error in server: %s", s); reply(221, "Closing connection due to server error."); dologout(0); /* NOTREACHED */ } void reply(int n, const char *fmt, ...) { va_list ap; (void)printf("%d ", n); va_start(ap, fmt); (void)vprintf(fmt, ap); va_end(ap); (void)printf("\r\n"); (void)fflush(stdout); if (ftpdebug) { syslog(LOG_DEBUG, "<--- %d ", n); va_start(ap, fmt); vsyslog(LOG_DEBUG, fmt, ap); va_end(ap); } } void lreply(int n, const char *fmt, ...) 
{ va_list ap; (void)printf("%d- ", n); va_start(ap, fmt); (void)vprintf(fmt, ap); va_end(ap); (void)printf("\r\n"); (void)fflush(stdout); if (ftpdebug) { syslog(LOG_DEBUG, "<--- %d- ", n); va_start(ap, fmt); vsyslog(LOG_DEBUG, fmt, ap); va_end(ap); } } static void ack(char *s) { reply(250, "%s command successful.", s); } void nack(char *s) { reply(502, "%s command not implemented.", s); } /* ARGSUSED */ void yyerror(char *s) { char *cp; if ((cp = strchr(cbuf,'\n'))) *cp = '\0'; reply(500, "%s: command not understood.", cbuf); } void delete(char *name) { struct stat st; LOGCMD("delete", name); if (lstat(name, &st) < 0) { perror_reply(550, name); return; } if (S_ISDIR(st.st_mode)) { if (rmdir(name) < 0) { perror_reply(550, name); return; } goto done; } if (guest && noguestmod) { reply(550, "Operation not permitted."); return; } if (unlink(name) < 0) { perror_reply(550, name); return; } done: ack("DELE"); } void cwd(char *path) { if (chdir(path) < 0) perror_reply(550, path); else ack("CWD"); } void makedir(char *name) { char *s; LOGCMD("mkdir", name); if (guest && noguestmkd) reply(550, "Operation not permitted."); else if (mkdir(name, 0777) < 0) perror_reply(550, name); else { if ((s = doublequote(name)) == NULL) fatalerror("Ran out of memory."); reply(257, "\"%s\" directory created.", s); free(s); } } void removedir(char *name) { LOGCMD("rmdir", name); if (rmdir(name) < 0) perror_reply(550, name); else ack("RMD"); } void pwd(void) { char *s, path[MAXPATHLEN + 1]; if (getcwd(path, sizeof(path)) == NULL) perror_reply(550, "Get current directory"); else { if ((s = doublequote(path)) == NULL) fatalerror("Ran out of memory."); reply(257, "\"%s\" is current directory.", s); free(s); } } char * renamefrom(char *name) { struct stat st; if (guest && noguestmod) { reply(550, "Operation not permitted."); return (NULL); } if (lstat(name, &st) < 0) { perror_reply(550, name); return (NULL); } reply(350, "File exists, ready for destination name."); return (name); } void 
renamecmd(char *from, char *to) { struct stat st; LOGCMD2("rename", from, to); if (guest && (stat(to, &st) == 0)) { reply(550, "%s: permission denied.", to); return; } if (rename(from, to) < 0) perror_reply(550, "rename"); else ack("RNTO"); } static void dolog(struct sockaddr *who) { char who_name[NI_MAXHOST]; realhostname_sa(remotehost, sizeof(remotehost) - 1, who, who->sa_len); remotehost[sizeof(remotehost) - 1] = 0; if (getnameinfo(who, who->sa_len, who_name, sizeof(who_name) - 1, NULL, 0, NI_NUMERICHOST)) *who_name = 0; who_name[sizeof(who_name) - 1] = 0; #ifdef SETPROCTITLE #ifdef VIRTUAL_HOSTING if (thishost != firsthost) snprintf(proctitle, sizeof(proctitle), "%s: connected (to %s)", remotehost, hostname); else #endif snprintf(proctitle, sizeof(proctitle), "%s: connected", remotehost); setproctitle("%s", proctitle); #endif /* SETPROCTITLE */ if (logging) { #ifdef VIRTUAL_HOSTING if (thishost != firsthost) syslog(LOG_INFO, "connection from %s (%s) to %s", remotehost, who_name, hostname); else #endif syslog(LOG_INFO, "connection from %s (%s)", remotehost, who_name); } } /* * Record logout in wtmp file * and exit with supplied status. */ void dologout(int status) { if (logged_in && dowtmp) { (void) seteuid(0); ftpd_logwtmp(wtmpid, NULL, NULL); } /* beware of flushing buffers after a SIGPIPE */ _exit(status); } static void sigurg(int signo) { recvurg = 1; } static void maskurg(int flag) { int oerrno; sigset_t sset; if (!transflag) { syslog(LOG_ERR, "Internal: maskurg() while no transfer"); return; } oerrno = errno; sigemptyset(&sset); sigaddset(&sset, SIGURG); sigprocmask(flag ? 
SIG_BLOCK : SIG_UNBLOCK, &sset, NULL);
	errno = oerrno;
}

/*
 * Mark the start (flag != 0) or end (flag == 0) of a data transfer.
 * Starting sets transflag, unblocks SIGURG and clears recvurg; ending
 * blocks SIGURG again and clears transflag.  An unexpected current state
 * is logged but the transition is still performed.
 */
static void
flagxfer(int flag)
{

	if (flag) {
		if (transflag)
			syslog(LOG_ERR, "Internal: flagxfer(1): "
					"transfer already under way");
		/* transflag must be set before maskurg(), which checks it. */
		transflag = 1;
		maskurg(0);
		recvurg = 0;
	} else {
		if (!transflag)
			syslog(LOG_ERR, "Internal: flagxfer(0): "
					"no active transfer");
		/* Block SIGURG while transflag is still set. */
		maskurg(1);
		transflag = 0;
	}
}

/*
 * Returns 0 if OK to resume or -1 if abort requested.
 *
 * Reads one short command from stdin into tmpline during a transfer and
 * handles ABOR (426 + 226 replies, returns -1) and STAT (213 progress
 * reply).  Any other input is left in tmpline.
 */
static int
myoob(void)
{
	char *cp;
	int ret;

	if (!transflag) {
		syslog(LOG_ERR, "Internal: myoob() while no transfer");
		return (0);
	}
	cp = tmpline;
	ret = get_line(cp, 7, stdin);
	if (ret == -1) {
		/* NOTE(review): -1 presumably means the peer went away. */
		reply(221, "You could at least say goodbye.");
		dologout(0);
	} else if (ret == -2) {
		/* Ignore truncated command. */
		return (0);
	}
	upper(cp);
	if (strcmp(cp, "ABOR\r\n") == 0) {
		tmpline[0] = '\0';
		reply(426, "Transfer aborted. Data connection closed.");
		reply(226, "Abort successful.");
		return (-1);
	}
	if (strcmp(cp, "STAT\r\n") == 0) {
		tmpline[0] = '\0';
		/* file_size == -1 is treated as "total size unknown". */
		if (file_size != -1)
			reply(213, "Status: %jd of %jd bytes transferred.",
				   (intmax_t)byte_count, (intmax_t)file_size);
		else
			reply(213, "Status: %jd bytes transferred.",
				   (intmax_t)byte_count);
	}
	return (0);
}

/*
 * Note: a response of 425 is not mentioned as a possible response to
 *	the PASV command in RFC959. However, it has been blessed as
 *	a legitimate response by Jon Postel in a telephone conversation
 *	with Rick Adams on 25 Jan 89.
 *
 * passive() opens a listening data socket (pdata) on the control
 * connection's address with a kernel-chosen port and announces it in a
 * 227 reply.  Any failure goes through pasv_error and yields 425.
 */
void
passive(void)
{
	socklen_t len;
	int on;
	char *p, *a;

	if (pdata >= 0)		/* close old port if one set */
		close(pdata);

	pdata = socket(ctrl_addr.su_family, SOCK_STREAM, 0);
	if (pdata < 0) {
		perror_reply(425, "Can't open passive connection");
		return;
	}
	on = 1;
	/* Failure to set SO_REUSEADDR is logged but not fatal. */
	if (setsockopt(pdata, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0)
		syslog(LOG_WARNING, "pdata setsockopt (SO_REUSEADDR): %m");

	/* Effective uid 0 is held until after bind(). */
	(void) seteuid(0);

#ifdef IP_PORTRANGE
	if (ctrl_addr.su_family == AF_INET) {
		on = restricted_data_ports ?
IP_PORTRANGE_HIGH : IP_PORTRANGE_DEFAULT;
		if (setsockopt(pdata, IPPROTO_IP, IP_PORTRANGE,
			       &on, sizeof(on)) < 0)
			goto pasv_error;
	}
#endif
#ifdef IPV6_PORTRANGE
	if (ctrl_addr.su_family == AF_INET6) {
		on = restricted_data_ports ? IPV6_PORTRANGE_HIGH
					   : IPV6_PORTRANGE_DEFAULT;
		if (setsockopt(pdata, IPPROTO_IPV6, IPV6_PORTRANGE,
			       &on, sizeof(on)) < 0)
			goto pasv_error;
	}
#endif

	/* Bind to the control connection's address, port chosen by kernel. */
	pasv_addr = ctrl_addr;
	pasv_addr.su_port = 0;
	if (bind(pdata, (struct sockaddr *)&pasv_addr, pasv_addr.su_len) < 0)
		goto pasv_error;

	/* Back to the logged-in user's effective uid. */
	(void) seteuid(pw->pw_uid);

	/* Read back the port that was actually assigned. */
	len = sizeof(pasv_addr);
	if (getsockname(pdata, (struct sockaddr *) &pasv_addr, &len) < 0)
		goto pasv_error;
	if (listen(pdata, 1) < 0)
		goto pasv_error;
	/*
	 * The 227 reply can only carry four address bytes, so only plain
	 * IPv4 or IPv4-mapped IPv6 addresses are accepted here.
	 */
	if (pasv_addr.su_family == AF_INET)
		a = (char *) &pasv_addr.su_sin.sin_addr;
	else if (pasv_addr.su_family == AF_INET6 &&
		 IN6_IS_ADDR_V4MAPPED(&pasv_addr.su_sin6.sin6_addr))
		a = (char *) &pasv_addr.su_sin6.sin6_addr.s6_addr[12];
	else
		goto pasv_error;

	p = (char *) &pasv_addr.su_port;

/* Extract one byte as a non-negative int regardless of char signedness. */
#define UC(b) (((int) b) & 0xff)

	reply(227, "Entering Passive Mode (%d,%d,%d,%d,%d,%d)", UC(a[0]),
		UC(a[1]), UC(a[2]), UC(a[3]), UC(p[0]), UC(p[1]));
	return;

pasv_error:
	/* Also reached before the uid was restored above, so restore here. */
	(void) seteuid(pw->pw_uid);
	(void) close(pdata);
	pdata = -1;
	perror_reply(425, "Can't open passive connection");
	return;
}

/*
 * Long Passive defined in RFC 1639.
 *     228 Entering Long Passive Mode
 *     (af, hal, h1, h2, h3,..., pal, p1, p2...)
*/
/*
 * long_passive() serves both LPSV and EPSV; "cmd" is the command name and
 * "pf" the protocol family the client asked for (PF_UNSPEC for "any").
 * Like passive(), it opens a listening socket in pdata on the control
 * connection's address and announces it; failures go through pasv_error.
 */
void
long_passive(char *cmd, int pf)
{
	socklen_t len;
	int on;
	char *p, *a;

	if (pdata >= 0)		/* close old port if one set */
		close(pdata);

	if (pf != PF_UNSPEC) {
		if (ctrl_addr.su_family != pf) {
			/*
			 * Requested family differs from the control
			 * connection: translate our family into the number
			 * reported in the 522 reply (1 for AF_INET, 2 for
			 * AF_INET6, 0 when unknown).
			 */
			switch (ctrl_addr.su_family) {
			case AF_INET:
				pf = 1;
				break;
			case AF_INET6:
				pf = 2;
				break;
			default:
				pf = 0;
				break;
			}
			/*
			 * XXX
			 * only EPRT/EPSV ready clients will understand this
			 */
			if (strcmp(cmd, "EPSV") == 0 && pf) {
				reply(522, "Network protocol mismatch, "
					"use (%d)", pf);
			} else
				reply(501, "Network protocol mismatch."); /*XXX*/

			return;
		}
	}

	pdata = socket(ctrl_addr.su_family, SOCK_STREAM, 0);
	if (pdata < 0) {
		perror_reply(425, "Can't open passive connection");
		return;
	}
	on = 1;
	/* Failure to set SO_REUSEADDR is logged but not fatal. */
	if (setsockopt(pdata, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0)
		syslog(LOG_WARNING, "pdata setsockopt (SO_REUSEADDR): %m");

	/* Effective uid 0 is held until after bind(). */
	(void) seteuid(0);

	pasv_addr = ctrl_addr;
	pasv_addr.su_port = 0;
	len = pasv_addr.su_len;

#ifdef IP_PORTRANGE
	if (ctrl_addr.su_family == AF_INET) {
		on = restricted_data_ports ? IP_PORTRANGE_HIGH
					   : IP_PORTRANGE_DEFAULT;

		if (setsockopt(pdata, IPPROTO_IP, IP_PORTRANGE,
			       &on, sizeof(on)) < 0)
			goto pasv_error;
	}
#endif
#ifdef IPV6_PORTRANGE
	if (ctrl_addr.su_family == AF_INET6) {
		on = restricted_data_ports ?
IPV6_PORTRANGE_HIGH : IPV6_PORTRANGE_DEFAULT;
		if (setsockopt(pdata, IPPROTO_IPV6, IPV6_PORTRANGE,
			       &on, sizeof(on)) < 0)
			goto pasv_error;
	}
#endif

	if (bind(pdata, (struct sockaddr *)&pasv_addr, len) < 0)
		goto pasv_error;

	/* Back to the logged-in user's effective uid. */
	(void) seteuid(pw->pw_uid);

	/* Read back the port that was actually assigned. */
	if (getsockname(pdata, (struct sockaddr *) &pasv_addr, &len) < 0)
		goto pasv_error;
	if (listen(pdata, 1) < 0)
		goto pasv_error;

/* Extract one byte as a non-negative int regardless of char signedness. */
#define UC(b) (((int) b) & 0xff)

	if (strcmp(cmd, "LPSV") == 0) {
		p = (char *)&pasv_addr.su_port;
		switch (pasv_addr.su_family) {
		case AF_INET:
			a = (char *) &pasv_addr.su_sin.sin_addr;
		/* Also the target of the goto for IPv4-mapped IPv6 below. */
		v4_reply:
			reply(228,
"Entering Long Passive Mode (%d,%d,%d,%d,%d,%d,%d,%d,%d)",
			      4, 4, UC(a[0]), UC(a[1]), UC(a[2]), UC(a[3]),
			      2, UC(p[0]), UC(p[1]));
			return;
		case AF_INET6:
			/* Report an IPv4-mapped address as plain IPv4. */
			if (IN6_IS_ADDR_V4MAPPED(&pasv_addr.su_sin6.sin6_addr)) {
				a = (char *) &pasv_addr.su_sin6.sin6_addr.s6_addr[12];
				goto v4_reply;
			}
			a = (char *) &pasv_addr.su_sin6.sin6_addr;
			reply(228,
"Entering Long Passive Mode "
"(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)",
			      6, 16, UC(a[0]), UC(a[1]), UC(a[2]), UC(a[3]),
			      UC(a[4]), UC(a[5]), UC(a[6]), UC(a[7]),
			      UC(a[8]), UC(a[9]), UC(a[10]), UC(a[11]),
			      UC(a[12]), UC(a[13]), UC(a[14]), UC(a[15]),
			      2, UC(p[0]), UC(p[1]));
			return;
		}
		/* Any other family falls out of the switch into pasv_error. */
	} else if (strcmp(cmd, "EPSV") == 0) {
		switch (pasv_addr.su_family) {
		case AF_INET:
		case AF_INET6:
			reply(229, "Entering Extended Passive Mode (|||%d|)",
				ntohs(pasv_addr.su_port));
			return;
		}
	} else {
		/* more proper error code? */
	}

pasv_error:
	/* Also reached before the uid was restored above, so restore here. */
	(void) seteuid(pw->pw_uid);
	(void) close(pdata);
	pdata = -1;
	perror_reply(425, "Can't open passive connection");
	return;
}

/*
 * Generate unique name for file with basename "local"
 * and open the file in order to avoid possible races.
 * Try "local" first, then "local.1", "local.2" etc, up to "local.99".
 * Return descriptor to the file, set "name" to its name.
 *
 * Generates failure reply on error.
*/
/*
 * Returns the open descriptor (>= 0) or -1.  On success *name points
 * either at "local" itself or at a static buffer inside this function,
 * so the result is overwritten by the next call.
 *
 * NOTE(review): when the directory stat() fails or the path is too long,
 * the function returns with the last '/' in "local" still replaced by
 * '\0' - confirm that no caller reuses "local" after a failure.
 */
static int
guniquefd(char *local, char **name)
{
	static char new[MAXPATHLEN];
	struct stat st;
	char *cp;
	int count;
	int fd;

	/* Temporarily cut "local" at its last '/' to stat the directory. */
	cp = strrchr(local, '/');
	if (cp)
		*cp = '\0';
	if (stat(cp ? local : ".", &st) < 0) {
		perror_reply(553, cp ? local : ".");
		return (-1);
	}
	if (cp) {
		/*
		 * Do not let the counter suffix overwrite the dirname.
		 * -4 is for /nn\0
		 * In this extreme case dot won't be put in front of suffix.
		 */
		if (strlen(local) > sizeof(new) - 4) {
			reply(553, "Pathname too long.");
			return (-1);
		}
		/* Directory is fine: restore the full path. */
		*cp = '/';
	}
	/* -4 is for the .nn we put on the end below */
	(void) snprintf(new, sizeof(new) - 4, "%s", local);
	cp = new + strlen(new);
	/*
	 * Don't generate dotfile unless requested explicitly.
	 * This covers the case when basename gets truncated off
	 * by buffer size.
	 */
	if (cp > new && cp[-1] != '/')
		*cp++ = '.';
	for (count = 0; count < 100; count++) {
		/* At count 0 try unmodified name */
		if (count)
			(void)sprintf(cp, "%d", count);
		/* O_EXCL makes creation fail with EEXIST if the name is taken. */
		if ((fd = open(count ? new : local,
		    O_RDWR | O_CREAT | O_EXCL, 0666)) >= 0) {
			*name = count ? new : local;
			return (fd);
		}
		/* Only "already exists" moves on to the next suffix. */
		if (errno != EEXIST) {
			perror_reply(553, count ? new : local);
			return (-1);
		}
	}
	reply(452, "Unique file name cannot be created.");
	return (-1);
}

/*
 * Format and send reply containing system error number.
*/
/*
 * perror_reply() sends "<string>: <strerror(errno)>." with reply code
 * "code"; errno must still hold the error of interest when it is called.
 */
void
perror_reply(int code, char *string)
{

	reply(code, "%s: %s.", string, strerror(errno));
}

/* One-entry, NULL-terminated list used when no globbing is needed. */
static char *onefile[] = {
	"",
	0
};

/*
 * Send a list of names over the data connection.  "whichf" is either a
 * plain name or, when it contains any of ~{[*?, a pattern expanded with
 * glob().  Regular files are listed by name; for directories each entry
 * (except "." and "..") is listed as "dir/entry".
 *
 * NOTE(review): STARTXFER, ENDXFER, START_UNSAFE, END_UNSAFE and CHECKOOB
 * are macros defined outside this view; CHECKOOB(goto abrt) presumably
 * polls for an out-of-band abort - confirm against their definitions.
 */
void
send_file_list(char *whichf)
{
	struct stat st;
	DIR *dirp = NULL;
	struct dirent *dir;
	FILE *dout = NULL;
	char **dirlist, *dirname;
	int simple = 0;		/* set when whichf is used without globbing */
	int freeglob = 0;	/* set when gl must be released at "out" */
	glob_t gl;

	if (strpbrk(whichf, "~{[*?") != NULL) {
		int flags = GLOB_BRACE|GLOB_NOCHECK|GLOB_TILDE;

		memset(&gl, 0, sizeof(gl));
		/* Bound the expansion: GLOB_LIMIT together with gl_matchc. */
		gl.gl_matchc = MAXGLOBARGS;
		flags |= GLOB_LIMIT;
		freeglob = 1;
		if (glob(whichf, flags, 0, &gl)) {
			reply(550, "No matching files found.");
			goto out;
		} else if (gl.gl_pathc == 0) {
			errno = ENOENT;
			perror_reply(550, whichf);
			goto out;
		}
		dirlist = gl.gl_pathv;
	} else {
		onefile[0] = whichf;
		dirlist = onefile;
		simple = 1;
	}

	while ((dirname = *dirlist++)) {
		if (stat(dirname, &st) < 0) {
			/*
			 * If user typed "ls -l", etc, and the client
			 * used NLST, do what the user meant.
			 */
			if (dirname[0] == '-' && *dirlist == NULL &&
			    dout == NULL)
				retrieve(_PATH_LS " %s", dirname);
			else
				perror_reply(550, whichf);
			goto out;
		}

		if (S_ISREG(st.st_mode)) {
			/* The data connection is opened on first output. */
			if (dout == NULL) {
				dout = dataconn("file list", -1, "w");
				if (dout == NULL)
					goto out;
				STARTXFER;
			}
			START_UNSAFE;
			/* In ASCII type each name is terminated by "\r\n". */
			fprintf(dout, "%s%s\n", dirname,
				type == TYPE_A ? "\r" : "");
			END_UNSAFE;
			if (ferror(dout))
				goto data_err;
			byte_count += strlen(dirname) +
				      (type == TYPE_A ? 2 : 1);
			CHECKOOB(goto abrt);
			continue;
		} else if (!S_ISDIR(st.st_mode))
			continue;

		/* Directories that cannot be opened are silently skipped. */
		if ((dirp = opendir(dirname)) == NULL)
			continue;

		while ((dir = readdir(dirp)) != NULL) {
			char nbuf[MAXPATHLEN];

			CHECKOOB(goto abrt);

			/* Skip "." and "..". */
			if (dir->d_name[0] == '.' && dir->d_namlen == 1)
				continue;
			if (dir->d_name[0] == '.' && dir->d_name[1] == '.' &&
			    dir->d_namlen == 2)
				continue;

			snprintf(nbuf, sizeof(nbuf), "%s/%s", dirname, dir->d_name);

			/*
			 * We have to do a stat to insure it's
			 * not a directory or special file.
*/ if (simple || (stat(nbuf, &st) == 0 && S_ISREG(st.st_mode))) { if (dout == NULL) { dout = dataconn("file list", -1, "w"); if (dout == NULL) goto out; STARTXFER; } START_UNSAFE; if (nbuf[0] == '.' && nbuf[1] == '/') fprintf(dout, "%s%s\n", &nbuf[2], type == TYPE_A ? "\r" : ""); else fprintf(dout, "%s%s\n", nbuf, type == TYPE_A ? "\r" : ""); END_UNSAFE; if (ferror(dout)) goto data_err; byte_count += strlen(nbuf) + (type == TYPE_A ? 2 : 1); CHECKOOB(goto abrt); } } (void) closedir(dirp); dirp = NULL; } if (dout == NULL) reply(550, "No files found."); else if (ferror(dout)) data_err: perror_reply(550, "Data connection"); else reply(226, "Transfer complete."); out: if (dout) { ENDXFER; abrt: (void) fclose(dout); data = -1; pdata = -1; } if (dirp) (void) closedir(dirp); if (freeglob) { freeglob = 0; globfree(&gl); } } void reapchild(int signo) { while (waitpid(-1, NULL, WNOHANG) > 0); } #ifdef OLD_SETPROCTITLE /* * Clobber argv so ps will show what we're doing. (Stolen from sendmail.) * Warning, since this is usually started from inetd.conf, it often doesn't * have much of an environment or arglist to overwrite. */ void setproctitle(const char *fmt, ...) { int i; va_list ap; char *p, *bp, ch; char buf[LINE_MAX]; va_start(ap, fmt); (void)vsnprintf(buf, sizeof(buf), fmt, ap); /* make ps print our process name */ p = Argv[0]; *p++ = '-'; i = strlen(buf); if (i > LastArgv - p - 2) { i = LastArgv - p - 2; buf[i] = '\0'; } bp = buf; while (ch = *bp++) if (ch != '\n' && ch != '\r') *p++ = ch; while (p < LastArgv) *p++ = ' '; } #endif /* OLD_SETPROCTITLE */ static void appendf(char **strp, char *fmt, ...) 
{ va_list ap; char *ostr, *p; va_start(ap, fmt); vasprintf(&p, fmt, ap); va_end(ap); if (p == NULL) fatalerror("Ran out of memory."); if (*strp == NULL) *strp = p; else { ostr = *strp; asprintf(strp, "%s%s", ostr, p); if (*strp == NULL) fatalerror("Ran out of memory."); free(ostr); } } static void logcmd(char *cmd, char *file1, char *file2, off_t cnt) { char *msg = NULL; char wd[MAXPATHLEN + 1]; if (logging <= 1) return; if (getcwd(wd, sizeof(wd) - 1) == NULL) strcpy(wd, strerror(errno)); appendf(&msg, "%s", cmd); if (file1) appendf(&msg, " %s", file1); if (file2) appendf(&msg, " %s", file2); if (cnt >= 0) appendf(&msg, " = %jd bytes", (intmax_t)cnt); appendf(&msg, " (wd: %s", wd); if (guest || dochroot) appendf(&msg, "; chrooted"); appendf(&msg, ")"); syslog(LOG_INFO, "%s", msg); free(msg); } static void logxfer(char *name, off_t size, time_t start) { char buf[MAXPATHLEN + 1024]; char path[MAXPATHLEN + 1]; time_t now; if (statfd >= 0) { time(&now); if (realpath(name, path) == NULL) { syslog(LOG_NOTICE, "realpath failed on %s: %m", path); return; } snprintf(buf, sizeof(buf), "%.20s!%s!%s!%s!%jd!%ld\n", ctime(&now)+4, ident, remotehost, path, (intmax_t)size, (long)(now - start + (now == start))); write(statfd, buf, strlen(buf)); } } static char * doublequote(char *s) { int n; char *p, *s2; for (p = s, n = 0; *p; p++) if (*p == '"') n++; if ((s2 = malloc(p - s + n + 1)) == NULL) return (NULL); for (p = s2; *s; s++, p++) { if ((*p = *s) == '"') *(++p) = '"'; } *p = '\0'; return (s2); } /* setup server socket for specified address family */ /* if af is PF_UNSPEC more than one socket may be returned */ /* the returned list is dynamically allocated, so caller needs to free it */ static int * socksetup(int af, char *bindname, const char *bindport) { struct addrinfo hints, *res, *r; int error, maxs, *s, *socks; const int on = 1; memset(&hints, 0, sizeof(hints)); hints.ai_flags = AI_PASSIVE; hints.ai_family = af; hints.ai_socktype = SOCK_STREAM; error = 
getaddrinfo(bindname, bindport, &hints, &res); if (error) { syslog(LOG_ERR, "%s", gai_strerror(error)); if (error == EAI_SYSTEM) syslog(LOG_ERR, "%s", strerror(errno)); return NULL; } /* Count max number of sockets we may open */ for (maxs = 0, r = res; r; r = r->ai_next, maxs++) ; socks = malloc((maxs + 1) * sizeof(int)); if (!socks) { freeaddrinfo(res); syslog(LOG_ERR, "couldn't allocate memory for sockets"); return NULL; } *socks = 0; /* num of sockets counter at start of array */ s = socks + 1; for (r = res; r; r = r->ai_next) { *s = socket(r->ai_family, r->ai_socktype, r->ai_protocol); if (*s < 0) { syslog(LOG_DEBUG, "control socket: %m"); continue; } if (setsockopt(*s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) syslog(LOG_WARNING, "control setsockopt (SO_REUSEADDR): %m"); if (r->ai_family == AF_INET6) { if (setsockopt(*s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)) < 0) syslog(LOG_WARNING, "control setsockopt (IPV6_V6ONLY): %m"); } if (bind(*s, r->ai_addr, r->ai_addrlen) < 0) { syslog(LOG_DEBUG, "control bind: %m"); close(*s); continue; } (*socks)++; s++; } if (res) freeaddrinfo(res); if (*socks == 0) { syslog(LOG_ERR, "control socket: Couldn't bind to any socket"); free(socks); return NULL; } return(socks); } Index: projects/vnet/share/man/man4/ddb.4 =================================================================== --- projects/vnet/share/man/man4/ddb.4 (revision 301522) +++ projects/vnet/share/man/man4/ddb.4 (revision 301523) @@ -1,1542 +1,1553 @@ .\" .\" Mach Operating System .\" Copyright (c) 1991,1990 Carnegie Mellon University .\" Copyright (c) 2007 Robert N. M. Watson .\" All Rights Reserved. .\" .\" Permission to use, copy, modify and distribute this software and its .\" documentation is hereby granted, provided that both the copyright .\" notice and this permission notice appear in all copies of the .\" software, derivative works or modified versions, and any portions .\" thereof, and that both notices appear in supporting documentation. 
.\" .\" CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" .\" CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR .\" ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. .\" .\" Carnegie Mellon requests users of this software to return to .\" .\" Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU .\" School of Computer Science .\" Carnegie Mellon University .\" Pittsburgh PA 15213-3890 .\" .\" any improvements or extensions that they make and grant Carnegie Mellon .\" the rights to redistribute these changes. .\" .\" changed a \# to #, since groff choked on it. .\" .\" HISTORY .\" ddb.4,v .\" Revision 1.1 1993/07/15 18:41:02 brezak .\" Man page for DDB .\" .\" Revision 2.6 92/04/08 08:52:57 rpd .\" Changes from OSF. .\" [92/01/17 14:19:22 jsb] .\" Changes for OSF debugger modifications. .\" [91/12/12 tak] .\" .\" Revision 2.5 91/06/25 13:50:22 rpd .\" Added some watchpoint explanation. .\" [91/06/25 rpd] .\" .\" Revision 2.4 91/06/17 15:47:31 jsb .\" Added documentation for continue/c, match, search, and watchpoints. .\" I've not actually explained what a watchpoint is; maybe Rich can .\" do that (hint, hint). .\" [91/06/17 10:58:08 jsb] .\" .\" Revision 2.3 91/05/14 17:04:23 mrt .\" Correcting copyright .\" .\" Revision 2.2 91/02/14 14:10:06 mrt .\" Changed to new Mach copyright .\" [91/02/12 18:10:12 mrt] .\" .\" Revision 2.2 90/08/30 14:23:15 dbg .\" Created. 
.\" [90/08/30 dbg] .\" .\" $FreeBSD$ .\" -.Dd May 18, 2016 +.Dd June 6, 2016 .Dt DDB 4 .Os .Sh NAME .Nm ddb .Nd interactive kernel debugger .Sh SYNOPSIS In order to enable kernel debugging facilities include: .Bd -ragged -offset indent .Cd options KDB .Cd options DDB .Ed .Pp To prevent activation of the debugger on kernel .Xr panic 9 : .Bd -ragged -offset indent .Cd options KDB_UNATTENDED .Ed .Pp In order to print a stack trace of the current thread on the console for a panic: .Bd -ragged -offset indent .Cd options KDB_TRACE .Ed .Pp To print the numerical value of symbols in addition to the symbolic representation, define: .Bd -ragged -offset indent .Cd options DDB_NUMSYM .Ed .Pp To enable the .Xr gdb 1 backend, so that remote debugging with .Xr kgdb 1 is possible, include: .Bd -ragged -offset indent .Cd options GDB .Ed .Sh DESCRIPTION The .Nm kernel debugger is an interactive debugger with a syntax inspired by .Xr gdb 1 . If linked into the running kernel, it can be invoked locally with the .Ql debug .Xr keymap 5 action. The debugger is also invoked on kernel .Xr panic 9 if the .Va debug.debugger_on_panic .Xr sysctl 8 MIB variable is set non-zero, which is the default unless the .Dv KDB_UNATTENDED option is specified. .Pp The current location is called .Va dot . The .Va dot is displayed with a hexadecimal format at a prompt. The commands .Ic examine and .Ic write update .Va dot to the address of the last line examined or the last location modified, and set .Va next to the address of the next location to be examined or changed. Other commands do not change .Va dot , and set .Va next to be the same as .Va dot . .Pp The general command syntax is: .Ar command Ns Op Li / Ns Ar modifier .Ar address Ns Op Li , Ns Ar count .Pp A blank line repeats the previous command from the address .Va next with count 1 and no modifiers. Specifying .Ar address sets .Va dot to the address. Omitting .Ar address uses .Va dot . 
A missing .Ar count is taken to be 1 for printing commands or infinity for stack traces. .Pp The .Nm debugger has a pager feature (like the .Xr more 1 command) for the output. If an output line exceeds the number set in the .Va lines variable, it displays .Dq Li --More-- and waits for a response. The valid responses for it are: .Pp .Bl -tag -compact -width ".Li SPC" .It Li SPC one more page .It Li RET one more line .It Li q abort the current command, and return to the command input mode .El .Pp Finally, .Nm provides a small (currently 10 items) command history, and offers simple .Nm emacs Ns -style command line editing capabilities. In addition to the .Nm emacs control keys, the usual .Tn ANSI arrow keys may be used to browse through the history buffer, and move the cursor within the current line. .Sh COMMANDS .Bl -tag -width indent -compact .It Ic examine .It Ic x Display the addressed locations according to the formats in the modifier. Multiple modifier formats display multiple locations. If no format is specified, the last format specified for this command is used. .Pp The format characters are: .Bl -tag -compact -width indent .It Cm b look at by bytes (8 bits) .It Cm h look at by half words (16 bits) .It Cm l look at by long words (32 bits) .It Cm g look at by quad words (64 bits) .It Cm a print the location being displayed .It Cm A print the location with a line number if possible .It Cm x display in unsigned hex .It Cm z display in signed hex .It Cm o display in unsigned octal .It Cm d display in signed decimal .It Cm u display in unsigned decimal .It Cm r display in current radix, signed .It Cm c display low 8 bits as a character. Non-printing characters are displayed as an octal escape code (e.g., .Ql \e000 ) . .It Cm s display the null-terminated string at the location. Non-printing characters are displayed as octal escapes. .It Cm m display in unsigned hex with character dump at the end of each line. 
The location is also displayed in hex at the beginning of each line. .It Cm i display as an instruction .It Cm I display as an instruction with possible alternate formats depending on the machine, but none of the supported architectures have an alternate format. .It Cm S display a symbol name for the pointer stored at the address .El .Pp .It Ic xf Examine forward: execute an .Ic examine command with the last specified parameters to it except that the next address displayed by it is used as the start address. .Pp .It Ic xb Examine backward: execute an .Ic examine command with the last specified parameters to it except that the last start address subtracted by the size displayed by it is used as the start address. .Pp .It Ic print Ns Op Li / Ns Cm acdoruxz .It Ic p Ns Op Li / Ns Cm acdoruxz Print .Ar addr Ns s according to the modifier character (as described above for .Cm examine ) . Valid formats are: .Cm a , x , z , o , d , u , r , and .Cm c . If no modifier is specified, the last one specified to it is used. The argument .Ar addr can be a string, in which case it is printed as it is. For example: .Bd -literal -offset indent print/x "eax = " $eax "\enecx = " $ecx "\en" .Ed .Pp will print like: .Bd -literal -offset indent eax = xxxxxx ecx = yyyyyy .Ed .Pp .It Xo .Ic write Ns Op Li / Ns Cm bhl .Ar addr expr1 Op Ar expr2 ... .Xc .It Xo .Ic w Ns Op Li / Ns Cm bhl .Ar addr expr1 Op Ar expr2 ... .Xc Write the expressions specified after .Ar addr on the command line at succeeding locations starting with .Ar addr . The write unit size can be specified in the modifier with a letter .Cm b (byte), .Cm h (half word) or .Cm l (long word) respectively. If omitted, long word is assumed. .Pp .Sy Warning : since there is no delimiter between expressions, strange things may happen. It is best to enclose each expression in parentheses. .Pp .It Ic set Li $ Ns Ar variable Oo Li = Oc Ar expr Set the named variable or register with the value of .Ar expr . 
Valid variable names are described below. .Pp .It Ic break Ns Op Li / Ns Cm u .It Ic b Ns Op Li / Ns Cm u Set a break point at .Ar addr . If .Ar count is supplied, continues .Ar count \- 1 times before stopping at the break point. If the break point is set, a break point number is printed with .Ql # . This number can be used in deleting the break point or adding conditions to it. .Pp If the .Cm u modifier is specified, this command sets a break point in user address space. Without the .Cm u option, the address is considered to be in the kernel space, and a wrong space address is rejected with an error message. This modifier can be used only if it is supported by machine dependent routines. .Pp .Sy Warning : If a user text is shadowed by a normal user space debugger, user space break points may not work correctly. Setting a break point at the low-level code paths may also cause strange behavior. .Pp .It Ic delete Ar addr .It Ic d Ar addr .It Ic delete Li # Ns Ar number .It Ic d Li # Ns Ar number Delete the break point. The target break point can be specified by a break point number with .Ql # , or by using the same .Ar addr specified in the original .Ic break command. .Pp .It Ic watch Ar addr Ns Li , Ns Ar size Set a watchpoint for a region. Execution stops when an attempt to modify the region occurs. The .Ar size argument defaults to 4. If you specify a wrong space address, the request is rejected with an error message. .Pp .Sy Warning : Attempts to watch wired kernel memory may cause unrecoverable error in some systems such as i386. Watchpoints on user addresses work best. .Pp .It Ic hwatch Ar addr Ns Li , Ns Ar size Set a hardware watchpoint for a region if supported by the architecture. Execution stops when an attempt to modify the region occurs. The .Ar size argument defaults to 4. .Pp .Sy Warning : The hardware debug facilities do not have a concept of separate address spaces like the watch command does. 
Use .Ic hwatch for setting watchpoints on kernel address locations only, and avoid its use on user mode address spaces. .Pp .It Ic dhwatch Ar addr Ns Li , Ns Ar size Delete specified hardware watchpoint. .Pp .It Ic step Ns Op Li / Ns Cm p .It Ic s Ns Op Li / Ns Cm p Single step .Ar count times (the comma is a mandatory part of the syntax). If the .Cm p modifier is specified, print each instruction at each step. Otherwise, only print the last instruction. .Pp .Sy Warning : depending on machine type, it may not be possible to single-step through some low-level code paths or user space code. On machines with software-emulated single-stepping (e.g., pmax), stepping through code executed by interrupt handlers will probably do the wrong thing. .Pp .It Ic continue Ns Op Li / Ns Cm c .It Ic c Ns Op Li / Ns Cm c Continue execution until a breakpoint or watchpoint. If the .Cm c modifier is specified, count instructions while executing. Some machines (e.g., pmax) also count loads and stores. .Pp .Sy Warning : when counting, the debugger is really silently single-stepping. This means that single-stepping on low-level code may cause strange behavior. .Pp .It Ic until Ns Op Li / Ns Cm p Stop at the next call or return instruction. If the .Cm p modifier is specified, print the call nesting depth and the cumulative instruction count at each call or return. Otherwise, only print when the matching return is hit. .Pp .It Ic next Ns Op Li / Ns Cm p .It Ic match Ns Op Li / Ns Cm p Stop at the matching return instruction. If the .Cm p modifier is specified, print the call nesting depth and the cumulative instruction count at each call or return. Otherwise, only print when the matching return is hit. 
.Pp .It Xo .Ic trace Ns Op Li / Ns Cm u .Op Ar pid | tid .Op Li , Ns Ar count .Xc .It Xo .Ic t Ns Op Li / Ns Cm u .Op Ar pid | tid .Op Li , Ns Ar count .Xc .It Xo .Ic where Ns Op Li / Ns Cm u .Op Ar pid | tid .Op Li , Ns Ar count .Xc .It Xo .Ic bt Ns Op Li / Ns Cm u .Op Ar pid | tid .Op Li , Ns Ar count .Xc Stack trace. The .Cm u option traces user space; if omitted, .Ic trace only traces kernel space. The optional argument .Ar count is the number of frames to be traced. If .Ar count is omitted, all frames are printed. .Pp .Sy Warning : User space stack trace is valid only if the machine dependent code supports it. .Pp .It Xo .Ic search Ns Op Li / Ns Cm bhl .Ar addr .Ar value .Op Ar mask .Op Li , Ns Ar count .Xc Search memory for .Ar value . This command might fail in interesting ways if it does not find the searched-for value. This is because .Nm does not always recover from touching bad memory. The optional .Ar count argument limits the search. .\" .Pp .It Xo .Ic findstack .Ar addr .Xc Prints the address of the thread whose kernel-mode stack contains the specified address. If no such thread is found, searches the thread stack cache and prints the cached stack address. Otherwise, prints nothing. .Pp .It Ic show Cm all procs Ns Op Li / Ns Cm m .It Ic ps Ns Op Li / Ns Cm m Display all process information. The process information may not be shown if it is not supported on the machine, or the bottom of the stack of the target process is not in the main memory at that time. The .Cm m modifier will alter the display to show VM map addresses for the process and not show other information. .\" .Pp .It Ic show Cm all ttys Show all TTYs within the system. Output is similar to .Xr pstat 8 , but also includes the address of the TTY structure. .\" .Pp .It Ic show Cm all vnets Show the same output as "show vnet" does, but lists all virtualized network stacks within the system.
.\" .Pp .It Ic show Cm allchains Show the same information like "show lockchain" does, but for every thread in the system. .\" .Pp .It Ic show Cm alllocks Show all locks that are currently held. This command is only available if .Xr witness 4 is included in the kernel. .\" .Pp .It Ic show Cm allpcpu The same as "show pcpu", but for every CPU present in the system. .\" .Pp .It Ic show Cm allrman Show information related with resource management, including interrupt request lines, DMA request lines, I/O ports, I/O memory addresses, and Resource IDs. .\" .Pp .It Ic show Cm apic Dump data about APIC IDT vector mappings. .\" .Pp .It Ic show Cm breaks Show breakpoints set with the "break" command. .\" .Pp .It Ic show Cm bio Ar addr Show information about the bio structure .Vt struct bio present at .Ar addr . See the .Pa sys/bio.h header file and .Xr g_bio 9 for more details on the exact meaning of the structure fields. .\" .Pp .It Ic show Cm buffer Ar addr Show information about the buf structure .Vt struct buf present at .Ar addr . See the .Pa sys/buf.h header file for more details on the exact meaning of the structure fields. .\" .Pp +.It Ic show Cm callout Ar addr +Show information about the callout structure +.Vt struct callout +present at +.Ar addr . +.\" +.Pp .It Ic show Cm cbstat Show brief information about the TTY subsystem. .\" .Pp .It Ic show Cm cdev Without argument, show the list of all created cdev's, consisting of devfs node name and struct cdev address. When address of cdev is supplied, show some internal devfs state of the cdev. .\" .Pp .It Ic show Cm conifhk Lists hooks currently waiting for completion in run_interrupt_driven_config_hooks(). .\" .Pp .It Ic show Cm cpusets Print numbered root and assigned CPU affinity sets. See .Xr cpuset 2 for more details. .\" .Pp .It Ic show Cm cyrixreg Show registers specific to the Cyrix processor. .\" .Pp .It Ic show Cm devmap Prints the contents of the static device mapping table. 
Currently only available on the ARM architecture. .\" .Pp .It Ic show Cm domain Ar addr Print protocol domain structure .Vt struct domain at address .Ar addr . See the .Pa sys/domain.h header file for more details on the exact meaning of the structure fields. .\" .Pp .It Ic show Cm ffs Op Ar addr Show brief information about ffs mount at the address .Ar addr , if argument is given. Otherwise, provides the summary about each ffs mount. .\" .Pp .It Ic show Cm file Ar addr Show information about the file structure .Vt struct file present at address .Ar addr . .\" .Pp .It Ic show Cm files Show information about every file structure in the system. .\" .Pp .It Ic show Cm freepages Show the number of physical pages in each of the free lists. .\" .Pp .It Ic show Cm geom Op Ar addr If the .Ar addr argument is not given, displays the entire GEOM topology. If .Ar addr is given, displays details about the given GEOM object (class, geom, provider or consumer). .\" .Pp .It Ic show Cm idt Show IDT layout. The first column specifies the IDT vector. The second one is the name of the interrupt/trap handler. Those functions are machine dependent. .\" .Pp .It Ic show Cm inodedeps Op Ar addr Show brief information about each inodedep structure. If .Ar addr is given, only inodedeps belonging to the fs located at the supplied address are shown. .\" .Pp .It Ic show Cm inpcb Ar addr Show information on IP Control Block .Vt struct in_pcb present at .Ar addr . .\" .Pp .It Ic show Cm intr Dump information about interrupt handlers. .\" .Pp .It Ic show Cm intrcnt Dump the interrupt statistics. .\" .Pp .It Ic show Cm irqs Show interrupt lines and their respective kernel threads. .\" .Pp .It Ic show Cm jails Show the list of .Xr jail 8 instances. In addition to what .Xr jls 8 shows, also list kernel internal details. .\" .Pp .It Ic show Cm lapic Show information from the local APIC registers for this CPU. .\" .Pp .It Ic show Cm lock Ar addr Show lock structure. 
The output format is as follows: .Bl -tag -width "flags" .It Ic class: Class of the lock. Possible types include .Xr mutex 9 , .Xr rmlock 9 , .Xr rwlock 9 , .Xr sx 9 . .It Ic name: Name of the lock. .It Ic flags: Flags passed to the lock initialization function. For exact possibilities see manual pages of possible lock types. .It Ic state: Current state of a lock. As well as .Ic flags it's lock-specific. .It Ic owner: Lock owner. .El .\" .Pp .It Ic show Cm lockchain Ar addr Show all threads a particular thread at address .Ar addr is waiting on based on non-sleepable and non-spin locks. .\" .Pp .It Ic show Cm lockedbufs Show the same information as "show buf", but for every locked .Vt struct buf object. .\" .Pp .It Ic show Cm lockedvnods List all locked vnodes in the system. .\" .Pp .It Ic show Cm locks Prints all locks that are currently acquired. This command is only available if .Xr witness 4 is included in the kernel. .\" .Pp .It Ic show Cm locktree .\" .Pp .It Ic show Cm malloc Prints .Xr malloc 9 memory allocator statistics. The output format is as follows: .Pp .Bl -tag -compact -offset indent -width "Requests" .It Ic Type Specifies a type of memory. It is the same as a description string used while defining the given memory type with .Xr MALLOC_DECLARE 9 . .It Ic InUse Number of memory allocations of the given type, for which .Xr free 9 has not been called yet. .It Ic MemUse Total memory consumed by the given allocation type. .It Ic Requests Number of memory allocation requests for the given memory type. .El .Pp The same information can be gathered in userspace with .Dq Nm vmstat Fl m . .\" .Pp .It Ic show Cm map Ns Oo Li / Ns Cm f Oc Ar addr Prints the VM map at .Ar addr . If the .Cm f modifier is specified the complete map is printed. .\" .Pp .It Ic show Cm msgbuf Print the system's message buffer. It is the same output as in the .Dq Nm dmesg case. 
It is useful if you got a kernel panic, attached a serial cable to the machine and want to get the boot messages from before the system hang. .\" .It Ic show Cm mount Displays short info about all currently mounted file systems. .Pp .It Ic show Cm mount Ar addr Displays details about the given mount point. .\" .Pp .It Ic show Cm object Ns Oo Li / Ns Cm f Oc Ar addr Prints the VM object at .Ar addr . If the .Cm f option is specified the complete object is printed. +.\" +.Pp +.It Ic show Cm panic +Print the panic message if set. .\" .Pp .It Ic show Cm page Show statistics on VM pages. .\" .Pp .It Ic show Cm pageq Show statistics on VM page queues. .\" .Pp .It Ic show Cm pciregs Print PCI bus registers. The same information can be gathered in userspace by running .Dq Nm pciconf Fl lv . .\" .Pp .It Ic show Cm pcpu Print current processor state. The output format is as follows: .Pp .Bl -tag -compact -offset indent -width "spin locks held:" .It Ic cpuid Processor identifier. .It Ic curthread Thread pointer, process identifier and the name of the process. .It Ic curpcb Control block pointer. .It Ic fpcurthread FPU thread pointer. .It Ic idlethread Idle thread pointer. .It Ic APIC ID CPU identifier coming from APIC. .It Ic currentldt LDT pointer. .It Ic spin locks held Names of spin locks held. .El .\" .Pp .It Ic show Cm pgrpdump Dump process groups present within the system. .\" .Pp .It Ic show Cm proc Op Ar addr If no .Op Ar addr is specified, print information about the current process. Otherwise, show information about the process at address .Ar addr . .\" .Pp .It Ic show Cm procvm Show process virtual memory layout. .\" .Pp .It Ic show Cm protosw Ar addr Print protocol switch structure .Vt struct protosw at address .Ar addr . .\" .Pp .It Ic show Cm registers Ns Op Li / Ns Cm u Display the register set. If the .Cm u modifier is specified, it displays user registers instead of kernel registers or the currently saved one. 
.Pp .Sy Warning : The support of the .Cm u modifier depends on the machine. If not supported, incorrect information will be displayed. .\" .Pp .It Ic show Cm rman Ar addr Show resource manager object .Vt struct rman at address .Ar addr . Addresses of particular pointers can be gathered with "show allrman" command. .\" .Pp .It Ic show Cm rtc Show real time clock value. Useful for long debugging sessions. .\" .Pp .It Ic show Cm sleepchain Show all the threads a particular thread is waiting on based on sleepable locks. .\" .Pp .It Ic show Cm sleepq .It Ic show Cm sleepqueue Both commands provide the same functionality. They show sleepqueue .Vt struct sleepqueue structure. Sleepqueues are used within the .Fx kernel to implement sleepable synchronization primitives (thread holding a lock might sleep or be context switched), which at the time of writing are: .Xr condvar 9 , .Xr sx 9 and standard .Xr msleep 9 interface. .\" .Pp .It Ic show Cm sockbuf Ar addr .It Ic show Cm socket Ar addr Those commands print .Vt struct sockbuf and .Vt struct socket objects placed at .Ar addr . Output consists of all values present in structures mentioned. For exact interpretation and more details, visit .Pa sys/socket.h header file. .\" .Pp .It Ic show Cm sysregs Show system registers (e.g., .Li cr0-4 on i386.) Not present on some platforms. .\" .Pp .It Ic show Cm tcpcb Ar addr Print TCP control block .Vt struct tcpcb lying at address .Ar addr . For exact interpretation of output, visit .Pa netinet/tcp.h header file. .\" .Pp .It Ic show Cm thread Op Ar addr If no .Ar addr is specified, show detailed information about current thread. Otherwise, information about thread at .Ar addr is printed. .\" .Pp .It Ic show Cm threads Show all threads within the system. Output format is as follows: .Pp .Bl -tag -compact -offset indent -width "Second column" .It Ic First column Thread identifier (TID) .It Ic Second column Thread structure address .It Ic Third column Backtrace. 
.El .\" .Pp .It Ic show Cm tty Ar addr Display the contents of a TTY structure in a readable form. .\" .Pp .It Ic show Cm turnstile Ar addr Show turnstile .Vt struct turnstile structure at address .Ar addr . Turnstiles are structures used within the .Fx kernel to implement synchronization primitives which, while holding a specific type of lock, cannot sleep or context switch to another thread. Currently, those are: .Xr mutex 9 , .Xr rwlock 9 , .Xr rmlock 9 . .\" .Pp .It Ic show Cm uma Show UMA allocator statistics. Output consists of five columns: .Pp .Bl -tag -compact -offset indent -width "Requests" .It Cm "Zone" Name of the UMA zone. The same string that was passed to .Xr uma_zcreate 9 as a first argument. .It Cm "Size" Size of a given memory object (slab). .It Cm "Used" Number of slabs being currently used. .It Cm "Free" Number of free slabs within the UMA zone. .It Cm "Requests" Number of allocation requests to the given zone. .El .Pp The very same information might be gathered in the userspace with the help of .Dq Nm vmstat Fl z . .\" .Pp .It Ic show Cm unpcb Ar addr Shows UNIX domain socket private control block .Vt struct unpcb present at the address .Ar addr . .\" .Pp .It Ic show Cm vmochk Prints whether the internal VM objects are in a map somewhere and none have zero ref counts. .\" .Pp .It Ic show Cm vmopag This is supposed to show physical addresses consumed by a VM object. Currently, it is not possible to use this command when .Xr witness 4 is compiled in the kernel. .\" .Pp .It Ic show Cm vnet Ar addr Prints virtualized network stack .Vt struct vnet structure present at the address .Ar addr . .\" .Pp .It Ic show Cm vnode Op Ar addr Prints vnode .Vt struct vnode structure lying at .Op Ar addr . For the exact interpretation of the output, look at the .Pa sys/vnode.h header file. .\" .Pp .It Ic show Cm vnodebufs Ar addr Shows clean/dirty buffer lists of the vnode located at .Ar addr . .\" .Pp .It Ic show Cm watches Displays all watchpoints.
Shows watchpoints set with "watch" command. .\" .Pp .It Ic show Cm witness Shows information about lock acquisition coming from the .Xr witness 4 subsystem. .\" .Pp .It Ic gdb Toggles between remote GDB and DDB mode. In remote GDB mode, another machine is required that runs .Xr gdb 1 using the remote debug feature, with a connection to the serial console port on the target machine. Currently only available on the i386 architecture. .Pp .It Ic halt Halt the system. .Pp .It Ic kill Ar sig pid Send signal .Ar sig to process .Ar pid . The signal is acted on upon returning from the debugger. This command can be used to kill a process causing resource contention in the case of a hung system. See .Xr signal 3 for a list of signals. Note that the arguments are reversed relative to .Xr kill 2 . .Pp .It Ic reboot Op Ar seconds .It Ic reset Op Ar seconds Hard reset the system. If the optional argument .Ar seconds is given, the debugger will wait for this long, at most a week, before rebooting. .Pp .It Ic help Print a short summary of the available commands and command abbreviations. .Pp .It Ic capture on .It Ic capture off .It Ic capture reset .It Ic capture status .Nm supports a basic output capture facility, which can be used to retrieve the results of debugging commands from userspace using .Xr sysctl 3 . .Ic capture on enables output capture; .Ic capture off disables capture. .Ic capture reset will clear the capture buffer and disable capture. .Ic capture status will report current buffer use, buffer size, and disposition of output capture. .Pp Userspace processes may inspect and manage .Nm capture state using .Xr sysctl 8 : .Pp .Dv debug.ddb.capture.bufsize may be used to query or set the current capture buffer size. .Pp .Dv debug.ddb.capture.maxbufsize may be used to query the compile-time limit on the capture buffer size. .Pp .Dv debug.ddb.capture.bytes may be used to query the number of bytes of output currently in the capture buffer. 
.Pp .Dv debug.ddb.capture.data returns the contents of the buffer as a string to an appropriately privileged process. .Pp This facility is particularly useful in concert with the scripting and .Xr textdump 4 facilities, allowing scripted debugging output to be captured and committed to disk as part of a textdump for later analysis. The contents of the capture buffer may also be inspected in a kernel core dump using .Xr kgdb 1 . .Pp .It Ic run .It Ic script .It Ic scripts .It Ic unscript Run, define, list, and delete scripts. See the .Sx SCRIPTING section for more information on the scripting facility. .Pp .It Ic textdump dump .It Ic textdump set .It Ic textdump status .It Ic textdump unset Use the .Ic textdump dump command to immediately perform a textdump. More information may be found in .Xr textdump 4 . The .Ic textdump set command may be used to force the next kernel core dump to be a textdump rather than a traditional memory dump or minidump. .Ic textdump status reports whether a textdump has been scheduled. .Ic textdump unset cancels a request to perform a textdump as the next kernel core dump. .El .Sh VARIABLES The debugger accesses registers and variables as .Li $ Ns Ar name . Register names are as in the .Dq Ic show Cm registers command. Some variables are suffixed with numbers, and may have some modifier following a colon immediately after the variable name. For example, register variables can have a .Cm u modifier to indicate user register (e.g., .Dq Li $eax:u ) . .Pp Built-in variables currently supported are: .Pp .Bl -tag -width ".Va tabstops" -compact .It Va radix Input and output radix. .It Va maxoff Addresses are printed as .Dq Ar symbol Ns Li + Ns Ar offset unless .Ar offset is greater than .Va maxoff . .It Va maxwidth The width of the displayed line. .It Va lines The number of lines. It is used by the built-in pager. .It Va tabstops Tab stop width. .It Va work Ns Ar xx Work variable; .Ar xx can take values from 0 to 31. 
.El .Sh EXPRESSIONS Most expression operators in C are supported except .Ql ~ , .Ql ^ , and unary .Ql & . Special rules in .Nm are: .Bl -tag -width ".No Identifiers" .It Identifiers The name of a symbol is translated to the value of the symbol, which is the address of the corresponding object. .Ql \&. and .Ql \&: can be used in the identifier. If supported by an object format dependent routine, .Sm off .Oo Ar filename : Oc Ar func : lineno , .Sm on .Oo Ar filename : Oc Ns Ar variable , and .Oo Ar filename : Oc Ns Ar lineno can be accepted as a symbol. .It Numbers Radix is determined by the first two letters: .Ql 0x : hex, .Ql 0o : octal, .Ql 0t : decimal; otherwise, follow current radix. .It Li \&. .Va dot .It Li + .Va next .It Li .. address of the start of the last line examined. Unlike .Va dot or .Va next , this is only changed by .Ic examine or .Ic write command. .It Li ' last address explicitly specified. .It Li $ Ns Ar variable Translated to the value of the specified variable. It may be followed by a .Ql \&: and modifiers as described above. .It Ar a Ns Li # Ns Ar b A binary operator which rounds up the left hand side to the next multiple of right hand side. .It Li * Ns Ar expr Indirection. It may be followed by a .Ql \&: and modifiers as described above. .El .Sh SCRIPTING .Nm supports a basic scripting facility to allow automating tasks or responses to specific events. Each script consists of a list of DDB commands to be executed sequentially, and is assigned a unique name. Certain script names have special meaning, and will be automatically run on various .Nm events if scripts by those names have been defined. .Pp The .Ic script command may be used to define a script by name. Scripts consist of a series of .Nm commands separated with the .Ql \&; character. For example: .Bd -literal -offset indent script kdb.enter.panic=bt; show pcpu script lockinfo=show alllocks; show lockedvnods .Ed .Pp The .Ic scripts command lists currently defined scripts. 
.Pp The .Ic run command executes a script by name. For example: .Bd -literal -offset indent run lockinfo .Ed .Pp The .Ic unscript command may be used to delete a script by name. For example: .Bd -literal -offset indent unscript kdb.enter.panic .Ed .Pp These functions may also be performed from userspace using the .Xr ddb 8 command. .Pp Certain scripts are run automatically, if defined, for specific .Nm events. The following scripts are run when various events occur: .Bl -tag -width kdb.enter.powerfail .It Dv kdb.enter.acpi The kernel debugger was entered as a result of an .Xr acpi 4 event. .It Dv kdb.enter.bootflags The kernel debugger was entered at boot as a result of the debugger boot flag being set. .It Dv kdb.enter.break The kernel debugger was entered as a result of a serial or console break. .It Dv kdb.enter.cam The kernel debugger was entered as a result of a .Xr CAM 4 event. .It Dv kdb.enter.mac The kernel debugger was entered as a result of an assertion failure in the .Xr mac_test 4 module of the TrustedBSD MAC Framework. .It Dv kdb.enter.ndis The kernel debugger was entered as a result of an .Xr ndis 4 breakpoint event. .It Dv kdb.enter.netgraph The kernel debugger was entered as a result of a .Xr netgraph 4 event. .It Dv kdb.enter.panic .Xr panic 9 was called. .It Dv kdb.enter.powerfail The kernel debugger was entered as a result of a powerfail NMI on the sparc64 platform. .It Dv kdb.enter.powerpc The kernel debugger was entered as a result of an unimplemented interrupt type on the powerpc platform. .It Dv kdb.enter.sysctl The kernel debugger was entered as a result of the .Dv debug.kdb.enter sysctl being set. .It Dv kdb.enter.trapsig The kernel debugger was entered as a result of a trapsig event on the sparc64 platform. .It Dv kdb.enter.unionfs The kernel debugger was entered as a result of an assertion failure in the union file system. .It Dv kdb.enter.unknown The kernel debugger was entered, but no reason has been set.
.It Dv kdb.enter.vfslock The kernel debugger was entered as a result of a VFS lock violation. .It Dv kdb.enter.watchdog The kernel debugger was entered as a result of a watchdog firing. .It Dv kdb.enter.witness The kernel debugger was entered as a result of a .Xr witness 4 violation. .El .Pp In the event that none of these scripts is found, .Nm will attempt to execute a default script: .Bl -tag -width kdb.enter.powerfail .It Dv kdb.enter.default The kernel debugger was entered, but a script exactly matching the reason for entering was not defined. This can be used as a catch-all to handle cases not specifically of interest; for example, .Dv kdb.enter.witness might be defined to have special handling, and .Dv kdb.enter.default might be defined to simply panic and reboot. .El .Sh HINTS On machines with an ISA expansion bus, a simple NMI generation card can be constructed by connecting a push button between the A01 and B01 (CHCHK# and GND) card fingers. Momentarily shorting these two fingers together may cause the bridge chipset to generate an NMI, which causes the kernel to pass control to .Nm . Some bridge chipsets do not generate an NMI on CHCHK#, so your mileage may vary. The NMI allows one to break into the debugger on a wedged machine to diagnose problems. Other bus' bridge chipsets may be able to generate an NMI using bus-specific methods. There are many PCI and PCIe add-in cards which can generate NMI for debugging. Modern server systems typically use IPMI to generate signals to enter the debugger. The .Dv devel/ipmitool port can be used to send the .Cd chassis power diag command which delivers an NMI to the processor. Embedded systems often use JTAG for debugging, but rarely use it in combination with .Nm . .Pp For serial consoles, you can enter the debugger by sending a BREAK condition on the serial line if .Cd options BREAK_TO_DEBUGGER is specified in the kernel.
Most terminal emulation programs can send a break sequence with a special key sequence or via a menu item. However, in some setups, sending the break can be difficult to arrange or happens spuriously, so if the kernel contains .Cd options ALT_BREAK_TO_DEBUGGER then the sequence of CR TILDE CTRL-B enters the debugger; CR TILDE CTRL-P causes a panic instead of entering the debugger; and CR TILDE CTRL-R causes an immediate reboot. In all the above sequences, CR is a Carriage Return and is usually sent by hitting the Enter or Return key. TILDE is the ASCII tilde character (~). CTRL-x is Control x created by hitting the control key and then x and then releasing both. .Pp The break to enter the debugger behavior may be enabled at run-time by setting the .Xr sysctl 8 .Dv debug.kdb.break_to_debugger to 1. The alternate sequence to enter the debugger behavior may be enabled at run-time by setting the .Xr sysctl 8 .Dv debug.kdb.alt_break_to_debugger to 1. The debugger may be entered by setting the .Xr sysctl 8 .Dv debug.kdb.enter to 1. .Sh FILES Header files mentioned in this manual page can be found below .Pa /usr/include directory. .Pp .Bl -dash -compact .It .Pa sys/buf.h .It .Pa sys/domain.h .It .Pa netinet/in_pcb.h .It .Pa sys/socket.h .It .Pa sys/vnode.h .El .Sh SEE ALSO .Xr gdb 1 , .Xr kgdb 1 , .Xr acpi 4 , .Xr CAM 4 , .Xr mac_test 4 , .Xr ndis 4 , .Xr netgraph 4 , .Xr textdump 4 , .Xr witness 4 , .Xr ddb 8 , .Xr sysctl 8 , .Xr panic 9 .Sh HISTORY The .Nm debugger was developed for Mach, and ported to .Bx 386 0.1 . This manual page translated from .Xr man 7 macros by .An Garrett Wollman . .Pp .An Robert N. M. Watson added support for .Nm output capture, .Xr textdump 4 and scripting in .Fx 7.1 . 
Index: projects/vnet/sys/dev/acpica/acpi_thermal.c =================================================================== --- projects/vnet/sys/dev/acpica/acpi_thermal.c (revision 301522) +++ projects/vnet/sys/dev/acpica/acpi_thermal.c (revision 301523) @@ -1,1214 +1,1223 @@ /*- * Copyright (c) 2000, 2001 Michael Smith * Copyright (c) 2000 BSDi * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include "cpufreq_if.h" #include #include #include /* Hooks for the ACPI CA debugging infrastructure */ #define _COMPONENT ACPI_THERMAL ACPI_MODULE_NAME("THERMAL") #define TZ_ZEROC 2731 #define TZ_KELVTOC(x) (((x) - TZ_ZEROC) / 10), abs(((x) - TZ_ZEROC) % 10) #define TZ_NOTIFY_TEMPERATURE 0x80 /* Temperature changed. */ #define TZ_NOTIFY_LEVELS 0x81 /* Cooling levels changed. */ #define TZ_NOTIFY_DEVICES 0x82 /* Device lists changed. */ #define TZ_NOTIFY_CRITICAL 0xcc /* Fake notify that _CRT/_HOT reached. */ /* Check for temperature changes every 10 seconds by default */ #define TZ_POLLRATE 10 /* Make sure the reported temperature is valid for this number of polls. */ #define TZ_VALIDCHECKS 3 /* Notify the user we will be shutting down in one more poll cycle. */ #define TZ_NOTIFYCOUNT (TZ_VALIDCHECKS - 1) /* ACPI spec defines this */ #define TZ_NUMLEVELS 10 struct acpi_tz_zone { int ac[TZ_NUMLEVELS]; ACPI_BUFFER al[TZ_NUMLEVELS]; int crt; int hot; ACPI_BUFFER psl; int psv; int tc1; int tc2; int tsp; int tzp; }; struct acpi_tz_softc { device_t tz_dev; ACPI_HANDLE tz_handle; /*Thermal zone handle*/ int tz_temperature; /*Current temperature*/ int tz_active; /*Current active cooling*/ #define TZ_ACTIVE_NONE -1 #define TZ_ACTIVE_UNKNOWN -2 int tz_requested; /*Minimum active cooling*/ int tz_thflags; /*Current temp-related flags*/ #define TZ_THFLAG_NONE 0 #define TZ_THFLAG_PSV (1<<0) #define TZ_THFLAG_HOT (1<<2) #define TZ_THFLAG_CRT (1<<3) int tz_flags; #define TZ_FLAG_NO_SCP (1<<0) /*No _SCP method*/ #define TZ_FLAG_GETPROFILE (1<<1) /*Get power_profile in timeout*/ #define TZ_FLAG_GETSETTINGS (1<<2) /*Get devs/setpoints*/ struct timespec tz_cooling_started; /*Current cooling starting time*/ struct sysctl_ctx_list tz_sysctl_ctx; struct sysctl_oid *tz_sysctl_tree; eventhandler_tag tz_event; struct 
acpi_tz_zone tz_zone; /*Thermal zone parameters*/ int tz_validchecks; int tz_insane_tmp_notified; /* passive cooling */ struct proc *tz_cooling_proc; int tz_cooling_proc_running; int tz_cooling_enabled; int tz_cooling_active; int tz_cooling_updated; int tz_cooling_saved_freq; }; #define TZ_ACTIVE_LEVEL(act) ((act) >= 0 ? (act) : TZ_NUMLEVELS) #define CPUFREQ_MAX_LEVELS 64 /* XXX cpufreq should export this */ static int acpi_tz_probe(device_t dev); static int acpi_tz_attach(device_t dev); static int acpi_tz_establish(struct acpi_tz_softc *sc); static void acpi_tz_monitor(void *Context); static void acpi_tz_switch_cooler_off(ACPI_OBJECT *obj, void *arg); static void acpi_tz_switch_cooler_on(ACPI_OBJECT *obj, void *arg); static void acpi_tz_getparam(struct acpi_tz_softc *sc, char *node, int *data); static void acpi_tz_sanity(struct acpi_tz_softc *sc, int *val, char *what); static int acpi_tz_active_sysctl(SYSCTL_HANDLER_ARGS); static int acpi_tz_cooling_sysctl(SYSCTL_HANDLER_ARGS); static int acpi_tz_temp_sysctl(SYSCTL_HANDLER_ARGS); static int acpi_tz_passive_sysctl(SYSCTL_HANDLER_ARGS); static void acpi_tz_notify_handler(ACPI_HANDLE h, UINT32 notify, void *context); static void acpi_tz_signal(struct acpi_tz_softc *sc, int flags); static void acpi_tz_timeout(struct acpi_tz_softc *sc, int flags); static void acpi_tz_power_profile(void *arg); static void acpi_tz_thread(void *arg); static int acpi_tz_cooling_is_available(struct acpi_tz_softc *sc); static int acpi_tz_cooling_thread_start(struct acpi_tz_softc *sc); static device_method_t acpi_tz_methods[] = { /* Device interface */ DEVMETHOD(device_probe, acpi_tz_probe), DEVMETHOD(device_attach, acpi_tz_attach), DEVMETHOD_END }; static driver_t acpi_tz_driver = { "acpi_tz", acpi_tz_methods, sizeof(struct acpi_tz_softc), }; static char *acpi_tz_tmp_name = "_TMP"; static devclass_t acpi_tz_devclass; DRIVER_MODULE(acpi_tz, acpi, acpi_tz_driver, acpi_tz_devclass, 0, 0); MODULE_DEPEND(acpi_tz, acpi, 1, 1, 1); static struct 
sysctl_ctx_list acpi_tz_sysctl_ctx; static struct sysctl_oid *acpi_tz_sysctl_tree; /* Minimum cooling run time */ static int acpi_tz_min_runtime; static int acpi_tz_polling_rate = TZ_POLLRATE; static int acpi_tz_override; /* Timezone polling thread */ static struct proc *acpi_tz_proc; ACPI_LOCK_DECL(thermal, "ACPI thermal zone"); static int acpi_tz_cooling_unit = -1; static int acpi_tz_probe(device_t dev) { int result; if (acpi_get_type(dev) == ACPI_TYPE_THERMAL && !acpi_disabled("thermal")) { device_set_desc(dev, "Thermal Zone"); result = -10; } else result = ENXIO; return (result); } static int acpi_tz_attach(device_t dev) { struct acpi_tz_softc *sc; struct acpi_softc *acpi_sc; int error; char oidname[8]; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); sc = device_get_softc(dev); sc->tz_dev = dev; sc->tz_handle = acpi_get_handle(dev); sc->tz_requested = TZ_ACTIVE_NONE; sc->tz_active = TZ_ACTIVE_UNKNOWN; sc->tz_thflags = TZ_THFLAG_NONE; sc->tz_cooling_proc = NULL; sc->tz_cooling_proc_running = FALSE; sc->tz_cooling_active = FALSE; sc->tz_cooling_updated = FALSE; sc->tz_cooling_enabled = FALSE; /* * Parse the current state of the thermal zone and build control * structures. We don't need to worry about interference with the * control thread since we haven't fully attached this device yet. */ if ((error = acpi_tz_establish(sc)) != 0) return (error); /* * Register for any Notify events sent to this zone. */ AcpiInstallNotifyHandler(sc->tz_handle, ACPI_DEVICE_NOTIFY, acpi_tz_notify_handler, sc); /* * Create our sysctl nodes. * * XXX we need a mechanism for adding nodes under ACPI. 
*/ if (device_get_unit(dev) == 0) { acpi_sc = acpi_device_get_parent_softc(dev); sysctl_ctx_init(&acpi_tz_sysctl_ctx); acpi_tz_sysctl_tree = SYSCTL_ADD_NODE(&acpi_tz_sysctl_ctx, SYSCTL_CHILDREN(acpi_sc->acpi_sysctl_tree), OID_AUTO, "thermal", CTLFLAG_RD, 0, ""); SYSCTL_ADD_INT(&acpi_tz_sysctl_ctx, SYSCTL_CHILDREN(acpi_tz_sysctl_tree), OID_AUTO, "min_runtime", CTLFLAG_RW, &acpi_tz_min_runtime, 0, "minimum cooling run time in sec"); SYSCTL_ADD_INT(&acpi_tz_sysctl_ctx, SYSCTL_CHILDREN(acpi_tz_sysctl_tree), OID_AUTO, "polling_rate", CTLFLAG_RW, &acpi_tz_polling_rate, 0, "monitor polling interval in seconds"); SYSCTL_ADD_INT(&acpi_tz_sysctl_ctx, SYSCTL_CHILDREN(acpi_tz_sysctl_tree), OID_AUTO, "user_override", CTLFLAG_RW, &acpi_tz_override, 0, "allow override of thermal settings"); } sysctl_ctx_init(&sc->tz_sysctl_ctx); sprintf(oidname, "tz%d", device_get_unit(dev)); sc->tz_sysctl_tree = SYSCTL_ADD_NODE(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(acpi_tz_sysctl_tree), OID_AUTO, oidname, CTLFLAG_RD, 0, ""); SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree), OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, &sc->tz_temperature, 0, sysctl_handle_int, "IK", "current thermal zone temperature"); SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree), OID_AUTO, "active", CTLTYPE_INT | CTLFLAG_RW, sc, 0, acpi_tz_active_sysctl, "I", "cooling is active"); SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree), OID_AUTO, "passive_cooling", CTLTYPE_INT | CTLFLAG_RW, sc, 0, acpi_tz_cooling_sysctl, "I", "enable passive (speed reduction) cooling"); SYSCTL_ADD_INT(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree), OID_AUTO, "thermal_flags", CTLFLAG_RD, &sc->tz_thflags, 0, "thermal zone flags"); SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree), OID_AUTO, "_PSV", CTLTYPE_INT | CTLFLAG_RW, sc, offsetof(struct acpi_tz_softc, tz_zone.psv), acpi_tz_temp_sysctl, "IK", "passive cooling temp setpoint"); 
SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree), OID_AUTO, "_HOT", CTLTYPE_INT | CTLFLAG_RW, sc, offsetof(struct acpi_tz_softc, tz_zone.hot), acpi_tz_temp_sysctl, "IK", "too hot temp setpoint (suspend now)"); SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree), OID_AUTO, "_CRT", CTLTYPE_INT | CTLFLAG_RW, sc, offsetof(struct acpi_tz_softc, tz_zone.crt), acpi_tz_temp_sysctl, "IK", "critical temp setpoint (shutdown now)"); SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree), OID_AUTO, "_ACx", CTLTYPE_INT | CTLFLAG_RD, &sc->tz_zone.ac, sizeof(sc->tz_zone.ac), sysctl_handle_opaque, "IK", ""); SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree), OID_AUTO, "_TC1", CTLTYPE_INT | CTLFLAG_RW, sc, offsetof(struct acpi_tz_softc, tz_zone.tc1), acpi_tz_passive_sysctl, "I", "thermal constant 1 for passive cooling"); SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree), OID_AUTO, "_TC2", CTLTYPE_INT | CTLFLAG_RW, sc, offsetof(struct acpi_tz_softc, tz_zone.tc2), acpi_tz_passive_sysctl, "I", "thermal constant 2 for passive cooling"); SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree), OID_AUTO, "_TSP", CTLTYPE_INT | CTLFLAG_RW, sc, offsetof(struct acpi_tz_softc, tz_zone.tsp), acpi_tz_passive_sysctl, "I", "thermal sampling period for passive cooling"); /* - * Create thread to service all of the thermal zones. Register - * our power profile event handler. + * Register our power profile event handler. */ sc->tz_event = EVENTHANDLER_REGISTER(power_profile_change, acpi_tz_power_profile, sc, 0); - if (acpi_tz_proc == NULL) { - error = kproc_create(acpi_tz_thread, NULL, &acpi_tz_proc, - RFHIGHPID, 0, "acpi_thermal"); - if (error != 0) { - device_printf(sc->tz_dev, "could not create thread - %d", error); - goto out; - } - } /* + * Flag the event handler for a manual invocation by our timeout. 
+ * We defer it like this so that the rest of the subsystem has time + * to come up. Don't bother evaluating/printing the temperature at + * this point; on many systems it'll be bogus until the EC is running. + */ + sc->tz_flags |= TZ_FLAG_GETPROFILE; + + return_VALUE (0); +} + +static void +acpi_tz_startup(void *arg __unused) +{ + struct acpi_tz_softc *sc; + device_t *devs; + int devcount, error, i; + + devclass_get_devices(acpi_tz_devclass, &devs, &devcount); + if (devcount == 0) + return; + + /* + * Create thread to service all of the thermal zones. + */ + error = kproc_create(acpi_tz_thread, NULL, &acpi_tz_proc, RFHIGHPID, 0, + "acpi_thermal"); + if (error != 0) + printf("acpi_tz: could not create thread - %d", error); + + /* * Create a thread to handle passive cooling for 1st zone which * has _PSV, _TSP, _TC1 and _TC2. Users can enable it for other * zones manually for now. * * XXX We enable only one zone to avoid multiple zones conflict * with each other since cpufreq currently sets all CPUs to the * given frequency whereas it's possible for different thermal * zones to specify independent settings for multiple CPUs. */ - if (acpi_tz_cooling_unit < 0 && acpi_tz_cooling_is_available(sc)) - sc->tz_cooling_enabled = TRUE; - if (sc->tz_cooling_enabled) { - error = acpi_tz_cooling_thread_start(sc); - if (error != 0) { - sc->tz_cooling_enabled = FALSE; - goto out; + for (i = 0; i < devcount; i++) { + sc = device_get_softc(devs[i]); + if (acpi_tz_cooling_is_available(sc)) { + sc->tz_cooling_enabled = TRUE; + error = acpi_tz_cooling_thread_start(sc); + if (error != 0) { + sc->tz_cooling_enabled = FALSE; + break; + } + acpi_tz_cooling_unit = device_get_unit(devs[i]); + break; } - acpi_tz_cooling_unit = device_get_unit(dev); } - - /* - * Flag the event handler for a manual invocation by our timeout. - * We defer it like this so that the rest of the subsystem has time - * to come up. 
Don't bother evaluating/printing the temperature at - * this point; on many systems it'll be bogus until the EC is running. - */ - sc->tz_flags |= TZ_FLAG_GETPROFILE; - -out: - if (error != 0) { - EVENTHANDLER_DEREGISTER(power_profile_change, sc->tz_event); - AcpiRemoveNotifyHandler(sc->tz_handle, ACPI_DEVICE_NOTIFY, - acpi_tz_notify_handler); - sysctl_ctx_free(&sc->tz_sysctl_ctx); - } - return_VALUE (error); + free(devs, M_TEMP); } +SYSINIT(acpi_tz, SI_SUB_KICK_SCHEDULER, SI_ORDER_ANY, acpi_tz_startup, NULL);

/*
 * Parse the current state of this thermal zone and set up to use it.
 *
 * Note that we may have previous state, which will have to be discarded.
 *
 * Any _ALx/_PSL buffers left from an earlier call are freed first.  The
 * zone's _ACx/_ALx, _CRT, _HOT, _PSL, _PSV, _TC1, _TC2, _TSP and _TZP
 * objects are then re-read and the temperature setpoints are passed through
 * acpi_tz_sanity().
 *
 * Returns 0 on success, or ENXIO as soon as an _ALx object turns out not to
 * be a package (later levels and the remaining parameters are then left
 * unevaluated).
 */
static int
acpi_tz_establish(struct acpi_tz_softc *sc)
{
    ACPI_OBJECT *obj;
    int i;
    char nbuf[8];	/* holds "_ACn"/"_ALn" for single-digit n */

    ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);

    /* Erase any existing state. */
    for (i = 0; i < TZ_NUMLEVELS; i++)
        if (sc->tz_zone.al[i].Pointer != NULL)
            AcpiOsFree(sc->tz_zone.al[i].Pointer);
    if (sc->tz_zone.psl.Pointer != NULL)
        AcpiOsFree(sc->tz_zone.psl.Pointer);

    /*
     * XXX: We initialize only ACPI_BUFFER to avoid race condition
     * with passive cooling thread which refers psv, tc1, tc2 and tsp.
     */
    bzero(sc->tz_zone.ac, sizeof(sc->tz_zone.ac));
    bzero(sc->tz_zone.al, sizeof(sc->tz_zone.al));
    bzero(&sc->tz_zone.psl, sizeof(sc->tz_zone.psl));

    /* Evaluate thermal zone parameters. */
    for (i = 0; i < TZ_NUMLEVELS; i++) {
        sprintf(nbuf, "_AC%d", i);
        acpi_tz_getparam(sc, nbuf, &sc->tz_zone.ac[i]);
        sprintf(nbuf, "_AL%d", i);
        /* Let ACPICA allocate the result buffer; it is freed above next time. */
        sc->tz_zone.al[i].Length = ACPI_ALLOCATE_BUFFER;
        sc->tz_zone.al[i].Pointer = NULL;
        /* Status is ignored: a missing _ALx simply leaves Pointer NULL. */
        AcpiEvaluateObject(sc->tz_handle, nbuf, NULL, &sc->tz_zone.al[i]);
        obj = (ACPI_OBJECT *)sc->tz_zone.al[i].Pointer;
        if (obj != NULL) {
            /* Should be a package containing a list of power objects */
            if (obj->Type != ACPI_TYPE_PACKAGE) {
                device_printf(sc->tz_dev, "%s has unknown type %d, rejecting\n",
                    nbuf, obj->Type);
                return_VALUE (ENXIO);
            }
        }
    }
    acpi_tz_getparam(sc, "_CRT", &sc->tz_zone.crt);
    acpi_tz_getparam(sc, "_HOT", &sc->tz_zone.hot);
    sc->tz_zone.psl.Length = ACPI_ALLOCATE_BUFFER;
    sc->tz_zone.psl.Pointer = NULL;
    AcpiEvaluateObject(sc->tz_handle, "_PSL", NULL, &sc->tz_zone.psl);
    acpi_tz_getparam(sc, "_PSV", &sc->tz_zone.psv);
    acpi_tz_getparam(sc, "_TC1", &sc->tz_zone.tc1);
    acpi_tz_getparam(sc, "_TC2", &sc->tz_zone.tc2);
    acpi_tz_getparam(sc, "_TSP", &sc->tz_zone.tsp);
    acpi_tz_getparam(sc, "_TZP", &sc->tz_zone.tzp);

    /*
     * Sanity-check the values we've been given.
     *
     * XXX what do we do about systems that give us the same value for
     * more than one of these setpoints?
     */
    acpi_tz_sanity(sc, &sc->tz_zone.crt, "_CRT");
    acpi_tz_sanity(sc, &sc->tz_zone.hot, "_HOT");
    acpi_tz_sanity(sc, &sc->tz_zone.psv, "_PSV");
    for (i = 0; i < TZ_NUMLEVELS; i++)
        acpi_tz_sanity(sc, &sc->tz_zone.ac[i], "_ACx");

    return_VALUE (0);
}

/*
 * Printable names for the active cooling levels.  Entry 0 is used for
 * "no level" (-1) and entry n + 1 names level n.
 */
static char *aclevel_string[] = {
    "NONE", "_AC0", "_AC1", "_AC2", "_AC3", "_AC4",
    "_AC5", "_AC6", "_AC7", "_AC8", "_AC9"
};

/*
 * Map an active cooling level (-1 .. TZ_NUMLEVELS - 1) to its name.
 * Anything outside that range is reported as "NONE".
 */
static __inline const char *
acpi_tz_aclevel_string(int active)
{
    if (active < -1 || active >= TZ_NUMLEVELS)
        return (aclevel_string[0]);

    return (aclevel_string[active + 1]);
}

/*
 * Get the current temperature.
 *
 * Evaluates the method named by acpi_tz_tmp_name on the zone and, if the
 * reading passes acpi_tz_sanity(), stores it in sc->tz_temperature.
 * Returns TRUE when a new reading was stored, FALSE if the evaluation
 * failed or the value was rejected (tz_temperature is then left unchanged).
 */
static int
acpi_tz_get_temperature(struct acpi_tz_softc *sc)
{
    int temp;
    ACPI_STATUS status;

    ACPI_FUNCTION_NAME ("acpi_tz_get_temperature");

    /* Evaluate the thermal zone's _TMP method. */
    status = acpi_GetInteger(sc->tz_handle, acpi_tz_tmp_name, &temp);
    if (ACPI_FAILURE(status)) {
        ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
            "error fetching current temperature -- %s\n",
            AcpiFormatException(status));
        return (FALSE);
    }

    /* Check it for validity. */
    acpi_tz_sanity(sc, &temp, acpi_tz_tmp_name);
    if (temp == -1)
        return (FALSE);

    ACPI_DEBUG_PRINT((ACPI_DB_VALUES, "got %d.%dC\n", TZ_KELVTOC(temp)));
    sc->tz_temperature = temp;
    return (TRUE);
}

/*
 * Evaluate the condition of a thermal zone, take appropriate actions.
 *
 * Context is the zone's struct acpi_tz_softc.  The function reads the
 * temperature, picks the active cooling level that should be in effect,
 * switches cooling devices to match, and tracks _HOT/_CRT excursions,
 * requesting a power-off once enough consecutive polls exceed them.
 * Nothing is done if the temperature cannot be read.
 */
static void
acpi_tz_monitor(void *Context)
{
    struct acpi_tz_softc *sc;
    struct timespec curtime;
    int temp;
    int i;
    int newactive, newflags;

    ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);

    sc = (struct acpi_tz_softc *)Context;

    /* Get the current temperature. */
    if (!acpi_tz_get_temperature(sc)) {
        /* XXX disable zone? go to max cooling? */
        return_VOID;
    }
    temp = sc->tz_temperature;

    /*
     * Work out what we ought to be doing right now.
     *
     * Note that the _ACx levels sort from hot to cold.
     *
     * Scanning from the highest index down and overwriting on every match
     * leaves newactive at the lowest-numbered level whose trip point has
     * been reached.
     */
    newactive = TZ_ACTIVE_NONE;
    for (i = TZ_NUMLEVELS - 1; i >= 0; i--) {
        if (sc->tz_zone.ac[i] != -1 && temp >= sc->tz_zone.ac[i])
            newactive = i;
    }

    /*
     * We are going to get _ACx level down (colder side), but give a guaranteed
     * minimum cooling run time if requested.
     */
    if (acpi_tz_min_runtime > 0 && sc->tz_active != TZ_ACTIVE_NONE &&
        sc->tz_active != TZ_ACTIVE_UNKNOWN &&
        (newactive == TZ_ACTIVE_NONE || newactive > sc->tz_active)) {

        /* curtime becomes the time elapsed since the last level switch. */
        getnanotime(&curtime);
        timespecsub(&curtime, &sc->tz_cooling_started);
        if (curtime.tv_sec < acpi_tz_min_runtime)
            newactive = sc->tz_active;
    }

    /* Handle user override of active mode */
    if (sc->tz_requested != TZ_ACTIVE_NONE && (newactive == TZ_ACTIVE_NONE
        || sc->tz_requested < newactive))
        newactive = sc->tz_requested;

    /* update temperature-related flags */
    newflags = TZ_THFLAG_NONE;
    if (sc->tz_zone.psv != -1 && temp >= sc->tz_zone.psv)
        newflags |= TZ_THFLAG_PSV;
    if (sc->tz_zone.hot != -1 && temp >= sc->tz_zone.hot)
        newflags |= TZ_THFLAG_HOT;
    if (sc->tz_zone.crt != -1 && temp >= sc->tz_zone.crt)
        newflags |= TZ_THFLAG_CRT;

    /* If the active cooling state has changed, we have to switch things. */
    if (sc->tz_active == TZ_ACTIVE_UNKNOWN) {
        /*
         * We don't know which cooling device is on or off,
         * so stop them all, because we now know which
         * should be on (if any).
         */
        for (i = 0; i < TZ_NUMLEVELS; i++) {
            if (sc->tz_zone.al[i].Pointer != NULL) {
                acpi_ForeachPackageObject(
                    (ACPI_OBJECT *)sc->tz_zone.al[i].Pointer,
                    acpi_tz_switch_cooler_off, sc);
            }
        }
        /* now we know that all devices are off */
        sc->tz_active = TZ_ACTIVE_NONE;
    }

    if (newactive != sc->tz_active) {
        /*
         * NOTE(review): TZ_ACTIVE_LEVEL() is defined outside this chunk;
         * presumably it maps TZ_ACTIVE_NONE to TZ_NUMLEVELS so the two
         * loops below cover the right index ranges -- confirm.
         */
        /* Turn off unneeded cooling devices that are on, if any are */
        for (i = TZ_ACTIVE_LEVEL(sc->tz_active);
             i < TZ_ACTIVE_LEVEL(newactive); i++) {
            acpi_ForeachPackageObject(
                (ACPI_OBJECT *)sc->tz_zone.al[i].Pointer,
                acpi_tz_switch_cooler_off, sc);
        }
        /* Turn on cooling devices that are required, if any are */
        for (i = TZ_ACTIVE_LEVEL(sc->tz_active) - 1;
             i >= TZ_ACTIVE_LEVEL(newactive); i--) {
            acpi_ForeachPackageObject(
                (ACPI_OBJECT *)sc->tz_zone.al[i].Pointer,
                acpi_tz_switch_cooler_on, sc);
        }

        ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
            "switched from %s to %s: %d.%dC\n",
            acpi_tz_aclevel_string(sc->tz_active),
            acpi_tz_aclevel_string(newactive), TZ_KELVTOC(temp));
        sc->tz_active = newactive;
        /* Restart the minimum-runtime clock used above. */
        getnanotime(&sc->tz_cooling_started);
    }

    /* XXX (de)activate any passive cooling that may be required. */

    /*
     * If the temperature is at _HOT or _CRT, increment our event count.
     * If it has occurred enough times, shutdown the system.  This is
     * needed because some systems will report an invalid high temperature
     * for one poll cycle.  It is suspected this is due to the embedded
     * controller timing out.  A typical value is 138C for one cycle on
     * a system that is otherwise 65C.
     *
     * If we're almost at that threshold, notify the user through devd(8).
     */
    if ((newflags & (TZ_THFLAG_HOT | TZ_THFLAG_CRT)) != 0) {
        sc->tz_validchecks++;
        if (sc->tz_validchecks == TZ_VALIDCHECKS) {
            device_printf(sc->tz_dev,
                "WARNING - current temperature (%d.%dC) exceeds safe limits\n",
                TZ_KELVTOC(sc->tz_temperature));
            shutdown_nice(RB_POWEROFF);
        } else if (sc->tz_validchecks == TZ_NOTIFYCOUNT)
            acpi_UserNotify("Thermal", sc->tz_handle, TZ_NOTIFY_CRITICAL);
    } else {
        /* Any reading below _HOT/_CRT resets the consecutive-hit counter. */
        sc->tz_validchecks = 0;
    }
    sc->tz_thflags = newflags;

    return_VOID;
}

/*
 * Given an object, verify that it's a reference to a device of some sort,
 * and try to switch it off.
 *
 * Used as an acpi_ForeachPackageObject() callback; arg is not used here.
 * The result of the power-state change is not checked.
 */
static void
acpi_tz_switch_cooler_off(ACPI_OBJECT *obj, void *arg)
{
    ACPI_HANDLE cooler;

    ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);

    cooler = acpi_GetReference(NULL, obj);
    if (cooler == NULL) {
        ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "can't get handle\n"));
        return_VOID;
    }

    ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "called to turn %s off\n",
        acpi_name(cooler)));
    acpi_pwr_switch_consumer(cooler, ACPI_STATE_D3);

    return_VOID;
}

/*
 * Given an object, verify that it's a reference to a device of some sort,
 * and try to switch it on.
 *
 * Used as an acpi_ForeachPackageObject() callback; arg is the zone's
 * struct acpi_tz_softc, needed only to report a failed activation.
 *
 * XXX replication of off/on function code is bad.
 */
static void
acpi_tz_switch_cooler_on(ACPI_OBJECT *obj, void *arg)
{
    struct acpi_tz_softc *sc = (struct acpi_tz_softc *)arg;
    ACPI_HANDLE cooler;
    ACPI_STATUS status;

    ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);

    cooler = acpi_GetReference(NULL, obj);
    if (cooler == NULL) {
        ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "can't get handle\n"));
        return_VOID;
    }

    ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "called to turn %s on\n",
        acpi_name(cooler)));
    status = acpi_pwr_switch_consumer(cooler, ACPI_STATE_D0);
    if (ACPI_FAILURE(status)) {
        ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
            "failed to activate %s - %s\n", acpi_name(cooler),
            AcpiFormatException(status));
    }

    return_VOID;
}

/*
 * Read/debug-print a parameter, default it to -1.
*/ static void acpi_tz_getparam(struct acpi_tz_softc *sc, char *node, int *data) { ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); if (ACPI_FAILURE(acpi_GetInteger(sc->tz_handle, node, data))) { *data = -1; } else { ACPI_DEBUG_PRINT((ACPI_DB_VALUES, "%s.%s = %d\n", acpi_name(sc->tz_handle), node, *data)); } return_VOID; } /* * Sanity-check a temperature value. Assume that setpoints * should be between 0C and 200C. */ static void acpi_tz_sanity(struct acpi_tz_softc *sc, int *val, char *what) { if (*val != -1 && (*val < TZ_ZEROC || *val > TZ_ZEROC + 2000)) { /* * If the value we are checking is _TMP, warn the user only * once. This avoids spamming messages if, for instance, the * sensor is broken and always returns an invalid temperature. * * This is only done for _TMP; other values always emit a * warning. */ if (what != acpi_tz_tmp_name || !sc->tz_insane_tmp_notified) { device_printf(sc->tz_dev, "%s value is absurd, ignored (%d.%dC)\n", what, TZ_KELVTOC(*val)); /* Don't warn the user again if the read value doesn't improve. */ if (what == acpi_tz_tmp_name) sc->tz_insane_tmp_notified = 1; } *val = -1; return; } /* This value is correct. Warn if it's incorrect again. */ if (what == acpi_tz_tmp_name) sc->tz_insane_tmp_notified = 0; } /* * Respond to a sysctl on the active state node. 
 *
 * Reads report sc->tz_active.  A write must be in the range
 * -1 .. TZ_NUMLEVELS - 1 (else EINVAL); it is recorded as the requested
 * level and the monitor thread is woken to apply it.
 */
static int
acpi_tz_active_sysctl(SYSCTL_HANDLER_ARGS)
{
    struct acpi_tz_softc *sc;
    int active;
    int error;

    sc = (struct acpi_tz_softc *)oidp->oid_arg1;
    active = sc->tz_active;
    error = sysctl_handle_int(oidp, &active, 0, req);

    /* Error or no new value */
    if (error != 0 || req->newptr == NULL)
        return (error);
    if (active < -1 || active >= TZ_NUMLEVELS)
        return (EINVAL);

    /* Set new preferred level and re-switch */
    sc->tz_requested = active;
    acpi_tz_signal(sc, 0);
    return (0);
}

/*
 * Sysctl handler for the passive-cooling enable flag.
 *
 * Reads report sc->tz_cooling_enabled.  A write must be TRUE or FALSE
 * (else EINVAL).  Enabling starts the cooling thread when the zone provides
 * the needed parameters, otherwise fails with ENODEV; on any failure the
 * flag is stored as FALSE and the error is returned.
 */
static int
acpi_tz_cooling_sysctl(SYSCTL_HANDLER_ARGS)
{
    struct acpi_tz_softc *sc;
    int enabled, error;

    sc = (struct acpi_tz_softc *)oidp->oid_arg1;
    enabled = sc->tz_cooling_enabled;
    error = sysctl_handle_int(oidp, &enabled, 0, req);

    /* Error or no new value */
    if (error != 0 || req->newptr == NULL)
        return (error);
    if (enabled != TRUE && enabled != FALSE)
        return (EINVAL);

    if (enabled) {
        if (acpi_tz_cooling_is_available(sc))
            error = acpi_tz_cooling_thread_start(sc);
        else
            error = ENODEV;
        if (error)
            enabled = FALSE;
    }
    /* Clearing the flag is what makes a running cooling thread exit. */
    sc->tz_cooling_enabled = enabled;
    return (error);
}

/*
 * Sysctl handler for an int temperature field inside the softc.
 *
 * oid_arg1 is the softc and oid_arg2 the byte offset of the field within
 * it.  Writes are refused with EPERM unless acpi_tz_override is set, and
 * with EINVAL if acpi_tz_sanity() rejects the value.
 */
static int
acpi_tz_temp_sysctl(SYSCTL_HANDLER_ARGS)
{
    struct acpi_tz_softc *sc;
    int temp, *temp_ptr;
    int error;

    sc = oidp->oid_arg1;
    temp_ptr = (int *)(void *)(uintptr_t)((uintptr_t)sc + oidp->oid_arg2);
    temp = *temp_ptr;
    error = sysctl_handle_int(oidp, &temp, 0, req);

    /* Error or no new value */
    if (error != 0 || req->newptr == NULL)
        return (error);

    /* Only allow changing settings if override is set. */
    if (!acpi_tz_override)
        return (EPERM);

    /* Check user-supplied value for sanity. */
    acpi_tz_sanity(sc, &temp, "user-supplied temp");
    if (temp == -1)
        return (EINVAL);

    *temp_ptr = temp;
    return (0);
}

/*
 * Sysctl handler for an int passive-cooling field inside the softc.
 *
 * Same addressing scheme as acpi_tz_temp_sysctl() (softc in oid_arg1, byte
 * offset in oid_arg2) and the same acpi_tz_override gate, but the new value
 * is stored without any range check.
 */
static int
acpi_tz_passive_sysctl(SYSCTL_HANDLER_ARGS)
{
    struct acpi_tz_softc *sc;
    int val, *val_ptr;
    int error;

    sc = oidp->oid_arg1;
    val_ptr = (int *)(void *)(uintptr_t)((uintptr_t)sc + oidp->oid_arg2);
    val = *val_ptr;
    error = sysctl_handle_int(oidp, &val, 0, req);

    /* Error or no new value */
    if (error != 0 || req->newptr == NULL)
        return (error);

    /* Only allow changing settings if override is set. */
    if (!acpi_tz_override)
        return (EPERM);

    *val_ptr = val;
    return (0);
}

/*
 * ACPI Notify handler for a thermal zone; context is the zone's softc.
 *
 * Temperature notifies just wake the monitor thread; device/level notifies
 * additionally ask it to re-read the zone settings.  Every notify, known
 * or not, is also forwarded to userland via acpi_UserNotify().
 */
static void
acpi_tz_notify_handler(ACPI_HANDLE h, UINT32 notify, void *context)
{
    struct acpi_tz_softc *sc = (struct acpi_tz_softc *)context;

    ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);

    switch (notify) {
    case TZ_NOTIFY_TEMPERATURE:
        /* Temperature change occurred */
        acpi_tz_signal(sc, 0);
        break;
    case TZ_NOTIFY_DEVICES:
    case TZ_NOTIFY_LEVELS:
        /* Zone devices/setpoints changed */
        acpi_tz_signal(sc, TZ_FLAG_GETSETTINGS);
        break;
    default:
        ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
            "unknown Notify event 0x%x\n", notify);
        break;
    }

    acpi_UserNotify("Thermal", h, notify);

    return_VOID;
}

/*
 * Post work for the monitor thread: OR 'flags' into the zone's pending
 * flags under the thermal lock, then wake whoever sleeps on acpi_tz_proc.
 */
static void
acpi_tz_signal(struct acpi_tz_softc *sc, int flags)
{
    ACPI_LOCK(thermal);
    sc->tz_flags |= flags;
    ACPI_UNLOCK(thermal);
    wakeup(&acpi_tz_proc);
}

/*
 * Notifies can be generated asynchronously but have also been seen to be
 * triggered by other thermal methods.  One system generates a notify of
 * 0x81 when the fan is turned on or off.  Another generates it when _SCP
 * is called.  To handle these situations, we check the zone via
 * acpi_tz_monitor() before evaluating changes to setpoints or the cooling
 * policy.
 *
 * 'flags' is the snapshot of pending TZ_FLAG_* work taken by the caller.
 */
static void
acpi_tz_timeout(struct acpi_tz_softc *sc, int flags)
{

    /* Check the current temperature and take action based on it */
    acpi_tz_monitor(sc);

    /* If requested, get the power profile settings. */
    if (flags & TZ_FLAG_GETPROFILE)
        acpi_tz_power_profile(sc);

    /*
     * If requested, check for new devices/setpoints.  After finding them,
     * check if we need to switch fans based on the new values.
     */
    if (flags & TZ_FLAG_GETSETTINGS) {
        acpi_tz_establish(sc);
        acpi_tz_monitor(sc);
    }

    /* XXX passive cooling actions? */
}

/*
 * System power profile may have changed; fetch and notify the
 * thermal zone accordingly.
 *
 * Since this can be called from an arbitrary eventhandler, it needs
 * to get the ACPI lock itself.
 *
 * arg is the zone's softc.  _SCP is called with 0 for the performance
 * profile and 1 for economy; other profile states are ignored.  Once _SCP
 * fails, TZ_FLAG_NO_SCP is latched so it is not tried again.
 *
 * NOTE(review): the TZ_FLAG_NO_SCP update below is done without taking the
 * thermal lock that acpi_tz_signal()/acpi_tz_thread() use for tz_flags --
 * confirm whether that is intentional.
 */
static void
acpi_tz_power_profile(void *arg)
{
    ACPI_STATUS status;
    struct acpi_tz_softc *sc = (struct acpi_tz_softc *)arg;
    int state;

    state = power_profile_get_state();
    if (state != POWER_PROFILE_PERFORMANCE && state != POWER_PROFILE_ECONOMY)
        return;

    /* check that we haven't decided there's no _SCP method */
    if ((sc->tz_flags & TZ_FLAG_NO_SCP) == 0) {

        /* Call _SCP to set the new profile */
        status = acpi_SetInteger(sc->tz_handle, "_SCP",
            (state == POWER_PROFILE_PERFORMANCE) ? 0 : 1);
        if (ACPI_FAILURE(status)) {
            /* A zone without _SCP is normal; only report other failures. */
            if (status != AE_NOT_FOUND)
                ACPI_VPRINT(sc->tz_dev,
                    acpi_device_get_parent_softc(sc->tz_dev),
                    "can't evaluate %s._SCP - %s\n",
                    acpi_name(sc->tz_handle),
                    AcpiFormatException(status));
            sc->tz_flags |= TZ_FLAG_NO_SCP;
        } else {
            /* We have to re-evaluate the entire zone now */
            acpi_tz_signal(sc, TZ_FLAG_GETSETTINGS);
        }
    }
}

/*
 * Thermal zone monitor thread.
 *
 * Loops forever: refreshes its list of thermal zone softcs whenever the
 * device count changes, services each zone's pending flags through
 * acpi_tz_timeout(), and then sleeps on acpi_tz_proc for the polling
 * interval unless more work was posted in the meantime.
 */
static void
acpi_tz_thread(void *arg)
{
    device_t *devs;
    int devcount, i;
    int flags;
    struct acpi_tz_softc **sc;

    ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);

    devs = NULL;
    devcount = 0;
    sc = NULL;

    for (;;) {
        /* If the number of devices has changed, re-evaluate. */
        if (devclass_get_count(acpi_tz_devclass) != devcount) {
            if (devs != NULL) {
                free(devs, M_TEMP);
                free(sc, M_TEMP);
            }
            devclass_get_devices(acpi_tz_devclass, &devs, &devcount);
            sc = malloc(sizeof(struct acpi_tz_softc *) * devcount, M_TEMP,
                M_WAITOK | M_ZERO);
            for (i = 0; i < devcount; i++)
                sc[i] = device_get_softc(devs[i]);
        }

        /* Check for temperature events and act on them. */
        for (i = 0; i < devcount; i++) {
            /*
             * Take a snapshot of the pending flags and clear everything
             * except the sticky TZ_FLAG_NO_SCP bit before doing the work
             * unlocked.
             */
            ACPI_LOCK(thermal);
            flags = sc[i]->tz_flags;
            sc[i]->tz_flags &= TZ_FLAG_NO_SCP;
            ACPI_UNLOCK(thermal);
            acpi_tz_timeout(sc[i], flags);
        }

        /* If more work to do, don't go to sleep yet. */
        ACPI_LOCK(thermal);
        for (i = 0; i < devcount; i++) {
            if (sc[i]->tz_flags & ~TZ_FLAG_NO_SCP)
                break;
        }

        /*
         * If we have no more work, sleep for a while, setting PDROP so that
         * the mutex will not be reacquired.  Otherwise, drop the mutex and
         * loop to handle more events.
         */
        if (i == devcount)
            msleep(&acpi_tz_proc, &thermal_mutex, PZERO | PDROP, "tzpoll",
                hz * acpi_tz_polling_rate);
        else
            ACPI_UNLOCK(thermal);
    }
}

/*
 * Undo passive-cooling frequency changes made by acpi_tz_cpufreq_update().
 *
 * Does nothing (returns 0) if no change is outstanding.  Returns ENXIO if
 * cpufreq unit 0 is not present, otherwise the result of CPUFREQ_SET();
 * tz_cooling_updated is cleared only on success.
 */
static int
acpi_tz_cpufreq_restore(struct acpi_tz_softc *sc)
{
    device_t dev;
    int error;

    if (!sc->tz_cooling_updated)
        return (0);
    if ((dev = devclass_get_device(devclass_find("cpufreq"), 0)) == NULL)
        return (ENXIO);
    ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
        "temperature %d.%dC: resuming previous clock speed (%d MHz)\n",
        TZ_KELVTOC(sc->tz_temperature), sc->tz_cooling_saved_freq);
    /* A NULL level presumably asks cpufreq to restore the saved setting -- TODO confirm. */
    error = CPUFREQ_SET(dev, NULL, CPUFREQ_PRIO_KERN);
    if (error == 0)
        sc->tz_cooling_updated = FALSE;
    return (error);
}

/*
 * Adjust the CPU frequency for passive cooling.
 *
 * 'req' is the requested change in performance, in percent of levels[0]'s
 * frequency: positive values lower the clock, negative values raise it.
 * The frequency in effect before the first reduction is remembered in
 * tz_cooling_saved_freq and acts as the ceiling for later increases.
 *
 * Returns 0 on success, ENOMEM/ENXIO on setup failure, or the error from
 * the cpufreq methods.
 */
static int
acpi_tz_cpufreq_update(struct acpi_tz_softc *sc, int req)
{
    device_t dev;
    struct cf_level *levels;
    int num_levels, error, freq, desired_freq, perf, i;

    levels = malloc(CPUFREQ_MAX_LEVELS * sizeof(*levels), M_TEMP, M_NOWAIT);
    if (levels == NULL)
        return (ENOMEM);

    /*
     * Find the main device, cpufreq0.  We don't yet support independent
     * CPU frequency control on SMP.
     */
    if ((dev = devclass_get_device(devclass_find("cpufreq"), 0)) == NULL) {
        error = ENXIO;
        goto out;
    }

    /* Get the current frequency. */
    error = CPUFREQ_GET(dev, &levels[0]);
    if (error)
        goto out;
    freq = levels[0].total_set.freq;

    /* Get the current available frequency levels. */
    num_levels = CPUFREQ_MAX_LEVELS;
    error = CPUFREQ_LEVELS(dev, levels, &num_levels);
    if (error) {
        if (error == E2BIG)
            printf("cpufreq: need to increase CPUFREQ_MAX_LEVELS\n");
        goto out;
    }

    /*
     * Calculate the desired frequency as a percent of the max frequency.
     * (levels[] is presumably sorted fastest-first, so levels[0] is the
     * maximum -- the searches below rely on that ordering.)
     */
    perf = 100 * freq / levels[0].total_set.freq - req;
    if (perf < 0)
        perf = 0;
    else if (perf > 100)
        perf = 100;
    desired_freq = levels[0].total_set.freq * perf / 100;

    if (desired_freq < freq) {
        /* Find the closest available frequency, rounding down. */
        for (i = 0; i < num_levels; i++)
            if (levels[i].total_set.freq <= desired_freq)
                break;

        /* If we didn't find a relevant setting, use the lowest. */
        if (i == num_levels)
            i--;
    } else {
        /* If we didn't decrease frequency yet, don't increase it. */
        if (!sc->tz_cooling_updated) {
            sc->tz_cooling_active = FALSE;
            goto out;
        }

        /* Use saved cpu frequency as maximum value. */
        if (desired_freq > sc->tz_cooling_saved_freq)
            desired_freq = sc->tz_cooling_saved_freq;

        /* Find the closest available frequency, rounding up. */
        for (i = num_levels - 1; i >= 0; i--)
            if (levels[i].total_set.freq >= desired_freq)
                break;

        /* If we didn't find a relevant setting, use the highest. */
        if (i == -1)
            i++;

        /* If we're going to the highest frequency, restore the old setting. */
        if (i == 0 || desired_freq == sc->tz_cooling_saved_freq) {
            error = acpi_tz_cpufreq_restore(sc);
            if (error == 0)
                sc->tz_cooling_active = FALSE;
            goto out;
        }
    }

    /* If we are going to a new frequency, activate it. */
    if (levels[i].total_set.freq != freq) {
        ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
            "temperature %d.%dC: %screasing clock speed "
            "from %d MHz to %d MHz\n",
            TZ_KELVTOC(sc->tz_temperature),
            (freq > levels[i].total_set.freq) ? "de" : "in",
            freq, levels[i].total_set.freq);
        error = CPUFREQ_SET(dev, &levels[i], CPUFREQ_PRIO_KERN);
        /* Remember the pre-throttling frequency on the first change only. */
        if (error == 0 && !sc->tz_cooling_updated) {
            sc->tz_cooling_saved_freq = freq;
            sc->tz_cooling_updated = TRUE;
        }
    }

out:
    if (levels)
        free(levels, M_TEMP);
    return (error);
}

/*
 * Passive cooling thread; monitors current temperature according to the
 * cooling interval and calculates whether to scale back CPU frequency.
 *
 * arg is the zone's softc.  The loop runs while tz_cooling_enabled is set;
 * on exit it restores the CPU frequency if cooling was active, clears
 * tz_cooling_proc and tz_cooling_proc_running, and terminates the kproc.
 */
static void
acpi_tz_cooling_thread(void *arg)
{
    struct acpi_tz_softc *sc;
    int error, perf, curr_temp, prev_temp;

    ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);

    sc = (struct acpi_tz_softc *)arg;

    prev_temp = sc->tz_temperature;
    while (sc->tz_cooling_enabled) {
        /* Only re-read the sensor ourselves while actively cooling. */
        if (sc->tz_cooling_active)
            (void)acpi_tz_get_temperature(sc);
        curr_temp = sc->tz_temperature;
        if (curr_temp >= sc->tz_zone.psv)
            sc->tz_cooling_active = TRUE;
        if (sc->tz_cooling_active) {
            /*
             * Weighted sum of the temperature change since the last pass
             * (tc1) and the distance above the passive trip point (tc2).
             */
            perf = sc->tz_zone.tc1 * (curr_temp - prev_temp) +
                   sc->tz_zone.tc2 * (curr_temp - sc->tz_zone.psv);
            perf /= 10;

            if (perf != 0) {
                error = acpi_tz_cpufreq_update(sc, perf);

                /*
                 * If error and not simply a higher priority setting was
                 * active, disable cooling.
                 */
                if (error != 0 && error != EPERM) {
                    device_printf(sc->tz_dev,
                        "failed to set new freq, disabling passive cooling\n");
                    sc->tz_cooling_enabled = FALSE;
                }
            }
        }
        prev_temp = curr_temp;
        tsleep(&sc->tz_cooling_proc, PZERO, "cooling",
            hz * sc->tz_zone.tsp / 10);
    }
    if (sc->tz_cooling_active) {
        acpi_tz_cpufreq_restore(sc);
        sc->tz_cooling_active = FALSE;
    }
    sc->tz_cooling_proc = NULL;
    ACPI_LOCK(thermal);
    sc->tz_cooling_proc_running = FALSE;
    ACPI_UNLOCK(thermal);
    kproc_exit(0);
}

/*
 * TODO: We ignore _PSL (list of cooling devices) since cpufreq enumerates
 * all CPUs for us.  However, it's possible in the future _PSL will
 * reference non-CPU devices so we may want to support it then.
*/ static int acpi_tz_cooling_is_available(struct acpi_tz_softc *sc) { return (sc->tz_zone.tc1 != -1 && sc->tz_zone.tc2 != -1 && sc->tz_zone.tsp != -1 && sc->tz_zone.tsp != 0 && sc->tz_zone.psv != -1); } static int acpi_tz_cooling_thread_start(struct acpi_tz_softc *sc) { int error; ACPI_LOCK(thermal); if (sc->tz_cooling_proc_running) { ACPI_UNLOCK(thermal); return (0); } sc->tz_cooling_proc_running = TRUE; ACPI_UNLOCK(thermal); error = 0; if (sc->tz_cooling_proc == NULL) { error = kproc_create(acpi_tz_cooling_thread, sc, &sc->tz_cooling_proc, RFHIGHPID, 0, "acpi_cooling%d", device_get_unit(sc->tz_dev)); if (error != 0) { device_printf(sc->tz_dev, "could not create thread - %d", error); ACPI_LOCK(thermal); sc->tz_cooling_proc_running = FALSE; ACPI_UNLOCK(thermal); } } return (error); } Index: projects/vnet/sys/dev/cxgbe/t4_ioctl.h =================================================================== --- projects/vnet/sys/dev/cxgbe/t4_ioctl.h (revision 301522) +++ projects/vnet/sys/dev/cxgbe/t4_ioctl.h (revision 301523) @@ -1,345 +1,347 @@ /*- * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef __T4_IOCTL_H__ #define __T4_IOCTL_H__ #include #include /* * Ioctl commands specific to this driver. */ enum { T4_GETREG = 0x40, /* read register */ T4_SETREG, /* write register */ T4_REGDUMP, /* dump of all registers */ T4_GET_FILTER_MODE, /* get global filter mode */ T4_SET_FILTER_MODE, /* set global filter mode */ T4_GET_FILTER, /* get information about a filter */ T4_SET_FILTER, /* program a filter */ T4_DEL_FILTER, /* delete a filter */ T4_GET_SGE_CONTEXT, /* get SGE context for a queue */ T4_LOAD_FW, /* flash firmware */ T4_GET_MEM, /* read memory */ T4_GET_I2C, /* read from i2c addressible device */ T4_CLEAR_STATS, /* clear a port's MAC statistics */ T4_SET_OFLD_POLICY, /* Set offload policy */ T4_SET_SCHED_CLASS, /* set sched class */ T4_SET_SCHED_QUEUE, /* set queue class */ T4_GET_TRACER, /* get information about a tracer */ T4_SET_TRACER, /* program a tracer */ }; struct t4_reg { uint32_t addr; uint32_t size; uint64_t val; }; #define T4_REGDUMP_SIZE (160 * 1024) #define T5_REGDUMP_SIZE (332 * 1024) struct t4_regdump { uint32_t version; uint32_t len; /* bytes */ uint32_t *data; }; struct t4_data { uint32_t len; uint8_t *data; }; struct t4_i2c_data { uint8_t port_id; uint8_t dev_addr; uint8_t offset; uint8_t len; uint8_t data[8]; }; /* * A hardware filter is some valid combination of these. 
*/ #define T4_FILTER_IPv4 0x1 /* IPv4 packet */ #define T4_FILTER_IPv6 0x2 /* IPv6 packet */ #define T4_FILTER_IP_SADDR 0x4 /* Source IP address or network */ #define T4_FILTER_IP_DADDR 0x8 /* Destination IP address or network */ #define T4_FILTER_IP_SPORT 0x10 /* Source IP port */ #define T4_FILTER_IP_DPORT 0x20 /* Destination IP port */ #define T4_FILTER_FCoE 0x40 /* Fibre Channel over Ethernet packet */ #define T4_FILTER_PORT 0x80 /* Physical ingress port */ #define T4_FILTER_VNIC 0x100 /* VNIC id or outer VLAN */ #define T4_FILTER_VLAN 0x200 /* VLAN ID */ #define T4_FILTER_IP_TOS 0x400 /* IPv4 TOS/IPv6 Traffic Class */ #define T4_FILTER_IP_PROTO 0x800 /* IP protocol */ #define T4_FILTER_ETH_TYPE 0x1000 /* Ethernet Type */ #define T4_FILTER_MAC_IDX 0x2000 /* MPS MAC address match index */ #define T4_FILTER_MPS_HIT_TYPE 0x4000 /* MPS match type */ #define T4_FILTER_IP_FRAGMENT 0x8000 /* IP fragment */ #define T4_FILTER_IC_VNIC 0x80000000 /* TP Ingress Config's F_VNIC bit. It indicates whether T4_FILTER_VNIC bit means VNIC id (PF/VF) or outer VLAN. 
0 = oVLAN, 1 = VNIC */ /* Filter action */ enum { FILTER_PASS = 0, /* default */ FILTER_DROP, FILTER_SWITCH }; /* 802.1q manipulation on FILTER_SWITCH */ enum { VLAN_NOCHANGE = 0, /* default */ VLAN_REMOVE, VLAN_INSERT, VLAN_REWRITE }; /* MPS match type */ enum { UCAST_EXACT = 0, /* exact unicast match */ UCAST_HASH = 1, /* inexact (hashed) unicast match */ MCAST_EXACT = 2, /* exact multicast match */ MCAST_HASH = 3, /* inexact (hashed) multicast match */ PROMISC = 4, /* no match but port is promiscuous */ HYPPROMISC = 5, /* port is hypervisor-promisuous + not bcast */ BCAST = 6, /* broadcast packet */ }; /* Rx steering */ enum { DST_MODE_QUEUE, /* queue is directly specified by filter */ DST_MODE_RSS_QUEUE, /* filter specifies RSS entry containing queue */ DST_MODE_RSS, /* queue selected by default RSS hash lookup */ DST_MODE_FILT_RSS /* queue selected by hashing in filter-specified RSS subtable */ }; struct t4_filter_tuple { /* * These are always available. */ uint8_t sip[16]; /* source IP address (IPv4 in [3:0]) */ uint8_t dip[16]; /* destinatin IP address (IPv4 in [3:0]) */ uint16_t sport; /* source port */ uint16_t dport; /* destination port */ /* * A combination of these (up to 36 bits) is available. TP_VLAN_PRI_MAP * is used to select the global mode and all filters are limited to the * set of fields allowed by the global mode. 
*/ uint16_t vnic; /* VNIC id (PF/VF) or outer VLAN tag */ uint16_t vlan; /* VLAN tag */ uint16_t ethtype; /* Ethernet type */ uint8_t tos; /* TOS/Traffic Type */ uint8_t proto; /* protocol type */ uint32_t fcoe:1; /* FCoE packet */ uint32_t iport:3; /* ingress port */ uint32_t matchtype:3; /* MPS match type */ uint32_t frag:1; /* fragmentation extension header */ uint32_t macidx:9; /* exact match MAC index */ uint32_t vlan_vld:1; /* VLAN valid */ uint32_t ovlan_vld:1; /* outer VLAN tag valid, value in "vnic" */ uint32_t pfvf_vld:1; /* VNIC id (PF/VF) valid, value in "vnic" */ }; struct t4_filter_specification { uint32_t hitcnts:1; /* count filter hits in TCB */ uint32_t prio:1; /* filter has priority over active/server */ uint32_t type:1; /* 0 => IPv4, 1 => IPv6 */ uint32_t action:2; /* drop, pass, switch */ uint32_t rpttid:1; /* report TID in RSS hash field */ uint32_t dirsteer:1; /* 0 => RSS, 1 => steer to iq */ uint32_t iq:10; /* ingress queue */ uint32_t maskhash:1; /* dirsteer=0: store RSS hash in TCB */ uint32_t dirsteerhash:1;/* dirsteer=1: 0 => TCB contains RSS hash */ /* 1 => TCB contains IQ ID */ /* * Switch proxy/rewrite fields. An ingress packet which matches a * filter with "switch" set will be looped back out as an egress * packet -- potentially with some Ethernet header rewriting. */ uint32_t eport:2; /* egress port to switch packet out */ uint32_t newdmac:1; /* rewrite destination MAC address */ uint32_t newsmac:1; /* rewrite source MAC address */ uint32_t newvlan:2; /* rewrite VLAN Tag */ uint8_t dmac[ETHER_ADDR_LEN]; /* new destination MAC address */ uint8_t smac[ETHER_ADDR_LEN]; /* new source MAC address */ uint16_t vlan; /* VLAN Tag to insert */ /* * Filter rule value/mask pairs. 
*/ struct t4_filter_tuple val; struct t4_filter_tuple mask; }; struct t4_filter { uint32_t idx; uint16_t l2tidx; uint16_t smtidx; uint64_t hits; struct t4_filter_specification fs; }; +/* Tx Scheduling Class parameters */ +struct t4_sched_class_params { + int8_t level; /* scheduler hierarchy level */ + int8_t mode; /* per-class or per-flow */ + int8_t rateunit; /* bit or packet rate */ + int8_t ratemode; /* %port relative or kbps absolute */ + int8_t channel; /* scheduler channel [0..N] */ + int8_t cl; /* scheduler class [0..N] */ + int32_t minrate; /* minimum rate */ + int32_t maxrate; /* maximum rate */ + int16_t weight; /* percent weight */ + int16_t pktsize; /* average packet size */ +}; + /* * Support for "sched-class" command to allow a TX Scheduling Class to be * programmed with various parameters. */ struct t4_sched_params { int8_t subcmd; /* sub-command */ int8_t type; /* packet or flow */ union { struct { /* sub-command SCHED_CLASS_CONFIG */ int8_t minmax; /* minmax enable */ } config; - struct { /* sub-command SCHED_CLASS_PARAMS */ - int8_t level; /* scheduler hierarchy level */ - int8_t mode; /* per-class or per-flow */ - int8_t rateunit; /* bit or packet rate */ - int8_t ratemode; /* %port relative or kbps - absolute */ - int8_t channel; /* scheduler channel [0..N] */ - int8_t cl; /* scheduler class [0..N] */ - int32_t minrate; /* minimum rate */ - int32_t maxrate; /* maximum rate */ - int16_t weight; /* percent weight */ - int16_t pktsize; /* average packet size */ - } params; + struct t4_sched_class_params params; uint8_t reserved[6 + 8 * 8]; } u; }; enum { SCHED_CLASS_SUBCMD_CONFIG, /* config sub-command */ SCHED_CLASS_SUBCMD_PARAMS, /* params sub-command */ }; enum { SCHED_CLASS_TYPE_PACKET, }; enum { SCHED_CLASS_LEVEL_CL_RL, /* class rate limiter */ SCHED_CLASS_LEVEL_CL_WRR, /* class weighted round robin */ SCHED_CLASS_LEVEL_CH_RL, /* channel rate limiter */ }; enum { SCHED_CLASS_MODE_CLASS, /* per-class scheduling */ SCHED_CLASS_MODE_FLOW, /* 
per-flow scheduling */ }; enum { SCHED_CLASS_RATEUNIT_BITS, /* bit rate scheduling */ SCHED_CLASS_RATEUNIT_PKTS, /* packet rate scheduling */ }; enum { SCHED_CLASS_RATEMODE_REL, /* percent of port bandwidth */ SCHED_CLASS_RATEMODE_ABS, /* Kb/s */ }; /* * Support for "sched_queue" command to allow one or more NIC TX Queues to be * bound to a TX Scheduling Class. */ struct t4_sched_queue { uint8_t port; int8_t queue; /* queue index; -1 => all queues */ int8_t cl; /* class index; -1 => unbind */ }; #define T4_SGE_CONTEXT_SIZE 24 enum { SGE_CONTEXT_EGRESS, SGE_CONTEXT_INGRESS, SGE_CONTEXT_FLM, SGE_CONTEXT_CNM }; struct t4_sge_context { uint32_t mem_id; uint32_t cid; uint32_t data[T4_SGE_CONTEXT_SIZE / 4]; }; struct t4_mem_range { uint32_t addr; uint32_t len; uint32_t *data; }; #define T4_TRACE_LEN 112 struct t4_trace_params { uint32_t data[T4_TRACE_LEN / 4]; uint32_t mask[T4_TRACE_LEN / 4]; uint16_t snap_len; uint16_t min_len; uint8_t skip_ofst; uint8_t skip_len; uint8_t invert; uint8_t port; }; struct t4_tracer { uint8_t idx; uint8_t enabled; uint8_t valid; struct t4_trace_params tp; }; #define CHELSIO_T4_GETREG _IOWR('f', T4_GETREG, struct t4_reg) #define CHELSIO_T4_SETREG _IOW('f', T4_SETREG, struct t4_reg) #define CHELSIO_T4_REGDUMP _IOWR('f', T4_REGDUMP, struct t4_regdump) #define CHELSIO_T4_GET_FILTER_MODE _IOWR('f', T4_GET_FILTER_MODE, uint32_t) #define CHELSIO_T4_SET_FILTER_MODE _IOW('f', T4_SET_FILTER_MODE, uint32_t) #define CHELSIO_T4_GET_FILTER _IOWR('f', T4_GET_FILTER, struct t4_filter) #define CHELSIO_T4_SET_FILTER _IOW('f', T4_SET_FILTER, struct t4_filter) #define CHELSIO_T4_DEL_FILTER _IOW('f', T4_DEL_FILTER, struct t4_filter) #define CHELSIO_T4_GET_SGE_CONTEXT _IOWR('f', T4_GET_SGE_CONTEXT, \ struct t4_sge_context) #define CHELSIO_T4_LOAD_FW _IOW('f', T4_LOAD_FW, struct t4_data) #define CHELSIO_T4_GET_MEM _IOW('f', T4_GET_MEM, struct t4_mem_range) #define CHELSIO_T4_GET_I2C _IOWR('f', T4_GET_I2C, struct t4_i2c_data) #define CHELSIO_T4_CLEAR_STATS 
_IOW('f', T4_CLEAR_STATS, uint32_t) #define CHELSIO_T4_SCHED_CLASS _IOW('f', T4_SET_SCHED_CLASS, \ struct t4_sched_params) #define CHELSIO_T4_SCHED_QUEUE _IOW('f', T4_SET_SCHED_QUEUE, \ struct t4_sched_queue) #define CHELSIO_T4_GET_TRACER _IOWR('f', T4_GET_TRACER, struct t4_tracer) #define CHELSIO_T4_SET_TRACER _IOW('f', T4_SET_TRACER, struct t4_tracer) #endif Index: projects/vnet/sys/dev/xen/netfront/netfront.c =================================================================== --- projects/vnet/sys/dev/xen/netfront/netfront.c (revision 301522) +++ projects/vnet/sys/dev/xen/netfront/netfront.c (revision 301523) @@ -1,2333 +1,2346 @@ /*- * Copyright (c) 2004-2006 Kip Macy * Copyright (c) 2015 Wei Liu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "xenbus_if.h" /* Features supported by all backends. TSO and LRO can be negotiated */ #define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) #define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1) /* * Should the driver do LRO on the RX end * this can be toggled on the fly, but the * interface must be reset (down/up) for it * to take effect. */ static int xn_enable_lro = 1; TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); /* * Number of pairs of queues. */ static unsigned long xn_num_queues = 4; TUNABLE_ULONG("hw.xn.num_queues", &xn_num_queues); /** * \brief The maximum allowed data fragments in a single transmit * request. * * This limit is imposed by the backend driver. We assume here that * we are dealing with a Linux driver domain and have set our limit * to mirror the Linux MAX_SKB_FRAGS constant. 
*/ #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) #define RX_COPY_THRESHOLD 256 #define net_ratelimit() 0 struct netfront_rxq; struct netfront_txq; struct netfront_info; struct netfront_rx_info; static void xn_txeof(struct netfront_txq *); static void xn_rxeof(struct netfront_rxq *); static void xn_alloc_rx_buffers(struct netfront_rxq *); static void xn_alloc_rx_buffers_callout(void *arg); static void xn_release_rx_bufs(struct netfront_rxq *); static void xn_release_tx_bufs(struct netfront_txq *); static void xn_rxq_intr(struct netfront_rxq *); static void xn_txq_intr(struct netfront_txq *); static void xn_intr(void *); static inline int xn_count_frags(struct mbuf *m); static int xn_assemble_tx_request(struct netfront_txq *, struct mbuf *); static int xn_ioctl(struct ifnet *, u_long, caddr_t); static void xn_ifinit_locked(struct netfront_info *); static void xn_ifinit(void *); static void xn_stop(struct netfront_info *); static void xn_query_features(struct netfront_info *np); static int xn_configure_features(struct netfront_info *np); static void netif_free(struct netfront_info *info); static int netfront_detach(device_t dev); static int xn_txq_mq_start_locked(struct netfront_txq *, struct mbuf *); static int xn_txq_mq_start(struct ifnet *, struct mbuf *); static int talk_to_backend(device_t dev, struct netfront_info *info); static int create_netdev(device_t dev); static void netif_disconnect_backend(struct netfront_info *info); static int setup_device(device_t dev, struct netfront_info *info, unsigned long); static int xn_ifmedia_upd(struct ifnet *ifp); static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); -int xn_connect(struct netfront_info *); +static int xn_connect(struct netfront_info *); +static void xn_kick_rings(struct netfront_info *); static int xn_get_responses(struct netfront_rxq *, struct netfront_rx_info *, RING_IDX, RING_IDX *, struct mbuf **); #define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT) #define INVALID_P2M_ENTRY (~0UL) 
struct xn_rx_stats { u_long rx_packets; /* total packets received */ u_long rx_bytes; /* total bytes received */ u_long rx_errors; /* bad packets received */ }; struct xn_tx_stats { u_long tx_packets; /* total packets transmitted */ u_long tx_bytes; /* total bytes transmitted */ u_long tx_errors; /* packet transmit problems */ }; #define XN_QUEUE_NAME_LEN 8 /* xn{t,r}x_%u, allow for two digits */ struct netfront_rxq { struct netfront_info *info; u_int id; char name[XN_QUEUE_NAME_LEN]; struct mtx lock; int ring_ref; netif_rx_front_ring_t ring; xen_intr_handle_t xen_intr_handle; grant_ref_t gref_head; grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; struct mbuf *mbufs[NET_RX_RING_SIZE + 1]; struct lro_ctrl lro; struct callout rx_refill; struct xn_rx_stats stats; }; struct netfront_txq { struct netfront_info *info; u_int id; char name[XN_QUEUE_NAME_LEN]; struct mtx lock; int ring_ref; netif_tx_front_ring_t ring; xen_intr_handle_t xen_intr_handle; grant_ref_t gref_head; grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; struct mbuf *mbufs[NET_TX_RING_SIZE + 1]; int mbufs_cnt; struct buf_ring *br; struct taskqueue *tq; struct task defrtask; bool full; struct xn_tx_stats stats; }; struct netfront_info { struct ifnet *xn_ifp; struct mtx sc_lock; u_int num_queues; struct netfront_rxq *rxq; struct netfront_txq *txq; u_int carrier; u_int maxfrags; device_t xbdev; uint8_t mac[ETHER_ADDR_LEN]; int xn_if_flags; struct ifmedia sc_media; bool xn_reset; }; struct netfront_rx_info { struct netif_rx_response rx; struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; }; #define XN_RX_LOCK(_q) mtx_lock(&(_q)->lock) #define XN_RX_UNLOCK(_q) mtx_unlock(&(_q)->lock) #define XN_TX_LOCK(_q) mtx_lock(&(_q)->lock) #define XN_TX_TRYLOCK(_q) mtx_trylock(&(_q)->lock) #define XN_TX_UNLOCK(_q) mtx_unlock(&(_q)->lock) #define XN_LOCK(_sc) mtx_lock(&(_sc)->sc_lock); #define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_lock); #define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_lock, MA_OWNED); #define 
XN_RX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); #define XN_TX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); #define netfront_carrier_on(netif) ((netif)->carrier = 1) #define netfront_carrier_off(netif) ((netif)->carrier = 0) #define netfront_carrier_ok(netif) ((netif)->carrier) /* Access macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. */ static inline void add_id_to_freelist(struct mbuf **list, uintptr_t id) { KASSERT(id != 0, ("%s: the head item (0) must always be free.", __func__)); list[id] = list[0]; list[0] = (struct mbuf *)id; } static inline unsigned short get_id_from_freelist(struct mbuf **list) { uintptr_t id; id = (uintptr_t)list[0]; KASSERT(id != 0, ("%s: the head item (0) must always remain free.", __func__)); list[0] = list[id]; return (id); } static inline int xn_rxidx(RING_IDX idx) { return idx & (NET_RX_RING_SIZE - 1); } static inline struct mbuf * xn_get_rx_mbuf(struct netfront_rxq *rxq, RING_IDX ri) { int i; struct mbuf *m; i = xn_rxidx(ri); m = rxq->mbufs[i]; rxq->mbufs[i] = NULL; return (m); } static inline grant_ref_t xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri) { int i = xn_rxidx(ri); grant_ref_t ref = rxq->grant_ref[i]; KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n")); rxq->grant_ref[i] = GRANT_REF_INVALID; return (ref); } #define IPRINTK(fmt, args...) \ printf("[XEN] " fmt, ##args) #ifdef INVARIANTS #define WPRINTK(fmt, args...) \ printf("[XEN] " fmt, ##args) #else #define WPRINTK(fmt, args...) #endif #ifdef DEBUG #define DPRINTK(fmt, args...) \ printf("[XEN] %s: " fmt, __func__, ##args) #else #define DPRINTK(fmt, args...) #endif /** * Read the 'mac' node at the given device's node in the store, and parse that * as colon-separated octets, placing result the given mac array. mac must be * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h). * Return 0 on success, or errno on error. 
*/
static int
xen_net_read_mac(device_t dev, uint8_t mac[])
{
	int error, i;
	char *s, *e, *macstr;
	const char *path;
	unsigned long octet;

	path = xenbus_get_node(dev);
	error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr);
	if (error == ENOENT) {
		/*
		 * Deal with missing mac XenStore nodes on devices with
		 * HVM emulation (the 'ioemu' configuration attribute)
		 * enabled.
		 *
		 * The HVM emulator may execute in a stub device model
		 * domain which lacks the permission, only given to Dom0,
		 * to update the guest's XenStore tree.  For this reason,
		 * the HVM emulator doesn't even attempt to write the
		 * front-side mac node, even when operating in Dom0.
		 * However, there should always be a mac listed in the
		 * backend tree.  Fall back to this version if our query
		 * of the front side XenStore location doesn't find
		 * anything.
		 */
		path = xenbus_get_otherend_path(dev);
		error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr);
	}
	if (error != 0) {
		xenbus_dev_fatal(dev, error, "parsing %s/mac", path);
		return (error);
	}

	s = macstr;
	for (i = 0; i < ETHER_ADDR_LEN; i++) {
		octet = strtoul(s, &e, 16);
		/*
		 * Reject the string when:
		 *  - no hex digits were consumed,
		 *  - the value does not fit in one octet (it used to be
		 *    silently truncated),
		 *  - the octet is followed by something other than ':' or
		 *    the terminator, or
		 *  - the string ends before all ETHER_ADDR_LEN octets were
		 *    read.  Without this last check the cursor was advanced
		 *    past the NUL terminator and parsing continued outside
		 *    the buffer.
		 */
		if (s == e || octet > 0xff ||
		    (e[0] != ':' && e[0] != '\0') ||
		    (e[0] == '\0' && i != ETHER_ADDR_LEN - 1)) {
			free(macstr, M_XENBUS);
			return (ENOENT);
		}
		mac[i] = (uint8_t)octet;
		/* Not dereferenced again after the final octet. */
		s = &e[1];
	}
	free(macstr, M_XENBUS);
	return (0);
}

/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures and the ring buffers for communication with the backend, and
 * inform the backend of the appropriate details for those.  Switch to
 * Connected state.
*/
/*
 * Probe method: claim xenbus devices of type "vif".
 *
 * Returns ENXIO when running as an HVM domain with xen_disable_pv_nics set,
 * or when the device type is not "vif"; otherwise sets the device
 * description and returns 0.
 */
static int
netfront_probe(device_t dev)
{

	if (xen_hvm_domain() && xen_disable_pv_nics != 0)
		return (ENXIO);

	if (!strcmp(xenbus_get_type(dev), "vif")) {
		device_set_desc(dev, "Virtual Network Interface");
		return (0);
	}

	return (ENXIO);
}

/*
 * Attach method: create the network device via create_netdev() and publish
 * two sysctl nodes under the device's tree: a read/write "enable_lro"
 * backed by xn_enable_lro and a read-only "num_queues" backed by
 * xn_num_queues.  Returns the create_netdev() error on failure, else 0.
 */
static int
netfront_attach(device_t dev)
{
	int err;

	err = create_netdev(dev);
	if (err != 0) {
		xenbus_dev_fatal(dev, err, "creating netdev");
		return (err);
	}

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_lro", CTLFLAG_RW,
	    &xn_enable_lro, 0, "Large Receive Offload");

	SYSCTL_ADD_ULONG(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "num_queues", CTLFLAG_RD,
	    &xn_num_queues, "Number of pairs of queues");

	return (0);
}

/*
 * Suspend method: clear the software carrier flag while holding every
 * queue's rx and tx lock, so the flag does not change underneath code
 * running with a queue lock held.  Always returns 0.
 */
static int
netfront_suspend(device_t dev)
{
	struct netfront_info *np = device_get_softc(dev);
	u_int i;

	/* Take all queue locks (rx before tx for each queue). */
	for (i = 0; i < np->num_queues; i++) {
		XN_RX_LOCK(&np->rxq[i]);
		XN_TX_LOCK(&np->txq[i]);
	}
	netfront_carrier_off(np);
	for (i = 0; i < np->num_queues; i++) {
		XN_RX_UNLOCK(&np->rxq[i]);
		XN_TX_UNLOCK(&np->txq[i]);
	}
	return (0);
}

/**
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart.  We tear down our netif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to the
 * rest of the kernel.
 */
static int
netfront_resume(device_t dev)
{
	struct netfront_info *info = device_get_softc(dev);

	/* Only the disconnect happens here; this function always returns 0. */
	netif_disconnect_backend(info);
	return (0);
}

/*
 * Write the xenstore keys describing one rx/tx queue pair ("tx-ring-ref",
 * "rx-ring-ref" and "event-channel") inside the transaction *xst.
 *
 * When 'hierarchy' is true the keys go under "<node>/queue-<id>", otherwise
 * directly under the device's own node.
 *
 * Returns 0 on success.  On failure the error is reported through
 * xenbus_dev_fatal() and returned; the transaction is left for the caller
 * to end.
 */
static int
write_queue_xenstore_keys(device_t dev,
    struct netfront_rxq *rxq,
    struct netfront_txq *txq,
    struct xs_transaction *xst, bool hierarchy)
{
	int err;
	const char *message;
	const char *node = xenbus_get_node(dev);
	char *path;
	size_t path_size;

	KASSERT(rxq->id == txq->id, ("Mismatch between RX and TX queue ids"));
	/* Split event channel support is not yet there. */
	KASSERT(rxq->xen_intr_handle == txq->xen_intr_handle,
	    ("Split event channels are not supported"));

	if (hierarchy) {
		/*
		 * +10 covers "/queue-" (7 characters), the terminator and
		 * two digits of queue id; a longer id would be truncated by
		 * snprintf().
		 */
		path_size = strlen(node) + 10;
		path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO);
		snprintf(path, path_size, "%s/queue-%u", node, rxq->id);
	} else {
		path_size = strlen(node) + 1;
		path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO);
		snprintf(path, path_size, "%s", node);
	}

	err = xs_printf(*xst, path, "tx-ring-ref","%u", txq->ring_ref);
	if (err != 0) {
		message = "writing tx ring-ref";
		goto error;
	}
	err = xs_printf(*xst, path, "rx-ring-ref","%u", rxq->ring_ref);
	if (err != 0) {
		message = "writing rx ring-ref";
		goto error;
	}
	/* rx and tx share one event channel (asserted above). */
	err = xs_printf(*xst, path, "event-channel", "%u",
	    xen_intr_port(rxq->xen_intr_handle));
	if (err != 0) {
		message = "writing event-channel";
		goto error;
	}

	free(path, M_DEVBUF);

	return (0);

error:
	free(path, M_DEVBUF);
	xenbus_dev_fatal(dev, err, "%s", message);

	return (err);
}

/* Common code used when first setting up, and when resuming. */
/*
 * Steps, in order:
 *  1. read the MAC address into info->mac;
 *  2. read "multi-queue-max-queues" from the other end (1 is assumed when
 *     the read fails) and clamp xn_num_queues to it;
 *  3. (re)build the queues with setup_device();
 *  4. in one xenstore transaction, write the per-queue keys followed by
 *     "request-rx-copy", "feature-rx-notify", "feature-sg" and, depending
 *     on the interface's enabled capabilities, "feature-gso-tcpv4" and
 *     "feature-no-csum-offload".
 *
 * The whole transaction is restarted when xs_transaction_end() returns
 * EAGAIN.  Failures after setup_device() succeeded end in netif_free(info).
 */
static int
talk_to_backend(device_t dev, struct netfront_info *info)
{
	const char *message;
	struct xs_transaction xst;
	const char *node = xenbus_get_node(dev);
	int err;
	unsigned long num_queues, max_queues = 0;
	unsigned int i;

	err = xen_net_read_mac(dev, info->mac);
	if (err != 0) {
		xenbus_dev_fatal(dev, err, "parsing %s/mac", node);
		goto out;
	}

	err = xs_scanf(XST_NIL, xenbus_get_otherend_path(info->xbdev),
	    "multi-queue-max-queues", NULL, "%lu", &max_queues);
	if (err != 0)
		max_queues = 1;
	num_queues = xn_num_queues;
	if (num_queues > max_queues)
		num_queues = max_queues;

	err = setup_device(dev, info, num_queues);
	if (err != 0)
		goto out;

 again:
	err = xs_transaction_start(&xst);
	if (err != 0) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto free;
	}

	if (info->num_queues == 1) {
		/* Single queue: keys are written flat under the device node. */
		err = write_queue_xenstore_keys(dev, &info->rxq[0],
		    &info->txq[0], &xst, false);
		if (err != 0)
			goto abort_transaction_no_def_error;
	} else {
		err = xs_printf(xst, node, "multi-queue-num-queues",
		    "%u", info->num_queues);
		if (err != 0) {
			message = "writing multi-queue-num-queues";
			goto abort_transaction;
		}

		for (i = 0; i < info->num_queues; i++) {
			err = write_queue_xenstore_keys(dev, &info->rxq[i],
			    &info->txq[i], &xst, true);
			if (err != 0)
				goto abort_transaction_no_def_error;
		}
	}

	err = xs_printf(xst, node, "request-rx-copy", "%u", 1);
	if (err != 0) {
		message = "writing request-rx-copy";
		goto abort_transaction;
	}
	err = xs_printf(xst, node, "feature-rx-notify", "%d", 1);
	if (err != 0) {
		message = "writing feature-rx-notify";
		goto abort_transaction;
	}
	err = xs_printf(xst, node, "feature-sg", "%d", 1);
	if (err != 0) {
		message = "writing feature-sg";
		goto abort_transaction;
	}
	/* Written only while LRO is enabled on the interface. */
	if ((info->xn_ifp->if_capenable & IFCAP_LRO) != 0) {
		err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1);
		if (err != 0) {
			message = "writing feature-gso-tcpv4";
			goto abort_transaction;
		}
	}
	/* Written only while rx checksum offload is disabled. */
	if ((info->xn_ifp->if_capenable & IFCAP_RXCSUM) == 0) {
		err = xs_printf(xst, node, "feature-no-csum-offload", "%d", 1);
		if (err != 0) {
			message = "writing feature-no-csum-offload";
			goto abort_transaction;
		}
	}

	err = xs_transaction_end(xst, 0);
	if (err != 0) {
		if (err == EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto free;
	}

	return 0;

	/* Error unwinding: each label falls through into the next one. */
 abort_transaction:
	xenbus_dev_fatal(dev, err, "%s", message);
 abort_transaction_no_def_error:
	xs_transaction_end(xst, 1);
 free:
	netif_free(info);
 out:
	return (err);
}

/* Receive half of the interrupt handler: run xn_rxeof() under the rx lock. */
static void
xn_rxq_intr(struct netfront_rxq *rxq)
{

	XN_RX_LOCK(rxq);
	xn_rxeof(rxq);
	XN_RX_UNLOCK(rxq);
}

/*
 * Restart transmission on a queue when its buf_ring is not empty.
 * The caller must hold the tx lock.
 */
static void
xn_txq_start(struct netfront_txq *txq)
{
	struct netfront_info *np = txq->info;
	struct ifnet *ifp = np->xn_ifp;

	XN_TX_LOCK_ASSERT(txq);
	if (!drbr_empty(ifp, txq->br))
		xn_txq_mq_start_locked(txq, NULL);
}

/*
 * Transmit half of the interrupt handler: under the tx lock, process
 * pending tx responses and then try to send whatever is queued.
 */
static void
xn_txq_intr(struct netfront_txq *txq)
{

	XN_TX_LOCK(txq);
	if (RING_HAS_UNCONSUMED_RESPONSES(&txq->ring))
		xn_txeof(txq);
	xn_txq_start(txq);
	XN_TX_UNLOCK(txq);
}

/*
 * Task handler installed as txq->defrtask by setup_txqs(): drain the
 * queue's buf_ring under the tx lock.  'pending' is not used.
 */
static void
xn_txq_tq_deferred(void *xtxq, int pending)
{
	struct netfront_txq *txq = xtxq;

	XN_TX_LOCK(txq);
	xn_txq_start(txq);
	XN_TX_UNLOCK(txq);
}

/*
 * Release what an rx queue shares with the backend: the posted rx buffers,
 * the reserved grant references and the grant on the ring page.
 */
static void
disconnect_rxq(struct netfront_rxq *rxq)
{

	xn_release_rx_bufs(rxq);
	gnttab_free_grant_references(rxq->gref_head);
	gnttab_end_foreign_access(rxq->ring_ref, NULL);
	/*
	 * No split event channel support at the moment, handle will
	 * be unbound in tx. So no need to call xen_intr_unbind here,
	 * but we do want to reset the handler to 0.
	 */
	rxq->xen_intr_handle = 0;
}

/* Stop the refill callout and free the rx ring page. */
static void
destroy_rxq(struct netfront_rxq *rxq)
{

	callout_drain(&rxq->rx_refill);
	free(rxq->ring.sring, M_DEVBUF);
}

/* Destroy every rx queue and free the queue array itself. */
static void
destroy_rxqs(struct netfront_info *np)
{
	int i;

	for (i = 0; i < np->num_queues; i++)
		destroy_rxq(&np->rxq[i]);

	free(np->rxq, M_DEVBUF);
	np->rxq = NULL;
}

/*
 * Allocate and initialise 'num_queues' rx queues in info->rxq.
 *
 * For each queue: initialise the lock and the slot tables, reserve
 * NET_RX_RING_SIZE grant references, allocate and initialise the shared
 * ring page, grant it to the backend and initialise the refill callout.
 *
 * Returns 0 on success, or an error after tearing the queues down and
 * freeing info->rxq.
 *
 * NOTE(review): on the failure paths the unwind loop starts at the queue
 * that failed.  After fail_grant_ring has already released that queue's
 * grant references and ring page, disconnect_rxq()/destroy_rxq() release
 * them a second time, and callout_drain() runs on a callout that has not
 * been through callout_init() -- looks like a double release, confirm.
 * info->rxq is also freed without being reset to NULL.
 */
static int
setup_rxqs(device_t dev, struct netfront_info *info,
	   unsigned long num_queues)
{
	int q, i;
	int error;
	netif_rx_sring_t *rxs;
	struct netfront_rxq *rxq;

	info->rxq = malloc(sizeof(struct netfront_rxq) * num_queues,
	    M_DEVBUF, M_WAITOK|M_ZERO);

	for (q = 0; q < num_queues; q++) {
		rxq = &info->rxq[q];

		rxq->id = q;
		rxq->info = info;
		rxq->ring_ref = GRANT_REF_INVALID;
		rxq->ring.sring = NULL;
		snprintf(rxq->name, XN_QUEUE_NAME_LEN, "xnrx_%u", q);
		mtx_init(&rxq->lock, rxq->name, "netfront receive lock",
		    MTX_DEF);

		/* Mark every slot, including the extra last one, as empty. */
		for (i = 0; i <= NET_RX_RING_SIZE; i++) {
			rxq->mbufs[i] = NULL;
			rxq->grant_ref[i] = GRANT_REF_INVALID;
		}

		/* Start resources allocation */

		if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
		    &rxq->gref_head) != 0) {
			device_printf(dev, "allocating rx gref");
			error = ENOMEM;
			goto fail;
		}

		rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF,
		    M_WAITOK|M_ZERO);
		SHARED_RING_INIT(rxs);
		FRONT_RING_INIT(&rxq->ring, rxs, PAGE_SIZE);

		error = xenbus_grant_ring(dev, virt_to_mfn(rxs),
		    &rxq->ring_ref);
		if (error != 0) {
			device_printf(dev, "granting rx ring page");
			goto fail_grant_ring;
		}

		callout_init(&rxq->rx_refill, 1);
	}

	return (0);

fail_grant_ring:
	gnttab_free_grant_references(rxq->gref_head);
	free(rxq->ring.sring, M_DEVBUF);
fail:
	for (; q >= 0; q--) {
		disconnect_rxq(&info->rxq[q]);
		destroy_rxq(&info->rxq[q]);
	}

	free(info->rxq, M_DEVBUF);
	return (error);
}

/*
 * Release what a tx queue shares with the backend: in-flight tx buffers,
 * reserved grant references, the grant on the ring page, and the interrupt
 * binding.
 */
static void
disconnect_txq(struct netfront_txq *txq)
{

	xn_release_tx_bufs(txq);
	gnttab_free_grant_references(txq->gref_head);
	gnttab_end_foreign_access(txq->ring_ref, NULL);
	xen_intr_unbind(&txq->xen_intr_handle);
}

/*
 * Free the tx ring page, the buf_ring and the taskqueue.
 *
 * NOTE(review): the buf_ring is freed before the taskqueue is drained,
 * while the deferred task (xn_txq_tq_deferred) reads txq->br -- ordering
 * looks suspicious, confirm.
 */
static void
destroy_txq(struct netfront_txq *txq)
{

	free(txq->ring.sring, M_DEVBUF);
	buf_ring_free(txq->br, M_DEVBUF);
	taskqueue_drain_all(txq->tq);
	taskqueue_free(txq->tq);
}

/* Destroy every tx queue and free the queue array itself. */
static void
destroy_txqs(struct netfront_info *np)
{
	int i;

	for (i = 0; i < np->num_queues; i++)
		destroy_txq(&np->txq[i]);

	free(np->txq, M_DEVBUF);
	np->txq = NULL;
}

/*
 * Allocate and initialise 'num_queues' tx queues in info->txq.
 *
 * For each queue: initialise the lock and the slot free list, reserve
 * NET_TX_RING_SIZE grant references, allocate, initialise and grant the
 * shared ring page, create the buf_ring, the deferred-start task and its
 * taskqueue thread, and bind a local event channel port to xn_intr().
 *
 * Returns 0 on success, or an error after unwinding and freeing info->txq.
 *
 * NOTE(review): as in setup_rxqs(), the unwind loop starts at the queue
 * that failed, after the labelled cleanup above it has already released
 * part of that queue's resources -- confirm that the second release is
 * harmless.
 */
static int
setup_txqs(device_t dev, struct netfront_info *info,
	   unsigned long num_queues)
{
	int q, i;
	int error;
	netif_tx_sring_t *txs;
	struct netfront_txq *txq;

	info->txq = malloc(sizeof(struct netfront_txq) * num_queues,
	    M_DEVBUF, M_WAITOK|M_ZERO);

	for (q = 0; q < num_queues; q++) {
		txq = &info->txq[q];

		txq->id = q;
		txq->info = info;

		txq->ring_ref = GRANT_REF_INVALID;
		txq->ring.sring = NULL;

		snprintf(txq->name, XN_QUEUE_NAME_LEN, "xntx_%u", q);

		mtx_init(&txq->lock, txq->name, "netfront transmit lock",
		    MTX_DEF);

		/*
		 * Build the slot free list: entry i points at entry i + 1,
		 * and the last entry is then overwritten with 0 to end the
		 * chain.
		 */
		for (i = 0; i <= NET_TX_RING_SIZE; i++) {
			txq->mbufs[i] = (void *) ((u_long) i+1);
			txq->grant_ref[i] = GRANT_REF_INVALID;
		}
		txq->mbufs[NET_TX_RING_SIZE] = (void *)0;

		/* Start resources allocation. */

		if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
		    &txq->gref_head) != 0) {
			device_printf(dev, "failed to allocate tx grant refs\n");
			error = ENOMEM;
			goto fail;
		}

		txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF,
		    M_WAITOK|M_ZERO);
		SHARED_RING_INIT(txs);
		FRONT_RING_INIT(&txq->ring, txs, PAGE_SIZE);

		error = xenbus_grant_ring(dev, virt_to_mfn(txs),
		    &txq->ring_ref);
		if (error != 0) {
			device_printf(dev, "failed to grant tx ring\n");
			goto fail_grant_ring;
		}

		txq->br = buf_ring_alloc(NET_TX_RING_SIZE, M_DEVBUF,
		    M_WAITOK, &txq->lock);
		TASK_INIT(&txq->defrtask, 0, xn_txq_tq_deferred, txq);

		txq->tq = taskqueue_create(txq->name, M_WAITOK,
		    taskqueue_thread_enqueue, &txq->tq);

		error = taskqueue_start_threads(&txq->tq, 1, PI_NET,
		    "%s txq %d", device_get_nameunit(dev), txq->id);
		if (error != 0) {
			device_printf(dev, "failed to start tx taskq %d\n",
			    txq->id);
			goto fail_start_thread;
		}

		/* The tx queue is the argument handed to xn_intr(). */
		error = xen_intr_alloc_and_bind_local_port(dev,
		    xenbus_get_otherend_id(dev), /* filter */ NULL, xn_intr,
		    &info->txq[q], INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY,
		    &txq->xen_intr_handle);

		if (error != 0) {
			device_printf(dev, "xen_intr_alloc_and_bind_local_port failed\n");
			goto fail_bind_port;
		}
	}

	return (0);

	/* Cleanup of the queue that failed; each label falls through. */
fail_bind_port:
	taskqueue_drain_all(txq->tq);
fail_start_thread:
	buf_ring_free(txq->br, M_DEVBUF);
	taskqueue_free(txq->tq);
	gnttab_end_foreign_access(txq->ring_ref, NULL);
fail_grant_ring:
	gnttab_free_grant_references(txq->gref_head);
	free(txq->ring.sring, M_DEVBUF);
fail:
	for (; q >= 0; q--) {
		disconnect_txq(&info->txq[q]);
		destroy_txq(&info->txq[q]);
	}

	free(info->txq, M_DEVBUF);
	return (error);
}

/*
 * (Re)create all queues of the interface.
 *
 * Existing tx and rx queues are destroyed first, then 'num_queues' rx and
 * tx queues are built.  info->num_queues stays 0 until both sets exist.
 * Finally each rx queue takes its tx queue's interrupt handle, so both
 * halves of a pair share one event channel.
 *
 * Returns 0 on success or the error from setup_rxqs()/setup_txqs().
 *
 * NOTE(review): when setup_txqs() fails, the rx queues created just before
 * are not torn down here -- presumably the caller's cleanup handles that;
 * verify.
 */
static int
setup_device(device_t dev, struct netfront_info *info,
    unsigned long num_queues)
{
	int error;
	int q;

	if (info->txq)
		destroy_txqs(info);

	if (info->rxq)
		destroy_rxqs(info);

	info->num_queues = 0;

	error = setup_rxqs(dev, info, num_queues);
	if (error != 0)
		goto out;
	error = setup_txqs(dev, info, num_queues);
	if (error != 0)
		goto out;

	info->num_queues = num_queues;

	/* No split event channel at the moment. */
	for (q = 0; q < num_queues; q++)
		info->rxq[q].xen_intr_handle = info->txq[q].xen_intr_handle;

	return (0);

out:
	KASSERT(error != 0, ("Error path taken without providing an error code"));
	return (error);
}

#ifdef INET
/**
 * If this interface has an ipv4 address, send an arp for it. This
 * helps to get the network going again after migrating hosts.
 */
static void
netfront_send_fake_arp(device_t dev, struct netfront_info *info)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	ifp = info->xn_ifp;
	/* arp_ifinit() is called for every AF_INET address on the interface. */
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			arp_ifinit(ifp, ifa);
		}
	}
}
#endif

/**
 * Callback received when the backend's state changes.
 */
/*
 * Reactions visible below:
 *  - InitWait: when our own state is still Initialising, connect with
 *    xn_connect() and move to Connected;
 *  - Closing: answer by moving to Closed;
 *  - Closed: when a reset was requested (sc->xn_reset), disconnect from the
 *    backend, go back to Initialising and clear the request;
 *  - Connected: with INET, announce our addresses via
 *    netfront_send_fake_arp().
 * All other states are ignored.
 *
 * NOTE(review): the lines starting with '-' and '+' in the InitWait case
 * are unified-diff markers carried by this file (it is a patch, see its
 * "Index:" header); they are preserved untouched here.
 */
static void
netfront_backend_changed(device_t dev, XenbusState newstate)
{
	struct netfront_info *sc = device_get_softc(dev);

	DPRINTK("newstate=%d\n", newstate);

	switch (newstate) {
	case XenbusStateInitialising:
	case XenbusStateInitialised:
	case XenbusStateUnknown:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:
		break;
	case XenbusStateInitWait:
		if (xenbus_get_state(dev) != XenbusStateInitialising)
			break;
		if (xn_connect(sc) != 0)
			break;
-		xenbus_set_state(dev, XenbusStateConnected);
+		/* Switch to connected state before kicking the rings. */
+		xenbus_set_state(sc->xbdev, XenbusStateConnected);
+		xn_kick_rings(sc);
		break;
	case XenbusStateClosing:
		xenbus_set_state(dev, XenbusStateClosed);
		break;
	case XenbusStateClosed:
		if (sc->xn_reset) {
			netif_disconnect_backend(sc);
			xenbus_set_state(dev, XenbusStateInitialising);
			sc->xn_reset = false;
		}
		break;
	case XenbusStateConnected:
#ifdef INET
		netfront_send_fake_arp(dev, sc);
#endif
		break;
	}
}

/**
 * \brief Verify that there is sufficient space in the Tx ring
 *        buffer for a maximally sized request to be enqueued.
 *
 * A transmit request requires a transmit descriptor for each packet
 * fragment, plus up to 2 entries for "options" (e.g. TSO).
*/ static inline int xn_tx_slot_available(struct netfront_txq *txq) { return (RING_FREE_REQUESTS(&txq->ring) > (MAX_TX_REQ_FRAGS + 2)); } static void xn_release_tx_bufs(struct netfront_txq *txq) { int i; for (i = 1; i <= NET_TX_RING_SIZE; i++) { struct mbuf *m; m = txq->mbufs[i]; /* * We assume that no kernel addresses are * less than NET_TX_RING_SIZE. Any entry * in the table that is below this number * must be an index from free-list tracking. */ if (((uintptr_t)m) <= NET_TX_RING_SIZE) continue; gnttab_end_foreign_access_ref(txq->grant_ref[i]); gnttab_release_grant_reference(&txq->gref_head, txq->grant_ref[i]); txq->grant_ref[i] = GRANT_REF_INVALID; add_id_to_freelist(txq->mbufs, i); txq->mbufs_cnt--; if (txq->mbufs_cnt < 0) { panic("%s: tx_chain_cnt must be >= 0", __func__); } m_free(m); } } static struct mbuf * xn_alloc_one_rx_buffer(struct netfront_rxq *rxq) { struct mbuf *m; m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (m == NULL) return NULL; m->m_len = m->m_pkthdr.len = MJUMPAGESIZE; return (m); } static void xn_alloc_rx_buffers(struct netfront_rxq *rxq) { RING_IDX req_prod; int notify; XN_RX_LOCK_ASSERT(rxq); if (__predict_false(rxq->info->carrier == 0)) return; for (req_prod = rxq->ring.req_prod_pvt; req_prod - rxq->ring.rsp_cons < NET_RX_RING_SIZE; req_prod++) { struct mbuf *m; unsigned short id; grant_ref_t ref; struct netif_rx_request *req; unsigned long pfn; m = xn_alloc_one_rx_buffer(rxq); if (m == NULL) break; id = xn_rxidx(req_prod); KASSERT(rxq->mbufs[id] == NULL, ("non-NULL xn_rx_chain")); rxq->mbufs[id] = m; ref = gnttab_claim_grant_reference(&rxq->gref_head); KASSERT(ref != GNTTAB_LIST_END, ("reserved grant references exhuasted")); rxq->grant_ref[id] = ref; pfn = atop(vtophys(mtod(m, vm_offset_t))); req = RING_GET_REQUEST(&rxq->ring, req_prod); gnttab_grant_foreign_access_ref(ref, xenbus_get_otherend_id(rxq->info->xbdev), pfn, 0); req->id = id; req->gref = ref; } rxq->ring.req_prod_pvt = req_prod; /* Not enough requests? 
Try again later. */ if (req_prod - rxq->ring.rsp_cons < NET_RX_SLOTS_MIN) { callout_reset_curcpu(&rxq->rx_refill, hz/10, xn_alloc_rx_buffers_callout, rxq); return; } wmb(); /* barrier so backend seens requests */ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify); if (notify) xen_intr_signal(rxq->xen_intr_handle); } static void xn_alloc_rx_buffers_callout(void *arg) { struct netfront_rxq *rxq; rxq = (struct netfront_rxq *)arg; XN_RX_LOCK(rxq); xn_alloc_rx_buffers(rxq); XN_RX_UNLOCK(rxq); } static void xn_release_rx_bufs(struct netfront_rxq *rxq) { int i, ref; struct mbuf *m; for (i = 0; i < NET_RX_RING_SIZE; i++) { m = rxq->mbufs[i]; if (m == NULL) continue; ref = rxq->grant_ref[i]; if (ref == GRANT_REF_INVALID) continue; gnttab_end_foreign_access_ref(ref); gnttab_release_grant_reference(&rxq->gref_head, ref); rxq->mbufs[i] = NULL; rxq->grant_ref[i] = GRANT_REF_INVALID; m_freem(m); } } static void xn_rxeof(struct netfront_rxq *rxq) { struct ifnet *ifp; struct netfront_info *np = rxq->info; #if (defined(INET) || defined(INET6)) struct lro_ctrl *lro = &rxq->lro; #endif struct netfront_rx_info rinfo; struct netif_rx_response *rx = &rinfo.rx; struct netif_extra_info *extras = rinfo.extras; RING_IDX i, rp; struct mbuf *m; struct mbufq mbufq_rxq, mbufq_errq; int err, work_to_do; do { XN_RX_LOCK_ASSERT(rxq); if (!netfront_carrier_ok(np)) return; /* XXX: there should be some sane limit. */ mbufq_init(&mbufq_errq, INT_MAX); mbufq_init(&mbufq_rxq, INT_MAX); ifp = np->xn_ifp; rp = rxq->ring.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ i = rxq->ring.rsp_cons; while ((i != rp)) { memcpy(rx, RING_GET_RESPONSE(&rxq->ring, i), sizeof(*rx)); memset(extras, 0, sizeof(rinfo.extras)); m = NULL; err = xn_get_responses(rxq, &rinfo, rp, &i, &m); if (__predict_false(err)) { if (m) (void )mbufq_enqueue(&mbufq_errq, m); rxq->stats.rx_errors++; continue; } m->m_pkthdr.rcvif = ifp; if ( rx->flags & NETRXF_data_validated ) { /* Tell the stack the checksums are okay */ /* * XXX this isn't necessarily the case - need to add * check */ m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } if ((rx->flags & NETRXF_extra_info) != 0 && (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type == XEN_NETIF_EXTRA_TYPE_GSO)) { m->m_pkthdr.tso_segsz = extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].u.gso.size; m->m_pkthdr.csum_flags |= CSUM_TSO; } rxq->stats.rx_packets++; rxq->stats.rx_bytes += m->m_pkthdr.len; (void )mbufq_enqueue(&mbufq_rxq, m); rxq->ring.rsp_cons = i; } mbufq_drain(&mbufq_errq); /* * Process all the mbufs after the remapping is complete. * Break the mbuf chain first though. */ while ((m = mbufq_dequeue(&mbufq_rxq)) != NULL) { if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* XXX: Do we really need to drop the rx lock? */ XN_RX_UNLOCK(rxq); #if (defined(INET) || defined(INET6)) /* Use LRO if possible */ if ((ifp->if_capenable & IFCAP_LRO) == 0 || lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) { /* * If LRO fails, pass up to the stack * directly. 
*/ (*ifp->if_input)(ifp, m); } #else (*ifp->if_input)(ifp, m); #endif XN_RX_LOCK(rxq); } rxq->ring.rsp_cons = i; #if (defined(INET) || defined(INET6)) /* * Flush any outstanding LRO work */ tcp_lro_flush_all(lro); #endif xn_alloc_rx_buffers(rxq); RING_FINAL_CHECK_FOR_RESPONSES(&rxq->ring, work_to_do); } while (work_to_do); } static void xn_txeof(struct netfront_txq *txq) { RING_IDX i, prod; unsigned short id; struct ifnet *ifp; netif_tx_response_t *txr; struct mbuf *m; struct netfront_info *np = txq->info; XN_TX_LOCK_ASSERT(txq); if (!netfront_carrier_ok(np)) return; ifp = np->xn_ifp; do { prod = txq->ring.sring->rsp_prod; rmb(); /* Ensure we see responses up to 'rp'. */ for (i = txq->ring.rsp_cons; i != prod; i++) { txr = RING_GET_RESPONSE(&txq->ring, i); if (txr->status == NETIF_RSP_NULL) continue; if (txr->status != NETIF_RSP_OKAY) { printf("%s: WARNING: response is %d!\n", __func__, txr->status); } id = txr->id; m = txq->mbufs[id]; KASSERT(m != NULL, ("mbuf not found in chain")); KASSERT((uintptr_t)m > NET_TX_RING_SIZE, ("mbuf already on the free list, but we're " "trying to free it again!")); M_ASSERTVALID(m); /* * Increment packet count if this is the last * mbuf of the chain. */ if (!m->m_next) if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (__predict_false(gnttab_query_foreign_access( txq->grant_ref[id]) != 0)) { panic("%s: grant id %u still in use by the " "backend", __func__, id); } gnttab_end_foreign_access_ref(txq->grant_ref[id]); gnttab_release_grant_reference( &txq->gref_head, txq->grant_ref[id]); txq->grant_ref[id] = GRANT_REF_INVALID; txq->mbufs[id] = NULL; add_id_to_freelist(txq->mbufs, id); txq->mbufs_cnt--; m_free(m); /* Only mark the txq active if we've freed up at least one slot to try */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } txq->ring.rsp_cons = prod; /* * Set a new event, then check for race with update of * tx_cons. Note that it is essential to schedule a * callback, no matter how few buffers are pending. 
Even if * there is space in the transmit ring, higher layers may * be blocked because too much data is outstanding: in such * cases notification from Xen is likely to be the only kick * that we'll get. */ txq->ring.sring->rsp_event = prod + ((txq->ring.sring->req_prod - prod) >> 1) + 1; mb(); } while (prod != txq->ring.sring->rsp_prod); if (txq->full && ((txq->ring.sring->req_prod - prod) < NET_TX_RING_SIZE)) { txq->full = false; xn_txq_start(txq); } } static void xn_intr(void *xsc) { struct netfront_txq *txq = xsc; struct netfront_info *np = txq->info; struct netfront_rxq *rxq = &np->rxq[txq->id]; /* kick both tx and rx */ xn_rxq_intr(rxq); xn_txq_intr(txq); } static void xn_move_rx_slot(struct netfront_rxq *rxq, struct mbuf *m, grant_ref_t ref) { int new = xn_rxidx(rxq->ring.req_prod_pvt); KASSERT(rxq->mbufs[new] == NULL, ("mbufs != NULL")); rxq->mbufs[new] = m; rxq->grant_ref[new] = ref; RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->id = new; RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->gref = ref; rxq->ring.req_prod_pvt++; } static int xn_get_extras(struct netfront_rxq *rxq, struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons) { struct netif_extra_info *extra; int err = 0; do { struct mbuf *m; grant_ref_t ref; if (__predict_false(*cons + 1 == rp)) { err = EINVAL; break; } extra = (struct netif_extra_info *) RING_GET_RESPONSE(&rxq->ring, ++(*cons)); if (__predict_false(!extra->type || extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { err = EINVAL; } else { memcpy(&extras[extra->type - 1], extra, sizeof(*extra)); } m = xn_get_rx_mbuf(rxq, *cons); ref = xn_get_rx_ref(rxq, *cons); xn_move_rx_slot(rxq, m, ref); } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); return err; } static int xn_get_responses(struct netfront_rxq *rxq, struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, struct mbuf **list) { struct netif_rx_response *rx = &rinfo->rx; struct netif_extra_info *extras = rinfo->extras; struct mbuf *m, *m0, *m_prev; grant_ref_t ref 
= xn_get_rx_ref(rxq, *cons); RING_IDX ref_cons = *cons; int frags = 1; int err = 0; u_long ret; m0 = m = m_prev = xn_get_rx_mbuf(rxq, *cons); if (rx->flags & NETRXF_extra_info) { err = xn_get_extras(rxq, extras, rp, cons); } if (m0 != NULL) { m0->m_pkthdr.len = 0; m0->m_next = NULL; } for (;;) { #if 0 DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n", rx->status, rx->offset, frags); #endif if (__predict_false(rx->status < 0 || rx->offset + rx->status > PAGE_SIZE)) { xn_move_rx_slot(rxq, m, ref); if (m0 == m) m0 = NULL; m = NULL; err = EINVAL; goto next_skip_queue; } /* * This definitely indicates a bug, either in this driver or in * the backend driver. In future this should flag the bad * situation to the system controller to reboot the backed. */ if (ref == GRANT_REF_INVALID) { printf("%s: Bad rx response id %d.\n", __func__, rx->id); err = EINVAL; goto next; } ret = gnttab_end_foreign_access_ref(ref); KASSERT(ret, ("Unable to end access to grant references")); gnttab_release_grant_reference(&rxq->gref_head, ref); next: if (m == NULL) break; m->m_len = rx->status; m->m_data += rx->offset; m0->m_pkthdr.len += rx->status; next_skip_queue: if (!(rx->flags & NETRXF_more_data)) break; if (*cons + frags == rp) { if (net_ratelimit()) WPRINTK("Need more frags\n"); err = ENOENT; printf("%s: cons %u frags %u rp %u, not enough frags\n", __func__, *cons, frags, rp); break; } /* * Note that m can be NULL, if rx->status < 0 or if * rx->offset + rx->status > PAGE_SIZE above. */ m_prev = m; rx = RING_GET_RESPONSE(&rxq->ring, *cons + frags); m = xn_get_rx_mbuf(rxq, *cons + frags); /* * m_prev == NULL can happen if rx->status < 0 or if * rx->offset + * rx->status > PAGE_SIZE above. */ if (m_prev != NULL) m_prev->m_next = m; /* * m0 can be NULL if rx->status < 0 or if * rx->offset + * rx->status > PAGE_SIZE above. 
*/ if (m0 == NULL) m0 = m; m->m_next = NULL; ref = xn_get_rx_ref(rxq, *cons + frags); ref_cons = *cons + frags; frags++; } *list = m0; *cons += frags; return (err); } /** * \brief Count the number of fragments in an mbuf chain. * * Surprisingly, there isn't an M* macro for this. */ static inline int xn_count_frags(struct mbuf *m) { int nfrags; for (nfrags = 0; m != NULL; m = m->m_next) nfrags++; return (nfrags); } /** * Given an mbuf chain, make sure we have enough room and then push * it onto the transmit ring. */ static int xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) { struct mbuf *m; struct netfront_info *np = txq->info; struct ifnet *ifp = np->xn_ifp; u_int nfrags; int otherend_id; /** * Defragment the mbuf if necessary. */ nfrags = xn_count_frags(m_head); /* * Check to see whether this request is longer than netback * can handle, and try to defrag it. */ /** * It is a bit lame, but the netback driver in Linux can't * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of * the Linux network stack. */ if (nfrags > np->maxfrags) { m = m_defrag(m_head, M_NOWAIT); if (!m) { /* * Defrag failed, so free the mbuf and * therefore drop the packet. */ m_freem(m_head); return (EMSGSIZE); } m_head = m; } /* Determine how many fragments now exist */ nfrags = xn_count_frags(m_head); /* * Check to see whether the defragmented packet has too many * segments for the Linux netback driver. */ /** * The FreeBSD TCP stack, with TSO enabled, can produce a chain * of mbufs longer than Linux can handle. Make sure we don't * pass a too-long chain over to the other side by dropping the * packet. It doesn't look like there is currently a way to * tell the TCP stack to generate a shorter chain of packets. 
*/ if (nfrags > MAX_TX_REQ_FRAGS) { #ifdef DEBUG printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback " "won't be able to handle it, dropping\n", __func__, nfrags, MAX_TX_REQ_FRAGS); #endif m_freem(m_head); return (EMSGSIZE); } /* * This check should be redundant. We've already verified that we * have enough slots in the ring to handle a packet of maximum * size, and that our packet is less than the maximum size. Keep * it in here as an assert for now just to make certain that * chain_cnt is accurate. */ KASSERT((txq->mbufs_cnt + nfrags) <= NET_TX_RING_SIZE, ("%s: chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE " "(%d)!", __func__, (int) txq->mbufs_cnt, (int) nfrags, (int) NET_TX_RING_SIZE)); /* * Start packing the mbufs in this chain into * the fragment pointers. Stop when we run out * of fragments or hit the end of the mbuf chain. */ m = m_head; otherend_id = xenbus_get_otherend_id(np->xbdev); for (m = m_head; m; m = m->m_next) { netif_tx_request_t *tx; uintptr_t id; grant_ref_t ref; u_long mfn; /* XXX Wrong type? */ tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt); id = get_id_from_freelist(txq->mbufs); if (id == 0) panic("%s: was allocated the freelist head!\n", __func__); txq->mbufs_cnt++; if (txq->mbufs_cnt > NET_TX_RING_SIZE) panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", __func__); txq->mbufs[id] = m; tx->id = id; ref = gnttab_claim_grant_reference(&txq->gref_head); KASSERT((short)ref >= 0, ("Negative ref")); mfn = virt_to_mfn(mtod(m, vm_offset_t)); gnttab_grant_foreign_access_ref(ref, otherend_id, mfn, GNTMAP_readonly); tx->gref = txq->grant_ref[id] = ref; tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1); tx->flags = 0; if (m == m_head) { /* * The first fragment has the entire packet * size, subsequent fragments have just the * fragment size. The backend works out the * true size of the first fragment by * subtracting the sizes of the other * fragments. 
*/ tx->size = m->m_pkthdr.len; /* * The first fragment contains the checksum flags * and is optionally followed by extra data for * TSO etc. */ /** * CSUM_TSO requires checksum offloading. * Some versions of FreeBSD fail to * set CSUM_TCP in the CSUM_TSO case, * so we have to test for CSUM_TSO * explicitly. */ if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_TSO)) { tx->flags |= (NETTXF_csum_blank | NETTXF_data_validated); } if (m->m_pkthdr.csum_flags & CSUM_TSO) { /* * The GSO extra-info record is placed in the ring * slot following this request, so the private * producer index is advanced once here and once * more at the bottom of the loop. */ struct netif_extra_info *gso = (struct netif_extra_info *) RING_GET_REQUEST(&txq->ring, ++txq->ring.req_prod_pvt); tx->flags |= NETTXF_extra_info; gso->u.gso.size = m->m_pkthdr.tso_segsz; gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; gso->u.gso.pad = 0; gso->u.gso.features = 0; gso->type = XEN_NETIF_EXTRA_TYPE_GSO; gso->flags = 0; } } else { tx->size = m->m_len; } /* Every fragment except the last is flagged as having more data. */ if (m->m_next) tx->flags |= NETTXF_more_data; txq->ring.req_prod_pvt++; } BPF_MTAP(ifp, m_head); xn_txeof(txq); txq->stats.tx_bytes += m_head->m_pkthdr.len; txq->stats.tx_packets++; return (0); } /* * Equivalent of network_open() in Linux. * * Must be called with the softc lock held. Returns without doing * anything if the interface is already marked IFF_DRV_RUNNING or the * carrier is not ok; otherwise it refills every RX queue, marks the * interface running and reports the link as up. */ static void xn_ifinit_locked(struct netfront_info *np) { struct ifnet *ifp; int i; struct netfront_rxq *rxq; XN_LOCK_ASSERT(np); ifp = np->xn_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING || !netfront_carrier_ok(np)) return; xn_stop(np); for (i = 0; i < np->num_queues; i++) { rxq = &np->rxq[i]; XN_RX_LOCK(rxq); xn_alloc_rx_buffers(rxq); rxq->ring.sring->rsp_event = rxq->ring.rsp_cons + 1; if (RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring)) xn_rxeof(rxq); XN_RX_UNLOCK(rxq); } ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); } /* Wrapper that takes the softc lock around xn_ifinit_locked(). */ static void xn_ifinit(void *xsc) { struct netfront_info *sc = xsc; XN_LOCK(sc); xn_ifinit_locked(sc); XN_UNLOCK(sc); } /* * Interface ioctl handler. Requests that are not handled explicitly * are passed on to ether_ioctl(). */ static int xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct netfront_info *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; device_t dev; #ifdef INET struct ifaddr *ifa = (struct
ifaddr *)data; #endif int mask, error = 0; dev = sc->xbdev; switch(cmd) { case SIOCSIFADDR: #ifdef INET XN_LOCK(sc); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) xn_ifinit_locked(sc); arp_ifinit(ifp, ifa); XN_UNLOCK(sc); } else { XN_UNLOCK(sc); #endif error = ether_ioctl(ifp, cmd, data); #ifdef INET } #endif break; case SIOCSIFMTU: /* * Clear IFF_DRV_RUNNING first; xn_ifinit_locked() would * otherwise return without doing anything. * NOTE(review): the requested MTU is not range-checked here. */ ifp->if_mtu = ifr->ifr_mtu; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; xn_ifinit(sc); break; case SIOCSIFFLAGS: XN_LOCK(sc); if (ifp->if_flags & IFF_UP) { /* * The interface is marked up: (re)initialize it. * xn_ifinit_locked() returns early when the * interface is already running or the carrier is * down, so nothing is redone in that case. */ xn_ifinit_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { xn_stop(sc); } } sc->xn_if_flags = ifp->if_flags; XN_UNLOCK(sc); break; case SIOCSIFCAP: /* Bits set in mask are the capabilities being toggled. */ mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { if (IFCAP_TXCSUM & ifp->if_capenable) { /* Turning off TX checksum offload also turns off TSO4. */ ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO); } else { ifp->if_capenable |= IFCAP_TXCSUM; ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); } } if (mask & IFCAP_RXCSUM) { ifp->if_capenable ^= IFCAP_RXCSUM; } if (mask & IFCAP_TSO4) { if (IFCAP_TSO4 & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } else if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_TSO; } else { /* TSO4 can only be enabled while TX checksum offload is enabled. */ IPRINTK("Xen requires tx checksum offload" " be enabled to use TSO\n"); error = EINVAL; } } if (mask & IFCAP_LRO) { ifp->if_capenable ^= IFCAP_LRO; } /* * We must reset the interface so the backend picks up the * new features.
*/ XN_LOCK(sc); netfront_carrier_off(sc); sc->xn_reset = true; /* * NB: the pending packet queue is not flushed, since * the interface should still support the old options. */ XN_UNLOCK(sc); /* * Delete the xenstore nodes that export features. * * NB: There's a xenbus state called * "XenbusStateReconfiguring", which is what we should set * here. Sadly none of the backends know how to handle it, * and simply disconnect from the frontend, so we will just * switch back to XenbusStateInitialising in order to force * a reconnection. * * NOTE(review): the call below sets XenbusStateClosing, not * XenbusStateInitialising; presumably the move back to * Initialising happens when the resulting backend state * change is handled with sc->xn_reset set -- confirm. */ xs_rm(XST_NIL, xenbus_get_node(dev), "feature-gso-tcpv4"); xs_rm(XST_NIL, xenbus_get_node(dev), "feature-no-csum-offload"); xenbus_set_state(dev, XenbusStateClosing); break; /* Multicast list changes are accepted without any action. */ case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); } return (error); } /* * Mark the interface as neither running nor output-active and report * the link as down. The softc lock must be held. */ static void xn_stop(struct netfront_info *sc) { struct ifnet *ifp; XN_LOCK_ASSERT(sc); ifp = sc->xn_ifp; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); } /* * Walk rxq->mbufs[] and, for every non-NULL entry i, fetch its mbuf and * grant reference with xn_get_rx_mbuf()/xn_get_rx_ref(), store them at * the next free index (requeue_idx), grant the buffer page to the other * end again and fill in the RX request at that index. The private * producer index ends up equal to the number of buffers requeued. */ static void xn_rebuild_rx_bufs(struct netfront_rxq *rxq) { int requeue_idx, i; grant_ref_t ref; netif_rx_request_t *req; for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { struct mbuf *m; u_long pfn; if (rxq->mbufs[i] == NULL) continue; m = rxq->mbufs[requeue_idx] = xn_get_rx_mbuf(rxq, i); ref = rxq->grant_ref[requeue_idx] = xn_get_rx_ref(rxq, i); req = RING_GET_REQUEST(&rxq->ring, requeue_idx); pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; gnttab_grant_foreign_access_ref(ref, xenbus_get_otherend_id(rxq->info->xbdev), pfn, 0); req->gref = ref; req->id = requeue_idx; requeue_idx++; } rxq->ring.req_prod_pvt = requeue_idx; } /* START of Xenolinux helper functions adapted to FreeBSD */ -int +static int xn_connect(struct netfront_info *np) { int i, error; u_int feature_rx_copy; struct netfront_rxq *rxq; struct netfront_txq *txq; error =
xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-rx-copy", NULL, "%u", &feature_rx_copy); /* A failed read is treated as the feature being absent. */ if (error != 0) feature_rx_copy = 0; /* We only support rx copy. */ if (!feature_rx_copy) return (EPROTONOSUPPORT); /* Recovery procedure: */ error = talk_to_backend(np->xbdev, np); if (error != 0) return (error); /* Step 1: Reinitialise variables. */ /* NOTE(review): the return value of xn_configure_features() is ignored here. */ xn_query_features(np); xn_configure_features(np); /* Step 2: Release the TX buffers of every queue. */ for (i = 0; i < np->num_queues; i++) { txq = &np->txq[i]; xn_release_tx_bufs(txq); } /* Step 3: Rebuild the RX buffer freelist and the RX ring itself. */ for (i = 0; i < np->num_queues; i++) { rxq = &np->rxq[i]; xn_rebuild_rx_bufs(rxq); } /* Step 4: All public and private state should now be sane. Get * ready to start sending and receiving packets and give the driver * domain a kick because we've probably just requeued some * packets. */ netfront_carrier_on(np); + + return (0); +} + +static void +xn_kick_rings(struct netfront_info *np) +{ + struct netfront_rxq *rxq; + struct netfront_txq *txq; + int i; + for (i = 0; i < np->num_queues; i++) { txq = &np->txq[i]; + rxq = &np->rxq[i]; xen_intr_signal(txq->xen_intr_handle); XN_TX_LOCK(txq); xn_txeof(txq); XN_TX_UNLOCK(txq); XN_RX_LOCK(rxq); xn_alloc_rx_buffers(rxq); XN_RX_UNLOCK(rxq); } - - return (0); } /* * Read the feature flags advertised by the backend in xenstore and * update np->maxfrags and the interface capabilities to match. A key * that cannot be read is treated as 0. The features found are printed * on a single console line. */ static void xn_query_features(struct netfront_info *np) { int val; device_printf(np->xbdev, "backend features:"); if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-sg", NULL, "%d", &val) != 0) val = 0; /* Without feature-sg only a single fragment per packet is allowed. */ np->maxfrags = 1; if (val) { np->maxfrags = MAX_TX_REQ_FRAGS; printf(" feature-sg"); } if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-gso-tcpv4", NULL, "%d", &val) != 0) val = 0; np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4|IFCAP_LRO); if (val) { np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO; printf(" feature-gso-tcp4"); } /* * HW CSUM offload is assumed to be available unless * feature-no-csum-offload is set in xenstore.
*/ if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-no-csum-offload", NULL, "%d", &val) != 0) val = 0; np->xn_ifp->if_capabilities |= IFCAP_HWCSUM; if (val) { np->xn_ifp->if_capabilities &= ~(IFCAP_HWCSUM); printf(" feature-no-csum-offload"); } printf("\n"); } /* * Re-derive if_capenable and if_hwassist after the set of available * capabilities (if_capabilities) may have changed. If everything that * is currently enabled is still available nothing is touched; * otherwise both fields are cleared and each previously enabled option * is turned back on only if it is still available. * * Returns 0, or the error from tcp_lro_init() if LRO setup failed (LRO * is then left disabled). */ static int xn_configure_features(struct netfront_info *np) { int err, cap_enabled; #if (defined(INET) || defined(INET6)) int i; #endif struct ifnet *ifp; ifp = np->xn_ifp; err = 0; if ((ifp->if_capenable & ifp->if_capabilities) == ifp->if_capenable) { /* Current options are available, no need to do anything. */ return (0); } /* Try to preserve as many options as possible. */ cap_enabled = ifp->if_capenable; ifp->if_capenable = ifp->if_hwassist = 0; #if (defined(INET) || defined(INET6)) /* Release the per-queue LRO state before deciding whether to set it up again. */ if ((cap_enabled & IFCAP_LRO) != 0) for (i = 0; i < np->num_queues; i++) tcp_lro_free(&np->rxq[i].lro); if (xn_enable_lro && (ifp->if_capabilities & cap_enabled & IFCAP_LRO) != 0) { ifp->if_capenable |= IFCAP_LRO; for (i = 0; i < np->num_queues; i++) { err = tcp_lro_init(&np->rxq[i].lro); if (err != 0) { device_printf(np->xbdev, "LRO initialization failed\n"); ifp->if_capenable &= ~IFCAP_LRO; break; } np->rxq[i].lro.ifp = ifp; } } if ((ifp->if_capabilities & cap_enabled & IFCAP_TSO4) != 0) { ifp->if_capenable |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_TSO; } #endif if ((ifp->if_capabilities & cap_enabled & IFCAP_TXCSUM) != 0) { ifp->if_capenable |= IFCAP_TXCSUM; ifp->if_hwassist |= CSUM_TCP|CSUM_UDP; } if ((ifp->if_capabilities & cap_enabled & IFCAP_RXCSUM) != 0) ifp->if_capenable |= IFCAP_RXCSUM; return (err); } /* * Enqueue m (if not NULL) on the queue's buf_ring and push as many * queued packets as possible onto the TX ring. The TX queue lock must * be held. While the interface is not running or the carrier is down * the packet is only enqueued. */ static int xn_txq_mq_start_locked(struct netfront_txq *txq, struct mbuf *m) { struct netfront_info *np; struct ifnet *ifp; struct buf_ring *br; int error, notify; np = txq->info; br = txq->br; ifp = np->xn_ifp; error = 0; XN_TX_LOCK_ASSERT(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !netfront_carrier_ok(np)) { if (m != NULL) error = drbr_enqueue(ifp, br, m); return (error); } if (m != NULL)
{ error = drbr_enqueue(ifp, br, m); if (error != 0) return (error); } /* Drain the buf_ring for as long as TX slots are available. */ while ((m = drbr_peek(ifp, br)) != NULL) { if (!xn_tx_slot_available(txq)) { drbr_putback(ifp, br, m); break; } error = xn_assemble_tx_request(txq, m); /* xn_assemble_tx_request always consumes the mbuf */ /* NOTE(review): a failure here ends the loop but is not reported; 0 is returned below. */ if (error != 0) { drbr_advance(ifp, br); break; } RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&txq->ring, notify); if (notify) xen_intr_signal(txq->xen_intr_handle); drbr_advance(ifp, br); } if (RING_FULL(&txq->ring)) txq->full = true; return (0); } /* * Transmit entry point. Selects a TX queue from the mbuf's flowid when * a hash type is set, otherwise from the current CPU. If the queue lock * cannot be taken immediately the packet is only enqueued on the * buf_ring and the queue's deferred task is scheduled. */ static int xn_txq_mq_start(struct ifnet *ifp, struct mbuf *m) { struct netfront_info *np; struct netfront_txq *txq; int i, npairs, error; np = ifp->if_softc; npairs = np->num_queues; KASSERT(npairs != 0, ("called with 0 available queues")); /* check if flowid is set */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % npairs; else i = curcpu % npairs; txq = &np->txq[i]; if (XN_TX_TRYLOCK(txq) != 0) { error = xn_txq_mq_start_locked(txq, m); XN_TX_UNLOCK(txq); } else { error = drbr_enqueue(ifp, txq->br, m); taskqueue_enqueue(txq->tq, &txq->defrtask); } return (error); } /* * Free every mbuf still queued on the per-queue buf_rings, then call * if_qflush() for the interface. */ static void xn_qflush(struct ifnet *ifp) { struct netfront_info *np; struct netfront_txq *txq; struct mbuf *m; int i; np = ifp->if_softc; for (i = 0; i < np->num_queues; i++) { txq = &np->txq[i]; XN_TX_LOCK(txq); while ((m = buf_ring_dequeue_sc(txq->br)) != NULL) m_freem(m); XN_TX_UNLOCK(txq); } if_qflush(ifp); } /** * Create a network device. * @param dev Newbus device representing this virtual NIC.
*/ int create_netdev(device_t dev) { struct netfront_info *np; int err; struct ifnet *ifp; np = device_get_softc(dev); np->xbdev = dev; mtx_init(&np->sc_lock, "xnsc", "netfront softc lock", MTX_DEF); ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts); ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL); err = xen_net_read_mac(dev, np->mac); if (err != 0) goto error; /* Set up ifnet structure */ /* NOTE(review): the result of if_alloc() is used without a NULL check. */ ifp = np->xn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = np; if_initname(ifp, "xn", device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = xn_ioctl; ifp->if_transmit = xn_txq_mq_start; ifp->if_qflush = xn_qflush; ifp->if_init = xn_ifinit; ifp->if_hwassist = XN_CSUM_FEATURES; /* Enable all supported features at device creation. */ ifp->if_capenable = ifp->if_capabilities = IFCAP_HWCSUM|IFCAP_TSO4|IFCAP_LRO; ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = MAX_TX_REQ_FRAGS; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; ether_ifattach(ifp, np->mac); /* The carrier starts out off. */ netfront_carrier_off(np); return (0); /* NOTE(review): this path returns without undoing the mtx_init()/ifmedia_init() above. */ error: KASSERT(err != 0, ("Error path with no error code specified")); return (err); } /* Device detach method: tears the interface down via netif_free(). Always returns 0. */ static int netfront_detach(device_t dev) { struct netfront_info *info = device_get_softc(dev); DPRINTK("%s\n", xenbus_get_node(dev)); netif_free(info); return 0; } /* * Stop the interface, disconnect from the backend, detach and free the * ifnet, free the queue arrays and remove all media entries. */ static void netif_free(struct netfront_info *np) { XN_LOCK(np); xn_stop(np); XN_UNLOCK(np); netif_disconnect_backend(np); ether_ifdetach(np->xn_ifp); free(np->rxq, M_DEVBUF); free(np->txq, M_DEVBUF); if_free(np->xn_ifp); np->xn_ifp = NULL; ifmedia_removeall(&np->sc_media); } /* * Turn the carrier off while holding every RX and TX queue lock, then * disconnect each RX and TX queue. */ static void netif_disconnect_backend(struct netfront_info *np) { u_int i; for (i = 0; i < np->num_queues; i++) { XN_RX_LOCK(&np->rxq[i]); XN_TX_LOCK(&np->txq[i]); } netfront_carrier_off(np); for (i = 0; i < np->num_queues; i++) { XN_RX_UNLOCK(&np->rxq[i]); XN_TX_UNLOCK(&np->txq[i]); } for (i = 0; i < np->num_queues; i++) {
disconnect_rxq(&np->rxq[i]); disconnect_txq(&np->txq[i]); } } /* Media change callback: there is nothing to change, so always report success. */ static int xn_ifmedia_upd(struct ifnet *ifp) { return (0); } /* Media status callback: always reports a valid, active, manually selected Ethernet medium. */ static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; } /* ** Driver registration ** */ static device_method_t netfront_methods[] = { /* Device interface */ DEVMETHOD(device_probe, netfront_probe), DEVMETHOD(device_attach, netfront_attach), DEVMETHOD(device_detach, netfront_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, netfront_suspend), DEVMETHOD(device_resume, netfront_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed), DEVMETHOD_END }; /* Driver description: name, method table and softc size. */ static driver_t netfront_driver = { "xn", netfront_methods, sizeof(struct netfront_info), }; devclass_t netfront_devclass; DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, NULL, NULL); Index: projects/vnet/sys/kern/kern_shutdown.c =================================================================== --- projects/vnet/sys/kern/kern_shutdown.c (revision 301522) +++ projects/vnet/sys/kern/kern_shutdown.c (revision 301523) @@ -1,942 +1,942 @@ /*- * Copyright (c) 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_kdb.h" #include "opt_panic.h" #include "opt_sched.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer"); #ifndef PANIC_REBOOT_WAIT_TIME #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ #endif static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME; SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN, &panic_reboot_wait_time, 0, "Seconds to wait before rebooting after a panic"); /* * Note that stdarg.h and the ANSI style va_start macro is used for both * ANSI and traditional C compilers. 
*/ #include #ifdef KDB #ifdef KDB_UNATTENDED int debugger_on_panic = 0; #else int debugger_on_panic = 1; #endif SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RWTUN | CTLFLAG_SECURE, &debugger_on_panic, 0, "Run debugger on kernel panic"); #ifdef KDB_TRACE static int trace_on_panic = 1; #else static int trace_on_panic = 0; #endif SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, CTLFLAG_RWTUN | CTLFLAG_SECURE, &trace_on_panic, 0, "Print stack trace on kernel panic"); #endif /* KDB */ /* When zero, vpanic() adds RB_NOSYNC to the reboot options. */ static int sync_on_panic = 0; SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN, &sync_on_panic, 0, "Do a sync before rebooting from a panic"); static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment"); /* Passed to bufshutdown() by kern_reboot(); defaults to 1 under DIAGNOSTIC. */ #ifndef DIAGNOSTIC static int show_busybufs; #else static int show_busybufs = 1; #endif SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW, &show_busybufs, 0, ""); int suspend_blocked = 0; SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW, &suspend_blocked, 0, "Block suspend due to a pending shutdown"); /* * Variable panicstr points at the formatted message of the first call * to panic (see vpanic()); it is also used as a flag to indicate that * the kernel has already called panic. */ const char *panicstr; int dumping; /* system is dumping */ int rebooting; /* system is rebooting */ static struct dumperinfo dumper; /* our selected dumper */ /* Context information for dump-debuggers. */ static struct pcb dumppcb; /* Registers. */ lwpid_t dumptid; /* Thread ID.
*/ static struct cdevsw reroot_cdevsw = { .d_version = D_VERSION, .d_name = "reroot", }; static void poweroff_wait(void *, int); static void shutdown_halt(void *junk, int howto); static void shutdown_panic(void *junk, int howto); static void shutdown_reset(void *junk, int howto); static int kern_reroot(void); /* register various local shutdown events */ static void shutdown_conf(void *unused) { EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, SHUTDOWN_PRI_FIRST); EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, SHUTDOWN_PRI_LAST + 100); EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, SHUTDOWN_PRI_LAST + 100); EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, SHUTDOWN_PRI_LAST + 200); } SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); /* * The only reason this exists is to create the /dev/reroot/ directory, * used by reroot code in init(8) as a mountpoint for tmpfs. */ static void reroot_conf(void *unused) { int error; struct cdev *cdev; error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); if (error != 0) { printf("%s: failed to create device node, error %d", __func__, error); } } SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); /* * The system call that results in a reboot. */ /* ARGSUSED */ int sys_reboot(struct thread *td, struct reboot_args *uap) { int error; error = 0; #ifdef MAC error = mac_system_check_reboot(td->td_ucred, uap->opt); #endif if (error == 0) error = priv_check(td, PRIV_REBOOT); if (error == 0) { if (uap->opt & RB_REROOT) { error = kern_reroot(); } else { mtx_lock(&Giant); kern_reboot(uap->opt); mtx_unlock(&Giant); } } return (error); } /* * Called by events that want to shut down.. e.g on a PC */ void shutdown_nice(int howto) { if (initproc != NULL) { /* Send a signal to init(8) and have it shutdown the world. 
*/ /* SIGUSR2 requests power off, SIGUSR1 halt, SIGINT otherwise. */ PROC_LOCK(initproc); if (howto & RB_POWEROFF) kern_psignal(initproc, SIGUSR2); else if (howto & RB_HALT) kern_psignal(initproc, SIGUSR1); else kern_psignal(initproc, SIGINT); PROC_UNLOCK(initproc); } else { /* No init(8) running, so simply reboot. */ kern_reboot(howto | RB_NOSYNC); } } /* * Print the system uptime as "Uptime: [Nd][Nh][Nm]Ns". Once a larger * unit has been printed (f != 0) every smaller unit is printed too, * even when it is zero. */ static void print_uptime(void) { int f; struct timespec ts; getnanouptime(&ts); printf("Uptime: "); f = 0; if (ts.tv_sec >= 86400) { printf("%ldd", (long)ts.tv_sec / 86400); ts.tv_sec %= 86400; f = 1; } if (f || ts.tv_sec >= 3600) { printf("%ldh", (long)ts.tv_sec / 3600); ts.tv_sec %= 3600; f = 1; } if (f || ts.tv_sec >= 60) { printf("%ldm", (long)ts.tv_sec / 60); ts.tv_sec %= 60; f = 1; } printf("%lds\n", (long)ts.tv_sec); } /* * Write a dump using the configured dumper. Returns EBUSY if a dump is * already in progress and ENXIO if no dumper is configured. Under DDB, * when textdump is requested and one is pending, a textdump is written * instead of a core dump. Otherwise returns the result of dumpsys(). */ int doadump(boolean_t textdump) { boolean_t coredump; int error; error = 0; if (dumping) return (EBUSY); if (dumper.dumper == NULL) return (ENXIO); /* Save the current context and thread id for dump debuggers. */ savectx(&dumppcb); dumptid = curthread->td_tid; dumping++; coredump = TRUE; #ifdef DDB if (textdump && textdump_pending) { coredump = FALSE; textdump_dumpsys(&dumper); } #endif if (coredump) error = dumpsys(&dumper); dumping--; return (error); } /* * Shutdown the system cleanly to prepare for reboot, halt, or power off. */ void kern_reboot(int howto) { static int once = 0; #if defined(SMP) /* * Bind us to CPU 0 so that all shutdown code runs there. Some * systems don't shutdown properly (i.e., ACPI power off) if we * run on another processor. */ if (!SCHEDULER_STOPPED()) { thread_lock(curthread); sched_bind(curthread, 0); thread_unlock(curthread); KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0")); } #endif /* We're in the process of rebooting. */ rebooting = 1; /* We are out of the debugger now. */ kdb_active = 0; /* * Do any callouts that should be done BEFORE syncing the filesystems.
*/ EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); /* * Now sync filesystems. The static "once" flag makes sure this * is attempted at most one time, even if kern_reboot() is * entered again. */ if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) { once = 1; bufshutdown(show_busybufs); } print_uptime(); cngrab(); /* * Ok, now do things that assume all filesystem activity has * been completed. */ EVENTHANDLER_INVOKE(shutdown_post_sync, howto); /* Dump only when RB_DUMP is set without RB_HALT. */ if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) doadump(TRUE); /* Now that we're going to really halt the system... */ EVENTHANDLER_INVOKE(shutdown_final, howto); for(;;) ; /* safety against shutdown_reset not working */ /* NOTREACHED */ } /* * Change the root filesystem; reached from sys_reboot() when RB_REROOT * is set. Only init may do this: EPERM is returned for any other * process. */ static int kern_reroot(void) { struct vnode *oldrootvnode, *vp; struct mount *mp, *devmp; int error; if (curproc != initproc) return (EPERM); /* * Mark the filesystem containing currently-running executable * (the temporary copy of init(8)) busy. */ vp = curproc->p_textvp; error = vn_lock(vp, LK_SHARED); if (error != 0) return (error); mp = vp->v_mount; error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) { /* * The non-blocking attempt failed: hold a reference on * the mount, drop the vnode lock, retry vfs_busy() * without MBF_NOWAIT, then relock the vnode and make * sure it was not doomed in the meantime. */ vfs_ref(mp); VOP_UNLOCK(vp, 0); error = vfs_busy(mp, 0); vn_lock(vp, LK_SHARED | LK_RETRY); vfs_rel(mp); if (error != 0) { VOP_UNLOCK(vp, 0); return (ENOENT); } if (vp->v_iflag & VI_DOOMED) { VOP_UNLOCK(vp, 0); vfs_unbusy(mp); return (ENOENT); } } VOP_UNLOCK(vp, 0); /* * Remove the filesystem containing currently-running executable * from the mount list, to prevent it from being unmounted * by vfs_unmountall(), and to avoid confusing vfs_mountroot(). * * Also preserve /dev - forcibly unmounting it could cause driver * reinitialization. */ vfs_ref(rootdevmp); devmp = rootdevmp; rootdevmp = NULL; mtx_lock(&mountlist_mtx); TAILQ_REMOVE(&mountlist, mp, mnt_list); TAILQ_REMOVE(&mountlist, devmp, mnt_list); mtx_unlock(&mountlist_mtx); oldrootvnode = rootvnode; /* * Unmount everything except for the two filesystems preserved above.
*/ mtx_lock(&mountlist_mtx); TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list); mtx_unlock(&mountlist_mtx); rootdevmp = devmp; vfs_rel(rootdevmp); /* * Mount the new rootfs. */ vfs_mountroot(); /* * Update all references to the old rootvnode. */ mountcheckdirs(oldrootvnode, rootvnode); /* * Add the temporary filesystem back and unbusy it. */ mtx_lock(&mountlist_mtx); TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); vfs_unbusy(mp); return (0); } /* * If the shutdown was a clean halt, behave accordingly. */ static void shutdown_halt(void *junk, int howto) { if (howto & RB_HALT) { printf("\n"); printf("The operating system has halted.\n"); printf("Please press any key to reboot.\n\n"); switch (cngetc()) { case -1: /* No console, just die */ cpu_halt(); /* NOTREACHED */ default: howto &= ~RB_HALT; break; } } } /* * Check to see if the system paniced, pause and then reboot * according to the specified delay. */ static void shutdown_panic(void *junk, int howto) { int loop; if (howto & RB_DUMP) { if (panic_reboot_wait_time != 0) { if (panic_reboot_wait_time != -1) { printf("Automatic reboot in %d seconds - " "press a key on the console to abort\n", panic_reboot_wait_time); for (loop = panic_reboot_wait_time * 10; loop > 0; --loop) { DELAY(1000 * 100); /* 1/10th second */ /* Did user type a key? 
*/ if (cncheckc() != -1) break; } /* The countdown ran out without a key press: go on to reboot. */ if (!loop) return; } } else { /* zero time specified - reboot NOW */ return; } printf("--> Press a key on the console to reboot,\n"); printf("--> or switch off the system now.\n"); cngetc(); } } /* * Everything done, now reset */ static void shutdown_reset(void *junk, int howto) { printf("Rebooting...\n"); DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ /* * Acquiring smp_ipi_mtx here has a double effect: * - it disables interrupts avoiding CPU0 preemption * by fast handlers (thus deadlocking against other CPUs) * - it avoids deadlocks against smp_rendezvous() or, more * generally, threads busy-waiting, with this spinlock held, * and waiting for responses by threads on other CPUs * (ie. smp_tlb_shootdown()). * * For the !SMP case it just needs to handle the former problem. */ #ifdef SMP mtx_lock_spin(&smp_ipi_mtx); #else spinlock_enter(); #endif /* cpu_boot(howto); */ /* doesn't do anything at the moment */ cpu_reset(); /* NOTREACHED */ /* assuming reset worked */ } #if defined(WITNESS) || defined(INVARIANTS) /* * Tunables controlling kassert_panic(). With warn_only == 0 (the * default) a failed KASSERT panics; with warn_only != 0 it is only * counted and, depending on do_log/do_ktr/do_kdb, reported. */ static int kassert_warn_only = 0; #ifdef KDB static int kassert_do_kdb = 0; #endif #ifdef KTR static int kassert_do_ktr = 0; #endif static int kassert_do_log = 1; static int kassert_log_pps_limit = 4; static int kassert_log_mute_at = 0; static int kassert_log_panic_at = 0; static int kassert_warnings = 0; SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW, NULL, "kassert options"); /* The description matches kassert_panic(): it panics when warn_only is 0. */ SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, CTLFLAG_RWTUN, &kassert_warn_only, 0, "KASSERT triggers a panic (0) or just a warning (1)"); #ifdef KDB SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, CTLFLAG_RWTUN, &kassert_do_kdb, 0, "KASSERT will enter the debugger"); #endif #ifdef KTR SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, CTLFLAG_RWTUN, &kassert_do_ktr, 0, "KASSERT does a KTR, set this to the KTRMASK you want"); #endif /* do_log only selects whether a non-fatal assertion failure is printed. */ SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, CTLFLAG_RWTUN, &kassert_do_log, 0, "If warn_only is enabled, log (1) or do not log (0) assertion violations");
SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RWTUN, &kassert_warnings, 0, "number of KASSERTs that have been triggered"); SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, CTLFLAG_RWTUN, &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic"); SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, CTLFLAG_RWTUN, &kassert_log_pps_limit, 0, "limit number of log messages per second"); SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, CTLFLAG_RWTUN, &kassert_log_mute_at, 0, "max number of KASSERTS to log"); static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0, kassert_sysctl_kassert, "I", "set to trigger a test kassert"); /* * Sysctl handler that fires a test KASSERT whenever a new value is * written; reads (req->newptr == NULL) and errors return early. */ static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS) { int error, i; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error == 0) { i = 0; error = sysctl_handle_int(oidp, &i, 0, req); } if (error != 0 || req->newptr == NULL) return (error); KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i)); return (0); } /* * Called by KASSERT, this decides if we will panic * or if we will log via printf and/or ktr. */ void kassert_panic(const char *fmt, ...) { static char buf[256]; va_list ap; va_start(ap, fmt); (void)vsnprintf(buf, sizeof(buf), fmt, ap); va_end(ap); /* * panic if we're not just warning, or if we've exceeded * kassert_log_panic_at warnings. */ if (!kassert_warn_only || (kassert_log_panic_at > 0 && kassert_warnings >= kassert_log_panic_at)) { va_start(ap, fmt); vpanic(fmt, ap); /* NORETURN */ } #ifdef KTR if (kassert_do_ktr) CTR0(ktr_mask, buf); #endif /* KTR */ /* * log if we've not yet met the mute limit.
*/ if (kassert_do_log && (kassert_log_mute_at == 0 || kassert_warnings < kassert_log_mute_at)) { static struct timeval lasterr; static int curerr; /* Rate-limit the messages to kassert_log_pps_limit per second. */ if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) { printf("KASSERT failed: %s\n", buf); kdb_backtrace(); } } #ifdef KDB if (kassert_do_kdb) { kdb_enter(KDB_WHY_KASSERT, buf); } #endif atomic_add_int(&kassert_warnings, 1); } #endif /* WITNESS || INVARIANTS */ /* * Panic is called on unresolvable fatal errors. It prints "panic: mesg", * and then reboots. If we are called twice, then we avoid trying to sync * the disks as this often leads to recursive panics. */ void panic(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vpanic(fmt, ap); } /* * va_list flavour of panic(). On the first panic (panicstr == NULL) the * message is formatted into a static buffer, panicstr is pointed at it * and RB_DUMP is requested; on a later panic the message is only * printed and RB_NOSYNC is set. Finishes by calling kern_reboot(). */ void vpanic(const char *fmt, va_list ap) { #ifdef SMP cpuset_t other_cpus; #endif struct thread *td = curthread; int bootopt, newpanic; static char buf[256]; spinlock_enter(); #ifdef SMP /* * stop_cpus_hard(other_cpus) should prevent multiple CPUs from * concurrently entering panic. Only the winner will proceed * further. */ if (panicstr == NULL && !kdb_active) { other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); stop_cpus_hard(other_cpus); } /* * Ensure that the scheduler is stopped while panicking, even if panic * has been entered from kdb. */ td->td_stopsched = 1; #endif bootopt = RB_AUTOBOOT; newpanic = 0; if (panicstr) bootopt |= RB_NOSYNC; else { bootopt |= RB_DUMP; /* Set panicstr right away; it is repointed at the formatted text below. */ panicstr = fmt; newpanic = 1; } if (newpanic) { (void)vsnprintf(buf, sizeof(buf), fmt, ap); panicstr = buf; cngrab(); printf("panic: %s\n", buf); } else { printf("panic: "); vprintf(fmt, ap); printf("\n"); } #ifdef SMP printf("cpuid = %d\n", PCPU_GET(cpuid)); #endif #ifdef KDB if (newpanic && trace_on_panic) kdb_backtrace(); if (debugger_on_panic) kdb_enter(KDB_WHY_PANIC, "panic"); #endif /*thread_lock(td); */ td->td_flags |= TDF_INPANIC; /* thread_unlock(td); */ if (!sync_on_panic) bootopt |= RB_NOSYNC; kern_reboot(bootopt); } /* * Support for poweroff delay.
* * Please note that setting this delay too short might power off your machine * before the write cache on your hard disk has been flushed, leading to * soft-updates inconsistencies. */ #ifndef POWEROFF_DELAY # define POWEROFF_DELAY 5000 #endif static int poweroff_delay = POWEROFF_DELAY; SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)"); /* * shutdown_final handler: when powering off, busy-wait for * poweroff_delay milliseconds. Does nothing unless RB_POWEROFF is set * and the delay is positive. */ static void poweroff_wait(void *junk, int howto) { if (!(howto & RB_POWEROFF) || poweroff_delay <= 0) return; /* poweroff_delay is in milliseconds, hence the scaling by 1000. */ DELAY(poweroff_delay * 1000); } /* * Some system processes (e.g. syncer) need to be stopped at appropriate * points in their main loops prior to a system shutdown, so that they * won't interfere with the shutdown process (e.g. by holding a disk buf * to cause sync to fail). For each of these system processes, register * kproc_shutdown() as a handler for one of the shutdown events. */ static int kproc_shutdown_wait = 60; SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process"); /* * Suspend the system process passed as arg, waiting at most * kproc_shutdown_wait seconds. Skipped entirely after a panic. */ void kproc_shutdown(void *arg, int howto) { struct proc *p; int error; if (panicstr) return; p = (struct proc *)arg; printf("Waiting (max %d seconds) for system process `%s' to stop... ", kproc_shutdown_wait, p->p_comm); error = kproc_suspend(p, kproc_shutdown_wait * hz); if (error == EWOULDBLOCK) printf("timed out\n"); else printf("done\n"); } /* * Thread counterpart of kproc_shutdown(): suspend the system thread * passed as arg, waiting at most kproc_shutdown_wait seconds. Skipped * entirely after a panic. */ void kthread_shutdown(void *arg, int howto) { struct thread *td; int error; if (panicstr) return; td = (struct thread *)arg; printf("Waiting (max %d seconds) for system thread `%s' to stop...
", kproc_shutdown_wait, td->td_name); error = kthread_suspend(td, kproc_shutdown_wait * hz); if (error == EWOULDBLOCK) printf("timed out\n"); else printf("done\n"); } static char dumpdevname[sizeof(((struct cdev*)NULL)->si_name)]; SYSCTL_STRING(_kern_shutdown, OID_AUTO, dumpdevname, CTLFLAG_RD, dumpdevname, 0, "Device for kernel dumps"); /* * Registration of dumpers. * * Requires PRIV_SETDUMPER. A NULL di clears the current dumper and * frees its block buffer. Otherwise *di is installed, unless a dumper * is already configured, in which case EBUSY is returned. The device * name is copied into dumpdevname, with a console message if it had to * be truncated. */ int set_dumper(struct dumperinfo *di, const char *devname, struct thread *td) { size_t wantcopy; int error; error = priv_check(td, PRIV_SETDUMPER); if (error != 0) return (error); if (di == NULL) { if (dumper.blockbuf != NULL) free(dumper.blockbuf, M_DUMPER); bzero(&dumper, sizeof(dumper)); dumpdevname[0] = '\0'; return (0); } if (dumper.dumper != NULL) return (EBUSY); dumper = *di; wantcopy = strlcpy(dumpdevname, devname, sizeof(dumpdevname)); if (wantcopy >= sizeof(dumpdevname)) { printf("set_dumper: device name truncated from '%s' -> '%s'\n", devname, dumpdevname); } /* One zeroed block of scratch space, used by dump_write_pad(). */ dumper.blockbuf = malloc(di->blocksize, M_DUMPER, M_WAITOK | M_ZERO); return (0); } /* * Call dumper with bounds checking: a non-empty write that would fall * outside [mediaoffset, mediaoffset + mediasize) is rejected with * ENOSPC. */ int dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, off_t offset, size_t length) { if (length != 0 && (offset < di->mediaoffset || offset - di->mediaoffset + length > di->mediasize)) { printf("Attempt to write outside dump device boundaries.\n" "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", (intmax_t)offset, (intmax_t)di->mediaoffset, (uintmax_t)length, (intmax_t)di->mediasize); return (ENOSPC); } return (di->dumper(di->priv, virtual, physical, offset, length)); } /* Pad the data to one full dumper block and write it with bounds checking.
*/
int
dump_write_pad(struct dumperinfo *di, void *virtual, vm_offset_t physical,
    off_t offset, size_t length, size_t *size)
{
	char *temp;
	int ret;

	/* At most one device block can be written per call. */
	if (length > di->blocksize)
		return (ENOMEM);

	/* The caller is told how many bytes are actually written. */
	*size = di->blocksize;
	if (length == di->blocksize)
		temp = virtual;
	else {
		/*
		 * Short write: stage the data in di->blockbuf with the tail
		 * zero-filled so that a full block goes to the device.
		 */
		temp = di->blockbuf;
		memset(temp + length, 0, di->blocksize - length);
		memcpy(temp, virtual, length);
	}
	ret = dump_write(di, temp, physical, offset, *size);

	return (ret);
}

/*
 * Fill in a kernel dump header.
 *
 * The header is zeroed first.  The magic, architecture, hostname and version
 * strings are copied with strlcpy() bounded by the destination field size;
 * the numeric fields are stored through htod32()/htod64().  The panic string
 * is recorded only when panicstr is set.  The parity field is computed last,
 * once every other field has its final value.
 */
void
mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver,
    uint64_t dumplen, uint32_t blksz)
{

	bzero(kdh, sizeof(*kdh));
	strlcpy(kdh->magic, magic, sizeof(kdh->magic));
	strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
	kdh->version = htod32(KERNELDUMPVERSION);
	kdh->architectureversion = htod32(archver);
	kdh->dumplength = htod64(dumplen);
	kdh->dumptime = htod64(time_second);
	kdh->blocksize = htod32(blksz);
	strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname));
	strlcpy(kdh->versionstring, version, sizeof(kdh->versionstring));
	if (panicstr != NULL)
		strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
	kdh->parity = kerneldump_parity(kdh);
}

#ifdef DDB
/* DDB "show panic" command: print the recorded panic string, if any. */
DB_SHOW_COMMAND(panic, db_show_panic)
{

	if (panicstr == NULL)
-		db_printf("Not paniced\n");
+		db_printf("panicstr not set\n");
	else
		db_printf("panic: %s\n", panicstr);
}
#endif
Index: projects/vnet/sys/kern/kern_timeout.c
===================================================================
--- projects/vnet/sys/kern/kern_timeout.c (revision 301522)
+++ projects/vnet/sys/kern/kern_timeout.c (revision 301523)
@@ -1,1655 +1,1654 @@
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc.
and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * From: @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_callout_profiling.h" #include "opt_ddb.h" #if defined(__arm__) #include "opt_timer.h" #endif #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #ifdef SMP #include #endif #ifndef NO_EVENTTIMERS DPCPU_DECLARE(sbintime_t, hardclocktime); #endif SDT_PROVIDER_DEFINE(callout_execute); SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *"); SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *"); #ifdef CALLOUT_PROFILING static int avg_depth; SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, "Average number of items examined per softclock call. Units = 1/1000"); static int avg_gcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0, "Average number of Giant callouts made per softclock call. Units = 1/1000"); static int avg_lockcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0, "Average number of lock callouts made per softclock call. Units = 1/1000"); static int avg_mpcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, "Average number of MP callouts made per softclock call. Units = 1/1000"); static int avg_depth_dir; SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0, "Average number of direct callouts examined per callout_process call. " "Units = 1/1000"); static int avg_lockcalls_dir; SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD, &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per " "callout_process call. Units = 1/1000"); static int avg_mpcalls_dir; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir, 0, "Average number of MP direct callouts made per callout_process call. 
" "Units = 1/1000"); #endif static int ncallout; SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0, "Number of entries in callwheel and size of timeout() preallocation"); #ifdef RSS static int pin_default_swi = 1; static int pin_pcpu_swi = 1; #else static int pin_default_swi = 0; static int pin_pcpu_swi = 0; #endif SYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi, 0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)"); SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi, 0, "Pin the per-CPU swis (except PCPU 0, which is also default"); /* * TODO: * allocate more timeout table slots when table overflows. */ u_int callwheelsize, callwheelmask; /* * The callout cpu exec entities represent informations necessary for * describing the state of callouts currently running on the CPU and the ones * necessary for migrating callouts to the new callout cpu. In particular, * the first entry of the array cc_exec_entity holds informations for callout * running in SWI thread context, while the second one holds informations * for callout running directly from hardware interrupt context. * The cached informations are very important for deferring migration when * the migrating callout is already running. */ struct cc_exec { struct callout *cc_curr; void (*cc_drain)(void *); #ifdef SMP void (*ce_migration_func)(void *); void *ce_migration_arg; int ce_migration_cpu; sbintime_t ce_migration_time; sbintime_t ce_migration_prec; #endif bool cc_cancel; bool cc_waiting; }; /* * There is one struct callout_cpu per cpu, holding all relevant * state for the callout processing thread on the individual CPU. 
*/ struct callout_cpu { struct mtx_padalign cc_lock; struct cc_exec cc_exec_entity[2]; struct callout *cc_next; struct callout *cc_callout; struct callout_list *cc_callwheel; struct callout_tailq cc_expireq; struct callout_slist cc_callfree; sbintime_t cc_firstevent; sbintime_t cc_lastscan; void *cc_cookie; u_int cc_bucket; u_int cc_inited; char cc_ktr_event_name[20]; }; #define callout_migrating(c) ((c)->c_iflags & CALLOUT_DFRMIGRATION) #define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr #define cc_exec_drain(cc, dir) cc->cc_exec_entity[dir].cc_drain #define cc_exec_next(cc) cc->cc_next #define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel #define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting #ifdef SMP #define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func #define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg #define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu #define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time #define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec struct callout_cpu cc_cpu[MAXCPU]; #define CPUBLOCK MAXCPU #define CC_CPU(cpu) (&cc_cpu[(cpu)]) #define CC_SELF() CC_CPU(PCPU_GET(cpuid)) #else struct callout_cpu cc_cpu; #define CC_CPU(cpu) &cc_cpu #define CC_SELF() &cc_cpu #endif #define CC_LOCK(cc) mtx_lock_spin(&(cc)->cc_lock) #define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock) #define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) static int timeout_cpu; static void callout_cpu_init(struct callout_cpu *cc, int cpu); static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, #ifdef CALLOUT_PROFILING int *mpcalls, int *lockcalls, int *gcalls, #endif int direct); static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); /** * Locked by cc_lock: * cc_curr - If a callout is in progress, it is cc_curr. 
*                     If cc_curr is non-NULL, threads waiting in
 *                     callout_drain() will be woken up as soon as the
 *                     relevant callout completes.
 *   cc_cancel       - Changing to 1 with both callout_lock and cc_lock held
 *                     guarantees that the current callout will not run.
 *                     softclock_call_cc() sets this to false before it
 *                     drops cc_lock to acquire c_lock, and it calls
 *                     the handler only if cc_cancel is still false after
 *                     c_lock is successfully acquired.
 *   cc_waiting      - If a thread is waiting in callout_drain(), then
 *                     cc_waiting is nonzero.  Set only when
 *                     cc_curr is non-NULL.
 */

/*
 * Resets the execution entity tied to a specific callout cpu.
 */
static void
cc_cce_cleanup(struct callout_cpu *cc, int direct)
{

	cc_exec_curr(cc, direct) = NULL;
	cc_exec_cancel(cc, direct) = false;
	cc_exec_waiting(cc, direct) = false;
#ifdef SMP
	/* CPUBLOCK in the migration cpu means "no migration requested". */
	cc_migration_cpu(cc, direct) = CPUBLOCK;
	cc_migration_time(cc, direct) = 0;
	cc_migration_prec(cc, direct) = 0;
	cc_migration_func(cc, direct) = NULL;
	cc_migration_arg(cc, direct) = NULL;
#endif
}

/*
 * Checks if migration is requested by a specific callout cpu.
 */
static int
cc_cce_migrating(struct callout_cpu *cc, int direct)
{

#ifdef SMP
	return (cc_migration_cpu(cc, direct) != CPUBLOCK);
#else
	return (0);
#endif
}

/*
 * Kernel low level callwheel initialization
 * called on cpu0 during kernel startup.
 */
static void
callout_callwheel_init(void *dummy)
{
	struct callout_cpu *cc;

	/*
	 * Calculate the size of the callout wheel and the preallocated
	 * timeout() structures.
	 * XXX: Clip callout to result of previous function of maxusers
	 * maximum 384.  This is still huge, but acceptable.
	 */
	memset(CC_CPU(0), 0, sizeof(cc_cpu));
	ncallout = imin(16 + maxproc + maxfiles, 18508);
	TUNABLE_INT_FETCH("kern.ncallout", &ncallout);

	/*
	 * Calculate callout wheel size, should be next power of two higher
	 * than 'ncallout'.
	 */
	callwheelsize = 1 << fls(ncallout);
	callwheelmask = callwheelsize - 1;

	/*
	 * Fetch whether we're pinning the swi's or not.
	 */
	TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi);
	TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi);

	/*
	 * Only cpu0 handles timeout(9) and receives a preallocation.
	 *
	 * XXX: Once all timeout(9) consumers are converted this can
	 * be removed.
	 */
	timeout_cpu = PCPU_GET(cpuid);
	cc = CC_CPU(timeout_cpu);
	cc->cc_callout = malloc(ncallout * sizeof(struct callout),
	    M_CALLOUT, M_WAITOK);
	callout_cpu_init(cc, timeout_cpu);
}
SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL);

/*
 * Initialize the per-cpu callout structures.
 *
 * Sets up the lock, the callwheel buckets, the expire queue and both
 * execution entities.  If cc->cc_callout is non-NULL, its ncallout entries
 * are initialized and pushed onto the cc_callfree list.
 */
static void
callout_cpu_init(struct callout_cpu *cc, int cpu)
{
	struct callout *c;
	int i;

	mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
	SLIST_INIT(&cc->cc_callfree);
	cc->cc_inited = 1;
	cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize,
	    M_CALLOUT, M_WAITOK);
	for (i = 0; i < callwheelsize; i++)
		LIST_INIT(&cc->cc_callwheel[i]);
	TAILQ_INIT(&cc->cc_expireq);
	cc->cc_firstevent = SBT_MAX;
	for (i = 0; i < 2; i++)
		cc_cce_cleanup(cc, i);
	snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
	    "callwheel cpu %d", cpu);
	if (cc->cc_callout == NULL)	/* Only cpu0 handles timeout(9) */
		return;
	for (i = 0; i < ncallout; i++) {
		c = &cc->cc_callout[i];
		callout_init(c, 0);
		c->c_iflags = CALLOUT_LOCAL_ALLOC;
		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
	}
}

#ifdef SMP
/*
 * Switches the cpu tied to a specific callout.
 * The function expects a locked incoming callout cpu and returns with
 * locked outcoming callout cpu.
 */
static struct callout_cpu *
callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
{
	struct callout_cpu *new_cc;

	MPASS(c != NULL && cc != NULL);
	CC_LOCK_ASSERT(cc);

	/*
	 * Avoid interrupts and preemption firing after the callout cpu
	 * is blocked in order to avoid deadlocks as the new thread
	 * may be willing to acquire the callout cpu lock.
	 */
	c->c_cpu = CPUBLOCK;
	spinlock_enter();
	CC_UNLOCK(cc);
	new_cc = CC_CPU(new_cpu);
	CC_LOCK(new_cc);
	spinlock_exit();
	c->c_cpu = new_cpu;
	return (new_cc);
}
#endif

/*
 * Start standard softclock thread.
 *
 * Registers the softclock swi for timeout_cpu and, under SMP, initializes
 * the callout_cpu of every other CPU and registers a swi for each.  Pinning
 * failures are reported with printf() but are not fatal.
 */
static void
start_softclock(void *dummy)
{
	struct callout_cpu *cc;
	char name[MAXCOMLEN];
#ifdef SMP
	int cpu;
	struct intr_event *ie;
#endif

	cc = CC_CPU(timeout_cpu);
	snprintf(name, sizeof(name), "clock (%d)", timeout_cpu);
	if (swi_add(&clk_intr_event, name, softclock, cc, SWI_CLOCK,
	    INTR_MPSAFE, &cc->cc_cookie))
		panic("died while creating standard software ithreads");
	if (pin_default_swi &&
	    (intr_event_bind(clk_intr_event, timeout_cpu) != 0)) {
		printf("%s: timeout clock couldn't be pinned to cpu %d\n",
		    __func__,
		    timeout_cpu);
	}

#ifdef SMP
	CPU_FOREACH(cpu) {
		if (cpu == timeout_cpu)
			continue;
		cc = CC_CPU(cpu);
		cc->cc_callout = NULL;	/* Only cpu0 handles timeout(9). */
		callout_cpu_init(cc, cpu);
		snprintf(name, sizeof(name), "clock (%d)", cpu);
		ie = NULL;
		if (swi_add(&ie, name, softclock, cc, SWI_CLOCK,
		    INTR_MPSAFE, &cc->cc_cookie))
			panic("died while creating standard software ithreads");
		if (pin_pcpu_swi && (intr_event_bind(ie, cpu) != 0)) {
			printf("%s: per-cpu clock couldn't be pinned to "
			    "cpu %d\n",
			    __func__,
			    cpu);
		}
	}
#endif
}
SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL);

#define	CC_HASH_SHIFT	8

/* Unmasked wheel position of a time: sbt shifted right by 32 - CC_HASH_SHIFT. */
static inline u_int
callout_hash(sbintime_t sbt)
{

	return (sbt >> (32 - CC_HASH_SHIFT));
}

/* Callwheel bucket index of a time: callout_hash() masked to the wheel size. */
static inline u_int
callout_get_bucket(sbintime_t sbt)
{

	return (callout_hash(sbt) & callwheelmask);
}

/*
 * Scan the current CPU's callwheel for callouts due at 'now'.
 *
 * Expired CALLOUT_DIRECT callouts are run in place through
 * softclock_call_cc(); other expired callouts are moved to cc_expireq and
 * the swi is scheduled if that queue is non-empty.  While scanning, the
 * [first, last] window of the next event is computed, stored in
 * cc_firstevent and (unless NO_EVENTTIMERS) passed to cpu_new_callout().
 */
void
callout_process(sbintime_t now)
{
	struct callout *tmp, *tmpn;
	struct callout_cpu *cc;
	struct callout_list *sc;
	sbintime_t first, last, max, tmp_max;
	uint32_t lookahead;
	u_int firstb, lastb, nowb;
#ifdef CALLOUT_PROFILING
	int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0;
#endif

	cc = CC_SELF();
	mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);

	/* Compute the buckets of the last scan and present times. */
	firstb = callout_hash(cc->cc_lastscan);
	cc->cc_lastscan = now;
	nowb = callout_hash(now);

	/* Compute the last bucket and minimum time of the bucket after it. */
	if (nowb == firstb)
		lookahead = (SBT_1S / 16);
	else if (nowb - firstb == 1)
		lookahead = (SBT_1S / 8);
	else
		lookahead = (SBT_1S / 2);
	first = last = now;
	first += (lookahead / 2);
	last += lookahead;
	last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT));
	lastb = callout_hash(last) - 1;
	max = last;

	/*
	 * Check if we wrapped around the entire wheel from the last scan.
	 * In case, we need to scan entirely the wheel for pending callouts.
	 */
	if (lastb - firstb >= callwheelsize) {
		lastb = firstb + callwheelsize - 1;
		if (nowb - firstb >= callwheelsize)
			nowb = lastb;
	}

	/* Iterate callwheel from firstb to nowb and then up to lastb. */
	do {
		sc = &cc->cc_callwheel[firstb & callwheelmask];
		tmp = LIST_FIRST(sc);
		while (tmp != NULL) {
			/* Run the callout if present time within allowed. */
			if (tmp->c_time <= now) {
				/*
				 * Consumer told us the callout may be run
				 * directly from hardware interrupt context.
				 */
				if (tmp->c_iflags & CALLOUT_DIRECT) {
#ifdef CALLOUT_PROFILING
					++depth_dir;
#endif
					/*
					 * Publish the scan cursor and bucket:
					 * callout_cc_add() and
					 * callout_reset_sbt_on() adjust
					 * cc_next when they touch this bucket.
					 */
					cc_exec_next(cc) =
					    LIST_NEXT(tmp, c_links.le);
					cc->cc_bucket = firstb & callwheelmask;
					LIST_REMOVE(tmp, c_links.le);
					softclock_call_cc(tmp, cc,
#ifdef CALLOUT_PROFILING
					    &mpcalls_dir, &lockcalls_dir, NULL,
#endif
					    1);
					tmp = cc_exec_next(cc);
					cc_exec_next(cc) = NULL;
				} else {
					tmpn = LIST_NEXT(tmp, c_links.le);
					LIST_REMOVE(tmp, c_links.le);
					TAILQ_INSERT_TAIL(&cc->cc_expireq,
					    tmp, c_links.tqe);
					tmp->c_iflags |= CALLOUT_PROCESSED;
					tmp = tmpn;
				}
				continue;
			}
			/* Skip events from distant future. */
			if (tmp->c_time >= max)
				goto next;
			/*
			 * Event minimal time is bigger than present maximal
			 * time, so it cannot be aggregated.
			 */
			if (tmp->c_time > last) {
				lastb = nowb;
				goto next;
			}
			/* Update first and last time, respecting this event. */
			if (tmp->c_time < first)
				first = tmp->c_time;
			tmp_max = tmp->c_time + tmp->c_precision;
			if (tmp_max < last)
				last = tmp_max;
next:
			tmp = LIST_NEXT(tmp, c_links.le);
		}
		/* Proceed with the next bucket. */
		firstb++;
		/*
		 * Stop if we looked after present time and found
		 * some event we can't execute at now.
		 * Stop if we looked far enough into the future.
		 */
	} while (((int)(firstb - lastb)) <= 0);
	cc->cc_firstevent = last;
#ifndef NO_EVENTTIMERS
	cpu_new_callout(curcpu, last, first);
#endif
#ifdef CALLOUT_PROFILING
	avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8;
	avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
	avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
#endif
	mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
	/*
	 * swi_sched acquires the thread lock, so we don't want to call it
	 * with cc_lock held; incorrect locking order.
	 */
	if (!TAILQ_EMPTY(&cc->cc_expireq))
		swi_sched(cc->cc_cookie, 0);
}

/*
 * Lock and return the callout_cpu that 'c' is currently assigned to.
 *
 * c->c_cpu is sampled, the matching callout_cpu is locked, and the sample is
 * re-checked; if it changed in the meantime the lock is dropped and the
 * sequence repeats.  Under SMP the loop spins while c_cpu is CPUBLOCK.
 */
static struct callout_cpu *
callout_lock(struct callout *c)
{
	struct callout_cpu *cc;
	int cpu;

	for (;;) {
		cpu = c->c_cpu;
#ifdef SMP
		if (cpu == CPUBLOCK) {
			while (c->c_cpu == CPUBLOCK)
				cpu_spinwait();
			continue;
		}
#endif
		cc = CC_CPU(cpu);
		CC_LOCK(cc);
		if (cpu == c->c_cpu)
			break;
		CC_UNLOCK(cc);
	}
	return (cc);
}

/*
 * Insert 'c' into cc's callwheel so that func(arg) fires at 'sbt'.
 *
 * The caller must hold cc's lock (asserted).  A time earlier than
 * cc_lastscan is raised to cc_lastscan.  The callout is marked pending and
 * active, and direct when C_DIRECT_EXEC is set in 'flags'.  Unless
 * NO_EVENTTIMERS is defined, cpu_new_callout() is called when the new event
 * is earlier than cc_firstevent.
 */
static void
callout_cc_add(struct callout *c, struct callout_cpu *cc,
    sbintime_t sbt, sbintime_t precision, void (*func)(void *),
    void *arg, int cpu, int flags)
{
	int bucket;

	CC_LOCK_ASSERT(cc);
	if (sbt < cc->cc_lastscan)
		sbt = cc->cc_lastscan;
	c->c_arg = arg;
	c->c_iflags |= CALLOUT_PENDING;
	c->c_iflags &= ~CALLOUT_PROCESSED;
	c->c_flags |= CALLOUT_ACTIVE;
	if (flags & C_DIRECT_EXEC)
		c->c_iflags |= CALLOUT_DIRECT;
	c->c_func = func;
	c->c_time = sbt;
	c->c_precision = precision;
	bucket = callout_get_bucket(c->c_time);
	CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
	    c, (int)(c->c_precision >> 32),
	    (u_int)(c->c_precision & 0xffffffff));
	LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
	/* Inserted at the head of the recorded bucket: make it the cursor. */
	if (cc->cc_bucket == bucket)
		cc_exec_next(cc) = c;
#ifndef NO_EVENTTIMERS
	/*
	 * Inform the eventtimers(4) subsystem there's a new callout
	 * that has been inserted, but only if really required.
	 */
	if (SBT_MAX - c->c_time < c->c_precision)
		c->c_precision = SBT_MAX - c->c_time;
	sbt = c->c_time + c->c_precision;
	if (sbt < cc->cc_firstevent) {
		cc->cc_firstevent = sbt;
		cpu_new_callout(cpu, sbt, c->c_time);
	}
#endif
}

/*
 * Return a CALLOUT_LOCAL_ALLOC callout to cc's free list, clearing its
 * function pointer.  Callouts without that flag are left untouched.
 */
static void
callout_cc_del(struct callout *c, struct callout_cpu *cc)
{

	if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) == 0)
		return;
	c->c_func = NULL;
	SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}

/*
 * Run one callout.
 *
 * Entered with cc locked, and cc is locked again on every return path.  The
 * lock is dropped while the callout's own lock (if any) is taken and the
 * handler runs.  'direct' selects which cc_exec_entity slot tracks the
 * execution.  After the handler returns, a registered drain function is
 * called, waiters are woken up, and a deferred migration recorded in the
 * slot is either cancelled or carried out.
 */
static void
softclock_call_cc(struct callout *c, struct callout_cpu *cc,
#ifdef CALLOUT_PROFILING
    int *mpcalls, int *lockcalls, int *gcalls,
#endif
    int direct)
{
	struct rm_priotracker tracker;
	void (*c_func)(void *);
	void *c_arg;
	struct lock_class *class;
	struct lock_object *c_lock;
	uintptr_t lock_status;
	int c_iflags;
#ifdef SMP
	struct callout_cpu *new_cc;
	void (*new_func)(void *);
	void *new_arg;
	int flags, new_cpu;
	sbintime_t new_prec, new_time;
#endif
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
	sbintime_t sbt1, sbt2;
	struct timespec ts2;
	static sbintime_t maxdt = 2 * SBT_1MS;	/* 2 msec */
	static timeout_t *lastfunc;
#endif

	KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING,
	    ("softclock_call_cc: pend %p %x", c, c->c_iflags));
	KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE,
	    ("softclock_call_cc: act %p %x", c, c->c_flags));
	class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
	lock_status = 0;
	if (c->c_flags & CALLOUT_SHAREDLOCK) {
		if (class == &lock_class_rm)
			lock_status = (uintptr_t)&tracker;
		else
			lock_status = 1;
	}
	/* Cache what is needed from *c before cc_lock is dropped. */
	c_lock = c->c_lock;
	c_func = c->c_func;
	c_arg = c->c_arg;
	c_iflags = c->c_iflags;
	if (c->c_iflags & CALLOUT_LOCAL_ALLOC)
		c->c_iflags = CALLOUT_LOCAL_ALLOC;
	else
		c->c_iflags &= ~CALLOUT_PENDING;

	cc_exec_curr(cc, direct) = c;
	cc_exec_cancel(cc, direct) = false;
	cc_exec_drain(cc, direct) = NULL;
	CC_UNLOCK(cc);
	if (c_lock != NULL) {
		class->lc_lock(c_lock, lock_status);
		/*
		 * The callout may have been cancelled
		 * while we switched locks.
		 */
		if (cc_exec_cancel(cc, direct)) {
			class->lc_unlock(c_lock);
			goto skip;
		}
		/* The callout cannot be stopped now. */
		cc_exec_cancel(cc, direct) = true;
		if (c_lock == &Giant.lock_object) {
#ifdef CALLOUT_PROFILING
			(*gcalls)++;
#endif
			CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
			    c, c_func, c_arg);
		} else {
#ifdef CALLOUT_PROFILING
			(*lockcalls)++;
#endif
			CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
			    c, c_func, c_arg);
		}
	} else {
#ifdef CALLOUT_PROFILING
		(*mpcalls)++;
#endif
		CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
		    c, c_func, c_arg);
	}
	KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running",
	    "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct);
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
	sbt1 = sbinuptime();
#endif
	THREAD_NO_SLEEPING();
	SDT_PROBE1(callout_execute, , , callout__start, c);
	c_func(c_arg);
	SDT_PROBE1(callout_execute, , , callout__end, c);
	THREAD_SLEEPING_OK();
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
	/* Report handlers that set a new record for run time. */
	sbt2 = sbinuptime();
	sbt2 -= sbt1;
	if (sbt2 > maxdt) {
		if (lastfunc != c_func || sbt2 > maxdt * 2) {
			ts2 = sbttots(sbt2);
			printf(
		"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
			    c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
		}
		maxdt = sbt2;
		lastfunc = c_func;
	}
#endif
	KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
	CTR1(KTR_CALLOUT, "callout %p finished", c);
	/* Uses the cached flags: *c may no longer be valid here. */
	if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0)
		class->lc_unlock(c_lock);
skip:
	CC_LOCK(cc);
	KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr"));
	cc_exec_curr(cc, direct) = NULL;
	if (cc_exec_drain(cc, direct)) {
		void (*drain)(void *);

		/* The drain callback is invoked without cc_lock held. */
		drain = cc_exec_drain(cc, direct);
		cc_exec_drain(cc, direct) = NULL;
		CC_UNLOCK(cc);
		drain(c_arg);
		CC_LOCK(cc);
	}
	if (cc_exec_waiting(cc, direct)) {
		/*
		 * There is someone waiting for the
		 * callout to complete.
		 * If the callout was scheduled for
		 * migration just cancel it.
		 */
		if (cc_cce_migrating(cc, direct)) {
			cc_cce_cleanup(cc, direct);

			/*
			 * It should be assert here that the callout is not
			 * destroyed but that is not easy.
			 */
			c->c_iflags &= ~CALLOUT_DFRMIGRATION;
		}
		cc_exec_waiting(cc, direct) = false;
		CC_UNLOCK(cc);
		wakeup(&cc_exec_waiting(cc, direct));
		CC_LOCK(cc);
	} else if (cc_cce_migrating(cc, direct)) {
		KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0,
		    ("Migrating legacy callout %p", c));
#ifdef SMP
		/*
		 * If the callout was scheduled for
		 * migration just perform it now.
		 */
		new_cpu = cc_migration_cpu(cc, direct);
		new_time = cc_migration_time(cc, direct);
		new_prec = cc_migration_prec(cc, direct);
		new_func = cc_migration_func(cc, direct);
		new_arg = cc_migration_arg(cc, direct);
		cc_cce_cleanup(cc, direct);

		/*
		 * It should be assert here that the callout is not destroyed
		 * but that is not easy.
		 *
		 * As first thing, handle deferred callout stops.
		 */
		if (!callout_migrating(c)) {
			CTR3(KTR_CALLOUT,
			     "deferred cancelled %p func %p arg %p",
			     c, new_func, new_arg);
			callout_cc_del(c, cc);
			return;
		}
		c->c_iflags &= ~CALLOUT_DFRMIGRATION;

		new_cc = callout_cpu_switch(c, cc, new_cpu);
		flags = (direct) ? C_DIRECT_EXEC : 0;
		callout_cc_add(c, new_cc, new_time, new_prec, new_func,
		    new_arg, new_cpu, flags);
		CC_UNLOCK(new_cc);
		CC_LOCK(cc);
#else
		panic("migration should not happen");
#endif
	}
	/*
	 * If the current callout is locally allocated (from
	 * timeout(9)) then put it on the freelist.
	 *
	 * Note: we need to check the cached copy of c_iflags because
	 * if it was not local, then it's not safe to deref the
	 * callout pointer.
	 */
	KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 ||
	    c->c_iflags == CALLOUT_LOCAL_ALLOC,
	    ("corrupted callout"));
	if (c_iflags & CALLOUT_LOCAL_ALLOC)
		callout_cc_del(c, cc);
}

/*
 * The callout mechanism is based on the work of Adam M. Costello and
 * George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and T. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas Nov 1987.
 */

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 *
 * 'arg' is the struct callout_cpu whose cc_expireq is drained; each queued
 * callout is run through softclock_call_cc() in non-direct mode.
 */
void
softclock(void *arg)
{
	struct callout_cpu *cc;
	struct callout *c;
#ifdef CALLOUT_PROFILING
	int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0;
#endif

	cc = (struct callout_cpu *)arg;
	CC_LOCK(cc);
	while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
		TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
		softclock_call_cc(c, cc,
#ifdef CALLOUT_PROFILING
		    &mpcalls, &lockcalls, &gcalls,
#endif
		    0);
#ifdef CALLOUT_PROFILING
		++depth;
#endif
	}
#ifdef CALLOUT_PROFILING
	avg_depth += (depth * 1000 - avg_depth) >> 8;
	avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
	avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
	avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
#endif
	CC_UNLOCK(cc);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * callout_handle_init --
 *	Initialize a handle so that using it with untimeout is benign.
 *
 *	See AT&T BCI Driver Reference Manual for specification.
This * implementation differs from that one in that although an * identification value is returned from timeout, the original * arguments to timeout as well as the identifier are used to * identify entries for untimeout. */ struct callout_handle timeout(timeout_t *ftn, void *arg, int to_ticks) { struct callout_cpu *cc; struct callout *new; struct callout_handle handle; cc = CC_CPU(timeout_cpu); CC_LOCK(cc); /* Fill in the next free callout structure. */ new = SLIST_FIRST(&cc->cc_callfree); if (new == NULL) /* XXX Attempt to malloc first */ panic("timeout table full"); SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); callout_reset(new, to_ticks, ftn, arg); handle.callout = new; CC_UNLOCK(cc); return (handle); } void untimeout(timeout_t *ftn, void *arg, struct callout_handle handle) { struct callout_cpu *cc; /* * Check for a handle that was initialized * by callout_handle_init, but never used * for a real timeout. */ if (handle.callout == NULL) return; cc = callout_lock(handle.callout); if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) callout_stop(handle.callout); CC_UNLOCK(cc); } void callout_handle_init(struct callout_handle *handle) { handle->callout = NULL; } /* * New interface; clients allocate their own callout structures. * * callout_reset() - establish or change a timeout * callout_stop() - disestablish a timeout * callout_init() - initialize a callout structure so that it can * safely be passed to callout_reset() and callout_stop() * * defines three convenience macros: * * callout_active() - returns truth if callout has not been stopped, * drained, or deactivated since the last time the callout was * reset. 
* callout_pending() - returns truth if callout is still waiting for timeout
 * callout_deactivate() - marks the callout as having been serviced
 */
/*
 * Arm (or re-arm) callout 'c' so that ftn(arg) runs at the computed time.
 *
 * sbt, precision:
 *	Requested time and allowed slop.  With C_ABSOLUTE in 'flags', sbt is
 *	used as the expiry time unchanged.  Otherwise sbt is added to a base
 *	time chosen below (saturating at SBT_MAX) and 'precision' is raised to
 *	at least sbt >> tc_precexp, or sbt >> C_PRELGET(flags) when
 *	C_PRELGET(flags) is not negative.
 * cpu:
 *	Target CPU.  -1 means "keep c->c_cpu".  Any other value that is
 *	>= MAXCPU, or whose callout_cpu has cc_inited == 0, panics.
 * flags:
 *	C_ABSOLUTE, C_HARDCLOCK, C_DIRECT_EXEC and the C_PRELGET() bits are
 *	examined here; the whole word is also handed to callout_cc_add().
 *
 * Returns 'cancelled': nonzero when a pending instance was removed from its
 * queue, when a currently-executing instance that had not really started was
 * cancelled, or when an already-deferred migration was overwritten with the
 * new parameters; 0 otherwise.  When a drain is waiting on the running
 * callout, nothing is rescheduled and 'cancelled' is returned as-is.
 *
 * The callout_cpu lock obtained by callout_lock() is held from there to each
 * return (every return path below is preceded by CC_UNLOCK()).
 */
int
callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
    void (*ftn)(void *), void *arg, int cpu, int flags)
{
	sbintime_t to_sbt, pr;
	struct callout_cpu *cc;
	int cancelled, direct;
	int ignore_cpu=0;

	cancelled = 0;
	if (cpu == -1) {
		/* Caller does not care which CPU; c->c_cpu is used below. */
		ignore_cpu = 1;
	} else if ((cpu >= MAXCPU) ||
	    ((CC_CPU(cpu))->cc_inited == 0)) {
		/* Invalid CPU spec */
		panic("Invalid CPU in callout %d", cpu);
	}
	if (flags & C_ABSOLUTE) {
		to_sbt = sbt;
	} else {
		/* With C_HARDCLOCK the interval is never shorter than a tick. */
		if ((flags & C_HARDCLOCK) && (sbt < tick_sbt))
			sbt = tick_sbt;
		if ((flags & C_HARDCLOCK) ||
#ifdef NO_EVENTTIMERS
		    sbt >= sbt_timethreshold) {
			to_sbt = getsbinuptime();

			/* Add safety belt for the case of hz > 1000. */
			to_sbt += tc_tick_sbt - tick_sbt;
#else
		    sbt >= sbt_tickthreshold) {
			/*
			 * Obtain the time of the last hardclock() call on
			 * this CPU directly from the kern_clocksource.c.
			 * This value is per-CPU, but it is equal for all
			 * active ones.
			 */
#ifdef __LP64__
			to_sbt = DPCPU_GET(hardclocktime);
#else
			spinlock_enter();
			to_sbt = DPCPU_GET(hardclocktime);
			spinlock_exit();
#endif
#endif
			if ((flags & C_HARDCLOCK) == 0)
				to_sbt += tick_sbt;
		} else
			to_sbt = sbinuptime();
		/* Saturating add: clamp to SBT_MAX instead of overflowing. */
		if (SBT_MAX - to_sbt < sbt)
			to_sbt = SBT_MAX;
		else
			to_sbt += sbt;
		pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
		    sbt >> C_PRELGET(flags));
		if (pr > precision)
			precision = pr;
	}
	/*
	 * This flag used to be added by callout_cc_add, but the
	 * first time you call this we could end up with the
	 * wrong direct flag if we don't do it before we add.
	 */
	if (flags & C_DIRECT_EXEC) {
		direct = 1;
	} else {
		direct = 0;
	}
	KASSERT(!direct || c->c_lock == NULL,
	    ("%s: direct callout %p has lock", __func__, c));
	cc = callout_lock(c);
	/*
	 * Don't allow migration of pre-allocated callouts lest they
	 * become unbalanced or handle the case where the user does
	 * not care.
	 */
	if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) ||
	    ignore_cpu) {
		cpu = c->c_cpu;
	}

	if (cc_exec_curr(cc, direct) == c) {
		/*
		 * We're being asked to reschedule a callout which is
		 * currently in progress.  If there is a lock then we
		 * can cancel the callout if it has not really started.
		 */
		if (c->c_lock != NULL && !cc_exec_cancel(cc, direct))
			cancelled = cc_exec_cancel(cc, direct) = true;
		if (cc_exec_waiting(cc, direct)) {
			/*
			 * Someone has called callout_drain to kill this
			 * callout.  Don't reschedule.
			 */
			CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
			    cancelled ? "cancelled" : "failed to cancel",
			    c, c->c_func, c->c_arg);
			CC_UNLOCK(cc);
			return (cancelled);
		}
#ifdef SMP
		if (callout_migrating(c)) {
			/*
			 * This only occurs when a second callout_reset_sbt_on
			 * is made after a previous one moved it into
			 * deferred migration (below). Note we do *not* change
			 * the prev_cpu even though the previous target may
			 * be different.
			 */
			cc_migration_cpu(cc, direct) = cpu;
			cc_migration_time(cc, direct) = to_sbt;
			cc_migration_prec(cc, direct) = precision;
			cc_migration_func(cc, direct) = ftn;
			cc_migration_arg(cc, direct) = arg;
			cancelled = 1;
			CC_UNLOCK(cc);
			return (cancelled);
		}
#endif
	}
	if (c->c_iflags & CALLOUT_PENDING) {
		/*
		 * Already queued: unlink it from whichever list holds it
		 * (a LIST when CALLOUT_PROCESSED is clear, otherwise the
		 * cc_expireq TAILQ) before it is re-added below.
		 */
		if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
			if (cc_exec_next(cc) == c)
				cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
			LIST_REMOVE(c, c_links.le);
		} else {
			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
		}
		cancelled = 1;
		c->c_iflags &= ~ CALLOUT_PENDING;
		c->c_flags &= ~ CALLOUT_ACTIVE;
	}

#ifdef SMP
	/*
	 * If the callout must migrate try to perform it immediately.
	 * If the callout is currently running, just defer the migration
	 * to a more appropriate moment.
	 */
	if (c->c_cpu != cpu) {
		if (cc_exec_curr(cc, direct) == c) {
			/*
			 * Pending will have been removed since we are
			 * actually executing the callout on another
			 * CPU. That callout should be waiting on the
			 * lock the caller holds. If we set both
			 * active/and/pending after we return and the
			 * lock on the executing callout proceeds, it
			 * will then see pending is true and return.
			 * At the return from the actual callout execution
			 * the migration will occur in softclock_call_cc
			 * and this new callout will be placed on the
			 * new CPU via a call to callout_cpu_switch() which
			 * will get the lock on the right CPU followed
			 * by a call callout_cc_add() which will add it there.
			 * (see above in softclock_call_cc()).
			 */
			cc_migration_cpu(cc, direct) = cpu;
			cc_migration_time(cc, direct) = to_sbt;
			cc_migration_prec(cc, direct) = precision;
			cc_migration_func(cc, direct) = ftn;
			cc_migration_arg(cc, direct) = arg;
			c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING);
			c->c_flags |= CALLOUT_ACTIVE;
			CTR6(KTR_CALLOUT,
		    "migration of %p func %p arg %p in %d.%08x to %u deferred",
			    c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
			    (u_int)(to_sbt & 0xffffffff), cpu);
			CC_UNLOCK(cc);
			return (cancelled);
		}
		cc = callout_cpu_switch(c, cc, cpu);
	}
#endif

	callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
	CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
	    cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
	    (u_int)(to_sbt & 0xffffffff));
	CC_UNLOCK(cc);

	return (cancelled);
}

/*
 * Common idioms that can be optimized in the future.
*/
/*
 * Re-arm 'c' to fire in 'to_ticks' ticks on 'cpu', reusing the function and
 * argument already stored in the callout.  Returns callout_reset_on()'s
 * result.
 */
int
callout_schedule_on(struct callout *c, int to_ticks, int cpu)
{
	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu);
}

/*
 * Same as callout_schedule_on(), keeping the callout on its current
 * c->c_cpu.
 */
int
callout_schedule(struct callout *c, int to_ticks)
{
	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu);
}

/*
 * Stop callout 'c'.
 *
 * flags: CS_DRAIN requests waiting (sleeping) until a currently running
 *	invocation has finished; CS_MIGRBLOCK selects the return value used
 *	when a deferred migration is cancelled while the callout is running.
 * drain: optional function recorded via cc_exec_drain() when the callout is
 *	running and cannot be stopped here.
 *
 * Return values, as implemented below:
 *	-1  the callout was neither pending nor the one currently executing;
 *	 1  a pending callout was removed, or a not-yet-started invocation
 *	    waiting on the caller-held lock was cancelled;
 *	 0  the callout is executing and could not be stopped (this is also
 *	    what the CS_DRAIN path returns once the wait loop finishes);
 *	(flags & CS_MIGRBLOCK) != 0  when a deferred migration was removed
 *	    while the callout is being serviced.
 */
int
_callout_stop_safe(struct callout *c, int flags, void (*drain)(void *))
{
	struct callout_cpu *cc, *old_cc;
	struct lock_class *class;
	int direct, sq_locked, use_lock;
	int not_on_a_list;

	if ((flags & CS_DRAIN) != 0)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock,
		    "calling %s", __func__);

	/*
	 * Some old subsystems don't hold Giant while running a callout_stop(),
	 * so just discard this check for the moment.
	 */
	if ((flags & CS_DRAIN) == 0 && c->c_lock != NULL) {
		if (c->c_lock == &Giant.lock_object)
			use_lock = mtx_owned(&Giant);
		else {
			use_lock = 1;
			class = LOCK_CLASS(c->c_lock);
			class->lc_assert(c->c_lock, LA_XLOCKED);
		}
	} else
		use_lock = 0;
	if (c->c_iflags & CALLOUT_DIRECT) {
		direct = 1;
	} else {
		direct = 0;
	}
	/*
	 * sq_locked: the sleepqueue chain lock is held.
	 * old_cc: the callout_cpu that was current when it was taken.
	 */
	sq_locked = 0;
	old_cc = NULL;
again:
	cc = callout_lock(c);

	if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) ==
	    (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) &&
	    ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) {
		/*
		 * Special case where this slipped in while we
		 * were migrating *as* the callout is about to
		 * execute. The caller probably holds the lock
		 * the callout wants.
		 *
		 * Get rid of the migration first. Then set
		 * the flag that tells this code *not* to
		 * try to remove it from any lists (its not
		 * on one yet). When the callout wheel runs,
		 * it will ignore this callout.
		 */
		c->c_iflags &= ~CALLOUT_PENDING;
		c->c_flags &= ~CALLOUT_ACTIVE;
		not_on_a_list = 1;
	} else {
		not_on_a_list = 0;
	}

	/*
	 * If the callout was migrating while the callout cpu lock was
	 * dropped, just drop the sleepqueue lock and check the states
	 * again.
	 */
	if (sq_locked != 0 && cc != old_cc) {
#ifdef SMP
		CC_UNLOCK(cc);
		sleepq_release(&cc_exec_waiting(old_cc, direct));
		sq_locked = 0;
		old_cc = NULL;
		goto again;
#else
		panic("migration should not happen");
#endif
	}

	/*
	 * If the callout isn't pending, it's not on the queue, so
	 * don't attempt to remove it from the queue.  We can try to
	 * stop it by other means however.
	 */
	if (!(c->c_iflags & CALLOUT_PENDING)) {
		/*
		 * If it wasn't on the queue and it isn't the current
		 * callout, then we can't stop it, so just bail.
		 * It probably has already been run (if locking
		 * is properly done). You could get here if the caller
		 * calls stop twice in a row for example. The second
		 * call would fall here without CALLOUT_ACTIVE set.
		 */
		c->c_flags &= ~CALLOUT_ACTIVE;
		if (cc_exec_curr(cc, direct) != c) {
			CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
			    c, c->c_func, c->c_arg);
			CC_UNLOCK(cc);
			if (sq_locked)
				sleepq_release(&cc_exec_waiting(cc, direct));
			return (-1);
		}

		if ((flags & CS_DRAIN) != 0) {
			/*
			 * The current callout is running (or just
			 * about to run) and blocking is allowed, so
			 * just wait for the current invocation to
			 * finish.
			 */
			while (cc_exec_curr(cc, direct) == c) {
				/*
				 * Use direct calls to sleepqueue interface
				 * instead of cv/msleep in order to avoid
				 * a LOR between cc_lock and sleepqueue
				 * chain spinlocks.  This piece of code
				 * emulates a msleep_spin() call actually.
				 *
				 * If we already have the sleepqueue chain
				 * locked, then we can safely block.  If we
				 * don't already have it locked, however,
				 * we have to drop the cc_lock to lock
				 * it.  This opens several races, so we
				 * restart at the beginning once we have
				 * both locks.  If nothing has changed, then
				 * we will end up back here with sq_locked
				 * set.
				 */
				if (!sq_locked) {
					CC_UNLOCK(cc);
					sleepq_lock(
					    &cc_exec_waiting(cc, direct));
					sq_locked = 1;
					old_cc = cc;
					goto again;
				}

				/*
				 * Migration could be cancelled here, but
				 * as long as it is still not sure when it
				 * will be packed up, just let softclock()
				 * take care of it.
				 */
				cc_exec_waiting(cc, direct) = true;
				DROP_GIANT();
				CC_UNLOCK(cc);
				sleepq_add(
				    &cc_exec_waiting(cc, direct),
				    &cc->cc_lock.lock_object, "codrain",
				    SLEEPQ_SLEEP, 0);
				sleepq_wait(
				    &cc_exec_waiting(cc, direct),
				    0);
				sq_locked = 0;
				old_cc = NULL;

				/* Reacquire locks previously released. */
				PICKUP_GIANT();
				CC_LOCK(cc);
			}
		} else if (use_lock &&
		    !cc_exec_cancel(cc, direct) && (drain == NULL)) {

			/*
			 * The current callout is waiting for its
			 * lock which we hold.  Cancel the callout
			 * and return.  After our caller drops the
			 * lock, the callout will be skipped in
			 * softclock(). This *only* works with a
			 * callout_stop() *not* callout_drain() or
			 * callout_async_drain().
			 */
			cc_exec_cancel(cc, direct) = true;
			CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
			    c, c->c_func, c->c_arg);
			KASSERT(!cc_cce_migrating(cc, direct),
			    ("callout wrongly scheduled for migration"));
			if (callout_migrating(c)) {
				c->c_iflags &= ~CALLOUT_DFRMIGRATION;
#ifdef SMP
				cc_migration_cpu(cc, direct) = CPUBLOCK;
				cc_migration_time(cc, direct) = 0;
				cc_migration_prec(cc, direct) = 0;
				cc_migration_func(cc, direct) = NULL;
				cc_migration_arg(cc, direct) = NULL;
#endif
			}
			CC_UNLOCK(cc);
			KASSERT(!sq_locked, ("sleepqueue chain locked"));
			return (1);
		} else if (callout_migrating(c)) {
			/*
			 * The callout is currently being serviced
			 * and the "next" callout is scheduled at
			 * its completion with a migration. We remove
			 * the migration flag so it *won't* get rescheduled,
			 * but we can't stop the one thats running so
			 * we return 0.
			 */
			c->c_iflags &= ~CALLOUT_DFRMIGRATION;
#ifdef SMP
			/*
			 * We can't call cc_cce_cleanup here since
			 * if we do it will remove .ce_curr and
			 * its still running. This will prevent a
			 * reschedule of the callout when the
			 * execution completes.
			 */
			cc_migration_cpu(cc, direct) = CPUBLOCK;
			cc_migration_time(cc, direct) = 0;
			cc_migration_prec(cc, direct) = 0;
			cc_migration_func(cc, direct) = NULL;
			cc_migration_arg(cc, direct) = NULL;
#endif
			CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
			    c, c->c_func, c->c_arg);
			if (drain) {
				cc_exec_drain(cc, direct) = drain;
			}
			CC_UNLOCK(cc);
			return ((flags & CS_MIGRBLOCK) != 0);
		}
		/*
		 * Running and not stoppable (also reached after the CS_DRAIN
		 * wait loop above): record the drain function, if any.
		 */
		CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
		    c, c->c_func, c->c_arg);
		if (drain) {
			cc_exec_drain(cc, direct) = drain;
		}
		CC_UNLOCK(cc);
		KASSERT(!sq_locked, ("sleepqueue chain still locked"));
		return (0);
	}
	if (sq_locked)
		sleepq_release(&cc_exec_waiting(cc, direct));

	c->c_iflags &= ~CALLOUT_PENDING;
	c->c_flags &= ~CALLOUT_ACTIVE;

	CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
	    c, c->c_func, c->c_arg);
	/* Unlink from its queue unless the special case above applied. */
	if (not_on_a_list == 0) {
		if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
			if (cc_exec_next(cc) == c)
				cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
			LIST_REMOVE(c, c_links.le);
		} else {
			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
		}
	}
	callout_cc_del(c, cc);
	CC_UNLOCK(cc);
	return (1);
}

/*
 * Initialise a callout.  The structure is zeroed first.  With 'mpsafe'
 * nonzero no lock is associated and CALLOUT_RETURNUNLOCKED is set; otherwise
 * the callout is associated with Giant.  The callout starts on timeout_cpu.
 */
void
callout_init(struct callout *c, int mpsafe)
{
	bzero(c, sizeof *c);
	if (mpsafe) {
		c->c_lock = NULL;
		c->c_iflags = CALLOUT_RETURNUNLOCKED;
	} else {
		c->c_lock = &Giant.lock_object;
		c->c_iflags = 0;
	}
	c->c_cpu = timeout_cpu;
}

/*
 * Initialise a callout associated with 'lock'.  Only CALLOUT_RETURNUNLOCKED
 * and CALLOUT_SHAREDLOCK are accepted in 'flags'; CALLOUT_RETURNUNLOCKED
 * requires a non-NULL lock, and the lock's class must be neither a spinlock
 * nor sleepable (all three conditions are KASSERT-checked).
 */
void
_callout_init_lock(struct callout *c, struct lock_object *lock, int flags)
{
	bzero(c, sizeof *c);
	c->c_lock = lock;
	KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0,
	    ("callout_init_lock: bad flags %d", flags));
	KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0,
	    ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock"));
	KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags &
	    (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class",
	    __func__));
	c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
	c->c_cpu = timeout_cpu;
}

#ifdef APM_FIXUP_CALLTODO
/*
 * Adjust the kernel calltodo timeout list.
This routine is used after
 * an APM resume to recalculate the calltodo timer list values with the
 * number of hz's we have been sleeping.  The next hardclock() will detect
 * that there are fired timers and run softclock() to execute them.
 *
 * Please note, I have not done an exhaustive analysis of what code this
 * might break.  I am motivated to have my select()'s and alarm()'s that
 * have expired during suspend firing upon resume so that the applications
 * which set the timer can do the maintenance the timer was for as close
 * as possible to the originally intended time.  Testing this code for a
 * week showed that resuming from a suspend resulted in 22 to 25 timers
 * firing, which seemed independent on whether the suspend was 2 hours or
 * 2 days.  Your mileage may vary.   - Ken Key
 *
 * time_change is the elapsed time to account for; a negative tv_sec is
 * ignored.  The tick count derived from it is capped at INT_MAX.
 *
 * NOTE(review): 'cc' is passed to CC_LOCK()/CC_UNLOCK() below but is not
 * declared in this function -- confirm whether this APM_FIXUP_CALLTODO code
 * still builds.
 */
void
adjust_timeout_calltodo(struct timeval *time_change)
{
	register struct callout *p;
	unsigned long delta_ticks;

	/*
	 * How many ticks were we asleep?
	 * (stolen from tvtohz()).
	 */

	/* Don't do anything */
	if (time_change->tv_sec < 0)
		return;
	else if (time_change->tv_sec <= LONG_MAX / 1000000)
		delta_ticks = howmany(time_change->tv_sec * 1000000 +
		    time_change->tv_usec, tick) + 1;
	else if (time_change->tv_sec <= LONG_MAX / hz)
		delta_ticks = time_change->tv_sec * hz +
		    howmany(time_change->tv_usec, tick) + 1;
	else
		delta_ticks = LONG_MAX;

	if (delta_ticks > INT_MAX)
		delta_ticks = INT_MAX;

	/*
	 * Now rip through the timer calltodo list looking for timers
	 * to expire.
	 */

	/* don't collide with softclock() */
	CC_LOCK(cc);
	for (p = calltodo.c_next; p != NULL; p = p->c_next) {
		p->c_time -= delta_ticks;

		/* Break if the timer had more time on it than delta_ticks */
		if (p->c_time > 0)
			break;

		/* take back the ticks the timer didn't use (p->c_time <= 0) */
		delta_ticks = -p->c_time;
	}
	CC_UNLOCK(cc);

	return;
}
#endif /* APM_FIXUP_CALLTODO */

/*
 * Histogram bucket index for an sbintime_t: the flsl() bit position of
 * sbt + sbt/2.  When long is narrower than sbintime_t, values of at least
 * SBT_1S are measured from their upper 32 bits (plus 32).
 */
static int
flssbt(sbintime_t sbt)
{

	sbt += (uint64_t)sbt >> 1;
	if (sizeof(long) >= sizeof(sbintime_t))
		return (flsl(sbt));
	if (sbt >= SBT_1S)
		return (flsl(((uint64_t)sbt) >> 32) + 32);
	return (flsl(sbt));
}

/*
 * Dump immediate statistic snapshot of the scheduled callouts.
 *
 * Sysctl handler: a plain read returns after sysctl_handle_int(); only a
 * write (req->newptr != NULL) walks every callwheel bucket (of every CPU
 * under SMP, each under its CC_LOCK) and printf()s counts, median/average/
 * maximum time-to-expiry and precision, and a power-of-two distribution.
 *
 * NOTE(review): the averages divide by 'count'; nothing visible here guards
 * against count == 0 -- confirm that at least one callout is always queued.
 */
static int
sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS)
{
	struct callout *tmp;
	struct callout_cpu *cc;
	struct callout_list *sc;
	sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t;
	int ct[64], cpr[64], ccpbk[32];
	int error, val, i, count, tcum, pcum, maxc, c, medc;
#ifdef SMP
	int cpu;
#endif

	val = 0;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	count = maxc = 0;
	st = spr = maxt = maxpr = 0;
	bzero(ccpbk, sizeof(ccpbk));
	bzero(ct, sizeof(ct));
	bzero(cpr, sizeof(cpr));
	now = sbinuptime();
#ifdef SMP
	CPU_FOREACH(cpu) {
		cc = CC_CPU(cpu);
#else
		cc = CC_CPU(timeout_cpu);
#endif
		CC_LOCK(cc);
		for (i = 0; i < callwheelsize; i++) {
			sc = &cc->cc_callwheel[i];
			c = 0;	/* callouts in this bucket */
			LIST_FOREACH(tmp, sc, c_links.le) {
				c++;
				t = tmp->c_time - now;
				if (t < 0)
					t = 0;
				/* Sums are kept in microseconds. */
				st += t / SBT_1US;
				spr += tmp->c_precision / SBT_1US;
				if (t > maxt)
					maxt = t;
				if (tmp->c_precision > maxpr)
					maxpr = tmp->c_precision;
				ct[flssbt(t)]++;
				cpr[flssbt(tmp->c_precision)]++;
			}
			if (c > maxc)
				maxc = c;
			ccpbk[fls(c + c / 2)]++;
			count += c;
		}
		CC_UNLOCK(cc);
#ifdef SMP
	}
#endif

	/* Medians: first histogram slot whose cumulative count reaches half. */
	for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++)
		tcum += ct[i];
	medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
	for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++)
		pcum += cpr[i];
	medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
	for (i = 0, c = 0; i < 32 && c < count / 2; i++)
		c += ccpbk[i];
	medc = (i >= 2) ? (1 << (i - 2)) : 0;

	printf("Scheduled callouts statistic snapshot:\n");
	printf(" Callouts: %6d Buckets: %6d*%-3d Bucket size: 0.%06ds\n",
	    count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT);
	printf(" C/Bk: med %5d avg %6d.%06jd max %6d\n",
	    medc,
	    count / callwheelsize / mp_ncpus,
	    (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000,
	    maxc);
	printf(" Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
	    medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32,
	    (st / count) / 1000000, (st / count) % 1000000,
	    maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32);
	printf(" Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
	    medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32,
	    (spr / count) / 1000000, (spr / count) % 1000000,
	    maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32);
	printf(" Distribution: \tbuckets\t time\t tcum\t"
	    " prec\t pcum\n");
	for (i = 0, tcum = pcum = 0; i < 64; i++) {
		if (ct[i] == 0 && cpr[i] == 0)
			continue;
		t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0;
		tcum += ct[i];
		pcum += cpr[i];
		printf(" %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
		    t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
		    i - 1 - (32 - CC_HASH_SHIFT),
		    ct[i], tcum, cpr[i], pcum);
	}
	return (error);
}
SYSCTL_PROC(_kern, OID_AUTO, callout_stat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_callout_stat, "I",
    "Dump immediate statistic snapshot of the scheduled callouts");

#ifdef DDB
-
/* Print every field of callout 'c' to the debugger console. */
static void
_show_callout(struct callout *c)
{

	db_printf("callout %p\n", c);
/* Prints one member as " name = value" using printf conversion 'f'. */
#define	C_DB_PRINTF(f, e)	db_printf(" %s = " f "\n", #e, c->e);
	db_printf(" &c_links = %p\n", &(c->c_links));
	C_DB_PRINTF("%" PRId64,	c_time);
	C_DB_PRINTF("%" PRId64,	c_precision);
	C_DB_PRINTF("%p",	c_arg);
	C_DB_PRINTF("%p",	c_func);
	C_DB_PRINTF("%p",	c_lock);
	C_DB_PRINTF("%#x",	c_flags);
	C_DB_PRINTF("%#x",	c_iflags);
	C_DB_PRINTF("%d",	c_cpu);
#undef	C_DB_PRINTF
}

/* "show callout" debugger command: requires the address of a callout. */
DB_SHOW_COMMAND(callout, db_show_callout)
{

	if (!have_addr) {
		db_printf("usage: show callout \n");
		return;
	}

	_show_callout((struct callout *)addr);
}
#endif /* DDB */
Index: projects/vnet/sys/xen/xen-os.h
===================================================================
--- projects/vnet/sys/xen/xen-os.h (revision 301522)
+++ projects/vnet/sys/xen/xen-os.h (revision 301523)
@@ -1,145 +1,145 @@
/******************************************************************************
 * xen/xen-os.h
 *
 * Random collection of macros and definition
 *
 * Copyright (c) 2003, 2004 Keir Fraser (on behalf of the Xen team)
 * All rights reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * $FreeBSD$ */ #ifndef _XEN_XEN_OS_H_ #define _XEN_XEN_OS_H_ #if !defined(__XEN_INTERFACE_VERSION__) #define __XEN_INTERFACE_VERSION__ 0x00030208 #endif #define GRANT_REF_INVALID 0xffffffff #ifdef LOCORE #define __ASSEMBLY__ #endif #include #include /* Everything below this point is not included by assembler (.S) files. 
*/
#ifndef __ASSEMBLY__

extern shared_info_t *HYPERVISOR_shared_info;
extern start_info_t *HYPERVISOR_start_info;

/* XXX: we need to get rid of this and use HYPERVISOR_start_info directly */
extern char *console_page;

extern int xen_disable_pv_disks;
extern int xen_disable_pv_nics;

enum xen_domain_type {
	XEN_NATIVE,		/* running on bare hardware */
	XEN_PV_DOMAIN,		/* running in a PV domain */
	XEN_HVM_DOMAIN,		/* running in a Xen hvm domain */
};

extern enum xen_domain_type xen_domain_type;

/* Nonzero when running under Xen in any mode (i.e. not XEN_NATIVE). */
static inline int
xen_domain(void)
{
	return (xen_domain_type != XEN_NATIVE);
}

/* Nonzero when running as a PV domain. */
static inline int
xen_pv_domain(void)
{
	return (xen_domain_type == XEN_PV_DOMAIN);
}

/* Nonzero when running as an HVM domain. */
static inline int
xen_hvm_domain(void)
{
	return (xen_domain_type == XEN_HVM_DOMAIN);
}

/*
 * True when running under Xen, start info is available, and its flags have
 * SIF_INITDOMAIN set.
 */
static inline bool
xen_initial_domain(void)
{
	return (xen_domain() && HYPERVISOR_start_info != NULL &&
	    (HYPERVISOR_start_info->flags & SIF_INITDOMAIN) != 0);
}

/*
 * Based on ofed/include/linux/bitops.h
 *
 * Those helpers are prefixed by xen_ because xen-os.h is widely included
 * and we don't want the other drivers using them.
 *
 * 'addr' is treated as an array of longs and 'bit' as a bit index into it:
 * word bit / NBPL, bit position bit % NBPL.
 */
/* Number of bits per long. */
#define NBPL (NBBY * sizeof(long))

/* Acquire-load the word holding 'bit' and report whether the bit is set. */
static inline bool
xen_test_bit(int bit, volatile long *addr)
{
	unsigned long mask = 1UL << (bit % NBPL);

	return !!(atomic_load_acq_long(&addr[bit / NBPL]) & mask);
}

/* Set 'bit' with atomic_set_long(). */
static inline void
xen_set_bit(int bit, volatile long *addr)
{
	atomic_set_long(&addr[bit / NBPL], 1UL << (bit % NBPL));
}

/* Clear 'bit' with atomic_clear_long(). */
static inline void
xen_clear_bit(int bit, volatile long *addr)
{
	atomic_clear_long(&addr[bit / NBPL], 1UL << (bit % NBPL));
}

/*
 * NOTE(review): the next two lines are unified-diff residue (old line, new
 * line); the revision's change is the macro name in this #undef, correcting
 * the misspelt NPBL to NBPL so that the NBPL defined above is removed.
 */
-#undef NPBL
+#undef NBPL

/*
 * Functions to allocate/free unused memory in order
 * to map memory from other domains.
 */
struct resource *xenmem_alloc(device_t dev, int *res_id, size_t size);
int xenmem_free(device_t dev, int res_id, struct resource *res);

/* Debug/emergency function, prints directly to hypervisor console */
void xc_printf(const char *, ...) __printflike(1, 2);

/* Barrier wrappers; each defaults to the generic barrier unless overridden. */
#ifndef xen_mb
#define xen_mb() mb()
#endif
#ifndef xen_rmb
#define xen_rmb() rmb()
#endif
#ifndef xen_wmb
#define xen_wmb() wmb()
#endif

#endif /* !__ASSEMBLY__ */

#endif /* _XEN_XEN_OS_H_ */
Index: projects/vnet/tools/tools/cxgbetool/cxgbetool.c
===================================================================
--- projects/vnet/tools/tools/cxgbetool/cxgbetool.c (revision 301522)
+++ projects/vnet/tools/tools/cxgbetool/cxgbetool.c (revision 301523)
@@ -1,2718 +1,2718 @@
/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "t4_ioctl.h" #define in_range(val, lo, hi) ( val < 0 || (val <= hi && val >= lo)) #define max(x, y) ((x) > (y) ? (x) : (y)) static const char *progname, *nexus; static int chip_id; /* 4 for T4, 5 for T5 */ struct reg_info { const char *name; uint32_t addr; uint32_t len; }; struct mod_regs { const char *name; const struct reg_info *ri; }; struct field_desc { const char *name; /* Field name */ unsigned short start; /* Start bit position */ unsigned short end; /* End bit position */ unsigned char shift; /* # of low order bits omitted and implicitly 0 */ unsigned char hex; /* Print field in hex instead of decimal */ unsigned char islog2; /* Field contains the base-2 log of the value */ }; #include "reg_defs_t4.c" #include "reg_defs_t5.c" #include "reg_defs_t6.c" #include "reg_defs_t4vf.c" static void usage(FILE *fp) { fprintf(fp, "Usage: %s [operation]\n", progname); fprintf(fp, "\tclearstats clear port statistics\n" "\tcontext show an SGE context\n" "\tfilter [ ] ... set a filter\n" "\tfilter delete|clear delete a filter\n" "\tfilter list list all filters\n" "\tfilter mode [] ... get/set global filter mode\n" "\ti2c [] read from i2c device\n" "\tloadfw install firmware\n" "\tmemdump dump a memory range\n" "\tmodinfo [raw] optics/cable information\n" "\treg
[=] read/write register\n" "\treg64
[=] read/write 64 bit register\n" "\tregdump [] ... dump registers\n" "\tsched-class params .. configure TX scheduler class\n" "\tsched-queue bind NIC queues to TX Scheduling class\n" "\tstdio interactive mode\n" "\ttcb read TCB\n" "\ttracer tx|rx set and enable a tracer\n" "\ttracer disable|enable disable or enable a tracer\n" "\ttracer list list all tracers\n" ); } static inline unsigned int get_card_vers(unsigned int version) { return (version & 0x3ff); } static int real_doit(unsigned long cmd, void *data, const char *cmdstr) { static int fd = -1; int rc = 0; if (fd == -1) { char buf[64]; snprintf(buf, sizeof(buf), "/dev/%s", nexus); if ((fd = open(buf, O_RDWR)) < 0) { warn("open(%s)", nexus); rc = errno; return (rc); } chip_id = nexus[1] - '0'; } rc = ioctl(fd, cmd, data); if (rc < 0) { warn("%s", cmdstr); rc = errno; } return (rc); } #define doit(x, y) real_doit(x, y, #x) static char * str_to_number(const char *s, long *val, long long *vall) { char *p; if (vall) *vall = strtoll(s, &p, 0); else if (val) *val = strtol(s, &p, 0); else p = NULL; return (p); } static int read_reg(long addr, int size, long long *val) { struct t4_reg reg; int rc; reg.addr = (uint32_t) addr; reg.size = (uint32_t) size; reg.val = 0; rc = doit(CHELSIO_T4_GETREG, ®); *val = reg.val; return (rc); } static int write_reg(long addr, int size, long long val) { struct t4_reg reg; reg.addr = (uint32_t) addr; reg.size = (uint32_t) size; reg.val = (uint64_t) val; return doit(CHELSIO_T4_SETREG, ®); } static int register_io(int argc, const char *argv[], int size) { char *p, *v; long addr; long long val; int w = 0, rc; if (argc == 1) { /* OR = */ p = str_to_number(argv[0], &addr, NULL); if (*p) { if (*p != '=') { warnx("invalid register \"%s\"", argv[0]); return (EINVAL); } w = 1; v = p + 1; p = str_to_number(v, NULL, &val); if (*p) { warnx("invalid value \"%s\"", v); return (EINVAL); } } } else if (argc == 2) { /* */ w = 1; p = str_to_number(argv[0], &addr, NULL); if (*p) { warnx("invalid register 
\"%s\"", argv[0]); return (EINVAL); } p = str_to_number(argv[1], NULL, &val); if (*p) { warnx("invalid value \"%s\"", argv[1]); return (EINVAL); } } else { warnx("reg: invalid number of arguments (%d)", argc); return (EINVAL); } if (w) rc = write_reg(addr, size, val); else { rc = read_reg(addr, size, &val); if (rc == 0) printf("0x%llx [%llu]\n", val, val); } return (rc); } static inline uint32_t xtract(uint32_t val, int shift, int len) { return (val >> shift) & ((1 << len) - 1); } static int dump_block_regs(const struct reg_info *reg_array, const uint32_t *regs) { uint32_t reg_val = 0; for ( ; reg_array->name; ++reg_array) if (!reg_array->len) { reg_val = regs[reg_array->addr / 4]; printf("[%#7x] %-47s %#-10x %u\n", reg_array->addr, reg_array->name, reg_val, reg_val); } else { uint32_t v = xtract(reg_val, reg_array->addr, reg_array->len); printf(" %*u:%u %-47s %#-10x %u\n", reg_array->addr < 10 ? 3 : 2, reg_array->addr + reg_array->len - 1, reg_array->addr, reg_array->name, v, v); } return (1); } static int dump_regs_table(int argc, const char *argv[], const uint32_t *regs, const struct mod_regs *modtab, int nmodules) { int i, j, match; for (i = 0; i < argc; i++) { for (j = 0; j < nmodules; j++) { if (!strcmp(argv[i], modtab[j].name)) break; } if (j == nmodules) { warnx("invalid register block \"%s\"", argv[i]); fprintf(stderr, "\nAvailable blocks:"); for ( ; nmodules; nmodules--, modtab++) fprintf(stderr, " %s", modtab->name); fprintf(stderr, "\n"); return (EINVAL); } } for ( ; nmodules; nmodules--, modtab++) { match = argc == 0 ? 
1 : 0; for (i = 0; !match && i < argc; i++) { if (!strcmp(argv[i], modtab->name)) match = 1; } if (match) dump_block_regs(modtab->ri, regs); } return (0); } #define T4_MODREGS(name) { #name, t4_##name##_regs } static int dump_regs_t4(int argc, const char *argv[], const uint32_t *regs) { static struct mod_regs t4_mod[] = { T4_MODREGS(sge), { "pci", t4_pcie_regs }, T4_MODREGS(dbg), T4_MODREGS(mc), T4_MODREGS(ma), { "edc0", t4_edc_0_regs }, { "edc1", t4_edc_1_regs }, T4_MODREGS(cim), T4_MODREGS(tp), T4_MODREGS(ulp_rx), T4_MODREGS(ulp_tx), { "pmrx", t4_pm_rx_regs }, { "pmtx", t4_pm_tx_regs }, T4_MODREGS(mps), { "cplsw", t4_cpl_switch_regs }, T4_MODREGS(smb), { "i2c", t4_i2cm_regs }, T4_MODREGS(mi), T4_MODREGS(uart), T4_MODREGS(pmu), T4_MODREGS(sf), T4_MODREGS(pl), T4_MODREGS(le), T4_MODREGS(ncsi), T4_MODREGS(xgmac) }; return dump_regs_table(argc, argv, regs, t4_mod, nitems(t4_mod)); } #undef T4_MODREGS #define T5_MODREGS(name) { #name, t5_##name##_regs } static int dump_regs_t5(int argc, const char *argv[], const uint32_t *regs) { static struct mod_regs t5_mod[] = { T5_MODREGS(sge), { "pci", t5_pcie_regs }, T5_MODREGS(dbg), { "mc0", t5_mc_0_regs }, { "mc1", t5_mc_1_regs }, T5_MODREGS(ma), { "edc0", t5_edc_t50_regs }, { "edc1", t5_edc_t51_regs }, T5_MODREGS(cim), T5_MODREGS(tp), { "ulprx", t5_ulp_rx_regs }, { "ulptx", t5_ulp_tx_regs }, { "pmrx", t5_pm_rx_regs }, { "pmtx", t5_pm_tx_regs }, T5_MODREGS(mps), { "cplsw", t5_cpl_switch_regs }, T5_MODREGS(smb), { "i2c", t5_i2cm_regs }, T5_MODREGS(mi), T5_MODREGS(uart), T5_MODREGS(pmu), T5_MODREGS(sf), T5_MODREGS(pl), T5_MODREGS(le), T5_MODREGS(ncsi), T5_MODREGS(mac), { "hma", t5_hma_t5_regs } }; return dump_regs_table(argc, argv, regs, t5_mod, nitems(t5_mod)); } #undef T5_MODREGS #define T6_MODREGS(name) { #name, t6_##name##_regs } static int dump_regs_t6(int argc, const char *argv[], const uint32_t *regs) { static struct mod_regs t6_mod[] = { T6_MODREGS(sge), { "pci", t6_pcie_regs }, T6_MODREGS(dbg), { "mc0", t6_mc_0_regs }, 
T6_MODREGS(ma), { "edc0", t6_edc_t60_regs }, { "edc1", t6_edc_t61_regs }, T6_MODREGS(cim), T6_MODREGS(tp), { "ulprx", t6_ulp_rx_regs }, { "ulptx", t6_ulp_tx_regs }, { "pmrx", t6_pm_rx_regs }, { "pmtx", t6_pm_tx_regs }, T6_MODREGS(mps), { "cplsw", t6_cpl_switch_regs }, T6_MODREGS(smb), { "i2c", t6_i2cm_regs }, T6_MODREGS(mi), T6_MODREGS(uart), T6_MODREGS(pmu), T6_MODREGS(sf), T6_MODREGS(pl), T6_MODREGS(le), T6_MODREGS(ncsi), T6_MODREGS(mac), { "hma", t6_hma_t6_regs } }; return dump_regs_table(argc, argv, regs, t6_mod, nitems(t6_mod)); } #undef T6_MODREGS static int dump_regs_t4vf(int argc, const char *argv[], const uint32_t *regs) { static struct mod_regs t4vf_mod[] = { { "sge", t4vf_sge_regs }, { "mps", t4vf_mps_regs }, { "pl", t4vf_pl_regs }, { "mbdata", t4vf_mbdata_regs }, { "cim", t4vf_cim_regs }, }; return dump_regs_table(argc, argv, regs, t4vf_mod, nitems(t4vf_mod)); } static int dump_regs_t5vf(int argc, const char *argv[], const uint32_t *regs) { static struct mod_regs t5vf_mod[] = { { "sge", t5vf_sge_regs }, { "mps", t4vf_mps_regs }, { "pl", t5vf_pl_regs }, { "mbdata", t4vf_mbdata_regs }, { "cim", t4vf_cim_regs }, }; return dump_regs_table(argc, argv, regs, t5vf_mod, nitems(t5vf_mod)); } static int dump_regs_t6vf(int argc, const char *argv[], const uint32_t *regs) { static struct mod_regs t6vf_mod[] = { { "sge", t5vf_sge_regs }, { "mps", t4vf_mps_regs }, { "pl", t6vf_pl_regs }, { "mbdata", t4vf_mbdata_regs }, { "cim", t4vf_cim_regs }, }; return dump_regs_table(argc, argv, regs, t6vf_mod, nitems(t6vf_mod)); } static int dump_regs(int argc, const char *argv[]) { int vers, revision, rc; struct t4_regdump regs; uint32_t len; len = max(T4_REGDUMP_SIZE, T5_REGDUMP_SIZE); regs.data = calloc(1, len); if (regs.data == NULL) { warnc(ENOMEM, "regdump"); return (ENOMEM); } regs.len = len; rc = doit(CHELSIO_T4_REGDUMP, ®s); if (rc != 0) return (rc); vers = get_card_vers(regs.version); revision = (regs.version >> 10) & 0x3f; if (vers == 4) { if (revision == 0x3f) rc = 
dump_regs_t4vf(argc, argv, regs.data); else rc = dump_regs_t4(argc, argv, regs.data); } else if (vers == 5) { if (revision == 0x3f) rc = dump_regs_t5vf(argc, argv, regs.data); else rc = dump_regs_t5(argc, argv, regs.data); } else if (vers == 6) { if (revision == 0x3f) rc = dump_regs_t6vf(argc, argv, regs.data); else rc = dump_regs_t6(argc, argv, regs.data); } else { warnx("%s (type %d, rev %d) is not a known card.", nexus, vers, revision); return (ENOTSUP); } free(regs.data); return (rc); } static void do_show_info_header(uint32_t mode) { uint32_t i; printf("%4s %8s", "Idx", "Hits"); for (i = T4_FILTER_FCoE; i <= T4_FILTER_IP_FRAGMENT; i <<= 1) { switch (mode & i) { case T4_FILTER_FCoE: printf(" FCoE"); break; case T4_FILTER_PORT: printf(" Port"); break; case T4_FILTER_VNIC: if (mode & T4_FILTER_IC_VNIC) printf(" VFvld:PF:VF"); else printf(" vld:oVLAN"); break; case T4_FILTER_VLAN: printf(" vld:VLAN"); break; case T4_FILTER_IP_TOS: printf(" TOS"); break; case T4_FILTER_IP_PROTO: printf(" Prot"); break; case T4_FILTER_ETH_TYPE: printf(" EthType"); break; case T4_FILTER_MAC_IDX: printf(" MACIdx"); break; case T4_FILTER_MPS_HIT_TYPE: printf(" MPS"); break; case T4_FILTER_IP_FRAGMENT: printf(" Frag"); break; default: /* compressed filter field not enabled */ break; } } printf(" %20s %20s %9s %9s %s\n", "DIP", "SIP", "DPORT", "SPORT", "Action"); } /* * Parse an argument sub-vector as a { [:] } * ordered tuple. If the parameter name in the argument sub-vector does not * match the passed in parameter name, then a zero is returned for the * function and no parsing is performed. If there is a match, then the value * and optional mask are parsed and returned in the provided return value * pointers. If no optional mask is specified, then a default mask of all 1s * will be returned. * * An error in parsing the value[:mask] will result in an error message and * program termination. 
*/
static int
parse_val_mask(const char *param, const char *args[], uint32_t *val,
    uint32_t *mask)
{
	char *p;

	/* Not our parameter: report EINVAL silently so the caller can move on. */
	if (strcmp(param, args[0]) != 0)
		return (EINVAL);

	*val = strtoul(args[1], &p, 0);
	if (p > args[1]) {
		/* Bare value: the mask defaults to all ones. */
		if (p[0] == 0) {
			*mask = ~0;
			return (0);
		}

		/* "value:mask" with a non-empty, fully numeric mask. */
		if (p[0] == ':' && p[1] != 0) {
			*mask = strtoul(p+1, &p, 0);
			if (p[0] == 0)
				return (0);
		}
	}

	warnx("parameter \"%s\" has bad \"value[:mask]\" %s",
	    args[0], args[1]);

	return (EINVAL);
}

/*
 * Parse an argument sub-vector as a { <param> <address>[/<prefix-length>] }
 * ordered tuple: args[0] is the parameter name and args[1] is the address,
 * optionally followed by "/" and a decimal prefix length.  If args[0] does
 * not match the passed in parameter name, EINVAL is returned and nothing is
 * parsed or printed.  If there is a match, the address and the mask derived
 * from the prefix length are stored in 'addr' and 'mask' and 0 is returned.
 * If no prefix length is given, the mask is all 1s over the full address
 * width (4 bytes for IPv4, 16 for IPv6).
 *
 * The value return parameter "afp" is used to specify the expected address
 * family -- IPv4 or IPv6 -- of the address and return its actual format.
 * A passed in value of AF_UNSPEC indicates that either IPv4 or IPv6 is
 * acceptable and is replaced by the detected family; AF_INET means that only
 * IPv4 addresses are acceptable; and AF_INET6 means that only IPv6 are
 * acceptable.  An address is treated as IPv6 when args[1] contains a ':'.
 * IPv4 address/mask pairs are returned in the first four bytes of the
 * address and mask return values, with address A.B.C.D stored as
 * { A, B, C, D } at indices { 0, 1, 2, 3 }.
 *
 * A family mismatch, an unparsable address, or a missing, non-numeric or
 * too-long prefix length results in a warning message and a return value of
 * EINVAL.
 */
static int
parse_ipaddr(const char *param, const char *args[], int *afp, uint8_t addr[],
    uint8_t mask[])
{
	const char *colon, *afn;
	char *slash;
	uint8_t *m;
	int af, ret;
	unsigned int masksize;

	/*
	 * Is this our parameter?
	 */
	if (strcmp(param, args[0]) != 0)
		return (EINVAL);

	/*
	 * Fundamental IPv4 versus IPv6 selection.
	 */
	colon = strchr(args[1], ':');
	if (!colon) {
		afn = "IPv4";
		af = AF_INET;
		masksize = 32;
	} else {
		afn = "IPv6";
		af = AF_INET6;
		masksize = 128;
	}
	if (*afp == AF_UNSPEC)
		*afp = af;
	else if (*afp != af) {
		warnx("address %s is not of expected family %s",
		    args[1], *afp == AF_INET ? "IP" : "IPv6");
		return (EINVAL);
	}

	/*
	 * Parse address (temporarily stripping off any "/mask"
	 * specification).
	 */
	slash = strchr(args[1], '/');
	if (slash)
		*slash = 0;
	ret = inet_pton(af, args[1], addr);
	if (slash)
		*slash = '/';
	if (ret <= 0) {
		warnx("Cannot parse %s %s address %s", param, afn, args[1]);
		return (EINVAL);
	}

	/*
	 * Parse optional mask specification.
	 */
	if (slash) {
		char *p;
		unsigned int prefix = strtoul(slash + 1, &p, 10);

		if (p == slash + 1) {
			warnx("missing address prefix for %s", param);
			return (EINVAL);
		}
		if (*p) {
			warnx("%s is not a valid address prefix", slash + 1);
			return (EINVAL);
		}
		if (prefix > masksize) {
			warnx("prefix %u is too long for an %s address",
			    prefix, afn);
			return (EINVAL);
		}
		/* Clear the whole mask; only 'prefix' bits are set below. */
		memset(mask, 0, masksize / 8);
		masksize = prefix;
	}

	/*
	 * Fill in mask: whole bytes first, then the remaining high-order
	 * bits of the next byte.  0xff is shifted (rather than ~0) because
	 * left-shifting a negative int is undefined behaviour.
	 */
	for (m = mask; masksize >= 8; m++, masksize -= 8)
		*m = 0xff;
	if (masksize)
		*m = (uint8_t)(0xff << (8 - masksize));

	return (0);
}

/*
 * Parse an argument sub-vector as a { <param> <value> } ordered tuple:
 * args[0] is the parameter name and args[1] is the value.  If args[0] does
 * not match the passed in parameter name, EINVAL is returned and nothing is
 * parsed or printed.  If there is a match, then the value is parsed and
 * returned in the provided return value pointer and 0 is returned; a
 * malformed value results in a warning message and EINVAL.
*/ static int parse_val(const char *param, const char *args[], uint32_t *val) { char *p; if (strcmp(param, args[0]) != 0) return (EINVAL); *val = strtoul(args[1], &p, 0); if (p > args[1] && p[0] == 0) return (0); warnx("parameter \"%s\" has bad \"value\" %s", args[0], args[1]); return (EINVAL); } static void filters_show_ipaddr(int type, uint8_t *addr, uint8_t *addrm) { int noctets, octet; printf(" "); if (type == 0) { noctets = 4; printf("%3s", " "); } else noctets = 16; for (octet = 0; octet < noctets; octet++) printf("%02x", addr[octet]); printf("/"); for (octet = 0; octet < noctets; octet++) printf("%02x", addrm[octet]); } static void do_show_one_filter_info(struct t4_filter *t, uint32_t mode) { uint32_t i; printf("%4d", t->idx); if (t->hits == UINT64_MAX) printf(" %8s", "-"); else printf(" %8ju", t->hits); /* * Compressed header portion of filter. */ for (i = T4_FILTER_FCoE; i <= T4_FILTER_IP_FRAGMENT; i <<= 1) { switch (mode & i) { case T4_FILTER_FCoE: printf(" %1d/%1d", t->fs.val.fcoe, t->fs.mask.fcoe); break; case T4_FILTER_PORT: printf(" %1d/%1d", t->fs.val.iport, t->fs.mask.iport); break; case T4_FILTER_VNIC: if (mode & T4_FILTER_IC_VNIC) { printf(" %1d:%1x:%02x/%1d:%1x:%02x", t->fs.val.pfvf_vld, (t->fs.val.vnic >> 13) & 0x7, t->fs.val.vnic & 0x1fff, t->fs.mask.pfvf_vld, (t->fs.mask.vnic >> 13) & 0x7, t->fs.mask.vnic & 0x1fff); } else { printf(" %1d:%04x/%1d:%04x", t->fs.val.ovlan_vld, t->fs.val.vnic, t->fs.mask.ovlan_vld, t->fs.mask.vnic); } break; case T4_FILTER_VLAN: printf(" %1d:%04x/%1d:%04x", t->fs.val.vlan_vld, t->fs.val.vlan, t->fs.mask.vlan_vld, t->fs.mask.vlan); break; case T4_FILTER_IP_TOS: printf(" %02x/%02x", t->fs.val.tos, t->fs.mask.tos); break; case T4_FILTER_IP_PROTO: printf(" %02x/%02x", t->fs.val.proto, t->fs.mask.proto); break; case T4_FILTER_ETH_TYPE: printf(" %04x/%04x", t->fs.val.ethtype, t->fs.mask.ethtype); break; case T4_FILTER_MAC_IDX: printf(" %03x/%03x", t->fs.val.macidx, t->fs.mask.macidx); break; case T4_FILTER_MPS_HIT_TYPE: 
printf(" %1x/%1x", t->fs.val.matchtype, t->fs.mask.matchtype); break; case T4_FILTER_IP_FRAGMENT: printf(" %1d/%1d", t->fs.val.frag, t->fs.mask.frag); break; default: /* compressed filter field not enabled */ break; } } /* * Fixed portion of filter. */ filters_show_ipaddr(t->fs.type, t->fs.val.dip, t->fs.mask.dip); filters_show_ipaddr(t->fs.type, t->fs.val.sip, t->fs.mask.sip); printf(" %04x/%04x %04x/%04x", t->fs.val.dport, t->fs.mask.dport, t->fs.val.sport, t->fs.mask.sport); /* * Variable length filter action. */ if (t->fs.action == FILTER_DROP) printf(" Drop"); else if (t->fs.action == FILTER_SWITCH) { printf(" Switch: port=%d", t->fs.eport); if (t->fs.newdmac) printf( ", dmac=%02x:%02x:%02x:%02x:%02x:%02x " ", l2tidx=%d", t->fs.dmac[0], t->fs.dmac[1], t->fs.dmac[2], t->fs.dmac[3], t->fs.dmac[4], t->fs.dmac[5], t->l2tidx); if (t->fs.newsmac) printf( ", smac=%02x:%02x:%02x:%02x:%02x:%02x " ", smtidx=%d", t->fs.smac[0], t->fs.smac[1], t->fs.smac[2], t->fs.smac[3], t->fs.smac[4], t->fs.smac[5], t->smtidx); if (t->fs.newvlan == VLAN_REMOVE) printf(", vlan=none"); else if (t->fs.newvlan == VLAN_INSERT) printf(", vlan=insert(%x)", t->fs.vlan); else if (t->fs.newvlan == VLAN_REWRITE) printf(", vlan=rewrite(%x)", t->fs.vlan); } else { printf(" Pass: Q="); if (t->fs.dirsteer == 0) { printf("RSS"); if (t->fs.maskhash) printf("(TCB=hash)"); } else { printf("%d", t->fs.iq); if (t->fs.dirsteerhash == 0) printf("(QID)"); else printf("(hash)"); } } if (t->fs.prio) printf(" Prio"); if (t->fs.rpttid) printf(" RptTID"); printf("\n"); } static int show_filters(void) { uint32_t mode = 0, header = 0; struct t4_filter t; int rc; /* Get the global filter mode first */ rc = doit(CHELSIO_T4_GET_FILTER_MODE, &mode); if (rc != 0) return (rc); t.idx = 0; for (t.idx = 0; ; t.idx++) { rc = doit(CHELSIO_T4_GET_FILTER, &t); if (rc != 0 || t.idx == 0xffffffff) break; if (!header) { do_show_info_header(mode); header = 1; } do_show_one_filter_info(&t, mode); }; return (rc); } static int 
get_filter_mode(void) { uint32_t mode = 0; int rc; rc = doit(CHELSIO_T4_GET_FILTER_MODE, &mode); if (rc != 0) return (rc); if (mode & T4_FILTER_IPv4) printf("ipv4 "); if (mode & T4_FILTER_IPv6) printf("ipv6 "); if (mode & T4_FILTER_IP_SADDR) printf("sip "); if (mode & T4_FILTER_IP_DADDR) printf("dip "); if (mode & T4_FILTER_IP_SPORT) printf("sport "); if (mode & T4_FILTER_IP_DPORT) printf("dport "); if (mode & T4_FILTER_IP_FRAGMENT) printf("frag "); if (mode & T4_FILTER_MPS_HIT_TYPE) printf("matchtype "); if (mode & T4_FILTER_MAC_IDX) printf("macidx "); if (mode & T4_FILTER_ETH_TYPE) printf("ethtype "); if (mode & T4_FILTER_IP_PROTO) printf("proto "); if (mode & T4_FILTER_IP_TOS) printf("tos "); if (mode & T4_FILTER_VLAN) printf("vlan "); if (mode & T4_FILTER_VNIC) { if (mode & T4_FILTER_IC_VNIC) printf("vnic_id "); else printf("ovlan "); } if (mode & T4_FILTER_PORT) printf("iport "); if (mode & T4_FILTER_FCoE) printf("fcoe "); printf("\n"); return (0); } static int set_filter_mode(int argc, const char *argv[]) { uint32_t mode = 0; int vnic = 0, ovlan = 0; for (; argc; argc--, argv++) { if (!strcmp(argv[0], "frag")) mode |= T4_FILTER_IP_FRAGMENT; if (!strcmp(argv[0], "matchtype")) mode |= T4_FILTER_MPS_HIT_TYPE; if (!strcmp(argv[0], "macidx")) mode |= T4_FILTER_MAC_IDX; if (!strcmp(argv[0], "ethtype")) mode |= T4_FILTER_ETH_TYPE; if (!strcmp(argv[0], "proto")) mode |= T4_FILTER_IP_PROTO; if (!strcmp(argv[0], "tos")) mode |= T4_FILTER_IP_TOS; if (!strcmp(argv[0], "vlan")) mode |= T4_FILTER_VLAN; if (!strcmp(argv[0], "ovlan")) { mode |= T4_FILTER_VNIC; ovlan++; } if (!strcmp(argv[0], "vnic_id")) { mode |= T4_FILTER_VNIC; mode |= T4_FILTER_IC_VNIC; vnic++; } if (!strcmp(argv[0], "iport")) mode |= T4_FILTER_PORT; if (!strcmp(argv[0], "fcoe")) mode |= T4_FILTER_FCoE; } if (vnic > 0 && ovlan > 0) { warnx("\"vnic_id\" and \"ovlan\" are mutually exclusive."); return (EINVAL); } return doit(CHELSIO_T4_SET_FILTER_MODE, &mode); } static int del_filter(uint32_t idx) { struct 
t4_filter t; t.idx = idx; return doit(CHELSIO_T4_DEL_FILTER, &t); } static int set_filter(uint32_t idx, int argc, const char *argv[]) { int af = AF_UNSPEC, start_arg = 0; struct t4_filter t; if (argc < 2) { warnc(EINVAL, "%s", __func__); return (EINVAL); }; bzero(&t, sizeof (t)); t.idx = idx; t.fs.hitcnts = 1; for (start_arg = 0; start_arg + 2 <= argc; start_arg += 2) { const char **args = &argv[start_arg]; uint32_t val, mask; if (!strcmp(argv[start_arg], "type")) { int newaf; if (!strcasecmp(argv[start_arg + 1], "ipv4")) newaf = AF_INET; else if (!strcasecmp(argv[start_arg + 1], "ipv6")) newaf = AF_INET6; else { warnx("invalid type \"%s\"; " "must be one of \"ipv4\" or \"ipv6\"", argv[start_arg + 1]); return (EINVAL); } if (af != AF_UNSPEC && af != newaf) { warnx("conflicting IPv4/IPv6 specifications."); return (EINVAL); } af = newaf; } else if (!parse_val_mask("fcoe", args, &val, &mask)) { t.fs.val.fcoe = val; t.fs.mask.fcoe = mask; } else if (!parse_val_mask("iport", args, &val, &mask)) { t.fs.val.iport = val; t.fs.mask.iport = mask; } else if (!parse_val_mask("ovlan", args, &val, &mask)) { t.fs.val.vnic = val; t.fs.mask.vnic = mask; t.fs.val.ovlan_vld = 1; t.fs.mask.ovlan_vld = 1; } else if (!parse_val_mask("ivlan", args, &val, &mask)) { t.fs.val.vlan = val; t.fs.mask.vlan = mask; t.fs.val.vlan_vld = 1; t.fs.mask.vlan_vld = 1; } else if (!parse_val_mask("pf", args, &val, &mask)) { t.fs.val.vnic &= 0x1fff; t.fs.val.vnic |= (val & 0x7) << 13; t.fs.mask.vnic &= 0x1fff; t.fs.mask.vnic |= (mask & 0x7) << 13; t.fs.val.pfvf_vld = 1; t.fs.mask.pfvf_vld = 1; } else if (!parse_val_mask("vf", args, &val, &mask)) { t.fs.val.vnic &= 0xe000; t.fs.val.vnic |= val & 0x1fff; t.fs.mask.vnic &= 0xe000; t.fs.mask.vnic |= mask & 0x1fff; t.fs.val.pfvf_vld = 1; t.fs.mask.pfvf_vld = 1; } else if (!parse_val_mask("tos", args, &val, &mask)) { t.fs.val.tos = val; t.fs.mask.tos = mask; } else if (!parse_val_mask("proto", args, &val, &mask)) { t.fs.val.proto = val; t.fs.mask.proto = mask; 
} else if (!parse_val_mask("ethtype", args, &val, &mask)) { t.fs.val.ethtype = val; t.fs.mask.ethtype = mask; } else if (!parse_val_mask("macidx", args, &val, &mask)) { t.fs.val.macidx = val; t.fs.mask.macidx = mask; } else if (!parse_val_mask("matchtype", args, &val, &mask)) { t.fs.val.matchtype = val; t.fs.mask.matchtype = mask; } else if (!parse_val_mask("frag", args, &val, &mask)) { t.fs.val.frag = val; t.fs.mask.frag = mask; } else if (!parse_val_mask("dport", args, &val, &mask)) { t.fs.val.dport = val; t.fs.mask.dport = mask; } else if (!parse_val_mask("sport", args, &val, &mask)) { t.fs.val.sport = val; t.fs.mask.sport = mask; } else if (!parse_ipaddr("dip", args, &af, t.fs.val.dip, t.fs.mask.dip)) { /* nada */; } else if (!parse_ipaddr("sip", args, &af, t.fs.val.sip, t.fs.mask.sip)) { /* nada */; } else if (!strcmp(argv[start_arg], "action")) { if (!strcmp(argv[start_arg + 1], "pass")) t.fs.action = FILTER_PASS; else if (!strcmp(argv[start_arg + 1], "drop")) t.fs.action = FILTER_DROP; else if (!strcmp(argv[start_arg + 1], "switch")) t.fs.action = FILTER_SWITCH; else { warnx("invalid action \"%s\"; must be one of" " \"pass\", \"drop\" or \"switch\"", argv[start_arg + 1]); return (EINVAL); } } else if (!parse_val("hitcnts", args, &val)) { t.fs.hitcnts = val; } else if (!parse_val("prio", args, &val)) { t.fs.prio = val; } else if (!parse_val("rpttid", args, &val)) { t.fs.rpttid = 1; } else if (!parse_val("queue", args, &val)) { t.fs.dirsteer = 1; t.fs.iq = val; } else if (!parse_val("tcbhash", args, &val)) { t.fs.maskhash = 1; t.fs.dirsteerhash = 1; } else if (!parse_val("eport", args, &val)) { t.fs.eport = val; } else if (!strcmp(argv[start_arg], "dmac")) { struct ether_addr *daddr; daddr = ether_aton(argv[start_arg + 1]); if (daddr == NULL) { warnx("invalid dmac address \"%s\"", argv[start_arg + 1]); return (EINVAL); } memcpy(t.fs.dmac, daddr, ETHER_ADDR_LEN); t.fs.newdmac = 1; } else if (!strcmp(argv[start_arg], "smac")) { struct ether_addr *saddr; saddr = 
ether_aton(argv[start_arg + 1]); if (saddr == NULL) { warnx("invalid smac address \"%s\"", argv[start_arg + 1]); return (EINVAL); } memcpy(t.fs.smac, saddr, ETHER_ADDR_LEN); t.fs.newsmac = 1; } else if (!strcmp(argv[start_arg], "vlan")) { char *p; if (!strcmp(argv[start_arg + 1], "none")) { t.fs.newvlan = VLAN_REMOVE; } else if (argv[start_arg + 1][0] == '=') { t.fs.newvlan = VLAN_REWRITE; } else if (argv[start_arg + 1][0] == '+') { t.fs.newvlan = VLAN_INSERT; } else if (isdigit(argv[start_arg + 1][0]) && !parse_val_mask("vlan", args, &val, &mask)) { t.fs.val.vlan = val; t.fs.mask.vlan = mask; t.fs.val.vlan_vld = 1; t.fs.mask.vlan_vld = 1; } else { warnx("unknown vlan parameter \"%s\"; must" " be one of \"none\", \"=\", " " \"+\", or \"\"", argv[start_arg + 1]); return (EINVAL); } if (t.fs.newvlan == VLAN_REWRITE || t.fs.newvlan == VLAN_INSERT) { t.fs.vlan = strtoul(argv[start_arg + 1] + 1, &p, 0); if (p == argv[start_arg + 1] + 1 || p[0] != 0) { warnx("invalid vlan \"%s\"", argv[start_arg + 1]); return (EINVAL); } } } else { warnx("invalid parameter \"%s\"", argv[start_arg]); return (EINVAL); } } if (start_arg != argc) { warnx("no value for \"%s\"", argv[start_arg]); return (EINVAL); } /* * Check basic sanity of option combinations. */ if (t.fs.action != FILTER_SWITCH && (t.fs.eport || t.fs.newdmac || t.fs.newsmac || t.fs.newvlan)) { warnx("prio, port dmac, smac and vlan only make sense with" " \"action switch\""); return (EINVAL); } if (t.fs.action != FILTER_PASS && (t.fs.rpttid || t.fs.dirsteer || t.fs.maskhash)) { warnx("rpttid, queue and tcbhash don't make sense with" " action \"drop\" or \"switch\""); return (EINVAL); } if (t.fs.val.ovlan_vld && t.fs.val.pfvf_vld) { warnx("ovlan and vnic_id (pf/vf) are mutually exclusive"); return (EINVAL); } t.fs.type = (af == AF_INET6 ? 
1 : 0); /* default IPv4 */ return doit(CHELSIO_T4_SET_FILTER, &t); } static int filter_cmd(int argc, const char *argv[]) { long long val; uint32_t idx; char *s; if (argc == 0) { warnx("filter: no arguments."); return (EINVAL); }; /* list */ if (strcmp(argv[0], "list") == 0) { if (argc != 1) warnx("trailing arguments after \"list\" ignored."); return show_filters(); } /* mode */ if (argc == 1 && strcmp(argv[0], "mode") == 0) return get_filter_mode(); /* mode */ if (strcmp(argv[0], "mode") == 0) return set_filter_mode(argc - 1, argv + 1); /* ... */ s = str_to_number(argv[0], NULL, &val); if (*s || val > 0xffffffffU) { warnx("\"%s\" is neither an index nor a filter subcommand.", argv[0]); return (EINVAL); } idx = (uint32_t) val; /* delete|clear */ if (argc == 2 && (strcmp(argv[1], "delete") == 0 || strcmp(argv[1], "clear") == 0)) { return del_filter(idx); } /* [ ] ... */ return set_filter(idx, argc - 1, argv + 1); } /* * Shows the fields of a multi-word structure. The structure is considered to * consist of @nwords 32-bit words (i.e, it's an (@nwords * 32)-bit structure) * whose fields are described by @fd. The 32-bit words are given in @words * starting with the least significant 32-bit word. */ static void show_struct(const uint32_t *words, int nwords, const struct field_desc *fd) { unsigned int w = 0; const struct field_desc *p; for (p = fd; p->name; p++) w = max(w, strlen(p->name)); while (fd->name) { unsigned long long data; int first_word = fd->start / 32; int shift = fd->start % 32; int width = fd->end - fd->start + 1; unsigned long long mask = (1ULL << width) - 1; data = (words[first_word] >> shift) | ((uint64_t)words[first_word + 1] << (32 - shift)); if (shift) data |= ((uint64_t)words[first_word + 2] << (64 - shift)); data &= mask; if (fd->islog2) data = 1 << data; printf("%-*s ", w, fd->name); printf(fd->hex ? 
"%#llx\n" : "%llu\n", data << fd->shift); fd++; } } #define FIELD(name, start, end) { name, start, end, 0, 0, 0 } #define FIELD1(name, start) FIELD(name, start, start) static void show_t5_ctxt(const struct t4_sge_context *p) { static struct field_desc egress_t5[] = { FIELD("DCA_ST:", 181, 191), FIELD1("StatusPgNS:", 180), FIELD1("StatusPgRO:", 179), FIELD1("FetchNS:", 178), FIELD1("FetchRO:", 177), FIELD1("Valid:", 176), FIELD("PCIeDataChannel:", 174, 175), FIELD1("StatusPgTPHintEn:", 173), FIELD("StatusPgTPHint:", 171, 172), FIELD1("FetchTPHintEn:", 170), FIELD("FetchTPHint:", 168, 169), FIELD1("FCThreshOverride:", 167), { "WRLength:", 162, 166, 9, 0, 1 }, FIELD1("WRLengthKnown:", 161), FIELD1("ReschedulePending:", 160), FIELD1("OnChipQueue:", 159), FIELD1("FetchSizeMode:", 158), { "FetchBurstMin:", 156, 157, 4, 0, 1 }, FIELD1("FLMPacking:", 155), FIELD("FetchBurstMax:", 153, 154), FIELD("uPToken:", 133, 152), FIELD1("uPTokenEn:", 132), FIELD1("UserModeIO:", 131), FIELD("uPFLCredits:", 123, 130), FIELD1("uPFLCreditEn:", 122), FIELD("FID:", 111, 121), FIELD("HostFCMode:", 109, 110), FIELD1("HostFCOwner:", 108), { "CIDXFlushThresh:", 105, 107, 0, 0, 1 }, FIELD("CIDX:", 89, 104), FIELD("PIDX:", 73, 88), { "BaseAddress:", 18, 72, 9, 1 }, FIELD("QueueSize:", 2, 17), FIELD1("QueueType:", 1), FIELD1("CachePriority:", 0), { NULL } }; static struct field_desc fl_t5[] = { FIELD("DCA_ST:", 181, 191), FIELD1("StatusPgNS:", 180), FIELD1("StatusPgRO:", 179), FIELD1("FetchNS:", 178), FIELD1("FetchRO:", 177), FIELD1("Valid:", 176), FIELD("PCIeDataChannel:", 174, 175), FIELD1("StatusPgTPHintEn:", 173), FIELD("StatusPgTPHint:", 171, 172), FIELD1("FetchTPHintEn:", 170), FIELD("FetchTPHint:", 168, 169), FIELD1("FCThreshOverride:", 167), FIELD1("ReschedulePending:", 160), FIELD1("OnChipQueue:", 159), FIELD1("FetchSizeMode:", 158), { "FetchBurstMin:", 156, 157, 4, 0, 1 }, FIELD1("FLMPacking:", 155), FIELD("FetchBurstMax:", 153, 154), FIELD1("FLMcongMode:", 152), 
FIELD("MaxuPFLCredits:", 144, 151), FIELD("FLMcontextID:", 133, 143), FIELD1("uPTokenEn:", 132), FIELD1("UserModeIO:", 131), FIELD("uPFLCredits:", 123, 130), FIELD1("uPFLCreditEn:", 122), FIELD("FID:", 111, 121), FIELD("HostFCMode:", 109, 110), FIELD1("HostFCOwner:", 108), { "CIDXFlushThresh:", 105, 107, 0, 0, 1 }, FIELD("CIDX:", 89, 104), FIELD("PIDX:", 73, 88), { "BaseAddress:", 18, 72, 9, 1 }, FIELD("QueueSize:", 2, 17), FIELD1("QueueType:", 1), FIELD1("CachePriority:", 0), { NULL } }; static struct field_desc ingress_t5[] = { FIELD("DCA_ST:", 143, 153), FIELD1("ISCSICoalescing:", 142), FIELD1("Queue_Valid:", 141), FIELD1("TimerPending:", 140), FIELD1("DropRSS:", 139), FIELD("PCIeChannel:", 137, 138), FIELD1("SEInterruptArmed:", 136), FIELD1("CongestionMgtEnable:", 135), FIELD1("NoSnoop:", 134), FIELD1("RelaxedOrdering:", 133), FIELD1("GTSmode:", 132), FIELD1("TPHintEn:", 131), FIELD("TPHint:", 129, 130), FIELD1("UpdateScheduling:", 128), FIELD("UpdateDelivery:", 126, 127), FIELD1("InterruptSent:", 125), FIELD("InterruptIDX:", 114, 124), FIELD1("InterruptDestination:", 113), FIELD1("InterruptArmed:", 112), FIELD("RxIntCounter:", 106, 111), FIELD("RxIntCounterThreshold:", 104, 105), FIELD1("Generation:", 103), { "BaseAddress:", 48, 102, 9, 1 }, FIELD("PIDX:", 32, 47), FIELD("CIDX:", 16, 31), { "QueueSize:", 4, 15, 4, 0 }, { "QueueEntrySize:", 2, 3, 4, 0, 1 }, FIELD1("QueueEntryOverride:", 1), FIELD1("CachePriority:", 0), { NULL } }; static struct field_desc flm_t5[] = { FIELD1("Valid:", 89), FIELD("SplitLenMode:", 87, 88), FIELD1("TPHintEn:", 86), FIELD("TPHint:", 84, 85), FIELD1("NoSnoop:", 83), FIELD1("RelaxedOrdering:", 82), FIELD("DCA_ST:", 71, 81), FIELD("EQid:", 54, 70), FIELD("SplitEn:", 52, 53), FIELD1("PadEn:", 51), FIELD1("PackEn:", 50), FIELD1("Cache_Lock :", 49), FIELD1("CongDrop:", 48), FIELD("PackOffset:", 16, 47), FIELD("CIDX:", 8, 15), FIELD("PIDX:", 0, 7), { NULL } }; static struct field_desc conm_t5[] = { FIELD1("CngMPSEnable:", 21), 
FIELD("CngTPMode:", 19, 20), FIELD1("CngDBPHdr:", 18), FIELD1("CngDBPData:", 17), FIELD1("CngIMSG:", 16), { "CngChMap:", 0, 15, 0, 1, 0 }, { NULL } }; if (p->mem_id == SGE_CONTEXT_EGRESS) show_struct(p->data, 6, (p->data[0] & 2) ? fl_t5 : egress_t5); else if (p->mem_id == SGE_CONTEXT_FLM) show_struct(p->data, 3, flm_t5); else if (p->mem_id == SGE_CONTEXT_INGRESS) show_struct(p->data, 5, ingress_t5); else if (p->mem_id == SGE_CONTEXT_CNM) show_struct(p->data, 1, conm_t5); } static void show_t4_ctxt(const struct t4_sge_context *p) { static struct field_desc egress_t4[] = { FIELD1("StatusPgNS:", 180), FIELD1("StatusPgRO:", 179), FIELD1("FetchNS:", 178), FIELD1("FetchRO:", 177), FIELD1("Valid:", 176), FIELD("PCIeDataChannel:", 174, 175), FIELD1("DCAEgrQEn:", 173), FIELD("DCACPUID:", 168, 172), FIELD1("FCThreshOverride:", 167), FIELD("WRLength:", 162, 166), FIELD1("WRLengthKnown:", 161), FIELD1("ReschedulePending:", 160), FIELD1("OnChipQueue:", 159), FIELD1("FetchSizeMode", 158), { "FetchBurstMin:", 156, 157, 4, 0, 1 }, { "FetchBurstMax:", 153, 154, 6, 0, 1 }, FIELD("uPToken:", 133, 152), FIELD1("uPTokenEn:", 132), FIELD1("UserModeIO:", 131), FIELD("uPFLCredits:", 123, 130), FIELD1("uPFLCreditEn:", 122), FIELD("FID:", 111, 121), FIELD("HostFCMode:", 109, 110), FIELD1("HostFCOwner:", 108), { "CIDXFlushThresh:", 105, 107, 0, 0, 1 }, FIELD("CIDX:", 89, 104), FIELD("PIDX:", 73, 88), { "BaseAddress:", 18, 72, 9, 1 }, FIELD("QueueSize:", 2, 17), FIELD1("QueueType:", 1), FIELD1("CachePriority:", 0), { NULL } }; static struct field_desc fl_t4[] = { FIELD1("StatusPgNS:", 180), FIELD1("StatusPgRO:", 179), FIELD1("FetchNS:", 178), FIELD1("FetchRO:", 177), FIELD1("Valid:", 176), FIELD("PCIeDataChannel:", 174, 175), FIELD1("DCAEgrQEn:", 173), FIELD("DCACPUID:", 168, 172), FIELD1("FCThreshOverride:", 167), FIELD1("ReschedulePending:", 160), FIELD1("OnChipQueue:", 159), FIELD1("FetchSizeMode", 158), { "FetchBurstMin:", 156, 157, 4, 0, 1 }, { "FetchBurstMax:", 153, 154, 6, 0, 1 }, 
FIELD1("FLMcongMode:", 152), FIELD("MaxuPFLCredits:", 144, 151), FIELD("FLMcontextID:", 133, 143), FIELD1("uPTokenEn:", 132), FIELD1("UserModeIO:", 131), FIELD("uPFLCredits:", 123, 130), FIELD1("uPFLCreditEn:", 122), FIELD("FID:", 111, 121), FIELD("HostFCMode:", 109, 110), FIELD1("HostFCOwner:", 108), { "CIDXFlushThresh:", 105, 107, 0, 0, 1 }, FIELD("CIDX:", 89, 104), FIELD("PIDX:", 73, 88), { "BaseAddress:", 18, 72, 9, 1 }, FIELD("QueueSize:", 2, 17), FIELD1("QueueType:", 1), FIELD1("CachePriority:", 0), { NULL } }; static struct field_desc ingress_t4[] = { FIELD1("NoSnoop:", 145), FIELD1("RelaxedOrdering:", 144), FIELD1("GTSmode:", 143), FIELD1("ISCSICoalescing:", 142), FIELD1("Valid:", 141), FIELD1("TimerPending:", 140), FIELD1("DropRSS:", 139), FIELD("PCIeChannel:", 137, 138), FIELD1("SEInterruptArmed:", 136), FIELD1("CongestionMgtEnable:", 135), FIELD1("DCAIngQEnable:", 134), FIELD("DCACPUID:", 129, 133), FIELD1("UpdateScheduling:", 128), FIELD("UpdateDelivery:", 126, 127), FIELD1("InterruptSent:", 125), FIELD("InterruptIDX:", 114, 124), FIELD1("InterruptDestination:", 113), FIELD1("InterruptArmed:", 112), FIELD("RxIntCounter:", 106, 111), FIELD("RxIntCounterThreshold:", 104, 105), FIELD1("Generation:", 103), { "BaseAddress:", 48, 102, 9, 1 }, FIELD("PIDX:", 32, 47), FIELD("CIDX:", 16, 31), { "QueueSize:", 4, 15, 4, 0 }, { "QueueEntrySize:", 2, 3, 4, 0, 1 }, FIELD1("QueueEntryOverride:", 1), FIELD1("CachePriority:", 0), { NULL } }; static struct field_desc flm_t4[] = { FIELD1("NoSnoop:", 79), FIELD1("RelaxedOrdering:", 78), FIELD1("Valid:", 77), FIELD("DCACPUID:", 72, 76), FIELD1("DCAFLEn:", 71), FIELD("EQid:", 54, 70), FIELD("SplitEn:", 52, 53), FIELD1("PadEn:", 51), FIELD1("PackEn:", 50), FIELD1("DBpriority:", 48), FIELD("PackOffset:", 16, 47), FIELD("CIDX:", 8, 15), FIELD("PIDX:", 0, 7), { NULL } }; static struct field_desc conm_t4[] = { FIELD1("CngDBPHdr:", 6), FIELD1("CngDBPData:", 5), FIELD1("CngIMSG:", 4), { "CngChMap:", 0, 3, 0, 1, 0}, { NULL } }; if 
(p->mem_id == SGE_CONTEXT_EGRESS) show_struct(p->data, 6, (p->data[0] & 2) ? fl_t4 : egress_t4); else if (p->mem_id == SGE_CONTEXT_FLM) show_struct(p->data, 3, flm_t4); else if (p->mem_id == SGE_CONTEXT_INGRESS) show_struct(p->data, 5, ingress_t4); else if (p->mem_id == SGE_CONTEXT_CNM) show_struct(p->data, 1, conm_t4); } #undef FIELD #undef FIELD1 static int get_sge_context(int argc, const char *argv[]) { int rc; char *p; long cid; struct t4_sge_context cntxt = {0}; if (argc != 2) { warnx("sge_context: incorrect number of arguments."); return (EINVAL); } if (!strcmp(argv[0], "egress")) cntxt.mem_id = SGE_CONTEXT_EGRESS; else if (!strcmp(argv[0], "ingress")) cntxt.mem_id = SGE_CONTEXT_INGRESS; else if (!strcmp(argv[0], "fl")) cntxt.mem_id = SGE_CONTEXT_FLM; else if (!strcmp(argv[0], "cong")) cntxt.mem_id = SGE_CONTEXT_CNM; else { warnx("unknown context type \"%s\"; known types are egress, " "ingress, fl, and cong.", argv[0]); return (EINVAL); } p = str_to_number(argv[1], &cid, NULL); if (*p) { warnx("invalid context id \"%s\"", argv[1]); return (EINVAL); } cntxt.cid = cid; rc = doit(CHELSIO_T4_GET_SGE_CONTEXT, &cntxt); if (rc != 0) return (rc); if (chip_id == 4) show_t4_ctxt(&cntxt); else show_t5_ctxt(&cntxt); return (0); } static int loadfw(int argc, const char *argv[]) { int rc, fd; struct t4_data data = {0}; const char *fname = argv[0]; struct stat st = {0}; if (argc != 1) { warnx("loadfw: incorrect number of arguments."); return (EINVAL); } fd = open(fname, O_RDONLY); if (fd < 0) { warn("open(%s)", fname); return (errno); } if (fstat(fd, &st) < 0) { warn("fstat"); close(fd); return (errno); } data.len = st.st_size; data.data = mmap(0, data.len, PROT_READ, MAP_PRIVATE, fd, 0); if (data.data == MAP_FAILED) { warn("mmap"); close(fd); return (errno); } rc = doit(CHELSIO_T4_LOAD_FW, &data); munmap(data.data, data.len); close(fd); return (rc); } static int read_mem(uint32_t addr, uint32_t len, void (*output)(uint32_t *, uint32_t)) { int rc; struct t4_mem_range mr; 
mr.addr = addr; mr.len = len; mr.data = malloc(mr.len); if (mr.data == 0) { warn("read_mem: malloc"); return (errno); } rc = doit(CHELSIO_T4_GET_MEM, &mr); if (rc != 0) goto done; if (output) (*output)(mr.data, mr.len); done: free(mr.data); return (rc); } /* * Display memory as list of 'n' 4-byte values per line. */ static void show_mem(uint32_t *buf, uint32_t len) { const char *s; int i, n = 8; while (len) { for (i = 0; len && i < n; i++, buf++, len -= 4) { s = i ? " " : ""; printf("%s%08x", s, htonl(*buf)); } printf("\n"); } } static int memdump(int argc, const char *argv[]) { char *p; long l; uint32_t addr, len; if (argc != 2) { warnx("incorrect number of arguments."); return (EINVAL); } p = str_to_number(argv[0], &l, NULL); if (*p) { warnx("invalid address \"%s\"", argv[0]); return (EINVAL); } addr = l; p = str_to_number(argv[1], &l, NULL); if (*p) { warnx("memdump: invalid length \"%s\"", argv[1]); return (EINVAL); } len = l; return (read_mem(addr, len, show_mem)); } /* * Display TCB as list of 'n' 4-byte values per line. */ static void show_tcb(uint32_t *buf, uint32_t len) { const char *s; int i, n = 8; while (len) { for (i = 0; len && i < n; i++, buf++, len -= 4) { s = i ? 
" " : ""; printf("%s%08x", s, htonl(*buf)); } printf("\n"); } } #define A_TP_CMM_TCB_BASE 0x7d10 #define TCB_SIZE 128 static int read_tcb(int argc, const char *argv[]) { char *p; long l; long long val; unsigned int tid; uint32_t addr; int rc; if (argc != 1) { warnx("incorrect number of arguments."); return (EINVAL); } p = str_to_number(argv[0], &l, NULL); if (*p) { warnx("invalid tid \"%s\"", argv[0]); return (EINVAL); } tid = l; rc = read_reg(A_TP_CMM_TCB_BASE, 4, &val); if (rc != 0) return (rc); addr = val + tid * TCB_SIZE; return (read_mem(addr, TCB_SIZE, show_tcb)); } static int read_i2c(int argc, const char *argv[]) { char *p; long l; struct t4_i2c_data i2cd; int rc, i; if (argc < 3 || argc > 4) { warnx("incorrect number of arguments."); return (EINVAL); } p = str_to_number(argv[0], &l, NULL); if (*p || l > UCHAR_MAX) { warnx("invalid port id \"%s\"", argv[0]); return (EINVAL); } i2cd.port_id = l; p = str_to_number(argv[1], &l, NULL); if (*p || l > UCHAR_MAX) { warnx("invalid i2c device address \"%s\"", argv[1]); return (EINVAL); } i2cd.dev_addr = l; p = str_to_number(argv[2], &l, NULL); if (*p || l > UCHAR_MAX) { warnx("invalid byte offset \"%s\"", argv[2]); return (EINVAL); } i2cd.offset = l; if (argc == 4) { p = str_to_number(argv[3], &l, NULL); if (*p || l > sizeof(i2cd.data)) { warnx("invalid number of bytes \"%s\"", argv[3]); return (EINVAL); } i2cd.len = l; } else i2cd.len = 1; rc = doit(CHELSIO_T4_GET_I2C, &i2cd); if (rc != 0) return (rc); for (i = 0; i < i2cd.len; i++) printf("0x%x [%u]\n", i2cd.data[i], i2cd.data[i]); return (0); } static int clearstats(int argc, const char *argv[]) { char *p; long l; uint32_t port; if (argc != 1) { warnx("incorrect number of arguments."); return (EINVAL); } p = str_to_number(argv[0], &l, NULL); if (*p) { warnx("invalid port id \"%s\"", argv[0]); return (EINVAL); } port = l; return doit(CHELSIO_T4_CLEAR_STATS, &port); } static int show_tracers(void) { struct t4_tracer t; char *s; int rc, port_idx, i; long long val; 
/* Magic values: MPS_TRC_CFG = 0x9800. MPS_TRC_CFG[1:1] = TrcEn */ rc = read_reg(0x9800, 4, &val); if (rc != 0) return (rc); printf("tracing is %s\n", val & 2 ? "ENABLED" : "DISABLED"); t.idx = 0; for (t.idx = 0; ; t.idx++) { rc = doit(CHELSIO_T4_GET_TRACER, &t); if (rc != 0 || t.idx == 0xff) break; if (t.tp.port < 4) { s = "Rx"; port_idx = t.tp.port; } else if (t.tp.port < 8) { s = "Tx"; port_idx = t.tp.port - 4; } else if (t.tp.port < 12) { s = "loopback"; port_idx = t.tp.port - 8; } else if (t.tp.port < 16) { s = "MPS Rx"; port_idx = t.tp.port - 12; } else if (t.tp.port < 20) { s = "MPS Tx"; port_idx = t.tp.port - 16; } else { s = "unknown"; port_idx = t.tp.port; } printf("\ntracer %u (currently %s) captures ", t.idx, t.enabled ? "ENABLED" : "DISABLED"); if (t.tp.port < 8) printf("port %u %s, ", port_idx, s); else printf("%s %u, ", s, port_idx); printf("snap length: %u, min length: %u\n", t.tp.snap_len, t.tp.min_len); printf("packets captured %smatch filter\n", t.tp.invert ? "do not " : ""); if (t.tp.skip_ofst) { printf("filter pattern: "); for (i = 0; i < t.tp.skip_ofst * 2; i += 2) printf("%08x%08x", t.tp.data[i], t.tp.data[i + 1]); printf("/"); for (i = 0; i < t.tp.skip_ofst * 2; i += 2) printf("%08x%08x", t.tp.mask[i], t.tp.mask[i + 1]); printf("@0\n"); } printf("filter pattern: "); for (i = t.tp.skip_ofst * 2; i < T4_TRACE_LEN / 4; i += 2) printf("%08x%08x", t.tp.data[i], t.tp.data[i + 1]); printf("/"); for (i = t.tp.skip_ofst * 2; i < T4_TRACE_LEN / 4; i += 2) printf("%08x%08x", t.tp.mask[i], t.tp.mask[i + 1]); printf("@%u\n", (t.tp.skip_ofst + t.tp.skip_len) * 8); } return (rc); } static int tracer_onoff(uint8_t idx, int enabled) { struct t4_tracer t; t.idx = idx; t.enabled = enabled; t.valid = 0; return doit(CHELSIO_T4_SET_TRACER, &t); } static void create_tracing_ifnet() { char *cmd[] = { "/sbin/ifconfig", __DECONST(char *, nexus), "create", NULL }; char *env[] = {NULL}; if (vfork() == 0) { close(STDERR_FILENO); execve(cmd[0], cmd, env); _exit(0); } } 
/* * XXX: Allow user to specify snaplen, minlen, and pattern (including inverted * matching). Right now this is a quick-n-dirty implementation that traces the * first 128B of all tx or rx on a port */ static int set_tracer(uint8_t idx, int argc, const char *argv[]) { struct t4_tracer t; int len, port; bzero(&t, sizeof (t)); t.idx = idx; t.enabled = 1; t.valid = 1; if (argc != 1) { warnx("must specify tx or rx."); return (EINVAL); } len = strlen(argv[0]); if (len != 3) { warnx("argument must be 3 characters (tx or rx)"); return (EINVAL); } if (strncmp(argv[0], "tx", 2) == 0) { port = argv[0][2] - '0'; if (port < 0 || port > 3) { warnx("'%c' in %s is invalid", argv[0][2], argv[0]); return (EINVAL); } port += 4; } else if (strncmp(argv[0], "rx", 2) == 0) { port = argv[0][2] - '0'; if (port < 0 || port > 3) { warnx("'%c' in %s is invalid", argv[0][2], argv[0]); return (EINVAL); } } else { warnx("argument '%s' isn't tx or rx", argv[0]); return (EINVAL); } t.tp.snap_len = 128; t.tp.min_len = 0; t.tp.skip_ofst = 0; t.tp.skip_len = 0; t.tp.invert = 0; t.tp.port = port; create_tracing_ifnet(); return doit(CHELSIO_T4_SET_TRACER, &t); } static int tracer_cmd(int argc, const char *argv[]) { long long val; uint8_t idx; char *s; if (argc == 0) { warnx("tracer: no arguments."); return (EINVAL); }; /* list */ if (strcmp(argv[0], "list") == 0) { if (argc != 1) warnx("trailing arguments after \"list\" ignored."); return show_tracers(); } /* ... */ s = str_to_number(argv[0], NULL, &val); if (*s || val > 0xff) { warnx("\"%s\" is neither an index nor a tracer subcommand.", argv[0]); return (EINVAL); } idx = (int8_t)val; /* disable */ if (argc == 2 && strcmp(argv[1], "disable") == 0) return tracer_onoff(idx, 0); /* enable */ if (argc == 2 && strcmp(argv[1], "enable") == 0) return tracer_onoff(idx, 1); /* ... 
*/ return set_tracer(idx, argc - 1, argv + 1); } static int modinfo_raw(int port_id) { uint8_t offset; struct t4_i2c_data i2cd; int rc; for (offset = 0; offset < 96; offset += sizeof(i2cd.data)) { bzero(&i2cd, sizeof(i2cd)); i2cd.port_id = port_id; i2cd.dev_addr = 0xa0; i2cd.offset = offset; i2cd.len = sizeof(i2cd.data); rc = doit(CHELSIO_T4_GET_I2C, &i2cd); if (rc != 0) return (rc); printf("%02x: %02x %02x %02x %02x %02x %02x %02x %02x", offset, i2cd.data[0], i2cd.data[1], i2cd.data[2], i2cd.data[3], i2cd.data[4], i2cd.data[5], i2cd.data[6], i2cd.data[7]); printf(" %c%c%c%c %c%c%c%c\n", isprint(i2cd.data[0]) ? i2cd.data[0] : '.', isprint(i2cd.data[1]) ? i2cd.data[1] : '.', isprint(i2cd.data[2]) ? i2cd.data[2] : '.', isprint(i2cd.data[3]) ? i2cd.data[3] : '.', isprint(i2cd.data[4]) ? i2cd.data[4] : '.', isprint(i2cd.data[5]) ? i2cd.data[5] : '.', isprint(i2cd.data[6]) ? i2cd.data[6] : '.', isprint(i2cd.data[7]) ? i2cd.data[7] : '.'); } return (0); } static int modinfo(int argc, const char *argv[]) { long port; char string[16], *p; struct t4_i2c_data i2cd; int rc, i; uint16_t temp, vcc, tx_bias, tx_power, rx_power; if (argc < 1) { warnx("must supply a port"); return (EINVAL); } if (argc > 2) { warnx("too many arguments"); return (EINVAL); } p = str_to_number(argv[0], &port, NULL); if (*p || port > UCHAR_MAX) { warnx("invalid port id \"%s\"", argv[0]); return (EINVAL); } if (argc == 2) { if (!strcmp(argv[1], "raw")) return (modinfo_raw(port)); else { warnx("second argument can only be \"raw\""); return (EINVAL); } } bzero(&i2cd, sizeof(i2cd)); i2cd.len = 1; i2cd.port_id = port; i2cd.dev_addr = SFF_8472_BASE; i2cd.offset = SFF_8472_ID; if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0) goto fail; if (i2cd.data[0] > SFF_8472_ID_LAST) printf("Unknown ID\n"); else printf("ID: %s\n", sff_8472_id[i2cd.data[0]]); bzero(&string, sizeof(string)); for (i = SFF_8472_VENDOR_START; i < SFF_8472_VENDOR_END; i++) { i2cd.offset = i; if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0) 
goto fail; string[i - SFF_8472_VENDOR_START] = i2cd.data[0]; } printf("Vendor %s\n", string); bzero(&string, sizeof(string)); for (i = SFF_8472_SN_START; i < SFF_8472_SN_END; i++) { i2cd.offset = i; if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0) goto fail; string[i - SFF_8472_SN_START] = i2cd.data[0]; } printf("SN %s\n", string); bzero(&string, sizeof(string)); for (i = SFF_8472_PN_START; i < SFF_8472_PN_END; i++) { i2cd.offset = i; if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0) goto fail; string[i - SFF_8472_PN_START] = i2cd.data[0]; } printf("PN %s\n", string); bzero(&string, sizeof(string)); for (i = SFF_8472_REV_START; i < SFF_8472_REV_END; i++) { i2cd.offset = i; if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0) goto fail; string[i - SFF_8472_REV_START] = i2cd.data[0]; } printf("Rev %s\n", string); i2cd.offset = SFF_8472_DIAG_TYPE; if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0) goto fail; if ((char )i2cd.data[0] & (SFF_8472_DIAG_IMPL | SFF_8472_DIAG_INTERNAL)) { /* Switch to reading from the Diagnostic address. 
*/ i2cd.dev_addr = SFF_8472_DIAG; i2cd.len = 1; i2cd.offset = SFF_8472_TEMP; if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0) goto fail; temp = i2cd.data[0] << 8; printf("Temp: "); if ((temp & SFF_8472_TEMP_SIGN) == SFF_8472_TEMP_SIGN) printf("-"); else printf("+"); printf("%dC\n", (temp & SFF_8472_TEMP_MSK) >> SFF_8472_TEMP_SHIFT); i2cd.offset = SFF_8472_VCC; if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0) goto fail; vcc = i2cd.data[0] << 8; printf("Vcc %fV\n", vcc / SFF_8472_VCC_FACTOR); i2cd.offset = SFF_8472_TX_BIAS; if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0) goto fail; tx_bias = i2cd.data[0] << 8; printf("TX Bias %fuA\n", tx_bias / SFF_8472_BIAS_FACTOR); i2cd.offset = SFF_8472_TX_POWER; if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0) goto fail; tx_power = i2cd.data[0] << 8; printf("TX Power %fmW\n", tx_power / SFF_8472_POWER_FACTOR); i2cd.offset = SFF_8472_RX_POWER; if ((rc = doit(CHELSIO_T4_GET_I2C, &i2cd)) != 0) goto fail; rx_power = i2cd.data[0] << 8; printf("RX Power %fmW\n", rx_power / SFF_8472_POWER_FACTOR); } else printf("Diagnostics not supported.\n"); return(0); fail: if (rc == EPERM) warnx("No module/cable in port %ld", port); return (rc); } /* XXX: pass in a low/high and do range checks as well */ static int get_sched_param(const char *param, const char *args[], long *val) { char *p; if (strcmp(param, args[0]) != 0) return (EINVAL); p = str_to_number(args[1], val, NULL); if (*p) { warnx("parameter \"%s\" has bad value \"%s\"", args[0], args[1]); return (EINVAL); } return (0); } static int sched_class(int argc, const char *argv[]) { struct t4_sched_params op; int errs, i; memset(&op, 0xff, sizeof(op)); op.subcmd = -1; op.type = -1; if (argc == 0) { warnx("missing scheduling sub-command"); return (EINVAL); } if (!strcmp(argv[0], "config")) { op.subcmd = SCHED_CLASS_SUBCMD_CONFIG; op.u.config.minmax = -1; } else if (!strcmp(argv[0], "params")) { op.subcmd = SCHED_CLASS_SUBCMD_PARAMS; op.u.params.level = op.u.params.mode = op.u.params.rateunit = 
op.u.params.ratemode = op.u.params.channel = op.u.params.cl = op.u.params.minrate = op.u.params.maxrate = op.u.params.weight = op.u.params.pktsize = -1; } else { warnx("invalid scheduling sub-command \"%s\"", argv[0]); return (EINVAL); } /* Decode remaining arguments ... */ errs = 0; for (i = 1; i < argc; i += 2) { const char **args = &argv[i]; long l; if (i + 1 == argc) { warnx("missing argument for \"%s\"", args[0]); errs++; break; } if (!strcmp(args[0], "type")) { if (!strcmp(args[1], "packet")) op.type = SCHED_CLASS_TYPE_PACKET; else { warnx("invalid type parameter \"%s\"", args[1]); errs++; } continue; } if (op.subcmd == SCHED_CLASS_SUBCMD_CONFIG) { if(!get_sched_param("minmax", args, &l)) op.u.config.minmax = (int8_t)l; else { warnx("unknown scheduler config parameter " "\"%s\"", args[0]); errs++; } continue; } /* Rest applies only to SUBCMD_PARAMS */ if (op.subcmd != SCHED_CLASS_SUBCMD_PARAMS) continue; if (!strcmp(args[0], "level")) { if (!strcmp(args[1], "cl-rl")) op.u.params.level = SCHED_CLASS_LEVEL_CL_RL; else if (!strcmp(args[1], "cl-wrr")) op.u.params.level = SCHED_CLASS_LEVEL_CL_WRR; else if (!strcmp(args[1], "ch-rl")) op.u.params.level = SCHED_CLASS_LEVEL_CH_RL; else { warnx("invalid level parameter \"%s\"", args[1]); errs++; } } else if (!strcmp(args[0], "mode")) { if (!strcmp(args[1], "class")) op.u.params.mode = SCHED_CLASS_MODE_CLASS; else if (!strcmp(args[1], "flow")) op.u.params.mode = SCHED_CLASS_MODE_FLOW; else { warnx("invalid mode parameter \"%s\"", args[1]); errs++; } } else if (!strcmp(args[0], "rate-unit")) { if (!strcmp(args[1], "bits")) op.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS; else if (!strcmp(args[1], "pkts")) op.u.params.rateunit = SCHED_CLASS_RATEUNIT_PKTS; else { warnx("invalid rate-unit parameter \"%s\"", args[1]); errs++; } } else if (!strcmp(args[0], "rate-mode")) { if (!strcmp(args[1], "relative")) op.u.params.ratemode = SCHED_CLASS_RATEMODE_REL; else if (!strcmp(args[1], "absolute")) op.u.params.ratemode = 
SCHED_CLASS_RATEMODE_ABS; else { warnx("invalid rate-mode parameter \"%s\"", args[1]); errs++; } } else if (!get_sched_param("channel", args, &l)) op.u.params.channel = (int8_t)l; else if (!get_sched_param("class", args, &l)) op.u.params.cl = (int8_t)l; else if (!get_sched_param("min-rate", args, &l)) op.u.params.minrate = (int32_t)l; else if (!get_sched_param("max-rate", args, &l)) op.u.params.maxrate = (int32_t)l; else if (!get_sched_param("weight", args, &l)) op.u.params.weight = (int16_t)l; else if (!get_sched_param("pkt-size", args, &l)) op.u.params.pktsize = (int16_t)l; else { warnx("unknown scheduler parameter \"%s\"", args[0]); errs++; } } /* * Catch some logical fallacies in terms of argument combinations here * so we can offer more than just the EINVAL return from the driver. * The driver will be able to catch a lot more issues since it knows * the specifics of the device hardware capabilities like how many * channels, classes, etc. the device supports. */ if (op.type < 0) { warnx("sched \"type\" parameter missing"); errs++; } if (op.subcmd == SCHED_CLASS_SUBCMD_CONFIG) { if (op.u.config.minmax < 0) { warnx("sched config \"minmax\" parameter missing"); errs++; } } if (op.subcmd == SCHED_CLASS_SUBCMD_PARAMS) { if (op.u.params.level < 0) { warnx("sched params \"level\" parameter missing"); errs++; } if (op.u.params.mode < 0) { warnx("sched params \"mode\" parameter missing"); errs++; } if (op.u.params.rateunit < 0) { warnx("sched params \"rate-unit\" parameter missing"); errs++; } if (op.u.params.ratemode < 0) { warnx("sched params \"rate-mode\" parameter missing"); errs++; } if (op.u.params.channel < 0) { warnx("sched params \"channel\" missing"); errs++; } if (op.u.params.cl < 0) { warnx("sched params \"class\" missing"); errs++; } if (op.u.params.maxrate < 0 && (op.u.params.level == SCHED_CLASS_LEVEL_CL_RL || op.u.params.level == SCHED_CLASS_LEVEL_CH_RL)) { warnx("sched params \"max-rate\" missing for " "rate-limit level"); errs++; } if 
(op.u.params.weight < 0 && op.u.params.level == SCHED_CLASS_LEVEL_CL_WRR) { warnx("sched params \"weight\" missing for " "weighted-round-robin level"); errs++; } if (op.u.params.pktsize < 0 && (op.u.params.level == SCHED_CLASS_LEVEL_CL_RL || op.u.params.level == SCHED_CLASS_LEVEL_CH_RL)) { warnx("sched params \"pkt-size\" missing for " "rate-limit level"); errs++; } if (op.u.params.mode == SCHED_CLASS_MODE_FLOW && op.u.params.ratemode != SCHED_CLASS_RATEMODE_ABS) { warnx("sched params mode flow needs rate-mode absolute"); errs++; } if (op.u.params.ratemode == SCHED_CLASS_RATEMODE_REL && !in_range(op.u.params.maxrate, 1, 100)) { warnx("sched params \"max-rate\" takes " "percentage value(1-100) for rate-mode relative"); errs++; } if (op.u.params.ratemode == SCHED_CLASS_RATEMODE_ABS && - !in_range(op.u.params.maxrate, 1, 10000000)) { + !in_range(op.u.params.maxrate, 1, 100000000)) { warnx("sched params \"max-rate\" takes " - "value(1-10000000) for rate-mode absolute"); + "value(1-100000000) for rate-mode absolute"); errs++; } if (op.u.params.maxrate > 0 && op.u.params.maxrate < op.u.params.minrate) { warnx("sched params \"max-rate\" is less than " "\"min-rate\""); errs++; } } if (errs > 0) { warnx("%d error%s in sched-class command", errs, errs == 1 ? 
"" : "s"); return (EINVAL); } return doit(CHELSIO_T4_SCHED_CLASS, &op); } static int sched_queue(int argc, const char *argv[]) { struct t4_sched_queue op = {0}; char *p; long val; if (argc != 3) { /* need " */ warnx("incorrect number of arguments."); return (EINVAL); } p = str_to_number(argv[0], &val, NULL); if (*p || val > UCHAR_MAX) { warnx("invalid port id \"%s\"", argv[0]); return (EINVAL); } op.port = (uint8_t)val; if (!strcmp(argv[1], "all") || !strcmp(argv[1], "*")) op.queue = -1; else { p = str_to_number(argv[1], &val, NULL); if (*p || val < -1) { warnx("invalid queue \"%s\"", argv[1]); return (EINVAL); } op.queue = (int8_t)val; } if (!strcmp(argv[2], "unbind") || !strcmp(argv[2], "clear")) op.cl = -1; else { p = str_to_number(argv[2], &val, NULL); if (*p || val < -1) { warnx("invalid class \"%s\"", argv[2]); return (EINVAL); } op.cl = (int8_t)val; } return doit(CHELSIO_T4_SCHED_QUEUE, &op); } static int run_cmd(int argc, const char *argv[]) { int rc = -1; const char *cmd = argv[0]; /* command */ argc--; argv++; if (!strcmp(cmd, "reg") || !strcmp(cmd, "reg32")) rc = register_io(argc, argv, 4); else if (!strcmp(cmd, "reg64")) rc = register_io(argc, argv, 8); else if (!strcmp(cmd, "regdump")) rc = dump_regs(argc, argv); else if (!strcmp(cmd, "filter")) rc = filter_cmd(argc, argv); else if (!strcmp(cmd, "context")) rc = get_sge_context(argc, argv); else if (!strcmp(cmd, "loadfw")) rc = loadfw(argc, argv); else if (!strcmp(cmd, "memdump")) rc = memdump(argc, argv); else if (!strcmp(cmd, "tcb")) rc = read_tcb(argc, argv); else if (!strcmp(cmd, "i2c")) rc = read_i2c(argc, argv); else if (!strcmp(cmd, "clearstats")) rc = clearstats(argc, argv); else if (!strcmp(cmd, "tracer")) rc = tracer_cmd(argc, argv); else if (!strcmp(cmd, "modinfo")) rc = modinfo(argc, argv); else if (!strcmp(cmd, "sched-class")) rc = sched_class(argc, argv); else if (!strcmp(cmd, "sched-queue")) rc = sched_queue(argc, argv); else { rc = EINVAL; warnx("invalid command \"%s\"", cmd); } return 
(rc); } #define MAX_ARGS 15 static int run_cmd_loop(void) { int i, rc = 0; char buffer[128], *buf; const char *args[MAX_ARGS + 1]; /* * Simple loop: displays a "> " prompt and processes any input as a * cxgbetool command. You're supposed to enter only the part after * "cxgbetool t4nexX". Use "quit" or "exit" to exit. */ for (;;) { fprintf(stdout, "> "); fflush(stdout); buf = fgets(buffer, sizeof(buffer), stdin); if (buf == NULL) { if (ferror(stdin)) { warn("stdin error"); rc = errno; /* errno from fgets */ } break; } i = 0; while ((args[i] = strsep(&buf, " \t\n")) != NULL) { if (args[i][0] != 0 && ++i == MAX_ARGS) break; } args[i] = 0; if (i == 0) continue; /* skip empty line */ if (!strcmp(args[0], "quit") || !strcmp(args[0], "exit")) break; rc = run_cmd(i, args); } /* rc normally comes from the last command (not including quit/exit) */ return (rc); } int main(int argc, const char *argv[]) { int rc = -1; progname = argv[0]; if (argc == 2) { if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { usage(stdout); exit(0); } } if (argc < 3) { usage(stderr); exit(EINVAL); } nexus = argv[1]; /* progname and nexus */ argc -= 2; argv += 2; if (argc == 1 && !strcmp(argv[0], "stdio")) rc = run_cmd_loop(); else rc = run_cmd(argc, argv); return (rc); } Index: projects/vnet/usr.bin/indent/lexi.c =================================================================== --- projects/vnet/usr.bin/indent/lexi.c (revision 301522) +++ projects/vnet/usr.bin/indent/lexi.c (revision 301523) @@ -1,608 +1,608 @@ /* * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); /* * Here we have the token scanner for indent. It scans off one token and puts * it in the global variable "token". It returns a code, indicating the type * of token scanned. 
*/ #include #include #include #include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" #define alphanum 1 #define opchar 3 struct templ { const char *rwd; int rwcode; }; struct templ specials[1000] = { {"switch", 1}, {"case", 2}, {"break", 0}, {"struct", 3}, {"union", 3}, {"enum", 3}, {"default", 2}, {"int", 4}, {"char", 4}, {"float", 4}, {"double", 4}, {"long", 4}, {"short", 4}, - {"typdef", 4}, + {"typedef", 4}, {"unsigned", 4}, {"register", 4}, {"static", 4}, {"global", 4}, {"extern", 4}, {"void", 4}, {"const", 4}, {"volatile", 4}, {"goto", 0}, {"return", 0}, {"if", 5}, {"while", 5}, {"for", 5}, {"else", 6}, {"do", 6}, {"sizeof", 7}, {0, 0} }; char chartype[128] = { /* this is used to facilitate the decision of * what type (alphanumeric, operator) each * character is */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0 }; int lexi(void) { int unary_delim; /* this is set to 1 if the current token * forces a following operator to be unary */ static int last_code; /* the last token type returned */ static int l_struct; /* set to 1 if the last token was 'struct' */ int code; /* internal code to be returned */ char qchar; /* the delimiter character for a string */ e_token = s_token; /* point to start of place to save token */ unary_delim = false; ps.col_1 = ps.last_nl; /* tell world that this token started in * column 1 iff the last thing scanned was nl */ ps.last_nl = false; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ ps.col_1 = false; /* leading blanks imply token is not in column * 1 */ if (++buf_ptr >= buf_end) fill_buffer(); } /* Scan an alphanumeric token */ if 
(chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { /* * we have a character or number */ const char *j; /* used for searching thru list of * * reserved words */ struct templ *p; if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { int seendot = 0, seenexp = 0, seensfx = 0; if (*buf_ptr == '0' && (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { *e_token++ = *buf_ptr++; *e_token++ = *buf_ptr++; while (isxdigit(*buf_ptr)) { CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; } } else while (1) { if (*buf_ptr == '.') { if (seendot) break; else seendot++; } CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; if (!isdigit(*buf_ptr) && *buf_ptr != '.') { if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) break; else { seenexp++; seendot++; CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; if (*buf_ptr == '+' || *buf_ptr == '-') *e_token++ = *buf_ptr++; } } } while (1) { if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; seensfx |= 1; continue; } if (!(seensfx & 2) && (*buf_ptr == 'L' || *buf_ptr == 'l')) { CHECK_SIZE_TOKEN; if (buf_ptr[1] == buf_ptr[0]) *e_token++ = *buf_ptr++; *e_token++ = *buf_ptr++; seensfx |= 2; continue; } break; } } else while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { /* fill_buffer() terminates buffer with newline */ if (*buf_ptr == BACKSLASH) { if (*(buf_ptr + 1) == '\n') { buf_ptr += 2; if (buf_ptr >= buf_end) fill_buffer(); } else break; } CHECK_SIZE_TOKEN; /* copy it over */ *e_token++ = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } *e_token++ = '\0'; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ if (++buf_ptr >= buf_end) fill_buffer(); } ps.its_a_keyword = false; ps.sizeof_keyword = false; if (l_struct && !ps.p_l_follow) { /* if last token was 'struct' and we're not * in parentheses, then this token * should be treated as a declaration */ l_struct = false; last_code = ident; ps.last_u_d = true; return (decl); } 
ps.last_u_d = l_struct; /* Operator after identifier is binary * unless last token was 'struct' */ l_struct = false; last_code = ident; /* Remember that this is the code we will * return */ if (auto_typedefs) { const char *q = s_token; size_t q_len = strlen(q); /* Check if we have an "_t" in the end */ if (q_len > 2 && (strcmp(q + q_len - 2, "_t") == 0)) { ps.its_a_keyword = true; ps.last_u_d = true; goto found_auto_typedef; } } /* * This loop will check if the token is a keyword. */ for (p = specials; (j = p->rwd) != 0; p++) { const char *q = s_token; /* point at scanned token */ if (*j++ != *q++ || *j++ != *q++) continue; /* This test depends on the fact that * identifiers are always at least 1 character * long (ie. the first two bytes of the * identifier are always meaningful) */ if (q[-1] == 0) break; /* If its a one-character identifier */ while (*q++ == *j) if (*j++ == 0) goto found_keyword; /* I wish that C had a multi-level * break... */ } if (p->rwd) { /* we have a keyword */ found_keyword: ps.its_a_keyword = true; ps.last_u_d = true; switch (p->rwcode) { case 1: /* it is a switch */ return (swstmt); case 2: /* a case or default */ return (casestmt); case 3: /* a "struct" */ /* * Next time around, we will want to know that we have had a * 'struct' */ l_struct = true; /* FALLTHROUGH */ case 4: /* one of the declaration keywords */ found_auto_typedef: if (ps.p_l_follow) { ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask; break; /* inside parens: cast, param list or sizeof */ } last_code = decl; return (decl); case 5: /* if, while, for */ return (sp_paren); case 6: /* do, else */ return (sp_nparen); case 7: ps.sizeof_keyword = true; default: /* all others are treated like any other * identifier */ return (ident); } /* end of switch */ } /* end of if (found_it) */ if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { char *tp = buf_ptr; while (tp < buf_end) if (*tp++ == ')' && (*tp == ';' || *tp == ',')) goto not_proc; strncpy(ps.procname, token, 
sizeof ps.procname - 1); ps.in_parameter_declaration = 1; rparen_count = 1; not_proc:; } /* * The following hack attempts to guess whether or not the current * token is in fact a declaration keyword -- one that has been * typedefd */ if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') && !ps.p_l_follow && !ps.block_init && (ps.last_token == rparen || ps.last_token == semicolon || ps.last_token == decl || ps.last_token == lbrace || ps.last_token == rbrace)) { ps.its_a_keyword = true; ps.last_u_d = true; last_code = decl; return decl; } if (last_code == decl) /* if this is a declared variable, then * following sign is unary */ ps.last_u_d = true; /* will make "int a -1" work */ last_code = ident; return (ident); /* the ident is not in the list */ } /* end of procesing for alpanum character */ /* Scan a non-alphanumeric token */ *e_token++ = *buf_ptr; /* if it is only a one-character token, it is * moved here */ *e_token = '\0'; if (++buf_ptr >= buf_end) fill_buffer(); switch (*token) { case '\n': unary_delim = ps.last_u_d; ps.last_nl = true; /* remember that we just had a newline */ code = (had_eof ? 
0 : newline); /* * if data has been exhausted, the newline is a dummy, and we should * return code to stop */ break; case '\'': /* start of quoted character */ case '"': /* start of string */ qchar = *token; if (troff) { e_token[-1] = '`'; if (qchar == '"') *e_token++ = '`'; e_token = chfont(&bodyf, &stringf, e_token); } do { /* copy the string */ while (1) { /* move one character or [/] */ if (*buf_ptr == '\n') { diag2(1, "Unterminated literal"); goto stop_lit; } CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, * since CHECK_SIZE guarantees that there * are at least 5 entries left */ *e_token = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); if (*e_token == BACKSLASH) { /* if escape, copy extra char */ if (*buf_ptr == '\n') /* check for escaped newline */ ++line_no; if (troff) { *++e_token = BACKSLASH; if (*buf_ptr == BACKSLASH) *++e_token = BACKSLASH; } *++e_token = *buf_ptr++; ++e_token; /* we must increment this again because we * copied two chars */ if (buf_ptr >= buf_end) fill_buffer(); } else break; /* we copied one character */ } /* end of while (1) */ } while (*e_token++ != qchar); if (troff) { e_token = chfont(&stringf, &bodyf, e_token - 1); if (qchar == '"') *e_token++ = '\''; } stop_lit: code = ident; break; case ('('): case ('['): unary_delim = true; code = lparen; break; case (')'): case (']'): code = rparen; break; case '#': unary_delim = ps.last_u_d; code = preesc; break; case '?': unary_delim = true; code = question; break; case (':'): code = colon; unary_delim = true; break; case (';'): unary_delim = true; code = semicolon; break; case ('{'): unary_delim = true; /* * if (ps.in_or_st) ps.block_init = 1; */ /* ? code = ps.block_init ? lparen : lbrace; */ code = lbrace; break; case ('}'): unary_delim = true; /* ? code = ps.block_init ? 
rparen : rbrace; */ code = rbrace; break; case 014: /* a form feed */ unary_delim = ps.last_u_d; ps.last_nl = true; /* remember this so we can set 'ps.col_1' * right */ code = form_feed; break; case (','): unary_delim = true; code = comma; break; case '.': unary_delim = false; code = period; break; case '-': case '+': /* check for -, +, --, ++ */ code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; if (*buf_ptr == token[0]) { /* check for doubled character */ *e_token++ = *buf_ptr++; /* buffer overflow will be checked at end of loop */ if (last_code == ident || last_code == rparen) { code = (ps.last_u_d ? unary_op : postop); /* check for following ++ or -- */ unary_delim = false; } } else if (*buf_ptr == '=') /* check for operator += */ *e_token++ = *buf_ptr++; else if (*buf_ptr == '>') { /* check for operator -> */ *e_token++ = *buf_ptr++; if (!pointer_as_binop) { unary_delim = false; code = unary_op; ps.want_blank = false; } } break; /* buffer overflow will be checked at end of * switch */ case '=': if (ps.in_or_st) ps.block_init = 1; #ifdef undef if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ e_token[-1] = *buf_ptr++; if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) *e_token++ = *buf_ptr++; *e_token++ = '='; /* Flip =+ to += */ *e_token = 0; } #else if (*buf_ptr == '=') {/* == */ *e_token++ = '='; /* Flip =+ to += */ buf_ptr++; *e_token = 0; } #endif code = binary_op; unary_delim = true; break; /* can drop thru!!! */ case '>': case '<': case '!': /* ops like <, <<, <=, !=, etc */ if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { *e_token++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer(); } if (*buf_ptr == '=') *e_token++ = *buf_ptr++; code = (ps.last_u_d ? 
unary_op : binary_op); unary_delim = true; break; default: if (token[0] == '/' && *buf_ptr == '*') { /* it is start of comment */ *e_token++ = '*'; if (++buf_ptr >= buf_end) fill_buffer(); code = comment; unary_delim = ps.last_u_d; break; } while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { /* * handle ||, &&, etc, and also things as in int *****i */ *e_token++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer(); } code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; } /* end of switch */ if (code != newline) { l_struct = false; last_code = code; } if (buf_ptr >= buf_end) /* check for input buffer empty */ fill_buffer(); ps.last_u_d = unary_delim; *e_token = '\0'; /* null terminate the token */ return (code); } /* * Add the given keyword to the keyword table, using val as the keyword type */ void addkey(char *key, int val) { struct templ *p = specials; while (p->rwd) if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) return; else p++; if (p >= specials + sizeof specials / sizeof specials[0]) return; /* For now, table overflows are silently * ignored */ p->rwd = key; p->rwcode = val; p[1].rwd = 0; p[1].rwcode = 0; } Index: projects/vnet =================================================================== --- projects/vnet (revision 301522) +++ projects/vnet (revision 301523) Property changes on: projects/vnet ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r301509-301522