Index: user/attilio/vmcontention/crypto/openssh/sshd_config =================================================================== --- user/attilio/vmcontention/crypto/openssh/sshd_config (revision 252343) +++ user/attilio/vmcontention/crypto/openssh/sshd_config (revision 252344) @@ -1,142 +1,142 @@ # $OpenBSD: sshd_config,v 1.89 2013/02/06 00:20:42 dtucker Exp $ # $FreeBSD$ # This is the sshd server system-wide configuration file. See # sshd_config(5) for more information. # This sshd was compiled with PATH=/usr/bin:/bin:/usr/sbin:/sbin # The strategy used for options in the default sshd_config shipped with # OpenSSH is to specify options with their default value where # possible, but leave them commented. Uncommented options override the # default value. # Note that some of FreeBSD's defaults differ from OpenBSD's, and # FreeBSD has a few additional options. #Port 22 #AddressFamily any #ListenAddress 0.0.0.0 #ListenAddress :: # The default requires explicit activation of protocol 1 #Protocol 2 # HostKey for protocol version 1 #HostKey /etc/ssh/ssh_host_key # HostKeys for protocol version 2 #HostKey /etc/ssh/ssh_host_rsa_key #HostKey /etc/ssh/ssh_host_dsa_key #HostKey /etc/ssh/ssh_host_ecdsa_key # Lifetime and size of ephemeral version 1 server key #KeyRegenerationInterval 1h #ServerKeyBits 1024 # Logging # obsoletes QuietMode and FascistLogging #SyslogFacility AUTH #LogLevel INFO # Authentication: #LoginGraceTime 2m #PermitRootLogin no #StrictModes yes #MaxAuthTries 6 #MaxSessions 10 #RSAAuthentication yes #PubkeyAuthentication yes # The default is to check both .ssh/authorized_keys and .ssh/authorized_keys2 #AuthorizedKeysFile .ssh/authorized_keys .ssh/authorized_keys2 #AuthorizedPrincipalsFile none #AuthorizedKeysCommand none #AuthorizedKeysCommandUser nobody # For this to work you will also need host keys in /etc/ssh/ssh_known_hosts #RhostsRSAAuthentication no # similar for protocol version 2 #HostbasedAuthentication no # Change to yes if you don't trust ~/.ssh/known_hosts for # RhostsRSAAuthentication and HostbasedAuthentication #IgnoreUserKnownHosts no # Don't read the user's ~/.rhosts and ~/.shosts files #IgnoreRhosts yes # Change to yes to enable built-in password authentication. #PasswordAuthentication no #PermitEmptyPasswords no # Change to no to disable PAM authentication #ChallengeResponseAuthentication yes # Kerberos options #KerberosAuthentication no #KerberosOrLocalPasswd yes #KerberosTicketCleanup yes #KerberosGetAFSToken no # GSSAPI options #GSSAPIAuthentication no #GSSAPICleanupCredentials yes # Set this to 'no' to disable PAM authentication, account processing, # and session processing. If this is enabled, PAM authentication will # be allowed through the ChallengeResponseAuthentication and # PasswordAuthentication. Depending on your PAM configuration, # PAM authentication via ChallengeResponseAuthentication may bypass # the setting of "PermitRootLogin without-password". # If you just want the PAM account and session checks to run without # PAM authentication, then enable this but set PasswordAuthentication # and ChallengeResponseAuthentication to 'no'. #UsePAM yes #AllowAgentForwarding yes #AllowTcpForwarding yes #GatewayPorts no #X11Forwarding yes #X11DisplayOffset 10 #X11UseLocalhost yes #PrintMotd yes #PrintLastLog yes #TCPKeepAlive yes #UseLogin no -#UsePrivilegeSeparation sandbox +#UsePrivilegeSeparation yes #PermitUserEnvironment no #Compression delayed #ClientAliveInterval 0 #ClientAliveCountMax 3 #UseDNS yes #PidFile /var/run/sshd.pid #MaxStartups 10:30:100 #PermitTunnel no #ChrootDirectory none #VersionAddendum FreeBSD-20130515 # no default banner path #Banner none # override default of no subsystems Subsystem sftp /usr/libexec/sftp-server # Disable HPN tuning improvements. #HPNDisabled no # Buffer size for HPN to non-HPN connections. #HPNBufferSize 2048 # TCP receive socket buffer polling for HPN. Disable on non autotuning kernels. #TcpRcvBufPoll yes # Allow the use of the NONE cipher. #NoneEnabled no # Example of overriding settings on a per-user basis #Match User anoncvs # X11Forwarding no # AllowTcpForwarding no # ForceCommand cvs server Index: user/attilio/vmcontention/crypto/openssh/sshd_config.5 =================================================================== --- user/attilio/vmcontention/crypto/openssh/sshd_config.5 (revision 252343) +++ user/attilio/vmcontention/crypto/openssh/sshd_config.5 (revision 252344) @@ -1,1317 +1,1317 @@ .\" .\" Author: Tatu Ylonen .\" Copyright (c) 1995 Tatu Ylonen , Espoo, Finland .\" All rights reserved .\" .\" As far as I am concerned, the code I have written for this software .\" can be used freely for any purpose. Any derived versions of this .\" software must be clearly marked as such, and if the derived work is .\" incompatible with the protocol description in the RFC file, it must be .\" called by a name other than "ssh" or "Secure Shell". .\" .\" Copyright (c) 1999,2000 Markus Friedl. All rights reserved. .\" Copyright (c) 1999 Aaron Campbell. All rights reserved. .\" Copyright (c) 1999 Theo de Raadt. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" .\" $OpenBSD: sshd_config.5,v 1.156 2013/02/06 00:20:42 dtucker Exp $ .\" $FreeBSD$ .Dd February 6, 2013 .Dt SSHD_CONFIG 5 .Os .Sh NAME .Nm sshd_config .Nd OpenSSH SSH daemon configuration file .Sh SYNOPSIS .Nm /etc/ssh/sshd_config .Sh DESCRIPTION .Xr sshd 8 reads configuration data from .Pa /etc/ssh/sshd_config (or the file specified with .Fl f on the command line). The file contains keyword-argument pairs, one per line. Lines starting with .Ql # and empty lines are interpreted as comments. Arguments may optionally be enclosed in double quotes .Pq \&" in order to represent arguments containing spaces. .Pp The possible keywords and their meanings are as follows (note that keywords are case-insensitive and arguments are case-sensitive): .Bl -tag -width Ds .It Cm AcceptEnv Specifies what environment variables sent by the client will be copied into the session's .Xr environ 7 . See .Cm SendEnv in .Xr ssh_config 5 for how to configure the client. Note that environment passing is only supported for protocol 2. Variables are specified by name, which may contain the wildcard characters .Ql * and .Ql \&? . Multiple environment variables may be separated by whitespace or spread across multiple .Cm AcceptEnv directives. Be warned that some environment variables could be used to bypass restricted user environments. For this reason, care should be taken in the use of this directive. The default is not to accept any environment variables. .It Cm AddressFamily Specifies which address family should be used by .Xr sshd 8 . Valid arguments are .Dq any , .Dq inet (use IPv4 only), or .Dq inet6 (use IPv6 only). The default is .Dq any . .It Cm AllowAgentForwarding Specifies whether .Xr ssh-agent 1 forwarding is permitted. The default is .Dq yes . Note that disabling agent forwarding does not improve security unless users are also denied shell access, as they can always install their own forwarders. .It Cm AllowGroups This keyword can be followed by a list of group name patterns, separated by spaces. If specified, login is allowed only for users whose primary group or supplementary group list matches one of the patterns. Only group names are valid; a numerical group ID is not recognized. By default, login is allowed for all groups. The allow/deny directives are processed in the following order: .Cm DenyUsers , .Cm AllowUsers , .Cm DenyGroups , and finally .Cm AllowGroups . .Pp See .Sx PATTERNS in .Xr ssh_config 5 for more information on patterns. .It Cm AllowTcpForwarding Specifies whether TCP forwarding is permitted. The available options are .Dq yes or .Dq all to allow TCP forwarding, .Dq no to prevent all TCP forwarding, .Dq local to allow local (from the perspective of .Xr ssh 1 ) forwarding only or .Dq remote to allow remote forwarding only. The default is .Dq yes . Note that disabling TCP forwarding does not improve security unless users are also denied shell access, as they can always install their own forwarders. .It Cm AllowUsers This keyword can be followed by a list of user name patterns, separated by spaces. If specified, login is allowed only for user names that match one of the patterns. Only user names are valid; a numerical user ID is not recognized. By default, login is allowed for all users. If the pattern takes the form USER@HOST then USER and HOST are separately checked, restricting logins to particular users from particular hosts. The allow/deny directives are processed in the following order: .Cm DenyUsers , .Cm AllowUsers , .Cm DenyGroups , and finally .Cm AllowGroups . .Pp See .Sx PATTERNS in .Xr ssh_config 5 for more information on patterns. .It Cm AuthenticationMethods Specifies the authentication methods that must be successfully completed for a user to be granted access. This option must be followed by one or more comma-separated lists of authentication method names. Successful authentication requires completion of every method in at least one of these lists. .Pp For example, an argument of .Dq publickey,password publickey,keyboard-interactive would require the user to complete public key authentication, followed by either password or keyboard interactive authentication. Only methods that are next in one or more lists are offered at each stage, so for this example, it would not be possible to attempt password or keyboard-interactive authentication before public key. .Pp This option is only available for SSH protocol 2 and will yield a fatal error if enabled if protocol 1 is also enabled. Note that each authentication method listed should also be explicitly enabled in the configuration. The default is not to require multiple authentication; successful completion of a single authentication method is sufficient. .It Cm AuthorizedKeysCommand Specifies a program to be used to look up the user's public keys. The program will be invoked with a single argument of the username being authenticated, and should produce on standard output zero or more lines of authorized_keys output (see .Sx AUTHORIZED_KEYS in .Xr sshd 8 ) . If a key supplied by AuthorizedKeysCommand does not successfully authenticate and authorize the user then public key authentication continues using the usual .Cm AuthorizedKeysFile files. By default, no AuthorizedKeysCommand is run. .It Cm AuthorizedKeysCommandUser Specifies the user under whose account the AuthorizedKeysCommand is run. It is recommended to use a dedicated user that has no other role on the host than running authorized keys commands. .It Cm AuthorizedKeysFile Specifies the file that contains the public keys that can be used for user authentication. The format is described in the .Sx AUTHORIZED_KEYS FILE FORMAT section of .Xr sshd 8 . .Cm AuthorizedKeysFile may contain tokens of the form %T which are substituted during connection setup. The following tokens are defined: %% is replaced by a literal '%', %h is replaced by the home directory of the user being authenticated, and %u is replaced by the username of that user. After expansion, .Cm AuthorizedKeysFile is taken to be an absolute path or one relative to the user's home directory. Multiple files may be listed, separated by whitespace. The default is .Dq .ssh/authorized_keys .ssh/authorized_keys2 . .It Cm AuthorizedPrincipalsFile Specifies a file that lists principal names that are accepted for certificate authentication. When using certificates signed by a key listed in .Cm TrustedUserCAKeys , this file lists names, one of which must appear in the certificate for it to be accepted for authentication. Names are listed one per line preceded by key options (as described in .Sx AUTHORIZED_KEYS FILE FORMAT in .Xr sshd 8 ) . Empty lines and comments starting with .Ql # are ignored. .Pp .Cm AuthorizedPrincipalsFile may contain tokens of the form %T which are substituted during connection setup. The following tokens are defined: %% is replaced by a literal '%', %h is replaced by the home directory of the user being authenticated, and %u is replaced by the username of that user. After expansion, .Cm AuthorizedPrincipalsFile is taken to be an absolute path or one relative to the user's home directory. .Pp The default is .Dq none , i.e. not to use a principals file \(en in this case, the username of the user must appear in a certificate's principals list for it to be accepted. Note that .Cm AuthorizedPrincipalsFile is only used when authentication proceeds using a CA listed in .Cm TrustedUserCAKeys and is not consulted for certification authorities trusted via .Pa ~/.ssh/authorized_keys , though the .Cm principals= key option offers a similar facility (see .Xr sshd 8 for details). .It Cm Banner The contents of the specified file are sent to the remote user before authentication is allowed. If the argument is .Dq none then no banner is displayed. This option is only available for protocol version 2. By default, no banner is displayed. .It Cm ChallengeResponseAuthentication Specifies whether challenge-response authentication is allowed (e.g. via PAM or though authentication styles supported in .Xr login.conf 5 ) The default is .Dq yes . .It Cm ChrootDirectory Specifies the pathname of a directory to .Xr chroot 2 to after authentication. All components of the pathname must be root-owned directories that are not writable by any other user or group. After the chroot, .Xr sshd 8 changes the working directory to the user's home directory. .Pp The pathname may contain the following tokens that are expanded at runtime once the connecting user has been authenticated: %% is replaced by a literal '%', %h is replaced by the home directory of the user being authenticated, and %u is replaced by the username of that user. .Pp The .Cm ChrootDirectory must contain the necessary files and directories to support the user's session. For an interactive session this requires at least a shell, typically .Xr sh 1 , and basic .Pa /dev nodes such as .Xr null 4 , .Xr zero 4 , .Xr stdin 4 , .Xr stdout 4 , .Xr stderr 4 , .Xr arandom 4 and .Xr tty 4 devices. For file transfer sessions using .Dq sftp , no additional configuration of the environment is necessary if the in-process sftp server is used, though sessions which use logging do require .Pa /dev/log inside the chroot directory (see .Xr sftp-server 8 for details). .Pp The default is not to .Xr chroot 2 . .It Cm Ciphers Specifies the ciphers allowed for protocol version 2. Multiple ciphers must be comma-separated. The supported ciphers are .Dq 3des-cbc , .Dq aes128-cbc , .Dq aes192-cbc , .Dq aes256-cbc , .Dq aes128-ctr , .Dq aes192-ctr , .Dq aes256-ctr , .Dq aes128-gcm@openssh.com , .Dq aes256-gcm@openssh.com , .Dq arcfour128 , .Dq arcfour256 , .Dq arcfour , .Dq blowfish-cbc , and .Dq cast128-cbc . The default is: .Bd -literal -offset 3n aes128-ctr,aes192-ctr,aes256-ctr,arcfour256,arcfour128, aes128-gcm@openssh.com,aes256-gcm@openssh.com, aes128-cbc,3des-cbc,blowfish-cbc,cast128-cbc,aes192-cbc, aes256-cbc,arcfour .Ed .It Cm ClientAliveCountMax Sets the number of client alive messages (see below) which may be sent without .Xr sshd 8 receiving any messages back from the client. If this threshold is reached while client alive messages are being sent, sshd will disconnect the client, terminating the session. It is important to note that the use of client alive messages is very different from .Cm TCPKeepAlive (below). The client alive messages are sent through the encrypted channel and therefore will not be spoofable. The TCP keepalive option enabled by .Cm TCPKeepAlive is spoofable. The client alive mechanism is valuable when the client or server depend on knowing when a connection has become inactive. .Pp The default value is 3. If .Cm ClientAliveInterval (see below) is set to 15, and .Cm ClientAliveCountMax is left at the default, unresponsive SSH clients will be disconnected after approximately 45 seconds. This option applies to protocol version 2 only. .It Cm ClientAliveInterval Sets a timeout interval in seconds after which if no data has been received from the client, .Xr sshd 8 will send a message through the encrypted channel to request a response from the client. The default is 0, indicating that these messages will not be sent to the client. This option applies to protocol version 2 only. .It Cm Compression Specifies whether compression is allowed, or delayed until the user has authenticated successfully. The argument must be .Dq yes , .Dq delayed , or .Dq no . The default is .Dq delayed . .It Cm DenyGroups This keyword can be followed by a list of group name patterns, separated by spaces. Login is disallowed for users whose primary group or supplementary group list matches one of the patterns. Only group names are valid; a numerical group ID is not recognized. By default, login is allowed for all groups. The allow/deny directives are processed in the following order: .Cm DenyUsers , .Cm AllowUsers , .Cm DenyGroups , and finally .Cm AllowGroups . .Pp See .Sx PATTERNS in .Xr ssh_config 5 for more information on patterns. .It Cm DenyUsers This keyword can be followed by a list of user name patterns, separated by spaces. Login is disallowed for user names that match one of the patterns. Only user names are valid; a numerical user ID is not recognized. By default, login is allowed for all users. If the pattern takes the form USER@HOST then USER and HOST are separately checked, restricting logins to particular users from particular hosts. The allow/deny directives are processed in the following order: .Cm DenyUsers , .Cm AllowUsers , .Cm DenyGroups , and finally .Cm AllowGroups . .Pp See .Sx PATTERNS in .Xr ssh_config 5 for more information on patterns. .It Cm ForceCommand Forces the execution of the command specified by .Cm ForceCommand , ignoring any command supplied by the client and .Pa ~/.ssh/rc if present. The command is invoked by using the user's login shell with the -c option. This applies to shell, command, or subsystem execution. It is most useful inside a .Cm Match block. The command originally supplied by the client is available in the .Ev SSH_ORIGINAL_COMMAND environment variable. Specifying a command of .Dq internal-sftp will force the use of an in-process sftp server that requires no support files when used with .Cm ChrootDirectory . .It Cm GatewayPorts Specifies whether remote hosts are allowed to connect to ports forwarded for the client. By default, .Xr sshd 8 binds remote port forwardings to the loopback address. This prevents other remote hosts from connecting to forwarded ports. .Cm GatewayPorts can be used to specify that sshd should allow remote port forwardings to bind to non-loopback addresses, thus allowing other hosts to connect. The argument may be .Dq no to force remote port forwardings to be available to the local host only, .Dq yes to force remote port forwardings to bind to the wildcard address, or .Dq clientspecified to allow the client to select the address to which the forwarding is bound. The default is .Dq no . .It Cm GSSAPIAuthentication Specifies whether user authentication based on GSSAPI is allowed. The default is .Dq no . Note that this option applies to protocol version 2 only. .It Cm GSSAPICleanupCredentials Specifies whether to automatically destroy the user's credentials cache on logout. The default is .Dq yes . Note that this option applies to protocol version 2 only. .It Cm HostbasedAuthentication Specifies whether rhosts or /etc/hosts.equiv authentication together with successful public key client host authentication is allowed (host-based authentication). This option is similar to .Cm RhostsRSAAuthentication and applies to protocol version 2 only. The default is .Dq no . .It Cm HostbasedUsesNameFromPacketOnly Specifies whether or not the server will attempt to perform a reverse name lookup when matching the name in the .Pa ~/.shosts , .Pa ~/.rhosts , and .Pa /etc/hosts.equiv files during .Cm HostbasedAuthentication . A setting of .Dq yes means that .Xr sshd 8 uses the name supplied by the client rather than attempting to resolve the name from the TCP connection itself. The default is .Dq no . .It Cm HostCertificate Specifies a file containing a public host certificate. The certificate's public key must match a private host key already specified by .Cm HostKey . The default behaviour of .Xr sshd 8 is not to load any certificates. .It Cm HostKey Specifies a file containing a private host key used by SSH. The default is .Pa /etc/ssh/ssh_host_key for protocol version 1, and .Pa /etc/ssh/ssh_host_dsa_key , .Pa /etc/ssh/ssh_host_ecdsa_key and .Pa /etc/ssh/ssh_host_rsa_key for protocol version 2. Note that .Xr sshd 8 will refuse to use a file if it is group/world-accessible. It is possible to have multiple host key files. .Dq rsa1 keys are used for version 1 and .Dq dsa , .Dq ecdsa or .Dq rsa are used for version 2 of the SSH protocol. .It Cm IgnoreRhosts Specifies that .Pa .rhosts and .Pa .shosts files will not be used in .Cm RhostsRSAAuthentication or .Cm HostbasedAuthentication . .Pp .Pa /etc/hosts.equiv and .Pa /etc/ssh/shosts.equiv are still used. The default is .Dq yes . .It Cm IgnoreUserKnownHosts Specifies whether .Xr sshd 8 should ignore the user's .Pa ~/.ssh/known_hosts during .Cm RhostsRSAAuthentication or .Cm HostbasedAuthentication . The default is .Dq no . .It Cm IPQoS Specifies the IPv4 type-of-service or DSCP class for the connection. Accepted values are .Dq af11 , .Dq af12 , .Dq af13 , .Dq af21 , .Dq af22 , .Dq af23 , .Dq af31 , .Dq af32 , .Dq af33 , .Dq af41 , .Dq af42 , .Dq af43 , .Dq cs0 , .Dq cs1 , .Dq cs2 , .Dq cs3 , .Dq cs4 , .Dq cs5 , .Dq cs6 , .Dq cs7 , .Dq ef , .Dq lowdelay , .Dq throughput , .Dq reliability , or a numeric value. This option may take one or two arguments, separated by whitespace. If one argument is specified, it is used as the packet class unconditionally. If two values are specified, the first is automatically selected for interactive sessions and the second for non-interactive sessions. The default is .Dq lowdelay for interactive sessions and .Dq throughput for non-interactive sessions. .It Cm KerberosAuthentication Specifies whether the password provided by the user for .Cm PasswordAuthentication will be validated through the Kerberos KDC. To use this option, the server needs a Kerberos servtab which allows the verification of the KDC's identity. The default is .Dq no . .It Cm KerberosGetAFSToken If AFS is active and the user has a Kerberos 5 TGT, attempt to acquire an AFS token before accessing the user's home directory. The default is .Dq no . .It Cm KerberosOrLocalPasswd If password authentication through Kerberos fails then the password will be validated via any additional local mechanism such as .Pa /etc/passwd . The default is .Dq yes . .It Cm KerberosTicketCleanup Specifies whether to automatically destroy the user's ticket cache file on logout. The default is .Dq yes . .It Cm KexAlgorithms Specifies the available KEX (Key Exchange) algorithms. Multiple algorithms must be comma-separated. The default is .Dq ecdh-sha2-nistp256 , .Dq ecdh-sha2-nistp384 , .Dq ecdh-sha2-nistp521 , .Dq diffie-hellman-group-exchange-sha256 , .Dq diffie-hellman-group-exchange-sha1 , .Dq diffie-hellman-group14-sha1 , .Dq diffie-hellman-group1-sha1 . .It Cm KeyRegenerationInterval In protocol version 1, the ephemeral server key is automatically regenerated after this many seconds (if it has been used). The purpose of regeneration is to prevent decrypting captured sessions by later breaking into the machine and stealing the keys. The key is never stored anywhere. If the value is 0, the key is never regenerated. The default is 3600 (seconds). .It Cm ListenAddress Specifies the local addresses .Xr sshd 8 should listen on. The following forms may be used: .Pp .Bl -item -offset indent -compact .It .Cm ListenAddress .Sm off .Ar host No | Ar IPv4_addr No | Ar IPv6_addr .Sm on .It .Cm ListenAddress .Sm off .Ar host No | Ar IPv4_addr No : Ar port .Sm on .It .Cm ListenAddress .Sm off .Oo .Ar host No | Ar IPv6_addr Oc : Ar port .Sm on .El .Pp If .Ar port is not specified, sshd will listen on the address and all prior .Cm Port options specified. The default is to listen on all local addresses. Multiple .Cm ListenAddress options are permitted. Additionally, any .Cm Port options must precede this option for non-port qualified addresses. .It Cm LoginGraceTime The server disconnects after this time if the user has not successfully logged in. If the value is 0, there is no time limit. The default is 120 seconds. .It Cm LogLevel Gives the verbosity level that is used when logging messages from .Xr sshd 8 . The possible values are: QUIET, FATAL, ERROR, INFO, VERBOSE, DEBUG, DEBUG1, DEBUG2, and DEBUG3. The default is INFO. DEBUG and DEBUG1 are equivalent. DEBUG2 and DEBUG3 each specify higher levels of debugging output. Logging with a DEBUG level violates the privacy of users and is not recommended. .It Cm MACs Specifies the available MAC (message authentication code) algorithms. The MAC algorithm is used in protocol version 2 for data integrity protection. Multiple algorithms must be comma-separated. The algorithms that contain .Dq -etm calculate the MAC after encryption (encrypt-then-mac). These are considered safer and their use recommended. The default is: .Bd -literal -offset indent hmac-md5-etm@openssh.com,hmac-sha1-etm@openssh.com, umac-64-etm@openssh.com,umac-128-etm@openssh.com, hmac-sha2-256-etm@openssh.com,hmac-sha2-512-etm@openssh.com, hmac-ripemd160-etm@openssh.com,hmac-sha1-96-etm@openssh.com, hmac-md5-96-etm@openssh.com, hmac-md5,hmac-sha1,umac-64@openssh.com,umac-128@openssh.com, hmac-sha2-256,hmac-sha2-512,hmac-ripemd160, hmac-sha1-96,hmac-md5-96 .Ed .It Cm Match Introduces a conditional block. If all of the criteria on the .Cm Match line are satisfied, the keywords on the following lines override those set in the global section of the config file, until either another .Cm Match line or the end of the file. .Pp The arguments to .Cm Match are one or more criteria-pattern pairs. The available criteria are .Cm User , .Cm Group , .Cm Host , .Cm LocalAddress , .Cm LocalPort , and .Cm Address . The match patterns may consist of single entries or comma-separated lists and may use the wildcard and negation operators described in the .Sx PATTERNS section of .Xr ssh_config 5 . .Pp The patterns in an .Cm Address criteria may additionally contain addresses to match in CIDR address/masklen format, e.g.\& .Dq 192.0.2.0/24 or .Dq 3ffe:ffff::/32 . Note that the mask length provided must be consistent with the address - it is an error to specify a mask length that is too long for the address or one with bits set in this host portion of the address. For example, .Dq 192.0.2.0/33 and .Dq 192.0.2.0/8 respectively. .Pp Only a subset of keywords may be used on the lines following a .Cm Match keyword. Available keywords are .Cm AcceptEnv , .Cm AllowAgentForwarding , .Cm AllowGroups , .Cm AllowTcpForwarding , .Cm AllowUsers , .Cm AuthenticationMethods , .Cm AuthorizedKeysCommand , .Cm AuthorizedKeysCommandUser , .Cm AuthorizedKeysFile , .Cm AuthorizedPrincipalsFile , .Cm Banner , .Cm ChrootDirectory , .Cm DenyGroups , .Cm DenyUsers , .Cm ForceCommand , .Cm GatewayPorts , .Cm GSSAPIAuthentication , .Cm HostbasedAuthentication , .Cm HostbasedUsesNameFromPacketOnly , .Cm KbdInteractiveAuthentication , .Cm KerberosAuthentication , .Cm MaxAuthTries , .Cm MaxSessions , .Cm PasswordAuthentication , .Cm PermitEmptyPasswords , .Cm PermitOpen , .Cm PermitRootLogin , .Cm PermitTunnel , .Cm PubkeyAuthentication , .Cm RhostsRSAAuthentication , .Cm RSAAuthentication , .Cm X11DisplayOffset , .Cm X11Forwarding and .Cm X11UseLocalHost . .It Cm MaxAuthTries Specifies the maximum number of authentication attempts permitted per connection. Once the number of failures reaches half this value, additional failures are logged. The default is 6. .It Cm MaxSessions Specifies the maximum number of open sessions permitted per network connection. The default is 10. .It Cm MaxStartups Specifies the maximum number of concurrent unauthenticated connections to the SSH daemon. Additional connections will be dropped until authentication succeeds or the .Cm LoginGraceTime expires for a connection. The default is 10:30:100. .Pp Alternatively, random early drop can be enabled by specifying the three colon separated values .Dq start:rate:full (e.g. "10:30:60"). .Xr sshd 8 will refuse connection attempts with a probability of .Dq rate/100 (30%) if there are currently .Dq start (10) unauthenticated connections. The probability increases linearly and all connection attempts are refused if the number of unauthenticated connections reaches .Dq full (60). .It Cm PasswordAuthentication Specifies whether password authentication is allowed. See also .Cm UsePAM . The default is .Dq no . .It Cm PermitEmptyPasswords When password authentication is allowed, it specifies whether the server allows login to accounts with empty password strings. The default is .Dq no . .It Cm PermitOpen Specifies the destinations to which TCP port forwarding is permitted. The forwarding specification must be one of the following forms: .Pp .Bl -item -offset indent -compact .It .Cm PermitOpen .Sm off .Ar host : port .Sm on .It .Cm PermitOpen .Sm off .Ar IPv4_addr : port .Sm on .It .Cm PermitOpen .Sm off .Ar \&[ IPv6_addr \&] : port .Sm on .El .Pp Multiple forwards may be specified by separating them with whitespace. An argument of .Dq any can be used to remove all restrictions and permit any forwarding requests. An argument of .Dq none can be used to prohibit all forwarding requests. By default all port forwarding requests are permitted. .It Cm PermitRootLogin Specifies whether root can log in using .Xr ssh 1 . The argument must be .Dq yes , .Dq without-password , .Dq forced-commands-only , or .Dq no . The default is .Dq no . Note that if .Cm ChallengeResponseAuthentication is .Dq yes , the root user may be allowed in with its password even if .Cm PermitRootLogin is set to .Dq without-password . .Pp If this option is set to .Dq without-password , password authentication is disabled for root. .Pp If this option is set to .Dq forced-commands-only , root login with public key authentication will be allowed, but only if the .Ar command option has been specified (which may be useful for taking remote backups even if root login is normally not allowed). All other authentication methods are disabled for root. .Pp If this option is set to .Dq no , root is not allowed to log in. .It Cm PermitTunnel Specifies whether .Xr tun 4 device forwarding is allowed. The argument must be .Dq yes , .Dq point-to-point (layer 3), .Dq ethernet (layer 2), or .Dq no . Specifying .Dq yes permits both .Dq point-to-point and .Dq ethernet . The default is .Dq no . .It Cm PermitUserEnvironment Specifies whether .Pa ~/.ssh/environment and .Cm environment= options in .Pa ~/.ssh/authorized_keys are processed by .Xr sshd 8 . The default is .Dq no . Enabling environment processing may enable users to bypass access restrictions in some configurations using mechanisms such as .Ev LD_PRELOAD . .It Cm PidFile Specifies the file that contains the process ID of the SSH daemon. The default is .Pa /var/run/sshd.pid . .It Cm Port Specifies the port number that .Xr sshd 8 listens on. The default is 22. Multiple options of this type are permitted. See also .Cm ListenAddress . .It Cm PrintLastLog Specifies whether .Xr sshd 8 should print the date and time of the last user login when a user logs in interactively. The default is .Dq yes . .It Cm PrintMotd Specifies whether .Xr sshd 8 should print .Pa /etc/motd when a user logs in interactively. (On some systems it is also printed by the shell, .Pa /etc/profile , or equivalent.) The default is .Dq yes . .It Cm Protocol Specifies the protocol versions .Xr sshd 8 supports. The possible values are .Sq 1 and .Sq 2 . Multiple versions must be comma-separated. The default is .Sq 2 . Note that the order of the protocol list does not indicate preference, because the client selects among multiple protocol versions offered by the server. Specifying .Dq 2,1 is identical to .Dq 1,2 . .It Cm PubkeyAuthentication Specifies whether public key authentication is allowed. The default is .Dq yes . Note that this option applies to protocol version 2 only. .It Cm RevokedKeys Specifies revoked public keys. Keys listed in this file will be refused for public key authentication. Note that if this file is not readable, then public key authentication will be refused for all users. Keys may be specified as a text file, listing one public key per line, or as an OpenSSH Key Revocation List (KRL) as generated by .Xr ssh-keygen 1 . For more information on KRLs, see the .Sx KEY REVOCATION LISTS section in .Xr ssh-keygen 1 . .It Cm RhostsRSAAuthentication Specifies whether rhosts or .Pa /etc/hosts.equiv authentication together with successful RSA host authentication is allowed. The default is .Dq no . This option applies to protocol version 1 only. .It Cm RSAAuthentication Specifies whether pure RSA authentication is allowed. The default is .Dq yes . This option applies to protocol version 1 only. .It Cm ServerKeyBits Defines the number of bits in the ephemeral protocol version 1 server key. The minimum value is 512, and the default is 1024. .It Cm StrictModes Specifies whether .Xr sshd 8 should check file modes and ownership of the user's files and home directory before accepting login. This is normally desirable because novices sometimes accidentally leave their directory or files world-writable. The default is .Dq yes . Note that this does not apply to .Cm ChrootDirectory , whose permissions and ownership are checked unconditionally. .It Cm Subsystem Configures an external subsystem (e.g. file transfer daemon). Arguments should be a subsystem name and a command (with optional arguments) to execute upon subsystem request. .Pp The command .Xr sftp-server 8 implements the .Dq sftp file transfer subsystem. .Pp Alternately the name .Dq internal-sftp implements an in-process .Dq sftp server. This may simplify configurations using .Cm ChrootDirectory to force a different filesystem root on clients. .Pp By default no subsystems are defined. Note that this option applies to protocol version 2 only. .It Cm SyslogFacility Gives the facility code that is used when logging messages from .Xr sshd 8 . The possible values are: DAEMON, USER, AUTH, LOCAL0, LOCAL1, LOCAL2, LOCAL3, LOCAL4, LOCAL5, LOCAL6, LOCAL7. The default is AUTH. .It Cm TCPKeepAlive Specifies whether the system should send TCP keepalive messages to the other side. If they are sent, death of the connection or crash of one of the machines will be properly noticed. However, this means that connections will die if the route is down temporarily, and some people find it annoying. On the other hand, if TCP keepalives are not sent, sessions may hang indefinitely on the server, leaving .Dq ghost users and consuming server resources. .Pp The default is .Dq yes (to send TCP keepalive messages), and the server will notice if the network goes down or the client host crashes. This avoids infinitely hanging sessions. .Pp To disable TCP keepalive messages, the value should be set to .Dq no . .It Cm TrustedUserCAKeys Specifies a file containing public keys of certificate authorities that are trusted to sign user certificates for authentication. Keys are listed one per line; empty lines and comments starting with .Ql # are allowed. If a certificate is presented for authentication and has its signing CA key listed in this file, then it may be used for authentication for any user listed in the certificate's principals list. Note that certificates that lack a list of principals will not be permitted for authentication using .Cm TrustedUserCAKeys . For more details on certificates, see the .Sx CERTIFICATES section in .Xr ssh-keygen 1 . .It Cm UseDNS Specifies whether .Xr sshd 8 should look up the remote host name and check that the resolved host name for the remote IP address maps back to the very same IP address. The default is .Dq yes . .It Cm UseLogin Specifies whether .Xr login 1 is used for interactive login sessions. The default is .Dq no . Note that .Xr login 1 is never used for remote command execution. Note also, that if this is enabled, .Cm X11Forwarding will be disabled because .Xr login 1 does not know how to handle .Xr xauth 1 cookies. If .Cm UsePrivilegeSeparation is specified, it will be disabled after authentication. .It Cm UsePAM Enables the Pluggable Authentication Module interface. If set to .Dq yes this will enable PAM authentication using .Cm ChallengeResponseAuthentication and .Cm PasswordAuthentication in addition to PAM account and session module processing for all authentication types. .Pp Because PAM challenge-response authentication usually serves an equivalent role to password authentication, you should disable either .Cm PasswordAuthentication or .Cm ChallengeResponseAuthentication. .Pp If .Cm UsePAM is enabled, you will not be able to run .Xr sshd 8 as a non-root user. The default is .Dq yes . .It Cm UsePrivilegeSeparation Specifies whether .Xr sshd 8 separates privileges by creating an unprivileged child process to deal with incoming network traffic. After successful authentication, another process will be created that has the privilege of the authenticated user. The goal of privilege separation is to prevent privilege escalation by containing any corruption within the unprivileged processes. The default is -.Dq sandbox . +.Dq yes . If .Cm UsePrivilegeSeparation is set to .Dq sandbox then the pre-authentication unprivileged process is subject to additional restrictions. .It Cm VersionAddendum Optionally specifies additional text to append to the SSH protocol banner sent by the server upon connection. The default is .Dq FreeBSD-20130515 . .It Cm X11DisplayOffset Specifies the first display number available for .Xr sshd 8 Ns 's X11 forwarding. This prevents sshd from interfering with real X11 servers. The default is 10. .It Cm X11Forwarding Specifies whether X11 forwarding is permitted. The argument must be .Dq yes or .Dq no . The default is .Dq yes . .Pp When X11 forwarding is enabled, there may be additional exposure to the server and to client displays if the .Xr sshd 8 proxy display is configured to listen on the wildcard address (see .Cm X11UseLocalhost below), though this is not the default. Additionally, the authentication spoofing and authentication data verification and substitution occur on the client side. The security risk of using X11 forwarding is that the client's X11 display server may be exposed to attack when the SSH client requests forwarding (see the warnings for .Cm ForwardX11 in .Xr ssh_config 5 ) . A system administrator may have a stance in which they want to protect clients that may expose themselves to attack by unwittingly requesting X11 forwarding, which can warrant a .Dq no setting. .Pp Note that disabling X11 forwarding does not prevent users from forwarding X11 traffic, as users can always install their own forwarders. X11 forwarding is automatically disabled if .Cm UseLogin is enabled. .It Cm X11UseLocalhost Specifies whether .Xr sshd 8 should bind the X11 forwarding server to the loopback address or to the wildcard address. By default, sshd binds the forwarding server to the loopback address and sets the hostname part of the .Ev DISPLAY environment variable to .Dq localhost . This prevents remote hosts from connecting to the proxy display. However, some older X11 clients may not function with this configuration. .Cm X11UseLocalhost may be set to .Dq no to specify that the forwarding server should be bound to the wildcard address. The argument must be .Dq yes or .Dq no . The default is .Dq yes . .It Cm XAuthLocation Specifies the full pathname of the .Xr xauth 1 program. The default is .Pa /usr/local/bin/xauth . .El .Sh TIME FORMATS .Xr sshd 8 command-line arguments and configuration file options that specify time may be expressed using a sequence of the form: .Sm off .Ar time Op Ar qualifier , .Sm on where .Ar time is a positive integer value and .Ar qualifier is one of the following: .Pp .Bl -tag -width Ds -compact -offset indent .It Aq Cm none seconds .It Cm s | Cm S seconds .It Cm m | Cm M minutes .It Cm h | Cm H hours .It Cm d | Cm D days .It Cm w | Cm W weeks .El .Pp Each member of the sequence is added together to calculate the total time value. .Pp Time format examples: .Pp .Bl -tag -width Ds -compact -offset indent .It 600 600 seconds (10 minutes) .It 10m 10 minutes .It 1h30m 1 hour 30 minutes (90 minutes) .El .Sh FILES .Bl -tag -width Ds .It Pa /etc/ssh/sshd_config Contains configuration data for .Xr sshd 8 . This file should be writable by root only, but it is recommended (though not necessary) that it be world-readable. .El .Sh SEE ALSO .Xr sshd 8 .Sh AUTHORS OpenSSH is a derivative of the original and free ssh 1.2.12 release by Tatu Ylonen. Aaron Campbell, Bob Beck, Markus Friedl, Niels Provos, Theo de Raadt and Dug Song removed many bugs, re-added newer features and created OpenSSH. Markus Friedl contributed the support for SSH protocol versions 1.5 and 2.0. Niels Provos and Markus Friedl contributed support for privilege separation. Index: user/attilio/vmcontention/crypto/openssh =================================================================== --- user/attilio/vmcontention/crypto/openssh (revision 252343) +++ user/attilio/vmcontention/crypto/openssh (revision 252344) Property changes on: user/attilio/vmcontention/crypto/openssh ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/crypto/openssh:r251185-252340 Index: user/attilio/vmcontention/etc/defaults/rc.conf =================================================================== --- user/attilio/vmcontention/etc/defaults/rc.conf (revision 252343) +++ user/attilio/vmcontention/etc/defaults/rc.conf (revision 252344) @@ -1,738 +1,733 @@ #!/bin/sh # This is rc.conf - a file full of useful variables that you can set # to change the default startup behavior of your system. You should # not edit this file! Put any overrides into one of the ${rc_conf_files} # instead and you will be able to update these defaults later without # spamming your local configuration information. # # The ${rc_conf_files} files should only contain values which override # values set in this file. This eases the upgrade path when defaults # are changed and new features are added. # # All arguments must be in double or single quotes. # # For a more detailed explanation of all the rc.conf variables, please # refer to the rc.conf(5) manual page. # # $FreeBSD$ ############################################################## ### Important initial Boot-time options #################### ############################################################## rc_debug="NO" # Set to YES to enable debugging output from rc.d rc_info="NO" # Enables display of informational messages at boot. rc_startmsgs="YES" # Show "Starting foo:" messages at boot rcshutdown_timeout="90" # Seconds to wait before terminating rc.shutdown early_late_divider="FILESYSTEMS" # Script that separates early/late # stages of the boot process. Make sure you know # the ramifications if you change this. # See rc.conf(5) for more details. always_force_depends="NO" # Set to check that indicated dependencies are # running during boot (can increase boot time). -swapfile="NO" # Set to name of swapfile if aux swapfile desired. -swapfile_mdunit="99" # Swapfile md(4) unit number created by mdconfig(8). apm_enable="NO" # Set to YES to enable APM BIOS functions (or NO). apmd_enable="NO" # Run apmd to handle APM event from userland. apmd_flags="" # Flags to apmd (if enabled). ddb_enable="NO" # Set to YES to load ddb scripts at boot. ddb_config="/etc/ddb.conf" # ddb(8) config file. devd_enable="YES" # Run devd, to trigger programs on device tree changes. devd_flags="" # Additional flags for devd(8). #kld_list="" # Kernel modules to load after local disks are mounted kldxref_enable="NO" # Build linker.hints files with kldxref(8). kldxref_clobber="NO" # Overwrite old linker.hints at boot. kldxref_module_path="" # Override kern.module_path. A ';'-delimited list. powerd_enable="NO" # Run powerd to lower our power usage. powerd_flags="" # Flags to powerd (if enabled). tmpmfs="AUTO" # Set to YES to always create an mfs /tmp, NO to never tmpsize="20m" # Size of mfs /tmp if created tmpmfs_flags="-S" # Extra mdmfs options for the mfs /tmp varmfs="AUTO" # Set to YES to always create an mfs /var, NO to never varsize="32m" # Size of mfs /var if created varmfs_flags="-S" # Extra mount options for the mfs /var populate_var="AUTO" # Set to YES to always (re)populate /var, NO to never cleanvar_enable="YES" # Clean the /var directory local_startup="/usr/local/etc/rc.d" # startup script dirs. script_name_sep=" " # Change if your startup scripts' names contain spaces rc_conf_files="/etc/rc.conf /etc/rc.conf.local" # ZFS support zfs_enable="NO" # Set to YES to automatically mount ZFS file systems gptboot_enable="YES" # GPT boot success/failure reporting. # Experimental - test before enabling gbde_autoattach_all="NO" # YES automatically mounts gbde devices from fstab gbde_devices="NO" # Devices to automatically attach (list, or AUTO) gbde_attach_attempts="3" # Number of times to attempt attaching gbde devices gbde_lockdir="/etc" # Where to look for gbde lockfiles # GELI disk encryption configuration. geli_devices="" # List of devices to automatically attach in addition to # GELI devices listed in /etc/fstab. geli_tries="" # Number of times to attempt attaching geli device. # If empty, kern.geom.eli.tries will be used. geli_default_flags="" # Default flags for geli(8). geli_autodetach="YES" # Automatically detach on last close. # Providers are marked as such when all file systems are # mounted. # Example use. #geli_devices="da1 mirror/home" #geli_da1_flags="-p -k /etc/geli/da1.keys" #geli_da1_autodetach="NO" #geli_mirror_home_flags="-k /etc/geli/home.keys" - -geli_swap_flags="-e aes -l 256 -s 4096 -d" # Options for GELI-encrypted - # swap partitions. root_rw_mount="YES" # Set to NO to inhibit remounting root read-write. fsck_y_enable="NO" # Set to YES to do fsck -y if the initial preen fails. fsck_y_flags="" # Additional flags for fsck -y background_fsck="YES" # Attempt to run fsck in the background where possible. background_fsck_delay="60" # Time to wait (seconds) before starting the fsck. netfs_types="nfs:NFS oldnfs:OLDNFS" # Net filesystems. extra_netfs_types="NO" # List of network extra filesystem types for delayed # mount at startup (or NO). ############################################################## ### Network configuration sub-section ###################### ############################################################## ### Basic network and firewall/security options: ### hostname="" # Set this! hostid_enable="YES" # Set host UUID. hostid_file="/etc/hostid" # File with hostuuid. nisdomainname="NO" # Set to NIS domain if using NIS (or NO). dhclient_program="/sbin/dhclient" # Path to dhcp client program. dhclient_flags="" # Extra flags to pass to dhcp client. #dhclient_flags_fxp0="" # Extra dhclient flags for fxp0 only background_dhclient="NO" # Start dhcp client in the background. #background_dhclient_fxp0="YES" # Start dhcp client on fxp0 in the background. synchronous_dhclient="NO" # Start dhclient directly on configured # interfaces during startup. defaultroute_delay="30" # Time to wait for a default route on a DHCP interface. defaultroute_carrier_delay="5" # Time to wait for carrier while waiting for a default route. wpa_supplicant_program="/usr/sbin/wpa_supplicant" wpa_supplicant_flags="-s" # Extra flags to pass to wpa_supplicant wpa_supplicant_conf_file="/etc/wpa_supplicant.conf" # firewall_enable="NO" # Set to YES to enable firewall functionality firewall_script="/etc/rc.firewall" # Which script to run to set up the firewall firewall_type="UNKNOWN" # Firewall type (see /etc/rc.firewall) firewall_quiet="NO" # Set to YES to suppress rule display firewall_logging="NO" # Set to YES to enable events logging firewall_logif="NO" # Set to YES to create logging-pseudo interface firewall_flags="" # Flags passed to ipfw when type is a file firewall_coscripts="" # List of executables/scripts to run after # firewall starts/stops firewall_client_net="192.0.2.0/24" # IPv4 Network address for "client" # firewall. #firewall_client_net_ipv6="2001:db8:2:1::/64" # IPv6 network prefix for # "client" firewall. firewall_simple_iif="ed1" # Inside network interface for "simple" # firewall. firewall_simple_inet="192.0.2.16/28" # Inside network address for "simple" # firewall. firewall_simple_oif="ed0" # Outside network interface for "simple" # firewall. firewall_simple_onet="192.0.2.0/28" # Outside network address for "simple" # firewall. #firewall_simple_iif_ipv6="ed1" # Inside IPv6 network interface for "simple" # firewall. #firewall_simple_inet_ipv6="2001:db8:2:800::/56" # Inside IPv6 network prefix # for "simple" firewall. #firewall_simple_oif_ipv6="ed0" # Outside IPv6 network interface for "simple" # firewall. #firewall_simple_onet_ipv6="2001:db8:2:0::/56" # Outside IPv6 network prefix # for "simple" firewall. firewall_myservices="" # List of TCP ports on which this host # offers services for "workstation" firewall. firewall_allowservices="" # List of IPs which have access to # $firewall_myservices for "workstation" # firewall. firewall_trusted="" # List of IPs which have full access to this # host for "workstation" firewall. firewall_logdeny="NO" # Set to YES to log default denied incoming # packets for "workstation" firewall. firewall_nologports="135-139,445 1026,1027 1433,1434" # List of TCP/UDP ports # for which denied incoming packets are not # logged for "workstation" firewall. firewall_nat_enable="NO" # Enable kernel NAT (if firewall_enable == YES) firewall_nat_interface="" # Public interface or IPaddress to use firewall_nat_flags="" # Additional configuration parameters dummynet_enable="NO" # Load the dummynet(4) module ip_portrange_first="NO" # Set first dynamically allocated port ip_portrange_last="NO" # Set last dynamically allocated port ike_enable="NO" # Enable IKE daemon (usually racoon or isakmpd) ike_program="/usr/local/sbin/isakmpd" # Path to IKE daemon ike_flags="" # Additional flags for IKE daemon ipsec_enable="NO" # Set to YES to run setkey on ipsec_file ipsec_file="/etc/ipsec.conf" # Name of config file for setkey natd_program="/sbin/natd" # path to natd, if you want a different one. natd_enable="NO" # Enable natd (if firewall_enable == YES). natd_interface="" # Public interface or IPaddress to use. natd_flags="" # Additional flags for natd. ipfilter_enable="NO" # Set to YES to enable ipfilter functionality ipfilter_program="/sbin/ipf" # where the ipfilter program lives ipfilter_rules="/etc/ipf.rules" # rules definition file for ipfilter, see # /usr/src/contrib/ipfilter/rules for examples ipfilter_flags="" # additional flags for ipfilter ipnat_enable="NO" # Set to YES to enable ipnat functionality ipnat_program="/sbin/ipnat" # where the ipnat program lives ipnat_rules="/etc/ipnat.rules" # rules definition file for ipnat ipnat_flags="" # additional flags for ipnat ipmon_enable="NO" # Set to YES for ipmon; needs ipfilter or ipnat ipmon_program="/sbin/ipmon" # where the ipfilter monitor program lives ipmon_flags="-Ds" # typically "-Ds" or "-D /var/log/ipflog" ipfs_enable="NO" # Set to YES to enable saving and restoring # of state tables at shutdown and boot ipfs_program="/sbin/ipfs" # where the ipfs program lives ipfs_flags="" # additional flags for ipfs pf_enable="NO" # Set to YES to enable packet filter (pf) pf_rules="/etc/pf.conf" # rules definition file for pf pf_program="/sbin/pfctl" # where the pfctl program lives pf_flags="" # additional flags for pfctl pflog_enable="NO" # Set to YES to enable packet filter logging pflog_logfile="/var/log/pflog" # where pflogd should store the logfile pflog_program="/sbin/pflogd" # where the pflogd program lives pflog_flags="" # additional flags for pflogd ftpproxy_enable="NO" # Set to YES to enable ftp-proxy(8) for pf ftpproxy_flags="" # additional flags for ftp-proxy(8) pfsync_enable="NO" # Expose pf state to other hosts for syncing pfsync_syncdev="" # Interface for pfsync to work through pfsync_syncpeer="" # IP address of pfsync peer host pfsync_ifconfig="" # Additional options to ifconfig(8) for pfsync tcp_extensions="YES" # Set to NO to turn off RFC1323 extensions. log_in_vain="0" # >=1 to log connects to ports w/o listeners. tcp_keepalive="YES" # Enable stale TCP connection timeout (or NO). tcp_drop_synfin="NO" # Set to YES to drop TCP packets with SYN+FIN # NOTE: this violates the TCP specification icmp_drop_redirect="NO" # Set to YES to ignore ICMP REDIRECT packets icmp_log_redirect="NO" # Set to YES to log ICMP REDIRECT packets network_interfaces="auto" # List of network interfaces (or "auto"). cloned_interfaces="" # List of cloned network interfaces to create. #cloned_interfaces="gif0 gif1 gif2 gif3" # Pre-cloning GENERIC config. #ifconfig_lo0="inet 127.0.0.1" # default loopback device configuration. #ifconfig_lo0_alias0="inet 127.0.0.254 netmask 0xffffffff" # Sample alias entry. #ifconfig_ed0_ipx="ipx 0x00010010" # Sample IPX address family entry. #ifconfig_ed0_ipv6="inet6 2001:db8:1::1 prefixlen 64" # Sample IPv6 addr entry #ifconfig_ed0_alias0="inet6 2001:db8:2::1 prefixlen 64" # Sample IPv6 alias #ifconfig_fxp0_name="net0" # Change interface name from fxp0 to net0. #vlans_fxp0="101 vlan0" # vlan(4) interfaces for fxp0 device #create_args_vlan0="vlan 102" # vlan tag for vlan0 device #wlans_ath0="wlan0" # wlan(4) interfaces for ath0 device #wlandebug_wlan0="scan+auth+assoc" # Set debug flags with wlanddebug(8) #ipv4_addrs_fxp0="192.168.0.1/24 192.168.1.1-5/28" # example IPv4 address entry. # #autobridge_interfaces="bridge0" # List of bridges to check #autobridge_bridge0="tap* vlan0" # Interface glob to automatically add to the bridge # # If you have any sppp(4) interfaces above, you might also want to set # the following parameters. Refer to spppcontrol(8) for their meaning. sppp_interfaces="" # List of sppp interfaces. #sppp_interfaces="...0" # example: sppp over ... #spppconfig_...0="authproto=chap myauthname=foo myauthsecret='top secret' hisauthname=some-gw hisauthsecret='another secret'" gif_interfaces="" # List of GIF tunnels. #gif_interfaces="gif0 gif1" # Examples typically for a router. # Choose correct tunnel addrs. #gifconfig_gif0="10.1.1.1 10.1.2.1" # Examples typically for a router. #gifconfig_gif1="10.1.1.2 10.1.2.2" # Examples typically for a router. fec_interfaces="" # List of Fast EtherChannels. #fec_interfaces="fec0 fec1" #fecconfig_fec0="fxp0 dc0" # Examples typically for two NICs #fecconfig_fec1="em0 em1 bge0 bge1" # Examples typically for four NICs # User ppp configuration. ppp_enable="NO" # Start user-ppp (or NO). ppp_program="/usr/sbin/ppp" # Path to user-ppp program. ppp_mode="auto" # Choice of "auto", "ddial", "direct" or "dedicated". # For details see man page for ppp(8). Default is auto. ppp_nat="YES" # Use PPP's internal network address translation or NO. ppp_profile="papchap" # Which profile to use from /etc/ppp/ppp.conf. ppp_user="root" # Which user to run ppp as # Start multiple instances of ppp at boot time #ppp_profile="profile1 profile2 profile3" # Which profiles to use #ppp_profile1_mode="ddial" # Override ppp mode for profile1 #ppp_profile2_nat="NO" # Override nat mode for profile2 # profile3 uses default ppp_mode and ppp_nat ### Network daemon (miscellaneous) ### hostapd_enable="NO" # Run hostap daemon. syslogd_enable="YES" # Run syslog daemon (or NO). syslogd_program="/usr/sbin/syslogd" # path to syslogd, if you want a different one. syslogd_flags="-s" # Flags to syslogd (if enabled). inetd_enable="NO" # Run the network daemon dispatcher (YES/NO). inetd_program="/usr/sbin/inetd" # path to inetd, if you want a different one. inetd_flags="-wW -C 60" # Optional flags to inetd hastd_enable="NO" # Run the HAST daemon (YES/NO). hastd_program="/sbin/hastd" # path to hastd, if you want a different one. hastd_flags="" # Optional flags to hastd. # # named. It may be possible to run named in a sandbox, man security for # details. # named_enable="NO" # Run named, the DNS server (or NO). named_program="/usr/sbin/named" # Path to named, if you want a different one. named_conf="/etc/namedb/named.conf" # Path to the configuration file #named_flags="" # Use this for flags OTHER than -u and -c named_uid="bind" # User to run named as named_chrootdir="/var/named" # Chroot directory (or "" not to auto-chroot it) named_chroot_autoupdate="YES" # Automatically install/update chrooted # components of named. See /etc/rc.d/named. named_symlink_enable="YES" # Symlink the chrooted pid file named_wait="NO" # Wait for working name service before exiting named_wait_host="localhost" # Hostname to check if named_wait is enabled named_auto_forward="NO" # Set up forwarders from /etc/resolv.conf named_auto_forward_only="NO" # Do "forward only" instead of "forward first" # # kerberos. Do not run the admin daemons on slave servers # kerberos5_server_enable="NO" # Run a kerberos 5 master server (or NO). kerberos5_server="/usr/libexec/kdc" # path to kerberos 5 KDC kerberos5_server_flags="--detach" # Additional flags to the kerberos 5 server kadmind5_server_enable="NO" # Run kadmind (or NO) kadmind5_server="/usr/libexec/kadmind" # path to kerberos 5 admin daemon kpasswdd_server_enable="NO" # Run kpasswdd (or NO) kpasswdd_server="/usr/libexec/kpasswdd" # path to kerberos 5 passwd daemon kfd_enable="NO" # Run kfd (or NO) kfd_program="/usr/libexec/kfd" # path to kerberos 5 kfd daemon gssd_enable="NO" # Run the gssd daemon (or NO). gssd_flags="" # Flags for gssd. rwhod_enable="NO" # Run the rwho daemon (or NO). rwhod_flags="" # Flags for rwhod rarpd_enable="NO" # Run rarpd (or NO). rarpd_flags="-a" # Flags to rarpd. bootparamd_enable="NO" # Run bootparamd (or NO). bootparamd_flags="" # Flags to bootparamd pppoed_enable="NO" # Run the PPP over Ethernet daemon. pppoed_provider="*" # Provider and ppp(8) config file entry. pppoed_flags="-P /var/run/pppoed.pid" # Flags to pppoed (if enabled). pppoed_interface="fxp0" # The interface that pppoed runs on. sshd_enable="NO" # Enable sshd sshd_program="/usr/sbin/sshd" # path to sshd, if you want a different one. sshd_flags="" # Additional flags for sshd. ftpd_enable="NO" # Enable stand-alone ftpd. ftpd_program="/usr/libexec/ftpd" # Path to ftpd, if you want a different one. ftpd_flags="" # Additional flags to stand-alone ftpd. ### Network daemon (NFS): All need rpcbind_enable="YES" ### amd_enable="NO" # Run amd service with $amd_flags (or NO). amd_program="/usr/sbin/amd" # path to amd, if you want a different one. amd_flags="-a /.amd_mnt -l syslog /host /etc/amd.map /net /etc/amd.map" amd_map_program="NO" # Can be set to "ypcat -k amd.master" nfs_client_enable="NO" # This host is an NFS client (or NO). nfs_access_cache="60" # Client cache timeout in seconds nfs_server_enable="NO" # This host is an NFS server (or NO). oldnfs_server_enable="NO" # Run the old NFS server (YES/NO). nfs_server_flags="-u -t -n 4" # Flags to nfsd (if enabled). mountd_enable="NO" # Run mountd (or NO). mountd_flags="-r" # Flags to mountd (if NFS server enabled). weak_mountd_authentication="NO" # Allow non-root mount requests to be served. nfs_reserved_port_only="NO" # Provide NFS only on secure port (or NO). nfs_bufpackets="" # bufspace (in packets) for client rpc_lockd_enable="NO" # Run NFS rpc.lockd needed for client/server. rpc_lockd_flags="" # Flags to rpc.lockd (if enabled). rpc_statd_enable="NO" # Run NFS rpc.statd needed for client/server. rpc_statd_flags="" # Flags to rpc.statd (if enabled). rpcbind_enable="NO" # Run the portmapper service (YES/NO). rpcbind_program="/usr/sbin/rpcbind" # path to rpcbind, if you want a different one. rpcbind_flags="" # Flags to rpcbind (if enabled). rpc_ypupdated_enable="NO" # Run if NIS master and SecureRPC (or NO). keyserv_enable="NO" # Run the SecureRPC keyserver (or NO). keyserv_flags="" # Flags to keyserv (if enabled). nfsv4_server_enable="NO" # Enable support for NFSv4 nfscbd_enable="NO" # NFSv4 client side callback daemon nfscbd_flags="" # Flags for nfscbd nfsuserd_enable="NO" # NFSv4 user/group name mapping daemon nfsuserd_flags="" # Flags for nfsuserd ### Network Time Services options: ### timed_enable="NO" # Run the time daemon (or NO). timed_flags="" # Flags to timed (if enabled). ntpdate_enable="NO" # Run ntpdate to sync time on boot (or NO). ntpdate_program="/usr/sbin/ntpdate" # path to ntpdate, if you want a different one. ntpdate_flags="-b" # Flags to ntpdate (if enabled). ntpdate_config="/etc/ntp.conf" # ntpdate(8) configuration file ntpdate_hosts="" # Whitespace-separated list of ntpdate(8) servers. ntpd_enable="NO" # Run ntpd Network Time Protocol (or NO). ntpd_program="/usr/sbin/ntpd" # path to ntpd, if you want a different one. ntpd_config="/etc/ntp.conf" # ntpd(8) configuration file ntpd_sync_on_start="NO" # Sync time on ntpd startup, even if offset is high ntpd_flags="-p /var/run/ntpd.pid -f /var/db/ntpd.drift" # Flags to ntpd (if enabled). # Network Information Services (NIS) options: All need rpcbind_enable="YES" ### nis_client_enable="NO" # We're an NIS client (or NO). nis_client_flags="" # Flags to ypbind (if enabled). nis_ypset_enable="NO" # Run ypset at boot time (or NO). nis_ypset_flags="" # Flags to ypset (if enabled). nis_server_enable="NO" # We're an NIS server (or NO). nis_server_flags="" # Flags to ypserv (if enabled). nis_ypxfrd_enable="NO" # Run rpc.ypxfrd at boot time (or NO). nis_ypxfrd_flags="" # Flags to rpc.ypxfrd (if enabled). nis_yppasswdd_enable="NO" # Run rpc.yppasswdd at boot time (or NO). nis_yppasswdd_flags="" # Flags to rpc.yppasswdd (if enabled). ### SNMP daemon ### # Be sure to understand the security implications of running SNMP v1/v2 # in your network. bsnmpd_enable="NO" # Run the SNMP daemon (or NO). bsnmpd_flags="" # Flags for bsnmpd. ### Network routing options: ### defaultrouter="NO" # Set to default gateway (or NO). static_arp_pairs="" # Set to static ARP list (or leave empty). static_ndp_pairs="" # Set to static NDP list (or leave empty). static_routes="" # Set to static route list (or leave empty). natm_static_routes="" # Set to static route list for NATM (or leave empty). gateway_enable="NO" # Set to YES if this host will be a gateway. routed_enable="NO" # Set to YES to enable a routing daemon. routed_program="/sbin/routed" # Name of routing daemon to use if enabled. routed_flags="-q" # Flags for routing daemon. mrouted_enable="NO" # Do IPv4 multicast routing. mrouted_program="/usr/local/sbin/mrouted" # Name of IPv4 multicast # routing daemon. You need to # install it from package or # port. mrouted_flags="" # Flags for multicast routing daemon. ipxgateway_enable="NO" # Set to YES to enable IPX routing. ipxrouted_enable="NO" # Set to YES to run the IPX routing daemon. ipxrouted_flags="" # Flags for IPX routing daemon. arpproxy_all="NO" # replaces obsolete kernel option ARP_PROXYALL. forward_sourceroute="NO" # do source routing (only if gateway_enable is set to "YES") accept_sourceroute="NO" # accept source routed packets to us ### ATM interface options: ### atm_enable="NO" # Configure ATM interfaces (or NO). #atm_netif_hea0="atm 1" # Network interfaces for physical interface. #atm_sigmgr_hea0="uni31" # Signalling manager for physical interface. #atm_prefix_hea0="ILMI" # NSAP prefix (UNI interfaces only) (or ILMI). #atm_macaddr_hea0="NO" # Override physical MAC address (or NO). #atm_arpserver_atm0="0x47.0005.80.999999.9999.9999.9999.999999999999.00" # ATMARP server address (or local). #atm_scsparp_atm0="NO" # Run SCSP/ATMARP on network interface (or NO). atm_pvcs="" # Set to PVC list (or leave empty). atm_arps="" # Set to permanent ARP list (or leave empty). ### Bluetooth ### hcsecd_enable="NO" # Enable hcsecd(8) (or NO) hcsecd_config="/etc/bluetooth/hcsecd.conf" # hcsecd(8) configuration file sdpd_enable="NO" # Enable sdpd(8) (or NO) sdpd_control="/var/run/sdp" # sdpd(8) control socket sdpd_groupname="nobody" # set spdp(8) user/group to run as after sdpd_username="nobody" # it initializes bthidd_enable="NO" # Enable bthidd(8) (or NO) bthidd_config="/etc/bluetooth/bthidd.conf" # bthidd(8) configuration file bthidd_hids="/var/db/bthidd.hids" # bthidd(8) known HID devices file rfcomm_pppd_server_enable="NO" # Enable rfcomm_pppd(8) in server mode (or NO) rfcomm_pppd_server_profile="one two" # Profile to use from /etc/ppp/ppp.conf # #rfcomm_pppd_server_one_bdaddr="" # Override local bdaddr for 'one' rfcomm_pppd_server_one_channel="1" # Override local channel for 'one' #rfcomm_pppd_server_one_register_sp="NO" # Override SP and DUN register #rfcomm_pppd_server_one_register_dun="NO" # for 'one' # #rfcomm_pppd_server_two_bdaddr="" # Override local bdaddr for 'two' rfcomm_pppd_server_two_channel="3" # Override local channel for 'two' #rfcomm_pppd_server_two_register_sp="NO" # Override SP and DUN register #rfcomm_pppd_server_two_register_dun="NO" # for 'two' ubthidhci_enable="NO" # Switch an USB BT controller present on #ubthidhci_busnum="3" # bus 3 and addr 2 from HID mode to HCI mode. #ubthidhci_addr="2" # Check usbconfig list to find the correct # numbers for your system. ### Network link/usability verification options netwait_enable="NO" # Enable rc.d/netwait (or NO) #netwait_ip="" # IP addresses to be pinged by netwait. netwait_timeout="60" # Total number of seconds to perform pings. #netwait_if="" # Interface name to watch link state on. netwait_if_timeout="30" # Total number of seconds to monitor link state. ### Miscellaneous network options: ### icmp_bmcastecho="NO" # respond to broadcast ping packets ### IPv6 options: ### ipv6_network_interfaces="auto" # List of IPv6 network interfaces # (or "auto" or "none"). ipv6_activate_all_interfaces="NO" # If NO, interfaces which have no # corresponding $ifconfig_IF_ipv6 is # marked as IFDISABLED for security # reason. ipv6_defaultrouter="NO" # Set to IPv6 default gateway (or NO). #ipv6_defaultrouter="2002:c058:6301::" # Use this for 6to4 (RFC 3068) ipv6_static_routes="" # Set to static route list (or leave empty). #ipv6_static_routes="xxx" # An example to set fec0:0000:0000:0006::/64 # route toward loopback interface. #ipv6_route_xxx="fec0:0000:0000:0006:: -prefixlen 64 ::1" ipv6_gateway_enable="NO" # Set to YES if this host will be a gateway. ipv6_cpe_wanif="NO" # Set to the upstram interface name if this # node will work as a router to forward IPv6 # packets not explicitly addressed to itself. ipv6_privacy="NO" # Use privacy address on RA-receiving IFs # (RFC 4941) route6d_enable="NO" # Set to YES to enable an IPv6 routing daemon. route6d_program="/usr/sbin/route6d" # Name of IPv6 routing daemon. route6d_flags="" # Flags to IPv6 routing daemon. #route6d_flags="-l" # Example for route6d with only IPv6 site local # addrs. #route6d_flags="-q" # If you want to run a routing daemon on an end # node, you should stop advertisement. #ipv6_network_interfaces="ed0 ep0" # Examples for router # or static configuration for end node. # Choose correct prefix value. #ipv6_prefix_ed0="fec0:0000:0000:0001 fec0:0000:0000:0002" # Examples for rtr. #ipv6_prefix_ep0="fec0:0000:0000:0003 fec0:0000:0000:0004" # Examples for rtr. ipv6_default_interface="NO" # Default output interface for scoped addrs. # This works only with # ipv6_gateway_enable="NO". rtsol_flags="" # Flags to IPv6 router solicitation. rtsold_enable="NO" # Set to YES to enable an IPv6 router # solicitation daemon. rtsold_flags="-a" # Flags to an IPv6 router solicitation # daemon. rtadvd_enable="NO" # Set to YES to enable an IPv6 router # advertisement daemon. If set to YES, # this router becomes a possible candidate # IPv6 default router for local subnets. rtadvd_interfaces="" # Interfaces rtadvd sends RA packets. mroute6d_enable="NO" # Do IPv6 multicast routing. mroute6d_program="/usr/local/sbin/pim6dd" # Name of IPv6 multicast # routing daemon. You need to # install it from package or # port. mroute6d_flags="" # Flags to IPv6 multicast routing daemon. stf_interface_ipv4addr="" # Local IPv4 addr for 6to4 IPv6 over IPv4 # tunneling interface. Specify this entry # to enable 6to4 interface. stf_interface_ipv4plen="0" # Prefix length for 6to4 IPv4 addr, # to limit peer addr range. Effective value # is 0-31. stf_interface_ipv6_ifid="0:0:0:1" # IPv6 interface id for stf0. # If you like, you can set "AUTO" for this. stf_interface_ipv6_slaid="0000" # IPv6 Site Level Aggregator for stf0 ipv6_faith_prefix="NO" # Set faith prefix to enable a FAITH # IPv6-to-IPv4 TCP translator. You also need # faithd(8) setup. ipv6_ipv4mapping="NO" # Set to "YES" to enable IPv4 mapped IPv6 addr # communication. (like ::ffff:a.b.c.d) ipv6_ipfilter_rules="/etc/ipf6.rules" # rules definition file for ipfilter, # see /usr/src/contrib/ipfilter/rules # for examples ip6addrctl_enable="YES" # Set to YES to enable default address selection ip6addrctl_verbose="NO" # Set to YES to enable verbose configuration messages ip6addrctl_policy="AUTO" # A pre-defined address selection policy # (ipv4_prefer, ipv6_prefer, or AUTO) ############################################################## ### System console options ################################# ############################################################## keyboard="" # keyboard device to use (default /dev/kbd0). keymap="NO" # keymap in /usr/share/syscons/keymaps/* (or NO). keyrate="NO" # keyboard rate to: slow, normal, fast (or NO). keybell="NO" # See kbdcontrol(1) for options. Use "off" to disable. keychange="NO" # function keys default values (or NO). cursor="NO" # cursor type {normal|blink|destructive} (or NO). scrnmap="NO" # screen map in /usr/share/syscons/scrnmaps/* (or NO). font8x16="NO" # font 8x16 from /usr/share/syscons/fonts/* (or NO). font8x14="NO" # font 8x14 from /usr/share/syscons/fonts/* (or NO). font8x8="NO" # font 8x8 from /usr/share/syscons/fonts/* (or NO). blanktime="300" # blank time (in seconds) or "NO" to turn it off. saver="NO" # screen saver: Uses /boot/kernel/${saver}_saver.ko moused_nondefault_enable="YES" # Treat non-default mice as enabled unless # specifically overriden in rc.conf(5). moused_enable="NO" # Run the mouse daemon. moused_type="auto" # See man page for rc.conf(5) for available settings. moused_port="/dev/psm0" # Set to your mouse port. moused_flags="" # Any additional flags to moused. mousechar_start="NO" # if 0xd0-0xd3 default range is occupied in your # language code table, specify alternative range # start like mousechar_start=3, see vidcontrol(1) allscreens_flags="" # Set this vidcontrol mode for all virtual screens allscreens_kbdflags="" # Set this kbdcontrol mode for all virtual screens ############################################################## ### Mail Transfer Agent (MTA) options ###################### ############################################################## mta_start_script="/etc/rc.sendmail" # Script to start your chosen MTA, called by /etc/rc. # Settings for /etc/rc.sendmail and /etc/rc.d/sendmail: sendmail_enable="NO" # Run the sendmail inbound daemon (YES/NO). sendmail_pidfile="/var/run/sendmail.pid" # sendmail pid file sendmail_procname="/usr/sbin/sendmail" # sendmail process name sendmail_flags="-L sm-mta -bd -q30m" # Flags to sendmail (as a server) sendmail_submit_enable="YES" # Start a localhost-only MTA for mail submission sendmail_submit_flags="-L sm-mta -bd -q30m -ODaemonPortOptions=Addr=localhost" # Flags for localhost-only MTA sendmail_outbound_enable="YES" # Dequeue stuck mail (YES/NO). sendmail_outbound_flags="-L sm-queue -q30m" # Flags to sendmail (outbound only) sendmail_msp_queue_enable="YES" # Dequeue stuck clientmqueue mail (YES/NO). sendmail_msp_queue_flags="-L sm-msp-queue -Ac -q30m" # Flags for sendmail_msp_queue daemon. sendmail_rebuild_aliases="NO" # Run newaliases if necessary (YES/NO). ############################################################## ### Miscellaneous administrative options ################### ############################################################## auditd_enable="NO" # Run the audit daemon. auditd_program="/usr/sbin/auditd" # Path to the audit daemon. auditd_flags="" # Which options to pass to the audit daemon. auditdistd_enable="NO" # Run the audit daemon. auditdistd_program="/usr/sbin/auditdistd" # Path to the auditdistd daemon. auditdistd_flags="" # Which options to pass to the auditdistd daemon. cron_enable="YES" # Run the periodic job daemon. cron_program="/usr/sbin/cron" # Which cron executable to run (if enabled). cron_dst="YES" # Handle DST transitions intelligently (YES/NO) cron_flags="" # Which options to pass to the cron daemon. lpd_enable="NO" # Run the line printer daemon. lpd_program="/usr/sbin/lpd" # path to lpd, if you want a different one. lpd_flags="" # Flags to lpd (if enabled). nscd_enable="NO" # Run the nsswitch caching daemon. chkprintcap_enable="NO" # Run chkprintcap(8) before running lpd. chkprintcap_flags="-d" # Create missing directories by default. dumpdev="AUTO" # Device to crashdump to (device name, AUTO, or NO). dumpdir="/var/crash" # Directory where crash dumps are to be stored savecore_flags="-m 10" # Used if dumpdev is enabled above, and present. # By default, only the 10 most recent kernel dumps # are saved. crashinfo_enable="YES" # Automatically generate crash dump summary. crashinfo_program="/usr/sbin/crashinfo" # Script to generate crash dump summary. quota_enable="NO" # turn on quotas on startup (or NO). check_quotas="YES" # Check quotas on startup (or NO). quotaon_flags="-a" # Turn quotas on for all file systems (if enabled) quotaoff_flags="-a" # Turn quotas off for all file systems at shutdown quotacheck_flags="-a" # Check all file system quotas (if enabled) accounting_enable="NO" # Turn on process accounting (or NO). ibcs2_enable="NO" # Ibcs2 (SCO) emulation loaded at startup (or NO). ibcs2_loaders="coff" # List of additional Ibcs2 loaders (or NO). # Emulation/compatibility services provided by /etc/rc.d/abi sysvipc_enable="NO" # Load System V IPC primitives at startup (or NO). linux_enable="NO" # Linux binary compatibility loaded at startup (or NO). svr4_enable="NO" # SysVR4 emulation loaded at startup (or NO). clear_tmp_enable="NO" # Clear /tmp at startup. clear_tmp_X="YES" # Clear and recreate X11-related directories in /tmp ldconfig_insecure="NO" # Set to YES to disable ldconfig security checks ldconfig_paths="/usr/lib/compat /usr/local/lib /usr/local/lib/compat/pkg" # shared library search paths ldconfig32_paths="/usr/lib32" # 32-bit compatibility shared library search paths ldconfig_paths_aout="/usr/lib/compat/aout /usr/local/lib/aout" # a.out shared library search paths ldconfig_local_dirs="/usr/local/libdata/ldconfig" # Local directories with ldconfig configuration files. ldconfig_local32_dirs="/usr/local/libdata/ldconfig32" # Local directories with 32-bit compatibility ldconfig # configuration files. kern_securelevel_enable="NO" # kernel security level (see security(7)) kern_securelevel="-1" # range: -1..3 ; `-1' is the most insecure # Note that setting securelevel to 0 will result # in the system booting with securelevel set to 1, as # init(8) will raise the level when rc(8) completes. update_motd="YES" # update version info in /etc/motd (or NO) entropy_file="/entropy" # Set to NO to disable caching entropy through reboots. # /var/db/entropy-file is preferred if / is not avail. entropy_dir="/var/db/entropy" # Set to NO to disable caching entropy via cron. entropy_save_sz="2048" # Size of the entropy cache files. entropy_save_num="8" # Number of entropy cache files to save. harvest_interrupt="YES" # Entropy device harvests interrupt randomness harvest_ethernet="YES" # Entropy device harvests ethernet randomness harvest_p_to_p="YES" # Entropy device harvests point-to-point randomness dmesg_enable="YES" # Save dmesg(8) to /var/run/dmesg.boot watchdogd_enable="NO" # Start the software watchdog daemon watchdogd_flags="" # Flags to watchdogd (if enabled) devfs_rulesets="/etc/defaults/devfs.rules /etc/devfs.rules" # Files containing # devfs(8) rules. devfs_system_ruleset="" # The name (NOT number) of a ruleset to apply to /dev devfs_set_rulesets="" # A list of /mount/dev=ruleset_name settings to # apply (must be mounted already, i.e. fstab(5)) devfs_load_rulesets="NO" # Enable to always load the default rulesets performance_cx_lowest="HIGH" # Online CPU idle state performance_cpu_freq="NONE" # Online CPU frequency economy_cx_lowest="HIGH" # Offline CPU idle state economy_cpu_freq="NONE" # Offline CPU frequency virecover_enable="YES" # Perform housekeeping for the vi(1) editor ugidfw_enable="NO" # Load mac_bsdextended(4) rules on boot bsdextended_script="/etc/rc.bsdextended" # Default mac_bsdextended(4) # ruleset file. newsyslog_enable="YES" # Run newsyslog at startup. newsyslog_flags="-CN" # Newsyslog flags to create marked files mixer_enable="YES" # Run the sound mixer. opensm_enable="NO" # Opensm(8) for infiniband devices defaults to off ############################################################## ### Jail Configuration ####################################### ############################################################## jail_enable="NO" # Set to NO to disable starting of any jails jail_parallel_start="NO" # Start jails in the background jail_list="" # Space separated list of names of jails jail_set_hostname_allow="YES" # Allow root user in a jail to change its hostname jail_socket_unixiproute_only="YES" # Route only TCP/IP within a jail jail_sysvipc_allow="NO" # Allow SystemV IPC use from within a jail # # To use rc's built-in jail infrastructure create entries for # each jail, specified in jail_list, with the following variables. # NOTES: # - replace 'example' with the jail's name. # - except rootdir, hostname, ip and the _multi addresses, # all of the following variables may be made global jail variables # if you don't specify a jail name (ie. jail_interface, jail_devfs_ruleset). # #jail_example_rootdir="/usr/jail/default" # Jail's root directory #jail_example_hostname="default.domain.com" # Jail's hostname #jail_example_interface="" # Jail's interface variable to create IP aliases on #jail_example_fib="0" # Routing table for setfib(1) #jail_example_ip="192.0.2.10,2001:db8::17" # Jail's primary IPv4 and IPv6 address #jail_example_ip_multi0="2001:db8::10" # and another IPv6 address #jail_example_exec_start="/bin/sh /etc/rc" # command to execute in jail for starting #jail_example_exec_afterstart0="/bin/sh command" # command to execute after the one for # starting the jail. More than one can be # specified using a trailing number #jail_example_exec_stop="/bin/sh /etc/rc.shutdown" # command to execute in jail for stopping #jail_example_devfs_enable="NO" # mount devfs in the jail #jail_example_devfs_ruleset="ruleset_name" # devfs ruleset to apply to jail - # usually you want "devfsrules_jail". #jail_example_fdescfs_enable="NO" # mount fdescfs in the jail #jail_example_procfs_enable="NO" # mount procfs in jail #jail_example_mount_enable="NO" # mount/umount jail's fs #jail_example_fstab="" # fstab(5) for mount/umount #jail_example_flags="-l -U root" # flags for jail(8) #jail_example_parameters="allow.raw_sockets=1" # extra parameters for this jail ############################################################## ### Define source_rc_confs, the mechanism used by /etc/rc.* ## ### scripts to source rc_conf_files overrides safely. ## ############################################################## if [ -z "${source_rc_confs_defined}" ]; then source_rc_confs_defined=yes source_rc_confs() { local i sourced_files for i in ${rc_conf_files}; do case ${sourced_files} in *:$i:*) ;; *) sourced_files="${sourced_files}:$i:" if [ -r $i ]; then . $i fi ;; esac done } fi Index: user/attilio/vmcontention/etc/rc.d/swap1 =================================================================== --- user/attilio/vmcontention/etc/rc.d/swap1 (revision 252343) +++ user/attilio/vmcontention/etc/rc.d/swap1 (nonexistent) @@ -1,17 +0,0 @@ -#!/bin/sh -# -# $FreeBSD$ -# - -# PROVIDE: localswap -# REQUIRE: disks -# KEYWORD: nojail shutdown - -. /etc/rc.subr - -name="swap1" -start_cmd='swapon -aq' -stop_cmd=':' - -load_rc_config swap -run_rc_command "$1" Property changes on: user/attilio/vmcontention/etc/rc.d/swap1 ___________________________________________________________________ Deleted: svn:executable ## -1 +0,0 ## -* \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: user/attilio/vmcontention/etc/rc.d/encswap =================================================================== --- user/attilio/vmcontention/etc/rc.d/encswap (revision 252343) +++ user/attilio/vmcontention/etc/rc.d/encswap (nonexistent) @@ -1,57 +0,0 @@ -#!/bin/sh -# -# $FreeBSD$ -# - -# PROVIDE: disks -# REQUIRE: initrandom -# KEYWORD: nojail - -. /etc/rc.subr - -name="encswap" -start_cmd="encswap_attach" -stop_cmd="encswap_detach" - -encswap_attach() -{ - while read device mountpoint type options rest ; do - case ":${device}:${type}:${options}" in - :#*) - continue - ;; - *.bde:swap:sw) - passphrase=`dd if=/dev/random count=1 2>/dev/null | md5 -q` - device="${device%.bde}" - gbde init "${device}" -P "${passphrase}" || return 1 - gbde attach "${device}" -p "${passphrase}" || return 1 - ;; - *.eli:swap:sw) - device="${device%.eli}" - geli onetime ${geli_swap_flags} "${device}" || return 1 - ;; - esac - done < /etc/fstab -} - -encswap_detach() -{ - while read device mountpoint type options rest ; do - case ":${device}:${type}:${options}" in - :#*) - continue - ;; - *.bde:swap:sw) - device="${device%.bde}" - gbde detach "${device}" - ;; - *.eli:swap:sw) - # Nothing here, because geli swap devices should be - # created with the auto-detach-on-last-close option. - ;; - esac - done < /etc/fstab -} - -load_rc_config $name -run_rc_command "$1" Property changes on: user/attilio/vmcontention/etc/rc.d/encswap ___________________________________________________________________ Deleted: svn:executable ## -1 +0,0 ## -* \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: user/attilio/vmcontention/etc/rc.d/Makefile =================================================================== --- user/attilio/vmcontention/etc/rc.d/Makefile (revision 252343) +++ user/attilio/vmcontention/etc/rc.d/Makefile (revision 252344) @@ -1,191 +1,191 @@ # $FreeBSD$ .include FILES= DAEMON \ FILESYSTEMS \ LOGIN \ NETWORKING \ SERVERS \ abi \ accounting \ addswap \ adjkerntz \ amd \ apm \ apmd \ archdep \ atm1 \ atm2 \ atm3 \ auditd \ auditdistd \ bgfsck \ bluetooth \ bootparams \ bridge \ bsnmpd \ bthidd \ ccd \ cleanvar \ cleartmp \ cron \ ddb \ defaultroute \ devd \ devfs \ dhclient \ dmesg \ dumpon \ - encswap \ faith \ fsck \ ftp-proxy \ ftpd \ gbde \ geli \ geli2 \ gptboot \ gssd \ hastd \ hcsecd \ hostapd \ hostid \ hostid_save \ hostname \ inetd \ initrandom \ ip6addrctl \ ipfilter \ ipfs \ ipfw \ ipmon \ ipnat \ ipsec \ ${_ipxrouted} \ jail \ kadmind \ kerberos \ keyserv \ kfd \ kld \ kldxref \ kpasswdd \ ldconfig \ local \ localpkg \ lockd \ lpd \ mixer \ motd \ mountcritlocal \ mountcritremote \ mountlate \ mdconfig \ mdconfig2 \ mountd \ moused \ mroute6d \ mrouted \ msgs \ named \ natd \ netif \ netoptions \ netwait \ newsyslog \ nfsclient \ nfscbd \ nfsd \ nfsuserd \ nisdomain \ ${_nscd} \ nsswitch \ ntpd \ ntpdate \ ${_opensm} \ othermta \ pf \ pflog \ pfsync \ postrandom \ powerd \ power_profile \ ppp \ pppoed \ pwcheck \ quota \ random \ rarpd \ rctl \ resolv \ rfcomm_pppd_server \ root \ route6d \ routed \ routing \ rpcbind \ rtadvd \ rtsold \ rwho \ savecore \ sdpd \ securelevel \ sendmail \ serial \ sppp \ ${_sshd} \ statd \ static_arp \ static_ndp \ stf \ - swap1 \ + swap \ + swaplate \ syscons \ sysctl \ syslogd \ timed \ tmp \ ${_ubthidhci} \ ugidfw \ ${_utx} \ var \ virecover \ watchdogd \ wpa_supplicant \ ypbind \ yppasswdd \ ypserv \ ypset \ ypupdated \ ypxfrd \ zfs \ zvol .if ${MK_IPX} != "no" _ipxrouted= ipxrouted .endif .if ${MK_OFED} != "no" _opensm= opensm .endif .if ${MK_OPENSSH} != "no" _sshd= sshd .endif .if ${MK_NS_CACHING} != "no" _nscd= nscd .endif .if ${MK_BLUETOOTH} != "no" _ubthidhci= ubthidhci .endif .if ${MK_UTMPX} != "no" _utx= utx .endif FILESDIR= /etc/rc.d FILESMODE= ${BINMODE} .include Index: user/attilio/vmcontention/etc/rc.d/addswap =================================================================== --- user/attilio/vmcontention/etc/rc.d/addswap (revision 252343) +++ user/attilio/vmcontention/etc/rc.d/addswap (revision 252344) @@ -1,69 +1,24 @@ #!/bin/sh # # Add additional swap files # # $FreeBSD$ # # PROVIDE: addswap # REQUIRE: FILESYSTEMS kld # BEFORE: netif # KEYWORD: nojail shutdown . /etc/rc.subr name="addswap" -start_cmd="addswap_start" -stop_cmd="addswap_stop" +start_cmd=":" +stop_cmd=":" +rcvar= -addswap_start() -{ - case ${swapfile} in - [Nn][Oo] | '') - ;; - *) - if [ -w "${swapfile}" ]; then - check_startmsgs && echo "Adding ${swapfile} as additional swap" - - if [ -n "${swapfile_mdunit}" ]; then - mdev="/dev/md${swapfile_mdunit#md}" - mdconfig -a -t vnode -f "${swapfile}" -u ${swapfile_mdunit} - else - mdev="/dev/`mdconfig -a -t vnode -f "${swapfile}"`" - fi - - if [ $? -eq 0 ]; then - swapon ${mdev} - else - echo "error creating swapfile device" - fi - fi - ;; - esac -} - -addswap_stop() -{ - case ${swapfile} in - [Nn][Oo] | '') - ;; - *) - if [ -n "${swapfile_mdunit}" ]; then - mdev="/dev/md${swapfile_mdunit#md}" - else - mdev="/dev/`mdconfig -lv | grep "${swapfile}" | cut -f1`" - swapfile_mdunit=${mdev#md} - fi - if [ -n "${swapfile_mdunit}" ]; then - swapctl -l | grep -q ${mdev} - if [ $? -eq 0 ]; then - echo "Dismounting swapfile ${swapfile}" - swapoff ${mdev} && mdconfig -d -u ${swapfile_mdunit} - fi - fi - ;; - esac -} +set_rcvar_obsolete swapfile +set_rcvar_obsolete geli_swap_flags load_rc_config $name run_rc_command "$1" Index: user/attilio/vmcontention/etc/rc.d/fsck =================================================================== --- user/attilio/vmcontention/etc/rc.d/fsck (revision 252343) +++ user/attilio/vmcontention/etc/rc.d/fsck (revision 252344) @@ -1,78 +1,78 @@ #!/bin/sh # # $FreeBSD$ # # PROVIDE: fsck -# REQUIRE: localswap +# REQUIRE: swap # KEYWORD: nojail . /etc/rc.subr name="fsck" start_cmd="fsck_start" stop_cmd=":" fsck_start() { if [ "$autoboot" = no ]; then echo "Fast boot: skipping disk checks." elif [ ! -r /etc/fstab ]; then echo "Warning! No /etc/fstab: skipping disk checks." elif [ "$autoboot" = yes ]; then # During fsck ignore SIGQUIT trap : 3 check_startmsgs && echo "Starting file system checks:" if checkyesno background_fsck; then fsck -F -p else fsck -p fi case $? in 0) ;; 2) stop_boot ;; 4) echo "Rebooting..." reboot echo "Reboot failed; help!" stop_boot ;; 8) if checkyesno fsck_y_enable; then echo "File system preen failed, trying fsck -y ${fsck_y_flags}" fsck -y ${fsck_y_flags} case $? in 0) ;; *) echo "Automatic file system check failed; help!" stop_boot ;; esac else echo "Automatic file system check failed; help!" stop_boot fi ;; 12) echo "Boot interrupted." stop_boot ;; 130) stop_boot ;; *) echo "Unknown error; help!" stop_boot ;; esac fi } load_rc_config $name run_rc_command "$1" Index: user/attilio/vmcontention/etc/rc.d/mdconfig =================================================================== --- user/attilio/vmcontention/etc/rc.d/mdconfig (revision 252343) +++ user/attilio/vmcontention/etc/rc.d/mdconfig (revision 252344) @@ -1,197 +1,197 @@ #!/bin/sh # # Copyright (c) 2006 The FreeBSD Project # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # # PROVIDE: mdconfig -# REQUIRE: localswap root +# REQUIRE: swap root . /etc/rc.subr name="mdconfig" stop_cmd="mdconfig_stop" start_cmd="mdconfig_start" start_precmd='[ -n "${_mdconfig_list}" ]' required_modules="geom_md:g_md" is_readonly() { local _mp _ret _mp=$1 _ret=`mount | while read _line; do case ${_line} in *" ${_mp} "*read-only*) echo "yes" ;; *) ;; esac; done` if [ -n "${_ret}" ]; then return 0 else return 1 fi } init_variables() { local _i _fs="" _mp="" _dev="/dev/${_md}" eval _config=\$mdconfig_${_md} eval _newfs=\$mdconfig_${_md}_newfs _type=${_config##*-t\ } _type=${_type%%\ *} if [ -z "${_type}" ]; then err 1 "You need to specify \"-t \" in mdconfig_${_md}" fi if [ "${_type}" = "vnode" ]; then _file=${_config##*-f\ } _file=${_file%%\ *} if [ -z "${_file}" ]; then err 2 "You need to specify \"-f \" in mdconfig_${_md} for vnode devices" fi if [ "${_file}" != "${_file%.uzip}" ]; then _dev="/dev/${_md}.uzip" fi for _i in `df ${_file} 2>/dev/null`; do _fs=${_i}; done fi # Debugging help. debug "${_md} config: ${_config}" debug "${_md} type: ${_type}" debug "${_md} dev: ${_dev}" debug "${_md} file: ${_file}" debug "${_md} fs: ${_fs}" debug "${_md} newfs flags: ${_newfs}" } mdconfig_start() { local _md _mp _config _type _dev _file _fs _newfs _fsck_cmd for _md in ${_mdconfig_list}; do init_variables ${_md} # Create md(4) devices of types swap, malloc and vnode if the # file is on the root partition. if [ "${_type}" != "vnode" -o "${_fs}" = "/" ]; then if [ "${_type}" = "vnode" ]; then if is_readonly ${_fs}; then warn "${_fs} is mounted read-only, skipping ${_md}." continue fi if [ "${_file}" != "${_file%.uzip}" ]; then load_kld -m g_uzip geom_uzip || return 3 # sleep a bit to allow creation of /dev/mdX.uzip sleep 2 fi fi if mdconfig -l -u ${_md} >/dev/null 2>&1; then err 3 "${_md} already exists" fi echo "Creating ${_md} device (${_type})." if ! mdconfig -a ${_config} -u ${_md}; then echo "Creating ${_md} device failed, moving on." continue fi # Skip fsck for uzip devices. if [ "${_type}" = "vnode" ]; then if [ "${_file}" != "${_file%.uzip}" ]; then _fsck_cmd=":" elif checkyesno background_fsck; then _fsck_cmd="fsck -F" else _fsck_cmd="fsck" fi if ! eval ${_fsck_cmd} -p ${_dev} >/dev/null; then echo "Fsck failed on ${_dev}, not mounting the filesystem." continue fi else newfs ${_newfs} ${_dev} >/dev/null fi if mount -d ${_dev} 2>&1 >/dev/null; then echo "Mounting ${_dev}." mount ${_dev} fi fi done } mdconfig_stop() { local _md _mp _config _type _dev _file _fs _newfs _i for _md in ${_mdconfig_list}; do init_variables ${_md} if [ "${_type}" != "vnode" -o "${_fs}" = "/" ]; then for _i in `df ${_dev} 2>/dev/null`; do _mp=${_i}; done if [ -z "${_mp}" -o "${_mp}" != "${_mp%%%}" ]; then echo "Device ${_dev} isn't mounted." else echo "Umounting ${_dev}." umount ${_dev} fi if mdconfig -l -u ${_md} >/dev/null 2>&1; then echo "Destroying ${_md}." mdconfig -d -u ${_md} fi fi done } _mdconfig_cmd="$1" if [ $# -gt 0 ]; then shift fi [ -n "$*" ] && _mdconfig_list="$*" load_rc_config $name _mdconfig_unit=0 if [ -z "${_mdconfig_list}" ]; then while :; do eval _mdconfig_config=\$mdconfig_md${_mdconfig_unit} if [ -z "${_mdconfig_config}" ]; then break else _mdconfig_list="${_mdconfig_list}${_mdconfig_list:+ }md${_mdconfig_unit}" _mdconfig_unit=$((${_mdconfig_unit} + 1)) fi done fi run_rc_command "${_mdconfig_cmd}" Index: user/attilio/vmcontention/etc/rc.d/swap =================================================================== --- user/attilio/vmcontention/etc/rc.d/swap (nonexistent) +++ user/attilio/vmcontention/etc/rc.d/swap (revision 252344) @@ -0,0 +1,17 @@ +#!/bin/sh +# +# $FreeBSD$ +# + +# PROVIDE: swap +# REQUIRE: disks +# KEYWORD: nojail shutdown + +. /etc/rc.subr + +name="swap" +start_cmd='/sbin/swapon -aq' +stop_cmd=':' + +load_rc_config $name +run_rc_command "$1" Property changes on: user/attilio/vmcontention/etc/rc.d/swap ___________________________________________________________________ Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: user/attilio/vmcontention/etc/rc.d/swaplate =================================================================== --- user/attilio/vmcontention/etc/rc.d/swaplate (nonexistent) +++ user/attilio/vmcontention/etc/rc.d/swaplate (revision 252344) @@ -0,0 +1,17 @@ +#!/bin/sh +# +# $FreeBSD$ +# + +# PROVIDE: swaplate +# REQUIRE: mountlate +# KEYWORD: nojail shutdown + +. /etc/rc.subr + +name="swaplate" +start_cmd='/sbin/swapon -aLq' +stop_cmd='/sbin/swapoff -aq' + +load_rc_config swap +run_rc_command "$1" Property changes on: user/attilio/vmcontention/etc/rc.d/swaplate ___________________________________________________________________ Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: user/attilio/vmcontention/include/paths.h =================================================================== --- user/attilio/vmcontention/include/paths.h (revision 252343) +++ user/attilio/vmcontention/include/paths.h (revision 252344) @@ -1,141 +1,143 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)paths.h 8.1 (Berkeley) 6/2/93 * $FreeBSD$ */ #ifndef _PATHS_H_ #define _PATHS_H_ #include /* Default search path. */ #define _PATH_DEFPATH "/usr/bin:/bin" /* All standard utilities path. */ #define _PATH_STDPATH "/usr/bin:/bin:/usr/sbin:/sbin" /* Locate system binaries. */ #define _PATH_SYSPATH "/sbin:/usr/sbin" #define _PATH_BSHELL "/bin/sh" #define _PATH_CAPABILITY "/etc/capability" #define _PATH_CAPABILITY_DB "/etc/capability.db" #define _PATH_CONSOLE "/dev/console" #define _PATH_CP "/bin/cp" #define _PATH_CSHELL "/bin/csh" #define _PATH_CSMAPPER "/usr/share/i18n/csmapper" #define _PATH_DEFTAPE "/dev/sa0" #define _PATH_DEVNULL "/dev/null" #define _PATH_DEVZERO "/dev/zero" #define _PATH_DRUM "/dev/drum" #define _PATH_ESDB "/usr/share/i18n/esdb" #define _PATH_ETC "/etc" #define _PATH_FTPUSERS "/etc/ftpusers" #define _PATH_FWMEM "/dev/fwmem" +#define _PATH_GBDE "/sbin/gbde" +#define _PATH_GELI "/sbin/geli" #define _PATH_HALT "/sbin/halt" #ifdef COMPAT_32BIT #define _PATH_I18NMODULE "/usr/lib32/i18n" #else #define _PATH_I18NMODULE "/usr/lib/i18n" #endif #define _PATH_IFCONFIG "/sbin/ifconfig" #define _PATH_KMEM "/dev/kmem" #define _PATH_LIBMAP_CONF "/etc/libmap.conf" #define _PATH_LOCALE "/usr/share/locale" #define _PATH_LOGIN "/usr/bin/login" #define _PATH_MAILDIR "/var/mail" #define _PATH_MAN "/usr/share/man" #define _PATH_MDCONFIG "/sbin/mdconfig" #define _PATH_MEM "/dev/mem" #define _PATH_MKSNAP_FFS "/sbin/mksnap_ffs" #define _PATH_MOUNT "/sbin/mount" #define _PATH_NEWFS "/sbin/newfs" #define _PATH_NOLOGIN "/var/run/nologin" #define _PATH_RCP "/bin/rcp" #define _PATH_REBOOT "/sbin/reboot" #define _PATH_RLOGIN "/usr/bin/rlogin" #define _PATH_RM "/bin/rm" #define _PATH_RSH "/usr/bin/rsh" #define _PATH_SENDMAIL "/usr/sbin/sendmail" #define _PATH_SHELLS "/etc/shells" #define _PATH_TTY "/dev/tty" #define _PATH_UNIX "don't use _PATH_UNIX" #define _PATH_UFSSUSPEND "/dev/ufssuspend" #define _PATH_VI "/usr/bin/vi" #define _PATH_WALL "/usr/bin/wall" /* Provide trailing slash, since mostly used for building pathnames. */ #define _PATH_DEV "/dev/" #define _PATH_TMP "/tmp/" #define _PATH_VARDB "/var/db/" #define _PATH_VARRUN "/var/run/" #define _PATH_VARTMP "/var/tmp/" #define _PATH_YP "/var/yp/" #define _PATH_UUCPLOCK "/var/spool/lock/" /* How to get the correct name of the kernel. */ __BEGIN_DECLS const char *getbootfile(void); __END_DECLS #ifdef RESCUE #undef _PATH_DEFPATH #define _PATH_DEFPATH "/rescue:/usr/bin:/bin" #undef _PATH_STDPATH #define _PATH_STDPATH "/rescue:/usr/bin:/bin:/usr/sbin:/sbin" #undef _PATH_SYSPATH #define _PATH_SYSPATH "/rescue:/sbin:/usr/sbin" #undef _PATH_BSHELL #define _PATH_BSHELL "/rescue/sh" #undef _PATH_CP #define _PATH_CP "/rescue/cp" #undef _PATH_CSHELL #define _PATH_CSHELL "/rescue/csh" #undef _PATH_HALT #define _PATH_HALT "/rescue/halt" #undef _PATH_IFCONFIG #define _PATH_IFCONFIG "/rescue/ifconfig" #undef _PATH_MDCONFIG #define _PATH_MDCONFIG "/rescue/mdconfig" #undef _PATH_MOUNT #define _PATH_MOUNT "/rescue/mount" #undef _PATH_NEWFS #define _PATH_NEWFS "/rescue/newfs" #undef _PATH_RCP #define _PATH_RCP "/rescue/rcp" #undef _PATH_REBOOT #define _PATH_REBOOT "/rescue/reboot" #undef _PATH_RM #define _PATH_RM "/rescue/rm" #undef _PATH_VI #define _PATH_VI "/rescue/vi" #undef _PATH_WALL #define _PATH_WALL "/rescue/wall" #endif /* RESCUE */ #endif /* !_PATHS_H_ */ Index: user/attilio/vmcontention/sbin/nvmecontrol/Makefile =================================================================== --- user/attilio/vmcontention/sbin/nvmecontrol/Makefile (revision 252343) +++ user/attilio/vmcontention/sbin/nvmecontrol/Makefile (revision 252344) @@ -1,6 +1,8 @@ # $FreeBSD$ PROG= nvmecontrol +SRCS= nvmecontrol.c devlist.c firmware.c identify.c logpage.c \ + perftest.c reset.c MAN= nvmecontrol.8 .include Index: user/attilio/vmcontention/sbin/nvmecontrol/devlist.c =================================================================== --- user/attilio/vmcontention/sbin/nvmecontrol/devlist.c (nonexistent) +++ user/attilio/vmcontention/sbin/nvmecontrol/devlist.c (revision 252344) @@ -0,0 +1,115 @@ +/*- + * Copyright (C) 2012-2013 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nvmecontrol.h" + +static void +devlist_usage(void) +{ + fprintf(stderr, "usage:\n"); + fprintf(stderr, DEVLIST_USAGE); + exit(EX_USAGE); +} + +static inline uint32_t +ns_get_sector_size(struct nvme_namespace_data *nsdata) +{ + + return (1 << nsdata->lbaf[0].lbads); +} + +void +devlist(int argc, char *argv[]) +{ + struct nvme_controller_data cdata; + struct nvme_namespace_data nsdata; + char name[64]; + uint32_t i; + int ch, ctrlr, exit_code, fd, found; + + exit_code = EX_OK; + + while ((ch = getopt(argc, argv, "")) != -1) { + switch ((char)ch) { + default: + devlist_usage(); + } + } + + ctrlr = -1; + found = 0; + + while (1) { + ctrlr++; + sprintf(name, "%s%d", NVME_CTRLR_PREFIX, ctrlr); + + exit_code = open_dev(name, &fd, 0, 0); + + if (exit_code == EX_NOINPUT) + break; + else if (exit_code == EX_NOPERM) { + printf("Could not open /dev/%s, errno = %d (%s)\n", + name, errno, strerror(errno)); + continue; + } + + found++; + read_controller_data(fd, &cdata); + printf("%6s: %s\n", name, cdata.mn); + + for (i = 0; i < cdata.nn; i++) { + sprintf(name, "%s%d%s%d", NVME_CTRLR_PREFIX, ctrlr, + NVME_NS_PREFIX, i+1); + read_namespace_data(fd, i+1, &nsdata); + printf(" %10s (%lldGB)\n", + name, + nsdata.nsze * + (long long)ns_get_sector_size(&nsdata) / + 1024 / 1024 / 1024); + } + + close(fd); + } + + if (found == 0) + printf("No NVMe controllers found.\n"); + + exit(EX_OK); +} Property changes on: user/attilio/vmcontention/sbin/nvmecontrol/devlist.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: user/attilio/vmcontention/sbin/nvmecontrol/firmware.c =================================================================== --- user/attilio/vmcontention/sbin/nvmecontrol/firmware.c (nonexistent) +++ user/attilio/vmcontention/sbin/nvmecontrol/firmware.c (revision 252344) @@ -0,0 +1,335 @@ +/*- + * Copyright (c) 2013 EMC Corp. + * All rights reserved. + * + * Copyright (C) 2012-2013 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nvmecontrol.h" + +static int +slot_has_valid_firmware(int fd, int slot) +{ + struct nvme_firmware_page fw; + int has_fw = false; + + read_logpage(fd, NVME_LOG_FIRMWARE_SLOT, + NVME_GLOBAL_NAMESPACE_TAG, &fw, sizeof(fw)); + + if (fw.revision[slot-1] != 0LLU) + has_fw = true; + + return (has_fw); +} + +static void +read_image_file(char *path, void **buf, ssize_t *size) +{ + struct stat sb; + int fd; + + *size = 0; + *buf = NULL; + + if ((fd = open(path, O_RDONLY)) < 0) { + fprintf(stderr, "Unable to open '%s'.\n", path); + exit(EX_IOERR); + } + if (fstat(fd, &sb) < 0) { + fprintf(stderr, "Unable to stat '%s'.\n", path); + close(fd); + exit(EX_IOERR); + } + if ((*buf = malloc(sb.st_size)) == NULL) { + fprintf(stderr, "Unable to malloc %zd bytes.\n", + sb.st_size); + close(fd); + exit(EX_IOERR); + } + if ((*size = read(fd, *buf, sb.st_size)) < 0) { + fprintf(stderr, "Error reading '%s', errno=%d (%s)\n", + path, errno, strerror(errno)); + close(fd); + exit(EX_IOERR); + } + if (*size != sb.st_size) { + fprintf(stderr, "Error reading '%s', " + "read %zd bytes, requested %zd bytes\n", + path, *size, sb.st_size); + close(fd); + exit(EX_IOERR); + } +} + +static void +update_firmware(int fd, uint8_t *payload, uint32_t payload_size) +{ + struct nvme_pt_command pt; + size_t size; + void *chunk; + uint32_t off, resid; + int exit_code = EX_OK; + + off = 0; + resid = payload_size; + + if ((chunk = malloc((size_t)NVME_MAX_XFER_SIZE)) == NULL) { + printf("Unable to malloc %d bytes.\n", NVME_MAX_XFER_SIZE); + exit(EX_IOERR); + } + + while (resid > 0) { + size = (resid >= NVME_MAX_XFER_SIZE) ? + NVME_MAX_XFER_SIZE : resid; + memcpy(chunk, payload + off, size); + + memset(&pt, 0, sizeof(pt)); + pt.cmd.opc = NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD; + pt.cmd.cdw10 = (size / sizeof(uint32_t)) - 1; + pt.cmd.cdw11 = (off / sizeof(uint32_t)); + pt.buf = chunk; + pt.len = size; + pt.is_read = 0; + + if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) { + printf("Firmware image download request failed. " + "errno=%d (%s)\n", + errno, strerror(errno)); + exit_code = EX_IOERR; + break; + } + + if (nvme_completion_is_error(&pt.cpl)) { + printf("Passthrough command returned error.\n"); + exit_code = EX_IOERR; + break; + } + + resid -= size; + off += size; + } + + if (exit_code != EX_OK) + exit(exit_code); +} + +static void +activate_firmware(int fd, int slot, int activate_action) +{ + struct nvme_pt_command pt; + + memset(&pt, 0, sizeof(pt)); + pt.cmd.opc = NVME_OPC_FIRMWARE_ACTIVATE; + pt.cmd.cdw10 = (activate_action << 3) | slot; + pt.is_read = 0; + + if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) { + printf("Firmware activate request failed. errno=%d (%s)\n", + errno, strerror(errno)); + exit(EX_IOERR); + } + + if (nvme_completion_is_error(&pt.cpl)) { + printf("Passthrough command returned error.\n"); + exit(EX_IOERR); + } +} + +static void +firmware_usage(void) +{ + fprintf(stderr, "usage:\n"); + fprintf(stderr, FIRMWARE_USAGE); + exit(EX_USAGE); +} + +void +firmware(int argc, char *argv[]) +{ + int fd = -1, slot = 0; + int a_flag, s_flag, f_flag; + char ch, *p, *image = NULL; + char *controller = NULL, prompt[64]; + void *buf = NULL; + ssize_t size; + struct nvme_controller_data cdata; + + a_flag = s_flag = f_flag = false; + + while ((ch = getopt(argc, argv, "af:s:")) != -1) { + switch (ch) { + case 'a': + a_flag = true; + break; + case 's': + slot = strtol(optarg, &p, 0); + if (p != NULL && *p != '\0') { + fprintf(stderr, + "\"%s\" not valid slot.\n", + optarg); + firmware_usage(); + } else if (slot == 0) { + fprintf(stderr, + "0 is not a valid slot number. " + "Slot numbers start at 1.\n"); + firmware_usage(); + } else if (slot > 7) { + fprintf(stderr, + "Slot number %s specified which is " + "greater than max allowed slot number of " + "7.\n", optarg); + firmware_usage(); + } + s_flag = true; + break; + case 'f': + image = optarg; + f_flag = true; + break; + } + } + + /* Check that a controller (and not a namespace) was specified. */ + if (optind >= argc || strstr(argv[optind], NVME_NS_PREFIX) != NULL) + firmware_usage(); + + if (!f_flag && !a_flag) { + fprintf(stderr, + "Neither a replace ([-f path_to_firmware]) nor " + "activate ([-a]) firmware image action\n" + "was specified.\n"); + firmware_usage(); + } + + if (!f_flag && a_flag && slot == 0) { + fprintf(stderr, + "Slot number to activate not specified.\n"); + firmware_usage(); + } + + controller = argv[optind]; + open_dev(controller, &fd, 1, 1); + read_controller_data(fd, &cdata); + + if (cdata.oacs.firmware == 0) { + fprintf(stderr, + "Controller does not support firmware " + "activate/download.\n"); + exit(EX_IOERR); + } + + if (f_flag && slot == 1 && cdata.frmw.slot1_ro) { + fprintf(stderr, "Slot %d is marked as read only.\n", slot); + exit(EX_IOERR); + } + + if (slot > cdata.frmw.num_slots) { + fprintf(stderr, + "Slot %d was specified but controller only " + "supports %d firmware slots.\n", + slot, cdata.frmw.num_slots); + exit(EX_IOERR); + } + + if (!slot_has_valid_firmware(fd, slot)) { + fprintf(stderr, + "Slot %d does not contain valid firmware.\n" + "Try 'nvmecontrol logpage -p 3 %s' to get a list " + "of available firmware images.\n", + slot, controller); + exit(EX_IOERR); + } + + if (f_flag && a_flag) + printf("You are about to download and activate " + "firmware image (%s) to controller %s.\n" + "This may damage your controller and/or " + "overwrite an existing firmware image.\n", + image, controller); + else if (a_flag) + printf("You are about to activate a new firmware " + "image on controller %s.\n" + "This may damage your controller.\n", + controller); + else if (f_flag) + printf("You are about to download firmware image " + "(%s) to controller %s.\n" + "This may damage your controller and/or " + "overwrite an existing firmware image.\n", + image, controller); + + printf("Are you sure you want to continue? (yes/no) "); + while (1) { + fgets(prompt, sizeof(prompt), stdin); + if (strncasecmp(prompt, "yes", 3) == 0) + break; + if (strncasecmp(prompt, "no", 2) == 0) + exit(EX_OK); + printf("Please answer \"yes\" or \"no\". "); + } + + if (f_flag) { + read_image_file(image, &buf, &size); + update_firmware(fd, buf, size); + if (a_flag) + activate_firmware(fd, slot, + NVME_AA_REPLACE_ACTIVATE); + else + activate_firmware(fd, slot, + NVME_AA_REPLACE_NO_ACTIVATE); + } else { + activate_firmware(fd, slot, NVME_AA_ACTIVATE); + } + + if (a_flag) { + printf("New firmware image activated and will take " + "effect after next controller reset.\n" + "Controller reset can be initiated via " + "'nvmecontrol reset %s'\n", + controller); + } + + close(fd); + exit(EX_OK); +} Property changes on: user/attilio/vmcontention/sbin/nvmecontrol/firmware.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: user/attilio/vmcontention/sbin/nvmecontrol/identify.c =================================================================== --- user/attilio/vmcontention/sbin/nvmecontrol/identify.c (nonexistent) +++ user/attilio/vmcontention/sbin/nvmecontrol/identify.c (revision 252344) @@ -0,0 +1,291 @@ +/*- + * Copyright (C) 2012-2013 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nvmecontrol.h" + +static void +print_controller(struct nvme_controller_data *cdata) +{ + printf("Controller Capabilities/Features\n"); + printf("================================\n"); + printf("Vendor ID: %04x\n", cdata->vid); + printf("Subsystem Vendor ID: %04x\n", cdata->ssvid); + printf("Serial Number: %s\n", cdata->sn); + printf("Model Number: %s\n", cdata->mn); + printf("Firmware Version: %s\n", cdata->fr); + printf("Recommended Arb Burst: %d\n", cdata->rab); + printf("IEEE OUI Identifier: %02x %02x %02x\n", + cdata->ieee[0], cdata->ieee[1], cdata->ieee[2]); + printf("Multi-Interface Cap: %02x\n", cdata->mic); + /* TODO: Use CAP.MPSMIN to determine true memory page size. */ + printf("Max Data Transfer Size: "); + if (cdata->mdts == 0) + printf("Unlimited\n"); + else + printf("%d\n", PAGE_SIZE * (1 << cdata->mdts)); + printf("\n"); + + printf("Admin Command Set Attributes\n"); + printf("============================\n"); + printf("Security Send/Receive: %s\n", + cdata->oacs.security ? "Supported" : "Not Supported"); + printf("Format NVM: %s\n", + cdata->oacs.format ? "Supported" : "Not Supported"); + printf("Firmware Activate/Download: %s\n", + cdata->oacs.firmware ? "Supported" : "Not Supported"); + printf("Abort Command Limit: %d\n", cdata->acl+1); + printf("Async Event Request Limit: %d\n", cdata->aerl+1); + printf("Number of Firmware Slots: "); + if (cdata->oacs.firmware != 0) + printf("%d\n", cdata->frmw.num_slots); + else + printf("N/A\n"); + printf("Firmware Slot 1 Read-Only: "); + if (cdata->oacs.firmware != 0) + printf("%s\n", cdata->frmw.slot1_ro ? "Yes" : "No"); + else + printf("N/A\n"); + printf("Per-Namespace SMART Log: %s\n", + cdata->lpa.ns_smart ? "Yes" : "No"); + printf("Error Log Page Entries: %d\n", cdata->elpe+1); + printf("Number of Power States: %d\n", cdata->npss+1); + printf("\n"); + + printf("NVM Command Set Attributes\n"); + printf("==========================\n"); + printf("Submission Queue Entry Size\n"); + printf(" Max: %d\n", 1 << cdata->sqes.max); + printf(" Min: %d\n", 1 << cdata->sqes.min); + printf("Completion Queue Entry Size\n"); + printf(" Max: %d\n", 1 << cdata->cqes.max); + printf(" Min: %d\n", 1 << cdata->cqes.min); + printf("Number of Namespaces: %d\n", cdata->nn); + printf("Compare Command: %s\n", + cdata->oncs.compare ? "Supported" : "Not Supported"); + printf("Write Uncorrectable Command: %s\n", + cdata->oncs.write_unc ? "Supported" : "Not Supported"); + printf("Dataset Management Command: %s\n", + cdata->oncs.dsm ? "Supported" : "Not Supported"); + printf("Volatile Write Cache: %s\n", + cdata->vwc.present ? "Present" : "Not Present"); +} + +static void +print_namespace(struct nvme_namespace_data *nsdata) +{ + uint32_t i; + + printf("Size (in LBAs): %lld (%lldM)\n", + (long long)nsdata->nsze, + (long long)nsdata->nsze / 1024 / 1024); + printf("Capacity (in LBAs): %lld (%lldM)\n", + (long long)nsdata->ncap, + (long long)nsdata->ncap / 1024 / 1024); + printf("Utilization (in LBAs): %lld (%lldM)\n", + (long long)nsdata->nuse, + (long long)nsdata->nuse / 1024 / 1024); + printf("Thin Provisioning: %s\n", + nsdata->nsfeat.thin_prov ? "Supported" : "Not Supported"); + printf("Number of LBA Formats: %d\n", nsdata->nlbaf+1); + printf("Current LBA Format: LBA Format #%d\n", + nsdata->flbas.format); + for (i = 0; i <= nsdata->nlbaf; i++) { + printf("LBA Format #%d:\n", i); + printf(" LBA Data Size: %d\n", + 1 << nsdata->lbaf[i].lbads); + } +} + +static void +identify_usage(void) +{ + fprintf(stderr, "usage:\n"); + fprintf(stderr, IDENTIFY_USAGE); + exit(EX_USAGE); +} + +static void +identify_ctrlr(int argc, char *argv[]) +{ + struct nvme_controller_data cdata; + int ch, fd, hexflag = 0, hexlength; + int verboseflag = 0; + + while ((ch = getopt(argc, argv, "vx")) != -1) { + switch ((char)ch) { + case 'v': + verboseflag = 1; + break; + case 'x': + hexflag = 1; + break; + default: + identify_usage(); + } + } + + /* Check that a controller was specified. */ + if (optind >= argc) + identify_usage(); + + open_dev(argv[optind], &fd, 1, 1); + read_controller_data(fd, &cdata); + close(fd); + + if (hexflag == 1) { + if (verboseflag == 1) + hexlength = sizeof(struct nvme_controller_data); + else + hexlength = offsetof(struct nvme_controller_data, + reserved5); + print_hex(&cdata, hexlength); + exit(EX_OK); + } + + if (verboseflag == 1) { + printf("-v not currently supported without -x.\n"); + identify_usage(); + } + + print_controller(&cdata); + exit(EX_OK); +} + +static void +identify_ns(int argc, char *argv[]) +{ + struct nvme_namespace_data nsdata; + char path[64]; + char *nsloc; + int ch, fd, hexflag = 0, hexlength, nsid; + int verboseflag = 0; + + while ((ch = getopt(argc, argv, "vx")) != -1) { + switch ((char)ch) { + case 'v': + verboseflag = 1; + break; + case 'x': + hexflag = 1; + break; + default: + identify_usage(); + } + } + + /* Check that a namespace was specified. */ + if (optind >= argc) + identify_usage(); + + /* + * Check if the specified device node exists before continuing. + * This is a cleaner check for cases where the correct controller + * is specified, but an invalid namespace on that controller. + */ + open_dev(argv[optind], &fd, 1, 1); + close(fd); + + /* + * Pull the namespace id from the string. +2 skips past the "ns" part + * of the string. Don't search past 10 characters into the string, + * otherwise we know it is malformed. + */ + nsloc = strnstr(argv[optind], NVME_NS_PREFIX, 10); + if (nsloc != NULL) + nsid = strtol(nsloc + 2, NULL, 10); + if (nsloc == NULL || (nsid == 0 && errno != 0)) { + printf("Invalid namespace ID %s.\n", argv[optind]); + exit(EX_IOERR); + } + + /* + * We send IDENTIFY commands to the controller, not the namespace, + * since it is an admin cmd. So the path should only include the + * nvmeX part of the nvmeXnsY string. + */ + snprintf(path, nsloc - argv[optind] + 1, "%s", argv[optind]); + open_dev(path, &fd, 1, 1); + read_namespace_data(fd, nsid, &nsdata); + close(fd); + + if (hexflag == 1) { + if (verboseflag == 1) + hexlength = sizeof(struct nvme_namespace_data); + else + hexlength = offsetof(struct nvme_namespace_data, + reserved6); + print_hex(&nsdata, hexlength); + exit(EX_OK); + } + + if (verboseflag == 1) { + printf("-v not currently supported without -x.\n"); + identify_usage(); + } + + print_namespace(&nsdata); + exit(EX_OK); +} + +void +identify(int argc, char *argv[]) +{ + char *target; + + if (argc < 2) + identify_usage(); + + while (getopt(argc, argv, "vx") != -1) ; + + target = argv[optind]; + + optreset = 1; + optind = 1; + + /* + * If device node contains "ns", we consider it a namespace, + * otherwise, consider it a controller. + */ + if (strstr(target, NVME_NS_PREFIX) == NULL) + identify_ctrlr(argc, argv); + else + identify_ns(argc, argv); +} Property changes on: user/attilio/vmcontention/sbin/nvmecontrol/identify.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: user/attilio/vmcontention/sbin/nvmecontrol/logpage.c =================================================================== --- user/attilio/vmcontention/sbin/nvmecontrol/logpage.c (nonexistent) +++ user/attilio/vmcontention/sbin/nvmecontrol/logpage.c (revision 252344) @@ -0,0 +1,388 @@ +/*- + * Copyright (c) 2013 EMC Corp. + * All rights reserved. + * + * Copyright (C) 2012-2013 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nvmecontrol.h" + +#define DEFAULT_SIZE (4096) +#define MAX_FW_SLOTS (7) + +typedef void (*print_fn_t)(void *buf, uint32_t size); + +static void * +get_log_buffer(size_t size) +{ + void *buf; + + if ((buf = malloc(size)) == NULL) { + fprintf(stderr, "Unable to malloc %zd bytes\n", size); + exit(EX_IOERR); + } + memset(buf, 0, size); + return (buf); +} + +void +read_logpage(int fd, uint8_t log_page, int nsid, void *payload, + uint32_t payload_size) +{ + struct nvme_pt_command pt; + + memset(&pt, 0, sizeof(pt)); + pt.cmd.opc = NVME_OPC_GET_LOG_PAGE; + pt.cmd.nsid = nsid; + pt.cmd.cdw10 = ((payload_size/sizeof(uint32_t)) - 1) << 16; + pt.cmd.cdw10 |= log_page; + pt.buf = payload; + pt.len = payload_size; + pt.is_read = 1; + + if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) { + printf("Get log page request failed. errno=%d (%s)\n", + errno, strerror(errno)); + exit(EX_IOERR); + } + + if (nvme_completion_is_error(&pt.cpl)) { + printf("Passthrough command returned error.\n"); + exit(EX_IOERR); + } +} + +static void +print_log_error(void *buf, uint32_t size) +{ + int i, nentries; + struct nvme_error_information_entry *entry = buf; + struct nvme_status *status; + + printf("Error Information Log\n"); + printf("=====================\n"); + + if (entry->error_count == 0) { + printf("No error entries found\n"); + return; + } + + nentries = size/sizeof(struct nvme_error_information_entry); + for (i = 0; i < nentries; i++, entry++) { + if (entry->error_count == 0) + break; + + status = &entry->status; + printf("Entry %02d\n", i + 1); + printf("=========\n"); + printf(" Error count: %ju\n", entry->error_count); + printf(" Submission queue ID: %u\n", entry->sqid); + printf(" Command ID: %u\n", entry->cid); + /* TODO: Export nvme_status_string structures from kernel? */ + printf(" Status:\n"); + printf(" Phase tag: %d\n", status->p); + printf(" Status code: %d\n", status->sc); + printf(" Status code type: %d\n", status->sct); + printf(" More: %d\n", status->m); + printf(" DNR: %d\n", status->dnr); + printf(" Error location: %u\n", entry->error_location); + printf(" LBA: %ju\n", entry->lba); + printf(" Namespace ID: %u\n", entry->nsid); + printf(" Vendor specific info: %u\n", entry->vendor_specific); + } +} + +static void +print_log_health(void *buf, uint32_t size __unused) +{ + struct nvme_health_information_page *health = buf; + + printf("SMART/Health Information Log\n"); + printf("============================\n"); + + printf("Critical Warning State: 0x%02x\n", + health->critical_warning.raw); + printf(" Available spare: %d\n", + health->critical_warning.bits.available_spare); + printf(" Temperature: %d\n", + health->critical_warning.bits.temperature); + printf(" Device reliability: %d\n", + health->critical_warning.bits.device_reliability); + printf(" Read only: %d\n", + health->critical_warning.bits.read_only); + printf(" Volatile memory backup: %d\n", + health->critical_warning.bits.volatile_memory_backup); + printf("Temperature: %u K, %2.2f C, %3.2f F\n", + health->temperature, + (float)health->temperature - (float)273.15, + ((float)health->temperature * (float)9/5) - (float)459.67); + printf("Available spare: %u\n", + health->available_spare); + printf("Available spare threshold: %u\n", + health->available_spare_threshold); + printf("Percentage used: %u\n", + health->percentage_used); + + /* + * TODO: These are pretty ugly in hex. Is there a library that + * will convert 128-bit unsigned values to decimal? + */ + printf("Data units (512 byte) read: 0x%016jx%016jx\n", + health->data_units_read[1], + health->data_units_read[0]); + printf("Data units (512 byte) written: 0x%016jx%016jx\n", + health->data_units_written[1], + health->data_units_written[0]); + printf("Host read commands: 0x%016jx%016jx\n", + health->host_read_commands[1], + health->host_read_commands[0]); + printf("Host write commands: 0x%016jx%016jx\n", + health->host_write_commands[1], + health->host_write_commands[0]); + printf("Controller busy time (minutes): 0x%016jx%016jx\n", + health->controller_busy_time[1], + health->controller_busy_time[0]); + printf("Power cycles: 0x%016jx%016jx\n", + health->power_cycles[1], + health->power_cycles[0]); + printf("Power on hours: 0x%016jx%016jx\n", + health->power_on_hours[1], + health->power_on_hours[0]); + printf("Unsafe shutdowns: 0x%016jx%016jx\n", + health->unsafe_shutdowns[1], + health->unsafe_shutdowns[0]); + printf("Media errors: 0x%016jx%016jx\n", + health->media_errors[1], + health->media_errors[0]); + printf("No. error info log entries: 0x%016jx%016jx\n", + health->num_error_info_log_entries[1], + health->num_error_info_log_entries[0]); +} + +static void +print_log_firmware(void *buf, uint32_t size __unused) +{ + int i; + const char *status; + struct nvme_firmware_page *fw = buf; + + printf("Firmware Slot Log\n"); + printf("=================\n"); + + for (i = 0; i < MAX_FW_SLOTS; i++) { + printf("Slot %d: ", i + 1); + if (fw->afi.slot == i + 1) + status = " Active"; + else + status = "Inactive"; + + if (fw->revision[i] == 0LLU) + printf("Empty\n"); + else + if (isprint(*(char *)&fw->revision[i])) + printf("[%s] %.8s\n", status, + (char *)&fw->revision[i]); + else + printf("[%s] %016jx\n", status, + fw->revision[i]); + } +} + +static struct logpage_function { + uint8_t log_page; + print_fn_t fn; +} logfuncs[] = { + {NVME_LOG_ERROR, print_log_error }, + {NVME_LOG_HEALTH_INFORMATION, print_log_health }, + {NVME_LOG_FIRMWARE_SLOT, print_log_firmware }, + {0, NULL }, +}; + +static void +logpage_usage(void) +{ + fprintf(stderr, "usage:\n"); + fprintf(stderr, LOGPAGE_USAGE); + exit(EX_USAGE); +} + +void +logpage(int argc, char *argv[]) +{ + int fd, nsid, len; + int log_page = 0, pageflag = false; + int hexflag = false; + int allow_ns = false; + char ch, *p, *nsloc = NULL; + char *cname = NULL; + size_t size; + void *buf; + struct logpage_function *f; + struct nvme_controller_data cdata; + print_fn_t print_fn; + + while ((ch = getopt(argc, argv, "p:x")) != -1) { + switch (ch) { + case 'p': + /* TODO: Add human-readable ASCII page IDs */ + log_page = strtol(optarg, &p, 0); + if (p != NULL && *p != '\0') { + fprintf(stderr, + "\"%s\" not valid log page id.\n", + optarg); + logpage_usage(); + /* TODO: Define valid log page id ranges in nvme.h? */ + } else if (log_page == 0 || + (log_page >= 0x04 && log_page <= 0x7F) || + (log_page >= 0x80 && log_page <= 0xBF)) { + fprintf(stderr, + "\"%s\" not valid log page id.\n", + optarg); + logpage_usage(); + } + pageflag = true; + break; + case 'x': + hexflag = true; + break; + } + } + + if (!pageflag) { + printf("Missing page_id (-p).\n"); + logpage_usage(); + } + + /* Check that a controller and/or namespace was specified. */ + if (optind >= argc) + logpage_usage(); + + /* + * The log page attribtues indicate whether or not the controller + * supports the SMART/Health information log page on a per + * namespace basis. + */ + cname = malloc(strlen(NVME_CTRLR_PREFIX) + 2); + len = strlen(NVME_CTRLR_PREFIX) + 1; + cname = strncpy(cname, argv[optind], len); + open_dev(cname, &fd, 1, 1); + read_controller_data(fd, &cdata); + + if (log_page == NVME_LOG_HEALTH_INFORMATION && cdata.lpa.ns_smart != 0) + allow_ns = true; + + /* If a namespace id was specified, validate it's use */ + if (strstr(argv[optind], NVME_NS_PREFIX) != NULL) { + if (!allow_ns) { + if (log_page != NVME_LOG_HEALTH_INFORMATION) { + fprintf(stderr, + "Namespace ID not valid for log page %d.\n", + log_page); + } else if (cdata.lpa.ns_smart == 0) { + fprintf(stderr, + "Controller does not support per " + "namespace SMART/Health information.\n"); + } + close(fd); + exit(EX_IOERR); + } + nsloc = strnstr(argv[optind], NVME_NS_PREFIX, 10); + if (nsloc != NULL) + nsid = strtol(nsloc + 2, NULL, 10); + if (nsloc == NULL || (nsid == 0 && errno != 0)) { + fprintf(stderr, + "Invalid namespace ID %s.\n", + argv[optind]); + close(fd); + exit(EX_IOERR); + } + + /* + * User is asking for per namespace log page information + * so close the controller and open up the namespace. + */ + close(fd); + open_dev(argv[optind], &fd, 1, 1); + } else + nsid = NVME_GLOBAL_NAMESPACE_TAG; + + print_fn = print_hex; + if (!hexflag) { + /* + * See if there is a pretty print function for the + * specified log page. If one isn't found, we + * just revert to the default (print_hex). + */ + f = logfuncs; + while (f->log_page > 0) { + if (log_page == f->log_page) { + print_fn = f->fn; + break; + } + f++; + } + } + + /* Read the log page */ + switch (log_page) { + case NVME_LOG_ERROR: + size = sizeof(struct nvme_error_information_entry); + size *= (cdata.elpe + 1); + break; + case NVME_LOG_HEALTH_INFORMATION: + size = sizeof(struct nvme_health_information_page); + break; + case NVME_LOG_FIRMWARE_SLOT: + size = sizeof(struct nvme_firmware_page); + break; + default: + size = DEFAULT_SIZE; + break; + } + + buf = get_log_buffer(size); + read_logpage(fd, log_page, nsid, buf, size); + print_fn(buf, size); + + close(fd); + exit(EX_OK); +} Property changes on: user/attilio/vmcontention/sbin/nvmecontrol/logpage.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: user/attilio/vmcontention/sbin/nvmecontrol/nvmecontrol.8 =================================================================== --- user/attilio/vmcontention/sbin/nvmecontrol/nvmecontrol.8 (revision 252343) +++ user/attilio/vmcontention/sbin/nvmecontrol/nvmecontrol.8 (revision 252344) @@ -1,94 +1,130 @@ .\" .\" Copyright (c) 2012 Intel Corporation .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions, and the following disclaimer, .\" without modification. .\" 2. Redistributions in binary form must reproduce at minimum a disclaimer .\" substantially similar to the "NO WARRANTY" disclaimer below .\" ("Disclaimer") and any redistribution must be conditioned upon .\" including a substantially similar Disclaimer requirement for further .\" binary redistribution. .\" .\" NO WARRANTY .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS .\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT .\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR .\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT .\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, .\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING .\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGES. .\" .\" nvmecontrol man page. .\" .\" Author: Jim Harris .\" .\" $FreeBSD$ .\" .Dd March 26, 2013 .Dt NVMECONTROL 8 .Os .Sh NAME .Nm nvmecontrol .Nd NVM Express control utility .Sh SYNOPSIS .Nm .Ic devlist .Nm .Ic identify .Op Fl v .Op Fl x .Aq device id .Nm .Ic perftest .Aq Fl n Ar num_threads .Aq Fl o Ar read|write .Op Fl p .Aq Fl s Ar size_in_bytes .Aq Fl t Ar time_in_sec .Aq namespace id .Nm .Ic reset .Aq controller id +.Nm +.Ic logpage +.Aq Fl p Ar page_id +.Op Fl x +.Aq device id +.Aq namespace id +.Nm +.Ic firmware +.Op Fl s Ar slot +.Op Fl f Ar path_to_firmware +.Op Fl a +.Aq device id .Sh DESCRIPTION NVM Express (NVMe) is a storage protocol standard, for SSDs and other high-speed storage devices over PCI Express. .Sh EXAMPLES .Dl nvmecontrol devlist .Pp Display a list of NVMe controllers and namespaces along with their device nodes. .Pp .Dl nvmecontrol identify nvme0 .Pp Display a human-readable summary of the nvme0 IDENTIFY_CONTROLLER data. .Pp .Dl nvmecontrol identify -x -v nvme0ns1 .Pp Display a hexadecimal dump of the nvme0 IDENTIFY_NAMESPACE data for namespace 1. .Pp .Dl nvmecontrol perftest -n 32 -o read -s 512 -t 30 nvme0ns1 .Pp Run a performance test on nvme0ns1 using 32 kernel threads for 30 seconds. Each thread will issue a single 512 byte read command. Results are printed to stdout when 30 seconds expires. .Pp .Dl nvmecontrol reset nvme0 .Pp Perform a controller-level reset of the nvme0 controller. +.Pp +.Dl nvmecontrol logpage -p 1 nvme0 +.Pp +Display a human-readable summary of the nvme0 controller's Error Information Log. +Log pages defined by the NVMe specification include Error Information Log (ID=1), +SMART/Health Information Log (ID=2), and Firmware Slot Log (ID=3). +.Pp +.Dl nvmecontrol logpage -p 1 -x nvme0 +.Pp +Display a hexidecimal dump of the nvme0 controller's Error Information Log. +.Pp +.Dl nvmecontrol firmware -s 2 -f /tmp/nvme_firmware nvme0 +.Pp +Download the firmware image contained in "/tmp/nvme_firmware" to slot 2 of the +nvme0 controller, but do not activate the image. +.Pp +.Dl nvmecontrol firmware -s 4 -a nvme0 +.Pp +Activate the firmware in slot 4 of the nvme0 controller on the next reset. +.Pp +.Dl nvmecontrol firmware -s 7 -f /tmp/nvme_firmware -a nvme0 +.Pp +Download the firmware image contained in "/tmp/nvme_firmware" to slot 7 of the +nvme0 controller and activate it on the next reset. .Sh AUTHORS .An -nosplit .Nm was developed by Intel and originally written by .An Jim Harris Aq jimharris@FreeBSD.org . .Pp This man page was written by .An Jim Harris Aq jimharris@FreeBSD.org . Index: user/attilio/vmcontention/sbin/nvmecontrol/nvmecontrol.c =================================================================== --- user/attilio/vmcontention/sbin/nvmecontrol/nvmecontrol.c (revision 252343) +++ user/attilio/vmcontention/sbin/nvmecontrol/nvmecontrol.c (revision 252344) @@ -1,707 +1,233 @@ /*- - * Copyright (C) 2012 Intel Corporation + * Copyright (C) 2012-2013 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include -#include - #include #include #include #include #include #include #include #include #include #include -#define DEVLIST_USAGE \ -" nvmecontrol devlist\n" +#include "nvmecontrol.h" -#define IDENTIFY_USAGE \ -" nvmecontrol identify \n" +typedef void (*nvme_fn_t)(int argc, char *argv[]); -#define PERFTEST_USAGE \ -" nvmecontrol perftest <-n num_threads> <-o read|write>\n" \ -" <-s size_in_bytes> <-t time_in_seconds>\n" \ -" <-i intr|wait> [-f refthread] [-p]\n" \ -" \n" +static struct nvme_function { + const char *name; + nvme_fn_t fn; + const char *usage; +} funcs[] = { + {"devlist", devlist, DEVLIST_USAGE}, + {"identify", identify, IDENTIFY_USAGE}, + {"perftest", perftest, PERFTEST_USAGE}, + {"reset", reset, RESET_USAGE}, + {"logpage", logpage, LOGPAGE_USAGE}, + {"firmware", firmware, FIRMWARE_USAGE}, + {NULL, NULL, NULL}, +}; -#define RESET_USAGE \ -" nvmecontrol reset \n" - -static void perftest_usage(void); - static void usage(void) { + struct nvme_function *f; + + f = funcs; fprintf(stderr, "usage:\n"); - fprintf(stderr, DEVLIST_USAGE); - fprintf(stderr, IDENTIFY_USAGE); - fprintf(stderr, RESET_USAGE); - fprintf(stderr, PERFTEST_USAGE); + while (f->name != NULL) { + fprintf(stderr, "%s", f->usage); + f++; + } exit(EX_USAGE); } static void -print_controller_hex(struct nvme_controller_data *cdata, uint32_t length) +print_bytes(void *data, uint32_t length) { - uint32_t *p; uint32_t i, j; + uint8_t *p, *end; - p = (uint32_t *)cdata; - length /= sizeof(uint32_t); + end = (uint8_t *)data + length; - for (i = 0; i < length; i+=8) { - printf("%03x: ", i*4); - for (j = 0; j < 8; j++) - printf("%08x ", p[i+j]); + for (i = 0; i < length; i++) { + p = (uint8_t *)data + (i*16); + printf("%03x: ", i*16); + for (j = 0; j < 16 && p < end; j++) + printf("%02x ", *p++); + if (p >= end) + break; printf("\n"); } - printf("\n"); } static void -print_controller(struct nvme_controller_data *cdata) +print_dwords(void *data, uint32_t length) { - printf("Controller Capabilities/Features\n"); - printf("================================\n"); - printf("Vendor ID: %04x\n", cdata->vid); - printf("Subsystem Vendor ID: %04x\n", cdata->ssvid); - printf("Serial Number: %s\n", cdata->sn); - printf("Model Number: %s\n", cdata->mn); - printf("Firmware Version: %s\n", cdata->fr); - printf("Recommended Arb Burst: %d\n", cdata->rab); - printf("IEEE OUI Identifier: %02x %02x %02x\n", - cdata->ieee[0], cdata->ieee[1], cdata->ieee[2]); - printf("Multi-Interface Cap: %02x\n", cdata->mic); - /* TODO: Use CAP.MPSMIN to determine true memory page size. */ - printf("Max Data Transfer Size: "); - if (cdata->mdts == 0) - printf("Unlimited\n"); - else - printf("%d\n", PAGE_SIZE * (1 << cdata->mdts)); - printf("\n"); - - printf("Admin Command Set Attributes\n"); - printf("============================\n"); - printf("Security Send/Receive: %s\n", - cdata->oacs.security ? "Supported" : "Not Supported"); - printf("Format NVM: %s\n", - cdata->oacs.format ? "Supported" : "Not Supported"); - printf("Firmware Activate/Download: %s\n", - cdata->oacs.firmware ? "Supported" : "Not Supported"); - printf("Abort Command Limit: %d\n", cdata->acl+1); - printf("Async Event Request Limit: %d\n", cdata->aerl+1); - printf("Number of Firmware Slots: "); - if (cdata->oacs.firmware != 0) - printf("%d\n", cdata->frmw.num_slots); - else - printf("N/A\n"); - printf("Firmware Slot 1 Read-Only: "); - if (cdata->oacs.firmware != 0) - printf("%s\n", cdata->frmw.slot1_ro ? "Yes" : "No"); - else - printf("N/A\n"); - printf("Per-Namespace SMART Log: %s\n", - cdata->lpa.ns_smart ? "Yes" : "No"); - printf("Error Log Page Entries: %d\n", cdata->elpe+1); - printf("Number of Power States: %d\n", cdata->npss+1); - printf("\n"); - - printf("NVM Command Set Attributes\n"); - printf("==========================\n"); - printf("Submission Queue Entry Size\n"); - printf(" Max: %d\n", 1 << cdata->sqes.max); - printf(" Min: %d\n", 1 << cdata->sqes.min); - printf("Completion Queue Entry Size\n"); - printf(" Max: %d\n", 1 << cdata->cqes.max); - printf(" Min: %d\n", 1 << cdata->cqes.min); - printf("Number of Namespaces: %d\n", cdata->nn); - printf("Compare Command: %s\n", - cdata->oncs.compare ? "Supported" : "Not Supported"); - printf("Write Uncorrectable Command: %s\n", - cdata->oncs.write_unc ? "Supported" : "Not Supported"); - printf("Dataset Management Command: %s\n", - cdata->oncs.dsm ? "Supported" : "Not Supported"); - printf("Volatile Write Cache: %s\n", - cdata->vwc.present ? "Present" : "Not Present"); -} - -static void -print_namespace_hex(struct nvme_namespace_data *nsdata, uint32_t length) -{ uint32_t *p; uint32_t i, j; - p = (uint32_t *)nsdata; + p = (uint32_t *)data; length /= sizeof(uint32_t); for (i = 0; i < length; i+=8) { printf("%03x: ", i*4); for (j = 0; j < 8; j++) printf("%08x ", p[i+j]); printf("\n"); } printf("\n"); } -static void -print_namespace(struct nvme_namespace_data *nsdata) +void +print_hex(void *data, uint32_t length) { - uint32_t i; - - printf("Size (in LBAs): %lld (%lldM)\n", - (long long)nsdata->nsze, - (long long)nsdata->nsze / 1024 / 1024); - printf("Capacity (in LBAs): %lld (%lldM)\n", - (long long)nsdata->ncap, - (long long)nsdata->ncap / 1024 / 1024); - printf("Utilization (in LBAs): %lld (%lldM)\n", - (long long)nsdata->nuse, - (long long)nsdata->nuse / 1024 / 1024); - printf("Thin Provisioning: %s\n", - nsdata->nsfeat.thin_prov ? "Supported" : "Not Supported"); - printf("Number of LBA Formats: %d\n", nsdata->nlbaf+1); - printf("Current LBA Format: LBA Format #%d\n", - nsdata->flbas.format); - for (i = 0; i <= nsdata->nlbaf; i++) { - printf("LBA Format #%d:\n", i); - printf(" LBA Data Size: %d\n", - 1 << nsdata->lbaf[i].lbads); - } + if (length >= sizeof(uint32_t) || length % sizeof(uint32_t) == 0) + print_dwords(data, length); + else + print_bytes(data, length); } -static uint32_t -ns_get_sector_size(struct nvme_namespace_data *nsdata) -{ - - return (1 << nsdata->lbaf[0].lbads); -} - -static void +void read_controller_data(int fd, struct nvme_controller_data *cdata) { struct nvme_pt_command pt; memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_IDENTIFY; pt.cmd.cdw10 = 1; pt.buf = cdata; pt.len = sizeof(*cdata); pt.is_read = 1; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) { printf("Identify request failed. errno=%d (%s)\n", errno, strerror(errno)); exit(EX_IOERR); } if (nvme_completion_is_error(&pt.cpl)) { printf("Passthrough command returned error.\n"); exit(EX_IOERR); } } -static void +void read_namespace_data(int fd, int nsid, struct nvme_namespace_data *nsdata) { struct nvme_pt_command pt; memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_IDENTIFY; pt.cmd.nsid = nsid; pt.buf = nsdata; pt.len = sizeof(*nsdata); pt.is_read = 1; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) { printf("Identify request failed. errno=%d (%s)\n", errno, strerror(errno)); exit(EX_IOERR); } if (nvme_completion_is_error(&pt.cpl)) { printf("Passthrough command returned error.\n"); exit(EX_IOERR); } } -static void -devlist(int argc, char *argv[]) +int +open_dev(const char *str, int *fd, int show_error, int exit_on_error) { - struct nvme_controller_data cdata; - struct nvme_namespace_data nsdata; - struct stat devstat; - char name[64], path[64]; - uint32_t i; - int ch, ctrlr, exit_code, fd, found; + struct stat devstat; + char full_path[64]; - exit_code = EX_OK; - - while ((ch = getopt(argc, argv, "")) != -1) { - switch ((char)ch) { - default: - usage(); - } + if (!strnstr(str, NVME_CTRLR_PREFIX, strlen(NVME_CTRLR_PREFIX))) { + if (show_error) + fprintf(stderr, + "Controller/namespace IDs must begin with '%s'.\n", + NVME_CTRLR_PREFIX); + if (exit_on_error) + exit(EX_USAGE); + else + return (EX_USAGE); } - ctrlr = -1; - found = 0; - - while (1) { - ctrlr++; - sprintf(name, "nvme%d", ctrlr); - sprintf(path, "/dev/%s", name); - - if (stat(path, &devstat) != 0) - break; - - found++; - - fd = open(path, O_RDWR); - if (fd < 0) { - printf("Could not open %s. errno=%d (%s)\n", path, - errno, strerror(errno)); - exit_code = EX_NOPERM; - continue; - } - - read_controller_data(fd, &cdata); - printf("%6s: %s\n", name, cdata.mn); - - for (i = 0; i < cdata.nn; i++) { - sprintf(name, "nvme%dns%d", ctrlr, i+1); - read_namespace_data(fd, i+1, &nsdata); - printf(" %10s (%lldGB)\n", - name, - nsdata.nsze * - (long long)ns_get_sector_size(&nsdata) / - 1024 / 1024 / 1024); - } - } - - if (found == 0) - printf("No NVMe controllers found.\n"); - - exit(exit_code); -} - -static void -identify_ctrlr(int argc, char *argv[]) -{ - struct nvme_controller_data cdata; - struct stat devstat; - char path[64]; - int ch, fd, hexflag = 0, hexlength; - int verboseflag = 0; - - while ((ch = getopt(argc, argv, "vx")) != -1) { - switch ((char)ch) { - case 'v': - verboseflag = 1; - break; - case 'x': - hexflag = 1; - break; - default: - usage(); - } - } - - sprintf(path, "/dev/%s", argv[optind]); - - if (stat(path, &devstat) < 0) { - printf("Invalid device node %s. errno=%d (%s)\n", path, errno, - strerror(errno)); - exit(EX_IOERR); - } - - fd = open(path, O_RDWR); - if (fd < 0) { - printf("Could not open %s. errno=%d (%s)\n", path, errno, - strerror(errno)); - exit(EX_NOPERM); - } - - read_controller_data(fd, &cdata); - - if (hexflag == 1) { - if (verboseflag == 1) - hexlength = sizeof(struct nvme_controller_data); + snprintf(full_path, sizeof(full_path), "/dev/%s", str); + if (stat(full_path, &devstat) != 0) { + if (show_error) + fprintf(stderr, "Could not stat %s. errno=%d (%s)\n", + full_path, errno, strerror(errno)); + if (exit_on_error) + exit(EX_NOINPUT); else - hexlength = offsetof(struct nvme_controller_data, - reserved5); - print_controller_hex(&cdata, hexlength); - exit(EX_OK); + return (EX_NOINPUT); } - if (verboseflag == 1) { - printf("-v not currently supported without -x.\n"); - usage(); - } - - print_controller(&cdata); - exit(EX_OK); -} - -static void -identify_ns(int argc, char *argv[]) -{ - struct nvme_namespace_data nsdata; - struct stat devstat; - char path[64]; - char *nsloc; - int ch, fd, hexflag = 0, hexlength, nsid; - int verboseflag = 0; - - while ((ch = getopt(argc, argv, "vx")) != -1) { - switch ((char)ch) { - case 'v': - verboseflag = 1; - break; - case 'x': - hexflag = 1; - break; - default: - usage(); - } - } - - /* - * Check if the specified device node exists before continuing. - * This is a cleaner check for cases where the correct controller - * is specified, but an invalid namespace on that controller. - */ - sprintf(path, "/dev/%s", argv[optind]); - if (stat(path, &devstat) < 0) { - printf("Invalid device node %s. errno=%d (%s)\n", path, errno, - strerror(errno)); - exit(EX_IOERR); - } - - nsloc = strstr(argv[optind], "ns"); - if (nsloc == NULL) { - printf("Invalid namepsace %s.\n", argv[optind]); - exit(EX_IOERR); - } - - /* - * Pull the namespace id from the string. +2 skips past the "ns" part - * of the string. - */ - nsid = strtol(nsloc + 2, NULL, 10); - if (nsid == 0 && errno != 0) { - printf("Invalid namespace ID %s.\n", argv[optind]); - exit(EX_IOERR); - } - - /* - * We send IDENTIFY commands to the controller, not the namespace, - * since it is an admin cmd. So the path should only include the - * nvmeX part of the nvmeXnsY string. - */ - sprintf(path, "/dev/"); - strncat(path, argv[optind], nsloc - argv[optind]); - if (stat(path, &devstat) < 0) { - printf("Invalid device node %s. errno=%d (%s)\n", path, errno, - strerror(errno)); - exit(EX_IOERR); - } - - fd = open(path, O_RDWR); - if (fd < 0) { - printf("Could not open %s. errno=%d (%s)\n", path, errno, - strerror(errno)); - exit(EX_NOPERM); - } - - read_namespace_data(fd, nsid, &nsdata); - - if (hexflag == 1) { - if (verboseflag == 1) - hexlength = sizeof(struct nvme_namespace_data); + *fd = open(full_path, O_RDWR); + if (*fd < 0) { + if (show_error) + fprintf(stderr, "Could not open %s. errno=%d (%s)\n", + full_path, errno, strerror(errno)); + if (exit_on_error) + exit(EX_NOPERM); else - hexlength = offsetof(struct nvme_namespace_data, - reserved6); - print_namespace_hex(&nsdata, hexlength); - exit(EX_OK); + return (EX_NOPERM); } - if (verboseflag == 1) { - printf("-v not currently supported without -x.\n"); - usage(); - } - - print_namespace(&nsdata); - exit(EX_OK); + return (EX_OK); } -static void -identify(int argc, char *argv[]) -{ - char *target; - - if (argc < 2) - usage(); - - while (getopt(argc, argv, "vx") != -1) ; - - target = argv[optind]; - - /* Specified device node must have "nvme" in it. */ - if (strstr(argv[optind], "nvme") == NULL) { - printf("Invalid device node '%s'.\n", argv[optind]); - exit(EX_IOERR); - } - - optreset = 1; - optind = 1; - - /* - * If device node contains "ns", we consider it a namespace, - * otherwise, consider it a controller. - */ - if (strstr(target, "ns") == NULL) - identify_ctrlr(argc, argv); - else - identify_ns(argc, argv); -} - -static void -print_perftest(struct nvme_io_test *io_test, bool perthread) -{ - uint32_t i, io_completed = 0, iops, mbps; - - for (i = 0; i < io_test->num_threads; i++) - io_completed += io_test->io_completed[i]; - - iops = io_completed/io_test->time; - mbps = iops * io_test->size / (1024*1024); - - printf("Threads: %2d Size: %6d %5s Time: %3d IO/s: %7d MB/s: %4d\n", - io_test->num_threads, io_test->size, - io_test->opc == NVME_OPC_READ ? "READ" : "WRITE", - io_test->time, iops, mbps); - - if (perthread) - for (i = 0; i < io_test->num_threads; i++) - printf("\t%3d: %8d IO/s\n", i, - io_test->io_completed[i]/io_test->time); - - exit(1); -} - -static void -perftest_usage(void) -{ - fprintf(stderr, "usage:\n"); - fprintf(stderr, PERFTEST_USAGE); - exit(EX_USAGE); -} - -static void -perftest(int argc, char *argv[]) -{ - struct nvme_io_test io_test; - int fd; - char ch; - char *p; - const char *name; - char path[64]; - u_long ioctl_cmd = NVME_IO_TEST; - bool nflag, oflag, sflag, tflag; - int perthread = 0; - - nflag = oflag = sflag = tflag = false; - name = NULL; - - memset(&io_test, 0, sizeof(io_test)); - - while ((ch = getopt(argc, argv, "f:i:n:o:ps:t:")) != -1) { - switch (ch) { - case 'f': - if (!strcmp(optarg, "refthread")) - io_test.flags |= NVME_TEST_FLAG_REFTHREAD; - break; - case 'i': - if (!strcmp(optarg, "bio") || - !strcmp(optarg, "wait")) - ioctl_cmd = NVME_BIO_TEST; - else if (!strcmp(optarg, "io") || - !strcmp(optarg, "intr")) - ioctl_cmd = NVME_IO_TEST; - break; - case 'n': - nflag = true; - io_test.num_threads = strtoul(optarg, &p, 0); - if (p != NULL && *p != '\0') { - fprintf(stderr, - "\"%s\" not valid number of threads.\n", - optarg); - perftest_usage(); - } else if (io_test.num_threads == 0 || - io_test.num_threads > 128) { - fprintf(stderr, - "\"%s\" not valid number of threads.\n", - optarg); - perftest_usage(); - } - break; - case 'o': - oflag = true; - if (!strcmp(optarg, "read") || !strcmp(optarg, "READ")) - io_test.opc = NVME_OPC_READ; - else if (!strcmp(optarg, "write") || - !strcmp(optarg, "WRITE")) - io_test.opc = NVME_OPC_WRITE; - else { - fprintf(stderr, "\"%s\" not valid opcode.\n", - optarg); - perftest_usage(); - } - break; - case 'p': - perthread = 1; - break; - case 's': - sflag = true; - io_test.size = strtoul(optarg, &p, 0); - if (p == NULL || *p == '\0' || toupper(*p) == 'B') { - // do nothing - } else if (toupper(*p) == 'K') { - io_test.size *= 1024; - } else if (toupper(*p) == 'M') { - io_test.size *= 1024 * 1024; - } else { - fprintf(stderr, "\"%s\" not valid size.\n", - optarg); - perftest_usage(); - } - break; - case 't': - tflag = true; - io_test.time = strtoul(optarg, &p, 0); - if (p != NULL && *p != '\0') { - fprintf(stderr, - "\"%s\" not valid time duration.\n", - optarg); - perftest_usage(); - } - break; - } - } - - name = argv[optind]; - - if (!nflag || !oflag || !sflag || !tflag || name == NULL) - perftest_usage(); - - sprintf(path, "/dev/%s", name); - - fd = open(path, O_RDWR); - if (fd < 0) { - fprintf(stderr, "%s not valid device. errno=%d (%s)\n", path, - errno, strerror(errno)); - perftest_usage(); - } - - if (ioctl(fd, ioctl_cmd, &io_test) < 0) { - fprintf(stderr, "NVME_IO_TEST failed. errno=%d (%s)\n", errno, - strerror(errno)); - exit(EX_IOERR); - } - - print_perftest(&io_test, perthread); - exit(EX_OK); -} - -static void -reset_ctrlr(int argc, char *argv[]) -{ - struct stat devstat; - char path[64]; - int ch, fd; - - while ((ch = getopt(argc, argv, "")) != -1) { - switch ((char)ch) { - default: - usage(); - } - } - - sprintf(path, "/dev/%s", argv[optind]); - - if (stat(path, &devstat) < 0) { - printf("Invalid device node %s. errno=%d (%s)\n", path, errno, - strerror(errno)); - exit(EX_IOERR); - } - - fd = open(path, O_RDWR); - if (fd < 0) { - printf("Could not open %s. errno=%d (%s)\n", path, errno, - strerror(errno)); - exit(EX_NOPERM); - } - - if (ioctl(fd, NVME_RESET_CONTROLLER) < 0) { - printf("Reset request to %s failed. errno=%d (%s)\n", path, - errno, strerror(errno)); - exit(EX_IOERR); - } - - exit(EX_OK); -} - int main(int argc, char *argv[]) { + struct nvme_function *f; if (argc < 2) usage(); - if (strcmp(argv[1], "devlist") == 0) - devlist(argc-1, &argv[1]); - else if (strcmp(argv[1], "identify") == 0) - identify(argc-1, &argv[1]); - else if (strcmp(argv[1], "perftest") == 0) - perftest(argc-1, &argv[1]); - else if (strcmp(argv[1], "reset") == 0) - reset_ctrlr(argc-1, &argv[1]); + f = funcs; + while (f->name != NULL) { + if (strcmp(argv[1], f->name) == 0) + f->fn(argc-1, &argv[1]); + f++; + } usage(); return (0); } Index: user/attilio/vmcontention/sbin/nvmecontrol/nvmecontrol.h =================================================================== --- user/attilio/vmcontention/sbin/nvmecontrol/nvmecontrol.h (nonexistent) +++ user/attilio/vmcontention/sbin/nvmecontrol/nvmecontrol.h (revision 252344) @@ -0,0 +1,73 @@ +/*- + * Copyright (C) 2012-2013 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __NVMECONTROL_H__ +#define __NVMECONTROL_H__ + +#include + +#define NVME_CTRLR_PREFIX "nvme" +#define NVME_NS_PREFIX "ns" + +#define DEVLIST_USAGE \ +" nvmecontrol devlist\n" + +#define IDENTIFY_USAGE \ +" nvmecontrol identify \n" + +#define PERFTEST_USAGE \ +" nvmecontrol perftest <-n num_threads> <-o read|write>\n" \ +" <-s size_in_bytes> <-t time_in_seconds>\n" \ +" <-i intr|wait> [-f refthread] [-p]\n" \ +" \n" + +#define RESET_USAGE \ +" nvmecontrol reset \n" + +#define LOGPAGE_USAGE \ +" nvmecontrol logpage <-p page_id> [-x] \n" \ + +#define FIRMWARE_USAGE \ +" nvmecontrol firmware [-s slot] [-f path_to_firmware] [-a] \n" + +void devlist(int argc, char *argv[]); +void identify(int argc, char *argv[]); +void perftest(int argc, char *argv[]); +void reset(int argc, char *argv[]); +void logpage(int argc, char *argv[]); +void firmware(int argc, char *argv[]); + +int open_dev(const char *str, int *fd, int show_error, int exit_on_error); +void read_controller_data(int fd, struct nvme_controller_data *cdata); +void read_namespace_data(int fd, int nsid, struct nvme_namespace_data *nsdata); +void print_hex(void *data, uint32_t length); +void read_logpage(int fd, uint8_t log_page, int nsid, void *payload, + uint32_t payload_size); + +#endif + Property changes on: user/attilio/vmcontention/sbin/nvmecontrol/nvmecontrol.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: user/attilio/vmcontention/sbin/nvmecontrol/perftest.c =================================================================== --- user/attilio/vmcontention/sbin/nvmecontrol/perftest.c (nonexistent) +++ user/attilio/vmcontention/sbin/nvmecontrol/perftest.c (revision 252344) @@ -0,0 +1,181 @@ +/*- + * Copyright (C) 2012-2013 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nvmecontrol.h" + +static void +print_perftest(struct nvme_io_test *io_test, bool perthread) +{ + uint32_t i, io_completed = 0, iops, mbps; + + for (i = 0; i < io_test->num_threads; i++) + io_completed += io_test->io_completed[i]; + + iops = io_completed/io_test->time; + mbps = iops * io_test->size / (1024*1024); + + printf("Threads: %2d Size: %6d %5s Time: %3d IO/s: %7d MB/s: %4d\n", + io_test->num_threads, io_test->size, + io_test->opc == NVME_OPC_READ ? "READ" : "WRITE", + io_test->time, iops, mbps); + + if (perthread) + for (i = 0; i < io_test->num_threads; i++) + printf("\t%3d: %8d IO/s\n", i, + io_test->io_completed[i]/io_test->time); + + exit(1); +} + +static void +perftest_usage(void) +{ + fprintf(stderr, "usage:\n"); + fprintf(stderr, PERFTEST_USAGE); + exit(EX_USAGE); +} + +void +perftest(int argc, char *argv[]) +{ + struct nvme_io_test io_test; + int fd; + char ch; + char *p; + u_long ioctl_cmd = NVME_IO_TEST; + bool nflag, oflag, sflag, tflag; + int perthread = 0; + + nflag = oflag = sflag = tflag = false; + + memset(&io_test, 0, sizeof(io_test)); + + while ((ch = getopt(argc, argv, "f:i:n:o:ps:t:")) != -1) { + switch (ch) { + case 'f': + if (!strcmp(optarg, "refthread")) + io_test.flags |= NVME_TEST_FLAG_REFTHREAD; + break; + case 'i': + if (!strcmp(optarg, "bio") || + !strcmp(optarg, "wait")) + ioctl_cmd = NVME_BIO_TEST; + else if (!strcmp(optarg, "io") || + !strcmp(optarg, "intr")) + ioctl_cmd = NVME_IO_TEST; + break; + case 'n': + nflag = true; + io_test.num_threads = strtoul(optarg, &p, 0); + if (p != NULL && *p != '\0') { + fprintf(stderr, + "\"%s\" not valid number of threads.\n", + optarg); + perftest_usage(); + } else if (io_test.num_threads == 0 || + io_test.num_threads > 128) { + fprintf(stderr, + "\"%s\" not valid number of threads.\n", + optarg); + perftest_usage(); + } + break; + case 'o': + oflag = true; + if (!strcmp(optarg, "read") || !strcmp(optarg, "READ")) + io_test.opc = NVME_OPC_READ; + else if (!strcmp(optarg, "write") || + !strcmp(optarg, "WRITE")) + io_test.opc = NVME_OPC_WRITE; + else { + fprintf(stderr, "\"%s\" not valid opcode.\n", + optarg); + perftest_usage(); + } + break; + case 'p': + perthread = 1; + break; + case 's': + sflag = true; + io_test.size = strtoul(optarg, &p, 0); + if (p == NULL || *p == '\0' || toupper(*p) == 'B') { + // do nothing + } else if (toupper(*p) == 'K') { + io_test.size *= 1024; + } else if (toupper(*p) == 'M') { + io_test.size *= 1024 * 1024; + } else { + fprintf(stderr, "\"%s\" not valid size.\n", + optarg); + perftest_usage(); + } + break; + case 't': + tflag = true; + io_test.time = strtoul(optarg, &p, 0); + if (p != NULL && *p != '\0') { + fprintf(stderr, + "\"%s\" not valid time duration.\n", + optarg); + perftest_usage(); + } + break; + } + } + + if (!nflag || !oflag || !sflag || !tflag || optind >= argc) + perftest_usage(); + + open_dev(argv[optind], &fd, 1, 1); + if (ioctl(fd, ioctl_cmd, &io_test) < 0) { + fprintf(stderr, "NVME_IO_TEST failed. errno=%d (%s)\n", errno, + strerror(errno)); + close(fd); + exit(EX_IOERR); + } + + close(fd); + print_perftest(&io_test, perthread); + exit(EX_OK); +} Property changes on: user/attilio/vmcontention/sbin/nvmecontrol/perftest.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: user/attilio/vmcontention/sbin/nvmecontrol/reset.c =================================================================== --- user/attilio/vmcontention/sbin/nvmecontrol/reset.c (nonexistent) +++ user/attilio/vmcontention/sbin/nvmecontrol/reset.c (revision 252344) @@ -0,0 +1,75 @@ +/*- + * Copyright (C) 2012-2013 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "nvmecontrol.h" + +static void +reset_usage(void) +{ + fprintf(stderr, "usage:\n"); + fprintf(stderr, RESET_USAGE); + exit(EX_USAGE); +} + +void +reset(int argc, char *argv[]) +{ + int ch, fd; + + while ((ch = getopt(argc, argv, "")) != -1) { + switch ((char)ch) { + default: + reset_usage(); + } + } + + /* Check that a controller was specified. */ + if (optind >= argc) + reset_usage(); + + open_dev(argv[optind], &fd, 1, 1); + if (ioctl(fd, NVME_RESET_CONTROLLER) < 0) { + printf("Reset request to %s failed. errno=%d (%s)\n", + argv[optind], errno, strerror(errno)); + exit(EX_IOERR); + } + + exit(EX_OK); +} Property changes on: user/attilio/vmcontention/sbin/nvmecontrol/reset.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: user/attilio/vmcontention/sbin/swapon/swapon.8 =================================================================== --- user/attilio/vmcontention/sbin/swapon/swapon.8 (revision 252343) +++ user/attilio/vmcontention/sbin/swapon/swapon.8 (revision 252344) @@ -1,207 +1,225 @@ .\" Copyright (c) 1980, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)swapon.8 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd June 23, 2008 +.Dd June 21, 2013 .Dt SWAPON 8 .Os .Sh NAME .Nm swapon , swapoff , swapctl .Nd "specify devices for paging and swapping" .Sh SYNOPSIS .Nm swapon .Oo Fl F Ar fstab .Oc -.Fl aq | Ar +.Fl aLq | Ar .Nm swapoff .Oo Fl F Ar fstab .Oc -.Fl aq | Ar +.Fl aLq | Ar .Nm swapctl .Op Fl AghklmsU .Oo .Fl a Ar | .Fl d Ar .Oc .Sh DESCRIPTION The .Nm swapon , swapoff and .Nm swapctl utilities are used to control swap devices in the system. At boot time all swap entries in .Pa /etc/fstab are added automatically when the system goes multi-user. Swap devices use a fixed interleave; the maximum number of devices is specified by the kernel configuration option .Dv NSWAPDEV , which is typically set to 4. There is no priority mechanism. .Pp The .Nm swapon utility adds the specified swap devices to the system. If the .Fl a option is used, all swap devices in .Pa /etc/fstab will be added, unless their .Dq noauto +or +.Dq late option is also set. If the +.Fl L +option is specified, +swap devices with the +.Dq late +option will be added as well as ones with no option. +If the .Fl q -option is used informational messages will not be +option is used, +informational messages will not be written to standard output when a swap device is added. .Pp The .Nm swapoff utility removes the specified swap devices from the system. If the .Fl a option is used, all swap devices in .Pa /etc/fstab will be removed, unless their .Dq noauto +or +.Dq late option is also set. If the +.Fl L +option is specified, +swap devices with the +.Dq late +option will be removed as well as ones with no option. +If the .Fl q -option is used informational messages will not be +option is used, +informational messages will not be written to standard output when a swap device is removed. Note that .Nm swapoff will fail and refuse to remove a swap device if there is insufficient VM (memory + remaining swap devices) to run the system. The .Nm swapoff utility must move swapped pages out of the device being removed which could lead to high system loads for a period of time, depending on how much data has been swapped out to that device. .Pp Other options supported by both .Nm swapon and .Nm swapoff are as follows: .Bl -tag -width indent .It Fl F Ar fstab Specify the .Pa fstab file to use. .El .Pp The .Nm swapctl utility exists primarily for those familiar with other .Bx Ns s and may be used to add, remove, or list swap devices. Note that the .Fl a option is used differently in .Nm swapctl and indicates that a specific list of devices should be added. The .Fl d option indicates that a specific list should be removed. The .Fl A and .Fl U options to .Nm swapctl operate on all swap entries in .Pa /etc/fstab which do not have their .Dq noauto option set. .Pp Swap information can be generated using the .Xr swapinfo 8 utility, .Nm pstat .Fl s , or .Nm swapctl .Fl l . The .Nm swapctl utility has the following options for listing swap: .Bl -tag -width indent .It Fl h Output values in human-readable form. .It Fl g Output values in gigabytes. .It Fl k Output values in kilobytes. .It Fl m Output values in megabytes. .It Fl l List the devices making up system swap. .It Fl s Print a summary line for system swap. .Pp The .Ev BLOCKSIZE environment variable is used if not specifically overridden. 512 byte blocks are used by default. .El .Sh FILES .Bl -tag -width ".Pa /dev/{ad,da}?s?b" -compact .It Pa /dev/{ad,da}?s?b standard paging devices .It Pa /dev/md? memory disk devices .It Pa /etc/fstab .Tn ASCII file system description table .El .Sh DIAGNOSTICS These utilities may fail for the reasons described in .Xr swapon 2 . .Sh SEE ALSO .Xr swapon 2 , .Xr fstab 5 , .Xr init 8 , .Xr mdconfig 8 , .Xr pstat 8 , .Xr rc 8 .Sh HISTORY The .Nm swapon utility appeared in .Bx 4.0 . The .Nm swapoff and .Nm swapctl utilities appeared in .Fx 5.1 . Index: user/attilio/vmcontention/sbin/swapon/swapon.c =================================================================== --- user/attilio/vmcontention/sbin/swapon/swapon.c (revision 252343) +++ user/attilio/vmcontention/sbin/swapon/swapon.c (revision 252344) @@ -1,329 +1,848 @@ /* * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint static char sccsid[] = "@(#)swapon.c 8.1 (Berkeley) 6/5/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); -#include #include +#include +#include +#include #include +#include #include #include #include +#include +#include #include +#include +#include +#include +#include +#include #include #include #include #include -#include -#include static void usage(void); -static int swap_on_off(char *name, int ignoreebusy); +static const char *swap_on_off(char *, int, char *); +static const char *swap_on_off_gbde(char *, int); +static const char *swap_on_off_geli(char *, char *, int); +static const char *swap_on_off_md(char *, char *, int); +static const char *swap_on_off_sfile(char *, int); static void swaplist(int, int, int); +static int run_cmd(int *, const char *, ...) __printflike(2, 3); static enum { SWAPON, SWAPOFF, SWAPCTL } orig_prog, which_prog = SWAPCTL; +static int qflag; + int main(int argc, char **argv) { struct fstab *fsp; + const char *swfile; char *ptr; int ret; int ch, doall; - int sflag = 0, lflag = 0, hflag = 0, qflag = 0; + int sflag = 0, lflag = 0, late = 0, hflag = 0; const char *etc_fstab; if ((ptr = strrchr(argv[0], '/')) == NULL) ptr = argv[0]; if (strstr(ptr, "swapon")) which_prog = SWAPON; else if (strstr(ptr, "swapoff")) which_prog = SWAPOFF; orig_prog = which_prog; doall = 0; etc_fstab = NULL; - while ((ch = getopt(argc, argv, "AadghklmqsUF:")) != -1) { + while ((ch = getopt(argc, argv, "AadghklLmqsUF:")) != -1) { switch(ch) { case 'A': if (which_prog == SWAPCTL) { doall = 1; which_prog = SWAPON; } else { usage(); } break; case 'a': if (which_prog == SWAPON || which_prog == SWAPOFF) doall = 1; else which_prog = SWAPON; break; case 'd': if (which_prog == SWAPCTL) which_prog = SWAPOFF; else usage(); break; case 'g': hflag = 'G'; break; case 'h': hflag = 'H'; break; case 'k': hflag = 'K'; break; case 'l': lflag = 1; break; + case 'L': + late = 1; + break; case 'm': hflag = 'M'; break; case 'q': if (which_prog == SWAPON || which_prog == SWAPOFF) qflag = 1; break; case 's': sflag = 1; break; case 'U': if (which_prog == SWAPCTL) { doall = 1; which_prog = SWAPOFF; } else { usage(); } break; case 'F': etc_fstab = optarg; break; case '?': default: usage(); } } argv += optind; ret = 0; + swfile = NULL; if (etc_fstab != NULL) setfstab(etc_fstab); if (which_prog == SWAPON || which_prog == SWAPOFF) { if (doall) { while ((fsp = getfsent()) != NULL) { if (strcmp(fsp->fs_type, FSTAB_SW)) continue; if (strstr(fsp->fs_mntops, "noauto")) continue; - if (swap_on_off(fsp->fs_spec, 1)) { + if (which_prog != SWAPOFF && + strstr(fsp->fs_mntops, "late") && + !late) + continue; + swfile = swap_on_off(fsp->fs_spec, 1, + fsp->fs_mntops); + if (swfile == NULL) { ret = 1; - } else { - if (!qflag) { - printf("%s: %sing %s as swap device\n", - getprogname(), - which_prog == SWAPOFF ? "remov" : "add", - fsp->fs_spec); - } + continue; } + if (!qflag) { + printf("%s: %sing %s as swap device\n", + getprogname(), + (which_prog == SWAPOFF) ? + "remov" : "add", swfile); + } } } else if (!*argv) usage(); for (; *argv; ++argv) { - if (swap_on_off(*argv, 0)) { + swfile = swap_on_off(*argv, 0, NULL); + if (swfile == NULL) { ret = 1; - } else if (orig_prog == SWAPCTL) { + continue; + } + if (orig_prog == SWAPCTL) { printf("%s: %sing %s as swap device\n", - getprogname(), which_prog == SWAPOFF ? "remov" : "add", - *argv); + getprogname(), + (which_prog == SWAPOFF) ? "remov" : "add", + swfile); } } } else { if (lflag || sflag) swaplist(lflag, sflag, hflag); else usage(); } exit(ret); } +static const char * +swap_on_off(char *name, int doingall, char *mntops) +{ + char base[PATH_MAX]; + + /* Swap on vnode-backed md(4) device. */ + if (mntops != NULL && + (fnmatch(_PATH_DEV MD_NAME "[0-9]*", name, 0) != FNM_NOMATCH || + fnmatch(MD_NAME "[0-9]*", name, 0) != FNM_NOMATCH || + strncmp(_PATH_DEV MD_NAME, name, + sizeof(_PATH_DEV) + sizeof(MD_NAME)) == 0 || + strncmp(MD_NAME, name, sizeof(MD_NAME)) == 0)) + return (swap_on_off_md(name, mntops, doingall)); + + /* Swap on encrypted device by GEOM_BDE. */ + basename_r(name, base); + if (fnmatch("*.bde", base, 0) != FNM_NOMATCH) + return (swap_on_off_gbde(name, doingall)); + + /* Swap on encrypted device by GEOM_ELI. */ + if (fnmatch("*.eli", base, 0) != FNM_NOMATCH) + return (swap_on_off_geli(name, mntops, doingall)); + + /* Swap on special file. */ + return (swap_on_off_sfile(name, doingall)); +} + +static const char * +swap_on_off_gbde(char *name, int doingall) +{ + const char *ret; + char pass[64 * 2 + 1], bpass[64]; + char *dname, *p; + int i, fd, error; + + dname = strdup(name); + p = strrchr(dname, '.'); + if (p == NULL) { + warnx("%s: Malformed device name", name); + return (NULL); + } + *p = '\0'; + + fd = -1; + switch (which_prog) { + case SWAPON: + arc4random_buf(bpass, sizeof(bpass)); + for (i = 0; i < (int)sizeof(bpass); i++) + sprintf(&pass[2 * i], "%02x", bpass[i]); + pass[sizeof(pass) - 1] = '\0'; + + error = run_cmd(&fd, "%s init %s -P %s", _PATH_GBDE, + dname, pass); + if (error) { + /* bde device found. Ignore it. */ + close(fd); + if (!qflag) + warnx("%s: Device already in use", name); + return (NULL); + } + close(fd); + error = run_cmd(&fd, "%s attach %s -p %s", _PATH_GBDE, + dname, pass); + if (error) { + close(fd); + warnx("gbde (attach) error: %s", name); + return (NULL); + } + break; + case SWAPOFF: + break; + default: + return (NULL); + break; + } + if (fd != -1) + close(fd); + ret = swap_on_off_sfile(name, doingall); + + fd = -1; + switch (which_prog) { + case SWAPOFF: + error = run_cmd(&fd, "%s detach %s", _PATH_GBDE, dname); + if (error) { + /* bde device not found. Ignore it. */ + if (!qflag) + warnx("%s: Device not found", dname); + return (NULL); + } + break; + default: + return (NULL); + break; + } + + if (fd != -1) + close(fd); + return (ret); +} + +static const char * +swap_on_off_geli(char *name, char *mntops, int doingall) +{ + const char *ops, *aalgo, *ealgo, *keylen_str, *sectorsize_str; + char *dname, *p; + char args[4096]; + struct stat sb; + int fd, error, keylen, sectorsize; + u_long ul; + + dname = strdup(name); + p = strrchr(dname, '.'); + if (p == NULL) { + warnx("%s: Malformed device name", name); + return (NULL); + } + *p = '\0'; + + ops = strdup(mntops); + + /* Default parameters for geli(8). */ + aalgo = "hmac/sha256"; + ealgo = "aes"; + keylen = 256; + sectorsize = 4096; + + if ((p = strstr(ops, "aalgo=")) != NULL) { + aalgo = p + sizeof("aalgo=") - 1; + p = strchr(aalgo, ','); + if (p != NULL) + *p = '\0'; + } + if ((p = strstr(ops, "ealgo=")) != NULL) { + ealgo = p + sizeof("ealgo=") - 1; + p = strchr(ealgo, ','); + if (p != NULL) + *p = '\0'; + } + if ((p = strstr(ops, "keylen=")) != NULL) { + keylen_str = p + sizeof("keylen=") - 1; + p = strchr(keylen_str, ','); + if (p != NULL) + *p = '\0'; + errno = 0; + ul = strtoul(keylen_str, &p, 10); + if (errno == 0) { + if (*p != '\0' || ul > INT_MAX) + errno = EINVAL; + } + if (errno) { + warn("Invalid keylen: %s", keylen_str); + return (NULL); + } + keylen = (int)ul; + } + if ((p = strstr(ops, "sectorsize=")) != NULL) { + sectorsize_str = p + sizeof("sectorsize=") - 1; + p = strchr(sectorsize_str, ','); + if (p != NULL) + *p = '\0'; + errno = 0; + ul = strtoul(sectorsize_str, &p, 10); + if (errno == 0) { + if (*p != '\0' || ul > INT_MAX) + errno = EINVAL; + } + if (errno) { + warn("Invalid sectorsize: %s", sectorsize_str); + return (NULL); + } + sectorsize = (int)ul; + } + snprintf(args, sizeof(args), "-a %s -e %s -l %d -s %d -d", + aalgo, ealgo, keylen, sectorsize); + args[sizeof(args) - 1] = '\0'; + free((void *)ops); + + fd = -1; + switch (which_prog) { + case SWAPON: + error = run_cmd(&fd, "%s onetime %s %s", _PATH_GELI, args, + dname); + if (error) { + /* eli device found. Ignore it. */ + close(fd); + if (!qflag) + warnx("%s: Device already in use " + "or invalid parameters", name); + return (NULL); + } + break; + case SWAPOFF: + if (stat(name, &sb) == -1 && errno == ENOENT) { + if (!qflag) + warnx("%s: Device not found", name); + return (NULL); + } + break; + default: + return (NULL); + break; + } + if (fd != -1) + close(fd); + + return (swap_on_off_sfile(name, doingall)); +} + +static const char * +swap_on_off_md(char *name, char *mntops, int doingall) +{ + FILE *sfd; + int fd, mdunit, error; + const char *ret; + char mdpath[PATH_MAX], linebuf[PATH_MAX]; + char *p, *vnodefile; + size_t linelen; + u_long ul; + + fd = -1; + sfd = NULL; + if (strlen(name) == (sizeof(MD_NAME) - 1)) + mdunit = -1; + else { + errno = 0; + ul = strtoul(name + 2, &p, 10); + if (errno == 0) { + if (*p != '\0' || ul > INT_MAX) + errno = EINVAL; + } + if (errno) { + warn("Bad device unit: %s", name); + return (NULL); + } + mdunit = (int)ul; + } + + vnodefile = NULL; + if ((p = strstr(mntops, "file=")) != NULL) { + vnodefile = strdup(p + sizeof("file=") - 1); + p = strchr(vnodefile, ','); + if (p != NULL) + *p = '\0'; + } + if (vnodefile == NULL) { + warnx("file option not found for %s", name); + return (NULL); + } + + switch (which_prog) { + case SWAPON: + if (mdunit == -1) { + error = run_cmd(&fd, "%s -l -n -f %s", + _PATH_MDCONFIG, vnodefile); + if (error == 0) { + /* md device found. Ignore it. */ + close(fd); + if (!qflag) + warnx("%s: Device already in use", + vnodefile); + return (NULL); + } + error = run_cmd(&fd, "%s -a -t vnode -n -f %s", + _PATH_MDCONFIG, vnodefile); + if (error) { + warnx("mdconfig (attach) error: file=%s", + vnodefile); + return (NULL); + } + sfd = fdopen(fd, "r"); + if (sfd == NULL) { + warn("mdconfig (attach) fdopen error"); + ret = NULL; + goto err; + } + p = fgetln(sfd, &linelen); + if (p == NULL && + (linelen < 2 || linelen > sizeof(linebuf))) { + warn("mdconfig (attach) unexpected output"); + ret = NULL; + goto err; + } + strncpy(linebuf, p, linelen); + linebuf[linelen - 1] = '\0'; + errno = 0; + ul = strtoul(linebuf, &p, 10); + if (errno == 0) { + if (*p != '\0' || ul > INT_MAX) + errno = EINVAL; + } + if (errno) { + warn("mdconfig (attach) unexpected output: %s", + linebuf); + ret = NULL; + goto err; + } + mdunit = (int)ul; + } else { + error = run_cmd(&fd, "%s -l -n -f %s -u %d", + _PATH_MDCONFIG, vnodefile, mdunit); + if (error == 0) { + /* md device found. Ignore it. */ + close(fd); + if (!qflag) + warnx("md%d on %s: Device already " + "in use", mdunit, vnodefile); + return (NULL); + } + error = run_cmd(NULL, "%s -a -t vnode -u %d -f %s", + _PATH_MDCONFIG, mdunit, vnodefile); + if (error) { + warnx("mdconfig (attach) error: " + "md%d on file=%s", mdunit, vnodefile); + return (NULL); + } + } + break; + case SWAPOFF: + if (mdunit == -1) { + error = run_cmd(&fd, "%s -l -n -f %s", + _PATH_MDCONFIG, vnodefile); + if (error) { + /* md device not found. Ignore it. */ + close(fd); + if (!qflag) + warnx("md on %s: Device not found", + vnodefile); + return (NULL); + } + sfd = fdopen(fd, "r"); + if (sfd == NULL) { + warn("mdconfig (list) fdopen error"); + ret = NULL; + goto err; + } + p = fgetln(sfd, &linelen); + if (p == NULL && + (linelen < 2 || linelen > sizeof(linebuf) - 1)) { + warn("mdconfig (list) unexpected output"); + ret = NULL; + goto err; + } + strncpy(linebuf, p, linelen); + linebuf[linelen - 1] = '\0'; + p = strchr(linebuf, ' '); + if (p != NULL) + *p = '\0'; + errno = 0; + ul = strtoul(linebuf, &p, 10); + if (errno == 0) { + if (*p != '\0' || ul > INT_MAX) + errno = EINVAL; + } + if (errno) { + warn("mdconfig (list) unexpected output: %s", + linebuf); + ret = NULL; + goto err; + } + mdunit = (int)ul; + } else { + error = run_cmd(&fd, "%s -l -n -f %s -u %d", + _PATH_MDCONFIG, vnodefile, mdunit); + if (error) { + /* md device not found. Ignore it. */ + close(fd); + if (!qflag) + warnx("md%d on %s: Device not found", + mdunit, vnodefile); + return (NULL); + } + } + break; + default: + return (NULL); + } + snprintf(mdpath, sizeof(mdpath), "%s%s%d", _PATH_DEV, + MD_NAME, mdunit); + mdpath[sizeof(mdpath) - 1] = '\0'; + ret = swap_on_off_sfile(mdpath, doingall); + + switch (which_prog) { + case SWAPOFF: + if (ret != NULL) { + error = run_cmd(NULL, "%s -d -u %d", + _PATH_MDCONFIG, mdunit); + if (error) + warn("mdconfig (detach) detach failed: %s%s%d", + _PATH_DEV, MD_NAME, mdunit); + } + break; + default: + break; + } +err: + if (sfd != NULL) + fclose(sfd); + if (fd != -1) + close(fd); + return (ret); +} + static int -swap_on_off(char *name, int doingall) +run_cmd(int *ofd, const char *cmdline, ...) { - if ((which_prog == SWAPOFF ? swapoff(name) : swapon(name)) == -1) { + va_list ap; + char **argv, **argvp, *cmd, *p; + int argc, pid, status, rv; + int pfd[2], nfd, dup2dn; + + va_start(ap, cmdline); + rv = vasprintf(&cmd, cmdline, ap); + if (rv == -1) { + warn("%s", __func__); + return (rv); + } + va_end(ap); + + for (argc = 1, p = cmd; (p = strchr(p, ' ')) != NULL; p++) + argc++; + argv = (char **)malloc(sizeof(*argv) * (argc + 1)); + for (p = cmd, argvp = argv; (*argvp = strsep(&p, " ")) != NULL;) + if (**argvp != '\0' && (++argvp > &argv[argc])) { + *argvp = NULL; + break; + } + /* The argv array ends up NULL-terminated here. */ +#if 0 + { + int i; + + fprintf(stderr, "DEBUG: running:"); + /* Should be equivalent to 'cmd' (before strsep, of course). */ + for (i = 0; argv[i] != NULL; i++) + fprintf(stderr, " %s", argv[i]); + fprintf(stderr, "\n"); + } +#endif + dup2dn = 1; + if (ofd != NULL) { + if (pipe(&pfd[0]) == -1) { + warn("%s: pipe", __func__); + return (-1); + } + *ofd = pfd[0]; + dup2dn = 0; + } + pid = fork(); + switch (pid) { + case 0: + /* Child process. */ + if (ofd != NULL) + if (dup2(pfd[1], STDOUT_FILENO) < 0) + err(1, "dup2 in %s", __func__); + nfd = open(_PATH_DEVNULL, O_RDWR); + if (nfd == -1) + err(1, "%s: open %s", __func__, _PATH_DEVNULL); + if (dup2(nfd, STDIN_FILENO) < 0) + err(1, "%s: dup2", __func__); + if (dup2dn && dup2(nfd, STDOUT_FILENO) < 0) + err(1, "%s: dup2", __func__); + if (dup2(nfd, STDERR_FILENO) < 0) + err(1, "%s: dup2", __func__); + execv(argv[0], argv); + warn("exec: %s", argv[0]); + _exit(-1); + case -1: + err(1, "%s: fork", __func__); + } + free(cmd); + free(argv); + while (waitpid(pid, &status, 0) != pid) + ; + return (WEXITSTATUS(status)); +} + +static const char * +swap_on_off_sfile(char *name, int doingall) +{ + int error; + + switch (which_prog) { + case SWAPON: + error = swapon(name); + break; + case SWAPOFF: + error = swapoff(name); + break; + default: + error = 0; + break; + } + if (error == -1) { switch (errno) { case EBUSY: if (!doingall) - warnx("%s: device already in use", name); + warnx("%s: Device already in use", name); break; case EINVAL: if (which_prog == SWAPON) warnx("%s: NSWAPDEV limit reached", name); else if (!doingall) warn("%s", name); break; default: warn("%s", name); break; } - return(1); + return (NULL); } - return(0); + return (name); } static void usage(void) { fprintf(stderr, "usage: %s ", getprogname()); switch(orig_prog) { case SWAPON: case SWAPOFF: - fprintf(stderr, "[-F fstab] -aq | file ...\n"); + fprintf(stderr, "[-F fstab] -aLq | file ...\n"); break; case SWAPCTL: fprintf(stderr, "[-AghklmsU] [-a file ... | -d file ...]\n"); break; } exit(1); } static void sizetobuf(char *buf, size_t bufsize, int hflag, long long val, int hlen, long blocksize) { if (hflag == 'H') { char tmp[16]; humanize_number(tmp, 5, (int64_t)val, "", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); snprintf(buf, bufsize, "%*s", hlen, tmp); } else { snprintf(buf, bufsize, "%*lld", hlen, val / blocksize); } } static void swaplist(int lflag, int sflag, int hflag) { size_t mibsize, size; struct xswdev xsw; int hlen, mib[16], n, pagesize; long blocksize; long long total = 0; long long used = 0; long long tmp_total; long long tmp_used; char buf[32]; pagesize = getpagesize(); switch(hflag) { case 'G': blocksize = 1024 * 1024 * 1024; strlcpy(buf, "1GB-blocks", sizeof(buf)); hlen = 10; break; case 'H': blocksize = -1; strlcpy(buf, "Bytes", sizeof(buf)); hlen = 10; break; case 'K': blocksize = 1024; strlcpy(buf, "1kB-blocks", sizeof(buf)); hlen = 10; break; case 'M': blocksize = 1024 * 1024; strlcpy(buf, "1MB-blocks", sizeof(buf)); hlen = 10; break; default: getbsize(&hlen, &blocksize); snprintf(buf, sizeof(buf), "%ld-blocks", blocksize); break; } mibsize = sizeof mib / sizeof mib[0]; if (sysctlnametomib("vm.swap_info", mib, &mibsize) == -1) err(1, "sysctlnametomib()"); if (lflag) { printf("%-13s %*s %*s\n", "Device:", hlen, buf, hlen, "Used:"); } for (n = 0; ; ++n) { mib[mibsize] = n; size = sizeof xsw; if (sysctl(mib, mibsize + 1, &xsw, &size, NULL, 0) == -1) break; if (xsw.xsw_version != XSWDEV_VERSION) errx(1, "xswdev version mismatch"); tmp_total = (long long)xsw.xsw_nblks * pagesize; tmp_used = (long long)xsw.xsw_used * pagesize; total += tmp_total; used += tmp_used; if (lflag) { sizetobuf(buf, sizeof(buf), hflag, tmp_total, hlen, blocksize); printf("/dev/%-8s %s ", devname(xsw.xsw_dev, S_IFCHR), buf); sizetobuf(buf, sizeof(buf), hflag, tmp_used, hlen, blocksize); printf("%s\n", buf); } } if (errno != ENOENT) err(1, "sysctl()"); if (sflag) { sizetobuf(buf, sizeof(buf), hflag, total, hlen, blocksize); printf("Total: %s ", buf); sizetobuf(buf, sizeof(buf), hflag, used, hlen, blocksize); printf("%s\n", buf); } } Index: user/attilio/vmcontention/sbin =================================================================== --- user/attilio/vmcontention/sbin (revision 252343) +++ user/attilio/vmcontention/sbin (revision 252344) Property changes on: user/attilio/vmcontention/sbin ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sbin:r252128-252340 Index: user/attilio/vmcontention/share/examples/etc/make.conf =================================================================== --- user/attilio/vmcontention/share/examples/etc/make.conf (revision 252343) +++ user/attilio/vmcontention/share/examples/etc/make.conf (revision 252344) @@ -1,286 +1,286 @@ # $FreeBSD$ # # NOTE: Please would any committer updating this file also update the # make.conf(5) manual page, if necessary, which is located in # src/share/man/man5/make.conf.5. # # /etc/make.conf, if present, will be read by make (see # /usr/share/mk/sys.mk). It allows you to override macro definitions # to make without changing your source tree, or anything the source # tree installs. # # This file must be in valid Makefile syntax. # # There are additional things you can put into /etc/make.conf. # You have to find those in the Makefiles and documentation of # the source tree. # # Note, that you should not set MAKEOBJDIRPREFIX or MAKEOBJDIR # from make.conf (or as command line variables to make). # Both variables are environment variables for make and must be used as: # # env MAKEOBJDIRPREFIX=/big/directory make # # # The CPUTYPE variable controls which processor should be targeted for # generated code. This controls processor-specific optimizations in # certain code (currently only OpenSSL) as well as modifying the value # of CFLAGS to contain the appropriate optimization directive to cc. # The automatic setting of CFLAGS may be overridden using the # NO_CPU_CFLAGS variable below. # Currently the following CPU types are recognized: # Intel x86 architecture: # (AMD CPUs) amdfam10, opteron-sse3, athlon64-sse3, k8-sse3, # opteron, athlon64, athlon-fx, k8, athlon-mp, # athlen-xp, athlon-4, athlon-tbird, athlon, k7, # geode, k6-3, k6-2, k6 # (Intel CPUs) core2, core, nocona, pentium4m, pentium4, prescott, # pentium3m, pentium3, pentium-m, pentium2, # pentiumpro, pentium-mmx, pentium, i486 # (VIA CPUs) c7, c3-2, c3 -# AMD64 architecture: opteron-sse3, athlon64-sse3, k8-sse3, opteron, -# athlon64, k8, core2, nocona +# AMD64 architecture: amdfam10, opteron-sse3, athlon64-sse3, k8-sse3, +# opteron, athlon64, k8, core2, nocona # Intel ia64 architecture: itanium2, itanium # SPARC-V9 architecture: v9 (generic 64-bit V9), ultrasparc (default # if omitted), ultrasparc3 # Additionally the following CPU types are recognized by clang: # Intel x86 architecture (for both amd64 and i386): # (AMD CPUs) bdver2, bdver1, btver2, btver1 # (Intel CPUs) core-avx2, core-avx-i, corei7-avx, corei7, atom # # (?= allows to buildworld for a different CPUTYPE.) # #CPUTYPE?=pentium3 #NO_CPU_CFLAGS= # Don't add -march= to CFLAGS automatically #NO_CPU_COPTFLAGS= # Don't add -march= to COPTFLAGS automatically # # CFLAGS controls the compiler settings used when compiling C code. # Note that optimization settings other than -O and -O2 are not recommended # or supported for compiling the world or the kernel - please revert any # nonstandard optimization settings # before submitting bug reports without patches to the developers. # # CXXFLAGS controls the compiler settings used when compiling C++ code. # Note that CXXFLAGS is initially set to the value of CFLAGS. If you wish # to add to CXXFLAGS value, "+=" must be used rather than "=". Using "=" # alone will remove the often needed contents of CFLAGS from CXXFLAGS. # # Additional compiler flags can be specified that extend or override # default ones. However, neither the base system nor ports are guaranteed # to build and function without problems with non-default settings. # # CFLAGS+= -msse3 # CXXFLAGS+= -msse3 # # MAKE_SHELL controls the shell used internally by make(1) to process the # command scripts in makefiles. Three shells are supported, sh, ksh, and # csh. Using sh is most common, and advised. Using ksh *may* work, but is # not guaranteed to. Using csh is absurd. The default is to use sh. # #MAKE_SHELL?=sh # # BDECFLAGS are a set of gcc warning settings that Bruce Evans has suggested # for use in developing FreeBSD and testing changes. They can be used by # putting "CFLAGS+=${BDECFLAGS}" in /etc/make.conf. -Wconversion is not # included here due to compiler bugs, e.g., mkdir()'s mode_t argument. # #BDECFLAGS= -W -Wall -ansi -pedantic -Wbad-function-cast -Wcast-align \ # -Wcast-qual -Wchar-subscripts -Winline \ # -Wmissing-prototypes -Wnested-externs -Wpointer-arith \ # -Wredundant-decls -Wshadow -Wstrict-prototypes -Wwrite-strings # # To compile just the kernel with special optimizations, you should use # this instead of CFLAGS (which is not applicable to kernel builds anyway). # There is very little to gain by using higher optimization levels, and doing # so can cause problems. # #COPTFLAGS= -O -pipe # # Compare before install. #INSTALL+= -C # # Mtree will follow symlinks. #MTREE_FOLLOWS_SYMLINKS= -L # # To enable installing newgrp(1) with the setuid bit turned on. # Without the setuid bit, newgrp cannot change users' groups. #ENABLE_SUID_NEWGRP= # # To avoid building various parts of the base system: #NO_MODULES= # do not build modules with the kernel #NO_SHARE= # do not go into the share subdir #NO_SHARED= # build /bin and /sbin statically linked (bad idea) # # Variables that control how ppp(8) is built. #PPP_NO_NAT= # do not build with NAT support (see make.conf(5)) #PPP_NO_NETGRAPH= # do not build with Netgraph support #PPP_NO_RADIUS= # do not build with RADIUS support #PPP_NO_SUID= # build with normal permissions # #TRACEROUTE_NO_IPSEC= # do not build traceroute(8) with IPSEC support # # To build sys/modules when building the world (our old way of doing things). #MODULES_WITH_WORLD= # do not build modules when building kernel # # The list of modules to build instead of all of them. #MODULES_OVERRIDE= linux ipfw # # The list of modules to never build, applied *after* MODULES_OVERRIDE. #WITHOUT_MODULES= bktr plip # # If you do not want unformatted manual pages to be compressed # when they are installed: # #NO_MANCOMPRESS= # # # Default format for system documentation, depends on your printer. # Set this to "ascii" for simple printers or screen. # #PRINTERDEVICE= ps # # # How long to wait for a console keypress before booting the default kernel. # This value is approximately in milliseconds. Keypresses are accepted by the # BIOS before booting from disk, making it possible to give custom boot # parameters even when this is set to 0. # #BOOTWAIT=0 #BOOTWAIT=30000 # # By default, the system will always use the keyboard/video card as system # console. However, the boot blocks may be dynamically configured to use a # serial port in addition to or instead of the keyboard/video console. # # By default we use COM1 as our serial console port *if* we're going to use # a serial port as our console at all. Alter as necessary. # # COM1: = 0x3F8, COM2: = 0x2F8, COM3: = 0x3E8, COM4: = 0x2E8 # #BOOT_COMCONSOLE_PORT= 0x3F8 # # The default serial console speed is 9600. Set the speed to a larger value # for better interactive response. # #BOOT_COMCONSOLE_SPEED= 115200 # # By default the 'pxeboot' loader retrieves the kernel via NFS. Defining # this and recompiling /usr/src/sys/boot will cause it to retrieve the kernel # via TFTP. This allows pxeboot to load a custom BOOTP diskless kernel yet # still mount the server's '/' (i.e. rather than load the server's kernel). # #LOADER_TFTP_SUPPORT= YES # # # Kerberos 5 su (k5su) # If you want to use the k5su utility, define this to have it installed # set-user-ID. #ENABLE_SUID_K5SU= # # # CVSup update flags. Edit SUPFILE settings to reflect whichever distribution # file(s) you use on your site (see /usr/share/examples/cvsup/README for more # information on CVSup and these files). To use, do "make update" in /usr/src. # #SUP_UPDATE= # #SUP= /usr/bin/csup #SUPFLAGS= -L 2 #SUPHOST= cvsup.uk.FreeBSD.org #SUPFILE= /usr/share/examples/cvsup/standard-supfile #PORTSSUPFILE= /usr/share/examples/cvsup/ports-supfile # # top(1) uses a hash table for the user names. The size of this hash # can be tuned to match the number of local users. The table size should # be a prime number approximately twice as large as the number of lines in # /etc/passwd. The default number is 20011. # #TOP_TABLE_SIZE= 101 # # Documentation # # The list of languages and encodings to build and install. # #DOC_LANG= en_US.ISO8859-1 ru_RU.KOI8-R # # # sendmail # # The following sets the default m4 configuration file to use at # install time. Use with caution as a make install will overwrite # any existing /etc/mail/sendmail.cf. Note that SENDMAIL_CF is now # deprecated. The value should be a fully qualified path name. # #SENDMAIL_MC=/etc/mail/myconfig.mc # # The following sets the default m4 configuration file for mail # submission to use at install time. Use with caution as a make # install will overwrite any existing /etc/mail/submit.cf. The # value should be a fully qualified path name. # #SENDMAIL_SUBMIT_MC=/etc/mail/mysubmit.mc # # If you need to build additional .cf files during a make buildworld, # include the full paths to the .mc files in SENDMAIL_ADDITIONAL_MC. # #SENDMAIL_ADDITIONAL_MC=/etc/mail/foo.mc /etc/mail/bar.mc # # The following overrides the default location for the m4 configuration # files used to build a .cf file from a .mc file. # #SENDMAIL_CF_DIR=/usr/local/share/sendmail/cf # # Setting the following variable modifies the flags passed to m4 when # building a .cf file from a .mc file. It can be used to enable # features disabled by default. # #SENDMAIL_M4_FLAGS= # # Setting the following variables modifies the build environment for # sendmail and its related utilities. For example, SASL support can be # added with settings such as: # # with SASLv1: # SENDMAIL_CFLAGS=-I/usr/local/include/sasl1 -DSASL # SENDMAIL_LDFLAGS=-L/usr/local/lib # SENDMAIL_LDADD=-lsasl # # with SASLv2: # SENDMAIL_CFLAGS=-I/usr/local/include -DSASL=2 # SENDMAIL_LDFLAGS=-L/usr/local/lib # SENDMAIL_LDADD=-lsasl2 # # Note: If you are using Cyrus SASL with other applications which require # access to the sasldb file, you should add the following to your # sendmail.mc file: # # define(`confDONT_BLAME_SENDMAIL',`GroupReadableSASLDBFile') # #SENDMAIL_CFLAGS= #SENDMAIL_LDFLAGS= #SENDMAIL_LDADD= #SENDMAIL_DPADD= # # Setting SENDMAIL_SET_USER_ID will install the sendmail binary as a # set-user-ID root binary instead of a set-group-ID smmsp binary and will # prevent the installation of /etc/mail/submit.cf. # This is a deprecated mode of operation. See etc/mail/README for more # information. # #SENDMAIL_SET_USER_ID= # # The permissions to use on alias and map databases generated using # /etc/mail/Makefile. Defaults to 0640. # #SENDMAIL_MAP_PERMS= # # # It is also possible to set variables in make.conf which will only be # used when compiling a specific port. For more details see make(1). # #.if ${.CURDIR:M*/irc/irssi-devel*} #WITH_DEBUG=YES #.endif # # Another approach is to use /usr/ports/ports-mgmt/portconf which has # its own config file for port specific options. Index: user/attilio/vmcontention/share/man/man5/fstab.5 =================================================================== --- user/attilio/vmcontention/share/man/man5/fstab.5 (revision 252343) +++ user/attilio/vmcontention/share/man/man5/fstab.5 (revision 252344) @@ -1,356 +1,385 @@ .\" Copyright (c) 1980, 1989, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. All advertising materials mentioning features or use of this software .\" must display the following acknowledgement: .\" This product includes software developed by the University of .\" California, Berkeley and its contributors. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)fstab.5 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd October 11, 2012 +.Dd June 21, 2013 .Dt FSTAB 5 .Os .Sh NAME .Nm fstab .Nd static information about the file systems .Sh SYNOPSIS .In fstab.h .Sh DESCRIPTION The file .Nm contains descriptive information about the various file systems. .Nm is only read by programs, and not written; it is the duty of the system administrator to properly create and maintain this file. Each file system is described on a separate line; fields on each line are separated by tabs or spaces. The order of records in .Nm is important because .Xr fsck 8 , .Xr mount 8 , and .Xr umount 8 sequentially iterate through .Nm doing their thing. .Pp The first field, .Pq Fa fs_spec , describes the special device or remote file system to be mounted. The contents are decoded by the .Xr strunvis 3 function. This allows using spaces or tabs in the device name which would be interpreted as field separators otherwise. .Pp The second field, .Pq Fa fs_file , describes the mount point for the file system. For swap partitions, this field should be specified as .Dq none . The contents are decoded by the .Xr strunvis 3 function, as above. .Pp The third field, .Pq Fa fs_vfstype , describes the type of the file system. The system can support various file system types. Only the root, /usr, and /tmp file systems need be statically compiled into the kernel; everything else will be automatically loaded at mount time. (Exception: the FFS cannot currently be demand-loaded.) Some people still prefer to statically compile other file systems as well. .Pp The fourth field, .Pq Fa fs_mntops , describes the mount options associated with the file system. It is formatted as a comma separated list of options. It contains at least the type of mount (see .Fa fs_type below) plus any additional options appropriate to the file system type. See the options flag .Pq Fl o in the .Xr mount 8 page and the file system specific page, such as .Xr mount_nfs 8 , for additional options that may be specified. All options that can be given to the file system specific mount commands can be used in .Nm as well. They just need to be formatted a bit differently. The arguments of the .Fl o option can be used without the preceding .Fl o flag. Other options need both the file system specific flag and its argument, separated by an equal sign. For example, mounting an .Xr msdosfs 5 filesystem, the options .Bd -literal -offset indent -o sync -o noatime -m 644 -M 755 -u foo -g bar .Ed .Pp should be written as .Bd -literal -offset indent sync,noatime,-m=644,-M=755,-u=foo,-g=bar .Ed .Pp in the option field of .Nm . .Pp If the options .Dq userquota and/or .Dq groupquota are specified, the file system is automatically processed by the .Xr quotacheck 8 command, and user and/or group disk quotas are enabled with .Xr quotaon 8 . By default, file system quotas are maintained in files named .Pa quota.user and .Pa quota.group which are located at the root of the associated file system. These defaults may be overridden by putting an equal sign and an alternative absolute pathname following the quota option. Thus, if the user quota file for .Pa /tmp is stored in .Pa /var/quotas/tmp.user , this location can be specified as: .Bd -literal -offset indent userquota=/var/quotas/tmp.user .Ed .Pp If the option .Dq failok is specified, the system will ignore any error which happens during the mount of that filesystem, which would otherwise cause the system to drop into single user mode. This option is implemented by the .Xr mount 8 command and will not be passed to the kernel. .Pp If the option .Dq noauto is specified, the file system will not be automatically mounted at system startup. Note that, for network file systems of third party types (i.e., types supported by additional software not included in the base system) to be automatically mounted at system startup, the .Va extra_netfs_types .Xr rc.conf 5 variable must be used to extend the .Xr rc 8 startup script's list of network file system types. .Pp +If the option +.Dq late +is specified, the file system will be automatically mounted +at a stage of system startup after remote mount points are mounted. +For more detail about this option, +see the +.Xr mount 8 +manual page. +.Pp The type of the mount is extracted from the .Fa fs_mntops field and stored separately in the .Fa fs_type field (it is not deleted from the .Fa fs_mntops field). If .Fa fs_type is .Dq rw or .Dq ro then the file system whose name is given in the .Fa fs_file field is normally mounted read-write or read-only on the specified special file. +.Pp If .Fa fs_type is .Dq sw then the special file is made available as a piece of swap space by the .Xr swapon 8 command at the end of the system reboot procedure. +For vnode-backed swap spaces, +.Dq file +is supported in the +.Fa fs_mntops +field. +When +.Fa fs_spec +is an +.Xr md 4 +device file +.Pq Do md Dc or Do md[0-9]* Dc +and +.Dq file +is specified in +.Fa fs_mntopts , +an +.Xr md 4 +device is created with the specified file used as backing store, +and then the new device is used as swap space. The fields other than .Fa fs_spec and .Fa fs_type are unused. If .Fa fs_type is specified as .Dq xx the entry is ignored. This is useful to show disk partitions which are currently unused. .Pp The fifth field, .Pq Fa fs_freq , is used for these file systems by the .Xr dump 8 command to determine which file systems need to be dumped. If the fifth field is not present, a value of zero is returned and .Nm dump will assume that the file system does not need to be dumped. If the fifth field is greater than 0, then it specifies the number of days between dumps for this file system. .Pp The sixth field, .Pq Fa fs_passno , is used by the .Xr fsck 8 and .Xr quotacheck 8 programs to determine the order in which file system and quota checks are done at reboot time. The .Fa fs_passno field can be any value between 0 and .Ql INT_MAX Ns -1 . .Pp The root file system should be specified with a .Fa fs_passno of 1, and other file systems should have a .Fa fs_passno of 2 or greater. A file system with a .Fa fs_passno value of 1 is always checked sequentially and be completed before another file system is processed, and it will be processed before all file systems with a larger .Fa fs_passno . .Pp For any given value of .Fa fs_passno , file systems within a drive will be checked sequentially, but file systems on different drives will be checked at the same time to utilize parallelism available in the hardware. Once all file system checks are complete for the current .Fa fs_passno , the same process will start over for the next .Fa fs_passno . .Pp If the sixth field is not present or is zero, a value of zero is returned and .Xr fsck 8 and .Xr quotacheck 8 will assume that the file system does not need to be checked. .Pp The .Fa fs_passno field can be used to implement finer control when the system utilities may determine that the file system resides on a different physical device, when it actually does not, as with a .Xr ccd 4 device. All file systems with a lower .Fa fs_passno value will be completed before starting on file systems with a higher .Fa fs_passno value. E.g. all file systems with a .Fa fs_passno of 2 will be completed before any file systems with a .Fa fs_passno of 3 or greater are started. Gaps are allowed between the different .Fa fs_passno values. E.g. file systems listed in .Pa /etc/fstab may have .Fa fs_passno values such as 0, 1, 2, 15, 100, 200, 300, and may appear in any order within .Pa /etc/fstab . .Bd -literal #define FSTAB_RW "rw" /* read/write device */ #define FSTAB_RQ "rq" /* read/write with quotas */ #define FSTAB_RO "ro" /* read-only device */ #define FSTAB_SW "sw" /* swap device */ #define FSTAB_XX "xx" /* ignore totally */ struct fstab { char *fs_spec; /* block special device name */ char *fs_file; /* file system path prefix */ char *fs_vfstype; /* File system type, ufs, nfs */ char *fs_mntops; /* Mount options ala -o */ char *fs_type; /* FSTAB_* from fs_mntops */ int fs_freq; /* dump frequency, in days */ int fs_passno; /* pass number on parallel fsck */ }; .Ed .Pp The proper way to read records from .Pa fstab is to use the routines .Xr getfsent 3 , .Xr getfsspec 3 , .Xr getfstype 3 , and .Xr getfsfile 3 . .Sh FILES .Bl -tag -width /etc/fstab -compact .It Pa /etc/fstab The file .Nm resides in .Pa /etc . .El .Sh SEE ALSO .Xr getfsent 3 , .Xr getvfsbyname 3 , .Xr ccd 4 , .Xr dump 8 , .Xr fsck 8 , .Xr mount 8 , .Xr quotacheck 8 , .Xr quotaon 8 , .Xr strunvis 3 , .Xr swapon 8 , .Xr umount 8 .Sh HISTORY The .Nm file format appeared in .Bx 4.0 . Index: user/attilio/vmcontention/sys/amd64/include/acpica_machdep.h =================================================================== --- user/attilio/vmcontention/sys/amd64/include/acpica_machdep.h (revision 252343) +++ user/attilio/vmcontention/sys/amd64/include/acpica_machdep.h (revision 252344) @@ -1,84 +1,80 @@ /*- * Copyright (c) 2002 Mitsuru IWASAKI * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /****************************************************************************** * * Name: acpica_machdep.h - arch-specific defines, etc. * $Revision$ * *****************************************************************************/ #ifndef __ACPICA_MACHDEP_H__ #define __ACPICA_MACHDEP_H__ #ifdef _KERNEL /* * Calling conventions: * * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads) * ACPI_EXTERNAL_XFACE - External ACPI interfaces * ACPI_INTERNAL_XFACE - Internal ACPI interfaces * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces */ #define ACPI_SYSTEM_XFACE #define ACPI_EXTERNAL_XFACE #define ACPI_INTERNAL_XFACE #define ACPI_INTERNAL_VAR_XFACE /* Asm macros */ #define ACPI_ASM_MACROS #define BREAKPOINT3 #define ACPI_DISABLE_IRQS() disable_intr() #define ACPI_ENABLE_IRQS() enable_intr() #define ACPI_FLUSH_CPU_CACHE() wbinvd() /* Section 5.2.9.1: global lock acquire/release functions */ extern int acpi_acquire_global_lock(uint32_t *lock); extern int acpi_release_global_lock(uint32_t *lock); #define ACPI_ACQUIRE_GLOBAL_LOCK(GLptr, Acq) do { \ (Acq) = acpi_acquire_global_lock(&((GLptr)->GlobalLock)); \ } while (0) #define ACPI_RELEASE_GLOBAL_LOCK(GLptr, Acq) do { \ (Acq) = acpi_release_global_lock(&((GLptr)->GlobalLock)); \ } while (0) -#endif /* _KERNEL */ - -#define ACPI_MACHINE_WIDTH 64 -#define COMPILER_DEPENDENT_INT64 long -#define COMPILER_DEPENDENT_UINT64 unsigned long - void acpi_SetDefaultIntrModel(int model); void acpi_cpu_c1(void); void *acpi_map_table(vm_paddr_t pa, const char *sig); void acpi_unmap_table(void *table); vm_paddr_t acpi_find_table(const char *sig); + +#endif /* _KERNEL */ #endif /* __ACPICA_MACHDEP_H__ */ Index: user/attilio/vmcontention/sys/amd64/vmm/x86.c =================================================================== --- user/attilio/vmcontention/sys/amd64/vmm/x86.c (revision 252343) +++ user/attilio/vmcontention/sys/amd64/vmm/x86.c (revision 252344) @@ -1,219 +1,247 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include "x86.h" #define CPUID_VM_HIGH 0x40000000 -static const char bhyve_id[12] = "BHyVE BHyVE "; +static const char bhyve_id[12] = "bhyve bhyve "; +static uint64_t bhyve_xcpuids; + int x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { int error; unsigned int func, regs[4]; enum x2apic_state x2apic_state; /* * Requests for invalid CPUID levels should map to the highest * available level instead. */ if (cpu_exthigh != 0 && *eax >= 0x80000000) { if (*eax > cpu_exthigh) *eax = cpu_exthigh; } else if (*eax >= 0x40000000) { if (*eax > CPUID_VM_HIGH) *eax = CPUID_VM_HIGH; } else if (*eax > cpu_high) { *eax = cpu_high; } func = *eax; /* * In general the approach used for CPU topology is to * advertise a flat topology where all CPUs are packages with * no multi-core or SMT. */ switch (func) { + /* + * Pass these through to the guest + */ case CPUID_0000_0000: case CPUID_0000_0002: case CPUID_0000_0003: - case CPUID_0000_000A: - cpuid_count(*eax, *ecx, regs); - break; - case CPUID_8000_0000: - case CPUID_8000_0001: case CPUID_8000_0002: case CPUID_8000_0003: case CPUID_8000_0004: case CPUID_8000_0006: case CPUID_8000_0008: cpuid_count(*eax, *ecx, regs); break; + case CPUID_8000_0001: + /* + * Hide rdtscp/ia32_tsc_aux until we know how + * to deal with them. + */ + cpuid_count(*eax, *ecx, regs); + regs[3] &= ~AMDID_RDTSCP; + break; + case CPUID_8000_0007: cpuid_count(*eax, *ecx, regs); /* * If the host TSCs are not synchronized across * physical cpus then we cannot advertise an * invariant tsc to a vcpu. * * XXX This still falls short because the vcpu * can observe the TSC moving backwards as it * migrates across physical cpus. But at least * it should discourage the guest from using the * TSC to keep track of time. */ if (!smp_tsc) regs[3] &= ~AMDPM_TSC_INVARIANT; break; case CPUID_0000_0001: do_cpuid(1, regs); error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state); if (error) { panic("x86_emulate_cpuid: error %d " "fetching x2apic state", error); } /* * Override the APIC ID only in ebx */ regs[1] &= ~(CPUID_LOCAL_APIC_ID); regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT); /* * Don't expose VMX, SpeedStep or TME capability. * Advertise x2APIC capability and Hypervisor guest. */ regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2); regs[2] |= CPUID2_HV; if (x2apic_state != X2APIC_DISABLED) regs[2] |= CPUID2_X2APIC; /* * Hide xsave/osxsave/avx until the FPU save/restore * issues are resolved */ regs[2] &= ~(CPUID2_XSAVE | CPUID2_OSXSAVE | CPUID2_AVX); /* * Hide monitor/mwait until we know how to deal with * these instructions. */ regs[2] &= ~CPUID2_MON; + /* + * Hide the performance and debug features. + */ + regs[2] &= ~CPUID2_PDCM; + /* * Hide thermal monitoring */ regs[3] &= ~(CPUID_ACPI | CPUID_TM); /* * Machine check handling is done in the host. * Hide MTRR capability. */ regs[3] &= ~(CPUID_MCA | CPUID_MCE | CPUID_MTRR); + /* + * Hide the debug store capability. + */ + regs[3] &= ~CPUID_DS; + /* * Disable multi-core. */ regs[1] &= ~CPUID_HTT_CORES; regs[3] &= ~CPUID_HTT; break; case CPUID_0000_0004: do_cpuid(4, regs); /* * Do not expose topology. */ regs[0] &= 0xffff8000; regs[0] |= 0x04008000; break; case CPUID_0000_0006: case CPUID_0000_0007: + case CPUID_0000_000A: /* * Handle the access, but report 0 for * all options */ regs[0] = 0; regs[1] = 0; regs[2] = 0; regs[3] = 0; break; case CPUID_0000_000B: /* * Processor topology enumeration */ regs[0] = 0; regs[1] = 0; regs[2] = *ecx & 0xff; regs[3] = vcpu_id; break; case 0x40000000: regs[0] = CPUID_VM_HIGH; bcopy(bhyve_id, ®s[1], 4); - bcopy(bhyve_id, ®s[2], 4); - bcopy(bhyve_id, ®s[3], 4); + bcopy(bhyve_id + 4, ®s[2], 4); + bcopy(bhyve_id + 8, ®s[3], 4); break; + default: - /* XXX: Leaf 5? */ - return (0); + /* + * The leaf value has already been clamped so + * simply pass this through, keeping count of + * how many unhandled leaf values have been seen. + */ + atomic_add_long(&bhyve_xcpuids, 1); + cpuid_count(*eax, *ecx, regs); + break; } *eax = regs[0]; *ebx = regs[1]; *ecx = regs[2]; *edx = regs[3]; + return (1); } Index: user/attilio/vmcontention/sys/amd64/vmm =================================================================== --- user/attilio/vmcontention/sys/amd64/vmm (revision 252343) +++ user/attilio/vmcontention/sys/amd64/vmm (revision 252344) Property changes on: user/attilio/vmcontention/sys/amd64/vmm ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/amd64/vmm:r252029-252340 Index: user/attilio/vmcontention/sys/arm/arm/db_trace.c =================================================================== --- user/attilio/vmcontention/sys/arm/arm/db_trace.c (revision 252343) +++ user/attilio/vmcontention/sys/arm/arm/db_trace.c (revision 252344) @@ -1,623 +1,642 @@ /* $NetBSD: db_trace.c,v 1.8 2003/01/17 22:28:48 thorpej Exp $ */ /*- * Copyright (c) 2000, 2001 Ben Harris * Copyright (c) 1996 Scott K. Stevens * * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __ARM_EABI__ /* * Definitions for the instruction interpreter. * * The ARM EABI specifies how to perform the frame unwinding in the * Exception Handling ABI for the ARM Architecture document. To perform * the unwind we need to know the initial frame pointer, stack pointer, * link register and program counter. We then find the entry within the * index table that points to the function the program counter is within. * This gives us either a list of three instructions to process, a 31-bit * relative offset to a table of instructions, or a value telling us * we can't unwind any further. * * When we have the instructions to process we need to decode them * following table 4 in section 9.3. This describes a collection of bit * patterns to encode that steps to take to update the stack pointer and * link register to the correct values at the start of the function. */ /* A special case when we are unable to unwind past this function */ #define EXIDX_CANTUNWIND 1 /* The register names */ #define FP 11 #define SP 13 #define LR 14 #define PC 15 /* * These are set in the linker script. Their addresses will be * either the start or end of the exception table or index. */ extern int extab_start, extab_end, exidx_start, exidx_end; /* * Entry types. * These are the only entry types that have been seen in the kernel. */ #define ENTRY_MASK 0xff000000 #define ENTRY_ARM_SU16 0x80000000 #define ENTRY_ARM_LU16 0x81000000 /* Instruction masks. */ #define INSN_VSP_MASK 0xc0 #define INSN_VSP_SIZE_MASK 0x3f #define INSN_STD_MASK 0xf0 #define INSN_STD_DATA_MASK 0x0f #define INSN_POP_TYPE_MASK 0x08 #define INSN_POP_COUNT_MASK 0x07 #define INSN_VSP_LARGE_INC_MASK 0xff /* Instruction definitions */ #define INSN_VSP_INC 0x00 #define INSN_VSP_DEC 0x40 #define INSN_POP_MASKED 0x80 #define INSN_VSP_REG 0x90 #define INSN_POP_COUNT 0xa0 #define INSN_FINISH 0xb0 +#define INSN_POP_REGS 0xb1 #define INSN_VSP_LARGE_INC 0xb2 /* An item in the exception index table */ struct unwind_idx { uint32_t offset; uint32_t insn; }; /* The state of the unwind process */ struct unwind_state { uint32_t registers[16]; uint32_t start_pc; uint32_t *insn; u_int entries; u_int byte; uint16_t update_mask; }; /* Expand a 31-bit signed value to a 32-bit signed value */ static __inline int32_t db_expand_prel31(uint32_t prel31) { return ((int32_t)(prel31 & 0x7fffffffu) << 1) / 2; } /* * Perform a binary search of the index table to find the function * with the largest address that doesn't exceed addr. */ static struct unwind_idx * db_find_index(uint32_t addr) { unsigned int min, mid, max; struct unwind_idx *start; struct unwind_idx *item; int32_t prel31_addr; uint32_t func_addr; start = (struct unwind_idx *)&exidx_start; min = 0; max = (&exidx_end - &exidx_start) / 2; while (min != max) { mid = min + (max - min + 1) / 2; item = &start[mid]; prel31_addr = db_expand_prel31(item->offset); func_addr = (uint32_t)&item->offset + prel31_addr; if (func_addr <= addr) { min = mid; } else { max = mid - 1; } } return &start[min]; } /* Reads the next byte from the instruction list */ static uint8_t db_unwind_exec_read_byte(struct unwind_state *state) { uint8_t insn; /* Read the unwind instruction */ insn = (*state->insn) >> (state->byte * 8); /* Update the location of the next instruction */ if (state->byte == 0) { state->byte = 3; state->insn++; state->entries--; } else state->byte--; return insn; } /* Executes the next instruction on the list */ static int db_unwind_exec_insn(struct unwind_state *state) { unsigned int insn; uint32_t *vsp = (uint32_t *)state->registers[SP]; int update_vsp = 0; /* This should never happen */ if (state->entries == 0) return 1; /* Read the next instruction */ insn = db_unwind_exec_read_byte(state); if ((insn & INSN_VSP_MASK) == INSN_VSP_INC) { state->registers[SP] += ((insn & INSN_VSP_SIZE_MASK) << 2) + 4; } else if ((insn & INSN_VSP_MASK) == INSN_VSP_DEC) { state->registers[SP] -= ((insn & INSN_VSP_SIZE_MASK) << 2) + 4; } else if ((insn & INSN_STD_MASK) == INSN_POP_MASKED) { unsigned int mask, reg; /* Load the mask */ mask = db_unwind_exec_read_byte(state); mask |= (insn & INSN_STD_DATA_MASK) << 8; /* We have a refuse to unwind instruction */ if (mask == 0) return 1; /* Update SP */ update_vsp = 1; /* Load the registers */ for (reg = 4; mask && reg < 16; mask >>= 1, reg++) { if (mask & 1) { state->registers[reg] = *vsp++; state->update_mask |= 1 << reg; /* If we have updated SP kep its value */ if (reg == SP) update_vsp = 0; } } } else if ((insn & INSN_STD_MASK) == INSN_VSP_REG && ((insn & INSN_STD_DATA_MASK) != 13) && ((insn & INSN_STD_DATA_MASK) != 15)) { /* sp = register */ state->registers[SP] = state->registers[insn & INSN_STD_DATA_MASK]; } else if ((insn & INSN_STD_MASK) == INSN_POP_COUNT) { unsigned int count, reg; /* Read how many registers to load */ count = insn & INSN_POP_COUNT_MASK; /* Update sp */ update_vsp = 1; /* Pop the registers */ for (reg = 4; reg <= 4 + count; reg++) { state->registers[reg] = *vsp++; state->update_mask |= 1 << reg; } /* Check if we are in the pop r14 version */ if ((insn & INSN_POP_TYPE_MASK) != 0) { state->registers[14] = *vsp++; } } else if (insn == INSN_FINISH) { /* Stop processing */ state->entries = 0; + + } else if ((insn == INSN_POP_REGS)) { + unsigned int mask, reg; + + mask = db_unwind_exec_read_byte(state); + if (mask == 0 || (mask & 0xf0) != 0) + return 1; + + /* Update SP */ + update_vsp = 1; + + /* Load the registers */ + for (reg = 0; mask && reg < 4; mask >>= 1, reg++) { + if (mask & 1) { + state->registers[reg] = *vsp++; + state->update_mask |= 1 << reg; + } + } } else if ((insn & INSN_VSP_LARGE_INC_MASK) == INSN_VSP_LARGE_INC) { unsigned int uleb128; /* Read the increment value */ uleb128 = db_unwind_exec_read_byte(state); state->registers[SP] += 0x204 + (uleb128 << 2); } else { /* We hit a new instruction that needs to be implemented */ db_printf("Unhandled instruction %.2x\n", insn); return 1; } if (update_vsp) { state->registers[SP] = (uint32_t)vsp; } #if 0 db_printf("fp = %08x, sp = %08x, lr = %08x, pc = %08x\n", state->registers[FP], state->registers[SP], state->registers[LR], state->registers[PC]); #endif return 0; } /* Performs the unwind of a function */ static int db_unwind_tab(struct unwind_state *state) { uint32_t entry; /* Set PC to a known value */ state->registers[PC] = 0; /* Read the personality */ entry = *state->insn & ENTRY_MASK; if (entry == ENTRY_ARM_SU16) { state->byte = 2; state->entries = 1; } else if (entry == ENTRY_ARM_LU16) { state->byte = 1; state->entries = ((*state->insn >> 16) & 0xFF) + 1; } else { db_printf("Unknown entry: %x\n", entry); return 1; } while (state->entries > 0) { if (db_unwind_exec_insn(state) != 0) return 1; } /* * The program counter was not updated, load it from the link register. */ if (state->registers[PC] == 0) state->registers[PC] = state->registers[LR]; return 0; } static void db_stack_trace_cmd(struct unwind_state *state) { struct unwind_idx *index; const char *name; db_expr_t value; db_expr_t offset; c_db_sym_t sym; u_int reg, i; char *sep; uint16_t upd_mask; bool finished; finished = false; while (!finished) { /* Reset the mask of updated registers */ state->update_mask = 0; /* The pc value is correct and will be overwritten, save it */ state->start_pc = state->registers[PC]; /* Find the item to run */ index = db_find_index(state->start_pc); if (index->insn != EXIDX_CANTUNWIND) { if (index->insn & (1 << 31)) { /* The data is within the instruction */ state->insn = &index->insn; } else { /* A prel31 offset to the unwind table */ state->insn = (uint32_t *) ((uintptr_t)&index->insn + db_expand_prel31(index->insn)); } /* Run the unwind function */ finished = db_unwind_tab(state); } /* Print the frame details */ sym = db_search_symbol(state->start_pc, DB_STGY_ANY, &offset); if (sym == C_DB_SYM_NULL) { value = 0; name = "(null)"; } else db_symbol_values(sym, &name, &value); db_printf("%s() at ", name); db_printsym(state->start_pc, DB_STGY_PROC); db_printf("\n"); db_printf("\t pc = 0x%08x lr = 0x%08x (", state->start_pc, state->registers[LR]); db_printsym(state->registers[LR], DB_STGY_PROC); db_printf(")\n"); db_printf("\t sp = 0x%08x fp = 0x%08x", state->registers[SP], state->registers[FP]); /* Don't print the registers we have already printed */ upd_mask = state->update_mask & ~((1 << SP) | (1 << FP) | (1 << LR) | (1 << PC)); sep = "\n\t"; for (i = 0, reg = 0; upd_mask != 0; upd_mask >>= 1, reg++) { if ((upd_mask & 1) != 0) { db_printf("%s%sr%d = 0x%08x", sep, (reg < 10) ? " " : "", reg, state->registers[reg]); i++; if (i == 2) { sep = "\n\t"; i = 0; } else sep = " "; } } db_printf("\n"); /* * Stop if directed to do so, or if we've unwound back to the * kernel entry point, or if the unwind function didn't change * anything (to avoid getting stuck in this loop forever). * If the latter happens, it's an indication that the unwind * information is incorrect somehow for the function named in * the last frame printed before you see the unwind failure * message (maybe it needs a STOP_UNWINDING). */ if (index->insn == EXIDX_CANTUNWIND) { db_printf("Unable to unwind further\n"); finished = true; } else if (state->registers[PC] < VM_MIN_KERNEL_ADDRESS) { db_printf("Unable to unwind into user mode\n"); finished = true; } else if (state->update_mask == 0) { db_printf("Unwind failure (no registers changed)\n"); finished = true; } } } #endif /* * APCS stack frames are awkward beasts, so I don't think even trying to use * a structure to represent them is a good idea. * * Here's the diagram from the APCS. Increasing address is _up_ the page. * * save code pointer [fp] <- fp points to here * return link value [fp, #-4] * return sp value [fp, #-8] * return fp value [fp, #-12] * [saved v7 value] * [saved v6 value] * [saved v5 value] * [saved v4 value] * [saved v3 value] * [saved v2 value] * [saved v1 value] * [saved a4 value] * [saved a3 value] * [saved a2 value] * [saved a1 value] * * The save code pointer points twelve bytes beyond the start of the * code sequence (usually a single STM) that created the stack frame. * We have to disassemble it if we want to know which of the optional * fields are actually present. */ #ifndef __ARM_EABI__ /* The frame format is differend in AAPCS */ static void db_stack_trace_cmd(db_expr_t addr, db_expr_t count, boolean_t kernel_only) { u_int32_t *frame, *lastframe; c_db_sym_t sym; const char *name; db_expr_t value; db_expr_t offset; int scp_offset; frame = (u_int32_t *)addr; lastframe = NULL; scp_offset = -(get_pc_str_offset() >> 2); while (count-- && frame != NULL && !db_pager_quit) { db_addr_t scp; u_int32_t savecode; int r; u_int32_t *rp; const char *sep; /* * In theory, the SCP isn't guaranteed to be in the function * that generated the stack frame. We hope for the best. */ scp = frame[FR_SCP]; sym = db_search_symbol(scp, DB_STGY_ANY, &offset); if (sym == C_DB_SYM_NULL) { value = 0; name = "(null)"; } else db_symbol_values(sym, &name, &value); db_printf("%s() at ", name); db_printsym(scp, DB_STGY_PROC); db_printf("\n"); #ifdef __PROG26 db_printf("scp=0x%08x rlv=0x%08x (", scp, frame[FR_RLV] & R15_PC); db_printsym(frame[FR_RLV] & R15_PC, DB_STGY_PROC); db_printf(")\n"); #else db_printf("scp=0x%08x rlv=0x%08x (", scp, frame[FR_RLV]); db_printsym(frame[FR_RLV], DB_STGY_PROC); db_printf(")\n"); #endif db_printf("\trsp=0x%08x rfp=0x%08x", frame[FR_RSP], frame[FR_RFP]); savecode = ((u_int32_t *)scp)[scp_offset]; if ((savecode & 0x0e100000) == 0x08000000) { /* Looks like an STM */ rp = frame - 4; sep = "\n\t"; for (r = 10; r >= 0; r--) { if (savecode & (1 << r)) { db_printf("%sr%d=0x%08x", sep, r, *rp--); sep = (frame - rp) % 4 == 2 ? "\n\t" : " "; } } } db_printf("\n"); /* * Switch to next frame up */ if (frame[FR_RFP] == 0) break; /* Top of stack */ lastframe = frame; frame = (u_int32_t *)(frame[FR_RFP]); if (INKERNEL((int)frame)) { /* staying in kernel */ if (frame <= lastframe) { db_printf("Bad frame pointer: %p\n", frame); break; } } else if (INKERNEL((int)lastframe)) { /* switch from user to kernel */ if (kernel_only) break; /* kernel stack only */ } else { /* in user */ if (frame <= lastframe) { db_printf("Bad user frame pointer: %p\n", frame); break; } } } } #endif /* XXX stubs */ void db_md_list_watchpoints() { } int db_md_clr_watchpoint(db_expr_t addr, db_expr_t size) { return (0); } int db_md_set_watchpoint(db_expr_t addr, db_expr_t size) { return (0); } int db_trace_thread(struct thread *thr, int count) { #ifdef __ARM_EABI__ struct unwind_state state; #endif struct pcb *ctx; if (thr != curthread) { ctx = kdb_thr_ctx(thr); #ifdef __ARM_EABI__ state.registers[FP] = ctx->un_32.pcb32_r11; state.registers[SP] = ctx->un_32.pcb32_sp; state.registers[LR] = ctx->un_32.pcb32_lr; state.registers[PC] = ctx->un_32.pcb32_pc; db_stack_trace_cmd(&state); #else db_stack_trace_cmd(ctx->un_32.pcb32_r11, -1, TRUE); #endif } else db_trace_self(); return (0); } void db_trace_self(void) { #ifdef __ARM_EABI__ struct unwind_state state; uint32_t sp; /* Read the stack pointer */ __asm __volatile("mov %0, sp" : "=&r" (sp)); state.registers[FP] = (uint32_t)__builtin_frame_address(0); state.registers[SP] = sp; state.registers[LR] = (uint32_t)__builtin_return_address(0); state.registers[PC] = (uint32_t)db_trace_self; db_stack_trace_cmd(&state); #else db_addr_t addr; addr = (db_addr_t)__builtin_frame_address(0); db_stack_trace_cmd(addr, -1, FALSE); #endif } Index: user/attilio/vmcontention/sys/arm/arm/exception.S =================================================================== --- user/attilio/vmcontention/sys/arm/arm/exception.S (revision 252343) +++ user/attilio/vmcontention/sys/arm/arm/exception.S (revision 252344) @@ -1,266 +1,266 @@ /* $NetBSD: exception.S,v 1.13 2003/10/31 16:30:15 scw Exp $ */ /*- * Copyright (c) 1994-1997 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * RiscBSD kernel project * * exception.S * * Low level handlers for exception vectors * * Created : 24/09/94 * * Based on kate/display/abort.s * */ #include "assym.s" #include #include #include __FBSDID("$FreeBSD$"); .text .align 0 AST_LOCALS /* * reset_entry: * * Handler for Reset exception. */ ASENTRY_NP(reset_entry) adr r0, Lreset_panicmsg bl _C_LABEL(panic) /* NOTREACHED */ Lreset_panicmsg: .asciz "Reset vector called, LR = 0x%08x" .balign 4 END(reset_entry) /* * swi_entry * * Handler for the Software Interrupt exception. */ ASENTRY_NP(swi_entry) STOP_UNWINDING /* Don't unwind past here */ PUSHFRAME mov r0, sp /* Pass the frame to any function */ mov r6, sp /* Backup the stack pointer */ bic sp, sp, #7 /* Align the stack pointer */ bl _C_LABEL(swi_handler) /* It's a SWI ! */ mov sp, r6 /* Restore the stack */ DO_AST PULLFRAME movs pc, lr /* Exit */ END(swi_entry) /* * prefetch_abort_entry: * * Handler for the Prefetch Abort exception. */ ASENTRY_NP(prefetch_abort_entry) #ifdef __XSCALE__ nop /* Make absolutely sure any pending */ nop /* imprecise aborts have occurred. */ #endif sub lr, lr, #0x00000004 /* Adjust the lr */ PUSHFRAMEINSVC ldr r1, Lprefetch_abort_handler_address adr lr, exception_exit mov r0, sp /* pass the stack pointer as r0 */ ldr pc, [r1] Lprefetch_abort_handler_address: .word _C_LABEL(prefetch_abort_handler_address) .data .global _C_LABEL(prefetch_abort_handler_address) _C_LABEL(prefetch_abort_handler_address): .word abortprefetch .text abortprefetch: adr r0, abortprefetchmsg b _C_LABEL(panic) abortprefetchmsg: .asciz "abortprefetch" .align 0 END(prefetch_abort_entry) /* * data_abort_entry: * * Handler for the Data Abort exception. */ ASENTRY_NP(data_abort_entry) #ifdef __XSCALE__ nop /* Make absolutely sure any pending */ nop /* imprecise aborts have occurred. */ #endif sub lr, lr, #0x00000008 /* Adjust the lr */ PUSHFRAMEINSVC /* Push trap frame and switch */ /* to SVC32 mode */ ldr r1, Ldata_abort_handler_address adr lr, exception_exit mov r0, sp /* pass the stack pointer as r0 */ ldr pc, [r1] Ldata_abort_handler_address: .word _C_LABEL(data_abort_handler_address) .data .global _C_LABEL(data_abort_handler_address) _C_LABEL(data_abort_handler_address): .word abortdata .text abortdata: adr r0, abortdatamsg b _C_LABEL(panic) abortdatamsg: .asciz "abortdata" .align 0 END(data_abort_entry) /* * address_exception_entry: * * Handler for the Address Exception exception. * * NOTE: This exception isn't really used on arm32. We * print a warning message to the console and then treat * it like a Data Abort. */ ASENTRY_NP(address_exception_entry) mrs r1, cpsr_all mrs r2, spsr_all mov r3, lr adr r0, Laddress_exception_msg bl _C_LABEL(printf) /* XXX CLOBBERS LR!! */ b data_abort_entry Laddress_exception_msg: .asciz "Address Exception CPSR=0x%08x SPSR=0x%08x LR=0x%08x\n" .balign 4 END(address_exception_entry) /* * General exception exit handler * (Placed here to be within range of all the references to it) * * It exits straight away if not returning to USR mode. * This loops around delivering any pending ASTs. * Interrupts are disabled at suitable points to avoid ASTs * being posted between testing and exit to user mode. * * This function uses PULLFRAMEFROMSVCANDEXIT and DO_AST and can * only be called if the exception handler used PUSHFRAMEINSVC. * * For EABI, don't try to unwind any further than this. This is a * stopgap measure to avoid getting stuck in a loop in the unwinder, * which happens because we don't yet provide the proper unwind info * here that describes which registers are being restored. */ ASENTRY_NP(exception_exit) - STOP_UNWINDING + UNWINDSVCFRAME DO_AST PULLFRAMEFROMSVCANDEXIT END(exception_exit) /* * undefined_entry: * * Handler for the Undefined Instruction exception. * * We indirect the undefined vector via the handler address * in the data area. Entry to the undefined handler must * look like direct entry from the vector. */ ASENTRY_NP(undefined_entry) stmfd sp!, {r0, r1} ldr r0, Lundefined_handler_indirection ldr r1, [sp], #0x0004 str r1, [r0, #0x0000] ldr r1, [sp], #0x0004 str r1, [r0, #0x0004] ldmia r0, {r0, r1, pc} Lundefined_handler_indirection: .word Lundefined_handler_indirection_data END(undefined_entry) /* * assembly bounce code for calling the kernel * undefined instruction handler. This uses * a standard trap frame and is called in SVC mode. */ ENTRY_NP(undefinedinstruction_bounce) PUSHFRAMEINSVC mov r0, sp adr lr, exception_exit b _C_LABEL(undefinedinstruction) .data .align 0 /* * Indirection data * 2 words use for preserving r0 and r1 * 3rd word contains the undefined handler address. */ Lundefined_handler_indirection_data: .word 0 .word 0 .global _C_LABEL(undefined_handler_address) _C_LABEL(undefined_handler_address): .word _C_LABEL(undefinedinstruction_bounce) END(undefinedinstruction_bounce) Index: user/attilio/vmcontention/sys/arm/include/asmacros.h =================================================================== --- user/attilio/vmcontention/sys/arm/include/asmacros.h (revision 252343) +++ user/attilio/vmcontention/sys/arm/include/asmacros.h (revision 252344) @@ -1,259 +1,268 @@ /* $NetBSD: frame.h,v 1.6 2003/10/05 19:44:58 matt Exp $ */ /*- * Copyright (c) 1994-1997 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_ASMACROS_H_ #define _MACHINE_ASMACROS_H_ #include #ifdef _KERNEL #ifdef LOCORE #include "opt_global.h" /* * ASM macros for pushing and pulling trapframes from the stack * * These macros are used to handle the irqframe and trapframe structures * defined above. */ /* * PUSHFRAME - macro to push a trap frame on the stack in the current mode * Since the current mode is used, the SVC lr field is not defined. * * NOTE: r13 and r14 are stored separately as a work around for the * SA110 rev 2 STM^ bug */ #ifdef ARM_TP_ADDRESS #define PUSHFRAME \ str lr, [sp, #-4]!; /* Push the return address */ \ sub sp, sp, #(4*17); /* Adjust the stack pointer */ \ stmia sp, {r0-r12}; /* Push the user mode registers */ \ add r0, sp, #(4*13); /* Adjust the stack pointer */ \ stmia r0, {r13-r14}^; /* Push the user mode registers */ \ mov r0, r0; /* NOP for previous instruction */ \ mrs r0, spsr_all; /* Put the SPSR on the stack */ \ str r0, [sp, #-4]!; \ ldr r0, =ARM_RAS_START; \ mov r1, #0; \ str r1, [r0]; \ mov r1, #0xffffffff; \ str r1, [r0, #4]; #else #define PUSHFRAME \ str lr, [sp, #-4]!; /* Push the return address */ \ sub sp, sp, #(4*17); /* Adjust the stack pointer */ \ stmia sp, {r0-r12}; /* Push the user mode registers */ \ add r0, sp, #(4*13); /* Adjust the stack pointer */ \ stmia r0, {r13-r14}^; /* Push the user mode registers */ \ mov r0, r0; /* NOP for previous instruction */ \ mrs r0, spsr_all; /* Put the SPSR on the stack */ \ str r0, [sp, #-4]!; #endif /* * PULLFRAME - macro to pull a trap frame from the stack in the current mode * Since the current mode is used, the SVC lr field is ignored. */ #ifdef ARM_TP_ADDRESS #define PULLFRAME \ ldr r0, [sp], #0x0004; /* Get the SPSR from stack */ \ msr spsr_all, r0; \ ldmia sp, {r0-r14}^; /* Restore registers (usr mode) */ \ mov r0, r0; /* NOP for previous instruction */ \ add sp, sp, #(4*17); /* Adjust the stack pointer */ \ ldr lr, [sp], #0x0004; /* Pull the return address */ #else #define PULLFRAME \ ldr r0, [sp], #0x0004; /* Get the SPSR from stack */ \ msr spsr_all, r0; \ clrex; \ ldmia sp, {r0-r14}^; /* Restore registers (usr mode) */ \ mov r0, r0; /* NOP for previous instruction */ \ add sp, sp, #(4*17); /* Adjust the stack pointer */ \ ldr lr, [sp], #0x0004; /* Pull the return address */ #endif /* * PUSHFRAMEINSVC - macro to push a trap frame on the stack in SVC32 mode * This should only be used if the processor is not currently in SVC32 * mode. The processor mode is switched to SVC mode and the trap frame is * stored. The SVC lr field is used to store the previous value of * lr in SVC mode. * * NOTE: r13 and r14 are stored separately as a work around for the * SA110 rev 2 STM^ bug */ #ifdef ARM_TP_ADDRESS #define PUSHFRAMEINSVC \ stmdb sp, {r0-r3}; /* Save 4 registers */ \ mov r0, lr; /* Save xxx32 r14 */ \ mov r1, sp; /* Save xxx32 sp */ \ mrs r3, spsr; /* Save xxx32 spsr */ \ mrs r2, cpsr; /* Get the CPSR */ \ bic r2, r2, #(PSR_MODE); /* Fix for SVC mode */ \ orr r2, r2, #(PSR_SVC32_MODE); \ msr cpsr_c, r2; /* Punch into SVC mode */ \ mov r2, sp; /* Save SVC sp */ \ str r0, [sp, #-4]!; /* Push return address */ \ str lr, [sp, #-4]!; /* Push SVC lr */ \ str r2, [sp, #-4]!; /* Push SVC sp */ \ msr spsr_all, r3; /* Restore correct spsr */ \ ldmdb r1, {r0-r3}; /* Restore 4 regs from xxx mode */ \ sub sp, sp, #(4*15); /* Adjust the stack pointer */ \ stmia sp, {r0-r12}; /* Push the user mode registers */ \ add r0, sp, #(4*13); /* Adjust the stack pointer */ \ stmia r0, {r13-r14}^; /* Push the user mode registers */ \ mov r0, r0; /* NOP for previous instruction */ \ ldr r5, =ARM_RAS_START; /* Check if there's any RAS */ \ ldr r4, [r5, #4]; /* reset it to point at the */ \ cmp r4, #0xffffffff; /* end of memory if necessary; */ \ movne r1, #0xffffffff; /* leave value in r4 for later */ \ strne r1, [r5, #4]; /* comparision against PC. */ \ ldr r3, [r5]; /* Retrieve global RAS_START */ \ cmp r3, #0; /* and reset it if non-zero. */ \ movne r1, #0; /* If non-zero RAS_START and */ \ strne r1, [r5]; /* PC was lower than RAS_END, */ \ ldrne r1, [r0, #16]; /* adjust the saved PC so that */ \ cmpne r4, r1; /* execution later resumes at */ \ strhi r3, [r0, #16]; /* the RAS_START location. */ \ mrs r0, spsr_all; \ str r0, [sp, #-4]! #else #define PUSHFRAMEINSVC \ stmdb sp, {r0-r3}; /* Save 4 registers */ \ mov r0, lr; /* Save xxx32 r14 */ \ mov r1, sp; /* Save xxx32 sp */ \ mrs r3, spsr; /* Save xxx32 spsr */ \ mrs r2, cpsr; /* Get the CPSR */ \ bic r2, r2, #(PSR_MODE); /* Fix for SVC mode */ \ orr r2, r2, #(PSR_SVC32_MODE); \ msr cpsr_c, r2; /* Punch into SVC mode */ \ mov r2, sp; /* Save SVC sp */ \ str r0, [sp, #-4]!; /* Push return address */ \ str lr, [sp, #-4]!; /* Push SVC lr */ \ str r2, [sp, #-4]!; /* Push SVC sp */ \ msr spsr_all, r3; /* Restore correct spsr */ \ ldmdb r1, {r0-r3}; /* Restore 4 regs from xxx mode */ \ sub sp, sp, #(4*15); /* Adjust the stack pointer */ \ stmia sp, {r0-r12}; /* Push the user mode registers */ \ add r0, sp, #(4*13); /* Adjust the stack pointer */ \ stmia r0, {r13-r14}^; /* Push the user mode registers */ \ mov r0, r0; /* NOP for previous instruction */ \ mrs r0, spsr_all; /* Put the SPSR on the stack */ \ str r0, [sp, #-4]! #endif /* * PULLFRAMEFROMSVCANDEXIT - macro to pull a trap frame from the stack * in SVC32 mode and restore the saved processor mode and PC. * This should be used when the SVC lr register needs to be restored on * exit. */ #ifdef ARM_TP_ADDRESS #define PULLFRAMEFROMSVCANDEXIT \ ldr r0, [sp], #0x0004; /* Get the SPSR from stack */ \ msr spsr_all, r0; /* restore SPSR */ \ ldmia sp, {r0-r14}^; /* Restore registers (usr mode) */ \ mov r0, r0; /* NOP for previous instruction */ \ add sp, sp, #(4*15); /* Adjust the stack pointer */ \ ldmia sp, {sp, lr, pc}^ /* Restore lr and exit */ #else #define PULLFRAMEFROMSVCANDEXIT \ ldr r0, [sp], #0x0004; /* Get the SPSR from stack */ \ msr spsr_all, r0; /* restore SPSR */ \ clrex; \ ldmia sp, {r0-r14}^; /* Restore registers (usr mode) */ \ mov r0, r0; /* NOP for previous instruction */ \ add sp, sp, #(4*15); /* Adjust the stack pointer */ \ ldmia sp, {sp, lr, pc}^ /* Restore lr and exit */ -#endif +#endif +#if defined(__ARM_EABI__) +#define UNWINDSVCFRAME \ + .save {r13-r15}; /* Restore sp, lr, pc */ \ + .pad #(2*4); /* Skip user sp and lr */ \ + .save {r0-r12}; /* Restore r0-r12 */ \ + .pad #(4) /* Skip spsr */ +#else +#define UNWINDSVCFRAME +#endif #define DATA(name) \ .data ; \ _ALIGN_DATA ; \ .globl name ; \ .type name, %object ; \ name: #ifdef _ARM_ARCH_6 #define AST_LOCALS #define GET_CURTHREAD_PTR(tmp) \ mrc p15, 0, tmp, c13, c0, 4; \ add tmp, tmp, #(PC_CURTHREAD) #else #define AST_LOCALS ;\ .Lcurthread: ;\ .word _C_LABEL(__pcpu) + PC_CURTHREAD #define GET_CURTHREAD_PTR(tmp) \ ldr tmp, .Lcurthread #endif #define DO_AST \ ldr r0, [sp] /* Get the SPSR from stack */ ;\ mrs r4, cpsr /* save CPSR */ ;\ orr r1, r4, #(I32_bit|F32_bit) ;\ msr cpsr_c, r1 /* Disable interrupts */ ;\ and r0, r0, #(PSR_MODE) /* Returning to USR mode? */ ;\ teq r0, #(PSR_USR32_MODE) ;\ bne 2f /* Nope, get out now */ ;\ bic r4, r4, #(I32_bit|F32_bit) ;\ 1: GET_CURTHREAD_PTR(r5) ;\ ldr r5, [r5] ;\ ldr r1, [r5, #(TD_FLAGS)] ;\ and r1, r1, #(TDF_ASTPENDING|TDF_NEEDRESCHED) ;\ teq r1, #0x00000000 ;\ beq 2f /* Nope. Just bail */ ;\ msr cpsr_c, r4 /* Restore interrupts */ ;\ mov r0, sp ;\ bl _C_LABEL(ast) /* ast(frame) */ ;\ orr r0, r4, #(I32_bit|F32_bit) ;\ msr cpsr_c, r0 ;\ b 1b ;\ 2: #endif /* LOCORE */ #endif /* _KERNEL */ #endif /* !_MACHINE_ASMACROS_H_ */ Index: user/attilio/vmcontention/sys/arm/ti/am335x/am335x_lcd.c =================================================================== --- user/attilio/vmcontention/sys/arm/ti/am335x/am335x_lcd.c (revision 252343) +++ user/attilio/vmcontention/sys/arm/ti/am335x/am335x_lcd.c (revision 252344) @@ -1,667 +1,689 @@ /*- * Copyright 2013 Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include +/* syscons bits */ +#include +#include + #include #include #include #include #include +#include +#include + #include #include #include "am335x_lcd.h" #include "am335x_pwm.h" #define LCD_PID 0x00 #define LCD_CTRL 0x04 #define CTRL_DIV_MASK 0xff #define CTRL_DIV_SHIFT 8 #define CTRL_AUTO_UFLOW_RESTART (1 << 1) #define CTRL_RASTER_MODE 1 #define CTRL_LIDD_MODE 0 #define LCD_LIDD_CTRL 0x0C #define LCD_LIDD_CS0_CONF 0x10 #define LCD_LIDD_CS0_ADDR 0x14 #define LCD_LIDD_CS0_DATA 0x18 #define LCD_LIDD_CS1_CONF 0x1C #define LCD_LIDD_CS1_ADDR 0x20 #define LCD_LIDD_CS1_DATA 0x24 #define LCD_RASTER_CTRL 0x28 #define RASTER_CTRL_TFT24_UNPACKED (1 << 26) #define RASTER_CTRL_TFT24 (1 << 25) #define RASTER_CTRL_STN565 (1 << 24) #define RASTER_CTRL_TFTPMAP (1 << 23) #define RASTER_CTRL_NIBMODE (1 << 22) #define RASTER_CTRL_PALMODE_SHIFT 20 #define PALETTE_PALETTE_AND_DATA 0x00 #define PALETTE_PALETTE_ONLY 0x01 #define PALETTE_DATA_ONLY 0x02 #define RASTER_CTRL_REQDLY_SHIFT 12 #define RASTER_CTRL_MONO8B (1 << 9) #define RASTER_CTRL_RBORDER (1 << 8) #define RASTER_CTRL_LCDTFT (1 << 7) #define RASTER_CTRL_LCDBW (1 << 1) #define RASTER_CTRL_LCDEN (1 << 0) #define LCD_RASTER_TIMING_0 0x2C #define RASTER_TIMING_0_HBP_SHIFT 24 #define RASTER_TIMING_0_HFP_SHIFT 16 #define RASTER_TIMING_0_HSW_SHIFT 10 #define RASTER_TIMING_0_PPLLSB_SHIFT 4 #define RASTER_TIMING_0_PPLMSB_SHIFT 3 #define LCD_RASTER_TIMING_1 0x30 #define RASTER_TIMING_1_VBP_SHIFT 24 #define RASTER_TIMING_1_VFP_SHIFT 16 #define RASTER_TIMING_1_VSW_SHIFT 10 #define RASTER_TIMING_1_LPP_SHIFT 0 #define LCD_RASTER_TIMING_2 0x34 #define RASTER_TIMING_2_HSWHI_SHIFT 27 #define RASTER_TIMING_2_LPP_B10_SHIFT 26 #define RASTER_TIMING_2_PHSVS (1 << 25) #define RASTER_TIMING_2_PHSVS_RISE (1 << 24) #define RASTER_TIMING_2_PHSVS_FALL (0 << 24) #define RASTER_TIMING_2_IOE (1 << 23) #define RASTER_TIMING_2_IPC (1 << 22) #define RASTER_TIMING_2_IHS (1 << 21) #define RASTER_TIMING_2_IVS (1 << 20) #define RASTER_TIMING_2_ACBI_SHIFT 16 #define RASTER_TIMING_2_ACB_SHIFT 8 #define RASTER_TIMING_2_HBPHI_SHIFT 4 #define RASTER_TIMING_2_HFPHI_SHIFT 0 #define LCD_RASTER_SUBPANEL 0x38 #define LCD_RASTER_SUBPANEL2 0x3C #define LCD_LCDDMA_CTRL 0x40 #define LCDDMA_CTRL_DMA_MASTER_PRIO_SHIFT 16 #define LCDDMA_CTRL_TH_FIFO_RDY_SHIFT 8 #define LCDDMA_CTRL_BURST_SIZE_SHIFT 4 #define LCDDMA_CTRL_BYTES_SWAP (1 << 3) #define LCDDMA_CTRL_BE (1 << 1) #define LCDDMA_CTRL_FB0_ONLY 0 #define LCDDMA_CTRL_FB0_FB1 (1 << 0) #define LCD_LCDDMA_FB0_BASE 0x44 #define LCD_LCDDMA_FB0_CEILING 0x48 #define LCD_LCDDMA_FB1_BASE 0x4C #define LCD_LCDDMA_FB1_CEILING 0x50 #define LCD_SYSCONFIG 0x54 #define SYSCONFIG_STANDBY_FORCE (0 << 4) #define SYSCONFIG_STANDBY_NONE (1 << 4) #define SYSCONFIG_STANDBY_SMART (2 << 4) #define SYSCONFIG_IDLE_FORCE (0 << 2) #define SYSCONFIG_IDLE_NONE (1 << 2) #define SYSCONFIG_IDLE_SMART (2 << 2) #define LCD_IRQSTATUS_RAW 0x58 #define LCD_IRQSTATUS 0x5C #define LCD_IRQENABLE_SET 0x60 #define LCD_IRQENABLE_CLEAR 0x64 #define IRQ_EOF1 (1 << 9) #define IRQ_EOF0 (1 << 8) #define IRQ_PL (1 << 6) #define IRQ_FUF (1 << 5) #define IRQ_ACB (1 << 3) #define IRQ_SYNC_LOST (1 << 2) #define IRQ_RASTER_DONE (1 << 1) #define IRQ_FRAME_DONE (1 << 0) #define LCD_CLKC_ENABLE 0x6C #define CLKC_ENABLE_DMA (1 << 2) #define CLKC_ENABLE_LDID (1 << 1) #define CLKC_ENABLE_CORE (1 << 0) #define LCD_CLKC_RESET 0x70 #define CLKC_RESET_MAIN (1 << 3) #define CLKC_RESET_DMA (1 << 2) #define CLKC_RESET_LDID (1 << 1) #define CLKC_RESET_CORE (1 << 0) #define LCD_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define LCD_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) #define LCD_LOCK_INIT(_sc) mtx_init(&(_sc)->sc_mtx, \ device_get_nameunit(_sc->sc_dev), "am335x_lcd", MTX_DEF) #define LCD_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->sc_mtx); #define LCD_READ4(_sc, reg) bus_read_4((_sc)->sc_mem_res, reg); #define LCD_WRITE4(_sc, reg, value) \ bus_write_4((_sc)->sc_mem_res, reg, value); /* Backlight is controlled by eCAS interface on PWM unit 0 */ #define PWM_UNIT 0 #define PWM_PERIOD 100 struct am335x_lcd_softc { device_t sc_dev; struct resource *sc_mem_res; struct resource *sc_irq_res; void *sc_intr_hl; struct mtx sc_mtx; int sc_backlight; struct sysctl_oid *sc_oid; /* Framebuffer */ bus_dma_tag_t sc_dma_tag; bus_dmamap_t sc_dma_map; size_t sc_fb_size; bus_addr_t sc_fb_phys; uint8_t *sc_fb_base; }; static void am335x_fb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) { bus_addr_t *addr; if (err) return; addr = (bus_addr_t*)arg; *addr = segs[0].ds_addr; } static uint32_t am335x_lcd_calc_divisor(uint32_t reference, uint32_t freq) { uint32_t div; /* Raster mode case: divisors are in range from 2 to 255 */ for (div = 2; div < 255; div++) if (reference/div <= freq) return (div); return (255); } static int am335x_lcd_sysctl_backlight(SYSCTL_HANDLER_ARGS) { struct am335x_lcd_softc *sc = (struct am335x_lcd_softc*)arg1; int error; int backlight; backlight = sc->sc_backlight;; error = sysctl_handle_int(oidp, &backlight, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (backlight < 0) backlight = 0; if (backlight > 100) backlight = 100; LCD_LOCK(sc); error = am335x_pwm_config_ecas(PWM_UNIT, PWM_PERIOD, backlight*PWM_PERIOD/100); if (error == 0) sc->sc_backlight = backlight; LCD_UNLOCK(sc); return (error); } static int am335x_read_panel_property(device_t dev, const char *name, uint32_t *val) { phandle_t node; pcell_t cell; node = ofw_bus_get_node(dev); if ((OF_getprop(node, name, &cell, sizeof(cell))) <= 0) { device_printf(dev, "missing '%s' attribute in LCD panel info\n", name); return (ENXIO); } *val = fdt32_to_cpu(cell); return (0); } static int am335x_read_panel_info(device_t dev, struct panel_info *panel) { int error; error = 0; if ((error = am335x_read_panel_property(dev, "panel_width", &panel->panel_width))) goto out; if ((error = am335x_read_panel_property(dev, "panel_height", &panel->panel_height))) goto out; if ((error = am335x_read_panel_property(dev, "panel_hfp", &panel->panel_hfp))) goto out; if ((error = am335x_read_panel_property(dev, "panel_hbp", &panel->panel_hbp))) goto out; if ((error = am335x_read_panel_property(dev, "panel_hsw", &panel->panel_hsw))) goto out; if ((error = am335x_read_panel_property(dev, "panel_vfp", &panel->panel_vfp))) goto out; if ((error = am335x_read_panel_property(dev, "panel_vbp", &panel->panel_vbp))) goto out; if ((error = am335x_read_panel_property(dev, "panel_vsw", &panel->panel_vsw))) goto out; if ((error = am335x_read_panel_property(dev, "panel_pxl_clk", &panel->panel_pxl_clk))) goto out; if ((error = am335x_read_panel_property(dev, "panel_invert_pxl_clk", &panel->panel_invert_pxl_clk))) goto out; if ((error = am335x_read_panel_property(dev, "ac_bias", &panel->ac_bias))) goto out; if ((error = am335x_read_panel_property(dev, "ac_bias_intrpt", &panel->ac_bias_intrpt))) goto out; if ((error = am335x_read_panel_property(dev, "dma_burst_sz", &panel->dma_burst_sz))) goto out; if ((error = am335x_read_panel_property(dev, "bpp", &panel->bpp))) goto out; if ((error = am335x_read_panel_property(dev, "fdd", &panel->fdd))) goto out; if ((error = am335x_read_panel_property(dev, "invert_line_clock", &panel->invert_line_clock))) goto out; if ((error = am335x_read_panel_property(dev, "invert_frm_clock", &panel->invert_frm_clock))) goto out; if ((error = am335x_read_panel_property(dev, "sync_edge", &panel->sync_edge))) goto out; error = am335x_read_panel_property(dev, "sync_ctrl", &panel->sync_ctrl); out: return (error); } static void am335x_lcd_intr(void *arg) { struct am335x_lcd_softc *sc = arg; uint32_t reg; reg = LCD_READ4(sc, LCD_IRQSTATUS); LCD_WRITE4(sc, LCD_IRQSTATUS, reg); if (reg & IRQ_SYNC_LOST) { reg = LCD_READ4(sc, LCD_RASTER_CTRL); reg &= ~RASTER_CTRL_LCDEN; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); reg = LCD_READ4(sc, LCD_RASTER_CTRL); reg |= RASTER_CTRL_LCDEN; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); return; } if (reg & IRQ_PL) { reg = LCD_READ4(sc, LCD_RASTER_CTRL); reg &= ~RASTER_CTRL_LCDEN; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); reg = LCD_READ4(sc, LCD_RASTER_CTRL); reg |= RASTER_CTRL_LCDEN; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); return; } if (reg & IRQ_EOF0) { LCD_WRITE4(sc, LCD_LCDDMA_FB0_BASE, sc->sc_fb_phys); LCD_WRITE4(sc, LCD_LCDDMA_FB0_CEILING, sc->sc_fb_phys + sc->sc_fb_size - 1); reg &= ~IRQ_EOF0; } if (reg & IRQ_EOF1) { LCD_WRITE4(sc, LCD_LCDDMA_FB1_BASE, sc->sc_fb_phys); LCD_WRITE4(sc, LCD_LCDDMA_FB1_CEILING, sc->sc_fb_phys + sc->sc_fb_size - 1); reg &= ~IRQ_EOF1; } if (reg & IRQ_FUF) { /* TODO: Handle FUF */ } if (reg & IRQ_ACB) { /* TODO: Handle ACB */ } } static int am335x_lcd_probe(device_t dev) { + int err; + if (!ofw_bus_is_compatible(dev, "ti,am335x-lcd")) return (ENXIO); device_set_desc(dev, "AM335x LCD controller"); - return (0); + err = sc_probe_unit(device_get_unit(dev), + device_get_flags(dev) | SC_AUTODETECT_KBD); + if (err != 0) + return (err); + + return (BUS_PROBE_DEFAULT); } static int am335x_lcd_attach(device_t dev) { struct am335x_lcd_softc *sc; int rid; int div; struct panel_info panel; uint32_t reg, timing0, timing1, timing2; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; uint32_t burst_log; int err; size_t dma_size; sc = device_get_softc(dev); sc->sc_dev = dev; if (am335x_read_panel_info(dev, &panel)) return (ENXIO); int ref_freq = 0; ti_prcm_clk_enable(LCDC_CLK); if (ti_prcm_clk_get_source_freq(LCDC_CLK, &ref_freq)) { device_printf(dev, "Can't get reference frequency\n"); return (ENXIO); } rid = 0; sc->sc_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!sc->sc_mem_res) { device_printf(dev, "cannot allocate memory window\n"); return (ENXIO); } rid = 0; sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (!sc->sc_irq_res) { bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem_res); device_printf(dev, "cannot allocate interrupt\n"); return (ENXIO); } if (bus_setup_intr(dev, sc->sc_irq_res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, am335x_lcd_intr, sc, &sc->sc_intr_hl) != 0) { bus_release_resource(dev, SYS_RES_IRQ, rid, sc->sc_irq_res); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->sc_mem_res); device_printf(dev, "Unable to setup the irq handler.\n"); return (ENXIO); } LCD_LOCK_INIT(sc); /* Panle initialization */ dma_size = round_page(panel.panel_width*panel.panel_height*panel.bpp/8); /* * Now allocate framebuffer memory */ err = bus_dma_tag_create( bus_get_dma_tag(dev), 4, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dma_size, 1, /* maxsize, nsegments */ dma_size, 0, /* maxsegsize, flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->sc_dma_tag); if (err) goto fail; err = bus_dmamem_alloc(sc->sc_dma_tag, (void **)&sc->sc_fb_base, - 0, &sc->sc_dma_map); + BUS_DMA_COHERENT, &sc->sc_dma_map); if (err) { device_printf(dev, "cannot allocate framebuffer\n"); goto fail; } err = bus_dmamap_load(sc->sc_dma_tag, sc->sc_dma_map, sc->sc_fb_base, dma_size, am335x_fb_dmamap_cb, &sc->sc_fb_phys, BUS_DMA_NOWAIT); if (err) { device_printf(dev, "cannot load DMA map\n"); goto fail; } /* Make sure it's blank */ memset(sc->sc_fb_base, 0x00, dma_size); /* Calculate actual FB Size */ sc->sc_fb_size = panel.panel_width*panel.panel_height*panel.bpp/8; /* Only raster mode is supported */ reg = CTRL_RASTER_MODE; div = am335x_lcd_calc_divisor(ref_freq, panel.panel_pxl_clk); reg |= (div << CTRL_DIV_SHIFT); LCD_WRITE4(sc, LCD_CTRL, reg); /* Set timing */ timing0 = timing1 = timing2 = 0; /* Horizontal back porch */ timing0 |= (panel.panel_hbp & 0xff) << RASTER_TIMING_0_HBP_SHIFT; timing2 |= ((panel.panel_hbp >> 8) & 3) << RASTER_TIMING_2_HBPHI_SHIFT; /* Horizontal front porch */ timing0 |= (panel.panel_hfp & 0xff) << RASTER_TIMING_0_HFP_SHIFT; timing2 |= ((panel.panel_hfp >> 8) & 3) << RASTER_TIMING_2_HFPHI_SHIFT; /* Horizontal sync width */ timing0 |= (panel.panel_hsw & 0x3f) << RASTER_TIMING_0_HSW_SHIFT; timing2 |= ((panel.panel_hsw >> 6) & 0xf) << RASTER_TIMING_2_HSWHI_SHIFT; /* Vertical back porch, front porch, sync width */ timing1 |= (panel.panel_vbp & 0xff) << RASTER_TIMING_1_VBP_SHIFT; timing1 |= (panel.panel_vfp & 0xff) << RASTER_TIMING_1_VFP_SHIFT; timing1 |= (panel.panel_vsw & 0x3f) << RASTER_TIMING_1_VSW_SHIFT; /* Pixels per line */ timing0 |= (((panel.panel_width - 1) >> 10) & 1) << RASTER_TIMING_0_PPLMSB_SHIFT; timing0 |= (((panel.panel_width - 1) >> 4) & 0x3f) << RASTER_TIMING_0_PPLLSB_SHIFT; /* Lines per panel */ timing1 |= ((panel.panel_height - 1) & 0x3ff) << RASTER_TIMING_1_LPP_SHIFT; timing2 |= (((panel.panel_height - 1) >> 10 ) & 1) << RASTER_TIMING_2_LPP_B10_SHIFT; /* clock signal settings */ if (panel.sync_ctrl) timing2 |= RASTER_TIMING_2_PHSVS; if (panel.sync_edge) timing2 |= RASTER_TIMING_2_PHSVS_RISE; else timing2 |= RASTER_TIMING_2_PHSVS_FALL; if (panel.invert_line_clock) timing2 |= RASTER_TIMING_2_IHS; if (panel.invert_frm_clock) timing2 |= RASTER_TIMING_2_IVS; if (panel.panel_invert_pxl_clk) timing2 |= RASTER_TIMING_2_IPC; /* AC bias */ timing2 |= (panel.ac_bias << RASTER_TIMING_2_ACB_SHIFT); timing2 |= (panel.ac_bias_intrpt << RASTER_TIMING_2_ACBI_SHIFT); LCD_WRITE4(sc, LCD_RASTER_TIMING_0, timing0); LCD_WRITE4(sc, LCD_RASTER_TIMING_1, timing1); LCD_WRITE4(sc, LCD_RASTER_TIMING_2, timing2); /* DMA settings */ reg = LCDDMA_CTRL_FB0_FB1; /* Find power of 2 for current burst size */ switch (panel.dma_burst_sz) { case 1: burst_log = 0; break; case 2: burst_log = 1; break; case 4: burst_log = 2; break; case 8: burst_log = 3; break; case 16: default: burst_log = 4; break; } reg |= (burst_log << LCDDMA_CTRL_BURST_SIZE_SHIFT); /* XXX: FIFO TH */ reg |= (0 << LCDDMA_CTRL_TH_FIFO_RDY_SHIFT); LCD_WRITE4(sc, LCD_LCDDMA_CTRL, reg); LCD_WRITE4(sc, LCD_LCDDMA_FB0_BASE, sc->sc_fb_phys); LCD_WRITE4(sc, LCD_LCDDMA_FB0_CEILING, sc->sc_fb_phys + sc->sc_fb_size - 1); LCD_WRITE4(sc, LCD_LCDDMA_FB1_BASE, sc->sc_fb_phys); LCD_WRITE4(sc, LCD_LCDDMA_FB1_CEILING, sc->sc_fb_phys + sc->sc_fb_size - 1); /* Enable LCD */ reg = RASTER_CTRL_LCDTFT; reg |= (panel.fdd << RASTER_CTRL_REQDLY_SHIFT); reg |= (PALETTE_DATA_ONLY << RASTER_CTRL_PALMODE_SHIFT); if (panel.bpp >= 24) reg |= RASTER_CTRL_TFT24; if (panel.bpp == 32) reg |= RASTER_CTRL_TFT24_UNPACKED; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); LCD_WRITE4(sc, LCD_CLKC_ENABLE, CLKC_ENABLE_DMA | CLKC_ENABLE_LDID | CLKC_ENABLE_CORE); LCD_WRITE4(sc, LCD_CLKC_RESET, CLKC_RESET_MAIN); DELAY(100); LCD_WRITE4(sc, LCD_CLKC_RESET, 0); reg = IRQ_EOF1 | IRQ_EOF0 | IRQ_FUF | IRQ_PL | IRQ_ACB | IRQ_SYNC_LOST | IRQ_RASTER_DONE | IRQ_FRAME_DONE; LCD_WRITE4(sc, LCD_IRQENABLE_SET, reg); reg = LCD_READ4(sc, LCD_RASTER_CTRL); reg |= RASTER_CTRL_LCDEN; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); LCD_WRITE4(sc, LCD_SYSCONFIG, SYSCONFIG_STANDBY_SMART | SYSCONFIG_IDLE_SMART); /* Init backlight interface */ ctx = device_get_sysctl_ctx(sc->sc_dev); tree = device_get_sysctl_tree(sc->sc_dev); sc->sc_oid = SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "backlight", CTLTYPE_INT | CTLFLAG_RW, sc, 0, am335x_lcd_sysctl_backlight, "I", "LCD backlight"); sc->sc_backlight = 0; /* Check if eCAS interface is available at this point */ if (am335x_pwm_config_ecas(PWM_UNIT, PWM_PERIOD, PWM_PERIOD) == 0) sc->sc_backlight = 100; + + err = (sc_attach_unit(device_get_unit(dev), + device_get_flags(dev) | SC_AUTODETECT_KBD)); + + if (err) { + device_printf(dev, "failed to attach syscons\n"); + goto fail; + } am335x_lcd_syscons_setup((vm_offset_t)sc->sc_fb_base, sc->sc_fb_phys, &panel); return (0); fail: return (err); } static int am335x_lcd_detach(device_t dev) { /* Do not let unload driver */ return (EBUSY); } static device_method_t am335x_lcd_methods[] = { DEVMETHOD(device_probe, am335x_lcd_probe), DEVMETHOD(device_attach, am335x_lcd_attach), DEVMETHOD(device_detach, am335x_lcd_detach), DEVMETHOD_END }; static driver_t am335x_lcd_driver = { "am335x_lcd", am335x_lcd_methods, sizeof(struct am335x_lcd_softc), }; static devclass_t am335x_lcd_devclass; DRIVER_MODULE(am335x_lcd, simplebus, am335x_lcd_driver, am335x_lcd_devclass, 0, 0); MODULE_VERSION(am335x_lcd, 1); MODULE_DEPEND(am335x_lcd, simplebus, 1, 1, 1); Index: user/attilio/vmcontention/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c =================================================================== --- user/attilio/vmcontention/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c (revision 252343) +++ user/attilio/vmcontention/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c (revision 252344) @@ -1,6886 +1,6889 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. */ /* Portions Copyright 2007 Jeremy Teo */ /* Portions Copyright 2010 Robert Milkowski */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Programming rules. * * Each vnode op performs some logical unit of work. To do this, the ZPL must * properly lock its in-core state, create a DMU transaction, do the work, * record this work in the intent log (ZIL), commit the DMU transaction, * and wait for the intent log to commit if it is a synchronous operation. * Moreover, the vnode ops must work in both normal and log replay context. * The ordering of events is important to avoid deadlocks and references * to freed memory. The example below illustrates the following Big Rules: * * (1) A check must be made in each zfs thread for a mounted file system. * This is done avoiding races using ZFS_ENTER(zfsvfs). * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros * can return EIO from the calling function. * * (2) VN_RELE() should always be the last thing except for zil_commit() * (if necessary) and ZFS_EXIT(). This is for 3 reasons: * First, if it's the last reference, the vnode/znode * can be freed, so the zp may point to freed memory. Second, the last * reference will call zfs_zinactive(), which may induce a lot of work -- * pushing cached pages (which acquires range locks) and syncing out * cached atime changes. Third, zfs_zinactive() may require a new tx, * which could deadlock the system if you were already holding one. * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). * * (3) All range locks must be grabbed before calling dmu_tx_assign(), * as they can span dmu_tx_assign() calls. * * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). * This is critical because we don't want to block while holding locks. * Note, in particular, that if a lock is sometimes acquired before * the tx assigns, and sometimes after (e.g. z_lock), then failing to * use a non-blocking assign can deadlock the system. The scenario: * * Thread A has grabbed a lock before calling dmu_tx_assign(). * Thread B is in an already-assigned tx, and blocks for this lock. * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() * forever, because the previous txg can't quiesce until B's tx commits. * * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, * then drop all locks, call dmu_tx_wait(), and try again. * * (5) If the operation succeeded, generate the intent log entry for it * before dropping locks. This ensures that the ordering of events * in the intent log matches the order in which they actually occurred. * During ZIL replay the zfs_log_* functions will update the sequence * number to indicate the zil transaction has replayed. * * (6) At the end of each vnode op, the DMU tx must always commit, * regardless of whether there were any errors. * * (7) After dropping all locks, invoke zil_commit(zilog, foid) * to ensure that synchronous semantics are provided when necessary. * * In general, this is how things should be ordered in each vnode op: * * ZFS_ENTER(zfsvfs); // exit if unmounted * top: * zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD()) * rw_enter(...); // grab any other locks you need * tx = dmu_tx_create(...); // get DMU tx * dmu_tx_hold_*(); // hold each object you might modify * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign * if (error) { * rw_exit(...); // drop locks * zfs_dirent_unlock(dl); // unlock directory entry * VN_RELE(...); // release held vnodes * if (error == ERESTART) { * dmu_tx_wait(tx); * dmu_tx_abort(tx); * goto top; * } * dmu_tx_abort(tx); // abort DMU tx * ZFS_EXIT(zfsvfs); // finished in zfs * return (error); // really out of space * } * error = do_real_work(); // do whatever this VOP does * if (error == 0) * zfs_log_*(...); // on success, make ZIL entry * dmu_tx_commit(tx); // commit DMU tx -- error or not * rw_exit(...); // drop locks * zfs_dirent_unlock(dl); // unlock directory entry * VN_RELE(...); // release held vnodes * zil_commit(zilog, foid); // synchronous when necessary * ZFS_EXIT(zfsvfs); // finished in zfs * return (error); // done, report error */ /* ARGSUSED */ static int zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(*vpp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && ((flag & FAPPEND) == 0)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && ZTOV(zp)->v_type == VREG && !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { if (fs_vscan(*vpp, cr, 0) != 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EACCES)); } } /* Keep a count of the synchronous opens in the znode */ if (flag & (FSYNC | FDSYNC)) atomic_inc_32(&zp->z_sync_cnt); ZFS_EXIT(zfsvfs); return (0); } /* ARGSUSED */ static int zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; /* * Clean up any locks held by this process on the vp. */ cleanlocks(vp, ddi_get_pid(), 0); cleanshares(vp, ddi_get_pid()); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); /* Decrement the synchronous opens in the znode */ if ((flag & (FSYNC | FDSYNC)) && (count == 1)) atomic_dec_32(&zp->z_sync_cnt); if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && ZTOV(zp)->v_type == VREG && !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) VERIFY(fs_vscan(vp, cr, 1) == 0); ZFS_EXIT(zfsvfs); return (0); } /* * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. */ static int zfs_holey(vnode_t *vp, u_long cmd, offset_t *off) { znode_t *zp = VTOZ(vp); uint64_t noff = (uint64_t)*off; /* new offset */ uint64_t file_sz; int error; boolean_t hole; file_sz = zp->z_size; if (noff >= file_sz) { return (SET_ERROR(ENXIO)); } if (cmd == _FIO_SEEK_HOLE) hole = B_TRUE; else hole = B_FALSE; error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); /* end of file? */ if ((error == ESRCH) || (noff > file_sz)) { /* * Handle the virtual hole at the end of file. */ if (hole) { *off = file_sz; return (0); } return (SET_ERROR(ENXIO)); } if (noff < *off) return (error); *off = noff; return (error); } /* ARGSUSED */ static int zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, int *rvalp, caller_context_t *ct) { offset_t off; int error; zfsvfs_t *zfsvfs; znode_t *zp; switch (com) { case _FIOFFS: return (0); /* * The following two ioctls are used by bfu. Faking out, * necessary to avoid bfu errors. */ case _FIOGDIO: case _FIOSDIO: return (0); case _FIO_SEEK_DATA: case _FIO_SEEK_HOLE: #ifdef sun if (ddi_copyin((void *)data, &off, sizeof (off), flag)) return (SET_ERROR(EFAULT)); #else off = *(offset_t *)data; #endif zp = VTOZ(vp); zfsvfs = zp->z_zfsvfs; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); /* offset parameter is in/out */ error = zfs_holey(vp, com, &off); ZFS_EXIT(zfsvfs); if (error) return (error); #ifdef sun if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) return (SET_ERROR(EFAULT)); #else *(offset_t *)data = off; #endif return (0); } return (SET_ERROR(ENOTTY)); } static vm_page_t page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) { vm_object_t obj; vm_page_t pp; obj = vp->v_object; zfs_vmobject_assert_wlocked(obj); for (;;) { if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && pp->valid) { if ((pp->oflags & VPO_BUSY) != 0) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_reference(pp); vm_page_sleep(pp, "zfsmwb"); continue; } - } else { + } else if (pp == NULL) { pp = vm_page_alloc(obj, OFF_TO_IDX(start), VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED | VM_ALLOC_NOBUSY); + } else { + ASSERT(pp != NULL && !pp->valid); + pp = NULL; } if (pp != NULL) { ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); vm_object_pip_add(obj, 1); vm_page_io_start(pp); pmap_remove_write(pp); vm_page_clear_dirty(pp, off, nbytes); } break; } return (pp); } static void page_unbusy(vm_page_t pp) { vm_page_io_finish(pp); vm_object_pip_subtract(pp->object, 1); } static vm_page_t page_hold(vnode_t *vp, int64_t start) { vm_object_t obj; vm_page_t pp; obj = vp->v_object; zfs_vmobject_assert_wlocked(obj); for (;;) { if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && pp->valid) { if ((pp->oflags & VPO_BUSY) != 0) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_reference(pp); vm_page_sleep(pp, "zfsmwb"); continue; } ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); vm_page_lock(pp); vm_page_hold(pp); vm_page_unlock(pp); } else pp = NULL; break; } return (pp); } static void page_unhold(vm_page_t pp) { vm_page_lock(pp); vm_page_unhold(pp); vm_page_unlock(pp); } static caddr_t zfs_map_page(vm_page_t pp, struct sf_buf **sfp) { *sfp = sf_buf_alloc(pp, 0); return ((caddr_t)sf_buf_kva(*sfp)); } static void zfs_unmap_page(struct sf_buf *sf) { sf_buf_free(sf); } /* * When a file is memory mapped, we must keep the IO data synchronized * between the DMU cache and the memory mapped pages. What this means: * * On Write: If we find a memory mapped page, we write to *both* * the page and the dmu buffer. */ static void update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, int segflg, dmu_tx_t *tx) { vm_object_t obj; struct sf_buf *sf; caddr_t va; int off; ASSERT(vp->v_mount != NULL); obj = vp->v_object; ASSERT(obj != NULL); off = start & PAGEOFFSET; zfs_vmobject_wlock(obj); for (start &= PAGEMASK; len > 0; start += PAGESIZE) { vm_page_t pp; int nbytes = imin(PAGESIZE - off, len); if (segflg == UIO_NOCOPY) { pp = vm_page_lookup(obj, OFF_TO_IDX(start)); KASSERT(pp != NULL, ("zfs update_pages: NULL page in putpages case")); KASSERT(off == 0, ("zfs update_pages: unaligned data in putpages case")); KASSERT(pp->valid == VM_PAGE_BITS_ALL, ("zfs update_pages: invalid page in putpages case")); KASSERT(pp->busy > 0, ("zfs update_pages: unbusy page in putpages case")); KASSERT(!pmap_page_is_write_mapped(pp), ("zfs update_pages: writable page in putpages case")); zfs_vmobject_wunlock(obj); va = zfs_map_page(pp, &sf); (void) dmu_write(os, oid, start, nbytes, va, tx); zfs_unmap_page(sf); zfs_vmobject_wlock(obj); vm_page_undirty(pp); } else if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { zfs_vmobject_wunlock(obj); va = zfs_map_page(pp, &sf); (void) dmu_read(os, oid, start+off, nbytes, va+off, DMU_READ_PREFETCH);; zfs_unmap_page(sf); zfs_vmobject_wlock(obj); page_unbusy(pp); } len -= nbytes; off = 0; } if (segflg != UIO_NOCOPY) vm_object_pip_wakeupn(obj, 0); zfs_vmobject_wunlock(obj); } /* * Read with UIO_NOCOPY flag means that sendfile(2) requests * ZFS to populate a range of page cache pages with data. * * NOTE: this function could be optimized to pre-allocate * all pages in advance, drain VPO_BUSY on all of them, * map them into contiguous KVA region and populate them * in one single dmu_read() call. */ static int mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) { znode_t *zp = VTOZ(vp); objset_t *os = zp->z_zfsvfs->z_os; struct sf_buf *sf; vm_object_t obj; vm_page_t pp; int64_t start; caddr_t va; int len = nbytes; int off; int error = 0; ASSERT(uio->uio_segflg == UIO_NOCOPY); ASSERT(vp->v_mount != NULL); obj = vp->v_object; ASSERT(obj != NULL); ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); zfs_vmobject_wlock(obj); for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { int bytes = MIN(PAGESIZE, len); pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_NOBUSY | VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_IGN_SBUSY); if (pp->valid == 0) { vm_page_io_start(pp); zfs_vmobject_wunlock(obj); va = zfs_map_page(pp, &sf); error = dmu_read(os, zp->z_id, start, bytes, va, DMU_READ_PREFETCH); if (bytes != PAGESIZE && error == 0) bzero(va + bytes, PAGESIZE - bytes); zfs_unmap_page(sf); zfs_vmobject_wlock(obj); vm_page_io_finish(pp); vm_page_lock(pp); if (error) { vm_page_free(pp); } else { pp->valid = VM_PAGE_BITS_ALL; vm_page_activate(pp); } vm_page_unlock(pp); } if (error) break; uio->uio_resid -= bytes; uio->uio_offset += bytes; len -= bytes; } zfs_vmobject_wunlock(obj); return (error); } /* * When a file is memory mapped, we must keep the IO data synchronized * between the DMU cache and the memory mapped pages. What this means: * * On Read: We "read" preferentially from memory mapped pages, * else we default from the dmu buffer. * * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when * the file is memory mapped. */ static int mappedread(vnode_t *vp, int nbytes, uio_t *uio) { znode_t *zp = VTOZ(vp); objset_t *os = zp->z_zfsvfs->z_os; vm_object_t obj; int64_t start; caddr_t va; int len = nbytes; int off; int error = 0; ASSERT(vp->v_mount != NULL); obj = vp->v_object; ASSERT(obj != NULL); start = uio->uio_loffset; off = start & PAGEOFFSET; zfs_vmobject_wlock(obj); for (start &= PAGEMASK; len > 0; start += PAGESIZE) { vm_page_t pp; uint64_t bytes = MIN(PAGESIZE - off, len); if (pp = page_hold(vp, start)) { struct sf_buf *sf; caddr_t va; zfs_vmobject_wunlock(obj); va = zfs_map_page(pp, &sf); error = uiomove(va + off, bytes, UIO_READ, uio); zfs_unmap_page(sf); zfs_vmobject_wlock(obj); page_unhold(pp); } else { zfs_vmobject_wunlock(obj); error = dmu_read_uio(os, zp->z_id, uio, bytes); zfs_vmobject_wlock(obj); } len -= bytes; off = 0; if (error) break; } zfs_vmobject_wunlock(obj); return (error); } offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ /* * Read bytes from specified file into supplied buffer. * * IN: vp - vnode of file to be read from. * uio - structure supplying read location, range info, * and return buffer. * ioflag - SYNC flags; used to provide FRSYNC semantics. * cr - credentials of caller. * ct - caller context * * OUT: uio - updated offset and range, buffer filled. * * RETURN: 0 on success, error code on failure. * * Side Effects: * vp - atime updated if byte count > 0 */ /* ARGSUSED */ static int zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os; ssize_t n, nbytes; int error = 0; rl_t *rl; xuio_t *xuio = NULL; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); os = zfsvfs->z_os; if (zp->z_pflags & ZFS_AV_QUARANTINED) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EACCES)); } /* * Validate file offset */ if (uio->uio_loffset < (offset_t)0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } /* * Fasttrack empty reads */ if (uio->uio_resid == 0) { ZFS_EXIT(zfsvfs); return (0); } /* * Check for mandatory locks */ if (MANDMODE(zp->z_mode)) { if (error = chklock(vp, FREAD, uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { ZFS_EXIT(zfsvfs); return (error); } } /* * If we're in FRSYNC mode, sync out this znode before reading it. */ if (zfsvfs->z_log && (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) zil_commit(zfsvfs->z_log, zp->z_id); /* * Lock the range against changes. */ rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); /* * If we are reading past end-of-file we can skip * to the end; but we might still need to set atime. */ if (uio->uio_loffset >= zp->z_size) { error = 0; goto out; } ASSERT(uio->uio_loffset < zp->z_size); n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); #ifdef sun if ((uio->uio_extflg == UIO_XUIO) && (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { int nblk; int blksz = zp->z_blksz; uint64_t offset = uio->uio_loffset; xuio = (xuio_t *)uio; if ((ISP2(blksz))) { nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, blksz)) / blksz; } else { ASSERT(offset + n <= blksz); nblk = 1; } (void) dmu_xuio_init(xuio, nblk); if (vn_has_cached_data(vp)) { /* * For simplicity, we always allocate a full buffer * even if we only expect to read a portion of a block. */ while (--nblk >= 0) { (void) dmu_xuio_add(xuio, dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), blksz), 0, blksz); } } } #endif /* sun */ while (n > 0) { nbytes = MIN(n, zfs_read_chunk_size - P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); #ifdef __FreeBSD__ if (uio->uio_segflg == UIO_NOCOPY) error = mappedread_sf(vp, nbytes, uio); else #endif /* __FreeBSD__ */ if (vn_has_cached_data(vp)) error = mappedread(vp, nbytes, uio); else error = dmu_read_uio(os, zp->z_id, uio, nbytes); if (error) { /* convert checksum errors into IO errors */ if (error == ECKSUM) error = SET_ERROR(EIO); break; } n -= nbytes; } out: zfs_range_unlock(rl); ZFS_ACCESSTIME_STAMP(zfsvfs, zp); ZFS_EXIT(zfsvfs); return (error); } /* * Write the bytes to a file. * * IN: vp - vnode of file to be written to. * uio - structure supplying write location, range info, * and data buffer. * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is * set if in append mode. * cr - credentials of caller. * ct - caller context (NFS/CIFS fem monitor only) * * OUT: uio - updated offset and range. * * RETURN: 0 on success, error code on failure. * * Timestamps: * vp - ctime|mtime updated if byte count > 0 */ /* ARGSUSED */ static int zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); rlim64_t limit = MAXOFFSET_T; ssize_t start_resid = uio->uio_resid; ssize_t tx_bytes; uint64_t end_size; dmu_tx_t *tx; zfsvfs_t *zfsvfs = zp->z_zfsvfs; zilog_t *zilog; offset_t woff; ssize_t n, nbytes; rl_t *rl; int max_blksz = zfsvfs->z_max_blksz; int error = 0; arc_buf_t *abuf; iovec_t *aiov = NULL; xuio_t *xuio = NULL; int i_iov = 0; int iovcnt = uio->uio_iovcnt; iovec_t *iovp = uio->uio_iov; int write_eof; int count = 0; sa_bulk_attr_t bulk[4]; uint64_t mtime[2], ctime[2]; /* * Fasttrack empty write */ n = start_resid; if (n == 0) return (0); if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) limit = MAXOFFSET_T; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, &zp->z_size, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags, 8); /* * If immutable or not appending then return EPERM */ if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && (uio->uio_loffset < zp->z_size))) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } zilog = zfsvfs->z_log; /* * Validate file offset */ woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; if (woff < 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } /* * Check for mandatory locks before calling zfs_range_lock() * in order to prevent a deadlock with locks set via fcntl(). */ if (MANDMODE((mode_t)zp->z_mode) && (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { ZFS_EXIT(zfsvfs); return (error); } #ifdef sun /* * Pre-fault the pages to ensure slow (eg NFS) pages * don't hold up txg. * Skip this if uio contains loaned arc_buf. */ if ((uio->uio_extflg == UIO_XUIO) && (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) xuio = (xuio_t *)uio; else uio_prefaultpages(MIN(n, max_blksz), uio); #endif /* sun */ /* * If in append mode, set the io offset pointer to eof. */ if (ioflag & FAPPEND) { /* * Obtain an appending range lock to guarantee file append * semantics. We reset the write offset once we have the lock. */ rl = zfs_range_lock(zp, 0, n, RL_APPEND); woff = rl->r_off; if (rl->r_len == UINT64_MAX) { /* * We overlocked the file because this write will cause * the file block size to increase. * Note that zp_size cannot change with this lock held. */ woff = zp->z_size; } uio->uio_loffset = woff; } else { /* * Note that if the file block size will change as a result of * this write, then this range lock will lock the entire file * so that we can re-write the block safely. */ rl = zfs_range_lock(zp, woff, n, RL_WRITER); } if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { zfs_range_unlock(rl); ZFS_EXIT(zfsvfs); return (EFBIG); } if (woff >= limit) { zfs_range_unlock(rl); ZFS_EXIT(zfsvfs); return (SET_ERROR(EFBIG)); } if ((woff + n) > limit || woff > (limit - n)) n = limit - woff; /* Will this write extend the file length? */ write_eof = (woff + n > zp->z_size); end_size = MAX(zp->z_size, woff + n); /* * Write the file in reasonable size chunks. Each chunk is written * in a separate transaction; this keeps the intent log records small * and allows us to do more fine-grained space accounting. */ while (n > 0) { abuf = NULL; woff = uio->uio_loffset; again: if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { if (abuf != NULL) dmu_return_arcbuf(abuf); error = SET_ERROR(EDQUOT); break; } if (xuio && abuf == NULL) { ASSERT(i_iov < iovcnt); aiov = &iovp[i_iov]; abuf = dmu_xuio_arcbuf(xuio, i_iov); dmu_xuio_clear(xuio, i_iov); DTRACE_PROBE3(zfs_cp_write, int, i_iov, iovec_t *, aiov, arc_buf_t *, abuf); ASSERT((aiov->iov_base == abuf->b_data) || ((char *)aiov->iov_base - (char *)abuf->b_data + aiov->iov_len == arc_buf_size(abuf))); i_iov++; } else if (abuf == NULL && n >= max_blksz && woff >= zp->z_size && P2PHASE(woff, max_blksz) == 0 && zp->z_blksz == max_blksz) { /* * This write covers a full block. "Borrow" a buffer * from the dmu so that we can fill it before we enter * a transaction. This avoids the possibility of * holding up the transaction if the data copy hangs * up on a pagefault (e.g., from an NFS server mapping). */ size_t cbytes; abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), max_blksz); ASSERT(abuf != NULL); ASSERT(arc_buf_size(abuf) == max_blksz); if (error = uiocopy(abuf->b_data, max_blksz, UIO_WRITE, uio, &cbytes)) { dmu_return_arcbuf(abuf); break; } ASSERT(cbytes == max_blksz); } /* * Start a transaction. */ tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto again; } dmu_tx_abort(tx); if (abuf != NULL) dmu_return_arcbuf(abuf); break; } /* * If zfs_range_lock() over-locked we grow the blocksize * and then reduce the lock range. This will only happen * on the first iteration since zfs_range_reduce() will * shrink down r_len to the appropriate size. */ if (rl->r_len == UINT64_MAX) { uint64_t new_blksz; if (zp->z_blksz > max_blksz) { ASSERT(!ISP2(zp->z_blksz)); new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); } else { new_blksz = MIN(end_size, max_blksz); } zfs_grow_blocksize(zp, new_blksz, tx); zfs_range_reduce(rl, woff, n); } /* * XXX - should we really limit each write to z_max_blksz? * Perhaps we should use SPA_MAXBLOCKSIZE chunks? */ nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); if (woff + nbytes > zp->z_size) vnode_pager_setsize(vp, woff + nbytes); if (abuf == NULL) { tx_bytes = uio->uio_resid; error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio, nbytes, tx); tx_bytes -= uio->uio_resid; } else { tx_bytes = nbytes; ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); /* * If this is not a full block write, but we are * extending the file past EOF and this data starts * block-aligned, use assign_arcbuf(). Otherwise, * write via dmu_write(). */ if (tx_bytes < max_blksz && (!write_eof || aiov->iov_base != abuf->b_data)) { ASSERT(xuio); dmu_write(zfsvfs->z_os, zp->z_id, woff, aiov->iov_len, aiov->iov_base, tx); dmu_return_arcbuf(abuf); xuio_stat_wbuf_copied(); } else { ASSERT(xuio || tx_bytes == max_blksz); dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), woff, abuf, tx); } ASSERT(tx_bytes <= uio->uio_resid); uioskip(uio, tx_bytes); } if (tx_bytes && vn_has_cached_data(vp)) { update_pages(vp, woff, tx_bytes, zfsvfs->z_os, zp->z_id, uio->uio_segflg, tx); } /* * If we made no progress, we're done. If we made even * partial progress, update the znode and ZIL accordingly. */ if (tx_bytes == 0) { (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), (void *)&zp->z_size, sizeof (uint64_t), tx); dmu_tx_commit(tx); ASSERT(error != 0); break; } /* * Clear Set-UID/Set-GID bits on successful write if not * privileged and at least one of the excute bits is set. * * It would be nice to to this after all writes have * been done, but that would still expose the ISUID/ISGID * to another app after the partial write is committed. * * Note: we don't call zfs_fuid_map_id() here because * user 0 is not an ephemeral uid. */ mutex_enter(&zp->z_acl_lock); if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | (S_IXUSR >> 6))) != 0 && (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && secpolicy_vnode_setid_retain(vp, cr, (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { uint64_t newmode; zp->z_mode &= ~(S_ISUID | S_ISGID); newmode = zp->z_mode; (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), (void *)&newmode, sizeof (uint64_t), tx); } mutex_exit(&zp->z_acl_lock); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); /* * Update the file size (zp_size) if it has changed; * account for possible concurrent updates. */ while ((end_size = zp->z_size) < uio->uio_loffset) { (void) atomic_cas_64(&zp->z_size, end_size, uio->uio_loffset); ASSERT(error == 0); } /* * If we are replaying and eof is non zero then force * the file size to the specified eof. Note, there's no * concurrency during replay. */ if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) zp->z_size = zfsvfs->z_replay_eof; error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); dmu_tx_commit(tx); if (error != 0) break; ASSERT(tx_bytes == nbytes); n -= nbytes; #ifdef sun if (!xuio && n > 0) uio_prefaultpages(MIN(n, max_blksz), uio); #endif /* sun */ } zfs_range_unlock(rl); /* * If we're in replay mode, or we made no progress, return error. * Otherwise, it's at least a partial write, so it's successful. */ if (zfsvfs->z_replay || uio->uio_resid == start_resid) { ZFS_EXIT(zfsvfs); return (error); } if (ioflag & (FSYNC | FDSYNC) || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, zp->z_id); ZFS_EXIT(zfsvfs); return (0); } void zfs_get_done(zgd_t *zgd, int error) { znode_t *zp = zgd->zgd_private; objset_t *os = zp->z_zfsvfs->z_os; if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); zfs_range_unlock(zgd->zgd_rl); /* * Release the vnode asynchronously as we currently have the * txg stopped from syncing. */ VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); if (error == 0 && zgd->zgd_bp) zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); kmem_free(zgd, sizeof (zgd_t)); } #ifdef DEBUG static int zil_fault_io = 0; #endif /* * Get data to generate a TX_WRITE intent log record. */ int zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) { zfsvfs_t *zfsvfs = arg; objset_t *os = zfsvfs->z_os; znode_t *zp; uint64_t object = lr->lr_foid; uint64_t offset = lr->lr_offset; uint64_t size = lr->lr_length; blkptr_t *bp = &lr->lr_blkptr; dmu_buf_t *db; zgd_t *zgd; int error = 0; ASSERT(zio != NULL); ASSERT(size != 0); /* * Nothing to do if the file has been removed */ if (zfs_zget(zfsvfs, object, &zp) != 0) return (SET_ERROR(ENOENT)); if (zp->z_unlinked) { /* * Release the vnode asynchronously as we currently have the * txg stopped from syncing. */ VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); return (SET_ERROR(ENOENT)); } zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); zgd->zgd_zilog = zfsvfs->z_log; zgd->zgd_private = zp; /* * Write records come in two flavors: immediate and indirect. * For small writes it's cheaper to store the data with the * log record (immediate); for large writes it's cheaper to * sync the data and get a pointer to it (indirect) so that * we don't have to write the data twice. */ if (buf != NULL) { /* immediate write */ zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); /* test for truncation needs to be done while range locked */ if (offset >= zp->z_size) { error = SET_ERROR(ENOENT); } else { error = dmu_read(os, object, offset, size, buf, DMU_READ_NO_PREFETCH); } ASSERT(error == 0 || error == ENOENT); } else { /* indirect write */ /* * Have to lock the whole block to ensure when it's * written out and it's checksum is being calculated * that no one can change the data. We need to re-check * blocksize after we get the lock in case it's changed! */ for (;;) { uint64_t blkoff; size = zp->z_blksz; blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; offset -= blkoff; zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); if (zp->z_blksz == size) break; offset += blkoff; zfs_range_unlock(zgd->zgd_rl); } /* test for truncation needs to be done while range locked */ if (lr->lr_offset >= zp->z_size) error = SET_ERROR(ENOENT); #ifdef DEBUG if (zil_fault_io) { error = SET_ERROR(EIO); zil_fault_io = 0; } #endif if (error == 0) error = dmu_buf_hold(os, object, offset, zgd, &db, DMU_READ_NO_PREFETCH); if (error == 0) { blkptr_t *obp = dmu_buf_get_blkptr(db); if (obp) { ASSERT(BP_IS_HOLE(bp)); *bp = *obp; } zgd->zgd_db = db; zgd->zgd_bp = bp; ASSERT(db->db_offset == offset); ASSERT(db->db_size == size); error = dmu_sync(zio, lr->lr_common.lrc_txg, zfs_get_done, zgd); ASSERT(error || lr->lr_length <= zp->z_blksz); /* * On success, we need to wait for the write I/O * initiated by dmu_sync() to complete before we can * release this dbuf. We will finish everything up * in the zfs_get_done() callback. */ if (error == 0) return (0); if (error == EALREADY) { lr->lr_common.lrc_txtype = TX_WRITE2; error = 0; } } } zfs_get_done(zgd, error); return (error); } /*ARGSUSED*/ static int zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; int error; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); if (flag & V_ACE_MASK) error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); else error = zfs_zaccess_rwx(zp, mode, flag, cr); ZFS_EXIT(zfsvfs); return (error); } /* * If vnode is for a device return a specfs vnode instead. */ static int specvp_check(vnode_t **vpp, cred_t *cr) { int error = 0; if (IS_DEVVP(*vpp)) { struct vnode *svp; svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); VN_RELE(*vpp); if (svp == NULL) error = SET_ERROR(ENOSYS); *vpp = svp; } return (error); } /* * Lookup an entry in a directory, or an extended attribute directory. * If it exists, return a held vnode reference for it. * * IN: dvp - vnode of directory to search. * nm - name of entry to lookup. * pnp - full pathname to lookup [UNUSED]. * flags - LOOKUP_XATTR set if looking for an attribute. * rdir - root directory vnode [UNUSED]. * cr - credentials of caller. * ct - caller context * direntflags - directory lookup flags * realpnp - returned pathname. * * OUT: vpp - vnode of located entry, NULL if not found. * * RETURN: 0 on success, error code on failure. * * Timestamps: * NA */ /* ARGSUSED */ static int zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, int nameiop, cred_t *cr, kthread_t *td, int flags) { znode_t *zdp = VTOZ(dvp); zfsvfs_t *zfsvfs = zdp->z_zfsvfs; int error = 0; int *direntflags = NULL; void *realpnp = NULL; /* fast path */ if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { if (dvp->v_type != VDIR) { return (SET_ERROR(ENOTDIR)); } else if (zdp->z_sa_hdl == NULL) { return (SET_ERROR(EIO)); } if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { error = zfs_fastaccesschk_execute(zdp, cr); if (!error) { *vpp = dvp; VN_HOLD(*vpp); return (0); } return (error); } else { vnode_t *tvp = dnlc_lookup(dvp, nm); if (tvp) { error = zfs_fastaccesschk_execute(zdp, cr); if (error) { VN_RELE(tvp); return (error); } if (tvp == DNLC_NO_VNODE) { VN_RELE(tvp); return (SET_ERROR(ENOENT)); } else { *vpp = tvp; return (specvp_check(vpp, cr)); } } } } DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zdp); *vpp = NULL; if (flags & LOOKUP_XATTR) { #ifdef TODO /* * If the xattr property is off, refuse the lookup request. */ if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } #endif /* * We don't allow recursive attributes.. * Maybe someday we will. */ if (zdp->z_pflags & ZFS_XATTR) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { ZFS_EXIT(zfsvfs); return (error); } /* * Do we have permission to get into attribute directory? */ if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, B_FALSE, cr)) { VN_RELE(*vpp); *vpp = NULL; } ZFS_EXIT(zfsvfs); return (error); } if (dvp->v_type != VDIR) { ZFS_EXIT(zfsvfs); return (SET_ERROR(ENOTDIR)); } /* * Check accessibility of directory. */ if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { ZFS_EXIT(zfsvfs); return (error); } if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EILSEQ)); } error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); if (error == 0) error = specvp_check(vpp, cr); /* Translate errors and add SAVENAME when needed. */ if (cnp->cn_flags & ISLASTCN) { switch (nameiop) { case CREATE: case RENAME: if (error == ENOENT) { error = EJUSTRETURN; cnp->cn_flags |= SAVENAME; break; } /* FALLTHROUGH */ case DELETE: if (error == 0) cnp->cn_flags |= SAVENAME; break; } } if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) { int ltype = 0; if (cnp->cn_flags & ISDOTDOT) { ltype = VOP_ISLOCKED(dvp); VOP_UNLOCK(dvp, 0); } ZFS_EXIT(zfsvfs); error = zfs_vnode_lock(*vpp, cnp->cn_lkflags); if (cnp->cn_flags & ISDOTDOT) vn_lock(dvp, ltype | LK_RETRY); if (error != 0) { VN_RELE(*vpp); *vpp = NULL; return (error); } } else { ZFS_EXIT(zfsvfs); } #ifdef FREEBSD_NAMECACHE /* * Insert name into cache (as non-existent) if appropriate. */ if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) cache_enter(dvp, *vpp, cnp); /* * Insert name into cache if appropriate. */ if (error == 0 && (cnp->cn_flags & MAKEENTRY)) { if (!(cnp->cn_flags & ISLASTCN) || (nameiop != DELETE && nameiop != RENAME)) { cache_enter(dvp, *vpp, cnp); } } #endif return (error); } /* * Attempt to create a new entry in a directory. If the entry * already exists, truncate the file if permissible, else return * an error. Return the vp of the created or trunc'd file. * * IN: dvp - vnode of directory to put new file entry in. * name - name of new file entry. * vap - attributes of new file. * excl - flag indicating exclusive or non-exclusive mode. * mode - mode to open file with. * cr - credentials of caller. * flag - large file flag [UNUSED]. * ct - caller context * vsecp - ACL to be set * * OUT: vpp - vnode of created or trunc'd entry. * * RETURN: 0 on success, error code on failure. * * Timestamps: * dvp - ctime|mtime updated if new entry created * vp - ctime|mtime always, atime if new */ /* ARGSUSED */ static int zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, vnode_t **vpp, cred_t *cr, kthread_t *td) { znode_t *zp, *dzp = VTOZ(dvp); zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zilog_t *zilog; objset_t *os; zfs_dirlock_t *dl; dmu_tx_t *tx; int error; ksid_t *ksid; uid_t uid; gid_t gid = crgetgid(cr); zfs_acl_ids_t acl_ids; boolean_t fuid_dirtied; boolean_t have_acl = B_FALSE; void *vsecp = NULL; int flag = 0; /* * If we have an ephemeral id, ACL, or XVATTR then * make sure file system is at proper version */ ksid = crgetsid(cr, KSID_OWNER); if (ksid) uid = ksid_getid(ksid); else uid = crgetuid(cr); if (zfsvfs->z_use_fuids == B_FALSE && (vsecp || (vap->va_mask & AT_XVATTR) || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) return (SET_ERROR(EINVAL)); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); os = zfsvfs->z_os; zilog = zfsvfs->z_log; if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EILSEQ)); } if (vap->va_mask & AT_XVATTR) { if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, crgetuid(cr), cr, vap->va_type)) != 0) { ZFS_EXIT(zfsvfs); return (error); } } top: *vpp = NULL; if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) vap->va_mode &= ~S_ISVTX; if (*name == '\0') { /* * Null component name refers to the directory itself. */ VN_HOLD(dvp); zp = dzp; dl = NULL; error = 0; } else { /* possible VN_HOLD(zp) */ int zflg = 0; if (flag & FIGNORECASE) zflg |= ZCILOOK; error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); if (error) { if (have_acl) zfs_acl_ids_free(&acl_ids); if (strcmp(name, "..") == 0) error = SET_ERROR(EISDIR); ZFS_EXIT(zfsvfs); return (error); } } if (zp == NULL) { uint64_t txtype; /* * Create a new file object and update the directory * to reference it. */ if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { if (have_acl) zfs_acl_ids_free(&acl_ids); goto out; } /* * We only support the creation of regular files in * extended attribute directories. */ if ((dzp->z_pflags & ZFS_XATTR) && (vap->va_type != VREG)) { if (have_acl) zfs_acl_ids_free(&acl_ids); error = SET_ERROR(EINVAL); goto out; } if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, &acl_ids)) != 0) goto out; have_acl = B_TRUE; if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { zfs_acl_ids_free(&acl_ids); error = SET_ERROR(EDQUOT); goto out; } tx = dmu_tx_create(os); dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ZFS_SA_BASE_ATTR_SIZE); fuid_dirtied = zfsvfs->z_fuid_dirty; if (fuid_dirtied) zfs_fuid_txhold(zfsvfs, tx); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, acl_ids.z_aclp->z_acl_bytes); } error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } zfs_acl_ids_free(&acl_ids); dmu_tx_abort(tx); ZFS_EXIT(zfsvfs); return (error); } zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); (void) zfs_link_create(dl, zp, tx, ZNEW); txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); if (flag & FIGNORECASE) txtype |= TX_CI; zfs_log_create(zilog, tx, txtype, dzp, zp, name, vsecp, acl_ids.z_fuidp, vap); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); } else { int aflags = (flag & FAPPEND) ? V_APPEND : 0; if (have_acl) zfs_acl_ids_free(&acl_ids); have_acl = B_FALSE; /* * A directory entry already exists for this name. */ /* * Can't truncate an existing file if in exclusive mode. */ if (excl == EXCL) { error = SET_ERROR(EEXIST); goto out; } /* * Can't open a directory for writing. */ if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { error = SET_ERROR(EISDIR); goto out; } /* * Verify requested access to file. */ if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { goto out; } mutex_enter(&dzp->z_lock); dzp->z_seq++; mutex_exit(&dzp->z_lock); /* * Truncate regular files if requested. */ if ((ZTOV(zp)->v_type == VREG) && (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { /* we can't hold any locks when calling zfs_freesp() */ zfs_dirent_unlock(dl); dl = NULL; error = zfs_freesp(zp, 0, 0, mode, TRUE); if (error == 0) { vnevent_create(ZTOV(zp), ct); } } } out: if (dl) zfs_dirent_unlock(dl); if (error) { if (zp) VN_RELE(ZTOV(zp)); } else { *vpp = ZTOV(zp); error = specvp_check(vpp, cr); } if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (error); } /* * Remove an entry from a directory. * * IN: dvp - vnode of directory to remove entry from. * name - name of entry to remove. * cr - credentials of caller. * ct - caller context * flags - case flags * * RETURN: 0 on success, error code on failure. * * Timestamps: * dvp - ctime|mtime * vp - ctime (if nlink > 0) */ uint64_t null_xattr = 0; /*ARGSUSED*/ static int zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, int flags) { znode_t *zp, *dzp = VTOZ(dvp); znode_t *xzp; vnode_t *vp; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zilog_t *zilog; uint64_t acl_obj, xattr_obj; uint64_t xattr_obj_unlinked = 0; uint64_t obj = 0; zfs_dirlock_t *dl; dmu_tx_t *tx; boolean_t may_delete_now, delete_now = FALSE; boolean_t unlinked, toobig = FALSE; uint64_t txtype; pathname_t *realnmp = NULL; pathname_t realnm; int error; int zflg = ZEXISTS; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); zilog = zfsvfs->z_log; if (flags & FIGNORECASE) { zflg |= ZCILOOK; pn_alloc(&realnm); realnmp = &realnm; } top: xattr_obj = 0; xzp = NULL; /* * Attempt to lock directory; fail if entry doesn't exist. */ if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, realnmp)) { if (realnmp) pn_free(realnmp); ZFS_EXIT(zfsvfs); return (error); } vp = ZTOV(zp); if (error = zfs_zaccess_delete(dzp, zp, cr)) { goto out; } /* * Need to use rmdir for removing directories. */ if (vp->v_type == VDIR) { error = SET_ERROR(EPERM); goto out; } vnevent_remove(vp, dvp, name, ct); if (realnmp) dnlc_remove(dvp, realnmp->pn_buf); else dnlc_remove(dvp, name); VI_LOCK(vp); may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); VI_UNLOCK(vp); /* * We may delete the znode now, or we may put it in the unlinked set; * it depends on whether we're the last link, and on whether there are * other holds on the vnode. So we dmu_tx_hold() the right things to * allow for either case. */ obj = zp->z_id; tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); zfs_sa_upgrade_txholds(tx, dzp); if (may_delete_now) { toobig = zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; /* if the file is too big, only hold_free a token amount */ dmu_tx_hold_free(tx, zp->z_id, 0, (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); } /* are there any extended attributes? */ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xattr_obj, sizeof (xattr_obj)); if (error == 0 && xattr_obj) { error = zfs_zget(zfsvfs, xattr_obj, &xzp); ASSERT0(error); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); } mutex_enter(&zp->z_lock); if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); mutex_exit(&zp->z_lock); /* charge as an update -- would be nice not to charge at all */ dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); VN_RELE(vp); if (xzp) VN_RELE(ZTOV(xzp)); if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } if (realnmp) pn_free(realnmp); dmu_tx_abort(tx); ZFS_EXIT(zfsvfs); return (error); } /* * Remove the directory entry. */ error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); if (error) { dmu_tx_commit(tx); goto out; } if (unlinked) { /* * Hold z_lock so that we can make sure that the ACL obj * hasn't changed. Could have been deleted due to * zfs_sa_upgrade(). */ mutex_enter(&zp->z_lock); VI_LOCK(vp); (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); delete_now = may_delete_now && !toobig && vp->v_count == 1 && !vn_has_cached_data(vp) && xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == acl_obj; VI_UNLOCK(vp); } if (delete_now) { #ifdef __FreeBSD__ panic("zfs_remove: delete_now branch taken"); #endif if (xattr_obj_unlinked) { ASSERT3U(xzp->z_links, ==, 2); mutex_enter(&xzp->z_lock); xzp->z_unlinked = 1; xzp->z_links = 0; error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), &xzp->z_links, sizeof (xzp->z_links), tx); ASSERT3U(error, ==, 0); mutex_exit(&xzp->z_lock); zfs_unlinked_add(xzp, tx); if (zp->z_is_sa) error = sa_remove(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), tx); else error = sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &null_xattr, sizeof (uint64_t), tx); ASSERT0(error); } VI_LOCK(vp); vp->v_count--; ASSERT0(vp->v_count); VI_UNLOCK(vp); mutex_exit(&zp->z_lock); zfs_znode_delete(zp, tx); } else if (unlinked) { mutex_exit(&zp->z_lock); zfs_unlinked_add(zp, tx); #ifdef __FreeBSD__ vp->v_vflag |= VV_NOSYNC; #endif } txtype = TX_REMOVE; if (flags & FIGNORECASE) txtype |= TX_CI; zfs_log_remove(zilog, tx, txtype, dzp, name, obj); dmu_tx_commit(tx); out: if (realnmp) pn_free(realnmp); zfs_dirent_unlock(dl); if (!delete_now) VN_RELE(vp); if (xzp) VN_RELE(ZTOV(xzp)); if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (error); } /* * Create a new directory and insert it into dvp using the name * provided. Return a pointer to the inserted directory. * * IN: dvp - vnode of directory to add subdir to. * dirname - name of new directory. * vap - attributes of new directory. * cr - credentials of caller. * ct - caller context * flags - case flags * vsecp - ACL to be set * * OUT: vpp - vnode of created directory. * * RETURN: 0 on success, error code on failure. * * Timestamps: * dvp - ctime|mtime updated * vp - ctime|mtime|atime updated */ /*ARGSUSED*/ static int zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp) { znode_t *zp, *dzp = VTOZ(dvp); zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zilog_t *zilog; zfs_dirlock_t *dl; uint64_t txtype; dmu_tx_t *tx; int error; int zf = ZNEW; ksid_t *ksid; uid_t uid; gid_t gid = crgetgid(cr); zfs_acl_ids_t acl_ids; boolean_t fuid_dirtied; ASSERT(vap->va_type == VDIR); /* * If we have an ephemeral id, ACL, or XVATTR then * make sure file system is at proper version */ ksid = crgetsid(cr, KSID_OWNER); if (ksid) uid = ksid_getid(ksid); else uid = crgetuid(cr); if (zfsvfs->z_use_fuids == B_FALSE && (vsecp || (vap->va_mask & AT_XVATTR) || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) return (SET_ERROR(EINVAL)); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); zilog = zfsvfs->z_log; if (dzp->z_pflags & ZFS_XATTR) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } if (zfsvfs->z_utf8 && u8_validate(dirname, strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EILSEQ)); } if (flags & FIGNORECASE) zf |= ZCILOOK; if (vap->va_mask & AT_XVATTR) { if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, crgetuid(cr), cr, vap->va_type)) != 0) { ZFS_EXIT(zfsvfs); return (error); } } if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, &acl_ids)) != 0) { ZFS_EXIT(zfsvfs); return (error); } /* * First make sure the new directory doesn't exist. * * Existence is checked first to make sure we don't return * EACCES instead of EEXIST which can cause some applications * to fail. */ top: *vpp = NULL; if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, NULL, NULL)) { zfs_acl_ids_free(&acl_ids); ZFS_EXIT(zfsvfs); return (error); } if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { zfs_acl_ids_free(&acl_ids); zfs_dirent_unlock(dl); ZFS_EXIT(zfsvfs); return (error); } if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { zfs_acl_ids_free(&acl_ids); zfs_dirent_unlock(dl); ZFS_EXIT(zfsvfs); return (SET_ERROR(EDQUOT)); } /* * Add a new entry to the directory. */ tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); fuid_dirtied = zfsvfs->z_fuid_dirty; if (fuid_dirtied) zfs_fuid_txhold(zfsvfs, tx); if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, acl_ids.z_aclp->z_acl_bytes); } dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ZFS_SA_BASE_ATTR_SIZE); error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } zfs_acl_ids_free(&acl_ids); dmu_tx_abort(tx); ZFS_EXIT(zfsvfs); return (error); } /* * Create new node. */ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); /* * Now put new name in parent dir. */ (void) zfs_link_create(dl, zp, tx, ZNEW); *vpp = ZTOV(zp); txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); if (flags & FIGNORECASE) txtype |= TX_CI; zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, acl_ids.z_fuidp, vap); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); zfs_dirent_unlock(dl); if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (0); } /* * Remove a directory subdir entry. If the current working * directory is the same as the subdir to be removed, the * remove will fail. * * IN: dvp - vnode of directory to remove from. * name - name of directory to be removed. * cwd - vnode of current working directory. * cr - credentials of caller. * ct - caller context * flags - case flags * * RETURN: 0 on success, error code on failure. * * Timestamps: * dvp - ctime|mtime updated */ /*ARGSUSED*/ static int zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, caller_context_t *ct, int flags) { znode_t *dzp = VTOZ(dvp); znode_t *zp; vnode_t *vp; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zilog_t *zilog; zfs_dirlock_t *dl; dmu_tx_t *tx; int error; int zflg = ZEXISTS; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); zilog = zfsvfs->z_log; if (flags & FIGNORECASE) zflg |= ZCILOOK; top: zp = NULL; /* * Attempt to lock directory; fail if entry doesn't exist. */ if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL)) { ZFS_EXIT(zfsvfs); return (error); } vp = ZTOV(zp); if (error = zfs_zaccess_delete(dzp, zp, cr)) { goto out; } if (vp->v_type != VDIR) { error = SET_ERROR(ENOTDIR); goto out; } if (vp == cwd) { error = SET_ERROR(EINVAL); goto out; } vnevent_rmdir(vp, dvp, name, ct); /* * Grab a lock on the directory to make sure that noone is * trying to add (or lookup) entries while we are removing it. */ rw_enter(&zp->z_name_lock, RW_WRITER); /* * Grab a lock on the parent pointer to make sure we play well * with the treewalk and directory rename code. */ rw_enter(&zp->z_parent_lock, RW_WRITER); tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); zfs_sa_upgrade_txholds(tx, zp); zfs_sa_upgrade_txholds(tx, dzp); error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { rw_exit(&zp->z_parent_lock); rw_exit(&zp->z_name_lock); zfs_dirent_unlock(dl); VN_RELE(vp); if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } dmu_tx_abort(tx); ZFS_EXIT(zfsvfs); return (error); } #ifdef FREEBSD_NAMECACHE cache_purge(dvp); #endif error = zfs_link_destroy(dl, zp, tx, zflg, NULL); if (error == 0) { uint64_t txtype = TX_RMDIR; if (flags & FIGNORECASE) txtype |= TX_CI; zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); } dmu_tx_commit(tx); rw_exit(&zp->z_parent_lock); rw_exit(&zp->z_name_lock); #ifdef FREEBSD_NAMECACHE cache_purge(vp); #endif out: zfs_dirent_unlock(dl); VN_RELE(vp); if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (error); } /* * Read as many directory entries as will fit into the provided * buffer from the given directory cursor position (specified in * the uio structure). * * IN: vp - vnode of directory to read. * uio - structure supplying read location, range info, * and return buffer. * cr - credentials of caller. * ct - caller context * flags - case flags * * OUT: uio - updated offset and range, buffer filled. * eofp - set to true if end-of-file detected. * * RETURN: 0 on success, error code on failure. * * Timestamps: * vp - atime updated * * Note that the low 4 bits of the cookie returned by zap is always zero. * This allows us to use the low range for "special" directory entries: * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, * we use the offset 2 for the '.zfs' directory. */ /* ARGSUSED */ static int zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) { znode_t *zp = VTOZ(vp); iovec_t *iovp; edirent_t *eodp; dirent64_t *odp; zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os; caddr_t outbuf; size_t bufsize; zap_cursor_t zc; zap_attribute_t zap; uint_t bytes_wanted; uint64_t offset; /* must be unsigned; checks for < 1 */ uint64_t parent; int local_eof; int outcount; int error; uint8_t prefetch; boolean_t check_sysattrs; uint8_t type; int ncooks; u_long *cooks = NULL; int flags = 0; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) { ZFS_EXIT(zfsvfs); return (error); } /* * If we are not given an eof variable, * use a local one. */ if (eofp == NULL) eofp = &local_eof; /* * Check for valid iov_len. */ if (uio->uio_iov->iov_len <= 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } /* * Quit if directory has been removed (posix) */ if ((*eofp = zp->z_unlinked) != 0) { ZFS_EXIT(zfsvfs); return (0); } error = 0; os = zfsvfs->z_os; offset = uio->uio_loffset; prefetch = zp->z_zn_prefetch; /* * Initialize the iterator cursor. */ if (offset <= 3) { /* * Start iteration from the beginning of the directory. */ zap_cursor_init(&zc, os, zp->z_id); } else { /* * The offset is a serialized cursor. */ zap_cursor_init_serialized(&zc, os, zp->z_id, offset); } /* * Get space to change directory entries into fs independent format. */ iovp = uio->uio_iov; bytes_wanted = iovp->iov_len; if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { bufsize = bytes_wanted; outbuf = kmem_alloc(bufsize, KM_SLEEP); odp = (struct dirent64 *)outbuf; } else { bufsize = bytes_wanted; outbuf = NULL; odp = (struct dirent64 *)iovp->iov_base; } eodp = (struct edirent *)odp; if (ncookies != NULL) { /* * Minimum entry size is dirent size and 1 byte for a file name. */ ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); *cookies = cooks; *ncookies = ncooks; } /* * If this VFS supports the system attribute view interface; and * we're looking at an extended attribute directory; and we care * about normalization conflicts on this vfs; then we must check * for normalization conflicts with the sysattr name space. */ #ifdef TODO check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && (flags & V_RDDIR_ENTFLAGS); #else check_sysattrs = 0; #endif /* * Transform to file-system independent format */ outcount = 0; while (outcount < bytes_wanted) { ino64_t objnum; ushort_t reclen; off64_t *next = NULL; /* * Special case `.', `..', and `.zfs'. */ if (offset == 0) { (void) strcpy(zap.za_name, "."); zap.za_normalization_conflict = 0; objnum = zp->z_id; type = DT_DIR; } else if (offset == 1) { (void) strcpy(zap.za_name, ".."); zap.za_normalization_conflict = 0; objnum = parent; type = DT_DIR; } else if (offset == 2 && zfs_show_ctldir(zp)) { (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); zap.za_normalization_conflict = 0; objnum = ZFSCTL_INO_ROOT; type = DT_DIR; } else { /* * Grab next entry. */ if (error = zap_cursor_retrieve(&zc, &zap)) { if ((*eofp = (error == ENOENT)) != 0) break; else goto update; } if (zap.za_integer_length != 8 || zap.za_num_integers != 1) { cmn_err(CE_WARN, "zap_readdir: bad directory " "entry, obj = %lld, offset = %lld\n", (u_longlong_t)zp->z_id, (u_longlong_t)offset); error = SET_ERROR(ENXIO); goto update; } objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); /* * MacOS X can extract the object type here such as: * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); */ type = ZFS_DIRENT_TYPE(zap.za_first_integer); if (check_sysattrs && !zap.za_normalization_conflict) { #ifdef TODO zap.za_normalization_conflict = xattr_sysattr_casechk(zap.za_name); #else panic("%s:%u: TODO", __func__, __LINE__); #endif } } if (flags & V_RDDIR_ACCFILTER) { /* * If we have no access at all, don't include * this entry in the returned information */ znode_t *ezp; if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) goto skip_entry; if (!zfs_has_access(ezp, cr)) { VN_RELE(ZTOV(ezp)); goto skip_entry; } VN_RELE(ZTOV(ezp)); } if (flags & V_RDDIR_ENTFLAGS) reclen = EDIRENT_RECLEN(strlen(zap.za_name)); else reclen = DIRENT64_RECLEN(strlen(zap.za_name)); /* * Will this entry fit in the buffer? */ if (outcount + reclen > bufsize) { /* * Did we manage to fit anything in the buffer? */ if (!outcount) { error = SET_ERROR(EINVAL); goto update; } break; } if (flags & V_RDDIR_ENTFLAGS) { /* * Add extended flag entry: */ eodp->ed_ino = objnum; eodp->ed_reclen = reclen; /* NOTE: ed_off is the offset for the *next* entry */ next = &(eodp->ed_off); eodp->ed_eflags = zap.za_normalization_conflict ? ED_CASE_CONFLICT : 0; (void) strncpy(eodp->ed_name, zap.za_name, EDIRENT_NAMELEN(reclen)); eodp = (edirent_t *)((intptr_t)eodp + reclen); } else { /* * Add normal entry: */ odp->d_ino = objnum; odp->d_reclen = reclen; odp->d_namlen = strlen(zap.za_name); (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); odp->d_type = type; odp = (dirent64_t *)((intptr_t)odp + reclen); } outcount += reclen; ASSERT(outcount <= bufsize); /* Prefetch znode */ if (prefetch) dmu_prefetch(os, objnum, 0, 0); skip_entry: /* * Move to the next entry, fill in the previous offset. */ if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { zap_cursor_advance(&zc); offset = zap_cursor_serialize(&zc); } else { offset += 1; } if (cooks != NULL) { *cooks++ = offset; ncooks--; KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); } } zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ /* Subtract unused cookies */ if (ncookies != NULL) *ncookies -= ncooks; if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { iovp->iov_base += outcount; iovp->iov_len -= outcount; uio->uio_resid -= outcount; } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { /* * Reset the pointer. */ offset = uio->uio_loffset; } update: zap_cursor_fini(&zc); if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) kmem_free(outbuf, bufsize); if (error == ENOENT) error = 0; ZFS_ACCESSTIME_STAMP(zfsvfs, zp); uio->uio_loffset = offset; ZFS_EXIT(zfsvfs); if (error != 0 && cookies != NULL) { free(*cookies, M_TEMP); *cookies = NULL; *ncookies = 0; } return (error); } ulong_t zfs_fsync_sync_cnt = 4; static int zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); zil_commit(zfsvfs->z_log, zp->z_id); ZFS_EXIT(zfsvfs); } return (0); } /* * Get the requested file attributes and place them in the provided * vattr structure. * * IN: vp - vnode of file. * vap - va_mask identifies requested attributes. * If AT_XVATTR set, then optional attrs are requested * flags - ATTR_NOACLCHECK (CIFS server context) * cr - credentials of caller. * ct - caller context * * OUT: vap - attribute values. * * RETURN: 0 (always succeeds). */ /* ARGSUSED */ static int zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; int error = 0; uint32_t blksize; u_longlong_t nblocks; uint64_t links; uint64_t mtime[2], ctime[2], crtime[2], rdev; xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ xoptattr_t *xoap = NULL; boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; sa_bulk_attr_t bulk[4]; int count = 0; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); if (vp->v_type == VBLK || vp->v_type == VCHR) SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, &rdev, 8); if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { ZFS_EXIT(zfsvfs); return (error); } /* * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. * Also, if we are the owner don't bother, since owner should * always be allowed to read basic attributes of file. */ if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && (vap->va_uid != crgetuid(cr))) { if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, skipaclchk, cr)) { ZFS_EXIT(zfsvfs); return (error); } } /* * Return all attributes. It's cheaper to provide the answer * than to determine whether we were asked the question. */ mutex_enter(&zp->z_lock); vap->va_type = IFTOVT(zp->z_mode); vap->va_mode = zp->z_mode & ~S_IFMT; #ifdef sun vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; #else vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; #endif vap->va_nodeid = zp->z_id; if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) links = zp->z_links + 1; else links = zp->z_links; vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ vap->va_size = zp->z_size; #ifdef sun vap->va_rdev = vp->v_rdev; #else if (vp->v_type == VBLK || vp->v_type == VCHR) vap->va_rdev = zfs_cmpldev(rdev); #endif vap->va_seq = zp->z_seq; vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ /* * Add in any requested optional attributes and the create time. * Also set the corresponding bits in the returned attribute bitmap. */ if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { xoap->xoa_archive = ((zp->z_pflags & ZFS_ARCHIVE) != 0); XVA_SET_RTN(xvap, XAT_ARCHIVE); } if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { xoap->xoa_readonly = ((zp->z_pflags & ZFS_READONLY) != 0); XVA_SET_RTN(xvap, XAT_READONLY); } if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { xoap->xoa_system = ((zp->z_pflags & ZFS_SYSTEM) != 0); XVA_SET_RTN(xvap, XAT_SYSTEM); } if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { xoap->xoa_hidden = ((zp->z_pflags & ZFS_HIDDEN) != 0); XVA_SET_RTN(xvap, XAT_HIDDEN); } if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { xoap->xoa_nounlink = ((zp->z_pflags & ZFS_NOUNLINK) != 0); XVA_SET_RTN(xvap, XAT_NOUNLINK); } if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { xoap->xoa_immutable = ((zp->z_pflags & ZFS_IMMUTABLE) != 0); XVA_SET_RTN(xvap, XAT_IMMUTABLE); } if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { xoap->xoa_appendonly = ((zp->z_pflags & ZFS_APPENDONLY) != 0); XVA_SET_RTN(xvap, XAT_APPENDONLY); } if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { xoap->xoa_nodump = ((zp->z_pflags & ZFS_NODUMP) != 0); XVA_SET_RTN(xvap, XAT_NODUMP); } if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { xoap->xoa_opaque = ((zp->z_pflags & ZFS_OPAQUE) != 0); XVA_SET_RTN(xvap, XAT_OPAQUE); } if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { xoap->xoa_av_quarantined = ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); } if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { xoap->xoa_av_modified = ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); XVA_SET_RTN(xvap, XAT_AV_MODIFIED); } if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && vp->v_type == VREG) { zfs_sa_get_scanstamp(zp, xvap); } if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { uint64_t times[2]; (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), times, sizeof (times)); ZFS_TIME_DECODE(&xoap->xoa_createtime, times); XVA_SET_RTN(xvap, XAT_CREATETIME); } if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); XVA_SET_RTN(xvap, XAT_REPARSE); } if (XVA_ISSET_REQ(xvap, XAT_GEN)) { xoap->xoa_generation = zp->z_gen; XVA_SET_RTN(xvap, XAT_GEN); } if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { xoap->xoa_offline = ((zp->z_pflags & ZFS_OFFLINE) != 0); XVA_SET_RTN(xvap, XAT_OFFLINE); } if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { xoap->xoa_sparse = ((zp->z_pflags & ZFS_SPARSE) != 0); XVA_SET_RTN(xvap, XAT_SPARSE); } } ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); ZFS_TIME_DECODE(&vap->va_mtime, mtime); ZFS_TIME_DECODE(&vap->va_ctime, ctime); ZFS_TIME_DECODE(&vap->va_birthtime, crtime); mutex_exit(&zp->z_lock); sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); vap->va_blksize = blksize; vap->va_bytes = nblocks << 9; /* nblocks * 512 */ if (zp->z_blksz == 0) { /* * Block size hasn't been set; suggest maximal I/O transfers. */ vap->va_blksize = zfsvfs->z_max_blksz; } ZFS_EXIT(zfsvfs); return (0); } /* * Set the file attributes to the values contained in the * vattr structure. * * IN: vp - vnode of file to be modified. * vap - new attribute values. * If AT_XVATTR set, then optional attrs are being set * flags - ATTR_UTIME set if non-default time values provided. * - ATTR_NOACLCHECK (CIFS context only). * cr - credentials of caller. * ct - caller context * * RETURN: 0 on success, error code on failure. * * Timestamps: * vp - ctime updated, mtime updated if size changed. */ /* ARGSUSED */ static int zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; zilog_t *zilog; dmu_tx_t *tx; vattr_t oldva; xvattr_t tmpxvattr; uint_t mask = vap->va_mask; uint_t saved_mask = 0; uint64_t saved_mode; int trim_mask = 0; uint64_t new_mode; uint64_t new_uid, new_gid; uint64_t xattr_obj; uint64_t mtime[2], ctime[2]; znode_t *attrzp; int need_policy = FALSE; int err, err2; zfs_fuid_info_t *fuidp = NULL; xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ xoptattr_t *xoap; zfs_acl_t *aclp; boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; boolean_t fuid_dirtied = B_FALSE; sa_bulk_attr_t bulk[7], xattr_bulk[7]; int count = 0, xattr_count = 0; if (mask == 0) return (0); if (mask & AT_NOSET) return (SET_ERROR(EINVAL)); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); zilog = zfsvfs->z_log; /* * Make sure that if we have ephemeral uid/gid or xvattr specified * that file system is at proper version level */ if (zfsvfs->z_use_fuids == B_FALSE && (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || (mask & AT_XVATTR))) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } if (mask & AT_SIZE && vp->v_type == VDIR) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EISDIR)); } if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } /* * If this is an xvattr_t, then get a pointer to the structure of * optional attributes. If this is NULL, then we have a vattr_t. */ xoap = xva_getxoptattr(xvap); xva_init(&tmpxvattr); /* * Immutable files can only alter immutable bit and atime */ if ((zp->z_pflags & ZFS_IMMUTABLE) && ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } /* * Verify timestamps doesn't overflow 32 bits. * ZFS can handle large timestamps, but 32bit syscalls can't * handle times greater than 2039. This check should be removed * once large timestamps are fully supported. */ if (mask & (AT_ATIME | AT_MTIME)) { if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EOVERFLOW)); } } top: attrzp = NULL; aclp = NULL; /* Can this be moved to before the top label? */ if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EROFS)); } /* * First validate permissions */ if (mask & AT_SIZE) { /* * XXX - Note, we are not providing any open * mode flags here (like FNDELAY), so we may * block if there are locks present... this * should be addressed in openat(). */ /* XXX - would it be OK to generate a log record here? */ err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); if (err) { ZFS_EXIT(zfsvfs); return (err); } } if (mask & (AT_ATIME|AT_MTIME) || ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || XVA_ISSET_REQ(xvap, XAT_READONLY) || XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || XVA_ISSET_REQ(xvap, XAT_OFFLINE) || XVA_ISSET_REQ(xvap, XAT_SPARSE) || XVA_ISSET_REQ(xvap, XAT_CREATETIME) || XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, skipaclchk, cr); } if (mask & (AT_UID|AT_GID)) { int idmask = (mask & (AT_UID|AT_GID)); int take_owner; int take_group; /* * NOTE: even if a new mode is being set, * we may clear S_ISUID/S_ISGID bits. */ if (!(mask & AT_MODE)) vap->va_mode = zp->z_mode; /* * Take ownership or chgrp to group we are a member of */ take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); take_group = (mask & AT_GID) && zfs_groupmember(zfsvfs, vap->va_gid, cr); /* * If both AT_UID and AT_GID are set then take_owner and * take_group must both be set in order to allow taking * ownership. * * Otherwise, send the check through secpolicy_vnode_setattr() * */ if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || ((idmask == AT_UID) && take_owner) || ((idmask == AT_GID) && take_group)) { if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, skipaclchk, cr) == 0) { /* * Remove setuid/setgid for non-privileged users */ secpolicy_setid_clear(vap, vp, cr); trim_mask = (mask & (AT_UID|AT_GID)); } else { need_policy = TRUE; } } else { need_policy = TRUE; } } mutex_enter(&zp->z_lock); oldva.va_mode = zp->z_mode; zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); if (mask & AT_XVATTR) { /* * Update xvattr mask to include only those attributes * that are actually changing. * * the bits will be restored prior to actually setting * the attributes so the caller thinks they were set. */ if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { if (xoap->xoa_appendonly != ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_APPENDONLY); XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); } } if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { if (xoap->xoa_nounlink != ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_NOUNLINK); XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); } } if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { if (xoap->xoa_immutable != ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_IMMUTABLE); XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); } } if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { if (xoap->xoa_nodump != ((zp->z_pflags & ZFS_NODUMP) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_NODUMP); XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); } } if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { if (xoap->xoa_av_modified != ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); } } if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { if ((vp->v_type != VREG && xoap->xoa_av_quarantined) || xoap->xoa_av_quarantined != ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); } } if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { mutex_exit(&zp->z_lock); ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } if (need_policy == FALSE && (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { need_policy = TRUE; } } mutex_exit(&zp->z_lock); if (mask & AT_MODE) { if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { err = secpolicy_setid_setsticky_clear(vp, vap, &oldva, cr); if (err) { ZFS_EXIT(zfsvfs); return (err); } trim_mask |= AT_MODE; } else { need_policy = TRUE; } } if (need_policy) { /* * If trim_mask is set then take ownership * has been granted or write_acl is present and user * has the ability to modify mode. In that case remove * UID|GID and or MODE from mask so that * secpolicy_vnode_setattr() doesn't revoke it. */ if (trim_mask) { saved_mask = vap->va_mask; vap->va_mask &= ~trim_mask; if (trim_mask & AT_MODE) { /* * Save the mode, as secpolicy_vnode_setattr() * will overwrite it with ova.va_mode. */ saved_mode = vap->va_mode; } } err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); if (err) { ZFS_EXIT(zfsvfs); return (err); } if (trim_mask) { vap->va_mask |= saved_mask; if (trim_mask & AT_MODE) { /* * Recover the mode after * secpolicy_vnode_setattr(). */ vap->va_mode = saved_mode; } } } /* * secpolicy_vnode_setattr, or take ownership may have * changed va_mask */ mask = vap->va_mask; if ((mask & (AT_UID | AT_GID))) { err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xattr_obj, sizeof (xattr_obj)); if (err == 0 && xattr_obj) { err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); if (err) goto out2; } if (mask & AT_UID) { new_uid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); if (new_uid != zp->z_uid && zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { if (attrzp) VN_RELE(ZTOV(attrzp)); err = SET_ERROR(EDQUOT); goto out2; } } if (mask & AT_GID) { new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, cr, ZFS_GROUP, &fuidp); if (new_gid != zp->z_gid && zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { if (attrzp) VN_RELE(ZTOV(attrzp)); err = SET_ERROR(EDQUOT); goto out2; } } } tx = dmu_tx_create(zfsvfs->z_os); if (mask & AT_MODE) { uint64_t pmode = zp->z_mode; uint64_t acl_obj; new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { err = SET_ERROR(EPERM); goto out; } if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) goto out; mutex_enter(&zp->z_lock); if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { /* * Are we upgrading ACL from old V0 format * to V1 format? */ if (zfsvfs->z_version >= ZPL_VERSION_FUID && zfs_znode_acl_version(zp) == ZFS_ACL_VERSION_INITIAL) { dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes); } else { dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes); } } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes); } mutex_exit(&zp->z_lock); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); } else { if ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); else dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); } if (attrzp) { dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); } fuid_dirtied = zfsvfs->z_fuid_dirty; if (fuid_dirtied) zfs_fuid_txhold(zfsvfs, tx); zfs_sa_upgrade_txholds(tx, zp); err = dmu_tx_assign(tx, TXG_NOWAIT); if (err) { if (err == ERESTART) dmu_tx_wait(tx); goto out; } count = 0; /* * Set each attribute requested. * We group settings according to the locks they need to acquire. * * Note: you cannot set ctime directly, although it will be * updated as a side-effect of calling this function. */ if (mask & (AT_UID|AT_GID|AT_MODE)) mutex_enter(&zp->z_acl_lock); mutex_enter(&zp->z_lock); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags, sizeof (zp->z_pflags)); if (attrzp) { if (mask & (AT_UID|AT_GID|AT_MODE)) mutex_enter(&attrzp->z_acl_lock); mutex_enter(&attrzp->z_lock); SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, sizeof (attrzp->z_pflags)); } if (mask & (AT_UID|AT_GID)) { if (mask & AT_UID) { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &new_uid, sizeof (new_uid)); zp->z_uid = new_uid; if (attrzp) { SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, SA_ZPL_UID(zfsvfs), NULL, &new_uid, sizeof (new_uid)); attrzp->z_uid = new_uid; } } if (mask & AT_GID) { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &new_gid, sizeof (new_gid)); zp->z_gid = new_gid; if (attrzp) { SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, SA_ZPL_GID(zfsvfs), NULL, &new_gid, sizeof (new_gid)); attrzp->z_gid = new_gid; } } if (!(mask & AT_MODE)) { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &new_mode, sizeof (new_mode)); new_mode = zp->z_mode; } err = zfs_acl_chown_setattr(zp); ASSERT(err == 0); if (attrzp) { err = zfs_acl_chown_setattr(attrzp); ASSERT(err == 0); } } if (mask & AT_MODE) { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &new_mode, sizeof (new_mode)); zp->z_mode = new_mode; ASSERT3U((uintptr_t)aclp, !=, 0); err = zfs_aclset_common(zp, aclp, cr, tx); ASSERT0(err); if (zp->z_acl_cached) zfs_acl_free(zp->z_acl_cached); zp->z_acl_cached = aclp; aclp = NULL; } if (mask & AT_ATIME) { ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &zp->z_atime, sizeof (zp->z_atime)); } if (mask & AT_MTIME) { ZFS_TIME_ENCODE(&vap->va_mtime, mtime); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, sizeof (mtime)); } /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */ if (mask & AT_SIZE && !(mask & AT_MTIME)) { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, sizeof (mtime)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, sizeof (ctime)); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); } else if (mask != 0) { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, sizeof (ctime)); zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, B_TRUE); if (attrzp) { SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, sizeof (ctime)); zfs_tstamp_update_setup(attrzp, STATE_CHANGED, mtime, ctime, B_TRUE); } } /* * Do this after setting timestamps to prevent timestamp * update from toggling bit */ if (xoap && (mask & AT_XVATTR)) { /* * restore trimmed off masks * so that return masks can be set for caller. */ if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { XVA_SET_REQ(xvap, XAT_APPENDONLY); } if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { XVA_SET_REQ(xvap, XAT_NOUNLINK); } if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { XVA_SET_REQ(xvap, XAT_IMMUTABLE); } if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { XVA_SET_REQ(xvap, XAT_NODUMP); } if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { XVA_SET_REQ(xvap, XAT_AV_MODIFIED); } if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); } if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ASSERT(vp->v_type == VREG); zfs_xvattr_set(zp, xvap, tx); } if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); if (mask != 0) zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); mutex_exit(&zp->z_lock); if (mask & (AT_UID|AT_GID|AT_MODE)) mutex_exit(&zp->z_acl_lock); if (attrzp) { if (mask & (AT_UID|AT_GID|AT_MODE)) mutex_exit(&attrzp->z_acl_lock); mutex_exit(&attrzp->z_lock); } out: if (err == 0 && attrzp) { err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, xattr_count, tx); ASSERT(err2 == 0); } if (attrzp) VN_RELE(ZTOV(attrzp)); if (aclp) zfs_acl_free(aclp); if (fuidp) { zfs_fuid_info_free(fuidp); fuidp = NULL; } if (err) { dmu_tx_abort(tx); if (err == ERESTART) goto top; } else { err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); dmu_tx_commit(tx); } out2: if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (err); } typedef struct zfs_zlock { krwlock_t *zl_rwlock; /* lock we acquired */ znode_t *zl_znode; /* znode we held */ struct zfs_zlock *zl_next; /* next in list */ } zfs_zlock_t; /* * Drop locks and release vnodes that were held by zfs_rename_lock(). */ static void zfs_rename_unlock(zfs_zlock_t **zlpp) { zfs_zlock_t *zl; while ((zl = *zlpp) != NULL) { if (zl->zl_znode != NULL) VN_RELE(ZTOV(zl->zl_znode)); rw_exit(zl->zl_rwlock); *zlpp = zl->zl_next; kmem_free(zl, sizeof (*zl)); } } /* * Search back through the directory tree, using the ".." entries. * Lock each directory in the chain to prevent concurrent renames. * Fail any attempt to move a directory into one of its own descendants. * XXX - z_parent_lock can overlap with map or grow locks */ static int zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) { zfs_zlock_t *zl; znode_t *zp = tdzp; uint64_t rootid = zp->z_zfsvfs->z_root; uint64_t oidp = zp->z_id; krwlock_t *rwlp = &szp->z_parent_lock; krw_t rw = RW_WRITER; /* * First pass write-locks szp and compares to zp->z_id. * Later passes read-lock zp and compare to zp->z_parent. */ do { if (!rw_tryenter(rwlp, rw)) { /* * Another thread is renaming in this path. * Note that if we are a WRITER, we don't have any * parent_locks held yet. */ if (rw == RW_READER && zp->z_id > szp->z_id) { /* * Drop our locks and restart */ zfs_rename_unlock(&zl); *zlpp = NULL; zp = tdzp; oidp = zp->z_id; rwlp = &szp->z_parent_lock; rw = RW_WRITER; continue; } else { /* * Wait for other thread to drop its locks */ rw_enter(rwlp, rw); } } zl = kmem_alloc(sizeof (*zl), KM_SLEEP); zl->zl_rwlock = rwlp; zl->zl_znode = NULL; zl->zl_next = *zlpp; *zlpp = zl; if (oidp == szp->z_id) /* We're a descendant of szp */ return (SET_ERROR(EINVAL)); if (oidp == rootid) /* We've hit the top */ return (0); if (rw == RW_READER) { /* i.e. not the first pass */ int error = zfs_zget(zp->z_zfsvfs, oidp, &zp); if (error) return (error); zl->zl_znode = zp; } (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs), &oidp, sizeof (oidp)); rwlp = &zp->z_parent_lock; rw = RW_READER; } while (zp->z_id != sdzp->z_id); return (0); } /* * Move an entry from the provided source directory to the target * directory. Change the entry name as indicated. * * IN: sdvp - Source directory containing the "old entry". * snm - Old entry name. * tdvp - Target directory to contain the "new entry". * tnm - New entry name. * cr - credentials of caller. * ct - caller context * flags - case flags * * RETURN: 0 on success, error code on failure. * * Timestamps: * sdvp,tdvp - ctime|mtime updated */ /*ARGSUSED*/ static int zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, caller_context_t *ct, int flags) { znode_t *tdzp, *szp, *tzp; znode_t *sdzp = VTOZ(sdvp); zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; zilog_t *zilog; vnode_t *realvp; zfs_dirlock_t *sdl, *tdl; dmu_tx_t *tx; zfs_zlock_t *zl; int cmp, serr, terr; int error = 0; int zflg = 0; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(sdzp); zilog = zfsvfs->z_log; /* * Make sure we have the real vp for the target directory. */ if (VOP_REALVP(tdvp, &realvp, ct) == 0) tdvp = realvp; if (tdvp->v_vfsp != sdvp->v_vfsp || zfsctl_is_node(tdvp)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EXDEV)); } tdzp = VTOZ(tdvp); ZFS_VERIFY_ZP(tdzp); if (zfsvfs->z_utf8 && u8_validate(tnm, strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EILSEQ)); } if (flags & FIGNORECASE) zflg |= ZCILOOK; top: szp = NULL; tzp = NULL; zl = NULL; /* * This is to prevent the creation of links into attribute space * by renaming a linked file into/outof an attribute directory. * See the comment in zfs_link() for why this is considered bad. */ if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } /* * Lock source and target directory entries. To prevent deadlock, * a lock ordering must be defined. We lock the directory with * the smallest object id first, or if it's a tie, the one with * the lexically first name. */ if (sdzp->z_id < tdzp->z_id) { cmp = -1; } else if (sdzp->z_id > tdzp->z_id) { cmp = 1; } else { /* * First compare the two name arguments without * considering any case folding. */ int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); ASSERT(error == 0 || !zfsvfs->z_utf8); if (cmp == 0) { /* * POSIX: "If the old argument and the new argument * both refer to links to the same existing file, * the rename() function shall return successfully * and perform no other action." */ ZFS_EXIT(zfsvfs); return (0); } /* * If the file system is case-folding, then we may * have some more checking to do. A case-folding file * system is either supporting mixed case sensitivity * access or is completely case-insensitive. Note * that the file system is always case preserving. * * In mixed sensitivity mode case sensitive behavior * is the default. FIGNORECASE must be used to * explicitly request case insensitive behavior. * * If the source and target names provided differ only * by case (e.g., a request to rename 'tim' to 'Tim'), * we will treat this as a special case in the * case-insensitive mode: as long as the source name * is an exact match, we will allow this to proceed as * a name-change request. */ if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || (zfsvfs->z_case == ZFS_CASE_MIXED && flags & FIGNORECASE)) && u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, &error) == 0) { /* * case preserving rename request, require exact * name matches */ zflg |= ZCIEXACT; zflg &= ~ZCILOOK; } } /* * If the source and destination directories are the same, we should * grab the z_name_lock of that directory only once. */ if (sdzp == tdzp) { zflg |= ZHAVELOCK; rw_enter(&sdzp->z_name_lock, RW_READER); } if (cmp < 0) { serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, ZEXISTS | zflg, NULL, NULL); terr = zfs_dirent_lock(&tdl, tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); } else { terr = zfs_dirent_lock(&tdl, tdzp, tnm, &tzp, zflg, NULL, NULL); serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, NULL, NULL); } if (serr) { /* * Source entry invalid or not there. */ if (!terr) { zfs_dirent_unlock(tdl); if (tzp) VN_RELE(ZTOV(tzp)); } if (sdzp == tdzp) rw_exit(&sdzp->z_name_lock); /* * FreeBSD: In OpenSolaris they only check if rename source is * ".." here, because "." is handled in their lookup. This is * not the case for FreeBSD, so we check for "." explicitly. */ if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0) serr = SET_ERROR(EINVAL); ZFS_EXIT(zfsvfs); return (serr); } if (terr) { zfs_dirent_unlock(sdl); VN_RELE(ZTOV(szp)); if (sdzp == tdzp) rw_exit(&sdzp->z_name_lock); if (strcmp(tnm, "..") == 0) terr = SET_ERROR(EINVAL); ZFS_EXIT(zfsvfs); return (terr); } /* * Must have write access at the source to remove the old entry * and write access at the target to create the new entry. * Note that if target and source are the same, this can be * done in a single check. */ if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) goto out; if (ZTOV(szp)->v_type == VDIR) { /* * Check to make sure rename is valid. * Can't do a move like this: /usr/a/b to /usr/a/b/c/d */ if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) goto out; } /* * Does target exist? */ if (tzp) { /* * Source and target must be the same type. */ if (ZTOV(szp)->v_type == VDIR) { if (ZTOV(tzp)->v_type != VDIR) { error = SET_ERROR(ENOTDIR); goto out; } } else { if (ZTOV(tzp)->v_type == VDIR) { error = SET_ERROR(EISDIR); goto out; } } /* * POSIX dictates that when the source and target * entries refer to the same file object, rename * must do nothing and exit without error. */ if (szp->z_id == tzp->z_id) { error = 0; goto out; } } vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); if (tzp) vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); /* * notify the target directory if it is not the same * as source directory. */ if (tdvp != sdvp) { vnevent_rename_dest_dir(tdvp, ct); } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); if (sdzp != tdzp) { dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, tdzp); } if (tzp) { dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, tzp); } zfs_sa_upgrade_txholds(tx, szp); dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { if (zl != NULL) zfs_rename_unlock(&zl); zfs_dirent_unlock(sdl); zfs_dirent_unlock(tdl); if (sdzp == tdzp) rw_exit(&sdzp->z_name_lock); VN_RELE(ZTOV(szp)); if (tzp) VN_RELE(ZTOV(tzp)); if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } dmu_tx_abort(tx); ZFS_EXIT(zfsvfs); return (error); } if (tzp) /* Attempt to remove the existing target */ error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); if (error == 0) { error = zfs_link_create(tdl, szp, tx, ZRENAMING); if (error == 0) { szp->z_pflags |= ZFS_AV_MODIFIED; error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), (void *)&szp->z_pflags, sizeof (uint64_t), tx); ASSERT0(error); error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); if (error == 0) { zfs_log_rename(zilog, tx, TX_RENAME | (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp); /* * Update path information for the target vnode */ vn_renamepath(tdvp, ZTOV(szp), tnm, strlen(tnm)); } else { /* * At this point, we have successfully created * the target name, but have failed to remove * the source name. Since the create was done * with the ZRENAMING flag, there are * complications; for one, the link count is * wrong. The easiest way to deal with this * is to remove the newly created target, and * return the original error. This must * succeed; fortunately, it is very unlikely to * fail, since we just created it. */ VERIFY3U(zfs_link_destroy(tdl, szp, tx, ZRENAMING, NULL), ==, 0); } } #ifdef FREEBSD_NAMECACHE if (error == 0) { cache_purge(sdvp); cache_purge(tdvp); cache_purge(ZTOV(szp)); if (tzp) cache_purge(ZTOV(tzp)); } #endif } dmu_tx_commit(tx); out: if (zl != NULL) zfs_rename_unlock(&zl); zfs_dirent_unlock(sdl); zfs_dirent_unlock(tdl); if (sdzp == tdzp) rw_exit(&sdzp->z_name_lock); VN_RELE(ZTOV(szp)); if (tzp) VN_RELE(ZTOV(tzp)); if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (error); } /* * Insert the indicated symbolic reference entry into the directory. * * IN: dvp - Directory to contain new symbolic link. * link - Name for new symlink entry. * vap - Attributes of new entry. * cr - credentials of caller. * ct - caller context * flags - case flags * * RETURN: 0 on success, error code on failure. * * Timestamps: * dvp - ctime|mtime updated */ /*ARGSUSED*/ static int zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, cred_t *cr, kthread_t *td) { znode_t *zp, *dzp = VTOZ(dvp); zfs_dirlock_t *dl; dmu_tx_t *tx; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zilog_t *zilog; uint64_t len = strlen(link); int error; int zflg = ZNEW; zfs_acl_ids_t acl_ids; boolean_t fuid_dirtied; uint64_t txtype = TX_SYMLINK; int flags = 0; ASSERT(vap->va_type == VLNK); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); zilog = zfsvfs->z_log; if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EILSEQ)); } if (flags & FIGNORECASE) zflg |= ZCILOOK; if (len > MAXPATHLEN) { ZFS_EXIT(zfsvfs); return (SET_ERROR(ENAMETOOLONG)); } if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids)) != 0) { ZFS_EXIT(zfsvfs); return (error); } top: /* * Attempt to lock directory; fail if entry already exists. */ error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); if (error) { zfs_acl_ids_free(&acl_ids); ZFS_EXIT(zfsvfs); return (error); } if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { zfs_acl_ids_free(&acl_ids); zfs_dirent_unlock(dl); ZFS_EXIT(zfsvfs); return (error); } if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { zfs_acl_ids_free(&acl_ids); zfs_dirent_unlock(dl); ZFS_EXIT(zfsvfs); return (SET_ERROR(EDQUOT)); } tx = dmu_tx_create(zfsvfs->z_os); fuid_dirtied = zfsvfs->z_fuid_dirty; dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ZFS_SA_BASE_ATTR_SIZE + len); dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, acl_ids.z_aclp->z_acl_bytes); } if (fuid_dirtied) zfs_fuid_txhold(zfsvfs, tx); error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } zfs_acl_ids_free(&acl_ids); dmu_tx_abort(tx); ZFS_EXIT(zfsvfs); return (error); } /* * Create a new object for the symlink. * for version 4 ZPL datsets the symlink will be an SA attribute */ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); mutex_enter(&zp->z_lock); if (zp->z_is_sa) error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), link, len, tx); else zfs_sa_symlink(zp, link, len, tx); mutex_exit(&zp->z_lock); zp->z_size = len; (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), &zp->z_size, sizeof (zp->z_size), tx); /* * Insert the new object into the directory. */ (void) zfs_link_create(dl, zp, tx, ZNEW); if (flags & FIGNORECASE) txtype |= TX_CI; zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); *vpp = ZTOV(zp); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); zfs_dirent_unlock(dl); if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (error); } /* * Return, in the buffer contained in the provided uio structure, * the symbolic path referred to by vp. * * IN: vp - vnode of symbolic link. * uio - structure to contain the link path. * cr - credentials of caller. * ct - caller context * * OUT: uio - structure containing the link path. * * RETURN: 0 on success, error code on failure. * * Timestamps: * vp - atime updated */ /* ARGSUSED */ static int zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; int error; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); mutex_enter(&zp->z_lock); if (zp->z_is_sa) error = sa_lookup_uio(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), uio); else error = zfs_sa_readlink(zp, uio); mutex_exit(&zp->z_lock); ZFS_ACCESSTIME_STAMP(zfsvfs, zp); ZFS_EXIT(zfsvfs); return (error); } /* * Insert a new entry into directory tdvp referencing svp. * * IN: tdvp - Directory to contain new entry. * svp - vnode of new entry. * name - name of new entry. * cr - credentials of caller. * ct - caller context * * RETURN: 0 on success, error code on failure. * * Timestamps: * tdvp - ctime|mtime updated * svp - ctime updated */ /* ARGSUSED */ static int zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, caller_context_t *ct, int flags) { znode_t *dzp = VTOZ(tdvp); znode_t *tzp, *szp; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zilog_t *zilog; zfs_dirlock_t *dl; dmu_tx_t *tx; vnode_t *realvp; int error; int zf = ZNEW; uint64_t parent; uid_t owner; ASSERT(tdvp->v_type == VDIR); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); zilog = zfsvfs->z_log; if (VOP_REALVP(svp, &realvp, ct) == 0) svp = realvp; /* * POSIX dictates that we return EPERM here. * Better choices include ENOTSUP or EISDIR. */ if (svp->v_type == VDIR) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } if (svp->v_vfsp != tdvp->v_vfsp || zfsctl_is_node(svp)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EXDEV)); } szp = VTOZ(svp); ZFS_VERIFY_ZP(szp); /* Prevent links to .zfs/shares files */ if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), &parent, sizeof (uint64_t))) != 0) { ZFS_EXIT(zfsvfs); return (error); } if (parent == zfsvfs->z_shares_dir) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EILSEQ)); } if (flags & FIGNORECASE) zf |= ZCILOOK; /* * We do not support links between attributes and non-attributes * because of the potential security risk of creating links * into "normal" file space in order to circumvent restrictions * imposed in attribute space. */ if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { ZFS_EXIT(zfsvfs); return (error); } top: /* * Attempt to lock directory; fail if entry already exists. */ error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); if (error) { ZFS_EXIT(zfsvfs); return (error); } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); zfs_sa_upgrade_txholds(tx, szp); zfs_sa_upgrade_txholds(tx, dzp); error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } dmu_tx_abort(tx); ZFS_EXIT(zfsvfs); return (error); } error = zfs_link_create(dl, szp, tx, 0); if (error == 0) { uint64_t txtype = TX_LINK; if (flags & FIGNORECASE) txtype |= TX_CI; zfs_log_link(zilog, tx, txtype, dzp, szp, name); } dmu_tx_commit(tx); zfs_dirent_unlock(dl); if (error == 0) { vnevent_link(svp, ct); } if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (error); } #ifdef sun /* * zfs_null_putapage() is used when the file system has been force * unmounted. It just drops the pages. */ /* ARGSUSED */ static int zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, int flags, cred_t *cr) { pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); return (0); } /* * Push a page out to disk, klustering if possible. * * IN: vp - file to push page to. * pp - page to push. * flags - additional flags. * cr - credentials of caller. * * OUT: offp - start of range pushed. * lenp - len of range pushed. * * RETURN: 0 on success, error code on failure. * * NOTE: callers must have locked the page to be pushed. On * exit, the page (and all other pages in the kluster) must be * unlocked. */ /* ARGSUSED */ static int zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, int flags, cred_t *cr) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; dmu_tx_t *tx; u_offset_t off, koff; size_t len, klen; int err; off = pp->p_offset; len = PAGESIZE; /* * If our blocksize is bigger than the page size, try to kluster * multiple pages so that we write a full block (thus avoiding * a read-modify-write). */ if (off < zp->z_size && zp->z_blksz > PAGESIZE) { klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0; ASSERT(koff <= zp->z_size); if (koff + klen > zp->z_size) klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); } ASSERT3U(btop(len), ==, btopr(len)); /* * Can't push pages past end-of-file. */ if (off >= zp->z_size) { /* ignore all pages */ err = 0; goto out; } else if (off + len > zp->z_size) { int npages = btopr(zp->z_size - off); page_t *trunc; page_list_break(&pp, &trunc, npages); /* ignore pages past end of file */ if (trunc) pvn_write_done(trunc, flags); len = zp->z_size - off; } if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { err = SET_ERROR(EDQUOT); goto out; } top: tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_write(tx, zp->z_id, off, len); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); err = dmu_tx_assign(tx, TXG_NOWAIT); if (err != 0) { if (err == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } dmu_tx_abort(tx); goto out; } if (zp->z_blksz <= PAGESIZE) { caddr_t va = zfs_map_page(pp, S_READ); ASSERT3U(len, <=, PAGESIZE); dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); zfs_unmap_page(pp, va); } else { err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); } if (err == 0) { uint64_t mtime[2], ctime[2]; sa_bulk_attr_t bulk[3]; int count = 0; SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags, 8); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); } dmu_tx_commit(tx); out: pvn_write_done(pp, (err ? B_ERROR : 0) | flags); if (offp) *offp = off; if (lenp) *lenp = len; return (err); } /* * Copy the portion of the file indicated from pages into the file. * The pages are stored in a page list attached to the files vnode. * * IN: vp - vnode of file to push page data to. * off - position in file to put data. * len - amount of data to write. * flags - flags to control the operation. * cr - credentials of caller. * ct - caller context. * * RETURN: 0 on success, error code on failure. * * Timestamps: * vp - ctime|mtime updated */ /*ARGSUSED*/ static int zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; page_t *pp; size_t io_len; u_offset_t io_off; uint_t blksz; rl_t *rl; int error = 0; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); /* * Align this request to the file block size in case we kluster. * XXX - this can result in pretty aggresive locking, which can * impact simultanious read/write access. One option might be * to break up long requests (len == 0) into block-by-block * operations to get narrower locking. */ blksz = zp->z_blksz; if (ISP2(blksz)) io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); else io_off = 0; if (len > 0 && ISP2(blksz)) io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); else io_len = 0; if (io_len == 0) { /* * Search the entire vp list for pages >= io_off. */ rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); goto out; } rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); if (off > zp->z_size) { /* past end of file */ zfs_range_unlock(rl); ZFS_EXIT(zfsvfs); return (0); } len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); for (off = io_off; io_off < off + len; io_off += io_len) { if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { pp = page_lookup(vp, io_off, (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); } else { pp = page_lookup_nowait(vp, io_off, (flags & B_FREE) ? SE_EXCL : SE_SHARED); } if (pp != NULL && pvn_getdirty(pp, flags)) { int err; /* * Found a dirty page to push */ err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); if (err) error = err; } else { io_len = PAGESIZE; } } out: zfs_range_unlock(rl); if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zfsvfs->z_log, zp->z_id); ZFS_EXIT(zfsvfs); return (error); } #endif /* sun */ /*ARGSUSED*/ void zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; int error; rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); if (zp->z_sa_hdl == NULL) { /* * The fs has been unmounted, or we did a * suspend/resume and this file no longer exists. */ rw_exit(&zfsvfs->z_teardown_inactive_lock); vrecycle(vp); return; } mutex_enter(&zp->z_lock); if (zp->z_unlinked) { /* * Fast path to recycle a vnode of a removed file. */ mutex_exit(&zp->z_lock); rw_exit(&zfsvfs->z_teardown_inactive_lock); vrecycle(vp); return; } mutex_exit(&zp->z_lock); if (zp->z_atime_dirty && zp->z_unlinked == 0) { dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); } else { mutex_enter(&zp->z_lock); (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), (void *)&zp->z_atime, sizeof (zp->z_atime), tx); zp->z_atime_dirty = 0; mutex_exit(&zp->z_lock); dmu_tx_commit(tx); } } rw_exit(&zfsvfs->z_teardown_inactive_lock); } #ifdef sun /* * Bounds-check the seek operation. * * IN: vp - vnode seeking within * ooff - old file offset * noffp - pointer to new file offset * ct - caller context * * RETURN: 0 on success, EINVAL if new offset invalid. */ /* ARGSUSED */ static int zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct) { if (vp->v_type == VDIR) return (0); return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); } /* * Pre-filter the generic locking function to trap attempts to place * a mandatory lock on a memory mapped file. */ static int zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); /* * We are following the UFS semantics with respect to mapcnt * here: If we see that the file is mapped already, then we will * return an error, but we don't worry about races between this * function and zfs_map(). */ if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EAGAIN)); } ZFS_EXIT(zfsvfs); return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); } /* * If we can't find a page in the cache, we will create a new page * and fill it with file data. For efficiency, we may try to fill * multiple pages at once (klustering) to fill up the supplied page * list. Note that the pages to be filled are held with an exclusive * lock to prevent access by other threads while they are being filled. */ static int zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) { znode_t *zp = VTOZ(vp); page_t *pp, *cur_pp; objset_t *os = zp->z_zfsvfs->z_os; u_offset_t io_off, total; size_t io_len; int err; if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { /* * We only have a single page, don't bother klustering */ io_off = off; io_len = PAGESIZE; pp = page_create_va(vp, io_off, io_len, PG_EXCL | PG_WAIT, seg, addr); } else { /* * Try to find enough pages to fill the page list */ pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len, off, plsz, 0); } if (pp == NULL) { /* * The page already exists, nothing to do here. */ *pl = NULL; return (0); } /* * Fill the pages in the kluster. */ cur_pp = pp; for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { caddr_t va; ASSERT3U(io_off, ==, cur_pp->p_offset); va = zfs_map_page(cur_pp, S_WRITE); err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, DMU_READ_PREFETCH); zfs_unmap_page(cur_pp, va); if (err) { /* On error, toss the entire kluster */ pvn_read_done(pp, B_ERROR); /* convert checksum errors into IO errors */ if (err == ECKSUM) err = SET_ERROR(EIO); return (err); } cur_pp = cur_pp->p_next; } /* * Fill in the page list array from the kluster starting * from the desired offset `off'. * NOTE: the page list will always be null terminated. */ pvn_plist_init(pp, pl, plsz, off, io_len, rw); ASSERT(pl == NULL || (*pl)->p_offset == off); return (0); } /* * Return pointers to the pages for the file region [off, off + len] * in the pl array. If plsz is greater than len, this function may * also return page pointers from after the specified region * (i.e. the region [off, off + plsz]). These additional pages are * only returned if they are already in the cache, or were created as * part of a klustered read. * * IN: vp - vnode of file to get data from. * off - position in file to get data from. * len - amount of data to retrieve. * plsz - length of provided page list. * seg - segment to obtain pages for. * addr - virtual address of fault. * rw - mode of created pages. * cr - credentials of caller. * ct - caller context. * * OUT: protp - protection mode of created pages. * pl - list of pages created. * * RETURN: 0 on success, error code on failure. * * Timestamps: * vp - atime updated */ /* ARGSUSED */ static int zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; page_t **pl0 = pl; int err = 0; /* we do our own caching, faultahead is unnecessary */ if (pl == NULL) return (0); else if (len > plsz) len = plsz; else len = P2ROUNDUP(len, PAGESIZE); ASSERT(plsz >= len); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); if (protp) *protp = PROT_ALL; /* * Loop through the requested range [off, off + len) looking * for pages. If we don't find a page, we will need to create * a new page and fill it with data from the file. */ while (len > 0) { if (*pl = page_lookup(vp, off, SE_SHARED)) *(pl+1) = NULL; else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) goto out; while (*pl) { ASSERT3U((*pl)->p_offset, ==, off); off += PAGESIZE; addr += PAGESIZE; if (len > 0) { ASSERT3U(len, >=, PAGESIZE); len -= PAGESIZE; } ASSERT3U(plsz, >=, PAGESIZE); plsz -= PAGESIZE; pl++; } } /* * Fill out the page array with any pages already in the cache. */ while (plsz > 0 && (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { off += PAGESIZE; plsz -= PAGESIZE; } out: if (err) { /* * Release any pages we have previously locked. */ while (pl > pl0) page_unlock(*--pl); } else { ZFS_ACCESSTIME_STAMP(zfsvfs, zp); } *pl = NULL; ZFS_EXIT(zfsvfs); return (err); } /* * Request a memory map for a section of a file. This code interacts * with common code and the VM system as follows: * * - common code calls mmap(), which ends up in smmap_common() * - this calls VOP_MAP(), which takes you into (say) zfs * - zfs_map() calls as_map(), passing segvn_create() as the callback * - segvn_create() creates the new segment and calls VOP_ADDMAP() * - zfs_addmap() updates z_mapcnt */ /*ARGSUSED*/ static int zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; segvn_crargs_t vn_a; int error; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); if ((prot & PROT_WRITE) && (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } if ((prot & (PROT_READ | PROT_EXEC)) && (zp->z_pflags & ZFS_AV_QUARANTINED)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EACCES)); } if (vp->v_flag & VNOMAP) { ZFS_EXIT(zfsvfs); return (SET_ERROR(ENOSYS)); } if (off < 0 || len > MAXOFFSET_T - off) { ZFS_EXIT(zfsvfs); return (SET_ERROR(ENXIO)); } if (vp->v_type != VREG) { ZFS_EXIT(zfsvfs); return (SET_ERROR(ENODEV)); } /* * If file is locked, disallow mapping. */ if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EAGAIN)); } as_rangelock(as); error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); if (error != 0) { as_rangeunlock(as); ZFS_EXIT(zfsvfs); return (error); } vn_a.vp = vp; vn_a.offset = (u_offset_t)off; vn_a.type = flags & MAP_TYPE; vn_a.prot = prot; vn_a.maxprot = maxprot; vn_a.cred = cr; vn_a.amp = NULL; vn_a.flags = flags & ~MAP_TYPE; vn_a.szc = 0; vn_a.lgrp_mem_policy_flags = 0; error = as_map(as, *addrp, len, segvn_create, &vn_a); as_rangeunlock(as); ZFS_EXIT(zfsvfs); return (error); } /* ARGSUSED */ static int zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, caller_context_t *ct) { uint64_t pages = btopr(len); atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); return (0); } /* * The reason we push dirty pages as part of zfs_delmap() is so that we get a * more accurate mtime for the associated file. Since we don't have a way of * detecting when the data was actually modified, we have to resort to * heuristics. If an explicit msync() is done, then we mark the mtime when the * last page is pushed. The problem occurs when the msync() call is omitted, * which by far the most common case: * * open() * mmap() * * munmap() * close() *